summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--.mailmap2
-rw-r--r--.pylintrc2
-rw-r--r--Documentation/ABI/stable/sysfs-block15
-rw-r--r--Documentation/ABI/testing/sysfs-class-power43
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-gaokun27
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu63
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd4
-rw-r--r--Documentation/ABI/testing/sysfs-driver-qat_ras8
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-acpi21
-rw-r--r--Documentation/ABI/testing/sysfs-fs-erofs8
-rw-r--r--Documentation/Makefile9
-rw-r--r--Documentation/RCU/listRCU.rst10
-rw-r--r--Documentation/RCU/whatisRCU.rst3
-rw-r--r--Documentation/admin-guide/blockdev/index.rst1
-rw-r--r--Documentation/admin-guide/blockdev/zoned_loop.rst169
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst81
-rw-r--r--Documentation/admin-guide/gpio/gpio-aggregator.rst107
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/indirect-target-selection.rst168
-rw-r--r--Documentation/admin-guide/hw-vuln/old_microcode.rst21
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt57
-rw-r--r--Documentation/admin-guide/namespaces/resource-control.rst24
-rw-r--r--Documentation/admin-guide/pm/cpufreq.rst8
-rw-r--r--Documentation/admin-guide/pm/intel_idle.rst21
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst104
-rw-r--r--Documentation/admin-guide/quickly-build-trimmed-linux.rst4
-rw-r--r--Documentation/admin-guide/reporting-issues.rst6
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst32
-rw-r--r--Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst4
-rw-r--r--Documentation/admin-guide/xfs.rst11
-rw-r--r--Documentation/arch/powerpc/htm.rst104
-rw-r--r--Documentation/arch/powerpc/kvm-nested.rst40
-rw-r--r--Documentation/arch/x86/amd-debugging.rst368
-rw-r--r--Documentation/arch/x86/cpuinfo.rst8
-rw-r--r--Documentation/arch/x86/index.rst2
-rw-r--r--Documentation/arch/x86/resume.svg4
-rw-r--r--Documentation/arch/x86/suspend.svg4
-rw-r--r--Documentation/arch/x86/x86_64/5level-paging.rst9
-rw-r--r--Documentation/arch/x86/x86_64/fsgs.rst2
-rw-r--r--Documentation/conf.py164
-rw-r--r--Documentation/core-api/dma-api.rst71
-rw-r--r--Documentation/core-api/genericirq.rst2
-rw-r--r--Documentation/core-api/irq/concepts.rst27
-rw-r--r--Documentation/core-api/irq/irq-domain.rst203
-rw-r--r--Documentation/core-api/printk-formats.rst3
-rw-r--r--Documentation/dev-tools/kunit/run_wrapper.rst2
-rw-r--r--Documentation/dev-tools/kunit/usage.rst38
-rw-r--r--Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml1
-rw-r--r--Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml38
-rw-r--r--Documentation/devicetree/bindings/crypto/amd-ccp.txt17
-rw-r--r--Documentation/devicetree/bindings/crypto/artpec6-crypto.txt16
-rw-r--r--Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml39
-rw-r--r--Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt22
-rw-r--r--Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml44
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml10
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec6.txt157
-rw-r--r--Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml134
-rw-r--r--Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt67
-rw-r--r--Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml69
-rw-r--r--Documentation/devicetree/bindings/crypto/img-hash.txt27
-rw-r--r--Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml133
-rw-r--r--Documentation/devicetree/bindings/crypto/marvell-cesa.txt44
-rw-r--r--Documentation/devicetree/bindings/crypto/mediatek-crypto.txt25
-rw-r--r--Documentation/devicetree/bindings/crypto/mv_cesa.txt32
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom-qce.yaml1
-rw-r--r--Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml16
-rw-r--r--Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml77
-rw-r--r--Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mxs.yaml70
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml4
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-vf610.yaml7
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max77759-gpio.yaml44
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml3
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml26
-rw-r--r--Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml8
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml20
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml24
-rw-r--r--Documentation/devicetree/bindings/gpio/sifive,gpio.yaml6
-rw-r--r--Documentation/devicetree/bindings/gpio/spacemit,k1-gpio.yaml96
-rw-r--r--Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml24
-rw-r--r--Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/econet,en751221-intc.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml4
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,rzg3e-xspi.yaml135
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77759.yaml99
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml22
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml1
-rw-r--r--Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml27
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.txt29
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.yaml66
-rw-r--r--Documentation/devicetree/bindings/mmc/mtk-sd.yaml2
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml4
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.yaml1
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci.txt13
-rw-r--r--Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml7
-rw-r--r--Documentation/devicetree/bindings/mmc/spacemit,sdhci.yaml53
-rw-r--r--Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/wm,wm8505-sdhc.yaml66
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml2
-rw-r--r--Documentation/devicetree/bindings/nvmem/maxim,max77759-nvmem.yaml32
-rw-r--r--Documentation/devicetree/bindings/pci/pci-ep.yaml68
-rw-r--r--Documentation/devicetree/bindings/power/allwinner,sun50i-h6-prcm-ppu.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/mediatek,power-controller.yaml2
-rw-r--r--Documentation/devicetree/bindings/power/qcom,rpmpd.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml52
-rw-r--r--Documentation/devicetree/bindings/power/rockchip,power-controller.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24190.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25980.yaml36
-rw-r--r--Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml14
-rw-r--r--Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml18
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max8971.yaml68
-rw-r--r--Documentation/devicetree/bindings/power/supply/pegatron,chagall-ec.yaml49
-rw-r--r--Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml67
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,mc33xs2410.yaml118
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,rzg2l-gpt.yaml378
-rw-r--r--Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml43
-rw-r--r--Documentation/devicetree/bindings/pwm/vt8500-pwm.txt18
-rw-r--r--Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml157
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml12
-rw-r--r--Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml3
-rw-r--r--Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-card2.yaml8
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs48l32.yaml195
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8375.yaml71
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8389.yaml50
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,mqs.yaml10
-rw-r--r--Documentation/devicetree/bindings/sound/loongson,ls1b-ac97.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98925.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml17
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml108
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,sm8250.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,alc203.yaml36
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9123.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9123p.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml144
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,dspi.yaml14
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml5
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml18
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml8
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml43
-rw-r--r--Documentation/devicetree/bindings/spi/samsung,spi.yaml1
-rw-r--r--Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml13
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rockchip.yaml1
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml1
-rw-r--r--Documentation/devicetree/bindings/thermal/airoha,en7581-thermal.yaml48
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.yaml2
-rw-r--r--Documentation/devicetree/bindings/timer/altr,timer-1.0.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/altr,timer-1.0.yaml39
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/cnxt,cx92755-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/csky,gx6605s-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/csky,mptimer.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/csky,mptimer.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/digicolor-timer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/econet,en751221-timer.yaml80
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer0.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer1.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,gtm.txt30
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,gtm.yaml83
-rw-r--r--Documentation/devicetree/bindings/timer/img,pistachio-gptimer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/img,pistachio-gptimer.yaml69
-rw-r--r--Documentation/devicetree/bindings/timer/jcore,pit.txt24
-rw-r--r--Documentation/devicetree/bindings/timer/jcore,pit.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt33
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.yaml56
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-timer.yaml88
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt44
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.txt16
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,s32g2-stm.yaml64
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,ostm.yaml12
-rw-r--r--Documentation/devicetree/bindings/timer/sifive,clint.yaml1
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.txt27
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.yaml30
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.yaml30
-rw-r--r--Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/st,spear-timer.txt16
-rw-r--r--Documentation/devicetree/bindings/timer/st,spear-timer.yaml36
-rw-r--r--Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml1
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.yaml63
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml2
-rw-r--r--Documentation/doc-guide/sphinx.rst14
-rw-r--r--Documentation/driver-api/basics.rst3
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst8
-rw-r--r--Documentation/driver-api/early-userspace/buffer-format.rst34
-rw-r--r--Documentation/driver-api/gpio/index.rst2
-rw-r--r--Documentation/driver-api/ntb.rst2
-rw-r--r--Documentation/driver-api/thermal/intel_dptf.rst21
-rw-r--r--Documentation/driver-api/usb/usb.rst1
-rw-r--r--Documentation/filesystems/bcachefs/casefolding.rst18
-rw-r--r--Documentation/filesystems/bcachefs/future/idle_work.rst78
-rw-r--r--Documentation/filesystems/bcachefs/index.rst7
-rw-r--r--Documentation/filesystems/erofs.rst1
-rw-r--r--Documentation/filesystems/fscrypt.rst189
-rw-r--r--Documentation/filesystems/index.rst1
-rw-r--r--Documentation/filesystems/iomap/design.rst16
-rw-r--r--Documentation/filesystems/locking.rst54
-rw-r--r--Documentation/filesystems/mount_api.rst16
-rw-r--r--Documentation/filesystems/netfs_library.rst1016
-rw-r--r--Documentation/filesystems/porting.rst40
-rw-r--r--Documentation/filesystems/relay.rst26
-rw-r--r--Documentation/filesystems/resctrl.rst (renamed from Documentation/arch/x86/resctrl.rst)6
-rw-r--r--Documentation/filesystems/vfs.rst39
-rw-r--r--Documentation/firmware-guide/acpi/dsd/data-node-references.rst26
-rw-r--r--Documentation/firmware-guide/acpi/dsd/graph.rst11
-rw-r--r--Documentation/firmware-guide/acpi/dsd/leds.rst7
-rw-r--r--Documentation/gpu/rfc/i915_scheduler.rst2
-rw-r--r--Documentation/hid/intel-thc-hid.rst8
-rw-r--r--Documentation/index.rst2
-rw-r--r--Documentation/kbuild/reproducible-builds.rst17
-rw-r--r--Documentation/leds/leds-class-multicolor.rst82
-rw-r--r--Documentation/netlink/specs/tc.yaml10
-rw-r--r--Documentation/networking/timestamping.rst8
-rw-r--r--Documentation/power/energy-model.rst8
-rw-r--r--Documentation/process/1.Intro.rst12
-rw-r--r--Documentation/process/adding-syscalls.rst84
-rw-r--r--Documentation/scheduler/sched-ext.rst14
-rw-r--r--Documentation/scheduler/sched-stats.rst2
-rw-r--r--Documentation/sphinx/automarkup.py97
-rw-r--r--Documentation/sphinx/kerneldoc.py219
-rw-r--r--Documentation/staging/speculation.rst1
-rw-r--r--Documentation/tools/rtla/common_timerlat_description.rst10
-rw-r--r--Documentation/tools/rtla/rtla-timerlat.rst9
-rw-r--r--Documentation/trace/coresight/panic.rst4
-rw-r--r--Documentation/trace/index.rst98
-rw-r--r--Documentation/translations/sp_SP/process/2.Process.rst11
-rw-r--r--Documentation/translations/sp_SP/process/howto.rst10
-rw-r--r--Documentation/translations/sp_SP/process/kernel-docs.rst5
-rw-r--r--Documentation/translations/sp_SP/process/submitting-patches.rst13
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/irq-domain.rst8
-rw-r--r--Documentation/translations/zh_CN/core-api/printk-formats.rst3
-rw-r--r--Documentation/translations/zh_CN/driver-api/gpio/index.rst2
-rw-r--r--Documentation/translations/zh_CN/how-to.rst459
-rw-r--r--Documentation/translations/zh_CN/index.rst24
-rw-r--r--Documentation/translations/zh_CN/networking/index.rst160
-rw-r--r--Documentation/translations/zh_CN/networking/msg_zerocopy.rst223
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst19
-rw-r--r--MAINTAINERS202
-rw-r--r--Makefile10
-rw-r--r--arch/Kconfig8
-rw-r--r--arch/alpha/kernel/perf_event.c11
-rw-r--r--arch/arc/kernel/intc-arcv2.c2
-rw-r--r--arch/arc/kernel/intc-compact.c5
-rw-r--r--arch/arc/kernel/mcip.c3
-rw-r--r--arch/arc/kernel/perf_event.c6
-rw-r--r--arch/arm/boot/dts/amlogic/meson8.dtsi6
-rw-r--r--arch/arm/boot/dts/amlogic/meson8b.dtsi6
-rw-r--r--arch/arm/common/sa1111.c6
-rw-r--r--arch/arm/configs/exynos_defconfig3
-rw-r--r--arch/arm/configs/milbeaut_m10v_defconfig4
-rw-r--r--arch/arm/configs/multi_v7_defconfig22
-rw-r--r--arch/arm/configs/omap2plus_defconfig1
-rw-r--r--arch/arm/configs/pxa_defconfig4
-rw-r--r--arch/arm/configs/shmobile_defconfig28
-rw-r--r--arch/arm/configs/spitz_defconfig2
-rw-r--r--arch/arm/crypto/Kconfig59
-rw-r--r--arch/arm/crypto/Makefile20
-rw-r--r--arch/arm/crypto/aes-ce-glue.c104
-rw-r--r--arch/arm/crypto/aes-neonbs-glue.c118
-rw-r--r--arch/arm/crypto/blake2b-neon-glue.c21
-rw-r--r--arch/arm/crypto/chacha-glue.c352
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c104
-rw-r--r--arch/arm/crypto/poly1305-glue.c274
-rw-r--r--arch/arm/crypto/sha1-ce-glue.c36
-rw-r--r--arch/arm/crypto/sha1.h14
-rw-r--r--arch/arm/crypto/sha1_glue.c33
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c39
-rw-r--r--arch/arm/crypto/sha2-ce-glue.c109
-rw-r--r--arch/arm/crypto/sha256_glue.c117
-rw-r--r--arch/arm/crypto/sha256_glue.h15
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c92
-rw-r--r--arch/arm/crypto/sha512-glue.c36
-rw-r--r--arch/arm/crypto/sha512-neon-glue.c43
-rw-r--r--arch/arm/crypto/sha512.h6
-rw-r--r--arch/arm/include/asm/simd.h8
-rw-r--r--arch/arm/lib/Makefile6
-rw-r--r--arch/arm/lib/crc-t10dif.c (renamed from arch/arm/lib/crc-t10dif-glue.c)6
-rw-r--r--arch/arm/lib/crc32.c (renamed from arch/arm/lib/crc32-glue.c)6
-rw-r--r--arch/arm/lib/crypto/.gitignore3
-rw-r--r--arch/arm/lib/crypto/Kconfig31
-rw-r--r--arch/arm/lib/crypto/Makefile32
-rw-r--r--arch/arm/lib/crypto/blake2s-core.S (renamed from arch/arm/crypto/blake2s-core.S)0
-rw-r--r--arch/arm/lib/crypto/blake2s-glue.c (renamed from arch/arm/crypto/blake2s-glue.c)0
-rw-r--r--arch/arm/lib/crypto/chacha-glue.c138
-rw-r--r--arch/arm/lib/crypto/chacha-neon-core.S (renamed from arch/arm/crypto/chacha-neon-core.S)2
-rw-r--r--arch/arm/lib/crypto/chacha-scalar-core.S (renamed from arch/arm/crypto/chacha-scalar-core.S)5
-rw-r--r--arch/arm/lib/crypto/poly1305-armv4.pl (renamed from arch/arm/crypto/poly1305-armv4.pl)4
-rw-r--r--arch/arm/lib/crypto/poly1305-glue.c80
-rw-r--r--arch/arm/lib/crypto/sha256-armv4.pl (renamed from arch/arm/crypto/sha256-armv4.pl)20
-rw-r--r--arch/arm/lib/crypto/sha256-ce.S (renamed from arch/arm/crypto/sha2-ce-core.S)10
-rw-r--r--arch/arm/lib/crypto/sha256.c64
-rw-r--r--arch/arm/mach-exynos/suspend.c5
-rw-r--r--arch/arm/mach-imx/avic.c4
-rw-r--r--arch/arm/mach-imx/gpc.c5
-rw-r--r--arch/arm/mach-imx/tzic.c4
-rw-r--r--arch/arm/mach-omap1/irq.c3
-rw-r--r--arch/arm/mach-omap2/omap-wakeupgen.c5
-rw-r--r--arch/arm/mach-pxa/irq.c5
-rw-r--r--arch/arm/plat-orion/gpio.c18
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts38
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts14
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi22
-rw-r--r--arch/arm64/boot/dts/amazon/alpine-v2.dtsi2
-rw-r--r--arch/arm64/boot/dts/amazon/alpine-v3.dtsi2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl.dtsi6
-rw-r--r--arch/arm64/boot/dts/apple/t8103-j293.dts10
-rw-r--r--arch/arm64/boot/dts/apple/t8112-j493.dts10
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi12
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi6
-rw-r--r--arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi8
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588j.dtsi53
-rw-r--r--arch/arm64/configs/defconfig34
-rw-r--r--arch/arm64/crypto/Kconfig53
-rw-r--r--arch/arm64/crypto/Makefile20
-rw-r--r--arch/arm64/crypto/aes-glue.c124
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c237
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c143
-rw-r--r--arch/arm64/crypto/poly1305-glue.c232
-rw-r--r--arch/arm64/crypto/polyval-ce-glue.c73
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c70
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c192
-rw-r--r--arch/arm64/crypto/sha256-glue.c194
-rw-r--r--arch/arm64/crypto/sha3-ce-glue.c111
-rw-r--r--arch/arm64/crypto/sha512-ce-glue.c49
-rw-r--r--arch/arm64/crypto/sha512-glue.c35
-rw-r--r--arch/arm64/crypto/sm3-ce-glue.c48
-rw-r--r--arch/arm64/crypto/sm3-neon-glue.c48
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c100
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/insn.h1
-rw-r--r--arch/arm64/include/asm/spectre.h3
-rw-r--r--arch/arm64/kernel/proton-pack.c13
-rw-r--r--arch/arm64/lib/Makefile7
-rw-r--r--arch/arm64/lib/crc-t10dif.c (renamed from arch/arm64/lib/crc-t10dif-glue.c)6
-rw-r--r--arch/arm64/lib/crc32-core.S (renamed from arch/arm64/lib/crc32.S)0
-rw-r--r--arch/arm64/lib/crc32.c (renamed from arch/arm64/lib/crc32-glue.c)0
-rw-r--r--arch/arm64/lib/crypto/.gitignore3
-rw-r--r--arch/arm64/lib/crypto/Kconfig20
-rw-r--r--arch/arm64/lib/crypto/Makefile24
-rw-r--r--arch/arm64/lib/crypto/chacha-neon-core.S (renamed from arch/arm64/crypto/chacha-neon-core.S)2
-rw-r--r--arch/arm64/lib/crypto/chacha-neon-glue.c119
-rw-r--r--arch/arm64/lib/crypto/poly1305-armv8.pl (renamed from arch/arm64/crypto/poly1305-armv8.pl)0
-rw-r--r--arch/arm64/lib/crypto/poly1305-glue.c73
-rw-r--r--arch/arm64/lib/crypto/sha2-armv8.pl (renamed from arch/arm64/crypto/sha512-armv8.pl)2
-rw-r--r--arch/arm64/lib/crypto/sha256-ce.S (renamed from arch/arm64/crypto/sha2-ce-core.S)41
-rw-r--r--arch/arm64/lib/crypto/sha256.c75
-rw-r--r--arch/arm64/lib/insn.c60
-rw-r--r--arch/arm64/net/bpf_jit_comp.c57
-rw-r--r--arch/arm64/xen/hypercall.S21
-rw-r--r--arch/csky/kernel/perf_event.c3
-rw-r--r--arch/loongarch/configs/loongson3_defconfig2
-rw-r--r--arch/loongarch/include/asm/asm-prototypes.h8
-rw-r--r--arch/loongarch/include/asm/ptrace.h2
-rw-r--r--arch/loongarch/include/asm/uprobes.h1
-rw-r--r--arch/loongarch/kernel/entry.S22
-rw-r--r--arch/loongarch/kernel/genex.S7
-rw-r--r--arch/loongarch/kernel/kfpu.c22
-rw-r--r--arch/loongarch/kernel/perf_event.c3
-rw-r--r--arch/loongarch/kernel/process.c33
-rw-r--r--arch/loongarch/kernel/time.c2
-rw-r--r--arch/loongarch/kernel/uprobes.c11
-rw-r--r--arch/loongarch/lib/crc32-loongarch.c4
-rw-r--r--arch/loongarch/power/hibernate.c3
-rw-r--r--arch/m68k/configs/amcore_defconfig1
-rw-r--r--arch/m68k/configs/amiga_defconfig6
-rw-r--r--arch/m68k/configs/apollo_defconfig6
-rw-r--r--arch/m68k/configs/atari_defconfig6
-rw-r--r--arch/m68k/configs/bvme6000_defconfig6
-rw-r--r--arch/m68k/configs/hp300_defconfig6
-rw-r--r--arch/m68k/configs/mac_defconfig6
-rw-r--r--arch/m68k/configs/multi_defconfig6
-rw-r--r--arch/m68k/configs/mvme147_defconfig6
-rw-r--r--arch/m68k/configs/mvme16x_defconfig6
-rw-r--r--arch/m68k/configs/q40_defconfig6
-rw-r--r--arch/m68k/configs/sun3_defconfig6
-rw-r--r--arch/m68k/configs/sun3x_defconfig6
-rw-r--r--arch/m68k/kernel/setup_mm.c2
-rw-r--r--arch/m68k/mac/config.c2
-rw-r--r--arch/mips/ath25/ar2315.c4
-rw-r--r--arch/mips/ath25/ar5312.c4
-rw-r--r--arch/mips/cavium-octeon/Kconfig6
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-md5.c121
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha1.c138
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha256.c250
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha512.c157
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c25
-rw-r--r--arch/mips/configs/cavium_octeon_defconfig1
-rw-r--r--arch/mips/configs/decstation_64_defconfig1
-rw-r--r--arch/mips/configs/decstation_defconfig1
-rw-r--r--arch/mips/configs/decstation_r4k_defconfig1
-rw-r--r--arch/mips/configs/gcw0_defconfig1
-rw-r--r--arch/mips/configs/gpr_defconfig2
-rw-r--r--arch/mips/configs/ip28_defconfig1
-rw-r--r--arch/mips/configs/lemote2f_defconfig2
-rw-r--r--arch/mips/configs/mtx1_defconfig2
-rw-r--r--arch/mips/configs/rb532_defconfig2
-rw-r--r--arch/mips/crypto/Kconfig33
-rw-r--r--arch/mips/crypto/Makefile17
-rw-r--r--arch/mips/crypto/chacha-glue.c146
-rw-r--r--arch/mips/crypto/poly1305-glue.c192
-rw-r--r--arch/mips/include/asm/socket.h9
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c3
-rw-r--r--arch/mips/lantiq/irq.c2
-rw-r--r--arch/mips/lib/Makefile2
-rw-r--r--arch/mips/lib/crc32-mips.c4
-rw-r--r--arch/mips/lib/crypto/.gitignore2
-rw-r--r--arch/mips/lib/crypto/Kconfig12
-rw-r--r--arch/mips/lib/crypto/Makefile19
-rw-r--r--arch/mips/lib/crypto/chacha-core.S (renamed from arch/mips/crypto/chacha-core.S)0
-rw-r--r--arch/mips/lib/crypto/chacha-glue.c29
-rw-r--r--arch/mips/lib/crypto/poly1305-glue.c33
-rw-r--r--arch/mips/lib/crypto/poly1305-mips.pl (renamed from arch/mips/crypto/poly1305-mips.pl)12
-rw-r--r--arch/mips/pci/pci-ar2315.c4
-rw-r--r--arch/mips/pci/pci-rt3883.c7
-rw-r--r--arch/mips/ralink/irq.c2
-rw-r--r--arch/nios2/kernel/irq.c3
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig2
-rw-r--r--arch/parisc/configs/generic-64bit_defconfig1
-rw-r--r--arch/powerpc/Kconfig11
-rw-r--r--arch/powerpc/boot/Makefile1
-rw-r--r--arch/powerpc/boot/rs6000.h6
-rw-r--r--arch/powerpc/configs/g5_defconfig2
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/configs/ppc64_defconfig2
-rw-r--r--arch/powerpc/configs/ppc64e_defconfig2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig2
-rw-r--r--arch/powerpc/crypto/Kconfig44
-rw-r--r--arch/powerpc/crypto/Makefile6
-rw-r--r--arch/powerpc/crypto/aes.c8
-rw-r--r--arch/powerpc/crypto/aes_cbc.c4
-rw-r--r--arch/powerpc/crypto/aes_ctr.c4
-rw-r--r--arch/powerpc/crypto/aes_xts.c4
-rw-r--r--arch/powerpc/crypto/chacha-p10-glue.c221
-rw-r--r--arch/powerpc/crypto/ghash.c91
-rw-r--r--arch/powerpc/crypto/md5-glue.c99
-rw-r--r--arch/powerpc/crypto/poly1305-p10-glue.c186
-rw-r--r--arch/powerpc/crypto/sha1-spe-glue.c130
-rw-r--r--arch/powerpc/crypto/sha1.c101
-rw-r--r--arch/powerpc/crypto/sha256-spe-glue.c235
-rw-r--r--arch/powerpc/include/asm/guest-state-buffer.h35
-rw-r--r--arch/powerpc/include/asm/hvcall.h13
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h20
-rw-r--r--arch/powerpc/include/asm/preempt.h16
-rw-r--r--arch/powerpc/include/asm/rtas.h4
-rw-r--r--arch/powerpc/include/uapi/asm/papr-indices.h41
-rw-r--r--arch/powerpc/include/uapi/asm/papr-physical-attestation.h31
-rw-r--r--arch/powerpc/include/uapi/asm/papr-platform-dump.h16
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/fadump.c6
-rw-r--r--arch/powerpc/kernel/interrupt.c6
-rw-r--r--arch/powerpc/kernel/iommu.c5
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c3
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/kernel/rtas.c8
-rw-r--r--arch/powerpc/kernel/trace/ftrace_entry.S2
-rw-r--r--arch/powerpc/kexec/crash.c5
-rw-r--r--arch/powerpc/kvm/Kconfig13
-rw-r--r--arch/powerpc/kvm/book3s_hv.c20
-rw-r--r--arch/powerpc/kvm/book3s_hv_nestedv2.c6
-rw-r--r--arch/powerpc/kvm/booke.c8
-rw-r--r--arch/powerpc/kvm/guest-state-buffer.c39
-rw-r--r--arch/powerpc/kvm/test-guest-state-buffer.c214
-rw-r--r--arch/powerpc/kvm/timing.h4
-rw-r--r--arch/powerpc/lib/Makefile6
-rw-r--r--arch/powerpc/lib/crc-t10dif.c (renamed from arch/powerpc/lib/crc-t10dif-glue.c)18
-rw-r--r--arch/powerpc/lib/crc-vpmsum-template.S (renamed from arch/powerpc/lib/crc32-vpmsum_core.S)0
-rw-r--r--arch/powerpc/lib/crc32.c (renamed from arch/powerpc/lib/crc32-glue.c)17
-rw-r--r--arch/powerpc/lib/crc32c-vpmsum_asm.S2
-rw-r--r--arch/powerpc/lib/crct10dif-vpmsum_asm.S2
-rw-r--r--arch/powerpc/lib/crypto/Kconfig22
-rw-r--r--arch/powerpc/lib/crypto/Makefile10
-rw-r--r--arch/powerpc/lib/crypto/chacha-p10-glue.c100
-rw-r--r--arch/powerpc/lib/crypto/chacha-p10le-8x.S (renamed from arch/powerpc/crypto/chacha-p10le-8x.S)6
-rw-r--r--arch/powerpc/lib/crypto/poly1305-p10-glue.c96
-rw-r--r--arch/powerpc/lib/crypto/poly1305-p10le_64.S (renamed from arch/powerpc/crypto/poly1305-p10le_64.S)0
-rw-r--r--arch/powerpc/lib/crypto/sha256-spe-asm.S (renamed from arch/powerpc/crypto/sha256-spe-asm.S)0
-rw-r--r--arch/powerpc/lib/crypto/sha256.c70
-rw-r--r--arch/powerpc/lib/vmx-helper.c2
-rw-r--r--arch/powerpc/mm/fault.c5
-rw-r--r--arch/powerpc/mm/nohash/8xx.c32
-rw-r--r--arch/powerpc/net/bpf_jit.h20
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c33
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c6
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c15
-rw-r--r--arch/powerpc/perf/Makefile2
-rw-r--r--arch/powerpc/perf/core-book3s.c9
-rw-r--r--arch/powerpc/perf/core-fsl-emb.c6
-rw-r--r--arch/powerpc/perf/kvm-hv-pmu.c435
-rw-r--r--arch/powerpc/platforms/44x/gpio.c7
-rw-r--r--arch/powerpc/platforms/44x/uic.c7
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads_cpld.c3
-rw-r--r--arch/powerpc/platforms/52xx/media5200.c2
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c12
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pic.c4
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c13
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c4
-rw-r--r--arch/powerpc/platforms/8xx/cpm1-ic.c5
-rw-r--r--arch/powerpc/platforms/8xx/cpm1.c12
-rw-r--r--arch/powerpc/platforms/8xx/pic.c5
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c7
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.c7
-rw-r--r--arch/powerpc/platforms/powermac/pic.c7
-rw-r--r--arch/powerpc/platforms/powermac/setup.c4
-rw-r--r--arch/powerpc/platforms/powermac/smp.c2
-rw-r--r--arch/powerpc/platforms/powermac/time.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c3
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c3
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c2
-rw-r--r--arch/powerpc/platforms/pseries/Makefile3
-rw-r--r--arch/powerpc/platforms/pseries/htmdump.c395
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c2
-rw-r--r--arch/powerpc/platforms/pseries/msi.c9
-rw-r--r--arch/powerpc/platforms/pseries/papr-indices.c488
-rw-r--r--arch/powerpc/platforms/pseries/papr-phy-attest.c288
-rw-r--r--arch/powerpc/platforms/pseries/papr-platform-dump.c411
-rw-r--r--arch/powerpc/platforms/pseries/papr-rtas-common.c311
-rw-r--r--arch/powerpc/platforms/pseries/papr-rtas-common.h61
-rw-r--r--arch/powerpc/platforms/pseries/papr-vpd.c352
-rw-r--r--arch/powerpc/sysdev/cpm2_pic.c5
-rw-r--r--arch/powerpc/sysdev/cpm_common.c6
-rw-r--r--arch/powerpc/sysdev/ehv_pic.c7
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c2
-rw-r--r--arch/powerpc/sysdev/ge/ge_pic.c7
-rw-r--r--arch/powerpc/sysdev/i8259.c4
-rw-r--r--arch/powerpc/sysdev/ipic.c7
-rw-r--r--arch/powerpc/sysdev/mpic.c17
-rw-r--r--arch/powerpc/sysdev/tsi108_pci.c4
-rw-r--r--arch/powerpc/sysdev/xive/common.c2
-rw-r--r--arch/powerpc/xmon/xmon.c2
-rw-r--r--arch/riscv/boot/dts/sophgo/cv18xx.dtsi2
-rw-r--r--arch/riscv/crypto/Kconfig23
-rw-r--r--arch/riscv/crypto/Makefile6
-rw-r--r--arch/riscv/crypto/chacha-riscv64-glue.c101
-rw-r--r--arch/riscv/crypto/ghash-riscv64-glue.c58
-rw-r--r--arch/riscv/crypto/sha256-riscv64-glue.c137
-rw-r--r--arch/riscv/crypto/sha512-riscv64-glue.c45
-rw-r--r--arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S4
-rw-r--r--arch/riscv/crypto/sm3-riscv64-glue.c47
-rw-r--r--arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S4
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h2
-rw-r--r--arch/riscv/kernel/entry.S20
-rw-r--r--arch/riscv/kernel/process.c21
-rw-r--r--arch/riscv/lib/Makefile1
-rw-r--r--arch/riscv/lib/crypto/Kconfig16
-rw-r--r--arch/riscv/lib/crypto/Makefile7
-rw-r--r--arch/riscv/lib/crypto/chacha-riscv64-glue.c75
-rw-r--r--arch/riscv/lib/crypto/chacha-riscv64-zvkb.S (renamed from arch/riscv/crypto/chacha-riscv64-zvkb.S)71
-rw-r--r--arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S (renamed from arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S)8
-rw-r--r--arch/riscv/lib/crypto/sha256.c67
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/ipl_parm.c7
-rw-r--r--arch/s390/boot/printk.c7
-rw-r--r--arch/s390/boot/startup.c17
-rw-r--r--arch/s390/boot/string.c12
-rw-r--r--arch/s390/configs/debug_defconfig5
-rw-r--r--arch/s390/configs/defconfig5
-rw-r--r--arch/s390/crypto/Kconfig33
-rw-r--r--arch/s390/crypto/Makefile4
-rw-r--r--arch/s390/crypto/chacha-glue.c124
-rw-r--r--arch/s390/crypto/ghash_s390.c104
-rw-r--r--arch/s390/crypto/hmac_s390.c174
-rw-r--r--arch/s390/crypto/paes_s390.c1815
-rw-r--r--arch/s390/crypto/sha.h22
-rw-r--r--arch/s390/crypto/sha1_s390.c20
-rw-r--r--arch/s390/crypto/sha256_s390.c143
-rw-r--r--arch/s390/crypto/sha3_256_s390.c58
-rw-r--r--arch/s390/crypto/sha3_512_s390.c65
-rw-r--r--arch/s390/crypto/sha512_s390.c62
-rw-r--r--arch/s390/crypto/sha_common.c84
-rw-r--r--arch/s390/hypfs/inode.c2
-rw-r--r--arch/s390/include/asm/asce.h36
-rw-r--r--arch/s390/include/asm/cpacf.h18
-rw-r--r--arch/s390/include/asm/cpufeature.h1
-rw-r--r--arch/s390/include/asm/diag288.h41
-rw-r--r--arch/s390/include/asm/futex.h6
-rw-r--r--arch/s390/include/asm/machine.h1
-rw-r--r--arch/s390/include/asm/mmu_context.h17
-rw-r--r--arch/s390/include/asm/pkey.h15
-rw-r--r--arch/s390/include/asm/ptrace.h47
-rw-r--r--arch/s390/include/asm/string.h20
-rw-r--r--arch/s390/include/asm/thread_info.h5
-rw-r--r--arch/s390/include/asm/uaccess.h12
-rw-r--r--arch/s390/include/asm/uv.h5
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/cert_store.c2
-rw-r--r--arch/s390/kernel/cpufeature.c5
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/debug.c2
-rw-r--r--arch/s390/kernel/entry.S20
-rw-r--r--arch/s390/kernel/ipl.c27
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c4
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c5
-rw-r--r--arch/s390/kernel/processor.c16
-rw-r--r--arch/s390/kernel/ptrace.c33
-rw-r--r--arch/s390/kernel/smp.c2
-rw-r--r--arch/s390/kernel/uv.c47
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/crc32.c (renamed from arch/s390/lib/crc32-glue.c)21
-rw-r--r--arch/s390/lib/crypto/Kconfig13
-rw-r--r--arch/s390/lib/crypto/Makefile6
-rw-r--r--arch/s390/lib/crypto/chacha-glue.c56
-rw-r--r--arch/s390/lib/crypto/chacha-s390.S (renamed from arch/s390/crypto/chacha-s390.S)0
-rw-r--r--arch/s390/lib/crypto/chacha-s390.h (renamed from arch/s390/crypto/chacha-s390.h)0
-rw-r--r--arch/s390/lib/crypto/sha256.c47
-rw-r--r--arch/s390/lib/string.c47
-rw-r--r--arch/s390/lib/uaccess.c5
-rw-r--r--arch/s390/mm/extmem.c18
-rw-r--r--arch/s390/mm/pgalloc.c17
-rw-r--r--arch/s390/pci/pci.c45
-rw-r--r--arch/s390/pci/pci_bus.h7
-rw-r--r--arch/s390/pci/pci_event.c22
-rw-r--r--arch/s390/pci/pci_mmio.c12
-rw-r--r--arch/sh/boards/mach-se/7343/irq.c7
-rw-r--r--arch/sh/boards/mach-se/7722/irq.c4
-rw-r--r--arch/sh/boards/mach-x3proto/gpio.c2
-rw-r--r--arch/sh/configs/migor_defconfig1
-rw-r--r--arch/sparc/configs/sparc64_defconfig2
-rw-r--r--arch/sparc/crypto/Kconfig10
-rw-r--r--arch/sparc/crypto/Makefile2
-rw-r--r--arch/sparc/crypto/aes_asm.S3
-rw-r--r--arch/sparc/crypto/aes_glue.c3
-rw-r--r--arch/sparc/crypto/camellia_asm.S3
-rw-r--r--arch/sparc/crypto/camellia_glue.c3
-rw-r--r--arch/sparc/crypto/des_asm.S3
-rw-r--r--arch/sparc/crypto/des_glue.c3
-rw-r--r--arch/sparc/crypto/md5_asm.S3
-rw-r--r--arch/sparc/crypto/md5_glue.c142
-rw-r--r--arch/sparc/crypto/sha1_asm.S3
-rw-r--r--arch/sparc/crypto/sha1_glue.c112
-rw-r--r--arch/sparc/crypto/sha256_glue.c210
-rw-r--r--arch/sparc/crypto/sha512_asm.S3
-rw-r--r--arch/sparc/crypto/sha512_glue.c105
-rw-r--r--arch/sparc/include/asm/opcodes.h (renamed from arch/sparc/crypto/opcodes.h)6
-rw-r--r--arch/sparc/kernel/Makefile1
-rw-r--r--arch/sparc/kernel/perf_event.c3
-rw-r--r--arch/sparc/kernel/setup.c46
-rw-r--r--arch/sparc/lib/Makefile3
-rw-r--r--arch/sparc/lib/crc32.c (renamed from arch/sparc/lib/crc32_glue.c)6
-rw-r--r--arch/sparc/lib/crc32c_asm.S3
-rw-r--r--arch/sparc/lib/crypto/Kconfig8
-rw-r--r--arch/sparc/lib/crypto/Makefile4
-rw-r--r--arch/sparc/lib/crypto/sha256.c64
-rw-r--r--arch/sparc/lib/crypto/sha256_asm.S (renamed from arch/sparc/crypto/sha256_asm.S)5
-rw-r--r--arch/um/Makefile1
-rw-r--r--arch/um/include/asm/fpu/api.h2
-rw-r--r--arch/um/kernel/um_arch.c2
-rw-r--r--arch/x86/Kconfig58
-rw-r--r--arch/x86/Kconfig.assembler14
-rw-r--r--arch/x86/Kconfig.cpu24
-rw-r--r--arch/x86/Kconfig.cpufeatures4
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/bioscall.S4
-rw-r--r--arch/x86/boot/boot.h6
-rw-r--r--arch/x86/boot/compressed/Makefile10
-rw-r--r--arch/x86/boot/compressed/head_64.S1
-rw-r--r--arch/x86/boot/compressed/misc.c1
-rw-r--r--arch/x86/boot/compressed/misc.h8
-rw-r--r--arch/x86/boot/compressed/pgtable.h18
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c12
-rw-r--r--arch/x86/boot/compressed/sev-handle-vc.c134
-rw-r--r--arch/x86/boot/compressed/sev.c156
-rw-r--r--arch/x86/boot/compressed/sev.h21
-rw-r--r--arch/x86/boot/compressed/string.c8
-rw-r--r--arch/x86/boot/copy.S8
-rw-r--r--arch/x86/boot/header.S6
-rw-r--r--arch/x86/boot/startup/Makefile30
-rw-r--r--arch/x86/boot/startup/efi-mixed.S (renamed from drivers/firmware/efi/libstub/x86-mixed.S)0
-rw-r--r--arch/x86/boot/startup/gdt_idt.c71
-rw-r--r--arch/x86/boot/startup/la57toggle.S (renamed from arch/x86/boot/compressed/la57toggle.S)1
-rw-r--r--arch/x86/boot/startup/map_kernel.c217
-rw-r--r--arch/x86/boot/startup/sev-shared.c (renamed from arch/x86/coco/sev/shared.c)894
-rw-r--r--arch/x86/boot/startup/sev-startup.c368
-rw-r--r--arch/x86/boot/startup/sme.c (renamed from arch/x86/mm/mem_encrypt_identity.c)30
-rw-r--r--arch/x86/boot/string.c2
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/coco/core.c2
-rw-r--r--arch/x86/coco/sev/Makefile23
-rw-r--r--arch/x86/coco/sev/core.c2042
-rw-r--r--arch/x86/coco/sev/sev-nmi.c108
-rw-r--r--arch/x86/coco/sev/vc-handle.c1061
-rw-r--r--arch/x86/coco/sev/vc-shared.c504
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/crypto/Kconfig131
-rw-r--r--arch/x86/crypto/Makefile23
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c13
-rw-r--r--arch/x86/crypto/aes-ctr-avx-x86_64.S47
-rw-r--r--arch/x86/crypto/aes-xts-avx-x86_64.S206
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c174
-rw-r--r--arch/x86/crypto/aria_aesni_avx2_glue.c22
-rw-r--r--arch/x86/crypto/aria_aesni_avx_glue.c20
-rw-r--r--arch/x86/crypto/aria_gfni_avx512_glue.c22
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c21
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c21
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c21
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c20
-rw-r--r--arch/x86/crypto/chacha_glue.c311
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S5
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c289
-rw-r--r--arch/x86/crypto/poly1305_glue.c290
-rw-r--r--arch/x86/crypto/polyval-clmulni_glue.c72
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c21
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c21
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c21
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c89
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c467
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c75
-rw-r--r--arch/x86/crypto/sm3_avx_glue.c54
-rw-r--r--arch/x86/crypto/sm4_aesni_avx2_glue.c31
-rw-r--r--arch/x86/crypto/sm4_aesni_avx_glue.c31
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c21
-rw-r--r--arch/x86/entry/entry_64.S20
-rw-r--r--arch/x86/entry/vdso/vma.c35
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/events/amd/brs.c12
-rw-r--r--arch/x86/events/amd/core.c16
-rw-r--r--arch/x86/events/amd/ibs.c33
-rw-r--r--arch/x86/events/amd/iommu.c2
-rw-r--r--arch/x86/events/amd/lbr.c21
-rw-r--r--arch/x86/events/amd/power.c11
-rw-r--r--arch/x86/events/amd/uncore.c117
-rw-r--r--arch/x86/events/core.c91
-rw-r--r--arch/x86/events/intel/bts.c151
-rw-r--r--arch/x86/events/intel/core.c414
-rw-r--r--arch/x86/events/intel/cstate.c3
-rw-r--r--arch/x86/events/intel/ds.c77
-rw-r--r--arch/x86/events/intel/knc.c24
-rw-r--r--arch/x86/events/intel/lbr.c46
-rw-r--r--arch/x86/events/intel/p4.c28
-rw-r--r--arch/x86/events/intel/p6.c13
-rw-r--r--arch/x86/events/intel/pt.c37
-rw-r--r--arch/x86/events/intel/uncore.c15
-rw-r--r--arch/x86/events/intel/uncore_discovery.c11
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c71
-rw-r--r--arch/x86/events/intel/uncore_snb.c43
-rw-r--r--arch/x86/events/intel/uncore_snbep.c51
-rw-r--r--arch/x86/events/msr.c4
-rw-r--r--arch/x86/events/perf_event.h72
-rw-r--r--arch/x86/events/perf_event_flags.h41
-rw-r--r--arch/x86/events/probe.c4
-rw-r--r--arch/x86/events/rapl.c9
-rw-r--r--arch/x86/events/utils.c1
-rw-r--r--arch/x86/events/zhaoxin/core.c20
-rw-r--r--arch/x86/hyperv/hv_apic.c11
-rw-r--r--arch/x86/hyperv/hv_init.c67
-rw-r--r--arch/x86/hyperv/hv_spinlock.c7
-rw-r--r--arch/x86/hyperv/hv_vtl.c5
-rw-r--r--arch/x86/hyperv/ivm.c5
-rw-r--r--arch/x86/include/asm/acrn.h2
-rw-r--r--arch/x86/include/asm/alternative.h43
-rw-r--r--arch/x86/include/asm/amd/fch.h13
-rw-r--r--arch/x86/include/asm/amd/hsmp.h (renamed from arch/x86/include/asm/amd_hsmp.h)2
-rw-r--r--arch/x86/include/asm/amd/ibs.h (renamed from arch/x86/include/asm/amd-ibs.h)5
-rw-r--r--arch/x86/include/asm/amd/nb.h (renamed from arch/x86/include/asm/amd_nb.h)2
-rw-r--r--arch/x86/include/asm/amd/node.h (renamed from arch/x86/include/asm/amd_node.h)0
-rw-r--r--arch/x86/include/asm/apic.h12
-rw-r--r--arch/x86/include/asm/arch_hweight.h6
-rw-r--r--arch/x86/include/asm/asm.h24
-rw-r--r--arch/x86/include/asm/bitops.h7
-rw-r--r--arch/x86/include/asm/boot.h10
-rw-r--r--arch/x86/include/asm/coco.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h32
-rw-r--r--arch/x86/include/asm/cpuid/api.h96
-rw-r--r--arch/x86/include/asm/cpuid/types.h95
-rw-r--r--arch/x86/include/asm/debugreg.h16
-rw-r--r--arch/x86/include/asm/elf.h4
-rw-r--r--arch/x86/include/asm/entry-common.h5
-rw-r--r--arch/x86/include/asm/fpu/api.h3
-rw-r--r--arch/x86/include/asm/fpu/sched.h38
-rw-r--r--arch/x86/include/asm/fpu/types.h24
-rw-r--r--arch/x86/include/asm/fpu/xstate.h3
-rw-r--r--arch/x86/include/asm/fred.h1
-rw-r--r--arch/x86/include/asm/fsgsbase.h4
-rw-r--r--arch/x86/include/asm/inat.h6
-rw-r--r--arch/x86/include/asm/io.h6
-rw-r--r--arch/x86/include/asm/kexec.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/linkage.h10
-rw-r--r--arch/x86/include/asm/mem_encrypt.h2
-rw-r--r--arch/x86/include/asm/microcode.h4
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h15
-rw-r--r--arch/x86/include/asm/mshyperv.h3
-rw-r--r--arch/x86/include/asm/msr-index.h24
-rw-r--r--arch/x86/include/asm/msr.h214
-rw-r--r--arch/x86/include/asm/mwait.h66
-rw-r--r--arch/x86/include/asm/nmi.h49
-rw-r--r--arch/x86/include/asm/nospec-branch.h14
-rw-r--r--arch/x86/include/asm/page_32_types.h1
-rw-r--r--arch/x86/include/asm/page_64.h2
-rw-r--r--arch/x86/include/asm/page_64_types.h11
-rw-r--r--arch/x86/include/asm/page_types.h4
-rw-r--r--arch/x86/include/asm/paravirt.h63
-rw-r--r--arch/x86/include/asm/paravirt_types.h12
-rw-r--r--arch/x86/include/asm/percpu.h20
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h4
-rw-r--r--arch/x86/include/asm/pgtable.h10
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h26
-rw-r--r--arch/x86/include/asm/processor.h16
-rw-r--r--arch/x86/include/asm/resctrl.h21
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/sev-common.h2
-rw-r--r--arch/x86/include/asm/sev-internal.h105
-rw-r--r--arch/x86/include/asm/sev.h66
-rw-r--r--arch/x86/include/asm/simd.h6
-rw-r--r--arch/x86/include/asm/spec-ctrl.h2
-rw-r--r--arch/x86/include/asm/special_insns.h21
-rw-r--r--arch/x86/include/asm/string_32.h15
-rw-r--r--arch/x86/include/asm/suspend_32.h1
-rw-r--r--arch/x86/include/asm/suspend_64.h1
-rw-r--r--arch/x86/include/asm/switch_to.h4
-rw-r--r--arch/x86/include/asm/text-patching.h29
-rw-r--r--arch/x86/include/asm/trace/common.h12
-rw-r--r--arch/x86/include/asm/trace/fpu.h5
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h1
-rw-r--r--arch/x86/include/asm/tsc.h55
-rw-r--r--arch/x86/include/asm/uaccess_64.h6
-rw-r--r--arch/x86/include/asm/vdso.h8
-rw-r--r--arch/x86/include/asm/vdso/processor.h8
-rw-r--r--arch/x86/include/asm/x86_init.h1
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h2
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/cppc.c10
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/sleep.c1
-rw-r--r--arch/x86/kernel/alternative.c783
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/amd_nb.c11
-rw-r--r--arch/x86/kernel/amd_node.c2
-rw-r--r--arch/x86/kernel/aperture_64.c2
-rw-r--r--arch/x86/kernel/apic/apic.c17
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c7
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c8
-rw-r--r--arch/x86/kernel/asm-offsets_32.c9
-rw-r--r--arch/x86/kernel/callthunks.c6
-rw-r--r--arch/x86/kernel/cet.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile5
-rw-r--r--arch/x86/kernel/cpu/amd.c92
-rw-r--r--arch/x86/kernel/cpu/amd_cache_disable.c301
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c29
-rw-r--r--arch/x86/kernel/cpu/bugs.c1417
-rw-r--r--arch/x86/kernel/cpu/bus_lock.c19
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c1059
-rw-r--r--arch/x86/kernel/cpu/common.c227
-rw-r--r--arch/x86/kernel/cpu/cpu.h9
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c5
-rw-r--r--arch/x86/kernel/cpu/cpuid_0x2_table.c128
-rw-r--r--arch/x86/kernel/cpu/feat_ctl.c5
-rw-r--r--arch/x86/kernel/cpu/hygon.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c134
-rw-r--r--arch/x86/kernel/cpu/intel_epb.c12
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c22
-rw-r--r--arch/x86/kernel/cpu/mce/core.c66
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c35
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c32
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c14
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/intel-ucode-defs.h150
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/Makefile2
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c42
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c635
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h397
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c921
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c1119
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock_trace.h (renamed from arch/x86/kernel/cpu/resctrl/trace.h)26
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c4173
-rw-r--r--arch/x86/kernel/cpu/scattered.c3
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c3
-rw-r--r--arch/x86/kernel/cpu/topology.c3
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c7
-rw-r--r--arch/x86/kernel/cpu/tsx.c21
-rw-r--r--arch/x86/kernel/cpu/umwait.c6
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c1
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/early_printk.c9
-rw-r--r--arch/x86/kernel/fpu/context.h4
-rw-r--r--arch/x86/kernel/fpu/core.c128
-rw-r--r--arch/x86/kernel/fpu/init.c21
-rw-r--r--arch/x86/kernel/fpu/regset.c22
-rw-r--r--arch/x86/kernel/fpu/signal.c29
-rw-r--r--arch/x86/kernel/fpu/xstate.c177
-rw-r--r--arch/x86/kernel/fpu/xstate.h31
-rw-r--r--arch/x86/kernel/fred.c21
-rw-r--r--arch/x86/kernel/ftrace.c20
-rw-r--r--arch/x86/kernel/head64.c284
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/head_64.S10
-rw-r--r--arch/x86/kernel/hpet.c5
-rw-r--r--arch/x86/kernel/jailhouse.c2
-rw-r--r--arch/x86/kernel/jump_label.c6
-rw-r--r--arch/x86/kernel/kprobes/core.c4
-rw-r--r--arch/x86/kernel/kprobes/opt.c6
-rw-r--r--arch/x86/kernel/kvm.c33
-rw-r--r--arch/x86/kernel/kvmclock.c6
-rw-r--r--arch/x86/kernel/machine_kexec_64.c48
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c8
-rw-r--r--arch/x86/kernel/module.c8
-rw-r--r--arch/x86/kernel/nmi.c87
-rw-r--r--arch/x86/kernel/nmi_selftest.c52
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/process.c44
-rw-r--r--arch/x86/kernel/process_32.c7
-rw-r--r--arch/x86/kernel/process_64.c28
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S6
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S250
-rw-r--r--arch/x86/kernel/setup.c36
-rw-r--r--arch/x86/kernel/shstk.c18
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/smpboot.c8
-rw-r--r--arch/x86/kernel/static_call.c6
-rw-r--r--arch/x86/kernel/trace_clock.c2
-rw-r--r--arch/x86/kernel/tracepoint.c21
-rw-r--r--arch/x86/kernel/traps.c19
-rw-r--r--arch/x86/kernel/tsc.c5
-rw-r--r--arch/x86/kernel/tsc_sync.c15
-rw-r--r--arch/x86/kernel/uprobes.c5
-rw-r--r--arch/x86/kernel/vmlinux.lds.S14
-rw-r--r--arch/x86/kvm/cpuid.c4
-rw-r--r--arch/x86/kvm/svm/avic.c3
-rw-r--r--arch/x86/kvm/svm/sev.c15
-rw-r--r--arch/x86/kvm/svm/svm.c51
-rw-r--r--arch/x86/kvm/vmx/nested.c5
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c5
-rw-r--r--arch/x86/kvm/vmx/sgx.c9
-rw-r--r--arch/x86/kvm/vmx/vmx.c79
-rw-r--r--arch/x86/kvm/x86.c42
-rw-r--r--arch/x86/lib/Makefile8
-rw-r--r--arch/x86/lib/crc-t10dif.c (renamed from arch/x86/lib/crc-t10dif-glue.c)4
-rw-r--r--arch/x86/lib/crc32.c (renamed from arch/x86/lib/crc32-glue.c)6
-rw-r--r--arch/x86/lib/crc64.c (renamed from arch/x86/lib/crc64-glue.c)4
-rw-r--r--arch/x86/lib/crypto/.gitignore2
-rw-r--r--arch/x86/lib/crypto/Kconfig34
-rw-r--r--arch/x86/lib/crypto/Makefile20
-rw-r--r--arch/x86/lib/crypto/blake2s-core.S (renamed from arch/x86/crypto/blake2s-core.S)4
-rw-r--r--arch/x86/lib/crypto/blake2s-glue.c (renamed from arch/x86/crypto/blake2s-glue.c)18
-rw-r--r--arch/x86/lib/crypto/chacha-avx2-x86_64.S (renamed from arch/x86/crypto/chacha-avx2-x86_64.S)0
-rw-r--r--arch/x86/lib/crypto/chacha-avx512vl-x86_64.S (renamed from arch/x86/crypto/chacha-avx512vl-x86_64.S)0
-rw-r--r--arch/x86/lib/crypto/chacha-ssse3-x86_64.S (renamed from arch/x86/crypto/chacha-ssse3-x86_64.S)0
-rw-r--r--arch/x86/lib/crypto/chacha_glue.c196
-rw-r--r--arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl (renamed from arch/x86/crypto/poly1305-x86_64-cryptogams.pl)41
-rw-r--r--arch/x86/lib/crypto/poly1305_glue.c129
-rw-r--r--arch/x86/lib/crypto/sha256-avx-asm.S (renamed from arch/x86/crypto/sha256-avx-asm.S)12
-rw-r--r--arch/x86/lib/crypto/sha256-avx2-asm.S (renamed from arch/x86/crypto/sha256-avx2-asm.S)12
-rw-r--r--arch/x86/lib/crypto/sha256-ni-asm.S (renamed from arch/x86/crypto/sha256_ni_asm.S)36
-rw-r--r--arch/x86/lib/crypto/sha256-ssse3-asm.S (renamed from arch/x86/crypto/sha256-ssse3-asm.S)14
-rw-r--r--arch/x86/lib/crypto/sha256.c80
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/insn-eval.c20
-rw-r--r--arch/x86/lib/insn.c7
-rw-r--r--arch/x86/lib/iomem.c2
-rw-r--r--arch/x86/lib/kaslr.c2
-rw-r--r--arch/x86/lib/memcpy_64.S1
-rw-r--r--arch/x86/lib/memset_64.S1
-rw-r--r--arch/x86/lib/msr-smp.c16
-rw-r--r--arch/x86/lib/msr.c12
-rw-r--r--arch/x86/lib/retpoline.S50
-rw-r--r--arch/x86/lib/string_32.c17
-rw-r--r--arch/x86/lib/strstr_32.c6
-rw-r--r--arch/x86/lib/usercopy_32.c18
-rw-r--r--arch/x86/lib/x86-opcode-map.txt56
-rw-r--r--arch/x86/math-emu/fpu_aux.c2
-rw-r--r--arch/x86/math-emu/fpu_entry.c4
-rw-r--r--arch/x86/math-emu/fpu_system.h2
-rw-r--r--arch/x86/mm/Makefile10
-rw-r--r--arch/x86/mm/amdtopology.c2
-rw-r--r--arch/x86/mm/extable.c2
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init.c23
-rw-r--r--arch/x86/mm/init_32.c12
-rw-r--r--arch/x86/mm/init_64.c19
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c2
-rw-r--r--arch/x86/mm/mm_internal.h4
-rw-r--r--arch/x86/mm/numa.c5
-rw-r--r--arch/x86/mm/numa_32.c61
-rw-r--r--arch/x86/mm/numa_64.c13
-rw-r--r--arch/x86/mm/numa_internal.h10
-rw-r--r--arch/x86/mm/pat/memtype.c36
-rw-r--r--arch/x86/mm/pat/set_memory.c4
-rw-r--r--arch/x86/mm/pgtable.c156
-rw-r--r--arch/x86/mm/pti.c4
-rw-r--r--arch/x86/mm/tlb.c148
-rw-r--r--arch/x86/net/bpf_jit_comp.c60
-rw-r--r--arch/x86/pci/amd_bus.c12
-rw-r--r--arch/x86/pci/fixup.c6
-rw-r--r--arch/x86/pci/mmconfig-shared.c3
-rw-r--r--arch/x86/platform/efi/efi_64.c8
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-rtc.c6
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c2
-rw-r--r--arch/x86/platform/pvh/head.S3
-rw-r--r--arch/x86/power/cpu.c27
-rw-r--r--arch/x86/power/hibernate.c6
-rw-r--r--arch/x86/power/hibernate_asm_32.S3
-rw-r--r--arch/x86/power/hibernate_asm_64.S3
-rw-r--r--arch/x86/realmode/init.c3
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk7
-rw-r--r--arch/x86/virt/svm/sev.c23
-rw-r--r--arch/x86/xen/enlighten_pv.c63
-rw-r--r--arch/x86/xen/mmu_pv.c4
-rw-r--r--arch/x86/xen/pmu.c73
-rw-r--r--arch/x86/xen/suspend.c7
-rw-r--r--arch/x86/xen/xen-ops.h5
-rw-r--r--arch/xtensa/configs/cadence_csp_defconfig1
-rw-r--r--arch/xtensa/kernel/perf_event.c3
-rw-r--r--block/Kconfig8
-rw-r--r--block/Makefile5
-rw-r--r--block/bdev.c3
-rw-r--r--block/bfq-iosched.c6
-rw-r--r--block/bio-integrity-auto.c62
-rw-r--r--block/bio-integrity.c4
-rw-r--r--block/bio.c160
-rw-r--r--block/blk-cgroup.c10
-rw-r--r--block/blk-core.c2
-rw-r--r--block/blk-crypto-fallback.c1
-rw-r--r--block/blk-map.c93
-rw-r--r--block/blk-merge.c137
-rw-r--r--block/blk-mq-debugfs.c13
-rw-r--r--block/blk-mq-dma.c116
-rw-r--r--block/blk-mq-sched.c53
-rw-r--r--block/blk-mq.c309
-rw-r--r--block/blk-mq.h7
-rw-r--r--block/blk-rq-qos.c4
-rw-r--r--block/blk-rq-qos.h21
-rw-r--r--block/blk-settings.c5
-rw-r--r--block/blk-sysfs.c34
-rw-r--r--block/blk-throttle.c411
-rw-r--r--block/blk-throttle.h36
-rw-r--r--block/blk-wbt.c11
-rw-r--r--block/blk.h50
-rw-r--r--block/bounce.c267
-rw-r--r--block/elevator.c329
-rw-r--r--block/elevator.h6
-rw-r--r--block/fops.c28
-rw-r--r--block/genhd.c266
-rw-r--r--block/mq-deadline.c2
-rw-r--r--crypto/842.c6
-rw-r--r--crypto/Kconfig82
-rw-r--r--crypto/Makefile22
-rw-r--r--crypto/acompress.c410
-rw-r--r--crypto/adiantum.c2
-rw-r--r--crypto/aead.c1
-rw-r--r--crypto/aegis128-core.c2
-rw-r--r--crypto/aes_generic.c2
-rw-r--r--crypto/ahash.c783
-rw-r--r--crypto/akcipher.c1
-rw-r--r--crypto/algapi.c82
-rw-r--r--crypto/algboss.c10
-rw-r--r--crypto/algif_aead.c101
-rw-r--r--crypto/algif_hash.c4
-rw-r--r--crypto/ansi_cprng.c2
-rw-r--r--crypto/anubis.c2
-rw-r--r--crypto/api.c37
-rw-r--r--crypto/arc4.c2
-rw-r--r--crypto/aria_generic.c2
-rw-r--r--crypto/asymmetric_keys/public_key.c36
-rw-r--r--crypto/asymmetric_keys/x509_cert_parser.c3
-rw-r--r--crypto/authenc.c34
-rw-r--r--crypto/authencesn.c40
-rw-r--r--crypto/blake2b_generic.c33
-rw-r--r--crypto/blowfish_generic.c2
-rw-r--r--crypto/camellia_generic.c2
-rw-r--r--crypto/cast5_generic.c2
-rw-r--r--crypto/cast6_generic.c2
-rw-r--r--crypto/cbc.c2
-rw-r--r--crypto/ccm.c65
-rw-r--r--crypto/chacha.c260
-rw-r--r--crypto/chacha20poly1305.c321
-rw-r--r--crypto/chacha_generic.c139
-rw-r--r--crypto/cmac.c94
-rw-r--r--crypto/crc32.c (renamed from crypto/crc32_generic.c)2
-rw-r--r--crypto/crc32c.c (renamed from crypto/crc32c_generic.c)2
-rw-r--r--crypto/cryptd.c2
-rw-r--r--crypto/crypto_engine.c31
-rw-r--r--crypto/crypto_null.c72
-rw-r--r--crypto/ctr.c2
-rw-r--r--crypto/cts.c2
-rw-r--r--crypto/curve25519-generic.c2
-rw-r--r--crypto/deflate.c355
-rw-r--r--crypto/des_generic.c2
-rw-r--r--crypto/dh.c2
-rw-r--r--crypto/drbg.c2
-rw-r--r--crypto/ecb.c2
-rw-r--r--crypto/ecdh.c2
-rw-r--r--crypto/ecdsa-p1363.c6
-rw-r--r--crypto/ecdsa-x962.c5
-rw-r--r--crypto/ecdsa.c4
-rw-r--r--crypto/echainiv.c20
-rw-r--r--crypto/ecrdsa.c2
-rw-r--r--crypto/essiv.c5
-rw-r--r--crypto/fcrypt.c2
-rw-r--r--crypto/fips.c2
-rw-r--r--crypto/gcm.c43
-rw-r--r--crypto/geniv.c13
-rw-r--r--crypto/ghash-generic.c58
-rw-r--r--crypto/hctr2.c2
-rw-r--r--crypto/hkdf.c2
-rw-r--r--crypto/hmac.c398
-rw-r--r--crypto/internal.h9
-rw-r--r--crypto/kdf_sp800108.c2
-rw-r--r--crypto/khazad.c2
-rw-r--r--crypto/kpp.c1
-rw-r--r--crypto/krb5enc.c2
-rw-r--r--crypto/lrw.c6
-rw-r--r--crypto/lskcipher.c1
-rw-r--r--crypto/lz4.c6
-rw-r--r--crypto/lz4hc.c6
-rw-r--r--crypto/lzo-rle.c6
-rw-r--r--crypto/lzo.c6
-rw-r--r--crypto/md4.c2
-rw-r--r--crypto/md5.c104
-rw-r--r--crypto/michael_mic.c2
-rw-r--r--crypto/nhpoly1305.c2
-rw-r--r--crypto/pcbc.c2
-rw-r--r--crypto/pcrypt.c2
-rw-r--r--crypto/poly1305_generic.c149
-rw-r--r--crypto/polyval-generic.c118
-rw-r--r--crypto/rmd160.c90
-rw-r--r--crypto/rng.c1
-rw-r--r--crypto/rsa.c2
-rw-r--r--crypto/rsassa-pkcs1.c2
-rw-r--r--crypto/scatterwalk.c274
-rw-r--r--crypto/scompress.c243
-rw-r--r--crypto/seed.c2
-rw-r--r--crypto/seqiv.c19
-rw-r--r--crypto/serpent_generic.c2
-rw-r--r--crypto/sha1_generic.c35
-rw-r--r--crypto/sha256.c283
-rw-r--r--crypto/sha256_generic.c110
-rw-r--r--crypto/sha3_generic.c101
-rw-r--r--crypto/sha512_generic.c52
-rw-r--r--crypto/shash.c276
-rw-r--r--crypto/sig.c10
-rw-r--r--crypto/skcipher.c262
-rw-r--r--crypto/sm3_generic.c33
-rw-r--r--crypto/sm4_generic.c2
-rw-r--r--crypto/streebog_generic.c73
-rw-r--r--crypto/tcrypt.c239
-rw-r--r--crypto/tcrypt.h4
-rw-r--r--crypto/tea.c2
-rw-r--r--crypto/testmgr.c160
-rw-r--r--crypto/testmgr.h288
-rw-r--r--crypto/twofish_generic.c2
-rw-r--r--crypto/wp512.c2
-rw-r--r--crypto/xcbc.c94
-rw-r--r--crypto/xctr.c2
-rw-r--r--crypto/xts.c6
-rw-r--r--crypto/xxhash_generic.c2
-rw-r--r--crypto/zstd.c2
-rw-r--r--drivers/accel/habanalabs/Kconfig2
-rw-r--r--drivers/accel/habanalabs/common/habanalabs_ioctl.c2
-rw-r--r--drivers/accel/ivpu/ivpu_debugfs.c2
-rw-r--r--drivers/acpi/Kconfig3
-rw-r--r--drivers/acpi/Makefile1
-rw-r--r--drivers/acpi/acpi_extlog.c3
-rw-r--r--drivers/acpi/acpi_lpit.c2
-rw-r--r--drivers/acpi/acpi_mrrm.c183
-rw-r--r--drivers/acpi/acpi_pad.c2
-rw-r--r--drivers/acpi/acpi_pcc.c13
-rw-r--r--drivers/acpi/acpica/acapps.h4
-rw-r--r--drivers/acpi/acpica/accommon.h2
-rw-r--r--drivers/acpi/acpica/acconvert.h2
-rw-r--r--drivers/acpi/acpica/acdebug.h4
-rw-r--r--drivers/acpi/acpica/acdispat.h2
-rw-r--r--drivers/acpi/acpica/acevents.h2
-rw-r--r--drivers/acpi/acpica/acglobal.h2
-rw-r--r--drivers/acpi/acpica/achware.h2
-rw-r--r--drivers/acpi/acpica/acinterp.h5
-rw-r--r--drivers/acpi/acpica/aclocal.h6
-rw-r--r--drivers/acpi/acpica/acmacros.h2
-rw-r--r--drivers/acpi/acpica/acnamesp.h2
-rw-r--r--drivers/acpi/acpica/acobject.h2
-rw-r--r--drivers/acpi/acpica/acopcode.h2
-rw-r--r--drivers/acpi/acpica/acparser.h2
-rw-r--r--drivers/acpi/acpica/acpredef.h2
-rw-r--r--drivers/acpi/acpica/acresrc.h2
-rw-r--r--drivers/acpi/acpica/acstruct.h2
-rw-r--r--drivers/acpi/acpica/actables.h2
-rw-r--r--drivers/acpi/acpica/acutils.h2
-rw-r--r--drivers/acpi/acpica/amlcode.h2
-rw-r--r--drivers/acpi/acpica/amlresrc.h10
-rw-r--r--drivers/acpi/acpica/dbhistry.c2
-rw-r--r--drivers/acpi/acpica/dsargs.c2
-rw-r--r--drivers/acpi/acpica/dscontrol.c2
-rw-r--r--drivers/acpi/acpica/dsdebug.c2
-rw-r--r--drivers/acpi/acpica/dsfield.c2
-rw-r--r--drivers/acpi/acpica/dsinit.c2
-rw-r--r--drivers/acpi/acpica/dsmethod.c2
-rw-r--r--drivers/acpi/acpica/dsmthdat.c1
-rw-r--r--drivers/acpi/acpica/dsobject.c2
-rw-r--r--drivers/acpi/acpica/dsopcode.c2
-rw-r--r--drivers/acpi/acpica/dspkginit.c2
-rw-r--r--drivers/acpi/acpica/dsutils.c9
-rw-r--r--drivers/acpi/acpica/dswexec.c2
-rw-r--r--drivers/acpi/acpica/dswload.c2
-rw-r--r--drivers/acpi/acpica/dswload2.c2
-rw-r--r--drivers/acpi/acpica/dswscope.c2
-rw-r--r--drivers/acpi/acpica/dswstate.c2
-rw-r--r--drivers/acpi/acpica/evevent.c2
-rw-r--r--drivers/acpi/acpica/evglock.c2
-rw-r--r--drivers/acpi/acpica/evgpe.c2
-rw-r--r--drivers/acpi/acpica/evgpeblk.c2
-rw-r--r--drivers/acpi/acpica/evgpeinit.c2
-rw-r--r--drivers/acpi/acpica/evgpeutil.c2
-rw-r--r--drivers/acpi/acpica/evhandler.c2
-rw-r--r--drivers/acpi/acpica/evmisc.c2
-rw-r--r--drivers/acpi/acpica/evregion.c2
-rw-r--r--drivers/acpi/acpica/evrgnini.c2
-rw-r--r--drivers/acpi/acpica/evxface.c2
-rw-r--r--drivers/acpi/acpica/evxfevnt.c2
-rw-r--r--drivers/acpi/acpica/evxfgpe.c2
-rw-r--r--drivers/acpi/acpica/evxfregn.c2
-rw-r--r--drivers/acpi/acpica/exconcat.c2
-rw-r--r--drivers/acpi/acpica/exconfig.c2
-rw-r--r--drivers/acpi/acpica/exconvrt.c6
-rw-r--r--drivers/acpi/acpica/excreate.c2
-rw-r--r--drivers/acpi/acpica/exdebug.c2
-rw-r--r--drivers/acpi/acpica/exdump.c2
-rw-r--r--drivers/acpi/acpica/exfield.c2
-rw-r--r--drivers/acpi/acpica/exfldio.c2
-rw-r--r--drivers/acpi/acpica/exmisc.c2
-rw-r--r--drivers/acpi/acpica/exmutex.c2
-rw-r--r--drivers/acpi/acpica/exnames.c2
-rw-r--r--drivers/acpi/acpica/exoparg1.c2
-rw-r--r--drivers/acpi/acpica/exoparg2.c2
-rw-r--r--drivers/acpi/acpica/exoparg3.c2
-rw-r--r--drivers/acpi/acpica/exoparg6.c2
-rw-r--r--drivers/acpi/acpica/exprep.c2
-rw-r--r--drivers/acpi/acpica/exregion.c2
-rw-r--r--drivers/acpi/acpica/exresnte.c2
-rw-r--r--drivers/acpi/acpica/exresolv.c2
-rw-r--r--drivers/acpi/acpica/exresop.c2
-rw-r--r--drivers/acpi/acpica/exserial.c8
-rw-r--r--drivers/acpi/acpica/exstore.c2
-rw-r--r--drivers/acpi/acpica/exstoren.c2
-rw-r--r--drivers/acpi/acpica/exstorob.c2
-rw-r--r--drivers/acpi/acpica/exsystem.c2
-rw-r--r--drivers/acpi/acpica/extrace.c53
-rw-r--r--drivers/acpi/acpica/exutils.c2
-rw-r--r--drivers/acpi/acpica/hwacpi.c2
-rw-r--r--drivers/acpi/acpica/hwesleep.c2
-rw-r--r--drivers/acpi/acpica/hwgpe.c2
-rw-r--r--drivers/acpi/acpica/hwsleep.c2
-rw-r--r--drivers/acpi/acpica/hwtimer.c2
-rw-r--r--drivers/acpi/acpica/hwvalid.c2
-rw-r--r--drivers/acpi/acpica/hwxface.c2
-rw-r--r--drivers/acpi/acpica/hwxfsleep.c2
-rw-r--r--drivers/acpi/acpica/nsarguments.c2
-rw-r--r--drivers/acpi/acpica/nsconvert.c2
-rw-r--r--drivers/acpi/acpica/nsdump.c2
-rw-r--r--drivers/acpi/acpica/nsdumpdv.c2
-rw-r--r--drivers/acpi/acpica/nsinit.c2
-rw-r--r--drivers/acpi/acpica/nsload.c2
-rw-r--r--drivers/acpi/acpica/nsnames.c2
-rw-r--r--drivers/acpi/acpica/nsparse.c2
-rw-r--r--drivers/acpi/acpica/nspredef.c2
-rw-r--r--drivers/acpi/acpica/nsprepkg.c2
-rw-r--r--drivers/acpi/acpica/nsrepair.c2
-rw-r--r--drivers/acpi/acpica/nsrepair2.c4
-rw-r--r--drivers/acpi/acpica/nsutils.c2
-rw-r--r--drivers/acpi/acpica/nswalk.c2
-rw-r--r--drivers/acpi/acpica/nsxfname.c2
-rw-r--r--drivers/acpi/acpica/psargs.c2
-rw-r--r--drivers/acpi/acpica/psloop.c2
-rw-r--r--drivers/acpi/acpica/psobject.c54
-rw-r--r--drivers/acpi/acpica/psopcode.c2
-rw-r--r--drivers/acpi/acpica/psopinfo.c2
-rw-r--r--drivers/acpi/acpica/psparse.c2
-rw-r--r--drivers/acpi/acpica/psscope.c2
-rw-r--r--drivers/acpi/acpica/pstree.c2
-rw-r--r--drivers/acpi/acpica/psutils.c2
-rw-r--r--drivers/acpi/acpica/pswalk.c2
-rw-r--r--drivers/acpi/acpica/psxface.c2
-rw-r--r--drivers/acpi/acpica/rsaddr.c13
-rw-r--r--drivers/acpi/acpica/rscalc.c22
-rw-r--r--drivers/acpi/acpica/rslist.c12
-rw-r--r--drivers/acpi/acpica/tbdata.c2
-rw-r--r--drivers/acpi/acpica/tbfadt.c2
-rw-r--r--drivers/acpi/acpica/tbfind.c6
-rw-r--r--drivers/acpi/acpica/tbinstal.c2
-rw-r--r--drivers/acpi/acpica/tbprint.c2
-rw-r--r--drivers/acpi/acpica/tbutils.c2
-rw-r--r--drivers/acpi/acpica/tbxface.c2
-rw-r--r--drivers/acpi/acpica/tbxfload.c2
-rw-r--r--drivers/acpi/acpica/tbxfroot.c2
-rw-r--r--drivers/acpi/acpica/utaddress.c2
-rw-r--r--drivers/acpi/acpica/utalloc.c2
-rw-r--r--drivers/acpi/acpica/utascii.c2
-rw-r--r--drivers/acpi/acpica/utbuffer.c2
-rw-r--r--drivers/acpi/acpica/utcache.c4
-rw-r--r--drivers/acpi/acpica/utcksum.c2
-rw-r--r--drivers/acpi/acpica/utcopy.c2
-rw-r--r--drivers/acpi/acpica/utdebug.c2
-rw-r--r--drivers/acpi/acpica/utdecode.c2
-rw-r--r--drivers/acpi/acpica/utdelete.c2
-rw-r--r--drivers/acpi/acpica/uteval.c2
-rw-r--r--drivers/acpi/acpica/utglobal.c2
-rw-r--r--drivers/acpi/acpica/uthex.c2
-rw-r--r--drivers/acpi/acpica/utids.c2
-rw-r--r--drivers/acpi/acpica/utinit.c2
-rw-r--r--drivers/acpi/acpica/utlock.c2
-rw-r--r--drivers/acpi/acpica/utnonansi.c2
-rw-r--r--drivers/acpi/acpica/utobject.c2
-rw-r--r--drivers/acpi/acpica/utosi.c2
-rw-r--r--drivers/acpi/acpica/utpredef.c2
-rw-r--r--drivers/acpi/acpica/utprint.c9
-rw-r--r--drivers/acpi/acpica/utresrc.c14
-rw-r--r--drivers/acpi/acpica/uttrack.c2
-rw-r--r--drivers/acpi/acpica/utuuid.c2
-rw-r--r--drivers/acpi/acpica/utxface.c2
-rw-r--r--drivers/acpi/acpica/utxfinit.c2
-rw-r--r--drivers/acpi/apei/einj-core.c62
-rw-r--r--drivers/acpi/battery.c23
-rw-r--r--drivers/acpi/bus.c6
-rw-r--r--drivers/acpi/cppc_acpi.c329
-rw-r--r--drivers/acpi/ec.c6
-rw-r--r--drivers/acpi/osi.c1
-rw-r--r--drivers/acpi/pci_root.c2
-rw-r--r--drivers/acpi/platform_profile.c3
-rw-r--r--drivers/acpi/pptt.c11
-rw-r--r--drivers/acpi/processor_idle.c14
-rw-r--r--drivers/acpi/processor_perflib.c1
-rw-r--r--drivers/acpi/processor_throttling.c5
-rw-r--r--drivers/acpi/resource.c2
-rw-r--r--drivers/acpi/tables.c8
-rw-r--r--drivers/acpi/thermal.c10
-rw-r--r--drivers/acpi/viot.c2
-rw-r--r--drivers/android/binderfs.c4
-rw-r--r--drivers/base/arch_topology.c52
-rw-r--r--drivers/base/cpu.c6
-rw-r--r--drivers/base/devres.c11
-rw-r--r--drivers/base/node.c2
-rw-r--r--drivers/base/platform-msi.c1
-rw-r--r--drivers/base/power/main.c218
-rw-r--r--drivers/base/power/runtime.c46
-rw-r--r--drivers/base/power/sysfs.c15
-rw-r--r--drivers/base/power/wakeup.c2
-rw-r--r--drivers/base/power/wakeup_stats.c2
-rw-r--r--drivers/base/regmap/Kconfig4
-rw-r--r--drivers/base/regmap/regcache.c13
-rw-r--r--drivers/base/regmap/regmap-irq.c103
-rw-r--r--drivers/base/topology.c52
-rw-r--r--drivers/block/Kconfig19
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/brd.c225
-rw-r--r--drivers/block/loop.c3
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/rnbd/rnbd-srv.c7
-rw-r--r--drivers/block/ublk_drv.c571
-rw-r--r--drivers/block/virtio_blk.c4
-rw-r--r--drivers/block/zloop.c1385
-rw-r--r--drivers/bluetooth/btusb.c98
-rw-r--r--drivers/bus/moxtet.c6
-rw-r--r--drivers/cdrom/cdrom.c3
-rw-r--r--drivers/char/agp/amd64-agp.c2
-rw-r--r--drivers/char/agp/nvidia-agp.c1
-rw-r--r--drivers/char/hw_random/atmel-rng.c11
-rw-r--r--drivers/char/hw_random/mtk-rng.c9
-rw-r--r--drivers/char/hw_random/npcm-rng.c9
-rw-r--r--drivers/char/hw_random/rockchip-rng.c73
-rw-r--r--drivers/char/mem.c18
-rw-r--r--drivers/char/random.c56
-rw-r--r--drivers/char/tpm/Kconfig10
-rw-r--r--drivers/char/tpm/Makefile1
-rw-r--r--drivers/char/tpm/eventlog/tpm1.c7
-rw-r--r--drivers/char/tpm/tpm-buf.c6
-rw-r--r--drivers/char/tpm/tpm2-sessions.c20
-rw-r--r--drivers/char/tpm/tpm_crb_ffa.c74
-rw-r--r--drivers/char/tpm/tpm_svsm.c125
-rw-r--r--drivers/char/tpm/tpm_tis_core.h2
-rw-r--r--drivers/clk/clk-s2mps11.c3
-rw-r--r--drivers/clk/rockchip/clk-rk3576.c2
-rw-r--r--drivers/clk/sunxi-ng/ccu-sun20i-d1.c44
-rw-r--r--drivers/clk/sunxi-ng/ccu_mp.h25
-rw-r--r--drivers/clocksource/Kconfig20
-rw-r--r--drivers/clocksource/Makefile2
-rw-r--r--drivers/clocksource/renesas-ostm.c4
-rw-r--r--drivers/clocksource/timer-econet-en751221.c216
-rw-r--r--drivers/clocksource/timer-nxp-stm.c495
-rw-r--r--drivers/clocksource/timer-tegra186.c100
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c8
-rw-r--r--drivers/cpufreq/amd-pstate-ut.c29
-rw-r--r--drivers/cpufreq/amd-pstate.c147
-rw-r--r--drivers/cpufreq/amd-pstate.h3
-rw-r--r--drivers/cpufreq/amd_freq_sensitivity.c2
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c109
-rw-r--r--drivers/cpufreq/cpufreq.c463
-rw-r--r--drivers/cpufreq/e_powersaver.c6
-rw-r--r--drivers/cpufreq/elanfreq.c1
-rw-r--r--drivers/cpufreq/intel_pstate.c289
-rw-r--r--drivers/cpufreq/longhaul.c24
-rw-r--r--drivers/cpufreq/powernow-k7.c14
-rw-r--r--drivers/cpufreq/sc520_freq.c1
-rw-r--r--drivers/cpuidle/cpuidle-psci-domain.c2
-rw-r--r--drivers/cpuidle/cpuidle-psci.c83
-rw-r--r--drivers/cpuidle/cpuidle-psci.h4
-rw-r--r--drivers/cpuidle/governors/menu.c2
-rw-r--r--drivers/cpuidle/governors/teo.c4
-rw-r--r--drivers/crypto/Kconfig7
-rw-r--r--drivers/crypto/Makefile4
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c56
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c17
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c177
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c49
-rw-r--r--drivers/crypto/amcc/crypto4xx_alg.c110
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.c45
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.h17
-rw-r--r--drivers/crypto/atmel-aes.c5
-rw-r--r--drivers/crypto/atmel-sha.c6
-rw-r--r--drivers/crypto/atmel-tdes.c2
-rw-r--r--drivers/crypto/caam/ctrl.c1
-rw-r--r--drivers/crypto/cavium/Makefile3
-rw-r--r--drivers/crypto/cavium/zip/Makefile12
-rw-r--r--drivers/crypto/cavium/zip/common.h222
-rw-r--r--drivers/crypto/cavium/zip/zip_crypto.c261
-rw-r--r--drivers/crypto/cavium/zip/zip_crypto.h68
-rw-r--r--drivers/crypto/cavium/zip/zip_deflate.c200
-rw-r--r--drivers/crypto/cavium/zip/zip_deflate.h62
-rw-r--r--drivers/crypto/cavium/zip/zip_device.c202
-rw-r--r--drivers/crypto/cavium/zip/zip_device.h108
-rw-r--r--drivers/crypto/cavium/zip/zip_inflate.c223
-rw-r--r--drivers/crypto/cavium/zip/zip_inflate.h62
-rw-r--r--drivers/crypto/cavium/zip/zip_main.c603
-rw-r--r--drivers/crypto/cavium/zip/zip_main.h120
-rw-r--r--drivers/crypto/cavium/zip/zip_mem.c114
-rw-r--r--drivers/crypto/cavium/zip/zip_mem.h78
-rw-r--r--drivers/crypto/cavium/zip/zip_regs.h1347
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes.c15
-rw-r--r--drivers/crypto/ccp/ccp-crypto-des3.c13
-rw-r--r--drivers/crypto/ccp/ccp-crypto-main.c13
-rw-r--r--drivers/crypto/ccp/ccp-ops.c11
-rw-r--r--drivers/crypto/ccp/sev-dev.c254
-rw-r--r--drivers/crypto/ccp/sp-pci.c3
-rw-r--r--drivers/crypto/hisilicon/qm.c4
-rw-r--r--drivers/crypto/img-hash.c41
-rw-r--r--drivers/crypto/inside-secure/eip93/eip93-hash.c20
-rw-r--r--drivers/crypto/inside-secure/safexcel_hash.c2
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_main.c87
-rw-r--r--drivers/crypto/intel/qat/Kconfig12
-rw-r--r--drivers/crypto/intel/qat/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c8
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/adf_drv.c10
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c12
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_drv.c14
-rw-r--r--drivers/crypto/intel/qat/qat_6xxx/Makefile3
-rw-r--r--drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c845
-rw-r--r--drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h148
-rw-r--r--drivers/crypto/intel/qat/qat_6xxx/adf_drv.c226
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxx/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c2
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxx/adf_drv.c41
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxxvf/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c2
-rw-r--r--drivers/crypto/intel/qat/qat_c62x/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c2
-rw-r--r--drivers/crypto/intel/qat/qat_c62x/adf_drv.c41
-rw-r--r--drivers/crypto/intel/qat/qat_c62xvf/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c2
-rw-r--r--drivers/crypto/intel/qat/qat_common/Makefile7
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_accel_devices.h24
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_admin.c1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cfg_common.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cfg_services.c3
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cfg_services.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_dc.c (renamed from drivers/crypto/intel/qat/qat_common/adf_gen2_dc.c)50
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_dc.h17
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_fw_config.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_dc.h10
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c57
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_config.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_config.h3
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_dc.c83
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_dc.h10
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c70
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h2
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h28
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen6_ras.c818
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen6_ras.h504
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c49
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h15
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_timer.c (renamed from drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c)18
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_timer.h (renamed from drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h)10
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h23
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp.h99
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp_defs.h318
-rw-r--r--drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h23
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_algs.c7
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_compression.c1
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_compression.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_hal.c13
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_uclo.c449
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xcc/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c2
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xcc/adf_drv.c41
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xccvf/Makefile1
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c2
-rw-r--r--drivers/crypto/marvell/cesa/cesa.c2
-rw-r--r--drivers/crypto/marvell/cesa/cesa.h9
-rw-r--r--drivers/crypto/marvell/cesa/cipher.c3
-rw-r--r--drivers/crypto/marvell/cesa/hash.c2
-rw-r--r--drivers/crypto/marvell/cesa/tdma.c53
-rw-r--r--drivers/crypto/marvell/octeontx2/cn10k_cpt.c89
-rw-r--r--drivers/crypto/marvell/octeontx2/cn10k_cpt.h1
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cpt_common.h35
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c25
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptlf.c5
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptlf.h12
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c18
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c6
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c2
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c19
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c1
-rw-r--r--drivers/crypto/nx/nx-aes-cbc.c8
-rw-r--r--drivers/crypto/nx/nx-aes-ctr.c8
-rw-r--r--drivers/crypto/nx/nx-aes-ecb.c8
-rw-r--r--drivers/crypto/nx/nx-aes-xcbc.c128
-rw-r--r--drivers/crypto/nx/nx-sha256.c130
-rw-r--r--drivers/crypto/nx/nx-sha512.c143
-rw-r--r--drivers/crypto/nx/nx.c19
-rw-r--r--drivers/crypto/nx/nx.h11
-rw-r--r--drivers/crypto/omap-aes.c14
-rw-r--r--drivers/crypto/omap-sham.c14
-rw-r--r--drivers/crypto/padlock-sha.c478
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto_ahash.c52
-rw-r--r--drivers/crypto/s5p-sss.c24
-rw-r--r--drivers/crypto/sa2ul.c63
-rw-r--r--drivers/crypto/tegra/tegra-se-hash.c52
-rw-r--r--drivers/crypto/xilinx/zynqmp-sha.c98
-rw-r--r--drivers/dma-buf/dma-resv.c5
-rw-r--r--drivers/dma-buf/st-dma-fence.c2
-rw-r--r--drivers/dma/amd/ptdma/ptdma-dmaengine.c19
-rw-r--r--drivers/dma/dmatest.c6
-rw-r--r--drivers/dma/fsl-edma-main.c2
-rw-r--r--drivers/dma/idxd/cdev.c13
-rw-r--r--drivers/dma/idxd/init.c159
-rw-r--r--drivers/dma/ioat/dca.c2
-rw-r--r--drivers/dma/mediatek/mtk-cqdma.c6
-rw-r--r--drivers/dma/ti/k3-udma.c10
-rw-r--r--drivers/edac/altera_edac.c4
-rw-r--r--drivers/edac/amd64_edac.c10
-rw-r--r--drivers/edac/bluefield_edac.c20
-rw-r--r--drivers/edac/i10nm_base.c479
-rw-r--r--drivers/edac/ie31200_edac.c7
-rw-r--r--drivers/edac/igen6_edac.c86
-rw-r--r--drivers/edac/mce_amd.c1
-rw-r--r--drivers/edac/skx_common.c1
-rw-r--r--drivers/edac/skx_common.h61
-rw-r--r--drivers/firewire/core-transaction.c2
-rw-r--r--drivers/firmware/cirrus/test/cs_dsp_mock_bin.c6
-rw-r--r--drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c15
-rw-r--r--drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c4
-rw-r--r--drivers/firmware/efi/libstub/Makefile1
-rw-r--r--drivers/firmware/efi/libstub/x86-5lvl.c2
-rw-r--r--drivers/firmware/psci/psci_checker.c2
-rw-r--r--drivers/firmware/samsung/exynos-acpm.c44
-rw-r--r--drivers/gpio/Kconfig64
-rw-r--r--drivers/gpio/Makefile4
-rw-r--r--drivers/gpio/TODO7
-rw-r--r--drivers/gpio/gpio-aggregator.c1104
-rw-r--r--drivers/gpio/gpio-bcm-kona.c1
-rw-r--r--drivers/gpio/gpio-blzp1600.c281
-rw-r--r--drivers/gpio/gpio-brcmstb.c2
-rw-r--r--drivers/gpio/gpio-davinci.c34
-rw-r--r--drivers/gpio/gpio-dln2.c7
-rw-r--r--drivers/gpio/gpio-ds4520.c6
-rw-r--r--drivers/gpio/gpio-eic-sprd.c5
-rw-r--r--drivers/gpio/gpio-em.c11
-rw-r--r--drivers/gpio/gpio-exar.c16
-rw-r--r--drivers/gpio/gpio-f7188x.c13
-rw-r--r--drivers/gpio/gpio-graniterapids.c6
-rw-r--r--drivers/gpio/gpio-grgpio.c9
-rw-r--r--drivers/gpio/gpio-gw-pld.c6
-rw-r--r--drivers/gpio/gpio-htc-egpio.c16
-rw-r--r--drivers/gpio/gpio-ich.c12
-rw-r--r--drivers/gpio/gpio-idt3243x.c2
-rw-r--r--drivers/gpio/gpio-imx-scu.c47
-rw-r--r--drivers/gpio/gpio-it87.c11
-rw-r--r--drivers/gpio/gpio-janz-ttl.c6
-rw-r--r--drivers/gpio/gpio-kempld.c7
-rw-r--r--drivers/gpio/gpio-ljca.c13
-rw-r--r--drivers/gpio/gpio-logicvc.c11
-rw-r--r--drivers/gpio/gpio-loongson-64bit.c6
-rw-r--r--drivers/gpio/gpio-loongson.c8
-rw-r--r--drivers/gpio/gpio-lp3943.c13
-rw-r--r--drivers/gpio/gpio-lp873x.c12
-rw-r--r--drivers/gpio/gpio-lp87565.c15
-rw-r--r--drivers/gpio/gpio-lpc18xx.c29
-rw-r--r--drivers/gpio/gpio-lpc32xx.c28
-rw-r--r--drivers/gpio/gpio-madera.c18
-rw-r--r--drivers/gpio/gpio-max3191x.c16
-rw-r--r--drivers/gpio/gpio-max730x.c9
-rw-r--r--drivers/gpio/gpio-max732x.c15
-rw-r--r--drivers/gpio/gpio-max77620.c13
-rw-r--r--drivers/gpio/gpio-max77759.c530
-rw-r--r--drivers/gpio/gpio-mb86s7x.c6
-rw-r--r--drivers/gpio/gpio-mc33880.c9
-rw-r--r--drivers/gpio/gpio-ml-ioh.c6
-rw-r--r--drivers/gpio/gpio-mpc8xxx.c8
-rw-r--r--drivers/gpio/gpio-mvebu.c17
-rw-r--r--drivers/gpio/gpio-mxc.c11
-rw-r--r--drivers/gpio/gpio-mxs.c4
-rw-r--r--drivers/gpio/gpio-pca953x.c38
-rw-r--r--drivers/gpio/gpio-pxa.c14
-rw-r--r--drivers/gpio/gpio-rockchip.c2
-rw-r--r--drivers/gpio/gpio-sa1100.c2
-rw-r--r--drivers/gpio/gpio-sodaville.c2
-rw-r--r--drivers/gpio/gpio-spacemit-k1.c293
-rw-r--r--drivers/gpio/gpio-tb10x.c2
-rw-r--r--drivers/gpio/gpio-timberdale.c10
-rw-r--r--drivers/gpio/gpio-twl4030.c5
-rw-r--r--drivers/gpio/gpio-vf610.c4
-rw-r--r--drivers/gpio/gpio-virtuser.c12
-rw-r--r--drivers/gpio/gpio-xgene-sb.c26
-rw-r--r--drivers/gpio/gpiolib-acpi-core.c (renamed from drivers/gpio/gpiolib-acpi.c)522
-rw-r--r--drivers/gpio/gpiolib-acpi-quirks.c363
-rw-r--r--drivers/gpio/gpiolib-acpi.h15
-rw-r--r--drivers/gpio/gpiolib-cdev.c3
-rw-r--r--drivers/gpio/gpiolib-devres.c89
-rw-r--r--drivers/gpio/gpiolib-of.c17
-rw-r--r--drivers/gpio/gpiolib-of.h6
-rw-r--r--drivers/gpio/gpiolib-sysfs.c8
-rw-r--r--drivers/gpio/gpiolib.c179
-rw-r--r--drivers/gpu/drm/Makefile2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c13
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c11
-rw-r--r--drivers/gpu/drm/drm_edid.c1
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c37
-rw-r--r--drivers/gpu/drm/gud/gud_pipe.c2
-rw-r--r--drivers/gpu/drm/i915/Makefile2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c32
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c2
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c8
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.c4
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.c4
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c2
-rw-r--r--drivers/gpu/drm/tiny/panel-mipi-dbi.c5
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_test.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_backup.c8
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h2
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c199
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h5
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h9
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c10
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c11
-rw-r--r--drivers/gpu/drm/xe/xe_module.c3
-rw-r--r--drivers/gpu/drm/xe/xe_module.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c14
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c7
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.c2
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c116
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h5
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h8
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c4
-rw-r--r--drivers/gpu/ipu-v3/ipu-common.c8
-rw-r--r--drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c12
-rw-r--r--drivers/hid/bpf/hid_bpf_dispatch.c9
-rw-r--r--drivers/hid/bpf/progs/XPPen__ACK05.bpf.c1
-rw-r--r--drivers/hid/hid-ids.h4
-rw-r--r--drivers/hid/hid-quirks.c2
-rw-r--r--drivers/hid/hid-steam.c2
-rw-r--r--drivers/hid/hid-thrustmaster.c1
-rw-r--r--drivers/hid/hid-uclogic-core.c7
-rw-r--r--drivers/hid/wacom_sys.c11
-rw-r--r--drivers/hv/channel.c65
-rw-r--r--drivers/hwmon/fam15h_power.c6
-rw-r--r--drivers/hwmon/hwmon-vid.c4
-rw-r--r--drivers/hwmon/k10temp.c2
-rw-r--r--drivers/i2c/busses/Kconfig2
-rw-r--r--drivers/i2c/busses/i2c-cht-wc.c2
-rw-r--r--drivers/i2c/busses/i2c-designware-pcidrv.c4
-rw-r--r--drivers/i2c/busses/i2c-piix4.c18
-rw-r--r--drivers/i2c/muxes/i2c-mux-pca954x.c6
-rw-r--r--drivers/idle/intel_idle.c139
-rw-r--r--drivers/iio/adc/stm32-adc-core.c7
-rw-r--r--drivers/infiniband/core/device.c6
-rw-r--r--drivers/infiniband/hw/irdma/main.c4
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c5
-rw-r--r--drivers/input/joystick/xpad.c3
-rw-r--r--drivers/input/rmi4/rmi_f34.c135
-rw-r--r--drivers/iommu/dma-iommu.c482
-rw-r--r--drivers/iommu/iommu.c84
-rw-r--r--drivers/irqchip/Kconfig5
-rw-r--r--drivers/irqchip/Makefile1
-rw-r--r--drivers/irqchip/exynos-combiner.c2
-rw-r--r--drivers/irqchip/irq-al-fic.c20
-rw-r--r--drivers/irqchip/irq-alpine-msi.c7
-rw-r--r--drivers/irqchip/irq-apple-aic.c4
-rw-r--r--drivers/irqchip/irq-armada-370-xp.c12
-rw-r--r--drivers/irqchip/irq-aspeed-i2c-ic.c2
-rw-r--r--drivers/irqchip/irq-aspeed-intc.c2
-rw-r--r--drivers/irqchip/irq-aspeed-scu-ic.c2
-rw-r--r--drivers/irqchip/irq-aspeed-vic.c4
-rw-r--r--drivers/irqchip/irq-ath79-misc.c4
-rw-r--r--drivers/irqchip/irq-atmel-aic-common.c2
-rw-r--r--drivers/irqchip/irq-atmel-aic.c19
-rw-r--r--drivers/irqchip/irq-atmel-aic5.c28
-rw-r--r--drivers/irqchip/irq-bcm2712-mip.c6
-rw-r--r--drivers/irqchip/irq-bcm2835.c2
-rw-r--r--drivers/irqchip/irq-bcm2836.c2
-rw-r--r--drivers/irqchip/irq-bcm6345-l1.c2
-rw-r--r--drivers/irqchip/irq-bcm7038-l1.c2
-rw-r--r--drivers/irqchip/irq-bcm7120-l2.c24
-rw-r--r--drivers/irqchip/irq-brcmstb-l2.c10
-rw-r--r--drivers/irqchip/irq-clps711x.c4
-rw-r--r--drivers/irqchip/irq-crossbar.c6
-rw-r--r--drivers/irqchip/irq-csky-apb-intc.c5
-rw-r--r--drivers/irqchip/irq-csky-mpintc.c2
-rw-r--r--drivers/irqchip/irq-davinci-cp-intc.c6
-rw-r--r--drivers/irqchip/irq-digicolor.c2
-rw-r--r--drivers/irqchip/irq-dw-apb-ictl.c5
-rw-r--r--drivers/irqchip/irq-econet-en751221.c310
-rw-r--r--drivers/irqchip/irq-ftintc010.c5
-rw-r--r--drivers/irqchip/irq-gic-v2m.c20
-rw-r--r--drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its-msi-parent.c41
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c76
-rw-r--r--drivers/irqchip/irq-gic-v3-mbi.c20
-rw-r--r--drivers/irqchip/irq-gic-v3.c4
-rw-r--r--drivers/irqchip/irq-goldfish-pic.c7
-rw-r--r--drivers/irqchip/irq-hip04.c6
-rw-r--r--drivers/irqchip/irq-i8259.c4
-rw-r--r--drivers/irqchip/irq-idt3243x.c2
-rw-r--r--drivers/irqchip/irq-imgpdc.c2
-rw-r--r--drivers/irqchip/irq-imx-gpcv2.c4
-rw-r--r--drivers/irqchip/irq-imx-intmux.c2
-rw-r--r--drivers/irqchip/irq-imx-irqsteer.c2
-rw-r--r--drivers/irqchip/irq-imx-mu-msi.c2
-rw-r--r--drivers/irqchip/irq-ingenic-tcu.c13
-rw-r--r--drivers/irqchip/irq-ingenic.c4
-rw-r--r--drivers/irqchip/irq-ixp4xx.c2
-rw-r--r--drivers/irqchip/irq-jcore-aic.c5
-rw-r--r--drivers/irqchip/irq-keystone.c4
-rw-r--r--drivers/irqchip/irq-lan966x-oic.c20
-rw-r--r--drivers/irqchip/irq-loongarch-avec.c2
-rw-r--r--drivers/irqchip/irq-loongarch-cpu.c2
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c2
-rw-r--r--drivers/irqchip/irq-loongson-htvec.c2
-rw-r--r--drivers/irqchip/irq-loongson-liointc.c11
-rw-r--r--drivers/irqchip/irq-loongson-pch-msi.c4
-rw-r--r--drivers/irqchip/irq-loongson-pch-pic.c2
-rw-r--r--drivers/irqchip/irq-lpc32xx.c4
-rw-r--r--drivers/irqchip/irq-ls-extirq.c4
-rw-r--r--drivers/irqchip/irq-ls-scfg-msi.c10
-rw-r--r--drivers/irqchip/irq-ls1x.c4
-rw-r--r--drivers/irqchip/irq-mchp-eic.c5
-rw-r--r--drivers/irqchip/irq-meson-gpio.c2
-rw-r--r--drivers/irqchip/irq-mips-cpu.c13
-rw-r--r--drivers/irqchip/irq-mips-gic.c15
-rw-r--r--drivers/irqchip/irq-mmp.c12
-rw-r--r--drivers/irqchip/irq-mscc-ocelot.c7
-rw-r--r--drivers/irqchip/irq-msi-lib.c9
-rw-r--r--drivers/irqchip/irq-mst-intc.c4
-rw-r--r--drivers/irqchip/irq-mtk-cirq.c5
-rw-r--r--drivers/irqchip/irq-mtk-sysirq.c4
-rw-r--r--drivers/irqchip/irq-mvebu-gicp.c28
-rw-r--r--drivers/irqchip/irq-mvebu-icu.c2
-rw-r--r--drivers/irqchip/irq-mvebu-odmi.c29
-rw-r--r--drivers/irqchip/irq-mvebu-pic.c4
-rw-r--r--drivers/irqchip/irq-mvebu-sei.c24
-rw-r--r--drivers/irqchip/irq-mxs.c4
-rw-r--r--drivers/irqchip/irq-nvic.c2
-rw-r--r--drivers/irqchip/irq-omap-intc.c4
-rw-r--r--drivers/irqchip/irq-or1k-pic.c4
-rw-r--r--drivers/irqchip/irq-orion.c6
-rw-r--r--drivers/irqchip/irq-owl-sirq.c4
-rw-r--r--drivers/irqchip/irq-pic32-evic.c6
-rw-r--r--drivers/irqchip/irq-pruss-intc.c7
-rw-r--r--drivers/irqchip/irq-qcom-mpm.c2
-rw-r--r--drivers/irqchip/irq-realtek-rtl.c2
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c6
-rw-r--r--drivers/irqchip/irq-renesas-irqc.c4
-rw-r--r--drivers/irqchip/irq-renesas-rza1.c6
-rw-r--r--drivers/irqchip/irq-renesas-rzg2l.c6
-rw-r--r--drivers/irqchip/irq-renesas-rzv2h.c5
-rw-r--r--drivers/irqchip/irq-riscv-imsic-platform.c2
-rw-r--r--drivers/irqchip/irq-riscv-imsic-state.c12
-rw-r--r--drivers/irqchip/irq-riscv-intc.c2
-rw-r--r--drivers/irqchip/irq-sa11x0.c2
-rw-r--r--drivers/irqchip/irq-sg2042-msi.c154
-rw-r--r--drivers/irqchip/irq-sni-exiu.c6
-rw-r--r--drivers/irqchip/irq-sp7021-intc.c4
-rw-r--r--drivers/irqchip/irq-starfive-jh8100-intc.c4
-rw-r--r--drivers/irqchip/irq-stm32-exti.c25
-rw-r--r--drivers/irqchip/irq-stm32mp-exti.c9
-rw-r--r--drivers/irqchip/irq-sun4i.c2
-rw-r--r--drivers/irqchip/irq-sun6i-r.c4
-rw-r--r--drivers/irqchip/irq-sunxi-nmi.c11
-rw-r--r--drivers/irqchip/irq-tb10x.c21
-rw-r--r--drivers/irqchip/irq-tegra.c5
-rw-r--r--drivers/irqchip/irq-ti-sci-inta.c10
-rw-r--r--drivers/irqchip/irq-ti-sci-intr.c7
-rw-r--r--drivers/irqchip/irq-ts4800.c2
-rw-r--r--drivers/irqchip/irq-uniphier-aidet.c2
-rw-r--r--drivers/irqchip/irq-versatile-fpga.c4
-rw-r--r--drivers/irqchip/irq-vf610-mscm-ir.c6
-rw-r--r--drivers/irqchip/irq-vic.c5
-rw-r--r--drivers/irqchip/irq-vt8500.c153
-rw-r--r--drivers/irqchip/irq-wpcm450-aic.c2
-rw-r--r--drivers/irqchip/irq-xilinx-intc.c4
-rw-r--r--drivers/irqchip/irq-xtensa-mx.c5
-rw-r--r--drivers/irqchip/irq-xtensa-pic.c4
-rw-r--r--drivers/irqchip/irq-zevio.c4
-rw-r--r--drivers/irqchip/spear-shirq.c2
-rw-r--r--drivers/mailbox/qcom-ipcc.c4
-rw-r--r--drivers/md/bcache/super.c3
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-integrity.c16
-rw-r--r--drivers/md/dm-raid.c3
-rw-r--r--drivers/md/md.c190
-rw-r--r--drivers/md/md.h18
-rw-r--r--drivers/md/raid1.c3
-rw-r--r--drivers/md/raid10.c9
-rw-r--r--drivers/md/raid5.c8
-rw-r--r--drivers/media/usb/pvrusb2/pvrusb2-hdw.c2
-rw-r--r--drivers/memory/omap-gpmc.c6
-rw-r--r--drivers/memory/renesas-rpc-if-regs.h147
-rw-r--r--drivers/memory/renesas-rpc-if.c714
-rw-r--r--drivers/memory/renesas-xspi-if-regs.h105
-rw-r--r--drivers/mfd/88pm860x-core.c4
-rw-r--r--drivers/mfd/Kconfig20
-rw-r--r--drivers/mfd/Makefile1
-rw-r--r--drivers/mfd/ab8500-core.c6
-rw-r--r--drivers/mfd/arizona-irq.c3
-rw-r--r--drivers/mfd/db8500-prcmu.c6
-rw-r--r--drivers/mfd/fsl-imx25-tsadc.c5
-rw-r--r--drivers/mfd/lp8788-irq.c2
-rw-r--r--drivers/mfd/max77759.c690
-rw-r--r--drivers/mfd/max8925-core.c4
-rw-r--r--drivers/mfd/max8997-irq.c4
-rw-r--r--drivers/mfd/max8998-irq.c2
-rw-r--r--drivers/mfd/mt6358-irq.c6
-rw-r--r--drivers/mfd/mt6397-irq.c6
-rw-r--r--drivers/mfd/qcom-pm8xxx.c6
-rw-r--r--drivers/mfd/stmfx.c2
-rw-r--r--drivers/mfd/stmpe.c4
-rw-r--r--drivers/mfd/tc3589x.c6
-rw-r--r--drivers/mfd/tps65217.c2
-rw-r--r--drivers/mfd/tps6586x.c2
-rw-r--r--drivers/mfd/twl4030-irq.c4
-rw-r--r--drivers/mfd/twl6030-irq.c5
-rw-r--r--drivers/mfd/wm831x-irq.c15
-rw-r--r--drivers/mfd/wm8994-irq.c4
-rw-r--r--drivers/misc/cs5535-mfgpt.c1
-rw-r--r--drivers/misc/hi6421v600-irq.c5
-rw-r--r--drivers/mmc/core/block.c16
-rw-r--r--drivers/mmc/core/card.h6
-rw-r--r--drivers/mmc/core/core.c48
-rw-r--r--drivers/mmc/core/core.h10
-rw-r--r--drivers/mmc/core/host.h8
-rw-r--r--drivers/mmc/core/mmc.c103
-rw-r--r--drivers/mmc/core/mmc_ops.c6
-rw-r--r--drivers/mmc/core/mmc_ops.h2
-rw-r--r--drivers/mmc/core/mmc_test.c16
-rw-r--r--drivers/mmc/core/queue.c6
-rw-r--r--drivers/mmc/core/quirks.h10
-rw-r--r--drivers/mmc/core/sd.c65
-rw-r--r--drivers/mmc/core/sdio.c6
-rw-r--r--drivers/mmc/core/slot-gpio.c8
-rw-r--r--drivers/mmc/host/Kconfig14
-rw-r--r--drivers/mmc/host/Makefile1
-rw-r--r--drivers/mmc/host/alcor.c3
-rw-r--r--drivers/mmc/host/bcm2835.c8
-rw-r--r--drivers/mmc/host/cavium-thunderx.c4
-rw-r--r--drivers/mmc/host/dw_mmc.c6
-rw-r--r--drivers/mmc/host/mtk-sd.c219
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c20
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c243
-rw-r--r--drivers/mmc/host/sdhci-of-dwcmshc.c40
-rw-r--r--drivers/mmc/host/sdhci-of-k1.c304
-rw-r--r--drivers/mmc/host/sdhci-omap.c2
-rw-r--r--drivers/mmc/host/sdhci.c12
-rw-r--r--drivers/mmc/host/sdhci.h2
-rw-r--r--drivers/mmc/host/sdhci_am654.c35
-rw-r--r--drivers/mmc/host/sunplus-mmc.c2
-rw-r--r--drivers/mmc/host/tmio_mmc_core.c6
-rw-r--r--drivers/mtd/nand/raw/cs553x_nand.c6
-rw-r--r--drivers/net/can/kvaser_pciefd.c182
-rw-r--r--drivers/net/can/slcan/slcan-core.c26
-rw-r--r--drivers/net/dsa/b53/b53_common.c33
-rw-r--r--drivers/net/dsa/b53/b53_regs.h14
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c140
-rw-r--r--drivers/net/dsa/microchip/ksz_ptp.c4
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c6
-rw-r--r--drivers/net/dsa/qca/ar9331.c4
-rw-r--r--drivers/net/dsa/realtek/rtl8365mb.c4
-rw-r--r--drivers/net/dsa/realtek/rtl8366rb.c6
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c6
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c22
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c36
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c9
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c19
-rw-r--r--drivers/net/ethernet/engleder/tsnep_main.c30
-rw-r--r--drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c1
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf.h2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c10
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c18
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c24
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c11
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c10
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c9
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c3
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c19
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c2
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c2
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c2
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c10
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c8
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c4
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_type.h2
-rw-r--r--drivers/net/hyperv/hyperv_net.h13
-rw-r--r--drivers/net/hyperv/netvsc.c57
-rw-r--r--drivers/net/hyperv/netvsc_drv.c62
-rw-r--r--drivers/net/hyperv/rndis_filter.c24
-rw-r--r--drivers/net/phy/micrel.c7
-rw-r--r--drivers/net/team/team_core.c6
-rw-r--r--drivers/net/usb/lan78xx.c9
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c9
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/mcu.c4
-rw-r--r--drivers/ntb/msi.c22
-rw-r--r--drivers/nvme/common/auth.c15
-rw-r--r--drivers/nvme/host/auth.c30
-rw-r--r--drivers/nvme/host/core.c235
-rw-r--r--drivers/nvme/host/fc.c13
-rw-r--r--drivers/nvme/host/multipath.c209
-rw-r--r--drivers/nvme/host/nvme.h34
-rw-r--r--drivers/nvme/host/pci.c306
-rw-r--r--drivers/nvme/host/sysfs.c35
-rw-r--r--drivers/nvme/host/tcp.c14
-rw-r--r--drivers/nvme/target/admin-cmd.c31
-rw-r--r--drivers/nvme/target/auth.c21
-rw-r--r--drivers/nvme/target/core.c94
-rw-r--r--drivers/nvme/target/discovery.c2
-rw-r--r--drivers/nvme/target/fabrics-cmd.c12
-rw-r--r--drivers/nvme/target/fc.c96
-rw-r--r--drivers/nvme/target/fcloop.c439
-rw-r--r--drivers/nvme/target/loop.c29
-rw-r--r--drivers/nvme/target/nvmet.h24
-rw-r--r--drivers/nvme/target/pci-epf.c53
-rw-r--r--drivers/nvme/target/rdma.c8
-rw-r--r--drivers/nvme/target/tcp.c100
-rw-r--r--drivers/nvmem/Kconfig12
-rw-r--r--drivers/nvmem/Makefile2
-rw-r--r--drivers/nvmem/max77759-nvmem.c145
-rw-r--r--drivers/opp/core.c428
-rw-r--r--drivers/opp/cpu.c30
-rw-r--r--drivers/opp/of.c205
-rw-r--r--drivers/opp/opp.h1
-rw-r--r--drivers/pci/controller/Kconfig3
-rw-r--r--drivers/pci/controller/dwc/pci-dra7xx.c4
-rw-r--r--drivers/pci/controller/dwc/pci-keystone.c2
-rw-r--r--drivers/pci/controller/dwc/pcie-amd-mdb.c8
-rw-r--r--drivers/pci/controller/dwc/pcie-designware-host.c2
-rw-r--r--drivers/pci/controller/dwc/pcie-dw-rockchip.c4
-rw-r--r--drivers/pci/controller/dwc/pcie-uniphier.c2
-rw-r--r--drivers/pci/controller/mobiveil/pcie-mobiveil-host.c11
-rw-r--r--drivers/pci/controller/pci-aardvark.c14
-rw-r--r--drivers/pci/controller/pci-ftpci100.c4
-rw-r--r--drivers/pci/controller/pci-hyperv.c14
-rw-r--r--drivers/pci/controller/pci-mvebu.c6
-rw-r--r--drivers/pci/controller/pci-tegra.c63
-rw-r--r--drivers/pci/controller/pci-xgene-msi.c53
-rw-r--r--drivers/pci/controller/pcie-altera-msi.c4
-rw-r--r--drivers/pci/controller/pcie-altera.c2
-rw-r--r--drivers/pci/controller/pcie-apple.c69
-rw-r--r--drivers/pci/controller/pcie-brcmstb.c4
-rw-r--r--drivers/pci/controller/pcie-iproc-msi.c6
-rw-r--r--drivers/pci/controller/pcie-mediatek-gen3.c9
-rw-r--r--drivers/pci/controller/pcie-mediatek.c6
-rw-r--r--drivers/pci/controller/pcie-rockchip-host.c4
-rw-r--r--drivers/pci/controller/pcie-xilinx-cpm.c10
-rw-r--r--drivers/pci/controller/pcie-xilinx-dma-pl.c14
-rw-r--r--drivers/pci/controller/pcie-xilinx-nwl.c11
-rw-r--r--drivers/pci/controller/pcie-xilinx.c5
-rw-r--r--drivers/pci/controller/plda/pcie-plda-host.c16
-rw-r--r--drivers/pci/hotplug/s390_pci_hpc.c2
-rw-r--r--drivers/pci/msi/api.c8
-rw-r--r--drivers/pci/msi/msi.c179
-rw-r--r--drivers/pci/msi/msi.h2
-rw-r--r--drivers/pci/p2pdma.c38
-rw-r--r--drivers/pci/pci.h9
-rw-r--r--drivers/pci/tph.c44
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c3
-rw-r--r--drivers/perf/arm_pmuv3.c3
-rw-r--r--drivers/perf/arm_v6_pmu.c3
-rw-r--r--drivers/perf/arm_v7_pmu.c3
-rw-r--r--drivers/perf/arm_xscale_pmu.c6
-rw-r--r--drivers/phy/phy-can-transceiver.c22
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-ufs.c3
-rw-r--r--drivers/phy/renesas/phy-rcar-gen3-usb2.c135
-rw-r--r--drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c2
-rw-r--r--drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c2
-rw-r--r--drivers/phy/starfive/phy-jh7110-usb.c7
-rw-r--r--drivers/phy/tegra/xusb-tegra186.c46
-rw-r--r--drivers/phy/tegra/xusb.c8
-rw-r--r--drivers/pinctrl/mediatek/mtk-eint.c5
-rw-r--r--drivers/pinctrl/pinctrl-amd.c49
-rw-r--r--drivers/pinctrl/pinctrl-at91-pio4.c2
-rw-r--r--drivers/pinctrl/pinctrl-keembay.c2
-rw-r--r--drivers/pinctrl/pinctrl-single.c9
-rw-r--r--drivers/pinctrl/qcom/pinctrl-msm.c23
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sunxi.c7
-rw-r--r--drivers/platform/chrome/Kconfig5
-rw-r--r--drivers/platform/chrome/Makefile3
-rw-r--r--drivers/platform/chrome/chromeos_of_hw_prober.c33
-rw-r--r--drivers/platform/chrome/cros_ec_debugfs.c52
-rw-r--r--drivers/platform/chrome/cros_ec_proto.c24
-rw-r--r--drivers/platform/chrome/cros_ec_proto_test_util.h5
-rw-r--r--drivers/platform/chrome/cros_ec_typec.c6
-rw-r--r--drivers/platform/chrome/cros_kbd_led_backlight.c24
-rw-r--r--drivers/platform/x86/amd/hsmp/acpi.c7
-rw-r--r--drivers/platform/x86/amd/hsmp/hsmp.c2
-rw-r--r--drivers/platform/x86/amd/hsmp/hsmp.h1
-rw-r--r--drivers/platform/x86/amd/hsmp/plat.c10
-rw-r--r--drivers/platform/x86/amd/pmc/mp1_stb.c2
-rw-r--r--drivers/platform/x86/amd/pmc/pmc-quirks.c10
-rw-r--r--drivers/platform/x86/amd/pmc/pmc.c2
-rw-r--r--drivers/platform/x86/amd/pmf/core.c2
-rw-r--r--drivers/platform/x86/amd/pmf/tee-if.c23
-rw-r--r--drivers/platform/x86/asus-wmi.c3
-rw-r--r--drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c2
-rw-r--r--drivers/platform/x86/fujitsu-laptop.c33
-rw-r--r--drivers/platform/x86/intel/ifs/core.c5
-rw-r--r--drivers/platform/x86/intel/ifs/load.c21
-rw-r--r--drivers/platform/x86/intel/ifs/runtest.c17
-rw-r--r--drivers/platform/x86/intel/int0002_vgpio.c2
-rw-r--r--drivers/platform/x86/intel/pmc/arl.c3
-rw-r--r--drivers/platform/x86/intel/pmc/cnp.c7
-rw-r--r--drivers/platform/x86/intel/pmc/core.c10
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_common.c19
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c15
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c3
-rw-r--r--drivers/platform/x86/intel/tpmi_power_domains.c4
-rw-r--r--drivers/platform/x86/intel/turbo_max_3.c5
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c11
-rw-r--r--drivers/platform/x86/intel_ips.c36
-rw-r--r--drivers/platform/x86/think-lmi.c26
-rw-r--r--drivers/platform/x86/think-lmi.h1
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c7
-rw-r--r--drivers/pmdomain/amlogic/meson-ee-pwrc.c78
-rw-r--r--drivers/pmdomain/arm/Kconfig6
-rw-r--r--drivers/pmdomain/bcm/bcm2835-power.c16
-rw-r--r--drivers/pmdomain/core.c133
-rw-r--r--drivers/pmdomain/governor.c2
-rw-r--r--drivers/pmdomain/mediatek/mt6893-pm-domains.h585
-rw-r--r--drivers/pmdomain/mediatek/mtk-pm-domains.c17
-rw-r--r--drivers/pmdomain/mediatek/mtk-pm-domains.h2
-rw-r--r--drivers/pmdomain/qcom/rpmhpd.c16
-rw-r--r--drivers/pmdomain/renesas/rcar-gen4-sysc.c5
-rw-r--r--drivers/pmdomain/renesas/rcar-sysc.c5
-rw-r--r--drivers/pmdomain/rockchip/pm-domains.c48
-rw-r--r--drivers/pmdomain/sunxi/Kconfig10
-rw-r--r--drivers/pmdomain/sunxi/Makefile1
-rw-r--r--drivers/pmdomain/sunxi/sun50i-h6-prcm-ppu.c208
-rw-r--r--drivers/pmdomain/ti/omap_prm.c8
-rw-r--r--drivers/pnp/quirks.c2
-rw-r--r--drivers/power/reset/Kconfig13
-rw-r--r--drivers/power/reset/Makefile1
-rw-r--r--drivers/power/reset/at91-reset.c5
-rw-r--r--drivers/power/reset/reboot-mode.c25
-rw-r--r--drivers/power/reset/syscon-reboot.c98
-rw-r--r--drivers/power/reset/tdx-ec-poweroff.c150
-rw-r--r--drivers/power/supply/Kconfig37
-rw-r--r--drivers/power/supply/Makefile3
-rw-r--r--drivers/power/supply/bq24190_charger.c14
-rw-r--r--drivers/power/supply/bq27xxx_battery.c2
-rw-r--r--drivers/power/supply/bq27xxx_battery_i2c.c13
-rw-r--r--drivers/power/supply/chagall-battery.c291
-rw-r--r--drivers/power/supply/collie_battery.c1
-rw-r--r--drivers/power/supply/cros_charge-control.c23
-rw-r--r--drivers/power/supply/gpio-charger.c4
-rw-r--r--drivers/power/supply/huawei-gaokun-battery.c645
-rw-r--r--drivers/power/supply/max17040_battery.c5
-rw-r--r--drivers/power/supply/max77705_charger.c20
-rw-r--r--drivers/power/supply/max8971_charger.c752
-rw-r--r--drivers/power/supply/power_supply_sysfs.c23
-rw-r--r--drivers/power/supply/rk817_charger.c2
-rw-r--r--drivers/power/supply/rt9471.c12
-rw-r--r--drivers/power/supply/test_power.c20
-rw-r--r--drivers/power/supply/wm831x_power.c20
-rw-r--r--drivers/powercap/intel_rapl_common.c1
-rw-r--r--drivers/powercap/intel_rapl_msr.c7
-rw-r--r--drivers/ptp/ptp_ocp.c24
-rw-r--r--drivers/pwm/Kconfig113
-rw-r--r--drivers/pwm/Makefile11
-rw-r--r--drivers/pwm/core.c118
-rw-r--r--drivers/pwm/pwm-adp5585.c1
-rw-r--r--drivers/pwm/pwm-loongson.c290
-rw-r--r--drivers/pwm/pwm-mc33xs2410.c391
-rw-r--r--drivers/pwm/pwm-meson.c123
-rw-r--r--drivers/pwm/pwm-pca9685.c8
-rw-r--r--drivers/pwm/pwm-pxa.c18
-rw-r--r--drivers/pwm/pwm-rzg2l-gpt.c447
-rw-r--r--drivers/pwm/pwm-stm32.c15
-rw-r--r--drivers/ras/amd/atl/internal.h4
-rw-r--r--drivers/regulator/Kconfig23
-rw-r--r--drivers/regulator/Makefile1
-rw-r--r--drivers/regulator/adp5055-regulator.c424
-rw-r--r--drivers/regulator/core.c2
-rw-r--r--drivers/regulator/da9121-regulator.c2
-rw-r--r--drivers/regulator/gpio-regulator.c10
-rw-r--r--drivers/regulator/max20086-regulator.c11
-rw-r--r--drivers/regulator/pca9450-regulator.c27
-rw-r--r--drivers/regulator/pf9453-regulator.c3
-rw-r--r--drivers/regulator/qcom_spmi-regulator.c69
-rw-r--r--drivers/regulator/rpi-panel-attiny-regulator.c83
-rw-r--r--drivers/regulator/s5m8767.c146
-rw-r--r--drivers/regulator/tps65219-regulator.c242
-rw-r--r--drivers/remoteproc/qcom_wcnss.c3
-rw-r--r--drivers/s390/block/dcssblk.c4
-rw-r--r--drivers/s390/char/con3270.c17
-rw-r--r--drivers/s390/char/diag_ftp.c2
-rw-r--r--drivers/s390/crypto/ap_bus.c74
-rw-r--r--drivers/s390/crypto/ap_bus.h30
-rw-r--r--drivers/s390/crypto/pkey_api.c50
-rw-r--r--drivers/s390/crypto/pkey_base.c34
-rw-r--r--drivers/s390/crypto/pkey_base.h37
-rw-r--r--drivers/s390/crypto/pkey_cca.c136
-rw-r--r--drivers/s390/crypto/pkey_ep11.c117
-rw-r--r--drivers/s390/crypto/pkey_pckmo.c9
-rw-r--r--drivers/s390/crypto/pkey_sysfs.c4
-rw-r--r--drivers/s390/crypto/pkey_uv.c44
-rw-r--r--drivers/s390/crypto/zcrypt_api.c167
-rw-r--r--drivers/s390/crypto/zcrypt_api.h16
-rw-r--r--drivers/s390/crypto/zcrypt_ccamisc.c486
-rw-r--r--drivers/s390/crypto/zcrypt_ccamisc.h49
-rw-r--r--drivers/s390/crypto/zcrypt_cex4.c39
-rw-r--r--drivers/s390/crypto/zcrypt_ep11misc.c454
-rw-r--r--drivers/s390/crypto/zcrypt_ep11misc.h27
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype50.c36
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.c109
-rw-r--r--drivers/s390/net/ctcm_mpc.c2
-rw-r--r--drivers/scsi/Kconfig3
-rw-r--r--drivers/scsi/aha152x.c1
-rw-r--r--drivers/scsi/imm.c1
-rw-r--r--drivers/scsi/megaraid/megaraid_mbox.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_mm.c2
-rw-r--r--drivers/scsi/ppa.c1
-rw-r--r--drivers/scsi/scsi_ioctl.c2
-rw-r--r--drivers/scsi/scsi_lib.c6
-rw-r--r--drivers/scsi/sd_zbc.c6
-rw-r--r--drivers/scsi/storvsc_drv.c1
-rw-r--r--drivers/sh/intc/irqdomain.c5
-rw-r--r--drivers/soc/dove/pmu.c7
-rw-r--r--drivers/soc/fsl/qe/qe_ic.c4
-rw-r--r--drivers/soc/qcom/smp2p.c2
-rw-r--r--drivers/soc/qcom/smsm.c2
-rw-r--r--drivers/soc/renesas/Kconfig42
-rw-r--r--drivers/soc/samsung/exynos-usi.c2
-rw-r--r--drivers/soc/tegra/pmc.c5
-rw-r--r--drivers/soc/ti/ti_sci_inta_msi.c10
-rw-r--r--drivers/soundwire/bus.c9
-rw-r--r--drivers/spi/Kconfig4
-rw-r--r--drivers/spi/Makefile2
-rw-r--r--drivers/spi/atmel-quadspi.c26
-rw-r--r--drivers/spi/spi-amd-pci.c70
-rw-r--r--drivers/spi/spi-amd.c227
-rw-r--r--drivers/spi/spi-amd.h44
-rw-r--r--drivers/spi/spi-axi-spi-engine.c91
-rw-r--r--drivers/spi/spi-cadence-quadspi.c2
-rw-r--r--drivers/spi/spi-cavium-thunderx.c4
-rw-r--r--drivers/spi/spi-cs42l43.c4
-rw-r--r--drivers/spi/spi-dw-core.c2
-rw-r--r--drivers/spi/spi-fsl-dspi.c46
-rw-r--r--drivers/spi/spi-fsl-qspi.c81
-rw-r--r--drivers/spi/spi-gpio.c2
-rw-r--r--drivers/spi/spi-intel-pci.c8
-rw-r--r--drivers/spi/spi-intel-platform.c9
-rw-r--r--drivers/spi/spi-intel.c9
-rw-r--r--drivers/spi/spi-intel.h4
-rw-r--r--drivers/spi/spi-loopback-test.c10
-rw-r--r--drivers/spi/spi-meson-spicc.c241
-rw-r--r--drivers/spi/spi-nxp-fspi.c189
-rw-r--r--drivers/spi/spi-offload.c5
-rw-r--r--drivers/spi/spi-pci1xxxx.c24
-rw-r--r--drivers/spi/spi-qpic-snand.c166
-rw-r--r--drivers/spi/spi-rpc-if.c16
-rw-r--r--drivers/spi/spi-sh-msiof.c397
-rw-r--r--drivers/spi/spi-stm32-ospi.c2
-rw-r--r--drivers/spi/spi-sun4i.c5
-rw-r--r--drivers/spi/spi-tegra114.c6
-rw-r--r--drivers/spi/spi-tegra210-quad.c280
-rw-r--r--drivers/spi/spi-xcomm.c8
-rw-r--r--drivers/spi/spi.c19
-rw-r--r--drivers/staging/gpib/common/iblib.c2
-rw-r--r--drivers/thermal/Kconfig11
-rw-r--r--drivers/thermal/Makefile1
-rw-r--r--drivers/thermal/airoha_thermal.c489
-rw-r--r--drivers/thermal/amlogic_thermal.c16
-rw-r--r--drivers/thermal/broadcom/bcm2835_thermal.c2
-rw-r--r--drivers/thermal/intel/int340x_thermal/Makefile1
-rw-r--r--drivers/thermal/intel/int340x_thermal/platform_temperature_control.c243
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device.c18
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device.h3
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c5
-rw-r--r--drivers/thermal/intel/intel_hfi.c14
-rw-r--r--drivers/thermal/intel/intel_powerclamp.c4
-rw-r--r--drivers/thermal/intel/intel_tcc_cooling.c5
-rw-r--r--drivers/thermal/intel/therm_throt.c10
-rw-r--r--drivers/thermal/intel/x86_pkg_temp_thermal.c2
-rw-r--r--drivers/thermal/mediatek/lvts_thermal.c18
-rw-r--r--drivers/thermal/qcom/lmh.c3
-rw-r--r--drivers/thermal/qcom/tsens-v1.c62
-rw-r--r--drivers/thermal/qcom/tsens.c27
-rw-r--r--drivers/thermal/qcom/tsens.h4
-rw-r--r--drivers/thermal/tegra/soctherm.c2
-rw-r--r--drivers/ufs/host/ufs-qcom.c85
-rw-r--r--drivers/usb/atm/cxacru.c2
-rw-r--r--drivers/usb/gadget/function/f_midi2.c2
-rw-r--r--drivers/usb/misc/usbtest.c2
-rw-r--r--drivers/usb/storage/usb.c20
-rw-r--r--drivers/usb/typec/ucsi/ucsi_ccg.c2
-rw-r--r--drivers/video/fbdev/geode/display_gx.c1
-rw-r--r--drivers/video/fbdev/geode/gxfb_core.c3
-rw-r--r--drivers/video/fbdev/geode/lxfb_ops.c23
-rw-r--r--drivers/video/fbdev/geode/suspend_gx.c10
-rw-r--r--drivers/video/fbdev/geode/video_gx.c16
-rw-r--r--drivers/w1/slaves/w1_ds2406.c12
-rw-r--r--drivers/watchdog/diag288_wdt.c53
-rw-r--r--drivers/xen/balloon.c13
-rw-r--r--drivers/xen/swiotlb-xen.c1
-rw-r--r--fs/9p/vfs_addr.c1
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Makefile1
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/dir_silly.c6
-rw-r--r--fs/aio.c1
-rw-r--r--fs/anon_inodes.c45
-rw-r--r--fs/autofs/dev-ioctl.c3
-rw-r--r--fs/bcachefs/Kconfig8
-rw-r--r--fs/bcachefs/Makefile4
-rw-r--r--fs/bcachefs/alloc_background.c167
-rw-r--r--fs/bcachefs/alloc_background.h1
-rw-r--r--fs/bcachefs/alloc_foreground.c530
-rw-r--r--fs/bcachefs/alloc_foreground.h69
-rw-r--r--fs/bcachefs/alloc_types.h16
-rw-r--r--fs/bcachefs/async_objs.c132
-rw-r--r--fs/bcachefs/async_objs.h44
-rw-r--r--fs/bcachefs/async_objs_types.h25
-rw-r--r--fs/bcachefs/backpointers.c373
-rw-r--r--fs/bcachefs/backpointers.h14
-rw-r--r--fs/bcachefs/bcachefs.h225
-rw-r--r--fs/bcachefs/bcachefs_format.h30
-rw-r--r--fs/bcachefs/bkey.c47
-rw-r--r--fs/bcachefs/bkey.h4
-rw-r--r--fs/bcachefs/bkey_methods.c2
-rw-r--r--fs/bcachefs/bset.c64
-rw-r--r--fs/bcachefs/bset.h22
-rw-r--r--fs/bcachefs/btree_cache.c193
-rw-r--r--fs/bcachefs/btree_gc.c32
-rw-r--r--fs/bcachefs/btree_gc.h3
-rw-r--r--fs/bcachefs/btree_io.c346
-rw-r--r--fs/bcachefs/btree_io.h12
-rw-r--r--fs/bcachefs/btree_iter.c301
-rw-r--r--fs/bcachefs/btree_iter.h85
-rw-r--r--fs/bcachefs/btree_key_cache.c61
-rw-r--r--fs/bcachefs/btree_key_cache.h3
-rw-r--r--fs/bcachefs/btree_locking.c196
-rw-r--r--fs/bcachefs/btree_locking.h72
-rw-r--r--fs/bcachefs/btree_node_scan.c18
-rw-r--r--fs/bcachefs/btree_trans_commit.c79
-rw-r--r--fs/bcachefs/btree_types.h31
-rw-r--r--fs/bcachefs/btree_update.c74
-rw-r--r--fs/bcachefs/btree_update.h68
-rw-r--r--fs/bcachefs/btree_update_interior.c50
-rw-r--r--fs/bcachefs/btree_update_interior.h6
-rw-r--r--fs/bcachefs/btree_write_buffer.c20
-rw-r--r--fs/bcachefs/btree_write_buffer.h1
-rw-r--r--fs/bcachefs/buckets.c69
-rw-r--r--fs/bcachefs/chardev.c6
-rw-r--r--fs/bcachefs/checksum.c22
-rw-r--r--fs/bcachefs/checksum.h2
-rw-r--r--fs/bcachefs/clock.c2
-rw-r--r--fs/bcachefs/compress.c4
-rw-r--r--fs/bcachefs/darray.h13
-rw-r--r--fs/bcachefs/data_update.c207
-rw-r--r--fs/bcachefs/data_update.h15
-rw-r--r--fs/bcachefs/debug.c85
-rw-r--r--fs/bcachefs/debug.h20
-rw-r--r--fs/bcachefs/dirent.c46
-rw-r--r--fs/bcachefs/dirent.h2
-rw-r--r--fs/bcachefs/disk_accounting.c126
-rw-r--r--fs/bcachefs/disk_accounting.h28
-rw-r--r--fs/bcachefs/disk_groups.c123
-rw-r--r--fs/bcachefs/ec.c238
-rw-r--r--fs/bcachefs/ec.h9
-rw-r--r--fs/bcachefs/ec_types.h7
-rw-r--r--fs/bcachefs/enumerated_ref.c144
-rw-r--r--fs/bcachefs/enumerated_ref.h66
-rw-r--r--fs/bcachefs/enumerated_ref_types.h19
-rw-r--r--fs/bcachefs/errcode.h7
-rw-r--r--fs/bcachefs/error.c113
-rw-r--r--fs/bcachefs/error.h15
-rw-r--r--fs/bcachefs/extent_update.c67
-rw-r--r--fs/bcachefs/extent_update.h2
-rw-r--r--fs/bcachefs/extents.c136
-rw-r--r--fs/bcachefs/extents.h10
-rw-r--r--fs/bcachefs/extents_types.h1
-rw-r--r--fs/bcachefs/fast_list.c156
-rw-r--r--fs/bcachefs/fast_list.h41
-rw-r--r--fs/bcachefs/fs-io-direct.c7
-rw-r--r--fs/bcachefs/fs-io-pagecache.c18
-rw-r--r--fs/bcachefs/fs-io.c26
-rw-r--r--fs/bcachefs/fs-ioctl.c14
-rw-r--r--fs/bcachefs/fs.c60
-rw-r--r--fs/bcachefs/fsck.c407
-rw-r--r--fs/bcachefs/inode.c162
-rw-r--r--fs/bcachefs/inode.h39
-rw-r--r--fs/bcachefs/inode_format.h7
-rw-r--r--fs/bcachefs/io_read.c309
-rw-r--r--fs/bcachefs/io_read.h19
-rw-r--r--fs/bcachefs/io_write.c58
-rw-r--r--fs/bcachefs/io_write.h28
-rw-r--r--fs/bcachefs/io_write_types.h32
-rw-r--r--fs/bcachefs/journal.c86
-rw-r--r--fs/bcachefs/journal.h3
-rw-r--r--fs/bcachefs/journal_io.c171
-rw-r--r--fs/bcachefs/journal_reclaim.c56
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c10
-rw-r--r--fs/bcachefs/journal_seq_blacklist.h1
-rw-r--r--fs/bcachefs/journal_types.h2
-rw-r--r--fs/bcachefs/migrate.c117
-rw-r--r--fs/bcachefs/migrate.h3
-rw-r--r--fs/bcachefs/move.c201
-rw-r--r--fs/bcachefs/move.h17
-rw-r--r--fs/bcachefs/move_types.h8
-rw-r--r--fs/bcachefs/movinggc.c217
-rw-r--r--fs/bcachefs/movinggc.h2
-rw-r--r--fs/bcachefs/namei.c260
-rw-r--r--fs/bcachefs/namei.h7
-rw-r--r--fs/bcachefs/nocow_locking.c4
-rw-r--r--fs/bcachefs/nocow_locking.h2
-rw-r--r--fs/bcachefs/opts.c170
-rw-r--r--fs/bcachefs/opts.h38
-rw-r--r--fs/bcachefs/rebalance.c226
-rw-r--r--fs/bcachefs/rebalance.h6
-rw-r--r--fs/bcachefs/rebalance_types.h5
-rw-r--r--fs/bcachefs/recovery.c134
-rw-r--r--fs/bcachefs/recovery.h3
-rw-r--r--fs/bcachefs/recovery_passes.c599
-rw-r--r--fs/bcachefs/recovery_passes.h26
-rw-r--r--fs/bcachefs/recovery_passes_format.h104
-rw-r--r--fs/bcachefs/recovery_passes_types.h93
-rw-r--r--fs/bcachefs/reflink.c5
-rw-r--r--fs/bcachefs/sb-counters_format.h2
-rw-r--r--fs/bcachefs/sb-downgrade.c9
-rw-r--r--fs/bcachefs/sb-errors_format.h8
-rw-r--r--fs/bcachefs/sb-members.c77
-rw-r--r--fs/bcachefs/sb-members.h62
-rw-r--r--fs/bcachefs/sb-members_format.h6
-rw-r--r--fs/bcachefs/sb-members_types.h1
-rw-r--r--fs/bcachefs/snapshot.c503
-rw-r--r--fs/bcachefs/snapshot.h35
-rw-r--r--fs/bcachefs/snapshot_format.h4
-rw-r--r--fs/bcachefs/snapshot_types.h57
-rw-r--r--fs/bcachefs/str_hash.c137
-rw-r--r--fs/bcachefs/str_hash.h10
-rw-r--r--fs/bcachefs/subvolume.c63
-rw-r--r--fs/bcachefs/subvolume.h5
-rw-r--r--fs/bcachefs/subvolume_types.h27
-rw-r--r--fs/bcachefs/super-io.c63
-rw-r--r--fs/bcachefs/super-io.h1
-rw-r--r--fs/bcachefs/super.c678
-rw-r--r--fs/bcachefs/super.h9
-rw-r--r--fs/bcachefs/sysfs.c108
-rw-r--r--fs/bcachefs/trace.h58
-rw-r--r--fs/bcachefs/util.c41
-rw-r--r--fs/bcachefs/util.h17
-rw-r--r--fs/bcachefs/xattr.c29
-rw-r--r--fs/bcachefs/xattr.h4
-rw-r--r--fs/bcachefs/xattr_format.h4
-rw-r--r--fs/bfs/inode.c30
-rw-r--r--fs/binfmt_elf.c147
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/btrfs/Kconfig32
-rw-r--r--fs/btrfs/async-thread.c3
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/bio.c55
-rw-r--r--fs/btrfs/bio.h3
-rw-r--r--fs/btrfs/block-group.c196
-rw-r--r--fs/btrfs/block-group.h11
-rw-r--r--fs/btrfs/block-rsv.c11
-rw-r--r--fs/btrfs/block-rsv.h1
-rw-r--r--fs/btrfs/btrfs_inode.h7
-rw-r--r--fs/btrfs/compression.c75
-rw-r--r--fs/btrfs/compression.h11
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/defrag.c143
-rw-r--r--fs/btrfs/delalloc-space.c51
-rw-r--r--fs/btrfs/delalloc-space.h4
-rw-r--r--fs/btrfs/delayed-inode.c73
-rw-r--r--fs/btrfs/delayed-ref.c9
-rw-r--r--fs/btrfs/delayed-ref.h1
-rw-r--r--fs/btrfs/dev-replace.c22
-rw-r--r--fs/btrfs/dev-replace.h2
-rw-r--r--fs/btrfs/direct-io.c75
-rw-r--r--fs/btrfs/discard.c19
-rw-r--r--fs/btrfs/disk-io.c199
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-io-tree.c510
-rw-r--r--fs/btrfs/extent-io-tree.h165
-rw-r--r--fs/btrfs/extent-tree.c162
-rw-r--r--fs/btrfs/extent-tree.h4
-rw-r--r--fs/btrfs/extent_io.c958
-rw-r--r--fs/btrfs/extent_io.h9
-rw-r--r--fs/btrfs/extent_map.c175
-rw-r--r--fs/btrfs/extent_map.h47
-rw-r--r--fs/btrfs/fiemap.c9
-rw-r--r--fs/btrfs/file-item.c49
-rw-r--r--fs/btrfs/file-item.h6
-rw-r--r--fs/btrfs/file.c776
-rw-r--r--fs/btrfs/free-space-cache.c52
-rw-r--r--fs/btrfs/free-space-tree.c62
-rw-r--r--fs/btrfs/fs.h7
-rw-r--r--fs/btrfs/inode-item.c31
-rw-r--r--fs/btrfs/inode.c687
-rw-r--r--fs/btrfs/ioctl.c27
-rw-r--r--fs/btrfs/locking.c8
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/lzo.c5
-rw-r--r--fs/btrfs/messages.h83
-rw-r--r--fs/btrfs/ordered-data.c73
-rw-r--r--fs/btrfs/qgroup.c55
-rw-r--r--fs/btrfs/raid56.c219
-rw-r--r--fs/btrfs/reflink.c15
-rw-r--r--fs/btrfs/relocation.c112
-rw-r--r--fs/btrfs/scrub.c470
-rw-r--r--fs/btrfs/send.c88
-rw-r--r--fs/btrfs/space-info.c174
-rw-r--r--fs/btrfs/space-info.h12
-rw-r--r--fs/btrfs/subpage.c6
-rw-r--r--fs/btrfs/super.c28
-rw-r--r--fs/btrfs/sysfs.c27
-rw-r--r--fs/btrfs/tests/btrfs-tests.c32
-rw-r--r--fs/btrfs/tests/extent-io-tests.c61
-rw-r--r--fs/btrfs/tests/extent-map-tests.c102
-rw-r--r--fs/btrfs/tests/inode-tests.c107
-rw-r--r--fs/btrfs/transaction.c72
-rw-r--r--fs/btrfs/tree-checker.c22
-rw-r--r--fs/btrfs/tree-log.c66
-rw-r--r--fs/btrfs/volumes.c343
-rw-r--r--fs/btrfs/volumes.h11
-rw-r--r--fs/btrfs/zlib.c9
-rw-r--r--fs/btrfs/zoned.c28
-rw-r--r--fs/btrfs/zstd.c10
-rw-r--r--fs/buffer.c28
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/key.c1
-rw-r--r--fs/cachefiles/namei.c14
-rw-r--r--fs/configfs/dir.c4
-rw-r--r--fs/configfs/item.c2
-rw-r--r--fs/coredump.c461
-rw-r--r--fs/crypto/fscrypt_private.h75
-rw-r--r--fs/crypto/hkdf.c4
-rw-r--r--fs/crypto/inline_crypt.c44
-rw-r--r--fs/crypto/keyring.c132
-rw-r--r--fs/crypto/keysetup.c63
-rw-r--r--fs/crypto/keysetup_v1.c4
-rw-r--r--fs/dcache.c12
-rw-r--r--fs/debugfs/inode.c6
-rw-r--r--fs/ecryptfs/inode.c16
-rw-r--r--fs/efivarfs/internal.h1
-rw-r--r--fs/efivarfs/super.c206
-rw-r--r--fs/erofs/Kconfig14
-rw-r--r--fs/erofs/Makefile1
-rw-r--r--fs/erofs/compress.h10
-rw-r--r--fs/erofs/data.c5
-rw-r--r--fs/erofs/decompressor_crypto.c181
-rw-r--r--fs/erofs/decompressor_deflate.c20
-rw-r--r--fs/erofs/fileio.c5
-rw-r--r--fs/erofs/internal.h3
-rw-r--r--fs/erofs/super.c65
-rw-r--r--fs/erofs/sysfs.c67
-rw-r--r--fs/erofs/zdata.c79
-rw-r--r--fs/eventpoll.c7
-rw-r--r--fs/exec.c60
-rw-r--r--fs/exportfs/expfs.c6
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/f2fs/gc.c6
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/filesystems.c14
-rw-r--r--fs/fs_context.c6
-rw-r--r--fs/fs_parser.c55
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/readdir.c4
-rw-r--r--fs/gfs2/aops.c86
-rw-r--r--fs/gfs2/aops.h3
-rw-r--r--fs/gfs2/bmap.c9
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/glops.c9
-rw-r--r--fs/gfs2/incore.h9
-rw-r--r--fs/gfs2/inode.c99
-rw-r--r--fs/gfs2/inode.h1
-rw-r--r--fs/gfs2/lock_dlm.c11
-rw-r--r--fs/gfs2/log.c7
-rw-r--r--fs/gfs2/log.h11
-rw-r--r--fs/gfs2/lops.c17
-rw-r--r--fs/gfs2/lops.h2
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/meta_io.h4
-rw-r--r--fs/gfs2/ops_fstype.c65
-rw-r--r--fs/gfs2/recovery.c28
-rw-r--r--fs/gfs2/recovery.h2
-rw-r--r--fs/gfs2/super.c118
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/gfs2/trans.c21
-rw-r--r--fs/gfs2/trans.h2
-rw-r--r--fs/gfs2/util.c2
-rw-r--r--fs/gfs2/xattr.c11
-rw-r--r--fs/gfs2/xattr.h2
-rw-r--r--fs/hfsplus/wrapper.c46
-rw-r--r--fs/internal.h7
-rw-r--r--fs/ioctl.c15
-rw-r--r--fs/iomap/buffered-io.c100
-rw-r--r--fs/iomap/trace.h27
-rw-r--r--fs/kernfs/mount.c17
-rw-r--r--fs/libfs.c13
-rw-r--r--fs/mpage.c13
-rw-r--r--fs/namei.c235
-rw-r--r--fs/namespace.c43
-rw-r--r--fs/nfs/client.c9
-rw-r--r--fs/nfs/dir.c15
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c6
-rw-r--r--fs/nfs/localio.c2
-rw-r--r--fs/nfs/netns.h6
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs4proc.c18
-rw-r--r--fs/nfs/nfs4trace.h34
-rw-r--r--fs/nfs/pnfs.c51
-rw-r--r--fs/nfs/pnfs.h4
-rw-r--r--fs/nfs/pnfs_nfs.c32
-rw-r--r--fs/nfs/symlink.c20
-rw-r--r--fs/nfs/unlink.c11
-rw-r--r--fs/nfsd/nfs3proc.c4
-rw-r--r--fs/nfsd/nfs3xdr.c4
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4recover.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nfsd/nfsproc.c5
-rw-r--r--fs/nfsd/vfs.c17
-rw-r--r--fs/omfs/inode.c176
-rw-r--r--fs/open.c14
-rw-r--r--fs/orangefs/inode.c9
-rw-r--r--fs/overlayfs/export.c6
-rw-r--r--fs/overlayfs/namei.c14
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/readdir.c21
-rw-r--r--fs/pidfs.c165
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/meminfo.c3
-rw-r--r--fs/proc_namespace.c12
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/readdir.c47
-rw-r--r--fs/resctrl/Kconfig39
-rw-r--r--fs/resctrl/Makefile6
-rw-r--r--fs/resctrl/ctrlmondata.c661
-rw-r--r--fs/resctrl/internal.h426
-rw-r--r--fs/resctrl/monitor.c929
-rw-r--r--fs/resctrl/monitor_trace.h33
-rw-r--r--fs/resctrl/pseudo_lock.c1105
-rw-r--r--fs/resctrl/rdtgroup.c4353
-rw-r--r--fs/select.c4
-rw-r--r--fs/smb/client/cached_dir.c5
-rw-r--r--fs/smb/client/cifsfs.c3
-rw-r--r--fs/smb/client/file.c6
-rw-r--r--fs/smb/client/readdir.c10
-rw-r--r--fs/smb/client/smb2pdu.c2
-rw-r--r--fs/smb/server/oplock.c7
-rw-r--r--fs/smb/server/smb2pdu.c7
-rw-r--r--fs/smb/server/vfs.c16
-rw-r--r--fs/stat.c41
-rw-r--r--fs/super.c318
-rw-r--r--fs/tracefs/inode.c2
-rw-r--r--fs/ubifs/compress.c247
-rw-r--r--fs/udf/truncate.c2
-rw-r--r--fs/vboxsf/file.c47
-rw-r--r--fs/xattr.c24
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c5
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h6
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c4
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c343
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.h25
-rw-r--r--fs/xfs/scrub/fscounters.c4
-rw-r--r--fs/xfs/scrub/orphanage.c7
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/xfs_bio_io.c30
-rw-r--r--fs/xfs/xfs_bmap_item.c10
-rw-r--r--fs/xfs/xfs_bmap_item.h3
-rw-r--r--fs/xfs/xfs_buf.c122
-rw-r--r--fs/xfs/xfs_buf.h4
-rw-r--r--fs/xfs/xfs_buf_item.c19
-rw-r--r--fs/xfs/xfs_buf_item.h3
-rw-r--r--fs/xfs/xfs_discard.c17
-rw-r--r--fs/xfs/xfs_extfree_item.c10
-rw-r--r--fs/xfs/xfs_extfree_item.h3
-rw-r--r--fs/xfs/xfs_file.c87
-rw-r--r--fs/xfs/xfs_filestream.c15
-rw-r--r--fs/xfs/xfs_globals.c2
-rw-r--r--fs/xfs/xfs_inode.h14
-rw-r--r--fs/xfs/xfs_iomap.c190
-rw-r--r--fs/xfs/xfs_iomap.h1
-rw-r--r--fs/xfs/xfs_iops.c76
-rw-r--r--fs/xfs/xfs_iops.h3
-rw-r--r--fs/xfs/xfs_log.c32
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_priv.h13
-rw-r--r--fs/xfs/xfs_message.c16
-rw-r--r--fs/xfs/xfs_message.h4
-rw-r--r--fs/xfs/xfs_mount.c161
-rw-r--r--fs/xfs/xfs_mount.h27
-rw-r--r--fs/xfs/xfs_mru_cache.c15
-rw-r--r--fs/xfs/xfs_notify_failure.c6
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_refcount_item.c10
-rw-r--r--fs/xfs/xfs_refcount_item.h3
-rw-r--r--fs/xfs/xfs_reflink.c146
-rw-r--r--fs/xfs/xfs_reflink.h6
-rw-r--r--fs/xfs/xfs_rmap_item.c10
-rw-r--r--fs/xfs/xfs_rmap_item.h3
-rw-r--r--fs/xfs/xfs_super.c136
-rw-r--r--fs/xfs/xfs_sysctl.h2
-rw-r--r--fs/xfs/xfs_trace.h115
-rw-r--r--fs/xfs/xfs_trans_ail.c34
-rw-r--r--fs/xfs/xfs_zone_alloc.c109
-rw-r--r--fs/xfs/xfs_zone_gc.c5
-rw-r--r--fs/zonefs/super.c34
-rw-r--r--include/acpi/acbuffer.h2
-rw-r--r--include/acpi/acconfig.h2
-rw-r--r--include/acpi/acexcep.h2
-rw-r--r--include/acpi/acnames.h2
-rw-r--r--include/acpi/acoutput.h2
-rw-r--r--include/acpi/acpi.h2
-rw-r--r--include/acpi/acpiosxf.h2
-rw-r--r--include/acpi/acpixf.h4
-rw-r--r--include/acpi/acrestyp.h2
-rw-r--r--include/acpi/actbl.h8
-rw-r--r--include/acpi/actbl1.h47
-rw-r--r--include/acpi/actbl2.h335
-rw-r--r--include/acpi/actbl3.h4
-rw-r--r--include/acpi/actypes.h10
-rw-r--r--include/acpi/acuuid.h2
-rw-r--r--include/acpi/cppc_acpi.h30
-rw-r--r--include/acpi/platform/acenv.h2
-rw-r--r--include/acpi/platform/acenvex.h2
-rw-r--r--include/acpi/platform/acgcc.h10
-rw-r--r--include/acpi/platform/acgccex.h2
-rw-r--r--include/acpi/platform/aclinux.h2
-rw-r--r--include/acpi/platform/aclinuxex.h2
-rw-r--r--include/acpi/platform/aczephyr.h2
-rw-r--r--include/asm-generic/simd.h8
-rw-r--r--include/asm-generic/vmlinux.lds.h4
-rw-r--r--include/crypto/acompress.h109
-rw-r--r--include/crypto/algapi.h37
-rw-r--r--include/crypto/blake2b.h31
-rw-r--r--include/crypto/chacha.h89
-rw-r--r--include/crypto/ctr.h50
-rw-r--r--include/crypto/ghash.h4
-rw-r--r--include/crypto/hash.h176
-rw-r--r--include/crypto/internal/acompress.h128
-rw-r--r--include/crypto/internal/blake2b.h92
-rw-r--r--include/crypto/internal/blockhash.h52
-rw-r--r--include/crypto/internal/chacha.h43
-rw-r--r--include/crypto/internal/engine.h5
-rw-r--r--include/crypto/internal/geniv.h1
-rw-r--r--include/crypto/internal/hash.h117
-rw-r--r--include/crypto/internal/poly1305.h28
-rw-r--r--include/crypto/internal/scompress.h17
-rw-r--r--include/crypto/internal/sha2.h66
-rw-r--r--include/crypto/internal/simd.h10
-rw-r--r--include/crypto/internal/skcipher.h49
-rw-r--r--include/crypto/md5.h3
-rw-r--r--include/crypto/null.h3
-rw-r--r--include/crypto/poly1305.h67
-rw-r--r--include/crypto/polyval.h8
-rw-r--r--include/crypto/rng.h8
-rw-r--r--include/crypto/scatterwalk.h65
-rw-r--r--include/crypto/sha1.h9
-rw-r--r--include/crypto/sha1_base.h81
-rw-r--r--include/crypto/sha2.h62
-rw-r--r--include/crypto/sha256_base.h135
-rw-r--r--include/crypto/sha3.h20
-rw-r--r--include/crypto/sha512_base.h88
-rw-r--r--include/crypto/sig.h2
-rw-r--r--include/crypto/sm3.h4
-rw-r--r--include/crypto/sm3_base.h92
-rw-r--r--include/crypto/streebog.h5
-rw-r--r--include/drm/Makefile2
-rw-r--r--include/drm/drm_gpusvm.h47
-rw-r--r--include/drm/intel/pciids.h4
-rw-r--r--include/dt-bindings/power/mediatek,mt6893-power.h35
-rw-r--r--include/dt-bindings/power/rockchip,rk3562-power.h35
-rw-r--r--include/dt-bindings/sound/cs48l32.h20
-rw-r--r--include/hyperv/hvgdk_mini.h2
-rw-r--r--include/linux/acpi.h18
-rw-r--r--include/linux/alloc_tag.h12
-rw-r--r--include/linux/arch_topology.h8
-rw-r--r--include/linux/binfmts.h1
-rw-r--r--include/linux/bio.h26
-rw-r--r--include/linux/blk-mq.h10
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/blkdev.h24
-rw-r--r--include/linux/bpf-cgroup.h9
-rw-r--r--include/linux/cgroup-defs.h100
-rw-r--r--include/linux/cgroup.h50
-rw-r--r--include/linux/cleanup.h19
-rw-r--r--include/linux/codetag.h8
-rw-r--r--include/linux/configfs.h8
-rw-r--r--include/linux/coredump.h1
-rw-r--r--include/linux/cpu.h4
-rw-r--r--include/linux/cpufreq.h22
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/cpumask.h75
-rw-r--r--include/linux/crc16.h9
-rw-r--r--include/linux/crc32.h5
-rw-r--r--include/linux/crypto.h85
-rw-r--r--include/linux/dcache.h4
-rw-r--r--include/linux/device.h38
-rw-r--r--include/linux/device/devres.h41
-rw-r--r--include/linux/device_cgroup.h7
-rw-r--r--include/linux/dma-map-ops.h54
-rw-r--r--include/linux/dma-mapping.h85
-rw-r--r--include/linux/dmapool.h21
-rw-r--r--include/linux/energy_model.h2
-rw-r--r--include/linux/entry-common.h43
-rw-r--r--include/linux/execmem.h11
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/find.h25
-rw-r--r--include/linux/fs.h48
-rw-r--r--include/linux/fs_parser.h7
-rw-r--r--include/linux/ftrace.h9
-rw-r--r--include/linux/futex.h36
-rw-r--r--include/linux/gpio/consumer.h11
-rw-r--r--include/linux/gpio/driver.h5
-rw-r--r--include/linux/highmem.h10
-rw-r--r--include/linux/hugetlb.h5
-rw-r--r--include/linux/hyperv.h7
-rw-r--r--include/linux/interrupt.h2
-rw-r--r--include/linux/io.h21
-rw-r--r--include/linux/io_uring/cmd.h9
-rw-r--r--include/linux/io_uring_types.h15
-rw-r--r--include/linux/iommu.h4
-rw-r--r--include/linux/irq.h28
-rw-r--r--include/linux/irqchip/irq-msi-lib.h (renamed from drivers/irqchip/irq-msi-lib.h)6
-rw-r--r--include/linux/irqdomain.h499
-rw-r--r--include/linux/jiffies.h2
-rw-r--r--include/linux/livepatch_sched.h14
-rw-r--r--include/linux/mfd/max77759.h165
-rw-r--r--include/linux/micrel_phy.h1
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mman.h2
-rw-r--r--include/linux/mmap_lock.h4
-rw-r--r--include/linux/mmc/card.h1
-rw-r--r--include/linux/mmc/slot-gpio.h4
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/mount.h87
-rw-r--r--include/linux/mroute_base.h5
-rw-r--r--include/linux/msi.h23
-rw-r--r--include/linux/namei.h17
-rw-r--r--include/linux/net.h4
-rw-r--r--include/linux/nfs_fs_sb.h12
-rw-r--r--include/linux/nvme.h77
-rw-r--r--include/linux/page-flags.h7
-rw-r--r--include/linux/panic.h2
-rw-r--r--include/linux/part_stat.h2
-rw-r--r--include/linux/pci-p2pdma.h85
-rw-r--r--include/linux/pci.h4
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/percpu-rwsem.h20
-rw-r--r--include/linux/percpu.h4
-rw-r--r--include/linux/perf_event.h298
-rw-r--r--include/linux/pgalloc_tag.h8
-rw-r--r--include/linux/pid.h2
-rw-r--r--include/linux/pidfs.h8
-rw-r--r--include/linux/pm_domain.h10
-rw-r--r--include/linux/pm_opp.h32
-rw-r--r--include/linux/pm_runtime.h4
-rw-r--r--include/linux/power_supply.h1
-rw-r--r--include/linux/psp-sev.h3
-rw-r--r--include/linux/pwm.h10
-rw-r--r--include/linux/ratelimit.h37
-rw-r--r--include/linux/ratelimit_types.h5
-rw-r--r--include/linux/rcuref.h22
-rw-r--r--include/linux/regmap.h3
-rw-r--r--include/linux/regulator/max8952.h2
-rw-r--r--include/linux/regulator/pca9450.h5
-rw-r--r--include/linux/resctrl.h38
-rw-r--r--include/linux/resctrl_types.h16
-rw-r--r--include/linux/restart_block.h2
-rw-r--r--include/linux/sched.h21
-rw-r--r--include/linux/sched/topology.h6
-rw-r--r--include/linux/shmem_fs.h7
-rw-r--r--include/linux/soundwire/sdw_intel.h2
-rw-r--r--include/linux/spi/sh_msiof.h125
-rw-r--r--include/linux/spi/spi.h78
-rw-r--r--include/linux/stat.h1
-rw-r--r--include/linux/string_helpers.h1
-rw-r--r--include/linux/suspend.h9
-rw-r--r--include/linux/tick.h7
-rw-r--r--include/linux/timer.h42
-rw-r--r--include/linux/topology.h9
-rw-r--r--include/linux/tpm.h21
-rw-r--r--include/linux/tpm_svsm.h149
-rw-r--r--include/linux/vmalloc.h9
-rw-r--r--include/linux/workqueue.h6
-rw-r--r--include/memory/renesas-rpc-if.h4
-rw-r--r--include/net/bluetooth/hci_core.h1
-rw-r--r--include/net/netdev_lock.h3
-rw-r--r--include/net/sch_generic.h15
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/scsi/scsi_host.h2
-rw-r--r--include/sound/core.h1
-rw-r--r--include/sound/cs-amp-lib.h2
-rw-r--r--include/sound/cs35l56.h29
-rw-r--r--include/sound/cs42l52.h29
-rw-r--r--include/sound/cs42l56.h45
-rw-r--r--include/sound/cs42l73.h19
-rw-r--r--include/sound/cs48l32.h47
-rw-r--r--include/sound/cs48l32_registers.h530
-rw-r--r--include/sound/dmaengine_pcm.h2
-rw-r--r--include/sound/gus.h22
-rw-r--r--include/sound/hdaudio.h6
-rw-r--r--include/sound/hdaudio_ext.h6
-rw-r--r--include/sound/jack.h6
-rw-r--r--include/sound/pcm.h4
-rw-r--r--include/sound/sdca_asoc.h42
-rw-r--r--include/sound/sdca_function.h71
-rw-r--r--include/sound/snd_wavefront.h4
-rw-r--r--include/sound/soc-acpi.h13
-rw-r--r--include/sound/soc-dapm.h4
-rw-r--r--include/sound/soc.h15
-rw-r--r--include/sound/soc_sdw_utils.h5
-rw-r--r--include/sound/sof.h1
-rw-r--r--include/sound/tas2781-comlib-i2c.h37
-rw-r--r--include/sound/tas2781.h81
-rw-r--r--include/sound/tpa6130a2-plat.h17
-rw-r--r--include/sound/ump_msg.h4
-rw-r--r--include/trace/events/block.h17
-rw-r--r--include/trace/events/btrfs.h89
-rw-r--r--include/trace/events/cgroup.h12
-rw-r--r--include/trace/events/erofs.h2
-rw-r--r--include/trace/events/exceptions.h (renamed from arch/x86/include/asm/trace/exceptions.h)27
-rw-r--r--include/trace/events/io_uring.h2
-rw-r--r--include/trace/events/sched.h34
-rw-r--r--include/uapi/linux/blktrace_api.h2
-rw-r--r--include/uapi/linux/fscrypt.h6
-rw-r--r--include/uapi/linux/futex.h9
-rw-r--r--include/uapi/linux/io_uring.h12
-rw-r--r--include/uapi/linux/perf_event.h657
-rw-r--r--include/uapi/linux/pidfd.h18
-rw-r--r--include/uapi/linux/prctl.h7
-rw-r--r--include/uapi/linux/stat.h8
-rw-r--r--include/uapi/linux/taskstats.h47
-rw-r--r--include/uapi/linux/ublk_cmd.h128
-rw-r--r--init/Kconfig36
-rw-r--r--init/main.c2
-rw-r--r--io_uring/Makefile6
-rw-r--r--io_uring/advise.c4
-rw-r--r--io_uring/cancel.c2
-rw-r--r--io_uring/cmd_net.c83
-rw-r--r--io_uring/epoll.c4
-rw-r--r--io_uring/eventfd.c66
-rw-r--r--io_uring/eventfd.h3
-rw-r--r--io_uring/fdinfo.c86
-rw-r--r--io_uring/fs.c10
-rw-r--r--io_uring/futex.c10
-rw-r--r--io_uring/io-wq.c65
-rw-r--r--io_uring/io-wq.h5
-rw-r--r--io_uring/io_uring.c288
-rw-r--r--io_uring/io_uring.h4
-rw-r--r--io_uring/kbuf.c148
-rw-r--r--io_uring/kbuf.h8
-rw-r--r--io_uring/memmap.c13
-rw-r--r--io_uring/memmap.h4
-rw-r--r--io_uring/msg_ring.c2
-rw-r--r--io_uring/net.c76
-rw-r--r--io_uring/nop.c2
-rw-r--r--io_uring/notif.c1
-rw-r--r--io_uring/opdef.c11
-rw-r--r--io_uring/openclose.c139
-rw-r--r--io_uring/openclose.h3
-rw-r--r--io_uring/poll.c4
-rw-r--r--io_uring/rsrc.c91
-rw-r--r--io_uring/rsrc.h28
-rw-r--r--io_uring/rw.c8
-rw-r--r--io_uring/rw.h2
-rw-r--r--io_uring/splice.c4
-rw-r--r--io_uring/statx.c2
-rw-r--r--io_uring/sync.c6
-rw-r--r--io_uring/tctx.c2
-rw-r--r--io_uring/timeout.c13
-rw-r--r--io_uring/timeout.h13
-rw-r--r--io_uring/truncate.c2
-rw-r--r--io_uring/uring_cmd.c96
-rw-r--r--io_uring/uring_cmd.h6
-rw-r--r--io_uring/waitid.c2
-rw-r--r--io_uring/xattr.c8
-rw-r--r--io_uring/zcrx.c372
-rw-r--r--io_uring/zcrx.h26
-rw-r--r--ipc/mqueue.c5
-rw-r--r--kernel/bpf/cgroup.c38
-rw-r--r--kernel/bpf/inode.c2
-rw-r--r--kernel/cgroup/cgroup-internal.h6
-rw-r--r--kernel/cgroup/cgroup.c148
-rw-r--r--kernel/cgroup/cpuset.c96
-rw-r--r--kernel/cgroup/rstat.c460
-rw-r--r--kernel/configs/xen.config3
-rw-r--r--kernel/cpu.c5
-rw-r--r--kernel/dma/direct.c44
-rw-r--r--kernel/dma/mapping.c18
-rw-r--r--kernel/entry/common.c49
-rw-r--r--kernel/events/core.c609
-rw-r--r--kernel/events/ring_buffer.c29
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c121
-rw-r--r--kernel/futex/core.c803
-rw-r--r--kernel/futex/futex.h74
-rw-r--r--kernel/futex/pi.c308
-rw-r--r--kernel/futex/requeue.c460
-rw-r--r--kernel/futex/waitwake.c207
-rw-r--r--kernel/irq/autoprobe.c26
-rw-r--r--kernel/irq/chip.c631
-rw-r--r--kernel/irq/cpuhotplug.c12
-rw-r--r--kernel/irq/debugfs.c7
-rw-r--r--kernel/irq/generic-chip.c47
-rw-r--r--kernel/irq/internals.h48
-rw-r--r--kernel/irq/irqdesc.c176
-rw-r--r--kernel/irq/irqdomain.c130
-rw-r--r--kernel/irq/manage.c1166
-rw-r--r--kernel/irq/msi.c192
-rw-r--r--kernel/irq/pm.c38
-rw-r--r--kernel/irq/proc.c67
-rw-r--r--kernel/irq/resend.c50
-rw-r--r--kernel/irq/spurious.c104
-rw-r--r--kernel/kcsan/kcsan_test.c2
-rw-r--r--kernel/livepatch/transition.c49
-rw-r--r--kernel/locking/lockdep.c76
-rw-r--r--kernel/locking/lockdep_internals.h1
-rw-r--r--kernel/locking/lockdep_proc.c2
-rw-r--r--kernel/locking/percpu-rwsem.c13
-rw-r--r--kernel/module/Kconfig5
-rw-r--r--kernel/module/main.c1
-rw-r--r--kernel/nsproxy.c30
-rw-r--r--kernel/padata.c3
-rw-r--r--kernel/panic.c30
-rw-r--r--kernel/pid.c6
-rw-r--r--kernel/power/energy_model.c72
-rw-r--r--kernel/power/hibernate.c39
-rw-r--r--kernel/power/main.c39
-rw-r--r--kernel/power/power.h8
-rw-r--r--kernel/power/process.c8
-rw-r--r--kernel/power/suspend.c7
-rw-r--r--kernel/power/swap.c103
-rw-r--r--kernel/power/wakelock.c3
-rw-r--r--kernel/rcu/rcu.h18
-rw-r--r--kernel/rcu/rcuscale.c2
-rw-r--r--kernel/rcu/rcutorture.c208
-rw-r--r--kernel/rcu/srcutree.c2
-rw-r--r--kernel/rcu/tree.c84
-rw-r--r--kernel/rcu/tree.h3
-rw-r--r--kernel/rcu/tree_exp.h2
-rw-r--r--kernel/rcu/tree_nocb.h10
-rw-r--r--kernel/rcu/tree_plugin.h2
-rw-r--r--kernel/rcu/tree_stall.h4
-rw-r--r--kernel/sched/core.c148
-rw-r--r--kernel/sched/cpufreq_schedutil.c9
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/ext.c1875
-rw-r--r--kernel/sched/ext.h13
-rw-r--r--kernel/sched/ext_idle.c307
-rw-r--r--kernel/sched/ext_idle.h3
-rw-r--r--kernel/sched/fair.c33
-rw-r--r--kernel/sched/isolation.c2
-rw-r--r--kernel/sched/rt.c105
-rw-r--r--kernel/sched/sched.h40
-rw-r--r--kernel/sched/syscalls.c5
-rw-r--r--kernel/sched/topology.c154
-rw-r--r--kernel/signal.c11
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/sysctl-test.c49
-rw-r--r--kernel/sysctl.c108
-rw-r--r--kernel/time/alarmtimer.c84
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/time/jiffies.c5
-rw-r--r--kernel/time/posix-timers.c23
-rw-r--r--kernel/time/sleep_timeout.c2
-rw-r--r--kernel/time/timer.c78
-rw-r--r--kernel/trace/blktrace.c11
-rw-r--r--kernel/trace/fprobe.c3
-rw-r--r--kernel/trace/ring_buffer.c8
-rw-r--r--kernel/trace/trace.c36
-rw-r--r--kernel/trace/trace_dynevent.c16
-rw-r--r--kernel/trace/trace_dynevent.h1
-rw-r--r--kernel/trace/trace_eprobe.c3
-rw-r--r--kernel/trace/trace_events_trigger.c2
-rw-r--r--kernel/trace/trace_functions.c6
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_probe.c9
-rw-r--r--kernel/trace/trace_stack.c22
-rw-r--r--kernel/trace/trace_uprobe.c2
-rw-r--r--kernel/workqueue.c17
-rw-r--r--lib/alloc_tag.c87
-rw-r--r--lib/codetag.c5
-rw-r--r--lib/crc16.c9
-rw-r--r--lib/crc32.c4
-rw-r--r--lib/crypto/Kconfig89
-rw-r--r--lib/crypto/Makefile24
-rw-r--r--lib/crypto/aescfb.c2
-rw-r--r--lib/crypto/aesgcm.c2
-rw-r--r--lib/crypto/blake2s.c2
-rw-r--r--lib/crypto/chacha.c40
-rw-r--r--lib/crypto/chacha20poly1305-selftest.c8
-rw-r--r--lib/crypto/chacha20poly1305.c55
-rw-r--r--lib/crypto/curve25519.c2
-rw-r--r--lib/crypto/libchacha.c2
-rw-r--r--lib/crypto/poly1305-generic.c24
-rw-r--r--lib/crypto/poly1305.c75
-rw-r--r--lib/crypto/sha256-generic.c137
-rw-r--r--lib/crypto/sha256.c150
-rw-r--r--lib/crypto/sm3.c (renamed from crypto/sm3.c)79
-rw-r--r--lib/find_bit.c11
-rw-r--r--lib/kunit/executor.c2
-rw-r--r--lib/kunit/static_stub.c2
-rw-r--r--lib/ratelimit.c75
-rw-r--r--lib/string_helpers.c39
-rw-r--r--lib/test_sysctl.c131
-rw-r--r--lib/vsprintf.c10
-rw-r--r--mm/cma.c5
-rw-r--r--mm/dmapool.c15
-rw-r--r--mm/execmem.c40
-rw-r--r--mm/hugetlb.c52
-rw-r--r--mm/internal.h1
-rw-r--r--mm/kasan/shadow.c92
-rw-r--r--mm/memcontrol.c10
-rw-r--r--mm/memory.c2
-rw-r--r--mm/migrate.c60
-rw-r--r--mm/mm_init.c1
-rw-r--r--mm/mremap.c3
-rw-r--r--mm/nommu.c18
-rw-r--r--mm/page-writeback.c28
-rw-r--r--mm/page_alloc.c96
-rw-r--r--mm/page_io.c3
-rw-r--r--mm/readahead.c20
-rw-r--r--mm/shmem.c33
-rw-r--r--mm/show_mem.c4
-rw-r--r--mm/swap.h4
-rw-r--r--mm/swap_state.c1
-rw-r--r--mm/swapfile.c11
-rw-r--r--mm/truncate.c20
-rw-r--r--mm/userfaultfd.c12
-rw-r--r--mm/vma.c1
-rw-r--r--mm/vmalloc.c24
-rw-r--r--mm/vmscan.c29
-rw-r--r--mm/zsmalloc.c8
-rw-r--r--net/batman-adv/hard-interface.c31
-rw-r--r--net/bluetooth/hci_conn.c24
-rw-r--r--net/bluetooth/hci_event.c73
-rw-r--r--net/bluetooth/l2cap_core.c15
-rw-r--r--net/bluetooth/mgmt.c9
-rw-r--r--net/bridge/br_multicast.c16
-rw-r--r--net/bridge/br_nf_core.c7
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/can/bcm.c79
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/devmem.c7
-rw-r--r--net/core/devmem.h2
-rw-r--r--net/core/netdev-genl.c11
-rw-r--r--net/core/sock.c12
-rw-r--r--net/dsa/tag_ksz.c19
-rw-r--r--net/ipv4/esp4.c53
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/xfrm4_input.c18
-rw-r--r--net/ipv6/esp6.c53
-rw-r--r--net/ipv6/ip6mr.c12
-rw-r--r--net/ipv6/xfrm6_input.c18
-rw-r--r--net/llc/af_llc.c8
-rw-r--r--net/mac80211/main.c6
-rw-r--r--net/mctp/device.c17
-rw-r--r--net/mctp/route.c4
-rw-r--r--net/sched/sch_codel.c2
-rw-r--r--net/sched/sch_fq.c2
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_hhf.c2
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sunrpc/rpc_pipe.c12
-rw-r--r--net/tipc/crypto.c5
-rw-r--r--net/tls/tls_strp.c3
-rw-r--r--net/unix/af_unix.c137
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--net/xfrm/xfrm_ipcomp.c3
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--net/xfrm/xfrm_state.c6
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/helpers/mutex.c5
-rw-r--r--rust/kernel/configfs.rs1049
-rw-r--r--rust/kernel/lib.rs2
-rw-r--r--rust/kernel/sync/rcu.rs5
-rw-r--r--samples/ftrace/sample-trace-array.c2
-rw-r--r--samples/rust/Kconfig11
-rw-r--r--samples/rust/Makefile1
-rw-r--r--samples/rust/rust_configfs.rs192
-rw-r--r--scripts/Makefile.build2
-rw-r--r--scripts/Makefile.compiler4
-rw-r--r--scripts/Makefile.extrawarn42
-rw-r--r--scripts/Makefile.vmlinux4
-rw-r--r--scripts/Makefile.vmlinux_o4
-rwxr-xr-xscripts/checkpatch.pl2
-rwxr-xr-xscripts/find-unused-docs.sh2
-rw-r--r--scripts/gdb/linux/pgtable.py4
l---------[-rwxr-xr-x]scripts/kernel-doc2440
-rwxr-xr-xscripts/kernel-doc.pl2439
-rwxr-xr-xscripts/kernel-doc.py315
-rw-r--r--scripts/lib/kdoc/kdoc_files.py291
-rw-r--r--scripts/lib/kdoc/kdoc_output.py793
-rw-r--r--scripts/lib/kdoc/kdoc_parser.py1745
-rw-r--r--scripts/lib/kdoc/kdoc_re.py273
-rw-r--r--scripts/package/kernel.spec1
-rwxr-xr-xscripts/package/mkdebian2
-rw-r--r--security/apparmor/apparmorfs.c4
-rw-r--r--security/inode.c2
-rw-r--r--security/landlock/audit.c4
-rw-r--r--security/landlock/id.c33
-rw-r--r--security/landlock/syscalls.c3
-rw-r--r--security/selinux/selinuxfs.c4
-rw-r--r--sound/atmel/ac97c.c9
-rw-r--r--sound/core/control.c14
-rw-r--r--sound/core/device.c23
-rw-r--r--sound/core/jack.c19
-rw-r--r--sound/core/oss/pcm_oss.c3
-rw-r--r--sound/core/pcm_dmaengine.c21
-rw-r--r--sound/core/pcm_memory.c2
-rw-r--r--sound/core/pcm_misc.c30
-rw-r--r--sound/core/pcm_native.c11
-rw-r--r--sound/core/seq/seq_clientmgr.c52
-rw-r--r--sound/core/seq/seq_queue.c16
-rw-r--r--sound/core/seq/seq_queue.h1
-rw-r--r--sound/core/seq/seq_ump_convert.c18
-rw-r--r--sound/core/seq/seq_ump_convert.h1
-rw-r--r--sound/core/seq_device.c2
-rw-r--r--sound/hda/ext/hdac_ext_controller.c19
-rw-r--r--sound/hda/hda_bus_type.c6
-rw-r--r--sound/hda/hdac_device.c2
-rw-r--r--sound/hda/hdac_stream.c19
-rw-r--r--sound/hda/intel-dsp-config.c10
-rw-r--r--sound/hda/intel-nhlt.c19
-rw-r--r--sound/hda/intel-sdw-acpi.c2
-rw-r--r--sound/i2c/other/pt2258.c8
-rw-r--r--sound/isa/gus/gus_io.c229
-rw-r--r--sound/isa/gus/gus_main.c14
-rw-r--r--sound/isa/gus/gus_reset.c16
-rw-r--r--sound/isa/msnd/Makefile2
-rw-r--r--sound/isa/msnd/msnd.h4
-rw-r--r--sound/isa/msnd/msnd_midi.c163
-rw-r--r--sound/isa/msnd/msnd_pinnacle.c5
-rw-r--r--sound/isa/sb/emu8000.c3
-rw-r--r--sound/isa/sb/jazz16.c5
-rw-r--r--sound/isa/sb/sb16.c5
-rw-r--r--sound/isa/sb/sb8.c5
-rw-r--r--sound/isa/sb/sb8_midi.c3
-rw-r--r--sound/isa/sb/sb_mixer.c5
-rw-r--r--sound/mips/snd-n64.c9
-rw-r--r--sound/pci/ad1889.c7
-rw-r--r--sound/pci/ali5451/ali5451.c6
-rw-r--r--sound/pci/als300.c2
-rw-r--r--sound/pci/als4000.c2
-rw-r--r--sound/pci/asihpi/asihpi.c9
-rw-r--r--sound/pci/atiixp.c7
-rw-r--r--sound/pci/atiixp_modem.c7
-rw-r--r--sound/pci/au88x0/au88x0.c7
-rw-r--r--sound/pci/au88x0/au88x0_a3d.c10
-rw-r--r--sound/pci/aw2/aw2-alsa.c7
-rw-r--r--sound/pci/azt3328.c2
-rw-r--r--sound/pci/bt87x.c7
-rw-r--r--sound/pci/ca0106/ca0106_main.c2
-rw-r--r--sound/pci/cmipci.c2
-rw-r--r--sound/pci/cs4281.c13
-rw-r--r--sound/pci/cs46xx/cs46xx_lib.c2
-rw-r--r--sound/pci/cs46xx/dsp_spos_scb_lib.c7
-rw-r--r--sound/pci/cs5530.c7
-rw-r--r--sound/pci/cs5535audio/cs5535audio.c2
-rw-r--r--sound/pci/ctxfi/cttimer.c2
-rw-r--r--sound/pci/echoaudio/echoaudio.c2
-rw-r--r--sound/pci/emu10k1/emu10k1_main.c2
-rw-r--r--sound/pci/emu10k1/emu10k1x.c2
-rw-r--r--sound/pci/ens1370.c2
-rw-r--r--sound/pci/es1938.c2
-rw-r--r--sound/pci/es1968.c8
-rw-r--r--sound/pci/fm801.c2
-rw-r--r--sound/pci/hda/Kconfig33
-rw-r--r--sound/pci/hda/Makefile8
-rw-r--r--sound/pci/hda/cirrus_scodec_test.c117
-rw-r--r--sound/pci/hda/cs35l41_hda.c51
-rw-r--r--sound/pci/hda/cs35l41_hda_property.c6
-rw-r--r--sound/pci/hda/cs35l56_hda.c38
-rw-r--r--sound/pci/hda/cs35l56_hda_i2c.c3
-rw-r--r--sound/pci/hda/cs35l56_hda_spi.c3
-rw-r--r--sound/pci/hda/hda_acpi.c325
-rw-r--r--sound/pci/hda/hda_bind.c4
-rw-r--r--sound/pci/hda/hda_codec.c31
-rw-r--r--sound/pci/hda/hda_cs_dsp_ctl.c249
-rw-r--r--sound/pci/hda/hda_cs_dsp_ctl.h39
-rw-r--r--sound/pci/hda/hda_intel.c32
-rw-r--r--sound/pci/hda/hda_local.h2
-rw-r--r--sound/pci/hda/hda_tegra.c55
-rw-r--r--sound/pci/hda/patch_hdmi.c13
-rw-r--r--sound/pci/hda/patch_realtek.c19
-rw-r--r--sound/pci/hda/tas2781-spi.h157
-rw-r--r--sound/pci/hda/tas2781_hda.c377
-rw-r--r--sound/pci/hda/tas2781_hda.h90
-rw-r--r--sound/pci/hda/tas2781_hda_i2c.c498
-rw-r--r--sound/pci/hda/tas2781_hda_spi.c890
-rw-r--r--sound/pci/hda/tas2781_spi_fwlib.c2006
-rw-r--r--sound/pci/ice1712/ice1712.c2
-rw-r--r--sound/pci/ice1712/ice1724.c2
-rw-r--r--sound/pci/intel8x0.c2
-rw-r--r--sound/pci/intel8x0m.c2
-rw-r--r--sound/pci/korg1212/korg1212.c75
-rw-r--r--sound/pci/lola/lola.c16
-rw-r--r--sound/pci/lx6464es/lx6464es.c2
-rw-r--r--sound/pci/maestro3.c2
-rw-r--r--sound/pci/nm256/nm256.c2
-rw-r--r--sound/pci/oxygen/oxygen_lib.c2
-rw-r--r--sound/pci/riptide/riptide.c2
-rw-r--r--sound/pci/rme32.c2
-rw-r--r--sound/pci/rme96.c2
-rw-r--r--sound/pci/rme9652/hdsp.c2
-rw-r--r--sound/pci/rme9652/hdspm.c7
-rw-r--r--sound/pci/rme9652/rme9652.c2
-rw-r--r--sound/pci/sis7019.c2
-rw-r--r--sound/pci/sonicvibes.c2
-rw-r--r--sound/pci/trident/trident_main.c2
-rw-r--r--sound/pci/via82xx.c2
-rw-r--r--sound/pci/via82xx_modem.c2
-rw-r--r--sound/pci/vx222/vx222.c2
-rw-r--r--sound/pci/ymfpci/ymfpci_main.c2
-rw-r--r--sound/sh/Kconfig2
-rw-r--r--sound/sh/aica.c8
-rw-r--r--sound/soc/amd/acp-da7219-max98357a.c8
-rw-r--r--sound/soc/amd/acp-es8336.c4
-rw-r--r--sound/soc/amd/acp-rt5645.c6
-rw-r--r--sound/soc/amd/acp/acp-rembrandt.c2
-rw-r--r--sound/soc/amd/acp/acp-sdw-legacy-mach.c4
-rw-r--r--sound/soc/amd/acp/acp-sdw-sof-mach.c10
-rw-r--r--sound/soc/amd/acp/acp-sof-mach.c2
-rw-r--r--sound/soc/amd/acp/acp63.c2
-rw-r--r--sound/soc/amd/acp/acp70.c2
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c9
-rw-r--r--sound/soc/apple/mca.c23
-rw-r--r--sound/soc/codecs/Kconfig47
-rw-r--r--sound/soc/codecs/Makefile14
-rw-r--r--sound/soc/codecs/ac97.c10
-rw-r--r--sound/soc/codecs/adau7118.c6
-rw-r--r--sound/soc/codecs/ak4458.c10
-rw-r--r--sound/soc/codecs/ak5386.c28
-rw-r--r--sound/soc/codecs/aw88166.c7
-rw-r--r--sound/soc/codecs/aw88395/aw88395_device.c7
-rw-r--r--sound/soc/codecs/aw88399.c7
-rw-r--r--sound/soc/codecs/cs-amp-lib-test.c70
-rw-r--r--sound/soc/codecs/cs-amp-lib.c3
-rw-r--r--sound/soc/codecs/cs35l56-i2c.c23
-rw-r--r--sound/soc/codecs/cs35l56-sdw.c91
-rw-r--r--sound/soc/codecs/cs35l56-shared.c257
-rw-r--r--sound/soc/codecs/cs35l56-spi.c3
-rw-r--r--sound/soc/codecs/cs35l56.c48
-rw-r--r--sound/soc/codecs/cs35l56.h1
-rw-r--r--sound/soc/codecs/cs42l52.c112
-rw-r--r--sound/soc/codecs/cs42l56.c90
-rw-r--r--sound/soc/codecs/cs42l73.c70
-rw-r--r--sound/soc/codecs/cs48l32-tables.c540
-rw-r--r--sound/soc/codecs/cs48l32.c4073
-rw-r--r--sound/soc/codecs/cs48l32.h403
-rw-r--r--sound/soc/codecs/es8375.c793
-rw-r--r--sound/soc/codecs/es8375.h123
-rw-r--r--sound/soc/codecs/es8389.c962
-rw-r--r--sound/soc/codecs/es8389.h140
-rw-r--r--sound/soc/codecs/hdmi-codec.c1
-rw-r--r--sound/soc/codecs/idt821034.c17
-rw-r--r--sound/soc/codecs/pcm6240.c3
-rw-r--r--sound/soc/codecs/peb2466.c15
-rw-r--r--sound/soc/codecs/rt5665.c96
-rw-r--r--sound/soc/codecs/rt5665.h3
-rw-r--r--sound/soc/codecs/rt5668.c43
-rw-r--r--sound/soc/codecs/rt5668.h3
-rw-r--r--sound/soc/codecs/rt5677-spi.c4
-rw-r--r--sound/soc/codecs/rt5677.c7
-rw-r--r--sound/soc/codecs/rt712-sdca-dmic.c8
-rw-r--r--sound/soc/codecs/rt712-sdca.c8
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.c20
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.h1
-rw-r--r--sound/soc/codecs/rt722-sdca.c322
-rw-r--r--sound/soc/codecs/rt722-sdca.h6
-rw-r--r--sound/soc/codecs/rt9123.c503
-rw-r--r--sound/soc/codecs/rt9123p.c171
-rw-r--r--sound/soc/codecs/sma1307.c1
-rw-r--r--sound/soc/codecs/sta32x.c6
-rw-r--r--sound/soc/codecs/tas2764-quirks.h180
-rw-r--r--sound/soc/codecs/tas2764.c139
-rw-r--r--sound/soc/codecs/tas2764.h3
-rw-r--r--sound/soc/codecs/tas2770.c151
-rw-r--r--sound/soc/codecs/tas2770.h6
-rw-r--r--sound/soc/codecs/tas2781-comlib-i2c.c369
-rw-r--r--sound/soc/codecs/tas2781-comlib.c397
-rw-r--r--sound/soc/codecs/tas2781-fmwlib.c25
-rw-r--r--sound/soc/codecs/tas2781-i2c.c158
-rw-r--r--sound/soc/codecs/tlv320adc3xxx.c8
-rw-r--r--sound/soc/codecs/tlv320aic3x.c6
-rw-r--r--sound/soc/codecs/tpa6130a2.c54
-rw-r--r--sound/soc/codecs/twl4030.c79
-rw-r--r--sound/soc/codecs/wcd-mbhc-v2.c2
-rw-r--r--sound/soc/codecs/wcd-mbhc-v2.h2
-rw-r--r--sound/soc/codecs/wcd9335.c15
-rw-r--r--sound/soc/codecs/wcd937x.c2
-rw-r--r--sound/soc/codecs/wcd938x-sdw.c2
-rw-r--r--sound/soc/codecs/wcd938x.c69
-rw-r--r--sound/soc/codecs/wcd939x.c18
-rw-r--r--sound/soc/codecs/wm5100.c10
-rw-r--r--sound/soc/codecs/wm8903.c12
-rw-r--r--sound/soc/codecs/wm8962.c11
-rw-r--r--sound/soc/codecs/wm8996.c10
-rw-r--r--sound/soc/codecs/wm_adsp.c28
-rw-r--r--sound/soc/codecs/wsa883x.c2
-rw-r--r--sound/soc/codecs/wsa884x.c2
-rw-r--r--sound/soc/codecs/zl38060.c12
-rw-r--r--sound/soc/fsl/fsl_rpmsg.c28
-rw-r--r--sound/soc/fsl/fsl_rpmsg.h2
-rw-r--r--sound/soc/fsl/fsl_sai.c90
-rw-r--r--sound/soc/fsl/fsl_sai.h6
-rw-r--r--sound/soc/fsl/fsl_xcvr.c2
-rw-r--r--sound/soc/fsl/imx-card.c7
-rw-r--r--sound/soc/fsl/imx-pcm-rpmsg.c21
-rw-r--r--sound/soc/generic/simple-card-utils.c23
-rw-r--r--sound/soc/generic/test-component.c11
-rw-r--r--sound/soc/intel/atom/sst/sst.h6
-rw-r--r--sound/soc/intel/atom/sst/sst_drv_interface.c24
-rw-r--r--sound/soc/intel/atom/sst/sst_pci.c59
-rw-r--r--sound/soc/intel/atom/sst/sst_pvt.c33
-rw-r--r--sound/soc/intel/avs/Makefile6
-rw-r--r--sound/soc/intel/avs/avs.h13
-rw-r--r--sound/soc/intel/avs/board_selection.c181
-rw-r--r--sound/soc/intel/avs/boards/Kconfig8
-rw-r--r--sound/soc/intel/avs/boards/da7219.c11
-rw-r--r--sound/soc/intel/avs/boards/dmic.c12
-rw-r--r--sound/soc/intel/avs/boards/es8336.c11
-rw-r--r--sound/soc/intel/avs/boards/hdaudio.c25
-rw-r--r--sound/soc/intel/avs/boards/i2s_test.c15
-rw-r--r--sound/soc/intel/avs/boards/max98357a.c11
-rw-r--r--sound/soc/intel/avs/boards/max98373.c11
-rw-r--r--sound/soc/intel/avs/boards/max98927.c11
-rw-r--r--sound/soc/intel/avs/boards/nau8825.c11
-rw-r--r--sound/soc/intel/avs/boards/pcm3168a.c16
-rw-r--r--sound/soc/intel/avs/boards/probe.c5
-rw-r--r--sound/soc/intel/avs/boards/rt274.c11
-rw-r--r--sound/soc/intel/avs/boards/rt286.c11
-rw-r--r--sound/soc/intel/avs/boards/rt298.c11
-rw-r--r--sound/soc/intel/avs/boards/rt5514.c11
-rw-r--r--sound/soc/intel/avs/boards/rt5663.c11
-rw-r--r--sound/soc/intel/avs/boards/rt5682.c11
-rw-r--r--sound/soc/intel/avs/boards/ssm4567.c11
-rw-r--r--sound/soc/intel/avs/core.c68
-rw-r--r--sound/soc/intel/avs/dsp.c2
-rw-r--r--sound/soc/intel/avs/lnl.c27
-rw-r--r--sound/soc/intel/avs/loader.c11
-rw-r--r--sound/soc/intel/avs/messages.h34
-rw-r--r--sound/soc/intel/avs/mtl.c200
-rw-r--r--sound/soc/intel/avs/path.c59
-rw-r--r--sound/soc/intel/avs/pcm.c156
-rw-r--r--sound/soc/intel/avs/probes.c2
-rw-r--r--sound/soc/intel/avs/ptl.c98
-rw-r--r--sound/soc/intel/avs/registers.h40
-rw-r--r--sound/soc/intel/avs/tgl.c2
-rw-r--r--sound/soc/intel/avs/topology.c4
-rw-r--r--sound/soc/intel/avs/topology.h2
-rw-r--r--sound/soc/intel/avs/utils.h16
-rw-r--r--sound/soc/intel/boards/Kconfig8
-rw-r--r--sound/soc/intel/boards/sof_sdw.c48
-rw-r--r--sound/soc/intel/common/Makefile2
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-arl-match.c9
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-lnl-match.c15
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-mtl-match.c49
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-ptl-match.c124
-rw-r--r--sound/soc/intel/common/sof-function-topology-lib.c135
-rw-r--r--sound/soc/intel/common/sof-function-topology-lib.h15
-rw-r--r--sound/soc/loongson/Kconfig10
-rw-r--r--sound/soc/loongson/Makefile2
-rw-r--r--sound/soc/loongson/loongson1_ac97.c398
-rw-r--r--sound/soc/loongson/loongson_i2s_pci.c13
-rw-r--r--sound/soc/mediatek/Kconfig1
-rw-r--r--sound/soc/mediatek/mt8183/mt8183-afe-pcm.c571
-rw-r--r--sound/soc/mediatek/mt8188/mt8188-mt6359.c6
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-mt6359.c45
-rw-r--r--sound/soc/meson/meson-card-utils.c2
-rw-r--r--sound/soc/qcom/qdsp6/q6apm-lpass-dais.c2
-rw-r--r--sound/soc/qcom/sc8280xp.c2
-rw-r--r--sound/soc/qcom/sdm845.c4
-rw-r--r--sound/soc/renesas/Kconfig7
-rw-r--r--sound/soc/renesas/rcar/Makefile3
-rw-r--r--sound/soc/renesas/rcar/adg.c32
-rw-r--r--sound/soc/renesas/rcar/core.c9
-rw-r--r--sound/soc/renesas/rcar/msiof.c566
-rw-r--r--sound/soc/rockchip/Kconfig10
-rw-r--r--sound/soc/rockchip/Makefile2
-rw-r--r--sound/soc/rockchip/rockchip_sai.c1555
-rw-r--r--sound/soc/rockchip/rockchip_sai.h251
-rw-r--r--sound/soc/sdca/Makefile2
-rw-r--r--sound/soc/sdca/sdca_asoc.c1311
-rw-r--r--sound/soc/sdca/sdca_functions.c10
-rw-r--r--sound/soc/sdca/sdca_regmap.c3
-rw-r--r--sound/soc/sdw_utils/soc_sdw_rt_amp.c2
-rw-r--r--sound/soc/sdw_utils/soc_sdw_utils.c202
-rw-r--r--sound/soc/soc-ac97.c15
-rw-r--r--sound/soc/soc-core.c130
-rw-r--r--sound/soc/soc-dapm.c84
-rw-r--r--sound/soc/soc-devres.c7
-rw-r--r--sound/soc/soc-utils.c13
-rw-r--r--sound/soc/sof/amd/Kconfig7
-rw-r--r--sound/soc/sof/amd/acp-dsp-offset.h10
-rw-r--r--sound/soc/sof/amd/acp.c135
-rw-r--r--sound/soc/sof/amd/acp.h7
-rw-r--r--sound/soc/sof/amd/pci-acp70.c10
-rw-r--r--sound/soc/sof/core.c10
-rw-r--r--sound/soc/sof/imx/imx8.c24
-rw-r--r--sound/soc/sof/intel/hda-bus.c2
-rw-r--r--sound/soc/sof/intel/hda.c16
-rw-r--r--sound/soc/sof/intel/hda.h1
-rw-r--r--sound/soc/sof/intel/pci-ptl.c30
-rw-r--r--sound/soc/sof/intel/ptl.c23
-rw-r--r--sound/soc/sof/ipc4-control.c11
-rw-r--r--sound/soc/sof/ipc4-pcm.c6
-rw-r--r--sound/soc/sof/sof-pci-dev.c16
-rw-r--r--sound/soc/sof/topology.c115
-rw-r--r--sound/soc/starfive/jh7110_tdm.c13
-rw-r--r--sound/soc/stm/stm32_sai.c18
-rw-r--r--sound/soc/sunxi/sun8i-codec.c13
-rw-r--r--sound/soc/tegra/tegra186_asrc.c18
-rw-r--r--sound/soc/tegra/tegra186_asrc.h12
-rw-r--r--sound/soc/tegra/tegra210_admaif.c223
-rw-r--r--sound/soc/tegra/tegra210_admaif.h78
-rw-r--r--sound/soc/tegra/tegra210_adx.c229
-rw-r--r--sound/soc/tegra/tegra210_adx.h36
-rw-r--r--sound/soc/tegra/tegra210_ahub.c850
-rw-r--r--sound/soc/tegra/tegra210_ahub.h52
-rw-r--r--sound/soc/tegra/tegra210_amx.c229
-rw-r--r--sound/soc/tegra/tegra210_amx.h34
-rw-r--r--sound/soc/tegra/tegra210_i2s.c231
-rw-r--r--sound/soc/tegra/tegra210_i2s.h51
-rw-r--r--sound/soc/tegra/tegra_audio_graph_card.c14
-rw-r--r--sound/soc/tegra/tegra_cif.h30
-rw-r--r--sound/soc/tegra/tegra_isomgr_bw.c7
-rw-r--r--sound/soc/ti/davinci-mcasp.c8
-rw-r--r--sound/sparc/amd7930.c9
-rw-r--r--sound/sparc/dbri.c9
-rw-r--r--sound/usb/fcp.c3
-rw-r--r--sound/usb/midi.c5
-rw-r--r--sound/usb/mixer.c22
-rw-r--r--sound/usb/mixer_quirks.c154
-rw-r--r--sound/usb/mixer_scarlett2.c3
-rw-r--r--sound/usb/mixer_us16x08.c32
-rw-r--r--sound/usb/quirks-table.h51
-rw-r--r--sound/usb/quirks.c5
-rw-r--r--sound/virtio/virtio_kctl.c8
-rw-r--r--sound/x86/intel_hdmi_audio.c7
-rw-r--r--tools/arch/x86/include/asm/amd/ibs.h (renamed from tools/arch/x86/include/asm/amd-ibs.h)2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h24
-rw-r--r--tools/arch/x86/include/asm/inat.h6
-rw-r--r--tools/arch/x86/kcpuid/cpuid.csv791
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c375
-rw-r--r--tools/arch/x86/lib/insn.c7
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt56
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk7
-rw-r--r--tools/include/nolibc/Makefile34
-rw-r--r--tools/include/nolibc/arch-aarch64.h1
-rw-r--r--tools/include/nolibc/arch-arm.h2
-rw-r--r--tools/include/nolibc/arch-i386.h2
-rw-r--r--tools/include/nolibc/arch-loongarch.h7
-rw-r--r--tools/include/nolibc/arch-m68k.h141
-rw-r--r--tools/include/nolibc/arch-powerpc.h2
-rw-r--r--tools/include/nolibc/arch-riscv.h1
-rw-r--r--tools/include/nolibc/arch-sparc.h191
-rw-r--r--tools/include/nolibc/arch-x86_64.h1
-rw-r--r--tools/include/nolibc/arch.h4
-rw-r--r--tools/include/nolibc/compiler.h9
-rw-r--r--tools/include/nolibc/crt.h5
-rw-r--r--tools/include/nolibc/ctype.h6
-rw-r--r--tools/include/nolibc/dirent.h10
-rw-r--r--tools/include/nolibc/elf.h15
-rw-r--r--tools/include/nolibc/errno.h6
-rw-r--r--tools/include/nolibc/fcntl.h69
-rw-r--r--tools/include/nolibc/getopt.h101
-rw-r--r--tools/include/nolibc/math.h31
-rw-r--r--tools/include/nolibc/nolibc.h21
-rw-r--r--tools/include/nolibc/poll.h55
-rw-r--r--tools/include/nolibc/sched.h50
-rw-r--r--tools/include/nolibc/signal.h6
-rw-r--r--tools/include/nolibc/std.h6
-rw-r--r--tools/include/nolibc/stddef.h24
-rw-r--r--tools/include/nolibc/stdint.h4
-rw-r--r--tools/include/nolibc/stdio.h167
-rw-r--r--tools/include/nolibc/stdlib.h54
-rw-r--r--tools/include/nolibc/string.h40
-rw-r--r--tools/include/nolibc/sys.h423
-rw-r--r--tools/include/nolibc/sys/auxv.h41
-rw-r--r--tools/include/nolibc/sys/ioctl.h29
-rw-r--r--tools/include/nolibc/sys/mman.h82
-rw-r--r--tools/include/nolibc/sys/mount.h37
-rw-r--r--tools/include/nolibc/sys/prctl.h36
-rw-r--r--tools/include/nolibc/sys/random.h34
-rw-r--r--tools/include/nolibc/sys/reboot.h34
-rw-r--r--tools/include/nolibc/sys/resource.h53
-rw-r--r--tools/include/nolibc/sys/stat.h94
-rw-r--r--tools/include/nolibc/sys/syscall.h19
-rw-r--r--tools/include/nolibc/sys/sysmacros.h20
-rw-r--r--tools/include/nolibc/sys/time.h49
-rw-r--r--tools/include/nolibc/sys/timerfd.h87
-rw-r--r--tools/include/nolibc/sys/types.h7
-rw-r--r--tools/include/nolibc/sys/utsname.h42
-rw-r--r--tools/include/nolibc/sys/wait.h116
-rw-r--r--tools/include/nolibc/time.h189
-rw-r--r--tools/include/nolibc/types.h32
-rw-r--r--tools/include/nolibc/unistd.h40
-rw-r--r--tools/include/uapi/linux/fanotify.h274
-rw-r--r--tools/include/uapi/linux/mount.h235
-rw-r--r--tools/include/uapi/linux/nsfs.h45
-rw-r--r--tools/include/uapi/linux/perf_event.h657
-rw-r--r--tools/include/uapi/linux/prctl.h45
-rw-r--r--tools/memory-model/Documentation/README7
-rw-r--r--tools/memory-model/Documentation/explanation.txt2
-rw-r--r--tools/memory-model/Documentation/locking.txt5
-rw-r--r--tools/memory-model/Documentation/ordering.txt22
-rw-r--r--tools/memory-model/Documentation/recipes.txt4
-rw-r--r--tools/memory-model/Documentation/references.txt3
-rw-r--r--tools/memory-model/Documentation/simple.txt4
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py22
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py7
-rw-r--r--tools/objtool/arch/x86/decode.c15
-rw-r--r--tools/objtool/elf.c38
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/perf/bench/Build1
-rw-r--r--tools/perf/bench/futex-hash.c7
-rw-r--r--tools/perf/bench/futex-lock-pi.c5
-rw-r--r--tools/perf/bench/futex-requeue.c6
-rw-r--r--tools/perf/bench/futex-wake-parallel.c9
-rw-r--r--tools/perf/bench/futex-wake.c4
-rw-r--r--tools/perf/bench/futex.c67
-rw-r--r--tools/perf/bench/futex.h5
-rwxr-xr-xtools/perf/check-headers.sh2
-rw-r--r--tools/perf/util/amd-sample-raw.c2
-rw-r--r--tools/power/acpi/common/cmfsize.c2
-rw-r--r--tools/power/acpi/common/getopt.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c4
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixdir.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixmap.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixxf.c2
-rw-r--r--tools/power/acpi/tools/acpidump/acpidump.h2
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c2
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c4
-rw-r--r--tools/power/acpi/tools/acpidump/apmain.c2
-rw-r--r--tools/power/cpupower/Makefile13
-rw-r--r--tools/power/cpupower/README28
-rw-r--r--tools/power/cpupower/bindings/python/Makefile8
-rw-r--r--tools/power/cpupower/bindings/python/README13
-rw-r--r--tools/power/cpupower/cpupower-service.conf32
-rw-r--r--tools/power/cpupower/cpupower.service.in16
-rw-r--r--tools/power/cpupower/cpupower.sh26
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py3
-rw-r--r--tools/sched_ext/Makefile23
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h2
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c4
-rw-r--r--tools/sched_ext/scx_show_state.py14
-rw-r--r--tools/testing/crypto/chacha20-s390/test-cipher.c10
-rw-r--r--tools/testing/kunit/configs/all_tests.config1
-rw-r--r--tools/testing/kunit/kunit_json.py10
-rw-r--r--tools/testing/kunit/kunit_kernel.py8
-rw-r--r--tools/testing/kunit/qemu_configs/powerpc.py1
-rw-r--r--tools/testing/kunit/qemu_configs/powerpc32.py17
-rw-r--r--tools/testing/kunit/qemu_configs/powerpcle.py14
-rw-r--r--tools/testing/kunit/qemu_configs/riscv32.py17
-rw-r--r--tools/testing/kunit/qemu_configs/sparc.py2
-rw-r--r--tools/testing/kunit/qemu_configs/sparc64.py16
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/bpf/bench.c12
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c42
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/config.aarch641
-rw-r--r--tools/testing/selftests/bpf/config.s390x1
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c18
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c9
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c477
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh18
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c55
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c69
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/Makefile9
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c38
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c557
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h36
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c86
-rw-r--r--tools/testing/selftests/filesystems/utils.c88
-rw-r--r--tools/testing/selftests/filesystems/utils.h3
-rw-r--r--tools/testing/selftests/filesystems/wrappers.h (renamed from tools/testing/selftests/filesystems/overlayfs/wrappers.h)46
-rw-r--r--tools/testing/selftests/ftrace/Makefile2
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore6
-rw-r--r--tools/testing/selftests/futex/functional/Makefile7
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c262
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c231
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c292
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh7
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h70
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/gpio/config1
-rwxr-xr-xtools/testing/selftests/gpio/gpio-aggregator.sh727
-rw-r--r--tools/testing/selftests/kexec/Makefile7
-rw-r--r--tools/testing/selftests/kexec/test_kexec_jump.c72
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_jump.sh42
-rw-r--r--tools/testing/selftests/kselftest_harness.h170
-rw-r--r--tools/testing/selftests/kselftest_harness/.gitignore2
-rw-r--r--tools/testing/selftests/kselftest_harness/Makefile7
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c136
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.expected64
-rwxr-xr-xtools/testing/selftests/kselftest_harness/harness-selftest.sh13
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c61
-rw-r--r--tools/testing/selftests/nolibc/Makefile28
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c2
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c331
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh7
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c2
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h22
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c74
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh42
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh89
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh9
-rw-r--r--tools/testing/selftests/sched_ext/Makefile3
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.bpf.c144
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.c84
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c74
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.c88
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c43
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.c61
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh30
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json27
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json24
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c4
-rw-r--r--tools/testing/selftests/timens/exec.c2
-rw-r--r--tools/testing/selftests/timens/futex.c2
-rw-r--r--tools/testing/selftests/timens/gettime_perf.c2
-rw-r--r--tools/testing/selftests/timens/procfs.c2
-rw-r--r--tools/testing/selftests/timens/timens.c2
-rw-r--r--tools/testing/selftests/timens/timer.c4
-rw-r--r--tools/testing/selftests/timens/timerfd.c6
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c2
-rw-r--r--tools/testing/selftests/ublk/Makefile11
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c5
-rw-r--r--tools/testing/selftests/ublk/file_backed.c17
-rw-r--r--tools/testing/selftests/ublk/kublk.c153
-rw-r--r--tools/testing/selftests/ublk/kublk.h22
-rw-r--r--tools/testing/selftests/ublk/null.c55
-rw-r--r--tools/testing/selftests/ublk/stripe.c26
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh39
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_08.sh32
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_09.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_10.sh30
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_11.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh10
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh7
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh7
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh9
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/apx.c10
-rw-r--r--tools/testing/selftests/x86/bugs/Makefile3
-rwxr-xr-xtools/testing/selftests/x86/bugs/common.py164
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_indirect_alignment.py150
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_permutations.py109
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_ret_alignment.py139
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_sysfs.py65
-rw-r--r--tools/testing/selftests/x86/lam.c9
-rw-r--r--tools/testing/selftests/x86/xstate.c3
-rw-r--r--tools/testing/selftests/x86/xstate.h2
-rw-r--r--tools/testing/vsock/vsock_test.c28
-rw-r--r--tools/tracing/rtla/README.txt7
-rw-r--r--usr/include/Makefile4
3807 files changed, 135329 insertions, 75585 deletions
diff --git a/.gitignore b/.gitignore
index f2f63e47fb88..bf5ee6e01cd4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,6 +40,7 @@
*.o
*.o.*
*.patch
+*.pyc
*.rmeta
*.rpm
*.rsi
diff --git a/.mailmap b/.mailmap
index 1c70e51c789d..a885e2eefc69 100644
--- a/.mailmap
+++ b/.mailmap
@@ -102,6 +102,7 @@ Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
Arnaud Patard <arnaud.patard@rtp-net.org>
Arnd Bergmann <arnd@arndb.de>
Arun Kumar Neelakantam <quic_aneela@quicinc.com> <aneela@codeaurora.org>
+Asahi Lina <lina+kernel@asahilina.net> <lina@asahilina.net>
Ashok Raj Nagarajan <quic_arnagara@quicinc.com> <arnagara@codeaurora.org>
Ashwin Chaugule <quic_ashwinc@quicinc.com> <ashwinc@codeaurora.org>
Asutosh Das <quic_asutoshd@quicinc.com> <asutoshd@codeaurora.org>
@@ -312,6 +313,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@opinsys.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 000000000000..30b8ae1659f8
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,2 @@
+[MASTER]
+init-hook='import sys; sys.path += ["scripts/lib/kdoc", "scripts/lib/abi"]'
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 11545c9e2e93..4ba771b56b3b 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -547,6 +547,21 @@ Description:
[RO] Maximum size in bytes of a single element in a DMA
scatter/gather list.
+What: /sys/block/<disk>/queue/max_write_streams
+Date: November 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Maximum number of write streams supported, 0 if not
+ supported. If supported, valid values are 1 through
+ max_write_streams, inclusive.
+
+What: /sys/block/<disk>/queue/write_stream_granularity
+Date: November 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Granularity of a write stream in bytes. The granularity
+ of a write stream is the size that should be discarded or
+ overwritten together to avoid write amplification in the device.
What: /sys/block/<disk>/queue/max_segments
Date: March 2010
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 2a5c1a09a28f..560124cc3177 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -822,3 +822,46 @@ Description:
Each entry is a link to the device which registered the extension.
Access: Read
+
+What: /sys/class/power_supply/max8971-charger/fast_charge_timer
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the maximum time the max8971
+ charger operates in fast-charge mode. When the timer expires
+ the device will terminate fast-charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+
+ - 4 - 10 (hours), step by 1
+ - 0: disabled.
+
+What: /sys/class/power_supply/max8971-charger/top_off_threshold_current
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the charging current threshold for
+ entering top-off charging mode. When charging current in fast
+ charge mode drops below this value, the charger will trigger
+ interrupt and start top-off charging mode.
+
+ Valid values:
+
+ - 50000 - 200000 (microamps), step by 50000 (rounded down)
+
+What: /sys/class/power_supply/max8971-charger/top_off_timer
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the maximum time the max8971
+ charger operates in top-off charge mode. When the timer expires
+ the device will terminate top-off charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+
+ - 0 - 70 (minutes), step by 10 (rounded down)
diff --git a/Documentation/ABI/testing/sysfs-class-power-gaokun b/Documentation/ABI/testing/sysfs-class-power-gaokun
new file mode 100644
index 000000000000..0633aed7b355
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-gaokun
@@ -0,0 +1,27 @@
+What: /sys/class/power_supply/gaokun-ec-battery/smart_charge_delay
+Date: March 2025
+KernelVersion: 6.15
+Contact: Pengyu Luo <mitltlatltl@gmail.com>
+Description:
+ This entry allows configuration of smart charging delay.
+
+ Smart charging behavior: when the power adapter is connected
+ for delay hours, battery charging will follow the rules of
+ charge_control_start_threshold and charge_control_end_threshold.
+ For more information about charge control, please refer to
+ sysfs-class-power.
+
+ Access: Read, Write
+
+ Valid values: In hours (non-negative)
+
+What: /sys/class/power_supply/gaokun-ec-battery/battery_adaptive_charge
+Date: March 2025
+KernelVersion: 6.15
+Contact: Pengyu Luo <mitltlatltl@gmail.com>
+Description:
+ This entry allows enabling battery adaptive charging.
+
+ Access: Read, Write
+
+ Valid values: 0 (disabled) or 1 (enabled)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 206079d3bd5b..8a4d7ecf46ec 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -111,6 +111,7 @@ What: /sys/devices/system/cpu/cpuidle/available_governors
/sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governor
/sys/devices/system/cpu/cpuidle/current_governer_ro
+ /sys/devices/system/cpu/cpuidle/intel_c1_demotion
Date: September 2007
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Discover cpuidle policy and mechanism
@@ -132,7 +133,11 @@ Description: Discover cpuidle policy and mechanism
current_governor_ro: (RO) displays current idle policy.
- See Documentation/admin-guide/pm/cpuidle.rst and
+ intel_c1_demotion: (RW) enables/disables the C1 demotion
+ feature on Intel CPUs.
+
+ See Documentation/admin-guide/pm/cpuidle.rst,
+ Documentation/admin-guide/pm/intel_idle.rst, and
Documentation/driver-api/pm/cpuidle.rst for more information.
@@ -268,6 +273,60 @@ Description: Discover CPUs in the same CPU frequency coordination domain
This file is only present if the acpi-cpufreq or the cppc-cpufreq
drivers are in use.
+What: /sys/devices/system/cpu/cpuX/cpufreq/auto_select
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Autonomous selection enable
+
+ Read/write interface to control autonomous selection enable
+ Read returns autonomous selection status:
+ 0: autonomous selection is disabled
+ 1: autonomous selection is enabled
+
+ Write 'y' or '1' or 'on' to enable autonomous selection.
+ Write 'n' or '0' or 'off' to disable autonomous selection.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/auto_act_window
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Autonomous activity window
+
+ This file indicates a moving utilization sensitivity window to
+ the platform's autonomous selection policy.
+
+ Read/write an integer represents autonomous activity window (in
+ microseconds) from/to this file. The max value to write is
+ 1270000000 but the max significand is 127. This means that if 128
+ is written to this file, 127 will be stored. If the value is
+ greater than 130, only the first two digits will be saved as
+ significand.
+
+ Writing a zero value to this file enable the platform to
+ determine an appropriate Activity Window depending on the workload.
+
+ Writing to this file only has meaning when Autonomous Selection is
+ enabled.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/energy_performance_preference_val
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Energy performance preference
+
+ Read/write an 8-bit integer from/to this file. This file
+ represents a range of values from 0 (performance preference) to
+ 0xFF (energy efficiency preference) that influences the rate of
+ performance increase/decrease and the result of the hardware's
+ energy efficiency and performance optimization policies.
+
+ Writing to this file only has meaning when Autonomous Selection is
+ enabled.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
Date: August 2008
@@ -511,11 +570,13 @@ Description: information about CPUs heterogeneity.
What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+ /sys/devices/system/cpu/vulnerabilities/indirect_target_selection
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/old_microcode
/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
/sys/devices/system/cpu/vulnerabilities/retbleed
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
index 2a19584d091e..8c9718d83e9d 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
@@ -1,6 +1,6 @@
What: /sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
-Date: September, 2023
-KernelVersion: 6.5
+Date: March, 2025
+KernelVersion: 6.15
Contact: linux-input@vger.kernel.org
Description:
The set of keys displayed on the Touch Bar.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_ras b/Documentation/ABI/testing/sysfs-driver-qat_ras
index 176dea1e9c0a..82ceb04445ec 100644
--- a/Documentation/ABI/testing/sysfs-driver-qat_ras
+++ b/Documentation/ABI/testing/sysfs-driver-qat_ras
@@ -4,7 +4,7 @@ KernelVersion: 6.7
Contact: qat-linux@intel.com
Description: (RO) Reports the number of correctable errors detected by the device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
Date: January 2024
@@ -12,7 +12,7 @@ KernelVersion: 6.7
Contact: qat-linux@intel.com
Description: (RO) Reports the number of non fatal errors detected by the device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
Date: January 2024
@@ -20,7 +20,7 @@ KernelVersion: 6.7
Contact: qat-linux@intel.com
Description: (RO) Reports the number of fatal errors detected by the device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
Date: January 2024
@@ -38,4 +38,4 @@ Description: (WO) Write to resets all error counters of a device.
# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
0
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
index 5249ad5a96d9..f4de60c4134d 100644
--- a/Documentation/ABI/testing/sysfs-firmware-acpi
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -248,3 +248,24 @@ Description:
# cat ff_pwr_btn
7 enabled
+What: /sys/firmware/acpi/memory_ranges/rangeX
+Date: February 2025
+Contact: Tony Luck <tony.luck@intel.com>
+Description:
+ On systems with the ACPI MRRM table reports the parameters for
+ each range.
+
+ base: Starting system physical address.
+
+ length: Length of this range in bytes.
+
+ node: NUMA node that this range belongs to. Negative numbers
+ indicate that the node number could not be determined (e.g
+ for an address range that is reserved for future hot add of
+ memory).
+
+ local_region_id: ID associated with access by agents
+ local to this range of addresses.
+
+ remote_region_id: ID associated with access by agents
+ non-local to this range of addresses.
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index b134146d735b..bf3b6299c15e 100644
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -27,3 +27,11 @@ Description: Writing to this will drop compression-related caches,
- 1 : invalidate cached compressed folios
- 2 : drop in-memory pclusters
- 3 : drop in-memory pclusters and cached compressed folios
+
+What: /sys/fs/erofs/accel
+Date: May 2025
+Contact: "Bo Liu" <liubo03@inspur.com>
+Description: Used to set or show hardware accelerators in effect
+ and multiple accelerators are separated by '\n'.
+ Supported accelerator(s): qat_deflate.
+ Disable all accelerators with an empty string (echo > accel).
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 63094646df28..d30d66ddf1ad 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -60,9 +60,8 @@ endif #HAVE_LATEXMK
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
-KERNELDOC = $(srctree)/scripts/kernel-doc
-KERNELDOC_CONF = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
-ALLSPHINXOPTS = $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
+ALLSPHINXOPTS = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
+ALLSPHINXOPTS += $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
ifneq ($(wildcard $(srctree)/.config),)
ifeq ($(CONFIG_RUST),y)
# Let Sphinx know we will include rustdoc
@@ -83,9 +82,11 @@ loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
# $5 reST source folder relative to $(src),
# e.g. "userspace-api/media" for the linux-tv book-set at ./Documentation/userspace-api/media
+PYTHONPYCACHEPREFIX ?= $(abspath $(BUILDDIR)/__pycache__)
+
quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media $2 && \
- PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONPYCACHEPREFIX="$(PYTHONPYCACHEPREFIX)" \
BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(src)/$5/$(SPHINX_CONF)) \
$(PYTHON3) $(srctree)/scripts/jobserver-exec \
$(CONFIG_SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
index ed5c9d8c9afe..d8bb98623c12 100644
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@@ -334,7 +334,7 @@ If the system-call audit module were to ever need to reject stale data, one way
to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
``audit_entry`` structure, and modify audit_filter_task() as follows::
- static enum audit_state audit_filter_task(struct task_struct *tsk)
+ static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key)
{
struct audit_entry *e;
enum audit_state state;
@@ -346,16 +346,18 @@ to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to
if (e->deleted) {
spin_unlock(&e->lock);
rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
+ return NULL;
}
rcu_read_unlock();
if (state == AUDIT_STATE_RECORD)
*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
- return state;
+ /* As long as e->lock is held, e is valid and
+ * its value is not stale */
+ return e;
}
}
rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
+ return NULL;
}
The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 53faeed7c190..be2eb6be16ec 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -15,6 +15,9 @@ to start learning about RCU:
| 2014 Big API Table https://lwn.net/Articles/609973/
| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/
| 2019 Big API Table https://lwn.net/Articles/777165/
+| 7. The RCU API, 2024 Edition https://lwn.net/Articles/988638/
+| 2024 Background Information https://lwn.net/Articles/988641/
+| 2024 Big API Table https://lwn.net/Articles/988666/
For those preferring video:
diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst
index 957ccf617797..3262397ebe8f 100644
--- a/Documentation/admin-guide/blockdev/index.rst
+++ b/Documentation/admin-guide/blockdev/index.rst
@@ -11,6 +11,7 @@ Block Devices
nbd
paride
ramdisk
+ zoned_loop
zram
drbd/index
diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst
new file mode 100644
index 000000000000..9c7aa3b482f3
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/zoned_loop.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Zoned Loop Block Device
+=======================
+
+.. Contents:
+
+ 1) Overview
+ 2) Creating a Zoned Device
+ 3) Deleting a Zoned Device
+ 4) Example
+
+
+1) Overview
+-----------
+
+The zoned loop block device driver (zloop) allows a user to create a zoned block
+device using one regular file per zone as backing storage. This driver does not
+directly control any hardware and uses read, write and truncate operations to
+regular files of a file system to emulate a zoned block device.
+
+Using zloop, zoned block devices with a configurable capacity, zone size and
+number of conventional zones can be created. The storage for each zone of the
+device is implemented using a regular file with a maximum size equal to the zone
+size. The size of a file backing a conventional zone is always equal to the zone
+size. The size of a file backing a sequential zone indicates the amount of data
+sequentially written to the file, that is, the size of the file directly
+indicates the position of the write pointer of the zone.
+
+When resetting a sequential zone, its backing file size is truncated to zero.
+Conversely, for a zone finish operation, the backing file is truncated to the
+zone size. With this, the maximum capacity of a zloop zoned block device created
+can be larger configured to be larger than the storage space available on the
+backing file system. Of course, for such configuration, writing more data than
+the storage space available on the backing file system will result in write
+errors.
+
+The zoned loop block device driver implements a complete zone transition state
+machine. That is, zones can be empty, implicitly opened, explicitly opened,
+closed or full. The current implementation does not support any limits on the
+maximum number of open and active zones.
+
+No user tools are necessary to create and delete zloop devices.
+
+2) Creating a Zoned Device
+--------------------------
+
+Once the zloop module is loaded (or if zloop is compiled in the kernel), the
+character device file /dev/zloop-control can be used to add a zloop device.
+This is done by writing an "add" command directly to the /dev/zloop-control
+device::
+
+ $ modprobe zloop
+ $ ls -l /dev/zloop*
+ crw-------. 1 root root 10, 123 Jan 6 19:18 /dev/zloop-control
+
+ $ mkdir -p <base directory/<device ID>
+ $ echo "add [options]" > /dev/zloop-control
+
+The options available for the add command can be listed by reading the
+/dev/zloop-control device::
+
+ $ cat /dev/zloop-control
+ add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io
+ remove id=%d
+
+In more details, the options that can be used with the "add" command are as
+follows.
+
+================ ===========================================================
+id Device number (the X in /dev/zloopX).
+ Default: automatically assigned.
+capacity_mb Device total capacity in MiB. This is always rounded up to
+ the nearest higher multiple of the zone size.
+ Default: 16384 MiB (16 GiB).
+zone_size_mb Device zone size in MiB. Default: 256 MiB.
+zone_capacity_mb Device zone capacity (must always be equal to or lower than
+ the zone size. Default: zone size.
+conv_zones Total number of conventioanl zones starting from sector 0.
+ Default: 8.
+base_dir Path to the base directoy where to create the directory
+ containing the zone files of the device.
+ Default=/var/local/zloop.
+ The device directory containing the zone files is always
+ named with the device ID. E.g. the default zone file
+ directory for /dev/zloop0 is /var/local/zloop/0.
+nr_queues Number of I/O queues of the zoned block device. This value is
+ always capped by the number of online CPUs
+ Default: 1
+queue_depth Maximum I/O queue depth per I/O queue.
+ Default: 64
+buffered_io Do buffered IOs instead of direct IOs (default: false)
+================ ===========================================================
+
+3) Deleting a Zoned Device
+--------------------------
+
+Deleting an unused zoned loop block device is done by issuing the "remove"
+command to /dev/zloop-control, specifying the ID of the device to remove::
+
+ $ echo "remove id=X" > /dev/zloop-control
+
+The remove command does not have any option.
+
+A zoned device that was removed can be re-added again without any change to the
+state of the device zones: the device zones are restored to their last state
+before the device was removed. Adding again a zoned device after it was removed
+must always be done using the same configuration as when the device was first
+added. If a zone configuration change is detected, an error will be returned and
+the zoned device will not be created.
+
+To fully delete a zoned device, after executing the remove operation, the device
+base directory containing the backing files of the device zones must be deleted.
+
+4) Example
+----------
+
+The following sequence of commands creates a 2GB zoned device with zones of 64
+MB and a zone capacity of 63 MB::
+
+ $ modprobe zloop
+ $ mkdir -p /var/local/zloop/0
+ $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity=63MB" > /dev/zloop-control
+
+For the device created (/dev/zloop0), the zone backing files are all created
+under the default base directory (/var/local/zloop)::
+
+ $ ls -l /var/local/zloop/0
+ total 0
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000000
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000001
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000002
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000003
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000004
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000005
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000006
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000007
+ -rw-------. 1 root root 0 Jan 6 22:23 seq-000008
+ -rw-------. 1 root root 0 Jan 6 22:23 seq-000009
+ ...
+
+The zoned device created (/dev/zloop0) can then be used normally::
+
+ $ lsblk -z
+ NAME ZONED ZONE-SZ ZONE-NR ZONE-AMAX ZONE-OMAX ZONE-APP ZONE-WGRAN
+ zloop0 host-managed 64M 32 0 0 1M 4K
+ $ blkzone report /dev/zloop0
+ start: 0x000000000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000020000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000040000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000060000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000080000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000a0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000c0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000e0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000100000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
+ start: 0x000120000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
+ ...
+
+Deleting this device is done using the command::
+
+ $ echo "remove id=0" > /dev/zloop-control
+
+The removed device can be re-added again using the same "add" command as when
+the device was first created. To fully delete a zoned device, its backing files
+should also be deleted after executing the remove command::
+
+ $ rm -r /var/local/zloop/0
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 1a16ce68a4d7..1edc26622594 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1076,7 +1076,7 @@ cpufreq governor about the minimum desired frequency which should always be
provided by a CPU, as well as the maximum desired frequency, which should not
be exceeded by a CPU.
-WARNING: cgroup2 cpu controller doesn't yet fully support the control of
+WARNING: cgroup2 cpu controller doesn't yet support the (bandwidth) control of
realtime processes. For a kernel built with the CONFIG_RT_GROUP_SCHED option
enabled for group scheduling of realtime processes, the cpu controller can only
be enabled when all RT processes are in the root cgroup. Be aware that system
@@ -1095,19 +1095,34 @@ realtime processes irrespective of CONFIG_RT_GROUP_SCHED.
CPU Interface Files
~~~~~~~~~~~~~~~~~~~
-All time durations are in microseconds.
+The interaction of a process with the cpu controller depends on its scheduling
+policy and the underlying scheduler. From the point of view of the cpu controller,
+processes can be categorized as follows:
+
+* Processes under the fair-class scheduler
+* Processes under a BPF scheduler with the ``cgroup_set_weight`` callback
+* Everything else: ``SCHED_{FIFO,RR,DEADLINE}`` and processes under a BPF scheduler
+ without the ``cgroup_set_weight`` callback
+
+For details on when a process is under the fair-class scheduler or a BPF scheduler,
+check out :ref:`Documentation/scheduler/sched-ext.rst <sched-ext>`.
+
+For each of the following interface files, the above categories
+will be referred to. All time durations are in microseconds.
cpu.stat
A read-only flat-keyed file.
This file exists whether the controller is enabled or not.
- It always reports the following three stats:
+ It always reports the following three stats, which account for all the
+ processes in the cgroup:
- usage_usec
- user_usec
- system_usec
- and the following five when the controller is enabled:
+ and the following five when the controller is enabled, which account for
+ only the processes under the fair-class scheduler:
- nr_periods
- nr_throttled
@@ -1125,6 +1140,10 @@ All time durations are in microseconds.
If the cgroup has been configured to be SCHED_IDLE (cpu.idle = 1),
then the weight will show as a 0.
+ This file affects only processes under the fair-class scheduler and a BPF
+ scheduler with the ``cgroup_set_weight`` callback depending on what the
+ callback actually does.
+
cpu.weight.nice
A read-write single value file which exists on non-root
cgroups. The default is "0".
@@ -1137,6 +1156,10 @@ All time durations are in microseconds.
granularity is coarser for the nice values, the read value is
the closest approximation of the current weight.
+ This file affects only processes under the fair-class scheduler and a BPF
+ scheduler with the ``cgroup_set_weight`` callback depending on what the
+ callback actually does.
+
cpu.max
A read-write two value file which exists on non-root cgroups.
The default is "max 100000".
@@ -1149,43 +1172,55 @@ All time durations are in microseconds.
$PERIOD duration. "max" for $MAX indicates no limit. If only
one number is written, $MAX is updated.
+ This file affects only processes under the fair-class scheduler.
+
cpu.max.burst
A read-write single value file which exists on non-root
cgroups. The default is "0".
The burst in the range [0, $MAX].
+ This file affects only processes under the fair-class scheduler.
+
cpu.pressure
A read-write nested-keyed file.
Shows pressure stall information for CPU. See
:ref:`Documentation/accounting/psi.rst <psi>` for details.
+ This file accounts for all the processes in the cgroup.
+
cpu.uclamp.min
- A read-write single value file which exists on non-root cgroups.
- The default is "0", i.e. no utilization boosting.
+ A read-write single value file which exists on non-root cgroups.
+ The default is "0", i.e. no utilization boosting.
+
+ The requested minimum utilization (protection) as a percentage
+ rational number, e.g. 12.34 for 12.34%.
- The requested minimum utilization (protection) as a percentage
- rational number, e.g. 12.34 for 12.34%.
+ This interface allows reading and setting minimum utilization clamp
+ values similar to the sched_setattr(2). This minimum utilization
+ value is used to clamp the task specific minimum utilization clamp,
+ including those of realtime processes.
- This interface allows reading and setting minimum utilization clamp
- values similar to the sched_setattr(2). This minimum utilization
- value is used to clamp the task specific minimum utilization clamp.
+ The requested minimum utilization (protection) is always capped by
+ the current value for the maximum utilization (limit), i.e.
+ `cpu.uclamp.max`.
- The requested minimum utilization (protection) is always capped by
- the current value for the maximum utilization (limit), i.e.
- `cpu.uclamp.max`.
+ This file affects all the processes in the cgroup.
cpu.uclamp.max
- A read-write single value file which exists on non-root cgroups.
- The default is "max". i.e. no utilization capping
+ A read-write single value file which exists on non-root cgroups.
+ The default is "max". i.e. no utilization capping
+
+ The requested maximum utilization (limit) as a percentage rational
+ number, e.g. 98.76 for 98.76%.
- The requested maximum utilization (limit) as a percentage rational
- number, e.g. 98.76 for 98.76%.
+ This interface allows reading and setting maximum utilization clamp
+ values similar to the sched_setattr(2). This maximum utilization
+ value is used to clamp the task specific maximum utilization clamp,
+ including those of realtime processes.
- This interface allows reading and setting maximum utilization clamp
- values similar to the sched_setattr(2). This maximum utilization
- value is used to clamp the task specific maximum utilization clamp.
+ This file affects all the processes in the cgroup.
cpu.idle
A read-write single value file which exists on non-root cgroups.
@@ -1197,7 +1232,7 @@ All time durations are in microseconds.
own relative priorities, but the cgroup itself will be treated as
very low priority relative to its peers.
-
+ This file affects only processes under the fair-class scheduler.
Memory
------
@@ -3019,7 +3054,7 @@ Filesystem Support for Writeback
--------------------------------
A filesystem can support cgroup writeback by updating
-address_space_operations->writepage[s]() to annotate bio's using the
+address_space_operations->writepages() to annotate bio's using the
following two functions.
wbc_init_bio(@wbc, @bio)
diff --git a/Documentation/admin-guide/gpio/gpio-aggregator.rst b/Documentation/admin-guide/gpio/gpio-aggregator.rst
index 5cd1e7221756..8374a9df9105 100644
--- a/Documentation/admin-guide/gpio/gpio-aggregator.rst
+++ b/Documentation/admin-guide/gpio/gpio-aggregator.rst
@@ -69,6 +69,113 @@ write-only attribute files in sysfs.
$ echo gpio-aggregator.0 > delete_device
+Aggregating GPIOs using Configfs
+--------------------------------
+
+**Group:** ``/config/gpio-aggregator``
+
+ This is the root directory of the gpio-aggregator configfs tree.
+
+**Group:** ``/config/gpio-aggregator/<example-name>``
+
+ This directory represents a GPIO aggregator device. You can assign any
+ name to ``<example-name>`` (e.g. ``agg0``), except names starting with
+ ``_sysfs`` prefix, which are reserved for auto-generated configfs
+ entries corresponding to devices created via Sysfs.
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/live``
+
+ The ``live`` attribute allows to trigger the actual creation of the device
+ once it's fully configured. Accepted values are:
+
+ * ``1``, ``yes``, ``true`` : enable the virtual device
+ * ``0``, ``no``, ``false`` : disable the virtual device
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/dev_name``
+
+ The read-only ``dev_name`` attribute exposes the name of the device as it
+ will appear in the system on the platform bus (e.g. ``gpio-aggregator.0``).
+ This is useful for identifying a character device for the newly created
+ aggregator. If it's ``gpio-aggregator.0``,
+ ``/sys/devices/platform/gpio-aggregator.0/gpiochipX`` path tells you that the
+ GPIO device id is ``X``.
+
+You must create subdirectories for each virtual line you want to
+instantiate, named exactly as ``line0``, ``line1``, ..., ``lineY``, when
+you want to instantiate ``Y+1`` (Y >= 0) lines. Configure all lines before
+activating the device by setting ``live`` to 1.
+
+**Group:** ``/config/gpio-aggregator/<example-name>/<lineY>/``
+
+ This directory represents a GPIO line to include in the aggregator.
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/key``
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/offset``
+
+ The default values after creating the ``<lineY>`` directory are:
+
+ * ``key`` : <empty>
+ * ``offset`` : -1
+
+ ``key`` must always be explicitly configured, while ``offset`` depends.
+ Two configuration patterns exist for each ``<lineY>``:
+
+ (a). For lookup by GPIO line name:
+
+ * Set ``key`` to the line name.
+ * Ensure ``offset`` remains -1 (the default).
+
+ (b). For lookup by GPIO chip name and the line offset within the chip:
+
+ * Set ``key`` to the chip name.
+ * Set ``offset`` to the line offset (0 <= ``offset`` < 65535).
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/name``
+
+ The ``name`` attribute sets a custom name for lineY. If left unset, the
+ line will remain unnamed.
+
+Once the configuration is done, the ``'live'`` attribute must be set to 1
+in order to instantiate the aggregator device. It can be set back to 0 to
+destroy the virtual device. The module will synchronously wait for the new
+aggregator device to be successfully probed and if this doesn't happen, writing
+to ``'live'`` will result in an error. This is a different behaviour from the
+case when you create it using sysfs ``new_device`` interface.
+
+.. note::
+
+ For aggregators created via Sysfs, the configfs entries are
+ auto-generated and appear as ``/config/gpio-aggregator/_sysfs.<N>/``. You
+ cannot add or remove line directories with mkdir(2)/rmdir(2). To modify
+ lines, you must use the "delete_device" interface to tear down the
+ existing device and reconfigure it from scratch. However, you can still
+ toggle the aggregator with the ``live`` attribute and adjust the
+ ``key``, ``offset``, and ``name`` attributes for each line when ``live``
+ is set to 0 by hand (i.e. it's not waiting for deferred probe).
+
+Sample configuration commands
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: sh
+
+ # Create a directory for an aggregator device
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0
+
+ # Configure each line
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0/line0
+ $ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line0/key
+ $ echo 6 > /sys/kernel/config/gpio-aggregator/agg0/line0/offset
+ $ echo test0 > /sys/kernel/config/gpio-aggregator/agg0/line0/name
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0/line1
+ $ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line1/key
+ $ echo 7 > /sys/kernel/config/gpio-aggregator/agg0/line1/offset
+ $ echo test1 > /sys/kernel/config/gpio-aggregator/agg0/line1/name
+
+ # Activate the aggregator device
+ $ echo 1 > /sys/kernel/config/gpio-aggregator/agg0/live
+
+
Generic GPIO Driver
-------------------
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 451874b8135d..09890a8f3ee9 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -23,3 +23,5 @@ are configurable at compile, boot or run time.
gather_data_sampling
reg-file-data-sampling
rsb
+ old_microcode
+ indirect-target-selection
diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
new file mode 100644
index 000000000000..d9ca64108d23
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
@@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Indirect Target Selection (ITS)
+===============================
+
+ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were
+released before Alder Lake. ITS may allow an attacker to control the prediction
+of indirect branches and RETs located in the lower half of a cacheline.
+
+ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium).
+
+Scope of Impact
+---------------
+- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be
+ predicted with unintended target corresponding to a branch in the guest.
+
+- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native
+ gadgets.
+
+- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect
+ branches may still be predicted with targets corresponding to direct branches
+ executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which
+ should be available via distro updates. Alternatively microcode can be
+ obtained from Intel's github repository [#f1]_.
+
+Affected CPUs
+-------------
+Below is the list of ITS affected CPUs [#f2]_ [#f3]_:
+
+ ======================== ============ ==================== ===============
+ Common name Family_Model eIBRS Intra-mode BTI
+ Guest/Host Isolation
+ ======================== ============ ==================== ===============
+ SKYLAKE_X (step >= 6) 06_55H Affected Affected
+ ICELAKE_X 06_6AH Not affected Affected
+ ICELAKE_D 06_6CH Not affected Affected
+ ICELAKE_L 06_7EH Not affected Affected
+ TIGERLAKE_L 06_8CH Not affected Affected
+ TIGERLAKE 06_8DH Not affected Affected
+ KABYLAKE_L (step >= 12) 06_8EH Affected Affected
+ KABYLAKE (step >= 13) 06_9EH Affected Affected
+ COMETLAKE 06_A5H Affected Affected
+ COMETLAKE_L 06_A6H Affected Affected
+ ROCKETLAKE 06_A7H Not affected Affected
+ ======================== ============ ==================== ===============
+
+- All affected CPUs enumerate Enhanced IBRS feature.
+- IBPB isolation is affected on all ITS affected CPUs, and need a microcode
+ update for mitigation.
+- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden
+ Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the
+ host's affected status.
+- Intel Atom CPUs are not affected by ITS.
+
+Mitigation
+----------
+As only the indirect branches and RETs that have their last byte of instruction
+in the lower half of the cacheline are vulnerable to ITS, the basic idea behind
+the mitigation is to not allow indirect branches in the lower half.
+
+This is achieved by relying on existing retpoline support in the kernel, and in
+compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly
+added ITS-safe thunks. These safe thunks consists of indirect branch in the
+second half of the cacheline. Not all retpoline sites are patched to thunks, if
+a retpoline site is evaluated to be ITS-safe, it is replaced with an inline
+indirect branch.
+
+Dynamic thunks
+~~~~~~~~~~~~~~
+From a dynamically allocated pool of safe-thunks, each vulnerable site is
+replaced with a new thunk, such that they get a unique address. This could
+improve the branch prediction accuracy. Also, it is a defense-in-depth measure
+against aliasing.
+
+Note, for simplicity, indirect branches in eBPF programs are always replaced
+with a jump to a static thunk in __x86_indirect_its_thunk_array. If required,
+in future this can be changed to use dynamic thunks.
+
+All vulnerable RETs are replaced with a static thunk, they do not use dynamic
+thunks. This is because RETs get their prediction from RSB mostly that does not
+depend on source address. RETs that underflow RSB may benefit from dynamic
+thunks. But, RETs significantly outnumber indirect branches, and any benefit
+from a unique source address could be outweighed by the increased icache
+footprint and iTLB pressure.
+
+Retpoline
+~~~~~~~~~
+Retpoline sequence also mitigates ITS-unsafe indirect branches. For this
+reason, when retpoline is enabled, ITS mitigation only relocates the RETs to
+safe thunks. Unless user requested the RSB-stuffing mitigation.
+
+RSB Stuffing
+~~~~~~~~~~~~
+RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow
+attacks. And it also mitigates RETs that are vulnerable to ITS.
+
+Mitigation in guests
+^^^^^^^^^^^^^^^^^^^^
+All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration
+and Family/Model of the guest. This is because eIBRS feature could be hidden
+from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which
+indicates that the guest is running on an unaffected host.
+
+To prevent guests from unnecessarily deploying the mitigation on unaffected
+platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When
+a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit
+is not set by any hardware, but is **intended for VMMs to synthesize** it for
+guests as per the host's affected status.
+
+Mitigation options
+^^^^^^^^^^^^^^^^^^
+The ITS mitigation can be controlled using the "indirect_target_selection"
+kernel parameter. The available options are:
+
+ ======== ===================================================================
+ on (default) Deploy the "Aligned branch/return thunks" mitigation.
+ If spectre_v2 mitigation enables retpoline, aligned-thunks are only
+ deployed for the affected RET instructions. Retpoline mitigates
+ indirect branches.
+
+ off Disable ITS mitigation.
+
+ vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation
+ part of ITS. Otherwise, mitigation is not deployed. This option is
+ useful when host userspace is not in the threat model, and only
+ attacks from guest to host are considered.
+
+ stuff Deploy RSB-fill mitigation when retpoline is also deployed.
+ Otherwise, deploy the default mitigation. When retpoline mitigation
+ is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates
+ ITS.
+
+ force Force the ITS bug and deploy the default mitigation.
+ ======== ===================================================================
+
+Sysfs reporting
+---------------
+
+The sysfs file showing ITS mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/indirect_target_selection
+
+Note, microcode mitigation status is not reported in this file.
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - Not affected
+ - The processor is not vulnerable.
+ * - Vulnerable
+ - System is vulnerable and no mitigation has been applied.
+ * - Vulnerable, KVM: Not affected
+ - System is vulnerable to intra-mode BTI, but not affected by eIBRS
+ guest/host isolation.
+ * - Mitigation: Aligned branch/return thunks
+ - The mitigation is enabled, affected indirect branches and RETs are
+ relocated to safe thunks.
+ * - Mitigation: Retpolines, Stuffing RSB
+ - The mitigation is enabled using retpoline and RSB stuffing.
+
+References
+----------
+.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files
+
+.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
+
+.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list
diff --git a/Documentation/admin-guide/hw-vuln/old_microcode.rst b/Documentation/admin-guide/hw-vuln/old_microcode.rst
new file mode 100644
index 000000000000..6ded8f86b8d0
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/old_microcode.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Old Microcode
+=============
+
+The kernel keeps a table of released microcode. Systems that had
+microcode older than this at boot will say "Vulnerable". This means
+that the system was vulnerable to some known CPU issue. It could be
+security or functional, the kernel does not know or care.
+
+You should update the CPU microcode to mitigate any exposure. This is
+usually accomplished by updating the files in
+/lib/firmware/intel-ucode/ via normal distribution updates. Intel also
+distributes these files in a github repo:
+
+ https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files.git
+
+Just like all the other hardware vulnerabilities, exposure is
+determined at boot. Runtime microcode updates do not change the status
+of this vulnerability.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d9fd26b95b34..ea81784be981 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1828,6 +1828,13 @@
lz4: Select LZ4 compression algorithm to
compress/decompress hibernation image.
+ hibernate.pm_test_delay=
+ [HIBERNATION]
+ Sets the number of seconds to remain in a hibernation test
+ mode before resuming the system (see
+ /sys/power/pm_test). Only available when CONFIG_PM_DEBUG
+ is set. Default value is 5.
+
highmem=nn[KMG] [KNL,BOOT,EARLY] forces the highmem zone to have an exact
size of <nn>. This works even on boxes that have no
highmem otherwise. This also works to reduce highmem
@@ -2202,6 +2209,23 @@
different crypto accelerators. This option can be used
to achieve best performance for particular HW.
+ indirect_target_selection= [X86,Intel] Mitigation control for Indirect
+ Target Selection(ITS) bug in Intel CPUs. Updated
+ microcode is also required for a fix in IBPB.
+
+ on: Enable mitigation (default).
+ off: Disable mitigation.
+ force: Force the ITS bug and deploy default
+ mitigation.
+ vmexit: Only deploy mitigation if CPU is affected by
+ guest/host isolation part of ITS.
+ stuff: Deploy RSB-fill mitigation when retpoline is
+ also deployed. Otherwise, deploy the default
+ mitigation.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
+
init= [KNL]
Format: <full_path>
Run specified binary instead of /sbin/init as init
@@ -3693,6 +3717,7 @@
expose users to several CPU vulnerabilities.
Equivalent to: if nokaslr then kpti=0 [ARM64]
gather_data_sampling=off [X86]
+ indirect_target_selection=off [X86]
kvm.nx_huge_pages=off [X86]
l1tf=off [X86]
mds=off [X86]
@@ -5654,6 +5679,31 @@
are zero, rcutorture acts as if is interpreted
they are all non-zero.
+ rcutorture.gpwrap_lag= [KNL]
+ Enable grace-period wrap lag testing. Setting
+ to false prevents the gpwrap lag test from
+ running. Default is true.
+
+ rcutorture.gpwrap_lag_gps= [KNL]
+ Set the value for grace-period wrap lag during
+ active lag testing periods. This controls how many
+ grace periods differences we tolerate between
+ rdp and rnp's gp_seq before setting overflow flag.
+ The default is always set to 8.
+
+ rcutorture.gpwrap_lag_cycle_mins= [KNL]
+ Set the total cycle duration for gpwrap lag
+ testing in minutes. This is the total time for
+ one complete cycle of active and inactive
+ testing periods. Default is 30 minutes.
+
+ rcutorture.gpwrap_lag_active_mins= [KNL]
+ Set the duration for which gpwrap lag is active
+ within each cycle, in minutes. During this time,
+ the grace-period wrap lag will be set to the
+ value specified by gpwrap_lag_gps. Default is
+ 5 minutes.
+
rcutorture.irqreader= [KNL]
Run RCU readers from irq handlers, or, more
accurately, from a timer handler. Not all RCU
@@ -6250,7 +6300,7 @@
port and the regular usb controller gets disabled.
root= [KNL] Root filesystem
- Usually this a a block device specifier of some kind,
+ Usually this is a block device specifier of some kind,
see the early_lookup_bdev comment in
block/early-lookup.c for details.
Alternatively this can be "ram" for the legacy initial
@@ -6277,6 +6327,11 @@
Memory area to be used by remote processor image,
managed by CMA.
+ rt_group_sched= [KNL] Enable or disable SCHED_RR/FIFO group scheduling
+ when CONFIG_RT_GROUP_SCHED=y. Defaults to
+ !CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
+ Format: <bool>
+
rw [KNL] Mount root device read-write on boot
S [KNL] Run init in single mode
diff --git a/Documentation/admin-guide/namespaces/resource-control.rst b/Documentation/admin-guide/namespaces/resource-control.rst
index 369556e00f0c..553a44803231 100644
--- a/Documentation/admin-guide/namespaces/resource-control.rst
+++ b/Documentation/admin-guide/namespaces/resource-control.rst
@@ -1,17 +1,17 @@
-===========================
-Namespaces research control
-===========================
+====================================
+User namespaces and resource control
+====================================
-There are a lot of kinds of objects in the kernel that don't have
-individual limits or that have limits that are ineffective when a set
-of processes is allowed to switch user ids. With user namespaces
-enabled in a kernel for people who don't trust their users or their
-users programs to play nice this problems becomes more acute.
+The kernel contains many kinds of objects that either don't have
+individual limits or that have limits which are ineffective when
+a set of processes is allowed to switch their UID. On a system
+where the admins don't trust their users or their users' programs,
+user namespaces expose the system to potential misuse of resources.
-Therefore it is recommended that memory control groups be enabled in
-kernels that enable user namespaces, and it is further recommended
-that userspace configure memory control groups to limit how much
-memory user's they don't trust to play nice can use.
+In order to mitigate this, we recommend that admins enable memory
+control groups on any system that enables user namespaces.
+Furthermore, we recommend that admins configure the memory control
+groups to limit the maximum memory usable by any untrusted user.
Memory control groups can be configured by installing the libcgroup
package present on most distros editing /etc/cgrules.conf,
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 3950583f2b15..2d74af7f0efe 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -231,7 +231,7 @@ are the following:
present).
The existence of the limit may be a result of some (often unintentional)
- BIOS settings, restrictions coming from a service processor or another
+ BIOS settings, restrictions coming from a service processor or other
BIOS/HW-based mechanisms.
This does not cover ACPI thermal limitations which can be discovered
@@ -258,8 +258,8 @@ are the following:
extension on ARM). If one cannot be determined, this attribute should
not be present.
- Note, that failed attempt to retrieve current frequency for a given
- CPU(s) will result in an appropriate error, i.e: EAGAIN for CPU that
+ Note that failed attempt to retrieve current frequency for a given
+ CPU(s) will result in an appropriate error, i.e.: EAGAIN for CPU that
remains idle (raised on ARM).
``cpuinfo_max_freq``
@@ -499,7 +499,7 @@ This governor exposes the following tunables:
represented by it to be 1.5 times as high as the transition latency
(the default)::
- # echo `$(($(cat cpuinfo_transition_latency) * 3 / 2)) > ondemand/sampling_rate
+ # echo `$(($(cat cpuinfo_transition_latency) * 3 / 2))` > ondemand/sampling_rate
``up_threshold``
If the estimated CPU load is above this value (in percent), the governor
diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst
index 5940528146eb..ed6f055d4b14 100644
--- a/Documentation/admin-guide/pm/intel_idle.rst
+++ b/Documentation/admin-guide/pm/intel_idle.rst
@@ -38,6 +38,27 @@ instruction at all.
only way to pass early-configuration-time parameters to it is via the kernel
command line.
+Sysfs Interface
+===============
+
+The ``intel_idle`` driver exposes the following ``sysfs`` attributes in
+``/sys/devices/system/cpu/cpuidle/``:
+
+``intel_c1_demotion``
+ Enable or disable C1 demotion for all CPUs in the system. This file is
+ only exposed on platforms that support the C1 demotion feature and where
+ it was tested. Value 0 means that C1 demotion is disabled, value 1 means
+ that it is enabled. Write 0 or 1 to disable or enable C1 demotion for
+ all CPUs.
+
+ The C1 demotion feature involves the platform firmware demoting deep
+ C-state requests from the OS (e.g., C6 requests) to C1. The idea is that
+ firmware monitors CPU wake-up rate, and if it is higher than a
+ platform-specific threshold, the firmware demotes deep C-state requests
+ to C1. For example, Linux requests C6, but firmware noticed too many
+ wake-ups per second, and it keeps the CPU in C1. When the CPU stays in
+ C1 long enough, the platform promotes it back to C6. This may improve
+ some workloads' performance, but it may also increase power consumption.
.. _intel-idle-enumeration-of-states:
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index 78fc83ed2a7e..26e702c7016e 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -329,6 +329,106 @@ information listed above is the same for all of the processors supporting the
HWP feature, which is why ``intel_pstate`` works with all of them.]
+Support for Hybrid Processors
+=============================
+
+Some processors supported by ``intel_pstate`` contain two or more types of CPU
+cores differing by the maximum turbo P-state, performance vs power characteristics,
+cache sizes, and possibly other properties. They are commonly referred to as
+hybrid processors. To support them, ``intel_pstate`` requires HWP to be enabled
+and it assumes the HWP performance units to be the same for all CPUs in the
+system, so a given HWP performance level always represents approximately the
+same physical performance regardless of the core (CPU) type.
+
+Hybrid Processors with SMT
+--------------------------
+
+On systems where SMT (Simultaneous Multithreading), also referred to as
+HyperThreading (HT) in the context of Intel processors, is enabled on at least
+one core, ``intel_pstate`` assigns performance-based priorities to CPUs. Namely,
+the priority of a given CPU reflects its highest HWP performance level which
+causes the CPU scheduler to generally prefer more performant CPUs, so the less
+performant CPUs are used when the other ones are fully loaded. However, SMT
+siblings (that is, logical CPUs sharing one physical core) are treated in a
+special way such that if one of them is in use, the effective priority of the
+other ones is lowered below the priorities of the CPUs located in the other
+physical cores.
+
+This approach maximizes performance in the majority of cases, but unfortunately
+it also leads to excessive energy usage in some important scenarios, like video
+playback, which is not generally desirable. While there is no other viable
+choice with SMT enabled because the effective capacity and utilization of SMT
+siblings are hard to determine, hybrid processors without SMT can be handled in
+more energy-efficient ways.
+
+.. _CAS:
+
+Capacity-Aware Scheduling Support
+---------------------------------
+
+The capacity-aware scheduling (CAS) support in the CPU scheduler is enabled by
+``intel_pstate`` by default on hybrid processors without SMT. CAS generally
+causes the scheduler to put tasks on a CPU so long as there is a sufficient
+amount of spare capacity on it, and if the utilization of a given task is too
+high for it, the task will need to go somewhere else.
+
+Since CAS takes CPU capacities into account, it does not require CPU
+prioritization and it allows tasks to be distributed more symmetrically among
+the more performant and less performant CPUs. Once placed on a CPU with enough
+capacity to accommodate it, a task may just continue to run there regardless of
+whether or not the other CPUs are fully loaded, so on average CAS reduces the
+utilization of the more performant CPUs which causes the energy usage to be more
+balanced because the more performant CPUs are generally less energy-efficient
+than the less performant ones.
+
+In order to use CAS, the scheduler needs to know the capacity of each CPU in
+the system and it needs to be able to compute scale-invariant utilization of
+CPUs, so ``intel_pstate`` provides it with the requisite information.
+
+First of all, the capacity of each CPU is represented by the ratio of its highest
+HWP performance level, multiplied by 1024, to the highest HWP performance level
+of the most performant CPU in the system, which works because the HWP performance
+units are the same for all CPUs. Second, the frequency-invariance computations,
+carried out by the scheduler to always express CPU utilization in the same units
+regardless of the frequency it is currently running at, are adjusted to take the
+CPU capacity into account. All of this happens when ``intel_pstate`` has
+registered itself with the ``CPUFreq`` core and it has figured out that it is
+running on a hybrid processor without SMT.
+
+Energy-Aware Scheduling Support
+-------------------------------
+
+If ``CONFIG_ENERGY_MODEL`` has been set during kernel configuration and
+``intel_pstate`` runs on a hybrid processor without SMT, in addition to enabling
+`CAS <CAS_>`_ it registers an Energy Model for the processor. This allows the
+Energy-Aware Scheduling (EAS) support to be enabled in the CPU scheduler if
+``schedutil`` is used as the ``CPUFreq`` governor which requires ``intel_pstate``
+to operate in the `passive mode <Passive Mode_>`_.
+
+The Energy Model registered by ``intel_pstate`` is artificial (that is, it is
+based on abstract cost values and it does not include any real power numbers)
+and it is relatively simple to avoid unnecessary computations in the scheduler.
+There is a performance domain in it for every CPU in the system and the cost
+values for these performance domains have been chosen so that running a task on
+a less performant (small) CPU appears to be always cheaper than running that
+task on a more performant (big) CPU. However, for two CPUs of the same type,
+the cost difference depends on their current utilization, and the CPU whose
+current utilization is higher generally appears to be a more expensive
+destination for a given task. This helps to balance the load among CPUs of the
+same type.
+
+Since EAS works on top of CAS, high-utilization tasks are always migrated to
+CPUs with enough capacity to accommodate them, but thanks to EAS, low-utilization
+tasks tend to be placed on the CPUs that look less expensive to the scheduler.
+Effectively, this causes the less performant and less loaded CPUs to be
+preferred as long as they have enough spare capacity to run the given task
+which generally leads to reduced energy usage.
+
+The Energy Model created by ``intel_pstate`` can be inspected by looking at
+the ``energy_model`` directory in ``debugfs`` (typlically mounted on
+``/sys/kernel/debug/``).
+
+
User Space Interface in ``sysfs``
=================================
@@ -697,8 +797,8 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
Limits`_ for details).
``no_cas``
- Do not enable capacity-aware scheduling (CAS) which is enabled by
- default on hybrid systems.
+ Do not enable `capacity-aware scheduling <CAS_>`_ which is enabled by
+ default on hybrid systems without SMT.
Diagnostics and Tuning
======================
diff --git a/Documentation/admin-guide/quickly-build-trimmed-linux.rst b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
index 07cfd8863b46..4a5ffb0996a3 100644
--- a/Documentation/admin-guide/quickly-build-trimmed-linux.rst
+++ b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
@@ -347,7 +347,7 @@ again.
[:ref:`details<uninstall>`]
-.. _submit_improvements:
+.. _submit_improvements_qbtl:
Did you run into trouble following any of the above steps that is not cleared up
by the reference section below? Or do you have ideas how to improve the text?
@@ -1070,7 +1070,7 @@ complicated, and harder to follow.
That being said: this of course is a balancing act. Hence, if you think an
additional use-case is worth describing, suggest it to the maintainers of this
-document, as :ref:`described above <submit_improvements>`.
+document, as :ref:`described above <submit_improvements_qbtl>`.
..
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index 2fd5a030235a..9a847506f6ec 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -41,7 +41,7 @@ If you are facing multiple issues with the Linux kernel at once, report each
separately. While writing your report, include all information relevant to the
issue, like the kernel and the distro used. In case of a regression, CC the
regressions mailing list (regressions@lists.linux.dev) to your report. Also try
-to pin-point the culprit with a bisection; if you succeed, include its
+to pinpoint the culprit with a bisection; if you succeed, include its
commit-id and CC everyone in the sign-off-by chain.
Once the report is out, answer any questions that come up and help where you
@@ -206,7 +206,7 @@ Reporting issues only occurring in older kernel version lines
This subsection is for you, if you tried the latest mainline kernel as outlined
above, but failed to reproduce your issue there; at the same time you want to
see the issue fixed in a still supported stable or longterm series or vendor
-kernels regularly rebased on those. If that the case, follow these steps:
+kernels regularly rebased on those. If that is the case, follow these steps:
* Prepare yourself for the possibility that going through the next few steps
might not get the issue solved in older releases: the fix might be too big
@@ -312,7 +312,7 @@ small modifications to a kernel based on a recent Linux version; that for
example often holds true for the mainline kernels shipped by Debian GNU/Linux
Sid or Fedora Rawhide. Some developers will also accept reports about issues
with kernels from distributions shipping the latest stable kernel, as long as
-its only slightly modified; that for example is often the case for Arch Linux,
+it's only slightly modified; that for example is often the case for Arch Linux,
regular Fedora releases, and openSUSE Tumbleweed. But keep in mind, you better
want to use a mainline Linux and avoid using a stable kernel for this
process, as outlined in the section 'Install a fresh kernel for testing' in more
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 8290177b4f75..d385985b305f 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -75,6 +75,7 @@ Currently, these files are in /proc/sys/vm:
- unprivileged_userfaultfd
- user_reserve_kbytes
- vfs_cache_pressure
+- vfs_cache_pressure_denom
- watermark_boost_factor
- watermark_scale_factor
- zone_reclaim_mode
@@ -1017,19 +1018,28 @@ vfs_cache_pressure
This percentage value controls the tendency of the kernel to reclaim
the memory which is used for caching of directory and inode objects.
-At the default value of vfs_cache_pressure=100 the kernel will attempt to
-reclaim dentries and inodes at a "fair" rate with respect to pagecache and
-swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
-to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
-never reclaim dentries and inodes due to memory pressure and this can easily
-lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
-causes the kernel to prefer to reclaim dentries and inodes.
+At the default value of vfs_cache_pressure=vfs_cache_pressure_denom the kernel
+will attempt to reclaim dentries and inodes at a "fair" rate with respect to
+pagecache and swapcache reclaim. Decreasing vfs_cache_pressure causes the
+kernel to prefer to retain dentry and inode caches. When vfs_cache_pressure=0,
+the kernel will never reclaim dentries and inodes due to memory pressure and
+this can easily lead to out-of-memory conditions. Increasing vfs_cache_pressure
+beyond vfs_cache_pressure_denom causes the kernel to prefer to reclaim dentries
+and inodes.
-Increasing vfs_cache_pressure significantly beyond 100 may have negative
-performance impact. Reclaim code needs to take various locks to find freeable
-directory and inode objects. With vfs_cache_pressure=1000, it will look for
-ten times more freeable objects than there are.
+Increasing vfs_cache_pressure significantly beyond vfs_cache_pressure_denom may
+have negative performance impact. Reclaim code needs to take various locks to
+find freeable directory and inode objects. When vfs_cache_pressure equals
+(10 * vfs_cache_pressure_denom), it will look for ten times more freeable
+objects than there are.
+Note: This setting should always be used together with vfs_cache_pressure_denom.
+
+vfs_cache_pressure_denom
+========================
+
+Defaults to 100 (minimum allowed value). Requires corresponding
+vfs_cache_pressure setting to take effect.
watermark_boost_factor
======================
diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
index 03c55151346c..d8946b084b1e 100644
--- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
@@ -267,7 +267,7 @@ culprit might be known already. For further details on what actually qualifies
as a regression check out Documentation/admin-guide/reporting-regressions.rst.
If you run into any problems while following this guide or have ideas how to
-improve it, :ref:`please let the kernel developers know <submit_improvements>`.
+improve it, :ref:`please let the kernel developers know <submit_improvements_vbbr>`.
.. _introprep_bissbs:
@@ -1055,7 +1055,7 @@ follow these instructions.
[:ref:`details <introoptional_bisref>`]
-.. _submit_improvements:
+.. _submit_improvements_vbbr:
Conclusion
----------
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index 5becb441c3cb..a18328a5fb93 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -151,6 +151,17 @@ When mounting an XFS filesystem, the following options are accepted.
optional, and the log section can be separate from the data
section or contained within it.
+ max_atomic_write=value
+ Set the maximum size of an atomic write. The size may be
+ specified in bytes, in kilobytes with a "k" suffix, in megabytes
+ with a "m" suffix, or in gigabytes with a "g" suffix. The size
+ cannot be larger than the maximum write size, larger than the
+ size of any allocation group, or larger than the size of a
+ remapping operation that the log can complete atomically.
+
+ The default value is to set the maximum I/O completion size
+ to allow each CPU to handle one at a time.
+
max_open_zones=value
Specify the max number of zones to keep open for writing on a
zoned rt device. Many open zones aids file data separation
diff --git a/Documentation/arch/powerpc/htm.rst b/Documentation/arch/powerpc/htm.rst
new file mode 100644
index 000000000000..fcb4eb6306b1
--- /dev/null
+++ b/Documentation/arch/powerpc/htm.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _htm:
+
+===================================
+HTM (Hardware Trace Macro)
+===================================
+
+Athira Rajeev, 2 Mar 2025
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+H_HTM is used as an interface for executing Hardware Trace Macro (HTM)
+functions, including setup, configuration, control and dumping of the HTM data.
+For using HTM, it is required to setup HTM buffers and HTM operations can
+be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip
+of the system from within a partition itself. To use this feature, a debugfs
+folder called "htmdump" is present under /sys/kernel/debug/powerpc.
+
+
+HTM debugfs example usage
+=========================
+
+.. code-block:: sh
+
+ # ls /sys/kernel/debug/powerpc/htmdump/
+ coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup
+ htmstart htmstatus htmtype nodalchipindex nodeindex trace
+
+Details on each file:
+
+* nodeindex, nodalchipindex, coreindexonchip specifies which partition to configure the HTM for.
+* htmtype: specifies the type of HTM. Supported target is hardwareTarget.
+* trace: is to read the HTM data.
+* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 to the file will do deconfigure.
+* htmstart: start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 to the file will stop the tracing.
+* htmstatus: get the status of HTM. This is needed to understand the HTM state after each operation.
+* htmsetup: set the HTM buffer size. Size of HTM buffer is in power of 2
+* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip.
+* htmcaps : provides the HTM capabilities like minimum/maximum buffer size, what kind of tracing the HTM supports etc.
+* htmflags : allows to pass flags to hcall. Currently supports controlling the wrapping of HTM buffer.
+
+To see the system processor configuration details:
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file
+
+The result can be interpreted using hexdump.
+
+To collect HTM traces for a partition represented by nodeindex as
+zero, nodalchipindex as 1 and coreindexonchip as 12
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 33 > htmsetup ( sets 8GB memory for HTM buffer, number is size in power of 2 )
+
+This requires a CEC reboot to get the HTM buffers allocated.
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 0 > nodeindex
+ # echo 1 > nodalchipindex
+ # echo 12 > coreindexonchip
+ # echo 1 > htmflags # to set noWrap for HTM buffers
+ # echo 1 > htmconfigure # Configure the HTM
+ # echo 1 > htmstart # Start the HTM
+ # echo 0 > htmstart # Stop the HTM
+ # echo 0 > htmconfigure # Deconfigure the HTM
+ # cat htmstatus # Dump the status of HTM entries as data
+
+Above will set the htmtype and core details, followed by executing respective HTM operation.
+
+Read the HTM trace data
+========================
+
+After starting the trace collection, run the workload
+of interest. Stop the trace collection after required period
+of time, and read the trace file.
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file
+
+This trace file will contain the relevant instruction traces
+collected during the workload execution. And can be used as
+input file for trace decoders to understand data.
+
+Benefits of using HTM debugfs interface
+=======================================
+
+It is now possible to collect traces for a particular core/chip
+from within any partition of the system and decode it. Through
+this enablement, a small partition can be dedicated to collect the
+trace data and analyze to provide important information for Performance
+analysis, Software tuning, or Hardware debug.
diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst
index 5defd13cc6c1..574592505604 100644
--- a/Documentation/arch/powerpc/kvm-nested.rst
+++ b/Documentation/arch/powerpc/kvm-nested.rst
@@ -208,13 +208,9 @@ associated values for each ID in the GSB::
flags:
Bit 0: getGuestWideState: Request state of the Guest instead
of an individual VCPU.
- Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking
- over ownership of the VCPU state and that the L0 can free
- the storage holding the state. The VCPU state will need to
- be returned to the Hypervisor via H_GUEST_SET_STATE prior
- to H_GUEST_RUN_VCPU being called for this VCPU. The data
- returned in the dataBuffer is in a Hypervisor internal
- format.
+ Bit 1: getHostWideState: Request stats of the Host. This causes
+ the guestId and vcpuId parameters to be ignored and attempting
+ to get the VCPU/Guest state will cause an error.
Bits 2-63: Reserved
guestId: ID obtained from H_GUEST_CREATE
vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU
@@ -406,9 +402,10 @@ the partition like the timebase offset and partition scoped page
table information.
+--------+-------+----+--------+----------------------------------+
-| ID | Size | RW | Thread | Details |
-| | Bytes | | Guest | |
-| | | | Scope | |
+| ID | Size | RW |(H)ost | Details |
+| | Bytes | |(G)uest | |
+| | | |(T)hread| |
+| | | |Scope | |
+========+=======+====+========+==================================+
| 0x0000 | | RW | TG | NOP element |
+--------+-------+----+--------+----------------------------------+
@@ -434,6 +431,29 @@ table information.
| | | | |- 0x8 Table size. |
+--------+-------+----+--------+----------------------------------+
| 0x0007-| | | | Reserved |
+| 0x07FF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x0800 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Management Space |
+| | | | | for an L1-Lpar. |
++--------+-------+----+--------+----------------------------------+
+| 0x0801 | 0x08 | R | H | Max bytes available in the |
+| | | | | L0's Guest Management Space for |
+| | | | | an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0802 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0803 | 0x08 | R | H | Max bytes available in the L0's |
+| | | | | Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from |
+| | | | | L0 Guest's Page Table Management |
+| | | | | Space due to overcommit |
++--------+-------+----+--------+----------------------------------+
+| 0x0805-| | | | Reserved |
| 0x0BFF | | | | |
+--------+-------+----+--------+----------------------------------+
| 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: |
diff --git a/Documentation/arch/x86/amd-debugging.rst b/Documentation/arch/x86/amd-debugging.rst
new file mode 100644
index 000000000000..d92bf59d62c7
--- /dev/null
+++ b/Documentation/arch/x86/amd-debugging.rst
@@ -0,0 +1,368 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Debugging AMD Zen systems
++++++++++++++++++++++++++
+
+Introduction
+============
+
+This document describes techniques that are useful for debugging issues with
+AMD Zen systems. It is intended for use by developers and technical users
+to help identify and resolve issues.
+
+S3 vs s2idle
+============
+
+On AMD systems, it's not possible to simultaneously support suspend-to-RAM (S3)
+and suspend-to-idle (s2idle). To confirm which mode your system supports you
+can look at ``cat /sys/power/mem_sleep``. If it shows ``s2idle [deep]`` then
+*S3* is supported. If it shows ``[s2idle]`` then *s2idle* is
+supported.
+
+On systems that support *S3*, the firmware will be utilized to put all hardware into
+the appropriate low power state.
+
+On systems that support *s2idle*, the kernel will be responsible for transitioning devices
+into the appropriate low power state. When all devices are in the appropriate low
+power state, the hardware will transition into a hardware sleep state.
+
+After a suspend cycle you can tell how much time was spent in a hardware sleep
+state by looking at ``cat /sys/power/suspend_stats/last_hw_sleep``.
+
+This flowchart explains how the AMD s2idle suspend flow works.
+
+.. kernel-figure:: suspend.svg
+
+This flowchart explains how the amd s2idle resume flow works.
+
+.. kernel-figure:: resume.svg
+
+s2idle debugging tool
+=====================
+
+As there are a lot of places that problems can occur, a debugging tool has been
+created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+that can help test for common problems and offer suggestions.
+
+If you have an s2idle issue, it's best to start with this and follow instructions
+from its findings. If you continue to have an issue, raise a bug with the
+report generated from this script to
+`drm/amd gitlab <https://gitlab.freedesktop.org/drm/amd/-/issues/new?issuable_template=s2idle_BUG_TEMPLATE>`_.
+
+Spurious s2idle wakeups from an IRQ
+===================================
+
+Spurious wakeups will generally have an IRQ set to ``/sys/power/pm_wakeup_irq``.
+This can be matched to ``/proc/interrupts`` to determine what device woke the system.
+
+If this isn't enough to debug the problem, then the following sysfs files
+can be set to add more verbosity to the wakeup process: ::
+
+ # echo 1 | sudo tee /sys/power/pm_debug_messages
+ # echo 1 | sudo tee /sys/power/pm_print_times
+
+After making those changes, the kernel will display messages that can
+be traced back to kernel s2idle loop code as well as display any active
+GPIO sources while waking up.
+
+If the wakeup is caused by the ACPI SCI, additional ACPI debugging may be
+needed. These commands can enable additional trace data: ::
+
+ # echo enable | sudo tee /sys/module/acpi/parameters/trace_state
+ # echo 1 | sudo tee /sys/module/acpi/parameters/aml_debug_output
+ # echo 0x0800000f | sudo tee /sys/module/acpi/parameters/debug_level
+ # echo 0xffff0000 | sudo tee /sys/module/acpi/parameters/debug_layer
+
+Spurious s2idle wakeups from a GPIO
+===================================
+
+If a GPIO is active when waking up the system ideally you would look at the
+schematic to determine what device it is associated with. If the schematic
+is not available, another tactic is to look at the ACPI _EVT() entry
+to determine what device is notified when that GPIO is active.
+
+For a hypothetical example, say that GPIO 59 woke up the system. You can
+look at the SSDT to determine what device is notified when GPIO 59 is active.
+
+First convert the GPIO number into hex. ::
+
+ $ python3 -c "print(hex(59))"
+ 0x3b
+
+Next determine which ACPI table has the ``_EVT`` entry. For example: ::
+
+ $ sudo grep EVT /sys/firmware/acpi/tables/SSDT*
+ grep: /sys/firmware/acpi/tables/SSDT27: binary file matches
+
+Decode this table::
+
+ $ sudo cp /sys/firmware/acpi/tables/SSDT27 .
+ $ sudo iasl -d SSDT27
+
+Then look at the table and find the matching entry for GPIO 0x3b. ::
+
+ Case (0x3B)
+ {
+ M000 (0x393B)
+ M460 (" Notify (\\_SB.PCI0.GP17.XHC1, 0x02)\n", Zero, Zero, Zero, Zero, Zero, Zero)
+ Notify (\_SB.PCI0.GP17.XHC1, 0x02) // Device Wake
+ }
+
+You can see in this case that the device ``\_SB.PCI0.GP17.XHC1`` is notified
+when GPIO 59 is active. It's obvious this is an XHCI controller, but to go a
+step further you can figure out which XHCI controller it is by matching it to
+ACPI.::
+
+ $ grep "PCI0.GP17.XHC1" /sys/bus/acpi/devices/*/path
+ /sys/bus/acpi/devices/device:2d/path:\_SB_.PCI0.GP17.XHC1
+ /sys/bus/acpi/devices/device:2e/path:\_SB_.PCI0.GP17.XHC1.RHUB
+ /sys/bus/acpi/devices/device:2f/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1
+ /sys/bus/acpi/devices/device:30/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM0
+ /sys/bus/acpi/devices/device:31/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM1
+ /sys/bus/acpi/devices/device:32/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT2
+ /sys/bus/acpi/devices/LNXPOWER:0d/path:\_SB_.PCI0.GP17.XHC1.PWRS
+
+Here you can see it matches to ``device:2d``. Look at the ``physical_node``
+to determine what PCI device that actually is. ::
+
+ $ ls -l /sys/bus/acpi/devices/device:2d/physical_node
+ lrwxrwxrwx 1 root root 0 Feb 12 13:22 /sys/bus/acpi/devices/device:2d/physical_node -> ../../../../../pci0000:00/0000:00:08.1/0000:c2:00.4
+
+So there you have it: the PCI device associated with this GPIO wakeup was ``0000:c2:00.4``.
+
+The ``amd_s2idle.py`` script will capture most of these artifacts for you.
+
+s2idle PM debug messages
+========================
+
+During the s2idle flow on AMD systems, the ACPI LPS0 driver is responsible
+to check all uPEP constraints. Failing uPEP constraints does not prevent
+s0i3 entry. This means that if some constraints are not met, it is possible
+the kernel may attempt to enter s2idle even if there are some known issues.
+
+To activate PM debugging, either specify ``pm_debug_messagess`` kernel
+command-line option at boot or write to ``/sys/power/pm_debug_messages``.
+Unmet constraints will be displayed in the kernel log and can be
+viewed by logging tools that process kernel ring buffer like ``dmesg`` or
+``journalctl``."
+
+If the system freezes on entry/exit before these messages are flushed, a
+useful debugging tactic is to unbind the ``amd_pmc`` driver to prevent
+notification to the platform to start s0i3 entry. This will stop the
+system from freezing on entry or exit and let you view all the failed
+constraints. ::
+
+ cd /sys/bus/platform/drivers/amd_pmc
+ ls | grep AMD | sudo tee unbind
+
+After doing this, run the suspend cycle and look specifically for errors around: ::
+
+ ACPI: LPI: Constraint not met; min power state:%s current power state:%s
+
+Historical examples of s2idle issues
+====================================
+
+To help understand the types of issues that can occur and how to debug them,
+here are some historical examples of s2idle issues that have been resolved.
+
+Core offlining
+--------------
+An end user had reported that taking a core offline would prevent the system
+from properly entering s0i3. This was debugged using internal AMD tools
+to capture and display a stream of metrics from the hardware showing what changed
+when a core was offlined. It was determined that the hardware didn't get
+notification the offline cores were in the deepest state, and so it prevented
+CPU from going into the deepest state. The issue was debugged to a missing
+command to put cores into C3 upon offline.
+
+`commit d6b88ce2eb9d2 ("ACPI: processor idle: Allow playing dead in C3 state") <https://git.kernel.org/torvalds/c/d6b88ce2eb9d2>`_
+
+Corruption after resume
+-----------------------
+A big problem that occurred with Rembrandt was that there was graphical
+corruption after resume. This happened because of a misalignment of PSP
+and driver responsibility. The PSP will save and restore DMCUB, but the
+driver assumed it needed to reset DMCUB on resume.
+This actually was a misalignment for earlier silicon as well, but was not
+observed.
+
+`commit 79d6b9351f086 ("drm/amd/display: Don't reinitialize DMCUB on s0ix resume") <https://git.kernel.org/torvalds/c/79d6b9351f086>`_
+
+Back to Back suspends fail
+--------------------------
+When using a wakeup source that triggers the IRQ to wakeup, a bug in the
+pinctrl-amd driver may capture the wrong state of the IRQ and prevent the
+system going back to sleep properly.
+
+`commit b8c824a869f22 ("pinctrl: amd: Don't save/restore interrupt status and wake status bits") <https://git.kernel.org/torvalds/c/b8c824a869f22>`_
+
+Spurious timer based wakeup after 5 minutes
+-------------------------------------------
+The HPET was being used to program the wakeup source for the system, however
+this was causing a spurious wakeup after 5 minutes. The correct alarm to use
+was the ACPI alarm.
+
+`commit 3d762e21d5637 ("rtc: cmos: Use ACPI alarm for non-Intel x86 systems too") <https://git.kernel.org/torvalds/c/3d762e21d5637>`_
+
+Disk disappears after resume
+----------------------------
+After resuming from s2idle, the NVME disk would disappear. This was due to the
+BIOS not specifying the _DSD StorageD3Enable property. This caused the NVME
+driver not to put the disk into the expected state at suspend and to fail
+on resume.
+
+`commit e79a10652bbd3 ("ACPI: x86: Force StorageD3Enable on more products") <https://git.kernel.org/torvalds/c/e79a10652bbd3>`_
+
+Spurious IRQ1
+-------------
+A number of Renoir, Lucienne, Cezanne, & Barcelo platforms have a
+platform firmware bug where IRQ1 is triggered during s0i3 resume.
+
+This was fixed in the platform firmware, but a number of systems didn't
+receive any more platform firmware updates.
+
+`commit 8e60615e89321 ("platform/x86/amd: pmc: Disable IRQ1 wakeup for RN/CZN") <https://git.kernel.org/torvalds/c/8e60615e89321>`_
+
+Hardware timeout
+----------------
+The hardware performs many actions besides accepting the values from
+amd-pmc driver. As the communication path with the hardware is a mailbox,
+it's possible that it might not respond quickly enough.
+This issue manifested as a failure to suspend: ::
+
+ PM: dpm_run_callback(): acpi_subsys_suspend_noirq+0x0/0x50 returns -110
+ amd_pmc AMDI0005:00: PM: failed to suspend noirq: error -110
+
+The timing problem was identified by comparing the values of the idle mask.
+
+`commit 3c3c8e88c8712 ("platform/x86: amd-pmc: Increase the response register timeout") <https://git.kernel.org/torvalds/c/3c3c8e88c8712>`_
+
+Failed to reach hardware sleep state with panel on
+--------------------------------------------------
+On some Strix systems certain panels were observed to block the system from
+entering a hardware sleep state if the internal panel was on during the sequence.
+
+Even though the panel got turned off during suspend it exposed a timing problem
+where an interrupt caused the display hardware to wake up and block low power
+state entry.
+
+`commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
+
+Runtime power consumption issues
+================================
+
+Runtime power consumption is influenced by many factors, including but not
+limited to the configuration of the PCIe Active State Power Management (ASPM),
+the display brightness, the EPP policy of the CPU, and the power management
+of the devices.
+
+ASPM
+----
+For the best runtime power consumption, ASPM should be programmed as intended
+by the BIOS from the hardware vendor. To accomplish this the Linux kernel
+should be compiled with ``CONFIG_PCIEASPM_DEFAULT`` set to ``y`` and the
+sysfs file ``/sys/module/pcie_aspm/parameters/policy`` should not be modified.
+
+Most notably, if L1.2 is not configured properly for any devices, the SoC
+will not be able to enter the deepest idle state.
+
+EPP Policy
+----------
+The ``energy_performance_preference`` sysfs file can be used to set a bias
+of efficiency or performance for a CPU. This has a direct relationship on
+the battery life when more heavily biased towards performance.
+
+
+BIOS debug messages
+===================
+
+Most OEM machines don't have a serial UART for outputting kernel or BIOS
+debug messages. However BIOS debug messages are useful for understanding
+both BIOS bugs and bugs with the Linux kernel drivers that call BIOS AML.
+
+As the BIOS on most OEM AMD systems are based off an AMD reference BIOS,
+the infrastructure used for exporting debugging messages is often the same
+as AMD reference BIOS.
+
+Manually Parsing
+----------------
+There is generally an ACPI method ``\M460`` that different paths of the AML
+will call to emit a message to the BIOS serial log. This method takes
+7 arguments, with the first being a string and the rest being optional
+integers::
+
+ Method (M460, 7, Serialized)
+
+Here is an example of a string that BIOS AML may call out using ``\M460``::
+
+ M460 (" OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", DADR, Arg0, Arg1, PCSA, Zero, Zero)
+
+Normally when executed, the ``\M460`` method would populate the additional
+arguments into the string. In order to get these messages from the Linux
+kernel a hook has been added into ACPICA that can capture the *arguments*
+sent to ``\M460`` and print them to the kernel ring buffer.
+For example the following message could be emitted into kernel ring buffer::
+
+ extrace-0174 ex_trace_args : " OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", ec106000, 2, 1, 1, 0, 0
+
+In order to get these messages, you need to compile with ``CONFIG_ACPI_DEBUG``
+and then turn on the following ACPICA tracing parameters.
+This can be done either on the kernel command line or at runtime:
+
+* ``acpi.trace_method_name=\M460``
+* ``acpi.trace_state=method``
+
+NOTE: These can be very noisy at bootup. If you turn these parameters on
+the kernel command, please also consider turning up ``CONFIG_LOG_BUF_SHIFT``
+to a larger size such as 17 to avoid losing early boot messages.
+
+Tool assisted Parsing
+---------------------
+As mentioned above, parsing by hand can be tedious, especially with a lot of
+messages. To help with this, a tool has been created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+to help parse the messages.
+
+Random reboot issues
+====================
+
+When a random reboot occurs, the high-level reason for the reboot is stored
+in a register that will persist onto the next boot.
+
+There are 6 classes of reasons for the reboot:
+ * Software induced
+ * Power state transition
+ * Pin induced
+ * Hardware induced
+ * Remote reset
+ * Internal CPU event
+
+.. csv-table::
+ :header: "Bit", "Type", "Reason"
+ :align: left
+
+ "0", "Pin", "thermal pin BP_THERMTRIP_L was tripped"
+ "1", "Pin", "power button was pressed for 4 seconds"
+ "2", "Pin", "shutdown pin was tripped"
+ "4", "Remote", "remote ASF power off command was received"
+ "9", "Internal", "internal CPU thermal limit was tripped"
+ "16", "Pin", "system reset pin BP_SYS_RST_L was tripped"
+ "17", "Software", "software issued PCI reset"
+ "18", "Software", "software wrote 0x4 to reset control register 0xCF9"
+ "19", "Software", "software wrote 0x6 to reset control register 0xCF9"
+ "20", "Software", "software wrote 0xE to reset control register 0xCF9"
+ "21", "ACPI-state", "ACPI power state transition occurred"
+ "22", "Pin", "keyboard reset pin KB_RST_L was tripped"
+ "23", "Internal", "internal CPU shutdown event occurred"
+ "24", "Hardware", "system failed to boot before failed boot timer expired"
+ "25", "Hardware", "hardware watchdog timer expired"
+ "26", "Remote", "remote ASF reset command was received"
+ "27", "Internal", "an uncorrected error caused a data fabric sync flood event"
+ "29", "Internal", "FCH and MP1 failed warm reset handshake"
+ "30", "Internal", "a parity error occurred"
+ "31", "Internal", "a software sync flood event occurred"
+
+This information is read by the kernel at bootup and printed into
+the syslog. When a random reboot occurs this message can be helpful
+to determine the next component to debug.
diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst
index f80e2a558d2a..dd8b7806944e 100644
--- a/Documentation/arch/x86/cpuinfo.rst
+++ b/Documentation/arch/x86/cpuinfo.rst
@@ -173,10 +173,10 @@ For example, when an old kernel is running on new hardware.
The kernel disabled support for it at compile-time
--------------------------------------------------
-For example, if 5-level-paging is not enabled when building (i.e.,
-CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_.
+For example, if Linear Address Masking (LAM) is not enabled when building (i.e.,
+CONFIG_ADDRESS_MASKING is not selected) the flag "lam" will not show up.
Even though the feature will still be detected via CPUID, the kernel disables
-it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57).
+it by clearing via setup_clear_cpu_cap(X86_FEATURE_LAM).
The feature is disabled at boot-time
------------------------------------
@@ -200,5 +200,3 @@ missing at runtime. For example, AVX flags will not show up if XSAVE feature
is disabled since they depend on XSAVE feature. Another example would be broken
CPUs and them missing microcode patches. Due to that, the kernel decides not to
enable a feature.
-
-.. [#f1] 5-level paging uses linear address of 57 bits.
diff --git a/Documentation/arch/x86/index.rst b/Documentation/arch/x86/index.rst
index 8ac64d7de4dc..8ea762494bcc 100644
--- a/Documentation/arch/x86/index.rst
+++ b/Documentation/arch/x86/index.rst
@@ -25,13 +25,13 @@ x86-specific Documentation
shstk
iommu
intel_txt
+ amd-debugging
amd-memory-encryption
amd_hsmp
tdx
pti
mds
microcode
- resctrl
tsx_async_abort
buslock
usb-legacy-support
diff --git a/Documentation/arch/x86/resume.svg b/Documentation/arch/x86/resume.svg
new file mode 100644
index 000000000000..ad8839f762bf
--- /dev/null
+++ b/Documentation/arch/x86/resume.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Do not edit this file with editors other than draw.io -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="582px" height="1152px" viewBox="-0.5 -0.5 582 1152" content="&lt;mxfile host=&quot;confluence.amd.com&quot; agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0&quot; version=&quot;24.7.10&quot; scale=&quot;1&quot; border=&quot;0&quot;&gt;&#10; &lt;diagram id=&quot;lFF5s3GfZ4Py3fAf5dUH&quot; name=&quot;Page-1&quot;&gt;&#10; &lt;mxGraphModel dx=&quot;2364&quot; dy=&quot;1473&quot; grid=&quot;1&quot; gridSize=&quot;10&quot; guides=&quot;1&quot; tooltips=&quot;1&quot; connect=&quot;1&quot; arrows=&quot;1&quot; fold=&quot;1&quot; page=&quot;0&quot; pageScale=&quot;1&quot; pageWidth=&quot;850&quot; pageHeight=&quot;1100&quot; math=&quot;0&quot; shadow=&quot;0&quot;&gt;&#10; &lt;root&gt;&#10; &lt;mxCell id=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;1&quot; parent=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-10&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-1&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-1&quot; value=&quot;Wakeup event occurs&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.start_2;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-8&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-4&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-56&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; value=&quot;MP1 hands off control to OS&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-4&quot; value=&quot;&amp;lt;span style=&amp;quot;background-color: rgb(232, 205, 151);&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: center; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; float: none; display: inline !important;&amp;quot;&amp;gt;OS Moves one core out of ACPI C3&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;&amp;lt;/span&amp;gt;&quot; style=&quot;verticalLabelPosition=middle;verticalAlign=middle;html=1;shape=process;whiteSpace=wrap;rounded=1;size=0.14;arcSize=6;labelPosition=center;align=center;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;-170&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-11&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot; value=&quot;MP0/MP1 boot process&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-80&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-27&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot; value=&quot;OS checks all wake sources&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;-40&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-29&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-44&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-29&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.28&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;8&quot; y=&quot;-8&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-34&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-41&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-34&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.1165&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;153&quot; y=&quot;11&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; value=&quot;ACPI fixed &amp;lt;br&amp;gt;event active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;260&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-28&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-43&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-28&quot;&gt;&#10; &lt;mxGeometry y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;8&quot; y=&quot;-15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-57&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-58&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-57&quot;&gt;&#10; &lt;mxGeometry x=&quot;0.0145&quot; y=&quot;1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;102&quot; y=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; value=&quot;IRQ other &amp;lt;br&amp;gt;than ACPI SCI active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-30&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;560&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-65&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-30&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.4694&quot; y=&quot;1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-36&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-45&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-36&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.2867&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;5&quot; y=&quot;8&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot; value=&quot;GPIO&amp;lt;br&amp;gt;IRQ shared&amp;lt;br&amp;gt;with SCI&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;410&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-31&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;660&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;710&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-32&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;810&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-48&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-32&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.1714&quot; y=&quot;-4&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;14&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-52&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-53&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-52&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.5259&quot; y=&quot;-5&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;220&quot; y=&quot;15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-62&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-64&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-62&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.7472&quot; y=&quot;3&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-92&quot; y=&quot;13&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; value=&quot;Any PM&amp;lt;br&amp;gt;wakeup event&amp;lt;br&amp;gt;pending&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;860&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-24&quot; value=&quot;Kernel resumes system&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;-20&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-26&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-24&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot; value=&quot;uPEP driver removes OS_HINT&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-37&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-40&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-37&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.56&quot; y=&quot;-2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;67&quot; y=&quot;12&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-60&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=-0.008;entryY=0.422;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-38&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-50&quot; y=&quot;535&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;20&quot; y=&quot;685&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-61&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-60&quot;&gt;&#10; &lt;mxGeometry x=&quot;0.1126&quot; y=&quot;-3&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-16&quot; y=&quot;-85&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; value=&quot;Any GPIO&amp;lt;br&amp;gt;w/ WAKESTS&amp;lt;br&amp;gt;active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-90&quot; y=&quot;410&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-38&quot; value=&quot;Check for ACPI Notify() events&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;560&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-49&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-50&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-51&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-50&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.4506&quot; y=&quot;-2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;215&quot; y=&quot;12&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; value=&quot;Any GPE &amp;lt;br&amp;gt;pending&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;710&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-63&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-55&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot; value=&quot;OS moves active &amp;lt;br&amp;gt;core back to&amp;lt;br&amp;gt;ACPI C3&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-55&quot; value=&quot;MP1 puts system back to sleep&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F27979;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;-20&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;/root&gt;&#10; &lt;/mxGraphModel&gt;&#10; &lt;/diagram&gt;&#10;&lt;/mxfile&gt;&#10;"><defs/><g><g data-cell-id="0"><g data-cell-id="1"><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-10"><g><path d="M 101 51 L 154.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 51 L 152.88 54.5 L 154.63 51 L 152.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-1"><g><ellipse cx="51" cy="51" rx="50" ry="50" fill="#0cf232" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Wakeup event occurs</div></div></div></foreignObject><image x="2" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-8"><g><path d="M 421 51 L 474.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 479.88 51 L 472.88 54.5 L 474.63 51 L 472.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-56"><g><path d="M 371 101 L 371 144.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 149.88 L 367.5 142.88 L 371 144.63 L 374.5 142.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-3"><g><rect x="321" y="1" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP1 hands off control to OS</div></div></div></foreignObject><image x="322" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-4"><g><rect x="481" y="21" width="100" height="60" rx="3.6" ry="3.6" fill="#f2cc8f" stroke="#e07a5f" pointer-events="all"/><path d="M 495 21 L 495 81 M 567 21 L 567 81" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 70px; height: 1px; padding-top: 51px; margin-left: 496px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><span style="background-color: rgb(232, 205, 151);"><span style="color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: center; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; float: none; display: inline !important;">OS Moves one core out of ACPI C3</span><br /></span></div></div></div></foreignObject><image x="496" y="30" width="70" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-11"><g><path d="M 261 51 L 314.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 319.88 51 L 312.88 54.5 L 314.63 51 L 312.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-9"><g><rect x="161" y="1" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP0/MP1 boot process</div></div></div></foreignObject><image x="162" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-27"><g><path d="M 371 251 L 371 294.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 299.88 L 367.5 292.88 L 371 294.63 L 374.5 292.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-13"><g><rect x="321" y="151" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">OS checks all wake sources</div></div></div></foreignObject><image x="322" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-29"><g><path d="M 371 551 L 371 594.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 599.88 L 367.5 592.88 L 371 594.63 L 374.5 592.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-44"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 562px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="556" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-34"><g><path d="M 321 501 L 51 501 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-41"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 512px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="506" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-15"><g><path d="M 371 451 L 421 501 L 371 551 L 321 501 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">ACPI fixed <br />event active</div></div></div></foreignObject><image x="322" y="487" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-28"><g><path d="M 371 401 L 371 444.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 449.88 L 367.5 442.88 L 371 444.63 L 374.5 442.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-43"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 412px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="406" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-57"><g><path d="M 321 351 L 107.37 351" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 102.12 351 L 109.12 347.5 L 107.37 351 L 109.12 354.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-58"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 362px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="356" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-16"><g><path d="M 371 301 L 421 351 L 371 401 L 321 351 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">IRQ other <br />than ACPI SCI active</div></div></div></foreignObject><image x="322" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-30"><g><path d="M 371 701 L 371 726 L 371 744.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 749.88 L 367.5 742.88 L 371 744.63 L 374.5 742.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-65"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 715px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="709" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-36"><g><path d="M 321 651 L 257.37 651" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 252.12 651 L 259.12 647.5 L 257.37 651 L 259.12 654.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-45"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 302px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="293" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-17"><g><path d="M 371 601 L 421 651 L 371 701 L 321 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPIO<br />IRQ shared<br />with SCI</div></div></div></foreignObject><image x="322" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-31"><g><path d="M 371 851 L 371 894.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 899.88 L 367.5 892.88 L 371 894.63 L 374.5 892.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-32"><g><path d="M 371 1001 L 371.5 1026.5 L 371.13 1044.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371.02 1049.88 L 367.67 1042.81 L 371.13 1044.63 L 374.66 1042.96 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-48"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1023px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="1017" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-52"><g><path d="M 321 1101 L 51 1101 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-53"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1112px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="1106" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-62"><g><path d="M 421 1101 L 531 1101 L 531 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 531 402.12 L 534.5 409.12 L 531 407.37 L 527.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-64"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1112px; margin-left: 432px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="425.5" y="1106" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-23"><g><path d="M 371 1051 L 421 1101 L 371 1151 L 321 1101 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1101px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any PM<br />wakeup event<br />pending</div></div></div></foreignObject><image x="322" y="1080" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-24"><g><path d="M 31.61 171 L 70.39 171 C 87.29 171 101 184.43 101 201 C 101 217.57 87.29 231 70.39 231 L 31.61 231 C 14.71 231 1 217.57 1 201 C 1 184.43 14.71 171 31.61 171 Z" fill="#0cf232" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Kernel resumes system</div></div></div></foreignObject><image x="2" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-26"><g><path d="M 51 301 L 51 237.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 232.12 L 54.5 239.12 L 51 237.37 L 47.5 239.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-25"><g><rect x="1" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">uPEP driver removes OS_HINT</div></div></div></foreignObject><image x="2" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-37"><g><path d="M 151 651 L 51 651 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-40"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 142px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="133" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-60"><g><path d="M 201 701 L 201 793.2 L 313.83 793.2" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 319.08 793.2 L 312.08 796.7 L 313.83 793.2 L 312.08 789.7 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-61"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 712px; margin-left: 211px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="204.5" y="706" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-35"><g><path d="M 201 601 L 251 651 L 201 701 L 151 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 152px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any GPIO<br />w/ WAKESTS<br />active</div></div></div></foreignObject><image x="152" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-38"><g><rect x="321" y="751" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 801px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Check for ACPI Notify() events</div></div></div></foreignObject><image x="322" y="787" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-49"><g><path d="M 371 1001 L 371 1044.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 1049.88 L 367.5 1042.88 L 371 1044.63 L 374.5 1042.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-50"><g><path d="M 321 951 L 51 951 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-51"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 962px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="956" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-39"><g><path d="M 371 901 L 421 951 L 371 1001 L 321 951 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 951px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any GPE <br />pending</div></div></div></foreignObject><image x="322" y="937" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-63"><g><path d="M 531 301 L 531 237.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 531 232.12 L 534.5 239.12 L 531 237.37 L 527.5 239.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-54"><g><rect x="481" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 482px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">OS moves active <br />core back to<br />ACPI C3</div></div></div></foreignObject><image x="482" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-55"><g><path d="M 511.61 171 L 550.39 171 C 567.29 171 581 184.43 581 201 C 581 217.57 567.29 231 550.39 231 L 511.61 231 C 494.71 231 481 217.57 481 201 C 481 184.43 494.71 171 511.61 171 Z" fill="#f27979" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 482px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP1 puts system back to sleep</div></div></div></foreignObject><image x="482" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g></g></g></g></svg> \ No newline at end of file
diff --git a/Documentation/arch/x86/suspend.svg b/Documentation/arch/x86/suspend.svg
new file mode 100644
index 000000000000..a69073c018d5
--- /dev/null
+++ b/Documentation/arch/x86/suspend.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Do not edit this file with editors other than draw.io -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="407px" height="1132px" viewBox="-0.5 -0.5 407 1132" content="&lt;mxfile host=&quot;confluence.amd.com&quot; agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0&quot; version=&quot;24.7.10&quot; scale=&quot;1&quot; border=&quot;0&quot;&gt;&#10; &lt;diagram id=&quot;46NsKM0iVOHTgNer6hpB&quot; name=&quot;Page-1&quot;&gt;&#10; &lt;mxGraphModel dx=&quot;1964&quot; dy=&quot;1073&quot; grid=&quot;1&quot; gridSize=&quot;10&quot; guides=&quot;1&quot; tooltips=&quot;1&quot; connect=&quot;1&quot; arrows=&quot;1&quot; fold=&quot;1&quot; page=&quot;0&quot; pageScale=&quot;1&quot; pageWidth=&quot;850&quot; pageHeight=&quot;1100&quot; math=&quot;0&quot; shadow=&quot;0&quot;&gt;&#10; &lt;root&gt;&#10; &lt;mxCell id=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;1&quot; parent=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-21&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-4&quot; target=&quot;8N6JJebqrzA787TgpwUj-12&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-4&quot; value=&quot;SFH driver notifies MP2 to stop all sensor collection&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;420&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-6&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-51&quot; target=&quot;8N6JJebqrzA787TgpwUj-4&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;330&quot; y=&quot;400&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;170&quot; y=&quot;450&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-37&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-6&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.2133&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-22&quot; y=&quot;16&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-9&quot; value=&quot;Abort suspend; details logged in dmesg&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F27979;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;425&quot; y=&quot;140&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-12&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;420&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-18&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;380&quot; y=&quot;320&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-19&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-18&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.3265&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-27&quot; y=&quot;10&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-24&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-12&quot; target=&quot;8N6JJebqrzA787TgpwUj-28&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;340&quot; y=&quot;570&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;180&quot; y=&quot;620&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-38&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-24&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.0038&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint y=&quot;13&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-26&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-12&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;410&quot; y=&quot;530&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;555&quot; y=&quot;230&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;Array as=&quot;points&quot;&gt;&#10; &lt;mxPoint x=&quot;475&quot; y=&quot;470&quot; /&gt;&#10; &lt;/Array&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-35&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-26&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.7458&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-1&quot; y=&quot;10&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-30&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-28&quot; target=&quot;8N6JJebqrzA787TgpwUj-29&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-28&quot; value=&quot;All devices go into deepest D-state or F-state&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;570&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-29&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;570&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-31&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-29&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;330&quot; y=&quot;760&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;170&quot; y=&quot;720&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-64&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-31&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.0683&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint y=&quot;15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-34&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-29&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-36&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-34&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.8315&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;2&quot; y=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-46&quot; value=&quot;GPIO driver suspends non-wake GPIOs&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;720&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-47&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-48&quot; target=&quot;8N6JJebqrzA787TgpwUj-50&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-48&quot; value=&quot;Suspend initiated from userspace&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.start_2;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;120&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-49&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-50&quot; target=&quot;8N6JJebqrzA787TgpwUj-51&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-50&quot; value=&quot;GPU driver shuts down clocks and sends SMU messages&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;270&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-51&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;270&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-53&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-54&quot; target=&quot;8N6JJebqrzA787TgpwUj-56&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-54&quot; value=&quot;ACPI s2idle driver notifies EC using _DSM&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;870&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-55&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-56&quot; target=&quot;8N6JJebqrzA787TgpwUj-58&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-56&quot; value=&quot;uPEP driver (amd-pmc) sends OS_HINT&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;1010&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-57&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-58&quot; target=&quot;8N6JJebqrzA787TgpwUj-59&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-58&quot; value=&quot;Put all x86 CPU cores into ACPI C3&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;1150&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-59&quot; value=&quot;s2idle loop waiting for IRQ &amp;lt;br&amp;gt;to wake&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;1170&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-65&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-60&quot; target=&quot;8N6JJebqrzA787TgpwUj-54&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-66&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-65&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.144&quot; y=&quot;-4&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-4&quot; y=&quot;14&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-60&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;720&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-61&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-60&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-62&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;440&quot; y=&quot;620&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-14&quot; y=&quot;160&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;/root&gt;&#10; &lt;/mxGraphModel&gt;&#10; &lt;/diagram&gt;&#10;&lt;/mxfile&gt;&#10;"><defs/><g><g data-cell-id="0"><g data-cell-id="1"><g data-cell-id="8N6JJebqrzA787TgpwUj-21"><g><path d="M 101 351 L 154.63 351" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 351 L 152.88 354.5 L 154.63 351 L 152.88 347.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-4"><g><rect x="1" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">SFH driver notifies MP2 to stop all sensor collection</div></div></div></foreignObject><image x="2" y="322.5" width="98" height="61" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-6"><g><path d="M 211 251 L 211 276 L 51 276 L 51 294.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 299.88 L 47.5 292.88 L 51 294.63 L 54.5 292.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-37"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 292px; margin-left: 132px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="125.5" y="286" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-9"><g><path d="M 336.61 21 L 375.39 21 C 392.29 21 406 34.43 406 51 C 406 67.57 392.29 81 375.39 81 L 336.61 81 C 319.71 81 306 67.57 306 51 C 306 34.43 319.71 21 336.61 21 Z" fill="#f27979" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Abort suspend; details logged in dmesg</div></div></div></foreignObject><image x="307" y="30" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-12"><g><path d="M 211 301 L 261 351 L 211 401 L 161 351 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="344.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-18"><g><path d="M 261 201 L 356 201.5 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-19"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 212px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="206" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-24"><g><path d="M 211 401 L 211 426 L 51 426 L 51 444.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 449.88 L 47.5 442.88 L 51 444.63 L 54.5 442.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-38"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 442px; margin-left: 132px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="125.5" y="436" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-26"><g><path d="M 261 351 L 356 351 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-35"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 362px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="356" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-30"><g><path d="M 101 501 L 154.63 501" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 501 L 152.88 504.5 L 154.63 501 L 152.88 497.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-28"><g><rect x="1" y="451" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">All devices go into deepest D-state or F-state</div></div></div></foreignObject><image x="2" y="480" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-29"><g><path d="M 211 451 L 261 501 L 211 551 L 161 501 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="494.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-31"><g><path d="M 211 551 L 211 576 L 51.5 576 L 51.13 594.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51.02 599.88 L 47.66 592.81 L 51.13 594.63 L 54.66 592.95 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-64"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 592px; margin-left: 139px;"><div data-drawio-colors="color: #393C56; background-color: rgb(255, 255, 255); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; background-color: rgb(255, 255, 255); white-space: nowrap;">no</div></div></div></foreignObject><image x="132.5" y="586" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-34"><g><path d="M 261 501 L 356 501 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-36"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 512px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="506" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-46"><g><rect x="1" y="601" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPIO driver suspends non-wake GPIOs</div></div></div></foreignObject><image x="2" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-47"><g><path d="M 51 101 L 51 144.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 149.88 L 47.5 142.88 L 51 144.63 L 54.5 142.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-48"><g><ellipse cx="51" cy="51" rx="50" ry="50" fill="#0cf232" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Suspend initiated from userspace</div></div></div></foreignObject><image x="2" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-49"><g><path d="M 101 201 L 154.63 201" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 201 L 152.88 204.5 L 154.63 201 L 152.88 197.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-50"><g><rect x="1" y="151" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPU driver shuts down clocks and sends SMU messages</div></div></div></foreignObject><image x="2" y="172.5" width="98" height="61" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-51"><g><path d="M 211 151 L 261 201 L 211 251 L 161 201 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="194.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-53"><g><path d="M 51 851 L 51 884.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 889.88 L 47.5 882.88 L 51 884.63 L 54.5 882.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-54"><g><rect x="1" y="751" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 801px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">ACPI s2idle driver notifies EC using _DSM</div></div></div></foreignObject><image x="2" y="780" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-55"><g><path d="M 51 991 L 51 1024.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 1029.88 L 47.5 1022.88 L 51 1024.63 L 54.5 1022.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-56"><g><rect x="1" y="891" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 941px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">uPEP driver (amd-pmc) sends OS_HINT</div></div></div></foreignObject><image x="2" y="920" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-57"><g><path d="M 101 1081 L 154.63 1081" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 1081 L 152.88 1084.5 L 154.63 1081 L 152.88 1077.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-58"><g><rect x="1" y="1031" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1081px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Put all x86 CPU cores into ACPI C3</div></div></div></foreignObject><image x="2" y="1060" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-59"><g><path d="M 191.61 1051 L 230.39 1051 C 247.29 1051 261 1064.43 261 1081 C 261 1097.57 247.29 1111 230.39 1111 L 191.61 1111 C 174.71 1111 161 1097.57 161 1081 C 161 1064.43 174.71 1051 191.61 1051 Z" fill="#0cf232" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1081px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">s2idle loop waiting for IRQ <br />to wake</div></div></div></foreignObject><image x="162" y="1060" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-65"><g><path d="M 211 701 L 211 726 L 51 726 L 51 744.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 749.88 L 47.5 742.88 L 51 744.63 L 54.5 742.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-66"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 737px; margin-left: 143px;"><div data-drawio-colors="color: #393C56; background-color: rgb(255, 255, 255); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; background-color: rgb(255, 255, 255); white-space: nowrap;">no</div></div></div></foreignObject><image x="136.5" y="731" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-60"><g><path d="M 211 601 L 261 651 L 211 701 L 161 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="644.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-61"><g><path d="M 261 651 L 356 651 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-62"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 308px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="299" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g></g></g></svg> \ No newline at end of file
diff --git a/Documentation/arch/x86/x86_64/5level-paging.rst b/Documentation/arch/x86/x86_64/5level-paging.rst
index 71f882f4a173..ad7ddc13f79d 100644
--- a/Documentation/arch/x86/x86_64/5level-paging.rst
+++ b/Documentation/arch/x86/x86_64/5level-paging.rst
@@ -22,15 +22,6 @@ QEMU 2.9 and later support 5-level paging.
Virtual memory layout for 5-level paging is described in
Documentation/arch/x86/x86_64/mm.rst
-
-Enabling 5-level paging
-=======================
-CONFIG_X86_5LEVEL=y enables the feature.
-
-Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
-In this case additional page table level -- p4d -- will be folded at
-runtime.
-
User-space and large virtual address space
==========================================
On x86, 5-level paging enables 56-bit userspace virtual address space.
diff --git a/Documentation/arch/x86/x86_64/fsgs.rst b/Documentation/arch/x86/x86_64/fsgs.rst
index d07e445dac5c..6bda4d16d3f7 100644
--- a/Documentation/arch/x86/x86_64/fsgs.rst
+++ b/Documentation/arch/x86/x86_64/fsgs.rst
@@ -130,7 +130,7 @@ instructions. Clang 5 supports them as well.
=================== ===========================
_readfsbase_u64() Read the FS base register
- _readfsbase_u64() Read the GS base register
+ _readgsbase_u64() Read the GS base register
_writefsbase_u64() Write the FS base register
_writegsbase_u64() Write the GS base register
=================== ===========================
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 3dad1f90b098..12de52a2b17e 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -28,16 +28,6 @@ def have_command(cmd):
"""
return shutil.which(cmd) is not None
-# Get Sphinx version
-major, minor, patch = sphinx.version_info[:3]
-
-#
-# Warn about older versions that we don't want to support for much
-# longer.
-#
-if (major < 2) or (major == 2 and minor < 4):
- print('WARNING: support for Sphinx < 2.4 will be removed soon.')
-
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -57,76 +47,71 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
'maintainers_include', 'sphinx.ext.autosectionlabel',
'kernel_abi', 'kernel_feat', 'translations']
-if major >= 3:
- if (major > 3) or (minor > 0 or patch >= 2):
- # Sphinx c function parser is more pedantic with regards to type
- # checking. Due to that, having macros at c:function cause problems.
- # Those needed to be scaped by using c_id_attributes[] array
- c_id_attributes = [
- # GCC Compiler types not parsed by Sphinx:
- "__restrict__",
-
- # include/linux/compiler_types.h:
- "__iomem",
- "__kernel",
- "noinstr",
- "notrace",
- "__percpu",
- "__rcu",
- "__user",
- "__force",
- "__counted_by_le",
- "__counted_by_be",
-
- # include/linux/compiler_attributes.h:
- "__alias",
- "__aligned",
- "__aligned_largest",
- "__always_inline",
- "__assume_aligned",
- "__cold",
- "__attribute_const__",
- "__copy",
- "__pure",
- "__designated_init",
- "__visible",
- "__printf",
- "__scanf",
- "__gnu_inline",
- "__malloc",
- "__mode",
- "__no_caller_saved_registers",
- "__noclone",
- "__nonstring",
- "__noreturn",
- "__packed",
- "__pure",
- "__section",
- "__always_unused",
- "__maybe_unused",
- "__used",
- "__weak",
- "noinline",
- "__fix_address",
- "__counted_by",
-
- # include/linux/memblock.h:
- "__init_memblock",
- "__meminit",
-
- # include/linux/init.h:
- "__init",
- "__ref",
-
- # include/linux/linkage.h:
- "asmlinkage",
-
- # include/linux/btf.h
- "__bpf_kfunc",
- ]
-
-else:
- extensions.append('cdomain')
+# Since Sphinx version 3, the C function parser is more pedantic with regards
+# to type checking. Due to that, having macros at c:function cause problems.
+# Those needed to be escaped by using c_id_attributes[] array
+c_id_attributes = [
+ # GCC Compiler types not parsed by Sphinx:
+ "__restrict__",
+
+ # include/linux/compiler_types.h:
+ "__iomem",
+ "__kernel",
+ "noinstr",
+ "notrace",
+ "__percpu",
+ "__rcu",
+ "__user",
+ "__force",
+ "__counted_by_le",
+ "__counted_by_be",
+
+ # include/linux/compiler_attributes.h:
+ "__alias",
+ "__aligned",
+ "__aligned_largest",
+ "__always_inline",
+ "__assume_aligned",
+ "__cold",
+ "__attribute_const__",
+ "__copy",
+ "__pure",
+ "__designated_init",
+ "__visible",
+ "__printf",
+ "__scanf",
+ "__gnu_inline",
+ "__malloc",
+ "__mode",
+ "__no_caller_saved_registers",
+ "__noclone",
+ "__nonstring",
+ "__noreturn",
+ "__packed",
+ "__pure",
+ "__section",
+ "__always_unused",
+ "__maybe_unused",
+ "__used",
+ "__weak",
+ "noinline",
+ "__fix_address",
+ "__counted_by",
+
+ # include/linux/memblock.h:
+ "__init_memblock",
+ "__meminit",
+
+ # include/linux/init.h:
+ "__init",
+ "__ref",
+
+ # include/linux/linkage.h:
+ "asmlinkage",
+
+ # include/linux/btf.h
+ "__bpf_kfunc",
+]
# Ensure that autosectionlabel will produce unique names
autosectionlabel_prefix_document = True
@@ -149,10 +134,6 @@ if 'SPHINX_IMGMATH' in os.environ:
else:
sys.stderr.write("Unknown env SPHINX_IMGMATH=%s ignored.\n" % env_sphinx_imgmath)
-# Always load imgmath for Sphinx <1.8 or for epub docs
-load_imgmath = (load_imgmath or (major == 1 and minor < 8)
- or 'epub' in sys.argv)
-
if load_imgmath:
extensions.append("sphinx.ext.imgmath")
math_renderer = 'imgmath'
@@ -322,14 +303,6 @@ if "DOCS_CSS" in os.environ:
for l in css:
html_css_files.append(l)
-if major <= 1 and minor < 8:
- html_context = {
- 'css_files': [],
- }
-
- for l in html_css_files:
- html_context['css_files'].append('_static/' + l)
-
if html_theme == 'alabaster':
html_theme_options = {
'description': get_cline_version(),
@@ -409,11 +382,6 @@ latex_elements = {
''',
}
-# Fix reference escape troubles with Sphinx 1.4.x
-if major == 1:
- latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
-
-
# Load kerneldoc specific LaTeX settings
latex_elements['preamble'] += '''
% Load kerneldoc specific LaTeX settings
@@ -540,7 +508,7 @@ pdf_documents = [
# kernel-doc extension configuration for running Sphinx directly (e.g. by Read
# the Docs). In a normal build, these are supplied from the Makefile via command
# line arguments.
-kerneldoc_bin = '../scripts/kernel-doc'
+kerneldoc_bin = '../scripts/kernel-doc.py'
kerneldoc_srctree = '..'
# ------------------------------------------------------------------------------
diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst
index 8e3cce3d0a23..2ad08517e626 100644
--- a/Documentation/core-api/dma-api.rst
+++ b/Documentation/core-api/dma-api.rst
@@ -530,6 +530,77 @@ routines, e.g.:::
....
}
+Part Ie - IOVA-based DMA mappings
+---------------------------------
+
+These APIs allow a very efficient mapping when using an IOMMU. They are an
+optional path that requires extra code and are only recommended for drivers
+where DMA mapping performance, or the space usage for storing the DMA addresses
+matter. All the considerations from the previous section apply here as well.
+
+::
+
+ bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t size);
+
+Is used to try to allocate IOVA space for mapping operation. If it returns
+false this API can't be used for the given device and the normal streaming
+DMA mapping API should be used. The ``struct dma_iova_state`` is allocated
+by the driver and must be kept around until unmap time.
+
+::
+
+ static inline bool dma_use_iova(struct dma_iova_state *state)
+
+Can be used by the driver to check if the IOVA-based API is used after a
+call to dma_iova_try_alloc. This can be useful in the unmap path.
+
+::
+
+ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+
+Is used to link ranges to the IOVA previously allocated. The start of all
+but the first call to dma_iova_link for a given state must be aligned
+to the DMA merge boundary returned by ``dma_get_merge_boundary())``, and
+the size of all but the last range must be aligned to the DMA merge boundary
+as well.
+
+::
+
+ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size);
+
+Must be called to sync the IOMMU page tables for IOVA-range mapped by one or
+more calls to ``dma_iova_link()``.
+
+For drivers that use a one-shot mapping, all ranges can be unmapped and the
+IOVA freed by calling:
+
+::
+
+ void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs);
+
+Alternatively drivers can dynamically manage the IOVA space by unmapping
+and mapping individual regions. In that case
+
+::
+
+ void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+
+is used to unmap a range previously mapped, and
+
+::
+
+ void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+
+is used to free the IOVA space. All regions must have been unmapped using
+``dma_iova_unlink()`` before calling ``dma_iova_free()``.
Part II - Non-coherent DMA allocations
--------------------------------------
diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst
index 25f94dfd66fa..582bde9bf5a9 100644
--- a/Documentation/core-api/genericirq.rst
+++ b/Documentation/core-api/genericirq.rst
@@ -410,8 +410,6 @@ which are used in the generic IRQ layer.
.. kernel-doc:: include/linux/interrupt.h
:internal:
-.. kernel-doc:: include/linux/irqdomain.h
-
Public Functions Provided
=========================
diff --git a/Documentation/core-api/irq/concepts.rst b/Documentation/core-api/irq/concepts.rst
index 4273806a606b..7c4564f3cbdf 100644
--- a/Documentation/core-api/irq/concepts.rst
+++ b/Documentation/core-api/irq/concepts.rst
@@ -2,23 +2,24 @@
What is an IRQ?
===============
-An IRQ is an interrupt request from a device.
-Currently they can come in over a pin, or over a packet.
-Several devices may be connected to the same pin thus
-sharing an IRQ.
+An IRQ is an interrupt request from a device. Currently, they can come
+in over a pin, or over a packet. Several devices may be connected to
+the same pin thus sharing an IRQ. Such as on legacy PCI bus: All devices
+typically share 4 lanes/pins. Note that each device can request an
+interrupt on each of the lanes.
An IRQ number is a kernel identifier used to talk about a hardware
-interrupt source. Typically this is an index into the global irq_desc
-array, but except for what linux/interrupt.h implements the details
-are architecture specific.
+interrupt source. Typically, this is an index into the global irq_desc
+array or sparse_irqs tree. But except for what linux/interrupt.h
+implements, the details are architecture specific.
An IRQ number is an enumeration of the possible interrupt sources on a
-machine. Typically what is enumerated is the number of input pins on
-all of the interrupt controller in the system. In the case of ISA
-what is enumerated are the 16 input pins on the two i8259 interrupt
-controllers.
+machine. Typically, what is enumerated is the number of input pins on
+all of the interrupt controllers in the system. In the case of ISA,
+what is enumerated are the 8 input pins on each of the two i8259
+interrupt controllers.
Architectures can assign additional meaning to the IRQ numbers, and
-are encouraged to in the case where there is any manual configuration
-of the hardware involved. The ISA IRQs are a classic example of
+are encouraged to in the case where there is any manual configuration
+of the hardware involved. The ISA IRQs are a classic example of
assigning this kind of additional meaning.
diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
index f88a6ee67a35..a01c6ead1bc0 100644
--- a/Documentation/core-api/irq/irq-domain.rst
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -1,59 +1,77 @@
===============================================
-The irq_domain interrupt number mapping library
+The irq_domain Interrupt Number Mapping Library
===============================================
The current design of the Linux kernel uses a single large number
-space where each separate IRQ source is assigned a different number.
-This is simple when there is only one interrupt controller, but in
-systems with multiple interrupt controllers the kernel must ensure
+space where each separate IRQ source is assigned a unique number.
+This is simple when there is only one interrupt controller. But in
+systems with multiple interrupt controllers, the kernel must ensure
that each one gets assigned non-overlapping allocations of Linux
IRQ numbers.
The number of interrupt controllers registered as unique irqchips
-show a rising tendency: for example subdrivers of different kinds
+shows a rising tendency. For example, subdrivers of different kinds
such as GPIO controllers avoid reimplementing identical callback
mechanisms as the IRQ core system by modelling their interrupt
-handlers as irqchips, i.e. in effect cascading interrupt controllers.
+handlers as irqchips. I.e. in effect cascading interrupt controllers.
-Here the interrupt number loose all kind of correspondence to
-hardware interrupt numbers: whereas in the past, IRQ numbers could
-be chosen so they matched the hardware IRQ line into the root
-interrupt controller (i.e. the component actually fireing the
-interrupt line to the CPU) nowadays this number is just a number.
+So in the past, IRQ numbers could be chosen so that they match the
+hardware IRQ line into the root interrupt controller (i.e. the
+component actually firing the interrupt line to the CPU). Nowadays,
+this number is just a number and the number loose all kind of
+correspondence to hardware interrupt numbers.
-For this reason we need a mechanism to separate controller-local
-interrupt numbers, called hardware irq's, from Linux IRQ numbers.
+For this reason, we need a mechanism to separate controller-local
+interrupt numbers, called hardware IRQs, from Linux IRQ numbers.
The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of
-irq numbers, but they don't provide any support for reverse mapping of
+IRQ numbers, but they don't provide any support for reverse mapping of
the controller-local IRQ (hwirq) number into the Linux IRQ number
space.
-The irq_domain library adds mapping between hwirq and IRQ numbers on
-top of the irq_alloc_desc*() API. An irq_domain to manage mapping is
-preferred over interrupt controller drivers open coding their own
+The irq_domain library adds a mapping between hwirq and IRQ numbers on
+top of the irq_alloc_desc*() API. An irq_domain to manage the mapping
+is preferred over interrupt controller drivers open coding their own
reverse mapping scheme.
-irq_domain also implements translation from an abstract irq_fwspec
-structure to hwirq numbers (Device Tree and ACPI GSI so far), and can
-be easily extended to support other IRQ topology data sources.
+irq_domain also implements a translation from an abstract struct
+irq_fwspec to hwirq numbers (Device Tree, non-DT firmware node, ACPI
+GSI, and software node so far), and can be easily extended to support
+other IRQ topology data sources. The implementation is performed
+without any extra platform support code.
-irq_domain usage
+irq_domain Usage
================
-
-An interrupt controller driver creates and registers an irq_domain by
-calling one of the irq_domain_add_*() or irq_domain_create_*() functions
-(each mapping method has a different allocator function, more on that later).
-The function will return a pointer to the irq_domain on success. The caller
-must provide the allocator function with an irq_domain_ops structure.
+struct irq_domain could be defined as an irq domain controller. That
+is, it handles the mapping between hardware and virtual interrupt
+numbers for a given interrupt domain. The domain structure is
+generally created by the PIC code for a given PIC instance (though a
+domain can cover more than one PIC if they have a flat number model).
+It is the domain callbacks that are responsible for setting the
+irq_chip on a given irq_desc after it has been mapped.
+
+The host code and data structures use a fwnode_handle pointer to
+identify the domain. In some cases, and in order to preserve source
+code compatibility, this fwnode pointer is "upgraded" to a DT
+device_node. For those firmware infrastructures that do not provide a
+unique identifier for an interrupt controller, the irq_domain code
+offers a fwnode allocator.
+
+An interrupt controller driver creates and registers a struct irq_domain
+by calling one of the irq_domain_create_*() functions (each mapping
+method has a different allocator function, more on that later). The
+function will return a pointer to the struct irq_domain on success. The
+caller must provide the allocator function with a struct irq_domain_ops
+pointer.
In most cases, the irq_domain will begin empty without any mappings
between hwirq and IRQ numbers. Mappings are added to the irq_domain
by calling irq_create_mapping() which accepts the irq_domain and a
-hwirq number as arguments. If a mapping for the hwirq doesn't already
-exist then it will allocate a new Linux irq_desc, associate it with
-the hwirq, and call the .map() callback so the driver can perform any
-required hardware setup.
+hwirq number as arguments. If a mapping for the hwirq doesn't already
+exist, irq_create_mapping() allocates a new Linux irq_desc, associates
+it with the hwirq, and calls the :c:member:`irq_domain_ops.map()`
+callback. In there, the driver can perform any required hardware
+setup.
Once a mapping has been established, it can be retrieved or used via a
variety of methods:
@@ -63,8 +81,6 @@ variety of methods:
mapping.
- irq_find_mapping() returns a Linux IRQ number for a given domain and
hwirq number, and 0 if there was no mapping
-- irq_linear_revmap() is now identical to irq_find_mapping(), and is
- deprecated
- generic_handle_domain_irq() handles an interrupt described by a
domain and a hwirq number
@@ -77,9 +93,10 @@ be allocated.
If the driver has the Linux IRQ number or the irq_data pointer, and
needs to know the associated hwirq number (such as in the irq_chip
-callbacks) then it can be directly obtained from irq_data->hwirq.
+callbacks) then it can be directly obtained from
+:c:member:`irq_data.hwirq`.
-Types of irq_domain mappings
+Types of irq_domain Mappings
============================
There are several mechanisms available for reverse mapping from hwirq
@@ -92,7 +109,6 @@ Linear
::
- irq_domain_add_linear()
irq_domain_create_linear()
The linear reverse map maintains a fixed size table indexed by the
@@ -105,19 +121,13 @@ map are fixed time lookup for IRQ numbers, and irq_descs are only
allocated for in-use IRQs. The disadvantage is that the table must be
as large as the largest possible hwirq number.
-irq_domain_add_linear() and irq_domain_create_linear() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
-The majority of drivers should use the linear map.
+The majority of drivers should use the Linear map.
Tree
----
::
- irq_domain_add_tree()
irq_domain_create_tree()
The irq_domain maintains a radix tree map from hwirq numbers to Linux
@@ -129,11 +139,6 @@ since it doesn't need to allocate a table as large as the largest
hwirq number. The disadvantage is that hwirq to IRQ number lookup is
dependent on how many entries are in the table.
-irq_domain_add_tree() and irq_domain_create_tree() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
Very few drivers should need this mapping.
No Map
@@ -141,7 +146,7 @@ No Map
::
- irq_domain_add_nomap()
+ irq_domain_create_nomap()
The No Map mapping is to be used when the hwirq number is
programmable in the hardware. In this case it is best to program the
@@ -159,8 +164,6 @@ Legacy
::
- irq_domain_add_simple()
- irq_domain_add_legacy()
irq_domain_create_simple()
irq_domain_create_legacy()
@@ -189,13 +192,13 @@ supported. For example, ISA controllers would use the legacy map for
mapping Linux IRQs 0-15 so that existing ISA drivers get the correct IRQ
numbers.
-Most users of legacy mappings should use irq_domain_add_simple() or
-irq_domain_create_simple() which will use a legacy domain only if an IRQ range
-is supplied by the system and will otherwise use a linear domain mapping.
-The semantics of this call are such that if an IRQ range is specified then
-descriptors will be allocated on-the-fly for it, and if no range is
-specified it will fall through to irq_domain_add_linear() or
-irq_domain_create_linear() which means *no* irq descriptors will be allocated.
+Most users of legacy mappings should use irq_domain_create_simple()
+which will use a legacy domain only if an IRQ range is supplied by the
+system and will otherwise use a linear domain mapping. The semantics of
+this call are such that if an IRQ range is specified then descriptors
+will be allocated on-the-fly for it, and if no range is specified it
+will fall through to irq_domain_create_linear() which means *no* irq
+descriptors will be allocated.
A typical use case for simple domains is where an irqchip provider
is supporting both dynamic and static IRQ assignments.
@@ -206,13 +209,7 @@ that the driver using the simple domain call irq_create_mapping()
before any irq_find_mapping() since the latter will actually work
for the static IRQ assignment case.
-irq_domain_add_simple() and irq_domain_create_simple() as well as
-irq_domain_add_legacy() and irq_domain_create_legacy() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
-Hierarchy IRQ domain
+Hierarchy IRQ Domain
--------------------
On some architectures, there may be multiple interrupt controllers
@@ -253,20 +250,40 @@ There are four major interfaces to use hierarchy irq_domain:
4) irq_domain_deactivate_irq(): deactivate interrupt controller hardware
to stop delivering the interrupt.
-Following changes are needed to support hierarchy irq_domain:
+The following is needed to support hierarchy irq_domain:
-1) a new field 'parent' is added to struct irq_domain; it's used to
+1) The :c:member:`parent` field in struct irq_domain is used to
maintain irq_domain hierarchy information.
-2) a new field 'parent_data' is added to struct irq_data; it's used to
- build hierarchy irq_data to match hierarchy irq_domains. The irq_data
- is used to store irq_domain pointer and hardware irq number.
-3) new callbacks are added to struct irq_domain_ops to support hierarchy
- irq_domain operations.
-
-With support of hierarchy irq_domain and hierarchy irq_data ready, an
-irq_domain structure is built for each interrupt controller, and an
+2) The :c:member:`parent_data` field in struct irq_data is used to
+ build hierarchy irq_data to match hierarchy irq_domains. The
+ irq_data is used to store irq_domain pointer and hardware irq
+ number.
+3) The :c:member:`alloc()`, :c:member:`free()`, and other callbacks in
+ struct irq_domain_ops to support hierarchy irq_domain operations.
+
+With the support of hierarchy irq_domain and hierarchy irq_data ready,
+an irq_domain structure is built for each interrupt controller, and an
irq_data structure is allocated for each irq_domain associated with an
-IRQ. Now we could go one step further to support stacked(hierarchy)
+IRQ.
+
+For an interrupt controller driver to support hierarchy irq_domain, it
+needs to:
+
+1) Implement irq_domain_ops.alloc() and irq_domain_ops.free()
+2) Optionally, implement irq_domain_ops.activate() and
+ irq_domain_ops.deactivate().
+3) Optionally, implement an irq_chip to manage the interrupt controller
+ hardware.
+4) There is no need to implement irq_domain_ops.map() and
+ irq_domain_ops.unmap(). They are unused with hierarchy irq_domain.
+
+Note the hierarchy irq_domain is in no way x86-specific, and is
+heavily used to support other architectures, such as ARM, ARM64 etc.
+
+Stacked irq_chip
+~~~~~~~~~~~~~~~~
+
+Now, we could go one step further to support stacked (hierarchy)
irq_chip. That is, an irq_chip is associated with each irq_data along
the hierarchy. A child irq_chip may implement a required action by
itself or by cooperating with its parent irq_chip.
@@ -276,22 +293,28 @@ with the hardware managed by itself and may ask for services from its
parent irq_chip when needed. So we could achieve a much cleaner
software architecture.
-For an interrupt controller driver to support hierarchy irq_domain, it
-needs to:
-
-1) Implement irq_domain_ops.alloc and irq_domain_ops.free
-2) Optionally implement irq_domain_ops.activate and
- irq_domain_ops.deactivate.
-3) Optionally implement an irq_chip to manage the interrupt controller
- hardware.
-4) No need to implement irq_domain_ops.map and irq_domain_ops.unmap,
- they are unused with hierarchy irq_domain.
-
-Hierarchy irq_domain is in no way x86 specific, and is heavily used to
-support other architectures, such as ARM, ARM64 etc.
-
Debugging
=========
Most of the internals of the IRQ subsystem are exposed in debugfs by
turning CONFIG_GENERIC_IRQ_DEBUGFS on.
+
+Structures and Public Functions Provided
+========================================
+
+This chapter contains the autogenerated documentation of the structures
+and exported kernel API functions which are used for IRQ domains.
+
+.. kernel-doc:: include/linux/irqdomain.h
+
+.. kernel-doc:: kernel/irq/irqdomain.c
+ :export:
+
+Internal Functions Provided
+===========================
+
+This chapter contains the autogenerated documentation of the internal
+functions.
+
+.. kernel-doc:: kernel/irq/irqdomain.c
+ :internal:
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 4bdc394e86af..c787f72957a4 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -571,9 +571,8 @@ struct clk
::
%pC pll1
- %pCn pll1
-For printing struct clk structures. %pC and %pCn print the name of the clock
+For printing struct clk structures. %pC prints the name of the clock
(Common Clock Framework) or a unique 32-bit ID (legacy clock framework).
Passed by reference.
diff --git a/Documentation/dev-tools/kunit/run_wrapper.rst b/Documentation/dev-tools/kunit/run_wrapper.rst
index 19ddf5e07013..6697c71ee8ca 100644
--- a/Documentation/dev-tools/kunit/run_wrapper.rst
+++ b/Documentation/dev-tools/kunit/run_wrapper.rst
@@ -182,6 +182,8 @@ via UML. To run tests on qemu, by default it requires two flags:
is ignored), the tests will run via UML. Non-UML architectures,
for example: i386, x86_64, arm and so on; run on qemu.
+ ``--arch help`` lists all valid ``--arch`` values.
+
- ``--cross_compile``: Specifies the Kbuild toolchain. It passes the
same argument as passed to the ``CROSS_COMPILE`` variable used by
Kbuild. As a reminder, this will be the prefix for the toolchain
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 22955d56b379..038f480074fd 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -670,28 +670,50 @@ with ``kunit_remove_action``.
Testing Static Functions
------------------------
-If we do not want to expose functions or variables for testing, one option is to
-conditionally export the used symbol. For example:
+If you want to test static functions without exposing those functions outside of
+testing, one option is conditionally export the symbol. When KUnit is enabled,
+the symbol is exposed but remains static otherwise. To use this method, follow
+the template below.
.. code-block:: c
- /* In my_file.c */
+ /* In the file containing functions to test "my_file.c" */
- VISIBLE_IF_KUNIT int do_interesting_thing();
+ #include <kunit/visibility.h>
+ #include <my_file.h>
+ ...
+ VISIBLE_IF_KUNIT int do_interesting_thing()
+ {
+ ...
+ }
EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing);
- /* In my_file.h */
+ /* In the header file "my_file.h" */
#if IS_ENABLED(CONFIG_KUNIT)
int do_interesting_thing(void);
#endif
-Alternatively, you could conditionally ``#include`` the test file at the end of
-your .c file. For example:
+ /* In the KUnit test file "my_file_test.c" */
+
+ #include <kunit/visibility.h>
+ #include <my_file.h>
+ ...
+ MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+ ...
+ // Use do_interesting_thing() in tests
+
+For a full example, see this `patch <https://lore.kernel.org/all/20221207014024.340230-3-rmoar@google.com/>`_
+where a test is modified to conditionally expose static functions for testing
+using the macros above.
+
+As an **alternative** to the method above, you could conditionally ``#include``
+the test file at the end of your .c file. This is not recommended but works
+if needed. For example:
.. code-block:: c
- /* In my_file.c */
+ /* In "my_file.c" */
static int do_interesting_thing();
diff --git a/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml b/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml
index 26362c9006e2..81a65e9f93f1 100644
--- a/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml
+++ b/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml
@@ -21,6 +21,7 @@ properties:
- const: nvidia,tegra210-aconnect
- items:
- enum:
+ - nvidia,tegra264-aconnect
- nvidia,tegra234-aconnect
- nvidia,tegra186-aconnect
- nvidia,tegra194-aconnect
diff --git a/Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml b/Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml
new file mode 100644
index 000000000000..32bf3a1c3b42
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/amd,ccp-seattle-v1a.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMD Cryptographic Coprocessor (ccp)
+
+maintainers:
+ - Tom Lendacky <thomas.lendacky@amd.com>
+
+properties:
+ compatible:
+ const: amd,ccp-seattle-v1a
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@e0100000 {
+ compatible = "amd,ccp-seattle-v1a";
+ reg = <0xe0100000 0x10000>;
+ interrupts = <0 3 4>;
+ dma-coherent;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/amd-ccp.txt b/Documentation/devicetree/bindings/crypto/amd-ccp.txt
deleted file mode 100644
index d87579d63da6..000000000000
--- a/Documentation/devicetree/bindings/crypto/amd-ccp.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* AMD Cryptographic Coprocessor driver (ccp)
-
-Required properties:
-- compatible: Should be "amd,ccp-seattle-v1a"
-- reg: Address and length of the register set for the device
-- interrupts: Should contain the CCP interrupt
-
-Optional properties:
-- dma-coherent: Present if dma operations are coherent
-
-Example:
- ccp@e0100000 {
- compatible = "amd,ccp-seattle-v1a";
- reg = <0 0xe0100000 0 0x10000>;
- interrupt-parent = <&gic>;
- interrupts = <0 3 4>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
deleted file mode 100644
index d9cca4875bd6..000000000000
--- a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Axis crypto engine with PDMA interface.
-
-Required properties:
-- compatible : Should be one of the following strings:
- "axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC
- "axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC.
-- reg: Base address and size for the PDMA register area.
-- interrupts: Interrupt handle for the PDMA interrupt line.
-
-Example:
-
-crypto@f4264000 {
- compatible = "axis,artpec6-crypto";
- reg = <0xf4264000 0x1000>;
- interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml b/Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml
new file mode 100644
index 000000000000..c91f81e3c39e
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/axis,artpec6-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axis ARTPEC6 crypto engine with PDMA interface
+
+maintainers:
+ - Lars Persson <lars.persson@axis.com>
+
+properties:
+ compatible:
+ enum:
+ - axis,artpec6-crypto
+ - axis,artpec7-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ crypto@f4264000 {
+ compatible = "axis,artpec6-crypto";
+ reg = <0xf4264000 0x1000>;
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt b/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt
deleted file mode 100644
index 29b6007568eb..000000000000
--- a/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
-cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
-blocks.
-
-Required properties:
-- compatible: Should be one of the following:
- brcm,spum-crypto - for devices with SPU-M hardware
- brcm,spu2-crypto - for devices with SPU2 hardware
- brcm,spu2-v2-crypto - for devices with enhanced SPU2 hardware features like SHA3
- and Rabin Fingerprint support
- brcm,spum-nsp-crypto - for the Northstar Plus variant of the SPU-M hardware
-
-- reg: Should contain SPU registers location and length.
-- mboxes: The mailbox channel to be used to communicate with the SPU.
- Mailbox channels correspond to DMA rings on the device.
-
-Example:
- crypto@612d0000 {
- compatible = "brcm,spum-crypto";
- reg = <0 0x612d0000 0 0x900>;
- mboxes = <&pdc0 0>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml b/Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml
new file mode 100644
index 000000000000..9a5fb61727fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/brcm,spum-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom SPU Crypto Offload
+
+maintainers:
+ - Rob Rice <rob.rice@broadcom.com>
+
+description:
+ The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
+ cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
+ blocks.
+
+properties:
+ compatible:
+ enum:
+ - brcm,spum-crypto
+ - brcm,spu2-crypto
+ - brcm,spu2-v2-crypto # enhanced SPU2 hardware features like SHA3 and Rabin Fingerprint support
+ - brcm,spum-nsp-crypto # Northstar Plus variant of the SPU-M hardware
+
+ reg:
+ maxItems: 1
+
+ mboxes:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - mboxes
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@612d0000 {
+ compatible = "brcm,spum-crypto";
+ reg = <0x612d0000 0x900>;
+ mboxes = <&pdc0 0>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
index f0c4a7c83568..75afa441e019 100644
--- a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
+++ b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
@@ -38,7 +38,9 @@ properties:
compatible:
oneOf:
- items:
- - const: fsl,sec-v5.4
+ - enum:
+ - fsl,sec-v5.4
+ - fsl,sec-v6.0
- const: fsl,sec-v5.0
- const: fsl,sec-v4.0
- items:
@@ -94,6 +96,12 @@ patternProperties:
compatible:
oneOf:
- items:
+ - const: fsl,sec-v6.0-job-ring
+ - const: fsl,sec-v5.2-job-ring
+ - const: fsl,sec-v5.0-job-ring
+ - const: fsl,sec-v4.4-job-ring
+ - const: fsl,sec-v4.0-job-ring
+ - items:
- const: fsl,sec-v5.4-job-ring
- const: fsl,sec-v5.0-job-ring
- const: fsl,sec-v4.0-job-ring
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec6.txt b/Documentation/devicetree/bindings/crypto/fsl-sec6.txt
deleted file mode 100644
index 73b0eb950bb3..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-sec6.txt
+++ /dev/null
@@ -1,157 +0,0 @@
-SEC 6 is as Freescale's Cryptographic Accelerator and Assurance Module (CAAM).
-Currently Freescale powerpc chip C29X is embedded with SEC 6.
-SEC 6 device tree binding include:
- -SEC 6 Node
- -Job Ring Node
- -Full Example
-
-=====================================================================
-SEC 6 Node
-
-Description
-
- Node defines the base address of the SEC 6 block.
- This block specifies the address range of all global
- configuration registers for the SEC 6 block.
- For example, In C293, we could see three SEC 6 node.
-
-PROPERTIES
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v6.0".
-
- - fsl,sec-era
- Usage: optional
- Value type: <u32>
- Definition: A standard property. Define the 'ERA' of the SEC
- device.
-
- - #address-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing physical addresses in child nodes.
-
- - #size-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing the size of physical addresses in
- child nodes.
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical
- address and length of the SEC 6 configuration registers.
-
- - ranges
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical address
- range of the SEC 6.0 register space (-SNVS not included). A
- triplet that includes the child address, parent address, &
- length.
-
- Note: All other standard properties (see the Devicetree Specification)
- are allowed but are optional.
-
-EXAMPLE
- crypto@a0000 {
- compatible = "fsl,sec-v6.0";
- fsl,sec-era = <6>;
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xa0000 0x20000>;
- ranges = <0 0xa0000 0x20000>;
- };
-
-=====================================================================
-Job Ring (JR) Node
-
- Child of the crypto node defines data processing interface to SEC 6
- across the peripheral bus for purposes of processing
- cryptographic descriptors. The specified address
- range can be made visible to one (or more) cores.
- The interrupt defined for this node is controlled within
- the address range of this node.
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v6.0-job-ring".
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: Specifies a two JR parameters: an offset from
- the parent physical address and the length the JR registers.
-
- - interrupts
- Usage: required
- Value type: <prop_encoded-array>
- Definition: Specifies the interrupts generated by this
- device. The value of the interrupts property
- consists of one interrupt specifier. The format
- of the specifier is defined by the binding document
- describing the node's interrupt parent.
-
-EXAMPLE
- jr@1000 {
- compatible = "fsl,sec-v6.0-job-ring";
- reg = <0x1000 0x1000>;
- interrupts = <49 2 0 0>;
- };
-
-===================================================================
-Full Example
-
-Since some chips may contain more than one SEC, the dtsi contains
-only the node contents, not the node itself. A chip using the SEC
-should include the dtsi inside each SEC node. Example:
-
-In qoriq-sec6.0.dtsi:
-
- compatible = "fsl,sec-v6.0";
- fsl,sec-era = <6>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- jr@1000 {
- compatible = "fsl,sec-v6.0-job-ring",
- "fsl,sec-v5.2-job-ring",
- "fsl,sec-v5.0-job-ring",
- "fsl,sec-v4.4-job-ring",
- "fsl,sec-v4.0-job-ring";
- reg = <0x1000 0x1000>;
- };
-
- jr@2000 {
- compatible = "fsl,sec-v6.0-job-ring",
- "fsl,sec-v5.2-job-ring",
- "fsl,sec-v5.0-job-ring",
- "fsl,sec-v4.4-job-ring",
- "fsl,sec-v4.0-job-ring";
- reg = <0x2000 0x1000>;
- };
-
-In the C293 device tree, we add the include of public property:
-
- crypto@a0000 {
- /include/ "qoriq-sec6.0.dtsi"
- }
-
- crypto@a0000 {
- reg = <0xa0000 0x20000>;
- ranges = <0 0xa0000 0x20000>;
-
- jr@1000 {
- interrupts = <49 2 0 0>;
- };
-
- jr@2000 {
- interrupts = <50 2 0 0>;
- };
- };
diff --git a/Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml b/Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml
new file mode 100644
index 000000000000..2bfac9d1c020
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/hisilicon,hip06-sec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon hip06/hip07 Security Accelerator
+
+maintainers:
+ - Jonathan Cameron <Jonathan.Cameron@huawei.com>
+
+properties:
+ compatible:
+ enum:
+ - hisilicon,hip06-sec
+ - hisilicon,hip07-sec
+
+ reg:
+ items:
+ - description: Registers for backend processing engines
+ - description: Registers for common functionality
+ - description: Registers for queue 0
+ - description: Registers for queue 1
+ - description: Registers for queue 2
+ - description: Registers for queue 3
+ - description: Registers for queue 4
+ - description: Registers for queue 5
+ - description: Registers for queue 6
+ - description: Registers for queue 7
+ - description: Registers for queue 8
+ - description: Registers for queue 9
+ - description: Registers for queue 10
+ - description: Registers for queue 11
+ - description: Registers for queue 12
+ - description: Registers for queue 13
+ - description: Registers for queue 14
+ - description: Registers for queue 15
+
+ interrupts:
+ items:
+ - description: SEC unit error queue interrupt
+ - description: Completion interrupt for queue 0
+ - description: Error interrupt for queue 0
+ - description: Completion interrupt for queue 1
+ - description: Error interrupt for queue 1
+ - description: Completion interrupt for queue 2
+ - description: Error interrupt for queue 2
+ - description: Completion interrupt for queue 3
+ - description: Error interrupt for queue 3
+ - description: Completion interrupt for queue 4
+ - description: Error interrupt for queue 4
+ - description: Completion interrupt for queue 5
+ - description: Error interrupt for queue 5
+ - description: Completion interrupt for queue 6
+ - description: Error interrupt for queue 6
+ - description: Completion interrupt for queue 7
+ - description: Error interrupt for queue 7
+ - description: Completion interrupt for queue 8
+ - description: Error interrupt for queue 8
+ - description: Completion interrupt for queue 9
+ - description: Error interrupt for queue 9
+ - description: Completion interrupt for queue 10
+ - description: Error interrupt for queue 10
+ - description: Completion interrupt for queue 11
+ - description: Error interrupt for queue 11
+ - description: Completion interrupt for queue 12
+ - description: Error interrupt for queue 12
+ - description: Completion interrupt for queue 13
+ - description: Error interrupt for queue 13
+ - description: Completion interrupt for queue 14
+ - description: Error interrupt for queue 14
+ - description: Completion interrupt for queue 15
+ - description: Error interrupt for queue 15
+
+ dma-coherent: true
+
+ iommus:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - dma-coherent
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ crypto@400d2000000 {
+ compatible = "hisilicon,hip07-sec";
+ reg = <0x400 0xd0000000 0x0 0x10000
+ 0x400 0xd2000000 0x0 0x10000
+ 0x400 0xd2010000 0x0 0x10000
+ 0x400 0xd2020000 0x0 0x10000
+ 0x400 0xd2030000 0x0 0x10000
+ 0x400 0xd2040000 0x0 0x10000
+ 0x400 0xd2050000 0x0 0x10000
+ 0x400 0xd2060000 0x0 0x10000
+ 0x400 0xd2070000 0x0 0x10000
+ 0x400 0xd2080000 0x0 0x10000
+ 0x400 0xd2090000 0x0 0x10000
+ 0x400 0xd20a0000 0x0 0x10000
+ 0x400 0xd20b0000 0x0 0x10000
+ 0x400 0xd20c0000 0x0 0x10000
+ 0x400 0xd20d0000 0x0 0x10000
+ 0x400 0xd20e0000 0x0 0x10000
+ 0x400 0xd20f0000 0x0 0x10000
+ 0x400 0xd2100000 0x0 0x10000>;
+ interrupts = <576 4>,
+ <577 1>, <578 4>,
+ <579 1>, <580 4>,
+ <581 1>, <582 4>,
+ <583 1>, <584 4>,
+ <585 1>, <586 4>,
+ <587 1>, <588 4>,
+ <589 1>, <590 4>,
+ <591 1>, <592 4>,
+ <593 1>, <594 4>,
+ <595 1>, <596 4>,
+ <597 1>, <598 4>,
+ <599 1>, <600 4>,
+ <601 1>, <602 4>,
+ <603 1>, <604 4>,
+ <605 1>, <606 4>,
+ <607 1>, <608 4>;
+ dma-coherent;
+ iommus = <&p1_smmu_alg_a 0x600>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt b/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
deleted file mode 100644
index d28fd1af01b4..000000000000
--- a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-* Hisilicon hip07 Security Accelerator (SEC)
-
-Required properties:
-- compatible: Must contain one of
- - "hisilicon,hip06-sec"
- - "hisilicon,hip07-sec"
-- reg: Memory addresses and lengths of the memory regions through which
- this device is controlled.
- Region 0 has registers to control the backend processing engines.
- Region 1 has registers for functionality common to all queues.
- Regions 2-18 have registers for the 16 individual queues which are isolated
- both in hardware and within the driver.
-- interrupts: Interrupt specifiers.
- Refer to interrupt-controller/interrupts.txt for generic interrupt client node
- bindings.
- Interrupt 0 is for the SEC unit error queue.
- Interrupt 2N + 1 is the completion interrupt for queue N.
- Interrupt 2N + 2 is the error interrupt for queue N.
-- dma-coherent: The driver assumes coherent dma is possible.
-
-Optional properties:
-- iommus: The SEC units are behind smmu-v3 iommus.
- Refer to iommu/arm,smmu-v3.txt for more information.
-
-Example:
-
-p1_sec_a: crypto@400d2000000 {
- compatible = "hisilicon,hip07-sec";
- reg = <0x400 0xd0000000 0x0 0x10000
- 0x400 0xd2000000 0x0 0x10000
- 0x400 0xd2010000 0x0 0x10000
- 0x400 0xd2020000 0x0 0x10000
- 0x400 0xd2030000 0x0 0x10000
- 0x400 0xd2040000 0x0 0x10000
- 0x400 0xd2050000 0x0 0x10000
- 0x400 0xd2060000 0x0 0x10000
- 0x400 0xd2070000 0x0 0x10000
- 0x400 0xd2080000 0x0 0x10000
- 0x400 0xd2090000 0x0 0x10000
- 0x400 0xd20a0000 0x0 0x10000
- 0x400 0xd20b0000 0x0 0x10000
- 0x400 0xd20c0000 0x0 0x10000
- 0x400 0xd20d0000 0x0 0x10000
- 0x400 0xd20e0000 0x0 0x10000
- 0x400 0xd20f0000 0x0 0x10000
- 0x400 0xd2100000 0x0 0x10000>;
- interrupt-parent = <&p1_mbigen_sec_a>;
- iommus = <&p1_smmu_alg_a 0x600>;
- dma-coherent;
- interrupts = <576 4>,
- <577 1>, <578 4>,
- <579 1>, <580 4>,
- <581 1>, <582 4>,
- <583 1>, <584 4>,
- <585 1>, <586 4>,
- <587 1>, <588 4>,
- <589 1>, <590 4>,
- <591 1>, <592 4>,
- <593 1>, <594 4>,
- <595 1>, <596 4>,
- <597 1>, <598 4>,
- <599 1>, <600 4>,
- <601 1>, <602 4>,
- <603 1>, <604 4>,
- <605 1>, <606 4>,
- <607 1>, <608 4>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml b/Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml
new file mode 100644
index 000000000000..46617561ef94
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/img,hash-accelerator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Imagination Technologies hardware hash accelerator
+
+maintainers:
+ - James Hartley <james.hartley@imgtec.com>
+
+description:
+ The hash accelerator provides hardware hashing acceleration for
+ SHA1, SHA224, SHA256 and MD5 hashes.
+
+properties:
+ compatible:
+ const: img,hash-accelerator
+
+ reg:
+ items:
+ - description: Register base address and size
+ - description: DMA port specifier
+
+ interrupts:
+ maxItems: 1
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ items:
+ - const: tx
+
+ clocks:
+ items:
+ - description: System clock for hash block registers
+ - description: Hash clock for data path
+
+ clock-names:
+ items:
+ - const: sys
+ - const: hash
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - dmas
+ - dma-names
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+ #include <dt-bindings/clock/pistachio-clk.h>
+
+ hash@18149600 {
+ compatible = "img,hash-accelerator";
+ reg = <0x18149600 0x100>, <0x18101100 0x4>;
+ interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 8 0xffffffff 0>;
+ dma-names = "tx";
+ clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+ clock-names = "sys", "hash";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
deleted file mode 100644
index 91a3d757d641..000000000000
--- a/Documentation/devicetree/bindings/crypto/img-hash.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Imagination Technologies hardware hash accelerator
-
-The hash accelerator provides hardware hashing acceleration for
-SHA1, SHA224, SHA256 and MD5 hashes
-
-Required properties:
-
-- compatible : "img,hash-accelerator"
-- reg : Offset and length of the register set for the module, and the DMA port
-- interrupts : The designated IRQ line for the hashing module.
-- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
-- dma-names : Should be "tx"
-- clocks : Clock specifiers
-- clock-names : "sys" Used to clock the hash block registers
- "hash" Used to clock data through the accelerator
-
-Example:
-
- hash: hash@18149600 {
- compatible = "img,hash-accelerator";
- reg = <0x18149600 0x100>, <0x18101100 0x4>;
- interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
- dmas = <&dma 8 0xffffffff 0>;
- dma-names = "tx";
- clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
- clock-names = "sys", "hash";
- };
diff --git a/Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml b/Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml
new file mode 100644
index 000000000000..b44d36c50ec4
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/marvell,orion-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Cryptographic Engines And Security Accelerator
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Boris Brezillon <bbrezillon@kernel.org>
+
+description: |
+ Marvell Cryptographic Engines And Security Accelerator
+
+properties:
+ compatible:
+ enum:
+ - marvell,armada-370-crypto
+ - marvell,armada-xp-crypto
+ - marvell,armada-375-crypto
+ - marvell,armada-38x-crypto
+ - marvell,dove-crypto
+ - marvell,kirkwood-crypto
+ - marvell,orion-crypto
+
+ reg:
+ minItems: 1
+ items:
+ - description: Registers region
+ - description: SRAM region
+ deprecated: true
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: regs
+ - const: sram
+ deprecated: true
+
+ interrupts:
+ description: One interrupt for each CESA engine
+ minItems: 1
+ maxItems: 2
+
+ clocks:
+ description: One or two clocks for each CESA engine
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: cesa0
+ - const: cesa1
+ - const: cesaz0
+ - const: cesaz1
+
+ marvell,crypto-srams:
+ description: Phandle(s) to crypto SRAM.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 2
+ items:
+ maxItems: 1
+
+ marvell,crypto-sram-size:
+ description: SRAM size reserved for crypto operations.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0x800
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - marvell,crypto-srams
+
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ enum:
+ - marvell,kirkwood-crypto
+ - marvell,orion-crypto
+ then:
+ required:
+ - clocks
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - marvell,armada-370-crypto
+ - marvell,armada-375-crypto
+ - marvell,armada-38x-crypto
+ - marvell,armada-xp-crypto
+ then:
+ required:
+ - clock-names
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - marvell,armada-375-crypto
+ - marvell,armada-38x-crypto
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ clock-names:
+ minItems: 4
+ else:
+ properties:
+ clocks:
+ maxItems: 2
+ clock-names:
+ maxItems: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@30000 {
+ compatible = "marvell,orion-crypto";
+ reg = <0x30000 0x10000>;
+ reg-names = "regs";
+ interrupts = <22>;
+ marvell,crypto-srams = <&crypto_sram>;
+ marvell,crypto-sram-size = <0x600>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/marvell-cesa.txt b/Documentation/devicetree/bindings/crypto/marvell-cesa.txt
deleted file mode 100644
index 28d3f2496b89..000000000000
--- a/Documentation/devicetree/bindings/crypto/marvell-cesa.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Marvell Cryptographic Engines And Security Accelerator
-
-Required properties:
-- compatible: should be one of the following string
- "marvell,orion-crypto"
- "marvell,kirkwood-crypto"
- "marvell,dove-crypto"
- "marvell,armada-370-crypto"
- "marvell,armada-xp-crypto"
- "marvell,armada-375-crypto"
- "marvell,armada-38x-crypto"
-- reg: base physical address of the engine and length of memory mapped
- region. Can also contain an entry for the SRAM attached to the CESA,
- but this representation is deprecated and marvell,crypto-srams should
- be used instead
-- reg-names: "regs". Can contain an "sram" entry, but this representation
- is deprecated and marvell,crypto-srams should be used instead
-- interrupts: interrupt number
-- clocks: reference to the crypto engines clocks. This property is not
- required for orion and kirkwood platforms
-- clock-names: "cesaX" and "cesazX", X should be replaced by the crypto engine
- id.
- This property is not required for the orion and kirkwoord
- platforms.
- "cesazX" clocks are not required on armada-370 platforms
-- marvell,crypto-srams: phandle to crypto SRAM definitions
-
-Optional properties:
-- marvell,crypto-sram-size: SRAM size reserved for crypto operations, if not
- specified the whole SRAM is used (2KB)
-
-
-Examples:
-
- crypto@90000 {
- compatible = "marvell,armada-xp-crypto";
- reg = <0x90000 0x10000>;
- reg-names = "regs";
- interrupts = <48>, <49>;
- clocks = <&gateclk 23>, <&gateclk 23>;
- clock-names = "cesa0", "cesa1";
- marvell,crypto-srams = <&crypto_sram0>, <&crypto_sram1>;
- marvell,crypto-sram-size = <0x600>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
deleted file mode 100644
index 450da3661cad..000000000000
--- a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-MediaTek cryptographic accelerators
-
-Required properties:
-- compatible: Should be "mediatek,eip97-crypto"
-- reg: Address and length of the register set for the device
-- interrupts: Should contain the five crypto engines interrupts in numeric
- order. These are global system and four descriptor rings.
-- clocks: the clock used by the core
-- clock-names: Must contain "cryp".
-- power-domains: Must contain a reference to the PM domain.
-
-
-Example:
- crypto: crypto@1b240000 {
- compatible = "mediatek,eip97-crypto";
- reg = <0 0x1b240000 0 0x20000>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&ethsys CLK_ETHSYS_CRYPTO>;
- clock-names = "cryp";
- power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/mv_cesa.txt b/Documentation/devicetree/bindings/crypto/mv_cesa.txt
deleted file mode 100644
index d9b92e2f3138..000000000000
--- a/Documentation/devicetree/bindings/crypto/mv_cesa.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Marvell Cryptographic Engines And Security Accelerator
-
-Required properties:
-- compatible: should be one of the following string
- "marvell,orion-crypto"
- "marvell,kirkwood-crypto"
- "marvell,dove-crypto"
-- reg: base physical address of the engine and length of memory mapped
- region. Can also contain an entry for the SRAM attached to the CESA,
- but this representation is deprecated and marvell,crypto-srams should
- be used instead
-- reg-names: "regs". Can contain an "sram" entry, but this representation
- is deprecated and marvell,crypto-srams should be used instead
-- interrupts: interrupt number
-- clocks: reference to the crypto engines clocks. This property is only
- required for Dove platforms
-- marvell,crypto-srams: phandle to crypto SRAM definitions
-
-Optional properties:
-- marvell,crypto-sram-size: SRAM size reserved for crypto operations, if not
- specified the whole SRAM is used (2KB)
-
-Examples:
-
- crypto@30000 {
- compatible = "marvell,orion-crypto";
- reg = <0x30000 0x10000>;
- reg-names = "regs";
- interrupts = <22>;
- marvell,crypto-srams = <&crypto_sram>;
- marvell,crypto-sram-size = <0x600>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/qcom-qce.yaml b/Documentation/devicetree/bindings/crypto/qcom-qce.yaml
index 3f35122f7873..e009cb712fb8 100644
--- a/Documentation/devicetree/bindings/crypto/qcom-qce.yaml
+++ b/Documentation/devicetree/bindings/crypto/qcom-qce.yaml
@@ -45,6 +45,7 @@ properties:
- items:
- enum:
+ - qcom,qcs615-qce
- qcom,qcs8300-qce
- qcom,sa8775p-qce
- qcom,sc7280-qce
diff --git a/Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml b/Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml
index 3dd70933ed8e..d810043b56b6 100644
--- a/Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml
@@ -69,13 +69,13 @@ examples:
#include <dt-bindings/interrupt-controller/irq.h>
gpio@fffff400 {
- compatible = "atmel,at91rm9200-gpio";
- reg = <0xfffff400 0x200>;
- interrupts = <2 IRQ_TYPE_LEVEL_HIGH 1>;
- #gpio-cells = <2>;
- gpio-controller;
- interrupt-controller;
- #interrupt-cells = <2>;
- clocks = <&pmc PMC_TYPE_PERIPHERAL 2>;
+ compatible = "atmel,at91rm9200-gpio";
+ reg = <0xfffff400 0x200>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH 1>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 2>;
};
...
diff --git a/Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml b/Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml
new file mode 100644
index 000000000000..a05f6ea619c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/blaize,blzp1600-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Blaize BLZP1600 GPIO controller
+
+description:
+ Blaize BLZP1600 GPIO controller is an implementation of the VeriSilicon
+ APB GPIO v0.2 IP block. It has 32 ports each of which are intended to be
+ represented as child nodes with the generic GPIO-controller properties
+ as described in this binding's file.
+
+maintainers:
+ - Nikolaos Pasaloukos <nikolaos.pasaloukos@blaize.com>
+ - James Cowgill <james.cowgill@blaize.com>
+ - Matt Redfearn <matt.redfearn@blaize.com>
+ - Neil Jones <neil.jones@blaize.com>
+
+properties:
+ $nodename:
+ pattern: "^gpio@[0-9a-f]+$"
+
+ compatible:
+ enum:
+ - blaize,blzp1600-gpio
+
+ reg:
+ maxItems: 1
+
+ gpio-controller: true
+
+ '#gpio-cells':
+ const: 2
+
+ ngpios:
+ default: 32
+ minimum: 1
+ maximum: 32
+
+ interrupts:
+ maxItems: 1
+
+ gpio-line-names: true
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 2
+
+required:
+ - compatible
+ - reg
+ - gpio-controller
+ - '#gpio-cells'
+
+dependencies:
+ interrupt-controller: [ interrupts ]
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ gpio: gpio@4c0000 {
+ compatible = "blaize,blzp1600-gpio";
+ reg = <0x004c0000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml b/Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml
index 0e5c22929bde..ab35bcf98101 100644
--- a/Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml
+++ b/Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml
@@ -71,15 +71,15 @@ unevaluatedProperties: false
examples:
- |
spi {
- #address-cells = <1>;
- #size-cells = <0>;
-
- gpio5: gpio5@0 {
- compatible = "fairchild,74hc595";
- reg = <0>;
- gpio-controller;
- #gpio-cells = <2>;
- registers-number = <4>;
- spi-max-frequency = <100000>;
- };
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio5@0 {
+ compatible = "fairchild,74hc595";
+ reg = <0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ registers-number = <4>;
+ spi-max-frequency = <100000>;
+ };
};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml b/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml
index 8ff54369d16c..b58e08c8ecd8 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml
@@ -84,52 +84,52 @@ examples:
reg = <0x80018000 0x2000>;
gpio@0 {
- compatible = "fsl,imx28-gpio";
- reg = <0>;
- interrupts = <127>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "fsl,imx28-gpio";
+ reg = <0>;
+ interrupts = <127>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
gpio@1 {
- compatible = "fsl,imx28-gpio";
- reg = <1>;
- interrupts = <126>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "fsl,imx28-gpio";
+ reg = <1>;
+ interrupts = <126>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
gpio@2 {
- compatible = "fsl,imx28-gpio";
- reg = <2>;
- interrupts = <125>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "fsl,imx28-gpio";
+ reg = <2>;
+ interrupts = <125>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
gpio@3 {
- compatible = "fsl,imx28-gpio";
- reg = <3>;
- interrupts = <124>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "fsl,imx28-gpio";
+ reg = <3>;
+ interrupts = <124>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
gpio@4 {
- compatible = "fsl,imx28-gpio";
- reg = <4>;
- interrupts = <123>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "fsl,imx28-gpio";
+ reg = <4>;
+ interrupts = <123>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml b/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
index 7b1eb08fa055..4d3f52f8d1b8 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
@@ -17,6 +17,9 @@ properties:
compatible:
oneOf:
- items:
+ - const: toradex,ecgpiol16
+ - const: nxp,pcal6416
+ - items:
- const: diodes,pi4ioe5v6534q
- const: nxp,pcal6534
- items:
@@ -132,6 +135,7 @@ allOf:
- maxim,max7325
- maxim,max7326
- maxim,max7327
+ - toradex,ecgpiol16
then:
properties:
reset-gpios: false
diff --git a/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml b/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml
index 4fb32e9aec0a..a31f64b6d40b 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml
@@ -70,6 +70,13 @@ properties:
minItems: 1
maxItems: 4
+ gpio-reserved-ranges: true
+
+ ngpios:
+ minimum: 1
+ maximum: 32
+ default: 32
+
patternProperties:
"^.+-hog(-[0-9]+)?$":
type: object
diff --git a/Documentation/devicetree/bindings/gpio/maxim,max77759-gpio.yaml b/Documentation/devicetree/bindings/gpio/maxim,max77759-gpio.yaml
new file mode 100644
index 000000000000..55734190d5eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/maxim,max77759-gpio.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/maxim,max77759-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim Integrated MAX77759 GPIO
+
+maintainers:
+ - André Draszik <andre.draszik@linaro.org>
+
+description: |
+ This module is part of the MAX77759 PMIC. For additional information, see
+ Documentation/devicetree/bindings/mfd/maxim,max77759.yaml.
+
+ The MAX77759 is a PMIC integrating, amongst others, a GPIO controller
+ including interrupt support for 2 GPIO lines.
+
+properties:
+ compatible:
+ const: maxim,max77759-gpio
+
+ "#gpio-cells":
+ const: 2
+
+ gpio-controller: true
+
+ gpio-line-names:
+ minItems: 1
+ maxItems: 2
+
+ "#interrupt-cells":
+ const: 2
+
+ interrupt-controller: true
+
+required:
+ - compatible
+ - "#gpio-cells"
+ - gpio-controller
+ - "#interrupt-cells"
+ - interrupt-controller
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml b/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml
index 4ef06b2ff1ff..065f5761a93f 100644
--- a/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml
@@ -111,6 +111,9 @@ properties:
gpio-controller: true
+ gpio-ranges:
+ maxItems: 1
+
"#gpio-cells":
description: |
Indicates how many cells are used in a consumer's GPIO specifier. In the
diff --git a/Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml b/Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml
index 8bca574bb66d..5a6ecaa7b44b 100644
--- a/Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml
+++ b/Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml
@@ -128,17 +128,17 @@ additionalProperties: false
examples:
- |
i2c {
- #address-cells = <1>;
- #size-cells = <0>;
-
- pcf8575: gpio@20 {
- compatible = "nxp,pcf8575";
- reg = <0x20>;
- interrupt-parent = <&irqpin2>;
- interrupts = <3 0>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
- };
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio@20 {
+ compatible = "nxp,pcf8575";
+ reg = <0x20>;
+ interrupt-parent = <&irqpin2>;
+ interrupts = <3 0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
};
diff --git a/Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml b/Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml
index 39fd959c45d2..728099c65824 100644
--- a/Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml
@@ -81,7 +81,7 @@ dependencies:
examples:
- |
- gpio@3500 {
+ gpio@3500 {
compatible = "realtek,rtl8380-gpio", "realtek,otto-gpio";
reg = <0x3500 0x1c>;
gpio-controller;
@@ -91,9 +91,9 @@ examples:
#interrupt-cells = <2>;
interrupt-parent = <&rtlintc>;
interrupts = <23>;
- };
+ };
- |
- gpio@3300 {
+ gpio@3300 {
compatible = "realtek,rtl9300-gpio", "realtek,otto-gpio";
reg = <0x3300 0x1c>, <0x3338 0x8>;
gpio-controller;
@@ -103,6 +103,6 @@ examples:
#interrupt-cells = <2>;
interrupt-parent = <&rtlintc>;
interrupts = <13>;
- };
+ };
...
diff --git a/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml b/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml
index 8bdef812c87c..49fb8f613ead 100644
--- a/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml
+++ b/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml
@@ -57,14 +57,14 @@ examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
gpio0: gpio@e0050000 {
- compatible = "renesas,em-gio";
- reg = <0xe0050000 0x2c>, <0xe0050040 0x20>;
- interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
- gpio-controller;
- #gpio-cells = <2>;
- gpio-ranges = <&pfc 0 0 32>;
- ngpios = <32>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "renesas,em-gio";
+ reg = <0xe0050000 0x2c>, <0xe0050040 0x20>;
+ interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pfc 0 0 32>;
+ ngpios = <32>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
diff --git a/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml b/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml
index cc7a950a6030..d32e103a64aa 100644
--- a/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml
@@ -138,16 +138,16 @@ examples:
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/r8a77470-sysc.h>
gpio3: gpio@e6053000 {
- compatible = "renesas,gpio-r8a77470", "renesas,rcar-gen2-gpio";
- reg = <0xe6053000 0x50>;
- interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cpg CPG_MOD 909>;
- power-domains = <&sysc R8A77470_PD_ALWAYS_ON>;
- resets = <&cpg 909>;
- gpio-controller;
- #gpio-cells = <2>;
- gpio-ranges = <&pfc 0 96 30>;
- gpio-reserved-ranges = <17 10>;
- interrupt-controller;
- #interrupt-cells = <2>;
+ compatible = "renesas,gpio-r8a77470", "renesas,rcar-gen2-gpio";
+ reg = <0xe6053000 0x50>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 909>;
+ power-domains = <&sysc R8A77470_PD_ALWAYS_ON>;
+ resets = <&cpg 909>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pfc 0 96 30>;
+ gpio-reserved-ranges = <17 10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
diff --git a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
index fc095646adea..4bdc201b719e 100644
--- a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
@@ -76,8 +76,8 @@ additionalProperties: false
examples:
- |
- #include <dt-bindings/clock/sifive-fu540-prci.h>
- gpio@10060000 {
+ #include <dt-bindings/clock/sifive-fu540-prci.h>
+ gpio@10060000 {
compatible = "sifive,fu540-c000-gpio", "sifive,gpio0";
interrupt-parent = <&plic>;
interrupts = <7>, <8>, <9>, <10>, <11>, <12>, <13>, <14>, <15>, <16>,
@@ -88,6 +88,6 @@ examples:
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
- };
+ };
...
diff --git a/Documentation/devicetree/bindings/gpio/spacemit,k1-gpio.yaml b/Documentation/devicetree/bindings/gpio/spacemit,k1-gpio.yaml
new file mode 100644
index 000000000000..ec0232e72c71
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/spacemit,k1-gpio.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/spacemit,k1-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SpacemiT K1 GPIO controller
+
+maintainers:
+ - Yixun Lan <dlan@gentoo.org>
+
+description:
+ The controller's registers are organized as sets of eight 32-bit
+ registers with each set of port controlling 32 pins. A single
+ interrupt line is shared for all of the pins by the controller.
+
+properties:
+ $nodename:
+ pattern: "^gpio@[0-9a-f]+$"
+
+ compatible:
+ const: spacemit,k1-gpio
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: GPIO Core Clock
+ - description: GPIO Bus Clock
+
+ clock-names:
+ items:
+ - const: core
+ - const: bus
+
+ resets:
+ maxItems: 1
+
+ gpio-controller: true
+
+ "#gpio-cells":
+ const: 3
+ description:
+ The first two cells are the GPIO bank index and offset inside the bank,
+ the third cell should specify GPIO flag.
+
+ gpio-ranges: true
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 3
+ description:
+ The first two cells are the GPIO bank index and offset inside the bank,
+ the third cell should specify interrupt flag. The controller does not
+ support level interrupts, so flags of IRQ_TYPE_LEVEL_HIGH,
+ IRQ_TYPE_LEVEL_LOW should not be used.
+ Refer <dt-bindings/interrupt-controller/irq.h> for valid flags.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - gpio-controller
+ - "#gpio-cells"
+ - interrupts
+ - interrupt-controller
+ - "#interrupt-cells"
+ - gpio-ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ gpio@d4019000 {
+ compatible = "spacemit,k1-gpio";
+ reg = <0xd4019000 0x800>;
+ clocks =<&ccu 9>, <&ccu 61>;
+ clock-names = "core", "bus";
+ gpio-controller;
+ #gpio-cells = <3>;
+ interrupts = <58>;
+ interrupt-controller;
+ interrupt-parent = <&plic>;
+ #interrupt-cells = <3>;
+ gpio-ranges = <&pinctrl 0 0 0 32>,
+ <&pinctrl 1 0 32 32>,
+ <&pinctrl 2 0 64 32>,
+ <&pinctrl 3 0 96 32>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml b/Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
index b085450b527f..712063417bc8 100644
--- a/Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
+++ b/Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
@@ -48,22 +48,22 @@ additionalProperties: false
examples:
- |
- #include <dt-bindings/interrupt-controller/irq.h>
- #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
- soc {
+ soc {
#address-cells = <2>;
#size-cells = <2>;
gpio: gpio@28020000 {
- compatible = "toshiba,gpio-tmpv7708";
- reg = <0 0x28020000 0 0x1000>;
- #gpio-cells = <0x2>;
- gpio-ranges = <&pmux 0 0 32>;
- gpio-controller;
- interrupt-controller;
- #interrupt-cells = <2>;
- interrupt-parent = <&gic>;
+ compatible = "toshiba,gpio-tmpv7708";
+ reg = <0 0x28020000 0 0x1000>;
+ #gpio-cells = <0x2>;
+ gpio-ranges = <&pmux 0 0 32>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&gic>;
};
- };
+ };
...
diff --git a/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml b/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml
index d3d8a2e143ed..8fbf12ca067e 100644
--- a/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml
+++ b/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml
@@ -126,29 +126,29 @@ examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
- gpio@a0020000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- reg = <0xa0020000 0x10000>;
- #gpio-cells = <2>;
- #interrupt-cells = <0x2>;
- clocks = <&zynqmp_clk 71>;
- gpio-controller;
- interrupt-controller;
- interrupt-names = "ip2intc_irpt";
- interrupt-parent = <&gic>;
- interrupts = <0 89 4>;
- xlnx,all-inputs = <0x0>;
- xlnx,all-inputs-2 = <0x0>;
- xlnx,all-outputs = <0x0>;
- xlnx,all-outputs-2 = <0x0>;
- xlnx,dout-default = <0x0>;
- xlnx,dout-default-2 = <0x0>;
- xlnx,gpio-width = <0x20>;
- xlnx,gpio2-width = <0x20>;
- xlnx,interrupt-present = <0x1>;
- xlnx,is-dual = <0x1>;
- xlnx,tri-default = <0xFFFFFFFF>;
- xlnx,tri-default-2 = <0xFFFFFFFF>;
- };
+ gpio@a0020000 {
+ compatible = "xlnx,xps-gpio-1.00.a";
+ reg = <0xa0020000 0x10000>;
+ #gpio-cells = <2>;
+ #interrupt-cells = <0x2>;
+ clocks = <&zynqmp_clk 71>;
+ gpio-controller;
+ interrupt-controller;
+ interrupt-names = "ip2intc_irpt";
+ interrupt-parent = <&gic>;
+ interrupts = <0 89 4>;
+ xlnx,all-inputs = <0x0>;
+ xlnx,all-inputs-2 = <0x0>;
+ xlnx,all-outputs = <0x0>;
+ xlnx,all-outputs-2 = <0x0>;
+ xlnx,dout-default = <0x0>;
+ xlnx,dout-default-2 = <0x0>;
+ xlnx,gpio-width = <0x20>;
+ xlnx,gpio2-width = <0x20>;
+ xlnx,interrupt-present = <0x1>;
+ xlnx,is-dual = <0x1>;
+ xlnx,tri-default = <0xFFFFFFFF>;
+ xlnx,tri-default-2 = <0xFFFFFFFF>;
+ };
...
diff --git a/Documentation/devicetree/bindings/interrupt-controller/econet,en751221-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/econet,en751221-intc.yaml
new file mode 100644
index 000000000000..5536319c49c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/econet,en751221-intc.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/econet,en751221-intc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: EcoNet EN751221 Interrupt Controller
+
+maintainers:
+ - Caleb James DeLisle <cjd@cjdns.fr>
+
+description:
+ The EcoNet EN751221 Interrupt Controller is a simple interrupt controller
+ designed for the MIPS 34Kc MT SMP processor with 2 VPEs. Each interrupt can
+ be routed to either VPE but not both, so to support per-CPU interrupts, a
+ secondary IRQ number is allocated to control masking/unmasking on VPE#1. For
+ lack of a better term we call these "shadow interrupts". The assignment of
+ shadow interrupts is defined by the SoC integrator when wiring the interrupt
+ lines, so they are configurable in the device tree.
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+ compatible:
+ const: econet,en751221-intc
+
+ reg:
+ maxItems: 1
+
+ "#interrupt-cells":
+ const: 1
+
+ interrupt-controller: true
+
+ interrupts:
+ maxItems: 1
+ description: Interrupt line connecting this controller to its parent.
+
+ econet,shadow-interrupts:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ description:
+ An array of interrupt number pairs where each pair represents a shadow
+ interrupt relationship. The first number in each pair is the primary IRQ,
+ and the second is its shadow IRQ used for VPE#1 control. For example,
+ <8 3> means IRQ 8 is shadowed by IRQ 3, so IRQ 3 cannot be mapped, but
+ when VPE#1 requests IRQ 8, it will manipulate the IRQ 3 mask bit.
+ minItems: 1
+ maxItems: 20
+ items:
+ items:
+ - description: primary per-CPU IRQ
+ - description: shadow IRQ number
+
+required:
+ - compatible
+ - reg
+ - interrupt-controller
+ - "#interrupt-cells"
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ interrupt-controller@1fb40000 {
+ compatible = "econet,en751221-intc";
+ reg = <0x1fb40000 0x100>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+
+ econet,shadow-interrupts = <7 2>, <8 3>, <13 12>, <30 29>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml b/Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml
index e1ffd55fa7bf..f6b8b1d92f79 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml
@@ -18,7 +18,9 @@ allOf:
properties:
compatible:
- const: sophgo,sg2042-msi
+ enum:
+ - sophgo,sg2042-msi
+ - sophgo,sg2044-msi
reg:
items:
diff --git a/Documentation/devicetree/bindings/memory-controllers/renesas,rzg3e-xspi.yaml b/Documentation/devicetree/bindings/memory-controllers/renesas,rzg3e-xspi.yaml
new file mode 100644
index 000000000000..2bfe63ec62dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/renesas,rzg3e-xspi.yaml
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/renesas,rzg3e-xspi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Expanded Serial Peripheral Interface (xSPI)
+
+maintainers:
+ - Biju Das <biju.das.jz@bp.renesas.com>
+
+description: |
+ Renesas xSPI allows a SPI flash connected to the SoC to be accessed via
+ the memory-mapping or the manual command mode.
+
+ The flash chip itself should be represented by a subnode of the XSPI node.
+ The flash interface is selected based on the "compatible" property of this
+ subnode:
+ - "jedec,spi-nor";
+
+allOf:
+ - $ref: /schemas/spi/spi-controller.yaml#
+
+properties:
+ compatible:
+ const: renesas,r9a09g047-xspi # RZ/G3E
+
+ reg:
+ items:
+ - description: xSPI registers
+ - description: direct mapping area
+
+ reg-names:
+ items:
+ - const: regs
+ - const: dirmap
+
+ interrupts:
+ items:
+ - description: Interrupt pulse signal by factors excluding errors
+ - description: Interrupt pulse signal by error factors
+
+ interrupt-names:
+ items:
+ - const: pulse
+ - const: err_pulse
+
+ clocks:
+ items:
+ - description: AHB clock
+ - description: AXI clock
+ - description: SPI clock
+ - description: Double speed SPI clock
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: axi
+ - const: spi
+ - const: spix2
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ items:
+ - description: Hardware reset
+ - description: AXI reset
+
+ reset-names:
+ items:
+ - const: hresetn
+ - const: aresetn
+
+ renesas,xspi-cs-addr-sys:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ Phandle to the system controller (sys) that allows to configure
+ xSPI CS0 and CS1 addresses.
+
+patternProperties:
+ "flash@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ contains:
+ const: jedec,spi-nor
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - interrupt-names
+ - clocks
+ - clock-names
+ - power-domains
+ - resets
+ - reset-names
+ - '#address-cells'
+ - '#size-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/renesas,r9a09g047-cpg.h>
+
+ spi@11030000 {
+ compatible = "renesas,r9a09g047-xspi";
+ reg = <0x11030000 0x10000>, <0x20000000 0x10000000>;
+ reg-names = "regs", "dirmap";
+ interrupts = <GIC_SPI 228 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 229 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "pulse", "err_pulse";
+ clocks = <&cpg CPG_MOD 0x9f>, <&cpg CPG_MOD 0xa0>,
+ <&cpg CPG_CORE 9>, <&cpg CPG_MOD 0xa1>;
+ clock-names = "ahb", "axi", "spi", "spix2";
+ power-domains = <&cpg>;
+ resets = <&cpg 0xa3>, <&cpg 0xa4>;
+ reset-names = "hresetn", "aresetn";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>;
+ spi-tx-bus-width = <1>;
+ spi-rx-bus-width = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77759.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77759.yaml
new file mode 100644
index 000000000000..525de9ab3c2b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max77759.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max77759.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim Integrated MAX77759 PMIC for USB Type-C applications
+
+maintainers:
+ - André Draszik <andre.draszik@linaro.org>
+
+description: |
+ This is a part of device tree bindings for the MAX77759 companion Power
+ Management IC for USB Type-C applications.
+
+ The MAX77759 includes Battery Charger, Fuel Gauge, temperature sensors, USB
+ Type-C Port Controller (TCPC), NVMEM, and a GPIO expander.
+
+properties:
+ compatible:
+ const: maxim,max77759
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 2
+
+ reg:
+ maxItems: 1
+
+ gpio:
+ $ref: /schemas/gpio/maxim,max77759-gpio.yaml
+
+ nvmem-0:
+ $ref: /schemas/nvmem/maxim,max77759-nvmem.yaml
+
+required:
+ - compatible
+ - interrupts
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@66 {
+ compatible = "maxim,max77759";
+ reg = <0x66>;
+ interrupts-extended = <&gpa8 3 IRQ_TYPE_LEVEL_LOW>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ gpio {
+ compatible = "maxim,max77759-gpio";
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ nvmem-0 {
+ compatible = "maxim,max77759-nvmem";
+
+ nvmem-layout {
+ compatible = "fixed-layout";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ reboot-mode@0 {
+ reg = <0x0 0x4>;
+ };
+
+ boot-reason@4 {
+ reg = <0x4 0x4>;
+ };
+
+ shutdown-user-flag@8 {
+ reg = <0x8 0x1>;
+ };
+
+ rsoc@10 {
+ reg = <0xa 0x2>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
index 9075add020bf..8e79de97b242 100644
--- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
@@ -38,6 +38,15 @@ allOf:
- items:
- const: clk_out_sd1
- const: clk_in_sd1
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,rzn1-sdhci
+ then:
+ properties:
+ interrupts:
+ minItems: 2
properties:
compatible:
@@ -46,6 +55,10 @@ properties:
- const: arasan,sdhci-4.9a # generic Arasan SDHCI 4.9a PHY
- const: arasan,sdhci-5.1 # generic Arasan SDHCI 5.1 PHY
- items:
+ - const: renesas,r9a06g032-sdhci # Renesas RZ/N1D SoC
+ - const: renesas,rzn1-sdhci # Renesas RZ/N1 family
+ - const: arasan,sdhci-8.9a
+ - items:
- const: rockchip,rk3399-sdhci-5.1 # rk3399 eMMC PHY
- const: arasan,sdhci-5.1
description:
@@ -109,7 +122,14 @@ properties:
- const: gate
interrupts:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
+
+ interrupt-names:
+ minItems: 1
+ items:
+ - const: int
+ - const: wakeup
phys:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml b/Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml
index b86ffb53b18b..62087cf920df 100644
--- a/Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml
+++ b/Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml
@@ -24,6 +24,7 @@ properties:
- fsl,t1040-esdhc
- fsl,t4240-esdhc
- fsl,ls1012a-esdhc
+ - fsl,ls1021a-esdhc
- fsl,ls1028a-esdhc
- fsl,ls1088a-esdhc
- fsl,ls1043a-esdhc
diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
index 3f48d8292d5b..ee2ddef36369 100644
--- a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
@@ -52,9 +52,14 @@ properties:
- const: core
- const: axi
+ dma-coherent: true
+
interrupts:
maxItems: 1
+ iommus:
+ maxItems: 1
+
marvell,pad-type:
$ref: /schemas/types.yaml#/definitions/string
enum:
@@ -142,7 +147,7 @@ properties:
This property provides the re-tuning counter.
allOf:
- - $ref: mmc-controller.yaml#
+ - $ref: sdhci-common.yaml#
- if:
properties:
compatible:
@@ -164,26 +169,6 @@ allOf:
marvell,pad-type: false
- - if:
- properties:
- compatible:
- contains:
- enum:
- - marvell,armada-cp110-sdhci
- - marvell,armada-ap807-sdhci
- - marvell,armada-ap806-sdhci
-
- then:
- properties:
- clocks:
- minItems: 2
-
- clock-names:
- items:
- - const: core
- - const: axi
-
-
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.txt b/Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.txt
deleted file mode 100644
index f064528effed..000000000000
--- a/Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-* Microchip PIC32 SDHCI Controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the sdhci-pic32 driver.
-
-Required properties:
-- compatible: Should be "microchip,pic32mzda-sdhci"
-- interrupts: Should contain interrupt
-- clock-names: Should be "base_clk", "sys_clk".
- See: Documentation/devicetree/bindings/resource-names.txt
-- clocks: Phandle to the clock.
- See: Documentation/devicetree/bindings/clock/clock-bindings.txt
-- pinctrl-names: A pinctrl state names "default" must be defined.
-- pinctrl-0: Phandle referencing pin configuration of the SDHCI controller.
- See: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
-
-Example:
-
- sdhci@1f8ec000 {
- compatible = "microchip,pic32mzda-sdhci";
- reg = <0x1f8ec000 0x100>;
- interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&rootclk REF4CLK>, <&rootclk PB5CLK>;
- clock-names = "base_clk", "sys_clk";
- bus-width = <4>;
- cap-sd-highspeed;
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_sdhc1>;
- };
diff --git a/Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.yaml b/Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.yaml
new file mode 100644
index 000000000000..ca0ca7df9ee9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/microchip,sdhci-pic32.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PIC32 SDHI Controller
+
+description:
+ The Microchip PIC32 family of microcontrollers (MCUs) includes models with
+ Secure Digital Host Controller Interface (SDHCI) controllers, allowing them
+ to interface with Secure Digital (SD) cards. This interface is used for reading,
+ writing, and managing data on SD cards, enabling storage and data transfer
+ capabilities in embedded systems.
+
+allOf:
+ - $ref: mmc-controller.yaml
+
+maintainers:
+ - Ulf Hansson <ulf.hansson@linaro.org>
+
+properties:
+ compatible:
+ const: microchip,pic32mzda-sdhci
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: base_clk
+ - const: sys_clk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - pinctrl-names
+ - pinctrl-0
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/microchip,pic32-clock.h>
+ mmc@1f8ec000 {
+ compatible = "microchip,pic32mzda-sdhci";
+ reg = <0x1f8ec000 0x100>;
+ interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&rootclk REF4CLK>, <&rootclk PB5CLK>;
+ clock-names = "base_clk", "sys_clk";
+ bus-width = <4>;
+ cap-sd-highspeed;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sdhc1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
index 0debccbd6519..6dd26ad31491 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
@@ -32,6 +32,7 @@ properties:
- const: mediatek,mt2701-mmc
- items:
- enum:
+ - mediatek,mt6893-mmc
- mediatek,mt8186-mmc
- mediatek,mt8188-mmc
- mediatek,mt8192-mmc
@@ -299,6 +300,7 @@ allOf:
properties:
compatible:
enum:
+ - mediatek,mt6893-mmc
- mediatek,mt8186-mmc
- mediatek,mt8188-mmc
- mediatek,mt8195-mmc
diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml
index 773baa6c2656..7563623876fc 100644
--- a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml
+++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml
@@ -69,7 +69,9 @@ properties:
- renesas,sdhi-r9a09g011 # RZ/V2M
- const: renesas,rzg2l-sdhi
- items:
- - const: renesas,sdhi-r9a09g047 # RZ/G3E
+ - enum:
+ - renesas,sdhi-r9a09g047 # RZ/G3E
+ - renesas,sdhi-r9a09g056 # RZ/V2N
- const: renesas,sdhi-r9a09g057 # RZ/V2H(P)
reg:
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
index eed9063e9bb3..2b2cbce2458b 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
@@ -60,6 +60,7 @@ properties:
- qcom,sm6125-sdhci
- qcom,sm6350-sdhci
- qcom,sm6375-sdhci
+ - qcom,sm7150-sdhci
- qcom,sm8150-sdhci
- qcom,sm8250-sdhci
- qcom,sm8350-sdhci
diff --git a/Documentation/devicetree/bindings/mmc/sdhci.txt b/Documentation/devicetree/bindings/mmc/sdhci.txt
deleted file mode 100644
index 0e9923a64024..000000000000
--- a/Documentation/devicetree/bindings/mmc/sdhci.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-The properties specific for SD host controllers. For properties shared by MMC
-host controllers refer to the mmc[1] bindings.
-
- [1] Documentation/devicetree/bindings/mmc/mmc.txt
-
-Optional properties:
-- sdhci-caps-mask: The sdhci capabilities register is incorrect. This 64bit
- property corresponds to the bits in the sdhci capability register. If the bit
- is on in the mask then the bit is incorrect in the register and should be
- turned off, before applying sdhci-caps.
-- sdhci-caps: The sdhci capabilities register is incorrect. This 64bit
- property corresponds to the bits in the sdhci capability register. If the
- bit is on in the property then the bit should be turned on.
diff --git a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
index e6e604072d3c..f882219a0a26 100644
--- a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
@@ -19,6 +19,9 @@ properties:
- rockchip,rk3562-dwcmshc
- rockchip,rk3576-dwcmshc
- const: rockchip,rk3588-dwcmshc
+ - items:
+ - const: sophgo,sg2044-dwcmshc
+ - const: sophgo,sg2042-dwcmshc
- enum:
- rockchip,rk3568-dwcmshc
- rockchip,rk3588-dwcmshc
@@ -117,10 +120,6 @@ allOf:
required:
- power-domains
- else:
- properties:
- power-domains: false
-
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/mmc/spacemit,sdhci.yaml b/Documentation/devicetree/bindings/mmc/spacemit,sdhci.yaml
new file mode 100644
index 000000000000..13d9382058fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/spacemit,sdhci.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/spacemit,sdhci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SpacemiT SDHCI Controller
+
+maintainers:
+ - Yixun Lan <dlan@gentoo.org>
+
+allOf:
+ - $ref: mmc-controller.yaml#
+
+properties:
+ compatible:
+ const: spacemit,k1-sdhci
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: core clock, used by internal controller
+ - description: io clock, output for SD, SDIO, eMMC device
+
+ clock-names:
+ items:
+ - const: core
+ - const: io
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ mmc@d4281000 {
+ compatible = "spacemit,k1-sdhci";
+ reg = <0xd4281000 0x200>;
+ interrupts = <101>;
+ interrupt-parent = <&plic>;
+ clocks = <&clk_apmu 10>, <&clk_apmu 13>;
+ clock-names = "core", "io";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt
deleted file mode 100644
index d7fb6abb3eb8..000000000000
--- a/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-* Wondermedia WM8505/WM8650 SD/MMC Host Controller
-
-This file documents differences between the core properties described
-by mmc.txt and the properties used by the wmt-sdmmc driver.
-
-Required properties:
-- compatible: Should be "wm,wm8505-sdhc".
-- interrupts: Two interrupts are required - regular irq and dma irq.
-
-Optional properties:
-- sdon-inverted: SD_ON bit is inverted on the controller
-
-Examples:
-
-sdhc@d800a000 {
- compatible = "wm,wm8505-sdhc";
- reg = <0xd800a000 0x1000>;
- interrupts = <20 21>;
- clocks = <&sdhc>;
- bus-width = <4>;
- sdon-inverted;
-};
-
diff --git a/Documentation/devicetree/bindings/mmc/wm,wm8505-sdhc.yaml b/Documentation/devicetree/bindings/mmc/wm,wm8505-sdhc.yaml
new file mode 100644
index 000000000000..5b55174e9088
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/wm,wm8505-sdhc.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/wm,wm8505-sdhc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: WonderMedia SoC SDHCI Controller
+
+maintainers:
+ - Alexey Charkov <alchark@gmail.com>
+
+allOf:
+ - $ref: mmc-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - const: wm,wm8505-sdhc
+ - items:
+ - const: wm,wm8650-sdhc
+ - const: wm,wm8505-sdhc
+ - items:
+ - const: wm,wm8750-sdhc
+ - const: wm,wm8505-sdhc
+ - items:
+ - const: wm,wm8850-sdhc
+ - const: wm,wm8505-sdhc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: SDMMC controller interrupt
+ - description: SDMMC controller DMA interrupt
+
+ sdon-inverted:
+ type: boolean
+ description: All chips before (not including) WM8505 rev. A2 treated their
+ "clock stop" bit (register offset 0x08 a.k.a. SDMMC_BUSMODE, bit 0x10)
+ as "set 1 to disable SD clock", while all the later versions treated it
+ as "set 0 to disable SD clock". Set this property for later versions of
+ wm,wm8505-sdhc. On wm,wm8650-sdhc and later this property is implied and
+ does not need to be set explicitly
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ mmc@d800a000 {
+ compatible = "wm,wm8505-sdhc";
+ reg = <0xd800a000 0x1000>;
+ interrupts = <20>, <21>;
+ clocks = <&sdhc>;
+ bus-width = <4>;
+ sdon-inverted;
+ };
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml
index e0ec53bc10c6..1525a50ded47 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: http://devicetree.org/schemas/can/microchip,mcp2510.yaml#
+$id: http://devicetree.org/schemas/net/can/microchip,mcp2510.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microchip MCP251X stand-alone CAN controller
diff --git a/Documentation/devicetree/bindings/nvmem/maxim,max77759-nvmem.yaml b/Documentation/devicetree/bindings/nvmem/maxim,max77759-nvmem.yaml
new file mode 100644
index 000000000000..1e3bd4433007
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/maxim,max77759-nvmem.yaml
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/maxim,max77759-nvmem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim Integrated MAX77759 Non Volatile Memory
+
+maintainers:
+ - André Draszik <andre.draszik@linaro.org>
+
+description: |
+ This module is part of the MAX77759 PMIC. For additional information, see
+ Documentation/devicetree/bindings/mfd/maxim,max77759.yaml.
+
+ The MAX77759 is a PMIC integrating, amongst others, Non Volatile Memory
+ (NVMEM) with 30 bytes of storage which can be used by software to store
+ information or communicate with a boot loader.
+
+properties:
+ compatible:
+ const: maxim,max77759-nvmem
+
+ wp-gpios: false
+
+required:
+ - compatible
+
+allOf:
+ - $ref: nvmem.yaml#
+
+unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/pci/pci-ep.yaml b/Documentation/devicetree/bindings/pci/pci-ep.yaml
index f75000e3093d..214caa4ec3d5 100644
--- a/Documentation/devicetree/bindings/pci/pci-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/pci-ep.yaml
@@ -17,6 +17,24 @@ properties:
$nodename:
pattern: "^pcie-ep@"
+ iommu-map:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ items:
+ items:
+ - description: Device ID (see msi-map) base
+ maximum: 0x7ffff
+ - description: phandle to IOMMU
+ - description: IOMMU specifier base (currently always 1 cell)
+ - description: Number of Device IDs
+ maximum: 0x80000
+
+ iommu-map-mask:
+ description:
+ A mask to be applied to each Device ID prior to being mapped to an
+ IOMMU specifier per the iommu-map property.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 0x7ffff
+
max-functions:
description: Maximum number of functions that can be configured
$ref: /schemas/types.yaml#/definitions/uint8
@@ -35,6 +53,56 @@ properties:
$ref: /schemas/types.yaml#/definitions/uint32
enum: [ 1, 2, 3, 4 ]
+ msi-map:
+ description: |
+ Maps a Device ID to an MSI and associated MSI specifier data.
+
+ A PCI Endpoint (EP) can use MSI as a doorbell function. This is achieved by
+ mapping the MSI controller's address into PCI BAR<n>. The PCI Root Complex
+ can write to this BAR<n>, triggering the EP to generate IRQ. This notifies
+ the EP-side driver of an event, eliminating the need for the driver to
+ continuously poll for status changes.
+
+ However, the EP cannot rely on Requester ID (RID) because the RID is
+ determined by the PCI topology of the host system. Since the EP may be
+ connected to different PCI hosts, the RID can vary between systems and is
+ therefore not a reliable identifier.
+
+ Each EP can support up to 8 physical functions and up to 65,536 virtual
+ functions. To uniquely identify each child device, a device ID is defined
+ as
+ - Bits [2:0] for the function number (func)
+ - Bits [18:3] for the virtual function index (vfunc)
+
+ The resulting device ID is computed as:
+
+ (func & 0x7) | (vfunc << 3)
+
+ The property is an arbitrary number of tuples of
+ (device-id-base, msi, msi-base,length).
+
+ Any Device ID id in the interval [id-base, id-base + length) is
+ associated with the listed MSI, with the MSI specifier
+ (id - id-base + msi-base).
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ items:
+ items:
+ - description: The Device ID base matched by the entry
+ maximum: 0x7ffff
+ - description: phandle to msi-controller node
+ - description: (optional) The msi-specifier produced for the first
+ Device ID matched by the entry. Currently, msi-specifier is 0 or
+ 1 cells.
+ - description: The length of consecutive Device IDs following the
+ Device ID base
+ maximum: 0x80000
+
+ msi-map-mask:
+ description: A mask to be applied to each Device ID prior to being
+ mapped to an msi-specifier per the msi-map property.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 0x7ffff
+
num-lanes:
description: maximum number of lanes
$ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/power/allwinner,sun50i-h6-prcm-ppu.yaml b/Documentation/devicetree/bindings/power/allwinner,sun50i-h6-prcm-ppu.yaml
new file mode 100644
index 000000000000..73a9b4d6220e
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/allwinner,sun50i-h6-prcm-ppu.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/allwinner,sun50i-h6-prcm-ppu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner SoCs PRCM power domain controller
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ The Allwinner Power Reset Clock Management (PRCM) unit contains bits to
+ control a few power domains.
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun50i-h6-prcm-ppu
+ - allwinner,sun50i-h616-prcm-ppu
+ - allwinner,sun55i-a523-prcm-ppu
+
+ reg:
+ maxItems: 1
+
+ '#power-domain-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ prcm_ppu: power-controller@7010210 {
+ compatible = "allwinner,sun50i-h616-prcm-ppu";
+ reg = <0x7010210 0x10>;
+ #power-domain-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
index 591a080ca3ff..9c7cc632abee 100644
--- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
+++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
@@ -25,6 +25,7 @@ properties:
enum:
- mediatek,mt6735-power-controller
- mediatek,mt6795-power-controller
+ - mediatek,mt6893-power-controller
- mediatek,mt8167-power-controller
- mediatek,mt8173-power-controller
- mediatek,mt8183-power-controller
@@ -88,6 +89,7 @@ $defs:
description: |
Power domain index. Valid values are defined in:
"include/dt-bindings/power/mt6795-power.h" - for MT8167 type power domain.
+ "include/dt-bindings/power/mediatek,mt6893-power.h" - for MT6893 type power domain.
"include/dt-bindings/power/mt8167-power.h" - for MT8167 type power domain.
"include/dt-bindings/power/mt8173-power.h" - for MT8173 type power domain.
"include/dt-bindings/power/mt8183-power.h" - for MT8183 type power domain.
diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml
index 655687369a23..1bf65f2a583a 100644
--- a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml
+++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml
@@ -50,6 +50,7 @@ properties:
- qcom,sdx55-rpmhpd
- qcom,sdx65-rpmhpd
- qcom,sdx75-rpmhpd
+ - qcom,sm4450-rpmhpd
- qcom,sm6115-rpmpd
- qcom,sm6125-rpmpd
- qcom,sm6350-rpmhpd
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml b/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml
index 19d3093e6cd2..ccd555870094 100644
--- a/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml
+++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml
@@ -21,7 +21,9 @@ description: |+
properties:
compatible:
- const: syscon-reboot
+ enum:
+ - syscon-reboot
+ - google,gs101-reboot
mask:
$ref: /schemas/types.yaml#/definitions/uint32
@@ -49,12 +51,6 @@ properties:
priority:
default: 192
-oneOf:
- - required:
- - offset
- - required:
- - reg
-
required:
- compatible
@@ -63,12 +59,29 @@ additionalProperties: false
allOf:
- $ref: restart-handler.yaml#
- if:
- not:
- required:
- - mask
+ properties:
+ compatible:
+ contains:
+ const: google,gs101-reboot
then:
- required:
- - value
+ properties:
+ mask: false
+ offset: false
+ reg: false
+ value: false
+
+ else:
+ if:
+ not:
+ required:
+ - mask
+ then:
+ required:
+ - value
+
+ oneOf:
+ - required: [offset]
+ - required: [reg]
examples:
- |
@@ -78,3 +91,8 @@ examples:
offset = <0x0>;
mask = <0x1>;
};
+
+ - |
+ reboot {
+ compatible = "google,gs101-reboot";
+ };
diff --git a/Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml b/Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml
new file mode 100644
index 000000000000..ffcd5f2c2bf6
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/toradex,smarc-ec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toradex Embedded Controller
+
+maintainers:
+ - Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
+ - Francesco Dolcini <francesco.dolcini@toradex.com>
+
+description: |
+ The Toradex Embedded Controller (EC) is used on Toradex SMARC modules,
+ primarily to manage power and reset functionalities.
+
+ The EC provides the following functions:
+ - Reads the SMARC POWER_BTN# and RESET_IN# signals and controls the PMIC accordingly.
+ - Controls the SoC boot mode signals based on the SMARC BOOT_SEL# and FORCE_RECOV# inputs.
+ - Manages the CARRIER_STDBY# signal in response to relevant SoC signals.
+
+ The EC runs a small firmware, factory programmed into its internal flash, and communicates over I2C.
+ It allows software to control power-off and reset functionalities of the module.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - toradex,smarc-imx95-ec
+ - toradex,smarc-imx8mp-ec
+ - const: toradex,smarc-ec
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reset-controller@28 {
+ compatible = "toradex,smarc-imx95-ec", "toradex,smarc-ec";
+ reg = <0x28>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/power/rockchip,power-controller.yaml b/Documentation/devicetree/bindings/power/rockchip,power-controller.yaml
index ebab98987e49..f494b7710c09 100644
--- a/Documentation/devicetree/bindings/power/rockchip,power-controller.yaml
+++ b/Documentation/devicetree/bindings/power/rockchip,power-controller.yaml
@@ -40,6 +40,7 @@ properties:
- rockchip,rk3366-power-controller
- rockchip,rk3368-power-controller
- rockchip,rk3399-power-controller
+ - rockchip,rk3562-power-controller
- rockchip,rk3568-power-controller
- rockchip,rk3576-power-controller
- rockchip,rk3588-power-controller
diff --git a/Documentation/devicetree/bindings/power/supply/bq24190.yaml b/Documentation/devicetree/bindings/power/supply/bq24190.yaml
index 07adf88997b4..307c99c07721 100644
--- a/Documentation/devicetree/bindings/power/supply/bq24190.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq24190.yaml
@@ -19,6 +19,7 @@ properties:
- ti,bq24190
- ti,bq24192
- ti,bq24192i
+ - ti,bq24193
- ti,bq24196
- ti,bq24296
- ti,bq24297
diff --git a/Documentation/devicetree/bindings/power/supply/bq25980.yaml b/Documentation/devicetree/bindings/power/supply/bq25980.yaml
index b70ce8d7f86c..256adbef55eb 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25980.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq25980.yaml
@@ -87,28 +87,28 @@ unevaluatedProperties: false
examples:
- |
bat: battery {
- compatible = "simple-battery";
- constant-charge-current-max-microamp = <4000000>;
- constant-charge-voltage-max-microvolt = <8400000>;
- precharge-current-microamp = <160000>;
- charge-term-current-microamp = <160000>;
+ compatible = "simple-battery";
+ constant-charge-current-max-microamp = <4000000>;
+ constant-charge-voltage-max-microvolt = <8400000>;
+ precharge-current-microamp = <160000>;
+ charge-term-current-microamp = <160000>;
};
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/irq.h>
i2c {
- #address-cells = <1>;
- #size-cells = <0>;
-
- bq25980: charger@65 {
- compatible = "ti,bq25980";
- reg = <0x65>;
- interrupt-parent = <&gpio1>;
- interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
- ti,watchdog-timeout-ms = <0>;
- ti,sc-ocp-limit-microamp = <2000000>;
- ti,sc-ovp-limit-microvolt = <17800000>;
- monitored-battery = <&bat>;
- };
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bq25980: charger@65 {
+ compatible = "ti,bq25980";
+ reg = <0x65>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
+ ti,watchdog-timeout-ms = <0>;
+ ti,sc-ocp-limit-microamp = <2000000>;
+ ti,sc-ovp-limit-microvolt = <17800000>;
+ monitored-battery = <&bat>;
+ };
};
...
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
index 741022b4449d..cb04fb25d8ac 100644
--- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
@@ -48,14 +48,14 @@ examples:
#include <dt-bindings/iio/adc/ingenic,adc.h>
simple_battery: battery {
- compatible = "simple-battery";
- voltage-min-design-microvolt = <3600000>;
- voltage-max-design-microvolt = <4200000>;
+ compatible = "simple-battery";
+ voltage-min-design-microvolt = <3600000>;
+ voltage-max-design-microvolt = <4200000>;
};
ingenic-battery {
- compatible = "ingenic,jz4740-battery";
- io-channels = <&adc INGENIC_ADC_BATTERY>;
- io-channel-names = "battery";
- monitored-battery = <&simple_battery>;
+ compatible = "ingenic,jz4740-battery";
+ io-channels = <&adc INGENIC_ADC_BATTERY>;
+ io-channel-names = "battery";
+ monitored-battery = <&simple_battery>;
};
diff --git a/Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml b/Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml
index 06595a953659..bc7ed7b22085 100644
--- a/Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml
+++ b/Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml
@@ -61,13 +61,13 @@ additionalProperties: false
examples:
- |
i2c {
- #address-cells = <1>;
- #size-cells = <0>;
- charger: battery-charger@68 {
- compatible = "lltc,ltc4162-l";
- reg = <0x68>;
- lltc,rsnsb-micro-ohms = <10000>;
- lltc,rsnsi-micro-ohms = <16000>;
- lltc,cell-count = <2>;
- };
+ #address-cells = <1>;
+ #size-cells = <0>;
+ charger: battery-charger@68 {
+ compatible = "lltc,ltc4162-l";
+ reg = <0x68>;
+ lltc,rsnsb-micro-ohms = <10000>;
+ lltc,rsnsi-micro-ohms = <16000>;
+ lltc,cell-count = <2>;
+ };
};
diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml
index bce7fabbd9d3..e3b84068993b 100644
--- a/Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml
@@ -37,8 +37,8 @@ examples:
#include <dt-bindings/interrupt-controller/irq.h>
i2c {
- #address-cells = <1>;
- #size-cells = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
charger@69 {
compatible = "maxim,max77705-charger";
diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max8971.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max8971.yaml
new file mode 100644
index 000000000000..2244cc3d45a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max8971.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/maxim,max8971.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX8971 IC charger
+
+maintainers:
+ - Svyatoslav Ryhel <clamor95@gmail.com>
+
+description:
+ The MAX8971 is a compact, high-frequency, high-efficiency switch-mode charger
+ for a one-cell lithium-ion (Li+) battery.
+
+allOf:
+ - $ref: power-supply.yaml#
+
+properties:
+ compatible:
+ const: maxim,max8971
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ monitored-battery: true
+
+ port:
+ description:
+ An optional port node to link the extcon device to detect type of plug.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ charger@35 {
+ compatible = "maxim,max8971";
+ reg = <0x35>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <74 IRQ_TYPE_LEVEL_LOW>;
+
+ monitored-battery = <&battery>;
+
+ port {
+ charger_input: endpoint {
+ remote-endpoint = <&extcon_output>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/power/supply/pegatron,chagall-ec.yaml b/Documentation/devicetree/bindings/power/supply/pegatron,chagall-ec.yaml
new file mode 100644
index 000000000000..defb0861e268
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/pegatron,chagall-ec.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/pegatron,chagall-ec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Pegatron Chagall EC
+
+maintainers:
+ - Svyatoslav Ryhel <clamor95@gmail.com>
+
+description:
+ Pegatron Chagall EC is based on an 8-bit programmable microcontroller from
+ Infineon/Cypress Semiconductor, it communicates over I2C and is used in the
+ Pegatron Chagall tablet for fuel gauge and battery control functions.
+
+$ref: /schemas/power/supply/power-supply.yaml
+
+properties:
+ compatible:
+ const: pegatron,chagall-ec
+
+ reg:
+ maxItems: 1
+
+ monitored-battery: true
+ power-supplies: true
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ embedded-controller@10 {
+ compatible = "pegatron,chagall-ec";
+ reg = <0x10>;
+
+ monitored-battery = <&battery>;
+ power-supplies = <&mains>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml b/Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml
new file mode 100644
index 000000000000..5d64fb40a0d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/loongson,ls7a-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson PWM Controller
+
+maintainers:
+ - Binbin Zhou <zhoubinbin@loongson.cn>
+
+description:
+ The Loongson PWM has one pulse width output signal and one pulse input
+ signal to be measured.
+ It can be found on Loongson-2K series cpus and Loongson LS7A bridge chips.
+
+allOf:
+ - $ref: pwm.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - const: loongson,ls7a-pwm
+ - items:
+ - enum:
+ - loongson,ls2k0500-pwm
+ - loongson,ls2k1000-pwm
+ - loongson,ls2k2000-pwm
+ - const: loongson,ls7a-pwm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#pwm-cells':
+ description:
+ The first cell must have a value of 0, which specifies the PWM output signal;
+ The second cell is the period in nanoseconds;
+ The third cell flag supported by this binding is PWM_POLARITY_INVERTED.
+ const: 3
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/loongson,ls2k-clk.h>
+
+ pwm@1fe22000 {
+ compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm";
+ reg = <0x1fe22000 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
index 195e4371196b..68ef30414325 100644
--- a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
+++ b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
@@ -27,6 +27,7 @@ properties:
- const: mediatek,mt8173-disp-pwm
- items:
- enum:
+ - mediatek,mt6893-disp-pwm
- mediatek,mt8186-disp-pwm
- mediatek,mt8188-disp-pwm
- mediatek,mt8192-disp-pwm
diff --git a/Documentation/devicetree/bindings/pwm/nxp,mc33xs2410.yaml b/Documentation/devicetree/bindings/pwm/nxp,mc33xs2410.yaml
new file mode 100644
index 000000000000..1729fe5c3dfb
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/nxp,mc33xs2410.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/nxp,mc33xs2410.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: High-side switch MC33XS2410
+
+maintainers:
+ - Dimitri Fedrau <dima.fedrau@gmail.com>
+
+allOf:
+ - $ref: pwm.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+ compatible:
+ const: nxp,mc33xs2410
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 10000000
+
+ spi-cpha: true
+
+ spi-cs-setup-delay-ns:
+ minimum: 100
+ default: 100
+
+ spi-cs-hold-delay-ns:
+ minimum: 10
+ default: 10
+
+ spi-cs-inactive-delay-ns:
+ minimum: 300
+ default: 300
+
+ reset-gpios:
+ description:
+ GPIO connected to the active low reset pin.
+ maxItems: 1
+
+ "#pwm-cells":
+ const: 3
+
+ pwm-names:
+ items:
+ - const: di0
+ - const: di1
+ - const: di2
+ - const: di3
+
+ pwms:
+ description:
+ Direct inputs(di0-3) are used to directly turn-on or turn-off the
+ outputs.
+ maxItems: 4
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ description:
+ The external clock can be used if the internal clock doesn't meet
+ timing requirements over temperature and voltage operating range.
+ maxItems: 1
+
+ vdd-supply:
+ description:
+ Logic supply voltage
+
+ vspi-supply:
+ description:
+ Supply voltage for SPI
+
+ vpwr-supply:
+ description:
+ Power switch supply
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pwm@0 {
+ compatible = "nxp,mc33xs2410";
+ reg = <0x0>;
+ spi-max-frequency = <4000000>;
+ spi-cpha;
+ spi-cs-setup-delay-ns = <100>;
+ spi-cs-hold-delay-ns = <10>;
+ spi-cs-inactive-delay-ns = <300>;
+ reset-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>;
+ #pwm-cells = <3>;
+ pwm-names = "di0", "di1", "di2", "di3";
+ pwms = <&pwm0 0 1000000>,
+ <&pwm1 0 1000000>,
+ <&pwm2 0 1000000>,
+ <&pwm3 0 1000000>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&clk_ext_fixed>;
+ vdd-supply = <&reg_3v3>;
+ vspi-supply = <&reg_3v3>;
+ vpwr-supply = <&reg_24v0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pwm/renesas,rzg2l-gpt.yaml b/Documentation/devicetree/bindings/pwm/renesas,rzg2l-gpt.yaml
new file mode 100644
index 000000000000..13b807765a30
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/renesas,rzg2l-gpt.yaml
@@ -0,0 +1,378 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/renesas,rzg2l-gpt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/G2L General PWM Timer (GPT)
+
+maintainers:
+ - Biju Das <biju.das.jz@bp.renesas.com>
+
+description: |
+ RZ/G2L General PWM Timer (GPT) composed of 8 channels with 32-bit timer
+ (GPT32E). It supports the following functions
+ * 32 bits x 8 channels.
+ * Up-counting or down-counting (saw waves) or up/down-counting
+ (triangle waves) for each counter.
+ * Clock sources independently selectable for each channel.
+ * Two I/O pins per channel.
+ * Two output compare/input capture registers per channel.
+ * For the two output compare/input capture registers of each channel,
+ four registers are provided as buffer registers and are capable of
+ operating as comparison registers when buffering is not in use.
+ * In output compare operation, buffer switching can be at crests or
+ troughs, enabling the generation of laterally asymmetric PWM waveforms.
+ * Registers for setting up frame cycles in each channel (with capability
+ for generating interrupts at overflow or underflow)
+ * Generation of dead times in PWM operation.
+ * Synchronous starting, stopping and clearing counters for arbitrary
+ channels.
+ * Starting, stopping, clearing and up/down counters in response to input
+ level comparison.
+ * Starting, clearing, stopping and up/down counters in response to a
+ maximum of four external triggers.
+ * Output pin disable function by dead time error and detected
+ short-circuits between output pins.
+ * A/D converter start triggers can be generated (GPT32E0 to GPT32E3)
+ * Enables the noise filter for input capture and external trigger
+ operation.
+
+ The below pwm channels are supported.
+ pwm0 - GPT32E0.GTIOC0A channel
+ pwm1 - GPT32E0.GTIOC0B channel
+ pwm2 - GPT32E1.GTIOC1A channel
+ pwm3 - GPT32E1.GTIOC1B channel
+ pwm4 - GPT32E2.GTIOC2A channel
+ pwm5 - GPT32E2.GTIOC2B channel
+ pwm6 - GPT32E3.GTIOC3A channel
+ pwm7 - GPT32E3.GTIOC3B channel
+ pwm8 - GPT32E4.GTIOC4A channel
+ pwm9 - GPT32E4.GTIOC4B channel
+ pwm10 - GPT32E5.GTIOC5A channel
+ pwm11 - GPT32E5.GTIOC5B channel
+ pwm12 - GPT32E6.GTIOC6A channel
+ pwm13 - GPT32E6.GTIOC6B channel
+ pwm14 - GPT32E7.GTIOC7A channel
+ pwm15 - GPT32E7.GTIOC7B channel
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r9a07g044-gpt # RZ/G2{L,LC}
+ - renesas,r9a07g054-gpt # RZ/V2L
+ - const: renesas,rzg2l-gpt
+
+ reg:
+ maxItems: 1
+
+ '#pwm-cells':
+ const: 3
+
+ interrupts:
+ items:
+ - description: GPT32E0.GTCCRA input capture/compare match
+ - description: GPT32E0.GTCCRB input capture/compare
+ - description: GPT32E0.GTCCRC compare match
+ - description: GPT32E0.GTCCRD compare match
+ - description: GPT32E0.GTCCRE compare match
+ - description: GPT32E0.GTCCRF compare match
+ - description: GPT32E0.GTADTRA compare match
+ - description: GPT32E0.GTADTRB compare match
+ - description: GPT32E0.GTCNT overflow/GTPR compare match
+ - description: GPT32E0.GTCNT underflow
+ - description: GPT32E1.GTCCRA input capture/compare match
+ - description: GPT32E1.GTCCRB input capture/compare
+ - description: GPT32E1.GTCCRC compare match
+ - description: GPT32E1.GTCCRD compare match
+ - description: GPT32E1.GTCCRE compare match
+ - description: GPT32E1.GTCCRF compare match
+ - description: GPT32E1.GTADTRA compare match
+ - description: GPT32E1.GTADTRB compare match
+ - description: GPT32E1.GTCNT overflow/GTPR compare match
+ - description: GPT32E1.GTCNT underflow
+ - description: GPT32E2.GTCCRA input capture/compare match
+ - description: GPT32E2.GTCCRB input capture/compare
+ - description: GPT32E2.GTCCRC compare match
+ - description: GPT32E2.GTCCRD compare match
+ - description: GPT32E2.GTCCRE compare match
+ - description: GPT32E2.GTCCRF compare match
+ - description: GPT32E2.GTADTRA compare match
+ - description: GPT32E2.GTADTRB compare match
+ - description: GPT32E2.GTCNT overflow/GTPR compare match
+ - description: GPT32E2.GTCNT underflow
+ - description: GPT32E3.GTCCRA input capture/compare match
+ - description: GPT32E3.GTCCRB input capture/compare
+ - description: GPT32E3.GTCCRC compare match
+ - description: GPT32E3.GTCCRD compare match
+ - description: GPT32E3.GTCCRE compare match
+ - description: GPT32E3.GTCCRF compare match
+ - description: GPT32E3.GTADTRA compare match
+ - description: GPT32E3.GTADTRB compare match
+ - description: GPT32E3.GTCNT overflow/GTPR compare match
+ - description: GPT32E3.GTCNT underflow
+ - description: GPT32E4.GTCCRA input capture/compare match
+ - description: GPT32E4.GTCCRB input capture/compare
+ - description: GPT32E4.GTCCRC compare match
+ - description: GPT32E4.GTCCRD compare match
+ - description: GPT32E4.GTCCRE compare match
+ - description: GPT32E4.GTCCRF compare match
+ - description: GPT32E4.GTADTRA compare match
+ - description: GPT32E4.GTADTRB compare match
+ - description: GPT32E4.GTCNT overflow/GTPR compare match
+ - description: GPT32E4.GTCNT underflow
+ - description: GPT32E5.GTCCRA input capture/compare match
+ - description: GPT32E5.GTCCRB input capture/compare
+ - description: GPT32E5.GTCCRC compare match
+ - description: GPT32E5.GTCCRD compare match
+ - description: GPT32E5.GTCCRE compare match
+ - description: GPT32E5.GTCCRF compare match
+ - description: GPT32E5.GTADTRA compare match
+ - description: GPT32E5.GTADTRB compare match
+ - description: GPT32E5.GTCNT overflow/GTPR compare match
+ - description: GPT32E5.GTCNT underflow
+ - description: GPT32E6.GTCCRA input capture/compare match
+ - description: GPT32E6.GTCCRB input capture/compare
+ - description: GPT32E6.GTCCRC compare match
+ - description: GPT32E6.GTCCRD compare match
+ - description: GPT32E6.GTCCRE compare match
+ - description: GPT32E6.GTCCRF compare match
+ - description: GPT32E6.GTADTRA compare match
+ - description: GPT32E6.GTADTRB compare match
+ - description: GPT32E6.GTCNT overflow/GTPR compare match
+ - description: GPT32E6.GTCNT underflow
+ - description: GPT32E7.GTCCRA input capture/compare match
+ - description: GPT32E7.GTCCRB input capture/compare
+ - description: GPT32E7.GTCCRC compare match
+ - description: GPT32E7.GTCCRD compare match
+ - description: GPT32E7.GTCCRE compare match
+ - description: GPT32E7.GTCCRF compare match
+ - description: GPT32E7.GTADTRA compare match
+ - description: GPT32E7.GTADTRB compare match
+ - description: GPT32E7.GTCNT overflow/GTPR compare match
+ - description: GPT32E7.GTCNT underflow
+
+ interrupt-names:
+ items:
+ - const: ccmpa0
+ - const: ccmpb0
+ - const: cmpc0
+ - const: cmpd0
+ - const: cmpe0
+ - const: cmpf0
+ - const: adtrga0
+ - const: adtrgb0
+ - const: ovf0
+ - const: unf0
+ - const: ccmpa1
+ - const: ccmpb1
+ - const: cmpc1
+ - const: cmpd1
+ - const: cmpe1
+ - const: cmpf1
+ - const: adtrga1
+ - const: adtrgb1
+ - const: ovf1
+ - const: unf1
+ - const: ccmpa2
+ - const: ccmpb2
+ - const: cmpc2
+ - const: cmpd2
+ - const: cmpe2
+ - const: cmpf2
+ - const: adtrga2
+ - const: adtrgb2
+ - const: ovf2
+ - const: unf2
+ - const: ccmpa3
+ - const: ccmpb3
+ - const: cmpc3
+ - const: cmpd3
+ - const: cmpe3
+ - const: cmpf3
+ - const: adtrga3
+ - const: adtrgb3
+ - const: ovf3
+ - const: unf3
+ - const: ccmpa4
+ - const: ccmpb4
+ - const: cmpc4
+ - const: cmpd4
+ - const: cmpe4
+ - const: cmpf4
+ - const: adtrga4
+ - const: adtrgb4
+ - const: ovf4
+ - const: unf4
+ - const: ccmpa5
+ - const: ccmpb5
+ - const: cmpc5
+ - const: cmpd5
+ - const: cmpe5
+ - const: cmpf5
+ - const: adtrga5
+ - const: adtrgb5
+ - const: ovf5
+ - const: unf5
+ - const: ccmpa6
+ - const: ccmpb6
+ - const: cmpc6
+ - const: cmpd6
+ - const: cmpe6
+ - const: cmpf6
+ - const: adtrga6
+ - const: adtrgb6
+ - const: ovf6
+ - const: unf6
+ - const: ccmpa7
+ - const: ccmpb7
+ - const: cmpc7
+ - const: cmpd7
+ - const: cmpe7
+ - const: cmpf7
+ - const: adtrga7
+ - const: adtrgb7
+ - const: ovf7
+ - const: unf7
+
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+ - clocks
+ - power-domains
+ - resets
+
+allOf:
+ - $ref: pwm.yaml#
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r9a07g044-cpg.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ gpt: pwm@10048000 {
+ compatible = "renesas,r9a07g044-gpt", "renesas,rzg2l-gpt";
+ reg = <0x10048000 0x800>;
+ interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 219 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 220 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 221 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 222 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 223 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 224 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 225 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 226 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 227 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 231 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 232 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 233 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 234 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 235 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 236 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 237 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 238 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 239 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 240 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 244 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 245 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 246 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 247 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 248 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 249 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 250 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 251 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 252 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 253 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 257 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 258 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 259 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 260 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 261 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 262 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 263 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 264 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 265 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 266 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 270 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 271 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 272 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 273 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 274 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 275 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 276 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 277 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 278 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 279 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 283 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 284 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 285 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 286 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 287 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 288 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 289 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 290 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 291 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 292 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 296 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 297 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 298 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 299 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 300 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 301 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 302 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 303 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 304 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 305 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 309 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 310 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 311 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 312 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 313 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 314 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 315 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 316 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 317 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 318 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "ccmpa0", "ccmpb0", "cmpc0", "cmpd0",
+ "cmpe0", "cmpf0", "adtrga0", "adtrgb0",
+ "ovf0", "unf0",
+ "ccmpa1", "ccmpb1", "cmpc1", "cmpd1",
+ "cmpe1", "cmpf1", "adtrga1", "adtrgb1",
+ "ovf1", "unf1",
+ "ccmpa2", "ccmpb2", "cmpc2", "cmpd2",
+ "cmpe2", "cmpf2", "adtrga2", "adtrgb2",
+ "ovf2", "unf2",
+ "ccmpa3", "ccmpb3", "cmpc3", "cmpd3",
+ "cmpe3", "cmpf3", "adtrga3", "adtrgb3",
+ "ovf3", "unf3",
+ "ccmpa4", "ccmpb4", "cmpc4", "cmpd4",
+ "cmpe4", "cmpf4", "adtrga4", "adtrgb4",
+ "ovf4", "unf4",
+ "ccmpa5", "ccmpb5", "cmpc5", "cmpd5",
+ "cmpe5", "cmpf5", "adtrga5", "adtrgb5",
+ "ovf5", "unf5",
+ "ccmpa6", "ccmpb6", "cmpc6", "cmpd6",
+ "cmpe6", "cmpf6", "adtrga6", "adtrgb6",
+ "ovf6", "unf6",
+ "ccmpa7", "ccmpb7", "cmpc7", "cmpd7",
+ "cmpe7", "cmpf7", "adtrga7", "adtrgb7",
+ "ovf7", "unf7";
+ clocks = <&cpg CPG_MOD R9A07G044_GPT_PCLK>;
+ power-domains = <&cpg>;
+ resets = <&cpg R9A07G044_GPT_RST_C>;
+ #pwm-cells = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml b/Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml
new file mode 100644
index 000000000000..d9146ad715ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/via,vt8500-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: VIA/Wondermedia VT8500/WM8xxx series SoC PWM controller
+
+maintainers:
+ - Alexey Charkov <alchark@gmail.com>
+
+allOf:
+ - $ref: pwm.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: via,vt8500-pwm
+
+ reg:
+ maxItems: 1
+
+ '#pwm-cells':
+ const: 3
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ pwm1: pwm@d8220000 {
+ compatible = "via,vt8500-pwm";
+ reg = <0xd8220000 0x1000>;
+ #pwm-cells = <3>;
+ clocks = <&clkpwm>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
deleted file mode 100644
index 4fba93ce1985..000000000000
--- a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VIA/Wondermedia VT8500/WM8xxx series SoC PWM controller
-
-Required properties:
-- compatible: should be "via,vt8500-pwm"
-- reg: physical base address and length of the controller's registers
-- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
- the cells format. The only third cell flag supported by this binding is
- PWM_POLARITY_INVERTED.
-- clocks: phandle to the PWM source clock
-
-Example:
-
-pwm1: pwm@d8220000 {
- #pwm-cells = <3>;
- compatible = "via,vt8500-pwm";
- reg = <0xd8220000 0x1000>;
- clocks = <&clkpwm>;
-};
diff --git a/Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml b/Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml
new file mode 100644
index 000000000000..9c4ead4c9fd1
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/adi,adp5055-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADP5055 Triple Buck Regulator
+
+maintainers:
+ - Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
+
+description: |
+ The ADP5055 combines three high performance buck regulator. The device enables
+ direct connection to high input voltages up to 18 V with no preregulators.
+ https://www.analog.com/media/en/technical-documentation/data-sheets/adp5055.pdf
+
+properties:
+ compatible:
+ enum:
+ - adi,adp5055
+
+ reg:
+ enum:
+ - 0x70
+ - 0x71
+
+ adi,tset-us:
+ description:
+ Setting time used by the device. This is changed via soldering specific
+ resistor values on the CFG2 pin.
+ enum: [2600, 20800]
+ default: 2600
+
+ adi,ocp-blanking:
+ description:
+ If present, overcurrent protection (OCP) blanking for all regulator is on.
+ type: boolean
+
+ adi,delay-power-good:
+ description:
+ Configures delay timer of the power good (PWRGD) pin. Delay is based on
+ Tset which can be 2.6 ms or 20.8 ms.
+ type: boolean
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+patternProperties:
+ '^buck[0-2]$':
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ enable-gpios:
+ maxItems: 1
+ description:
+ GPIO specifier to enable the GPIO control for each regulator. The
+ driver supports two modes of enable, hardware only (GPIOs) or software
+ only (Registers). Pure hardware enabling requires each regulator to
+ contain this property. If at least one regulator does not have this,
+ the driver automatically switches to software only mode.
+
+ adi,dvs-limit-upper-microvolt:
+ description:
+ Configure the allowable upper side limit of the voltage output of each
+ regulator in microvolt. Relative to the default Vref trimming value.
+ Vref = 600 mV. Voltages are in 12 mV steps, value is autoadjusted.
+ Vout_high = Vref_trim + dvs-limit-upper.
+ minimum: 12000
+ maximum: 192000
+ default: 192000
+
+ adi,dvs-limit-lower-microvolt:
+ description:
+ Configure the allowable lower side limit of the voltage output of each
+ regulator in microvolt. Relative to the default Vref trimming value.
+ Vref = 600 mV. Voltages are in 12 mV steps, value is autoadjusted.
+ Vout_low = Vref_trim + dvs-limit-lower.
+ minimum: -190500
+ maximum: -10500
+ default: -190500
+
+ adi,fast-transient:
+ description:
+ Configures the fast transient sensitivity for each regulator.
+ "none" - No fast transient.
+ "3G_1.5%" - 1.5% window with 3*350uA/V
+ "5G_1.5%" - 1.5% window with 5*350uA/V
+ "5G_2.5%" - 2.5% window with 5*350uA/V
+ enum: [none, 3G_1.5%, 5G_1.5%, 5G_2.5%]
+ default: 5G_2.5%
+
+ adi,mask-power-good:
+ description:
+ If present, masks individual regulators PWRGD signal to the external
+ PWRGD hardware pin.
+ type: boolean
+
+ required:
+ - regulator-name
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ regulator@70 {
+ compatible = "adi,adp5055";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ adi,tset-us = <2600>;
+ adi,ocp-blanking;
+ adi,delay-power-good;
+
+ buck0 {
+ regulator-name = "buck0";
+ enable-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>;
+ adi,dvs-limit-upper-microvolt = <192000>;
+ adi,dvs-limit-lower-microvolt = <(-190500)>;
+ adi,fast-transient = "5G_2.5%";
+ adi,mask-power-good;
+ };
+
+ buck1 {
+ regulator-name = "buck1";
+ enable-gpios = <&gpio 18 GPIO_ACTIVE_HIGH>;
+ adi,dvs-limit-upper-microvolt = <192000>;
+ adi,dvs-limit-lower-microvolt = <(-190500)>;
+ adi,fast-transient = "5G_2.5%";
+ adi,mask-power-good;
+ };
+
+ buck2 {
+ regulator-name = "buck2";
+ enable-gpios = <&gpio 19 GPIO_ACTIVE_HIGH>;
+ adi,dvs-limit-upper-microvolt = <192000>;
+ adi,dvs-limit-lower-microvolt = <(-190500)>;
+ adi,fast-transient = "5G_2.5%";
+ adi,mask-power-good;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml b/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml
index 6327bb2f6ee0..698266c09e25 100644
--- a/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml
@@ -33,7 +33,7 @@ patternProperties:
"^ldo-v(camio18|aud28|aux18|io18|io28|rf12|rf18|cn18|cn28|fe28)$":
type: object
- $ref: fixed-regulator.yaml#
+ $ref: regulator.yaml#
unevaluatedProperties: false
description:
Properties for single fixed LDO regulator.
@@ -112,7 +112,6 @@ examples:
regulator-enable-ramp-delay = <220>;
};
mt6357_vfe28_reg: ldo-vfe28 {
- compatible = "regulator-fixed";
regulator-name = "vfe28";
regulator-min-microvolt = <2800000>;
regulator-max-microvolt = <2800000>;
@@ -125,14 +124,12 @@ examples:
regulator-enable-ramp-delay = <110>;
};
mt6357_vrf18_reg: ldo-vrf18 {
- compatible = "regulator-fixed";
regulator-name = "vrf18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-enable-ramp-delay = <110>;
};
mt6357_vrf12_reg: ldo-vrf12 {
- compatible = "regulator-fixed";
regulator-name = "vrf12";
regulator-min-microvolt = <1200000>;
regulator-max-microvolt = <1200000>;
@@ -157,14 +154,12 @@ examples:
regulator-enable-ramp-delay = <264>;
};
mt6357_vcn28_reg: ldo-vcn28 {
- compatible = "regulator-fixed";
regulator-name = "vcn28";
regulator-min-microvolt = <2800000>;
regulator-max-microvolt = <2800000>;
regulator-enable-ramp-delay = <264>;
};
mt6357_vcn18_reg: ldo-vcn18 {
- compatible = "regulator-fixed";
regulator-name = "vcn18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
@@ -183,7 +178,6 @@ examples:
regulator-enable-ramp-delay = <264>;
};
mt6357_vcamio_reg: ldo-vcamio18 {
- compatible = "regulator-fixed";
regulator-name = "vcamio";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
@@ -212,28 +206,24 @@ examples:
regulator-always-on;
};
mt6357_vaux18_reg: ldo-vaux18 {
- compatible = "regulator-fixed";
regulator-name = "vaux18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-enable-ramp-delay = <264>;
};
mt6357_vaud28_reg: ldo-vaud28 {
- compatible = "regulator-fixed";
regulator-name = "vaud28";
regulator-min-microvolt = <2800000>;
regulator-max-microvolt = <2800000>;
regulator-enable-ramp-delay = <264>;
};
mt6357_vio28_reg: ldo-vio28 {
- compatible = "regulator-fixed";
regulator-name = "vio28";
regulator-min-microvolt = <2800000>;
regulator-max-microvolt = <2800000>;
regulator-enable-ramp-delay = <264>;
};
mt6357_vio18_reg: ldo-vio18 {
- compatible = "regulator-fixed";
regulator-name = "vio18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
diff --git a/Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml b/Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml
index c3b33bbc7319..84c4801df8d9 100644
--- a/Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml
@@ -24,6 +24,9 @@ properties:
- microchip,sam9x60-rstc
- microchip,sama7g5-rstc
- items:
+ - const: microchip,sama7d65-rstc
+ - const: microchip,sama7g5-rstc
+ - items:
- const: atmel,sama5d3-rstc
- const: atmel,at91sam9g45-rstc
- items:
diff --git a/Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml b/Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml
index ca71b400bcae..fcc5be80142d 100644
--- a/Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml
@@ -4,9 +4,9 @@
$id: http://devicetree.org/schemas/rng/rockchip,rk3588-rng.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Rockchip RK3588 TRNG
+title: Rockchip RK3576/RK3588 TRNG
-description: True Random Number Generator on Rockchip RK3588 SoC
+description: True Random Number Generator on Rockchip RK3576/RK3588 SoCs
maintainers:
- Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
@@ -14,6 +14,7 @@ maintainers:
properties:
compatible:
enum:
+ - rockchip,rk3576-rng
- rockchip,rk3588-rng
reg:
diff --git a/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml b/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
index 94588353f852..40eb1d7d6cf1 100644
--- a/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
+++ b/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
@@ -18,11 +18,7 @@ properties:
label:
maxItems: 1
routing:
- description: |
- A list of the connections between audio components.
- Each entry is a pair of strings, the first being the
- connection's sink, the second being the connection's source.
- $ref: /schemas/types.yaml#/definitions/non-unique-string-array
+ $ref: audio-graph.yaml#/properties/routing
aux-devs:
description: |
List of phandles pointing to auxiliary devices, such
@@ -39,6 +35,8 @@ properties:
description: Codec to Codec node
hp-det-gpios:
$ref: audio-graph.yaml#/properties/hp-det-gpios
+ mic-det-gpios:
+ $ref: audio-graph.yaml#/properties/mic-det-gpios
widgets:
$ref: audio-graph.yaml#/properties/widgets
diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs48l32.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs48l32.yaml
new file mode 100644
index 000000000000..bf087b57aaf6
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cirrus,cs48l32.yaml
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/cirrus,cs48l32.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic CS48L32 audio DSP.
+
+maintainers:
+ - patches@opensource.cirrus.com
+
+description: |
+ The CS48L32 is a high-performance low-power audio DSP for smartphones and
+ other portable audio devices. The CS48L32 combines a programmable Halo Core
+ DSP with a variety of power-efficient fixed-function audio processors.
+
+ See also the binding headers:
+
+ include/dt-bindings/sound/cs48l32.yaml
+
+allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - cirrus,cs48l32
+
+ reg:
+ description: SPI chip-select number.
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 25000000
+
+ vdd-a-supply:
+ description: Regulator supplying VDD_A
+
+ vdd-d-supply:
+ description: Regulator supplying VDD_D
+
+ vdd-io-supply:
+ description: Regulator supplying VDD_IO
+
+ vdd-cp-supply:
+ description: Regulator supplying VDD_CP
+
+ reset-gpios:
+ description:
+ One entry specifying the GPIO controlling /RESET. Although optional,
+ it is strongly recommended to use a hardware reset.
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The clock supplied on MCLK1
+
+ clock-names:
+ const: mclk1
+
+ '#sound-dai-cells':
+ const: 1
+
+ cirrus,in-type:
+ description: |
+ A list of input type settings for each ADC input.
+ Inputs are one of these types:
+ CS48L32_IN_TYPE_DIFF : analog differential (default)
+ CS48L32_IN_TYPE_SE : analog single-ended
+
+ The type of the left (L) and right (R) channel on each input is
+ independently configured, as are the two groups of pins muxable to
+ the input (referred to in the datasheet as "1" and "2").
+
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description:
+ IN1L_1 analog input type. One of the CS48L32_IN_TYPE_xxx.
+ minimum: 0
+ maximum: 1
+ default: 0
+ - description:
+ IN1R_1 analog input type. One of the CS48L32_IN_TYPE_xxx.
+ minimum: 0
+ maximum: 1
+ default: 0
+ - description:
+ IN1L_2 analog input type. One of the CS48L32_IN_TYPE_xxx.
+ minimum: 0
+ maximum: 1
+ default: 0
+ - description:
+ IN1R_2 analog input type. One of the CS48L32_IN_TYPE_xxx.
+ minimum: 0
+ maximum: 1
+ default: 0
+
+ cirrus,pdm-sup:
+ description: |
+ Indicate which MICBIAS output supplies bias to the microphone.
+ There is one cell per input (IN1, IN2, ...).
+
+ One of the CS48L32_MICBIAS_xxx values.
+ CS48L32_PDM_SUP_VOUT_MIC : mic biased from VOUT_MIC
+ CS48L32_PDM_SUP_MICBIAS1 : mic biased from MICBIAS1
+
+ Also see the INn_PDM_SUP field in the datasheet.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: IN1 PDM supply source
+ minimum: 0
+ maximum: 1
+ default: 0
+ - description: IN2 PDM supply source
+ minimum: 0
+ maximum: 1
+ default: 0
+
+required:
+ - compatible
+ - reg
+ - vdd-a-supply
+ - vdd-d-supply
+ - vdd-io-supply
+ - vdd-cp-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/sound/cs48l32.h>
+
+ spi@e0006000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe0006000 0x1000>;
+
+ codec@1 {
+ compatible = "cirrus,cs48l32";
+
+ reg = <0x1>;
+ spi-max-frequency = <2500000>;
+
+ vdd-a-supply = <&regulator_1v8>;
+ vdd-d-supply = <&regulator_1v2>;
+ vdd-io-supply = <&regulator_1v8>;
+ vdd-cp-supply = <&regulator_1v8>;
+
+ reset-gpios = <&gpio 0 0>;
+
+ clocks = <&clks 0>;
+ clock-names = "mclk1";
+
+ interrupt-parent = <&gpio0>;
+ interrupts = <56 8>;
+
+ #sound-dai-cells = <1>;
+
+ cirrus,in-type = <
+ CS48L32_IN_TYPE_DIFF CS48L32_IN_TYPE_DIFF
+ CS48L32_IN_TYPE_SE CS48L32_IN_TYPE_SE
+ >;
+
+ cirrus,pdm-sup = <
+ CS48L32_PDM_SUP_MICBIAS1 CS48L32_PDM_SUP_MICBIAS1
+ >;
+ };
+ };
+
+#
+# Minimal config
+#
+ - |
+ #include <dt-bindings/sound/cs48l32.h>
+
+ spi@e0006000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe0006000 0x1000>;
+
+ codec@1 {
+ compatible = "cirrus,cs48l32";
+
+ reg = <0x1>;
+
+ vdd-a-supply = <&regulator_1v8>;
+ vdd-d-supply = <&regulator_1v2>;
+ vdd-io-supply = <&regulator_1v8>;
+ vdd-cp-supply = <&regulator_1v8>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/everest,es8375.yaml b/Documentation/devicetree/bindings/sound/everest,es8375.yaml
new file mode 100644
index 000000000000..4a3d671c66b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/everest,es8375.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/everest,es8375.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Everest ES8375 audio CODEC
+
+maintainers:
+ - Michael Zhang <zhangyi@everest-semi.com>
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ const: everest,es8375
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: clock for master clock (MCLK)
+
+ clock-names:
+ items:
+ - const: mclk
+
+ vdda-supply:
+ description:
+ Analogue power supply.
+
+ vddd-supply:
+ description:
+ Interface power supply.
+
+ everest,mclk-src:
+ $ref: /schemas/types.yaml#/definitions/uint8
+ description: |
+ Represents the MCLK/SCLK pair pins used as the internal clock.
+ 0 represents selecting MCLK.
+ 1 represents selecting SCLK.
+ enum: [0, 1]
+ default: 0
+
+ "#sound-dai-cells":
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - "#sound-dai-cells"
+ - vdda-supply
+ - vddd-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ es8375: codec@18 {
+ compatible = "everest,es8375";
+ reg = <0x18>;
+ vdda-supply = <&vdd3v3>;
+ vddd-supply = <&vdd3v3>;
+ #sound-dai-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/everest,es8389.yaml b/Documentation/devicetree/bindings/sound/everest,es8389.yaml
new file mode 100644
index 000000000000..a673df485ab3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/everest,es8389.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/everest,es8389.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Everest ES8389 audio CODEC
+
+maintainers:
+ - Michael Zhang <zhangyi@everest-semi.com>
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ const: everest,es8389
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: clock for master clock (MCLK)
+
+ clock-names:
+ items:
+ - const: mclk
+
+ "#sound-dai-cells":
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - "#sound-dai-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ es8389: codec@10 {
+ compatible = "everest,es8389";
+ reg = <0x10>;
+ #sound-dai-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/fsl,mqs.yaml b/Documentation/devicetree/bindings/sound/fsl,mqs.yaml
index 8c22e8348b14..1415247c92c8 100644
--- a/Documentation/devicetree/bindings/sound/fsl,mqs.yaml
+++ b/Documentation/devicetree/bindings/sound/fsl,mqs.yaml
@@ -28,6 +28,9 @@ properties:
- fsl,imx95-aonmix-mqs
- fsl,imx95-netcmix-mqs
+ "#sound-dai-cells":
+ const: 0
+
clocks:
minItems: 1
maxItems: 2
@@ -49,12 +52,17 @@ properties:
resets:
maxItems: 1
+ port:
+ $ref: audio-graph-port.yaml#
+ unevaluatedProperties: false
+
required:
- compatible
- clocks
- clock-names
allOf:
+ - $ref: dai-common.yaml#
- if:
properties:
compatible:
@@ -86,7 +94,7 @@ allOf:
required:
- gpr
-additionalProperties: false
+unevaluatedProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/sound/loongson,ls1b-ac97.yaml b/Documentation/devicetree/bindings/sound/loongson,ls1b-ac97.yaml
new file mode 100644
index 000000000000..1c6a2771f942
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/loongson,ls1b-ac97.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/loongson,ls1b-ac97.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1 AC97 Controller
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+description:
+ The Loongson-1 AC97 controller supports 2-channel stereo output and input.
+ It is paired with the DMA engine to handle playback and capture functions.
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - const: loongson,ls1b-ac97
+ - items:
+ - enum:
+ - loongson,ls1a-ac97
+ - loongson,ls1c-ac97
+ - const: loongson,ls1b-ac97
+
+ reg:
+ maxItems: 3
+
+ reg-names:
+ items:
+ - const: ac97
+ - const: audio-tx
+ - const: audio-rx
+
+ dmas:
+ maxItems: 2
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+ '#sound-dai-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - dmas
+ - dma-names
+ - '#sound-dai-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ audio-controller@1fe74000 {
+ compatible = "loongson,ls1b-ac97";
+ reg = <0x1fe74000 0x60>, <0x1fe72420 0x4>, <0x1fe74c4c 0x4>;
+ reg-names = "ac97", "audio-tx", "audio-rx";
+ dmas = <&dma 1>, <&dma 2>;
+ dma-names = "tx", "rx";
+ #sound-dai-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/maxim,max98925.yaml b/Documentation/devicetree/bindings/sound/maxim,max98925.yaml
index 32fd86204a7a..121e8d2d44da 100644
--- a/Documentation/devicetree/bindings/sound/maxim,max98925.yaml
+++ b/Documentation/devicetree/bindings/sound/maxim,max98925.yaml
@@ -77,11 +77,11 @@ additionalProperties: false
examples:
- |
+ #include <dt-bindings/gpio/gpio.h>
i2c {
#address-cells = <1>;
#size-cells = <0>;
- #include <dt-bindings/gpio/gpio.h>
audio-codec@3a {
compatible = "maxim,max98927";
reg = <0x3a>;
diff --git a/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml b/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml
index 76d5a437dc8f..7ba2ea2dfa0b 100644
--- a/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml
+++ b/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml
@@ -96,10 +96,9 @@ patternProperties:
mediatek,clk-provider:
$ref: /schemas/types.yaml#/definitions/string
description: Indicates dai-link clock master.
- items:
- enum:
- - cpu
- - codec
+ enum:
+ - cpu
+ - codec
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml b/Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml
index cbc641ecbe94..037f21443ad1 100644
--- a/Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml
+++ b/Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml
@@ -124,10 +124,9 @@ patternProperties:
mediatek,clk-provider:
$ref: /schemas/types.yaml#/definitions/string
description: Indicates dai-link clock master.
- items:
- enum:
- - cpu
- - codec
+ enum:
+ - cpu
+ - codec
required:
- link-name
diff --git a/Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml b/Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml
index 2af1d8ffbd8b..356e1feee962 100644
--- a/Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml
+++ b/Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml
@@ -21,6 +21,7 @@ properties:
- mediatek,mt8195_mt6359_rt1019_rt5682
- mediatek,mt8195_mt6359_rt1011_rt5682
- mediatek,mt8195_mt6359_max98390_rt5682
+ - mediatek,mt8195_mt6359
model:
$ref: /schemas/types.yaml#/definitions/string
@@ -44,6 +45,8 @@ properties:
- Right Spk
# Sources
+ - Headphone L
+ - Headphone R
- Headset Mic
- HPOL
- HPOR
@@ -88,6 +91,7 @@ patternProperties:
link-name:
description: Indicates dai-link name and PCM stream name
enum:
+ - DL_SRC_BE
- DPTX_BE
- ETDM1_IN_BE
- ETDM2_IN_BE
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml
index b4bee466d67a..da89523ccf5f 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml
@@ -23,6 +23,7 @@ properties:
enum:
- nvidia,tegra210-audio-graph-card
- nvidia,tegra186-audio-graph-card
+ - nvidia,tegra264-audio-graph-card
clocks:
minItems: 2
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml
index e15f387c4c29..66b56e71599b 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml
@@ -31,7 +31,9 @@ properties:
compatible:
oneOf:
- - const: nvidia,tegra186-asrc
+ - enum:
+ - nvidia,tegra186-asrc
+ - nvidia,tegra264-asrc
- items:
- enum:
- nvidia,tegra234-asrc
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml
index e1362c77472b..46ba167081ef 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml
@@ -29,6 +29,7 @@ properties:
- const: nvidia,tegra186-dspk
- items:
- enum:
+ - nvidia,tegra264-dspk
- nvidia,tegra234-dspk
- nvidia,tegra194-dspk
- const: nvidia,tegra186-dspk
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml
index 15ab40aeab1e..b32f33214ba6 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml
@@ -26,6 +26,7 @@ properties:
- enum:
- nvidia,tegra210-admaif
- nvidia,tegra186-admaif
+ - nvidia,tegra264-admaif
- items:
- enum:
- nvidia,tegra234-admaif
@@ -39,6 +40,19 @@ properties:
dma-names: true
+ interconnects:
+ items:
+ - description: APE read memory client
+ - description: APE write memory client
+
+ interconnect-names:
+ items:
+ - const: dma-mem # read
+ - const: write
+
+ iommus:
+ maxItems: 1
+
ports:
$ref: /schemas/graph.yaml#/properties/ports
description: |
@@ -74,6 +88,9 @@ then:
Should be "tx1", "tx2" ... "tx10" for DMA Tx channel
minItems: 1
maxItems: 20
+ interconnects: false
+ interconnect-names: false
+ iommus: false
else:
properties:
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml
index e4c871797fa6..19a80929f93e 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml
@@ -27,7 +27,9 @@ properties:
compatible:
oneOf:
- - const: nvidia,tegra210-adx
+ - enum:
+ - nvidia,tegra210-adx
+ - nvidia,tegra264-adx
- items:
- enum:
- nvidia,tegra234-adx
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml
index c4abac81f207..1c9f24d26819 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml
@@ -27,6 +27,7 @@ properties:
- nvidia,tegra210-ahub
- nvidia,tegra186-ahub
- nvidia,tegra234-ahub
+ - nvidia,tegra264-ahub
- items:
- const: nvidia,tegra194-ahub
- const: nvidia,tegra186-ahub
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml
index 021b72546ba4..89712102cfdf 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml
@@ -26,11 +26,13 @@ properties:
compatible:
oneOf:
- - const: nvidia,tegra210-amx
+ - enum:
+ - nvidia,tegra210-amx
+ - nvidia,tegra194-amx
+ - nvidia,tegra264-amx
- items:
- const: nvidia,tegra186-amx
- const: nvidia,tegra210-amx
- - const: nvidia,tegra194-amx
- items:
- const: nvidia,tegra234-amx
- const: nvidia,tegra194-amx
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml
index bff551c35da7..bb8088878d4b 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml
@@ -28,6 +28,7 @@ properties:
- const: nvidia,tegra210-dmic
- items:
- enum:
+ - nvidia,tegra264-dmic
- nvidia,tegra234-dmic
- nvidia,tegra194-dmic
- nvidia,tegra186-dmic
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml
index a82f11fb6c9a..903e815af8fd 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml
@@ -25,7 +25,9 @@ properties:
compatible:
oneOf:
- - const: nvidia,tegra210-i2s
+ - enum:
+ - nvidia,tegra210-i2s
+ - nvidia,tegra264-i2s
- items:
- enum:
- nvidia,tegra234-i2s
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml
index 5b9198602fc6..4c121b9cde1e 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml
@@ -23,6 +23,7 @@ properties:
- const: nvidia,tegra210-mbdrc
- items:
- enum:
+ - nvidia,tegra264-mbdrc
- nvidia,tegra234-mbdrc
- nvidia,tegra194-mbdrc
- nvidia,tegra186-mbdrc
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml
index 049898f02e85..56b4c4fc123c 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml
@@ -28,6 +28,7 @@ properties:
- const: nvidia,tegra210-amixer
- items:
- enum:
+ - nvidia,tegra264-amixer
- nvidia,tegra234-amixer
- nvidia,tegra194-amixer
- nvidia,tegra186-amixer
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml
index d0280d8aa3af..bde4ac6319b1 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml
@@ -31,6 +31,7 @@ properties:
- const: nvidia,tegra210-mvc
- items:
- enum:
+ - nvidia,tegra264-mvc
- nvidia,tegra234-mvc
- nvidia,tegra194-mvc
- nvidia,tegra186-mvc
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml
index 9017fb6d575d..756c3096a2d6 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml
@@ -25,6 +25,7 @@ properties:
- const: nvidia,tegra210-ope
- items:
- enum:
+ - nvidia,tegra264-ope
- nvidia,tegra234-ope
- nvidia,tegra194-ope
- nvidia,tegra186-ope
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml
index 1e373c49d639..2f11a484dc2e 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml
@@ -24,6 +24,7 @@ properties:
- const: nvidia,tegra210-peq
- items:
- enum:
+ - nvidia,tegra264-peq
- nvidia,tegra234-peq
- nvidia,tegra194-peq
- nvidia,tegra186-peq
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml
index 185ca0be4f02..959aa7fffdac 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml
@@ -28,6 +28,7 @@ properties:
- const: nvidia,tegra210-sfc
- items:
- enum:
+ - nvidia,tegra264-sfc
- nvidia,tegra234-sfc
- nvidia,tegra194-sfc
- nvidia,tegra186-sfc
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml
index 3ca9affb79a2..8a8767589ee0 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml
@@ -20,11 +20,13 @@ properties:
compatible:
oneOf:
- - const: nvidia,tegra30-hda
+ - enum:
+ - nvidia,tegra30-hda
+ - nvidia,tegra194-hda
+ - nvidia,tegra234-hda
+ - nvidia,tegra264-hda
- items:
- enum:
- - nvidia,tegra234-hda
- - nvidia,tegra194-hda
- nvidia,tegra186-hda
- nvidia,tegra210-hda
- nvidia,tegra124-hda
@@ -43,15 +45,12 @@ properties:
maxItems: 1
clocks:
- minItems: 2
+ minItems: 1
maxItems: 3
clock-names:
- minItems: 2
- items:
- - const: hda
- - const: hda2hdmi
- - const: hda2codec_2x
+ minItems: 1
+ maxItems: 3
resets:
minItems: 2
@@ -59,10 +58,7 @@ properties:
reset-names:
minItems: 2
- items:
- - const: hda
- - const: hda2hdmi
- - const: hda2codec_2x
+ maxItems: 3
power-domains:
maxItems: 1
@@ -93,6 +89,92 @@ required:
additionalProperties: false
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra30-hda
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ clock-names:
+ items:
+ - const: hda
+ - const: hda2hdmi
+ - const: hda2codec_2x
+ resets:
+ minItems: 3
+ reset-names:
+ items:
+ - const: hda
+ - const: hda2hdmi
+ - const: hda2codec_2x
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra194-hda
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ clock-names:
+ items:
+ - const: hda
+ - const: hda2hdmi
+ - const: hda2codec_2x
+ resets:
+ maxItems: 2
+ reset-names:
+ items:
+ - const: hda
+ - const: hda2hdmi
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra234-hda
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: hda
+ - const: hda2codec_2x
+ resets:
+ maxItems: 2
+ reset-names:
+ items:
+ - const: hda
+ - const: hda2codec_2x
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra264-hda
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ items:
+ - const: hda
+ resets:
+ maxItems: 2
+ reset-names:
+ items:
+ - const: hda
+ - const: hda2codec_2x
+ power-domains: false
+
examples:
- |
#include<dt-bindings/clock/tegra124-car-common.h>
diff --git a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml
index b9e33a7429b0..5564787b7542 100644
--- a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml
@@ -31,6 +31,8 @@ properties:
- qcom,apq8096-sndcard
- qcom,qcm6490-idp-sndcard
- qcom,qcs6490-rb3gen2-sndcard
+ - qcom,qcs9075-sndcard
+ - qcom,qcs9100-sndcard
- qcom,qrb4210-rb2-sndcard
- qcom,qrb5165-rb5-sndcard
- qcom,sc7180-qdsp6-sndcard
diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml
index 10531350c336..ab1c6285dbf8 100644
--- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml
@@ -23,9 +23,15 @@ properties:
- qcom,wcd9380-codec
- qcom,wcd9385-codec
+ mux-controls:
+ description: A reference to the audio mux switch for
+ switching CTIA/OMTP Headset types
+ maxItems: 1
+
us-euro-gpios:
description: GPIO spec for swapping gnd and mic segments
maxItems: 1
+ deprecated: true
required:
- compatible
diff --git a/Documentation/devicetree/bindings/sound/realtek,alc203.yaml b/Documentation/devicetree/bindings/sound/realtek,alc203.yaml
new file mode 100644
index 000000000000..6b90788b45eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/realtek,alc203.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/realtek,alc203.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Realtek ALC203 AC97 Audio Codec
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+description:
+ ALC203 is a full duplex AC97 2.3 compatible stereo audio codec.
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ const: realtek,alc203
+
+ '#sound-dai-cells':
+ const: 0
+
+required:
+ - compatible
+ - '#sound-dai-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ audio-codec {
+ compatible = "realtek,alc203";
+ #sound-dai-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/richtek,rt9123.yaml b/Documentation/devicetree/bindings/sound/richtek,rt9123.yaml
new file mode 100644
index 000000000000..5acb05cdfefd
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/richtek,rt9123.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/richtek,rt9123.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT9123 Audio Amplifier
+
+maintainers:
+ - ChiYuan Huang <cy_huang@richtek.com>
+
+description:
+ RT9123 is a 3.2W mono Class-D audio amplifier that features high efficiency
+ and performance with ultra-low quiescent current. The digital audio interface
+ support various formats, including I2S, left-justified, right-justified, and
+ TDM formats.
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - richtek,rt9123
+
+ reg:
+ maxItems: 1
+
+ '#sound-dai-cells':
+ const: 0
+
+ enable-gpios:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#sound-dai-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ amplifier@5e {
+ compatible = "richtek,rt9123";
+ reg = <0x5e>;
+ enable-gpios = <&gpio 26 GPIO_ACTIVE_HIGH>;
+ #sound-dai-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/richtek,rt9123p.yaml b/Documentation/devicetree/bindings/sound/richtek,rt9123p.yaml
new file mode 100644
index 000000000000..693511dfdda4
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/richtek,rt9123p.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/richtek,rt9123p.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT9123P Audio Amplifier
+
+maintainers:
+ - ChiYuan Huang <cy_huang@richtek.com>
+
+description:
+ RT9123P is a RT9123 variant which does not support I2C control.
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - richtek,rt9123p
+
+ '#sound-dai-cells':
+ const: 0
+
+ enable-gpios:
+ maxItems: 1
+
+ enable-delay-ms:
+ description:
+ Delay time for 'ENABLE' pin changes intended to make I2S clocks ready to
+ prevent speaker pop noise. The unit is in millisecond.
+
+required:
+ - compatible
+ - '#sound-dai-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ amplifier {
+ compatible = "richtek,rt9123p";
+ enable-gpios = <&gpio 26 GPIO_ACTIVE_HIGH>;
+ #sound-dai-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml b/Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml
new file mode 100644
index 000000000000..149da9a91451
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/rockchip,rk3576-sai.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Serial Audio Interface Controller
+
+description:
+ The Rockchip Serial Audio Interface (SAI) controller is a flexible audio
+ controller that implements the I2S, I2S/TDM and the PDM standards.
+
+maintainers:
+ - Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+
+allOf:
+ - $ref: dai-common.yaml#
+
+properties:
+ compatible:
+ const: rockchip,rk3576-sai
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dmas:
+ minItems: 1
+ maxItems: 2
+
+ dma-names:
+ minItems: 1
+ items:
+ - enum: [tx, rx]
+ - const: rx
+
+ clocks:
+ items:
+ - description: master audio clock
+ - description: AHB clock driving the interface
+
+ clock-names:
+ items:
+ - const: mclk
+ - const: hclk
+
+ resets:
+ minItems: 1
+ items:
+ - description: reset for the mclk domain
+ - description: reset for the hclk domain
+
+ reset-names:
+ minItems: 1
+ items:
+ - const: m
+ - const: h
+
+ port:
+ $ref: audio-graph-port.yaml#
+ unevaluatedProperties: false
+
+ power-domains:
+ maxItems: 1
+
+ "#sound-dai-cells":
+ const: 0
+
+ rockchip,sai-rx-route:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description:
+ Defines the mapping of the controller's SDI ports to actual input lanes,
+ as well as the number of input lanes.
+ rockchip,sai-rx-route = <3> would mean sdi3 is receiving from data0, and
+ that there is only one receiving lane.
+ This property's absence is to be understood as only one receiving lane
+ being used if the controller has capture capabilities.
+ maxItems: 4
+ items:
+ minimum: 0
+ maximum: 3
+
+ rockchip,sai-tx-route:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description:
+ Defines the mapping of the controller's SDO ports to actual output lanes,
+ as well as the number of output lanes.
+ rockchip,sai-tx-route = <3> would mean sdo3 is sending to data0, and
+ that there is only one transmitting lane.
+ This property's absence is to be understood as only one transmitting lane
+ being used if the controller has playback capabilities.
+ maxItems: 4
+ items:
+ minimum: 0
+ maximum: 3
+
+required:
+ - compatible
+ - reg
+ - dmas
+ - dma-names
+ - clocks
+ - clock-names
+ - "#sound-dai-cells"
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rockchip,rk3576-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/pinctrl/rockchip.h>
+ #include <dt-bindings/power/rockchip,rk3576-power.h>
+ #include <dt-bindings/reset/rockchip,rk3576-cru.h>
+
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ sai1: sai@2a610000 {
+ compatible = "rockchip,rk3576-sai";
+ reg = <0x0 0x2a610000 0x0 0x1000>;
+ interrupts = <GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru MCLK_SAI1_8CH>, <&cru HCLK_SAI1_8CH>;
+ clock-names = "mclk", "hclk";
+ dmas = <&dmac0 2>, <&dmac0 3>;
+ dma-names = "tx", "rx";
+ power-domains = <&power RK3576_PD_AUDIO>;
+ resets = <&cru SRST_M_SAI1_8CH>, <&cru SRST_H_SAI1_8CH>;
+ reset-names = "m", "h";
+ pinctrl-names = "default";
+ pinctrl-0 = <&sai1m0_lrck
+ &sai1m0_sclk
+ &sai1m0_sdi0
+ &sai1m0_sdo0
+ &sai1m0_sdo1
+ &sai1m0_sdo2
+ &sai1m0_sdo3>;
+ rockchip,sai-tx-route = <3 1 2 0>;
+ #sound-dai-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/spi/fsl,dspi.yaml b/Documentation/devicetree/bindings/spi/fsl,dspi.yaml
index 7ca8fceda717..bf9cce53c48d 100644
--- a/Documentation/devicetree/bindings/spi/fsl,dspi.yaml
+++ b/Documentation/devicetree/bindings/spi/fsl,dspi.yaml
@@ -105,12 +105,12 @@ examples:
big-endian;
flash@0 {
- compatible = "jedec,spi-nor";
- reg = <0>;
- spi-max-frequency = <16000000>;
- spi-cpol;
- spi-cpha;
- spi-cs-setup-delay-ns = <100>;
- spi-cs-hold-delay-ns = <50>;
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <16000000>;
+ spi-cpol;
+ spi-cpha;
+ spi-cs-setup-delay-ns = <100>;
+ spi-cs-hold-delay-ns = <50>;
};
};
diff --git a/Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml b/Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml
index 4e0d391e1d69..c97bf48b56b4 100644
--- a/Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml
+++ b/Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml
@@ -59,8 +59,3 @@ examples:
reg = <0>;
};
};
-
- shm: syscon@c8001000 {
- compatible = "nuvoton,wpcm450-shm", "syscon";
- reg = <0xc8001000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
index 48e97e240265..8b3640280559 100644
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
@@ -10,9 +10,6 @@ maintainers:
- Thierry Reding <thierry.reding@gmail.com>
- Jonathan Hunter <jonathanh@nvidia.com>
-allOf:
- - $ref: spi-controller.yaml#
-
properties:
compatible:
enum:
@@ -47,6 +44,9 @@ properties:
- const: rx
- const: tx
+ iommus:
+ maxItems: 1
+
patternProperties:
"@[0-9a-f]+$":
type: object
@@ -69,6 +69,18 @@ required:
unevaluatedProperties: false
+allOf:
+ - $ref: spi-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ not:
+ contains:
+ const: nvidia,tegra234-qspi
+ then:
+ properties:
+ iommus: false
+
examples:
- |
#include <dt-bindings/clock/tegra210-car.h>
diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml
index aa3f93319203..cb1f15224b45 100644
--- a/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml
+++ b/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml
@@ -21,8 +21,12 @@ allOf:
properties:
compatible:
- enum:
- - qcom,ipq9574-snand
+ oneOf:
+ - items:
+ - enum:
+ - qcom,ipq5018-snand
+ - const: qcom,ipq9574-snand
+ - const: qcom,ipq9574-snand
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
index 49649fc3f95a..e0c7047ae8ad 100644
--- a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
@@ -4,14 +4,11 @@
$id: http://devicetree.org/schemas/spi/renesas,sh-msiof.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Renesas MSIOF SPI controller
+title: Renesas MSIOF SPI / I2S controller
maintainers:
- Geert Uytterhoeven <geert+renesas@glider.be>
-allOf:
- - $ref: spi-controller.yaml#
-
properties:
compatible:
oneOf:
@@ -146,24 +143,38 @@ properties:
$ref: /schemas/types.yaml#/definitions/uint32
default: 64
+ # for MSIOF-I2S
+ port:
+ $ref: ../sound/audio-graph-port.yaml#
+ unevaluatedProperties: false
+
required:
- compatible
- reg
- interrupts
- clocks
- power-domains
- - '#address-cells'
- - '#size-cells'
-
-if:
- not:
- properties:
- compatible:
- contains:
- const: renesas,sh-mobile-msiof
-then:
- required:
- - resets
+
+allOf:
+ # additional "required""
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: renesas,sh-mobile-msiof
+ then:
+ required:
+ - resets
+
+ # If it doesn't have "port" node, it is "MSIOF-SPI"
+ - if:
+ not:
+ required:
+ - port
+ then:
+ allOf:
+ - $ref: spi-controller.yaml#
unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/spi/samsung,spi.yaml b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
index 3c206a64d60a..fe298d47b1a9 100644
--- a/Documentation/devicetree/bindings/spi/samsung,spi.yaml
+++ b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
@@ -29,6 +29,7 @@ properties:
- items:
- enum:
- samsung,exynos8895-spi
+ - samsung,exynosautov920-spi
- const: samsung,exynos850-spi
- const: samsung,exynos7-spi
deprecated: true
diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
index 0bb443b8decd..8fc17e16efb2 100644
--- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
@@ -8,12 +8,13 @@ title: Peripheral-specific properties for a SPI bus.
description:
Many SPI controllers need to add properties to peripheral devices. They could
- be common properties like spi-max-frequency, spi-cpha, etc. or they could be
- controller specific like delay in clock or data lines, etc. These properties
- need to be defined in the peripheral node because they are per-peripheral and
- there can be multiple peripherals attached to a controller. All those
- properties are listed here. The controller specific properties should go in
- their own separate schema that should be referenced from here.
+ be common properties like spi-max-frequency, spi-cs-high, etc. or they could
+ be controller specific like delay in clock or data lines, etc. These
+ properties need to be defined in the peripheral node because they are
+ per-peripheral and there can be multiple peripherals attached to a
+ controller. All those properties are listed here. The controller specific
+ properties should go in their own separate schema that should be referenced
+ from here.
maintainers:
- Mark Brown <broonie@kernel.org>
diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml
index 104f5ffdd04e..748faf7f7081 100644
--- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml
@@ -34,6 +34,7 @@ properties:
- rockchip,rk3328-spi
- rockchip,rk3368-spi
- rockchip,rk3399-spi
+ - rockchip,rk3528-spi
- rockchip,rk3562-spi
- rockchip,rk3568-spi
- rockchip,rk3576-spi
diff --git a/Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml b/Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml
index 5f276f27dc4c..272bc308726b 100644
--- a/Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml
+++ b/Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml
@@ -68,6 +68,7 @@ required:
- compatible
- reg
- clocks
+ - resets
- interrupts
- st,syscfg-dlyb
diff --git a/Documentation/devicetree/bindings/thermal/airoha,en7581-thermal.yaml b/Documentation/devicetree/bindings/thermal/airoha,en7581-thermal.yaml
new file mode 100644
index 000000000000..ca0242ef0378
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/airoha,en7581-thermal.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/airoha,en7581-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha EN7581 Thermal Sensor and Monitor
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+properties:
+ compatible:
+ const: airoha,en7581-thermal
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ airoha,chip-scu:
+ description: phandle to the chip SCU syscon
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ '#thermal-sensor-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - airoha,chip-scu
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ thermal-sensor@1efbd800 {
+ compatible = "airoha,en7581-thermal";
+ reg = <0x1efbd000 0xd5c>;
+ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+ airoha,chip-scu = <&chip_scu>;
+
+ #thermal-sensor-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index f9d8012c8cf5..0e653bbe9884 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -39,6 +39,7 @@ properties:
- description: v1 of TSENS
items:
- enum:
+ - qcom,ipq5018-tsens
- qcom,msm8937-tsens
- qcom,msm8956-tsens
- qcom,msm8976-tsens
@@ -251,6 +252,7 @@ allOf:
compatible:
contains:
enum:
+ - qcom,ipq5018-tsens
- qcom,ipq8064-tsens
- qcom,msm8960-tsens
- qcom,tsens-v0_1
diff --git a/Documentation/devicetree/bindings/timer/altr,timer-1.0.txt b/Documentation/devicetree/bindings/timer/altr,timer-1.0.txt
deleted file mode 100644
index e698e3488735..000000000000
--- a/Documentation/devicetree/bindings/timer/altr,timer-1.0.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Altera Timer
-
-Required properties:
-
-- compatible : should be "altr,timer-1.0"
-- reg : Specifies base physical address and size of the registers.
-- interrupts : Should contain the timer interrupt number
-- clock-frequency : The frequency of the clock that drives the counter, in Hz.
-
-Example:
-
-timer {
- compatible = "altr,timer-1.0";
- reg = <0x00400000 0x00000020>;
- interrupt-parent = <&cpu>;
- interrupts = <11>;
- clock-frequency = <125000000>;
-};
diff --git a/Documentation/devicetree/bindings/timer/altr,timer-1.0.yaml b/Documentation/devicetree/bindings/timer/altr,timer-1.0.yaml
new file mode 100644
index 000000000000..576260c72d42
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/altr,timer-1.0.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/altr,timer-1.0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Altera Timer
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+properties:
+ compatible:
+ const: altr,timer-1.0
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clock-frequency:
+ description: Frequency of the clock that drives the counter, in Hz.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@400000 {
+ compatible = "altr,timer-1.0";
+ reg = <0x00400000 0x00000020>;
+ interrupts = <11>;
+ clock-frequency = <125000000>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/arm,mps2-timer.txt b/Documentation/devicetree/bindings/timer/arm,mps2-timer.txt
deleted file mode 100644
index 48f84d74edde..000000000000
--- a/Documentation/devicetree/bindings/timer/arm,mps2-timer.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-ARM MPS2 timer
-
-The MPS2 platform has simple general-purpose 32 bits timers.
-
-Required properties:
-- compatible : Should be "arm,mps2-timer"
-- reg : Address and length of the register set
-- interrupts : Reference to the timer interrupt
-
-Required clocking property, have to be one of:
-- clocks : The input clock of the timer
-- clock-frequency : The rate in HZ in input of the ARM MPS2 timer
-
-Examples:
-
-timer1: mps2-timer@40000000 {
- compatible = "arm,mps2-timer";
- reg = <0x40000000 0x1000>;
- interrupts = <8>;
- clocks = <&sysclk>;
-};
-
-timer2: mps2-timer@40001000 {
- compatible = "arm,mps2-timer";
- reg = <0x40001000 0x1000>;
- interrupts = <9>;
- clock-frequency = <25000000>;
-};
diff --git a/Documentation/devicetree/bindings/timer/arm,mps2-timer.yaml b/Documentation/devicetree/bindings/timer/arm,mps2-timer.yaml
new file mode 100644
index 000000000000..64c6aedd7e8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/arm,mps2-timer.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm,mps2-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM MPS2 timer
+
+maintainers:
+ - Vladimir Murzin <vladimir.murzin@arm.com>
+
+description:
+ The MPS2 platform has simple general-purpose 32 bits timers.
+
+properties:
+ compatible:
+ const: arm,mps2-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-frequency:
+ description: Rate in Hz of the timer input clock
+
+oneOf:
+ - required: [clocks]
+ - required: [clock-frequency]
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@40000000 {
+ compatible = "arm,mps2-timer";
+ reg = <0x40000000 0x1000>;
+ interrupts = <8>;
+ clocks = <&sysclk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt b/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt
deleted file mode 100644
index d4c62e7b1714..000000000000
--- a/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-* Cirrus Logic CLPS711X Timer Counter
-
-Required properties:
-- compatible: Shall contain "cirrus,ep7209-timer".
-- reg : Address and length of the register set.
-- interrupts: The interrupt number of the timer.
-- clocks : phandle of timer reference clock.
-
-Note: Each timer should have an alias correctly numbered in "aliases" node.
-
-Example:
- aliases {
- timer0 = &timer1;
- timer1 = &timer2;
- };
-
- timer1: timer@80000300 {
- compatible = "cirrus,ep7312-timer", "cirrus,ep7209-timer";
- reg = <0x80000300 0x4>;
- interrupts = <8>;
- clocks = <&clks 5>;
- };
-
- timer2: timer@80000340 {
- compatible = "cirrus,ep7312-timer", "cirrus,ep7209-timer";
- reg = <0x80000340 0x4>;
- interrupts = <9>;
- clocks = <&clks 6>;
- };
diff --git a/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.yaml b/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.yaml
new file mode 100644
index 000000000000..507b777e16bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/cirrus,clps711x-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic CLPS711X Timer Counter
+
+maintainers:
+ - Alexander Shiyan <shc_work@mail.ru>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - cirrus,ep7312-timer
+ - const: cirrus,ep7209-timer
+ - const: cirrus,ep7209-timer
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@80000300 {
+ compatible = "cirrus,ep7312-timer", "cirrus,ep7209-timer";
+ reg = <0x80000300 0x4>;
+ interrupts = <8>;
+ clocks = <&clks 5>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/cnxt,cx92755-timer.yaml b/Documentation/devicetree/bindings/timer/cnxt,cx92755-timer.yaml
new file mode 100644
index 000000000000..8f1a5af32a36
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/cnxt,cx92755-timer.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cnxt,cx92755-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Conexant Digicolor SoCs Timer Controller
+
+maintainers:
+ - Baruch Siach <baruch@tkos.co.il>
+
+properties:
+ compatible:
+ const: cnxt,cx92755-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: Contains 8 interrupts, one for each timer
+ items:
+ - description: interrupt for timer 0
+ - description: interrupt for timer 1
+ - description: interrupt for timer 2
+ - description: interrupt for timer 3
+ - description: interrupt for timer 4
+ - description: interrupt for timer 5
+ - description: interrupt for timer 6
+ - description: interrupt for timer 7
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@f0000fc0 {
+ compatible = "cnxt,cx92755-timer";
+ reg = <0xf0000fc0 0x40>;
+ interrupts = <19>, <31>, <34>, <35>, <52>, <53>, <54>, <55>;
+ clocks = <&main_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt b/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt
deleted file mode 100644
index 6b04344f4bea..000000000000
--- a/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-=================
-gx6605s SOC Timer
-=================
-
-The timer is used in gx6605s soc as system timer and the driver
-contain clk event and clk source.
-
-==============================
-timer node bindings definition
-==============================
-
- Description: Describes gx6605s SOC timer
-
- PROPERTIES
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: must be "csky,gx6605s-timer"
- - reg
- Usage: required
- Value type: <u32 u32>
- Definition: <phyaddr size> in soc from cpu view
- - clocks
- Usage: required
- Value type: phandle + clock specifier cells
- Definition: must be input clk node
- - interrupt
- Usage: required
- Value type: <u32>
- Definition: must be timer irq num defined by soc
-
-Examples:
----------
-
- timer0: timer@20a000 {
- compatible = "csky,gx6605s-timer";
- reg = <0x0020a000 0x400>;
- clocks = <&dummy_apb_clk>;
- interrupts = <10>;
- interrupt-parent = <&intc>;
- };
diff --git a/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.yaml b/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.yaml
new file mode 100644
index 000000000000..888fc8113996
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/csky,gx6605s-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: gx6605s SOC Timer
+
+maintainers:
+ - Guo Ren <guoren@kernel.org>
+
+properties:
+ compatible:
+ const: csky,gx6605s-timer
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@20a000 {
+ compatible = "csky,gx6605s-timer";
+ reg = <0x0020a000 0x400>;
+ clocks = <&dummy_apb_clk>;
+ interrupts = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/csky,mptimer.txt b/Documentation/devicetree/bindings/timer/csky,mptimer.txt
deleted file mode 100644
index f5c7e99cf52b..000000000000
--- a/Documentation/devicetree/bindings/timer/csky,mptimer.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-============================
-C-SKY Multi-processors Timer
-============================
-
-C-SKY multi-processors timer is designed for C-SKY SMP system and the
-regs is accessed by cpu co-processor 4 registers with mtcr/mfcr.
-
- - PTIM_CTLR "cr<0, 14>" Control reg to start reset timer.
- - PTIM_TSR "cr<1, 14>" Interrupt cleanup status reg.
- - PTIM_CCVR "cr<3, 14>" Current counter value reg.
- - PTIM_LVR "cr<6, 14>" Window value reg to trigger next event.
-
-==============================
-timer node bindings definition
-==============================
-
- Description: Describes SMP timer
-
- PROPERTIES
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: must be "csky,mptimer"
- - clocks
- Usage: required
- Value type: <node>
- Definition: must be input clk node
- - interrupts
- Usage: required
- Value type: <u32>
- Definition: must be timer irq num defined by soc
-
-Examples:
----------
-
- timer: timer {
- compatible = "csky,mptimer";
- clocks = <&dummy_apb_clk>;
- interrupts = <16>;
- interrupt-parent = <&intc>;
- };
diff --git a/Documentation/devicetree/bindings/timer/csky,mptimer.yaml b/Documentation/devicetree/bindings/timer/csky,mptimer.yaml
new file mode 100644
index 000000000000..12cc5282c8f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/csky,mptimer.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/csky,mptimer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: C-SKY Multi-processors Timer
+
+maintainers:
+ - Flavio Suligoi <f.suligoi@asem.it>
+ - Guo Ren <guoren@kernel.org>
+
+description: |
+ C-SKY multi-processors timer is designed for C-SKY SMP system and the regs are
+ accessed by cpu co-processor 4 registers with mtcr/mfcr.
+
+ - PTIM_CTLR "cr<0, 14>" Control reg to start reset timer.
+ - PTIM_TSR "cr<1, 14>" Interrupt cleanup status reg.
+ - PTIM_CCVR "cr<3, 14>" Current counter value reg.
+ - PTIM_LVR "cr<6, 14>" Window value reg to trigger next event.
+
+properties:
+ compatible:
+ items:
+ - const: csky,mptimer
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer {
+ compatible = "csky,mptimer";
+ clocks = <&dummy_apb_clk>;
+ interrupts = <16>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/digicolor-timer.txt b/Documentation/devicetree/bindings/timer/digicolor-timer.txt
deleted file mode 100644
index d1b659bbc29f..000000000000
--- a/Documentation/devicetree/bindings/timer/digicolor-timer.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Conexant Digicolor SoCs Timer Controller
-
-Required properties:
-
-- compatible : should be "cnxt,cx92755-timer"
-- reg : Specifies base physical address and size of the "Agent Communication"
- timer registers
-- interrupts : Contains 8 interrupts, one for each timer
-- clocks: phandle to the main clock
-
-Example:
-
- timer@f0000fc0 {
- compatible = "cnxt,cx92755-timer";
- reg = <0xf0000fc0 0x40>;
- interrupts = <19>, <31>, <34>, <35>, <52>, <53>, <54>, <55>;
- clocks = <&main_clk>;
- };
diff --git a/Documentation/devicetree/bindings/timer/econet,en751221-timer.yaml b/Documentation/devicetree/bindings/timer/econet,en751221-timer.yaml
new file mode 100644
index 000000000000..c1e7c2b6afde
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/econet,en751221-timer.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/econet,en751221-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: EcoNet EN751221 High Precision Timer (HPT)
+
+maintainers:
+ - Caleb James DeLisle <cjd@cjdns.fr>
+
+description:
+ The EcoNet High Precision Timer (HPT) is a timer peripheral found in various
+ EcoNet SoCs, including the EN751221 and EN751627 families. It provides per-VPE
+ count/compare registers and a per-CPU control register, with a single interrupt
+ line using a percpu-devid interrupt mechanism.
+
+properties:
+ compatible:
+ oneOf:
+ - const: econet,en751221-timer
+ - items:
+ - const: econet,en751627-timer
+ - const: econet,en751221-timer
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ interrupts:
+ maxItems: 1
+ description: A percpu-devid timer interrupt shared across CPUs.
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: econet,en751627-timer
+ then:
+ properties:
+ reg:
+ items:
+ - description: VPE timers 0 and 1
+ - description: VPE timers 2 and 3
+ else:
+ properties:
+ reg:
+ items:
+ - description: VPE timers 0 and 1
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@1fbf0400 {
+ compatible = "econet,en751627-timer", "econet,en751221-timer";
+ reg = <0x1fbf0400 0x100>, <0x1fbe0000 0x100>;
+ interrupt-parent = <&intc>;
+ interrupts = <30>;
+ clocks = <&hpt_clock>;
+ };
+ - |
+ timer@1fbf0400 {
+ compatible = "econet,en751221-timer";
+ reg = <0x1fbe0400 0x100>;
+ interrupt-parent = <&intc>;
+ interrupts = <30>;
+ clocks = <&hpt_clock>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.yaml b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.yaml
new file mode 100644
index 000000000000..317c5010c4c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ezchip,nps400-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: EZChip NPS400 Timers
+
+maintainers:
+ - Noam Camus <noamca@mellanox.com>
+
+properties:
+ compatible:
+ enum:
+ - ezchip,nps400-timer0
+ - ezchip,nps400-timer1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ezchip,nps400-timer0
+ then:
+ required: [ interrupts ]
+
+examples:
+ - |
+ timer {
+ compatible = "ezchip,nps400-timer0";
+ interrupts = <3>;
+ clocks = <&sysclk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer0.txt b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer0.txt
deleted file mode 100644
index e3cfce8fecc5..000000000000
--- a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer0.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-NPS Network Processor
-
-Required properties:
-
-- compatible : should be "ezchip,nps400-timer0"
-
-Clocks required for compatible = "ezchip,nps400-timer0":
-- interrupts : The interrupt of the first timer
-- clocks : Must contain a single entry describing the clock input
-
-Example:
-
-timer {
- compatible = "ezchip,nps400-timer0";
- interrupts = <3>;
- clocks = <&sysclk>;
-};
diff --git a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer1.txt b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer1.txt
deleted file mode 100644
index c0ab4190b8fb..000000000000
--- a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer1.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-NPS Network Processor
-
-Required properties:
-
-- compatible : should be "ezchip,nps400-timer1"
-
-Clocks required for compatible = "ezchip,nps400-timer1":
-- clocks : Must contain a single entry describing the clock input
-
-Example:
-
-timer {
- compatible = "ezchip,nps400-timer1";
- clocks = <&sysclk>;
-};
diff --git a/Documentation/devicetree/bindings/timer/fsl,gtm.txt b/Documentation/devicetree/bindings/timer/fsl,gtm.txt
deleted file mode 100644
index fc1c571f7412..000000000000
--- a/Documentation/devicetree/bindings/timer/fsl,gtm.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* Freescale General-purpose Timers Module
-
-Required properties:
- - compatible : should be
- "fsl,<chip>-gtm", "fsl,gtm" for SOC GTMs
- "fsl,<chip>-qe-gtm", "fsl,qe-gtm", "fsl,gtm" for QE GTMs
- "fsl,<chip>-cpm2-gtm", "fsl,cpm2-gtm", "fsl,gtm" for CPM2 GTMs
- - reg : should contain gtm registers location and length (0x40).
- - interrupts : should contain four interrupts.
- - clock-frequency : specifies the frequency driving the timer.
-
-Example:
-
-timer@500 {
- compatible = "fsl,mpc8360-gtm", "fsl,gtm";
- reg = <0x500 0x40>;
- interrupts = <90 8 78 8 84 8 72 8>;
- interrupt-parent = <&ipic>;
- /* filled by u-boot */
- clock-frequency = <0>;
-};
-
-timer@440 {
- compatible = "fsl,mpc8360-qe-gtm", "fsl,qe-gtm", "fsl,gtm";
- reg = <0x440 0x40>;
- interrupts = <12 13 14 15>;
- interrupt-parent = <&qeic>;
- /* filled by u-boot */
- clock-frequency = <0>;
-};
diff --git a/Documentation/devicetree/bindings/timer/fsl,gtm.yaml b/Documentation/devicetree/bindings/timer/fsl,gtm.yaml
new file mode 100644
index 000000000000..1f35f1ee0be2
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/fsl,gtm.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/fsl,gtm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale General-purpose Timers Module
+
+maintainers:
+ - J. Neuschäfer <j.ne@posteo.net>
+
+properties:
+ compatible:
+ oneOf:
+ # for SoC GTMs
+ - items:
+ - enum:
+ - fsl,mpc8308-gtm
+ - fsl,mpc8313-gtm
+ - fsl,mpc8315-gtm
+ - fsl,mpc8360-gtm
+ - const: fsl,gtm
+
+ # for QE GTMs
+ - items:
+ - enum:
+ - fsl,mpc8360-qe-gtm
+ - fsl,mpc8569-qe-gtm
+ - const: fsl,qe-gtm
+ - const: fsl,gtm
+
+ # for CPM2 GTMs (no known examples)
+ - items:
+ # - enum:
+ # - fsl,<chip>-cpm2-gtm
+ - const: fsl,cpm2-gtm
+ - const: fsl,gtm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Interrupt for timer 1 (e.g. GTM1 or GTM5)
+ - description: Interrupt for timer 2 (e.g. GTM2 or GTM6)
+ - description: Interrupt for timer 3 (e.g. GTM3 or GTM7)
+ - description: Interrupt for timer 4 (e.g. GTM4 or GTM8)
+
+ clock-frequency: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clock-frequency
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ timer@500 {
+ compatible = "fsl,mpc8360-gtm", "fsl,gtm";
+ reg = <0x500 0x40>;
+ interrupts = <90 IRQ_TYPE_LEVEL_LOW>,
+ <78 IRQ_TYPE_LEVEL_LOW>,
+ <84 IRQ_TYPE_LEVEL_LOW>,
+ <72 IRQ_TYPE_LEVEL_LOW>;
+ /* filled by u-boot */
+ clock-frequency = <0>;
+ };
+
+ - |
+ timer@440 {
+ compatible = "fsl,mpc8360-qe-gtm", "fsl,qe-gtm", "fsl,gtm";
+ reg = <0x440 0x40>;
+ interrupts = <12>, <13>, <14>, <15>;
+ /* filled by u-boot */
+ clock-frequency = <0>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/timer/img,pistachio-gptimer.txt b/Documentation/devicetree/bindings/timer/img,pistachio-gptimer.txt
deleted file mode 100644
index 7afce80bf6a0..000000000000
--- a/Documentation/devicetree/bindings/timer/img,pistachio-gptimer.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Pistachio general-purpose timer based clocksource
-
-Required properties:
- - compatible: "img,pistachio-gptimer".
- - reg: Address range of the timer registers.
- - interrupts: An interrupt for each of the four timers
- - clocks: Should contain a clock specifier for each entry in clock-names
- - clock-names: Should contain the following entries:
- "sys", interface clock
- "slow", slow counter clock
- "fast", fast counter clock
- - img,cr-periph: Must contain a phandle to the peripheral control
- syscon node.
-
-Example:
- timer: timer@18102000 {
- compatible = "img,pistachio-gptimer";
- reg = <0x18102000 0x100>;
- interrupts = <GIC_SHARED 60 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SHARED 61 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SHARED 62 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SHARED 63 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk_periph PERIPH_CLK_COUNTER_FAST>,
- <&clk_periph PERIPH_CLK_COUNTER_SLOW>,
- <&cr_periph SYS_CLK_TIMER>;
- clock-names = "fast", "slow", "sys";
- img,cr-periph = <&cr_periph>;
- };
diff --git a/Documentation/devicetree/bindings/timer/img,pistachio-gptimer.yaml b/Documentation/devicetree/bindings/timer/img,pistachio-gptimer.yaml
new file mode 100644
index 000000000000..a8654bcf68a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/img,pistachio-gptimer.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/img,pistachio-gptimer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Pistachio general-purpose timer
+
+maintainers:
+ - Ezequiel Garcia <ezequiel.garcia@imgtec.com>
+
+properties:
+ compatible:
+ const: img,pistachio-gptimer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Timer0 interrupt
+ - description: Timer1 interrupt
+ - description: Timer2 interrupt
+ - description: Timer3 interrupt
+
+ clocks:
+ items:
+ - description: Fast counter clock
+ - description: Slow counter clock
+ - description: Interface clock
+
+ clock-names:
+ items:
+ - const: fast
+ - const: slow
+ - const: sys
+
+ img,cr-periph:
+ description: Peripheral control syscon phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - img,cr-periph
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+ #include <dt-bindings/clock/pistachio-clk.h>
+
+ timer@18102000 {
+ compatible = "img,pistachio-gptimer";
+ reg = <0x18102000 0x100>;
+ interrupts = <GIC_SHARED 60 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 61 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 62 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 63 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_periph PERIPH_CLK_COUNTER_FAST>,
+ <&clk_periph PERIPH_CLK_COUNTER_SLOW>,
+ <&cr_periph SYS_CLK_TIMER>;
+ clock-names = "fast", "slow", "sys";
+ img,cr-periph = <&cr_periph>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/jcore,pit.txt b/Documentation/devicetree/bindings/timer/jcore,pit.txt
deleted file mode 100644
index af5dd35469d7..000000000000
--- a/Documentation/devicetree/bindings/timer/jcore,pit.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-J-Core Programmable Interval Timer and Clocksource
-
-Required properties:
-
-- compatible: Must be "jcore,pit".
-
-- reg: Memory region(s) for timer/clocksource registers. For SMP,
- there should be one region per cpu, indexed by the sequential,
- zero-based hardware cpu number.
-
-- interrupts: An interrupt to assign for the timer. The actual pit
- core is integrated with the aic and allows the timer interrupt
- assignment to be programmed by software, but this property is
- required in order to reserve an interrupt number that doesn't
- conflict with other devices.
-
-
-Example:
-
-timer@200 {
- compatible = "jcore,pit";
- reg = < 0x200 0x30 0x500 0x30 >;
- interrupts = < 0x48 >;
-};
diff --git a/Documentation/devicetree/bindings/timer/jcore,pit.yaml b/Documentation/devicetree/bindings/timer/jcore,pit.yaml
new file mode 100644
index 000000000000..9e6e25b75293
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/jcore,pit.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/jcore,pit.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: J-Core Programmable Interval Timer and Clocksource
+
+maintainers:
+ - Rich Felker <dalias@libc.org>
+
+properties:
+ compatible:
+ const: jcore,pit
+
+ reg:
+ description:
+ Memory region(s) for timer/clocksource registers. For SMP, there should be
+ one region per cpu, indexed by the sequential, zero-based hardware cpu
+ number.
+
+ interrupts:
+ description:
+ An interrupt to assign for the timer. The actual pit core is integrated
+ with the aic and allows the timer interrupt assignment to be programmed by
+ software, but this property is required in order to reserve an interrupt
+ number that doesn't conflict with other devices.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@200 {
+ compatible = "jcore,pit";
+ reg = <0x200 0x30 0x500 0x30>;
+ interrupts = <0x48>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt
deleted file mode 100644
index b2d07ad90e9a..000000000000
--- a/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-TI-NSPIRE timer
-
-Required properties:
-
-- compatible : should be "lsi,zevio-timer".
-- reg : The physical base address and size of the timer (always first).
-- clocks: phandle to the source clock.
-
-Optional properties:
-
-- interrupts : The interrupt number of the first timer.
-- reg : The interrupt acknowledgement registers
- (always after timer base address)
-
-If any of the optional properties are not given, the timer is added as a
-clock-source only.
-
-Example:
-
-timer {
- compatible = "lsi,zevio-timer";
- reg = <0x900D0000 0x1000>, <0x900A0020 0x8>;
- interrupts = <19>;
- clocks = <&timer_clk>;
-};
-
-Example (no clock-events):
-
-timer {
- compatible = "lsi,zevio-timer";
- reg = <0x900D0000 0x1000>;
- clocks = <&timer_clk>;
-};
diff --git a/Documentation/devicetree/bindings/timer/lsi,zevio-timer.yaml b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.yaml
new file mode 100644
index 000000000000..358455d8e7a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/lsi,zevio-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI-NSPIRE timer
+
+maintainers:
+ - Daniel Tang <dt.tangr@gmail.com>
+
+properties:
+ compatible:
+ const: lsi,zevio-timer
+
+ reg:
+ minItems: 1
+ items:
+ - description: Timer registers
+ - description: Interrupt acknowledgement registers (optional)
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+allOf:
+ - if:
+ required: [ interrupts ]
+ then:
+ properties:
+ reg:
+ minItems: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@900d0000 {
+ compatible = "lsi,zevio-timer";
+ reg = <0x900D0000 0x1000>, <0x900A0020 0x8>;
+ interrupts = <19>;
+ clocks = <&timer_clk>;
+ };
+ - |
+ timer@900d0000 {
+ compatible = "lsi,zevio-timer";
+ reg = <0x900D0000 0x1000>;
+ clocks = <&timer_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/marvell,armada-370-timer.yaml b/Documentation/devicetree/bindings/timer/marvell,armada-370-timer.yaml
new file mode 100644
index 000000000000..bc0677fe86eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/marvell,armada-370-timer.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/marvell,armada-370-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 370, 375, 380 and XP Timers
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: marvell,armada-380-timer
+ - const: marvell,armada-xp-timer
+ - items:
+ - const: marvell,armada-375-timer
+ - const: marvell,armada-370-timer
+ - enum:
+ - marvell,armada-370-timer
+ - marvell,armada-xp-timer
+
+ reg:
+ items:
+ - description: Global timer registers
+ - description: Local/private timer registers
+
+ interrupts:
+ items:
+ - description: Global timer interrupt 0
+ - description: Global timer interrupt 1
+ - description: Global timer interrupt 2
+ - description: Global timer interrupt 3
+ - description: First private timer interrupt
+ - description: Second private timer interrupt
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: nbclk
+ - const: fixed
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - marvell,armada-375-timer
+ - marvell,armada-xp-timer
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ clock-names:
+ minItems: 2
+ required:
+ - clock-names
+ else:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ maxItems: 1
+
+examples:
+ - |
+ timer@20300 {
+ compatible = "marvell,armada-xp-timer";
+ reg = <0x20300 0x30>, <0x21040 0x30>;
+ interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
+ clocks = <&coreclk 2>, <&refclk>;
+ clock-names = "nbclk", "fixed";
+ };
diff --git a/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
deleted file mode 100644
index e9c78ce880e6..000000000000
--- a/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Marvell Armada 370 and Armada XP Timers
----------------------------------------
-
-Required properties:
-- compatible: Should be one of the following
- "marvell,armada-370-timer",
- "marvell,armada-375-timer",
- "marvell,armada-xp-timer".
-- interrupts: Should contain the list of Global Timer interrupts and
- then local timer interrupts
-- reg: Should contain location and length for timers register. First
- pair for the Global Timer registers, second pair for the
- local/private timers.
-
-Clocks required for compatible = "marvell,armada-370-timer":
-- clocks : Must contain a single entry describing the clock input
-
-Clocks required for compatibles = "marvell,armada-xp-timer",
- "marvell,armada-375-timer":
-- clocks : Must contain an entry for each entry in clock-names.
-- clock-names : Must include the following entries:
- "nbclk" (L2/coherency fabric clock),
- "fixed" (Reference 25 MHz fixed-clock).
-
-Examples:
-
-- Armada 370:
-
- timer {
- compatible = "marvell,armada-370-timer";
- reg = <0x20300 0x30>, <0x21040 0x30>;
- interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
- clocks = <&coreclk 2>;
- };
-
-- Armada XP:
-
- timer {
- compatible = "marvell,armada-xp-timer";
- reg = <0x20300 0x30>, <0x21040 0x30>;
- interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
- clocks = <&coreclk 2>, <&refclk>;
- clock-names = "nbclk", "fixed";
- };
diff --git a/Documentation/devicetree/bindings/timer/marvell,orion-timer.txt b/Documentation/devicetree/bindings/timer/marvell,orion-timer.txt
deleted file mode 100644
index cd1a0c256f94..000000000000
--- a/Documentation/devicetree/bindings/timer/marvell,orion-timer.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Marvell Orion SoC timer
-
-Required properties:
-- compatible: shall be "marvell,orion-timer"
-- reg: base address of the timer register starting with TIMERS CONTROL register
-- interrupts: should contain the interrupts for Timer0 and Timer1
-- clocks: phandle of timer reference clock (tclk)
-
-Example:
- timer: timer {
- compatible = "marvell,orion-timer";
- reg = <0x20300 0x20>;
- interrupt-parent = <&bridge_intc>;
- interrupts = <1>, <2>;
- clocks = <&core_clk 0>;
- };
diff --git a/Documentation/devicetree/bindings/timer/marvell,orion-timer.yaml b/Documentation/devicetree/bindings/timer/marvell,orion-timer.yaml
new file mode 100644
index 000000000000..f973afffa5ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/marvell,orion-timer.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/marvell,orion-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Orion SoC timer
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ compatible:
+ const: marvell,orion-timer
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Timer0 interrupt
+ - description: Timer1 interrupt
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@20300 {
+ compatible = "marvell,orion-timer";
+ reg = <0x20300 0x20>;
+ interrupts = <1>, <2>;
+ clocks = <&core_clk 0>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/nxp,s32g2-stm.yaml b/Documentation/devicetree/bindings/timer/nxp,s32g2-stm.yaml
new file mode 100644
index 000000000000..b44b9794bb85
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nxp,s32g2-stm.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/nxp,s32g2-stm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP System Timer Module (STM)
+
+maintainers:
+ - Daniel Lezcano <daniel.lezcano@kernel.org>
+
+description:
+ The System Timer Module supports commonly required system and application
+ software timing functions. STM includes a 32-bit count-up timer and four
+ 32-bit compare channels with a separate interrupt source for each channel.
+ The timer is driven by the STM module clock divided by an 8-bit prescale
+ value.
+
+properties:
+ compatible:
+ oneOf:
+ - const: nxp,s32g2-stm
+ - items:
+ - const: nxp,s32g3-stm
+ - const: nxp,s32g2-stm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Counter clock
+ - description: Module clock
+ - description: Register clock
+
+ clock-names:
+ items:
+ - const: counter
+ - const: module
+ - const: register
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ timer@4011c000 {
+ compatible = "nxp,s32g2-stm";
+ reg = <0x4011c000 0x3000>;
+ interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks 0x3b>, <&clks 0x3c>, <&clks 0x3c>;
+ clock-names = "counter", "module", "register";
+ };
diff --git a/Documentation/devicetree/bindings/timer/renesas,ostm.yaml b/Documentation/devicetree/bindings/timer/renesas,ostm.yaml
index 9ba858f094ab..0983c1efec80 100644
--- a/Documentation/devicetree/bindings/timer/renesas,ostm.yaml
+++ b/Documentation/devicetree/bindings/timer/renesas,ostm.yaml
@@ -26,6 +26,7 @@ properties:
- renesas,r9a07g043-ostm # RZ/G2UL and RZ/Five
- renesas,r9a07g044-ostm # RZ/G2{L,LC}
- renesas,r9a07g054-ostm # RZ/V2L
+ - renesas,r9a09g056-ostm # RZ/V2N
- renesas,r9a09g057-ostm # RZ/V2H(P)
- const: renesas,ostm # Generic
@@ -54,12 +55,11 @@ required:
if:
properties:
compatible:
- contains:
- enum:
- - renesas,r9a07g043-ostm
- - renesas,r9a07g044-ostm
- - renesas,r9a07g054-ostm
- - renesas,r9a09g057-ostm
+ not:
+ contains:
+ enum:
+ - renesas,r7s72100-ostm
+ - renesas,r7s9210-ostm
then:
required:
- resets
diff --git a/Documentation/devicetree/bindings/timer/sifive,clint.yaml b/Documentation/devicetree/bindings/timer/sifive,clint.yaml
index 653e2e0ca878..d85a1a088b35 100644
--- a/Documentation/devicetree/bindings/timer/sifive,clint.yaml
+++ b/Documentation/devicetree/bindings/timer/sifive,clint.yaml
@@ -30,6 +30,7 @@ properties:
- items:
- enum:
- canaan,k210-clint # Canaan Kendryte K210
+ - eswin,eic7700-clint # ESWIN EIC7700
- sifive,fu540-c000-clint # SiFive FU540
- spacemit,k1-clint # SpacemiT K1
- starfive,jh7100-clint # StarFive JH7100
diff --git a/Documentation/devicetree/bindings/timer/snps,arc-timer.txt b/Documentation/devicetree/bindings/timer/snps,arc-timer.txt
deleted file mode 100644
index b02ab0af10ce..000000000000
--- a/Documentation/devicetree/bindings/timer/snps,arc-timer.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Synopsys ARC Local Timer with Interrupt Capabilities
-- Found on all ARC CPUs (ARC700/ARCHS)
-- Can be optionally programmed to interrupt on Limit
-- Two identical copies TIMER0 and TIMER1 exist in ARC cores and historically
- TIMER0 used as clockevent provider (true for all ARC cores)
- TIMER1 used for clocksource (mandatory for ARC700, optional for ARC HS)
-
-Required properties:
-
-- compatible : should be "snps,arc-timer"
-- interrupts : single Interrupt going into parent intc
- (16 for ARCHS cores, 3 for ARC700 cores)
-- clocks : phandle to the source clock
-
-Example:
-
- timer0 {
- compatible = "snps,arc-timer";
- interrupts = <3>;
- interrupt-parent = <&core_intc>;
- clocks = <&core_clk>;
- };
-
- timer1 {
- compatible = "snps,arc-timer";
- clocks = <&core_clk>;
- };
diff --git a/Documentation/devicetree/bindings/timer/snps,arc-timer.yaml b/Documentation/devicetree/bindings/timer/snps,arc-timer.yaml
new file mode 100644
index 000000000000..0d1e37db6f8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,arc-timer.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/snps,arc-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys ARC Local Timer
+
+maintainers:
+ - Vineet Gupta <vgupta@synopsys.com>
+
+description: >
+ Synopsys ARC Local Timer with Interrupt Capabilities
+
+ - Found on all ARC CPUs (ARC700/ARCHS)
+ - Can be optionally programmed to interrupt on Limit
+ - Two identical copies TIMER0 and TIMER1 exist in ARC cores and historically
+ TIMER0 used as clockevent provider (true for all ARC cores)
+ TIMER1 used for clocksource (mandatory for ARC700, optional for ARC HS)
+
+properties:
+ compatible:
+ const: snps,arc-timer
+
+ interrupts:
+ maxItems: 1
+ description: A single timer interrupt going into the parent interrupt controller.
+ Use <16> for ARCHS cores, <3> for ARC700 cores.
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ timer0 {
+ compatible = "snps,arc-timer";
+ interrupts = <3>;
+ clocks = <&core_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt
deleted file mode 100644
index b6cd1b3922de..000000000000
--- a/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Synopsys ARC Free Running 64-bit Global Timer for ARC HS CPUs
-- clocksource provider for SMP SoC
-
-Required properties:
-
-- compatible : should be "snps,archs-gfrc"
-- clocks : phandle to the source clock
-
-Example:
-
- gfrc {
- compatible = "snps,archs-gfrc";
- clocks = <&core_clk>;
- };
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-gfrc.yaml b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.yaml
new file mode 100644
index 000000000000..fb16f4aba1c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/snps,archs-gfrc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys ARC Free Running 64-bit Global Timer for ARC HS CPUs
+
+maintainers:
+ - Vineet Gupta <vgupta@synopsys.com>
+
+properties:
+ compatible:
+ const: snps,archs-gfrc
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ timer {
+ compatible = "snps,archs-gfrc";
+ clocks = <&core_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt b/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt
deleted file mode 100644
index 47bd7a702f3f..000000000000
--- a/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Synopsys ARC Free Running 64-bit Local Timer for ARC HS CPUs
-- clocksource provider for UP SoC
-
-Required properties:
-
-- compatible : should be "snps,archs-rtc"
-- clocks : phandle to the source clock
-
-Example:
-
- rtc {
- compatible = "snps,arc-rtc";
- clocks = <&core_clk>;
- };
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-rtc.yaml b/Documentation/devicetree/bindings/timer/snps,archs-rtc.yaml
new file mode 100644
index 000000000000..7478810eb24a
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,archs-rtc.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/snps,archs-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys ARC Free Running 64-bit Local Timer for ARC HS CPUs
+
+maintainers:
+ - Vineet Gupta <vgupta@synopsys.com>
+
+properties:
+ compatible:
+ const: snps,archs-rtc
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ rtc {
+ compatible = "snps,archs-rtc";
+ clocks = <&core_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.txt b/Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.txt
deleted file mode 100644
index ac44c4b67530..000000000000
--- a/Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Milbeaut SoCs Timer Controller
-
-Required properties:
-
-- compatible : should be "socionext,milbeaut-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : The interrupt of the first timer.
-- clocks: phandle to the input clk.
-
-Example:
-
-timer {
- compatible = "socionext,milbeaut-timer";
- reg = <0x1e000050 0x20>
- interrupts = <0 91 4>;
- clocks = <&clk 4>;
-};
diff --git a/Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.yaml b/Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.yaml
new file mode 100644
index 000000000000..9ab72b762314
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/socionext,milbeaut-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Milbeaut SoCs Timer Controller
+
+maintainers:
+ - Sugaya Taichi <sugaya.taichi@socionext.com>
+
+properties:
+ compatible:
+ const: socionext,milbeaut-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@1e000050 {
+ compatible = "socionext,milbeaut-timer";
+ reg = <0x1e000050 0x20>;
+ interrupts = <0 91 4>;
+ clocks = <&clk 4>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/st,spear-timer.txt b/Documentation/devicetree/bindings/timer/st,spear-timer.txt
deleted file mode 100644
index b5238a07da17..000000000000
--- a/Documentation/devicetree/bindings/timer/st,spear-timer.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-* SPEAr ARM Timer
-
-** Timer node required properties:
-
-- compatible : Should be:
- "st,spear-timer"
-- reg: Address range of the timer registers
-- interrupt: Should contain the timer interrupt number
-
-Example:
-
- timer@f0000000 {
- compatible = "st,spear-timer";
- reg = <0xf0000000 0x400>;
- interrupts = <2>;
- };
diff --git a/Documentation/devicetree/bindings/timer/st,spear-timer.yaml b/Documentation/devicetree/bindings/timer/st,spear-timer.yaml
new file mode 100644
index 000000000000..9f26b5f2b38a
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/st,spear-timer.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/st,spear-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPEAr ARM Timer
+
+maintainers:
+ - Viresh Kumar <vireshk@kernel.org>
+ - Shiraz Hashim <shiraz.linux.kernel@gmail.com>
+
+properties:
+ compatible:
+ const: st,spear-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@f0000000 {
+ compatible = "st,spear-timer";
+ reg = <0xf0000000 0x400>;
+ interrupts = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml
index 2e92bcdeb423..4ed30efe4052 100644
--- a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml
+++ b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml
@@ -14,6 +14,7 @@ properties:
items:
- enum:
- sophgo,sg2042-aclint-mtimer
+ - sophgo,sg2044-aclint-mtimer
- const: thead,c900-aclint-mtimer
reg:
diff --git a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
deleted file mode 100644
index d3905a5412b8..000000000000
--- a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-* Device tree bindings for Texas instruments Keystone timer
-
-This document provides bindings for the 64-bit timer in the KeyStone
-architecture devices. The timer can be configured as a general-purpose 64-bit
-timer, dual general-purpose 32-bit timers. When configured as dual 32-bit
-timers, each half can operate in conjunction (chain mode) or independently
-(unchained mode) of each other.
-
-It is global timer is a free running up-counter and can generate interrupt
-when the counter reaches preset counter values.
-
-Documentation:
-https://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
-
-Required properties:
-
-- compatible : should be "ti,keystone-timer".
-- reg : specifies base physical address and count of the registers.
-- interrupts : interrupt generated by the timer.
-- clocks : the clock feeding the timer clock.
-
-Example:
-
-timer@22f0000 {
- compatible = "ti,keystone-timer";
- reg = <0x022f0000 0x80>;
- interrupts = <GIC_SPI 110 IRQ_TYPE_EDGE_RISING>;
- clocks = <&clktimer15>;
-};
diff --git a/Documentation/devicetree/bindings/timer/ti,keystone-timer.yaml b/Documentation/devicetree/bindings/timer/ti,keystone-timer.yaml
new file mode 100644
index 000000000000..1caf5ce64f01
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ti,keystone-timer.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ti,keystone-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI Keystone timer
+
+maintainers:
+ - Alexander A. Klimov <grandmaster@al2klimov.de>
+ - Ivan Khoronzhuk <ivan.khoronzhuk@ti.com>
+
+description: >
+ A 64-bit timer in the KeyStone architecture devices. The timer can be
+ configured as a general-purpose 64-bit timer, dual general-purpose 32-bit
+ timers. When configured as dual 32-bit timers, each half can operate in
+ conjunction (chain mode) or independently (unchained mode) of each other.
+
+ It is global timer is a free running up-counter and can generate interrupt
+ when the counter reaches preset counter values.
+
+ Documentation:
+ https://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
+
+properties:
+ compatible:
+ const: ti,keystone-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ items:
+ - const: irq
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: timer
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ timer@22f0000 {
+ compatible = "ti,keystone-timer";
+ reg = <0x022f0000 0x80>;
+ interrupts = <110 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&clktimer15>;
+ };
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 86f6a19b28ae..75ffaebc749d 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1158,6 +1158,8 @@ patternProperties:
description: Parallax Inc.
"^pda,.*":
description: Precision Design Associates, Inc.
+ "^pegatron,.*":
+ description: Pegatron Corporation
"^pericom,.*":
description: Pericom Technology Inc.
"^pervasive,.*":
diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index 8081ebfe48bc..5a91df105141 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -28,7 +28,7 @@ Sphinx Install
==============
The ReST markups currently used by the Documentation/ files are meant to be
-built with ``Sphinx`` version 2.4.4 or higher.
+built with ``Sphinx`` version 3.4.3 or higher.
There's a script that checks for the Sphinx requirements. Please see
:ref:`sphinx-pre-install` for further details.
@@ -42,12 +42,6 @@ with your distributions. In order to do so, it is recommended to install
Sphinx inside a virtual environment, using ``virtualenv-3``
or ``virtualenv``, depending on how your distribution packaged Python 3.
-.. note::
-
- #) It is recommended to use the RTD theme for html output. Depending
- on the Sphinx version, it should be installed separately,
- with ``pip install sphinx_rtd_theme``.
-
In summary, if you want to install the latest version of Sphinx, you
should do::
@@ -162,6 +156,12 @@ By default, the "Alabaster" theme is used to build the HTML documentation;
this theme is bundled with Sphinx and need not be installed separately.
The Sphinx theme can be overridden by using the ``DOCS_THEME`` make variable.
+.. note::
+
+ Some people might prefer to use the RTD theme for html output.
+ Depending on the Sphinx version, it should be installed separately,
+ with ``pip install sphinx_rtd_theme``.
+
There is another make variable ``SPHINXDIRS``, which is useful when test
building a subset of documentation. For example, you can build documents
under ``Documentation/doc-guide`` by running
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index d78b7c328ff7..5e9f7aee71a7 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -108,6 +108,9 @@ Kernel objects manipulation
.. kernel-doc:: lib/kobject.c
:export:
+.. kernel-doc:: lib/kobject_uevent.c
+ :export:
+
Kernel utility functions
------------------------
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index 3085f8b460fa..8f0910668ca3 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -217,10 +217,12 @@ Currently, the types available are:
- DMA_ASYNC_TX
- - Must not be set by the device, and will be set by the framework
- if needed
+ - The device supports asynchronous memory-to-memory operations,
+ including memcpy, memset, xor, pq, xor_val, and pq_val.
- - TODO: What is it about?
+ - This capability is automatically set by the DMA engine
+ framework and must not be configured manually by device
+ drivers.
- DMA_SLAVE
diff --git a/Documentation/driver-api/early-userspace/buffer-format.rst b/Documentation/driver-api/early-userspace/buffer-format.rst
index 7f74e301fdf3..726bfa2fe70d 100644
--- a/Documentation/driver-api/early-userspace/buffer-format.rst
+++ b/Documentation/driver-api/early-userspace/buffer-format.rst
@@ -4,20 +4,18 @@ initramfs buffer format
Al Viro, H. Peter Anvin
-Last revision: 2002-01-13
-
-Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
-getting {replaced/complemented} with the new "initial ramfs"
-(initramfs) protocol. The initramfs contents is passed using the same
-memory buffer protocol used by the initrd protocol, but the contents
+With kernel 2.5.x, the old "initial ramdisk" protocol was complemented
+with an "initial ramfs" protocol. The initramfs content is passed
+using the same memory buffer protocol used by initrd, but the content
is different. The initramfs buffer contains an archive which is
-expanded into a ramfs filesystem; this document details the format of
-the initramfs buffer format.
+expanded into a ramfs filesystem; this document details the initramfs
+buffer format.
The initramfs buffer format is based around the "newc" or "crc" CPIO
formats, and can be created with the cpio(1) utility. The cpio
-archive can be compressed using gzip(1). One valid version of an
-initramfs buffer is thus a single .cpio.gz file.
+archive can be compressed using gzip(1), or any other algorithm provided
+via CONFIG_DECOMPRESS_*. One valid version of an initramfs buffer is
+thus a single .cpio.gz file.
The full format of the initramfs buffer is defined by the following
grammar, where::
@@ -25,12 +23,20 @@ grammar, where::
* is used to indicate "0 or more occurrences of"
(|) indicates alternatives
+ indicates concatenation
- GZIP() indicates the gzip(1) of the operand
+ GZIP() indicates gzip compression of the operand
+ BZIP2() indicates bzip2 compression of the operand
+ LZMA() indicates lzma compression of the operand
+ XZ() indicates xz compression of the operand
+ LZO() indicates lzo compression of the operand
+ LZ4() indicates lz4 compression of the operand
+ ZSTD() indicates zstd compression of the operand
ALGN(n) means padding with null bytes to an n-byte boundary
- initramfs := ("\0" | cpio_archive | cpio_gzip_archive)*
+ initramfs := ("\0" | cpio_archive | cpio_compressed_archive)*
- cpio_gzip_archive := GZIP(cpio_archive)
+ cpio_compressed_archive := (GZIP(cpio_archive) | BZIP2(cpio_archive)
+ | LZMA(cpio_archive) | XZ(cpio_archive) | LZO(cpio_archive)
+ | LZ4(cpio_archive) | ZSTD(cpio_archive))
cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
@@ -75,6 +81,8 @@ c_chksum 8 bytes Checksum of data field if c_magic is 070702;
The c_mode field matches the contents of st_mode returned by stat(2)
on Linux, and encodes the file type and file permissions.
+c_mtime is ignored unless CONFIG_INITRAMFS_PRESERVE_MTIME=y is set.
+
The c_filesize should be zero for any file which is not a regular file
or symlink.
diff --git a/Documentation/driver-api/gpio/index.rst b/Documentation/driver-api/gpio/index.rst
index 34b57cee3391..43f6a3afe10b 100644
--- a/Documentation/driver-api/gpio/index.rst
+++ b/Documentation/driver-api/gpio/index.rst
@@ -27,7 +27,7 @@ Core
ACPI support
============
-.. kernel-doc:: drivers/gpio/gpiolib-acpi.c
+.. kernel-doc:: drivers/gpio/gpiolib-acpi-core.c
:export:
Device tree support
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst
index e991d92b8b1d..a49c41383779 100644
--- a/Documentation/driver-api/ntb.rst
+++ b/Documentation/driver-api/ntb.rst
@@ -35,7 +35,7 @@ anyone who has written a pci driver.
NTB Typical client driver implementation
----------------------------------------
-Primary purpose of NTB is to share some peace of memory between at least two
+Primary purpose of NTB is to share some piece of memory between at least two
systems. So the NTB device features like Scratchpad/Message registers are
mainly used to perform the proper memory window initialization. Typically
there are two types of memory window interfaces supported by the NTB API:
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
index 8fb8c5b2d685..ec5769accae0 100644
--- a/Documentation/driver-api/thermal/intel_dptf.rst
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -191,6 +191,27 @@ ABI.
User space can specify any one of the available workload type using
this interface.
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/ptc_0_control`
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/ptc_1_control`
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/ptc_2_control`
+
+All these controls needs admin privilege to update.
+
+``enable`` (RW)
+ 1 for enable, 0 for disable. Shows the current enable status of
+ platform temperature control feature. User space can enable/disable
+ hardware controls.
+
+``temperature_target`` (RW)
+ Update a new temperature target in milli degree celsius for hardware to
+ use for the temperature control.
+
+Given that this is platform temperature control, it is expected that a
+single user-level manager owns and manages the controls. If multiple
+user-level software applications attempt to write different targets, it
+can lead to unexpected behavior.
+
+
DPTF Processor thermal RFIM interface
--------------------------------------------
diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst
index 89f9c37bb979..976fb4221062 100644
--- a/Documentation/driver-api/usb/usb.rst
+++ b/Documentation/driver-api/usb/usb.rst
@@ -161,6 +161,7 @@ rely on 64bit DMA to eliminate another kind of bounce buffer.
.. kernel-doc:: drivers/usb/core/urb.c
:export:
+.. c:namespace:: usb_core
.. kernel-doc:: drivers/usb/core/message.c
:export:
diff --git a/Documentation/filesystems/bcachefs/casefolding.rst b/Documentation/filesystems/bcachefs/casefolding.rst
index ba5de97d155f..871a38f557e8 100644
--- a/Documentation/filesystems/bcachefs/casefolding.rst
+++ b/Documentation/filesystems/bcachefs/casefolding.rst
@@ -88,3 +88,21 @@ This would fail if negative dentry's were cached.
This is slightly suboptimal, but could be fixed in future with some vfs work.
+
+References
+----------
+
+(from Peter Anvin, on the list)
+
+It is worth noting that Microsoft has basically declared their
+"recommended" case folding (upcase) table to be permanently frozen (for
+new filesystem instances in the case where they use an on-disk
+translation table created at format time.) As far as I know they have
+never supported anything other than 1:1 conversion of BMP code points,
+nor normalization.
+
+The exFAT specification enumerates the full recommended upcase table,
+although in a somewhat annoying format (basically a hex dump of
+compressed data):
+
+https://learn.microsoft.com/en-us/windows/win32/fileio/exfat-specification
diff --git a/Documentation/filesystems/bcachefs/future/idle_work.rst b/Documentation/filesystems/bcachefs/future/idle_work.rst
new file mode 100644
index 000000000000..59a332509dcd
--- /dev/null
+++ b/Documentation/filesystems/bcachefs/future/idle_work.rst
@@ -0,0 +1,78 @@
+Idle/background work classes design doc:
+
+Right now, our behaviour at idle isn't ideal, it was designed for servers that
+would be under sustained load, to keep pending work at a "medium" level, to
+let work build up so we can process it in more efficient batches, while also
+giving headroom for bursts in load.
+
+But for desktops or mobile - scenarios where work is less sustained and power
+usage is more important - we want to operate differently, with a "rush to
+idle" so the system can go to sleep. We don't want to be dribbling out
+background work while the system should be idle.
+
+The complicating factor is that there are a number of background tasks, which
+form a heirarchy (or a digraph, depending on how you divide it up) - one
+background task may generate work for another.
+
+Thus proper idle detection needs to model this heirarchy.
+
+- Foreground writes
+- Page cache writeback
+- Copygc, rebalance
+- Journal reclaim
+
+When we implement idle detection and rush to idle, we need to be careful not
+to disturb too much the existing behaviour that works reasonably well when the
+system is under sustained load (or perhaps improve it in the case of
+rebalance, which currently does not actively attempt to let work batch up).
+
+SUSTAINED LOAD REGIME
+---------------------
+
+When the system is under continuous load, we want these jobs to run
+continuously - this is perhaps best modelled with a P/D controller, where
+they'll be trying to keep a target value (i.e. fragmented disk space,
+available journal space) roughly in the middle of some range.
+
+The goal under sustained load is to balance our ability to handle load spikes
+without running out of x resource (free disk space, free space in the
+journal), while also letting some work accumululate to be batched (or become
+unnecessary).
+
+For example, we don't want to run copygc too aggressively, because then it
+will be evacuating buckets that would have become empty (been overwritten or
+deleted) anyways, and we don't want to wait until we're almost out of free
+space because then the system will behave unpredicably - suddenly we're doing
+a lot more work to service each write and the system becomes much slower.
+
+IDLE REGIME
+-----------
+
+When the system becomes idle, we should start flushing our pending work
+quicker so the system can go to sleep.
+
+Note that the definition of "idle" depends on where in the heirarchy a task
+is - a task should start flushing work more quickly when the task above it has
+stopped generating new work.
+
+e.g. rebalance should start flushing more quickly when page cache writeback is
+idle, and journal reclaim should only start flushing more quickly when both
+copygc and rebalance are idle.
+
+It's important to let work accumulate when more work is still incoming and we
+still have room, because flushing is always more efficient if we let it batch
+up. New writes may overwrite data before rebalance moves it, and tasks may be
+generating more updates for the btree nodes that journal reclaim needs to flush.
+
+On idle, how much work we do at each interval should be proportional to the
+length of time we have been idle for. If we're idle only for a short duration,
+we shouldn't flush everything right away; the system might wake up and start
+generating new work soon, and flushing immediately might end up doing a lot of
+work that would have been unnecessary if we'd allowed things to batch more.
+
+To summarize, we will need:
+
+ - A list of classes for background tasks that generate work, which will
+ include one "foreground" class.
+ - Tracking for each class - "Am I doing work, or have I gone to sleep?"
+ - And each class should check the class above it when deciding how much work to issue.
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
index 3864d0ae89c1..e5c4c2120b93 100644
--- a/Documentation/filesystems/bcachefs/index.rst
+++ b/Documentation/filesystems/bcachefs/index.rst
@@ -29,3 +29,10 @@ At this moment, only a few of these are described here.
casefolding
errorcodes
+
+Future design
+-------------
+.. toctree::
+ :maxdepth: 1
+
+ future/idle_work
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index c293f8e37468..7ddb235aee9d 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -128,6 +128,7 @@ device=%s Specify a path to an extra device to be used together.
fsid=%s Specify a filesystem image ID for Fscache back-end.
domain_id=%s Specify a domain ID in fscache mode so that different images
with the same blobs under a given domain ID can share storage.
+fsoffset=%llu Specify block-aligned filesystem offset for the primary device.
=================== =========================================================
Sysfs Entries
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index e80329908549..29e84d125e02 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -70,7 +70,7 @@ Online attacks
--------------
fscrypt (and storage encryption in general) can only provide limited
-protection, if any at all, against online attacks. In detail:
+protection against online attacks. In detail:
Side-channel attacks
~~~~~~~~~~~~~~~~~~~~
@@ -99,16 +99,23 @@ Therefore, any encryption-specific access control checks would merely
be enforced by kernel *code* and therefore would be largely redundant
with the wide variety of access control mechanisms already available.)
-Kernel memory compromise
-~~~~~~~~~~~~~~~~~~~~~~~~
+Read-only kernel memory compromise
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unless `hardware-wrapped keys`_ are used, an attacker who gains the
+ability to read from arbitrary kernel memory, e.g. by mounting a
+physical attack or by exploiting a kernel security vulnerability, can
+compromise all fscrypt keys that are currently in-use. This also
+extends to cold boot attacks; if the system is suddenly powered off,
+keys the system was using may remain in memory for a short time.
-An attacker who compromises the system enough to read from arbitrary
-memory, e.g. by mounting a physical attack or by exploiting a kernel
-security vulnerability, can compromise all encryption keys that are
-currently in use.
+However, if hardware-wrapped keys are used, then the fscrypt master
+keys and file contents encryption keys (but not other types of fscrypt
+subkeys such as filenames encryption keys) are protected from
+compromises of arbitrary kernel memory.
-However, fscrypt allows encryption keys to be removed from the kernel,
-which may protect them from later compromise.
+In addition, fscrypt allows encryption keys to be removed from the
+kernel, which may protect them from later compromise.
In more detail, the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl (or the
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl) can wipe a master
@@ -144,6 +151,24 @@ However, these ioctls have some limitations:
accelerator hardware (if used by the crypto API to implement any of
the algorithms), or in other places not explicitly considered here.
+Full system compromise
+~~~~~~~~~~~~~~~~~~~~~~
+
+An attacker who gains "root" access and/or the ability to execute
+arbitrary kernel code can freely exfiltrate data that is protected by
+any in-use fscrypt keys. Thus, usually fscrypt provides no meaningful
+protection in this scenario. (Data that is protected by a key that is
+absent throughout the entire attack remains protected, modulo the
+limitations of key removal mentioned above in the case where the key
+was removed prior to the attack.)
+
+However, if `hardware-wrapped keys`_ are used, such attackers will be
+unable to exfiltrate the master keys or file contents keys in a form
+that will be usable after the system is powered off. This may be
+useful if the attacker is significantly time-limited and/or
+bandwidth-limited, so they can only exfiltrate some data and need to
+rely on a later offline attack to exfiltrate the rest of it.
+
Limitations of v1 policies
~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -170,6 +195,10 @@ policies on all new encrypted directories.
Key hierarchy
=============
+Note: this section assumes the use of raw keys rather than
+hardware-wrapped keys. The use of hardware-wrapped keys modifies the
+key hierarchy slightly. For details, see `Hardware-wrapped keys`_.
+
Master Keys
-----------
@@ -832,7 +861,9 @@ a pointer to struct fscrypt_add_key_arg, defined as follows::
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 key_id;
- __u32 __reserved[8];
+ #define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
+ __u32 flags;
+ __u32 __reserved[7];
__u8 raw[];
};
@@ -851,7 +882,7 @@ a pointer to struct fscrypt_add_key_arg, defined as follows::
struct fscrypt_provisioning_key_payload {
__u32 type;
- __u32 __reserved;
+ __u32 flags;
__u8 raw[];
};
@@ -879,24 +910,32 @@ as follows:
Alternatively, if ``key_id`` is nonzero, this field must be 0, since
in that case the size is implied by the specified Linux keyring key.
-- ``key_id`` is 0 if the raw key is given directly in the ``raw``
- field. Otherwise ``key_id`` is the ID of a Linux keyring key of
- type "fscrypt-provisioning" whose payload is
- struct fscrypt_provisioning_key_payload whose ``raw`` field contains
- the raw key and whose ``type`` field matches ``key_spec.type``.
- Since ``raw`` is variable-length, the total size of this key's
- payload must be ``sizeof(struct fscrypt_provisioning_key_payload)``
- plus the raw key size. The process must have Search permission on
- this key.
-
- Most users should leave this 0 and specify the raw key directly.
- The support for specifying a Linux keyring key is intended mainly to
+- ``key_id`` is 0 if the key is given directly in the ``raw`` field.
+ Otherwise ``key_id`` is the ID of a Linux keyring key of type
+ "fscrypt-provisioning" whose payload is struct
+ fscrypt_provisioning_key_payload whose ``raw`` field contains the
+ key, whose ``type`` field matches ``key_spec.type``, and whose
+ ``flags`` field matches ``flags``. Since ``raw`` is
+ variable-length, the total size of this key's payload must be
+ ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the number
+ of key bytes. The process must have Search permission on this key.
+
+ Most users should leave this 0 and specify the key directly. The
+ support for specifying a Linux keyring key is intended mainly to
allow re-adding keys after a filesystem is unmounted and re-mounted,
- without having to store the raw keys in userspace memory.
+ without having to store the keys in userspace memory.
+
+- ``flags`` contains optional flags from ``<linux/fscrypt.h>``:
+
+ - FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED: This denotes that the key is a
+ hardware-wrapped key. See `Hardware-wrapped keys`_. This flag
+ can't be used if FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR is used.
- ``raw`` is a variable-length field which must contain the actual
key, ``raw_size`` bytes long. Alternatively, if ``key_id`` is
- nonzero, then this field is unused.
+ nonzero, then this field is unused. Note that despite being named
+ ``raw``, if FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED is specified then it
+ will contain a wrapped key, not a raw key.
For v2 policy keys, the kernel keeps track of which user (identified
by effective user ID) added the key, and only allows the key to be
@@ -908,8 +947,8 @@ prevent that other user from unexpectedly removing it. Therefore,
FS_IOC_ADD_ENCRYPTION_KEY may also be used to add a v2 policy key
*again*, even if it's already added by other user(s). In this case,
FS_IOC_ADD_ENCRYPTION_KEY will just install a claim to the key for the
-current user, rather than actually add the key again (but the raw key
-must still be provided, as a proof of knowledge).
+current user, rather than actually add the key again (but the key must
+still be provided, as a proof of knowledge).
FS_IOC_ADD_ENCRYPTION_KEY returns 0 if either the key or a claim to
the key was either added or already exists.
@@ -918,20 +957,23 @@ FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors:
- ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the
caller does not have the CAP_SYS_ADMIN capability in the initial
- user namespace; or the raw key was specified by Linux key ID but the
+ user namespace; or the key was specified by Linux key ID but the
process lacks Search permission on the key.
+- ``EBADMSG``: invalid hardware-wrapped key
- ``EDQUOT``: the key quota for this user would be exceeded by adding
the key
- ``EINVAL``: invalid key size or key specifier type, or reserved bits
were set
-- ``EKEYREJECTED``: the raw key was specified by Linux key ID, but the
- key has the wrong type
-- ``ENOKEY``: the raw key was specified by Linux key ID, but no key
- exists with that ID
+- ``EKEYREJECTED``: the key was specified by Linux key ID, but the key
+ has the wrong type
+- ``ENOKEY``: the key was specified by Linux key ID, but no key exists
+ with that ID
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem, or the filesystem superblock has not
- had encryption enabled on it
+ had encryption enabled on it; or a hardware wrapped key was specified
+ but the filesystem does not support inline encryption or the hardware
+ does not support hardware-wrapped keys
Legacy method
~~~~~~~~~~~~~
@@ -994,9 +1036,8 @@ or removed by non-root users.
These ioctls don't work on keys that were added via the legacy
process-subscribed keyrings mechanism.
-Before using these ioctls, read the `Kernel memory compromise`_
-section for a discussion of the security goals and limitations of
-these ioctls.
+Before using these ioctls, read the `Online attacks`_ section for a
+discussion of the security goals and limitations of these ioctls.
FS_IOC_REMOVE_ENCRYPTION_KEY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1316,7 +1357,8 @@ inline encryption hardware doesn't have the needed crypto capabilities
(e.g. support for the needed encryption algorithm and data unit size)
and where blk-crypto-fallback is unusable. (For blk-crypto-fallback
to be usable, it must be enabled in the kernel configuration with
-CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y.)
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y, and the file must be
+protected by a raw key rather than a hardware-wrapped key.)
Currently fscrypt always uses the filesystem block size (which is
usually 4096 bytes) as the data unit size. Therefore, it can only use
@@ -1324,7 +1366,76 @@ inline encryption hardware that supports that data unit size.
Inline encryption doesn't affect the ciphertext or other aspects of
the on-disk format, so users may freely switch back and forth between
-using "inlinecrypt" and not using "inlinecrypt".
+using "inlinecrypt" and not using "inlinecrypt". An exception is that
+files that are protected by a hardware-wrapped key can only be
+encrypted/decrypted by the inline encryption hardware and therefore
+can only be accessed when the "inlinecrypt" mount option is used. For
+more information about hardware-wrapped keys, see below.
+
+Hardware-wrapped keys
+---------------------
+
+fscrypt supports using *hardware-wrapped keys* when the inline
+encryption hardware supports it. Such keys are only present in kernel
+memory in wrapped (encrypted) form; they can only be unwrapped
+(decrypted) by the inline encryption hardware and are temporally bound
+to the current boot. This prevents the keys from being compromised if
+kernel memory is leaked. This is done without limiting the number of
+keys that can be used and while still allowing the execution of
+cryptographic tasks that are tied to the same key but can't use inline
+encryption hardware, e.g. filenames encryption.
+
+Note that hardware-wrapped keys aren't specific to fscrypt; they are a
+block layer feature (part of *blk-crypto*). For more details about
+hardware-wrapped keys, see the block layer documentation at
+:ref:`Documentation/block/inline-encryption.rst
+<hardware_wrapped_keys>`. The rest of this section just focuses on
+the details of how fscrypt can use hardware-wrapped keys.
+
+fscrypt supports hardware-wrapped keys by allowing the fscrypt master
+keys to be hardware-wrapped keys as an alternative to raw keys. To
+add a hardware-wrapped key with `FS_IOC_ADD_ENCRYPTION_KEY`_,
+userspace must specify FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED in the
+``flags`` field of struct fscrypt_add_key_arg and also in the
+``flags`` field of struct fscrypt_provisioning_key_payload when
+applicable. The key must be in ephemerally-wrapped form, not
+long-term wrapped form.
+
+Some limitations apply. First, files protected by a hardware-wrapped
+key are tied to the system's inline encryption hardware. Therefore
+they can only be accessed when the "inlinecrypt" mount option is used,
+and they can't be included in portable filesystem images. Second,
+currently the hardware-wrapped key support is only compatible with
+`IV_INO_LBLK_64 policies`_ and `IV_INO_LBLK_32 policies`_, as it
+assumes that there is just one file contents encryption key per
+fscrypt master key rather than one per file. Future work may address
+this limitation by passing per-file nonces down the storage stack to
+allow the hardware to derive per-file keys.
+
+Implementation-wise, to encrypt/decrypt the contents of files that are
+protected by a hardware-wrapped key, fscrypt uses blk-crypto,
+attaching the hardware-wrapped key to the bio crypt contexts. As is
+the case with raw keys, the block layer will program the key into a
+keyslot when it isn't already in one. However, when programming a
+hardware-wrapped key, the hardware doesn't program the given key
+directly into a keyslot but rather unwraps it (using the hardware's
+ephemeral wrapping key) and derives the inline encryption key from it.
+The inline encryption key is the key that actually gets programmed
+into a keyslot, and it is never exposed to software.
+
+However, fscrypt doesn't just do file contents encryption; it also
+uses its master keys to derive filenames encryption keys, key
+identifiers, and sometimes some more obscure types of subkeys such as
+dirhash keys. So even with file contents encryption out of the
+picture, fscrypt still needs a raw key to work with. To get such a
+key from a hardware-wrapped key, fscrypt asks the inline encryption
+hardware to derive a cryptographically isolated "software secret" from
+the hardware-wrapped key. fscrypt uses this "software secret" to key
+its KDF to derive all subkeys other than file contents keys.
+
+Note that this implies that the hardware-wrapped key feature only
+protects the file contents encryption keys. It doesn't protect other
+fscrypt subkeys such as filenames encryption keys.
Direct I/O support
==================
@@ -1409,7 +1520,7 @@ read the ciphertext into the page cache and decrypt it in-place. The
folio lock must be held until decryption has finished, to prevent the
folio from becoming visible to userspace prematurely.
-For the write path (->writepage()) of regular files, filesystems
+For the write path (->writepages()) of regular files, filesystems
cannot encrypt data in-place in the page cache, since the cached
plaintext must be preserved. Instead, filesystems must encrypt into a
temporary buffer or "bounce page", then write out the temporary
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index a9cf8e950b15..32618512a965 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -113,6 +113,7 @@ Documentation for filesystem implementations.
qnx6
ramfs-rootfs-initramfs
relay
+ resctrl
romfs
smb/index
spufs/index
diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst
index e29651a42eec..f2df9b6df988 100644
--- a/Documentation/filesystems/iomap/design.rst
+++ b/Documentation/filesystems/iomap/design.rst
@@ -243,13 +243,25 @@ The fields are as follows:
regular file data.
This is only useful for FIEMAP.
- * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
- be set by the filesystem for its own purposes.
+ * **IOMAP_F_BOUNDARY**: This indicates I/O and its completion must not be
+ merged with any other I/O or completion. Filesystems must use this when
+ submitting I/O to devices that cannot handle I/O crossing certain LBAs
+ (e.g. ZNS devices). This flag applies only to buffered I/O writeback; all
+ other functions ignore it.
+
+ * **IOMAP_F_PRIVATE**: This flag is reserved for filesystem private use.
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
block assigned to it yet and the file system will do that in the bio
submission handler, splitting the I/O as needed.
+ * **IOMAP_F_ATOMIC_BIO**: This indicates write I/O must be submitted with the
+ ``REQ_ATOMIC`` flag set in the bio. Filesystems need to set this flag to
+ inform iomap that the write I/O operation requires torn-write protection
+ based on HW-offload mechanism. They must also ensure that mapping updates
+ upon the completion of the I/O must be performed in a single metadata
+ update.
+
These flags can be set by iomap itself during file operations.
The filesystem should supply an ``->iomap_end`` function if it needs
to observe these flags:
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 0ec0bb6eb0fb..2e567e341c3b 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -249,7 +249,6 @@ address_space_operations
========================
prototypes::
- int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*read_folio)(struct file *, struct folio *);
int (*writepages)(struct address_space *, struct writeback_control *);
bool (*dirty_folio)(struct address_space *, struct folio *folio);
@@ -280,7 +279,6 @@ locking rules:
====================== ======================== ========= ===============
ops folio locked i_rwsem invalidate_lock
====================== ======================== ========= ===============
-writepage: yes, unlocks (see below)
read_folio: yes, unlocks shared
writepages:
dirty_folio: maybe
@@ -309,54 +307,6 @@ completion.
->readahead() unlocks the folios that I/O is attempted on like ->read_folio().
-->writepage() is used for two purposes: for "memory cleansing" and for
-"sync". These are quite different operations and the behaviour may differ
-depending upon the mode.
-
-If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
-it *must* start I/O against the page, even if that would involve
-blocking on in-progress I/O.
-
-If writepage is called for memory cleansing (sync_mode ==
-WBC_SYNC_NONE) then its role is to get as much writeout underway as
-possible. So writepage should try to avoid blocking against
-currently-in-progress I/O.
-
-If the filesystem is not called for "sync" and it determines that it
-would need to block against in-progress I/O to be able to start new I/O
-against the page the filesystem should redirty the page with
-redirty_page_for_writepage(), then unlock the page and return zero.
-This may also be done to avoid internal deadlocks, but rarely.
-
-If the filesystem is called for sync then it must wait on any
-in-progress I/O and then start new I/O.
-
-The filesystem should unlock the page synchronously, before returning to the
-caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
-value. WRITEPAGE_ACTIVATE means that page cannot really be written out
-currently, and VM should stop calling ->writepage() on this page for some
-time. VM does this by moving page to the head of the active list, hence the
-name.
-
-Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
-and return zero, writepage *must* run set_page_writeback() against the page,
-followed by unlocking it. Once set_page_writeback() has been run against the
-page, write I/O can be submitted and the write I/O completion handler must run
-end_page_writeback() once the I/O is complete. If no I/O is submitted, the
-filesystem must run end_page_writeback() against the page before returning from
-writepage.
-
-That is: after 2.5.12, pages which are under writeout are *not* locked. Note,
-if the filesystem needs the page to be locked during writeout, that is ok, too,
-the page is allowed to be unlocked at any point in time between the calls to
-set_page_writeback() and end_page_writeback().
-
-Note, failure to run either redirty_page_for_writepage() or the combination of
-set_page_writeback()/end_page_writeback() on a page submitted to writepage
-will leave the page itself marked clean but it will be tagged as dirty in the
-radix tree. This incoherency can lead to all sorts of hard-to-debug problems
-in the filesystem like having dirty inodes at umount and losing written data.
-
->writepages() is used for periodic writeback and for syscall-initiated
sync operations. The address_space should start I/O against at least
``*nr_to_write`` pages. ``*nr_to_write`` must be decremented for each page
@@ -364,8 +314,8 @@ which is written. The address_space implementation may write more (or less)
pages than ``*nr_to_write`` asks for, but it should try to be reasonably close.
If nr_to_write is NULL, all dirty pages must be written.
-writepages should _only_ write pages which are present on
-mapping->io_pages.
+writepages should _only_ write pages which are present in
+mapping->i_pages.
->dirty_folio() is called from various places in the kernel when
the target folio is marked as needing writeback. The folio cannot be
diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index d92c276f1575..e149b89118c8 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -671,7 +671,6 @@ The members are as follows:
fsparam_bool() fs_param_is_bool
fsparam_u32() fs_param_is_u32
fsparam_u32oct() fs_param_is_u32_octal
- fsparam_u32hex() fs_param_is_u32_hex
fsparam_s32() fs_param_is_s32
fsparam_u64() fs_param_is_u64
fsparam_enum() fs_param_is_enum
@@ -755,21 +754,6 @@ process the parameters it is given.
* ::
- bool validate_constant_table(const struct constant_table *tbl,
- size_t tbl_size,
- int low, int high, int special);
-
- Validate a constant table. Checks that all the elements are appropriately
- ordered, that there are no duplicates and that the values are between low
- and high inclusive, though provision is made for one allowable special
- value outside of that range. If no special value is required, special
- should just be set to lie inside the low-to-high range.
-
- If all is good, true is returned. If the table is invalid, errors are
- logged to the kernel log buffer and false is returned.
-
- * ::
-
bool fs_validate_description(const char *name,
const struct fs_parameter_description *desc);
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 3886c14f89f4..939b4b624fad 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -1,33 +1,187 @@
.. SPDX-License-Identifier: GPL-2.0
-=================================
-Network Filesystem Helper Library
-=================================
+===================================
+Network Filesystem Services Library
+===================================
.. Contents:
- Overview.
+ - Requests and streams.
+ - Subrequests.
+ - Result collection and retry.
+ - Local caching.
+ - Content encryption (fscrypt).
- Per-inode context.
- Inode context helper functions.
- - Buffered read helpers.
- - Read helper functions.
- - Read helper structures.
- - Read helper operations.
- - Read helper procedure.
- - Read helper cache API.
+ - Inode locking.
+ - Inode writeback.
+ - High-level VFS API.
+ - Unlocked read/write iter.
+ - Pre-locked read/write iter.
+ - Monolithic files API.
+ - Memory-mapped I/O API.
+ - High-level VM API.
+ - Deprecated PG_private2 API.
+ - I/O request API.
+ - Request structure.
+ - Stream structure.
+ - Subrequest structure.
+ - Filesystem methods.
+ - Terminating a subrequest.
+ - Local cache API.
+ - API function reference.
Overview
========
-The network filesystem helper library is a set of functions designed to aid a
-network filesystem in implementing VM/VFS operations. For the moment, that
-just includes turning various VM buffered read operations into requests to read
-from the server. The helper library, however, can also interpose other
-services, such as local caching or local data encryption.
+The network filesystem services library, netfslib, is a set of functions
+designed to aid a network filesystem in implementing VM/VFS API operations. It
+takes over the normal buffered read, readahead, write and writeback and also
+handles unbuffered and direct I/O.
-Note that the library module doesn't link against local caching directly, so
-access must be provided by the netfs.
+The library provides support for (re-)negotiation of I/O sizes and retrying
+failed I/O as well as local caching and will, in the future, provide content
+encryption.
+
+It insulates the filesystem from VM interface changes as much as possible and
+handles VM features such as large multipage folios. The filesystem basically
+just has to provide a way to perform read and write RPC calls.
+
+The way I/O is organised inside netfslib consists of a number of objects:
+
+ * A *request*. A request is used to track the progress of the I/O overall and
+ to hold on to resources. The collection of results is done at the request
+ level. The I/O within a request is divided into a number of parallel
+ streams of subrequests.
+
+ * A *stream*. A non-overlapping series of subrequests. The subrequests
+ within a stream do not have to be contiguous.
+
+ * A *subrequest*. This is the basic unit of I/O. It represents a single RPC
+ call or a single cache I/O operation. The library passes these to the
+ filesystem and the cache to perform.
+
+Requests and Streams
+--------------------
+
+When actually performing I/O (as opposed to just copying into the pagecache),
+netfslib will create one or more requests to track the progress of the I/O and
+to hold resources.
+
+A read operation will have a single stream and the subrequests within that
+stream may be of mixed origins, for instance mixing RPC subrequests and cache
+subrequests.
+
+On the other hand, a write operation may have multiple streams, where each
+stream targets a different destination. For instance, there may be one stream
+writing to the local cache and one to the server. Currently, only two streams
+are allowed, but this could be increased if parallel writes to multiple servers
+is desired.
+
+The subrequests within a write stream do not need to match alignment or size
+with the subrequests in another write stream and netfslib performs the tiling
+of subrequests in each stream over the source buffer independently. Further,
+each stream may contain holes that don't correspond to holes in the other
+stream.
+
+In addition, the subrequests do not need to correspond to the boundaries of the
+folios or vectors in the source/destination buffer. The library handles the
+collection of results and the wrangling of folio flags and references.
+
+Subrequests
+-----------
+
+Subrequests are at the heart of the interaction between netfslib and the
+filesystem using it. Each subrequest is expected to correspond to a single
+read or write RPC or cache operation. The library will stitch together the
+results from a set of subrequests to provide a higher level operation.
+
+Netfslib has two interactions with the filesystem or the cache when setting up
+a subrequest. First, there's an optional preparatory step that allows the
+filesystem to negotiate the limits on the subrequest, both in terms of maximum
+number of bytes and maximum number of vectors (e.g. for RDMA). This may
+involve negotiating with the server (e.g. cifs needing to acquire credits).
+
+And, secondly, there's the issuing step in which the subrequest is handed off
+to the filesystem to perform.
+
+Note that these two steps are done slightly differently between read and write:
+
+ * For reads, the VM/VFS tells us how much is being requested up front, so the
+ library can preset maximum values that the cache and then the filesystem can
+ then reduce. The cache also gets consulted first on whether it wants to do
+ a read before the filesystem is consulted.
+
+ * For writeback, it is unknown how much there will be to write until the
+ pagecache is walked, so no limit is set by the library.
+
+Once a subrequest is completed, the filesystem or cache informs the library of
+the completion and then collection is invoked. Depending on whether the
+request is synchronous or asynchronous, the collection of results will be done
+in either the application thread or in a work queue.
+
+Result Collection and Retry
+---------------------------
+
+As subrequests complete, the results are collected and collated by the library
+and folio unlocking is performed progressively (if appropriate). Once the
+request is complete, async completion will be invoked (again, if appropriate).
+It is possible for the filesystem to provide interim progress reports to the
+library to cause folio unlocking to happen earlier if possible.
+
+If any subrequests fail, netfslib can retry them. It will wait until all
+subrequests are completed, offer the filesystem the opportunity to fiddle with
+the resources/state held by the request and poke at the subrequests before
+re-preparing and re-issuing the subrequests.
+
+This allows the tiling of contiguous sets of failed subrequest within a stream
+to be changed, adding more subrequests or ditching excess as necessary (for
+instance, if the network sizes change or the server decides it wants smaller
+chunks).
+
+Further, if one or more contiguous cache-read subrequests fail, the library
+will pass them to the filesystem to perform instead, renegotiating and retiling
+them as necessary to fit with the filesystem's parameters rather than those of
+the cache.
+
+Local Caching
+-------------
+
+One of the services netfslib provides, via ``fscache``, is the option to cache
+on local disk a copy of the data obtained from/written to a network filesystem.
+The library will manage the storing, retrieval and some invalidation of data
+automatically on behalf of the filesystem if a cookie is attached to the
+``netfs_inode``.
+
+Note that local caching used to use the PG_private_2 (aliased as PG_fscache) to
+keep track of a page that was being written to the cache, but this is now
+deprecated as PG_private_2 will be removed.
+
+Instead, folios that are read from the server for which there was no data in
+the cache will be marked as dirty and will have ``folio->private`` set to a
+special value (``NETFS_FOLIO_COPY_TO_CACHE``) and left to writeback to write.
+If the folio is modified before that happened, the special value will be
+cleared and the write will become normally dirty.
+
+When writeback occurs, folios that are so marked will only be written to the
+cache and not to the server. Writeback handles mixed cache-only writes and
+server-and-cache writes by using two streams, sending one to the cache and one
+to the server. The server stream will have gaps in it corresponding to those
+folios.
+
+Content Encryption (fscrypt)
+----------------------------
+
+Though it does not do so yet, at some point netfslib will acquire the ability
+to do client-side content encryption on behalf of the network filesystem (Ceph,
+for example). fscrypt can be used for this if appropriate (it may not be -
+cifs, for example).
+
+The data will be stored encrypted in the local cache using the same manner of
+encryption as the data written to the server and the library will impose bounce
+buffering and RMW cycles as necessary.
Per-Inode Context
@@ -40,10 +194,13 @@ structure is defined::
struct netfs_inode {
struct inode inode;
const struct netfs_request_ops *ops;
- struct fscache_cookie *cache;
+ struct fscache_cookie * cache;
+ loff_t remote_i_size;
+ unsigned long flags;
+ ...
};
-A network filesystem that wants to use netfs lib must place one of these in its
+A network filesystem that wants to use netfslib must place one of these in its
inode wrapper struct instead of the VFS ``struct inode``. This can be done in
a way similar to the following::
@@ -56,7 +213,8 @@ This allows netfslib to find its state by using ``container_of()`` from the
inode pointer, thereby allowing the netfslib helper functions to be pointed to
directly by the VFS/VM operation tables.
-The structure contains the following fields:
+The structure contains the following fields that are of interest to the
+filesystem:
* ``inode``
@@ -71,6 +229,37 @@ The structure contains the following fields:
Local caching cookie, or NULL if no caching is enabled. This field does not
exist if fscache is disabled.
+ * ``remote_i_size``
+
+ The size of the file on the server. This differs from inode->i_size if
+ local modifications have been made but not yet written back.
+
+ * ``flags``
+
+ A set of flags, some of which the filesystem might be interested in:
+
+ * ``NETFS_ICTX_MODIFIED_ATTR``
+
+ Set if netfslib modifies mtime/ctime. The filesystem is free to ignore
+ this or clear it.
+
+ * ``NETFS_ICTX_UNBUFFERED``
+
+ Do unbuffered I/O upon the file. Like direct I/O but without the
+ alignment limitations. RMW will be performed if necessary. The pagecache
+ will not be used unless mmap() is also used.
+
+ * ``NETFS_ICTX_WRITETHROUGH``
+
+ Do writethrough caching upon the file. I/O will be set up and dispatched
+ as buffered writes are made to the page cache. mmap() does the normal
+ writeback thing.
+
+ * ``NETFS_ICTX_SINGLE_NO_UPLOAD``
+
+ Set if the file has a monolithic content that must be read entirely in a
+ single go and must not be written back to the server, though it can be
+ cached (e.g. AFS directories).
Inode Context Helper Functions
------------------------------
@@ -84,117 +273,250 @@ set the operations table pointer::
then a function to cast from the VFS inode structure to the netfs context::
- struct netfs_inode *netfs_node(struct inode *inode);
+ struct netfs_inode *netfs_inode(struct inode *inode);
and finally, a function to get the cache cookie pointer from the context
attached to an inode (or NULL if fscache is disabled)::
struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx);
+Inode Locking
+-------------
+
+A number of functions are provided to manage the locking of i_rwsem for I/O and
+to effectively extend it to provide more separate classes of exclusion::
+
+ int netfs_start_io_read(struct inode *inode);
+ void netfs_end_io_read(struct inode *inode);
+ int netfs_start_io_write(struct inode *inode);
+ void netfs_end_io_write(struct inode *inode);
+ int netfs_start_io_direct(struct inode *inode);
+ void netfs_end_io_direct(struct inode *inode);
+
+The exclusion breaks down into four separate classes:
+
+ 1) Buffered reads and writes.
+
+ Buffered reads can run concurrently each other and with buffered writes,
+ but buffered writes cannot run concurrently with each other.
+
+ 2) Direct reads and writes.
+
+ Direct (and unbuffered) reads and writes can run concurrently since they do
+ not share local buffering (i.e. the pagecache) and, in a network
+ filesystem, are expected to have exclusion managed on the server (though
+ this may not be the case for, say, Ceph).
+
+ 3) Other major inode modifying operations (e.g. truncate, fallocate).
+
+ These should just access i_rwsem directly.
+
+ 4) mmap().
+
+ mmap'd accesses might operate concurrently with any of the other classes.
+ They might form the buffer for an intra-file loopback DIO read/write. They
+ might be permitted on unbuffered files.
+
+Inode Writeback
+---------------
+
+Netfslib will pin resources on an inode for future writeback (such as pinning
+use of an fscache cookie) when an inode is dirtied. However, this pinning
+needs careful management. To manage the pinning, the following sequence
+occurs:
+
+ 1) An inode state flag ``I_PINNING_NETFS_WB`` is set by netfslib when the
+ pinning begins (when a folio is dirtied, for example) if the cache is
+ active to stop the cache structures from being discarded and the cache
+ space from being culled. This also prevents re-getting of cache resources
+ if the flag is already set.
+
+ 2) This flag then cleared inside the inode lock during inode writeback in the
+ VM - and the fact that it was set is transferred to ``->unpinned_netfs_wb``
+ in ``struct writeback_control``.
+
+ 3) If ``->unpinned_netfs_wb`` is now set, the write_inode procedure is forced.
+
+ 4) The filesystem's ``->write_inode()`` function is invoked to do the cleanup.
+
+ 5) The filesystem invokes netfs to do its cleanup.
+
+To do the cleanup, netfslib provides a function to do the resource unpinning::
+
+ int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
+
+If the filesystem doesn't need to do anything else, this may be set as a its
+``.write_inode`` method.
+
+Further, if an inode is deleted, the filesystem's write_inode method may not
+get called, so::
+
+ void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
-Buffered Read Helpers
-=====================
+must be called from ``->evict_inode()`` *before* ``clear_inode()`` is called.
-The library provides a set of read helpers that handle the ->read_folio(),
-->readahead() and much of the ->write_begin() VM operations and translate them
-into a common call framework.
-The following services are provided:
+High-Level VFS API
+==================
- * Handle folios that span multiple pages.
+Netfslib provides a number of sets of API calls for the filesystem to delegate
+VFS operations to. Netfslib, in turn, will call out to the filesystem and the
+cache to negotiate I/O sizes, issue RPCs and provide places for it to intervene
+at various times.
- * Insulate the netfs from VM interface changes.
+Unlocked Read/Write Iter
+------------------------
- * Allow the netfs to arbitrarily split reads up into pieces, even ones that
- don't match folio sizes or folio alignments and that may cross folios.
+The first API set is for the delegation of operations to netfslib when the
+filesystem is called through the standard VFS read/write_iter methods::
- * Allow the netfs to expand a readahead request in both directions to meet its
- needs.
+ ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
- * Allow the netfs to partially fulfil a read, which will then be resubmitted.
+They can be assigned directly to ``.read_iter`` and ``.write_iter``. They
+perform the inode locking themselves and the first two will switch between
+buffered I/O and DIO as appropriate.
- * Handle local caching, allowing cached data and server-read data to be
- interleaved for a single request.
+Pre-Locked Read/Write Iter
+--------------------------
- * Handle clearing of bufferage that isn't on the server.
+The second API set is for the delegation of operations to netfslib when the
+filesystem is called through the standard VFS methods, but needs to do some
+other stuff before or after calling netfslib whilst still inside locked section
+(e.g. Ceph negotiating caps). The unbuffered read function is::
- * Handle retrying of reads that failed, switching reads from the cache to the
- server as necessary.
+ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter);
- * In the future, this is a place that other services can be performed, such as
- local encryption of data to be stored remotely or in the cache.
+This must not be assigned directly to ``.read_iter`` and the filesystem is
+responsible for performing the inode locking before calling it. In the case of
+buffered read, the filesystem should use ``filemap_read()``.
-From the network filesystem, the helpers require a table of operations. This
-includes a mandatory method to issue a read operation along with a number of
-optional methods.
+There are three functions for writes::
+ ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+ struct netfs_group *netfs_group);
+ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct netfs_group *netfs_group);
+ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
+ struct netfs_group *netfs_group);
-Read Helper Functions
+These must not be assigned directly to ``.write_iter`` and the filesystem is
+responsible for performing the inode locking before calling them.
+
+The first two functions are for buffered writes; the first just adds some
+standard write checks and jumps to the second, but if the filesystem wants to
+do the checks itself, it can use the second directly. The third function is
+for unbuffered or DIO writes.
+
+On all three write functions, there is a writeback group pointer (which should
+be NULL if the filesystem doesn't use this). Writeback groups are set on
+folios when they're modified. If a folio to-be-modified is already marked with
+a different group, it is flushed first. The writeback API allows writing back
+of a specific group.
+
+Memory-Mapped I/O API
---------------------
-Three read helpers are provided::
+An API for support of mmap()'d I/O is provided::
+
+ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
+
+This allows the filesystem to delegate ``.page_mkwrite`` to netfslib. The
+filesystem should not take the inode lock before calling it, but, as with the
+locked write functions above, this does take a writeback group pointer. If the
+page to be made writable is in a different group, it will be flushed first.
+
+Monolithic Files API
+--------------------
+
+There is also a special API set for files for which the content must be read in
+a single RPC (and not written back) and is maintained as a monolithic blob
+(e.g. an AFS directory), though it can be stored and updated in the local cache::
+
+ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter);
+ void netfs_single_mark_inode_dirty(struct inode *inode);
+ int netfs_writeback_single(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct iov_iter *iter);
+
+The first function reads from a file into the given buffer, reading from the
+cache in preference if the data is cached there; the second function allows the
+inode to be marked dirty, causing a later writeback; and the third function can
+be called from the writeback code to write the data to the cache, if there is
+one.
- void netfs_readahead(struct readahead_control *ractl);
- int netfs_read_folio(struct file *file,
- struct folio *folio);
- int netfs_write_begin(struct netfs_inode *ctx,
- struct file *file,
- struct address_space *mapping,
- loff_t pos,
- unsigned int len,
- struct folio **_folio,
- void **_fsdata);
+The inode should be marked ``NETFS_ICTX_SINGLE_NO_UPLOAD`` if this API is to be
+used. The writeback function requires the buffer to be of ITER_FOLIOQ type.
-Each corresponds to a VM address space operation. These operations use the
-state in the per-inode context.
+High-Level VM API
+==================
-For ->readahead() and ->read_folio(), the network filesystem just point directly
-at the corresponding read helper; whereas for ->write_begin(), it may be a
-little more complicated as the network filesystem might want to flush
-conflicting writes or track dirty data and needs to put the acquired folio if
-an error occurs after calling the helper.
+Netfslib also provides a number of sets of API calls for the filesystem to
+delegate VM operations to. Again, netfslib, in turn, will call out to the
+filesystem and the cache to negotiate I/O sizes, issue RPCs and provide places
+for it to intervene at various times::
-The helpers manage the read request, calling back into the network filesystem
-through the supplied table of operations. Waits will be performed as
-necessary before returning for helpers that are meant to be synchronous.
+ void netfs_readahead(struct readahead_control *);
+ int netfs_read_folio(struct file *, struct folio *);
+ int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
+ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
+ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
+ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
-If an error occurs, the ->free_request() will be called to clean up the
-netfs_io_request struct allocated. If some parts of the request are in
-progress when an error occurs, the request will get partially completed if
-sufficient data is read.
+These are ``address_space_operations`` methods and can be set directly in the
+operations table.
-Additionally, there is::
+Deprecated PG_private_2 API
+---------------------------
- * void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
- ssize_t transferred_or_error,
- bool was_async);
+There is also a deprecated function for filesystems that still use the
+``->write_begin`` method::
-which should be called to complete a read subrequest. This is given the number
-of bytes transferred or a negative error code, plus a flag indicating whether
-the operation was asynchronous (ie. whether the follow-on processing can be
-done in the current context, given this may involve sleeping).
+ int netfs_write_begin(struct netfs_inode *inode, struct file *file,
+ struct address_space *mapping, loff_t pos, unsigned int len,
+ struct folio **_folio, void **_fsdata);
+It uses the deprecated PG_private_2 flag and so should not be used.
-Read Helper Structures
-----------------------
-The read helpers make use of a couple of structures to maintain the state of
-the read. The first is a structure that manages a read request as a whole::
+I/O Request API
+===============
+
+The I/O request API comprises a number of structures and a number of functions
+that the filesystem may need to use.
+
+Request Structure
+-----------------
+
+The request structure manages the request as a whole, holding some resources
+and state on behalf of the filesystem and tracking the collection of results::
struct netfs_io_request {
+ enum netfs_io_origin origin;
struct inode *inode;
struct address_space *mapping;
- struct netfs_cache_resources cache_resources;
+ struct netfs_group *group;
+ struct netfs_io_stream io_streams[];
void *netfs_priv;
- loff_t start;
- size_t len;
- loff_t i_size;
- const struct netfs_request_ops *netfs_ops;
+ void *netfs_priv2;
+ unsigned long long start;
+ unsigned long long len;
+ unsigned long long i_size;
unsigned int debug_id;
+ unsigned long flags;
...
};
-The above fields are the ones the netfs can use. They are:
+Many of the fields are for internal use, but the fields shown here are of
+interest to the filesystem:
+
+ * ``origin``
+
+ The origin of the request (readahead, read_folio, DIO read, writeback, ...).
* ``inode``
* ``mapping``
@@ -202,11 +524,19 @@ The above fields are the ones the netfs can use. They are:
The inode and the address space of the file being read from. The mapping
may or may not point to inode->i_data.
- * ``cache_resources``
+ * ``group``
+
+ The writeback group this request is dealing with or NULL. This holds a ref
+ on the group.
+
+ * ``io_streams``
- Resources for the local cache to use, if present.
+ The parallel streams of subrequests available to the request. Currently two
+ are available, but this may be made extensible in future. ``NR_IO_STREAMS``
+ indicates the size of the array.
* ``netfs_priv``
+ * ``netfs_priv2``
The network filesystem's private data. The value for this can be passed in
to the helper functions or set during the request.
@@ -221,37 +551,121 @@ The above fields are the ones the netfs can use. They are:
The size of the file at the start of the request.
- * ``netfs_ops``
-
- A pointer to the operation table. The value for this is passed into the
- helper functions.
-
* ``debug_id``
A number allocated to this operation that can be displayed in trace lines
for reference.
+ * ``flags``
+
+ Flags for managing and controlling the operation of the request. Some of
+ these may be of interest to the filesystem:
+
+ * ``NETFS_RREQ_RETRYING``
+
+ Netfslib sets this when generating retries.
+
+ * ``NETFS_RREQ_PAUSE``
+
+ The filesystem can set this to request to pause the library's subrequest
+ issuing loop - but care needs to be taken as netfslib may also set it.
+
+ * ``NETFS_RREQ_NONBLOCK``
+ * ``NETFS_RREQ_BLOCKED``
+
+ Netfslib sets the first to indicate that non-blocking mode was set by the
+ caller and the filesystem can set the second to indicate that it would
+ have had to block.
+
+ * ``NETFS_RREQ_USE_PGPRIV2``
+
+ The filesystem can set this if it wants to use PG_private_2 to track
+ whether a folio is being written to the cache. This is deprecated as
+ PG_private_2 is going to go away.
+
+If the filesystem wants more private data than is afforded by this structure,
+then it should wrap it and provide its own allocator.
+
+Stream Structure
+----------------
+
+A request is comprised of one or more parallel streams and each stream may be
+aimed at a different target.
+
+For read requests, only stream 0 is used. This can contain a mixture of
+subrequests aimed at different sources. For write requests, stream 0 is used
+for the server and stream 1 is used for the cache. For buffered writeback,
+stream 0 is not enabled unless a normal dirty folio is encountered, at which
+point ->begin_writeback() will be invoked and the filesystem can mark the
+stream available.
+
+The stream struct looks like::
+
+ struct netfs_io_stream {
+ unsigned char stream_nr;
+ bool avail;
+ size_t sreq_max_len;
+ unsigned int sreq_max_segs;
+ unsigned int submit_extendable_to;
+ ...
+ };
+
+A number of members are available for access/use by the filesystem:
+
+ * ``stream_nr``
+
+ The number of the stream within the request.
+
+ * ``avail``
+
+ True if the stream is available for use. The filesystem should set this on
+ stream zero if in ->begin_writeback().
+
+ * ``sreq_max_len``
+ * ``sreq_max_segs``
+
+ These are set by the filesystem or the cache in ->prepare_read() or
+ ->prepare_write() for each subrequest to indicate the maximum number of
+ bytes and, optionally, the maximum number of segments (if not 0) that that
+ subrequest can support.
+
+ * ``submit_extendable_to``
-The second structure is used to manage individual slices of the overall read
-request::
+ The size that a subrequest can be rounded up to beyond the EOF, given the
+ available buffer. This allows the cache to work out if it can do a DIO read
+ or write that straddles the EOF marker.
+
+Subrequest Structure
+--------------------
+
+Individual units of I/O are managed by the subrequest structure. These
+represent slices of the overall request and run independently::
struct netfs_io_subrequest {
struct netfs_io_request *rreq;
- loff_t start;
+ struct iov_iter io_iter;
+ unsigned long long start;
size_t len;
size_t transferred;
unsigned long flags;
+ short error;
unsigned short debug_index;
+ unsigned char stream_nr;
...
};
-Each subrequest is expected to access a single source, though the helpers will
+Each subrequest is expected to access a single source, though the library will
handle falling back from one source type to another. The members are:
* ``rreq``
A pointer to the read request.
+ * ``io_iter``
+
+ An I/O iterator representing a slice of the buffer to be read into or
+ written from.
+
* ``start``
* ``len``
@@ -260,241 +674,300 @@ handle falling back from one source type to another. The members are:
* ``transferred``
- The amount of data transferred so far of the length of this slice. The
- network filesystem or cache should start the operation this far into the
- slice. If a short read occurs, the helpers will call again, having updated
- this to reflect the amount read so far.
+ The amount of data transferred so far for this subrequest. This should be
+ added to with the length of the transfer made by this issuance of the
+ subrequest. If this is less than ``len`` then the subrequest may be
+ reissued to continue.
* ``flags``
- Flags pertaining to the read. There are two of interest to the filesystem
- or cache:
+ Flags for managing the subrequest. There are a number of interest to the
+ filesystem or cache:
+
+ * ``NETFS_SREQ_MADE_PROGRESS``
+
+ Set by the filesystem to indicates that at least one byte of data was read
+ or written.
+
+ * ``NETFS_SREQ_HIT_EOF``
+
+ The filesystem should set this if a read hit the EOF on the file (in which
+ case ``transferred`` should stop at the EOF). Netfslib may expand the
+ subrequest out to the size of the folio containing the EOF on the off
+ chance that a third party change happened or a DIO read may have asked for
+ more than is available. The library will clear any excess pagecache.
* ``NETFS_SREQ_CLEAR_TAIL``
- This can be set to indicate that the remainder of the slice, from
- transferred to len, should be cleared.
+ The filesystem can set this to indicate that the remainder of the slice,
+ from transferred to len, should be cleared. Do not set if HIT_EOF is set.
+
+ * ``NETFS_SREQ_NEED_RETRY``
+
+ The filesystem can set this to tell netfslib to retry the subrequest.
+
+ * ``NETFS_SREQ_BOUNDARY``
+
+ This can be set by the filesystem on a subrequest to indicate that it ends
+ at a boundary with the filesystem structure (e.g. at the end of a Ceph
+ object). It tells netfslib not to retile subrequests across it.
* ``NETFS_SREQ_SEEK_DATA_READ``
- This is a hint to the cache that it might want to try skipping ahead to
- the next data (ie. using SEEK_DATA).
+ This is a hint from netfslib to the cache that it might want to try
+ skipping ahead to the next data (ie. using SEEK_DATA).
+
+ * ``error``
+
+ This is for the filesystem to store result of the subrequest. It should be
+ set to 0 if successful and a negative error code otherwise.
* ``debug_index``
+ * ``stream_nr``
A number allocated to this slice that can be displayed in trace lines for
- reference.
+ reference and the number of the request stream that it belongs to.
+If necessary, the filesystem can get and put extra refs on the subrequest it is
+given::
-Read Helper Operations
-----------------------
+ void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
+ enum netfs_sreq_ref_trace what);
+ void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
+ enum netfs_sreq_ref_trace what);
-The network filesystem must provide the read helpers with a table of operations
-through which it can issue requests and negotiate::
+using netfs trace codes to indicate the reason. Care must be taken, however,
+as once control of the subrequest is returned to netfslib, the same subrequest
+can be reissued/retried.
+
+Filesystem Methods
+------------------
+
+The filesystem sets a table of operations in ``netfs_inode`` for netfslib to
+use::
struct netfs_request_ops {
- void (*init_request)(struct netfs_io_request *rreq, struct file *file);
+ mempool_t *request_pool;
+ mempool_t *subrequest_pool;
+ int (*init_request)(struct netfs_io_request *rreq, struct file *file);
void (*free_request)(struct netfs_io_request *rreq);
+ void (*free_subrequest)(struct netfs_io_subrequest *rreq);
void (*expand_readahead)(struct netfs_io_request *rreq);
- bool (*clamp_length)(struct netfs_io_subrequest *subreq);
+ int (*prepare_read)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
- bool (*is_still_valid)(struct netfs_io_request *rreq);
- int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
- struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
+ void (*update_i_size)(struct inode *inode, loff_t i_size);
+ void (*post_modify)(struct inode *inode);
+ void (*begin_writeback)(struct netfs_io_request *wreq);
+ void (*prepare_write)(struct netfs_io_subrequest *subreq);
+ void (*issue_write)(struct netfs_io_subrequest *subreq);
+ void (*retry_request)(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream);
+ void (*invalidate_cache)(struct netfs_io_request *wreq);
};
-The operations are as follows:
-
- * ``init_request()``
+The table starts with a pair of optional pointers to memory pools from which
+requests and subrequests can be allocated. If these are not given, netfslib
+has default pools that it will use instead. If the filesystem wraps the netfs
+structs in its own larger structs, then it will need to use its own pools.
+Netfslib will allocate directly from the pools.
- [Optional] This is called to initialise the request structure. It is given
- the file for reference.
+The methods defined in the table are:
+ * ``init_request()``
* ``free_request()``
+ * ``free_subrequest()``
- [Optional] This is called as the request is being deallocated so that the
- filesystem can clean up any state it has attached there.
+ [Optional] A filesystem may implement these to initialise or clean up any
+ resources that it attaches to the request or subrequest.
* ``expand_readahead()``
[Optional] This is called to allow the filesystem to expand the size of a
- readahead read request. The filesystem gets to expand the request in both
- directions, though it's not permitted to reduce it as the numbers may
- represent an allocation already made. If local caching is enabled, it gets
- to expand the request first.
+ readahead request. The filesystem gets to expand the request in both
+ directions, though it must retain the initial region as that may represent
+ an allocation already made. If local caching is enabled, it gets to expand
+ the request first.
Expansion is communicated by changing ->start and ->len in the request
structure. Note that if any change is made, ->len must be increased by at
least as much as ->start is reduced.
- * ``clamp_length()``
-
- [Optional] This is called to allow the filesystem to reduce the size of a
- subrequest. The filesystem can use this, for example, to chop up a request
- that has to be split across multiple servers or to put multiple reads in
- flight.
-
- This should return 0 on success and an error code on error.
-
- * ``issue_read()``
+ * ``prepare_read()``
- [Required] The helpers use this to dispatch a subrequest to the server for
- reading. In the subrequest, ->start, ->len and ->transferred indicate what
- data should be read from the server.
+ [Optional] This is called to allow the filesystem to limit the size of a
+ subrequest. It may also limit the number of individual regions in iterator,
+ such as required by RDMA. This information should be set on stream zero in::
- There is no return value; the netfs_subreq_terminated() function should be
- called to indicate whether or not the operation succeeded and how much data
- it transferred. The filesystem also should not deal with setting folios
- uptodate, unlocking them or dropping their refs - the helpers need to deal
- with this as they have to coordinate with copying to the local cache.
+ rreq->io_streams[0].sreq_max_len
+ rreq->io_streams[0].sreq_max_segs
- Note that the helpers have the folios locked, but not pinned. It is
- possible to use the ITER_XARRAY iov iterator to refer to the range of the
- inode that is being operated upon without the need to allocate large bvec
- tables.
+ The filesystem can use this, for example, to chop up a request that has to
+ be split across multiple servers or to put multiple reads in flight.
- * ``is_still_valid()``
+ Zero should be returned on success and an error code otherwise.
- [Optional] This is called to find out if the data just read from the local
- cache is still valid. It should return true if it is still valid and false
- if not. If it's not still valid, it will be reread from the server.
+ * ``issue_read()``
- * ``check_write_begin()``
+ [Required] Netfslib calls this to dispatch a subrequest to the server for
+ reading. In the subrequest, ->start, ->len and ->transferred indicate what
+ data should be read from the server and ->io_iter indicates the buffer to be
+ used.
- [Optional] This is called from the netfs_write_begin() helper once it has
- allocated/grabbed the folio to be modified to allow the filesystem to flush
- conflicting state before allowing it to be modified.
+ There is no return value; the ``netfs_read_subreq_terminated()`` function
+ should be called to indicate that the subrequest completed either way.
+ ->error, ->transferred and ->flags should be updated before completing. The
+ termination can be done asynchronously.
- It may unlock and discard the folio it was given and set the caller's folio
- pointer to NULL. It should return 0 if everything is now fine (``*foliop``
- left set) or the op should be retried (``*foliop`` cleared) and any other
- error code to abort the operation.
+ Note: the filesystem must not deal with setting folios uptodate, unlocking
+ them or dropping their refs - the library deals with this as it may have to
+ stitch together the results of multiple subrequests that variously overlap
+ the set of folios.
- * ``done``
+ * ``done()``
- [Optional] This is called after the folios in the request have all been
+ [Optional] This is called after the folios in a read request have all been
unlocked (and marked uptodate if applicable).
+ * ``update_i_size()``
+
+ [Optional] This is invoked by netfslib at various points during the write
+ paths to ask the filesystem to update its idea of the file size. If not
+ given, netfslib will set i_size and i_blocks and update the local cache
+ cookie.
+
+ * ``post_modify()``
+
+ [Optional] This is called after netfslib writes to the pagecache or when it
+ allows an mmap'd page to be marked as writable.
+
+ * ``begin_writeback()``
+
+ [Optional] Netfslib calls this when processing a writeback request if it
+ finds a dirty page that isn't simply marked NETFS_FOLIO_COPY_TO_CACHE,
+ indicating it must be written to the server. This allows the filesystem to
+ only set up writeback resources when it knows it's going to have to perform
+ a write.
+
+ * ``prepare_write()``
+ [Optional] This is called to allow the filesystem to limit the size of a
+ subrequest. It may also limit the number of individual regions in iterator,
+ such as required by RDMA. This information should be set on stream to which
+ the subrequest belongs::
-Read Helper Procedure
----------------------
-
-The read helpers work by the following general procedure:
-
- * Set up the request.
-
- * For readahead, allow the local cache and then the network filesystem to
- propose expansions to the read request. This is then proposed to the VM.
- If the VM cannot fully perform the expansion, a partially expanded read will
- be performed, though this may not get written to the cache in its entirety.
-
- * Loop around slicing chunks off of the request to form subrequests:
-
- * If a local cache is present, it gets to do the slicing, otherwise the
- helpers just try to generate maximal slices.
-
- * The network filesystem gets to clamp the size of each slice if it is to be
- the source. This allows rsize and chunking to be implemented.
+ rreq->io_streams[subreq->stream_nr].sreq_max_len
+ rreq->io_streams[subreq->stream_nr].sreq_max_segs
- * The helpers issue a read from the cache or a read from the server or just
- clears the slice as appropriate.
+ The filesystem can use this, for example, to chop up a request that has to
+ be split across multiple servers or to put multiple writes in flight.
- * The next slice begins at the end of the last one.
+ This is not permitted to return an error. Instead, in the event of failure,
+ ``netfs_prepare_write_failed()`` must be called.
- * As slices finish being read, they terminate.
+ * ``issue_write()``
- * When all the subrequests have terminated, the subrequests are assessed and
- any that are short or have failed are reissued:
+ [Required] This is used to dispatch a subrequest to the server for writing.
+ In the subrequest, ->start, ->len and ->transferred indicate what data
+ should be written to the server and ->io_iter indicates the buffer to be
+ used.
- * Failed cache requests are issued against the server instead.
+ There is no return value; the ``netfs_write_subreq_terminated()`` function
+ should be called to indicate that the subrequest completed either way.
+ ->error, ->transferred and ->flags should be updated before completing. The
+ termination can be done asynchronously.
- * Failed server requests just fail.
+ Note: the filesystem must not deal with removing the dirty or writeback
+ marks on folios involved in the operation and should not take refs or pins
+ on them, but should leave retention to netfslib.
- * Short reads against either source will be reissued against that source
- provided they have transferred some more data:
+ * ``retry_request()``
- * The cache may need to skip holes that it can't do DIO from.
+ [Optional] Netfslib calls this at the beginning of a retry cycle. This
+ allows the filesystem to examine the state of the request, the subrequests
+ in the indicated stream and of its own data and make adjustments or
+ renegotiate resources.
+
+ * ``invalidate_cache()``
- * If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the
- end of the slice instead of reissuing.
+ [Optional] This is called by netfslib to invalidate data stored in the local
+ cache in the event that writing to the local cache fails, providing updated
+ coherency data that netfs can't provide.
- * Once the data is read, the folios that have been fully read/cleared:
+Terminating a subrequest
+------------------------
- * Will be marked uptodate.
+When a subrequest completes, there are a number of functions that the cache or
+subrequest can call to inform netfslib of the status change. One function is
+provided to terminate a write subrequest at the preparation stage and acts
+synchronously:
- * If a cache is present, will be marked with PG_fscache.
+ * ``void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq);``
- * Unlocked
+ Indicate that the ->prepare_write() call failed. The ``error`` field should
+ have been updated.
- * Any folios that need writing to the cache will then have DIO writes issued.
+Note that ->prepare_read() can return an error as a read can simply be aborted.
+Dealing with writeback failure is trickier.
- * Synchronous operations will wait for reading to be complete.
+The other functions are used for subrequests that got as far as being issued:
- * Writes to the cache will proceed asynchronously and the folios will have the
- PG_fscache mark removed when that completes.
+ * ``void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq);``
- * The request structures will be cleaned up when everything has completed.
+ Tell netfslib that a read subrequest has terminated. The ``error``,
+ ``flags`` and ``transferred`` fields should have been updated.
+ * ``void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error);``
-Read Helper Cache API
----------------------
+ Tell netfslib that a write subrequest has terminated. Either the amount of
+ data processed or the negative error code can be passed in. This is
+ can be used as a kiocb completion function.
-When implementing a local cache to be used by the read helpers, two things are
-required: some way for the network filesystem to initialise the caching for a
-read request and a table of operations for the helpers to call.
+ * ``void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq);``
-To begin a cache operation on an fscache object, the following function is
-called::
+ This is provided to optionally update netfslib on the incremental progress
+ of a read, allowing some folios to be unlocked early and does not actually
+ terminate the subrequest. The ``transferred`` field should have been
+ updated.
- int fscache_begin_read_operation(struct netfs_io_request *rreq,
- struct fscache_cookie *cookie);
+Local Cache API
+---------------
-passing in the request pointer and the cookie corresponding to the file. This
-fills in the cache resources mentioned below.
+Netfslib provides a separate API for a local cache to implement, though it
+provides some somewhat similar routines to the filesystem request API.
-The netfs_io_request object contains a place for the cache to hang its
+Firstly, the netfs_io_request object contains a place for the cache to hang its
state::
struct netfs_cache_resources {
const struct netfs_cache_ops *ops;
void *cache_priv;
void *cache_priv2;
+ unsigned int debug_id;
+ unsigned int inval_counter;
};
-This contains an operations table pointer and two private pointers. The
-operation table looks like the following::
+This contains an operations table pointer and two private pointers plus the
+debug ID of the fscache cookie for tracing purposes and an invalidation counter
+that is cranked by calls to ``fscache_invalidate()`` allowing cache subrequests
+to be invalidated after completion.
+
+The cache operation table looks like the following::
struct netfs_cache_ops {
void (*end_operation)(struct netfs_cache_resources *cres);
-
void (*expand_readahead)(struct netfs_cache_resources *cres,
loff_t *_start, size_t *_len, loff_t i_size);
-
enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
- loff_t i_size);
-
+ loff_t i_size);
int (*read)(struct netfs_cache_resources *cres,
loff_t start_pos,
struct iov_iter *iter,
bool seek_data,
netfs_io_terminated_t term_func,
void *term_func_priv);
-
- int (*prepare_write)(struct netfs_cache_resources *cres,
- loff_t *_start, size_t *_len, loff_t i_size,
- bool no_space_allocated_yet);
-
- int (*write)(struct netfs_cache_resources *cres,
- loff_t start_pos,
- struct iov_iter *iter,
- netfs_io_terminated_t term_func,
- void *term_func_priv);
-
- int (*query_occupancy)(struct netfs_cache_resources *cres,
- loff_t start, size_t len, size_t granularity,
- loff_t *_data_start, size_t *_data_len);
+ void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq);
+ void (*issue_write)(struct netfs_io_subrequest *subreq);
};
With a termination handler function pointer::
@@ -511,10 +984,16 @@ The methods defined in the table are:
* ``expand_readahead()``
- [Optional] Called at the beginning of a netfs_readahead() operation to allow
- the cache to expand a request in either direction. This allows the cache to
+ [Optional] Called at the beginning of a readahead operation to allow the
+ cache to expand a request in either direction. This allows the cache to
size the request appropriately for the cache granularity.
+ * ``prepare_read()``
+
+ [Required] Called to configure the next slice of a request. ->start and
+ ->len in the subrequest indicate where and how big the next slice can be;
+ the cache gets to reduce the length to match its granularity requirements.
+
The function is passed pointers to the start and length in its parameters,
plus the size of the file for reference, and adjusts the start and length
appropriately. It should return one of:
@@ -528,12 +1007,6 @@ The methods defined in the table are:
downloaded from the server or read from the cache - or whether slicing
should be given up at the current point.
- * ``prepare_read()``
-
- [Required] Called to configure the next slice of a request. ->start and
- ->len in the subrequest indicate where and how big the next slice can be;
- the cache gets to reduce the length to match its granularity requirements.
-
* ``read()``
[Required] Called to read from the cache. The start file offset is given
@@ -547,44 +1020,33 @@ The methods defined in the table are:
indicating whether the termination is definitely happening in the caller's
context.
- * ``prepare_write()``
+ * ``prepare_write_subreq()``
- [Required] Called to prepare a write to the cache to take place. This
- involves checking to see whether the cache has sufficient space to honour
- the write. ``*_start`` and ``*_len`` indicate the region to be written; the
- region can be shrunk or it can be expanded to a page boundary either way as
- necessary to align for direct I/O. i_size holds the size of the object and
- is provided for reference. no_space_allocated_yet is set to true if the
- caller is certain that no data has been written to that region - for example
- if it tried to do a read from there already.
+ [Required] This is called to allow the cache to limit the size of a
+ subrequest. It may also limit the number of individual regions in iterator,
+ such as required by DIO/DMA. This information should be set on stream to
+ which the subrequest belongs::
- * ``write()``
+ rreq->io_streams[subreq->stream_nr].sreq_max_len
+ rreq->io_streams[subreq->stream_nr].sreq_max_segs
- [Required] Called to write to the cache. The start file offset is given
- along with an iterator to write from, which gives the length also.
-
- Also provided is a pointer to a termination handler function and private
- data to pass to that function. The termination function should be called
- with the number of bytes transferred or an error code, plus a flag
- indicating whether the termination is definitely happening in the caller's
- context.
+ The filesystem can use this, for example, to chop up a request that has to
+ be split across multiple servers or to put multiple writes in flight.
- * ``query_occupancy()``
+ This is not permitted to return an error. In the event of failure,
+ ``netfs_prepare_write_failed()`` must be called.
- [Required] Called to find out where the next piece of data is within a
- particular region of the cache. The start and length of the region to be
- queried are passed in, along with the granularity to which the answer needs
- to be aligned. The function passes back the start and length of the data,
- if any, available within that region. Note that there may be a hole at the
- front.
+ * ``issue_write()``
- It returns 0 if some data was found, -ENODATA if there was no usable data
- within the region or -ENOBUFS if there is no caching on this file.
+ [Required] This is used to dispatch a subrequest to the cache for writing.
+ In the subrequest, ->start, ->len and ->transferred indicate what data
+ should be written to the cache and ->io_iter indicates the buffer to be
+ used.
-Note that these methods are passed a pointer to the cache resource structure,
-not the read request structure as they could be used in other situations where
-there isn't a read request structure as well, such as writing dirty data to the
-cache.
+ There is no return value; the ``netfs_write_subreq_terminated()`` function
+ should be called to indicate that the subrequest completed either way.
+ ->error, ->transferred and ->flags should be updated before completing. The
+ termination can be done asynchronously.
API Function Reference
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 767b2927c762..3111ef5592f3 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1203,3 +1203,43 @@ should use d_drop();d_splice_alias() and return the result of the latter.
If a positive dentry cannot be returned for some reason, in-kernel
clients such as cachefiles, nfsd, smb/server may not perform ideally but
will fail-safe.
+
+---
+
+** mandatory**
+
+lookup_one(), lookup_one_unlocked(), lookup_one_positive_unlocked() now
+take a qstr instead of a name and len. These, not the "one_len"
+versions, should be used whenever accessing a filesystem from outside
+that filesysmtem, through a mount point - which will have a mnt_idmap.
+
+---
+
+** mandatory**
+
+Functions try_lookup_one_len(), lookup_one_len(),
+lookup_one_len_unlocked() and lookup_positive_unlocked() have been
+renamed to try_lookup_noperm(), lookup_noperm(),
+lookup_noperm_unlocked(), lookup_noperm_positive_unlocked(). They now
+take a qstr instead of separate name and length. QSTR() can be used
+when strlen() is needed for the length.
+
+For try_lookup_noperm() a reference to the qstr is passed in case the
+hash might subsequently be needed.
+
+These function no longer do any permission checking - they previously
+checked that the caller has 'X' permission on the parent. They must
+ONLY be used internally by a filesystem on itself when it knows that
+permissions are irrelevant or in a context where permission checks have
+already been performed such as after vfs_path_parent_lookup()
+
+---
+
+** mandatory**
+
+d_hash_and_lookup() is no longer exported or available outside the VFS.
+Use try_lookup_noperm() instead. This adds name validation and takes
+arguments in the opposite order but is otherwise identical.
+
+Using try_lookup_noperm() will require linux/namei.h to be included.
+
diff --git a/Documentation/filesystems/relay.rst b/Documentation/filesystems/relay.rst
index 04ad083cfe62..46447dbc75ad 100644
--- a/Documentation/filesystems/relay.rst
+++ b/Documentation/filesystems/relay.rst
@@ -32,7 +32,7 @@ functions in the relay interface code - please see that for details.
Semantics
=========
-Each relay channel has one buffer per CPU, each buffer has one or more
+Each relay channel has one buffer per CPU; each buffer has one or more
sub-buffers. Messages are written to the first sub-buffer until it is
too full to contain a new message, in which case it is written to
the next (if available). Messages are never split across sub-buffers.
@@ -40,7 +40,7 @@ At this point, userspace can be notified so it empties the first
sub-buffer, while the kernel continues writing to the next.
When notified that a sub-buffer is full, the kernel knows how many
-bytes of it are padding i.e. unused space occurring because a complete
+bytes of it are padding, i.e., unused space occurring because a complete
message couldn't fit into a sub-buffer. Userspace can use this
knowledge to copy only valid data.
@@ -71,7 +71,7 @@ klog and relay-apps example code
================================
The relay interface itself is ready to use, but to make things easier,
-a couple simple utility functions and a set of examples are provided.
+a couple of simple utility functions and a set of examples are provided.
The relay-apps example tarball, available on the relay sourceforge
site, contains a set of self-contained examples, each consisting of a
@@ -91,7 +91,7 @@ registered will data actually be logged (see the klog and kleak
examples for details).
It is of course possible to use the relay interface from scratch,
-i.e. without using any of the relay-apps example code or klog, but
+i.e., without using any of the relay-apps example code or klog, but
you'll have to implement communication between userspace and kernel,
allowing both to convey the state of buffers (full, empty, amount of
padding). The read() interface both removes padding and internally
@@ -119,7 +119,7 @@ mmap() results in channel buffer being mapped into the caller's
must map the entire file, which is NRBUF * SUBBUFSIZE.
read() read the contents of a channel buffer. The bytes read are
- 'consumed' by the reader, i.e. they won't be available
+ 'consumed' by the reader, i.e., they won't be available
again to subsequent reads. If the channel is being used
in no-overwrite mode (the default), it can be read at any
time even if there's an active kernel writer. If the
@@ -138,7 +138,7 @@ poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
notified when sub-buffer boundaries are crossed.
close() decrements the channel buffer's refcount. When the refcount
- reaches 0, i.e. when no process or kernel client has the
+ reaches 0, i.e., when no process or kernel client has the
buffer open, the channel buffer is freed.
=========== ============================================================
@@ -149,7 +149,7 @@ host filesystem must be mounted. For example::
.. Note::
- the host filesystem doesn't need to be mounted for kernel
+ The host filesystem doesn't need to be mounted for kernel
clients to create or use channels - it only needs to be
mounted when user space applications need access to the buffer
data.
@@ -325,7 +325,7 @@ section, as it pertains mainly to mmap() implementations.
In 'overwrite' mode, also known as 'flight recorder' mode, writes
continuously cycle around the buffer and will never fail, but will
unconditionally overwrite old data regardless of whether it's actually
-been consumed. In no-overwrite mode, writes will fail, i.e. data will
+been consumed. In no-overwrite mode, writes will fail, i.e., data will
be lost, if the number of unconsumed sub-buffers equals the total
number of sub-buffers in the channel. It should be clear that if
there is no consumer or if the consumer can't consume sub-buffers fast
@@ -344,7 +344,7 @@ initialize the next sub-buffer if appropriate 2) finalize the previous
sub-buffer if appropriate and 3) return a boolean value indicating
whether or not to actually move on to the next sub-buffer.
-To implement 'no-overwrite' mode, the userspace client would provide
+To implement 'no-overwrite' mode, the userspace client provides
an implementation of the subbuf_start() callback something like the
following::
@@ -364,9 +364,9 @@ following::
return 1;
}
-If the current buffer is full, i.e. all sub-buffers remain unconsumed,
+If the current buffer is full, i.e., all sub-buffers remain unconsumed,
the callback returns 0 to indicate that the buffer switch should not
-occur yet, i.e. until the consumer has had a chance to read the
+occur yet, i.e., until the consumer has had a chance to read the
current set of ready sub-buffers. For the relay_buf_full() function
to make sense, the consumer is responsible for notifying the relay
interface when sub-buffers have been consumed via
@@ -400,7 +400,7 @@ consulted.
The default subbuf_start() implementation, used if the client doesn't
define any callbacks, or doesn't define the subbuf_start() callback,
-implements the simplest possible 'no-overwrite' mode, i.e. it does
+implements the simplest possible 'no-overwrite' mode, i.e., it does
nothing but return 0.
Header information can be reserved at the beginning of each sub-buffer
@@ -467,7 +467,7 @@ rather than open and close a new channel for each use. relay_reset()
can be used for this purpose - it resets a channel to its initial
state without reallocating channel buffer memory or destroying
existing mappings. It should however only be called when it's safe to
-do so, i.e. when the channel isn't currently being written to.
+do so, i.e., when the channel isn't currently being written to.
Finally, there are a couple of utility callbacks that can be used for
different purposes. buf_mapped() is called whenever a channel buffer
diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/filesystems/resctrl.rst
index 6768fc1fad16..c7949dd44f2f 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/filesystems/resctrl.rst
@@ -1,9 +1,9 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: <isonum.txt>
-===========================================
-User Interface for Resource Control feature
-===========================================
+=====================================================
+User Interface for Resource Control feature (resctrl)
+=====================================================
:Copyright: |copy| 2016 Intel Corporation
:Authors: - Fenghua Yu <fenghua.yu@intel.com>
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index ae79c30b6c0c..bf051c7da6b8 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -716,9 +716,8 @@ page lookup by address, and keeping track of pages tagged as Dirty or
Writeback.
The first can be used independently to the others. The VM can try to
-either write dirty pages in order to clean them, or release clean pages
-in order to reuse them. To do this it can call the ->writepage method
-on dirty pages, and ->release_folio on clean folios with the private
+release clean pages in order to reuse them. To do this it can call
+->release_folio on clean folios with the private
flag set. Clean pages without PagePrivate and with no external references
will be released without notice being given to the address_space.
@@ -731,8 +730,8 @@ maintains information about the PG_Dirty and PG_Writeback status of each
page, so that pages with either of these flags can be found quickly.
The Dirty tag is primarily used by mpage_writepages - the default
-->writepages method. It uses the tag to find dirty pages to call
-->writepage on. If mpage_writepages is not used (i.e. the address
+->writepages method. It uses the tag to find dirty pages to
+write back. If mpage_writepages is not used (i.e. the address
provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is almost
unused. write_inode_now and sync_inode do use it (through
__sync_single_inode) to check if ->writepages has been successful in
@@ -756,23 +755,23 @@ pages, however the address_space has finer control of write sizes.
The read process essentially only requires 'read_folio'. The write
process is more complicated and uses write_begin/write_end or
-dirty_folio to write data into the address_space, and writepage and
+dirty_folio to write data into the address_space, and
writepages to writeback data to storage.
Adding and removing pages to/from an address_space is protected by the
inode's i_mutex.
When data is written to a page, the PG_Dirty flag should be set. It
-typically remains set until writepage asks for it to be written. This
+typically remains set until writepages asks for it to be written. This
should clear PG_Dirty and set PG_Writeback. It can be actually written
at any point after PG_Dirty is clear. Once it is known to be safe,
PG_Writeback is cleared.
Writeback makes use of a writeback_control structure to direct the
-operations. This gives the writepage and writepages operations some
+operations. This gives the writepages operation some
information about the nature of and reason for the writeback request,
and the constraints under which it is being done. It is also used to
-return information back to the caller about the result of a writepage or
+return information back to the caller about the result of a
writepages request.
@@ -819,7 +818,6 @@ cache in your filesystem. The following members are defined:
.. code-block:: c
struct address_space_operations {
- int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*read_folio)(struct file *, struct folio *);
int (*writepages)(struct address_space *, struct writeback_control *);
bool (*dirty_folio)(struct address_space *, struct folio *);
@@ -848,25 +846,6 @@ cache in your filesystem. The following members are defined:
int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
};
-``writepage``
- called by the VM to write a dirty page to backing store. This
- may happen for data integrity reasons (i.e. 'sync'), or to free
- up memory (flush). The difference can be seen in
- wbc->sync_mode. The PG_Dirty flag has been cleared and
- PageLocked is true. writepage should start writeout, should set
- PG_Writeback, and should make sure the page is unlocked, either
- synchronously or asynchronously when the write operation
- completes.
-
- If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
- try too hard if there are problems, and may choose to write out
- other pages from the mapping if that is easier (e.g. due to
- internal dependencies). If it chooses not to start writeout, it
- should return AOP_WRITEPAGE_ACTIVATE so that the VM will not
- keep calling ->writepage on that page.
-
- See the file "Locking" for more details.
-
``read_folio``
Called by the page cache to read a folio from the backing store.
The 'file' argument supplies authentication information to network
@@ -909,7 +888,7 @@ cache in your filesystem. The following members are defined:
given and that many pages should be written if possible. If no
->writepages is given, then mpage_writepages is used instead.
This will choose pages from the address space that are tagged as
- DIRTY and will pass them to ->writepage.
+ DIRTY and will write them back.
``dirty_folio``
called by the VM to mark a folio as dirty. This is particularly
diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
index 8d8b53e96bcf..ccb4b153e6f2 100644
--- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
+++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
@@ -12,11 +12,14 @@ ACPI in general allows referring to device objects in the tree only.
Hierarchical data extension nodes may not be referred to directly, hence this
document defines a scheme to implement such references.
-A reference consist of the device object name followed by one or more
-hierarchical data extension [dsd-guide] keys. Specifically, the hierarchical
-data extension node which is referred to by the key shall lie directly under
-the parent object i.e. either the device object or another hierarchical data
-extension node.
+A reference to a _DSD hierarchical data node is a string consisting of a
+device object reference followed by a dot (".") and a relative path to a data
+node object. Do not use non-string references as this will produce a copy of
+the hierarchical data node, not a reference!
+
+The hierarchical data extension node which is referred to shall be located
+directly under its parent object i.e. either the device object or another
+hierarchical data extension node [dsd-guide].
The keys in the hierarchical data nodes shall consist of the name of the node,
"@" character and the number of the node in hexadecimal notation (without pre-
@@ -33,11 +36,9 @@ extension key.
Example
=======
-In the ASL snippet below, the "reference" _DSD property contains a
-device object reference to DEV0 and under that device object, a
-hierarchical data extension key "node@1" referring to the NOD1 object
-and lastly, a hierarchical data extension key "anothernode" referring to
-the ANOD object which is also the final target node of the reference.
+In the ASL snippet below, the "reference" _DSD property contains a string
+reference to a hierarchical data extension node ANOD under DEV0 under the parent
+of DEV1. ANOD is also the final target node of the reference.
::
Device (DEV0)
@@ -76,10 +77,7 @@ the ANOD object which is also the final target node of the reference.
Name (_DSD, Package () {
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package () {
- Package () {
- "reference", Package () {
- ^DEV0, "node@1", "anothernode"
- }
+ Package () { "reference", "^DEV0.ANOD" }
},
}
})
diff --git a/Documentation/firmware-guide/acpi/dsd/graph.rst b/Documentation/firmware-guide/acpi/dsd/graph.rst
index b9dbfc73ed25..d6ae5ffa748c 100644
--- a/Documentation/firmware-guide/acpi/dsd/graph.rst
+++ b/Documentation/firmware-guide/acpi/dsd/graph.rst
@@ -66,12 +66,9 @@ of that port shall be zero. Similarly, if a port may only have a single
endpoint, the number of that endpoint shall be zero.
The endpoint reference uses property extension with "remote-endpoint" property
-name followed by a reference in the same package. Such references consist of
-the remote device reference, the first package entry of the port data extension
-reference under the device and finally the first package entry of the endpoint
-data extension reference under the port. Individual references thus appear as::
+name followed by a string reference in the same package. [data-node-ref]::
- Package() { device, "port@X", "endpoint@Y" }
+ "device.datanode"
In the above example, "X" is the number of the port and "Y" is the number of
the endpoint.
@@ -109,7 +106,7 @@ A simple example of this is show below::
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package () {
Package () { "reg", 0 },
- Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } },
+ Package () { "remote-endpoint", "\\_SB.PCI0.ISP.EP40" },
}
})
}
@@ -141,7 +138,7 @@ A simple example of this is show below::
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package () {
Package () { "reg", 0 },
- Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } },
+ Package () { "remote-endpoint", "\\_SB.PCI0.I2C2.CAM0.EP00" },
}
})
}
diff --git a/Documentation/firmware-guide/acpi/dsd/leds.rst b/Documentation/firmware-guide/acpi/dsd/leds.rst
index 93db592c93c7..a97cd07d49be 100644
--- a/Documentation/firmware-guide/acpi/dsd/leds.rst
+++ b/Documentation/firmware-guide/acpi/dsd/leds.rst
@@ -15,11 +15,6 @@ Referring to LEDs in Device tree is documented in [video-interfaces], in
"flash-leds" property documentation. In short, LEDs are directly referred to by
using phandles.
-While Device tree allows referring to any node in the tree [devicetree], in
-ACPI references are limited to device nodes only [acpi]. For this reason using
-the same mechanism on ACPI is not possible. A mechanism to refer to non-device
-ACPI nodes is documented in [data-node-ref].
-
ACPI allows (as does DT) using integer arguments after the reference. A
combination of the LED driver device reference and an integer argument,
referring to the "reg" property of the relevant LED, is used to identify
@@ -74,7 +69,7 @@ omitted. ::
Package () {
Package () {
"flash-leds",
- Package () { ^LED, "led@0", ^LED, "led@1" },
+ Package () { "^LED.LED0", "^LED.LED1" },
}
}
})
diff --git a/Documentation/gpu/rfc/i915_scheduler.rst b/Documentation/gpu/rfc/i915_scheduler.rst
index c237ebc024cd..2974525f0ac5 100644
--- a/Documentation/gpu/rfc/i915_scheduler.rst
+++ b/Documentation/gpu/rfc/i915_scheduler.rst
@@ -26,7 +26,7 @@ i915 with the DRM scheduler is:
which configures a slot with N contexts
* After I915_CONTEXT_ENGINES_EXT_PARALLEL a user can submit N batches to
a slot in a single execbuf IOCTL and the batches run on the GPU in
- paralllel
+ parallel
* Initially only for GuC submission but execlists can be supported if
needed
* Convert the i915 to use the DRM scheduler
diff --git a/Documentation/hid/intel-thc-hid.rst b/Documentation/hid/intel-thc-hid.rst
index 6c417205ac6a..dc9250787fc5 100644
--- a/Documentation/hid/intel-thc-hid.rst
+++ b/Documentation/hid/intel-thc-hid.rst
@@ -182,7 +182,7 @@ value and use PIO write (by setting SubIP write opcode) to do a write operation.
THC also includes two GPIO pins, one for interrupt and the other for device reset control.
-Interrupt line can be configured to either level triggerred or edge triggerred by setting MMIO
+Interrupt line can be configured to either level triggered or edge triggered by setting MMIO
Control register.
Reset line is controlled by BIOS (or EFI) through ACPI _RST method, driver needs to call this
@@ -302,10 +302,10 @@ waiting for interrupt ready then read out the data from system memory.
3.3.2 Software DMA channel
~~~~~~~~~~~~~~~~~~~~~~~~~~
-THC supports a software triggerred RxDMA mode to read the touch data from touch IC. This SW RxDMA
+THC supports a software triggered RxDMA mode to read the touch data from touch IC. This SW RxDMA
is the 3rd THC RxDMA engine with the similar functionalities as the existing two RxDMAs, the only
-difference is this SW RxDMA is triggerred by software, and RxDMA2 is triggerred by external Touch IC
-interrupt. It gives a flexiblity to software driver to use RxDMA read Touch IC data in any time.
+difference is this SW RxDMA is triggered by software, and RxDMA2 is triggered by external Touch IC
+interrupt. It gives a flexibility to software driver to use RxDMA read Touch IC data in any time.
Before software starts a SW RxDMA, it shall stop the 1st and 2nd RxDMA, clear PRD read/write pointer
and quiesce the device interrupt (THC_DEVINT_QUIESCE_HW_STS = 1), other operations are the same with
diff --git a/Documentation/index.rst b/Documentation/index.rst
index f9f525f4c0dd..c0cf79a87c3a 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -84,7 +84,7 @@ which are kept separately from the kernel's own documentation.
Firmware-related documentation
==============================
The following holds information on the kernel's expectations regarding the
-platform firmwares.
+platform firmware.
.. toctree::
:maxdepth: 1
diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst
index a7762486c93f..f2dcc39044e6 100644
--- a/Documentation/kbuild/reproducible-builds.rst
+++ b/Documentation/kbuild/reproducible-builds.rst
@@ -46,6 +46,21 @@ The kernel embeds the building user and host names in
`KBUILD_BUILD_USER and KBUILD_BUILD_HOST`_ variables. If you are
building from a git commit, you could use its committer address.
+Absolute filenames
+------------------
+
+When the kernel is built out-of-tree, debug information may include
+absolute filenames for the source files. This must be overridden by
+including the ``-fdebug-prefix-map`` option in the `KCFLAGS`_ variable.
+
+Depending on the compiler used, the ``__FILE__`` macro may also expand
+to an absolute filename in an out-of-tree build. Kbuild automatically
+uses the ``-fmacro-prefix-map`` option to prevent this, if it is
+supported.
+
+The Reproducible Builds web site has more information about these
+`prefix-map options`_.
+
Generated files in source packages
----------------------------------
@@ -116,5 +131,7 @@ See ``scripts/setlocalversion`` for details.
.. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp
.. _KBUILD_BUILD_USER and KBUILD_BUILD_HOST: kbuild.html#kbuild-build-user-kbuild-build-host
+.. _KCFLAGS: kbuild.html#kcflags
+.. _prefix-map options: https://reproducible-builds.org/docs/build-path/
.. _Reproducible Builds project: https://reproducible-builds.org/
.. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/
diff --git a/Documentation/leds/leds-class-multicolor.rst b/Documentation/leds/leds-class-multicolor.rst
index c57b98bfd387..c6b47b4093c4 100644
--- a/Documentation/leds/leds-class-multicolor.rst
+++ b/Documentation/leds/leds-class-multicolor.rst
@@ -18,24 +18,28 @@ array. These files are children under the LED parent node created by the
led_class framework. The led_class framework is documented in led-class.rst
within this documentation directory.
-Each colored LED will be indexed under the multi_* files. The order of the
-colors will be arbitrary. The multi_index file can be read to determine the
+Each colored LED will be indexed under the ``multi_*`` files. The order of the
+colors will be arbitrary. The ``multi_index`` file can be read to determine the
color name to indexed value.
-The multi_index file is an array that contains the string list of the colors as
-they are defined in each multi_* array file.
+The ``multi_index`` file is an array that contains the string list of the colors as
+they are defined in each ``multi_*`` array file.
-The multi_intensity is an array that can be read or written to for the
+The ``multi_intensity`` is an array that can be read or written to for the
individual color intensities. All elements within this array must be written in
order for the color LED intensities to be updated.
Directory Layout Example
========================
-root:/sys/class/leds/multicolor:status# ls -lR
--rw-r--r-- 1 root root 4096 Oct 19 16:16 brightness
--r--r--r-- 1 root root 4096 Oct 19 16:16 max_brightness
--r--r--r-- 1 root root 4096 Oct 19 16:16 multi_index
--rw-r--r-- 1 root root 4096 Oct 19 16:16 multi_intensity
+.. code-block:: console
+
+ root:/sys/class/leds/multicolor:status# ls -lR
+ -rw-r--r-- 1 root root 4096 Oct 19 16:16 brightness
+ -r--r--r-- 1 root root 4096 Oct 19 16:16 max_brightness
+ -r--r--r-- 1 root root 4096 Oct 19 16:16 multi_index
+ -rw-r--r-- 1 root root 4096 Oct 19 16:16 multi_intensity
+
+..
Multicolor Class Brightness Control
===================================
@@ -43,27 +47,31 @@ The brightness level for each LED is calculated based on the color LED
intensity setting divided by the global max_brightness setting multiplied by
the requested brightness.
-led_brightness = brightness * multi_intensity/max_brightness
+``led_brightness = brightness * multi_intensity/max_brightness``
Example:
A user first writes the multi_intensity file with the brightness levels
for each LED that are necessary to achieve a certain color output from a
multicolor LED group.
-cat /sys/class/leds/multicolor:status/multi_index
-green blue red
+.. code-block:: console
+
+ # cat /sys/class/leds/multicolor:status/multi_index
+ green blue red
-echo 43 226 138 > /sys/class/leds/multicolor:status/multi_intensity
+ # echo 43 226 138 > /sys/class/leds/multicolor:status/multi_intensity
-red -
- intensity = 138
- max_brightness = 255
-green -
- intensity = 43
- max_brightness = 255
-blue -
- intensity = 226
- max_brightness = 255
+ red -
+ intensity = 138
+ max_brightness = 255
+ green -
+ intensity = 43
+ max_brightness = 255
+ blue -
+ intensity = 226
+ max_brightness = 255
+
+..
The user can control the brightness of that multicolor LED group by writing the
global 'brightness' control. Assuming a max_brightness of 255 the user
@@ -71,16 +79,28 @@ may want to dim the LED color group to half. The user would write a value of
128 to the global brightness file then the values written to each LED will be
adjusted base on this value.
-cat /sys/class/leds/multicolor:status/max_brightness
-255
-echo 128 > /sys/class/leds/multicolor:status/brightness
+.. code-block:: console
+
+ # cat /sys/class/leds/multicolor:status/max_brightness
+ 255
+ # echo 128 > /sys/class/leds/multicolor:status/brightness
-adjusted_red_value = 128 * 138/255 = 69
-adjusted_green_value = 128 * 43/255 = 21
-adjusted_blue_value = 128 * 226/255 = 113
+..
+
+.. code-block:: none
+
+ adjusted_red_value = 128 * 138/255 = 69
+ adjusted_green_value = 128 * 43/255 = 21
+ adjusted_blue_value = 128 * 226/255 = 113
+
+..
Reading the global brightness file will return the current brightness value of
the color LED group.
-cat /sys/class/leds/multicolor:status/brightness
-128
+.. code-block:: console
+
+ # cat /sys/class/leds/multicolor:status/brightness
+ 128
+
+..
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index aacccea5dfe4..953aa837958b 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -2017,7 +2017,8 @@ attribute-sets:
attributes:
-
name: act
- type: nest
+ type: indexed-array
+ sub-type: nest
nested-attributes: tc-act-attrs
-
name: police
@@ -2250,7 +2251,8 @@ attribute-sets:
attributes:
-
name: act
- type: nest
+ type: indexed-array
+ sub-type: nest
nested-attributes: tc-act-attrs
-
name: police
@@ -2745,7 +2747,7 @@ attribute-sets:
type: u16
byte-order: big-endian
-
- name: key-l2-tpv3-sid
+ name: key-l2tpv3-sid
type: u32
byte-order: big-endian
-
@@ -3504,7 +3506,7 @@ attribute-sets:
name: rate64
type: u64
-
- name: prate4
+ name: prate64
type: u64
-
name: burst
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
index b8fef8101176..7aabead90648 100644
--- a/Documentation/networking/timestamping.rst
+++ b/Documentation/networking/timestamping.rst
@@ -811,11 +811,9 @@ Documentation/devicetree/bindings/ptp/timestamper.txt for more details.
3.2.4 Other caveats for MAC drivers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Stacked PHCs, especially DSA (but not only) - since that doesn't require any
-modification to MAC drivers, so it is more difficult to ensure correctness of
-all possible code paths - is that they uncover bugs which were impossible to
-trigger before the existence of stacked PTP clocks. One example has to do with
-this line of code, already presented earlier::
+The use of stacked PHCs may uncover MAC driver bugs which were impossible to
+trigger without them. One example has to do with this line of code, already
+presented earlier::
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index ada4938c37e5..cbdf7520aaa6 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -230,7 +230,7 @@ Drivers must provide a pointer to the allocated and initialized new EM
and will be visible to other sub-systems in the kernel (thermal, powercap).
The main design goal for this API is to be fast and avoid extra calculations
or memory allocations at runtime. When pre-computed EMs are available in the
-device driver, than it should be possible to simply re-use them with low
+device driver, then it should be possible to simply reuse them with low
performance overhead.
In order to free the EM, provided earlier by the driver (e.g. when the module
@@ -381,17 +381,17 @@ up periodically to check the temperature and modify the EM data::
26 rcu_read_unlock();
27
28 /* Calculate 'cost' values for EAS */
- 29 ret = em_dev_compute_costs(dev, table, pd->nr_perf_states);
+ 29 ret = em_dev_compute_costs(dev, new_table, pd->nr_perf_states);
30 if (ret) {
31 dev_warn(dev, "EM: compute costs failed %d\n", ret);
- 32 em_free_table(em_table);
+ 32 em_table_free(em_table);
33 return;
34 }
35
36 ret = em_dev_update_perf_domain(dev, em_table);
37 if (ret) {
38 dev_warn(dev, "EM: update failed %d\n", ret);
- 39 em_free_table(em_table);
+ 39 em_table_free(em_table);
40 return;
41 }
42
diff --git a/Documentation/process/1.Intro.rst b/Documentation/process/1.Intro.rst
index c3d0270bbfb3..25ca49f7ae4d 100644
--- a/Documentation/process/1.Intro.rst
+++ b/Documentation/process/1.Intro.rst
@@ -251,12 +251,12 @@ there is no prospect of a migration to version 3 of the GPL in the
foreseeable future.
It is imperative that all code contributed to the kernel be legitimately
-free software. For that reason, code from anonymous (or pseudonymous)
-contributors will not be accepted. All contributors are required to "sign
-off" on their code, stating that the code can be distributed with the
-kernel under the GPL. Code which has not been licensed as free software by
-its owner, or which risks creating copyright-related problems for the
-kernel (such as code which derives from reverse-engineering efforts lacking
+free software. For that reason, code from contributors without a known
+identity or anonymous contributors will not be accepted. All contributors are
+required to "sign off" on their code, stating that the code can be distributed
+with the kernel under the GPL. Code which has not been licensed as free
+software by its owner, or which risks creating copyright-related problems for
+the kernel (such as code which derives from reverse-engineering efforts lacking
proper safeguards) cannot be contributed.
Questions about copyright-related issues are common on Linux development
diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst
index 906c47f1a9e5..fc0b0bbcd34d 100644
--- a/Documentation/process/adding-syscalls.rst
+++ b/Documentation/process/adding-syscalls.rst
@@ -248,6 +248,52 @@ To summarize, you need a commit that includes:
- fallback stub in ``kernel/sys_ni.c``
+.. _syscall_generic_6_11:
+
+Since 6.11
+~~~~~~~~~~
+
+Starting with kernel version 6.11, general system call implementation for the
+following architectures no longer requires modifications to
+``include/uapi/asm-generic/unistd.h``:
+
+ - arc
+ - arm64
+ - csky
+ - hexagon
+ - loongarch
+ - nios2
+ - openrisc
+ - riscv
+
+Instead, you need to update ``scripts/syscall.tbl`` and, if applicable, adjust
+``arch/*/kernel/Makefile.syscalls``.
+
+As ``scripts/syscall.tbl`` serves as a common syscall table across multiple
+architectures, a new entry is required in this table::
+
+ 468 common xyzzy sys_xyzzy
+
+Note that adding an entry to ``scripts/syscall.tbl`` with the "common" ABI
+also affects all architectures that share this table. For more limited or
+architecture-specific changes, consider using an architecture-specific ABI or
+defining a new one.
+
+If a new ABI, say ``xyz``, is introduced, the corresponding updates should be
+made to ``arch/*/kernel/Makefile.syscalls`` as well::
+
+ syscall_abis_{32,64} += xyz (...)
+
+To summarize, you need a commit that includes:
+
+ - ``CONFIG`` option for the new function, normally in ``init/Kconfig``
+ - ``SYSCALL_DEFINEn(xyzzy, ...)`` for the entry point
+ - corresponding prototype in ``include/linux/syscalls.h``
+ - new entry in ``scripts/syscall.tbl``
+ - (if needed) Makefile updates in ``arch/*/kernel/Makefile.syscalls``
+ - fallback stub in ``kernel/sys_ni.c``
+
+
x86 System Call Implementation
------------------------------
@@ -353,6 +399,41 @@ To summarize, you need:
``include/uapi/asm-generic/unistd.h``
+Since 6.11
+~~~~~~~~~~
+
+This applies to all the architectures listed in :ref:`Since 6.11<syscall_generic_6_11>`
+under "Generic System Call Implementation", except arm64. See
+:ref:`Compatibility System Calls (arm64)<compat_arm64>` for more information.
+
+You need to extend the entry in ``scripts/syscall.tbl`` with an extra column
+to indicate that a 32-bit userspace program running on a 64-bit kernel should
+hit the compat entry point::
+
+ 468 common xyzzy sys_xyzzy compat_sys_xyzzy
+
+To summarize, you need:
+
+ - ``COMPAT_SYSCALL_DEFINEn(xyzzy, ...)`` for the compat entry point
+ - corresponding prototype in ``include/linux/compat.h``
+ - modification of the entry in ``scripts/syscall.tbl`` to include an extra
+ "compat" column
+ - (if needed) 32-bit mapping struct in ``include/linux/compat.h``
+
+
+.. _compat_arm64:
+
+Compatibility System Calls (arm64)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+On arm64, there is a dedicated syscall table for compatibility system calls
+targeting 32-bit (AArch32) userspace: ``arch/arm64/tools/syscall_32.tbl``.
+You need to add an additional line to this table specifying the compat
+entry point::
+
+ 468 common xyzzy sys_xyzzy compat_sys_xyzzy
+
+
Compatibility System Calls (x86)
--------------------------------
@@ -575,3 +656,6 @@ References and Sources
- Recommendation from Linus Torvalds that x32 system calls should prefer
compatibility with 64-bit versions rather than 32-bit versions:
https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com
+ - Patch series revising system call table infrastructure to use
+ scripts/syscall.tbl across multiple architectures:
+ https://lore.kernel.org/lkml/20240704143611.2979589-1-arnd@kernel.org
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index 0b2654e2164b..a1869c38046e 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -1,3 +1,5 @@
+.. _sched-ext:
+
==========================
Extensible Scheduler Class
==========================
@@ -47,8 +49,8 @@ options should be enabled to use sched_ext:
sched_ext is used only when the BPF scheduler is loaded and running.
If a task explicitly sets its scheduling policy to ``SCHED_EXT``, it will be
-treated as ``SCHED_NORMAL`` and scheduled by CFS until the BPF scheduler is
-loaded.
+treated as ``SCHED_NORMAL`` and scheduled by the fair-class scheduler until the
+BPF scheduler is loaded.
When the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is not set
in ``ops->flags``, all ``SCHED_NORMAL``, ``SCHED_BATCH``, ``SCHED_IDLE``, and
@@ -57,11 +59,11 @@ in ``ops->flags``, all ``SCHED_NORMAL``, ``SCHED_BATCH``, ``SCHED_IDLE``, and
However, when the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is
set in ``ops->flags``, only tasks with the ``SCHED_EXT`` policy are scheduled
by sched_ext, while tasks with ``SCHED_NORMAL``, ``SCHED_BATCH`` and
-``SCHED_IDLE`` policies are scheduled by CFS.
+``SCHED_IDLE`` policies are scheduled by the fair-class scheduler.
Terminating the sched_ext scheduler program, triggering `SysRq-S`, or
detection of any internal error including stalled runnable tasks aborts the
-BPF scheduler and reverts all tasks back to CFS.
+BPF scheduler and reverts all tasks back to the fair-class scheduler.
.. code-block:: none
@@ -197,8 +199,8 @@ Dispatch Queues
To match the impedance between the scheduler core and the BPF scheduler,
sched_ext uses DSQs (dispatch queues) which can operate as both a FIFO and a
priority queue. By default, there is one global FIFO (``SCX_DSQ_GLOBAL``),
-and one local dsq per CPU (``SCX_DSQ_LOCAL``). The BPF scheduler can manage
-an arbitrary number of dsq's using ``scx_bpf_create_dsq()`` and
+and one local DSQ per CPU (``SCX_DSQ_LOCAL``). The BPF scheduler can manage
+an arbitrary number of DSQs using ``scx_bpf_create_dsq()`` and
``scx_bpf_destroy_dsq()``.
A CPU always executes a task from its local DSQ. A task is "inserted" into a
diff --git a/Documentation/scheduler/sched-stats.rst b/Documentation/scheduler/sched-stats.rst
index 08b6bc9a315c..d82e7d2b54f0 100644
--- a/Documentation/scheduler/sched-stats.rst
+++ b/Documentation/scheduler/sched-stats.rst
@@ -135,7 +135,7 @@ of idleness (busy, idle and newly idle):
cpu was idle but no busier group was found
23) # of times in this domain sched_balance_rq() was called when the
- was just becoming idle
+ cpu was just becoming idle
24) # of times in this domain sched_balance_rq() checked but found the
load did not require balancing when the cpu was just becoming idle
25) # of times in this domain sched_balance_rq() tried to move one or more
diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py
index ecf54d22e9dc..fd633f7a0bc3 100644
--- a/Documentation/sphinx/automarkup.py
+++ b/Documentation/sphinx/automarkup.py
@@ -128,13 +128,8 @@ def note_failure(target):
# own C role, but both match the same regex, so we try both.
#
def markup_func_ref_sphinx3(docname, app, match):
- cdom = app.env.domains['c']
- #
- # Go through the dance of getting an xref out of the C domain
- #
base_target = match.group(2)
target_text = nodes.Text(match.group(0))
- xref = None
possible_targets = [base_target]
# Check if this document has a namespace, and if so, try
# cross-referencing inside it first.
@@ -146,22 +141,8 @@ def markup_func_ref_sphinx3(docname, app, match):
if (target not in Skipfuncs) and not failure_seen(target):
lit_text = nodes.literal(classes=['xref', 'c', 'c-func'])
lit_text += target_text
- pxref = addnodes.pending_xref('', refdomain = 'c',
- reftype = 'function',
- reftarget = target,
- modname = None,
- classname = None)
- #
- # XXX The Latex builder will throw NoUri exceptions here,
- # work around that by ignoring them.
- #
- try:
- xref = cdom.resolve_xref(app.env, docname, app.builder,
- 'function', target, pxref,
- lit_text)
- except NoUri:
- xref = None
-
+ xref = add_and_resolve_xref(app, docname, 'c', 'function',
+ target, contnode=lit_text)
if xref:
return xref
note_failure(target)
@@ -188,13 +169,8 @@ def markup_c_ref(docname, app, match):
RE_typedef: 'type',
}
- cdom = app.env.domains['c']
- #
- # Go through the dance of getting an xref out of the C domain
- #
base_target = match.group(2)
target_text = nodes.Text(match.group(0))
- xref = None
possible_targets = [base_target]
# Check if this document has a namespace, and if so, try
# cross-referencing inside it first.
@@ -206,21 +182,9 @@ def markup_c_ref(docname, app, match):
if not (match.re == RE_function and target in Skipfuncs):
lit_text = nodes.literal(classes=['xref', 'c', class_str[match.re]])
lit_text += target_text
- pxref = addnodes.pending_xref('', refdomain = 'c',
- reftype = reftype_str[match.re],
- reftarget = target, modname = None,
- classname = None)
- #
- # XXX The Latex builder will throw NoUri exceptions here,
- # work around that by ignoring them.
- #
- try:
- xref = cdom.resolve_xref(app.env, docname, app.builder,
- reftype_str[match.re], target, pxref,
- lit_text)
- except NoUri:
- xref = None
-
+ xref = add_and_resolve_xref(app, docname, 'c',
+ reftype_str[match.re], target,
+ contnode=lit_text)
if xref:
return xref
@@ -231,30 +195,12 @@ def markup_c_ref(docname, app, match):
# cross reference to that page
#
def markup_doc_ref(docname, app, match):
- stddom = app.env.domains['std']
- #
- # Go through the dance of getting an xref out of the std domain
- #
absolute = match.group(1)
target = match.group(2)
if absolute:
target = "/" + target
- xref = None
- pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc',
- reftarget = target, modname = None,
- classname = None, refexplicit = False)
- #
- # XXX The Latex builder will throw NoUri exceptions here,
- # work around that by ignoring them.
- #
- try:
- xref = stddom.resolve_xref(app.env, docname, app.builder, 'doc',
- target, pxref, None)
- except NoUri:
- xref = None
- #
- # Return the xref if we got it; otherwise just return the plain text.
- #
+
+ xref = add_and_resolve_xref(app, docname, 'std', 'doc', target)
if xref:
return xref
else:
@@ -265,10 +211,6 @@ def markup_doc_ref(docname, app, match):
# with a cross reference to that page
#
def markup_abi_ref(docname, app, match, warning=False):
- stddom = app.env.domains['std']
- #
- # Go through the dance of getting an xref out of the std domain
- #
kernel_abi = get_kernel_abi()
fname = match.group(1)
@@ -280,7 +222,18 @@ def markup_abi_ref(docname, app, match, warning=False):
kernel_abi.log.warning("%s not found", fname)
return nodes.Text(match.group(0))
- pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'ref',
+ xref = add_and_resolve_xref(app, docname, 'std', 'ref', target)
+ if xref:
+ return xref
+ else:
+ return nodes.Text(match.group(0))
+
+def add_and_resolve_xref(app, docname, domain, reftype, target, contnode=None):
+ #
+ # Go through the dance of getting an xref out of the corresponding domain
+ #
+ dom_obj = app.env.domains[domain]
+ pxref = addnodes.pending_xref('', refdomain = domain, reftype = reftype,
reftarget = target, modname = None,
classname = None, refexplicit = False)
@@ -289,17 +242,15 @@ def markup_abi_ref(docname, app, match, warning=False):
# work around that by ignoring them.
#
try:
- xref = stddom.resolve_xref(app.env, docname, app.builder, 'ref',
- target, pxref, None)
+ xref = dom_obj.resolve_xref(app.env, docname, app.builder, reftype,
+ target, pxref, contnode)
except NoUri:
xref = None
- #
- # Return the xref if we got it; otherwise just return the plain text.
- #
+
if xref:
return xref
- else:
- return nodes.Text(match.group(0))
+
+ return None
#
# Variant of markup_abi_ref() that warns whan a reference is not found
diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py
index 39ddae6ae7dd..b818d4c77924 100644
--- a/Documentation/sphinx/kerneldoc.py
+++ b/Documentation/sphinx/kerneldoc.py
@@ -40,8 +40,40 @@ from docutils.parsers.rst import directives, Directive
import sphinx
from sphinx.util.docutils import switch_source_input
from sphinx.util import logging
+from pprint import pformat
+
+srctree = os.path.abspath(os.environ["srctree"])
+sys.path.insert(0, os.path.join(srctree, "scripts/lib/kdoc"))
+
+from kdoc_files import KernelFiles
+from kdoc_output import RestFormat
__version__ = '1.0'
+kfiles = None
+logger = logging.getLogger(__name__)
+
+def cmd_str(cmd):
+ """
+ Helper function to output a command line that can be used to produce
+ the same records via command line. Helpful to debug troubles at the
+ script.
+ """
+
+ cmd_line = ""
+
+ for w in cmd:
+ if w == "" or " " in w:
+ esc_cmd = "'" + w + "'"
+ else:
+ esc_cmd = w
+
+ if cmd_line:
+ cmd_line += " " + esc_cmd
+ continue
+ else:
+ cmd_line = esc_cmd
+
+ return cmd_line
class KernelDocDirective(Directive):
"""Extract kernel-doc comments from the specified file"""
@@ -56,19 +88,48 @@ class KernelDocDirective(Directive):
'functions': directives.unchanged,
}
has_content = False
- logger = logging.getLogger('kerneldoc')
+ verbose = 0
+
+ parse_args = {}
+ msg_args = {}
+
+ def handle_args(self):
- def run(self):
env = self.state.document.settings.env
cmd = [env.config.kerneldoc_bin, '-rst', '-enable-lineno']
filename = env.config.kerneldoc_srctree + '/' + self.arguments[0]
+
+ # Arguments used by KernelFiles.parse() function
+ self.parse_args = {
+ "file_list": [filename],
+ "export_file": []
+ }
+
+ # Arguments used by KernelFiles.msg() function
+ self.msg_args = {
+ "enable_lineno": True,
+ "export": False,
+ "internal": False,
+ "symbol": [],
+ "nosymbol": [],
+ "no_doc_sections": False
+ }
+
export_file_patterns = []
+ verbose = os.environ.get("V")
+ if verbose:
+ try:
+ self.verbose = int(verbose)
+ except ValueError:
+ pass
+
# Tell sphinx of the dependency
env.note_dependency(os.path.abspath(filename))
- tab_width = self.options.get('tab-width', self.state.document.settings.tab_width)
+ self.tab_width = self.options.get('tab-width',
+ self.state.document.settings.tab_width)
# 'function' is an alias of 'identifiers'
if 'functions' in self.options:
@@ -77,80 +138,166 @@ class KernelDocDirective(Directive):
# FIXME: make this nicer and more robust against errors
if 'export' in self.options:
cmd += ['-export']
+ self.msg_args["export"] = True
export_file_patterns = str(self.options.get('export')).split()
elif 'internal' in self.options:
cmd += ['-internal']
+ self.msg_args["internal"] = True
export_file_patterns = str(self.options.get('internal')).split()
elif 'doc' in self.options:
- cmd += ['-function', str(self.options.get('doc'))]
+ func = str(self.options.get('doc'))
+ cmd += ['-function', func]
+ self.msg_args["symbol"].append(func)
elif 'identifiers' in self.options:
identifiers = self.options.get('identifiers').split()
if identifiers:
for i in identifiers:
+ i = i.rstrip("\\").strip()
+ if not i:
+ continue
+
cmd += ['-function', i]
+ self.msg_args["symbol"].append(i)
else:
cmd += ['-no-doc-sections']
+ self.msg_args["no_doc_sections"] = True
if 'no-identifiers' in self.options:
no_identifiers = self.options.get('no-identifiers').split()
if no_identifiers:
for i in no_identifiers:
+ i = i.rstrip("\\").strip()
+ if not i:
+ continue
+
cmd += ['-nosymbol', i]
+ self.msg_args["nosymbol"].append(i)
for pattern in export_file_patterns:
+ pattern = pattern.rstrip("\\").strip()
+ if not pattern:
+ continue
+
for f in glob.glob(env.config.kerneldoc_srctree + '/' + pattern):
env.note_dependency(os.path.abspath(f))
cmd += ['-export-file', f]
+ self.parse_args["export_file"].append(f)
+
+ # Export file is needed by both parse and msg, as kernel-doc
+ # cache exports.
+ self.msg_args["export_file"] = self.parse_args["export_file"]
cmd += [filename]
- try:
- self.logger.verbose("calling kernel-doc '%s'" % (" ".join(cmd)))
+ return cmd
+
+ def run_cmd(self, cmd):
+ """
+ Execute an external kernel-doc command.
+ """
+
+ env = self.state.document.settings.env
+ node = nodes.section()
+
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = p.communicate()
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate()
+ out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8')
- out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8')
+ if p.returncode != 0:
+ sys.stderr.write(err)
- if p.returncode != 0:
- sys.stderr.write(err)
+ logger.warning("kernel-doc '%s' failed with return code %d"
+ % (" ".join(cmd), p.returncode))
+ return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))]
+ elif env.config.kerneldoc_verbosity > 0:
+ sys.stderr.write(err)
+
+ filenames = self.parse_args["file_list"]
+ for filename in filenames:
+ self.parse_msg(filename, node, out, cmd)
+
+ return node.children
+
+ def parse_msg(self, filename, node, out, cmd):
+ """
+ Handles a kernel-doc output for a given file
+ """
+
+ env = self.state.document.settings.env
- self.logger.warning("kernel-doc '%s' failed with return code %d"
- % (" ".join(cmd), p.returncode))
- return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))]
- elif env.config.kerneldoc_verbosity > 0:
- sys.stderr.write(err)
+ lines = statemachine.string2lines(out, self.tab_width,
+ convert_whitespace=True)
+ result = ViewList()
+
+ lineoffset = 0;
+ line_regex = re.compile(r"^\.\. LINENO ([0-9]+)$")
+ for line in lines:
+ match = line_regex.search(line)
+ if match:
+ # sphinx counts lines from 0
+ lineoffset = int(match.group(1)) - 1
+ # we must eat our comments since the upset the markup
+ else:
+ doc = str(env.srcdir) + "/" + env.docname + ":" + str(self.lineno)
+ result.append(line, doc + ": " + filename, lineoffset)
+ lineoffset += 1
+
+ self.do_parse(result, node)
+
+ def run_kdoc(self, cmd, kfiles):
+ """
+ Execute kernel-doc classes directly instead of running as a separate
+ command.
+ """
+
+ env = self.state.document.settings.env
- lines = statemachine.string2lines(out, tab_width, convert_whitespace=True)
- result = ViewList()
+ node = nodes.section()
- lineoffset = 0;
- line_regex = re.compile(r"^\.\. LINENO ([0-9]+)$")
- for line in lines:
- match = line_regex.search(line)
- if match:
- # sphinx counts lines from 0
- lineoffset = int(match.group(1)) - 1
- # we must eat our comments since the upset the markup
- else:
- doc = str(env.srcdir) + "/" + env.docname + ":" + str(self.lineno)
- result.append(line, doc + ": " + filename, lineoffset)
- lineoffset += 1
+ kfiles.parse(**self.parse_args)
+ filenames = self.parse_args["file_list"]
- node = nodes.section()
- self.do_parse(result, node)
+ for filename, out in kfiles.msg(**self.msg_args, filenames=filenames):
+ self.parse_msg(filename, node, out, cmd)
- return node.children
+ return node.children
+
+ def run(self):
+ global kfiles
+
+ cmd = self.handle_args()
+ if self.verbose >= 1:
+ logger.info(cmd_str(cmd))
+
+ try:
+ if kfiles:
+ return self.run_kdoc(cmd, kfiles)
+ else:
+ return self.run_cmd(cmd)
except Exception as e: # pylint: disable=W0703
- self.logger.warning("kernel-doc '%s' processing failed with: %s" %
- (" ".join(cmd), str(e)))
+ logger.warning("kernel-doc '%s' processing failed with: %s" %
+ (cmd_str(cmd), pformat(e)))
return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))]
def do_parse(self, result, node):
with switch_source_input(self.state, result):
self.state.nested_parse(result, 0, node, match_titles=1)
+def setup_kfiles(app):
+ global kfiles
+
+ kerneldoc_bin = app.env.config.kerneldoc_bin
+
+ if kerneldoc_bin and kerneldoc_bin.endswith("kernel-doc.py"):
+ print("Using Python kernel-doc")
+ out_style = RestFormat()
+ kfiles = KernelFiles(out_style=out_style, logger=logger)
+ else:
+ print(f"Using {kerneldoc_bin}")
+
+
def setup(app):
app.add_config_value('kerneldoc_bin', None, 'env')
app.add_config_value('kerneldoc_srctree', None, 'env')
@@ -158,6 +305,8 @@ def setup(app):
app.add_directive('kernel-doc', KernelDocDirective)
+ app.connect('builder-inited', setup_kfiles)
+
return dict(
version = __version__,
parallel_read_safe = True,
diff --git a/Documentation/staging/speculation.rst b/Documentation/staging/speculation.rst
index 8045d99bcf12..0d526ba55d14 100644
--- a/Documentation/staging/speculation.rst
+++ b/Documentation/staging/speculation.rst
@@ -63,7 +63,6 @@ of an out-of-bounds address, while the second call will influence
microarchitectural state dependent on this value. This may provide an
arbitrary read primitive.
-====================================
Mitigating speculation side-channels
====================================
diff --git a/Documentation/tools/rtla/common_timerlat_description.rst b/Documentation/tools/rtla/common_timerlat_description.rst
index 321201cb8597..49fcae3ffdec 100644
--- a/Documentation/tools/rtla/common_timerlat_description.rst
+++ b/Documentation/tools/rtla/common_timerlat_description.rst
@@ -6,5 +6,13 @@ debugging of operating system timer latency.
The *timerlat* tracer outputs information in two ways. It periodically
prints the timer latency at the timer *IRQ* handler and the *Thread*
-handler. It also enable the trace of the most relevant information via
+handler. It also enables the trace of the most relevant information via
**osnoise:** tracepoints.
+
+The **rtla timerlat** tool sets the options of the *timerlat* tracer
+and collects and displays a summary of the results. By default,
+the collection is done synchronously in kernel space using a dedicated
+BPF program attached to the *timerlat* tracer. If either BPF or
+the **osnoise:timerlat_sample** tracepoint it attaches to is
+unavailable, the **rtla timerlat** tool falls back to using tracefs to
+process the data asynchronously in user space.
diff --git a/Documentation/tools/rtla/rtla-timerlat.rst b/Documentation/tools/rtla/rtla-timerlat.rst
index 44a49e6f302b..20e2d259467f 100644
--- a/Documentation/tools/rtla/rtla-timerlat.rst
+++ b/Documentation/tools/rtla/rtla-timerlat.rst
@@ -16,13 +16,10 @@ DESCRIPTION
.. include:: common_timerlat_description.rst
-The *timerlat* tracer outputs information in two ways. It periodically
-prints the timer latency at the timer *IRQ* handler and the *Thread* handler.
-It also provides information for each noise via the **osnoise:** tracepoints.
The **rtla timerlat top** mode displays a summary of the periodic output
-from the *timerlat* tracer. The **rtla hist hist** mode displays a histogram
-of each tracer event occurrence. For further details, please refer to the
-respective man page.
+from the *timerlat* tracer. The **rtla timerlat hist** mode displays
+a histogram of each tracer event occurrence. For further details, please
+refer to the respective man page.
MODES
=====
diff --git a/Documentation/trace/coresight/panic.rst b/Documentation/trace/coresight/panic.rst
index a58aa914c241..6e4bde953cae 100644
--- a/Documentation/trace/coresight/panic.rst
+++ b/Documentation/trace/coresight/panic.rst
@@ -67,8 +67,8 @@ Trace data captured at the time of panic, can be read from rebooted kernel
or from crashdump kernel using a special device file /dev/crash_tmc_xxx.
This device file is created only when there is a valid crashdata available.
-General flow of trace capture and decode incase of kernel panic
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+General flow of trace capture and decode in case of kernel panic
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1. Enable source and sink on all the cores using the sysfs interface.
ETR sinks should have trace buffers allocated from reserved memory,
by selecting "resrv" buffer mode from sysfs.
diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst
index 2c991dc96ace..cc1dc5a087e8 100644
--- a/Documentation/trace/index.rst
+++ b/Documentation/trace/index.rst
@@ -1,39 +1,103 @@
-==========================
-Linux Tracing Technologies
-==========================
+================================
+Linux Tracing Technologies Guide
+================================
+
+Tracing in the Linux kernel is a powerful mechanism that allows
+developers and system administrators to analyze and debug system
+behavior. This guide provides documentation on various tracing
+frameworks and tools available in the Linux kernel.
+
+Introduction to Tracing
+-----------------------
+
+This section provides an overview of Linux tracing mechanisms
+and debugging approaches.
.. toctree::
- :maxdepth: 2
+ :maxdepth: 1
- ftrace-design
+ debugging
+ tracepoints
tracepoint-analysis
+ ring-buffer-map
+
+Core Tracing Frameworks
+-----------------------
+
+The following are the primary tracing frameworks integrated into
+the Linux kernel.
+
+.. toctree::
+ :maxdepth: 1
+
ftrace
+ ftrace-design
ftrace-uses
- fprobe
kprobes
kprobetrace
- uprobetracer
fprobetrace
- tracepoints
+ fprobe
+ ring-buffer-design
+
+Event Tracing and Analysis
+--------------------------
+
+A detailed explanation of event tracing mechanisms and their
+applications.
+
+.. toctree::
+ :maxdepth: 1
+
events
events-kmem
events-power
events-nmi
events-msr
- mmiotrace
+ boottime-trace
histogram
histogram-design
- boottime-trace
- debugging
- hwlat_detector
- osnoise-tracer
- timerlat-tracer
+
+Hardware and Performance Tracing
+--------------------------------
+
+This section covers tracing features that monitor hardware
+interactions and system performance.
+
+.. toctree::
+ :maxdepth: 1
+
intel_th
- ring-buffer-design
- ring-buffer-map
stm
sys-t
coresight/index
- user_events
rv/index
hisi-ptt
+ mmiotrace
+ hwlat_detector
+ osnoise-tracer
+ timerlat-tracer
+
+User-Space Tracing
+------------------
+
+These tools allow tracing user-space applications and
+interactions.
+
+.. toctree::
+ :maxdepth: 1
+
+ user_events
+ uprobetracer
+
+Additional Resources
+--------------------
+
+For more details, refer to the respective documentation of each
+tracing tool and framework.
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/sp_SP/process/2.Process.rst b/Documentation/translations/sp_SP/process/2.Process.rst
index 5993eed71563..c21b0134cfa1 100644
--- a/Documentation/translations/sp_SP/process/2.Process.rst
+++ b/Documentation/translations/sp_SP/process/2.Process.rst
@@ -428,13 +428,14 @@ los desarrolladores, que corren el riesgo de quedar enterrados bajo una
carga de correo electrónico, incumplir las convenciones utilizadas en las
listas de Linux, o ambas cosas.
-La mayoría de las listas de correo del kernel se ejecutan en
-vger.kernel.org; la lista principal se puede encontrar en:
+La mayoría de las listas de correo del kernel se alojan en kernel.org; la
+lista principal se puede encontrar en:
- http://vger.kernel.org/vger-lists.html
+ https://subspace.kernel.org
-Sim embargo, hay listas alojadas en otros lugares; varios de ellos se
-encuentran en redhat.com/mailman/listinfo.
+Sin embargo, hay listas alojadas en otros lugares; consulte el archivo
+MAINTAINERS para obtener la lista relevante para cualquier subsistema en
+particular.
La lista de correo principal para el desarrollo del kernel es, por
supuesto, linux-kernel. Esta lista es un lugar intimidante; el volumen
diff --git a/Documentation/translations/sp_SP/process/howto.rst b/Documentation/translations/sp_SP/process/howto.rst
index 72ea855ac9dc..e1a6e6a52ae4 100644
--- a/Documentation/translations/sp_SP/process/howto.rst
+++ b/Documentation/translations/sp_SP/process/howto.rst
@@ -334,7 +334,7 @@ con el árbol principal, necesitan probar su integración. Para ello, existe
un repositorio especial de pruebas en el que se encuentran casi todos los
árboles de subsistema, actualizado casi a diario:
- https://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
+ https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
De esta manera, linux-next ofrece una perspectiva resumida de lo que se
espera que entre en el kernel principal en el próximo período de "merge"
@@ -378,13 +378,13 @@ desarrolladores del kernel participan en la lista de correo del kernel de
Linux. Detalles sobre cómo para suscribirse y darse de baja de la lista se
pueden encontrar en:
- http://vger.kernel.org/vger-lists.html#linux-kernel
+ https://subspace.kernel.org/subscribing.html
Existen archivos de la lista de correo en la web en muchos lugares
distintos. Utilice un motor de búsqueda para encontrar estos archivos. Por
ejemplo:
- http://dir.gmane.org/gmane.linux.kernel
+ https://lore.kernel.org/linux-kernel/
Es muy recomendable que busque en los archivos sobre el tema que desea
tratar, antes de publicarlo en la lista. Un montón de cosas ya discutidas
@@ -398,13 +398,13 @@ los diferentes grupos.
Muchas de las listas están alojadas en kernel.org. La información sobre
estas puede ser encontrada en:
- http://vger.kernel.org/vger-lists.html
+ https://subspace.kernel.org
Recuerde mantener buenos hábitos de comportamiento al usar las listas.
Aunque un poco cursi, la siguiente URL tiene algunas pautas simples para
interactuar con la lista (o cualquier lista):
- http://www.albion.com/netiquette/
+ https://subspace.kernel.org/etiquette.html
Si varias personas responden a su correo, el CC (lista de destinatarios)
puede hacerse bastante grande. No elimine a nadie de la lista CC: sin una
diff --git a/Documentation/translations/sp_SP/process/kernel-docs.rst b/Documentation/translations/sp_SP/process/kernel-docs.rst
index a62c6854f59b..b9e0ca4be324 100644
--- a/Documentation/translations/sp_SP/process/kernel-docs.rst
+++ b/Documentation/translations/sp_SP/process/kernel-docs.rst
@@ -170,9 +170,8 @@ Recursos varios
* Título: **linux-kernel mailing list archives and search engines**
- :URL: http://vger.kernel.org/vger-lists.html
- :URL: http://www.uwsg.indiana.edu/hypermail/linux/kernel/index.html
- :URL: http://groups.google.com/group/mlist.linux.kernel
+ :URL: https://subspace.kernel.org
+ :URL: https://lore.kernel.org
:Palabras Clave: linux-kernel, archives, buscar, search, archivos.
:Descripción: Algunos de los archivadores de listas de correo del
kernel de Linux. Si usted tiene uno mejor/otro, por favor hágamelo
diff --git a/Documentation/translations/sp_SP/process/submitting-patches.rst b/Documentation/translations/sp_SP/process/submitting-patches.rst
index 328ec80bd61d..ecb08b14c2c0 100644
--- a/Documentation/translations/sp_SP/process/submitting-patches.rst
+++ b/Documentation/translations/sp_SP/process/submitting-patches.rst
@@ -136,11 +136,11 @@ algo documentado en la web, referencie esto.
Cuando se vincule a archivos de listas de correo, preferiblemente use el
servicio de archivador de mensajes lore.kernel.org. Para crear la URL del
-enlace, utilice el contenido del encabezado ("header") ``Message-Id`` del
+enlace, utilice el contenido del encabezado ("header") ``Message-ID`` del
mensaje sin los corchetes angulares que lo rodean.
Por ejemplo::
- Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+ Link: https://lore.kernel.org/30th.anniversary.repost@klaava.Helsinki.FI
Verifique el enlace para asegurarse de que realmente funciona y apunta al
mensaje correspondiente.
@@ -257,10 +257,10 @@ archivo MAINTAINERS una lista específica de los subsistemas; su parche
probablemente recibirá más atención allí. Sin embargo, no envíe spam a
listas no relacionadas.
-Muchas listas relacionadas con el kernel están alojadas en vger.kernel.org;
+Muchas listas relacionadas con el kernel están alojadas en kernel.org;
puedes encontrar un listado de estas en
-http://vger.kernel.org/vger-lists.html. Existen listas relacionadas con el
-kernel alojadas en otros lugares, no obstante.
+https://subspace.kernel.org. Existen listas relacionadas con el kernel
+alojadas en otros lugares, no obstante.
¡No envíe más de 15 parches a la vez a las listas de correo de vger!
@@ -907,9 +907,6 @@ Referencias
<http://www.kroah.com/log/linux/maintainer-06.html>
-NO!!!! Gente, no mas bombas enormes de parches a linux-kernel@vger.kernel.org!
- <https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
-
Kernel Documentation/process/coding-style.rst
Email de Linus Torvalds sobre la forma canónica de los parches:
diff --git a/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst b/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst
index 9174fce12c1b..4a2d3b27aa4d 100644
--- a/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst
+++ b/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst
@@ -60,8 +60,6 @@ irq_domain和一个hwirqå·ä½œä¸ºå‚数。 如果hwirq的映射还ä¸å­˜åœ¨ï¼Œé‚
- irq_find_mapping()返回给定域和hwirqçš„Linux IRQå·ï¼Œå¦‚果没有映射则返回0。
-- irq_linear_revmap()现与irq_find_mapping()相åŒï¼Œå·²è¢«åºŸå¼ƒã€‚
-
- generic_handle_domain_irq()处ç†ä¸€ä¸ªç”±åŸŸå’Œhwirqå·æè¿°çš„ä¸­æ–­ã€‚
请注æ„,irq域的查找必须å‘生在与RCU读临界区兼容的上下文中。
@@ -83,7 +81,6 @@ irq_domain映射的类型
::
- irq_domain_add_linear()
irq_domain_create_linear()
线性å呿˜ å°„维护了一个固定大å°çš„表,该表以hwirqå·ä¸ºç´¢å¼•。 当一个hwirq被映射
@@ -104,7 +101,6 @@ irq_domain_add_linear()和irq_domain_create_linear()在功能上是等价的,
::
- irq_domain_add_tree()
irq_domain_create_tree()
irq_domain维护ç€ä»Žhwirqå·åˆ°Linux IRQçš„radix的树状映射。 当一个hwirq被映射时,
@@ -124,7 +120,7 @@ irq_domain_add_tree()å’Œirq_domain_create_tree()在功能上是等价的,除äº
::
- irq_domain_add_nomap()
+ irq_domain_create_nomap()
当硬件中的hwirqå·æ˜¯å¯ç¼–程的时候,就å¯ä»¥é‡‡ç”¨æ— æ˜ å°„类型。 åœ¨è¿™ç§æƒ…况下,最好将
Linux IRQå·ç¼–入硬件本身,这样就ä¸éœ€è¦æ˜ å°„了。 调用irq_create_direct_mapping()
@@ -138,8 +134,6 @@ Linux IRQå·ç¼–入硬件本身,这样就ä¸éœ€è¦æ˜ å°„了。 调用irq_create
::
- irq_domain_add_simple()
- irq_domain_add_legacy()
irq_domain_create_simple()
irq_domain_create_legacy()
diff --git a/Documentation/translations/zh_CN/core-api/printk-formats.rst b/Documentation/translations/zh_CN/core-api/printk-formats.rst
index bd36d35eba4e..96a917ecc93f 100644
--- a/Documentation/translations/zh_CN/core-api/printk-formats.rst
+++ b/Documentation/translations/zh_CN/core-api/printk-formats.rst
@@ -523,9 +523,8 @@ clk结构体
::
%pC pll1
- %pCn pll1
-用于打å°clk结构。%pC å’Œ %pCn æ‰“å°æ—¶é’Ÿçš„å称(通用时钟框架)或唯一的32ä½
+用于打å°clk结构。%pC æ‰“å°æ—¶é’Ÿçš„å称(通用时钟框架)或唯一的32ä½
ID(传统时钟框架)。
通过引用传递。
diff --git a/Documentation/translations/zh_CN/driver-api/gpio/index.rst b/Documentation/translations/zh_CN/driver-api/gpio/index.rst
index e4d54724a1b5..f64a69f771ca 100644
--- a/Documentation/translations/zh_CN/driver-api/gpio/index.rst
+++ b/Documentation/translations/zh_CN/driver-api/gpio/index.rst
@@ -42,7 +42,7 @@ ACPI支æŒ
该API在以下内核代ç ä¸­:
-drivers/gpio/gpiolib-acpi.c
+drivers/gpio/gpiolib-acpi-core.c
设备树支æŒ
==========
diff --git a/Documentation/translations/zh_CN/how-to.rst b/Documentation/translations/zh_CN/how-to.rst
new file mode 100644
index 000000000000..569b0209385a
--- /dev/null
+++ b/Documentation/translations/zh_CN/how-to.rst
@@ -0,0 +1,459 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Linux内核中文文档翻译规范
+=========================
+
+修订记录:
+ - v1.0 2025å¹´3月28日,å¸å»¶è…¾ã€æ…•冬亮共åŒç¼–写了该规范。
+
+制定规范的背景
+==============
+
+过去几年,在广大社区爱好者的å‹å¥½åˆä½œä¸‹ï¼ŒLinux 内核中文文档迎æ¥äº†è“¬å‹ƒçš„å‘
+å±•ã€‚åœ¨ç¿»è¯‘çš„æ—©æœŸï¼Œä¸€åˆ‡éƒ½æ˜¯æ··ä¹±çš„ï¼Œç¤¾åŒºå¯¹è¯‘ç¨¿åªæœ‰ä¸€ä¸ªå‡†ç¡®ç¿»è¯‘çš„è¦æ±‚,以鼓
+励更多的开å‘者å‚与进æ¥ï¼Œè¿™æ˜¯ä»Ž0到1的必然过程,所以早期的中文文档目录更加
+具有多样性,ä¸è¿‡å¥½åœ¨æ–‡æ¡£ä¸å¤šï¼Œç»´æŠ¤ä¸Šå¹¶æ²¡æœ‰è¿‡å¤§çš„压力。
+
+然而,世事å˜å¹»ï¼Œä¸è§‰æœ‰å¹´ï¼ŒçŽ°åœ¨å†…æ ¸ä¸­æ–‡æ–‡æ¡£åœ¨å‰è¿›çš„é“路上越走越远,很多潜
+åœ¨çš„é—®é¢˜é€æ¸æµ®å‡ºæ°´é¢ï¼Œè€Œä¸”éšç€ä¸­æ–‡æ–‡æ¡£æ•°é‡çš„增加,翻译更多的文档与æé«˜ä¸­
+文文档å¯ç»´æŠ¤æ€§ä¹‹é—´çš„矛盾愈å‘å°–é”。由于文档翻译的特殊性,很多开å‘者并ä¸ä¼š
+一直更新文档,如果中文文档è½åŽè‹±æ–‡æ–‡æ¡£å¤ªå¤šï¼Œæ–‡æ¡£æ›´æ–°çš„工作é‡ä¼šè¿œå¤§äºŽé‡æ–°
+翻译。而且邮件列表中陆续有新的é¢å­”出现,他们那股热情,就åƒç‡ƒçƒ§çš„ç«ç„°ï¼Œèƒ½
+çž¬é—´ç‚¹ç‡ƒæ•´ä¸ªç©ºé—´ï¼Œå¯æ˜¯ä»–们的补ä¸å¾€å¾€å…·æœ‰ä¸ªæ€§ï¼Œè¿™ä¼šç»™å®¡é˜…带æ¥äº†å¾ˆå¤§çš„困难,
+reviewer 们åªèƒ½è€å¿ƒåœ°æŒ‡å¯¼ä»–们如何与社区更好地åˆä½œï¼Œä½†æ˜¯è¿™é¡¹å·¥ä½œå…·æœ‰é‡å¤
+æ€§ï¼Œé•¿æ­¤ä»¥å¾€ï¼Œä¼šæ¸æ¸æµ‡ç­ reviewer 审阅的热情。
+
+è™½ç„¶å†…æ ¸æ–‡æ¡£ä¸­å·²ç»æœ‰äº†ç±»ä¼¼çš„贡献指å—,但是缺ä¹ä¸“门针对于中文翻译的,尤其
+是对于新手æ¥è¯´ï¼Œæµè§ˆå¤§é‡çš„æ–‡æ¡£å而更加迷惑,该文档就是为了缓解这一问题而
+编写,目的是为æä¾›ç»™æ–°æ‰‹ä¸€ä¸ªå¿«é€Ÿç¿»è¯‘指å—。
+
+详细的贡献指å—:Documentation/translations/zh_CN/process/index.rst。
+
+环境æ­å»º
+========
+
+工欲善其事必先利其器,如果您目å‰å¯¹å†…核文档翻译满怀热情,并且会独立地安装
+linux å‘行版和简å•地使用 linux 命令行,那么å¯ä»¥è¿…速开始了。若您尚ä¸å…·å¤‡è¯¥
+能力,很多网站上会有详细的手把手教程,最多一个上åˆï¼Œæ‚¨åº”该就能掌æ¡å¯¹åº”技
+èƒ½ã€‚æ‚¨éœ€è¦æ³¨æ„的一点是,请ä¸è¦ä½¿ç”¨ root 用户进行åŽç»­æ­¥éª¤å’Œæ–‡æ¡£ç¿»è¯‘。
+
+拉å–开呿 ‘
+----------
+
+中文文档翻译工作目å‰ç‹¬ç«‹äºŽ linux-doc 开呿 ‘å¼€å±•ï¼Œæ‰€ä»¥æ‚¨éœ€è¦æ‹‰å–è¯¥å¼€å‘æ ‘,
+打开终端命令行执行::
+
+ git clone git://git.kernel.org/pub/scm/linux/kernel/git/alexs/linux.git
+
+如果您é‡åˆ°ç½‘络连接问题,也å¯ä»¥æ‰§è¡Œä»¥ä¸‹å‘½ä»¤::
+
+ git clone https://mirrors.hust.edu.cn/git/kernel-doc-zh.git linux
+
+这是 Alex 开呿 ‘的镜åƒåº“,æ¯ä¸¤ä¸ªå°æ—¶åŒæ­¥ä¸€æ¬¡ä¸Šæ¸¸ã€‚如果您了解到更快的 mirror,
+è¯·éšæ—¶ **添加** 。
+
+命令执行完毕åŽï¼Œæ‚¨ä¼šåœ¨å½“å‰ç›®å½•下得到一个 linux 目录,该目录就是您之åŽçš„工作
+仓库,请把它放在一个稳妥的ä½ç½®ã€‚
+
+安装文档构建环境
+----------------
+
+å†…æ ¸ä»“åº“é‡Œé¢æä¾›äº†ä¸€ä¸ªåŠè‡ªåŠ¨åŒ–è„šæœ¬ï¼Œæ‰§è¡Œè¯¥è„šæœ¬ï¼Œä¼šæ£€æµ‹æ‚¨çš„å‘行版中需è¦å®‰
+装哪些软件包,请按照命令行æç¤ºè¿›è¡Œå®‰è£…,通常您åªéœ€è¦å¤åˆ¶å‘½ä»¤å¹¶æ‰§è¡Œå°±è¡Œã€‚
+::
+
+ cd linux
+ ./scripts/sphinx-pre-install
+
+以Fedora为例,它的输出是这样的::
+
+ You should run:
+
+ sudo dnf install -y dejavu-sans-fonts dejavu-sans-mono-fonts dejavu-serif-fonts google-noto-sans-cjk-fonts graphviz-gd latexmk librsvg2-tools texlive-anyfontsize texlive-capt-of texlive-collection-fontsrecommended texlive-ctex texlive-eqparbox texlive-fncychap texlive-framed texlive-luatex85 texlive-multirow texlive-needspace texlive-tabulary texlive-threeparttable texlive-upquote texlive-wrapfig texlive-xecjk
+
+ Sphinx needs to be installed either:
+ 1) via pip/pypi with:
+
+ /usr/bin/python3 -m venv sphinx_latest
+ . sphinx_latest/bin/activate
+ pip install -r ./Documentation/sphinx/requirements.txt
+
+ If you want to exit the virtualenv, you can use:
+ deactivate
+
+ 2) As a package with:
+
+ sudo dnf install -y python3-sphinx
+
+ Please note that Sphinx >= 3.0 will currently produce false-positive
+ warning when the same name is used for more than one type (functions,
+ structs, enums,...). This is known Sphinx bug. For more details, see:
+ https://github.com/sphinx-doc/sphinx/pull/8313
+
+请您按照æç¤ºå¤åˆ¶æ‰“å°çš„命令到命令行执行,您必须具备 root æƒé™æ‰èƒ½æ‰§è¡Œ sudo
+开头的命令。
+
+如果您处于一个多用户环境中,为了é¿å…对其他人造æˆå½±å“,建议您é…ç½®å•用户
+sphinx 虚拟环境,å³åªéœ€è¦æ‰§è¡Œ::
+
+ /usr/bin/python3 -m venv sphinx_latest
+ . sphinx_latest/bin/activate
+ pip install -r ./Documentation/sphinx/requirements.txt
+
+æœ€åŽæ‰§è¡Œä»¥ä¸‹å‘½ä»¤é€€å‡ºè™šæ‹ŸçŽ¯å¢ƒ::
+
+ deactivate
+
+您å¯ä»¥åœ¨ä»»ä½•需è¦çš„æ—¶å€™å†æ¬¡æ‰§è¡Œä»¥ä¸‹å‘½ä»¤è¿›å…¥è™šæ‹ŸçŽ¯å¢ƒ::
+
+ . sphinx_latest/bin/activate
+
+进行第一次文档编译
+------------------
+
+è¿›å…¥å¼€å‘æ ‘目录::
+
+ cd linux
+
+这是一个标准的编译和调试æµç¨‹ï¼Œè¯·æ¯æ¬¡æž„建时都严格执行::
+
+ . sphinx_latest/bin/activate
+ make cleandocs
+ make htmldocs
+ deactivate
+
+检查编译结果
+------------
+
+编译输出在Documentation/output/目录下,请用æµè§ˆå™¨æ‰“开该目录下对应
+的文件进行检查。
+
+git和邮箱é…ç½®
+-------------
+
+打开命令行执行::
+
+ sudo dnf install git-email
+ vim ~/.gitconfig
+
+这里是我的一个é…ç½®æ–‡ä»¶ç¤ºèŒƒï¼Œè¯·æ ¹æ®æ‚¨çš„é‚®ç®±åŸŸåæœåŠ¡å•†æä¾›çš„æ‰‹å†Œæ›¿æ¢åˆ°å¯¹
+应的字段。
+::
+
+ [user]
+ name = Yanteng Si # 这会出现在您的补ä¸å¤´éƒ¨ç­¾åæ 
+ email = si.yanteng@linux.dev # 这会出现在您的补ä¸å¤´éƒ¨ç­¾åæ 
+
+ [sendemail]
+ from = Yanteng Si <si.yanteng@linux.dev> # 这会出现在您的补ä¸å¤´éƒ¨
+ smtpencryption = ssl
+ smtpserver = smtp.migadu.com
+ smtpuser = si.yanteng@linux.dev
+ smtppass = <passwd> # 建议使用第三方客户端专用密ç 
+ chainreplyto = false
+ smtpserverport = 465
+
+关于邮件客户端的é…置,请查阅Documentation/translations/zh_CN/process/email-clients.rst。
+
+开始翻译文档
+============
+
+文档索引结构
+------------
+
+ç›®å‰ä¸­æ–‡æ–‡æ¡£æ˜¯åœ¨Documentation/translations/zh_CN/目录下进行,该
+目录结构最终会与Documentation/结构一致,所以您åªéœ€è¦å°†æ‚¨æ„Ÿå…´è¶£çš„英文
+文档文件和对应的 index.rst å¤åˆ¶åˆ° zh_CN 目录下对应的ä½ç½®ï¼Œç„¶åŽä¿®æ”¹æ›´
+上一级的 index å³å¯å¼€å§‹æ‚¨çš„翻译。
+
+为了ä¿è¯ç¿»è¯‘的文档补ä¸è¢«é¡ºåˆ©åˆå¹¶ï¼Œä¸å»ºè®®å¤šäººåŒæ—¶ç¿»è¯‘一个目录,因为这会
+造æˆè¡¥ä¸ä¹‹é—´äº’相ä¾èµ–,往往会导致一部分补ä¸è¢«åˆå¹¶ï¼Œå¦ä¸€éƒ¨åˆ†äº§ç”Ÿå†²çªã€‚
+
+如果实在无法é¿å…ä¸¤ä¸ªäººåŒæ—¶å¯¹ä¸€ä¸ªç›®å½•进行翻译的情况,请将补ä¸åˆ¶ä½œè¿›ä¸€ä¸ªè¡¥
+ä¸é›†ã€‚ä½†æ˜¯ä¸æŽ¨è刚开始就这么åšï¼Œå› ä¸ºç»è¿‡å®žè·µï¼Œåœ¨æ²¡æœ‰æŒ‡å¯¼çš„æƒ…况下,新手很
+难一次处ç†å¥½è¿™ä¸ªè¡¥ä¸é›†ã€‚
+
+请执行以下命令,新建开å‘分支::
+
+ git checkout docs-next
+ git branch my-trans
+ git checkout my-trans
+
+译文格å¼è¦æ±‚
+------------
+
+ - æ¯è¡Œé•¿åº¦æœ€å¤šä¸è¶…过40个字符
+ - æ¯è¡Œé•¿åº¦è¯·ä¿æŒä¸€è‡´
+ - 标题的下划线长度请按照一个英文一个字符ã€ä¸€ä¸ªä¸­æ–‡ä¸¤ä¸ªå­—符与标题对é½
+ - å…¶å®ƒçš„ä¿®é¥°ç¬¦è¯·ä¸Žè‹±æ–‡æ–‡æ¡£ä¿æŒä¸€è‡´
+
+æ­¤å¤–åœ¨è¯‘æ–‡çš„å¤´éƒ¨ï¼Œæ‚¨éœ€è¦æ’入以下内容::
+
+ .. SPDX-License-Identifier: GPL-2.0
+ .. include:: ../disclaimer-zh_CN.rst #您需è¦äº†è§£è¯¥æ–‡ä»¶çš„路径,根
+ æ®æ‚¨å®žé™…ç¿»è¯‘çš„æ–‡æ¡£çµæ´»è°ƒæ•´
+
+ :Original: Documentation/xxx/xxx.rst #替æ¢ä¸ºæ‚¨ç¿»è¯‘的英文文档路径
+
+ :翻译:
+
+ å¸å»¶è…¾ Yanteng Si <si.yanteng@linux.dev> #替æ¢ä¸ºæ‚¨è‡ªå·±çš„è”系方å¼
+
+翻译技巧
+--------
+
+中文文档有æ¯è¡Œ40字符é™åˆ¶ï¼Œå› ä¸ºä¸€ä¸ªä¸­æ–‡å­—符等于2个英文字符。但是社区并没有
+é‚£ä¹ˆä¸¥æ ¼ï¼Œä¸€ä¸ªè¯€çªæ˜¯å°†æ‚¨çš„翻译的内容与英文原文的æ¯è¡Œé•¿åº¦å¯¹é½å³å¯ï¼Œè¿™æ ·ï¼Œ
+您也ä¸å¿…总是检查有没有超é™ã€‚
+
+如果您的英文阅读能力有é™ï¼Œå¯ä»¥è€ƒè™‘使用辅助翻译工具,例如 deepseek 。但是您
+必须仔细地打磨,使译文达到“信达雅â€çš„æ ‡å‡†ã€‚
+
+**请注æ„** ç¤¾åŒºä¸æŽ¥å—纯机器翻译的文档,社区工作建立在信任的基础上,请认真对待。
+
+编译和检查
+----------
+
+请执行::
+
+ . sphinx_latest/bin/activate
+ make cleandocs
+ make htmldocs
+
+解决与您翻译的文档相关的 warning å’Œ errorï¼Œç„¶åŽæ‰§è¡Œ::
+
+ make cleandocs #该步骤ä¸èƒ½çœç•¥ï¼Œå¦åˆ™å¯èƒ½ä¸ä¼šå†æ¬¡è¾“出真实存在的警告
+ make htmldocs
+ deactivate
+
+进入 output 目录用æµè§ˆå™¨æ‰“å¼€æ‚¨ç¿»è¯‘çš„æ–‡æ¡£ï¼Œæ£€æŸ¥æ¸²æŸ“çš„é¡µé¢æ˜¯å¦æ­£å¸¸ï¼Œå¦‚果正常,
+继续进行åŽç»­æ­¥éª¤ï¼Œå¦åˆ™è¯·å°è¯•解决。
+
+制作补ä¸
+========
+
+æäº¤æ”¹åЍ
+--------
+
+执行以下命令,在弹出的交互å¼é¡µé¢ä¸­å¡«å†™å¿…è¦çš„ä¿¡æ¯ã€‚
+::
+
+ git add .
+ git commit -s -v
+
+请å‚考以下信æ¯è¿›è¡Œè¾“å…¥::
+
+ docs/zh_CN: Add self-protection index Chinese translation
+
+ Translate .../security/self-protection.rst into Chinese.
+
+ Update the translation through commit b080e52110ea #请执行git log <您翻译的英文文档路径> å¤åˆ¶æœ€é¡¶éƒ¨ç¬¬ä¸€ä¸ªè¡¥ä¸çš„sha值的å‰12ä½ï¼Œæ›¿æ¢æŽ‰12ä½sha值。
+ ("docs: update self-protection __ro_after_init status")
+
+ Signed-off-by: Yanteng Si <si.yanteng@linux.dev> #如果您å‰é¢çš„æ­¥éª¤æ­£ç¡®æ‰§è¡Œï¼Œè¯¥è¡Œä¼šè‡ªåŠ¨æ˜¾ç¤ºï¼Œå¦åˆ™è¯·æ£€æŸ¥gitconfig文件。
+
+ä¿å­˜å¹¶é€€å‡ºã€‚
+
+**请注æ„** 以上四行,缺少任何一行,您都将会在第一轮审阅åŽè¿”工,如果您需è¦ä¸€ä¸ªæ›´åŠ æ˜Žç¡®çš„ç¤ºä¾‹ï¼Œè¯·å¯¹ zh_CN 目录执行 git log。
+
+导出补ä¸å’Œåˆ¶ä½œå°é¢
+------------------
+
+这个时候,å¯ä»¥å¯¼å‡ºè¡¥ä¸ï¼Œåšå‘é€é‚®ä»¶åˆ—表最åŽçš„准备了。命令行执行::
+
+ git format-patch -N
+
+ç„¶åŽå‘½ä»¤è¡Œä¼šè¾“出类似下é¢çš„内容::
+
+ 0001-docs-zh_CN-add-xxxxxxxx.patch
+ 0002-docs-zh_CN-add-xxxxxxxx.patch
+ ……
+
+测试补ä¸
+--------
+
+内核æä¾›äº†ä¸€ä¸ªè¡¥ä¸æ£€æµ‹è„šæœ¬ï¼Œè¯·æ‰§è¡Œ::
+
+ ./scripts/checkpatch.pl *.patch
+
+å‚考脚本输出,解决掉所有的 error å’Œ warningï¼Œé€šå¸¸æƒ…å†µä¸‹ï¼Œåªæœ‰ä¸‹é¢è¿™ä¸ª
+warning ä¸éœ€è¦è§£å†³::
+
+ WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
+
+一个简å•çš„è§£å†³æ–¹æ³•æ˜¯ä¸€æ¬¡åªæ£€æŸ¥ä¸€ä¸ªè¡¥ä¸ï¼Œç„¶åŽæ‰“上该补ä¸ï¼Œç›´æŽ¥å¯¹è¯‘文进行修改,
+ç„¶åŽæ‰§è¡Œä»¥ä¸‹å‘½ä»¤ä¸ºè¡¥ä¸è¿½åŠ æ›´æ”¹::
+
+ git checkout docs-next
+ git branch test-trans
+ git am 0001-xxxxx.patch
+ ./scripts/checkpatch.pl 0001-xxxxx.patch
+ 直接修改您的翻译
+ git add .
+ git am --amend
+ ä¿å­˜é€€å‡º
+ git am 0002-xxxxx.patch
+ ……
+
+釿–°å¯¼å‡ºå†æ¬¡æ£€æµ‹ï¼Œé‡å¤è¿™ä¸ªè¿‡ç¨‹ï¼Œç›´åˆ°å¤„ç†å®Œæ‰€æœ‰çš„è¡¥ä¸ã€‚
+
+最åŽï¼Œå¦‚果检测时没有 warning å’Œ error 需è¦è¢«å¤„ç†æˆ–è€…æ‚¨åªæœ‰ä¸€ä¸ªè¡¥ä¸ï¼Œè¯·è·³
+过下é¢è¿™ä¸ªæ­¥éª¤ï¼Œå¦åˆ™è¯·é‡æ–°å¯¼å‡ºè¡¥ä¸åˆ¶ä½œå°é¢::
+
+ git format-patch -N --cover-letter --thread=shallow #Nä¸ºæ‚¨çš„è¡¥ä¸æ•°é‡,N一般è¦å¤§äºŽ1。
+
+ç„¶åŽå‘½ä»¤è¡Œä¼šè¾“出类似下é¢çš„内容::
+
+ 0000-cover-letter.patch
+ 0001-docs-zh_CN-add-xxxxxxxx.patch
+ 0002-docs-zh_CN-add-xxxxxxxx.patch
+
+您需è¦ç”¨ç¼–辑器打开0å·è¡¥ä¸ï¼Œä¿®æ”¹ä¸¤å¤„内容::
+
+ vim 0000-cover-letter.patch
+
+ ...
+ Subject: [PATCH 0/1] *** SUBJECT HERE *** #修改该字段,概括您的补ä¸é›†éƒ½åšäº†å“ªäº›äº‹æƒ…
+
+ *** BLURB HERE *** #修改该字段,详细æè¿°æ‚¨çš„è¡¥ä¸é›†åšäº†å“ªäº›äº‹æƒ…
+
+ Yanteng Si (1):
+ docs/zh_CN: add xxxxx
+ ...
+
+å¦‚æžœæ‚¨åªæœ‰ä¸€ä¸ªè¡¥ä¸ï¼Œåˆ™å¯ä»¥ä¸åˆ¶ä½œå°é¢ï¼Œå³0å·è¡¥ä¸ï¼Œåªéœ€è¦æ‰§è¡Œ::
+
+ git format-patch -1
+
+æŠŠè¡¥ä¸æäº¤åˆ°é‚®ä»¶åˆ—è¡¨
+====================
+
+æ­å–œæ‚¨ï¼Œæ‚¨çš„æ–‡æ¡£ç¿»è¯‘现在å¯ä»¥æäº¤åˆ°é‚®ä»¶åˆ—表了。
+
+获å–维护者和审阅者邮箱以åŠé‚®ä»¶åˆ—表地å€
+--------------------------------------
+
+内核æä¾›äº†ä¸€ä¸ªè‡ªåŠ¨åŒ–è„šæœ¬å·¥å…·ï¼Œè¯·æ‰§è¡Œ::
+
+ ./scripts/get_maintainer.pl *.patch
+
+将输出的邮箱地å€ä¿å­˜ä¸‹æ¥ã€‚
+
+å°†è¡¥ä¸æäº¤åˆ°é‚®ä»¶åˆ—è¡¨
+--------------------
+
+æ‰“å¼€ä¸Šé¢æ‚¨ä¿å­˜çš„邮件地å€ï¼Œæ‰§è¡Œ::
+
+ git send-email *.patch --to <maintainer email addr> --cc <others addr> #一个to对应一个地å€ï¼Œä¸€ä¸ªcc对应一个地å€ï¼Œæœ‰å‡ ä¸ªå°±å†™å‡ ä¸ªã€‚
+
+执行该命令时,请确ä¿ç½‘络通常,邮件å‘逿ˆåŠŸä¸€èˆ¬ä¼šè¿”å›ž250。
+
+您å¯ä»¥å…ˆå‘é€ç»™è‡ªå·±ï¼Œå°è¯•å‘出的 patch 是å¦å¯ä»¥ç”¨ 'git am' 工具正常打上。
+如果检查正常, 您就å¯ä»¥æ”¾å¿ƒçš„å‘é€åˆ°ç¤¾åŒºè¯„审了。
+
+如果该步骤被中断,您å¯ä»¥æ£€æŸ¥ä¸€ä¸‹ï¼Œç»§ç»­ç”¨ä¸Šæ¡å‘½ä»¤å‘é€å¤±è´¥çš„è¡¥ä¸ï¼Œä¸€å®šä¸è¦å†
+次å‘é€å·²ç»å‘逿ˆåŠŸçš„è¡¥ä¸ã€‚
+
+积æžå‚与审阅过程并迭代补ä¸
+==========================
+
+è¡¥ä¸æäº¤åˆ°é‚®ä»¶åˆ—è¡¨å¹¶ä¸ä»£è¡¨ä¸‡äº‹å¤§å‰ï¼Œæ‚¨è¿˜éœ€è¦ç§¯æžå›žå¤ maintainer å’Œ
+reviewer 的评论,åšåˆ°æ¯æ¡éƒ½æœ‰å›žå¤ï¼Œæ¯ä¸ªå›žå¤éƒ½è½å®žåˆ°ä½ã€‚
+
+如何回å¤è¯„论
+------------
+
+ - 请先将您的邮箱客户端信件回å¤ä¿®æ”¹ä¸º **纯文本** æ ¼å¼ï¼Œå¹¶åŽ»é™¤æ‰€æœ‰ç­¾å,尤其是
+ ä¼ä¸šé‚®ç®±ã€‚
+ - ç„¶åŽç‚¹å‡»å›žå¤æŒ‰é’®ï¼Œå¹¶å°†è¦å›žå¤çš„邮件带入,
+ - 在第一æ¡è¯„论行尾æ¢è¡Œï¼Œè¾“入您的回å¤
+ - 在第二æ¡è¯„论行尾æ¢è¡Œï¼Œè¾“入您的回å¤
+ - 直到处ç†å®Œæœ€åŽä¸€æ¡è¯„论,æ¢è¡Œç©ºä¸¤è¡Œè¾“入问候语和署å
+
+注æ„,信件回å¤è¯·å°½é‡ä½¿ç”¨è‹±æ–‡ã€‚
+
+迭代补ä¸
+--------
+
+建议您æ¯å›žå¤ä¸€æ¡è¯„论,就修改一处翻译。然åŽé‡æ–°ç”Ÿæˆè¡¥ä¸ï¼Œç›¸ä¿¡æ‚¨çŽ°åœ¨å·²ç»å…·
+å¤‡äº†çµæ´»ä½¿ç”¨ git am --amend 的能力。
+
+æ¯æ¬¡è¿­ä»£ä¸€ä¸ªè¡¥ä¸ï¼Œä¸è¦ä¸€æ¬¡å¤šä¸ª::
+
+ git am <您è¦ä¿®æ”¹çš„è¡¥ä¸>
+ 直接对文件进行您的修改
+ git add .
+ git commit --amend
+
+当您将所有的评论è½å®žåˆ°ä½åŽï¼Œå¯¼å‡ºç¬¬äºŒç‰ˆè¡¥ä¸ï¼Œå¹¶ä¿®æ”¹å°é¢::
+
+ git format-patch -N -v 2 --cover-letter --thread=shallow
+
+打开0å·è¡¥ä¸ï¼Œåœ¨ BLURB HERE 处编写相较于上个版本,您åšäº†å“ªäº›æ”¹åŠ¨ã€‚
+
+ç„¶åŽæ‰§è¡Œ::
+
+ git send-email v2* --to <maintainer email addr> --cc <others addr>
+
+这样,新的一版补ä¸å°±åˆå‘é€åˆ°é‚®ä»¶åˆ—表等待审阅,之åŽå°±æ˜¯é‡å¤è¿™ä¸ªè¿‡ç¨‹ã€‚
+
+审阅周期
+--------
+
+因为有时邮件列表比较ç¹å¿™ï¼Œæ‚¨çš„邮件å¯èƒ½ä¼šè¢«æ·¹æ²¡ï¼Œå¦‚果超过两周没有得到任何
+回å¤ï¼Œè¯·è‡ªå·±å›žå¤è‡ªå·±ï¼Œå›žå¤çš„内容为 Ping.
+
+最终,如果您è½å®žå¥½äº†æ‰€æœ‰çš„è¯„è®ºï¼Œå¹¶ä¸”ä¸€æ®µæ—¶é—´åŽæ²¡æœ‰æœ€æ–°çš„评论,您的补ä¸å°†
+会先进入 Alex çš„å¼€å‘æ ‘,然åŽè¿›å…¥ linux-doc 开呿 ‘ï¼Œæœ€ç»ˆåœ¨ä¸‹ä¸ªçª—å£æ‰“å¼€
+æ—¶åˆå¹¶è¿› mainline 仓库。
+
+紧急处ç†
+--------
+
+如果您å‘é€åˆ°é‚®ä»¶åˆ—表之åŽã€‚å‘现å‘错了补ä¸é›†ï¼Œå°¤å…¶æ˜¯åœ¨å¤šä¸ªç‰ˆæœ¬è¿­ä»£çš„过程中;
+自己å‘现了一些ä¸å¦¥çš„翻译;å‘é€é”™äº†é‚®ä»¶åˆ—表……
+
+git email默认会抄é€ç»™æ‚¨ä¸€ä»½ï¼Œæ‰€ä»¥æ‚¨å¯ä»¥åˆ‡æ¢ä¸ºå®¡é˜…者的角色审查自己的补ä¸ï¼Œ
+并留下评论,æè¿°æœ‰ä½•ä¸å¦¥ï¼Œå°†åœ¨ä¸‹ä¸ªç‰ˆæœ¬æ€Žä¹ˆæ”¹ï¼Œå¹¶ä»˜è¯¸è¡ŒåŠ¨ï¼Œé‡æ–°æäº¤ï¼Œä½†æ˜¯
+注æ„频率,æ¯å¤©æäº¤çš„æ¬¡æ•°ä¸è¦è¶…过两次。
+
+新手任务
+--------
+对于首次å‚与 Linux 内核中文文档翻译的新手,建议您在 linux 目录中è¿è¡Œä»¥ä¸‹å‘½ä»¤ï¼š
+::
+
+ ./script/checktransupdate.py -l zh_CN``
+
+该命令会列出需è¦ç¿»è¯‘或更新的英文文档。
+
+关于详细æ“作说明,请å‚考: Documentation/translations/zh_CN/doc-guide/checktransupdate.rst\
+
+进阶
+----
+
+希望您ä¸åªæ˜¯å•纯的翻译内核文档,在熟悉了一起与社区工作之åŽï¼Œæ‚¨å¯ä»¥å®¡é˜…å…¶ä»–
+å¼€å‘者的翻译,或者æå‡ºå…·æœ‰å»ºè®¾æ€§çš„ä¸»å¼ ã€‚ä¸Žæ­¤åŒæ—¶ï¼Œä¸Žæ–‡æ¡£å¯¹åº”çš„ä»£ç æ›´åŠ æœ‰è¶£ï¼Œ
+而且需è¦å®Œå–„çš„åœ°æ–¹è¿˜æœ‰å¾ˆå¤šï¼Œå‹‡æ•¢åœ°åŽ»æŽ¢ç´¢ï¼Œç„¶åŽæäº¤ä½ çš„æƒ³æ³•å§ã€‚
+
+常è§çš„问题
+==========
+
+Maintainer回å¤è¡¥ä¸ä¸èƒ½æ­£å¸¸apply
+-------------------------------
+
+这通常是因为您的补ä¸ä¸Žé‚®ä»¶åˆ—表其他人的补ä¸äº§ç”Ÿäº†å†²çªï¼Œåˆ«äººçš„è¡¥ä¸å…ˆè¢« apply 了,
+您的补ä¸é›†å°±æ— æ³•æˆåŠŸ apply äº†ï¼Œè¿™éœ€è¦æ‚¨æ›´æ–°æœ¬åœ°åˆ†æ”¯ï¼Œåœ¨æœ¬åœ°è§£å†³å®Œå†²çªåŽå†æ¬¡æäº¤ã€‚
+
+请尽é‡é¿å…冲çªï¼Œä¸è¦å¤šä¸ªäººåŒæ—¶ç¿»è¯‘一个目录。翻译之å‰å¯ä»¥é€šè¿‡ git log 查看您感
+兴趣的目录近期有没有其他人翻译,如果有,请æå‰ç§ä¿¡è”系对方,请求其代为å‘逿‚¨
+的补ä¸ã€‚如果对方未æ¥ä¸€ä¸ªæœˆå†…没有æäº¤æ–°è¡¥ä¸çš„æ‰“算,您å¯ä»¥ç‹¬è‡ªå‘é€ã€‚
+
+回信被邮件列表拒收
+------------------
+
+大部分情况下,是由于您å‘é€äº†éžçº¯æ–‡æœ¬æ ¼å¼çš„信件,请尽é‡é¿å…使用 webmail,推è
+使用邮件客户端,比如 thunderbird,记得在设置中的回信é…置那改为纯文本å‘é€ã€‚
+
+如果超过了24å°æ—¶ï¼Œæ‚¨ä¾æ—§æ²¡æœ‰åœ¨<https://lore.kernel.org/linux-doc/>å‘现您的邮
+件,请è”系您的网络管ç†å‘˜å¸®å¿™è§£å†³ã€‚
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index cc512ca54172..b08c09d8e96e 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -21,18 +21,18 @@
这是中文内核文档树的顶级目录。内核文档,就åƒå†…核本身一样,在很大程度上是一
é¡¹æ­£åœ¨è¿›è¡Œçš„å·¥ä½œï¼›å½“æˆ‘ä»¬åŠªåŠ›å°†è®¸å¤šåˆ†æ•£çš„æ–‡ä»¶æ•´åˆæˆä¸€ä¸ªè¿žè´¯çš„æ•´ä½“时尤其如此。
å¦å¤–ï¼Œéšæ—¶æ¬¢è¿Žæ‚¨å¯¹å†…核文档进行改进;如果您想æä¾›å¸®åŠ©ï¼Œè¯·åŠ å…¥vger.kernel.org
-上的linux-doc邮件列表。
-
-顺便说下,中文文档也需è¦éµå®ˆå†…核编ç é£Žæ ¼ï¼Œé£Žæ ¼ä¸­ä¸­æ–‡å’Œè‹±æ–‡çš„主è¦ä¸åŒå°±æ˜¯ä¸­æ–‡
-的字符标点å ç”¨ä¸¤ä¸ªè‹±æ–‡å­—ç¬¦å®½åº¦ï¼Œæ‰€ä»¥ï¼Œå½“è‹±æ–‡è¦æ±‚ä¸è¦è¶…过æ¯è¡Œ100个字符时,
-中文就ä¸è¦è¶…过50个字符。å¦å¤–ï¼Œä¹Ÿè¦æ³¨æ„'-','='等符å·ä¸Žç›¸å…³æ ‡é¢˜çš„对é½ã€‚在将
-è¡¥ä¸æäº¤åˆ°ç¤¾åŒºä¹‹å‰ï¼Œä¸€å®šè¦è¿›è¡Œå¿…è¦çš„ ``checkpatch.pl`` 检查和编译测试,确ä¿
-在 ``make htmldocs/pdfdocs`` 中ä¸å¢žåŠ æ–°çš„å‘Šè­¦ï¼Œæœ€åŽï¼Œå®‰è£…检查你生æˆçš„
-html/pdf æ–‡ä»¶ï¼Œç¡®è®¤å®ƒä»¬çœ‹èµ·æ¥æ˜¯æ­£å¸¸çš„。
-
-æäº¤ä¹‹å‰è¯·ç¡®è®¤ä½ çš„è¡¥ä¸å¯ä»¥æ­£å¸¸æäº¤åˆ°ä¸­æ–‡æ–‡æ¡£ç»´æŠ¤åº“:
-https://git.kernel.org/pub/scm/linux/kernel/git/alexs/linux.git/
-如果你的补ä¸ä¾èµ–于其他人的补ä¸, å¯ä»¥ä¸Žå…¶ä»–人商é‡åŽç”±æŸä¸€ä¸ªäººåˆå¹¶æäº¤ã€‚
+上的linux-doc邮件列表,并按照Documentation/translations/zh_CN/how-to.rst的
+指引æäº¤è¡¥ä¸ã€‚æäº¤è¡¥ä¸ä¹‹å‰è¯·ç¡®ä¿æ‰§è¡Œ"make htmldocsâ€åŽæ— ä¸Žç¿»è¯‘有关的异常输出。
+
+如何翻译内核文档
+----------------
+
+翻译文档本身是一件很简å•的事情,但是æäº¤è¡¥ä¸éœ€è¦æ³¨æ„一些细节,为了ä¿è¯å†…核中文文档的高质é‡å¯æŒç»­å‘展,æä¾›äº†ä¸€ä»½ç¿»è¯‘指å—。
+
+.. toctree::
+ :maxdepth: 1
+
+ how-to.rst
与Linux 内核社区一起工作
------------------------
diff --git a/Documentation/translations/zh_CN/networking/index.rst b/Documentation/translations/zh_CN/networking/index.rst
new file mode 100644
index 000000000000..d07dd69f980b
--- /dev/null
+++ b/Documentation/translations/zh_CN/networking/index.rst
@@ -0,0 +1,160 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/networking/index.rst
+
+:翻译:
+
+ 王亚鑫 Wang Yaxin <wang.yaxin@zte.com.cn>
+
+:校译:
+
+网络
+====
+
+有关网络设备(netdev)开å‘过程的详细指å—,请å‚考::ref:`netdev-FAQ`
+
+目录:
+
+.. toctree::
+ :maxdepth: 1
+
+ msg_zerocopy
+
+Todolist:
+
+* af_xdp
+* bareudp
+* batman-adv
+* can
+* can_ucan_protocol
+* device_drivers/index
+* diagnostic/index
+* dsa/index
+* devlink/index
+* caif/index
+* ethtool-netlink
+* ieee802154
+* iso15765-2
+* j1939
+* kapi
+* failover
+* net_dim
+* net_failover
+* page_pool
+* phy
+* sfp-phylink
+* alias
+* bridge
+* snmp_counter
+* checksum-offloads
+* segmentation-offloads
+* scaling
+* tls
+* tls-offload
+* tls-handshake
+* nfc
+* 6lowpan
+* 6pack
+* arcnet-hardware
+* arcnet
+* atm
+* ax25
+* bonding
+* cdc_mbim
+* dccp
+* dctcp
+* devmem
+* dns_resolver
+* driver
+* eql
+* fib_trie
+* filter
+* generic-hdlc
+* generic_netlink
+* netlink_spec/index
+* gen_stats
+* gtp
+* ila
+* ioam6-sysctl
+* ip_dynaddr
+* ipsec
+* ip-sysctl
+* ipv6
+* ipvlan
+* ipvs-sysctl
+* kcm
+* l2tp
+* lapb-module
+* mac80211-injection
+* mctp
+* mpls-sysctl
+* mptcp
+* mptcp-sysctl
+* multiqueue
+* multi-pf-netdev
+* napi
+* net_cachelines/index
+* netconsole
+* netdev-features
+* netdevices
+* netfilter-sysctl
+* netif-msg
+* netmem
+* nexthop-group-resilient
+* nf_conntrack-sysctl
+* nf_flowtable
+* oa-tc6-framework
+* openvswitch
+* operstates
+* packet_mmap
+* phonet
+* phy-link-topology
+* pktgen
+* plip
+* ppp_generic
+* proc_net_tcp
+* pse-pd/index
+* radiotap-headers
+* rds
+* regulatory
+* representors
+* rxrpc
+* sctp
+* secid
+* seg6-sysctl
+* skbuff
+* smc-sysctl
+* sriov
+* statistics
+* strparser
+* switchdev
+* sysfs-tagging
+* tc-actions-env-rules
+* tc-queue-filters
+* tcp_ao
+* tcp-thin
+* team
+* timestamping
+* tipc
+* tproxy
+* tuntap
+* udplite
+* vrf
+* vxlan
+* x25
+* x25-iface
+* xfrm_device
+* xfrm_proc
+* xfrm_sync
+* xfrm_sysctl
+* xdp-rx-metadata
+* xsk-tx-metadata
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/networking/msg_zerocopy.rst b/Documentation/translations/zh_CN/networking/msg_zerocopy.rst
new file mode 100644
index 000000000000..821b32c4d1bf
--- /dev/null
+++ b/Documentation/translations/zh_CN/networking/msg_zerocopy.rst
@@ -0,0 +1,223 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/networking/msg_zerocopy.rst
+
+:翻译:
+
+ 王亚鑫 Wang Yaxin <wang.yaxin@zte.com.cn>
+
+:校译:
+
+ - å¾é‘« xu xin <xu.xin16@zte.com.cn>
+ - 何酿ž— He Peilin <he.peilin@zte.com.cn>
+
+============
+MSG_ZEROCOPY
+============
+
+简介
+====
+
+MSG_ZEROCOPY 标志用于å¯ç”¨å¥—接字å‘é€è°ƒç”¨çš„å…æ‹·è´åŠŸèƒ½ã€‚è¯¥åŠŸèƒ½ç›®å‰é€‚用于 TCPã€UDP å’Œ VSOCK
+(使用 virtio 传输)套接字。
+
+机é‡ä¸Žæ³¨æ„事项
+--------------
+
+在用户进程与内核之间拷è´å¤§åž‹ç¼“冲区å¯èƒ½ä¼šæ¶ˆè€—大é‡èµ„æºã€‚Linux 支æŒå¤šç§å…æ‹·è´çš„æŽ¥å£ï¼Œå¦‚sendfile
+å’Œ splice。MSG_ZEROCOPY 标志将底层的拷è´é¿å…机制扩展到了常è§çš„套接字å‘é€è°ƒç”¨ä¸­ã€‚
+
+å…æ‹·è´å¹¶éžæ¯«æ— ä»£ä»·ã€‚在实现上,它通过页é¢å›ºå®šï¼ˆpage pinning)将按字节拷è´çš„æˆæœ¬æ›¿æ¢ä¸ºé¡µé¢ç»Ÿè®¡
+(page accounting)和完æˆé€šçŸ¥çš„开销。因此,MSG_ZEROCOPY 通常仅在写入é‡è¶…过大约 10 KB æ—¶
+æ‰æœ‰æ•ˆã€‚
+
+页é¢å›ºå®šè¿˜ä¼šæ”¹å˜ç³»ç»Ÿè°ƒç”¨çš„语义。它会暂时在进程和网络堆栈之间共享缓冲区。与拷è´ä¸åŒï¼Œè¿›ç¨‹åœ¨ç³»ç»Ÿ
+调用返回åŽä¸èƒ½ç«‹å³è¦†ç›–缓冲区,å¦åˆ™å¯èƒ½ä¼šä¿®æ”¹æ­£åœ¨ä¼ è¾“中的数æ®ã€‚内核的完整性ä¸ä¼šå—到影å“,但有缺
+陷的程åºå¯èƒ½ä¼šç ´åè‡ªå·±çš„æ•°æ®æµã€‚
+
+当内核返回数æ®å¯ä»¥å®‰å…¨ä¿®æ”¹çš„通知时,进程æ‰å¯ä»¥ä¿®æ”¹æ•°æ®ã€‚因此,将现有应用程åºè½¬æ¢ä¸ºä½¿ç”¨
+MSG_ZEROCOPY å¹¶éžæ€»æ˜¯åƒç®€å•地传递该标志那样容易。
+
+更多信æ¯
+--------
+
+本文档的大部分内容是æ¥è‡ªäºŽ netdev 2.1 上å‘表的一篇长篇论文。如需更深入的信æ¯ï¼Œè¯·å‚阅该论文和
+演讲,或者æµè§ˆ LWN.net 上的精彩报é“,也å¯ä»¥ç›´æŽ¥é˜…读æºç ã€‚
+
+ 论文ã€å¹»ç¯ç‰‡ã€è§†é¢‘:
+ https://netdevconf.org/2.1/session.html?debruijn
+
+ LWN 文章:
+ https://lwn.net/Articles/726917/
+
+ è¡¥ä¸é›†ï¼š
+ [PATCH net-next v4 0/9] socket sendmsg MSG_ZEROCOPY
+ https://lore.kernel.org/netdev/20170803202945.70750-1-willemdebruijn.kernel@gmail.com
+
+接å£
+====
+
+传递 MSG_ZEROCOPY 标志是å¯ç”¨å…æ‹·è´åŠŸèƒ½çš„æœ€æ˜Žæ˜¾æ­¥éª¤ï¼Œä½†å¹¶éžå”¯ä¸€çš„æ­¥éª¤ã€‚
+
+套接字设置
+----------
+
+当应用程åºå‘ send 系统调用传递未定义的标志时,内核通常会宽容对待。默认情况下,它会简å•地忽略
+这些标志。为了é¿å…为那些å¶ç„¶ä¼ é€’此标志的é—留进程å¯ç”¨å…æ‹·è´æ¨¡å¼ï¼Œè¿›ç¨‹å¿…须首先通过设置套接字选项
+æ¥è¡¨æ˜Žæ„图:
+
+::
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
+ error(1, errno, "setsockopt zerocopy");
+
+传输
+----
+
+对 send(或 sendtoã€sendmsgã€sendmmsg)本身的改动éžå¸¸ç®€å•。åªéœ€ä¼ é€’新的标志å³å¯ã€‚
+
+::
+
+ ret = send(fd, buf, sizeof(buf), MSG_ZEROCOPY);
+
+å¦‚æžœé›¶æ‹·è´æ“作失败,将返回 -1,并设置 errno 为 ENOBUFSã€‚è¿™ç§æƒ…况å¯èƒ½å‘生在套接字超出其
+optmem é™åˆ¶ï¼Œæˆ–者用户超出其é”定页é¢çš„ ulimit 时。
+
+æ··åˆä½¿ç”¨å…æ‹·è´å’Œæ‹·è´
+~~~~~~~~~~~~~~~~~~~~
+
+è®¸å¤šå·¥ä½œè´Ÿè½½åŒæ—¶åŒ…å«å¤§åž‹å’Œå°åž‹ç¼“å†²åŒºã€‚ç”±äºŽå¯¹äºŽå°æ•°æ®åŒ…æ¥è¯´ï¼Œå…æ‹·è´çš„æˆæœ¬é«˜äºŽæ‹·è´ï¼Œå› æ­¤è¯¥
+功能是通过标志实现的。带有标志的调用和没有标志的调用å¯ä»¥å®‰å…¨åœ°æ··åˆä½¿ç”¨ã€‚
+
+通知
+----
+
+当内核认为å¯ä»¥å®‰å…¨åœ°é‡ç”¨ä¹‹å‰ä¼ é€’的缓冲区时,它必须通知进程。完æˆé€šçŸ¥åœ¨å¥—接字的错误队列上
+排队,类似于传输时间戳接å£ã€‚
+
+通知本身是一个简å•的标é‡å€¼ã€‚æ¯ä¸ªå¥—æŽ¥å­—éƒ½ç»´æŠ¤ä¸€ä¸ªå†…éƒ¨çš„æ— ç¬¦å· 32 ä½è®¡æ•°å™¨ã€‚æ¯æ¬¡å¸¦æœ‰
+MSG_ZEROCOPY 标志的 send 调用æˆåŠŸå‘逿•°æ®æ—¶ï¼Œè®¡æ•°å™¨éƒ½ä¼šå¢žåŠ ã€‚å¦‚æžœè°ƒç”¨å¤±è´¥æˆ–é•¿åº¦ä¸ºé›¶ï¼Œ
+则计数器ä¸ä¼šå¢žåŠ ã€‚è¯¥è®¡æ•°å™¨ç»Ÿè®¡ç³»ç»Ÿè°ƒç”¨çš„è°ƒç”¨æ¬¡æ•°ï¼Œè€Œä¸æ˜¯å­—节数。在 UINT_MAX 次调用åŽï¼Œ
+计数器会循环。
+
+通知接收
+~~~~~~~~
+
+下é¢çš„代ç ç‰‡æ®µå±•示了 API 的使用。在最简å•çš„æƒ…å†µä¸‹ï¼Œæ¯æ¬¡ send 系统调用åŽï¼Œéƒ½ä¼šå¯¹é”™è¯¯é˜Ÿåˆ—
+进行轮询和 recvmsg 调用。
+
+从错误队列读å–始终是一个éžé˜»å¡žæ“作。poll 调用用于阻塞,直到出现错误。它会在其输出标志中
+设置 POLLERR。该标志ä¸éœ€è¦åœ¨ events 字段中设置。错误会无æ¡ä»¶åœ°å‘出信å·ã€‚
+
+::
+
+ pfd.fd = fd;
+ pfd.events = 0;
+ if (poll(&pfd, 1, -1) != 1 || pfd.revents & POLLERR == 0)
+ error(1, errno, "poll");
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1)
+ error(1, errno, "recvmsg");
+
+read_notification(msg);
+
+
+这个示例仅用于演示目的。在实际应用中,ä¸ç­‰å¾…通知,而是æ¯éš”几次 send 调用就进行一次éžé˜»å¡ž
+读å–会更高效。
+
+é›¶æ‹·è´é€šçŸ¥å¯ä»¥ä¸Žå…¶ä»–套接字æ“作乱åºå¤„ç†ã€‚通常,拥有错误队列套接字会阻塞其他æ“作,直到错误
+被读å–。然而,零拷è´é€šçŸ¥å…·æœ‰é›¶é”™è¯¯ä»£ç ï¼Œå› æ­¤ä¸ä¼šé˜»å¡ž send å’Œ recv 调用。
+
+通知批处ç†
+~~~~~~~~~~~~
+
+å¯ä»¥ä½¿ç”¨ recvmmsg 调用æ¥ä¸€æ¬¡æ€§è¯»å–多个未决的数æ®åŒ…ã€‚è¿™é€šå¸¸ä¸æ˜¯å¿…éœ€çš„ã€‚åœ¨æ¯æ¡æ¶ˆæ¯ä¸­ï¼Œå†…æ ¸
+è¿”å›žçš„ä¸æ˜¯ä¸€ä¸ªå•一的值,而是一个范围。当错误队列上有一个通知正在等待接收时,它会将连续的通
+知åˆå¹¶èµ·æ¥ã€‚
+
+当一个新的通知å³å°†è¢«æŽ’队时,它会检查队列尾部的通知的范围是å¦å¯ä»¥æ‰©å±•ä»¥åŒ…å«æ–°çš„值。如果是这
+样,它会丢弃新的通知数æ®åŒ…,并增大未处ç†é€šçŸ¥çš„范围上é™å€¼ã€‚
+
+对于按顺åºç¡®è®¤æ•°æ®çš„å议(如 TCP),æ¯ä¸ªé€šçŸ¥éƒ½å¯ä»¥åˆå¹¶åˆ°å‰ä¸€ä¸ªé€šçŸ¥ä¸­ï¼Œå› æ­¤åœ¨ä»»ä½•时候在等待
+的通知都ä¸ä¼šè¶…过一个。
+
+有åºäº¤ä»˜æ˜¯å¸¸è§çš„æƒ…况,但ä¸èƒ½ä¿è¯ã€‚在é‡ä¼ å’Œå¥—接字拆除时,通知å¯èƒ½ä¼šä¹±åºåˆ°è¾¾ã€‚
+
+通知解æž
+~~~~~~~~
+
+下é¢çš„代ç ç‰‡æ®µæ¼”ç¤ºäº†å¦‚ä½•è§£æžæŽ§åˆ¶æ¶ˆæ¯ï¼šå‰é¢ä»£ç ç‰‡æ®µä¸­çš„ read_notification() 调用。通知
+ä»¥æ ‡å‡†é”™è¯¯æ ¼å¼ sock_extended_err ç¼–ç ã€‚
+
+控制数æ®ä¸­çš„级别和类型字段是åè®®æ—特定的,对于 TCP 或 UDP 套接字,分别为 IP_RECVERR 或
+IPV6_RECVERR。对于 VSOCK 套接字,cmsg_level 为 SOL_VSOCK,cmsg_type 为 VSOCK_RECVERR。
+
+é”™è¯¯æ¥æºæ˜¯æ–°çš„类型 SO_EE_ORIGIN_ZEROCOPYã€‚å¦‚å‰æ‰€è¿°ï¼Œee_errno 为零,以é¿å…在套接字上
+阻塞地读å–和写入系统调用。
+
+32 ä½é€šçŸ¥èŒƒå›´ç¼–ç ä¸º [ee_info, ee_data]。这个范围是包å«è¾¹ç•Œå€¼çš„。除了下é¢è®¨è®ºçš„ ee_code
+字段外,结构中的其他字段应被视为未定义的。
+
+::
+
+ struct sock_extended_err *serr;
+ struct cmsghdr *cm;
+
+ cm = CMSG_FIRSTHDR(msg);
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_type != IP_RECVERR)
+ error(1, 0, "cmsg");
+
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != 0 ||
+ serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "serr");
+
+printf("completed: %u..%u\n", serr->ee_info, serr->ee_data);
+
+
+延迟拷è´
+~~~~~~~~
+
+传递标志 MSG_ZEROCOPY 是å‘内核å‘出的一个æç¤ºï¼Œè®©å†…æ ¸é‡‡ç”¨å…æ‹·è´çš„ç­–ç•¥ï¼ŒåŒæ—¶ä¹Ÿæ˜¯ä¸€ç§çº¦
+定,å³å†…核会对完æˆé€šçŸ¥è¿›è¡ŒæŽ’队处ç†ã€‚但这并ä¸ä¿è¯æ‹·è´æ“作一定会被çœç•¥ã€‚
+
+æ‹·è´é¿å…䏿€»æ˜¯é€‚ç”¨çš„ã€‚ä¸æ”¯æŒåˆ†æ•£/èšé›† I/O 的设备无法å‘é€ç”±å†…核生æˆçš„å议头加上零拷è´ç”¨æˆ·
+æ•°æ®ç»„æˆçš„æ•°æ®åŒ…。数æ®åŒ…å¯èƒ½éœ€è¦åœ¨å议栈底层转æ¢ä¸ºä¸€ä»½ç§æœ‰æ•°æ®å‰¯æœ¬ï¼Œä¾‹å¦‚用于计算校验和。
+
+在所有这些情况下,当内核释放对共享页é¢çš„æŒæœ‰æƒæ—¶ï¼Œå®ƒä¼šè¿”回一个完æˆé€šçŸ¥ã€‚该通知å¯èƒ½åœ¨ï¼ˆå·²
+æ‹·è´ï¼‰æ•°æ®å®Œå…¨ä¼ è¾“之å‰åˆ°è¾¾ã€‚因此。零拷è´å®Œæˆé€šçŸ¥å¹¶ä¸æ˜¯ä¼ è¾“完æˆé€šçŸ¥ã€‚
+
+如果数æ®ä¸åœ¨ç¼“存中,延迟拷è´å¯èƒ½ä¼šæ¯”ç«‹å³åœ¨ç³»ç»Ÿè°ƒç”¨ä¸­æ‹·è´å¼€é”€æ›´å¤§ã€‚进程还会因通知处ç†è€Œäº§
+ç”Ÿæˆæœ¬ï¼Œä½†å´æ²¡æœ‰å¸¦æ¥ä»»ä½•好处。因此,内核会在返回时通过在 ee_code 字段中设置标志
+SO_EE_CODE_ZEROCOPY_COPIED æ¥æŒ‡ç¤ºæ•°æ®æ˜¯å¦ä»¥æ‹·è´çš„æ–¹å¼å®Œæˆã€‚进程å¯ä»¥åˆ©ç”¨è¿™ä¸ªä¿¡å·ï¼Œåœ¨
+åŒä¸€å¥—接字上åŽç»­çš„è¯·æ±‚ä¸­åœæ­¢ä¼ é€’ MSG_ZEROCOPY 标志。
+
+实现
+====
+
+环回
+----
+
+对于 TCP 和 UDP:
+如果接收进程ä¸è¯»å–其套接字,å‘é€åˆ°æœ¬åœ°å¥—接字的数æ®å¯èƒ½ä¼šæ— é™æœŸæŽ’é˜Ÿã€‚æ— é™æœŸçš„通知延迟是ä¸
+坿ޥå—的。因此,所有使用 MSG_ZEROCOPY 生æˆå¹¶çŽ¯å›žåˆ°æœ¬åœ°å¥—æŽ¥å­—çš„æ•°æ®åŒ…都将产生延迟拷è´ã€‚
+这包括环回到数æ®åŒ…套接字(例如,tcpdump)和 tun 设备。
+
+对于 VSOCK:
+å‘é€åˆ°æœ¬åœ°å¥—接字的数æ®è·¯å¾„ä¸Žéžæœ¬åœ°å¥—接字相åŒã€‚
+
+测试
+====
+
+更具体的示例代ç å¯ä»¥åœ¨å†…æ ¸æºç çš„ tools/testing/selftests/net/msg_zerocopy.c 中找到。
+
+è¦ç•™æ„环回约æŸé—®é¢˜ã€‚该测试å¯ä»¥åœ¨ä¸€å¯¹ä¸»æœºä¹‹é—´è¿›è¡Œã€‚但如果是在本地的一对进程之间è¿è¡Œï¼Œä¾‹å¦‚当使用
+msg_zerocopy.sh 脚本在跨命å空间的虚拟以太网(veth)对之间è¿è¡Œæ—¶ï¼Œæµ‹è¯•å°†ä¸ä¼šæ˜¾ç¤ºå‡ºä»»ä½•性能
+æå‡ã€‚为了便于测试,å¯ä»¥é€šè¿‡è®© skb_orphan_frags_rx 与 skb_orphan_frags 相åŒï¼Œæ¥æš‚时放宽
+环回é™åˆ¶ã€‚
+
+对于 VSOCK 类型套接字的示例å¯ä»¥åœ¨ tools/testing/vsock/vsock_test_zerocopy.c 中找到。
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 7a1409ecc238..3d1171fd96c1 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -28,10 +28,10 @@ or number from the table below. Because of the large number of drivers,
many drivers share a partial letter with other drivers.
If you are writing a driver for a new device and need a letter, pick an
-unused block with enough room for expansion: 32 to 256 ioctl commands.
-You can register the block by patching this file and submitting the
-patch to Linus Torvalds. Or you can e-mail me at <mec@shout.net> and
-I'll register one for you.
+unused block with enough room for expansion: 32 to 256 ioctl commands
+should suffice. You can register the block by patching this file and
+submitting the patch through :doc:`usual patch submission process
+</process/submitting-patches>`.
The second argument to _IO, _IOW, _IOR, or _IOWR is a sequence number
to distinguish ioctls from each other. The third argument to _IOW,
@@ -62,9 +62,8 @@ Following this convention is good because:
(5) When following the convention, the driver code can use generic
code to copy the parameters between user and kernel space.
-This table lists ioctls visible from user land for Linux/x86. It contains
-most drivers up to 2.6.31, but I know I am missing some. There has been
-no attempt to list non-X86 architectures or ioctls from drivers/staging/.
+This table lists ioctls visible from userland, excluding ones from
+drivers/staging/.
==== ===== ======================================================= ================================================================
Code Seq# Include File Comments
@@ -366,6 +365,12 @@ Code Seq# Include File Comments
<mailto:linuxppc-dev>
0xB2 01-02 arch/powerpc/include/uapi/asm/papr-sysparm.h powerpc/pseries system parameter API
<mailto:linuxppc-dev>
+0xB2 03-05 arch/powerpc/include/uapi/asm/papr-indices.h powerpc/pseries indices API
+ <mailto:linuxppc-dev>
+0xB2 06-07 arch/powerpc/include/uapi/asm/papr-platform-dump.h powerpc/pseries Platform Dump API
+ <mailto:linuxppc-dev>
+0xB2 08 powerpc/include/uapi/asm/papr-physical-attestation.h powerpc/pseries Physical Attestation API
+ <mailto:linuxppc-dev>
0xB3 00 linux/mmc/ioctl.h
0xB4 00-0F linux/gpio.h <mailto:linux-gpio@vger.kernel.org>
0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index f21f1dabb5fe..76926087ee0a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1097,7 +1097,7 @@ R: Carlos Bilbao <carlos.bilbao@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: Documentation/arch/x86/amd_hsmp.rst
-F: arch/x86/include/asm/amd_hsmp.h
+F: arch/x86/include/asm/amd/hsmp.h
F: arch/x86/include/uapi/asm/amd_hsmp.h
F: drivers/platform/x86/amd/hsmp/
@@ -1142,7 +1142,7 @@ M: Mario Limonciello <mario.limonciello@amd.com>
M: Yazen Ghannam <yazen.ghannam@amd.com>
L: linux-kernel@vger.kernel.org
S: Supported
-F: arch/x86/include/asm/amd_node.h
+F: arch/x86/include/asm/amd/node.h
F: arch/x86/kernel/amd_node.c
AMD PDS CORE DRIVER
@@ -1216,7 +1216,9 @@ AMD SPI DRIVER
M: Raju Rangoju <Raju.Rangoju@amd.com>
L: linux-spi@vger.kernel.org
S: Supported
+F: drivers/spi/spi-amd-pci.c
F: drivers/spi/spi-amd.c
+F: drivers/spi/spi-amd.h
AMD XDNA DRIVER
M: Min Ma <min.ma@amd.com>
@@ -1567,6 +1569,13 @@ W: https://ez.analog.com/linux-software-drivers
F: Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml
F: drivers/iio/filter/admv8818.c
+ANALOG DEVICES INC ADP5055 DRIVER
+M: Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
+S: Supported
+W: https://ez.analog.com/linux-software-drivers
+F: Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml
+F: drivers/regulator/adp5055-regulator.c
+
ANALOG DEVICES INC ADP5061 DRIVER
M: Michael Hennerich <Michael.Hennerich@analog.com>
L: linux-pm@vger.kernel.org
@@ -1682,7 +1691,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Arve Hjønnevåg <arve@android.com>
M: Todd Kjos <tkjos@android.com>
M: Martijn Coenen <maco@android.com>
-M: Joel Fernandes <joel@joelfernandes.org>
+M: Joel Fernandes <joelagnelf@nvidia.com>
M: Christian Brauner <christian@brauner.io>
M: Carlos Llamas <cmllamas@google.com>
M: Suren Baghdasaryan <surenb@google.com>
@@ -3435,6 +3444,7 @@ M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Odd Fixes
F: Documentation/devicetree/bindings/i2c/i2c-wmt.txt
+F: Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml
F: arch/arm/boot/dts/vt8500/
F: arch/arm/mach-vt8500/
F: drivers/clocksource/timer-vt8500.c
@@ -4203,6 +4213,16 @@ F: Documentation/ABI/stable/sysfs-class-bluetooth
F: include/net/bluetooth/
F: net/bluetooth/
+BLZP1600 GPIO DRIVER
+M: James Cowgill <james.cowgill@blaize.com>
+M: Matt Redfearn <matt.redfearn@blaize.com>
+M: Neil Jones <neil.jones@blaize.com>
+M: Nikolaos Pasaloukos <nikolaos.pasaloukos@blaize.com>
+L: linux-gpio@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml
+F: drivers/gpio/gpio-blzp1600.c
+
BONDING DRIVER
M: Jay Vosburgh <jv@jvosburgh.net>
L: netdev@vger.kernel.org
@@ -5510,7 +5530,7 @@ F: Documentation/dev-tools/checkpatch.rst
CHINESE DOCUMENTATION
M: Alex Shi <alexs@kernel.org>
-M: Yanteng Si <siyanteng@loongson.cn>
+M: Yanteng Si <si.yanteng@linux.dev>
R: Dongliang Mu <dzm91@hust.edu.cn>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/alexs/linux.git
S: Maintained
@@ -5655,7 +5675,6 @@ F: include/sound/cs*
F: sound/pci/hda/cirrus*
F: sound/pci/hda/cs*
F: sound/pci/hda/hda_component*
-F: sound/pci/hda/hda_cs_dsp_ctl.*
F: sound/soc/codecs/cs*
CIRRUS LOGIC HAPTIC DRIVERS
@@ -5983,7 +6002,9 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/a.hindborg/linux.git configfs-next
F: fs/configfs/
F: include/linux/configfs.h
+F: rust/kernel/configfs.rs
F: samples/configfs/
+F: samples/rust/rust_configfs.rs
CONGATEC BOARD CONTROLLER MFD DRIVER
M: Thomas Richard <thomas.richard@bootlin.com>
@@ -6261,6 +6282,7 @@ F: Documentation/staging/crc*
F: arch/*/lib/crc*
F: include/linux/crc*
F: lib/crc*
+F: lib/tests/crc_kunit.c
F: scripts/gen-crc-consts.py
CREATIVE SB0540
@@ -6296,6 +6318,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
F: Documentation/crypto/
F: Documentation/devicetree/bindings/crypto/
F: arch/*/crypto/
+F: arch/*/lib/crypto/
F: crypto/
F: drivers/crypto/
F: include/crypto/
@@ -7084,7 +7107,10 @@ T: git git://git.lwn.net/linux.git docs-next
F: Documentation/
F: scripts/check-variable-fonts.sh
F: scripts/documentation-file-ref-check
-F: scripts/kernel-doc
+F: scripts/get_abi.py
+F: scripts/kernel-doc*
+F: scripts/lib/abi/*
+F: scripts/lib/kdoc/*
F: scripts/sphinx-pre-install
X: Documentation/ABI/
X: Documentation/admin-guide/media/
@@ -10116,7 +10142,7 @@ L: linux-acpi@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git
F: Documentation/firmware-guide/acpi/gpio-properties.rst
-F: drivers/gpio/gpiolib-acpi.c
+F: drivers/gpio/gpiolib-acpi-*.c
F: drivers/gpio/gpiolib-acpi.h
GPIO AGGREGATOR
@@ -10147,6 +10173,13 @@ F: drivers/gpio/gpio-regmap.c
F: include/linux/gpio/regmap.h
K: (devm_)?gpio_regmap_(un)?register
+GPIO SLOPPY LOGIC ANALYZER
+M: Wolfram Sang <wsa+renesas@sang-engineering.com>
+S: Supported
+F: Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst
+F: drivers/gpio/gpio-sloppy-logic-analyzer.c
+F: tools/gpio/gpio-sloppy-logic-analyzer.sh
+
GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <brgl@bgdev.pl>
@@ -10969,6 +11002,7 @@ M: Pengyu Luo <mitltlatltl@gmail.com>
S: Maintained
F: Documentation/devicetree/bindings/platform/huawei,gaokun-ec.yaml
F: drivers/platform/arm64/huawei-gaokun-ec.c
+F: drivers/power/supply/huawei-gaokun-battery.c
F: include/linux/platform_data/huawei-gaokun-ec.h
HUGETLB SUBSYSTEM
@@ -11099,6 +11133,14 @@ L: linuxppc-dev@lists.ozlabs.org
S: Odd Fixes
F: drivers/tty/hvc/
+HUNG TASK DETECTOR
+M: Andrew Morton <akpm@linux-foundation.org>
+R: Lance Yang <lance.yang@linux.dev>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: include/linux/hung_task.h
+F: kernel/hung_task.c
+
I2C ACPI SUPPORT
M: Mika Westerberg <westeri@kernel.org>
L: linux-i2c@vger.kernel.org
@@ -13646,7 +13688,6 @@ M: Madhavan Srinivasan <maddy@linux.ibm.com>
M: Michael Ellerman <mpe@ellerman.id.au>
R: Nicholas Piggin <npiggin@gmail.com>
R: Christophe Leroy <christophe.leroy@csgroup.eu>
-R: Naveen N Rao <naveen@kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Supported
W: https://github.com/linuxppc/wiki/wiki
@@ -13727,7 +13768,7 @@ M: Luc Maranget <luc.maranget@inria.fr>
M: "Paul E. McKenney" <paulmck@kernel.org>
R: Akira Yokosawa <akiyks@gmail.com>
R: Daniel Lustig <dlustig@nvidia.com>
-R: Joel Fernandes <joel@joelfernandes.org>
+R: Joel Fernandes <joelagnelf@nvidia.com>
L: linux-kernel@vger.kernel.org
L: linux-arch@vger.kernel.org
L: lkmm@lists.linux.dev
@@ -13932,6 +13973,13 @@ S: Maintained
F: Documentation/devicetree/bindings/i2c/loongson,ls2x-i2c.yaml
F: drivers/i2c/busses/i2c-ls2x.c
+LOONGSON PWM DRIVER
+M: Binbin Zhou <zhoubinbin@loongson.cn>
+L: linux-pwm@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml
+F: drivers/pwm/pwm-loongson.c
+
LOONGSON-2 SOC SERIES CLOCK DRIVER
M: Yinbo Zhu <zhuyinbo@loongson.cn>
L: linux-clk@vger.kernel.org
@@ -14289,9 +14337,8 @@ F: drivers/gpu/drm/armada/
F: include/uapi/drm/armada_drm.h
MARVELL CRYPTO DRIVER
-M: Boris Brezillon <bbrezillon@kernel.org>
-M: Arnaud Ebalard <arno@natisbad.org>
M: Srujana Challa <schalla@marvell.com>
+M: Bharat Bhushan <bbhushan2@marvell.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: drivers/crypto/marvell/
@@ -14598,6 +14645,16 @@ F: Documentation/devicetree/bindings/mfd/maxim,max77714.yaml
F: drivers/mfd/max77714.c
F: include/linux/mfd/max77714.h
+MAXIM MAX77759 PMIC MFD DRIVER
+M: André Draszik <andre.draszik@linaro.org>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/*/maxim,max77759*.yaml
+F: drivers/gpio/gpio-max77759.c
+F: drivers/mfd/max77759.c
+F: drivers/nvmem/max77759-nvmem.c
+F: include/linux/mfd/max77759.h
+
MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
M: Javier Martinez Canillas <javier@dowhile0.org>
L: linux-kernel@vger.kernel.org
@@ -15542,6 +15599,53 @@ S: Maintained
F: include/linux/execmem.h
F: mm/execmem.c
+MEMORY MANAGEMENT - GUP (GET USER PAGES)
+M: Andrew Morton <akpm@linux-foundation.org>
+M: David Hildenbrand <david@redhat.com>
+R: Jason Gunthorpe <jgg@nvidia.com>
+R: John Hubbard <jhubbard@nvidia.com>
+R: Peter Xu <peterx@redhat.com>
+L: linux-mm@kvack.org
+S: Maintained
+W: http://www.linux-mm.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F: mm/gup.c
+
+MEMORY MANAGEMENT - KSM (Kernel Samepage Merging)
+M: Andrew Morton <akpm@linux-foundation.org>
+M: David Hildenbrand <david@redhat.com>
+R: Xu Xin <xu.xin16@zte.com.cn>
+R: Chengming Zhou <chengming.zhou@linux.dev>
+L: linux-mm@kvack.org
+S: Maintained
+W: http://www.linux-mm.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F: Documentation/admin-guide/mm/ksm.rst
+F: Documentation/mm/ksm.rst
+F: include/linux/ksm.h
+F: include/trace/events/ksm.h
+F: mm/ksm.c
+
+MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION
+M: Andrew Morton <akpm@linux-foundation.org>
+M: David Hildenbrand <david@redhat.com>
+R: Zi Yan <ziy@nvidia.com>
+R: Matthew Brost <matthew.brost@intel.com>
+R: Joshua Hahn <joshua.hahnjy@gmail.com>
+R: Rakie Kim <rakie.kim@sk.com>
+R: Byungchul Park <byungchul@sk.com>
+R: Gregory Price <gourry@gourry.net>
+R: Ying Huang <ying.huang@linux.alibaba.com>
+L: linux-mm@kvack.org
+S: Maintained
+W: http://www.linux-mm.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F: include/linux/mempolicy.h
+F: include/linux/migrate.h
+F: mm/mempolicy.c
+F: mm/migrate.c
+F: mm/migrate_device.c
+
MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION
M: Andrew Morton <akpm@linux-foundation.org>
M: Mike Rapoport <rppt@kernel.org>
@@ -15554,7 +15658,7 @@ F: mm/numa_memblks.c
MEMORY MANAGEMENT - PAGE ALLOCATOR
M: Andrew Morton <akpm@linux-foundation.org>
-R: Vlastimil Babka <vbabka@suse.cz>
+M: Vlastimil Babka <vbabka@suse.cz>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
R: Brendan Jackman <jackmanb@google.com>
@@ -15562,10 +15666,25 @@ R: Johannes Weiner <hannes@cmpxchg.org>
R: Zi Yan <ziy@nvidia.com>
L: linux-mm@kvack.org
S: Maintained
+F: include/linux/compaction.h
+F: include/linux/gfp.h
+F: include/linux/page-isolation.h
F: mm/compaction.c
F: mm/page_alloc.c
-F: include/linux/gfp.h
-F: include/linux/compaction.h
+F: mm/page_isolation.c
+
+MEMORY MANAGEMENT - RECLAIM
+M: Andrew Morton <akpm@linux-foundation.org>
+M: Johannes Weiner <hannes@cmpxchg.org>
+R: David Hildenbrand <david@redhat.com>
+R: Michal Hocko <mhocko@kernel.org>
+R: Qi Zheng <zhengqi.arch@bytedance.com>
+R: Shakeel Butt <shakeel.butt@linux.dev>
+R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+L: linux-mm@kvack.org
+S: Maintained
+F: mm/pt_reclaim.c
+F: mm/vmscan.c
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
M: Andrew Morton <akpm@linux-foundation.org>
@@ -16349,6 +16468,7 @@ F: arch/mips/include/asm/mach-loongson32/
F: arch/mips/loongson32/
F: drivers/*/*loongson1*
F: drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
+F: sound/soc/loongson/loongson1_ac97.c
MIPS/LOONGSON2EF ARCHITECTURE
M: Jiaxun Yang <jiaxun.yang@flygoat.com>
@@ -18432,7 +18552,7 @@ F: include/uapi/linux/ppdev.h
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
R: Ajay Kaher <ajay.kaher@broadcom.com>
-R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Alexey Makhalov <alexey.makhalov@broadcom.com>
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
@@ -20422,11 +20542,14 @@ F: tools/testing/selftests/net/rds/
RDT - RESOURCE ALLOCATION
M: Tony Luck <tony.luck@intel.com>
M: Reinette Chatre <reinette.chatre@intel.com>
+R: Dave Martin <Dave.Martin@arm.com>
+R: James Morse <james.morse@arm.com>
L: linux-kernel@vger.kernel.org
S: Supported
-F: Documentation/arch/x86/resctrl*
+F: Documentation/filesystems/resctrl.rst
F: arch/x86/include/asm/resctrl.h
F: arch/x86/kernel/cpu/resctrl/
+F: fs/resctrl/
F: include/linux/resctrl*.h
F: tools/testing/selftests/resctrl/
@@ -20434,14 +20557,14 @@ READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <paulmck@kernel.org>
M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h)
-M: Joel Fernandes <joel@joelfernandes.org>
+M: Joel Fernandes <joelagnelf@nvidia.com>
M: Josh Triplett <josh@joshtriplett.org>
M: Boqun Feng <boqun.feng@gmail.com>
M: Uladzislau Rezki <urezki@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>
-R: Zqiang <qiang.zhang1211@gmail.com>
+R: Zqiang <qiang.zhang@linux.dev>
L: rcu@vger.kernel.org
S: Supported
W: http://www.rdrop.com/users/paulmck/RCU/
@@ -20594,8 +20717,8 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml
F: drivers/i2c/busses/i2c-emev2.c
RENESAS ETHERNET AVB DRIVER
-M: Paul Barker <paul.barker.ct@bp.renesas.com>
M: Niklas Söderlund <niklas.soderlund@ragnatech.se>
+R: Paul Barker <paul@pbarker.dev>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
S: Maintained
@@ -21053,6 +21176,13 @@ F: Documentation/devicetree/bindings/sound/rockchip,rk3308-codec.yaml
F: sound/soc/codecs/rk3308_codec.c
F: sound/soc/codecs/rk3308_codec.h
+ROCKCHIP SAI DRIVER
+M: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+L: linux-rockchip@lists.infradead.org
+S: Maintained
+F: Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml
+F: sound/soc/rockchip/rockchip_sai.*
+
ROCKCHIP VIDEO DECODER DRIVER
M: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
L: linux-media@vger.kernel.org
@@ -21886,6 +22016,7 @@ F: include/linux/seccomp.h
F: include/uapi/linux/seccomp.h
F: kernel/seccomp.c
F: tools/testing/selftests/kselftest_harness.h
+F: tools/testing/selftests/kselftest_harness/
F: tools/testing/selftests/seccomp/*
K: \bsecure_computing
K: \bTIF_SECCOMP\b
@@ -22797,10 +22928,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
F: Documentation/devicetree/bindings/sound/
F: Documentation/sound/soc/
F: include/dt-bindings/sound/
-F: include/sound/cs-amp-lib.h
-F: include/sound/cs35l*
-F: include/sound/cs4271.h
-F: include/sound/cs42l*
+F: include/sound/cs*
+X: include/sound/cs4231-regs.h
+X: include/sound/cs8403.h
+X: include/sound/cs8427.h
F: include/sound/madera-pdata.h
F: include/sound/soc*
F: include/sound/sof.h
@@ -22916,7 +23047,6 @@ F: drivers/accessibility/speakup/
SPEAR PLATFORM/CLOCK/PINCTRL SUPPORT
M: Viresh Kumar <vireshk@kernel.org>
-M: Shiraz Hashim <shiraz.linux.kernel@gmail.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: soc@lists.linux.dev
S: Maintained
@@ -24029,7 +24159,6 @@ F: Documentation/devicetree/bindings/sound/ti,tlv320*.yaml
F: Documentation/devicetree/bindings/sound/ti,tlv320adcx140.yaml
F: include/sound/tas2*.h
F: include/sound/tlv320*.h
-F: include/sound/tpa6130a2-plat.h
F: sound/pci/hda/tas2781_hda_i2c.c
F: sound/soc/codecs/pcm1681.c
F: sound/soc/codecs/pcm1789*.*
@@ -24555,6 +24684,13 @@ L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/topstar-laptop.c
+TORADEX EMBEDDED CONTROLLER DRIVER
+M: Emanuele Ghidoli <ghidoliemanuele@gmail.com>
+M: Francesco Dolcini <francesco@dolcini.it>
+S: Maintained
+F: Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml
+F: drivers/power/reset/tdx-ec-poweroff.c
+
TORTURE-TEST MODULES
M: Davidlohr Bueso <dave@stgolabs.net>
M: "Paul E. McKenney" <paulmck@kernel.org>
@@ -25894,7 +26030,7 @@ F: tools/testing/vsock/
VMALLOC
M: Andrew Morton <akpm@linux-foundation.org>
-R: Uladzislau Rezki <urezki@gmail.com>
+M: Uladzislau Rezki <urezki@gmail.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
@@ -25918,7 +26054,7 @@ F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
M: Ajay Kaher <ajay.kaher@broadcom.com>
-M: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+M: Alexey Makhalov <alexey.makhalov@broadcom.com>
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
@@ -25946,7 +26082,7 @@ F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
M: Nick Shi <nick.shi@broadcom.com>
R: Ajay Kaher <ajay.kaher@broadcom.com>
-R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Alexey Makhalov <alexey.makhalov@broadcom.com>
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
@@ -26290,7 +26426,7 @@ R: Ahmed S. Darwish <darwi@linutronix.de>
L: x86-cpuid@lists.linux.dev
S: Maintained
W: https://x86-cpuid.org
-F: tools/arch/x86/kcpuid/cpuid.csv
+F: tools/arch/x86/kcpuid/
X86 ENTRY CODE
M: Andy Lutomirski <luto@kernel.org>
@@ -26818,6 +26954,14 @@ L: linux-kernel@vger.kernel.org
S: Maintained
F: arch/x86/kernel/cpu/zhaoxin.c
+ZONED LOOP DEVICE
+M: Damien Le Moal <dlemoal@kernel.org>
+R: Christoph Hellwig <hch@lst.de>
+L: linux-block@vger.kernel.org
+S: Maintained
+F: Documentation/admin-guide/blockdev/zoned_loop.rst
+F: drivers/block/zloop.c
+
ZONEFS FILESYSTEM
M: Damien Le Moal <dlemoal@kernel.org>
M: Naohiro Aota <naohiro.aota@wdc.com>
diff --git a/Makefile b/Makefile
index 64c514f4bc19..3244c2a519e2 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 15
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION =
NAME = Baby Opossum Posse
# *DOCUMENTATION*
@@ -458,6 +458,11 @@ endif
HOSTRUSTC = rustc
HOSTPKG_CONFIG = pkg-config
+# the KERNELDOC macro needs to be exported, as scripts/Makefile.build
+# has a logic to call it
+KERNELDOC = $(srctree)/scripts/kernel-doc.py
+export KERNELDOC
+
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
-O2 -fomit-frame-pointer -std=gnu11
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
@@ -1068,8 +1073,7 @@ KBUILD_CFLAGS += -fno-builtin-wcslen
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
-KBUILD_CPPFLAGS += $(call cc-option,-ffile-prefix-map=$(srcroot)/=)
-KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/=
+KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=)
endif
# include additional Makefiles when needed
diff --git a/arch/Kconfig b/arch/Kconfig
index b0adb665041f..a3308a220f86 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1518,6 +1518,14 @@ config STRICT_MODULE_RWX
config ARCH_HAS_PHYS_TO_DMA
bool
+config ARCH_HAS_CPU_RESCTRL
+ bool
+ help
+ An architecture selects this option to indicate that the necessary
+ hooks are provided to support the common memory system usage
+ monitoring and control interfaces provided by the 'resctrl'
+ filesystem (see RESCTRL_FS).
+
config HAVE_ARCH_COMPILER_H
bool
help
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1f0eb4f25c0f..a3eaab094ece 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -852,14 +852,9 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1);
perf_sample_data_init(&data, 0, hwc->last_period);
- if (alpha_perf_event_set_period(event, hwc, idx)) {
- if (perf_event_overflow(event, &data, regs)) {
- /* Interrupts coming too quickly; "throttle" the
- * counter, i.e., disable it for a little while.
- */
- alpha_pmu_stop(event, 0);
- }
- }
+ if (alpha_perf_event_set_period(event, hwc, idx))
+ perf_event_overflow(event, &data, regs);
+
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
return;
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index fea29d9d18d6..809edc59af25 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -170,7 +170,7 @@ init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
if (parent)
panic("DeviceTree incore intc not a root irq controller\n");
- root_domain = irq_domain_add_linear(intc, nr_cpu_irqs, &arcv2_irq_ops, NULL);
+ root_domain = irq_domain_create_linear(of_fwnode_handle(intc), nr_cpu_irqs, &arcv2_irq_ops, NULL);
if (!root_domain)
panic("root irq domain not avail\n");
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 1d2ff1c6a61b..1b159e9e0234 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -112,8 +112,9 @@ init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
if (parent)
panic("DeviceTree incore intc not a root irq controller\n");
- root_domain = irq_domain_add_linear(intc, NR_CPU_IRQS,
- &arc_intc_domain_ops, NULL);
+ root_domain = irq_domain_create_linear(of_fwnode_handle(intc),
+ NR_CPU_IRQS,
+ &arc_intc_domain_ops, NULL);
if (!root_domain)
panic("root irq domain not avail\n");
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index cdd370ec9280..02b28a9324f4 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -391,7 +391,8 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
pr_info("MCIP: IDU supports %u common irqs\n", nr_irqs);
- domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(intc), nr_irqs,
+ &idu_irq_ops, NULL);
/* Parent interrupts (core-intc) are already mapped */
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 6e5a651cd75c..ed6d4f0cd621 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -599,10 +599,8 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
arc_perf_event_update(event, &event->hw, event->hw.idx);
perf_sample_data_init(&data, 0, hwc->last_period);
- if (arc_pmu_event_set_period(event)) {
- if (perf_event_overflow(event, &data, regs))
- arc_pmu_stop(event, 0);
- }
+ if (arc_pmu_event_set_period(event))
+ perf_event_overflow(event, &data, regs);
active_ints &= ~BIT(idx);
} while (active_ints);
diff --git a/arch/arm/boot/dts/amlogic/meson8.dtsi b/arch/arm/boot/dts/amlogic/meson8.dtsi
index 847f7b1f1e96..f785e0de0847 100644
--- a/arch/arm/boot/dts/amlogic/meson8.dtsi
+++ b/arch/arm/boot/dts/amlogic/meson8.dtsi
@@ -451,7 +451,7 @@
pwm_ef: pwm@86c0 {
compatible = "amlogic,meson8-pwm-v2";
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "Video PLL" */
+ <0>, /* unknown/untested, the datasheet calls it "Video PLL" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
reg = <0x86c0 0x10>;
@@ -705,7 +705,7 @@
&pwm_ab {
compatible = "amlogic,meson8-pwm-v2";
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "Video PLL" */
+ <0>, /* unknown/untested, the datasheet calls it "Video PLL" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
@@ -713,7 +713,7 @@
&pwm_cd {
compatible = "amlogic,meson8-pwm-v2";
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "Video PLL" */
+ <0>, /* unknown/untested, the datasheet calls it "Video PLL" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
diff --git a/arch/arm/boot/dts/amlogic/meson8b.dtsi b/arch/arm/boot/dts/amlogic/meson8b.dtsi
index 0876611ce26a..fdb0abe23a0c 100644
--- a/arch/arm/boot/dts/amlogic/meson8b.dtsi
+++ b/arch/arm/boot/dts/amlogic/meson8b.dtsi
@@ -406,7 +406,7 @@
compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2";
reg = <0x86c0 0x10>;
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "Video PLL" */
+ <0>, /* unknown/untested, the datasheet calls it "Video PLL" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
#pwm-cells = <3>;
@@ -680,7 +680,7 @@
&pwm_ab {
compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2";
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "Video PLL" */
+ <0>, /* unknown/untested, the datasheet calls it "Video PLL" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
@@ -688,7 +688,7 @@
&pwm_cd {
compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2";
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "Video PLL" */
+ <0>, /* unknown/untested, the datasheet calls it "Video PLL" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 9846f30990f7..02eda44a6faa 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -416,9 +416,9 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
writel_relaxed(~0, irqbase + SA1111_INTSTATCLR0);
writel_relaxed(~0, irqbase + SA1111_INTSTATCLR1);
- sachip->irqdomain = irq_domain_add_linear(NULL, SA1111_IRQ_NR,
- &sa1111_irqdomain_ops,
- sachip);
+ sachip->irqdomain = irq_domain_create_linear(NULL, SA1111_IRQ_NR,
+ &sa1111_irqdomain_ops,
+ sachip);
if (!sachip->irqdomain) {
irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
return -ENOMEM;
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index e81a5d6c1c20..e81964cce516 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -349,7 +349,7 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_XTS=m
@@ -364,7 +364,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_SHA1_ARM_NEON=m
-CONFIG_CRYPTO_SHA256_ARM=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_CHACHA20_NEON=m
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index 275ddf7a3a14..242e7d5a3f68 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -93,15 +93,13 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_KEYS=y
-CONFIG_CRYPTO_MANAGER=y
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
# CONFIG_CRYPTO_ECHAINIV is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_SEQIV=m
CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_SHA1_ARM_NEON=m
CONFIG_CRYPTO_SHA1_ARM_CE=m
-CONFIG_CRYPTO_SHA2_ARM_CE=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ad037c175fdb..aca01ad6aafc 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1121,25 +1121,6 @@ CONFIG_QCOM_SMSM=y
CONFIG_QCOM_SOCINFO=m
CONFIG_QCOM_STATS=m
CONFIG_QCOM_WCNSS_CTRL=m
-CONFIG_ARCH_EMEV2=y
-CONFIG_ARCH_R8A7794=y
-CONFIG_ARCH_R8A7779=y
-CONFIG_ARCH_R8A7790=y
-CONFIG_ARCH_R8A7778=y
-CONFIG_ARCH_R8A7793=y
-CONFIG_ARCH_R8A7791=y
-CONFIG_ARCH_R8A7792=y
-CONFIG_ARCH_R8A7740=y
-CONFIG_ARCH_R8A73A4=y
-CONFIG_ARCH_R7S72100=y
-CONFIG_ARCH_R7S9210=y
-CONFIG_ARCH_R8A77470=y
-CONFIG_ARCH_R8A7745=y
-CONFIG_ARCH_R8A7742=y
-CONFIG_ARCH_R8A7743=y
-CONFIG_ARCH_R8A7744=y
-CONFIG_ARCH_R9A06G032=y
-CONFIG_ARCH_SH73A0=y
CONFIG_ROCKCHIP_IODOMAIN=y
CONFIG_ARCH_TEGRA_2x_SOC=y
CONFIG_ARCH_TEGRA_3x_SOC=y
@@ -1203,7 +1184,7 @@ CONFIG_PWM_BCM2835=y
CONFIG_PWM_BRCMSTB=m
CONFIG_PWM_FSL_FTM=m
CONFIG_PWM_MESON=m
-CONFIG_PWM_RCAR=m
+CONFIG_PWM_RENESAS_RCAR=m
CONFIG_PWM_RENESAS_TPU=y
CONFIG_PWM_ROCKCHIP=m
CONFIG_PWM_SAMSUNG=m
@@ -1301,7 +1282,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_SHA1_ARM_NEON=m
CONFIG_CRYPTO_SHA1_ARM_CE=m
-CONFIG_CRYPTO_SHA2_ARM_CE=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 75b326bc7830..317f977e509e 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -697,7 +697,6 @@ CONFIG_SECURITY=y
CONFIG_CRYPTO_MICHAEL_MIC=y
CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_SHA1_ARM_NEON=m
-CONFIG_CRYPTO_SHA256_ARM=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 24fca8608554..ded4b9a5accf 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -636,10 +636,9 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_UTF8=m
CONFIG_TIMER_STATS=y
CONFIG_SECURITY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
@@ -660,7 +659,6 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
CONFIG_CRYPTO_SHA1_ARM=m
-CONFIG_CRYPTO_SHA256_ARM=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_FONTS=y
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index 8c30ed14e52c..7c3d6a8f0038 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -63,6 +63,7 @@ CONFIG_SMSC_PHY=y
CONFIG_CAN_RCAR=y
CONFIG_INPUT_EVDEV=y
CONFIG_KEYBOARD_GPIO=y
+CONFIG_KEYBOARD_GPIO_POLLED=y
# CONFIG_INPUT_MOUSE is not set
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_TOUCHSCREEN_EDT_FT5X06=y
@@ -84,6 +85,7 @@ CONFIG_SERIAL_8250_EM=y
CONFIG_SERIAL_SH_SCI=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_DEMUX_PINCTRL=y
+CONFIG_I2C_DESIGNWARE_CORE=y
CONFIG_I2C_EMEV2=y
CONFIG_I2C_GPIO=y
CONFIG_I2C_RIIC=y
@@ -104,7 +106,7 @@ CONFIG_GPIO_PCF857X=y
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_RMOBILE=y
CONFIG_POWER_SUPPLY=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM75=y
CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y
CONFIG_RCAR_THERMAL=y
@@ -174,6 +176,9 @@ CONFIG_USB_RENESAS_USBHS_UDC=y
CONFIG_USB_RENESAS_USBF=y
CONFIG_USB_ETH=y
CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ARASAN=y
CONFIG_MMC_SDHI=y
CONFIG_MMC_SH_MMCIF=y
CONFIG_NEW_LEDS=y
@@ -195,29 +200,10 @@ CONFIG_RCAR_DMAC=y
CONFIG_RENESAS_USB_DMAC=y
CONFIG_RZ_DMAC=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_ARCH_EMEV2=y
-CONFIG_ARCH_R8A7794=y
-CONFIG_ARCH_R8A7779=y
-CONFIG_ARCH_R8A7790=y
-CONFIG_ARCH_R8A7778=y
-CONFIG_ARCH_R8A7793=y
-CONFIG_ARCH_R8A7791=y
-CONFIG_ARCH_R8A7792=y
-CONFIG_ARCH_R8A7740=y
-CONFIG_ARCH_R8A73A4=y
-CONFIG_ARCH_R7S72100=y
-CONFIG_ARCH_R7S9210=y
-CONFIG_ARCH_R8A77470=y
-CONFIG_ARCH_R8A7745=y
-CONFIG_ARCH_R8A7742=y
-CONFIG_ARCH_R8A7743=y
-CONFIG_ARCH_R8A7744=y
-CONFIG_ARCH_R9A06G032=y
-CONFIG_ARCH_SH73A0=y
CONFIG_IIO=y
CONFIG_AK8975=y
CONFIG_PWM=y
-CONFIG_PWM_RCAR=y
+CONFIG_PWM_RENESAS_RCAR=y
CONFIG_PWM_RENESAS_TPU=y
CONFIG_PHY_RCAR_GEN2=y
CONFIG_PHY_RCAR_GEN3_USB2=y
diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig
index ffec59e3f49c..ac2a0f998c73 100644
--- a/arch/arm/configs/spitz_defconfig
+++ b/arch/arm/configs/spitz_defconfig
@@ -215,7 +215,7 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_DEBUG_KERNEL=y
CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_AES=m
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 23e4ea067ddb..7efb9a8596e4 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -46,30 +46,6 @@ config CRYPTO_NHPOLY1305_NEON
Architecture: arm using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_POLY1305_ARM
- tristate
- select CRYPTO_HASH
- select CRYPTO_ARCH_HAVE_LIB_POLY1305
- default CRYPTO_LIB_POLY1305_INTERNAL
- help
- Poly1305 authenticator algorithm (RFC7539)
-
- Architecture: arm optionally using
- - NEON (Advanced SIMD) extensions
-
-config CRYPTO_BLAKE2S_ARM
- bool "Hash functions: BLAKE2s"
- select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
- help
- BLAKE2s cryptographic hash function (RFC 7693)
-
- Architecture: arm
-
- This is faster than the generic implementations of BLAKE2s and
- BLAKE2b, but slower than the NEON implementation of BLAKE2b.
- There is no NEON implementation of BLAKE2s, since NEON doesn't
- really help with it.
-
config CRYPTO_BLAKE2B_NEON
tristate "Hash functions: BLAKE2b (NEON)"
depends on KERNEL_MODE_NEON
@@ -117,27 +93,6 @@ config CRYPTO_SHA1_ARM_CE
Architecture: arm using ARMv8 Crypto Extensions
-config CRYPTO_SHA2_ARM_CE
- tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_SHA256_ARM
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: arm using
- - ARMv8 Crypto Extensions
-
-config CRYPTO_SHA256_ARM
- tristate "Hash functions: SHA-224 and SHA-256 (NEON)"
- select CRYPTO_HASH
- depends on !CPU_V7M
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: arm using
- - NEON (Advanced SIMD) extensions
-
config CRYPTO_SHA512_ARM
tristate "Hash functions: SHA-384 and SHA-512 (NEON)"
select CRYPTO_HASH
@@ -172,7 +127,6 @@ config CRYPTO_AES_ARM_BS
select CRYPTO_AES_ARM
select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
- select CRYPTO_SIMD
help
Length-preserving ciphers: AES cipher algorithms (FIPS-197)
with block cipher modes:
@@ -200,7 +154,6 @@ config CRYPTO_AES_ARM_CE
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
- select CRYPTO_SIMD
help
Length-preserving ciphers: AES cipher algorithms (FIPS-197)
with block cipher modes:
@@ -214,17 +167,5 @@ config CRYPTO_AES_ARM_CE
Architecture: arm using:
- ARMv8 Crypto Extensions
-config CRYPTO_CHACHA20_NEON
- tristate
- select CRYPTO_SKCIPHER
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- default CRYPTO_LIB_CHACHA_INTERNAL
- help
- Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
- stream cipher algorithms
-
- Architecture: arm using:
- - NEON (Advanced SIMD) extensions
-
endmenu
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 3d0e23ff9e74..8479137c6e80 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -7,37 +7,25 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
-obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
-obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
-sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
-sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
-libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
-sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
-chacha-neon-y := chacha-scalar-core.o chacha-glue.o
-chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
-poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
curve25519-neon-y := curve25519-core.o curve25519-glue.o
@@ -47,14 +35,8 @@ quiet_cmd_perl = PERL $@
$(obj)/%-core.S: $(src)/%-armv4.pl
$(call cmd,perl)
-clean-files += poly1305-core.S sha256-core.S sha512-core.S
+clean-files += sha512-core.S
aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1
-AFLAGS_sha256-core.o += $(aflags-thumb2-y)
AFLAGS_sha512-core.o += $(aflags-thumb2-y)
-
-# massage the perlasm code a bit so we only get the NEON routine if we need it
-poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
-poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
-AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 1cf61f51e766..00591895d540 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -10,8 +10,6 @@
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
-#include <crypto/ctr.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
@@ -418,29 +416,6 @@ static int ctr_encrypt(struct skcipher_request *req)
return err;
}
-static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
-{
- struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
- unsigned long flags;
-
- /*
- * Temporarily disable interrupts to avoid races where
- * cachelines are evicted when the CPU is interrupted
- * to do something else.
- */
- local_irq_save(flags);
- aes_encrypt(ctx, dst, src);
- local_irq_restore(flags);
-}
-
-static int ctr_encrypt_sync(struct skcipher_request *req)
-{
- if (!crypto_simd_usable())
- return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
-
- return ctr_encrypt(req);
-}
-
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -586,10 +561,9 @@ static int xts_decrypt(struct skcipher_request *req)
}
static struct skcipher_alg aes_algs[] = { {
- .base.cra_name = "__ecb(aes)",
- .base.cra_driver_name = "__ecb-aes-ce",
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-ce",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
@@ -600,10 +574,9 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(aes)",
- .base.cra_driver_name = "__cbc-aes-ce",
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-ce",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
@@ -615,10 +588,9 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
- .base.cra_name = "__cts(cbc(aes))",
- .base.cra_driver_name = "__cts-cbc-aes-ce",
+ .base.cra_name = "cts(cbc(aes))",
+ .base.cra_driver_name = "cts-cbc-aes-ce",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
@@ -631,10 +603,9 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = cts_cbc_encrypt,
.decrypt = cts_cbc_decrypt,
}, {
- .base.cra_name = "__ctr(aes)",
- .base.cra_driver_name = "__ctr-aes-ce",
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-ce",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
@@ -647,25 +618,9 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
- .base.cra_name = "ctr(aes)",
- .base.cra_driver_name = "ctr-aes-ce-sync",
- .base.cra_priority = 300 - 1,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .chunksize = AES_BLOCK_SIZE,
- .setkey = ce_aes_setkey,
- .encrypt = ctr_encrypt_sync,
- .decrypt = ctr_encrypt_sync,
-}, {
- .base.cra_name = "__xts(aes)",
- .base.cra_driver_name = "__xts-aes-ce",
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-aes-ce",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.base.cra_module = THIS_MODULE,
@@ -679,51 +634,14 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = xts_decrypt,
} };
-static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
-
static void aes_exit(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
- simd_skcipher_free(aes_simd_algs[i]);
-
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
static int __init aes_init(void)
{
- struct simd_skcipher_alg *simd;
- const char *basename;
- const char *algname;
- const char *drvname;
- int err;
- int i;
-
- err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
- if (err)
- return err;
-
- for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
- if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
- continue;
-
- algname = aes_algs[i].base.cra_name + 2;
- drvname = aes_algs[i].base.cra_driver_name + 2;
- basename = aes_algs[i].base.cra_driver_name;
- simd = simd_skcipher_create_compat(aes_algs + i, algname, drvname, basename);
- err = PTR_ERR(simd);
- if (IS_ERR(simd))
- goto unregister_simds;
-
- aes_simd_algs[i] = simd;
- }
-
- return 0;
-
-unregister_simds:
- aes_exit();
- return err;
+ return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
module_cpu_feature_match(AES, aes_init);
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index f6be80b5938b..c60104dc1585 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -8,8 +8,6 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/aes.h>
-#include <crypto/ctr.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
@@ -59,11 +57,6 @@ struct aesbs_xts_ctx {
struct crypto_aes_ctx tweak_key;
};
-struct aesbs_ctr_ctx {
- struct aesbs_ctx key; /* must be first member */
- struct crypto_aes_ctx fallback;
-};
-
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -200,25 +193,6 @@ static int cbc_decrypt(struct skcipher_request *req)
return err;
}
-static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = aes_expandkey(&ctx->fallback, in_key, key_len);
- if (err)
- return err;
-
- ctx->key.rounds = 6 + key_len / 4;
-
- kernel_neon_begin();
- aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
- kernel_neon_end();
-
- return 0;
-}
-
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -254,21 +228,6 @@ static int ctr_encrypt(struct skcipher_request *req)
return err;
}
-static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
-{
- struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- __aes_arm_encrypt(ctx->fallback.key_enc, ctx->key.rounds, src, dst);
-}
-
-static int ctr_encrypt_sync(struct skcipher_request *req)
-{
- if (!crypto_simd_usable())
- return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
-
- return ctr_encrypt(req);
-}
-
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -374,13 +333,12 @@ static int xts_decrypt(struct skcipher_request *req)
}
static struct skcipher_alg aes_algs[] = { {
- .base.cra_name = "__ecb(aes)",
- .base.cra_driver_name = "__ecb-aes-neonbs",
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
.base.cra_module = THIS_MODULE,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
@@ -389,13 +347,12 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(aes)",
- .base.cra_driver_name = "__cbc-aes-neonbs",
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.base.cra_module = THIS_MODULE,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
@@ -405,13 +362,12 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
- .base.cra_name = "__ctr(aes)",
- .base.cra_driver_name = "__ctr-aes-neonbs",
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
.base.cra_module = THIS_MODULE,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
@@ -422,29 +378,12 @@ static struct skcipher_alg aes_algs[] = { {
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
- .base.cra_name = "ctr(aes)",
- .base.cra_driver_name = "ctr-aes-neonbs-sync",
- .base.cra_priority = 250 - 1,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .chunksize = AES_BLOCK_SIZE,
- .walksize = 8 * AES_BLOCK_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aesbs_ctr_setkey_sync,
- .encrypt = ctr_encrypt_sync,
- .decrypt = ctr_encrypt_sync,
-}, {
- .base.cra_name = "__xts(aes)",
- .base.cra_driver_name = "__xts-aes-neonbs",
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
.base.cra_module = THIS_MODULE,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
@@ -455,55 +394,18 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = xts_decrypt,
} };
-static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
-
static void aes_exit(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
- if (aes_simd_algs[i])
- simd_skcipher_free(aes_simd_algs[i]);
-
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
static int __init aes_init(void)
{
- struct simd_skcipher_alg *simd;
- const char *basename;
- const char *algname;
- const char *drvname;
- int err;
- int i;
-
if (!(elf_hwcap & HWCAP_NEON))
return -ENODEV;
- err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
- if (err)
- return err;
-
- for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
- if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
- continue;
-
- algname = aes_algs[i].base.cra_name + 2;
- drvname = aes_algs[i].base.cra_driver_name + 2;
- basename = aes_algs[i].base.cra_driver_name;
- simd = simd_skcipher_create_compat(aes_algs + i, algname, drvname, basename);
- err = PTR_ERR(simd);
- if (IS_ERR(simd))
- goto unregister_simds;
-
- aes_simd_algs[i] = simd;
- }
- return 0;
-
-unregister_simds:
- aes_exit();
- return err;
+ return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
-late_initcall(aes_init);
+module_init(aes_init);
module_exit(aes_exit);
diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c
index 4b59d027ba4a..2ff443a91724 100644
--- a/arch/arm/crypto/blake2b-neon-glue.c
+++ b/arch/arm/crypto/blake2b-neon-glue.c
@@ -7,7 +7,6 @@
#include <crypto/internal/blake2b.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <linux/module.h>
#include <linux/sizes.h>
@@ -21,11 +20,6 @@ asmlinkage void blake2b_compress_neon(struct blake2b_state *state,
static void blake2b_compress_arch(struct blake2b_state *state,
const u8 *block, size_t nblocks, u32 inc)
{
- if (!crypto_simd_usable()) {
- blake2b_compress_generic(state, block, nblocks, inc);
- return;
- }
-
do {
const size_t blocks = min_t(size_t, nblocks,
SZ_4K / BLAKE2B_BLOCK_SIZE);
@@ -42,12 +36,14 @@ static void blake2b_compress_arch(struct blake2b_state *state,
static int crypto_blake2b_update_neon(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2b_update(desc, in, inlen, blake2b_compress_arch);
+ return crypto_blake2b_update_bo(desc, in, inlen, blake2b_compress_arch);
}
-static int crypto_blake2b_final_neon(struct shash_desc *desc, u8 *out)
+static int crypto_blake2b_finup_neon(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen, u8 *out)
{
- return crypto_blake2b_final(desc, out, blake2b_compress_arch);
+ return crypto_blake2b_finup(desc, in, inlen, out,
+ blake2b_compress_arch);
}
#define BLAKE2B_ALG(name, driver_name, digest_size) \
@@ -55,7 +51,9 @@ static int crypto_blake2b_final_neon(struct shash_desc *desc, u8 *out)
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 200, \
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \
+ CRYPTO_AHASH_ALG_BLOCK_ONLY | \
+ CRYPTO_AHASH_ALG_FINAL_NONZERO, \
.base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
@@ -63,8 +61,9 @@ static int crypto_blake2b_final_neon(struct shash_desc *desc, u8 *out)
.setkey = crypto_blake2b_setkey, \
.init = crypto_blake2b_init, \
.update = crypto_blake2b_update_neon, \
- .final = crypto_blake2b_final_neon, \
+ .finup = crypto_blake2b_finup_neon, \
.descsize = sizeof(struct blake2b_state), \
+ .statesize = BLAKE2B_STATE_SIZE, \
}
static struct shash_alg blake2b_neon_algs[] = {
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
deleted file mode 100644
index 50e635512046..000000000000
--- a/arch/arm/crypto/chacha-glue.c
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- * Copyright (C) 2015 Martin Willi
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/cputype.h>
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds, unsigned int nbytes);
-asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
- const u32 *state, int nrounds);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
-
-static inline bool neon_usable(void)
-{
- return static_branch_likely(&use_neon) && crypto_simd_usable();
-}
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- u8 buf[CHACHA_BLOCK_SIZE];
-
- while (bytes > CHACHA_BLOCK_SIZE) {
- unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
-
- chacha_4block_xor_neon(state, dst, src, nrounds, l);
- bytes -= l;
- src += l;
- dst += l;
- state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
- }
- if (bytes) {
- const u8 *s = src;
- u8 *d = dst;
-
- if (bytes != CHACHA_BLOCK_SIZE)
- s = d = memcpy(buf, src, bytes);
- chacha_block_xor_neon(state, d, s, nrounds);
- if (d != dst)
- memcpy(dst, buf, bytes);
- state[12]++;
- }
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
- hchacha_block_arm(state, stream, nrounds);
- } else {
- kernel_neon_begin();
- hchacha_block_neon(state, stream, nrounds);
- kernel_neon_end();
- }
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
- int nrounds)
-{
- if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
- bytes <= CHACHA_BLOCK_SIZE) {
- chacha_doarm(dst, src, bytes, state, nrounds);
- state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
- return;
- }
-
- do {
- unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
-
- kernel_neon_begin();
- chacha_doneon(state, dst, src, todo, nrounds);
- kernel_neon_end();
-
- bytes -= todo;
- src += todo;
- dst += todo;
- } while (bytes);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static int chacha_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv,
- bool neon)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- chacha_init(state, ctx->key, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
- chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, state, ctx->nrounds);
- state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
- } else {
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes, ctx->nrounds);
- kernel_neon_end();
- }
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int do_chacha(struct skcipher_request *req, bool neon)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_stream_xor(req, ctx, req->iv, neon);
-}
-
-static int chacha_arm(struct skcipher_request *req)
-{
- return do_chacha(req, false);
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
- return do_chacha(req, neon_usable());
-}
-
-static int do_xchacha(struct skcipher_request *req, bool neon)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- chacha_init(state, ctx->key, req->iv);
-
- if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
- hchacha_block_arm(state, subctx.key, ctx->nrounds);
- } else {
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
- }
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_stream_xor(req, &subctx, real_iv, neon);
-}
-
-static int xchacha_arm(struct skcipher_request *req)
-{
- return do_xchacha(req, false);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
- return do_xchacha(req, neon_usable());
-}
-
-static struct skcipher_alg arm_algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-arm",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha_arm,
- .decrypt = chacha_arm,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-arm",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = xchacha_arm,
- .decrypt = xchacha_arm,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-arm",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = xchacha_arm,
- .decrypt = xchacha_arm,
- },
-};
-
-static struct skcipher_alg neon_algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha_neon,
- .decrypt = chacha_neon,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- int err = 0;
-
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
- err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
- if (err)
- return err;
- }
-
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
- int i;
-
- switch (read_cpuid_part()) {
- case ARM_CPU_PART_CORTEX_A7:
- case ARM_CPU_PART_CORTEX_A5:
- /*
- * The Cortex-A7 and Cortex-A5 do not perform well with
- * the NEON implementation but do incredibly with the
- * scalar one and use less power.
- */
- for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
- neon_algs[i].base.cra_priority = 0;
- break;
- default:
- static_branch_enable(&use_neon);
- }
-
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
- err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
- if (err)
- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
- }
- }
- return err;
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
- crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
- }
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-arm");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-arm");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-arm");
-#ifdef CONFIG_KERNEL_MODE_NEON
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
-#endif
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index aabfcf522a2c..a52dcc8c1e33 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -8,22 +8,22 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/aes.h>
-#include <crypto/gcm.h>
#include <crypto/b128ops.h>
-#include <crypto/cryptd.h>
+#include <crypto/gcm.h>
+#include <crypto/gf128mul.h>
+#include <crypto/ghash.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
-#include <crypto/gf128mul.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/errno.h>
#include <linux/jump_label.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
@@ -32,9 +32,6 @@ MODULE_ALIAS_CRYPTO("ghash");
MODULE_ALIAS_CRYPTO("gcm(aes)");
MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
-
#define RFC4106_NONCE_SIZE 4
struct ghash_key {
@@ -49,10 +46,8 @@ struct gcm_key {
u8 nonce[]; // for RFC4106 nonce
};
-struct ghash_desc_ctx {
+struct arm_ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
- u8 buf[GHASH_BLOCK_SIZE];
- u32 count;
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
@@ -65,9 +60,9 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);
static int ghash_init(struct shash_desc *desc)
{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- *ctx = (struct ghash_desc_ctx){};
+ *ctx = (struct arm_ghash_desc_ctx){};
return 0;
}
@@ -85,52 +80,49 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
-
- ctx->count += len;
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ int blocks;
- if ((partial + len) >= GHASH_BLOCK_SIZE) {
- struct ghash_key *key = crypto_shash_ctx(desc->tfm);
- int blocks;
+ blocks = len / GHASH_BLOCK_SIZE;
+ ghash_do_update(blocks, ctx->digest, src, key, NULL);
+ return len - blocks * GHASH_BLOCK_SIZE;
+}
- if (partial) {
- int p = GHASH_BLOCK_SIZE - partial;
+static int ghash_export(struct shash_desc *desc, void *out)
+{
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ u8 *dst = out;
- memcpy(ctx->buf + partial, src, p);
- src += p;
- len -= p;
- }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+ return 0;
+}
- blocks = len / GHASH_BLOCK_SIZE;
- len %= GHASH_BLOCK_SIZE;
+static int ghash_import(struct shash_desc *desc, const void *in)
+{
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ const u8 *src = in;
- ghash_do_update(blocks, ctx->digest, src, key,
- partial ? ctx->buf : NULL);
- src += blocks * GHASH_BLOCK_SIZE;
- partial = 0;
- }
- if (len)
- memcpy(ctx->buf + partial, src, len);
+ ctx->digest[1] = get_unaligned_be64(src);
+ ctx->digest[0] = get_unaligned_be64(src + 8);
return 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- if (partial) {
- struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
- memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
- ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
+ memcpy(buf, src, len);
+ ghash_do_update(1, ctx->digest, buf, key, NULL);
+ memzero_explicit(buf, sizeof(buf));
}
- put_unaligned_be64(ctx->digest[1], dst);
- put_unaligned_be64(ctx->digest[0], dst + 8);
-
- *ctx = (struct ghash_desc_ctx){};
- return 0;
+ return ghash_export(desc, dst);
}
static void ghash_reflect(u64 h[], const be128 *k)
@@ -175,13 +167,17 @@ static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
+ .export = ghash_export,
+ .import = ghash_import,
+ .descsize = sizeof(struct arm_ghash_desc_ctx),
+ .statesize = sizeof(struct ghash_desc_ctx),
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-ce",
.base.cra_priority = 300,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
@@ -317,9 +313,6 @@ static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
u8 *tag, *dst;
int tail, err;
- if (WARN_ON_ONCE(!may_use_simd()))
- return -EBUSY;
-
err = skcipher_walk_aead_encrypt(&walk, req, false);
kernel_neon_begin();
@@ -409,9 +402,6 @@ static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
u8 *tag, *dst;
int tail, err, ret;
- if (WARN_ON_ONCE(!may_use_simd()))
- return -EBUSY;
-
scatterwalk_map_and_copy(otag, req->src,
req->assoclen + req->cryptlen - authsize,
authsize, 0);
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
deleted file mode 100644
index 4464ffbf8fd1..000000000000
--- a/arch/arm/crypto/poly1305-glue.c
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
- *
- * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/poly1305.h>
-#include <crypto/internal/simd.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <linux/jump_label.h>
-#include <linux/module.h>
-
-void poly1305_init_arm(void *state, const u8 *key);
-void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
-void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
-void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
-
-void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
-{
-}
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
-{
- poly1305_init_arm(&dctx->h, key);
- dctx->s[0] = get_unaligned_le32(key + 16);
- dctx->s[1] = get_unaligned_le32(key + 20);
- dctx->s[2] = get_unaligned_le32(key + 24);
- dctx->s[3] = get_unaligned_le32(key + 28);
- dctx->buflen = 0;
-}
-EXPORT_SYMBOL(poly1305_init_arch);
-
-static int arm_poly1305_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- dctx->buflen = 0;
- dctx->rset = 0;
- dctx->sset = false;
-
- return 0;
-}
-
-static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
- u32 len, u32 hibit, bool do_neon)
-{
- if (unlikely(!dctx->sset)) {
- if (!dctx->rset) {
- poly1305_init_arm(&dctx->h, src);
- src += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- dctx->rset = 1;
- }
- if (len >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- if (len < POLY1305_BLOCK_SIZE)
- return;
- }
-
- len &= ~(POLY1305_BLOCK_SIZE - 1);
-
- if (static_branch_likely(&have_neon) && likely(do_neon))
- poly1305_blocks_neon(&dctx->h, src, len, hibit);
- else
- poly1305_blocks_arm(&dctx->h, src, len, hibit);
-}
-
-static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
- const u8 *src, u32 len, bool do_neon)
-{
- if (unlikely(dctx->buflen)) {
- u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
-
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- len -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- arm_poly1305_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE, 1, false);
- dctx->buflen = 0;
- }
- }
-
- if (likely(len >= POLY1305_BLOCK_SIZE)) {
- arm_poly1305_blocks(dctx, src, len, 1, do_neon);
- src += round_down(len, POLY1305_BLOCK_SIZE);
- len %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(len)) {
- dctx->buflen = len;
- memcpy(dctx->buf, src, len);
- }
-}
-
-static int arm_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- arm_poly1305_do_update(dctx, src, srclen, false);
- return 0;
-}
-
-static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
- const u8 *src,
- unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- bool do_neon = crypto_simd_usable() && srclen > 128;
-
- if (static_branch_likely(&have_neon) && do_neon)
- kernel_neon_begin();
- arm_poly1305_do_update(dctx, src, srclen, do_neon);
- if (static_branch_likely(&have_neon) && do_neon)
- kernel_neon_end();
- return 0;
-}
-
-void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
- unsigned int nbytes)
-{
- bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
- crypto_simd_usable();
-
- if (unlikely(dctx->buflen)) {
- u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
-
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- nbytes -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_blocks_arm(&dctx->h, dctx->buf,
- POLY1305_BLOCK_SIZE, 1);
- dctx->buflen = 0;
- }
- }
-
- if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
- unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
-
- if (static_branch_likely(&have_neon) && do_neon) {
- do {
- unsigned int todo = min_t(unsigned int, len, SZ_4K);
-
- kernel_neon_begin();
- poly1305_blocks_neon(&dctx->h, src, todo, 1);
- kernel_neon_end();
-
- len -= todo;
- src += todo;
- } while (len);
- } else {
- poly1305_blocks_arm(&dctx->h, src, len, 1);
- src += len;
- }
- nbytes %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(nbytes)) {
- dctx->buflen = nbytes;
- memcpy(dctx->buf, src, nbytes);
- }
-}
-EXPORT_SYMBOL(poly1305_update_arch);
-
-void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
-{
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- }
-
- poly1305_emit_arm(&dctx->h, dst, dctx->s);
- *dctx = (struct poly1305_desc_ctx){};
-}
-EXPORT_SYMBOL(poly1305_final_arch);
-
-static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- poly1305_final_arch(dctx, dst);
- return 0;
-}
-
-static struct shash_alg arm_poly1305_algs[] = {{
- .init = arm_poly1305_init,
- .update = arm_poly1305_update,
- .final = arm_poly1305_final,
- .digestsize = POLY1305_DIGEST_SIZE,
- .descsize = sizeof(struct poly1305_desc_ctx),
-
- .base.cra_name = "poly1305",
- .base.cra_driver_name = "poly1305-arm",
- .base.cra_priority = 150,
- .base.cra_blocksize = POLY1305_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-#ifdef CONFIG_KERNEL_MODE_NEON
-}, {
- .init = arm_poly1305_init,
- .update = arm_poly1305_update_neon,
- .final = arm_poly1305_final,
- .digestsize = POLY1305_DIGEST_SIZE,
- .descsize = sizeof(struct poly1305_desc_ctx),
-
- .base.cra_name = "poly1305",
- .base.cra_driver_name = "poly1305-neon",
- .base.cra_priority = 200,
- .base.cra_blocksize = POLY1305_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-#endif
-}};
-
-static int __init arm_poly1305_mod_init(void)
-{
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
- (elf_hwcap & HWCAP_NEON))
- static_branch_enable(&have_neon);
- else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
- /* register only the first entry */
- return crypto_register_shash(&arm_poly1305_algs[0]);
-
- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
- crypto_register_shashes(arm_poly1305_algs,
- ARRAY_SIZE(arm_poly1305_algs)) : 0;
-}
-
-static void __exit arm_poly1305_mod_exit(void)
-{
- if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
- return;
- if (!static_branch_likely(&have_neon)) {
- crypto_unregister_shash(&arm_poly1305_algs[0]);
- return;
- }
- crypto_unregister_shashes(arm_poly1305_algs,
- ARRAY_SIZE(arm_poly1305_algs));
-}
-
-module_init(arm_poly1305_mod_init);
-module_exit(arm_poly1305_mod_exit);
-
-MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-arm");
-MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index de9100c67b37..fac07a4799de 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -5,20 +5,14 @@
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
+#include <asm/neon.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-#include "sha1.h"
-
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -29,50 +23,36 @@ asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
- return sha1_update_arm(desc, data, len);
+ int remain;
kernel_neon_begin();
- sha1_base_do_update(desc, data, len, sha1_ce_transform);
+ remain = sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
kernel_neon_end();
- return 0;
+ return remain;
}
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable())
- return sha1_finup_arm(desc, data, len, out);
-
kernel_neon_begin();
- if (len)
- sha1_base_do_update(desc, data, len, sha1_ce_transform);
- sha1_base_do_finalize(desc, sha1_ce_transform);
+ sha1_base_do_finup(desc, data, len, sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out);
}
-static int sha1_ce_final(struct shash_desc *desc, u8 *out)
-{
- return sha1_ce_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg alg = {
.init = sha1_base_init,
.update = sha1_ce_update,
- .final = sha1_ce_final,
.finup = sha1_ce_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.digestsize = SHA1_DIGEST_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/arm/crypto/sha1.h b/arch/arm/crypto/sha1.h
deleted file mode 100644
index b1b7e21da2c3..000000000000
--- a/arch/arm/crypto/sha1.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_ARM_CRYPTO_SHA1_H
-#define ASM_ARM_CRYPTO_SHA1_H
-
-#include <linux/crypto.h>
-#include <crypto/sha1.h>
-
-extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
-
-#endif
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 95a727bcd664..255da00c7d98 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -12,53 +12,42 @@
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <asm/byteorder.h>
-
-#include "sha1.h"
+#include <linux/kernel.h>
+#include <linux/module.h>
asmlinkage void sha1_block_data_order(struct sha1_state *digest,
const u8 *data, int rounds);
-int sha1_update_arm(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
/* make sure signature matches sha1_block_fn() */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- return sha1_base_do_update(desc, data, len, sha1_block_data_order);
+ return sha1_base_do_update_blocks(desc, data, len,
+ sha1_block_data_order);
}
-EXPORT_SYMBOL_GPL(sha1_update_arm);
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- sha1_base_do_finalize(desc, sha1_block_data_order);
+ sha1_base_do_finup(desc, data, len, sha1_block_data_order);
return sha1_base_finish(desc, out);
}
-int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- sha1_base_do_update(desc, data, len, sha1_block_data_order);
- return sha1_final(desc, out);
-}
-EXPORT_SYMBOL_GPL(sha1_finup_arm);
-
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_update_arm,
- .final = sha1_final,
.finup = sha1_finup_arm,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",
.cra_priority = 150,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 9c70b87e69f7..d321850f22a6 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -13,18 +13,12 @@
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*/
+#include <asm/neon.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-#include "sha1.h"
+#include <linux/kernel.h>
+#include <linux/module.h>
asmlinkage void sha1_transform_neon(struct sha1_state *state_h,
const u8 *data, int rounds);
@@ -32,50 +26,37 @@ asmlinkage void sha1_transform_neon(struct sha1_state *state_h,
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
- return sha1_update_arm(desc, data, len);
+ int remain;
kernel_neon_begin();
- sha1_base_do_update(desc, data, len, sha1_transform_neon);
+ remain = sha1_base_do_update_blocks(desc, data, len,
+ sha1_transform_neon);
kernel_neon_end();
- return 0;
+ return remain;
}
static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable())
- return sha1_finup_arm(desc, data, len, out);
-
kernel_neon_begin();
- if (len)
- sha1_base_do_update(desc, data, len, sha1_transform_neon);
- sha1_base_do_finalize(desc, sha1_transform_neon);
+ sha1_base_do_finup(desc, data, len, sha1_transform_neon);
kernel_neon_end();
return sha1_base_finish(desc, out);
}
-static int sha1_neon_final(struct shash_desc *desc, u8 *out)
-{
- return sha1_neon_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_neon_update,
- .final = sha1_neon_final,
.finup = sha1_neon_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",
.cra_priority = 250,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
deleted file mode 100644
index aeac45bfbf9f..000000000000
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ /dev/null
@@ -1,109 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
- *
- * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-#include <linux/unaligned.h>
-
-#include "sha256_glue.h"
-
-MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-
-asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
- int blocks);
-
-static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
- return crypto_sha256_arm_update(desc, data, len);
-
- kernel_neon_begin();
- sha256_base_do_update(desc, data, len,
- (sha256_block_fn *)sha2_ce_transform);
- kernel_neon_end();
-
- return 0;
-}
-
-static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- if (!crypto_simd_usable())
- return crypto_sha256_arm_finup(desc, data, len, out);
-
- kernel_neon_begin();
- if (len)
- sha256_base_do_update(desc, data, len,
- (sha256_block_fn *)sha2_ce_transform);
- sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
- kernel_neon_end();
-
- return sha256_base_finish(desc, out);
-}
-
-static int sha2_ce_final(struct shash_desc *desc, u8 *out)
-{
- return sha2_ce_finup(desc, NULL, 0, out);
-}
-
-static struct shash_alg algs[] = { {
- .init = sha224_base_init,
- .update = sha2_ce_update,
- .final = sha2_ce_final,
- .finup = sha2_ce_finup,
- .descsize = sizeof(struct sha256_state),
- .digestsize = SHA224_DIGEST_SIZE,
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-ce",
- .cra_priority = 300,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .init = sha256_base_init,
- .update = sha2_ce_update,
- .final = sha2_ce_final,
- .finup = sha2_ce_finup,
- .descsize = sizeof(struct sha256_state),
- .digestsize = SHA256_DIGEST_SIZE,
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-ce",
- .cra_priority = 300,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init sha2_ce_mod_init(void)
-{
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha2_ce_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_cpu_feature_match(SHA2, sha2_ce_mod_init);
-module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
deleted file mode 100644
index f85933fdec75..000000000000
--- a/arch/arm/crypto/sha256_glue.c
+++ /dev/null
@@ -1,117 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
- * using optimized ARM assembler and NEON instructions.
- *
- * Copyright © 2015 Google Inc.
- *
- * This file is based on sha256_ssse3_glue.c:
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-
-#include "sha256_glue.h"
-
-asmlinkage void sha256_block_data_order(struct sha256_state *state,
- const u8 *data, int num_blks);
-
-int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- /* make sure casting to sha256_block_fn() is safe */
- BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
-
- return sha256_base_do_update(desc, data, len, sha256_block_data_order);
-}
-EXPORT_SYMBOL(crypto_sha256_arm_update);
-
-static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out)
-{
- sha256_base_do_finalize(desc, sha256_block_data_order);
- return sha256_base_finish(desc, out);
-}
-
-int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- sha256_base_do_update(desc, data, len, sha256_block_data_order);
- return crypto_sha256_arm_final(desc, out);
-}
-EXPORT_SYMBOL(crypto_sha256_arm_finup);
-
-static struct shash_alg algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = crypto_sha256_arm_update,
- .final = crypto_sha256_arm_final,
- .finup = crypto_sha256_arm_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-asm",
- .cra_priority = 150,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = crypto_sha256_arm_update,
- .final = crypto_sha256_arm_final,
- .finup = crypto_sha256_arm_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-asm",
- .cra_priority = 150,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init sha256_mod_init(void)
-{
- int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
-
- if (res < 0)
- return res;
-
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
- res = crypto_register_shashes(sha256_neon_algs,
- ARRAY_SIZE(sha256_neon_algs));
-
- if (res < 0)
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
- }
-
- return res;
-}
-
-static void __exit sha256_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
- crypto_unregister_shashes(sha256_neon_algs,
- ARRAY_SIZE(sha256_neon_algs));
-}
-
-module_init(sha256_mod_init);
-module_exit(sha256_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
-
-MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
deleted file mode 100644
index 9f0d578bab5f..000000000000
--- a/arch/arm/crypto/sha256_glue.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _CRYPTO_SHA256_GLUE_H
-#define _CRYPTO_SHA256_GLUE_H
-
-#include <linux/crypto.h>
-
-extern struct shash_alg sha256_neon_algs[2];
-
-int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-
-#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
deleted file mode 100644
index ccdcfff71910..000000000000
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ /dev/null
@@ -1,92 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
- * using NEON instructions.
- *
- * Copyright © 2015 Google Inc.
- *
- * This file is based on sha512_neon_glue.c:
- * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <asm/byteorder.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-
-#include "sha256_glue.h"
-
-asmlinkage void sha256_block_data_order_neon(struct sha256_state *digest,
- const u8 *data, int num_blks);
-
-static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
- return crypto_sha256_arm_update(desc, data, len);
-
- kernel_neon_begin();
- sha256_base_do_update(desc, data, len, sha256_block_data_order_neon);
- kernel_neon_end();
-
- return 0;
-}
-
-static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- if (!crypto_simd_usable())
- return crypto_sha256_arm_finup(desc, data, len, out);
-
- kernel_neon_begin();
- if (len)
- sha256_base_do_update(desc, data, len,
- sha256_block_data_order_neon);
- sha256_base_do_finalize(desc, sha256_block_data_order_neon);
- kernel_neon_end();
-
- return sha256_base_finish(desc, out);
-}
-
-static int crypto_sha256_neon_final(struct shash_desc *desc, u8 *out)
-{
- return crypto_sha256_neon_finup(desc, NULL, 0, out);
-}
-
-struct shash_alg sha256_neon_algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = crypto_sha256_neon_update,
- .final = crypto_sha256_neon_final,
- .finup = crypto_sha256_neon_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-neon",
- .cra_priority = 250,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = crypto_sha256_neon_update,
- .final = crypto_sha256_neon_final,
- .finup = crypto_sha256_neon_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-neon",
- .cra_priority = 250,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
index 1be5bd498af3..f8a6480889b1 100644
--- a/arch/arm/crypto/sha512-glue.c
+++ b/arch/arm/crypto/sha512-glue.c
@@ -5,15 +5,14 @@
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
+#include <asm/hwcap.h>
+#include <asm/neon.h>
#include <crypto/internal/hash.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-
#include "sha512.h"
MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM");
@@ -28,50 +27,47 @@ MODULE_ALIAS_CRYPTO("sha512-arm");
asmlinkage void sha512_block_data_order(struct sha512_state *state,
u8 const *src, int blocks);
-int sha512_arm_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- return sha512_base_do_update(desc, data, len, sha512_block_data_order);
+ return sha512_base_do_update_blocks(desc, data, len,
+ sha512_block_data_order);
}
-static int sha512_arm_final(struct shash_desc *desc, u8 *out)
+static int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- sha512_base_do_finalize(desc, sha512_block_data_order);
+ sha512_base_do_finup(desc, data, len, sha512_block_data_order);
return sha512_base_finish(desc, out);
}
-int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- sha512_base_do_update(desc, data, len, sha512_block_data_order);
- return sha512_arm_final(desc, out);
-}
-
static struct shash_alg sha512_arm_algs[] = { {
.init = sha384_base_init,
.update = sha512_arm_update,
- .final = sha512_arm_final,
.finup = sha512_arm_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA384_DIGEST_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-arm",
.cra_priority = 250,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha512_base_init,
.update = sha512_arm_update,
- .final = sha512_arm_final,
.finup = sha512_arm_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA512_DIGEST_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-arm",
.cra_priority = 250,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
index c6e58fe475ac..bd528077fefb 100644
--- a/arch/arm/crypto/sha512-neon-glue.c
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -5,16 +5,13 @@
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
+#include <asm/neon.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-
#include "sha512.h"
MODULE_ALIAS_CRYPTO("sha384-neon");
@@ -26,51 +23,36 @@ asmlinkage void sha512_block_data_order_neon(struct sha512_state *state,
static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
- return sha512_arm_update(desc, data, len);
+ int remain;
kernel_neon_begin();
- sha512_base_do_update(desc, data, len, sha512_block_data_order_neon);
+ remain = sha512_base_do_update_blocks(desc, data, len,
+ sha512_block_data_order_neon);
kernel_neon_end();
-
- return 0;
+ return remain;
}
static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable())
- return sha512_arm_finup(desc, data, len, out);
-
kernel_neon_begin();
- if (len)
- sha512_base_do_update(desc, data, len,
- sha512_block_data_order_neon);
- sha512_base_do_finalize(desc, sha512_block_data_order_neon);
+ sha512_base_do_finup(desc, data, len, sha512_block_data_order_neon);
kernel_neon_end();
-
return sha512_base_finish(desc, out);
}
-static int sha512_neon_final(struct shash_desc *desc, u8 *out)
-{
- return sha512_neon_finup(desc, NULL, 0, out);
-}
-
struct shash_alg sha512_neon_algs[] = { {
.init = sha384_base_init,
.update = sha512_neon_update,
- .final = sha512_neon_final,
.finup = sha512_neon_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA384_DIGEST_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-neon",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -78,14 +60,15 @@ struct shash_alg sha512_neon_algs[] = { {
}, {
.init = sha512_base_init,
.update = sha512_neon_update,
- .final = sha512_neon_final,
.finup = sha512_neon_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA512_DIGEST_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-neon",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/arm/crypto/sha512.h b/arch/arm/crypto/sha512.h
index e14572be76d1..eeaee52cda69 100644
--- a/arch/arm/crypto/sha512.h
+++ b/arch/arm/crypto/sha512.h
@@ -1,9 +1,3 @@
/* SPDX-License-Identifier: GPL-2.0 */
-int sha512_arm_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
-
extern struct shash_alg sha512_neon_algs[2];
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
index 82191dbd7e78..d37559762180 100644
--- a/arch/arm/include/asm/simd.h
+++ b/arch/arm/include/asm/simd.h
@@ -1,8 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
-#include <linux/hardirq.h>
+#include <linux/compiler_attributes.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
static __must_check inline bool may_use_simd(void)
{
return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq();
}
+
+#endif /* _ASM_SIMD_H */
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 007874320937..91ea0e29107a 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -5,6 +5,8 @@
# Copyright (C) 1995-2000 Russell King
#
+obj-y += crypto/
+
lib-y := changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
delay.o delay-loop.o findbit.o memchr.o memcpy.o \
@@ -47,7 +49,7 @@ endif
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_CRC32_ARCH) += crc32-arm.o
-crc32-arm-y := crc32-glue.o crc32-core.o
+crc32-arm-y := crc32.o crc32-core.o
obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm.o
-crc-t10dif-arm-y := crc-t10dif-glue.o crc-t10dif-core.o
+crc-t10dif-arm-y := crc-t10dif.o crc-t10dif-core.o
diff --git a/arch/arm/lib/crc-t10dif-glue.c b/arch/arm/lib/crc-t10dif.c
index 6efad3d78284..1093f8ec13b0 100644
--- a/arch/arm/lib/crc-t10dif-glue.c
+++ b/arch/arm/lib/crc-t10dif.c
@@ -16,8 +16,8 @@
#include <asm/neon.h>
#include <asm/simd.h>
-static DEFINE_STATIC_KEY_FALSE(have_neon);
-static DEFINE_STATIC_KEY_FALSE(have_pmull);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
@@ -60,7 +60,7 @@ static int __init crc_t10dif_arm_init(void)
}
return 0;
}
-arch_initcall(crc_t10dif_arm_init);
+subsys_initcall(crc_t10dif_arm_init);
static void __exit crc_t10dif_arm_exit(void)
{
diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32.c
index 4340351dbde8..f2bef8849c7c 100644
--- a/arch/arm/lib/crc32-glue.c
+++ b/arch/arm/lib/crc32.c
@@ -18,8 +18,8 @@
#include <asm/neon.h>
#include <asm/simd.h>
-static DEFINE_STATIC_KEY_FALSE(have_crc32);
-static DEFINE_STATIC_KEY_FALSE(have_pmull);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */
@@ -103,7 +103,7 @@ static int __init crc32_arm_init(void)
static_branch_enable(&have_pmull);
return 0;
}
-arch_initcall(crc32_arm_init);
+subsys_initcall(crc32_arm_init);
static void __exit crc32_arm_exit(void)
{
diff --git a/arch/arm/lib/crypto/.gitignore b/arch/arm/lib/crypto/.gitignore
new file mode 100644
index 000000000000..12d74d8b03d0
--- /dev/null
+++ b/arch/arm/lib/crypto/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+poly1305-core.S
+sha256-core.S
diff --git a/arch/arm/lib/crypto/Kconfig b/arch/arm/lib/crypto/Kconfig
new file mode 100644
index 000000000000..d1ad664f0c67
--- /dev/null
+++ b/arch/arm/lib/crypto/Kconfig
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_BLAKE2S_ARM
+ bool "Hash functions: BLAKE2s"
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ help
+ BLAKE2s cryptographic hash function (RFC 7693)
+
+ Architecture: arm
+
+ This is faster than the generic implementations of BLAKE2s and
+ BLAKE2b, but slower than the NEON implementation of BLAKE2b.
+ There is no NEON implementation of BLAKE2s, since NEON doesn't
+ really help with it.
+
+config CRYPTO_CHACHA20_NEON
+ tristate
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_ARM
+ tristate
+ default CRYPTO_LIB_POLY1305
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
+config CRYPTO_SHA256_ARM
+ tristate
+ depends on !CPU_V7M
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
diff --git a/arch/arm/lib/crypto/Makefile b/arch/arm/lib/crypto/Makefile
new file mode 100644
index 000000000000..431f77c3ff6f
--- /dev/null
+++ b/arch/arm/lib/crypto/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
+libblake2s-arm-y := blake2s-core.o blake2s-glue.o
+
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
+
+obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
+poly1305-arm-y := poly1305-core.o poly1305-glue.o
+
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
+sha256-arm-y := sha256.o sha256-core.o
+sha256-arm-$(CONFIG_KERNEL_MODE_NEON) += sha256-ce.o
+
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $(<) > $(@)
+
+$(obj)/%-core.S: $(src)/%-armv4.pl
+ $(call cmd,perl)
+
+clean-files += poly1305-core.S sha256-core.S
+
+aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1
+
+# massage the perlasm code a bit so we only get the NEON routine if we need it
+poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
+poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y)
+
+AFLAGS_sha256-core.o += $(aflags-thumb2-y)
diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/lib/crypto/blake2s-core.S
index df40e46601f1..df40e46601f1 100644
--- a/arch/arm/crypto/blake2s-core.S
+++ b/arch/arm/lib/crypto/blake2s-core.S
diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/lib/crypto/blake2s-glue.c
index 0238a70d9581..0238a70d9581 100644
--- a/arch/arm/crypto/blake2s-glue.c
+++ b/arch/arm/lib/crypto/blake2s-glue.c
diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c
new file mode 100644
index 000000000000..88ec96415283
--- /dev/null
+++ b/arch/arm/lib/crypto/chacha-glue.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ChaCha and HChaCha functions (ARM optimized)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
+ u8 *dst, const u8 *src, int nrounds);
+asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ int nrounds, unsigned int nbytes);
+asmlinkage void hchacha_block_arm(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
+asmlinkage void hchacha_block_neon(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ const struct chacha_state *state, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
+static inline bool neon_usable(void)
+{
+ return static_branch_likely(&use_neon) && crypto_simd_usable();
+}
+
+static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ while (bytes > CHACHA_BLOCK_SIZE) {
+ unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
+
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
+ }
+ if (bytes) {
+ const u8 *s = src;
+ u8 *d = dst;
+
+ if (bytes != CHACHA_BLOCK_SIZE)
+ s = d = memcpy(buf, src, bytes);
+ chacha_block_xor_neon(state, d, s, nrounds);
+ if (d != dst)
+ memcpy(dst, buf, bytes);
+ state->x[12]++;
+ }
+}
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
+ hchacha_block_arm(state, out, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, out, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE) {
+ chacha_doarm(dst, src, bytes, state, nrounds);
+ state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+ return;
+ }
+
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, todo, nrounds);
+ kernel_neon_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ /* We always can use at least the ARM scalar implementation. */
+ return true;
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init chacha_arm_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A7:
+ case ARM_CPU_PART_CORTEX_A5:
+ /*
+ * The Cortex-A7 and Cortex-A5 do not perform well with
+ * the NEON implementation but do incredibly with the
+ * scalar one and use less power.
+ */
+ break;
+ default:
+ static_branch_enable(&use_neon);
+ }
+ }
+ return 0;
+}
+subsys_initcall(chacha_arm_mod_init);
+
+static void __exit chacha_arm_mod_exit(void)
+{
+}
+module_exit(chacha_arm_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/lib/crypto/chacha-neon-core.S
index 13d12f672656..ddd62b6294a5 100644
--- a/arch/arm/crypto/chacha-neon-core.S
+++ b/arch/arm/lib/crypto/chacha-neon-core.S
@@ -1,5 +1,5 @@
/*
- * ChaCha/XChaCha NEON helper functions
+ * ChaCha/HChaCha NEON helper functions
*
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/lib/crypto/chacha-scalar-core.S
index 083fe1ab96d0..4951df05c158 100644
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/lib/crypto/chacha-scalar-core.S
@@ -367,7 +367,7 @@
/*
* void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
- * const u32 *state, int nrounds);
+ * const struct chacha_state *state, int nrounds);
*/
ENTRY(chacha_doarm)
cmp r2, #0 // len == 0?
@@ -407,7 +407,8 @@ ENTRY(chacha_doarm)
ENDPROC(chacha_doarm)
/*
- * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
+ * void hchacha_block_arm(const struct chacha_state *state,
+ * u32 out[HCHACHA_OUT_WORDS], int nrounds);
*/
ENTRY(hchacha_block_arm)
push {r1,r4-r11,lr}
diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/lib/crypto/poly1305-armv4.pl
index 6d79498d3115..d57c6e2fc84a 100644
--- a/arch/arm/crypto/poly1305-armv4.pl
+++ b/arch/arm/lib/crypto/poly1305-armv4.pl
@@ -43,9 +43,9 @@ $code.=<<___;
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
-# define poly1305_init poly1305_init_arm
+# define poly1305_init poly1305_block_init_arch
# define poly1305_blocks poly1305_blocks_arm
-# define poly1305_emit poly1305_emit_arm
+# define poly1305_emit poly1305_emit_arch
.globl poly1305_blocks_neon
#endif
diff --git a/arch/arm/lib/crypto/poly1305-glue.c b/arch/arm/lib/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..2603b0771f2c
--- /dev/null
+++ b/arch/arm/lib/crypto/poly1305-glue.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/unaligned.h>
+
+asmlinkage void poly1305_block_init_arch(
+ struct poly1305_block_state *state,
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
+EXPORT_SYMBOL_GPL(poly1305_block_init_arch);
+asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state,
+ const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state,
+ const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_arch(const struct poly1305_state *state,
+ u8 digest[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+EXPORT_SYMBOL_GPL(poly1305_emit_arch);
+
+void __weak poly1305_blocks_neon(struct poly1305_block_state *state,
+ const u8 *src, u32 len, u32 hibit)
+{
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
+ unsigned int len, u32 padbit)
+{
+ len = round_down(len, POLY1305_BLOCK_SIZE);
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_neon)) {
+ do {
+ unsigned int todo = min_t(unsigned int, len, SZ_4K);
+
+ kernel_neon_begin();
+ poly1305_blocks_neon(state, src, todo, padbit);
+ kernel_neon_end();
+
+ len -= todo;
+ src += todo;
+ } while (len);
+ } else
+ poly1305_blocks_arm(state, src, len, padbit);
+}
+EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
+
+bool poly1305_is_arch_optimized(void)
+{
+ /* We always can use at least the ARM scalar implementation. */
+ return true;
+}
+EXPORT_SYMBOL(poly1305_is_arch_optimized);
+
+static int __init arm_poly1305_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ (elf_hwcap & HWCAP_NEON))
+ static_branch_enable(&have_neon);
+ return 0;
+}
+subsys_initcall(arm_poly1305_mod_init);
+
+static void __exit arm_poly1305_mod_exit(void)
+{
+}
+module_exit(arm_poly1305_mod_exit);
+
+MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/lib/crypto/sha256-armv4.pl
index f3a2b54efd4e..8122db7fd599 100644
--- a/arch/arm/crypto/sha256-armv4.pl
+++ b/arch/arm/lib/crypto/sha256-armv4.pl
@@ -204,18 +204,18 @@ K256:
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha256_block_data_order
+.word OPENSSL_armcap_P-sha256_blocks_arch
#endif
.align 5
-.global sha256_block_data_order
-.type sha256_block_data_order,%function
-sha256_block_data_order:
-.Lsha256_block_data_order:
+.global sha256_blocks_arch
+.type sha256_blocks_arch,%function
+sha256_blocks_arch:
+.Lsha256_blocks_arch:
#if __ARM_ARCH__<7
- sub r3,pc,#8 @ sha256_block_data_order
+ sub r3,pc,#8 @ sha256_blocks_arch
#else
- adr r3,.Lsha256_block_data_order
+ adr r3,.Lsha256_blocks_arch
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
@@ -282,7 +282,7 @@ $code.=<<___;
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size sha256_block_data_order,.-sha256_block_data_order
+.size sha256_blocks_arch,.-sha256_blocks_arch
___
######################################################################
# NEON stuff
@@ -470,8 +470,8 @@ sha256_block_data_order_neon:
stmdb sp!,{r4-r12,lr}
sub $H,sp,#16*4+16
- adr $Ktbl,.Lsha256_block_data_order
- sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256
+ adr $Ktbl,.Lsha256_blocks_arch
+ sub $Ktbl,$Ktbl,#.Lsha256_blocks_arch-K256
bic $H,$H,#15 @ align for 128-bit stores
mov $t2,sp
mov sp,$H @ alloca
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/lib/crypto/sha256-ce.S
index b6369d2440a1..ac2c9b01b22d 100644
--- a/arch/arm/crypto/sha2-ce-core.S
+++ b/arch/arm/lib/crypto/sha256-ce.S
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
+ * sha256-ce.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
@@ -67,10 +67,10 @@
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
- * void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
- int blocks);
+ * void sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
+ * const u8 *data, size_t nblocks);
*/
-ENTRY(sha2_ce_transform)
+ENTRY(sha256_ce_transform)
/* load state */
vld1.32 {dga-dgb}, [r0]
@@ -120,4 +120,4 @@ ENTRY(sha2_ce_transform)
/* store new state */
vst1.32 {dga-dgb}, [r0]
bx lr
-ENDPROC(sha2_ce_transform)
+ENDPROC(sha256_ce_transform)
diff --git a/arch/arm/lib/crypto/sha256.c b/arch/arm/lib/crypto/sha256.c
new file mode 100644
index 000000000000..109192e54b0f
--- /dev/null
+++ b/arch/arm/lib/crypto/sha256.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized for ARM
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/neon.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+asmlinkage void sha256_block_data_order_neon(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+asmlinkage void sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
+
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_neon)) {
+ kernel_neon_begin();
+ if (static_branch_likely(&have_ce))
+ sha256_ce_transform(state, data, nblocks);
+ else
+ sha256_block_data_order_neon(state, data, nblocks);
+ kernel_neon_end();
+ } else {
+ sha256_blocks_arch(state, data, nblocks);
+ }
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+bool sha256_is_arch_optimized(void)
+{
+ /* We always can use at least the ARM scalar implementation. */
+ return true;
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_arm_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+ static_branch_enable(&have_neon);
+ if (elf_hwcap2 & HWCAP2_SHA2)
+ static_branch_enable(&have_ce);
+ }
+ return 0;
+}
+subsys_initcall(sha256_arm_mod_init);
+
+static void __exit sha256_arm_mod_exit(void)
+{
+}
+module_exit(sha256_arm_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 optimized for ARM");
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index cac4e82f6c82..150a1e56dcae 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -209,9 +209,8 @@ static int __init exynos_pmu_irq_init(struct device_node *node,
return -ENOMEM;
}
- domain = irq_domain_add_hierarchy(parent_domain, 0, 0,
- node, &exynos_pmu_domain_ops,
- NULL);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, 0, of_fwnode_handle(node),
+ &exynos_pmu_domain_ops, NULL);
if (!domain) {
iounmap(pmu_base_addr);
pmu_base_addr = NULL;
diff --git a/arch/arm/mach-imx/avic.c b/arch/arm/mach-imx/avic.c
index cf6546ddc7a3..3067c06b4b8e 100644
--- a/arch/arm/mach-imx/avic.c
+++ b/arch/arm/mach-imx/avic.c
@@ -201,8 +201,8 @@ static void __init mxc_init_irq(void __iomem *irqbase)
WARN_ON(irq_base < 0);
np = of_find_compatible_node(NULL, NULL, "fsl,avic");
- domain = irq_domain_add_legacy(np, AVIC_NUM_IRQS, irq_base, 0,
- &irq_domain_simple_ops, NULL);
+ domain = irq_domain_create_legacy(of_fwnode_handle(np), AVIC_NUM_IRQS, irq_base, 0,
+ &irq_domain_simple_ops, NULL);
WARN_ON(!domain);
for (i = 0; i < AVIC_NUM_IRQS / 32; i++, irq_base += 32)
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 5909088d5482..2e633569d2f8 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -245,9 +245,8 @@ static int __init imx_gpc_init(struct device_node *node,
if (WARN_ON(!gpc_base))
return -ENOMEM;
- domain = irq_domain_add_hierarchy(parent_domain, 0, GPC_MAX_IRQS,
- node, &imx_gpc_domain_ops,
- NULL);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, GPC_MAX_IRQS, of_fwnode_handle(node),
+ &imx_gpc_domain_ops, NULL);
if (!domain) {
iounmap(gpc_base);
return -ENOMEM;
diff --git a/arch/arm/mach-imx/tzic.c b/arch/arm/mach-imx/tzic.c
index 8b3d98d288d9..50a5668e65d2 100644
--- a/arch/arm/mach-imx/tzic.c
+++ b/arch/arm/mach-imx/tzic.c
@@ -175,8 +175,8 @@ static int __init tzic_init_dt(struct device_node *np, struct device_node *p)
irq_base = irq_alloc_descs(-1, 0, TZIC_NUM_IRQS, numa_node_id());
WARN_ON(irq_base < 0);
- domain = irq_domain_add_legacy(np, TZIC_NUM_IRQS, irq_base, 0,
- &irq_domain_simple_ops, NULL);
+ domain = irq_domain_create_legacy(of_fwnode_handle(np), TZIC_NUM_IRQS, irq_base, 0,
+ &irq_domain_simple_ops, NULL);
WARN_ON(!domain);
for (i = 0; i < 4; i++, irq_base += 32)
diff --git a/arch/arm/mach-omap1/irq.c b/arch/arm/mach-omap1/irq.c
index 9b587ecebb1c..bb1bc060ecd8 100644
--- a/arch/arm/mach-omap1/irq.c
+++ b/arch/arm/mach-omap1/irq.c
@@ -220,8 +220,7 @@ void __init omap1_init_irq(void)
omap_l2_irq = irq_base;
omap_l2_irq -= NR_IRQS_LEGACY;
- domain = irq_domain_add_legacy(NULL, nr_irqs, irq_base, 0,
- &irq_domain_simple_ops, NULL);
+ domain = irq_domain_create_legacy(NULL, nr_irqs, irq_base, 0, &irq_domain_simple_ops, NULL);
pr_info("Total of %lu interrupts in %i interrupt banks\n",
nr_irqs, irq_bank_count);
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 6f0d6120c174..a66b1dc61571 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -585,9 +585,8 @@ static int __init wakeupgen_init(struct device_node *node,
wakeupgen_ops = &am43xx_wakeupgen_ops;
}
- domain = irq_domain_add_hierarchy(parent_domain, 0, max_irqs,
- node, &wakeupgen_domain_ops,
- NULL);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, max_irqs, of_fwnode_handle(node),
+ &wakeupgen_domain_ops, NULL);
if (!domain) {
iounmap(wakeupgen_base);
return -ENOMEM;
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index d9cadd97748a..5bfce8aa4102 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -147,9 +147,8 @@ pxa_init_irq_common(struct device_node *node, int irq_nr,
int n;
pxa_internal_irq_nr = irq_nr;
- pxa_irq_domain = irq_domain_add_legacy(node, irq_nr,
- PXA_IRQ(0), 0,
- &pxa_irq_ops, NULL);
+ pxa_irq_domain = irq_domain_create_legacy(of_fwnode_handle(node), irq_nr, PXA_IRQ(0), 0,
+ &pxa_irq_ops, NULL);
if (!pxa_irq_domain)
panic("Unable to add PXA IRQ domain\n");
irq_set_default_domain(pxa_irq_domain);
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 595e9cb33c1d..326616fbdc44 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -496,11 +496,10 @@ static void orion_gpio_unmask_irq(struct irq_data *d)
u32 reg_val;
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
reg_val = irq_reg_readl(gc, ct->regs.mask);
reg_val |= mask;
irq_reg_writel(gc, reg_val, ct->regs.mask);
- irq_gc_unlock(gc);
}
static void orion_gpio_mask_irq(struct irq_data *d)
@@ -510,11 +509,10 @@ static void orion_gpio_mask_irq(struct irq_data *d)
u32 mask = d->mask;
u32 reg_val;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
reg_val = irq_reg_readl(gc, ct->regs.mask);
reg_val &= ~mask;
irq_reg_writel(gc, reg_val, ct->regs.mask);
- irq_gc_unlock(gc);
}
void __init orion_gpio_init(int gpio_base, int ngpio,
@@ -602,12 +600,12 @@ void __init orion_gpio_init(int gpio_base, int ngpio,
IRQ_NOREQUEST, IRQ_LEVEL | IRQ_NOPROBE);
/* Setup irq domain on top of the generic chip. */
- ochip->domain = irq_domain_add_legacy(NULL,
- ochip->chip.ngpio,
- ochip->secondary_irq_base,
- ochip->secondary_irq_base,
- &irq_domain_simple_ops,
- ochip);
+ ochip->domain = irq_domain_create_legacy(NULL,
+ ochip->chip.ngpio,
+ ochip->secondary_irq_base,
+ ochip->secondary_irq_base,
+ &irq_domain_simple_ops,
+ ochip);
if (!ochip->domain)
panic("%s: couldn't allocate irq domain (DT).\n",
ochip->chip.label);
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
index 13a0e63afeaf..2c64d834a2c4 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
@@ -152,28 +152,12 @@
vcc-pg-supply = <&reg_aldo1>;
};
-&r_ir {
- linux,rc-map-name = "rc-beelink-gs1";
- status = "okay";
-};
-
-&r_pio {
- /*
- * FIXME: We can't add that supply for now since it would
- * create a circular dependency between pinctrl, the regulator
- * and the RSB Bus.
- *
- * vcc-pl-supply = <&reg_aldo1>;
- */
- vcc-pm-supply = <&reg_aldo1>;
-};
-
-&r_rsb {
+&r_i2c {
status = "okay";
- axp805: pmic@745 {
+ axp805: pmic@36 {
compatible = "x-powers,axp805", "x-powers,axp806";
- reg = <0x745>;
+ reg = <0x36>;
interrupt-parent = <&r_intc>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
@@ -291,6 +275,22 @@
};
};
+&r_ir {
+ linux,rc-map-name = "rc-beelink-gs1";
+ status = "okay";
+};
+
+&r_pio {
+ /*
+ * PL0 and PL1 are used for PMIC I2C
+ * don't enable the pl-supply else
+ * it will fail at boot
+ *
+ * vcc-pl-supply = <&reg_aldo1>;
+ */
+ vcc-pm-supply = <&reg_aldo1>;
+};
+
&spdif {
pinctrl-names = "default";
pinctrl-0 = <&spdif_tx_pin>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
index ab87c3447cd7..f005072c68a1 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
@@ -176,16 +176,12 @@
vcc-pg-supply = <&reg_vcc_wifi_io>;
};
-&r_ir {
- status = "okay";
-};
-
-&r_rsb {
+&r_i2c {
status = "okay";
- axp805: pmic@745 {
+ axp805: pmic@36 {
compatible = "x-powers,axp805", "x-powers,axp806";
- reg = <0x745>;
+ reg = <0x36>;
interrupt-parent = <&r_intc>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
@@ -296,6 +292,10 @@
};
};
+&r_ir {
+ status = "okay";
+};
+
&rtc {
clocks = <&ext_osc32k>;
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
index d05dc5d6e6b9..e34dbb992021 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
@@ -113,20 +113,12 @@
vcc-pg-supply = <&reg_aldo1>;
};
-&r_ir {
- status = "okay";
-};
-
-&r_pio {
- vcc-pm-supply = <&reg_bldo3>;
-};
-
-&r_rsb {
+&r_i2c {
status = "okay";
- axp805: pmic@745 {
+ axp805: pmic@36 {
compatible = "x-powers,axp805", "x-powers,axp806";
- reg = <0x745>;
+ reg = <0x36>;
interrupt-parent = <&r_intc>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
@@ -241,6 +233,14 @@
};
};
+&r_ir {
+ status = "okay";
+};
+
+&r_pio {
+ vcc-pm-supply = <&reg_bldo3>;
+};
+
&rtc {
clocks = <&ext_osc32k>;
};
diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
index da9de4986660..5a72f0b64247 100644
--- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
+++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
@@ -151,7 +151,7 @@
al,msi-num-spis = <160>;
};
- io-fabric@fc000000 {
+ io-bus@fc000000 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
index 8b6156b5af65..dea60d136c2e 100644
--- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
+++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
@@ -361,7 +361,7 @@
interrupt-parent = <&gic>;
};
- io-fabric@fc000000 {
+ io-bus@fc000000 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index ab2b3f15ef19..69834b49673d 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -2313,7 +2313,7 @@
"amlogic,meson8-pwm-v2";
reg = <0x0 0x19000 0x0 0x20>;
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
#pwm-cells = <3>;
@@ -2325,7 +2325,7 @@
"amlogic,meson8-pwm-v2";
reg = <0x0 0x1a000 0x0 0x20>;
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
#pwm-cells = <3>;
@@ -2337,7 +2337,7 @@
"amlogic,meson8-pwm-v2";
reg = <0x0 0x1b000 0x0 0x20>;
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi
index de35fa2d7a6d..8e3e3354ed67 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi
@@ -116,6 +116,10 @@
status = "okay";
};
+&clkc_audio {
+ status = "okay";
+};
+
&frddr_a {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index 8ebce7114a60..6c134592c7bb 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -741,7 +741,7 @@
&pwm_ab {
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
@@ -752,14 +752,14 @@
&pwm_cd {
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
&pwm_ef {
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 2dc2fdaecf9f..19b8a39de6a0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -811,7 +811,7 @@
&pwm_ab {
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
@@ -822,14 +822,14 @@
&pwm_cd {
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
&pwm_ef {
clocks = <&xtal>,
- <>, /* unknown/untested, the datasheet calls it "vid_pll" */
+ <0>, /* unknown/untested, the datasheet calls it "vid_pll" */
<&clkc CLKID_FCLK_DIV4>,
<&clkc CLKID_FCLK_DIV3>;
};
diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts
index 2dfe7b895b2b..e2d9439397f7 100644
--- a/arch/arm64/boot/dts/apple/t8103-j293.dts
+++ b/arch/arm64/boot/dts/apple/t8103-j293.dts
@@ -77,6 +77,16 @@
};
};
+/*
+ * The driver depends on boot loader initialized state which resets when this
+ * power-domain is powered off. This happens on suspend or when the driver is
+ * missing during boot. Mark the domain as always on until the driver can
+ * handle this.
+ */
+&ps_dispdfr_be {
+ apple,always-on;
+};
+
&display_dfr {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts
index 3d73f9ee2f46..be86d34c6696 100644
--- a/arch/arm64/boot/dts/apple/t8112-j493.dts
+++ b/arch/arm64/boot/dts/apple/t8112-j493.dts
@@ -40,6 +40,16 @@
};
};
+/*
+ * The driver depends on boot loader initialized state which resets when this
+ * power-domain is powered off. This happens on suspend or when the driver is
+ * missing during boot. Mark the domain as always on until the driver can
+ * handle this.
+ */
+&ps_dispdfr_be {
+ apple,always-on;
+};
+
&display_dfr {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi
index dc0ccd723c6d..2ce1860b244d 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi
@@ -88,3 +88,5 @@
<0>, <0>, <400000000>,
<1039500000>;
};
+
+/delete-node/ &{noc_opp_table/opp-1000000000};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi
index b2ac2583a592..b59da91fdd04 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi
@@ -35,7 +35,6 @@
<0x1 0x00000000 0 0xc0000000>;
};
-
reg_usdhc2_vmmc: regulator-usdhc2-vmmc {
compatible = "regulator-fixed";
regulator-name = "VSD_3V3";
@@ -46,6 +45,16 @@
startup-delay-us = <100>;
off-on-delay-us = <12000>;
};
+
+ reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc {
+ compatible = "regulator-gpio";
+ regulator-name = "VSD_VSEL";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ gpios = <&gpio2 12 GPIO_ACTIVE_HIGH>;
+ states = <3300000 0x0 1800000 0x1>;
+ vin-supply = <&ldo5>;
+ };
};
&A53_0 {
@@ -205,6 +214,7 @@
pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
cd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
vmmc-supply = <&reg_usdhc2_vmmc>;
+ vqmmc-supply = <&reg_usdhc2_vqmmc>;
bus-width = <4>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index ce6793b2d57e..7c1c87eab54c 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1645,6 +1645,12 @@
opp-hz = /bits/ 64 <200000000>;
};
+ /* Nominal drive mode maximum */
+ opp-800000000 {
+ opp-hz = /bits/ 64 <800000000>;
+ };
+
+ /* Overdrive mode maximum */
opp-1000000000 {
opp-hz = /bits/ 64 <1000000000>;
};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
index 3a9b6907185d..242820845707 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
@@ -26,6 +26,8 @@
leds {
compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi_quad_pins>;
led-power1 {
label = "udpu:green:power";
@@ -82,8 +84,6 @@
&spi0 {
status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&spi_quad_pins>;
flash@0 {
compatible = "jedec,spi-nor";
@@ -108,6 +108,10 @@
};
};
+&spi_quad_pins {
+ function = "gpio";
+};
+
&pinctrl_nb {
i2c2_recovery_pins: i2c2-recovery-pins {
groups = "i2c2";
diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi
index 1edfd643b25a..a334ef0629d1 100644
--- a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi
@@ -31,7 +31,7 @@
};
vcc3v3_btreg: vcc3v3-btreg {
- compatible = "regulator-gpio";
+ compatible = "regulator-fixed";
enable-active-high;
pinctrl-names = "default";
pinctrl-0 = <&bt_enable_h>;
@@ -39,7 +39,6 @@
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-always-on;
- states = <3300000 0x0>;
};
vcc3v3_rf_aux_mod: regulator-vcc3v3-rf-aux-mod {
diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi
index 80db778c9684..b60e68faa83a 100644
--- a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi
@@ -26,5 +26,5 @@
};
&vcc3v3_btreg {
- enable-gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>;
+ gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>;
};
diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts
index 165d09ccb942..5886b802c520 100644
--- a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts
+++ b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts
@@ -39,5 +39,5 @@
};
&vcc3v3_btreg {
- enable-gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>;
+ gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
index 541dca12bf1a..046dbe329017 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
@@ -43,7 +43,7 @@
sdio_pwrseq: sdio-pwrseq {
compatible = "mmc-pwrseq-simple";
clocks = <&rk808 1>;
- clock-names = "lpo";
+ clock-names = "ext_clock";
pinctrl-names = "default";
pinctrl-0 = <&wifi_enable_h>;
reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi
index a48351471764..e7ba477e75f9 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi
@@ -775,7 +775,7 @@
rockchip,default-sample-phase = <90>;
status = "okay";
- sdio-wifi@1 {
+ wifi@1 {
compatible = "brcm,bcm4329-fmac";
reg = <1>;
interrupt-parent = <&gpio2>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts
index 7bd32d230ad2..b80d628c426b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts
@@ -619,6 +619,8 @@
bus-width = <8>;
max-frequency = <200000000>;
non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd &emmc_datastrobe>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
index 828bde7fab68..314067ba6f3c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
@@ -610,7 +610,7 @@
reg = <0x51>;
clock-output-names = "hym8563";
interrupt-parent = <&gpio0>;
- interrupts = <RK_PB0 IRQ_TYPE_LEVEL_LOW>;
+ interrupts = <RK_PA0 IRQ_TYPE_LEVEL_LOW>;
pinctrl-names = "default";
pinctrl-0 = <&hym8563_int>;
wakeup-source;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi
index 1af0a30866f6..af431fdcbea7 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi
@@ -222,6 +222,10 @@
compatible = "realtek,rt5616";
reg = <0x1b>;
#sound-dai-cells = <0>;
+ assigned-clocks = <&cru I2S0_8CH_MCLKOUT>;
+ assigned-clock-rates = <12288000>;
+ clocks = <&cru I2S0_8CH_MCLKOUT>;
+ clock-names = "mclk";
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
index 711ac4f2c7cb..60ad272982ad 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
@@ -214,6 +214,8 @@
};
&package_thermal {
+ polling-delay = <1000>;
+
trips {
package_active1: trip-active1 {
temperature = <45000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi
index bce72bac4503..3045cb3bd68c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi
@@ -11,20 +11,15 @@
compatible = "operating-points-v2";
opp-shared;
- opp-1416000000 {
- opp-hz = /bits/ 64 <1416000000>;
+ opp-1200000000 {
+ opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <750000 750000 950000>;
clock-latency-ns = <40000>;
opp-suspend;
};
- opp-1608000000 {
- opp-hz = /bits/ 64 <1608000000>;
- opp-microvolt = <887500 887500 950000>;
- clock-latency-ns = <40000>;
- };
- opp-1704000000 {
- opp-hz = /bits/ 64 <1704000000>;
- opp-microvolt = <937500 937500 950000>;
+ opp-1296000000 {
+ opp-hz = /bits/ 64 <1296000000>;
+ opp-microvolt = <775000 775000 950000>;
clock-latency-ns = <40000>;
};
};
@@ -33,9 +28,14 @@
compatible = "operating-points-v2";
opp-shared;
+ opp-1200000000{
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <750000 750000 950000>;
+ clock-latency-ns = <40000>;
+ };
opp-1416000000 {
opp-hz = /bits/ 64 <1416000000>;
- opp-microvolt = <750000 750000 950000>;
+ opp-microvolt = <762500 762500 950000>;
clock-latency-ns = <40000>;
};
opp-1608000000 {
@@ -43,25 +43,20 @@
opp-microvolt = <787500 787500 950000>;
clock-latency-ns = <40000>;
};
- opp-1800000000 {
- opp-hz = /bits/ 64 <1800000000>;
- opp-microvolt = <875000 875000 950000>;
- clock-latency-ns = <40000>;
- };
- opp-2016000000 {
- opp-hz = /bits/ 64 <2016000000>;
- opp-microvolt = <950000 950000 950000>;
- clock-latency-ns = <40000>;
- };
};
cluster2_opp_table: opp-table-cluster2 {
compatible = "operating-points-v2";
opp-shared;
+ opp-1200000000{
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <750000 750000 950000>;
+ clock-latency-ns = <40000>;
+ };
opp-1416000000 {
opp-hz = /bits/ 64 <1416000000>;
- opp-microvolt = <750000 750000 950000>;
+ opp-microvolt = <762500 762500 950000>;
clock-latency-ns = <40000>;
};
opp-1608000000 {
@@ -69,16 +64,6 @@
opp-microvolt = <787500 787500 950000>;
clock-latency-ns = <40000>;
};
- opp-1800000000 {
- opp-hz = /bits/ 64 <1800000000>;
- opp-microvolt = <875000 875000 950000>;
- clock-latency-ns = <40000>;
- };
- opp-2016000000 {
- opp-hz = /bits/ 64 <2016000000>;
- opp-microvolt = <950000 950000 950000>;
- clock-latency-ns = <40000>;
- };
};
gpu_opp_table: opp-table {
@@ -104,10 +89,6 @@
opp-hz = /bits/ 64 <700000000>;
opp-microvolt = <750000 750000 850000>;
};
- opp-850000000 {
- opp-hz = /bits/ 64 <800000000>;
- opp-microvolt = <787500 787500 850000>;
- };
};
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 5bb8f09422a2..a61154545c89 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1010,6 +1010,7 @@ CONFIG_SND_SOC_ROCKCHIP_RT5645=m
CONFIG_SND_SOC_RK3399_GRU_SOUND=m
CONFIG_SND_SOC_SAMSUNG=y
CONFIG_SND_SOC_RCAR=m
+CONFIG_SND_SOC_MSIOF=m
CONFIG_SND_SOC_RZ=m
CONFIG_SND_SOC_SOF_TOPLEVEL=y
CONFIG_SND_SOC_SOF_OF=y
@@ -1474,29 +1475,6 @@ CONFIG_QCOM_WCNSS_CTRL=m
CONFIG_QCOM_APR=m
CONFIG_QCOM_ICC_BWMON=m
CONFIG_QCOM_PBS=m
-CONFIG_ARCH_R8A77995=y
-CONFIG_ARCH_R8A77990=y
-CONFIG_ARCH_R8A77951=y
-CONFIG_ARCH_R8A77965=y
-CONFIG_ARCH_R8A77960=y
-CONFIG_ARCH_R8A77961=y
-CONFIG_ARCH_R8A779F0=y
-CONFIG_ARCH_R8A77980=y
-CONFIG_ARCH_R8A77970=y
-CONFIG_ARCH_R8A779A0=y
-CONFIG_ARCH_R8A779G0=y
-CONFIG_ARCH_R8A779H0=y
-CONFIG_ARCH_R8A774C0=y
-CONFIG_ARCH_R8A774E1=y
-CONFIG_ARCH_R8A774A1=y
-CONFIG_ARCH_R8A774B1=y
-CONFIG_ARCH_R9A07G043=y
-CONFIG_ARCH_R9A07G044=y
-CONFIG_ARCH_R9A07G054=y
-CONFIG_ARCH_R9A08G045=y
-CONFIG_ARCH_R9A09G011=y
-CONFIG_ARCH_R9A09G047=y
-CONFIG_ARCH_R9A09G057=y
CONFIG_ROCKCHIP_IODOMAIN=y
CONFIG_ARCH_TEGRA_132_SOC=y
CONFIG_ARCH_TEGRA_210_SOC=y
@@ -1550,10 +1528,11 @@ CONFIG_PWM_IMX27=m
CONFIG_PWM_MESON=m
CONFIG_PWM_MTK_DISP=m
CONFIG_PWM_MEDIATEK=m
-CONFIG_PWM_RCAR=m
+CONFIG_PWM_RENESAS_RCAR=m
+CONFIG_PWM_RENESAS_RZG2L_GPT=m
+CONFIG_PWM_RENESAS_RZ_MTU3=m
CONFIG_PWM_RENESAS_TPU=m
CONFIG_PWM_ROCKCHIP=y
-CONFIG_PWM_RZ_MTU3=m
CONFIG_PWM_SAMSUNG=y
CONFIG_PWM_SL28CPLD=m
CONFIG_PWM_SUN4I=m
@@ -1729,15 +1708,14 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_SECURITY=y
CONFIG_CRYPTO_USER=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CHACHA20=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_CRYPTO_USER_API_RNG=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_SHA1_ARM64_CE=y
-CONFIG_CRYPTO_SHA2_ARM64_CE=y
CONFIG_CRYPTO_SHA512_ARM64_CE=m
CONFIG_CRYPTO_SHA3_ARM64=m
CONFIG_CRYPTO_SM3_ARM64_CE=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 3418c8d3c78d..c44b0f202a1f 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -25,18 +25,6 @@ config CRYPTO_NHPOLY1305_NEON
Architecture: arm64 using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_POLY1305_NEON
- tristate
- depends on KERNEL_MODE_NEON
- select CRYPTO_HASH
- select CRYPTO_ARCH_HAVE_LIB_POLY1305
- default CRYPTO_LIB_POLY1305_INTERNAL
- help
- Poly1305 authenticator algorithm (RFC7539)
-
- Architecture: arm64 using:
- - NEON (Advanced SIMD) extensions
-
config CRYPTO_SHA1_ARM64_CE
tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
@@ -48,25 +36,6 @@ config CRYPTO_SHA1_ARM64_CE
Architecture: arm64 using:
- ARMv8 Crypto Extensions
-config CRYPTO_SHA256_ARM64
- tristate "Hash functions: SHA-224 and SHA-256"
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: arm64
-
-config CRYPTO_SHA2_ARM64_CE
- tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_HASH
- select CRYPTO_SHA256_ARM64
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: arm64 using:
- - ARMv8 Crypto Extensions
-
config CRYPTO_SHA512_ARM64
tristate "Hash functions: SHA-384 and SHA-512"
select CRYPTO_HASH
@@ -101,7 +70,7 @@ config CRYPTO_SM3_NEON
tristate "Hash functions: SM3 (NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
help
SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
@@ -112,7 +81,7 @@ config CRYPTO_SM3_ARM64_CE
tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
help
SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
@@ -143,7 +112,7 @@ config CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE
tristate "Ciphers: AES (ARMv8 Crypto Extensions)"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_LIB_AES
help
@@ -186,20 +155,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
Architecture: arm64 using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_CHACHA20_NEON
- tristate
- depends on KERNEL_MODE_NEON
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- default CRYPTO_LIB_CHACHA_INTERNAL
- help
- Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
- stream cipher algorithms
-
- Architecture: arm64 using:
- - NEON (Advanced SIMD) extensions
-
config CRYPTO_AES_ARM64_BS
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XCTR/XTS modes (bit-sliced NEON)"
depends on KERNEL_MODE_NEON
@@ -267,7 +222,7 @@ config CRYPTO_SM4_ARM64_NEON_BLK
config CRYPTO_AES_ARM64_CE_CCM
tristate "AEAD cipher: AES in CCM mode (ARMv8 Crypto Extensions)"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64_CE_BLK
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index e7139c4768ce..c231c980c514 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -8,9 +8,6 @@
obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
-obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
-sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
-
obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o
sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
@@ -56,19 +53,9 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
-obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o
-sha256-arm64-y := sha256-glue.o sha256-core.o
-
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
-
-obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
-poly1305-neon-y := poly1305-core.o poly1305-glue.o
-AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64
-
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
@@ -81,10 +68,7 @@ aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
-$(obj)/%-core.S: $(src)/%-armv8.pl
- $(call cmd,perlasm)
-
-$(obj)/sha256-core.S: $(src)/sha512-armv8.pl
+$(obj)/sha512-core.S: $(src)/../lib/crypto/sha2-armv8.pl
$(call cmd,perlasm)
-clean-files += poly1305-core.S sha256-core.S sha512-core.S
+clean-files += sha512-core.S
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b0150999743f..81560f722b9d 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -5,19 +5,20 @@
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <asm/hwcap.h>
-#include <asm/simd.h>
+#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
-#include <crypto/sha2.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
+#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <crypto/xts.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include "aes-ce-setkey.h"
@@ -130,7 +131,6 @@ struct mac_tfm_ctx {
};
struct mac_desc_ctx {
- unsigned int len;
u8 dg[AES_BLOCK_SIZE];
};
@@ -869,109 +869,64 @@ static int mac_init(struct shash_desc *desc)
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
memset(ctx->dg, 0, AES_BLOCK_SIZE);
- ctx->len = 0;
-
return 0;
}
static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
- u8 dg[], int enc_before, int enc_after)
+ u8 dg[], int enc_before)
{
int rounds = 6 + ctx->key_length / 4;
+ int rem;
- if (crypto_simd_usable()) {
- int rem;
-
- do {
- kernel_neon_begin();
- rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
- dg, enc_before, enc_after);
- kernel_neon_end();
- in += (blocks - rem) * AES_BLOCK_SIZE;
- blocks = rem;
- enc_before = 0;
- } while (blocks);
- } else {
- if (enc_before)
- aes_encrypt(ctx, dg, dg);
-
- while (blocks--) {
- crypto_xor(dg, in, AES_BLOCK_SIZE);
- in += AES_BLOCK_SIZE;
-
- if (blocks || enc_after)
- aes_encrypt(ctx, dg, dg);
- }
- }
+ do {
+ kernel_neon_begin();
+ rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
+ dg, enc_before, !enc_before);
+ kernel_neon_end();
+ in += (blocks - rem) * AES_BLOCK_SIZE;
+ blocks = rem;
+ } while (blocks);
}
static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
{
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int blocks = len / AES_BLOCK_SIZE;
- while (len > 0) {
- unsigned int l;
-
- if ((ctx->len % AES_BLOCK_SIZE) == 0 &&
- (ctx->len + len) > AES_BLOCK_SIZE) {
-
- int blocks = len / AES_BLOCK_SIZE;
-
- len %= AES_BLOCK_SIZE;
-
- mac_do_update(&tctx->key, p, blocks, ctx->dg,
- (ctx->len != 0), (len != 0));
-
- p += blocks * AES_BLOCK_SIZE;
-
- if (!len) {
- ctx->len = AES_BLOCK_SIZE;
- break;
- }
- ctx->len = 0;
- }
-
- l = min(len, AES_BLOCK_SIZE - ctx->len);
-
- if (l <= AES_BLOCK_SIZE) {
- crypto_xor(ctx->dg + ctx->len, p, l);
- ctx->len += l;
- len -= l;
- p += l;
- }
- }
-
- return 0;
+ len %= AES_BLOCK_SIZE;
+ mac_do_update(&tctx->key, p, blocks, ctx->dg, 0);
+ return len;
}
-static int cbcmac_final(struct shash_desc *desc, u8 *out)
+static int cbcmac_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
- mac_do_update(&tctx->key, NULL, 0, ctx->dg, (ctx->len != 0), 0);
-
+ if (len) {
+ crypto_xor(ctx->dg, src, len);
+ mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1);
+ }
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
-
return 0;
}
-static int cmac_final(struct shash_desc *desc, u8 *out)
+static int cmac_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+ u8 *out)
{
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
u8 *consts = tctx->consts;
- if (ctx->len != AES_BLOCK_SIZE) {
- ctx->dg[ctx->len] ^= 0x80;
+ crypto_xor(ctx->dg, src, len);
+ if (len != AES_BLOCK_SIZE) {
+ ctx->dg[len] ^= 0x80;
consts += AES_BLOCK_SIZE;
}
-
- mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
-
+ mac_do_update(&tctx->key, consts, 1, ctx->dg, 0);
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
-
return 0;
}
@@ -979,6 +934,8 @@ static struct shash_alg mac_algs[] = { {
.base.cra_name = "cmac(aes)",
.base.cra_driver_name = "cmac-aes-" MODE,
.base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
@@ -987,13 +944,15 @@ static struct shash_alg mac_algs[] = { {
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
- .final = cmac_final,
+ .finup = cmac_finup,
.setkey = cmac_setkey,
.descsize = sizeof(struct mac_desc_ctx),
}, {
.base.cra_name = "xcbc(aes)",
.base.cra_driver_name = "xcbc-aes-" MODE,
.base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
@@ -1002,21 +961,22 @@ static struct shash_alg mac_algs[] = { {
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
- .final = cmac_final,
+ .finup = cmac_finup,
.setkey = xcbc_setkey,
.descsize = sizeof(struct mac_desc_ctx),
}, {
.base.cra_name = "cbcmac(aes)",
.base.cra_driver_name = "cbcmac-aes-" MODE,
.base.cra_priority = PRIO,
- .base.cra_blocksize = 1,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
- .final = cbcmac_final,
+ .finup = cbcmac_finup,
.setkey = cbcmac_setkey,
.descsize = sizeof(struct mac_desc_ctx),
} };
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
deleted file mode 100644
index 229876acfc58..000000000000
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
- int nrounds, int bytes);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
- int bytes, int nrounds)
-{
- while (bytes > 0) {
- int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
-
- if (l <= CHACHA_BLOCK_SIZE) {
- u8 buf[CHACHA_BLOCK_SIZE];
-
- memcpy(buf, src, l);
- chacha_block_xor_neon(state, buf, buf, nrounds);
- memcpy(dst, buf, l);
- state[12] += 1;
- break;
- }
- chacha_4block_xor_neon(state, dst, src, nrounds, l);
- bytes -= l;
- src += l;
- dst += l;
- state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
- }
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
- hchacha_block_generic(state, stream, nrounds);
- } else {
- kernel_neon_begin();
- hchacha_block_neon(state, stream, nrounds);
- kernel_neon_end();
- }
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
- int nrounds)
-{
- if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
- !crypto_simd_usable())
- return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
- do {
- unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
-
- kernel_neon_begin();
- chacha_doneon(state, dst, src, todo, nrounds);
- kernel_neon_end();
-
- bytes -= todo;
- src += todo;
- dst += todo;
- } while (bytes);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- chacha_init(state, ctx->key, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = rounddown(nbytes, walk.stride);
-
- if (!static_branch_likely(&have_neon) ||
- !crypto_simd_usable()) {
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- } else {
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes, ctx->nrounds);
- kernel_neon_end();
- }
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- chacha_init(state, ctx->key, req->iv);
- hchacha_block_arch(state, subctx.key, ctx->nrounds);
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha_neon,
- .decrypt = chacha_neon,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- if (!cpu_have_named_feature(ASIMD))
- return 0;
-
- static_branch_enable(&have_neon);
-
- return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
- crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 071e122f9c37..4995b6e22335 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -6,30 +6,27 @@
*/
#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/aes.h>
-#include <crypto/gcm.h>
-#include <crypto/algapi.h>
#include <crypto/b128ops.h>
+#include <crypto/gcm.h>
+#include <crypto/ghash.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
-
#define RFC4106_NONCE_SIZE 4
struct ghash_key {
@@ -37,10 +34,8 @@ struct ghash_key {
u64 h[][2];
};
-struct ghash_desc_ctx {
+struct arm_ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
- u8 buf[GHASH_BLOCK_SIZE];
- u32 count;
};
struct gcm_aes_ctx {
@@ -65,36 +60,12 @@ asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
static int ghash_init(struct shash_desc *desc)
{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- *ctx = (struct ghash_desc_ctx){};
+ *ctx = (struct arm_ghash_desc_ctx){};
return 0;
}
-static void ghash_do_update(int blocks, u64 dg[], const char *src,
- struct ghash_key *key, const char *head)
-{
- be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
-
- do {
- const u8 *in = src;
-
- if (head) {
- in = head;
- blocks++;
- head = NULL;
- } else {
- src += GHASH_BLOCK_SIZE;
- }
-
- crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
- gf128mul_lle(&dst, &key->k);
- } while (--blocks);
-
- dg[0] = be64_to_cpu(dst.b);
- dg[1] = be64_to_cpu(dst.a);
-}
-
static __always_inline
void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
struct ghash_key *key, const char *head,
@@ -103,13 +74,9 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
u64 const h[][2],
const char *head))
{
- if (likely(crypto_simd_usable())) {
- kernel_neon_begin();
- simd_update(blocks, dg, src, key->h, head);
- kernel_neon_end();
- } else {
- ghash_do_update(blocks, dg, src, key, head);
- }
+ kernel_neon_begin();
+ simd_update(blocks, dg, src, key->h, head);
+ kernel_neon_end();
}
/* avoid hogging the CPU for too long */
@@ -118,61 +85,59 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ int blocks;
- ctx->count += len;
+ blocks = len / GHASH_BLOCK_SIZE;
+ len -= blocks * GHASH_BLOCK_SIZE;
- if ((partial + len) >= GHASH_BLOCK_SIZE) {
- struct ghash_key *key = crypto_shash_ctx(desc->tfm);
- int blocks;
-
- if (partial) {
- int p = GHASH_BLOCK_SIZE - partial;
+ do {
+ int chunk = min(blocks, MAX_BLOCKS);
- memcpy(ctx->buf + partial, src, p);
- src += p;
- len -= p;
- }
+ ghash_do_simd_update(chunk, ctx->digest, src, key, NULL,
+ pmull_ghash_update_p8);
+ blocks -= chunk;
+ src += chunk * GHASH_BLOCK_SIZE;
+ } while (unlikely(blocks > 0));
+ return len;
+}
- blocks = len / GHASH_BLOCK_SIZE;
- len %= GHASH_BLOCK_SIZE;
+static int ghash_export(struct shash_desc *desc, void *out)
+{
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ u8 *dst = out;
- do {
- int chunk = min(blocks, MAX_BLOCKS);
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+ return 0;
+}
- ghash_do_simd_update(chunk, ctx->digest, src, key,
- partial ? ctx->buf : NULL,
- pmull_ghash_update_p8);
+static int ghash_import(struct shash_desc *desc, const void *in)
+{
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ const u8 *src = in;
- blocks -= chunk;
- src += chunk * GHASH_BLOCK_SIZE;
- partial = 0;
- } while (unlikely(blocks > 0));
- }
- if (len)
- memcpy(ctx->buf + partial, src, len);
+ ctx->digest[1] = get_unaligned_be64(src);
+ ctx->digest[0] = get_unaligned_be64(src + 8);
return 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
-
- if (partial) {
- struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
- memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
- ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
+ memcpy(buf, src, len);
+ ghash_do_simd_update(1, ctx->digest, src, key, NULL,
pmull_ghash_update_p8);
+ memzero_explicit(buf, sizeof(buf));
}
- put_unaligned_be64(ctx->digest[1], dst);
- put_unaligned_be64(ctx->digest[0], dst + 8);
-
- memzero_explicit(ctx, sizeof(*ctx));
- return 0;
+ return ghash_export(desc, dst);
}
static void ghash_reflect(u64 h[], const be128 *k)
@@ -205,6 +170,7 @@ static struct shash_alg ghash_alg = {
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-neon",
.base.cra_priority = 150,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
@@ -212,9 +178,12 @@ static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
+ .export = ghash_export,
+ .import = ghash_import,
+ .descsize = sizeof(struct arm_ghash_desc_ctx),
+ .statesize = sizeof(struct ghash_desc_ctx),
};
static int num_rounds(struct crypto_aes_ctx *ctx)
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
deleted file mode 100644
index 18883ea438f3..000000000000
--- a/arch/arm64/crypto/poly1305-glue.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
- *
- * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/poly1305.h>
-#include <crypto/internal/simd.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <linux/jump_label.h>
-#include <linux/module.h>
-
-asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
-asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
-asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
-asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
-{
- poly1305_init_arm64(&dctx->h, key);
- dctx->s[0] = get_unaligned_le32(key + 16);
- dctx->s[1] = get_unaligned_le32(key + 20);
- dctx->s[2] = get_unaligned_le32(key + 24);
- dctx->s[3] = get_unaligned_le32(key + 28);
- dctx->buflen = 0;
-}
-EXPORT_SYMBOL(poly1305_init_arch);
-
-static int neon_poly1305_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- dctx->buflen = 0;
- dctx->rset = 0;
- dctx->sset = false;
-
- return 0;
-}
-
-static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
- u32 len, u32 hibit, bool do_neon)
-{
- if (unlikely(!dctx->sset)) {
- if (!dctx->rset) {
- poly1305_init_arm64(&dctx->h, src);
- src += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- dctx->rset = 1;
- }
- if (len >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- if (len < POLY1305_BLOCK_SIZE)
- return;
- }
-
- len &= ~(POLY1305_BLOCK_SIZE - 1);
-
- if (static_branch_likely(&have_neon) && likely(do_neon))
- poly1305_blocks_neon(&dctx->h, src, len, hibit);
- else
- poly1305_blocks(&dctx->h, src, len, hibit);
-}
-
-static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
- const u8 *src, u32 len, bool do_neon)
-{
- if (unlikely(dctx->buflen)) {
- u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
-
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- len -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- neon_poly1305_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE, 1, false);
- dctx->buflen = 0;
- }
- }
-
- if (likely(len >= POLY1305_BLOCK_SIZE)) {
- neon_poly1305_blocks(dctx, src, len, 1, do_neon);
- src += round_down(len, POLY1305_BLOCK_SIZE);
- len %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(len)) {
- dctx->buflen = len;
- memcpy(dctx->buf, src, len);
- }
-}
-
-static int neon_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- bool do_neon = crypto_simd_usable() && srclen > 128;
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (static_branch_likely(&have_neon) && do_neon)
- kernel_neon_begin();
- neon_poly1305_do_update(dctx, src, srclen, do_neon);
- if (static_branch_likely(&have_neon) && do_neon)
- kernel_neon_end();
- return 0;
-}
-
-void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
- unsigned int nbytes)
-{
- if (unlikely(dctx->buflen)) {
- u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
-
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- nbytes -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
- dctx->buflen = 0;
- }
- }
-
- if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
- unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
-
- if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
- do {
- unsigned int todo = min_t(unsigned int, len, SZ_4K);
-
- kernel_neon_begin();
- poly1305_blocks_neon(&dctx->h, src, todo, 1);
- kernel_neon_end();
-
- len -= todo;
- src += todo;
- } while (len);
- } else {
- poly1305_blocks(&dctx->h, src, len, 1);
- src += len;
- }
- nbytes %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(nbytes)) {
- dctx->buflen = nbytes;
- memcpy(dctx->buf, src, nbytes);
- }
-}
-EXPORT_SYMBOL(poly1305_update_arch);
-
-void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
-{
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- }
-
- poly1305_emit(&dctx->h, dst, dctx->s);
- memzero_explicit(dctx, sizeof(*dctx));
-}
-EXPORT_SYMBOL(poly1305_final_arch);
-
-static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- poly1305_final_arch(dctx, dst);
- return 0;
-}
-
-static struct shash_alg neon_poly1305_alg = {
- .init = neon_poly1305_init,
- .update = neon_poly1305_update,
- .final = neon_poly1305_final,
- .digestsize = POLY1305_DIGEST_SIZE,
- .descsize = sizeof(struct poly1305_desc_ctx),
-
- .base.cra_name = "poly1305",
- .base.cra_driver_name = "poly1305-neon",
- .base.cra_priority = 200,
- .base.cra_blocksize = POLY1305_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-};
-
-static int __init neon_poly1305_mod_init(void)
-{
- if (!cpu_have_named_feature(ASIMD))
- return 0;
-
- static_branch_enable(&have_neon);
-
- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
- crypto_register_shash(&neon_poly1305_alg) : 0;
-}
-
-static void __exit neon_poly1305_mod_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
- crypto_unregister_shash(&neon_poly1305_alg);
-}
-
-module_init(neon_poly1305_mod_init);
-module_exit(neon_poly1305_mod_exit);
-
-MODULE_DESCRIPTION("Poly1305 transform using NEON instructions");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c
index 0a3b5718df85..c4e653688ea0 100644
--- a/arch/arm64/crypto/polyval-ce-glue.c
+++ b/arch/arm64/crypto/polyval-ce-glue.c
@@ -15,17 +15,15 @@
* ARMv8 Crypto Extensions instructions to implement the finite field operations.
*/
-#include <crypto/algapi.h>
+#include <asm/neon.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
+#include <crypto/utils.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
+#include <linux/string.h>
#define NUM_KEY_POWERS 8
@@ -38,7 +36,6 @@ struct polyval_tfm_ctx {
struct polyval_desc_ctx {
u8 buffer[POLYVAL_BLOCK_SIZE];
- u32 bytes;
};
asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
@@ -48,25 +45,16 @@ asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
- if (likely(crypto_simd_usable())) {
- kernel_neon_begin();
- pmull_polyval_update(keys, in, nblocks, accumulator);
- kernel_neon_end();
- } else {
- polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
- nblocks, accumulator);
- }
+ kernel_neon_begin();
+ pmull_polyval_update(keys, in, nblocks, accumulator);
+ kernel_neon_end();
}
static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
- if (likely(crypto_simd_usable())) {
- kernel_neon_begin();
- pmull_polyval_mul(op1, op2);
- kernel_neon_end();
- } else {
- polyval_mul_non4k(op1, op2);
- }
+ kernel_neon_begin();
+ pmull_polyval_mul(op1, op2);
+ kernel_neon_end();
}
static int polyval_arm64_setkey(struct crypto_shash *tfm,
@@ -103,49 +91,27 @@ static int polyval_arm64_update(struct shash_desc *desc,
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- u8 *pos;
unsigned int nblocks;
- unsigned int n;
-
- if (dctx->bytes) {
- n = min(srclen, dctx->bytes);
- pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
-
- dctx->bytes -= n;
- srclen -= n;
- while (n--)
- *pos++ ^= *src++;
-
- if (!dctx->bytes)
- internal_polyval_mul(dctx->buffer,
- tctx->key_powers[NUM_KEY_POWERS-1]);
- }
-
- while (srclen >= POLYVAL_BLOCK_SIZE) {
+ do {
/* allow rescheduling every 4K bytes */
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
- }
+ } while (srclen >= POLYVAL_BLOCK_SIZE);
- if (srclen) {
- dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
- pos = dctx->buffer;
- while (srclen--)
- *pos++ ^= *src++;
- }
-
- return 0;
+ return srclen;
}
-static int polyval_arm64_final(struct shash_desc *desc, u8 *dst)
+static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- if (dctx->bytes) {
+ if (len) {
+ crypto_xor(dctx->buffer, src, len);
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
@@ -159,13 +125,14 @@ static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_arm64_init,
.update = polyval_arm64_update,
- .final = polyval_arm64_final,
+ .finup = polyval_arm64_finup,
.setkey = polyval_arm64_setkey,
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-ce",
.cra_priority = 200,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index cbd14f208f83..65b6980817e5 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -7,14 +7,14 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@@ -56,79 +56,49 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- if (!crypto_simd_usable())
- return crypto_sha1_update(desc, data, len);
-
sctx->finalize = 0;
- sha1_base_do_update(desc, data, len, sha1_ce_transform);
-
- return 0;
+ return sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
}
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len;
-
- if (!crypto_simd_usable())
- return crypto_sha1_finup(desc, data, len, out);
+ bool finalized = false;
/*
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
*/
- sctx->finalize = finalize;
-
- sha1_base_do_update(desc, data, len, sha1_ce_transform);
- if (!finalize)
- sha1_base_do_finalize(desc, sha1_ce_transform);
- return sha1_base_finish(desc, out);
-}
-
-static int sha1_ce_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable())
- return crypto_sha1_finup(desc, NULL, 0, out);
-
- sctx->finalize = 0;
- sha1_base_do_finalize(desc, sha1_ce_transform);
+ if (len >= SHA1_BLOCK_SIZE) {
+ unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE);
+
+ finalized = !remain;
+ sctx->finalize = finalized;
+ sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
+ data += len - remain;
+ len = remain;
+ }
+ if (!finalized) {
+ sctx->finalize = 0;
+ sha1_base_do_finup(desc, data, len, sha1_ce_transform);
+ }
return sha1_base_finish(desc, out);
}
-static int sha1_ce_export(struct shash_desc *desc, void *out)
-{
- struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, &sctx->sst, sizeof(struct sha1_state));
- return 0;
-}
-
-static int sha1_ce_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-
- memcpy(&sctx->sst, in, sizeof(struct sha1_state));
- sctx->finalize = 0;
- return 0;
-}
-
static struct shash_alg alg = {
.init = sha1_base_init,
.update = sha1_ce_update,
- .final = sha1_ce_final,
.finup = sha1_ce_finup,
- .import = sha1_ce_import,
- .export = sha1_ce_export,
.descsize = sizeof(struct sha1_ce_state),
- .statesize = sizeof(struct sha1_state),
+ .statesize = SHA1_STATE_SIZE,
.digestsize = SHA1_DIGEST_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
deleted file mode 100644
index 6b4866a88ded..000000000000
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
- *
- * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
-
-MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha256");
-
-struct sha256_ce_state {
- struct sha256_state sst;
- u32 finalize;
-};
-
-extern const u32 sha256_ce_offsetof_count;
-extern const u32 sha256_ce_offsetof_finalize;
-
-asmlinkage int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src,
- int blocks);
-
-static void sha256_ce_transform(struct sha256_state *sst, u8 const *src,
- int blocks)
-{
- while (blocks) {
- int rem;
-
- kernel_neon_begin();
- rem = __sha256_ce_transform(container_of(sst,
- struct sha256_ce_state,
- sst), src, blocks);
- kernel_neon_end();
- src += (blocks - rem) * SHA256_BLOCK_SIZE;
- blocks = rem;
- }
-}
-
-const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
- sst.count);
-const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
- finalize);
-
-asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
-
-static void sha256_arm64_transform(struct sha256_state *sst, u8 const *src,
- int blocks)
-{
- sha256_block_data_order(sst->state, src, blocks);
-}
-
-static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_ce_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable())
- return sha256_base_do_update(desc, data, len,
- sha256_arm64_transform);
-
- sctx->finalize = 0;
- sha256_base_do_update(desc, data, len, sha256_ce_transform);
-
- return 0;
-}
-
-static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct sha256_ce_state *sctx = shash_desc_ctx(desc);
- bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len;
-
- if (!crypto_simd_usable()) {
- if (len)
- sha256_base_do_update(desc, data, len,
- sha256_arm64_transform);
- sha256_base_do_finalize(desc, sha256_arm64_transform);
- return sha256_base_finish(desc, out);
- }
-
- /*
- * Allow the asm code to perform the finalization if there is no
- * partial data and the input is a round multiple of the block size.
- */
- sctx->finalize = finalize;
-
- sha256_base_do_update(desc, data, len, sha256_ce_transform);
- if (!finalize)
- sha256_base_do_finalize(desc, sha256_ce_transform);
- return sha256_base_finish(desc, out);
-}
-
-static int sha256_ce_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_ce_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable()) {
- sha256_base_do_finalize(desc, sha256_arm64_transform);
- return sha256_base_finish(desc, out);
- }
-
- sctx->finalize = 0;
- sha256_base_do_finalize(desc, sha256_ce_transform);
- return sha256_base_finish(desc, out);
-}
-
-static int sha256_ce_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- sha256_base_init(desc);
- return sha256_ce_finup(desc, data, len, out);
-}
-
-static int sha256_ce_export(struct shash_desc *desc, void *out)
-{
- struct sha256_ce_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, &sctx->sst, sizeof(struct sha256_state));
- return 0;
-}
-
-static int sha256_ce_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_ce_state *sctx = shash_desc_ctx(desc);
-
- memcpy(&sctx->sst, in, sizeof(struct sha256_state));
- sctx->finalize = 0;
- return 0;
-}
-
-static struct shash_alg algs[] = { {
- .init = sha224_base_init,
- .update = sha256_ce_update,
- .final = sha256_ce_final,
- .finup = sha256_ce_finup,
- .export = sha256_ce_export,
- .import = sha256_ce_import,
- .descsize = sizeof(struct sha256_ce_state),
- .statesize = sizeof(struct sha256_state),
- .digestsize = SHA224_DIGEST_SIZE,
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-ce",
- .cra_priority = 200,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .init = sha256_base_init,
- .update = sha256_ce_update,
- .final = sha256_ce_final,
- .finup = sha256_ce_finup,
- .digest = sha256_ce_digest,
- .export = sha256_ce_export,
- .import = sha256_ce_import,
- .descsize = sizeof(struct sha256_ce_state),
- .statesize = sizeof(struct sha256_state),
- .digestsize = SHA256_DIGEST_SIZE,
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-ce",
- .cra_priority = 200,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init sha2_ce_mod_init(void)
-{
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha2_ce_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_cpu_feature_match(SHA2, sha2_ce_mod_init);
-module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
deleted file mode 100644
index 35356987cc1e..000000000000
--- a/arch/arm64/crypto/sha256-glue.c
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Linux/arm64 port of the OpenSSL SHA256 implementation for AArch64
- *
- * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash for arm64");
-MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha256");
-
-asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
- unsigned int num_blks);
-EXPORT_SYMBOL(sha256_block_data_order);
-
-static void sha256_arm64_transform(struct sha256_state *sst, u8 const *src,
- int blocks)
-{
- sha256_block_data_order(sst->state, src, blocks);
-}
-
-asmlinkage void sha256_block_neon(u32 *digest, const void *data,
- unsigned int num_blks);
-
-static void sha256_neon_transform(struct sha256_state *sst, u8 const *src,
- int blocks)
-{
- sha256_block_neon(sst->state, src, blocks);
-}
-
-static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- return sha256_base_do_update(desc, data, len, sha256_arm64_transform);
-}
-
-static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- if (len)
- sha256_base_do_update(desc, data, len, sha256_arm64_transform);
- sha256_base_do_finalize(desc, sha256_arm64_transform);
-
- return sha256_base_finish(desc, out);
-}
-
-static int crypto_sha256_arm64_final(struct shash_desc *desc, u8 *out)
-{
- return crypto_sha256_arm64_finup(desc, NULL, 0, out);
-}
-
-static struct shash_alg algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = crypto_sha256_arm64_update,
- .final = crypto_sha256_arm64_final,
- .finup = crypto_sha256_arm64_finup,
- .descsize = sizeof(struct sha256_state),
- .base.cra_name = "sha256",
- .base.cra_driver_name = "sha256-arm64",
- .base.cra_priority = 125,
- .base.cra_blocksize = SHA256_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = crypto_sha256_arm64_update,
- .final = crypto_sha256_arm64_final,
- .finup = crypto_sha256_arm64_finup,
- .descsize = sizeof(struct sha256_state),
- .base.cra_name = "sha224",
- .base.cra_driver_name = "sha224-arm64",
- .base.cra_priority = 125,
- .base.cra_blocksize = SHA224_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-} };
-
-static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable())
- return sha256_base_do_update(desc, data, len,
- sha256_arm64_transform);
-
- while (len > 0) {
- unsigned int chunk = len;
-
- /*
- * Don't hog the CPU for the entire time it takes to process all
- * input when running on a preemptible kernel, but process the
- * data block by block instead.
- */
- if (IS_ENABLED(CONFIG_PREEMPTION) &&
- chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
- chunk = SHA256_BLOCK_SIZE -
- sctx->count % SHA256_BLOCK_SIZE;
-
- kernel_neon_begin();
- sha256_base_do_update(desc, data, chunk, sha256_neon_transform);
- kernel_neon_end();
- data += chunk;
- len -= chunk;
- }
- return 0;
-}
-
-static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- if (!crypto_simd_usable()) {
- if (len)
- sha256_base_do_update(desc, data, len,
- sha256_arm64_transform);
- sha256_base_do_finalize(desc, sha256_arm64_transform);
- } else {
- if (len)
- sha256_update_neon(desc, data, len);
- kernel_neon_begin();
- sha256_base_do_finalize(desc, sha256_neon_transform);
- kernel_neon_end();
- }
- return sha256_base_finish(desc, out);
-}
-
-static int sha256_final_neon(struct shash_desc *desc, u8 *out)
-{
- return sha256_finup_neon(desc, NULL, 0, out);
-}
-
-static struct shash_alg neon_algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = sha256_update_neon,
- .final = sha256_final_neon,
- .finup = sha256_finup_neon,
- .descsize = sizeof(struct sha256_state),
- .base.cra_name = "sha256",
- .base.cra_driver_name = "sha256-arm64-neon",
- .base.cra_priority = 150,
- .base.cra_blocksize = SHA256_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = sha256_update_neon,
- .final = sha256_final_neon,
- .finup = sha256_finup_neon,
- .descsize = sizeof(struct sha256_state),
- .base.cra_name = "sha224",
- .base.cra_driver_name = "sha224-arm64-neon",
- .base.cra_priority = 150,
- .base.cra_blocksize = SHA224_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-} };
-
-static int __init sha256_mod_init(void)
-{
- int ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
- if (ret)
- return ret;
-
- if (cpu_have_named_feature(ASIMD)) {
- ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
- if (ret)
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
- }
- return ret;
-}
-
-static void __exit sha256_mod_fini(void)
-{
- if (cpu_have_named_feature(ASIMD))
- crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_init(sha256_mod_init);
-module_exit(sha256_mod_fini);
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 5662c3ac49e9..b4f1001046c9 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -12,13 +12,13 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/sha3.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@@ -35,74 +35,55 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha3_state *sctx = shash_desc_ctx(desc);
- unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
-
- if (!crypto_simd_usable())
- return crypto_sha3_update(desc, data, len);
-
- if ((sctx->partial + len) >= sctx->rsiz) {
- int blocks;
-
- if (sctx->partial) {
- int p = sctx->rsiz - sctx->partial;
-
- memcpy(sctx->buf + sctx->partial, data, p);
- kernel_neon_begin();
- sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
- kernel_neon_end();
-
- data += p;
- len -= p;
- sctx->partial = 0;
- }
-
- blocks = len / sctx->rsiz;
- len %= sctx->rsiz;
-
- while (blocks) {
- int rem;
-
- kernel_neon_begin();
- rem = sha3_ce_transform(sctx->st, data, blocks,
- digest_size);
- kernel_neon_end();
- data += (blocks - rem) * sctx->rsiz;
- blocks = rem;
- }
- }
-
- if (len) {
- memcpy(sctx->buf + sctx->partial, data, len);
- sctx->partial += len;
- }
- return 0;
+ struct crypto_shash *tfm = desc->tfm;
+ unsigned int bs, ds;
+ int blocks;
+
+ ds = crypto_shash_digestsize(tfm);
+ bs = crypto_shash_blocksize(tfm);
+ blocks = len / bs;
+ len -= blocks * bs;
+ do {
+ int rem;
+
+ kernel_neon_begin();
+ rem = sha3_ce_transform(sctx->st, data, blocks, ds);
+ kernel_neon_end();
+ data += (blocks - rem) * bs;
+ blocks = rem;
+ } while (blocks);
+ return len;
}
-static int sha3_final(struct shash_desc *desc, u8 *out)
+static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+ u8 *out)
{
struct sha3_state *sctx = shash_desc_ctx(desc);
- unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+ struct crypto_shash *tfm = desc->tfm;
__le64 *digest = (__le64 *)out;
+ u8 block[SHA3_224_BLOCK_SIZE];
+ unsigned int bs, ds;
int i;
- if (!crypto_simd_usable())
- return crypto_sha3_final(desc, out);
+ ds = crypto_shash_digestsize(tfm);
+ bs = crypto_shash_blocksize(tfm);
+ memcpy(block, src, len);
- sctx->buf[sctx->partial++] = 0x06;
- memset(sctx->buf + sctx->partial, 0, sctx->rsiz - sctx->partial);
- sctx->buf[sctx->rsiz - 1] |= 0x80;
+ block[len++] = 0x06;
+ memset(block + len, 0, bs - len);
+ block[bs - 1] |= 0x80;
kernel_neon_begin();
- sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
+ sha3_ce_transform(sctx->st, block, 1, ds);
kernel_neon_end();
+ memzero_explicit(block , sizeof(block));
- for (i = 0; i < digest_size / 8; i++)
+ for (i = 0; i < ds / 8; i++)
put_unaligned_le64(sctx->st[i], digest++);
- if (digest_size & 4)
+ if (ds & 4)
put_unaligned_le32(sctx->st[i], (__le32 *)digest);
- memzero_explicit(sctx, sizeof(*sctx));
return 0;
}
@@ -110,10 +91,11 @@ static struct shash_alg algs[] = { {
.digestsize = SHA3_224_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = sha3_update,
- .final = sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-224",
.base.cra_driver_name = "sha3-224-ce",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_224_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
@@ -121,10 +103,11 @@ static struct shash_alg algs[] = { {
.digestsize = SHA3_256_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = sha3_update,
- .final = sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-256",
.base.cra_driver_name = "sha3-256-ce",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_256_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
@@ -132,10 +115,11 @@ static struct shash_alg algs[] = { {
.digestsize = SHA3_384_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = sha3_update,
- .final = sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-384",
.base.cra_driver_name = "sha3-384-ce",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_384_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
@@ -143,10 +127,11 @@ static struct shash_alg algs[] = { {
.digestsize = SHA3_512_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = sha3_update,
- .final = sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-512",
.base.cra_driver_name = "sha3-512-ce",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_512_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index 071f64293227..6fb3001fa2c9 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -10,14 +10,11 @@
*/
#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions");
@@ -29,12 +26,10 @@ MODULE_ALIAS_CRYPTO("sha512");
asmlinkage int __sha512_ce_transform(struct sha512_state *sst, u8 const *src,
int blocks);
-asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
-
static void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
int blocks)
{
- while (blocks) {
+ do {
int rem;
kernel_neon_begin();
@@ -42,67 +37,47 @@ static void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
kernel_neon_end();
src += (blocks - rem) * SHA512_BLOCK_SIZE;
blocks = rem;
- }
-}
-
-static void sha512_arm64_transform(struct sha512_state *sst, u8 const *src,
- int blocks)
-{
- sha512_block_data_order(sst->state, src, blocks);
+ } while (blocks);
}
static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- sha512_block_fn *fn = crypto_simd_usable() ? sha512_ce_transform
- : sha512_arm64_transform;
-
- sha512_base_do_update(desc, data, len, fn);
- return 0;
+ return sha512_base_do_update_blocks(desc, data, len,
+ sha512_ce_transform);
}
static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- sha512_block_fn *fn = crypto_simd_usable() ? sha512_ce_transform
- : sha512_arm64_transform;
-
- sha512_base_do_update(desc, data, len, fn);
- sha512_base_do_finalize(desc, fn);
- return sha512_base_finish(desc, out);
-}
-
-static int sha512_ce_final(struct shash_desc *desc, u8 *out)
-{
- sha512_block_fn *fn = crypto_simd_usable() ? sha512_ce_transform
- : sha512_arm64_transform;
-
- sha512_base_do_finalize(desc, fn);
+ sha512_base_do_finup(desc, data, len, sha512_ce_transform);
return sha512_base_finish(desc, out);
}
static struct shash_alg algs[] = { {
.init = sha384_base_init,
.update = sha512_ce_update,
- .final = sha512_ce_final,
.finup = sha512_ce_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA384_DIGEST_SIZE,
.base.cra_name = "sha384",
.base.cra_driver_name = "sha384-ce",
.base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.base.cra_blocksize = SHA512_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.init = sha512_base_init,
.update = sha512_ce_update,
- .final = sha512_ce_final,
.finup = sha512_ce_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA512_DIGEST_SIZE,
.base.cra_name = "sha512",
.base.cra_driver_name = "sha512-ce",
.base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.base.cra_blocksize = SHA512_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
} };
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index 62f129dea83d..15aa9d8b7b2c 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -6,11 +6,10 @@
*/
#include <crypto/internal/hash.h>
-#include <linux/types.h>
-#include <linux/string.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-#include <asm/neon.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash for arm64");
MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
@@ -19,59 +18,53 @@ MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha512");
-asmlinkage void sha512_block_data_order(u64 *digest, const void *data,
- unsigned int num_blks);
-EXPORT_SYMBOL(sha512_block_data_order);
+asmlinkage void sha512_blocks_arch(u64 *digest, const void *data,
+ unsigned int num_blks);
static void sha512_arm64_transform(struct sha512_state *sst, u8 const *src,
int blocks)
{
- sha512_block_data_order(sst->state, src, blocks);
+ sha512_blocks_arch(sst->state, src, blocks);
}
static int sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha512_base_do_update(desc, data, len, sha512_arm64_transform);
+ return sha512_base_do_update_blocks(desc, data, len,
+ sha512_arm64_transform);
}
static int sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (len)
- sha512_base_do_update(desc, data, len, sha512_arm64_transform);
- sha512_base_do_finalize(desc, sha512_arm64_transform);
-
+ sha512_base_do_finup(desc, data, len, sha512_arm64_transform);
return sha512_base_finish(desc, out);
}
-static int sha512_final(struct shash_desc *desc, u8 *out)
-{
- return sha512_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_update,
- .final = sha512_final,
.finup = sha512_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base.cra_name = "sha512",
.base.cra_driver_name = "sha512-arm64",
.base.cra_priority = 150,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.base.cra_blocksize = SHA512_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_update,
- .final = sha512_final,
.finup = sha512_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base.cra_name = "sha384",
.base.cra_driver_name = "sha384-arm64",
.base.cra_priority = 150,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.base.cra_blocksize = SHA384_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
} };
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index 1a71788c4cda..eac6f5fa0abe 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -6,14 +6,11 @@
*/
#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
@@ -26,50 +23,20 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!crypto_simd_usable()) {
- sm3_update(shash_desc_ctx(desc), data, len);
- return 0;
- }
+ int remain;
kernel_neon_begin();
- sm3_base_do_update(desc, data, len, sm3_ce_transform);
+ remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
kernel_neon_end();
-
- return 0;
-}
-
-static int sm3_ce_final(struct shash_desc *desc, u8 *out)
-{
- if (!crypto_simd_usable()) {
- sm3_final(shash_desc_ctx(desc), out);
- return 0;
- }
-
- kernel_neon_begin();
- sm3_base_do_finalize(desc, sm3_ce_transform);
- kernel_neon_end();
-
- return sm3_base_finish(desc, out);
+ return remain;
}
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable()) {
- struct sm3_state *sctx = shash_desc_ctx(desc);
-
- if (len)
- sm3_update(sctx, data, len);
- sm3_final(sctx, out);
- return 0;
- }
-
kernel_neon_begin();
- if (len)
- sm3_base_do_update(desc, data, len, sm3_ce_transform);
- sm3_base_do_finalize(desc, sm3_ce_transform);
+ sm3_base_do_finup(desc, data, len, sm3_ce_transform);
kernel_neon_end();
-
return sm3_base_finish(desc, out);
}
@@ -77,11 +44,12 @@ static struct shash_alg sm3_alg = {
.digestsize = SM3_DIGEST_SIZE,
.init = sm3_base_init,
.update = sm3_ce_update,
- .final = sm3_ce_final,
.finup = sm3_ce_finup,
- .descsize = sizeof(struct sm3_state),
+ .descsize = SM3_STATE_SIZE,
.base.cra_name = "sm3",
.base.cra_driver_name = "sm3-ce",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.base.cra_blocksize = SM3_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 400,
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
index 8dd71ce79b69..6c4611a503a3 100644
--- a/arch/arm64/crypto/sm3-neon-glue.c
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -6,14 +6,11 @@
*/
#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
#include <linux/cpufeature.h>
-#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/module.h>
@@ -23,50 +20,20 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!crypto_simd_usable()) {
- sm3_update(shash_desc_ctx(desc), data, len);
- return 0;
- }
+ int remain;
kernel_neon_begin();
- sm3_base_do_update(desc, data, len, sm3_neon_transform);
+ remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform);
kernel_neon_end();
-
- return 0;
-}
-
-static int sm3_neon_final(struct shash_desc *desc, u8 *out)
-{
- if (!crypto_simd_usable()) {
- sm3_final(shash_desc_ctx(desc), out);
- return 0;
- }
-
- kernel_neon_begin();
- sm3_base_do_finalize(desc, sm3_neon_transform);
- kernel_neon_end();
-
- return sm3_base_finish(desc, out);
+ return remain;
}
static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable()) {
- struct sm3_state *sctx = shash_desc_ctx(desc);
-
- if (len)
- sm3_update(sctx, data, len);
- sm3_final(sctx, out);
- return 0;
- }
-
kernel_neon_begin();
- if (len)
- sm3_base_do_update(desc, data, len, sm3_neon_transform);
- sm3_base_do_finalize(desc, sm3_neon_transform);
+ sm3_base_do_finup(desc, data, len, sm3_neon_transform);
kernel_neon_end();
-
return sm3_base_finish(desc, out);
}
@@ -74,11 +41,12 @@ static struct shash_alg sm3_alg = {
.digestsize = SM3_DIGEST_SIZE,
.init = sm3_base_init,
.update = sm3_neon_update,
- .final = sm3_neon_final,
.finup = sm3_neon_finup,
- .descsize = sizeof(struct sm3_state),
+ .descsize = SM3_STATE_SIZE,
.base.cra_name = "sm3",
.base.cra_driver_name = "sm3-neon",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.base.cra_blocksize = SM3_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 43741bed874e..7a60e7b559dc 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -8,19 +8,18 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <linux/module.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/cpufeature.h>
#include <asm/neon.h>
-#include <asm/simd.h>
#include <crypto/b128ops.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
-#include <crypto/xts.h>
#include <crypto/sm4.h>
+#include <crypto/utils.h>
+#include <crypto/xts.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
@@ -64,7 +63,6 @@ struct sm4_mac_tfm_ctx {
};
struct sm4_mac_desc_ctx {
- unsigned int len;
u8 digest[SM4_BLOCK_SIZE];
};
@@ -591,8 +589,6 @@ static int sm4_mac_init(struct shash_desc *desc)
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
memset(ctx->digest, 0, SM4_BLOCK_SIZE);
- ctx->len = 0;
-
return 0;
}
@@ -601,87 +597,50 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
{
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int l, nblocks;
-
- if (len == 0)
- return 0;
-
- if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) {
- l = min(len, SM4_BLOCK_SIZE - ctx->len);
-
- crypto_xor(ctx->digest + ctx->len, p, l);
- ctx->len += l;
- len -= l;
- p += l;
- }
-
- if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) {
- kernel_neon_begin();
-
- if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) {
- sm4_ce_crypt_block(tctx->key.rkey_enc,
- ctx->digest, ctx->digest);
- ctx->len = 0;
- } else {
- nblocks = len / SM4_BLOCK_SIZE;
- len %= SM4_BLOCK_SIZE;
+ unsigned int nblocks = len / SM4_BLOCK_SIZE;
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
- nblocks, (ctx->len == SM4_BLOCK_SIZE),
- (len != 0));
-
- p += nblocks * SM4_BLOCK_SIZE;
-
- if (len == 0)
- ctx->len = SM4_BLOCK_SIZE;
- }
-
- kernel_neon_end();
-
- if (len) {
- crypto_xor(ctx->digest, p, len);
- ctx->len = len;
- }
- }
-
- return 0;
+ len %= SM4_BLOCK_SIZE;
+ kernel_neon_begin();
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+ nblocks, false, true);
+ kernel_neon_end();
+ return len;
}
-static int sm4_cmac_final(struct shash_desc *desc, u8 *out)
+static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
const u8 *consts = tctx->consts;
- if (ctx->len != SM4_BLOCK_SIZE) {
- ctx->digest[ctx->len] ^= 0x80;
+ crypto_xor(ctx->digest, src, len);
+ if (len != SM4_BLOCK_SIZE) {
+ ctx->digest[len] ^= 0x80;
consts += SM4_BLOCK_SIZE;
}
-
kernel_neon_begin();
sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
false, true);
kernel_neon_end();
-
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
-
return 0;
}
-static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out)
+static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
- if (ctx->len) {
+ if (len) {
+ crypto_xor(ctx->digest, src, len);
kernel_neon_begin();
sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
ctx->digest);
kernel_neon_end();
}
-
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
-
return 0;
}
@@ -691,6 +650,8 @@ static struct shash_alg sm4_mac_algs[] = {
.cra_name = "cmac(sm4)",
.cra_driver_name = "cmac-sm4-ce",
.cra_priority = 400,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
+ SM4_BLOCK_SIZE * 2,
@@ -699,7 +660,7 @@ static struct shash_alg sm4_mac_algs[] = {
.digestsize = SM4_BLOCK_SIZE,
.init = sm4_mac_init,
.update = sm4_mac_update,
- .final = sm4_cmac_final,
+ .finup = sm4_cmac_finup,
.setkey = sm4_cmac_setkey,
.descsize = sizeof(struct sm4_mac_desc_ctx),
}, {
@@ -707,6 +668,8 @@ static struct shash_alg sm4_mac_algs[] = {
.cra_name = "xcbc(sm4)",
.cra_driver_name = "xcbc-sm4-ce",
.cra_priority = 400,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
+ SM4_BLOCK_SIZE * 2,
@@ -715,7 +678,7 @@ static struct shash_alg sm4_mac_algs[] = {
.digestsize = SM4_BLOCK_SIZE,
.init = sm4_mac_init,
.update = sm4_mac_update,
- .final = sm4_cmac_final,
+ .finup = sm4_cmac_finup,
.setkey = sm4_xcbc_setkey,
.descsize = sizeof(struct sm4_mac_desc_ctx),
}, {
@@ -723,14 +686,15 @@ static struct shash_alg sm4_mac_algs[] = {
.cra_name = "cbcmac(sm4)",
.cra_driver_name = "cbcmac-sm4-ce",
.cra_priority = 400,
- .cra_blocksize = 1,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
+ .cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx),
.cra_module = THIS_MODULE,
},
.digestsize = SM4_BLOCK_SIZE,
.init = sm4_mac_init,
.update = sm4_mac_update,
- .final = sm4_cbcmac_final,
+ .finup = sm4_cbcmac_finup,
.setkey = sm4_cbcmac_setkey,
.descsize = sizeof(struct sm4_mac_desc_ctx),
}
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index d1cc0571798b..dffff6763812 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -81,6 +81,7 @@
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -168,6 +169,7 @@
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 39577f1d079a..18c7811774d3 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -706,6 +706,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type);
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg);
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index f1524cdeacf1..8fef12626090 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -97,6 +97,9 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
+extern bool __nospectre_bhb;
+u8 get_spectre_bhb_loop_value(void);
+bool is_spectre_bhb_fw_mitigated(void);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index b607f6dfc5e6..edf1783ffc81 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -891,6 +891,7 @@ static u8 spectre_bhb_loop_affected(void)
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
@@ -999,6 +1000,11 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
return true;
}
+u8 get_spectre_bhb_loop_value(void)
+{
+ return max_bhb_k;
+}
+
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
{
const char *v = arm64_get_bp_hardening_vector(slot);
@@ -1016,7 +1022,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
isb();
}
-static bool __read_mostly __nospectre_bhb;
+bool __read_mostly __nospectre_bhb;
static int __init parse_spectre_bhb_param(char *str)
{
__nospectre_bhb = true;
@@ -1094,6 +1100,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
update_mitigation_state(&spectre_bhb_state, state);
}
+bool is_spectre_bhb_fw_mitigated(void)
+{
+ return test_bit(BHB_FW, &system_bhb_mitigations);
+}
+
/* Patched to NOP when enabled */
void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
__le32 *origptr,
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 4d49dff721a8..027bfa9689c6 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+
+obj-y += crypto/
+
lib-y := clear_user.o delay.o copy_from_user.o \
copy_to_user.o copy_page.o \
clear_page.o csum.o insn.o memchr.o memcpy.o \
@@ -14,10 +17,10 @@ endif
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o
-crc32-arm64-y := crc32.o crc32-glue.o
+crc32-arm64-y := crc32.o crc32-core.o
obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm64.o
-crc-t10dif-arm64-y := crc-t10dif-glue.o crc-t10dif-core.o
+crc-t10dif-arm64-y := crc-t10dif.o crc-t10dif-core.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif.c
index bacd18f23168..c2ffe4fdb59d 100644
--- a/arch/arm64/lib/crc-t10dif-glue.c
+++ b/arch/arm64/lib/crc-t10dif.c
@@ -17,8 +17,8 @@
#include <asm/neon.h>
#include <asm/simd.h>
-static DEFINE_STATIC_KEY_FALSE(have_asimd);
-static DEFINE_STATIC_KEY_FALSE(have_pmull);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
@@ -61,7 +61,7 @@ static int __init crc_t10dif_arm64_init(void)
}
return 0;
}
-arch_initcall(crc_t10dif_arm64_init);
+subsys_initcall(crc_t10dif_arm64_init);
static void __exit crc_t10dif_arm64_exit(void)
{
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32-core.S
index 68825317460f..68825317460f 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32-core.S
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32.c
index ed3acd71178f..ed3acd71178f 100644
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32.c
diff --git a/arch/arm64/lib/crypto/.gitignore b/arch/arm64/lib/crypto/.gitignore
new file mode 100644
index 000000000000..12d74d8b03d0
--- /dev/null
+++ b/arch/arm64/lib/crypto/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+poly1305-core.S
+sha256-core.S
diff --git a/arch/arm64/lib/crypto/Kconfig b/arch/arm64/lib/crypto/Kconfig
new file mode 100644
index 000000000000..129a7685cb4c
--- /dev/null
+++ b/arch/arm64/lib/crypto/Kconfig
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA20_NEON
+ tristate
+ depends on KERNEL_MODE_NEON
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_NEON
+ tristate
+ depends on KERNEL_MODE_NEON
+ default CRYPTO_LIB_POLY1305
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
+config CRYPTO_SHA256_ARM64
+ tristate
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
diff --git a/arch/arm64/lib/crypto/Makefile b/arch/arm64/lib/crypto/Makefile
new file mode 100644
index 000000000000..946c09903711
--- /dev/null
+++ b/arch/arm64/lib/crypto/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+
+obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
+poly1305-neon-y := poly1305-core.o poly1305-glue.o
+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch
+AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch
+
+obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o
+sha256-arm64-y := sha256.o sha256-core.o
+sha256-arm64-$(CONFIG_KERNEL_MODE_NEON) += sha256-ce.o
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $(<) void $(@)
+
+$(obj)/%-core.S: $(src)/%-armv8.pl
+ $(call cmd,perlasm)
+
+$(obj)/sha256-core.S: $(src)/sha2-armv8.pl
+ $(call cmd,perlasm)
+
+clean-files += poly1305-core.S sha256-core.S
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/lib/crypto/chacha-neon-core.S
index b70ac76f2610..80079586ecc7 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/lib/crypto/chacha-neon-core.S
@@ -1,5 +1,5 @@
/*
- * ChaCha/XChaCha NEON helper functions
+ * ChaCha/HChaCha NEON helper functions
*
* Copyright (C) 2016-2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
diff --git a/arch/arm64/lib/crypto/chacha-neon-glue.c b/arch/arm64/lib/crypto/chacha-neon-glue.c
new file mode 100644
index 000000000000..d0188f974ca5
--- /dev/null
+++ b/arch/arm64/lib/crypto/chacha-neon-glue.c
@@ -0,0 +1,119 @@
+/*
+ * ChaCha and HChaCha functions (ARM64 optimized)
+ *
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
+ u8 *dst, const u8 *src, int nrounds);
+asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ int nrounds, int bytes);
+asmlinkage void hchacha_block_neon(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
+ int bytes, int nrounds)
+{
+ while (bytes > 0) {
+ int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
+
+ if (l <= CHACHA_BLOCK_SIZE) {
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ memcpy(buf, src, l);
+ chacha_block_xor_neon(state, buf, buf, nrounds);
+ memcpy(dst, buf, l);
+ state->x[12] += 1;
+ break;
+ }
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
+ }
+}
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, out, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, out, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, todo, nrounds);
+ kernel_neon_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_neon);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init chacha_simd_mod_init(void)
+{
+ if (cpu_have_named_feature(ASIMD))
+ static_branch_enable(&have_neon);
+ return 0;
+}
+subsys_initcall(chacha_simd_mod_init);
+
+static void __exit chacha_simd_mod_exit(void)
+{
+}
+module_exit(chacha_simd_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM64 optimized)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/lib/crypto/poly1305-armv8.pl
index 22c9069c0650..22c9069c0650 100644
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/lib/crypto/poly1305-armv8.pl
diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..6a661cf04821
--- /dev/null
+++ b/arch/arm64/lib/crypto/poly1305-glue.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/unaligned.h>
+
+asmlinkage void poly1305_block_init_arch(
+ struct poly1305_block_state *state,
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
+EXPORT_SYMBOL_GPL(poly1305_block_init_arch);
+asmlinkage void poly1305_blocks(struct poly1305_block_state *state,
+ const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state,
+ const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_arch(const struct poly1305_state *state,
+ u8 digest[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+EXPORT_SYMBOL_GPL(poly1305_emit_arch);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
+ unsigned int len, u32 padbit)
+{
+ len = round_down(len, POLY1305_BLOCK_SIZE);
+ if (static_branch_likely(&have_neon)) {
+ do {
+ unsigned int todo = min_t(unsigned int, len, SZ_4K);
+
+ kernel_neon_begin();
+ poly1305_blocks_neon(state, src, todo, 1);
+ kernel_neon_end();
+
+ len -= todo;
+ src += todo;
+ } while (len);
+ } else
+ poly1305_blocks(state, src, len, 1);
+}
+EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
+
+bool poly1305_is_arch_optimized(void)
+{
+ /* We always can use at least the ARM64 scalar implementation. */
+ return true;
+}
+EXPORT_SYMBOL(poly1305_is_arch_optimized);
+
+static int __init neon_poly1305_mod_init(void)
+{
+ if (cpu_have_named_feature(ASIMD))
+ static_branch_enable(&have_neon);
+ return 0;
+}
+subsys_initcall(neon_poly1305_mod_init);
+
+static void __exit neon_poly1305_mod_exit(void)
+{
+}
+module_exit(neon_poly1305_mod_exit);
+
+MODULE_DESCRIPTION("Poly1305 authenticator (ARM64 optimized)");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/lib/crypto/sha2-armv8.pl
index 35ec9ae99fe1..4aebd20c498b 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/lib/crypto/sha2-armv8.pl
@@ -95,7 +95,7 @@ if ($output =~ /512/) {
$reg_t="w";
}
-$func="sha${BITS}_block_data_order";
+$func="sha${BITS}_blocks_arch";
($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/lib/crypto/sha256-ce.S
index fce84d88ddb2..f3e21c6d87d2 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/lib/crypto/sha256-ce.S
@@ -71,8 +71,8 @@
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
- * int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src,
- * int blocks)
+ * size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
+ * const u8 *data, size_t nblocks);
*/
.text
SYM_FUNC_START(__sha256_ce_transform)
@@ -86,20 +86,16 @@ SYM_FUNC_START(__sha256_ce_transform)
/* load state */
ld1 {dgav.4s, dgbv.4s}, [x0]
- /* load sha256_ce_state::finalize */
- ldr_l w4, sha256_ce_offsetof_finalize, x4
- ldr w4, [x0, x4]
-
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
- sub w2, w2, #1
+ sub x2, x2, #1
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
-1: add t0.4s, v16.4s, v0.4s
+ add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
@@ -127,31 +123,14 @@ CPU_LE( rev32 v19.16b, v19.16b )
add dgav.4s, dgav.4s, dg0v.4s
add dgbv.4s, dgbv.4s, dg1v.4s
- /* handled all input blocks? */
- cbz w2, 2f
- cond_yield 3f, x5, x6
- b 0b
+ /* return early if voluntary preemption is needed */
+ cond_yield 1f, x5, x6
- /*
- * Final block: add padding and total bit count.
- * Skip if the input size was not a round multiple of the block size,
- * the padding is handled by the C code in that case.
- */
-2: cbz x4, 3f
- ldr_l w4, sha256_ce_offsetof_count, x4
- ldr x4, [x0, x4]
- movi v17.2d, #0
- mov x8, #0x80000000
- movi v18.2d, #0
- ror x7, x4, #29 // ror(lsl(x4, 3), 32)
- fmov d16, x8
- mov x4, #0
- mov v19.d[0], xzr
- mov v19.d[1], x7
- b 1b
+ /* handled all input blocks? */
+ cbnz x2, 0b
/* store new state */
-3: st1 {dgav.4s, dgbv.4s}, [x0]
- mov w0, w2
+1: st1 {dgav.4s, dgbv.4s}, [x0]
+ mov x0, x2
ret
SYM_FUNC_END(__sha256_ce_transform)
diff --git a/arch/arm64/lib/crypto/sha256.c b/arch/arm64/lib/crypto/sha256.c
new file mode 100644
index 000000000000..bcf7a3adc0c4
--- /dev/null
+++ b/arch/arm64/lib/crypto/sha256.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized for ARM64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/neon.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+asmlinkage void sha256_block_neon(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+asmlinkage size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
+
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_ce)) {
+ do {
+ size_t rem;
+
+ kernel_neon_begin();
+ rem = __sha256_ce_transform(state,
+ data, nblocks);
+ kernel_neon_end();
+ data += (nblocks - rem) * SHA256_BLOCK_SIZE;
+ nblocks = rem;
+ } while (nblocks);
+ } else {
+ kernel_neon_begin();
+ sha256_block_neon(state, data, nblocks);
+ kernel_neon_end();
+ }
+ } else {
+ sha256_blocks_arch(state, data, nblocks);
+ }
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+bool sha256_is_arch_optimized(void)
+{
+ /* We always can use at least the ARM64 scalar implementation. */
+ return true;
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_arm64_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ cpu_have_named_feature(ASIMD)) {
+ static_branch_enable(&have_neon);
+ if (cpu_have_named_feature(SHA2))
+ static_branch_enable(&have_ce);
+ }
+ return 0;
+}
+subsys_initcall(sha256_arm64_mod_init);
+
+static void __exit sha256_arm64_mod_exit(void)
+{
+}
+module_exit(sha256_arm64_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 optimized for ARM64");
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 9bef696e2230..4e298baddc2e 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/printk.h>
@@ -1500,43 +1501,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
}
-u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type)
{
- u32 opt;
- u32 insn;
-
switch (type) {
case AARCH64_INSN_MB_SY:
- opt = 0xf;
- break;
+ return 0xf;
case AARCH64_INSN_MB_ST:
- opt = 0xe;
- break;
+ return 0xe;
case AARCH64_INSN_MB_LD:
- opt = 0xd;
- break;
+ return 0xd;
case AARCH64_INSN_MB_ISH:
- opt = 0xb;
- break;
+ return 0xb;
case AARCH64_INSN_MB_ISHST:
- opt = 0xa;
- break;
+ return 0xa;
case AARCH64_INSN_MB_ISHLD:
- opt = 0x9;
- break;
+ return 0x9;
case AARCH64_INSN_MB_NSH:
- opt = 0x7;
- break;
+ return 0x7;
case AARCH64_INSN_MB_NSHST:
- opt = 0x6;
- break;
+ return 0x6;
case AARCH64_INSN_MB_NSHLD:
- opt = 0x5;
- break;
+ return 0x5;
default:
- pr_err("%s: unknown dmb type %d\n", __func__, type);
+ pr_err("%s: unknown barrier type %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
+}
+
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+{
+ u32 opt;
+ u32 insn;
+
+ opt = __get_barrier_crm_val(type);
+ if (opt == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
insn = aarch64_insn_get_dmb_value();
insn &= ~GENMASK(11, 8);
@@ -1545,6 +1544,21 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
return insn;
}
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type)
+{
+ u32 opt, insn;
+
+ opt = __get_barrier_crm_val(type);
+ if (opt == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
+
+ insn = aarch64_insn_get_dsb_base_value();
+ insn &= ~GENMASK(11, 8);
+ insn |= (opt << 8);
+
+ return insn;
+}
+
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg)
{
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 70d7c89d3ac9..634d78422adb 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "bpf_jit: " fmt
+#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -17,6 +18,7 @@
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/text-patching.h>
@@ -939,7 +941,51 @@ static void build_plt(struct jit_ctx *ctx)
plt->target = (u64)&dummy_tramp;
}
-static void build_epilogue(struct jit_ctx *ctx)
+/* Clobbers BPF registers 1-4, aka x0-x3 */
+static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
+{
+ const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
+ u8 k = get_spectre_bhb_loop_value();
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+ cpu_mitigations_off() || __nospectre_bhb ||
+ arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
+ return;
+
+ if (capable(CAP_SYS_ADMIN))
+ return;
+
+ if (supports_clearbhb(SCOPE_SYSTEM)) {
+ emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
+ return;
+ }
+
+ if (k) {
+ emit_a64_mov_i64(r1, k, ctx);
+ emit(A64_B(1), ctx);
+ emit(A64_SUBS_I(true, r1, r1, 1), ctx);
+ emit(A64_B_(A64_COND_NE, -2), ctx);
+ emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
+ emit(aarch64_insn_get_isb_value(), ctx);
+ }
+
+ if (is_spectre_bhb_fw_mitigated()) {
+ emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
+ ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ emit(aarch64_insn_get_hvc_value(), ctx);
+ break;
+ case SMCCC_CONDUIT_SMC:
+ emit(aarch64_insn_get_smc_value(), ctx);
+ break;
+ default:
+ pr_err_once("Firmware mitigation enabled with unknown conduit\n");
+ }
+ }
+}
+
+static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
{
const u8 r0 = bpf2a64[BPF_REG_0];
const u8 ptr = bpf2a64[TCCNT_PTR];
@@ -952,10 +998,13 @@ static void build_epilogue(struct jit_ctx *ctx)
emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
+ if (was_classic)
+ build_bhb_mitigation(ctx);
+
/* Restore FP/LR registers */
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
- /* Set return value */
+ /* Move the return value from bpf:r0 (aka x7) to x0 */
emit(A64_MOV(1, A64_R(0), r0), ctx);
/* Authenticate lr */
@@ -1898,7 +1947,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.epilogue_offset = ctx.idx;
- build_epilogue(&ctx);
+ build_epilogue(&ctx, was_classic);
build_plt(&ctx);
extable_align = __alignof__(struct exception_table_entry);
@@ -1961,7 +2010,7 @@ skip_init_ctx:
goto out_free_hdr;
}
- build_epilogue(&ctx);
+ build_epilogue(&ctx, was_classic);
build_plt(&ctx);
/* Extra pass to validate JITed code. */
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 9d01361696a1..ae551b857137 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -83,7 +83,26 @@ HYPERCALL3(vcpu_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
HYPERCALL2(vm_assist);
-HYPERCALL3(dm_op);
+
+SYM_FUNC_START(HYPERVISOR_dm_op)
+ mov x16, #__HYPERVISOR_dm_op; \
+ /*
+ * dm_op hypercalls are issued by the userspace. The kernel needs to
+ * enable access to TTBR0_EL1 as the hypervisor would issue stage 1
+ * translations to user memory via AT instructions. Since AT
+ * instructions are not affected by the PAN bit (ARMv8.1), we only
+ * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
+ * is enabled (it implies that hardware UAO and PAN disabled).
+ */
+ uaccess_ttbr0_enable x6, x7, x8
+ hvc XEN_IMM
+
+ /*
+ * Disable userspace access from kernel once the hyp call completed.
+ */
+ uaccess_ttbr0_disable x6, x7
+ ret
+SYM_FUNC_END(HYPERVISOR_dm_op);
SYM_FUNC_START(privcmd_call)
mov x16, x0
diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c
index e5f18420ce64..e0a36acd265b 100644
--- a/arch/csky/kernel/perf_event.c
+++ b/arch/csky/kernel/perf_event.c
@@ -1139,8 +1139,7 @@ static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
perf_sample_data_init(&data, 0, hwc->last_period);
csky_pmu_event_set_period(event);
- if (perf_event_overflow(event, &data, regs))
- csky_pmu_stop_event(event);
+ perf_event_overflow(event, &data, regs);
}
csky_pmu_enable(&csky_pmu.pmu);
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 90f21dfe22b1..0d59af6007b7 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -1026,7 +1026,7 @@ CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_YAMA=y
CONFIG_DEFAULT_SECURITY_DAC=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index 51f224bcfc65..704066b4f736 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -12,3 +12,11 @@ __int128_t __ashlti3(__int128_t a, int b);
__int128_t __ashrti3(__int128_t a, int b);
__int128_t __lshrti3(__int128_t a, int b);
#endif
+
+asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+ struct pt_regs *regs);
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+ struct pt_regs *regs,
+ int (*fn)(void *),
+ void *fn_arg);
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index a5b63c84f854..e5d21e836d99 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -55,7 +55,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long v
/* Query offset/name of register from its name/offset */
extern int regs_query_register_offset(const char *name);
-#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last))
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last) - sizeof(unsigned long))
/**
* regs_get_register() - get register value from its offset
diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h
index 99a0d198927f..025fc3f0a102 100644
--- a/arch/loongarch/include/asm/uprobes.h
+++ b/arch/loongarch/include/asm/uprobes.h
@@ -15,7 +15,6 @@ typedef u32 uprobe_opcode_t;
#define UPROBE_XOLBP_INSN __emit_break(BRK_UPROBE_XOLBP)
struct arch_uprobe {
- unsigned long resume_era;
u32 insn[2];
u32 ixol[2];
bool simulate;
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 48e7e34e355e..2abc29e57381 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall)
SYM_CODE_END(handle_syscall)
_ASM_NOKPROBE(handle_syscall)
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
UNWIND_HINT_REGS
- bl schedule_tail # a0 = struct task_struct *prev
- move a0, sp
- bl syscall_exit_to_user_mode
+ move a1, sp
+ bl ret_from_fork
RESTORE_STATIC
RESTORE_SOME
RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
-SYM_CODE_START(ret_from_kernel_thread)
+SYM_CODE_START(ret_from_kernel_thread_asm)
UNWIND_HINT_REGS
- bl schedule_tail # a0 = struct task_struct *prev
- move a0, s1
- jirl ra, s0, 0
- move a0, sp
- bl syscall_exit_to_user_mode
+ move a1, sp
+ move a2, s0
+ move a3, s1
+ bl ret_from_kernel_thread
RESTORE_STATIC
RESTORE_SOME
RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_kernel_thread)
+SYM_CODE_END(ret_from_kernel_thread_asm)
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 4f0912141781..733a7665e434 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -16,6 +16,7 @@
#include <asm/stackframe.h>
#include <asm/thread_info.h>
+ .section .cpuidle.text, "ax"
.align 5
SYM_FUNC_START(__arch_cpu_idle)
/* start of idle interrupt region */
@@ -31,14 +32,16 @@ SYM_FUNC_START(__arch_cpu_idle)
*/
idle 0
/* end of idle interrupt region */
-1: jr ra
+idle_exit:
+ jr ra
SYM_FUNC_END(__arch_cpu_idle)
+ .previous
SYM_CODE_START(handle_vint)
UNWIND_HINT_UNDEFINED
BACKUP_T0T1
SAVE_ALL
- la_abs t1, 1b
+ la_abs t1, idle_exit
LONG_L t0, sp, PT_ERA
/* 3 instructions idle interrupt region */
ori t0, t0, 0b1100
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
index ec5b28e570c9..4c476904227f 100644
--- a/arch/loongarch/kernel/kfpu.c
+++ b/arch/loongarch/kernel/kfpu.c
@@ -18,11 +18,28 @@ static unsigned int euen_mask = CSR_EUEN_FPEN;
static DEFINE_PER_CPU(bool, in_kernel_fpu);
static DEFINE_PER_CPU(unsigned int, euen_current);
+static inline void fpregs_lock(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
+ else
+ local_bh_disable();
+}
+
+static inline void fpregs_unlock(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+ else
+ local_bh_enable();
+}
+
void kernel_fpu_begin(void)
{
unsigned int *euen_curr;
- preempt_disable();
+ if (!irqs_disabled())
+ fpregs_lock();
WARN_ON(this_cpu_read(in_kernel_fpu));
@@ -73,7 +90,8 @@ void kernel_fpu_end(void)
this_cpu_write(in_kernel_fpu, false);
- preempt_enable();
+ if (!irqs_disabled())
+ fpregs_unlock();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index f86a4b838dd7..8ad098703488 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -479,8 +479,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
if (!loongarch_pmu_event_set_period(event, hwc, idx))
return;
- if (perf_event_overflow(event, data, regs))
- loongarch_pmu_disable_event(idx);
+ perf_event_overflow(event, data, regs);
}
static irqreturn_t pmu_handle_irq(int irq, void *dev)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 6e58f65455c7..3582f591bab2 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -13,6 +13,7 @@
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/entry-common.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -34,6 +35,7 @@
#include <linux/nmi.h>
#include <asm/asm.h>
+#include <asm/asm-prototypes.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
#include <asm/elf.h>
@@ -47,6 +49,7 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/reg.h>
+#include <asm/switch_to.h>
#include <asm/unwind.h>
#include <asm/vdso.h>
@@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard);
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
-asmlinkage void ret_from_fork(void);
-asmlinkage void ret_from_kernel_thread(void);
+asmlinkage void restore_and_ret(void);
+asmlinkage void ret_from_fork_asm(void);
+asmlinkage void ret_from_kernel_thread_asm(void);
void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
{
@@ -138,6 +142,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+ struct pt_regs *regs)
+{
+ schedule_tail(prev);
+ syscall_exit_to_user_mode(regs);
+}
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+ struct pt_regs *regs,
+ int (*fn)(void *),
+ void *fn_arg)
+{
+ schedule_tail(prev);
+ fn(fn_arg);
+ syscall_exit_to_user_mode(regs);
+}
+
/*
* Copy architecture-specific thread state
*/
@@ -165,8 +186,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.reg03 = childksp;
p->thread.reg23 = (unsigned long)args->fn;
p->thread.reg24 = (unsigned long)args->fn_arg;
- p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
- p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
+ p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
+ p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
memset(childregs, 0, sizeof(struct pt_regs));
childregs->csr_euen = p->thread.csr_euen;
childregs->csr_crmd = p->thread.csr_crmd;
@@ -182,8 +203,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childregs->regs[3] = usp;
p->thread.reg03 = (unsigned long) childregs;
- p->thread.reg01 = (unsigned long) ret_from_fork;
- p->thread.sched_ra = (unsigned long) ret_from_fork;
+ p->thread.reg01 = (unsigned long) ret_from_fork_asm;
+ p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
/*
* New tasks lose permission to use the fpu. This accelerates context
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index e2d3bfeb6366..bc75a3a69fc8 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -111,7 +111,7 @@ static unsigned long __init get_loops_per_jiffy(void)
return lpj;
}
-static long init_offset __nosavedata;
+static long init_offset;
void save_counter(void)
{
diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c
index 87abc7137b73..6022eb0f71db 100644
--- a/arch/loongarch/kernel/uprobes.c
+++ b/arch/loongarch/kernel/uprobes.c
@@ -42,7 +42,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
utask->autask.saved_trap_nr = current->thread.trap_nr;
current->thread.trap_nr = UPROBE_TRAP_NR;
instruction_pointer_set(regs, utask->xol_vaddr);
- user_enable_single_step(current);
return 0;
}
@@ -53,13 +52,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
current->thread.trap_nr = utask->autask.saved_trap_nr;
-
- if (auprobe->simulate)
- instruction_pointer_set(regs, auprobe->resume_era);
- else
- instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE);
-
- user_disable_single_step(current);
+ instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE);
return 0;
}
@@ -70,7 +63,6 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
current->thread.trap_nr = utask->autask.saved_trap_nr;
instruction_pointer_set(regs, utask->vaddr);
- user_disable_single_step(current);
}
bool arch_uprobe_xol_was_trapped(struct task_struct *t)
@@ -90,7 +82,6 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
insn.word = auprobe->insn[0];
arch_simulate_insn(insn, regs);
- auprobe->resume_era = regs->csr_era;
return true;
}
diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c
index c44ee4f32557..b37cd8537b45 100644
--- a/arch/loongarch/lib/crc32-loongarch.c
+++ b/arch/loongarch/lib/crc32-loongarch.c
@@ -26,7 +26,7 @@ do { \
#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
-static DEFINE_STATIC_KEY_FALSE(have_crc32);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
@@ -114,7 +114,7 @@ static int __init crc32_loongarch_init(void)
static_branch_enable(&have_crc32);
return 0;
}
-arch_initcall(crc32_loongarch_init);
+subsys_initcall(crc32_loongarch_init);
static void __exit crc32_loongarch_exit(void)
{
diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c
index 1e0590542f98..e7b7346592cb 100644
--- a/arch/loongarch/power/hibernate.c
+++ b/arch/loongarch/power/hibernate.c
@@ -2,6 +2,7 @@
#include <asm/fpu.h>
#include <asm/loongson.h>
#include <asm/sections.h>
+#include <asm/time.h>
#include <asm/tlbflush.h>
#include <linux/suspend.h>
@@ -14,6 +15,7 @@ struct pt_regs saved_regs;
void save_processor_state(void)
{
+ save_counter();
saved_crmd = csr_read32(LOONGARCH_CSR_CRMD);
saved_prmd = csr_read32(LOONGARCH_CSR_PRMD);
saved_euen = csr_read32(LOONGARCH_CSR_EUEN);
@@ -26,6 +28,7 @@ void save_processor_state(void)
void restore_processor_state(void)
{
+ sync_counter();
csr_write32(saved_crmd, LOONGARCH_CSR_CRMD);
csr_write32(saved_prmd, LOONGARCH_CSR_PRMD);
csr_write32(saved_euen, LOONGARCH_CSR_EUEN);
diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig
index 110279a64aa4..60767811e34a 100644
--- a/arch/m68k/configs/amcore_defconfig
+++ b/arch/m68k/configs/amcore_defconfig
@@ -2,7 +2,6 @@ CONFIG_LOCALVERSION="amcore-002"
CONFIG_DEFAULT_HOSTNAME="amcore"
CONFIG_SYSVIPC=y
# CONFIG_FHANDLE is not set
-# CONFIG_USELIB is not set
CONFIG_LOG_BUF_SHIFT=14
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_AIO is not set
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 31ecb8b7b9f1..6a644122d38e 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -551,7 +551,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -621,8 +620,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -632,7 +629,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 1f57514624d5..2284c04b9b55 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -508,7 +508,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -559,7 +559,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -578,8 +577,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -589,7 +586,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 02db7a48e57e..7cf84fd14e78 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -528,7 +528,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -579,7 +579,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -598,8 +597,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -609,7 +606,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index f0e673cb17eb..ef269ea337a1 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -500,7 +500,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -551,7 +551,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -570,8 +569,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -581,7 +578,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e8ca5a50b86d..fe427837fe6f 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -510,7 +510,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -561,7 +561,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -580,8 +579,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -591,7 +588,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b3a270441bb1..4a0c7716b560 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -527,7 +527,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -597,8 +596,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -608,7 +605,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d215dba006ce..6d3b20e9d0b2 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -614,7 +614,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -665,7 +665,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -684,8 +683,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -695,7 +692,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index a888ed93ff82..b57d9234979b 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -500,7 +500,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -551,7 +551,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -570,8 +569,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -581,7 +578,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index b481782375f6..cf0f7f10ebc6 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -501,7 +501,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -552,7 +552,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -571,8 +570,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -582,7 +579,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 6eba743d8eb5..2205e7ae55e5 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -517,7 +517,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -568,7 +568,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -587,8 +586,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -598,7 +595,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 9bdbb418ffa8..0e22f6acf575 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -498,7 +498,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -549,7 +549,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -567,8 +566,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -578,7 +575,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index e1cf20fa5343..f63fd6ab3e68 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -498,7 +498,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
@@ -549,7 +549,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_PRIME_NUMBERS=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -568,8 +567,6 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_KSTRTOX=m
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
@@ -579,7 +576,6 @@ CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 0fba32552836..c7e8de0d34bb 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -484,7 +484,7 @@ static int hardware_proc_show(struct seq_file *m, void *v)
if (mach_get_model)
mach_get_model(model);
else
- strcpy(model, "Unknown m68k");
+ strscpy(model, "Unknown m68k");
seq_printf(m, "Model:\t\t%s\n", model);
for (mem = 0, i = 0; i < m68k_num_memory; i++)
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index e324410ef239..d26c7f4f8c36 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -793,7 +793,7 @@ static void __init mac_identify(void)
}
macintosh_config = mac_data_table;
- for (m = macintosh_config; m->ident != -1; m++) {
+ for (m = &mac_data_table[1]; m->ident != -1; m++) {
if (m->ident == model) {
macintosh_config = m;
break;
diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c
index 8ccf167c167e..e8c38aaf46a2 100644
--- a/arch/mips/ath25/ar2315.c
+++ b/arch/mips/ath25/ar2315.c
@@ -149,8 +149,8 @@ void __init ar2315_arch_init_irq(void)
ath25_irq_dispatch = ar2315_irq_dispatch;
- domain = irq_domain_add_linear(NULL, AR2315_MISC_IRQ_COUNT,
- &ar2315_misc_irq_domain_ops, NULL);
+ domain = irq_domain_create_linear(NULL, AR2315_MISC_IRQ_COUNT,
+ &ar2315_misc_irq_domain_ops, NULL);
if (!domain)
panic("Failed to add IRQ domain");
diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c
index cfa103518113..4a1d874be766 100644
--- a/arch/mips/ath25/ar5312.c
+++ b/arch/mips/ath25/ar5312.c
@@ -143,8 +143,8 @@ void __init ar5312_arch_init_irq(void)
ath25_irq_dispatch = ar5312_irq_dispatch;
- domain = irq_domain_add_linear(NULL, AR5312_MISC_IRQ_COUNT,
- &ar5312_misc_irq_domain_ops, NULL);
+ domain = irq_domain_create_linear(NULL, AR5312_MISC_IRQ_COUNT,
+ &ar5312_misc_irq_domain_ops, NULL);
if (!domain)
panic("Failed to add IRQ domain");
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index 450e979ef5d9..11f4aa6e80e9 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -23,6 +23,12 @@ config CAVIUM_OCTEON_CVMSEG_SIZE
legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is
between zero and 6192 bytes).
+config CRYPTO_SHA256_OCTEON
+ tristate
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_LIB_SHA256_GENERIC
+
endif # CPU_CAVIUM_OCTEON
if CAVIUM_OCTEON_SOC
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index 5ee4ade99b99..fbc84eb7fedf 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -19,22 +19,26 @@
* any later version.
*/
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
#include <crypto/md5.h>
-#include <linux/init.h>
-#include <linux/types.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
-#include <asm/byteorder.h>
-#include <asm/octeon/octeon.h>
-#include <crypto/internal/hash.h>
+#include <linux/unaligned.h>
#include "octeon-crypto.h"
+struct octeon_md5_state {
+ __le32 hash[MD5_HASH_WORDS];
+ u64 byte_count;
+};
+
/*
* We pass everything as 64-bit. OCTEON can handle misaligned data.
*/
-static void octeon_md5_store_hash(struct md5_state *ctx)
+static void octeon_md5_store_hash(struct octeon_md5_state *ctx)
{
u64 *hash = (u64 *)ctx->hash;
@@ -42,7 +46,7 @@ static void octeon_md5_store_hash(struct md5_state *ctx)
write_octeon_64bit_hash_dword(hash[1], 1);
}
-static void octeon_md5_read_hash(struct md5_state *ctx)
+static void octeon_md5_read_hash(struct octeon_md5_state *ctx)
{
u64 *hash = (u64 *)ctx->hash;
@@ -66,13 +70,12 @@ static void octeon_md5_transform(const void *_block)
static int octeon_md5_init(struct shash_desc *desc)
{
- struct md5_state *mctx = shash_desc_ctx(desc);
+ struct octeon_md5_state *mctx = shash_desc_ctx(desc);
- mctx->hash[0] = MD5_H0;
- mctx->hash[1] = MD5_H1;
- mctx->hash[2] = MD5_H2;
- mctx->hash[3] = MD5_H3;
- cpu_to_le32_array(mctx->hash, 4);
+ mctx->hash[0] = cpu_to_le32(MD5_H0);
+ mctx->hash[1] = cpu_to_le32(MD5_H1);
+ mctx->hash[2] = cpu_to_le32(MD5_H2);
+ mctx->hash[3] = cpu_to_le32(MD5_H3);
mctx->byte_count = 0;
return 0;
@@ -81,52 +84,38 @@ static int octeon_md5_init(struct shash_desc *desc)
static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct md5_state *mctx = shash_desc_ctx(desc);
- const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
+ struct octeon_md5_state *mctx = shash_desc_ctx(desc);
struct octeon_cop2_state state;
unsigned long flags;
mctx->byte_count += len;
-
- if (avail > len) {
- memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
- data, len);
- return 0;
- }
-
- memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
- avail);
-
flags = octeon_crypto_enable(&state);
octeon_md5_store_hash(mctx);
- octeon_md5_transform(mctx->block);
- data += avail;
- len -= avail;
-
- while (len >= sizeof(mctx->block)) {
+ do {
octeon_md5_transform(data);
- data += sizeof(mctx->block);
- len -= sizeof(mctx->block);
- }
+ data += MD5_HMAC_BLOCK_SIZE;
+ len -= MD5_HMAC_BLOCK_SIZE;
+ } while (len >= MD5_HMAC_BLOCK_SIZE);
octeon_md5_read_hash(mctx);
octeon_crypto_disable(&state, flags);
-
- memcpy(mctx->block, data, len);
-
- return 0;
+ mctx->byte_count -= len;
+ return len;
}
-static int octeon_md5_final(struct shash_desc *desc, u8 *out)
+static int octeon_md5_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int offset, u8 *out)
{
- struct md5_state *mctx = shash_desc_ctx(desc);
- const unsigned int offset = mctx->byte_count & 0x3f;
- char *p = (char *)mctx->block + offset;
+ struct octeon_md5_state *mctx = shash_desc_ctx(desc);
int padding = 56 - (offset + 1);
struct octeon_cop2_state state;
+ u32 block[MD5_BLOCK_WORDS];
unsigned long flags;
+ char *p;
+ p = memcpy(block, src, offset);
+ p += offset;
*p++ = 0x80;
flags = octeon_crypto_enable(&state);
@@ -134,39 +123,56 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
if (padding < 0) {
memset(p, 0x00, padding + sizeof(u64));
- octeon_md5_transform(mctx->block);
- p = (char *)mctx->block;
+ octeon_md5_transform(block);
+ p = (char *)block;
padding = 56;
}
memset(p, 0, padding);
- mctx->block[14] = mctx->byte_count << 3;
- mctx->block[15] = mctx->byte_count >> 29;
- cpu_to_le32_array(mctx->block + 14, 2);
- octeon_md5_transform(mctx->block);
+ mctx->byte_count += offset;
+ block[14] = mctx->byte_count << 3;
+ block[15] = mctx->byte_count >> 29;
+ cpu_to_le32_array(block + 14, 2);
+ octeon_md5_transform(block);
octeon_md5_read_hash(mctx);
octeon_crypto_disable(&state, flags);
+ memzero_explicit(block, sizeof(block));
memcpy(out, mctx->hash, sizeof(mctx->hash));
- memset(mctx, 0, sizeof(*mctx));
return 0;
}
static int octeon_md5_export(struct shash_desc *desc, void *out)
{
- struct md5_state *ctx = shash_desc_ctx(desc);
-
- memcpy(out, ctx, sizeof(*ctx));
+ struct octeon_md5_state *ctx = shash_desc_ctx(desc);
+ union {
+ u8 *u8;
+ u32 *u32;
+ u64 *u64;
+ } p = { .u8 = out };
+ int i;
+
+ for (i = 0; i < MD5_HASH_WORDS; i++)
+ put_unaligned(le32_to_cpu(ctx->hash[i]), p.u32++);
+ put_unaligned(ctx->byte_count, p.u64);
return 0;
}
static int octeon_md5_import(struct shash_desc *desc, const void *in)
{
- struct md5_state *ctx = shash_desc_ctx(desc);
-
- memcpy(ctx, in, sizeof(*ctx));
+ struct octeon_md5_state *ctx = shash_desc_ctx(desc);
+ union {
+ const u8 *u8;
+ const u32 *u32;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int i;
+
+ for (i = 0; i < MD5_HASH_WORDS; i++)
+ ctx->hash[i] = cpu_to_le32(get_unaligned(p.u32++));
+ ctx->byte_count = get_unaligned(p.u64);
return 0;
}
@@ -174,15 +180,16 @@ static struct shash_alg alg = {
.digestsize = MD5_DIGEST_SIZE,
.init = octeon_md5_init,
.update = octeon_md5_update,
- .final = octeon_md5_final,
+ .finup = octeon_md5_finup,
.export = octeon_md5_export,
.import = octeon_md5_import,
- .descsize = sizeof(struct md5_state),
- .statesize = sizeof(struct md5_state),
+ .statesize = MD5_STATE_SIZE,
+ .descsize = sizeof(struct octeon_md5_state),
.base = {
.cra_name = "md5",
.cra_driver_name= "octeon-md5",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
index 37a07b3c4568..e70f21a473da 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha1.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
@@ -13,15 +13,13 @@
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
*/
-#include <linux/mm.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <linux/init.h>
-#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/byteorder.h>
-#include <asm/octeon/octeon.h>
-#include <crypto/internal/hash.h>
#include "octeon-crypto.h"
@@ -58,49 +56,23 @@ static void octeon_sha1_read_hash(struct sha1_state *sctx)
memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword));
}
-static void octeon_sha1_transform(const void *_block)
+static void octeon_sha1_transform(struct sha1_state *sctx, const u8 *src,
+ int blocks)
{
- const u64 *block = _block;
-
- write_octeon_64bit_block_dword(block[0], 0);
- write_octeon_64bit_block_dword(block[1], 1);
- write_octeon_64bit_block_dword(block[2], 2);
- write_octeon_64bit_block_dword(block[3], 3);
- write_octeon_64bit_block_dword(block[4], 4);
- write_octeon_64bit_block_dword(block[5], 5);
- write_octeon_64bit_block_dword(block[6], 6);
- octeon_sha1_start(block[7]);
-}
-
-static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data,
- unsigned int len)
-{
- unsigned int partial;
- unsigned int done;
- const u8 *src;
-
- partial = sctx->count % SHA1_BLOCK_SIZE;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) >= SHA1_BLOCK_SIZE) {
- if (partial) {
- done = -partial;
- memcpy(sctx->buffer + partial, data,
- done + SHA1_BLOCK_SIZE);
- src = sctx->buffer;
- }
-
- do {
- octeon_sha1_transform(src);
- done += SHA1_BLOCK_SIZE;
- src = data + done;
- } while (done + SHA1_BLOCK_SIZE <= len);
-
- partial = 0;
- }
- memcpy(sctx->buffer + partial, src, len - done);
+ do {
+ const u64 *block = (const u64 *)src;
+
+ write_octeon_64bit_block_dword(block[0], 0);
+ write_octeon_64bit_block_dword(block[1], 1);
+ write_octeon_64bit_block_dword(block[2], 2);
+ write_octeon_64bit_block_dword(block[3], 3);
+ write_octeon_64bit_block_dword(block[4], 4);
+ write_octeon_64bit_block_dword(block[5], 5);
+ write_octeon_64bit_block_dword(block[6], 6);
+ octeon_sha1_start(block[7]);
+
+ src += SHA1_BLOCK_SIZE;
+ } while (--blocks);
}
static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
@@ -109,95 +81,47 @@ static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
struct sha1_state *sctx = shash_desc_ctx(desc);
struct octeon_cop2_state state;
unsigned long flags;
-
- /*
- * Small updates never reach the crypto engine, so the generic sha1 is
- * faster because of the heavyweight octeon_crypto_enable() /
- * octeon_crypto_disable().
- */
- if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
- return crypto_sha1_update(desc, data, len);
+ int remain;
flags = octeon_crypto_enable(&state);
octeon_sha1_store_hash(sctx);
- __octeon_sha1_update(sctx, data, len);
+ remain = sha1_base_do_update_blocks(desc, data, len,
+ octeon_sha1_transform);
octeon_sha1_read_hash(sctx);
octeon_crypto_disable(&state, flags);
-
- return 0;
+ return remain;
}
-static int octeon_sha1_final(struct shash_desc *desc, u8 *out)
+static int octeon_sha1_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
- static const u8 padding[64] = { 0x80, };
struct octeon_cop2_state state;
- __be32 *dst = (__be32 *)out;
- unsigned int pad_len;
unsigned long flags;
- unsigned int index;
- __be64 bits;
- int i;
-
- /* Save number of bits. */
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64. */
- index = sctx->count & 0x3f;
- pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
flags = octeon_crypto_enable(&state);
octeon_sha1_store_hash(sctx);
- __octeon_sha1_update(sctx, padding, pad_len);
-
- /* Append length (before padding). */
- __octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
+ sha1_base_do_finup(desc, src, len, octeon_sha1_transform);
octeon_sha1_read_hash(sctx);
octeon_crypto_disable(&state, flags);
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Zeroize sensitive information. */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int octeon_sha1_export(struct shash_desc *desc, void *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int octeon_sha1_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
+ return sha1_base_finish(desc, out);
}
static struct shash_alg octeon_sha1_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = octeon_sha1_update,
- .final = octeon_sha1_final,
- .export = octeon_sha1_export,
- .import = octeon_sha1_import,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
+ .finup = octeon_sha1_finup,
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "octeon-sha1",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
index 435e4a6e7f13..f93faaf1f4af 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Cryptographic API.
- *
- * SHA-224 and SHA-256 Secure Hash Algorithm.
+ * SHA-256 Secure Hash Algorithm.
*
* Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
*
@@ -14,15 +12,10 @@
* SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
*/
-#include <linux/mm.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <asm/byteorder.h>
#include <asm/octeon/octeon.h>
-#include <crypto/internal/hash.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include "octeon-crypto.h"
@@ -30,212 +23,51 @@
* We pass everything as 64-bit. OCTEON can handle misaligned data.
*/
-static void octeon_sha256_store_hash(struct sha256_state *sctx)
-{
- u64 *hash = (u64 *)sctx->state;
-
- write_octeon_64bit_hash_dword(hash[0], 0);
- write_octeon_64bit_hash_dword(hash[1], 1);
- write_octeon_64bit_hash_dword(hash[2], 2);
- write_octeon_64bit_hash_dword(hash[3], 3);
-}
-
-static void octeon_sha256_read_hash(struct sha256_state *sctx)
-{
- u64 *hash = (u64 *)sctx->state;
-
- hash[0] = read_octeon_64bit_hash_dword(0);
- hash[1] = read_octeon_64bit_hash_dword(1);
- hash[2] = read_octeon_64bit_hash_dword(2);
- hash[3] = read_octeon_64bit_hash_dword(3);
-}
-
-static void octeon_sha256_transform(const void *_block)
-{
- const u64 *block = _block;
-
- write_octeon_64bit_block_dword(block[0], 0);
- write_octeon_64bit_block_dword(block[1], 1);
- write_octeon_64bit_block_dword(block[2], 2);
- write_octeon_64bit_block_dword(block[3], 3);
- write_octeon_64bit_block_dword(block[4], 4);
- write_octeon_64bit_block_dword(block[5], 5);
- write_octeon_64bit_block_dword(block[6], 6);
- octeon_sha256_start(block[7]);
-}
-
-static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data,
- unsigned int len)
-{
- unsigned int partial;
- unsigned int done;
- const u8 *src;
-
- partial = sctx->count % SHA256_BLOCK_SIZE;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) >= SHA256_BLOCK_SIZE) {
- if (partial) {
- done = -partial;
- memcpy(sctx->buf + partial, data,
- done + SHA256_BLOCK_SIZE);
- src = sctx->buf;
- }
-
- do {
- octeon_sha256_transform(src);
- done += SHA256_BLOCK_SIZE;
- src = data + done;
- } while (done + SHA256_BLOCK_SIZE <= len);
-
- partial = 0;
- }
- memcpy(sctx->buf + partial, src, len - done);
-}
-
-static int octeon_sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- struct octeon_cop2_state state;
- unsigned long flags;
-
- /*
- * Small updates never reach the crypto engine, so the generic sha256 is
- * faster because of the heavyweight octeon_crypto_enable() /
- * octeon_crypto_disable().
- */
- if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
- return crypto_sha256_update(desc, data, len);
-
- flags = octeon_crypto_enable(&state);
- octeon_sha256_store_hash(sctx);
-
- __octeon_sha256_update(sctx, data, len);
-
- octeon_sha256_read_hash(sctx);
- octeon_crypto_disable(&state, flags);
-
- return 0;
-}
-
-static int octeon_sha256_final(struct shash_desc *desc, u8 *out)
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- static const u8 padding[64] = { 0x80, };
- struct octeon_cop2_state state;
- __be32 *dst = (__be32 *)out;
- unsigned int pad_len;
+ struct octeon_cop2_state cop2_state;
+ u64 *state64 = (u64 *)state;
unsigned long flags;
- unsigned int index;
- __be64 bits;
- int i;
-
- /* Save number of bits. */
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64. */
- index = sctx->count & 0x3f;
- pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
-
- flags = octeon_crypto_enable(&state);
- octeon_sha256_store_hash(sctx);
-
- __octeon_sha256_update(sctx, padding, pad_len);
-
- /* Append length (before padding). */
- __octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
-
- octeon_sha256_read_hash(sctx);
- octeon_crypto_disable(&state, flags);
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Zeroize sensitive information. */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int octeon_sha224_final(struct shash_desc *desc, u8 *hash)
-{
- u8 D[SHA256_DIGEST_SIZE];
-
- octeon_sha256_final(desc, D);
- memcpy(hash, D, SHA224_DIGEST_SIZE);
- memzero_explicit(D, SHA256_DIGEST_SIZE);
-
- return 0;
-}
-
-static int octeon_sha256_export(struct shash_desc *desc, void *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int octeon_sha256_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
-}
-
-static struct shash_alg octeon_sha256_algs[2] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = octeon_sha256_update,
- .final = octeon_sha256_final,
- .export = octeon_sha256_export,
- .import = octeon_sha256_import,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "octeon-sha256",
- .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = octeon_sha256_update,
- .final = octeon_sha224_final,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "octeon-sha224",
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init octeon_sha256_mod_init(void)
-{
if (!octeon_has_crypto())
- return -ENOTSUPP;
- return crypto_register_shashes(octeon_sha256_algs,
- ARRAY_SIZE(octeon_sha256_algs));
+ return sha256_blocks_generic(state, data, nblocks);
+
+ flags = octeon_crypto_enable(&cop2_state);
+ write_octeon_64bit_hash_dword(state64[0], 0);
+ write_octeon_64bit_hash_dword(state64[1], 1);
+ write_octeon_64bit_hash_dword(state64[2], 2);
+ write_octeon_64bit_hash_dword(state64[3], 3);
+
+ do {
+ const u64 *block = (const u64 *)data;
+
+ write_octeon_64bit_block_dword(block[0], 0);
+ write_octeon_64bit_block_dword(block[1], 1);
+ write_octeon_64bit_block_dword(block[2], 2);
+ write_octeon_64bit_block_dword(block[3], 3);
+ write_octeon_64bit_block_dword(block[4], 4);
+ write_octeon_64bit_block_dword(block[5], 5);
+ write_octeon_64bit_block_dword(block[6], 6);
+ octeon_sha256_start(block[7]);
+
+ data += SHA256_BLOCK_SIZE;
+ } while (--nblocks);
+
+ state64[0] = read_octeon_64bit_hash_dword(0);
+ state64[1] = read_octeon_64bit_hash_dword(1);
+ state64[2] = read_octeon_64bit_hash_dword(2);
+ state64[3] = read_octeon_64bit_hash_dword(3);
+ octeon_crypto_disable(&cop2_state, flags);
}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
-static void __exit octeon_sha256_mod_fini(void)
+bool sha256_is_arch_optimized(void)
{
- crypto_unregister_shashes(octeon_sha256_algs,
- ARRAY_SIZE(octeon_sha256_algs));
+ return octeon_has_crypto();
}
-
-module_init(octeon_sha256_mod_init);
-module_exit(octeon_sha256_mod_fini);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)");
+MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm (OCTEON)");
MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
index 2dee9354e33f..215311053db3 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha512.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
@@ -13,15 +13,12 @@
* Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
*/
-#include <linux/mm.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-#include <linux/init.h>
-#include <linux/types.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/byteorder.h>
-#include <asm/octeon/octeon.h>
-#include <crypto/internal/hash.h>
#include "octeon-crypto.h"
@@ -53,60 +50,31 @@ static void octeon_sha512_read_hash(struct sha512_state *sctx)
sctx->state[7] = read_octeon_64bit_hash_sha512(7);
}
-static void octeon_sha512_transform(const void *_block)
+static void octeon_sha512_transform(struct sha512_state *sctx,
+ const u8 *src, int blocks)
{
- const u64 *block = _block;
-
- write_octeon_64bit_block_sha512(block[0], 0);
- write_octeon_64bit_block_sha512(block[1], 1);
- write_octeon_64bit_block_sha512(block[2], 2);
- write_octeon_64bit_block_sha512(block[3], 3);
- write_octeon_64bit_block_sha512(block[4], 4);
- write_octeon_64bit_block_sha512(block[5], 5);
- write_octeon_64bit_block_sha512(block[6], 6);
- write_octeon_64bit_block_sha512(block[7], 7);
- write_octeon_64bit_block_sha512(block[8], 8);
- write_octeon_64bit_block_sha512(block[9], 9);
- write_octeon_64bit_block_sha512(block[10], 10);
- write_octeon_64bit_block_sha512(block[11], 11);
- write_octeon_64bit_block_sha512(block[12], 12);
- write_octeon_64bit_block_sha512(block[13], 13);
- write_octeon_64bit_block_sha512(block[14], 14);
- octeon_sha512_start(block[15]);
-}
-
-static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data,
- unsigned int len)
-{
- unsigned int part_len;
- unsigned int index;
- unsigned int i;
-
- /* Compute number of bytes mod 128. */
- index = sctx->count[0] % SHA512_BLOCK_SIZE;
-
- /* Update number of bytes. */
- if ((sctx->count[0] += len) < len)
- sctx->count[1]++;
-
- part_len = SHA512_BLOCK_SIZE - index;
-
- /* Transform as many times as possible. */
- if (len >= part_len) {
- memcpy(&sctx->buf[index], data, part_len);
- octeon_sha512_transform(sctx->buf);
-
- for (i = part_len; i + SHA512_BLOCK_SIZE <= len;
- i += SHA512_BLOCK_SIZE)
- octeon_sha512_transform(&data[i]);
-
- index = 0;
- } else {
- i = 0;
- }
-
- /* Buffer remaining input. */
- memcpy(&sctx->buf[index], &data[i], len - i);
+ do {
+ const u64 *block = (const u64 *)src;
+
+ write_octeon_64bit_block_sha512(block[0], 0);
+ write_octeon_64bit_block_sha512(block[1], 1);
+ write_octeon_64bit_block_sha512(block[2], 2);
+ write_octeon_64bit_block_sha512(block[3], 3);
+ write_octeon_64bit_block_sha512(block[4], 4);
+ write_octeon_64bit_block_sha512(block[5], 5);
+ write_octeon_64bit_block_sha512(block[6], 6);
+ write_octeon_64bit_block_sha512(block[7], 7);
+ write_octeon_64bit_block_sha512(block[8], 8);
+ write_octeon_64bit_block_sha512(block[9], 9);
+ write_octeon_64bit_block_sha512(block[10], 10);
+ write_octeon_64bit_block_sha512(block[11], 11);
+ write_octeon_64bit_block_sha512(block[12], 12);
+ write_octeon_64bit_block_sha512(block[13], 13);
+ write_octeon_64bit_block_sha512(block[14], 14);
+ octeon_sha512_start(block[15]);
+
+ src += SHA512_BLOCK_SIZE;
+ } while (--blocks);
}
static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
@@ -115,89 +83,48 @@ static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
struct sha512_state *sctx = shash_desc_ctx(desc);
struct octeon_cop2_state state;
unsigned long flags;
-
- /*
- * Small updates never reach the crypto engine, so the generic sha512 is
- * faster because of the heavyweight octeon_crypto_enable() /
- * octeon_crypto_disable().
- */
- if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
- return crypto_sha512_update(desc, data, len);
+ int remain;
flags = octeon_crypto_enable(&state);
octeon_sha512_store_hash(sctx);
- __octeon_sha512_update(sctx, data, len);
+ remain = sha512_base_do_update_blocks(desc, data, len,
+ octeon_sha512_transform);
octeon_sha512_read_hash(sctx);
octeon_crypto_disable(&state, flags);
-
- return 0;
+ return remain;
}
-static int octeon_sha512_final(struct shash_desc *desc, u8 *hash)
+static int octeon_sha512_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *hash)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
- static u8 padding[128] = { 0x80, };
struct octeon_cop2_state state;
- __be64 *dst = (__be64 *)hash;
- unsigned int pad_len;
unsigned long flags;
- unsigned int index;
- __be64 bits[2];
- int i;
-
- /* Save number of bits. */
- bits[1] = cpu_to_be64(sctx->count[0] << 3);
- bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
- /* Pad out to 112 mod 128. */
- index = sctx->count[0] & 0x7f;
- pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
flags = octeon_crypto_enable(&state);
octeon_sha512_store_hash(sctx);
- __octeon_sha512_update(sctx, padding, pad_len);
-
- /* Append length (before padding). */
- __octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits));
+ sha512_base_do_finup(desc, src, len, octeon_sha512_transform);
octeon_sha512_read_hash(sctx);
octeon_crypto_disable(&state, flags);
-
- /* Store state in digest. */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be64(sctx->state[i]);
-
- /* Zeroize sensitive information. */
- memset(sctx, 0, sizeof(struct sha512_state));
-
- return 0;
-}
-
-static int octeon_sha384_final(struct shash_desc *desc, u8 *hash)
-{
- u8 D[64];
-
- octeon_sha512_final(desc, D);
-
- memcpy(hash, D, 48);
- memzero_explicit(D, 64);
-
- return 0;
+ return sha512_base_finish(desc, hash);
}
static struct shash_alg octeon_sha512_algs[2] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = octeon_sha512_update,
- .final = octeon_sha512_final,
- .descsize = sizeof(struct sha512_state),
+ .finup = octeon_sha512_finup,
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name= "octeon-sha512",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -205,12 +132,14 @@ static struct shash_alg octeon_sha512_algs[2] = { {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = octeon_sha512_update,
- .final = octeon_sha384_final,
- .descsize = sizeof(struct sha512_state),
+ .finup = octeon_sha512_finup,
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name= "octeon-sha384",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index e6b4d9c0c169..5c3de175ef5b 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1503,8 +1503,8 @@ static int __init octeon_irq_init_ciu(
/* Mips internal */
octeon_irq_init_core();
- ciu_domain = irq_domain_add_tree(
- ciu_node, &octeon_irq_domain_ciu_ops, dd);
+ ciu_domain = irq_domain_create_tree(of_fwnode_handle(ciu_node), &octeon_irq_domain_ciu_ops,
+ dd);
irq_set_default_domain(ciu_domain);
/* CIU_0 */
@@ -1637,8 +1637,8 @@ static int __init octeon_irq_init_gpio(
if (gpiod) {
/* gpio domain host_data is the base hwirq number. */
gpiod->base_hwirq = base_hwirq;
- irq_domain_add_linear(
- gpio_node, 16, &octeon_irq_domain_gpio_ops, gpiod);
+ irq_domain_create_linear(of_fwnode_handle(gpio_node), 16,
+ &octeon_irq_domain_gpio_ops, gpiod);
} else {
pr_warn("Cannot allocate memory for GPIO irq_domain.\n");
return -ENOMEM;
@@ -2074,8 +2074,8 @@ static int __init octeon_irq_init_ciu2(
/* Mips internal */
octeon_irq_init_core();
- ciu_domain = irq_domain_add_tree(
- ciu_node, &octeon_irq_domain_ciu2_ops, NULL);
+ ciu_domain = irq_domain_create_tree(of_fwnode_handle(ciu_node), &octeon_irq_domain_ciu2_ops,
+ NULL);
irq_set_default_domain(ciu_domain);
/* CUI2 */
@@ -2331,11 +2331,12 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
}
host_data->max_bits = val;
- cib_domain = irq_domain_add_linear(ciu_node, host_data->max_bits,
- &octeon_irq_domain_cib_ops,
- host_data);
+ cib_domain = irq_domain_create_linear(of_fwnode_handle(ciu_node),
+ host_data->max_bits,
+ &octeon_irq_domain_cib_ops,
+ host_data);
if (!cib_domain) {
- pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
+ pr_err("ERROR: Couldn't irq_domain_create_linear()\n");
return -ENOMEM;
}
@@ -2918,8 +2919,8 @@ static int __init octeon_irq_init_ciu3(struct device_node *ciu_node,
* Initialize all domains to use the default domain. Specific major
* blocks will overwrite the default domain as needed.
*/
- domain = irq_domain_add_tree(ciu_node, &octeon_dflt_domain_ciu3_ops,
- ciu3_info);
+ domain = irq_domain_create_tree(of_fwnode_handle(ciu_node), &octeon_dflt_domain_ciu3_ops,
+ ciu3_info);
for (i = 0; i < MAX_CIU3_DOMAINS; i++)
ciu3_info->domain[i] = domain;
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index f523ee6f25bf..88ae0aa85364 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -157,7 +157,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MD5_OCTEON=y
CONFIG_CRYPTO_SHA1_OCTEON=m
-CONFIG_CRYPTO_SHA256_OCTEON=m
CONFIG_CRYPTO_SHA512_OCTEON=m
CONFIG_CRYPTO_DES=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
index 9655567614aa..85a4472cb058 100644
--- a/arch/mips/configs/decstation_64_defconfig
+++ b/arch/mips/configs/decstation_64_defconfig
@@ -168,7 +168,6 @@ CONFIG_NLS_ISO8859_14=m
CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_UTF8=m
CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 1539fe8eb34d..a3b2c8da2dde 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -163,7 +163,6 @@ CONFIG_NLS_ISO8859_14=m
CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_UTF8=m
CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index 58c36720c94a..a476717b8a6a 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -163,7 +163,6 @@ CONFIG_NLS_ISO8859_14=m
CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_UTF8=m
CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig
index bc1ef66e3999..8b7ad877e07a 100644
--- a/arch/mips/configs/gcw0_defconfig
+++ b/arch/mips/configs/gcw0_defconfig
@@ -13,7 +13,6 @@ CONFIG_MIPS_CMDLINE_DTB_EXTEND=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_BOUNCE is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index 12f3eed8a946..48c8feec958f 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -273,7 +273,7 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_ISO8859_1=y
CONFIG_CRYPTO_AUTHENC=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig
index e0040110a3ee..6db21e498faa 100644
--- a/arch/mips/configs/ip28_defconfig
+++ b/arch/mips/configs/ip28_defconfig
@@ -60,6 +60,5 @@ CONFIG_TMPFS_POSIX_ACL=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_ROOT_NFS=y
-CONFIG_CRYPTO_MANAGER=y
# CONFIG_CRYPTO_HW is not set
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 71d6340497c9..5038a27d035f 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -297,7 +297,7 @@ CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 06b7a0b97eca..cbf9c35a6177 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -662,7 +662,7 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MD5=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 42b161d587c7..9fb114ef5e2d 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -153,6 +153,6 @@ CONFIG_JFFS2_FS=y
CONFIG_JFFS2_SUMMARY=y
CONFIG_JFFS2_COMPRESSION_OPTIONS=y
CONFIG_SQUASHFS=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
# CONFIG_CRYPTO_HW is not set
CONFIG_STRIP_ASM_SYMS=y
diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig
index 545fc0e12422..6bf073ae7613 100644
--- a/arch/mips/crypto/Kconfig
+++ b/arch/mips/crypto/Kconfig
@@ -2,17 +2,6 @@
menu "Accelerated Cryptographic Algorithms for CPU (mips)"
-config CRYPTO_POLY1305_MIPS
- tristate
- depends on MIPS
- select CRYPTO_HASH
- select CRYPTO_ARCH_HAVE_LIB_POLY1305
- default CRYPTO_LIB_POLY1305_INTERNAL
- help
- Poly1305 authenticator algorithm (RFC7539)
-
- Architecture: mips
-
config CRYPTO_MD5_OCTEON
tristate "Digests: MD5 (OCTEON)"
depends on CPU_CAVIUM_OCTEON
@@ -33,16 +22,6 @@ config CRYPTO_SHA1_OCTEON
Architecture: mips OCTEON
-config CRYPTO_SHA256_OCTEON
- tristate "Hash functions: SHA-224 and SHA-256 (OCTEON)"
- depends on CPU_CAVIUM_OCTEON
- select CRYPTO_SHA256
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: mips OCTEON using crypto instructions, when available
-
config CRYPTO_SHA512_OCTEON
tristate "Hash functions: SHA-384 and SHA-512 (OCTEON)"
depends on CPU_CAVIUM_OCTEON
@@ -53,16 +32,4 @@ config CRYPTO_SHA512_OCTEON
Architecture: mips OCTEON using crypto instructions, when available
-config CRYPTO_CHACHA_MIPS
- tristate
- depends on CPU_MIPS32_R2
- select CRYPTO_SKCIPHER
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- default CRYPTO_LIB_CHACHA_INTERNAL
- help
- Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
- stream cipher algorithms
-
- Architecture: MIPS32r2
-
endmenu
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index fddc88281412..5adb631a69c1 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -3,20 +3,3 @@
# Makefile for MIPS crypto files..
#
-obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
-chacha-mips-y := chacha-core.o chacha-glue.o
-AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
-
-obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
-poly1305-mips-y := poly1305-core.o poly1305-glue.o
-
-perlasm-flavour-$(CONFIG_32BIT) := o32
-perlasm-flavour-$(CONFIG_64BIT) := 64
-
-quiet_cmd_perlasm = PERLASM $@
- cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
-
-$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
- $(call if_changed,perlasm)
-
-targets += poly1305-core.S
diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
deleted file mode 100644
index f6fc2e1079a1..000000000000
--- a/arch/mips/crypto/chacha-glue.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * MIPS accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/byteorder.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds);
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
-EXPORT_SYMBOL(hchacha_block_arch);
-
-static int chacha_mips_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- chacha_init(state, ctx->key, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_mips(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_mips_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_mips(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- chacha_init(state, ctx->key, req->iv);
-
- hchacha_block(state, subctx.key, ctx->nrounds);
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_mips_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-mips",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha_mips,
- .decrypt = chacha_mips,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-mips",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = xchacha_mips,
- .decrypt = xchacha_mips,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-mips",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = xchacha_mips,
- .decrypt = xchacha_mips,
- }
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
- crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-mips");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-mips");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-mips");
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
deleted file mode 100644
index c03ad0bbe69c..000000000000
--- a/arch/mips/crypto/poly1305-glue.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
- *
- * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/poly1305.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
-
-asmlinkage void poly1305_init_mips(void *state, const u8 *key);
-asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
-asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
-
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
-{
- poly1305_init_mips(&dctx->h, key);
- dctx->s[0] = get_unaligned_le32(key + 16);
- dctx->s[1] = get_unaligned_le32(key + 20);
- dctx->s[2] = get_unaligned_le32(key + 24);
- dctx->s[3] = get_unaligned_le32(key + 28);
- dctx->buflen = 0;
-}
-EXPORT_SYMBOL(poly1305_init_arch);
-
-static int mips_poly1305_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- dctx->buflen = 0;
- dctx->rset = 0;
- dctx->sset = false;
-
- return 0;
-}
-
-static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
- u32 len, u32 hibit)
-{
- if (unlikely(!dctx->sset)) {
- if (!dctx->rset) {
- poly1305_init_mips(&dctx->h, src);
- src += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- dctx->rset = 1;
- }
- if (len >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- if (len < POLY1305_BLOCK_SIZE)
- return;
- }
-
- len &= ~(POLY1305_BLOCK_SIZE - 1);
-
- poly1305_blocks_mips(&dctx->h, src, len, hibit);
-}
-
-static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
- unsigned int len)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(dctx->buflen)) {
- u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
-
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- len -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
- dctx->buflen = 0;
- }
- }
-
- if (likely(len >= POLY1305_BLOCK_SIZE)) {
- mips_poly1305_blocks(dctx, src, len, 1);
- src += round_down(len, POLY1305_BLOCK_SIZE);
- len %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(len)) {
- dctx->buflen = len;
- memcpy(dctx->buf, src, len);
- }
- return 0;
-}
-
-void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
- unsigned int nbytes)
-{
- if (unlikely(dctx->buflen)) {
- u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
-
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- nbytes -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_blocks_mips(&dctx->h, dctx->buf,
- POLY1305_BLOCK_SIZE, 1);
- dctx->buflen = 0;
- }
- }
-
- if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
- unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
-
- poly1305_blocks_mips(&dctx->h, src, len, 1);
- src += len;
- nbytes %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(nbytes)) {
- dctx->buflen = nbytes;
- memcpy(dctx->buf, src, nbytes);
- }
-}
-EXPORT_SYMBOL(poly1305_update_arch);
-
-void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
-{
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- }
-
- poly1305_emit_mips(&dctx->h, dst, dctx->s);
- *dctx = (struct poly1305_desc_ctx){};
-}
-EXPORT_SYMBOL(poly1305_final_arch);
-
-static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- poly1305_final_arch(dctx, dst);
- return 0;
-}
-
-static struct shash_alg mips_poly1305_alg = {
- .init = mips_poly1305_init,
- .update = mips_poly1305_update,
- .final = mips_poly1305_final,
- .digestsize = POLY1305_DIGEST_SIZE,
- .descsize = sizeof(struct poly1305_desc_ctx),
-
- .base.cra_name = "poly1305",
- .base.cra_driver_name = "poly1305-mips",
- .base.cra_priority = 200,
- .base.cra_blocksize = POLY1305_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-};
-
-static int __init mips_poly1305_mod_init(void)
-{
- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
- crypto_register_shash(&mips_poly1305_alg) : 0;
-}
-
-static void __exit mips_poly1305_mod_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
- crypto_unregister_shash(&mips_poly1305_alg);
-}
-
-module_init(mips_poly1305_mod_init);
-module_exit(mips_poly1305_mod_exit);
-
-MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-mips");
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index 4724a563c5bf..43a09f0dd3ff 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -36,15 +36,6 @@ enum sock_type {
SOCK_PACKET = 10,
};
-#define SOCK_MAX (SOCK_PACKET + 1)
-/* Mask which covers at least up to SOCK_MASK-1. The
- * * remaining bits are used as flags. */
-#define SOCK_TYPE_MASK 0xf
-
-/* Flags for socket, socketpair, paccept */
-#define SOCK_CLOEXEC O_CLOEXEC
-#define SOCK_NONBLOCK O_NONBLOCK
-
#define ARCH_HAS_SOCKET_TYPES 1
#endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index c4d6b09136b1..196a070349b0 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -791,8 +791,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc,
if (!mipspmu_event_set_period(event, hwc, idx))
return;
- if (perf_event_overflow(event, data, regs))
- mipsxx_pmu_disable_event(idx);
+ perf_event_overflow(event, data, regs);
}
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 8f208007b8e8..a112573b6e37 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -377,7 +377,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
for (i = 0; i < MAX_IM; i++)
irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
- ltq_domain = irq_domain_add_linear(node,
+ ltq_domain = irq_domain_create_linear(of_fwnode_handle(node),
(MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE,
&irq_domain_ops, 0);
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 9c024e6d5e54..9d75845ef78e 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -3,6 +3,8 @@
# Makefile for MIPS-specific library files..
#
+obj-y += crypto/
+
lib-y += bitops.o csum_partial.o delay.o memcpy.o memset.o \
mips-atomic.o strncpy_user.o \
strnlen_user.o uncached.o
diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c
index 676a4b3e290b..45e4d2c9fbf5 100644
--- a/arch/mips/lib/crc32-mips.c
+++ b/arch/mips/lib/crc32-mips.c
@@ -62,7 +62,7 @@ do { \
#define CRC32C(crc, value, size) \
_CRC32(crc, value, size, crc32c)
-static DEFINE_STATIC_KEY_FALSE(have_crc32);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
@@ -163,7 +163,7 @@ static int __init crc32_mips_init(void)
static_branch_enable(&have_crc32);
return 0;
}
-arch_initcall(crc32_mips_init);
+subsys_initcall(crc32_mips_init);
static void __exit crc32_mips_exit(void)
{
diff --git a/arch/mips/lib/crypto/.gitignore b/arch/mips/lib/crypto/.gitignore
new file mode 100644
index 000000000000..0d47d4f21c6d
--- /dev/null
+++ b/arch/mips/lib/crypto/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+poly1305-core.S
diff --git a/arch/mips/lib/crypto/Kconfig b/arch/mips/lib/crypto/Kconfig
new file mode 100644
index 000000000000..0670a170c1be
--- /dev/null
+++ b/arch/mips/lib/crypto/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_MIPS
+ tristate
+ depends on CPU_MIPS32_R2
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_MIPS
+ tristate
+ default CRYPTO_LIB_POLY1305
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
diff --git a/arch/mips/lib/crypto/Makefile b/arch/mips/lib/crypto/Makefile
new file mode 100644
index 000000000000..804488c7aded
--- /dev/null
+++ b/arch/mips/lib/crypto/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
+chacha-mips-y := chacha-core.o chacha-glue.o
+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
+
+obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
+poly1305-mips-y := poly1305-core.o poly1305-glue.o
+
+perlasm-flavour-$(CONFIG_32BIT) := o32
+perlasm-flavour-$(CONFIG_64BIT) := 64
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
+
+$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
+ $(call if_changed,perlasm)
+
+targets += poly1305-core.S
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/lib/crypto/chacha-core.S
index 5755f69cfe00..5755f69cfe00 100644
--- a/arch/mips/crypto/chacha-core.S
+++ b/arch/mips/lib/crypto/chacha-core.S
diff --git a/arch/mips/lib/crypto/chacha-glue.c b/arch/mips/lib/crypto/chacha-glue.c
new file mode 100644
index 000000000000..88c097594eb0
--- /dev/null
+++ b/arch/mips/lib/crypto/chacha-glue.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ChaCha and HChaCha functions (MIPS optimized)
+ *
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <crypto/chacha.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void chacha_crypt_arch(struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds);
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+asmlinkage void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
+EXPORT_SYMBOL(hchacha_block_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return true;
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+MODULE_DESCRIPTION("ChaCha and HChaCha functions (MIPS optimized)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/mips/lib/crypto/poly1305-glue.c b/arch/mips/lib/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..764a38a65200
--- /dev/null
+++ b/arch/mips/lib/crypto/poly1305-glue.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/unaligned.h>
+
+asmlinkage void poly1305_block_init_arch(
+ struct poly1305_block_state *state,
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
+EXPORT_SYMBOL_GPL(poly1305_block_init_arch);
+asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state,
+ const u8 *src, u32 len, u32 hibit);
+EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
+asmlinkage void poly1305_emit_arch(const struct poly1305_state *state,
+ u8 digest[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+EXPORT_SYMBOL_GPL(poly1305_emit_arch);
+
+bool poly1305_is_arch_optimized(void)
+{
+ return true;
+}
+EXPORT_SYMBOL(poly1305_is_arch_optimized);
+
+MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/lib/crypto/poly1305-mips.pl
index b05bab884ed2..399f10c3e385 100644
--- a/arch/mips/crypto/poly1305-mips.pl
+++ b/arch/mips/lib/crypto/poly1305-mips.pl
@@ -93,9 +93,9 @@ $code.=<<___;
#endif
#ifdef __KERNEL__
-# define poly1305_init poly1305_init_mips
-# define poly1305_blocks poly1305_blocks_mips
-# define poly1305_emit poly1305_emit_mips
+# define poly1305_init poly1305_block_init_arch
+# define poly1305_blocks poly1305_blocks_arch
+# define poly1305_emit poly1305_emit_arch
#endif
#if defined(__MIPSEB__) && !defined(MIPSEB)
@@ -565,9 +565,9 @@ $code.=<<___;
#endif
#ifdef __KERNEL__
-# define poly1305_init poly1305_init_mips
-# define poly1305_blocks poly1305_blocks_mips
-# define poly1305_emit poly1305_emit_mips
+# define poly1305_init poly1305_block_init_arch
+# define poly1305_blocks poly1305_blocks_arch
+# define poly1305_emit poly1305_emit_arch
#endif
#if defined(__MIPSEB__) && !defined(MIPSEB)
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index a925842ee125..17fa97ec6ffb 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -469,8 +469,8 @@ static int ar2315_pci_probe(struct platform_device *pdev)
if (err)
return err;
- apc->domain = irq_domain_add_linear(NULL, AR2315_PCI_IRQ_COUNT,
- &ar2315_pci_irq_domain_ops, apc);
+ apc->domain = irq_domain_create_linear(NULL, AR2315_PCI_IRQ_COUNT,
+ &ar2315_pci_irq_domain_ops, apc);
if (!apc->domain) {
dev_err(dev, "failed to add IRQ domain\n");
return -ENOMEM;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 4ac68a534e4f..14454ece485d 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -208,9 +208,10 @@ static int rt3883_pci_irq_init(struct device *dev,
rt3883_pci_w32(rpc, 0, RT3883_PCI_REG_PCIENA);
rpc->irq_domain =
- irq_domain_add_linear(rpc->intc_of_node, RT3883_PCI_IRQ_COUNT,
- &rt3883_pci_irq_domain_ops,
- rpc);
+ irq_domain_create_linear(of_fwnode_handle(rpc->intc_of_node),
+ RT3883_PCI_IRQ_COUNT,
+ &rt3883_pci_irq_domain_ops,
+ rpc);
if (!rpc->irq_domain) {
dev_err(dev, "unable to add IRQ domain\n");
return -ENODEV;
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index 46aef0a1b22a..af5bbbea949b 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -176,7 +176,7 @@ static int __init intc_of_init(struct device_node *node,
/* route all INTC interrupts to MIPS HW0 interrupt */
rt_intc_w32(0, INTC_REG_TYPE);
- domain = irq_domain_add_legacy(node, RALINK_INTC_IRQ_COUNT,
+ domain = irq_domain_create_legacy(of_fwnode_handle(node), RALINK_INTC_IRQ_COUNT,
RALINK_INTC_IRQ_BASE, 0, &irq_domain_ops, NULL);
if (!domain)
panic("Failed to add irqdomain");
diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c
index 8fa280660051..73568d8e21e0 100644
--- a/arch/nios2/kernel/irq.c
+++ b/arch/nios2/kernel/irq.c
@@ -69,7 +69,8 @@ void __init init_IRQ(void)
BUG_ON(!node);
- domain = irq_domain_add_linear(node, NIOS2_CPU_NR_IRQS, &irq_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(node),
+ NIOS2_CPU_NR_IRQS, &irq_ops, NULL);
BUG_ON(!domain);
irq_set_default_domain(domain);
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 5b65c9859613..94928d114d4c 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -251,7 +251,7 @@ CONFIG_CIFS=m
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
# CONFIG_CIFS_DEBUG is not set
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig
index ecc9ffcc11cd..d8cd7f858b2a 100644
--- a/arch/parisc/configs/generic-64bit_defconfig
+++ b/arch/parisc/configs/generic-64bit_defconfig
@@ -283,7 +283,6 @@ CONFIG_NLS_ASCII=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_ISO8859_2=m
CONFIG_NLS_UTF8=m
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6722625a406a..c3e0cc83f120 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -277,6 +277,7 @@ config PPC
select HAVE_PERF_EVENTS_NMI if PPC64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_RETHOOK if KPROBES
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
@@ -894,7 +895,7 @@ config DATA_SHIFT
int "Data shift" if DATA_SHIFT_BOOL
default 24 if STRICT_KERNEL_RWX && PPC64
range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
- range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+ range 14 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx
default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
@@ -907,10 +908,10 @@ config DATA_SHIFT
On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
Smaller is the alignment, greater is the number of necessary DBATs.
- On 8xx, large pages (512kb or 8M) are used to map kernel linear
- memory. Aligning to 8M reduces TLB misses as only 8M pages are used
- in that case. If PIN_TLB is selected, it must be aligned to 8M as
- 8M pages will be pinned.
+ On 8xx, large pages (16kb or 512kb or 8M) are used to map kernel
+ linear memory. Aligning to 8M reduces TLB misses as only 8M pages
+ are used in that case. If PIN_TLB is selected, it must be aligned
+ to 8M as 8M pages will be pinned.
config ARCH_FORCE_MAX_ORDER
int "Order of maximal physically contiguous allocations"
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 184d0680e661..a7ab087d412c 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -70,6 +70,7 @@ BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE)
BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
BOOTCFLAGS := $(BOOTTARGETFLAGS) \
+ -std=gnu11 \
-Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -O2 \
-msoft-float -mno-altivec -mno-vsx \
diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h
index a9d879155ef9..16df8f3c43f1 100644
--- a/arch/powerpc/boot/rs6000.h
+++ b/arch/powerpc/boot/rs6000.h
@@ -1,11 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* IBM RS/6000 "XCOFF" file definitions for BFD.
Copyright (C) 1990, 1991 Free Software Foundation, Inc.
- FIXME: Can someone provide a transliteration of this name into ASCII?
- Using the following chars caused a compiler warning on HIUX (so I replaced
- them with octal escapes), and isn't useful without an understanding of what
- character set it is.
- Written by Mimi Ph\373\364ng-Th\345o V\365 of IBM
+ Written by Mimi Phuong-Thao Vo of IBM
and John Gilmore of Cygnus Support. */
/********************** FILE HEADER **********************/
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 7e58f3e6c987..428f17b45513 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -235,7 +235,7 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_BOOTX_TEXT=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 6b6d7467fecf..379229c982a4 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -317,7 +317,7 @@ CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MD5_PPC=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 5fa154185efa..3423c405cad4 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -377,7 +377,7 @@ CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_IMA_ARCH_POLICY=y
CONFIG_IMA_APPRAISE_MODSIG=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_SERPENT=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index d2e659a2d8cb..90247b2a0ab0 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -220,7 +220,7 @@ CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index a91a766b71a4..242c1fab9d46 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1073,7 +1073,7 @@ CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 370db8192ce6..caaa359f4742 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -17,7 +17,6 @@ config CRYPTO_CURVE25519_PPC64
config CRYPTO_MD5_PPC
tristate "Digests: MD5"
- depends on PPC
select CRYPTO_HASH
help
MD5 message digest algorithm (RFC1321)
@@ -26,7 +25,6 @@ config CRYPTO_MD5_PPC
config CRYPTO_SHA1_PPC
tristate "Hash functions: SHA-1"
- depends on PPC
help
SHA-1 secure hash algorithm (FIPS 180)
@@ -34,27 +32,16 @@ config CRYPTO_SHA1_PPC
config CRYPTO_SHA1_PPC_SPE
tristate "Hash functions: SHA-1 (SPE)"
- depends on PPC && SPE
+ depends on SPE
help
SHA-1 secure hash algorithm (FIPS 180)
Architecture: powerpc using
- SPE (Signal Processing Engine) extensions
-config CRYPTO_SHA256_PPC_SPE
- tristate "Hash functions: SHA-224 and SHA-256 (SPE)"
- depends on PPC && SPE
- select CRYPTO_SHA256
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: powerpc using
- - SPE (Signal Processing Engine) extensions
-
config CRYPTO_AES_PPC_SPE
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
- depends on PPC && SPE
+ depends on SPE
select CRYPTO_SKCIPHER
help
Block ciphers: AES cipher algorithms (FIPS-197)
@@ -92,33 +79,6 @@ config CRYPTO_AES_GCM_P10
Support for cryptographic acceleration instructions on Power10 or
later CPU. This module supports stitched acceleration for AES/GCM.
-config CRYPTO_CHACHA20_P10
- tristate
- depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- default CRYPTO_LIB_CHACHA_INTERNAL
- help
- Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
- stream cipher algorithms
-
- Architecture: PowerPC64
- - Power10 or later
- - Little-endian
-
-config CRYPTO_POLY1305_P10
- tristate "Hash functions: Poly1305 (P10 or later)"
- depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
- select CRYPTO_HASH
- select CRYPTO_LIB_POLY1305_GENERIC
- help
- Poly1305 authenticator algorithm (RFC7539)
-
- Architecture: PowerPC64
- - Power10 or later
- - Little-endian
-
config CRYPTO_DEV_VMX
bool "Support for VMX cryptographic acceleration instructions"
depends on PPC64 && VSX
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 2f00b22b0823..8c2936ae466f 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -9,10 +9,7 @@ obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
-obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
-obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
-obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o
@@ -20,10 +17,7 @@ aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-
md5-ppc-y := md5-asm.o md5-glue.o
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
-sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
-chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
-poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o
diff --git a/arch/powerpc/crypto/aes.c b/arch/powerpc/crypto/aes.c
index ec06189fbf99..3f1e5e894902 100644
--- a/arch/powerpc/crypto/aes.c
+++ b/arch/powerpc/crypto/aes.c
@@ -7,15 +7,15 @@
* Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
*/
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/aes.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/simd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
#include "aesp8-ppc.h"
diff --git a/arch/powerpc/crypto/aes_cbc.c b/arch/powerpc/crypto/aes_cbc.c
index ed0debc7acb5..5f2a4f375eef 100644
--- a/arch/powerpc/crypto/aes_cbc.c
+++ b/arch/powerpc/crypto/aes_cbc.c
@@ -12,6 +12,10 @@
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
#include "aesp8-ppc.h"
diff --git a/arch/powerpc/crypto/aes_ctr.c b/arch/powerpc/crypto/aes_ctr.c
index 3da75f42529a..e27c4036e711 100644
--- a/arch/powerpc/crypto/aes_ctr.c
+++ b/arch/powerpc/crypto/aes_ctr.c
@@ -12,6 +12,10 @@
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
#include "aesp8-ppc.h"
diff --git a/arch/powerpc/crypto/aes_xts.c b/arch/powerpc/crypto/aes_xts.c
index dabbccb41550..9440e771cede 100644
--- a/arch/powerpc/crypto/aes_xts.c
+++ b/arch/powerpc/crypto/aes_xts.c
@@ -13,6 +13,10 @@
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
#include "aesp8-ppc.h"
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
deleted file mode 100644
index d8796decc1fb..000000000000
--- a/arch/powerpc/crypto/chacha-p10-glue.c
+++ /dev/null
@@ -1,221 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright 2023- IBM Corp. All rights reserved.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/sizes.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-
-asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
-
-static void vsx_begin(void)
-{
- preempt_disable();
- enable_kernel_vsx();
-}
-
-static void vsx_end(void)
-{
- disable_kernel_vsx();
- preempt_enable();
-}
-
-static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- unsigned int l = bytes & ~0x0FF;
-
- if (l > 0) {
- chacha_p10le_8x(state, dst, src, l, nrounds);
- bytes -= l;
- src += l;
- dst += l;
- state[12] += l / CHACHA_BLOCK_SIZE;
- }
-
- if (bytes > 0)
- chacha_crypt_generic(state, dst, src, bytes, nrounds);
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- hchacha_block_generic(state, stream, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
- int nrounds)
-{
- if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
- !crypto_simd_usable())
- return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
- do {
- unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
-
- vsx_begin();
- chacha_p10_do_8x(state, dst, src, todo, nrounds);
- vsx_end();
-
- bytes -= todo;
- src += todo;
- dst += todo;
- } while (bytes);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static int chacha_p10_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
- if (err)
- return err;
-
- chacha_init(state, ctx->key, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = rounddown(nbytes, walk.stride);
-
- if (!crypto_simd_usable()) {
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- } else {
- vsx_begin();
- chacha_p10_do_8x(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes, ctx->nrounds);
- vsx_end();
- }
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- if (err)
- break;
- }
-
- return err;
-}
-
-static int chacha_p10(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_p10_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_p10(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- chacha_init(state, ctx->key, req->iv);
- hchacha_block_arch(state, subctx.key, ctx->nrounds);
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_p10_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-p10",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha_p10,
- .decrypt = chacha_p10,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-p10",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = xchacha_p10,
- .decrypt = xchacha_p10,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-p10",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = xchacha_p10,
- .decrypt = xchacha_p10,
- }
-};
-
-static int __init chacha_p10_init(void)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_31))
- return 0;
-
- static_branch_enable(&have_p10);
-
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_p10_exit(void)
-{
- if (!static_branch_likely(&have_p10))
- return;
-
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_p10_init);
-module_exit(chacha_p10_exit);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
-MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-p10");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-p10");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-p10");
diff --git a/arch/powerpc/crypto/ghash.c b/arch/powerpc/crypto/ghash.c
index 77eca20bc7ac..7308735bdb33 100644
--- a/arch/powerpc/crypto/ghash.c
+++ b/arch/powerpc/crypto/ghash.c
@@ -11,19 +11,18 @@
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
-#include <asm/simd.h>
+#include "aesp8-ppc.h"
#include <asm/switch_to.h>
#include <crypto/aes.h>
+#include <crypto/gf128mul.h>
#include <crypto/ghash.h>
-#include <crypto/scatterwalk.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <crypto/b128ops.h>
-#include "aesp8-ppc.h"
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
@@ -39,15 +38,12 @@ struct p8_ghash_ctx {
struct p8_ghash_desc_ctx {
u64 shash[2];
- u8 buffer[GHASH_DIGEST_SIZE];
- int bytes;
};
static int p8_ghash_init(struct shash_desc *desc)
{
struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- dctx->bytes = 0;
memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
return 0;
}
@@ -74,27 +70,30 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
}
static inline void __ghash_block(struct p8_ghash_ctx *ctx,
- struct p8_ghash_desc_ctx *dctx)
+ struct p8_ghash_desc_ctx *dctx,
+ const u8 *src)
{
if (crypto_simd_usable()) {
preempt_disable();
pagefault_disable();
enable_kernel_vsx();
- gcm_ghash_p8(dctx->shash, ctx->htable,
- dctx->buffer, GHASH_DIGEST_SIZE);
+ gcm_ghash_p8(dctx->shash, ctx->htable, src, GHASH_BLOCK_SIZE);
disable_kernel_vsx();
pagefault_enable();
preempt_enable();
} else {
- crypto_xor((u8 *)dctx->shash, dctx->buffer, GHASH_BLOCK_SIZE);
+ crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE);
gf128mul_lle((be128 *)dctx->shash, &ctx->key);
}
}
-static inline void __ghash_blocks(struct p8_ghash_ctx *ctx,
- struct p8_ghash_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
+static inline int __ghash_blocks(struct p8_ghash_ctx *ctx,
+ struct p8_ghash_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
{
+ int remain = srclen - round_down(srclen, GHASH_BLOCK_SIZE);
+
+ srclen -= remain;
if (crypto_simd_usable()) {
preempt_disable();
pagefault_disable();
@@ -105,62 +104,38 @@ static inline void __ghash_blocks(struct p8_ghash_ctx *ctx,
pagefault_enable();
preempt_enable();
} else {
- while (srclen >= GHASH_BLOCK_SIZE) {
+ do {
crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE);
gf128mul_lle((be128 *)dctx->shash, &ctx->key);
srclen -= GHASH_BLOCK_SIZE;
src += GHASH_BLOCK_SIZE;
- }
+ } while (srclen);
}
+
+ return remain;
}
static int p8_ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- unsigned int len;
struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- if (dctx->bytes) {
- if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) {
- memcpy(dctx->buffer + dctx->bytes, src,
- srclen);
- dctx->bytes += srclen;
- return 0;
- }
- memcpy(dctx->buffer + dctx->bytes, src,
- GHASH_DIGEST_SIZE - dctx->bytes);
-
- __ghash_block(ctx, dctx);
-
- src += GHASH_DIGEST_SIZE - dctx->bytes;
- srclen -= GHASH_DIGEST_SIZE - dctx->bytes;
- dctx->bytes = 0;
- }
- len = srclen & ~(GHASH_DIGEST_SIZE - 1);
- if (len) {
- __ghash_blocks(ctx, dctx, src, len);
- src += len;
- srclen -= len;
- }
- if (srclen) {
- memcpy(dctx->buffer, src, srclen);
- dctx->bytes = srclen;
- }
- return 0;
+ return __ghash_blocks(ctx, dctx, src, srclen);
}
-static int p8_ghash_final(struct shash_desc *desc, u8 *out)
+static int p8_ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
- int i;
struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- if (dctx->bytes) {
- for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++)
- dctx->buffer[i] = 0;
- __ghash_block(ctx, dctx);
- dctx->bytes = 0;
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
+
+ memcpy(buf, src, len);
+ __ghash_block(ctx, dctx, buf);
+ memzero_explicit(buf, sizeof(buf));
}
memcpy(out, dctx->shash, GHASH_DIGEST_SIZE);
return 0;
@@ -170,14 +145,14 @@ struct shash_alg p8_ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = p8_ghash_init,
.update = p8_ghash_update,
- .final = p8_ghash_final,
+ .finup = p8_ghash_finup,
.setkey = p8_ghash_setkey,
- .descsize = sizeof(struct p8_ghash_desc_ctx)
- + sizeof(struct ghash_desc_ctx),
+ .descsize = sizeof(struct p8_ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "p8_ghash",
.cra_priority = 1000,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct p8_ghash_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
index c24f605033bd..204440a90cd8 100644
--- a/arch/powerpc/crypto/md5-glue.c
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -8,25 +8,13 @@
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/md5.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
-static inline void ppc_md5_clear_context(struct md5_state *sctx)
-{
- int count = sizeof(struct md5_state) >> 2;
- u32 *ptr = (u32 *)sctx;
-
- /* make sure we can clear the fast way */
- BUILD_BUG_ON(sizeof(struct md5_state) % 4);
- do { *ptr++ = 0; } while (--count);
-}
-
static int ppc_md5_init(struct shash_desc *desc)
{
struct md5_state *sctx = shash_desc_ctx(desc);
@@ -44,79 +32,34 @@ static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct md5_state *sctx = shash_desc_ctx(desc);
- const unsigned int offset = sctx->byte_count & 0x3f;
- unsigned int avail = 64 - offset;
- const u8 *src = data;
- sctx->byte_count += len;
-
- if (avail > len) {
- memcpy((char *)sctx->block + offset, src, len);
- return 0;
- }
-
- if (offset) {
- memcpy((char *)sctx->block + offset, src, avail);
- ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
- len -= avail;
- src += avail;
- }
-
- if (len > 63) {
- ppc_md5_transform(sctx->hash, src, len >> 6);
- src += len & ~0x3f;
- len &= 0x3f;
- }
-
- memcpy((char *)sctx->block, src, len);
- return 0;
+ sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE);
+ ppc_md5_transform(sctx->hash, data, len >> 6);
+ return len - round_down(len, MD5_HMAC_BLOCK_SIZE);
}
-static int ppc_md5_final(struct shash_desc *desc, u8 *out)
+static int ppc_md5_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int offset, u8 *out)
{
struct md5_state *sctx = shash_desc_ctx(desc);
- const unsigned int offset = sctx->byte_count & 0x3f;
- const u8 *src = (const u8 *)sctx->block;
- u8 *p = (u8 *)src + offset;
- int padlen = 55 - offset;
- __le64 *pbits = (__le64 *)((char *)sctx->block + 56);
+ __le64 block[MD5_BLOCK_WORDS] = {};
+ u8 *p = memcpy(block, src, offset);
__le32 *dst = (__le32 *)out;
+ __le64 *pbits;
+ src = p;
+ p += offset;
*p++ = 0x80;
-
- if (padlen < 0) {
- memset(p, 0x00, padlen + sizeof (u64));
- ppc_md5_transform(sctx->hash, src, 1);
- p = (char *)sctx->block;
- padlen = 56;
- }
-
- memset(p, 0, padlen);
+ sctx->byte_count += offset;
+ pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1];
*pbits = cpu_to_le64(sctx->byte_count << 3);
- ppc_md5_transform(sctx->hash, src, 1);
+ ppc_md5_transform(sctx->hash, src, (pbits - block + 1) / 8);
+ memzero_explicit(block, sizeof(block));
dst[0] = cpu_to_le32(sctx->hash[0]);
dst[1] = cpu_to_le32(sctx->hash[1]);
dst[2] = cpu_to_le32(sctx->hash[2]);
dst[3] = cpu_to_le32(sctx->hash[3]);
-
- ppc_md5_clear_context(sctx);
- return 0;
-}
-
-static int ppc_md5_export(struct shash_desc *desc, void *out)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int ppc_md5_import(struct shash_desc *desc, const void *in)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
return 0;
}
@@ -124,15 +67,13 @@ static struct shash_alg alg = {
.digestsize = MD5_DIGEST_SIZE,
.init = ppc_md5_init,
.update = ppc_md5_update,
- .final = ppc_md5_final,
- .export = ppc_md5_export,
- .import = ppc_md5_import,
- .descsize = sizeof(struct md5_state),
- .statesize = sizeof(struct md5_state),
+ .finup = ppc_md5_finup,
+ .descsize = MD5_STATE_SIZE,
.base = {
.cra_name = "md5",
.cra_driver_name= "md5-ppc",
.cra_priority = 200,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c
deleted file mode 100644
index 369686e9370b..000000000000
--- a/arch/powerpc/crypto/poly1305-p10-glue.c
+++ /dev/null
@@ -1,186 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Poly1305 authenticator algorithm, RFC7539.
- *
- * Copyright 2023- IBM Corp. All rights reserved.
- */
-
-#include <crypto/algapi.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/jump_label.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/poly1305.h>
-#include <crypto/internal/simd.h>
-#include <linux/cpufeature.h>
-#include <linux/unaligned.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-
-asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen);
-asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit);
-asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst);
-
-static void vsx_begin(void)
-{
- preempt_disable();
- enable_kernel_vsx();
-}
-
-static void vsx_end(void)
-{
- disable_kernel_vsx();
- preempt_enable();
-}
-
-static int crypto_poly1305_p10_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- poly1305_core_init(&dctx->h);
- dctx->buflen = 0;
- dctx->rset = 0;
- dctx->sset = false;
-
- return 0;
-}
-
-static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
- const u8 *inp, unsigned int len)
-{
- unsigned int acc = 0;
-
- if (unlikely(!dctx->sset)) {
- if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
- struct poly1305_core_key *key = &dctx->core_r;
-
- key->key.r64[0] = get_unaligned_le64(&inp[0]);
- key->key.r64[1] = get_unaligned_le64(&inp[8]);
- inp += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- acc += POLY1305_BLOCK_SIZE;
- dctx->rset = 1;
- }
- if (len >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(&inp[0]);
- dctx->s[1] = get_unaligned_le32(&inp[4]);
- dctx->s[2] = get_unaligned_le32(&inp[8]);
- dctx->s[3] = get_unaligned_le32(&inp[12]);
- acc += POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- }
- return acc;
-}
-
-static int crypto_poly1305_p10_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int bytes, used;
-
- if (unlikely(dctx->buflen)) {
- bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- srclen -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE))) {
- vsx_begin();
- poly1305_64s(&dctx->h, dctx->buf,
- POLY1305_BLOCK_SIZE, 1);
- vsx_end();
- }
- dctx->buflen = 0;
- }
- }
-
- if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
- used = crypto_poly1305_setdctxkey(dctx, src, bytes);
- if (likely(used)) {
- srclen -= used;
- src += used;
- }
- if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) {
- vsx_begin();
- poly1305_p10le_4blocks(&dctx->h, src, srclen);
- vsx_end();
- src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4));
- srclen %= POLY1305_BLOCK_SIZE * 4;
- }
- while (srclen >= POLY1305_BLOCK_SIZE) {
- vsx_begin();
- poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
- vsx_end();
- srclen -= POLY1305_BLOCK_SIZE;
- src += POLY1305_BLOCK_SIZE;
- }
- }
-
- if (unlikely(srclen)) {
- dctx->buflen = srclen;
- memcpy(dctx->buf, src, srclen);
- }
-
- return 0;
-}
-
-static int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- if ((dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- vsx_begin();
- poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- vsx_end();
- dctx->buflen = 0;
- }
-
- poly1305_emit_64(&dctx->h, &dctx->s, dst);
- return 0;
-}
-
-static struct shash_alg poly1305_alg = {
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = crypto_poly1305_p10_init,
- .update = crypto_poly1305_p10_update,
- .final = crypto_poly1305_p10_final,
- .descsize = sizeof(struct poly1305_desc_ctx),
- .base = {
- .cra_name = "poly1305",
- .cra_driver_name = "poly1305-p10",
- .cra_priority = 300,
- .cra_blocksize = POLY1305_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init poly1305_p10_init(void)
-{
- return crypto_register_shash(&poly1305_alg);
-}
-
-static void __exit poly1305_p10_exit(void)
-{
- crypto_unregister_shash(&poly1305_alg);
-}
-
-module_cpu_feature_match(PPC_MODULE_FEATURE_P10, poly1305_p10_init);
-module_exit(poly1305_p10_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
-MODULE_DESCRIPTION("Optimized Poly1305 for P10");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-p10");
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
index 9170892a8557..04c88e173ce1 100644
--- a/arch/powerpc/crypto/sha1-spe-glue.c
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -7,16 +7,13 @@
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
+#include <asm/switch_to.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <asm/byteorder.h>
-#include <asm/switch_to.h>
-#include <linux/hardirq.h>
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -30,7 +27,7 @@
*/
#define MAX_BYTES 2048
-extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
+asmlinkage void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
static void spe_begin(void)
{
@@ -46,126 +43,45 @@ static void spe_end(void)
preempt_enable();
}
-static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
+static void ppc_spe_sha1_block(struct sha1_state *sctx, const u8 *src,
+ int blocks)
{
- int count = sizeof(struct sha1_state) >> 2;
- u32 *ptr = (u32 *)sctx;
-
- /* make sure we can clear the fast way */
- BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
- do { *ptr++ = 0; } while (--count);
-}
-
-static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- const unsigned int offset = sctx->count & 0x3f;
- const unsigned int avail = 64 - offset;
- unsigned int bytes;
- const u8 *src = data;
-
- if (avail > len) {
- sctx->count += len;
- memcpy((char *)sctx->buffer + offset, src, len);
- return 0;
- }
-
- sctx->count += len;
-
- if (offset) {
- memcpy((char *)sctx->buffer + offset, src, avail);
+ do {
+ int unit = min(blocks, MAX_BYTES / SHA1_BLOCK_SIZE);
spe_begin();
- ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
+ ppc_spe_sha1_transform(sctx->state, src, unit);
spe_end();
- len -= avail;
- src += avail;
- }
-
- while (len > 63) {
- bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
- bytes = bytes & ~0x3f;
-
- spe_begin();
- ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
- spe_end();
-
- src += bytes;
- len -= bytes;
- }
-
- memcpy((char *)sctx->buffer, src, len);
- return 0;
-}
-
-static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- const unsigned int offset = sctx->count & 0x3f;
- char *p = (char *)sctx->buffer + offset;
- int padlen;
- __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
- __be32 *dst = (__be32 *)out;
-
- padlen = 55 - offset;
- *p++ = 0x80;
-
- spe_begin();
-
- if (padlen < 0) {
- memset(p, 0x00, padlen + sizeof (u64));
- ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
- p = (char *)sctx->buffer;
- padlen = 56;
- }
-
- memset(p, 0, padlen);
- *pbits = cpu_to_be64(sctx->count << 3);
- ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
-
- spe_end();
-
- dst[0] = cpu_to_be32(sctx->state[0]);
- dst[1] = cpu_to_be32(sctx->state[1]);
- dst[2] = cpu_to_be32(sctx->state[2]);
- dst[3] = cpu_to_be32(sctx->state[3]);
- dst[4] = cpu_to_be32(sctx->state[4]);
-
- ppc_sha1_clear_context(sctx);
- return 0;
+ src += unit * SHA1_BLOCK_SIZE;
+ blocks -= unit;
+ } while (blocks);
}
-static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
+static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
+ return sha1_base_do_update_blocks(desc, data, len, ppc_spe_sha1_block);
}
-static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
+static int ppc_spe_sha1_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
+ sha1_base_do_finup(desc, src, len, ppc_spe_sha1_block);
+ return sha1_base_finish(desc, out);
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = ppc_spe_sha1_update,
- .final = ppc_spe_sha1_final,
- .export = ppc_spe_sha1_export,
- .import = ppc_spe_sha1_import,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
+ .finup = ppc_spe_sha1_finup,
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-ppc-spe",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c
index f283bbd3f121..4593946aa9b3 100644
--- a/arch/powerpc/crypto/sha1.c
+++ b/arch/powerpc/crypto/sha1.c
@@ -13,107 +13,46 @@
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <asm/byteorder.h>
-
-void powerpc_sha_transform(u32 *state, const u8 *src);
-
-static int powerpc_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial, done;
- const u8 *src;
-
- partial = sctx->count & 0x3f;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) > 63) {
-
- if (partial) {
- done = -partial;
- memcpy(sctx->buffer + partial, data, done + 64);
- src = sctx->buffer;
- }
-
- do {
- powerpc_sha_transform(sctx->state, src);
- done += 64;
- src = data + done;
- } while (done + 63 < len);
-
- partial = 0;
- }
- memcpy(sctx->buffer + partial, src, len - done);
-
- return 0;
-}
+#include <linux/kernel.h>
+#include <linux/module.h>
+asmlinkage void powerpc_sha_transform(u32 *state, const u8 *src);
-/* Add padding and return the message digest. */
-static int powerpc_sha1_final(struct shash_desc *desc, u8 *out)
+static void powerpc_sha_block(struct sha1_state *sctx, const u8 *data,
+ int blocks)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- u32 i, index, padlen;
- __be64 bits;
- static const u8 padding[64] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 */
- index = sctx->count & 0x3f;
- padlen = (index < 56) ? (56 - index) : ((64+56) - index);
- powerpc_sha1_update(desc, padding, padlen);
-
- /* Append length */
- powerpc_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof *sctx);
-
- return 0;
+ do {
+ powerpc_sha_transform(sctx->state, data);
+ data += 64;
+ } while (--blocks);
}
-static int powerpc_sha1_export(struct shash_desc *desc, void *out)
+static int powerpc_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
+ return sha1_base_do_update_blocks(desc, data, len, powerpc_sha_block);
}
-static int powerpc_sha1_import(struct shash_desc *desc, const void *in)
+/* Add padding and return the message digest. */
+static int powerpc_sha1_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
+ sha1_base_do_finup(desc, src, len, powerpc_sha_block);
+ return sha1_base_finish(desc, out);
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = powerpc_sha1_update,
- .final = powerpc_sha1_final,
- .export = powerpc_sha1_export,
- .import = powerpc_sha1_import,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
+ .finup = powerpc_sha1_finup,
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-powerpc",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
deleted file mode 100644
index 2997d13236e0..000000000000
--- a/arch/powerpc/crypto/sha256-spe-glue.c
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Glue code for SHA-256 implementation for SPE instructions (PPC)
- *
- * Based on generic implementation. The assembler module takes care
- * about the SPE registers so it can run from interrupt context.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <asm/byteorder.h>
-#include <asm/switch_to.h>
-#include <linux/hardirq.h>
-
-/*
- * MAX_BYTES defines the number of bytes that are allowed to be processed
- * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
- * operations per 64 bytes. e500 cores can issue two arithmetic instructions
- * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
- * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
- * Headroom for cache misses included. Even with the low end model clocked
- * at 667 MHz this equals to a critical time window of less than 27us.
- *
- */
-#define MAX_BYTES 1024
-
-extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
-
-static void spe_begin(void)
-{
- /* We just start SPE operations and will save SPE registers later. */
- preempt_disable();
- enable_kernel_spe();
-}
-
-static void spe_end(void)
-{
- disable_kernel_spe();
- /* reenable preemption */
- preempt_enable();
-}
-
-static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
-{
- int count = sizeof(struct sha256_state) >> 2;
- u32 *ptr = (u32 *)sctx;
-
- /* make sure we can clear the fast way */
- BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
- do { *ptr++ = 0; } while (--count);
-}
-
-static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- const unsigned int offset = sctx->count & 0x3f;
- const unsigned int avail = 64 - offset;
- unsigned int bytes;
- const u8 *src = data;
-
- if (avail > len) {
- sctx->count += len;
- memcpy((char *)sctx->buf + offset, src, len);
- return 0;
- }
-
- sctx->count += len;
-
- if (offset) {
- memcpy((char *)sctx->buf + offset, src, avail);
-
- spe_begin();
- ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
- spe_end();
-
- len -= avail;
- src += avail;
- }
-
- while (len > 63) {
- /* cut input data into smaller blocks */
- bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
- bytes = bytes & ~0x3f;
-
- spe_begin();
- ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
- spe_end();
-
- src += bytes;
- len -= bytes;
- }
-
- memcpy((char *)sctx->buf, src, len);
- return 0;
-}
-
-static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- const unsigned int offset = sctx->count & 0x3f;
- char *p = (char *)sctx->buf + offset;
- int padlen;
- __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
- __be32 *dst = (__be32 *)out;
-
- padlen = 55 - offset;
- *p++ = 0x80;
-
- spe_begin();
-
- if (padlen < 0) {
- memset(p, 0x00, padlen + sizeof (u64));
- ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
- p = (char *)sctx->buf;
- padlen = 56;
- }
-
- memset(p, 0, padlen);
- *pbits = cpu_to_be64(sctx->count << 3);
- ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
-
- spe_end();
-
- dst[0] = cpu_to_be32(sctx->state[0]);
- dst[1] = cpu_to_be32(sctx->state[1]);
- dst[2] = cpu_to_be32(sctx->state[2]);
- dst[3] = cpu_to_be32(sctx->state[3]);
- dst[4] = cpu_to_be32(sctx->state[4]);
- dst[5] = cpu_to_be32(sctx->state[5]);
- dst[6] = cpu_to_be32(sctx->state[6]);
- dst[7] = cpu_to_be32(sctx->state[7]);
-
- ppc_sha256_clear_context(sctx);
- return 0;
-}
-
-static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
-{
- __be32 D[SHA256_DIGEST_SIZE >> 2];
- __be32 *dst = (__be32 *)out;
-
- ppc_spe_sha256_final(desc, (u8 *)D);
-
- /* avoid bytewise memcpy */
- dst[0] = D[0];
- dst[1] = D[1];
- dst[2] = D[2];
- dst[3] = D[3];
- dst[4] = D[4];
- dst[5] = D[5];
- dst[6] = D[6];
-
- /* clear sensitive data */
- memzero_explicit(D, SHA256_DIGEST_SIZE);
- return 0;
-}
-
-static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
-}
-
-static struct shash_alg algs[2] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = ppc_spe_sha256_update,
- .final = ppc_spe_sha256_final,
- .export = ppc_spe_sha256_export,
- .import = ppc_spe_sha256_import,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "sha256-ppc-spe",
- .cra_priority = 300,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = ppc_spe_sha256_update,
- .final = ppc_spe_sha224_final,
- .export = ppc_spe_sha256_export,
- .import = ppc_spe_sha256_import,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "sha224-ppc-spe",
- .cra_priority = 300,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init ppc_spe_sha256_mod_init(void)
-{
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit ppc_spe_sha256_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_init(ppc_spe_sha256_mod_init);
-module_exit(ppc_spe_sha256_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
-
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h
index d107abe1468f..acd61eb36d59 100644
--- a/arch/powerpc/include/asm/guest-state-buffer.h
+++ b/arch/powerpc/include/asm/guest-state-buffer.h
@@ -28,6 +28,21 @@
/* Process Table Info */
#define KVMPPC_GSID_PROCESS_TABLE 0x0006
+/* Guest Management Heap Size */
+#define KVMPPC_GSID_L0_GUEST_HEAP 0x0800
+
+/* Guest Management Heap Max Size */
+#define KVMPPC_GSID_L0_GUEST_HEAP_MAX 0x0801
+
+/* Guest Pagetable Size */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE 0x0802
+
+/* Guest Pagetable Max Size */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX 0x0803
+
+/* Guest Pagetable Reclaim in bytes */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM 0x0804
+
/* H_GUEST_RUN_VCPU input buffer Info */
#define KVMPPC_GSID_RUN_INPUT 0x0C00
/* H_GUEST_RUN_VCPU output buffer Info */
@@ -106,6 +121,11 @@
#define KVMPPC_GSE_GUESTWIDE_COUNT \
(KVMPPC_GSE_GUESTWIDE_END - KVMPPC_GSE_GUESTWIDE_START + 1)
+#define KVMPPC_GSE_HOSTWIDE_START KVMPPC_GSID_L0_GUEST_HEAP
+#define KVMPPC_GSE_HOSTWIDE_END KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+#define KVMPPC_GSE_HOSTWIDE_COUNT \
+ (KVMPPC_GSE_HOSTWIDE_END - KVMPPC_GSE_HOSTWIDE_START + 1)
+
#define KVMPPC_GSE_META_START KVMPPC_GSID_RUN_INPUT
#define KVMPPC_GSE_META_END KVMPPC_GSID_VPA
#define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1)
@@ -130,7 +150,8 @@
(KVMPPC_GSE_INTR_REGS_END - KVMPPC_GSE_INTR_REGS_START + 1)
#define KVMPPC_GSE_IDEN_COUNT \
- (KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \
+ (KVMPPC_GSE_HOSTWIDE_COUNT + \
+ KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \
KVMPPC_GSE_DW_REGS_COUNT + KVMPPC_GSE_W_REGS_COUNT + \
KVMPPC_GSE_VSRS_COUNT + KVMPPC_GSE_INTR_REGS_COUNT)
@@ -139,10 +160,11 @@
*/
enum {
KVMPPC_GS_CLASS_GUESTWIDE = 0x01,
- KVMPPC_GS_CLASS_META = 0x02,
- KVMPPC_GS_CLASS_DWORD_REG = 0x04,
- KVMPPC_GS_CLASS_WORD_REG = 0x08,
- KVMPPC_GS_CLASS_VECTOR = 0x10,
+ KVMPPC_GS_CLASS_HOSTWIDE = 0x02,
+ KVMPPC_GS_CLASS_META = 0x04,
+ KVMPPC_GS_CLASS_DWORD_REG = 0x08,
+ KVMPPC_GS_CLASS_WORD_REG = 0x10,
+ KVMPPC_GS_CLASS_VECTOR = 0x18,
KVMPPC_GS_CLASS_INTR = 0x20,
};
@@ -164,6 +186,7 @@ enum {
*/
enum {
KVMPPC_GS_FLAGS_WIDE = 0x01,
+ KVMPPC_GS_FLAGS_HOST_WIDE = 0x02,
};
/**
@@ -287,7 +310,7 @@ struct kvmppc_gs_msg_ops {
* struct kvmppc_gs_msg - a guest state message
* @bitmap: the guest state ids that should be included
* @ops: modify message behavior for reading and writing to buffers
- * @flags: guest wide or thread wide
+ * @flags: host wide, guest wide or thread wide
* @data: location where buffer data will be written to or from.
*
* A guest state message is allows flexibility in sending in receiving data
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index eeef13db2770..6df6dbbe1e7c 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -490,14 +490,15 @@
#define H_RPTI_PAGE_ALL (-1UL)
/* Flags for H_GUEST_{S,G}_STATE */
-#define H_GUEST_FLAGS_WIDE (1UL<<(63-0))
+#define H_GUEST_FLAGS_WIDE (1UL << (63 - 0))
+#define H_GUEST_FLAGS_HOST_WIDE (1UL << (63 - 1))
/* Flag values used for H_{S,G}SET_GUEST_CAPABILITIES */
-#define H_GUEST_CAP_COPY_MEM (1UL<<(63-0))
-#define H_GUEST_CAP_POWER9 (1UL<<(63-1))
-#define H_GUEST_CAP_POWER10 (1UL<<(63-2))
-#define H_GUEST_CAP_POWER11 (1UL<<(63-3))
-#define H_GUEST_CAP_BITMAP2 (1UL<<(63-63))
+#define H_GUEST_CAP_COPY_MEM (1UL << (63 - 0))
+#define H_GUEST_CAP_POWER9 (1UL << (63 - 1))
+#define H_GUEST_CAP_POWER10 (1UL << (63 - 2))
+#define H_GUEST_CAP_POWER11 (1UL << (63 - 3))
+#define H_GUEST_CAP_BITMAP2 (1UL << (63 - 63))
/*
* Defines for H_HTM - Macros for hardware trace macro (HTM) function.
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 91be7b885944..f2b6cc4341bb 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -65,6 +65,14 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
return vpa_call(H_VPA_REG_DTL, cpu, vpa);
}
+/*
+ * Invokes H_HTM hcall with parameters passed from htm_hcall_wrapper.
+ * flags: Set to hardwareTarget.
+ * target: Specifies target using node index, nodal chip index and core index.
+ * operation : action to perform ie configure, start, stop, deconfigure, trace
+ * based on the HTM type.
+ * param1, param2, param3: parameters for each action.
+ */
static inline long htm_call(unsigned long flags, unsigned long target,
unsigned long operation, unsigned long param1,
unsigned long param2, unsigned long param3)
@@ -73,17 +81,17 @@ static inline long htm_call(unsigned long flags, unsigned long target,
param1, param2, param3);
}
-static inline long htm_get_dump_hardware(unsigned long nodeindex,
+static inline long htm_hcall_wrapper(unsigned long flags, unsigned long nodeindex,
unsigned long nodalchipindex, unsigned long coreindexonchip,
- unsigned long type, unsigned long addr, unsigned long size,
- unsigned long offset)
+ unsigned long type, unsigned long htm_op, unsigned long param1, unsigned long param2,
+ unsigned long param3)
{
- return htm_call(H_HTM_FLAGS_HARDWARE_TARGET,
+ return htm_call(H_HTM_FLAGS_HARDWARE_TARGET | flags,
H_HTM_TARGET_NODE_INDEX(nodeindex) |
H_HTM_TARGET_NODAL_CHIP_INDEX(nodalchipindex) |
H_HTM_TARGET_CORE_INDEX_ON_CHIP(coreindexonchip),
- H_HTM_OP(H_HTM_OP_DUMP_DATA) | H_HTM_TYPE(type),
- addr, size, offset);
+ H_HTM_OP(htm_op) | H_HTM_TYPE(type),
+ param1, param2, param3);
}
extern void vpa_init(int cpu);
diff --git a/arch/powerpc/include/asm/preempt.h b/arch/powerpc/include/asm/preempt.h
new file mode 100644
index 000000000000..000e2b9681f3
--- /dev/null
+++ b/arch/powerpc/include/asm/preempt.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_PREEMPT_H
+#define __ASM_POWERPC_PREEMPT_H
+
+#include <asm-generic/preempt.h>
+
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+#include <linux/jump_label.h>
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#define need_irq_preemption() \
+ (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+#else
+#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
+#endif
+
+#endif /* __ASM_POWERPC_PREEMPT_H */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 04406162fc5a..75fa0293c508 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -515,6 +515,10 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
extern unsigned long rtas_rmo_buf;
extern struct mutex rtas_ibm_get_vpd_lock;
+extern struct mutex rtas_ibm_get_indices_lock;
+extern struct mutex rtas_ibm_set_dynamic_indicator_lock;
+extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock;
+extern struct mutex rtas_ibm_physical_attestation_lock;
#define GLOBAL_INTERRUPT_QUEUE 9005
diff --git a/arch/powerpc/include/uapi/asm/papr-indices.h b/arch/powerpc/include/uapi/asm/papr-indices.h
new file mode 100644
index 000000000000..c2999d89d52a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-indices.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_INDICES_H_
+#define _UAPI_PAPR_INDICES_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+#define LOC_CODE_SIZE 80
+#define RTAS_GET_INDICES_BUF_SIZE SZ_4K
+
+struct papr_indices_io_block {
+ union {
+ struct {
+ __u8 is_sensor; /* 0 for indicator and 1 for sensor */
+ __u32 indice_type;
+ } indices;
+ struct {
+ __u32 token; /* Sensor or indicator token */
+ __u32 state; /* get / set state */
+ /*
+ * PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+ * Restrictions. 79 characters plus null.
+ */
+ char location_code_str[LOC_CODE_SIZE]; /* location code */
+ } dynamic_param;
+ };
+};
+
+/*
+ * ioctls for /dev/papr-indices.
+ * PAPR_INDICES_IOC_GET: Returns a get-indices handle fd to read data
+ * PAPR_DYNAMIC_SENSOR_IOC_GET: Gets the state of the input sensor
+ * PAPR_DYNAMIC_INDICATOR_IOC_SET: Sets the new state for the input indicator
+ */
+#define PAPR_INDICES_IOC_GET _IOW(PAPR_MISCDEV_IOC_ID, 3, struct papr_indices_io_block)
+#define PAPR_DYNAMIC_SENSOR_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 4, struct papr_indices_io_block)
+#define PAPR_DYNAMIC_INDICATOR_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 5, struct papr_indices_io_block)
+
+
+#endif /* _UAPI_PAPR_INDICES_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-physical-attestation.h b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
new file mode 100644
index 000000000000..ea746837bb9a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+#define _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+#define PAPR_PHYATTEST_MAX_INPUT 4084 /* Max 4K buffer: 4K-12 */
+
+/*
+ * Defined in PAPR 2.13+ 21.6 Attestation Command Structures.
+ * User space pass this struct and the max size should be 4K.
+ */
+struct papr_phy_attest_io_block {
+ __u8 version;
+ __u8 command;
+ __u8 TCG_major_ver;
+ __u8 TCG_minor_ver;
+ __be32 length;
+ __be32 correlator;
+ __u8 payload[PAPR_PHYATTEST_MAX_INPUT];
+};
+
+/*
+ * ioctl for /dev/papr-physical-attestation. Returns a attestation
+ * command fd handle
+ */
+#define PAPR_PHY_ATTEST_IOC_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 8, struct papr_phy_attest_io_block)
+
+#endif /* _UAPI_PAPR_PHYSICAL_ATTESTATION_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-platform-dump.h b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
new file mode 100644
index 000000000000..8a1c060e89a9
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PLATFORM_DUMP_H_
+#define _UAPI_PAPR_PLATFORM_DUMP_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+/*
+ * ioctl for /dev/papr-platform-dump. Returns a platform-dump handle fd
+ * corresponding to dump tag.
+ */
+#define PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 6, __u64)
+#define PAPR_PLATFORM_DUMP_IOC_INVALIDATE _IOW(PAPR_MISCDEV_IOC_ID, 7, __u64)
+
+#endif /* _UAPI_PAPR_PLATFORM_DUMP_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 6ac621155ec3..9d1ab3971694 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -160,9 +160,7 @@ endif
obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),)
obj-y += ppc_save_regs.o
-endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index df16c7f547ab..8ca49e40c473 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -290,10 +290,8 @@ static void __init fadump_show_config(void)
if (!fw_dump.fadump_supported)
return;
- pr_debug("Fadump enabled : %s\n",
- (fw_dump.fadump_enabled ? "yes" : "no"));
- pr_debug("Dump Active : %s\n",
- (fw_dump.dump_active ? "yes" : "no"));
+ pr_debug("Fadump enabled : %s\n", str_yes_no(fw_dump.fadump_enabled));
+ pr_debug("Dump Active : %s\n", str_yes_no(fw_dump.dump_active));
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 8f4acc55407b..e0c681d0b076 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -25,6 +25,10 @@
unsigned long global_dbcr0[NR_CPUS];
#endif
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
static inline bool exit_must_hard_disable(void)
@@ -396,7 +400,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
/* Returning to a kernel context with local irqs enabled. */
WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
- if (IS_ENABLED(CONFIG_PREEMPTION)) {
+ if (need_irq_preemption()) {
/* Return to preemptible kernel context */
if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0ebae6e4c19d..244eb4857e7f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
@@ -769,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
iommu_table_clear(tbl);
if (!welcomed) {
- printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
- novmerge ? "disabled" : "enabled");
+ pr_info("IOMMU table initialized, virtual merging %s\n",
+ str_disabled_enabled(novmerge));
welcomed = 1;
}
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 3816a2bf2b84..d083b4517065 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -9,6 +9,7 @@
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/of.h>
+#include <linux/string.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
@@ -56,7 +57,7 @@ static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
+ strscpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
systemcfg->version.major = SYSTEMCFG_MAJOR;
systemcfg->version.minor = SYSTEMCFG_MINOR;
systemcfg->processor = mfspr(SPRN_PVR);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef91f71e07c4..855e09886503 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1000,7 +1000,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
WARN_ON(tm_suspend_disabled);
- TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
+ TM_DEBUG("---- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
thr->regs->ccr, thr->regs->msr,
@@ -1008,7 +1008,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
tm_reclaim_thread(thr, TM_CAUSE_RESCHED);
- TM_DEBUG("--- tm_reclaim on pid %d complete\n",
+ TM_DEBUG("---- tm_reclaim on pid %d complete\n",
tsk->pid);
out_and_saveregs:
@@ -2367,14 +2367,14 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
(sp + STACK_INT_FRAME_REGS);
lr = regs->link;
- printk("%s--- interrupt: %lx at %pS\n",
+ printk("%s---- interrupt: %lx at %pS\n",
loglvl, regs->trap, (void *)regs->nip);
// Detect the case of an empty pt_regs at the very base
// of the stack and suppress showing it in full.
if (!empty_user_regs(regs, tsk)) {
__show_regs(regs);
- printk("%s--- interrupt: %lx\n", loglvl, regs->trap);
+ printk("%s---- interrupt: %lx\n", loglvl, regs->trap);
}
firstframe = 1;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d7a738f1858d..e61245c4468e 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -92,12 +92,12 @@ struct rtas_function {
* Per-function locks for sequence-based RTAS functions.
*/
static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
-static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
-static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
-static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
-static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
+DEFINE_MUTEX(rtas_ibm_get_indices_lock);
+DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__CHECK_EXCEPTION] = {
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
index 2c1b24100eca..3565c67fc638 100644
--- a/arch/powerpc/kernel/trace/ftrace_entry.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -212,10 +212,10 @@
bne- 1f
mr r3, r15
+1: mtlr r3
.if \allregs == 0
REST_GPR(15, r1)
.endif
-1: mtlr r3
#endif
/* Restore gprs */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 9ac3266e4965..a325c1c02f96 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -359,7 +359,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
is_via_system_reset = 1;
- crash_smp_send_stop();
+ if (IS_ENABLED(CONFIG_SMP))
+ crash_smp_send_stop();
+ else
+ crash_kexec_prepare();
crash_save_cpu(regs, crashing_cpu);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index dbfdc126bf14..2f2702c867f7 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
select KVM_GENERIC_MMU_NOTIFIER
+ select KVM_BOOK3S_HV_PMU
select CMA
help
Support running unmodified book3s_64 guest kernels in
@@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
those buggy L1s which saves the L2 state, at the cost of performance
in all nested-capable guest entry/exit.
+config KVM_BOOK3S_HV_PMU
+ tristate "Hypervisor Perf events for KVM Book3s-HV"
+ depends on KVM_BOOK3S_64_HV
+ help
+ Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These
+ Perf events give an overview of hypervisor performance overall
+ instead of a specific guests. Currently the PMU reports
+ L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like:
+ * Total/Used Guest-Heap
+ * Total/Used Guest Page-table Memory
+ * Total amount of Guest Page-table Memory reclaimed
+
config KVM_BOOKE_HV
bool
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 19f4d298dd17..7667563fb9ff 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -6541,10 +6541,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
-#ifdef CONFIG_KVM_XICS
- .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
- .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
-#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
@@ -6662,6 +6658,22 @@ static int kvmppc_book3s_init_hv(void)
return r;
}
+#if defined(CONFIG_KVM_XICS)
+ /*
+ * IRQ bypass is supported only for interrupts whose EOI operations are
+ * handled via OPAL calls. Therefore, register IRQ bypass handlers
+ * exclusively for PowerNV KVM when booted with 'xive=off', indicating
+ * the use of the emulated XICS interrupt controller.
+ */
+ if (!kvmhv_on_pseries()) {
+ pr_info("KVM-HV: Enabling IRQ bypass\n");
+ kvm_ops_hv.irq_bypass_add_producer =
+ kvmppc_irq_bypass_add_producer_hv;
+ kvm_ops_hv.irq_bypass_del_producer =
+ kvmppc_irq_bypass_del_producer_hv;
+ }
+#endif
+
kvm_ops_hv.owner = THIS_MODULE;
kvmppc_hv_ops = &kvm_ops_hv;
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
index e5c7ce1fb761..87691cf86cae 100644
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -123,6 +123,12 @@ static size_t gs_msg_ops_vcpu_get_size(struct kvmppc_gs_msg *gsm)
case KVMPPC_GSID_PROCESS_TABLE:
case KVMPPC_GSID_RUN_INPUT:
case KVMPPC_GSID_RUN_OUTPUT:
+ /* Host wide counters */
+ case KVMPPC_GSID_L0_GUEST_HEAP:
+ case KVMPPC_GSID_L0_GUEST_HEAP_MAX:
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE:
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX:
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM:
break;
default:
size += kvmppc_gse_total_size(kvmppc_gsid_size(iden));
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6a4805968966..791d1942a058 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -572,7 +572,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
/*
* Return the number of jiffies until the next timeout. If the timeout is
- * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * longer than the TIMER_NEXT_MAX_DELTA, then return TIMER_NEXT_MAX_DELTA
* because the larger value can break the timer APIs.
*/
static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
@@ -598,7 +598,7 @@ static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
if (do_div(nr_jiffies, tb_ticks_per_jiffy))
nr_jiffies++;
- return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+ return min_t(unsigned long long, nr_jiffies, TIMER_NEXT_MAX_DELTA);
}
static void arm_next_watchdog(struct kvm_vcpu *vcpu)
@@ -616,10 +616,10 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu)
spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
nr_jiffies = watchdog_next_timeout(vcpu);
/*
- * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+ * If the number of jiffies of watchdog timer >= TIMER_NEXT_MAX_DELTA
* then do not run the watchdog timer as this can break timer APIs.
*/
- if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ if (nr_jiffies < TIMER_NEXT_MAX_DELTA)
mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
else
timer_delete(&vcpu->arch.wdt_timer);
diff --git a/arch/powerpc/kvm/guest-state-buffer.c b/arch/powerpc/kvm/guest-state-buffer.c
index b80dbc58621f..871cf60ddeb6 100644
--- a/arch/powerpc/kvm/guest-state-buffer.c
+++ b/arch/powerpc/kvm/guest-state-buffer.c
@@ -92,6 +92,10 @@ static int kvmppc_gsid_class(u16 iden)
(iden <= KVMPPC_GSE_GUESTWIDE_END))
return KVMPPC_GS_CLASS_GUESTWIDE;
+ if ((iden >= KVMPPC_GSE_HOSTWIDE_START) &&
+ (iden <= KVMPPC_GSE_HOSTWIDE_END))
+ return KVMPPC_GS_CLASS_HOSTWIDE;
+
if ((iden >= KVMPPC_GSE_META_START) && (iden <= KVMPPC_GSE_META_END))
return KVMPPC_GS_CLASS_META;
@@ -118,6 +122,21 @@ static int kvmppc_gsid_type(u16 iden)
int type = -1;
switch (kvmppc_gsid_class(iden)) {
+ case KVMPPC_GS_CLASS_HOSTWIDE:
+ switch (iden) {
+ case KVMPPC_GSID_L0_GUEST_HEAP:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_HEAP_MAX:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM:
+ type = KVMPPC_GSE_BE64;
+ break;
+ }
+ break;
case KVMPPC_GS_CLASS_GUESTWIDE:
switch (iden) {
case KVMPPC_GSID_HOST_STATE_SIZE:
@@ -187,6 +206,9 @@ unsigned long kvmppc_gsid_flags(u16 iden)
case KVMPPC_GS_CLASS_GUESTWIDE:
flags = KVMPPC_GS_FLAGS_WIDE;
break;
+ case KVMPPC_GS_CLASS_HOSTWIDE:
+ flags = KVMPPC_GS_FLAGS_HOST_WIDE;
+ break;
case KVMPPC_GS_CLASS_META:
case KVMPPC_GS_CLASS_DWORD_REG:
case KVMPPC_GS_CLASS_WORD_REG:
@@ -310,6 +332,13 @@ static inline int kvmppc_gse_flatten_iden(u16 iden)
bit += KVMPPC_GSE_GUESTWIDE_COUNT;
+ if (class == KVMPPC_GS_CLASS_HOSTWIDE) {
+ bit += iden - KVMPPC_GSE_HOSTWIDE_START;
+ return bit;
+ }
+
+ bit += KVMPPC_GSE_HOSTWIDE_COUNT;
+
if (class == KVMPPC_GS_CLASS_META) {
bit += iden - KVMPPC_GSE_META_START;
return bit;
@@ -356,6 +385,12 @@ static inline u16 kvmppc_gse_unflatten_iden(int bit)
}
bit -= KVMPPC_GSE_GUESTWIDE_COUNT;
+ if (bit < KVMPPC_GSE_HOSTWIDE_COUNT) {
+ iden = KVMPPC_GSE_HOSTWIDE_START + bit;
+ return iden;
+ }
+ bit -= KVMPPC_GSE_HOSTWIDE_COUNT;
+
if (bit < KVMPPC_GSE_META_COUNT) {
iden = KVMPPC_GSE_META_START + bit;
return iden;
@@ -588,6 +623,8 @@ int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags)
if (flags & KVMPPC_GS_FLAGS_WIDE)
hflags |= H_GUEST_FLAGS_WIDE;
+ if (flags & KVMPPC_GS_FLAGS_HOST_WIDE)
+ hflags |= H_GUEST_FLAGS_HOST_WIDE;
rc = plpar_guest_set_state(hflags, gsb->guest_id, gsb->vcpu_id,
__pa(gsb->hdr), gsb->capacity, &i);
@@ -613,6 +650,8 @@ int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags)
if (flags & KVMPPC_GS_FLAGS_WIDE)
hflags |= H_GUEST_FLAGS_WIDE;
+ if (flags & KVMPPC_GS_FLAGS_HOST_WIDE)
+ hflags |= H_GUEST_FLAGS_HOST_WIDE;
rc = plpar_guest_get_state(hflags, gsb->guest_id, gsb->vcpu_id,
__pa(gsb->hdr), gsb->capacity, &i);
diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c
index bfd225329a18..5ccca306997a 100644
--- a/arch/powerpc/kvm/test-guest-state-buffer.c
+++ b/arch/powerpc/kvm/test-guest-state-buffer.c
@@ -5,6 +5,7 @@
#include <kunit/test.h>
#include <asm/guest-state-buffer.h>
+#include <asm/kvm_ppc.h>
static void test_creating_buffer(struct kunit *test)
{
@@ -141,6 +142,16 @@ static void test_gs_bitmap(struct kunit *test)
i++;
}
+ for (u16 iden = KVMPPC_GSID_L0_GUEST_HEAP;
+ iden <= KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM; iden++) {
+ kvmppc_gsbm_set(&gsbm, iden);
+ kvmppc_gsbm_set(&gsbm1, iden);
+ KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden));
+ kvmppc_gsbm_clear(&gsbm, iden);
+ KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden));
+ i++;
+ }
+
for (u16 iden = KVMPPC_GSID_RUN_INPUT; iden <= KVMPPC_GSID_VPA;
iden++) {
kvmppc_gsbm_set(&gsbm, iden);
@@ -309,12 +320,215 @@ static void test_gs_msg(struct kunit *test)
kvmppc_gsm_free(gsm);
}
+/* Test data struct for hostwide/L0 counters */
+struct kvmppc_gs_msg_test_hostwide_data {
+ u64 guest_heap;
+ u64 guest_heap_max;
+ u64 guest_pgtable_size;
+ u64 guest_pgtable_size_max;
+ u64 guest_pgtable_reclaim;
+};
+
+static size_t test_hostwide_get_size(struct kvmppc_gs_msg *gsm)
+
+{
+ size_t size = 0;
+ u16 ids[] = {
+ KVMPPC_GSID_L0_GUEST_HEAP,
+ KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(ids); i++)
+ size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i]));
+ return size;
+}
+
+static int test_hostwide_fill_info(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{
+ struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data;
+
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP,
+ data->guest_heap);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ data->guest_heap_max);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ data->guest_pgtable_size);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ data->guest_pgtable_size_max);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+ data->guest_pgtable_reclaim);
+
+ return 0;
+}
+
+static int test_hostwide_refresh_info(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb)
+{
+ struct kvmppc_gs_parser gsp = { 0 };
+ struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data;
+ struct kvmppc_gs_elem *gse;
+ int rc;
+
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ if (rc < 0)
+ return rc;
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+ if (gse)
+ data->guest_heap = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ if (gse)
+ data->guest_heap_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ if (gse)
+ data->guest_pgtable_size = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ if (gse)
+ data->guest_pgtable_size_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+ if (gse)
+ data->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse);
+
+ return 0;
+}
+
+static struct kvmppc_gs_msg_ops gs_msg_test_hostwide_ops = {
+ .get_size = test_hostwide_get_size,
+ .fill_info = test_hostwide_fill_info,
+ .refresh_info = test_hostwide_refresh_info,
+};
+
+static void test_gs_hostwide_msg(struct kunit *test)
+{
+ struct kvmppc_gs_msg_test_hostwide_data test_data = {
+ .guest_heap = 0xdeadbeef,
+ .guest_heap_max = ~0ULL,
+ .guest_pgtable_size = 0xff,
+ .guest_pgtable_size_max = 0xffffff,
+ .guest_pgtable_reclaim = 0xdeadbeef,
+ };
+ struct kvmppc_gs_msg *gsm;
+ struct kvmppc_gs_buff *gsb;
+
+ gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND,
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+ gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+ kvmppc_gsm_fill_info(gsm, gsb);
+
+ memset(&test_data, 0, sizeof(test_data));
+
+ kvmppc_gsm_refresh_info(gsm, gsb);
+ KUNIT_EXPECT_EQ(test, test_data.guest_heap, 0xdeadbeef);
+ KUNIT_EXPECT_EQ(test, test_data.guest_heap_max, ~0ULL);
+ KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size, 0xff);
+ KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size_max, 0xffffff);
+ KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_reclaim, 0xdeadbeef);
+
+ kvmppc_gsm_free(gsm);
+}
+
+/* Test if the H_GUEST_GET_STATE for hostwide counters works */
+static void test_gs_hostwide_counters(struct kunit *test)
+{
+ struct kvmppc_gs_msg_test_hostwide_data test_data;
+ struct kvmppc_gs_parser gsp = { 0 };
+
+ struct kvmppc_gs_msg *gsm;
+ struct kvmppc_gs_buff *gsb;
+ struct kvmppc_gs_elem *gse;
+ int rc;
+
+ if (!kvmhv_on_pseries())
+ kunit_skip(test, "This test need a kmv-hv guest");
+
+ gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND,
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+ gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+ kvmppc_gsm_fill_info(gsm, gsb);
+
+ /* With HOST_WIDE flags guestid and vcpuid will be ignored */
+ rc = kvmppc_gsb_recv(gsb, KVMPPC_GS_FLAGS_HOST_WIDE);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* Parse the guest state buffer is successful */
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* Parse the GSB and get the counters */
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter missing");
+ kunit_info(test, "Guest Heap Size=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter max missing");
+ kunit_info(test, "Guest Heap Size Max=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size missing");
+ kunit_info(test, "Guest Page-table Size=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size-max missing");
+ kunit_info(test, "Guest Page-table Size Max=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table reclaim size missing");
+ kunit_info(test, "Guest Page-table Reclaim Size=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ kvmppc_gsm_free(gsm);
+ kvmppc_gsb_free(gsb);
+}
+
static struct kunit_case guest_state_buffer_testcases[] = {
KUNIT_CASE(test_creating_buffer),
KUNIT_CASE(test_adding_element),
KUNIT_CASE(test_gs_bitmap),
KUNIT_CASE(test_gs_parsing),
KUNIT_CASE(test_gs_msg),
+ KUNIT_CASE(test_gs_hostwide_msg),
+ KUNIT_CASE(test_gs_hostwide_counters),
{}
};
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 45817ab82bb4..14b0e23f601f 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -38,11 +38,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
{
/* type has to be known at build time for optimization */
-
- /* The BUILD_BUG_ON below breaks in funny ways, commented out
- * for now ... -BenH
BUILD_BUG_ON(!__builtin_constant_p(type));
- */
switch (type) {
case EXT_INTR_EXITS:
vcpu->stat.ext_intr_exits++;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index dd8a4b52a0cc..481f968e42c7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -3,6 +3,8 @@
# Makefile for ppc-specific library files..
#
+obj-y += crypto/
+
CFLAGS_code-patching.o += -fno-stack-protector
CFLAGS_feature-fixups.o += -fno-stack-protector
@@ -79,9 +81,9 @@ CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
obj-$(CONFIG_CRC32_ARCH) += crc32-powerpc.o
-crc32-powerpc-y := crc32-glue.o crc32c-vpmsum_asm.o
+crc32-powerpc-y := crc32.o crc32c-vpmsum_asm.o
obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-powerpc.o
-crc-t10dif-powerpc-y := crc-t10dif-glue.o crct10dif-vpmsum_asm.o
+crc-t10dif-powerpc-y := crc-t10dif.o crct10dif-vpmsum_asm.o
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/crc-t10dif-glue.c b/arch/powerpc/lib/crc-t10dif.c
index f411b0120cc5..be23ded3a9df 100644
--- a/arch/powerpc/lib/crc-t10dif-glue.c
+++ b/arch/powerpc/lib/crc-t10dif.c
@@ -6,22 +6,22 @@
* [based on crc32c-vpmsum_glue.c]
*/
-#include <linux/crc-t10dif.h>
+#include <asm/switch_to.h>
#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
+#include <linux/crc-t10dif.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/uaccess.h>
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
#define VECTOR_BREAKPOINT 64
-static DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len);
@@ -71,7 +71,7 @@ static int __init crc_t10dif_powerpc_init(void)
static_branch_enable(&have_vec_crypto);
return 0;
}
-arch_initcall(crc_t10dif_powerpc_init);
+subsys_initcall(crc_t10dif_powerpc_init);
static void __exit crc_t10dif_powerpc_exit(void)
{
diff --git a/arch/powerpc/lib/crc32-vpmsum_core.S b/arch/powerpc/lib/crc-vpmsum-template.S
index b0f87f595b26..b0f87f595b26 100644
--- a/arch/powerpc/lib/crc32-vpmsum_core.S
+++ b/arch/powerpc/lib/crc-vpmsum-template.S
diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32.c
index dbd10f339183..0d9befb6e7b8 100644
--- a/arch/powerpc/lib/crc32-glue.c
+++ b/arch/powerpc/lib/crc32.c
@@ -1,19 +1,20 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/crc32.h>
+#include <asm/switch_to.h>
#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
+#include <linux/crc32.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/uaccess.h>
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
#define VECTOR_BREAKPOINT 512
-static DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len);
@@ -72,7 +73,7 @@ static int __init crc32_powerpc_init(void)
static_branch_enable(&have_vec_crypto);
return 0;
}
-arch_initcall(crc32_powerpc_init);
+subsys_initcall(crc32_powerpc_init);
static void __exit crc32_powerpc_exit(void)
{
diff --git a/arch/powerpc/lib/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc32c-vpmsum_asm.S
index bf442004ea1f..1b35c55cce0a 100644
--- a/arch/powerpc/lib/crc32c-vpmsum_asm.S
+++ b/arch/powerpc/lib/crc32c-vpmsum_asm.S
@@ -839,4 +839,4 @@
#define CRC_FUNCTION_NAME __crc32c_vpmsum
#define REFLECT
-#include "crc32-vpmsum_core.S"
+#include "crc-vpmsum-template.S"
diff --git a/arch/powerpc/lib/crct10dif-vpmsum_asm.S b/arch/powerpc/lib/crct10dif-vpmsum_asm.S
index f0b93a0fe168..47a6266d89a8 100644
--- a/arch/powerpc/lib/crct10dif-vpmsum_asm.S
+++ b/arch/powerpc/lib/crct10dif-vpmsum_asm.S
@@ -842,4 +842,4 @@
.octa 0x0000000000000000000000018bb70000
#define CRC_FUNCTION_NAME __crct10dif_vpmsum
-#include "crc32-vpmsum_core.S"
+#include "crc-vpmsum-template.S"
diff --git a/arch/powerpc/lib/crypto/Kconfig b/arch/powerpc/lib/crypto/Kconfig
new file mode 100644
index 000000000000..3f9e1bbd9905
--- /dev/null
+++ b/arch/powerpc/lib/crypto/Kconfig
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA20_P10
+ tristate
+ depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_P10
+ tristate
+ depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+ depends on BROKEN # Needs to be fixed to work in softirq context
+ default CRYPTO_LIB_POLY1305
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+ select CRYPTO_LIB_POLY1305_GENERIC
+
+config CRYPTO_SHA256_PPC_SPE
+ tristate
+ depends on SPE
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
diff --git a/arch/powerpc/lib/crypto/Makefile b/arch/powerpc/lib/crypto/Makefile
new file mode 100644
index 000000000000..27f231f8e334
--- /dev/null
+++ b/arch/powerpc/lib/crypto/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
+chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
+
+obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
+poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
+
+obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
+sha256-ppc-spe-y := sha256.o sha256-spe-asm.o
diff --git a/arch/powerpc/lib/crypto/chacha-p10-glue.c b/arch/powerpc/lib/crypto/chacha-p10-glue.c
new file mode 100644
index 000000000000..fcd23c6f1590
--- /dev/null
+++ b/arch/powerpc/lib/crypto/chacha-p10-glue.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ChaCha stream cipher (P10 accelerated)
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/sizes.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void chacha_p10le_8x(const struct chacha_state *state, u8 *dst,
+ const u8 *src, unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
+static void vsx_begin(void)
+{
+ preempt_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ preempt_enable();
+}
+
+static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ unsigned int l = bytes & ~0x0FF;
+
+ if (l > 0) {
+ chacha_p10le_8x(state, dst, src, l, nrounds);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state->x[12] += l / CHACHA_BLOCK_SIZE;
+ }
+
+ if (bytes > 0)
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ vsx_begin();
+ chacha_p10_do_8x(state, dst, src, todo, nrounds);
+ vsx_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_p10);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init chacha_p10_init(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ static_branch_enable(&have_p10);
+ return 0;
+}
+subsys_initcall(chacha_p10_init);
+
+static void __exit chacha_p10_exit(void)
+{
+}
+module_exit(chacha_p10_exit);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (P10 accelerated)");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/crypto/chacha-p10le-8x.S b/arch/powerpc/lib/crypto/chacha-p10le-8x.S
index 17bedb66b822..b29562bd5d40 100644
--- a/arch/powerpc/crypto/chacha-p10le-8x.S
+++ b/arch/powerpc/lib/crypto/chacha-p10le-8x.S
@@ -7,9 +7,6 @@
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com>
#
-# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
-# size_t len, int nrounds);
-#
# do rounds, 8 quarter rounds
# 1. a += b; d ^= a; d <<<= 16;
# 2. c += d; b ^= c; b <<<= 12;
@@ -575,7 +572,8 @@
.endm
#
-# chacha20_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
+# void chacha_p10le_8x(const struct chacha_state *state, u8 *dst, const u8 *src,
+# unsigned int len, int nrounds);
#
SYM_FUNC_START(chacha_p10le_8x)
.align 5
diff --git a/arch/powerpc/lib/crypto/poly1305-p10-glue.c b/arch/powerpc/lib/crypto/poly1305-p10-glue.c
new file mode 100644
index 000000000000..3f1664a724b6
--- /dev/null
+++ b/arch/powerpc/lib/crypto/poly1305-p10-glue.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Poly1305 authenticator algorithm, RFC7539.
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+#include <asm/switch_to.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/unaligned.h>
+
+asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen);
+asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit);
+asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
+static void vsx_begin(void)
+{
+ preempt_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ preempt_enable();
+}
+
+void poly1305_block_init_arch(struct poly1305_block_state *dctx,
+ const u8 raw_key[POLY1305_BLOCK_SIZE])
+{
+ if (!static_key_enabled(&have_p10))
+ return poly1305_block_init_generic(dctx, raw_key);
+
+ dctx->h = (struct poly1305_state){};
+ dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0);
+ dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8);
+}
+EXPORT_SYMBOL_GPL(poly1305_block_init_arch);
+
+void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
+ unsigned int len, u32 padbit)
+{
+ if (!static_key_enabled(&have_p10))
+ return poly1305_blocks_generic(state, src, len, padbit);
+ vsx_begin();
+ if (len >= POLY1305_BLOCK_SIZE * 4) {
+ poly1305_p10le_4blocks(state, src, len);
+ src += len - (len % (POLY1305_BLOCK_SIZE * 4));
+ len %= POLY1305_BLOCK_SIZE * 4;
+ }
+ while (len >= POLY1305_BLOCK_SIZE) {
+ poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit);
+ len -= POLY1305_BLOCK_SIZE;
+ src += POLY1305_BLOCK_SIZE;
+ }
+ vsx_end();
+}
+EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
+
+void poly1305_emit_arch(const struct poly1305_state *state,
+ u8 digest[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4])
+{
+ if (!static_key_enabled(&have_p10))
+ return poly1305_emit_generic(state, digest, nonce);
+ poly1305_emit_64(state, nonce, digest);
+}
+EXPORT_SYMBOL_GPL(poly1305_emit_arch);
+
+bool poly1305_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_p10);
+}
+EXPORT_SYMBOL(poly1305_is_arch_optimized);
+
+static int __init poly1305_p10_init(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ static_branch_enable(&have_p10);
+ return 0;
+}
+subsys_initcall(poly1305_p10_init);
+
+static void __exit poly1305_p10_exit(void)
+{
+}
+module_exit(poly1305_p10_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_DESCRIPTION("Optimized Poly1305 for P10");
diff --git a/arch/powerpc/crypto/poly1305-p10le_64.S b/arch/powerpc/lib/crypto/poly1305-p10le_64.S
index a3c1987f1ecd..a3c1987f1ecd 100644
--- a/arch/powerpc/crypto/poly1305-p10le_64.S
+++ b/arch/powerpc/lib/crypto/poly1305-p10le_64.S
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/lib/crypto/sha256-spe-asm.S
index cd99d71dae34..cd99d71dae34 100644
--- a/arch/powerpc/crypto/sha256-spe-asm.S
+++ b/arch/powerpc/lib/crypto/sha256-spe-asm.S
diff --git a/arch/powerpc/lib/crypto/sha256.c b/arch/powerpc/lib/crypto/sha256.c
new file mode 100644
index 000000000000..6b0f079587eb
--- /dev/null
+++ b/arch/powerpc/lib/crypto/sha256.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 Secure Hash Algorithm, SPE optimized
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/switch_to.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equals to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 1024
+
+extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+ /* We just start SPE operations and will save SPE registers later. */
+ preempt_disable();
+ enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+ disable_kernel_spe();
+ /* reenable preemption */
+ preempt_enable();
+}
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ do {
+ /* cut input data into smaller blocks */
+ u32 unit = min_t(size_t, nblocks,
+ MAX_BYTES / SHA256_BLOCK_SIZE);
+
+ spe_begin();
+ ppc_spe_sha256_transform(state, data, unit);
+ spe_end();
+
+ data += unit * SHA256_BLOCK_SIZE;
+ nblocks -= unit;
+ } while (nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+ return true;
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm, SPE optimized");
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index 58ed6bd613a6..54340912398f 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -45,7 +45,7 @@ int exit_vmx_usercopy(void)
* set and we are preemptible. The hack here is to schedule a
* decrementer to fire here and reschedule for us if necessary.
*/
- if (IS_ENABLED(CONFIG_PREEMPTION) && need_resched())
+ if (need_irq_preemption() && need_resched())
set_dec(1);
return 0;
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c156fe0d53c3..806c74e0d5ab 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
@@ -218,7 +219,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
// Read/write fault blocked by KUAP is bad, it can never succeed.
if (bad_kuap_fault(regs, address, is_write)) {
pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
- is_write ? "write" : "read", address,
+ str_write_read(is_write), address,
from_kuid(&init_user_ns, current_uid()));
// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
@@ -625,7 +626,7 @@ static void __bad_page_fault(struct pt_regs *regs, int sig)
case INTERRUPT_DATA_STORAGE:
case INTERRUPT_H_DATA_STORAGE:
pr_alert("BUG: %s on %s at 0x%08lx\n", msg,
- is_write ? "write" : "read", regs->dar);
+ str_write_read(is_write), regs->dar);
break;
case INTERRUPT_DATA_SEGMENT:
pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar);
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 8b54f12d1889..ab1505cf42bf 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -54,20 +54,13 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
{
pmd_t *pmdp = pmd_off_k(va);
pte_t *ptep;
-
- if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M))
- return -EINVAL;
+ unsigned int shift = mmu_psize_to_shift(psize);
if (new) {
if (WARN_ON(slab_is_available()))
return -EINVAL;
- if (psize == MMU_PAGE_512K) {
- ptep = early_pte_alloc_kernel(pmdp, va);
- /* The PTE should never be already present */
- if (WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
- return -EINVAL;
- } else {
+ if (psize == MMU_PAGE_8M) {
if (WARN_ON(!pmd_none(*pmdp) || !pmd_none(*(pmdp + 1))))
return -EINVAL;
@@ -78,20 +71,25 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
pmd_populate_kernel(&init_mm, pmdp + 1, ptep);
ptep = (pte_t *)pmdp;
+ } else {
+ ptep = early_pte_alloc_kernel(pmdp, va);
+ /* The PTE should never be already present */
+ if (WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
+ return -EINVAL;
}
} else {
- if (psize == MMU_PAGE_512K)
- ptep = pte_offset_kernel(pmdp, va);
- else
+ if (psize == MMU_PAGE_8M)
ptep = (pte_t *)pmdp;
+ else
+ ptep = pte_offset_kernel(pmdp, va);
}
if (WARN_ON(!ptep))
return -ENOMEM;
set_huge_pte_at(&init_mm, va, ptep,
- pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)),
- 1UL << mmu_psize_to_shift(psize));
+ arch_make_huge_pte(pfn_pte(pa >> PAGE_SHIFT, prot), shift, 0),
+ 1UL << shift);
return 0;
}
@@ -123,14 +121,18 @@ static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
unsigned long p = offset;
int err = 0;
- WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K));
+ WARN_ON(!IS_ALIGNED(offset, SZ_16K) || !IS_ALIGNED(top, SZ_16K));
+ for (; p < ALIGN(p, SZ_512K) && p < top && !err; p += SZ_16K, v += SZ_16K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new);
for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K)
err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M)
err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K)
err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+ for (; p < ALIGN_DOWN(top, SZ_16K) && p < top && !err; p += SZ_16K, v += SZ_16K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new);
if (!new)
flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top);
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 6beacaec63d3..4c26912c2e3c 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,8 +51,16 @@
EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
} while (0)
-/* Sign-extended 32-bit immediate load */
+/*
+ * Sign-extended 32-bit immediate load
+ *
+ * If this is a dummy pass (!image), account for
+ * maximum possible instructions.
+ */
#define PPC_LI32(d, i) do { \
+ if (!image) \
+ ctx->idx += 2; \
+ else { \
if ((int)(uintptr_t)(i) >= -32768 && \
(int)(uintptr_t)(i) < 32768) \
EMIT(PPC_RAW_LI(d, i)); \
@@ -60,10 +68,15 @@
EMIT(PPC_RAW_LIS(d, IMM_H(i))); \
if (IMM_L(i)) \
EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
- } } while(0)
+ } \
+ } } while (0)
#ifdef CONFIG_PPC64
+/* If dummy pass (!image), account for maximum possible instructions */
#define PPC_LI64(d, i) do { \
+ if (!image) \
+ ctx->idx += 5; \
+ else { \
if ((long)(i) >= -2147483648 && \
(long)(i) < 2147483648) \
PPC_LI32(d, i); \
@@ -84,7 +97,8 @@
if ((uintptr_t)(i) & 0x000000000000ffffULL) \
EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \
0xffff)); \
- } } while (0)
+ } \
+ } } while (0)
#define PPC_LI_ADDR PPC_LI64
#ifndef CONFIG_PPC_KERNEL_PCREL
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..c0684733e9d6 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -504,10 +504,11 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
if (!p->jited)
PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
- if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func,
- BRANCH_SET_LINK)) {
- if (image)
- image[ctx->idx] = ppc_inst_val(branch_insn);
+ /* Account for max possible instructions during dummy pass for size calculation */
+ if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
+ (unsigned long)p->bpf_func,
+ BRANCH_SET_LINK)) {
+ image[ctx->idx] = ppc_inst_val(branch_insn);
ctx->idx++;
} else {
EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
@@ -889,7 +890,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
/* Reserve space to patch branch instruction to skip fexit progs */
- im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
EMIT(PPC_RAW_NOP());
}
@@ -912,7 +914,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
PPC_LI_ADDR(_R3, im);
ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
(unsigned long)__bpf_tramp_exit);
@@ -973,25 +976,9 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
struct bpf_tramp_image im;
- void *image;
int ret;
- /*
- * Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
- * This will NOT cause fragmentation in direct map, as we do not
- * call set_memory_*() on this buffer.
- *
- * We cannot use kvmalloc here, because we need image to be in
- * module memory range.
- */
- image = bpf_jit_alloc_exec(PAGE_SIZE);
- if (!image)
- return -ENOMEM;
-
- ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
- m, flags, tlinks, func_addr);
- bpf_jit_free_exec(image);
-
+ ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
return ret;
}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index c4db278dae36..0aace304dfe1 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -313,7 +313,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u64 func_addr;
u32 true_cond;
u32 tmp_idx;
- int j;
if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) &&
(BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) &&
@@ -1099,13 +1098,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
- tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
- /* padding to allow full 4 instructions for later patching */
- if (!image)
- for (j = ctx->idx - tmp_idx; j < 4; j++)
- EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 233703b06d7c..5daa77aee7f7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -227,7 +227,14 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
#ifdef CONFIG_PPC_KERNEL_PCREL
reladdr = func_addr - local_paca->kernelbase;
- if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
+ /*
+ * If fimage is NULL (the initial pass to find image size),
+ * account for the maximum no. of instructions possible.
+ */
+ if (!fimage) {
+ ctx->idx += 7;
+ return 0;
+ } else if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
/* Align for subsequent prefix instruction */
if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
@@ -412,7 +419,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u64 imm64;
u32 true_cond;
u32 tmp_idx;
- int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -1046,12 +1052,7 @@ emit_clear:
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
- tmp_idx = ctx->idx;
PPC_LI64(dst_reg, imm64);
- /* padding to allow full 5 instructions for later patching */
- if (!image)
- for (j = ctx->idx - tmp_idx; j < 5; j++)
- EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index ac2cf58d62db..7f53fcb7495a 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -18,6 +18,8 @@ obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
obj-$(CONFIG_VPA_PMU) += vpa-pmu.o
+obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o
+
obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b906d28f74fd..8b0081441f85 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2239,6 +2239,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -2320,7 +2321,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
+ perf_sample_data_init(&data, ~0ULL, last_period);
if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
perf_get_data_addr(event, regs, &data.addr);
@@ -2343,12 +2344,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
- if (perf_event_overflow(event, &data, regs))
- power_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
} else if (period) {
/* Account for interrupt in case of invalid SIAR */
- if (perf_event_account_interrupt(event))
- power_pmu_stop(event, 0);
+ perf_event_account_interrupt(event);
}
}
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 1a53ab08447c..7120ab20cbfe 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -590,6 +590,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -632,10 +633,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- fsl_emb_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
}
diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c
new file mode 100644
index 000000000000..ae264c9080ef
--- /dev/null
+++ b/arch/powerpc/perf/kvm-hv-pmu.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Description: PMUs specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#define pr_fmt(fmt) "kvmppc-pmu: " fmt
+
+#include "asm-generic/local64.h"
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/ratelimit.h>
+#include <linux/kvm_host.h>
+#include <linux/gfp_types.h>
+#include <linux/pgtable.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock_types.h>
+#include <linux/spinlock.h>
+
+#include <asm/types.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+#include "asm/guest-state-buffer.h"
+
+enum kvmppc_pmu_eventid {
+ KVMPPC_EVENT_HOST_HEAP,
+ KVMPPC_EVENT_HOST_HEAP_MAX,
+ KVMPPC_EVENT_HOST_PGTABLE,
+ KVMPPC_EVENT_HOST_PGTABLE_MAX,
+ KVMPPC_EVENT_HOST_PGTABLE_RECLAIM,
+ KVMPPC_EVENT_MAX,
+};
+
+#define KVMPPC_PMU_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, kvmppc_events_sysfs_show, _id)
+
+static ssize_t kvmppc_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+/* Holds the hostwide stats */
+static struct kvmppc_hostwide_stats {
+ u64 guest_heap;
+ u64 guest_heap_max;
+ u64 guest_pgtable_size;
+ u64 guest_pgtable_size_max;
+ u64 guest_pgtable_reclaim;
+} l0_stats;
+
+/* Protect access to l0_stats */
+static DEFINE_SPINLOCK(lock_l0_stats);
+
+/* GSB related structs needed to talk to L0 */
+static struct kvmppc_gs_msg *gsm_l0_stats;
+static struct kvmppc_gs_buff *gsb_l0_stats;
+static struct kvmppc_gs_parser gsp_l0_stats;
+
+static struct attribute *kvmppc_pmu_events_attr[] = {
+ KVMPPC_PMU_EVENT_ATTR(host_heap, KVMPPC_EVENT_HOST_HEAP),
+ KVMPPC_PMU_EVENT_ATTR(host_heap_max, KVMPPC_EVENT_HOST_HEAP_MAX),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable, KVMPPC_EVENT_HOST_PGTABLE),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable_max, KVMPPC_EVENT_HOST_PGTABLE_MAX),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable_reclaim, KVMPPC_EVENT_HOST_PGTABLE_RECLAIM),
+ NULL,
+};
+
+static const struct attribute_group kvmppc_pmu_events_group = {
+ .name = "events",
+ .attrs = kvmppc_pmu_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-5");
+static struct attribute *kvmppc_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group kvmppc_pmu_format_group = {
+ .name = "format",
+ .attrs = kvmppc_pmu_format_attr,
+};
+
+static const struct attribute_group *kvmppc_pmu_attr_groups[] = {
+ &kvmppc_pmu_events_group,
+ &kvmppc_pmu_format_group,
+ NULL,
+};
+
+/*
+ * Issue the hcall to get the L0-host stats.
+ * Should be called with l0-stat lock held
+ */
+static int kvmppc_update_l0_stats(void)
+{
+ int rc;
+
+ /* With HOST_WIDE flags guestid and vcpuid will be ignored */
+ rc = kvmppc_gsb_recv(gsb_l0_stats, KVMPPC_GS_FLAGS_HOST_WIDE);
+ if (rc)
+ goto out;
+
+ /* Parse the guest state buffer is successful */
+ rc = kvmppc_gse_parse(&gsp_l0_stats, gsb_l0_stats);
+ if (rc)
+ goto out;
+
+ /* Update the l0 returned stats*/
+ memset(&l0_stats, 0, sizeof(l0_stats));
+ rc = kvmppc_gsm_refresh_info(gsm_l0_stats, gsb_l0_stats);
+
+out:
+ return rc;
+}
+
+/* Update the value of the given perf_event */
+static int kvmppc_pmu_event_update(struct perf_event *event)
+{
+ int rc;
+ u64 curr_val, prev_val;
+ unsigned long flags;
+ unsigned int config = event->attr.config;
+
+ /* Ensure no one else is modifying the l0_stats */
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ rc = kvmppc_update_l0_stats();
+ if (!rc) {
+ switch (config) {
+ case KVMPPC_EVENT_HOST_HEAP:
+ curr_val = l0_stats.guest_heap;
+ break;
+ case KVMPPC_EVENT_HOST_HEAP_MAX:
+ curr_val = l0_stats.guest_heap_max;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE:
+ curr_val = l0_stats.guest_pgtable_size;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE_MAX:
+ curr_val = l0_stats.guest_pgtable_size_max;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE_RECLAIM:
+ curr_val = l0_stats.guest_pgtable_reclaim;
+ break;
+ default:
+ rc = -ENOENT;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+
+ /* If no error than update the perf event */
+ if (!rc) {
+ prev_val = local64_xchg(&event->hw.prev_count, curr_val);
+ if (curr_val > prev_val)
+ local64_add(curr_val - prev_val, &event->count);
+ }
+
+ return rc;
+}
+
+static int kvmppc_pmu_event_init(struct perf_event *event)
+{
+ unsigned int config = event->attr.config;
+
+ pr_debug("%s: Event(%p) id=%llu cpu=%x on_cpu=%x config=%u",
+ __func__, event, event->id, event->cpu,
+ event->oncpu, config);
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (config >= KVMPPC_EVENT_MAX)
+ return -EINVAL;
+
+ local64_set(&event->hw.prev_count, 0);
+ local64_set(&event->count, 0);
+
+ return 0;
+}
+
+static void kvmppc_pmu_del(struct perf_event *event, int flags)
+{
+ kvmppc_pmu_event_update(event);
+}
+
+static int kvmppc_pmu_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ return kvmppc_pmu_event_update(event);
+ return 0;
+}
+
+static void kvmppc_pmu_read(struct perf_event *event)
+{
+ kvmppc_pmu_event_update(event);
+}
+
+/* Return the size of the needed guest state buffer */
+static size_t hostwide_get_size(struct kvmppc_gs_msg *gsm)
+
+{
+ size_t size = 0;
+ const u16 ids[] = {
+ KVMPPC_GSID_L0_GUEST_HEAP,
+ KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(ids); i++)
+ size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i]));
+ return size;
+}
+
+/* Populate the request guest state buffer */
+static int hostwide_fill_info(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{
+ int rc = 0;
+ struct kvmppc_hostwide_stats *stats = gsm->data;
+
+ /*
+ * It doesn't matter what values are put into request buffer as
+ * they are going to be overwritten anyways. But for the sake of
+ * testcode and symmetry contents of existing stats are put
+ * populated into the request guest state buffer.
+ */
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_HEAP,
+ stats->guest_heap);
+
+ if (!rc && kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ stats->guest_heap_max);
+
+ if (!rc && kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ stats->guest_pgtable_size);
+ if (!rc &&
+ kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ stats->guest_pgtable_size_max);
+ if (!rc &&
+ kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+ stats->guest_pgtable_reclaim);
+
+ return rc;
+}
+
+/* Parse and update the host wide stats from returned gsb */
+static int hostwide_refresh_info(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb)
+{
+ struct kvmppc_gs_parser gsp = { 0 };
+ struct kvmppc_hostwide_stats *stats = gsm->data;
+ struct kvmppc_gs_elem *gse;
+ int rc;
+
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ if (rc < 0)
+ return rc;
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+ if (gse)
+ stats->guest_heap = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ if (gse)
+ stats->guest_heap_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ if (gse)
+ stats->guest_pgtable_size = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ if (gse)
+ stats->guest_pgtable_size_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+ if (gse)
+ stats->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse);
+
+ return 0;
+}
+
+/* gsb-message ops for setting up/parsing */
+static struct kvmppc_gs_msg_ops gsb_ops_l0_stats = {
+ .get_size = hostwide_get_size,
+ .fill_info = hostwide_fill_info,
+ .refresh_info = hostwide_refresh_info,
+};
+
+static int kvmppc_init_hostwide(void)
+{
+ int rc = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ /* already registered ? */
+ if (gsm_l0_stats) {
+ rc = 0;
+ goto out;
+ }
+
+ /* setup the Guest state message/buffer to talk to L0 */
+ gsm_l0_stats = kvmppc_gsm_new(&gsb_ops_l0_stats, &l0_stats,
+ GSM_SEND, GFP_KERNEL);
+ if (!gsm_l0_stats) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Populate the Idents */
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+ /* allocate GSB. Guest/Vcpu Id is ignored */
+ gsb_l0_stats = kvmppc_gsb_new(kvmppc_gsm_size(gsm_l0_stats), 0, 0,
+ GFP_KERNEL);
+ if (!gsb_l0_stats) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* ask the ops to fill in the info */
+ rc = kvmppc_gsm_fill_info(gsm_l0_stats, gsb_l0_stats);
+
+out:
+ if (rc) {
+ if (gsm_l0_stats)
+ kvmppc_gsm_free(gsm_l0_stats);
+ if (gsb_l0_stats)
+ kvmppc_gsb_free(gsb_l0_stats);
+ gsm_l0_stats = NULL;
+ gsb_l0_stats = NULL;
+ }
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+ return rc;
+}
+
+static void kvmppc_cleanup_hostwide(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ if (gsm_l0_stats)
+ kvmppc_gsm_free(gsm_l0_stats);
+ if (gsb_l0_stats)
+ kvmppc_gsb_free(gsb_l0_stats);
+ gsm_l0_stats = NULL;
+ gsb_l0_stats = NULL;
+
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+}
+
+/* L1 wide counters PMU */
+static struct pmu kvmppc_pmu = {
+ .module = THIS_MODULE,
+ .task_ctx_nr = perf_sw_context,
+ .name = "kvm-hv",
+ .event_init = kvmppc_pmu_event_init,
+ .add = kvmppc_pmu_add,
+ .del = kvmppc_pmu_del,
+ .read = kvmppc_pmu_read,
+ .attr_groups = kvmppc_pmu_attr_groups,
+ .type = -1,
+ .scope = PERF_PMU_SCOPE_SYS_WIDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init kvmppc_register_pmu(void)
+{
+ int rc = -EOPNOTSUPP;
+
+ /* only support events for nestedv2 right now */
+ if (kvmhv_is_nestedv2()) {
+ rc = kvmppc_init_hostwide();
+ if (rc)
+ goto out;
+
+ /* Register the pmu */
+ rc = perf_pmu_register(&kvmppc_pmu, kvmppc_pmu.name, -1);
+ if (rc)
+ goto out;
+
+ pr_info("Registered kvm-hv pmu");
+ }
+
+out:
+ return rc;
+}
+
+static void __exit kvmppc_unregister_pmu(void)
+{
+ if (kvmhv_is_nestedv2()) {
+ kvmppc_cleanup_hostwide();
+
+ if (kvmppc_pmu.type != -1)
+ perf_pmu_unregister(&kvmppc_pmu);
+
+ pr_info("kvmhv_pmu unregistered.\n");
+ }
+}
+
+module_init(kvmppc_register_pmu);
+module_exit(kvmppc_unregister_pmu);
+MODULE_DESCRIPTION("KVM PPC Book3s-hv PMU");
+MODULE_AUTHOR("Vaibhav Jain <vaibhav@linux.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/44x/gpio.c b/arch/powerpc/platforms/44x/gpio.c
index e5f2319e5cbe..d540e261d85a 100644
--- a/arch/powerpc/platforms/44x/gpio.c
+++ b/arch/powerpc/platforms/44x/gpio.c
@@ -75,8 +75,7 @@ __ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
clrbits32(&regs->or, GPIO_MASK(gpio));
}
-static void
-ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+static int ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
unsigned long flags;
@@ -88,6 +87,8 @@ ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
spin_unlock_irqrestore(&chip->lock, flags);
pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+
+ return 0;
}
static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -179,7 +180,7 @@ static int __init ppc4xx_add_gpiochips(void)
gc->direction_input = ppc4xx_gpio_dir_in;
gc->direction_output = ppc4xx_gpio_dir_out;
gc->get = ppc4xx_gpio_get;
- gc->set = ppc4xx_gpio_set;
+ gc->set_rv = ppc4xx_gpio_set;
ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc);
if (ret)
diff --git a/arch/powerpc/platforms/44x/uic.c b/arch/powerpc/platforms/44x/uic.c
index 31f760c2ec5d..85daf841fd3f 100644
--- a/arch/powerpc/platforms/44x/uic.c
+++ b/arch/powerpc/platforms/44x/uic.c
@@ -254,8 +254,9 @@ static struct uic * __init uic_init_one(struct device_node *node)
}
uic->dcrbase = *dcrreg;
- uic->irqhost = irq_domain_add_linear(node, NR_UIC_INTS, &uic_host_ops,
- uic);
+ uic->irqhost = irq_domain_create_linear(of_fwnode_handle(node),
+ NR_UIC_INTS, &uic_host_ops,
+ uic);
if (! uic->irqhost)
return NULL; /* FIXME: panic? */
@@ -327,5 +328,5 @@ unsigned int uic_get_irq(void)
msr = mfdcr(primary_uic->dcrbase + UIC_MSR);
src = 32 - ffs(msr);
- return irq_linear_revmap(primary_uic->irqhost, src);
+ return irq_find_mapping(primary_uic->irqhost, src);
}
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index e995eb30bf09..2cf3c6237337 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -188,7 +188,8 @@ mpc5121_ads_cpld_pic_init(void)
cpld_pic_node = of_node_get(np);
- cpld_pic_host = irq_domain_add_linear(np, 16, &cpld_pic_host_ops, NULL);
+ cpld_pic_host = irq_domain_create_linear(of_fwnode_handle(np), 16,
+ &cpld_pic_host_ops, NULL);
if (!cpld_pic_host) {
printk(KERN_ERR "CPLD PIC: failed to allocate irq host!\n");
goto end;
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 19626cd42406..bc7f83cfec1d 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -168,7 +168,7 @@ static void __init media5200_init_irq(void)
spin_lock_init(&media5200_irq.lock);
- media5200_irq.irqhost = irq_domain_add_linear(fpga_np,
+ media5200_irq.irqhost = irq_domain_create_linear(of_fwnode_handle(fpga_np),
MEDIA5200_NUM_IRQS, &media5200_irq_ops, &media5200_irq);
if (!media5200_irq.irqhost)
goto out;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 1ea591ec6083..bda707d848a6 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -247,9 +247,9 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
if (!cascade_virq)
return;
- gpt->irqhost = irq_domain_add_linear(node, 1, &mpc52xx_gpt_irq_ops, gpt);
+ gpt->irqhost = irq_domain_create_linear(of_fwnode_handle(node), 1, &mpc52xx_gpt_irq_ops, gpt);
if (!gpt->irqhost) {
- dev_err(gpt->dev, "irq_domain_add_linear() failed\n");
+ dev_err(gpt->dev, "irq_domain_create_linear() failed\n");
return;
}
@@ -280,7 +280,7 @@ static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
return (in_be32(&gpt->regs->status) >> 8) & 1;
}
-static void
+static int
mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
{
struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
@@ -293,6 +293,8 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
raw_spin_lock_irqsave(&gpt->lock, flags);
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r);
raw_spin_unlock_irqrestore(&gpt->lock, flags);
+
+ return 0;
}
static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -334,7 +336,7 @@ static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt)
gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in;
gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out;
gpt->gc.get = mpc52xx_gpt_gpio_get;
- gpt->gc.set = mpc52xx_gpt_gpio_set;
+ gpt->gc.set_rv = mpc52xx_gpt_gpio_set;
gpt->gc.base = -1;
gpt->gc.parent = gpt->dev;
@@ -369,7 +371,7 @@ struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq)
mutex_lock(&mpc52xx_gpt_list_mutex);
list_for_each(pos, &mpc52xx_gpt_list) {
gpt = container_of(pos, struct mpc52xx_gpt_priv, list);
- if (gpt->irqhost && irq == irq_linear_revmap(gpt->irqhost, 0)) {
+ if (gpt->irqhost && irq == irq_find_mapping(gpt->irqhost, 0)) {
mutex_unlock(&mpc52xx_gpt_list_mutex);
return gpt;
}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 43c881d31ca6..eb6a4e745c08 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -446,7 +446,7 @@ void __init mpc52xx_init_irq(void)
* As last step, add an irq host to translate the real
* hw irq information provided by the ofw to linux virq
*/
- mpc52xx_irqhost = irq_domain_add_linear(picnode,
+ mpc52xx_irqhost = irq_domain_create_linear(of_fwnode_handle(picnode),
MPC52xx_IRQ_HIGHTESTHWIRQ,
&mpc52xx_irqhost_ops, NULL);
@@ -515,5 +515,5 @@ unsigned int mpc52xx_get_irq(void)
return 0;
}
- return irq_linear_revmap(mpc52xx_irqhost, irq);
+ return irq_find_mapping(mpc52xx_irqhost, irq);
}
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 4d8fa9ed1a67..6e37dfc6c5c9 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -92,10 +92,11 @@ static void mcu_power_off(void)
mutex_unlock(&mcu->lock);
}
-static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+static int mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
struct mcu *mcu = gpiochip_get_data(gc);
u8 bit = 1 << (4 + gpio);
+ int ret;
mutex_lock(&mcu->lock);
if (val)
@@ -103,14 +104,16 @@ static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
else
mcu->reg_ctrl |= bit;
- i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl);
+ ret = i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL,
+ mcu->reg_ctrl);
mutex_unlock(&mcu->lock);
+
+ return ret;
}
static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- mcu_gpio_set(gc, gpio, val);
- return 0;
+ return mcu_gpio_set(gc, gpio, val);
}
static int mcu_gpiochip_add(struct mcu *mcu)
@@ -123,7 +126,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
gc->can_sleep = 1;
gc->ngpio = MCU_NUM_GPIO;
gc->base = -1;
- gc->set = mcu_gpio_set;
+ gc->set_rv = mcu_gpio_set;
gc->direction_output = mcu_gpio_dir_out;
gc->parent = dev;
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 60e0b8947ce6..4b69fb321a68 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -83,7 +83,7 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
if (cause >> (i + 16))
break;
}
- return irq_linear_revmap(socrates_fpga_pic_irq_host,
+ return irq_find_mapping(socrates_fpga_pic_irq_host,
(irq_hw_number_t)i);
}
@@ -278,7 +278,7 @@ void __init socrates_fpga_pic_init(struct device_node *pic)
int i;
/* Setup an irq_domain structure */
- socrates_fpga_pic_irq_host = irq_domain_add_linear(pic,
+ socrates_fpga_pic_irq_host = irq_domain_create_linear(of_fwnode_handle(pic),
SOCRATES_FPGA_NUM_IRQS, &socrates_fpga_pic_host_ops, NULL);
if (socrates_fpga_pic_irq_host == NULL) {
pr_err("FPGA PIC: Unable to allocate host\n");
diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c
index a18fc7c99f83..a49d4a9ab3bc 100644
--- a/arch/powerpc/platforms/8xx/cpm1-ic.c
+++ b/arch/powerpc/platforms/8xx/cpm1-ic.c
@@ -59,7 +59,7 @@ static int cpm_get_irq(struct irq_desc *desc)
cpm_vec = in_be16(&data->reg->cpic_civr);
cpm_vec >>= 11;
- return irq_linear_revmap(data->host, cpm_vec);
+ return irq_find_mapping(data->host, cpm_vec);
}
static void cpm_cascade(struct irq_desc *desc)
@@ -110,7 +110,8 @@ static int cpm_pic_probe(struct platform_device *pdev)
out_be32(&data->reg->cpic_cimr, 0);
- data->host = irq_domain_add_linear(dev->of_node, 64, &cpm_pic_host_ops, data);
+ data->host = irq_domain_create_linear(of_fwnode_handle(dev->of_node),
+ 64, &cpm_pic_host_ops, data);
if (!data->host)
return -ENODEV;
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 1dc095ad48fc..7462c221115c 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -417,7 +417,7 @@ static void __cpm1_gpio16_set(struct cpm1_gpio16_chip *cpm1_gc, u16 pin_mask, in
out_be16(&iop->dat, cpm1_gc->cpdata);
}
-static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
unsigned long flags;
@@ -428,6 +428,8 @@ static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
__cpm1_gpio16_set(cpm1_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
}
static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
@@ -497,7 +499,7 @@ int cpm1_gpiochip_add16(struct device *dev)
gc->direction_input = cpm1_gpio16_dir_in;
gc->direction_output = cpm1_gpio16_dir_out;
gc->get = cpm1_gpio16_get;
- gc->set = cpm1_gpio16_set;
+ gc->set_rv = cpm1_gpio16_set;
gc->to_irq = cpm1_gpio16_to_irq;
gc->parent = dev;
gc->owner = THIS_MODULE;
@@ -554,7 +556,7 @@ static void __cpm1_gpio32_set(struct cpm1_gpio32_chip *cpm1_gc, u32 pin_mask, in
out_be32(&iop->dat, cpm1_gc->cpdata);
}
-static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc);
unsigned long flags;
@@ -565,6 +567,8 @@ static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
__cpm1_gpio32_set(cpm1_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
}
static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
@@ -618,7 +622,7 @@ int cpm1_gpiochip_add32(struct device *dev)
gc->direction_input = cpm1_gpio32_dir_in;
gc->direction_output = cpm1_gpio32_dir_out;
gc->get = cpm1_gpio32_get;
- gc->set = cpm1_gpio32_set;
+ gc->set_rv = cpm1_gpio32_set;
gc->parent = dev;
gc->owner = THIS_MODULE;
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index ea6b0e523c60..933d6ab7f512 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -80,7 +80,7 @@ unsigned int mpc8xx_get_irq(void)
if (irq == PIC_VEC_SPURRIOUS)
return 0;
- return irq_linear_revmap(mpc8xx_pic_host, irq);
+ return irq_find_mapping(mpc8xx_pic_host, irq);
}
@@ -146,7 +146,8 @@ void __init mpc8xx_pic_init(void)
if (!siu_reg)
goto out;
- mpc8xx_pic_host = irq_domain_add_linear(np, 64, &mpc8xx_pic_host_ops, NULL);
+ mpc8xx_pic_host = irq_domain_create_linear(of_fwnode_handle(np), 64,
+ &mpc8xx_pic_host_ops, NULL);
if (!mpc8xx_pic_host)
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 013d66304c31..91a8f0a7086e 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -149,8 +149,9 @@ static struct irq_domain * __init flipper_pic_init(struct device_node *np)
__flipper_quiesce(io_base);
- irq_domain = irq_domain_add_linear(np, FLIPPER_NR_IRQS,
- &flipper_irq_domain_ops, io_base);
+ irq_domain = irq_domain_create_linear(of_fwnode_handle(np),
+ FLIPPER_NR_IRQS,
+ &flipper_irq_domain_ops, io_base);
if (!irq_domain) {
pr_err("failed to allocate irq_domain\n");
return NULL;
@@ -172,7 +173,7 @@ unsigned int flipper_pic_get_irq(void)
return 0; /* no more IRQs pending */
irq = __ffs(irq_status);
- return irq_linear_revmap(flipper_irq_host, irq);
+ return irq_find_mapping(flipper_irq_host, irq);
}
/*
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 4d2d92de30af..b57e87b0b3ce 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -175,8 +175,9 @@ static struct irq_domain *__init hlwd_pic_init(struct device_node *np)
__hlwd_quiesce(io_base);
- irq_domain = irq_domain_add_linear(np, HLWD_NR_IRQS,
- &hlwd_irq_domain_ops, io_base);
+ irq_domain = irq_domain_create_linear(of_fwnode_handle(np),
+ HLWD_NR_IRQS,
+ &hlwd_irq_domain_ops, io_base);
if (!irq_domain) {
pr_err("failed to allocate irq_domain\n");
iounmap(io_base);
@@ -189,7 +190,7 @@ static struct irq_domain *__init hlwd_pic_init(struct device_node *np)
unsigned int hlwd_pic_get_irq(void)
{
unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host);
- return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0;
+ return hwirq ? irq_find_mapping(hlwd_irq_host, hwirq) : 0;
}
/*
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 03a7c51f2645..c37783a03d25 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -250,7 +250,7 @@ static unsigned int pmac_pic_get_irq(void)
raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
if (unlikely(irq < 0))
return 0;
- return irq_linear_revmap(pmac_pic_host, irq);
+ return irq_find_mapping(pmac_pic_host, irq);
}
static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node,
@@ -327,8 +327,9 @@ static void __init pmac_pic_probe_oldstyle(void)
/*
* Allocate an irq host
*/
- pmac_pic_host = irq_domain_add_linear(master, max_irqs,
- &pmac_pic_host_ops, NULL);
+ pmac_pic_host = irq_domain_create_linear(of_fwnode_handle(master),
+ max_irqs,
+ &pmac_pic_host_ops, NULL);
BUG_ON(pmac_pic_host == NULL);
irq_set_default_domain(pmac_pic_host);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 6de1cd5d8a58..e119ced05d10 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -45,6 +45,7 @@
#include <linux/root_dev.h>
#include <linux/bitops.h>
#include <linux/suspend.h>
+#include <linux/string_choices.h>
#include <linux/of.h>
#include <linux/of_platform.h>
@@ -238,8 +239,7 @@ static void __init l2cr_init(void)
_set_L2CR(0);
_set_L2CR(*l2cr);
pr_info("L2CR overridden (0x%x), backside cache is %s\n",
- *l2cr, ((*l2cr) & 0x80000000) ?
- "enabled" : "disabled");
+ *l2cr, str_enabled_disabled((*l2cr) & 0x80000000));
}
of_node_put(np);
break;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 09e7fe24fac1..88e92af8acf9 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -190,7 +190,7 @@ static int __init psurge_secondary_ipi_init(void)
{
int rc = -ENOMEM;
- psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
+ psurge_host = irq_domain_create_nomap(NULL, ~0, &psurge_host_ops, NULL);
if (psurge_host)
psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 8633891b7aa5..b4426a35aca3 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/time.h>
@@ -77,7 +78,7 @@ long __init pmac_time_init(void)
delta |= 0xFF000000UL;
dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0);
printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60,
- dst ? "on" : "off");
+ str_on_off(dst));
#endif
return delta;
}
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index d92759c21fae..e180bd8e1400 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -191,7 +191,8 @@ int __init opal_event_init(void)
* fall back to the legacy method (opal_event_request(...))
* anyway. */
dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event");
- opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS,
+ opal_event_irqchip.domain = irq_domain_create_linear(of_fwnode_handle(dn),
+ MAX_NUM_EVENTS,
&opal_event_domain_ops, &opal_event_irqchip);
of_node_put(dn);
if (!opal_event_irqchip.domain) {
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index ae4b549b5ca0..d8ccf2c9b98a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1897,7 +1897,7 @@ static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned
return -ENOMEM;
}
- hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn),
+ hose->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(hose->dn),
&pnv_msi_domain_info,
hose->dev_domain);
if (!hose->msi_domain) {
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index 61722133eb2d..22d91ac424dd 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/rcuwait.h>
+#include <linux/string_choices.h>
#include <asm/firmware.h>
#include <asm/lv1call.h>
@@ -724,7 +725,7 @@ static irqreturn_t ps3_notification_interrupt(int irq, void *data)
static int ps3_notification_read_write(struct ps3_notification_device *dev,
u64 lpar, int write)
{
- const char *op = write ? "write" : "read";
+ const char *op = str_write_read(write);
unsigned long flags;
int res;
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 95e96bd61a20..a4ad4b49eef7 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -743,7 +743,7 @@ void __init ps3_init_IRQ(void)
unsigned cpu;
struct irq_domain *host;
- host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL);
+ host = irq_domain_create_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL);
irq_set_default_domain(host);
for_each_possible_cpu(cpu) {
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 3f3e3492e436..57222678bb3f 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,7 +3,8 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
of_helpers.o rtas-work-area.o papr-sysparm.o \
- papr-vpd.o \
+ papr-rtas-common.o papr-vpd.o papr-indices.o \
+ papr-platform-dump.o papr-phy-attest.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o rng.o \
pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c
index 57fc1700f604..742ec52c9d4d 100644
--- a/arch/powerpc/platforms/pseries/htmdump.c
+++ b/arch/powerpc/platforms/pseries/htmdump.c
@@ -10,28 +10,40 @@
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/plpar_wrappers.h>
+#include <asm/kvm_guest.h>
static void *htm_buf;
+static void *htm_status_buf;
+static void *htm_info_buf;
+static void *htm_caps_buf;
static u32 nodeindex;
static u32 nodalchipindex;
static u32 coreindexonchip;
static u32 htmtype;
+static u32 htmconfigure;
+static u32 htmstart;
+static u32 htmsetup;
+static u64 htmflags;
+
static struct dentry *htmdump_debugfs_dir;
+#define HTM_ENABLE 1
+#define HTM_DISABLE 0
+#define HTM_NOWRAP 1
+#define HTM_WRAP 0
-static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
- size_t count, loff_t *ppos)
+/*
+ * Check the return code for H_HTM hcall.
+ * Return non-zero value (1) if either H_PARTIAL or H_SUCCESS
+ * is returned. For other return codes:
+ * Return zero if H_NOT_AVAILABLE.
+ * Return -EBUSY if hcall return busy.
+ * Return -EINVAL if any parameter or operation is not valid.
+ * Return -EPERM if HTM Virtualization Engine Technology code
+ * is not applied.
+ * Return -EIO if the HTM state is not valid.
+ */
+static ssize_t htm_return_check(long rc)
{
- void *htm_buf = filp->private_data;
- unsigned long page, read_size, available;
- loff_t offset;
- long rc;
-
- page = ALIGN_DOWN(*ppos, PAGE_SIZE);
- offset = (*ppos) % PAGE_SIZE;
-
- rc = htm_get_dump_hardware(nodeindex, nodalchipindex, coreindexonchip,
- htmtype, virt_to_phys(htm_buf), PAGE_SIZE, page);
-
switch (rc) {
case H_SUCCESS:
/* H_PARTIAL for the case where all available data can't be
@@ -65,6 +77,38 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
return -EPERM;
}
+ /*
+ * Return 1 for H_SUCCESS/H_PARTIAL
+ */
+ return 1;
+}
+
+static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_buf = filp->private_data;
+ unsigned long page, read_size, available;
+ loff_t offset;
+ long rc, ret;
+
+ page = ALIGN_DOWN(*ppos, PAGE_SIZE);
+ offset = (*ppos) % PAGE_SIZE;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm dump (H_HTM_OP_DUMP_DATA)
+ * - last three values are address, size and offset
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf),
+ PAGE_SIZE, page);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_DUMP_DATA, returning %ld\n", ret);
+ return ret;
+ }
+
available = PAGE_SIZE;
read_size = min(count, available);
*ppos += read_size;
@@ -77,6 +121,292 @@ static const struct file_operations htmdump_fops = {
.open = simple_open,
};
+static int htmconfigure_set(void *data, u64 val)
+{
+ long rc, ret;
+ unsigned long param1 = -1, param2 = -1;
+
+ /*
+ * value as 1 : configure HTM.
+ * value as 0 : deconfigure HTM. Return -EINVAL for
+ * other values.
+ */
+ if (val == HTM_ENABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm configure (H_HTM_OP_CONFIGURE)
+ * - If htmflags is set, param1 and param2 will be -1
+ * which is an indicator to use default htm mode reg mask
+ * and htm mode reg value.
+ * - last three values are unused, hence set to zero
+ */
+ if (!htmflags) {
+ param1 = 0;
+ param2 = 0;
+ }
+
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_CONFIGURE, param1, param2, 0);
+ } else if (val == HTM_DISABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm deconfigure (H_HTM_OP_DECONFIGURE)
+ * - last three values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_DECONFIGURE, 0, 0, 0);
+ } else
+ return -EINVAL;
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed, returning %ld\n", ret);
+ return ret;
+ }
+
+ /* Set htmconfigure if operation succeeds */
+ htmconfigure = val;
+
+ return 0;
+}
+
+static int htmconfigure_get(void *data, u64 *val)
+{
+ *val = htmconfigure;
+ return 0;
+}
+
+static int htmstart_set(void *data, u64 val)
+{
+ long rc, ret;
+
+ /*
+ * value as 1: start HTM
+ * value as 0: stop HTM
+ * Return -EINVAL for other values.
+ */
+ if (val == HTM_ENABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm start (H_HTM_OP_START)
+ * - last three values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_START, 0, 0, 0);
+
+ } else if (val == HTM_DISABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm stop (H_HTM_OP_STOP)
+ * - last three values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_STOP, 0, 0, 0);
+ } else
+ return -EINVAL;
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed, returning %ld\n", ret);
+ return ret;
+ }
+
+ /* Set htmstart if H_HTM_OP_START/H_HTM_OP_STOP operation succeeds */
+ htmstart = val;
+
+ return 0;
+}
+
+static int htmstart_get(void *data, u64 *val)
+{
+ *val = htmstart;
+ return 0;
+}
+
+static ssize_t htmstatus_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_status_buf = filp->private_data;
+ long rc, ret;
+ u64 *num_entries;
+ u64 to_copy;
+ int htmstatus_flag;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm status (H_HTM_OP_STATUS)
+ * - last three values as addr, size and offset
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_buf),
+ PAGE_SIZE, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_STATUS, returning %ld\n", ret);
+ return ret;
+ }
+
+ /*
+ * HTM status buffer, start of buffer + 0x10 gives the
+ * number of HTM entries in the buffer. Each nest htm status
+ * entry is 0x6 bytes where each core htm status entry is
+ * 0x8 bytes.
+ * So total count to copy is:
+ * 32 bytes (for first 7 fields) + (number of HTM entries * entry size)
+ */
+ num_entries = htm_status_buf + 0x10;
+ if (htmtype == 0x2)
+ htmstatus_flag = 0x8;
+ else
+ htmstatus_flag = 0x6;
+ to_copy = 32 + (be64_to_cpu(*num_entries) * htmstatus_flag);
+ return simple_read_from_buffer(ubuf, count, ppos, htm_status_buf, to_copy);
+}
+
+static const struct file_operations htmstatus_fops = {
+ .llseek = NULL,
+ .read = htmstatus_read,
+ .open = simple_open,
+};
+
+static ssize_t htminfo_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_info_buf = filp->private_data;
+ long rc, ret;
+ u64 *num_entries;
+ u64 to_copy;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm status (H_HTM_OP_STATUS)
+ * - last three values as addr, size and offset
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_buf),
+ PAGE_SIZE, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_DUMP_SYSPROC_CONF, returning %ld\n", ret);
+ return ret;
+ }
+
+ /*
+ * HTM status buffer, start of buffer + 0x10 gives the
+ * number of HTM entries in the buffer. Each entry of processor
+ * is 16 bytes.
+ *
+ * So total count to copy is:
+ * 32 bytes (for first 5 fields) + (number of HTM entries * entry size)
+ */
+ num_entries = htm_info_buf + 0x10;
+ to_copy = 32 + (be64_to_cpu(*num_entries) * 16);
+ return simple_read_from_buffer(ubuf, count, ppos, htm_info_buf, to_copy);
+}
+
+static ssize_t htmcaps_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_caps_buf = filp->private_data;
+ long rc, ret;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm capabilities (H_HTM_OP_CAPABILITIES)
+ * - last three values as addr, size (0x80 for Capabilities Output Buffer
+ * and zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_buf),
+ 0x80, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_CAPABILITIES, returning %ld\n", ret);
+ return ret;
+ }
+
+ return simple_read_from_buffer(ubuf, count, ppos, htm_caps_buf, 0x80);
+}
+
+static const struct file_operations htminfo_fops = {
+ .llseek = NULL,
+ .read = htminfo_read,
+ .open = simple_open,
+};
+
+static const struct file_operations htmcaps_fops = {
+ .llseek = NULL,
+ .read = htmcaps_read,
+ .open = simple_open,
+};
+
+static int htmsetup_set(void *data, u64 val)
+{
+ long rc, ret;
+
+ /*
+ * Input value: HTM buffer size in the power of 2
+ * example: hex value 0x21 ( decimal: 33 ) is for
+ * 8GB
+ * Invoke H_HTM call with:
+ * - operation as htm start (H_HTM_OP_SETUP)
+ * - parameter 1 set to input value.
+ * - last two values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_SETUP, val, 0, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_SETUP, returning %ld\n", ret);
+ return ret;
+ }
+
+ /* Set htmsetup if H_HTM_OP_SETUP operation succeeds */
+ htmsetup = val;
+
+ return 0;
+}
+
+static int htmsetup_get(void *data, u64 *val)
+{
+ *val = htmsetup;
+ return 0;
+}
+
+static int htmflags_set(void *data, u64 val)
+{
+ /*
+ * Input value:
+ * Currently supported flag value is to enable/disable
+ * HTM buffer wrap. wrap is used along with "configure"
+ * to prevent HTM buffer from wrapping.
+ * Writing 1 will set noWrap while configuring HTM
+ */
+ if (val == HTM_NOWRAP)
+ htmflags = H_HTM_FLAGS_NOWRAP;
+ else if (val == HTM_WRAP)
+ htmflags = 0;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int htmflags_get(void *data, u64 *val)
+{
+ *val = htmflags;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(htmconfigure_fops, htmconfigure_get, htmconfigure_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(htmstart_fops, htmstart_get, htmstart_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(htmsetup_fops, htmsetup_get, htmsetup_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(htmflags_fops, htmflags_get, htmflags_set, "%llu\n");
+
static int htmdump_init_debugfs(void)
{
htm_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -98,11 +428,50 @@ static int htmdump_init_debugfs(void)
htmdump_debugfs_dir, &htmtype);
debugfs_create_file("trace", 0400, htmdump_debugfs_dir, htm_buf, &htmdump_fops);
+ /*
+ * Debugfs interface files to control HTM operations:
+ */
+ debugfs_create_file("htmconfigure", 0600, htmdump_debugfs_dir, NULL, &htmconfigure_fops);
+ debugfs_create_file("htmstart", 0600, htmdump_debugfs_dir, NULL, &htmstart_fops);
+ debugfs_create_file("htmsetup", 0600, htmdump_debugfs_dir, NULL, &htmsetup_fops);
+ debugfs_create_file("htmflags", 0600, htmdump_debugfs_dir, NULL, &htmflags_fops);
+
+ /* Debugfs interface file to present status of HTM */
+ htm_status_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!htm_status_buf) {
+ pr_err("Failed to allocate htmstatus buf\n");
+ return -ENOMEM;
+ }
+
+ /* Debugfs interface file to present System Processor Configuration */
+ htm_info_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!htm_info_buf) {
+ pr_err("Failed to allocate htm info buf\n");
+ return -ENOMEM;
+ }
+
+ /* Debugfs interface file to present HTM capabilities */
+ htm_caps_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!htm_caps_buf) {
+ pr_err("Failed to allocate htm caps buf\n");
+ return -ENOMEM;
+ }
+
+ debugfs_create_file("htmstatus", 0400, htmdump_debugfs_dir, htm_status_buf, &htmstatus_fops);
+ debugfs_create_file("htminfo", 0400, htmdump_debugfs_dir, htm_info_buf, &htminfo_fops);
+ debugfs_create_file("htmcaps", 0400, htmdump_debugfs_dir, htm_caps_buf, &htmcaps_fops);
+
return 0;
}
static int __init htmdump_init(void)
{
+ /* Disable on kvm guest */
+ if (is_kvm_guest()) {
+ pr_info("htmdump not supported inside KVM guest\n");
+ return -EOPNOTSUPP;
+ }
+
if (htmdump_init_debugfs())
return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d6ebc19fb99c..eec333dd2e59 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -197,7 +197,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
static void tce_free_pSeries(struct iommu_table *tbl)
{
- if (!tbl->it_userspace)
+ if (tbl->it_userspace)
tce_iommu_userspace_view_free(tbl);
}
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index f9d80111c322..ee1c8c6898a3 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -525,7 +525,12 @@ static struct msi_domain_info pseries_msi_domain_info = {
static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
- __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+ struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
+
+ if (dev->current_state == PCI_D0)
+ __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+ else
+ get_cached_msi_msg(data->irq, msg);
}
static struct irq_chip pseries_msi_irq_chip = {
@@ -628,7 +633,7 @@ static int __pseries_msi_allocate_domains(struct pci_controller *phb,
return -ENOMEM;
}
- phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn),
+ phb->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(phb->dn),
&pseries_msi_domain_info,
phb->dev_domain);
if (!phb->msi_domain) {
diff --git a/arch/powerpc/platforms/pseries/papr-indices.c b/arch/powerpc/platforms/pseries/papr-indices.c
new file mode 100644
index 000000000000..3c7545591c45
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-indices.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-indices: " fmt
+
+#include <linux/build_bug.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/lockdep.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-indices.h>
+#include "papr-rtas-common.h"
+
+/*
+ * Function-specific return values for ibm,set-dynamic-indicator and
+ * ibm,get-dynamic-sensor-state RTAS calls.
+ * PAPR+ v2.13 7.3.18 and 7.3.19.
+ */
+#define RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR -3
+
+/**
+ * struct rtas_get_indices_params - Parameters (in and out) for
+ * ibm,get-indices.
+ * @is_sensor: In: Caller-provided whether sensor or indicator.
+ * @indice_type:In: Caller-provided indice (sensor or indicator) token
+ * @work_area: In: Caller-provided work area buffer for results.
+ * @next: In: Sequence number. Out: Next sequence number.
+ * @status: Out: RTAS call status.
+ */
+struct rtas_get_indices_params {
+ u8 is_sensor;
+ u32 indice_type;
+ struct rtas_work_area *work_area;
+ u32 next;
+ s32 status;
+};
+
+/*
+ * rtas_ibm_get_indices() - Call ibm,get-indices to fill a work area buffer.
+ * @params: See &struct rtas_ibm_get_indices_params.
+ *
+ * Calls ibm,get-indices until it errors or successfully deposits data
+ * into the supplied work area. Handles RTAS retry statuses. Maps RTAS
+ * error statuses to reasonable errno values.
+ *
+ * The caller is expected to invoke rtas_ibm_get_indices() multiple times
+ * to retrieve all indices data for the provided indice type. Only one
+ * sequence should be in progress at any time; starting a new sequence
+ * will disrupt any sequence already in progress. Serialization of
+ * indices retrieval sequences is the responsibility of the caller.
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 otherwise.
+ */
+static int rtas_ibm_get_indices(struct rtas_get_indices_params *params)
+{
+ struct rtas_work_area *work_area = params->work_area;
+ const s32 token = rtas_function_token(RTAS_FN_IBM_GET_INDICES);
+ u32 rets;
+ s32 fwrc;
+ int ret;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ lockdep_assert_held(&rtas_ibm_get_indices_lock);
+
+ do {
+ fwrc = rtas_call(token, 5, 2, &rets, params->is_sensor,
+ params->indice_type,
+ rtas_work_area_phys(work_area),
+ rtas_work_area_size(work_area),
+ params->next);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER: /* Indicator type is not supported */
+ ret = -EINVAL;
+ break;
+ case RTAS_SEQ_START_OVER:
+ ret = -EAGAIN;
+ pr_info_ratelimited("Indices changed during retrieval, retrying\n");
+ params->next = 1;
+ break;
+ case RTAS_SEQ_MORE_DATA:
+ params->next = rets;
+ ret = 0;
+ break;
+ case RTAS_SEQ_COMPLETE:
+ params->next = 0;
+ ret = 0;
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,get-indices status %d\n", fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Internal indices sequence APIs. A sequence is a series of calls to
+ * ibm,get-indices for a given location code. The sequence ends when
+ * an error is encountered or all indices for the input has been
+ * returned.
+ */
+
+/*
+ * indices_sequence_begin() - Begin a indices retrieval sequence.
+ *
+ * Context: May sleep.
+ */
+static void indices_sequence_begin(struct papr_rtas_sequence *seq)
+{
+ struct rtas_get_indices_params *param;
+
+ param = (struct rtas_get_indices_params *)seq->params;
+ /*
+ * We could allocate the work area before acquiring the
+ * function lock, but that would allow concurrent requests to
+ * exhaust the limited work area pool for no benefit. So
+ * allocate the work area under the lock.
+ */
+ mutex_lock(&rtas_ibm_get_indices_lock);
+ param->work_area = rtas_work_area_alloc(RTAS_GET_INDICES_BUF_SIZE);
+ param->next = 1;
+ param->status = 0;
+}
+
+/*
+ * indices_sequence_end() - Finalize a indices retrieval sequence.
+ *
+ * Releases resources obtained by indices_sequence_begin().
+ */
+static void indices_sequence_end(struct papr_rtas_sequence *seq)
+{
+ struct rtas_get_indices_params *param;
+
+ param = (struct rtas_get_indices_params *)seq->params;
+ rtas_work_area_free(param->work_area);
+ mutex_unlock(&rtas_ibm_get_indices_lock);
+}
+
+/*
+ * Work function to be passed to papr_rtas_blob_generate().
+ *
+ * ibm,get-indices RTAS call fills the work area with the certain
+ * format but does not return the bytes written in the buffer. So
+ * instead of kernel parsing this work area to determine the buffer
+ * length, copy the complete work area (RTAS_GET_INDICES_BUF_SIZE)
+ * to the blob and let the user space to obtain the data.
+ * Means RTAS_GET_INDICES_BUF_SIZE data will be returned for each
+ * read().
+ */
+
+static const char *indices_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+ size_t *len)
+{
+ struct rtas_get_indices_params *p;
+ bool init_state;
+
+ p = (struct rtas_get_indices_params *)seq->params;
+ init_state = (p->next == 1) ? true : false;
+
+ if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
+ return NULL;
+ if (papr_rtas_sequence_set_err(seq, rtas_ibm_get_indices(p)))
+ return NULL;
+
+ *len = RTAS_GET_INDICES_BUF_SIZE;
+ return rtas_work_area_raw_buf(p->work_area);
+}
+
+/*
+ * papr_indices_handle_read - returns indices blob data to the user space
+ *
+ * ibm,get-indices RTAS call fills the work area with the certian
+ * format but does not return the bytes written in the buffer and
+ * copied RTAS_GET_INDICES_BUF_SIZE data to the blob for each RTAS
+ * call. So send RTAS_GET_INDICES_BUF_SIZE buffer to the user space
+ * for each read().
+ */
+static ssize_t papr_indices_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ /* we should not instantiate a handle without any data attached. */
+ if (!papr_rtas_blob_has_data(blob)) {
+ pr_err_once("handle without data\n");
+ return -EIO;
+ }
+
+ if (size < RTAS_GET_INDICES_BUF_SIZE) {
+ pr_err_once("Invalid buffer length %ld, expect %d\n",
+ size, RTAS_GET_INDICES_BUF_SIZE);
+ return -EINVAL;
+ } else if (size > RTAS_GET_INDICES_BUF_SIZE)
+ size = RTAS_GET_INDICES_BUF_SIZE;
+
+ return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+static const struct file_operations papr_indices_handle_ops = {
+ .read = papr_indices_handle_read,
+ .llseek = papr_rtas_common_handle_seek,
+ .release = papr_rtas_common_handle_release,
+};
+
+/*
+ * papr_indices_create_handle() - Create a fd-based handle for reading
+ * indices data
+ * @ubuf: Input parameters to RTAS call such as whether sensor or indicator
+ * and indice type in user memory
+ *
+ * Handler for PAPR_INDICES_IOC_GET ioctl command. Validates @ubuf
+ * and instantiates an immutable indices "blob" for it. The blob is
+ * attached to a file descriptor for reading by user space. The memory
+ * backing the blob is freed when the file is released.
+ *
+ * The entire requested indices is retrieved by this call and all
+ * necessary RTAS interactions are performed before returning the fd
+ * to user space. This keeps the read handler simple and ensures that
+ * the kernel can prevent interleaving of ibm,get-indices call sequences.
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_indices_create_handle(struct papr_indices_io_block __user *ubuf)
+{
+ struct papr_rtas_sequence seq = {};
+ struct rtas_get_indices_params params = {};
+ int fd;
+
+ if (get_user(params.is_sensor, &ubuf->indices.is_sensor))
+ return -EFAULT;
+
+ if (get_user(params.indice_type, &ubuf->indices.indice_type))
+ return -EFAULT;
+
+ seq = (struct papr_rtas_sequence) {
+ .begin = indices_sequence_begin,
+ .end = indices_sequence_end,
+ .work = indices_sequence_fill_work_area,
+ };
+
+ seq.params = &params;
+ fd = papr_rtas_setup_file_interface(&seq,
+ &papr_indices_handle_ops, "[papr-indices]");
+
+ return fd;
+}
+
+/*
+ * Create work area with the input parameters. This function is used
+ * for both ibm,set-dynamic-indicator and ibm,get-dynamic-sensor-state
+ * RTAS Calls.
+ */
+static struct rtas_work_area *
+papr_dynamic_indice_buf_from_user(struct papr_indices_io_block __user *ubuf,
+ struct papr_indices_io_block *kbuf)
+{
+ struct rtas_work_area *work_area;
+ u32 length;
+ __be32 len_be;
+
+ if (copy_from_user(kbuf, ubuf, sizeof(*kbuf)))
+ return ERR_PTR(-EFAULT);
+
+
+ if (!string_is_terminated(kbuf->dynamic_param.location_code_str,
+ ARRAY_SIZE(kbuf->dynamic_param.location_code_str)))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * The input data in the work area should be as follows:
+ * - 32-bit integer length of the location code string,
+ * including NULL.
+ * - Location code string, NULL terminated, identifying the
+ * token (sensor or indicator).
+ * PAPR 2.13 - R1–7.3.18–5 ibm,set-dynamic-indicator
+ * - R1–7.3.19–5 ibm,get-dynamic-sensor-state
+ */
+ /*
+ * Length that user space passed should also include NULL
+ * terminator.
+ */
+ length = strlen(kbuf->dynamic_param.location_code_str) + 1;
+ if (length > LOC_CODE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ len_be = cpu_to_be32(length);
+
+ work_area = rtas_work_area_alloc(LOC_CODE_SIZE + sizeof(u32));
+ memcpy(rtas_work_area_raw_buf(work_area), &len_be, sizeof(u32));
+ memcpy((rtas_work_area_raw_buf(work_area) + sizeof(u32)),
+ &kbuf->dynamic_param.location_code_str, length);
+
+ return work_area;
+}
+
+/**
+ * papr_dynamic_indicator_ioc_set - ibm,set-dynamic-indicator RTAS Call
+ * PAPR 2.13 7.3.18
+ *
+ * @ubuf: Input parameters to RTAS call such as indicator token and
+ * new state.
+ *
+ * Returns success or -errno.
+ */
+static long papr_dynamic_indicator_ioc_set(struct papr_indices_io_block __user *ubuf)
+{
+ struct papr_indices_io_block kbuf;
+ struct rtas_work_area *work_area;
+ s32 fwrc, token, ret;
+
+ token = rtas_function_token(RTAS_FN_IBM_SET_DYNAMIC_INDICATOR);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ mutex_lock(&rtas_ibm_set_dynamic_indicator_lock);
+ work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf);
+ if (IS_ERR(work_area)) {
+ ret = PTR_ERR(work_area);
+ goto out;
+ }
+
+ do {
+ fwrc = rtas_call(token, 3, 1, NULL,
+ kbuf.dynamic_param.token,
+ kbuf.dynamic_param.state,
+ rtas_work_area_phys(work_area));
+ } while (rtas_busy_delay(fwrc));
+
+ rtas_work_area_free(work_area);
+
+ switch (fwrc) {
+ case RTAS_SUCCESS:
+ ret = 0;
+ break;
+ case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR: /* No such indicator */
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ pr_err("unexpected ibm,set-dynamic-indicator result %d\n",
+ fwrc);
+ fallthrough;
+ case RTAS_HARDWARE_ERROR: /* Hardware/platform error */
+ ret = -EIO;
+ break;
+ }
+
+out:
+ mutex_unlock(&rtas_ibm_set_dynamic_indicator_lock);
+ return ret;
+}
+
+/**
+ * papr_dynamic_sensor_ioc_get - ibm,get-dynamic-sensor-state RTAS Call
+ * PAPR 2.13 7.3.19
+ *
+ * @ubuf: Input parameters to RTAS call such as sensor token
+ * Copies the state in user space buffer.
+ *
+ *
+ * Returns success or -errno.
+ */
+
+static long papr_dynamic_sensor_ioc_get(struct papr_indices_io_block __user *ubuf)
+{
+ struct papr_indices_io_block kbuf;
+ struct rtas_work_area *work_area;
+ s32 fwrc, token, ret;
+ u32 rets;
+
+ token = rtas_function_token(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ mutex_lock(&rtas_ibm_get_dynamic_sensor_state_lock);
+ work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf);
+ if (IS_ERR(work_area)) {
+ ret = PTR_ERR(work_area);
+ goto out;
+ }
+
+ do {
+ fwrc = rtas_call(token, 2, 2, &rets,
+ kbuf.dynamic_param.token,
+ rtas_work_area_phys(work_area));
+ } while (rtas_busy_delay(fwrc));
+
+ rtas_work_area_free(work_area);
+
+ switch (fwrc) {
+ case RTAS_SUCCESS:
+ if (put_user(rets, &ubuf->dynamic_param.state))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
+ case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR: /* No such indicator */
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ pr_err("unexpected ibm,get-dynamic-sensor result %d\n",
+ fwrc);
+ fallthrough;
+ case RTAS_HARDWARE_ERROR: /* Hardware/platform error */
+ ret = -EIO;
+ break;
+ }
+
+out:
+ mutex_unlock(&rtas_ibm_get_dynamic_sensor_state_lock);
+ return ret;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-indices.
+ */
+static long papr_indices_dev_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ void __user *argp = (__force void __user *)arg;
+ long ret;
+
+ switch (ioctl) {
+ case PAPR_INDICES_IOC_GET:
+ ret = papr_indices_create_handle(argp);
+ break;
+ case PAPR_DYNAMIC_SENSOR_IOC_GET:
+ ret = papr_dynamic_sensor_ioc_get(argp);
+ break;
+ case PAPR_DYNAMIC_INDICATOR_IOC_SET:
+ if (filp->f_mode & FMODE_WRITE)
+ ret = papr_dynamic_indicator_ioc_set(argp);
+ else
+ ret = -EBADF;
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+
+ return ret;
+}
+
+static const struct file_operations papr_indices_ops = {
+ .unlocked_ioctl = papr_indices_dev_ioctl,
+};
+
+static struct miscdevice papr_indices_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-indices",
+ .fops = &papr_indices_ops,
+};
+
+static __init int papr_indices_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_GET_INDICES))
+ return -ENODEV;
+
+ if (!rtas_function_implemented(RTAS_FN_IBM_SET_DYNAMIC_INDICATOR))
+ return -ENODEV;
+
+ if (!rtas_function_implemented(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE))
+ return -ENODEV;
+
+ return misc_register(&papr_indices_dev);
+}
+machine_device_initcall(pseries, papr_indices_init);
diff --git a/arch/powerpc/platforms/pseries/papr-phy-attest.c b/arch/powerpc/platforms/pseries/papr-phy-attest.c
new file mode 100644
index 000000000000..1907f2411567
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-phy-attest.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-phy-attest: " fmt
+
+#include <linux/build_bug.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/lockdep.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-physical-attestation.h>
+#include "papr-rtas-common.h"
+
+/**
+ * struct rtas_phy_attest_params - Parameters (in and out) for
+ * ibm,physical-attestation.
+ *
+ * @cmd: In: Caller-provided attestation command buffer. Must be
+ * RTAS-addressable.
+ * @work_area: In: Caller-provided work area buffer for attestation
+ * command structure
+ * Out: Caller-provided work area buffer for the response
+ * @cmd_len: In: Caller-provided attestation command structure
+ * length
+ * @sequence: In: Sequence number. Out: Next sequence number.
+ * @written: Out: Bytes written by ibm,physical-attestation to
+ * @work_area.
+ * @status: Out: RTAS call status.
+ */
+struct rtas_phy_attest_params {
+ struct papr_phy_attest_io_block cmd;
+ struct rtas_work_area *work_area;
+ u32 cmd_len;
+ u32 sequence;
+ u32 written;
+ s32 status;
+};
+
+/**
+ * rtas_physical_attestation() - Call ibm,physical-attestation to
+ * fill a work area buffer.
+ * @params: See &struct rtas_phy_attest_params.
+ *
+ * Calls ibm,physical-attestation until it errors or successfully
+ * deposits data into the supplied work area. Handles RTAS retry
+ * statuses. Maps RTAS error statuses to reasonable errno values.
+ *
+ * The caller is expected to invoke rtas_physical_attestation()
+ * multiple times to retrieve all the data for the provided
+ * attestation command. Only one sequence should be in progress at
+ * any time; starting a new sequence will disrupt any sequence
+ * already in progress. Serialization of attestation retrieval
+ * sequences is the responsibility of the caller.
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 otherwise.
+ */
+static int rtas_physical_attestation(struct rtas_phy_attest_params *params)
+{
+ struct rtas_work_area *work_area;
+ s32 fwrc, token;
+ u32 rets[2];
+ int ret;
+
+ work_area = params->work_area;
+ token = rtas_function_token(RTAS_FN_IBM_PHYSICAL_ATTESTATION);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ lockdep_assert_held(&rtas_ibm_physical_attestation_lock);
+
+ do {
+ fwrc = rtas_call(token, 3, 3, rets,
+ rtas_work_area_phys(work_area),
+ params->cmd_len,
+ params->sequence);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER:
+ ret = -EINVAL;
+ break;
+ case RTAS_SEQ_MORE_DATA:
+ params->sequence = rets[0];
+ fallthrough;
+ case RTAS_SEQ_COMPLETE:
+ params->written = rets[1];
+ /*
+ * Kernel or firmware bug, do not continue.
+ */
+ if (WARN(params->written > rtas_work_area_size(work_area),
+ "possible write beyond end of work area"))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,get-phy_attest status %d\n", fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Internal physical-attestation sequence APIs. A physical-attestation
+ * sequence is a series of calls to get ibm,physical-attestation
+ * for a given attestation command. The sequence ends when an error
+ * is encountered or all data for the attestation command has been
+ * returned.
+ */
+
+/**
+ * phy_attest_sequence_begin() - Begin a response data for attestation
+ * command retrieval sequence.
+ * @seq: user specified parameters for RTAS call from seq struct.
+ *
+ * Context: May sleep.
+ */
+static void phy_attest_sequence_begin(struct papr_rtas_sequence *seq)
+{
+ struct rtas_phy_attest_params *param;
+
+ /*
+ * We could allocate the work area before acquiring the
+ * function lock, but that would allow concurrent requests to
+ * exhaust the limited work area pool for no benefit. So
+ * allocate the work area under the lock.
+ */
+ mutex_lock(&rtas_ibm_physical_attestation_lock);
+ param = (struct rtas_phy_attest_params *)seq->params;
+ param->work_area = rtas_work_area_alloc(SZ_4K);
+ memcpy(rtas_work_area_raw_buf(param->work_area), &param->cmd,
+ param->cmd_len);
+ param->sequence = 1;
+ param->status = 0;
+}
+
+/**
+ * phy_attest_sequence_end() - Finalize a attestation command
+ * response retrieval sequence.
+ * @seq: Sequence state.
+ *
+ * Releases resources obtained by phy_attest_sequence_begin().
+ */
+static void phy_attest_sequence_end(struct papr_rtas_sequence *seq)
+{
+ struct rtas_phy_attest_params *param;
+
+ param = (struct rtas_phy_attest_params *)seq->params;
+ rtas_work_area_free(param->work_area);
+ mutex_unlock(&rtas_ibm_physical_attestation_lock);
+ kfree(param);
+}
+
+/*
+ * Generator function to be passed to papr_rtas_blob_generate().
+ */
+static const char *phy_attest_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+ size_t *len)
+{
+ struct rtas_phy_attest_params *p;
+ bool init_state;
+
+ p = (struct rtas_phy_attest_params *)seq->params;
+ init_state = (p->written == 0) ? true : false;
+
+ if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
+ return NULL;
+ if (papr_rtas_sequence_set_err(seq, rtas_physical_attestation(p)))
+ return NULL;
+ *len = p->written;
+ return rtas_work_area_raw_buf(p->work_area);
+}
+
+static const struct file_operations papr_phy_attest_handle_ops = {
+ .read = papr_rtas_common_handle_read,
+ .llseek = papr_rtas_common_handle_seek,
+ .release = papr_rtas_common_handle_release,
+};
+
+/**
+ * papr_phy_attest_create_handle() - Create a fd-based handle for
+ * reading the response for the given attestation command.
+ * @ulc: Attestation command in user memory; defines the scope of
+ * data for the attestation command to retrieve.
+ *
+ * Handler for PAPR_PHYSICAL_ATTESTATION_IOC_CREATE_HANDLE ioctl
+ * command. Validates @ulc and instantiates an immutable response
+ * "blob" for attestation command. The blob is attached to a file
+ * descriptor for reading by user space. The memory backing the blob
+ * is freed when the file is released.
+ *
+ * The entire requested response buffer for the attestation command
+ * retrieved by this call and all necessary RTAS interactions are
+ * performed before returning the fd to user space. This keeps the
+ * read handler simple and ensures that kernel can prevent
+ * interleaving ibm,physical-attestation call sequences.
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_phy_attest_create_handle(struct papr_phy_attest_io_block __user *ulc)
+{
+ struct rtas_phy_attest_params *params;
+ struct papr_rtas_sequence seq = {};
+ int fd;
+
+ /*
+ * Freed in phy_attest_sequence_end().
+ */
+ params = kzalloc(sizeof(*params), GFP_KERNEL_ACCOUNT);
+ if (!params)
+ return -ENOMEM;
+
+ if (copy_from_user(&params->cmd, ulc,
+ sizeof(struct papr_phy_attest_io_block)))
+ return -EFAULT;
+
+ params->cmd_len = be32_to_cpu(params->cmd.length);
+ seq = (struct papr_rtas_sequence) {
+ .begin = phy_attest_sequence_begin,
+ .end = phy_attest_sequence_end,
+ .work = phy_attest_sequence_fill_work_area,
+ };
+
+ seq.params = (void *)params;
+
+ fd = papr_rtas_setup_file_interface(&seq,
+ &papr_phy_attest_handle_ops,
+ "[papr-physical-attestation]");
+
+ return fd;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-physical-attestation.
+ */
+static long papr_phy_attest_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (__force void __user *)arg;
+ long ret;
+
+ switch (ioctl) {
+ case PAPR_PHY_ATTEST_IOC_HANDLE:
+ ret = papr_phy_attest_create_handle(argp);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations papr_phy_attest_ops = {
+ .unlocked_ioctl = papr_phy_attest_dev_ioctl,
+};
+
+static struct miscdevice papr_phy_attest_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-physical-attestation",
+ .fops = &papr_phy_attest_ops,
+};
+
+static __init int papr_phy_attest_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_PHYSICAL_ATTESTATION))
+ return -ENODEV;
+
+ return misc_register(&papr_phy_attest_dev);
+}
+machine_device_initcall(pseries, papr_phy_attest_init);
diff --git a/arch/powerpc/platforms/pseries/papr-platform-dump.c b/arch/powerpc/platforms/pseries/papr-platform-dump.c
new file mode 100644
index 000000000000..f8d55eccdb6b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-platform-dump.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-platform-dump: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-platform-dump.h>
+
+/*
+ * Function-specific return values for ibm,platform-dump, derived from
+ * PAPR+ v2.13 7.3.3.4.1 "ibm,platform-dump RTAS Call".
+ */
+#define RTAS_IBM_PLATFORM_DUMP_COMPLETE 0 /* Complete dump retrieved. */
+#define RTAS_IBM_PLATFORM_DUMP_CONTINUE 1 /* Continue dump */
+#define RTAS_NOT_AUTHORIZED -9002 /* Not Authorized */
+
+#define RTAS_IBM_PLATFORM_DUMP_START 2 /* Linux status to start dump */
+
+/**
+ * struct ibm_platform_dump_params - Parameters (in and out) for
+ * ibm,platform-dump
+ * @work_area: In: work area buffer for results.
+ * @buf_length: In: work area buffer length in bytes
+ * @dump_tag_hi: In: Most-significant 32 bits of a Dump_Tag representing
+ * an id of the dump being processed.
+ * @dump_tag_lo: In: Least-significant 32 bits of a Dump_Tag representing
+ * an id of the dump being processed.
+ * @sequence_hi: In: Sequence number in most-significant 32 bits.
+ * Out: Next sequence number in most-significant 32 bits.
+ * @sequence_lo: In: Sequence number in Least-significant 32 bits
+ * Out: Next sequence number in Least-significant 32 bits.
+ * @bytes_ret_hi: Out: Bytes written in most-significant 32 bits.
+ * @bytes_ret_lo: Out: Bytes written in Least-significant 32 bits.
+ * @status: Out: RTAS call status.
+ * @list: Maintain the list of dumps are in progress. Can
+ * retrieve multiple dumps with different dump IDs at
+ * the same time but not with the same dump ID. This list
+ * is used to determine whether the dump for the same ID
+ * is in progress.
+ */
+struct ibm_platform_dump_params {
+ struct rtas_work_area *work_area;
+ u32 buf_length;
+ u32 dump_tag_hi;
+ u32 dump_tag_lo;
+ u32 sequence_hi;
+ u32 sequence_lo;
+ u32 bytes_ret_hi;
+ u32 bytes_ret_lo;
+ s32 status;
+ struct list_head list;
+};
+
+/*
+ * Multiple dumps with different dump IDs can be retrieved at the same
+ * time, but not with dame dump ID. platform_dump_list_mutex and
+ * platform_dump_list are used to prevent this behavior.
+ */
+static DEFINE_MUTEX(platform_dump_list_mutex);
+static LIST_HEAD(platform_dump_list);
+
+/**
+ * rtas_ibm_platform_dump() - Call ibm,platform-dump to fill a work area
+ * buffer.
+ * @params: See &struct ibm_platform_dump_params.
+ * @buf_addr: Address of dump buffer (work_area)
+ * @buf_length: Length of the buffer in bytes (min. 1024)
+ *
+ * Calls ibm,platform-dump until it errors or successfully deposits data
+ * into the supplied work area. Handles RTAS retry statuses. Maps RTAS
+ * error statuses to reasonable errno values.
+ *
+ * Can request multiple dumps with different dump IDs at the same time,
+ * but not with the same dump ID which is prevented with the check in
+ * the ioctl code (papr_platform_dump_create_handle()).
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 for dump complete and 1 for continue dump
+ */
+static int rtas_ibm_platform_dump(struct ibm_platform_dump_params *params,
+ phys_addr_t buf_addr, u32 buf_length)
+{
+ u32 rets[4];
+ s32 fwrc;
+ int ret = 0;
+
+ do {
+ fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP),
+ 6, 5,
+ rets,
+ params->dump_tag_hi,
+ params->dump_tag_lo,
+ params->sequence_hi,
+ params->sequence_lo,
+ buf_addr,
+ buf_length);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_NOT_AUTHORIZED:
+ ret = -EPERM;
+ break;
+ case RTAS_IBM_PLATFORM_DUMP_CONTINUE:
+ case RTAS_IBM_PLATFORM_DUMP_COMPLETE:
+ params->sequence_hi = rets[0];
+ params->sequence_lo = rets[1];
+ params->bytes_ret_hi = rets[2];
+ params->bytes_ret_lo = rets[3];
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,platform-dump status %d\n",
+ fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Platform dump is used with multiple RTAS calls to retrieve the
+ * complete dump for the provided dump ID. Once the complete dump is
+ * retrieved, the hypervisor returns dump complete status (0) for the
+ * last RTAS call and expects the caller issues one more call with
+ * NULL buffer to invalidate the dump so that the hypervisor can remove
+ * the dump.
+ *
+ * After the specific dump is invalidated in the hypervisor, expect the
+ * dump complete status for the new sequence - the user space initiates
+ * new request for the same dump ID.
+ */
+static ssize_t papr_platform_dump_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+ struct ibm_platform_dump_params *params = file->private_data;
+ u64 total_bytes;
+ s32 fwrc;
+
+ /*
+ * Dump already completed with the previous read calls.
+ * In case if the user space issues further reads, returns
+ * -EINVAL.
+ */
+ if (!params->buf_length) {
+ pr_warn_once("Platform dump completed for dump ID %llu\n",
+ (u64) (((u64)params->dump_tag_hi << 32) |
+ params->dump_tag_lo));
+ return -EINVAL;
+ }
+
+ /*
+ * The hypervisor returns status 0 if no more data available to
+ * download. The dump will be invalidated with ioctl (see below).
+ */
+ if (params->status == RTAS_IBM_PLATFORM_DUMP_COMPLETE) {
+ params->buf_length = 0;
+ /*
+ * Returns 0 to the user space so that user
+ * space read stops.
+ */
+ return 0;
+ }
+
+ if (size < SZ_1K) {
+ pr_err_once("Buffer length should be minimum 1024 bytes\n");
+ return -EINVAL;
+ } else if (size > params->buf_length) {
+ /*
+ * Allocate 4K work area. So if the user requests > 4K,
+ * resize the buffer length.
+ */
+ size = params->buf_length;
+ }
+
+ fwrc = rtas_ibm_platform_dump(params,
+ rtas_work_area_phys(params->work_area),
+ size);
+ if (fwrc < 0)
+ return fwrc;
+
+ total_bytes = (u64) (((u64)params->bytes_ret_hi << 32) |
+ params->bytes_ret_lo);
+
+ /*
+ * Kernel or firmware bug, do not continue.
+ */
+ if (WARN(total_bytes > size, "possible write beyond end of work area"))
+ return -EFAULT;
+
+ if (copy_to_user(buf, rtas_work_area_raw_buf(params->work_area),
+ total_bytes))
+ return -EFAULT;
+
+ return total_bytes;
+}
+
+static int papr_platform_dump_handle_release(struct inode *inode,
+ struct file *file)
+{
+ struct ibm_platform_dump_params *params = file->private_data;
+
+ if (params->work_area)
+ rtas_work_area_free(params->work_area);
+
+ mutex_lock(&platform_dump_list_mutex);
+ list_del(&params->list);
+ mutex_unlock(&platform_dump_list_mutex);
+
+ kfree(params);
+ file->private_data = NULL;
+ return 0;
+}
+
+/*
+ * This ioctl is used to invalidate the dump assuming the user space
+ * issue this ioctl after obtain the complete dump.
+ * Issue the last RTAS call with NULL buffer to invalidate the dump
+ * which means dump will be freed in the hypervisor.
+ */
+static long papr_platform_dump_invalidate_ioctl(struct file *file,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct ibm_platform_dump_params *params;
+ u64 __user *argp = (void __user *)arg;
+ u64 param_dump_tag, dump_tag;
+
+ if (ioctl != PAPR_PLATFORM_DUMP_IOC_INVALIDATE)
+ return -ENOIOCTLCMD;
+
+ if (get_user(dump_tag, argp))
+ return -EFAULT;
+
+ /*
+ * private_data is freeded during release(), so should not
+ * happen.
+ */
+ if (!file->private_data) {
+ pr_err("No valid FD to invalidate dump for the ID(%llu)\n",
+ dump_tag);
+ return -EINVAL;
+ }
+
+ params = file->private_data;
+ param_dump_tag = (u64) (((u64)params->dump_tag_hi << 32) |
+ params->dump_tag_lo);
+ if (dump_tag != param_dump_tag) {
+ pr_err("Invalid dump ID(%llu) to invalidate dump\n",
+ dump_tag);
+ return -EINVAL;
+ }
+
+ if (params->status != RTAS_IBM_PLATFORM_DUMP_COMPLETE) {
+ pr_err("Platform dump is not complete, but requested "
+ "to invalidate dump for ID(%llu)\n",
+ dump_tag);
+ return -EINPROGRESS;
+ }
+
+ return rtas_ibm_platform_dump(params, 0, 0);
+}
+
+static const struct file_operations papr_platform_dump_handle_ops = {
+ .read = papr_platform_dump_handle_read,
+ .release = papr_platform_dump_handle_release,
+ .unlocked_ioctl = papr_platform_dump_invalidate_ioctl,
+};
+
+/**
+ * papr_platform_dump_create_handle() - Create a fd-based handle for
+ * reading platform dump
+ *
+ * Handler for PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE ioctl command
+ * Allocates RTAS parameter struct and work area and attached to the
+ * file descriptor for reading by user space with the multiple RTAS
+ * calls until the dump is completed. This memory allocation is freed
+ * when the file is released.
+ *
+ * Multiple dump requests with different IDs are allowed at the same
+ * time, but not with the same dump ID. So if the user space is
+ * already opened file descriptor for the specific dump ID, return
+ * -EALREADY for the next request.
+ *
+ * @dump_tag: Dump ID for the dump requested to retrieve from the
+ * hypervisor
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_platform_dump_create_handle(u64 dump_tag)
+{
+ struct ibm_platform_dump_params *params;
+ u64 param_dump_tag;
+ struct file *file;
+ long err;
+ int fd;
+
+ /*
+ * Return failure if the user space is already opened FD for
+ * the specific dump ID. This check will prevent multiple dump
+ * requests for the same dump ID at the same time. Generally
+ * should not expect this, but in case.
+ */
+ list_for_each_entry(params, &platform_dump_list, list) {
+ param_dump_tag = (u64) (((u64)params->dump_tag_hi << 32) |
+ params->dump_tag_lo);
+ if (dump_tag == param_dump_tag) {
+ pr_err("Platform dump for ID(%llu) is already in progress\n",
+ dump_tag);
+ return -EALREADY;
+ }
+ }
+
+ params = kzalloc(sizeof(struct ibm_platform_dump_params),
+ GFP_KERNEL_ACCOUNT);
+ if (!params)
+ return -ENOMEM;
+
+ params->work_area = rtas_work_area_alloc(SZ_4K);
+ params->buf_length = SZ_4K;
+ params->dump_tag_hi = (u32)(dump_tag >> 32);
+ params->dump_tag_lo = (u32)(dump_tag & 0x00000000ffffffffULL);
+ params->status = RTAS_IBM_PLATFORM_DUMP_START;
+
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto free_area;
+ }
+
+ file = anon_inode_getfile_fmode("[papr-platform-dump]",
+ &papr_platform_dump_handle_ops,
+ (void *)params, O_RDONLY,
+ FMODE_LSEEK | FMODE_PREAD);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto put_fd;
+ }
+
+ fd_install(fd, file);
+
+ list_add(&params->list, &platform_dump_list);
+
+ pr_info("%s (%d) initiated platform dump for dump tag %llu\n",
+ current->comm, current->pid, dump_tag);
+ return fd;
+put_fd:
+ put_unused_fd(fd);
+free_area:
+ rtas_work_area_free(params->work_area);
+ kfree(params);
+ return err;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-platform-dump.
+ */
+static long papr_platform_dump_dev_ioctl(struct file *filp,
+ unsigned int ioctl,
+ unsigned long arg)
+{
+ u64 __user *argp = (void __user *)arg;
+ u64 dump_tag;
+ long ret;
+
+ if (get_user(dump_tag, argp))
+ return -EFAULT;
+
+ switch (ioctl) {
+ case PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE:
+ mutex_lock(&platform_dump_list_mutex);
+ ret = papr_platform_dump_create_handle(dump_tag);
+ mutex_unlock(&platform_dump_list_mutex);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations papr_platform_dump_ops = {
+ .unlocked_ioctl = papr_platform_dump_dev_ioctl,
+};
+
+static struct miscdevice papr_platform_dump_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-platform-dump",
+ .fops = &papr_platform_dump_ops,
+};
+
+static __init int papr_platform_dump_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_PLATFORM_DUMP))
+ return -ENODEV;
+
+ return misc_register(&papr_platform_dump_dev);
+}
+machine_device_initcall(pseries, papr_platform_dump_init);
diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c b/arch/powerpc/platforms/pseries/papr-rtas-common.c
new file mode 100644
index 000000000000..33c606e3378a
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-common: " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched/signal.h>
+#include "papr-rtas-common.h"
+
+/*
+ * Sequence based RTAS HCALL has to issue multiple times to retrieve
+ * complete data from the hypervisor. For some of these RTAS calls,
+ * the OS should not interleave calls with different input until the
+ * sequence is completed. So data is collected for these calls during
+ * ioctl handle and export to user space with read() handle.
+ * This file provides common functions needed for such sequence based
+ * RTAS calls Ex: ibm,get-vpd and ibm,get-indices.
+ */
+
+bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob)
+{
+ return blob->data && blob->len;
+}
+
+void papr_rtas_blob_free(const struct papr_rtas_blob *blob)
+{
+ if (blob) {
+ kvfree(blob->data);
+ kfree(blob);
+ }
+}
+
+/**
+ * papr_rtas_blob_extend() - Append data to a &struct papr_rtas_blob.
+ * @blob: The blob to extend.
+ * @data: The new data to append to @blob.
+ * @len: The length of @data.
+ *
+ * Context: May sleep.
+ * Return: -ENOMEM on allocation failure, 0 otherwise.
+ */
+static int papr_rtas_blob_extend(struct papr_rtas_blob *blob,
+ const char *data, size_t len)
+{
+ const size_t new_len = blob->len + len;
+ const size_t old_len = blob->len;
+ const char *old_ptr = blob->data;
+ char *new_ptr;
+
+ new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
+ if (!new_ptr)
+ return -ENOMEM;
+
+ memcpy(&new_ptr[old_len], data, len);
+ blob->data = new_ptr;
+ blob->len = new_len;
+ return 0;
+}
+
+/**
+ * papr_rtas_blob_generate() - Construct a new &struct papr_rtas_blob.
+ * @seq: work function of the caller that is called to obtain
+ * data with the caller RTAS call.
+ *
+ * The @work callback is invoked until it returns NULL. @seq is
+ * passed to @work in its first argument on each call. When
+ * @work returns data, it should store the data length in its
+ * second argument.
+ *
+ * Context: May sleep.
+ * Return: A completely populated &struct papr_rtas_blob, or NULL on error.
+ */
+static const struct papr_rtas_blob *
+papr_rtas_blob_generate(struct papr_rtas_sequence *seq)
+{
+ struct papr_rtas_blob *blob;
+ const char *buf;
+ size_t len;
+ int err = 0;
+
+ blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return NULL;
+
+ if (!seq->work)
+ return ERR_PTR(-EINVAL);
+
+
+ while (err == 0 && (buf = seq->work(seq, &len)))
+ err = papr_rtas_blob_extend(blob, buf, len);
+
+ if (err != 0 || !papr_rtas_blob_has_data(blob))
+ goto free_blob;
+
+ return blob;
+free_blob:
+ papr_rtas_blob_free(blob);
+ return NULL;
+}
+
+int papr_rtas_sequence_set_err(struct papr_rtas_sequence *seq, int err)
+{
+ /* Preserve the first error recorded. */
+ if (seq->error == 0)
+ seq->error = err;
+
+ return seq->error;
+}
+
+/*
+ * Higher-level retrieval code below. These functions use the
+ * papr_rtas_blob_* and sequence_* APIs defined above to create fd-based
+ * handles for consumption by user space.
+ */
+
+/**
+ * papr_rtas_run_sequence() - Run a single retrieval sequence.
+ * @seq: Functions of the caller to complete the sequence
+ *
+ * Context: May sleep. Holds a mutex and an RTAS work area for its
+ * duration. Typically performs multiple sleepable slab
+ * allocations.
+ *
+ * Return: A populated &struct papr_rtas_blob on success. Encoded error
+ * pointer otherwise.
+ */
+static const struct papr_rtas_blob *papr_rtas_run_sequence(struct papr_rtas_sequence *seq)
+{
+ const struct papr_rtas_blob *blob;
+
+ if (seq->begin)
+ seq->begin(seq);
+
+ blob = papr_rtas_blob_generate(seq);
+ if (!blob)
+ papr_rtas_sequence_set_err(seq, -ENOMEM);
+
+ if (seq->end)
+ seq->end(seq);
+
+
+ if (seq->error) {
+ papr_rtas_blob_free(blob);
+ return ERR_PTR(seq->error);
+ }
+
+ return blob;
+}
+
+/**
+ * papr_rtas_retrieve() - Return the data blob that is exposed to
+ * user space.
+ * @seq: RTAS call specific functions to be invoked until the
+ * sequence is completed.
+ *
+ * Run sequences against @param until a blob is successfully
+ * instantiated, or a hard error is encountered, or a fatal signal is
+ * pending.
+ *
+ * Context: May sleep.
+ * Return: A fully populated data blob when successful. Encoded error
+ * pointer otherwise.
+ */
+const struct papr_rtas_blob *papr_rtas_retrieve(struct papr_rtas_sequence *seq)
+{
+ const struct papr_rtas_blob *blob;
+
+ /*
+ * EAGAIN means the sequence returns error with a -4 (data
+ * changed and need to start the sequence) status from RTAS calls
+ * and we should attempt a new sequence. PAPR+ (v2.13 R1–7.3.20–5
+ * - ibm,get-vpd, R1–7.3.17–6 - ibm,get-indices) indicates that
+ * this should be a transient condition, not something that
+ * happens continuously. But we'll stop trying on a fatal signal.
+ */
+ do {
+ blob = papr_rtas_run_sequence(seq);
+ if (!IS_ERR(blob)) /* Success. */
+ break;
+ if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */
+ break;
+ cond_resched();
+ } while (!fatal_signal_pending(current));
+
+ return blob;
+}
+
+/**
+ * papr_rtas_setup_file_interface - Complete the sequence and obtain
+ * the data and export to user space with fd-based handles. Then the
+ * user spave gets the data with read() handle.
+ * @seq: RTAS call specific functions to get the data.
+ * @fops: RTAS call specific file operations such as read().
+ * @name: RTAS call specific char device node.
+ *
+ * Return: FD handle for consumption by user space
+ */
+long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq,
+ const struct file_operations *fops,
+ char *name)
+{
+ const struct papr_rtas_blob *blob;
+ struct file *file;
+ long ret;
+ int fd;
+
+ blob = papr_rtas_retrieve(seq);
+ if (IS_ERR(blob))
+ return PTR_ERR(blob);
+
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = fd;
+ goto free_blob;
+ }
+
+ file = anon_inode_getfile_fmode(name, fops, (void *)blob,
+ O_RDONLY, FMODE_LSEEK | FMODE_PREAD);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto put_fd;
+ }
+
+ fd_install(fd, file);
+ return fd;
+
+put_fd:
+ put_unused_fd(fd);
+free_blob:
+ papr_rtas_blob_free(blob);
+ return ret;
+}
+
+/*
+ * papr_rtas_sequence_should_stop() - Determine whether RTAS retrieval
+ * sequence should continue.
+ *
+ * Examines the sequence error state and outputs of the last call to
+ * the specific RTAS to determine whether the sequence in progress
+ * should continue or stop.
+ *
+ * Return: True if the sequence has encountered an error or if all data
+ * for this sequence has been retrieved. False otherwise.
+ */
+bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq,
+ s32 status, bool init_state)
+{
+ bool done;
+
+ if (seq->error)
+ return true;
+
+ switch (status) {
+ case RTAS_SEQ_COMPLETE:
+ if (init_state)
+ done = false; /* Initial state. */
+ else
+ done = true; /* All data consumed. */
+ break;
+ case RTAS_SEQ_MORE_DATA:
+ done = false; /* More data available. */
+ break;
+ default:
+ done = true; /* Error encountered. */
+ break;
+ }
+
+ return done;
+}
+
+/*
+ * User space read to retrieve data for the corresponding RTAS call.
+ * papr_rtas_blob is filled with the data using the corresponding RTAS
+ * call sequence API.
+ */
+ssize_t papr_rtas_common_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ /* We should not instantiate a handle without any data attached. */
+ if (!papr_rtas_blob_has_data(blob)) {
+ pr_err_once("handle without data\n");
+ return -EIO;
+ }
+
+ return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+int papr_rtas_common_handle_release(struct inode *inode,
+ struct file *file)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ papr_rtas_blob_free(blob);
+
+ return 0;
+}
+
+loff_t papr_rtas_common_handle_seek(struct file *file, loff_t off,
+ int whence)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ return fixed_size_llseek(file, off, whence, blob->len);
+}
diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.h b/arch/powerpc/platforms/pseries/papr-rtas-common.h
new file mode 100644
index 000000000000..4ceabcaf4905
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-rtas-common.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_PAPR_RTAS_COMMON_H
+#define _ASM_POWERPC_PAPR_RTAS_COMMON_H
+
+#include <linux/types.h>
+
+/*
+ * Return codes for sequence based RTAS calls.
+ * Not listed under PAPR+ v2.13 7.2.8: "Return Codes".
+ * But defined in the specific section of each RTAS call.
+ */
+#define RTAS_SEQ_COMPLETE 0 /* All data has been retrieved. */
+#define RTAS_SEQ_MORE_DATA 1 /* More data is available */
+#define RTAS_SEQ_START_OVER -4 /* Data changed, restart call sequence. */
+
+/*
+ * Internal "blob" APIs for accumulating RTAS call results into
+ * an immutable buffer to be attached to a file descriptor.
+ */
+struct papr_rtas_blob {
+ const char *data;
+ size_t len;
+};
+
+/**
+ * struct papr_sequence - State for managing a sequence of RTAS calls.
+ * @error: Shall be zero as long as the sequence has not encountered an error,
+ * -ve errno otherwise. Use papr_rtas_sequence_set_err() to update.
+ * @params: Parameter block to pass to rtas_*() calls.
+ * @begin: Work area allocation and initialize the needed parameter
+ * values passed to RTAS call
+ * @end: Free the allocated work area
+ * @work: Obtain data with RTAS call and invoke it until the sequence is
+ * completed.
+ *
+ */
+struct papr_rtas_sequence {
+ int error;
+ void *params;
+ void (*begin)(struct papr_rtas_sequence *seq);
+ void (*end)(struct papr_rtas_sequence *seq);
+ const char *(*work)(struct papr_rtas_sequence *seq, size_t *len);
+};
+
+extern bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob);
+extern void papr_rtas_blob_free(const struct papr_rtas_blob *blob);
+extern int papr_rtas_sequence_set_err(struct papr_rtas_sequence *seq,
+ int err);
+extern const struct papr_rtas_blob *papr_rtas_retrieve(struct papr_rtas_sequence *seq);
+extern long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq,
+ const struct file_operations *fops, char *name);
+extern bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq,
+ s32 status, bool init_state);
+extern ssize_t papr_rtas_common_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off);
+extern int papr_rtas_common_handle_release(struct inode *inode,
+ struct file *file);
+extern loff_t papr_rtas_common_handle_seek(struct file *file, loff_t off,
+ int whence);
+#endif /* _ASM_POWERPC_PAPR_RTAS_COMMON_H */
+
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c
index c86950d7105a..f38c188fc4a1 100644
--- a/arch/powerpc/platforms/pseries/papr-vpd.c
+++ b/arch/powerpc/platforms/pseries/papr-vpd.c
@@ -2,7 +2,6 @@
#define pr_fmt(fmt) "papr-vpd: " fmt
-#include <linux/anon_inodes.h>
#include <linux/build_bug.h>
#include <linux/file.h>
#include <linux/fs.h>
@@ -20,14 +19,7 @@
#include <asm/rtas-work-area.h>
#include <asm/rtas.h>
#include <uapi/asm/papr-vpd.h>
-
-/*
- * Function-specific return values for ibm,get-vpd, derived from PAPR+
- * v2.13 7.3.20 "ibm,get-vpd RTAS Call".
- */
-#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */
-#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */
-#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */
+#include "papr-rtas-common.h"
/**
* struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd.
@@ -91,13 +83,14 @@ static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params)
case RTAS_INVALID_PARAMETER:
ret = -EINVAL;
break;
- case RTAS_IBM_GET_VPD_START_OVER:
+ case RTAS_SEQ_START_OVER:
ret = -EAGAIN;
+ pr_info_ratelimited("VPD changed during retrieval, retrying\n");
break;
- case RTAS_IBM_GET_VPD_MORE_DATA:
+ case RTAS_SEQ_MORE_DATA:
params->sequence = rets[0];
fallthrough;
- case RTAS_IBM_GET_VPD_COMPLETE:
+ case RTAS_SEQ_COMPLETE:
params->written = rets[1];
/*
* Kernel or firmware bug, do not continue.
@@ -119,91 +112,6 @@ static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params)
}
/*
- * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into
- * an immutable buffer to be attached to a file descriptor.
- */
-struct vpd_blob {
- const char *data;
- size_t len;
-};
-
-static bool vpd_blob_has_data(const struct vpd_blob *blob)
-{
- return blob->data && blob->len;
-}
-
-static void vpd_blob_free(const struct vpd_blob *blob)
-{
- if (blob) {
- kvfree(blob->data);
- kfree(blob);
- }
-}
-
-/**
- * vpd_blob_extend() - Append data to a &struct vpd_blob.
- * @blob: The blob to extend.
- * @data: The new data to append to @blob.
- * @len: The length of @data.
- *
- * Context: May sleep.
- * Return: -ENOMEM on allocation failure, 0 otherwise.
- */
-static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len)
-{
- const size_t new_len = blob->len + len;
- const size_t old_len = blob->len;
- const char *old_ptr = blob->data;
- char *new_ptr;
-
- new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
- if (!new_ptr)
- return -ENOMEM;
-
- memcpy(&new_ptr[old_len], data, len);
- blob->data = new_ptr;
- blob->len = new_len;
- return 0;
-}
-
-/**
- * vpd_blob_generate() - Construct a new &struct vpd_blob.
- * @generator: Function that supplies the blob data.
- * @arg: Context pointer supplied by caller, passed to @generator.
- *
- * The @generator callback is invoked until it returns NULL. @arg is
- * passed to @generator in its first argument on each call. When
- * @generator returns data, it should store the data length in its
- * second argument.
- *
- * Context: May sleep.
- * Return: A completely populated &struct vpd_blob, or NULL on error.
- */
-static const struct vpd_blob *
-vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg)
-{
- struct vpd_blob *blob;
- const char *buf;
- size_t len;
- int err = 0;
-
- blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT);
- if (!blob)
- return NULL;
-
- while (err == 0 && (buf = generator(arg, &len)))
- err = vpd_blob_extend(blob, buf, len);
-
- if (err != 0 || !vpd_blob_has_data(blob))
- goto free_blob;
-
- return blob;
-free_blob:
- vpd_blob_free(blob);
- return NULL;
-}
-
-/*
* Internal VPD sequence APIs. A VPD sequence is a series of calls to
* ibm,get-vpd for a given location code. The sequence ends when an
* error is encountered or all VPD for the location code has been
@@ -211,30 +119,14 @@ free_blob:
*/
/**
- * struct vpd_sequence - State for managing a VPD sequence.
- * @error: Shall be zero as long as the sequence has not encountered an error,
- * -ve errno otherwise. Use vpd_sequence_set_err() to update this.
- * @params: Parameter block to pass to rtas_ibm_get_vpd().
- */
-struct vpd_sequence {
- int error;
- struct rtas_ibm_get_vpd_params params;
-};
-
-/**
* vpd_sequence_begin() - Begin a VPD retrieval sequence.
- * @seq: Uninitialized sequence state.
- * @loc_code: Location code that defines the scope of the VPD to return.
- *
- * Initializes @seq with the resources necessary to carry out a VPD
- * sequence. Callers must pass @seq to vpd_sequence_end() regardless
- * of whether the sequence succeeds.
+ * @seq: vpd call parameters from sequence struct
*
* Context: May sleep.
*/
-static void vpd_sequence_begin(struct vpd_sequence *seq,
- const struct papr_location_code *loc_code)
+static void vpd_sequence_begin(struct papr_rtas_sequence *seq)
{
+ struct rtas_ibm_get_vpd_params *vpd_params;
/*
* Use a static data structure for the location code passed to
* RTAS to ensure it's in the RMA and avoid a separate work
@@ -242,6 +134,7 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
*/
static struct papr_location_code static_loc_code;
+ vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
/*
* We could allocate the work area before acquiring the
* function lock, but that would allow concurrent requests to
@@ -249,14 +142,12 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
* allocate the work area under the lock.
*/
mutex_lock(&rtas_ibm_get_vpd_lock);
- static_loc_code = *loc_code;
- *seq = (struct vpd_sequence) {
- .params = {
- .work_area = rtas_work_area_alloc(SZ_4K),
- .loc_code = &static_loc_code,
- .sequence = 1,
- },
- };
+ static_loc_code = *(struct papr_location_code *)vpd_params->loc_code;
+ vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
+ vpd_params->work_area = rtas_work_area_alloc(SZ_4K);
+ vpd_params->loc_code = &static_loc_code;
+ vpd_params->sequence = 1;
+ vpd_params->status = 0;
}
/**
@@ -265,180 +156,39 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
*
* Releases resources obtained by vpd_sequence_begin().
*/
-static void vpd_sequence_end(struct vpd_sequence *seq)
+static void vpd_sequence_end(struct papr_rtas_sequence *seq)
{
- rtas_work_area_free(seq->params.work_area);
- mutex_unlock(&rtas_ibm_get_vpd_lock);
-}
-
-/**
- * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence
- * should continue.
- * @seq: VPD sequence state.
- *
- * Examines the sequence error state and outputs of the last call to
- * ibm,get-vpd to determine whether the sequence in progress should
- * continue or stop.
- *
- * Return: True if the sequence has encountered an error or if all VPD for
- * this sequence has been retrieved. False otherwise.
- */
-static bool vpd_sequence_should_stop(const struct vpd_sequence *seq)
-{
- bool done;
-
- if (seq->error)
- return true;
+ struct rtas_ibm_get_vpd_params *vpd_params;
- switch (seq->params.status) {
- case 0:
- if (seq->params.written == 0)
- done = false; /* Initial state. */
- else
- done = true; /* All data consumed. */
- break;
- case 1:
- done = false; /* More data available. */
- break;
- default:
- done = true; /* Error encountered. */
- break;
- }
-
- return done;
-}
-
-static int vpd_sequence_set_err(struct vpd_sequence *seq, int err)
-{
- /* Preserve the first error recorded. */
- if (seq->error == 0)
- seq->error = err;
-
- return seq->error;
+ vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
+ rtas_work_area_free(vpd_params->work_area);
+ mutex_unlock(&rtas_ibm_get_vpd_lock);
}
/*
- * Generator function to be passed to vpd_blob_generate().
+ * Generator function to be passed to papr_rtas_blob_generate().
*/
-static const char *vpd_sequence_fill_work_area(void *arg, size_t *len)
+static const char *vpd_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+ size_t *len)
{
- struct vpd_sequence *seq = arg;
- struct rtas_ibm_get_vpd_params *p = &seq->params;
+ struct rtas_ibm_get_vpd_params *p;
+ bool init_state;
- if (vpd_sequence_should_stop(seq))
+ p = (struct rtas_ibm_get_vpd_params *)seq->params;
+ init_state = (p->written == 0) ? true : false;
+
+ if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
return NULL;
- if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
+ if (papr_rtas_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
return NULL;
*len = p->written;
return rtas_work_area_raw_buf(p->work_area);
}
-/*
- * Higher-level VPD retrieval code below. These functions use the
- * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based
- * VPD handles for consumption by user space.
- */
-
-/**
- * papr_vpd_run_sequence() - Run a single VPD retrieval sequence.
- * @loc_code: Location code that defines the scope of VPD to return.
- *
- * Context: May sleep. Holds a mutex and an RTAS work area for its
- * duration. Typically performs multiple sleepable slab
- * allocations.
- *
- * Return: A populated &struct vpd_blob on success. Encoded error
- * pointer otherwise.
- */
-static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code)
-{
- const struct vpd_blob *blob;
- struct vpd_sequence seq;
-
- vpd_sequence_begin(&seq, loc_code);
- blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq);
- if (!blob)
- vpd_sequence_set_err(&seq, -ENOMEM);
- vpd_sequence_end(&seq);
-
- if (seq.error) {
- vpd_blob_free(blob);
- return ERR_PTR(seq.error);
- }
-
- return blob;
-}
-
-/**
- * papr_vpd_retrieve() - Return the VPD for a location code.
- * @loc_code: Location code that defines the scope of VPD to return.
- *
- * Run VPD sequences against @loc_code until a blob is successfully
- * instantiated, or a hard error is encountered, or a fatal signal is
- * pending.
- *
- * Context: May sleep.
- * Return: A fully populated VPD blob when successful. Encoded error
- * pointer otherwise.
- */
-static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code)
-{
- const struct vpd_blob *blob;
-
- /*
- * EAGAIN means the sequence errored with a -4 (VPD changed)
- * status from ibm,get-vpd, and we should attempt a new
- * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this
- * should be a transient condition, not something that happens
- * continuously. But we'll stop trying on a fatal signal.
- */
- do {
- blob = papr_vpd_run_sequence(loc_code);
- if (!IS_ERR(blob)) /* Success. */
- break;
- if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */
- break;
- pr_info_ratelimited("VPD changed during retrieval, retrying\n");
- cond_resched();
- } while (!fatal_signal_pending(current));
-
- return blob;
-}
-
-static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off)
-{
- const struct vpd_blob *blob = file->private_data;
-
- /* bug: we should not instantiate a handle without any data attached. */
- if (!vpd_blob_has_data(blob)) {
- pr_err_once("handle without data\n");
- return -EIO;
- }
-
- return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
-}
-
-static int papr_vpd_handle_release(struct inode *inode, struct file *file)
-{
- const struct vpd_blob *blob = file->private_data;
-
- vpd_blob_free(blob);
-
- return 0;
-}
-
-static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence)
-{
- const struct vpd_blob *blob = file->private_data;
-
- return fixed_size_llseek(file, off, whence, blob->len);
-}
-
-
static const struct file_operations papr_vpd_handle_ops = {
- .read = papr_vpd_handle_read,
- .llseek = papr_vpd_handle_seek,
- .release = papr_vpd_handle_release,
+ .read = papr_rtas_common_handle_read,
+ .llseek = papr_rtas_common_handle_seek,
+ .release = papr_rtas_common_handle_release,
};
/**
@@ -460,10 +210,9 @@ static const struct file_operations papr_vpd_handle_ops = {
*/
static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
{
+ struct rtas_ibm_get_vpd_params vpd_params = {};
+ struct papr_rtas_sequence seq = {};
struct papr_location_code klc;
- const struct vpd_blob *blob;
- struct file *file;
- long err;
int fd;
if (copy_from_user(&klc, ulc, sizeof(klc)))
@@ -472,30 +221,19 @@ static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str)))
return -EINVAL;
- blob = papr_vpd_retrieve(&klc);
- if (IS_ERR(blob))
- return PTR_ERR(blob);
+ seq = (struct papr_rtas_sequence) {
+ .begin = vpd_sequence_begin,
+ .end = vpd_sequence_end,
+ .work = vpd_sequence_fill_work_area,
+ };
- fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
- if (fd < 0) {
- err = fd;
- goto free_blob;
- }
+ vpd_params.loc_code = &klc;
+ seq.params = (void *)&vpd_params;
+
+ fd = papr_rtas_setup_file_interface(&seq, &papr_vpd_handle_ops,
+ "[papr-vpd]");
- file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops,
- (void *)blob, O_RDONLY,
- FMODE_LSEEK | FMODE_PREAD);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- goto put_fd;
- }
- fd_install(fd, file);
return fd;
-put_fd:
- put_unused_fd(fd);
-free_blob:
- vpd_blob_free(blob);
- return err;
}
/*
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index e14493685fe8..4a59ed1d62ce 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -207,7 +207,7 @@ unsigned int cpm2_get_irq(void)
if (irq == 0)
return(-1);
- return irq_linear_revmap(cpm2_pic_host, irq);
+ return irq_find_mapping(cpm2_pic_host, irq);
}
static int cpm2_pic_host_map(struct irq_domain *h, unsigned int virq,
@@ -259,7 +259,8 @@ void cpm2_pic_init(struct device_node *node)
out_be32(&cpm2_intctl->ic_scprrl, 0x05309770);
/* create a legacy host */
- cpm2_pic_host = irq_domain_add_linear(node, 64, &cpm2_pic_host_ops, NULL);
+ cpm2_pic_host = irq_domain_create_linear(of_fwnode_handle(node), 64,
+ &cpm2_pic_host_ops, NULL);
if (cpm2_pic_host == NULL) {
printk(KERN_ERR "CPM2 PIC: failed to allocate irq host!\n");
return;
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 47db732981a8..e22fc638dbc7 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -138,7 +138,7 @@ static void __cpm2_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
out_be32(&iop->dat, cpm2_gc->cpdata);
}
-static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
@@ -150,6 +150,8 @@ static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
__cpm2_gpio32_set(mm_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+
+ return 0;
}
static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
@@ -208,7 +210,7 @@ int cpm2_gpiochip_add32(struct device *dev)
gc->direction_input = cpm2_gpio32_dir_in;
gc->direction_output = cpm2_gpio32_dir_out;
gc->get = cpm2_gpio32_get;
- gc->set = cpm2_gpio32_set;
+ gc->set_rv = cpm2_gpio32_set;
gc->parent = dev;
gc->owner = THIS_MODULE;
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index fb502b72fca1..b6f9774038e1 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -175,7 +175,7 @@ unsigned int ehv_pic_get_irq(void)
* this will also setup revmap[] in the slow path for the first
* time, next calls will always use fast path by indexing revmap
*/
- return irq_linear_revmap(global_ehv_pic->irqhost, irq);
+ return irq_find_mapping(global_ehv_pic->irqhost, irq);
}
static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node,
@@ -269,8 +269,9 @@ void __init ehv_pic_init(void)
return;
}
- ehv_pic->irqhost = irq_domain_add_linear(np, NR_EHV_PIC_INTS,
- &ehv_pic_host_ops, ehv_pic);
+ ehv_pic->irqhost = irq_domain_create_linear(of_fwnode_handle(np),
+ NR_EHV_PIC_INTS,
+ &ehv_pic_host_ops, ehv_pic);
if (!ehv_pic->irqhost) {
of_node_put(np);
kfree(ehv_pic);
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 7b9a5ea9cad9..4fe8a7b1b288 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -412,7 +412,7 @@ static int fsl_of_msi_probe(struct platform_device *dev)
}
platform_set_drvdata(dev, msi);
- msi->irqhost = irq_domain_add_linear(dev->dev.of_node,
+ msi->irqhost = irq_domain_create_linear(of_fwnode_handle(dev->dev.of_node),
NR_MSI_IRQS_MAX, &fsl_msi_host_ops, msi);
if (msi->irqhost == NULL) {
diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c
index a6c424680c37..0bc3f0b36528 100644
--- a/arch/powerpc/sysdev/ge/ge_pic.c
+++ b/arch/powerpc/sysdev/ge/ge_pic.c
@@ -214,8 +214,9 @@ void __init gef_pic_init(struct device_node *np)
}
/* Setup an irq_domain structure */
- gef_pic_irq_host = irq_domain_add_linear(np, GEF_PIC_NUM_IRQS,
- &gef_pic_host_ops, NULL);
+ gef_pic_irq_host = irq_domain_create_linear(of_fwnode_handle(np),
+ GEF_PIC_NUM_IRQS,
+ &gef_pic_host_ops, NULL);
if (gef_pic_irq_host == NULL)
return;
@@ -244,7 +245,7 @@ unsigned int gef_pic_get_irq(void)
if (active & (0x1 << hwirq))
break;
}
- virq = irq_linear_revmap(gef_pic_irq_host,
+ virq = irq_find_mapping(gef_pic_irq_host,
(irq_hw_number_t)hwirq);
}
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 06e391485da7..99bb2b916949 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -260,8 +260,8 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
raw_spin_unlock_irqrestore(&i8259_lock, flags);
/* create a legacy host */
- i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
- &i8259_host_ops, NULL);
+ i8259_host = irq_domain_create_legacy(of_fwnode_handle(node), NR_IRQS_LEGACY, 0, 0,
+ &i8259_host_ops, NULL);
if (i8259_host == NULL) {
printk(KERN_ERR "i8259: failed to allocate irq host !\n");
return;
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index a35be0232978..70be2105865d 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -711,8 +711,9 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
if (ipic == NULL)
return NULL;
- ipic->irqhost = irq_domain_add_linear(node, NR_IPIC_INTS,
- &ipic_host_ops, ipic);
+ ipic->irqhost = irq_domain_create_linear(of_fwnode_handle(node),
+ NR_IPIC_INTS,
+ &ipic_host_ops, ipic);
if (ipic->irqhost == NULL) {
kfree(ipic);
return NULL;
@@ -800,7 +801,7 @@ unsigned int ipic_get_irq(void)
if (irq == 0) /* 0 --> no irq is pending */
return 0;
- return irq_linear_revmap(primary_ipic->irqhost, irq);
+ return irq_find_mapping(primary_ipic->irqhost, irq);
}
#ifdef CONFIG_SUSPEND
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 4afbab83a2e2..ad7310bba00b 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -27,6 +27,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/syscore_ops.h>
#include <linux/ratelimit.h>
#include <linux/pgtable.h>
@@ -474,9 +475,9 @@ static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
addr = addr | ((u64)readl(base + HT_MSI_ADDR_HI) << 32);
}
- printk(KERN_DEBUG "mpic: - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
- PCI_SLOT(devfn), PCI_FUNC(devfn),
- flags & HT_MSI_FLAGS_ENABLE ? "enabled" : "disabled", addr);
+ pr_debug("mpic: - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
+ PCI_SLOT(devfn), PCI_FUNC(devfn),
+ str_enabled_disabled(flags & HT_MSI_FLAGS_ENABLE), addr);
if (!(flags & HT_MSI_FLAGS_ENABLE))
writeb(flags | HT_MSI_FLAGS_ENABLE, base + HT_MSI_FLAGS);
@@ -1483,9 +1484,9 @@ struct mpic * __init mpic_alloc(struct device_node *node,
mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
mpic->isu_mask = (1 << mpic->isu_shift) - 1;
- mpic->irqhost = irq_domain_add_linear(mpic->node,
- intvec_top,
- &mpic_host_ops, mpic);
+ mpic->irqhost = irq_domain_create_linear(of_fwnode_handle(mpic->node),
+ intvec_top,
+ &mpic_host_ops, mpic);
/*
* FIXME: The code leaks the MPIC object and mappings here; this
@@ -1785,7 +1786,7 @@ static unsigned int _mpic_get_one_irq(struct mpic *mpic, int reg)
return 0;
}
- return irq_linear_revmap(mpic->irqhost, src);
+ return irq_find_mapping(mpic->irqhost, src);
}
unsigned int mpic_get_one_irq(struct mpic *mpic)
@@ -1823,7 +1824,7 @@ unsigned int mpic_get_coreint_irq(void)
return 0;
}
- return irq_linear_revmap(mpic->irqhost, src);
+ return irq_find_mapping(mpic->irqhost, src);
#else
return 0;
#endif
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 0e42f7bad7db..07d0f6a83879 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -404,8 +404,8 @@ void __init tsi108_pci_int_init(struct device_node *node)
{
DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
- pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
- &pci_irq_domain_ops, NULL);
+ pci_irq_host = irq_domain_create_legacy(of_fwnode_handle(node), NR_IRQS_LEGACY, 0, 0,
+ &pci_irq_domain_ops, NULL);
if (pci_irq_host == NULL) {
printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
return;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index dc2e61837396..f10592405024 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1464,7 +1464,7 @@ static const struct irq_domain_ops xive_irq_domain_ops = {
static void __init xive_init_host(struct device_node *np)
{
- xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
+ xive_irq_domain = irq_domain_create_tree(of_fwnode_handle(np), &xive_irq_domain_ops, NULL);
if (WARN_ON(xive_irq_domain == NULL))
return;
irq_set_default_domain(xive_irq_domain);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 88abffa8b54c..cb3a3244ae6f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1770,7 +1770,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
sp + STACK_INT_FRAME_REGS);
break;
}
- printf("--- Exception: %lx %s at ", regs.trap,
+ printf("---- Exception: %lx %s at ", regs.trap,
getvecname(TRAP(&regs)));
pc = regs.nip;
lr = regs.link;
diff --git a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi
index c18822ec849f..58cd546392e0 100644
--- a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi
+++ b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi
@@ -341,7 +341,7 @@
1024 1024 1024 1024>;
snps,priority = <0 1 2 3 4 5 6 7>;
snps,dma-masters = <2>;
- snps,data-width = <4>;
+ snps,data-width = <2>;
status = "disabled";
};
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
index c67095a3d669..cd9b776602f8 100644
--- a/arch/riscv/crypto/Kconfig
+++ b/arch/riscv/crypto/Kconfig
@@ -18,16 +18,6 @@ config CRYPTO_AES_RISCV64
- Zvkb vector crypto extension (CTR)
- Zvkg vector crypto extension (XTS)
-config CRYPTO_CHACHA_RISCV64
- tristate "Ciphers: ChaCha"
- depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
- select CRYPTO_SKCIPHER
- help
- Length-preserving ciphers: ChaCha20 stream cipher algorithm
-
- Architecture: riscv64 using:
- - Zvkb vector crypto extension
-
config CRYPTO_GHASH_RISCV64
tristate "Hash functions: GHASH"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
@@ -38,17 +28,6 @@ config CRYPTO_GHASH_RISCV64
Architecture: riscv64 using:
- Zvkg vector crypto extension
-config CRYPTO_SHA256_RISCV64
- tristate "Hash functions: SHA-224 and SHA-256"
- depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
- select CRYPTO_SHA256
- help
- SHA-224 and SHA-256 secure hash algorithm (FIPS 180)
-
- Architecture: riscv64 using:
- - Zvknha or Zvknhb vector crypto extensions
- - Zvkb vector crypto extension
-
config CRYPTO_SHA512_RISCV64
tristate "Hash functions: SHA-384 and SHA-512"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
@@ -64,7 +43,7 @@ config CRYPTO_SM3_RISCV64
tristate "Hash functions: SM3 (ShangMi 3)"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_HASH
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
help
SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
index 247c7bc7288c..e10e8257734e 100644
--- a/arch/riscv/crypto/Makefile
+++ b/arch/riscv/crypto/Makefile
@@ -4,15 +4,9 @@ obj-$(CONFIG_CRYPTO_AES_RISCV64) += aes-riscv64.o
aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o
-obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
-chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
-
obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
-obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
-sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
-
obj-$(CONFIG_CRYPTO_SHA512_RISCV64) += sha512-riscv64.o
sha512-riscv64-y := sha512-riscv64-glue.o sha512-riscv64-zvknhb-zvkb.o
diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c
deleted file mode 100644
index 10b46f36375a..000000000000
--- a/arch/riscv/crypto/chacha-riscv64-glue.c
+++ /dev/null
@@ -1,101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ChaCha20 using the RISC-V vector crypto extensions
- *
- * Copyright (C) 2023 SiFive, Inc.
- * Author: Jerry Shih <jerry.shih@sifive.com>
- */
-
-#include <asm/simd.h>
-#include <asm/vector.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/linkage.h>
-#include <linux/module.h>
-
-asmlinkage void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out,
- size_t len, const u32 iv[4]);
-
-static int riscv64_chacha20_crypt(struct skcipher_request *req)
-{
- u32 iv[CHACHA_IV_SIZE / sizeof(u32)];
- u8 block_buffer[CHACHA_BLOCK_SIZE];
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- unsigned int tail_bytes;
- int err;
-
- iv[0] = get_unaligned_le32(req->iv);
- iv[1] = get_unaligned_le32(req->iv + 4);
- iv[2] = get_unaligned_le32(req->iv + 8);
- iv[3] = get_unaligned_le32(req->iv + 12);
-
- err = skcipher_walk_virt(&walk, req, false);
- while (walk.nbytes) {
- nbytes = walk.nbytes & ~(CHACHA_BLOCK_SIZE - 1);
- tail_bytes = walk.nbytes & (CHACHA_BLOCK_SIZE - 1);
- kernel_vector_begin();
- if (nbytes) {
- chacha20_zvkb(ctx->key, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes, iv);
- iv[0] += nbytes / CHACHA_BLOCK_SIZE;
- }
- if (walk.nbytes == walk.total && tail_bytes > 0) {
- memcpy(block_buffer, walk.src.virt.addr + nbytes,
- tail_bytes);
- chacha20_zvkb(ctx->key, block_buffer, block_buffer,
- CHACHA_BLOCK_SIZE, iv);
- memcpy(walk.dst.virt.addr + nbytes, block_buffer,
- tail_bytes);
- tail_bytes = 0;
- }
- kernel_vector_end();
-
- err = skcipher_walk_done(&walk, tail_bytes);
- }
-
- return err;
-}
-
-static struct skcipher_alg riscv64_chacha_alg = {
- .setkey = chacha20_setkey,
- .encrypt = riscv64_chacha20_crypt,
- .decrypt = riscv64_chacha20_crypt,
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .base = {
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct chacha_ctx),
- .cra_priority = 300,
- .cra_name = "chacha20",
- .cra_driver_name = "chacha20-riscv64-zvkb",
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init riscv64_chacha_mod_init(void)
-{
- if (riscv_isa_extension_available(NULL, ZVKB) &&
- riscv_vector_vlen() >= 128)
- return crypto_register_skcipher(&riscv64_chacha_alg);
-
- return -ENODEV;
-}
-
-static void __exit riscv64_chacha_mod_exit(void)
-{
- crypto_unregister_skcipher(&riscv64_chacha_alg);
-}
-
-module_init(riscv64_chacha_mod_init);
-module_exit(riscv64_chacha_mod_exit);
-
-MODULE_DESCRIPTION("ChaCha20 (RISC-V accelerated)");
-MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/riscv/crypto/ghash-riscv64-glue.c b/arch/riscv/crypto/ghash-riscv64-glue.c
index 312e7891fd0a..d86073d25387 100644
--- a/arch/riscv/crypto/ghash-riscv64-glue.c
+++ b/arch/riscv/crypto/ghash-riscv64-glue.c
@@ -11,11 +11,16 @@
#include <asm/simd.h>
#include <asm/vector.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <linux/linkage.h>
+#include <crypto/utils.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
asmlinkage void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
size_t len);
@@ -26,8 +31,6 @@ struct riscv64_ghash_tfm_ctx {
struct riscv64_ghash_desc_ctx {
be128 accumulator;
- u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
};
static int riscv64_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
@@ -78,50 +81,24 @@ static int riscv64_ghash_update(struct shash_desc *desc, const u8 *src,
{
const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int len;
-
- if (dctx->bytes) {
- if (dctx->bytes + srclen < GHASH_BLOCK_SIZE) {
- memcpy(dctx->buffer + dctx->bytes, src, srclen);
- dctx->bytes += srclen;
- return 0;
- }
- memcpy(dctx->buffer + dctx->bytes, src,
- GHASH_BLOCK_SIZE - dctx->bytes);
- riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
- GHASH_BLOCK_SIZE);
- src += GHASH_BLOCK_SIZE - dctx->bytes;
- srclen -= GHASH_BLOCK_SIZE - dctx->bytes;
- dctx->bytes = 0;
- }
-
- len = round_down(srclen, GHASH_BLOCK_SIZE);
- if (len) {
- riscv64_ghash_blocks(tctx, dctx, src, len);
- src += len;
- srclen -= len;
- }
- if (srclen) {
- memcpy(dctx->buffer, src, srclen);
- dctx->bytes = srclen;
- }
-
- return 0;
+ riscv64_ghash_blocks(tctx, dctx, src,
+ round_down(srclen, GHASH_BLOCK_SIZE));
+ return srclen - round_down(srclen, GHASH_BLOCK_SIZE);
}
-static int riscv64_ghash_final(struct shash_desc *desc, u8 *out)
+static int riscv64_ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- int i;
- if (dctx->bytes) {
- for (i = dctx->bytes; i < GHASH_BLOCK_SIZE; i++)
- dctx->buffer[i] = 0;
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
- riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
- GHASH_BLOCK_SIZE);
+ memcpy(buf, src, len);
+ riscv64_ghash_blocks(tctx, dctx, buf, GHASH_BLOCK_SIZE);
+ memzero_explicit(buf, sizeof(buf));
}
memcpy(out, &dctx->accumulator, GHASH_DIGEST_SIZE);
@@ -131,7 +108,7 @@ static int riscv64_ghash_final(struct shash_desc *desc, u8 *out)
static struct shash_alg riscv64_ghash_alg = {
.init = riscv64_ghash_init,
.update = riscv64_ghash_update,
- .final = riscv64_ghash_final,
+ .finup = riscv64_ghash_finup,
.setkey = riscv64_ghash_setkey,
.descsize = sizeof(struct riscv64_ghash_desc_ctx),
.digestsize = GHASH_DIGEST_SIZE,
@@ -139,6 +116,7 @@ static struct shash_alg riscv64_ghash_alg = {
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct riscv64_ghash_tfm_ctx),
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_name = "ghash",
.cra_driver_name = "ghash-riscv64-zvkg",
.cra_module = THIS_MODULE,
diff --git a/arch/riscv/crypto/sha256-riscv64-glue.c b/arch/riscv/crypto/sha256-riscv64-glue.c
deleted file mode 100644
index 71e051e40a64..000000000000
--- a/arch/riscv/crypto/sha256-riscv64-glue.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SHA-256 and SHA-224 using the RISC-V vector crypto extensions
- *
- * Copyright (C) 2022 VRULL GmbH
- * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
- *
- * Copyright (C) 2023 SiFive, Inc.
- * Author: Jerry Shih <jerry.shih@sifive.com>
- */
-
-#include <asm/simd.h>
-#include <asm/vector.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha256_base.h>
-#include <linux/linkage.h>
-#include <linux/module.h>
-
-/*
- * Note: the asm function only uses the 'state' field of struct sha256_state.
- * It is assumed to be the first field.
- */
-asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
- struct sha256_state *state, const u8 *data, int num_blocks);
-
-static int riscv64_sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- /*
- * Ensure struct sha256_state begins directly with the SHA-256
- * 256-bit internal state, as this is what the asm function expects.
- */
- BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
-
- if (crypto_simd_usable()) {
- kernel_vector_begin();
- sha256_base_do_update(desc, data, len,
- sha256_transform_zvknha_or_zvknhb_zvkb);
- kernel_vector_end();
- } else {
- crypto_sha256_update(desc, data, len);
- }
- return 0;
-}
-
-static int riscv64_sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- if (crypto_simd_usable()) {
- kernel_vector_begin();
- if (len)
- sha256_base_do_update(
- desc, data, len,
- sha256_transform_zvknha_or_zvknhb_zvkb);
- sha256_base_do_finalize(
- desc, sha256_transform_zvknha_or_zvknhb_zvkb);
- kernel_vector_end();
-
- return sha256_base_finish(desc, out);
- }
-
- return crypto_sha256_finup(desc, data, len, out);
-}
-
-static int riscv64_sha256_final(struct shash_desc *desc, u8 *out)
-{
- return riscv64_sha256_finup(desc, NULL, 0, out);
-}
-
-static int riscv64_sha256_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_base_init(desc) ?:
- riscv64_sha256_finup(desc, data, len, out);
-}
-
-static struct shash_alg riscv64_sha256_algs[] = {
- {
- .init = sha256_base_init,
- .update = riscv64_sha256_update,
- .final = riscv64_sha256_final,
- .finup = riscv64_sha256_finup,
- .digest = riscv64_sha256_digest,
- .descsize = sizeof(struct sha256_state),
- .digestsize = SHA256_DIGEST_SIZE,
- .base = {
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_priority = 300,
- .cra_name = "sha256",
- .cra_driver_name = "sha256-riscv64-zvknha_or_zvknhb-zvkb",
- .cra_module = THIS_MODULE,
- },
- }, {
- .init = sha224_base_init,
- .update = riscv64_sha256_update,
- .final = riscv64_sha256_final,
- .finup = riscv64_sha256_finup,
- .descsize = sizeof(struct sha256_state),
- .digestsize = SHA224_DIGEST_SIZE,
- .base = {
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_priority = 300,
- .cra_name = "sha224",
- .cra_driver_name = "sha224-riscv64-zvknha_or_zvknhb-zvkb",
- .cra_module = THIS_MODULE,
- },
- },
-};
-
-static int __init riscv64_sha256_mod_init(void)
-{
- /* Both zvknha and zvknhb provide the SHA-256 instructions. */
- if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
- riscv_isa_extension_available(NULL, ZVKNHB)) &&
- riscv_isa_extension_available(NULL, ZVKB) &&
- riscv_vector_vlen() >= 128)
- return crypto_register_shashes(riscv64_sha256_algs,
- ARRAY_SIZE(riscv64_sha256_algs));
-
- return -ENODEV;
-}
-
-static void __exit riscv64_sha256_mod_exit(void)
-{
- crypto_unregister_shashes(riscv64_sha256_algs,
- ARRAY_SIZE(riscv64_sha256_algs));
-}
-
-module_init(riscv64_sha256_mod_init);
-module_exit(riscv64_sha256_mod_exit);
-
-MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
-MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha224");
diff --git a/arch/riscv/crypto/sha512-riscv64-glue.c b/arch/riscv/crypto/sha512-riscv64-glue.c
index 43b56a08aeb5..4634fca78ae2 100644
--- a/arch/riscv/crypto/sha512-riscv64-glue.c
+++ b/arch/riscv/crypto/sha512-riscv64-glue.c
@@ -14,7 +14,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha512_base.h>
-#include <linux/linkage.h>
+#include <linux/kernel.h>
#include <linux/module.h>
/*
@@ -24,8 +24,8 @@
asmlinkage void sha512_transform_zvknhb_zvkb(
struct sha512_state *state, const u8 *data, int num_blocks);
-static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static void sha512_block(struct sha512_state *state, const u8 *data,
+ int num_blocks)
{
/*
* Ensure struct sha512_state begins directly with the SHA-512
@@ -35,35 +35,24 @@ static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data,
if (crypto_simd_usable()) {
kernel_vector_begin();
- sha512_base_do_update(desc, data, len,
- sha512_transform_zvknhb_zvkb);
+ sha512_transform_zvknhb_zvkb(state, data, num_blocks);
kernel_vector_end();
} else {
- crypto_sha512_update(desc, data, len);
+ sha512_generic_block_fn(state, data, num_blocks);
}
- return 0;
}
-static int riscv64_sha512_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- if (crypto_simd_usable()) {
- kernel_vector_begin();
- if (len)
- sha512_base_do_update(desc, data, len,
- sha512_transform_zvknhb_zvkb);
- sha512_base_do_finalize(desc, sha512_transform_zvknhb_zvkb);
- kernel_vector_end();
-
- return sha512_base_finish(desc, out);
- }
-
- return crypto_sha512_finup(desc, data, len, out);
+ return sha512_base_do_update_blocks(desc, data, len, sha512_block);
}
-static int riscv64_sha512_final(struct shash_desc *desc, u8 *out)
+static int riscv64_sha512_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- return riscv64_sha512_finup(desc, NULL, 0, out);
+ sha512_base_do_finup(desc, data, len, sha512_block);
+ return sha512_base_finish(desc, out);
}
static int riscv64_sha512_digest(struct shash_desc *desc, const u8 *data,
@@ -77,14 +66,15 @@ static struct shash_alg riscv64_sha512_algs[] = {
{
.init = sha512_base_init,
.update = riscv64_sha512_update,
- .final = riscv64_sha512_final,
.finup = riscv64_sha512_finup,
.digest = riscv64_sha512_digest,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA512_DIGEST_SIZE,
.base = {
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_name = "sha512",
.cra_driver_name = "sha512-riscv64-zvknhb-zvkb",
.cra_module = THIS_MODULE,
@@ -92,13 +82,14 @@ static struct shash_alg riscv64_sha512_algs[] = {
}, {
.init = sha384_base_init,
.update = riscv64_sha512_update,
- .final = riscv64_sha512_final,
.finup = riscv64_sha512_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.digestsize = SHA384_DIGEST_SIZE,
.base = {
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_name = "sha384",
.cra_driver_name = "sha384-riscv64-zvknhb-zvkb",
.cra_module = THIS_MODULE,
diff --git a/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
index 3a9ae210f915..89f4a10d12dd 100644
--- a/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
+++ b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
@@ -43,7 +43,7 @@
// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknhb')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-#include <linux/cfi_types.h>
+#include <linux/linkage.h>
.text
.option arch, +zvknhb, +zvkb
@@ -95,7 +95,7 @@
// void sha512_transform_zvknhb_zvkb(u64 state[8], const u8 *data,
// int num_blocks);
-SYM_TYPED_FUNC_START(sha512_transform_zvknhb_zvkb)
+SYM_FUNC_START(sha512_transform_zvknhb_zvkb)
// Setup mask for the vmerge to replace the first word (idx==0) in
// message scheduling. There are 4 words, so an 8-bit mask suffices.
diff --git a/arch/riscv/crypto/sm3-riscv64-glue.c b/arch/riscv/crypto/sm3-riscv64-glue.c
index e1737a970c7c..abdfe4a63a27 100644
--- a/arch/riscv/crypto/sm3-riscv64-glue.c
+++ b/arch/riscv/crypto/sm3-riscv64-glue.c
@@ -13,8 +13,9 @@
#include <asm/vector.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
+#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
-#include <linux/linkage.h>
+#include <linux/kernel.h>
#include <linux/module.h>
/*
@@ -24,8 +25,8 @@
asmlinkage void sm3_transform_zvksh_zvkb(
struct sm3_state *state, const u8 *data, int num_blocks);
-static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static void sm3_block(struct sm3_state *state, const u8 *data,
+ int num_blocks)
{
/*
* Ensure struct sm3_state begins directly with the SM3
@@ -35,52 +36,36 @@ static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data,
if (crypto_simd_usable()) {
kernel_vector_begin();
- sm3_base_do_update(desc, data, len, sm3_transform_zvksh_zvkb);
+ sm3_transform_zvksh_zvkb(state, data, num_blocks);
kernel_vector_end();
} else {
- sm3_update(shash_desc_ctx(desc), data, len);
+ sm3_block_generic(state, data, num_blocks);
}
- return 0;
}
-static int riscv64_sm3_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct sm3_state *ctx;
-
- if (crypto_simd_usable()) {
- kernel_vector_begin();
- if (len)
- sm3_base_do_update(desc, data, len,
- sm3_transform_zvksh_zvkb);
- sm3_base_do_finalize(desc, sm3_transform_zvksh_zvkb);
- kernel_vector_end();
-
- return sm3_base_finish(desc, out);
- }
-
- ctx = shash_desc_ctx(desc);
- if (len)
- sm3_update(ctx, data, len);
- sm3_final(ctx, out);
-
- return 0;
+ return sm3_base_do_update_blocks(desc, data, len, sm3_block);
}
-static int riscv64_sm3_final(struct shash_desc *desc, u8 *out)
+static int riscv64_sm3_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- return riscv64_sm3_finup(desc, NULL, 0, out);
+ sm3_base_do_finup(desc, data, len, sm3_block);
+ return sm3_base_finish(desc, out);
}
static struct shash_alg riscv64_sm3_alg = {
.init = sm3_base_init,
.update = riscv64_sm3_update,
- .final = riscv64_sm3_final,
.finup = riscv64_sm3_finup,
- .descsize = sizeof(struct sm3_state),
+ .descsize = SM3_STATE_SIZE,
.digestsize = SM3_DIGEST_SIZE,
.base = {
.cra_blocksize = SM3_BLOCK_SIZE,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_priority = 300,
.cra_name = "sm3",
.cra_driver_name = "sm3-riscv64-zvksh-zvkb",
diff --git a/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
index a2b65d961c04..4fe754846f65 100644
--- a/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
+++ b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
@@ -43,7 +43,7 @@
// - RISC-V Vector SM3 Secure Hash extension ('Zvksh')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-#include <linux/cfi_types.h>
+#include <linux/linkage.h>
.text
.option arch, +zvksh, +zvkb
@@ -81,7 +81,7 @@
.endm
// void sm3_transform_zvksh_zvkb(u32 state[8], const u8 *data, int num_blocks);
-SYM_TYPED_FUNC_START(sm3_transform_zvksh_zvkb)
+SYM_FUNC_START(sm3_transform_zvksh_zvkb)
// Load the state and endian-swap each 32-bit word.
vsetivli zero, 8, e32, m2, ta, ma
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index cd627ec289f1..bfc8ea5f9319 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -52,6 +52,8 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
DECLARE_DO_ERROR_INFO(do_trap_break);
+asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs);
+asmlinkage void ret_from_fork_user(struct pt_regs *regs);
asmlinkage void handle_bad_stack(struct pt_regs *regs);
asmlinkage void do_page_fault(struct pt_regs *regs);
asmlinkage void do_irq(struct pt_regs *regs);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 33a5a9f2a0d4..0fb338000c6d 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -319,17 +319,21 @@ SYM_CODE_END(handle_kernel_stack_overflow)
ASM_NOKPROBE(handle_kernel_stack_overflow)
#endif
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_kernel_asm)
+ call schedule_tail
+ move a0, s1 /* fn_arg */
+ move a1, s0 /* fn */
+ move a2, sp /* pt_regs */
+ call ret_from_fork_kernel
+ j ret_from_exception
+SYM_CODE_END(ret_from_fork_kernel_asm)
+
+SYM_CODE_START(ret_from_fork_user_asm)
call schedule_tail
- beqz s0, 1f /* not from kernel thread */
- /* Call fn(arg) */
- move a0, s1
- jalr s0
-1:
move a0, sp /* pt_regs */
- call syscall_exit_to_user_mode
+ call ret_from_fork_user
j ret_from_exception
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_user_asm)
#ifdef CONFIG_IRQ_STACKS
/*
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 15d8f75902f8..bbf7ec6a75c0 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -17,7 +17,9 @@
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <linux/personality.h>
+#include <linux/entry-common.h>
+#include <asm/asm-prototypes.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/csr.h>
@@ -36,7 +38,8 @@ unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
-extern asmlinkage void ret_from_fork(void);
+extern asmlinkage void ret_from_fork_kernel_asm(void);
+extern asmlinkage void ret_from_fork_user_asm(void);
void noinstr arch_cpu_idle(void)
{
@@ -206,6 +209,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
+{
+ fn(fn_arg);
+
+ syscall_exit_to_user_mode(regs);
+}
+
+asmlinkage void ret_from_fork_user(struct pt_regs *regs)
+{
+ syscall_exit_to_user_mode(regs);
+}
+
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
@@ -228,6 +243,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.s[0] = (unsigned long)args->fn;
p->thread.s[1] = (unsigned long)args->fn_arg;
+ p->thread.ra = (unsigned long)ret_from_fork_kernel_asm;
} else {
*childregs = *(current_pt_regs());
/* Turn off status.VS */
@@ -237,12 +253,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (clone_flags & CLONE_SETTLS)
childregs->tp = tls;
childregs->a0 = 0; /* Return value of fork() */
- p->thread.s[0] = 0;
+ p->thread.ra = (unsigned long)ret_from_fork_user_asm;
}
p->thread.riscv_v_flags = 0;
if (has_vector() || has_xtheadvector())
riscv_v_thread_alloc(p);
- p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index b1c46153606a..0baec92d2f55 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-y += crypto/
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
diff --git a/arch/riscv/lib/crypto/Kconfig b/arch/riscv/lib/crypto/Kconfig
new file mode 100644
index 000000000000..47c99ea97ce2
--- /dev/null
+++ b/arch/riscv/lib/crypto/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_RISCV64
+ tristate
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
+
+config CRYPTO_SHA256_RISCV64
+ tristate
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
+ select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/riscv/lib/crypto/Makefile b/arch/riscv/lib/crypto/Makefile
new file mode 100644
index 000000000000..b7cb877a2c07
--- /dev/null
+++ b/arch/riscv/lib/crypto/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
+chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
+sha256-riscv64-y := sha256.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-glue.c b/arch/riscv/lib/crypto/chacha-riscv64-glue.c
new file mode 100644
index 000000000000..8c3f11d79be3
--- /dev/null
+++ b/arch/riscv/lib/crypto/chacha-riscv64-glue.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ChaCha stream cipher (RISC-V optimized)
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
+
+asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
+ size_t nblocks, int nrounds);
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 block_buffer[CHACHA_BLOCK_SIZE];
+ unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
+ unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
+
+ if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_vector_begin();
+ if (full_blocks) {
+ chacha_zvkb(state, src, dst, full_blocks, nrounds);
+ src += full_blocks * CHACHA_BLOCK_SIZE;
+ dst += full_blocks * CHACHA_BLOCK_SIZE;
+ }
+ if (tail_bytes) {
+ memcpy(block_buffer, src, tail_bytes);
+ chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
+ memcpy(dst, block_buffer, tail_bytes);
+ }
+ kernel_vector_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return static_key_enabled(&use_zvkb);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init riscv64_chacha_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ static_branch_enable(&use_zvkb);
+ return 0;
+}
+subsys_initcall(riscv64_chacha_mod_init);
+
+static void __exit riscv64_chacha_mod_exit(void)
+{
+}
+module_exit(riscv64_chacha_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/riscv/crypto/chacha-riscv64-zvkb.S b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
index bf057737ac69..b777d0b4e379 100644
--- a/arch/riscv/crypto/chacha-riscv64-zvkb.S
+++ b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
@@ -46,11 +46,11 @@
.text
.option arch, +zvkb
-#define KEYP a0
+#define STATEP a0
#define INP a1
#define OUTP a2
-#define LEN a3
-#define IVP a4
+#define NBLOCKS a3
+#define NROUNDS a4
#define CONSTS0 a5
#define CONSTS1 a6
@@ -59,7 +59,7 @@
#define TMP t1
#define VL t2
#define STRIDE t3
-#define NROUNDS t4
+#define ROUND_CTR t4
#define KEY0 s0
#define KEY1 s1
#define KEY2 s2
@@ -132,14 +132,16 @@
vror.vi \b3, \b3, 32 - 7
.endm
-// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
-// const u32 iv[4]);
+// void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
+// size_t nblocks, int nrounds);
//
-// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
-// The counter is treated as 32-bit, following the RFC7539 convention.
-SYM_FUNC_START(chacha20_zvkb)
- srli LEN, LEN, 6 // Bytes to blocks
-
+// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
+//
+// |state| gives the ChaCha state matrix, including the 32-bit counter in
+// state->x[12] following the RFC7539 convention; note that this differs from
+// the original Salsa20 paper which uses a 64-bit counter in state->x[12..13].
+// The updated 32-bit counter is written back to state->x[12] before returning.
+SYM_FUNC_START(chacha_zvkb)
addi sp, sp, -96
sd s0, 0(sp)
sd s1, 8(sp)
@@ -157,26 +159,26 @@ SYM_FUNC_START(chacha20_zvkb)
li STRIDE, 64
// Set up the initial state matrix in scalar registers.
- li CONSTS0, 0x61707865 // "expa" little endian
- li CONSTS1, 0x3320646e // "nd 3" little endian
- li CONSTS2, 0x79622d32 // "2-by" little endian
- li CONSTS3, 0x6b206574 // "te k" little endian
- lw KEY0, 0(KEYP)
- lw KEY1, 4(KEYP)
- lw KEY2, 8(KEYP)
- lw KEY3, 12(KEYP)
- lw KEY4, 16(KEYP)
- lw KEY5, 20(KEYP)
- lw KEY6, 24(KEYP)
- lw KEY7, 28(KEYP)
- lw COUNTER, 0(IVP)
- lw NONCE0, 4(IVP)
- lw NONCE1, 8(IVP)
- lw NONCE2, 12(IVP)
+ lw CONSTS0, 0(STATEP)
+ lw CONSTS1, 4(STATEP)
+ lw CONSTS2, 8(STATEP)
+ lw CONSTS3, 12(STATEP)
+ lw KEY0, 16(STATEP)
+ lw KEY1, 20(STATEP)
+ lw KEY2, 24(STATEP)
+ lw KEY3, 28(STATEP)
+ lw KEY4, 32(STATEP)
+ lw KEY5, 36(STATEP)
+ lw KEY6, 40(STATEP)
+ lw KEY7, 44(STATEP)
+ lw COUNTER, 48(STATEP)
+ lw NONCE0, 52(STATEP)
+ lw NONCE1, 56(STATEP)
+ lw NONCE2, 60(STATEP)
.Lblock_loop:
// Set vl to the number of blocks to process in this iteration.
- vsetvli VL, LEN, e32, m1, ta, ma
+ vsetvli VL, NBLOCKS, e32, m1, ta, ma
// Set up the initial state matrix for the next VL blocks in v0-v15.
// v{i} holds the i'th 32-bit word of the state matrix for all blocks.
@@ -203,16 +205,16 @@ SYM_FUNC_START(chacha20_zvkb)
// v{16+i} holds the i'th 32-bit word for all blocks.
vlsseg8e32.v v16, (INP), STRIDE
- li NROUNDS, 20
+ mv ROUND_CTR, NROUNDS
.Lnext_doubleround:
- addi NROUNDS, NROUNDS, -2
+ addi ROUND_CTR, ROUND_CTR, -2
// column round
chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
v2, v6, v10, v14, v3, v7, v11, v15
// diagonal round
chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
v2, v7, v8, v13, v3, v4, v9, v14
- bnez NROUNDS, .Lnext_doubleround
+ bnez ROUND_CTR, .Lnext_doubleround
// Load the second half of the input data for each block into v24-v31.
// v{24+i} holds the {8+i}'th 32-bit word for all blocks.
@@ -271,12 +273,13 @@ SYM_FUNC_START(chacha20_zvkb)
// Update the counter, the remaining number of blocks, and the input and
// output pointers according to the number of blocks processed (VL).
add COUNTER, COUNTER, VL
- sub LEN, LEN, VL
+ sub NBLOCKS, NBLOCKS, VL
slli TMP, VL, 6
add OUTP, OUTP, TMP
add INP, INP, TMP
- bnez LEN, .Lblock_loop
+ bnez NBLOCKS, .Lblock_loop
+ sw COUNTER, 48(STATEP)
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
@@ -291,4 +294,4 @@ SYM_FUNC_START(chacha20_zvkb)
ld s11, 88(sp)
addi sp, sp, 96
ret
-SYM_FUNC_END(chacha20_zvkb)
+SYM_FUNC_END(chacha_zvkb)
diff --git a/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
index 8ebcc17de4dc..fad501ad0617 100644
--- a/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
+++ b/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
@@ -43,7 +43,7 @@
// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-#include <linux/cfi_types.h>
+#include <linux/linkage.h>
.text
.option arch, +zvknha, +zvkb
@@ -106,9 +106,9 @@
sha256_4rounds \last, \k3, W3, W0, W1, W2
.endm
-// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
-// int num_blocks);
-SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
+// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[SHA256_STATE_WORDS],
+// const u8 *data, size_t nblocks);
+SYM_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
// Load the round constants into K0-K15.
vsetivli zero, 4, e32, m1, ta, ma
diff --git a/arch/riscv/lib/crypto/sha256.c b/arch/riscv/lib/crypto/sha256.c
new file mode 100644
index 000000000000..71808397dff4
--- /dev/null
+++ b/arch/riscv/lib/crypto/sha256.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 (RISC-V accelerated)
+ *
+ * Copyright (C) 2022 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/vector.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
+ u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions);
+
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (static_branch_likely(&have_extensions)) {
+ kernel_vector_begin();
+ sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks);
+ kernel_vector_end();
+ } else {
+ sha256_blocks_generic(state, data, nblocks);
+ }
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_extensions);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init riscv64_sha256_mod_init(void)
+{
+ /* Both zvknha and zvknhb provide the SHA-256 instructions. */
+ if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
+ riscv_isa_extension_available(NULL, ZVKNHB)) &&
+ riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ static_branch_enable(&have_extensions);
+ return 0;
+}
+subsys_initcall(riscv64_sha256_mod_init);
+
+static void __exit riscv64_sha256_mod_exit(void)
+{
+}
+module_exit(riscv64_sha256_mod_exit);
+
+MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 99fb986fca6e..0c16dc443e2f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -146,6 +146,7 @@ config S390
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_KERNEL_PMD_MKWRITE
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index d04e9b89d14a..f584d7da29cb 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -179,7 +179,7 @@ void setup_boot_command_line(void)
if (has_ebcdic_char(parmarea.command_line))
EBCASC(parmarea.command_line, COMMAND_LINE_SIZE);
/* copy arch command line */
- strcpy(early_command_line, strim(parmarea.command_line));
+ strscpy(early_command_line, strim(parmarea.command_line));
/* append IPL PARM data to the boot command line */
if (!is_prot_virt_guest() && ipl_block_valid)
@@ -253,7 +253,8 @@ void parse_boot_command_line(void)
int rc;
__kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
- args = strcpy(command_line_buf, early_command_line);
+ strscpy(command_line_buf, early_command_line);
+ args = command_line_buf;
while (*args) {
args = next_arg(args, &param, &val);
@@ -309,7 +310,7 @@ void parse_boot_command_line(void)
if (!strcmp(param, "bootdebug")) {
bootdebug = true;
if (val)
- strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1);
+ strscpy(bootdebug_filter, val);
}
if (!strcmp(param, "quiet"))
boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
index 8cf6331bc060..4bb6bc95704e 100644
--- a/arch/s390/boot/printk.c
+++ b/arch/s390/boot/printk.c
@@ -29,7 +29,8 @@ static void boot_rb_add(const char *str, size_t len)
/* store strings separated by '\0' */
if (len + 1 > avail)
boot_rb_off = 0;
- strcpy(boot_rb + boot_rb_off, str);
+ avail = sizeof(boot_rb) - boot_rb_off - 1;
+ strscpy(boot_rb + boot_rb_off, str, avail);
boot_rb_off += len + 1;
}
@@ -158,10 +159,10 @@ static noinline char *strsym(char *buf, void *ip)
p = findsym((unsigned long)ip, &off, &len);
if (p) {
- strncpy(buf, p, MAX_SYMLEN);
+ strscpy(buf, p, MAX_SYMLEN);
/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
p = buf + strnlen(buf, MAX_SYMLEN - 15);
- strcpy(p, "+0x");
+ strscpy(p, "+0x", MAX_SYMLEN - (p - buf));
as_hex(p + 3, off, 0);
strcat(p, "/0x");
as_hex(p + strlen(p), len, 0);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 06316fb8e0fa..da8337e63a3e 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -6,6 +6,7 @@
#include <asm/boot_data.h>
#include <asm/extmem.h>
#include <asm/sections.h>
+#include <asm/diag288.h>
#include <asm/maccess.h>
#include <asm/machine.h>
#include <asm/sysinfo.h>
@@ -71,6 +72,20 @@ static void detect_machine_type(void)
set_machine_feature(MFEATURE_VM);
}
+static void detect_diag288(void)
+{
+ /* "BEGIN" in EBCDIC character set */
+ static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5";
+ unsigned long action, len;
+
+ action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART;
+ len = machine_is_vm() ? sizeof(cmd) : 0;
+ if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len))
+ return;
+ __diag288(WDT_FUNC_CANCEL, 0, 0, 0);
+ set_machine_feature(MFEATURE_DIAG288);
+}
+
static void detect_diag9c(void)
{
unsigned int cpu;
@@ -519,6 +534,8 @@ void startup_kernel(void)
detect_facilities();
detect_diag9c();
detect_machine_type();
+ /* detect_diag288() needs machine type */
+ detect_diag288();
cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index f6b9b1df48a8..bd68161434a6 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -29,6 +29,18 @@ int strncmp(const char *cs, const char *ct, size_t count)
return 0;
}
+ssize_t sized_strscpy(char *dst, const char *src, size_t count)
+{
+ size_t len;
+
+ if (count == 0)
+ return -E2BIG;
+ len = strnlen(src, count - 1);
+ memcpy(dst, src, len);
+ dst[len] = '\0';
+ return src[len] ? -E2BIG : len;
+}
+
void *memset64(uint64_t *s, uint64_t v, size_t count)
{
uint64_t *xs = s;
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 24b22f6a9e99..8ecad727497e 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -755,10 +755,10 @@ CONFIG_FORTIFY_SOURCE=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
@@ -806,7 +806,6 @@ CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_GHASH_S390=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 2b8b42d569bc..c13a77765162 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -741,10 +741,10 @@ CONFIG_IMA_APPRAISE=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
@@ -793,7 +793,6 @@ CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_GHASH_S390=m
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
index 8c4db8b64fa2..e2c27588b21a 100644
--- a/arch/s390/crypto/Kconfig
+++ b/arch/s390/crypto/Kconfig
@@ -4,7 +4,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)"
config CRYPTO_SHA512_S390
tristate "Hash functions: SHA-384 and SHA-512"
- depends on S390
select CRYPTO_HASH
help
SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
@@ -15,7 +14,6 @@ config CRYPTO_SHA512_S390
config CRYPTO_SHA1_S390
tristate "Hash functions: SHA-1"
- depends on S390
select CRYPTO_HASH
help
SHA-1 secure hash algorithm (FIPS 180)
@@ -24,20 +22,8 @@ config CRYPTO_SHA1_S390
It is available as of z990.
-config CRYPTO_SHA256_S390
- tristate "Hash functions: SHA-224 and SHA-256"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: s390
-
- It is available as of z9.
-
config CRYPTO_SHA3_256_S390
tristate "Hash functions: SHA3-224 and SHA3-256"
- depends on S390
select CRYPTO_HASH
help
SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
@@ -48,7 +34,6 @@ config CRYPTO_SHA3_256_S390
config CRYPTO_SHA3_512_S390
tristate "Hash functions: SHA3-384 and SHA3-512"
- depends on S390
select CRYPTO_HASH
help
SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
@@ -59,7 +44,6 @@ config CRYPTO_SHA3_512_S390
config CRYPTO_GHASH_S390
tristate "Hash functions: GHASH"
- depends on S390
select CRYPTO_HASH
help
GCM GHASH hash function (NIST SP800-38D)
@@ -70,7 +54,6 @@ config CRYPTO_GHASH_S390
config CRYPTO_AES_S390
tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
- depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
help
@@ -92,7 +75,6 @@ config CRYPTO_AES_S390
config CRYPTO_DES_S390
tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
- depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_LIB_DES
@@ -107,23 +89,8 @@ config CRYPTO_DES_S390
As of z990 the ECB and CBC mode are hardware accelerated.
As of z196 the CTR mode is hardware accelerated.
-config CRYPTO_CHACHA_S390
- tristate
- depends on S390
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- default CRYPTO_LIB_CHACHA_INTERNAL
- help
- Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
-
- Architecture: s390
-
- It is available as of z13.
-
config CRYPTO_HMAC_S390
tristate "Keyed-hash message authentication code: HMAC"
- depends on S390
select CRYPTO_HASH
help
s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 14dafadbcbed..21757d86cd49 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -4,17 +4,13 @@
#
obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
-obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
obj-y += arch_random.o
-
-chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
deleted file mode 100644
index 920e9f0941e7..000000000000
--- a/arch/s390/crypto/chacha-glue.c
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#define KMSG_COMPONENT "chacha_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/algapi.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/fpu.h>
-#include "chacha-s390.h"
-
-static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
- unsigned int nbytes, const u32 *key,
- u32 *counter)
-{
- DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
-
- kernel_fpu_begin(&vxstate, KERNEL_VXR);
- chacha20_vx(dst, src, nbytes, key, counter);
- kernel_fpu_end(&vxstate, KERNEL_VXR);
-
- *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
-}
-
-static int chacha20_s390(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 state[CHACHA_STATE_WORDS] __aligned(16);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int rc;
-
- rc = skcipher_walk_virt(&walk, req, false);
- chacha_init(state, ctx->key, req->iv);
-
- while (walk.nbytes > 0) {
- nbytes = walk.nbytes;
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- if (nbytes <= CHACHA_BLOCK_SIZE) {
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- } else {
- chacha20_crypt_s390(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- &state[4], &state[12]);
- }
- rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
- return rc;
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- /* TODO: implement hchacha_block_arch() in assembly */
- hchacha_block_generic(state, stream, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- /* s390 chacha20 implementation has 20 rounds hard-coded,
- * it cannot handle a block of data or less, but otherwise
- * it can handle data of arbitrary size
- */
- if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx())
- chacha_crypt_generic(state, dst, src, bytes, nrounds);
- else
- chacha20_crypt_s390(state, dst, src, bytes,
- &state[4], &state[12]);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static struct skcipher_alg chacha_algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-s390",
- .base.cra_priority = 900,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha20_s390,
- .decrypt = chacha20_s390,
- }
-};
-
-static int __init chacha_mod_init(void)
-{
- return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
- crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
-}
-
-static void __exit chacha_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
- crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
-module_exit(chacha_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha20 stream cipher");
-MODULE_LICENSE("GPL v2");
-
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 0800a2a5799f..dcbcee37cb63 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -8,29 +8,28 @@
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
+#include <asm/cpacf.h>
+#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
-#include <linux/module.h>
#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
-
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
-struct ghash_ctx {
+struct s390_ghash_ctx {
u8 key[GHASH_BLOCK_SIZE];
};
-struct ghash_desc_ctx {
+struct s390_ghash_desc_ctx {
u8 icv[GHASH_BLOCK_SIZE];
u8 key[GHASH_BLOCK_SIZE];
- u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
};
static int ghash_init(struct shash_desc *desc)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
@@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc)
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
- struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm);
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
@@ -54,80 +53,71 @@ static int ghash_setkey(struct crypto_shash *tfm,
static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int n;
- u8 *buf = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
- n = min(srclen, dctx->bytes);
- dctx->bytes -= n;
- srclen -= n;
-
- memcpy(pos, src, n);
- src += n;
+ n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
+ return srclen - n;
+}
- if (!dctx->bytes) {
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
- GHASH_BLOCK_SIZE);
- }
- }
+static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src,
+ unsigned int len)
+{
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
- n = srclen & ~(GHASH_BLOCK_SIZE - 1);
- if (n) {
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
- src += n;
- srclen -= n;
+ memcpy(buf, src, len);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+ memzero_explicit(buf, sizeof(buf));
}
+}
- if (srclen) {
- dctx->bytes = GHASH_BLOCK_SIZE - srclen;
- memcpy(buf, src, srclen);
- }
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
+{
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ ghash_flush(dctx, src, len);
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
return 0;
}
-static int ghash_flush(struct ghash_desc_ctx *dctx)
+static int ghash_export(struct shash_desc *desc, void *out)
{
- u8 *buf = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- memset(pos, 0, dctx->bytes);
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
- dctx->bytes = 0;
- }
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ memcpy(out, dctx->icv, GHASH_DIGEST_SIZE);
return 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_import(struct shash_desc *desc, const void *in)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- int ret;
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- ret = ghash_flush(dctx);
- if (!ret)
- memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
- return ret;
+ memcpy(dctx->icv, in, GHASH_DIGEST_SIZE);
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+ return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
+ .export = ghash_export,
+ .import = ghash_import,
+ .statesize = sizeof(struct ghash_desc_ctx),
+ .descsize = sizeof(struct s390_ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_ctxsize = sizeof(struct s390_ghash_ctx),
.cra_module = THIS_MODULE,
},
};
diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c
index bba9a818dfdc..93a1098d9f8d 100644
--- a/arch/s390/crypto/hmac_s390.c
+++ b/arch/s390/crypto/hmac_s390.c
@@ -9,10 +9,14 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <asm/cpacf.h>
-#include <crypto/sha2.h>
#include <crypto/internal/hash.h>
+#include <crypto/hmac.h>
+#include <crypto/sha2.h>
#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
/*
* KMAC param block layout for sha2 function codes:
@@ -71,32 +75,31 @@ union s390_kmac_gr0 {
struct s390_kmac_sha2_ctx {
u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE];
union s390_kmac_gr0 gr0;
- u8 buf[MAX_BLOCK_SIZE];
- unsigned int buflen;
+ u64 buflen[2];
};
/*
* kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
*/
-static inline void kmac_sha2_set_imbl(u8 *param, unsigned int buflen,
- unsigned int blocksize)
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+ u64 buflen_hi, unsigned int blocksize)
{
u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize);
switch (blocksize) {
case SHA256_BLOCK_SIZE:
- *(u64 *)imbl = (u64)buflen * BITS_PER_BYTE;
+ *(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
break;
case SHA512_BLOCK_SIZE:
- *(u128 *)imbl = (u128)buflen * BITS_PER_BYTE;
+ *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3;
break;
default:
break;
}
}
-static int hash_key(const u8 *in, unsigned int inlen,
- u8 *digest, unsigned int digestsize)
+static int hash_data(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize, bool final)
{
unsigned long func;
union {
@@ -123,19 +126,23 @@ static int hash_key(const u8 *in, unsigned int inlen,
switch (digestsize) {
case SHA224_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_256;
+ func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
PARAM_INIT(256, 224, inlen * 8);
+ if (!final)
+ digestsize = SHA256_DIGEST_SIZE;
break;
case SHA256_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_256;
+ func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
PARAM_INIT(256, 256, inlen * 8);
break;
case SHA384_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_512;
+ func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
PARAM_INIT(512, 384, inlen * 8);
+ if (!final)
+ digestsize = SHA512_DIGEST_SIZE;
break;
case SHA512_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_512;
+ func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
PARAM_INIT(512, 512, inlen * 8);
break;
default:
@@ -151,6 +158,12 @@ static int hash_key(const u8 *in, unsigned int inlen,
return 0;
}
+static int hash_key(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize)
+{
+ return hash_data(in, inlen, digest, digestsize, true);
+}
+
static int s390_hmac_sha2_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
@@ -176,7 +189,8 @@ static int s390_hmac_sha2_init(struct shash_desc *desc)
memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
tfm_ctx->key, bs);
- ctx->buflen = 0;
+ ctx->buflen[0] = 0;
+ ctx->buflen[1] = 0;
ctx->gr0.reg = 0;
switch (crypto_shash_digestsize(desc->tfm)) {
case SHA224_DIGEST_SIZE:
@@ -203,48 +217,31 @@ static int s390_hmac_sha2_update(struct shash_desc *desc,
{
struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
unsigned int bs = crypto_shash_blocksize(desc->tfm);
- unsigned int offset, n;
-
- /* check current buffer */
- offset = ctx->buflen % bs;
- ctx->buflen += len;
- if (offset + len < bs)
- goto store;
-
- /* process one stored block */
- if (offset) {
- n = bs - offset;
- memcpy(ctx->buf + offset, data, n);
- ctx->gr0.iimp = 1;
- _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs);
- data += n;
- len -= n;
- offset = 0;
- }
- /* process as many blocks as possible */
- if (len >= bs) {
- n = (len / bs) * bs;
- ctx->gr0.iimp = 1;
- _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n);
- data += n;
- len -= n;
- }
-store:
- /* store incomplete block in buffer */
- if (len)
- memcpy(ctx->buf + offset, data, len);
+ unsigned int n = round_down(len, bs);
- return 0;
+ ctx->buflen[0] += n;
+ if (ctx->buflen[0] < n)
+ ctx->buflen[1]++;
+
+ /* process as many blocks as possible */
+ ctx->gr0.iimp = 1;
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n);
+ return len - n;
}
-static int s390_hmac_sha2_final(struct shash_desc *desc, u8 *out)
+static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ ctx->buflen[0] += len;
+ if (ctx->buflen[0] < len)
+ ctx->buflen[1]++;
+
ctx->gr0.iimp = 0;
- kmac_sha2_set_imbl(ctx->param, ctx->buflen, bs);
- _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, ctx->buflen % bs);
+ kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len);
memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm));
return 0;
@@ -262,7 +259,7 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc,
return rc;
ctx->gr0.iimp = 0;
- kmac_sha2_set_imbl(ctx->param, len,
+ kmac_sha2_set_imbl(ctx->param, len, 0,
crypto_shash_blocksize(desc->tfm));
_cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len);
memcpy(out, ctx->param, ds);
@@ -270,22 +267,89 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc,
return 0;
}
-#define S390_HMAC_SHA2_ALG(x) { \
+static int s390_hmac_export_zero(struct shash_desc *desc, void *out)
+{
+ struct crypto_shash *tfm = desc->tfm;
+ u8 ipad[SHA512_BLOCK_SIZE];
+ struct s390_hmac_ctx *ctx;
+ unsigned int bs;
+ int err, i;
+
+ ctx = crypto_shash_ctx(tfm);
+ bs = crypto_shash_blocksize(tfm);
+ for (i = 0; i < bs; i++)
+ ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE;
+
+ err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false);
+ memzero_explicit(ipad, sizeof(ipad));
+ return err;
+}
+
+static int s390_hmac_export(struct shash_desc *desc, void *out)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ds = bs / 2;
+ union {
+ u8 *u8;
+ u64 *u64;
+ } p = { .u8 = out };
+ int err = 0;
+
+ if (!ctx->gr0.ikp)
+ err = s390_hmac_export_zero(desc, out);
+ else
+ memcpy(p.u8, ctx->param, ds);
+ p.u8 += ds;
+ put_unaligned(ctx->buflen[0], p.u64++);
+ if (ds == SHA512_DIGEST_SIZE)
+ put_unaligned(ctx->buflen[1], p.u64);
+ return err;
+}
+
+static int s390_hmac_import(struct shash_desc *desc, const void *in)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ds = bs / 2;
+ union {
+ const u8 *u8;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int err;
+
+ err = s390_hmac_sha2_init(desc);
+ memcpy(ctx->param, p.u8, ds);
+ p.u8 += ds;
+ ctx->buflen[0] = get_unaligned(p.u64++);
+ if (ds == SHA512_DIGEST_SIZE)
+ ctx->buflen[1] = get_unaligned(p.u64);
+ if (ctx->buflen[0] | ctx->buflen[1])
+ ctx->gr0.ikp = 1;
+ return err;
+}
+
+#define S390_HMAC_SHA2_ALG(x, ss) { \
.fc = CPACF_KMAC_HMAC_SHA_##x, \
.alg = { \
.init = s390_hmac_sha2_init, \
.update = s390_hmac_sha2_update, \
- .final = s390_hmac_sha2_final, \
+ .finup = s390_hmac_sha2_finup, \
.digest = s390_hmac_sha2_digest, \
.setkey = s390_hmac_sha2_setkey, \
+ .export = s390_hmac_export, \
+ .import = s390_hmac_import, \
.descsize = sizeof(struct s390_kmac_sha2_ctx), \
.halg = { \
+ .statesize = ss, \
.digestsize = SHA##x##_DIGEST_SIZE, \
.base = { \
.cra_name = "hmac(sha" #x ")", \
.cra_driver_name = "hmac_s390_sha" #x, \
.cra_blocksize = SHA##x##_BLOCK_SIZE, \
.cra_priority = 400, \
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \
+ CRYPTO_AHASH_ALG_FINUP_MAX, \
.cra_ctxsize = sizeof(struct s390_hmac_ctx), \
.cra_module = THIS_MODULE, \
}, \
@@ -298,10 +362,10 @@ static struct s390_hmac_alg {
unsigned int fc;
struct shash_alg alg;
} s390_hmac_algs[] = {
- S390_HMAC_SHA2_ALG(224),
- S390_HMAC_SHA2_ALG(256),
- S390_HMAC_SHA2_ALG(384),
- S390_HMAC_SHA2_ALG(512),
+ S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)),
+ S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)),
+ S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE),
+ S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE),
};
static __always_inline void _s390_hmac_algs_unregister(void)
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 511093713a6f..8a340c16acb4 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
* s390 implementation of the AES Cipher Algorithm with protected keys.
*
* s390 Version:
- * Copyright IBM Corp. 2017, 2023
+ * Copyright IBM Corp. 2017, 2025
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
@@ -13,16 +13,18 @@
#define KMSG_COMPONENT "paes_s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/module.h>
+#include <linux/atomic.h>
#include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/err.h>
#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <linux/delay.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/engine.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
@@ -44,23 +46,61 @@ static DEFINE_MUTEX(ctrblk_lock);
static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+static struct crypto_engine *paes_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * protected key specific stuff
+ */
+
struct paes_protkey {
u32 type;
u32 len;
u8 protkey[PXTS_256_PROTKEY_SIZE];
};
-struct key_blob {
- /*
- * Small keys will be stored in the keybuf. Larger keys are
- * stored in extra allocated memory. In both cases does
- * key point to the memory where the key is stored.
- * The code distinguishes by checking keylen against
- * sizeof(keybuf). See the two following helper functions.
- */
- u8 *key;
- u8 keybuf[128];
+#define PK_STATE_NO_KEY 0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID 2
+
+struct s390_paes_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[PAES_MAX_KEYSIZE];
+ unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk holds the protected key */
+ struct paes_protkey pk;
+};
+
+struct s390_pxts_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[2 * PAES_MAX_KEYSIZE];
unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk[] hold(s) the protected key(s) */
+ struct paes_protkey pk[2];
};
/*
@@ -89,214 +129,370 @@ static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest)
return sizeof(*token) + cklen;
}
-static inline int _key_to_kb(struct key_blob *kb,
- const u8 *key,
- unsigned int keylen)
+/*
+ * paes_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx,
+ const u8 *key, unsigned int keylen)
{
+ if (keylen > sizeof(ctx->keybuf))
+ return -EINVAL;
+
switch (keylen) {
case 16:
case 24:
case 32:
/* clear key value, prepare pkey clear key token in keybuf */
- memset(kb->keybuf, 0, sizeof(kb->keybuf));
- kb->keylen = make_clrkey_token(key, keylen, kb->keybuf);
- kb->key = kb->keybuf;
+ memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+ ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf);
break;
default:
/* other key material, let pkey handle this */
- if (keylen <= sizeof(kb->keybuf))
- kb->key = kb->keybuf;
- else {
- kb->key = kmalloc(keylen, GFP_KERNEL);
- if (!kb->key)
- return -ENOMEM;
- }
- memcpy(kb->key, key, keylen);
- kb->keylen = keylen;
+ memcpy(ctx->keybuf, key, keylen);
+ ctx->keylen = keylen;
break;
}
return 0;
}
-static inline int _xts_key_to_kb(struct key_blob *kb,
- const u8 *key,
- unsigned int keylen)
+/*
+ * pxts_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx,
+ const u8 *key, unsigned int keylen)
{
size_t cklen = keylen / 2;
- memset(kb->keybuf, 0, sizeof(kb->keybuf));
+ if (keylen > sizeof(ctx->keybuf))
+ return -EINVAL;
switch (keylen) {
case 32:
case 64:
/* clear key value, prepare pkey clear key tokens in keybuf */
- kb->key = kb->keybuf;
- kb->keylen = make_clrkey_token(key, cklen, kb->key);
- kb->keylen += make_clrkey_token(key + cklen, cklen,
- kb->key + kb->keylen);
+ memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+ ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf);
+ ctx->keylen += make_clrkey_token(key + cklen, cklen,
+ ctx->keybuf + ctx->keylen);
break;
default:
/* other key material, let pkey handle this */
- if (keylen <= sizeof(kb->keybuf)) {
- kb->key = kb->keybuf;
- } else {
- kb->key = kmalloc(keylen, GFP_KERNEL);
- if (!kb->key)
- return -ENOMEM;
- }
- memcpy(kb->key, key, keylen);
- kb->keylen = keylen;
+ memcpy(ctx->keybuf, key, keylen);
+ ctx->keylen = keylen;
break;
}
return 0;
}
-static inline void _free_kb_keybuf(struct key_blob *kb)
+/*
+ * Convert the raw key material into a protected key via PKEY api.
+ * This function may sleep - don't call in non-sleeping context.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+ struct paes_protkey *pk)
{
- if (kb->key && kb->key != kb->keybuf
- && kb->keylen > sizeof(kb->keybuf)) {
- kfree_sensitive(kb->key);
- kb->key = NULL;
+ int rc, i;
+
+ pk->len = sizeof(pk->protkey);
+
+ /*
+ * In case of a busy card retry with increasing delay
+ * of 200, 400, 800 and 1600 ms - in total 3 s.
+ */
+ for (rc = -EIO, i = 0; rc && i < 5; i++) {
+ if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+ rc = -EINTR;
+ goto out;
+ }
+ rc = pkey_key2protkey(key, keylen,
+ pk->protkey, &pk->len, &pk->type,
+ PKEY_XFLAG_NOMEMALLOC);
}
- memzero_explicit(kb->keybuf, sizeof(kb->keybuf));
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-struct s390_paes_ctx {
- struct key_blob kb;
+/*
+ * (Re-)Convert the raw key material from the ctx into a protected key
+ * via convert_key() function. Update the pk_state, pk_type, pk_len
+ * and the protected key in the tfm context.
+ * Please note this function may be invoked concurrently with the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state but does not guarantee any
+ * order of update. So a fresh converted valid protected key may get
+ * updated with an 'old' expired key value. As the cpacf instructions
+ * detect this, refuse to operate with an invalid key and the calling
+ * code triggers a (re-)conversion this does no harm. This may lead to
+ * unnecessary additional conversion but never to invalid data on en-
+ * or decrypt operations.
+ */
+static int paes_convert_key(struct s390_paes_ctx *ctx)
+{
struct paes_protkey pk;
- spinlock_t pk_lock;
- unsigned long fc;
-};
+ int rc;
-struct s390_pxts_ctx {
- struct key_blob kb;
- struct paes_protkey pk[2];
- spinlock_t pk_lock;
- unsigned long fc;
-};
+ spin_lock_bh(&ctx->pk_lock);
+ ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&ctx->pk_lock);
-static inline int __paes_keyblob2pkey(const u8 *key, unsigned int keylen,
- struct paes_protkey *pk)
-{
- int i, rc = -EIO;
+ rc = convert_key(ctx->keybuf, ctx->keylen, &pk);
- /* try three times in case of busy card */
- for (i = 0; rc && i < 3; i++) {
- if (rc == -EBUSY && in_task()) {
- if (msleep_interruptible(1000))
- return -EINTR;
- }
- rc = pkey_key2protkey(key, keylen, pk->protkey, &pk->len,
- &pk->type);
+ /* update context */
+ spin_lock_bh(&ctx->pk_lock);
+ if (rc) {
+ ctx->pk_state = rc;
+ } else {
+ ctx->pk_state = PK_STATE_VALID;
+ ctx->pk = pk;
}
+ spin_unlock_bh(&ctx->pk_lock);
+ memzero_explicit(&pk, sizeof(pk));
+ pr_debug("rc=%d\n", rc);
return rc;
}
-static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+/*
+ * (Re-)Convert the raw xts key material from the ctx into a
+ * protected key via convert_key() function. Update the pk_state,
+ * pk_type, pk_len and the protected key in the tfm context.
+ * See also comments on function paes_convert_key.
+ */
+static int pxts_convert_key(struct s390_pxts_ctx *ctx)
{
- struct paes_protkey pk;
+ struct paes_protkey pk0, pk1;
+ size_t split_keylen;
int rc;
- pk.len = sizeof(pk.protkey);
- rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk);
+ spin_lock_bh(&ctx->pk_lock);
+ ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&ctx->pk_lock);
+
+ rc = convert_key(ctx->keybuf, ctx->keylen, &pk0);
if (rc)
- return rc;
+ goto out;
+
+ switch (pk0.type) {
+ case PKEY_KEYTYPE_AES_128:
+ case PKEY_KEYTYPE_AES_256:
+ /* second keytoken required */
+ if (ctx->keylen % 2) {
+ rc = -EINVAL;
+ goto out;
+ }
+ split_keylen = ctx->keylen / 2;
+ rc = convert_key(ctx->keybuf + split_keylen,
+ split_keylen, &pk1);
+ if (rc)
+ goto out;
+ if (pk0.type != pk1.type) {
+ rc = -EINVAL;
+ goto out;
+ }
+ break;
+ case PKEY_KEYTYPE_AES_XTS_128:
+ case PKEY_KEYTYPE_AES_XTS_256:
+ /* single key */
+ pk1.type = 0;
+ break;
+ default:
+ /* unsupported protected keytype */
+ rc = -EINVAL;
+ goto out;
+ }
+out:
+ /* update context */
spin_lock_bh(&ctx->pk_lock);
- memcpy(&ctx->pk, &pk, sizeof(pk));
+ if (rc) {
+ ctx->pk_state = rc;
+ } else {
+ ctx->pk_state = PK_STATE_VALID;
+ ctx->pk[0] = pk0;
+ ctx->pk[1] = pk1;
+ }
spin_unlock_bh(&ctx->pk_lock);
- return 0;
+ memzero_explicit(&pk0, sizeof(pk0));
+ memzero_explicit(&pk1, sizeof(pk1));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ecb_paes_init(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+/*
+ * PAES ECB implementation
+ */
- ctx->kb.key = NULL;
- spin_lock_init(&ctx->pk_lock);
+struct ecb_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
- return 0;
-}
+struct s390_pecb_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct ecb_param param;
+};
-static void ecb_paes_exit(struct crypto_skcipher *tfm)
+static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- _free_kb_keybuf(&ctx->kb);
-}
-
-static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
-{
- unsigned long fc;
+ long fc;
int rc;
- rc = __paes_convert_key(ctx);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+ /* convert key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
- /* Check if the function code is available */
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KM_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KM_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KM_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
- return ctx->fc ? 0 : -EINVAL;
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pecb_req_ctx *req_ctx,
+ bool maysleep)
{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- int rc;
-
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
+ struct ecb_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ unsigned int nbytes, n, k;
+ int pk_state, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
if (rc)
- return rc;
+ goto out;
- return __ecb_paes_set_key(ctx);
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ rc = skcipher_walk_done(walk, nbytes - k);
+ if (k < n) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
+ struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- } param;
- struct skcipher_walk walk;
- unsigned int nbytes, n, k;
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- rc = skcipher_walk_virt(&walk, req, false);
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
if (rc)
- return rc;
+ goto out;
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
- while ((nbytes = walk.nbytes) != 0) {
- /* only use complete blocks */
- n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, &param,
- walk.dst.virt.addr, walk.src.virt.addr, n);
- if (k)
- rc = skcipher_walk_done(&walk, nbytes - k);
- if (k < n) {
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = ecb_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
}
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
return rc;
}
@@ -310,112 +506,256 @@ static int ecb_paes_decrypt(struct skcipher_request *req)
return ecb_paes_crypt(req, CPACF_DECRYPT);
}
-static struct skcipher_alg ecb_paes_alg = {
- .base.cra_name = "ecb(paes)",
- .base.cra_driver_name = "ecb-paes-s390",
- .base.cra_priority = 401, /* combo: aes + ecb + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
- .init = ecb_paes_init,
- .exit = ecb_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .setkey = ecb_paes_set_key,
- .encrypt = ecb_paes_encrypt,
- .decrypt = ecb_paes_decrypt,
-};
-
-static int cbc_paes_init(struct crypto_skcipher *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- ctx->kb.key = NULL;
+ memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->pk_lock);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx));
+
return 0;
}
-static void cbc_paes_exit(struct crypto_skcipher *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
- unsigned long fc;
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- rc = __paes_convert_key(ctx);
- if (rc)
- return rc;
+ /* walk has already been prepared */
+
+ rc = ecb_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+static struct skcipher_engine_alg ecb_paes_alg = {
+ .base = {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list),
+ .init = ecb_paes_init,
+ .exit = ecb_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .setkey = ecb_paes_setkey,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = ecb_paes_do_one_request,
+ },
+};
- return ctx->fc ? 0 : -EINVAL;
-}
+/*
+ * PAES CBC implementation
+ */
+
+struct cbc_param {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pcbc_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct cbc_param param;
+};
-static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
int rc;
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
- return __cbc_paes_set_key(ctx);
+ /* convert raw key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KMC_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KMC_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KMC_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pcbc_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct {
- u8 iv[AES_BLOCK_SIZE];
- u8 key[PAES_256_PROTKEY_SIZE];
- } param;
- struct skcipher_walk walk;
+ struct cbc_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int nbytes, n, k;
- int rc;
-
- rc = skcipher_walk_virt(&walk, req, false);
+ int pk_state, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
if (rc)
- return rc;
+ goto out;
- memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ memcpy(param->iv, walk->iv, AES_BLOCK_SIZE);
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_kmc(ctx->fc | modifier, &param,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_kmc(ctx->fc | req_ctx->modifier, param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k) {
- memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
- rc = skcipher_walk_done(&walk, nbytes - k);
+ memcpy(walk->iv, param->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, nbytes - k);
}
if (k < n) {
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
spin_unlock_bh(&ctx->pk_lock);
}
}
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+ struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
+
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
+
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = cbc_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
return rc;
}
@@ -429,496 +769,882 @@ static int cbc_paes_decrypt(struct skcipher_request *req)
return cbc_paes_crypt(req, CPACF_DECRYPT);
}
-static struct skcipher_alg cbc_paes_alg = {
- .base.cra_name = "cbc(paes)",
- .base.cra_driver_name = "cbc-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
- .init = cbc_paes_init,
- .exit = cbc_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_paes_set_key,
- .encrypt = cbc_paes_encrypt,
- .decrypt = cbc_paes_decrypt,
-};
-
-static int xts_paes_init(struct crypto_skcipher *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- ctx->kb.key = NULL;
+ memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->pk_lock);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx));
+
return 0;
}
-static void xts_paes_exit(struct crypto_skcipher *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+static int cbc_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
- struct paes_protkey pk0, pk1;
- size_t split_keylen;
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- pk0.len = sizeof(pk0.protkey);
- pk1.len = sizeof(pk1.protkey);
-
- rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk0);
- if (rc)
- return rc;
+ /* walk has already been prepared */
+
+ rc = cbc_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- switch (pk0.type) {
- case PKEY_KEYTYPE_AES_128:
- case PKEY_KEYTYPE_AES_256:
- /* second keytoken required */
- if (ctx->kb.keylen % 2)
- return -EINVAL;
- split_keylen = ctx->kb.keylen / 2;
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- rc = __paes_keyblob2pkey(ctx->kb.key + split_keylen,
- split_keylen, &pk1);
- if (rc)
- return rc;
+static struct skcipher_engine_alg cbc_paes_alg = {
+ .base = {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-s390",
+ .base.cra_priority = 402, /* cbc-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list),
+ .init = cbc_paes_init,
+ .exit = cbc_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_setkey,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = cbc_paes_do_one_request,
+ },
+};
- if (pk0.type != pk1.type)
- return -EINVAL;
- break;
- case PKEY_KEYTYPE_AES_XTS_128:
- case PKEY_KEYTYPE_AES_XTS_256:
- /* single key */
- pk1.type = 0;
- break;
- default:
- /* unsupported protected keytype */
- return -EINVAL;
- }
+/*
+ * PAES CTR implementation
+ */
- spin_lock_bh(&ctx->pk_lock);
- ctx->pk[0] = pk0;
- ctx->pk[1] = pk1;
- spin_unlock_bh(&ctx->pk_lock);
+struct ctr_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
- return 0;
-}
+struct s390_pctr_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct ctr_param param;
+};
-static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
- unsigned long fc;
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
int rc;
- rc = __xts_paes_convert_key(ctx);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
+
+ /* convert raw key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
/* Pick the correct function code based on the protected key type */
- switch (ctx->pk[0].type) {
+ switch (ctx->pk.type) {
case PKEY_KEYTYPE_AES_128:
- fc = CPACF_KM_PXTS_128;
- break;
- case PKEY_KEYTYPE_AES_256:
- fc = CPACF_KM_PXTS_256;
+ fc = CPACF_KMCTR_PAES_128;
break;
- case PKEY_KEYTYPE_AES_XTS_128:
- fc = CPACF_KM_PXTS_128_FULL;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KMCTR_PAES_192;
break;
- case PKEY_KEYTYPE_AES_XTS_256:
- fc = CPACF_KM_PXTS_256_FULL;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KMCTR_PAES_256;
break;
default:
fc = 0;
break;
}
+ ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* only use complete blocks, max. PAGE_SIZE */
+ memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+ memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+ crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ ctrptr += AES_BLOCK_SIZE;
+ }
+ return n;
+}
+
+static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pctr_req_ctx *req_ctx,
+ bool maysleep)
+{
+ struct ctr_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ unsigned int nbytes, n, k;
+ int pk_state, locked, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ if (rc)
+ goto out;
+
+ locked = mutex_trylock(&ctrblk_lock);
+
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ n = AES_BLOCK_SIZE;
+ if (nbytes >= 2 * AES_BLOCK_SIZE && locked)
+ n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+ k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr,
+ walk->src.virt.addr, n, ctrptr);
+ if (k) {
+ if (ctrptr == ctrblk)
+ memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, nbytes - k);
+ }
+ if (k < n) {
+ if (!maysleep) {
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc) {
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ goto out;
+ }
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+
+ /* final block may be < AES_BLOCK_SIZE, copy only nbytes */
+ if (nbytes) {
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk->src.virt.addr, nbytes);
+ while (1) {
+ if (cpacf_kmctr(ctx->fc, param, buf,
+ buf, AES_BLOCK_SIZE,
+ walk->iv) == AES_BLOCK_SIZE)
+ break;
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ memcpy(walk->dst.virt.addr, buf, nbytes);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, 0);
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int ctr_paes_crypt(struct skcipher_request *req)
+{
+ struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
+
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
+
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = ctr_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int ctr_paes_init(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->pk_lock);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx));
+
+ return 0;
+}
+
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memzero_explicit(ctx, sizeof(*ctx));
+}
+
+static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq)
+{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- return ctx->fc ? 0 : -EINVAL;
+ /* walk has already been prepared */
+
+ rc = ctr_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
+
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
}
-static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int in_keylen)
+static struct skcipher_engine_alg ctr_paes_alg = {
+ .base = {
+ .base.cra_name = "ctr(paes)",
+ .base.cra_driver_name = "ctr-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list),
+ .init = ctr_paes_init,
+ .exit = ctr_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_setkey,
+ .encrypt = ctr_paes_crypt,
+ .decrypt = ctr_paes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ },
+ .op = {
+ .do_one_request = ctr_paes_do_one_request,
+ },
+};
+
+/*
+ * PAES XTS implementation
+ */
+
+struct xts_full_km_param {
+ u8 key[64];
+ u8 tweak[16];
+ u8 nap[16];
+ u8 wkvp[32];
+} __packed;
+
+struct xts_km_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+ u8 init[16];
+} __packed;
+
+struct xts_pcc_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+ u8 tweak[16];
+ u8 block[16];
+ u8 bit[16];
+ u8 xts[16];
+} __packed;
+
+struct s390_pxts_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ union {
+ struct xts_full_km_param full_km_param;
+ struct xts_km_param km_param;
+ } param;
+};
+
+static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int in_keylen)
{
struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
unsigned int ckey_len;
+ long fc;
int rc;
if ((in_keylen == 32 || in_keylen == 64) &&
xts_verify_key(tfm, in_key, in_keylen))
return -EINVAL;
- _free_kb_keybuf(&ctx->kb);
- rc = _xts_key_to_kb(&ctx->kb, in_key, in_keylen);
+ /* set raw key into context */
+ rc = pxts_ctx_setkey(ctx, in_key, in_keylen);
if (rc)
- return rc;
+ goto out;
- rc = __xts_paes_set_key(ctx);
+ /* convert raw key(s) into protected key(s) */
+ rc = pxts_convert_key(ctx);
if (rc)
- return rc;
+ goto out;
/*
- * It is not possible on a single protected key (e.g. full AES-XTS) to
- * check, if k1 and k2 are the same.
- */
- if (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128 ||
- ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_256)
- return 0;
- /*
* xts_verify_key verifies the key length is not odd and makes
* sure that the two keys are not the same. This can be done
- * on the two protected keys as well
+ * on the two protected keys as well - but not for full xts keys.
*/
- ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
- AES_KEYSIZE_128 : AES_KEYSIZE_256;
- memcpy(ckey, ctx->pk[0].protkey, ckey_len);
- memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
- return xts_verify_key(tfm, ckey, 2*ckey_len);
+ if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 ||
+ ctx->pk[0].type == PKEY_KEYTYPE_AES_256) {
+ ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+ AES_KEYSIZE_128 : AES_KEYSIZE_256;
+ memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+ memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+ rc = xts_verify_key(tfm, ckey, 2 * ckey_len);
+ memzero_explicit(ckey, sizeof(ckey));
+ if (rc)
+ goto out;
+ }
+
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk[0].type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KM_PXTS_128;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KM_PXTS_256;
+ break;
+ case PKEY_KEYTYPE_AES_XTS_128:
+ fc = CPACF_KM_PXTS_128_FULL;
+ break;
+ case PKEY_KEYTYPE_AES_XTS_256:
+ fc = CPACF_KM_PXTS_256_FULL;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int paes_xts_crypt_full(struct skcipher_request *req,
- unsigned long modifier)
+static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct xts_full_km_param *param = &req_ctx->param.full_km_param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int keylen, offset, nbytes, n, k;
- struct {
- u8 key[64];
- u8 tweak[16];
- u8 nap[16];
- u8 wkvp[32];
- } fxts_param = {
- .nap = {0},
- };
- struct skcipher_walk walk;
- int rc;
+ int rc = 0;
- rc = skcipher_walk_virt(&walk, req, false);
- if (rc)
- return rc;
+ /*
+ * The calling function xts_paes_do_crypt() ensures the
+ * protected key state is always PK_STATE_VALID when this
+ * function is invoked.
+ */
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0;
- spin_lock_bh(&ctx->pk_lock);
- memcpy(fxts_param.key + offset, ctx->pk[0].protkey, keylen);
- memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen,
- sizeof(fxts_param.wkvp));
- spin_unlock_bh(&ctx->pk_lock);
- memcpy(fxts_param.tweak, walk.iv, sizeof(fxts_param.tweak));
- fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+ if (!req_ctx->param_init_done) {
+ memset(param, 0, sizeof(*param));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
+ spin_unlock_bh(&ctx->pk_lock);
+ memcpy(param->tweak, walk->iv, sizeof(param->tweak));
+ param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+ req_ctx->param_init_done = true;
+ }
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, fxts_param.key + offset,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k)
- rc = skcipher_walk_done(&walk, nbytes - k);
+ rc = skcipher_walk_done(walk, nbytes - k);
if (k < n) {
- if (__xts_paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(fxts_param.key + offset, ctx->pk[0].protkey,
- keylen);
- memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen,
- sizeof(fxts_param.wkvp));
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
spin_unlock_bh(&ctx->pk_lock);
}
}
+out:
+ pr_debug("rc=%d\n", rc);
return rc;
}
-static int paes_xts_crypt(struct skcipher_request *req, unsigned long modifier)
+static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx,
+ struct xts_km_param *param,
+ struct skcipher_walk *walk,
+ unsigned int keylen,
+ unsigned int offset, bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct xts_pcc_param pcc_param;
+ unsigned long cc = 1;
+ int rc = 0;
+
+ while (cc) {
+ memset(&pcc_param, 0, sizeof(pcc_param));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ spin_unlock_bh(&ctx->pk_lock);
+ cc = cpacf_pcc(ctx->fc, pcc_param.key + offset);
+ if (cc) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ break;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ break;
+ continue;
+ }
+ memcpy(param->init, pcc_param.xts, 16);
+ }
+
+ memzero_explicit(pcc_param.key, sizeof(pcc_param.key));
+ return rc;
+}
+
+static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
+{
+ struct xts_km_param *param = &req_ctx->param.km_param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int keylen, offset, nbytes, n, k;
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- u8 tweak[16];
- u8 block[16];
- u8 bit[16];
- u8 xts[16];
- } pcc_param;
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- u8 init[16];
- } xts_param;
- struct skcipher_walk walk;
- int rc;
+ int rc = 0;
- rc = skcipher_walk_virt(&walk, req, false);
- if (rc)
- return rc;
+ /*
+ * The calling function xts_paes_do_crypt() ensures the
+ * protected key state is always PK_STATE_VALID when this
+ * function is invoked.
+ */
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
- memset(&pcc_param, 0, sizeof(pcc_param));
- memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
- spin_lock_bh(&ctx->pk_lock);
- memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
- memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
- spin_unlock_bh(&ctx->pk_lock);
- cpacf_pcc(ctx->fc, pcc_param.key + offset);
- memcpy(xts_param.init, pcc_param.xts, 16);
+ if (!req_ctx->param_init_done) {
+ rc = __xts_2keys_prep_param(ctx, param, walk,
+ keylen, offset, maysleep);
+ if (rc)
+ goto out;
+ req_ctx->param_init_done = true;
+ }
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k)
- rc = skcipher_walk_done(&walk, nbytes - k);
+ rc = skcipher_walk_done(walk, nbytes - k);
if (k < n) {
- if (__xts_paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(xts_param.key + offset,
- ctx->pk[0].protkey, keylen);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
spin_unlock_bh(&ctx->pk_lock);
}
}
+out:
+ pr_debug("rc=%d\n", rc);
return rc;
}
-static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int pk_state, rc = 0;
+
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ if (rc)
+ goto out;
+ /* Call the 'real' crypt function based on the xts prot key type. */
switch (ctx->fc) {
case CPACF_KM_PXTS_128:
case CPACF_KM_PXTS_256:
- return paes_xts_crypt(req, modifier);
+ rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep);
+ break;
case CPACF_KM_PXTS_128_FULL:
case CPACF_KM_PXTS_256_FULL:
- return paes_xts_crypt_full(req, modifier);
+ rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep);
+ break;
default:
- return -EINVAL;
+ rc = -EINVAL;
}
-}
-static int xts_paes_encrypt(struct skcipher_request *req)
-{
- return xts_paes_crypt(req, 0);
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int xts_paes_decrypt(struct skcipher_request *req)
+static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- return xts_paes_crypt(req, CPACF_DECRYPT);
-}
+ struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
-static struct skcipher_alg xts_paes_alg = {
- .base.cra_name = "xts(paes)",
- .base.cra_driver_name = "xts-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
- .init = xts_paes_init,
- .exit = xts_paes_exit,
- .min_keysize = 2 * PAES_MIN_KEYSIZE,
- .max_keysize = 2 * PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_paes_set_key,
- .encrypt = xts_paes_encrypt,
- .decrypt = xts_paes_decrypt,
-};
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
-static int ctr_paes_init(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
- ctx->kb.key = NULL;
- spin_lock_init(&ctx->pk_lock);
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
- return 0;
-}
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = xts_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
-static void ctr_paes_exit(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
- _free_kb_keybuf(&ctx->kb);
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+static int xts_paes_encrypt(struct skcipher_request *req)
{
- unsigned long fc;
- int rc;
-
- rc = __paes_convert_key(ctx);
- if (rc)
- return rc;
-
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
- CPACF_KMCTR_PAES_256 : 0;
-
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+ return xts_paes_crypt(req, 0);
+}
- return ctx->fc ? 0 : -EINVAL;
+static int xts_paes_decrypt(struct skcipher_request *req)
+{
+ return xts_paes_crypt(req, CPACF_DECRYPT);
}
-static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int xts_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- int rc;
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
- if (rc)
- return rc;
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->pk_lock);
- return __ctr_paes_set_key(ctx);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx));
+
+ return 0;
}
-static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
{
- unsigned int i, n;
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
- /* only use complete blocks, max. PAGE_SIZE */
- memcpy(ctrptr, iv, AES_BLOCK_SIZE);
- n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
- for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
- memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
- crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- ctrptr += AES_BLOCK_SIZE;
- }
- return n;
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static int ctr_paes_crypt(struct skcipher_request *req)
+static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- u8 buf[AES_BLOCK_SIZE], *ctrptr;
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- } param;
- struct skcipher_walk walk;
- unsigned int nbytes, n, k;
- int rc, locked;
-
- rc = skcipher_walk_virt(&walk, req, false);
- if (rc)
- return rc;
-
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
-
- locked = mutex_trylock(&ctrblk_lock);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- n = AES_BLOCK_SIZE;
- if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk.iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
- k = cpacf_kmctr(ctx->fc, &param, walk.dst.virt.addr,
- walk.src.virt.addr, n, ctrptr);
- if (k) {
- if (ctrptr == ctrblk)
- memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
- AES_BLOCK_SIZE);
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- rc = skcipher_walk_done(&walk, nbytes - k);
- }
- if (k < n) {
- if (__paes_convert_key(ctx)) {
- if (locked)
- mutex_unlock(&ctrblk_lock);
- return skcipher_walk_done(&walk, -EIO);
- }
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
- }
- if (locked)
- mutex_unlock(&ctrblk_lock);
- /*
- * final block may be < AES_BLOCK_SIZE, copy only nbytes
- */
- if (nbytes) {
- memset(buf, 0, AES_BLOCK_SIZE);
- memcpy(buf, walk.src.virt.addr, nbytes);
- while (1) {
- if (cpacf_kmctr(ctx->fc, &param, buf,
- buf, AES_BLOCK_SIZE,
- walk.iv) == AES_BLOCK_SIZE)
- break;
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
- memcpy(walk.dst.virt.addr, buf, nbytes);
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- rc = skcipher_walk_done(&walk, nbytes);
+ /* walk has already been prepared */
+
+ rc = xts_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
}
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
return rc;
}
-static struct skcipher_alg ctr_paes_alg = {
- .base.cra_name = "ctr(paes)",
- .base.cra_driver_name = "ctr-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
- .init = ctr_paes_init,
- .exit = ctr_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_paes_set_key,
- .encrypt = ctr_paes_crypt,
- .decrypt = ctr_paes_crypt,
- .chunksize = AES_BLOCK_SIZE,
+static struct skcipher_engine_alg xts_paes_alg = {
+ .base = {
+ .base.cra_name = "xts(paes)",
+ .base.cra_driver_name = "xts-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list),
+ .init = xts_paes_init,
+ .exit = xts_paes_exit,
+ .min_keysize = 2 * PAES_MIN_KEYSIZE,
+ .max_keysize = 2 * PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_setkey,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = xts_paes_do_one_request,
+ },
};
-static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
+/*
+ * alg register, unregister, module init, exit
+ */
+
+static struct miscdevice paes_dev = {
+ .name = "paes",
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+static inline void __crypto_unregister_skcipher(struct skcipher_engine_alg *alg)
{
- if (!list_empty(&alg->base.cra_list))
- crypto_unregister_skcipher(alg);
+ if (!list_empty(&alg->base.base.cra_list))
+ crypto_engine_unregister_skcipher(alg);
}
static void paes_s390_fini(void)
{
+ if (paes_crypto_engine) {
+ crypto_engine_stop(paes_crypto_engine);
+ crypto_engine_exit(paes_crypto_engine);
+ }
__crypto_unregister_skcipher(&ctr_paes_alg);
__crypto_unregister_skcipher(&xts_paes_alg);
__crypto_unregister_skcipher(&cbc_paes_alg);
__crypto_unregister_skcipher(&ecb_paes_alg);
if (ctrblk)
- free_page((unsigned long) ctrblk);
+ free_page((unsigned long)ctrblk);
+ misc_deregister(&paes_dev);
}
static int __init paes_s390_init(void)
{
int rc;
+ /* register a simple paes pseudo misc device */
+ rc = misc_register(&paes_dev);
+ if (rc)
+ return rc;
+
+ /* with this pseudo devie alloc and start a crypto engine */
+ paes_crypto_engine =
+ crypto_engine_alloc_init_and_set(paes_dev.this_device,
+ true, NULL, false, MAX_QLEN);
+ if (!paes_crypto_engine) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ rc = crypto_engine_start(paes_crypto_engine);
+ if (rc) {
+ crypto_engine_exit(paes_crypto_engine);
+ paes_crypto_engine = NULL;
+ goto out_err;
+ }
+
/* Query available functions for KM, KMC and KMCTR */
cpacf_query(CPACF_KM, &km_functions);
cpacf_query(CPACF_KMC, &kmc_functions);
@@ -927,40 +1653,45 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
- rc = crypto_register_skcipher(&ecb_paes_alg);
+ rc = crypto_engine_register_skcipher(&ecb_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
- rc = crypto_register_skcipher(&cbc_paes_alg);
+ rc = crypto_engine_register_skcipher(&cbc_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
- rc = crypto_register_skcipher(&xts_paes_alg);
+ rc = crypto_engine_register_skcipher(&xts_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
- ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ ctrblk = (u8 *)__get_free_page(GFP_KERNEL);
if (!ctrblk) {
rc = -ENOMEM;
goto out_err;
}
- rc = crypto_register_skcipher(&ctr_paes_alg);
+ rc = crypto_engine_register_skcipher(&ctr_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name);
}
return 0;
+
out_err:
paes_s390_fini();
return rc;
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index 2bb22db54c31..d757ccbce2b4 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -10,27 +10,33 @@
#ifndef _CRYPTO_ARCH_S390_SHA_H
#define _CRYPTO_ARCH_S390_SHA_H
-#include <linux/crypto.h>
-#include <crypto/sha1.h>
#include <crypto/sha2.h>
#include <crypto/sha3.h>
+#include <linux/types.h>
/* must be big enough for the largest SHA variant */
-#define SHA3_STATE_SIZE 200
#define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE
#define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE
+#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx)
struct s390_sha_ctx {
u64 count; /* message length in bytes */
- u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
- u8 buf[SHA_MAX_BLOCK_SIZE];
+ union {
+ u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
+ struct {
+ u64 state[SHA512_DIGEST_SIZE / sizeof(u64)];
+ u64 count_hi;
+ } sha512;
+ };
int func; /* KIMD function to use */
- int first_message_part;
+ bool first_message_part;
};
struct shash_desc;
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
-int s390_sha_final(struct shash_desc *desc, u8 *out);
+int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+ u8 *out);
#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index bc3a22704e09..d229cbd2ba22 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -18,12 +18,12 @@
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
*/
+#include <asm/cpacf.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
#include <crypto/sha1.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include "sha.h"
@@ -49,7 +49,6 @@ static int s390_sha1_export(struct shash_desc *desc, void *out)
octx->count = sctx->count;
memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer));
return 0;
}
@@ -60,7 +59,6 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in)
sctx->count = ictx->count;
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
sctx->func = CPACF_KIMD_SHA_1;
return 0;
}
@@ -68,16 +66,18 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = s390_sha1_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = s390_sha1_export,
.import = s390_sha1_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha1_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
deleted file mode 100644
index 6f1ccdf93d3e..000000000000
--- a/arch/s390/crypto/sha256_s390.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
- *
- * s390 Version:
- * Copyright IBM Corp. 2005, 2011
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha2.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int s390_sha256_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA256_H0;
- sctx->state[1] = SHA256_H1;
- sctx->state[2] = SHA256_H2;
- sctx->state[3] = SHA256_H3;
- sctx->state[4] = SHA256_H4;
- sctx->state[5] = SHA256_H5;
- sctx->state[6] = SHA256_H6;
- sctx->state[7] = SHA256_H7;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_256;
-
- return 0;
-}
-
-static int sha256_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha256_state *octx = out;
-
- octx->count = sctx->count;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- return 0;
-}
-
-static int sha256_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha256_state *ictx = in;
-
- sctx->count = ictx->count;
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA_256;
- return 0;
-}
-
-static struct shash_alg sha256_alg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = s390_sha256_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha256_export,
- .import = sha256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "sha256-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int s390_sha224_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA224_H0;
- sctx->state[1] = SHA224_H1;
- sctx->state[2] = SHA224_H2;
- sctx->state[3] = SHA224_H3;
- sctx->state[4] = SHA224_H4;
- sctx->state[5] = SHA224_H5;
- sctx->state[6] = SHA224_H6;
- sctx->state[7] = SHA224_H7;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_256;
-
- return 0;
-}
-
-static struct shash_alg sha224_alg = {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = s390_sha224_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha256_export,
- .import = sha256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "sha224-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha256_s390_init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
- return -ENODEV;
- ret = crypto_register_shash(&sha256_alg);
- if (ret < 0)
- goto out;
- ret = crypto_register_shash(&sha224_alg);
- if (ret < 0)
- crypto_unregister_shash(&sha256_alg);
-out:
- return ret;
-}
-
-static void __exit sha256_s390_fini(void)
-{
- crypto_unregister_shash(&sha224_alg);
- crypto_unregister_shash(&sha256_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
-module_exit(sha256_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index a84ef692f572..4a7731ac6bcd 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -8,12 +8,14 @@
* Copyright IBM Corp. 2019
* Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
*/
+#include <asm/cpacf.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
#include <crypto/sha3.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include "sha.h"
@@ -21,11 +23,11 @@ static int sha3_256_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
+ sctx->first_message_part = test_facility(86);
+ if (!sctx->first_message_part)
memset(sctx->state, 0, sizeof(sctx->state));
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA3_256;
- sctx->first_message_part = 1;
return 0;
}
@@ -35,11 +37,11 @@ static int sha3_256_export(struct shash_desc *desc, void *out)
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
struct sha3_state *octx = out;
- octx->rsiz = sctx->count;
+ if (sctx->first_message_part) {
+ memset(sctx->state, 0, sizeof(sctx->state));
+ sctx->first_message_part = 0;
+ }
memcpy(octx->st, sctx->state, sizeof(octx->st));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- octx->partial = sctx->first_message_part;
-
return 0;
}
@@ -48,10 +50,9 @@ static int sha3_256_import(struct shash_desc *desc, const void *in)
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
const struct sha3_state *ictx = in;
- sctx->count = ictx->rsiz;
+ sctx->count = 0;
memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ sctx->first_message_part = 0;
sctx->func = CPACF_KIMD_SHA3_256;
return 0;
@@ -60,30 +61,26 @@ static int sha3_256_import(struct shash_desc *desc, const void *in)
static int sha3_224_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
- sctx->count = ictx->rsiz;
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ sha3_256_import(desc, in);
sctx->func = CPACF_KIMD_SHA3_224;
-
return 0;
}
static struct shash_alg sha3_256_alg = {
.digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */
.init = sha3_256_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_256_export,
.import = sha3_256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-256",
.cra_driver_name = "sha3-256-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -93,28 +90,25 @@ static int sha3_224_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
+ sha3_256_init(desc);
sctx->func = CPACF_KIMD_SHA3_224;
- sctx->first_message_part = 1;
-
return 0;
}
static struct shash_alg sha3_224_alg = {
.digestsize = SHA3_224_DIGEST_SIZE,
.init = sha3_224_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_256_export, /* same as for 256 */
.import = sha3_224_import, /* function code different! */
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-224",
.cra_driver_name = "sha3-224-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index 07528fc98ff7..018f02fff444 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -7,12 +7,14 @@
* Copyright IBM Corp. 2019
* Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
*/
+#include <asm/cpacf.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
#include <crypto/sha3.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include "sha.h"
@@ -20,11 +22,11 @@ static int sha3_512_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
+ sctx->first_message_part = test_facility(86);
+ if (!sctx->first_message_part)
memset(sctx->state, 0, sizeof(sctx->state));
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA3_512;
- sctx->first_message_part = 1;
return 0;
}
@@ -34,13 +36,12 @@ static int sha3_512_export(struct shash_desc *desc, void *out)
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
struct sha3_state *octx = out;
- octx->rsiz = sctx->count;
- octx->rsizw = sctx->count >> 32;
+ if (sctx->first_message_part) {
+ memset(sctx->state, 0, sizeof(sctx->state));
+ sctx->first_message_part = 0;
+ }
memcpy(octx->st, sctx->state, sizeof(octx->st));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- octx->partial = sctx->first_message_part;
-
return 0;
}
@@ -49,13 +50,9 @@ static int sha3_512_import(struct shash_desc *desc, const void *in)
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
const struct sha3_state *ictx = in;
- if (unlikely(ictx->rsizw))
- return -ERANGE;
- sctx->count = ictx->rsiz;
-
+ sctx->count = 0;
memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ sctx->first_message_part = 0;
sctx->func = CPACF_KIMD_SHA3_512;
return 0;
@@ -64,33 +61,26 @@ static int sha3_512_import(struct shash_desc *desc, const void *in)
static int sha3_384_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
- if (unlikely(ictx->rsizw))
- return -ERANGE;
- sctx->count = ictx->rsiz;
-
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ sha3_512_import(desc, in);
sctx->func = CPACF_KIMD_SHA3_384;
-
return 0;
}
static struct shash_alg sha3_512_alg = {
.digestsize = SHA3_512_DIGEST_SIZE,
.init = sha3_512_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_512_export,
.import = sha3_512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-512",
.cra_driver_name = "sha3-512-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -102,28 +92,25 @@ static int sha3_384_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
+ sha3_512_init(desc);
sctx->func = CPACF_KIMD_SHA3_384;
- sctx->first_message_part = 1;
-
return 0;
}
static struct shash_alg sha3_384_alg = {
.digestsize = SHA3_384_DIGEST_SIZE,
.init = sha3_384_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_512_export, /* same as for 512 */
.import = sha3_384_import, /* function code different! */
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-384",
.cra_driver_name = "sha3-384-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_sha_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 04f11c407763..33711a29618c 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -7,14 +7,13 @@
* Copyright IBM Corp. 2007
* Author(s): Jan Glauber (jang@de.ibm.com)
*/
+#include <asm/cpacf.h>
#include <crypto/internal/hash.h>
#include <crypto/sha2.h>
+#include <linux/cpufeature.h>
#include <linux/errno.h>
-#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
#include "sha.h"
@@ -22,15 +21,16 @@ static int sha512_init(struct shash_desc *desc)
{
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- *(__u64 *)&ctx->state[0] = SHA512_H0;
- *(__u64 *)&ctx->state[2] = SHA512_H1;
- *(__u64 *)&ctx->state[4] = SHA512_H2;
- *(__u64 *)&ctx->state[6] = SHA512_H3;
- *(__u64 *)&ctx->state[8] = SHA512_H4;
- *(__u64 *)&ctx->state[10] = SHA512_H5;
- *(__u64 *)&ctx->state[12] = SHA512_H6;
- *(__u64 *)&ctx->state[14] = SHA512_H7;
+ ctx->sha512.state[0] = SHA512_H0;
+ ctx->sha512.state[1] = SHA512_H1;
+ ctx->sha512.state[2] = SHA512_H2;
+ ctx->sha512.state[3] = SHA512_H3;
+ ctx->sha512.state[4] = SHA512_H4;
+ ctx->sha512.state[5] = SHA512_H5;
+ ctx->sha512.state[6] = SHA512_H6;
+ ctx->sha512.state[7] = SHA512_H7;
ctx->count = 0;
+ ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
return 0;
@@ -42,9 +42,8 @@ static int sha512_export(struct shash_desc *desc, void *out)
struct sha512_state *octx = out;
octx->count[0] = sctx->count;
- octx->count[1] = 0;
+ octx->count[1] = sctx->sha512.count_hi;
memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
return 0;
}
@@ -53,12 +52,10 @@ static int sha512_import(struct shash_desc *desc, const void *in)
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
const struct sha512_state *ictx = in;
- if (unlikely(ictx->count[1]))
- return -ERANGE;
sctx->count = ictx->count[0];
+ sctx->sha512.count_hi = ictx->count[1];
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
sctx->func = CPACF_KIMD_SHA_512;
return 0;
}
@@ -66,16 +63,18 @@ static int sha512_import(struct shash_desc *desc, const void *in)
static struct shash_alg sha512_alg = {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha512_export,
.import = sha512_import,
.descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha512_state),
+ .statesize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name= "sha512-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -87,15 +86,16 @@ static int sha384_init(struct shash_desc *desc)
{
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- *(__u64 *)&ctx->state[0] = SHA384_H0;
- *(__u64 *)&ctx->state[2] = SHA384_H1;
- *(__u64 *)&ctx->state[4] = SHA384_H2;
- *(__u64 *)&ctx->state[6] = SHA384_H3;
- *(__u64 *)&ctx->state[8] = SHA384_H4;
- *(__u64 *)&ctx->state[10] = SHA384_H5;
- *(__u64 *)&ctx->state[12] = SHA384_H6;
- *(__u64 *)&ctx->state[14] = SHA384_H7;
+ ctx->sha512.state[0] = SHA384_H0;
+ ctx->sha512.state[1] = SHA384_H1;
+ ctx->sha512.state[2] = SHA384_H2;
+ ctx->sha512.state[3] = SHA384_H3;
+ ctx->sha512.state[4] = SHA384_H4;
+ ctx->sha512.state[5] = SHA384_H5;
+ ctx->sha512.state[6] = SHA384_H6;
+ ctx->sha512.state[7] = SHA384_H7;
ctx->count = 0;
+ ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
return 0;
@@ -104,17 +104,19 @@ static int sha384_init(struct shash_desc *desc)
static struct shash_alg sha384_alg = {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha512_export,
.import = sha512_import,
.descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha512_state),
+ .statesize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name= "sha384-s390",
.cra_priority = 300,
.cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_ctxsize = sizeof(struct s390_sha_ctx),
.cra_module = THIS_MODULE,
}
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 961d7d522af1..b5e2c365ea05 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -13,50 +13,33 @@
#include <asm/cpacf.h>
#include "sha.h"
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
unsigned int bsize = crypto_shash_blocksize(desc->tfm);
- unsigned int index, n;
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int n;
int fc;
- /* how much is already in the buffer? */
- index = ctx->count % bsize;
- ctx->count += len;
-
- if ((index + len) < bsize)
- goto store;
-
fc = ctx->func;
if (ctx->first_message_part)
- fc |= test_facility(86) ? CPACF_KIMD_NIP : 0;
-
- /* process one stored block */
- if (index) {
- memcpy(ctx->buf + index, data, bsize - index);
- cpacf_kimd(fc, ctx->state, ctx->buf, bsize);
- ctx->first_message_part = 0;
- fc &= ~CPACF_KIMD_NIP;
- data += bsize - index;
- len -= bsize - index;
- index = 0;
- }
+ fc |= CPACF_KIMD_NIP;
/* process as many blocks as possible */
- if (len >= bsize) {
- n = (len / bsize) * bsize;
- cpacf_kimd(fc, ctx->state, data, n);
- ctx->first_message_part = 0;
- data += n;
- len -= n;
+ n = (len / bsize) * bsize;
+ ctx->count += n;
+ switch (ctx->func) {
+ case CPACF_KLMD_SHA_512:
+ case CPACF_KLMD_SHA3_384:
+ if (ctx->count < n)
+ ctx->sha512.count_hi++;
+ break;
}
-store:
- if (len)
- memcpy(ctx->buf + index , data, len);
-
- return 0;
+ cpacf_kimd(fc, ctx->state, data, n);
+ ctx->first_message_part = 0;
+ return len - n;
}
-EXPORT_SYMBOL_GPL(s390_sha_update);
+EXPORT_SYMBOL_GPL(s390_sha_update_blocks);
static int s390_crypto_shash_parmsize(int func)
{
@@ -77,15 +60,15 @@ static int s390_crypto_shash_parmsize(int func)
}
}
-int s390_sha_final(struct shash_desc *desc, u8 *out)
+int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+ u8 *out)
{
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- unsigned int bsize = crypto_shash_blocksize(desc->tfm);
- u64 bits;
- unsigned int n;
int mbl_offset, fc;
+ u64 bits;
+
+ ctx->count += len;
- n = ctx->count % bsize;
bits = ctx->count * 8;
mbl_offset = s390_crypto_shash_parmsize(ctx->func);
if (mbl_offset < 0)
@@ -95,17 +78,16 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
/* set total msg bit length (mbl) in CPACF parmblock */
switch (ctx->func) {
- case CPACF_KLMD_SHA_1:
- case CPACF_KLMD_SHA_256:
- memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
- break;
case CPACF_KLMD_SHA_512:
- /*
- * the SHA512 parmblock has a 128-bit mbl field, clear
- * high-order u64 field, copy bits to low-order u64 field
- */
- memset(ctx->state + mbl_offset, 0x00, sizeof(bits));
+ /* The SHA512 parmblock has a 128-bit mbl field. */
+ if (ctx->count < len)
+ ctx->sha512.count_hi++;
+ ctx->sha512.count_hi <<= 3;
+ ctx->sha512.count_hi |= ctx->count >> 61;
mbl_offset += sizeof(u64) / sizeof(u32);
+ fallthrough;
+ case CPACF_KLMD_SHA_1:
+ case CPACF_KLMD_SHA_256:
memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
break;
case CPACF_KLMD_SHA3_224:
@@ -121,16 +103,14 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0;
if (ctx->first_message_part)
fc |= CPACF_KLMD_NIP;
- cpacf_klmd(fc, ctx->state, ctx->buf, n);
+ cpacf_klmd(fc, ctx->state, src, len);
/* copy digest to out */
memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
- /* wipe context */
- memset(ctx, 0, sizeof *ctx);
return 0;
}
-EXPORT_SYMBOL_GPL(s390_sha_final);
+EXPORT_SYMBOL_GPL(s390_sha_finup);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 04ea1c03a5ff..96409573c75d 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -342,7 +342,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
struct inode *inode;
inode_lock(d_inode(parent));
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
goto fail;
diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h
new file mode 100644
index 000000000000..f6dfaaba735a
--- /dev/null
+++ b/arch/s390/include/asm/asce.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ASCE_H
+#define _ASM_S390_ASCE_H
+
+#include <linux/thread_info.h>
+#include <linux/irqflags.h>
+#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+
+static inline bool enable_sacf_uaccess(void)
+{
+ unsigned long flags;
+
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ return true;
+ local_irq_save(flags);
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ set_thread_flag(TIF_ASCE_PRIMARY);
+ local_irq_restore(flags);
+ return false;
+}
+
+static inline void disable_sacf_uaccess(bool previous)
+{
+ unsigned long flags;
+
+ if (previous)
+ return;
+ local_irq_save(flags);
+ local_ctl_load(1, &get_lowcore()->user_asce);
+ clear_thread_flag(TIF_ASCE_PRIMARY);
+ local_irq_restore(flags);
+}
+
+#endif /* _ASM_S390_ASCE_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 59ab1192e2d5..54cb97603ec0 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -649,18 +649,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
* instruction
* @func: the function code passed to PCC; see CPACF_KM_xxx defines
* @param: address of parameter block; see POP for details on each func
+ *
+ * Returns the condition code, this is
+ * 0 - cc code 0 (normal completion)
+ * 1 - cc code 1 (protected key wkvp mismatch or src operand out of range)
+ * 2 - cc code 2 (something invalid, scalar multiply infinity, ...)
+ * Condition code 3 (partial completion) is handled within the asm code
+ * and never returned.
*/
-static inline void cpacf_pcc(unsigned long func, void *param)
+static inline int cpacf_pcc(unsigned long func, void *param)
{
+ int cc;
+
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */
" brc 1,0b\n" /* handle partial completion */
- :
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_PCC)
- : "cc", "memory", "0", "1");
+ : CC_CLOBBER_LIST("memory", "0", "1"));
+
+ return CC_TRANSFORM(cc);
}
/**
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index e08169bd63a5..6c6a99660e78 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -15,6 +15,7 @@ enum {
S390_CPU_FEATURE_MSA,
S390_CPU_FEATURE_VXRS,
S390_CPU_FEATURE_UV,
+ S390_CPU_FEATURE_D288,
MAX_CPU_FEATURES
};
diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h
new file mode 100644
index 000000000000..5e1b43cea9d6
--- /dev/null
+++ b/arch/s390/include/asm/diag288.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DIAG288_H
+#define _ASM_S390_DIAG288_H
+
+#include <asm/asm-extable.h>
+#include <asm/types.h>
+
+#define MIN_INTERVAL 15 /* Minimal time supported by diag288 */
+#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */
+
+#define WDT_DEFAULT_TIMEOUT 30
+
+/* Function codes - init, change, cancel */
+#define WDT_FUNC_INIT 0
+#define WDT_FUNC_CHANGE 1
+#define WDT_FUNC_CANCEL 2
+#define WDT_FUNC_CONCEAL 0x80000000
+
+/* Action codes for LPAR watchdog */
+#define LPARWDT_RESTART 0
+
+static inline int __diag288(unsigned int func, unsigned int timeout,
+ unsigned long action, unsigned int len)
+{
+ union register_pair r1 = { .even = func, .odd = timeout, };
+ union register_pair r3 = { .even = action, .odd = len, };
+ int rc = -EINVAL;
+
+ asm volatile(
+ " diag %[r1],%[r3],0x288\n"
+ "0: lhi %[rc],0\n"
+ "1:"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc)
+ : [r1] "d" (r1.pair), [r3] "d" (r3.pair)
+ : "cc", "memory");
+ return rc;
+}
+
+#endif /* _ASM_S390_DIAG288_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index f5781794356b..942f21c39697 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -13,9 +13,11 @@
static uaccess_kmsan_or_inline int \
__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
{ \
+ bool sacf_flag; \
int rc, new; \
\
instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
+ sacf_flag = enable_sacf_uaccess(); \
asm_inline volatile( \
" sacf 256\n" \
"0: l %[old],%[uaddr]\n" \
@@ -32,6 +34,7 @@ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
[new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
: [oparg] "d" (oparg) \
: "cc"); \
+ disable_sacf_uaccess(sacf_flag); \
if (!rc) \
instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
return rc; \
@@ -75,9 +78,11 @@ int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
static uaccess_kmsan_or_inline
int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
{
+ bool sacf_flag;
int rc;
instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" sacf 256\n"
"0: cs %[old],%[new],%[uaddr]\n"
@@ -88,6 +93,7 @@ int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32
: [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
: [new] "d" (newval)
: "cc", "memory");
+ disable_sacf_uaccess(sacf_flag);
*uval = oldval;
instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
return rc;
diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h
index 54478caa5237..8abe5afdbfc4 100644
--- a/arch/s390/include/asm/machine.h
+++ b/arch/s390/include/asm/machine.h
@@ -18,6 +18,7 @@
#define MFEATURE_VM 7
#define MFEATURE_KVM 8
#define MFEATURE_LPAR 9
+#define MFEATURE_DIAG288 10
#ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 88f84beebb9e..d9b8501bc93d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
#include <asm/ctlreg.h>
+#include <asm/asce.h>
#include <asm-generic/mm_hooks.h>
#define init_new_context init_new_context
@@ -77,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
else
get_lowcore()->user_asce.val = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
- /* Clear previous user-ASCE from CR7 */
+ /* Clear previous user-ASCE from CR1 and CR7 */
+ local_ctl_load(1, &s390_invalid_asce);
local_ctl_load(7, &s390_invalid_asce);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
@@ -99,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ unsigned long flags;
if (mm) {
preempt_disable();
@@ -108,15 +111,25 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
+ local_irq_save(flags);
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ else
+ local_ctl_load(1, &get_lowcore()->user_asce);
local_ctl_load(7, &get_lowcore()->user_asce);
+ local_irq_restore(flags);
}
#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
- switch_mm(prev, next, current);
+ switch_mm_irqs_off(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ else
+ local_ctl_load(1, &get_lowcore()->user_asce);
local_ctl_load(7, &get_lowcore()->user_asce);
}
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index 5dca1a46a9f6..b7b59faf16f4 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -20,9 +20,22 @@
* @param key pointer to a buffer containing the key blob
* @param keylen size of the key blob in bytes
* @param protkey pointer to buffer receiving the protected key
+ * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below)
+ * As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC.
* @return 0 on success, negative errno value on failure
*/
int pkey_key2protkey(const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
+
+/*
+ * If this flag is given in the xflags parameter, the pkey implementation
+ * is not allowed to allocate memory but instead should fall back to use
+ * preallocated memory or simple fail with -ENOMEM.
+ * This flag is for protected key derive within a cipher or similar
+ * which must not allocate memory which would cause io operations - see
+ * also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h.
+ */
+#define PKEY_XFLAG_NOMEMALLOC 0x0001
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index c66f3fc6daaf..62c0ab4a4b9d 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -9,6 +9,7 @@
#include <linux/bits.h>
#include <uapi/asm/ptrace.h>
+#include <asm/thread_info.h>
#include <asm/tpi.h>
#define PIF_SYSCALL 0 /* inside a system call */
@@ -126,7 +127,6 @@ struct pt_regs {
struct tpi_info tpi_info;
};
unsigned long flags;
- unsigned long cr1;
unsigned long last_break;
};
@@ -229,8 +229,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
int regs_query_register_offset(const char *name);
const char *regs_query_register_name(unsigned int offset);
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->gprs[15];
+}
+
+static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return 0;
+ return regs->gprs[offset];
+}
+
+static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ unsigned long ksp = kernel_stack_pointer(regs);
+
+ return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long addr;
+
+ addr = kernel_stack_pointer(regs) + n * sizeof(long);
+ if (!regs_within_kernel_stack(regs, addr))
+ return 0;
+ return READ_ONCE_NOCHECK(addr);
+}
/**
* regs_get_kernel_argument() - get Nth function argument in kernel
@@ -251,11 +287,6 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
return regs_get_kernel_stack_nth(regs, argoffset + n);
}
-static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- return regs->gprs[15];
-}
-
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->gprs[2] = rc;
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 2ab868cbae6c..f8f68f4ef255 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -26,11 +26,9 @@ void *memmove(void *dest, const void *src, size_t n);
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
#define __HAVE_ARCH_STRCAT /* inline & arch function */
#define __HAVE_ARCH_STRCMP /* arch function */
-#define __HAVE_ARCH_STRCPY /* inline & arch function */
#define __HAVE_ARCH_STRLCAT /* arch function */
#define __HAVE_ARCH_STRLEN /* inline & arch function */
#define __HAVE_ARCH_STRNCAT /* arch function */
-#define __HAVE_ARCH_STRNCPY /* arch function */
#define __HAVE_ARCH_STRNLEN /* inline & arch function */
#define __HAVE_ARCH_STRSTR /* arch function */
#define __HAVE_ARCH_MEMSET16 /* arch function */
@@ -42,7 +40,6 @@ int memcmp(const void *s1, const void *s2, size_t n);
int strcmp(const char *s1, const char *s2);
size_t strlcat(char *dest, const char *src, size_t n);
char *strncat(char *dest, const char *src, size_t n);
-char *strncpy(char *dest, const char *src, size_t n);
char *strstr(const char *s1, const char *s2);
#endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */
@@ -155,22 +152,6 @@ static inline char *strcat(char *dst, const char *src)
}
#endif
-#ifdef __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dst, const char *src)
-{
- char *ret = dst;
-
- asm volatile(
- " lghi 0,0\n"
- "0: mvst %[dst],%[src]\n"
- " jo 0b"
- : [dst] "+&a" (dst), [src] "+&a" (src)
- :
- : "cc", "memory", "0");
- return ret;
-}
-#endif
-
#if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s)
{
@@ -208,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n)
void *memchr(const void * s, int c, size_t n);
void *memscan(void *s, int c, size_t n);
char *strcat(char *dst, const char *src);
-char *strcpy(char *dst, const char *src);
size_t strlen(const char *s);
size_t strnlen(const char * s, size_t n);
#endif /* !IN_ARCH_STRING_C */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 91f569cae1ce..391eb04d26d8 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -9,6 +9,7 @@
#define _ASM_THREAD_INFO_H
#include <linux/bits.h>
+#include <vdso/page.h>
/*
* General size of kernel stacks
@@ -24,8 +25,6 @@
#define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
#ifndef __ASSEMBLY__
-#include <asm/lowcore.h>
-#include <asm/page.h>
/*
* low level task data that entry.S needs immediate access to
@@ -64,6 +63,7 @@ void arch_setup_new_exec(void);
#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
#define TIF_UPROBE 4 /* breakpointed or single-stepping */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
+#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
@@ -85,6 +85,7 @@ void arch_setup_new_exec(void);
#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE BIT(TIF_UPROBE)
#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
+#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 8629d70ec38b..a43fc88c0050 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -19,6 +19,7 @@
#include <asm/extable.h>
#include <asm/facility.h>
#include <asm-generic/access_ok.h>
+#include <asm/asce.h>
#include <linux/instrumented.h>
void debug_user_asce(int exit);
@@ -478,6 +479,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
__uint128_t old, __uint128_t new,
unsigned long key, int size)
{
+ bool sacf_flag;
int rc = 0;
switch (size) {
@@ -490,6 +492,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
_old = ((unsigned int)old & 0xff) << shift;
_new = ((unsigned int)new & 0xff) << shift;
mask = ~(0xff << shift);
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@@ -524,6 +527,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[default_key] "J" (PAGE_DEFAULT_KEY),
[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
: "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
*(unsigned char *)uval = prev >> shift;
if (!count)
rc = -EAGAIN;
@@ -538,6 +542,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
_old = ((unsigned int)old & 0xffff) << shift;
_new = ((unsigned int)new & 0xffff) << shift;
mask = ~(0xffff << shift);
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@@ -572,6 +577,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[default_key] "J" (PAGE_DEFAULT_KEY),
[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
: "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
*(unsigned short *)uval = prev >> shift;
if (!count)
rc = -EAGAIN;
@@ -580,6 +586,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
case 4: {
unsigned int prev = old;
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@@ -595,12 +602,14 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
*(unsigned int *)uval = prev;
return rc;
}
case 8: {
unsigned long prev = old;
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@@ -616,12 +625,14 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
*(unsigned long *)uval = prev;
return rc;
}
case 16: {
__uint128_t prev = old;
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
" spka 0(%[key])\n"
" sacf 256\n"
@@ -637,6 +648,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
*(__uint128_t *)uval = prev;
return rc;
}
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 46fb0ef6f984..b008402ec9aa 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -616,8 +616,9 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list_item_hdr *secret);
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret);
int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size);
extern int prot_virt_host;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 841e05f7fa7e..95ecad9c7d7d 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -50,7 +50,6 @@ int main(void)
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
OFFSET(__PT_INT_CODE, pt_regs, int_code);
OFFSET(__PT_FLAGS, pt_regs, flags);
- OFFSET(__PT_CR1, pt_regs, cr1);
OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index 03f3a1e52430..c217a5e64094 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m)
* First 64 bytes of the key description is key name in EBCDIC CP 500.
* Convert it to ASCII for displaying in /proc/keys.
*/
- strscpy(ascii, key->description, sizeof(ascii));
+ strscpy(ascii, key->description);
EBCASC_500(ascii, VC_NAME_LEN_BYTES);
seq_puts(m, ascii);
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
index 1b2ae42a0c15..76210f001028 100644
--- a/arch/s390/kernel/cpufeature.c
+++ b/arch/s390/kernel/cpufeature.c
@@ -5,11 +5,13 @@
#include <linux/cpufeature.h>
#include <linux/bug.h>
+#include <asm/machine.h>
#include <asm/elf.h>
enum {
TYPE_HWCAP,
TYPE_FACILITY,
+ TYPE_MACHINE,
};
struct s390_cpu_feature {
@@ -21,6 +23,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
[S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
[S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
[S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+ [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288},
};
/*
@@ -38,6 +41,8 @@ int cpu_have_feature(unsigned int num)
return !!(elf_hwcap & BIT(feature->num));
case TYPE_FACILITY:
return test_facility(feature->num);
+ case TYPE_MACHINE:
+ return test_machine_feature(feature->num);
default:
WARN_ON_ONCE(1);
return 0;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 4a981266b483..adb164223f8c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -354,7 +354,7 @@ static void *nt_prpsinfo(void *ptr)
memset(&prpsinfo, 0, sizeof(prpsinfo));
prpsinfo.pr_sname = 'R';
- strcpy(prpsinfo.pr_fname, "vmlinux");
+ strscpy(prpsinfo.pr_fname, "vmlinux");
return nt_init(ptr, PRPSINFO, prpsinfo);
}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index ce038e9205f7..2a41be2f7925 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -251,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strscpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name);
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 9980c17ba22d..0f00f4b06d51 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -116,7 +116,7 @@ _LPP_OFFSET = __LC_LPP
.macro SIEEXIT sie_control,lowcore
lg %r9,\sie_control # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+ lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
lg %r9,__LC_CURRENT(\lowcore)
mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
@@ -208,7 +208,7 @@ SYM_FUNC_START(__sie64a)
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
GET_LC %r14
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+ lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
lg %r14,__LC_CURRENT(%r14)
mvi __TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
@@ -240,7 +240,6 @@ SYM_CODE_START(system_call)
lghi %r14,0
.Lsysc_per:
STBEAR __LC_LAST_BREAK(%r13)
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
@@ -261,7 +260,6 @@ SYM_CODE_START(system_call)
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@@ -278,7 +276,6 @@ SYM_CODE_START(ret_from_fork)
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
GET_LC %r13
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@@ -299,10 +296,7 @@ SYM_CODE_START(pgm_check_handler)
lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
xgr %r10,%r10
tmhh %r8,0x0001 # coming from user space?
- jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
- j 3f # -> fault in user space
-.Lpgm_skip_asce:
+ jo 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
lg %r11,__LC_CURRENT(%r13)
tm __TI_sie(%r11),0xff
@@ -340,7 +334,6 @@ SYM_CODE_START(pgm_check_handler)
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
@@ -384,8 +377,7 @@ SYM_CODE_START(\name)
#endif
0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
- lg %r15,__LC_KERNEL_STACK(%r13)
+1: lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -408,7 +400,6 @@ SYM_CODE_START(\name)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
@@ -476,8 +467,6 @@ SYM_CODE_START(mcck_int_handler)
.Lmcck_user:
lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
- stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
mvc __PT_R0(128,%r11),0(%r14)
@@ -495,7 +484,6 @@ SYM_CODE_START(mcck_int_handler)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 3b9d9ccfad63..ff15f91affde 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
{ \
if (len >= sizeof(_value)) \
return -E2BIG; \
- len = strscpy(_value, buf, sizeof(_value)); \
+ len = strscpy(_value, buf); \
if ((ssize_t)len < 0) \
return len; \
strim(_value); \
@@ -2249,26 +2249,28 @@ static int __init s390_ipl_init(void)
__initcall(s390_ipl_init);
-static void __init strncpy_skip_quote(char *dst, char *src, int n)
+static void __init strscpy_skip_quote(char *dst, char *src, int n)
{
int sx, dx;
- dx = 0;
- for (sx = 0; src[sx] != 0; sx++) {
+ if (!n)
+ return;
+ for (sx = 0, dx = 0; src[sx]; sx++) {
if (src[sx] == '"')
continue;
- dst[dx++] = src[sx];
- if (dx >= n)
+ dst[dx] = src[sx];
+ if (dx + 1 == n)
break;
+ dx++;
}
+ dst[dx] = '\0';
}
static int __init vmcmd_on_reboot_setup(char *str)
{
if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE);
- vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot));
on_reboot_trigger.action = &vmcmd_action;
return 1;
}
@@ -2278,8 +2280,7 @@ static int __init vmcmd_on_panic_setup(char *str)
{
if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE);
- vmcmd_on_panic[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic));
on_panic_trigger.action = &vmcmd_action;
return 1;
}
@@ -2289,8 +2290,7 @@ static int __init vmcmd_on_halt_setup(char *str)
{
if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE);
- vmcmd_on_halt[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt));
on_halt_trigger.action = &vmcmd_action;
return 1;
}
@@ -2300,8 +2300,7 @@ static int __init vmcmd_on_poff_setup(char *str)
{
if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE);
- vmcmd_on_poff[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff));
on_poff_trigger.action = &vmcmd_action;
return 1;
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index e657fad7e376..6a262e198e35 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -980,8 +980,6 @@ static int cfdiag_push_sample(struct perf_event *event,
}
overflow = perf_event_overflow(event, &data, &regs);
- if (overflow)
- event->pmu->stop(event, 0);
perf_event_update_userpage(event);
return overflow;
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 690a293eb10d..7ace1f9e4ccf 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -290,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ad22799d8a7d..91469401f2c9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1072,10 +1072,7 @@ static int perf_push_sample(struct perf_event *event,
overflow = 0;
if (perf_event_exclude(event, &regs, sde_regs))
goto out;
- if (perf_event_overflow(event, &data, &regs)) {
- overflow = 1;
- event->pmu->stop(event, 0);
- }
+ overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
out:
return overflow;
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 80b1f7a29f11..11f70c1e2797 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -268,35 +268,35 @@ static int __init setup_elf_platform(void)
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
default: /* Use "z10" as default. */
- strcpy(elf_platform, "z10");
+ strscpy(elf_platform, "z10");
break;
case 0x2817:
case 0x2818:
- strcpy(elf_platform, "z196");
+ strscpy(elf_platform, "z196");
break;
case 0x2827:
case 0x2828:
- strcpy(elf_platform, "zEC12");
+ strscpy(elf_platform, "zEC12");
break;
case 0x2964:
case 0x2965:
- strcpy(elf_platform, "z13");
+ strscpy(elf_platform, "z13");
break;
case 0x3906:
case 0x3907:
- strcpy(elf_platform, "z14");
+ strscpy(elf_platform, "z14");
break;
case 0x8561:
case 0x8562:
- strcpy(elf_platform, "z15");
+ strscpy(elf_platform, "z15");
break;
case 0x3931:
case 0x3932:
- strcpy(elf_platform, "z16");
+ strscpy(elf_platform, "z16");
break;
case 0x9175:
case 0x9176:
- strcpy(elf_platform, "z17");
+ strscpy(elf_platform, "z17");
break;
}
return 0;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 34b8d9e745df..e1240f6b29fa 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1524,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = {
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
-{
- if (offset >= NUM_GPRS)
- return 0;
- return regs->gprs[offset];
-}
-
int regs_query_register_offset(const char *name)
{
unsigned long offset;
@@ -1550,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
return gpr_names[offset];
}
-
-static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long ksp = kernel_stack_pointer(regs);
-
- return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
-}
-
-/**
- * regs_get_kernel_stack_nth() - get Nth entry of the stack
- * @regs:pt_regs which contains kernel stack pointer.
- * @n:stack entry number.
- *
- * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
- * this returns 0.
- */
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
-{
- unsigned long addr;
-
- addr = kernel_stack_pointer(regs) + n * sizeof(long);
- if (!regs_within_kernel_stack(regs, addr))
- return 0;
- return READ_ONCE_NOCHECK(addr);
-}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 63f41dfaba85..81f12bb77f62 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -263,7 +263,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
abs_lc = get_abs_lowcore();
memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
put_abs_lowcore(abs_lc);
- lc->cregs_save_area[1] = lc->kernel_asce;
+ lc->cregs_save_area[1] = lc->user_asce;
lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
arch_spin_lock_setup(cpu);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 9a5d5be8acf4..4ab0b6b4866e 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -782,7 +782,12 @@ out_kobj:
device_initcall(uv_sysfs_init);
/*
- * Find the secret with the secret_id in the provided list.
+ * Locate a secret in the list by its id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Search for a secret with the given secret_id in the Ultravisor secret store.
*
* Context: might sleep.
*/
@@ -803,12 +808,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
/*
* Do the actual search for `uv_get_secret_metadata`.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
*
* Context: might sleep.
*/
-static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list *list,
- struct uv_secret_list_item_hdr *secret)
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
{
u16 start_idx = 0;
u16 list_rc;
@@ -830,36 +838,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
return -ENOENT;
}
-
-/**
- * uv_get_secret_metadata() - get secret metadata for a given secret id.
- * @secret_id: search pattern.
- * @secret: output data, containing the secret's metadata.
- *
- * Search for a secret with the given secret_id in the Ultravisor secret store.
- *
- * Context: might sleep.
- *
- * Return:
- * * %0: - Found entry; secret->idx and secret->type are valid.
- * * %ENOENT - No entry found.
- * * %ENODEV: - Not supported: UV not available or command not available.
- * * %EIO: - Other unexpected UV error.
- */
-int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list_item_hdr *secret)
-{
- struct uv_secret_list *buf;
- int rc;
-
- buf = kzalloc(sizeof(*buf), GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = find_secret(secret_id, buf, secret);
- kfree(buf);
- return rc;
-}
-EXPORT_SYMBOL_GPL(uv_get_secret_metadata);
+EXPORT_SYMBOL_GPL(uv_find_secret);
/**
* uv_retrieve_secret() - get the secret value for the secret index.
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 14bbfe50033c..cd35cdbfa871 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,6 +3,7 @@
# Makefile for s390-specific library files..
#
+obj-y += crypto/
lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
lib-y += csum-partial.o
obj-y += mem.o xor.o
@@ -26,4 +27,4 @@ lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
-crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o
+crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32.c
index 124214a27340..3c4b344417c1 100644
--- a/arch/s390/lib/crc32-glue.c
+++ b/arch/s390/lib/crc32.c
@@ -18,8 +18,6 @@
#define VX_ALIGNMENT 16L
#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-static DEFINE_STATIC_KEY_FALSE(have_vxrs);
-
/*
* DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
*
@@ -34,8 +32,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs);
unsigned long prealign, aligned, remaining; \
DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
\
- if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \
- !static_branch_likely(&have_vxrs)) \
+ if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \
return ___crc32_sw(crc, data, datalen); \
\
if ((unsigned long)data & VX_ALIGN_MASK) { \
@@ -64,25 +61,13 @@ DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
-static int __init crc32_s390_init(void)
-{
- if (cpu_have_feature(S390_CPU_FEATURE_VXRS))
- static_branch_enable(&have_vxrs);
- return 0;
-}
-arch_initcall(crc32_s390_init);
-
-static void __exit crc32_s390_exit(void)
-{
-}
-module_exit(crc32_s390_exit);
-
u32 crc32_optimizations(void)
{
- if (static_key_enabled(&have_vxrs))
+ if (cpu_has_vx()) {
return CRC32_LE_OPTIMIZATION |
CRC32_BE_OPTIMIZATION |
CRC32C_OPTIMIZATION;
+ }
return 0;
}
EXPORT_SYMBOL(crc32_optimizations);
diff --git a/arch/s390/lib/crypto/Kconfig b/arch/s390/lib/crypto/Kconfig
new file mode 100644
index 000000000000..e3f855ef4393
--- /dev/null
+++ b/arch/s390/lib/crypto/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_S390
+ tristate
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_SHA256_S390
+ tristate
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile
new file mode 100644
index 000000000000..920197967f46
--- /dev/null
+++ b/arch/s390/lib/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
+chacha_s390-y := chacha-glue.o chacha-s390.o
+
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256.o
diff --git a/arch/s390/lib/crypto/chacha-glue.c b/arch/s390/lib/crypto/chacha-glue.c
new file mode 100644
index 000000000000..f95ba3483bbc
--- /dev/null
+++ b/arch/s390/lib/crypto/chacha-glue.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ChaCha stream cipher (s390 optimized)
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#define KMSG_COMPONENT "chacha_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/chacha.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <asm/fpu.h>
+#include "chacha-s390.h"
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ /* TODO: implement hchacha_block_arch() in assembly */
+ hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ /* s390 chacha20 implementation has 20 rounds hard-coded,
+ * it cannot handle a block of data or less, but otherwise
+ * it can handle data of arbitrary size
+ */
+ if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) {
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+ } else {
+ DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
+
+ kernel_fpu_begin(&vxstate, KERNEL_VXR);
+ chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]);
+ kernel_fpu_end(&vxstate, KERNEL_VXR);
+
+ state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) /
+ CHACHA_BLOCK_SIZE;
+ }
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return cpu_has_vx();
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/lib/crypto/chacha-s390.S
index 63f3102678c0..63f3102678c0 100644
--- a/arch/s390/crypto/chacha-s390.S
+++ b/arch/s390/lib/crypto/chacha-s390.S
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/lib/crypto/chacha-s390.h
index 733744ce30f5..733744ce30f5 100644
--- a/arch/s390/crypto/chacha-s390.h
+++ b/arch/s390/lib/crypto/chacha-s390.h
diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c
new file mode 100644
index 000000000000..7dfe120fafab
--- /dev/null
+++ b/arch/s390/lib/crypto/sha256.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized using the CP Assist for Cryptographic Functions (CPACF)
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/cpacf.h>
+#include <crypto/internal/sha2.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_sha256);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (static_branch_likely(&have_cpacf_sha256))
+ cpacf_kimd(CPACF_KIMD_SHA_256, state, data,
+ nblocks * SHA256_BLOCK_SIZE);
+ else
+ sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_cpacf_sha256);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_s390_mod_init(void)
+{
+ if (cpu_have_feature(S390_CPU_FEATURE_MSA) &&
+ cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
+ static_branch_enable(&have_cpacf_sha256);
+ return 0;
+}
+subsys_initcall(sha256_s390_mod_init);
+
+static void __exit sha256_s390_mod_exit(void)
+{
+}
+module_exit(sha256_s390_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 using the CP Assist for Cryptographic Functions (CPACF)");
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 373fa1f01937..099de76e8b1a 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen);
#endif
/**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- *
- * returns a pointer to @dest
- */
-#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char *dest, const char *src)
-{
- char *ret = dest;
-
- asm volatile(
- " lghi 0,0\n"
- "0: mvst %[dest],%[src]\n"
- " jo 0b\n"
- : [dest] "+&a" (dest), [src] "+&a" (src)
- :
- : "cc", "memory", "0");
- return ret;
-}
-EXPORT_SYMBOL(strcpy);
-#endif
-
-/**
- * strncpy - Copy a length-limited, %NUL-terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @n: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @n bytes.
- */
-#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char *dest, const char *src, size_t n)
-{
- size_t len = __strnend(src, n) - src;
- memset(dest + len, 0, n - len);
- memcpy(dest, src, len);
- return dest;
-}
-EXPORT_SYMBOL(strncpy);
-#endif
-
-/**
* strcat - Append one %NUL-terminated string to another
* @dest: The string to be appended to
* @src: The string to append to it
@@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat);
* @n: The maximum numbers of bytes to copy
*
* returns a pointer to @dest
- *
- * Note that in contrast to strncpy, strncat ensures the result is
- * terminated.
*/
#ifdef __HAVE_ARCH_STRNCAT
char *strncat(char *dest, const char *src, size_t n)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index cec20db88479..fa7d98fa1320 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -17,17 +17,18 @@
#ifdef CONFIG_DEBUG_ENTRY
void debug_user_asce(int exit)
{
+ struct lowcore *lc = get_lowcore();
struct ctlreg cr1, cr7;
local_ctl_store(1, &cr1);
local_ctl_store(7, &cr7);
- if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
+ if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
return;
panic("incorrect ASCE on kernel %s\n"
"cr1: %016lx cr7: %016lx\n"
"kernel: %016lx user: %016lx\n",
exit ? "exit" : "entry", cr1.val, cr7.val,
- get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
+ lc->kernel_asce.val, lc->user_asce.val);
}
#endif /*CONFIG_DEBUG_ENTRY */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index a6b8b8ea9086..f7da53e212f5 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -530,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared)
return rc;
}
+static void __dcss_diag_purge_on_cpu_0(void *data)
+{
+ struct dcss_segment *seg = (struct dcss_segment *)data;
+ unsigned long dummy;
+
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+}
+
/*
* Decrease the use count of a DCSS segment and remove
* it from the address space if nobody is using it
@@ -538,7 +546,6 @@ segment_modify_shared (char *name, int do_nonshared)
void
segment_unload(char *name)
{
- unsigned long dummy;
struct dcss_segment *seg;
if (!machine_is_vm())
@@ -556,7 +563,14 @@ segment_unload(char *name)
kfree(seg->res);
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ /*
+ * Workaround for z/VM issue, where calling the DCSS unload diag on
+ * a non-IPL CPU would cause bogus sclp maximum memory detection on
+ * next IPL.
+ * IPL CPU 0 cannot be set offline, so the dcss_diag() call can
+ * directly be scheduled to that CPU.
+ */
+ smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index e3a6f8ae156c..d177bea0bd73 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -38,11 +38,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
static void __crst_table_upgrade(void *arg)
{
struct mm_struct *mm = arg;
+ struct ctlreg asce;
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
- get_lowcore()->user_asce.val = mm->context.asce;
- local_ctl_load(7, &get_lowcore()->user_asce);
+ asce.val = mm->context.asce;
+ get_lowcore()->user_asce = asce;
+ local_ctl_load(7, &asce);
+ if (!test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &asce);
}
__tlb_flush_local();
}
@@ -52,6 +56,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
unsigned long asce_limit = mm->context.asce_limit;
+ mmap_assert_write_locked(mm);
+
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
VM_BUG_ON(asce_limit < _REGION2_SIZE);
@@ -75,13 +81,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
spin_lock_bh(&mm->page_table_lock);
- /*
- * This routine gets called with mmap_lock lock held and there is
- * no reason to optimize for the case of otherwise. However, if
- * that would ever change, the below check will let us know.
- */
- VM_BUG_ON(asce_limit != mm->context.asce_limit);
-
if (p4d) {
__pgd = (unsigned long *) mm->pgd;
p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 5bbdc4190b8b..cd6676c2d602 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -45,6 +45,7 @@
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
+static DEFINE_MUTEX(zpci_add_remove_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
@@ -70,6 +71,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
struct airq_iv *zpci_aif_sbv;
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ if (!zdev)
+ return;
+ mutex_lock(&zpci_add_remove_lock);
+ kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
+}
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -837,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev)
{
int rc;
+ mutex_lock(&zpci_add_remove_lock);
zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
rc = zpci_init_iommu(zdev);
if (rc)
@@ -850,12 +861,14 @@ int zpci_add_device(struct zpci_dev *zdev)
spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
spin_unlock(&zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
return 0;
error_destroy_iommu:
zpci_destroy_iommu(zdev);
error:
zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
+ mutex_unlock(&zpci_add_remove_lock);
return rc;
}
@@ -925,21 +938,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
* @zdev: the zpci_dev that was reserved
*
* Handle the case that a given zPCI function was reserved by another system.
- * After a call to this function the zpci_dev can not be found via
- * get_zdev_by_fid() anymore but may still be accessible via existing
- * references though it will not be functional anymore.
*/
void zpci_device_reserved(struct zpci_dev *zdev)
{
- /*
- * Remove device from zpci_list as it is going away. This also
- * makes sure we ignore subsequent zPCI events for this device.
- */
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
+ lockdep_assert_held(&zdev->state_lock);
+ /* We may declare the device reserved multiple times */
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ return;
zdev->state = ZPCI_FN_STATE_RESERVED;
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ /*
+ * The underlying device is gone. Allow the zdev to be freed
+ * as soon as all other references are gone by accounting for
+ * the removal as a dropped reference.
+ */
zpci_zdev_put(zdev);
}
@@ -947,13 +959,14 @@ void zpci_release_device(struct kref *kref)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+ lockdep_assert_held(&zpci_add_remove_lock);
WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
-
- if (zdev->zbus->bus)
- zpci_bus_remove_device(zdev, false);
-
- if (zdev_enabled(zdev))
- zpci_disable_device(zdev);
+ /*
+ * We already hold zpci_list_lock thanks to kref_put_lock().
+ * This makes sure no new reference can be taken from the list.
+ */
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
if (zdev->has_hp_slot)
zpci_exit_slot(zdev);
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e86a9419d233..ae3d7a9159bd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
void zpci_release_device(struct kref *kref);
-static inline void zpci_zdev_put(struct zpci_dev *zdev)
-{
- if (zdev)
- kref_put(&zdev->kref, zpci_release_device);
-}
+
+void zpci_zdev_put(struct zpci_dev *zdev);
static inline void zpci_zdev_get(struct zpci_dev *zdev)
{
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 7bd7721c1239..2fbee3887d13 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -335,6 +335,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
zdev->state = ZPCI_FN_STATE_STANDBY;
}
+static void zpci_event_reappear(struct zpci_dev *zdev)
+{
+ lockdep_assert_held(&zdev->state_lock);
+ /*
+ * The zdev is in the reserved state. This means that it was presumed to
+ * go away but there are still undropped references. Now, the platform
+ * announced its availability again. Bring back the lingering zdev
+ * to standby. This is safe because we hold a temporary reference
+ * now so that it won't go away. Account for the re-appearance of the
+ * underlying device by incrementing the reference count.
+ */
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ zpci_zdev_get(zdev);
+ zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+}
+
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -358,8 +374,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
}
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
/* the configuration request may be stale */
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
+ else if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
@@ -375,6 +393,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
}
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
zpci_update_fh(zdev, ccdf->fh);
}
break;
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 5fcc1a3b04bd..51e7a28af899 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -32,8 +32,10 @@ static inline int __pcistb_mio_inuser(
u64 len, u8 *status)
{
int cc, exception;
+ bool sacf_flag;
exception = 1;
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile (
" sacf 256\n"
"0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
@@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser(
: CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
: CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = len >> 24 & 0xff;
return exception ? -ENXIO : CC_TRANSFORM(cc);
}
@@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser(
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
int cc, exception;
+ bool sacf_flag;
u64 val = 0;
u64 cnt = ulen;
u8 tmp;
@@ -64,6 +68,7 @@ static inline int __pcistg_mio_inuser(
* address space. pcistg then uses the user mappings.
*/
exception = 1;
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
@@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser(
CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
:
: CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = ioaddr_len.odd >> 24 & 0xff;
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
@@ -204,6 +210,7 @@ static inline int __pcilg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ bool sacf_flag;
u64 cnt = ulen;
int shift = ulen * 8;
int cc, exception;
@@ -215,6 +222,7 @@ static inline int __pcilg_mio_inuser(
* user address @dst
*/
exception = 1;
+ sacf_flag = enable_sacf_uaccess();
asm_inline volatile (
" sacf 256\n"
"0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
@@ -236,10 +244,10 @@ static inline int __pcilg_mio_inuser(
: [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception),
CC_OUT(cc, cc), [val] "=d" (val),
[dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
- [shift] "+d" (shift)
+ [shift] "+a" (shift)
:
: CC_CLOBBER_LIST("memory"));
-
+ disable_sacf_uaccess(sacf_flag);
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we write everything to the user space buffer? */
if (!cc && cnt != 0)
diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c
index f9f3b14f70d5..730c01b225bd 100644
--- a/arch/sh/boards/mach-se/7343/irq.c
+++ b/arch/sh/boards/mach-se/7343/irq.c
@@ -47,8 +47,9 @@ static void __init se7343_domain_init(void)
{
int i;
- se7343_irq_domain = irq_domain_add_linear(NULL, SE7343_FPGA_IRQ_NR,
- &irq_domain_simple_ops, NULL);
+ se7343_irq_domain = irq_domain_create_linear(NULL, SE7343_FPGA_IRQ_NR,
+ &irq_domain_simple_ops,
+ NULL);
if (unlikely(!se7343_irq_domain)) {
printk("Failed to get IRQ domain\n");
return;
@@ -70,7 +71,7 @@ static void __init se7343_gc_init(void)
struct irq_chip_type *ct;
unsigned int irq_base;
- irq_base = irq_linear_revmap(se7343_irq_domain, 0);
+ irq_base = irq_find_mapping(se7343_irq_domain, 0);
gc = irq_alloc_generic_chip(DRV_NAME, 1, irq_base, se7343_irq_regs,
handle_level_irq);
diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c
index efa96edd47dc..49aa3a2b1b8f 100644
--- a/arch/sh/boards/mach-se/7722/irq.c
+++ b/arch/sh/boards/mach-se/7722/irq.c
@@ -46,7 +46,7 @@ static void __init se7722_domain_init(void)
{
int i;
- se7722_irq_domain = irq_domain_add_linear(NULL, SE7722_FPGA_IRQ_NR,
+ se7722_irq_domain = irq_domain_create_linear(NULL, SE7722_FPGA_IRQ_NR,
&irq_domain_simple_ops, NULL);
if (unlikely(!se7722_irq_domain)) {
printk("Failed to get IRQ domain\n");
@@ -69,7 +69,7 @@ static void __init se7722_gc_init(void)
struct irq_chip_type *ct;
unsigned int irq_base;
- irq_base = irq_linear_revmap(se7722_irq_domain, 0);
+ irq_base = irq_find_mapping(se7722_irq_domain, 0);
gc = irq_alloc_generic_chip(DRV_NAME, 1, irq_base, se7722_irq_regs,
handle_level_irq);
diff --git a/arch/sh/boards/mach-x3proto/gpio.c b/arch/sh/boards/mach-x3proto/gpio.c
index f82d3a6a844a..c13d51b29702 100644
--- a/arch/sh/boards/mach-x3proto/gpio.c
+++ b/arch/sh/boards/mach-x3proto/gpio.c
@@ -108,7 +108,7 @@ int __init x3proto_gpio_setup(void)
if (unlikely(ret))
goto err_gpio;
- x3proto_irq_domain = irq_domain_add_linear(NULL, NR_BASEBOARD_GPIOS,
+ x3proto_irq_domain = irq_domain_create_linear(NULL, NR_BASEBOARD_GPIOS,
&x3proto_gpio_irq_ops, NULL);
if (unlikely(!x3proto_irq_domain))
goto err_irq;
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index fc2010c241fb..31dbd8888aaa 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -87,6 +87,5 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_DEBUG_FS=y
-CONFIG_CRYPTO_MANAGER=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index f1ba0fefe1f9..7a7c4dec2925 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -205,7 +205,7 @@ CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_UPROBE_EVENTS=y
CONFIG_KEYS=y
CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig
index e858597de89d..a6ba319c42dc 100644
--- a/arch/sparc/crypto/Kconfig
+++ b/arch/sparc/crypto/Kconfig
@@ -36,16 +36,6 @@ config CRYPTO_SHA1_SPARC64
Architecture: sparc64
-config CRYPTO_SHA256_SPARC64
- tristate "Hash functions: SHA-224 and SHA-256"
- depends on SPARC64
- select CRYPTO_SHA256
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: sparc64 using crypto instructions, when available
-
config CRYPTO_SHA512_SPARC64
tristate "Hash functions: SHA-384 and SHA-512"
depends on SPARC64
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
index a2d7fca40cb4..701c39edb0d7 100644
--- a/arch/sparc/crypto/Makefile
+++ b/arch/sparc/crypto/Makefile
@@ -4,7 +4,6 @@
#
obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
-obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
@@ -13,7 +12,6 @@ obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
sha1-sparc64-y := sha1_asm.o sha1_glue.o
-sha256-sparc64-y := sha256_asm.o sha256_glue.o
sha512-sparc64-y := sha512_asm.o sha512_glue.o
md5-sparc64-y := md5_asm.o md5_glue.o
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index 155cefb98520..f291174a72a1 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -1,9 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 683150830356..359f22643b05 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -27,11 +27,10 @@
#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
+#include <asm/opcodes.h>
#include <asm/pstate.h>
#include <asm/elf.h>
-#include "opcodes.h"
-
struct aes_ops {
void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S
index dcdc9193fcd7..8471b346ef54 100644
--- a/arch/sparc/crypto/camellia_asm.S
+++ b/arch/sparc/crypto/camellia_asm.S
@@ -1,9 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \
CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index aaa9714378e6..e7a1e1c42b99 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -15,11 +15,10 @@
#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
+#include <asm/opcodes.h>
#include <asm/pstate.h>
#include <asm/elf.h>
-#include "opcodes.h"
-
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
#define CAMELLIA_BLOCK_SIZE 16
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S
index 7157468a679d..d534446cbef9 100644
--- a/arch/sparc/crypto/des_asm.S
+++ b/arch/sparc/crypto/des_asm.S
@@ -1,9 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
.align 32
ENTRY(des_sparc64_key_expand)
/* %o0=input_key, %o1=output_key */
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index a499102bf706..e50ec4cd57cd 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -16,11 +16,10 @@
#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
+#include <asm/opcodes.h>
#include <asm/pstate.h>
#include <asm/elf.h>
-#include "opcodes.h"
-
struct des_sparc64_ctx {
u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S
index 7a6637455f37..60b544e4d205 100644
--- a/arch/sparc/crypto/md5_asm.S
+++ b/arch/sparc/crypto/md5_asm.S
@@ -1,9 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
ENTRY(md5_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntryHalf
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
index 511db98d590a..b3615f0cdf62 100644
--- a/arch/sparc/crypto/md5_glue.c
+++ b/arch/sparc/crypto/md5_glue.c
@@ -14,121 +14,104 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/elf.h>
+#include <asm/opcodes.h>
+#include <asm/pstate.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/md5.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
-#include <asm/pstate.h>
-#include <asm/elf.h>
-
-#include "opcodes.h"
+struct sparc_md5_state {
+ __le32 hash[MD5_HASH_WORDS];
+ u64 byte_count;
+};
-asmlinkage void md5_sparc64_transform(u32 *digest, const char *data,
+asmlinkage void md5_sparc64_transform(__le32 *digest, const char *data,
unsigned int rounds);
static int md5_sparc64_init(struct shash_desc *desc)
{
- struct md5_state *mctx = shash_desc_ctx(desc);
+ struct sparc_md5_state *mctx = shash_desc_ctx(desc);
- mctx->hash[0] = MD5_H0;
- mctx->hash[1] = MD5_H1;
- mctx->hash[2] = MD5_H2;
- mctx->hash[3] = MD5_H3;
- le32_to_cpu_array(mctx->hash, 4);
+ mctx->hash[0] = cpu_to_le32(MD5_H0);
+ mctx->hash[1] = cpu_to_le32(MD5_H1);
+ mctx->hash[2] = cpu_to_le32(MD5_H2);
+ mctx->hash[3] = cpu_to_le32(MD5_H3);
mctx->byte_count = 0;
return 0;
}
-static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- unsigned int done = 0;
-
- sctx->byte_count += len;
- if (partial) {
- done = MD5_HMAC_BLOCK_SIZE - partial;
- memcpy((u8 *)sctx->block + partial, data, done);
- md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
- }
- if (len - done >= MD5_HMAC_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;
-
- md5_sparc64_transform(sctx->hash, data + done, rounds);
- done += rounds * MD5_HMAC_BLOCK_SIZE;
- }
-
- memcpy(sctx->block, data + done, len - done);
-}
-
static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct md5_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
-
- /* Handle the fast case right here */
- if (partial + len < MD5_HMAC_BLOCK_SIZE) {
- sctx->byte_count += len;
- memcpy((u8 *)sctx->block + partial, data, len);
- } else
- __md5_sparc64_update(sctx, data, len, partial);
+ struct sparc_md5_state *sctx = shash_desc_ctx(desc);
- return 0;
+ sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE);
+ md5_sparc64_transform(sctx->hash, data, len / MD5_HMAC_BLOCK_SIZE);
+ return len - round_down(len, MD5_HMAC_BLOCK_SIZE);
}
/* Add padding and return the message digest. */
-static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
+static int md5_sparc64_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int offset, u8 *out)
{
- struct md5_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- u32 *dst = (u32 *)out;
- __le64 bits;
- static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_le64(sctx->byte_count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);
-
- /* We need to fill a whole block for __md5_sparc64_update() */
- if (padlen <= 56) {
- sctx->byte_count += padlen;
- memcpy((u8 *)sctx->block + index, padding, padlen);
- } else {
- __md5_sparc64_update(sctx, padding, padlen, index);
- }
- __md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+ struct sparc_md5_state *sctx = shash_desc_ctx(desc);
+ __le64 block[MD5_BLOCK_WORDS] = {};
+ u8 *p = memcpy(block, src, offset);
+ __le32 *dst = (__le32 *)out;
+ __le64 *pbits;
+ int i;
+
+ src = p;
+ p += offset;
+ *p++ = 0x80;
+ sctx->byte_count += offset;
+ pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1];
+ *pbits = cpu_to_le64(sctx->byte_count << 3);
+ md5_sparc64_transform(sctx->hash, src, (pbits - block + 1) / 8);
+ memzero_explicit(block, sizeof(block));
/* Store state in digest */
for (i = 0; i < MD5_HASH_WORDS; i++)
dst[i] = sctx->hash[i];
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
return 0;
}
static int md5_sparc64_export(struct shash_desc *desc, void *out)
{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
+ struct sparc_md5_state *sctx = shash_desc_ctx(desc);
+ union {
+ u8 *u8;
+ u32 *u32;
+ u64 *u64;
+ } p = { .u8 = out };
+ int i;
+ for (i = 0; i < MD5_HASH_WORDS; i++)
+ put_unaligned(le32_to_cpu(sctx->hash[i]), p.u32++);
+ put_unaligned(sctx->byte_count, p.u64);
return 0;
}
static int md5_sparc64_import(struct shash_desc *desc, const void *in)
{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
+ struct sparc_md5_state *sctx = shash_desc_ctx(desc);
+ union {
+ const u8 *u8;
+ const u32 *u32;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int i;
+ for (i = 0; i < MD5_HASH_WORDS; i++)
+ sctx->hash[i] = cpu_to_le32(get_unaligned(p.u32++));
+ sctx->byte_count = get_unaligned(p.u64);
return 0;
}
@@ -136,15 +119,16 @@ static struct shash_alg alg = {
.digestsize = MD5_DIGEST_SIZE,
.init = md5_sparc64_init,
.update = md5_sparc64_update,
- .final = md5_sparc64_final,
+ .finup = md5_sparc64_finup,
.export = md5_sparc64_export,
.import = md5_sparc64_import,
- .descsize = sizeof(struct md5_state),
- .statesize = sizeof(struct md5_state),
+ .descsize = sizeof(struct sparc_md5_state),
+ .statesize = sizeof(struct sparc_md5_state),
.base = {
.cra_name = "md5",
.cra_driver_name= "md5-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S
index 7d8bf354f0e7..00b46bac1b08 100644
--- a/arch/sparc/crypto/sha1_asm.S
+++ b/arch/sparc/crypto/sha1_asm.S
@@ -1,9 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
ENTRY(sha1_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntryHalf
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
index 06b7becfcb21..ef19d5023b1b 100644
--- a/arch/sparc/crypto/sha1_glue.c
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -11,124 +11,44 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/elf.h>
+#include <asm/opcodes.h>
+#include <asm/pstate.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
-#include <asm/pstate.h>
-#include <asm/elf.h>
-
-#include "opcodes.h"
-
-asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
- unsigned int rounds);
-
-static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- unsigned int done = 0;
-
- sctx->count += len;
- if (partial) {
- done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->buffer + partial, data, done);
- sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
- }
- if (len - done >= SHA1_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
- sha1_sparc64_transform(sctx->state, data + done, rounds);
- done += rounds * SHA1_BLOCK_SIZE;
- }
-
- memcpy(sctx->buffer, data + done, len - done);
-}
+asmlinkage void sha1_sparc64_transform(struct sha1_state *digest,
+ const u8 *data, int rounds);
static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- /* Handle the fast case right here */
- if (partial + len < SHA1_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buffer + partial, data, len);
- } else
- __sha1_sparc64_update(sctx, data, len, partial);
-
- return 0;
+ return sha1_base_do_update_blocks(desc, data, len,
+ sha1_sparc64_transform);
}
/* Add padding and return the message digest. */
-static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA1_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-
- /* We need to fill a whole block for __sha1_sparc64_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buffer + index, padding, padlen);
- } else {
- __sha1_sparc64_update(sctx, padding, padlen, index);
- }
- __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_sparc64_export(struct shash_desc *desc, void *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_sparc64_import(struct shash_desc *desc, const void *in)
+static int sha1_sparc64_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
+ sha1_base_do_finup(desc, src, len, sha1_sparc64_transform);
+ return sha1_base_finish(desc, out);
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_sparc64_update,
- .final = sha1_sparc64_final,
- .export = sha1_sparc64_export,
- .import = sha1_sparc64_import,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
+ .finup = sha1_sparc64_finup,
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
deleted file mode 100644
index 285561a1cde5..000000000000
--- a/arch/sparc/crypto/sha256_glue.c
+++ /dev/null
@@ -1,210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes.
- *
- * This is based largely upon crypto/sha256_generic.c
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-
-#include <asm/pstate.h>
-#include <asm/elf.h>
-
-#include "opcodes.h"
-
-asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data,
- unsigned int rounds);
-
-static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- unsigned int done = 0;
-
- sctx->count += len;
- if (partial) {
- done = SHA256_BLOCK_SIZE - partial;
- memcpy(sctx->buf + partial, data, done);
- sha256_sparc64_transform(sctx->state, sctx->buf, 1);
- }
- if (len - done >= SHA256_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
-
- sha256_sparc64_transform(sctx->state, data + done, rounds);
- done += rounds * SHA256_BLOCK_SIZE;
- }
-
- memcpy(sctx->buf, data + done, len - done);
-}
-
-static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
- /* Handle the fast case right here */
- if (partial + len < SHA256_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buf + partial, data, len);
- } else
- __sha256_sparc64_update(sctx, data, len, partial);
-
- return 0;
-}
-
-static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA256_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);
-
- /* We need to fill a whole block for __sha256_sparc64_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buf + index, padding, padlen);
- } else {
- __sha256_sparc64_update(sctx, padding, padlen, index);
- }
- __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
-{
- u8 D[SHA256_DIGEST_SIZE];
-
- sha256_sparc64_final(desc, D);
-
- memcpy(hash, D, SHA224_DIGEST_SIZE);
- memzero_explicit(D, SHA256_DIGEST_SIZE);
-
- return 0;
-}
-
-static int sha256_sparc64_export(struct shash_desc *desc, void *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
-}
-
-static struct shash_alg sha256_alg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = sha256_sparc64_update,
- .final = sha256_sparc64_final,
- .export = sha256_sparc64_export,
- .import = sha256_sparc64_import,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "sha256-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static struct shash_alg sha224_alg = {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = sha256_sparc64_update,
- .final = sha224_sparc64_final,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "sha224-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static bool __init sparc64_has_sha256_opcode(void)
-{
- unsigned long cfr;
-
- if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
- return false;
-
- __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
- if (!(cfr & CFR_SHA256))
- return false;
-
- return true;
-}
-
-static int __init sha256_sparc64_mod_init(void)
-{
- if (sparc64_has_sha256_opcode()) {
- int ret = crypto_register_shash(&sha224_alg);
- if (ret < 0)
- return ret;
-
- ret = crypto_register_shash(&sha256_alg);
- if (ret < 0) {
- crypto_unregister_shash(&sha224_alg);
- return ret;
- }
-
- pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
- return 0;
- }
- pr_info("sparc64 sha256 opcode not available.\n");
- return -ENODEV;
-}
-
-static void __exit sha256_sparc64_mod_fini(void)
-{
- crypto_unregister_shash(&sha224_alg);
- crypto_unregister_shash(&sha256_alg);
-}
-
-module_init(sha256_sparc64_mod_init);
-module_exit(sha256_sparc64_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");
-
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha256");
-
-#include "crop_devid.c"
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S
index b2f6e6728802..9932b4fe1b59 100644
--- a/arch/sparc/crypto/sha512_asm.S
+++ b/arch/sparc/crypto/sha512_asm.S
@@ -1,9 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
ENTRY(sha512_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntry
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
index d66efa4ec59a..47b9277b6877 100644
--- a/arch/sparc/crypto/sha512_glue.c
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -10,115 +10,42 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/elf.h>
+#include <asm/opcodes.h>
+#include <asm/pstate.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-
-#include <asm/pstate.h>
-#include <asm/elf.h>
-
-#include "opcodes.h"
+#include <linux/kernel.h>
+#include <linux/module.h>
asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data,
unsigned int rounds);
-static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
+static void sha512_block(struct sha512_state *sctx, const u8 *src, int blocks)
{
- unsigned int done = 0;
-
- if ((sctx->count[0] += len) < len)
- sctx->count[1]++;
- if (partial) {
- done = SHA512_BLOCK_SIZE - partial;
- memcpy(sctx->buf + partial, data, done);
- sha512_sparc64_transform(sctx->state, sctx->buf, 1);
- }
- if (len - done >= SHA512_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
-
- sha512_sparc64_transform(sctx->state, data + done, rounds);
- done += rounds * SHA512_BLOCK_SIZE;
- }
-
- memcpy(sctx->buf, data + done, len - done);
+ sha512_sparc64_transform(sctx->state, src, blocks);
}
static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
-
- /* Handle the fast case right here */
- if (partial + len < SHA512_BLOCK_SIZE) {
- if ((sctx->count[0] += len) < len)
- sctx->count[1]++;
- memcpy(sctx->buf + partial, data, len);
- } else
- __sha512_sparc64_update(sctx, data, len, partial);
-
- return 0;
+ return sha512_base_do_update_blocks(desc, data, len, sha512_block);
}
-static int sha512_sparc64_final(struct shash_desc *desc, u8 *out)
+static int sha512_sparc64_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be64 *dst = (__be64 *)out;
- __be64 bits[2];
- static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
- /* Save number of bits */
- bits[1] = cpu_to_be64(sctx->count[0] << 3);
- bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
- /* Pad out to 112 mod 128 and append length */
- index = sctx->count[0] % SHA512_BLOCK_SIZE;
- padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index);
-
- /* We need to fill a whole block for __sha512_sparc64_update() */
- if (padlen <= 112) {
- if ((sctx->count[0] += padlen) < padlen)
- sctx->count[1]++;
- memcpy(sctx->buf + index, padding, padlen);
- } else {
- __sha512_sparc64_update(sctx, padding, padlen, index);
- }
- __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112);
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be64(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash)
-{
- u8 D[64];
-
- sha512_sparc64_final(desc, D);
-
- memcpy(hash, D, 48);
- memzero_explicit(D, 64);
-
- return 0;
+ sha512_base_do_finup(desc, src, len, sha512_block);
+ return sha512_base_finish(desc, out);
}
static struct shash_alg sha512 = {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_sparc64_update,
- .final = sha512_sparc64_final,
- .descsize = sizeof(struct sha512_state),
+ .finup = sha512_sparc64_finup,
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name= "sha512-sparc64",
@@ -132,8 +59,8 @@ static struct shash_alg sha384 = {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_sparc64_update,
- .final = sha384_sparc64_final,
- .descsize = sizeof(struct sha512_state),
+ .finup = sha512_sparc64_finup,
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name= "sha384-sparc64",
diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/include/asm/opcodes.h
index 417b6a10a337..ebfda6eb49b2 100644
--- a/arch/sparc/crypto/opcodes.h
+++ b/arch/sparc/include/asm/opcodes.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _OPCODES_H
-#define _OPCODES_H
+#ifndef _SPARC_ASM_OPCODES_H
+#define _SPARC_ASM_OPCODES_H
#define SPARC_CR_OPCODE_PRIORITY 300
@@ -97,4 +97,4 @@
#define MOVXTOD_G7_F62 \
.word 0xbfb02307;
-#endif /* _OPCODES_H */
+#endif /* _SPARC_ASM_OPCODES_H */
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 58ea4ef9b622..3453f330e363 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -35,6 +35,7 @@ obj-y += process.o
obj-y += signal_$(BITS).o
obj-y += sigutil_$(BITS).o
obj-$(CONFIG_SPARC32) += ioport.o
+obj-y += setup.o
obj-y += setup_$(BITS).o
obj-y += idprom.o
obj-y += sys_sparc_$(BITS).o
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index f02a283a8e8f..cae4d33002a5 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1668,8 +1668,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
if (!sparc_perf_event_set_period(event, hwc, idx))
continue;
- if (perf_event_overflow(event, &data, regs))
- sparc_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
finish_clock = sched_clock();
diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c
new file mode 100644
index 000000000000..4975867d9001
--- /dev/null
+++ b/arch/sparc/kernel/setup.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/setup.h>
+#include <linux/sysctl.h>
+
+static const struct ctl_table sparc_sysctl_table[] = {
+ {
+ .procname = "reboot-cmd",
+ .data = reboot_command,
+ .maxlen = 256,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "stop-a",
+ .data = &stop_a_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "scons-poweroff",
+ .data = &scons_pwroff,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_SPARC64
+ {
+ .procname = "tsb-ratio",
+ .data = &sysctl_tsb_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+};
+
+
+static int __init init_sparc_sysctls(void)
+{
+ register_sysctl_init("kernel", sparc_sysctl_table);
+ return 0;
+}
+
+arch_initcall(init_sparc_sysctls);
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5724d0f356eb..5cf9781d68b4 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -4,6 +4,7 @@
asflags-y := -ansi -DST_DIV0=0x02
+obj-y += crypto/
lib-$(CONFIG_SPARC32) += ashrdi3.o
lib-$(CONFIG_SPARC32) += memcpy.o memset.o
lib-y += strlen.o
@@ -54,4 +55,4 @@ obj-$(CONFIG_SPARC64) += iomap.o
obj-$(CONFIG_SPARC32) += atomic32.o
obj-$(CONFIG_SPARC64) += PeeCeeI.o
obj-$(CONFIG_CRC32_ARCH) += crc32-sparc.o
-crc32-sparc-y := crc32_glue.o crc32c_asm.o
+crc32-sparc-y := crc32.o crc32c_asm.o
diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32.c
index a70752c729cf..40d4720a42a1 100644
--- a/arch/sparc/lib/crc32_glue.c
+++ b/arch/sparc/lib/crc32.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
+/* CRC32c (Castagnoli), sparc64 crc32c opcode accelerated
*
* This is based largely upon arch/x86/crypto/crc32c-intel.c
*
@@ -17,7 +17,7 @@
#include <asm/pstate.h>
#include <asm/elf.h>
-static DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode);
u32 crc32_le_arch(u32 crc, const u8 *data, size_t len)
{
@@ -74,7 +74,7 @@ static int __init crc32_sparc_init(void)
pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
return 0;
}
-arch_initcall(crc32_sparc_init);
+subsys_initcall(crc32_sparc_init);
static void __exit crc32_sparc_exit(void)
{
diff --git a/arch/sparc/lib/crc32c_asm.S b/arch/sparc/lib/crc32c_asm.S
index ee454fa6aed6..4db873850f44 100644
--- a/arch/sparc/lib/crc32c_asm.S
+++ b/arch/sparc/lib/crc32c_asm.S
@@ -1,10 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
#include <asm/asi.h>
-#include "../crypto/opcodes.h"
-
ENTRY(crc32c_sparc64)
/* %o0=crc32p, %o1=data_ptr, %o2=len */
VISEntryHalf
diff --git a/arch/sparc/lib/crypto/Kconfig b/arch/sparc/lib/crypto/Kconfig
new file mode 100644
index 000000000000..e5c3e4d3dba6
--- /dev/null
+++ b/arch/sparc/lib/crypto/Kconfig
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_SHA256_SPARC64
+ tristate
+ depends on SPARC64
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/sparc/lib/crypto/Makefile b/arch/sparc/lib/crypto/Makefile
new file mode 100644
index 000000000000..75ee244ad6f7
--- /dev/null
+++ b/arch/sparc/lib/crypto/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
+sha256-sparc64-y := sha256.o sha256_asm.o
diff --git a/arch/sparc/lib/crypto/sha256.c b/arch/sparc/lib/crypto/sha256.c
new file mode 100644
index 000000000000..8bdec2db08b3
--- /dev/null
+++ b/arch/sparc/lib/crypto/sha256.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SHA-256 accelerated using the sparc64 sha256 opcodes
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <asm/elf.h>
+#include <asm/opcodes.h>
+#include <asm/pstate.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_opcodes);
+
+asmlinkage void sha256_sparc64_transform(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (static_branch_likely(&have_sha256_opcodes))
+ sha256_sparc64_transform(state, data, nblocks);
+ else
+ sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_sha256_opcodes);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_sparc64_mod_init(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return 0;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_SHA256))
+ return 0;
+
+ static_branch_enable(&have_sha256_opcodes);
+ pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
+ return 0;
+}
+subsys_initcall(sha256_sparc64_mod_init);
+
+static void __exit sha256_sparc64_mod_exit(void)
+{
+}
+module_exit(sha256_sparc64_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 accelerated using the sparc64 sha256 opcodes");
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/lib/crypto/sha256_asm.S
index 0b39ec7d7ca2..ddcdd3daf31e 100644
--- a/arch/sparc/crypto/sha256_asm.S
+++ b/arch/sparc/lib/crypto/sha256_asm.S
@@ -1,11 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/opcodes.h>
#include <asm/visasm.h>
-#include "opcodes.h"
-
ENTRY(sha256_sparc64_transform)
- /* %o0 = digest, %o1 = data, %o2 = rounds */
+ /* %o0 = state, %o1 = data, %o2 = nblocks */
VISEntryHalf
ld [%o0 + 0x00], %f0
ld [%o0 + 0x04], %f1
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 1d36a613aad8..9ed792e565c9 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -154,5 +154,6 @@ MRPROPER_FILES += $(HOST_DIR)/include/generated
archclean:
@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
-o -name '*.gcov' \) -type f -print | xargs rm -f
+ $(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) clean
export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
index 71bfd9ef3938..3abf67c83c40 100644
--- a/arch/um/include/asm/fpu/api.h
+++ b/arch/um/include/asm/fpu/api.h
@@ -2,6 +2,8 @@
#ifndef _ASM_UM_FPU_API_H
#define _ASM_UM_FPU_API_H
+#include <linux/types.h>
+
/* Copyright (c) 2020 Cambridge Greys Ltd
* Copyright (c) 2020 Red Hat Inc.
* A set of "dummy" defines to allow the direct inclusion
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index d4b3b6742ec8..2f5ee045bc7a 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -477,7 +477,7 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
return text_poke(addr, opcode, len);
}
-void text_poke_sync(void)
+void smp_text_poke_sync_each_cpu(void)
{
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5873c9e39919..0be4937203c7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -38,6 +38,7 @@ config X86_64
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
select EXECMEM if DYNAMIC_FTRACE
+ select ACPI_MRRM if ACPI
config FORCE_DYNAMIC_FTRACE
def_bool y
@@ -153,6 +154,7 @@ config X86
select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64
select ARCH_WANTS_THP_SWAP if X86_64
select ARCH_HAS_PARANOID_L1D_FLUSH
+ select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
select CLOCKSOURCE_WATCHDOG
@@ -426,8 +428,7 @@ config DYNAMIC_PHYSICAL_MASK
config PGTABLE_LEVELS
int
- default 5 if X86_5LEVEL
- default 4 if X86_64
+ default 5 if X86_64
default 3 if X86_PAE
default 2
@@ -507,8 +508,9 @@ config X86_MPPARSE
config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
- select KERNFS
- select PROC_CPU_RESCTRL if PROC_FS
+ depends on MISC_FILESYSTEMS
+ select ARCH_HAS_CPU_RESCTRL
+ select RESCTRL_FS
select RESCTRL_FS_PSEUDO_LOCK
help
Enable x86 CPU resource control support.
@@ -526,12 +528,6 @@ config X86_CPU_RESCTRL
Say N if unsure.
-config RESCTRL_FS_PSEUDO_LOCK
- bool
- help
- Software mechanism to pin data in a cache portion using
- micro-architecture specific knowledge.
-
config X86_FRED
bool "Flexible Return and Event Delivery"
depends on X86_64
@@ -799,6 +795,7 @@ config PARAVIRT
config PARAVIRT_XXL
bool
+ depends on X86_64
config PARAVIRT_DEBUG
bool "paravirt-ops debugging"
@@ -1463,27 +1460,6 @@ config X86_PAE
has the cost of more pagetable lookup overhead, and also
consumes more pagetable space per process.
-config X86_5LEVEL
- bool "Enable 5-level page tables support"
- default y
- select DYNAMIC_MEMORY_LAYOUT
- select SPARSEMEM_VMEMMAP
- depends on X86_64
- help
- 5-level paging enables access to larger address space:
- up to 128 PiB of virtual address space and 4 PiB of
- physical address space.
-
- It will be supported by future Intel CPUs.
-
- A kernel with the option enabled can be booted on machines that
- support 4- or 5-level paging.
-
- See Documentation/arch/x86/x86_64/5level-paging.rst for more
- information.
-
- Say N if unsure.
-
config X86_DIRECT_GBPAGES
def_bool y
depends on X86_64
@@ -1579,6 +1555,7 @@ config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
+ select SPARSEMEM_VMEMMAP if X86_64
config ARCH_SPARSEMEM_DEFAULT
def_bool X86_64 || (NUMA && X86_32)
@@ -2167,17 +2144,10 @@ config PHYSICAL_ALIGN
Don't change this unless you know what you are doing.
-config DYNAMIC_MEMORY_LAYOUT
- bool
- help
- This option makes base addresses of vmalloc and vmemmap as well as
- __PAGE_OFFSET movable during boot.
-
config RANDOMIZE_MEMORY
bool "Randomize the kernel memory sections"
depends on X86_64
depends on RANDOMIZE_BASE
- select DYNAMIC_MEMORY_LAYOUT
default RANDOMIZE_BASE
help
Randomizes the base virtual address of kernel memory sections
@@ -2711,6 +2681,18 @@ config MITIGATION_SSB
of speculative execution in a similar way to the Meltdown and Spectre
security vulnerabilities.
+config MITIGATION_ITS
+ bool "Enable Indirect Target Selection mitigation"
+ depends on CPU_SUP_INTEL && X86_64
+ depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK
+ select EXECMEM
+ default y
+ help
+ Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in
+ BPU on some Intel CPUs that may allow Spectre V2 style attacks. If
+ disabled, mitigation cannot be enabled via cmdline.
+ See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 6d20a6ce0507..c827f694fb72 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -6,20 +6,6 @@ config AS_AVX512
help
Supported by binutils >= 2.25 and LLVM integrated assembler
-config AS_SHA1_NI
- def_bool $(as-instr,sha1msg1 %xmm0$(comma)%xmm1)
- help
- Supported by binutils >= 2.24 and LLVM integrated assembler
-
-config AS_SHA256_NI
- def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
- help
- Supported by binutils >= 2.24 and LLVM integrated assembler
-config AS_TPAUSE
- def_bool $(as-instr,tpause %ecx)
- help
- Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
-
config AS_GFNI
def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
help
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 753b8763abae..f928cf6e3252 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -245,6 +245,30 @@ config MATOM
endchoice
+config CC_HAS_MARCH_NATIVE
+ # This flag might not be available in cross-compilers:
+ def_bool $(cc-option, -march=native)
+ # LLVM 18 has an easily triggered internal compiler error in core
+ # networking code with '-march=native' on certain systems:
+ # https://github.com/llvm/llvm-project/issues/72026
+ # LLVM 19 introduces an optimization that resolves some high stack
+ # usage warnings that only appear wth '-march=native'.
+ depends on CC_IS_GCC || CLANG_VERSION >= 190100
+
+config X86_NATIVE_CPU
+ bool "Build and optimize for local/native CPU"
+ depends on X86_64
+ depends on CC_HAS_MARCH_NATIVE
+ help
+ Optimize for the current CPU used to compile the kernel.
+ Use this option if you intend to build the kernel for your
+ local machine.
+
+ Note that such a kernel might not work optimally on a
+ different x86 machine.
+
+ If unsure, say N.
+
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures
index e12d5b7e39a2..250c10627ab3 100644
--- a/arch/x86/Kconfig.cpufeatures
+++ b/arch/x86/Kconfig.cpufeatures
@@ -132,10 +132,6 @@ config X86_DISABLED_FEATURE_OSPKE
def_bool y
depends on !X86_INTEL_MEMORY_PROTECTION_KEYS
-config X86_DISABLED_FEATURE_LA57
- def_bool y
- depends on !X86_5LEVEL
-
config X86_DISABLED_FEATURE_PTI
def_bool y
depends on !MITIGATION_PAGE_TABLE_ISOLATION
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 594723005d95..1913d342969b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -173,8 +173,13 @@ else
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
+ifdef CONFIG_X86_NATIVE_CPU
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+else
KBUILD_CFLAGS += -march=x86-64 -mtune=generic
KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic
+endif
KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel
@@ -281,6 +286,7 @@ archprepare: $(cpufeaturemasks.hdr)
###
# Kernel objects
+core-y += arch/x86/boot/startup/
libs-y += arch/x86/lib/
# drivers-y are linked after core-y
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index aa9b96457584..cf4a6155714e 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -32,7 +32,7 @@ intcall:
movw %dx, %si
movw %sp, %di
movw $11, %cx
- rep; movsl
+ rep movsl
/* Pop full state from the stack */
popal
@@ -67,7 +67,7 @@ intcall:
jz 4f
movw %sp, %si
movw $11, %cx
- rep; movsl
+ rep movsl
4: addw $44, %sp
/* Restore state and return */
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 38f17a1e1e36..60580836daf7 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -34,7 +34,7 @@
extern struct setup_header hdr;
extern struct boot_params boot_params;
-#define cpu_relax() asm volatile("rep; nop")
+#define cpu_relax() asm volatile("pause")
static inline void io_delay(void)
{
@@ -155,14 +155,14 @@ static inline void wrgs32(u32 v, addr_t addr)
static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
{
bool diff;
- asm volatile("fs; repe; cmpsb" CC_SET(nz)
+ asm volatile("fs repe cmpsb" CC_SET(nz)
: CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
{
bool diff;
- asm volatile("gs; repe; cmpsb" CC_SET(nz)
+ asm volatile("gs repe cmpsb" CC_SET(nz)
: CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fdbce022db55..f4f7b22d8113 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -44,10 +44,10 @@ KBUILD_CFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += $(call cc-option,-Wa$(comma)-mrelax-relocations=no)
KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
-# sev.c indirectly includes inat-table.h which is generated during
+# sev-decode-insn.c indirectly includes inat-table.c which is generated during
# compilation and stored in $(objtree). Add the directory to the includes so
# that the compiler finds it even with out-of-tree builds (make O=/some/path).
-CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
+CFLAGS_sev-handle-vc.o += -I$(objtree)/arch/x86/lib/
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
@@ -73,7 +73,7 @@ LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
@@ -96,8 +96,7 @@ ifdef CONFIG_X86_64
vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
- vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
- vmlinux-objs-y += $(obj)/la57toggle.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o $(obj)/sev-handle-vc.o
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
@@ -106,6 +105,7 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-libs-$(CONFIG_X86_64) += $(objtree)/arch/x86/boot/startup/lib.a
$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index eafd4f185e77..d9dab940ff62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -35,7 +35,6 @@
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
#include <asm/trapnr.h>
-#include "pgtable.h"
/*
* Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 1cdcd4aaf395..94b5991da001 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -14,7 +14,6 @@
#include "misc.h"
#include "error.h"
-#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
#include <asm/bootparam_utils.h>
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index dd8d1a85f671..db1048621ea2 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -136,6 +136,9 @@ static inline void console_init(void)
#endif
#ifdef CONFIG_AMD_MEM_ENCRYPT
+struct es_em_ctxt;
+struct insn;
+
void sev_enable(struct boot_params *bp);
void snp_check_features(void);
void sev_es_shutdown_ghcb(void);
@@ -143,6 +146,11 @@ extern bool sev_es_check_ghcb_fault(unsigned long address);
void snp_set_page_private(unsigned long paddr);
void snp_set_page_shared(unsigned long paddr);
void sev_prep_identity_maps(unsigned long top_level_pgt);
+
+enum es_result vc_decode_insn(struct es_em_ctxt *ctxt);
+bool insn_has_rep_prefix(struct insn *insn);
+void sev_insn_decode_init(void);
+bool early_setup_ghcb(void);
#else
static inline void sev_enable(struct boot_params *bp)
{
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
deleted file mode 100644
index 6d595abe06b3..000000000000
--- a/arch/x86/boot/compressed/pgtable.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef BOOT_COMPRESSED_PAGETABLE_H
-#define BOOT_COMPRESSED_PAGETABLE_H
-
-#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
-
-#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0
-
-#ifndef __ASSEMBLER__
-
-extern unsigned long *trampoline_32bit;
-
-extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
-
-extern const u16 trampoline_ljmp_imm_offset;
-
-#endif /* __ASSEMBLER__ */
-#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index d8c5de40669d..bdd26050dff7 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -4,19 +4,16 @@
#include <asm/bootparam_utils.h>
#include <asm/e820/types.h>
#include <asm/processor.h>
-#include "pgtable.h"
#include "../string.h"
#include "efi.h"
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
-#ifdef CONFIG_X86_5LEVEL
/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
unsigned int __section(".data") __pgtable_l5_enabled;
unsigned int __section(".data") pgdir_shift = 39;
unsigned int __section(".data") ptrs_per_p4d = 1;
-#endif
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
@@ -115,18 +112,13 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
* Check if LA57 is desired and supported.
*
* There are several parts to the check:
- * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
* - if user asked to disable 5-level paging: no5lvl in cmdline
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
- *
- * That's substitute for boot_cpu_has() in early boot code.
*/
- if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
- !cmdline_find_option_bool("no5lvl") &&
- native_cpuid_eax(0) >= 7 &&
- (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
+ if (!cmdline_find_option_bool("no5lvl") &&
+ native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & BIT(16))) {
l5_required = true;
/* Initialize variables for 5-level paging */
diff --git a/arch/x86/boot/compressed/sev-handle-vc.c b/arch/x86/boot/compressed/sev-handle-vc.c
new file mode 100644
index 000000000000..89dd02de2a0f
--- /dev/null
+++ b/arch/x86/boot/compressed/sev-handle-vc.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "misc.h"
+#include "sev.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/insn.h>
+#include <asm/pgtable_types.h>
+#include <asm/ptrace.h>
+#include <asm/sev.h>
+#include <asm/trapnr.h>
+#include <asm/trap_pf.h>
+#include <asm/fpu/xcr.h>
+
+#define __BOOT_COMPRESSED
+
+/* Basic instruction decoding support needed */
+#include "../../lib/inat.c"
+#include "../../lib/insn.c"
+
+/*
+ * Copy a version of this function here - insn-eval.c can't be used in
+ * pre-decompression code.
+ */
+bool insn_has_rep_prefix(struct insn *insn)
+{
+ insn_byte_t p;
+ int i;
+
+ insn_get_prefixes(insn);
+
+ for_each_insn_prefix(insn, i, p) {
+ if (p == 0xf2 || p == 0xf3)
+ return true;
+ }
+
+ return false;
+}
+
+enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ int ret;
+
+ memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
+
+ return ES_OK;
+}
+
+extern void sev_insn_decode_init(void) __alias(inat_init_tables);
+
+/*
+ * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
+ * doesn't use segments.
+ */
+static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
+{
+ return 0UL;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ void *dst, char *buf, size_t size)
+{
+ memcpy(dst, buf, size);
+
+ return ES_OK;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ void *src, char *buf, size_t size)
+{
+ memcpy(buf, src, size);
+
+ return ES_OK;
+}
+
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ return ES_OK;
+}
+
+static bool fault_in_kernel_space(unsigned long address)
+{
+ return false;
+}
+
+#define sev_printk(fmt, ...)
+
+#include "../../coco/sev/vc-shared.c"
+
+void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
+{
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+
+ if (!boot_ghcb && !early_setup_ghcb())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ vc_ghcb_invalidate(boot_ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
+ result = vc_check_opcode_bytes(&ctxt, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
+ switch (exit_code) {
+ case SVM_EXIT_RDTSC:
+ case SVM_EXIT_RDTSCP:
+ result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
+ break;
+ case SVM_EXIT_IOIO:
+ result = vc_handle_ioio(boot_ghcb, &ctxt);
+ break;
+ case SVM_EXIT_CPUID:
+ result = vc_handle_cpuid(boot_ghcb, &ctxt);
+ break;
+ default:
+ result = ES_UNSUPPORTED;
+ break;
+ }
+
+finish:
+ if (result == ES_OK)
+ vc_finish_insn(&ctxt);
+ else if (result != ES_RETRY)
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 0003e4416efd..fd1b67dfea22 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -21,99 +21,14 @@
#include <asm/fpu/xcr.h>
#include <asm/ptrace.h>
#include <asm/svm.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include "error.h"
-#include "../msr.h"
+#include "sev.h"
static struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
struct ghcb *boot_ghcb;
-/*
- * Copy a version of this function here - insn-eval.c can't be used in
- * pre-decompression code.
- */
-static bool insn_has_rep_prefix(struct insn *insn)
-{
- insn_byte_t p;
- int i;
-
- insn_get_prefixes(insn);
-
- for_each_insn_prefix(insn, i, p) {
- if (p == 0xf2 || p == 0xf3)
- return true;
- }
-
- return false;
-}
-
-/*
- * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
- * doesn't use segments.
- */
-static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
-{
- return 0UL;
-}
-
-static inline u64 sev_es_rd_ghcb_msr(void)
-{
- struct msr m;
-
- boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
-
- return m.q;
-}
-
-static inline void sev_es_wr_ghcb_msr(u64 val)
-{
- struct msr m;
-
- m.q = val;
- boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m);
-}
-
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
-{
- char buffer[MAX_INSN_SIZE];
- int ret;
-
- memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
-
- ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
- if (ret < 0)
- return ES_DECODE_FAILED;
-
- return ES_OK;
-}
-
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
- void *dst, char *buf, size_t size)
-{
- memcpy(dst, buf, size);
-
- return ES_OK;
-}
-
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
- void *src, char *buf, size_t size)
-{
- memcpy(buf, src, size);
-
- return ES_OK;
-}
-
-static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
-{
- return ES_OK;
-}
-
-static bool fault_in_kernel_space(unsigned long address)
-{
- return false;
-}
-
#undef __init
#define __init
@@ -122,24 +37,27 @@ static bool fault_in_kernel_space(unsigned long address)
#define __BOOT_COMPRESSED
-/* Basic instruction decoding support needed */
-#include "../../lib/inat.c"
-#include "../../lib/insn.c"
+extern struct svsm_ca *boot_svsm_caa;
+extern u64 boot_svsm_caa_pa;
-/* Include code for early handlers */
-#include "../../coco/sev/shared.c"
-
-static struct svsm_ca *svsm_get_caa(void)
+struct svsm_ca *svsm_get_caa(void)
{
return boot_svsm_caa;
}
-static u64 svsm_get_caa_pa(void)
+u64 svsm_get_caa_pa(void)
{
return boot_svsm_caa_pa;
}
-static int svsm_perform_call_protocol(struct svsm_call *call)
+int svsm_perform_call_protocol(struct svsm_call *call);
+
+u8 snp_vmpl;
+
+/* Include code for early handlers */
+#include "../../boot/startup/sev-shared.c"
+
+int svsm_perform_call_protocol(struct svsm_call *call)
{
struct ghcb *ghcb;
int ret;
@@ -157,7 +75,7 @@ static int svsm_perform_call_protocol(struct svsm_call *call)
return ret;
}
-bool sev_snp_enabled(void)
+static bool sev_snp_enabled(void)
{
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
}
@@ -212,7 +130,7 @@ void snp_set_page_shared(unsigned long paddr)
__page_state_change(paddr, SNP_PAGE_STATE_SHARED);
}
-static bool early_setup_ghcb(void)
+bool early_setup_ghcb(void)
{
if (set_page_decrypted((unsigned long)&boot_ghcb_page))
return false;
@@ -223,7 +141,7 @@ static bool early_setup_ghcb(void)
boot_ghcb = &boot_ghcb_page;
/* Initialize lookup tables for the instruction decoder */
- inat_init_tables();
+ sev_insn_decode_init();
/* SNP guest requires the GHCB GPA must be registered */
if (sev_snp_enabled())
@@ -296,46 +214,6 @@ bool sev_es_check_ghcb_fault(unsigned long address)
return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
}
-void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
-{
- struct es_em_ctxt ctxt;
- enum es_result result;
-
- if (!boot_ghcb && !early_setup_ghcb())
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-
- vc_ghcb_invalidate(boot_ghcb);
- result = vc_init_em_ctxt(&ctxt, regs, exit_code);
- if (result != ES_OK)
- goto finish;
-
- result = vc_check_opcode_bytes(&ctxt, exit_code);
- if (result != ES_OK)
- goto finish;
-
- switch (exit_code) {
- case SVM_EXIT_RDTSC:
- case SVM_EXIT_RDTSCP:
- result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
- break;
- case SVM_EXIT_IOIO:
- result = vc_handle_ioio(boot_ghcb, &ctxt);
- break;
- case SVM_EXIT_CPUID:
- result = vc_handle_cpuid(boot_ghcb, &ctxt);
- break;
- default:
- result = ES_UNSUPPORTED;
- break;
- }
-
-finish:
- if (result == ES_OK)
- vc_finish_insn(&ctxt);
- else if (result != ES_RETRY)
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-}
-
/*
* SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need
* guest side implementation for proper functioning of the guest. If any
diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h
index d3900384b8ab..92f79c21939c 100644
--- a/arch/x86/boot/compressed/sev.h
+++ b/arch/x86/boot/compressed/sev.h
@@ -10,14 +10,31 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT
-bool sev_snp_enabled(void);
+#include "../msr.h"
+
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 sev_get_status(void);
bool early_is_sevsnp_guest(void);
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ struct msr m;
+
+ boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
+
+ return m.q;
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ struct msr m;
+
+ m.q = val;
+ boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m);
+}
+
#else
-static inline bool sev_snp_enabled(void) { return false; }
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 sev_get_status(void) { return 0; }
static inline bool early_is_sevsnp_guest(void) { return false; }
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 81fc1eaa3229..9af19d9614cb 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -15,9 +15,9 @@ static void *____memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
- "rep ; movsl\n\t"
+ "rep movsl\n\t"
"movl %4,%%ecx\n\t"
- "rep ; movsb\n\t"
+ "rep movsb"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
: "memory");
@@ -29,9 +29,9 @@ static void *____memcpy(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
asm volatile(
- "rep ; movsq\n\t"
+ "rep movsq\n\t"
"movq %4,%%rcx\n\t"
- "rep ; movsb\n\t"
+ "rep movsb"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
: "memory");
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 6afd05e819d2..3973a67cd04e 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -22,10 +22,10 @@ SYM_FUNC_START_NOALIGN(memcpy)
movw %dx, %si
pushw %cx
shrw $2, %cx
- rep; movsl
+ rep movsl
popw %cx
andw $3, %cx
- rep; movsb
+ rep movsb
popw %di
popw %si
retl
@@ -38,10 +38,10 @@ SYM_FUNC_START_NOALIGN(memset)
imull $0x01010101,%eax
pushw %cx
shrw $2, %cx
- rep; stosl
+ rep stosl
popw %cx
andw $3, %cx
- rep; stosb
+ rep stosb
popw %di
retl
SYM_FUNC_END(memset)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b5c79f43359b..e30649e44d8f 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -361,12 +361,8 @@ xloadflags:
#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_X86_5LEVEL
#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED)
#else
-#define XLF56 XLF_5LEVEL
-#endif
-#else
#define XLF56 0
#endif
@@ -585,7 +581,7 @@ start_of_setup:
xorl %eax, %eax
subw %di, %cx
shrw $2, %cx
- rep; stosl
+ rep stosl
# Jump to C code (should not return)
calll main
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
new file mode 100644
index 000000000000..b514f7e81332
--- /dev/null
+++ b/arch/x86/boot/startup/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KBUILD_AFLAGS += -D__DISABLE_EXPORTS
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS -mcmodel=small -fPIC \
+ -Os -DDISABLE_BRANCH_PROFILING \
+ $(DISABLE_STACKLEAK_PLUGIN) \
+ -fno-stack-protector -D__NO_FORTIFY \
+ -fno-jump-tables \
+ -include $(srctree)/include/linux/hidden.h
+
+# disable ftrace hooks and LTO
+KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS))
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+obj-$(CONFIG_X86_64) += gdt_idt.o map_kernel.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += sme.o sev-startup.o
+
+lib-$(CONFIG_X86_64) += la57toggle.o
+lib-$(CONFIG_EFI_MIXED) += efi-mixed.o
+
+#
+# Disable objtool validation for all library code, which is intended
+# to be linked into the decompressor or the EFI stub but not vmlinux
+#
+$(patsubst %.o,$(obj)/%.o,$(lib-y)): OBJECT_FILES_NON_STANDARD := y
diff --git a/drivers/firmware/efi/libstub/x86-mixed.S b/arch/x86/boot/startup/efi-mixed.S
index e04ed99bc449..e04ed99bc449 100644
--- a/drivers/firmware/efi/libstub/x86-mixed.S
+++ b/arch/x86/boot/startup/efi-mixed.S
diff --git a/arch/x86/boot/startup/gdt_idt.c b/arch/x86/boot/startup/gdt_idt.c
new file mode 100644
index 000000000000..a3112a69b06a
--- /dev/null
+++ b/arch/x86/boot/startup/gdt_idt.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+#include <asm/desc.h>
+#include <asm/init.h>
+#include <asm/setup.h>
+#include <asm/sev.h>
+#include <asm/trapnr.h>
+
+/*
+ * Data structures and code used for IDT setup in head_64.S. The bringup-IDT is
+ * used until the idt_table takes over. On the boot CPU this happens in
+ * x86_64_start_kernel(), on secondary CPUs in start_secondary(). In both cases
+ * this happens in the functions called from head_64.S.
+ *
+ * The idt_table can't be used that early because all the code modifying it is
+ * in idt.c and can be instrumented by tracing or KASAN, which both don't work
+ * during early CPU bringup. Also the idt_table has the runtime vectors
+ * configured which require certain CPU state to be setup already (like TSS),
+ * which also hasn't happened yet in early CPU bringup.
+ */
+static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
+
+/* This may run while still in the direct mapping */
+void __head startup_64_load_idt(void *vc_handler)
+{
+ struct desc_ptr desc = {
+ .address = (unsigned long)rip_rel_ptr(bringup_idt_table),
+ .size = sizeof(bringup_idt_table) - 1,
+ };
+ struct idt_data data;
+ gate_desc idt_desc;
+
+ /* @vc_handler is set only for a VMM Communication Exception */
+ if (vc_handler) {
+ init_idt_data(&data, X86_TRAP_VC, vc_handler);
+ idt_init_desc(&idt_desc, &data);
+ native_write_idt_entry((gate_desc *)desc.address, X86_TRAP_VC, &idt_desc);
+ }
+
+ native_load_idt(&desc);
+}
+
+/*
+ * Setup boot CPU state needed before kernel switches to virtual addresses.
+ */
+void __head startup_64_setup_gdt_idt(void)
+{
+ struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
+ void *handler = NULL;
+
+ struct desc_ptr startup_gdt_descr = {
+ .address = (unsigned long)gp->gdt,
+ .size = GDT_SIZE - 1,
+ };
+
+ /* Load GDT */
+ native_load_gdt(&startup_gdt_descr);
+
+ /* New GDT is live - reload data segment registers */
+ asm volatile("movl %%eax, %%ds\n"
+ "movl %%eax, %%ss\n"
+ "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ handler = rip_rel_ptr(vc_no_ghcb);
+
+ startup_64_load_idt(handler);
+}
diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/startup/la57toggle.S
index 9ee002387eb1..370075b4d95b 100644
--- a/arch/x86/boot/compressed/la57toggle.S
+++ b/arch/x86/boot/startup/la57toggle.S
@@ -5,7 +5,6 @@
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
-#include "pgtable.h"
/*
* This is the 32-bit trampoline that will be copied over to low memory. It
diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
new file mode 100644
index 000000000000..332dbe6688c4
--- /dev/null
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include <asm/init.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sev.h>
+
+extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
+extern unsigned int next_early_pgt;
+
+static inline bool check_la57_support(void)
+{
+ /*
+ * 5-level paging is detected and enabled at kernel decompression
+ * stage. Only check if it has been enabled there.
+ */
+ if (!(native_read_cr4() & X86_CR4_LA57))
+ return false;
+
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+
+ return true;
+}
+
+static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
+ pmdval_t *pmd,
+ unsigned long p2v_offset)
+{
+ unsigned long paddr, paddr_end;
+ int i;
+
+ /* Encrypt the kernel and related (if SME is active) */
+ sme_encrypt_kernel(bp);
+
+ /*
+ * Clear the memory encryption mask from the .bss..decrypted section.
+ * The bss section will be memset to zero later in the initialization so
+ * there is no need to zero it after changing the memory encryption
+ * attribute.
+ */
+ if (sme_get_me_mask()) {
+ paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
+ paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);
+
+ for (; paddr < paddr_end; paddr += PMD_SIZE) {
+ /*
+ * On SNP, transition the page to shared in the RMP table so that
+ * it is consistent with the page table attribute change.
+ *
+ * __start_bss_decrypted has a virtual address in the high range
+ * mapping (kernel .text). PVALIDATE, by way of
+ * early_snp_set_memory_shared(), requires a valid virtual
+ * address but the kernel is currently running off of the identity
+ * mapping so use the PA to get a *currently* valid virtual address.
+ */
+ early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);
+
+ i = pmd_index(paddr - p2v_offset);
+ pmd[i] -= sme_get_me_mask();
+ }
+ }
+
+ /*
+ * Return the SME encryption mask (if SME is active) to be used as a
+ * modifier for the initial pgdir entry programmed into CR3.
+ */
+ return sme_get_me_mask();
+}
+
+/*
+ * This code is compiled using PIC codegen because it will execute from the
+ * early 1:1 mapping of memory, which deviates from the mapping expected by the
+ * linker. Due to this deviation, taking the address of a global variable will
+ * produce an ambiguous result when using the plain & operator. Instead,
+ * rip_rel_ptr() must be used, which will return the RIP-relative address in
+ * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
+ * subtracting p2v_offset from the RIP-relative address.
+ */
+unsigned long __head __startup_64(unsigned long p2v_offset,
+ struct boot_params *bp)
+{
+ pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
+ unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
+ unsigned long va_text, va_end;
+ unsigned long pgtable_flags;
+ unsigned long load_delta;
+ pgdval_t *pgd;
+ p4dval_t *p4d;
+ pudval_t *pud;
+ pmdval_t *pmd, pmd_entry;
+ bool la57;
+ int i;
+
+ la57 = check_la57_support();
+
+ /* Is the address too large? */
+ if (physaddr >> MAX_PHYSMEM_BITS)
+ for (;;);
+
+ /*
+ * Compute the delta between the address I am compiled to run at
+ * and the address I am actually running at.
+ */
+ phys_base = load_delta = __START_KERNEL_map + p2v_offset;
+
+ /* Is the address not 2M aligned? */
+ if (load_delta & ~PMD_MASK)
+ for (;;);
+
+ va_text = physaddr - p2v_offset;
+ va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;
+
+ /* Include the SME encryption mask in the fixup value */
+ load_delta += sme_get_me_mask();
+
+ /* Fixup the physical addresses in the page table */
+
+ pgd = rip_rel_ptr(early_top_pgt);
+ pgd[pgd_index(__START_KERNEL_map)] += load_delta;
+
+ if (la57) {
+ p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
+ p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
+
+ pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
+ }
+
+ level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
+ level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;
+
+ for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
+ level2_fixmap_pgt[i].pmd += load_delta;
+
+ /*
+ * Set up the identity mapping for the switchover. These
+ * entries should *NOT* have the global bit set! This also
+ * creates a bunch of nonsense entries but that is fine --
+ * it avoids problems around wraparound.
+ */
+
+ pud = &early_pgts[0]->pmd;
+ pmd = &early_pgts[1]->pmd;
+ next_early_pgt = 2;
+
+ pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
+
+ if (la57) {
+ p4d = &early_pgts[next_early_pgt++]->pmd;
+
+ i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+ pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
+ pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
+
+ i = physaddr >> P4D_SHIFT;
+ p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+ p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+ } else {
+ i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+ pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
+ pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
+ }
+
+ i = physaddr >> PUD_SHIFT;
+ pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+ pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+
+ pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+ pmd_entry += sme_get_me_mask();
+ pmd_entry += physaddr;
+
+ for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
+ int idx = i + (physaddr >> PMD_SHIFT);
+
+ pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
+ }
+
+ /*
+ * Fixup the kernel text+data virtual addresses. Note that
+ * we might write invalid pmds, when the kernel is relocated
+ * cleanup_highmap() fixes this up along with the mappings
+ * beyond _end.
+ *
+ * Only the region occupied by the kernel image has so far
+ * been checked against the table of usable memory regions
+ * provided by the firmware, so invalidate pages outside that
+ * region. A page table entry that maps to a reserved area of
+ * memory would allow processor speculation into that area,
+ * and on some hardware (particularly the UV platform) even
+ * speculative access to some reserved areas is caught as an
+ * error, causing the BIOS to halt the system.
+ */
+
+ pmd = rip_rel_ptr(level2_kernel_pgt);
+
+ /* invalidate pages before the kernel image */
+ for (i = 0; i < pmd_index(va_text); i++)
+ pmd[i] &= ~_PAGE_PRESENT;
+
+ /* fixup pages that are part of the kernel image */
+ for (; i <= pmd_index(va_end); i++)
+ if (pmd[i] & _PAGE_PRESENT)
+ pmd[i] += load_delta;
+
+ /* invalidate pages after the kernel image */
+ for (; i < PTRS_PER_PMD; i++)
+ pmd[i] &= ~_PAGE_PRESENT;
+
+ return sme_postprocess_startup(bp, pmd, p2v_offset);
+}
diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/boot/startup/sev-shared.c
index 2e4122f8aa6b..7a706db87b93 100644
--- a/arch/x86/coco/sev/shared.c
+++ b/arch/x86/boot/startup/sev-shared.c
@@ -14,76 +14,23 @@
#ifndef __BOOT_COMPRESSED
#define error(v) pr_err(v)
#define has_cpuflag(f) boot_cpu_has(f)
-#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#define sev_printk_rtl(fmt, ...) printk_ratelimited(fmt, ##__VA_ARGS__)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
-#define sev_printk(fmt, ...)
-#define sev_printk_rtl(fmt, ...)
#undef vc_forward_exception
#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n")
#endif
/*
* SVSM related information:
- * When running under an SVSM, the VMPL that Linux is executing at must be
- * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
- *
* During boot, the page tables are set up as identity mapped and later
* changed to use kernel virtual addresses. Maintain separate virtual and
* physical addresses for the CAA to allow SVSM functions to be used during
* early boot, both with identity mapped virtual addresses and proper kernel
* virtual addresses.
*/
-u8 snp_vmpl __ro_after_init;
-EXPORT_SYMBOL_GPL(snp_vmpl);
-static struct svsm_ca *boot_svsm_caa __ro_after_init;
-static u64 boot_svsm_caa_pa __ro_after_init;
-
-static struct svsm_ca *svsm_get_caa(void);
-static u64 svsm_get_caa_pa(void);
-static int svsm_perform_call_protocol(struct svsm_call *call);
-
-/* I/O parameters for CPUID-related helpers */
-struct cpuid_leaf {
- u32 fn;
- u32 subfn;
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
-};
-
-/*
- * Individual entries of the SNP CPUID table, as defined by the SNP
- * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
- */
-struct snp_cpuid_fn {
- u32 eax_in;
- u32 ecx_in;
- u64 xcr0_in;
- u64 xss_in;
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u64 __reserved;
-} __packed;
-
-/*
- * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
- * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
- * of 64 entries per CPUID table.
- */
-#define SNP_CPUID_COUNT_MAX 64
-
-struct snp_cpuid_table {
- u32 count;
- u32 __reserved1;
- u64 __reserved2;
- struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
-} __packed;
+struct svsm_ca *boot_svsm_caa __ro_after_init;
+u64 boot_svsm_caa_pa __ro_after_init;
/*
* Since feature negotiation related variables are set early in the boot
@@ -107,7 +54,7 @@ static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
-static bool __init sev_es_check_cpu_features(void)
+bool __init sev_es_check_cpu_features(void)
{
if (!has_cpuflag(X86_FEATURE_RDRAND)) {
error("RDRAND instruction not supported - no trusted source of randomness available\n");
@@ -117,7 +64,7 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}
-static void __head __noreturn
+void __head __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;
@@ -136,7 +83,7 @@ sev_es_terminate(unsigned int set, unsigned int reason)
/*
* The hypervisor features are available from GHCB version 2 onward.
*/
-static u64 get_hv_features(void)
+u64 get_hv_features(void)
{
u64 val;
@@ -153,7 +100,7 @@ static u64 get_hv_features(void)
return GHCB_MSR_HV_FT_RESP_VAL(val);
}
-static void snp_register_ghcb_early(unsigned long paddr)
+void snp_register_ghcb_early(unsigned long paddr)
{
unsigned long pfn = paddr >> PAGE_SHIFT;
u64 val;
@@ -169,7 +116,7 @@ static void snp_register_ghcb_early(unsigned long paddr)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}
-static bool sev_es_negotiate_protocol(void)
+bool sev_es_negotiate_protocol(void)
{
u64 val;
@@ -190,39 +137,6 @@ static bool sev_es_negotiate_protocol(void)
return true;
}
-static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
-{
- ghcb->save.sw_exit_code = 0;
- __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-}
-
-static bool vc_decoding_needed(unsigned long exit_code)
-{
- /* Exceptions don't require to decode the instruction */
- return !(exit_code >= SVM_EXIT_EXCP_BASE &&
- exit_code <= SVM_EXIT_LAST_EXCP);
-}
-
-static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
- struct pt_regs *regs,
- unsigned long exit_code)
-{
- enum es_result ret = ES_OK;
-
- memset(ctxt, 0, sizeof(*ctxt));
- ctxt->regs = regs;
-
- if (vc_decoding_needed(exit_code))
- ret = vc_decode_insn(ctxt);
-
- return ret;
-}
-
-static void vc_finish_insn(struct es_em_ctxt *ctxt)
-{
- ctxt->regs->ip += ctxt->insn.length;
-}
-
static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
u32 ret;
@@ -344,7 +258,7 @@ static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
* Fill in protocol and format specifiers. This can be called very early
* in the boot, so use rip-relative references as needed.
*/
- ghcb->protocol_version = RIP_REL_REF(ghcb_version);
+ ghcb->protocol_version = ghcb_version;
ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
@@ -371,10 +285,10 @@ static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
return svsm_process_result_codes(call);
}
-static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2)
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
{
/* Fill in protocol and format specifiers */
ghcb->protocol_version = ghcb_version;
@@ -473,9 +387,9 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
* while running with the initial identity mapping as well as the
* switch-over to kernel virtual addresses later.
*/
-static const struct snp_cpuid_table *snp_cpuid_get_table(void)
+const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
- return &RIP_REL_REF(cpuid_table_copy);
+ return rip_rel_ptr(&cpuid_table_copy);
}
/*
@@ -672,7 +586,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-static int __head
+int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -701,9 +615,9 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
/* Skip post-processing for out-of-range zero leafs. */
- if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) ||
- (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) ||
- (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max))))
+ if (!(leaf->fn <= cpuid_std_range_max ||
+ (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
+ (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
return 0;
}
@@ -782,391 +696,6 @@ fail:
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
-static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
- unsigned long address,
- bool write)
-{
- if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_USER;
- ctxt->fi.cr2 = address;
- if (write)
- ctxt->fi.error_code |= X86_PF_WRITE;
-
- return ES_EXCEPTION;
- }
-
- return ES_OK;
-}
-
-static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
- void *src, char *buf,
- unsigned int data_size,
- unsigned int count,
- bool backwards)
-{
- int i, b = backwards ? -1 : 1;
- unsigned long address = (unsigned long)src;
- enum es_result ret;
-
- ret = vc_insn_string_check(ctxt, address, false);
- if (ret != ES_OK)
- return ret;
-
- for (i = 0; i < count; i++) {
- void *s = src + (i * data_size * b);
- char *d = buf + (i * data_size);
-
- ret = vc_read_mem(ctxt, s, d, data_size);
- if (ret != ES_OK)
- break;
- }
-
- return ret;
-}
-
-static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
- void *dst, char *buf,
- unsigned int data_size,
- unsigned int count,
- bool backwards)
-{
- int i, s = backwards ? -1 : 1;
- unsigned long address = (unsigned long)dst;
- enum es_result ret;
-
- ret = vc_insn_string_check(ctxt, address, true);
- if (ret != ES_OK)
- return ret;
-
- for (i = 0; i < count; i++) {
- void *d = dst + (i * data_size * s);
- char *b = buf + (i * data_size);
-
- ret = vc_write_mem(ctxt, d, b, data_size);
- if (ret != ES_OK)
- break;
- }
-
- return ret;
-}
-
-#define IOIO_TYPE_STR BIT(2)
-#define IOIO_TYPE_IN 1
-#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR)
-#define IOIO_TYPE_OUT 0
-#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
-
-#define IOIO_REP BIT(3)
-
-#define IOIO_ADDR_64 BIT(9)
-#define IOIO_ADDR_32 BIT(8)
-#define IOIO_ADDR_16 BIT(7)
-
-#define IOIO_DATA_32 BIT(6)
-#define IOIO_DATA_16 BIT(5)
-#define IOIO_DATA_8 BIT(4)
-
-#define IOIO_SEG_ES (0 << 10)
-#define IOIO_SEG_DS (3 << 10)
-
-static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
-{
- struct insn *insn = &ctxt->insn;
- size_t size;
- u64 port;
-
- *exitinfo = 0;
-
- switch (insn->opcode.bytes[0]) {
- /* INS opcodes */
- case 0x6c:
- case 0x6d:
- *exitinfo |= IOIO_TYPE_INS;
- *exitinfo |= IOIO_SEG_ES;
- port = ctxt->regs->dx & 0xffff;
- break;
-
- /* OUTS opcodes */
- case 0x6e:
- case 0x6f:
- *exitinfo |= IOIO_TYPE_OUTS;
- *exitinfo |= IOIO_SEG_DS;
- port = ctxt->regs->dx & 0xffff;
- break;
-
- /* IN immediate opcodes */
- case 0xe4:
- case 0xe5:
- *exitinfo |= IOIO_TYPE_IN;
- port = (u8)insn->immediate.value & 0xffff;
- break;
-
- /* OUT immediate opcodes */
- case 0xe6:
- case 0xe7:
- *exitinfo |= IOIO_TYPE_OUT;
- port = (u8)insn->immediate.value & 0xffff;
- break;
-
- /* IN register opcodes */
- case 0xec:
- case 0xed:
- *exitinfo |= IOIO_TYPE_IN;
- port = ctxt->regs->dx & 0xffff;
- break;
-
- /* OUT register opcodes */
- case 0xee:
- case 0xef:
- *exitinfo |= IOIO_TYPE_OUT;
- port = ctxt->regs->dx & 0xffff;
- break;
-
- default:
- return ES_DECODE_FAILED;
- }
-
- *exitinfo |= port << 16;
-
- switch (insn->opcode.bytes[0]) {
- case 0x6c:
- case 0x6e:
- case 0xe4:
- case 0xe6:
- case 0xec:
- case 0xee:
- /* Single byte opcodes */
- *exitinfo |= IOIO_DATA_8;
- size = 1;
- break;
- default:
- /* Length determined by instruction parsing */
- *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
- : IOIO_DATA_32;
- size = (insn->opnd_bytes == 2) ? 2 : 4;
- }
-
- switch (insn->addr_bytes) {
- case 2:
- *exitinfo |= IOIO_ADDR_16;
- break;
- case 4:
- *exitinfo |= IOIO_ADDR_32;
- break;
- case 8:
- *exitinfo |= IOIO_ADDR_64;
- break;
- }
-
- if (insn_has_rep_prefix(insn))
- *exitinfo |= IOIO_REP;
-
- return vc_ioio_check(ctxt, (u16)port, size);
-}
-
-static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- struct pt_regs *regs = ctxt->regs;
- u64 exit_info_1, exit_info_2;
- enum es_result ret;
-
- ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
- if (ret != ES_OK)
- return ret;
-
- if (exit_info_1 & IOIO_TYPE_STR) {
-
- /* (REP) INS/OUTS */
-
- bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
- unsigned int io_bytes, exit_bytes;
- unsigned int ghcb_count, op_count;
- unsigned long es_base;
- u64 sw_scratch;
-
- /*
- * For the string variants with rep prefix the amount of in/out
- * operations per #VC exception is limited so that the kernel
- * has a chance to take interrupts and re-schedule while the
- * instruction is emulated.
- */
- io_bytes = (exit_info_1 >> 4) & 0x7;
- ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
-
- op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
- exit_info_2 = min(op_count, ghcb_count);
- exit_bytes = exit_info_2 * io_bytes;
-
- es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
-
- /* Read bytes of OUTS into the shared buffer */
- if (!(exit_info_1 & IOIO_TYPE_IN)) {
- ret = vc_insn_string_read(ctxt,
- (void *)(es_base + regs->si),
- ghcb->shared_buffer, io_bytes,
- exit_info_2, df);
- if (ret)
- return ret;
- }
-
- /*
- * Issue an VMGEXIT to the HV to consume the bytes from the
- * shared buffer or to have it write them into the shared buffer
- * depending on the instruction: OUTS or INS.
- */
- sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
- ghcb_set_sw_scratch(ghcb, sw_scratch);
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
- exit_info_1, exit_info_2);
- if (ret != ES_OK)
- return ret;
-
- /* Read bytes from shared buffer into the guest's destination. */
- if (exit_info_1 & IOIO_TYPE_IN) {
- ret = vc_insn_string_write(ctxt,
- (void *)(es_base + regs->di),
- ghcb->shared_buffer, io_bytes,
- exit_info_2, df);
- if (ret)
- return ret;
-
- if (df)
- regs->di -= exit_bytes;
- else
- regs->di += exit_bytes;
- } else {
- if (df)
- regs->si -= exit_bytes;
- else
- regs->si += exit_bytes;
- }
-
- if (exit_info_1 & IOIO_REP)
- regs->cx -= exit_info_2;
-
- ret = regs->cx ? ES_RETRY : ES_OK;
-
- } else {
-
- /* IN/OUT into/from rAX */
-
- int bits = (exit_info_1 & 0x70) >> 1;
- u64 rax = 0;
-
- if (!(exit_info_1 & IOIO_TYPE_IN))
- rax = lower_bits(regs->ax, bits);
-
- ghcb_set_rax(ghcb, rax);
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
- if (ret != ES_OK)
- return ret;
-
- if (exit_info_1 & IOIO_TYPE_IN) {
- if (!ghcb_rax_is_valid(ghcb))
- return ES_VMM_ERROR;
- regs->ax = lower_bits(ghcb->save.rax, bits);
- }
- }
-
- return ret;
-}
-
-static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- struct pt_regs *regs = ctxt->regs;
- struct cpuid_leaf leaf;
- int ret;
-
- leaf.fn = regs->ax;
- leaf.subfn = regs->cx;
- ret = snp_cpuid(ghcb, ctxt, &leaf);
- if (!ret) {
- regs->ax = leaf.eax;
- regs->bx = leaf.ebx;
- regs->cx = leaf.ecx;
- regs->dx = leaf.edx;
- }
-
- return ret;
-}
-
-static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- struct pt_regs *regs = ctxt->regs;
- u32 cr4 = native_read_cr4();
- enum es_result ret;
- int snp_cpuid_ret;
-
- snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
- if (!snp_cpuid_ret)
- return ES_OK;
- if (snp_cpuid_ret != -EOPNOTSUPP)
- return ES_VMM_ERROR;
-
- ghcb_set_rax(ghcb, regs->ax);
- ghcb_set_rcx(ghcb, regs->cx);
-
- if (cr4 & X86_CR4_OSXSAVE)
- /* Safe to read xcr0 */
- ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
- else
- /* xgetbv will cause #GP - use reset value for xcr0 */
- ghcb_set_xcr0(ghcb, 1);
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (!(ghcb_rax_is_valid(ghcb) &&
- ghcb_rbx_is_valid(ghcb) &&
- ghcb_rcx_is_valid(ghcb) &&
- ghcb_rdx_is_valid(ghcb)))
- return ES_VMM_ERROR;
-
- regs->ax = ghcb->save.rax;
- regs->bx = ghcb->save.rbx;
- regs->cx = ghcb->save.rcx;
- regs->dx = ghcb->save.rdx;
-
- return ES_OK;
-}
-
-static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt,
- unsigned long exit_code)
-{
- bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
- enum es_result ret;
-
- /*
- * The hypervisor should not be intercepting RDTSC/RDTSCP when Secure
- * TSC is enabled. A #VC exception will be generated if the RDTSC/RDTSCP
- * instructions are being intercepted. If this should occur and Secure
- * TSC is enabled, guest execution should be terminated as the guest
- * cannot rely on the TSC value provided by the hypervisor.
- */
- if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
- return ES_VMM_ERROR;
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
- (!rdtscp || ghcb_rcx_is_valid(ghcb))))
- return ES_VMM_ERROR;
-
- ctxt->regs->ax = ghcb->save.rax;
- ctxt->regs->dx = ghcb->save.rdx;
- if (rdtscp)
- ctxt->regs->cx = ghcb->save.rcx;
-
- return ES_OK;
-}
-
struct cc_setup_data {
struct setup_data header;
u32 cc_blob_address;
@@ -1224,36 +753,14 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
if (fn->eax_in == 0x0)
- RIP_REL_REF(cpuid_std_range_max) = fn->eax;
+ cpuid_std_range_max = fn->eax;
else if (fn->eax_in == 0x40000000)
- RIP_REL_REF(cpuid_hyp_range_max) = fn->eax;
+ cpuid_hyp_range_max = fn->eax;
else if (fn->eax_in == 0x80000000)
- RIP_REL_REF(cpuid_ext_range_max) = fn->eax;
+ cpuid_ext_range_max = fn->eax;
}
}
-static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
- int ret, u64 svsm_ret)
-{
- WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
- pfn, action, page_size, ret, svsm_ret);
-
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
-}
-
-static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
-{
- unsigned int page_size;
- bool action;
- u64 pfn;
-
- pfn = pc->entry[pc->cur_index].pfn;
- action = pc->entry[pc->cur_index].action;
- page_size = pc->entry[pc->cur_index].page_size;
-
- __pval_terminate(pfn, action, page_size, ret, svsm_ret);
-}
-
static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
{
struct svsm_pvalidate_call *pc;
@@ -1296,11 +803,7 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
{
int ret;
- /*
- * This can be called very early during boot, so use rIP-relative
- * references as needed.
- */
- if (RIP_REL_REF(snp_vmpl)) {
+ if (snp_vmpl) {
svsm_pval_4k_page(paddr, validate);
} else {
ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
@@ -1309,351 +812,6 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
}
}
-static void pval_pages(struct snp_psc_desc *desc)
-{
- struct psc_entry *e;
- unsigned long vaddr;
- unsigned int size;
- unsigned int i;
- bool validate;
- u64 pfn;
- int rc;
-
- for (i = 0; i <= desc->hdr.end_entry; i++) {
- e = &desc->entries[i];
-
- pfn = e->gfn;
- vaddr = (unsigned long)pfn_to_kaddr(pfn);
- size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
- validate = e->operation == SNP_PAGE_STATE_PRIVATE;
-
- rc = pvalidate(vaddr, size, validate);
- if (!rc)
- continue;
-
- if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
- unsigned long vaddr_end = vaddr + PMD_SIZE;
-
- for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
- rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
- if (rc)
- __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
- }
- } else {
- __pval_terminate(pfn, validate, size, rc, 0);
- }
- }
-}
-
-static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
- struct svsm_pvalidate_call *pc)
-{
- struct svsm_pvalidate_entry *pe;
-
- /* Nothing in the CA yet */
- pc->num_entries = 0;
- pc->cur_index = 0;
-
- pe = &pc->entry[0];
-
- while (pfn < pfn_end) {
- pe->page_size = RMP_PG_SIZE_4K;
- pe->action = action;
- pe->ignore_cf = 0;
- pe->pfn = pfn;
-
- pe++;
- pfn++;
-
- pc->num_entries++;
- if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
- break;
- }
-
- return pfn;
-}
-
-static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
- struct svsm_pvalidate_call *pc)
-{
- struct svsm_pvalidate_entry *pe;
- struct psc_entry *e;
-
- /* Nothing in the CA yet */
- pc->num_entries = 0;
- pc->cur_index = 0;
-
- pe = &pc->entry[0];
- e = &desc->entries[desc_entry];
-
- while (desc_entry <= desc->hdr.end_entry) {
- pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
- pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
- pe->ignore_cf = 0;
- pe->pfn = e->gfn;
-
- pe++;
- e++;
-
- desc_entry++;
- pc->num_entries++;
- if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
- break;
- }
-
- return desc_entry;
-}
-
-static void svsm_pval_pages(struct snp_psc_desc *desc)
-{
- struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
- unsigned int i, pv_4k_count = 0;
- struct svsm_pvalidate_call *pc;
- struct svsm_call call = {};
- unsigned long flags;
- bool action;
- u64 pc_pa;
- int ret;
-
- /*
- * This can be called very early in the boot, use native functions in
- * order to avoid paravirt issues.
- */
- flags = native_local_irq_save();
-
- /*
- * The SVSM calling area (CA) can support processing 510 entries at a
- * time. Loop through the Page State Change descriptor until the CA is
- * full or the last entry in the descriptor is reached, at which time
- * the SVSM is invoked. This repeats until all entries in the descriptor
- * are processed.
- */
- call.caa = svsm_get_caa();
-
- pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
- pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
-
- /* Protocol 0, Call ID 1 */
- call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
- call.rcx = pc_pa;
-
- for (i = 0; i <= desc->hdr.end_entry;) {
- i = svsm_build_ca_from_psc_desc(desc, i, pc);
-
- do {
- ret = svsm_perform_call_protocol(&call);
- if (!ret)
- continue;
-
- /*
- * Check if the entry failed because of an RMP mismatch (a
- * PVALIDATE at 2M was requested, but the page is mapped in
- * the RMP as 4K).
- */
-
- if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
- pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
- /* Save this entry for post-processing at 4K */
- pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
-
- /* Skip to the next one unless at the end of the list */
- pc->cur_index++;
- if (pc->cur_index < pc->num_entries)
- ret = -EAGAIN;
- else
- ret = 0;
- }
- } while (ret == -EAGAIN);
-
- if (ret)
- svsm_pval_terminate(pc, ret, call.rax_out);
- }
-
- /* Process any entries that failed to be validated at 2M and validate them at 4K */
- for (i = 0; i < pv_4k_count; i++) {
- u64 pfn, pfn_end;
-
- action = pv_4k[i].action;
- pfn = pv_4k[i].pfn;
- pfn_end = pfn + 512;
-
- while (pfn < pfn_end) {
- pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
-
- ret = svsm_perform_call_protocol(&call);
- if (ret)
- svsm_pval_terminate(pc, ret, call.rax_out);
- }
- }
-
- native_local_irq_restore(flags);
-}
-
-static void pvalidate_pages(struct snp_psc_desc *desc)
-{
- if (snp_vmpl)
- svsm_pval_pages(desc);
- else
- pval_pages(desc);
-}
-
-static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
-{
- int cur_entry, end_entry, ret = 0;
- struct snp_psc_desc *data;
- struct es_em_ctxt ctxt;
-
- vc_ghcb_invalidate(ghcb);
-
- /* Copy the input desc into GHCB shared buffer */
- data = (struct snp_psc_desc *)ghcb->shared_buffer;
- memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
-
- /*
- * As per the GHCB specification, the hypervisor can resume the guest
- * before processing all the entries. Check whether all the entries
- * are processed. If not, then keep retrying. Note, the hypervisor
- * will update the data memory directly to indicate the status, so
- * reference the data->hdr everywhere.
- *
- * The strategy here is to wait for the hypervisor to change the page
- * state in the RMP table before guest accesses the memory pages. If the
- * page state change was not successful, then later memory access will
- * result in a crash.
- */
- cur_entry = data->hdr.cur_entry;
- end_entry = data->hdr.end_entry;
-
- while (data->hdr.cur_entry <= data->hdr.end_entry) {
- ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
-
- /* This will advance the shared buffer data points to. */
- ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
-
- /*
- * Page State Change VMGEXIT can pass error code through
- * exit_info_2.
- */
- if (WARN(ret || ghcb->save.sw_exit_info_2,
- "SNP: PSC failed ret=%d exit_info_2=%llx\n",
- ret, ghcb->save.sw_exit_info_2)) {
- ret = 1;
- goto out;
- }
-
- /* Verify that reserved bit is not set */
- if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
- ret = 1;
- goto out;
- }
-
- /*
- * Sanity check that entry processing is not going backwards.
- * This will happen only if hypervisor is tricking us.
- */
- if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
-"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
- end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
- ret = 1;
- goto out;
- }
- }
-
-out:
- return ret;
-}
-
-static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
- unsigned long exit_code)
-{
- unsigned int opcode = (unsigned int)ctxt->insn.opcode.value;
- u8 modrm = ctxt->insn.modrm.value;
-
- switch (exit_code) {
-
- case SVM_EXIT_IOIO:
- case SVM_EXIT_NPF:
- /* handled separately */
- return ES_OK;
-
- case SVM_EXIT_CPUID:
- if (opcode == 0xa20f)
- return ES_OK;
- break;
-
- case SVM_EXIT_INVD:
- if (opcode == 0x080f)
- return ES_OK;
- break;
-
- case SVM_EXIT_MONITOR:
- /* MONITOR and MONITORX instructions generate the same error code */
- if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa))
- return ES_OK;
- break;
-
- case SVM_EXIT_MWAIT:
- /* MWAIT and MWAITX instructions generate the same error code */
- if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb))
- return ES_OK;
- break;
-
- case SVM_EXIT_MSR:
- /* RDMSR */
- if (opcode == 0x320f ||
- /* WRMSR */
- opcode == 0x300f)
- return ES_OK;
- break;
-
- case SVM_EXIT_RDPMC:
- if (opcode == 0x330f)
- return ES_OK;
- break;
-
- case SVM_EXIT_RDTSC:
- if (opcode == 0x310f)
- return ES_OK;
- break;
-
- case SVM_EXIT_RDTSCP:
- if (opcode == 0x010f && modrm == 0xf9)
- return ES_OK;
- break;
-
- case SVM_EXIT_READ_DR7:
- if (opcode == 0x210f &&
- X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
- return ES_OK;
- break;
-
- case SVM_EXIT_VMMCALL:
- if (opcode == 0x010f && modrm == 0xd9)
- return ES_OK;
-
- break;
-
- case SVM_EXIT_WRITE_DR7:
- if (opcode == 0x230f &&
- X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
- return ES_OK;
- break;
-
- case SVM_EXIT_WBINVD:
- if (opcode == 0x90f)
- return ES_OK;
- break;
-
- default:
- break;
- }
-
- sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n",
- opcode, exit_code, ctxt->regs->ip);
-
- return ES_UNSUPPORTED;
-}
-
/*
* Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
* services needed when not running in VMPL0.
@@ -1681,7 +839,7 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
* routine is running identity mapped when called, both by the decompressor
* code and the early kernel code.
*/
- if (!rmpadjust((unsigned long)&RIP_REL_REF(boot_ghcb_page), RMP_PG_SIZE_4K, 1))
+ if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
return false;
/*
@@ -1698,7 +856,7 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
if (!secrets_page->svsm_guest_vmpl)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);
- RIP_REL_REF(snp_vmpl) = secrets_page->svsm_guest_vmpl;
+ snp_vmpl = secrets_page->svsm_guest_vmpl;
caa = secrets_page->svsm_caa;
@@ -1713,8 +871,8 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
* The CA is identity mapped when this routine is called, both by the
* decompressor code and the early kernel code.
*/
- RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)caa;
- RIP_REL_REF(boot_svsm_caa_pa) = caa;
+ boot_svsm_caa = (struct svsm_ca *)caa;
+ boot_svsm_caa_pa = caa;
/* Advertise the SVSM presence via CPUID. */
cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
diff --git a/arch/x86/boot/startup/sev-startup.c b/arch/x86/boot/startup/sev-startup.c
new file mode 100644
index 000000000000..0b7e3b950183
--- /dev/null
+++ b/arch/x86/boot/startup/sev-startup.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt) "SEV: " fmt
+
+#include <linux/percpu-defs.h>
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/psp-sev.h>
+#include <uapi/linux/sev-guest.h>
+
+#include <asm/init.h>
+#include <asm/cpu_entry_area.h>
+#include <asm/stacktrace.h>
+#include <asm/sev.h>
+#include <asm/sev-internal.h>
+#include <asm/insn-eval.h>
+#include <asm/fpu/xcr.h>
+#include <asm/processor.h>
+#include <asm/realmode.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid/api.h>
+#include <asm/cmdline.h>
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+struct ghcb *boot_ghcb __section(".data");
+
+/* Bitmap of SEV features supported by the hypervisor */
+u64 sev_hv_features __ro_after_init;
+
+/* Secrets page physical address from the CC blob */
+u64 sev_secrets_pa __ro_after_init;
+
+/* For early boot SVSM communication */
+struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
+
+DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
+DEFINE_PER_CPU(u64, svsm_caa_pa);
+
+/*
+ * Nothing shall interrupt this code path while holding the per-CPU
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
+ *
+ * Callers must disable local interrupts around it.
+ */
+noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ WARN_ON(!irqs_disabled());
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (unlikely(data->ghcb_active)) {
+ /* GHCB is already in use - save its contents */
+
+ if (unlikely(data->backup_ghcb_active)) {
+ /*
+ * Backup-GHCB is also already in use. There is no way
+ * to continue here so just kill the machine. To make
+ * panic() work, mark GHCBs inactive so that messages
+ * can be printed out.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ instrumentation_begin();
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ instrumentation_end();
+ }
+
+ /* Mark backup_ghcb active before writing to it */
+ data->backup_ghcb_active = true;
+
+ state->ghcb = &data->backup_ghcb;
+
+ /* Backup GHCB content */
+ *state->ghcb = *ghcb;
+ } else {
+ state->ghcb = NULL;
+ data->ghcb_active = true;
+ }
+
+ return ghcb;
+}
+
+/* Include code shared with pre-decompression boot stage */
+#include "sev-shared.c"
+
+noinstr void __sev_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ WARN_ON(!irqs_disabled());
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ /*
+ * Invalidate the GHCB so a VMGEXIT instruction issued
+ * from userspace won't appear to be valid.
+ */
+ vc_ghcb_invalidate(ghcb);
+ data->ghcb_active = false;
+ }
+}
+
+int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, use native functions in
+ * order to avoid paravirt issues.
+ */
+ flags = native_local_irq_save();
+
+ if (sev_cfg.ghcbs_initialized)
+ ghcb = __sev_get_ghcb(&state);
+ else if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ if (sev_cfg.ghcbs_initialized)
+ __sev_put_ghcb(&state);
+
+ native_local_irq_restore(flags);
+
+ return ret;
+}
+
+void __head
+early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages, enum psc_op op)
+{
+ unsigned long paddr_end;
+ u64 val;
+
+ vaddr = vaddr & PAGE_MASK;
+
+ paddr = paddr & PAGE_MASK;
+ paddr_end = paddr + (npages << PAGE_SHIFT);
+
+ while (paddr < paddr_end) {
+ /* Page validation must be rescinded before changing to shared */
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(vaddr, paddr, false);
+
+ /*
+ * Use the MSR protocol because this function can be called before
+ * the GHCB is established.
+ */
+ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+ VMGEXIT();
+
+ val = sev_es_rd_ghcb_msr();
+
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
+ goto e_term;
+
+ if (GHCB_MSR_PSC_RESP_VAL(val))
+ goto e_term;
+
+ /* Page validation must be performed after changing to private */
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(vaddr, paddr, true);
+
+ vaddr += PAGE_SIZE;
+ paddr += PAGE_SIZE;
+ }
+
+ return;
+
+e_term:
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages)
+{
+ /*
+ * This can be invoked in early boot while running identity mapped, so
+ * use an open coded check for SNP instead of using cc_platform_has().
+ * This eliminates worries about jump tables or checking boot_cpu_data
+ * in the cc_platform_has() function.
+ */
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /*
+ * Ask the hypervisor to mark the memory pages as private in the RMP
+ * table.
+ */
+ early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
+}
+
+void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages)
+{
+ /*
+ * This can be invoked in early boot while running identity mapped, so
+ * use an open coded check for SNP instead of using cc_platform_has().
+ * This eliminates worries about jump tables or checking boot_cpu_data
+ * in the cc_platform_has() function.
+ */
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /* Ask hypervisor to mark the memory pages shared in the RMP table. */
+ early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+/*
+ * Initial set up of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the kernel
+ * in the following ways, depending on how it is booted:
+ *
+ * - when booted via the boot/decompress kernel:
+ * - via boot_params
+ *
+ * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
+ * - via a setup_data entry, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ /* Boot kernel would have passed the CC blob via boot_params. */
+ if (bp->cc_blob_address) {
+ cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
+ goto found_cc_info;
+ }
+
+ /*
+ * If kernel was booted directly, without the use of the
+ * boot/decompression kernel, the CC blob may have been passed via
+ * setup_data instead.
+ */
+ cc_info = find_cc_blob_setup_data(bp);
+ if (!cc_info)
+ return NULL;
+
+found_cc_info:
+ if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+ snp_abort();
+
+ return cc_info;
+}
+
+static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
+{
+ struct svsm_call call = {};
+ int ret;
+ u64 pa;
+
+ /*
+ * Record the SVSM Calling Area address (CAA) if the guest is not
+ * running at VMPL0. The CA will be used to communicate with the
+ * SVSM to perform the SVSM services.
+ */
+ if (!svsm_setup_ca(cc_info))
+ return;
+
+ /*
+ * It is very early in the boot and the kernel is running identity
+ * mapped but without having adjusted the pagetables to where the
+ * kernel was loaded (physbase), so the get the CA address using
+ * RIP-relative addressing.
+ */
+ pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);
+
+ /*
+ * Switch over to the boot SVSM CA while the current CA is still
+ * addressable. There is no GHCB at this point so use the MSR protocol.
+ *
+ * SVSM_CORE_REMAP_CA call:
+ * RAX = 0 (Protocol=0, CallID=0)
+ * RCX = New CA GPA
+ */
+ call.caa = svsm_get_caa();
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
+ call.rcx = pa;
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
+
+ boot_svsm_caa = (struct svsm_ca *)pa;
+ boot_svsm_caa_pa = pa;
+}
+
+bool __head snp_init(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ if (!bp)
+ return false;
+
+ cc_info = find_cc_blob(bp);
+ if (!cc_info)
+ return false;
+
+ if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
+ sev_secrets_pa = cc_info->secrets_phys;
+ else
+ return false;
+
+ setup_cpuid_table(cc_info);
+
+ svsm_setup(cc_info);
+
+ /*
+ * The CC blob will be used later to access the secrets page. Cache
+ * it here like the boot kernel does.
+ */
+ bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+ return true;
+}
+
+void __head __noreturn snp_abort(void)
+{
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+}
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/boot/startup/sme.c
index 5eecdd92da10..70ea1748c0a7 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/boot/startup/sme.c
@@ -45,8 +45,6 @@
#include <asm/coco.h>
#include <asm/sev.h>
-#include "mm_internal.h"
-
#define PGD_FLAGS _KERNPG_TABLE_NOENC
#define P4D_FLAGS _KERNPG_TABLE_NOENC
#define PUD_FLAGS _KERNPG_TABLE_NOENC
@@ -299,8 +297,7 @@ void __head sme_encrypt_kernel(struct boot_params *bp)
* instrumentation or checking boot_cpu_data in the cc_platform_has()
* function.
*/
- if (!sme_get_me_mask() ||
- RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
+ if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
return;
/*
@@ -318,8 +315,8 @@ void __head sme_encrypt_kernel(struct boot_params *bp)
* memory from being cached.
*/
- kernel_start = (unsigned long)RIP_REL_REF(_text);
- kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
+ kernel_start = (unsigned long)rip_rel_ptr(_text);
+ kernel_end = ALIGN((unsigned long)rip_rel_ptr(_end), PMD_SIZE);
kernel_len = kernel_end - kernel_start;
initrd_start = 0;
@@ -345,7 +342,7 @@ void __head sme_encrypt_kernel(struct boot_params *bp)
* pagetable structures for the encryption of the kernel
* pagetable structures for workarea (in case not currently mapped)
*/
- execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
+ execute_start = workarea_start = (unsigned long)rip_rel_ptr(sme_workarea);
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
execute_len = execute_end - execute_start;
@@ -526,7 +523,7 @@ void __head sme_enable(struct boot_params *bp)
me_mask = 1UL << (ebx & 0x3f);
/* Check the SEV MSR whether SEV or SME is enabled */
- RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
+ sev_status = msr = native_rdmsrq(MSR_AMD64_SEV);
feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/*
@@ -557,13 +554,22 @@ void __head sme_enable(struct boot_params *bp)
return;
/* For SME, check the SYSCFG MSR */
- msr = __rdmsr(MSR_AMD64_SYSCFG);
+ msr = native_rdmsrq(MSR_AMD64_SYSCFG);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
}
- RIP_REL_REF(sme_me_mask) = me_mask;
- RIP_REL_REF(physical_mask) &= ~me_mask;
- RIP_REL_REF(cc_vendor) = CC_VENDOR_AMD;
+ sme_me_mask = me_mask;
+ physical_mask &= ~me_mask;
+ cc_vendor = CC_VENDOR_AMD;
cc_set_mask(me_mask);
}
+
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
+/* Local version for startup code, which never operates on user page tables */
+__weak
+pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
+{
+ return pgd;
+}
+#endif
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 84f7a883ce1e..f35369bb14c5 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -32,7 +32,7 @@
int memcmp(const void *s1, const void *s2, size_t len)
{
bool diff;
- asm("repe; cmpsb" CC_SET(nz)
+ asm("repe cmpsb" CC_SET(nz)
: CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index f2e96905b3fe..0641c8c46aee 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -292,7 +292,7 @@ static void restore_screen(void)
"shrw %%cx ; "
"jnc 1f ; "
"stosw \n\t"
- "1: rep;stosl ; "
+ "1: rep stosl ; "
"popw %%es"
: "+D" (dst), "+c" (npad)
: "bdS" (video_segment),
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index 9a0ddda3aa69..d4610af68114 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -18,7 +18,9 @@
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
+SYM_PIC_ALIAS(cc_vendor);
u64 cc_mask __ro_after_init;
+SYM_PIC_ALIAS(cc_mask);
static struct cc_attr_flags {
__u64 host_sev_snp : 1,
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
index dcb06dc8b5ae..db3255b979bd 100644
--- a/arch/x86/coco/sev/Makefile
+++ b/arch/x86/coco/sev/Makefile
@@ -1,22 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += core.o
-
-# jump tables are emitted using absolute references in non-PIC code
-# so they cannot be used in the early SEV startup code
-CFLAGS_core.o += -fno-jump-tables
-
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_core.o = -pg
-endif
-
-KASAN_SANITIZE_core.o := n
-KMSAN_SANITIZE_core.o := n
-KCOV_INSTRUMENT_core.o := n
-
-# With some compiler versions the generated code results in boot hangs, caused
-# by several compilation units. To be safe, disable all instrumentation.
-KCSAN_SANITIZE := n
+obj-y += core.o sev-nmi.o vc-handle.o
# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
-UBSAN_SANITIZE := n
+UBSAN_SANITIZE_sev-nmi.o := n
+
+# GCC may fail to respect __no_sanitize_address when inlining
+KASAN_SANITIZE_sev-nmi.o := n
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index b0c1a7a57497..fbc1215d2746 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -31,6 +31,7 @@
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
+#include <asm/sev-internal.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
@@ -41,10 +42,9 @@
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
-
-#define DR7_RESET_VALUE 0x400
+#include <asm/msr.h>
/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT 0xffff
@@ -81,21 +81,6 @@ static const char * const sev_status_feat_names[] = {
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
};
-/* For early boot hypervisor communication in SEV-ES enabled guests */
-static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
-
-/*
- * Needs to be in the .data section because we need it NULL before bss is
- * cleared
- */
-static struct ghcb *boot_ghcb __section(".data");
-
-/* Bitmap of SEV features supported by the hypervisor */
-static u64 sev_hv_features __ro_after_init;
-
-/* Secrets page physical address from the CC blob */
-static u64 secrets_pa __ro_after_init;
-
/*
* For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
* initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
@@ -105,558 +90,196 @@ static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static u64 snp_tsc_freq_khz __ro_after_init;
-/* #VC handler runtime per-CPU data */
-struct sev_es_runtime_data {
- struct ghcb ghcb_page;
-
- /*
- * Reserve one page per CPU as backup storage for the unencrypted GHCB.
- * It is needed when an NMI happens while the #VC handler uses the real
- * GHCB, and the NMI handler itself is causing another #VC exception. In
- * that case the GHCB content of the first handler needs to be backed up
- * and restored.
- */
- struct ghcb backup_ghcb;
-
- /*
- * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
- * There is no need for it to be atomic, because nothing is written to
- * the GHCB between the read and the write of ghcb_active. So it is safe
- * to use it when a nested #VC exception happens before the write.
- *
- * This is necessary for example in the #VC->NMI->#VC case when the NMI
- * happens while the first #VC handler uses the GHCB. When the NMI code
- * raises a second #VC handler it might overwrite the contents of the
- * GHCB written by the first handler. To avoid this the content of the
- * GHCB is saved and restored when the GHCB is detected to be in use
- * already.
- */
- bool ghcb_active;
- bool backup_ghcb_active;
-
- /*
- * Cached DR7 value - write it on DR7 writes and return it on reads.
- * That value will never make it to the real hardware DR7 as debugging
- * is currently unsupported in SEV-ES guests.
- */
- unsigned long dr7;
-};
-
-struct ghcb_state {
- struct ghcb *ghcb;
-};
-
-/* For early boot SVSM communication */
-static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
-
-static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
-static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
-static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
-static DEFINE_PER_CPU(u64, svsm_caa_pa);
-
-static __always_inline bool on_vc_stack(struct pt_regs *regs)
-{
- unsigned long sp = regs->sp;
-
- /* User-mode RSP is not trusted */
- if (user_mode(regs))
- return false;
-
- /* SYSCALL gap still has user-mode RSP */
- if (ip_within_syscall_gap(regs))
- return false;
-
- return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
-}
+DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
+DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
/*
- * This function handles the case when an NMI is raised in the #VC
- * exception handler entry code, before the #VC handler has switched off
- * its IST stack. In this case, the IST entry for #VC must be adjusted,
- * so that any nested #VC exception will not overwrite the stack
- * contents of the interrupted #VC handler.
- *
- * The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
- * nested sev_es_ist_exit() call may adjust back the IST entry too
- * early.
- *
- * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
- * on the NMI IST stack, as they are only called from NMI handling code
- * right now.
+ * SVSM related information:
+ * When running under an SVSM, the VMPL that Linux is executing at must be
+ * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
*/
-void noinstr __sev_es_ist_enter(struct pt_regs *regs)
-{
- unsigned long old_ist, new_ist;
-
- /* Read old IST entry */
- new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+u8 snp_vmpl __ro_after_init;
+EXPORT_SYMBOL_GPL(snp_vmpl);
- /*
- * If NMI happened while on the #VC IST stack, set the new IST
- * value below regs->sp, so that the interrupted stack frame is
- * not overwritten by subsequent #VC exceptions.
- */
- if (on_vc_stack(regs))
- new_ist = regs->sp;
-
- /*
- * Reserve additional 8 bytes and store old IST value so this
- * adjustment can be unrolled in __sev_es_ist_exit().
- */
- new_ist -= sizeof(old_ist);
- *(unsigned long *)new_ist = old_ist;
-
- /* Set new IST entry */
- this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
-}
-
-void noinstr __sev_es_ist_exit(void)
+static u64 __init get_snp_jump_table_addr(void)
{
- unsigned long ist;
+ struct snp_secrets_page *secrets;
+ void __iomem *mem;
+ u64 addr;
- /* Read IST entry */
- ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+ mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
+ if (!mem) {
+ pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
+ return 0;
+ }
- if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
- return;
+ secrets = (__force struct snp_secrets_page *)mem;
+
+ addr = secrets->os_area.ap_jump_table_pa;
+ iounmap(mem);
- /* Read back old IST entry and write it to the TSS */
- this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
+ return addr;
}
-/*
- * Nothing shall interrupt this code path while holding the per-CPU
- * GHCB. The backup GHCB is only for NMIs interrupting this path.
- *
- * Callers must disable local interrupts around it.
- */
-static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
+static u64 __init get_jump_table_addr(void)
{
- struct sev_es_runtime_data *data;
+ struct ghcb_state state;
+ unsigned long flags;
struct ghcb *ghcb;
+ u64 ret = 0;
- WARN_ON(!irqs_disabled());
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (unlikely(data->ghcb_active)) {
- /* GHCB is already in use - save its contents */
-
- if (unlikely(data->backup_ghcb_active)) {
- /*
- * Backup-GHCB is also already in use. There is no way
- * to continue here so just kill the machine. To make
- * panic() work, mark GHCBs inactive so that messages
- * can be printed out.
- */
- data->ghcb_active = false;
- data->backup_ghcb_active = false;
-
- instrumentation_begin();
- panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
- instrumentation_end();
- }
-
- /* Mark backup_ghcb active before writing to it */
- data->backup_ghcb_active = true;
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return get_snp_jump_table_addr();
- state->ghcb = &data->backup_ghcb;
+ local_irq_save(flags);
- /* Backup GHCB content */
- *state->ghcb = *ghcb;
- } else {
- state->ghcb = NULL;
- data->ghcb_active = true;
- }
+ ghcb = __sev_get_ghcb(&state);
- return ghcb;
-}
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
+ ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
-static inline u64 sev_es_rd_ghcb_msr(void)
-{
- return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
-}
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
-static __always_inline void sev_es_wr_ghcb_msr(u64 val)
-{
- u32 low, high;
+ if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
+ ghcb_sw_exit_info_2_is_valid(ghcb))
+ ret = ghcb->save.sw_exit_info_2;
- low = (u32)(val);
- high = (u32)(val >> 32);
+ __sev_put_ghcb(&state);
- native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
-}
+ local_irq_restore(flags);
-static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
- unsigned char *buffer)
-{
- return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+ return ret;
}
-static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
+static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
+ int ret, u64 svsm_ret)
{
- char buffer[MAX_INSN_SIZE];
- int insn_bytes;
-
- insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
- if (insn_bytes == 0) {
- /* Nothing could be copied */
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- } else if (insn_bytes == -EINVAL) {
- /* Effective RIP could not be calculated */
- ctxt->fi.vector = X86_TRAP_GP;
- ctxt->fi.error_code = 0;
- ctxt->fi.cr2 = 0;
- return ES_EXCEPTION;
- }
+ WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
+ pfn, action, page_size, ret, svsm_ret);
- if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
- return ES_DECODE_FAILED;
-
- if (ctxt->insn.immediate.got)
- return ES_OK;
- else
- return ES_DECODE_FAILED;
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
}
-static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
{
- char buffer[MAX_INSN_SIZE];
- int res, ret;
+ unsigned int page_size;
+ bool action;
+ u64 pfn;
- res = vc_fetch_insn_kernel(ctxt, buffer);
- if (res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
+ pfn = pc->entry[pc->cur_index].pfn;
+ action = pc->entry[pc->cur_index].action;
+ page_size = pc->entry[pc->cur_index].page_size;
- ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
- if (ret < 0)
- return ES_DECODE_FAILED;
- else
- return ES_OK;
+ __pval_terminate(pfn, action, page_size, ret, svsm_ret);
}
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+static void pval_pages(struct snp_psc_desc *desc)
{
- if (user_mode(ctxt->regs))
- return __vc_decode_user_insn(ctxt);
- else
- return __vc_decode_kern_insn(ctxt);
-}
+ struct psc_entry *e;
+ unsigned long vaddr;
+ unsigned int size;
+ unsigned int i;
+ bool validate;
+ u64 pfn;
+ int rc;
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
- char *dst, char *buf, size_t size)
-{
- unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+ for (i = 0; i <= desc->hdr.end_entry; i++) {
+ e = &desc->entries[i];
- /*
- * This function uses __put_user() independent of whether kernel or user
- * memory is accessed. This works fine because __put_user() does no
- * sanity checks of the pointer being accessed. All that it does is
- * to report when the access failed.
- *
- * Also, this function runs in atomic context, so __put_user() is not
- * allowed to sleep. The page-fault handler detects that it is running
- * in atomic context and will not try to take mmap_sem and handle the
- * fault, so additional pagefault_enable()/disable() calls are not
- * needed.
- *
- * The access can't be done via copy_to_user() here because
- * vc_write_mem() must not use string instructions to access unsafe
- * memory. The reason is that MOVS is emulated by the #VC handler by
- * splitting the move up into a read and a write and taking a nested #VC
- * exception on whatever of them is the MMIO access. Using string
- * instructions here would cause infinite nesting.
- */
- switch (size) {
- case 1: {
- u8 d1;
- u8 __user *target = (u8 __user *)dst;
-
- memcpy(&d1, buf, 1);
- if (__put_user(d1, target))
- goto fault;
- break;
- }
- case 2: {
- u16 d2;
- u16 __user *target = (u16 __user *)dst;
+ pfn = e->gfn;
+ vaddr = (unsigned long)pfn_to_kaddr(pfn);
+ size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
+ validate = e->operation == SNP_PAGE_STATE_PRIVATE;
- memcpy(&d2, buf, 2);
- if (__put_user(d2, target))
- goto fault;
- break;
- }
- case 4: {
- u32 d4;
- u32 __user *target = (u32 __user *)dst;
+ rc = pvalidate(vaddr, size, validate);
+ if (!rc)
+ continue;
- memcpy(&d4, buf, 4);
- if (__put_user(d4, target))
- goto fault;
- break;
- }
- case 8: {
- u64 d8;
- u64 __user *target = (u64 __user *)dst;
+ if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
+ unsigned long vaddr_end = vaddr + PMD_SIZE;
- memcpy(&d8, buf, 8);
- if (__put_user(d8, target))
- goto fault;
- break;
- }
- default:
- WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
- return ES_UNSUPPORTED;
+ for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
+ rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+ if (rc)
+ __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
+ }
+ } else {
+ __pval_terminate(pfn, validate, size, rc, 0);
+ }
}
-
- return ES_OK;
-
-fault:
- if (user_mode(ctxt->regs))
- error_code |= X86_PF_USER;
-
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = error_code;
- ctxt->fi.cr2 = (unsigned long)dst;
-
- return ES_EXCEPTION;
}
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
- char *src, char *buf, size_t size)
+static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
+ struct svsm_pvalidate_call *pc)
{
- unsigned long error_code = X86_PF_PROT;
+ struct svsm_pvalidate_entry *pe;
- /*
- * This function uses __get_user() independent of whether kernel or user
- * memory is accessed. This works fine because __get_user() does no
- * sanity checks of the pointer being accessed. All that it does is
- * to report when the access failed.
- *
- * Also, this function runs in atomic context, so __get_user() is not
- * allowed to sleep. The page-fault handler detects that it is running
- * in atomic context and will not try to take mmap_sem and handle the
- * fault, so additional pagefault_enable()/disable() calls are not
- * needed.
- *
- * The access can't be done via copy_from_user() here because
- * vc_read_mem() must not use string instructions to access unsafe
- * memory. The reason is that MOVS is emulated by the #VC handler by
- * splitting the move up into a read and a write and taking a nested #VC
- * exception on whatever of them is the MMIO access. Using string
- * instructions here would cause infinite nesting.
- */
- switch (size) {
- case 1: {
- u8 d1;
- u8 __user *s = (u8 __user *)src;
-
- if (__get_user(d1, s))
- goto fault;
- memcpy(buf, &d1, 1);
- break;
- }
- case 2: {
- u16 d2;
- u16 __user *s = (u16 __user *)src;
-
- if (__get_user(d2, s))
- goto fault;
- memcpy(buf, &d2, 2);
- break;
- }
- case 4: {
- u32 d4;
- u32 __user *s = (u32 __user *)src;
-
- if (__get_user(d4, s))
- goto fault;
- memcpy(buf, &d4, 4);
- break;
- }
- case 8: {
- u64 d8;
- u64 __user *s = (u64 __user *)src;
- if (__get_user(d8, s))
- goto fault;
- memcpy(buf, &d8, 8);
- break;
- }
- default:
- WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
- return ES_UNSUPPORTED;
- }
+ /* Nothing in the CA yet */
+ pc->num_entries = 0;
+ pc->cur_index = 0;
- return ES_OK;
+ pe = &pc->entry[0];
-fault:
- if (user_mode(ctxt->regs))
- error_code |= X86_PF_USER;
+ while (pfn < pfn_end) {
+ pe->page_size = RMP_PG_SIZE_4K;
+ pe->action = action;
+ pe->ignore_cf = 0;
+ pe->pfn = pfn;
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = error_code;
- ctxt->fi.cr2 = (unsigned long)src;
+ pe++;
+ pfn++;
- return ES_EXCEPTION;
-}
-
-static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
- unsigned long vaddr, phys_addr_t *paddr)
-{
- unsigned long va = (unsigned long)vaddr;
- unsigned int level;
- phys_addr_t pa;
- pgd_t *pgd;
- pte_t *pte;
-
- pgd = __va(read_cr3_pa());
- pgd = &pgd[pgd_index(va)];
- pte = lookup_address_in_pgd(pgd, va, &level);
- if (!pte) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.cr2 = vaddr;
- ctxt->fi.error_code = 0;
-
- if (user_mode(ctxt->regs))
- ctxt->fi.error_code |= X86_PF_USER;
-
- return ES_EXCEPTION;
+ pc->num_entries++;
+ if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
+ break;
}
- if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
- /* Emulated MMIO to/from encrypted memory not supported */
- return ES_UNSUPPORTED;
-
- pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
- pa |= va & ~page_level_mask(level);
-
- *paddr = pa;
-
- return ES_OK;
+ return pfn;
}
-static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
+ struct svsm_pvalidate_call *pc)
{
- BUG_ON(size > 4);
-
- if (user_mode(ctxt->regs)) {
- struct thread_struct *t = &current->thread;
- struct io_bitmap *iobm = t->io_bitmap;
- size_t idx;
-
- if (!iobm)
- goto fault;
-
- for (idx = port; idx < port + size; ++idx) {
- if (test_bit(idx, iobm->bitmap))
- goto fault;
- }
- }
-
- return ES_OK;
+ struct svsm_pvalidate_entry *pe;
+ struct psc_entry *e;
-fault:
- ctxt->fi.vector = X86_TRAP_GP;
- ctxt->fi.error_code = 0;
+ /* Nothing in the CA yet */
+ pc->num_entries = 0;
+ pc->cur_index = 0;
- return ES_EXCEPTION;
-}
+ pe = &pc->entry[0];
+ e = &desc->entries[desc_entry];
-static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
-{
- long error_code = ctxt->fi.error_code;
- int trapnr = ctxt->fi.vector;
+ while (desc_entry <= desc->hdr.end_entry) {
+ pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
+ pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
+ pe->ignore_cf = 0;
+ pe->pfn = e->gfn;
- ctxt->regs->orig_ax = ctxt->fi.error_code;
+ pe++;
+ e++;
- switch (trapnr) {
- case X86_TRAP_GP:
- exc_general_protection(ctxt->regs, error_code);
- break;
- case X86_TRAP_UD:
- exc_invalid_op(ctxt->regs);
- break;
- case X86_TRAP_PF:
- write_cr2(ctxt->fi.cr2);
- exc_page_fault(ctxt->regs, error_code);
- break;
- case X86_TRAP_AC:
- exc_alignment_check(ctxt->regs, error_code);
- break;
- default:
- pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
- BUG();
+ desc_entry++;
+ pc->num_entries++;
+ if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
+ break;
}
-}
-
-/* Include code shared with pre-decompression boot stage */
-#include "shared.c"
-
-static inline struct svsm_ca *svsm_get_caa(void)
-{
- /*
- * Use rIP-relative references when called early in the boot. If
- * ->use_cas is set, then it is late in the boot and no need
- * to worry about rIP-relative references.
- */
- if (RIP_REL_REF(sev_cfg).use_cas)
- return this_cpu_read(svsm_caa);
- else
- return RIP_REL_REF(boot_svsm_caa);
-}
-static u64 svsm_get_caa_pa(void)
-{
- /*
- * Use rIP-relative references when called early in the boot. If
- * ->use_cas is set, then it is late in the boot and no need
- * to worry about rIP-relative references.
- */
- if (RIP_REL_REF(sev_cfg).use_cas)
- return this_cpu_read(svsm_caa_pa);
- else
- return RIP_REL_REF(boot_svsm_caa_pa);
+ return desc_entry;
}
-static noinstr void __sev_put_ghcb(struct ghcb_state *state)
+static void svsm_pval_pages(struct snp_psc_desc *desc)
{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- WARN_ON(!irqs_disabled());
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (state->ghcb) {
- /* Restore GHCB from Backup */
- *ghcb = *state->ghcb;
- data->backup_ghcb_active = false;
- state->ghcb = NULL;
- } else {
- /*
- * Invalidate the GHCB so a VMGEXIT instruction issued
- * from userspace won't appear to be valid.
- */
- vc_ghcb_invalidate(ghcb);
- data->ghcb_active = false;
- }
-}
-
-static int svsm_perform_call_protocol(struct svsm_call *call)
-{
- struct ghcb_state state;
+ struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
+ unsigned int i, pv_4k_count = 0;
+ struct svsm_pvalidate_call *pc;
+ struct svsm_call call = {};
unsigned long flags;
- struct ghcb *ghcb;
+ bool action;
+ u64 pc_pa;
int ret;
/*
@@ -666,180 +289,145 @@ static int svsm_perform_call_protocol(struct svsm_call *call)
flags = native_local_irq_save();
/*
- * Use rip-relative references when called early in the boot. If
- * ghcbs_initialized is set, then it is late in the boot and no need
- * to worry about rip-relative references in called functions.
+ * The SVSM calling area (CA) can support processing 510 entries at a
+ * time. Loop through the Page State Change descriptor until the CA is
+ * full or the last entry in the descriptor is reached, at which time
+ * the SVSM is invoked. This repeats until all entries in the descriptor
+ * are processed.
*/
- if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
- ghcb = __sev_get_ghcb(&state);
- else if (RIP_REL_REF(boot_ghcb))
- ghcb = RIP_REL_REF(boot_ghcb);
- else
- ghcb = NULL;
+ call.caa = svsm_get_caa();
- do {
- ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
- : svsm_perform_msr_protocol(call);
- } while (ret == -EAGAIN);
+ pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
+ pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
- if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
- __sev_put_ghcb(&state);
+ /* Protocol 0, Call ID 1 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
+ call.rcx = pc_pa;
- native_local_irq_restore(flags);
+ for (i = 0; i <= desc->hdr.end_entry;) {
+ i = svsm_build_ca_from_psc_desc(desc, i, pc);
- return ret;
-}
+ do {
+ ret = svsm_perform_call_protocol(&call);
+ if (!ret)
+ continue;
-void noinstr __sev_es_nmi_complete(void)
-{
- struct ghcb_state state;
- struct ghcb *ghcb;
+ /*
+ * Check if the entry failed because of an RMP mismatch (a
+ * PVALIDATE at 2M was requested, but the page is mapped in
+ * the RMP as 4K).
+ */
- ghcb = __sev_get_ghcb(&state);
+ if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
+ pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
+ /* Save this entry for post-processing at 4K */
+ pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
+
+ /* Skip to the next one unless at the end of the list */
+ pc->cur_index++;
+ if (pc->cur_index < pc->num_entries)
+ ret = -EAGAIN;
+ else
+ ret = 0;
+ }
+ } while (ret == -EAGAIN);
- vc_ghcb_invalidate(ghcb);
- ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
- ghcb_set_sw_exit_info_1(ghcb, 0);
- ghcb_set_sw_exit_info_2(ghcb, 0);
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+ }
- sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
- VMGEXIT();
+ /* Process any entries that failed to be validated at 2M and validate them at 4K */
+ for (i = 0; i < pv_4k_count; i++) {
+ u64 pfn, pfn_end;
- __sev_put_ghcb(&state);
-}
+ action = pv_4k[i].action;
+ pfn = pv_4k[i].pfn;
+ pfn_end = pfn + 512;
-static u64 __init get_snp_jump_table_addr(void)
-{
- struct snp_secrets_page *secrets;
- void __iomem *mem;
- u64 addr;
+ while (pfn < pfn_end) {
+ pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
- mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
- if (!mem) {
- pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
- return 0;
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+ }
}
- secrets = (__force struct snp_secrets_page *)mem;
-
- addr = secrets->os_area.ap_jump_table_pa;
- iounmap(mem);
-
- return addr;
+ native_local_irq_restore(flags);
}
-static u64 __init get_jump_table_addr(void)
+static void pvalidate_pages(struct snp_psc_desc *desc)
{
- struct ghcb_state state;
- unsigned long flags;
- struct ghcb *ghcb;
- u64 ret = 0;
-
- if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
- return get_snp_jump_table_addr();
-
- local_irq_save(flags);
+ if (snp_vmpl)
+ svsm_pval_pages(desc);
+ else
+ pval_pages(desc);
+}
- ghcb = __sev_get_ghcb(&state);
+static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
+{
+ int cur_entry, end_entry, ret = 0;
+ struct snp_psc_desc *data;
+ struct es_em_ctxt ctxt;
vc_ghcb_invalidate(ghcb);
- ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
- ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
- ghcb_set_sw_exit_info_2(ghcb, 0);
-
- sev_es_wr_ghcb_msr(__pa(ghcb));
- VMGEXIT();
-
- if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
- ghcb_sw_exit_info_2_is_valid(ghcb))
- ret = ghcb->save.sw_exit_info_2;
-
- __sev_put_ghcb(&state);
-
- local_irq_restore(flags);
-
- return ret;
-}
-static void __head
-early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op)
-{
- unsigned long paddr_end;
- u64 val;
+ /* Copy the input desc into GHCB shared buffer */
+ data = (struct snp_psc_desc *)ghcb->shared_buffer;
+ memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
- vaddr = vaddr & PAGE_MASK;
+ /*
+ * As per the GHCB specification, the hypervisor can resume the guest
+ * before processing all the entries. Check whether all the entries
+ * are processed. If not, then keep retrying. Note, the hypervisor
+ * will update the data memory directly to indicate the status, so
+ * reference the data->hdr everywhere.
+ *
+ * The strategy here is to wait for the hypervisor to change the page
+ * state in the RMP table before guest accesses the memory pages. If the
+ * page state change was not successful, then later memory access will
+ * result in a crash.
+ */
+ cur_entry = data->hdr.cur_entry;
+ end_entry = data->hdr.end_entry;
- paddr = paddr & PAGE_MASK;
- paddr_end = paddr + (npages << PAGE_SHIFT);
+ while (data->hdr.cur_entry <= data->hdr.end_entry) {
+ ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
- while (paddr < paddr_end) {
- /* Page validation must be rescinded before changing to shared */
- if (op == SNP_PAGE_STATE_SHARED)
- pvalidate_4k_page(vaddr, paddr, false);
+ /* This will advance the shared buffer data points to. */
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/*
- * Use the MSR protocol because this function can be called before
- * the GHCB is established.
+ * Page State Change VMGEXIT can pass error code through
+ * exit_info_2.
*/
- sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
- VMGEXIT();
-
- val = sev_es_rd_ghcb_msr();
-
- if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
- goto e_term;
-
- if (GHCB_MSR_PSC_RESP_VAL(val))
- goto e_term;
+ if (WARN(ret || ghcb->save.sw_exit_info_2,
+ "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+ ret, ghcb->save.sw_exit_info_2)) {
+ ret = 1;
+ goto out;
+ }
- /* Page validation must be performed after changing to private */
- if (op == SNP_PAGE_STATE_PRIVATE)
- pvalidate_4k_page(vaddr, paddr, true);
+ /* Verify that reserved bit is not set */
+ if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+ ret = 1;
+ goto out;
+ }
- vaddr += PAGE_SIZE;
- paddr += PAGE_SIZE;
+ /*
+ * Sanity check that entry processing is not going backwards.
+ * This will happen only if hypervisor is tricking us.
+ */
+ if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+ end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+ ret = 1;
+ goto out;
+ }
}
- return;
-
-e_term:
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
-}
-
-void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
- unsigned long npages)
-{
- /*
- * This can be invoked in early boot while running identity mapped, so
- * use an open coded check for SNP instead of using cc_platform_has().
- * This eliminates worries about jump tables or checking boot_cpu_data
- * in the cc_platform_has() function.
- */
- if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
- return;
-
- /*
- * Ask the hypervisor to mark the memory pages as private in the RMP
- * table.
- */
- early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
-}
-
-void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
- unsigned long npages)
-{
- /*
- * This can be invoked in early boot while running identity mapped, so
- * use an open coded check for SNP instead of using cc_platform_has().
- * This eliminates worries about jump tables or checking boot_cpu_data
- * in the cc_platform_has() function.
- */
- if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
- return;
-
- /* Ask hypervisor to mark the memory pages shared in the RMP table. */
- early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
+out:
+ return ret;
}
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
@@ -959,6 +547,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
+static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
+{
+ bool create = event != SVM_VMGEXIT_AP_DESTROY;
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret = 0;
+
+ local_irq_save(flags);
+
+ ghcb = __sev_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+
+ if (create)
+ ghcb_set_rax(ghcb, vmsa->sev_features);
+
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+ ghcb_set_sw_exit_info_1(ghcb,
+ ((u64)apic_id << 32) |
+ ((u64)snp_vmpl << 16) |
+ event);
+ ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+ lower_32_bits(ghcb->save.sw_exit_info_1)) {
+ pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
+ ret = -EINVAL;
+ }
+
+ __sev_put_ghcb(&state);
+
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
+{
+ int ret;
+
+ if (snp_vmpl) {
+ struct svsm_call call = {};
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ call.caa = this_cpu_read(svsm_caa);
+ call.rcx = __pa(va);
+
+ if (make_vmsa) {
+ /* Protocol 0, Call ID 2 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
+ call.rdx = __pa(caa);
+ call.r8 = apic_id;
+ } else {
+ /* Protocol 0, Call ID 3 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
+ }
+
+ ret = svsm_perform_call_protocol(&call);
+
+ local_irq_restore(flags);
+ } else {
+ /*
+ * If the kernel runs at VMPL0, it can change the VMSA
+ * bit for a page using the RMPADJUST instruction.
+ * However, for the instruction to succeed it must
+ * target the permissions of a lesser privileged (higher
+ * numbered) VMPL level, so use VMPL1.
+ */
+ u64 attrs = 1;
+
+ if (make_vmsa)
+ attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+ ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+ }
+
+ return ret;
+}
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
+{
+ int err;
+
+ err = snp_set_vmsa(vmsa, NULL, apic_id, false);
+ if (err)
+ pr_err("clear VMSA page failed (%u), leaking page\n", err);
+ else
+ free_page((unsigned long)vmsa);
+}
+
static void set_pte_enc(pte_t *kpte, int level, void *va)
{
struct pte_enc_desc d = {
@@ -1005,7 +689,8 @@ static void unshare_all_memory(void)
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
- if (addr <= ghcb && ghcb <= addr + size) {
+ /* Handle the case of a huge page containing the GHCB page */
+ if (addr <= ghcb && ghcb < addr + size) {
skipped_addr = true;
break;
}
@@ -1055,11 +740,70 @@ void snp_kexec_begin(void)
pr_warn("Failed to stop shared<->private conversions\n");
}
+/*
+ * Shutdown all APs except the one handling kexec/kdump and clearing
+ * the VMSA tag on AP's VMSA pages as they are not being used as
+ * VMSA page anymore.
+ */
+static void shutdown_all_aps(void)
+{
+ struct sev_es_save_area *vmsa;
+ int apic_id, this_cpu, cpu;
+
+ this_cpu = get_cpu();
+
+ /*
+ * APs are already in HLT loop when enc_kexec_finish() callback
+ * is invoked.
+ */
+ for_each_present_cpu(cpu) {
+ vmsa = per_cpu(sev_vmsa, cpu);
+
+ /*
+ * The BSP or offlined APs do not have guest allocated VMSA
+ * and there is no need to clear the VMSA tag for this page.
+ */
+ if (!vmsa)
+ continue;
+
+ /*
+ * Cannot clear the VMSA tag for the currently running vCPU.
+ */
+ if (this_cpu == cpu) {
+ unsigned long pa;
+ struct page *p;
+
+ pa = __pa(vmsa);
+ /*
+ * Mark the VMSA page of the running vCPU as offline
+ * so that is excluded and not touched by makedumpfile
+ * while generating vmcore during kdump.
+ */
+ p = pfn_to_online_page(pa >> PAGE_SHIFT);
+ if (p)
+ __SetPageOffline(p);
+ continue;
+ }
+
+ apic_id = cpuid_to_apicid[cpu];
+
+ /*
+ * Issue AP destroy to ensure AP gets kicked out of guest mode
+ * to allow using RMPADJUST to remove the VMSA tag on it's
+ * VMSA page.
+ */
+ vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
+ snp_cleanup_vmsa(vmsa, apic_id);
+ }
+
+ put_cpu();
+}
+
void snp_kexec_finish(void)
{
struct sev_es_runtime_data *data;
+ unsigned long size, addr;
unsigned int level, cpu;
- unsigned long size;
struct ghcb *ghcb;
pte_t *pte;
@@ -1069,6 +813,8 @@ void snp_kexec_finish(void)
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;
+ shutdown_all_aps();
+
unshare_all_memory();
/*
@@ -1085,54 +831,11 @@ void snp_kexec_finish(void)
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
- set_pte_enc(pte, level, (void *)ghcb);
- snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
- }
-}
-
-static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
-{
- int ret;
-
- if (snp_vmpl) {
- struct svsm_call call = {};
- unsigned long flags;
-
- local_irq_save(flags);
-
- call.caa = this_cpu_read(svsm_caa);
- call.rcx = __pa(va);
-
- if (make_vmsa) {
- /* Protocol 0, Call ID 2 */
- call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
- call.rdx = __pa(caa);
- call.r8 = apic_id;
- } else {
- /* Protocol 0, Call ID 3 */
- call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
- }
-
- ret = svsm_perform_call_protocol(&call);
-
- local_irq_restore(flags);
- } else {
- /*
- * If the kernel runs at VMPL0, it can change the VMSA
- * bit for a page using the RMPADJUST instruction.
- * However, for the instruction to succeed it must
- * target the permissions of a lesser privileged (higher
- * numbered) VMPL level, so use VMPL1.
- */
- u64 attrs = 1;
-
- if (make_vmsa)
- attrs |= RMPADJUST_VMSA_PAGE_BIT;
-
- ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+ /* Handle the case of a huge page containing the GHCB page */
+ addr = (unsigned long)ghcb & page_level_mask(level);
+ set_pte_enc(pte, level, (void *)addr);
+ snp_set_memory_private(addr, (size / PAGE_SIZE));
}
-
- return ret;
}
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -1166,24 +869,10 @@ static void *snp_alloc_vmsa_page(int cpu)
return page_address(p + 1);
}
-static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
-{
- int err;
-
- err = snp_set_vmsa(vmsa, NULL, apic_id, false);
- if (err)
- pr_err("clear VMSA page failed (%u), leaking page\n", err);
- else
- free_page((unsigned long)vmsa);
-}
-
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
struct sev_es_save_area *cur_vmsa, *vmsa;
- struct ghcb_state state;
struct svsm_ca *caa;
- unsigned long flags;
- struct ghcb *ghcb;
u8 sipi_vector;
int cpu, ret;
u64 cr4;
@@ -1297,33 +986,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
}
/* Issue VMGEXIT AP Creation NAE event */
- local_irq_save(flags);
-
- ghcb = __sev_get_ghcb(&state);
-
- vc_ghcb_invalidate(ghcb);
- ghcb_set_rax(ghcb, vmsa->sev_features);
- ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
- ghcb_set_sw_exit_info_1(ghcb,
- ((u64)apic_id << 32) |
- ((u64)snp_vmpl << 16) |
- SVM_VMGEXIT_AP_CREATE);
- ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
-
- sev_es_wr_ghcb_msr(__pa(ghcb));
- VMGEXIT();
-
- if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
- lower_32_bits(ghcb->save.sw_exit_info_1)) {
- pr_err("SNP AP Creation error\n");
- ret = -EINVAL;
- }
-
- __sev_put_ghcb(&state);
-
- local_irq_restore(flags);
-
- /* Perform cleanup if there was an error */
+ ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
if (ret) {
snp_cleanup_vmsa(vmsa, apic_id);
vmsa = NULL;
@@ -1417,90 +1080,6 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
return 0;
}
-/* Writes to the SVSM CAA MSR are ignored */
-static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
-{
- if (write)
- return ES_OK;
-
- regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
- regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
-
- return ES_OK;
-}
-
-/*
- * TSC related accesses should not exit to the hypervisor when a guest is
- * executing with Secure TSC enabled, so special handling is required for
- * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
- */
-static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
-{
- u64 tsc;
-
- /*
- * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
- * Terminate the SNP guest when the interception is enabled.
- */
- if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
- return ES_VMM_ERROR;
-
- /*
- * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
- * to return undefined values, so ignore all writes.
- *
- * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
- * the value returned by rdtsc_ordered().
- */
- if (write) {
- WARN_ONCE(1, "TSC MSR writes are verboten!\n");
- return ES_OK;
- }
-
- tsc = rdtsc_ordered();
- regs->ax = lower_32_bits(tsc);
- regs->dx = upper_32_bits(tsc);
-
- return ES_OK;
-}
-
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- struct pt_regs *regs = ctxt->regs;
- enum es_result ret;
- bool write;
-
- /* Is it a WRMSR? */
- write = ctxt->insn.opcode.bytes[1] == 0x30;
-
- switch (regs->cx) {
- case MSR_SVSM_CAA:
- return __vc_handle_msr_caa(regs, write);
- case MSR_IA32_TSC:
- case MSR_AMD64_GUEST_TSC_FREQ:
- if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
- return __vc_handle_secure_tsc_msrs(regs, write);
- break;
- default:
- break;
- }
-
- ghcb_set_rcx(ghcb, regs->cx);
- if (write) {
- ghcb_set_rax(ghcb, regs->ax);
- ghcb_set_rdx(ghcb, regs->dx);
- }
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);
-
- if ((ret == ES_OK) && !write) {
- regs->ax = ghcb->save.rax;
- regs->dx = ghcb->save.rdx;
- }
-
- return ret;
-}
-
static void snp_register_per_cpu_ghcb(void)
{
struct sev_es_runtime_data *data;
@@ -1713,748 +1292,6 @@ void __init sev_es_init_vc_handling(void)
initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
-static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
-{
- int trapnr = ctxt->fi.vector;
-
- if (trapnr == X86_TRAP_PF)
- native_write_cr2(ctxt->fi.cr2);
-
- ctxt->regs->orig_ax = ctxt->fi.error_code;
- do_early_exception(ctxt->regs, trapnr);
-}
-
-static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
-{
- long *reg_array;
- int offset;
-
- reg_array = (long *)ctxt->regs;
- offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
-
- if (offset < 0)
- return NULL;
-
- offset /= sizeof(long);
-
- return reg_array + offset;
-}
-static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
- unsigned int bytes, bool read)
-{
- u64 exit_code, exit_info_1, exit_info_2;
- unsigned long ghcb_pa = __pa(ghcb);
- enum es_result res;
- phys_addr_t paddr;
- void __user *ref;
-
- ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
- if (ref == (void __user *)-1L)
- return ES_UNSUPPORTED;
-
- exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
-
- res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
- if (res != ES_OK) {
- if (res == ES_EXCEPTION && !read)
- ctxt->fi.error_code |= X86_PF_WRITE;
-
- return res;
- }
-
- exit_info_1 = paddr;
- /* Can never be greater than 8 */
- exit_info_2 = bytes;
-
- ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
-
- return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
-}
-
-/*
- * The MOVS instruction has two memory operands, which raises the
- * problem that it is not known whether the access to the source or the
- * destination caused the #VC exception (and hence whether an MMIO read
- * or write operation needs to be emulated).
- *
- * Instead of playing games with walking page-tables and trying to guess
- * whether the source or destination is an MMIO range, split the move
- * into two operations, a read and a write with only one memory operand.
- * This will cause a nested #VC exception on the MMIO address which can
- * then be handled.
- *
- * This implementation has the benefit that it also supports MOVS where
- * source _and_ destination are MMIO regions.
- *
- * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
- * rare operation. If it turns out to be a performance problem the split
- * operations can be moved to memcpy_fromio() and memcpy_toio().
- */
-static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
- unsigned int bytes)
-{
- unsigned long ds_base, es_base;
- unsigned char *src, *dst;
- unsigned char buffer[8];
- enum es_result ret;
- bool rep;
- int off;
-
- ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
- es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
-
- if (ds_base == -1L || es_base == -1L) {
- ctxt->fi.vector = X86_TRAP_GP;
- ctxt->fi.error_code = 0;
- return ES_EXCEPTION;
- }
-
- src = ds_base + (unsigned char *)ctxt->regs->si;
- dst = es_base + (unsigned char *)ctxt->regs->di;
-
- ret = vc_read_mem(ctxt, src, buffer, bytes);
- if (ret != ES_OK)
- return ret;
-
- ret = vc_write_mem(ctxt, dst, buffer, bytes);
- if (ret != ES_OK)
- return ret;
-
- if (ctxt->regs->flags & X86_EFLAGS_DF)
- off = -bytes;
- else
- off = bytes;
-
- ctxt->regs->si += off;
- ctxt->regs->di += off;
-
- rep = insn_has_rep_prefix(&ctxt->insn);
- if (rep)
- ctxt->regs->cx -= 1;
-
- if (!rep || ctxt->regs->cx == 0)
- return ES_OK;
- else
- return ES_RETRY;
-}
-
-static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- struct insn *insn = &ctxt->insn;
- enum insn_mmio_type mmio;
- unsigned int bytes = 0;
- enum es_result ret;
- u8 sign_byte;
- long *reg_data;
-
- mmio = insn_decode_mmio(insn, &bytes);
- if (mmio == INSN_MMIO_DECODE_FAILED)
- return ES_DECODE_FAILED;
-
- if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
- reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
- if (!reg_data)
- return ES_DECODE_FAILED;
- }
-
- if (user_mode(ctxt->regs))
- return ES_UNSUPPORTED;
-
- switch (mmio) {
- case INSN_MMIO_WRITE:
- memcpy(ghcb->shared_buffer, reg_data, bytes);
- ret = vc_do_mmio(ghcb, ctxt, bytes, false);
- break;
- case INSN_MMIO_WRITE_IMM:
- memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
- ret = vc_do_mmio(ghcb, ctxt, bytes, false);
- break;
- case INSN_MMIO_READ:
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- /* Zero-extend for 32-bit operation */
- if (bytes == 4)
- *reg_data = 0;
-
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
- case INSN_MMIO_READ_ZERO_EXTEND:
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- /* Zero extend based on operand size */
- memset(reg_data, 0, insn->opnd_bytes);
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
- case INSN_MMIO_READ_SIGN_EXTEND:
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- if (bytes == 1) {
- u8 *val = (u8 *)ghcb->shared_buffer;
-
- sign_byte = (*val & 0x80) ? 0xff : 0x00;
- } else {
- u16 *val = (u16 *)ghcb->shared_buffer;
-
- sign_byte = (*val & 0x8000) ? 0xff : 0x00;
- }
-
- /* Sign extend based on operand size */
- memset(reg_data, sign_byte, insn->opnd_bytes);
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
- case INSN_MMIO_MOVS:
- ret = vc_handle_mmio_movs(ctxt, bytes);
- break;
- default:
- ret = ES_UNSUPPORTED;
- break;
- }
-
- return ret;
-}
-
-static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
- long val, *reg = vc_insn_get_rm(ctxt);
- enum es_result ret;
-
- if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
- return ES_VMM_ERROR;
-
- if (!reg)
- return ES_DECODE_FAILED;
-
- val = *reg;
-
- /* Upper 32 bits must be written as zeroes */
- if (val >> 32) {
- ctxt->fi.vector = X86_TRAP_GP;
- ctxt->fi.error_code = 0;
- return ES_EXCEPTION;
- }
-
- /* Clear out other reserved bits and set bit 10 */
- val = (val & 0xffff23ffL) | BIT(10);
-
- /* Early non-zero writes to DR7 are not supported */
- if (!data && (val & ~DR7_RESET_VALUE))
- return ES_UNSUPPORTED;
-
- /* Using a value of 0 for ExitInfo1 means RAX holds the value */
- ghcb_set_rax(ghcb, val);
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (data)
- data->dr7 = val;
-
- return ES_OK;
-}
-
-static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
- long *reg = vc_insn_get_rm(ctxt);
-
- if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
- return ES_VMM_ERROR;
-
- if (!reg)
- return ES_DECODE_FAILED;
-
- if (data)
- *reg = data->dr7;
- else
- *reg = DR7_RESET_VALUE;
-
- return ES_OK;
-}
-
-static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
-}
-
-static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- enum es_result ret;
-
- ghcb_set_rcx(ghcb, ctxt->regs->cx);
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
- return ES_VMM_ERROR;
-
- ctxt->regs->ax = ghcb->save.rax;
- ctxt->regs->dx = ghcb->save.rdx;
-
- return ES_OK;
-}
-
-static enum es_result vc_handle_monitor(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- /*
- * Treat it as a NOP and do not leak a physical address to the
- * hypervisor.
- */
- return ES_OK;
-}
-
-static enum es_result vc_handle_mwait(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- /* Treat the same as MONITOR/MONITORX */
- return ES_OK;
-}
-
-static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- enum es_result ret;
-
- ghcb_set_rax(ghcb, ctxt->regs->ax);
- ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
-
- if (x86_platform.hyper.sev_es_hcall_prepare)
- x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (!ghcb_rax_is_valid(ghcb))
- return ES_VMM_ERROR;
-
- ctxt->regs->ax = ghcb->save.rax;
-
- /*
- * Call sev_es_hcall_finish() after regs->ax is already set.
- * This allows the hypervisor handler to overwrite it again if
- * necessary.
- */
- if (x86_platform.hyper.sev_es_hcall_finish &&
- !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
- return ES_VMM_ERROR;
-
- return ES_OK;
-}
-
-static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- /*
- * Calling ecx_alignment_check() directly does not work, because it
- * enables IRQs and the GHCB is active. Forward the exception and call
- * it later from vc_forward_exception().
- */
- ctxt->fi.vector = X86_TRAP_AC;
- ctxt->fi.error_code = 0;
- return ES_EXCEPTION;
-}
-
-static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
- struct ghcb *ghcb,
- unsigned long exit_code)
-{
- enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
-
- if (result != ES_OK)
- return result;
-
- switch (exit_code) {
- case SVM_EXIT_READ_DR7:
- result = vc_handle_dr7_read(ghcb, ctxt);
- break;
- case SVM_EXIT_WRITE_DR7:
- result = vc_handle_dr7_write(ghcb, ctxt);
- break;
- case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
- result = vc_handle_trap_ac(ghcb, ctxt);
- break;
- case SVM_EXIT_RDTSC:
- case SVM_EXIT_RDTSCP:
- result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
- break;
- case SVM_EXIT_RDPMC:
- result = vc_handle_rdpmc(ghcb, ctxt);
- break;
- case SVM_EXIT_INVD:
- pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
- result = ES_UNSUPPORTED;
- break;
- case SVM_EXIT_CPUID:
- result = vc_handle_cpuid(ghcb, ctxt);
- break;
- case SVM_EXIT_IOIO:
- result = vc_handle_ioio(ghcb, ctxt);
- break;
- case SVM_EXIT_MSR:
- result = vc_handle_msr(ghcb, ctxt);
- break;
- case SVM_EXIT_VMMCALL:
- result = vc_handle_vmmcall(ghcb, ctxt);
- break;
- case SVM_EXIT_WBINVD:
- result = vc_handle_wbinvd(ghcb, ctxt);
- break;
- case SVM_EXIT_MONITOR:
- result = vc_handle_monitor(ghcb, ctxt);
- break;
- case SVM_EXIT_MWAIT:
- result = vc_handle_mwait(ghcb, ctxt);
- break;
- case SVM_EXIT_NPF:
- result = vc_handle_mmio(ghcb, ctxt);
- break;
- default:
- /*
- * Unexpected #VC exception
- */
- result = ES_UNSUPPORTED;
- }
-
- return result;
-}
-
-static __always_inline bool is_vc2_stack(unsigned long sp)
-{
- return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
-}
-
-static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
-{
- unsigned long sp, prev_sp;
-
- sp = (unsigned long)regs;
- prev_sp = regs->sp;
-
- /*
- * If the code was already executing on the VC2 stack when the #VC
- * happened, let it proceed to the normal handling routine. This way the
- * code executing on the VC2 stack can cause #VC exceptions to get handled.
- */
- return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
-}
-
-static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
-{
- struct ghcb_state state;
- struct es_em_ctxt ctxt;
- enum es_result result;
- struct ghcb *ghcb;
- bool ret = true;
-
- ghcb = __sev_get_ghcb(&state);
-
- vc_ghcb_invalidate(ghcb);
- result = vc_init_em_ctxt(&ctxt, regs, error_code);
-
- if (result == ES_OK)
- result = vc_handle_exitcode(&ctxt, ghcb, error_code);
-
- __sev_put_ghcb(&state);
-
- /* Done - now check the result */
- switch (result) {
- case ES_OK:
- vc_finish_insn(&ctxt);
- break;
- case ES_UNSUPPORTED:
- pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
- error_code, regs->ip);
- ret = false;
- break;
- case ES_VMM_ERROR:
- pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
- error_code, regs->ip);
- ret = false;
- break;
- case ES_DECODE_FAILED:
- pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
- error_code, regs->ip);
- ret = false;
- break;
- case ES_EXCEPTION:
- vc_forward_exception(&ctxt);
- break;
- case ES_RETRY:
- /* Nothing to do */
- break;
- default:
- pr_emerg("Unknown result in %s():%d\n", __func__, result);
- /*
- * Emulating the instruction which caused the #VC exception
- * failed - can't continue so print debug information
- */
- BUG();
- }
-
- return ret;
-}
-
-static __always_inline bool vc_is_db(unsigned long error_code)
-{
- return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
-}
-
-/*
- * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
- * and will panic when an error happens.
- */
-DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
-{
- irqentry_state_t irq_state;
-
- /*
- * With the current implementation it is always possible to switch to a
- * safe stack because #VC exceptions only happen at known places, like
- * intercepted instructions or accesses to MMIO areas/IO ports. They can
- * also happen with code instrumentation when the hypervisor intercepts
- * #DB, but the critical paths are forbidden to be instrumented, so #DB
- * exceptions currently also only happen in safe places.
- *
- * But keep this here in case the noinstr annotations are violated due
- * to bug elsewhere.
- */
- if (unlikely(vc_from_invalid_context(regs))) {
- instrumentation_begin();
- panic("Can't handle #VC exception from unsupported context\n");
- instrumentation_end();
- }
-
- /*
- * Handle #DB before calling into !noinstr code to avoid recursive #DB.
- */
- if (vc_is_db(error_code)) {
- exc_debug(regs);
- return;
- }
-
- irq_state = irqentry_nmi_enter(regs);
-
- instrumentation_begin();
-
- if (!vc_raw_handle_exception(regs, error_code)) {
- /* Show some debug info */
- show_regs(regs);
-
- /* Ask hypervisor to sev_es_terminate */
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-
- /* If that fails and we get here - just panic */
- panic("Returned from Terminate-Request to Hypervisor\n");
- }
-
- instrumentation_end();
- irqentry_nmi_exit(regs, irq_state);
-}
-
-/*
- * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
- * and will kill the current task with SIGBUS when an error happens.
- */
-DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
-{
- /*
- * Handle #DB before calling into !noinstr code to avoid recursive #DB.
- */
- if (vc_is_db(error_code)) {
- noist_exc_debug(regs);
- return;
- }
-
- irqentry_enter_from_user_mode(regs);
- instrumentation_begin();
-
- if (!vc_raw_handle_exception(regs, error_code)) {
- /*
- * Do not kill the machine if user-space triggered the
- * exception. Send SIGBUS instead and let user-space deal with
- * it.
- */
- force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
- }
-
- instrumentation_end();
- irqentry_exit_to_user_mode(regs);
-}
-
-bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
-{
- unsigned long exit_code = regs->orig_ax;
- struct es_em_ctxt ctxt;
- enum es_result result;
-
- vc_ghcb_invalidate(boot_ghcb);
-
- result = vc_init_em_ctxt(&ctxt, regs, exit_code);
- if (result == ES_OK)
- result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
-
- /* Done - now check the result */
- switch (result) {
- case ES_OK:
- vc_finish_insn(&ctxt);
- break;
- case ES_UNSUPPORTED:
- early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
- exit_code, regs->ip);
- goto fail;
- case ES_VMM_ERROR:
- early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
- exit_code, regs->ip);
- goto fail;
- case ES_DECODE_FAILED:
- early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
- exit_code, regs->ip);
- goto fail;
- case ES_EXCEPTION:
- vc_early_forward_exception(&ctxt);
- break;
- case ES_RETRY:
- /* Nothing to do */
- break;
- default:
- BUG();
- }
-
- return true;
-
-fail:
- show_regs(regs);
-
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-}
-
-/*
- * Initial set up of SNP relies on information provided by the
- * Confidential Computing blob, which can be passed to the kernel
- * in the following ways, depending on how it is booted:
- *
- * - when booted via the boot/decompress kernel:
- * - via boot_params
- *
- * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
- * - via a setup_data entry, as defined by the Linux Boot Protocol
- *
- * Scan for the blob in that order.
- */
-static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
-{
- struct cc_blob_sev_info *cc_info;
-
- /* Boot kernel would have passed the CC blob via boot_params. */
- if (bp->cc_blob_address) {
- cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
- goto found_cc_info;
- }
-
- /*
- * If kernel was booted directly, without the use of the
- * boot/decompression kernel, the CC blob may have been passed via
- * setup_data instead.
- */
- cc_info = find_cc_blob_setup_data(bp);
- if (!cc_info)
- return NULL;
-
-found_cc_info:
- if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
- snp_abort();
-
- return cc_info;
-}
-
-static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
-{
- struct svsm_call call = {};
- int ret;
- u64 pa;
-
- /*
- * Record the SVSM Calling Area address (CAA) if the guest is not
- * running at VMPL0. The CA will be used to communicate with the
- * SVSM to perform the SVSM services.
- */
- if (!svsm_setup_ca(cc_info))
- return;
-
- /*
- * It is very early in the boot and the kernel is running identity
- * mapped but without having adjusted the pagetables to where the
- * kernel was loaded (physbase), so the get the CA address using
- * RIP-relative addressing.
- */
- pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
-
- /*
- * Switch over to the boot SVSM CA while the current CA is still
- * addressable. There is no GHCB at this point so use the MSR protocol.
- *
- * SVSM_CORE_REMAP_CA call:
- * RAX = 0 (Protocol=0, CallID=0)
- * RCX = New CA GPA
- */
- call.caa = svsm_get_caa();
- call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
- call.rcx = pa;
- ret = svsm_perform_call_protocol(&call);
- if (ret)
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
-
- RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
- RIP_REL_REF(boot_svsm_caa_pa) = pa;
-}
-
-bool __head snp_init(struct boot_params *bp)
-{
- struct cc_blob_sev_info *cc_info;
-
- if (!bp)
- return false;
-
- cc_info = find_cc_blob(bp);
- if (!cc_info)
- return false;
-
- if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
- secrets_pa = cc_info->secrets_phys;
- else
- return false;
-
- setup_cpuid_table(cc_info);
-
- svsm_setup(cc_info);
-
- /*
- * The CC blob will be used later to access the secrets page. Cache
- * it here like the boot kernel does.
- */
- bp->cc_blob_address = (u32)(unsigned long)cc_info;
-
- return true;
-}
-
-void __head __noreturn snp_abort(void)
-{
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
-}
-
/*
* SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
* enabled, as the alternative (fallback) logic for DMI probing in the legacy
@@ -2625,11 +1462,74 @@ e_restore_irq:
return ret;
}
+/**
+ * snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
+ *
+ * Check that there is SVSM and that it supports at least TPM_SEND_COMMAND
+ * which is the only request used so far.
+ *
+ * Return: true if the platform provides a vTPM SVSM device, false otherwise.
+ */
+static bool snp_svsm_vtpm_probe(void)
+{
+ struct svsm_call call = {};
+
+ /* The vTPM device is available only if a SVSM is present */
+ if (!snp_vmpl)
+ return false;
+
+ call.caa = svsm_get_caa();
+ call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
+
+ if (svsm_perform_call_protocol(&call))
+ return false;
+
+ /* Check platform commands contains TPM_SEND_COMMAND - platform command 8 */
+ return call.rcx_out & BIT_ULL(8);
+}
+
+/**
+ * snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
+ * @buffer: A buffer used to both send the command and receive the response.
+ *
+ * Execute a SVSM_VTPM_CMD call as defined by
+ * "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
+ *
+ * All command request/response buffers have a common structure as specified by
+ * the following table:
+ * Byte Size     In/Out    Description
+ * Offset    (Bytes)
+ * 0x000     4          In        Platform command
+ *                         Out       Platform command response size
+ *
+ * Each command can build upon this common request/response structure to create
+ * a structure specific to the command. See include/linux/tpm_svsm.h for more
+ * details.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int snp_svsm_vtpm_send_command(u8 *buffer)
+{
+ struct svsm_call call = {};
+
+ call.caa = svsm_get_caa();
+ call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
+ call.rcx = __pa(buffer);
+
+ return svsm_perform_call_protocol(&call);
+}
+EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
+
static struct platform_device sev_guest_device = {
.name = "sev-guest",
.id = -1,
};
+static struct platform_device tpm_svsm_device = {
+ .name = "tpm-svsm",
+ .id = -1,
+};
+
static int __init snp_init_platform_device(void)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
@@ -2638,7 +1538,11 @@ static int __init snp_init_platform_device(void)
if (platform_device_register(&sev_guest_device))
return -ENODEV;
- pr_info("SNP guest platform device initialized.\n");
+ if (snp_svsm_vtpm_probe() &&
+ platform_device_register(&tpm_svsm_device))
+ return -ENODEV;
+
+ pr_info("SNP guest platform devices initialized.\n");
return 0;
}
device_initcall(snp_init_platform_device);
@@ -2835,7 +1739,7 @@ struct snp_msg_desc *snp_msg_alloc(void)
if (!mdesc)
return ERR_PTR(-ENOMEM);
- mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
+ mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
if (!mem)
goto e_free_mdesc;
@@ -3278,7 +2182,7 @@ void __init snp_secure_tsc_init(void)
return;
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
- rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
+ rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/sev-nmi.c
new file mode 100644
index 000000000000..d8dfaddfb367
--- /dev/null
+++ b/arch/x86/coco/sev/sev-nmi.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt) "SEV: " fmt
+
+#include <linux/bug.h>
+#include <linux/kernel.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/msr.h>
+#include <asm/ptrace.h>
+#include <asm/sev.h>
+#include <asm/sev-internal.h>
+
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
+{
+ unsigned long sp = regs->sp;
+
+ /* User-mode RSP is not trusted */
+ if (user_mode(regs))
+ return false;
+
+ /* SYSCALL gap still has user-mode RSP */
+ if (ip_within_syscall_gap(regs))
+ return false;
+
+ return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
+}
+
+/*
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
+ *
+ * The IST entry is adjusted unconditionally so that it can be also be
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
+ */
+void noinstr __sev_es_ist_enter(struct pt_regs *regs)
+{
+ unsigned long old_ist, new_ist;
+
+ /* Read old IST entry */
+ new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+ /*
+ * If NMI happened while on the #VC IST stack, set the new IST
+ * value below regs->sp, so that the interrupted stack frame is
+ * not overwritten by subsequent #VC exceptions.
+ */
+ if (on_vc_stack(regs))
+ new_ist = regs->sp;
+
+ /*
+ * Reserve additional 8 bytes and store old IST value so this
+ * adjustment can be unrolled in __sev_es_ist_exit().
+ */
+ new_ist -= sizeof(old_ist);
+ *(unsigned long *)new_ist = old_ist;
+
+ /* Set new IST entry */
+ this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
+}
+
+void noinstr __sev_es_ist_exit(void)
+{
+ unsigned long ist;
+
+ /* Read IST entry */
+ ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+ if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
+ return;
+
+ /* Read back old IST entry and write it to the TSS */
+ this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
+}
+
+void noinstr __sev_es_nmi_complete(void)
+{
+ struct ghcb_state state;
+ struct ghcb *ghcb;
+
+ ghcb = __sev_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
+ VMGEXIT();
+
+ __sev_put_ghcb(&state);
+}
diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c
new file mode 100644
index 000000000000..0989d98da130
--- /dev/null
+++ b/arch/x86/coco/sev/vc-handle.c
@@ -0,0 +1,1061 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt) "SEV: " fmt
+
+#include <linux/sched/debug.h> /* For show_regs() */
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/psp-sev.h>
+#include <uapi/linux/sev-guest.h>
+
+#include <asm/init.h>
+#include <asm/stacktrace.h>
+#include <asm/sev.h>
+#include <asm/sev-internal.h>
+#include <asm/insn-eval.h>
+#include <asm/fpu/xcr.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid/api.h>
+
+static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ unsigned long vaddr, phys_addr_t *paddr)
+{
+ unsigned long va = (unsigned long)vaddr;
+ unsigned int level;
+ phys_addr_t pa;
+ pgd_t *pgd;
+ pte_t *pte;
+
+ pgd = __va(read_cr3_pa());
+ pgd = &pgd[pgd_index(va)];
+ pte = lookup_address_in_pgd(pgd, va, &level);
+ if (!pte) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.cr2 = vaddr;
+ ctxt->fi.error_code = 0;
+
+ if (user_mode(ctxt->regs))
+ ctxt->fi.error_code |= X86_PF_USER;
+
+ return ES_EXCEPTION;
+ }
+
+ if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
+ /* Emulated MMIO to/from encrypted memory not supported */
+ return ES_UNSUPPORTED;
+
+ pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+ pa |= va & ~page_level_mask(level);
+
+ *paddr = pa;
+
+ return ES_OK;
+}
+
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ BUG_ON(size > 4);
+
+ if (user_mode(ctxt->regs)) {
+ struct thread_struct *t = &current->thread;
+ struct io_bitmap *iobm = t->io_bitmap;
+ size_t idx;
+
+ if (!iobm)
+ goto fault;
+
+ for (idx = port; idx < port + size; ++idx) {
+ if (test_bit(idx, iobm->bitmap))
+ goto fault;
+ }
+ }
+
+ return ES_OK;
+
+fault:
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+
+ return ES_EXCEPTION;
+}
+
+void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+ long error_code = ctxt->fi.error_code;
+ int trapnr = ctxt->fi.vector;
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+ switch (trapnr) {
+ case X86_TRAP_GP:
+ exc_general_protection(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_UD:
+ exc_invalid_op(ctxt->regs);
+ break;
+ case X86_TRAP_PF:
+ write_cr2(ctxt->fi.cr2);
+ exc_page_fault(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_AC:
+ exc_alignment_check(ctxt->regs, error_code);
+ break;
+ default:
+ pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+ BUG();
+ }
+}
+
+static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
+ unsigned char *buffer)
+{
+ return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+}
+
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ int insn_bytes;
+
+ insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+ if (insn_bytes == 0) {
+ /* Nothing could be copied */
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ } else if (insn_bytes == -EINVAL) {
+ /* Effective RIP could not be calculated */
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+ ctxt->fi.cr2 = 0;
+ return ES_EXCEPTION;
+ }
+
+ if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
+ return ES_DECODE_FAILED;
+
+ if (ctxt->insn.immediate.got)
+ return ES_OK;
+ else
+ return ES_DECODE_FAILED;
+}
+
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ int res, ret;
+
+ res = vc_fetch_insn_kernel(ctxt, buffer);
+ if (res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ }
+
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
+ else
+ return ES_OK;
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ if (user_mode(ctxt->regs))
+ return __vc_decode_user_insn(ctxt);
+ else
+ return __vc_decode_kern_insn(ctxt);
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ char *dst, char *buf, size_t size)
+{
+ unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+
+ /*
+ * This function uses __put_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __put_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __put_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_to_user() here because
+ * vc_write_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
+ switch (size) {
+ case 1: {
+ u8 d1;
+ u8 __user *target = (u8 __user *)dst;
+
+ memcpy(&d1, buf, 1);
+ if (__put_user(d1, target))
+ goto fault;
+ break;
+ }
+ case 2: {
+ u16 d2;
+ u16 __user *target = (u16 __user *)dst;
+
+ memcpy(&d2, buf, 2);
+ if (__put_user(d2, target))
+ goto fault;
+ break;
+ }
+ case 4: {
+ u32 d4;
+ u32 __user *target = (u32 __user *)dst;
+
+ memcpy(&d4, buf, 4);
+ if (__put_user(d4, target))
+ goto fault;
+ break;
+ }
+ case 8: {
+ u64 d8;
+ u64 __user *target = (u64 __user *)dst;
+
+ memcpy(&d8, buf, 8);
+ if (__put_user(d8, target))
+ goto fault;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return ES_UNSUPPORTED;
+ }
+
+ return ES_OK;
+
+fault:
+ if (user_mode(ctxt->regs))
+ error_code |= X86_PF_USER;
+
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = error_code;
+ ctxt->fi.cr2 = (unsigned long)dst;
+
+ return ES_EXCEPTION;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ char *src, char *buf, size_t size)
+{
+ unsigned long error_code = X86_PF_PROT;
+
+ /*
+ * This function uses __get_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __get_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __get_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_from_user() here because
+ * vc_read_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
+ switch (size) {
+ case 1: {
+ u8 d1;
+ u8 __user *s = (u8 __user *)src;
+
+ if (__get_user(d1, s))
+ goto fault;
+ memcpy(buf, &d1, 1);
+ break;
+ }
+ case 2: {
+ u16 d2;
+ u16 __user *s = (u16 __user *)src;
+
+ if (__get_user(d2, s))
+ goto fault;
+ memcpy(buf, &d2, 2);
+ break;
+ }
+ case 4: {
+ u32 d4;
+ u32 __user *s = (u32 __user *)src;
+
+ if (__get_user(d4, s))
+ goto fault;
+ memcpy(buf, &d4, 4);
+ break;
+ }
+ case 8: {
+ u64 d8;
+ u64 __user *s = (u64 __user *)src;
+ if (__get_user(d8, s))
+ goto fault;
+ memcpy(buf, &d8, 8);
+ break;
+ }
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return ES_UNSUPPORTED;
+ }
+
+ return ES_OK;
+
+fault:
+ if (user_mode(ctxt->regs))
+ error_code |= X86_PF_USER;
+
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = error_code;
+ ctxt->fi.cr2 = (unsigned long)src;
+
+ return ES_EXCEPTION;
+}
+
+#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__)
+
+#include "vc-shared.c"
+
+/* Writes to the SVSM CAA MSR are ignored */
+static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
+{
+ if (write)
+ return ES_OK;
+
+ regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
+ regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
+
+ return ES_OK;
+}
+
+/*
+ * TSC related accesses should not exit to the hypervisor when a guest is
+ * executing with Secure TSC enabled, so special handling is required for
+ * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
+ */
+static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
+{
+ u64 tsc;
+
+ /*
+ * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
+ * Terminate the SNP guest when the interception is enabled.
+ */
+ if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
+ return ES_VMM_ERROR;
+
+ /*
+ * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
+ * to return undefined values, so ignore all writes.
+ *
+ * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
+ * the value returned by rdtsc_ordered().
+ */
+ if (write) {
+ WARN_ONCE(1, "TSC MSR writes are verboten!\n");
+ return ES_OK;
+ }
+
+ tsc = rdtsc_ordered();
+ regs->ax = lower_32_bits(tsc);
+ regs->dx = upper_32_bits(tsc);
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ enum es_result ret;
+ bool write;
+
+ /* Is it a WRMSR? */
+ write = ctxt->insn.opcode.bytes[1] == 0x30;
+
+ switch (regs->cx) {
+ case MSR_SVSM_CAA:
+ return __vc_handle_msr_caa(regs, write);
+ case MSR_IA32_TSC:
+ case MSR_AMD64_GUEST_TSC_FREQ:
+ if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
+ return __vc_handle_secure_tsc_msrs(regs, write);
+ break;
+ default:
+ break;
+ }
+
+ ghcb_set_rcx(ghcb, regs->cx);
+ if (write) {
+ ghcb_set_rax(ghcb, regs->ax);
+ ghcb_set_rdx(ghcb, regs->dx);
+ }
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);
+
+ if ((ret == ES_OK) && !write) {
+ regs->ax = ghcb->save.rax;
+ regs->dx = ghcb->save.rdx;
+ }
+
+ return ret;
+}
+
+static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
+{
+ int trapnr = ctxt->fi.vector;
+
+ if (trapnr == X86_TRAP_PF)
+ native_write_cr2(ctxt->fi.cr2);
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+ do_early_exception(ctxt->regs, trapnr);
+}
+
+static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
+{
+ long *reg_array;
+ int offset;
+
+ reg_array = (long *)ctxt->regs;
+ offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
+
+ if (offset < 0)
+ return NULL;
+
+ offset /= sizeof(long);
+
+ return reg_array + offset;
+}
+static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ unsigned int bytes, bool read)
+{
+ u64 exit_code, exit_info_1, exit_info_2;
+ unsigned long ghcb_pa = __pa(ghcb);
+ enum es_result res;
+ phys_addr_t paddr;
+ void __user *ref;
+
+ ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
+ if (ref == (void __user *)-1L)
+ return ES_UNSUPPORTED;
+
+ exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
+
+ res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
+ if (res != ES_OK) {
+ if (res == ES_EXCEPTION && !read)
+ ctxt->fi.error_code |= X86_PF_WRITE;
+
+ return res;
+ }
+
+ exit_info_1 = paddr;
+ /* Can never be greater than 8 */
+ exit_info_2 = bytes;
+
+ ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
+
+ return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
+}
+
+/*
+ * The MOVS instruction has two memory operands, which raises the
+ * problem that it is not known whether the access to the source or the
+ * destination caused the #VC exception (and hence whether an MMIO read
+ * or write operation needs to be emulated).
+ *
+ * Instead of playing games with walking page-tables and trying to guess
+ * whether the source or destination is an MMIO range, split the move
+ * into two operations, a read and a write with only one memory operand.
+ * This will cause a nested #VC exception on the MMIO address which can
+ * then be handled.
+ *
+ * This implementation has the benefit that it also supports MOVS where
+ * source _and_ destination are MMIO regions.
+ *
+ * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
+ * rare operation. If it turns out to be a performance problem the split
+ * operations can be moved to memcpy_fromio() and memcpy_toio().
+ */
+static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
+ unsigned int bytes)
+{
+ unsigned long ds_base, es_base;
+ unsigned char *src, *dst;
+ unsigned char buffer[8];
+ enum es_result ret;
+ bool rep;
+ int off;
+
+ ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
+ es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+ if (ds_base == -1L || es_base == -1L) {
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+ return ES_EXCEPTION;
+ }
+
+ src = ds_base + (unsigned char *)ctxt->regs->si;
+ dst = es_base + (unsigned char *)ctxt->regs->di;
+
+ ret = vc_read_mem(ctxt, src, buffer, bytes);
+ if (ret != ES_OK)
+ return ret;
+
+ ret = vc_write_mem(ctxt, dst, buffer, bytes);
+ if (ret != ES_OK)
+ return ret;
+
+ if (ctxt->regs->flags & X86_EFLAGS_DF)
+ off = -bytes;
+ else
+ off = bytes;
+
+ ctxt->regs->si += off;
+ ctxt->regs->di += off;
+
+ rep = insn_has_rep_prefix(&ctxt->insn);
+ if (rep)
+ ctxt->regs->cx -= 1;
+
+ if (!rep || ctxt->regs->cx == 0)
+ return ES_OK;
+ else
+ return ES_RETRY;
+}
+
+static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct insn *insn = &ctxt->insn;
+ enum insn_mmio_type mmio;
+ unsigned int bytes = 0;
+ enum es_result ret;
+ u8 sign_byte;
+ long *reg_data;
+
+ mmio = insn_decode_mmio(insn, &bytes);
+ if (mmio == INSN_MMIO_DECODE_FAILED)
+ return ES_DECODE_FAILED;
+
+ if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
+ reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
+ if (!reg_data)
+ return ES_DECODE_FAILED;
+ }
+
+ if (user_mode(ctxt->regs))
+ return ES_UNSUPPORTED;
+
+ switch (mmio) {
+ case INSN_MMIO_WRITE:
+ memcpy(ghcb->shared_buffer, reg_data, bytes);
+ ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+ break;
+ case INSN_MMIO_WRITE_IMM:
+ memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
+ ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+ break;
+ case INSN_MMIO_READ:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ /* Zero-extend for 32-bit operation */
+ if (bytes == 4)
+ *reg_data = 0;
+
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+ case INSN_MMIO_READ_ZERO_EXTEND:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ /* Zero extend based on operand size */
+ memset(reg_data, 0, insn->opnd_bytes);
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+ case INSN_MMIO_READ_SIGN_EXTEND:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ if (bytes == 1) {
+ u8 *val = (u8 *)ghcb->shared_buffer;
+
+ sign_byte = (*val & 0x80) ? 0xff : 0x00;
+ } else {
+ u16 *val = (u16 *)ghcb->shared_buffer;
+
+ sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+ }
+
+ /* Sign extend based on operand size */
+ memset(reg_data, sign_byte, insn->opnd_bytes);
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+ case INSN_MMIO_MOVS:
+ ret = vc_handle_mmio_movs(ctxt, bytes);
+ break;
+ default:
+ ret = ES_UNSUPPORTED;
+ break;
+ }
+
+ return ret;
+}
+
+static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ long val, *reg = vc_insn_get_rm(ctxt);
+ enum es_result ret;
+
+ if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
+ return ES_VMM_ERROR;
+
+ if (!reg)
+ return ES_DECODE_FAILED;
+
+ val = *reg;
+
+ /* Upper 32 bits must be written as zeroes */
+ if (val >> 32) {
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+ return ES_EXCEPTION;
+ }
+
+ /* Clear out other reserved bits and set bit 10 */
+ val = (val & 0xffff23ffL) | BIT(10);
+
+ /* Early non-zero writes to DR7 are not supported */
+ if (!data && (val & ~DR7_RESET_VALUE))
+ return ES_UNSUPPORTED;
+
+ /* Using a value of 0 for ExitInfo1 means RAX holds the value */
+ ghcb_set_rax(ghcb, val);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (data)
+ data->dr7 = val;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ long *reg = vc_insn_get_rm(ctxt);
+
+ if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
+ return ES_VMM_ERROR;
+
+ if (!reg)
+ return ES_DECODE_FAILED;
+
+ if (data)
+ *reg = data->dr7;
+ else
+ *reg = DR7_RESET_VALUE;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
+}
+
+static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ enum es_result ret;
+
+ ghcb_set_rcx(ghcb, ctxt->regs->cx);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+ ctxt->regs->dx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_monitor(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ /*
+ * Treat it as a NOP and do not leak a physical address to the
+ * hypervisor.
+ */
+ return ES_OK;
+}
+
+static enum es_result vc_handle_mwait(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ /* Treat the same as MONITOR/MONITORX */
+ return ES_OK;
+}
+
+static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ enum es_result ret;
+
+ ghcb_set_rax(ghcb, ctxt->regs->ax);
+ ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
+
+ if (x86_platform.hyper.sev_es_hcall_prepare)
+ x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!ghcb_rax_is_valid(ghcb))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+
+ /*
+ * Call sev_es_hcall_finish() after regs->ax is already set.
+ * This allows the hypervisor handler to overwrite it again if
+ * necessary.
+ */
+ if (x86_platform.hyper.sev_es_hcall_finish &&
+ !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
+ return ES_VMM_ERROR;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ /*
+ * Calling ecx_alignment_check() directly does not work, because it
+ * enables IRQs and the GHCB is active. Forward the exception and call
+ * it later from vc_forward_exception().
+ */
+ ctxt->fi.vector = X86_TRAP_AC;
+ ctxt->fi.error_code = 0;
+ return ES_EXCEPTION;
+}
+
+static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
+ struct ghcb *ghcb,
+ unsigned long exit_code)
+{
+ enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
+
+ if (result != ES_OK)
+ return result;
+
+ switch (exit_code) {
+ case SVM_EXIT_READ_DR7:
+ result = vc_handle_dr7_read(ghcb, ctxt);
+ break;
+ case SVM_EXIT_WRITE_DR7:
+ result = vc_handle_dr7_write(ghcb, ctxt);
+ break;
+ case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
+ result = vc_handle_trap_ac(ghcb, ctxt);
+ break;
+ case SVM_EXIT_RDTSC:
+ case SVM_EXIT_RDTSCP:
+ result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
+ break;
+ case SVM_EXIT_RDPMC:
+ result = vc_handle_rdpmc(ghcb, ctxt);
+ break;
+ case SVM_EXIT_INVD:
+ pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
+ result = ES_UNSUPPORTED;
+ break;
+ case SVM_EXIT_CPUID:
+ result = vc_handle_cpuid(ghcb, ctxt);
+ break;
+ case SVM_EXIT_IOIO:
+ result = vc_handle_ioio(ghcb, ctxt);
+ break;
+ case SVM_EXIT_MSR:
+ result = vc_handle_msr(ghcb, ctxt);
+ break;
+ case SVM_EXIT_VMMCALL:
+ result = vc_handle_vmmcall(ghcb, ctxt);
+ break;
+ case SVM_EXIT_WBINVD:
+ result = vc_handle_wbinvd(ghcb, ctxt);
+ break;
+ case SVM_EXIT_MONITOR:
+ result = vc_handle_monitor(ghcb, ctxt);
+ break;
+ case SVM_EXIT_MWAIT:
+ result = vc_handle_mwait(ghcb, ctxt);
+ break;
+ case SVM_EXIT_NPF:
+ result = vc_handle_mmio(ghcb, ctxt);
+ break;
+ default:
+ /*
+ * Unexpected #VC exception
+ */
+ result = ES_UNSUPPORTED;
+ }
+
+ return result;
+}
+
+static __always_inline bool is_vc2_stack(unsigned long sp)
+{
+ return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
+}
+
+static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
+{
+ unsigned long sp, prev_sp;
+
+ sp = (unsigned long)regs;
+ prev_sp = regs->sp;
+
+ /*
+ * If the code was already executing on the VC2 stack when the #VC
+ * happened, let it proceed to the normal handling routine. This way the
+ * code executing on the VC2 stack can cause #VC exceptions to get handled.
+ */
+ return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
+}
+
+static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
+{
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+ struct ghcb *ghcb;
+ bool ret = true;
+
+ ghcb = __sev_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, error_code);
+
+ if (result == ES_OK)
+ result = vc_handle_exitcode(&ctxt, ghcb, error_code);
+
+ __sev_put_ghcb(&state);
+
+ /* Done - now check the result */
+ switch (result) {
+ case ES_OK:
+ vc_finish_insn(&ctxt);
+ break;
+ case ES_UNSUPPORTED:
+ pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
+ error_code, regs->ip);
+ ret = false;
+ break;
+ case ES_VMM_ERROR:
+ pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+ error_code, regs->ip);
+ ret = false;
+ break;
+ case ES_DECODE_FAILED:
+ pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+ error_code, regs->ip);
+ ret = false;
+ break;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ break;
+ case ES_RETRY:
+ /* Nothing to do */
+ break;
+ default:
+ pr_emerg("Unknown result in %s():%d\n", __func__, result);
+ /*
+ * Emulating the instruction which caused the #VC exception
+ * failed - can't continue so print debug information
+ */
+ BUG();
+ }
+
+ return ret;
+}
+
+static __always_inline bool vc_is_db(unsigned long error_code)
+{
+ return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
+}
+
+/*
+ * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
+ * and will panic when an error happens.
+ */
+DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
+{
+ irqentry_state_t irq_state;
+
+ /*
+ * With the current implementation it is always possible to switch to a
+ * safe stack because #VC exceptions only happen at known places, like
+ * intercepted instructions or accesses to MMIO areas/IO ports. They can
+ * also happen with code instrumentation when the hypervisor intercepts
+ * #DB, but the critical paths are forbidden to be instrumented, so #DB
+ * exceptions currently also only happen in safe places.
+ *
+ * But keep this here in case the noinstr annotations are violated due
+ * to bug elsewhere.
+ */
+ if (unlikely(vc_from_invalid_context(regs))) {
+ instrumentation_begin();
+ panic("Can't handle #VC exception from unsupported context\n");
+ instrumentation_end();
+ }
+
+ /*
+ * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+ */
+ if (vc_is_db(error_code)) {
+ exc_debug(regs);
+ return;
+ }
+
+ irq_state = irqentry_nmi_enter(regs);
+
+ instrumentation_begin();
+
+ if (!vc_raw_handle_exception(regs, error_code)) {
+ /* Show some debug info */
+ show_regs(regs);
+
+ /* Ask hypervisor to sev_es_terminate */
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ /* If that fails and we get here - just panic */
+ panic("Returned from Terminate-Request to Hypervisor\n");
+ }
+
+ instrumentation_end();
+ irqentry_nmi_exit(regs, irq_state);
+}
+
+/*
+ * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
+ * and will kill the current task with SIGBUS when an error happens.
+ */
+DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
+{
+ /*
+ * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+ */
+ if (vc_is_db(error_code)) {
+ noist_exc_debug(regs);
+ return;
+ }
+
+ irqentry_enter_from_user_mode(regs);
+ instrumentation_begin();
+
+ if (!vc_raw_handle_exception(regs, error_code)) {
+ /*
+ * Do not kill the machine if user-space triggered the
+ * exception. Send SIGBUS instead and let user-space deal with
+ * it.
+ */
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+ }
+
+ instrumentation_end();
+ irqentry_exit_to_user_mode(regs);
+}
+
+bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
+{
+ unsigned long exit_code = regs->orig_ax;
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+
+ vc_ghcb_invalidate(boot_ghcb);
+
+ result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+ if (result == ES_OK)
+ result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
+
+ /* Done - now check the result */
+ switch (result) {
+ case ES_OK:
+ vc_finish_insn(&ctxt);
+ break;
+ case ES_UNSUPPORTED:
+ early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+ exit_code, regs->ip);
+ goto fail;
+ case ES_VMM_ERROR:
+ early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+ exit_code, regs->ip);
+ goto fail;
+ case ES_DECODE_FAILED:
+ early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+ exit_code, regs->ip);
+ goto fail;
+ case ES_EXCEPTION:
+ vc_early_forward_exception(&ctxt);
+ break;
+ case ES_RETRY:
+ /* Nothing to do */
+ break;
+ default:
+ BUG();
+ }
+
+ return true;
+
+fail:
+ show_regs(regs);
+
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
diff --git a/arch/x86/coco/sev/vc-shared.c b/arch/x86/coco/sev/vc-shared.c
new file mode 100644
index 000000000000..2c0ab0fdc060
--- /dev/null
+++ b/arch/x86/coco/sev/vc-shared.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
+ unsigned long exit_code)
+{
+ unsigned int opcode = (unsigned int)ctxt->insn.opcode.value;
+ u8 modrm = ctxt->insn.modrm.value;
+
+ switch (exit_code) {
+
+ case SVM_EXIT_IOIO:
+ case SVM_EXIT_NPF:
+ /* handled separately */
+ return ES_OK;
+
+ case SVM_EXIT_CPUID:
+ if (opcode == 0xa20f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_INVD:
+ if (opcode == 0x080f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MONITOR:
+ /* MONITOR and MONITORX instructions generate the same error code */
+ if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa))
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MWAIT:
+ /* MWAIT and MWAITX instructions generate the same error code */
+ if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb))
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MSR:
+ /* RDMSR */
+ if (opcode == 0x320f ||
+ /* WRMSR */
+ opcode == 0x300f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDPMC:
+ if (opcode == 0x330f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDTSC:
+ if (opcode == 0x310f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDTSCP:
+ if (opcode == 0x010f && modrm == 0xf9)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_READ_DR7:
+ if (opcode == 0x210f &&
+ X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_VMMCALL:
+ if (opcode == 0x010f && modrm == 0xd9)
+ return ES_OK;
+
+ break;
+
+ case SVM_EXIT_WRITE_DR7:
+ if (opcode == 0x230f &&
+ X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_WBINVD:
+ if (opcode == 0x90f)
+ return ES_OK;
+ break;
+
+ default:
+ break;
+ }
+
+ sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n",
+ opcode, exit_code, ctxt->regs->ip);
+
+ return ES_UNSUPPORTED;
+}
+
+static bool vc_decoding_needed(unsigned long exit_code)
+{
+ /* Exceptions don't require to decode the instruction */
+ return !(exit_code >= SVM_EXIT_EXCP_BASE &&
+ exit_code <= SVM_EXIT_LAST_EXCP);
+}
+
+static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
+ struct pt_regs *regs,
+ unsigned long exit_code)
+{
+ enum es_result ret = ES_OK;
+
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->regs = regs;
+
+ if (vc_decoding_needed(exit_code))
+ ret = vc_decode_insn(ctxt);
+
+ return ret;
+}
+
+static void vc_finish_insn(struct es_em_ctxt *ctxt)
+{
+ ctxt->regs->ip += ctxt->insn.length;
+}
+
+static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
+ unsigned long address,
+ bool write)
+{
+ if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_USER;
+ ctxt->fi.cr2 = address;
+ if (write)
+ ctxt->fi.error_code |= X86_PF_WRITE;
+
+ return ES_EXCEPTION;
+ }
+
+ return ES_OK;
+}
+
+static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
+ void *src, char *buf,
+ unsigned int data_size,
+ unsigned int count,
+ bool backwards)
+{
+ int i, b = backwards ? -1 : 1;
+ unsigned long address = (unsigned long)src;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, false);
+ if (ret != ES_OK)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ void *s = src + (i * data_size * b);
+ char *d = buf + (i * data_size);
+
+ ret = vc_read_mem(ctxt, s, d, data_size);
+ if (ret != ES_OK)
+ break;
+ }
+
+ return ret;
+}
+
+static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
+ void *dst, char *buf,
+ unsigned int data_size,
+ unsigned int count,
+ bool backwards)
+{
+ int i, s = backwards ? -1 : 1;
+ unsigned long address = (unsigned long)dst;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, true);
+ if (ret != ES_OK)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ void *d = dst + (i * data_size * s);
+ char *b = buf + (i * data_size);
+
+ ret = vc_write_mem(ctxt, d, b, data_size);
+ if (ret != ES_OK)
+ break;
+ }
+
+ return ret;
+}
+
+#define IOIO_TYPE_STR BIT(2)
+#define IOIO_TYPE_IN 1
+#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR)
+#define IOIO_TYPE_OUT 0
+#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
+
+#define IOIO_REP BIT(3)
+
+#define IOIO_ADDR_64 BIT(9)
+#define IOIO_ADDR_32 BIT(8)
+#define IOIO_ADDR_16 BIT(7)
+
+#define IOIO_DATA_32 BIT(6)
+#define IOIO_DATA_16 BIT(5)
+#define IOIO_DATA_8 BIT(4)
+
+#define IOIO_SEG_ES (0 << 10)
+#define IOIO_SEG_DS (3 << 10)
+
+static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+{
+ struct insn *insn = &ctxt->insn;
+ size_t size;
+ u64 port;
+
+ *exitinfo = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ /* INS opcodes */
+ case 0x6c:
+ case 0x6d:
+ *exitinfo |= IOIO_TYPE_INS;
+ *exitinfo |= IOIO_SEG_ES;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* OUTS opcodes */
+ case 0x6e:
+ case 0x6f:
+ *exitinfo |= IOIO_TYPE_OUTS;
+ *exitinfo |= IOIO_SEG_DS;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* IN immediate opcodes */
+ case 0xe4:
+ case 0xe5:
+ *exitinfo |= IOIO_TYPE_IN;
+ port = (u8)insn->immediate.value & 0xffff;
+ break;
+
+ /* OUT immediate opcodes */
+ case 0xe6:
+ case 0xe7:
+ *exitinfo |= IOIO_TYPE_OUT;
+ port = (u8)insn->immediate.value & 0xffff;
+ break;
+
+ /* IN register opcodes */
+ case 0xec:
+ case 0xed:
+ *exitinfo |= IOIO_TYPE_IN;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* OUT register opcodes */
+ case 0xee:
+ case 0xef:
+ *exitinfo |= IOIO_TYPE_OUT;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ default:
+ return ES_DECODE_FAILED;
+ }
+
+ *exitinfo |= port << 16;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0x6c:
+ case 0x6e:
+ case 0xe4:
+ case 0xe6:
+ case 0xec:
+ case 0xee:
+ /* Single byte opcodes */
+ *exitinfo |= IOIO_DATA_8;
+ size = 1;
+ break;
+ default:
+ /* Length determined by instruction parsing */
+ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
+ : IOIO_DATA_32;
+ size = (insn->opnd_bytes == 2) ? 2 : 4;
+ }
+
+ switch (insn->addr_bytes) {
+ case 2:
+ *exitinfo |= IOIO_ADDR_16;
+ break;
+ case 4:
+ *exitinfo |= IOIO_ADDR_32;
+ break;
+ case 8:
+ *exitinfo |= IOIO_ADDR_64;
+ break;
+ }
+
+ if (insn_has_rep_prefix(insn))
+ *exitinfo |= IOIO_REP;
+
+ return vc_ioio_check(ctxt, (u16)port, size);
+}
+
+static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ u64 exit_info_1, exit_info_2;
+ enum es_result ret;
+
+ ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
+ if (ret != ES_OK)
+ return ret;
+
+ if (exit_info_1 & IOIO_TYPE_STR) {
+
+ /* (REP) INS/OUTS */
+
+ bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
+ unsigned int io_bytes, exit_bytes;
+ unsigned int ghcb_count, op_count;
+ unsigned long es_base;
+ u64 sw_scratch;
+
+ /*
+ * For the string variants with rep prefix the amount of in/out
+ * operations per #VC exception is limited so that the kernel
+ * has a chance to take interrupts and re-schedule while the
+ * instruction is emulated.
+ */
+ io_bytes = (exit_info_1 >> 4) & 0x7;
+ ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
+
+ op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
+ exit_info_2 = min(op_count, ghcb_count);
+ exit_bytes = exit_info_2 * io_bytes;
+
+ es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+ /* Read bytes of OUTS into the shared buffer */
+ if (!(exit_info_1 & IOIO_TYPE_IN)) {
+ ret = vc_insn_string_read(ctxt,
+ (void *)(es_base + regs->si),
+ ghcb->shared_buffer, io_bytes,
+ exit_info_2, df);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Issue an VMGEXIT to the HV to consume the bytes from the
+ * shared buffer or to have it write them into the shared buffer
+ * depending on the instruction: OUTS or INS.
+ */
+ sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
+ ghcb_set_sw_scratch(ghcb, sw_scratch);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
+ exit_info_1, exit_info_2);
+ if (ret != ES_OK)
+ return ret;
+
+ /* Read bytes from shared buffer into the guest's destination. */
+ if (exit_info_1 & IOIO_TYPE_IN) {
+ ret = vc_insn_string_write(ctxt,
+ (void *)(es_base + regs->di),
+ ghcb->shared_buffer, io_bytes,
+ exit_info_2, df);
+ if (ret)
+ return ret;
+
+ if (df)
+ regs->di -= exit_bytes;
+ else
+ regs->di += exit_bytes;
+ } else {
+ if (df)
+ regs->si -= exit_bytes;
+ else
+ regs->si += exit_bytes;
+ }
+
+ if (exit_info_1 & IOIO_REP)
+ regs->cx -= exit_info_2;
+
+ ret = regs->cx ? ES_RETRY : ES_OK;
+
+ } else {
+
+ /* IN/OUT into/from rAX */
+
+ int bits = (exit_info_1 & 0x70) >> 1;
+ u64 rax = 0;
+
+ if (!(exit_info_1 & IOIO_TYPE_IN))
+ rax = lower_bits(regs->ax, bits);
+
+ ghcb_set_rax(ghcb, rax);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (exit_info_1 & IOIO_TYPE_IN) {
+ if (!ghcb_rax_is_valid(ghcb))
+ return ES_VMM_ERROR;
+ regs->ax = lower_bits(ghcb->save.rax, bits);
+ }
+ }
+
+ return ret;
+}
+
+static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ struct cpuid_leaf leaf;
+ int ret;
+
+ leaf.fn = regs->ax;
+ leaf.subfn = regs->cx;
+ ret = snp_cpuid(ghcb, ctxt, &leaf);
+ if (!ret) {
+ regs->ax = leaf.eax;
+ regs->bx = leaf.ebx;
+ regs->cx = leaf.ecx;
+ regs->dx = leaf.edx;
+ }
+
+ return ret;
+}
+
+static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ u32 cr4 = native_read_cr4();
+ enum es_result ret;
+ int snp_cpuid_ret;
+
+ snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
+ if (!snp_cpuid_ret)
+ return ES_OK;
+ if (snp_cpuid_ret != -EOPNOTSUPP)
+ return ES_VMM_ERROR;
+
+ ghcb_set_rax(ghcb, regs->ax);
+ ghcb_set_rcx(ghcb, regs->cx);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #GP - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ regs->ax = ghcb->save.rax;
+ regs->bx = ghcb->save.rbx;
+ regs->cx = ghcb->save.rcx;
+ regs->dx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ unsigned long exit_code)
+{
+ bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
+ enum es_result ret;
+
+ /*
+ * The hypervisor should not be intercepting RDTSC/RDTSCP when Secure
+ * TSC is enabled. A #VC exception will be generated if the RDTSC/RDTSCP
+ * instructions are being intercepted. If this should occur and Secure
+ * TSC is enabled, guest execution should be terminated as the guest
+ * cannot rely on the TSC value provided by the hypervisor.
+ */
+ if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
+ return ES_VMM_ERROR;
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
+ (!rdtscp || ghcb_rcx_is_valid(ghcb))))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+ ctxt->regs->dx = ghcb->save.rdx;
+ if (rdtscp)
+ ctxt->regs->cx = ghcb->save.rcx;
+
+ return ES_OK;
+}
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 91801138b10b..7cd2f395f301 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,7 +1,6 @@
CONFIG_WERROR=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_USELIB=y
CONFIG_AUDIT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 3d948f10c94c..56cfdc79e2c6 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -4,7 +4,7 @@ menu "Accelerated Cryptographic Algorithms for CPU (x86)"
config CRYPTO_CURVE25519_X86
tristate
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_KPP
select CRYPTO_LIB_CURVE25519_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CURVE25519
@@ -17,13 +17,11 @@ config CRYPTO_CURVE25519_X86
config CRYPTO_AES_NI_INTEL
tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XCTR, XTS, GCM (AES-NI/VAES)"
- depends on X86
select CRYPTO_AEAD
select CRYPTO_LIB_AES
select CRYPTO_LIB_GF128MUL
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
help
Block cipher: AES cipher algorithms
AEAD cipher: AES with GCM
@@ -38,7 +36,7 @@ config CRYPTO_AES_NI_INTEL
config CRYPTO_BLOWFISH_X86_64
tristate "Ciphers: Blowfish, modes: ECB, CBC"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_BLOWFISH_COMMON
imply CRYPTO_CTR
@@ -50,7 +48,7 @@ config CRYPTO_BLOWFISH_X86_64
config CRYPTO_CAMELLIA_X86_64
tristate "Ciphers: Camellia with modes: ECB, CBC"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
imply CRYPTO_CTR
help
@@ -61,10 +59,9 @@ config CRYPTO_CAMELLIA_X86_64
config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_CAMELLIA_X86_64
- select CRYPTO_SIMD
imply CRYPTO_XTS
help
Length-preserving ciphers: Camellia with ECB and CBC modes
@@ -75,7 +72,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
help
Length-preserving ciphers: Camellia with ECB and CBC modes
@@ -86,11 +83,10 @@ config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
config CRYPTO_CAST5_AVX_X86_64
tristate "Ciphers: CAST5 with modes: ECB, CBC (AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_CAST5
select CRYPTO_CAST_COMMON
- select CRYPTO_SIMD
imply CRYPTO_CTR
help
Length-preserving ciphers: CAST5 (CAST-128) cipher algorithm
@@ -103,11 +99,10 @@ config CRYPTO_CAST5_AVX_X86_64
config CRYPTO_CAST6_AVX_X86_64
tristate "Ciphers: CAST6 with modes: ECB, CBC (AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_CAST6
select CRYPTO_CAST_COMMON
- select CRYPTO_SIMD
imply CRYPTO_XTS
imply CRYPTO_CTR
help
@@ -121,7 +116,7 @@ config CRYPTO_CAST6_AVX_X86_64
config CRYPTO_DES3_EDE_X86_64
tristate "Ciphers: Triple DES EDE with modes: ECB, CBC"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_LIB_DES
imply CRYPTO_CTR
@@ -135,10 +130,9 @@ config CRYPTO_DES3_EDE_X86_64
config CRYPTO_SERPENT_SSE2_X86_64
tristate "Ciphers: Serpent with modes: ECB, CBC (SSE2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SERPENT
- select CRYPTO_SIMD
imply CRYPTO_CTR
help
Length-preserving ciphers: Serpent cipher algorithm
@@ -151,10 +145,9 @@ config CRYPTO_SERPENT_SSE2_X86_64
config CRYPTO_SERPENT_SSE2_586
tristate "Ciphers: Serpent with modes: ECB, CBC (32-bit with SSE2)"
- depends on X86 && !64BIT
+ depends on !64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SERPENT
- select CRYPTO_SIMD
imply CRYPTO_CTR
help
Length-preserving ciphers: Serpent cipher algorithm
@@ -167,10 +160,9 @@ config CRYPTO_SERPENT_SSE2_586
config CRYPTO_SERPENT_AVX_X86_64
tristate "Ciphers: Serpent with modes: ECB, CBC (AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SERPENT
- select CRYPTO_SIMD
imply CRYPTO_XTS
imply CRYPTO_CTR
help
@@ -184,7 +176,7 @@ config CRYPTO_SERPENT_AVX_X86_64
config CRYPTO_SERPENT_AVX2_X86_64
tristate "Ciphers: Serpent with modes: ECB, CBC (AVX2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SERPENT_AVX_X86_64
help
Length-preserving ciphers: Serpent cipher algorithm
@@ -197,9 +189,8 @@ config CRYPTO_SERPENT_AVX2_X86_64
config CRYPTO_SM4_AESNI_AVX_X86_64
tristate "Ciphers: SM4 with modes: ECB, CBC, CTR (AES-NI/AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_SM4
help
@@ -218,9 +209,8 @@ config CRYPTO_SM4_AESNI_AVX_X86_64
config CRYPTO_SM4_AESNI_AVX2_X86_64
tristate "Ciphers: SM4 with modes: ECB, CBC, CTR (AES-NI/AVX2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_SM4
select CRYPTO_SM4_AESNI_AVX_X86_64
@@ -240,7 +230,7 @@ config CRYPTO_SM4_AESNI_AVX2_X86_64
config CRYPTO_TWOFISH_586
tristate "Ciphers: Twofish (32-bit)"
- depends on (X86 || UML_X86) && !64BIT
+ depends on !64BIT
select CRYPTO_ALGAPI
select CRYPTO_TWOFISH_COMMON
imply CRYPTO_CTR
@@ -251,7 +241,7 @@ config CRYPTO_TWOFISH_586
config CRYPTO_TWOFISH_X86_64
tristate "Ciphers: Twofish"
- depends on (X86 || UML_X86) && 64BIT
+ depends on 64BIT
select CRYPTO_ALGAPI
select CRYPTO_TWOFISH_COMMON
imply CRYPTO_CTR
@@ -262,7 +252,7 @@ config CRYPTO_TWOFISH_X86_64
config CRYPTO_TWOFISH_X86_64_3WAY
tristate "Ciphers: Twofish with modes: ECB, CBC (3-way parallel)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_TWOFISH_COMMON
select CRYPTO_TWOFISH_X86_64
@@ -277,9 +267,8 @@ config CRYPTO_TWOFISH_X86_64_3WAY
config CRYPTO_TWOFISH_AVX_X86_64
tristate "Ciphers: Twofish with modes: ECB, CBC (AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
select CRYPTO_TWOFISH_COMMON
select CRYPTO_TWOFISH_X86_64
select CRYPTO_TWOFISH_X86_64_3WAY
@@ -295,9 +284,8 @@ config CRYPTO_TWOFISH_AVX_X86_64
config CRYPTO_ARIA_AESNI_AVX_X86_64
tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX/GFNI)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_ARIA
help
@@ -313,9 +301,8 @@ config CRYPTO_ARIA_AESNI_AVX_X86_64
config CRYPTO_ARIA_AESNI_AVX2_X86_64
tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX2/GFNI)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_ARIA
select CRYPTO_ARIA_AESNI_AVX_X86_64
@@ -332,9 +319,8 @@ config CRYPTO_ARIA_AESNI_AVX2_X86_64
config CRYPTO_ARIA_GFNI_AVX512_X86_64
tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)"
- depends on X86 && 64BIT && AS_AVX512 && AS_GFNI
+ depends on 64BIT && AS_GFNI
select CRYPTO_SKCIPHER
- select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_ARIA
select CRYPTO_ARIA_AESNI_AVX_X86_64
@@ -349,27 +335,10 @@ config CRYPTO_ARIA_GFNI_AVX512_X86_64
Processes 64 blocks in parallel.
-config CRYPTO_CHACHA20_X86_64
- tristate
- depends on X86 && 64BIT
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- default CRYPTO_LIB_CHACHA_INTERNAL
- help
- Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
- stream cipher algorithms
-
- Architecture: x86_64 using:
- - SSSE3 (Supplemental SSE3)
- - AVX2 (Advanced Vector Extensions 2)
- - AVX-512VL (Advanced Vector Extensions-512VL)
-
config CRYPTO_AEGIS128_AESNI_SSE2
tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE4.1)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_AEAD
- select CRYPTO_SIMD
help
AEGIS-128 AEAD algorithm
@@ -379,7 +348,7 @@ config CRYPTO_AEGIS128_AESNI_SSE2
config CRYPTO_NHPOLY1305_SSE2
tristate "Hash functions: NHPoly1305 (SSE2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_NHPOLY1305
help
NHPoly1305 hash function for Adiantum
@@ -389,7 +358,7 @@ config CRYPTO_NHPOLY1305_SSE2
config CRYPTO_NHPOLY1305_AVX2
tristate "Hash functions: NHPoly1305 (AVX2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_NHPOLY1305
help
NHPoly1305 hash function for Adiantum
@@ -397,21 +366,9 @@ config CRYPTO_NHPOLY1305_AVX2
Architecture: x86_64 using:
- AVX2 (Advanced Vector Extensions 2)
-config CRYPTO_BLAKE2S_X86
- bool "Hash functions: BLAKE2s (SSSE3/AVX-512)"
- depends on X86 && 64BIT
- select CRYPTO_LIB_BLAKE2S_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
- help
- BLAKE2s cryptographic hash function (RFC 7693)
-
- Architecture: x86_64 using:
- - SSSE3 (Supplemental SSE3)
- - AVX-512 (Advanced Vector Extensions-512)
-
config CRYPTO_POLYVAL_CLMUL_NI
tristate "Hash functions: POLYVAL (CLMUL-NI)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_POLYVAL
help
POLYVAL hash function for HCTR2
@@ -419,23 +376,9 @@ config CRYPTO_POLYVAL_CLMUL_NI
Architecture: x86_64 using:
- CLMUL-NI (carry-less multiplication new instructions)
-config CRYPTO_POLY1305_X86_64
- tristate
- depends on X86 && 64BIT
- select CRYPTO_HASH
- select CRYPTO_LIB_POLY1305_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_POLY1305
- default CRYPTO_LIB_POLY1305_INTERNAL
- help
- Poly1305 authenticator algorithm (RFC7539)
-
- Architecture: x86_64 using:
- - SSE2 (Streaming SIMD Extensions 2)
- - AVX2 (Advanced Vector Extensions 2)
-
config CRYPTO_SHA1_SSSE3
tristate "Hash functions: SHA-1 (SSSE3/AVX/AVX2/SHA-NI)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SHA1
select CRYPTO_HASH
help
@@ -447,23 +390,9 @@ config CRYPTO_SHA1_SSSE3
- AVX2 (Advanced Vector Extensions 2)
- SHA-NI (SHA Extensions New Instructions)
-config CRYPTO_SHA256_SSSE3
- tristate "Hash functions: SHA-224 and SHA-256 (SSSE3/AVX/AVX2/SHA-NI)"
- depends on X86 && 64BIT
- select CRYPTO_SHA256
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: x86_64 using:
- - SSSE3 (Supplemental SSE3)
- - AVX (Advanced Vector Extensions)
- - AVX2 (Advanced Vector Extensions 2)
- - SHA-NI (SHA Extensions New Instructions)
-
config CRYPTO_SHA512_SSSE3
tristate "Hash functions: SHA-384 and SHA-512 (SSSE3/AVX/AVX2)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_SHA512
select CRYPTO_HASH
help
@@ -476,9 +405,9 @@ config CRYPTO_SHA512_SSSE3
config CRYPTO_SM3_AVX_X86_64
tristate "Hash functions: SM3 (AVX)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_HASH
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3
@@ -489,7 +418,7 @@ config CRYPTO_SM3_AVX_X86_64
config CRYPTO_GHASH_CLMUL_NI_INTEL
tristate "Hash functions: GHASH (CLMUL-NI)"
- depends on X86 && 64BIT
+ depends on 64BIT
select CRYPTO_CRYPTD
help
GCM GHASH hash function (NIST SP800-38D)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5d19f41bde58..aa289a9e0153 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -42,10 +42,6 @@ cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
-obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o
-chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha_glue.o
-chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o
-
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \
@@ -56,29 +52,17 @@ aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o
endif
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
-sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o
-sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o
-
-obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
-sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
-sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
+sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ni_asm.o sha1_ssse3_glue.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
-obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
-libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
-
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
-obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
-poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
-targets += poly1305-x86_64-cryptogams.S
-
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
@@ -104,10 +88,5 @@ aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o
obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o
aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o
-quiet_cmd_perlasm = PERLASM $@
- cmd_perlasm = $(PERL) $< > $@
-$(obj)/%.S: $(src)/%.pl FORCE
- $(call if_changed,perlasm)
-
# Disable GCOV in odd or sensitive code
GCOV_PROFILE_curve25519-x86_64.o := n
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 26786e15abac..f1b6d40154e3 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -8,7 +8,6 @@
*/
#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
@@ -233,21 +232,18 @@ static struct aead_alg crypto_aegis128_aesni_alg = {
.chunksize = AEGIS128_BLOCK_SIZE,
.base = {
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_priority = 400,
- .cra_name = "__aegis128",
- .cra_driver_name = "__aegis128-aesni",
+ .cra_name = "aegis128",
+ .cra_driver_name = "aegis128-aesni",
.cra_module = THIS_MODULE,
}
};
-static struct simd_aead_alg *simd_alg;
-
static int __init crypto_aegis128_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM4_1) ||
@@ -255,13 +251,12 @@ static int __init crypto_aegis128_aesni_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
- return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1,
- &simd_alg);
+ return crypto_register_aead(&crypto_aegis128_aesni_alg);
}
static void __exit crypto_aegis128_aesni_module_exit(void)
{
- simd_unregister_aeads(&crypto_aegis128_aesni_alg, 1, &simd_alg);
+ crypto_unregister_aead(&crypto_aegis128_aesni_alg);
}
module_init(crypto_aegis128_aesni_module_init);
diff --git a/arch/x86/crypto/aes-ctr-avx-x86_64.S b/arch/x86/crypto/aes-ctr-avx-x86_64.S
index 1685d8b24b2c..bbbfd80f5a50 100644
--- a/arch/x86/crypto/aes-ctr-avx-x86_64.S
+++ b/arch/x86/crypto/aes-ctr-avx-x86_64.S
@@ -48,8 +48,7 @@
// using the following sets of CPU features:
// - AES-NI && AVX
// - VAES && AVX2
-// - VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2
-// - VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2
+// - VAES && AVX512BW && AVX512VL && BMI2
//
// See the function definitions at the bottom of the file for more information.
@@ -76,7 +75,6 @@
.text
// Move a vector between memory and a register.
-// The register operand must be in the first 16 vector registers.
.macro _vmovdqu src, dst
.if VL < 64
vmovdqu \src, \dst
@@ -86,7 +84,6 @@
.endm
// Move a vector between registers.
-// The registers must be in the first 16 vector registers.
.macro _vmovdqa src, dst
.if VL < 64
vmovdqa \src, \dst
@@ -96,7 +93,7 @@
.endm
// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
-// register. The register operand must be in the first 16 vector registers.
+// register.
.macro _vbroadcast128 src, dst
.if VL == 16
vmovdqu \src, \dst
@@ -108,7 +105,6 @@
.endm
// XOR two vectors together.
-// Any register operands must be in the first 16 vector registers.
.macro _vpxor src1, src2, dst
.if VL < 64
vpxor \src1, \src2, \dst
@@ -199,8 +195,8 @@
// XOR each with the zero-th round key. Also update LE_CTR if !\final.
.macro _prepare_2_ctr_vecs is_xctr, i0, i1, final=0
.if \is_xctr
- .if USE_AVX10
- _vmovdqa LE_CTR, AESDATA\i0
+ .if USE_AVX512
+ vmovdqa64 LE_CTR, AESDATA\i0
vpternlogd $0x96, XCTR_IV, RNDKEY0, AESDATA\i0
.else
vpxor XCTR_IV, LE_CTR, AESDATA\i0
@@ -208,7 +204,7 @@
.endif
vpaddq LE_CTR_INC1, LE_CTR, AESDATA\i1
- .if USE_AVX10
+ .if USE_AVX512
vpternlogd $0x96, XCTR_IV, RNDKEY0, AESDATA\i1
.else
vpxor XCTR_IV, AESDATA\i1, AESDATA\i1
@@ -481,18 +477,12 @@
.Lxor_tail_partial_vec_0\@:
// XOR the remaining 1 <= LEN < VL bytes. It's easy if masked
// loads/stores are available; otherwise it's a bit harder...
-.if USE_AVX10
- .if VL <= 32
- mov $-1, %eax
- bzhi LEN, %eax, %eax
- kmovd %eax, %k1
- .else
+.if USE_AVX512
mov $-1, %rax
bzhi LEN64, %rax, %rax
kmovq %rax, %k1
- .endif
vmovdqu8 (SRC), AESDATA1{%k1}{z}
- _vpxor AESDATA1, AESDATA0, AESDATA0
+ vpxord AESDATA1, AESDATA0, AESDATA0
vmovdqu8 AESDATA0, (DST){%k1}
.else
.if VL == 32
@@ -554,7 +544,7 @@
// eliminates carries. |ctr| is the per-message block counter starting at 1.
.set VL, 16
-.set USE_AVX10, 0
+.set USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
_aes_ctr_crypt 0
SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
@@ -564,7 +554,7 @@ SYM_FUNC_END(aes_xctr_crypt_aesni_avx)
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set VL, 32
-.set USE_AVX10, 0
+.set USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
_aes_ctr_crypt 0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
@@ -572,21 +562,12 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
_aes_ctr_crypt 1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)
-.set VL, 32
-.set USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256)
- _aes_ctr_crypt 0
-SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256)
-SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256)
- _aes_ctr_crypt 1
-SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256)
-
.set VL, 64
-.set USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512)
+.set USE_AVX512, 1
+SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx512)
_aes_ctr_crypt 0
-SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512)
-SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512)
+SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512)
+SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512)
_aes_ctr_crypt 1
-SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512)
+SYM_FUNC_END(aes_xctr_crypt_vaes_avx512)
#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ
diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index 93ba0ddbe009..db79cdf81588 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -52,32 +52,25 @@
* different code, it uses a macro to generate several implementations that
* share similar source code but are targeted at different CPUs, listed below:
*
- * AES-NI + AVX
+ * AES-NI && AVX
* - 128-bit vectors (1 AES block per vector)
* - VEX-coded instructions
* - xmm0-xmm15
* - This is for older CPUs that lack VAES but do have AVX.
*
- * VAES + VPCLMULQDQ + AVX2
+ * VAES && VPCLMULQDQ && AVX2
* - 256-bit vectors (2 AES blocks per vector)
* - VEX-coded instructions
* - ymm0-ymm15
- * - This is for CPUs that have VAES but lack AVX512 or AVX10,
- * e.g. Intel's Alder Lake and AMD's Zen 3.
+ * - This is for CPUs that have VAES but either lack AVX512 (e.g. Intel's
+ * Alder Lake and AMD's Zen 3) or downclock too eagerly when using zmm
+ * registers (e.g. Intel's Ice Lake).
*
- * VAES + VPCLMULQDQ + AVX10/256 + BMI2
- * - 256-bit vectors (2 AES blocks per vector)
+ * VAES && VPCLMULQDQ && AVX512BW && AVX512VL && BMI2
+ * - 512-bit vectors (4 AES blocks per vector)
* - EVEX-coded instructions
- * - ymm0-ymm31
- * - This is for CPUs that have AVX512 but where using zmm registers causes
- * downclocking, and for CPUs that have AVX10/256 but not AVX10/512.
- * - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256.
- * To avoid confusion with 512-bit, we just write AVX10/256.
- *
- * VAES + VPCLMULQDQ + AVX10/512 + BMI2
- * - Same as the previous one, but upgrades to 512-bit vectors
- * (4 AES blocks per vector) in zmm0-zmm31.
- * - This is for CPUs that have good AVX512 or AVX10/512 support.
+ * - zmm0-zmm31
+ * - This is for CPUs that have good AVX512 support.
*
* This file doesn't have an implementation for AES-NI alone (without AVX), as
* the lack of VEX would make all the assembly code different.
@@ -107,9 +100,20 @@
// exists when there's a carry out of the low 64 bits of the tweak.
.quad 0x87, 1
+ // These are the shift amounts that are needed when multiplying by [x^0,
+ // x^1, x^2, x^3] to compute the first vector of tweaks when VL=64.
+ //
+ // The right shifts by 64 are expected to zeroize the destination.
+ // 'vpsrlvq' is indeed defined to do that; i.e. it doesn't truncate the
+ // amount to 64 & 63 = 0 like the 'shr' scalar shift instruction would.
+.Lrshift_amounts:
+ .byte 64, 64, 63, 63, 62, 62, 61, 61
+.Llshift_amounts:
+ .byte 0, 0, 1, 1, 2, 2, 3, 3
+
// This table contains constants for vpshufb and vpblendvb, used to
// handle variable byte shifts and blending during ciphertext stealing
- // on CPUs that don't support AVX10-style masking.
+ // on CPUs that don't support AVX512-style masking.
.Lcts_permute_table:
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
@@ -138,7 +142,7 @@
.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
_define_Vi \i
.endr
-.if USE_AVX10
+.if USE_AVX512
.irp i, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
_define_Vi \i
.endr
@@ -193,7 +197,7 @@
// keys to the *end* of this register range. I.e., AES-128 uses
// KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14.
// (All also use KEY0 for the XOR-only "round" at the beginning.)
-.if USE_AVX10
+.if USE_AVX512
.set KEY1_XMM, %xmm16
.set KEY1, V16
.set KEY2_XMM, %xmm17
@@ -227,7 +231,6 @@
.endm
// Move a vector between memory and a register.
-// The register operand must be in the first 16 vector registers.
.macro _vmovdqu src, dst
.if VL < 64
vmovdqu \src, \dst
@@ -238,9 +241,9 @@
// Broadcast a 128-bit value into a vector.
.macro _vbroadcast128 src, dst
-.if VL == 16 && !USE_AVX10
+.if VL == 16
vmovdqu \src, \dst
-.elseif VL == 32 && !USE_AVX10
+.elseif VL == 32
vbroadcasti128 \src, \dst
.else
vbroadcasti32x4 \src, \dst
@@ -248,7 +251,6 @@
.endm
// XOR two vectors together.
-// Any register operands must be in the first 16 vector registers.
.macro _vpxor src1, src2, dst
.if VL < 64
vpxor \src1, \src2, \dst
@@ -259,7 +261,7 @@
// XOR three vectors together.
.macro _xor3 src1, src2, src3_and_dst
-.if USE_AVX10
+.if USE_AVX512
// vpternlogd with immediate 0x96 is a three-argument XOR.
vpternlogd $0x96, \src1, \src2, \src3_and_dst
.else
@@ -274,7 +276,7 @@
vpshufd $0x13, \src, \tmp
vpaddq \src, \src, \dst
vpsrad $31, \tmp, \tmp
-.if USE_AVX10
+.if USE_AVX512
vpternlogd $0x78, GF_POLY_XMM, \tmp, \dst
.else
vpand GF_POLY_XMM, \tmp, \tmp
@@ -303,52 +305,75 @@
// Given the first XTS tweak at (TWEAK), compute the first set of tweaks and
// store them in the vector registers TWEAK0-TWEAK3. Clobbers V0-V5.
.macro _compute_first_set_of_tweaks
- vmovdqu (TWEAK), TWEAK0_XMM
- _vbroadcast128 .Lgf_poly(%rip), GF_POLY
.if VL == 16
- // With VL=16, multiplying by x serially is fastest.
+ vmovdqu (TWEAK), TWEAK0_XMM
+ vmovdqu .Lgf_poly(%rip), GF_POLY
_next_tweak TWEAK0, %xmm0, TWEAK1
_next_tweak TWEAK1, %xmm0, TWEAK2
_next_tweak TWEAK2, %xmm0, TWEAK3
-.else
-.if VL == 32
- // Compute the second block of TWEAK0.
+.elseif VL == 32
+ vmovdqu (TWEAK), TWEAK0_XMM
+ vbroadcasti128 .Lgf_poly(%rip), GF_POLY
+
+ // Compute the first vector of tweaks.
_next_tweak TWEAK0_XMM, %xmm0, %xmm1
vinserti128 $1, %xmm1, TWEAK0, TWEAK0
-.elseif VL == 64
- // Compute the remaining blocks of TWEAK0.
- _next_tweak TWEAK0_XMM, %xmm0, %xmm1
- _next_tweak %xmm1, %xmm0, %xmm2
- _next_tweak %xmm2, %xmm0, %xmm3
- vinserti32x4 $1, %xmm1, TWEAK0, TWEAK0
- vinserti32x4 $2, %xmm2, TWEAK0, TWEAK0
- vinserti32x4 $3, %xmm3, TWEAK0, TWEAK0
-.endif
- // Compute TWEAK[1-3] from TWEAK0.
- vpsrlq $64 - 1*VL/16, TWEAK0, V0
- vpsrlq $64 - 2*VL/16, TWEAK0, V2
- vpsrlq $64 - 3*VL/16, TWEAK0, V4
+
+ // Compute the next three vectors of tweaks:
+ // TWEAK1 = TWEAK0 * [x^2, x^2]
+ // TWEAK2 = TWEAK0 * [x^4, x^4]
+ // TWEAK3 = TWEAK0 * [x^6, x^6]
+ vpsrlq $64 - 2, TWEAK0, V0
+ vpsrlq $64 - 4, TWEAK0, V2
+ vpsrlq $64 - 6, TWEAK0, V4
vpclmulqdq $0x01, GF_POLY, V0, V1
vpclmulqdq $0x01, GF_POLY, V2, V3
vpclmulqdq $0x01, GF_POLY, V4, V5
vpslldq $8, V0, V0
vpslldq $8, V2, V2
vpslldq $8, V4, V4
- vpsllq $1*VL/16, TWEAK0, TWEAK1
- vpsllq $2*VL/16, TWEAK0, TWEAK2
- vpsllq $3*VL/16, TWEAK0, TWEAK3
-.if USE_AVX10
- vpternlogd $0x96, V0, V1, TWEAK1
- vpternlogd $0x96, V2, V3, TWEAK2
- vpternlogd $0x96, V4, V5, TWEAK3
-.else
+ vpsllq $2, TWEAK0, TWEAK1
+ vpsllq $4, TWEAK0, TWEAK2
+ vpsllq $6, TWEAK0, TWEAK3
vpxor V0, TWEAK1, TWEAK1
vpxor V2, TWEAK2, TWEAK2
vpxor V4, TWEAK3, TWEAK3
vpxor V1, TWEAK1, TWEAK1
vpxor V3, TWEAK2, TWEAK2
vpxor V5, TWEAK3, TWEAK3
-.endif
+.else
+ vbroadcasti32x4 (TWEAK), TWEAK0
+ vbroadcasti32x4 .Lgf_poly(%rip), GF_POLY
+
+ // Compute the first vector of tweaks:
+ // TWEAK0 = broadcast128(TWEAK) * [x^0, x^1, x^2, x^3]
+ vpmovzxbq .Lrshift_amounts(%rip), V4
+ vpsrlvq V4, TWEAK0, V0
+ vpclmulqdq $0x01, GF_POLY, V0, V1
+ vpmovzxbq .Llshift_amounts(%rip), V4
+ vpslldq $8, V0, V0
+ vpsllvq V4, TWEAK0, TWEAK0
+ vpternlogd $0x96, V0, V1, TWEAK0
+
+ // Compute the next three vectors of tweaks:
+ // TWEAK1 = TWEAK0 * [x^4, x^4, x^4, x^4]
+ // TWEAK2 = TWEAK0 * [x^8, x^8, x^8, x^8]
+ // TWEAK3 = TWEAK0 * [x^12, x^12, x^12, x^12]
+ // x^8 only needs byte-aligned shifts, so optimize accordingly.
+ vpsrlq $64 - 4, TWEAK0, V0
+ vpsrldq $(64 - 8) / 8, TWEAK0, V2
+ vpsrlq $64 - 12, TWEAK0, V4
+ vpclmulqdq $0x01, GF_POLY, V0, V1
+ vpclmulqdq $0x01, GF_POLY, V2, V3
+ vpclmulqdq $0x01, GF_POLY, V4, V5
+ vpslldq $8, V0, V0
+ vpslldq $8, V4, V4
+ vpsllq $4, TWEAK0, TWEAK1
+ vpslldq $8 / 8, TWEAK0, TWEAK2
+ vpsllq $12, TWEAK0, TWEAK3
+ vpternlogd $0x96, V0, V1, TWEAK1
+ vpxord V3, TWEAK2, TWEAK2
+ vpternlogd $0x96, V4, V5, TWEAK3
.endif
.endm
@@ -474,26 +499,26 @@
lea OFFS-16(KEY, KEYLEN64, 4), KEY
// If all 32 SIMD registers are available, cache all the round keys.
-.if USE_AVX10
+.if USE_AVX512
cmp $24, KEYLEN
jl .Laes128\@
je .Laes192\@
- _vbroadcast128 -6*16(KEY), KEY1
- _vbroadcast128 -5*16(KEY), KEY2
+ vbroadcasti32x4 -6*16(KEY), KEY1
+ vbroadcasti32x4 -5*16(KEY), KEY2
.Laes192\@:
- _vbroadcast128 -4*16(KEY), KEY3
- _vbroadcast128 -3*16(KEY), KEY4
+ vbroadcasti32x4 -4*16(KEY), KEY3
+ vbroadcasti32x4 -3*16(KEY), KEY4
.Laes128\@:
- _vbroadcast128 -2*16(KEY), KEY5
- _vbroadcast128 -1*16(KEY), KEY6
- _vbroadcast128 0*16(KEY), KEY7
- _vbroadcast128 1*16(KEY), KEY8
- _vbroadcast128 2*16(KEY), KEY9
- _vbroadcast128 3*16(KEY), KEY10
- _vbroadcast128 4*16(KEY), KEY11
- _vbroadcast128 5*16(KEY), KEY12
- _vbroadcast128 6*16(KEY), KEY13
- _vbroadcast128 7*16(KEY), KEY14
+ vbroadcasti32x4 -2*16(KEY), KEY5
+ vbroadcasti32x4 -1*16(KEY), KEY6
+ vbroadcasti32x4 0*16(KEY), KEY7
+ vbroadcasti32x4 1*16(KEY), KEY8
+ vbroadcasti32x4 2*16(KEY), KEY9
+ vbroadcasti32x4 3*16(KEY), KEY10
+ vbroadcasti32x4 4*16(KEY), KEY11
+ vbroadcasti32x4 5*16(KEY), KEY12
+ vbroadcasti32x4 6*16(KEY), KEY13
+ vbroadcasti32x4 7*16(KEY), KEY14
.endif
.endm
@@ -521,7 +546,7 @@
// using the same key for all block(s). The round key is loaded from the
// appropriate register or memory location for round \i. May clobber \tmp.
.macro _vaes_1x enc, i, xmm_suffix, data, tmp
-.if USE_AVX10
+.if USE_AVX512
_vaes \enc, KEY\i\xmm_suffix, \data
.else
.ifnb \xmm_suffix
@@ -538,7 +563,7 @@
// appropriate register or memory location for round \i. In addition, does two
// steps of the computation of the next set of tweaks. May clobber V4 and V5.
.macro _vaes_4x enc, i
-.if USE_AVX10
+.if USE_AVX512
_tweak_step (2*(\i-5))
_vaes \enc, KEY\i, V0
_vaes \enc, KEY\i, V1
@@ -574,7 +599,7 @@
.irp i, 5,6,7,8,9,10,11,12,13
_vaes_1x \enc, \i, \xmm_suffix, \data, tmp=\tmp
.endr
-.if USE_AVX10
+.if USE_AVX512
vpxord KEY14\xmm_suffix, \tweak, \tmp
.else
.ifnb \xmm_suffix
@@ -617,11 +642,11 @@
// This is the main loop, en/decrypting 4*VL bytes per iteration.
// XOR each source block with its tweak and the zero-th round key.
-.if USE_AVX10
- _vmovdqu 0*VL(SRC), V0
- _vmovdqu 1*VL(SRC), V1
- _vmovdqu 2*VL(SRC), V2
- _vmovdqu 3*VL(SRC), V3
+.if USE_AVX512
+ vmovdqu8 0*VL(SRC), V0
+ vmovdqu8 1*VL(SRC), V1
+ vmovdqu8 2*VL(SRC), V2
+ vmovdqu8 3*VL(SRC), V3
vpternlogd $0x96, TWEAK0, KEY0, V0
vpternlogd $0x96, TWEAK1, KEY0, V1
vpternlogd $0x96, TWEAK2, KEY0, V2
@@ -654,7 +679,7 @@
// Reduce latency by doing the XOR before the vaesenclast, utilizing the
// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a)
// (and likewise for vaesdeclast).
-.if USE_AVX10
+.if USE_AVX512
_tweak_step 18
_tweak_step 19
vpxord TWEAK0, KEY14, V4
@@ -762,7 +787,7 @@
_aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0, tmp=%xmm1
.endif
-.if USE_AVX10
+.if USE_AVX512
// Create a mask that has the first LEN bits set.
mov $-1, %r9d
bzhi LEN, %r9d, %r9d
@@ -811,7 +836,7 @@
// u8 iv[AES_BLOCK_SIZE]);
//
// Encrypt |iv| using the AES key |tweak_key| to get the first tweak. Assumes
-// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX10.
+// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX512.
SYM_TYPED_FUNC_START(aes_xts_encrypt_iv)
.set TWEAK_KEY, %rdi
.set IV, %rsi
@@ -853,7 +878,7 @@ SYM_FUNC_END(aes_xts_encrypt_iv)
// multiple of 16, then this function updates |tweak| to contain the next tweak.
.set VL, 16
-.set USE_AVX10, 0
+.set USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx)
_aes_xts_crypt 1
SYM_FUNC_END(aes_xts_encrypt_aesni_avx)
@@ -863,7 +888,7 @@ SYM_FUNC_END(aes_xts_decrypt_aesni_avx)
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set VL, 32
-.set USE_AVX10, 0
+.set USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2)
_aes_xts_crypt 1
SYM_FUNC_END(aes_xts_encrypt_vaes_avx2)
@@ -871,21 +896,12 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2)
_aes_xts_crypt 0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx2)
-.set VL, 32
-.set USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256)
- _aes_xts_crypt 1
-SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
-SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
- _aes_xts_crypt 0
-SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)
-
.set VL, 64
-.set USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512)
+.set USE_AVX512, 1
+SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx512)
_aes_xts_crypt 1
-SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512)
-SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512)
+SYM_FUNC_END(aes_xts_encrypt_vaes_avx512)
+SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512)
_aes_xts_crypt 0
-SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512)
+SYM_FUNC_END(aes_xts_decrypt_vaes_avx512)
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index bc655d794a95..061b1ced93c5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -566,10 +566,9 @@ static struct crypto_alg aesni_cipher_alg = {
static struct skcipher_alg aesni_skciphers[] = {
{
.base = {
- .cra_name = "__ecb(aes)",
- .cra_driver_name = "__ecb-aes-aesni",
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-aesni",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
@@ -581,10 +580,9 @@ static struct skcipher_alg aesni_skciphers[] = {
.decrypt = ecb_decrypt,
}, {
.base = {
- .cra_name = "__cbc(aes)",
- .cra_driver_name = "__cbc-aes-aesni",
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-aesni",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
@@ -597,10 +595,9 @@ static struct skcipher_alg aesni_skciphers[] = {
.decrypt = cbc_decrypt,
}, {
.base = {
- .cra_name = "__cts(cbc(aes))",
- .cra_driver_name = "__cts-cbc-aes-aesni",
+ .cra_name = "cts(cbc(aes))",
+ .cra_driver_name = "cts-cbc-aes-aesni",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
@@ -615,10 +612,9 @@ static struct skcipher_alg aesni_skciphers[] = {
#ifdef CONFIG_X86_64
}, {
.base = {
- .cra_name = "__ctr(aes)",
- .cra_driver_name = "__ctr-aes-aesni",
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-aesni",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
@@ -633,10 +629,9 @@ static struct skcipher_alg aesni_skciphers[] = {
#endif
}, {
.base = {
- .cra_name = "__xts(aes)",
- .cra_driver_name = "__xts-aes-aesni",
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-aesni",
.cra_priority = 401,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = XTS_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
@@ -651,9 +646,6 @@ static struct skcipher_alg aesni_skciphers[] = {
}
};
-static
-struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
-
#ifdef CONFIG_X86_64
asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
u8 iv[AES_BLOCK_SIZE]);
@@ -792,10 +784,9 @@ static int xctr_crypt_##suffix(struct skcipher_request *req) \
} \
\
static struct skcipher_alg skcipher_algs_##suffix[] = {{ \
- .base.cra_name = "__xts(aes)", \
- .base.cra_driver_name = "__xts-aes-" driver_name_suffix, \
+ .base.cra_name = "xts(aes)", \
+ .base.cra_driver_name = "xts-aes-" driver_name_suffix, \
.base.cra_priority = priority, \
- .base.cra_flags = CRYPTO_ALG_INTERNAL, \
.base.cra_blocksize = AES_BLOCK_SIZE, \
.base.cra_ctxsize = XTS_AES_CTX_SIZE, \
.base.cra_module = THIS_MODULE, \
@@ -807,10 +798,9 @@ static struct skcipher_alg skcipher_algs_##suffix[] = {{ \
.encrypt = xts_encrypt_##suffix, \
.decrypt = xts_decrypt_##suffix, \
}, { \
- .base.cra_name = "__ctr(aes)", \
- .base.cra_driver_name = "__ctr-aes-" driver_name_suffix, \
+ .base.cra_name = "ctr(aes)", \
+ .base.cra_driver_name = "ctr-aes-" driver_name_suffix, \
.base.cra_priority = priority, \
- .base.cra_flags = CRYPTO_ALG_INTERNAL, \
.base.cra_blocksize = 1, \
.base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \
.base.cra_module = THIS_MODULE, \
@@ -822,10 +812,9 @@ static struct skcipher_alg skcipher_algs_##suffix[] = {{ \
.encrypt = ctr_crypt_##suffix, \
.decrypt = ctr_crypt_##suffix, \
}, { \
- .base.cra_name = "__xctr(aes)", \
- .base.cra_driver_name = "__xctr-aes-" driver_name_suffix, \
+ .base.cra_name = "xctr(aes)", \
+ .base.cra_driver_name = "xctr-aes-" driver_name_suffix, \
.base.cra_priority = priority, \
- .base.cra_flags = CRYPTO_ALG_INTERNAL, \
.base.cra_blocksize = 1, \
.base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \
.base.cra_module = THIS_MODULE, \
@@ -836,16 +825,12 @@ static struct skcipher_alg skcipher_algs_##suffix[] = {{ \
.setkey = aesni_skcipher_setkey, \
.encrypt = xctr_crypt_##suffix, \
.decrypt = xctr_crypt_##suffix, \
-}}; \
- \
-static struct simd_skcipher_alg * \
-simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)]
+}}
DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600);
-DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700);
-DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800);
+DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
#endif
/* The common part of the x86_64 AES-GCM key struct */
@@ -1499,10 +1484,9 @@ static struct aead_alg aes_gcm_algs_##suffix[] = { { \
.chunksize = AES_BLOCK_SIZE, \
.maxauthsize = 16, \
.base = { \
- .cra_name = "__gcm(aes)", \
- .cra_driver_name = "__" generic_driver_name, \
+ .cra_name = "gcm(aes)", \
+ .cra_driver_name = generic_driver_name, \
.cra_priority = (priority), \
- .cra_flags = CRYPTO_ALG_INTERNAL, \
.cra_blocksize = 1, \
.cra_ctxsize = (ctxsize), \
.cra_module = THIS_MODULE, \
@@ -1516,17 +1500,14 @@ static struct aead_alg aes_gcm_algs_##suffix[] = { { \
.chunksize = AES_BLOCK_SIZE, \
.maxauthsize = 16, \
.base = { \
- .cra_name = "__rfc4106(gcm(aes))", \
- .cra_driver_name = "__" rfc_driver_name, \
+ .cra_name = "rfc4106(gcm(aes))", \
+ .cra_driver_name = rfc_driver_name, \
.cra_priority = (priority), \
- .cra_flags = CRYPTO_ALG_INTERNAL, \
.cra_blocksize = 1, \
.cra_ctxsize = (ctxsize), \
.cra_module = THIS_MODULE, \
}, \
-} }; \
- \
-static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2] \
+} }
/* aes_gcm_algs_aesni */
DEFINE_GCM_ALGS(aesni, /* no flags */ 0,
@@ -1556,14 +1537,12 @@ static int __init register_avx_algs(void)
if (!boot_cpu_has(X86_FEATURE_AVX))
return 0;
- err = simd_register_skciphers_compat(skcipher_algs_aesni_avx,
- ARRAY_SIZE(skcipher_algs_aesni_avx),
- simd_skcipher_algs_aesni_avx);
+ err = crypto_register_skciphers(skcipher_algs_aesni_avx,
+ ARRAY_SIZE(skcipher_algs_aesni_avx));
if (err)
return err;
- err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx,
- ARRAY_SIZE(aes_gcm_algs_aesni_avx),
- aes_gcm_simdalgs_aesni_avx);
+ err = crypto_register_aeads(aes_gcm_algs_aesni_avx,
+ ARRAY_SIZE(aes_gcm_algs_aesni_avx));
if (err)
return err;
/*
@@ -1579,9 +1558,8 @@ static int __init register_avx_algs(void)
!boot_cpu_has(X86_FEATURE_PCLMULQDQ) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
return 0;
- err = simd_register_skciphers_compat(skcipher_algs_vaes_avx2,
- ARRAY_SIZE(skcipher_algs_vaes_avx2),
- simd_skcipher_algs_vaes_avx2);
+ err = crypto_register_skciphers(skcipher_algs_vaes_avx2,
+ ARRAY_SIZE(skcipher_algs_vaes_avx2));
if (err)
return err;
@@ -1592,76 +1570,52 @@ static int __init register_avx_algs(void)
XFEATURE_MASK_AVX512, NULL))
return 0;
- err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256,
- ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
- simd_skcipher_algs_vaes_avx10_256);
- if (err)
- return err;
- err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
- ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
- aes_gcm_simdalgs_vaes_avx10_256);
+ err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_256,
+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256));
if (err)
return err;
if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
int i;
- for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++)
- skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1;
+ for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++)
+ skcipher_algs_vaes_avx512[i].base.cra_priority = 1;
for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
}
- err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512,
- ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
- simd_skcipher_algs_vaes_avx10_512);
+ err = crypto_register_skciphers(skcipher_algs_vaes_avx512,
+ ARRAY_SIZE(skcipher_algs_vaes_avx512));
if (err)
return err;
- err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
- ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
- aes_gcm_simdalgs_vaes_avx10_512);
+ err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_512,
+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512));
if (err)
return err;
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
return 0;
}
+#define unregister_skciphers(A) \
+ if (refcount_read(&(A)[0].base.cra_refcnt) != 0) \
+ crypto_unregister_skciphers((A), ARRAY_SIZE(A))
+#define unregister_aeads(A) \
+ if (refcount_read(&(A)[0].base.cra_refcnt) != 0) \
+ crypto_unregister_aeads((A), ARRAY_SIZE(A))
+
static void unregister_avx_algs(void)
{
- if (simd_skcipher_algs_aesni_avx[0])
- simd_unregister_skciphers(skcipher_algs_aesni_avx,
- ARRAY_SIZE(skcipher_algs_aesni_avx),
- simd_skcipher_algs_aesni_avx);
- if (aes_gcm_simdalgs_aesni_avx[0])
- simd_unregister_aeads(aes_gcm_algs_aesni_avx,
- ARRAY_SIZE(aes_gcm_algs_aesni_avx),
- aes_gcm_simdalgs_aesni_avx);
+ unregister_skciphers(skcipher_algs_aesni_avx);
+ unregister_aeads(aes_gcm_algs_aesni_avx);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
- if (simd_skcipher_algs_vaes_avx2[0])
- simd_unregister_skciphers(skcipher_algs_vaes_avx2,
- ARRAY_SIZE(skcipher_algs_vaes_avx2),
- simd_skcipher_algs_vaes_avx2);
- if (simd_skcipher_algs_vaes_avx10_256[0])
- simd_unregister_skciphers(skcipher_algs_vaes_avx10_256,
- ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
- simd_skcipher_algs_vaes_avx10_256);
- if (aes_gcm_simdalgs_vaes_avx10_256[0])
- simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
- ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
- aes_gcm_simdalgs_vaes_avx10_256);
- if (simd_skcipher_algs_vaes_avx10_512[0])
- simd_unregister_skciphers(skcipher_algs_vaes_avx10_512,
- ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
- simd_skcipher_algs_vaes_avx10_512);
- if (aes_gcm_simdalgs_vaes_avx10_512[0])
- simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
- ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
- aes_gcm_simdalgs_vaes_avx10_512);
+ unregister_skciphers(skcipher_algs_vaes_avx2);
+ unregister_skciphers(skcipher_algs_vaes_avx512);
+ unregister_aeads(aes_gcm_algs_vaes_avx10_256);
+ unregister_aeads(aes_gcm_algs_vaes_avx10_512);
#endif
}
#else /* CONFIG_X86_64 */
static struct aead_alg aes_gcm_algs_aesni[0];
-static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0];
static int __init register_avx_algs(void)
{
@@ -1690,15 +1644,13 @@ static int __init aesni_init(void)
if (err)
return err;
- err = simd_register_skciphers_compat(aesni_skciphers,
- ARRAY_SIZE(aesni_skciphers),
- aesni_simd_skciphers);
+ err = crypto_register_skciphers(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers));
if (err)
goto unregister_cipher;
- err = simd_register_aeads_compat(aes_gcm_algs_aesni,
- ARRAY_SIZE(aes_gcm_algs_aesni),
- aes_gcm_simdalgs_aesni);
+ err = crypto_register_aeads(aes_gcm_algs_aesni,
+ ARRAY_SIZE(aes_gcm_algs_aesni));
if (err)
goto unregister_skciphers;
@@ -1710,12 +1662,11 @@ static int __init aesni_init(void)
unregister_avx:
unregister_avx_algs();
- simd_unregister_aeads(aes_gcm_algs_aesni,
- ARRAY_SIZE(aes_gcm_algs_aesni),
- aes_gcm_simdalgs_aesni);
+ crypto_unregister_aeads(aes_gcm_algs_aesni,
+ ARRAY_SIZE(aes_gcm_algs_aesni));
unregister_skciphers:
- simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
- aesni_simd_skciphers);
+ crypto_unregister_skciphers(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers));
unregister_cipher:
crypto_unregister_alg(&aesni_cipher_alg);
return err;
@@ -1723,11 +1674,10 @@ unregister_cipher:
static void __exit aesni_exit(void)
{
- simd_unregister_aeads(aes_gcm_algs_aesni,
- ARRAY_SIZE(aes_gcm_algs_aesni),
- aes_gcm_simdalgs_aesni);
- simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
- aesni_simd_skciphers);
+ crypto_unregister_aeads(aes_gcm_algs_aesni,
+ ARRAY_SIZE(aes_gcm_algs_aesni));
+ crypto_unregister_skciphers(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers));
crypto_unregister_alg(&aesni_cipher_alg);
unregister_avx_algs();
}
diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c
index 87a11804fc77..b4bddcd58457 100644
--- a/arch/x86/crypto/aria_aesni_avx2_glue.c
+++ b/arch/x86/crypto/aria_aesni_avx2_glue.c
@@ -6,7 +6,6 @@
*/
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <crypto/aria.h>
#include <linux/crypto.h>
#include <linux/err.h>
@@ -165,10 +164,9 @@ static int aria_avx2_init_tfm(struct crypto_skcipher *tfm)
static struct skcipher_alg aria_algs[] = {
{
- .base.cra_name = "__ecb(aria)",
- .base.cra_driver_name = "__ecb-aria-avx2",
+ .base.cra_name = "ecb(aria)",
+ .base.cra_driver_name = "ecb-aria-avx2",
.base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = ARIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
@@ -178,11 +176,10 @@ static struct skcipher_alg aria_algs[] = {
.encrypt = aria_avx2_ecb_encrypt,
.decrypt = aria_avx2_ecb_decrypt,
}, {
- .base.cra_name = "__ctr(aria)",
- .base.cra_driver_name = "__ctr-aria-avx2",
+ .base.cra_name = "ctr(aria)",
+ .base.cra_driver_name = "ctr-aria-avx2",
.base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL |
- CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
+ .base.cra_flags = CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
@@ -197,8 +194,6 @@ static struct skcipher_alg aria_algs[] = {
}
};
-static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
-
static int __init aria_avx2_init(void)
{
const char *feature_name;
@@ -233,15 +228,12 @@ static int __init aria_avx2_init(void)
aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way;
}
- return simd_register_skciphers_compat(aria_algs,
- ARRAY_SIZE(aria_algs),
- aria_simd_algs);
+ return crypto_register_skciphers(aria_algs, ARRAY_SIZE(aria_algs));
}
static void __exit aria_avx2_exit(void)
{
- simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
- aria_simd_algs);
+ crypto_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs));
}
module_init(aria_avx2_init);
diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c
index 4e1516b76669..ab9b38d05332 100644
--- a/arch/x86/crypto/aria_aesni_avx_glue.c
+++ b/arch/x86/crypto/aria_aesni_avx_glue.c
@@ -6,7 +6,6 @@
*/
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <crypto/aria.h>
#include <linux/crypto.h>
#include <linux/err.h>
@@ -152,10 +151,9 @@ static int aria_avx_init_tfm(struct crypto_skcipher *tfm)
static struct skcipher_alg aria_algs[] = {
{
- .base.cra_name = "__ecb(aria)",
- .base.cra_driver_name = "__ecb-aria-avx",
+ .base.cra_name = "ecb(aria)",
+ .base.cra_driver_name = "ecb-aria-avx",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = ARIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
@@ -165,10 +163,9 @@ static struct skcipher_alg aria_algs[] = {
.encrypt = aria_avx_ecb_encrypt,
.decrypt = aria_avx_ecb_decrypt,
}, {
- .base.cra_name = "__ctr(aria)",
- .base.cra_driver_name = "__ctr-aria-avx",
+ .base.cra_name = "ctr(aria)",
+ .base.cra_driver_name = "ctr-aria-avx",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
@@ -184,8 +181,6 @@ static struct skcipher_alg aria_algs[] = {
}
};
-static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
-
static int __init aria_avx_init(void)
{
const char *feature_name;
@@ -213,15 +208,12 @@ static int __init aria_avx_init(void)
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way;
}
- return simd_register_skciphers_compat(aria_algs,
- ARRAY_SIZE(aria_algs),
- aria_simd_algs);
+ return crypto_register_skciphers(aria_algs, ARRAY_SIZE(aria_algs));
}
static void __exit aria_avx_exit(void)
{
- simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
- aria_simd_algs);
+ crypto_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs));
}
module_init(aria_avx_init);
diff --git a/arch/x86/crypto/aria_gfni_avx512_glue.c b/arch/x86/crypto/aria_gfni_avx512_glue.c
index f4a2208d2638..363cbf4399cc 100644
--- a/arch/x86/crypto/aria_gfni_avx512_glue.c
+++ b/arch/x86/crypto/aria_gfni_avx512_glue.c
@@ -6,7 +6,6 @@
*/
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <crypto/aria.h>
#include <linux/crypto.h>
#include <linux/err.h>
@@ -165,10 +164,9 @@ static int aria_avx512_init_tfm(struct crypto_skcipher *tfm)
static struct skcipher_alg aria_algs[] = {
{
- .base.cra_name = "__ecb(aria)",
- .base.cra_driver_name = "__ecb-aria-avx512",
+ .base.cra_name = "ecb(aria)",
+ .base.cra_driver_name = "ecb-aria-avx512",
.base.cra_priority = 600,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = ARIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
@@ -178,11 +176,10 @@ static struct skcipher_alg aria_algs[] = {
.encrypt = aria_avx512_ecb_encrypt,
.decrypt = aria_avx512_ecb_decrypt,
}, {
- .base.cra_name = "__ctr(aria)",
- .base.cra_driver_name = "__ctr-aria-avx512",
+ .base.cra_name = "ctr(aria)",
+ .base.cra_driver_name = "ctr-aria-avx512",
.base.cra_priority = 600,
- .base.cra_flags = CRYPTO_ALG_INTERNAL |
- CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
+ .base.cra_flags = CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
@@ -197,8 +194,6 @@ static struct skcipher_alg aria_algs[] = {
}
};
-static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
-
static int __init aria_avx512_init(void)
{
const char *feature_name;
@@ -229,15 +224,12 @@ static int __init aria_avx512_init(void)
aria_ops.aria_decrypt_64way = aria_gfni_avx512_decrypt_64way;
aria_ops.aria_ctr_crypt_64way = aria_gfni_avx512_ctr_crypt_64way;
- return simd_register_skciphers_compat(aria_algs,
- ARRAY_SIZE(aria_algs),
- aria_simd_algs);
+ return crypto_register_skciphers(aria_algs, ARRAY_SIZE(aria_algs));
}
static void __exit aria_avx512_exit(void)
{
- simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
- aria_simd_algs);
+ crypto_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs));
}
module_init(aria_avx512_init);
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index e7e4d64e9577..2d2f4e16537c 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -6,7 +6,6 @@
*/
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
@@ -69,10 +68,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg camellia_algs[] = {
{
- .base.cra_name = "__ecb(camellia)",
- .base.cra_driver_name = "__ecb-camellia-aesni-avx2",
+ .base.cra_name = "ecb(camellia)",
+ .base.cra_driver_name = "ecb-camellia-aesni-avx2",
.base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
@@ -82,10 +80,9 @@ static struct skcipher_alg camellia_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(camellia)",
- .base.cra_driver_name = "__cbc-camellia-aesni-avx2",
+ .base.cra_name = "cbc(camellia)",
+ .base.cra_driver_name = "cbc-camellia-aesni-avx2",
.base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
@@ -98,8 +95,6 @@ static struct skcipher_alg camellia_algs[] = {
},
};
-static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
-
static int __init camellia_aesni_init(void)
{
const char *feature_name;
@@ -118,15 +113,13 @@ static int __init camellia_aesni_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(camellia_algs,
- ARRAY_SIZE(camellia_algs),
- camellia_simd_algs);
+ return crypto_register_skciphers(camellia_algs,
+ ARRAY_SIZE(camellia_algs));
}
static void __exit camellia_aesni_fini(void)
{
- simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
- camellia_simd_algs);
+ crypto_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs));
}
module_init(camellia_aesni_init);
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index c7ccf63e741e..a7d162388142 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -6,7 +6,6 @@
*/
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
@@ -69,10 +68,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg camellia_algs[] = {
{
- .base.cra_name = "__ecb(camellia)",
- .base.cra_driver_name = "__ecb-camellia-aesni",
+ .base.cra_name = "ecb(camellia)",
+ .base.cra_driver_name = "ecb-camellia-aesni",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
@@ -82,10 +80,9 @@ static struct skcipher_alg camellia_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(camellia)",
- .base.cra_driver_name = "__cbc-camellia-aesni",
+ .base.cra_name = "cbc(camellia)",
+ .base.cra_driver_name = "cbc-camellia-aesni",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
@@ -98,8 +95,6 @@ static struct skcipher_alg camellia_algs[] = {
}
};
-static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
-
static int __init camellia_aesni_init(void)
{
const char *feature_name;
@@ -117,15 +112,13 @@ static int __init camellia_aesni_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(camellia_algs,
- ARRAY_SIZE(camellia_algs),
- camellia_simd_algs);
+ return crypto_register_skciphers(camellia_algs,
+ ARRAY_SIZE(camellia_algs));
}
static void __exit camellia_aesni_fini(void)
{
- simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
- camellia_simd_algs);
+ crypto_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs));
}
module_init(camellia_aesni_init);
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 3976a87f92ad..3aca04d43b34 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -8,7 +8,6 @@
#include <crypto/algapi.h>
#include <crypto/cast5.h>
-#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
@@ -64,10 +63,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg cast5_algs[] = {
{
- .base.cra_name = "__ecb(cast5)",
- .base.cra_driver_name = "__ecb-cast5-avx",
+ .base.cra_name = "ecb(cast5)",
+ .base.cra_driver_name = "ecb-cast5-avx",
.base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST5_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast5_ctx),
.base.cra_module = THIS_MODULE,
@@ -77,10 +75,9 @@ static struct skcipher_alg cast5_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(cast5)",
- .base.cra_driver_name = "__cbc-cast5-avx",
+ .base.cra_name = "cbc(cast5)",
+ .base.cra_driver_name = "cbc-cast5-avx",
.base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST5_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast5_ctx),
.base.cra_module = THIS_MODULE,
@@ -93,8 +90,6 @@ static struct skcipher_alg cast5_algs[] = {
}
};
-static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
-
static int __init cast5_init(void)
{
const char *feature_name;
@@ -105,15 +100,13 @@ static int __init cast5_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(cast5_algs,
- ARRAY_SIZE(cast5_algs),
- cast5_simd_algs);
+ return crypto_register_skciphers(cast5_algs,
+ ARRAY_SIZE(cast5_algs));
}
static void __exit cast5_exit(void)
{
- simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
- cast5_simd_algs);
+ crypto_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs));
}
module_init(cast5_init);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 7e2aea372349..c4dd28c30303 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -14,7 +14,6 @@
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast6.h>
-#include <crypto/internal/simd.h>
#include "ecb_cbc_helpers.h"
@@ -64,10 +63,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg cast6_algs[] = {
{
- .base.cra_name = "__ecb(cast6)",
- .base.cra_driver_name = "__ecb-cast6-avx",
+ .base.cra_name = "ecb(cast6)",
+ .base.cra_driver_name = "ecb-cast6-avx",
.base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST6_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast6_ctx),
.base.cra_module = THIS_MODULE,
@@ -77,10 +75,9 @@ static struct skcipher_alg cast6_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(cast6)",
- .base.cra_driver_name = "__cbc-cast6-avx",
+ .base.cra_name = "cbc(cast6)",
+ .base.cra_driver_name = "cbc-cast6-avx",
.base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST6_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast6_ctx),
.base.cra_module = THIS_MODULE,
@@ -93,8 +90,6 @@ static struct skcipher_alg cast6_algs[] = {
},
};
-static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
-
static int __init cast6_init(void)
{
const char *feature_name;
@@ -105,15 +100,12 @@ static int __init cast6_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(cast6_algs,
- ARRAY_SIZE(cast6_algs),
- cast6_simd_algs);
+ return crypto_register_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs));
}
static void __exit cast6_exit(void)
{
- simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs),
- cast6_simd_algs);
+ crypto_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs));
}
module_init(cast6_init);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
deleted file mode 100644
index 8bb74a272879..000000000000
--- a/arch/x86/crypto/chacha_glue.c
+++ /dev/null
@@ -1,311 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2015 Martin Willi
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
-
-asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-
-asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
- unsigned int len, int nrounds);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
-
-static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
-{
- len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
- return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
-}
-
-static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- if (IS_ENABLED(CONFIG_AS_AVX512) &&
- static_branch_likely(&chacha_use_avx512vl)) {
- while (bytes >= CHACHA_BLOCK_SIZE * 8) {
- chacha_8block_xor_avx512vl(state, dst, src, bytes,
- nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 8;
- src += CHACHA_BLOCK_SIZE * 8;
- dst += CHACHA_BLOCK_SIZE * 8;
- state[12] += 8;
- }
- if (bytes > CHACHA_BLOCK_SIZE * 4) {
- chacha_8block_xor_avx512vl(state, dst, src, bytes,
- nrounds);
- state[12] += chacha_advance(bytes, 8);
- return;
- }
- if (bytes > CHACHA_BLOCK_SIZE * 2) {
- chacha_4block_xor_avx512vl(state, dst, src, bytes,
- nrounds);
- state[12] += chacha_advance(bytes, 4);
- return;
- }
- if (bytes) {
- chacha_2block_xor_avx512vl(state, dst, src, bytes,
- nrounds);
- state[12] += chacha_advance(bytes, 2);
- return;
- }
- }
-
- if (static_branch_likely(&chacha_use_avx2)) {
- while (bytes >= CHACHA_BLOCK_SIZE * 8) {
- chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 8;
- src += CHACHA_BLOCK_SIZE * 8;
- dst += CHACHA_BLOCK_SIZE * 8;
- state[12] += 8;
- }
- if (bytes > CHACHA_BLOCK_SIZE * 4) {
- chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
- state[12] += chacha_advance(bytes, 8);
- return;
- }
- if (bytes > CHACHA_BLOCK_SIZE * 2) {
- chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
- state[12] += chacha_advance(bytes, 4);
- return;
- }
- if (bytes > CHACHA_BLOCK_SIZE) {
- chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
- state[12] += chacha_advance(bytes, 2);
- return;
- }
- }
-
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 4;
- src += CHACHA_BLOCK_SIZE * 4;
- dst += CHACHA_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- if (bytes > CHACHA_BLOCK_SIZE) {
- chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
- state[12] += chacha_advance(bytes, 4);
- return;
- }
- if (bytes) {
- chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
- state[12]++;
- }
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
- hchacha_block_generic(state, stream, nrounds);
- } else {
- kernel_fpu_begin();
- hchacha_block_ssse3(state, stream, nrounds);
- kernel_fpu_end();
- }
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
- int nrounds)
-{
- if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
- bytes <= CHACHA_BLOCK_SIZE)
- return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
- do {
- unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
-
- kernel_fpu_begin();
- chacha_dosimd(state, dst, src, todo, nrounds);
- kernel_fpu_end();
-
- bytes -= todo;
- src += todo;
- dst += todo;
- } while (bytes);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static int chacha_simd_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- u32 state[CHACHA_STATE_WORDS] __aligned(8);
- struct skcipher_walk walk;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- chacha_init(state, ctx->key, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- if (!static_branch_likely(&chacha_use_simd) ||
- !crypto_simd_usable()) {
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- } else {
- kernel_fpu_begin();
- chacha_dosimd(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- kernel_fpu_end();
- }
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_simd(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_simd_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_simd(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 state[CHACHA_STATE_WORDS] __aligned(8);
- struct chacha_ctx subctx;
- u8 real_iv[16];
-
- chacha_init(state, ctx->key, req->iv);
-
- if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
- kernel_fpu_begin();
- hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
- kernel_fpu_end();
- } else {
- hchacha_block_generic(state, subctx.key, ctx->nrounds);
- }
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_simd_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-simd",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha_simd,
- .decrypt = chacha_simd,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-simd",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = xchacha_simd,
- .decrypt = xchacha_simd,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-simd",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = xchacha_simd,
- .decrypt = xchacha_simd,
- },
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return 0;
-
- static_branch_enable(&chacha_use_simd);
-
- if (boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- static_branch_enable(&chacha_use_avx2);
-
- if (IS_ENABLED(CONFIG_AS_AVX512) &&
- boot_cpu_has(X86_FEATURE_AVX512VL) &&
- boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
- static_branch_enable(&chacha_use_avx512vl);
- }
- return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
- crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3))
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-simd");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-simd");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-simd");
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 99cb983ded9e..c4fbaa82ed7a 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -103,8 +103,8 @@ SYM_FUNC_START(clmul_ghash_mul)
SYM_FUNC_END(clmul_ghash_mul)
/*
- * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- * const le128 *shash);
+ * int clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+ * const le128 *shash);
*/
SYM_FUNC_START(clmul_ghash_update)
FRAME_BEGIN
@@ -127,6 +127,7 @@ SYM_FUNC_START(clmul_ghash_update)
pshufb BSWAP, DATA
movups DATA, (%rdi)
.Lupdate_just_ret:
+ mov %rdx, %rax
FRAME_END
RET
SYM_FUNC_END(clmul_ghash_update)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index c759ec808bf1..aea5d4d06be7 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -7,41 +7,27 @@
* Author: Huang Ying <ying.huang@intel.com>
*/
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <crypto/algapi.h>
-#include <crypto/cryptd.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/utils.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include <linux/unaligned.h>
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
+asmlinkage void clmul_ghash_mul(char *dst, const le128 *shash);
-void clmul_ghash_mul(char *dst, const le128 *shash);
+asmlinkage int clmul_ghash_update(char *dst, const char *src,
+ unsigned int srclen, const le128 *shash);
-void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- const le128 *shash);
-
-struct ghash_async_ctx {
- struct cryptd_ahash *cryptd_tfm;
-};
-
-struct ghash_ctx {
+struct x86_ghash_ctx {
le128 shash;
};
-struct ghash_desc_ctx {
- u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
-};
-
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -54,7 +40,7 @@ static int ghash_init(struct shash_desc *desc)
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
- struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ struct x86_ghash_ctx *ctx = crypto_shash_ctx(tfm);
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE)
@@ -95,64 +81,38 @@ static int ghash_setkey(struct crypto_shash *tfm,
static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
+ struct x86_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *dst = dctx->buffer;
+ int remain;
kernel_fpu_begin();
- if (dctx->bytes) {
- int n = min(srclen, dctx->bytes);
- u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- dctx->bytes -= n;
- srclen -= n;
-
- while (n--)
- *pos++ ^= *src++;
-
- if (!dctx->bytes)
- clmul_ghash_mul(dst, &ctx->shash);
- }
-
- clmul_ghash_update(dst, src, srclen, &ctx->shash);
+ remain = clmul_ghash_update(dst, src, srclen, &ctx->shash);
kernel_fpu_end();
-
- if (srclen & 0xf) {
- src += srclen - (srclen & 0xf);
- srclen &= 0xf;
- dctx->bytes = GHASH_BLOCK_SIZE - srclen;
- while (srclen--)
- *dst++ ^= *src++;
- }
-
- return 0;
+ return remain;
}
-static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static void ghash_flush(struct x86_ghash_ctx *ctx, struct ghash_desc_ctx *dctx,
+ const u8 *src, unsigned int len)
{
u8 *dst = dctx->buffer;
- if (dctx->bytes) {
- u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- while (dctx->bytes--)
- *tmp++ ^= 0;
-
- kernel_fpu_begin();
+ kernel_fpu_begin();
+ if (len) {
+ crypto_xor(dst, src, len);
clmul_ghash_mul(dst, &ctx->shash);
- kernel_fpu_end();
}
-
- dctx->bytes = 0;
+ kernel_fpu_end();
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
{
+ struct x86_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *buf = dctx->buffer;
- ghash_flush(ctx, dctx);
+ ghash_flush(ctx, dctx, src, len);
memcpy(dst, buf, GHASH_BLOCK_SIZE);
return 0;
@@ -162,186 +122,20 @@ static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
- .cra_name = "__ghash",
- .cra_driver_name = "__ghash-pclmulqdqni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-pclmulqdqni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_ctxsize = sizeof(struct x86_ghash_ctx),
.cra_module = THIS_MODULE,
},
};
-static int ghash_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
-
- desc->tfm = child;
- return crypto_shash_init(desc);
-}
-
-static void ghash_init_cryptd_req(struct ahash_request *req)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
- ahash_request_set_callback(cryptd_req, req->base.flags,
- req->base.complete, req->base.data);
- ahash_request_set_crypt(cryptd_req, req->src, req->result,
- req->nbytes);
-}
-
-static int ghash_async_update(struct ahash_request *req)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- if (!crypto_simd_usable() ||
- (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
- ghash_init_cryptd_req(req);
- return crypto_ahash_update(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- return shash_ahash_update(req, desc);
- }
-}
-
-static int ghash_async_final(struct ahash_request *req)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- if (!crypto_simd_usable() ||
- (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
- ghash_init_cryptd_req(req);
- return crypto_ahash_final(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- return crypto_shash_final(desc, req->result);
- }
-}
-
-static int ghash_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- ghash_async_init(req);
- memcpy(dctx, in, sizeof(*dctx));
- return 0;
-
-}
-
-static int ghash_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- memcpy(out, dctx, sizeof(*dctx));
- return 0;
-
-}
-
-static int ghash_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- if (!crypto_simd_usable() ||
- (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
- ghash_init_cryptd_req(req);
- return crypto_ahash_digest(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
-
- desc->tfm = child;
- return shash_ahash_digest(req, desc);
- }
-}
-
-static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct crypto_ahash *child = &ctx->cryptd_tfm->base;
-
- crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- return crypto_ahash_setkey(child, key, keylen);
-}
-
-static int ghash_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct cryptd_ahash *cryptd_tfm;
- struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ctx->cryptd_tfm = cryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&cryptd_tfm->base));
-
- return 0;
-}
-
-static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ahash(ctx->cryptd_tfm);
-}
-
-static struct ahash_alg ghash_async_alg = {
- .init = ghash_async_init,
- .update = ghash_async_update,
- .final = ghash_async_final,
- .setkey = ghash_async_setkey,
- .digest = ghash_async_digest,
- .export = ghash_async_export,
- .import = ghash_async_import,
- .halg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .statesize = sizeof(struct ghash_desc_ctx),
- .base = {
- .cra_name = "ghash",
- .cra_driver_name = "ghash-clmulni",
- .cra_priority = 400,
- .cra_ctxsize = sizeof(struct ghash_async_ctx),
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_init = ghash_async_init_tfm,
- .cra_exit = ghash_async_exit_tfm,
- },
- },
-};
-
static const struct x86_cpu_id pcmul_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
{}
@@ -350,29 +144,14 @@ MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
static int __init ghash_pclmulqdqni_mod_init(void)
{
- int err;
-
if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
- err = crypto_register_shash(&ghash_alg);
- if (err)
- goto err_out;
- err = crypto_register_ahash(&ghash_async_alg);
- if (err)
- goto err_shash;
-
- return 0;
-
-err_shash:
- crypto_unregister_shash(&ghash_alg);
-err_out:
- return err;
+ return crypto_register_shash(&ghash_alg);
}
static void __exit ghash_pclmulqdqni_mod_exit(void)
{
- crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
}
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
deleted file mode 100644
index 08ff4b489f7e..000000000000
--- a/arch/x86/crypto/poly1305_glue.c
+++ /dev/null
@@ -1,290 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/poly1305.h>
-#include <crypto/internal/simd.h>
-#include <linux/crypto.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-asmlinkage void poly1305_init_x86_64(void *ctx,
- const u8 key[POLY1305_BLOCK_SIZE]);
-asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
- const size_t len, const u32 padbit);
-asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
- const u32 nonce[4]);
-asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
- const u32 nonce[4]);
-asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
- const u32 padbit);
-asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
- const u32 padbit);
-asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
- const size_t len, const u32 padbit);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);
-
-struct poly1305_arch_internal {
- union {
- struct {
- u32 h[5];
- u32 is_base2_26;
- };
- u64 hs[3];
- };
- u64 r[2];
- u64 pad;
- struct { u32 r2, r1, r4, r3; } rn[9];
-};
-
-/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
- * the unfortunate situation of using AVX and then having to go back to scalar
- * -- because the user is silly and has called the update function from two
- * separate contexts -- then we need to convert back to the original base before
- * proceeding. It is possible to reason that the initial reduction below is
- * sufficient given the implementation invariants. However, for an avoidance of
- * doubt and because this is not performance critical, we do the full reduction
- * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
- */
-static void convert_to_base2_64(void *ctx)
-{
- struct poly1305_arch_internal *state = ctx;
- u32 cy;
-
- if (!state->is_base2_26)
- return;
-
- cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
- cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
- cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
- cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
- state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
- state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
- state->hs[2] = state->h[4] >> 24;
-#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
- cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
- state->hs[2] &= 3;
- state->hs[0] += cy;
- state->hs[1] += (cy = ULT(state->hs[0], cy));
- state->hs[2] += ULT(state->hs[1], cy);
-#undef ULT
- state->is_base2_26 = 0;
-}
-
-static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
-{
- poly1305_init_x86_64(ctx, key);
-}
-
-static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
- const u32 padbit)
-{
- struct poly1305_arch_internal *state = ctx;
-
- /* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
- SZ_4K % POLY1305_BLOCK_SIZE);
-
- if (!static_branch_likely(&poly1305_use_avx) ||
- (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
- !crypto_simd_usable()) {
- convert_to_base2_64(ctx);
- poly1305_blocks_x86_64(ctx, inp, len, padbit);
- return;
- }
-
- do {
- const size_t bytes = min_t(size_t, len, SZ_4K);
-
- kernel_fpu_begin();
- if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
- poly1305_blocks_avx512(ctx, inp, bytes, padbit);
- else if (static_branch_likely(&poly1305_use_avx2))
- poly1305_blocks_avx2(ctx, inp, bytes, padbit);
- else
- poly1305_blocks_avx(ctx, inp, bytes, padbit);
- kernel_fpu_end();
-
- len -= bytes;
- inp += bytes;
- } while (len);
-}
-
-static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
- const u32 nonce[4])
-{
- if (!static_branch_likely(&poly1305_use_avx))
- poly1305_emit_x86_64(ctx, mac, nonce);
- else
- poly1305_emit_avx(ctx, mac, nonce);
-}
-
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
-{
- poly1305_simd_init(&dctx->h, key);
- dctx->s[0] = get_unaligned_le32(&key[16]);
- dctx->s[1] = get_unaligned_le32(&key[20]);
- dctx->s[2] = get_unaligned_le32(&key[24]);
- dctx->s[3] = get_unaligned_le32(&key[28]);
- dctx->buflen = 0;
- dctx->sset = true;
-}
-EXPORT_SYMBOL(poly1305_init_arch);
-
-static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
- const u8 *inp, unsigned int len)
-{
- unsigned int acc = 0;
- if (unlikely(!dctx->sset)) {
- if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
- poly1305_simd_init(&dctx->h, inp);
- inp += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- acc += POLY1305_BLOCK_SIZE;
- dctx->rset = 1;
- }
- if (len >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(&inp[0]);
- dctx->s[1] = get_unaligned_le32(&inp[4]);
- dctx->s[2] = get_unaligned_le32(&inp[8]);
- dctx->s[3] = get_unaligned_le32(&inp[12]);
- acc += POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- }
- return acc;
-}
-
-void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
- unsigned int srclen)
-{
- unsigned int bytes, used;
-
- if (unlikely(dctx->buflen)) {
- bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- srclen -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE)))
- poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
- dctx->buflen = 0;
- }
- }
-
- if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
- srclen -= bytes;
- used = crypto_poly1305_setdctxkey(dctx, src, bytes);
- if (likely(bytes - used))
- poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1);
- src += bytes;
- }
-
- if (unlikely(srclen)) {
- dctx->buflen = srclen;
- memcpy(dctx->buf, src, srclen);
- }
-}
-EXPORT_SYMBOL(poly1305_update_arch);
-
-void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
-{
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- }
-
- poly1305_simd_emit(&dctx->h, dst, dctx->s);
- memzero_explicit(dctx, sizeof(*dctx));
-}
-EXPORT_SYMBOL(poly1305_final_arch);
-
-static int crypto_poly1305_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- *dctx = (struct poly1305_desc_ctx){};
- return 0;
-}
-
-static int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- poly1305_update_arch(dctx, src, srclen);
- return 0;
-}
-
-static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- poly1305_final_arch(dctx, dst);
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = crypto_poly1305_init,
- .update = crypto_poly1305_update,
- .final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_desc_ctx),
- .base = {
- .cra_name = "poly1305",
- .cra_driver_name = "poly1305-simd",
- .cra_priority = 300,
- .cra_blocksize = POLY1305_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init poly1305_simd_mod_init(void)
-{
- if (boot_cpu_has(X86_FEATURE_AVX) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
- static_branch_enable(&poly1305_use_avx);
- if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
- static_branch_enable(&poly1305_use_avx2);
- if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
- /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
- boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X)
- static_branch_enable(&poly1305_use_avx512);
- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
-}
-
-static void __exit poly1305_simd_mod_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
- crypto_unregister_shash(&alg);
-}
-
-module_init(poly1305_simd_mod_init);
-module_exit(poly1305_simd_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
-MODULE_DESCRIPTION("Poly1305 authenticator");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-simd");
diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c
index 8fa58b0f3cb3..6b466867f91a 100644
--- a/arch/x86/crypto/polyval-clmulni_glue.c
+++ b/arch/x86/crypto/polyval-clmulni_glue.c
@@ -16,16 +16,15 @@
* operations.
*/
-#include <crypto/algapi.h>
+#include <asm/cpu_device_id.h>
+#include <asm/fpu/api.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
+#include <crypto/utils.h>
+#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
+#include <linux/string.h>
#define POLYVAL_ALIGN 16
#define POLYVAL_ALIGN_ATTR __aligned(POLYVAL_ALIGN)
@@ -42,7 +41,6 @@ struct polyval_tfm_ctx {
struct polyval_desc_ctx {
u8 buffer[POLYVAL_BLOCK_SIZE];
- u32 bytes;
};
asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
@@ -57,25 +55,16 @@ static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm)
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
- if (likely(crypto_simd_usable())) {
- kernel_fpu_begin();
- clmul_polyval_update(keys, in, nblocks, accumulator);
- kernel_fpu_end();
- } else {
- polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
- nblocks, accumulator);
- }
+ kernel_fpu_begin();
+ clmul_polyval_update(keys, in, nblocks, accumulator);
+ kernel_fpu_end();
}
static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
- if (likely(crypto_simd_usable())) {
- kernel_fpu_begin();
- clmul_polyval_mul(op1, op2);
- kernel_fpu_end();
- } else {
- polyval_mul_non4k(op1, op2);
- }
+ kernel_fpu_begin();
+ clmul_polyval_mul(op1, op2);
+ kernel_fpu_end();
}
static int polyval_x86_setkey(struct crypto_shash *tfm,
@@ -112,49 +101,27 @@ static int polyval_x86_update(struct shash_desc *desc,
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
- u8 *pos;
unsigned int nblocks;
- unsigned int n;
-
- if (dctx->bytes) {
- n = min(srclen, dctx->bytes);
- pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
-
- dctx->bytes -= n;
- srclen -= n;
-
- while (n--)
- *pos++ ^= *src++;
- if (!dctx->bytes)
- internal_polyval_mul(dctx->buffer,
- tctx->key_powers[NUM_KEY_POWERS-1]);
- }
-
- while (srclen >= POLYVAL_BLOCK_SIZE) {
+ do {
/* Allow rescheduling every 4K bytes. */
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
- }
+ } while (srclen >= POLYVAL_BLOCK_SIZE);
- if (srclen) {
- dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
- pos = dctx->buffer;
- while (srclen--)
- *pos++ ^= *src++;
- }
-
- return 0;
+ return srclen;
}
-static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
+static int polyval_x86_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
- if (dctx->bytes) {
+ if (len) {
+ crypto_xor(dctx->buffer, src, len);
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
@@ -168,13 +135,14 @@ static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_x86_init,
.update = polyval_x86_update,
- .final = polyval_x86_final,
+ .finup = polyval_x86_finup,
.setkey = polyval_x86_setkey,
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-clmulni",
.cra_priority = 200,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = POLYVAL_CTX_SIZE,
.cra_module = THIS_MODULE,
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 347e97f4b713..f5f2121b7956 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -10,7 +10,6 @@
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
#include "serpent-avx.h"
@@ -65,10 +64,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg serpent_algs[] = {
{
- .base.cra_name = "__ecb(serpent)",
- .base.cra_driver_name = "__ecb-serpent-avx2",
+ .base.cra_name = "ecb(serpent)",
+ .base.cra_driver_name = "ecb-serpent-avx2",
.base.cra_priority = 600,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
@@ -78,10 +76,9 @@ static struct skcipher_alg serpent_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(serpent)",
- .base.cra_driver_name = "__cbc-serpent-avx2",
+ .base.cra_name = "cbc(serpent)",
+ .base.cra_driver_name = "cbc-serpent-avx2",
.base.cra_priority = 600,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
@@ -94,8 +91,6 @@ static struct skcipher_alg serpent_algs[] = {
},
};
-static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
-
static int __init serpent_avx2_init(void)
{
const char *feature_name;
@@ -110,15 +105,13 @@ static int __init serpent_avx2_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(serpent_algs,
- ARRAY_SIZE(serpent_algs),
- serpent_simd_algs);
+ return crypto_register_skciphers(serpent_algs,
+ ARRAY_SIZE(serpent_algs));
}
static void __exit serpent_avx2_fini(void)
{
- simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
- serpent_simd_algs);
+ crypto_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs));
}
module_init(serpent_avx2_init);
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6c248e1ea4ef..e640abc1cb8a 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -13,7 +13,6 @@
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
#include "serpent-avx.h"
@@ -71,10 +70,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg serpent_algs[] = {
{
- .base.cra_name = "__ecb(serpent)",
- .base.cra_driver_name = "__ecb-serpent-avx",
+ .base.cra_name = "ecb(serpent)",
+ .base.cra_driver_name = "ecb-serpent-avx",
.base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
@@ -84,10 +82,9 @@ static struct skcipher_alg serpent_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(serpent)",
- .base.cra_driver_name = "__cbc-serpent-avx",
+ .base.cra_name = "cbc(serpent)",
+ .base.cra_driver_name = "cbc-serpent-avx",
.base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
@@ -100,8 +97,6 @@ static struct skcipher_alg serpent_algs[] = {
},
};
-static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
-
static int __init serpent_init(void)
{
const char *feature_name;
@@ -112,15 +107,13 @@ static int __init serpent_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(serpent_algs,
- ARRAY_SIZE(serpent_algs),
- serpent_simd_algs);
+ return crypto_register_skciphers(serpent_algs,
+ ARRAY_SIZE(serpent_algs));
}
static void __exit serpent_exit(void)
{
- simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
- serpent_simd_algs);
+ crypto_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs));
}
module_init(serpent_init);
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index d78f37e9b2cf..80ee17ec21b4 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -18,7 +18,6 @@
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
-#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
#include "serpent-sse2.h"
@@ -74,10 +73,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg serpent_algs[] = {
{
- .base.cra_name = "__ecb(serpent)",
- .base.cra_driver_name = "__ecb-serpent-sse2",
+ .base.cra_name = "ecb(serpent)",
+ .base.cra_driver_name = "ecb-serpent-sse2",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
@@ -87,10 +85,9 @@ static struct skcipher_alg serpent_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(serpent)",
- .base.cra_driver_name = "__cbc-serpent-sse2",
+ .base.cra_name = "cbc(serpent)",
+ .base.cra_driver_name = "cbc-serpent-sse2",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
@@ -103,8 +100,6 @@ static struct skcipher_alg serpent_algs[] = {
},
};
-static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
-
static int __init serpent_sse2_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2)) {
@@ -112,15 +107,13 @@ static int __init serpent_sse2_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(serpent_algs,
- ARRAY_SIZE(serpent_algs),
- serpent_simd_algs);
+ return crypto_register_skciphers(serpent_algs,
+ ARRAY_SIZE(serpent_algs));
}
static void __exit serpent_sse2_exit(void)
{
- simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
- serpent_simd_algs);
+ crypto_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs));
}
module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index ab8bc54f254d..0a912bfc86c5 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -16,21 +16,17 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/cpu_device_id.h>
+#include <asm/simd.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
static const struct x86_cpu_id module_cpu_ids[] = {
-#ifdef CONFIG_AS_SHA1_NI
X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL),
-#endif
X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
@@ -38,14 +34,10 @@ static const struct x86_cpu_id module_cpu_ids[] = {
};
MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, sha1_block_fn *sha1_xform)
+static inline int sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, sha1_block_fn *sha1_xform)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
- return crypto_sha1_update(desc, data, len);
+ int remain;
/*
* Make sure struct sha1_state begins directly with the SHA1
@@ -54,22 +46,18 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
kernel_fpu_begin();
- sha1_base_do_update(desc, data, len, sha1_xform);
+ remain = sha1_base_do_update_blocks(desc, data, len, sha1_xform);
kernel_fpu_end();
- return 0;
+ return remain;
}
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out, sha1_block_fn *sha1_xform)
+static inline int sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out,
+ sha1_block_fn *sha1_xform)
{
- if (!crypto_simd_usable())
- return crypto_sha1_finup(desc, data, len, out);
-
kernel_fpu_begin();
- if (len)
- sha1_base_do_update(desc, data, len, sha1_xform);
- sha1_base_do_finalize(desc, sha1_xform);
+ sha1_base_do_finup(desc, data, len, sha1_xform);
kernel_fpu_end();
return sha1_base_finish(desc, out);
@@ -90,23 +78,17 @@ static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
return sha1_finup(desc, data, len, out, sha1_transform_ssse3);
}
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
-{
- return sha1_ssse3_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha1_ssse3_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_ssse3_update,
- .final = sha1_ssse3_final,
.finup = sha1_ssse3_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ssse3",
.cra_priority = 150,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -140,22 +122,17 @@ static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
return sha1_finup(desc, data, len, out, sha1_transform_avx);
}
-static int sha1_avx_final(struct shash_desc *desc, u8 *out)
-{
- return sha1_avx_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha1_avx_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_avx_update,
- .final = sha1_avx_final,
.finup = sha1_avx_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-avx",
.cra_priority = 160,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -200,8 +177,8 @@ static bool avx2_usable(void)
return false;
}
-static void sha1_apply_transform_avx2(struct sha1_state *state,
- const u8 *data, int blocks)
+static inline void sha1_apply_transform_avx2(struct sha1_state *state,
+ const u8 *data, int blocks)
{
/* Select the optimal transform based on data block size */
if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE)
@@ -222,22 +199,17 @@ static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2);
}
-static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
-{
- return sha1_avx2_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha1_avx2_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_avx2_update,
- .final = sha1_avx2_final,
.finup = sha1_avx2_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-avx2",
.cra_priority = 170,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -256,7 +228,6 @@ static void unregister_sha1_avx2(void)
crypto_unregister_shash(&sha1_avx2_alg);
}
-#ifdef CONFIG_AS_SHA1_NI
asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data,
int rounds);
@@ -272,22 +243,17 @@ static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
return sha1_finup(desc, data, len, out, sha1_ni_transform);
}
-static int sha1_ni_final(struct shash_desc *desc, u8 *out)
-{
- return sha1_ni_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha1_ni_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_ni_update,
- .final = sha1_ni_final,
.finup = sha1_ni_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ni",
.cra_priority = 250,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -306,11 +272,6 @@ static void unregister_sha1_ni(void)
crypto_unregister_shash(&sha1_ni_alg);
}
-#else
-static inline int register_sha1_ni(void) { return 0; }
-static inline void unregister_sha1_ni(void) { }
-#endif
-
static int __init sha1_ssse3_mod_init(void)
{
if (!x86_match_cpu(module_cpu_ids))
@@ -360,6 +321,4 @@ MODULE_ALIAS_CRYPTO("sha1");
MODULE_ALIAS_CRYPTO("sha1-ssse3");
MODULE_ALIAS_CRYPTO("sha1-avx");
MODULE_ALIAS_CRYPTO("sha1-avx2");
-#ifdef CONFIG_AS_SHA1_NI
MODULE_ALIAS_CRYPTO("sha1-ni");
-#endif
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
deleted file mode 100644
index e04a43d9f7d5..000000000000
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Glue code for the SHA256 Secure Hash Algorithm assembler implementations
- * using SSSE3, AVX, AVX2, and SHA-NI instructions.
- *
- * This file is based on sha256_generic.c
- *
- * Copyright (C) 2013 Intel Corporation.
- *
- * Author:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <linux/string.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-asmlinkage void sha256_transform_ssse3(struct sha256_state *state,
- const u8 *data, int blocks);
-
-static const struct x86_cpu_id module_cpu_ids[] = {
-#ifdef CONFIG_AS_SHA256_NI
- X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL),
-#endif
- X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
- X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
- X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
-
-static int _sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, sha256_block_fn *sha256_xform)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
- return crypto_sha256_update(desc, data, len);
-
- /*
- * Make sure struct sha256_state begins directly with the SHA256
- * 256-bit internal state, as this is what the asm functions expect.
- */
- BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
-
- kernel_fpu_begin();
- sha256_base_do_update(desc, data, len, sha256_xform);
- kernel_fpu_end();
-
- return 0;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out, sha256_block_fn *sha256_xform)
-{
- if (!crypto_simd_usable())
- return crypto_sha256_finup(desc, data, len, out);
-
- kernel_fpu_begin();
- if (len)
- sha256_base_do_update(desc, data, len, sha256_xform);
- sha256_base_do_finalize(desc, sha256_xform);
- kernel_fpu_end();
-
- return sha256_base_finish(desc, out);
-}
-
-static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- return _sha256_update(desc, data, len, sha256_transform_ssse3);
-}
-
-static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_finup(desc, data, len, out, sha256_transform_ssse3);
-}
-
-/* Add padding and return the message digest. */
-static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
-{
- return sha256_ssse3_finup(desc, NULL, 0, out);
-}
-
-static int sha256_ssse3_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_base_init(desc) ?:
- sha256_ssse3_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_ssse3_algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = sha256_ssse3_update,
- .final = sha256_ssse3_final,
- .finup = sha256_ssse3_finup,
- .digest = sha256_ssse3_digest,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-ssse3",
- .cra_priority = 150,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = sha256_ssse3_update,
- .final = sha256_ssse3_final,
- .finup = sha256_ssse3_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-ssse3",
- .cra_priority = 150,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int register_sha256_ssse3(void)
-{
- if (boot_cpu_has(X86_FEATURE_SSSE3))
- return crypto_register_shashes(sha256_ssse3_algs,
- ARRAY_SIZE(sha256_ssse3_algs));
- return 0;
-}
-
-static void unregister_sha256_ssse3(void)
-{
- if (boot_cpu_has(X86_FEATURE_SSSE3))
- crypto_unregister_shashes(sha256_ssse3_algs,
- ARRAY_SIZE(sha256_ssse3_algs));
-}
-
-asmlinkage void sha256_transform_avx(struct sha256_state *state,
- const u8 *data, int blocks);
-
-static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- return _sha256_update(desc, data, len, sha256_transform_avx);
-}
-
-static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_finup(desc, data, len, out, sha256_transform_avx);
-}
-
-static int sha256_avx_final(struct shash_desc *desc, u8 *out)
-{
- return sha256_avx_finup(desc, NULL, 0, out);
-}
-
-static int sha256_avx_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_base_init(desc) ?:
- sha256_avx_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_avx_algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = sha256_avx_update,
- .final = sha256_avx_final,
- .finup = sha256_avx_finup,
- .digest = sha256_avx_digest,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-avx",
- .cra_priority = 160,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = sha256_avx_update,
- .final = sha256_avx_final,
- .finup = sha256_avx_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-avx",
- .cra_priority = 160,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static bool avx_usable(void)
-{
- if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (boot_cpu_has(X86_FEATURE_AVX))
- pr_info("AVX detected but unusable.\n");
- return false;
- }
-
- return true;
-}
-
-static int register_sha256_avx(void)
-{
- if (avx_usable())
- return crypto_register_shashes(sha256_avx_algs,
- ARRAY_SIZE(sha256_avx_algs));
- return 0;
-}
-
-static void unregister_sha256_avx(void)
-{
- if (avx_usable())
- crypto_unregister_shashes(sha256_avx_algs,
- ARRAY_SIZE(sha256_avx_algs));
-}
-
-asmlinkage void sha256_transform_rorx(struct sha256_state *state,
- const u8 *data, int blocks);
-
-static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- return _sha256_update(desc, data, len, sha256_transform_rorx);
-}
-
-static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_finup(desc, data, len, out, sha256_transform_rorx);
-}
-
-static int sha256_avx2_final(struct shash_desc *desc, u8 *out)
-{
- return sha256_avx2_finup(desc, NULL, 0, out);
-}
-
-static int sha256_avx2_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_base_init(desc) ?:
- sha256_avx2_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_avx2_algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = sha256_avx2_update,
- .final = sha256_avx2_final,
- .finup = sha256_avx2_finup,
- .digest = sha256_avx2_digest,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-avx2",
- .cra_priority = 170,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = sha256_avx2_update,
- .final = sha256_avx2_final,
- .finup = sha256_avx2_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-avx2",
- .cra_priority = 170,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static bool avx2_usable(void)
-{
- if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
- boot_cpu_has(X86_FEATURE_BMI2))
- return true;
-
- return false;
-}
-
-static int register_sha256_avx2(void)
-{
- if (avx2_usable())
- return crypto_register_shashes(sha256_avx2_algs,
- ARRAY_SIZE(sha256_avx2_algs));
- return 0;
-}
-
-static void unregister_sha256_avx2(void)
-{
- if (avx2_usable())
- crypto_unregister_shashes(sha256_avx2_algs,
- ARRAY_SIZE(sha256_avx2_algs));
-}
-
-#ifdef CONFIG_AS_SHA256_NI
-asmlinkage void sha256_ni_transform(struct sha256_state *digest,
- const u8 *data, int rounds);
-
-static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- return _sha256_update(desc, data, len, sha256_ni_transform);
-}
-
-static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_finup(desc, data, len, out, sha256_ni_transform);
-}
-
-static int sha256_ni_final(struct shash_desc *desc, u8 *out)
-{
- return sha256_ni_finup(desc, NULL, 0, out);
-}
-
-static int sha256_ni_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return sha256_base_init(desc) ?:
- sha256_ni_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_ni_algs[] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = sha256_ni_update,
- .final = sha256_ni_final,
- .finup = sha256_ni_finup,
- .digest = sha256_ni_digest,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-ni",
- .cra_priority = 250,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = sha256_ni_update,
- .final = sha256_ni_final,
- .finup = sha256_ni_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name = "sha224-ni",
- .cra_priority = 250,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int register_sha256_ni(void)
-{
- if (boot_cpu_has(X86_FEATURE_SHA_NI))
- return crypto_register_shashes(sha256_ni_algs,
- ARRAY_SIZE(sha256_ni_algs));
- return 0;
-}
-
-static void unregister_sha256_ni(void)
-{
- if (boot_cpu_has(X86_FEATURE_SHA_NI))
- crypto_unregister_shashes(sha256_ni_algs,
- ARRAY_SIZE(sha256_ni_algs));
-}
-
-#else
-static inline int register_sha256_ni(void) { return 0; }
-static inline void unregister_sha256_ni(void) { }
-#endif
-
-static int __init sha256_ssse3_mod_init(void)
-{
- if (!x86_match_cpu(module_cpu_ids))
- return -ENODEV;
-
- if (register_sha256_ssse3())
- goto fail;
-
- if (register_sha256_avx()) {
- unregister_sha256_ssse3();
- goto fail;
- }
-
- if (register_sha256_avx2()) {
- unregister_sha256_avx();
- unregister_sha256_ssse3();
- goto fail;
- }
-
- if (register_sha256_ni()) {
- unregister_sha256_avx2();
- unregister_sha256_avx();
- unregister_sha256_ssse3();
- goto fail;
- }
-
- return 0;
-fail:
- return -ENODEV;
-}
-
-static void __exit sha256_ssse3_mod_fini(void)
-{
- unregister_sha256_ni();
- unregister_sha256_avx2();
- unregister_sha256_avx();
- unregister_sha256_ssse3();
-}
-
-module_init(sha256_ssse3_mod_init);
-module_exit(sha256_ssse3_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
-
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha256-ssse3");
-MODULE_ALIAS_CRYPTO("sha256-avx");
-MODULE_ALIAS_CRYPTO("sha256-avx2");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha224-ssse3");
-MODULE_ALIAS_CRYPTO("sha224-avx");
-MODULE_ALIAS_CRYPTO("sha224-avx2");
-#ifdef CONFIG_AS_SHA256_NI
-MODULE_ALIAS_CRYPTO("sha256-ni");
-MODULE_ALIAS_CRYPTO("sha224-ni");
-#endif
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 6d3b85e53d0e..067684c54395 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -27,17 +27,13 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/cpu_device_id.h>
+#include <asm/simd.h>
#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/types.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
const u8 *data, int blocks);
@@ -45,11 +41,7 @@ asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
static int sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len, sha512_block_fn *sha512_xform)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
- return crypto_sha512_update(desc, data, len);
+ int remain;
/*
* Make sure struct sha512_state begins directly with the SHA512
@@ -58,22 +50,17 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
kernel_fpu_begin();
- sha512_base_do_update(desc, data, len, sha512_xform);
+ remain = sha512_base_do_update_blocks(desc, data, len, sha512_xform);
kernel_fpu_end();
- return 0;
+ return remain;
}
static int sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha512_block_fn *sha512_xform)
{
- if (!crypto_simd_usable())
- return crypto_sha512_finup(desc, data, len, out);
-
kernel_fpu_begin();
- if (len)
- sha512_base_do_update(desc, data, len, sha512_xform);
- sha512_base_do_finalize(desc, sha512_xform);
+ sha512_base_do_finup(desc, data, len, sha512_xform);
kernel_fpu_end();
return sha512_base_finish(desc, out);
@@ -91,23 +78,18 @@ static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
return sha512_finup(desc, data, len, out, sha512_transform_ssse3);
}
-/* Add padding and return the message digest. */
-static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
-{
- return sha512_ssse3_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha512_ssse3_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_ssse3_update,
- .final = sha512_ssse3_final,
.finup = sha512_ssse3_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-ssse3",
.cra_priority = 150,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -115,13 +97,14 @@ static struct shash_alg sha512_ssse3_algs[] = { {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_ssse3_update,
- .final = sha512_ssse3_final,
.finup = sha512_ssse3_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",
.cra_priority = 150,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -167,23 +150,18 @@ static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
return sha512_finup(desc, data, len, out, sha512_transform_avx);
}
-/* Add padding and return the message digest. */
-static int sha512_avx_final(struct shash_desc *desc, u8 *out)
-{
- return sha512_avx_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha512_avx_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_avx_update,
- .final = sha512_avx_final,
.finup = sha512_avx_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-avx",
.cra_priority = 160,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -191,13 +169,14 @@ static struct shash_alg sha512_avx_algs[] = { {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_avx_update,
- .final = sha512_avx_final,
.finup = sha512_avx_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-avx",
.cra_priority = 160,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -233,23 +212,18 @@ static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
return sha512_finup(desc, data, len, out, sha512_transform_rorx);
}
-/* Add padding and return the message digest. */
-static int sha512_avx2_final(struct shash_desc *desc, u8 *out)
-{
- return sha512_avx2_finup(desc, NULL, 0, out);
-}
-
static struct shash_alg sha512_avx2_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_avx2_update,
- .final = sha512_avx2_final,
.finup = sha512_avx2_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-avx2",
.cra_priority = 170,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -257,13 +231,14 @@ static struct shash_alg sha512_avx2_algs[] = { {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_avx2_update,
- .final = sha512_avx2_final,
.finup = sha512_avx2_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-avx2",
.cra_priority = 170,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c
index 661b6f22ffcd..6e8c42b9dc8e 100644
--- a/arch/x86/crypto/sm3_avx_glue.c
+++ b/arch/x86/crypto/sm3_avx_glue.c
@@ -10,12 +10,11 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
-#include <asm/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
asmlinkage void sm3_transform_avx(struct sm3_state *state,
const u8 *data, int nblocks);
@@ -23,13 +22,7 @@ asmlinkage void sm3_transform_avx(struct sm3_state *state,
static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sm3_state *sctx = shash_desc_ctx(desc);
-
- if (!crypto_simd_usable() ||
- (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) {
- sm3_update(sctx, data, len);
- return 0;
- }
+ int remain;
/*
* Make sure struct sm3_state begins directly with the SM3
@@ -38,45 +31,17 @@ static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
kernel_fpu_begin();
- sm3_base_do_update(desc, data, len, sm3_transform_avx);
+ remain = sm3_base_do_update_blocks(desc, data, len, sm3_transform_avx);
kernel_fpu_end();
-
- return 0;
+ return remain;
}
static int sm3_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable()) {
- struct sm3_state *sctx = shash_desc_ctx(desc);
-
- if (len)
- sm3_update(sctx, data, len);
-
- sm3_final(sctx, out);
- return 0;
- }
-
kernel_fpu_begin();
- if (len)
- sm3_base_do_update(desc, data, len, sm3_transform_avx);
- sm3_base_do_finalize(desc, sm3_transform_avx);
+ sm3_base_do_finup(desc, data, len, sm3_transform_avx);
kernel_fpu_end();
-
- return sm3_base_finish(desc, out);
-}
-
-static int sm3_avx_final(struct shash_desc *desc, u8 *out)
-{
- if (!crypto_simd_usable()) {
- sm3_final(shash_desc_ctx(desc), out);
- return 0;
- }
-
- kernel_fpu_begin();
- sm3_base_do_finalize(desc, sm3_transform_avx);
- kernel_fpu_end();
-
return sm3_base_finish(desc, out);
}
@@ -84,13 +49,14 @@ static struct shash_alg sm3_avx_alg = {
.digestsize = SM3_DIGEST_SIZE,
.init = sm3_base_init,
.update = sm3_avx_update,
- .final = sm3_avx_final,
.finup = sm3_avx_finup,
- .descsize = sizeof(struct sm3_state),
+ .descsize = SM3_STATE_SIZE,
.base = {
.cra_name = "sm3",
.cra_driver_name = "sm3-avx",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SM3_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c
index 1148fd4cd57f..fec0ab7a63dd 100644
--- a/arch/x86/crypto/sm4_aesni_avx2_glue.c
+++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c
@@ -8,11 +8,10 @@
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
+#include <asm/fpu/api.h>
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"
@@ -48,10 +47,9 @@ static int ctr_crypt(struct skcipher_request *req)
static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
{
.base = {
- .cra_name = "__ecb(sm4)",
- .cra_driver_name = "__ecb-sm4-aesni-avx2",
+ .cra_name = "ecb(sm4)",
+ .cra_driver_name = "ecb-sm4-aesni-avx2",
.cra_priority = 500,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
@@ -64,10 +62,9 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
.decrypt = sm4_avx_ecb_decrypt,
}, {
.base = {
- .cra_name = "__cbc(sm4)",
- .cra_driver_name = "__cbc-sm4-aesni-avx2",
+ .cra_name = "cbc(sm4)",
+ .cra_driver_name = "cbc-sm4-aesni-avx2",
.cra_priority = 500,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
@@ -81,10 +78,9 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
.decrypt = cbc_decrypt,
}, {
.base = {
- .cra_name = "__ctr(sm4)",
- .cra_driver_name = "__ctr-sm4-aesni-avx2",
+ .cra_name = "ctr(sm4)",
+ .cra_driver_name = "ctr-sm4-aesni-avx2",
.cra_priority = 500,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
@@ -100,9 +96,6 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
}
};
-static struct simd_skcipher_alg *
-simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
-
static int __init sm4_init(void)
{
const char *feature_name;
@@ -121,16 +114,14 @@ static int __init sm4_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
- ARRAY_SIZE(sm4_aesni_avx2_skciphers),
- simd_sm4_aesni_avx2_skciphers);
+ return crypto_register_skciphers(sm4_aesni_avx2_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx2_skciphers));
}
static void __exit sm4_exit(void)
{
- simd_unregister_skciphers(sm4_aesni_avx2_skciphers,
- ARRAY_SIZE(sm4_aesni_avx2_skciphers),
- simd_sm4_aesni_avx2_skciphers);
+ crypto_unregister_skciphers(sm4_aesni_avx2_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx2_skciphers));
}
module_init(sm4_init);
diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c
index 85b4ca78b47b..72867fc49ce8 100644
--- a/arch/x86/crypto/sm4_aesni_avx_glue.c
+++ b/arch/x86/crypto/sm4_aesni_avx_glue.c
@@ -8,11 +8,10 @@
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
+#include <asm/fpu/api.h>
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"
@@ -263,10 +262,9 @@ static int ctr_crypt(struct skcipher_request *req)
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
{
.base = {
- .cra_name = "__ecb(sm4)",
- .cra_driver_name = "__ecb-sm4-aesni-avx",
+ .cra_name = "ecb(sm4)",
+ .cra_driver_name = "ecb-sm4-aesni-avx",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
@@ -279,10 +277,9 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
.decrypt = sm4_avx_ecb_decrypt,
}, {
.base = {
- .cra_name = "__cbc(sm4)",
- .cra_driver_name = "__cbc-sm4-aesni-avx",
+ .cra_name = "cbc(sm4)",
+ .cra_driver_name = "cbc-sm4-aesni-avx",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
@@ -296,10 +293,9 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
.decrypt = cbc_decrypt,
}, {
.base = {
- .cra_name = "__ctr(sm4)",
- .cra_driver_name = "__ctr-sm4-aesni-avx",
+ .cra_name = "ctr(sm4)",
+ .cra_driver_name = "ctr-sm4-aesni-avx",
.cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
@@ -315,9 +311,6 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
}
};
-static struct simd_skcipher_alg *
-simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
-
static int __init sm4_init(void)
{
const char *feature_name;
@@ -335,16 +328,14 @@ static int __init sm4_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
- ARRAY_SIZE(sm4_aesni_avx_skciphers),
- simd_sm4_aesni_avx_skciphers);
+ return crypto_register_skciphers(sm4_aesni_avx_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx_skciphers));
}
static void __exit sm4_exit(void)
{
- simd_unregister_skciphers(sm4_aesni_avx_skciphers,
- ARRAY_SIZE(sm4_aesni_avx_skciphers),
- simd_sm4_aesni_avx_skciphers);
+ crypto_unregister_skciphers(sm4_aesni_avx_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx_skciphers));
}
module_init(sm4_init);
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 3eb3440b477a..9e20db013750 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -13,7 +13,6 @@
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
#include <crypto/twofish.h>
#include "twofish.h"
@@ -74,10 +73,9 @@ static int cbc_decrypt(struct skcipher_request *req)
static struct skcipher_alg twofish_algs[] = {
{
- .base.cra_name = "__ecb(twofish)",
- .base.cra_driver_name = "__ecb-twofish-avx",
+ .base.cra_name = "ecb(twofish)",
+ .base.cra_driver_name = "ecb-twofish-avx",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
@@ -87,10 +85,9 @@ static struct skcipher_alg twofish_algs[] = {
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
- .base.cra_name = "__cbc(twofish)",
- .base.cra_driver_name = "__cbc-twofish-avx",
+ .base.cra_name = "cbc(twofish)",
+ .base.cra_driver_name = "cbc-twofish-avx",
.base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
@@ -103,8 +100,6 @@ static struct skcipher_alg twofish_algs[] = {
},
};
-static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
-
static int __init twofish_init(void)
{
const char *feature_name;
@@ -114,15 +109,13 @@ static int __init twofish_init(void)
return -ENODEV;
}
- return simd_register_skciphers_compat(twofish_algs,
- ARRAY_SIZE(twofish_algs),
- twofish_simd_algs);
+ return crypto_register_skciphers(twofish_algs,
+ ARRAY_SIZE(twofish_algs));
}
static void __exit twofish_exit(void)
{
- simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs),
- twofish_simd_algs);
+ crypto_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs));
}
module_init(twofish_init);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f40bdf97d390..ed04a968cc7d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1525,7 +1525,9 @@ SYM_CODE_END(rewind_stack_and_make_dead)
* ORC to unwind properly.
*
* The alignment is for performance and not for safety, and may be safely
- * refactored in the future if needed.
+ * refactored in the future if needed. The .skips are for safety, to ensure
+ * that all RETs are in the second half of a cacheline to mitigate Indirect
+ * Target Selection, rather than taking the slowpath via its_return_thunk.
*/
SYM_FUNC_START(clear_bhb_loop)
ANNOTATE_NOENDBR
@@ -1536,10 +1538,22 @@ SYM_FUNC_START(clear_bhb_loop)
call 1f
jmp 5f
.align 64, 0xcc
+ /*
+ * Shift instructions so that the RET is in the upper half of the
+ * cacheline and don't take the slowpath to its_return_thunk.
+ */
+ .skip 32 - (.Lret1 - 1f), 0xcc
ANNOTATE_INTRA_FUNCTION_CALL
1: call 2f
- RET
+.Lret1: RET
.align 64, 0xcc
+ /*
+ * As above shift instructions for RET at .Lret2 as well.
+ *
+ * This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc
+ * but some Clang versions (e.g. 18) don't like this.
+ */
+ .skip 32 - 18, 0xcc
2: movl $5, %eax
3: jmp 4f
nop
@@ -1547,7 +1561,7 @@ SYM_FUNC_START(clear_bhb_loop)
jnz 3b
sub $1, %ecx
jnz 1b
- RET
+.Lret2: RET
5: lfence
pop %rbp
RET
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index adb299d3b6a1..afe105b2f907 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -65,7 +65,6 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
static void vdso_fix_landing(const struct vdso_image *image,
struct vm_area_struct *new_vma)
{
-#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
if (in_ia32_syscall() && image == &vdso_image_32) {
struct pt_regs *regs = current_pt_regs();
unsigned long vdso_land = image->sym_int80_landing_pad;
@@ -76,7 +75,6 @@ static void vdso_fix_landing(const struct vdso_image *image,
if (regs->ip == old_land_addr)
regs->ip = new_vma->vm_start + vdso_land;
}
-#endif
}
static int vdso_mremap(const struct vm_special_mapping *sm,
@@ -227,7 +225,6 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
return map_vdso(image, addr);
}
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
if (vdso32_enabled != 1) /* Other values all mean "disabled" */
@@ -235,45 +232,38 @@ static int load_vdso32(void)
return map_vdso(&vdso_image_32, 0);
}
-#endif
-#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- if (!vdso64_enabled)
- return 0;
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ if (!vdso64_enabled)
+ return 0;
+
+ return map_vdso(&vdso_image_64, 0);
+ }
- return map_vdso(&vdso_image_64, 0);
+ return load_vdso32();
}
#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp, bool x32)
{
-#ifdef CONFIG_X86_X32_ABI
- if (x32) {
+ if (IS_ENABLED(CONFIG_X86_X32_ABI) && x32) {
if (!vdso64_enabled)
return 0;
return map_vdso(&vdso_image_x32, 0);
}
-#endif
-#ifdef CONFIG_IA32_EMULATION
- return load_vdso32();
-#else
+
+ if (IS_ENABLED(CONFIG_IA32_EMULATION))
+ return load_vdso32();
+
return 0;
-#endif
-}
-#endif
-#else
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
- return load_vdso32();
}
#endif
bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
const struct vdso_image *image = current->mm->context.vdso_image;
unsigned long vdso = (unsigned long) current->mm->context.vdso;
@@ -282,7 +272,6 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
return true;
}
-#endif
return false;
}
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 2fb7d53cf333..c9103a6fa06e 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -341,9 +341,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
-#if CONFIG_PGTABLE_LEVELS >= 5
set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
-#endif
pud = pud_offset(p4d, VSYSCALL_ADDR);
set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
pmd = pmd_offset(pud, VSYSCALL_ADDR);
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index ec3427463382..06f35a6b58a5 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -44,12 +44,12 @@ static inline unsigned int brs_to(int idx)
static __always_inline void set_debug_extn_cfg(u64 val)
{
/* bits[4:3] must always be set to 11b */
- __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
+ native_wrmsrq(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
}
static __always_inline u64 get_debug_extn_cfg(void)
{
- return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
+ return native_rdmsrq(MSR_AMD_DBG_EXTN_CFG);
}
static bool __init amd_brs_detect(void)
@@ -187,7 +187,7 @@ void amd_brs_reset(void)
/*
* Mark first entry as poisoned
*/
- wrmsrl(brs_to(0), BRS_POISON);
+ wrmsrq(brs_to(0), BRS_POISON);
}
int __init amd_brs_init(void)
@@ -325,7 +325,7 @@ void amd_brs_drain(void)
u32 brs_idx = tos - i;
u64 from, to;
- rdmsrl(brs_to(brs_idx), to);
+ rdmsrq(brs_to(brs_idx), to);
/* Entry does not belong to us (as marked by kernel) */
if (to == BRS_POISON)
@@ -341,7 +341,7 @@ void amd_brs_drain(void)
if (!amd_brs_match_plm(event, to))
continue;
- rdmsrl(brs_from(brs_idx), from);
+ rdmsrq(brs_from(brs_idx), from);
perf_clear_branch_entry_bitfields(br+nr);
@@ -371,7 +371,7 @@ static void amd_brs_poison_buffer(void)
idx = amd_brs_get_tos(&cfg);
/* Poison target of entry */
- wrmsrl(brs_to(idx), BRS_POISON);
+ wrmsrq(brs_to(idx), BRS_POISON);
}
/*
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 30d6ceb4c8ad..b20661b8621d 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -9,6 +9,7 @@
#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include <asm/nmi.h>
#include "../perf_event.h"
@@ -563,13 +564,13 @@ static void amd_pmu_cpu_reset(int cpu)
return;
/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
/*
* Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze
* and PerfCntrGLobalStatus.PerfCntrOvfl
*/
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask);
}
@@ -651,7 +652,7 @@ static void amd_pmu_cpu_dead(int cpu)
static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
static inline u64 amd_pmu_get_global_status(void)
@@ -659,7 +660,7 @@ static inline u64 amd_pmu_get_global_status(void)
u64 status;
/* PerfCntrGlobalStatus is read-only */
- rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+ rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
return status;
}
@@ -672,14 +673,14 @@ static inline void amd_pmu_ack_global_status(u64 status)
* clears the same bit in PerfCntrGlobalStatus
*/
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
}
static bool amd_pmu_test_overflow_topbit(int idx)
{
u64 counter;
- rdmsrl(x86_pmu_event_addr(idx), counter);
+ rdmsrq(x86_pmu_event_addr(idx), counter);
return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
}
@@ -1003,8 +1004,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 0252b7ea8bca..112f43b23ebf 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -15,6 +15,7 @@
#include <linux/sched/clock.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -26,7 +27,7 @@ static u32 ibs_caps;
#include <linux/hardirq.h>
#include <asm/nmi.h>
-#include <asm/amd-ibs.h>
+#include <asm/amd/ibs.h>
/* attr.config2 */
#define IBS_SW_FILTER_MASK 1
@@ -424,7 +425,7 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
* prev count manually on overflow.
*/
while (!perf_event_try_update(event, count, 64)) {
- rdmsrl(event->hw.config_base, *config);
+ rdmsrq(event->hw.config_base, *config);
count = perf_ibs->get_count(*config);
}
}
@@ -435,9 +436,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
u64 tmp = hwc->config | config;
if (perf_ibs->fetch_count_reset_broken)
- wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
+ wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask);
- wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
+ wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask);
}
/*
@@ -452,9 +453,9 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
{
config &= ~perf_ibs->cnt_mask;
if (boot_cpu_data.x86 == 0x10)
- wrmsrl(hwc->config_base, config);
+ wrmsrq(hwc->config_base, config);
config &= ~perf_ibs->enable_mask;
- wrmsrl(hwc->config_base, config);
+ wrmsrq(hwc->config_base, config);
}
/*
@@ -513,7 +514,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
if (!stopping && (hwc->state & PERF_HES_UPTODATE))
return;
- rdmsrl(hwc->config_base, config);
+ rdmsrq(hwc->config_base, config);
if (stopping) {
/*
@@ -1256,7 +1257,7 @@ fail:
hwc = &event->hw;
msr = hwc->config_base;
buf = ibs_data.regs;
- rdmsrl(msr, *buf);
+ rdmsrq(msr, *buf);
if (!(*buf++ & perf_ibs->valid_mask))
goto fail;
@@ -1274,7 +1275,7 @@ fail:
offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip);
do {
- rdmsrl(msr + offset, *buf++);
+ rdmsrq(msr + offset, *buf++);
size++;
offset = find_next_bit(perf_ibs->offset_mask,
perf_ibs->offset_max,
@@ -1304,17 +1305,17 @@ fail:
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_BRNTRGT) {
- rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ rdmsrq(MSR_AMD64_IBSBRTARGET, *buf++);
br_target_idx = size;
size++;
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
- rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ rdmsrq(MSR_AMD64_IBSOPDATA4, *buf++);
size++;
}
}
if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
- rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
+ rdmsrq(MSR_AMD64_ICIBSEXTDCTL, *buf++);
size++;
}
}
@@ -1373,9 +1374,7 @@ fail:
hwc->sample_period = perf_ibs->min_period;
out:
- if (throttle) {
- perf_ibs_stop(event, 0);
- } else {
+ if (!throttle) {
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -1565,7 +1564,7 @@ static inline int ibs_eilvt_valid(void)
preempt_disable();
- rdmsrl(MSR_AMD64_IBSCTL, val);
+ rdmsrq(MSR_AMD64_IBSCTL, val);
offset = val & IBSCTL_LVT_OFFSET_MASK;
if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
@@ -1680,7 +1679,7 @@ static inline int get_ibs_lvt_offset(void)
{
u64 val;
- rdmsrl(MSR_AMD64_IBSCTL, val);
+ rdmsrq(MSR_AMD64_IBSCTL, val);
if (!(val & IBSCTL_LVT_OFFSET_VALID))
return -EINVAL;
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index f8228d8243f7..a721da9987dd 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -16,6 +16,8 @@
#include <linux/slab.h>
#include <linux/amd-iommu.h>
+#include <asm/msr.h>
+
#include "../perf_event.h"
#include "iommu.h"
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index c06ccca96851..d24da377df77 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
+#include <asm/msr.h>
#include <asm/perf_event.h>
#include "../perf_event.h"
@@ -61,19 +62,19 @@ struct branch_entry {
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
- wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+ wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}
static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
- wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+ wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}
static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
u64 val;
- rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+ rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
return val;
}
@@ -82,7 +83,7 @@ static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
u64 val;
- rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+ rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
return val;
}
@@ -333,7 +334,7 @@ void amd_pmu_lbr_reset(void)
cpuc->last_task_ctx = NULL;
cpuc->last_log_id = 0;
- wrmsrl(MSR_AMD64_LBR_SELECT, 0);
+ wrmsrq(MSR_AMD64_LBR_SELECT, 0);
}
void amd_pmu_lbr_add(struct perf_event *event)
@@ -396,16 +397,16 @@ void amd_pmu_lbr_enable_all(void)
/* Set hardware branch filter */
if (cpuc->lbr_select) {
lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
- wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
+ wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select);
}
if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
}
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
+ rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
void amd_pmu_lbr_disable_all(void)
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 37d5b380516e..dad42790cf7d 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
/* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */
@@ -48,8 +49,8 @@ static void event_update(struct perf_event *event)
prev_pwr_acc = hwc->pwr_acc;
prev_ptsc = hwc->ptsc;
- rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
- rdmsrl(MSR_F15H_PTSC, new_ptsc);
+ rdmsrq(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
+ rdmsrq(MSR_F15H_PTSC, new_ptsc);
/*
* Calculate the CU power consumption over a time period, the unit of
@@ -75,8 +76,8 @@ static void __pmu_event_start(struct perf_event *event)
event->hw.state = 0;
- rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
- rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
+ rdmsrq(MSR_F15H_PTSC, event->hw.ptsc);
+ rdmsrq(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
}
static void pmu_event_start(struct perf_event *event, int mode)
@@ -272,7 +273,7 @@ static int __init amd_power_pmu_init(void)
cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
- if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
+ if (rdmsrq_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
pr_err("Failed to read max compute unit power accumulator MSR\n");
return -ENODEV;
}
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 49c26ce2b115..e8b6af199c73 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -21,6 +21,7 @@
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
#define NUM_COUNTERS_L3 6
+#define NUM_COUNTERS_MAX 64
#define RDPMC_BASE_NB 6
#define RDPMC_BASE_LLC 10
@@ -38,7 +39,10 @@ struct amd_uncore_ctx {
int refcnt;
int cpu;
struct perf_event **events;
- struct hlist_node node;
+ unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
+ int nr_active;
+ struct hrtimer hrtimer;
+ u64 hrtimer_duration;
};
struct amd_uncore_pmu {
@@ -83,11 +87,51 @@ struct amd_uncore {
static struct amd_uncore uncores[UNCORE_TYPE_MAX];
+/* Interval for hrtimer, defaults to 60000 milliseconds */
+static unsigned int update_interval = 60 * MSEC_PER_SEC;
+module_param(update_interval, uint, 0444);
+
static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}
+static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
+{
+ struct amd_uncore_ctx *ctx;
+ struct perf_event *event;
+ int bit;
+
+ ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
+
+ if (!ctx->nr_active || ctx->cpu != smp_processor_id())
+ return HRTIMER_NORESTART;
+
+ for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
+ event = ctx->events[bit];
+ event->pmu->read(event);
+ }
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
+ return HRTIMER_RESTART;
+}
+
+static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
+ HRTIMER_MODE_REL_PINNED_HARD);
+}
+
+static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_cancel(&ctx->hrtimer);
+}
+
+static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+}
+
static void amd_uncore_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -106,9 +150,9 @@ static void amd_uncore_read(struct perf_event *event)
* read counts directly from the corresponding PERF_CTR.
*/
if (hwc->event_base_rdpmc < 0)
- rdmsrl(hwc->event_base, new);
+ rdmsrq(hwc->event_base, new);
else
- rdpmcl(hwc->event_base_rdpmc, new);
+ new = rdpmc(hwc->event_base_rdpmc);
local64_set(&hwc->prev_count, new);
delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
@@ -118,27 +162,40 @@ static void amd_uncore_read(struct perf_event *event)
static void amd_uncore_start(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
+ if (!ctx->nr_active++)
+ amd_uncore_start_hrtimer(ctx);
+
if (flags & PERF_EF_RELOAD)
- wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+ wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
hwc->state = 0;
- wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+ __set_bit(hwc->idx, ctx->active_mask);
+ wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
perf_event_update_userpage(event);
}
static void amd_uncore_stop(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, hwc->config);
+ wrmsrq(hwc->config_base, hwc->config);
hwc->state |= PERF_HES_STOPPED;
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
event->pmu->read(event);
hwc->state |= PERF_HES_UPTODATE;
}
+
+ if (!--ctx->nr_active)
+ amd_uncore_cancel_hrtimer(ctx);
+
+ __clear_bit(hwc->idx, ctx->active_mask);
}
static int amd_uncore_add(struct perf_event *event, int flags)
@@ -491,6 +548,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
goto fail;
}
+ amd_uncore_init_hrtimer(curr);
+ curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;
+
cpumask_set_cpu(cpu, &pmu->active_mask);
}
@@ -880,16 +940,55 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
+ if (!ctx->nr_active++)
+ amd_uncore_start_hrtimer(ctx);
+
if (flags & PERF_EF_RELOAD)
- wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+ wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
hwc->state = 0;
- wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
+ __set_bit(hwc->idx, ctx->active_mask);
+ wrmsrq(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
perf_event_update_userpage(event);
}
+static void amd_uncore_umc_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new, shift;
+ s64 delta;
+
+ shift = COUNTER_SHIFT + 1;
+ prev = local64_read(&hwc->prev_count);
+
+ /*
+ * UMC counters do not have RDPMC assignments. Read counts directly
+ * from the corresponding PERF_CTR.
+ */
+ rdmsrl(hwc->event_base, new);
+
+ /*
+ * Unlike the other uncore counters, UMC counters saturate and set the
+ * Overflow bit (bit 48) on overflow. Since they do not roll over,
+ * proactively reset the corresponding PERF_CTR when bit 47 is set so
+ * that the counter never gets a chance to saturate.
+ */
+ if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
+ wrmsrl(hwc->event_base, 0);
+ local64_set(&hwc->prev_count, 0);
+ } else {
+ local64_set(&hwc->prev_count, new);
+ }
+
+ delta = (new << shift) - (prev << shift);
+ delta >>= shift;
+ local64_add(delta, &event->count);
+}
+
static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
@@ -968,7 +1067,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
.del = amd_uncore_del,
.start = amd_uncore_umc_start,
.stop = amd_uncore_stop,
- .read = amd_uncore_read,
+ .read = amd_uncore_umc_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
};
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 139ad80d1df3..7610f26dfbd9 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -32,6 +32,7 @@
#include <asm/apic.h>
#include <asm/stacktrace.h>
+#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
@@ -95,6 +96,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
+
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
@@ -134,7 +140,7 @@ u64 x86_perf_event_update(struct perf_event *event)
*/
prev_raw_count = local64_read(&hwc->prev_count);
do {
- rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ new_raw_count = rdpmc(hwc->event_base_rdpmc);
} while (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count));
@@ -269,7 +275,7 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
*/
for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
- ret = rdmsrl_safe(reg, &val);
+ ret = rdmsrq_safe(reg, &val);
if (ret)
goto msr_fail;
if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
@@ -283,7 +289,7 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
if (*(u64 *)fixed_cntr_mask) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- ret = rdmsrl_safe(reg, &val);
+ ret = rdmsrq_safe(reg, &val);
if (ret)
goto msr_fail;
for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
@@ -314,11 +320,11 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
reg = x86_pmu_event_addr(reg_safe);
- if (rdmsrl_safe(reg, &val))
+ if (rdmsrq_safe(reg, &val))
goto msr_fail;
val ^= 0xffffUL;
- ret = wrmsrl_safe(reg, val);
- ret |= rdmsrl_safe(reg, &val_new);
+ ret = wrmsrq_safe(reg, val);
+ ret |= rdmsrq_safe(reg, &val_new);
if (ret || val != val_new)
goto msr_fail;
@@ -674,6 +680,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.idx = -1;
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;
+ event->hw.dyn_constraint = ~0ULL;
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
@@ -693,13 +700,13 @@ void x86_pmu_disable_all(void)
if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu_config_addr(idx), val);
+ rdmsrq(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu_config_addr(idx), val);
+ wrmsrq(x86_pmu_config_addr(idx), val);
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_config_addr(idx + 1), 0);
+ wrmsrq(x86_pmu_config_addr(idx + 1), 0);
}
}
@@ -756,15 +763,16 @@ void x86_pmu_enable_all(int added)
int is_x86_event(struct perf_event *event)
{
- int i;
-
- if (!is_hybrid())
- return event->pmu == &pmu;
-
- for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
- if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu)
- return true;
- }
+ /*
+ * For a non-hybrid platforms, the type of X86 pmu is
+ * always PERF_TYPE_RAW.
+ * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE
+ * is a unique capability for the X86 PMU.
+ * Use them to detect a X86 event.
+ */
+ if (event->pmu->type == PERF_TYPE_RAW ||
+ event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)
+ return true;
return false;
}
@@ -1420,14 +1428,14 @@ int x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Sign extend the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width
*/
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
+ wrmsrq(x86_pmu_event_addr(idx + 1), 0xffff);
perf_event_update_userpage(event);
@@ -1550,10 +1558,10 @@ void perf_event_print_debug(void)
return;
if (x86_pmu.version >= 2) {
- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
- rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
- rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+ rdmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
pr_info("\n");
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
@@ -1561,19 +1569,19 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
if (pebs_constraints) {
- rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+ rdmsrq(MSR_IA32_PEBS_ENABLE, pebs);
pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
}
if (x86_pmu.lbr_nr) {
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
}
}
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
- rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
- rdmsrl(x86_pmu_event_addr(idx), pmc_count);
+ rdmsrq(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrq(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
@@ -1587,7 +1595,7 @@ void perf_event_print_debug(void)
for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
+ rdmsrq(x86_pmu_fixed_ctr_addr(idx), pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
cpu, idx, pmc_count);
@@ -1683,6 +1691,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc;
struct perf_event *event;
int idx, handled = 0;
+ u64 last_period;
u64 val;
cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1702,6 +1711,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
continue;
event = cpuc->events[idx];
+ last_period = event->hw.last_period;
val = static_call(x86_pmu_update)(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1715,12 +1725,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
if (!static_call(x86_pmu_set_period)(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
if (handled)
@@ -2046,6 +2055,11 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_filter, x86_pmu.filter);
static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
+
+ static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable);
+ static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable);
+ static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all);
+ static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all);
}
static void _x86_pmu_read(struct perf_event *event)
@@ -2496,9 +2510,9 @@ void perf_clear_dirty_counters(void)
if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
continue;
- wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
+ wrmsrq(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
} else {
- wrmsrl(x86_pmu_event_addr(i), 0);
+ wrmsrq(x86_pmu_event_addr(i), 0);
}
}
@@ -2803,8 +2817,15 @@ static unsigned long get_segment_base(unsigned int segment)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
+ /*
+ * If we're not in a valid context with a real (not just lazy)
+ * user mm, then don't even try.
+ */
+ if (!nmi_uaccess_okay())
+ return 0;
+
/* IRQs are off, so this synchronizes with smp_store_release */
- ldt = READ_ONCE(current->active_mm->context.ldt);
+ ldt = smp_load_acquire(&current->mm->context.ldt);
if (!ldt || idx >= ldt->nr_entries)
return 0;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index a95e6c91c4d7..61da6b8a3d51 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -17,6 +17,7 @@
#include <linux/sizes.h>
#include <asm/perf_event.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -80,54 +81,54 @@ static void *
bts_buffer_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool overwrite)
{
- struct bts_buffer *buf;
+ struct bts_buffer *bb;
struct page *page;
int cpu = event->cpu;
int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
unsigned long offset;
size_t size = nr_pages << PAGE_SHIFT;
- int pg, nbuf, pad;
+ int pg, nr_buf, pad;
/* count all the high order buffers */
- for (pg = 0, nbuf = 0; pg < nr_pages;) {
+ for (pg = 0, nr_buf = 0; pg < nr_pages;) {
page = virt_to_page(pages[pg]);
pg += buf_nr_pages(page);
- nbuf++;
+ nr_buf++;
}
/*
* to avoid interrupts in overwrite mode, only allow one physical
*/
- if (overwrite && nbuf > 1)
+ if (overwrite && nr_buf > 1)
return NULL;
- buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
- if (!buf)
+ bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node);
+ if (!bb)
return NULL;
- buf->nr_pages = nr_pages;
- buf->nr_bufs = nbuf;
- buf->snapshot = overwrite;
- buf->data_pages = pages;
- buf->real_size = size - size % BTS_RECORD_SIZE;
+ bb->nr_pages = nr_pages;
+ bb->nr_bufs = nr_buf;
+ bb->snapshot = overwrite;
+ bb->data_pages = pages;
+ bb->real_size = size - size % BTS_RECORD_SIZE;
- for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+ for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) {
unsigned int __nr_pages;
page = virt_to_page(pages[pg]);
__nr_pages = buf_nr_pages(page);
- buf->buf[nbuf].page = page;
- buf->buf[nbuf].offset = offset;
- buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
- pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
- buf->buf[nbuf].size -= pad;
+ bb->buf[nr_buf].page = page;
+ bb->buf[nr_buf].offset = offset;
+ bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+ bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement;
+ pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE;
+ bb->buf[nr_buf].size -= pad;
pg += __nr_pages;
offset += __nr_pages << PAGE_SHIFT;
}
- return buf;
+ return bb;
}
static void bts_buffer_free_aux(void *data)
@@ -135,25 +136,25 @@ static void bts_buffer_free_aux(void *data)
kfree(data);
}
-static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx)
{
- return buf->buf[idx].offset + buf->buf[idx].displacement;
+ return bb->buf[idx].offset + bb->buf[idx].displacement;
}
static void
-bts_config_buffer(struct bts_buffer *buf)
+bts_config_buffer(struct bts_buffer *bb)
{
int cpu = raw_smp_processor_id();
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- struct bts_phys *phys = &buf->buf[buf->cur_buf];
+ struct bts_phys *phys = &bb->buf[bb->cur_buf];
unsigned long index, thresh = 0, end = phys->size;
struct page *page = phys->page;
- index = local_read(&buf->head);
+ index = local_read(&bb->head);
- if (!buf->snapshot) {
- if (buf->end < phys->offset + buf_size(page))
- end = buf->end - phys->offset - phys->displacement;
+ if (!bb->snapshot) {
+ if (bb->end < phys->offset + buf_size(page))
+ end = bb->end - phys->offset - phys->displacement;
index -= phys->offset + phys->displacement;
@@ -168,7 +169,7 @@ bts_config_buffer(struct bts_buffer *buf)
ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
ds->bts_index = ds->bts_buffer_base + index;
ds->bts_absolute_maximum = ds->bts_buffer_base + end;
- ds->bts_interrupt_threshold = !buf->snapshot
+ ds->bts_interrupt_threshold = !bb->snapshot
? ds->bts_buffer_base + thresh
: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}
@@ -184,16 +185,16 @@ static void bts_update(struct bts_ctx *bts)
{
int cpu = raw_smp_processor_id();
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
+ struct bts_buffer *bb = perf_get_aux(&bts->handle);
unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
- if (!buf)
+ if (!bb)
return;
- head = index + bts_buffer_offset(buf, buf->cur_buf);
- old = local_xchg(&buf->head, head);
+ head = index + bts_buffer_offset(bb, bb->cur_buf);
+ old = local_xchg(&bb->head, head);
- if (!buf->snapshot) {
+ if (!bb->snapshot) {
if (old == head)
return;
@@ -205,9 +206,9 @@ static void bts_update(struct bts_ctx *bts)
* old and head are always in the same physical buffer, so we
* can subtract them to get the data size.
*/
- local_add(head - old, &buf->data_size);
+ local_add(head - old, &bb->data_size);
} else {
- local_set(&buf->data_size, head);
+ local_set(&bb->data_size, head);
}
/*
@@ -218,7 +219,7 @@ static void bts_update(struct bts_ctx *bts)
}
static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle);
/*
* Ordering PMU callbacks wrt themselves and the PMI is done by means
@@ -232,17 +233,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
static void __bts_event_start(struct perf_event *event)
{
struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
+ struct bts_buffer *bb = perf_get_aux(&bts->handle);
u64 config = 0;
- if (!buf->snapshot)
+ if (!bb->snapshot)
config |= ARCH_PERFMON_EVENTSEL_INT;
if (!event->attr.exclude_kernel)
config |= ARCH_PERFMON_EVENTSEL_OS;
if (!event->attr.exclude_user)
config |= ARCH_PERFMON_EVENTSEL_USR;
- bts_config_buffer(buf);
+ bts_config_buffer(bb);
/*
* local barrier to make sure that ds configuration made it
@@ -261,13 +262,13 @@ static void bts_event_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
- struct bts_buffer *buf;
+ struct bts_buffer *bb;
- buf = perf_aux_output_begin(&bts->handle, event);
- if (!buf)
+ bb = perf_aux_output_begin(&bts->handle, event);
+ if (!bb)
goto fail_stop;
- if (bts_buffer_reset(buf, &bts->handle))
+ if (bts_buffer_reset(bb, &bts->handle))
goto fail_end_stop;
bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
@@ -306,27 +307,27 @@ static void bts_event_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
- struct bts_buffer *buf = NULL;
+ struct bts_buffer *bb = NULL;
int state = READ_ONCE(bts->state);
if (state == BTS_STATE_ACTIVE)
__bts_event_stop(event, BTS_STATE_STOPPED);
if (state != BTS_STATE_STOPPED)
- buf = perf_get_aux(&bts->handle);
+ bb = perf_get_aux(&bts->handle);
event->hw.state |= PERF_HES_STOPPED;
if (flags & PERF_EF_UPDATE) {
bts_update(bts);
- if (buf) {
- if (buf->snapshot)
+ if (bb) {
+ if (bb->snapshot)
bts->handle.head =
- local_xchg(&buf->data_size,
- buf->nr_pages << PAGE_SHIFT);
+ local_xchg(&bb->data_size,
+ bb->nr_pages << PAGE_SHIFT);
perf_aux_output_end(&bts->handle,
- local_xchg(&buf->data_size, 0));
+ local_xchg(&bb->data_size, 0));
}
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
@@ -382,19 +383,19 @@ void intel_bts_disable_local(void)
}
static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle)
{
unsigned long head, space, next_space, pad, gap, skip, wakeup;
unsigned int next_buf;
struct bts_phys *phys, *next_phys;
int ret;
- if (buf->snapshot)
+ if (bb->snapshot)
return 0;
- head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+ head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1);
- phys = &buf->buf[buf->cur_buf];
+ phys = &bb->buf[bb->cur_buf];
space = phys->offset + phys->displacement + phys->size - head;
pad = space;
if (space > handle->size) {
@@ -403,10 +404,10 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
}
if (space <= BTS_SAFETY_MARGIN) {
/* See if next phys buffer has more space */
- next_buf = buf->cur_buf + 1;
- if (next_buf >= buf->nr_bufs)
+ next_buf = bb->cur_buf + 1;
+ if (next_buf >= bb->nr_bufs)
next_buf = 0;
- next_phys = &buf->buf[next_buf];
+ next_phys = &bb->buf[next_buf];
gap = buf_size(phys->page) - phys->displacement - phys->size +
next_phys->displacement;
skip = pad + gap;
@@ -431,8 +432,8 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
* anymore, so we must not be racing with
* bts_update().
*/
- buf->cur_buf = next_buf;
- local_set(&buf->head, head);
+ bb->cur_buf = next_buf;
+ local_set(&bb->head, head);
}
}
}
@@ -445,7 +446,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
space -= space % BTS_RECORD_SIZE;
}
- buf->end = head + space;
+ bb->end = head + space;
/*
* If we have no space, the lost notification would have been sent when
@@ -462,7 +463,7 @@ int intel_bts_interrupt(void)
struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
struct bts_ctx *bts;
struct perf_event *event;
- struct bts_buffer *buf;
+ struct bts_buffer *bb;
s64 old_head;
int err = -ENOSPC, handled = 0;
@@ -485,8 +486,8 @@ int intel_bts_interrupt(void)
if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
return handled;
- buf = perf_get_aux(&bts->handle);
- if (!buf)
+ bb = perf_get_aux(&bts->handle);
+ if (!bb)
return handled;
/*
@@ -494,26 +495,26 @@ int intel_bts_interrupt(void)
* there's no other way of telling, because the pointer will
* keep moving
*/
- if (buf->snapshot)
+ if (bb->snapshot)
return 0;
- old_head = local_read(&buf->head);
+ old_head = local_read(&bb->head);
bts_update(bts);
/* no new data */
- if (old_head == local_read(&buf->head))
+ if (old_head == local_read(&bb->head))
return handled;
- perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
+ perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0));
- buf = perf_aux_output_begin(&bts->handle, event);
- if (buf)
- err = bts_buffer_reset(buf, &bts->handle);
+ bb = perf_aux_output_begin(&bts->handle, event);
+ if (bb)
+ err = bts_buffer_reset(bb, &bts->handle);
if (err) {
WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
- if (buf) {
+ if (bb) {
/*
* BTS_STATE_STOPPED should be visible before
* cleared handle::event
@@ -599,7 +600,11 @@ static void bts_event_read(struct perf_event *event)
static __init int bts_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+ if (!boot_cpu_has(X86_FEATURE_DTES64))
+ return -ENODEV;
+
+ x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
+ if (!x86_pmu.bts)
return -ENODEV;
if (boot_cpu_has(X86_FEATURE_PTI)) {
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c5f385413392..466283326630 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -23,6 +23,7 @@
#include <asm/intel_pt.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -2224,6 +2225,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01");
+EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02");
+
+static struct attribute *skt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_skt),
+ EVENT_PTR(td_retiring_skt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_skt),
+ NULL,
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -2285,7 +2298,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
@@ -2294,7 +2307,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts)
static __always_inline void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all(true);
- intel_pmu_pebs_disable_all();
+ static_call_cond(x86_pmu_pebs_disable_all)();
intel_pmu_lbr_disable_all();
}
@@ -2306,11 +2319,11 @@ static void __intel_pmu_enable_all(int added, bool pmi)
intel_pmu_lbr_enable_all(pmi);
if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
- wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
+ wrmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
}
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL,
intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -2326,7 +2339,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
static void intel_pmu_enable_all(int added)
{
- intel_pmu_pebs_enable_all();
+ static_call_cond(x86_pmu_pebs_enable_all)();
__intel_pmu_enable_all(added, false);
}
@@ -2426,12 +2439,12 @@ static void intel_pmu_nhm_workaround(void)
}
for (i = 0; i < 4; i++) {
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
- wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrq(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
}
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
@@ -2441,7 +2454,7 @@ static void intel_pmu_nhm_workaround(void)
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
} else
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+ wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
}
@@ -2458,7 +2471,7 @@ static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
if (cpuc->tfa_shadow != val) {
cpuc->tfa_shadow = val;
- wrmsrl(MSR_TSX_FORCE_ABORT, val);
+ wrmsrq(MSR_TSX_FORCE_ABORT, val);
}
}
@@ -2489,14 +2502,14 @@ static inline u64 intel_pmu_get_status(void)
{
u64 status;
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
return status;
}
static inline void intel_pmu_ack_status(u64 ack)
{
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
static inline bool event_is_checkpointed(struct perf_event *event)
@@ -2583,7 +2596,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
* so we don't trigger the event without PEBS bit set.
*/
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
+ static_call(x86_pmu_pebs_disable)(event);
}
static void intel_pmu_assign_event(struct perf_event *event, int idx)
@@ -2603,6 +2616,9 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup--;
}
static int icl_set_topdown_event_period(struct perf_event *event)
@@ -2619,15 +2635,15 @@ static int icl_set_topdown_event_period(struct perf_event *event)
* Don't need to clear them again.
*/
if (left == x86_pmu.max_period) {
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
- wrmsrl(MSR_PERF_METRICS, 0);
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrq(MSR_PERF_METRICS, 0);
hwc->saved_slots = 0;
hwc->saved_metric = 0;
}
if ((hwc->saved_slots) && is_slots_event(event)) {
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
- wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrq(MSR_PERF_METRICS, hwc->saved_metric);
}
perf_event_update_userpage(event);
@@ -2724,12 +2740,12 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end,
if (!val) {
/* read Fixed counter 3 */
- rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+ slots = rdpmc(3 | INTEL_PMC_FIXED_RDPMC_BASE);
if (!slots)
return 0;
/* read PERF_METRICS */
- rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+ metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
} else {
slots = val[0];
metrics = val[1];
@@ -2773,8 +2789,8 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end,
if (reset) {
/* The fixed counter 3 has to be written before the PERF_METRICS. */
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
- wrmsrl(MSR_PERF_METRICS, 0);
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrq(MSR_PERF_METRICS, 0);
if (event)
update_saved_topdown_regs(event, 0, 0, metric_end);
}
@@ -2880,6 +2896,52 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
cpuc->fixed_ctrl_val |= bits;
}
+static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int msr_b, msr_c;
+
+ if (!mask && !cpuc->acr_cfg_b[idx])
+ return;
+
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
+ } else {
+ msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
+ idx -= INTEL_PMC_IDX_FIXED;
+ }
+
+ if (cpuc->acr_cfg_b[idx] != mask) {
+ wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask);
+ cpuc->acr_cfg_b[idx] = mask;
+ }
+ /* Only need to update the reload value when there is a valid config value. */
+ if (mask && cpuc->acr_cfg_c[idx] != reload) {
+ wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload);
+ cpuc->acr_cfg_c[idx] = reload;
+ }
+}
+
+static void intel_pmu_enable_acr(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!is_acr_event_group(event) || !event->attr.config2) {
+ /*
+ * The disable doesn't clear the ACR CFG register.
+ * Check and clear the ACR CFG register.
+ */
+ intel_pmu_config_acr(hwc->idx, 0, 0);
+ return;
+ }
+
+ intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+
static void intel_pmu_enable_event(struct perf_event *event)
{
u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2887,16 +2949,19 @@ static void intel_pmu_enable_event(struct perf_event *event)
int idx = hwc->idx;
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
+ static_call(x86_pmu_pebs_enable)(event);
switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
if (branch_sample_counters(event))
enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
intel_set_masks(event, idx);
+ static_call_cond(intel_pmu_enable_acr_event)(event);
__x86_pmu_enable_event(hwc, enable_mask);
break;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ static_call_cond(intel_pmu_enable_acr_event)(event);
+ fallthrough;
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_enable_fixed(event);
break;
@@ -2914,12 +2979,51 @@ static void intel_pmu_enable_event(struct perf_event *event)
}
}
+static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
+{
+ struct perf_event *event, *leader;
+ int i, j, idx;
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ leader = cpuc->event_list[i];
+ if (!is_acr_event_group(leader))
+ continue;
+
+ /* The ACR events must be contiguous. */
+ for (j = i; j < cpuc->n_events; j++) {
+ event = cpuc->event_list[j];
+ if (event->group_leader != leader->group_leader)
+ break;
+ for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
+ if (WARN_ON_ONCE(i + idx > cpuc->n_events))
+ return;
+ __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
+ }
+ }
+ i = j - 1;
+ }
+}
+
+void intel_pmu_late_setup(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!cpuc->n_late_setup)
+ return;
+
+ intel_pmu_pebs_late_setup(cpuc);
+ intel_pmu_acr_late_setup(cpuc);
+}
+
static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup++;
}
/*
@@ -2937,7 +3041,7 @@ int intel_pmu_save_and_restart(struct perf_event *event)
*/
if (unlikely(event_is_checkpointed(event))) {
/* No race with NMIs because the counter should not be armed */
- wrmsrl(event->hw.event_base, 0);
+ wrmsrq(event->hw.event_base, 0);
local64_set(&event->hw.prev_count, 0);
}
return static_call(x86_pmu_set_period)(event);
@@ -2976,13 +3080,13 @@ static void intel_pmu_reset(void)
pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
- wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
- wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_config_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_event_addr(idx), 0ull);
}
for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
}
if (ds)
@@ -2991,7 +3095,7 @@ static void intel_pmu_reset(void)
/* Ack all overflows and disable fixed counters */
if (x86_pmu.version >= 2) {
intel_pmu_ack_status(intel_pmu_get_status());
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
/* Reset LBRs and LBR freezing */
@@ -3035,8 +3139,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
continue;
perf_sample_data_init(data, 0, event->hw.last_period);
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
/* Inject one fake event is enough. */
break;
@@ -3101,7 +3204,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* Update the MSR if pebs_enabled is changed.
*/
if (pebs_enabled != cpuc->pebs_enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
/*
* Above PEBS handler (PEBS counters snapshotting) has updated fixed
@@ -3141,6 +3244,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
@@ -3168,16 +3272,17 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
if (is_pebs_counter_event_group(event))
x86_pmu.drain_pebs(regs, &data);
+ last_period = event->hw.last_period;
+
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (has_branch_stack(event))
intel_pmu_lbr_save_brstack(&data, cpuc, event);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
return handled;
@@ -3739,10 +3844,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
- /* Not all counters support the branch counter feature. */
- if (branch_sample_counters(event)) {
+ if (event->hw.dyn_constraint != ~0ULL) {
c2 = dyn_constraint(cpuc, c2, idx);
- c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->idxmsk64 &= event->hw.dyn_constraint;
c2->weight = hweight64(c2->idxmsk64);
}
@@ -4083,6 +4187,39 @@ end:
return start;
}
+static inline bool intel_pmu_has_acr(struct pmu *pmu)
+{
+ return !!hybrid(pmu, acr_cause_mask64);
+}
+
+static bool intel_pmu_is_acr_group(struct perf_event *event)
+{
+ /* The group leader has the ACR flag set */
+ if (is_acr_event_group(event))
+ return true;
+
+ /* The acr_mask is set */
+ if (event->attr.config2)
+ return true;
+
+ return false;
+}
+
+static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
+ u64 *cause_mask, int *num)
+{
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64);
+ *cause_mask |= event->attr.config2;
+ *num += 1;
+}
+
+static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event,
+ int idx, u64 cause_mask)
+{
+ if (test_bit(idx, (unsigned long *)&cause_mask))
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -4144,15 +4281,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
leader = event->group_leader;
if (branch_sample_call_stack(leader))
return -EINVAL;
- if (branch_sample_counters(leader))
+ if (branch_sample_counters(leader)) {
num++;
+ leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
for_each_sibling_event(sibling, leader) {
if (branch_sample_call_stack(sibling))
return -EINVAL;
- if (branch_sample_counters(sibling))
+ if (branch_sample_counters(sibling)) {
num++;
+ sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
}
if (num > fls(x86_pmu.lbr_counters))
@@ -4207,6 +4348,94 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->attr.precise_ip)
event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+ if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) {
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu *pmu = event->pmu;
+ bool has_sw_event = false;
+ int num = 0, idx = 0;
+ u64 cause_mask = 0;
+
+ /* Not support perf metrics */
+ if (is_metric_event(event))
+ return -EINVAL;
+
+ /* Not support freq mode */
+ if (event->attr.freq)
+ return -EINVAL;
+
+ /* PDist is not supported */
+ if (event->attr.config2 && event->attr.precise_ip > 2)
+ return -EINVAL;
+
+ /* The reload value cannot exceeds the max period */
+ if (event->attr.sample_period > x86_pmu.max_period)
+ return -EINVAL;
+ /*
+ * The counter-constraints of each event cannot be finalized
+ * unless the whole group is scanned. However, it's hard
+ * to know whether the event is the last one of the group.
+ * Recalculate the counter-constraints for each event when
+ * adding a new event.
+ *
+ * The group is traversed twice, which may be optimized later.
+ * In the first round,
+ * - Find all events which do reload when other events
+ * overflow and set the corresponding counter-constraints
+ * - Add all events, which can cause other events reload,
+ * in the cause_mask
+ * - Error out if the number of events exceeds the HW limit
+ * - The ACR events must be contiguous.
+ * Error out if there are non-X86 events between ACR events.
+ * This is not a HW limit, but a SW limit.
+ * With the assumption, the intel_pmu_acr_late_setup() can
+ * easily convert the event idx to counter idx without
+ * traversing the whole event list.
+ */
+ if (!is_x86_event(leader))
+ return -EINVAL;
+
+ if (leader->attr.config2)
+ intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader) {
+ if (!is_x86_event(sibling)) {
+ has_sw_event = true;
+ continue;
+ }
+ if (!sibling->attr.config2)
+ continue;
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num);
+ }
+ }
+ if (leader != event && event->attr.config2) {
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num);
+ }
+
+ if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) ||
+ num > hweight64(hybrid(event->pmu, acr_cntr_mask64)))
+ return -EINVAL;
+ /*
+ * In the second round, apply the counter-constraints for
+ * the events which can cause other events reload.
+ */
+ intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader)
+ intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ }
+
+ if (leader != event)
+ intel_pmu_set_acr_caused_constr(event, idx, cause_mask);
+
+ leader->hw.flags |= PERF_X86_EVENT_ACR;
+ }
+
if ((event->attr.type == PERF_TYPE_HARDWARE) ||
(event->attr.type == PERF_TYPE_HW_CACHE))
return 0;
@@ -4354,7 +4583,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
.guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
};
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return arr;
/*
@@ -4952,7 +5181,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}
- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5041,7 +5270,7 @@ static inline bool intel_pmu_broken_perf_cap(void)
return false;
}
-static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
+static void update_pmu_cap(struct pmu *pmu)
{
unsigned int cntr, fixed_cntr, ecx, edx;
union cpuid35_eax eax;
@@ -5050,20 +5279,30 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
if (ebx.split.umask2)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
if (ebx.split.eq)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
if (eax.split.cntr_subleaf) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
&cntr, &fixed_cntr, &ecx, &edx);
- pmu->cntr_mask64 = cntr;
- pmu->fixed_cntr_mask64 = fixed_cntr;
+ hybrid(pmu, cntr_mask64) = cntr;
+ hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
+ }
+
+ if (eax.split.acr_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
+ &cntr, &fixed_cntr, &ecx, &edx);
+ /* The mask of the counters which can be reloaded */
+ hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
+
+ /* The mask of the counters which can cause a reload of reloadable counters */
+ hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
}
if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities);
}
}
@@ -5150,7 +5389,7 @@ static bool init_hybrid_pmu(int cpu)
goto end;
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
- update_pmu_cap(pmu);
+ update_pmu_cap(&pmu->pmu);
intel_pmu_check_hybrid_pmus(pmu);
@@ -5211,7 +5450,7 @@ static void intel_pmu_cpu_starting(int cpu)
if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
union perf_capabilities perf_cap;
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
if (!perf_cap.perf_metrics) {
x86_pmu.intel_cap.perf_metrics = 0;
x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
@@ -5524,7 +5763,7 @@ static __init void intel_clovertown_quirk(void)
* these chips.
*/
pr_warn("PEBS disabled due to CPU errata\n");
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
@@ -5619,24 +5858,24 @@ static bool check_msr(unsigned long msr, u64 mask)
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- if (rdmsrl_safe(msr, &val_old))
+ if (rdmsrq_safe(msr, &val_old))
return false;
/*
- * Only change the bits which can be updated by wrmsrl.
+ * Only change the bits which can be updated by wrmsrq.
*/
val_tmp = val_old ^ mask;
if (is_lbr_from(msr))
val_tmp = lbr_from_signext_quirk_wr(val_tmp);
- if (wrmsrl_safe(msr, val_tmp) ||
- rdmsrl_safe(msr, &val_new))
+ if (wrmsrq_safe(msr, val_tmp) ||
+ rdmsrq_safe(msr, &val_new))
return false;
/*
- * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
- * should equal rdmsrl()'s even with the quirk.
+ * Quirk only affects validation in wrmsr(), so wrmsrq()'s value
+ * should equal rdmsrq()'s even with the quirk.
*/
if (val_new != val_tmp)
return false;
@@ -5647,7 +5886,7 @@ static bool check_msr(unsigned long msr, u64 mask)
/* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return.
*/
- wrmsrl(msr, val_old);
+ wrmsrq(msr, val_old);
return true;
}
@@ -6012,7 +6251,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
static umode_t
pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- return x86_pmu.pebs ? attr->mode : 0;
+ return x86_pmu.ds_pebs ? attr->mode : 0;
}
static umode_t
@@ -6043,6 +6282,21 @@ td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return attr->mode;
}
+PMU_FORMAT_ATTR(acr_mask, "config2:0-63");
+
+static struct attribute *format_acr_attrs[] = {
+ &format_attr_acr_mask.attr,
+ NULL
+};
+
+static umode_t
+acr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0;
+}
+
static struct attribute_group group_events_td = {
.name = "events",
.is_visible = td_is_visible,
@@ -6085,6 +6339,12 @@ static struct attribute_group group_format_evtsel_ext = {
.is_visible = evtsel_ext_is_visible,
};
+static struct attribute_group group_format_acr = {
+ .name = "format",
+ .attrs = format_acr_attrs,
+ .is_visible = acr_is_visible,
+};
+
static struct attribute_group group_default = {
.attrs = intel_pmu_attrs,
.is_visible = default_is_visible,
@@ -6099,6 +6359,7 @@ static const struct attribute_group *attr_update[] = {
&group_format_extra,
&group_format_extra_skl,
&group_format_evtsel_ext,
+ &group_format_acr,
&group_default,
NULL,
};
@@ -6383,6 +6644,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
&group_caps_lbr,
&hybrid_group_format_extra,
&group_format_evtsel_ext,
+ &group_format_acr,
&group_default,
&hybrid_group_cpus,
NULL,
@@ -6575,6 +6837,7 @@ static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
intel_pmu_init_grt(pmu);
hybrid(pmu, event_constraints) = intel_skt_event_constraints;
hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+ static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
}
__init int intel_pmu_init(void)
@@ -6635,6 +6898,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -6651,7 +6915,7 @@ __init int intel_pmu_init(void)
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, capabilities);
x86_pmu.intel_cap.capabilities = capabilities;
}
@@ -6663,7 +6927,7 @@ __init int intel_pmu_init(void)
if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
intel_pmu_arch_lbr_init();
- intel_ds_init();
+ intel_pebs_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
@@ -6674,6 +6938,12 @@ __init int intel_pmu_init(void)
}
/*
+ * Many features on and after V6 require dynamic constraint,
+ * e.g., Arch PEBS, ACR.
+ */
+ if (version >= 6)
+ x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
+ /*
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_vfm) {
@@ -6884,6 +7154,18 @@ __init int intel_pmu_init(void)
name = "crestmont";
break;
+ case INTEL_ATOM_DARKMONT_X:
+ intel_pmu_init_skt(NULL);
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ td_attr = skt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Darkmont events, ");
+ name = "darkmont";
+ break;
+
case INTEL_WESTMERE:
case INTEL_WESTMERE_EP:
case INTEL_WESTMERE_EX:
@@ -7433,6 +7715,18 @@ __init int intel_pmu_init(void)
x86_pmu.attr_update = hybrid_attr_update;
}
+ /*
+ * The archPerfmonExt (0x23) includes an enhanced enumeration of
+ * PMU architectural features with a per-core view. For non-hybrid,
+ * each core has the same PMU capabilities. It's good enough to
+ * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu
+ * is used to keep the common capabilities. Still keep the values
+ * from the leaf 0xa. The core specific update will be done later
+ * when a new type is online.
+ */
+ if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(NULL);
+
intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
&x86_pmu.fixed_cntr_mask64,
&x86_pmu.intel_ctrl);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index ae4ec16156bb..ec753e39b007 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -111,6 +111,7 @@
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "../perf_event.h"
#include "../probe.h"
@@ -320,7 +321,7 @@ static inline u64 cstate_pmu_read_counter(struct perf_event *event)
{
u64 val;
- rdmsrl(event->hw.event_base, val);
+ rdmsrq(event->hw.event_base, val);
return val;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 9b20acc0e932..c0b7ac1c7594 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -10,6 +10,7 @@
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
+#include <asm/msr.h>
#include <asm/timer.h>
#include "../perf_event.h"
@@ -624,7 +625,7 @@ static int alloc_pebs_buffer(int cpu)
int max, node = cpu_to_node(cpu);
void *buffer, *insn_buff, *cea;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return 0;
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
@@ -659,7 +660,7 @@ static void release_pebs_buffer(int cpu)
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
void *cea;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return;
kfree(per_cpu(insn_buffer, cpu));
@@ -734,7 +735,7 @@ void release_ds_buffers(void)
{
int cpu;
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
for_each_possible_cpu(cpu)
@@ -750,7 +751,8 @@ void release_ds_buffers(void)
}
for_each_possible_cpu(cpu) {
- release_pebs_buffer(cpu);
+ if (x86_pmu.ds_pebs)
+ release_pebs_buffer(cpu);
release_bts_buffer(cpu);
}
}
@@ -761,15 +763,17 @@ void reserve_ds_buffers(void)
int cpu;
x86_pmu.bts_active = 0;
- x86_pmu.pebs_active = 0;
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (x86_pmu.ds_pebs)
+ x86_pmu.pebs_active = 0;
+
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
if (!x86_pmu.bts)
bts_err = 1;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
pebs_err = 1;
for_each_possible_cpu(cpu) {
@@ -781,7 +785,8 @@ void reserve_ds_buffers(void)
if (!bts_err && alloc_bts_buffer(cpu))
bts_err = 1;
- if (!pebs_err && alloc_pebs_buffer(cpu))
+ if (x86_pmu.ds_pebs && !pebs_err &&
+ alloc_pebs_buffer(cpu))
pebs_err = 1;
if (bts_err && pebs_err)
@@ -793,7 +798,7 @@ void reserve_ds_buffers(void)
release_bts_buffer(cpu);
}
- if (pebs_err) {
+ if (x86_pmu.ds_pebs && pebs_err) {
for_each_possible_cpu(cpu)
release_pebs_buffer(cpu);
}
@@ -805,7 +810,7 @@ void reserve_ds_buffers(void)
if (x86_pmu.bts && !bts_err)
x86_pmu.bts_active = 1;
- if (x86_pmu.pebs && !pebs_err)
+ if (x86_pmu.ds_pebs && !pebs_err)
x86_pmu.pebs_active = 1;
for_each_possible_cpu(cpu) {
@@ -1355,9 +1360,8 @@ static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
}
-static void intel_pmu_late_setup(void)
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
u64 pebs_data_cfg = 0;
int i;
@@ -1517,7 +1521,7 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
else
value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
}
- wrmsrl(base + idx, value);
+ wrmsrq(base + idx, value);
}
static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
@@ -1554,7 +1558,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
*/
intel_pmu_drain_pebs_buffer();
adaptive_pebs_record_size_update();
- wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
+ wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg);
cpuc->active_pebs_data_cfg = pebs_data_cfg;
}
}
@@ -1617,7 +1621,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
intel_pmu_pebs_via_pt_disable(event);
if (cpuc->enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}
@@ -1627,7 +1631,7 @@ void intel_pmu_pebs_enable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->pebs_enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}
void intel_pmu_pebs_disable_all(void)
@@ -1828,8 +1832,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -2082,7 +2084,6 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
sample_type = event->attr.sample_type;
format_group = basic->format_group;
perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;
setup_pebs_time(event, data, basic->tsc);
@@ -2276,7 +2277,7 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
WARN_ON(this_cpu_read(cpu_hw_events.enabled));
prev_raw_count = local64_read(&hwc->prev_count);
- rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ new_raw_count = rdpmc(hwc->event_base_rdpmc);
local64_set(&hwc->prev_count, new_raw_count);
/*
@@ -2359,8 +2360,7 @@ __intel_pmu_pebs_last_event(struct perf_event *event,
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
}
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
@@ -2465,8 +2465,9 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
setup_pebs_fixed_sample_data);
}
-static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
{
+ u64 pebs_enabled = cpuc->pebs_enabled & mask;
struct perf_event *event;
int bit;
@@ -2477,7 +2478,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int
* It needs to call intel_pmu_save_and_restart_reload() to
* update the event->count for this case.
*/
- for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+ for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
event = cpuc->events[bit];
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_save_and_restart_reload(event, 0);
@@ -2512,7 +2513,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
}
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ intel_pmu_pebs_event_update_no_drain(cpuc, mask);
return;
}
@@ -2588,8 +2589,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
if (error[bit]) {
perf_log_lost_samples(event, error[bit]);
- if (iregs && perf_event_account_interrupt(event))
- x86_pmu_stop(event, 0);
+ if (iregs)
+ perf_event_account_interrupt(event);
}
if (counts[bit]) {
@@ -2626,7 +2627,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+ intel_pmu_pebs_event_update_no_drain(cpuc, mask);
return;
}
@@ -2669,10 +2670,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
}
/*
- * BTS, PEBS probe and setup
+ * PEBS probe and setup
*/
-void __init intel_ds_init(void)
+void __init intel_pebs_init(void)
{
/*
* No support for 32bit formats
@@ -2680,13 +2681,12 @@ void __init intel_ds_init(void)
if (!boot_cpu_has(X86_FEATURE_DTES64))
return;
- x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
- x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
- if (x86_pmu.pebs) {
+ if (x86_pmu.ds_pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
char *pebs_qual = "";
int format = x86_pmu.intel_cap.pebs_format;
@@ -2694,6 +2694,11 @@ void __init intel_ds_init(void)
if (format < 4)
x86_pmu.intel_cap.pebs_baseline = 0;
+ x86_pmu.pebs_enable = intel_pmu_pebs_enable;
+ x86_pmu.pebs_disable = intel_pmu_pebs_disable;
+ x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
+ x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;
+
switch (format) {
case 0:
pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -2778,7 +2783,7 @@ void __init intel_ds_init(void)
default:
pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
}
}
}
@@ -2787,8 +2792,8 @@ void perf_restore_debug_store(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
- wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+ wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds);
}
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 034a1f6a457c..e614baf42926 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <asm/hardirq.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -159,18 +160,18 @@ static void knc_pmu_disable_all(void)
{
u64 val;
- rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
- wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
}
static void knc_pmu_enable_all(int added)
{
u64 val;
- rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
- wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
}
static inline void
@@ -182,7 +183,7 @@ knc_pmu_disable_event(struct perf_event *event)
val = hwc->config;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+ (void)wrmsrq_safe(hwc->config_base + hwc->idx, val);
}
static void knc_pmu_enable_event(struct perf_event *event)
@@ -193,21 +194,21 @@ static void knc_pmu_enable_event(struct perf_event *event)
val = hwc->config;
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+ (void)wrmsrq_safe(hwc->config_base + hwc->idx, val);
}
static inline u64 knc_pmu_get_status(void)
{
u64 status;
- rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
return status;
}
static inline void knc_pmu_ack_status(u64 ack)
{
- wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+ wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
}
static int knc_pmu_handle_irq(struct pt_regs *regs)
@@ -241,19 +242,20 @@ again:
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
+ last_period = event->hw.last_period;
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f44c3d866f24..7aa59966e7c3 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -137,9 +137,9 @@ static void __intel_pmu_lbr_enable(bool pmi)
if (cpuc->lbr_sel)
lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
- wrmsrl(MSR_LBR_SELECT, lbr_select);
+ wrmsrq(MSR_LBR_SELECT, lbr_select);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
orig_debugctl = debugctl;
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
@@ -155,10 +155,10 @@ static void __intel_pmu_lbr_enable(bool pmi)
debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
if (orig_debugctl != debugctl)
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
- wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
+ wrmsrq(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}
void intel_pmu_lbr_reset_32(void)
@@ -166,7 +166,7 @@ void intel_pmu_lbr_reset_32(void)
int i;
for (i = 0; i < x86_pmu.lbr_nr; i++)
- wrmsrl(x86_pmu.lbr_from + i, 0);
+ wrmsrq(x86_pmu.lbr_from + i, 0);
}
void intel_pmu_lbr_reset_64(void)
@@ -174,17 +174,17 @@ void intel_pmu_lbr_reset_64(void)
int i;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
- wrmsrl(x86_pmu.lbr_from + i, 0);
- wrmsrl(x86_pmu.lbr_to + i, 0);
+ wrmsrq(x86_pmu.lbr_from + i, 0);
+ wrmsrq(x86_pmu.lbr_to + i, 0);
if (x86_pmu.lbr_has_info)
- wrmsrl(x86_pmu.lbr_info + i, 0);
+ wrmsrq(x86_pmu.lbr_info + i, 0);
}
}
static void intel_pmu_arch_lbr_reset(void)
{
/* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
- wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
+ wrmsrq(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
}
void intel_pmu_lbr_reset(void)
@@ -199,7 +199,7 @@ void intel_pmu_lbr_reset(void)
cpuc->last_task_ctx = NULL;
cpuc->last_log_id = 0;
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
- wrmsrl(MSR_LBR_SELECT, 0);
+ wrmsrq(MSR_LBR_SELECT, 0);
}
/*
@@ -209,7 +209,7 @@ static inline u64 intel_pmu_lbr_tos(void)
{
u64 tos;
- rdmsrl(x86_pmu.lbr_tos, tos);
+ rdmsrq(x86_pmu.lbr_tos, tos);
return tos;
}
@@ -282,17 +282,17 @@ static u64 lbr_from_signext_quirk_rd(u64 val)
static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
val = lbr_from_signext_quirk_wr(val);
- wrmsrl(x86_pmu.lbr_from + idx, val);
+ wrmsrq(x86_pmu.lbr_from + idx, val);
}
static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
- wrmsrl(x86_pmu.lbr_to + idx, val);
+ wrmsrq(x86_pmu.lbr_to + idx, val);
}
static __always_inline void wrlbr_info(unsigned int idx, u64 val)
{
- wrmsrl(x86_pmu.lbr_info + idx, val);
+ wrmsrq(x86_pmu.lbr_info + idx, val);
}
static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
@@ -302,7 +302,7 @@ static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
if (lbr)
return lbr->from;
- rdmsrl(x86_pmu.lbr_from + idx, val);
+ rdmsrq(x86_pmu.lbr_from + idx, val);
return lbr_from_signext_quirk_rd(val);
}
@@ -314,7 +314,7 @@ static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
if (lbr)
return lbr->to;
- rdmsrl(x86_pmu.lbr_to + idx, val);
+ rdmsrq(x86_pmu.lbr_to + idx, val);
return val;
}
@@ -326,7 +326,7 @@ static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
if (lbr)
return lbr->info;
- rdmsrl(x86_pmu.lbr_info + idx, val);
+ rdmsrq(x86_pmu.lbr_info + idx, val);
return val;
}
@@ -380,10 +380,10 @@ void intel_pmu_lbr_restore(void *ctx)
wrlbr_info(lbr_idx, 0);
}
- wrmsrl(x86_pmu.lbr_tos, tos);
+ wrmsrq(x86_pmu.lbr_tos, tos);
if (cpuc->lbr_select)
- wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+ wrmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel);
}
static void intel_pmu_arch_lbr_restore(void *ctx)
@@ -475,7 +475,7 @@ void intel_pmu_lbr_save(void *ctx)
task_ctx->tos = tos;
if (cpuc->lbr_select)
- rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+ rdmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel);
}
static void intel_pmu_arch_lbr_save(void *ctx)
@@ -752,7 +752,7 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
u64 lbr;
} msr_lastbranch;
- rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+ rdmsrq(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
perf_clear_branch_entry_bitfields(br);
@@ -1602,7 +1602,7 @@ void __init intel_pmu_arch_lbr_init(void)
goto clear_arch_lbr;
/* Apply the max depth of Arch LBR */
- if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
+ if (wrmsrq_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
goto clear_arch_lbr;
x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
@@ -1618,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_nr = lbr_nr;
if (!!x86_pmu.lbr_counters)
- x86_pmu.flags |= PMU_FL_BR_CNTR;
+ x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT;
if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index c85a9fc44355..e5fd7367e45d 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -13,6 +13,7 @@
#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -859,9 +860,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
u64 v;
/* an official way for overflow indication */
- rdmsrl(hwc->config_base, v);
+ rdmsrq(hwc->config_base, v);
if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
+ wrmsrq(hwc->config_base, v & ~P4_CCCR_OVF);
return 1;
}
@@ -872,7 +873,7 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
* the counter has reached zero value and continued counting before
* real NMI signal was received:
*/
- rdmsrl(hwc->event_base, v);
+ rdmsrq(hwc->event_base, v);
if (!(v & ARCH_P4_UNFLAGGED_BIT))
return 1;
@@ -897,8 +898,8 @@ static void p4_pmu_disable_pebs(void)
* So at moment let leave metrics turned on forever -- it's
* ok for now but need to be revisited!
*
- * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
- * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
+ * (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, 0);
+ * (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
*/
}
@@ -911,7 +912,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)wrmsrl_safe(hwc->config_base,
+ (void)wrmsrq_safe(hwc->config_base,
p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -944,8 +945,8 @@ static void p4_pmu_enable_pebs(u64 config)
bind = &p4_pebs_bind_map[idx];
- (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
- (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
+ (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+ (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}
static void __p4_pmu_enable_event(struct perf_event *event)
@@ -979,8 +980,8 @@ static void __p4_pmu_enable_event(struct perf_event *event)
*/
p4_pmu_enable_pebs(hwc->config);
- (void)wrmsrl_safe(escr_addr, escr_conf);
- (void)wrmsrl_safe(hwc->config_base,
+ (void)wrmsrq_safe(escr_addr, escr_conf);
+ (void)wrmsrq_safe(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
@@ -1024,7 +1025,7 @@ static int p4_pmu_set_period(struct perf_event *event)
*
* the former idea is taken from OProfile code
*/
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
}
return ret;
@@ -1072,8 +1073,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
continue;
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
if (handled)
@@ -1398,7 +1398,7 @@ __init int p4_pmu_init(void)
*/
for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
- wrmsrl_safe(reg, 0ULL);
+ wrmsrq_safe(reg, 0ULL);
}
return 0;
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index 65b45e9d7016..6e41de355bd8 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -142,9 +143,9 @@ static void p6_pmu_disable_all(void)
u64 val;
/* p6 only has one enable register */
- rdmsrl(MSR_P6_EVNTSEL0, val);
+ rdmsrq(MSR_P6_EVNTSEL0, val);
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(MSR_P6_EVNTSEL0, val);
+ wrmsrq(MSR_P6_EVNTSEL0, val);
}
static void p6_pmu_enable_all(int added)
@@ -152,9 +153,9 @@ static void p6_pmu_enable_all(int added)
unsigned long val;
/* p6 only has one enable register */
- rdmsrl(MSR_P6_EVNTSEL0, val);
+ rdmsrq(MSR_P6_EVNTSEL0, val);
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(MSR_P6_EVNTSEL0, val);
+ wrmsrq(MSR_P6_EVNTSEL0, val);
}
static inline void
@@ -163,7 +164,7 @@ p6_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
u64 val = P6_NOP_EVENT;
- (void)wrmsrl_safe(hwc->config_base, val);
+ (void)wrmsrq_safe(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
@@ -180,7 +181,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
* to actually enable the events.
*/
- (void)wrmsrl_safe(hwc->config_base, val);
+ (void)wrmsrq_safe(hwc->config_base, val);
}
PMU_FORMAT_ATTR(event, "config:0-7" );
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fa37565f6418..e8cf29d2b10c 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -18,12 +18,13 @@
#include <linux/slab.h>
#include <linux/device.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
#include "pt.h"
@@ -194,7 +195,7 @@ static int __init pt_pmu_hw_init(void)
int ret;
long i;
- rdmsrl(MSR_PLATFORM_INFO, reg);
+ rdmsrq(MSR_PLATFORM_INFO, reg);
pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
/*
@@ -230,7 +231,7 @@ static int __init pt_pmu_hw_init(void)
* "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
* post-VMXON.
*/
- rdmsrl(MSR_IA32_VMX_MISC, reg);
+ rdmsrq(MSR_IA32_VMX_MISC, reg);
if (reg & BIT(14))
pt_pmu.vmx = true;
}
@@ -426,7 +427,7 @@ static void pt_config_start(struct perf_event *event)
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
else
- wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ wrmsrq(MSR_IA32_RTIT_CTL, ctl);
WRITE_ONCE(event->hw.aux_config, ctl);
}
@@ -485,12 +486,12 @@ static u64 pt_config_filters(struct perf_event *event)
/* avoid redundant msr writes */
if (pt->filters.filter[range].msr_a != filter->msr_a) {
- wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+ wrmsrq(pt_address_ranges[range].msr_a, filter->msr_a);
pt->filters.filter[range].msr_a = filter->msr_a;
}
if (pt->filters.filter[range].msr_b != filter->msr_b) {
- wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+ wrmsrq(pt_address_ranges[range].msr_b, filter->msr_b);
pt->filters.filter[range].msr_b = filter->msr_b;
}
@@ -509,7 +510,7 @@ static void pt_config(struct perf_event *event)
/* First round: clear STATUS, in particular the PSB byte counter. */
if (!event->hw.aux_config) {
perf_event_itrace_started(event);
- wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+ wrmsrq(MSR_IA32_RTIT_STATUS, 0);
}
reg = pt_config_filters(event);
@@ -569,7 +570,7 @@ static void pt_config_stop(struct perf_event *event)
ctl &= ~RTIT_CTL_TRACEEN;
if (!READ_ONCE(pt->vmx_on))
- wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ wrmsrq(MSR_IA32_RTIT_CTL, ctl);
WRITE_ONCE(event->hw.aux_config, ctl);
@@ -658,13 +659,13 @@ static void pt_config_buffer(struct pt_buffer *buf)
reg = virt_to_phys(base);
if (pt->output_base != reg) {
pt->output_base = reg;
- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+ wrmsrq(MSR_IA32_RTIT_OUTPUT_BASE, reg);
}
reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
if (pt->output_mask != reg) {
pt->output_mask = reg;
- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+ wrmsrq(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
}
@@ -926,7 +927,7 @@ static void pt_handle_status(struct pt *pt)
int advance = 0;
u64 status;
- rdmsrl(MSR_IA32_RTIT_STATUS, status);
+ rdmsrq(MSR_IA32_RTIT_STATUS, status);
if (status & RTIT_STATUS_ERROR) {
pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
@@ -970,7 +971,7 @@ static void pt_handle_status(struct pt *pt)
if (advance)
pt_buffer_advance(buf);
- wrmsrl(MSR_IA32_RTIT_STATUS, status);
+ wrmsrq(MSR_IA32_RTIT_STATUS, status);
}
/**
@@ -985,12 +986,12 @@ static void pt_read_offset(struct pt_buffer *buf)
struct topa_page *tp;
if (!buf->single) {
- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+ rdmsrq(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
tp = phys_to_virt(pt->output_base);
buf->cur = &tp->topa;
}
- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
+ rdmsrq(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
/* offset within current output region */
buf->output_off = pt->output_mask >> 32;
/* index of current output region within this table */
@@ -1585,7 +1586,7 @@ void intel_pt_handle_vmx(int on)
/* Turn PTs back on */
if (!on && event)
- wrmsrl(MSR_IA32_RTIT_CTL, event->hw.aux_config);
+ wrmsrq(MSR_IA32_RTIT_CTL, event->hw.aux_config);
local_irq_restore(flags);
}
@@ -1611,7 +1612,7 @@ static void pt_event_start(struct perf_event *event, int mode)
* PMI might have just cleared these, so resume_allowed
* must be checked again also.
*/
- rdmsrl(MSR_IA32_RTIT_STATUS, status);
+ rdmsrq(MSR_IA32_RTIT_STATUS, status);
if (!(status & (RTIT_STATUS_TRIGGEREN |
RTIT_STATUS_ERROR |
RTIT_STATUS_STOPPED)) &&
@@ -1839,7 +1840,7 @@ static __init int pt_init(void)
for_each_online_cpu(cpu) {
u64 ctl;
- ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+ ret = rdmsrq_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
if (!ret && (ctl & RTIT_CTL_TRACEEN))
prior_warn++;
}
@@ -1863,6 +1864,8 @@ static __init int pt_init(void)
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
+ else
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
PERF_PMU_CAP_ITRACE |
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index a34e50fc4a8f..e0815a12db90 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -3,6 +3,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "uncore.h"
#include "uncore_discovery.h"
@@ -150,7 +151,7 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve
{
u64 count;
- rdmsrl(event->hw.event_base, count);
+ rdmsrq(event->hw.event_base, count);
return count;
}
@@ -305,17 +306,11 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
struct intel_uncore_box *box;
struct perf_event *event;
- unsigned long flags;
int bit;
box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
if (!box->n_active || box->cpu != smp_processor_id())
return HRTIMER_NORESTART;
- /*
- * disable local interrupt to prevent uncore_pmu_event_start/stop
- * to interrupt the update process
- */
- local_irq_save(flags);
/*
* handle boxes with an active event list as opposed to active
@@ -328,8 +323,6 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
uncore_perf_event_update(box, box->events[bit]);
- local_irq_restore(flags);
-
hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
return HRTIMER_RESTART;
}
@@ -337,7 +330,7 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
}
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
@@ -347,7 +340,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
- hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}
static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 571e44b49691..18a3022f26a0 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -5,6 +5,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/msr.h>
#include "uncore.h"
#include "uncore_discovery.h"
@@ -441,17 +442,17 @@ static u64 intel_generic_uncore_box_ctl(struct intel_uncore_box *box)
void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
{
- wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
+ wrmsrq(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
}
void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
{
- wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
+ wrmsrq(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
}
void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(intel_generic_uncore_box_ctl(box), 0);
+ wrmsrq(intel_generic_uncore_box_ctl(box), 0);
}
static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box,
@@ -459,7 +460,7 @@ static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box,
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, hwc->config);
+ wrmsrq(hwc->config_base, hwc->config);
}
static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box,
@@ -467,7 +468,7 @@ static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box,
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, 0);
+ wrmsrq(hwc->config_base, 0);
}
static struct intel_uncore_ops generic_uncore_msr_ops = {
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 466833478e81..8962e7cb21e3 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem-EX/Westmere-EX uncore support */
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "uncore.h"
/* NHM-EX event control */
@@ -200,12 +201,12 @@ DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
{
- wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+ wrmsrq(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
}
static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
{
- wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+ wrmsrq(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
}
static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
@@ -214,12 +215,12 @@ static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
u64 config;
if (msr) {
- rdmsrl(msr, config);
+ rdmsrq(msr, config);
config &= ~((1ULL << uncore_num_counters(box)) - 1);
/* WBox has a fixed counter */
if (uncore_msr_fixed_ctl(box))
config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
- wrmsrl(msr, config);
+ wrmsrq(msr, config);
}
}
@@ -229,18 +230,18 @@ static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
u64 config;
if (msr) {
- rdmsrl(msr, config);
+ rdmsrq(msr, config);
config |= (1ULL << uncore_num_counters(box)) - 1;
/* WBox has a fixed counter */
if (uncore_msr_fixed_ctl(box))
config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
- wrmsrl(msr, config);
+ wrmsrq(msr, config);
}
}
static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{
- wrmsrl(event->hw.config_base, 0);
+ wrmsrq(event->hw.config_base, 0);
}
static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -248,11 +249,11 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
struct hw_perf_event *hwc = &event->hw;
if (hwc->idx == UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+ wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+ wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
else
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+ wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
}
#define NHMEX_UNCORE_OPS_COMMON_INIT() \
@@ -382,10 +383,10 @@ static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct per
struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
if (reg1->idx != EXTRA_REG_NONE) {
- wrmsrl(reg1->reg, reg1->config);
- wrmsrl(reg1->reg + 1, reg2->config);
+ wrmsrq(reg1->reg, reg1->config);
+ wrmsrq(reg1->reg + 1, reg2->config);
}
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
(hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
}
@@ -467,12 +468,12 @@ static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct per
struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
if (reg1->idx != EXTRA_REG_NONE) {
- wrmsrl(reg1->reg, 0);
- wrmsrl(reg1->reg + 1, reg1->config);
- wrmsrl(reg1->reg + 2, reg2->config);
- wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+ wrmsrq(reg1->reg, 0);
+ wrmsrq(reg1->reg + 1, reg1->config);
+ wrmsrq(reg1->reg + 2, reg2->config);
+ wrmsrq(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
}
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+ wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
}
static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
@@ -842,25 +843,25 @@ static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct per
idx = __BITS_VALUE(reg1->idx, 0, 8);
if (idx != 0xff)
- wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+ wrmsrq(__BITS_VALUE(reg1->reg, 0, 16),
nhmex_mbox_shared_reg_config(box, idx));
idx = __BITS_VALUE(reg1->idx, 1, 8);
if (idx != 0xff)
- wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+ wrmsrq(__BITS_VALUE(reg1->reg, 1, 16),
nhmex_mbox_shared_reg_config(box, idx));
if (reg2->idx != EXTRA_REG_NONE) {
- wrmsrl(reg2->reg, 0);
+ wrmsrq(reg2->reg, 0);
if (reg2->config != ~0ULL) {
- wrmsrl(reg2->reg + 1,
+ wrmsrq(reg2->reg + 1,
reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
- wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+ wrmsrq(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
(reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
- wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ wrmsrq(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
}
}
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+ wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
}
DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
@@ -1121,31 +1122,31 @@ static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct per
switch (idx % 6) {
case 0:
- wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+ wrmsrq(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
break;
case 1:
- wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
+ wrmsrq(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
break;
case 2:
case 3:
- wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+ wrmsrq(NHMEX_R_MSR_PORTN_QLX_CFG(port),
uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
break;
case 4:
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+ wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
hwc->config >> 32);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+ wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+ wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
break;
case 5:
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+ wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
hwc->config >> 32);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
+ wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+ wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
break;
}
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
(hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
}
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index edb7fd50efe0..a1a96833e30e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
+#include <asm/msr.h>
#include "uncore.h"
#include "uncore_discovery.h"
@@ -260,34 +261,34 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct per
struct hw_perf_event *hwc = &event->hw;
if (hwc->idx < UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
else
- wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+ wrmsrq(hwc->config_base, SNB_UNC_CTL_EN);
}
static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{
- wrmsrl(event->hw.config_base, 0);
+ wrmsrq(event->hw.config_base, 0);
}
static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0) {
- wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ wrmsrq(SNB_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
}
}
static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ wrmsrq(SNB_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
}
static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+ wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, 0);
}
static struct uncore_event_desc snb_uncore_events[] = {
@@ -372,7 +373,7 @@ void snb_uncore_cpu_init(void)
static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0) {
- wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ wrmsrq(SKL_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
}
@@ -383,14 +384,14 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ wrmsrq(SKL_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
}
static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
+ wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, 0);
}
static struct intel_uncore_ops skl_uncore_msr_ops = {
@@ -504,7 +505,7 @@ static int icl_get_cbox_num(void)
{
u64 num_boxes;
- rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes);
+ rdmsrq(ICL_UNC_CBO_CONFIG, num_boxes);
return num_boxes & ICL_UNC_NUM_CBO_MASK;
}
@@ -525,7 +526,7 @@ static struct intel_uncore_type *tgl_msr_uncores[] = {
static void rkl_uncore_msr_init_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
+ wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
}
void tgl_uncore_cpu_init(void)
@@ -541,24 +542,24 @@ void tgl_uncore_cpu_init(void)
static void adl_uncore_msr_init_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
+ wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
}
static void adl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
+ wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
}
static void adl_uncore_msr_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0);
+ wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, 0);
}
static void adl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0);
+ wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, 0);
}
static struct intel_uncore_ops adl_uncore_msr_ops = {
@@ -691,7 +692,7 @@ static struct intel_uncore_type mtl_uncore_hac_cbox = {
static void mtl_uncore_msr_init_box(struct intel_uncore_box *box)
{
- wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN);
+ wrmsrq(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN);
}
static struct intel_uncore_ops mtl_uncore_msr_ops = {
@@ -758,7 +759,7 @@ static struct intel_uncore_type *lnl_msr_uncores[] = {
static void lnl_uncore_msr_init_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
- wrmsrl(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
+ wrmsrq(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
}
static struct intel_uncore_ops lnl_uncore_msr_ops = {
@@ -1306,12 +1307,12 @@ int skl_uncore_pci_init(void)
/* Nehalem uncore support */
static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+ wrmsrq(NHM_UNC_PERF_GLOBAL_CTL, 0);
}
static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+ wrmsrq(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
}
static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -1319,9 +1320,9 @@ static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct per
struct hw_perf_event *hwc = &event->hw;
if (hwc->idx < UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
else
- wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+ wrmsrq(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
}
static struct attribute *nhm_uncore_formats_attr[] = {
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 76d96df1475a..2824dc9950be 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* SandyBridge-EP/IvyTown uncore support */
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "uncore.h"
#include "uncore_discovery.h"
@@ -618,9 +619,9 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
msr = uncore_msr_box_ctl(box);
if (msr) {
- rdmsrl(msr, config);
+ rdmsrq(msr, config);
config |= SNBEP_PMON_BOX_CTL_FRZ;
- wrmsrl(msr, config);
+ wrmsrq(msr, config);
}
}
@@ -631,9 +632,9 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
msr = uncore_msr_box_ctl(box);
if (msr) {
- rdmsrl(msr, config);
+ rdmsrq(msr, config);
config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- wrmsrl(msr, config);
+ wrmsrq(msr, config);
}
}
@@ -643,9 +644,9 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
if (reg1->idx != EXTRA_REG_NONE)
- wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
+ wrmsrq(reg1->reg, uncore_shared_reg_config(box, 0));
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
@@ -653,7 +654,7 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, hwc->config);
+ wrmsrq(hwc->config_base, hwc->config);
}
static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
@@ -661,7 +662,7 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
unsigned msr = uncore_msr_box_ctl(box);
if (msr)
- wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+ wrmsrq(msr, SNBEP_PMON_BOX_CTL_INT);
}
static struct attribute *snbep_uncore_formats_attr[] = {
@@ -1532,7 +1533,7 @@ static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
{
unsigned msr = uncore_msr_box_ctl(box);
if (msr)
- wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
+ wrmsrq(msr, IVBEP_PMON_BOX_CTL_INT);
}
static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
@@ -1783,11 +1784,11 @@ static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_ev
if (reg1->idx != EXTRA_REG_NONE) {
u64 filter = uncore_shared_reg_config(box, 0);
- wrmsrl(reg1->reg, filter & 0xffffffff);
- wrmsrl(reg1->reg + 6, filter >> 32);
+ wrmsrq(reg1->reg, filter & 0xffffffff);
+ wrmsrq(reg1->reg + 6, filter >> 32);
}
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
@@ -2767,11 +2768,11 @@ static void hswep_cbox_enable_event(struct intel_uncore_box *box,
if (reg1->idx != EXTRA_REG_NONE) {
u64 filter = uncore_shared_reg_config(box, 0);
- wrmsrl(reg1->reg, filter & 0xffffffff);
- wrmsrl(reg1->reg + 1, filter >> 32);
+ wrmsrq(reg1->reg, filter & 0xffffffff);
+ wrmsrq(reg1->reg + 1, filter >> 32);
}
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
static struct intel_uncore_ops hswep_uncore_cbox_ops = {
@@ -2816,7 +2817,7 @@ static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
for_each_set_bit(i, (unsigned long *)&init, 64) {
flags |= (1ULL << i);
- wrmsrl(msr, flags);
+ wrmsrq(msr, flags);
}
}
}
@@ -3708,7 +3709,7 @@ static void skx_iio_enable_event(struct intel_uncore_box *box,
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
static struct intel_uncore_ops skx_uncore_iio_ops = {
@@ -3765,7 +3766,7 @@ static int skx_msr_cpu_bus_read(int cpu, u64 *topology)
{
u64 msr_value;
- if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) ||
+ if (rdmsrq_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) ||
!(msr_value & SKX_MSR_CPU_BUS_VALID_BIT))
return -ENXIO;
@@ -4655,9 +4656,9 @@ static void snr_cha_enable_event(struct intel_uncore_box *box,
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
if (reg1->idx != EXTRA_REG_NONE)
- wrmsrl(reg1->reg, reg1->config);
+ wrmsrq(reg1->reg, reg1->config);
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+ wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
static struct intel_uncore_ops snr_uncore_chabox_ops = {
@@ -5882,9 +5883,9 @@ static void spr_uncore_msr_enable_event(struct intel_uncore_box *box,
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
if (reg1->idx != EXTRA_REG_NONE)
- wrmsrl(reg1->reg, reg1->config);
+ wrmsrq(reg1->reg, reg1->config);
- wrmsrl(hwc->config_base, hwc->config);
+ wrmsrq(hwc->config_base, hwc->config);
}
static void spr_uncore_msr_disable_event(struct intel_uncore_box *box,
@@ -5894,9 +5895,9 @@ static void spr_uncore_msr_disable_event(struct intel_uncore_box *box,
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
if (reg1->idx != EXTRA_REG_NONE)
- wrmsrl(reg1->reg, 0);
+ wrmsrq(reg1->reg, 0);
- wrmsrl(hwc->config_base, 0);
+ wrmsrq(hwc->config_base, 0);
}
static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
@@ -6485,7 +6486,7 @@ void spr_uncore_cpu_init(void)
* of UNCORE_SPR_CHA) is incorrect on some SPR variants because of a
* firmware bug. Using the value from SPR_MSR_UNC_CBO_CONFIG to replace it.
*/
- rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo);
+ rdmsrq(SPR_MSR_UNC_CBO_CONFIG, num_cbo);
/*
* The MSR doesn't work on the EMR XCC, but the firmware bug doesn't impact
* the EMR XCC. Don't let the value from the MSR replace the existing value.
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 45b1866ff051..7f5007a4752a 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -3,6 +3,8 @@
#include <linux/sysfs.h>
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
+
#include "probe.h"
enum perf_msr_id {
@@ -231,7 +233,7 @@ static inline u64 msr_read_counter(struct perf_event *event)
u64 now;
if (event->hw.event_base)
- rdmsrl(event->hw.event_base, now);
+ rdmsrq(event->hw.event_base, now);
else
now = rdtsc_ordered();
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 46d120597bab..2b969386dcdd 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -17,6 +17,7 @@
#include <asm/fpu/xstate.h>
#include <asm/intel_ds.h>
#include <asm/cpu.h>
+#include <asm/msr.h>
/* To enable MSR tracing please use the generic trace points. */
@@ -127,6 +128,11 @@ static inline bool is_pebs_counter_event_group(struct perf_event *event)
return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR);
}
+static inline bool is_acr_event_group(struct perf_event *event)
+{
+ return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
@@ -268,6 +274,7 @@ struct cpu_hw_events {
struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
int n_excl; /* the number of exclusive events */
+ int n_late_setup; /* the num of events needs late setup */
unsigned int txn_flags;
int is_fake;
@@ -293,6 +300,10 @@ struct cpu_hw_events {
u64 fixed_ctrl_val;
u64 active_fixed_ctrl_val;
+ /* Intel ACR configuration */
+ u64 acr_cfg_b[X86_PMC_IDX_MAX];
+ u64 acr_cfg_c[X86_PMC_IDX_MAX];
+
/*
* Intel LBR bits
*/
@@ -714,6 +725,15 @@ struct x86_hybrid_pmu {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 acr_cause_mask64;
+ unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
struct event_constraint unconstrained;
u64 hw_cache_event_ids
@@ -796,6 +816,10 @@ struct x86_pmu {
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
void (*late_setup)(void);
+ void (*pebs_enable)(struct perf_event *event);
+ void (*pebs_disable)(struct perf_event *event);
+ void (*pebs_enable_all)(void);
+ void (*pebs_disable_all)(void);
unsigned eventsel;
unsigned perfctr;
unsigned fixedctr;
@@ -812,6 +836,14 @@ struct x86_pmu {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 acr_cause_mask64;
+ unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
int cntval_bits;
u64 cntval_mask;
union {
@@ -878,7 +910,7 @@ struct x86_pmu {
*/
unsigned int bts :1,
bts_active :1,
- pebs :1,
+ ds_pebs :1,
pebs_active :1,
pebs_broken :1,
pebs_prec_dist :1,
@@ -1049,6 +1081,7 @@ do { \
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
+#define PMU_FL_DYN_CONSTRAINT 0x800 /* Needs dynamic constraint */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1091,6 +1124,7 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
.pmu_type = _pmu, \
}
+int is_x86_event(struct perf_event *event);
struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly;
@@ -1098,6 +1132,10 @@ DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup);
+DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
@@ -1205,16 +1243,16 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
if (hwc->extra_reg.reg)
- wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+ wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
/*
* Add enabled Merge event on next counter
* if large increment event being enabled on this counter
*/
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
+ wrmsrq(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
- wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
+ wrmsrq(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}
void x86_pmu_enable_all(int added);
@@ -1230,10 +1268,10 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
+ wrmsrq(hwc->config_base, hwc->config & ~disable_mask);
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
+ wrmsrq(x86_pmu_config_addr(hwc->idx + 1), 0);
}
void x86_pmu_enable_event(struct perf_event *event);
@@ -1401,12 +1439,12 @@ static __always_inline void __amd_pmu_lbr_disable(void)
{
u64 dbg_ctl, dbg_extn_cfg;
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+ rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
}
}
@@ -1538,21 +1576,21 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
static __always_inline void __intel_pmu_pebs_disable_all(void)
{
- wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, 0);
}
static __always_inline void __intel_pmu_arch_lbr_disable(void)
{
- wrmsrl(MSR_ARCH_LBR_CTL, 0);
+ wrmsrq(MSR_ARCH_LBR_CTL, 0);
}
static __always_inline void __intel_pmu_lbr_disable(void)
{
u64 debugctl;
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
}
int intel_pmu_save_and_restart(struct perf_event *event);
@@ -1587,6 +1625,8 @@ void intel_pmu_disable_bts(void);
int intel_pmu_drain_bts_buffer(void);
+void intel_pmu_late_setup(void);
+
u64 grt_latency_data(struct perf_event *event, u64 status);
u64 cmt_latency_data(struct perf_event *event, u64 status);
@@ -1643,11 +1683,13 @@ void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc);
+
void intel_pmu_drain_pebs_buffer(void);
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
-void intel_ds_init(void);
+void intel_pebs_init(void);
void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
struct cpu_hw_events *cpuc,
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1d9e385649b5..70078334e4a3 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -2,23 +2,24 @@
/*
* struct hw_perf_event.flags flags
*/
-PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */
-PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */
-PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */
-PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
-PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
-PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
-PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
-PERF_ARCH(PEBS_CNTR, 0x00080) /* PEBS counters snapshot */
-PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
-PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
-PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
-PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */
-PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */
-PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */
-PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
-PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
-PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
-PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
-PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
-PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(PEBS_LDLAT, 0x0000001) /* ld+ldlat data address sampling */
+PERF_ARCH(PEBS_ST, 0x0000002) /* st data address sampling */
+PERF_ARCH(PEBS_ST_HSW, 0x0000004) /* haswell style datala, store */
+PERF_ARCH(PEBS_LD_HSW, 0x0000008) /* haswell style datala, load */
+PERF_ARCH(PEBS_NA_HSW, 0x0000010) /* haswell style datala, unknown */
+PERF_ARCH(EXCL, 0x0000020) /* HT exclusivity on counter */
+PERF_ARCH(DYNAMIC, 0x0000040) /* dynamic alloc'd constraint */
+PERF_ARCH(PEBS_CNTR, 0x0000080) /* PEBS counters snapshot */
+PERF_ARCH(EXCL_ACCT, 0x0000100) /* accounted EXCL event */
+PERF_ARCH(AUTO_RELOAD, 0x0000200) /* use PEBS auto-reload */
+PERF_ARCH(LARGE_PEBS, 0x0000400) /* use large PEBS */
+PERF_ARCH(PEBS_VIA_PT, 0x0000800) /* use PT buffer for PEBS */
+PERF_ARCH(PAIR, 0x0001000) /* Large Increment per Cycle */
+PERF_ARCH(LBR_SELECT, 0x0002000) /* Save/Restore MSR_LBR_SELECT */
+PERF_ARCH(TOPDOWN, 0x0004000) /* Count Topdown slots/metrics events */
+PERF_ARCH(PEBS_STLAT, 0x0008000) /* st+stlat data address sampling */
+PERF_ARCH(AMD_BRS, 0x0010000) /* AMD Branch Sampling */
+PERF_ARCH(PEBS_LAT_HYBRID, 0x0020000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
index 600bf8d15c0c..bb719d0d3f0b 100644
--- a/arch/x86/events/probe.c
+++ b/arch/x86/events/probe.c
@@ -2,6 +2,8 @@
#include <linux/export.h>
#include <linux/types.h>
#include <linux/bits.h>
+
+#include <asm/msr.h>
#include "probe.h"
static umode_t
@@ -43,7 +45,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
if (msr[bit].test && !msr[bit].test(bit, data))
continue;
/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
- if (rdmsrl_safe(msr[bit].msr, &val))
+ if (rdmsrq_safe(msr[bit].msr, &val))
continue;
mask = msr[bit].mask;
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 8ddace8cea96..defd86137f12 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -65,6 +65,7 @@
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "perf_event.h"
#include "probe.h"
@@ -192,7 +193,7 @@ static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
- rdmsrl(event->hw.event_base, raw);
+ rdmsrq(event->hw.event_base, raw);
return raw;
}
@@ -221,7 +222,7 @@ static u64 rapl_event_update(struct perf_event *event)
prev_raw_count = local64_read(&hwc->prev_count);
do {
- rdmsrl(event->hw.event_base, new_raw_count);
+ rdmsrq(event->hw.event_base, new_raw_count);
} while (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count));
@@ -610,8 +611,8 @@ static int rapl_check_hw_unit(void)
u64 msr_rapl_power_unit_bits;
int i;
- /* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
+ /* protect rdmsrq() to handle virtualization */
+ if (rdmsrq_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c
index dab4ed199227..77fd00b3305e 100644
--- a/arch/x86/events/utils.c
+++ b/arch/x86/events/utils.c
@@ -2,6 +2,7 @@
#include <asm/insn.h>
#include <linux/mm.h>
+#include <asm/msr.h>
#include "perf_event.h"
static int decode_branch_type(struct insn *insn)
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 2fd9b0cf9a5e..4bdfcf091200 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -15,6 +15,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -254,26 +255,26 @@ static __initconst const u64 zxe_hw_cache_event_ids
static void zhaoxin_pmu_disable_all(void)
{
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
static void zhaoxin_pmu_enable_all(int added)
{
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}
static inline u64 zhaoxin_pmu_get_status(void)
{
u64 status;
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
return status;
}
static inline void zhaoxin_pmu_ack_status(u64 ack)
{
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
static inline void zxc_pmu_ack_status(u64 ack)
@@ -293,9 +294,9 @@ static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
mask = 0xfULL << (idx * 4);
- rdmsrl(hwc->config_base, ctrl_val);
+ rdmsrq(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
- wrmsrl(hwc->config_base, ctrl_val);
+ wrmsrq(hwc->config_base, ctrl_val);
}
static void zhaoxin_pmu_disable_event(struct perf_event *event)
@@ -329,10 +330,10 @@ static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
- rdmsrl(hwc->config_base, ctrl_val);
+ rdmsrq(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
ctrl_val |= bits;
- wrmsrl(hwc->config_base, ctrl_val);
+ wrmsrq(hwc->config_base, ctrl_val);
}
static void zhaoxin_pmu_enable_event(struct perf_event *event)
@@ -397,8 +398,7 @@ again:
if (!x86_perf_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 6d91ac5f9836..bfde0a3498b9 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -28,6 +28,7 @@
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include <asm/trace/hyperv.h>
@@ -37,7 +38,7 @@ static u64 hv_apic_icr_read(void)
{
u64 reg_val;
- rdmsrl(HV_X64_MSR_ICR, reg_val);
+ rdmsrq(HV_X64_MSR_ICR, reg_val);
return reg_val;
}
@@ -49,7 +50,7 @@ static void hv_apic_icr_write(u32 low, u32 id)
reg_val = reg_val << 32;
reg_val |= low;
- wrmsrl(HV_X64_MSR_ICR, reg_val);
+ wrmsrq(HV_X64_MSR_ICR, reg_val);
}
static u32 hv_apic_read(u32 reg)
@@ -75,10 +76,10 @@ static void hv_apic_write(u32 reg, u32 val)
{
switch (reg) {
case APIC_EOI:
- wrmsr(HV_X64_MSR_EOI, val, 0);
+ wrmsrq(HV_X64_MSR_EOI, val);
break;
case APIC_TASKPRI:
- wrmsr(HV_X64_MSR_TPR, val, 0);
+ wrmsrq(HV_X64_MSR_TPR, val);
break;
default:
native_apic_mem_write(reg, val);
@@ -92,7 +93,7 @@ static void hv_apic_eoi_write(void)
if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1))
return;
- wrmsr(HV_X64_MSR_EOI, APIC_EOI_ACK, 0);
+ wrmsrq(HV_X64_MSR_EOI, APIC_EOI_ACK);
}
static bool cpu_is_self(int cpu)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index ddeb40930bc8..5d27194a2efa 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -21,6 +21,7 @@
#include <asm/hypervisor.h>
#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
+#include <asm/msr.h>
#include <asm/idtentry.h>
#include <asm/set_memory.h>
#include <linux/kexec.h>
@@ -62,7 +63,7 @@ static int hyperv_init_ghcb(void)
* returned by MSR_AMD64_SEV_ES_GHCB is above shared
* memory boundary and map it here.
*/
- rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
+ rdmsrq(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary;
@@ -95,7 +96,7 @@ static int hv_cpu_init(unsigned int cpu)
* For root partition we get the hypervisor provided VP assist
* page, instead of allocating a new page.
*/
- rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+ rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
*hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
PAGE_SIZE, MEMREMAP_WB);
} else {
@@ -128,7 +129,7 @@ static int hv_cpu_init(unsigned int cpu)
}
if (!WARN_ON(!(*hvp))) {
msr.enable = 1;
- wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+ wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
}
return hyperv_init_ghcb();
@@ -140,7 +141,7 @@ static void hv_reenlightenment_notify(struct work_struct *dummy)
{
struct hv_tsc_emulation_status emu_status;
- rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ rdmsrq(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
/* Don't issue the callback if TSC accesses are not emulated */
if (hv_reenlightenment_cb && emu_status.inprogress)
@@ -153,11 +154,11 @@ void hyperv_stop_tsc_emulation(void)
u64 freq;
struct hv_tsc_emulation_status emu_status;
- rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ rdmsrq(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
emu_status.inprogress = 0;
- wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ wrmsrq(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
- rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+ rdmsrq(HV_X64_MSR_TSC_FREQUENCY, freq);
tsc_khz = div64_u64(freq, 1000);
}
EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
@@ -203,8 +204,8 @@ void set_hv_tscchange_cb(void (*cb)(void))
re_ctrl.target_vp = hv_vp_index[get_cpu()];
- wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
- wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
+ wrmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrq(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
put_cpu();
}
@@ -217,9 +218,9 @@ void clear_hv_tscchange_cb(void)
if (!hv_reenlightenment_available())
return;
- rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+ rdmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
re_ctrl.enabled = 0;
- wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+ wrmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
hv_reenlightenment_cb = NULL;
}
@@ -251,16 +252,16 @@ static int hv_cpu_die(unsigned int cpu)
*/
memunmap(hv_vp_assist_page[cpu]);
hv_vp_assist_page[cpu] = NULL;
- rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+ rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
msr.enable = 0;
}
- wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+ wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
}
if (hv_reenlightenment_cb == NULL)
return 0;
- rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ rdmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
if (re_ctrl.target_vp == hv_vp_index[cpu]) {
/*
* Reassign reenlightenment notifications to some other online
@@ -274,7 +275,7 @@ static int hv_cpu_die(unsigned int cpu)
else
re_ctrl.enabled = 0;
- wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
}
return 0;
@@ -331,9 +332,9 @@ static int hv_suspend(void)
hv_hypercall_pg = NULL;
/* Disable the hypercall page in the hypervisor */
- rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 0;
- wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
ret = hv_cpu_die(0);
return ret;
@@ -348,11 +349,11 @@ static void hv_resume(void)
WARN_ON(ret);
/* Re-enable the hypercall page */
- rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
hypercall_msr.guest_physical_address =
vmalloc_to_pfn(hv_hypercall_pg_saved);
- wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hv_hypercall_pg = hv_hypercall_pg_saved;
hv_hypercall_pg_saved = NULL;
@@ -499,7 +500,7 @@ void __init hyperv_init(void)
* in such a VM and is only used in such a VM.
*/
guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
- wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
+ wrmsrq(HV_X64_MSR_GUEST_OS_ID, guest_id);
/* With the paravisor, the VM must also write the ID via GHCB/GHCI */
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
@@ -515,7 +516,7 @@ void __init hyperv_init(void)
if (hv_hypercall_pg == NULL)
goto clean_guest_os_id;
- rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
if (hv_root_partition()) {
@@ -532,7 +533,7 @@ void __init hyperv_init(void)
* so it is populated with code, then copy the code to an
* executable page.
*/
- wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
pg = vmalloc_to_page(hv_hypercall_pg);
src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE,
@@ -544,7 +545,7 @@ void __init hyperv_init(void)
hv_remap_tsc_clocksource();
} else {
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
- wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
}
skip_hypercall_pg_init:
@@ -608,7 +609,7 @@ skip_hypercall_pg_init:
return;
clean_guest_os_id:
- wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+ wrmsrq(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE);
free_ghcb_page:
@@ -629,7 +630,7 @@ void hyperv_cleanup(void)
union hv_reference_tsc_msr tsc_msr;
/* Reset our OS id */
- wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+ wrmsrq(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
/*
@@ -667,18 +668,18 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
return;
panic_reported = true;
- rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
+ rdmsrq(HV_X64_MSR_GUEST_OS_ID, guest_id);
- wrmsrl(HV_X64_MSR_CRASH_P0, err);
- wrmsrl(HV_X64_MSR_CRASH_P1, guest_id);
- wrmsrl(HV_X64_MSR_CRASH_P2, regs->ip);
- wrmsrl(HV_X64_MSR_CRASH_P3, regs->ax);
- wrmsrl(HV_X64_MSR_CRASH_P4, regs->sp);
+ wrmsrq(HV_X64_MSR_CRASH_P0, err);
+ wrmsrq(HV_X64_MSR_CRASH_P1, guest_id);
+ wrmsrq(HV_X64_MSR_CRASH_P2, regs->ip);
+ wrmsrq(HV_X64_MSR_CRASH_P3, regs->ax);
+ wrmsrq(HV_X64_MSR_CRASH_P4, regs->sp);
/*
* Let Hyper-V know there is crash data available
*/
- wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
+ wrmsrq(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
}
EXPORT_SYMBOL_GPL(hyperv_report_panic);
@@ -701,7 +702,7 @@ bool hv_is_hyperv_initialized(void)
* that the hypercall page is setup
*/
hypercall_msr.as_uint64 = 0;
- rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
return hypercall_msr.enable;
}
diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
index 151e851bef09..81b006601370 100644
--- a/arch/x86/hyperv/hv_spinlock.c
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -15,6 +15,7 @@
#include <asm/mshyperv.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
+#include <asm/msr.h>
static bool hv_pvspin __initdata = true;
@@ -39,18 +40,18 @@ static void hv_qlock_wait(u8 *byte, u8 val)
* To prevent a race against the unlock path it is required to
* disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE
* MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between
- * the lock value check and the rdmsrl() then the vCPU might be put
+ * the lock value check and the rdmsrq() then the vCPU might be put
* into 'idle' state by the hypervisor and kept in that state for
* an unspecified amount of time.
*/
local_irq_save(flags);
/*
- * Only issue the rdmsrl() when the lock state has not changed.
+ * Only issue the rdmsrq() when the lock state has not changed.
*/
if (READ_ONCE(*byte) == val) {
unsigned long msr_val;
- rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val);
+ rdmsrq(HV_X64_MSR_GUEST_IDLE, msr_val);
(void)msr_val;
}
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 13242ed8ff16..4580936dcb03 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -11,6 +11,7 @@
#include <asm/desc.h>
#include <asm/i8259.h>
#include <asm/mshyperv.h>
+#include <asm/msr.h>
#include <asm/realmode.h>
#include <asm/reboot.h>
#include <../kernel/smpboot.h>
@@ -149,11 +150,11 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
input->vp_context.rip = rip;
input->vp_context.rsp = rsp;
input->vp_context.rflags = 0x0000000000000002;
- input->vp_context.efer = __rdmsr(MSR_EFER);
+ input->vp_context.efer = native_rdmsrq(MSR_EFER);
input->vp_context.cr0 = native_read_cr0();
input->vp_context.cr3 = __native_read_cr3();
input->vp_context.cr4 = native_read_cr4();
- input->vp_context.msr_cr_pat = __rdmsr(MSR_IA32_CR_PAT);
+ input->vp_context.msr_cr_pat = native_rdmsrq(MSR_IA32_CR_PAT);
input->vp_context.idtr.limit = idt_ptr.size;
input->vp_context.idtr.base = idt_ptr.address;
input->vp_context.gdtr.limit = gdt_ptr.size;
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 77bf05f06b9e..09a165a3c41e 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -22,6 +22,7 @@
#include <asm/realmode.h>
#include <asm/e820/api.h>
#include <asm/desc.h>
+#include <asm/msr.h>
#include <uapi/asm/vmx.h>
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -110,12 +111,12 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
static inline u64 rd_ghcb_msr(void)
{
- return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+ return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB);
}
static inline void wr_ghcb_msr(u64 val)
{
- native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val);
+ native_wrmsrq(MSR_AMD64_SEV_ES_GHCB, val);
}
static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
index 1dd14381bcb6..fab11192c60a 100644
--- a/arch/x86/include/asm/acrn.h
+++ b/arch/x86/include/asm/acrn.h
@@ -25,7 +25,7 @@ void acrn_remove_intr_handler(void);
static inline u32 acrn_cpuid_base(void)
{
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return hypervisor_cpuid_base("ACRNACRNACRN", 0);
+ return cpuid_base_hypervisor("ACRNACRNACRN", 0);
return 0;
}
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4a37a8bd87fd..15bc07a5ebb3 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -6,6 +6,7 @@
#include <linux/stringify.h>
#include <linux/objtool.h>
#include <asm/asm.h>
+#include <asm/bug.h>
#define ALT_FLAGS_SHIFT 16
@@ -82,6 +83,12 @@ struct alt_instr {
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern s32 __retpoline_sites[], __retpoline_sites_end[];
+extern s32 __return_sites[], __return_sites_end[];
+extern s32 __cfi_sites[], __cfi_sites_end[];
+extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
+extern s32 __smp_locks[], __smp_locks_end[];
+
/*
* Debug flag that can be tested to see whether alternative
* instructions were patched in already:
@@ -124,6 +131,37 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
}
#endif
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_init_mod(struct module *mod);
+extern void its_fini_mod(struct module *mod);
+extern void its_free_mod(struct module *mod);
+extern u8 *its_static_thunk(int reg);
+#else /* CONFIG_MITIGATION_ITS */
+static inline void its_init_mod(struct module *mod) { }
+static inline void its_fini_mod(struct module *mod) { }
+static inline void its_free_mod(struct module *mod) { }
+static inline u8 *its_static_thunk(int reg)
+{
+ WARN_ONCE(1, "ITS not compiled in");
+
+ return NULL;
+}
+#endif
+
+#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL)
+extern bool cpu_wants_rethunk(void);
+extern bool cpu_wants_rethunk_at(void *addr);
+#else
+static __always_inline bool cpu_wants_rethunk(void)
+{
+ return false;
+}
+static __always_inline bool cpu_wants_rethunk_at(void *addr)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SMP
extern void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
@@ -335,11 +373,6 @@ void nop_func(void);
__ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
.endm
-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-#define new_len3 146f-145f
-
/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
diff --git a/arch/x86/include/asm/amd/fch.h b/arch/x86/include/asm/amd/fch.h
new file mode 100644
index 000000000000..2cf5153edbc2
--- /dev/null
+++ b/arch/x86/include/asm/amd/fch.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AMD_FCH_H_
+#define _ASM_X86_AMD_FCH_H_
+
+#define FCH_PM_BASE 0xFED80300
+
+/* Register offsets from PM base: */
+#define FCH_PM_DECODEEN 0x00
+#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19)
+#define FCH_PM_SCRATCH 0x80
+#define FCH_PM_S5_RESET_STATUS 0xC0
+
+#endif /* _ASM_X86_AMD_FCH_H_ */
diff --git a/arch/x86/include/asm/amd_hsmp.h b/arch/x86/include/asm/amd/hsmp.h
index 03c2ce3edaf5..2137f62853ed 100644
--- a/arch/x86/include/asm/amd_hsmp.h
+++ b/arch/x86/include/asm/amd/hsmp.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-
#ifndef _ASM_X86_AMD_HSMP_H_
#define _ASM_X86_AMD_HSMP_H_
@@ -13,4 +12,5 @@ static inline int hsmp_send_message(struct hsmp_message *msg)
return -ENODEV;
}
#endif
+
#endif /*_ASM_X86_AMD_HSMP_H_*/
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd/ibs.h
index 77f3a589a99a..3ee5903982c2 100644
--- a/arch/x86/include/asm/amd-ibs.h
+++ b/arch/x86/include/asm/amd/ibs.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AMD_IBS_H
+#define _ASM_X86_AMD_IBS_H
+
/*
* From PPR Vol 1 for AMD Family 19h Model 01h B1
* 55898 Rev 0.35 - Feb 5, 2021
@@ -151,3 +154,5 @@ struct perf_ibs_data {
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
+
+#endif /* _ASM_X86_AMD_IBS_H */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd/nb.h
index adfa0854cf2d..ddb5108cf46c 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd/nb.h
@@ -4,7 +4,7 @@
#include <linux/ioport.h>
#include <linux/pci.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
struct amd_nb_bus_dev_range {
u8 bus;
diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd/node.h
index 23fe617898a8..23fe617898a8 100644
--- a/arch/x86/include/asm/amd_node.h
+++ b/arch/x86/include/asm/amd/node.h
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c903d358405d..68e10e30fe9b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -120,7 +120,7 @@ static inline bool apic_is_x2apic_enabled(void)
{
u64 msr;
- if (rdmsrl_safe(MSR_IA32_APICBASE, &msr))
+ if (rdmsrq_safe(MSR_IA32_APICBASE, &msr))
return false;
return msr & X2APIC_ENABLE;
}
@@ -209,12 +209,12 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
reg == APIC_LVR)
return;
- wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
+ wrmsrq(APIC_BASE_MSR + (reg >> 4), v);
}
static inline void native_apic_msr_eoi(void)
{
- __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+ native_wrmsrq(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK);
}
static inline u32 native_apic_msr_read(u32 reg)
@@ -224,20 +224,20 @@ static inline u32 native_apic_msr_read(u32 reg)
if (reg == APIC_DFR)
return -1;
- rdmsrl(APIC_BASE_MSR + (reg >> 4), msr);
+ rdmsrq(APIC_BASE_MSR + (reg >> 4), msr);
return (u32)msr;
}
static inline void native_x2apic_icr_write(u32 low, u32 id)
{
- wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+ wrmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
}
static inline u64 native_x2apic_icr_read(void)
{
unsigned long val;
- rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ rdmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), val);
return val;
}
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index cbc6157f0b4b..b5982b94bdba 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -16,8 +16,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
- asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
- "call __sw_hweight32",
+ asm_inline (ALTERNATIVE("call __sw_hweight32",
"popcntl %[val], %[cnt]", X86_FEATURE_POPCNT)
: [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
: [val] REG_IN (w));
@@ -46,8 +45,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res;
- asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
- "call __sw_hweight64",
+ asm_inline (ALTERNATIVE("call __sw_hweight64",
"popcntq %[val], %[cnt]", X86_FEATURE_POPCNT)
: [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
: [val] REG_IN (w));
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index cc2881576c2c..f963848024a5 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -114,17 +114,12 @@
#endif
#ifndef __ASSEMBLER__
-#ifndef __pic__
static __always_inline __pure void *rip_rel_ptr(void *p)
{
asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
return p;
}
-#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var)))
-#else
-#define RIP_REL_REF(var) (var)
-#endif
#endif
/*
@@ -243,5 +238,24 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
#define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
+/*
+ * Both i386 and x86_64 returns 64-bit values in edx:eax for certain
+ * instructions, but GCC's "A" constraint has different meanings.
+ * For i386, "A" means exactly edx:eax, while for x86_64 it
+ * means rax *or* rdx.
+ *
+ * These helpers wrapping these semantic differences save one instruction
+ * clearing the high half of 'low':
+ */
+#ifdef CONFIG_X86_64
+# define EAX_EDX_DECLARE_ARGS(val, low, high) unsigned long low, high
+# define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
+# define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
+#else
+# define EAX_EDX_DECLARE_ARGS(val, low, high) u64 val
+# define EAX_EDX_VAL(val, low, high) (val)
+# define EAX_EDX_RET(val, low, high) "=A" (val)
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 100413aff640..eebbc8889e70 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -248,7 +248,7 @@ arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
static __always_inline unsigned long variable__ffs(unsigned long word)
{
- asm("rep; bsf %1,%0"
+ asm("tzcnt %1,%0"
: "=r" (word)
: ASM_INPUT_RM (word));
return word;
@@ -267,10 +267,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
static __always_inline unsigned long variable_ffz(unsigned long word)
{
- asm("rep; bsf %1,%0"
- : "=r" (word)
- : "r" (~word));
- return word;
+ return variable__ffs(~word);
}
/**
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 3f02ff6d333d..02b23aa78955 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -74,6 +74,11 @@
# define BOOT_STACK_SIZE 0x1000
#endif
+#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0
+
#ifndef __ASSEMBLER__
extern unsigned int output_len;
extern const unsigned long kernel_text_size;
@@ -83,6 +88,11 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
void (*error)(char *x));
extern struct boot_params *boot_params_ptr;
+extern unsigned long *trampoline_32bit;
+extern const u16 trampoline_ljmp_imm_offset;
+
+void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
+
#endif
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index e7225452963f..e1dbf8df1b69 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -22,7 +22,7 @@ static inline u64 cc_get_mask(void)
static inline void cc_set_mask(u64 mask)
{
- RIP_REL_REF(cc_mask) = mask;
+ cc_mask = mask;
}
u64 cc_mkenc(u64 val);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6c2c152d8a67..5b50e0e35129 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -75,7 +75,7 @@
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
-/* Free ( 3*32+ 6) */
+#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */
/* Free ( 3*32+ 7) */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
@@ -476,11 +476,13 @@
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
-#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
-#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
-#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
+#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
+#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
+#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
/*
* BUG word(s)
@@ -519,7 +521,7 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
@@ -527,10 +529,14 @@
#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
-#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
-#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
+#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
+#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
+#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
+#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
+
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h
index 9c180c9cc58e..44fa82e1267c 100644
--- a/arch/x86/include/asm/cpuid/api.h
+++ b/arch/x86/include/asm/cpuid/api.h
@@ -14,9 +14,9 @@
*/
#ifdef CONFIG_X86_32
-bool have_cpuid_p(void);
+bool cpuid_feature(void);
#else
-static inline bool have_cpuid_p(void)
+static inline bool cpuid_feature(void)
{
return true;
}
@@ -36,9 +36,9 @@ static inline void native_cpuid(u32 *eax, u32 *ebx,
}
#define NATIVE_CPUID_REG(reg) \
-static inline u32 native_cpuid_##reg(u32 op) \
+static inline u32 native_cpuid_##reg(u32 op) \
{ \
- u32 eax = op, ebx, ecx = 0, edx; \
+ u32 eax = op, ebx, ecx = 0, edx; \
\
native_cpuid(&eax, &ebx, &ecx, &edx); \
\
@@ -160,6 +160,10 @@ static inline void __cpuid_read_reg(u32 leaf, u32 subleaf,
__cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
}
+/*
+ * Hypervisor-related APIs:
+ */
+
static __always_inline bool cpuid_function_is_indexed(u32 function)
{
switch (function) {
@@ -184,14 +188,14 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
return false;
}
-#define for_each_possible_hypervisor_cpuid_base(function) \
+#define for_each_possible_cpuid_base_hypervisor(function) \
for (function = 0x40000000; function < 0x40010000; function += 0x100)
-static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves)
+static inline u32 cpuid_base_hypervisor(const char *sig, u32 leaves)
{
u32 base, eax, signature[3];
- for_each_possible_hypervisor_cpuid_base(base) {
+ for_each_possible_cpuid_base_hypervisor(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
/*
@@ -207,4 +211,82 @@ static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves)
return 0;
}
+/*
+ * CPUID(0x2) parsing:
+ */
+
+/**
+ * cpuid_leaf_0x2() - Return sanitized CPUID(0x2) register output
+ * @regs: Output parameter
+ *
+ * Query CPUID(0x2) and store its output in @regs. Force set any
+ * invalid 1-byte descriptor returned by the hardware to zero (the NULL
+ * cache/TLB descriptor) before returning it to the caller.
+ *
+ * Use for_each_cpuid_0x2_desc() to iterate over the register output in
+ * parsed form.
+ */
+static inline void cpuid_leaf_0x2(union leaf_0x2_regs *regs)
+{
+ cpuid_leaf(0x2, regs);
+
+ /*
+ * All Intel CPUs must report an iteration count of 1. In case
+ * of bogus hardware, treat all returned descriptors as NULL.
+ */
+ if (regs->desc[0] != 0x01) {
+ for (int i = 0; i < 4; i++)
+ regs->regv[i] = 0;
+ return;
+ }
+
+ /*
+ * The most significant bit (MSB) of each register must be clear.
+ * If a register is invalid, replace its descriptors with NULL.
+ */
+ for (int i = 0; i < 4; i++) {
+ if (regs->reg[i].invalid)
+ regs->regv[i] = 0;
+ }
+}
+
+/**
+ * for_each_cpuid_0x2_desc() - Iterator for parsed CPUID(0x2) descriptors
+ * @_regs: CPUID(0x2) register output, as returned by cpuid_leaf_0x2()
+ * @_ptr: u8 pointer, for macro internal use only
+ * @_desc: Pointer to the parsed CPUID(0x2) descriptor at each iteration
+ *
+ * Loop over the 1-byte descriptors in the passed CPUID(0x2) output registers
+ * @_regs. Provide the parsed information for each descriptor through @_desc.
+ *
+ * To handle cache-specific descriptors, switch on @_desc->c_type. For TLB
+ * descriptors, switch on @_desc->t_type.
+ *
+ * Example usage for cache descriptors::
+ *
+ * const struct leaf_0x2_table *desc;
+ * union leaf_0x2_regs regs;
+ * u8 *ptr;
+ *
+ * cpuid_leaf_0x2(&regs);
+ * for_each_cpuid_0x2_desc(regs, ptr, desc) {
+ * switch (desc->c_type) {
+ * ...
+ * }
+ * }
+ */
+#define for_each_cpuid_0x2_desc(_regs, _ptr, _desc) \
+ for (_ptr = &(_regs).desc[1]; \
+ _ptr < &(_regs).desc[16] && (_desc = &cpuid_0x2_table[*_ptr]); \
+ _ptr++)
+
+/*
+ * CPUID(0x80000006) parsing:
+ */
+
+static inline bool cpuid_amd_hygon_has_l3_cache(void)
+{
+ return cpuid_edx(0x80000006);
+}
+
#endif /* _ASM_X86_CPUID_API_H */
diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h
index 8582e27e836d..8a00364b79de 100644
--- a/arch/x86/include/asm/cpuid/types.h
+++ b/arch/x86/include/asm/cpuid/types.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_CPUID_TYPES_H
#define _ASM_X86_CPUID_TYPES_H
+#include <linux/build_bug.h>
#include <linux/types.h>
/*
@@ -29,4 +30,98 @@ enum cpuid_regs_idx {
#define CPUID_LEAF_FREQ 0x16
#define CPUID_LEAF_TILE 0x1d
+/*
+ * Types for CPUID(0x2) parsing:
+ */
+
+struct leaf_0x2_reg {
+ u32 : 31,
+ invalid : 1;
+};
+
+union leaf_0x2_regs {
+ struct leaf_0x2_reg reg[4];
+ u32 regv[4];
+ u8 desc[16];
+};
+
+/*
+ * Leaf 0x2 1-byte descriptors' cache types
+ * To be used for their mappings at cpuid_0x2_table[]
+ *
+ * Start at 1 since type 0 is reserved for HW byte descriptors which are
+ * not recognized by the kernel; i.e., those without an explicit mapping.
+ */
+enum _cache_table_type {
+ CACHE_L1_INST = 1,
+ CACHE_L1_DATA,
+ CACHE_L2,
+ CACHE_L3
+ /* Adjust __TLB_TABLE_TYPE_BEGIN before adding more types */
+} __packed;
+#ifndef __CHECKER__
+static_assert(sizeof(enum _cache_table_type) == 1);
+#endif
+
+/*
+ * Ensure that leaf 0x2 cache and TLB type values do not intersect,
+ * since they share the same type field at struct cpuid_0x2_table.
+ */
+#define __TLB_TABLE_TYPE_BEGIN (CACHE_L3 + 1)
+
+/*
+ * Leaf 0x2 1-byte descriptors' TLB types
+ * To be used for their mappings at cpuid_0x2_table[]
+ */
+enum _tlb_table_type {
+ TLB_INST_4K = __TLB_TABLE_TYPE_BEGIN,
+ TLB_INST_4M,
+ TLB_INST_2M_4M,
+ TLB_INST_ALL,
+
+ TLB_DATA_4K,
+ TLB_DATA_4M,
+ TLB_DATA_2M_4M,
+ TLB_DATA_4K_4M,
+ TLB_DATA_1G,
+ TLB_DATA_1G_2M_4M,
+
+ TLB_DATA0_4K,
+ TLB_DATA0_4M,
+ TLB_DATA0_2M_4M,
+
+ STLB_4K,
+ STLB_4K_2M,
+} __packed;
+#ifndef __CHECKER__
+static_assert(sizeof(enum _tlb_table_type) == 1);
+#endif
+
+/*
+ * Combined parsing table for leaf 0x2 cache and TLB descriptors.
+ */
+
+struct leaf_0x2_table {
+ union {
+ enum _cache_table_type c_type;
+ enum _tlb_table_type t_type;
+ };
+ union {
+ short c_size;
+ short entries;
+ };
+};
+
+extern const struct leaf_0x2_table cpuid_0x2_table[256];
+
+/*
+ * All of leaf 0x2's one-byte TLB descriptors implies the same number of entries
+ * for their respective TLB types. TLB descriptor 0x63 is an exception: it
+ * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages.
+ *
+ * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry
+ * count for dTLB 1GB pages is already encoded at the cpuid_0x2_table[]'s mapping.
+ */
+#define TLB_0x63_2M_4M_ENTRIES 32
+
#endif /* _ASM_X86_CPUID_TYPES_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index fdbbbfec745a..363110e6b2e3 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -23,7 +23,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7);
static __always_inline unsigned long native_get_debugreg(int regno)
{
- unsigned long val = 0; /* Damn you, gcc! */
+ unsigned long val;
switch (regno) {
case 0:
@@ -43,7 +43,7 @@ static __always_inline unsigned long native_get_debugreg(int regno)
break;
case 7:
/*
- * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them
+ * Use "asm volatile" for DR7 reads to forbid re-ordering them
* with other code.
*
* This is needed because a DR7 access can cause a #VC exception
@@ -55,7 +55,7 @@ static __always_inline unsigned long native_get_debugreg(int regno)
* re-ordered to happen before the call to sev_es_ist_enter(),
* causing stack recursion.
*/
- asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER);
+ asm volatile("mov %%db7, %0" : "=r" (val));
break;
default:
BUG();
@@ -83,15 +83,15 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
break;
case 7:
/*
- * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them
+ * Use "asm volatile" for DR7 writes to forbid re-ordering them
* with other code.
*
* While is didn't happen with a DR7 write (see the DR7 read
* comment above which explains where it happened), add the
- * __FORCE_ORDER here too to avoid similar problems in the
+ * "asm volatile" here too to avoid similar problems in the
* future.
*/
- asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER);
+ asm volatile("mov %0, %%db7" ::"r" (value));
break;
default:
BUG();
@@ -169,7 +169,7 @@ static inline unsigned long get_debugctlmsr(void)
if (boot_cpu_data.x86 < 6)
return 0;
#endif
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
return debugctlmsr;
}
@@ -180,7 +180,7 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
if (boot_cpu_data.x86 < 6)
return;
#endif
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 128602612eca..6c8fdc96be7e 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -76,12 +76,8 @@ typedef struct user_i387_struct elf_fpregset_t;
#include <asm/vdso.h>
-#ifdef CONFIG_X86_64
extern unsigned int vdso64_enabled;
-#endif
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
extern unsigned int vdso32_enabled;
-#endif
/*
* This is used to ensure we don't load something for the wrong architecture.
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 77d20555e04d..d535a97c7284 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -53,7 +53,6 @@ static inline void arch_exit_work(unsigned long ti_work)
if (unlikely(ti_work & _TIF_IO_BITMAP))
tss_update_io_bitmap();
- fpregs_assert_state_consistent();
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
}
@@ -61,7 +60,9 @@ static inline void arch_exit_work(unsigned long ti_work)
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
- if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+ fpregs_assert_state_consistent();
+
+ if (unlikely(ti_work))
arch_exit_work(ti_work);
fred_update_rsp0();
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index f42de5f05e7e..cd6f194a912b 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -126,6 +126,7 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif
/* State tracking */
+DECLARE_PER_CPU(bool, kernel_fpu_allowed);
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/* Process cleanup */
@@ -136,7 +137,7 @@ static inline void fpstate_free(struct fpu *fpu) { }
#endif
/* fpstate-related functions which are exported to KVM */
-extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
+extern void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature);
extern u64 xstate_get_guest_group_perm(void);
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index c485f1944c5f..c060549c6c94 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -10,7 +10,7 @@
#include <asm/trace/fpu.h>
extern void save_fpregs_to_fpstate(struct fpu *fpu);
-extern void fpu__drop(struct fpu *fpu);
+extern void fpu__drop(struct task_struct *tsk);
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long shstk_addr);
extern void fpu_flush_thread(void);
@@ -18,31 +18,25 @@ extern void fpu_flush_thread(void);
/*
* FPU state switching for scheduling.
*
- * This is a two-stage process:
+ * switch_fpu() saves the old state and sets TIF_NEED_FPU_LOAD if
+ * TIF_NEED_FPU_LOAD is not set. This is done within the context
+ * of the old process.
*
- * - switch_fpu_prepare() saves the old state.
- * This is done within the context of the old process.
- *
- * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
- * will get loaded on return to userspace, or when the kernel needs it.
- *
- * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
- * are saved in the current thread's FPU register state.
- *
- * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
- * hold current()'s FPU registers. It is required to load the
+ * Once TIF_NEED_FPU_LOAD is set, it is required to load the
* registers before returning to userland or using the content
* otherwise.
*
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
-static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
+static inline void switch_fpu(struct task_struct *old, int cpu)
{
- if (cpu_feature_enabled(X86_FEATURE_FPU) &&
+ if (!test_tsk_thread_flag(old, TIF_NEED_FPU_LOAD) &&
+ cpu_feature_enabled(X86_FEATURE_FPU) &&
!(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
- struct fpu *old_fpu = &old->thread.fpu;
+ struct fpu *old_fpu = x86_task_fpu(old);
+ set_tsk_thread_flag(old, TIF_NEED_FPU_LOAD);
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
@@ -50,7 +44,7 @@ static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
* current CPU number in @old_fpu, so the next return
* to user space can avoid the FPU register restore
* when is returns on the same CPU and still owns the
- * context.
+ * context. See fpregs_restore_userregs().
*/
old_fpu->last_cpu = cpu;
@@ -58,14 +52,4 @@ static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
}
}
-/*
- * Delay loading of the complete FPU state until the return to userland.
- * PKRU is handled separately.
- */
-static inline void switch_fpu_finish(struct task_struct *new)
-{
- if (cpu_feature_enabled(X86_FEATURE_FPU))
- set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
-}
-
#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index de16862bf230..1c94121acd3d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -125,6 +125,7 @@ enum xfeature {
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILE_CFG,
XFEATURE_XTILE_DATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -145,6 +146,7 @@ enum xfeature {
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
+#define XFEATURE_MASK_APX (1 << XFEATURE_APX)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
@@ -304,6 +306,13 @@ struct xtile_data {
} __packed;
/*
+ * State component 19: 8B extended general purpose register.
+ */
+struct apx_state {
+ u64 egpr[16];
+} __packed;
+
+/*
* State component 10 is supervisor state used for context-switching the
* PASID state.
*/
@@ -407,9 +416,11 @@ struct fpu_state_perm {
/*
* @__state_perm:
*
- * This bitmap indicates the permission for state components, which
- * are available to a thread group. The permission prctl() sets the
- * enabled state bits in thread_group_leader()->thread.fpu.
+ * This bitmap indicates the permission for state components
+ * available to a thread group, including both user and supervisor
+ * components and software-defined bits like FPU_GUEST_PERM_LOCKED.
+ * The permission prctl() sets the enabled state bits in
+ * thread_group_leader()->thread.fpu.
*
* All run time operations use the per thread information in the
* currently active fpu.fpstate which contains the xfeature masks
@@ -525,13 +536,6 @@ struct fpu_guest {
u64 xfeatures;
/*
- * @perm: xfeature bitmap of features which are
- * permitted to be enabled for the guest
- * vCPU.
- */
- u64 perm;
-
- /*
* @xfd_err: Save the guest value.
*/
u64 xfd_err;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 7f39fe7980c5..b308a76afbb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -32,7 +32,8 @@
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_XTILE)
+ XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_APX)
/*
* Features which are restored when returning to user space.
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 2a29e5216881..12b34d5b2953 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -9,6 +9,7 @@
#include <linux/const.h>
#include <asm/asm.h>
+#include <asm/msr.h>
#include <asm/trapnr.h>
/*
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 02f239569b93..ab2547f97c2c 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -60,7 +60,7 @@ static inline unsigned long x86_fsbase_read_cpu(void)
if (boot_cpu_has(X86_FEATURE_FSGSBASE))
fsbase = rdfsbase();
else
- rdmsrl(MSR_FS_BASE, fsbase);
+ rdmsrq(MSR_FS_BASE, fsbase);
return fsbase;
}
@@ -70,7 +70,7 @@ static inline void x86_fsbase_write_cpu(unsigned long fsbase)
if (boot_cpu_has(X86_FEATURE_FSGSBASE))
wrfsbase(fsbase);
else
- wrmsrl(MSR_FS_BASE, fsbase);
+ wrmsrq(MSR_FS_BASE, fsbase);
}
extern unsigned long x86_gsbase_read_cpu_inactive(void);
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 53e4015242b4..97f341777db5 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -82,6 +82,7 @@
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
+#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -242,4 +243,9 @@ static inline int inat_evex_scalable(insn_attr_t attr)
{
return attr & INAT_EVEX_SCALABLE;
}
+
+static inline int inat_is_invalid64(insn_attr_t attr)
+{
+ return attr & INAT_INV64;
+}
#endif
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e889c3bab5a2..ca309a3227c7 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -217,7 +217,7 @@ void memset_io(volatile void __iomem *, int, size_t);
static inline void __iowrite32_copy(void __iomem *to, const void *from,
size_t count)
{
- asm volatile("rep ; movsl"
+ asm volatile("rep movsl"
: "=&c"(count), "=&D"(to), "=&S"(from)
: "0"(count), "1"(to), "2"(from)
: "memory");
@@ -282,7 +282,7 @@ static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
count--; \
} \
} else { \
- asm volatile("rep; outs" #bwl \
+ asm volatile("rep outs" #bwl \
: "+S"(addr), "+c"(count) \
: "d"(port) : "memory"); \
} \
@@ -298,7 +298,7 @@ static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
count--; \
} \
} else { \
- asm volatile("rep; ins" #bwl \
+ asm volatile("rep ins" #bwl \
: "+D"(addr), "+c"(count) \
: "d"(port) : "memory"); \
} \
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5432457d2338..f2ad77929d6e 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -8,6 +8,9 @@
# define PA_PGD 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
+#else
+/* Size of each exception handler referenced by the IDT */
+# define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */
#endif
# define KEXEC_CONTROL_PAGE_SIZE 4096
@@ -59,6 +62,10 @@ struct kimage;
extern unsigned long kexec_va_control_page;
extern unsigned long kexec_pa_table_page;
extern unsigned long kexec_pa_swap_page;
+extern gate_desc kexec_debug_idt[];
+extern unsigned char kexec_debug_exc_vectors[];
+extern uint16_t kexec_debug_8250_port;
+extern unsigned long kexec_debug_8250_mmio32;
#endif
/*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7bc174a1f1cb..9c971f846108 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -34,6 +34,7 @@
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <asm/asm.h>
#include <asm/irq_remapping.h>
#include <asm/kvm_page_track.h>
@@ -2278,7 +2279,7 @@ static inline unsigned long read_msr(unsigned long msr)
{
u64 value;
- rdmsrl(msr, value);
+ rdmsrq(msr, value);
return value;
}
#endif
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index b51d8a4673f5..9d38ae744a2e 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -141,5 +141,15 @@
#define SYM_FUNC_START_WEAK_NOALIGN(name) \
SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
+/*
+ * Expose 'sym' to the startup code in arch/x86/boot/startup/, by emitting an
+ * alias prefixed with __pi_
+ */
+#ifdef __ASSEMBLER__
+#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_L_GLOBAL)
+#else
+#define SYM_PIC_ALIAS(sym) extern typeof(sym) __PASTE(__pi_, sym) __alias(sym)
+#endif
+
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 1530ee301dfe..ea6494628cb0 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -61,7 +61,7 @@ void __init sev_es_init_vc_handling(void);
static inline u64 sme_get_me_mask(void)
{
- return RIP_REL_REF(sme_me_mask);
+ return sme_me_mask;
}
#define __bss_decrypted __section(".bss..decrypted")
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index be7cddc414e4..8b41f26f003b 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_MICROCODE_H
#define _ASM_X86_MICROCODE_H
+#include <asm/msr.h>
+
struct cpu_signature {
unsigned int sig;
unsigned int pf;
@@ -63,7 +65,7 @@ static inline u32 intel_get_microcode_revision(void)
{
u32 rev, dummy;
- native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+ native_wrmsrq(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
native_cpuid_eax(1);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 8b8055a8eb9e..0fe9c569d171 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -16,6 +16,8 @@
#define MM_CONTEXT_LOCK_LAM 2
/* Allow LAM and SVA coexisting */
#define MM_CONTEXT_FORCE_TAGGED_SVA 3
+/* Tracks mm_cpumask */
+#define MM_CONTEXT_NOTRACK 4
/*
* x86 has arch-specific MMU state beyond what lives in mm_struct.
@@ -44,9 +46,7 @@ typedef struct {
struct ldt_struct *ldt;
#endif
-#ifdef CONFIG_X86_64
unsigned long flags;
-#endif
#ifdef CONFIG_ADDRESS_MASKING
/* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 2398058b6e83..73bf3b1b44e8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -190,7 +190,7 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
#define activate_mm(prev, next) \
do { \
paravirt_enter_mmap(next); \
- switch_mm((prev), (next), NULL); \
+ switch_mm_irqs_off((prev), (next), NULL); \
} while (0);
#ifdef CONFIG_X86_32
@@ -247,6 +247,16 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
+static inline bool is_notrack_mm(struct mm_struct *mm)
+{
+ return test_bit(MM_CONTEXT_NOTRACK, &mm->context.flags);
+}
+
+static inline void set_notrack_mm(struct mm_struct *mm)
+{
+ set_bit(MM_CONTEXT_NOTRACK, &mm->context.flags);
+}
+
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
@@ -272,4 +282,7 @@ unsigned long __get_current_cr3_fast(void);
#include <asm-generic/mmu_context.h>
+extern struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm);
+extern void unuse_temporary_mm(struct mm_struct *prev_mm);
+
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index bab5ccfc60a7..778444310cfb 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -8,6 +8,7 @@
#include <linux/io.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
+#include <asm/msr.h>
#include <hyperv/hvhdk.h>
/*
@@ -304,7 +305,7 @@ void hv_set_non_nested_msr(unsigned int reg, u64 value);
static __always_inline u64 hv_raw_get_msr(unsigned int reg)
{
- return __rdmsr(reg);
+ return native_rdmsrq(reg);
}
#else /* CONFIG_HYPERV */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e6134ef2263d..b7dded3c8113 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -211,6 +211,14 @@
* VERW clears CPU Register
* File.
*/
+#define ARCH_CAP_ITS_NO BIT_ULL(62) /*
+ * Not susceptible to
+ * Indirect Target Selection.
+ * This bit is not set by
+ * HW, but is synthesized by
+ * VMMs for guests to know
+ * their affected status.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -525,7 +533,7 @@
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
-#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777
/* CPUID.6.EAX */
@@ -542,16 +550,16 @@
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
-#define HWP_MIN_PERF(x) (x & 0xff)
-#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
-#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
-#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
-#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42)
/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
@@ -594,7 +602,11 @@
/* V6 PMON MSR range */
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
+#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
+#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
#define MSR_IA32_PMC_V6_STEP 4
/* KeyID partitioning between MKTME and TDX */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 9397a319d165..4096b8af4ba7 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -12,6 +12,7 @@
#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
+#include <linux/types.h>
#include <linux/percpu.h>
struct msr_info {
@@ -37,23 +38,6 @@ struct saved_msrs {
};
/*
- * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
- * constraint has different meanings. For i386, "A" means exactly
- * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
- * it means rax *or* rdx.
- */
-#ifdef CONFIG_X86_64
-/* Using 64-bit values saves one instruction clearing the high half of low */
-#define DECLARE_ARGS(val, low, high) unsigned long low, high
-#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
-#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
-#else
-#define DECLARE_ARGS(val, low, high) unsigned long long val
-#define EAX_EDX_VAL(val, low, high) (val)
-#define EAX_EDX_RET(val, low, high) "=A" (val)
-#endif
-
-/*
* Be very careful with includes. This header is prone to include loops.
*/
#include <asm/atomic.h>
@@ -63,13 +47,13 @@ struct saved_msrs {
DECLARE_TRACEPOINT(read_msr);
DECLARE_TRACEPOINT(write_msr);
DECLARE_TRACEPOINT(rdpmc);
-extern void do_trace_write_msr(unsigned int msr, u64 val, int failed);
-extern void do_trace_read_msr(unsigned int msr, u64 val, int failed);
-extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed);
+extern void do_trace_write_msr(u32 msr, u64 val, int failed);
+extern void do_trace_read_msr(u32 msr, u64 val, int failed);
+extern void do_trace_rdpmc(u32 msr, u64 val, int failed);
#else
-static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {}
-static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
-static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
+static inline void do_trace_write_msr(u32 msr, u64 val, int failed) {}
+static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {}
+static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {}
#endif
/*
@@ -79,9 +63,9 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
* think of extending them - you will be slapped with a stinking trout or a frozen
* shark will reach you, wherever you are! You've been warned.
*/
-static __always_inline unsigned long long __rdmsr(unsigned int msr)
+static __always_inline u64 __rdmsr(u32 msr)
{
- DECLARE_ARGS(val, low, high);
+ EAX_EDX_DECLARE_ARGS(val, low, high);
asm volatile("1: rdmsr\n"
"2:\n"
@@ -91,12 +75,12 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr)
return EAX_EDX_VAL(val, low, high);
}
-static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
+static __always_inline void __wrmsrq(u32 msr, u64 val)
{
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
- : : "c" (msr), "a"(low), "d" (high) : "memory");
+ : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
}
#define native_rdmsr(msr, val1, val2) \
@@ -106,16 +90,20 @@ do { \
(void)((val2) = (u32)(__val >> 32)); \
} while (0)
+static __always_inline u64 native_rdmsrq(u32 msr)
+{
+ return __rdmsr(msr);
+}
+
#define native_wrmsr(msr, low, high) \
- __wrmsr(msr, low, high)
+ __wrmsrq((msr), (u64)(high) << 32 | (low))
-#define native_wrmsrl(msr, val) \
- __wrmsr((msr), (u32)((u64)(val)), \
- (u32)((u64)(val) >> 32))
+#define native_wrmsrq(msr, val) \
+ __wrmsrq((msr), (val))
-static inline unsigned long long native_read_msr(unsigned int msr)
+static inline u64 native_read_msr(u32 msr)
{
- unsigned long long val;
+ u64 val;
val = __rdmsr(msr);
@@ -125,34 +113,35 @@ static inline unsigned long long native_read_msr(unsigned int msr)
return val;
}
-static inline unsigned long long native_read_msr_safe(unsigned int msr,
- int *err)
+static inline int native_read_msr_safe(u32 msr, u64 *p)
{
- DECLARE_ARGS(val, low, high);
+ int err;
+ EAX_EDX_DECLARE_ARGS(val, low, high);
asm volatile("1: rdmsr ; xor %[err],%[err]\n"
"2:\n\t"
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
- : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
+ : [err] "=r" (err), EAX_EDX_RET(val, low, high)
: "c" (msr));
if (tracepoint_enabled(read_msr))
- do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
- return EAX_EDX_VAL(val, low, high);
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), err);
+
+ *p = EAX_EDX_VAL(val, low, high);
+
+ return err;
}
/* Can be uninlined because referenced by paravirt */
-static inline void notrace
-native_write_msr(unsigned int msr, u32 low, u32 high)
+static inline void notrace native_write_msr(u32 msr, u64 val)
{
- __wrmsr(msr, low, high);
+ native_wrmsrq(msr, val);
if (tracepoint_enabled(write_msr))
- do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
+ do_trace_write_msr(msr, val, 0);
}
/* Can be uninlined because referenced by paravirt */
-static inline int notrace
-native_write_msr_safe(unsigned int msr, u32 low, u32 high)
+static inline int notrace native_write_msr_safe(u32 msr, u64 val)
{
int err;
@@ -160,73 +149,19 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high)
"2:\n\t"
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
: [err] "=a" (err)
- : "c" (msr), "0" (low), "d" (high)
+ : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
: "memory");
if (tracepoint_enabled(write_msr))
- do_trace_write_msr(msr, ((u64)high << 32 | low), err);
+ do_trace_write_msr(msr, val, err);
return err;
}
extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);
-/**
- * rdtsc() - returns the current TSC without ordering constraints
- *
- * rdtsc() returns the result of RDTSC as a 64-bit integer. The
- * only ordering constraint it supplies is the ordering implied by
- * "asm volatile": it will put the RDTSC in the place you expect. The
- * CPU can and will speculatively execute that RDTSC, though, so the
- * results can be non-monotonic if compared on different CPUs.
- */
-static __always_inline unsigned long long rdtsc(void)
+static inline u64 native_read_pmc(int counter)
{
- DECLARE_ARGS(val, low, high);
-
- asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-
- return EAX_EDX_VAL(val, low, high);
-}
-
-/**
- * rdtsc_ordered() - read the current TSC in program order
- *
- * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
- * It is ordered like a load to a global in-memory counter. It should
- * be impossible to observe non-monotonic rdtsc_unordered() behavior
- * across multiple CPUs as long as the TSC is synced.
- */
-static __always_inline unsigned long long rdtsc_ordered(void)
-{
- DECLARE_ARGS(val, low, high);
-
- /*
- * The RDTSC instruction is not ordered relative to memory
- * access. The Intel SDM and the AMD APM are both vague on this
- * point, but empirically an RDTSC instruction can be
- * speculatively executed before prior loads. An RDTSC
- * immediately after an appropriate barrier appears to be
- * ordered as a normal load, that is, it provides the same
- * ordering guarantees as reading from a global memory location
- * that some other imaginary CPU is updating continuously with a
- * time stamp.
- *
- * Thus, use the preferred barrier on the respective CPU, aiming for
- * RDTSCP as the default.
- */
- asm volatile(ALTERNATIVE_2("rdtsc",
- "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
- "rdtscp", X86_FEATURE_RDTSCP)
- : EAX_EDX_RET(val, low, high)
- /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
- :: "ecx");
-
- return EAX_EDX_VAL(val, low, high);
-}
-
-static inline unsigned long long native_read_pmc(int counter)
-{
- DECLARE_ARGS(val, low, high);
+ EAX_EDX_DECLARE_ARGS(val, low, high);
asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
if (tracepoint_enabled(rdpmc))
@@ -251,51 +186,44 @@ do { \
(void)((high) = (u32)(__val >> 32)); \
} while (0)
-static inline void wrmsr(unsigned int msr, u32 low, u32 high)
+static inline void wrmsr(u32 msr, u32 low, u32 high)
{
- native_write_msr(msr, low, high);
+ native_write_msr(msr, (u64)high << 32 | low);
}
-#define rdmsrl(msr, val) \
+#define rdmsrq(msr, val) \
((val) = native_read_msr((msr)))
-static inline void wrmsrl(unsigned int msr, u64 val)
+static inline void wrmsrq(u32 msr, u64 val)
{
- native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
+ native_write_msr(msr, val);
}
/* wrmsr with exception handling */
-static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high)
+static inline int wrmsrq_safe(u32 msr, u64 val)
{
- return native_write_msr_safe(msr, low, high);
+ return native_write_msr_safe(msr, val);
}
/* rdmsr with exception handling */
#define rdmsr_safe(msr, low, high) \
({ \
- int __err; \
- u64 __val = native_read_msr_safe((msr), &__err); \
+ u64 __val; \
+ int __err = native_read_msr_safe((msr), &__val); \
(*low) = (u32)__val; \
(*high) = (u32)(__val >> 32); \
__err; \
})
-static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p)
+static inline int rdmsrq_safe(u32 msr, u64 *p)
{
- int err;
-
- *p = native_read_msr_safe(msr, &err);
- return err;
+ return native_read_msr_safe(msr, p);
}
-#define rdpmc(counter, low, high) \
-do { \
- u64 _l = native_read_pmc((counter)); \
- (low) = (u32)_l; \
- (high) = (u32)(_l >> 32); \
-} while (0)
-
-#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+static __always_inline u64 rdpmc(int counter)
+{
+ return native_read_pmc(counter);
+}
#endif /* !CONFIG_PARAVIRT_XXL */
@@ -315,11 +243,11 @@ static __always_inline void wrmsrns(u32 msr, u64 val)
}
/*
- * 64-bit version of wrmsr_safe():
+ * Dual u32 version of wrmsrq_safe():
*/
-static inline int wrmsrl_safe(u32 msr, u64 val)
+static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
{
- return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
+ return wrmsrq_safe(msr, (u64)high << 32 | low);
}
struct msr __percpu *msrs_alloc(void);
@@ -330,14 +258,14 @@ int msr_clear_bit(u32 msr, u8 bit);
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
-int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
+int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
+int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
-int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
+int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
+int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
#else /* CONFIG_SMP */
@@ -351,14 +279,14 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
wrmsr(msr_no, l, h);
return 0;
}
-static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+static inline int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
- rdmsrl(msr_no, *q);
+ rdmsrq(msr_no, *q);
return 0;
}
-static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+static inline int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
- wrmsrl(msr_no, q);
+ wrmsrq(msr_no, q);
return 0;
}
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
@@ -380,13 +308,13 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
return wrmsr_safe(msr_no, l, h);
}
-static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+static inline int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
- return rdmsrl_safe(msr_no, q);
+ return rdmsrq_safe(msr_no, q);
}
-static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+static inline int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
- return wrmsrl_safe(msr_no, q);
+ return wrmsrq_safe(msr_no, q);
}
static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
@@ -397,5 +325,11 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
return wrmsr_safe_regs(regs);
}
#endif /* CONFIG_SMP */
+
+/* Compatibility wrappers: */
+#define rdmsrl(msr, val) rdmsrq(msr, val)
+#define wrmsrl(msr, val) wrmsrq(msr, val)
+#define rdmsrl_on_cpu(cpu, msr, q) rdmsrq_on_cpu(cpu, msr, q)
+
#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index ce857ef54cf1..dd2b129b0418 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -25,29 +25,31 @@
#define TPAUSE_C01_STATE 1
#define TPAUSE_C02_STATE 0
-static __always_inline void __monitor(const void *eax, unsigned long ecx,
- unsigned long edx)
+static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx)
{
- /* "monitor %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc8;"
- :: "a" (eax), "c" (ecx), "d"(edx));
+ /*
+ * Use the instruction mnemonic with implicit operands, as the LLVM
+ * assembler fails to assemble the mnemonic with explicit operands:
+ */
+ asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx));
}
-static __always_inline void __monitorx(const void *eax, unsigned long ecx,
- unsigned long edx)
+static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
{
- /* "monitorx %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xfa;"
+ /* "monitorx %eax, %ecx, %edx" */
+ asm volatile(".byte 0x0f, 0x01, 0xfa"
:: "a" (eax), "c" (ecx), "d"(edx));
}
-static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __mwait(u32 eax, u32 ecx)
{
mds_idle_clear_cpu_buffers();
- /* "mwait %eax, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
+ /*
+ * Use the instruction mnemonic with implicit operands, as the LLVM
+ * assembler fails to assemble the mnemonic with explicit operands:
+ */
+ asm volatile("mwait" :: "a" (eax), "c" (ecx));
}
/*
@@ -76,13 +78,12 @@ static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
* EAX (logical) address to monitor
* ECX #GP if not zero
*/
-static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
- unsigned long ecx)
+static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
/* No MDS buffer clear as this is AMD/HYGON only */
- /* "mwaitx %eax, %ebx, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xfb;"
+ /* "mwaitx %eax, %ebx, %ecx" */
+ asm volatile(".byte 0x0f, 0x01, 0xfb"
:: "a" (eax), "b" (ebx), "c" (ecx));
}
@@ -95,12 +96,11 @@ static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
* executing mwait, it would otherwise go unnoticed and the next tick
* would not be reprogrammed accordingly before mwait ever wakes up.
*/
-static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __sti_mwait(u32 eax, u32 ecx)
{
mds_idle_clear_cpu_buffers();
- /* "mwait %eax, %ecx;" */
- asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
+
+ asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
}
/*
@@ -113,16 +113,13 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
{
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
- if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
- mb();
- clflush((void *)&current_thread_info()->flags);
- mb();
- }
+ const void *addr = &current_thread_info()->flags;
- __monitor((void *)&current_thread_info()->flags, 0, 0);
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
if (!need_resched()) {
if (ecx & 1) {
@@ -144,16 +141,9 @@ static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned lo
*/
static inline void __tpause(u32 ecx, u32 edx, u32 eax)
{
- /* "tpause %ecx, %edx, %eax;" */
- #ifdef CONFIG_AS_TPAUSE
- asm volatile("tpause %%ecx\n"
- :
- : "c"(ecx), "d"(edx), "a"(eax));
- #else
- asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
- :
- : "c"(ecx), "d"(edx), "a"(eax));
- #endif
+ /* "tpause %ecx" */
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1"
+ :: "c" (ecx), "d" (edx), "a" (eax));
}
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index f677382093f3..79d88d12c8fb 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -14,12 +14,26 @@ extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int);
-extern int unknown_nmi_panic;
-
#endif /* CONFIG_X86_LOCAL_APIC */
+extern int unknown_nmi_panic;
+extern int panic_on_unrecovered_nmi;
+extern int panic_on_io_nmi;
+
+/* NMI handler flags */
#define NMI_FLAG_FIRST 1
+/**
+ * enum - NMI types.
+ * @NMI_LOCAL: Local NMI, CPU-specific NMI generated by the Local APIC.
+ * @NMI_UNKNOWN: Unknown NMI, the source of the NMI may not be identified.
+ * @NMI_SERR: System Error NMI, typically triggered by PCI errors.
+ * @NMI_IO_CHECK: I/O Check NMI, related to I/O errors.
+ * @NMI_MAX: Maximum value for NMI types.
+ *
+ * NMI types are used to categorize NMIs and to dispatch them to the
+ * appropriate handler.
+ */
enum {
NMI_LOCAL=0,
NMI_UNKNOWN,
@@ -28,6 +42,7 @@ enum {
NMI_MAX
};
+/* NMI handler return values */
#define NMI_DONE 0
#define NMI_HANDLED 1
@@ -41,6 +56,25 @@ struct nmiaction {
const char *name;
};
+/**
+ * register_nmi_handler - Register a handler for a specific NMI type
+ * @t: NMI type (e.g. NMI_LOCAL)
+ * @fn: The NMI handler
+ * @fg: Flags associated with the NMI handler
+ * @n: Name of the NMI handler
+ * @init: Optional __init* attributes for struct nmiaction
+ *
+ * Adds the provided handler to the list of handlers for the specified
+ * NMI type. Handlers flagged with NMI_FLAG_FIRST would be executed first.
+ *
+ * Sometimes the source of an NMI can't be reliably determined which
+ * results in an NMI being tagged as "unknown". Register an additional
+ * handler using the NMI type - NMI_UNKNOWN to handle such cases. The
+ * caller would get one last chance to assume responsibility for the
+ * NMI.
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
#define register_nmi_handler(t, fn, fg, n, init...) \
({ \
static struct nmiaction init fn##_na = { \
@@ -54,7 +88,16 @@ struct nmiaction {
int __register_nmi_handler(unsigned int, struct nmiaction *);
-void unregister_nmi_handler(unsigned int, const char *);
+/**
+ * unregister_nmi_handler - Unregister a handler for a specific NMI type
+ * @type: NMI type (e.g. NMI_LOCAL)
+ * @name: Name of the NMI handler used during registration
+ *
+ * Removes the handler associated with the specified NMI type from the
+ * NMI handler list. The "name" is used as a lookup key to identify the
+ * handler.
+ */
+void unregister_nmi_handler(unsigned int type, const char *name);
void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 5c43f145454d..20d754b98f3f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -327,7 +327,7 @@
.endm
.macro CLEAR_BRANCH_HISTORY_VMEXIT
- ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT
.endm
#else
#define CLEAR_BRANCH_HISTORY
@@ -336,10 +336,14 @@
#else /* __ASSEMBLER__ */
+#define ITS_THUNK_SIZE 64
+
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+typedef u8 its_thunk_t[ITS_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
+extern its_thunk_t __x86_indirect_its_thunk_array[];
#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
@@ -363,6 +367,12 @@ static inline void srso_return_thunk(void) {}
static inline void srso_alias_return_thunk(void) {}
#endif
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_return_thunk(void);
+#else
+static inline void its_return_thunk(void) {}
+#endif
+
extern void retbleed_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
@@ -561,7 +571,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
-DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear);
extern u16 mds_verw_sel;
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index a9b62e0e6f79..623f1e9f493e 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -73,7 +73,6 @@ extern unsigned int __VMALLOC_RESERVE;
extern int sysctl_legacy_va_layout;
extern void find_low_pfn_range(void);
-extern void setup_bootmem_allocator(void);
#endif /* !__ASSEMBLER__ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index d3aab6f4e59a..015d23f3e01f 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -62,7 +62,6 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
KCFI_REFERENCE(copy_page);
-#ifdef CONFIG_X86_5LEVEL
/*
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
@@ -93,7 +92,6 @@ static __always_inline unsigned long task_size_max(void)
return ret;
}
-#endif /* CONFIG_X86_5LEVEL */
#endif /* !__ASSEMBLER__ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 1faa8f88850a..7400dab373fe 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -41,25 +41,14 @@
#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL)
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)
-#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
-#else
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
-#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 52
-
-#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
-/* See task_size_max() in <asm/page_64.h> */
-#else
-#define __VIRTUAL_MASK_SHIFT 47
-#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-#endif
#define TASK_SIZE_MAX task_size_max()
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 9f77bf03d747..018a8d906ca3 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -29,9 +29,7 @@
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
/* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
- + (CONFIG_PHYSICAL_ALIGN - 1)) \
- & ~(CONFIG_PHYSICAL_ALIGN - 1))
+#define LOAD_PHYSICAL_ADDR __ALIGN_KERNEL_MASK(CONFIG_PHYSICAL_START, CONFIG_PHYSICAL_ALIGN - 1)
#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c4c23190925c..b5e59a7ba0d0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -175,26 +175,24 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-static inline u64 paravirt_read_msr(unsigned msr)
+static inline u64 paravirt_read_msr(u32 msr)
{
return PVOP_CALL1(u64, cpu.read_msr, msr);
}
-static inline void paravirt_write_msr(unsigned msr,
- unsigned low, unsigned high)
+static inline void paravirt_write_msr(u32 msr, u64 val)
{
- PVOP_VCALL3(cpu.write_msr, msr, low, high);
+ PVOP_VCALL2(cpu.write_msr, msr, val);
}
-static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+static inline int paravirt_read_msr_safe(u32 msr, u64 *val)
{
- return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
+ return PVOP_CALL2(int, cpu.read_msr_safe, msr, val);
}
-static inline int paravirt_write_msr_safe(unsigned msr,
- unsigned low, unsigned high)
+static inline int paravirt_write_msr_safe(u32 msr, u64 val)
{
- return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high);
+ return PVOP_CALL2(int, cpu.write_msr_safe, msr, val);
}
#define rdmsr(msr, val1, val2) \
@@ -204,55 +202,46 @@ do { \
val2 = _l >> 32; \
} while (0)
-#define wrmsr(msr, val1, val2) \
-do { \
- paravirt_write_msr(msr, val1, val2); \
-} while (0)
+static __always_inline void wrmsr(u32 msr, u32 low, u32 high)
+{
+ paravirt_write_msr(msr, (u64)high << 32 | low);
+}
-#define rdmsrl(msr, val) \
+#define rdmsrq(msr, val) \
do { \
val = paravirt_read_msr(msr); \
} while (0)
-static inline void wrmsrl(unsigned msr, u64 val)
+static inline void wrmsrq(u32 msr, u64 val)
{
- wrmsr(msr, (u32)val, (u32)(val>>32));
+ paravirt_write_msr(msr, val);
}
-#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b)
+static inline int wrmsrq_safe(u32 msr, u64 val)
+{
+ return paravirt_write_msr_safe(msr, val);
+}
/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b) \
({ \
- int _err; \
- u64 _l = paravirt_read_msr_safe(msr, &_err); \
+ u64 _l; \
+ int _err = paravirt_read_msr_safe((msr), &_l); \
(*a) = (u32)_l; \
- (*b) = _l >> 32; \
+ (*b) = (u32)(_l >> 32); \
_err; \
})
-static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
+static __always_inline int rdmsrq_safe(u32 msr, u64 *p)
{
- int err;
-
- *p = paravirt_read_msr_safe(msr, &err);
- return err;
+ return paravirt_read_msr_safe(msr, p);
}
-static inline unsigned long long paravirt_read_pmc(int counter)
+static __always_inline u64 rdpmc(int counter)
{
return PVOP_CALL1(u64, cpu.read_pmc, counter);
}
-#define rdpmc(counter, low, high) \
-do { \
- u64 _l = paravirt_read_pmc(counter); \
- low = (u32)_l; \
- high = _l >> 32; \
-} while (0)
-
-#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
-
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
PVOP_VCALL2(cpu.alloc_ldt, ldt, entries);
@@ -474,8 +463,6 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
PVOP_VCALL2(mmu.set_p4d, p4dp, val);
}
-#if CONFIG_PGTABLE_LEVELS >= 5
-
static inline p4d_t __p4d(p4dval_t val)
{
p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
@@ -507,8 +494,6 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
set_pgd(pgdp, native_make_pgd(0)); \
} while (0)
-#endif /* CONFIG_PGTABLE_LEVELS == 5 */
-
static inline void p4d_clear(p4d_t *p4dp)
{
set_p4d(p4dp, native_make_p4d(0));
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 631c306ce1ff..37a8627d8277 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,15 +91,15 @@ struct pv_cpu_ops {
unsigned int *ecx, unsigned int *edx);
/* Unsafe MSR operations. These will warn or panic on failure. */
- u64 (*read_msr)(unsigned int msr);
- void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+ u64 (*read_msr)(u32 msr);
+ void (*write_msr)(u32 msr, u64 val);
/*
* Safe MSR operations.
- * read sets err to 0 or -EIO. write returns 0 or -EIO.
+ * Returns 0 or -EIO.
*/
- u64 (*read_msr_safe)(unsigned int msr, int *err);
- int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
+ int (*read_msr_safe)(u32 msr, u64 *val);
+ int (*write_msr_safe)(u32 msr, u64 val);
u64 (*read_pmc)(int counter);
@@ -189,12 +189,10 @@ struct pv_mmu_ops {
void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
-#if CONFIG_PGTABLE_LEVELS >= 5
struct paravirt_callee_save p4d_val;
struct paravirt_callee_save make_p4d;
void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
-#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
struct pv_lazy_ops lazy_mode;
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 5fe314a2e73e..b0d03b6c279b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -29,6 +29,8 @@
#ifdef CONFIG_SMP
+#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
+
#ifdef CONFIG_CC_HAS_NAMED_AS
#ifdef __CHECKER__
@@ -36,23 +38,23 @@
# define __seg_fs __attribute__((address_space(__seg_fs)))
#endif
+#define __percpu_prefix
#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg)
-#define __percpu_prefix ""
#else /* !CONFIG_CC_HAS_NAMED_AS: */
+#define __percpu_prefix __force_percpu_prefix
#define __percpu_seg_override
-#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
#endif /* CONFIG_CC_HAS_NAMED_AS */
-#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset this_cpu_read(this_cpu_off)
-
/*
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
- *
+ */
+#define __my_cpu_offset this_cpu_read(this_cpu_off)
+
+/*
* arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
* kernel, because games are played with CONFIG_X86_64 there and
* sizeof(this_cpu_off) becames 4.
@@ -77,9 +79,9 @@
#else /* !CONFIG_SMP: */
+#define __force_percpu_prefix
+#define __percpu_prefix
#define __percpu_seg_override
-#define __percpu_prefix ""
-#define __force_percpu_prefix ""
#define PER_CPU_VAR(var) (var)__percpu_rel
@@ -97,8 +99,8 @@
# define __my_cpu_var(var) (*__my_cpu_ptr(&(var)))
#endif
-#define __percpu_arg(x) __percpu_prefix "%" #x
#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
+#define __percpu_arg(x) __percpu_prefix "%" #x
/*
* For arch-specific code, we can use direct single-insn ops (they
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 812dac3f79f0..70d1d94aca7e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -195,6 +195,7 @@ union cpuid10_edx {
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+#define ARCH_PERFMON_ACR_LEAF 0x2
union cpuid35_eax {
struct {
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 66425424ce91..54690bd4ddbe 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -18,8 +18,6 @@ typedef union {
} pte_t;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
-
#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 9d5b257d44e3..580b09bf6a45 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -27,9 +27,7 @@ typedef union {
} pmd_t;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
-
-#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7bd6bd6df4a1..5ddba366d3b4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -292,13 +292,6 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
-#define p4d_leaf p4d_leaf
-static inline bool p4d_leaf(p4d_t p4d)
-{
- /* No 512 GiB pages yet */
- return 0;
-}
-
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
#define pmd_leaf pmd_leaf
@@ -1472,9 +1465,6 @@ static inline bool pgdp_maps_userspace(void *__ptr)
return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}
-#define pgd_leaf pgd_leaf
-static inline bool pgd_leaf(pgd_t pgd) { return false; }
-
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index b89f8f1194a9..f06e5d6a2747 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -41,11 +41,9 @@ static inline void sync_initial_page_table(void) { }
pr_err("%s:%d: bad pud %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pud_val(e))
-#if CONFIG_PGTABLE_LEVELS >= 5
#define p4d_ERROR(e) \
pr_err("%s:%d: bad p4d %p(%016lx)\n", \
__FILE__, __LINE__, &(e), p4d_val(e))
-#endif
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %p(%016lx)\n", \
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 5bb782d856f2..4604f924d8b8 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -23,7 +23,6 @@ typedef struct { pmdval_t pmd; } pmd_t;
extern unsigned int __pgtable_l5_enabled;
-#ifdef CONFIG_X86_5LEVEL
#ifdef USE_EARLY_PGTABLE_L5
/*
* cpu_feature_enabled() is not available in early boot code.
@@ -37,19 +36,11 @@ static inline bool pgtable_l5_enabled(void)
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
-#else
-#define pgtable_l5_enabled() 0
-#endif /* CONFIG_X86_5LEVEL */
-
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
-
-#ifdef CONFIG_X86_5LEVEL
-
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
@@ -67,17 +58,6 @@ extern unsigned int ptrs_per_p4d;
#define MAX_POSSIBLE_PHYSMEM_BITS 52
-#else /* CONFIG_X86_5LEVEL */
-
-/*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
-#define MAX_PTRS_PER_P4D 1
-
-#endif /* CONFIG_X86_5LEVEL */
-
/*
* 3rd level page
*/
@@ -130,15 +110,9 @@ extern unsigned int ptrs_per_p4d;
#define __VMEMMAP_BASE_L4 0xffffea0000000000UL
#define __VMEMMAP_BASE_L5 0xffd4000000000000UL
-#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
-#else
-# define VMALLOC_START __VMALLOC_BASE_L4
-# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
-# define VMEMMAP_START __VMEMMAP_BASE_L4
-#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#ifdef CONFIG_RANDOMIZE_MEMORY
# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5d2f7e5aff26..bde58f6510ac 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -16,7 +16,7 @@ struct vm86;
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeatures.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
@@ -514,15 +514,14 @@ struct thread_struct {
struct thread_shstk shstk;
#endif
-
- /* Floating point and extended processor state */
- struct fpu fpu;
- /*
- * WARNING: 'fpu' is dynamically-sized. It *MUST* be at
- * the end.
- */
};
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))
+#endif
+
extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
static inline void arch_thread_struct_whitelist(unsigned long *offset,
@@ -734,6 +733,7 @@ void store_cpu_caps(struct cpuinfo_x86 *info);
enum l1tf_mitigations {
L1TF_MITIGATION_OFF,
+ L1TF_MITIGATION_AUTO,
L1TF_MITIGATION_FLUSH_NOWARN,
L1TF_MITIGATION_FLUSH,
L1TF_MITIGATION_FLUSH_NOSMT,
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 011bf67a1866..feb93b50e990 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -9,6 +9,8 @@
#include <linux/resctrl_types.h>
#include <linux/sched.h>
+#include <asm/msr.h>
+
/*
* This value can never be a valid CLOSID, and is used when mapping a
* (closid, rmid) pair to an index and back. On x86 only the RMID is
@@ -175,7 +177,7 @@ static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
return READ_ONCE(tsk->rmid) == rmid;
}
-static inline void resctrl_sched_in(struct task_struct *tsk)
+static inline void resctrl_arch_sched_in(struct task_struct *tsk)
{
if (static_branch_likely(&rdt_enable_key))
__resctrl_sched_in(tsk);
@@ -194,25 +196,22 @@ static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
/* x86 can always read an rmid, nothing needs allocating */
struct rdt_resource;
-static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
+static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
+ enum resctrl_event_id evtid)
{
might_sleep();
return NULL;
-};
+}
-static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
- void *ctx) { };
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
+ enum resctrl_event_id evtid,
+ void *ctx) { }
-u64 resctrl_arch_get_prefetch_disable_bits(void);
-int resctrl_arch_pseudo_lock_fn(void *_plr);
-int resctrl_arch_measure_cycles_lat_fn(void *_plr);
-int resctrl_arch_measure_l2_residency(void *_plr);
-int resctrl_arch_measure_l3_residency(void *_plr);
void resctrl_cpu_detect(struct cpuinfo_x86 *c);
#else
-static inline void resctrl_sched_in(struct task_struct *tsk) {}
+static inline void resctrl_arch_sched_in(struct task_struct *tsk) {}
static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
#endif /* CONFIG_X86_CPU_RESCTRL */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ad9212df0ec0..6324f4c6c545 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -52,6 +52,7 @@ extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
extern void startup_64_setup_gdt_idt(void);
+extern void startup_64_load_idt(void *vc_handler);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index acb85b9346d8..0020d77a0800 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -116,7 +116,7 @@ enum psc_op {
#define GHCB_MSR_VMPL_REQ 0x016
#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
/* GHCBData[39:32] */ \
- (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
+ ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \
/* GHCBDdata[11:0] */ \
GHCB_MSR_VMPL_REQ)
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
new file mode 100644
index 000000000000..3dfd306d1c9e
--- /dev/null
+++ b/arch/x86/include/asm/sev-internal.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define DR7_RESET_VALUE 0x400
+
+extern struct ghcb boot_ghcb_page;
+extern u64 sev_hv_features;
+extern u64 sev_secrets_pa;
+
+/* #VC handler runtime per-CPU data */
+struct sev_es_runtime_data {
+ struct ghcb ghcb_page;
+
+ /*
+ * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+ * It is needed when an NMI happens while the #VC handler uses the real
+ * GHCB, and the NMI handler itself is causing another #VC exception. In
+ * that case the GHCB content of the first handler needs to be backed up
+ * and restored.
+ */
+ struct ghcb backup_ghcb;
+
+ /*
+ * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+ * There is no need for it to be atomic, because nothing is written to
+ * the GHCB between the read and the write of ghcb_active. So it is safe
+ * to use it when a nested #VC exception happens before the write.
+ *
+ * This is necessary for example in the #VC->NMI->#VC case when the NMI
+ * happens while the first #VC handler uses the GHCB. When the NMI code
+ * raises a second #VC handler it might overwrite the contents of the
+ * GHCB written by the first handler. To avoid this the content of the
+ * GHCB is saved and restored when the GHCB is detected to be in use
+ * already.
+ */
+ bool ghcb_active;
+ bool backup_ghcb_active;
+
+ /*
+ * Cached DR7 value - write it on DR7 writes and return it on reads.
+ * That value will never make it to the real hardware DR7 as debugging
+ * is currently unsupported in SEV-ES guests.
+ */
+ unsigned long dr7;
+};
+
+struct ghcb_state {
+ struct ghcb *ghcb;
+};
+
+extern struct svsm_ca boot_svsm_ca_page;
+
+struct ghcb *__sev_get_ghcb(struct ghcb_state *state);
+void __sev_put_ghcb(struct ghcb_state *state);
+
+DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
+DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+
+void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages, enum psc_op op);
+
+DECLARE_PER_CPU(struct svsm_ca *, svsm_caa);
+DECLARE_PER_CPU(u64, svsm_caa_pa);
+
+extern struct svsm_ca *boot_svsm_caa;
+extern u64 boot_svsm_caa_pa;
+
+static __always_inline struct svsm_ca *svsm_get_caa(void)
+{
+ if (sev_cfg.use_cas)
+ return this_cpu_read(svsm_caa);
+ else
+ return boot_svsm_caa;
+}
+
+static __always_inline u64 svsm_get_caa_pa(void)
+{
+ if (sev_cfg.use_cas)
+ return this_cpu_read(svsm_caa_pa);
+ else
+ return boot_svsm_caa_pa;
+}
+
+int svsm_perform_call_protocol(struct svsm_call *call);
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ u32 low, high;
+
+ low = (u32)(val);
+ high = (u32)(val >> 32);
+
+ native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
+}
+
+void snp_register_ghcb_early(unsigned long paddr);
+bool sev_es_negotiate_protocol(void);
+bool sev_es_check_cpu_features(void);
+u64 get_hv_features(void);
+
+const struct snp_cpuid_table *snp_cpuid_get_table(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ba7999f66abe..58e028d42e41 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -15,6 +15,7 @@
#include <asm/sev-common.h>
#include <asm/coco.h>
#include <asm/set_memory.h>
+#include <asm/svm.h>
#define GHCB_PROTOCOL_MIN 1ULL
#define GHCB_PROTOCOL_MAX 2ULL
@@ -83,6 +84,36 @@ extern void vc_no_ghcb(void);
extern void vc_boot_ghcb(void);
extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+/*
+ * Individual entries of the SNP CPUID table, as defined by the SNP
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
+ */
+struct snp_cpuid_fn {
+ u32 eax_in;
+ u32 ecx_in;
+ u64 xcr0_in;
+ u64 xss_in;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u64 __reserved;
+} __packed;
+
+/*
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
+ * of 64 entries per CPUID table.
+ */
+#define SNP_CPUID_COUNT_MAX 64
+
+struct snp_cpuid_table {
+ u32 count;
+ u32 __reserved1;
+ u64 __reserved2;
+ struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
+} __packed;
+
/* PVALIDATE return codes */
#define PVALIDATE_FAIL_SIZEMISMATCH 6
@@ -384,6 +415,10 @@ struct svsm_call {
#define SVSM_ATTEST_SERVICES 0
#define SVSM_ATTEST_SINGLE_SERVICE 1
+#define SVSM_VTPM_CALL(x) ((2ULL << 32) | (x))
+#define SVSM_VTPM_QUERY 0
+#define SVSM_VTPM_CMD 1
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern u8 snp_vmpl;
@@ -481,9 +516,39 @@ void snp_msg_free(struct snp_msg_desc *mdesc);
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
struct snp_guest_request_ioctl *rio);
+int snp_svsm_vtpm_send_command(u8 *buffer);
+
void __init snp_secure_tsc_prepare(void);
void __init snp_secure_tsc_init(void);
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+ ghcb->save.sw_exit_code = 0;
+ __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+void vc_forward_exception(struct es_em_ctxt *ctxt);
+
+/* I/O parameters for CPUID-related helpers */
+struct cpuid_leaf {
+ u32 fn;
+ u32 subfn;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+};
+
+int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf);
+
+void __noreturn sev_es_terminate(unsigned int set, unsigned int reason);
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2);
+
+extern struct ghcb *boot_ghcb;
+
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define snp_vmpl 0
@@ -524,6 +589,7 @@ static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; }
static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
struct snp_guest_request_ioctl *rio) { return -ENODEV; }
+static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
static inline void __init snp_secure_tsc_prepare(void) { }
static inline void __init snp_secure_tsc_init(void) { }
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index a341c878e977..b8027b63cd7a 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h
@@ -1,6 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
#include <asm/fpu/api.h>
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
@@ -10,3 +14,5 @@ static __must_check inline bool may_use_simd(void)
{
return irq_fpu_usable();
}
+
+#endif /* _ASM_SIMD_H */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 658b690b2ccb..00b7e0398210 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -84,7 +84,7 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
static __always_inline void __update_spec_ctrl(u64 val)
{
__this_cpu_write(x86_spec_ctrl_current, val);
- native_wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ native_wrmsrq(MSR_IA32_SPEC_CTRL, val);
}
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6266d6b9e0b8..ecda17efa042 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -10,30 +10,19 @@
#include <linux/irqflags.h>
#include <linux/jump_label.h>
-/*
- * The compiler should not reorder volatile asm statements with respect to each
- * other: they should execute in program order. However GCC 4.9.x and 5.x have
- * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder
- * volatile asm. The write functions are not affected since they have memory
- * clobbers preventing reordering. To prevent reads from being reordered with
- * respect to writes, use a dummy memory operand.
- */
-
-#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)
-
void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
- asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
+ asm volatile("mov %%cr0,%0" : "=r" (val));
return val;
}
static __always_inline unsigned long native_read_cr2(void)
{
unsigned long val;
- asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
+ asm volatile("mov %%cr2,%0" : "=r" (val));
return val;
}
@@ -45,7 +34,7 @@ static __always_inline void native_write_cr2(unsigned long val)
static __always_inline unsigned long __native_read_cr3(void)
{
unsigned long val;
- asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
+ asm volatile("mov %%cr3,%0" : "=r" (val));
return val;
}
@@ -66,10 +55,10 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
- : "=r" (val) : "0" (0), __FORCE_ORDER);
+ : "=r" (val) : "0" (0));
#else
/* CR4 always exists on x86_64. */
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
+ asm volatile("mov %%cr4,%0" : "=r" (val));
#endif
return val;
}
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 32c0d981a82a..e9cce169bb4c 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -33,11 +33,11 @@ extern size_t strlen(const char *s);
static __always_inline void *__memcpy(void *to, const void *from, size_t n)
{
int d0, d1, d2;
- asm volatile("rep ; movsl\n\t"
+ asm volatile("rep movsl\n\t"
"movl %4,%%ecx\n\t"
"andl $3,%%ecx\n\t"
"jz 1f\n\t"
- "rep ; movsb\n\t"
+ "rep movsb\n\t"
"1:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
@@ -89,7 +89,7 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
if (n >= 5 * 4) {
/* large block: use rep prefix */
int ecx;
- asm volatile("rep ; movsl"
+ asm volatile("rep movsl"
: "=&c" (ecx), "=&D" (edi), "=&S" (esi)
: "0" (n / 4), "1" (edi), "2" (esi)
: "memory"
@@ -165,8 +165,7 @@ extern void *memchr(const void *cs, int c, size_t count);
static inline void *__memset_generic(void *s, char c, size_t count)
{
int d0, d1;
- asm volatile("rep\n\t"
- "stosb"
+ asm volatile("rep stosb"
: "=&c" (d0), "=&D" (d1)
: "a" (c), "1" (s), "0" (count)
: "memory");
@@ -199,8 +198,7 @@ extern void *memset(void *, int, size_t);
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
int d0, d1;
- asm volatile("rep\n\t"
- "stosw"
+ asm volatile("rep stosw"
: "=&c" (d0), "=&D" (d1)
: "a" (v), "1" (s), "0" (n)
: "memory");
@@ -211,8 +209,7 @@ static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
int d0, d1;
- asm volatile("rep\n\t"
- "stosl"
+ asm volatile("rep stosl"
: "=&c" (d0), "=&D" (d1)
: "a" (v), "1" (s), "0" (n)
: "memory");
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index d8416b3bf832..e8e5aab06255 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -9,6 +9,7 @@
#include <asm/desc.h>
#include <asm/fpu/api.h>
+#include <asm/msr.h>
/* image of the saved processor state */
struct saved_context {
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 54df06687d83..b512f9665f78 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -9,6 +9,7 @@
#include <asm/desc.h>
#include <asm/fpu/api.h>
+#include <asm/msr.h>
/*
* Image of the saved processor state, used by the low level ACPI suspend to
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 75248546403d..499b1c15cc8b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -52,6 +52,8 @@ do { \
} while (0)
#ifdef CONFIG_X86_32
+#include <asm/msr.h>
+
static inline void refresh_sysenter_cs(struct thread_struct *thread)
{
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
@@ -59,7 +61,7 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
return;
this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ wrmsrq(MSR_IA32_SYSENTER_CS, thread->sysenter_cs);
}
#endif
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index ab9e143ec9fe..5337f1be18f6 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -11,11 +11,11 @@
* JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
* Raise it if needed.
*/
-#define POKE_MAX_OPCODE_SIZE 5
+#define TEXT_POKE_MAX_OPCODE_SIZE 5
extern void text_poke_early(void *addr, const void *opcode, size_t len);
-extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);
+extern void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);
/*
* Clear and restore the kernel write-protection flag on the local CPU.
@@ -32,17 +32,17 @@ extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u
* an inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern void text_poke_sync(void);
+extern void smp_text_poke_sync_each_cpu(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
#define text_poke_copy text_poke_copy
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
+extern int smp_text_poke_int3_handler(struct pt_regs *regs);
+extern void smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate);
-extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
-extern void text_poke_finish(void);
+extern void smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void smp_text_poke_batch_finish(void);
#define INT3_INSN_SIZE 1
#define INT3_INSN_OPCODE 0xCC
@@ -82,7 +82,7 @@ static __always_inline int text_opcode_size(u8 opcode)
}
union text_poke_insn {
- u8 text[POKE_MAX_OPCODE_SIZE];
+ u8 text[TEXT_POKE_MAX_OPCODE_SIZE];
struct {
u8 opcode;
s32 disp;
@@ -128,8 +128,8 @@ void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
}
extern int after_bootmem;
-extern __ro_after_init struct mm_struct *poking_mm;
-extern __ro_after_init unsigned long poking_addr;
+extern __ro_after_init struct mm_struct *text_poke_mm;
+extern __ro_after_init unsigned long text_poke_mm_addr;
#ifndef CONFIG_UML_X86
static __always_inline
@@ -142,13 +142,14 @@ static __always_inline
void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
- * The int3 handler in entry_64.S adds a gap between the
+ * The INT3 handler in entry_64.S adds a gap between the
* stack where the break point happened, and the saving of
* pt_regs. We can extend the original stack because of
- * this gap. See the idtentry macro's create_gap option.
+ * this gap. See the idtentry macro's X86_TRAP_BP logic.
*
- * Similarly entry_32.S will have a gap on the stack for (any) hardware
- * exception and pt_regs; see FIXUP_FRAME.
+ * Similarly, entry_32.S will have a gap on the stack for
+ * (any) hardware exception and pt_regs; see the
+ * FIXUP_FRAME macro.
*/
regs->sp -= sizeof(unsigned long);
*(unsigned long *)regs->sp = val;
diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
deleted file mode 100644
index f0f9bcdb74d9..000000000000
--- a/arch/x86/include/asm/trace/common.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_TRACE_COMMON_H
-#define _ASM_TRACE_COMMON_H
-
-#ifdef CONFIG_TRACING
-DECLARE_STATIC_KEY_FALSE(trace_pagefault_key);
-#define trace_pagefault_enabled() \
- static_branch_unlikely(&trace_pagefault_key)
-#else
-static inline bool trace_pagefault_enabled(void) { return false; }
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 4645a6334063..0454d5e60e5d 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -74,11 +74,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_copy_src,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 88e7f0f3bf62..7408bebdfde0 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -6,7 +6,6 @@
#define _TRACE_IRQ_VECTORS_H
#include <linux/tracepoint.h>
-#include <asm/trace/common.h>
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94408a784c8e..4f7f09f50552 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -5,10 +5,65 @@
#ifndef _ASM_X86_TSC_H
#define _ASM_X86_TSC_H
+#include <asm/asm.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
+/**
+ * rdtsc() - returns the current TSC without ordering constraints
+ *
+ * rdtsc() returns the result of RDTSC as a 64-bit integer. The
+ * only ordering constraint it supplies is the ordering implied by
+ * "asm volatile": it will put the RDTSC in the place you expect. The
+ * CPU can and will speculatively execute that RDTSC, though, so the
+ * results can be non-monotonic if compared on different CPUs.
+ */
+static __always_inline u64 rdtsc(void)
+{
+ EAX_EDX_DECLARE_ARGS(val, low, high);
+
+ asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+/**
+ * rdtsc_ordered() - read the current TSC in program order
+ *
+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
+ * It is ordered like a load to a global in-memory counter. It should
+ * be impossible to observe non-monotonic rdtsc_unordered() behavior
+ * across multiple CPUs as long as the TSC is synced.
+ */
+static __always_inline u64 rdtsc_ordered(void)
+{
+ EAX_EDX_DECLARE_ARGS(val, low, high);
+
+ /*
+ * The RDTSC instruction is not ordered relative to memory
+ * access. The Intel SDM and the AMD APM are both vague on this
+ * point, but empirically an RDTSC instruction can be
+ * speculatively executed before prior loads. An RDTSC
+ * immediately after an appropriate barrier appears to be
+ * ordered as a normal load, that is, it provides the same
+ * ordering guarantees as reading from a global memory location
+ * that some other imaginary CPU is updating continuously with a
+ * time stamp.
+ *
+ * Thus, use the preferred barrier on the respective CPU, aiming for
+ * RDTSCP as the default.
+ */
+ asm volatile(ALTERNATIVE_2("rdtsc",
+ "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
+ "rdtscp", X86_FEATURE_RDTSCP)
+ : EAX_EDX_RET(val, low, high)
+ /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
+ :: "ecx");
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
/*
* Standard way to access the cycle counter.
*/
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index c52f0133425b..c8a5ae35c871 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -26,8 +26,8 @@ extern unsigned long USER_PTR_MAX;
*/
static inline unsigned long __untagged_addr(unsigned long addr)
{
- asm (ALTERNATIVE("",
- "and " __percpu_arg([mask]) ", %[addr]", X86_FEATURE_LAM)
+ asm_inline (ALTERNATIVE("", "and " __percpu_arg([mask]) ", %[addr]",
+ X86_FEATURE_LAM)
: [addr] "+r" (addr)
: [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
@@ -54,7 +54,7 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
#define valid_user_address(x) \
- ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
+ likely((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
/*
* Masking the user address is an alternative to a conditional
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 80be0da733df..b7253ef3205a 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -27,17 +27,9 @@ struct vdso_image {
long sym_vdso32_rt_sigreturn_landing_pad;
};
-#ifdef CONFIG_X86_64
extern const struct vdso_image vdso_image_64;
-#endif
-
-#ifdef CONFIG_X86_X32_ABI
extern const struct vdso_image vdso_image_x32;
-#endif
-
-#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
extern const struct vdso_image vdso_image_32;
-#endif
extern int __init init_vdso_image(const struct vdso_image *image);
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index c9b2ba7a9ec4..7000aeb59aa2 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -7,15 +7,15 @@
#ifndef __ASSEMBLER__
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static __always_inline void rep_nop(void)
+/* PAUSE is a good thing to insert into busy-wait loops. */
+static __always_inline void native_pause(void)
{
- asm volatile("rep; nop" ::: "memory");
+ asm volatile("pause" ::: "memory");
}
static __always_inline void cpu_relax(void)
{
- rep_nop();
+ native_pause();
}
struct getcpu_cache;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 213cf5379a5a..36698cc9fb44 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -292,6 +292,7 @@ struct x86_hyper_runtime {
* @set_wallclock: set time back to HW clock
* @is_untracked_pat_range exclude from PAT logic
* @nmi_init enable NMI on cpus
+ * @get_nmi_reason get the reason an NMI was received
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
* @apic_post_init: adjust apic if needed
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index bd0fc69a10a7..c2fc7869b996 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -43,7 +43,7 @@ extern struct start_info *xen_start_info;
static inline uint32_t xen_cpuid_base(void)
{
- return hypervisor_cpuid_base(XEN_SIGNATURE, 2);
+ return cpuid_base_hypervisor(XEN_SIGNATURE, 2);
}
struct pci_dev;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 84cfa179802c..99a783fd4691 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -141,7 +141,6 @@ obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
-obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
obj-$(CONFIG_X86_UMIP) += umip.o
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index 77bfb846490c..7047124490f6 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -49,7 +49,7 @@ int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
{
int err;
- err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
+ err = rdmsrq_safe_on_cpu(cpunum, reg->address, val);
if (!err) {
u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
reg->bit_offset);
@@ -65,7 +65,7 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
u64 rd_val;
int err;
- err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
+ err = rdmsrq_safe_on_cpu(cpunum, reg->address, &rd_val);
if (!err) {
u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
reg->bit_offset);
@@ -74,7 +74,7 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
val &= mask;
rd_val &= ~mask;
rd_val |= val;
- err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
+ err = wrmsrq_safe_on_cpu(cpunum, reg->address, rd_val);
}
return err;
}
@@ -147,7 +147,7 @@ int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
int ret;
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val);
+ ret = rdmsrq_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val);
if (ret)
goto out;
@@ -272,7 +272,7 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
}
/* detect if running on heterogeneous design */
- if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) {
+ if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES)) {
switch (core_type) {
case TOPO_CPU_TYPE_UNKNOWN:
pr_warn("Undefined core type found for cpu %d\n", cpu);
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index d5ac34186555..8698d66563ed 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -14,7 +14,7 @@
#include <acpi/processor.h>
#include <asm/cpu_device_id.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/mwait.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 6dfecb27b846..91fa262f0e30 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -16,6 +16,7 @@
#include <asm/cacheflush.h>
#include <asm/realmode.h>
#include <asm/hypervisor.h>
+#include <asm/msr.h>
#include <asm/smp.h>
#include <linux/ftrace.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index bf82c6f7d690..ecfe7b497cad 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,36 +1,17 @@
// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "SMP alternatives: " fmt
-#include <linux/module.h>
-#include <linux/sched.h>
+#include <linux/mmu_context.h>
#include <linux/perf_event.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/stringify.h>
-#include <linux/highmem.h>
-#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
-#include <linux/stop_machine.h>
-#include <linux/slab.h>
-#include <linux/kdebug.h>
-#include <linux/kprobes.h>
-#include <linux/mmu_context.h>
-#include <linux/bsearch.h>
-#include <linux/sync_core.h>
+#include <linux/execmem.h>
+
#include <asm/text-patching.h>
-#include <asm/alternative.h>
-#include <asm/sections.h>
-#include <asm/mce.h>
-#include <asm/nmi.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
#include <asm/insn.h>
-#include <asm/io.h>
-#include <asm/fixmap.h>
-#include <asm/paravirt.h>
-#include <asm/asm-prototypes.h>
-#include <asm/cfi.h>
+#include <asm/ibt.h>
+#include <asm/set_memory.h>
+#include <asm/nmi.h>
int __read_mostly alternatives_patched;
@@ -124,6 +105,171 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
#endif
};
+#ifdef CONFIG_FINEIBT
+static bool cfi_paranoid __ro_after_init;
+#endif
+
+#ifdef CONFIG_MITIGATION_ITS
+
+#ifdef CONFIG_MODULES
+static struct module *its_mod;
+#endif
+static void *its_page;
+static unsigned int its_offset;
+
+/* Initialize a thunk with the "jmp *reg; int3" instructions. */
+static void *its_init_thunk(void *thunk, int reg)
+{
+ u8 *bytes = thunk;
+ int offset = 0;
+ int i = 0;
+
+#ifdef CONFIG_FINEIBT
+ if (cfi_paranoid) {
+ /*
+ * When ITS uses indirect branch thunk the fineibt_paranoid
+ * caller sequence doesn't fit in the caller site. So put the
+ * remaining part of the sequence (<ea> + JNE) into the ITS
+ * thunk.
+ */
+ bytes[i++] = 0xea; /* invalid instruction */
+ bytes[i++] = 0x75; /* JNE */
+ bytes[i++] = 0xfd;
+
+ offset = 1;
+ }
+#endif
+
+ if (reg >= 8) {
+ bytes[i++] = 0x41; /* REX.B prefix */
+ reg -= 8;
+ }
+ bytes[i++] = 0xff;
+ bytes[i++] = 0xe0 + reg; /* jmp *reg */
+ bytes[i++] = 0xcc;
+
+ return thunk + offset;
+}
+
+#ifdef CONFIG_MODULES
+void its_init_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ mutex_lock(&text_mutex);
+ its_mod = mod;
+ its_page = NULL;
+}
+
+void its_fini_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ WARN_ON_ONCE(its_mod != mod);
+
+ its_mod = NULL;
+ its_page = NULL;
+ mutex_unlock(&text_mutex);
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ execmem_restore_rox(page, PAGE_SIZE);
+ }
+}
+
+void its_free_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ execmem_free(page);
+ }
+ kfree(mod->its_page_array);
+}
+#endif /* CONFIG_MODULES */
+
+static void *its_alloc(void)
+{
+ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
+
+ if (!page)
+ return NULL;
+
+#ifdef CONFIG_MODULES
+ if (its_mod) {
+ void *tmp = krealloc(its_mod->its_page_array,
+ (its_mod->its_num_pages+1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+
+ its_mod->its_page_array = tmp;
+ its_mod->its_page_array[its_mod->its_num_pages++] = page;
+
+ execmem_make_temp_rw(page, PAGE_SIZE);
+ }
+#endif /* CONFIG_MODULES */
+
+ return no_free_ptr(page);
+}
+
+static void *its_allocate_thunk(int reg)
+{
+ int size = 3 + (reg / 8);
+ void *thunk;
+
+#ifdef CONFIG_FINEIBT
+ /*
+ * The ITS thunk contains an indirect jump and an int3 instruction so
+ * its size is 3 or 4 bytes depending on the register used. If CFI
+ * paranoid is used then 3 extra bytes are added in the ITS thunk to
+ * complete the fineibt_paranoid caller sequence.
+ */
+ if (cfi_paranoid)
+ size += 3;
+#endif
+
+ if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
+ its_page = its_alloc();
+ if (!its_page) {
+ pr_err("ITS page allocation failed\n");
+ return NULL;
+ }
+ memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE);
+ its_offset = 32;
+ }
+
+ /*
+ * If the indirect branch instruction will be in the lower half
+ * of a cacheline, then update the offset to reach the upper half.
+ */
+ if ((its_offset + size - 1) % 64 < 32)
+ its_offset = ((its_offset - 1) | 0x3F) + 33;
+
+ thunk = its_page + its_offset;
+ its_offset += size;
+
+ return its_init_thunk(thunk, reg);
+}
+
+u8 *its_static_thunk(int reg)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+
+#ifdef CONFIG_FINEIBT
+ /* Paranoid thunk starts 2 bytes before */
+ if (cfi_paranoid)
+ return thunk - 2;
+#endif
+ return thunk;
+}
+
+#endif
+
/*
* Nomenclature for variable names to simplify and clarify this code and ease
* any potential staring at it:
@@ -171,13 +317,6 @@ static void add_nop(u8 *buf, unsigned int len)
*buf = INT3_INSN_OPCODE;
}
-extern s32 __retpoline_sites[], __retpoline_sites_end[];
-extern s32 __return_sites[], __return_sites_end[];
-extern s32 __cfi_sites[], __cfi_sites_end[];
-extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
-extern s32 __smp_locks[], __smp_locks_end[];
-void text_poke_early(void *addr, const void *opcode, size_t len);
-
/*
* Matches NOP and NOPL, not any of the other possible NOPs.
*/
@@ -369,7 +508,7 @@ static void __apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen,
}
}
-void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
+void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
{
__apply_relocation(buf, instr, instrlen, repl, repl_len);
optimize_nops(instr, buf, instrlen);
@@ -457,7 +596,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
DPRINTK(ALT, "alt table %px, -> %px", start, end);
/*
- * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
+ * KASAN_SHADOW_START is defined using
* cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
* During the process, KASAN becomes confused seeing partial LA57
* conversion and triggers a false-positive out-of-bound report.
@@ -525,7 +664,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;
- apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
+ text_poke_apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -581,7 +720,8 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i;
}
-static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes,
+ void *call_dest, void *jmp_dest)
{
u8 op = insn->opcode.bytes[0];
int i = 0;
@@ -602,7 +742,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8
switch (op) {
case CALL_INSN_OPCODE:
__text_gen_insn(bytes+i, op, addr+i,
- __x86_indirect_call_thunk_array[reg],
+ call_dest,
CALL_INSN_SIZE);
i += CALL_INSN_SIZE;
break;
@@ -610,7 +750,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8
case JMP32_INSN_OPCODE:
clang_jcc:
__text_gen_insn(bytes+i, op, addr+i,
- __x86_indirect_jump_thunk_array[reg],
+ jmp_dest,
JMP32_INSN_SIZE);
i += JMP32_INSN_SIZE;
break;
@@ -625,6 +765,48 @@ clang_jcc:
return i;
}
+static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ return __emit_trampoline(addr, insn, bytes,
+ __x86_indirect_call_thunk_array[reg],
+ __x86_indirect_jump_thunk_array[reg]);
+}
+
+#ifdef CONFIG_MITIGATION_ITS
+static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+ u8 *tmp = its_allocate_thunk(reg);
+
+ if (tmp)
+ thunk = tmp;
+
+ return __emit_trampoline(addr, insn, bytes, thunk, thunk);
+}
+
+/* Check if an indirect branch is at ITS-unsafe address */
+static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return false;
+
+ /* Indirect branch opcode is 2 or 3 bytes depending on reg */
+ addr += 1 + reg / 8;
+
+ /* Lower-half of the cacheline? */
+ return !(addr & 0x20);
+}
+#else /* CONFIG_MITIGATION_ITS */
+
+#ifdef CONFIG_FINEIBT
+static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
+{
+ return false;
+}
+#endif
+
+#endif /* CONFIG_MITIGATION_ITS */
+
/*
* Rewrite the compiler generated retpoline thunk calls.
*
@@ -699,6 +881,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
bytes[i++] = 0xe8; /* LFENCE */
}
+#ifdef CONFIG_MITIGATION_ITS
+ /*
+ * Check if the address of last byte of emitted-indirect is in
+ * lower-half of the cacheline. Such branches need ITS mitigation.
+ */
+ if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg))
+ return emit_its_trampoline(addr, insn, reg, bytes);
+#endif
+
ret = emit_indirect(op, reg, bytes + i);
if (ret < 0)
return ret;
@@ -732,6 +923,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
int len, ret;
u8 bytes[16];
u8 op1, op2;
+ u8 *dest;
ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
@@ -748,6 +940,12 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
+ /* Check for cfi_paranoid + ITS */
+ dest = addr + insn.length + insn.immediate.value;
+ if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) {
+ WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
+ continue;
+ }
break;
case 0x0f: /* escape */
@@ -775,6 +973,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
#ifdef CONFIG_MITIGATION_RETHUNK
+bool cpu_wants_rethunk(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_RETHUNK);
+}
+
+bool cpu_wants_rethunk_at(void *addr)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ return false;
+ if (x86_return_thunk != its_return_thunk)
+ return true;
+
+ return !((unsigned long)addr & 0x20);
+}
+
/*
* Rewrite the compiler generated return thunk tail-calls.
*
@@ -791,7 +1004,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
int i = 0;
/* Patch the custom return thunks... */
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk_at(addr)) {
i = JMP32_INSN_SIZE;
__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
} else {
@@ -808,7 +1021,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
s32 *s;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk())
static_call_force_reinit();
for (s = start; s < end; s++) {
@@ -1022,8 +1235,6 @@ int cfi_get_func_arity(void *func)
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
-static bool cfi_paranoid __ro_after_init = false;
-
/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
@@ -1436,6 +1647,19 @@ static int cfi_rand_callers(s32 *start, s32 *end)
return 0;
}
+static int emit_paranoid_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ u8 *thunk = (void *)__x86_indirect_its_thunk_array[reg] - 2;
+
+#ifdef CONFIG_MITIGATION_ITS
+ u8 *tmp = its_allocate_thunk(reg);
+ if (tmp)
+ thunk = tmp;
+#endif
+
+ return __emit_trampoline(addr, insn, bytes, thunk, thunk);
+}
+
static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
@@ -1477,9 +1701,14 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
memcpy(bytes + fineibt_caller_hash, &hash, 4);
- ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
- if (WARN_ON_ONCE(ret != 3))
- continue;
+ if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + fineibt_paranoid_ind, 11)) {
+ emit_paranoid_trampoline(addr + fineibt_caller_size,
+ &insn, 11, bytes + fineibt_caller_size);
+ } else {
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
+ if (WARN_ON_ONCE(ret != 3))
+ continue;
+ }
text_poke_early(addr, bytes, fineibt_paranoid_size);
}
@@ -1706,29 +1935,66 @@ Efault:
return false;
}
+static bool is_paranoid_thunk(unsigned long addr)
+{
+ u32 thunk;
+
+ __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault);
+ return (thunk & 0x00FFFFFF) == 0xfd75ea;
+
+Efault:
+ return false;
+}
+
/*
* regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
- * sequence.
+ * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS
+ * thunk.
*/
static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
{
unsigned long addr = regs->ip - fineibt_paranoid_ud;
- u32 hash;
- if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2))
+ if (!cfi_paranoid)
return false;
- __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
- *target = regs->r11 + fineibt_preamble_size;
- *type = regs->r10;
+ if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) {
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ /*
+ * Since the trapping instruction is the exact, but LOCK prefixed,
+ * Jcc.d8 that got us here, the normal fixup will work.
+ */
+ return true;
+ }
/*
- * Since the trapping instruction is the exact, but LOCK prefixed,
- * Jcc.d8 that got us here, the normal fixup will work.
+ * The cfi_paranoid + ITS thunk combination results in:
+ *
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
+ * a: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11
+ *
+ * Where the paranoid_thunk looks like:
+ *
+ * 1d: <ea> (bad)
+ * __x86_indirect_paranoid_thunk_r11:
+ * 1e: 75 fd jne 1d
+ * __x86_indirect_its_thunk_r11:
+ * 20: 41 ff eb jmp *%r11
+ * 23: cc int3
+ *
*/
- return true;
+ if (is_paranoid_thunk(regs->ip)) {
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ regs->ip = *target;
+ return true;
+ }
-Efault:
return false;
}
@@ -2010,7 +2276,7 @@ __visible noinline void __init __alt_reloc_selftest(void *arg)
static noinline void __init alt_reloc_selftest(void)
{
/*
- * Tests apply_relocation().
+ * Tests text_poke_apply_relocation().
*
* This has a relative immediate (CALL) in a place other than the first
* instruction and additionally on x86_64 we get a RIP-relative LEA:
@@ -2031,6 +2297,8 @@ static noinline void __init alt_reloc_selftest(void)
void __init alternative_instructions(void)
{
+ u64 ibt;
+
int3_selftest();
/*
@@ -2057,6 +2325,9 @@ void __init alternative_instructions(void)
*/
paravirt_set_cap();
+ /* Keep CET-IBT disabled until caller/callee are patched */
+ ibt = ibt_save(/*disable*/ true);
+
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, true);
@@ -2080,6 +2351,8 @@ void __init alternative_instructions(void)
*/
apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
+ ibt_restore(ibt);
+
#ifdef CONFIG_SMP
/* Patch to UP if other cpus not imminent. */
if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
@@ -2140,76 +2413,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
}
}
-typedef struct {
- struct mm_struct *mm;
-} temp_mm_state_t;
-
-/*
- * Using a temporary mm allows to set temporary mappings that are not accessible
- * by other CPUs. Such mappings are needed to perform sensitive memory writes
- * that override the kernel memory protections (e.g., W^X), without exposing the
- * temporary page-table mappings that are required for these write operations to
- * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
- * mapping is torn down.
- *
- * Context: The temporary mm needs to be used exclusively by a single core. To
- * harden security IRQs must be disabled while the temporary mm is
- * loaded, thereby preventing interrupt handler bugs from overriding
- * the kernel memory protection.
- */
-static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
-{
- temp_mm_state_t temp_state;
-
- lockdep_assert_irqs_disabled();
-
- /*
- * Make sure not to be in TLB lazy mode, as otherwise we'll end up
- * with a stale address space WITHOUT being in lazy mode after
- * restoring the previous mm.
- */
- if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
- leave_mm();
-
- temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
- switch_mm_irqs_off(NULL, mm, current);
-
- /*
- * If breakpoints are enabled, disable them while the temporary mm is
- * used. Userspace might set up watchpoints on addresses that are used
- * in the temporary mm, which would lead to wrong signals being sent or
- * crashes.
- *
- * Note that breakpoints are not disabled selectively, which also causes
- * kernel breakpoints (e.g., perf's) to be disabled. This might be
- * undesirable, but still seems reasonable as the code that runs in the
- * temporary mm should be short.
- */
- if (hw_breakpoint_active())
- hw_breakpoint_disable();
-
- return temp_state;
-}
-
-__ro_after_init struct mm_struct *poking_mm;
-__ro_after_init unsigned long poking_addr;
-
-static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
-{
- lockdep_assert_irqs_disabled();
-
- switch_mm_irqs_off(NULL, prev_state.mm, current);
-
- /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
- cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm));
-
- /*
- * Restore the breakpoints if they were disabled before the temporary mm
- * was loaded.
- */
- if (hw_breakpoint_active())
- hw_breakpoint_restore();
-}
+__ro_after_init struct mm_struct *text_poke_mm;
+__ro_after_init unsigned long text_poke_mm_addr;
static void text_poke_memcpy(void *dst, const void *src, size_t len)
{
@@ -2229,7 +2434,7 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
{
bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
struct page *pages[2] = {NULL};
- temp_mm_state_t prev;
+ struct mm_struct *prev_mm;
unsigned long flags;
pte_t pte, *ptep;
spinlock_t *ptl;
@@ -2266,7 +2471,7 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
/*
* The lock is not really needed, but this allows to avoid open-coding.
*/
- ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);
/*
* This must not fail; preallocated in poking_init().
@@ -2276,21 +2481,21 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
local_irq_save(flags);
pte = mk_pte(pages[0], pgprot);
- set_pte_at(poking_mm, poking_addr, ptep, pte);
+ set_pte_at(text_poke_mm, text_poke_mm_addr, ptep, pte);
if (cross_page_boundary) {
pte = mk_pte(pages[1], pgprot);
- set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+ set_pte_at(text_poke_mm, text_poke_mm_addr + PAGE_SIZE, ptep + 1, pte);
}
/*
* Loading the temporary mm behaves as a compiler barrier, which
* guarantees that the PTE will be set at the time memcpy() is done.
*/
- prev = use_temporary_mm(poking_mm);
+ prev_mm = use_temporary_mm(text_poke_mm);
kasan_disable_current();
- func((u8 *)poking_addr + offset_in_page(addr), src, len);
+ func((u8 *)text_poke_mm_addr + offset_in_page(addr), src, len);
kasan_enable_current();
/*
@@ -2299,22 +2504,22 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
*/
barrier();
- pte_clear(poking_mm, poking_addr, ptep);
+ pte_clear(text_poke_mm, text_poke_mm_addr, ptep);
if (cross_page_boundary)
- pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+ pte_clear(text_poke_mm, text_poke_mm_addr + PAGE_SIZE, ptep + 1);
/*
* Loading the previous page-table hierarchy requires a serializing
* instruction that already allows the core to see the updated version.
* Xen-PV is assumed to serialize execution in a similar manner.
*/
- unuse_temporary_mm(prev);
+ unuse_temporary_mm(prev_mm);
/*
* Flushing the TLB might involve IPIs, which would require enabled
* IRQs, but not if the mm is not used, as it is in this point.
*/
- flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+ flush_tlb_mm_range(text_poke_mm, text_poke_mm_addr, text_poke_mm_addr +
(cross_page_boundary ? 2 : 1) * PAGE_SIZE,
PAGE_SHIFT, false);
@@ -2450,7 +2655,7 @@ static void do_sync_core(void *info)
sync_core();
}
-void text_poke_sync(void)
+void smp_text_poke_sync_each_cpu(void)
{
on_each_cpu(do_sync_core, NULL, 1);
}
@@ -2460,64 +2665,66 @@ void text_poke_sync(void)
* this thing. When len == 6 everything is prefixed with 0x0f and we map
* opcode to Jcc.d8, using len to distinguish.
*/
-struct text_poke_loc {
+struct smp_text_poke_loc {
/* addr := _stext + rel_addr */
s32 rel_addr;
s32 disp;
u8 len;
u8 opcode;
- const u8 text[POKE_MAX_OPCODE_SIZE];
- /* see text_poke_bp_batch() */
+ const u8 text[TEXT_POKE_MAX_OPCODE_SIZE];
+ /* see smp_text_poke_batch_finish() */
u8 old;
};
-struct bp_patching_desc {
- struct text_poke_loc *vec;
+#define TEXT_POKE_ARRAY_MAX (PAGE_SIZE / sizeof(struct smp_text_poke_loc))
+
+static struct smp_text_poke_array {
+ struct smp_text_poke_loc vec[TEXT_POKE_ARRAY_MAX];
int nr_entries;
- atomic_t refs;
-};
+} text_poke_array;
-static struct bp_patching_desc bp_desc;
+static DEFINE_PER_CPU(atomic_t, text_poke_array_refs);
-static __always_inline
-struct bp_patching_desc *try_get_desc(void)
+/*
+ * These four __always_inline annotations imply noinstr, necessary
+ * due to smp_text_poke_int3_handler() being noinstr:
+ */
+
+static __always_inline bool try_get_text_poke_array(void)
{
- struct bp_patching_desc *desc = &bp_desc;
+ atomic_t *refs = this_cpu_ptr(&text_poke_array_refs);
- if (!raw_atomic_inc_not_zero(&desc->refs))
- return NULL;
+ if (!raw_atomic_inc_not_zero(refs))
+ return false;
- return desc;
+ return true;
}
-static __always_inline void put_desc(void)
+static __always_inline void put_text_poke_array(void)
{
- struct bp_patching_desc *desc = &bp_desc;
+ atomic_t *refs = this_cpu_ptr(&text_poke_array_refs);
smp_mb__before_atomic();
- raw_atomic_dec(&desc->refs);
+ raw_atomic_dec(refs);
}
-static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
+static __always_inline void *text_poke_addr(const struct smp_text_poke_loc *tpl)
{
- return _stext + tp->rel_addr;
+ return _stext + tpl->rel_addr;
}
-static __always_inline int patch_cmp(const void *key, const void *elt)
+static __always_inline int patch_cmp(const void *tpl_a, const void *tpl_b)
{
- struct text_poke_loc *tp = (struct text_poke_loc *) elt;
-
- if (key < text_poke_addr(tp))
+ if (tpl_a < text_poke_addr(tpl_b))
return -1;
- if (key > text_poke_addr(tp))
+ if (tpl_a > text_poke_addr(tpl_b))
return 1;
return 0;
}
-noinstr int poke_int3_handler(struct pt_regs *regs)
+noinstr int smp_text_poke_int3_handler(struct pt_regs *regs)
{
- struct bp_patching_desc *desc;
- struct text_poke_loc *tp;
+ struct smp_text_poke_loc *tpl;
int ret = 0;
void *ip;
@@ -2526,41 +2733,40 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
/*
* Having observed our INT3 instruction, we now must observe
- * bp_desc with non-zero refcount:
+ * text_poke_array with non-zero refcount:
*
- * bp_desc.refs = 1 INT3
- * WMB RMB
- * write INT3 if (bp_desc.refs != 0)
+ * text_poke_array_refs = 1 INT3
+ * WMB RMB
+ * write INT3 if (text_poke_array_refs != 0)
*/
smp_rmb();
- desc = try_get_desc();
- if (!desc)
+ if (!try_get_text_poke_array())
return 0;
/*
- * Discount the INT3. See text_poke_bp_batch().
+ * Discount the INT3. See smp_text_poke_batch_finish().
*/
ip = (void *) regs->ip - INT3_INSN_SIZE;
/*
* Skip the binary search if there is a single member in the vector.
*/
- if (unlikely(desc->nr_entries > 1)) {
- tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
- sizeof(struct text_poke_loc),
+ if (unlikely(text_poke_array.nr_entries > 1)) {
+ tpl = __inline_bsearch(ip, text_poke_array.vec, text_poke_array.nr_entries,
+ sizeof(struct smp_text_poke_loc),
patch_cmp);
- if (!tp)
+ if (!tpl)
goto out_put;
} else {
- tp = desc->vec;
- if (text_poke_addr(tp) != ip)
+ tpl = text_poke_array.vec;
+ if (text_poke_addr(tpl) != ip)
goto out_put;
}
- ip += tp->len;
+ ip += tpl->len;
- switch (tp->opcode) {
+ switch (tpl->opcode) {
case INT3_INSN_OPCODE:
/*
* Someone poked an explicit INT3, they'll want to handle it,
@@ -2573,16 +2779,16 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
break;
case CALL_INSN_OPCODE:
- int3_emulate_call(regs, (long)ip + tp->disp);
+ int3_emulate_call(regs, (long)ip + tpl->disp);
break;
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
- int3_emulate_jmp(regs, (long)ip + tp->disp);
+ int3_emulate_jmp(regs, (long)ip + tpl->disp);
break;
case 0x70 ... 0x7f: /* Jcc */
- int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp);
+ int3_emulate_jcc(regs, tpl->opcode & 0xf, (long)ip, tpl->disp);
break;
default:
@@ -2592,51 +2798,50 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
ret = 1;
out_put:
- put_desc();
+ put_text_poke_array();
return ret;
}
-#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
-static struct text_poke_loc tp_vec[TP_VEC_MAX];
-static int tp_vec_nr;
-
/**
- * text_poke_bp_batch() -- update instructions on live kernel on SMP
- * @tp: vector of instructions to patch
- * @nr_entries: number of entries in the vector
+ * smp_text_poke_batch_finish() -- update instructions on live kernel on SMP
*
- * Modify multi-byte instruction by using int3 breakpoint on SMP.
- * We completely avoid stop_machine() here, and achieve the
- * synchronization using int3 breakpoint.
+ * Input state:
+ * text_poke_array.vec: vector of instructions to patch
+ * text_poke_array.nr_entries: number of entries in the vector
+ *
+ * Modify multi-byte instructions by using INT3 breakpoints on SMP.
+ * We completely avoid using stop_machine() here, and achieve the
+ * synchronization using INT3 breakpoints and SMP cross-calls.
*
* The way it is done:
* - For each entry in the vector:
- * - add a int3 trap to the address that will be patched
- * - sync cores
+ * - add an INT3 trap to the address that will be patched
+ * - SMP sync all CPUs
* - For each entry in the vector:
* - update all but the first byte of the patched range
- * - sync cores
+ * - SMP sync all CPUs
* - For each entry in the vector:
- * - replace the first byte (int3) by the first byte of
+ * - replace the first byte (INT3) by the first byte of the
* replacing opcode
- * - sync cores
+ * - SMP sync all CPUs
*/
-static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+void smp_text_poke_batch_finish(void)
{
unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
int do_sync;
- lockdep_assert_held(&text_mutex);
+ if (!text_poke_array.nr_entries)
+ return;
- bp_desc.vec = tp;
- bp_desc.nr_entries = nr_entries;
+ lockdep_assert_held(&text_mutex);
/*
- * Corresponds to the implicit memory barrier in try_get_desc() to
- * ensure reading a non-zero refcount provides up to date bp_desc data.
+ * Corresponds to the implicit memory barrier in try_get_text_poke_array() to
+ * ensure reading a non-zero refcount provides up to date text_poke_array data.
*/
- atomic_set_release(&bp_desc.refs, 1);
+ for_each_possible_cpu(i)
+ atomic_set_release(per_cpu_ptr(&text_poke_array_refs, i), 1);
/*
* Function tracing can enable thousands of places that need to be
@@ -2649,33 +2854,33 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
cond_resched();
/*
- * Corresponding read barrier in int3 notifier for making sure the
- * nr_entries and handler are correctly ordered wrt. patching.
+ * Corresponding read barrier in INT3 notifier for making sure the
+ * text_poke_array.nr_entries and handler are correctly ordered wrt. patching.
*/
smp_wmb();
/*
- * First step: add a int3 trap to the address that will be patched.
+ * First step: add a INT3 trap to the address that will be patched.
*/
- for (i = 0; i < nr_entries; i++) {
- tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
- text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
+ for (i = 0; i < text_poke_array.nr_entries; i++) {
+ text_poke_array.vec[i].old = *(u8 *)text_poke_addr(&text_poke_array.vec[i]);
+ text_poke(text_poke_addr(&text_poke_array.vec[i]), &int3, INT3_INSN_SIZE);
}
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
/*
* Second step: update all but the first byte of the patched range.
*/
- for (do_sync = 0, i = 0; i < nr_entries; i++) {
- u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, };
- u8 _new[POKE_MAX_OPCODE_SIZE+1];
- const u8 *new = tp[i].text;
- int len = tp[i].len;
+ for (do_sync = 0, i = 0; i < text_poke_array.nr_entries; i++) {
+ u8 old[TEXT_POKE_MAX_OPCODE_SIZE+1] = { text_poke_array.vec[i].old, };
+ u8 _new[TEXT_POKE_MAX_OPCODE_SIZE+1];
+ const u8 *new = text_poke_array.vec[i].text;
+ int len = text_poke_array.vec[i].len;
if (len - INT3_INSN_SIZE > 0) {
memcpy(old + INT3_INSN_SIZE,
- text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+ text_poke_addr(&text_poke_array.vec[i]) + INT3_INSN_SIZE,
len - INT3_INSN_SIZE);
if (len == 6) {
@@ -2684,7 +2889,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
new = _new;
}
- text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+ text_poke(text_poke_addr(&text_poke_array.vec[i]) + INT3_INSN_SIZE,
new + INT3_INSN_SIZE,
len - INT3_INSN_SIZE);
@@ -2715,7 +2920,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
* The old instruction is recorded so that the event can be
* processed forwards or backwards.
*/
- perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len);
+ perf_event_text_poke(text_poke_addr(&text_poke_array.vec[i]), old, len, new, len);
}
if (do_sync) {
@@ -2724,63 +2929,79 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
* not necessary and we'd be safe even without it. But
* better safe than sorry (plus there's not only Intel).
*/
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
}
/*
- * Third step: replace the first byte (int3) by the first byte of
+ * Third step: replace the first byte (INT3) by the first byte of the
* replacing opcode.
*/
- for (do_sync = 0, i = 0; i < nr_entries; i++) {
- u8 byte = tp[i].text[0];
+ for (do_sync = 0, i = 0; i < text_poke_array.nr_entries; i++) {
+ u8 byte = text_poke_array.vec[i].text[0];
- if (tp[i].len == 6)
+ if (text_poke_array.vec[i].len == 6)
byte = 0x0f;
if (byte == INT3_INSN_OPCODE)
continue;
- text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE);
+ text_poke(text_poke_addr(&text_poke_array.vec[i]), &byte, INT3_INSN_SIZE);
do_sync++;
}
if (do_sync)
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
/*
* Remove and wait for refs to be zero.
+ *
+ * Notably, if after step-3 above the INT3 got removed, then the
+ * smp_text_poke_sync_each_cpu() will have serialized against any running INT3
+ * handlers and the below spin-wait will not happen.
+ *
+ * IOW. unless the replacement instruction is INT3, this case goes
+ * unused.
*/
- if (!atomic_dec_and_test(&bp_desc.refs))
- atomic_cond_read_acquire(&bp_desc.refs, !VAL);
+ for_each_possible_cpu(i) {
+ atomic_t *refs = per_cpu_ptr(&text_poke_array_refs, i);
+
+ if (unlikely(!atomic_dec_and_test(refs)))
+ atomic_cond_read_acquire(refs, !VAL);
+ }
+
+ /* They are all completed: */
+ text_poke_array.nr_entries = 0;
}
-static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
- const void *opcode, size_t len, const void *emulate)
+static void __smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate)
{
+ struct smp_text_poke_loc *tpl;
struct insn insn;
int ret, i = 0;
+ tpl = &text_poke_array.vec[text_poke_array.nr_entries++];
+
if (len == 6)
i = 1;
- memcpy((void *)tp->text, opcode+i, len-i);
+ memcpy((void *)tpl->text, opcode+i, len-i);
if (!emulate)
emulate = opcode;
ret = insn_decode_kernel(&insn, emulate);
BUG_ON(ret < 0);
- tp->rel_addr = addr - (void *)_stext;
- tp->len = len;
- tp->opcode = insn.opcode.bytes[0];
+ tpl->rel_addr = addr - (void *)_stext;
+ tpl->len = len;
+ tpl->opcode = insn.opcode.bytes[0];
if (is_jcc32(&insn)) {
/*
* Map Jcc.d32 onto Jcc.d8 and use len to distinguish.
*/
- tp->opcode = insn.opcode.bytes[1] - 0x10;
+ tpl->opcode = insn.opcode.bytes[1] - 0x10;
}
- switch (tp->opcode) {
+ switch (tpl->opcode) {
case RET_INSN_OPCODE:
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
@@ -2789,14 +3010,14 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
* next instruction can be padded with INT3.
*/
for (i = insn.length; i < len; i++)
- BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
+ BUG_ON(tpl->text[i] != INT3_INSN_OPCODE);
break;
default:
BUG_ON(len != insn.length);
}
- switch (tp->opcode) {
+ switch (tpl->opcode) {
case INT3_INSN_OPCODE:
case RET_INSN_OPCODE:
break;
@@ -2805,21 +3026,21 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
case 0x70 ... 0x7f: /* Jcc */
- tp->disp = insn.immediate.value;
+ tpl->disp = insn.immediate.value;
break;
default: /* assume NOP */
switch (len) {
case 2: /* NOP2 -- emulate as JMP8+0 */
BUG_ON(memcmp(emulate, x86_nops[len], len));
- tp->opcode = JMP8_INSN_OPCODE;
- tp->disp = 0;
+ tpl->opcode = JMP8_INSN_OPCODE;
+ tpl->disp = 0;
break;
case 5: /* NOP5 -- emulate as JMP32+0 */
BUG_ON(memcmp(emulate, x86_nops[len], len));
- tp->opcode = JMP32_INSN_OPCODE;
- tp->disp = 0;
+ tpl->opcode = JMP32_INSN_OPCODE;
+ tpl->disp = 0;
break;
default: /* unknown instruction */
@@ -2830,51 +3051,50 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
}
/*
- * We hard rely on the tp_vec being ordered; ensure this is so by flushing
+ * We hard rely on the text_poke_array.vec being ordered; ensure this is so by flushing
* early if needed.
*/
-static bool tp_order_fail(void *addr)
+static bool text_poke_addr_ordered(void *addr)
{
- struct text_poke_loc *tp;
-
- if (!tp_vec_nr)
- return false;
-
- if (!addr) /* force */
- return true;
+ WARN_ON_ONCE(!addr);
- tp = &tp_vec[tp_vec_nr - 1];
- if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
+ if (!text_poke_array.nr_entries)
return true;
- return false;
-}
-
-static void text_poke_flush(void *addr)
-{
- if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
- text_poke_bp_batch(tp_vec, tp_vec_nr);
- tp_vec_nr = 0;
- }
-}
+ /*
+ * If the last current entry's address is higher than the
+ * new entry's address we'd like to add, then ordering
+ * is violated and we must first flush all pending patching
+ * requests:
+ */
+ if (text_poke_addr(text_poke_array.vec + text_poke_array.nr_entries-1) > addr)
+ return false;
-void text_poke_finish(void)
-{
- text_poke_flush(NULL);
+ return true;
}
-void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
+/**
+ * smp_text_poke_batch_add() -- update instruction on live kernel on SMP, batched
+ * @addr: address to patch
+ * @opcode: opcode of new instruction
+ * @len: length to copy
+ * @emulate: instruction to be emulated
+ *
+ * Add a new instruction to the current queue of to-be-patched instructions
+ * the kernel maintains. The patching request will not be executed immediately,
+ * but becomes part of an array of patching requests, optimized for batched
+ * execution. All pending patching requests will be executed on the next
+ * smp_text_poke_batch_finish() call.
+ */
+void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate)
{
- struct text_poke_loc *tp;
-
- text_poke_flush(addr);
-
- tp = &tp_vec[tp_vec_nr++];
- text_poke_loc_init(tp, addr, opcode, len, emulate);
+ if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX || !text_poke_addr_ordered(addr))
+ smp_text_poke_batch_finish();
+ __smp_text_poke_batch_add(addr, opcode, len, emulate);
}
/**
- * text_poke_bp() -- update instructions on live kernel on SMP
+ * smp_text_poke_single() -- update instruction on live kernel on SMP immediately
* @addr: address to patch
* @opcode: opcode of new instruction
* @len: length to copy
@@ -2882,12 +3102,11 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi
*
* Update a single instruction with the vector in the stack, avoiding
* dynamically allocated memory. This function should be used when it is
- * not possible to allocate memory.
+ * not possible to allocate memory for a vector. The single instruction
+ * is patched in immediately.
*/
-void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
+void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate)
{
- struct text_poke_loc tp;
-
- text_poke_loc_init(&tp, addr, opcode, len, emulate);
- text_poke_bp_batch(&tp, 1);
+ __smp_text_poke_batch_add(addr, opcode, len, emulate);
+ smp_text_poke_batch_finish();
}
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index c884deca839b..3485d419c2f5 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -39,7 +39,7 @@
#include <asm/gart.h>
#include <asm/set_memory.h>
#include <asm/dma.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/x86_init.h>
static unsigned long iommu_bus_base; /* GART remapping area (physical) */
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 6d12a9b69432..c1acead6227a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -13,7 +13,9 @@
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/pci_ids.h>
-#include <asm/amd_nb.h>
+
+#include <asm/amd/nb.h>
+#include <asm/cpuid/api.h>
static u32 *flush_words;
@@ -91,10 +93,7 @@ static int amd_cache_northbridges(void)
if (amd_gart_present())
amd_northbridges.flags |= AMD_NB_GART;
- /*
- * Check for L3 cache presence.
- */
- if (!cpuid_edx(0x80000006))
+ if (!cpuid_amd_hygon_has_l3_cache())
return 0;
/*
@@ -151,7 +150,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
/* Assume CPUs from Fam10h have mmconfig, although not all VMs do */
if (boot_cpu_data.x86 < 0x10 ||
- rdmsrl_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr))
+ rdmsrq_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr))
return NULL;
/* mmconfig is not enabled */
diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c
index b670fa85c61b..a40176b62eb5 100644
--- a/arch/x86/kernel/amd_node.c
+++ b/arch/x86/kernel/amd_node.c
@@ -9,7 +9,7 @@
*/
#include <linux/debugfs.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
/*
* AMD Nodes are a physical collection of I/O devices within an SoC. There can be one
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 89c0c8a3fc7e..769321185a08 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -29,7 +29,7 @@
#include <asm/gart.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/x86_init.h>
#include <linux/crash_dump.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 62584a347931..d73ba5a7b623 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -59,6 +59,7 @@
#include <asm/time.h>
#include <asm/smp.h>
#include <asm/mce.h>
+#include <asm/msr.h>
#include <asm/tsc.h>
#include <asm/hypervisor.h>
#include <asm/cpu_device_id.h>
@@ -425,7 +426,7 @@ static int lapic_next_deadline(unsigned long delta,
weak_wrmsr_fence();
tsc = rdtsc();
- wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+ wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
}
@@ -449,7 +450,7 @@ static int lapic_timer_shutdown(struct clock_event_device *evt)
* the timer _and_ zero the counter registers:
*/
if (v & APIC_LVT_TIMER_TSCDEADLINE)
- wrmsrl(MSR_IA32_TSC_DEADLINE, 0);
+ wrmsrq(MSR_IA32_TSC_DEADLINE, 0);
else
apic_write(APIC_TMICT, 0);
@@ -1694,7 +1695,7 @@ static bool x2apic_hw_locked(void)
x86_arch_cap_msr = x86_read_arch_cap_msr();
if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
- rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
+ rdmsrq(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
return false;
@@ -1707,12 +1708,12 @@ static void __x2apic_disable(void)
if (!boot_cpu_has(X86_FEATURE_APIC))
return;
- rdmsrl(MSR_IA32_APICBASE, msr);
+ rdmsrq(MSR_IA32_APICBASE, msr);
if (!(msr & X2APIC_ENABLE))
return;
/* Disable xapic and x2apic first and then reenable xapic mode */
- wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
- wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+ wrmsrq(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+ wrmsrq(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
printk_once(KERN_INFO "x2apic disabled\n");
}
@@ -1720,10 +1721,10 @@ static void __x2apic_enable(void)
{
u64 msr;
- rdmsrl(MSR_IA32_APICBASE, msr);
+ rdmsrq(MSR_IA32_APICBASE, msr);
if (msr & X2APIC_ENABLE)
return;
- wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+ wrmsrq(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
printk_once(KERN_INFO "x2apic enabled\n");
}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 16410f087b7a..e272bc7fdc8e 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <asm/msr.h>
#include <asm/numachip/numachip.h>
#include <asm/numachip/numachip_csr.h>
@@ -31,7 +32,7 @@ static u32 numachip1_get_apic_id(u32 x)
unsigned int id = (x >> 24) & 0xff;
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
- rdmsrl(MSR_FAM10H_NODE_ID, value);
+ rdmsrq(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@@ -42,7 +43,7 @@ static u32 numachip2_get_apic_id(u32 x)
{
u64 mcfg;
- rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, mcfg);
+ rdmsrq(MSR_FAM10H_MMIO_CONF_BASE, mcfg);
return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
}
@@ -150,7 +151,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
/* Account for nodes per socket in multi-core-module processors */
if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
- rdmsrl(MSR_FAM10H_NODE_ID, val);
+ rdmsrq(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index eebc360ed1bb..5ba2feb2c04c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1486,7 +1486,7 @@ static void __init delay_with_tsc(void)
* 1 GHz == 40 jiffies
*/
do {
- rep_nop();
+ native_pause();
now = rdtsc();
} while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end));
}
@@ -2225,7 +2225,7 @@ static int mp_irqdomain_create(int ioapic)
/* Handle device tree enumerated APICs proper */
if (cfg->dev) {
- fn = of_node_to_fwnode(cfg->dev);
+ fn = of_fwnode_handle(cfg->dev);
} else {
fn = irq_domain_alloc_named_id_fwnode("IO-APIC", mpc_ioapic_id(ioapic));
if (!fn)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index fee42a73d64a..93069b13d3af 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -864,7 +864,7 @@ void lapic_offline(void)
__vector_cleanup(cl, false);
irq_matrix_offline(vector_matrix);
- WARN_ON_ONCE(try_to_del_timer_sync(&cl->timer) < 0);
+ WARN_ON_ONCE(timer_delete_sync_try(&cl->timer) < 0);
WARN_ON_ONCE(!hlist_empty(&cl->head));
unlock_vector_lock();
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index ad4ea6fb3b6c..6259b474073b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -33,6 +33,14 @@
static void __used common(void)
{
+ OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
+ OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
+ OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
+ OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
+ OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
+ OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
+
BLANK();
OFFSET(TASK_threadsp, task_struct, thread.sp);
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 2b411cd00a4e..e0a292db97b2 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -12,15 +12,6 @@ void foo(void);
void foo(void)
{
- OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
- OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
- OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
- OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
- OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
- OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index d86d7d6e750c..a951333c5995 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -185,7 +185,7 @@ static void *patch_dest(void *dest, bool direct)
u8 *pad = dest - tsize;
memcpy(insn_buff, skl_call_thunk_template, tsize);
- apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);
+ text_poke_apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);
/* Already patched? */
if (!bcmp(pad, insn_buff, tsize))
@@ -294,7 +294,7 @@ static bool is_callthunk(void *addr)
pad = (void *)(dest - tmpl_size);
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);
+ text_poke_apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);
return !bcmp(pad, insn_buff, tmpl_size);
}
@@ -312,7 +312,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
return 0;
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);
+ text_poke_apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);
memcpy(*pprog, insn_buff, tmpl_size);
*pprog += tmpl_size;
diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
index 303bf74d175b..99444409c026 100644
--- a/arch/x86/kernel/cet.c
+++ b/arch/x86/kernel/cet.c
@@ -2,6 +2,7 @@
#include <linux/ptrace.h>
#include <asm/bugs.h>
+#include <asm/msr.h>
#include <asm/traps.h>
enum cp_error_code {
@@ -55,7 +56,7 @@ static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code)
* will be whatever is live in userspace. So read the SSP before enabling
* interrupts so locking the fpregs to do it later is not required.
*/
- rdmsrl(MSR_IA32_PL3_SSP, ssp);
+ rdmsrq(MSR_IA32_PL3_SSP, ssp);
cond_local_irq_enable(regs);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4efdf5c2efc8..1e26179ff18c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -24,7 +24,7 @@ obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
obj-y += aperfmperf.o
-obj-y += cpuid-deps.o
+obj-y += cpuid-deps.o cpuid_0x2_table.o
obj-y += umwait.o
obj-y += capflags.o powerflags.o
@@ -38,6 +38,9 @@ obj-y += intel.o tsx.o
obj-$(CONFIG_PM) += intel_epb.o
endif
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
+ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy)
+obj-y += amd_cache_disable.o
+endif
obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2b36379ff675..93da466dfe2c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <linux/random.h>
#include <linux/topology.h>
+#include <asm/amd/fch.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
@@ -21,6 +22,7 @@
#include <asm/delay.h>
#include <asm/debugreg.h>
#include <asm/resctrl.h>
+#include <asm/msr.h>
#include <asm/sev.h>
#ifdef CONFIG_X86_64
@@ -31,7 +33,7 @@
u16 invlpgb_count_max __ro_after_init;
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+static inline int rdmsrq_amd_safe(unsigned msr, u64 *p)
{
u32 gprs[8] = { 0 };
int err;
@@ -49,7 +51,7 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
return err;
}
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
+static inline int wrmsrq_amd_safe(unsigned msr, u64 val)
{
u32 gprs[8] = { 0 };
@@ -383,7 +385,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
(c->x86 == 0x10 && c->x86_model >= 0x2)) {
u64 val;
- rdmsrl(MSR_K7_HWCR, val);
+ rdmsrq(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
}
@@ -422,7 +424,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
* Try to cache the base value so further operations can
* avoid RMW. If that faults, do not enable SSBD.
*/
- if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ if (!rdmsrq_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
setup_force_cpu_cap(X86_FEATURE_SSBD);
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
@@ -472,6 +474,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
case 0x60 ... 0x7f:
setup_force_cpu_cap(X86_FEATURE_ZEN5);
break;
+ case 0x50 ... 0x5f:
+ case 0x90 ... 0xaf:
+ case 0xc0 ... 0xcf:
+ setup_force_cpu_cap(X86_FEATURE_ZEN6);
+ break;
default:
goto warn;
}
@@ -508,7 +515,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
*/
if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
/* Check if memory encryption is enabled */
- rdmsrl(MSR_AMD64_SYSCFG, msr);
+ rdmsrq(MSR_AMD64_SYSCFG, msr);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
goto clear_all;
@@ -525,7 +532,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
if (!sme_me_mask)
setup_clear_cpu_cap(X86_FEATURE_SME);
- rdmsrl(MSR_K7_HWCR, msr);
+ rdmsrq(MSR_K7_HWCR, msr);
if (!(msr & MSR_K7_HWCR_SMMLOCK))
goto clear_sev;
@@ -612,7 +619,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
- else if (c->x86 >= 0x19 && !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
+ else if (c->x86 >= 0x19 && !wrmsrq_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
setup_force_cpu_cap(X86_FEATURE_SBPB);
}
@@ -636,14 +643,14 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
*/
if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) {
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
- if (!rdmsrl_amd_safe(0xc001100d, &value)) {
+ if (!rdmsrq_amd_safe(0xc001100d, &value)) {
value &= ~BIT_64(32);
- wrmsrl_amd_safe(0xc001100d, value);
+ wrmsrq_amd_safe(0xc001100d, value);
}
}
if (!c->x86_model_id[0])
- strcpy(c->x86_model_id, "Hammer");
+ strscpy(c->x86_model_id, "Hammer");
#ifdef CONFIG_SMP
/*
@@ -788,9 +795,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
* Disable it on the affected CPUs.
*/
if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
- if (!rdmsrl_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) {
+ if (!rdmsrq_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) {
value |= 0x1E;
- wrmsrl_safe(MSR_F15H_IC_CFG, value);
+ wrmsrq_safe(MSR_F15H_IC_CFG, value);
}
}
@@ -839,9 +846,9 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
* suppresses non-branch predictions.
*/
if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
- if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
+ if (!rdmsrq_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
- wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+ wrmsrq_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
}
}
#endif
@@ -1025,7 +1032,7 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_cacheinfo(c);
if (cpu_has(c, X86_FEATURE_SVM)) {
- rdmsrl(MSR_VM_CR, vm_cr);
+ rdmsrq(MSR_VM_CR, vm_cr);
if (vm_cr & SVM_VM_CR_SVM_DIS_MASK) {
pr_notice_once("SVM disabled (by BIOS) in MSR_VM_CR\n");
clear_cpu_cap(c, X86_FEATURE_SVM);
@@ -1206,7 +1213,7 @@ void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr)
if (per_cpu(amd_dr_addr_mask, cpu)[dr] == mask)
return;
- wrmsr(amd_msr_dr_addr_masks[dr], mask, 0);
+ wrmsrq(amd_msr_dr_addr_masks[dr], mask);
per_cpu(amd_dr_addr_mask, cpu)[dr] = mask;
}
@@ -1237,3 +1244,56 @@ void amd_check_microcode(void)
if (cpu_feature_enabled(X86_FEATURE_ZEN2))
on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
+
+static const char * const s5_reset_reason_txt[] = {
+ [0] = "thermal pin BP_THERMTRIP_L was tripped",
+ [1] = "power button was pressed for 4 seconds",
+ [2] = "shutdown pin was tripped",
+ [4] = "remote ASF power off command was received",
+ [9] = "internal CPU thermal limit was tripped",
+ [16] = "system reset pin BP_SYS_RST_L was tripped",
+ [17] = "software issued PCI reset",
+ [18] = "software wrote 0x4 to reset control register 0xCF9",
+ [19] = "software wrote 0x6 to reset control register 0xCF9",
+ [20] = "software wrote 0xE to reset control register 0xCF9",
+ [21] = "ACPI power state transition occurred",
+ [22] = "keyboard reset pin KB_RST_L was tripped",
+ [23] = "internal CPU shutdown event occurred",
+ [24] = "system failed to boot before failed boot timer expired",
+ [25] = "hardware watchdog timer expired",
+ [26] = "remote ASF reset command was received",
+ [27] = "an uncorrected error caused a data fabric sync flood event",
+ [29] = "FCH and MP1 failed warm reset handshake",
+ [30] = "a parity error occurred",
+ [31] = "a software sync flood event occurred",
+};
+
+static __init int print_s5_reset_status_mmio(void)
+{
+ unsigned long value;
+ void __iomem *addr;
+ int i;
+
+ if (!cpu_feature_enabled(X86_FEATURE_ZEN))
+ return 0;
+
+ addr = ioremap(FCH_PM_BASE + FCH_PM_S5_RESET_STATUS, sizeof(value));
+ if (!addr)
+ return 0;
+
+ value = ioread32(addr);
+ iounmap(addr);
+
+ for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) {
+ if (!(value & BIT(i)))
+ continue;
+
+ if (s5_reset_reason_txt[i]) {
+ pr_info("x86/amd: Previous system reset reason [0x%08lx]: %s\n",
+ value, s5_reset_reason_txt[i]);
+ }
+ }
+
+ return 0;
+}
+late_initcall(print_s5_reset_status_mmio);
diff --git a/arch/x86/kernel/cpu/amd_cache_disable.c b/arch/x86/kernel/cpu/amd_cache_disable.c
new file mode 100644
index 000000000000..8843b9557aea
--- /dev/null
+++ b/arch/x86/kernel/cpu/amd_cache_disable.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD L3 cache_disable_{0,1} sysfs handling
+ * Documentation/ABI/testing/sysfs-devices-system-cpu
+ */
+
+#include <linux/cacheinfo.h>
+#include <linux/capability.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+
+#include <asm/amd/nb.h>
+
+#include "cpu.h"
+
+/*
+ * L3 cache descriptors
+ */
+static void amd_calc_l3_indices(struct amd_northbridge *nb)
+{
+ struct amd_l3_cache *l3 = &nb->l3_cache;
+ unsigned int sc0, sc1, sc2, sc3;
+ u32 val = 0;
+
+ pci_read_config_dword(nb->misc, 0x1C4, &val);
+
+ /* calculate subcache sizes */
+ l3->subcaches[0] = sc0 = !(val & BIT(0));
+ l3->subcaches[1] = sc1 = !(val & BIT(4));
+
+ if (boot_cpu_data.x86 == 0x15) {
+ l3->subcaches[0] = sc0 += !(val & BIT(1));
+ l3->subcaches[1] = sc1 += !(val & BIT(5));
+ }
+
+ l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
+ l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+ l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
+}
+
+/*
+ * check whether a slot used for disabling an L3 index is occupied.
+ * @l3: L3 cache descriptor
+ * @slot: slot number (0..1)
+ *
+ * @returns: the disabled index if used or negative value if slot free.
+ */
+static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned int slot)
+{
+ unsigned int reg = 0;
+
+ pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
+
+ /* check whether this slot is activated already */
+ if (reg & (3UL << 30))
+ return reg & 0xfff;
+
+ return -1;
+}
+
+static ssize_t show_cache_disable(struct cacheinfo *ci, char *buf, unsigned int slot)
+{
+ int index;
+ struct amd_northbridge *nb = ci->priv;
+
+ index = amd_get_l3_disable_slot(nb, slot);
+ if (index >= 0)
+ return sysfs_emit(buf, "%d\n", index);
+
+ return sysfs_emit(buf, "FREE\n");
+}
+
+#define SHOW_CACHE_DISABLE(slot) \
+static ssize_t \
+cache_disable_##slot##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct cacheinfo *ci = dev_get_drvdata(dev); \
+ return show_cache_disable(ci, buf, slot); \
+}
+
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+
+static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
+ unsigned int slot, unsigned long idx)
+{
+ int i;
+
+ idx |= BIT(30);
+
+ /*
+ * disable index in all 4 subcaches
+ */
+ for (i = 0; i < 4; i++) {
+ u32 reg = idx | (i << 20);
+
+ if (!nb->l3_cache.subcaches[i])
+ continue;
+
+ pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
+
+ /*
+ * We need to WBINVD on a core on the node containing the L3
+ * cache which indices we disable therefore a simple wbinvd()
+ * is not sufficient.
+ */
+ wbinvd_on_cpu(cpu);
+
+ reg |= BIT(31);
+ pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
+ }
+}
+
+/*
+ * disable a L3 cache index by using a disable-slot
+ *
+ * @l3: L3 cache descriptor
+ * @cpu: A CPU on the node containing the L3 cache
+ * @slot: slot number (0..1)
+ * @index: index to disable
+ *
+ * @return: 0 on success, error status on failure
+ */
+static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
+ unsigned int slot, unsigned long index)
+{
+ int ret = 0;
+
+ /* check if @slot is already used or the index is already disabled */
+ ret = amd_get_l3_disable_slot(nb, slot);
+ if (ret >= 0)
+ return -EEXIST;
+
+ if (index > nb->l3_cache.indices)
+ return -EINVAL;
+
+ /* check whether the other slot has disabled the same index already */
+ if (index == amd_get_l3_disable_slot(nb, !slot))
+ return -EEXIST;
+
+ amd_l3_disable_index(nb, cpu, slot, index);
+
+ return 0;
+}
+
+static ssize_t store_cache_disable(struct cacheinfo *ci, const char *buf,
+ size_t count, unsigned int slot)
+{
+ struct amd_northbridge *nb = ci->priv;
+ unsigned long val = 0;
+ int cpu, err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ cpu = cpumask_first(&ci->shared_cpu_map);
+
+ if (kstrtoul(buf, 10, &val) < 0)
+ return -EINVAL;
+
+ err = amd_set_l3_disable_slot(nb, cpu, slot, val);
+ if (err) {
+ if (err == -EEXIST)
+ pr_warn("L3 slot %d in use/index already disabled!\n",
+ slot);
+ return err;
+ }
+ return count;
+}
+
+#define STORE_CACHE_DISABLE(slot) \
+static ssize_t \
+cache_disable_##slot##_store(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ struct cacheinfo *ci = dev_get_drvdata(dev); \
+ return store_cache_disable(ci, buf, count, slot); \
+}
+
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
+
+static ssize_t subcaches_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cacheinfo *ci = dev_get_drvdata(dev);
+ int cpu = cpumask_first(&ci->shared_cpu_map);
+
+ return sysfs_emit(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+static ssize_t subcaches_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cacheinfo *ci = dev_get_drvdata(dev);
+ int cpu = cpumask_first(&ci->shared_cpu_map);
+ unsigned long val;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (kstrtoul(buf, 16, &val) < 0)
+ return -EINVAL;
+
+ if (amd_set_subcaches(cpu, val))
+ return -EINVAL;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(cache_disable_0);
+static DEVICE_ATTR_RW(cache_disable_1);
+static DEVICE_ATTR_RW(subcaches);
+
+static umode_t cache_private_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cacheinfo *ci = dev_get_drvdata(dev);
+ umode_t mode = attr->mode;
+
+ if (!ci->priv)
+ return 0;
+
+ if ((attr == &dev_attr_subcaches.attr) &&
+ amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return mode;
+
+ if ((attr == &dev_attr_cache_disable_0.attr ||
+ attr == &dev_attr_cache_disable_1.attr) &&
+ amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+ return mode;
+
+ return 0;
+}
+
+static struct attribute_group cache_private_group = {
+ .is_visible = cache_private_attrs_is_visible,
+};
+
+static void init_amd_l3_attrs(void)
+{
+ static struct attribute **amd_l3_attrs;
+ int n = 1;
+
+ if (amd_l3_attrs) /* already initialized */
+ return;
+
+ if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+ n += 2;
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ n += 1;
+
+ amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
+ if (!amd_l3_attrs)
+ return;
+
+ n = 0;
+ if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+ amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
+ amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
+ }
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
+
+ cache_private_group.attrs = amd_l3_attrs;
+}
+
+const struct attribute_group *cache_get_priv_group(struct cacheinfo *ci)
+{
+ struct amd_northbridge *nb = ci->priv;
+
+ if (ci->level < 3 || !nb)
+ return NULL;
+
+ if (nb && nb->l3_cache.indices)
+ init_amd_l3_attrs();
+
+ return &cache_private_group;
+}
+
+struct amd_northbridge *amd_init_l3_cache(int index)
+{
+ struct amd_northbridge *nb;
+ int node;
+
+ /* only for L3, and not in virtualized environments */
+ if (index < 3)
+ return NULL;
+
+ node = topology_amd_node_id(smp_processor_id());
+ nb = node_to_amd_nb(node);
+ if (nb && !nb->l3_cache.indices)
+ amd_calc_l3_indices(nb);
+
+ return nb;
+}
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 6cf31a1649c4..a315b0627dfb 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -20,6 +20,7 @@
#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "cpu.h"
@@ -40,8 +41,8 @@ static void init_counter_refs(void)
{
u64 aperf, mperf;
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
+ rdmsrq(MSR_IA32_APERF, aperf);
+ rdmsrq(MSR_IA32_MPERF, mperf);
this_cpu_write(cpu_samples.aperf, aperf);
this_cpu_write(cpu_samples.mperf, mperf);
@@ -99,7 +100,7 @@ static bool __init turbo_disabled(void)
u64 misc_en;
int err;
- err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
+ err = rdmsrq_safe(MSR_IA32_MISC_ENABLE, &misc_en);
if (err)
return false;
@@ -110,11 +111,11 @@ static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
int err;
- err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
+ err = rdmsrq_safe(MSR_ATOM_CORE_RATIOS, base_freq);
if (err)
return false;
- err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
+ err = rdmsrq_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
if (err)
return false;
@@ -152,13 +153,13 @@ static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
int err, i;
u64 msr;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ err = rdmsrq_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+ err = rdmsrq_safe(MSR_TURBO_RATIO_LIMIT, &msr);
if (err)
return false;
@@ -190,17 +191,17 @@ static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int s
u32 group_size;
int err, i;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ err = rdmsrq_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
+ err = rdmsrq_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
if (err)
return false;
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
+ err = rdmsrq_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
if (err)
return false;
@@ -220,11 +221,11 @@ static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
u64 msr;
int err;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ err = rdmsrq_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+ err = rdmsrq_safe(MSR_TURBO_RATIO_LIMIT, &msr);
if (err)
return false;
@@ -474,8 +475,8 @@ void arch_scale_freq_tick(void)
if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
return;
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
+ rdmsrq(MSR_IA32_APERF, aperf);
+ rdmsrq(MSR_IA32_MPERF, mperf);
acnt = aperf - s->aperf;
mcnt = mperf - s->mperf;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 362602b705cc..7f94e6a5497d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -34,21 +34,66 @@
#include "cpu.h"
+/*
+ * Speculation Vulnerability Handling
+ *
+ * Each vulnerability is handled with the following functions:
+ * <vuln>_select_mitigation() -- Selects a mitigation to use. This should
+ * take into account all relevant command line
+ * options.
+ * <vuln>_update_mitigation() -- This is called after all vulnerabilities have
+ * selected a mitigation, in case the selection
+ * may want to change based on other choices
+ * made. This function is optional.
+ * <vuln>_apply_mitigation() -- Enable the selected mitigation.
+ *
+ * The compile-time mitigation in all cases should be AUTO. An explicit
+ * command-line option can override AUTO. If no such option is
+ * provided, <vuln>_select_mitigation() will override AUTO to the best
+ * mitigation option.
+ */
+
static void __init spectre_v1_select_mitigation(void);
+static void __init spectre_v1_apply_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
+static void __init spectre_v2_update_mitigation(void);
+static void __init spectre_v2_apply_mitigation(void);
static void __init retbleed_select_mitigation(void);
+static void __init retbleed_update_mitigation(void);
+static void __init retbleed_apply_mitigation(void);
static void __init spectre_v2_user_select_mitigation(void);
+static void __init spectre_v2_user_update_mitigation(void);
+static void __init spectre_v2_user_apply_mitigation(void);
static void __init ssb_select_mitigation(void);
+static void __init ssb_apply_mitigation(void);
static void __init l1tf_select_mitigation(void);
+static void __init l1tf_apply_mitigation(void);
static void __init mds_select_mitigation(void);
-static void __init md_clear_update_mitigation(void);
-static void __init md_clear_select_mitigation(void);
+static void __init mds_update_mitigation(void);
+static void __init mds_apply_mitigation(void);
static void __init taa_select_mitigation(void);
+static void __init taa_update_mitigation(void);
+static void __init taa_apply_mitigation(void);
static void __init mmio_select_mitigation(void);
+static void __init mmio_update_mitigation(void);
+static void __init mmio_apply_mitigation(void);
+static void __init rfds_select_mitigation(void);
+static void __init rfds_update_mitigation(void);
+static void __init rfds_apply_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init srbds_apply_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
static void __init srso_select_mitigation(void);
+static void __init srso_update_mitigation(void);
+static void __init srso_apply_mitigation(void);
static void __init gds_select_mitigation(void);
+static void __init gds_apply_mitigation(void);
+static void __init bhi_select_mitigation(void);
+static void __init bhi_update_mitigation(void);
+static void __init bhi_apply_mitigation(void);
+static void __init its_select_mitigation(void);
+static void __init its_update_mitigation(void);
+static void __init its_apply_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -66,11 +111,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
+static void __init set_return_thunk(void *thunk)
+{
+ if (x86_return_thunk != __x86_return_thunk)
+ pr_warn("x86/bugs: return thunk changed\n");
+
+ x86_return_thunk = thunk;
+}
+
/* Update SPEC_CTRL MSR and its cached copy unconditionally */
static void update_spec_ctrl(u64 val)
{
this_cpu_write(x86_spec_ctrl_current, val);
- wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ wrmsrq(MSR_IA32_SPEC_CTRL, val);
}
/*
@@ -89,7 +142,7 @@ void update_spec_ctrl_cond(u64 val)
* forced the update can be delayed until that time.
*/
if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
- wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ wrmsrq(MSR_IA32_SPEC_CTRL, val);
}
noinstr u64 spec_ctrl_current(void)
@@ -127,9 +180,13 @@ EXPORT_SYMBOL_GPL(mds_idle_clear);
*/
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
-/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */
-DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
-EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
+/*
+ * Controls CPU Fill buffer clear before VMenter. This is a subset of
+ * X86_FEATURE_CLEAR_CPU_BUF, and should only be enabled when KVM-only
+ * mitigation is required.
+ */
+DEFINE_STATIC_KEY_FALSE(cpu_buf_vm_clear);
+EXPORT_SYMBOL_GPL(cpu_buf_vm_clear);
void __init cpu_select_mitigations(void)
{
@@ -139,7 +196,7 @@ void __init cpu_select_mitigations(void)
* init code as it is not enumerated and depends on the family.
*/
if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) {
- rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ rdmsrq(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
/*
* Previously running kernel (kexec), may have some controls
@@ -154,30 +211,67 @@ void __init cpu_select_mitigations(void)
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
- /*
- * retbleed_select_mitigation() relies on the state set by
- * spectre_v2_select_mitigation(); specifically it wants to know about
- * spectre_v2=ibrs.
- */
retbleed_select_mitigation();
- /*
- * spectre_v2_user_select_mitigation() relies on the state set by
- * retbleed_select_mitigation(); specifically the STIBP selection is
- * forced for UNRET or IBPB.
- */
spectre_v2_user_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
- md_clear_select_mitigation();
+ mds_select_mitigation();
+ taa_select_mitigation();
+ mmio_select_mitigation();
+ rfds_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
+ srso_select_mitigation();
+ gds_select_mitigation();
+ its_select_mitigation();
+ bhi_select_mitigation();
/*
- * srso_select_mitigation() depends and must run after
- * retbleed_select_mitigation().
+ * After mitigations are selected, some may need to update their
+ * choices.
*/
- srso_select_mitigation();
- gds_select_mitigation();
+ spectre_v2_update_mitigation();
+ /*
+ * retbleed_update_mitigation() relies on the state set by
+ * spectre_v2_update_mitigation(); specifically it wants to know about
+ * spectre_v2=ibrs.
+ */
+ retbleed_update_mitigation();
+ /*
+ * its_update_mitigation() depends on spectre_v2_update_mitigation()
+ * and retbleed_update_mitigation().
+ */
+ its_update_mitigation();
+
+ /*
+ * spectre_v2_user_update_mitigation() depends on
+ * retbleed_update_mitigation(), specifically the STIBP
+ * selection is forced for UNRET or IBPB.
+ */
+ spectre_v2_user_update_mitigation();
+ mds_update_mitigation();
+ taa_update_mitigation();
+ mmio_update_mitigation();
+ rfds_update_mitigation();
+ bhi_update_mitigation();
+ /* srso_update_mitigation() depends on retbleed_update_mitigation(). */
+ srso_update_mitigation();
+
+ spectre_v1_apply_mitigation();
+ spectre_v2_apply_mitigation();
+ retbleed_apply_mitigation();
+ spectre_v2_user_apply_mitigation();
+ ssb_apply_mitigation();
+ l1tf_apply_mitigation();
+ mds_apply_mitigation();
+ taa_apply_mitigation();
+ mmio_apply_mitigation();
+ rfds_apply_mitigation();
+ srbds_apply_mitigation();
+ srso_apply_mitigation();
+ gds_apply_mitigation();
+ its_apply_mitigation();
+ bhi_apply_mitigation();
}
/*
@@ -227,9 +321,9 @@ static void x86_amd_ssb_disable(void)
u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
- wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+ wrmsrq(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
- wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ wrmsrq(MSR_AMD64_LS_CFG, msrval);
}
#undef pr_fmt
@@ -280,6 +374,12 @@ enum rfds_mitigations {
static enum rfds_mitigations rfds_mitigation __ro_after_init =
IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_AUTO : RFDS_MITIGATION_OFF;
+/*
+ * Set if any of MDS/TAA/MMIO/RFDS are going to enable VERW clearing
+ * through X86_FEATURE_CLEAR_CPU_BUF on kernel and guest entry.
+ */
+static bool verw_clear_cpu_buf_mitigation_selected __ro_after_init;
+
static void __init mds_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
@@ -290,12 +390,34 @@ static void __init mds_select_mitigation(void)
if (mds_mitigation == MDS_MITIGATION_AUTO)
mds_mitigation = MDS_MITIGATION_FULL;
+ if (mds_mitigation == MDS_MITIGATION_OFF)
+ return;
+
+ verw_clear_cpu_buf_mitigation_selected = true;
+}
+
+static void __init mds_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
+ return;
+
+ /* If TAA, MMIO, or RFDS are being mitigated, MDS gets mitigated too. */
+ if (verw_clear_cpu_buf_mitigation_selected)
+ mds_mitigation = MDS_MITIGATION_FULL;
+
if (mds_mitigation == MDS_MITIGATION_FULL) {
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
+ }
- setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ pr_info("%s\n", mds_strings[mds_mitigation]);
+}
+static void __init mds_apply_mitigation(void)
+{
+ if (mds_mitigation == MDS_MITIGATION_FULL ||
+ mds_mitigation == MDS_MITIGATION_VMWERV) {
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
cpu_smt_disable(false);
@@ -335,6 +457,11 @@ static const char * const taa_strings[] = {
[TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled",
};
+static bool __init taa_vulnerable(void)
+{
+ return boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM);
+}
+
static void __init taa_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
@@ -348,48 +475,63 @@ static void __init taa_select_mitigation(void)
return;
}
- if (cpu_mitigations_off()) {
+ if (cpu_mitigations_off())
taa_mitigation = TAA_MITIGATION_OFF;
- return;
- }
- /*
- * TAA mitigation via VERW is turned off if both
- * tsx_async_abort=off and mds=off are specified.
- */
- if (taa_mitigation == TAA_MITIGATION_OFF &&
- mds_mitigation == MDS_MITIGATION_OFF)
+ /* Microcode will be checked in taa_update_mitigation(). */
+ if (taa_mitigation == TAA_MITIGATION_AUTO)
+ taa_mitigation = TAA_MITIGATION_VERW;
+
+ if (taa_mitigation != TAA_MITIGATION_OFF)
+ verw_clear_cpu_buf_mitigation_selected = true;
+}
+
+static void __init taa_update_mitigation(void)
+{
+ if (!taa_vulnerable() || cpu_mitigations_off())
return;
- if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ if (verw_clear_cpu_buf_mitigation_selected)
taa_mitigation = TAA_MITIGATION_VERW;
- else
- taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
- /*
- * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
- * A microcode update fixes this behavior to clear CPU buffers. It also
- * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
- * ARCH_CAP_TSX_CTRL_MSR bit.
- *
- * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
- * update is required.
- */
- if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
- !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
- taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+ if (taa_mitigation == TAA_MITIGATION_VERW) {
+ /* Check if the requisite ucode is available. */
+ if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
- /*
- * TSX is enabled, select alternate mitigation for TAA which is
- * the same as MDS. Enable MDS static branch to clear CPU buffers.
- *
- * For guests that can't determine whether the correct microcode is
- * present on host, enable the mitigation for UCODE_NEEDED as well.
- */
- setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ /*
+ * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
+ * A microcode update fixes this behavior to clear CPU buffers. It also
+ * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
+ * ARCH_CAP_TSX_CTRL_MSR bit.
+ *
+ * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
+ * update is required.
+ */
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
+ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+ }
- if (taa_nosmt || cpu_mitigations_auto_nosmt())
- cpu_smt_disable(false);
+ pr_info("%s\n", taa_strings[taa_mitigation]);
+}
+
+static void __init taa_apply_mitigation(void)
+{
+ if (taa_mitigation == TAA_MITIGATION_VERW ||
+ taa_mitigation == TAA_MITIGATION_UCODE_NEEDED) {
+ /*
+ * TSX is enabled, select alternate mitigation for TAA which is
+ * the same as MDS. Enable MDS static branch to clear CPU buffers.
+ *
+ * For guests that can't determine whether the correct microcode is
+ * present on host, enable the mitigation for UCODE_NEEDED as well.
+ */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+ }
}
static int __init tsx_async_abort_parse_cmdline(char *str)
@@ -427,31 +569,67 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
- boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
mmio_mitigation = MMIO_MITIGATION_OFF;
return;
}
+ /* Microcode will be checked in mmio_update_mitigation(). */
+ if (mmio_mitigation == MMIO_MITIGATION_AUTO)
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
- * by MDS or TAA. Otherwise, enable mitigation for VMM only.
+ * by MDS or TAA.
*/
- if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
- boot_cpu_has(X86_FEATURE_RTM)))
- setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
+ verw_clear_cpu_buf_mitigation_selected = true;
+}
+
+static void __init mmio_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || cpu_mitigations_off())
+ return;
+
+ if (verw_clear_cpu_buf_mitigation_selected)
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+
+ if (mmio_mitigation == MMIO_MITIGATION_VERW) {
+ /*
+ * Check if the system has the right microcode.
+ *
+ * CPU Fill buffer clear mitigation is enumerated by either an explicit
+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
+ * affected systems.
+ */
+ if (!((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))))
+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
+ }
+
+ pr_info("%s\n", mmio_strings[mmio_mitigation]);
+}
+
+static void __init mmio_apply_mitigation(void)
+{
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return;
/*
- * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based
- * mitigations, disable KVM-only mitigation in that case.
+ * Only enable the VMM mitigation if the CPU buffer clear mitigation is
+ * not being used.
*/
- if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
- static_branch_disable(&mmio_stale_data_clear);
- else
- static_branch_enable(&mmio_stale_data_clear);
+ if (verw_clear_cpu_buf_mitigation_selected) {
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ static_branch_disable(&cpu_buf_vm_clear);
+ } else {
+ static_branch_enable(&cpu_buf_vm_clear);
+ }
/*
* If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can
@@ -461,21 +639,6 @@ static void __init mmio_select_mitigation(void)
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
- /*
- * Check if the system has the right microcode.
- *
- * CPU Fill buffer clear mitigation is enumerated by either an explicit
- * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
- * affected systems.
- */
- if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
- (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
- boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
- mmio_mitigation = MMIO_MITIGATION_VERW;
- else
- mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
-
if (mmio_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
}
@@ -510,22 +673,48 @@ static const char * const rfds_strings[] = {
[RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
};
+static inline bool __init verw_clears_cpu_reg_file(void)
+{
+ return (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR);
+}
+
static void __init rfds_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) {
rfds_mitigation = RFDS_MITIGATION_OFF;
return;
}
+
+ if (rfds_mitigation == RFDS_MITIGATION_AUTO)
+ rfds_mitigation = RFDS_MITIGATION_VERW;
+
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
- if (rfds_mitigation == RFDS_MITIGATION_AUTO)
+ if (verw_clears_cpu_reg_file())
+ verw_clear_cpu_buf_mitigation_selected = true;
+}
+
+static void __init rfds_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off())
+ return;
+
+ if (verw_clear_cpu_buf_mitigation_selected)
rfds_mitigation = RFDS_MITIGATION_VERW;
- if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
+ if (rfds_mitigation == RFDS_MITIGATION_VERW) {
+ if (!verw_clears_cpu_reg_file())
+ rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
+ }
+
+ pr_info("%s\n", rfds_strings[rfds_mitigation]);
+}
+
+static void __init rfds_apply_mitigation(void)
+{
+ if (rfds_mitigation == RFDS_MITIGATION_VERW)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
- else
- rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
}
static __init int rfds_parse_cmdline(char *str)
@@ -546,76 +735,11 @@ static __init int rfds_parse_cmdline(char *str)
early_param("reg_file_data_sampling", rfds_parse_cmdline);
#undef pr_fmt
-#define pr_fmt(fmt) "" fmt
-
-static void __init md_clear_update_mitigation(void)
-{
- if (cpu_mitigations_off())
- return;
-
- if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
- goto out;
-
- /*
- * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
- * Stale Data mitigation, if necessary.
- */
- if (mds_mitigation == MDS_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MDS)) {
- mds_mitigation = MDS_MITIGATION_FULL;
- mds_select_mitigation();
- }
- if (taa_mitigation == TAA_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_TAA)) {
- taa_mitigation = TAA_MITIGATION_VERW;
- taa_select_mitigation();
- }
- /*
- * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear
- * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state.
- */
- if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
- mmio_mitigation = MMIO_MITIGATION_VERW;
- mmio_select_mitigation();
- }
- if (rfds_mitigation == RFDS_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_RFDS)) {
- rfds_mitigation = RFDS_MITIGATION_VERW;
- rfds_select_mitigation();
- }
-out:
- if (boot_cpu_has_bug(X86_BUG_MDS))
- pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
- if (boot_cpu_has_bug(X86_BUG_TAA))
- pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
- if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
- pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
- else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
- pr_info("MMIO Stale Data: Unknown: No mitigations\n");
- if (boot_cpu_has_bug(X86_BUG_RFDS))
- pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]);
-}
-
-static void __init md_clear_select_mitigation(void)
-{
- mds_select_mitigation();
- taa_select_mitigation();
- mmio_select_mitigation();
- rfds_select_mitigation();
-
- /*
- * As these mitigations are inter-related and rely on VERW instruction
- * to clear the microarchitural buffers, update and print their status
- * after mitigation selection is done for each of these vulnerabilities.
- */
- md_clear_update_mitigation();
-}
-
-#undef pr_fmt
#define pr_fmt(fmt) "SRBDS: " fmt
enum srbds_mitigations {
SRBDS_MITIGATION_OFF,
+ SRBDS_MITIGATION_AUTO,
SRBDS_MITIGATION_UCODE_NEEDED,
SRBDS_MITIGATION_FULL,
SRBDS_MITIGATION_TSX_OFF,
@@ -623,7 +747,7 @@ enum srbds_mitigations {
};
static enum srbds_mitigations srbds_mitigation __ro_after_init =
- IS_ENABLED(CONFIG_MITIGATION_SRBDS) ? SRBDS_MITIGATION_FULL : SRBDS_MITIGATION_OFF;
+ IS_ENABLED(CONFIG_MITIGATION_SRBDS) ? SRBDS_MITIGATION_AUTO : SRBDS_MITIGATION_OFF;
static const char * const srbds_strings[] = {
[SRBDS_MITIGATION_OFF] = "Vulnerable",
@@ -655,7 +779,7 @@ void update_srbds_msr(void)
if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
return;
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ rdmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
switch (srbds_mitigation) {
case SRBDS_MITIGATION_OFF:
@@ -669,13 +793,18 @@ void update_srbds_msr(void)
break;
}
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ wrmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
}
static void __init srbds_select_mitigation(void)
{
- if (!boot_cpu_has_bug(X86_BUG_SRBDS))
+ if (!boot_cpu_has_bug(X86_BUG_SRBDS) || cpu_mitigations_off()) {
+ srbds_mitigation = SRBDS_MITIGATION_OFF;
return;
+ }
+
+ if (srbds_mitigation == SRBDS_MITIGATION_AUTO)
+ srbds_mitigation = SRBDS_MITIGATION_FULL;
/*
* Check to see if this is one of the MDS_NO systems supporting TSX that
@@ -689,13 +818,17 @@ static void __init srbds_select_mitigation(void)
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
- else if (cpu_mitigations_off() || srbds_off)
+ else if (srbds_off)
srbds_mitigation = SRBDS_MITIGATION_OFF;
- update_srbds_msr();
pr_info("%s\n", srbds_strings[srbds_mitigation]);
}
+static void __init srbds_apply_mitigation(void)
+{
+ update_srbds_msr();
+}
+
static int __init srbds_parse_cmdline(char *str)
{
if (!str)
@@ -742,6 +875,7 @@ early_param("l1d_flush", l1d_flush_parse_cmdline);
enum gds_mitigations {
GDS_MITIGATION_OFF,
+ GDS_MITIGATION_AUTO,
GDS_MITIGATION_UCODE_NEEDED,
GDS_MITIGATION_FORCE,
GDS_MITIGATION_FULL,
@@ -750,7 +884,7 @@ enum gds_mitigations {
};
static enum gds_mitigations gds_mitigation __ro_after_init =
- IS_ENABLED(CONFIG_MITIGATION_GDS) ? GDS_MITIGATION_FULL : GDS_MITIGATION_OFF;
+ IS_ENABLED(CONFIG_MITIGATION_GDS) ? GDS_MITIGATION_AUTO : GDS_MITIGATION_OFF;
static const char * const gds_strings[] = {
[GDS_MITIGATION_OFF] = "Vulnerable",
@@ -775,7 +909,7 @@ void update_gds_msr(void)
switch (gds_mitigation) {
case GDS_MITIGATION_OFF:
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ rdmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
mcu_ctrl |= GDS_MITG_DIS;
break;
case GDS_MITIGATION_FULL_LOCKED:
@@ -785,23 +919,24 @@ void update_gds_msr(void)
* CPUs.
*/
case GDS_MITIGATION_FULL:
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ rdmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
mcu_ctrl &= ~GDS_MITG_DIS;
break;
case GDS_MITIGATION_FORCE:
case GDS_MITIGATION_UCODE_NEEDED:
case GDS_MITIGATION_HYPERVISOR:
+ case GDS_MITIGATION_AUTO:
return;
}
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ wrmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
/*
* Check to make sure that the WRMSR value was not ignored. Writes to
* GDS_MITG_DIS will be ignored if this processor is locked but the boot
* processor was not.
*/
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+ rdmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
}
@@ -814,33 +949,28 @@ static void __init gds_select_mitigation(void)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
gds_mitigation = GDS_MITIGATION_HYPERVISOR;
- goto out;
+ return;
}
if (cpu_mitigations_off())
gds_mitigation = GDS_MITIGATION_OFF;
/* Will verify below that mitigation _can_ be disabled */
+ if (gds_mitigation == GDS_MITIGATION_AUTO)
+ gds_mitigation = GDS_MITIGATION_FULL;
+
/* No microcode */
if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
- if (gds_mitigation == GDS_MITIGATION_FORCE) {
- /*
- * This only needs to be done on the boot CPU so do it
- * here rather than in update_gds_msr()
- */
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
- } else {
+ if (gds_mitigation != GDS_MITIGATION_FORCE)
gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
- }
- goto out;
+ return;
}
/* Microcode has mitigation, use it */
if (gds_mitigation == GDS_MITIGATION_FORCE)
gds_mitigation = GDS_MITIGATION_FULL;
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ rdmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
if (mcu_ctrl & GDS_MITG_LOCKED) {
if (gds_mitigation == GDS_MITIGATION_OFF)
pr_warn("Mitigation locked. Disable failed.\n");
@@ -854,9 +984,25 @@ static void __init gds_select_mitigation(void)
*/
gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
}
+}
+
+static void __init gds_apply_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return;
+
+ /* Microcode is present */
+ if (x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)
+ update_gds_msr();
+ else if (gds_mitigation == GDS_MITIGATION_FORCE) {
+ /*
+ * This only needs to be done on the boot CPU so do it
+ * here rather than in update_gds_msr()
+ */
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+ }
- update_gds_msr();
-out:
pr_info("%s\n", gds_strings[gds_mitigation]);
}
@@ -917,10 +1063,14 @@ static bool smap_works_speculatively(void)
static void __init spectre_v1_select_mitigation(void)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off())
spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
+}
+
+static void __init spectre_v1_apply_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off())
return;
- }
if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
/*
@@ -973,8 +1123,20 @@ enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE;
#undef pr_fmt
#define pr_fmt(fmt) "RETBleed: " fmt
+enum its_mitigation {
+ ITS_MITIGATION_OFF,
+ ITS_MITIGATION_AUTO,
+ ITS_MITIGATION_VMEXIT_ONLY,
+ ITS_MITIGATION_ALIGNED_THUNKS,
+ ITS_MITIGATION_RETPOLINE_STUFF,
+};
+
+static enum its_mitigation its_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_ITS) ? ITS_MITIGATION_AUTO : ITS_MITIGATION_OFF;
+
enum retbleed_mitigation {
RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_AUTO,
RETBLEED_MITIGATION_UNRET,
RETBLEED_MITIGATION_IBPB,
RETBLEED_MITIGATION_IBRS,
@@ -982,14 +1144,6 @@ enum retbleed_mitigation {
RETBLEED_MITIGATION_STUFF,
};
-enum retbleed_mitigation_cmd {
- RETBLEED_CMD_OFF,
- RETBLEED_CMD_AUTO,
- RETBLEED_CMD_UNRET,
- RETBLEED_CMD_IBPB,
- RETBLEED_CMD_STUFF,
-};
-
static const char * const retbleed_strings[] = {
[RETBLEED_MITIGATION_NONE] = "Vulnerable",
[RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
@@ -1000,9 +1154,7 @@ static const char * const retbleed_strings[] = {
};
static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
- RETBLEED_MITIGATION_NONE;
-static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
- IS_ENABLED(CONFIG_MITIGATION_RETBLEED) ? RETBLEED_CMD_AUTO : RETBLEED_CMD_OFF;
+ IS_ENABLED(CONFIG_MITIGATION_RETBLEED) ? RETBLEED_MITIGATION_AUTO : RETBLEED_MITIGATION_NONE;
static int __ro_after_init retbleed_nosmt = false;
@@ -1019,15 +1171,15 @@ static int __init retbleed_parse_cmdline(char *str)
}
if (!strcmp(str, "off")) {
- retbleed_cmd = RETBLEED_CMD_OFF;
+ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
} else if (!strcmp(str, "auto")) {
- retbleed_cmd = RETBLEED_CMD_AUTO;
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
} else if (!strcmp(str, "unret")) {
- retbleed_cmd = RETBLEED_CMD_UNRET;
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
} else if (!strcmp(str, "ibpb")) {
- retbleed_cmd = RETBLEED_CMD_IBPB;
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
} else if (!strcmp(str, "stuff")) {
- retbleed_cmd = RETBLEED_CMD_STUFF;
+ retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
} else if (!strcmp(str, "nosmt")) {
retbleed_nosmt = true;
} else if (!strcmp(str, "force")) {
@@ -1048,77 +1200,122 @@ early_param("retbleed", retbleed_parse_cmdline);
static void __init retbleed_select_mitigation(void)
{
- bool mitigate_smt = false;
-
- if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
- return;
-
- switch (retbleed_cmd) {
- case RETBLEED_CMD_OFF:
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) {
+ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
return;
+ }
- case RETBLEED_CMD_UNRET:
- if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) {
- retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
- } else {
+ switch (retbleed_mitigation) {
+ case RETBLEED_MITIGATION_UNRET:
+ if (!IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
pr_err("WARNING: kernel not compiled with MITIGATION_UNRET_ENTRY.\n");
- goto do_cmd_auto;
}
break;
-
- case RETBLEED_CMD_IBPB:
+ case RETBLEED_MITIGATION_IBPB:
if (!boot_cpu_has(X86_FEATURE_IBPB)) {
pr_err("WARNING: CPU does not support IBPB.\n");
- goto do_cmd_auto;
- } else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
- retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
- } else {
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
+ } else if (!IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
- goto do_cmd_auto;
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
}
break;
+ case RETBLEED_MITIGATION_STUFF:
+ if (!IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)) {
+ pr_err("WARNING: kernel not compiled with MITIGATION_CALL_DEPTH_TRACKING.\n");
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
+ } else if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("WARNING: retbleed=stuff only supported for Intel CPUs.\n");
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
+ }
+ break;
+ default:
+ break;
+ }
- case RETBLEED_CMD_STUFF:
- if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) &&
- spectre_v2_enabled == SPECTRE_V2_RETPOLINE) {
- retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
+ if (retbleed_mitigation != RETBLEED_MITIGATION_AUTO)
+ return;
+
+ /* Intel mitigation selected in retbleed_update_mitigation() */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY))
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY) &&
+ boot_cpu_has(X86_FEATURE_IBPB))
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ else
+ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+ }
+}
+
+static void __init retbleed_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+ return;
+ if (retbleed_mitigation == RETBLEED_MITIGATION_NONE)
+ goto out;
+
+ /*
+ * retbleed=stuff is only allowed on Intel. If stuffing can't be used
+ * then a different mitigation will be selected below.
+ *
+ * its=stuff will also attempt to enable stuffing.
+ */
+ if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF ||
+ its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF) {
+ if (spectre_v2_enabled != SPECTRE_V2_RETPOLINE) {
+ pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n");
+ retbleed_mitigation = RETBLEED_MITIGATION_AUTO;
} else {
- if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING))
- pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n");
- else
- pr_err("WARNING: kernel not compiled with MITIGATION_CALL_DEPTH_TRACKING.\n");
+ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+ pr_info("Retbleed mitigation updated to stuffing\n");
- goto do_cmd_auto;
+ retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
}
- break;
-
-do_cmd_auto:
- case RETBLEED_CMD_AUTO:
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY))
- retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
- else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY) &&
- boot_cpu_has(X86_FEATURE_IBPB))
- retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ }
+ /*
+ * Let IBRS trump all on Intel without affecting the effects of the
+ * retbleed= cmdline option except for call depth based stuffing
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_IBRS:
+ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
+ break;
+ case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+ pr_err(RETBLEED_INTEL_MSG);
}
+ /* If nothing has set the mitigation yet, default to NONE. */
+ if (retbleed_mitigation == RETBLEED_MITIGATION_AUTO)
+ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+ }
+out:
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+}
- /*
- * The Intel mitigation (IBRS or eIBRS) was already selected in
- * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
- * be set accordingly below.
- */
- break;
- }
+static void __init retbleed_apply_mitigation(void)
+{
+ bool mitigate_smt = false;
switch (retbleed_mitigation) {
+ case RETBLEED_MITIGATION_NONE:
+ return;
+
case RETBLEED_MITIGATION_UNRET:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_UNRET);
- x86_return_thunk = retbleed_return_thunk;
+ set_return_thunk(retbleed_return_thunk);
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -1153,7 +1350,7 @@ do_cmd_auto:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
- x86_return_thunk = call_depth_return_thunk;
+ set_return_thunk(call_depth_return_thunk);
break;
default:
@@ -1163,28 +1360,131 @@ do_cmd_auto:
if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
(retbleed_nosmt || cpu_mitigations_auto_nosmt()))
cpu_smt_disable(false);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ITS: " fmt
+
+static const char * const its_strings[] = {
+ [ITS_MITIGATION_OFF] = "Vulnerable",
+ [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected",
+ [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks",
+ [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB",
+};
+
+static int __init its_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) {
+ pr_err("Mitigation disabled at compile time, ignoring option (%s)", str);
+ return 0;
+ }
+
+ if (!strcmp(str, "off")) {
+ its_mitigation = ITS_MITIGATION_OFF;
+ } else if (!strcmp(str, "on")) {
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+ } else if (!strcmp(str, "force")) {
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+ setup_force_cpu_bug(X86_BUG_ITS);
+ } else if (!strcmp(str, "vmexit")) {
+ its_mitigation = ITS_MITIGATION_VMEXIT_ONLY;
+ } else if (!strcmp(str, "stuff")) {
+ its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF;
+ } else {
+ pr_err("Ignoring unknown indirect_target_selection option (%s).", str);
+ }
+
+ return 0;
+}
+early_param("indirect_target_selection", its_parse_cmdline);
+
+static void __init its_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) {
+ its_mitigation = ITS_MITIGATION_OFF;
+ return;
+ }
+
+ if (its_mitigation == ITS_MITIGATION_AUTO)
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+
+ if (its_mitigation == ITS_MITIGATION_OFF)
+ return;
+
+ if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ||
+ !IS_ENABLED(CONFIG_MITIGATION_RETHUNK)) {
+ pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) {
+ pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ return;
+ }
+
+ if (its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF &&
+ !IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)) {
+ pr_err("RSB stuff mitigation not supported, using default\n");
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+ }
+
+ if (its_mitigation == ITS_MITIGATION_VMEXIT_ONLY &&
+ !boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY))
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+}
+
+static void __init its_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off())
+ return;
+
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_NONE:
+ pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ break;
+ case SPECTRE_V2_RETPOLINE:
+ /* Retpoline+CDT mitigates ITS */
+ if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF)
+ its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF;
+ break;
+ case SPECTRE_V2_LFENCE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ pr_err("WARNING: ITS mitigation is not compatible with lfence mitigation\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ break;
+ default:
+ break;
+ }
/*
- * Let IBRS trump all on Intel without affecting the effects of the
- * retbleed= cmdline option except for call depth based stuffing
+ * retbleed_update_mitigation() will try to do stuffing if its=stuff.
+ * If it can't, such as if spectre_v2!=retpoline, then fall back to
+ * aligned thunks.
*/
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
- switch (spectre_v2_enabled) {
- case SPECTRE_V2_IBRS:
- retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
- break;
- case SPECTRE_V2_EIBRS:
- case SPECTRE_V2_EIBRS_RETPOLINE:
- case SPECTRE_V2_EIBRS_LFENCE:
- retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
- break;
- default:
- if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
- pr_err(RETBLEED_INTEL_MSG);
- }
- }
+ if (its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF &&
+ retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
- pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+ pr_info("%s\n", its_strings[its_mitigation]);
+}
+
+static void __init its_apply_mitigation(void)
+{
+ /* its=stuff forces retbleed stuffing and is enabled there. */
+ if (its_mitigation != ITS_MITIGATION_ALIGNED_THUNKS)
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_RETPOLINE))
+ setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS);
+
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ set_return_thunk(its_return_thunk);
}
#undef pr_fmt
@@ -1264,6 +1564,8 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_IBRS,
};
+static enum spectre_v2_mitigation_cmd spectre_v2_cmd __ro_after_init = SPECTRE_V2_CMD_AUTO;
+
enum spectre_v2_user_cmd {
SPECTRE_V2_USER_CMD_NONE,
SPECTRE_V2_USER_CMD_AUTO,
@@ -1302,31 +1604,18 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
-static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
-
-static enum spectre_v2_user_cmd __init
-spectre_v2_parse_user_cmdline(void)
+static enum spectre_v2_user_cmd __init spectre_v2_parse_user_cmdline(void)
{
- enum spectre_v2_user_cmd mode;
char arg[20];
int ret, i;
- mode = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ?
- SPECTRE_V2_USER_CMD_AUTO : SPECTRE_V2_USER_CMD_NONE;
-
- switch (spectre_v2_cmd) {
- case SPECTRE_V2_CMD_NONE:
+ if (cpu_mitigations_off() || !IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2))
return SPECTRE_V2_USER_CMD_NONE;
- case SPECTRE_V2_CMD_FORCE:
- return SPECTRE_V2_USER_CMD_FORCE;
- default:
- break;
- }
ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
arg, sizeof(arg));
if (ret < 0)
- return mode;
+ return SPECTRE_V2_USER_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
if (match_option(arg, ret, v2_user_options[i].option)) {
@@ -1337,7 +1626,7 @@ spectre_v2_parse_user_cmdline(void)
}
pr_err("Unknown user space protection option (%s). Switching to default\n", arg);
- return mode;
+ return SPECTRE_V2_USER_CMD_AUTO;
}
static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
@@ -1345,60 +1634,72 @@ static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
}
-static void __init
-spectre_v2_user_select_mitigation(void)
+static void __init spectre_v2_user_select_mitigation(void)
{
- enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
- enum spectre_v2_user_cmd cmd;
-
if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
return;
- cmd = spectre_v2_parse_user_cmdline();
- switch (cmd) {
+ switch (spectre_v2_parse_user_cmdline()) {
case SPECTRE_V2_USER_CMD_NONE:
- goto set_mode;
+ return;
case SPECTRE_V2_USER_CMD_FORCE:
- mode = SPECTRE_V2_USER_STRICT;
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT;
break;
case SPECTRE_V2_USER_CMD_AUTO:
case SPECTRE_V2_USER_CMD_PRCTL:
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_PRCTL;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL;
+ break;
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
- mode = SPECTRE_V2_USER_PRCTL;
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL;
break;
case SPECTRE_V2_USER_CMD_SECCOMP:
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_SECCOMP;
+ else
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_PRCTL;
+ spectre_v2_user_stibp = spectre_v2_user_ibpb;
+ break;
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
if (IS_ENABLED(CONFIG_SECCOMP))
- mode = SPECTRE_V2_USER_SECCOMP;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_SECCOMP;
else
- mode = SPECTRE_V2_USER_PRCTL;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL;
break;
}
- /* Initialize Indirect Branch Prediction Barrier */
- if (boot_cpu_has(X86_FEATURE_IBPB)) {
- static_branch_enable(&switch_vcpu_ibpb);
+ /*
+ * At this point, an STIBP mode other than "off" has been set.
+ * If STIBP support is not being forced, check if STIBP always-on
+ * is preferred.
+ */
+ if ((spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) &&
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT_PREFERRED;
- spectre_v2_user_ibpb = mode;
- switch (cmd) {
- case SPECTRE_V2_USER_CMD_NONE:
- break;
- case SPECTRE_V2_USER_CMD_FORCE:
- case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
- case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
- static_branch_enable(&switch_mm_always_ibpb);
- spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
- break;
- case SPECTRE_V2_USER_CMD_PRCTL:
- case SPECTRE_V2_USER_CMD_AUTO:
- case SPECTRE_V2_USER_CMD_SECCOMP:
- static_branch_enable(&switch_mm_cond_ibpb);
- break;
- }
+ if (!boot_cpu_has(X86_FEATURE_IBPB))
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_NONE;
- pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
- static_key_enabled(&switch_mm_always_ibpb) ?
- "always-on" : "conditional");
+ if (!boot_cpu_has(X86_FEATURE_STIBP))
+ spectre_v2_user_stibp = SPECTRE_V2_USER_NONE;
+}
+
+static void __init spectre_v2_user_update_mitigation(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
+ return;
+
+ /* The spectre_v2 cmd line can override spectre_v2_user options */
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_NONE) {
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_NONE;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_NONE;
+ } else if (spectre_v2_cmd == SPECTRE_V2_CMD_FORCE) {
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT;
}
/*
@@ -1416,30 +1717,44 @@ spectre_v2_user_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!cpu_smt_possible() ||
(spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
- !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS))) {
+ spectre_v2_user_stibp = SPECTRE_V2_USER_NONE;
return;
+ }
- /*
- * At this point, an STIBP mode other than "off" has been set.
- * If STIBP support is not being forced, check if STIBP always-on
- * is preferred.
- */
- if (mode != SPECTRE_V2_USER_STRICT &&
- boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
- mode = SPECTRE_V2_USER_STRICT_PREFERRED;
-
- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
- retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
- if (mode != SPECTRE_V2_USER_STRICT &&
- mode != SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_stibp != SPECTRE_V2_USER_NONE &&
+ (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
+ retbleed_mitigation == RETBLEED_MITIGATION_IBPB)) {
+ if (spectre_v2_user_stibp != SPECTRE_V2_USER_STRICT &&
+ spectre_v2_user_stibp != SPECTRE_V2_USER_STRICT_PREFERRED)
pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
- mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT_PREFERRED;
}
+ pr_info("%s\n", spectre_v2_user_strings[spectre_v2_user_stibp]);
+}
- spectre_v2_user_stibp = mode;
+static void __init spectre_v2_user_apply_mitigation(void)
+{
+ /* Initialize Indirect Branch Prediction Barrier */
+ if (spectre_v2_user_ibpb != SPECTRE_V2_USER_NONE) {
+ static_branch_enable(&switch_vcpu_ibpb);
+
+ switch (spectre_v2_user_ibpb) {
+ case SPECTRE_V2_USER_STRICT:
+ static_branch_enable(&switch_mm_always_ibpb);
+ break;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ static_branch_enable(&switch_mm_cond_ibpb);
+ break;
+ default:
+ break;
+ }
-set_mode:
- pr_info("%s\n", spectre_v2_user_strings[mode]);
+ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
+ static_key_enabled(&switch_mm_always_ibpb) ?
+ "always-on" : "conditional");
+ }
}
static const char * const spectre_v2_strings[] = {
@@ -1659,12 +1974,13 @@ static bool __init spec_ctrl_bhi_dis(void)
enum bhi_mitigations {
BHI_MITIGATION_OFF,
+ BHI_MITIGATION_AUTO,
BHI_MITIGATION_ON,
BHI_MITIGATION_VMEXIT_ONLY,
};
static enum bhi_mitigations bhi_mitigation __ro_after_init =
- IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_AUTO : BHI_MITIGATION_OFF;
static int __init spectre_bhi_parse_cmdline(char *str)
{
@@ -1686,6 +2002,25 @@ early_param("spectre_bhi", spectre_bhi_parse_cmdline);
static void __init bhi_select_mitigation(void)
{
+ if (!boot_cpu_has(X86_BUG_BHI) || cpu_mitigations_off())
+ bhi_mitigation = BHI_MITIGATION_OFF;
+
+ if (bhi_mitigation == BHI_MITIGATION_AUTO)
+ bhi_mitigation = BHI_MITIGATION_ON;
+}
+
+static void __init bhi_update_mitigation(void)
+{
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_NONE)
+ bhi_mitigation = BHI_MITIGATION_OFF;
+
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ spectre_v2_cmd == SPECTRE_V2_CMD_AUTO)
+ bhi_mitigation = BHI_MITIGATION_OFF;
+}
+
+static void __init bhi_apply_mitigation(void)
+{
if (bhi_mitigation == BHI_MITIGATION_OFF)
return;
@@ -1697,95 +2032,101 @@ static void __init bhi_select_mitigation(void)
return;
}
- /* Mitigate in hardware if supported */
- if (spec_ctrl_bhi_dis())
+ if (!IS_ENABLED(CONFIG_X86_64))
return;
- if (!IS_ENABLED(CONFIG_X86_64))
+ /* Mitigate in hardware if supported */
+ if (spec_ctrl_bhi_dis())
return;
if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) {
pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n");
- setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_VMEXIT);
return;
}
pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n");
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
- setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_VMEXIT);
}
static void __init spectre_v2_select_mitigation(void)
{
- enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
- enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+ spectre_v2_cmd = spectre_v2_parse_cmdline();
- /*
- * If the CPU is not affected and the command line mode is NONE or AUTO
- * then nothing to do.
- */
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
- (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ (spectre_v2_cmd == SPECTRE_V2_CMD_NONE || spectre_v2_cmd == SPECTRE_V2_CMD_AUTO))
return;
- switch (cmd) {
+ switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return;
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
- mode = SPECTRE_V2_EIBRS;
- break;
- }
-
- if (IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY) &&
- boot_cpu_has_bug(X86_BUG_RETBLEED) &&
- retbleed_cmd != RETBLEED_CMD_OFF &&
- retbleed_cmd != RETBLEED_CMD_STUFF &&
- boot_cpu_has(X86_FEATURE_IBRS) &&
- boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
- mode = SPECTRE_V2_IBRS;
+ spectre_v2_enabled = SPECTRE_V2_EIBRS;
break;
}
- mode = spectre_v2_select_retpoline();
+ spectre_v2_enabled = spectre_v2_select_retpoline();
break;
case SPECTRE_V2_CMD_RETPOLINE_LFENCE:
pr_err(SPECTRE_V2_LFENCE_MSG);
- mode = SPECTRE_V2_LFENCE;
+ spectre_v2_enabled = SPECTRE_V2_LFENCE;
break;
case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
- mode = SPECTRE_V2_RETPOLINE;
+ spectre_v2_enabled = SPECTRE_V2_RETPOLINE;
break;
case SPECTRE_V2_CMD_RETPOLINE:
- mode = spectre_v2_select_retpoline();
+ spectre_v2_enabled = spectre_v2_select_retpoline();
break;
case SPECTRE_V2_CMD_IBRS:
- mode = SPECTRE_V2_IBRS;
+ spectre_v2_enabled = SPECTRE_V2_IBRS;
break;
case SPECTRE_V2_CMD_EIBRS:
- mode = SPECTRE_V2_EIBRS;
+ spectre_v2_enabled = SPECTRE_V2_EIBRS;
break;
case SPECTRE_V2_CMD_EIBRS_LFENCE:
- mode = SPECTRE_V2_EIBRS_LFENCE;
+ spectre_v2_enabled = SPECTRE_V2_EIBRS_LFENCE;
break;
case SPECTRE_V2_CMD_EIBRS_RETPOLINE:
- mode = SPECTRE_V2_EIBRS_RETPOLINE;
+ spectre_v2_enabled = SPECTRE_V2_EIBRS_RETPOLINE;
break;
}
+}
- if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+static void __init spectre_v2_update_mitigation(void)
+{
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_AUTO &&
+ !spectre_v2_in_eibrs_mode(spectre_v2_enabled)) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY) &&
+ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ retbleed_mitigation != RETBLEED_MITIGATION_NONE &&
+ retbleed_mitigation != RETBLEED_MITIGATION_STUFF &&
+ boot_cpu_has(X86_FEATURE_IBRS) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ spectre_v2_enabled = SPECTRE_V2_IBRS;
+ }
+ }
+
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && !cpu_mitigations_off())
+ pr_info("%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+
+static void __init spectre_v2_apply_mitigation(void)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
- if (spectre_v2_in_ibrs_mode(mode)) {
+ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) {
if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) {
msr_set_bit(MSR_EFER, _EFER_AUTOIBRS);
} else {
@@ -1794,8 +2135,10 @@ static void __init spectre_v2_select_mitigation(void)
}
}
- switch (mode) {
+ switch (spectre_v2_enabled) {
case SPECTRE_V2_NONE:
+ return;
+
case SPECTRE_V2_EIBRS:
break;
@@ -1821,18 +2164,12 @@ static void __init spectre_v2_select_mitigation(void)
* JMPs gets protection against BHI and Intramode-BTI, but RET
* prediction from a non-RSB predictor is still a risk.
*/
- if (mode == SPECTRE_V2_EIBRS_LFENCE ||
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
- mode == SPECTRE_V2_RETPOLINE)
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE ||
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_RETPOLINE ||
+ spectre_v2_enabled == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
- if (boot_cpu_has(X86_BUG_BHI))
- bhi_select_mitigation();
-
- spectre_v2_enabled = mode;
- pr_info("%s\n", spectre_v2_strings[mode]);
-
- spectre_v2_select_rsb_mitigation(mode);
+ spectre_v2_select_rsb_mitigation(spectre_v2_enabled);
/*
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
@@ -1840,28 +2177,26 @@ static void __init spectre_v2_select_mitigation(void)
* firmware calls only when IBRS / Enhanced / Automatic IBRS aren't
* otherwise enabled.
*
- * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
- * the user might select retpoline on the kernel command line and if
- * the CPU supports Enhanced IBRS, kernel might un-intentionally not
- * enable IBRS around firmware calls.
+ * Use "spectre_v2_enabled" to check Enhanced IBRS instead of
+ * boot_cpu_has(), because the user might select retpoline on the kernel
+ * command line and if the CPU supports Enhanced IBRS, kernel might
+ * un-intentionally not enable IBRS around firmware calls.
*/
if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
boot_cpu_has(X86_FEATURE_IBPB) &&
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
- if (retbleed_cmd != RETBLEED_CMD_IBPB) {
+ if (retbleed_mitigation != RETBLEED_MITIGATION_IBPB) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
pr_info("Enabling Speculation Barrier for firmware calls\n");
}
- } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+ } else if (boot_cpu_has(X86_FEATURE_IBRS) &&
+ !spectre_v2_in_ibrs_mode(spectre_v2_enabled)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
-
- /* Set up IBPB and STIBP depending on the general spectre V2 command */
- spectre_v2_cmd = cmd;
}
static void update_stibp_msr(void * __unused)
@@ -2050,19 +2385,18 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
return cmd;
}
-static enum ssb_mitigation __init __ssb_select_mitigation(void)
+static void __init ssb_select_mitigation(void)
{
- enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
enum ssb_mitigation_cmd cmd;
if (!boot_cpu_has(X86_FEATURE_SSBD))
- return mode;
+ goto out;
cmd = ssb_parse_cmdline();
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
(cmd == SPEC_STORE_BYPASS_CMD_NONE ||
cmd == SPEC_STORE_BYPASS_CMD_AUTO))
- return mode;
+ return;
switch (cmd) {
case SPEC_STORE_BYPASS_CMD_SECCOMP:
@@ -2071,28 +2405,35 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
* enabled.
*/
if (IS_ENABLED(CONFIG_SECCOMP))
- mode = SPEC_STORE_BYPASS_SECCOMP;
+ ssb_mode = SPEC_STORE_BYPASS_SECCOMP;
else
- mode = SPEC_STORE_BYPASS_PRCTL;
+ ssb_mode = SPEC_STORE_BYPASS_PRCTL;
break;
case SPEC_STORE_BYPASS_CMD_ON:
- mode = SPEC_STORE_BYPASS_DISABLE;
+ ssb_mode = SPEC_STORE_BYPASS_DISABLE;
break;
case SPEC_STORE_BYPASS_CMD_AUTO:
case SPEC_STORE_BYPASS_CMD_PRCTL:
- mode = SPEC_STORE_BYPASS_PRCTL;
+ ssb_mode = SPEC_STORE_BYPASS_PRCTL;
break;
case SPEC_STORE_BYPASS_CMD_NONE:
break;
}
+out:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+static void __init ssb_apply_mitigation(void)
+{
/*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
* - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
*/
- if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
@@ -2106,16 +2447,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
update_spec_ctrl(x86_spec_ctrl_base);
}
}
-
- return mode;
-}
-
-static void ssb_select_mitigation(void)
-{
- ssb_mode = __ssb_select_mitigation();
-
- if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
- pr_info("%s\n", ssb_strings[ssb_mode]);
}
#undef pr_fmt
@@ -2371,7 +2702,7 @@ EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
/* Default mitigation for L1TF-affected CPUs */
enum l1tf_mitigations l1tf_mitigation __ro_after_init =
- IS_ENABLED(CONFIG_MITIGATION_L1TF) ? L1TF_MITIGATION_FLUSH : L1TF_MITIGATION_OFF;
+ IS_ENABLED(CONFIG_MITIGATION_L1TF) ? L1TF_MITIGATION_AUTO : L1TF_MITIGATION_OFF;
#if IS_ENABLED(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(l1tf_mitigation);
#endif
@@ -2419,22 +2750,33 @@ static void override_cache_bits(struct cpuinfo_x86 *c)
static void __init l1tf_select_mitigation(void)
{
+ if (!boot_cpu_has_bug(X86_BUG_L1TF) || cpu_mitigations_off()) {
+ l1tf_mitigation = L1TF_MITIGATION_OFF;
+ return;
+ }
+
+ if (l1tf_mitigation == L1TF_MITIGATION_AUTO) {
+ if (cpu_mitigations_auto_nosmt())
+ l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
+ else
+ l1tf_mitigation = L1TF_MITIGATION_FLUSH;
+ }
+}
+
+static void __init l1tf_apply_mitigation(void)
+{
u64 half_pa;
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
- if (cpu_mitigations_off())
- l1tf_mitigation = L1TF_MITIGATION_OFF;
- else if (cpu_mitigations_auto_nosmt())
- l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
-
override_cache_bits(&boot_cpu_data);
switch (l1tf_mitigation) {
case L1TF_MITIGATION_OFF:
case L1TF_MITIGATION_FLUSH_NOWARN:
case L1TF_MITIGATION_FLUSH:
+ case L1TF_MITIGATION_AUTO:
break;
case L1TF_MITIGATION_FLUSH_NOSMT:
case L1TF_MITIGATION_FULL:
@@ -2494,6 +2836,7 @@ early_param("l1tf", l1tf_cmdline);
enum srso_mitigation {
SRSO_MITIGATION_NONE,
+ SRSO_MITIGATION_AUTO,
SRSO_MITIGATION_UCODE_NEEDED,
SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED,
SRSO_MITIGATION_MICROCODE,
@@ -2503,14 +2846,6 @@ enum srso_mitigation {
SRSO_MITIGATION_BP_SPEC_REDUCE,
};
-enum srso_mitigation_cmd {
- SRSO_CMD_OFF,
- SRSO_CMD_MICROCODE,
- SRSO_CMD_SAFE_RET,
- SRSO_CMD_IBPB,
- SRSO_CMD_IBPB_ON_VMEXIT,
-};
-
static const char * const srso_strings[] = {
[SRSO_MITIGATION_NONE] = "Vulnerable",
[SRSO_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
@@ -2522,8 +2857,7 @@ static const char * const srso_strings[] = {
[SRSO_MITIGATION_BP_SPEC_REDUCE] = "Mitigation: Reduced Speculation"
};
-static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
-static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET;
+static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_AUTO;
static int __init srso_parse_cmdline(char *str)
{
@@ -2531,15 +2865,15 @@ static int __init srso_parse_cmdline(char *str)
return -EINVAL;
if (!strcmp(str, "off"))
- srso_cmd = SRSO_CMD_OFF;
+ srso_mitigation = SRSO_MITIGATION_NONE;
else if (!strcmp(str, "microcode"))
- srso_cmd = SRSO_CMD_MICROCODE;
+ srso_mitigation = SRSO_MITIGATION_MICROCODE;
else if (!strcmp(str, "safe-ret"))
- srso_cmd = SRSO_CMD_SAFE_RET;
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET;
else if (!strcmp(str, "ibpb"))
- srso_cmd = SRSO_CMD_IBPB;
+ srso_mitigation = SRSO_MITIGATION_IBPB;
else if (!strcmp(str, "ibpb-vmexit"))
- srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT;
+ srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
else
pr_err("Ignoring unknown SRSO option (%s).", str);
@@ -2551,132 +2885,85 @@ early_param("spec_rstack_overflow", srso_parse_cmdline);
static void __init srso_select_mitigation(void)
{
- bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE);
+ bool has_microcode;
- if (!boot_cpu_has_bug(X86_BUG_SRSO) ||
- cpu_mitigations_off() ||
- srso_cmd == SRSO_CMD_OFF) {
- if (boot_cpu_has(X86_FEATURE_SBPB))
- x86_pred_cmd = PRED_CMD_SBPB;
- goto out;
- }
+ if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
+ srso_mitigation = SRSO_MITIGATION_NONE;
+
+ if (srso_mitigation == SRSO_MITIGATION_NONE)
+ return;
+
+ if (srso_mitigation == SRSO_MITIGATION_AUTO)
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+ has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE);
if (has_microcode) {
/*
* Zen1/2 with SMT off aren't vulnerable after the right
* IBPB microcode has been applied.
- *
- * Zen1/2 don't have SBPB, no need to try to enable it here.
*/
if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
- goto out;
- }
-
- if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
- srso_mitigation = SRSO_MITIGATION_IBPB;
- goto out;
+ srso_mitigation = SRSO_MITIGATION_NONE;
+ return;
}
} else {
pr_warn("IBPB-extending microcode not applied!\n");
pr_warn(SRSO_NOTICE);
-
- /* may be overwritten by SRSO_CMD_SAFE_RET below */
- srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED;
}
- switch (srso_cmd) {
- case SRSO_CMD_MICROCODE:
- if (has_microcode) {
- srso_mitigation = SRSO_MITIGATION_MICROCODE;
- pr_warn(SRSO_NOTICE);
- }
- break;
-
- case SRSO_CMD_SAFE_RET:
- if (boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO))
+ switch (srso_mitigation) {
+ case SRSO_MITIGATION_SAFE_RET:
+ if (boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO)) {
+ srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
goto ibpb_on_vmexit;
+ }
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- /*
- * Enable the return thunk for generated code
- * like ftrace, static_call, etc.
- */
- setup_force_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_UNRET);
-
- if (boot_cpu_data.x86 == 0x19) {
- setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
- x86_return_thunk = srso_alias_return_thunk;
- } else {
- setup_force_cpu_cap(X86_FEATURE_SRSO);
- x86_return_thunk = srso_return_thunk;
- }
- if (has_microcode)
- srso_mitigation = SRSO_MITIGATION_SAFE_RET;
- else
- srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED;
- } else {
+ if (!IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
+ srso_mitigation = SRSO_MITIGATION_NONE;
}
- break;
- case SRSO_CMD_IBPB:
- if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
- if (has_microcode) {
- setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- srso_mitigation = SRSO_MITIGATION_IBPB;
-
- /*
- * IBPB on entry already obviates the need for
- * software-based untraining so clear those in case some
- * other mitigation like Retbleed has selected them.
- */
- setup_clear_cpu_cap(X86_FEATURE_UNRET);
- setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
-
- /*
- * There is no need for RSB filling: write_ibpb() ensures
- * all predictions, including the RSB, are invalidated,
- * regardless of IBPB implementation.
- */
- setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
- }
- } else {
- pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
- }
+ if (!has_microcode)
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED;
break;
-
ibpb_on_vmexit:
- case SRSO_CMD_IBPB_ON_VMEXIT:
+ case SRSO_MITIGATION_IBPB_ON_VMEXIT:
if (boot_cpu_has(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) {
pr_notice("Reducing speculation to address VM/HV SRSO attack vector.\n");
srso_mitigation = SRSO_MITIGATION_BP_SPEC_REDUCE;
break;
}
-
- if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
- if (has_microcode) {
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
-
- /*
- * There is no need for RSB filling: write_ibpb() ensures
- * all predictions, including the RSB, are invalidated,
- * regardless of IBPB implementation.
- */
- setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
- }
- } else {
+ fallthrough;
+ case SRSO_MITIGATION_IBPB:
+ if (!IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ srso_mitigation = SRSO_MITIGATION_NONE;
}
+
+ if (!has_microcode)
+ srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED;
break;
default:
break;
}
+}
-out:
+static void __init srso_update_mitigation(void)
+{
+ /* If retbleed is using IBPB, that works for SRSO as well */
+ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB &&
+ boot_cpu_has(X86_FEATURE_IBPB_BRTYPE))
+ srso_mitigation = SRSO_MITIGATION_IBPB;
+
+ if (boot_cpu_has_bug(X86_BUG_SRSO) &&
+ !cpu_mitigations_off() &&
+ !boot_cpu_has(X86_FEATURE_SRSO_NO))
+ pr_info("%s\n", srso_strings[srso_mitigation]);
+}
+
+static void __init srso_apply_mitigation(void)
+{
/*
* Clear the feature flag if this mitigation is not selected as that
* feature flag controls the BpSpecReduce MSR bit toggling in KVM.
@@ -2684,8 +2971,52 @@ out:
if (srso_mitigation != SRSO_MITIGATION_BP_SPEC_REDUCE)
setup_clear_cpu_cap(X86_FEATURE_SRSO_BP_SPEC_REDUCE);
- if (srso_mitigation != SRSO_MITIGATION_NONE)
- pr_info("%s\n", srso_strings[srso_mitigation]);
+ if (srso_mitigation == SRSO_MITIGATION_NONE) {
+ if (boot_cpu_has(X86_FEATURE_SBPB))
+ x86_pred_cmd = PRED_CMD_SBPB;
+ return;
+ }
+
+ switch (srso_mitigation) {
+ case SRSO_MITIGATION_SAFE_RET:
+ case SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED:
+ /*
+ * Enable the return thunk for generated code
+ * like ftrace, static_call, etc.
+ */
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (boot_cpu_data.x86 == 0x19) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+ set_return_thunk(srso_alias_return_thunk);
+ } else {
+ setup_force_cpu_cap(X86_FEATURE_SRSO);
+ set_return_thunk(srso_return_thunk);
+ }
+ break;
+ case SRSO_MITIGATION_IBPB:
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ /*
+ * IBPB on entry already obviates the need for
+ * software-based untraining so clear those in case some
+ * other mitigation like Retbleed has selected them.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_UNRET);
+ setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+ fallthrough;
+ case SRSO_MITIGATION_IBPB_ON_VMEXIT:
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+ break;
+ default:
+ break;
+ }
}
#undef pr_fmt
@@ -2780,9 +3111,6 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static ssize_t mmio_stale_data_show_state(char *buf)
{
- if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
- return sysfs_emit(buf, "Unknown: No mitigations\n");
-
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
@@ -2800,6 +3128,19 @@ static ssize_t rfds_show_state(char *buf)
return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]);
}
+static ssize_t old_microcode_show_state(char *buf)
+{
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return sysfs_emit(buf, "Unknown: running under hypervisor");
+
+ return sysfs_emit(buf, "Vulnerable\n");
+}
+
+static ssize_t its_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]);
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
@@ -2858,7 +3199,7 @@ static const char *spectre_bhi_state(void)
!boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) &&
rrsba_disabled)
return "; BHI: Retpoline";
- else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_VMEXIT))
return "; BHI: Vulnerable, KVM: SW loop";
return "; BHI: Vulnerable";
@@ -2967,7 +3308,6 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return srbds_show_state(buf);
case X86_BUG_MMIO_STALE_DATA:
- case X86_BUG_MMIO_UNKNOWN:
return mmio_stale_data_show_state(buf);
case X86_BUG_RETBLEED:
@@ -2982,6 +3322,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_RFDS:
return rfds_show_state(buf);
+ case X86_BUG_OLD_MICROCODE:
+ return old_microcode_show_state(buf);
+
+ case X86_BUG_ITS:
+ return its_show_state(buf);
+
default:
break;
}
@@ -3036,10 +3382,7 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
- else
- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
@@ -3061,6 +3404,16 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib
{
return cpu_show_common(dev, attr, buf, X86_BUG_RFDS);
}
+
+ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_OLD_MICROCODE);
+}
+
+ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_ITS);
+}
#endif
void __warn_thunk(void)
diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c
index 237faf7e700c..981f8b1f0792 100644
--- a/arch/x86/kernel/cpu/bus_lock.c
+++ b/arch/x86/kernel/cpu/bus_lock.c
@@ -10,6 +10,7 @@
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/cpu.h>
+#include <asm/msr.h>
enum split_lock_detect_state {
sld_off = 0,
@@ -95,15 +96,15 @@ static bool split_lock_verify_msr(bool on)
{
u64 ctrl, tmp;
- if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
+ if (rdmsrq_safe(MSR_TEST_CTRL, &ctrl))
return false;
if (on)
ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
else
ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
- if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
+ if (wrmsrq_safe(MSR_TEST_CTRL, ctrl))
return false;
- rdmsrl(MSR_TEST_CTRL, tmp);
+ rdmsrq(MSR_TEST_CTRL, tmp);
return ctrl == tmp;
}
@@ -137,7 +138,7 @@ static void __init __split_lock_setup(void)
return;
}
- rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+ rdmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);
if (!split_lock_verify_msr(true)) {
pr_info("MSR access failed: Disabled\n");
@@ -145,7 +146,7 @@ static void __init __split_lock_setup(void)
}
/* Restore the MSR to its cached value. */
- wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+ wrmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);
setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
@@ -162,7 +163,7 @@ static void sld_update_msr(bool on)
if (on)
test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
- wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
+ wrmsrq(MSR_TEST_CTRL, test_ctrl_val);
}
void split_lock_init(void)
@@ -297,7 +298,7 @@ void bus_lock_init(void)
if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
return;
- rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, val);
if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
(sld_state == sld_warn || sld_state == sld_fatal)) ||
@@ -311,7 +312,7 @@ void bus_lock_init(void)
val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
}
- wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, val);
}
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
@@ -375,7 +376,7 @@ static void __init split_lock_setup(struct cpuinfo_x86 *c)
* MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
* it have split lock detection.
*/
- rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
+ rdmsrq(MSR_IA32_CORE_CAPS, ia32_core_caps);
if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
goto supported;
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index b3a520959b51..adfa7e8bb865 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -1,35 +1,28 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Routines to identify caches on Intel CPU.
+ * x86 CPU caches detection and configuration
*
- * Changes:
- * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
- * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
- * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
+ * Previous changes
+ * - Venkatesh Pallipadi: Cache identification through CPUID(0x4)
+ * - Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure
+ * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD
*/
#include <linux/cacheinfo.h>
-#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
-#include <linux/pci.h>
#include <linux/stop_machine.h>
-#include <linux/sysfs.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
+#include <asm/cpuid/api.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include "cpu.h"
-#define LVL_1_INST 1
-#define LVL_1_DATA 2
-#define LVL_2 3
-#define LVL_3 4
-
/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
@@ -41,208 +34,127 @@ static cpumask_var_t cpu_cacheinfo_mask;
/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;
-struct _cache_table {
- unsigned char descriptor;
- char cache_type;
- short size;
-};
-
-#define MB(x) ((x) * 1024)
-
-/* All the cache descriptor types we care about (no TLB or
- trace cache entries) */
-
-static const struct _cache_table cache_table[] =
-{
- { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
- { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
- { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */
- { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
- { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
- { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
- { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */
- { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
- { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
- { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
- { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
- { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
- { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
- { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */
- { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
- { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
- { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
- { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
- { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
- { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
- { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
- { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */
- { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
- { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
- { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
- { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
- { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
- { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
- { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
- { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
- { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
- { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */
- { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
- { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
- { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
- { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
- { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
- { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
- { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
- { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
- { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
- { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
- { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
- { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
- { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
- { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
- { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
- { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
- { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
- { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
- { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
- { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
- { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
- { 0x00, 0, 0}
-};
-
-
enum _cache_type {
- CTYPE_NULL = 0,
- CTYPE_DATA = 1,
- CTYPE_INST = 2,
- CTYPE_UNIFIED = 3
+ CTYPE_NULL = 0,
+ CTYPE_DATA = 1,
+ CTYPE_INST = 2,
+ CTYPE_UNIFIED = 3
};
union _cpuid4_leaf_eax {
struct {
- enum _cache_type type:5;
- unsigned int level:3;
- unsigned int is_self_initializing:1;
- unsigned int is_fully_associative:1;
- unsigned int reserved:4;
- unsigned int num_threads_sharing:12;
- unsigned int num_cores_on_die:6;
+ enum _cache_type type :5;
+ unsigned int level :3;
+ unsigned int is_self_initializing :1;
+ unsigned int is_fully_associative :1;
+ unsigned int reserved :4;
+ unsigned int num_threads_sharing :12;
+ unsigned int num_cores_on_die :6;
} split;
u32 full;
};
union _cpuid4_leaf_ebx {
struct {
- unsigned int coherency_line_size:12;
- unsigned int physical_line_partition:10;
- unsigned int ways_of_associativity:10;
+ unsigned int coherency_line_size :12;
+ unsigned int physical_line_partition :10;
+ unsigned int ways_of_associativity :10;
} split;
u32 full;
};
union _cpuid4_leaf_ecx {
struct {
- unsigned int number_of_sets:32;
+ unsigned int number_of_sets :32;
} split;
u32 full;
};
-struct _cpuid4_info_regs {
+struct _cpuid4_info {
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned int id;
unsigned long size;
- struct amd_northbridge *nb;
};
-/* AMD doesn't have CPUID4. Emulate it here to report the same
- information to the user. This makes some assumptions about the machine:
- L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */
+static const enum cache_type cache_type_map[] = {
+ [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
+ [CTYPE_DATA] = CACHE_TYPE_DATA,
+ [CTYPE_INST] = CACHE_TYPE_INST,
+ [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
+/*
+ * Fallback AMD CPUID(0x4) emulation
+ * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
+ *
+ * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
+ * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
+ */
+
+#define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff
+#define AMD_L2_L3_INVALID_ASSOC 0x9
- In theory the TLBs could be reported as fake type (they are in "dummy").
- Maybe later */
union l1_cache {
struct {
- unsigned line_size:8;
- unsigned lines_per_tag:8;
- unsigned assoc:8;
- unsigned size_in_kb:8;
+ unsigned line_size :8;
+ unsigned lines_per_tag :8;
+ unsigned assoc :8;
+ unsigned size_in_kb :8;
};
- unsigned val;
+ unsigned int val;
};
union l2_cache {
struct {
- unsigned line_size:8;
- unsigned lines_per_tag:4;
- unsigned assoc:4;
- unsigned size_in_kb:16;
+ unsigned line_size :8;
+ unsigned lines_per_tag :4;
+ unsigned assoc :4;
+ unsigned size_in_kb :16;
};
- unsigned val;
+ unsigned int val;
};
union l3_cache {
struct {
- unsigned line_size:8;
- unsigned lines_per_tag:4;
- unsigned assoc:4;
- unsigned res:2;
- unsigned size_encoded:14;
+ unsigned line_size :8;
+ unsigned lines_per_tag :4;
+ unsigned assoc :4;
+ unsigned res :2;
+ unsigned size_encoded :14;
};
- unsigned val;
+ unsigned int val;
};
+/* L2/L3 associativity mapping */
static const unsigned short assocs[] = {
- [1] = 1,
- [2] = 2,
- [4] = 4,
- [6] = 8,
- [8] = 16,
- [0xa] = 32,
- [0xb] = 48,
- [0xc] = 64,
- [0xd] = 96,
- [0xe] = 128,
- [0xf] = 0xffff /* fully associative - no way to show this currently */
+ [1] = 1,
+ [2] = 2,
+ [3] = 3,
+ [4] = 4,
+ [5] = 6,
+ [6] = 8,
+ [8] = 16,
+ [0xa] = 32,
+ [0xb] = 48,
+ [0xc] = 64,
+ [0xd] = 96,
+ [0xe] = 128,
+ [0xf] = AMD_CPUID4_FULLY_ASSOCIATIVE
};
static const unsigned char levels[] = { 1, 1, 2, 3 };
-static const unsigned char types[] = { 1, 2, 3, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
-static const enum cache_type cache_type_map[] = {
- [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
- [CTYPE_DATA] = CACHE_TYPE_DATA,
- [CTYPE_INST] = CACHE_TYPE_INST,
- [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
-};
-
-static void
-amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
- union _cpuid4_leaf_ebx *ebx,
- union _cpuid4_leaf_ecx *ecx)
+static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
- unsigned dummy;
- unsigned line_size, lines_per_tag, assoc, size_in_kb;
- union l1_cache l1i, l1d;
+ unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
+ union l1_cache l1i, l1d, *l1;
union l2_cache l2;
union l3_cache l3;
- union l1_cache *l1 = &l1d;
eax->full = 0;
ebx->full = 0;
@@ -251,430 +163,155 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
- switch (leaf) {
+ l1 = &l1d;
+ switch (index) {
case 1:
l1 = &l1i;
fallthrough;
case 0:
if (!l1->val)
return;
- assoc = assocs[l1->assoc];
- line_size = l1->line_size;
- lines_per_tag = l1->lines_per_tag;
- size_in_kb = l1->size_in_kb;
+
+ assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
+ line_size = l1->line_size;
+ lines_per_tag = l1->lines_per_tag;
+ size_in_kb = l1->size_in_kb;
break;
case 2:
- if (!l2.val)
+ if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
return;
- assoc = assocs[l2.assoc];
- line_size = l2.line_size;
- lines_per_tag = l2.lines_per_tag;
- /* cpu_data has errata corrections for K7 applied */
- size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
+
+ /* Use x86_cache_size as it might have K7 errata fixes */
+ assoc = assocs[l2.assoc];
+ line_size = l2.line_size;
+ lines_per_tag = l2.lines_per_tag;
+ size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
break;
case 3:
- if (!l3.val)
+ if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
return;
- assoc = assocs[l3.assoc];
- line_size = l3.line_size;
- lines_per_tag = l3.lines_per_tag;
- size_in_kb = l3.size_encoded * 512;
+
+ assoc = assocs[l3.assoc];
+ line_size = l3.line_size;
+ lines_per_tag = l3.lines_per_tag;
+ size_in_kb = l3.size_encoded * 512;
if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
- size_in_kb = size_in_kb >> 1;
- assoc = assoc >> 1;
+ size_in_kb = size_in_kb >> 1;
+ assoc = assoc >> 1;
}
break;
default:
return;
}
- eax->split.is_self_initializing = 1;
- eax->split.type = types[leaf];
- eax->split.level = levels[leaf];
- eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = topology_num_cores_per_package();
+ eax->split.is_self_initializing = 1;
+ eax->split.type = types[index];
+ eax->split.level = levels[index];
+ eax->split.num_threads_sharing = 0;
+ eax->split.num_cores_on_die = topology_num_cores_per_package();
-
- if (assoc == 0xffff)
+ if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE)
eax->split.is_fully_associative = 1;
- ebx->split.coherency_line_size = line_size - 1;
- ebx->split.ways_of_associativity = assoc - 1;
- ebx->split.physical_line_partition = lines_per_tag - 1;
- ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
- (ebx->split.ways_of_associativity + 1) - 1;
-}
-
-#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
-
-/*
- * L3 cache descriptors
- */
-static void amd_calc_l3_indices(struct amd_northbridge *nb)
-{
- struct amd_l3_cache *l3 = &nb->l3_cache;
- unsigned int sc0, sc1, sc2, sc3;
- u32 val = 0;
-
- pci_read_config_dword(nb->misc, 0x1C4, &val);
-
- /* calculate subcache sizes */
- l3->subcaches[0] = sc0 = !(val & BIT(0));
- l3->subcaches[1] = sc1 = !(val & BIT(4));
-
- if (boot_cpu_data.x86 == 0x15) {
- l3->subcaches[0] = sc0 += !(val & BIT(1));
- l3->subcaches[1] = sc1 += !(val & BIT(5));
- }
-
- l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
- l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
-
- l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
-}
-
-/*
- * check whether a slot used for disabling an L3 index is occupied.
- * @l3: L3 cache descriptor
- * @slot: slot number (0..1)
- *
- * @returns: the disabled index if used or negative value if slot free.
- */
-static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
-{
- unsigned int reg = 0;
-
- pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
-
- /* check whether this slot is activated already */
- if (reg & (3UL << 30))
- return reg & 0xfff;
-
- return -1;
-}
-
-static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
- unsigned int slot)
-{
- int index;
- struct amd_northbridge *nb = this_leaf->priv;
-
- index = amd_get_l3_disable_slot(nb, slot);
- if (index >= 0)
- return sprintf(buf, "%d\n", index);
-
- return sprintf(buf, "FREE\n");
-}
-
-#define SHOW_CACHE_DISABLE(slot) \
-static ssize_t \
-cache_disable_##slot##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct cacheinfo *this_leaf = dev_get_drvdata(dev); \
- return show_cache_disable(this_leaf, buf, slot); \
-}
-SHOW_CACHE_DISABLE(0)
-SHOW_CACHE_DISABLE(1)
-
-static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
- unsigned slot, unsigned long idx)
-{
- int i;
- idx |= BIT(30);
-
- /*
- * disable index in all 4 subcaches
- */
- for (i = 0; i < 4; i++) {
- u32 reg = idx | (i << 20);
-
- if (!nb->l3_cache.subcaches[i])
- continue;
-
- pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
-
- /*
- * We need to WBINVD on a core on the node containing the L3
- * cache which indices we disable therefore a simple wbinvd()
- * is not sufficient.
- */
- wbinvd_on_cpu(cpu);
-
- reg |= BIT(31);
- pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
- }
-}
-
-/*
- * disable a L3 cache index by using a disable-slot
- *
- * @l3: L3 cache descriptor
- * @cpu: A CPU on the node containing the L3 cache
- * @slot: slot number (0..1)
- * @index: index to disable
- *
- * @return: 0 on success, error status on failure
- */
-static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
- unsigned slot, unsigned long index)
-{
- int ret = 0;
-
- /* check if @slot is already used or the index is already disabled */
- ret = amd_get_l3_disable_slot(nb, slot);
- if (ret >= 0)
- return -EEXIST;
-
- if (index > nb->l3_cache.indices)
- return -EINVAL;
-
- /* check whether the other slot has disabled the same index already */
- if (index == amd_get_l3_disable_slot(nb, !slot))
- return -EEXIST;
-
- amd_l3_disable_index(nb, cpu, slot, index);
-
- return 0;
-}
-
-static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
- const char *buf, size_t count,
- unsigned int slot)
-{
- unsigned long val = 0;
- int cpu, err = 0;
- struct amd_northbridge *nb = this_leaf->priv;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- cpu = cpumask_first(&this_leaf->shared_cpu_map);
-
- if (kstrtoul(buf, 10, &val) < 0)
- return -EINVAL;
-
- err = amd_set_l3_disable_slot(nb, cpu, slot, val);
- if (err) {
- if (err == -EEXIST)
- pr_warn("L3 slot %d in use/index already disabled!\n",
- slot);
- return err;
- }
- return count;
-}
-
-#define STORE_CACHE_DISABLE(slot) \
-static ssize_t \
-cache_disable_##slot##_store(struct device *dev, \
- struct device_attribute *attr, \
- const char *buf, size_t count) \
-{ \
- struct cacheinfo *this_leaf = dev_get_drvdata(dev); \
- return store_cache_disable(this_leaf, buf, count, slot); \
-}
-STORE_CACHE_DISABLE(0)
-STORE_CACHE_DISABLE(1)
-
-static ssize_t subcaches_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct cacheinfo *this_leaf = dev_get_drvdata(dev);
- int cpu = cpumask_first(&this_leaf->shared_cpu_map);
-
- return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
-}
-
-static ssize_t subcaches_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct cacheinfo *this_leaf = dev_get_drvdata(dev);
- int cpu = cpumask_first(&this_leaf->shared_cpu_map);
- unsigned long val;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (kstrtoul(buf, 16, &val) < 0)
- return -EINVAL;
-
- if (amd_set_subcaches(cpu, val))
- return -EINVAL;
-
- return count;
+ ebx->split.coherency_line_size = line_size - 1;
+ ebx->split.ways_of_associativity = assoc - 1;
+ ebx->split.physical_line_partition = lines_per_tag - 1;
+ ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+ (ebx->split.ways_of_associativity + 1) - 1;
}
-static DEVICE_ATTR_RW(cache_disable_0);
-static DEVICE_ATTR_RW(cache_disable_1);
-static DEVICE_ATTR_RW(subcaches);
-
-static umode_t
-cache_private_attrs_is_visible(struct kobject *kobj,
- struct attribute *attr, int unused)
+static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
+ union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
- struct device *dev = kobj_to_dev(kobj);
- struct cacheinfo *this_leaf = dev_get_drvdata(dev);
- umode_t mode = attr->mode;
-
- if (!this_leaf->priv)
- return 0;
-
- if ((attr == &dev_attr_subcaches.attr) &&
- amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
- return mode;
+ if (eax.split.type == CTYPE_NULL)
+ return -EIO;
- if ((attr == &dev_attr_cache_disable_0.attr ||
- attr == &dev_attr_cache_disable_1.attr) &&
- amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
- return mode;
+ id4->eax = eax;
+ id4->ebx = ebx;
+ id4->ecx = ecx;
+ id4->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}
-static struct attribute_group cache_private_group = {
- .is_visible = cache_private_attrs_is_visible,
-};
-
-static void init_amd_l3_attrs(void)
-{
- int n = 1;
- static struct attribute **amd_l3_attrs;
-
- if (amd_l3_attrs) /* already initialized */
- return;
-
- if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
- n += 2;
- if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
- n += 1;
-
- amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
- if (!amd_l3_attrs)
- return;
-
- n = 0;
- if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
- amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
- amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
- }
- if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
- amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
-
- cache_private_group.attrs = amd_l3_attrs;
-}
-
-const struct attribute_group *
-cache_get_priv_group(struct cacheinfo *this_leaf)
+static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
- struct amd_northbridge *nb = this_leaf->priv;
-
- if (this_leaf->level < 3 || !nb)
- return NULL;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ u32 ignored;
- if (nb && nb->l3_cache.indices)
- init_amd_l3_attrs();
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+ cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
+ else
+ legacy_amd_cpuid4(index, &eax, &ebx, &ecx);
- return &cache_private_group;
+ return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}
-static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
+static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
- int node;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ u32 ignored;
- /* only for L3, and not in virtualized environments */
- if (index < 3)
- return;
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);
- node = topology_amd_node_id(smp_processor_id());
- this_leaf->nb = node_to_amd_nb(node);
- if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
- amd_calc_l3_indices(this_leaf->nb);
+ return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}
-#else
-#define amd_init_l3_cache(x, y)
-#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
-static int
-cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
+static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
- unsigned edx;
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (boot_cpu_has(X86_FEATURE_TOPOEXT))
- cpuid_count(0x8000001d, index, &eax.full,
- &ebx.full, &ecx.full, &edx);
- else
- amd_cpuid4(index, &eax, &ebx, &ecx);
- amd_init_l3_cache(this_leaf, index);
- } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- cpuid_count(0x8000001d, index, &eax.full,
- &ebx.full, &ecx.full, &edx);
- amd_init_l3_cache(this_leaf, index);
- } else {
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
- }
+ u8 cpu_vendor = boot_cpu_data.x86_vendor;
- if (eax.split.type == CTYPE_NULL)
- return -EIO; /* better error ? */
-
- this_leaf->eax = eax;
- this_leaf->ebx = ebx;
- this_leaf->ecx = ecx;
- this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
- return 0;
+ return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
+ amd_fill_cpuid4_info(index, id4) :
+ intel_fill_cpuid4_info(index, id4);
}
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
- unsigned int eax, ebx, ecx, edx, op;
- union _cpuid4_leaf_eax cache_eax;
- int i = -1;
-
- if (c->x86_vendor == X86_VENDOR_AMD ||
- c->x86_vendor == X86_VENDOR_HYGON)
- op = 0x8000001d;
- else
- op = 4;
+ unsigned int eax, ebx, ecx, edx, op;
+ union _cpuid4_leaf_eax cache_eax;
+ int i = -1;
+ /* Do a CPUID(op) loop to calculate num_cache_leaves */
+ op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4;
do {
++i;
- /* Do cpuid(op) loop to find out num_cache_leaves */
cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
cache_eax.full = eax;
} while (cache_eax.split.type != CTYPE_NULL);
return i;
}
+/*
+ * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
+ */
+
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
- /*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
- */
- if (!cpuid_edx(0x80000006))
+ if (!cpuid_amd_hygon_has_l3_cache())
return;
if (c->x86 < 0x17) {
- /* LLC is at the node level. */
+ /* Pre-Zen: LLC is at the node level */
c->topo.llc_id = die_id;
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
- * LLC is at the core complex level.
- * Core complex ID is ApicId[3] for these processors.
+ * Family 17h up to 1F models: LLC is at the core
+ * complex level. Core complex ID is ApicId[3].
*/
c->topo.llc_id = c->topo.apicid >> 3;
} else {
/*
- * LLC ID is calculated from the number of threads sharing the
- * cache.
- * */
+ * Newer families: LLC ID is calculated from the number
+ * of threads sharing the L3 cache.
+ */
u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
u32 llc_index = find_num_cache_leaves(c) - 1;
@@ -683,25 +320,21 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
if (num_sharing_cache) {
- int bits = get_count_order(num_sharing_cache);
+ int index_msb = get_count_order(num_sharing_cache);
- c->topo.llc_id = c->topo.apicid >> bits;
+ c->topo.llc_id = c->topo.apicid >> index_msb;
}
}
}
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
- /*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
- */
- if (!cpuid_edx(0x80000006))
+ if (!cpuid_amd_hygon_has_l3_cache())
return;
/*
- * LLC is at the core complex level.
- * Core complex ID is ApicId[3] for these processors.
+ * Hygons are similar to AMD Family 17h up to 1F models: LLC is
+ * at the core complex level. Core complex ID is ApicId[3].
*/
c->topo.llc_id = c->topo.apicid >> 3;
}
@@ -710,14 +343,10 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
- if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT))
ci->num_leaves = find_num_cache_leaves(c);
- } else if (c->extended_cpuid_level >= 0x80000006) {
- if (cpuid_edx(0x80000006) & 0xf000)
- ci->num_leaves = 4;
- else
- ci->num_leaves = 3;
- }
+ else if (c->extended_cpuid_level >= 0x80000006)
+ ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}
void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
@@ -727,148 +356,131 @@ void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
ci->num_leaves = find_num_cache_leaves(c);
}
-void init_intel_cacheinfo(struct cpuinfo_x86 *c)
+static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3,
+ unsigned int l2, unsigned int l1i, unsigned int l1d)
{
- /* Cache sizes */
- unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
- unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
- unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
- unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
- struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+ /*
+ * If llc_id is still unset, then cpuid_level < 4, which implies
+ * that the only possibility left is SMT. Since CPUID(0x2) doesn't
+ * specify any shared caches and SMT shares all caches, we can
+ * unconditionally set LLC ID to the package ID so that all
+ * threads share it.
+ */
+ if (c->topo.llc_id == BAD_APICID)
+ c->topo.llc_id = c->topo.pkg_id;
- if (c->cpuid_level > 3) {
- /*
- * There should be at least one leaf. A non-zero value means
- * that the number of leaves has been initialized.
- */
- if (!ci->num_leaves)
- ci->num_leaves = find_num_cache_leaves(c);
+ c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d);
- /*
- * Whenever possible use cpuid(4), deterministic cache
- * parameters cpuid leaf to find the cache details
- */
- for (i = 0; i < ci->num_leaves; i++) {
- struct _cpuid4_info_regs this_leaf = {};
- int retval;
+ if (!l2)
+ cpu_detect_cache_sizes(c);
+}
- retval = cpuid4_cache_lookup_regs(i, &this_leaf);
- if (retval < 0)
- continue;
+/*
+ * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available.
+ */
+static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
+{
+ unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
+ const struct leaf_0x2_table *desc;
+ union leaf_0x2_regs regs;
+ u8 *ptr;
- switch (this_leaf.eax.split.level) {
- case 1:
- if (this_leaf.eax.split.type == CTYPE_DATA)
- new_l1d = this_leaf.size/1024;
- else if (this_leaf.eax.split.type == CTYPE_INST)
- new_l1i = this_leaf.size/1024;
- break;
- case 2:
- new_l2 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
- break;
- case 3:
- new_l3 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
- break;
- default:
- break;
- }
- }
- }
+ if (c->cpuid_level < 2)
+ return;
- /* Don't use CPUID(2) if CPUID(4) is supported. */
- if (!ci->num_leaves && c->cpuid_level > 1) {
- /* supports eax=2 call */
- int j, n;
- unsigned int regs[4];
- unsigned char *dp = (unsigned char *)regs;
-
- /* Number of times to iterate */
- n = cpuid_eax(2) & 0xFF;
-
- for (i = 0 ; i < n ; i++) {
- cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
- /* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 4 ; j++)
- if (regs[j] & (1 << 31))
- regs[j] = 0;
-
- /* Byte 0 is level count, not a descriptor */
- for (j = 1 ; j < 16 ; j++) {
- unsigned char des = dp[j];
- unsigned char k = 0;
-
- /* look up this descriptor in the table */
- while (cache_table[k].descriptor != 0) {
- if (cache_table[k].descriptor == des) {
- switch (cache_table[k].cache_type) {
- case LVL_1_INST:
- l1i += cache_table[k].size;
- break;
- case LVL_1_DATA:
- l1d += cache_table[k].size;
- break;
- case LVL_2:
- l2 += cache_table[k].size;
- break;
- case LVL_3:
- l3 += cache_table[k].size;
- break;
- }
-
- break;
- }
-
- k++;
- }
- }
+ cpuid_leaf_0x2(&regs);
+ for_each_cpuid_0x2_desc(regs, ptr, desc) {
+ switch (desc->c_type) {
+ case CACHE_L1_INST: l1i += desc->c_size; break;
+ case CACHE_L1_DATA: l1d += desc->c_size; break;
+ case CACHE_L2: l2 += desc->c_size; break;
+ case CACHE_L3: l3 += desc->c_size; break;
}
}
- if (new_l1d)
- l1d = new_l1d;
+ intel_cacheinfo_done(c, l3, l2, l1i, l1d);
+}
- if (new_l1i)
- l1i = new_l1i;
+static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
+{
+ unsigned int num_threads_sharing;
+ int index_msb;
- if (new_l2) {
- l2 = new_l2;
- c->topo.llc_id = l2_id;
- c->topo.l2c_id = l2_id;
- }
+ num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ return c->topo.apicid & ~((1 << index_msb) - 1);
+}
- if (new_l3) {
- l3 = new_l3;
- c->topo.llc_id = l3_id;
- }
+static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
+{
+ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+ unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
+ unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;
+
+ if (c->cpuid_level < 4)
+ return false;
/*
- * If llc_id is not yet set, this means cpuid_level < 4 which in
- * turns means that the only possibility is SMT (as indicated in
- * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
- * that SMT shares all caches, we can unconditionally set cpu_llc_id to
- * c->topo.pkg_id.
+ * There should be at least one leaf. A non-zero value means
+ * that the number of leaves has been previously initialized.
*/
- if (c->topo.llc_id == BAD_APICID)
- c->topo.llc_id = c->topo.pkg_id;
+ if (!ci->num_leaves)
+ ci->num_leaves = find_num_cache_leaves(c);
+
+ if (!ci->num_leaves)
+ return false;
- c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
+ for (int i = 0; i < ci->num_leaves; i++) {
+ struct _cpuid4_info id4 = {};
+ int ret;
- if (!l2)
- cpu_detect_cache_sizes(c);
+ ret = intel_fill_cpuid4_info(i, &id4);
+ if (ret < 0)
+ continue;
+
+ switch (id4.eax.split.level) {
+ case 1:
+ if (id4.eax.split.type == CTYPE_DATA)
+ l1d = id4.size / 1024;
+ else if (id4.eax.split.type == CTYPE_INST)
+ l1i = id4.size / 1024;
+ break;
+ case 2:
+ l2 = id4.size / 1024;
+ l2_id = calc_cache_topo_id(c, &id4);
+ break;
+ case 3:
+ l3 = id4.size / 1024;
+ l3_id = calc_cache_topo_id(c, &id4);
+ break;
+ default:
+ break;
+ }
+ }
+
+ c->topo.l2c_id = l2_id;
+ c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
+ intel_cacheinfo_done(c, l3, l2, l1i, l1d);
+ return true;
+}
+
+void init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+ /* Don't use CPUID(0x2) if CPUID(0x4) is supported. */
+ if (intel_cacheinfo_0x4(c))
+ return;
+
+ intel_cacheinfo_0x2(c);
}
+/*
+ * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon
+ */
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
- struct _cpuid4_info_regs *base)
+ const struct _cpuid4_info *id4)
{
struct cpu_cacheinfo *this_cpu_ci;
- struct cacheinfo *this_leaf;
+ struct cacheinfo *ci;
int i, sibling;
/*
@@ -880,18 +492,18 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
this_cpu_ci = get_cpu_cacheinfo(i);
if (!this_cpu_ci->info_list)
continue;
- this_leaf = this_cpu_ci->info_list + index;
+
+ ci = this_cpu_ci->info_list + index;
for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
- cpumask_set_cpu(sibling,
- &this_leaf->shared_cpu_map);
+ cpumask_set_cpu(sibling, &ci->shared_cpu_map);
}
}
} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
unsigned int apicid, nshared, first, last;
- nshared = base->eax.split.num_threads_sharing + 1;
+ nshared = id4->eax.split.num_threads_sharing + 1;
apicid = cpu_data(cpu).topo.apicid;
first = apicid - (apicid % nshared);
last = first + nshared - 1;
@@ -905,14 +517,13 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
if ((apicid < first) || (apicid > last))
continue;
- this_leaf = this_cpu_ci->info_list + index;
+ ci = this_cpu_ci->info_list + index;
for_each_online_cpu(sibling) {
apicid = cpu_data(sibling).topo.apicid;
if ((apicid < first) || (apicid > last))
continue;
- cpumask_set_cpu(sibling,
- &this_leaf->shared_cpu_map);
+ cpumask_set_cpu(sibling, &ci->shared_cpu_map);
}
}
} else
@@ -921,25 +532,27 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
return 1;
}
+/*
+ * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon
+ */
static void __cache_cpumap_setup(unsigned int cpu, int index,
- struct _cpuid4_info_regs *base)
+ const struct _cpuid4_info *id4)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf, *sibling_leaf;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct cacheinfo *ci, *sibling_ci;
unsigned long num_threads_sharing;
int index_msb, i;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86_vendor == X86_VENDOR_AMD ||
- c->x86_vendor == X86_VENDOR_HYGON) {
- if (__cache_amd_cpumap_setup(cpu, index, base))
+ if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
+ if (__cache_amd_cpumap_setup(cpu, index, id4))
return;
}
- this_leaf = this_cpu_ci->info_list + index;
- num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
+ ci = this_cpu_ci->info_list + index;
+ num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
- cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+ cpumask_set_cpu(cpu, &ci->shared_cpu_map);
if (num_threads_sharing == 1)
return;
@@ -949,30 +562,29 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
+ /* Skip if itself or no cacheinfo */
if (i == cpu || !sib_cpu_ci->info_list)
- continue;/* skip if itself or no cacheinfo */
- sibling_leaf = sib_cpu_ci->info_list + index;
- cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
- cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
+ continue;
+
+ sibling_ci = sib_cpu_ci->info_list + index;
+ cpumask_set_cpu(i, &ci->shared_cpu_map);
+ cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
}
}
-static void ci_leaf_init(struct cacheinfo *this_leaf,
- struct _cpuid4_info_regs *base)
+static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
+ struct amd_northbridge *nb)
{
- this_leaf->id = base->id;
- this_leaf->attributes = CACHE_ID;
- this_leaf->level = base->eax.split.level;
- this_leaf->type = cache_type_map[base->eax.split.type];
- this_leaf->coherency_line_size =
- base->ebx.split.coherency_line_size + 1;
- this_leaf->ways_of_associativity =
- base->ebx.split.ways_of_associativity + 1;
- this_leaf->size = base->size;
- this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
- this_leaf->physical_line_partition =
- base->ebx.split.physical_line_partition + 1;
- this_leaf->priv = base->nb;
+ ci->id = id4->id;
+ ci->attributes = CACHE_ID;
+ ci->level = id4->eax.split.level;
+ ci->type = cache_type_map[id4->eax.split.type];
+ ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
+ ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
+ ci->size = id4->size;
+ ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
+ ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
+ ci->priv = nb;
}
int init_cache_level(unsigned int cpu)
@@ -987,38 +599,45 @@ int init_cache_level(unsigned int cpu)
}
/*
- * The max shared threads number comes from CPUID.4:EAX[25-14] with input
+ * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
* ECX as cache index. Then right shift apicid by the number's order to get
* cache id for this cache node.
*/
-static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
+static void get_cache_id(int cpu, struct _cpuid4_info *id4)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
unsigned long num_threads_sharing;
int index_msb;
- num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
+ num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
- id4_regs->id = c->topo.apicid >> index_msb;
+ id4->id = c->topo.apicid >> index_msb;
}
int populate_cache_leaves(unsigned int cpu)
{
- unsigned int idx, ret;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf = this_cpu_ci->info_list;
- struct _cpuid4_info_regs id4_regs = {};
+ struct cacheinfo *ci = this_cpu_ci->info_list;
+ u8 cpu_vendor = boot_cpu_data.x86_vendor;
+ struct amd_northbridge *nb = NULL;
+ struct _cpuid4_info id4 = {};
+ int idx, ret;
for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
- ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
+ ret = fill_cpuid4_info(idx, &id4);
if (ret)
return ret;
- get_cache_id(cpu, &id4_regs);
- ci_leaf_init(this_leaf++, &id4_regs);
- __cache_cpumap_setup(cpu, idx, &id4_regs);
+
+ get_cache_id(cpu, &id4);
+
+ if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
+ nb = amd_init_l3_cache(idx);
+
+ ci_info_init(ci++, &id4, nb);
+ __cache_cpumap_setup(cpu, idx, &id4);
}
- this_cpu_ci->cpu_map_populated = true;
+ this_cpu_ci->cpu_map_populated = true;
return 0;
}
@@ -1034,31 +653,33 @@ int populate_cache_leaves(unsigned int cpu)
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);
+/*
+ * Cache flushing is the most time-consuming step when programming the
+ * MTRRs. On many Intel CPUs without known erratas, it can be skipped
+ * if the CPU declares cache self-snooping support.
+ */
+static void maybe_flush_caches(void)
+{
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
+}
+
void cache_disable(void) __acquires(cache_disable_lock)
{
unsigned long cr0;
/*
- * Note that this is not ideal
- * since the cache is only flushed/disabled for this CPU while the
- * MTRRs are changed, but changing this requires more invasive
- * changes to the way the kernel boots
+ * This is not ideal since the cache is only flushed/disabled
+ * for this CPU while the MTRRs are changed, but changing this
+ * requires more invasive changes to the way the kernel boots.
*/
-
raw_spin_lock(&cache_disable_lock);
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
- /*
- * Cache flushing is the most time-consuming step when programming
- * the MTRRs. Fortunately, as per the Intel Software Development
- * Manual, we can skip it if the processor supports cache self-
- * snooping.
- */
- if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
- wbinvd();
+ maybe_flush_caches();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_feature_enabled(X86_FEATURE_PGE)) {
@@ -1073,9 +694,7 @@ void cache_disable(void) __acquires(cache_disable_lock)
if (cpu_feature_enabled(X86_FEATURE_MTRR))
mtrr_disable();
- /* Again, only flush caches if we have to. */
- if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
- wbinvd();
+ maybe_flush_caches();
}
void cache_enable(void) __releases(cache_disable_lock)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 12126adbc3a9..8feb8fd2957a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -29,7 +29,7 @@
#include <asm/alternative.h>
#include <asm/cmdline.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/doublefault.h>
@@ -148,7 +148,7 @@ static void ppin_init(struct cpuinfo_x86 *c)
*/
info = (struct ppin_info *)id->driver_data;
- if (rdmsrl_safe(info->msr_ppin_ctl, &val))
+ if (rdmsrq_safe(info->msr_ppin_ctl, &val))
goto clear_ppin;
if ((val & 3UL) == 1UL) {
@@ -158,13 +158,13 @@ static void ppin_init(struct cpuinfo_x86 *c)
/* If PPIN is disabled, try to enable */
if (!(val & 2UL)) {
- wrmsrl_safe(info->msr_ppin_ctl, val | 2UL);
- rdmsrl_safe(info->msr_ppin_ctl, &val);
+ wrmsrq_safe(info->msr_ppin_ctl, val | 2UL);
+ rdmsrq_safe(info->msr_ppin_ctl, &val);
}
/* Is the enable bit set? */
if (val & 2UL) {
- c->ppin = __rdmsr(info->msr_ppin);
+ c->ppin = native_rdmsrq(info->msr_ppin);
set_cpu_cap(c, info->feature);
return;
}
@@ -242,6 +242,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+SYM_PIC_ALIAS(gdt_page);
#ifdef CONFIG_X86_64
static int __init x86_nopcid_setup(char *s)
@@ -321,7 +322,7 @@ static int __init cachesize_setup(char *str)
__setup("cachesize=", cachesize_setup);
/* Probe for the CPUID instruction */
-bool have_cpuid_p(void)
+bool cpuid_feature(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -562,9 +563,9 @@ __noendbr u64 ibt_save(bool disable)
u64 msr = 0;
if (cpu_feature_enabled(X86_FEATURE_IBT)) {
- rdmsrl(MSR_IA32_S_CET, msr);
+ rdmsrq(MSR_IA32_S_CET, msr);
if (disable)
- wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN);
+ wrmsrq(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN);
}
return msr;
@@ -575,10 +576,10 @@ __noendbr void ibt_restore(u64 save)
u64 msr;
if (cpu_feature_enabled(X86_FEATURE_IBT)) {
- rdmsrl(MSR_IA32_S_CET, msr);
+ rdmsrq(MSR_IA32_S_CET, msr);
msr &= ~CET_ENDBR_EN;
msr |= (save & CET_ENDBR_EN);
- wrmsrl(MSR_IA32_S_CET, msr);
+ wrmsrq(MSR_IA32_S_CET, msr);
}
}
@@ -602,15 +603,15 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_USER_SHSTK);
if (kernel_ibt)
- wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN);
+ wrmsrq(MSR_IA32_S_CET, CET_ENDBR_EN);
else
- wrmsrl(MSR_IA32_S_CET, 0);
+ wrmsrq(MSR_IA32_S_CET, 0);
cr4_set_bits(X86_CR4_CET);
if (kernel_ibt && ibt_selftest()) {
pr_err("IBT selftest: Failed!\n");
- wrmsrl(MSR_IA32_S_CET, 0);
+ wrmsrq(MSR_IA32_S_CET, 0);
setup_clear_cpu_cap(X86_FEATURE_IBT);
}
}
@@ -621,8 +622,8 @@ __noendbr void cet_disable(void)
cpu_feature_enabled(X86_FEATURE_SHSTK)))
return;
- wrmsrl(MSR_IA32_S_CET, 0);
- wrmsrl(MSR_IA32_U_CET, 0);
+ wrmsrq(MSR_IA32_S_CET, 0);
+ wrmsrq(MSR_IA32_U_CET, 0);
}
/*
@@ -751,9 +752,9 @@ void __init switch_gdt_and_percpu_base(int cpu)
* No need to load %gs. It is already correct.
*
* Writing %gs on 64bit would zero GSBASE which would make any per
- * CPU operation up to the point of the wrmsrl() fault.
+ * CPU operation up to the point of the wrmsrq() fault.
*
- * Set GSBASE to the new offset. Until the wrmsrl() happens the
+ * Set GSBASE to the new offset. Until the wrmsrq() happens the
* early mapping is still valid. That means the GSBASE update will
* lose any prior per CPU data which was not copied over in
* setup_per_cpu_areas().
@@ -761,7 +762,7 @@ void __init switch_gdt_and_percpu_base(int cpu)
* This works even with stackprotector enabled because the
* per CPU stack canary is 0 in both per CPU areas.
*/
- wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
+ wrmsrq(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#else
/*
* %fs is already set to __KERNEL_PERCPU, but after switching GDT
@@ -1005,17 +1006,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* AMD-defined flags: level 0x80000001 */
+ /*
+ * Check if extended CPUID leaves are implemented: Max extended
+ * CPUID leaf must be in the 0x80000001-0x8000ffff range.
+ */
eax = cpuid_eax(0x80000000);
- c->extended_cpuid_level = eax;
+ c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0;
- if ((eax & 0xffff0000) == 0x80000000) {
- if (eax >= 0x80000001) {
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (c->extended_cpuid_level >= 0x80000001) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_8000_0001_ECX] = ecx;
- c->x86_capability[CPUID_8000_0001_EDX] = edx;
- }
+ c->x86_capability[CPUID_8000_0001_ECX] = ecx;
+ c->x86_capability[CPUID_8000_0001_EDX] = edx;
}
if (c->extended_cpuid_level >= 0x80000007) {
@@ -1227,6 +1229,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define GDS BIT(6)
/* CPU is affected by Register File Data Sampling */
#define RFDS BIT(7)
+/* CPU is affected by Indirect Target Selection */
+#define ITS BIT(8)
+/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
+#define ITS_NATIVE_ONLY BIT(9)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
@@ -1238,22 +1244,25 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS),
VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO),
VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS),
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED),
- VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS),
- VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS),
- VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS),
+ VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
@@ -1288,7 +1297,7 @@ u64 x86_read_arch_cap_msr(void)
u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
+ rdmsrq(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
return x86_arch_cap_msr;
}
@@ -1318,10 +1327,78 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
return cpu_matches(cpu_vuln_blacklist, RFDS);
}
+static bool __init vulnerable_to_its(u64 x86_arch_cap_msr)
+{
+ /* The "immunity" bit trumps everything else: */
+ if (x86_arch_cap_msr & ARCH_CAP_ITS_NO)
+ return false;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ /* None of the affected CPUs have BHI_CTRL */
+ if (boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ /*
+ * If a VMM did not expose ITS_NO, assume that a guest could
+ * be running on a vulnerable hardware or may migrate to such
+ * hardware.
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
+
+ if (cpu_matches(cpu_vuln_blacklist, ITS))
+ return true;
+
+ return false;
+}
+
+static struct x86_cpu_id cpu_latest_microcode[] = {
+#include "microcode/intel-ucode-defs.h"
+ {}
+};
+
+static bool __init cpu_has_old_microcode(void)
+{
+ const struct x86_cpu_id *m = x86_match_cpu(cpu_latest_microcode);
+
+ /* Give unknown CPUs a pass: */
+ if (!m) {
+ /* Intel CPUs should be in the list. Warn if not: */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ pr_info("x86/CPU: Model not found in latest microcode list\n");
+ return false;
+ }
+
+ /*
+ * Hosts usually lie to guests with a super high microcode
+ * version. Just ignore what hosts tell guests:
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return false;
+
+ /* Consider all debug microcode to be old: */
+ if (boot_cpu_data.microcode & BIT(31))
+ return true;
+
+ /* Give new microcode a pass: */
+ if (boot_cpu_data.microcode >= m->driver_data)
+ return false;
+
+ /* Uh oh, too old: */
+ return true;
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (cpu_has_old_microcode()) {
+ pr_warn("x86/CPU: Running old microcode\n");
+ setup_force_cpu_bug(X86_BUG_OLD_MICROCODE);
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ }
+
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
!(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
@@ -1402,15 +1479,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Affected CPU list is generally enough to enumerate the vulnerability,
* but for virtualization case check for ARCH_CAP MSR bits also, VMM may
* not want the guest to enumerate the bug.
- *
- * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
- * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
- else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
- setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
@@ -1439,9 +1511,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
- /* When virtualized, eIBRS could be hidden, assume vulnerable */
- if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
- !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ /*
+ * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
+ * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
+ * attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
+ */
+ if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
@@ -1449,6 +1524,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+ if (vulnerable_to_its(x86_arch_cap_msr)) {
+ setup_force_cpu_bug(X86_BUG_ITS);
+ if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY))
+ setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1630,11 +1711,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
memset(&c->x86_capability, 0, sizeof(c->x86_capability));
c->extended_cpuid_level = 0;
- if (!have_cpuid_p())
+ if (!cpuid_feature())
identify_cpu_without_cpuid(c);
/* cyrix could have cpuid enabled via c_identify()*/
- if (have_cpuid_p()) {
+ if (cpuid_feature()) {
cpu_detect(c);
get_cpu_vendor(c);
intel_unlock_cpuid_leafs(c);
@@ -1749,11 +1830,11 @@ static bool detect_null_seg_behavior(void)
*/
unsigned long old_base, tmp;
- rdmsrl(MSR_FS_BASE, old_base);
- wrmsrl(MSR_FS_BASE, 1);
+ rdmsrq(MSR_FS_BASE, old_base);
+ wrmsrq(MSR_FS_BASE, 1);
loadsegment(fs, 0);
- rdmsrl(MSR_FS_BASE, tmp);
- wrmsrl(MSR_FS_BASE, old_base);
+ rdmsrq(MSR_FS_BASE, tmp);
+ wrmsrq(MSR_FS_BASE, old_base);
return tmp == 0;
}
@@ -1794,11 +1875,11 @@ static void generic_identify(struct cpuinfo_x86 *c)
{
c->extended_cpuid_level = 0;
- if (!have_cpuid_p())
+ if (!cpuid_feature())
identify_cpu_without_cpuid(c);
/* cyrix could have cpuid enabled via c_identify()*/
- if (!have_cpuid_p())
+ if (!cpuid_feature())
return;
cpu_detect(c);
@@ -1982,9 +2063,9 @@ void enable_sep_cpu(void)
*/
tss->x86_tss.ss1 = __KERNEL_CS;
- wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+ wrmsrq(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1);
+ wrmsrq(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+ wrmsrq(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32);
put_cpu();
}
@@ -2091,7 +2172,7 @@ DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_
DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
EXPORT_PER_CPU_SYMBOL(__x86_call_depth);
-static void wrmsrl_cstar(unsigned long val)
+static void wrmsrq_cstar(unsigned long val)
{
/*
* Intel CPUs do not support 32-bit SYSCALL. Writing to MSR_CSTAR
@@ -2099,37 +2180,37 @@ static void wrmsrl_cstar(unsigned long val)
* guest. Avoid the pointless write on all Intel CPUs.
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- wrmsrl(MSR_CSTAR, val);
+ wrmsrq(MSR_CSTAR, val);
}
static inline void idt_syscall_init(void)
{
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ wrmsrq(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
if (ia32_enabled()) {
- wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
+ wrmsrq_cstar((unsigned long)entry_SYSCALL_compat);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
* This does not cause SYSENTER to jump to the wrong location, because
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
- wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+ wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+ wrmsrq_safe(MSR_IA32_SYSENTER_ESP,
(unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ wrmsrq_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
} else {
- wrmsrl_cstar((unsigned long)entry_SYSCALL32_ignore);
- wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+ wrmsrq_cstar((unsigned long)entry_SYSCALL32_ignore);
+ wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+ wrmsrq_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrq_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
}
/*
* Flags to clear on syscall; clear as much as possible
* to minimize user space-kernel interference.
*/
- wrmsrl(MSR_SYSCALL_MASK,
+ wrmsrq(MSR_SYSCALL_MASK,
X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF|
X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF|
@@ -2198,7 +2279,7 @@ static inline void setup_getcpu(int cpu)
struct desc_struct d = { };
if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
- wrmsr(MSR_TSC_AUX, cpudata, 0);
+ wrmsrq(MSR_TSC_AUX, cpudata);
/* Store CPU and node number in limit. */
d.limit0 = cpudata;
@@ -2313,8 +2394,8 @@ void cpu_init(void)
memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
- wrmsrl(MSR_FS_BASE, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
+ wrmsrq(MSR_FS_BASE, 0);
+ wrmsrq(MSR_KERNEL_GS_BASE, 0);
barrier();
x2apic_setup();
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 51deb60a9d26..bc38b2d56f26 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -75,6 +75,15 @@ extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id);
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c);
+#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
+struct amd_northbridge *amd_init_l3_cache(int index);
+#else
+static inline struct amd_northbridge *amd_init_l3_cache(int index)
+{
+ return NULL;
+}
+#endif
+
unsigned int aperfmperf_get_khz(int cpu);
void cpu_select_mitigations(void);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index a2fbea0be535..46efcbd6afa4 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -28,6 +28,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_APX, X86_FEATURE_XSAVE },
{ X86_FEATURE_CMOV, X86_FEATURE_FXSR },
{ X86_FEATURE_MMX, X86_FEATURE_FXSR },
{ X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
@@ -82,8 +83,12 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
+ { X86_FEATURE_AMX_FP16, X86_FEATURE_AMX_TILE },
+ { X86_FEATURE_AMX_BF16, X86_FEATURE_AMX_TILE },
+ { X86_FEATURE_AMX_INT8, X86_FEATURE_AMX_TILE },
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
+ { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL },
{}
};
diff --git a/arch/x86/kernel/cpu/cpuid_0x2_table.c b/arch/x86/kernel/cpu/cpuid_0x2_table.c
new file mode 100644
index 000000000000..89bc8db5e9c6
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid_0x2_table.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sizes.h>
+
+#include <asm/cpuid/types.h>
+
+#include "cpu.h"
+
+#define CACHE_ENTRY(_desc, _type, _size) \
+ [_desc] = { \
+ .c_type = (_type), \
+ .c_size = (_size) / SZ_1K, \
+ }
+
+#define TLB_ENTRY(_desc, _type, _entries) \
+ [_desc] = { \
+ .t_type = (_type), \
+ .entries = (_entries), \
+ }
+
+const struct leaf_0x2_table cpuid_0x2_table[256] = {
+ CACHE_ENTRY(0x06, CACHE_L1_INST, SZ_8K ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x08, CACHE_L1_INST, SZ_16K ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x09, CACHE_L1_INST, SZ_32K ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x0a, CACHE_L1_DATA, SZ_8K ), /* 2 way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x0c, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x0d, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x0e, CACHE_L1_DATA, SZ_24K ), /* 6-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x21, CACHE_L2, SZ_256K ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x22, CACHE_L3, SZ_512K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x23, CACHE_L3, SZ_1M ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x25, CACHE_L3, SZ_2M ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x29, CACHE_L3, SZ_4M ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x2c, CACHE_L1_DATA, SZ_32K ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x30, CACHE_L1_INST, SZ_32K ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x39, CACHE_L2, SZ_128K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x3a, CACHE_L2, SZ_192K ), /* 6-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x3b, CACHE_L2, SZ_128K ), /* 2-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x3c, CACHE_L2, SZ_256K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x3d, CACHE_L2, SZ_384K ), /* 6-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x3e, CACHE_L2, SZ_512K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x3f, CACHE_L2, SZ_256K ), /* 2-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x41, CACHE_L2, SZ_128K ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x42, CACHE_L2, SZ_256K ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x43, CACHE_L2, SZ_512K ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x44, CACHE_L2, SZ_1M ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x45, CACHE_L2, SZ_2M ), /* 4-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x46, CACHE_L3, SZ_4M ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x47, CACHE_L3, SZ_8M ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x48, CACHE_L2, SZ_3M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x49, CACHE_L3, SZ_4M ), /* 16-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x4a, CACHE_L3, SZ_6M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x4b, CACHE_L3, SZ_8M ), /* 16-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x4c, CACHE_L3, SZ_12M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x4d, CACHE_L3, SZ_16M ), /* 16-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x4e, CACHE_L2, SZ_6M ), /* 24-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x60, CACHE_L1_DATA, SZ_16K ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x66, CACHE_L1_DATA, SZ_8K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x67, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x68, CACHE_L1_DATA, SZ_32K ), /* 4-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x78, CACHE_L2, SZ_1M ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x79, CACHE_L2, SZ_128K ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x7a, CACHE_L2, SZ_256K ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x7b, CACHE_L2, SZ_512K ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x7c, CACHE_L2, SZ_1M ), /* 8-way set assoc, sectored cache, 64 byte line size */
+ CACHE_ENTRY(0x7d, CACHE_L2, SZ_2M ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x7f, CACHE_L2, SZ_512K ), /* 2-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x80, CACHE_L2, SZ_512K ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x82, CACHE_L2, SZ_256K ), /* 8-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x83, CACHE_L2, SZ_512K ), /* 8-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x84, CACHE_L2, SZ_1M ), /* 8-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x85, CACHE_L2, SZ_2M ), /* 8-way set assoc, 32 byte line size */
+ CACHE_ENTRY(0x86, CACHE_L2, SZ_512K ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0x87, CACHE_L2, SZ_1M ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xd0, CACHE_L3, SZ_512K ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xd1, CACHE_L3, SZ_1M ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xd2, CACHE_L3, SZ_2M ), /* 4-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xd6, CACHE_L3, SZ_1M ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xd7, CACHE_L3, SZ_2M ), /* 8-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xd8, CACHE_L3, SZ_4M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xdc, CACHE_L3, SZ_2M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xdd, CACHE_L3, SZ_4M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xde, CACHE_L3, SZ_8M ), /* 12-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xe2, CACHE_L3, SZ_2M ), /* 16-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xe3, CACHE_L3, SZ_4M ), /* 16-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xe4, CACHE_L3, SZ_8M ), /* 16-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xea, CACHE_L3, SZ_12M ), /* 24-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xeb, CACHE_L3, SZ_18M ), /* 24-way set assoc, 64 byte line size */
+ CACHE_ENTRY(0xec, CACHE_L3, SZ_24M ), /* 24-way set assoc, 64 byte line size */
+
+ TLB_ENTRY( 0x01, TLB_INST_4K, 32 ), /* TLB_INST 4 KByte pages, 4-way set associative */
+ TLB_ENTRY( 0x02, TLB_INST_4M, 2 ), /* TLB_INST 4 MByte pages, full associative */
+ TLB_ENTRY( 0x03, TLB_DATA_4K, 64 ), /* TLB_DATA 4 KByte pages, 4-way set associative */
+ TLB_ENTRY( 0x04, TLB_DATA_4M, 8 ), /* TLB_DATA 4 MByte pages, 4-way set associative */
+ TLB_ENTRY( 0x05, TLB_DATA_4M, 32 ), /* TLB_DATA 4 MByte pages, 4-way set associative */
+ TLB_ENTRY( 0x0b, TLB_INST_4M, 4 ), /* TLB_INST 4 MByte pages, 4-way set associative */
+ TLB_ENTRY( 0x4f, TLB_INST_4K, 32 ), /* TLB_INST 4 KByte pages */
+ TLB_ENTRY( 0x50, TLB_INST_ALL, 64 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
+ TLB_ENTRY( 0x51, TLB_INST_ALL, 128 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
+ TLB_ENTRY( 0x52, TLB_INST_ALL, 256 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
+ TLB_ENTRY( 0x55, TLB_INST_2M_4M, 7 ), /* TLB_INST 2-MByte or 4-MByte pages, fully associative */
+ TLB_ENTRY( 0x56, TLB_DATA0_4M, 16 ), /* TLB_DATA0 4 MByte pages, 4-way set associative */
+ TLB_ENTRY( 0x57, TLB_DATA0_4K, 16 ), /* TLB_DATA0 4 KByte pages, 4-way associative */
+ TLB_ENTRY( 0x59, TLB_DATA0_4K, 16 ), /* TLB_DATA0 4 KByte pages, fully associative */
+ TLB_ENTRY( 0x5a, TLB_DATA0_2M_4M, 32 ), /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */
+ TLB_ENTRY( 0x5b, TLB_DATA_4K_4M, 64 ), /* TLB_DATA 4 KByte and 4 MByte pages */
+ TLB_ENTRY( 0x5c, TLB_DATA_4K_4M, 128 ), /* TLB_DATA 4 KByte and 4 MByte pages */
+ TLB_ENTRY( 0x5d, TLB_DATA_4K_4M, 256 ), /* TLB_DATA 4 KByte and 4 MByte pages */
+ TLB_ENTRY( 0x61, TLB_INST_4K, 48 ), /* TLB_INST 4 KByte pages, full associative */
+ TLB_ENTRY( 0x63, TLB_DATA_1G_2M_4M, 4 ), /* TLB_DATA 1 GByte pages, 4-way set associative
+ * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */
+ TLB_ENTRY( 0x6b, TLB_DATA_4K, 256 ), /* TLB_DATA 4 KByte pages, 8-way associative */
+ TLB_ENTRY( 0x6c, TLB_DATA_2M_4M, 128 ), /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */
+ TLB_ENTRY( 0x6d, TLB_DATA_1G, 16 ), /* TLB_DATA 1 GByte pages, fully associative */
+ TLB_ENTRY( 0x76, TLB_INST_2M_4M, 8 ), /* TLB_INST 2-MByte or 4-MByte pages, fully associative */
+ TLB_ENTRY( 0xb0, TLB_INST_4K, 128 ), /* TLB_INST 4 KByte pages, 4-way set associative */
+ TLB_ENTRY( 0xb1, TLB_INST_2M_4M, 4 ), /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */
+ TLB_ENTRY( 0xb2, TLB_INST_4K, 64 ), /* TLB_INST 4KByte pages, 4-way set associative */
+ TLB_ENTRY( 0xb3, TLB_DATA_4K, 128 ), /* TLB_DATA 4 KByte pages, 4-way set associative */
+ TLB_ENTRY( 0xb4, TLB_DATA_4K, 256 ), /* TLB_DATA 4 KByte pages, 4-way associative */
+ TLB_ENTRY( 0xb5, TLB_INST_4K, 64 ), /* TLB_INST 4 KByte pages, 8-way set associative */
+ TLB_ENTRY( 0xb6, TLB_INST_4K, 128 ), /* TLB_INST 4 KByte pages, 8-way set associative */
+ TLB_ENTRY( 0xba, TLB_DATA_4K, 64 ), /* TLB_DATA 4 KByte pages, 4-way associative */
+ TLB_ENTRY( 0xc0, TLB_DATA_4K_4M, 8 ), /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */
+ TLB_ENTRY( 0xc1, STLB_4K_2M, 1024 ), /* STLB 4 KByte and 2 MByte pages, 8-way associative */
+ TLB_ENTRY( 0xc2, TLB_DATA_2M_4M, 16 ), /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */
+ TLB_ENTRY( 0xca, STLB_4K, 512 ), /* STLB 4 KByte pages, 4-way associative */
+};
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 4a4118784c13..d69757246bde 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -4,6 +4,7 @@
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/vmx.h>
@@ -118,7 +119,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
bool enable_vmx;
u64 msr;
- if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) {
+ if (rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr)) {
clear_cpu_cap(c, X86_FEATURE_VMX);
clear_cpu_cap(c, X86_FEATURE_SGX);
return;
@@ -165,7 +166,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
msr |= FEAT_CTL_SGX_LC_ENABLED;
}
- wrmsrl(MSR_IA32_FEAT_CTL, msr);
+ wrmsrq(MSR_IA32_FEAT_CTL, msr);
update_caps:
set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 6af4a4a90a52..2154f12766fb 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -15,6 +15,7 @@
#include <asm/cacheinfo.h>
#include <asm/spec-ctrl.h>
#include <asm/delay.h>
+#include <asm/msr.h>
#include "cpu.h"
@@ -96,7 +97,7 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
u64 val;
- rdmsrl(MSR_K7_HWCR, val);
+ rdmsrq(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
}
@@ -110,7 +111,7 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
* Try to cache the base value so further operations can
* avoid RMW. If that faults, do not enable SSBD.
*/
- if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ if (!rdmsrq_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
setup_force_cpu_cap(X86_FEATURE_SSBD);
x86_amd_ls_cfg_ssbd_mask = 1ULL << 10;
@@ -194,7 +195,7 @@ static void init_hygon(struct cpuinfo_x86 *c)
init_hygon_cacheinfo(c);
if (cpu_has(c, X86_FEATURE_SVM)) {
- rdmsrl(MSR_VM_CR, vm_cr);
+ rdmsrq(MSR_VM_CR, vm_cr);
if (vm_cr & SVM_VM_CR_SVM_DIS_MASK) {
pr_notice_once("SVM disabled (by BIOS) in MSR_VM_CR\n");
clear_cpu_cap(c, X86_FEATURE_SVM);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cdc9813871ef..076eaa41b8c8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -6,6 +6,7 @@
#include <linux/minmax.h>
#include <linux/smp.h>
#include <linux/string.h>
+#include <linux/types.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -15,6 +16,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/cpu.h>
+#include <asm/cpuid/api.h>
#include <asm/hwcap2.h>
#include <asm/intel-family.h>
#include <asm/microcode.h>
@@ -157,7 +159,7 @@ static void detect_tme_early(struct cpuinfo_x86 *c)
u64 tme_activate;
int keyid_bits;
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+ rdmsrq(MSR_IA32_TME_ACTIVATE, tme_activate);
if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
pr_info_once("x86/tme: not enabled by BIOS\n");
@@ -299,7 +301,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* string flag and enhanced fast string capabilities accordingly.
*/
if (c->x86_vfm >= INTEL_PENTIUM_M_DOTHAN) {
- rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+ rdmsrq(MSR_IA32_MISC_ENABLE, misc_enable);
if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
/* X86_FEATURE_ERMS is set based on CPUID */
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
@@ -488,7 +490,7 @@ static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
- if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+ if (!rdmsrq_safe(MSR_PLATFORM_INFO, &msr)) {
if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
}
@@ -498,7 +500,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
{
u64 msr;
- if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+ if (rdmsrq_safe(MSR_MISC_FEATURES_ENABLES, &msr))
return;
/* Clear all MISC features */
@@ -509,7 +511,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
probe_xeon_phi_r3mwait(c);
msr = this_cpu_read(msr_misc_features_shadow);
- wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+ wrmsrq(MSR_MISC_FEATURES_ENABLES, msr);
}
/*
@@ -646,103 +648,11 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
}
#endif
-#define TLB_INST_4K 0x01
-#define TLB_INST_4M 0x02
-#define TLB_INST_2M_4M 0x03
-
-#define TLB_INST_ALL 0x05
-#define TLB_INST_1G 0x06
-
-#define TLB_DATA_4K 0x11
-#define TLB_DATA_4M 0x12
-#define TLB_DATA_2M_4M 0x13
-#define TLB_DATA_4K_4M 0x14
-
-#define TLB_DATA_1G 0x16
-#define TLB_DATA_1G_2M_4M 0x17
-
-#define TLB_DATA0_4K 0x21
-#define TLB_DATA0_4M 0x22
-#define TLB_DATA0_2M_4M 0x23
-
-#define STLB_4K 0x41
-#define STLB_4K_2M 0x42
-
-/*
- * All of leaf 0x2's one-byte TLB descriptors implies the same number of
- * entries for their respective TLB types. The 0x63 descriptor is an
- * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries
- * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for
- * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the
- * intel_tlb_table[] mapping.
- */
-#define TLB_0x63_2M_4M_ENTRIES 32
-
-struct _tlb_table {
- unsigned char descriptor;
- char tlb_type;
- unsigned int entries;
-};
-
-static const struct _tlb_table intel_tlb_table[] = {
- { 0x01, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages, 4-way set associative */
- { 0x02, TLB_INST_4M, 2}, /* TLB_INST 4 MByte pages, full associative */
- { 0x03, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way set associative */
- { 0x04, TLB_DATA_4M, 8}, /* TLB_DATA 4 MByte pages, 4-way set associative */
- { 0x05, TLB_DATA_4M, 32}, /* TLB_DATA 4 MByte pages, 4-way set associative */
- { 0x0b, TLB_INST_4M, 4}, /* TLB_INST 4 MByte pages, 4-way set associative */
- { 0x4f, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages */
- { 0x50, TLB_INST_ALL, 64}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
- { 0x51, TLB_INST_ALL, 128}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
- { 0x52, TLB_INST_ALL, 256}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
- { 0x55, TLB_INST_2M_4M, 7}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */
- { 0x56, TLB_DATA0_4M, 16}, /* TLB_DATA0 4 MByte pages, 4-way set associative */
- { 0x57, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, 4-way associative */
- { 0x59, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, fully associative */
- { 0x5a, TLB_DATA0_2M_4M, 32}, /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */
- { 0x5b, TLB_DATA_4K_4M, 64}, /* TLB_DATA 4 KByte and 4 MByte pages */
- { 0x5c, TLB_DATA_4K_4M, 128}, /* TLB_DATA 4 KByte and 4 MByte pages */
- { 0x5d, TLB_DATA_4K_4M, 256}, /* TLB_DATA 4 KByte and 4 MByte pages */
- { 0x61, TLB_INST_4K, 48}, /* TLB_INST 4 KByte pages, full associative */
- { 0x63, TLB_DATA_1G_2M_4M, 4}, /* TLB_DATA 1 GByte pages, 4-way set associative
- * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */
- { 0x6b, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 8-way associative */
- { 0x6c, TLB_DATA_2M_4M, 128}, /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */
- { 0x6d, TLB_DATA_1G, 16}, /* TLB_DATA 1 GByte pages, fully associative */
- { 0x76, TLB_INST_2M_4M, 8}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */
- { 0xb0, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 4-way set associative */
- { 0xb1, TLB_INST_2M_4M, 4}, /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */
- { 0xb2, TLB_INST_4K, 64}, /* TLB_INST 4KByte pages, 4-way set associative */
- { 0xb3, TLB_DATA_4K, 128}, /* TLB_DATA 4 KByte pages, 4-way set associative */
- { 0xb4, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 4-way associative */
- { 0xb5, TLB_INST_4K, 64}, /* TLB_INST 4 KByte pages, 8-way set associative */
- { 0xb6, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 8-way set associative */
- { 0xba, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way associative */
- { 0xc0, TLB_DATA_4K_4M, 8}, /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */
- { 0xc1, STLB_4K_2M, 1024}, /* STLB 4 KByte and 2 MByte pages, 8-way associative */
- { 0xc2, TLB_DATA_2M_4M, 16}, /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */
- { 0xca, STLB_4K, 512}, /* STLB 4 KByte pages, 4-way associative */
- { 0x00, 0, 0 }
-};
-
-static void intel_tlb_lookup(const unsigned char desc)
+static void intel_tlb_lookup(const struct leaf_0x2_table *desc)
{
- unsigned int entries;
- unsigned char k;
-
- if (desc == 0)
- return;
-
- /* look up this descriptor in the table */
- for (k = 0; intel_tlb_table[k].descriptor != desc &&
- intel_tlb_table[k].descriptor != 0; k++)
- ;
+ short entries = desc->entries;
- if (intel_tlb_table[k].tlb_type == 0)
- return;
-
- entries = intel_tlb_table[k].entries;
- switch (intel_tlb_table[k].tlb_type) {
+ switch (desc->t_type) {
case STLB_4K:
tlb_lli_4k = max(tlb_lli_4k, entries);
tlb_lld_4k = max(tlb_lld_4k, entries);
@@ -799,28 +709,16 @@ static void intel_tlb_lookup(const unsigned char desc)
static void intel_detect_tlb(struct cpuinfo_x86 *c)
{
- int i, j, n;
- unsigned int regs[4];
- unsigned char *desc = (unsigned char *)regs;
+ const struct leaf_0x2_table *desc;
+ union leaf_0x2_regs regs;
+ u8 *ptr;
if (c->cpuid_level < 2)
return;
- /* Number of times to iterate */
- n = cpuid_eax(2) & 0xFF;
-
- for (i = 0 ; i < n ; i++) {
- cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
- /* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 4 ; j++)
- if (regs[j] & (1 << 31))
- regs[j] = 0;
-
- /* Byte 0 is level count, not a descriptor */
- for (j = 1 ; j < 16 ; j++)
- intel_tlb_lookup(desc[j]);
- }
+ cpuid_leaf_0x2(&regs);
+ for_each_cpuid_0x2_desc(regs, ptr, desc)
+ intel_tlb_lookup(desc);
}
static const struct cpu_dev intel_cpu_dev = {
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index 30b1d63b97f3..bc7671f920a7 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -79,7 +79,7 @@ static int intel_epb_save(void)
{
u64 epb;
- rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ rdmsrq(MSR_IA32_ENERGY_PERF_BIAS, epb);
/*
* Ensure that saved_epb will always be nonzero after this write even if
* the EPB value read from the MSR is 0.
@@ -94,7 +94,7 @@ static void intel_epb_restore(void)
u64 val = this_cpu_read(saved_epb);
u64 epb;
- rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ rdmsrq(MSR_IA32_ENERGY_PERF_BIAS, epb);
if (val) {
val &= EPB_MASK;
} else {
@@ -111,7 +111,7 @@ static void intel_epb_restore(void)
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
}
}
- wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
+ wrmsrq(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
}
static struct syscore_ops intel_epb_syscore_ops = {
@@ -135,7 +135,7 @@ static ssize_t energy_perf_bias_show(struct device *dev,
u64 epb;
int ret;
- ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
if (ret < 0)
return ret;
@@ -157,11 +157,11 @@ static ssize_t energy_perf_bias_store(struct device *dev,
else if (kstrtou64(buf, 0, &val) || val > MAX_EPB)
return -EINVAL;
- ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
if (ret < 0)
return ret;
- ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
+ ret = wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
(epb & ~EPB_MASK) | val);
if (ret < 0)
return ret;
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1075a90141da..9d852c3b2cb5 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -662,12 +662,12 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
return;
}
- rdmsrl(MSR_K7_HWCR, hwcr);
+ rdmsrq(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+ wrmsrq(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < num_msrs; i++)
@@ -675,7 +675,7 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
/* restore old settings */
if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr);
+ wrmsrq(MSR_K7_HWCR, hwcr);
}
/* cpu init entry point, called from mce.c with preempt off */
@@ -805,12 +805,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
}
if (mce_flags.smca) {
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
+ rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
if (m->status & MCI_STATUS_SYNDV) {
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
+ rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
+ rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
+ rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
}
}
@@ -834,16 +834,16 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
{
u64 status, addr = 0;
- rdmsrl(msr_stat, status);
+ rdmsrq(msr_stat, status);
if (!(status & MCI_STATUS_VAL))
return false;
if (status & MCI_STATUS_ADDRV)
- rdmsrl(msr_addr, addr);
+ rdmsrq(msr_addr, addr);
__log_error(bank, status, addr, misc);
- wrmsrl(msr_stat, 0);
+ wrmsrq(msr_stat, 0);
return status & MCI_STATUS_DEFERRED;
}
@@ -862,7 +862,7 @@ static bool _log_error_deferred(unsigned int bank, u32 misc)
return true;
/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
return true;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f6fd71b64b66..e9b3c5d4a52e 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -121,7 +121,7 @@ void mce_prep_record_common(struct mce *m)
{
m->cpuid = cpuid_eax(1);
m->cpuvendor = boot_cpu_data.x86_vendor;
- m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
+ m->mcgcap = native_rdmsrq(MSR_IA32_MCG_CAP);
/* need the internal __ version to avoid deadlocks */
m->time = __ktime_get_real_seconds();
}
@@ -388,9 +388,9 @@ void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
}
/* MSR access wrappers used for error injection */
-noinstr u64 mce_rdmsrl(u32 msr)
+noinstr u64 mce_rdmsrq(u32 msr)
{
- DECLARE_ARGS(val, low, high);
+ EAX_EDX_DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
int offset;
@@ -423,7 +423,7 @@ noinstr u64 mce_rdmsrl(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
-static noinstr void mce_wrmsrl(u32 msr, u64 v)
+static noinstr void mce_wrmsrq(u32 msr, u64 v)
{
u32 low, high;
@@ -444,7 +444,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
low = (u32)v;
high = (u32)(v >> 32);
- /* See comment in mce_rdmsrl() */
+ /* See comment in mce_rdmsrq() */
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE)
@@ -468,7 +468,7 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs
instrumentation_end();
m = &err->m;
- m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ m->mcgstatus = mce_rdmsrq(MSR_IA32_MCG_STATUS);
if (regs) {
/*
* Get the address of the instruction at the time of
@@ -488,7 +488,7 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs
}
/* Use accurate RIP reporting if available. */
if (mca_cfg.rip_msr)
- m->ip = mce_rdmsrl(mca_cfg.rip_msr);
+ m->ip = mce_rdmsrq(mca_cfg.rip_msr);
}
}
@@ -684,10 +684,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
struct mce *m = &err->m;
if (m->status & MCI_STATUS_MISCV)
- m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
+ m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
+ m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
/*
* Mask the reported address by the reported granularity.
@@ -702,12 +702,12 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
}
if (mce_flags.smca) {
- m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+ m->ipid = mce_rdmsrq(MSR_AMD64_SMCA_MCx_IPID(i));
if (m->status & MCI_STATUS_SYNDV) {
- m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
- err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
- err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
+ m->synd = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND(i));
+ err->vendor.amd.synd1 = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(i));
+ err->vendor.amd.synd2 = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(i));
}
}
}
@@ -753,7 +753,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m->bank = i;
barrier();
- m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS));
/*
* Update storm tracking here, before checking for the
@@ -829,7 +829,7 @@ clear_it:
/*
* Clear state for this bank.
*/
- mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
}
/*
@@ -887,8 +887,8 @@ quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
*/
static noinstr bool quirk_skylake_repmov(void)
{
- u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
- u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE);
+ u64 mcgstatus = mce_rdmsrq(MSR_IA32_MCG_STATUS);
+ u64 misc_enable = mce_rdmsrq(MSR_IA32_MISC_ENABLE);
u64 mc1_status;
/*
@@ -899,7 +899,7 @@ static noinstr bool quirk_skylake_repmov(void)
!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING))
return false;
- mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
+ mc1_status = mce_rdmsrq(MSR_IA32_MCx_STATUS(1));
/* Check for a software-recoverable data fetch error. */
if ((mc1_status &
@@ -910,8 +910,8 @@ static noinstr bool quirk_skylake_repmov(void)
MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
MCI_STATUS_AR | MCI_STATUS_S)) {
misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
- mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
- mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0);
+ mce_wrmsrq(MSR_IA32_MISC_ENABLE, misc_enable);
+ mce_wrmsrq(MSR_IA32_MCx_STATUS(1), 0);
instrumentation_begin();
pr_err_once("Erratum detected, disable fast string copy instructions.\n");
@@ -955,7 +955,7 @@ static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, un
int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
- m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1274,7 +1274,7 @@ static __always_inline void mce_clear_state(unsigned long *toclear)
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (arch_test_bit(i, toclear))
- mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
}
}
@@ -1298,7 +1298,7 @@ static noinstr bool mce_check_crashing_cpu(void)
(crashing_cpu != -1 && crashing_cpu != cpu)) {
u64 mcgstatus;
- mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS);
+ mcgstatus = native_rdmsrq(MSR_IA32_MCG_STATUS);
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
if (mcgstatus & MCG_STATUS_LMCES)
@@ -1306,7 +1306,7 @@ static noinstr bool mce_check_crashing_cpu(void)
}
if (mcgstatus & MCG_STATUS_RIPV) {
- __wrmsr(MSR_IA32_MCG_STATUS, 0, 0);
+ native_wrmsrq(MSR_IA32_MCG_STATUS, 0);
return true;
}
}
@@ -1335,7 +1335,7 @@ __mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs,
m->addr = 0;
m->bank = i;
- m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1693,7 +1693,7 @@ out:
instrumentation_end();
clear:
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ mce_wrmsrq(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1822,7 +1822,7 @@ static void __mcheck_cpu_cap_init(void)
u64 cap;
u8 b;
- rdmsrl(MSR_IA32_MCG_CAP, cap);
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
@@ -1863,7 +1863,7 @@ static void __mcheck_cpu_init_generic(void)
cr4_set_bits(X86_CR4_MCE);
- rdmsrl(MSR_IA32_MCG_CAP, cap);
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
@@ -1878,8 +1878,8 @@ static void __mcheck_cpu_init_clear_banks(void)
if (!b->init)
continue;
- wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
- wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
}
}
@@ -1905,7 +1905,7 @@ static void __mcheck_cpu_check_banks(void)
if (!b->init)
continue;
- rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
+ rdmsrq(mca_msr_reg(i, MCA_CTL), msrval);
b->init = !!msrval;
}
}
@@ -2436,7 +2436,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(mca_msr_reg(i, MCA_CTL), 0);
+ wrmsrq(mca_msr_reg(i, MCA_CTL), 0);
}
return;
}
@@ -2786,7 +2786,7 @@ static void mce_reenable_cpu(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
}
}
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 06e3cf7229ce..d02c4f556cd0 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -24,10 +24,11 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/apic.h>
#include <asm/irq_vectors.h>
#include <asm/mce.h>
+#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/smp.h>
@@ -475,27 +476,27 @@ static void prepare_msrs(void *info)
struct mce m = *(struct mce *)info;
u8 b = m.bank;
- wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+ wrmsrq(MSR_IA32_MCG_STATUS, m.mcgstatus);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
if (m.inject_flags == DFR_INT_INJ) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
+ wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
+ wrmsrq(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
} else {
- wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
+ wrmsrq(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
+ wrmsrq(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
}
- wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
+ wrmsrq(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
if (m.misc)
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
+ wrmsrq(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
} else {
- wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
- wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
+ wrmsrq(MSR_IA32_MCx_STATUS(b), m.status);
+ wrmsrq(MSR_IA32_MCx_ADDR(b), m.addr);
if (m.misc)
- wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
+ wrmsrq(MSR_IA32_MCx_MISC(b), m.misc);
}
}
@@ -589,7 +590,7 @@ static int inj_bank_set(void *data, u64 val)
u64 cap;
/* Get bank count on target CPU so we can handle non-uniform values. */
- rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
+ rdmsrq_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
n_banks = cap & MCG_BANKCNT_MASK;
if (val >= n_banks) {
@@ -613,7 +614,7 @@ static int inj_bank_set(void *data, u64 val)
if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
u64 ipid;
- if (rdmsrl_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
+ if (rdmsrq_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
pr_err("Error reading IPID on CPU%d\n", m->extcpu);
return -EINVAL;
}
@@ -741,15 +742,15 @@ static void check_hw_inj_possible(void)
u64 status = MCI_STATUS_VAL, ipid;
/* Check whether bank is populated */
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
+ rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
if (!ipid)
continue;
toggle_hw_mce_inject(cpu, true);
- wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
- rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
- wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0);
+ wrmsrq_safe(mca_msr_reg(bank, MCA_STATUS), status);
+ rdmsrq_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+ wrmsrq_safe(mca_msr_reg(bank, MCA_STATUS), 0);
if (!status) {
hw_injection_possible = false;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f863df0ff42c..efcf21e9552e 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -94,7 +94,7 @@ static bool cmci_supported(int *banks)
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return false;
- rdmsrl(MSR_IA32_MCG_CAP, cap);
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
return !!(cap & MCG_CMCI_P);
}
@@ -106,7 +106,7 @@ static bool lmce_supported(void)
if (mca_cfg.lmce_disabled)
return false;
- rdmsrl(MSR_IA32_MCG_CAP, tmp);
+ rdmsrq(MSR_IA32_MCG_CAP, tmp);
/*
* LMCE depends on recovery support in the processor. Hence both
@@ -123,7 +123,7 @@ static bool lmce_supported(void)
* WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
* locks the MSR in the event that it wasn't already locked by BIOS.
*/
- rdmsrl(MSR_IA32_FEAT_CTL, tmp);
+ rdmsrq(MSR_IA32_FEAT_CTL, tmp);
if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
return false;
@@ -141,9 +141,9 @@ static void cmci_set_threshold(int bank, int thresh)
u64 val;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
+ wrmsrq(MSR_IA32_MCx_CTL2(bank), val | thresh);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
@@ -184,7 +184,7 @@ static bool cmci_skip_bank(int bank, u64 *val)
if (test_bit(bank, mce_banks_ce_disabled))
return true;
- rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), *val);
/* Already owned by someone else? */
if (*val & MCI_CTL2_CMCI_EN) {
@@ -232,8 +232,8 @@ static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_w
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
val |= MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
/* If the enable bit did not stick, this bank should be polled. */
if (!(val & MCI_CTL2_CMCI_EN)) {
@@ -324,9 +324,9 @@ static void __cmci_disable_bank(int bank)
if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
return;
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
@@ -430,10 +430,10 @@ void intel_init_lmce(void)
if (!lmce_supported())
return;
- rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+ rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
if (!(val & MCG_EXT_CTL_LMCE_EN))
- wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
+ wrmsrq(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}
void intel_clear_lmce(void)
@@ -443,9 +443,9 @@ void intel_clear_lmce(void)
if (!lmce_supported())
return;
- rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+ rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
val &= ~MCG_EXT_CTL_LMCE_EN;
- wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+ wrmsrq(MSR_IA32_MCG_EXT_CTL, val);
}
/*
@@ -460,10 +460,10 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
case INTEL_SANDYBRIDGE_X:
case INTEL_IVYBRIDGE_X:
case INTEL_HASWELL_X:
- if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
+ if (rdmsrq_safe(MSR_ERROR_CONTROL, &error_control))
return;
error_control |= 2;
- wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
+ wrmsrq_safe(MSR_ERROR_CONTROL, error_control);
break;
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 95a504ece43e..b5ba598e54cb 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -312,7 +312,7 @@ static __always_inline void pentium_machine_check(struct pt_regs *regs) {}
static __always_inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
-noinstr u64 mce_rdmsrl(u32 msr);
+noinstr u64 mce_rdmsrq(u32 msr);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 96cb992d50ef..097e39327942 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -217,7 +217,6 @@ static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsi
{
struct patch_digest *pd = NULL;
u8 digest[SHA256_DIGEST_SIZE];
- struct sha256_state s;
int i;
if (x86_family(bsp_cpuid_1_eax) < 0x17)
@@ -235,9 +234,7 @@ static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsi
return false;
}
- sha256_init(&s);
- sha256_update(&s, data, len);
- sha256_final(&s, digest);
+ sha256(data, len, digest);
if (memcmp(digest, pd->sha256, sizeof(digest))) {
pr_err("Patch 0x%x SHA256 digest mismatch!\n", patch_id);
@@ -607,7 +604,7 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev,
if (!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize))
return false;
- native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr);
+ native_wrmsrq(MSR_AMD64_PATCH_LOADER, p_addr);
if (x86_family(bsp_cpuid_1_eax) == 0x17) {
unsigned long p_addr_end = p_addr + psize - 1;
@@ -1178,11 +1175,18 @@ static void microcode_fini_cpu_amd(int cpu)
uci->mc = NULL;
}
+static void finalize_late_load_amd(int result)
+{
+ if (result)
+ cleanup();
+}
+
static struct microcode_ops microcode_amd_ops = {
.request_microcode_fw = request_microcode_amd,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
+ .finalize_late_load = finalize_late_load_amd,
.nmi_safe = true,
};
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 079f046ee26d..fe50eb5b7c4a 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -37,6 +37,7 @@
#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/cmdline.h>
+#include <asm/msr.h>
#include <asm/setup.h>
#include "internal.h"
@@ -117,7 +118,7 @@ bool __init microcode_loader_disabled(void)
* 3) Certain AMD patch levels are not allowed to be
* overwritten.
*/
- if (!have_cpuid_p() ||
+ if (!cpuid_feature() ||
native_cpuid_ecx(1) & BIT(31) ||
amd_check_current_patch_level())
dis_ucode_ldr = true;
@@ -696,6 +697,8 @@ static int load_late_locked(void)
return load_late_stop_cpus(true);
case UCODE_NFOUND:
return -ENOENT;
+ case UCODE_OK:
+ return 0;
default:
return -EBADFD;
}
diff --git a/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
new file mode 100644
index 000000000000..cb6e601701ab
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
@@ -0,0 +1,150 @@
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x03, .steppings = 0x0004, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0001, .driver_data = 0x45 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0002, .driver_data = 0x40 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0004, .driver_data = 0x2c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .driver_data = 0x10 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0001, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0020, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0400, .driver_data = 0xd },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x2000, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0002, .driver_data = 0x14 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0004, .driver_data = 0x38 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0008, .driver_data = 0x2e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .driver_data = 0x11 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0008, .driver_data = 0x8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0400, .driver_data = 0x5 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x09, .steppings = 0x0020, .driver_data = 0x47 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0001, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0002, .driver_data = 0x1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0002, .driver_data = 0x1d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0010, .driver_data = 0x2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0d, .steppings = 0x0040, .driver_data = 0x18 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x0100, .driver_data = 0x39 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x1000, .driver_data = 0x59 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0004, .driver_data = 0x5d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0040, .driver_data = 0xd2 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0080, .driver_data = 0x6b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0400, .driver_data = 0x95 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .driver_data = 0xbc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x2000, .driver_data = 0xa4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x16, .steppings = 0x0002, .driver_data = 0x44 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .driver_data = 0x60f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0080, .driver_data = 0x70a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0400, .driver_data = 0xa0b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0010, .driver_data = 0x12 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0020, .driver_data = 0x1d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0004, .driver_data = 0x219 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .driver_data = 0x107 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1d, .steppings = 0x0002, .driver_data = 0x29 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1e, .steppings = 0x0020, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0004, .driver_data = 0x11 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0020, .driver_data = 0x7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x26, .steppings = 0x0002, .driver_data = 0x105 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2a, .steppings = 0x0080, .driver_data = 0x2f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2c, .steppings = 0x0004, .driver_data = 0x1f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0040, .driver_data = 0x621 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0080, .driver_data = 0x71a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2e, .steppings = 0x0040, .driver_data = 0xd },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2f, .steppings = 0x0004, .driver_data = 0x3b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0100, .driver_data = 0x838 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0200, .driver_data = 0x90d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3a, .steppings = 0x0200, .driver_data = 0x21 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3c, .steppings = 0x0008, .driver_data = 0x28 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3d, .steppings = 0x0010, .driver_data = 0x2f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0010, .driver_data = 0x42e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0040, .driver_data = 0x600 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0080, .driver_data = 0x715 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0004, .driver_data = 0x49 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0010, .driver_data = 0x1a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x45, .steppings = 0x0002, .driver_data = 0x26 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x46, .steppings = 0x0002, .driver_data = 0x1c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x47, .steppings = 0x0002, .driver_data = 0x22 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0008, .driver_data = 0x368 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0010, .driver_data = 0x411 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4d, .steppings = 0x0100, .driver_data = 0x12d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4e, .steppings = 0x0008, .driver_data = 0xf0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0008, .driver_data = 0x1000191 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0010, .driver_data = 0x2007006 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0020, .driver_data = 0x3000010 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0040, .driver_data = 0x4003605 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0080, .driver_data = 0x5003707 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0800, .driver_data = 0x7002904 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0004, .driver_data = 0x1c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0008, .driver_data = 0x700001c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0010, .driver_data = 0xf00001a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0020, .driver_data = 0xe000015 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0004, .driver_data = 0x14 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0200, .driver_data = 0x48 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0400, .driver_data = 0x28 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5e, .steppings = 0x0008, .driver_data = 0xf0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5f, .steppings = 0x0002, .driver_data = 0x3e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x66, .steppings = 0x0008, .driver_data = 0x2a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0020, .driver_data = 0xc0002f0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0040, .driver_data = 0xd0003e7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6c, .steppings = 0x0002, .driver_data = 0x10002b0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0002, .driver_data = 0x42 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0100, .driver_data = 0x24 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7e, .steppings = 0x0020, .driver_data = 0xc6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8a, .steppings = 0x0002, .driver_data = 0x33 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0002, .driver_data = 0xb8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0004, .driver_data = 0x38 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8d, .steppings = 0x0002, .driver_data = 0x52 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0200, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0400, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0800, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x1000, .driver_data = 0xfc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .driver_data = 0x2c000390 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0080, .driver_data = 0x2b000603 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .driver_data = 0x2c000390 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .driver_data = 0x2c000390 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .driver_data = 0x2c000390 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x96, .steppings = 0x0002, .driver_data = 0x1a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0004, .driver_data = 0x37 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0020, .driver_data = 0x37 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0004, .driver_data = 0x37 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0020, .driver_data = 0x37 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0008, .driver_data = 0x435 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .driver_data = 0x435 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9c, .steppings = 0x0001, .driver_data = 0x24000026 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0200, .driver_data = 0xf8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0400, .driver_data = 0xf8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0800, .driver_data = 0xf6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x1000, .driver_data = 0xf8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x2000, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0004, .driver_data = 0xfc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0008, .driver_data = 0xfc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0020, .driver_data = 0xfc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0001, .driver_data = 0xfe },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0002, .driver_data = 0xfc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa7, .steppings = 0x0002, .driver_data = 0x62 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaa, .steppings = 0x0010, .driver_data = 0x20 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0002, .driver_data = 0x12b },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0004, .driver_data = 0x4123 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0008, .driver_data = 0x4123 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0100, .driver_data = 0x4123 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbe, .steppings = 0x0001, .driver_data = 0x1a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0004, .driver_data = 0x21000283 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0002, .driver_data = 0x21000283 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0080, .driver_data = 0x12 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .driver_data = 0x15 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x01, .steppings = 0x0004, .driver_data = 0x2e },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0010, .driver_data = 0x21 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .driver_data = 0x2c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0040, .driver_data = 0x10 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0080, .driver_data = 0x39 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0200, .driver_data = 0x2f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0004, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0008, .driver_data = 0xc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0010, .driver_data = 0x17 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0002, .driver_data = 0x17 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0008, .driver_data = 0x5 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0010, .driver_data = 0x6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0080, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0100, .driver_data = 0xe },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0200, .driver_data = 0x3 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0400, .driver_data = 0x4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0004, .driver_data = 0xf },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0010, .driver_data = 0x4 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0020, .driver_data = 0x8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0100, .driver_data = 0x9 },
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 2a397da43923..371ca6eac00e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -320,7 +320,7 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
}
/* write microcode via MSR 0x79 */
- native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ native_wrmsrq(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
rev = intel_get_microcode_revision();
if (rev != mc->hdr.rev)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3e2533954675..c78f860419d6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -30,6 +30,7 @@
#include <asm/reboot.h>
#include <asm/nmi.h>
#include <clocksource/hyperv_timer.h>
+#include <asm/msr.h>
#include <asm/numa.h>
#include <asm/svm.h>
@@ -70,7 +71,7 @@ u64 hv_get_non_nested_msr(unsigned int reg)
if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present)
hv_ivm_msr_read(reg, &value);
else
- rdmsrl(reg, value);
+ rdmsrq(reg, value);
return value;
}
EXPORT_SYMBOL_GPL(hv_get_non_nested_msr);
@@ -82,9 +83,9 @@ void hv_set_non_nested_msr(unsigned int reg, u64 value)
/* Write proxy bit via wrmsl instruction */
if (hv_is_sint_msr(reg))
- wrmsrl(reg, value | 1 << 20);
+ wrmsrq(reg, value | 1 << 20);
} else {
- wrmsrl(reg, value);
+ wrmsrq(reg, value);
}
}
EXPORT_SYMBOL_GPL(hv_set_non_nested_msr);
@@ -345,7 +346,7 @@ static unsigned long hv_get_tsc_khz(void)
{
unsigned long freq;
- rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+ rdmsrq(HV_X64_MSR_TSC_FREQUENCY, freq);
return freq / 1000;
}
@@ -541,7 +542,7 @@ static void __init ms_hyperv_init_platform(void)
*/
u64 hv_lapic_frequency;
- rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
+ rdmsrq(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
lapic_timer_period = hv_lapic_frequency;
pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
@@ -574,7 +575,7 @@ static void __init ms_hyperv_init_platform(void)
* setting of this MSR bit should happen before init_intel()
* is called.
*/
- wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC);
+ wrmsrq(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e2c6b471d230..8c18327eb10b 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -593,7 +593,7 @@ static void get_fixed_ranges(mtrr_type *frs)
void mtrr_save_fixed_ranges(void *info)
{
- if (boot_cpu_has(X86_FEATURE_MTRR))
+ if (mtrr_state.have_fixed)
get_fixed_ranges(mtrr_state.fixed_ranges);
}
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 0c13b0befd8a..d8a04b195da2 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -2,4 +2,6 @@
obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
+
+# To allow define_trace.h's recursive include:
CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index cf29681d01e0..7109cbfcad4f 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -22,6 +22,7 @@
#include <linux/cpuhotplug.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include <asm/resctrl.h>
#include "internal.h"
@@ -60,7 +61,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
[RDT_RESOURCE_L3] =
{
.r_resctrl = {
- .rid = RDT_RESOURCE_L3,
.name = "L3",
.ctrl_scope = RESCTRL_L3_CACHE,
.mon_scope = RESCTRL_L3_CACHE,
@@ -74,7 +74,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
[RDT_RESOURCE_L2] =
{
.r_resctrl = {
- .rid = RDT_RESOURCE_L2,
.name = "L2",
.ctrl_scope = RESCTRL_L2_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2),
@@ -86,7 +85,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
[RDT_RESOURCE_MBA] =
{
.r_resctrl = {
- .rid = RDT_RESOURCE_MBA,
.name = "MB",
.ctrl_scope = RESCTRL_L3_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA),
@@ -96,7 +94,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
[RDT_RESOURCE_SMBA] =
{
.r_resctrl = {
- .rid = RDT_RESOURCE_SMBA,
.name = "SMBA",
.ctrl_scope = RESCTRL_L3_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA),
@@ -145,10 +142,10 @@ static inline void cache_alloc_hsw_probe(void)
struct rdt_resource *r = &hw_res->r_resctrl;
u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;
- if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
+ if (wrmsrq_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
return;
- rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
+ rdmsrq(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
/* If all the bits were set in MSR, return success */
if (l3_cbm_0 != max_cbm)
@@ -164,21 +161,6 @@ static inline void cache_alloc_hsw_probe(void)
rdt_alloc_capable = true;
}
-bool is_mba_sc(struct rdt_resource *r)
-{
- if (!r)
- r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-
- /*
- * The software controller support is only applicable to MBA resource.
- * Make sure to check for resource type.
- */
- if (r->rid != RDT_RESOURCE_MBA)
- return false;
-
- return r->membw.mba_sc;
-}
-
/*
* rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
* exposed to user interface and the h/w understandable delay values.
@@ -309,7 +291,7 @@ static void mba_wrmsr_amd(struct msr_param *m)
unsigned int i;
for (i = m->low; i < m->high; i++)
- wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
+ wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
/*
@@ -334,7 +316,7 @@ static void mba_wrmsr_intel(struct msr_param *m)
/* Write the delay values for mba. */
for (i = m->low; i < m->high; i++)
- wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
+ wrmsrq(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}
static void cat_wrmsr(struct msr_param *m)
@@ -344,7 +326,7 @@ static void cat_wrmsr(struct msr_param *m)
unsigned int i;
for (i = m->low; i < m->high; i++)
- wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
+ wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
@@ -737,7 +719,7 @@ struct rdt_options {
bool force_off, force_on;
};
-static struct rdt_options rdt_options[] __initdata = {
+static struct rdt_options rdt_options[] __ro_after_init = {
RDT_OPT(RDT_FLAG_CMT, "cmt", X86_FEATURE_CQM_OCCUP_LLC),
RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
@@ -777,7 +759,7 @@ static int __init set_rdt_options(char *str)
}
__setup("rdt", set_rdt_options);
-bool __init rdt_cpu_has(int flag)
+bool rdt_cpu_has(int flag)
{
bool ret = boot_cpu_has(flag);
struct rdt_options *o;
@@ -797,7 +779,7 @@ bool __init rdt_cpu_has(int flag)
return ret;
}
-__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
+bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
if (!rdt_cpu_has(X86_FEATURE_BMEC))
return false;
@@ -1011,7 +993,11 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
static int __init resctrl_arch_late_init(void)
{
struct rdt_resource *r;
- int state, ret;
+ int state, ret, i;
+
+ /* for_each_rdt_resource() requires all rid to be initialised. */
+ for (i = 0; i < RDT_NUM_RESOURCES; i++)
+ rdt_resources_all[i].r_resctrl.rid = i;
/*
* Initialize functions(or definitions) that are different
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 0a0ac5f6112e..1189c0df4ad7 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -16,277 +16,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpu.h>
-#include <linux/kernfs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/tick.h>
#include "internal.h"
-struct rdt_parse_data {
- struct rdtgroup *rdtgrp;
- char *buf;
-};
-
-typedef int (ctrlval_parser_t)(struct rdt_parse_data *data,
- struct resctrl_schema *s,
- struct rdt_ctrl_domain *d);
-
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and max bandwidth values specified by the
- * hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
-{
- int ret;
- u32 bw;
-
- /*
- * Only linear delay values is supported for current Intel SKUs.
- */
- if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
- rdt_last_cmd_puts("No support for non-linear MB domains\n");
- return false;
- }
-
- ret = kstrtou32(buf, 10, &bw);
- if (ret) {
- rdt_last_cmd_printf("Invalid MB value %s\n", buf);
- return false;
- }
-
- /* Nothing else to do if software controller is enabled. */
- if (is_mba_sc(r)) {
- *data = bw;
- return true;
- }
-
- if (bw < r->membw.min_bw || bw > r->membw.max_bw) {
- rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
- bw, r->membw.min_bw, r->membw.max_bw);
- return false;
- }
-
- *data = roundup(bw, (unsigned long)r->membw.bw_gran);
- return true;
-}
-
-static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_ctrl_domain *d)
-{
- struct resctrl_staged_config *cfg;
- u32 closid = data->rdtgrp->closid;
- struct rdt_resource *r = s->res;
- u32 bw_val;
-
- cfg = &d->staged_config[s->conf_type];
- if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
- return -EINVAL;
- }
-
- if (!bw_validate(data->buf, &bw_val, r))
- return -EINVAL;
-
- if (is_mba_sc(r)) {
- d->mbps_val[closid] = bw_val;
- return 0;
- }
-
- cfg->new_ctrl = bw_val;
- cfg->have_new_ctrl = true;
-
- return 0;
-}
-
-/*
- * Check whether a cache bit mask is valid.
- * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
- * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
- * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
- *
- * Haswell does not support a non-contiguous 1s value and additionally
- * requires at least two bits set.
- * AMD allows non-contiguous bitmasks.
- */
-static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
-{
- u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1;
- unsigned int cbm_len = r->cache.cbm_len;
- unsigned long first_bit, zero_bit, val;
- int ret;
-
- ret = kstrtoul(buf, 16, &val);
- if (ret) {
- rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
- return false;
- }
-
- if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) {
- rdt_last_cmd_puts("Mask out of range\n");
- return false;
- }
-
- first_bit = find_first_bit(&val, cbm_len);
- zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
-
- /* Are non-contiguous bitmasks allowed? */
- if (!r->cache.arch_has_sparse_bitmasks &&
- (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
- rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
- return false;
- }
-
- if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("Need at least %d bits in the mask\n",
- r->cache.min_cbm_bits);
- return false;
- }
-
- *data = val;
- return true;
-}
-
-/*
- * Read one cache bit mask (hex). Check that it is valid for the current
- * resource type.
- */
-static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_ctrl_domain *d)
-{
- struct rdtgroup *rdtgrp = data->rdtgrp;
- struct resctrl_staged_config *cfg;
- struct rdt_resource *r = s->res;
- u32 cbm_val;
-
- cfg = &d->staged_config[s->conf_type];
- if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
- return -EINVAL;
- }
-
- /*
- * Cannot set up more than one pseudo-locked region in a cache
- * hierarchy.
- */
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
- rdtgroup_pseudo_locked_in_hierarchy(d)) {
- rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
- return -EINVAL;
- }
-
- if (!cbm_validate(data->buf, &cbm_val, r))
- return -EINVAL;
-
- if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
- rdtgrp->mode == RDT_MODE_SHAREABLE) &&
- rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
- rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
- return -EINVAL;
- }
-
- /*
- * The CBM may not overlap with the CBM of another closid if
- * either is exclusive.
- */
- if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
- rdt_last_cmd_puts("Overlaps with exclusive group\n");
- return -EINVAL;
- }
-
- if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
- if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- rdt_last_cmd_puts("Overlaps with other group\n");
- return -EINVAL;
- }
- }
-
- cfg->new_ctrl = cbm_val;
- cfg->have_new_ctrl = true;
-
- return 0;
-}
-
-/*
- * For each domain in this resource we expect to find a series of:
- * id=mask
- * separated by ";". The "id" is in decimal, and must match one of
- * the "id"s for this resource.
- */
-static int parse_line(char *line, struct resctrl_schema *s,
- struct rdtgroup *rdtgrp)
-{
- enum resctrl_conf_type t = s->conf_type;
- ctrlval_parser_t *parse_ctrlval = NULL;
- struct resctrl_staged_config *cfg;
- struct rdt_resource *r = s->res;
- struct rdt_parse_data data;
- struct rdt_ctrl_domain *d;
- char *dom = NULL, *id;
- unsigned long dom_id;
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- switch (r->schema_fmt) {
- case RESCTRL_SCHEMA_BITMAP:
- parse_ctrlval = &parse_cbm;
- break;
- case RESCTRL_SCHEMA_RANGE:
- parse_ctrlval = &parse_bw;
- break;
- }
-
- if (WARN_ON_ONCE(!parse_ctrlval))
- return -EINVAL;
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
- (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
- rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
- return -EINVAL;
- }
-
-next:
- if (!line || line[0] == '\0')
- return 0;
- dom = strsep(&line, ";");
- id = strsep(&dom, "=");
- if (!dom || kstrtoul(id, 10, &dom_id)) {
- rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
- return -EINVAL;
- }
- dom = strim(dom);
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- if (d->hdr.id == dom_id) {
- data.buf = dom;
- data.rdtgrp = rdtgrp;
- if (parse_ctrlval(&data, s, d))
- return -EINVAL;
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- cfg = &d->staged_config[t];
- /*
- * In pseudo-locking setup mode and just
- * parsed a valid CBM that should be
- * pseudo-locked. Only one locked region per
- * resource group and domain so just do
- * the required initialization for single
- * region and return.
- */
- rdtgrp->plr->s = s;
- rdtgrp->plr->d = d;
- rdtgrp->plr->cbm = cfg->new_ctrl;
- d->plr = rdtgrp->plr;
- return 0;
- }
- goto next;
- }
- }
- return -EINVAL;
-}
-
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
@@ -351,100 +83,6 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
return 0;
}
-static int rdtgroup_parse_resource(char *resname, char *tok,
- struct rdtgroup *rdtgrp)
-{
- struct resctrl_schema *s;
-
- list_for_each_entry(s, &resctrl_schema_all, list) {
- if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
- return parse_line(tok, s, rdtgrp);
- }
- rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
- return -EINVAL;
-}
-
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct resctrl_schema *s;
- struct rdtgroup *rdtgrp;
- struct rdt_resource *r;
- char *tok, *resname;
- int ret = 0;
-
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
- rdt_last_cmd_clear();
-
- /*
- * No changes to pseudo-locked region allowed. It has to be removed
- * and re-created instead.
- */
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- ret = -EINVAL;
- rdt_last_cmd_puts("Resource group is pseudo-locked\n");
- goto out;
- }
-
- rdt_staged_configs_clear();
-
- while ((tok = strsep(&buf, "\n")) != NULL) {
- resname = strim(strsep(&tok, ":"));
- if (!tok) {
- rdt_last_cmd_puts("Missing ':'\n");
- ret = -EINVAL;
- goto out;
- }
- if (tok[0] == '\0') {
- rdt_last_cmd_printf("Missing '%s' value\n", resname);
- ret = -EINVAL;
- goto out;
- }
- ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
- if (ret)
- goto out;
- }
-
- list_for_each_entry(s, &resctrl_schema_all, list) {
- r = s->res;
-
- /*
- * Writes to mba_sc resources update the software controller,
- * not the control MSR.
- */
- if (is_mba_sc(r))
- continue;
-
- ret = resctrl_arch_update_domains(r, rdtgrp->closid);
- if (ret)
- goto out;
- }
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- /*
- * If pseudo-locking fails we keep the resource group in
- * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
- * active and updated for just the domain the pseudo-locked
- * region was requested for.
- */
- ret = rdtgroup_pseudo_lock_create(rdtgrp);
- }
-
-out:
- rdt_staged_configs_clear();
- rdtgroup_kn_unlock(of->kn);
- return ret ?: nbytes;
-}
-
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type)
{
@@ -453,276 +91,3 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
return hw_dom->ctrl_val[idx];
}
-
-static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
-{
- struct rdt_resource *r = schema->res;
- struct rdt_ctrl_domain *dom;
- bool sep = false;
- u32 ctrl_val;
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
- if (sep)
- seq_puts(s, ";");
-
- if (is_mba_sc(r))
- ctrl_val = dom->mbps_val[closid];
- else
- ctrl_val = resctrl_arch_get_config(r, dom, closid,
- schema->conf_type);
-
- seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val);
- sep = true;
- }
- seq_puts(s, "\n");
-}
-
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct resctrl_schema *schema;
- struct rdtgroup *rdtgrp;
- int ret = 0;
- u32 closid;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (rdtgrp) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- list_for_each_entry(schema, &resctrl_schema_all, list) {
- seq_printf(s, "%s:uninitialized\n", schema->name);
- }
- } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- if (!rdtgrp->plr->d) {
- rdt_last_cmd_clear();
- rdt_last_cmd_puts("Cache domain offline\n");
- ret = -ENODEV;
- } else {
- seq_printf(s, "%s:%d=%x\n",
- rdtgrp->plr->s->res->name,
- rdtgrp->plr->d->hdr.id,
- rdtgrp->plr->cbm);
- }
- } else {
- closid = rdtgrp->closid;
- list_for_each_entry(schema, &resctrl_schema_all, list) {
- if (closid < schema->num_closid)
- show_doms(s, schema, closid);
- }
- }
- } else {
- ret = -ENOENT;
- }
- rdtgroup_kn_unlock(of->kn);
- return ret;
-}
-
-static int smp_mon_event_count(void *arg)
-{
- mon_event_count(arg);
-
- return 0;
-}
-
-ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct rdtgroup *rdtgrp;
- int ret = 0;
-
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
- rdt_last_cmd_clear();
-
- if (!strcmp(buf, "mbm_local_bytes")) {
- if (resctrl_arch_is_mbm_local_enabled())
- rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
- else
- ret = -EINVAL;
- } else if (!strcmp(buf, "mbm_total_bytes")) {
- if (resctrl_arch_is_mbm_total_enabled())
- rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
- else
- ret = -EINVAL;
- } else {
- ret = -EINVAL;
- }
-
- if (ret)
- rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);
-
- rdtgroup_kn_unlock(of->kn);
-
- return ret ?: nbytes;
-}
-
-int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
- int ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
-
- if (rdtgrp) {
- switch (rdtgrp->mba_mbps_event) {
- case QOS_L3_MBM_LOCAL_EVENT_ID:
- seq_puts(s, "mbm_local_bytes\n");
- break;
- case QOS_L3_MBM_TOTAL_EVENT_ID:
- seq_puts(s, "mbm_total_bytes\n");
- break;
- default:
- pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
- ret = -EINVAL;
- break;
- }
- } else {
- ret = -ENOENT;
- }
-
- rdtgroup_kn_unlock(of->kn);
-
- return ret;
-}
-
-struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
- struct list_head **pos)
-{
- struct rdt_domain_hdr *d;
- struct list_head *l;
-
- list_for_each(l, h) {
- d = list_entry(l, struct rdt_domain_hdr, list);
- /* When id is found, return its domain. */
- if (id == d->id)
- return d;
- /* Stop searching when finding id's position in sorted list. */
- if (id < d->id)
- break;
- }
-
- if (pos)
- *pos = l;
-
- return NULL;
-}
-
-void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
- cpumask_t *cpumask, int evtid, int first)
-{
- int cpu;
-
- /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- /*
- * Setup the parameters to pass to mon_event_count() to read the data.
- */
- rr->rgrp = rdtgrp;
- rr->evtid = evtid;
- rr->r = r;
- rr->d = d;
- rr->first = first;
- rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
- if (IS_ERR(rr->arch_mon_ctx)) {
- rr->err = -EINVAL;
- return;
- }
-
- cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
-
- /*
- * cpumask_any_housekeeping() prefers housekeeping CPUs, but
- * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
- * MPAM's resctrl_arch_rmid_read() is unable to read the
- * counters on some platforms if its called in IRQ context.
- */
- if (tick_nohz_full_cpu(cpu))
- smp_call_function_any(cpumask, mon_event_count, rr, 1);
- else
- smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
-
- resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
-}
-
-int rdtgroup_mondata_show(struct seq_file *m, void *arg)
-{
- struct kernfs_open_file *of = m->private;
- struct rdt_domain_hdr *hdr;
- struct rmid_read rr = {0};
- struct rdt_mon_domain *d;
- u32 resid, evtid, domid;
- struct rdtgroup *rdtgrp;
- struct rdt_resource *r;
- union mon_data_bits md;
- int ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- ret = -ENOENT;
- goto out;
- }
-
- md.priv = of->kn->priv;
- resid = md.u.rid;
- domid = md.u.domid;
- evtid = md.u.evtid;
- r = resctrl_arch_get_resource(resid);
-
- if (md.u.sum) {
- /*
- * This file requires summing across all domains that share
- * the L3 cache id that was provided in the "domid" field of the
- * mon_data_bits union. Search all domains in the resource for
- * one that matches this cache id.
- */
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->ci->id == domid) {
- rr.ci = d->ci;
- mon_event_read(&rr, r, NULL, rdtgrp,
- &d->ci->shared_cpu_map, evtid, false);
- goto checkresult;
- }
- }
- ret = -ENOENT;
- goto out;
- } else {
- /*
- * This file provides data from a single domain. Search
- * the resource to find the domain with "domid".
- */
- hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
- if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
- ret = -ENOENT;
- goto out;
- }
- d = container_of(hdr, struct rdt_mon_domain, hdr);
- mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
- }
-
-checkresult:
-
- if (rr.err == -EIO)
- seq_puts(m, "Error\n");
- else if (rr.err == -EINVAL)
- seq_puts(m, "Unavailable\n");
- else
- seq_printf(m, "%llu\n", rr.val);
-
-out:
- rdtgroup_kn_unlock(of->kn);
- return ret;
-}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index eaae99602b61..5e3c41b36437 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -3,28 +3,21 @@
#define _ASM_X86_RESCTRL_INTERNAL_H
#include <linux/resctrl.h>
-#include <linux/sched.h>
-#include <linux/kernfs.h>
-#include <linux/fs_context.h>
-#include <linux/jump_label.h>
-#include <linux/tick.h>
-
-#include <asm/resctrl.h>
#define L3_QOS_CDP_ENABLE 0x01ULL
#define L2_QOS_CDP_ENABLE 0x01ULL
-#define CQM_LIMBOCHECK_INTERVAL 1000
-
#define MBM_CNTR_WIDTH_BASE 24
-#define MBM_OVERFLOW_INTERVAL 1000
-#define MAX_MBA_BW 100u
+
#define MBA_IS_LINEAR 0x4
+
#define MBM_CNTR_WIDTH_OFFSET_AMD 20
#define RMID_VAL_ERROR BIT_ULL(63)
+
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+
/*
* With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
* data to be returned. The counter width is discovered from the hardware
@@ -33,278 +26,6 @@
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
/**
- * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
- * aren't marked nohz_full
- * @mask: The mask to pick a CPU from.
- * @exclude_cpu:The CPU to avoid picking.
- *
- * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
- * CPUs that don't use nohz_full, these are preferred. Pass
- * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
- *
- * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
- */
-static inline unsigned int
-cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
-{
- unsigned int cpu, hk_cpu;
-
- if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
- cpu = cpumask_any(mask);
- else
- cpu = cpumask_any_but(mask, exclude_cpu);
-
- /* Only continue if tick_nohz_full_mask has been initialized. */
- if (!tick_nohz_full_enabled())
- return cpu;
-
- /* If the CPU picked isn't marked nohz_full nothing more needs doing. */
- if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
- return cpu;
-
- /* Try to find a CPU that isn't nohz_full to use in preference */
- hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
- if (hk_cpu == exclude_cpu)
- hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
-
- if (hk_cpu < nr_cpu_ids)
- cpu = hk_cpu;
-
- return cpu;
-}
-
-struct rdt_fs_context {
- struct kernfs_fs_context kfc;
- bool enable_cdpl2;
- bool enable_cdpl3;
- bool enable_mba_mbps;
- bool enable_debug;
-};
-
-static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
-{
- struct kernfs_fs_context *kfc = fc->fs_private;
-
- return container_of(kfc, struct rdt_fs_context, kfc);
-}
-
-/**
- * struct mon_evt - Entry in the event list of a resource
- * @evtid: event id
- * @name: name of the event
- * @configurable: true if the event is configurable
- * @list: entry in &rdt_resource->evt_list
- */
-struct mon_evt {
- enum resctrl_event_id evtid;
- char *name;
- bool configurable;
- struct list_head list;
-};
-
-/**
- * union mon_data_bits - Monitoring details for each event file.
- * @priv: Used to store monitoring event data in @u
- * as kernfs private data.
- * @u.rid: Resource id associated with the event file.
- * @u.evtid: Event id associated with the event file.
- * @u.sum: Set when event must be summed across multiple
- * domains.
- * @u.domid: When @u.sum is zero this is the domain to which
- * the event file belongs. When @sum is one this
- * is the id of the L3 cache that all domains to be
- * summed share.
- * @u: Name of the bit fields struct.
- */
-union mon_data_bits {
- void *priv;
- struct {
- unsigned int rid : 10;
- enum resctrl_event_id evtid : 7;
- unsigned int sum : 1;
- unsigned int domid : 14;
- } u;
-};
-
-/**
- * struct rmid_read - Data passed across smp_call*() to read event count.
- * @rgrp: Resource group for which the counter is being read. If it is a parent
- * resource group then its event count is summed with the count from all
- * its child resource groups.
- * @r: Resource describing the properties of the event being read.
- * @d: Domain that the counter should be read from. If NULL then sum all
- * domains in @r sharing L3 @ci.id
- * @evtid: Which monitor event to read.
- * @first: Initialize MBM counter when true.
- * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
- * @err: Error encountered when reading counter.
- * @val: Returned value of event counter. If @rgrp is a parent resource group,
- * @val includes the sum of event counts from its child resource groups.
- * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id,
- * (summed across child resource groups if @rgrp is a parent resource group).
- * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
- */
-struct rmid_read {
- struct rdtgroup *rgrp;
- struct rdt_resource *r;
- struct rdt_mon_domain *d;
- enum resctrl_event_id evtid;
- bool first;
- struct cacheinfo *ci;
- int err;
- u64 val;
- void *arch_mon_ctx;
-};
-
-extern struct list_head resctrl_schema_all;
-extern bool resctrl_mounted;
-
-enum rdt_group_type {
- RDTCTRL_GROUP = 0,
- RDTMON_GROUP,
- RDT_NUM_GROUP,
-};
-
-/**
- * enum rdtgrp_mode - Mode of a RDT resource group
- * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
- * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
- * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
- * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
- * allowed AND the allocations are Cache Pseudo-Locked
- * @RDT_NUM_MODES: Total number of modes
- *
- * The mode of a resource group enables control over the allowed overlap
- * between allocations associated with different resource groups (classes
- * of service). User is able to modify the mode of a resource group by
- * writing to the "mode" resctrl file associated with the resource group.
- *
- * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
- * writing the appropriate text to the "mode" file. A resource group enters
- * "pseudo-locked" mode after the schemata is written while the resource
- * group is in "pseudo-locksetup" mode.
- */
-enum rdtgrp_mode {
- RDT_MODE_SHAREABLE = 0,
- RDT_MODE_EXCLUSIVE,
- RDT_MODE_PSEUDO_LOCKSETUP,
- RDT_MODE_PSEUDO_LOCKED,
-
- /* Must be last */
- RDT_NUM_MODES,
-};
-
-/**
- * struct mongroup - store mon group's data in resctrl fs.
- * @mon_data_kn: kernfs node for the mon_data directory
- * @parent: parent rdtgrp
- * @crdtgrp_list: child rdtgroup node list
- * @rmid: rmid for this rdtgroup
- */
-struct mongroup {
- struct kernfs_node *mon_data_kn;
- struct rdtgroup *parent;
- struct list_head crdtgrp_list;
- u32 rmid;
-};
-
-/**
- * struct rdtgroup - store rdtgroup's data in resctrl file system.
- * @kn: kernfs node
- * @rdtgroup_list: linked list for all rdtgroups
- * @closid: closid for this rdtgroup
- * @cpu_mask: CPUs assigned to this rdtgroup
- * @flags: status bits
- * @waitcount: how many cpus expect to find this
- * group when they acquire rdtgroup_mutex
- * @type: indicates type of this rdtgroup - either
- * monitor only or ctrl_mon group
- * @mon: mongroup related data
- * @mode: mode of resource group
- * @mba_mbps_event: input monitoring event id when mba_sc is enabled
- * @plr: pseudo-locked region
- */
-struct rdtgroup {
- struct kernfs_node *kn;
- struct list_head rdtgroup_list;
- u32 closid;
- struct cpumask cpu_mask;
- int flags;
- atomic_t waitcount;
- enum rdt_group_type type;
- struct mongroup mon;
- enum rdtgrp_mode mode;
- enum resctrl_event_id mba_mbps_event;
- struct pseudo_lock_region *plr;
-};
-
-/* rdtgroup.flags */
-#define RDT_DELETED 1
-
-/* rftype.flags */
-#define RFTYPE_FLAGS_CPUS_LIST 1
-
-/*
- * Define the file type flags for base and info directories.
- */
-#define RFTYPE_INFO BIT(0)
-#define RFTYPE_BASE BIT(1)
-#define RFTYPE_CTRL BIT(4)
-#define RFTYPE_MON BIT(5)
-#define RFTYPE_TOP BIT(6)
-#define RFTYPE_RES_CACHE BIT(8)
-#define RFTYPE_RES_MB BIT(9)
-#define RFTYPE_DEBUG BIT(10)
-#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
-#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
-#define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP)
-#define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
-#define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON)
-
-/* List of all resource groups */
-extern struct list_head rdt_all_groups;
-
-extern int max_name_width;
-
-/**
- * struct rftype - describe each file in the resctrl file system
- * @name: File name
- * @mode: Access mode
- * @kf_ops: File operations
- * @flags: File specific RFTYPE_FLAGS_* flags
- * @fflags: File specific RFTYPE_* flags
- * @seq_show: Show content of the file
- * @write: Write to the file
- */
-struct rftype {
- char *name;
- umode_t mode;
- const struct kernfs_ops *kf_ops;
- unsigned long flags;
- unsigned long fflags;
-
- int (*seq_show)(struct kernfs_open_file *of,
- struct seq_file *sf, void *v);
- /*
- * write() is the generic write callback which maps directly to
- * kernfs write operation and overrides all other operations.
- * Maximum write size is determined by ->max_write_len.
- */
- ssize_t (*write)(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off);
-};
-
-/**
- * struct mbm_state - status for each MBM counter in each domain
- * @prev_bw_bytes: Previous bytes value read for bandwidth calculation
- * @prev_bw: The most recent bandwidth in MBps
- */
-struct mbm_state {
- u64 prev_bw_bytes;
- u32 prev_bw;
-};
-
-/**
* struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s
* return value.
* @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes)
@@ -401,24 +122,7 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r
return container_of(r, struct rdt_hw_resource, r_resctrl);
}
-extern struct mutex rdtgroup_mutex;
-
-static inline const char *rdt_kn_name(const struct kernfs_node *kn)
-{
- return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex));
-}
-
extern struct rdt_hw_resource rdt_resources_all[];
-extern struct rdtgroup rdtgroup_default;
-extern struct dentry *debugfs_resctrl;
-extern enum resctrl_event_id mba_mbps_default_event;
-
-static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
-{
- return rdt_resources_all[l].cdp_enabled;
-}
-
-int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);
@@ -455,99 +159,14 @@ union cpuid_0x10_x_edx {
unsigned int full;
};
-void rdt_last_cmd_clear(void);
-void rdt_last_cmd_puts(const char *s);
-__printf(1, 2)
-void rdt_last_cmd_printf(const char *fmt, ...);
-
void rdt_ctrl_update(void *arg);
-struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
-void rdtgroup_kn_unlock(struct kernfs_node *kn);
-int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
-int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
- umode_t mask);
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off);
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v);
-ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off);
-int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
- unsigned long cbm, int closid, bool exclusive);
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
- unsigned long cbm);
-enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
-int rdtgroup_tasks_assigned(struct rdtgroup *r);
-int closids_supported(void);
-void closid_free(int closid);
-int alloc_rmid(u32 closid);
-void free_rmid(u32 closid, u32 rmid);
-int rdt_get_mon_l3_config(struct rdt_resource *r);
-void resctrl_mon_resource_exit(void);
-bool __init rdt_cpu_has(int flag);
-void mon_event_count(void *info);
-int rdtgroup_mondata_show(struct seq_file *m, void *arg);
-void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
- cpumask_t *cpumask, int evtid, int first);
-int __init resctrl_mon_resource_init(void);
-void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
- unsigned long delay_ms,
- int exclude_cpu);
-void mbm_handle_overflow(struct work_struct *work);
-void __init intel_rdt_mbm_apply_quirk(void);
-bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
- int exclude_cpu);
-void cqm_handle_limbo(struct work_struct *work);
-bool has_busy_rmid(struct rdt_mon_domain *d);
-void __check_limbo(struct rdt_mon_domain *d, bool force_free);
-void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
-void resctrl_file_fflags_init(const char *config, unsigned long fflags);
-void rdt_staged_configs_clear(void);
-bool closid_allocated(unsigned int closid);
-int resctrl_find_cleanest_closid(void);
-
-#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
-int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
-int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
-int rdt_pseudo_lock_init(void);
-void rdt_pseudo_lock_release(void);
-int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
-void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-#else
-static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
-{
- return -EOPNOTSUPP;
-}
-static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
-{
- return -EOPNOTSUPP;
-}
-
-static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
-{
- return false;
-}
+int rdt_get_mon_l3_config(struct rdt_resource *r);
-static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
-{
- return false;
-}
+bool rdt_cpu_has(int flag);
-static inline int rdt_pseudo_lock_init(void) { return 0; }
-static inline void rdt_pseudo_lock_release(void) { }
-static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
-{
- return -EOPNOTSUPP;
-}
+void __init intel_rdt_mbm_apply_quirk(void);
-static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { }
-#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */
+void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index a93ed7d2a160..c261558276cd 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -18,62 +18,12 @@
#define pr_fmt(fmt) "resctrl: " fmt
#include <linux/cpu.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
+#include <linux/resctrl.h>
#include <asm/cpu_device_id.h>
-#include <asm/resctrl.h>
+#include <asm/msr.h>
#include "internal.h"
-#include "trace.h"
-
-/**
- * struct rmid_entry - dirty tracking for all RMID.
- * @closid: The CLOSID for this entry.
- * @rmid: The RMID for this entry.
- * @busy: The number of domains with cached data using this RMID.
- * @list: Member of the rmid_free_lru list when busy == 0.
- *
- * Depending on the architecture the correct monitor is accessed using
- * both @closid and @rmid, or @rmid only.
- *
- * Take the rdtgroup_mutex when accessing.
- */
-struct rmid_entry {
- u32 closid;
- u32 rmid;
- int busy;
- struct list_head list;
-};
-
-/*
- * @rmid_free_lru - A least recently used list of free RMIDs
- * These RMIDs are guaranteed to have an occupancy less than the
- * threshold occupancy
- */
-static LIST_HEAD(rmid_free_lru);
-
-/*
- * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
- * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
- * Indexed by CLOSID. Protected by rdtgroup_mutex.
- */
-static u32 *closid_num_dirty_rmid;
-
-/*
- * @rmid_limbo_count - count of currently unused but (potentially)
- * dirty RMIDs.
- * This counts RMIDs that no one is currently using but that
- * may have a occupancy value > resctrl_rmid_realloc_threshold. User can
- * change the threshold occupancy value.
- */
-static unsigned int rmid_limbo_count;
-
-/*
- * @rmid_entry - The entry in the limbo and free lists.
- */
-static struct rmid_entry *rmid_ptrs;
/*
* Global boolean for rdt_monitor which is true if any
@@ -86,23 +36,12 @@ bool rdt_mon_capable;
*/
unsigned int rdt_mon_features;
-/*
- * This is the threshold cache occupancy in bytes at which we will consider an
- * RMID available for re-allocation.
- */
-unsigned int resctrl_rmid_realloc_threshold;
-
-/*
- * This is the maximum value for the reallocation threshold, in bytes.
- */
-unsigned int resctrl_rmid_realloc_limit;
-
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
static int snc_nodes_per_l3_cache = 1;
/*
- * The correction factor table is documented in Documentation/arch/x86/resctrl.rst.
+ * The correction factor table is documented in Documentation/filesystems/resctrl.rst.
* If rmid > rmid threshold, MBM total and local values should be multiplied
* by the correction factor.
*
@@ -151,6 +90,7 @@ static const struct mbm_correction_factor_table {
};
static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
+
static u64 mbm_cf __read_mostly;
static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
@@ -163,33 +103,6 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
}
/*
- * x86 and arm64 differ in their handling of monitoring.
- * x86's RMID are independent numbers, there is only one source of traffic
- * with an RMID value of '1'.
- * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
- * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
- * value is no longer unique.
- * To account for this, resctrl uses an index. On x86 this is just the RMID,
- * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
- *
- * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
- * must accept an attempt to read every index.
- */
-static inline struct rmid_entry *__rmid_entry(u32 idx)
-{
- struct rmid_entry *entry;
- u32 closid, rmid;
-
- entry = &rmid_ptrs[idx];
- resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
-
- WARN_ON_ONCE(entry->closid != closid);
- WARN_ON_ONCE(entry->rmid != rmid);
-
- return entry;
-}
-
-/*
* When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by
* "snc_nodes_per_l3_cache == 1") no translation of the RMID value is
* needed. The physical RMID is the same as the logical RMID.
@@ -238,7 +151,7 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
* are error bits.
*/
wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid);
- rdmsrl(MSR_IA32_QM_CTR, msr_val);
+ rdmsrq(MSR_IA32_QM_CTR, msr_val);
if (msr_val & RMID_VAL_ERROR)
return -EIO;
@@ -260,12 +173,11 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
return &hw_dom->arch_mbm_total[rmid];
case QOS_L3_MBM_LOCAL_EVENT_ID:
return &hw_dom->arch_mbm_local[rmid];
+ default:
+ /* Never expect to get here */
+ WARN_ON_ONCE(1);
+ return NULL;
}
-
- /* Never expect to get here */
- WARN_ON_ONCE(1);
-
- return NULL;
}
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
@@ -346,769 +258,6 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
return 0;
}
-static void limbo_release_entry(struct rmid_entry *entry)
-{
- lockdep_assert_held(&rdtgroup_mutex);
-
- rmid_limbo_count--;
- list_add_tail(&entry->list, &rmid_free_lru);
-
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
- closid_num_dirty_rmid[entry->closid]--;
-}
-
-/*
- * Check the RMIDs that are marked as busy for this domain. If the
- * reported LLC occupancy is below the threshold clear the busy bit and
- * decrement the count. If the busy count gets to zero on an RMID, we
- * free the RMID
- */
-void __check_limbo(struct rdt_mon_domain *d, bool force_free)
-{
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- struct rmid_entry *entry;
- u32 idx, cur_idx = 1;
- void *arch_mon_ctx;
- bool rmid_dirty;
- u64 val = 0;
-
- arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
- if (IS_ERR(arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(arch_mon_ctx));
- return;
- }
-
- /*
- * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
- * are marked as busy for occupancy < threshold. If the occupancy
- * is less than the threshold decrement the busy counter of the
- * RMID and move it to the free list when the counter reaches 0.
- */
- for (;;) {
- idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
- if (idx >= idx_limit)
- break;
-
- entry = __rmid_entry(idx);
- if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, &val,
- arch_mon_ctx)) {
- rmid_dirty = true;
- } else {
- rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
-
- /*
- * x86's CLOSID and RMID are independent numbers, so the entry's
- * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
- * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
- * used to select the configuration. It is thus necessary to track both
- * CLOSID and RMID because there may be dependencies between them
- * on some architectures.
- */
- trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
- }
-
- if (force_free || !rmid_dirty) {
- clear_bit(idx, d->rmid_busy_llc);
- if (!--entry->busy)
- limbo_release_entry(entry);
- }
- cur_idx = idx + 1;
- }
-
- resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
-}
-
-bool has_busy_rmid(struct rdt_mon_domain *d)
-{
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
-
- return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
-}
-
-static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
-{
- struct rmid_entry *itr;
- u32 itr_idx, cmp_idx;
-
- if (list_empty(&rmid_free_lru))
- return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
-
- list_for_each_entry(itr, &rmid_free_lru, list) {
- /*
- * Get the index of this free RMID, and the index it would need
- * to be if it were used with this CLOSID.
- * If the CLOSID is irrelevant on this architecture, the two
- * index values are always the same on every entry and thus the
- * very first entry will be returned.
- */
- itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
- cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
-
- if (itr_idx == cmp_idx)
- return itr;
- }
-
- return ERR_PTR(-ENOSPC);
-}
-
-/**
- * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
- * RMID are clean, or the CLOSID that has
- * the most clean RMID.
- *
- * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
- * may not be able to allocate clean RMID. To avoid this the allocator will
- * choose the CLOSID with the most clean RMID.
- *
- * When the CLOSID and RMID are independent numbers, the first free CLOSID will
- * be returned.
- */
-int resctrl_find_cleanest_closid(void)
-{
- u32 cleanest_closid = ~0;
- int i = 0;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
- return -EIO;
-
- for (i = 0; i < closids_supported(); i++) {
- int num_dirty;
-
- if (closid_allocated(i))
- continue;
-
- num_dirty = closid_num_dirty_rmid[i];
- if (num_dirty == 0)
- return i;
-
- if (cleanest_closid == ~0)
- cleanest_closid = i;
-
- if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
- cleanest_closid = i;
- }
-
- if (cleanest_closid == ~0)
- return -ENOSPC;
-
- return cleanest_closid;
-}
-
-/*
- * For MPAM the RMID value is not unique, and has to be considered with
- * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
- * allows all domains to be managed by a single free list.
- * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
- */
-int alloc_rmid(u32 closid)
-{
- struct rmid_entry *entry;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- entry = resctrl_find_free_rmid(closid);
- if (IS_ERR(entry))
- return PTR_ERR(entry);
-
- list_del(&entry->list);
- return entry->rmid;
-}
-
-static void add_rmid_to_limbo(struct rmid_entry *entry)
-{
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- struct rdt_mon_domain *d;
- u32 idx;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
-
- entry->busy = 0;
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- /*
- * For the first limbo RMID in the domain,
- * setup up the limbo worker.
- */
- if (!has_busy_rmid(d))
- cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
- RESCTRL_PICK_ANY_CPU);
- set_bit(idx, d->rmid_busy_llc);
- entry->busy++;
- }
-
- rmid_limbo_count++;
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
- closid_num_dirty_rmid[entry->closid]++;
-}
-
-void free_rmid(u32 closid, u32 rmid)
-{
- u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- struct rmid_entry *entry;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- /*
- * Do not allow the default rmid to be free'd. Comparing by index
- * allows architectures that ignore the closid parameter to avoid an
- * unnecessary check.
- */
- if (!resctrl_arch_mon_capable() ||
- idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
- RESCTRL_RESERVED_RMID))
- return;
-
- entry = __rmid_entry(idx);
-
- if (resctrl_arch_is_llc_occupancy_enabled())
- add_rmid_to_limbo(entry);
- else
- list_add_tail(&entry->list, &rmid_free_lru);
-}
-
-static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
- u32 rmid, enum resctrl_event_id evtid)
-{
- u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
-
- switch (evtid) {
- case QOS_L3_MBM_TOTAL_EVENT_ID:
- return &d->mbm_total[idx];
- case QOS_L3_MBM_LOCAL_EVENT_ID:
- return &d->mbm_local[idx];
- default:
- return NULL;
- }
-}
-
-static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
-{
- int cpu = smp_processor_id();
- struct rdt_mon_domain *d;
- struct mbm_state *m;
- int err, ret;
- u64 tval = 0;
-
- if (rr->first) {
- resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
- m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
- if (m)
- memset(m, 0, sizeof(struct mbm_state));
- return 0;
- }
-
- if (rr->d) {
- /* Reading a single domain, must be on a CPU in that domain. */
- if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
- return -EINVAL;
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
- rr->evtid, &tval, rr->arch_mon_ctx);
- if (rr->err)
- return rr->err;
-
- rr->val += tval;
-
- return 0;
- }
-
- /* Summing domains that share a cache, must be on a CPU for that cache. */
- if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
- return -EINVAL;
-
- /*
- * Legacy files must report the sum of an event across all
- * domains that share the same L3 cache instance.
- * Report success if a read from any domain succeeds, -EINVAL
- * (translated to "Unavailable" for user space) if reading from
- * all domains fail for any reason.
- */
- ret = -EINVAL;
- list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
- if (d->ci->id != rr->ci->id)
- continue;
- err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
- rr->evtid, &tval, rr->arch_mon_ctx);
- if (!err) {
- rr->val += tval;
- ret = 0;
- }
- }
-
- if (ret)
- rr->err = ret;
-
- return ret;
-}
-
-/*
- * mbm_bw_count() - Update bw count from values previously read by
- * __mon_event_count().
- * @closid: The closid used to identify the cached mbm_state.
- * @rmid: The rmid used to identify the cached mbm_state.
- * @rr: The struct rmid_read populated by __mon_event_count().
- *
- * Supporting function to calculate the memory bandwidth
- * and delta bandwidth in MBps. The chunks value previously read by
- * __mon_event_count() is compared with the chunks value from the previous
- * invocation. This must be called once per second to maintain values in MBps.
- */
-static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
-{
- u64 cur_bw, bytes, cur_bytes;
- struct mbm_state *m;
-
- m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
- if (WARN_ON_ONCE(!m))
- return;
-
- cur_bytes = rr->val;
- bytes = cur_bytes - m->prev_bw_bytes;
- m->prev_bw_bytes = cur_bytes;
-
- cur_bw = bytes / SZ_1M;
-
- m->prev_bw = cur_bw;
-}
-
-/*
- * This is scheduled by mon_event_read() to read the CQM/MBM counters
- * on a domain.
- */
-void mon_event_count(void *info)
-{
- struct rdtgroup *rdtgrp, *entry;
- struct rmid_read *rr = info;
- struct list_head *head;
- int ret;
-
- rdtgrp = rr->rgrp;
-
- ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
-
- /*
- * For Ctrl groups read data from child monitor groups and
- * add them together. Count events which are read successfully.
- * Discard the rmid_read's reporting errors.
- */
- head = &rdtgrp->mon.crdtgrp_list;
-
- if (rdtgrp->type == RDTCTRL_GROUP) {
- list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->closid, entry->mon.rmid,
- rr) == 0)
- ret = 0;
- }
- }
-
- /*
- * __mon_event_count() calls for newly created monitor groups may
- * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
- * Discard error if any of the monitor event reads succeeded.
- */
- if (ret == 0)
- rr->err = 0;
-}
-
-static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
- struct rdt_resource *r)
-{
- struct rdt_ctrl_domain *d;
-
- lockdep_assert_cpus_held();
-
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- /* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
- return d;
- }
-
- return NULL;
-}
-
-/*
- * Feedback loop for MBA software controller (mba_sc)
- *
- * mba_sc is a feedback loop where we periodically read MBM counters and
- * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
- * that:
- *
- * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
- *
- * This uses the MBM counters to measure the bandwidth and MBA throttle
- * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
- * fact that resctrl rdtgroups have both monitoring and control.
- *
- * The frequency of the checks is 1s and we just tag along the MBM overflow
- * timer. Having 1s interval makes the calculation of bandwidth simpler.
- *
- * Although MBA's goal is to restrict the bandwidth to a maximum, there may
- * be a need to increase the bandwidth to avoid unnecessarily restricting
- * the L2 <-> L3 traffic.
- *
- * Since MBA controls the L2 external bandwidth where as MBM measures the
- * L3 external bandwidth the following sequence could lead to such a
- * situation.
- *
- * Consider an rdtgroup which had high L3 <-> memory traffic in initial
- * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
- * after some time rdtgroup has mostly L2 <-> L3 traffic.
- *
- * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
- * throttle MSRs already have low percentage values. To avoid
- * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
- */
-static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
-{
- u32 closid, rmid, cur_msr_val, new_msr_val;
- struct mbm_state *pmbm_data, *cmbm_data;
- struct rdt_ctrl_domain *dom_mba;
- enum resctrl_event_id evt_id;
- struct rdt_resource *r_mba;
- struct list_head *head;
- struct rdtgroup *entry;
- u32 cur_bw, user_bw;
-
- r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
- evt_id = rgrp->mba_mbps_event;
-
- closid = rgrp->closid;
- rmid = rgrp->mon.rmid;
- pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
- if (WARN_ON_ONCE(!pmbm_data))
- return;
-
- dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
- if (!dom_mba) {
- pr_warn_once("Failure to get domain for MBA update\n");
- return;
- }
-
- cur_bw = pmbm_data->prev_bw;
- user_bw = dom_mba->mbps_val[closid];
-
- /* MBA resource doesn't support CDP */
- cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
-
- /*
- * For Ctrl groups read data from child monitor groups.
- */
- head = &rgrp->mon.crdtgrp_list;
- list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
- if (WARN_ON_ONCE(!cmbm_data))
- return;
- cur_bw += cmbm_data->prev_bw;
- }
-
- /*
- * Scale up/down the bandwidth linearly for the ctrl group. The
- * bandwidth step is the bandwidth granularity specified by the
- * hardware.
- * Always increase throttling if current bandwidth is above the
- * target set by user.
- * But avoid thrashing up and down on every poll by checking
- * whether a decrease in throttling is likely to push the group
- * back over target. E.g. if currently throttling to 30% of bandwidth
- * on a system with 10% granularity steps, check whether moving to
- * 40% would go past the limit by multiplying current bandwidth by
- * "(30 + 10) / 30".
- */
- if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
- new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
- } else if (cur_msr_val < MAX_MBA_BW &&
- (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
- new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
- } else {
- return;
- }
-
- resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
-}
-
-static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid, enum resctrl_event_id evtid)
-{
- struct rmid_read rr = {0};
-
- rr.r = r;
- rr.d = d;
- rr.evtid = evtid;
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
- }
-
- __mon_event_count(closid, rmid, &rr);
-
- /*
- * If the software controller is enabled, compute the
- * bandwidth for this event id.
- */
- if (is_mba_sc(NULL))
- mbm_bw_count(closid, rmid, &rr);
-
- resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
-}
-
-static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid)
-{
- /*
- * This is protected from concurrent reads from user as both
- * the user and overflow handler hold the global mutex.
- */
- if (resctrl_arch_is_mbm_total_enabled())
- mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
-
- if (resctrl_arch_is_mbm_local_enabled())
- mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
-}
-
-/*
- * Handler to scan the limbo list and move the RMIDs
- * to free list whose occupancy < threshold_occupancy.
- */
-void cqm_handle_limbo(struct work_struct *work)
-{
- unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- struct rdt_mon_domain *d;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);
-
- __check_limbo(d, false);
-
- if (has_busy_rmid(d)) {
- d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
- RESCTRL_PICK_ANY_CPU);
- schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
- delay);
- }
-
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-}
-
-/**
- * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
- * domain.
- * @dom: The domain the limbo handler should run for.
- * @delay_ms: How far in the future the handler should run.
- * @exclude_cpu: Which CPU the handler should not run on,
- * RESCTRL_PICK_ANY_CPU to pick any CPU.
- */
-void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
- int exclude_cpu)
-{
- unsigned long delay = msecs_to_jiffies(delay_ms);
- int cpu;
-
- cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
- dom->cqm_work_cpu = cpu;
-
- if (cpu < nr_cpu_ids)
- schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
-}
-
-void mbm_handle_overflow(struct work_struct *work)
-{
- unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
- struct rdtgroup *prgrp, *crgrp;
- struct rdt_mon_domain *d;
- struct list_head *head;
- struct rdt_resource *r;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- /*
- * If the filesystem has been unmounted this work no longer needs to
- * run.
- */
- if (!resctrl_mounted || !resctrl_arch_mon_capable())
- goto out_unlock;
-
- r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- d = container_of(work, struct rdt_mon_domain, mbm_over.work);
-
- list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
-
- head = &prgrp->mon.crdtgrp_list;
- list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
-
- if (is_mba_sc(NULL))
- update_mba_bw(prgrp, d);
- }
-
- /*
- * Re-check for housekeeping CPUs. This allows the overflow handler to
- * move off a nohz_full CPU quickly.
- */
- d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
- RESCTRL_PICK_ANY_CPU);
- schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
-
-out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-}
-
-/**
- * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
- * domain.
- * @dom: The domain the overflow handler should run for.
- * @delay_ms: How far in the future the handler should run.
- * @exclude_cpu: Which CPU the handler should not run on,
- * RESCTRL_PICK_ANY_CPU to pick any CPU.
- */
-void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
- int exclude_cpu)
-{
- unsigned long delay = msecs_to_jiffies(delay_ms);
- int cpu;
-
- /*
- * When a domain comes online there is no guarantee the filesystem is
- * mounted. If not, there is no need to catch counter overflow.
- */
- if (!resctrl_mounted || !resctrl_arch_mon_capable())
- return;
- cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
- dom->mbm_work_cpu = cpu;
-
- if (cpu < nr_cpu_ids)
- schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
-}
-
-static int dom_data_init(struct rdt_resource *r)
-{
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- u32 num_closid = resctrl_arch_get_num_closid(r);
- struct rmid_entry *entry = NULL;
- int err = 0, i;
- u32 idx;
-
- mutex_lock(&rdtgroup_mutex);
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
- u32 *tmp;
-
- /*
- * If the architecture hasn't provided a sanitised value here,
- * this may result in larger arrays than necessary. Resctrl will
- * use a smaller system wide value based on the resources in
- * use.
- */
- tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
- if (!tmp) {
- err = -ENOMEM;
- goto out_unlock;
- }
-
- closid_num_dirty_rmid = tmp;
- }
-
- rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
- if (!rmid_ptrs) {
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
- kfree(closid_num_dirty_rmid);
- closid_num_dirty_rmid = NULL;
- }
- err = -ENOMEM;
- goto out_unlock;
- }
-
- for (i = 0; i < idx_limit; i++) {
- entry = &rmid_ptrs[i];
- INIT_LIST_HEAD(&entry->list);
-
- resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
- list_add_tail(&entry->list, &rmid_free_lru);
- }
-
- /*
- * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
- * are always allocated. These are used for the rdtgroup_default
- * control group, which will be setup later in resctrl_init().
- */
- idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
- RESCTRL_RESERVED_RMID);
- entry = __rmid_entry(idx);
- list_del(&entry->list);
-
-out_unlock:
- mutex_unlock(&rdtgroup_mutex);
-
- return err;
-}
-
-static void dom_data_exit(struct rdt_resource *r)
-{
- mutex_lock(&rdtgroup_mutex);
-
- if (!r->mon_capable)
- goto out_unlock;
-
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
- kfree(closid_num_dirty_rmid);
- closid_num_dirty_rmid = NULL;
- }
-
- kfree(rmid_ptrs);
- rmid_ptrs = NULL;
-
-out_unlock:
- mutex_unlock(&rdtgroup_mutex);
-}
-
-static struct mon_evt llc_occupancy_event = {
- .name = "llc_occupancy",
- .evtid = QOS_L3_OCCUP_EVENT_ID,
-};
-
-static struct mon_evt mbm_total_event = {
- .name = "mbm_total_bytes",
- .evtid = QOS_L3_MBM_TOTAL_EVENT_ID,
-};
-
-static struct mon_evt mbm_local_event = {
- .name = "mbm_local_bytes",
- .evtid = QOS_L3_MBM_LOCAL_EVENT_ID,
-};
-
-/*
- * Initialize the event list for the resource.
- *
- * Note that MBM events are also part of RDT_RESOURCE_L3 resource
- * because as per the SDM the total and local memory bandwidth
- * are enumerated as part of L3 monitoring.
- */
-static void l3_mon_evt_init(struct rdt_resource *r)
-{
- INIT_LIST_HEAD(&r->evt_list);
-
- if (resctrl_arch_is_llc_occupancy_enabled())
- list_add_tail(&llc_occupancy_event.list, &r->evt_list);
- if (resctrl_arch_is_mbm_total_enabled())
- list_add_tail(&mbm_total_event.list, &r->evt_list);
- if (resctrl_arch_is_mbm_local_enabled())
- list_add_tail(&mbm_local_event.list, &r->evt_list);
-}
-
/*
* The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
* which indicates that RMIDs are configured in legacy mode.
@@ -1192,51 +341,6 @@ static __init int snc_get_config(void)
return ret;
}
-/**
- * resctrl_mon_resource_init() - Initialise global monitoring structures.
- *
- * Allocate and initialise global monitor resources that do not belong to a
- * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists.
- * Called once during boot after the struct rdt_resource's have been configured
- * but before the filesystem is mounted.
- * Resctrl's cpuhp callbacks may be called before this point to bring a domain
- * online.
- *
- * Returns 0 for success, or -ENOMEM.
- */
-int __init resctrl_mon_resource_init(void)
-{
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- int ret;
-
- if (!r->mon_capable)
- return 0;
-
- ret = dom_data_init(r);
- if (ret)
- return ret;
-
- l3_mon_evt_init(r);
-
- if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
- mbm_total_event.configurable = true;
- resctrl_file_fflags_init("mbm_total_bytes_config",
- RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
- }
- if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
- mbm_local_event.configurable = true;
- resctrl_file_fflags_init("mbm_local_bytes_config",
- RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
- }
-
- if (resctrl_arch_is_mbm_local_enabled())
- mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
- else if (resctrl_arch_is_mbm_total_enabled())
- mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
-
- return 0;
-}
-
int __init rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
@@ -1284,13 +388,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
return 0;
}
-void resctrl_mon_resource_exit(void)
-{
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-
- dom_data_exit(r);
-}
-
void __init intel_rdt_mbm_apply_quirk(void)
{
int cf_index;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 92ea1472bde9..de580eca3363 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -11,26 +11,22 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cacheflush.h>
#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/debugfs.h>
-#include <linux/kthread.h>
-#include <linux/mman.h>
#include <linux/perf_event.h>
#include <linux/pm_qos.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
+#include <linux/resctrl.h>
-#include <asm/cacheflush.h>
#include <asm/cpu_device_id.h>
-#include <asm/resctrl.h>
#include <asm/perf_event.h>
+#include <asm/msr.h>
#include "../../events/perf_event.h" /* For X86_CONFIG() */
#include "internal.h"
#define CREATE_TRACE_POINTS
-#include "trace.h"
+
+#include "pseudo_lock_trace.h"
/*
* The bits needed to disable hardware prefetching varies based on the
@@ -38,29 +34,6 @@
*/
static u64 prefetch_disable_bits;
-/*
- * Major number assigned to and shared by all devices exposing
- * pseudo-locked regions.
- */
-static unsigned int pseudo_lock_major;
-static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
-
-static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
-{
- const struct rdtgroup *rdtgrp;
-
- rdtgrp = dev_get_drvdata(dev);
- if (mode)
- *mode = 0600;
- guard(mutex)(&rdtgroup_mutex);
- return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn));
-}
-
-static const struct class pseudo_lock_class = {
- .name = "pseudo_lock",
- .devnode = pseudo_lock_devnode,
-};
-
/**
* resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported
* platforms
@@ -122,298 +95,6 @@ u64 resctrl_arch_get_prefetch_disable_bits(void)
}
/**
- * pseudo_lock_minor_get - Obtain available minor number
- * @minor: Pointer to where new minor number will be stored
- *
- * A bitmask is used to track available minor numbers. Here the next free
- * minor number is marked as unavailable and returned.
- *
- * Return: 0 on success, <0 on failure.
- */
-static int pseudo_lock_minor_get(unsigned int *minor)
-{
- unsigned long first_bit;
-
- first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
-
- if (first_bit == MINORBITS)
- return -ENOSPC;
-
- __clear_bit(first_bit, &pseudo_lock_minor_avail);
- *minor = first_bit;
-
- return 0;
-}
-
-/**
- * pseudo_lock_minor_release - Return minor number to available
- * @minor: The minor number made available
- */
-static void pseudo_lock_minor_release(unsigned int minor)
-{
- __set_bit(minor, &pseudo_lock_minor_avail);
-}
-
-/**
- * region_find_by_minor - Locate a pseudo-lock region by inode minor number
- * @minor: The minor number of the device representing pseudo-locked region
- *
- * When the character device is accessed we need to determine which
- * pseudo-locked region it belongs to. This is done by matching the minor
- * number of the device to the pseudo-locked region it belongs.
- *
- * Minor numbers are assigned at the time a pseudo-locked region is associated
- * with a cache instance.
- *
- * Return: On success return pointer to resource group owning the pseudo-locked
- * region, NULL on failure.
- */
-static struct rdtgroup *region_find_by_minor(unsigned int minor)
-{
- struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;
-
- list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
- rdtgrp_match = rdtgrp;
- break;
- }
- }
- return rdtgrp_match;
-}
-
-/**
- * struct pseudo_lock_pm_req - A power management QoS request list entry
- * @list: Entry within the @pm_reqs list for a pseudo-locked region
- * @req: PM QoS request
- */
-struct pseudo_lock_pm_req {
- struct list_head list;
- struct dev_pm_qos_request req;
-};
-
-static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
-{
- struct pseudo_lock_pm_req *pm_req, *next;
-
- list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
- dev_pm_qos_remove_request(&pm_req->req);
- list_del(&pm_req->list);
- kfree(pm_req);
- }
-}
-
-/**
- * pseudo_lock_cstates_constrain - Restrict cores from entering C6
- * @plr: Pseudo-locked region
- *
- * To prevent the cache from being affected by power management entering
- * C6 has to be avoided. This is accomplished by requesting a latency
- * requirement lower than lowest C6 exit latency of all supported
- * platforms as found in the cpuidle state tables in the intel_idle driver.
- * At this time it is possible to do so with a single latency requirement
- * for all supported platforms.
- *
- * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
- * the ACPI latencies need to be considered while keeping in mind that C2
- * may be set to map to deeper sleep states. In this case the latency
- * requirement needs to prevent entering C2 also.
- *
- * Return: 0 on success, <0 on failure
- */
-static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
-{
- struct pseudo_lock_pm_req *pm_req;
- int cpu;
- int ret;
-
- for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
- pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
- if (!pm_req) {
- rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
- ret = -ENOMEM;
- goto out_err;
- }
- ret = dev_pm_qos_add_request(get_cpu_device(cpu),
- &pm_req->req,
- DEV_PM_QOS_RESUME_LATENCY,
- 30);
- if (ret < 0) {
- rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
- cpu);
- kfree(pm_req);
- ret = -1;
- goto out_err;
- }
- list_add(&pm_req->list, &plr->pm_reqs);
- }
-
- return 0;
-
-out_err:
- pseudo_lock_cstates_relax(plr);
- return ret;
-}
-
-/**
- * pseudo_lock_region_clear - Reset pseudo-lock region data
- * @plr: pseudo-lock region
- *
- * All content of the pseudo-locked region is reset - any memory allocated
- * freed.
- *
- * Return: void
- */
-static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
-{
- plr->size = 0;
- plr->line_size = 0;
- kfree(plr->kmem);
- plr->kmem = NULL;
- plr->s = NULL;
- if (plr->d)
- plr->d->plr = NULL;
- plr->d = NULL;
- plr->cbm = 0;
- plr->debugfs_dir = NULL;
-}
-
-/**
- * pseudo_lock_region_init - Initialize pseudo-lock region information
- * @plr: pseudo-lock region
- *
- * Called after user provided a schemata to be pseudo-locked. From the
- * schemata the &struct pseudo_lock_region is on entry already initialized
- * with the resource, domain, and capacity bitmask. Here the information
- * required for pseudo-locking is deduced from this data and &struct
- * pseudo_lock_region initialized further. This information includes:
- * - size in bytes of the region to be pseudo-locked
- * - cache line size to know the stride with which data needs to be accessed
- * to be pseudo-locked
- * - a cpu associated with the cache instance on which the pseudo-locking
- * flow can be executed
- *
- * Return: 0 on success, <0 on failure. Descriptive error will be written
- * to last_cmd_status buffer.
- */
-static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
-{
- enum resctrl_scope scope = plr->s->res->ctrl_scope;
- struct cacheinfo *ci;
- int ret;
-
- if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
- return -ENODEV;
-
- /* Pick the first cpu we find that is associated with the cache. */
- plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);
-
- if (!cpu_online(plr->cpu)) {
- rdt_last_cmd_printf("CPU %u associated with cache not online\n",
- plr->cpu);
- ret = -ENODEV;
- goto out_region;
- }
-
- ci = get_cpu_cacheinfo_level(plr->cpu, scope);
- if (ci) {
- plr->line_size = ci->coherency_line_size;
- plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
- return 0;
- }
-
- ret = -1;
- rdt_last_cmd_puts("Unable to determine cache line size\n");
-out_region:
- pseudo_lock_region_clear(plr);
- return ret;
-}
-
-/**
- * pseudo_lock_init - Initialize a pseudo-lock region
- * @rdtgrp: resource group to which new pseudo-locked region will belong
- *
- * A pseudo-locked region is associated with a resource group. When this
- * association is created the pseudo-locked region is initialized. The
- * details of the pseudo-locked region are not known at this time so only
- * allocation is done and association established.
- *
- * Return: 0 on success, <0 on failure
- */
-static int pseudo_lock_init(struct rdtgroup *rdtgrp)
-{
- struct pseudo_lock_region *plr;
-
- plr = kzalloc(sizeof(*plr), GFP_KERNEL);
- if (!plr)
- return -ENOMEM;
-
- init_waitqueue_head(&plr->lock_thread_wq);
- INIT_LIST_HEAD(&plr->pm_reqs);
- rdtgrp->plr = plr;
- return 0;
-}
-
-/**
- * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
- * @plr: pseudo-lock region
- *
- * Initialize the details required to set up the pseudo-locked region and
- * allocate the contiguous memory that will be pseudo-locked to the cache.
- *
- * Return: 0 on success, <0 on failure. Descriptive error will be written
- * to last_cmd_status buffer.
- */
-static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
-{
- int ret;
-
- ret = pseudo_lock_region_init(plr);
- if (ret < 0)
- return ret;
-
- /*
- * We do not yet support contiguous regions larger than
- * KMALLOC_MAX_SIZE.
- */
- if (plr->size > KMALLOC_MAX_SIZE) {
- rdt_last_cmd_puts("Requested region exceeds maximum size\n");
- ret = -E2BIG;
- goto out_region;
- }
-
- plr->kmem = kzalloc(plr->size, GFP_KERNEL);
- if (!plr->kmem) {
- rdt_last_cmd_puts("Unable to allocate memory\n");
- ret = -ENOMEM;
- goto out_region;
- }
-
- ret = 0;
- goto out;
-out_region:
- pseudo_lock_region_clear(plr);
-out:
- return ret;
-}
-
-/**
- * pseudo_lock_free - Free a pseudo-locked region
- * @rdtgrp: resource group to which pseudo-locked region belonged
- *
- * The pseudo-locked region's resources have already been released, or not
- * yet created at this point. Now it can be freed and disassociated from the
- * resource group.
- *
- * Return: void
- */
-static void pseudo_lock_free(struct rdtgroup *rdtgrp)
-{
- pseudo_lock_region_clear(rdtgrp->plr);
- kfree(rdtgrp->plr);
- rdtgrp->plr = NULL;
-}
-
-/**
* resctrl_arch_pseudo_lock_fn - Load kernel memory into cache
* @_plr: the pseudo-lock region descriptor
*
@@ -481,8 +162,8 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
* the buffer and evict pseudo-locked memory read earlier from the
* cache.
*/
- saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL);
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ saved_msr = native_rdmsrq(MSR_MISC_FEATURE_CONTROL);
+ native_wrmsrq(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits);
closid_p = this_cpu_read(pqr_state.cur_closid);
rmid_p = this_cpu_read(pqr_state.cur_rmid);
mem_r = plr->kmem;
@@ -494,7 +175,7 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
* pseudo-locked followed by reading of kernel memory to load it
* into the cache.
*/
- __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid);
+ native_wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid);
/*
* Cache was flushed earlier. Now access kernel memory to read it
@@ -531,10 +212,10 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
* Critical section end: restore closid with capacity bitmask that
* does not overlap with pseudo-locked region.
*/
- __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, closid_p);
+ native_wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, closid_p);
/* Re-enable the hardware prefetcher(s) */
- wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr);
+ wrmsrq(MSR_MISC_FEATURE_CONTROL, saved_msr);
local_irq_enable();
plr->thread_done = 1;
@@ -543,340 +224,6 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
}
/**
- * rdtgroup_monitor_in_progress - Test if monitoring in progress
- * @rdtgrp: resource group being queried
- *
- * Return: 1 if monitor groups have been created for this resource
- * group, 0 otherwise.
- */
-static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
-{
- return !list_empty(&rdtgrp->mon.crdtgrp_list);
-}
-
-/**
- * rdtgroup_locksetup_user_restrict - Restrict user access to group
- * @rdtgrp: resource group needing access restricted
- *
- * A resource group used for cache pseudo-locking cannot have cpus or tasks
- * assigned to it. This is communicated to the user by restricting access
- * to all the files that can be used to make such changes.
- *
- * Permissions restored with rdtgroup_locksetup_user_restore()
- *
- * Return: 0 on success, <0 on failure. If a failure occurs during the
- * restriction of access an attempt will be made to restore permissions but
- * the state of the mode of these files will be uncertain when a failure
- * occurs.
- */
-static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
-{
- int ret;
-
- ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
- if (ret)
- return ret;
-
- ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
- if (ret)
- goto err_tasks;
-
- ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
- if (ret)
- goto err_cpus;
-
- if (resctrl_arch_mon_capable()) {
- ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
- if (ret)
- goto err_cpus_list;
- }
-
- ret = 0;
- goto out;
-
-err_cpus_list:
- rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
-err_cpus:
- rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
-err_tasks:
- rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
-out:
- return ret;
-}
-
-/**
- * rdtgroup_locksetup_user_restore - Restore user access to group
- * @rdtgrp: resource group needing access restored
- *
- * Restore all file access previously removed using
- * rdtgroup_locksetup_user_restrict()
- *
- * Return: 0 on success, <0 on failure. If a failure occurs during the
- * restoration of access an attempt will be made to restrict permissions
- * again but the state of the mode of these files will be uncertain when
- * a failure occurs.
- */
-static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
-{
- int ret;
-
- ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
- if (ret)
- return ret;
-
- ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
- if (ret)
- goto err_tasks;
-
- ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
- if (ret)
- goto err_cpus;
-
- if (resctrl_arch_mon_capable()) {
- ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
- if (ret)
- goto err_cpus_list;
- }
-
- ret = 0;
- goto out;
-
-err_cpus_list:
- rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
-err_cpus:
- rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
-err_tasks:
- rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
-out:
- return ret;
-}
-
-/**
- * rdtgroup_locksetup_enter - Resource group enters locksetup mode
- * @rdtgrp: resource group requested to enter locksetup mode
- *
- * A resource group enters locksetup mode to reflect that it would be used
- * to represent a pseudo-locked region and is in the process of being set
- * up to do so. A resource group used for a pseudo-locked region would
- * lose the closid associated with it so we cannot allow it to have any
- * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
- * future. Monitoring of a pseudo-locked region is not allowed either.
- *
- * The above and more restrictions on a pseudo-locked region are checked
- * for and enforced before the resource group enters the locksetup mode.
- *
- * Returns: 0 if the resource group successfully entered locksetup mode, <0
- * on failure. On failure the last_cmd_status buffer is updated with text to
- * communicate details of failure to the user.
- */
-int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
-{
- int ret;
-
- /*
- * The default resource group can neither be removed nor lose the
- * default closid associated with it.
- */
- if (rdtgrp == &rdtgroup_default) {
- rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
- return -EINVAL;
- }
-
- /*
- * Cache Pseudo-locking not supported when CDP is enabled.
- *
- * Some things to consider if you would like to enable this
- * support (using L3 CDP as example):
- * - When CDP is enabled two separate resources are exposed,
- * L3DATA and L3CODE, but they are actually on the same cache.
- * The implication for pseudo-locking is that if a
- * pseudo-locked region is created on a domain of one
- * resource (eg. L3CODE), then a pseudo-locked region cannot
- * be created on that same domain of the other resource
- * (eg. L3DATA). This is because the creation of a
- * pseudo-locked region involves a call to wbinvd that will
- * affect all cache allocations on particular domain.
- * - Considering the previous, it may be possible to only
- * expose one of the CDP resources to pseudo-locking and
- * hide the other. For example, we could consider to only
- * expose L3DATA and since the L3 cache is unified it is
- * still possible to place instructions there are execute it.
- * - If only one region is exposed to pseudo-locking we should
- * still keep in mind that availability of a portion of cache
- * for pseudo-locking should take into account both resources.
- * Similarly, if a pseudo-locked region is created in one
- * resource, the portion of cache used by it should be made
- * unavailable to all future allocations from both resources.
- */
- if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
- resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
- rdt_last_cmd_puts("CDP enabled\n");
- return -EINVAL;
- }
-
- /*
- * Not knowing the bits to disable prefetching implies that this
- * platform does not support Cache Pseudo-Locking.
- */
- if (resctrl_arch_get_prefetch_disable_bits() == 0) {
- rdt_last_cmd_puts("Pseudo-locking not supported\n");
- return -EINVAL;
- }
-
- if (rdtgroup_monitor_in_progress(rdtgrp)) {
- rdt_last_cmd_puts("Monitoring in progress\n");
- return -EINVAL;
- }
-
- if (rdtgroup_tasks_assigned(rdtgrp)) {
- rdt_last_cmd_puts("Tasks assigned to resource group\n");
- return -EINVAL;
- }
-
- if (!cpumask_empty(&rdtgrp->cpu_mask)) {
- rdt_last_cmd_puts("CPUs assigned to resource group\n");
- return -EINVAL;
- }
-
- if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
- rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
- return -EIO;
- }
-
- ret = pseudo_lock_init(rdtgrp);
- if (ret) {
- rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
- goto out_release;
- }
-
- /*
- * If this system is capable of monitoring a rmid would have been
- * allocated when the control group was created. This is not needed
- * anymore when this group would be used for pseudo-locking. This
- * is safe to call on platforms not capable of monitoring.
- */
- free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-
- ret = 0;
- goto out;
-
-out_release:
- rdtgroup_locksetup_user_restore(rdtgrp);
-out:
- return ret;
-}
-
-/**
- * rdtgroup_locksetup_exit - resource group exist locksetup mode
- * @rdtgrp: resource group
- *
- * When a resource group exits locksetup mode the earlier restrictions are
- * lifted.
- *
- * Return: 0 on success, <0 on failure
- */
-int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
-{
- int ret;
-
- if (resctrl_arch_mon_capable()) {
- ret = alloc_rmid(rdtgrp->closid);
- if (ret < 0) {
- rdt_last_cmd_puts("Out of RMIDs\n");
- return ret;
- }
- rdtgrp->mon.rmid = ret;
- }
-
- ret = rdtgroup_locksetup_user_restore(rdtgrp);
- if (ret) {
- free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
- return ret;
- }
-
- pseudo_lock_free(rdtgrp);
- return 0;
-}
-
-/**
- * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
- * @d: RDT domain
- * @cbm: CBM to test
- *
- * @d represents a cache instance and @cbm a capacity bitmask that is
- * considered for it. Determine if @cbm overlaps with any existing
- * pseudo-locked region on @d.
- *
- * @cbm is unsigned long, even if only 32 bits are used, to make the
- * bitmap functions work correctly.
- *
- * Return: true if @cbm overlaps with pseudo-locked region on @d, false
- * otherwise.
- */
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
-{
- unsigned int cbm_len;
- unsigned long cbm_b;
-
- if (d->plr) {
- cbm_len = d->plr->s->res->cache.cbm_len;
- cbm_b = d->plr->cbm;
- if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
- return true;
- }
- return false;
-}
-
-/**
- * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
- * @d: RDT domain under test
- *
- * The setup of a pseudo-locked region affects all cache instances within
- * the hierarchy of the region. It is thus essential to know if any
- * pseudo-locked regions exist within a cache hierarchy to prevent any
- * attempts to create new pseudo-locked regions in the same hierarchy.
- *
- * Return: true if a pseudo-locked region exists in the hierarchy of @d or
- * if it is not possible to test due to memory allocation issue,
- * false otherwise.
- */
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
-{
- struct rdt_ctrl_domain *d_i;
- cpumask_var_t cpu_with_psl;
- struct rdt_resource *r;
- bool ret = false;
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
- return true;
-
- /*
- * First determine which cpus have pseudo-locked regions
- * associated with them.
- */
- for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
- if (d_i->plr)
- cpumask_or(cpu_with_psl, cpu_with_psl,
- &d_i->hdr.cpu_mask);
- }
- }
-
- /*
- * Next test if new pseudo-locked region would intersect with
- * existing region.
- */
- if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
- ret = true;
-
- free_cpumask_var(cpu_with_psl);
- return ret;
-}
-
-/**
* resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read
* pseudo-locked memory
* @_plr: pseudo-lock region to measure
@@ -904,7 +251,7 @@ int resctrl_arch_measure_cycles_lat_fn(void *_plr)
* Disable hardware prefetchers.
*/
rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
- wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ wrmsrq(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits);
mem_r = READ_ONCE(plr->kmem);
/*
* Dummy execute of the time measurement to load the needed
@@ -1000,7 +347,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
* Disable hardware prefetchers.
*/
rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
- wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ wrmsrq(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits);
/* Initialize rest of local variables */
/*
@@ -1018,8 +365,8 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
* used in L1 cache, second to capture accurate value that does not
* include cache misses incurred because of instruction loads.
*/
- rdpmcl(hit_pmcnum, hits_before);
- rdpmcl(miss_pmcnum, miss_before);
+ hits_before = rdpmc(hit_pmcnum);
+ miss_before = rdpmc(miss_pmcnum);
/*
* From SDM: Performing back-to-back fast reads are not guaranteed
* to be monotonic.
@@ -1027,8 +374,8 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
* before proceeding.
*/
rmb();
- rdpmcl(hit_pmcnum, hits_before);
- rdpmcl(miss_pmcnum, miss_before);
+ hits_before = rdpmc(hit_pmcnum);
+ miss_before = rdpmc(miss_pmcnum);
/*
* Use LFENCE to ensure all previous instructions are retired
* before proceeding.
@@ -1050,8 +397,8 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
* before proceeding.
*/
rmb();
- rdpmcl(hit_pmcnum, hits_after);
- rdpmcl(miss_pmcnum, miss_after);
+ hits_after = rdpmc(hit_pmcnum);
+ miss_after = rdpmc(miss_pmcnum);
/*
* Use LFENCE to ensure all previous instructions are retired
* before proceeding.
@@ -1168,433 +515,3 @@ out:
wake_up_interruptible(&plr->lock_thread_wq);
return 0;
}
-
-/**
- * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
- * @rdtgrp: Resource group to which the pseudo-locked region belongs.
- * @sel: Selector of which measurement to perform on a pseudo-locked region.
- *
- * The measurement of latency to access a pseudo-locked region should be
- * done from a cpu that is associated with that pseudo-locked region.
- * Determine which cpu is associated with this region and start a thread on
- * that cpu to perform the measurement, wait for that thread to complete.
- *
- * Return: 0 on success, <0 on failure
- */
-static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
-{
- struct pseudo_lock_region *plr = rdtgrp->plr;
- struct task_struct *thread;
- unsigned int cpu;
- int ret = -1;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- if (rdtgrp->flags & RDT_DELETED) {
- ret = -ENODEV;
- goto out;
- }
-
- if (!plr->d) {
- ret = -ENODEV;
- goto out;
- }
-
- plr->thread_done = 0;
- cpu = cpumask_first(&plr->d->hdr.cpu_mask);
- if (!cpu_online(cpu)) {
- ret = -ENODEV;
- goto out;
- }
-
- plr->cpu = cpu;
-
- if (sel == 1)
- thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn,
- plr, cpu, "pseudo_lock_measure/%u");
- else if (sel == 2)
- thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency,
- plr, cpu, "pseudo_lock_measure/%u");
- else if (sel == 3)
- thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency,
- plr, cpu, "pseudo_lock_measure/%u");
- else
- goto out;
-
- if (IS_ERR(thread)) {
- ret = PTR_ERR(thread);
- goto out;
- }
-
- ret = wait_event_interruptible(plr->lock_thread_wq,
- plr->thread_done == 1);
- if (ret < 0)
- goto out;
-
- ret = 0;
-
-out:
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return ret;
-}
-
-static ssize_t pseudo_lock_measure_trigger(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct rdtgroup *rdtgrp = file->private_data;
- size_t buf_size;
- char buf[32];
- int ret;
- int sel;
-
- buf_size = min(count, (sizeof(buf) - 1));
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
- ret = kstrtoint(buf, 10, &sel);
- if (ret == 0) {
- if (sel != 1 && sel != 2 && sel != 3)
- return -EINVAL;
- ret = debugfs_file_get(file->f_path.dentry);
- if (ret)
- return ret;
- ret = pseudo_lock_measure_cycles(rdtgrp, sel);
- if (ret == 0)
- ret = count;
- debugfs_file_put(file->f_path.dentry);
- }
-
- return ret;
-}
-
-static const struct file_operations pseudo_measure_fops = {
- .write = pseudo_lock_measure_trigger,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
-/**
- * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
- * @rdtgrp: resource group to which pseudo-lock region belongs
- *
- * Called when a resource group in the pseudo-locksetup mode receives a
- * valid schemata that should be pseudo-locked. Since the resource group is
- * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
- * allocated and initialized with the essential information. If a failure
- * occurs the resource group remains in the pseudo-locksetup mode with the
- * &struct pseudo_lock_region associated with it, but cleared from all
- * information and ready for the user to re-attempt pseudo-locking by
- * writing the schemata again.
- *
- * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
- * on failure. Descriptive error will be written to last_cmd_status buffer.
- */
-int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
-{
- struct pseudo_lock_region *plr = rdtgrp->plr;
- struct task_struct *thread;
- unsigned int new_minor;
- struct device *dev;
- char *kn_name __free(kfree) = NULL;
- int ret;
-
- ret = pseudo_lock_region_alloc(plr);
- if (ret < 0)
- return ret;
-
- ret = pseudo_lock_cstates_constrain(plr);
- if (ret < 0) {
- ret = -EINVAL;
- goto out_region;
- }
- kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL);
- if (!kn_name) {
- ret = -ENOMEM;
- goto out_cstates;
- }
-
- plr->thread_done = 0;
-
- thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr,
- plr->cpu, "pseudo_lock/%u");
- if (IS_ERR(thread)) {
- ret = PTR_ERR(thread);
- rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
- goto out_cstates;
- }
-
- ret = wait_event_interruptible(plr->lock_thread_wq,
- plr->thread_done == 1);
- if (ret < 0) {
- /*
- * If the thread does not get on the CPU for whatever
- * reason and the process which sets up the region is
- * interrupted then this will leave the thread in runnable
- * state and once it gets on the CPU it will dereference
- * the cleared, but not freed, plr struct resulting in an
- * empty pseudo-locking loop.
- */
- rdt_last_cmd_puts("Locking thread interrupted\n");
- goto out_cstates;
- }
-
- ret = pseudo_lock_minor_get(&new_minor);
- if (ret < 0) {
- rdt_last_cmd_puts("Unable to obtain a new minor number\n");
- goto out_cstates;
- }
-
- /*
- * Unlock access but do not release the reference. The
- * pseudo-locked region will still be here on return.
- *
- * The mutex has to be released temporarily to avoid a potential
- * deadlock with the mm->mmap_lock which is obtained in the
- * device_create() and debugfs_create_dir() callpath below as well as
- * before the mmap() callback is called.
- */
- mutex_unlock(&rdtgroup_mutex);
-
- if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
- plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl);
- if (!IS_ERR_OR_NULL(plr->debugfs_dir))
- debugfs_create_file("pseudo_lock_measure", 0200,
- plr->debugfs_dir, rdtgrp,
- &pseudo_measure_fops);
- }
-
- dev = device_create(&pseudo_lock_class, NULL,
- MKDEV(pseudo_lock_major, new_minor),
- rdtgrp, "%s", kn_name);
-
- mutex_lock(&rdtgroup_mutex);
-
- if (IS_ERR(dev)) {
- ret = PTR_ERR(dev);
- rdt_last_cmd_printf("Failed to create character device: %d\n",
- ret);
- goto out_debugfs;
- }
-
- /* We released the mutex - check if group was removed while we did so */
- if (rdtgrp->flags & RDT_DELETED) {
- ret = -ENODEV;
- goto out_device;
- }
-
- plr->minor = new_minor;
-
- rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
- closid_free(rdtgrp->closid);
- rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
- rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);
-
- ret = 0;
- goto out;
-
-out_device:
- device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
-out_debugfs:
- debugfs_remove_recursive(plr->debugfs_dir);
- pseudo_lock_minor_release(new_minor);
-out_cstates:
- pseudo_lock_cstates_relax(plr);
-out_region:
- pseudo_lock_region_clear(plr);
-out:
- return ret;
-}
-
-/**
- * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
- * @rdtgrp: resource group to which the pseudo-locked region belongs
- *
- * The removal of a pseudo-locked region can be initiated when the resource
- * group is removed from user space via a "rmdir" from userspace or the
- * unmount of the resctrl filesystem. On removal the resource group does
- * not go back to pseudo-locksetup mode before it is removed, instead it is
- * removed directly. There is thus asymmetry with the creation where the
- * &struct pseudo_lock_region is removed here while it was not created in
- * rdtgroup_pseudo_lock_create().
- *
- * Return: void
- */
-void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
-{
- struct pseudo_lock_region *plr = rdtgrp->plr;
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- /*
- * Default group cannot be a pseudo-locked region so we can
- * free closid here.
- */
- closid_free(rdtgrp->closid);
- goto free;
- }
-
- pseudo_lock_cstates_relax(plr);
- debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
- device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
- pseudo_lock_minor_release(plr->minor);
-
-free:
- pseudo_lock_free(rdtgrp);
-}
-
-static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
-{
- struct rdtgroup *rdtgrp;
-
- mutex_lock(&rdtgroup_mutex);
-
- rdtgrp = region_find_by_minor(iminor(inode));
- if (!rdtgrp) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENODEV;
- }
-
- filp->private_data = rdtgrp;
- atomic_inc(&rdtgrp->waitcount);
- /* Perform a non-seekable open - llseek is not supported */
- filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
-
- mutex_unlock(&rdtgroup_mutex);
-
- return 0;
-}
-
-static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
-{
- struct rdtgroup *rdtgrp;
-
- mutex_lock(&rdtgroup_mutex);
- rdtgrp = filp->private_data;
- WARN_ON(!rdtgrp);
- if (!rdtgrp) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENODEV;
- }
- filp->private_data = NULL;
- atomic_dec(&rdtgrp->waitcount);
- mutex_unlock(&rdtgroup_mutex);
- return 0;
-}
-
-static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
-{
- /* Not supported */
- return -EINVAL;
-}
-
-static const struct vm_operations_struct pseudo_mmap_ops = {
- .mremap = pseudo_lock_dev_mremap,
-};
-
-static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- unsigned long vsize = vma->vm_end - vma->vm_start;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
- struct pseudo_lock_region *plr;
- struct rdtgroup *rdtgrp;
- unsigned long physical;
- unsigned long psize;
-
- mutex_lock(&rdtgroup_mutex);
-
- rdtgrp = filp->private_data;
- WARN_ON(!rdtgrp);
- if (!rdtgrp) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENODEV;
- }
-
- plr = rdtgrp->plr;
-
- if (!plr->d) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENODEV;
- }
-
- /*
- * Task is required to run with affinity to the cpus associated
- * with the pseudo-locked region. If this is not the case the task
- * may be scheduled elsewhere and invalidate entries in the
- * pseudo-locked region.
- */
- if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
- mutex_unlock(&rdtgroup_mutex);
- return -EINVAL;
- }
-
- physical = __pa(plr->kmem) >> PAGE_SHIFT;
- psize = plr->size - off;
-
- if (off > plr->size) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENOSPC;
- }
-
- /*
- * Ensure changes are carried directly to the memory being mapped,
- * do not allow copy-on-write mapping.
- */
- if (!(vma->vm_flags & VM_SHARED)) {
- mutex_unlock(&rdtgroup_mutex);
- return -EINVAL;
- }
-
- if (vsize > psize) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENOSPC;
- }
-
- memset(plr->kmem + off, 0, vsize);
-
- if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
- vsize, vma->vm_page_prot)) {
- mutex_unlock(&rdtgroup_mutex);
- return -EAGAIN;
- }
- vma->vm_ops = &pseudo_mmap_ops;
- mutex_unlock(&rdtgroup_mutex);
- return 0;
-}
-
-static const struct file_operations pseudo_lock_dev_fops = {
- .owner = THIS_MODULE,
- .read = NULL,
- .write = NULL,
- .open = pseudo_lock_dev_open,
- .release = pseudo_lock_dev_release,
- .mmap = pseudo_lock_dev_mmap,
-};
-
-int rdt_pseudo_lock_init(void)
-{
- int ret;
-
- ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
- if (ret < 0)
- return ret;
-
- pseudo_lock_major = ret;
-
- ret = class_register(&pseudo_lock_class);
- if (ret) {
- unregister_chrdev(pseudo_lock_major, "pseudo_lock");
- return ret;
- }
-
- return 0;
-}
-
-void rdt_pseudo_lock_release(void)
-{
- class_unregister(&pseudo_lock_class);
- unregister_chrdev(pseudo_lock_major, "pseudo_lock");
- pseudo_lock_major = 0;
-}
diff --git a/arch/x86/kernel/cpu/resctrl/trace.h b/arch/x86/kernel/cpu/resctrl/pseudo_lock_trace.h
index 2a506316b303..7c8aef08010f 100644
--- a/arch/x86/kernel/cpu/resctrl/trace.h
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock_trace.h
@@ -2,8 +2,8 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM resctrl
-#if !defined(_TRACE_RESCTRL_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_RESCTRL_H
+#if !defined(_X86_RESCTRL_PSEUDO_LOCK_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _X86_RESCTRL_PSEUDO_LOCK_TRACE_H
#include <linux/tracepoint.h>
@@ -35,25 +35,11 @@ TRACE_EVENT(pseudo_lock_l3,
TP_printk("hits=%llu miss=%llu",
__entry->l3_hits, __entry->l3_miss));
-TRACE_EVENT(mon_llc_occupancy_limbo,
- TP_PROTO(u32 ctrl_hw_id, u32 mon_hw_id, int domain_id, u64 llc_occupancy_bytes),
- TP_ARGS(ctrl_hw_id, mon_hw_id, domain_id, llc_occupancy_bytes),
- TP_STRUCT__entry(__field(u32, ctrl_hw_id)
- __field(u32, mon_hw_id)
- __field(int, domain_id)
- __field(u64, llc_occupancy_bytes)),
- TP_fast_assign(__entry->ctrl_hw_id = ctrl_hw_id;
- __entry->mon_hw_id = mon_hw_id;
- __entry->domain_id = domain_id;
- __entry->llc_occupancy_bytes = llc_occupancy_bytes;),
- TP_printk("ctrl_hw_id=%u mon_hw_id=%u domain_id=%d llc_occupancy_bytes=%llu",
- __entry->ctrl_hw_id, __entry->mon_hw_id, __entry->domain_id,
- __entry->llc_occupancy_bytes)
- );
-
-#endif /* _TRACE_RESCTRL_H */
+#endif /* _X86_RESCTRL_PSEUDO_LOCK_TRACE_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
+
+#define TRACE_INCLUDE_FILE pseudo_lock_trace
+
#include <trace/define_trace.h>
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index cc4a54145c83..885026468440 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -18,6 +18,7 @@
#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
+#include <linux/resctrl.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
@@ -28,341 +29,17 @@
#include <uapi/linux/magic.h>
-#include <asm/resctrl.h>
+#include <asm/msr.h>
#include "internal.h"
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
-DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
-DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
-
-/* Mutex to protect rdtgroup access. */
-DEFINE_MUTEX(rdtgroup_mutex);
-
-static struct kernfs_root *rdt_root;
-struct rdtgroup rdtgroup_default;
-LIST_HEAD(rdt_all_groups);
-
-/* list of entries for the schemata file */
-LIST_HEAD(resctrl_schema_all);
-
-/* The filesystem can only be mounted once. */
-bool resctrl_mounted;
-
-/* Kernel fs node for "info" directory under root */
-static struct kernfs_node *kn_info;
-
-/* Kernel fs node for "mon_groups" directory under root */
-static struct kernfs_node *kn_mongrp;
-
-/* Kernel fs node for "mon_data" directory under root */
-static struct kernfs_node *kn_mondata;
-
-/*
- * Used to store the max resource name width to display the schemata names in
- * a tabular format.
- */
-int max_name_width;
-
-static struct seq_buf last_cmd_status;
-static char last_cmd_status_buf[512];
-
-static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
-static void rdtgroup_destroy_root(void);
-
-struct dentry *debugfs_resctrl;
-
-/*
- * Memory bandwidth monitoring event to use for the default CTRL_MON group
- * and each new CTRL_MON group created by the user. Only relevant when
- * the filesystem is mounted with the "mba_MBps" option so it does not
- * matter that it remains uninitialized on systems that do not support
- * the "mba_MBps" option.
- */
-enum resctrl_event_id mba_mbps_default_event;
-
-static bool resctrl_debug;
-
-void rdt_last_cmd_clear(void)
-{
- lockdep_assert_held(&rdtgroup_mutex);
- seq_buf_clear(&last_cmd_status);
-}
-
-void rdt_last_cmd_puts(const char *s)
-{
- lockdep_assert_held(&rdtgroup_mutex);
- seq_buf_puts(&last_cmd_status, s);
-}
-
-void rdt_last_cmd_printf(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- lockdep_assert_held(&rdtgroup_mutex);
- seq_buf_vprintf(&last_cmd_status, fmt, ap);
- va_end(ap);
-}
-
-void rdt_staged_configs_clear(void)
-{
- struct rdt_ctrl_domain *dom;
- struct rdt_resource *r;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
- memset(dom->staged_config, 0, sizeof(dom->staged_config));
- }
-}
-
-static bool resctrl_is_mbm_enabled(void)
-{
- return (resctrl_arch_is_mbm_total_enabled() ||
- resctrl_arch_is_mbm_local_enabled());
-}
-
-static bool resctrl_is_mbm_event(int e)
-{
- return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
- e <= QOS_L3_MBM_LOCAL_EVENT_ID);
-}
-
-/*
- * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
- * we can keep a bitmap of free CLOSIDs in a single integer.
- *
- * Using a global CLOSID across all resources has some advantages and
- * some drawbacks:
- * + We can simply set current's closid to assign a task to a resource
- * group.
- * + Context switch code can avoid extra memory references deciding which
- * CLOSID to load into the PQR_ASSOC MSR
- * - We give up some options in configuring resource groups across multi-socket
- * systems.
- * - Our choices on how to configure each resource become progressively more
- * limited as the number of resources grows.
- */
-static unsigned long closid_free_map;
-static int closid_free_map_len;
-
-int closids_supported(void)
-{
- return closid_free_map_len;
-}
-
-static void closid_init(void)
-{
- struct resctrl_schema *s;
- u32 rdt_min_closid = 32;
-
- /* Compute rdt_min_closid across all resources */
- list_for_each_entry(s, &resctrl_schema_all, list)
- rdt_min_closid = min(rdt_min_closid, s->num_closid);
-
- closid_free_map = BIT_MASK(rdt_min_closid) - 1;
-
- /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
- __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
- closid_free_map_len = rdt_min_closid;
-}
-
-static int closid_alloc(void)
-{
- int cleanest_closid;
- u32 closid;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
- resctrl_arch_is_llc_occupancy_enabled()) {
- cleanest_closid = resctrl_find_cleanest_closid();
- if (cleanest_closid < 0)
- return cleanest_closid;
- closid = cleanest_closid;
- } else {
- closid = ffs(closid_free_map);
- if (closid == 0)
- return -ENOSPC;
- closid--;
- }
- __clear_bit(closid, &closid_free_map);
-
- return closid;
-}
-
-void closid_free(int closid)
-{
- lockdep_assert_held(&rdtgroup_mutex);
-
- __set_bit(closid, &closid_free_map);
-}
-
-/**
- * closid_allocated - test if provided closid is in use
- * @closid: closid to be tested
- *
- * Return: true if @closid is currently associated with a resource group,
- * false if @closid is free
- */
-bool closid_allocated(unsigned int closid)
-{
- lockdep_assert_held(&rdtgroup_mutex);
-
- return !test_bit(closid, &closid_free_map);
-}
-
-/**
- * rdtgroup_mode_by_closid - Return mode of resource group with closid
- * @closid: closid if the resource group
- *
- * Each resource group is associated with a @closid. Here the mode
- * of a resource group can be queried by searching for it using its closid.
- *
- * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
- */
-enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
-{
- struct rdtgroup *rdtgrp;
-
- list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (rdtgrp->closid == closid)
- return rdtgrp->mode;
- }
-
- return RDT_NUM_MODES;
-}
-
-static const char * const rdt_mode_str[] = {
- [RDT_MODE_SHAREABLE] = "shareable",
- [RDT_MODE_EXCLUSIVE] = "exclusive",
- [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
- [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
-};
-
-/**
- * rdtgroup_mode_str - Return the string representation of mode
- * @mode: the resource group mode as &enum rdtgroup_mode
- *
- * Return: string representation of valid mode, "unknown" otherwise
- */
-static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
-{
- if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
- return "unknown";
-
- return rdt_mode_str[mode];
-}
-/* set uid and gid of rdtgroup dirs and files to that of the creator */
-static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
-{
- struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
- .ia_uid = current_fsuid(),
- .ia_gid = current_fsgid(), };
-
- if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
- gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
- return 0;
-
- return kernfs_setattr(kn, &iattr);
-}
-
-static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
-{
- struct kernfs_node *kn;
- int ret;
-
- kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
- 0, rft->kf_ops, rft, NULL, NULL);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret) {
- kernfs_remove(kn);
- return ret;
- }
-
- return 0;
-}
-
-static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
-{
- struct kernfs_open_file *of = m->private;
- struct rftype *rft = of->kn->priv;
-
- if (rft->seq_show)
- return rft->seq_show(of, m, arg);
- return 0;
-}
-
-static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
- size_t nbytes, loff_t off)
-{
- struct rftype *rft = of->kn->priv;
-
- if (rft->write)
- return rft->write(of, buf, nbytes, off);
-
- return -EINVAL;
-}
-
-static const struct kernfs_ops rdtgroup_kf_single_ops = {
- .atomic_write_len = PAGE_SIZE,
- .write = rdtgroup_file_write,
- .seq_show = rdtgroup_seqfile_show,
-};
-
-static const struct kernfs_ops kf_mondata_ops = {
- .atomic_write_len = PAGE_SIZE,
- .seq_show = rdtgroup_mondata_show,
-};
-
-static bool is_cpu_list(struct kernfs_open_file *of)
-{
- struct rftype *rft = of->kn->priv;
-
- return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
-}
-
-static int rdtgroup_cpus_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
- struct cpumask *mask;
- int ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
-
- if (rdtgrp) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- if (!rdtgrp->plr->d) {
- rdt_last_cmd_clear();
- rdt_last_cmd_puts("Cache domain offline\n");
- ret = -ENODEV;
- } else {
- mask = &rdtgrp->plr->d->hdr.cpu_mask;
- seq_printf(s, is_cpu_list(of) ?
- "%*pbl\n" : "%*pb\n",
- cpumask_pr_args(mask));
- }
- } else {
- seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
- cpumask_pr_args(&rdtgrp->cpu_mask));
- }
- } else {
- ret = -ENOENT;
- }
- rdtgroup_kn_unlock(of->kn);
+DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
- return ret;
-}
+DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
/*
- * This is safe against resctrl_sched_in() called from __switch_to()
+ * This is safe against resctrl_arch_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
* from update_closid_rmid() is protected against __switch_to() because
* preemption is disabled.
@@ -381,1223 +58,7 @@ void resctrl_arch_sync_cpu_closid_rmid(void *info)
* executing task might have its own closid selected. Just reuse
* the context switch code.
*/
- resctrl_sched_in(current);
-}
-
-/*
- * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
- *
- * Per task closids/rmids must have been set up before calling this function.
- * @r may be NULL.
- */
-static void
-update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
-{
- struct resctrl_cpu_defaults defaults, *p = NULL;
-
- if (r) {
- defaults.closid = r->closid;
- defaults.rmid = r->mon.rmid;
- p = &defaults;
- }
-
- on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
-}
-
-static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
- cpumask_var_t tmpmask)
-{
- struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
- struct list_head *head;
-
- /* Check whether cpus belong to parent ctrl group */
- cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
- if (!cpumask_empty(tmpmask)) {
- rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
- return -EINVAL;
- }
-
- /* Check whether cpus are dropped from this group */
- cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
- if (!cpumask_empty(tmpmask)) {
- /* Give any dropped cpus to parent rdtgroup */
- cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
- update_closid_rmid(tmpmask, prgrp);
- }
-
- /*
- * If we added cpus, remove them from previous group that owned them
- * and update per-cpu rmid
- */
- cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
- if (!cpumask_empty(tmpmask)) {
- head = &prgrp->mon.crdtgrp_list;
- list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
- if (crgrp == rdtgrp)
- continue;
- cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
- tmpmask);
- }
- update_closid_rmid(tmpmask, rdtgrp);
- }
-
- /* Done pushing/pulling - update this group with new mask */
- cpumask_copy(&rdtgrp->cpu_mask, newmask);
-
- return 0;
-}
-
-static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
-{
- struct rdtgroup *crgrp;
-
- cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
- /* update the child mon group masks as well*/
- list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
- cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
-}
-
-static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
- cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
-{
- struct rdtgroup *r, *crgrp;
- struct list_head *head;
-
- /* Check whether cpus are dropped from this group */
- cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
- if (!cpumask_empty(tmpmask)) {
- /* Can't drop from default group */
- if (rdtgrp == &rdtgroup_default) {
- rdt_last_cmd_puts("Can't drop CPUs from default group\n");
- return -EINVAL;
- }
-
- /* Give any dropped cpus to rdtgroup_default */
- cpumask_or(&rdtgroup_default.cpu_mask,
- &rdtgroup_default.cpu_mask, tmpmask);
- update_closid_rmid(tmpmask, &rdtgroup_default);
- }
-
- /*
- * If we added cpus, remove them from previous group and
- * the prev group's child groups that owned them
- * and update per-cpu closid/rmid.
- */
- cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
- if (!cpumask_empty(tmpmask)) {
- list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
- if (r == rdtgrp)
- continue;
- cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
- if (!cpumask_empty(tmpmask1))
- cpumask_rdtgrp_clear(r, tmpmask1);
- }
- update_closid_rmid(tmpmask, rdtgrp);
- }
-
- /* Done pushing/pulling - update this group with new mask */
- cpumask_copy(&rdtgrp->cpu_mask, newmask);
-
- /*
- * Clear child mon group masks since there is a new parent mask
- * now and update the rmid for the cpus the child lost.
- */
- head = &rdtgrp->mon.crdtgrp_list;
- list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
- cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
- update_closid_rmid(tmpmask, rdtgrp);
- cpumask_clear(&crgrp->cpu_mask);
- }
-
- return 0;
-}
-
-static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- cpumask_var_t tmpmask, newmask, tmpmask1;
- struct rdtgroup *rdtgrp;
- int ret;
-
- if (!buf)
- return -EINVAL;
-
- if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
- return -ENOMEM;
- if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
- free_cpumask_var(tmpmask);
- return -ENOMEM;
- }
- if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
- free_cpumask_var(tmpmask);
- free_cpumask_var(newmask);
- return -ENOMEM;
- }
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- ret = -ENOENT;
- goto unlock;
- }
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- ret = -EINVAL;
- rdt_last_cmd_puts("Pseudo-locking in progress\n");
- goto unlock;
- }
-
- if (is_cpu_list(of))
- ret = cpulist_parse(buf, newmask);
- else
- ret = cpumask_parse(buf, newmask);
-
- if (ret) {
- rdt_last_cmd_puts("Bad CPU list/mask\n");
- goto unlock;
- }
-
- /* check that user didn't specify any offline cpus */
- cpumask_andnot(tmpmask, newmask, cpu_online_mask);
- if (!cpumask_empty(tmpmask)) {
- ret = -EINVAL;
- rdt_last_cmd_puts("Can only assign online CPUs\n");
- goto unlock;
- }
-
- if (rdtgrp->type == RDTCTRL_GROUP)
- ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
- else if (rdtgrp->type == RDTMON_GROUP)
- ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
- else
- ret = -EINVAL;
-
-unlock:
- rdtgroup_kn_unlock(of->kn);
- free_cpumask_var(tmpmask);
- free_cpumask_var(newmask);
- free_cpumask_var(tmpmask1);
-
- return ret ?: nbytes;
-}
-
-/**
- * rdtgroup_remove - the helper to remove resource group safely
- * @rdtgrp: resource group to remove
- *
- * On resource group creation via a mkdir, an extra kernfs_node reference is
- * taken to ensure that the rdtgroup structure remains accessible for the
- * rdtgroup_kn_unlock() calls where it is removed.
- *
- * Drop the extra reference here, then free the rdtgroup structure.
- *
- * Return: void
- */
-static void rdtgroup_remove(struct rdtgroup *rdtgrp)
-{
- kernfs_put(rdtgrp->kn);
- kfree(rdtgrp);
-}
-
-static void _update_task_closid_rmid(void *task)
-{
- /*
- * If the task is still current on this CPU, update PQR_ASSOC MSR.
- * Otherwise, the MSR is updated when the task is scheduled in.
- */
- if (task == current)
- resctrl_sched_in(task);
-}
-
-static void update_task_closid_rmid(struct task_struct *t)
-{
- if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
- smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
- else
- _update_task_closid_rmid(t);
-}
-
-static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
-{
- u32 closid, rmid = rdtgrp->mon.rmid;
-
- if (rdtgrp->type == RDTCTRL_GROUP)
- closid = rdtgrp->closid;
- else if (rdtgrp->type == RDTMON_GROUP)
- closid = rdtgrp->mon.parent->closid;
- else
- return false;
-
- return resctrl_arch_match_closid(tsk, closid) &&
- resctrl_arch_match_rmid(tsk, closid, rmid);
-}
-
-static int __rdtgroup_move_task(struct task_struct *tsk,
- struct rdtgroup *rdtgrp)
-{
- /* If the task is already in rdtgrp, no need to move the task. */
- if (task_in_rdtgroup(tsk, rdtgrp))
- return 0;
-
- /*
- * Set the task's closid/rmid before the PQR_ASSOC MSR can be
- * updated by them.
- *
- * For ctrl_mon groups, move both closid and rmid.
- * For monitor groups, can move the tasks only from
- * their parent CTRL group.
- */
- if (rdtgrp->type == RDTMON_GROUP &&
- !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
- rdt_last_cmd_puts("Can't move task to different control group\n");
- return -EINVAL;
- }
-
- if (rdtgrp->type == RDTMON_GROUP)
- resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
- rdtgrp->mon.rmid);
- else
- resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
- rdtgrp->mon.rmid);
-
- /*
- * Ensure the task's closid and rmid are written before determining if
- * the task is current that will decide if it will be interrupted.
- * This pairs with the full barrier between the rq->curr update and
- * resctrl_sched_in() during context switch.
- */
- smp_mb();
-
- /*
- * By now, the task's closid and rmid are set. If the task is current
- * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
- * group go into effect. If the task is not current, the MSR will be
- * updated when the task is scheduled in.
- */
- update_task_closid_rmid(tsk);
-
- return 0;
-}
-
-static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
-{
- return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
- resctrl_arch_match_closid(t, r->closid));
-}
-
-static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
-{
- return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
- resctrl_arch_match_rmid(t, r->mon.parent->closid,
- r->mon.rmid));
-}
-
-/**
- * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
- * @r: Resource group
- *
- * Return: 1 if tasks have been assigned to @r, 0 otherwise
- */
-int rdtgroup_tasks_assigned(struct rdtgroup *r)
-{
- struct task_struct *p, *t;
- int ret = 0;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- rcu_read_lock();
- for_each_process_thread(p, t) {
- if (is_closid_match(t, r) || is_rmid_match(t, r)) {
- ret = 1;
- break;
- }
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-static int rdtgroup_task_write_permission(struct task_struct *task,
- struct kernfs_open_file *of)
-{
- const struct cred *tcred = get_task_cred(task);
- const struct cred *cred = current_cred();
- int ret = 0;
-
- /*
- * Even if we're attaching all tasks in the thread group, we only
- * need to check permissions on one of them.
- */
- if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
- !uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid)) {
- rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
- ret = -EPERM;
- }
-
- put_cred(tcred);
- return ret;
-}
-
-static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
- struct kernfs_open_file *of)
-{
- struct task_struct *tsk;
- int ret;
-
- rcu_read_lock();
- if (pid) {
- tsk = find_task_by_vpid(pid);
- if (!tsk) {
- rcu_read_unlock();
- rdt_last_cmd_printf("No task %d\n", pid);
- return -ESRCH;
- }
- } else {
- tsk = current;
- }
-
- get_task_struct(tsk);
- rcu_read_unlock();
-
- ret = rdtgroup_task_write_permission(tsk, of);
- if (!ret)
- ret = __rdtgroup_move_task(tsk, rdtgrp);
-
- put_task_struct(tsk);
- return ret;
-}
-
-static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct rdtgroup *rdtgrp;
- char *pid_str;
- int ret = 0;
- pid_t pid;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
- rdt_last_cmd_clear();
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- ret = -EINVAL;
- rdt_last_cmd_puts("Pseudo-locking in progress\n");
- goto unlock;
- }
-
- while (buf && buf[0] != '\0' && buf[0] != '\n') {
- pid_str = strim(strsep(&buf, ","));
-
- if (kstrtoint(pid_str, 0, &pid)) {
- rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
- ret = -EINVAL;
- break;
- }
-
- if (pid < 0) {
- rdt_last_cmd_printf("Invalid pid %d\n", pid);
- ret = -EINVAL;
- break;
- }
-
- ret = rdtgroup_move_task(pid, rdtgrp, of);
- if (ret) {
- rdt_last_cmd_printf("Error while processing task %d\n", pid);
- break;
- }
- }
-
-unlock:
- rdtgroup_kn_unlock(of->kn);
-
- return ret ?: nbytes;
-}
-
-static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
-{
- struct task_struct *p, *t;
- pid_t pid;
-
- rcu_read_lock();
- for_each_process_thread(p, t) {
- if (is_closid_match(t, r) || is_rmid_match(t, r)) {
- pid = task_pid_vnr(t);
- if (pid)
- seq_printf(s, "%d\n", pid);
- }
- }
- rcu_read_unlock();
-}
-
-static int rdtgroup_tasks_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
- int ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (rdtgrp)
- show_rdt_tasks(rdtgrp, s);
- else
- ret = -ENOENT;
- rdtgroup_kn_unlock(of->kn);
-
- return ret;
-}
-
-static int rdtgroup_closid_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
- int ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (rdtgrp)
- seq_printf(s, "%u\n", rdtgrp->closid);
- else
- ret = -ENOENT;
- rdtgroup_kn_unlock(of->kn);
-
- return ret;
-}
-
-static int rdtgroup_rmid_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
- int ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (rdtgrp)
- seq_printf(s, "%u\n", rdtgrp->mon.rmid);
- else
- ret = -ENOENT;
- rdtgroup_kn_unlock(of->kn);
-
- return ret;
-}
-
-#ifdef CONFIG_PROC_CPU_RESCTRL
-
-/*
- * A task can only be part of one resctrl control group and of one monitor
- * group which is associated to that control group.
- *
- * 1) res:
- * mon:
- *
- * resctrl is not available.
- *
- * 2) res:/
- * mon:
- *
- * Task is part of the root resctrl control group, and it is not associated
- * to any monitor group.
- *
- * 3) res:/
- * mon:mon0
- *
- * Task is part of the root resctrl control group and monitor group mon0.
- *
- * 4) res:group0
- * mon:
- *
- * Task is part of resctrl control group group0, and it is not associated
- * to any monitor group.
- *
- * 5) res:group0
- * mon:mon1
- *
- * Task is part of resctrl control group group0 and monitor group mon1.
- */
-int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *tsk)
-{
- struct rdtgroup *rdtg;
- int ret = 0;
-
- mutex_lock(&rdtgroup_mutex);
-
- /* Return empty if resctrl has not been mounted. */
- if (!resctrl_mounted) {
- seq_puts(s, "res:\nmon:\n");
- goto unlock;
- }
-
- list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
- struct rdtgroup *crg;
-
- /*
- * Task information is only relevant for shareable
- * and exclusive groups.
- */
- if (rdtg->mode != RDT_MODE_SHAREABLE &&
- rdtg->mode != RDT_MODE_EXCLUSIVE)
- continue;
-
- if (!resctrl_arch_match_closid(tsk, rdtg->closid))
- continue;
-
- seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
- rdt_kn_name(rdtg->kn));
- seq_puts(s, "mon:");
- list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
- mon.crdtgrp_list) {
- if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
- crg->mon.rmid))
- continue;
- seq_printf(s, "%s", rdt_kn_name(crg->kn));
- break;
- }
- seq_putc(s, '\n');
- goto unlock;
- }
- /*
- * The above search should succeed. Otherwise return
- * with an error.
- */
- ret = -ENOENT;
-unlock:
- mutex_unlock(&rdtgroup_mutex);
-
- return ret;
-}
-#endif
-
-static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- int len;
-
- mutex_lock(&rdtgroup_mutex);
- len = seq_buf_used(&last_cmd_status);
- if (len)
- seq_printf(seq, "%.*s", len, last_cmd_status_buf);
- else
- seq_puts(seq, "ok\n");
- mutex_unlock(&rdtgroup_mutex);
- return 0;
-}
-
-static void *rdt_kn_parent_priv(struct kernfs_node *kn)
-{
- /*
- * The parent pointer is only valid within RCU section since it can be
- * replaced.
- */
- guard(rcu)();
- return rcu_dereference(kn->__parent)->priv;
-}
-
-static int rdt_num_closids_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
-
- seq_printf(seq, "%u\n", s->num_closid);
- return 0;
-}
-
-static int rdt_default_ctrl_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
- return 0;
-}
-
-static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
- return 0;
-}
-
-static int rdt_shareable_bits_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%x\n", r->cache.shareable_bits);
- return 0;
-}
-
-/*
- * rdt_bit_usage_show - Display current usage of resources
- *
- * A domain is a shared resource that can now be allocated differently. Here
- * we display the current regions of the domain as an annotated bitmask.
- * For each domain of this resource its allocation bitmask
- * is annotated as below to indicate the current usage of the corresponding bit:
- * 0 - currently unused
- * X - currently available for sharing and used by software and hardware
- * H - currently used by hardware only but available for software use
- * S - currently used and shareable by software only
- * E - currently used exclusively by one resource group
- * P - currently pseudo-locked by one resource group
- */
-static int rdt_bit_usage_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- /*
- * Use unsigned long even though only 32 bits are used to ensure
- * test_bit() is used safely.
- */
- unsigned long sw_shareable = 0, hw_shareable = 0;
- unsigned long exclusive = 0, pseudo_locked = 0;
- struct rdt_resource *r = s->res;
- struct rdt_ctrl_domain *dom;
- int i, hwb, swb, excl, psl;
- enum rdtgrp_mode mode;
- bool sep = false;
- u32 ctrl_val;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
- if (sep)
- seq_putc(seq, ';');
- sw_shareable = 0;
- exclusive = 0;
- seq_printf(seq, "%d=", dom->hdr.id);
- for (i = 0; i < closids_supported(); i++) {
- if (!closid_allocated(i))
- continue;
- ctrl_val = resctrl_arch_get_config(r, dom, i,
- s->conf_type);
- mode = rdtgroup_mode_by_closid(i);
- switch (mode) {
- case RDT_MODE_SHAREABLE:
- sw_shareable |= ctrl_val;
- break;
- case RDT_MODE_EXCLUSIVE:
- exclusive |= ctrl_val;
- break;
- case RDT_MODE_PSEUDO_LOCKSETUP:
- /*
- * RDT_MODE_PSEUDO_LOCKSETUP is possible
- * here but not included since the CBM
- * associated with this CLOSID in this mode
- * is not initialized and no task or cpu can be
- * assigned this CLOSID.
- */
- break;
- case RDT_MODE_PSEUDO_LOCKED:
- case RDT_NUM_MODES:
- WARN(1,
- "invalid mode for closid %d\n", i);
- break;
- }
- }
- for (i = r->cache.cbm_len - 1; i >= 0; i--) {
- pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, &hw_shareable);
- swb = test_bit(i, &sw_shareable);
- excl = test_bit(i, &exclusive);
- psl = test_bit(i, &pseudo_locked);
- if (hwb && swb)
- seq_putc(seq, 'X');
- else if (hwb && !swb)
- seq_putc(seq, 'H');
- else if (!hwb && swb)
- seq_putc(seq, 'S');
- else if (excl)
- seq_putc(seq, 'E');
- else if (psl)
- seq_putc(seq, 'P');
- else /* Unused bits remain */
- seq_putc(seq, '0');
- }
- sep = true;
- }
- seq_putc(seq, '\n');
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return 0;
-}
-
-static int rdt_min_bw_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%u\n", r->membw.min_bw);
- return 0;
-}
-
-static int rdt_num_rmids_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
-
- seq_printf(seq, "%d\n", r->num_rmid);
-
- return 0;
-}
-
-static int rdt_mon_features_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- struct mon_evt *mevt;
-
- list_for_each_entry(mevt, &r->evt_list, list) {
- seq_printf(seq, "%s\n", mevt->name);
- if (mevt->configurable)
- seq_printf(seq, "%s_config\n", mevt->name);
- }
-
- return 0;
-}
-
-static int rdt_bw_gran_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%u\n", r->membw.bw_gran);
- return 0;
-}
-
-static int rdt_delay_linear_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%u\n", r->membw.delay_linear);
- return 0;
-}
-
-static int max_threshold_occ_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
-
- return 0;
-}
-
-static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- switch (r->membw.throttle_mode) {
- case THREAD_THROTTLE_PER_THREAD:
- seq_puts(seq, "per-thread\n");
- return 0;
- case THREAD_THROTTLE_MAX:
- seq_puts(seq, "max\n");
- return 0;
- case THREAD_THROTTLE_UNDEFINED:
- seq_puts(seq, "undefined\n");
- return 0;
- }
-
- WARN_ON_ONCE(1);
-
- return 0;
-}
-
-static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- unsigned int bytes;
- int ret;
-
- ret = kstrtouint(buf, 0, &bytes);
- if (ret)
- return ret;
-
- if (bytes > resctrl_rmid_realloc_limit)
- return -EINVAL;
-
- resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
-
- return nbytes;
-}
-
-/*
- * rdtgroup_mode_show - Display mode of this resource group
- */
-static int rdtgroup_mode_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
-
- seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
-
- rdtgroup_kn_unlock(of->kn);
- return 0;
-}
-
-static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
-{
- switch (my_type) {
- case CDP_CODE:
- return CDP_DATA;
- case CDP_DATA:
- return CDP_CODE;
- default:
- case CDP_NONE:
- return CDP_NONE;
- }
-}
-
-static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r = s->res;
-
- seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
-
- return 0;
-}
-
-/**
- * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
- * @r: Resource to which domain instance @d belongs.
- * @d: The domain instance for which @closid is being tested.
- * @cbm: Capacity bitmask being tested.
- * @closid: Intended closid for @cbm.
- * @type: CDP type of @r.
- * @exclusive: Only check if overlaps with exclusive resource groups
- *
- * Checks if provided @cbm intended to be used for @closid on domain
- * @d overlaps with any other closids or other hardware usage associated
- * with this domain. If @exclusive is true then only overlaps with
- * resource groups in exclusive mode will be considered. If @exclusive
- * is false then overlaps with any resource group or hardware entities
- * will be considered.
- *
- * @cbm is unsigned long, even if only 32 bits are used, to make the
- * bitmap functions work correctly.
- *
- * Return: false if CBM does not overlap, true if it does.
- */
-static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
- unsigned long cbm, int closid,
- enum resctrl_conf_type type, bool exclusive)
-{
- enum rdtgrp_mode mode;
- unsigned long ctrl_b;
- int i;
-
- /* Check for any overlap with regions used by hardware directly */
- if (!exclusive) {
- ctrl_b = r->cache.shareable_bits;
- if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
- return true;
- }
-
- /* Check for overlap with other resource groups */
- for (i = 0; i < closids_supported(); i++) {
- ctrl_b = resctrl_arch_get_config(r, d, i, type);
- mode = rdtgroup_mode_by_closid(i);
- if (closid_allocated(i) && i != closid &&
- mode != RDT_MODE_PSEUDO_LOCKSETUP) {
- if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
- if (exclusive) {
- if (mode == RDT_MODE_EXCLUSIVE)
- return true;
- continue;
- }
- return true;
- }
- }
- }
-
- return false;
-}
-
-/**
- * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
- * @s: Schema for the resource to which domain instance @d belongs.
- * @d: The domain instance for which @closid is being tested.
- * @cbm: Capacity bitmask being tested.
- * @closid: Intended closid for @cbm.
- * @exclusive: Only check if overlaps with exclusive resource groups
- *
- * Resources that can be allocated using a CBM can use the CBM to control
- * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
- * for overlap. Overlap test is not limited to the specific resource for
- * which the CBM is intended though - when dealing with CDP resources that
- * share the underlying hardware the overlap check should be performed on
- * the CDP resource sharing the hardware also.
- *
- * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
- * overlap test.
- *
- * Return: true if CBM overlap detected, false if there is no overlap
- */
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
- unsigned long cbm, int closid, bool exclusive)
-{
- enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
- struct rdt_resource *r = s->res;
-
- if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
- exclusive))
- return true;
-
- if (!resctrl_arch_get_cdp_enabled(r->rid))
- return false;
- return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
-}
-
-/**
- * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
- * @rdtgrp: Resource group identified through its closid.
- *
- * An exclusive resource group implies that there should be no sharing of
- * its allocated resources. At the time this group is considered to be
- * exclusive this test can determine if its current schemata supports this
- * setting by testing for overlap with all other resource groups.
- *
- * Return: true if resource group can be exclusive, false if there is overlap
- * with allocations of other resource groups and thus this resource group
- * cannot be exclusive.
- */
-static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
-{
- int closid = rdtgrp->closid;
- struct rdt_ctrl_domain *d;
- struct resctrl_schema *s;
- struct rdt_resource *r;
- bool has_cache = false;
- u32 ctrl;
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- list_for_each_entry(s, &resctrl_schema_all, list) {
- r = s->res;
- if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
- continue;
- has_cache = true;
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- ctrl = resctrl_arch_get_config(r, d, closid,
- s->conf_type);
- if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
- rdt_last_cmd_puts("Schemata overlaps\n");
- return false;
- }
- }
- }
-
- if (!has_cache) {
- rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
- return false;
- }
-
- return true;
-}
-
-/*
- * rdtgroup_mode_write - Modify the resource group's mode
- */
-static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct rdtgroup *rdtgrp;
- enum rdtgrp_mode mode;
- int ret = 0;
-
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
-
- rdt_last_cmd_clear();
-
- mode = rdtgrp->mode;
-
- if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
- (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
- (!strcmp(buf, "pseudo-locksetup") &&
- mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
- (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
- goto out;
-
- if (mode == RDT_MODE_PSEUDO_LOCKED) {
- rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (!strcmp(buf, "shareable")) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- ret = rdtgroup_locksetup_exit(rdtgrp);
- if (ret)
- goto out;
- }
- rdtgrp->mode = RDT_MODE_SHAREABLE;
- } else if (!strcmp(buf, "exclusive")) {
- if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
- ret = -EINVAL;
- goto out;
- }
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- ret = rdtgroup_locksetup_exit(rdtgrp);
- if (ret)
- goto out;
- }
- rdtgrp->mode = RDT_MODE_EXCLUSIVE;
- } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
- !strcmp(buf, "pseudo-locksetup")) {
- ret = rdtgroup_locksetup_enter(rdtgrp);
- if (ret)
- goto out;
- rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
- } else {
- rdt_last_cmd_puts("Unknown or unsupported mode\n");
- ret = -EINVAL;
- }
-
-out:
- rdtgroup_kn_unlock(of->kn);
- return ret ?: nbytes;
-}
-
-/**
- * rdtgroup_cbm_to_size - Translate CBM to size in bytes
- * @r: RDT resource to which @d belongs.
- * @d: RDT domain instance.
- * @cbm: bitmask for which the size should be computed.
- *
- * The bitmask provided associated with the RDT domain instance @d will be
- * translated into how many bytes it represents. The size in bytes is
- * computed by first dividing the total cache size by the CBM length to
- * determine how many bytes each bit in the bitmask represents. The result
- * is multiplied with the number of bits set in the bitmask.
- *
- * @cbm is unsigned long, even if only 32 bits are used to make the
- * bitmap functions work correctly.
- */
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_ctrl_domain *d, unsigned long cbm)
-{
- unsigned int size = 0;
- struct cacheinfo *ci;
- int num_b;
-
- if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
- return size;
-
- num_b = bitmap_weight(&cbm, r->cache.cbm_len);
- ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
- if (ci)
- size = ci->size / r->cache.cbm_len * num_b;
-
- return size;
-}
-
-/*
- * rdtgroup_size_show - Display size in bytes of allocated regions
- *
- * The "size" file mirrors the layout of the "schemata" file, printing the
- * size in bytes of each region instead of the capacity bitmask.
- */
-static int rdtgroup_size_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct resctrl_schema *schema;
- enum resctrl_conf_type type;
- struct rdt_ctrl_domain *d;
- struct rdtgroup *rdtgrp;
- struct rdt_resource *r;
- unsigned int size;
- int ret = 0;
- u32 closid;
- bool sep;
- u32 ctrl;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- if (!rdtgrp->plr->d) {
- rdt_last_cmd_clear();
- rdt_last_cmd_puts("Cache domain offline\n");
- ret = -ENODEV;
- } else {
- seq_printf(s, "%*s:", max_name_width,
- rdtgrp->plr->s->name);
- size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
- rdtgrp->plr->d,
- rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
- }
- goto out;
- }
-
- closid = rdtgrp->closid;
-
- list_for_each_entry(schema, &resctrl_schema_all, list) {
- r = schema->res;
- type = schema->conf_type;
- sep = false;
- seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- if (sep)
- seq_putc(s, ';');
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- size = 0;
- } else {
- if (is_mba_sc(r))
- ctrl = d->mbps_val[closid];
- else
- ctrl = resctrl_arch_get_config(r, d,
- closid,
- type);
- if (r->rid == RDT_RESOURCE_MBA ||
- r->rid == RDT_RESOURCE_SMBA)
- size = ctrl;
- else
- size = rdtgroup_cbm_to_size(r, d, ctrl);
- }
- seq_printf(s, "%d=%u", d->hdr.id, size);
- sep = true;
- }
- seq_putc(s, '\n');
- }
-
-out:
- rdtgroup_kn_unlock(of->kn);
-
- return ret;
+ resctrl_arch_sched_in(current);
}
#define INVALID_CONFIG_INDEX UINT_MAX
@@ -1635,68 +96,12 @@ void resctrl_arch_mon_event_config_read(void *_config_info)
pr_warn_once("Invalid event id %d\n", config_info->evtid);
return;
}
- rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
+ rdmsrq(MSR_IA32_EVT_CFG_BASE + index, msrval);
/* Report only the valid event configuration bits */
config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}
-static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
-{
- smp_call_function_any(&mon_info->d->hdr.cpu_mask,
- resctrl_arch_mon_event_config_read, mon_info, 1);
-}
-
-static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
-{
- struct resctrl_mon_config_info mon_info;
- struct rdt_mon_domain *dom;
- bool sep = false;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- list_for_each_entry(dom, &r->mon_domains, hdr.list) {
- if (sep)
- seq_puts(s, ";");
-
- memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
- mon_info.r = r;
- mon_info.d = dom;
- mon_info.evtid = evtid;
- mondata_config_read(&mon_info);
-
- seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
- sep = true;
- }
- seq_puts(s, "\n");
-
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-
- return 0;
-}
-
-static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
-
- mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
-
- return 0;
-}
-
-static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
- struct seq_file *seq, void *v)
-{
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
-
- mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
-
- return 0;
-}
-
void resctrl_arch_mon_event_config_write(void *_config_info)
{
struct resctrl_mon_config_info *config_info = _config_info;
@@ -1707,638 +112,21 @@ void resctrl_arch_mon_event_config_write(void *_config_info)
pr_warn_once("Invalid event id %d\n", config_info->evtid);
return;
}
- wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0);
-}
-
-static void mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_mon_domain *d, u32 evtid, u32 val)
-{
- struct resctrl_mon_config_info mon_info = {0};
-
- /*
- * Read the current config value first. If both are the same then
- * no need to write it again.
- */
- mon_info.r = r;
- mon_info.d = d;
- mon_info.evtid = evtid;
- mondata_config_read(&mon_info);
- if (mon_info.mon_config == val)
- return;
-
- mon_info.mon_config = val;
-
- /*
- * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
- * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE
- * are scoped at the domain level. Writing any of these MSRs
- * on one CPU is observed by all the CPUs in the domain.
- */
- smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
- &mon_info, 1);
-
- /*
- * When an Event Configuration is changed, the bandwidth counters
- * for all RMIDs and Events will be cleared by the hardware. The
- * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
- * every RMID on the next read to any event for every RMID.
- * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
- * cleared while it is tracked by the hardware. Clear the
- * mbm_local and mbm_total counts for all the RMIDs.
- */
- resctrl_arch_reset_rmid_all(r, d);
-}
-
-static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
-{
- char *dom_str = NULL, *id_str;
- unsigned long dom_id, val;
- struct rdt_mon_domain *d;
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
-next:
- if (!tok || tok[0] == '\0')
- return 0;
-
- /* Start processing the strings for each domain */
- dom_str = strim(strsep(&tok, ";"));
- id_str = strsep(&dom_str, "=");
-
- if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
- rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
- return -EINVAL;
- }
-
- if (!dom_str || kstrtoul(dom_str, 16, &val)) {
- rdt_last_cmd_puts("Non-numeric event configuration value\n");
- return -EINVAL;
- }
-
- /* Value from user cannot be more than the supported set of events */
- if ((val & r->mbm_cfg_mask) != val) {
- rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
- r->mbm_cfg_mask);
- return -EINVAL;
- }
-
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->hdr.id == dom_id) {
- mbm_config_write_domain(r, d, evtid, val);
- goto next;
- }
- }
-
- return -EINVAL;
-}
-
-static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes,
- loff_t off)
-{
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- int ret;
-
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- rdt_last_cmd_clear();
-
- buf[nbytes - 1] = '\0';
-
- ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
-
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-
- return ret ?: nbytes;
-}
-
-static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes,
- loff_t off)
-{
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- int ret;
-
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- rdt_last_cmd_clear();
-
- buf[nbytes - 1] = '\0';
-
- ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
-
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-
- return ret ?: nbytes;
-}
-
-/* rdtgroup information files for one cache resource. */
-static struct rftype res_common_files[] = {
- {
- .name = "last_cmd_status",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_last_cmd_status_show,
- .fflags = RFTYPE_TOP_INFO,
- },
- {
- .name = "num_closids",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_num_closids_show,
- .fflags = RFTYPE_CTRL_INFO,
- },
- {
- .name = "mon_features",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_mon_features_show,
- .fflags = RFTYPE_MON_INFO,
- },
- {
- .name = "num_rmids",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_num_rmids_show,
- .fflags = RFTYPE_MON_INFO,
- },
- {
- .name = "cbm_mask",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_default_ctrl_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
- },
- {
- .name = "min_cbm_bits",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_min_cbm_bits_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
- },
- {
- .name = "shareable_bits",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_shareable_bits_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
- },
- {
- .name = "bit_usage",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_bit_usage_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
- },
- {
- .name = "min_bandwidth",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_min_bw_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
- },
- {
- .name = "bandwidth_gran",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_bw_gran_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
- },
- {
- .name = "delay_linear",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_delay_linear_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
- },
- /*
- * Platform specific which (if any) capabilities are provided by
- * thread_throttle_mode. Defer "fflags" initialization to platform
- * discovery.
- */
- {
- .name = "thread_throttle_mode",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_thread_throttle_mode_show,
- },
- {
- .name = "max_threshold_occupancy",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = max_threshold_occ_write,
- .seq_show = max_threshold_occ_show,
- .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
- },
- {
- .name = "mbm_total_bytes_config",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = mbm_total_bytes_config_show,
- .write = mbm_total_bytes_config_write,
- },
- {
- .name = "mbm_local_bytes_config",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = mbm_local_bytes_config_show,
- .write = mbm_local_bytes_config_write,
- },
- {
- .name = "cpus",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_cpus_write,
- .seq_show = rdtgroup_cpus_show,
- .fflags = RFTYPE_BASE,
- },
- {
- .name = "cpus_list",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_cpus_write,
- .seq_show = rdtgroup_cpus_show,
- .flags = RFTYPE_FLAGS_CPUS_LIST,
- .fflags = RFTYPE_BASE,
- },
- {
- .name = "tasks",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_tasks_write,
- .seq_show = rdtgroup_tasks_show,
- .fflags = RFTYPE_BASE,
- },
- {
- .name = "mon_hw_id",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdtgroup_rmid_show,
- .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG,
- },
- {
- .name = "schemata",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_schemata_write,
- .seq_show = rdtgroup_schemata_show,
- .fflags = RFTYPE_CTRL_BASE,
- },
- {
- .name = "mba_MBps_event",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_mba_mbps_event_write,
- .seq_show = rdtgroup_mba_mbps_event_show,
- },
- {
- .name = "mode",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_mode_write,
- .seq_show = rdtgroup_mode_show,
- .fflags = RFTYPE_CTRL_BASE,
- },
- {
- .name = "size",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdtgroup_size_show,
- .fflags = RFTYPE_CTRL_BASE,
- },
- {
- .name = "sparse_masks",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_has_sparse_bitmasks_show,
- .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
- },
- {
- .name = "ctrl_hw_id",
- .mode = 0444,
- .kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdtgroup_closid_show,
- .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
- },
-
-};
-
-static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
-{
- struct rftype *rfts, *rft;
- int ret, len;
-
- rfts = res_common_files;
- len = ARRAY_SIZE(res_common_files);
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- if (resctrl_debug)
- fflags |= RFTYPE_DEBUG;
-
- for (rft = rfts; rft < rfts + len; rft++) {
- if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
- ret = rdtgroup_add_file(kn, rft);
- if (ret)
- goto error;
- }
- }
-
- return 0;
-error:
- pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
- while (--rft >= rfts) {
- if ((fflags & rft->fflags) == rft->fflags)
- kernfs_remove_by_name(kn, rft->name);
- }
- return ret;
-}
-
-static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
-{
- struct rftype *rfts, *rft;
- int len;
-
- rfts = res_common_files;
- len = ARRAY_SIZE(res_common_files);
-
- for (rft = rfts; rft < rfts + len; rft++) {
- if (!strcmp(rft->name, name))
- return rft;
- }
-
- return NULL;
-}
-
-static void thread_throttle_mode_init(void)
-{
- enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
- struct rdt_resource *r_mba, *r_smba;
-
- r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
- if (r_mba->alloc_capable &&
- r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
- throttle_mode = r_mba->membw.throttle_mode;
-
- r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
- if (r_smba->alloc_capable &&
- r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
- throttle_mode = r_smba->membw.throttle_mode;
-
- if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
- return;
-
- resctrl_file_fflags_init("thread_throttle_mode",
- RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
-}
-
-void resctrl_file_fflags_init(const char *config, unsigned long fflags)
-{
- struct rftype *rft;
-
- rft = rdtgroup_get_rftype_by_name(config);
- if (rft)
- rft->fflags = fflags;
-}
-
-/**
- * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
- * @r: The resource group with which the file is associated.
- * @name: Name of the file
- *
- * The permissions of named resctrl file, directory, or link are modified
- * to not allow read, write, or execute by any user.
- *
- * WARNING: This function is intended to communicate to the user that the
- * resctrl file has been locked down - that it is not relevant to the
- * particular state the system finds itself in. It should not be relied
- * on to protect from user access because after the file's permissions
- * are restricted the user can still change the permissions using chmod
- * from the command line.
- *
- * Return: 0 on success, <0 on failure.
- */
-int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
-{
- struct iattr iattr = {.ia_valid = ATTR_MODE,};
- struct kernfs_node *kn;
- int ret = 0;
-
- kn = kernfs_find_and_get_ns(r->kn, name, NULL);
- if (!kn)
- return -ENOENT;
-
- switch (kernfs_type(kn)) {
- case KERNFS_DIR:
- iattr.ia_mode = S_IFDIR;
- break;
- case KERNFS_FILE:
- iattr.ia_mode = S_IFREG;
- break;
- case KERNFS_LINK:
- iattr.ia_mode = S_IFLNK;
- break;
- }
-
- ret = kernfs_setattr(kn, &iattr);
- kernfs_put(kn);
- return ret;
-}
-
-/**
- * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
- * @r: The resource group with which the file is associated.
- * @name: Name of the file
- * @mask: Mask of permissions that should be restored
- *
- * Restore the permissions of the named file. If @name is a directory the
- * permissions of its parent will be used.
- *
- * Return: 0 on success, <0 on failure.
- */
-int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
- umode_t mask)
-{
- struct iattr iattr = {.ia_valid = ATTR_MODE,};
- struct kernfs_node *kn, *parent;
- struct rftype *rfts, *rft;
- int ret, len;
-
- rfts = res_common_files;
- len = ARRAY_SIZE(res_common_files);
-
- for (rft = rfts; rft < rfts + len; rft++) {
- if (!strcmp(rft->name, name))
- iattr.ia_mode = rft->mode & mask;
- }
-
- kn = kernfs_find_and_get_ns(r->kn, name, NULL);
- if (!kn)
- return -ENOENT;
-
- switch (kernfs_type(kn)) {
- case KERNFS_DIR:
- parent = kernfs_get_parent(kn);
- if (parent) {
- iattr.ia_mode |= parent->mode;
- kernfs_put(parent);
- }
- iattr.ia_mode |= S_IFDIR;
- break;
- case KERNFS_FILE:
- iattr.ia_mode |= S_IFREG;
- break;
- case KERNFS_LINK:
- iattr.ia_mode |= S_IFLNK;
- break;
- }
-
- ret = kernfs_setattr(kn, &iattr);
- kernfs_put(kn);
- return ret;
-}
-
-static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
- unsigned long fflags)
-{
- struct kernfs_node *kn_subdir;
- int ret;
-
- kn_subdir = kernfs_create_dir(kn_info, name,
- kn_info->mode, priv);
- if (IS_ERR(kn_subdir))
- return PTR_ERR(kn_subdir);
-
- ret = rdtgroup_kn_set_ugid(kn_subdir);
- if (ret)
- return ret;
-
- ret = rdtgroup_add_files(kn_subdir, fflags);
- if (!ret)
- kernfs_activate(kn_subdir);
-
- return ret;
-}
-
-static unsigned long fflags_from_resource(struct rdt_resource *r)
-{
- switch (r->rid) {
- case RDT_RESOURCE_L3:
- case RDT_RESOURCE_L2:
- return RFTYPE_RES_CACHE;
- case RDT_RESOURCE_MBA:
- case RDT_RESOURCE_SMBA:
- return RFTYPE_RES_MB;
- }
-
- return WARN_ON_ONCE(1);
-}
-
-static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
-{
- struct resctrl_schema *s;
- struct rdt_resource *r;
- unsigned long fflags;
- char name[32];
- int ret;
-
- /* create the directory */
- kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
- if (IS_ERR(kn_info))
- return PTR_ERR(kn_info);
-
- ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
- if (ret)
- goto out_destroy;
-
- /* loop over enabled controls, these are all alloc_capable */
- list_for_each_entry(s, &resctrl_schema_all, list) {
- r = s->res;
- fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
- ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
- if (ret)
- goto out_destroy;
- }
-
- for_each_mon_capable_rdt_resource(r) {
- fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
- sprintf(name, "%s_MON", r->name);
- ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
- if (ret)
- goto out_destroy;
- }
-
- ret = rdtgroup_kn_set_ugid(kn_info);
- if (ret)
- goto out_destroy;
-
- kernfs_activate(kn_info);
-
- return 0;
-
-out_destroy:
- kernfs_remove(kn_info);
- return ret;
-}
-
-static int
-mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
- char *name, struct kernfs_node **dest_kn)
-{
- struct kernfs_node *kn;
- int ret;
-
- /* create the directory */
- kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- if (dest_kn)
- *dest_kn = kn;
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
- goto out_destroy;
-
- kernfs_activate(kn);
-
- return 0;
-
-out_destroy:
- kernfs_remove(kn);
- return ret;
+ wrmsrq(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config);
}
static void l3_qos_cfg_update(void *arg)
{
bool *enable = arg;
- wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
+ wrmsrq(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
}
static void l2_qos_cfg_update(void *arg)
{
bool *enable = arg;
- wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
-}
-
-static inline bool is_mba_linear(void)
-{
- return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
+ wrmsrq(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
}
static int set_cache_qos_cfg(int level, bool enable)
@@ -2396,76 +184,6 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
l3_qos_cfg_update(&hw_res->cdp_enabled);
}
-static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
-{
- u32 num_closid = resctrl_arch_get_num_closid(r);
- int cpu = cpumask_any(&d->hdr.cpu_mask);
- int i;
-
- d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
- GFP_KERNEL, cpu_to_node(cpu));
- if (!d->mbps_val)
- return -ENOMEM;
-
- for (i = 0; i < num_closid; i++)
- d->mbps_val[i] = MBA_MAX_MBPS;
-
- return 0;
-}
-
-static void mba_sc_domain_destroy(struct rdt_resource *r,
- struct rdt_ctrl_domain *d)
-{
- kfree(d->mbps_val);
- d->mbps_val = NULL;
-}
-
-/*
- * MBA software controller is supported only if
- * MBM is supported and MBA is in linear scale,
- * and the MBM monitor scope is the same as MBA
- * control scope.
- */
-static bool supports_mba_mbps(void)
-{
- struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-
- return (resctrl_is_mbm_enabled() &&
- r->alloc_capable && is_mba_linear() &&
- r->ctrl_scope == rmbm->mon_scope);
-}
-
-/*
- * Enable or disable the MBA software controller
- * which helps user specify bandwidth in MBps.
- */
-static int set_mba_sc(bool mba_sc)
-{
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
- u32 num_closid = resctrl_arch_get_num_closid(r);
- struct rdt_ctrl_domain *d;
- unsigned long fflags;
- int i;
-
- if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
- return -EINVAL;
-
- r->membw.mba_sc = mba_sc;
-
- rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
-
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- for (i = 0; i < num_closid; i++)
- d->mbps_val[i] = MBA_MAX_MBPS;
- }
-
- fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
- resctrl_file_fflags_init("mba_MBps_event", fflags);
-
- return 0;
-}
-
static int cdp_enable(int level)
{
struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
@@ -2506,419 +224,9 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
return 0;
}
-/*
- * We don't allow rdtgroup directories to be created anywhere
- * except the root directory. Thus when looking for the rdtgroup
- * structure for a kernfs node we are either looking at a directory,
- * in which case the rdtgroup structure is pointed at by the "priv"
- * field, otherwise we have a file, and need only look to the parent
- * to find the rdtgroup.
- */
-static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
-{
- if (kernfs_type(kn) == KERNFS_DIR) {
- /*
- * All the resource directories use "kn->priv"
- * to point to the "struct rdtgroup" for the
- * resource. "info" and its subdirectories don't
- * have rdtgroup structures, so return NULL here.
- */
- if (kn == kn_info ||
- rcu_access_pointer(kn->__parent) == kn_info)
- return NULL;
- else
- return kn->priv;
- } else {
- return rdt_kn_parent_priv(kn);
- }
-}
-
-static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
-{
- atomic_inc(&rdtgrp->waitcount);
- kernfs_break_active_protection(kn);
-}
-
-static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
-{
- if (atomic_dec_and_test(&rdtgrp->waitcount) &&
- (rdtgrp->flags & RDT_DELETED)) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- rdtgroup_pseudo_lock_remove(rdtgrp);
- kernfs_unbreak_active_protection(kn);
- rdtgroup_remove(rdtgrp);
- } else {
- kernfs_unbreak_active_protection(kn);
- }
-}
-
-struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
-{
- struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
-
- if (!rdtgrp)
- return NULL;
-
- rdtgroup_kn_get(rdtgrp, kn);
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- /* Was this group deleted while we waited? */
- if (rdtgrp->flags & RDT_DELETED)
- return NULL;
-
- return rdtgrp;
-}
-
-void rdtgroup_kn_unlock(struct kernfs_node *kn)
-{
- struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
-
- if (!rdtgrp)
- return;
-
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-
- rdtgroup_kn_put(rdtgrp, kn);
-}
-
-static int mkdir_mondata_all(struct kernfs_node *parent_kn,
- struct rdtgroup *prgrp,
- struct kernfs_node **mon_data_kn);
-
-static void rdt_disable_ctx(void)
-{
- resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
- resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
- set_mba_sc(false);
-
- resctrl_debug = false;
-}
-
-static int rdt_enable_ctx(struct rdt_fs_context *ctx)
-{
- int ret = 0;
-
- if (ctx->enable_cdpl2) {
- ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
- if (ret)
- goto out_done;
- }
-
- if (ctx->enable_cdpl3) {
- ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
- if (ret)
- goto out_cdpl2;
- }
-
- if (ctx->enable_mba_mbps) {
- ret = set_mba_sc(true);
- if (ret)
- goto out_cdpl3;
- }
-
- if (ctx->enable_debug)
- resctrl_debug = true;
-
- return 0;
-
-out_cdpl3:
- resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
-out_cdpl2:
- resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
-out_done:
- return ret;
-}
-
-static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
-{
- struct resctrl_schema *s;
- const char *suffix = "";
- int ret, cl;
-
- s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
-
- s->res = r;
- s->num_closid = resctrl_arch_get_num_closid(r);
- if (resctrl_arch_get_cdp_enabled(r->rid))
- s->num_closid /= 2;
-
- s->conf_type = type;
- switch (type) {
- case CDP_CODE:
- suffix = "CODE";
- break;
- case CDP_DATA:
- suffix = "DATA";
- break;
- case CDP_NONE:
- suffix = "";
- break;
- }
-
- ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
- if (ret >= sizeof(s->name)) {
- kfree(s);
- return -EINVAL;
- }
-
- cl = strlen(s->name);
-
- /*
- * If CDP is supported by this resource, but not enabled,
- * include the suffix. This ensures the tabular format of the
- * schemata file does not change between mounts of the filesystem.
- */
- if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
- cl += 4;
-
- if (cl > max_name_width)
- max_name_width = cl;
-
- switch (r->schema_fmt) {
- case RESCTRL_SCHEMA_BITMAP:
- s->fmt_str = "%d=%x";
- break;
- case RESCTRL_SCHEMA_RANGE:
- s->fmt_str = "%d=%u";
- break;
- }
-
- if (WARN_ON_ONCE(!s->fmt_str)) {
- kfree(s);
- return -EINVAL;
- }
-
- INIT_LIST_HEAD(&s->list);
- list_add(&s->list, &resctrl_schema_all);
-
- return 0;
-}
-
-static int schemata_list_create(void)
-{
- struct rdt_resource *r;
- int ret = 0;
-
- for_each_alloc_capable_rdt_resource(r) {
- if (resctrl_arch_get_cdp_enabled(r->rid)) {
- ret = schemata_list_add(r, CDP_CODE);
- if (ret)
- break;
-
- ret = schemata_list_add(r, CDP_DATA);
- } else {
- ret = schemata_list_add(r, CDP_NONE);
- }
-
- if (ret)
- break;
- }
-
- return ret;
-}
-
-static void schemata_list_destroy(void)
-{
- struct resctrl_schema *s, *tmp;
-
- list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
- list_del(&s->list);
- kfree(s);
- }
-}
-
-static int rdt_get_tree(struct fs_context *fc)
-{
- struct rdt_fs_context *ctx = rdt_fc2context(fc);
- unsigned long flags = RFTYPE_CTRL_BASE;
- struct rdt_mon_domain *dom;
- struct rdt_resource *r;
- int ret;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- /*
- * resctrl file system can only be mounted once.
- */
- if (resctrl_mounted) {
- ret = -EBUSY;
- goto out;
- }
-
- ret = rdtgroup_setup_root(ctx);
- if (ret)
- goto out;
-
- ret = rdt_enable_ctx(ctx);
- if (ret)
- goto out_root;
-
- ret = schemata_list_create();
- if (ret) {
- schemata_list_destroy();
- goto out_ctx;
- }
-
- closid_init();
-
- if (resctrl_arch_mon_capable())
- flags |= RFTYPE_MON;
-
- ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
- if (ret)
- goto out_schemata_free;
-
- kernfs_activate(rdtgroup_default.kn);
-
- ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
- if (ret < 0)
- goto out_schemata_free;
-
- if (resctrl_arch_mon_capable()) {
- ret = mongroup_create_dir(rdtgroup_default.kn,
- &rdtgroup_default, "mon_groups",
- &kn_mongrp);
- if (ret < 0)
- goto out_info;
-
- ret = mkdir_mondata_all(rdtgroup_default.kn,
- &rdtgroup_default, &kn_mondata);
- if (ret < 0)
- goto out_mongrp;
- rdtgroup_default.mon.mon_data_kn = kn_mondata;
- }
-
- ret = rdt_pseudo_lock_init();
- if (ret)
- goto out_mondata;
-
- ret = kernfs_get_tree(fc);
- if (ret < 0)
- goto out_psl;
-
- if (resctrl_arch_alloc_capable())
- resctrl_arch_enable_alloc();
- if (resctrl_arch_mon_capable())
- resctrl_arch_enable_mon();
-
- if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
- resctrl_mounted = true;
-
- if (resctrl_is_mbm_enabled()) {
- r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- list_for_each_entry(dom, &r->mon_domains, hdr.list)
- mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
- RESCTRL_PICK_ANY_CPU);
- }
-
- goto out;
-
-out_psl:
- rdt_pseudo_lock_release();
-out_mondata:
- if (resctrl_arch_mon_capable())
- kernfs_remove(kn_mondata);
-out_mongrp:
- if (resctrl_arch_mon_capable())
- kernfs_remove(kn_mongrp);
-out_info:
- kernfs_remove(kn_info);
-out_schemata_free:
- schemata_list_destroy();
-out_ctx:
- rdt_disable_ctx();
-out_root:
- rdtgroup_destroy_root();
-out:
- rdt_last_cmd_clear();
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return ret;
-}
-
-enum rdt_param {
- Opt_cdp,
- Opt_cdpl2,
- Opt_mba_mbps,
- Opt_debug,
- nr__rdt_params
-};
-
-static const struct fs_parameter_spec rdt_fs_parameters[] = {
- fsparam_flag("cdp", Opt_cdp),
- fsparam_flag("cdpl2", Opt_cdpl2),
- fsparam_flag("mba_MBps", Opt_mba_mbps),
- fsparam_flag("debug", Opt_debug),
- {}
-};
-
-static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
-{
- struct rdt_fs_context *ctx = rdt_fc2context(fc);
- struct fs_parse_result result;
- const char *msg;
- int opt;
-
- opt = fs_parse(fc, rdt_fs_parameters, param, &result);
- if (opt < 0)
- return opt;
-
- switch (opt) {
- case Opt_cdp:
- ctx->enable_cdpl3 = true;
- return 0;
- case Opt_cdpl2:
- ctx->enable_cdpl2 = true;
- return 0;
- case Opt_mba_mbps:
- msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
- if (!supports_mba_mbps())
- return invalfc(fc, msg);
- ctx->enable_mba_mbps = true;
- return 0;
- case Opt_debug:
- ctx->enable_debug = true;
- return 0;
- }
-
- return -EINVAL;
-}
-
-static void rdt_fs_context_free(struct fs_context *fc)
+bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
{
- struct rdt_fs_context *ctx = rdt_fc2context(fc);
-
- kernfs_free_fs_context(fc);
- kfree(ctx);
-}
-
-static const struct fs_context_operations rdt_fs_context_ops = {
- .free = rdt_fs_context_free,
- .parse_param = rdt_parse_param,
- .get_tree = rdt_get_tree,
-};
-
-static int rdt_init_fs_context(struct fs_context *fc)
-{
- struct rdt_fs_context *ctx;
-
- ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
- fc->fs_private = &ctx->kfc;
- fc->ops = &rdt_fs_context_ops;
- put_user_ns(fc->user_ns);
- fc->user_ns = get_user_ns(&init_user_ns);
- fc->global = true;
- return 0;
+ return rdt_resources_all[l].cdp_enabled;
}
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
@@ -2952,1460 +260,3 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
return;
}
-
-/*
- * Move tasks from one to the other group. If @from is NULL, then all tasks
- * in the systems are moved unconditionally (used for teardown).
- *
- * If @mask is not NULL the cpus on which moved tasks are running are set
- * in that mask so the update smp function call is restricted to affected
- * cpus.
- */
-static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
- struct cpumask *mask)
-{
- struct task_struct *p, *t;
-
- read_lock(&tasklist_lock);
- for_each_process_thread(p, t) {
- if (!from || is_closid_match(t, from) ||
- is_rmid_match(t, from)) {
- resctrl_arch_set_closid_rmid(t, to->closid,
- to->mon.rmid);
-
- /*
- * Order the closid/rmid stores above before the loads
- * in task_curr(). This pairs with the full barrier
- * between the rq->curr update and resctrl_sched_in()
- * during context switch.
- */
- smp_mb();
-
- /*
- * If the task is on a CPU, set the CPU in the mask.
- * The detection is inaccurate as tasks might move or
- * schedule before the smp function call takes place.
- * In such a case the function call is pointless, but
- * there is no other side effect.
- */
- if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
- cpumask_set_cpu(task_cpu(t), mask);
- }
- }
- read_unlock(&tasklist_lock);
-}
-
-static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
-{
- struct rdtgroup *sentry, *stmp;
- struct list_head *head;
-
- head = &rdtgrp->mon.crdtgrp_list;
- list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
- free_rmid(sentry->closid, sentry->mon.rmid);
- list_del(&sentry->mon.crdtgrp_list);
-
- if (atomic_read(&sentry->waitcount) != 0)
- sentry->flags = RDT_DELETED;
- else
- rdtgroup_remove(sentry);
- }
-}
-
-/*
- * Forcibly remove all of subdirectories under root.
- */
-static void rmdir_all_sub(void)
-{
- struct rdtgroup *rdtgrp, *tmp;
-
- /* Move all tasks to the default resource group */
- rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
-
- list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
- /* Free any child rmids */
- free_all_child_rdtgrp(rdtgrp);
-
- /* Remove each rdtgroup other than root */
- if (rdtgrp == &rdtgroup_default)
- continue;
-
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- rdtgroup_pseudo_lock_remove(rdtgrp);
-
- /*
- * Give any CPUs back to the default group. We cannot copy
- * cpu_online_mask because a CPU might have executed the
- * offline callback already, but is still marked online.
- */
- cpumask_or(&rdtgroup_default.cpu_mask,
- &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
-
- free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-
- kernfs_remove(rdtgrp->kn);
- list_del(&rdtgrp->rdtgroup_list);
-
- if (atomic_read(&rdtgrp->waitcount) != 0)
- rdtgrp->flags = RDT_DELETED;
- else
- rdtgroup_remove(rdtgrp);
- }
- /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
- update_closid_rmid(cpu_online_mask, &rdtgroup_default);
-
- kernfs_remove(kn_info);
- kernfs_remove(kn_mongrp);
- kernfs_remove(kn_mondata);
-}
-
-static void rdt_kill_sb(struct super_block *sb)
-{
- struct rdt_resource *r;
-
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
-
- rdt_disable_ctx();
-
- /* Put everything back to default values. */
- for_each_alloc_capable_rdt_resource(r)
- resctrl_arch_reset_all_ctrls(r);
-
- rmdir_all_sub();
- rdt_pseudo_lock_release();
- rdtgroup_default.mode = RDT_MODE_SHAREABLE;
- schemata_list_destroy();
- rdtgroup_destroy_root();
- if (resctrl_arch_alloc_capable())
- resctrl_arch_disable_alloc();
- if (resctrl_arch_mon_capable())
- resctrl_arch_disable_mon();
- resctrl_mounted = false;
- kernfs_kill_sb(sb);
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
-}
-
-static struct file_system_type rdt_fs_type = {
- .name = "resctrl",
- .init_fs_context = rdt_init_fs_context,
- .parameters = rdt_fs_parameters,
- .kill_sb = rdt_kill_sb,
-};
-
-static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
- void *priv)
-{
- struct kernfs_node *kn;
- int ret = 0;
-
- kn = __kernfs_create_file(parent_kn, name, 0444,
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
- &kf_mondata_ops, priv, NULL, NULL);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret) {
- kernfs_remove(kn);
- return ret;
- }
-
- return ret;
-}
-
-static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
-{
- struct kernfs_node *kn;
-
- kn = kernfs_find_and_get(pkn, name);
- if (!kn)
- return;
- kernfs_put(kn);
-
- if (kn->dir.subdirs <= 1)
- kernfs_remove(kn);
- else
- kernfs_remove_by_name(kn, subname);
-}
-
-/*
- * Remove all subdirectories of mon_data of ctrl_mon groups
- * and monitor groups for the given domain.
- * Remove files and directories containing "sum" of domain data
- * when last domain being summed is removed.
- */
-static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_mon_domain *d)
-{
- struct rdtgroup *prgrp, *crgrp;
- char subname[32];
- bool snc_mode;
- char name[32];
-
- snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
- if (snc_mode)
- sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
-
- list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
-
- list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
- mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
- }
-}
-
-static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
- struct rdt_resource *r, struct rdtgroup *prgrp,
- bool do_sum)
-{
- struct rmid_read rr = {0};
- union mon_data_bits priv;
- struct mon_evt *mevt;
- int ret;
-
- if (WARN_ON(list_empty(&r->evt_list)))
- return -EPERM;
-
- priv.u.rid = r->rid;
- priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
- priv.u.sum = do_sum;
- list_for_each_entry(mevt, &r->evt_list, list) {
- priv.u.evtid = mevt->evtid;
- ret = mon_addfile(kn, mevt->name, priv.priv);
- if (ret)
- return ret;
-
- if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
- mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
- }
-
- return 0;
-}
-
-static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
- struct rdt_mon_domain *d,
- struct rdt_resource *r, struct rdtgroup *prgrp)
-{
- struct kernfs_node *kn, *ckn;
- char name[32];
- bool snc_mode;
- int ret = 0;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
- kn = kernfs_find_and_get(parent_kn, name);
- if (kn) {
- /*
- * rdtgroup_mutex will prevent this directory from being
- * removed. No need to keep this hold.
- */
- kernfs_put(kn);
- } else {
- kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
- goto out_destroy;
- ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
- if (ret)
- goto out_destroy;
- }
-
- if (snc_mode) {
- sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
- ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
- if (IS_ERR(ckn)) {
- ret = -EINVAL;
- goto out_destroy;
- }
-
- ret = rdtgroup_kn_set_ugid(ckn);
- if (ret)
- goto out_destroy;
-
- ret = mon_add_all_files(ckn, d, r, prgrp, false);
- if (ret)
- goto out_destroy;
- }
-
- kernfs_activate(kn);
- return 0;
-
-out_destroy:
- kernfs_remove(kn);
- return ret;
-}
-
-/*
- * Add all subdirectories of mon_data for "ctrl_mon" groups
- * and "monitor" groups with given domain id.
- */
-static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_mon_domain *d)
-{
- struct kernfs_node *parent_kn;
- struct rdtgroup *prgrp, *crgrp;
- struct list_head *head;
-
- list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- parent_kn = prgrp->mon.mon_data_kn;
- mkdir_mondata_subdir(parent_kn, d, r, prgrp);
-
- head = &prgrp->mon.crdtgrp_list;
- list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
- parent_kn = crgrp->mon.mon_data_kn;
- mkdir_mondata_subdir(parent_kn, d, r, crgrp);
- }
- }
-}
-
-static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
- struct rdt_resource *r,
- struct rdtgroup *prgrp)
-{
- struct rdt_mon_domain *dom;
- int ret;
-
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
-
- list_for_each_entry(dom, &r->mon_domains, hdr.list) {
- ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/*
- * This creates a directory mon_data which contains the monitored data.
- *
- * mon_data has one directory for each domain which are named
- * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
- * with L3 domain looks as below:
- * ./mon_data:
- * mon_L3_00
- * mon_L3_01
- * mon_L3_02
- * ...
- *
- * Each domain directory has one file per event:
- * ./mon_L3_00/:
- * llc_occupancy
- *
- */
-static int mkdir_mondata_all(struct kernfs_node *parent_kn,
- struct rdtgroup *prgrp,
- struct kernfs_node **dest_kn)
-{
- struct rdt_resource *r;
- struct kernfs_node *kn;
- int ret;
-
- /*
- * Create the mon_data directory first.
- */
- ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
- if (ret)
- return ret;
-
- if (dest_kn)
- *dest_kn = kn;
-
- /*
- * Create the subdirectories for each domain. Note that all events
- * in a domain like L3 are grouped into a resource whose domain is L3
- */
- for_each_mon_capable_rdt_resource(r) {
- ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
- if (ret)
- goto out_destroy;
- }
-
- return 0;
-
-out_destroy:
- kernfs_remove(kn);
- return ret;
-}
-
-/**
- * cbm_ensure_valid - Enforce validity on provided CBM
- * @_val: Candidate CBM
- * @r: RDT resource to which the CBM belongs
- *
- * The provided CBM represents all cache portions available for use. This
- * may be represented by a bitmap that does not consist of contiguous ones
- * and thus be an invalid CBM.
- * Here the provided CBM is forced to be a valid CBM by only considering
- * the first set of contiguous bits as valid and clearing all bits.
- * The intention here is to provide a valid default CBM with which a new
- * resource group is initialized. The user can follow this with a
- * modification to the CBM if the default does not satisfy the
- * requirements.
- */
-static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
-{
- unsigned int cbm_len = r->cache.cbm_len;
- unsigned long first_bit, zero_bit;
- unsigned long val = _val;
-
- if (!val)
- return 0;
-
- first_bit = find_first_bit(&val, cbm_len);
- zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
-
- /* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
- return (u32)val;
-}
-
-/*
- * Initialize cache resources per RDT domain
- *
- * Set the RDT domain up to start off with all usable allocations. That is,
- * all shareable and unused bits. All-zero CBM is invalid.
- */
-static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
- u32 closid)
-{
- enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
- enum resctrl_conf_type t = s->conf_type;
- struct resctrl_staged_config *cfg;
- struct rdt_resource *r = s->res;
- u32 used_b = 0, unused_b = 0;
- unsigned long tmp_cbm;
- enum rdtgrp_mode mode;
- u32 peer_ctl, ctrl_val;
- int i;
-
- cfg = &d->staged_config[t];
- cfg->have_new_ctrl = false;
- cfg->new_ctrl = r->cache.shareable_bits;
- used_b = r->cache.shareable_bits;
- for (i = 0; i < closids_supported(); i++) {
- if (closid_allocated(i) && i != closid) {
- mode = rdtgroup_mode_by_closid(i);
- if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
- /*
- * ctrl values for locksetup aren't relevant
- * until the schemata is written, and the mode
- * becomes RDT_MODE_PSEUDO_LOCKED.
- */
- continue;
- /*
- * If CDP is active include peer domain's
- * usage to ensure there is no overlap
- * with an exclusive group.
- */
- if (resctrl_arch_get_cdp_enabled(r->rid))
- peer_ctl = resctrl_arch_get_config(r, d, i,
- peer_type);
- else
- peer_ctl = 0;
- ctrl_val = resctrl_arch_get_config(r, d, i,
- s->conf_type);
- used_b |= ctrl_val | peer_ctl;
- if (mode == RDT_MODE_SHAREABLE)
- cfg->new_ctrl |= ctrl_val | peer_ctl;
- }
- }
- if (d->plr && d->plr->cbm > 0)
- used_b |= d->plr->cbm;
- unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
- unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
- cfg->new_ctrl |= unused_b;
- /*
- * Force the initial CBM to be valid, user can
- * modify the CBM based on system availability.
- */
- cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
- /*
- * Assign the u32 CBM to an unsigned long to ensure that
- * bitmap_weight() does not access out-of-bound memory.
- */
- tmp_cbm = cfg->new_ctrl;
- if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
- return -ENOSPC;
- }
- cfg->have_new_ctrl = true;
-
- return 0;
-}
-
-/*
- * Initialize cache resources with default values.
- *
- * A new RDT group is being created on an allocation capable (CAT)
- * supporting system. Set this group up to start off with all usable
- * allocations.
- *
- * If there are no more shareable bits available on any domain then
- * the entire allocation will fail.
- */
-static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
-{
- struct rdt_ctrl_domain *d;
- int ret;
-
- list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
- ret = __init_one_rdt_domain(d, s, closid);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-
-/* Initialize MBA resource with default values. */
-static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
-{
- struct resctrl_staged_config *cfg;
- struct rdt_ctrl_domain *d;
-
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- if (is_mba_sc(r)) {
- d->mbps_val[closid] = MBA_MAX_MBPS;
- continue;
- }
-
- cfg = &d->staged_config[CDP_NONE];
- cfg->new_ctrl = resctrl_get_default_ctrl(r);
- cfg->have_new_ctrl = true;
- }
-}
-
-/* Initialize the RDT group's allocations. */
-static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
-{
- struct resctrl_schema *s;
- struct rdt_resource *r;
- int ret = 0;
-
- rdt_staged_configs_clear();
-
- list_for_each_entry(s, &resctrl_schema_all, list) {
- r = s->res;
- if (r->rid == RDT_RESOURCE_MBA ||
- r->rid == RDT_RESOURCE_SMBA) {
- rdtgroup_init_mba(r, rdtgrp->closid);
- if (is_mba_sc(r))
- continue;
- } else {
- ret = rdtgroup_init_cat(s, rdtgrp->closid);
- if (ret < 0)
- goto out;
- }
-
- ret = resctrl_arch_update_domains(r, rdtgrp->closid);
- if (ret < 0) {
- rdt_last_cmd_puts("Failed to initialize allocations\n");
- goto out;
- }
-
- }
-
- rdtgrp->mode = RDT_MODE_SHAREABLE;
-
-out:
- rdt_staged_configs_clear();
- return ret;
-}
-
-static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
-{
- int ret;
-
- if (!resctrl_arch_mon_capable())
- return 0;
-
- ret = alloc_rmid(rdtgrp->closid);
- if (ret < 0) {
- rdt_last_cmd_puts("Out of RMIDs\n");
- return ret;
- }
- rdtgrp->mon.rmid = ret;
-
- ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
- if (ret) {
- rdt_last_cmd_puts("kernfs subdir error\n");
- free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
- return ret;
- }
-
- return 0;
-}
-
-static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
-{
- if (resctrl_arch_mon_capable())
- free_rmid(rgrp->closid, rgrp->mon.rmid);
-}
-
-/*
- * We allow creating mon groups only with in a directory called "mon_groups"
- * which is present in every ctrl_mon group. Check if this is a valid
- * "mon_groups" directory.
- *
- * 1. The directory should be named "mon_groups".
- * 2. The mon group itself should "not" be named "mon_groups".
- * This makes sure "mon_groups" directory always has a ctrl_mon group
- * as parent.
- */
-static bool is_mon_groups(struct kernfs_node *kn, const char *name)
-{
- return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
- strcmp(name, "mon_groups"));
-}
-
-static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
- const char *name, umode_t mode,
- enum rdt_group_type rtype, struct rdtgroup **r)
-{
- struct rdtgroup *prdtgrp, *rdtgrp;
- unsigned long files = 0;
- struct kernfs_node *kn;
- int ret;
-
- prdtgrp = rdtgroup_kn_lock_live(parent_kn);
- if (!prdtgrp) {
- ret = -ENODEV;
- goto out_unlock;
- }
-
- /*
- * Check that the parent directory for a monitor group is a "mon_groups"
- * directory.
- */
- if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- if (rtype == RDTMON_GROUP &&
- (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
- ret = -EINVAL;
- rdt_last_cmd_puts("Pseudo-locking in progress\n");
- goto out_unlock;
- }
-
- /* allocate the rdtgroup. */
- rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
- if (!rdtgrp) {
- ret = -ENOSPC;
- rdt_last_cmd_puts("Kernel out of memory\n");
- goto out_unlock;
- }
- *r = rdtgrp;
- rdtgrp->mon.parent = prdtgrp;
- rdtgrp->type = rtype;
- INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
-
- /* kernfs creates the directory for rdtgrp */
- kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
- if (IS_ERR(kn)) {
- ret = PTR_ERR(kn);
- rdt_last_cmd_puts("kernfs create error\n");
- goto out_free_rgrp;
- }
- rdtgrp->kn = kn;
-
- /*
- * kernfs_remove() will drop the reference count on "kn" which
- * will free it. But we still need it to stick around for the
- * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
- * which will be dropped by kernfs_put() in rdtgroup_remove().
- */
- kernfs_get(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret) {
- rdt_last_cmd_puts("kernfs perm error\n");
- goto out_destroy;
- }
-
- if (rtype == RDTCTRL_GROUP) {
- files = RFTYPE_BASE | RFTYPE_CTRL;
- if (resctrl_arch_mon_capable())
- files |= RFTYPE_MON;
- } else {
- files = RFTYPE_BASE | RFTYPE_MON;
- }
-
- ret = rdtgroup_add_files(kn, files);
- if (ret) {
- rdt_last_cmd_puts("kernfs fill error\n");
- goto out_destroy;
- }
-
- /*
- * The caller unlocks the parent_kn upon success.
- */
- return 0;
-
-out_destroy:
- kernfs_put(rdtgrp->kn);
- kernfs_remove(rdtgrp->kn);
-out_free_rgrp:
- kfree(rdtgrp);
-out_unlock:
- rdtgroup_kn_unlock(parent_kn);
- return ret;
-}
-
-static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
-{
- kernfs_remove(rgrp->kn);
- rdtgroup_remove(rgrp);
-}
-
-/*
- * Create a monitor group under "mon_groups" directory of a control
- * and monitor group(ctrl_mon). This is a resource group
- * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
- */
-static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
- const char *name, umode_t mode)
-{
- struct rdtgroup *rdtgrp, *prgrp;
- int ret;
-
- ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
- if (ret)
- return ret;
-
- prgrp = rdtgrp->mon.parent;
- rdtgrp->closid = prgrp->closid;
-
- ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
- if (ret) {
- mkdir_rdt_prepare_clean(rdtgrp);
- goto out_unlock;
- }
-
- kernfs_activate(rdtgrp->kn);
-
- /*
- * Add the rdtgrp to the list of rdtgrps the parent
- * ctrl_mon group has to track.
- */
- list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
-
-out_unlock:
- rdtgroup_kn_unlock(parent_kn);
- return ret;
-}
-
-/*
- * These are rdtgroups created under the root directory. Can be used
- * to allocate and monitor resources.
- */
-static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
- const char *name, umode_t mode)
-{
- struct rdtgroup *rdtgrp;
- struct kernfs_node *kn;
- u32 closid;
- int ret;
-
- ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
- if (ret)
- return ret;
-
- kn = rdtgrp->kn;
- ret = closid_alloc();
- if (ret < 0) {
- rdt_last_cmd_puts("Out of CLOSIDs\n");
- goto out_common_fail;
- }
- closid = ret;
- ret = 0;
-
- rdtgrp->closid = closid;
-
- ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
- if (ret)
- goto out_closid_free;
-
- kernfs_activate(rdtgrp->kn);
-
- ret = rdtgroup_init_alloc(rdtgrp);
- if (ret < 0)
- goto out_rmid_free;
-
- list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
-
- if (resctrl_arch_mon_capable()) {
- /*
- * Create an empty mon_groups directory to hold the subset
- * of tasks and cpus to monitor.
- */
- ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
- if (ret) {
- rdt_last_cmd_puts("kernfs subdir error\n");
- goto out_del_list;
- }
- if (is_mba_sc(NULL))
- rdtgrp->mba_mbps_event = mba_mbps_default_event;
- }
-
- goto out_unlock;
-
-out_del_list:
- list_del(&rdtgrp->rdtgroup_list);
-out_rmid_free:
- mkdir_rdt_prepare_rmid_free(rdtgrp);
-out_closid_free:
- closid_free(closid);
-out_common_fail:
- mkdir_rdt_prepare_clean(rdtgrp);
-out_unlock:
- rdtgroup_kn_unlock(parent_kn);
- return ret;
-}
-
-static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
- umode_t mode)
-{
- /* Do not accept '\n' to avoid unparsable situation. */
- if (strchr(name, '\n'))
- return -EINVAL;
-
- /*
- * If the parent directory is the root directory and RDT
- * allocation is supported, add a control and monitoring
- * subdirectory
- */
- if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
- return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
-
- /* Else, attempt to add a monitoring subdirectory. */
- if (resctrl_arch_mon_capable())
- return rdtgroup_mkdir_mon(parent_kn, name, mode);
-
- return -EPERM;
-}
-
-static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
-{
- struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
- u32 closid, rmid;
- int cpu;
-
- /* Give any tasks back to the parent group */
- rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
-
- /*
- * Update per cpu closid/rmid of the moved CPUs first.
- * Note: the closid will not change, but the arch code still needs it.
- */
- closid = prdtgrp->closid;
- rmid = prdtgrp->mon.rmid;
- for_each_cpu(cpu, &rdtgrp->cpu_mask)
- resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
-
- /*
- * Update the MSR on moved CPUs and CPUs which have moved
- * task running on them.
- */
- cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
- update_closid_rmid(tmpmask, NULL);
-
- rdtgrp->flags = RDT_DELETED;
- free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-
- /*
- * Remove the rdtgrp from the parent ctrl_mon group's list
- */
- WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
- list_del(&rdtgrp->mon.crdtgrp_list);
-
- kernfs_remove(rdtgrp->kn);
-
- return 0;
-}
-
-static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
-{
- rdtgrp->flags = RDT_DELETED;
- list_del(&rdtgrp->rdtgroup_list);
-
- kernfs_remove(rdtgrp->kn);
- return 0;
-}
-
-static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
-{
- u32 closid, rmid;
- int cpu;
-
- /* Give any tasks back to the default group */
- rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
-
- /* Give any CPUs back to the default group */
- cpumask_or(&rdtgroup_default.cpu_mask,
- &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
-
- /* Update per cpu closid and rmid of the moved CPUs first */
- closid = rdtgroup_default.closid;
- rmid = rdtgroup_default.mon.rmid;
- for_each_cpu(cpu, &rdtgrp->cpu_mask)
- resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
-
- /*
- * Update the MSR on moved CPUs and CPUs which have moved
- * task running on them.
- */
- cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
- update_closid_rmid(tmpmask, NULL);
-
- free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
- closid_free(rdtgrp->closid);
-
- rdtgroup_ctrl_remove(rdtgrp);
-
- /*
- * Free all the child monitor group rmids.
- */
- free_all_child_rdtgrp(rdtgrp);
-
- return 0;
-}
-
-static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn)
-{
- /*
- * Valid within the RCU section it was obtained or while rdtgroup_mutex
- * is held.
- */
- return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
-}
-
-static int rdtgroup_rmdir(struct kernfs_node *kn)
-{
- struct kernfs_node *parent_kn;
- struct rdtgroup *rdtgrp;
- cpumask_var_t tmpmask;
- int ret = 0;
-
- if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
- return -ENOMEM;
-
- rdtgrp = rdtgroup_kn_lock_live(kn);
- if (!rdtgrp) {
- ret = -EPERM;
- goto out;
- }
- parent_kn = rdt_kn_parent(kn);
-
- /*
- * If the rdtgroup is a ctrl_mon group and parent directory
- * is the root directory, remove the ctrl_mon group.
- *
- * If the rdtgroup is a mon group and parent directory
- * is a valid "mon_groups" directory, remove the mon group.
- */
- if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
- rdtgrp != &rdtgroup_default) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- ret = rdtgroup_ctrl_remove(rdtgrp);
- } else {
- ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
- }
- } else if (rdtgrp->type == RDTMON_GROUP &&
- is_mon_groups(parent_kn, rdt_kn_name(kn))) {
- ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
- } else {
- ret = -EPERM;
- }
-
-out:
- rdtgroup_kn_unlock(kn);
- free_cpumask_var(tmpmask);
- return ret;
-}
-
-/**
- * mongrp_reparent() - replace parent CTRL_MON group of a MON group
- * @rdtgrp: the MON group whose parent should be replaced
- * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp
- * @cpus: cpumask provided by the caller for use during this call
- *
- * Replaces the parent CTRL_MON group for a MON group, resulting in all member
- * tasks' CLOSID immediately changing to that of the new parent group.
- * Monitoring data for the group is unaffected by this operation.
- */
-static void mongrp_reparent(struct rdtgroup *rdtgrp,
- struct rdtgroup *new_prdtgrp,
- cpumask_var_t cpus)
-{
- struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
-
- WARN_ON(rdtgrp->type != RDTMON_GROUP);
- WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
-
- /* Nothing to do when simply renaming a MON group. */
- if (prdtgrp == new_prdtgrp)
- return;
-
- WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
- list_move_tail(&rdtgrp->mon.crdtgrp_list,
- &new_prdtgrp->mon.crdtgrp_list);
-
- rdtgrp->mon.parent = new_prdtgrp;
- rdtgrp->closid = new_prdtgrp->closid;
-
- /* Propagate updated closid to all tasks in this group. */
- rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
-
- update_closid_rmid(cpus, NULL);
-}
-
-static int rdtgroup_rename(struct kernfs_node *kn,
- struct kernfs_node *new_parent, const char *new_name)
-{
- struct kernfs_node *kn_parent;
- struct rdtgroup *new_prdtgrp;
- struct rdtgroup *rdtgrp;
- cpumask_var_t tmpmask;
- int ret;
-
- rdtgrp = kernfs_to_rdtgroup(kn);
- new_prdtgrp = kernfs_to_rdtgroup(new_parent);
- if (!rdtgrp || !new_prdtgrp)
- return -ENOENT;
-
- /* Release both kernfs active_refs before obtaining rdtgroup mutex. */
- rdtgroup_kn_get(rdtgrp, kn);
- rdtgroup_kn_get(new_prdtgrp, new_parent);
-
- mutex_lock(&rdtgroup_mutex);
-
- rdt_last_cmd_clear();
-
- /*
- * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
- * either kernfs_node is a file.
- */
- if (kernfs_type(kn) != KERNFS_DIR ||
- kernfs_type(new_parent) != KERNFS_DIR) {
- rdt_last_cmd_puts("Source and destination must be directories");
- ret = -EPERM;
- goto out;
- }
-
- if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
- ret = -ENOENT;
- goto out;
- }
-
- kn_parent = rdt_kn_parent(kn);
- if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
- !is_mon_groups(kn_parent, rdt_kn_name(kn))) {
- rdt_last_cmd_puts("Source must be a MON group\n");
- ret = -EPERM;
- goto out;
- }
-
- if (!is_mon_groups(new_parent, new_name)) {
- rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
- ret = -EPERM;
- goto out;
- }
-
- /*
- * If the MON group is monitoring CPUs, the CPUs must be assigned to the
- * current parent CTRL_MON group and therefore cannot be assigned to
- * the new parent, making the move illegal.
- */
- if (!cpumask_empty(&rdtgrp->cpu_mask) &&
- rdtgrp->mon.parent != new_prdtgrp) {
- rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
- ret = -EPERM;
- goto out;
- }
-
- /*
- * Allocate the cpumask for use in mongrp_reparent() to avoid the
- * possibility of failing to allocate it after kernfs_rename() has
- * succeeded.
- */
- if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto out;
- }
-
- /*
- * Perform all input validation and allocations needed to ensure
- * mongrp_reparent() will succeed before calling kernfs_rename(),
- * otherwise it would be necessary to revert this call if
- * mongrp_reparent() failed.
- */
- ret = kernfs_rename(kn, new_parent, new_name);
- if (!ret)
- mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
-
- free_cpumask_var(tmpmask);
-
-out:
- mutex_unlock(&rdtgroup_mutex);
- rdtgroup_kn_put(rdtgrp, kn);
- rdtgroup_kn_put(new_prdtgrp, new_parent);
- return ret;
-}
-
-static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
-{
- if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
- seq_puts(seq, ",cdp");
-
- if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
- seq_puts(seq, ",cdpl2");
-
- if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
- seq_puts(seq, ",mba_MBps");
-
- if (resctrl_debug)
- seq_puts(seq, ",debug");
-
- return 0;
-}
-
-static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
- .mkdir = rdtgroup_mkdir,
- .rmdir = rdtgroup_rmdir,
- .rename = rdtgroup_rename,
- .show_options = rdtgroup_show_options,
-};
-
-static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
-{
- rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
- KERNFS_ROOT_CREATE_DEACTIVATED |
- KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
- &rdtgroup_default);
- if (IS_ERR(rdt_root))
- return PTR_ERR(rdt_root);
-
- ctx->kfc.root = rdt_root;
- rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
-
- return 0;
-}
-
-static void rdtgroup_destroy_root(void)
-{
- kernfs_destroy_root(rdt_root);
- rdtgroup_default.kn = NULL;
-}
-
-static void __init rdtgroup_setup_default(void)
-{
- mutex_lock(&rdtgroup_mutex);
-
- rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
- rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
- rdtgroup_default.type = RDTCTRL_GROUP;
- INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
-
- list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
-
- mutex_unlock(&rdtgroup_mutex);
-}
-
-static void domain_destroy_mon_state(struct rdt_mon_domain *d)
-{
- bitmap_free(d->rmid_busy_llc);
- kfree(d->mbm_total);
- kfree(d->mbm_local);
-}
-
-void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
-{
- mutex_lock(&rdtgroup_mutex);
-
- if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
- mba_sc_domain_destroy(r, d);
-
- mutex_unlock(&rdtgroup_mutex);
-}
-
-void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
-{
- mutex_lock(&rdtgroup_mutex);
-
- /*
- * If resctrl is mounted, remove all the
- * per domain monitor data directories.
- */
- if (resctrl_mounted && resctrl_arch_mon_capable())
- rmdir_mondata_subdir_allrdtgrp(r, d);
-
- if (resctrl_is_mbm_enabled())
- cancel_delayed_work(&d->mbm_over);
- if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
- /*
- * When a package is going down, forcefully
- * decrement rmid->ebusy. There is no way to know
- * that the L3 was flushed and hence may lead to
- * incorrect counts in rare scenarios, but leaving
- * the RMID as busy creates RMID leaks if the
- * package never comes back.
- */
- __check_limbo(d, true);
- cancel_delayed_work(&d->cqm_limbo);
- }
-
- domain_destroy_mon_state(d);
-
- mutex_unlock(&rdtgroup_mutex);
-}
-
-/**
- * domain_setup_mon_state() - Initialise domain monitoring structures.
- * @r: The resource for the newly online domain.
- * @d: The newly online domain.
- *
- * Allocate monitor resources that belong to this domain.
- * Called when the first CPU of a domain comes online, regardless of whether
- * the filesystem is mounted.
- * During boot this may be called before global allocations have been made by
- * resctrl_mon_resource_init().
- *
- * Returns 0 for success, or -ENOMEM.
- */
-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
-{
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- size_t tsize;
-
- if (resctrl_arch_is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
- if (!d->rmid_busy_llc)
- return -ENOMEM;
- }
- if (resctrl_arch_is_mbm_total_enabled()) {
- tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
- if (!d->mbm_total) {
- bitmap_free(d->rmid_busy_llc);
- return -ENOMEM;
- }
- }
- if (resctrl_arch_is_mbm_local_enabled()) {
- tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
- if (!d->mbm_local) {
- bitmap_free(d->rmid_busy_llc);
- kfree(d->mbm_total);
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
-{
- int err = 0;
-
- mutex_lock(&rdtgroup_mutex);
-
- if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
- /* RDT_RESOURCE_MBA is never mon_capable */
- err = mba_sc_domain_allocate(r, d);
- }
-
- mutex_unlock(&rdtgroup_mutex);
-
- return err;
-}
-
-int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
-{
- int err;
-
- mutex_lock(&rdtgroup_mutex);
-
- err = domain_setup_mon_state(r, d);
- if (err)
- goto out_unlock;
-
- if (resctrl_is_mbm_enabled()) {
- INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
- mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
- RESCTRL_PICK_ANY_CPU);
- }
-
- if (resctrl_arch_is_llc_occupancy_enabled())
- INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
-
- /*
- * If the filesystem is not mounted then only the default resource group
- * exists. Creation of its directories is deferred until mount time
- * by rdt_get_tree() calling mkdir_mondata_all().
- * If resctrl is mounted, add per domain monitor data directories.
- */
- if (resctrl_mounted && resctrl_arch_mon_capable())
- mkdir_mondata_subdir_allrdtgrp(r, d);
-
-out_unlock:
- mutex_unlock(&rdtgroup_mutex);
-
- return err;
-}
-
-void resctrl_online_cpu(unsigned int cpu)
-{
- mutex_lock(&rdtgroup_mutex);
- /* The CPU is set in default rdtgroup after online. */
- cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
- mutex_unlock(&rdtgroup_mutex);
-}
-
-static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
-{
- struct rdtgroup *cr;
-
- list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
- if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
- break;
- }
-}
-
-static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu,
- struct rdt_resource *r)
-{
- struct rdt_mon_domain *d;
-
- lockdep_assert_cpus_held();
-
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- /* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
- return d;
- }
-
- return NULL;
-}
-
-void resctrl_offline_cpu(unsigned int cpu)
-{
- struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- struct rdt_mon_domain *d;
- struct rdtgroup *rdtgrp;
-
- mutex_lock(&rdtgroup_mutex);
- list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
- clear_childcpus(rdtgrp, cpu);
- break;
- }
- }
-
- if (!l3->mon_capable)
- goto out_unlock;
-
- d = get_mon_domain_from_cpu(cpu, l3);
- if (d) {
- if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
- cancel_delayed_work(&d->mbm_over);
- mbm_setup_overflow_handler(d, 0, cpu);
- }
- if (resctrl_arch_is_llc_occupancy_enabled() &&
- cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
- cancel_delayed_work(&d->cqm_limbo);
- cqm_setup_limbo_handler(d, 0, cpu);
- }
- }
-
-out_unlock:
- mutex_unlock(&rdtgroup_mutex);
-}
-
-/*
- * resctrl_init - resctrl filesystem initialization
- *
- * Setup resctrl file system including set up root, create mount point,
- * register resctrl filesystem, and initialize files under root directory.
- *
- * Return: 0 on success or -errno
- */
-int __init resctrl_init(void)
-{
- int ret = 0;
-
- seq_buf_init(&last_cmd_status, last_cmd_status_buf,
- sizeof(last_cmd_status_buf));
-
- rdtgroup_setup_default();
-
- thread_throttle_mode_init();
-
- ret = resctrl_mon_resource_init();
- if (ret)
- return ret;
-
- ret = sysfs_create_mount_point(fs_kobj, "resctrl");
- if (ret) {
- resctrl_mon_resource_exit();
- return ret;
- }
-
- ret = register_filesystem(&rdt_fs_type);
- if (ret)
- goto cleanup_mountpoint;
-
- /*
- * Adding the resctrl debugfs directory here may not be ideal since
- * it would let the resctrl debugfs directory appear on the debugfs
- * filesystem before the resctrl filesystem is mounted.
- * It may also be ok since that would enable debugging of RDT before
- * resctrl is mounted.
- * The reason why the debugfs directory is created here and not in
- * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
- * during the debugfs directory creation also &sb->s_type->i_mutex_key
- * (the lockdep class of inode->i_rwsem). Other filesystem
- * interactions (eg. SyS_getdents) have the lock ordering:
- * &sb->s_type->i_mutex_key --> &mm->mmap_lock
- * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
- * is taken, thus creating dependency:
- * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
- * issues considering the other two lock dependencies.
- * By creating the debugfs directory here we avoid a dependency
- * that may cause deadlock (even though file operations cannot
- * occur until the filesystem is mounted, but I do not know how to
- * tell lockdep that).
- */
- debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
-
- return 0;
-
-cleanup_mountpoint:
- sysfs_remove_mount_point(fs_kobj, "resctrl");
- resctrl_mon_resource_exit();
-
- return ret;
-}
-
-void __exit resctrl_exit(void)
-{
- debugfs_remove_recursive(debugfs_resctrl);
- unregister_filesystem(&rdt_fs_type);
- sysfs_remove_mount_point(fs_kobj, "resctrl");
-
- resctrl_mon_resource_exit();
-}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 16f3ca30626a..dbf6d71bdf18 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
@@ -53,7 +54,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
- { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 },
+ { X86_FEATURE_AMD_HTR_CORES, CPUID_EAX, 30, 0x80000026, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 8ce352fc72ac..6722b2fc82cf 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/vmalloc.h>
+#include <asm/msr.h>
#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
@@ -871,7 +872,7 @@ void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
WARN_ON_ONCE(preemptible());
for (i = 0; i < 4; i++)
- wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
+ wrmsrq(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
}
const struct file_operations sgx_provision_fops = {
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 01456236a6dd..e35ccdc84910 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -30,6 +30,7 @@
#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
+#include <asm/msr.h>
#include <asm/smp.h>
#include "cpu.h"
@@ -154,7 +155,7 @@ static __init bool check_for_real_bsp(u32 apic_id)
* kernel must rely on the firmware enumeration order.
*/
if (has_apic_base) {
- rdmsrl(MSR_IA32_APICBASE, msr);
+ rdmsrq(MSR_IA32_APICBASE, msr);
is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
}
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 03b3c9c3a45e..843b1655ab45 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -3,6 +3,7 @@
#include <asm/apic.h>
#include <asm/memtype.h>
+#include <asm/msr.h>
#include <asm/processor.h>
#include "cpu.h"
@@ -133,7 +134,7 @@ static void parse_fam10h_node_id(struct topo_scan *tscan)
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
return;
- rdmsrl(MSR_FAM10H_NODE_ID, nid.msr);
+ rdmsrq(MSR_FAM10H_NODE_ID, nid.msr);
store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id);
tscan->c->topo.llc_id = nid.node_id;
}
@@ -160,7 +161,7 @@ static void topoext_fixup(struct topo_scan *tscan)
if (msr_set_bit(0xc0011005, 54) <= 0)
return;
- rdmsrl(0xc0011005, msrval);
+ rdmsrq(0xc0011005, msrval);
if (msrval & BIT_64(54)) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
@@ -182,7 +183,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
if (cpu_feature_enabled(X86_FEATURE_TOPOEXT))
has_topoext = cpu_parse_topology_ext(tscan);
- if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES))
+ if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES))
tscan->c->topo.cpu_type = cpuid_ebx(0x80000026);
if (!has_topoext && !parse_8000_0008(tscan))
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index b31ee4f1657a..49782724a943 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -12,6 +12,7 @@
#include <asm/cmdline.h>
#include <asm/cpu.h>
+#include <asm/msr.h>
#include "cpu.h"
@@ -24,7 +25,7 @@ static void tsx_disable(void)
{
u64 tsx;
- rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+ rdmsrq(MSR_IA32_TSX_CTRL, tsx);
/* Force all transactions to immediately abort */
tsx |= TSX_CTRL_RTM_DISABLE;
@@ -37,14 +38,14 @@ static void tsx_disable(void)
*/
tsx |= TSX_CTRL_CPUID_CLEAR;
- wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+ wrmsrq(MSR_IA32_TSX_CTRL, tsx);
}
static void tsx_enable(void)
{
u64 tsx;
- rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+ rdmsrq(MSR_IA32_TSX_CTRL, tsx);
/* Enable the RTM feature in the cpu */
tsx &= ~TSX_CTRL_RTM_DISABLE;
@@ -56,7 +57,7 @@ static void tsx_enable(void)
*/
tsx &= ~TSX_CTRL_CPUID_CLEAR;
- wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+ wrmsrq(MSR_IA32_TSX_CTRL, tsx);
}
static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
@@ -115,13 +116,13 @@ static void tsx_clear_cpuid(void)
*/
if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) &&
boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
- rdmsrl(MSR_TSX_FORCE_ABORT, msr);
+ rdmsrq(MSR_TSX_FORCE_ABORT, msr);
msr |= MSR_TFA_TSX_CPUID_CLEAR;
- wrmsrl(MSR_TSX_FORCE_ABORT, msr);
+ wrmsrq(MSR_TSX_FORCE_ABORT, msr);
} else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
- rdmsrl(MSR_IA32_TSX_CTRL, msr);
+ rdmsrq(MSR_IA32_TSX_CTRL, msr);
msr |= TSX_CTRL_CPUID_CLEAR;
- wrmsrl(MSR_IA32_TSX_CTRL, msr);
+ wrmsrq(MSR_IA32_TSX_CTRL, msr);
}
}
@@ -146,11 +147,11 @@ static void tsx_dev_mode_disable(void)
!cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
return;
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
+ rdmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
if (mcu_opt_ctrl & RTM_ALLOW) {
mcu_opt_ctrl &= ~RTM_ALLOW;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
+ wrmsrq(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
}
}
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index 2293efd6ffa6..933fcd7ff250 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -33,7 +33,7 @@ static DEFINE_MUTEX(umwait_lock);
static void umwait_update_control_msr(void * unused)
{
lockdep_assert_irqs_disabled();
- wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0);
+ wrmsrq(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached));
}
/*
@@ -71,7 +71,7 @@ static int umwait_cpu_offline(unsigned int cpu)
* the original control MSR value in umwait_init(). So there
* is no race condition here.
*/
- wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);
+ wrmsrq(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
return 0;
}
@@ -214,7 +214,7 @@ static int __init umwait_init(void)
* changed. This is the only place where orig_umwait_control_cached
* is modified.
*/
- rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
+ rdmsrq(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
umwait_cpu_online, umwait_cpu_offline);
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
index 90eba7eb5335..89b1c8a70fe8 100644
--- a/arch/x86/kernel/cpu/zhaoxin.c
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -4,6 +4,7 @@
#include <asm/cpu.h>
#include <asm/cpufeature.h>
+#include <asm/msr.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c6fefd4585f8..71ee20102a8a 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -23,8 +23,6 @@
#include <asm/stacktrace.h>
#include <asm/unwind.h>
-int panic_on_unrecovered_nmi;
-int panic_on_io_nmi;
static int die_counter;
static struct pt_regs exec_summary_regs;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 3aad78bfcb26..cba75306e5b6 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/console.h>
#include <linux/kernel.h>
+#include <linux/kexec.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/screen_info.h>
@@ -144,6 +145,11 @@ static __init void early_serial_hw_init(unsigned divisor)
static_call(serial_out)(early_serial_base, DLL, divisor & 0xff);
static_call(serial_out)(early_serial_base, DLH, (divisor >> 8) & 0xff);
static_call(serial_out)(early_serial_base, LCR, c & ~DLAB);
+
+#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_X86_64)
+ if (static_call_query(serial_in) == io_serial_in)
+ kexec_debug_8250_port = early_serial_base;
+#endif
}
#define DEFAULT_BAUD 9600
@@ -327,6 +333,9 @@ static __init void early_pci_serial_init(char *s)
/* WARNING! assuming the address is always in the first 4G */
early_serial_base =
(unsigned long)early_ioremap(bar0 & PCI_BASE_ADDRESS_MEM_MASK, 0x10);
+#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_X86_64)
+ kexec_debug_8250_mmio32 = bar0 & PCI_BASE_ADDRESS_MEM_MASK;
+#endif
write_pci_config(bus, slot, func, PCI_COMMAND,
cmdreg|PCI_COMMAND_MEMORY);
}
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index f6d856bd50bc..10d0a720659c 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -53,7 +53,7 @@ static inline void fpregs_activate(struct fpu *fpu)
/* Internal helper for switch_fpu_return() and signal frame setup */
static inline void fpregs_restore_userregs(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
int cpu = smp_processor_id();
if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER)))
@@ -67,7 +67,7 @@ static inline void fpregs_restore_userregs(void)
* If PKRU is enabled, then the PKRU value is already
* correct because it was either set in switch_to() or in
* flush_thread(). So it is excluded because it might be
- * not up to date in current->thread.fpu.xsave state.
+ * not up to date in current->thread.fpu->xsave state.
*
* XFD state is handled in restore_fpregs_from_fpstate().
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 91d6341f281f..ea138583dd92 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -11,6 +11,7 @@
#include <asm/fpu/sched.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
+#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/irq_regs.h>
@@ -43,14 +44,27 @@ struct fpu_state_config fpu_user_cfg __ro_after_init;
*/
struct fpstate init_fpstate __ro_after_init;
-/* Track in-kernel FPU usage */
-static DEFINE_PER_CPU(bool, in_kernel_fpu);
+/*
+ * Track FPU initialization and kernel-mode usage. 'true' means the FPU is
+ * initialized and is not currently being used by the kernel:
+ */
+DEFINE_PER_CPU(bool, kernel_fpu_allowed);
/*
* Track which context is using the FPU on the CPU:
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+ if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+ return NULL;
+
+ return (void *)task + sizeof(*task);
+}
+#endif
+
/*
* Can we use the FPU in kernel mode with the
* whole "kernel_fpu_begin/end()" sequence?
@@ -61,15 +75,18 @@ bool irq_fpu_usable(void)
return false;
/*
- * In kernel FPU usage already active? This detects any explicitly
- * nested usage in task or softirq context, which is unsupported. It
- * also detects attempted usage in a hardirq that has interrupted a
- * kernel-mode FPU section.
+ * Return false in the following cases:
+ *
+ * - FPU is not yet initialized. This can happen only when the call is
+ * coming from CPU onlining, for example for microcode checksumming.
+ * - The kernel is already using the FPU, either because of explicit
+ * nesting (which should never be done), or because of implicit
+ * nesting when a hardirq interrupted a kernel-mode FPU section.
+ *
+ * The single boolean check below handles both cases:
*/
- if (this_cpu_read(in_kernel_fpu)) {
- WARN_ON_FPU(!in_hardirq());
+ if (!this_cpu_read(kernel_fpu_allowed))
return false;
- }
/*
* When not in NMI or hard interrupt context, FPU can be used in:
@@ -202,7 +219,7 @@ void fpu_reset_from_exception_fixup(void)
#if IS_ENABLED(CONFIG_KVM)
static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
-static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
+static void fpu_lock_guest_permissions(void)
{
struct fpu_state_perm *fpuperm;
u64 perm;
@@ -211,15 +228,13 @@ static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
return;
spin_lock_irq(&current->sighand->siglock);
- fpuperm = &current->group_leader->thread.fpu.guest_perm;
+ fpuperm = &x86_task_fpu(current->group_leader)->guest_perm;
perm = fpuperm->__state_perm;
/* First fpstate allocation locks down permissions. */
WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
spin_unlock_irq(&current->sighand->siglock);
-
- gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
}
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
@@ -240,7 +255,6 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
gfpu->fpstate = fpstate;
gfpu->xfeatures = fpu_kernel_cfg.default_features;
- gfpu->perm = fpu_kernel_cfg.default_features;
/*
* KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
@@ -255,7 +269,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size))
gfpu->uabi_size = fpu_user_cfg.default_size;
- fpu_init_guest_permissions(gfpu);
+ fpu_lock_guest_permissions();
return true;
}
@@ -263,16 +277,16 @@ EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
{
- struct fpstate *fps = gfpu->fpstate;
+ struct fpstate *fpstate = gfpu->fpstate;
- if (!fps)
+ if (!fpstate)
return;
- if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
+ if (WARN_ON_ONCE(!fpstate->is_valloc || !fpstate->is_guest || fpstate->in_use))
return;
gfpu->fpstate = NULL;
- vfree(fps);
+ vfree(fpstate);
}
EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
@@ -323,12 +337,12 @@ EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
*/
void fpu_sync_guest_vmexit_xfd_state(void)
{
- struct fpstate *fps = current->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
lockdep_assert_irqs_disabled();
if (fpu_state_size_dynamic()) {
- rdmsrl(MSR_IA32_XFD, fps->xfd);
- __this_cpu_write(xfd_state, fps->xfd);
+ rdmsrq(MSR_IA32_XFD, fpstate->xfd);
+ __this_cpu_write(xfd_state, fpstate->xfd);
}
}
EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
@@ -337,7 +351,7 @@ EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
struct fpstate *cur_fps = fpu->fpstate;
fpregs_lock();
@@ -431,14 +445,15 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
fpregs_lock();
WARN_ON_FPU(!irq_fpu_usable());
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, true);
+ /* Toggle kernel_fpu_allowed to false: */
+ WARN_ON_FPU(!this_cpu_read(kernel_fpu_allowed));
+ this_cpu_write(kernel_fpu_allowed, false);
if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&
!test_thread_flag(TIF_NEED_FPU_LOAD)) {
set_thread_flag(TIF_NEED_FPU_LOAD);
- save_fpregs_to_fpstate(&current->thread.fpu);
+ save_fpregs_to_fpstate(x86_task_fpu(current));
}
__cpu_invalidate_fpregs_state();
@@ -453,9 +468,10 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
void kernel_fpu_end(void)
{
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+ /* Toggle kernel_fpu_allowed back to true: */
+ WARN_ON_FPU(this_cpu_read(kernel_fpu_allowed));
+ this_cpu_write(kernel_fpu_allowed, true);
- this_cpu_write(in_kernel_fpu, false);
if (!irqs_disabled())
fpregs_unlock();
}
@@ -467,7 +483,7 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end);
*/
void fpu_sync_fpstate(struct fpu *fpu)
{
- WARN_ON_FPU(fpu != &current->thread.fpu);
+ WARN_ON_FPU(fpu != x86_task_fpu(current));
fpregs_lock();
trace_x86_fpu_before_save(fpu);
@@ -552,7 +568,7 @@ void fpstate_reset(struct fpu *fpu)
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
{
if (fpu_state_size_dynamic()) {
- struct fpu *src_fpu = &current->group_leader->thread.fpu;
+ struct fpu *src_fpu = x86_task_fpu(current->group_leader);
spin_lock_irq(&current->sighand->siglock);
/* Fork also inherits the permissions of the parent */
@@ -572,7 +588,7 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
if (!ssp)
return 0;
- xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
+ xstate = get_xsave_addr(&x86_task_fpu(dst)->fpstate->regs.xsave,
XFEATURE_CET_USER);
/*
@@ -593,8 +609,16 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long ssp)
{
- struct fpu *src_fpu = &current->thread.fpu;
- struct fpu *dst_fpu = &dst->thread.fpu;
+ /*
+ * We allocate the new FPU structure right after the end of the task struct.
+ * task allocation size already took this into account.
+ *
+ * This is safe because task_struct size is a multiple of cacheline size,
+ * thus x86_task_fpu() will always be cacheline aligned as well.
+ */
+ struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+
+ BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
@@ -657,19 +681,22 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
if (update_fpu_shstk(dst, ssp))
return 1;
- trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
return 0;
}
/*
- * Whitelist the FPU register state embedded into task_struct for hardened
- * usercopy.
+ * While struct fpu is no longer part of struct thread_struct, it is still
+ * allocated after struct task_struct in the "task_struct" kmem cache. But
+ * since FPU is expected to be part of struct thread_struct, we have to
+ * adjust for it here.
*/
void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
{
- *offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
+ /* The allocation follows struct task_struct. */
+ *offset = sizeof(struct task_struct) - offsetof(struct task_struct, thread);
+ *offset += offsetof(struct fpu, __fpstate.regs);
*size = fpu_kernel_cfg.default_size;
}
@@ -682,11 +709,18 @@ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
* a state-restore is coming: either an explicit one,
* or a reschedule.
*/
-void fpu__drop(struct fpu *fpu)
+void fpu__drop(struct task_struct *tsk)
{
+ struct fpu *fpu;
+
+ if (test_tsk_thread_flag(tsk, TIF_NEED_FPU_LOAD))
+ return;
+
+ fpu = x86_task_fpu(tsk);
+
preempt_disable();
- if (fpu == &current->thread.fpu) {
+ if (fpu == x86_task_fpu(current)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
@@ -718,9 +752,9 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
/*
* Reset current->fpu memory state to the init values.
*/
-static void fpu_reset_fpregs(void)
+static void fpu_reset_fpstate_regs(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
fpregs_lock();
__fpu_invalidate_fpregs_state(fpu);
@@ -749,11 +783,11 @@ static void fpu_reset_fpregs(void)
*/
void fpu__clear_user_states(struct fpu *fpu)
{
- WARN_ON_FPU(fpu != &current->thread.fpu);
+ WARN_ON_FPU(fpu != x86_task_fpu(current));
fpregs_lock();
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
- fpu_reset_fpregs();
+ fpu_reset_fpstate_regs();
fpregs_unlock();
return;
}
@@ -782,8 +816,8 @@ void fpu__clear_user_states(struct fpu *fpu)
void fpu_flush_thread(void)
{
- fpstate_reset(&current->thread.fpu);
- fpu_reset_fpregs();
+ fpstate_reset(x86_task_fpu(current));
+ fpu_reset_fpstate_regs();
}
/*
* Load FPU context before returning to userspace.
@@ -823,7 +857,7 @@ void fpregs_lock_and_load(void)
*/
void fpregs_assert_state_consistent(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
if (test_thread_flag(TIF_NEED_FPU_LOAD))
return;
@@ -835,7 +869,7 @@ EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
void fpregs_mark_activate(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
fpregs_activate(fpu);
fpu->last_cpu = smp_processor_id();
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 998a08f17e33..99db41bf9fa6 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
- fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
+ ;
else
#endif
asm volatile ("fninit");
@@ -51,6 +51,9 @@ void fpu__init_cpu(void)
{
fpu__init_cpu_generic();
fpu__init_cpu_xstate();
+
+ /* Start allowing kernel-mode FPU: */
+ this_cpu_write(kernel_fpu_allowed, true);
}
static bool __init fpu__probe_without_cpuid(void)
@@ -73,6 +76,8 @@ static bool __init fpu__probe_without_cpuid(void)
static void __init fpu__init_system_early_generic(void)
{
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
if (fpu__probe_without_cpuid())
@@ -94,7 +99,6 @@ static void __init fpu__init_system_early_generic(void)
* Boot time FPU feature detection code:
*/
unsigned int mxcsr_feature_mask __ro_after_init = 0xffffffffu;
-EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
static void __init fpu__init_system_mxcsr(void)
{
@@ -150,11 +154,13 @@ static void __init fpu__init_task_struct_size(void)
{
int task_size = sizeof(struct task_struct);
+ task_size += sizeof(struct fpu);
+
/*
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
- task_size -= sizeof(current->thread.fpu.__fpstate.regs);
+ task_size -= sizeof(union fpregs_state);
/*
* Add back the dynamically-calculated register state
@@ -164,14 +170,9 @@ static void __init fpu__init_task_struct_size(void)
/*
* We dynamically size 'struct fpu', so we require that
- * it be at the end of 'thread_struct' and that
- * 'thread_struct' be at the end of 'task_struct'. If
- * you hit a compile error here, check the structure to
- * see if something got added to the end.
+ * 'state' be at the end of 'it:
*/
CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate);
- CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
- CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
arch_task_struct_size = task_size;
}
@@ -204,7 +205,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_kernel_cfg.default_size = size;
fpu_user_cfg.max_size = size;
fpu_user_cfg.default_size = size;
- fpstate_reset(&current->thread.fpu);
}
/*
@@ -213,7 +213,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
void __init fpu__init_system(void)
{
- fpstate_reset(&current->thread.fpu);
fpu__init_system_early_generic();
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 887b0b8e21e3..0986c2200adc 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -45,7 +45,7 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
*/
static void sync_fpstate(struct fpu *fpu)
{
- if (fpu == &current->thread.fpu)
+ if (fpu == x86_task_fpu(current))
fpu_sync_fpstate(fpu);
}
@@ -63,7 +63,7 @@ static void fpu_force_restore(struct fpu *fpu)
* Only stopped child tasks can be used to modify the FPU
* state in the fpstate buffer:
*/
- WARN_ON_FPU(fpu == &current->thread.fpu);
+ WARN_ON_FPU(fpu == x86_task_fpu(current));
__fpu_invalidate_fpregs_state(fpu);
}
@@ -71,7 +71,7 @@ static void fpu_force_restore(struct fpu *fpu)
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -91,7 +91,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct fxregs_state newstate;
int ret;
@@ -133,7 +133,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
if (!cpu_feature_enabled(X86_FEATURE_XSAVE))
return -ENODEV;
- sync_fpstate(&target->thread.fpu);
+ sync_fpstate(x86_task_fpu(target));
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE);
return 0;
@@ -143,7 +143,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct xregs_state *tmpbuf = NULL;
int ret;
@@ -187,7 +187,7 @@ int ssp_active(struct task_struct *target, const struct user_regset *regset)
int ssp_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct cet_user_state *cetregs;
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
@@ -214,7 +214,7 @@ int ssp_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct xregs_state *xsave = &fpu->fpstate->regs.xsave;
struct cet_user_state *cetregs;
unsigned long user_ssp;
@@ -368,7 +368,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
- __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave);
+ __convert_from_fxsr(env, tsk, &x86_task_fpu(tsk)->fpstate->regs.fxsave);
}
void convert_to_fxsr(struct fxregs_state *fxsave,
@@ -401,7 +401,7 @@ void convert_to_fxsr(struct fxregs_state *fxsave,
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct user_i387_ia32_struct env;
struct fxregs_state fxsave, *fx;
@@ -433,7 +433,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct user_i387_ia32_struct env;
int ret;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 6c69cb28b298..c3ec2512f2bb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -43,13 +43,13 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
* fpstate layout with out copying the extended state information
* in the memory layout.
*/
- if (__get_user(magic2, (__u32 __user *)(fpstate + current->thread.fpu.fpstate->user_size)))
+ if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size)))
return false;
if (likely(magic2 == FP_XSTATE_MAGIC2))
return true;
setfx:
- trace_x86_fpu_xstate_check_failed(&current->thread.fpu);
+ trace_x86_fpu_xstate_check_failed(x86_task_fpu(current));
/* Set the parameters for fx only state */
fx_sw->magic1 = 0;
@@ -64,13 +64,13 @@ setfx:
static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
{
if (use_fxsr()) {
- struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave;
+ struct xregs_state *xsave = &x86_task_fpu(tsk)->fpstate->regs.xsave;
struct user_i387_ia32_struct env;
struct _fpstate_32 __user *fp = buf;
fpregs_lock();
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- fxsave(&tsk->thread.fpu.fpstate->regs.fxsave);
+ fxsave(&x86_task_fpu(tsk)->fpstate->regs.fxsave);
fpregs_unlock();
convert_from_fxsr(&env, tsk);
@@ -114,7 +114,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
{
struct xregs_state __user *x = buf;
struct _fpx_sw_bytes sw_bytes = {};
- u32 xfeatures;
int err;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
@@ -128,12 +127,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
(__u32 __user *)(buf + fpstate->user_size));
/*
- * Read the xfeatures which we copied (directly from the cpu or
- * from the state in task struct) to the user buffers.
- */
- err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
-
- /*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
@@ -144,9 +137,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
- xfeatures |= XFEATURE_MASK_FPSSE;
-
- err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
+ err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE);
return !err;
}
@@ -184,7 +175,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pk
bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru)
{
struct task_struct *tsk = current;
- struct fpstate *fpstate = tsk->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(tsk)->fpstate;
bool ia32_fxstate = (buf != buf_fx);
int ret;
@@ -272,7 +263,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
*/
static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
int ret;
/* Restore enabled features only. */
@@ -332,7 +323,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
bool ia32_fxstate)
{
struct task_struct *tsk = current;
- struct fpu *fpu = &tsk->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(tsk);
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
@@ -452,7 +443,7 @@ static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate)
*/
bool fpu__restore_sig(void __user *buf, int ia32_frame)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
void __user *buf_fx = buf;
bool ia32_fxstate = false;
bool success = false;
@@ -499,7 +490,7 @@ unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
- unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate);
+ unsigned long frame_size = xstate_sigframe_size(x86_task_fpu(current)->fpstate);
*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6a41d1610d8b..9aa9ac8399ae 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -14,13 +14,15 @@
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
+#include <linux/sort.h>
#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
+#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>
@@ -62,6 +64,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};
@@ -80,6 +83,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
[XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
+ [XFEATURE_APX] = X86_FEATURE_APX,
};
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
@@ -88,6 +92,31 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
+/*
+ * Ordering of xstate components in uncompacted format: The xfeature
+ * number does not necessarily indicate its position in the XSAVE buffer.
+ * This array defines the traversal order of xstate features.
+ */
+static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+
+static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
+{
+ for (; xfeature_uncompact_order[i] != -1; i++) {
+ if (mask & BIT_ULL(xfeature_uncompact_order[i]))
+ break;
+ }
+
+ return i;
+}
+
+/* Iterate xstate features in uncompacted order: */
+#define for_each_extended_xfeature_in_order(i, mask) \
+ for (i = 0; \
+ i = next_xfeature_order(i, mask), \
+ xfeature_uncompact_order[i] != -1; \
+ i++)
+
#define XSTATE_FLAG_SUPERVISOR BIT(0)
#define XSTATE_FLAG_ALIGNED64 BIT(1)
@@ -199,7 +228,7 @@ void fpu__init_cpu_xstate(void)
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
*/
if (boot_cpu_has(X86_FEATURE_XSAVES)) {
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
}
@@ -209,16 +238,20 @@ static bool xfeature_enabled(enum xfeature xfeature)
return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}
+static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
+{
+ return xstate_offsets[*(unsigned int *)xfeature1] -
+ xstate_offsets[*(unsigned int *)xfeature2];
+}
+
/*
* Record the offsets and sizes of various xstates contained
- * in the XSAVE state memory layout.
+ * in the XSAVE state memory layout. Also, create an ordered
+ * list of xfeatures for handling out-of-order offsets.
*/
static void __init setup_xstate_cache(void)
{
- u32 eax, ebx, ecx, edx, i;
- /* start at the beginning of the "extended state" */
- unsigned int last_good_offset = offsetof(struct xregs_state,
- extended_state_area);
+ u32 eax, ebx, ecx, edx, xfeature, i = 0;
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
@@ -232,31 +265,30 @@ static void __init setup_xstate_cache(void)
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
- cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
+ for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
+ cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);
- xstate_sizes[i] = eax;
- xstate_flags[i] = ecx;
+ xstate_sizes[xfeature] = eax;
+ xstate_flags[xfeature] = ecx;
/*
* If an xfeature is supervisor state, the offset in EBX is
* invalid, leave it to -1.
*/
- if (xfeature_is_supervisor(i))
+ if (xfeature_is_supervisor(xfeature))
continue;
- xstate_offsets[i] = ebx;
+ xstate_offsets[xfeature] = ebx;
- /*
- * In our xstate size checks, we assume that the highest-numbered
- * xstate feature has the highest offset in the buffer. Ensure
- * it does.
- */
- WARN_ONCE(last_good_offset > xstate_offsets[i],
- "x86/fpu: misordered xstate at %d\n", last_good_offset);
-
- last_good_offset = xstate_offsets[i];
+ /* Populate the list of xfeatures before sorting */
+ xfeature_uncompact_order[i++] = xfeature;
}
+
+ /*
+ * Sort xfeatures by their offsets to support out-of-order
+ * offsets in the uncompacted format.
+ */
+ sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}
/*
@@ -340,7 +372,8 @@ static __init void os_xrstor_booting(struct xregs_state *xstate)
XFEATURE_MASK_BNDCSR | \
XFEATURE_MASK_PASID | \
XFEATURE_MASK_CET_USER | \
- XFEATURE_MASK_XTILE)
+ XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_APX)
/*
* setup the xstate image representing the init state
@@ -540,6 +573,7 @@ static bool __init check_xstate_against_struct(int nr)
case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
+ case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state);
case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
default:
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
@@ -552,13 +586,20 @@ static bool __init check_xstate_against_struct(int nr)
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
unsigned int topmost = fls64(xfeatures) - 1;
- unsigned int offset = xstate_offsets[topmost];
+ unsigned int offset, i;
if (topmost <= XFEATURE_SSE)
return sizeof(struct xregs_state);
- if (compacted)
+ if (compacted) {
offset = xfeature_get_offset(xfeatures, topmost);
+ } else {
+ /* Walk through the xfeature order to pick the last */
+ for_each_extended_xfeature_in_order(i, xfeatures)
+ topmost = xfeature_uncompact_order[i];
+ offset = xstate_offsets[topmost];
+ }
+
return offset + xstate_sizes[topmost];
}
@@ -639,7 +680,7 @@ static unsigned int __init get_xsave_compacted_size(void)
return get_compacted_size();
/* Disable independent features. */
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());
/*
* Ask the hardware what size is required of the buffer.
@@ -648,7 +689,7 @@ static unsigned int __init get_xsave_compacted_size(void)
size = get_compacted_size();
/* Re-enable independent features so XSAVES will work on them again. */
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
return size;
}
@@ -711,6 +752,8 @@ static int __init init_xstate_size(void)
*/
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
+ pr_info("x86/fpu: XSAVE disabled\n");
+
fpu_kernel_cfg.max_features = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
@@ -727,7 +770,7 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
*/
init_fpstate.xfd = 0;
- fpstate_reset(&current->thread.fpu);
+ fpstate_reset(x86_task_fpu(current));
}
/*
@@ -775,6 +818,17 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
+ fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
+ /*
+ * This is a problematic CPU configuration where two
+ * conflicting state components are both enumerated.
+ */
+ pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
+ fpu_kernel_cfg.max_features);
+ goto out_disable;
+ }
+
fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
XFEATURE_MASK_INDEPENDENT;
@@ -834,9 +888,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (err)
goto out_disable;
- /* Reset the state for the current task */
- fpstate_reset(&current->thread.fpu);
-
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
@@ -852,7 +903,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
if (init_fpstate.size > sizeof(init_fpstate.regs)) {
- pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
+ pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
sizeof(init_fpstate.regs), init_fpstate.size);
goto out_disable;
}
@@ -864,7 +915,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
* xfeatures mask.
*/
if (xfeatures != fpu_kernel_cfg.max_features) {
- pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
+ pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
xfeatures, fpu_kernel_cfg.max_features);
goto out_disable;
}
@@ -904,12 +955,12 @@ void fpu__resume_cpu(void)
* of XSAVES and MSR_IA32_XSS.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
if (fpu_state_size_dynamic())
- wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
+ wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd);
}
/*
@@ -1071,10 +1122,9 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
struct xregs_state *xsave = &fpstate->regs.xsave;
+ unsigned int zerofrom, i, xfeature;
struct xstate_header header;
- unsigned int zerofrom;
u64 mask;
- int i;
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
@@ -1143,15 +1193,16 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
*/
mask = header.xfeatures;
- for_each_extended_xfeature(i, mask) {
+ for_each_extended_xfeature_in_order(i, mask) {
+ xfeature = xfeature_uncompact_order[i];
/*
* If there was a feature or alignment gap, zero the space
* in the destination buffer.
*/
- if (zerofrom < xstate_offsets[i])
- membuf_zero(&to, xstate_offsets[i] - zerofrom);
+ if (zerofrom < xstate_offsets[xfeature])
+ membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);
- if (i == XFEATURE_PKRU) {
+ if (xfeature == XFEATURE_PKRU) {
struct pkru_state pkru = {0};
/*
* PKRU is not necessarily up to date in the
@@ -1161,14 +1212,14 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
membuf_write(&to, &pkru, sizeof(pkru));
} else {
membuf_write(&to,
- __raw_xsave_addr(xsave, i),
- xstate_sizes[i]);
+ __raw_xsave_addr(xsave, xfeature),
+ xstate_sizes[xfeature]);
}
/*
* Keep track of the last copied state in the non-compacted
* target buffer for gap zeroing.
*/
- zerofrom = xstate_offsets[i] + xstate_sizes[i];
+ zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
}
out:
@@ -1191,8 +1242,8 @@ out:
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
- __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
- tsk->thread.fpu.fpstate->user_xfeatures,
+ __copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
+ x86_task_fpu(tsk)->fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1332,7 +1383,7 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
+ return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
@@ -1398,9 +1449,9 @@ void xrstors(struct xregs_state *xstate, u64 mask)
}
#if IS_ENABLED(CONFIG_KVM)
-void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
+void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
- void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
+ void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);
if (addr)
memset(addr, 0, xstate_sizes[xfeature]);
@@ -1426,7 +1477,7 @@ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
* The XFD MSR does not match fpstate->xfd. That's invalid when
* the passed in fpstate is current's fpstate.
*/
- if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
+ if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
return false;
/*
@@ -1503,7 +1554,7 @@ void fpstate_free(struct fpu *fpu)
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
unsigned int usize, struct fpu_guest *guest_fpu)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
bool in_use;
@@ -1596,7 +1647,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
* AVX512.
*/
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
- struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current->group_leader);
struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
@@ -1606,16 +1657,20 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
if ((permitted & requested) == requested)
return 0;
- /* Calculate the resulting kernel state size */
+ /*
+ * Calculate the resulting kernel state size. Note, @permitted also
+ * contains supervisor xfeatures even though supervisor are always
+ * permitted for kernel and guest FPUs, and never permitted for user
+ * FPUs.
+ */
mask = permitted | requested;
- /* Take supervisor states into account on the host */
- if (!guest)
- mask |= xfeatures_mask_supervisor();
ksize = xstate_calculate_size(mask, compacted);
- /* Calculate the resulting user state size */
- mask &= XFEATURE_MASK_USER_SUPPORTED;
- usize = xstate_calculate_size(mask, false);
+ /*
+ * Calculate the resulting user state size. Take care not to clobber
+ * the supervisor xfeatures in the new mask!
+ */
+ usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);
if (!guest) {
ret = validate_sigaltstack(usize);
@@ -1699,7 +1754,7 @@ int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
return -EPERM;
}
- fpu = &current->group_leader->thread.fpu;
+ fpu = x86_task_fpu(current->group_leader);
perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
ksize = perm->__state_size;
usize = perm->__user_state_size;
@@ -1804,7 +1859,7 @@ long fpu_xstate_prctl(int option, unsigned long arg2)
*/
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
- unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);
long delta;
if (!timestamp) {
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 0fd34f53f025..52ce19289989 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -5,6 +5,7 @@
#include <asm/cpufeature.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/xcr.h>
+#include <asm/msr.h>
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u64, xfd_state);
@@ -22,7 +23,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
static inline u64 xstate_get_group_perm(bool guest)
{
- struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current->group_leader);
struct fpu_state_perm *perm;
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
@@ -69,21 +70,31 @@ static inline u64 xfeatures_mask_independent(void)
return fpu_kernel_cfg.independent_features;
}
+static inline int set_xfeature_in_sigframe(struct xregs_state __user *xbuf, u64 mask)
+{
+ u64 xfeatures;
+ int err;
+
+ /* Read the xfeatures value already saved in the user buffer */
+ err = __get_user(xfeatures, &xbuf->header.xfeatures);
+ xfeatures |= mask;
+ err |= __put_user(xfeatures, &xbuf->header.xfeatures);
+
+ return err;
+}
+
/*
* Update the value of PKRU register that was already pushed onto the signal frame.
*/
-static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru)
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
{
- u64 xstate_bv;
int err;
if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
return 0;
/* Mark PKRU as in-use so that it is restored correctly. */
- xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
-
- err = __put_user(xstate_bv, &buf->header.xfeatures);
+ err = set_xfeature_in_sigframe(buf, XFEATURE_MASK_PKRU);
if (err)
return err;
@@ -171,7 +182,7 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs
#ifdef CONFIG_X86_64
static inline void xfd_set_state(u64 xfd)
{
- wrmsrl(MSR_IA32_XFD, xfd);
+ wrmsrq(MSR_IA32_XFD, xfd);
__this_cpu_write(xfd_state, xfd);
}
@@ -288,7 +299,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
- struct fpstate *fpstate = current->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
u64 mask = fpstate->user_xfeatures;
u32 lmask;
u32 hmask;
@@ -307,7 +318,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr
clac();
if (!err)
- err = update_pkru_in_sigframe(buf, mask, pkru);
+ err = update_pkru_in_sigframe(buf, pkru);
return err;
}
@@ -322,7 +333,7 @@ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64
u32 hmask = mask >> 32;
int err;
- xfd_validate_state(current->thread.fpu.fpstate, mask, true);
+ xfd_validate_state(x86_task_fpu(current)->fpstate, mask, true);
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index 5e2cd1004980..816187da3a47 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -3,6 +3,7 @@
#include <asm/desc.h>
#include <asm/fred.h>
+#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>
@@ -43,23 +44,23 @@ void cpu_init_fred_exceptions(void)
*/
loadsegment(ss, __KERNEL_DS);
- wrmsrl(MSR_IA32_FRED_CONFIG,
+ wrmsrq(MSR_IA32_FRED_CONFIG,
/* Reserve for CALL emulation */
FRED_CONFIG_REDZONE |
FRED_CONFIG_INT_STKLVL(0) |
FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
- wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
+ wrmsrq(MSR_IA32_FRED_STKLVLS, 0);
/*
* Ater a CPU offline/online cycle, the FRED RSP0 MSR should be
* resynchronized with its per-CPU cache.
*/
- wrmsrl(MSR_IA32_FRED_RSP0, __this_cpu_read(fred_rsp0));
+ wrmsrq(MSR_IA32_FRED_RSP0, __this_cpu_read(fred_rsp0));
- wrmsrl(MSR_IA32_FRED_RSP1, 0);
- wrmsrl(MSR_IA32_FRED_RSP2, 0);
- wrmsrl(MSR_IA32_FRED_RSP3, 0);
+ wrmsrq(MSR_IA32_FRED_RSP1, 0);
+ wrmsrq(MSR_IA32_FRED_RSP2, 0);
+ wrmsrq(MSR_IA32_FRED_RSP3, 0);
/* Enable FRED */
cr4_set_bits(X86_CR4_FRED);
@@ -79,14 +80,14 @@ void cpu_init_fred_rsps(void)
* (remember that user space faults are always taken on stack level 0)
* is to avoid overflowing the kernel stack.
*/
- wrmsrl(MSR_IA32_FRED_STKLVLS,
+ wrmsrq(MSR_IA32_FRED_STKLVLS,
FRED_STKLVL(X86_TRAP_DB, FRED_DB_STACK_LEVEL) |
FRED_STKLVL(X86_TRAP_NMI, FRED_NMI_STACK_LEVEL) |
FRED_STKLVL(X86_TRAP_MC, FRED_MC_STACK_LEVEL) |
FRED_STKLVL(X86_TRAP_DF, FRED_DF_STACK_LEVEL));
/* The FRED equivalents to IST stacks... */
- wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
- wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
- wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
+ wrmsrq(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
+ wrmsrq(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
+ wrmsrq(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cace6e8d7cc7..252e82bcfd2f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -55,10 +55,10 @@ void ftrace_arch_code_modify_post_process(void)
{
/*
* ftrace_make_{call,nop}() may be called during
- * module load, and we need to finish the text_poke_queue()
+ * module load, and we need to finish the smp_text_poke_batch_add()
* that they do, here.
*/
- text_poke_finish();
+ smp_text_poke_batch_finish();
ftrace_poke_late = 0;
mutex_unlock(&text_mutex);
}
@@ -119,7 +119,7 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code,
/* replace the text with the new text */
if (ftrace_poke_late)
- text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
+ smp_text_poke_batch_add((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
else
text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
return 0;
@@ -186,11 +186,11 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
ip = (unsigned long)(&ftrace_call);
new = ftrace_call_replace(ip, (unsigned long)func);
- text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+ smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
ip = (unsigned long)(&ftrace_regs_call);
new = ftrace_call_replace(ip, (unsigned long)func);
- text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+ smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
return 0;
}
@@ -247,10 +247,10 @@ void ftrace_replace_code(int enable)
break;
}
- text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
+ smp_text_poke_batch_add((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
ftrace_update_record(rec, enable);
}
- text_poke_finish();
+ smp_text_poke_batch_finish();
}
void arch_ftrace_update_code(int command)
@@ -354,7 +354,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
goto fail;
ip = trampoline + size;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk_at(ip))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
memcpy(ip, retq, sizeof(retq));
@@ -492,7 +492,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
mutex_lock(&text_mutex);
/* Do a safe modify in case the trampoline is executing */
new = ftrace_call_replace(ip, (unsigned long)func);
- text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+ smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
mutex_unlock(&text_mutex);
}
@@ -586,7 +586,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func)
const char *new;
new = ftrace_jmp_replace(ip, (unsigned long)func);
- text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+ smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
return 0;
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index fa9b6339975f..533fcf5636fc 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -47,234 +47,22 @@
* Manage page tables very early on.
*/
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
-static unsigned int __initdata next_early_pgt;
+unsigned int __initdata next_early_pgt;
+SYM_PIC_ALIAS(next_early_pgt);
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
-#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled __ro_after_init;
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
-#endif
-#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
-#endif
-
-static inline bool check_la57_support(void)
-{
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
- return false;
-
- /*
- * 5-level paging is detected and enabled at kernel decompression
- * stage. Only check if it has been enabled there.
- */
- if (!(native_read_cr4() & X86_CR4_LA57))
- return false;
-
- RIP_REL_REF(__pgtable_l5_enabled) = 1;
- RIP_REL_REF(pgdir_shift) = 48;
- RIP_REL_REF(ptrs_per_p4d) = 512;
- RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5;
- RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5;
- RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5;
-
- return true;
-}
-
-static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
- pmdval_t *pmd,
- unsigned long p2v_offset)
-{
- unsigned long paddr, paddr_end;
- int i;
-
- /* Encrypt the kernel and related (if SME is active) */
- sme_encrypt_kernel(bp);
-
- /*
- * Clear the memory encryption mask from the .bss..decrypted section.
- * The bss section will be memset to zero later in the initialization so
- * there is no need to zero it after changing the memory encryption
- * attribute.
- */
- if (sme_get_me_mask()) {
- paddr = (unsigned long)&RIP_REL_REF(__start_bss_decrypted);
- paddr_end = (unsigned long)&RIP_REL_REF(__end_bss_decrypted);
-
- for (; paddr < paddr_end; paddr += PMD_SIZE) {
- /*
- * On SNP, transition the page to shared in the RMP table so that
- * it is consistent with the page table attribute change.
- *
- * __start_bss_decrypted has a virtual address in the high range
- * mapping (kernel .text). PVALIDATE, by way of
- * early_snp_set_memory_shared(), requires a valid virtual
- * address but the kernel is currently running off of the identity
- * mapping so use the PA to get a *currently* valid virtual address.
- */
- early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);
-
- i = pmd_index(paddr - p2v_offset);
- pmd[i] -= sme_get_me_mask();
- }
- }
-
- /*
- * Return the SME encryption mask (if SME is active) to be used as a
- * modifier for the initial pgdir entry programmed into CR3.
- */
- return sme_get_me_mask();
-}
-
-/* Code in __startup_64() can be relocated during execution, but the compiler
- * doesn't have to generate PC-relative relocations when accessing globals from
- * that function. Clang actually does not generate them, which leads to
- * boot-time crashes. To work around this problem, every global pointer must
- * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
- * by subtracting p2v_offset from the RIP-relative address.
- */
-unsigned long __head __startup_64(unsigned long p2v_offset,
- struct boot_params *bp)
-{
- pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
- unsigned long physaddr = (unsigned long)&RIP_REL_REF(_text);
- unsigned long va_text, va_end;
- unsigned long pgtable_flags;
- unsigned long load_delta;
- pgdval_t *pgd;
- p4dval_t *p4d;
- pudval_t *pud;
- pmdval_t *pmd, pmd_entry;
- bool la57;
- int i;
-
- la57 = check_la57_support();
-
- /* Is the address too large? */
- if (physaddr >> MAX_PHYSMEM_BITS)
- for (;;);
-
- /*
- * Compute the delta between the address I am compiled to run at
- * and the address I am actually running at.
- */
- load_delta = __START_KERNEL_map + p2v_offset;
- RIP_REL_REF(phys_base) = load_delta;
-
- /* Is the address not 2M aligned? */
- if (load_delta & ~PMD_MASK)
- for (;;);
-
- va_text = physaddr - p2v_offset;
- va_end = (unsigned long)&RIP_REL_REF(_end) - p2v_offset;
-
- /* Include the SME encryption mask in the fixup value */
- load_delta += sme_get_me_mask();
-
- /* Fixup the physical addresses in the page table */
-
- pgd = &RIP_REL_REF(early_top_pgt)->pgd;
- pgd[pgd_index(__START_KERNEL_map)] += load_delta;
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
- p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
- p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
-
- pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
- }
-
- RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
- RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;
-
- for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
- RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;
-
- /*
- * Set up the identity mapping for the switchover. These
- * entries should *NOT* have the global bit set! This also
- * creates a bunch of nonsense entries but that is fine --
- * it avoids problems around wraparound.
- */
-
- pud = &early_pgts[0]->pmd;
- pmd = &early_pgts[1]->pmd;
- RIP_REL_REF(next_early_pgt) = 2;
-
- pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
-
- if (la57) {
- p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;
-
- i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
- pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
- pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
-
- i = physaddr >> P4D_SHIFT;
- p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
- p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
- } else {
- i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
- pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
- pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
- }
-
- i = physaddr >> PUD_SHIFT;
- pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
- pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
-
- pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
- /* Filter out unsupported __PAGE_KERNEL_* bits: */
- pmd_entry &= RIP_REL_REF(__supported_pte_mask);
- pmd_entry += sme_get_me_mask();
- pmd_entry += physaddr;
-
- for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
- int idx = i + (physaddr >> PMD_SHIFT);
-
- pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
- }
-
- /*
- * Fixup the kernel text+data virtual addresses. Note that
- * we might write invalid pmds, when the kernel is relocated
- * cleanup_highmap() fixes this up along with the mappings
- * beyond _end.
- *
- * Only the region occupied by the kernel image has so far
- * been checked against the table of usable memory regions
- * provided by the firmware, so invalidate pages outside that
- * region. A page table entry that maps to a reserved area of
- * memory would allow processor speculation into that area,
- * and on some hardware (particularly the UV platform) even
- * speculative access to some reserved areas is caught as an
- * error, causing the BIOS to halt the system.
- */
-
- pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd;
-
- /* invalidate pages before the kernel image */
- for (i = 0; i < pmd_index(va_text); i++)
- pmd[i] &= ~_PAGE_PRESENT;
-
- /* fixup pages that are part of the kernel image */
- for (; i <= pmd_index(va_end); i++)
- if (pmd[i] & _PAGE_PRESENT)
- pmd[i] += load_delta;
-
- /* invalidate pages after the kernel image */
- for (; i < PTRS_PER_PMD; i++)
- pmd[i] &= ~_PAGE_PRESENT;
-
- return sme_postprocess_startup(bp, pmd, p2v_offset);
-}
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
@@ -449,6 +237,12 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
/* Kill off the identity-map trampoline */
reset_early_page_tables();
+ if (pgtable_l5_enabled()) {
+ page_offset_base = __PAGE_OFFSET_BASE_L5;
+ vmalloc_base = __VMALLOC_BASE_L5;
+ vmemmap_base = __VMEMMAP_BASE_L5;
+ }
+
clear_bss();
/*
@@ -513,41 +307,6 @@ void __init __noreturn x86_64_start_reservations(char *real_mode_data)
start_kernel();
}
-/*
- * Data structures and code used for IDT setup in head_64.S. The bringup-IDT is
- * used until the idt_table takes over. On the boot CPU this happens in
- * x86_64_start_kernel(), on secondary CPUs in start_secondary(). In both cases
- * this happens in the functions called from head_64.S.
- *
- * The idt_table can't be used that early because all the code modifying it is
- * in idt.c and can be instrumented by tracing or KASAN, which both don't work
- * during early CPU bringup. Also the idt_table has the runtime vectors
- * configured which require certain CPU state to be setup already (like TSS),
- * which also hasn't happened yet in early CPU bringup.
- */
-static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
-
-/* This may run while still in the direct mapping */
-static void __head startup_64_load_idt(void *vc_handler)
-{
- struct desc_ptr desc = {
- .address = (unsigned long)&RIP_REL_REF(bringup_idt_table),
- .size = sizeof(bringup_idt_table) - 1,
- };
- struct idt_data data;
- gate_desc idt_desc;
-
- /* @vc_handler is set only for a VMM Communication Exception */
- if (vc_handler) {
- init_idt_data(&data, X86_TRAP_VC, vc_handler);
- idt_init_desc(&idt_desc, &data);
- native_write_idt_entry((gate_desc *)desc.address, X86_TRAP_VC, &idt_desc);
- }
-
- native_load_idt(&desc);
-}
-
-/* This is used when running on kernel addresses */
void early_setup_idt(void)
{
void *handler = NULL;
@@ -559,30 +318,3 @@ void early_setup_idt(void)
startup_64_load_idt(handler);
}
-
-/*
- * Setup boot CPU state needed before kernel switches to virtual addresses.
- */
-void __head startup_64_setup_gdt_idt(void)
-{
- struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt;
- void *handler = NULL;
-
- struct desc_ptr startup_gdt_descr = {
- .address = (unsigned long)&RIP_REL_REF(*gdt),
- .size = GDT_SIZE - 1,
- };
-
- /* Load GDT */
- native_load_gdt(&startup_gdt_descr);
-
- /* New GDT is live - reload data segment registers */
- asm volatile("movl %%eax, %%ds\n"
- "movl %%eax, %%ss\n"
- "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
- handler = &RIP_REL_REF(vc_no_ghcb);
-
- startup_64_load_idt(handler);
-}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2e42056d2306..76743dfad6ab 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -86,7 +86,7 @@ SYM_CODE_START(startup_32)
movl $pa(__bss_stop),%ecx
subl %edi,%ecx
shrl $2,%ecx
- rep ; stosl
+ rep stosl
/*
* Copy bootup parameters out of the way.
* Note: %esi still has the pointer to the real-mode data.
@@ -98,15 +98,13 @@ SYM_CODE_START(startup_32)
movl $pa(boot_params),%edi
movl $(PARAM_SIZE/4),%ecx
cld
- rep
- movsl
+ rep movsl
movl pa(boot_params) + NEW_CL_POINTER,%esi
andl %esi,%esi
jz 1f # No command line
movl $pa(boot_command_line),%edi
movl $(COMMAND_LINE_SIZE/4),%ecx
- rep
- movsl
+ rep movsl
1:
#ifdef CONFIG_OLPC
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fefe2a25cf02..3e9b3a3bd039 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -573,6 +573,7 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
/* Pure iret required here - don't use INTERRUPT_RETURN */
iretq
SYM_CODE_END(vc_no_ghcb)
+SYM_PIC_ALIAS(vc_no_ghcb);
#endif
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
@@ -604,10 +605,12 @@ SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(early_top_pgt)
+SYM_PIC_ALIAS(early_top_pgt)
SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
SYM_DATA_END(early_dynamic_pgts)
+SYM_PIC_ALIAS(early_dynamic_pgts);
SYM_DATA(early_recursion_flag, .long 0)
@@ -646,12 +649,11 @@ SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
SYM_DATA_END(init_top_pgt)
#endif
-#ifdef CONFIG_X86_5LEVEL
SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level4_kernel_pgt)
-#endif
+SYM_PIC_ALIAS(level4_kernel_pgt)
SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
@@ -659,6 +661,7 @@ SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
.quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level3_kernel_pgt)
+SYM_PIC_ALIAS(level3_kernel_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
/*
@@ -676,6 +679,7 @@ SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)
SYM_DATA_END(level2_kernel_pgt)
+SYM_PIC_ALIAS(level2_kernel_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
.fill (512 - 4 - FIXMAP_PMD_NUM),8,0
@@ -688,6 +692,7 @@ SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
/* 6 MB reserved space + a 2MB hole */
.fill 4,8,0
SYM_DATA_END(level2_fixmap_pgt)
+SYM_PIC_ALIAS(level2_fixmap_pgt)
SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
.rept (FIXMAP_PMD_NUM)
@@ -703,6 +708,7 @@ SYM_DATA(smpboot_control, .long 0)
.align 16
/* This must match the first entry in level2_kernel_pgt */
SYM_DATA(phys_base, .quad 0x0)
+SYM_PIC_ALIAS(phys_base);
EXPORT_SYMBOL(phys_base)
#include "../xen/xen-head.S"
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 7f4b2966e15c..d6387dde3ff9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -7,11 +7,12 @@
#include <linux/cpu.h>
#include <linux/irq.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/mwait.h>
+#include <asm/msr.h>
#undef pr_fmt
#define pr_fmt(fmt) "hpet: " fmt
@@ -970,7 +971,7 @@ static bool __init hpet_is_pc10_damaged(void)
return false;
/* Check whether PC10 is enabled in PKG C-state limit */
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, pcfg);
if ((pcfg & 0xF) < 8)
return false;
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index cd8ed1edbf9e..9e9a591a5fec 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -49,7 +49,7 @@ static uint32_t jailhouse_cpuid_base(void)
!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return 0;
- return hypervisor_cpuid_base("Jailhouse\0\0\0", 0);
+ return cpuid_base_hypervisor("Jailhouse\0\0\0", 0);
}
static uint32_t __init jailhouse_detect(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index f5b8ef02d172..a7949a54a0ff 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -102,7 +102,7 @@ __jump_label_transform(struct jump_entry *entry,
return;
}
- text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
+ smp_text_poke_single((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
}
static void __ref jump_label_transform(struct jump_entry *entry,
@@ -135,7 +135,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
mutex_lock(&text_mutex);
jlp = __jump_label_patch(entry, type);
- text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
+ smp_text_poke_batch_add((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
mutex_unlock(&text_mutex);
return true;
}
@@ -143,6 +143,6 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
void arch_jump_label_transform_apply(void)
{
mutex_lock(&text_mutex);
- text_poke_finish();
+ smp_text_poke_batch_finish();
mutex_unlock(&text_mutex);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 09608fd93687..47cb8eb138ba 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -808,7 +808,7 @@ void arch_arm_kprobe(struct kprobe *p)
u8 int3 = INT3_INSN_OPCODE;
text_poke(p->addr, &int3, 1);
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
}
@@ -818,7 +818,7 @@ void arch_disarm_kprobe(struct kprobe *p)
perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
text_poke(p->addr, &p->opcode, 1);
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
}
void arch_remove_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 36d6809c6c9e..0aabd4c4e2c4 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -488,7 +488,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
insn_buff[0] = JMP32_INSN_OPCODE;
*(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
+ smp_text_poke_single(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
list_del_init(&op->list);
}
@@ -513,11 +513,11 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
JMP32_INSN_SIZE - INT3_INSN_SIZE);
text_poke(addr, new, INT3_INSN_SIZE);
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
text_poke(addr + INT3_INSN_SIZE,
new + INT3_INSN_SIZE,
JMP32_INSN_SIZE - INT3_INSN_SIZE);
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3be9b3342c67..921c1c783bc1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -40,6 +40,7 @@
#include <asm/mtrr.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
+#include <asm/msr.h>
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>
@@ -301,7 +302,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
__this_cpu_write(apf_reason.token, 0);
- wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ wrmsrq(MSR_KVM_ASYNC_PF_ACK, 1);
}
set_irq_regs(old_regs);
@@ -327,7 +328,7 @@ static void kvm_register_steal_time(void)
if (!has_steal_clock)
return;
- wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
+ wrmsrq(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
pr_debug("stealtime: cpu %d, msr %llx\n", cpu,
(unsigned long long) slow_virt_to_phys(st));
}
@@ -361,9 +362,9 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
- wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+ wrmsrq(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
- wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+ wrmsrq(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(async_pf_enabled, true);
pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
@@ -376,7 +377,7 @@ static void kvm_guest_cpu_init(void)
__this_cpu_write(kvm_apic_eoi, 0);
pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
| KVM_MSR_ENABLED;
- wrmsrl(MSR_KVM_PV_EOI_EN, pa);
+ wrmsrq(MSR_KVM_PV_EOI_EN, pa);
}
if (has_steal_clock)
@@ -388,7 +389,7 @@ static void kvm_pv_disable_apf(void)
if (!__this_cpu_read(async_pf_enabled))
return;
- wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
+ wrmsrq(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(async_pf_enabled, false);
pr_debug("disable async PF for cpu %d\n", smp_processor_id());
@@ -399,7 +400,7 @@ static void kvm_disable_steal_time(void)
if (!has_steal_clock)
return;
- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+ wrmsrq(MSR_KVM_STEAL_TIME, 0);
}
static u64 kvm_steal_clock(int cpu)
@@ -451,9 +452,9 @@ static void kvm_guest_cpu_offline(bool shutdown)
{
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
- wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ wrmsrq(MSR_KVM_PV_EOI_EN, 0);
if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
- wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
+ wrmsrq(MSR_KVM_MIGRATION_CONTROL, 0);
kvm_pv_disable_apf();
if (!shutdown)
apf_task_wake_all();
@@ -615,7 +616,7 @@ static int __init setup_efi_kvm_sev_migration(void)
}
pr_info("%s : live migration enabled in EFI\n", __func__);
- wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
+ wrmsrq(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
return 1;
}
@@ -728,7 +729,7 @@ static int kvm_suspend(void)
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
- rdmsrl(MSR_KVM_POLL_CONTROL, val);
+ rdmsrq(MSR_KVM_POLL_CONTROL, val);
has_guest_poll = !(val & 1);
#endif
return 0;
@@ -740,7 +741,7 @@ static void kvm_resume(void)
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
- wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+ wrmsrq(MSR_KVM_POLL_CONTROL, 0);
#endif
}
@@ -874,7 +875,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
return 0; /* So we don't blow up on old processors */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
+ return cpuid_base_hypervisor(KVM_SIGNATURE, 0);
return 0;
}
@@ -975,7 +976,7 @@ static void __init kvm_init_platform(void)
* If not booted using EFI, enable Live migration support.
*/
if (!efi_enabled(EFI_BOOT))
- wrmsrl(MSR_KVM_MIGRATION_CONTROL,
+ wrmsrq(MSR_KVM_MIGRATION_CONTROL,
KVM_MIGRATION_READY);
}
kvmclock_init();
@@ -1124,12 +1125,12 @@ out:
static void kvm_disable_host_haltpoll(void *i)
{
- wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+ wrmsrq(MSR_KVM_POLL_CONTROL, 0);
}
static void kvm_enable_host_haltpoll(void *i)
{
- wrmsrl(MSR_KVM_POLL_CONTROL, 1);
+ wrmsrq(MSR_KVM_POLL_CONTROL, 1);
}
void arch_haltpoll_enable(unsigned int cpu)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 5b2c15214a6b..ca0a49eeac4a 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -60,7 +60,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu);
*/
static void kvm_get_wallclock(struct timespec64 *now)
{
- wrmsrl(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock));
+ wrmsrq(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock));
preempt_disable();
pvclock_read_wallclock(&wall_clock, this_cpu_pvti(), now);
preempt_enable();
@@ -173,7 +173,7 @@ static void kvm_register_clock(char *txt)
return;
pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
- wrmsrl(msr_kvm_system_time, pa);
+ wrmsrq(msr_kvm_system_time, pa);
pr_debug("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
}
@@ -196,7 +196,7 @@ static void kvm_setup_secondary_clock(void)
void kvmclock_disable(void)
{
if (msr_kvm_system_time)
- native_write_msr(msr_kvm_system_time, 0, 0);
+ native_write_msr(msr_kvm_system_time, 0);
}
static void __init kvmclock_init_mem(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a68f5a0a9f37..949c9e4bfad2 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -76,6 +76,19 @@ map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
#endif
+static int map_mmio_serial(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ unsigned long mstart, mend;
+
+ if (!kexec_debug_8250_mmio32)
+ return 0;
+
+ mstart = kexec_debug_8250_mmio32 & PAGE_MASK;
+ mend = (kexec_debug_8250_mmio32 + PAGE_SIZE + 23) & PAGE_MASK;
+ pr_info("Map PCI serial at %lx - %lx\n", mstart, mend);
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+}
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -285,6 +298,10 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
if (result)
return result;
+ result = map_mmio_serial(&info, image->arch.pgd);
+ if (result)
+ return result;
+
/*
* This must be last because the intermediate page table pages it
* allocates will not be control pages and may overlap the image.
@@ -304,6 +321,24 @@ static void load_segments(void)
);
}
+static void prepare_debug_idt(unsigned long control_page, unsigned long vec_ofs)
+{
+ gate_desc idtentry = { 0 };
+ int i;
+
+ idtentry.bits.p = 1;
+ idtentry.bits.type = GATE_TRAP;
+ idtentry.segment = __KERNEL_CS;
+ idtentry.offset_low = (control_page & 0xFFFF) + vec_ofs;
+ idtentry.offset_middle = (control_page >> 16) & 0xFFFF;
+ idtentry.offset_high = control_page >> 32;
+
+ for (i = 0; i < 16; i++) {
+ kexec_debug_idt[i] = idtentry;
+ idtentry.offset_low += KEXEC_DEBUG_EXC_HANDLER_SIZE;
+ }
+}
+
int machine_kexec_prepare(struct kimage *image)
{
void *control_page = page_address(image->control_code_page);
@@ -321,6 +356,9 @@ int machine_kexec_prepare(struct kimage *image)
if (image->type == KEXEC_TYPE_DEFAULT)
kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;
+ prepare_debug_idt((unsigned long)__pa(control_page),
+ (unsigned long)kexec_debug_exc_vectors - reloc_start);
+
__memcpy(control_page, __relocate_kernel_start, reloc_end - reloc_start);
set_memory_rox((unsigned long)control_page, 1);
@@ -396,16 +434,10 @@ void __nocfi machine_kexec(struct kimage *image)
* with from a table in memory. At no other time is the
* descriptor table in memory accessed.
*
- * I take advantage of this here by force loading the
- * segments, before I zap the gdt with an invalid value.
+ * Take advantage of this here by force loading the segments,
+ * before the GDT is zapped with an invalid value.
*/
load_segments();
- /*
- * The gdt & idt are now invalid.
- * If you want to load them you must set up your own idt & gdt.
- */
- native_idt_invalidate();
- native_gdt_invalidate();
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 1f54eedc3015..ef6104e7cc72 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -97,7 +97,7 @@ static void get_fam10h_pci_mmconf_base(void)
/* SYS_CFG */
address = MSR_AMD64_SYSCFG;
- rdmsrl(address, val);
+ rdmsrq(address, val);
/* TOP_MEM2 is not enabled? */
if (!(val & (1<<21))) {
@@ -105,7 +105,7 @@ static void get_fam10h_pci_mmconf_base(void)
} else {
/* TOP_MEM2 */
address = MSR_K8_TOP_MEM2;
- rdmsrl(address, val);
+ rdmsrq(address, val);
tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
}
@@ -177,7 +177,7 @@ void fam10h_check_enable_mmcfg(void)
return;
address = MSR_FAM10H_MMIO_CONF_BASE;
- rdmsrl(address, val);
+ rdmsrq(address, val);
/* try to make sure that AP's setting is identical to BSP setting */
if (val & FAM10H_MMIO_CONF_ENABLE) {
@@ -212,7 +212,7 @@ void fam10h_check_enable_mmcfg(void)
(FAM10H_MMIO_CONF_BUSRANGE_MASK<<FAM10H_MMIO_CONF_BUSRANGE_SHIFT));
val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
FAM10H_MMIO_CONF_ENABLE;
- wrmsrl(address, val);
+ wrmsrq(address, val);
}
static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d)
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index a7998f351701..0ffbae902e2f 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -206,7 +206,7 @@ static int write_relocate_add(Elf64_Shdr *sechdrs,
write, apply);
if (!early) {
- text_poke_sync();
+ smp_text_poke_sync_each_cpu();
mutex_unlock(&text_mutex);
}
@@ -266,6 +266,8 @@ int module_finalize(const Elf_Ehdr *hdr,
ibt_endbr = s;
}
+ its_init_mod(me);
+
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
@@ -286,6 +288,9 @@ int module_finalize(const Elf_Ehdr *hdr,
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+
+ its_fini_mod(me);
+
if (returns) {
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size);
@@ -326,4 +331,5 @@ int module_finalize(const Elf_Ehdr *hdr,
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
+ its_free_mod(mod);
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 9a95d00f1423..be93ec7255bf 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -49,27 +49,20 @@ struct nmi_desc {
struct list_head head;
};
-static struct nmi_desc nmi_desc[NMI_MAX] =
-{
- {
- .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
- .head = LIST_HEAD_INIT(nmi_desc[0].head),
- },
- {
- .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
- .head = LIST_HEAD_INIT(nmi_desc[1].head),
- },
- {
- .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
- .head = LIST_HEAD_INIT(nmi_desc[2].head),
- },
- {
- .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
- .head = LIST_HEAD_INIT(nmi_desc[3].head),
- },
+#define NMI_DESC_INIT(type) { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[type].lock), \
+ .head = LIST_HEAD_INIT(nmi_desc[type].head), \
+}
+static struct nmi_desc nmi_desc[NMI_MAX] = {
+ NMI_DESC_INIT(NMI_LOCAL),
+ NMI_DESC_INIT(NMI_UNKNOWN),
+ NMI_DESC_INIT(NMI_SERR),
+ NMI_DESC_INIT(NMI_IO_CHECK),
};
+#define nmi_to_desc(type) (&nmi_desc[type])
+
struct nmi_stats {
unsigned int normal;
unsigned int unknown;
@@ -91,6 +84,9 @@ static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
static int ignore_nmis __read_mostly;
int unknown_nmi_panic;
+int panic_on_unrecovered_nmi;
+int panic_on_io_nmi;
+
/*
* Prevent NMI reason port (0x61) being accessed simultaneously, can
* only be used in NMI handler.
@@ -104,8 +100,6 @@ static int __init setup_unknown_nmi_panic(char *str)
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-#define nmi_to_desc(type) (&nmi_desc[type])
-
static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
static int __init nmi_warning_debugfs(void)
@@ -125,12 +119,12 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
action->max_duration = duration;
- remainder_ns = do_div(duration, (1000 * 1000));
- decimal_msecs = remainder_ns / 1000;
+ /* Convert duration from nsec to msec */
+ remainder_ns = do_div(duration, NSEC_PER_MSEC);
+ decimal_msecs = remainder_ns / NSEC_PER_USEC;
- printk_ratelimited(KERN_INFO
- "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
- action->handler, duration, decimal_msecs);
+ pr_info_ratelimited("INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
+ action->handler, duration, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -333,10 +327,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
int handled;
/*
- * Use 'false' as back-to-back NMIs are dealt with one level up.
- * Of course this makes having multiple 'unknown' handlers useless
- * as only the first one is ever run (unless it can actually determine
- * if it caused the NMI)
+ * As a last resort, let the "unknown" handlers make a
+ * best-effort attempt to figure out if they can claim
+ * responsibility for this Unknown NMI.
*/
handled = nmi_handle(NMI_UNKNOWN, regs);
if (handled) {
@@ -366,17 +359,18 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
bool b2b = false;
/*
- * CPU-specific NMI must be processed before non-CPU-specific
- * NMI, otherwise we may lose it, because the CPU-specific
- * NMI can not be detected/processed on other CPUs.
- */
-
- /*
- * Back-to-back NMIs are interesting because they can either
- * be two NMI or more than two NMIs (any thing over two is dropped
- * due to NMI being edge-triggered). If this is the second half
- * of the back-to-back NMI, assume we dropped things and process
- * more handlers. Otherwise reset the 'swallow' NMI behaviour
+ * Back-to-back NMIs are detected by comparing the RIP of the
+ * current NMI with that of the previous NMI. If it is the same,
+ * it is assumed that the CPU did not have a chance to jump back
+ * into a non-NMI context and execute code in between the two
+ * NMIs.
+ *
+ * They are interesting because even if there are more than two,
+ * only a maximum of two can be detected (anything over two is
+ * dropped due to NMI being edge-triggered). If this is the
+ * second half of the back-to-back NMI, assume we dropped things
+ * and process more handlers. Otherwise, reset the 'swallow' NMI
+ * behavior.
*/
if (regs->ip == __this_cpu_read(last_nmi_rip))
b2b = true;
@@ -390,6 +384,11 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
if (microcode_nmi_handler_enabled() && microcode_nmi_handler())
goto out;
+ /*
+ * CPU-specific NMI must be processed before non-CPU-specific
+ * NMI, otherwise we may lose it, because the CPU-specific
+ * NMI can not be detected/processed on other CPUs.
+ */
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
if (handled) {
@@ -426,13 +425,14 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
pci_serr_error(reason, regs);
else if (reason & NMI_REASON_IOCHK)
io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
+
/*
* Reassert NMI in case it became active
* meanwhile as it's edge-triggered:
*/
- reassert_nmi();
-#endif
+ if (IS_ENABLED(CONFIG_X86_32))
+ reassert_nmi();
+
__this_cpu_add(nmi_stats.external, 1);
raw_spin_unlock(&nmi_reason_lock);
goto out;
@@ -751,4 +751,3 @@ void local_touch_nmi(void)
{
__this_cpu_write(last_nmi_rip, 0);
}
-EXPORT_SYMBOL_GPL(local_touch_nmi);
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index e93a8545c74d..a010e9d062bf 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * arch/x86/kernel/nmi-selftest.c
- *
* Testsuite for NMI: IPIs
*
* Started by Don Zickus:
@@ -30,7 +28,6 @@ static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata;
static int __initdata testcase_total;
static int __initdata testcase_successes;
-static int __initdata expected_testcase_failures;
static int __initdata unexpected_testcase_failures;
static int __initdata unexpected_testcase_unknowns;
@@ -120,26 +117,22 @@ static void __init dotest(void (*testcase_fn)(void), int expected)
unexpected_testcase_failures++;
if (nmi_fail == FAILURE)
- printk(KERN_CONT "FAILED |");
+ pr_cont("FAILED |");
else if (nmi_fail == TIMEOUT)
- printk(KERN_CONT "TIMEOUT|");
+ pr_cont("TIMEOUT|");
else
- printk(KERN_CONT "ERROR |");
+ pr_cont("ERROR |");
dump_stack();
} else {
testcase_successes++;
- printk(KERN_CONT " ok |");
+ pr_cont(" ok |");
}
- testcase_total++;
+ pr_cont("\n");
+ testcase_total++;
reset_nmi();
}
-static inline void __init print_testname(const char *testname)
-{
- printk("%12s:", testname);
-}
-
void __init nmi_selftest(void)
{
init_nmi_testsuite();
@@ -147,38 +140,25 @@ void __init nmi_selftest(void)
/*
* Run the testsuite:
*/
- printk("----------------\n");
- printk("| NMI testsuite:\n");
- printk("--------------------\n");
+ pr_info("----------------\n");
+ pr_info("| NMI testsuite:\n");
+ pr_info("--------------------\n");
- print_testname("remote IPI");
+ pr_info("%12s:", "remote IPI");
dotest(remote_ipi, SUCCESS);
- printk(KERN_CONT "\n");
- print_testname("local IPI");
+
+ pr_info("%12s:", "local IPI");
dotest(local_ipi, SUCCESS);
- printk(KERN_CONT "\n");
cleanup_nmi_testsuite();
+ pr_info("--------------------\n");
if (unexpected_testcase_failures) {
- printk("--------------------\n");
- printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
+ pr_info("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
unexpected_testcase_failures, testcase_total);
- printk("-----------------------------------------------------------------\n");
- } else if (expected_testcase_failures && testcase_successes) {
- printk("--------------------\n");
- printk("%3d out of %3d testcases failed, as expected. |\n",
- expected_testcase_failures, testcase_total);
- printk("----------------------------------------------------\n");
- } else if (expected_testcase_failures && !testcase_successes) {
- printk("--------------------\n");
- printk("All %3d testcases failed, as expected. |\n",
- expected_testcase_failures);
- printk("----------------------------------------\n");
} else {
- printk("--------------------\n");
- printk("Good, all %3d testcases passed! |\n",
+ pr_info("Good, all %3d testcases passed! |\n",
testcase_successes);
- printk("---------------------------------\n");
}
+ pr_info("-----------------------------------------------------------------\n");
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1ccd05d8999f..ab3e172dcc69 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -33,6 +33,7 @@
#include <asm/tlb.h>
#include <asm/io_bitmap.h>
#include <asm/gsseg.h>
+#include <asm/msr.h>
/* stub always returning 0. */
DEFINE_ASM_FUNC(paravirt_ret0, "xor %eax,%eax", .entry.text);
@@ -210,12 +211,10 @@ struct paravirt_patch_template pv_ops = {
.mmu.set_p4d = native_set_p4d,
-#if CONFIG_PGTABLE_LEVELS >= 5
.mmu.p4d_val = PTE_IDENT,
.mmu.make_p4d = PTE_IDENT,
.mmu.set_pgd = native_set_pgd,
-#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
.mmu.pte_val = PTE_IDENT,
.mmu.pgd_val = PTE_IDENT,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 962c3ce39323..c1d2dac72b9c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -30,7 +30,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/entry-common.h>
#include <asm/cpu.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/apic.h>
#include <linux/uaccess.h>
#include <asm/mwait.h>
@@ -52,6 +52,7 @@
#include <asm/unwind.h>
#include <asm/tdx.h>
#include <asm/mmu_context.h>
+#include <asm/msr.h>
#include <asm/shstk.h>
#include "process.h"
@@ -93,17 +94,12 @@ EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- /* init_task is not dynamically sized (incomplete FPU state) */
- if (unlikely(src == &init_task))
- memcpy_and_pad(dst, arch_task_struct_size, src, sizeof(init_task), 0);
- else
- memcpy(dst, src, arch_task_struct_size);
+ /* fpu_clone() will initialize the "dst_fpu" memory */
+ memcpy_and_pad(dst, arch_task_struct_size, src, sizeof(*dst), 0);
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
- /* Drop the copied pointer to current's fpstate */
- dst->thread.fpu.fpstate = NULL;
return 0;
}
@@ -111,8 +107,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_X86_64
void arch_release_task_struct(struct task_struct *tsk)
{
- if (fpu_state_size_dynamic())
- fpstate_free(&tsk->thread.fpu);
+ if (fpu_state_size_dynamic() && !(tsk->flags & (PF_KTHREAD | PF_USER_WORKER)))
+ fpstate_free(x86_task_fpu(tsk));
}
#endif
@@ -122,7 +118,6 @@ void arch_release_task_struct(struct task_struct *tsk)
void exit_thread(struct task_struct *tsk)
{
struct thread_struct *t = &tsk->thread;
- struct fpu *fpu = &t->fpu;
if (test_thread_flag(TIF_IO_BITMAP))
io_bitmap_exit(tsk);
@@ -130,7 +125,7 @@ void exit_thread(struct task_struct *tsk)
free_vm86(t);
shstk_free(tsk);
- fpu__drop(fpu);
+ fpu__drop(tsk);
}
static int set_new_tls(struct task_struct *p, unsigned long tls)
@@ -344,7 +339,7 @@ static void set_cpuid_faulting(bool on)
msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
this_cpu_write(msr_misc_features_shadow, msrval);
- wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+ wrmsrq(MSR_MISC_FEATURES_ENABLES, msrval);
}
static void disable_cpuid(void)
@@ -561,7 +556,7 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
if (!static_cpu_has(X86_FEATURE_ZEN)) {
msr |= ssbd_tif_to_amd_ls_cfg(tifn);
- wrmsrl(MSR_AMD64_LS_CFG, msr);
+ wrmsrq(MSR_AMD64_LS_CFG, msr);
return;
}
@@ -578,7 +573,7 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
raw_spin_lock(&st->shared_state->lock);
/* First sibling enables SSBD: */
if (!st->shared_state->disable_state)
- wrmsrl(MSR_AMD64_LS_CFG, msr);
+ wrmsrq(MSR_AMD64_LS_CFG, msr);
st->shared_state->disable_state++;
raw_spin_unlock(&st->shared_state->lock);
} else {
@@ -588,7 +583,7 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
raw_spin_lock(&st->shared_state->lock);
st->shared_state->disable_state--;
if (!st->shared_state->disable_state)
- wrmsrl(MSR_AMD64_LS_CFG, msr);
+ wrmsrq(MSR_AMD64_LS_CFG, msr);
raw_spin_unlock(&st->shared_state->lock);
}
}
@@ -597,7 +592,7 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
{
u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
- wrmsrl(MSR_AMD64_LS_CFG, msr);
+ wrmsrq(MSR_AMD64_LS_CFG, msr);
}
#endif
@@ -607,7 +602,7 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
* SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
* so ssbd_tif_to_spec_ctrl() just works.
*/
- wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+ wrmsrq(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}
/*
@@ -710,11 +705,11 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
arch_has_block_step()) {
unsigned long debugctl, msk;
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~DEBUGCTLMSR_BTF;
msk = tifn & _TIF_BLOCKSTEP;
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
}
if ((tifp ^ tifn) & _TIF_NOTSC)
@@ -907,13 +902,10 @@ static __init bool prefer_mwait_c1_over_halt(void)
static __cpuidle void mwait_idle(void)
{
if (!current_set_polling_and_test()) {
- if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
- mb(); /* quirk */
- clflush((void *)&current_thread_info()->flags);
- mb(); /* quirk */
- }
+ const void *addr = &current_thread_info()->flags;
- __monitor((void *)&current_thread_info()->flags, 0, 0);
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
if (!need_resched()) {
__sti_mwait(0, 0);
raw_local_irq_disable();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4636ef359973..a10e180cbf23 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -160,8 +160,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_p, cpu);
+ switch_fpu(prev_p, cpu);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -208,10 +207,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(current_task, next_p);
- switch_fpu_finish(next_p);
-
/* Load the Intel cache allocation PQR MSR. */
- resctrl_sched_in(next_p);
+ resctrl_arch_sched_in(next_p);
return prev_p;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7196ca7048be..8d6cf25127aa 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -57,6 +57,7 @@
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#include <asm/fred.h>
+#include <asm/msr.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
@@ -95,8 +96,8 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
return;
if (mode == SHOW_REGS_USER) {
- rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ rdmsrq(MSR_FS_BASE, fs);
+ rdmsrq(MSR_KERNEL_GS_BASE, shadowgs);
printk("%sFS: %016lx GS: %016lx\n",
log_lvl, fs, shadowgs);
return;
@@ -107,9 +108,9 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
asm("movl %%fs,%0" : "=r" (fsindex));
asm("movl %%gs,%0" : "=r" (gsindex));
- rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ rdmsrq(MSR_FS_BASE, fs);
+ rdmsrq(MSR_GS_BASE, gs);
+ rdmsrq(MSR_KERNEL_GS_BASE, shadowgs);
cr0 = read_cr0();
cr2 = read_cr2();
@@ -195,7 +196,7 @@ static noinstr unsigned long __rdgsbase_inactive(void)
native_swapgs();
} else {
instrumentation_begin();
- rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ rdmsrq(MSR_KERNEL_GS_BASE, gsbase);
instrumentation_end();
}
@@ -221,7 +222,7 @@ static noinstr void __wrgsbase_inactive(unsigned long gsbase)
native_swapgs();
} else {
instrumentation_begin();
- wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ wrmsrq(MSR_KERNEL_GS_BASE, gsbase);
instrumentation_end();
}
}
@@ -353,7 +354,7 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
} else {
if (prev_index != next_index)
loadseg(which, next_index);
- wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
+ wrmsrq(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
next_base);
}
} else {
@@ -463,7 +464,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
gsbase = __rdgsbase_inactive();
local_irq_restore(flags);
} else {
- rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ rdmsrq(MSR_KERNEL_GS_BASE, gsbase);
}
return gsbase;
@@ -478,7 +479,7 @@ void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
__wrgsbase_inactive(gsbase);
local_irq_restore(flags);
} else {
- wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ wrmsrq(MSR_KERNEL_GS_BASE, gsbase);
}
}
@@ -616,8 +617,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(hardirq_stack_inuse));
- if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_p, cpu);
+ switch_fpu(prev_p, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -671,8 +671,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(current_task, next_p);
raw_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
- switch_fpu_finish(next_p);
-
/* Reload sp0. */
update_task_stack(next_p);
@@ -707,7 +705,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
}
/* Load the Intel cache allocation PQR MSR. */
- resctrl_sched_in(next_p);
+ resctrl_arch_sched_in(next_p);
return prev_p;
}
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index b7c0f142d026..4679ac0a03eb 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -27,7 +27,7 @@ static void cs5530a_warm_reset(struct pci_dev *dev)
static void cs5536_warm_reset(struct pci_dev *dev)
{
/* writing 1 to the LSB of this MSR causes a hard reset */
- wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL);
+ wrmsrq(MSR_DIVIL_SOFT_RESET, 1ULL);
udelay(50); /* shouldn't get here but be safe and spin a while */
}
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c7c4b1917336..57276f134d12 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -263,17 +263,17 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
movl %edx, %edi
movl $1024, %ecx
- rep ; movsl
+ rep movsl
movl %ebp, %edi
movl %eax, %esi
movl $1024, %ecx
- rep ; movsl
+ rep movsl
movl %eax, %edi
movl %edx, %esi
movl $1024, %ecx
- rep ; movsl
+ rep movsl
lea PAGE_SIZE(%ebp), %esi
jmp 0b
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index ac058971a382..ea604f4d0b52 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -39,6 +39,8 @@ SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
+SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
+SYM_DATA(kexec_debug_8250_port, .word 0)
.balign 16
SYM_DATA_START_LOCAL(kexec_debug_gdt)
@@ -50,6 +52,11 @@ SYM_DATA_START_LOCAL(kexec_debug_gdt)
.quad 0x00cf92000000ffff /* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
+ .balign 8
+SYM_DATA_START(kexec_debug_idt)
+ .skip 0x100, 0x00
+SYM_DATA_END(kexec_debug_idt)
+
.section .text..relocate_kernel,"ax";
.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
@@ -72,8 +79,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
pushq %r15
pushf
- /* zero out flags, and disable interrupts */
- pushq $0
+ /* Invalidate GDT/IDT, zero out flags */
+ pushq $0
+ pushq $0
+
+ lidt (%rsp)
+ lgdt (%rsp)
+ addq $8, %rsp
popfq
/* Switch to the identity mapped page tables */
@@ -139,6 +151,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
movq %ds, %rax
movq %rax, %ds
+ /* Now an IDTR on the stack to load the IDT the kernel created */
+ leaq kexec_debug_idt(%rip), %rsi
+ pushq %rsi
+ pushw $0xff
+ lidt (%rsp)
+ addq $10, %rsp
+
+ //int3
+
/*
* Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
* below.
@@ -342,20 +363,20 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
/* copy source page to swap page */
movq kexec_pa_swap_page(%rip), %rdi
movl $512, %ecx
- rep ; movsq
+ rep movsq
/* copy destination page to source page */
movq %rax, %rdi
movq %rdx, %rsi
movl $512, %ecx
- rep ; movsq
+ rep movsq
/* copy swap page to destination page */
movq %rdx, %rdi
movq kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
movl $512, %ecx
- rep ; movsq
+ rep movsq
lea PAGE_SIZE(%rax), %rsi
jmp .Lloop
@@ -364,3 +385,222 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
ret
int3
SYM_CODE_END(swap_pages)
+
+/*
+ * Generic 'print character' routine
+ * - %al: Character to be printed (may clobber %rax)
+ * - %rdx: MMIO address or port.
+ */
+#define XMTRDY 0x20
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define LSR 5 /* Line Status */
+
+SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ addw $LSR, %dx
+ xchg %al, %ah
+.Lxmtrdy_loop:
+ inb %dx, %al
+ testb $XMTRDY, %al
+ jnz .Lready
+ pause
+ jmp .Lxmtrdy_loop
+
+.Lready:
+ subw $LSR, %dx
+ xchg %al, %ah
+ outb %al, %dx
+pr_char_null:
+ ANNOTATE_NOENDBR
+
+ ANNOTATE_UNRET_SAFE
+ ret
+SYM_CODE_END(pr_char_8250)
+
+SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+.Lxmtrdy_loop_mmio:
+ movb (LSR*4)(%rdx), %ah
+ testb $XMTRDY, %ah
+ jnz .Lready_mmio
+ pause
+ jmp .Lxmtrdy_loop_mmio
+
+.Lready_mmio:
+ movb %al, (%rdx)
+ ANNOTATE_UNRET_SAFE
+ ret
+SYM_CODE_END(pr_char_8250_mmio32)
+
+/*
+ * Load pr_char function pointer into %rsi and load %rdx with whatever
+ * that function wants to see there (typically port/MMIO address).
+ */
+.macro pr_setup
+ leaq pr_char_8250(%rip), %rsi
+ movw kexec_debug_8250_port(%rip), %dx
+ testw %dx, %dx
+ jnz 1f
+
+ leaq pr_char_8250_mmio32(%rip), %rsi
+ movq kexec_debug_8250_mmio32(%rip), %rdx
+ testq %rdx, %rdx
+ jnz 1f
+
+ leaq pr_char_null(%rip), %rsi
+1:
+.endm
+
+/* Print the nybble in %bl, clobber %rax */
+SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
+ UNWIND_HINT_FUNC
+ movb %bl, %al
+ nop
+ andb $0x0f, %al
+ addb $0x30, %al
+ cmpb $0x3a, %al
+ jb 1f
+ addb $('a' - '0' - 10), %al
+ ANNOTATE_RETPOLINE_SAFE
+1: jmp *%rsi
+SYM_CODE_END(pr_nybble)
+
+SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
+ UNWIND_HINT_FUNC
+ movq $16, %rcx
+1: rolq $4, %rbx
+ call pr_nybble
+ loop 1b
+ movb $'\n', %al
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *%rsi
+SYM_CODE_END(pr_qword)
+
+.macro print_reg a, b, c, d, r
+ movb $\a, %al
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rsi
+ movb $\b, %al
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rsi
+ movb $\c, %al
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rsi
+ movb $\d, %al
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rsi
+ movq \r, %rbx
+ call pr_qword
+.endm
+
+SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
+ /* Each of these is 6 bytes. */
+.macro vec_err exc
+ UNWIND_HINT_ENTRY
+ . = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
+ nop
+ nop
+ pushq $\exc
+ jmp exc_handler
+.endm
+
+.macro vec_noerr exc
+ UNWIND_HINT_ENTRY
+ . = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
+ pushq $0
+ pushq $\exc
+ jmp exc_handler
+.endm
+
+ ANNOTATE_NOENDBR
+ vec_noerr 0 // #DE
+ vec_noerr 1 // #DB
+ vec_noerr 2 // #NMI
+ vec_noerr 3 // #BP
+ vec_noerr 4 // #OF
+ vec_noerr 5 // #BR
+ vec_noerr 6 // #UD
+ vec_noerr 7 // #NM
+ vec_err 8 // #DF
+ vec_noerr 9
+ vec_err 10 // #TS
+ vec_err 11 // #NP
+ vec_err 12 // #SS
+ vec_err 13 // #GP
+ vec_err 14 // #PF
+ vec_noerr 15
+SYM_CODE_END(kexec_debug_exc_vectors)
+
+SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
+ /* No need for RET mitigations during kexec */
+ VALIDATE_UNRET_END
+
+ pushq %rax
+ pushq %rbx
+ pushq %rcx
+ pushq %rdx
+ pushq %rsi
+
+ /* Stack frame */
+#define EXC_SS 0x58 /* Architectural... */
+#define EXC_RSP 0x50
+#define EXC_EFLAGS 0x48
+#define EXC_CS 0x40
+#define EXC_RIP 0x38
+#define EXC_ERRORCODE 0x30 /* Either architectural or zero pushed by handler */
+#define EXC_EXCEPTION 0x28 /* Pushed by handler entry point */
+#define EXC_RAX 0x20 /* Pushed just above in exc_handler */
+#define EXC_RBX 0x18
+#define EXC_RCX 0x10
+#define EXC_RDX 0x08
+#define EXC_RSI 0x00
+
+ /* Set up %rdx/%rsi for debug output */
+ pr_setup
+
+ /* rip and exception info */
+ print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
+ print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
+ print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
+ print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)
+
+ /* We spilled these to the stack */
+ print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
+ print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
+ print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
+ print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
+ print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)
+
+ /* Other registers untouched */
+ print_reg 'r', 'd', 'i', ':', %rdi
+ print_reg 'r', '8', ' ', ':', %r8
+ print_reg 'r', '9', ' ', ':', %r9
+ print_reg 'r', '1', '0', ':', %r10
+ print_reg 'r', '1', '1', ':', %r11
+ print_reg 'r', '1', '2', ':', %r12
+ print_reg 'r', '1', '3', ':', %r13
+ print_reg 'r', '1', '4', ':', %r14
+ print_reg 'r', '1', '5', ':', %r15
+ print_reg 'c', 'r', '2', ':', %cr2
+
+ /* Only return from INT3 */
+ cmpq $3, EXC_EXCEPTION(%rsp)
+ jne .Ldie
+
+ popq %rsi
+ popq %rdx
+ popq %rcx
+ popq %rbx
+ popq %rax
+
+ addq $16, %rsp
+ iretq
+
+.Ldie:
+ hlt
+ jmp .Ldie
+
+SYM_CODE_END(exc_handler)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d2a13b37833..7d9ed79a93c0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -11,6 +11,7 @@
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
#include <linux/efi.h>
+#include <linux/hugetlb.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
#include <linux/initrd.h>
@@ -18,21 +19,19 @@
#include <linux/memblock.h>
#include <linux/panic_notifier.h>
#include <linux/pci.h>
+#include <linux/random.h>
#include <linux/root_dev.h>
-#include <linux/hugetlb.h>
-#include <linux/tboot.h>
-#include <linux/usb/xhci-dbgp.h>
#include <linux/static_call.h>
#include <linux/swiotlb.h>
-#include <linux/random.h>
+#include <linux/tboot.h>
+#include <linux/usb/xhci-dbgp.h>
+#include <linux/vmalloc.h>
#include <uapi/linux/mount.h>
#include <xen/xen.h>
#include <asm/apic.h>
-#include <asm/efi.h>
-#include <asm/numa.h>
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
@@ -47,18 +46,16 @@
#include <asm/mce.h>
#include <asm/memtype.h>
#include <asm/mtrr.h>
-#include <asm/realmode.h>
+#include <asm/nmi.h>
+#include <asm/numa.h>
#include <asm/olpc_ofw.h>
#include <asm/pci-direct.h>
#include <asm/prom.h>
#include <asm/proto.h>
+#include <asm/realmode.h>
#include <asm/thermal.h>
#include <asm/unwind.h>
#include <asm/vsyscall.h>
-#include <linux/vmalloc.h>
-#if defined(CONFIG_X86_LOCAL_APIC)
-#include <asm/nmi.h>
-#endif
/*
* max_low_pfn_mapped: highest directly mapped pfn < 4 GB
@@ -134,6 +131,7 @@ struct ist_info ist_info;
struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
+SYM_PIC_ALIAS(boot_cpu_data);
#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
__visible unsigned long mmu_cr4_features __ro_after_init;
@@ -151,6 +149,13 @@ int bootloader_type, bootloader_version;
static const struct ctl_table x86_sysctl_table[] = {
{
+ .procname = "unknown_nmi_panic",
+ .data = &unknown_nmi_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "panic_on_unrecovered_nmi",
.data = &panic_on_unrecovered_nmi,
.maxlen = sizeof(int),
@@ -185,15 +190,6 @@ static const struct ctl_table x86_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_X86_LOCAL_APIC)
- {
- .procname = "unknown_nmi_panic",
- .data = &unknown_nmi_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#if defined(CONFIG_ACPI_SLEEP)
{
.procname = "acpi_video_flags",
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 059685612362..2ddf23387c7e 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -173,8 +173,8 @@ static int shstk_setup(void)
return PTR_ERR((void *)addr);
fpregs_lock_and_load();
- wrmsrl(MSR_IA32_PL3_SSP, addr + size);
- wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+ wrmsrq(MSR_IA32_PL3_SSP, addr + size);
+ wrmsrq(MSR_IA32_U_CET, CET_SHSTK_EN);
fpregs_unlock();
shstk->base = addr;
@@ -239,7 +239,7 @@ static unsigned long get_user_shstk_addr(void)
fpregs_lock_and_load();
- rdmsrl(MSR_IA32_PL3_SSP, ssp);
+ rdmsrq(MSR_IA32_PL3_SSP, ssp);
fpregs_unlock();
@@ -372,7 +372,7 @@ int setup_signal_shadow_stack(struct ksignal *ksig)
return -EFAULT;
fpregs_lock_and_load();
- wrmsrl(MSR_IA32_PL3_SSP, ssp);
+ wrmsrq(MSR_IA32_PL3_SSP, ssp);
fpregs_unlock();
return 0;
@@ -396,7 +396,7 @@ int restore_signal_shadow_stack(void)
return err;
fpregs_lock_and_load();
- wrmsrl(MSR_IA32_PL3_SSP, ssp);
+ wrmsrq(MSR_IA32_PL3_SSP, ssp);
fpregs_unlock();
return 0;
@@ -460,7 +460,7 @@ static int wrss_control(bool enable)
return 0;
fpregs_lock_and_load();
- rdmsrl(MSR_IA32_U_CET, msrval);
+ rdmsrq(MSR_IA32_U_CET, msrval);
if (enable) {
features_set(ARCH_SHSTK_WRSS);
@@ -473,7 +473,7 @@ static int wrss_control(bool enable)
msrval &= ~CET_WRSS_EN;
}
- wrmsrl(MSR_IA32_U_CET, msrval);
+ wrmsrq(MSR_IA32_U_CET, msrval);
unlock:
fpregs_unlock();
@@ -492,8 +492,8 @@ static int shstk_disable(void)
fpregs_lock_and_load();
/* Disable WRSS too when disabling shadow stack */
- wrmsrl(MSR_IA32_U_CET, 0);
- wrmsrl(MSR_IA32_PL3_SSP, 0);
+ wrmsrq(MSR_IA32_U_CET, 0);
+ wrmsrq(MSR_IA32_PL3_SSP, 0);
fpregs_unlock();
shstk_free(current);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 5f441039b572..2404233336ab 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -255,7 +255,7 @@ static void
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
bool stepping, failed;
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
if (v8086_mode(regs))
save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
@@ -423,14 +423,14 @@ bool sigaltstack_size_valid(size_t ss_size)
if (!fpu_state_size_dynamic() && !strict_sigaltstack_size)
return true;
- fsize += current->group_leader->thread.fpu.perm.__user_state_size;
+ fsize += x86_task_fpu(current->group_leader)->perm.__user_state_size;
if (likely(ss_size > fsize))
return true;
if (strict_sigaltstack_size)
return ss_size > fsize;
- mask = current->group_leader->thread.fpu.perm.__state_perm;
+ mask = x86_task_fpu(current->group_leader)->perm.__state_perm;
if (mask & XFEATURE_MASK_USER_DYNAMIC)
return ss_size > fsize;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d6cf1e23c2a3..b90d872aa0c8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,7 +64,7 @@
#include <asm/acpi.h>
#include <asm/cacheinfo.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
@@ -1188,6 +1188,12 @@ void cpu_disable_common(void)
remove_siblinginfo(cpu);
+ /*
+ * Stop allowing kernel-mode FPU. This is needed so that if the CPU is
+ * brought online again, the initial state is not allowed:
+ */
+ this_cpu_write(kernel_fpu_allowed, false);
+
/* It's now safe to remove this processor from the online map */
lock_vector_lock();
remove_cpu_from_maps(cpu);
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index a59c72e77645..378c388d1b31 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -81,7 +81,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
break;
case RET:
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk_at(insn))
code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
else
code = &retinsn;
@@ -90,7 +90,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
case JCC:
if (!func) {
func = __static_call_return;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk())
func = x86_return_thunk;
}
@@ -108,7 +108,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
if (system_state == SYSTEM_BOOTING || modinit)
return text_poke_early(insn, code, size);
- text_poke_bp(insn, code, size, emulate);
+ smp_text_poke_single(insn, code, size, emulate);
}
static void __static_call_validate(u8 *insn, bool tail, bool tramp)
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
index b8e7abe00b06..708d61743d15 100644
--- a/arch/x86/kernel/trace_clock.c
+++ b/arch/x86/kernel/trace_clock.c
@@ -4,7 +4,7 @@
*/
#include <asm/trace_clock.h>
#include <asm/barrier.h>
-#include <asm/msr.h>
+#include <asm/tsc.h>
/*
* trace_clock_x86_tsc(): A clock that is just the cycle counter.
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
deleted file mode 100644
index 03ae1caaa878..000000000000
--- a/arch/x86/kernel/tracepoint.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
- */
-#include <linux/jump_label.h>
-#include <linux/atomic.h>
-
-#include <asm/trace/exceptions.h>
-
-DEFINE_STATIC_KEY_FALSE(trace_pagefault_key);
-
-int trace_pagefault_reg(void)
-{
- static_branch_inc(&trace_pagefault_key);
- return 0;
-}
-
-void trace_pagefault_unreg(void)
-{
- static_branch_dec(&trace_pagefault_key);
-}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9f88b8a78e50..94c0236963c6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -68,6 +68,7 @@
#include <asm/vdso.h>
#include <asm/tdx.h>
#include <asm/cfi.h>
+#include <asm/msr.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -749,7 +750,7 @@ static bool try_fixup_enqcmd_gp(void)
if (current->pasid_activated)
return false;
- wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
+ wrmsrq(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
current->pasid_activated = 1;
return true;
@@ -882,16 +883,16 @@ static void do_int3_user(struct pt_regs *regs)
DEFINE_IDTENTRY_RAW(exc_int3)
{
/*
- * poke_int3_handler() is completely self contained code; it does (and
+ * smp_text_poke_int3_handler() is completely self contained code; it does (and
* must) *NOT* call out to anything, lest it hits upon yet another
* INT3.
*/
- if (poke_int3_handler(regs))
+ if (smp_text_poke_int3_handler(regs))
return;
/*
* irqentry_enter_from_user_mode() uses static_branch_{,un}likely()
- * and therefore can trigger INT3, hence poke_int3_handler() must
+ * and therefore can trigger INT3, hence smp_text_poke_int3_handler() must
* be done before. If the entry came from kernel mode, then use
* nmi_enter() because the INT3 could have been hit in any context
* including NMI.
@@ -1120,9 +1121,9 @@ static noinstr void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6)
*/
unsigned long debugctl;
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl |= DEBUGCTLMSR_BTF;
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
}
/*
@@ -1295,7 +1296,7 @@ DEFINE_IDTENTRY_RAW(exc_debug)
static void math_error(struct pt_regs *regs, int trapnr)
{
struct task_struct *task = current;
- struct fpu *fpu = &task->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(task);
int si_code;
char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
"simd exception";
@@ -1386,11 +1387,11 @@ static bool handle_xfd_event(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD))
return false;
- rdmsrl(MSR_IA32_XFD_ERR, xfd_err);
+ rdmsrq(MSR_IA32_XFD_ERR, xfd_err);
if (!xfd_err)
return false;
- wrmsrl(MSR_IA32_XFD_ERR, 0);
+ wrmsrq(MSR_IA32_XFD_ERR, 0);
/* Die if that happens in kernel space */
if (WARN_ON(!user_mode(regs)))
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 88e5a4ed9db3..87e749106dda 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -16,7 +16,7 @@
#include <linux/static_key.h>
#include <linux/static_call.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
@@ -29,6 +29,7 @@
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/i8259.h>
+#include <asm/msr.h>
#include <asm/topology.h>
#include <asm/uv/uv.h>
#include <asm/sev.h>
@@ -1098,7 +1099,7 @@ static void __init detect_art(void)
if (art_base_clk.denominator < ART_MIN_DENOMINATOR)
return;
- rdmsrl(MSR_IA32_TSC_ADJUST, art_base_clk.offset);
+ rdmsrq(MSR_IA32_TSC_ADJUST, art_base_clk.offset);
/* Make this sticky over multiple CPU init calls */
setup_force_cpu_cap(X86_FEATURE_ART);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 4334033658ed..ec3aa340d351 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/nmi.h>
+#include <asm/msr.h>
#include <asm/tsc.h>
struct tsc_adjust {
@@ -65,12 +66,12 @@ void tsc_verify_tsc_adjust(bool resume)
adj->nextcheck = jiffies + HZ;
- rdmsrl(MSR_IA32_TSC_ADJUST, curval);
+ rdmsrq(MSR_IA32_TSC_ADJUST, curval);
if (adj->adjusted == curval)
return;
/* Restore the original value */
- wrmsrl(MSR_IA32_TSC_ADJUST, adj->adjusted);
+ wrmsrq(MSR_IA32_TSC_ADJUST, adj->adjusted);
if (!adj->warned || resume) {
pr_warn(FW_BUG "TSC ADJUST differs: CPU%u %lld --> %lld. Restoring\n",
@@ -142,7 +143,7 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
if (likely(!tsc_async_resets)) {
pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
cpu, bootval);
- wrmsrl(MSR_IA32_TSC_ADJUST, 0);
+ wrmsrq(MSR_IA32_TSC_ADJUST, 0);
bootval = 0;
} else {
pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
@@ -165,7 +166,7 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
if (check_tsc_unstable())
return false;
- rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
+ rdmsrq(MSR_IA32_TSC_ADJUST, bootval);
cur->bootval = bootval;
cur->nextcheck = jiffies + HZ;
tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(), bootcpu);
@@ -187,7 +188,7 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
return false;
- rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
+ rdmsrq(MSR_IA32_TSC_ADJUST, bootval);
cur->bootval = bootval;
cur->nextcheck = jiffies + HZ;
cur->warned = false;
@@ -229,7 +230,7 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
*/
if (bootval != ref->adjusted) {
cur->adjusted = ref->adjusted;
- wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
+ wrmsrq(MSR_IA32_TSC_ADJUST, ref->adjusted);
}
/*
* We have the TSCs forced to be in sync on this package. Skip sync
@@ -518,7 +519,7 @@ retry:
pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
cpu, cur_max_warp, cur->adjusted);
- wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
+ wrmsrq(MSR_IA32_TSC_ADJUST, cur->adjusted);
goto retry;
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 9194695662b2..6d383839e839 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -840,6 +840,11 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
insn_byte_t p;
int i;
+ /* x86_nops[insn->length]; same as jmp with .offs = 0 */
+ if (insn->length <= ASM_NOP_MAX &&
+ !memcmp(insn->kaddr, x86_nops[insn->length], insn->length))
+ goto setup;
+
switch (opc1) {
case 0xeb: /* jmp 8 */
case 0xe9: /* jmp 32 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index aa4d0221583c..4fa0be732af1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -79,11 +79,13 @@ const_cpu_current_top_of_stack = cpu_current_top_of_stack;
#define BSS_DECRYPTED \
. = ALIGN(PMD_SIZE); \
__start_bss_decrypted = .; \
+ __pi___start_bss_decrypted = .; \
*(.bss..decrypted); \
. = ALIGN(PAGE_SIZE); \
__start_bss_decrypted_unused = .; \
. = ALIGN(PMD_SIZE); \
__end_bss_decrypted = .; \
+ __pi___end_bss_decrypted = .; \
#else
@@ -128,6 +130,7 @@ SECTIONS
/* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = .;
+ __pi__text = .;
_stext = .;
ALIGN_ENTRY_TEXT_BEGIN
*(.text..__x86.rethunk_untrain)
@@ -391,6 +394,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
_end = .;
+ __pi__end = .;
#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
@@ -505,6 +509,16 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
"SRSO function pair won't alias");
#endif
+#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
+. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline");
+. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart");
+. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array");
+#endif
+
+#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
+. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline");
+#endif
+
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 571c906ffcbf..ecd85f4801cc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -21,7 +21,7 @@
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include <asm/sgx.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
@@ -236,7 +236,7 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp
struct kvm_cpuid_entry2 *entry;
u32 base;
- for_each_possible_hypervisor_cpuid_base(base) {
+ for_each_possible_cpuid_base_hypervisor(base) {
entry = kvm_find_cpuid_entry(vcpu, base);
if (entry) {
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 7338879d1c0c..067f8e3f5a0d 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -20,6 +20,7 @@
#include <linux/kvm_host.h>
#include <asm/irq_remapping.h>
+#include <asm/msr.h>
#include "trace.h"
#include "lapic.h"
@@ -330,7 +331,7 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
int cpu = READ_ONCE(vcpu->cpu);
if (cpu != get_cpu()) {
- wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
+ wrmsrq(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu));
}
put_cpu();
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a7a7dc507336..1aa0f07d3a63 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -26,6 +26,7 @@
#include <asm/fpu/xcr.h>
#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
+#include <asm/msr.h>
#include <asm/sev.h>
#include "mmu.h"
@@ -2933,6 +2934,7 @@ void __init sev_set_cpu_caps(void)
void __init sev_hardware_setup(void)
{
unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
+ struct sev_platform_init_args init_args = {0};
bool sev_snp_supported = false;
bool sev_es_supported = false;
bool sev_supported = false;
@@ -3059,6 +3061,15 @@ out:
sev_supported_vmsa_features = 0;
if (sev_es_debug_swap_enabled)
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
+ if (!sev_enabled)
+ return;
+
+ /*
+ * Do both SNP and SEV initialization at KVM module load.
+ */
+ init_args.probe = true;
+ sev_platform_init(&init_args);
}
void sev_hardware_unsetup(void)
@@ -3074,6 +3085,8 @@ void sev_hardware_unsetup(void)
misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
+
+ sev_platform_shutdown();
}
int sev_cpu_init(struct svm_cpu_data *sd)
@@ -3119,7 +3132,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
* back to WBINVD if this faults so as not to make any problems worse
* by leaving stale encrypted data in the cache.
*/
- if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+ if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
goto do_wbinvd;
return;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a89c271a1951..67fee545d42a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -31,6 +31,7 @@
#include <linux/string_choices.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
@@ -475,24 +476,18 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu)
static void svm_init_erratum_383(void)
{
- u32 low, high;
- int err;
u64 val;
if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
return;
/* Use _safe variants to not break nested virtualization */
- val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
- if (err)
+ if (native_read_msr_safe(MSR_AMD64_DC_CFG, &val))
return;
val |= (1ULL << 47);
- low = lower_32_bits(val);
- high = upper_32_bits(val);
-
- native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
+ native_write_msr_safe(MSR_AMD64_DC_CFG, val);
erratum_383_found = true;
}
@@ -566,7 +561,7 @@ static void __svm_write_tsc_multiplier(u64 multiplier)
if (multiplier == __this_cpu_read(current_tsc_ratio))
return;
- wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+ wrmsrq(MSR_AMD64_TSC_RATIO, multiplier);
__this_cpu_write(current_tsc_ratio, multiplier);
}
@@ -579,15 +574,15 @@ static inline void kvm_cpu_svm_disable(void)
{
uint64_t efer;
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
+ wrmsrq(MSR_VM_HSAVE_PA, 0);
+ rdmsrq(MSR_EFER, efer);
if (efer & EFER_SVME) {
/*
* Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
* NMI aren't blocked.
*/
stgi();
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ wrmsrq(MSR_EFER, efer & ~EFER_SVME);
}
}
@@ -616,7 +611,7 @@ static int svm_enable_virtualization_cpu(void)
uint64_t efer;
int me = raw_smp_processor_id();
- rdmsrl(MSR_EFER, efer);
+ rdmsrq(MSR_EFER, efer);
if (efer & EFER_SVME)
return -EBUSY;
@@ -626,9 +621,9 @@ static int svm_enable_virtualization_cpu(void)
sd->next_asid = sd->max_asid + 1;
sd->min_asid = max_sev_asid + 1;
- wrmsrl(MSR_EFER, efer | EFER_SVME);
+ wrmsrq(MSR_EFER, efer | EFER_SVME);
- wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
+ wrmsrq(MSR_VM_HSAVE_PA, sd->save_area_pa);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
/*
@@ -649,13 +644,12 @@ static int svm_enable_virtualization_cpu(void)
* erratum is present everywhere).
*/
if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
- uint64_t len, status = 0;
+ u64 len, status = 0;
int err;
- len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+ err = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &len);
if (!err)
- status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
- &err);
+ err = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, &status);
if (err)
osvw_status = osvw_len = 0;
@@ -2205,14 +2199,13 @@ static int ac_interception(struct kvm_vcpu *vcpu)
static bool is_erratum_383(void)
{
- int err, i;
+ int i;
u64 value;
if (!erratum_383_found)
return false;
- value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
- if (err)
+ if (native_read_msr_safe(MSR_IA32_MC0_STATUS, &value))
return false;
/* Bit 62 may or may not be set for this mce */
@@ -2223,17 +2216,11 @@ static bool is_erratum_383(void)
/* Clear MCi_STATUS registers */
for (i = 0; i < 6; ++i)
- native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
-
- value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
- if (!err) {
- u32 low, high;
+ native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0);
+ if (!native_read_msr_safe(MSR_IA32_MCG_STATUS, &value)) {
value &= ~(1ULL << 2);
- low = lower_32_bits(value);
- high = upper_32_bits(value);
-
- native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
+ native_write_msr_safe(MSR_IA32_MCG_STATUS, value);
}
/* Flush tlb to evict multi-match entries */
@@ -5295,7 +5282,7 @@ static __init void svm_adjust_mmio_mask(void)
return;
/* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_AMD64_SYSCFG, msr);
+ rdmsrq(MSR_AMD64_SYSCFG, msr);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5504d9e9fd32..d268224227f0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6,6 +6,7 @@
#include <asm/debugreg.h>
#include <asm/mmu_context.h>
+#include <asm/msr.h>
#include "x86.h"
#include "cpuid.h"
@@ -7202,8 +7203,8 @@ static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs)
msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
/* These MSRs specify bits which the guest must keep fixed off. */
- rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
- rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
+ rdmsrq(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
+ rdmsrq(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
if (vmx_umip_emulated())
msrs->cr4_fixed1 |= X86_CR4_UMIP;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 77012b2eca0e..231a9633359c 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <asm/msr.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
@@ -279,9 +280,9 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
local_irq_disable();
if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
if (read)
- rdmsrl(index, msr_info->data);
+ rdmsrq(index, msr_info->data);
else
- wrmsrl(index, msr_info->data);
+ wrmsrq(index, msr_info->data);
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
local_irq_enable();
return true;
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 9961e07cf071..df1d0cf76947 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -2,6 +2,7 @@
/* Copyright(c) 2021 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/msr.h>
#include <asm/sgx.h>
#include "x86.h"
@@ -411,16 +412,16 @@ void setup_default_sgx_lepubkeyhash(void)
* MSRs exist but are read-only (locked and not writable).
*/
if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
- rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
+ rdmsrq_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
} else {
/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
- rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
- rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
- rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
+ rdmsrq(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
+ rdmsrq(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
+ rdmsrq(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
}
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5c5766467a61..157c23db22be 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -46,6 +46,7 @@
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/mshyperv.h>
+#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/vmx.h>
@@ -273,6 +274,7 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
case L1TF_MITIGATION_OFF:
l1tf = VMENTER_L1D_FLUSH_NEVER;
break;
+ case L1TF_MITIGATION_AUTO:
case L1TF_MITIGATION_FLUSH_NOWARN:
case L1TF_MITIGATION_FLUSH:
case L1TF_MITIGATION_FLUSH_NOSMT:
@@ -380,9 +382,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
if (!vmx->disable_fb_clear)
return;
- msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
+ msr = native_rdmsrq(MSR_IA32_MCU_OPT_CTRL);
msr |= FB_CLEAR_DIS;
- native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ native_wrmsrq(MSR_IA32_MCU_OPT_CTRL, msr);
/* Cache the MSR value to avoid reading it later */
vmx->msr_ia32_mcu_opt_ctrl = msr;
}
@@ -393,7 +395,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
return;
vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
- native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ native_wrmsrq(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
}
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
@@ -1063,7 +1065,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
* provide that period, so a CPU could write host's record into
* guest's memory.
*/
- wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, 0);
}
i = vmx_find_loadstore_msr_slot(&m->guest, msr);
@@ -1192,13 +1194,13 @@ static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
{
u32 i;
- wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
- wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
+ wrmsrq(MSR_IA32_RTIT_STATUS, ctx->status);
+ wrmsrq(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
+ wrmsrq(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
+ wrmsrq(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
for (i = 0; i < addr_range; i++) {
- wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
- wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
+ wrmsrq(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
+ wrmsrq(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
}
}
@@ -1206,13 +1208,13 @@ static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
{
u32 i;
- rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
- rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
+ rdmsrq(MSR_IA32_RTIT_STATUS, ctx->status);
+ rdmsrq(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
+ rdmsrq(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
+ rdmsrq(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
for (i = 0; i < addr_range; i++) {
- rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
- rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
+ rdmsrq(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
+ rdmsrq(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
}
}
@@ -1225,9 +1227,9 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
* GUEST_IA32_RTIT_CTL is already set in the VMCS.
* Save host state before VM entry.
*/
- rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
+ rdmsrq(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
- wrmsrl(MSR_IA32_RTIT_CTL, 0);
+ wrmsrq(MSR_IA32_RTIT_CTL, 0);
pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
}
@@ -1248,7 +1250,7 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
* i.e. RTIT_CTL is always cleared on VM-Exit. Restore it if necessary.
*/
if (vmx->pt_desc.host.ctl)
- wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
+ wrmsrq(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
@@ -1338,7 +1340,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
}
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ wrmsrq(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
savesegment(fs, fs_sel);
savesegment(gs, gs_sel);
@@ -1362,7 +1364,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
++vmx->vcpu.stat.host_state_reload;
#ifdef CONFIG_X86_64
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ rdmsrq(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
kvm_load_ldt(host_state->ldt_sel);
@@ -1382,7 +1384,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
#endif
invalidate_tss_limit();
#ifdef CONFIG_X86_64
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ wrmsrq(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
load_fixmap_gdt(raw_smp_processor_id());
vmx->guest_state_loaded = false;
@@ -1394,7 +1396,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
preempt_disable();
if (vmx->guest_state_loaded)
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ rdmsrq(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
preempt_enable();
return vmx->msr_guest_kernel_gs_base;
}
@@ -1403,7 +1405,7 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
preempt_disable();
if (vmx->guest_state_loaded)
- wrmsrl(MSR_KERNEL_GS_BASE, data);
+ wrmsrq(MSR_KERNEL_GS_BASE, data);
preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
}
@@ -2574,7 +2576,7 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
{
u64 allowed;
- rdmsrl(msr, allowed);
+ rdmsrq(msr, allowed);
return ctl_opt & allowed;
}
@@ -2746,7 +2748,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
break;
}
- rdmsrl(MSR_IA32_VMX_BASIC, basic_msr);
+ rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
if (vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE)
@@ -2766,7 +2768,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
if (vmx_basic_vmcs_mem_type(basic_msr) != X86_MEMTYPE_WB)
return -EIO;
- rdmsrl(MSR_IA32_VMX_MISC, misc_msr);
+ rdmsrq(MSR_IA32_VMX_MISC, misc_msr);
vmcs_conf->basic = basic_msr;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
@@ -2850,7 +2852,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
fault:
WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
- rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
+ rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
cr4_clear_bits(X86_CR4_VMXE);
return -EFAULT;
@@ -4391,7 +4393,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
- rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
+ rdmsrq(MSR_IA32_SYSENTER_EIP, tmpl);
vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
@@ -6745,7 +6747,7 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
vcpu->stat.l1d_flush++;
if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
- native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
return;
}
@@ -7052,7 +7054,7 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
* the #NM exception.
*/
if (is_xfd_nm_fault(vcpu))
- rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+ rdmsrq(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}
static void handle_exception_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
@@ -7307,7 +7309,7 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
return;
if (flags & VMX_RUN_SAVE_SPEC_CTRL)
- vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+ vmx->spec_ctrl = native_rdmsrq(MSR_IA32_SPEC_CTRL);
/*
* If the guest/host SPEC_CTRL values differ, restore the host value.
@@ -7318,7 +7320,7 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
*/
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
vmx->spec_ctrl != hostval)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+ native_wrmsrq(MSR_IA32_SPEC_CTRL, hostval);
barrier_nospec();
}
@@ -7358,10 +7360,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
* mitigation for MDS is done late in VMentry and is still
* executed in spite of L1D Flush. This is because an extra VERW
* should not matter much after the big hammer L1D Flush.
+ *
+ * cpu_buf_vm_clear is used when system is not vulnerable to MDS/TAA,
+ * and is affected by MMIO Stale Data. In such cases mitigation in only
+ * needed against an MMIO capable guest.
*/
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ else if (static_branch_unlikely(&cpu_buf_vm_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
@@ -7700,6 +7706,7 @@ int vmx_vm_init(struct kvm *kvm)
case L1TF_MITIGATION_FLUSH_NOWARN:
/* 'I explicitly don't care' is set */
break;
+ case L1TF_MITIGATION_AUTO:
case L1TF_MITIGATION_FLUSH:
case L1TF_MITIGATION_FLUSH_NOSMT:
case L1TF_MITIGATION_FULL:
@@ -7959,7 +7966,7 @@ static __init u64 vmx_get_perf_capabilities(void)
return 0;
if (boot_cpu_has(X86_FEATURE_PDCM))
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
x86_perf_get_lbr(&vmx_lbr_caps);
@@ -8508,7 +8515,7 @@ __init int vmx_hardware_setup(void)
kvm_enable_efer_bits(EFER_NX);
if (boot_cpu_has(X86_FEATURE_MPX)) {
- rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
+ rdmsrq(MSR_IA32_BNDCFGS, host_bndcfgs);
WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9896fd574bfc..5bdb5b854924 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -578,7 +578,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
values = &msrs->values[slot];
if (values->host != values->curr) {
- wrmsrl(kvm_uret_msrs_list[slot], values->host);
+ wrmsrq(kvm_uret_msrs_list[slot], values->host);
values->curr = values->host;
}
}
@@ -590,10 +590,10 @@ static int kvm_probe_user_return_msr(u32 msr)
int ret;
preempt_disable();
- ret = rdmsrl_safe(msr, &val);
+ ret = rdmsrq_safe(msr, &val);
if (ret)
goto out;
- ret = wrmsrl_safe(msr, val);
+ ret = wrmsrq_safe(msr, val);
out:
preempt_enable();
return ret;
@@ -630,7 +630,7 @@ static void kvm_user_return_msr_cpu_online(void)
int i;
for (i = 0; i < kvm_nr_uret_msrs; ++i) {
- rdmsrl_safe(kvm_uret_msrs_list[i], &value);
+ rdmsrq_safe(kvm_uret_msrs_list[i], &value);
msrs->values[i].host = value;
msrs->values[i].curr = value;
}
@@ -644,7 +644,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
value = (value & mask) | (msrs->values[slot].host & ~mask);
if (value == msrs->values[slot].curr)
return 0;
- err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
+ err = wrmsrq_safe(kvm_uret_msrs_list[slot], value);
if (err)
return 1;
@@ -1174,7 +1174,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
- wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
+ wrmsrq(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
if (cpu_feature_enabled(X86_FEATURE_PKU) &&
@@ -1205,7 +1205,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
- wrmsrl(MSR_IA32_XSS, kvm_host.xss);
+ wrmsrq(MSR_IA32_XSS, kvm_host.xss);
}
}
@@ -1584,7 +1584,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1618,6 +1618,8 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_MDS_NO;
if (!boot_cpu_has_bug(X86_BUG_RFDS))
data |= ARCH_CAP_RFDS_NO;
+ if (!boot_cpu_has_bug(X86_BUG_ITS))
+ data |= ARCH_CAP_ITS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) {
/*
@@ -1660,7 +1662,7 @@ static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
*data = MSR_PLATFORM_INFO_CPUID_FAULT;
break;
case MSR_IA32_UCODE_REV:
- rdmsrl_safe(index, data);
+ rdmsrq_safe(index, data);
break;
default:
return kvm_x86_call(get_feature_msr)(index, data);
@@ -3827,7 +3829,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!data)
break;
- wrmsrl(MSR_IA32_PRED_CMD, data);
+ wrmsrq(MSR_IA32_PRED_CMD, data);
break;
}
case MSR_IA32_FLUSH_CMD:
@@ -3840,7 +3842,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!data)
break;
- wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
break;
case MSR_EFER:
return set_efer(vcpu, msr_info);
@@ -9736,7 +9738,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
* with an exception. PAT[0] is set to WB on RESET and also by the
* kernel, i.e. failure indicates a kernel bug or broken firmware.
*/
- if (rdmsrl_safe(MSR_IA32_CR_PAT, &host_pat) ||
+ if (rdmsrq_safe(MSR_IA32_CR_PAT, &host_pat) ||
(host_pat & GENMASK(2, 0)) != 6) {
pr_err("host PAT[0] is not WB\n");
return -EIO;
@@ -9770,15 +9772,15 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
}
- rdmsrl_safe(MSR_EFER, &kvm_host.efer);
+ rdmsrq_safe(MSR_EFER, &kvm_host.efer);
if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrl(MSR_IA32_XSS, kvm_host.xss);
+ rdmsrq(MSR_IA32_XSS, kvm_host.xss);
kvm_init_pmu_capability(ops->pmu_ops);
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
+ rdmsrq(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
r = ops->hardware_setup();
if (r != 0)
@@ -10974,7 +10976,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
switch_fpu_return();
if (vcpu->arch.guest_fpu.xfd_err)
- wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+ wrmsrq(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -11060,7 +11062,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_call(handle_exit_irqoff)(vcpu);
if (vcpu->arch.guest_fpu.xfd_err)
- wrmsrl(MSR_IA32_XFD_ERR, 0);
+ wrmsrq(MSR_IA32_XFD_ERR, 0);
/*
* Consume any pending interrupts, including the possible source of
@@ -13666,12 +13668,12 @@ int kvm_spec_ctrl_test_value(u64 value)
local_irq_save(flags);
- if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
+ if (rdmsrq_safe(MSR_IA32_SPEC_CTRL, &saved_value))
ret = 1;
- else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
+ else if (wrmsrq_safe(MSR_IA32_SPEC_CTRL, value))
ret = 1;
else
- wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
+ wrmsrq(MSR_IA32_SPEC_CTRL, saved_value);
local_irq_restore(flags);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 1c50352eb49f..4fa5c4e1ba8a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -3,6 +3,8 @@
# Makefile for x86 specific library files.
#
+obj-y += crypto/
+
# Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
@@ -39,14 +41,14 @@ lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
-crc32-x86-y := crc32-glue.o crc32-pclmul.o
+crc32-x86-y := crc32.o crc32-pclmul.o
crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o
-crc64-x86-y := crc64-glue.o crc64-pclmul.o
+crc64-x86-y := crc64.o crc64-pclmul.o
obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o
-crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o
+crc-t10dif-x86-y := crc-t10dif.o crc16-msb-pclmul.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o
diff --git a/arch/x86/lib/crc-t10dif-glue.c b/arch/x86/lib/crc-t10dif.c
index f89c335cde3c..db7ce59c31ac 100644
--- a/arch/x86/lib/crc-t10dif-glue.c
+++ b/arch/x86/lib/crc-t10dif.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
#include "crc-pclmul-template.h"
-static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16);
@@ -29,7 +29,7 @@ static int __init crc_t10dif_x86_init(void)
}
return 0;
}
-arch_initcall(crc_t10dif_x86_init);
+subsys_initcall(crc_t10dif_x86_init);
static void __exit crc_t10dif_x86_exit(void)
{
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32.c
index e3f93b17ac3f..d09343e2cea9 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32.c
@@ -11,8 +11,8 @@
#include <linux/module.h>
#include "crc-pclmul-template.h"
-static DEFINE_STATIC_KEY_FALSE(have_crc32);
-static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
@@ -88,7 +88,7 @@ static int __init crc32_x86_init(void)
}
return 0;
}
-arch_initcall(crc32_x86_init);
+subsys_initcall(crc32_x86_init);
static void __exit crc32_x86_exit(void)
{
diff --git a/arch/x86/lib/crc64-glue.c b/arch/x86/lib/crc64.c
index b0e1b719ecbf..351a09f5813e 100644
--- a/arch/x86/lib/crc64-glue.c
+++ b/arch/x86/lib/crc64.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
#include "crc-pclmul-template.h"
-static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64);
DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64);
@@ -39,7 +39,7 @@ static int __init crc64_x86_init(void)
}
return 0;
}
-arch_initcall(crc64_x86_init);
+subsys_initcall(crc64_x86_init);
static void __exit crc64_x86_exit(void)
{
diff --git a/arch/x86/lib/crypto/.gitignore b/arch/x86/lib/crypto/.gitignore
new file mode 100644
index 000000000000..580c839bb177
--- /dev/null
+++ b/arch/x86/lib/crypto/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+poly1305-x86_64-cryptogams.S
diff --git a/arch/x86/lib/crypto/Kconfig b/arch/x86/lib/crypto/Kconfig
new file mode 100644
index 000000000000..5e94cdee492c
--- /dev/null
+++ b/arch/x86/lib/crypto/Kconfig
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_BLAKE2S_X86
+ bool "Hash functions: BLAKE2s (SSSE3/AVX-512)"
+ depends on 64BIT
+ select CRYPTO_LIB_BLAKE2S_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ help
+ BLAKE2s cryptographic hash function (RFC 7693)
+
+ Architecture: x86_64 using:
+ - SSSE3 (Supplemental SSE3)
+ - AVX-512 (Advanced Vector Extensions-512)
+
+config CRYPTO_CHACHA20_X86_64
+ tristate
+ depends on 64BIT
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_X86_64
+ tristate
+ depends on 64BIT
+ default CRYPTO_LIB_POLY1305
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
+config CRYPTO_SHA256_X86_64
+ tristate
+ depends on 64BIT
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
+ select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/x86/lib/crypto/Makefile b/arch/x86/lib/crypto/Makefile
new file mode 100644
index 000000000000..abceca3d31c0
--- /dev/null
+++ b/arch/x86/lib/crypto/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
+libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+
+obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o
+chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x86_64.o chacha_glue.o
+
+obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
+poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
+targets += poly1305-x86_64-cryptogams.S
+
+obj-$(CONFIG_CRYPTO_SHA256_X86_64) += sha256-x86_64.o
+sha256-x86_64-y := sha256.o sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256-ni-asm.o
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $< > $@
+
+$(obj)/%.S: $(src)/%.pl FORCE
+ $(call if_changed,perlasm)
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/lib/crypto/blake2s-core.S
index b50b35ff1fdb..ac1c845445a4 100644
--- a/arch/x86/crypto/blake2s-core.S
+++ b/arch/x86/lib/crypto/blake2s-core.S
@@ -29,7 +29,6 @@ SIGMA:
.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
-#ifdef CONFIG_AS_AVX512
.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
.align 64
SIGMA2:
@@ -43,7 +42,6 @@ SIGMA2:
.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
-#endif /* CONFIG_AS_AVX512 */
.text
SYM_FUNC_START(blake2s_compress_ssse3)
@@ -174,7 +172,6 @@ SYM_FUNC_START(blake2s_compress_ssse3)
RET
SYM_FUNC_END(blake2s_compress_ssse3)
-#ifdef CONFIG_AS_AVX512
SYM_FUNC_START(blake2s_compress_avx512)
vmovdqu (%rdi),%xmm0
vmovdqu 0x10(%rdi),%xmm1
@@ -253,4 +250,3 @@ SYM_FUNC_START(blake2s_compress_avx512)
vzeroupper
RET
SYM_FUNC_END(blake2s_compress_avx512)
-#endif /* CONFIG_AS_AVX512 */
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/lib/crypto/blake2s-glue.c
index 0313f9673f56..adc296cd17c9 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/lib/crypto/blake2s-glue.c
@@ -3,17 +3,15 @@
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
-#include <crypto/internal/blake2s.h>
-
-#include <linux/types.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/sizes.h>
-
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include <asm/processor.h>
#include <asm/simd.h>
+#include <crypto/internal/blake2s.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
const u8 *block, const size_t nblocks,
@@ -41,8 +39,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
- if (IS_ENABLED(CONFIG_AS_AVX512) &&
- static_branch_likely(&blake2s_use_avx512))
+ if (static_branch_likely(&blake2s_use_avx512))
blake2s_compress_avx512(state, block, blocks, inc);
else
blake2s_compress_ssse3(state, block, blocks, inc);
@@ -59,8 +56,7 @@ static int __init blake2s_mod_init(void)
if (boot_cpu_has(X86_FEATURE_SSSE3))
static_branch_enable(&blake2s_use_ssse3);
- if (IS_ENABLED(CONFIG_AS_AVX512) &&
- boot_cpu_has(X86_FEATURE_AVX) &&
+ if (boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/lib/crypto/chacha-avx2-x86_64.S
index f3d8fc018249..f3d8fc018249 100644
--- a/arch/x86/crypto/chacha-avx2-x86_64.S
+++ b/arch/x86/lib/crypto/chacha-avx2-x86_64.S
diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/lib/crypto/chacha-avx512vl-x86_64.S
index 259383e1ad44..259383e1ad44 100644
--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
+++ b/arch/x86/lib/crypto/chacha-avx512vl-x86_64.S
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/lib/crypto/chacha-ssse3-x86_64.S
index 7111949cd5b9..7111949cd5b9 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/lib/crypto/chacha-ssse3-x86_64.S
diff --git a/arch/x86/lib/crypto/chacha_glue.c b/arch/x86/lib/crypto/chacha_glue.c
new file mode 100644
index 000000000000..10b2c945f541
--- /dev/null
+++ b/arch/x86/lib/crypto/chacha_glue.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ChaCha and HChaCha functions (x86_64 optimized)
+ *
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <asm/simd.h>
+#include <crypto/chacha.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+
+asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void hchacha_block_ssse3(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
+
+asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+
+asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state,
+ u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
+
+static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
+{
+ len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
+ return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
+}
+
+static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ if (static_branch_likely(&chacha_use_avx512vl)) {
+ while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 8;
+ src += CHACHA_BLOCK_SIZE * 8;
+ dst += CHACHA_BLOCK_SIZE * 8;
+ state->x[12] += 8;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 4) {
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state->x[12] += chacha_advance(bytes, 8);
+ return;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 2) {
+ chacha_4block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state->x[12] += chacha_advance(bytes, 4);
+ return;
+ }
+ if (bytes) {
+ chacha_2block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state->x[12] += chacha_advance(bytes, 2);
+ return;
+ }
+ }
+
+ if (static_branch_likely(&chacha_use_avx2)) {
+ while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 8;
+ src += CHACHA_BLOCK_SIZE * 8;
+ dst += CHACHA_BLOCK_SIZE * 8;
+ state->x[12] += 8;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 4) {
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ state->x[12] += chacha_advance(bytes, 8);
+ return;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 2) {
+ chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
+ state->x[12] += chacha_advance(bytes, 4);
+ return;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE) {
+ chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
+ state->x[12] += chacha_advance(bytes, 2);
+ return;
+ }
+ }
+
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+ src += CHACHA_BLOCK_SIZE * 4;
+ dst += CHACHA_BLOCK_SIZE * 4;
+ state->x[12] += 4;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE) {
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ state->x[12] += chacha_advance(bytes, 4);
+ return;
+ }
+ if (bytes) {
+ chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
+ state->x[12]++;
+ }
+}
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ if (!static_branch_likely(&chacha_use_simd)) {
+ hchacha_block_generic(state, out, nrounds);
+ } else {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, out, nrounds);
+ kernel_fpu_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ if (!static_branch_likely(&chacha_use_simd) ||
+ bytes <= CHACHA_BLOCK_SIZE)
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, todo, nrounds);
+ kernel_fpu_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return static_key_enabled(&chacha_use_simd);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init chacha_simd_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
+ return 0;
+
+ static_branch_enable(&chacha_use_simd);
+
+ if (boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
+ static_branch_enable(&chacha_use_avx2);
+
+ if (boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
+ static_branch_enable(&chacha_use_avx512vl);
+ }
+ return 0;
+}
+subsys_initcall(chacha_simd_mod_init);
+
+static void __exit chacha_simd_mod_exit(void)
+{
+}
+module_exit(chacha_simd_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha and HChaCha functions (x86_64 optimized)");
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl
index b9abcd79c1f4..501827254fed 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl
@@ -118,6 +118,19 @@ sub declare_function() {
}
}
+sub declare_typed_function() {
+ my ($name, $align, $nargs) = @_;
+ if($kernel) {
+ $code .= "SYM_TYPED_FUNC_START($name)\n";
+ $code .= ".L$name:\n";
+ } else {
+ $code .= ".globl $name\n";
+ $code .= ".type $name,\@function,$nargs\n";
+ $code .= ".align $align\n";
+ $code .= "$name:\n";
+ }
+}
+
sub end_function() {
my ($name) = @_;
if($kernel) {
@@ -128,7 +141,7 @@ sub end_function() {
}
$code.=<<___ if $kernel;
-#include <linux/linkage.h>
+#include <linux/cfi_types.h>
___
if ($avx) {
@@ -236,14 +249,14 @@ ___
$code.=<<___ if (!$kernel);
.extern OPENSSL_ia32cap_P
-.globl poly1305_init_x86_64
-.hidden poly1305_init_x86_64
+.globl poly1305_block_init_arch
+.hidden poly1305_block_init_arch
.globl poly1305_blocks_x86_64
.hidden poly1305_blocks_x86_64
.globl poly1305_emit_x86_64
.hidden poly1305_emit_x86_64
___
-&declare_function("poly1305_init_x86_64", 32, 3);
+&declare_typed_function("poly1305_block_init_arch", 32, 3);
$code.=<<___;
xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
@@ -298,7 +311,7 @@ $code.=<<___;
.Lno_key:
RET
___
-&end_function("poly1305_init_x86_64");
+&end_function("poly1305_block_init_arch");
&declare_function("poly1305_blocks_x86_64", 32, 4);
$code.=<<___;
@@ -2811,18 +2824,10 @@ if ($avx>2) {
# reason stack layout is kept identical to poly1305_blocks_avx2. If not
# for this tail, we wouldn't have to even allocate stack frame...
-if($kernel) {
- $code .= "#ifdef CONFIG_AS_AVX512\n";
-}
-
&declare_function("poly1305_blocks_avx512", 32, 4);
poly1305_blocks_avxN(1);
&end_function("poly1305_blocks_avx512");
-if ($kernel) {
- $code .= "#endif\n";
-}
-
if (!$kernel && $avx>3) {
########################################################################
# VPMADD52 version using 2^44 radix.
@@ -4113,9 +4118,9 @@ avx_handler:
.section .pdata
.align 4
- .rva .LSEH_begin_poly1305_init_x86_64
- .rva .LSEH_end_poly1305_init_x86_64
- .rva .LSEH_info_poly1305_init_x86_64
+ .rva .LSEH_begin_poly1305_block_init_arch
+ .rva .LSEH_end_poly1305_block_init_arch
+ .rva .LSEH_info_poly1305_block_init_arch
.rva .LSEH_begin_poly1305_blocks_x86_64
.rva .LSEH_end_poly1305_blocks_x86_64
@@ -4163,10 +4168,10 @@ ___
$code.=<<___;
.section .xdata
.align 8
-.LSEH_info_poly1305_init_x86_64:
+.LSEH_info_poly1305_block_init_arch:
.byte 9,0,0,0
.rva se_handler
- .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64
+ .rva .LSEH_begin_poly1305_block_init_arch,.LSEH_begin_poly1305_block_init_arch
.LSEH_info_poly1305_blocks_x86_64:
.byte 9,0,0,0
diff --git a/arch/x86/lib/crypto/poly1305_glue.c b/arch/x86/lib/crypto/poly1305_glue.c
new file mode 100644
index 000000000000..b7e78a583e07
--- /dev/null
+++ b/arch/x86/lib/crypto/poly1305_glue.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/cpu_device_id.h>
+#include <asm/fpu/api.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/unaligned.h>
+
+struct poly1305_arch_internal {
+ union {
+ struct {
+ u32 h[5];
+ u32 is_base2_26;
+ };
+ u64 hs[3];
+ };
+ u64 r[2];
+ u64 pad;
+ struct { u32 r2, r1, r4, r3; } rn[9];
+};
+
+asmlinkage void poly1305_block_init_arch(
+ struct poly1305_block_state *state,
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
+EXPORT_SYMBOL_GPL(poly1305_block_init_arch);
+asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx,
+ const u8 *inp,
+ const size_t len, const u32 padbit);
+asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx,
+ u8 mac[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx,
+ u8 mac[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4]);
+asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx,
+ const u8 *inp, const size_t len,
+ const u32 padbit);
+asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx,
+ const u8 *inp, const size_t len,
+ const u32 padbit);
+asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx,
+ const u8 *inp,
+ const size_t len, const u32 padbit);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);
+
+void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp,
+ unsigned int len, u32 padbit)
+{
+ struct poly1305_arch_internal *ctx =
+ container_of(&state->h.h, struct poly1305_arch_internal, h);
+
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
+ SZ_4K % POLY1305_BLOCK_SIZE);
+
+ if (!static_branch_likely(&poly1305_use_avx)) {
+ poly1305_blocks_x86_64(ctx, inp, len, padbit);
+ return;
+ }
+
+ do {
+ const unsigned int bytes = min(len, SZ_4K);
+
+ kernel_fpu_begin();
+ if (static_branch_likely(&poly1305_use_avx512))
+ poly1305_blocks_avx512(ctx, inp, bytes, padbit);
+ else if (static_branch_likely(&poly1305_use_avx2))
+ poly1305_blocks_avx2(ctx, inp, bytes, padbit);
+ else
+ poly1305_blocks_avx(ctx, inp, bytes, padbit);
+ kernel_fpu_end();
+
+ len -= bytes;
+ inp += bytes;
+ } while (len);
+}
+EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
+
+void poly1305_emit_arch(const struct poly1305_state *ctx,
+ u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4])
+{
+ if (!static_branch_likely(&poly1305_use_avx))
+ poly1305_emit_x86_64(ctx, mac, nonce);
+ else
+ poly1305_emit_avx(ctx, mac, nonce);
+}
+EXPORT_SYMBOL_GPL(poly1305_emit_arch);
+
+bool poly1305_is_arch_optimized(void)
+{
+ return static_key_enabled(&poly1305_use_avx);
+}
+EXPORT_SYMBOL(poly1305_is_arch_optimized);
+
+static int __init poly1305_simd_mod_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_AVX) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx);
+ if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx2);
+ if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
+ /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
+ boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X)
+ static_branch_enable(&poly1305_use_avx512);
+ return 0;
+}
+subsys_initcall(poly1305_simd_mod_init);
+
+static void __exit poly1305_simd_mod_exit(void)
+{
+}
+module_exit(poly1305_simd_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_DESCRIPTION("Poly1305 authenticator");
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/lib/crypto/sha256-avx-asm.S
index 53de72bdd851..0d7b2c3e45d9 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/lib/crypto/sha256-avx-asm.S
@@ -48,7 +48,7 @@
########################################################################
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
## assume buffers not aligned
#define VMOVDQ vmovdqu
@@ -341,13 +341,13 @@ a = TMP_
.endm
########################################################################
-## void sha256_transform_avx(state sha256_state *state, const u8 *data, int blocks)
-## arg 1 : pointer to state
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
+## void sha256_transform_avx(u32 state[SHA256_STATE_WORDS],
+## const u8 *data, size_t nblocks);
########################################################################
.text
-SYM_TYPED_FUNC_START(sha256_transform_avx)
+SYM_FUNC_START(sha256_transform_avx)
+ ANNOTATE_NOENDBR # since this is called only via static_call
+
pushq %rbx
pushq %r12
pushq %r13
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/lib/crypto/sha256-avx2-asm.S
index 0bbec1c75cd0..25d3380321ec 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/lib/crypto/sha256-avx2-asm.S
@@ -49,7 +49,7 @@
########################################################################
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
## assume buffers not aligned
#define VMOVDQ vmovdqu
@@ -518,13 +518,13 @@ STACK_SIZE = _CTX + _CTX_SIZE
.endm
########################################################################
-## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks)
-## arg 1 : pointer to state
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
+## void sha256_transform_rorx(u32 state[SHA256_STATE_WORDS],
+## const u8 *data, size_t nblocks);
########################################################################
.text
-SYM_TYPED_FUNC_START(sha256_transform_rorx)
+SYM_FUNC_START(sha256_transform_rorx)
+ ANNOTATE_NOENDBR # since this is called only via static_call
+
pushq %rbx
pushq %r12
pushq %r13
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/lib/crypto/sha256-ni-asm.S
index d515a55a3bc1..d3548206cf3d 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/lib/crypto/sha256-ni-asm.S
@@ -54,9 +54,9 @@
*/
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
-#define DIGEST_PTR %rdi /* 1st arg */
+#define STATE_PTR %rdi /* 1st arg */
#define DATA_PTR %rsi /* 2nd arg */
#define NUM_BLKS %rdx /* 3rd arg */
@@ -98,24 +98,20 @@
.endm
/*
- * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ * Intel SHA Extensions optimized implementation of a SHA-256 block function
*
- * The function takes a pointer to the current hash values, a pointer to the
- * input data, and a number of 64 byte blocks to process. Once all blocks have
- * been processed, the digest pointer is updated with the resulting hash value.
- * The function only processes complete blocks, there is no functionality to
- * store partial blocks. All message padding and hash value initialization must
- * be done outside the update function.
+ * This function takes a pointer to the current SHA-256 state, a pointer to the
+ * input data, and the number of 64-byte blocks to process. Once all blocks
+ * have been processed, the state is updated with the new state. This function
+ * only processes complete blocks. State initialization, buffering of partial
+ * blocks, and digest finalization is expected to be handled elsewhere.
*
- * void sha256_ni_transform(uint32_t *digest, const void *data,
- uint32_t numBlocks);
- * digest : pointer to digest
- * data: pointer to input data
- * numBlocks: Number of blocks to process
+ * void sha256_ni_transform(u32 state[SHA256_STATE_WORDS],
+ * const u8 *data, size_t nblocks);
*/
-
.text
-SYM_TYPED_FUNC_START(sha256_ni_transform)
+SYM_FUNC_START(sha256_ni_transform)
+ ANNOTATE_NOENDBR # since this is called only via static_call
shl $6, NUM_BLKS /* convert to bytes */
jz .Ldone_hash
@@ -126,8 +122,8 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
* Need to reorder these appropriately
* DCBA, HGFE -> ABEF, CDGH
*/
- movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */
- movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */
+ movdqu 0*16(STATE_PTR), STATE0 /* DCBA */
+ movdqu 1*16(STATE_PTR), STATE1 /* HGFE */
movdqa STATE0, TMP
punpcklqdq STATE1, STATE0 /* FEBA */
@@ -166,8 +162,8 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
pshufd $0xB1, STATE0, STATE0 /* HGFE */
pshufd $0x1B, STATE1, STATE1 /* DCBA */
- movdqu STATE1, 0*16(DIGEST_PTR)
- movdqu STATE0, 1*16(DIGEST_PTR)
+ movdqu STATE1, 0*16(STATE_PTR)
+ movdqu STATE0, 1*16(STATE_PTR)
.Ldone_hash:
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/lib/crypto/sha256-ssse3-asm.S
index 93264ee44543..7f24a4cdcb25 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/lib/crypto/sha256-ssse3-asm.S
@@ -47,7 +47,7 @@
########################################################################
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
## assume buffers not aligned
#define MOVDQ movdqu
@@ -348,15 +348,13 @@ a = TMP_
.endm
########################################################################
-## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data,
-## int blocks);
-## arg 1 : pointer to state
-## (struct sha256_state is assumed to begin with u32 state[8])
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
+## void sha256_transform_ssse3(u32 state[SHA256_STATE_WORDS],
+## const u8 *data, size_t nblocks);
########################################################################
.text
-SYM_TYPED_FUNC_START(sha256_transform_ssse3)
+SYM_FUNC_START(sha256_transform_ssse3)
+ ANNOTATE_NOENDBR # since this is called only via static_call
+
pushq %rbx
pushq %r12
pushq %r13
diff --git a/arch/x86/lib/crypto/sha256.c b/arch/x86/lib/crypto/sha256.c
new file mode 100644
index 000000000000..80380f8fdcee
--- /dev/null
+++ b/arch/x86/lib/crypto/sha256.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized for x86_64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/fpu/api.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/static_call.h>
+
+asmlinkage void sha256_transform_ssse3(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+asmlinkage void sha256_transform_avx(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+asmlinkage void sha256_transform_rorx(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+asmlinkage void sha256_ni_transform(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86);
+
+DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3);
+
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (static_branch_likely(&have_sha256_x86)) {
+ kernel_fpu_begin();
+ static_call(sha256_blocks_x86)(state, data, nblocks);
+ kernel_fpu_end();
+ } else {
+ sha256_blocks_generic(state, data, nblocks);
+ }
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_sha256_x86);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_x86_mod_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
+ static_call_update(sha256_blocks_x86, sha256_ni_transform);
+ } else if (cpu_has_xfeatures(XFEATURE_MASK_SSE |
+ XFEATURE_MASK_YMM, NULL) &&
+ boot_cpu_has(X86_FEATURE_AVX)) {
+ if (boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_BMI2))
+ static_call_update(sha256_blocks_x86,
+ sha256_transform_rorx);
+ else
+ static_call_update(sha256_blocks_x86,
+ sha256_transform_avx);
+ } else if (!boot_cpu_has(X86_FEATURE_SSSE3)) {
+ return 0;
+ }
+ static_branch_enable(&have_sha256_x86);
+ return 0;
+}
+subsys_initcall(sha256_x86_mod_init);
+
+static void __exit sha256_x86_mod_exit(void)
+{
+}
+module_exit(sha256_x86_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 optimized for x86_64");
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e86eda2c0b04..eb2d2e1cbddd 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -75,7 +75,7 @@ static void delay_tsc(u64 cycles)
/* Allow RT tasks to run */
preempt_enable();
- rep_nop();
+ native_pause();
preempt_disable();
/*
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 98631c0e7a11..4e385cbfd444 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -13,6 +13,7 @@
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/ldt.h>
+#include <asm/msr.h>
#include <asm/vm86.h>
#undef pr_fmt
@@ -631,14 +632,21 @@ static bool get_desc(struct desc_struct *out, unsigned short sel)
/* Bits [15:3] contain the index of the desired entry. */
sel >>= 3;
- mutex_lock(&current->active_mm->context.lock);
- ldt = current->active_mm->context.ldt;
+ /*
+ * If we're not in a valid context with a real (not just lazy)
+ * user mm, then don't even try.
+ */
+ if (!nmi_uaccess_okay())
+ return false;
+
+ mutex_lock(&current->mm->context.lock);
+ ldt = current->mm->context.ldt;
if (ldt && sel < ldt->nr_entries) {
*out = ldt->entries[sel];
success = true;
}
- mutex_unlock(&current->active_mm->context.lock);
+ mutex_unlock(&current->mm->context.lock);
return success;
}
@@ -702,16 +710,16 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
unsigned long base;
if (seg_reg_idx == INAT_SEG_REG_FS) {
- rdmsrl(MSR_FS_BASE, base);
+ rdmsrq(MSR_FS_BASE, base);
} else if (seg_reg_idx == INAT_SEG_REG_GS) {
/*
* swapgs was called at the kernel entry point. Thus,
* MSR_KERNEL_GS_BASE will have the user-space GS base.
*/
if (user_mode(regs))
- rdmsrl(MSR_KERNEL_GS_BASE, base);
+ rdmsrq(MSR_KERNEL_GS_BASE, base);
else
- rdmsrl(MSR_GS_BASE, base);
+ rdmsrq(MSR_GS_BASE, base);
} else {
base = 0;
}
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 6ffb931b9fb1..149a57e334ab 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -324,6 +324,11 @@ int insn_get_opcode(struct insn *insn)
}
insn->attr = inat_get_opcode_attribute(op);
+ if (insn->x86_64 && inat_is_invalid64(insn->attr)) {
+ /* This instruction is invalid, like UD2. Stop decoding. */
+ insn->attr &= INAT_INV64;
+ }
+
while (inat_is_escape(insn->attr)) {
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
@@ -337,6 +342,7 @@ int insn_get_opcode(struct insn *insn)
insn->attr = 0;
return -EINVAL;
}
+
end:
opcode->got = 1;
return 0;
@@ -658,7 +664,6 @@ int insn_get_immediate(struct insn *insn)
}
if (!inat_has_immediate(insn->attr))
- /* no immediates */
goto done;
switch (inat_immediate_size(insn->attr)) {
diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c
index 5eecb45d05d5..c20e04764edc 100644
--- a/arch/x86/lib/iomem.c
+++ b/arch/x86/lib/iomem.c
@@ -10,7 +10,7 @@
static __always_inline void rep_movs(void *to, const void *from, size_t n)
{
unsigned long d0, d1, d2;
- asm volatile("rep ; movsl\n\t"
+ asm volatile("rep movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index a58f451a7dd3..b5893928d55c 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -8,7 +8,7 @@
*/
#include <asm/asm.h>
#include <asm/kaslr.h>
-#include <asm/msr.h>
+#include <asm/tsc.h>
#include <asm/archrandom.h>
#include <asm/e820/api.h>
#include <asm/shared/io.h>
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 0ae2e1712e2e..12a23fa7c44c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -41,6 +41,7 @@ SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
+SYM_PIC_ALIAS(memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_START_LOCAL(memcpy_orig)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index d66b710d628f..fb5a03cf5ab7 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -42,6 +42,7 @@ SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
+SYM_PIC_ALIAS(memset)
EXPORT_SYMBOL(memset)
SYM_FUNC_START_LOCAL(memset_orig)
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index acd463d887e1..b8f63419e6ae 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -47,7 +47,7 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
}
EXPORT_SYMBOL(rdmsr_on_cpu);
-int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
int err;
struct msr_info rv;
@@ -60,7 +60,7 @@ int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
return err;
}
-EXPORT_SYMBOL(rdmsrl_on_cpu);
+EXPORT_SYMBOL(rdmsrq_on_cpu);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
@@ -78,7 +78,7 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
}
EXPORT_SYMBOL(wrmsr_on_cpu);
-int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
int err;
struct msr_info rv;
@@ -92,7 +92,7 @@ int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
return err;
}
-EXPORT_SYMBOL(wrmsrl_on_cpu);
+EXPORT_SYMBOL(wrmsrq_on_cpu);
static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
struct msr __percpu *msrs,
@@ -204,7 +204,7 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
}
EXPORT_SYMBOL(wrmsr_safe_on_cpu);
-int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
int err;
struct msr_info rv;
@@ -218,9 +218,9 @@ int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
return err ? err : rv.err;
}
-EXPORT_SYMBOL(wrmsrl_safe_on_cpu);
+EXPORT_SYMBOL(wrmsrq_safe_on_cpu);
-int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
u32 low, high;
int err;
@@ -230,7 +230,7 @@ int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
return err;
}
-EXPORT_SYMBOL(rdmsrl_safe_on_cpu);
+EXPORT_SYMBOL(rdmsrq_safe_on_cpu);
/*
* These variants are significantly slower, but allows control over
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 5a18ecc04a6c..4ef7c6dcbea6 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -41,7 +41,7 @@ static int msr_read(u32 msr, struct msr *m)
int err;
u64 val;
- err = rdmsrl_safe(msr, &val);
+ err = rdmsrq_safe(msr, &val);
if (!err)
m->q = val;
@@ -58,7 +58,7 @@ static int msr_read(u32 msr, struct msr *m)
*/
static int msr_write(u32 msr, struct msr *m)
{
- return wrmsrl_safe(msr, m->q);
+ return wrmsrq_safe(msr, m->q);
}
static inline int __flip_bit(u32 msr, u8 bit, bool set)
@@ -122,23 +122,23 @@ int msr_clear_bit(u32 msr, u8 bit)
EXPORT_SYMBOL_GPL(msr_clear_bit);
#ifdef CONFIG_TRACEPOINTS
-void do_trace_write_msr(unsigned int msr, u64 val, int failed)
+void do_trace_write_msr(u32 msr, u64 val, int failed)
{
trace_write_msr(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_write_msr);
EXPORT_TRACEPOINT_SYMBOL(write_msr);
-void do_trace_read_msr(unsigned int msr, u64 val, int failed)
+void do_trace_read_msr(u32 msr, u64 val, int failed)
{
trace_read_msr(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_read_msr);
EXPORT_TRACEPOINT_SYMBOL(read_msr);
-void do_trace_rdpmc(unsigned counter, u64 val, int failed)
+void do_trace_rdpmc(u32 msr, u64 val, int failed)
{
- trace_rdpmc(counter, val, failed);
+ trace_rdpmc(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_rdpmc);
EXPORT_TRACEPOINT_SYMBOL(rdpmc);
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index a26c43abd47d..d78d769a02bd 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -40,6 +40,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
__stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
+SYM_PIC_ALIAS(__x86_indirect_thunk_\reg)
.endm
@@ -367,6 +368,54 @@ SYM_FUNC_END(call_depth_return_thunk)
#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
+#ifdef CONFIG_MITIGATION_ITS
+
+.macro ITS_THUNK reg
+
+/*
+ * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b)
+ * that complete the fineibt_paranoid caller sequence.
+ */
+1: .byte 0xea
+SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ jne 1b
+SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *%\reg
+ int3
+ .align 32, 0xcc /* fill to the end of the line */
+ .skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */
+.endm
+
+/* ITS mitigation requires thunks be aligned to upper half of cacheline */
+.align 64, 0xcc
+.skip 29, 0xcc
+
+#define GEN(reg) ITS_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+ .align 64, 0xcc
+SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax)
+SYM_CODE_END(__x86_indirect_its_thunk_array)
+
+.align 64, 0xcc
+.skip 32, 0xcc
+SYM_CODE_START(its_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_CODE_END(its_return_thunk)
+EXPORT_SYMBOL(its_return_thunk)
+
+#endif /* CONFIG_MITIGATION_ITS */
+
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
@@ -394,6 +443,7 @@ SYM_CODE_START(__x86_return_thunk)
#endif
int3
SYM_CODE_END(__x86_return_thunk)
+SYM_PIC_ALIAS(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
#endif /* CONFIG_MITIGATION_RETHUNK */
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 53b3f202267c..f87ec24fa579 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -40,8 +40,7 @@ char *strncpy(char *dest, const char *src, size_t count)
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b\n\t"
- "rep\n\t"
- "stosb\n"
+ "rep stosb\n"
"2:"
: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
: "0" (src), "1" (dest), "2" (count) : "memory");
@@ -54,8 +53,7 @@ EXPORT_SYMBOL(strncpy);
char *strcat(char *dest, const char *src)
{
int d0, d1, d2, d3;
- asm volatile("repne\n\t"
- "scasb\n\t"
+ asm volatile("repne scasb\n\t"
"decl %1\n"
"1:\tlodsb\n\t"
"stosb\n\t"
@@ -72,8 +70,7 @@ EXPORT_SYMBOL(strcat);
char *strncat(char *dest, const char *src, size_t count)
{
int d0, d1, d2, d3;
- asm volatile("repne\n\t"
- "scasb\n\t"
+ asm volatile("repne scasb\n\t"
"decl %1\n\t"
"movl %8,%3\n"
"1:\tdecl %3\n\t"
@@ -167,8 +164,7 @@ size_t strlen(const char *s)
{
int d0;
size_t res;
- asm volatile("repne\n\t"
- "scasb"
+ asm volatile("repne scasb"
: "=c" (res), "=&D" (d0)
: "1" (s), "a" (0), "0" (0xffffffffu)
: "memory");
@@ -184,8 +180,7 @@ void *memchr(const void *cs, int c, size_t count)
void *res;
if (!count)
return NULL;
- asm volatile("repne\n\t"
- "scasb\n\t"
+ asm volatile("repne scasb\n\t"
"je 1f\n\t"
"movl $1,%0\n"
"1:\tdecl %0"
@@ -202,7 +197,7 @@ void *memscan(void *addr, int c, size_t size)
{
if (!size)
return addr;
- asm volatile("repnz; scasb\n\t"
+ asm volatile("repnz scasb\n\t"
"jnz 1f\n\t"
"dec %%edi\n"
"1:"
diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
index 38f37df056f7..28267985e85f 100644
--- a/arch/x86/lib/strstr_32.c
+++ b/arch/x86/lib/strstr_32.c
@@ -8,16 +8,14 @@ int d0, d1;
register char *__res;
__asm__ __volatile__(
"movl %6,%%edi\n\t"
- "repne\n\t"
- "scasb\n\t"
+ "repne scasb\n\t"
"notl %%ecx\n\t"
"decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */
"movl %%ecx,%%edx\n"
"1:\tmovl %6,%%edi\n\t"
"movl %%esi,%%eax\n\t"
"movl %%edx,%%ecx\n\t"
- "repe\n\t"
- "cmpsb\n\t"
+ "repe cmpsb\n\t"
"je 2f\n\t" /* also works for empty string, see above */
"xchgl %%eax,%%esi\n\t"
"incl %%esi\n\t"
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 422257c350c6..f6f436f1d573 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -38,9 +38,9 @@ do { \
might_fault(); \
__asm__ __volatile__( \
ASM_STAC "\n" \
- "0: rep; stosl\n" \
+ "0: rep stosl\n" \
" movl %2,%0\n" \
- "1: rep; stosb\n" \
+ "1: rep stosb\n" \
"2: " ASM_CLAC "\n" \
_ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN4, %2) \
_ASM_EXTABLE_UA(1b, 2b) \
@@ -140,9 +140,9 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
- "99: rep; movsl\n"
+ "99: rep movsl\n"
"36: movl %%eax, %0\n"
- "37: rep; movsb\n"
+ "37: rep movsb\n"
"100:\n"
_ASM_EXTABLE_UA(1b, 100b)
_ASM_EXTABLE_UA(2b, 100b)
@@ -242,9 +242,9 @@ static unsigned long __copy_user_intel_nocache(void *to,
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
- "6: rep; movsl\n"
+ "6: rep movsl\n"
" movl %%eax,%0\n"
- "7: rep; movsb\n"
+ "7: rep movsb\n"
"8:\n"
_ASM_EXTABLE_UA(0b, 8b)
_ASM_EXTABLE_UA(1b, 8b)
@@ -293,14 +293,14 @@ do { \
" negl %0\n" \
" andl $7,%0\n" \
" subl %0,%3\n" \
- "4: rep; movsb\n" \
+ "4: rep movsb\n" \
" movl %3,%0\n" \
" shrl $2,%0\n" \
" andl $3,%3\n" \
" .align 2,0x90\n" \
- "0: rep; movsl\n" \
+ "0: rep movsl\n" \
" movl %3,%0\n" \
- "1: rep; movsb\n" \
+ "1: rep movsb\n" \
"2:\n" \
_ASM_EXTABLE_TYPE_REG(4b, 2b, EX_TYPE_UCOPY_LEN1, %3) \
_ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN4, %3) \
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index f5dd84eb55dc..262f7ca1fb95 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -35,7 +35,7 @@
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
#
-# REX2 Prefix
+# REX2 Prefix Superscripts
# - (!REX2): REX2 is not allowed
# - (REX2): REX2 variant e.g. JMPABS
@@ -147,7 +147,7 @@ AVXcode:
# 0x60 - 0x6f
60: PUSHA/PUSHAD (i64)
61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64)
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
64: SEG=FS (Prefix)
65: SEG=GS (Prefix)
@@ -253,8 +253,8 @@ c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
-c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
-c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64)
c6: Grp11A Eb,Ib (1A)
c7: Grp11B Ev,Iz (1A)
c8: ENTER Iw,Ib
@@ -286,10 +286,10 @@ df: ESC
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
-e0: LOOPNE/LOOPNZ Jb (f64) (!REX2)
-e1: LOOPE/LOOPZ Jb (f64) (!REX2)
-e2: LOOP Jb (f64) (!REX2)
-e3: JrCXZ Jb (f64) (!REX2)
+e0: LOOPNE/LOOPNZ Jb (f64),(!REX2)
+e1: LOOPE/LOOPZ Jb (f64),(!REX2)
+e2: LOOP Jb (f64),(!REX2)
+e3: JrCXZ Jb (f64),(!REX2)
e4: IN AL,Ib (!REX2)
e5: IN eAX,Ib (!REX2)
e6: OUT Ib,AL (!REX2)
@@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2)
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
-e8: CALL Jz (f64) (!REX2)
-e9: JMP-near Jz (f64) (!REX2)
-ea: JMP-far Ap (i64) (!REX2)
-eb: JMP-short Jb (f64) (!REX2)
+e8: CALL Jz (f64),(!REX2)
+e9: JMP-near Jz (f64),(!REX2)
+ea: JMP-far Ap (i64),(!REX2)
+eb: JMP-short Jb (f64),(!REX2)
ec: IN AL,DX (!REX2)
ed: IN eAX,DX (!REX2)
ee: OUT DX,AL (!REX2)
@@ -478,22 +478,22 @@ AVXcode: 1
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
-80: JO Jz (f64) (!REX2)
-81: JNO Jz (f64) (!REX2)
-82: JB/JC/JNAE Jz (f64) (!REX2)
-83: JAE/JNB/JNC Jz (f64) (!REX2)
-84: JE/JZ Jz (f64) (!REX2)
-85: JNE/JNZ Jz (f64) (!REX2)
-86: JBE/JNA Jz (f64) (!REX2)
-87: JA/JNBE Jz (f64) (!REX2)
-88: JS Jz (f64) (!REX2)
-89: JNS Jz (f64) (!REX2)
-8a: JP/JPE Jz (f64) (!REX2)
-8b: JNP/JPO Jz (f64) (!REX2)
-8c: JL/JNGE Jz (f64) (!REX2)
-8d: JNL/JGE Jz (f64) (!REX2)
-8e: JLE/JNG Jz (f64) (!REX2)
-8f: JNLE/JG Jz (f64) (!REX2)
+80: JO Jz (f64),(!REX2)
+81: JNO Jz (f64),(!REX2)
+82: JB/JC/JNAE Jz (f64),(!REX2)
+83: JAE/JNB/JNC Jz (f64),(!REX2)
+84: JE/JZ Jz (f64),(!REX2)
+85: JNE/JNZ Jz (f64),(!REX2)
+86: JBE/JNA Jz (f64),(!REX2)
+87: JA/JNBE Jz (f64),(!REX2)
+88: JS Jz (f64),(!REX2)
+89: JNS Jz (f64),(!REX2)
+8a: JP/JPE Jz (f64),(!REX2)
+8b: JNP/JPO Jz (f64),(!REX2)
+8c: JL/JNGE Jz (f64),(!REX2)
+8d: JNL/JGE Jz (f64),(!REX2)
+8e: JLE/JNG Jz (f64),(!REX2)
+8f: JNLE/JG Jz (f64),(!REX2)
# 0x0f 0x90-0x9f
90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index d62662bdd460..5f253ae406b6 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -53,7 +53,7 @@ void fpstate_init_soft(struct swregs_state *soft)
void finit(void)
{
- fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
+ fpstate_init_soft(&x86_task_fpu(current)->fpstate->regs.soft);
}
/*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 91c52ead1226..5034df617740 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -641,7 +641,7 @@ int fpregs_soft_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
+ struct swregs_state *s387 = &x86_task_fpu(target)->fpstate->regs.soft;
void *space = s387->st_space;
int ret;
int offset, other, i, tags, regnr, tag, newtop;
@@ -692,7 +692,7 @@ int fpregs_soft_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
+ struct swregs_state *s387 = &x86_task_fpu(target)->fpstate->regs.soft;
const void *space = s387->st_space;
int offset = (S387->ftop & 7) * 10, other = 80 - offset;
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index eec3e4805c75..5e238e930fe3 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -73,7 +73,7 @@ static inline bool seg_writable(struct desc_struct *d)
return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
}
-#define I387 (&current->thread.fpu.fpstate->regs)
+#define I387 (&x86_task_fpu(current)->fpstate->regs)
#define FPU_info (I387->soft.info)
#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 32035d5be5a0..5b9908f13dcf 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -3,12 +3,10 @@
KCOV_INSTRUMENT_tlb.o := n
KCOV_INSTRUMENT_mem_encrypt.o := n
KCOV_INSTRUMENT_mem_encrypt_amd.o := n
-KCOV_INSTRUMENT_mem_encrypt_identity.o := n
KCOV_INSTRUMENT_pgprot.o := n
KASAN_SANITIZE_mem_encrypt.o := n
KASAN_SANITIZE_mem_encrypt_amd.o := n
-KASAN_SANITIZE_mem_encrypt_identity.o := n
KASAN_SANITIZE_pgprot.o := n
# Disable KCSAN entirely, because otherwise we get warnings that some functions
@@ -16,12 +14,10 @@ KASAN_SANITIZE_pgprot.o := n
KCSAN_SANITIZE := n
# Avoid recursion by not calling KMSAN hooks for CEA code.
KMSAN_SANITIZE_cpu_entry_area.o := n
-KMSAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_amd.o = -pg
-CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
CFLAGS_REMOVE_pgprot.o = -pg
endif
@@ -32,9 +28,6 @@ obj-y += pat/
# Make sure __phys_addr has no stackprotector
CFLAGS_physaddr.o := -fno-stack-protector
-CFLAGS_mem_encrypt_identity.o := -fno-stack-protector
-
-CFLAGS_fault.o := -I $(src)/../include/asm/trace
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
@@ -52,7 +45,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
-obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
@@ -63,5 +56,4 @@ obj-$(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_X86_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o
-obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 628833afee37..f980b0eb0105 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -25,7 +25,7 @@
#include <asm/numa.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
static unsigned char __initdata nodeids[8];
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 51986e8a9d35..bf8dab18be97 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -111,7 +111,7 @@ static bool ex_handler_sgx(const struct exception_table_entry *fixup,
/*
* Handler for when we fail to restore a task's FPU state. We should never get
- * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * here because the FPU state of a task using the FPU (struct fpu::fpstate)
* should always be valid. However, past bugs have allowed userspace to set
* reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
* These caused XRSTOR to fail when switching to the task, leaking the FPU
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 296d294142c8..998bd807fc7b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,7 +13,6 @@
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
#include <linux/hugetlb.h> /* hstate_index_to_shift */
-#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/
@@ -38,7 +37,7 @@
#include <asm/sev.h> /* snp_dump_hva_rmpentry() */
#define CREATE_TRACE_POINTS
-#include <asm/trace/exceptions.h>
+#include <trace/events/exceptions.h>
/*
* Returns 0 if mmiotrace is disabled, or if the fault is not
@@ -1455,9 +1454,6 @@ static __always_inline void
trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
- if (!trace_pagefault_enabled())
- return;
-
if (user_mode(regs))
trace_page_fault_user(address, regs, error_code);
else
@@ -1496,8 +1492,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2();
- prefetchw(&current->mm->mmap_lock);
-
/*
* KVM uses #PF vector to deliver 'page not present' events to guests
* (asynchronous page fault mechanism). The event happens when a
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bfa444a7dbb0..7456df985d96 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,6 +28,7 @@
#include <asm/text-patching.h>
#include <asm/memtype.h>
#include <asm/paravirt.h>
+#include <asm/mmu_context.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -173,11 +174,7 @@ __ref void *alloc_low_pages(unsigned int num)
* randomization is enabled.
*/
-#ifndef CONFIG_X86_5LEVEL
-#define INIT_PGD_PAGE_TABLES 3
-#else
#define INIT_PGD_PAGE_TABLES 4
-#endif
#ifndef CONFIG_RANDOMIZE_MEMORY
#define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES)
@@ -824,31 +821,33 @@ void __init poking_init(void)
spinlock_t *ptl;
pte_t *ptep;
- poking_mm = mm_alloc();
- BUG_ON(!poking_mm);
+ text_poke_mm = mm_alloc();
+ BUG_ON(!text_poke_mm);
/* Xen PV guests need the PGD to be pinned. */
- paravirt_enter_mmap(poking_mm);
+ paravirt_enter_mmap(text_poke_mm);
+
+ set_notrack_mm(text_poke_mm);
/*
* Randomize the poking address, but make sure that the following page
* will be mapped at the same PMD. We need 2 pages, so find space for 3,
* and adjust the address if the PMD ends after the first one.
*/
- poking_addr = TASK_UNMAPPED_BASE;
+ text_poke_mm_addr = TASK_UNMAPPED_BASE;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+ text_poke_mm_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
(TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
- if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
- poking_addr += PAGE_SIZE;
+ if (((text_poke_mm_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+ text_poke_mm_addr += PAGE_SIZE;
/*
* We need to trigger the allocation of the page-tables that will be
* needed for poking now. Later, poking may be performed in an atomic
* section, which might cause allocation to fail.
*/
- ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);
BUG_ON(!ptep);
pte_unmap_unlock(ptep, ptl);
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ad662cc4605c..607d6a2e66e2 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -30,6 +30,7 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
+#include <linux/execmem.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
@@ -565,7 +566,7 @@ static void __init lowmem_pfn_init(void)
"only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
#define MSG_HIGHMEM_TRIMMED \
- "Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n"
+ "Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n"
/*
* We have more RAM than fits into lowmem - we try to put it into
* highmem, also taking the highmem=x boot parameter into account:
@@ -612,7 +613,6 @@ void __init find_low_pfn_range(void)
highmem_pfn_init();
}
-#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
@@ -633,12 +633,6 @@ void __init initmem_init(void)
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
- setup_bootmem_allocator();
-}
-#endif /* !CONFIG_NUMA */
-
-void __init setup_bootmem_allocator(void)
-{
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
@@ -755,6 +749,8 @@ void mark_rodata_ro(void)
pr_info("Write protecting kernel text and read-only data: %luk\n",
size >> 10);
+ execmem_cache_make_ro();
+
kernel_set_to_readonly = 1;
#ifdef CONFIG_CPA_DEBUG
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7c4f6f591f2b..66330fe4e18c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -34,6 +34,7 @@
#include <linux/gfp.h>
#include <linux/kcore.h>
#include <linux/bootmem_info.h>
+#include <linux/execmem.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
@@ -805,12 +806,17 @@ kernel_physical_mapping_change(unsigned long paddr_start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(void)
+static inline void x86_numa_init(void)
{
memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
}
#endif
+void __init initmem_init(void)
+{
+ x86_numa_init();
+}
+
void __init paging_init(void)
{
sparse_init();
@@ -827,7 +833,6 @@ void __init paging_init(void)
zone_sizes_init();
}
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
#define PAGE_UNUSED 0xFD
/*
@@ -926,7 +931,6 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
if (!IS_ALIGNED(end, PMD_SIZE))
unused_pmd_start = end;
}
-#endif
/*
* Memory hotplug specific functions
@@ -1146,16 +1150,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
pages++;
- }
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- else if (vmemmap_pmd_is_unused(addr, next)) {
+ } else if (vmemmap_pmd_is_unused(addr, next)) {
free_hugepage_table(pmd_page(*pmd),
altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
}
-#endif
continue;
}
@@ -1391,6 +1392,8 @@ void mark_rodata_ro(void)
(end - start) >> 10);
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+ execmem_cache_make_ro();
+
kernel_set_to_readonly = 1;
/*
@@ -1492,7 +1495,6 @@ unsigned long memory_block_size_bytes(void)
return memory_block_size_probed;
}
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Initialise the sparsemem vmemmap using huge-pages at the PMD level.
*/
@@ -1639,4 +1641,3 @@ void __meminit vmemmap_populate_print_last(void)
node_start = 0;
}
}
-#endif
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 7490ff6d83b1..faf3a13fb6ba 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -40,7 +40,9 @@
* section is later cleared.
*/
u64 sme_me_mask __section(".data") = 0;
+SYM_PIC_ALIAS(sme_me_mask);
u64 sev_status __section(".data") = 0;
+SYM_PIC_ALIAS(sev_status);
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 3f37b5c80bb3..097aadc250f7 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -25,4 +25,8 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache);
extern unsigned long tlb_single_page_flush_ceiling;
+#ifdef CONFIG_NUMA
+void __init x86_numa_init(void);
+#endif
+
#endif /* __X86_MM_INTERNAL_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 64e5cdb2460a..c24890c40138 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -18,9 +18,10 @@
#include <asm/e820/api.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/amd_nb.h>
+#include <asm/numa.h>
+#include <asm/amd/nb.h>
-#include "numa_internal.h"
+#include "mm_internal.h"
int numa_off;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
deleted file mode 100644
index 65fda406e6f2..000000000000
--- a/arch/x86/mm/numa_32.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation
- * August 2002: added remote node KVA remap - Martin J. Bligh
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/memblock.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <asm/pgtable_areas.h>
-
-#include "numa_internal.h"
-
-extern unsigned long highend_pfn, highstart_pfn;
-
-void __init initmem_init(void)
-{
- x86_numa_init();
-
-#ifdef CONFIG_HIGHMEM
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > max_low_pfn)
- highstart_pfn = max_low_pfn;
- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- pages_to_mb(highend_pfn - highstart_pfn));
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
- printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
- pages_to_mb(max_low_pfn));
- printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n",
- max_low_pfn, highstart_pfn);
-
- printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
- (ulong) pfn_to_kaddr(max_low_pfn));
-
- printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
- (ulong) pfn_to_kaddr(highstart_pfn));
-
- __vmalloc_start_set = true;
- setup_bootmem_allocator();
-}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
deleted file mode 100644
index 59d80160fa5a..000000000000
--- a/arch/x86/mm/numa_64.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Generic VM initialization for x86-64 NUMA setups.
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/memblock.h>
-
-#include "numa_internal.h"
-
-void __init initmem_init(void)
-{
- x86_numa_init();
-}
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
deleted file mode 100644
index 11e1ff370c10..000000000000
--- a/arch/x86/mm/numa_internal.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __X86_MM_NUMA_INTERNAL_H
-#define __X86_MM_NUMA_INTERNAL_H
-
-#include <linux/types.h>
-#include <asm/numa.h>
-
-void __init x86_numa_init(void);
-
-#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 72d8cbc61158..c97b527c66fe 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -38,6 +38,7 @@
#include <linux/kernel.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
@@ -232,7 +233,7 @@ void pat_cpu_init(void)
panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
}
- wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+ wrmsrq(MSR_IA32_CR_PAT, pat_msr_val);
__flush_tlb_all();
}
@@ -256,7 +257,7 @@ void __init pat_bp_init(void)
if (!cpu_feature_enabled(X86_FEATURE_PAT))
pat_disable("PAT not supported by the CPU.");
else
- rdmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+ rdmsrq(MSR_IA32_CR_PAT, pat_msr_val);
if (!pat_msr_val) {
pat_disable("PAT support disabled by the firmware.");
@@ -682,6 +683,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
/**
* pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
* of @pfn cannot be overridden by UC MTRR memory type.
+ * @pfn: The page frame number to check.
*
* Only to be called when PAT is enabled.
*
@@ -773,38 +775,14 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
return vma_prot;
}
-#ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- return 1;
-}
-#else
-/* This check is needed to avoid cache aliasing when PAT is enabled */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- u64 from = ((u64)pfn) << PAGE_SHIFT;
- u64 to = from + size;
- u64 cursor = from;
-
- if (!pat_enabled())
- return 1;
-
- while (cursor < to) {
- if (!devmem_is_allowed(pfn))
- return 0;
- cursor += PAGE_SIZE;
- pfn++;
- }
- return 1;
-}
-#endif /* CONFIG_STRICT_DEVMEM */
-
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;
+ if (!pat_enabled())
+ return 1;
+
if (!range_is_allowed(pfn, size))
return 0;
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index def3d9284254..30ab4aced761 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -889,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
/* change init_mm */
set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
- if (!SHARED_KERNEL_PMD) {
+ {
struct page *page;
list_for_each_entry(page, &pgd_list, lru) {
@@ -1293,7 +1293,7 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
/* Queue the page table to be freed after TLB flush */
list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
- if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) {
+ if (IS_ENABLED(CONFIG_X86_32)) {
struct page *page;
/* Update all PGD tables to use the same large page */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index f7ae44d3dd9e..62777ba4de1a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -10,6 +10,7 @@
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
+SYM_PIC_ALIAS(physical_mask);
#endif
pgtable_t pte_alloc_one(struct mm_struct *mm)
@@ -68,12 +69,6 @@ static inline void pgd_list_del(pgd_t *pgd)
list_del(&ptdesc->pt_list);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-#define MAX_UNSHARED_PTRS_PER_PGD \
- MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
-
-
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
virt_to_ptdesc(pgd)->pt_mm = mm;
@@ -86,29 +81,19 @@ struct mm_struct *pgd_page_get_mm(struct page *page)
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
- /* If the pgd points to a shared pagetable level (either the
- ptes in non-PAE, or shared PMD in PAE), then just copy the
- references from swapper_pg_dir. */
- if (CONFIG_PGTABLE_LEVELS == 2 ||
- (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
- CONFIG_PGTABLE_LEVELS >= 4) {
+ /* PAE preallocates all its PMDs. No cloning needed. */
+ if (!IS_ENABLED(CONFIG_X86_PAE))
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
- }
- /* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD) {
- pgd_set_mm(pgd, mm);
- pgd_list_add(pgd);
- }
+ /* List used to sync kernel mapping updates */
+ pgd_set_mm(pgd, mm);
+ pgd_list_add(pgd);
}
static void pgd_dtor(pgd_t *pgd)
{
- if (SHARED_KERNEL_PMD)
- return;
-
spin_lock(&pgd_lock);
pgd_list_del(pgd);
spin_unlock(&pgd_lock);
@@ -132,15 +117,15 @@ static void pgd_dtor(pgd_t *pgd)
* processor notices the update. Since this is expensive, and
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
*/
-#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
-#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD
+#define PREALLOCATED_PMDS PTRS_PER_PGD
/*
+ * "USER_PMDS" are the PMDs for the user copy of the page tables when
+ * PTI is enabled. They do not exist when PTI is disabled. Note that
+ * this is distinct from the user _portion_ of the kernel page tables
+ * which always exists.
+ *
* We allocate separate PMDs for the kernel part of the user page-table
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
@@ -169,7 +154,6 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0
-#define MAX_PREALLOCATED_PMDS 0
#define PREALLOCATED_USER_PMDS 0
#define MAX_PREALLOCATED_USER_PMDS 0
#endif /* CONFIG_X86_PAE */
@@ -318,68 +302,15 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
{
}
#endif
-/*
- * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
- * assumes that pgd should be in one page.
- *
- * But kernel with PAE paging that is not running as a Xen domain
- * only needs to allocate 32 bytes for pgd instead of one page.
- */
-#ifdef CONFIG_X86_PAE
-
-#include <linux/slab.h>
-
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-#define PGD_ALIGN 32
-
-static struct kmem_cache *pgd_cache;
-
-void __init pgtable_cache_init(void)
-{
- /*
- * When PAE kernel is running as a Xen domain, it does not use
- * shared kernel pmd. And this requires a whole page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return;
-
- /*
- * when PAE kernel is not running as a Xen domain, it uses
- * shared kernel pmd. Shared kernel pmd does not require a whole
- * page for pgd. We are able to just allocate a 32-byte for pgd.
- * During boot time, we create a 32-byte slab for pgd table allocation.
- */
- pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
- SLAB_PANIC, NULL);
-}
static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{
/*
- * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
- * We allocate one page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return __pgd_alloc(mm, pgd_allocation_order());
-
- /*
- * Now PAE kernel is not running as a Xen domain. We can allocate
- * a 32-byte slab for pgd to save memory space.
+ * PTI and Xen need a whole page for the PAE PGD
+ * even though the hardware only needs 32 bytes.
+ *
+ * For simplicity, allocate a page for all users.
*/
- return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
-}
-
-static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- if (!SHARED_KERNEL_PMD)
- __pgd_free(mm, pgd);
- else
- kmem_cache_free(pgd_cache, pgd);
-}
-#else
-
-static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
-{
return __pgd_alloc(mm, pgd_allocation_order());
}
@@ -387,13 +318,12 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
__pgd_free(mm, pgd);
}
-#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
- pmd_t *pmds[MAX_PREALLOCATED_PMDS];
+ pmd_t *pmds[PREALLOCATED_PMDS];
pgd = _pgd_alloc(mm);
@@ -613,11 +543,11 @@ pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
#endif
/**
- * reserve_top_address - reserves a hole in the top of kernel address space
- * @reserve - size of hole to reserve
+ * reserve_top_address - Reserve a hole in the top of the kernel address space
+ * @reserve: Size of hole to reserve
*
* Can be used to relocate the fixmap area and poke a hole in the top
- * of kernel address space to make room for a hypervisor.
+ * of the kernel address space to make room for a hypervisor.
*/
void __init reserve_top_address(unsigned long reserve)
{
@@ -662,9 +592,12 @@ void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
}
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-#ifdef CONFIG_X86_5LEVEL
+#if CONFIG_PGTABLE_LEVELS > 4
/**
- * p4d_set_huge - setup kernel P4D mapping
+ * p4d_set_huge - Set up kernel P4D mapping
+ * @p4d: Pointer to the P4D entry
+ * @addr: Virtual address associated with the P4D entry
+ * @prot: Protection bits to use
*
* No 512GB pages yet -- always return 0
*/
@@ -674,9 +607,10 @@ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
}
/**
- * p4d_clear_huge - clear kernel P4D mapping when it is set
+ * p4d_clear_huge - Clear kernel P4D mapping when it is set
+ * @p4d: Pointer to the P4D entry to clear
*
- * No 512GB pages yet -- always return 0
+ * No 512GB pages yet -- do nothing
*/
void p4d_clear_huge(p4d_t *p4d)
{
@@ -684,7 +618,10 @@ void p4d_clear_huge(p4d_t *p4d)
#endif
/**
- * pud_set_huge - setup kernel PUD mapping
+ * pud_set_huge - Set up kernel PUD mapping
+ * @pud: Pointer to the PUD entry
+ * @addr: Virtual address associated with the PUD entry
+ * @prot: Protection bits to use
*
* MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
* function sets up a huge page only if the complete range has the same MTRR
@@ -715,7 +652,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
}
/**
- * pmd_set_huge - setup kernel PMD mapping
+ * pmd_set_huge - Set up kernel PMD mapping
+ * @pmd: Pointer to the PMD entry
+ * @addr: Virtual address associated with the PMD entry
+ * @prot: Protection bits to use
*
* See text over pud_set_huge() above.
*
@@ -744,7 +684,8 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
}
/**
- * pud_clear_huge - clear kernel PUD mapping when it is set
+ * pud_clear_huge - Clear kernel PUD mapping when it is set
+ * @pud: Pointer to the PUD entry to clear.
*
* Returns 1 on success and 0 on failure (no PUD map is found).
*/
@@ -759,7 +700,8 @@ int pud_clear_huge(pud_t *pud)
}
/**
- * pmd_clear_huge - clear kernel PMD mapping when it is set
+ * pmd_clear_huge - Clear kernel PMD mapping when it is set
+ * @pmd: Pointer to the PMD entry to clear.
*
* Returns 1 on success and 0 on failure (no PMD map is found).
*/
@@ -775,11 +717,11 @@ int pmd_clear_huge(pmd_t *pmd)
#ifdef CONFIG_X86_64
/**
- * pud_free_pmd_page - Clear pud entry and free pmd page.
- * @pud: Pointer to a PUD.
- * @addr: Virtual address associated with pud.
+ * pud_free_pmd_page - Clear PUD entry and free PMD page
+ * @pud: Pointer to a PUD
+ * @addr: Virtual address associated with PUD
*
- * Context: The pud range has been unmapped and TLB purged.
+ * Context: The PUD range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*
* NOTE: Callers must allow a single page allocation.
@@ -822,11 +764,11 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
}
/**
- * pmd_free_pte_page - Clear pmd entry and free pte page.
- * @pmd: Pointer to a PMD.
- * @addr: Virtual address associated with pmd.
+ * pmd_free_pte_page - Clear PMD entry and free PTE page.
+ * @pmd: Pointer to the PMD
+ * @addr: Virtual address associated with PMD
*
- * Context: The pmd range has been unmapped and TLB purged.
+ * Context: The PMD range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
@@ -848,7 +790,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
/*
* Disable free page handling on x86-PAE. This assures that ioremap()
- * does not update sync'd pmd entries. See vmalloc_sync_one().
+ * does not update sync'd PMD entries. See vmalloc_sync_one().
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 5f0d579932c6..190299834011 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -185,7 +185,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
- BUILD_BUG_ON(pgd_leaf(*pgd) != 0);
+ BUILD_BUG_ON(pgd_leaf(*pgd));
return p4d_offset(pgd, address);
}
@@ -206,7 +206,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!p4d)
return NULL;
- BUILD_BUG_ON(p4d_leaf(*p4d) != 0);
+ BUILD_BUG_ON(p4d_leaf(*p4d));
if (p4d_none(*p4d)) {
unsigned long new_pud_page = __get_free_page(gfp);
if (WARN_ON_ONCE(!new_pud_page))
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b6d6750e4bd1..39f80111e6f1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -19,6 +19,7 @@
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include <asm/perf_event.h>
#include <asm/tlb.h>
@@ -215,16 +216,20 @@ static void clear_asid_other(void)
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+struct new_asid {
+ unsigned int asid : 16;
+ unsigned int need_flush : 1;
+};
-static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
- u16 *new_asid, bool *need_flush)
+static struct new_asid choose_new_asid(struct mm_struct *next, u64 next_tlb_gen)
{
+ struct new_asid ns;
u16 asid;
if (!static_cpu_has(X86_FEATURE_PCID)) {
- *new_asid = 0;
- *need_flush = true;
- return;
+ ns.asid = 0;
+ ns.need_flush = 1;
+ return ns;
}
/*
@@ -235,9 +240,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 global_asid = mm_global_asid(next);
if (global_asid) {
- *new_asid = global_asid;
- *need_flush = false;
- return;
+ ns.asid = global_asid;
+ ns.need_flush = 0;
+ return ns;
}
}
@@ -249,22 +254,23 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
next->context.ctx_id)
continue;
- *new_asid = asid;
- *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
- next_tlb_gen);
- return;
+ ns.asid = asid;
+ ns.need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen);
+ return ns;
}
/*
* We don't currently own an ASID slot on this CPU.
* Allocate a slot.
*/
- *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
- if (*new_asid >= TLB_NR_DYN_ASIDS) {
- *new_asid = 0;
+ ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+ if (ns.asid >= TLB_NR_DYN_ASIDS) {
+ ns.asid = 0;
this_cpu_write(cpu_tlbstate.next_asid, 1);
}
- *need_flush = true;
+ ns.need_flush = true;
+
+ return ns;
}
/*
@@ -623,7 +629,7 @@ static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
{
/* Flush L1D if the outgoing task requests it */
if (prev_mm & LAST_USER_MM_L1D_FLUSH)
- wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
/* Check whether the incoming task opted in for L1D flush */
if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
@@ -781,9 +787,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
unsigned long new_lam;
+ struct new_asid ns;
u64 next_tlb_gen;
- bool need_flush;
- u16 new_asid;
+
/* We don't want flush_tlb_func() to run concurrently with us. */
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
@@ -847,14 +853,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* mm_cpumask. The TLB shootdown code can figure out from
* cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
*/
- if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm &&
+ if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+ WARN_ON_ONCE(prev != &init_mm && !is_notrack_mm(prev) &&
!cpumask_test_cpu(cpu, mm_cpumask(next))))
cpumask_set_cpu(cpu, mm_cpumask(next));
/* Check if the current mm is transitioning to a global ASID */
if (mm_needs_global_asid(next, prev_asid)) {
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
goto reload_tlb;
}
@@ -889,8 +896,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* TLB contents went out of date while we were in lazy
* mode. Fall through to the TLB switching code below.
*/
- new_asid = prev_asid;
- need_flush = true;
+ ns.asid = prev_asid;
+ ns.need_flush = true;
} else {
/*
* Apply process to process speculation vulnerability
@@ -906,34 +913,26 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
barrier();
- /*
- * Leave this CPU in prev's mm_cpumask. Atomic writes to
- * mm_cpumask can be expensive under contention. The CPU
- * will be removed lazily at TLB flush time.
- */
- VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu,
- mm_cpumask(prev)));
-
/* Start receiving IPIs and then read tlb_gen (and LAM below) */
if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
}
reload_tlb:
new_lam = mm_lam_cr3_mask(next);
- if (need_flush) {
- VM_WARN_ON_ONCE(is_global_asid(new_asid));
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
+ if (ns.need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(ns.asid));
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -942,7 +941,7 @@ reload_tlb:
barrier();
this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid);
cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
if (next != prev) {
@@ -973,6 +972,77 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
}
/*
+ * Using a temporary mm allows to set temporary mappings that are not accessible
+ * by other CPUs. Such mappings are needed to perform sensitive memory writes
+ * that override the kernel memory protections (e.g., W^X), without exposing the
+ * temporary page-table mappings that are required for these write operations to
+ * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
+ * mapping is torn down. Temporary mms can also be used for EFI runtime service
+ * calls or similar functionality.
+ *
+ * It is illegal to schedule while using a temporary mm -- the context switch
+ * code is unaware of the temporary mm and does not know how to context switch.
+ * Use a real (non-temporary) mm in a kernel thread if you need to sleep.
+ *
+ * Note: For sensitive memory writes, the temporary mm needs to be used
+ * exclusively by a single core, and IRQs should be disabled while the
+ * temporary mm is loaded, thereby preventing interrupt handler bugs from
+ * overriding the kernel memory protection.
+ */
+struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm)
+{
+ struct mm_struct *prev_mm;
+
+ lockdep_assert_preemption_disabled();
+ guard(irqsave)();
+
+ /*
+ * Make sure not to be in TLB lazy mode, as otherwise we'll end up
+ * with a stale address space WITHOUT being in lazy mode after
+ * restoring the previous mm.
+ */
+ if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
+ leave_mm();
+
+ prev_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ switch_mm_irqs_off(NULL, temp_mm, current);
+
+ /*
+ * If breakpoints are enabled, disable them while the temporary mm is
+ * used. Userspace might set up watchpoints on addresses that are used
+ * in the temporary mm, which would lead to wrong signals being sent or
+ * crashes.
+ *
+ * Note that breakpoints are not disabled selectively, which also causes
+ * kernel breakpoints (e.g., perf's) to be disabled. This might be
+ * undesirable, but still seems reasonable as the code that runs in the
+ * temporary mm should be short.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_disable();
+
+ return prev_mm;
+}
+
+void unuse_temporary_mm(struct mm_struct *prev_mm)
+{
+ lockdep_assert_preemption_disabled();
+ guard(irqsave)();
+
+ /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
+ cpumask_clear_cpu(smp_processor_id(), mm_cpumask(this_cpu_read(cpu_tlbstate.loaded_mm)));
+
+ switch_mm_irqs_off(NULL, prev_mm, current);
+
+ /*
+ * Restore the breakpoints if they were disabled before the temporary mm
+ * was loaded.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_restore();
+}
+
+/*
* Call this when reinitializing a CPU. It fixes the following potential
* problems:
*
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9e5fe2ba858f..15672cb926fc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -41,6 +41,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT5(b1, b2, b3, b4, b5) \
+ do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0)
#define EMIT1_off32(b1, off) \
do { EMIT1(b1); EMIT(off, 4); } while (0)
@@ -629,7 +631,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
goto out;
ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
- text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ smp_text_poke_single(ip, new_insn, X86_PATCH_SIZE, NULL);
ret = 0;
}
out:
@@ -661,7 +663,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
+ OPTIMIZER_HIDE_VAR(reg);
+ emit_jump(&prog, its_static_thunk(reg), ip);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
@@ -683,7 +688,7 @@ static void emit_return(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk()) {
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
@@ -1502,6 +1507,48 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
#define PRIV_STACK_GUARD_SZ 8
#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
+ struct bpf_prog *bpf_prog)
+{
+ u8 *prog = *pprog;
+ u8 *func;
+
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
+ /* The clearing sequence clobbers eax and ecx. */
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x51); /* push rcx */
+ ip += 2;
+
+ func = (u8 *)clear_bhb_loop;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+
+ if (emit_call(&prog, func, ip))
+ return -EINVAL;
+ EMIT1(0x59); /* pop rcx */
+ EMIT1(0x58); /* pop rax */
+ }
+ /* Insert IBHF instruction */
+ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&
+ cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) ||
+ cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) {
+ /*
+ * Add an Indirect Branch History Fence (IBHF). IBHF acts as a
+ * fence preventing branch history from before the fence from
+ * affecting indirect branches after the fence. This is
+ * specifically used in cBPF jitted code to prevent Intra-mode
+ * BHI attacks. The IBHF instruction is designed to be a NOP on
+ * hardware that doesn't need or support it. The REP and REX.W
+ * prefixes are required by the microcode, and they also ensure
+ * that the NOP is unlikely to be used in existing code.
+ *
+ * IBHF is not a valid instruction in 32-bit mode.
+ */
+ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */
+ }
+ *pprog = prog;
+ return 0;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
@@ -2544,6 +2591,13 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
+ if (bpf_prog_was_classic(bpf_prog) &&
+ !capable(CAP_SYS_ADMIN)) {
+ u8 *ip = image + addrs[i - 1];
+
+ if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
+ return -EINVAL;
+ }
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 631512f7ec85..99b1727136c1 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -5,7 +5,7 @@
#include <linux/cpu.h>
#include <linux/range.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/pci_x86.h>
#include <asm/pci-direct.h>
@@ -202,7 +202,7 @@ static int __init early_root_info_init(void)
/* need to take out [0, TOM) for RAM*/
address = MSR_K8_TOP_MEM1;
- rdmsrl(address, val);
+ rdmsrq(address, val);
end = (val & 0xffffff800000ULL);
printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20);
if (end < (1ULL<<32))
@@ -293,12 +293,12 @@ static int __init early_root_info_init(void)
/* need to take out [4G, TOM2) for RAM*/
/* SYS_CFG */
address = MSR_AMD64_SYSCFG;
- rdmsrl(address, val);
+ rdmsrq(address, val);
/* TOP_MEM2 is enabled? */
if (val & (1<<21)) {
/* TOP_MEM2 */
address = MSR_K8_TOP_MEM2;
- rdmsrl(address, val);
+ rdmsrq(address, val);
end = (val & 0xffffff800000ULL);
printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20);
subtract_range(range, RANGE_NUM, 1ULL<<32, end);
@@ -341,10 +341,10 @@ static int amd_bus_cpu_online(unsigned int cpu)
{
u64 reg;
- rdmsrl(MSR_AMD64_NB_CFG, reg);
+ rdmsrq(MSR_AMD64_NB_CFG, reg);
if (!(reg & ENABLE_CF8_EXT_CFG)) {
reg |= ENABLE_CF8_EXT_CFG;
- wrmsrl(MSR_AMD64_NB_CFG, reg);
+ wrmsrq(MSR_AMD64_NB_CFG, reg);
}
return 0;
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index efefeb82ab61..e7e71490bd25 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -9,7 +9,7 @@
#include <linux/pci.h>
#include <linux/suspend.h>
#include <linux/vgaarb.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include <asm/hpet.h>
#include <asm/pci_x86.h>
@@ -970,13 +970,13 @@ static void amd_rp_pme_suspend(struct pci_dev *dev)
struct pci_dev *rp;
/*
- * PM_SUSPEND_ON means we're doing runtime suspend, which means
+ * If system suspend is not in progress, we're doing runtime suspend, so
* amd-pmc will not be involved so PMEs during D3 work as advertised.
*
* The PMEs *do* work if amd-pmc doesn't put the SoC in the hardware
* sleep state, but we assume amd-pmc is always present.
*/
- if (pm_suspend_target_state == PM_SUSPEND_ON)
+ if (!pm_suspend_in_progress())
return;
rp = pcie_find_root_port(dev);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 39255f0eb14d..1f4522325920 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -22,9 +22,10 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
+#include <asm/acpi.h>
#include <asm/e820/api.h>
+#include <asm/msr.h>
#include <asm/pci_x86.h>
-#include <asm/acpi.h>
/* Indicate if the ECAM resources have been placed into the resource table */
static bool pci_mmcfg_running_state;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index a4b4ebd41b8f..e7e8f77f77f8 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -89,6 +89,7 @@ int __init efi_alloc_page_tables(void)
efi_mm.pgd = efi_pgd;
mm_init_cpumask(&efi_mm);
init_new_context(NULL, &efi_mm);
+ set_notrack_mm(&efi_mm);
return 0;
@@ -434,15 +435,12 @@ void __init efi_dump_pagetable(void)
*/
static void efi_enter_mm(void)
{
- efi_prev_mm = current->active_mm;
- current->active_mm = &efi_mm;
- switch_mm(efi_prev_mm, &efi_mm, NULL);
+ efi_prev_mm = use_temporary_mm(&efi_mm);
}
static void efi_leave_mm(void)
{
- current->active_mm = efi_prev_mm;
- switch_mm(&efi_mm, efi_prev_mm, NULL);
+ unuse_temporary_mm(efi_prev_mm);
}
void arch_efi_call_virt_setup(void)
diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c
index 57f210cda761..ee77d57bcab7 100644
--- a/arch/x86/platform/olpc/olpc-xo1-rtc.c
+++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c
@@ -64,9 +64,9 @@ static int __init xo1_rtc_init(void)
of_node_put(node);
pr_info("olpc-xo1-rtc: Initializing OLPC XO-1 RTC\n");
- rdmsrl(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm);
- rdmsrl(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm);
- rdmsrl(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century);
+ rdmsrq(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm);
+ rdmsrq(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm);
+ rdmsrq(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century);
r = platform_device_register(&xo1_rtc_device);
if (r)
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 63066e7c8517..30751b42d54e 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -325,7 +325,7 @@ static int setup_sci_interrupt(struct platform_device *pdev)
dev_info(&pdev->dev, "SCI unmapped. Mapping to IRQ 3\n");
sci_irq = 3;
lo |= 0x00300000;
- wrmsrl(0x51400020, lo);
+ wrmsrq(0x51400020, lo);
}
/* Select level triggered in PIC */
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index cfa18ec7d55f..1d78e5631bb8 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -87,8 +87,7 @@ SYM_CODE_START(pvh_start_xen)
mov %ebx, %esi
movl rva(pvh_start_info_sz)(%ebp), %ecx
shr $2,%ecx
- rep
- movsl
+ rep movsl
leal rva(early_stack_end)(%ebp), %esp
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 08e76a5ca155..916441f5e85c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -27,6 +27,7 @@
#include <asm/mmu_context.h>
#include <asm/cpu_device_id.h>
#include <asm/microcode.h>
+#include <asm/msr.h>
#include <asm/fred.h>
#ifdef CONFIG_X86_32
@@ -44,7 +45,7 @@ static void msr_save_context(struct saved_context *ctxt)
while (msr < end) {
if (msr->valid)
- rdmsrl(msr->info.msr_no, msr->info.reg.q);
+ rdmsrq(msr->info.msr_no, msr->info.reg.q);
msr++;
}
}
@@ -56,7 +57,7 @@ static void msr_restore_context(struct saved_context *ctxt)
while (msr < end) {
if (msr->valid)
- wrmsrl(msr->info.msr_no, msr->info.reg.q);
+ wrmsrq(msr->info.msr_no, msr->info.reg.q);
msr++;
}
}
@@ -110,12 +111,12 @@ static void __save_processor_state(struct saved_context *ctxt)
savesegment(ds, ctxt->ds);
savesegment(es, ctxt->es);
- rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+ rdmsrq(MSR_FS_BASE, ctxt->fs_base);
+ rdmsrq(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ rdmsrq(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
- rdmsrl(MSR_EFER, ctxt->efer);
+ rdmsrq(MSR_EFER, ctxt->efer);
#endif
/*
@@ -125,7 +126,7 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr2 = read_cr2();
ctxt->cr3 = __read_cr3();
ctxt->cr4 = __read_cr4();
- ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
+ ctxt->misc_enable_saved = !rdmsrq_safe(MSR_IA32_MISC_ENABLE,
&ctxt->misc_enable);
msr_save_context(ctxt);
}
@@ -198,7 +199,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
struct cpuinfo_x86 *c;
if (ctxt->misc_enable_saved)
- wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
+ wrmsrq(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
/*
* control registers
*/
@@ -208,7 +209,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
__write_cr4(ctxt->cr4);
#else
/* CONFIG X86_64 */
- wrmsrl(MSR_EFER, ctxt->efer);
+ wrmsrq(MSR_EFER, ctxt->efer);
__write_cr4(ctxt->cr4);
#endif
write_cr3(ctxt->cr3);
@@ -231,7 +232,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
* handlers or in complicated helpers like load_gs_index().
*/
#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ wrmsrq(MSR_GS_BASE, ctxt->kernelmode_gs_base);
/*
* Reinitialize FRED to ensure the FRED MSRs contain the same values
@@ -267,8 +268,8 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
* restoring the selectors clobbers the bases. Keep in mind
* that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
- wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+ wrmsrq(MSR_FS_BASE, ctxt->fs_base);
+ wrmsrq(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
#else
loadsegment(gs, ctxt->gs);
#endif
@@ -414,7 +415,7 @@ static int msr_build_context(const u32 *msr_id, const int num)
u64 dummy;
msr_array[i].info.msr_no = msr_id[j];
- msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy);
+ msr_array[i].valid = !rdmsrq_safe(msr_id[j], &dummy);
msr_array[i].info.reg.q = 0;
}
saved_msrs->num = total_num;
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 5b81d19cd114..a7c23f2a58c9 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -42,6 +42,7 @@ unsigned long relocated_restore_code __visible;
/**
* pfn_is_nosave - check if given pfn is in the 'nosave' section
+ * @pfn: the page frame number to check.
*/
int pfn_is_nosave(unsigned long pfn)
{
@@ -86,7 +87,10 @@ static inline u32 compute_e820_crc32(struct e820_table *table)
/**
* arch_hibernation_header_save - populate the architecture specific part
* of a hibernation image header
- * @addr: address to save the data at
+ * @addr: address where architecture specific header data will be saved.
+ * @max_size: maximum size of architecture specific data in hibernation header.
+ *
+ * Return: 0 on success, -EOVERFLOW if max_size is insufficient.
*/
int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 5606a15cf9a1..fb910d9f8471 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -69,8 +69,7 @@ copy_loop:
movl pbe_orig_address(%edx), %edi
movl $(PAGE_SIZE >> 2), %ecx
- rep
- movsl
+ rep movsl
movl pbe_next(%edx), %edx
jmp copy_loop
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 66f066b8feda..c73be0a02a6c 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -138,8 +138,7 @@ SYM_FUNC_START(core_restore_code)
movq pbe_address(%rdx), %rsi
movq pbe_orig_address(%rdx), %rdi
movq $(PAGE_SIZE >> 3), %rcx
- rep
- movsq
+ rep movsq
/* progress to the next pbe */
movq pbe_next(%rdx), %rdx
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index f9bc444a3064..ed5c63c0b4e5 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -9,6 +9,7 @@
#include <asm/realmode.h>
#include <asm/tlbflush.h>
#include <asm/crash.h>
+#include <asm/msr.h>
#include <asm/sev.h>
struct real_mode_header *real_mode_header;
@@ -145,7 +146,7 @@ static void __init setup_real_mode(void)
* Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
* so we need to mask it out.
*/
- rdmsrl(MSR_EFER, efer);
+ rdmsrq(MSR_EFER, efer);
trampoline_header->efer = efer & ~EFER_LMA;
trampoline_header->start = (u64) secondary_startup_64;
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index 5770c8097f32..2c19d7fc8a85 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -64,6 +64,8 @@ BEGIN {
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
+ invalid64_expr = "\\(i64\\)"
+ only64_expr = "\\(o64\\)"
rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
rex2_expr = "\\(REX2\\)"
no_rex2_expr = "\\(!REX2\\)"
@@ -319,6 +321,11 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(ext, force64_expr))
flags = add_flags(flags, "INAT_FORCE64")
+ # check invalid in 64-bit (and no only64)
+ if (match(ext, invalid64_expr) &&
+ !match($0, only64_expr))
+ flags = add_flags(flags, "INAT_INV64")
+
# check REX2 not allowed
if (match(ext, no_rex2_expr))
flags = add_flags(flags, "INAT_NO_REX2")
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index fc473ca12c44..942372e69b4d 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -27,9 +27,10 @@
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
#include <asm/iommu.h>
+#include <asm/msr.h>
/*
* The RMP entry information as returned by the RMPREAD instruction.
@@ -136,11 +137,11 @@ static int __mfd_enable(unsigned int cpu)
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
- rdmsrl(MSR_AMD64_SYSCFG, val);
+ rdmsrq(MSR_AMD64_SYSCFG, val);
val |= MSR_AMD64_SYSCFG_MFDM;
- wrmsrl(MSR_AMD64_SYSCFG, val);
+ wrmsrq(MSR_AMD64_SYSCFG, val);
return 0;
}
@@ -157,12 +158,12 @@ static int __snp_enable(unsigned int cpu)
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
- rdmsrl(MSR_AMD64_SYSCFG, val);
+ rdmsrq(MSR_AMD64_SYSCFG, val);
val |= MSR_AMD64_SYSCFG_SNP_EN;
val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
- wrmsrl(MSR_AMD64_SYSCFG, val);
+ wrmsrq(MSR_AMD64_SYSCFG, val);
return 0;
}
@@ -522,7 +523,7 @@ int __init snp_rmptable_init(void)
* Check if SEV-SNP is already enabled, this can happen in case of
* kexec boot.
*/
- rdmsrl(MSR_AMD64_SYSCFG, val);
+ rdmsrq(MSR_AMD64_SYSCFG, val);
if (val & MSR_AMD64_SYSCFG_SNP_EN)
goto skip_enable;
@@ -576,8 +577,8 @@ static bool probe_contiguous_rmptable_info(void)
{
u64 rmp_sz, rmp_base, rmp_end;
- rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
- rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+ rdmsrq(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrq(MSR_AMD64_RMP_END, rmp_end);
if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
pr_err("Memory for the RMP table has not been reserved by BIOS\n");
@@ -610,13 +611,13 @@ static bool probe_segmented_rmptable_info(void)
unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max;
u64 rmp_base, rmp_end;
- rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrq(MSR_AMD64_RMP_BASE, rmp_base);
if (!(rmp_base & RMP_ADDR_MASK)) {
pr_err("Memory for the RMP table has not been reserved by BIOS\n");
return false;
}
- rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+ rdmsrq(MSR_AMD64_RMP_END, rmp_end);
WARN_ONCE(rmp_end & RMP_ADDR_MASK,
"Segmented RMP enabled but RMP_END MSR is non-zero\n");
@@ -652,7 +653,7 @@ static bool probe_segmented_rmptable_info(void)
bool snp_probe_rmptable_info(void)
{
if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP))
- rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg);
+ rdmsrq(MSR_AMD64_RMP_CFG, rmp_cfg);
if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED)
return probe_segmented_rmptable_info();
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 846b5737d320..26bbaf4b7330 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -49,7 +49,7 @@
#include <xen/hvc-console.h>
#include <xen/acpi.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
@@ -61,6 +61,7 @@
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
@@ -1086,15 +1087,15 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_do_read_msr(unsigned int msr, int *err)
+static u64 xen_do_read_msr(u32 msr, int *err)
{
u64 val = 0; /* Avoid uninitialized value for safe variant. */
- if (pmu_msr_read(msr, &val, err))
+ if (pmu_msr_chk_emulated(msr, &val, true))
return val;
if (err)
- val = native_read_msr_safe(msr, err);
+ *err = native_read_msr_safe(msr, &val);
else
val = native_read_msr(msr);
@@ -1110,17 +1111,9 @@ static u64 xen_do_read_msr(unsigned int msr, int *err)
return val;
}
-static void set_seg(unsigned int which, unsigned int low, unsigned int high,
- int *err)
+static void set_seg(u32 which, u64 base)
{
- u64 base = ((u64)high << 32) | low;
-
- if (HYPERVISOR_set_segment_base(which, base) == 0)
- return;
-
- if (err)
- *err = -EIO;
- else
+ if (HYPERVISOR_set_segment_base(which, base))
WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
}
@@ -1129,20 +1122,19 @@ static void set_seg(unsigned int which, unsigned int low, unsigned int high,
* With err == NULL write_msr() semantics are selected.
* Supplying an err pointer requires err to be pre-initialized with 0.
*/
-static void xen_do_write_msr(unsigned int msr, unsigned int low,
- unsigned int high, int *err)
+static void xen_do_write_msr(u32 msr, u64 val, int *err)
{
switch (msr) {
case MSR_FS_BASE:
- set_seg(SEGBASE_FS, low, high, err);
+ set_seg(SEGBASE_FS, val);
break;
case MSR_KERNEL_GS_BASE:
- set_seg(SEGBASE_GS_USER, low, high, err);
+ set_seg(SEGBASE_GS_USER, val);
break;
case MSR_GS_BASE:
- set_seg(SEGBASE_GS_KERNEL, low, high, err);
+ set_seg(SEGBASE_GS_KERNEL, val);
break;
case MSR_STAR:
@@ -1158,42 +1150,45 @@ static void xen_do_write_msr(unsigned int msr, unsigned int low,
break;
default:
- if (!pmu_msr_write(msr, low, high, err)) {
- if (err)
- *err = native_write_msr_safe(msr, low, high);
- else
- native_write_msr(msr, low, high);
- }
+ if (pmu_msr_chk_emulated(msr, &val, false))
+ return;
+
+ if (err)
+ *err = native_write_msr_safe(msr, val);
+ else
+ native_write_msr(msr, val);
}
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static int xen_read_msr_safe(u32 msr, u64 *val)
{
- return xen_do_read_msr(msr, err);
+ int err = 0;
+
+ *val = xen_do_read_msr(msr, &err);
+ return err;
}
-static int xen_write_msr_safe(unsigned int msr, unsigned int low,
- unsigned int high)
+static int xen_write_msr_safe(u32 msr, u64 val)
{
int err = 0;
- xen_do_write_msr(msr, low, high, &err);
+ xen_do_write_msr(msr, val, &err);
return err;
}
-static u64 xen_read_msr(unsigned int msr)
+static u64 xen_read_msr(u32 msr)
{
- int err;
+ int err = 0;
return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
-static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+static void xen_write_msr(u32 msr, u64 val)
{
int err;
- xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
+ xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 38971c6dcd4b..2a4a8deaf612 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -578,7 +578,6 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
xen_mc_issue(XEN_LAZY_MMU);
}
-#if CONFIG_PGTABLE_LEVELS >= 5
__visible p4dval_t xen_p4d_val(p4d_t p4d)
{
return pte_mfn_to_pfn(p4d.p4d);
@@ -592,7 +591,6 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d)
return native_make_p4d(p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
-#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
void (*func)(struct mm_struct *mm, struct page *,
@@ -2222,10 +2220,8 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = {
.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
-#if CONFIG_PGTABLE_LEVELS >= 5
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
-#endif
.enter_mmap = xen_enter_mmap,
.exit_mmap = xen_exit_mmap,
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index f06987b0efc3..8f89ce0b67e3 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -2,6 +2,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
+#include <asm/msr.h>
#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
@@ -128,7 +129,7 @@ static inline uint32_t get_fam15h_addr(u32 addr)
return addr;
}
-static inline bool is_amd_pmu_msr(unsigned int msr)
+static bool is_amd_pmu_msr(u32 msr)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -194,8 +195,7 @@ static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
}
}
-static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
- int index, bool is_read)
+static bool xen_intel_pmu_emulate(u32 msr, u64 *val, int type, int index, bool is_read)
{
uint64_t *reg = NULL;
struct xen_pmu_intel_ctxt *ctxt;
@@ -257,7 +257,7 @@ static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
return false;
}
-static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
+static bool xen_amd_pmu_emulate(u32 msr, u64 *val, bool is_read)
{
uint64_t *reg = NULL;
int i, off = 0;
@@ -298,55 +298,20 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
return false;
}
-static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read,
- bool *emul)
+bool pmu_msr_chk_emulated(u32 msr, u64 *val, bool is_read)
{
int type, index = 0;
if (is_amd_pmu_msr(msr))
- *emul = xen_amd_pmu_emulate(msr, val, is_read);
- else if (is_intel_pmu_msr(msr, &type, &index))
- *emul = xen_intel_pmu_emulate(msr, val, type, index, is_read);
- else
- return false;
-
- return true;
-}
-
-bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
-{
- bool emulated;
+ return xen_amd_pmu_emulate(msr, val, is_read);
- if (!pmu_msr_chk_emulated(msr, val, true, &emulated))
- return false;
+ if (is_intel_pmu_msr(msr, &type, &index))
+ return xen_intel_pmu_emulate(msr, val, type, index, is_read);
- if (!emulated) {
- *val = err ? native_read_msr_safe(msr, err)
- : native_read_msr(msr);
- }
-
- return true;
-}
-
-bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
-{
- uint64_t val = ((uint64_t)high << 32) | low;
- bool emulated;
-
- if (!pmu_msr_chk_emulated(msr, &val, false, &emulated))
- return false;
-
- if (!emulated) {
- if (err)
- *err = native_write_msr_safe(msr, low, high);
- else
- native_write_msr(msr, low, high);
- }
-
- return true;
+ return false;
}
-static unsigned long long xen_amd_read_pmc(int counter)
+static u64 xen_amd_read_pmc(int counter)
{
struct xen_pmu_amd_ctxt *ctxt;
uint64_t *counter_regs;
@@ -354,11 +319,12 @@ static unsigned long long xen_amd_read_pmc(int counter)
uint8_t xenpmu_flags = get_xenpmu_flags();
if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
- uint32_t msr;
- int err;
+ u32 msr;
+ u64 val;
msr = amd_counters_base + (counter * amd_msr_step);
- return native_read_msr_safe(msr, &err);
+ native_read_msr_safe(msr, &val);
+ return val;
}
ctxt = &xenpmu_data->pmu.c.amd;
@@ -366,7 +332,7 @@ static unsigned long long xen_amd_read_pmc(int counter)
return counter_regs[counter];
}
-static unsigned long long xen_intel_read_pmc(int counter)
+static u64 xen_intel_read_pmc(int counter)
{
struct xen_pmu_intel_ctxt *ctxt;
uint64_t *fixed_counters;
@@ -375,15 +341,16 @@ static unsigned long long xen_intel_read_pmc(int counter)
uint8_t xenpmu_flags = get_xenpmu_flags();
if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
- uint32_t msr;
- int err;
+ u32 msr;
+ u64 val;
if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
else
msr = MSR_IA32_PERFCTR0 + counter;
- return native_read_msr_safe(msr, &err);
+ native_read_msr_safe(msr, &val);
+ return val;
}
ctxt = &xenpmu_data->pmu.c.intel;
@@ -396,7 +363,7 @@ static unsigned long long xen_intel_read_pmc(int counter)
return arch_cntr_pair[counter].counter;
}
-unsigned long long xen_read_pmc(int counter)
+u64 xen_read_pmc(int counter)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return xen_amd_read_pmc(counter);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 77a6ea1c60e4..ba2f17e64321 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -13,6 +13,7 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/fixmap.h>
+#include <asm/msr.h>
#include "xen-ops.h"
@@ -39,7 +40,7 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data)
{
if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
- wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+ wrmsrq(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
/* Boot processor notified via generic timekeeping_resume() */
if (smp_processor_id() == 0)
@@ -55,9 +56,9 @@ static void xen_vcpu_notify_suspend(void *data)
tick_suspend_local();
if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
- rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+ rdmsrq(MSR_IA32_SPEC_CTRL, tmp);
this_cpu_write(spec_ctrl, tmp);
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ wrmsrq(MSR_IA32_SPEC_CTRL, 0);
}
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 25e318ef27d6..090349baec09 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -271,10 +271,9 @@ void xen_pmu_finish(int cpu);
static inline void xen_pmu_init(int cpu) {}
static inline void xen_pmu_finish(int cpu) {}
#endif
-bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
-bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
+bool pmu_msr_chk_emulated(u32 msr, u64 *val, bool is_read);
int pmu_apic_update(uint32_t reg);
-unsigned long long xen_read_pmc(int counter);
+u64 xen_read_pmc(int counter);
#ifdef CONFIG_SMP
diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig
index 91c4c4cae8a7..49f50d1bd724 100644
--- a/arch/xtensa/configs/cadence_csp_defconfig
+++ b/arch/xtensa/configs/cadence_csp_defconfig
@@ -1,6 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_USELIB=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IRQ_TIME_ACCOUNTING=y
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index 183618090d05..223f1d452310 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -388,8 +388,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
struct pt_regs *regs = get_irq_regs();
perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- xtensa_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
rc = IRQ_HANDLED;
diff --git a/block/Kconfig b/block/Kconfig
index df8973bc0539..15027963472d 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -211,14 +211,6 @@ config BLK_INLINE_ENCRYPTION_FALLBACK
source "block/partitions/Kconfig"
-config BLK_MQ_PCI
- def_bool PCI
-
-config BLK_MQ_VIRTIO
- bool
- depends on VIRTIO
- default y
-
config BLK_PM
def_bool PM
diff --git a/block/Makefile b/block/Makefile
index 3a941dc0d27f..c65f4da93702 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,13 +5,12 @@
obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
- blk-merge.o blk-timeout.o \
- blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
+ blk-merge.o blk-timeout.o blk-lib.o blk-mq.o \
+ blk-mq-tag.o blk-mq-dma.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
disk-events.o blk-ia-ranges.o early-lookup.o
-obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
diff --git a/block/bdev.c b/block/bdev.c
index 889ec6e002d7..b77ddd12dc06 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1335,7 +1335,8 @@ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
generic_fill_statx_atomic_writes(stat,
queue_atomic_write_unit_min_bytes(bd_queue),
- queue_atomic_write_unit_max_bytes(bd_queue));
+ queue_atomic_write_unit_max_bytes(bd_queue),
+ 0);
}
stat->blksize = bdev_io_min(bdev);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index abd80dc13562..0cb1e9873aab 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7210,8 +7210,8 @@ static void bfq_exit_queue(struct elevator_queue *e)
#endif
blk_stat_disable_accounting(bfqd->queue);
- clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
- wbt_enable_default(bfqd->queue->disk);
+ blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue);
+ set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags);
kfree(bfqd);
}
@@ -7397,7 +7397,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
/* We dispatch from request queue wide instead of hw queue */
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
- set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
+ blk_queue_flag_set(QUEUE_FLAG_DISABLE_WBT_DEF, q);
wbt_disable_default(q->disk);
blk_stat_enable_accounting(q);
diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index e524c609be50..9c6657664792 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -9,6 +9,7 @@
* not aware of PI.
*/
#include <linux/blk-integrity.h>
+#include <linux/t10-pi.h>
#include <linux/workqueue.h>
#include "blk.h"
@@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work)
bio_endio(bio);
}
+#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
+static bool bip_should_check(struct bio_integrity_payload *bip)
+{
+ return bip->bip_flags & BIP_CHECK_FLAGS;
+}
+
+static bool bi_offload_capable(struct blk_integrity *bi)
+{
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_CRC64:
+ return bi->tuple_size == sizeof(struct crc64_pi_tuple);
+ case BLK_INTEGRITY_CSUM_CRC:
+ case BLK_INTEGRITY_CSUM_IP:
+ return bi->tuple_size == sizeof(struct t10_pi_tuple);
+ default:
+ pr_warn_once("%s: unknown integrity checksum type:%d\n",
+ __func__, bi->csum_type);
+ fallthrough;
+ case BLK_INTEGRITY_CSUM_NONE:
+ return false;
+ }
+}
+
/**
* __bio_integrity_endio - Integrity I/O completion function
* @bio: Protected bio
@@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
struct bio_integrity_data *bid =
container_of(bip, struct bio_integrity_data, bip);
- if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
+ if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+ bip_should_check(bip)) {
INIT_WORK(&bid->work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bid->work);
return false;
@@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_data *bid;
+ bool set_flags = true;
gfp_t gfp = GFP_NOIO;
unsigned int len;
void *buf;
@@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio)
switch (bio_op(bio)) {
case REQ_OP_READ:
- if (bi->flags & BLK_INTEGRITY_NOVERIFY)
- return true;
+ if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
+ if (bi_offload_capable(bi))
+ return true;
+ set_flags = false;
+ }
break;
case REQ_OP_WRITE:
- if (bi->flags & BLK_INTEGRITY_NOGENERATE)
- return true;
-
/*
* Zero the memory allocated to not leak uninitialized kernel
* memory to disk for non-integrity metadata where nothing else
* initializes the memory.
*/
- if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+ if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+ if (bi_offload_capable(bi))
+ return true;
+ set_flags = false;
+ gfp |= __GFP_ZERO;
+ } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
gfp |= __GFP_ZERO;
break;
default:
@@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio)
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
- if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
- bid->bip.bip_flags |= BIP_IP_CHECKSUM;
- if (bi->csum_type)
- bid->bip.bip_flags |= BIP_CHECK_GUARD;
- if (bi->flags & BLK_INTEGRITY_REF_TAG)
- bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+ if (set_flags) {
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+ bid->bip.bip_flags |= BIP_IP_CHECKSUM;
+ if (bi->csum_type)
+ bid->bip.bip_flags |= BIP_CHECK_GUARD;
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+ }
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf)) < len)
goto err_end_io;
/* Auto-generate integrity metadata if this is a write */
- if (bio_data_dir(bio) == WRITE)
+ if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
blk_integrity_generate(bio);
else
bid->saved_bio_iter = bio->bi_iter;
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 43ef6bd06c85..cb94e9be26dc 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -127,10 +127,8 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
if (bip->bip_vcnt > 0) {
struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
- bool same_page = false;
- if (bvec_try_merge_hw_page(q, bv, page, len, offset,
- &same_page)) {
+ if (bvec_try_merge_hw_page(q, bv, page, len, offset)) {
bip->bip_iter.bi_size += len;
return len;
}
diff --git a/block/bio.c b/block/bio.c
index 4e6c85a33d74..3c0a558c90f5 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
bio->bi_flags = 0;
bio->bi_ioprio = 0;
bio->bi_write_hint = 0;
+ bio->bi_write_stream = 0;
bio->bi_status = 0;
bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0;
@@ -611,7 +612,7 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{
struct bio *bio;
- if (nr_vecs > UIO_MAXIOV)
+ if (nr_vecs > BIO_MAX_INLINE_VECS)
return NULL;
return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
}
@@ -827,6 +828,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_write_stream = bio_src->bi_write_stream;
bio->bi_iter = bio_src->bi_iter;
if (bio->bi_bdev) {
@@ -918,7 +920,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
}
static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
- unsigned int len, unsigned int off, bool *same_page)
+ unsigned int len, unsigned int off)
{
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
@@ -931,9 +933,7 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
if (!zone_device_pages_have_same_pgmap(bv->bv_page, page))
return false;
- *same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) &
- PAGE_MASK));
- if (!*same_page) {
+ if ((vec_end_addr & PAGE_MASK) != ((page_addr + off) & PAGE_MASK)) {
if (IS_ENABLED(CONFIG_KMSAN))
return false;
if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
@@ -953,8 +953,7 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
* helpers to split. Hopefully this will go away soon.
*/
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
- struct page *page, unsigned len, unsigned offset,
- bool *same_page)
+ struct page *page, unsigned len, unsigned offset)
{
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = bvec_phys(bv);
@@ -964,7 +963,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
return false;
if (len > queue_max_segment_size(q) - bv->bv_len)
return false;
- return bvec_try_merge_page(bv, page, len, offset, same_page);
+ return bvec_try_merge_page(bv, page, len, offset);
}
/**
@@ -990,6 +989,22 @@ void __bio_add_page(struct bio *bio, struct page *page,
EXPORT_SYMBOL_GPL(__bio_add_page);
/**
+ * bio_add_virt_nofail - add data in the direct kernel mapping to a bio
+ * @bio: destination bio
+ * @vaddr: data to add
+ * @len: length of the data to add, may cross pages
+ *
+ * Add the data at @vaddr to @bio. The caller must have ensure a segment
+ * is available for the added data. No merging into an existing segment
+ * will be performed.
+ */
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len)
+{
+ __bio_add_page(bio, virt_to_page(vaddr), len, offset_in_page(vaddr));
+}
+EXPORT_SYMBOL_GPL(bio_add_virt_nofail);
+
+/**
* bio_add_page - attempt to add page(s) to bio
* @bio: destination bio
* @page: start page to add
@@ -1002,8 +1017,6 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- bool same_page = false;
-
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
if (bio->bi_iter.bi_size > UINT_MAX - len)
@@ -1011,7 +1024,7 @@ int bio_add_page(struct bio *bio, struct page *page,
if (bio->bi_vcnt > 0 &&
bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
- page, len, offset, &same_page)) {
+ page, len, offset)) {
bio->bi_iter.bi_size += len;
return len;
}
@@ -1058,6 +1071,61 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
}
EXPORT_SYMBOL(bio_add_folio);
+/**
+ * bio_add_vmalloc_chunk - add a vmalloc chunk to a bio
+ * @bio: destination bio
+ * @vaddr: vmalloc address to add
+ * @len: total length in bytes of the data to add
+ *
+ * Add data starting at @vaddr to @bio and return how many bytes were added.
+ * This may be less than the amount originally asked. Returns 0 if no data
+ * could be added to @bio.
+ *
+ * This helper calls flush_kernel_vmap_range() for the range added. For reads
+ * the caller still needs to manually call invalidate_kernel_vmap_range() in
+ * the completion handler.
+ */
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len)
+{
+ unsigned int offset = offset_in_page(vaddr);
+
+ len = min(len, PAGE_SIZE - offset);
+ if (bio_add_page(bio, vmalloc_to_page(vaddr), len, offset) < len)
+ return 0;
+ if (op_is_write(bio_op(bio)))
+ flush_kernel_vmap_range(vaddr, len);
+ return len;
+}
+EXPORT_SYMBOL_GPL(bio_add_vmalloc_chunk);
+
+/**
+ * bio_add_vmalloc - add a vmalloc region to a bio
+ * @bio: destination bio
+ * @vaddr: vmalloc address to add
+ * @len: total length in bytes of the data to add
+ *
+ * Add data starting at @vaddr to @bio. Return %true on success or %false if
+ * @bio does not have enough space for the payload.
+ *
+ * This helper calls flush_kernel_vmap_range() for the range added. For reads
+ * the caller still needs to manually call invalidate_kernel_vmap_range() in
+ * the completion handler.
+ */
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len)
+{
+ do {
+ unsigned int added = bio_add_vmalloc_chunk(bio, vaddr, len);
+
+ if (!added)
+ return false;
+ vaddr += added;
+ len -= added;
+ } while (len);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(bio_add_vmalloc);
+
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
struct folio_iter fi;
@@ -1088,27 +1156,6 @@ void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter)
bio_set_flag(bio, BIO_CLONED);
}
-static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len,
- size_t offset)
-{
- bool same_page = false;
-
- if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len))
- return -EIO;
-
- if (bio->bi_vcnt > 0 &&
- bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
- folio_page(folio, 0), len, offset,
- &same_page)) {
- bio->bi_iter.bi_size += len;
- if (same_page && bio_flagged(bio, BIO_PAGE_PINNED))
- unpin_user_folio(folio, 1);
- return 0;
- }
- bio_add_folio_nofail(bio, folio, len, offset);
- return 0;
-}
-
static unsigned int get_contig_folio_len(unsigned int *num_pages,
struct page **pages, unsigned int i,
struct folio *folio, size_t left,
@@ -1203,6 +1250,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
for (left = size, i = 0; left > 0; left -= len, i += num_pages) {
struct page *page = pages[i];
struct folio *folio = page_folio(page);
+ unsigned int old_vcnt = bio->bi_vcnt;
folio_offset = ((size_t)folio_page_idx(folio, page) <<
PAGE_SHIFT) + offset;
@@ -1215,7 +1263,23 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
len = get_contig_folio_len(&num_pages, pages, i,
folio, left, offset);
- bio_iov_add_folio(bio, folio, len, folio_offset);
+ if (!bio_add_folio(bio, folio, len, folio_offset)) {
+ WARN_ON_ONCE(1);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (bio_flagged(bio, BIO_PAGE_PINNED)) {
+ /*
+ * We're adding another fragment of a page that already
+ * was part of the last segment. Undo our pin as the
+ * page was pinned when an earlier fragment of it was
+ * added to the bio and __bio_release_pages expects a
+ * single pin per page.
+ */
+ if (offset && bio->bi_vcnt == old_vcnt)
+ unpin_user_folio(folio, 1);
+ }
offset = 0;
}
@@ -1301,6 +1365,36 @@ int submit_bio_wait(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio_wait);
+/**
+ * bdev_rw_virt - synchronously read into / write from kernel mapping
+ * @bdev: block device to access
+ * @sector: sector to access
+ * @data: data to read/write
+ * @len: length in byte to read/write
+ * @op: operation (e.g. REQ_OP_READ/REQ_OP_WRITE)
+ *
+ * Performs synchronous I/O to @bdev for @data/@len. @data must be in
+ * the kernel direct mapping and not a vmalloc address.
+ */
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+ size_t len, enum req_op op)
+{
+ struct bio_vec bv;
+ struct bio bio;
+ int error;
+
+ if (WARN_ON_ONCE(is_vmalloc_addr(data)))
+ return -EIO;
+
+ bio_init(&bio, bdev, &bv, 1, op);
+ bio.bi_iter.bi_sector = sector;
+ bio_add_virt_nofail(&bio, data, len);
+ error = submit_bio_wait(&bio);
+ bio_uninit(&bio);
+ return error;
+}
+EXPORT_SYMBOL_GPL(bdev_rw_virt);
+
static void bio_wait_end_io(struct bio *bio)
{
complete(bio->bi_private);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ce93706555c5..5936db7f8475 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1074,8 +1074,8 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
/*
* For covering concurrent parent blkg update from blkg_release().
*
- * When flushing from cgroup, cgroup_rstat_lock is always held, so
- * this lock won't cause contention most of time.
+ * When flushing from cgroup, the subsystem rstat lock is always held,
+ * so this lock won't cause contention most of time.
*/
raw_spin_lock_irqsave(&blkg_stat_lock, flags);
@@ -1144,7 +1144,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
- * cgroups are defined. For that reason, cgroup_rstat_flush in
+ * cgroups are defined. For that reason, css_rstat_flush in
* blkcg_print_stat does not actually fill out the iostat in the root
* cgroup's blkcg_gq.
*
@@ -1253,7 +1253,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
if (!seq_css(sf)->parent)
blkcg_fill_root_iostats();
else
- cgroup_rstat_flush(blkcg->css.cgroup);
+ css_rstat_flush(&blkcg->css);
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@@ -2243,7 +2243,7 @@ void blk_cgroup_bio_start(struct bio *bio)
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
- cgroup_rstat_updated(blkcg->css.cgroup, cpu);
+ css_rstat_updated(&blkcg->css, cpu);
put_cpu();
}
diff --git a/block/blk-core.c b/block/blk-core.c
index e8cc270a453f..b862c66018f2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1018,7 +1018,7 @@ again:
stamp = READ_ONCE(part->bd_stamp);
if (unlikely(time_after(now, stamp)) &&
likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
- (end || part_in_flight(part)))
+ (end || bdev_count_inflight(part)))
__part_stat_add(part, io_ticks, now - stamp);
if (bdev_is_partition(part)) {
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index f154be0b575a..005c9157ffb3 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -173,6 +173,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_write_stream = bio_src->bi_write_stream;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
diff --git a/block/blk-map.c b/block/blk-map.c
index d2f22744b3d1..23e5d5ebe59e 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -317,64 +317,26 @@ static void bio_map_kern_endio(struct bio *bio)
kfree(bio);
}
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-static struct bio *bio_map_kern(struct request_queue *q, void *data,
- unsigned int len, gfp_t gfp_mask)
+static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op,
+ gfp_t gfp_mask)
{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- bool is_vmalloc = is_vmalloc_addr(data);
- struct page *page;
- int offset, i;
+ unsigned int nr_vecs = bio_add_max_vecs(data, len);
struct bio *bio;
- bio = bio_kmalloc(nr_pages, gfp_mask);
+ bio = bio_kmalloc(nr_vecs, gfp_mask);
if (!bio)
return ERR_PTR(-ENOMEM);
- bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
-
- if (is_vmalloc) {
- flush_kernel_vmap_range(data, len);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, op);
+ if (is_vmalloc_addr(data)) {
bio->bi_private = data;
- }
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (!is_vmalloc)
- page = virt_to_page(data);
- else
- page = vmalloc_to_page(data);
- if (bio_add_page(bio, page, bytes, offset) < bytes) {
- /* we don't support partial mappings */
+ if (!bio_add_vmalloc(bio, data, len)) {
bio_uninit(bio);
kfree(bio);
return ERR_PTR(-EINVAL);
}
-
- data += bytes;
- len -= bytes;
- offset = 0;
+ } else {
+ bio_add_virt_nofail(bio, data, len);
}
-
bio->bi_end_io = bio_map_kern_endio;
return bio;
}
@@ -402,17 +364,16 @@ static void bio_copy_kern_endio_read(struct bio *bio)
/**
* bio_copy_kern - copy kernel address into bio
- * @q: the struct request_queue for the bio
* @data: pointer to buffer to copy
* @len: length in bytes
+ * @op: bio/request operation
* @gfp_mask: allocation flags for bio and page allocation
- * @reading: data direction is READ
*
* copy the kernel address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
-static struct bio *bio_copy_kern(struct request_queue *q, void *data,
- unsigned int len, gfp_t gfp_mask, int reading)
+static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
+ gfp_t gfp_mask)
{
unsigned long kaddr = (unsigned long)data;
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -431,7 +392,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
bio = bio_kmalloc(nr_pages, gfp_mask);
if (!bio)
return ERR_PTR(-ENOMEM);
- bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, op);
while (len) {
struct page *page;
@@ -444,7 +405,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
if (!page)
goto cleanup;
- if (!reading)
+ if (op_is_write(op))
memcpy(page_address(page), p, bytes);
if (bio_add_page(bio, page, bytes, 0) < bytes)
@@ -454,11 +415,11 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
p += bytes;
}
- if (reading) {
+ if (op_is_write(op)) {
+ bio->bi_end_io = bio_copy_kern_endio;
+ } else {
bio->bi_end_io = bio_copy_kern_endio_read;
bio->bi_private = data;
- } else {
- bio->bi_end_io = bio_copy_kern_endio;
}
return bio;
@@ -556,8 +517,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
if (map_data)
copy = true;
- else if (blk_queue_may_bounce(q))
- copy = true;
else if (iov_iter_alignment(iter) & align)
copy = true;
else if (iov_iter_is_bvec(iter))
@@ -689,7 +648,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
/**
* blk_rq_map_kern - map kernel data to a request, for passthrough requests
- * @q: request queue where request should be inserted
* @rq: request to fill
* @kbuf: the kernel buffer
* @len: length of user data
@@ -700,31 +658,26 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
* buffer is used. Can be called multiple times to append multiple
* buffers.
*/
-int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
- unsigned int len, gfp_t gfp_mask)
+int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
+ gfp_t gfp_mask)
{
- int reading = rq_data_dir(rq) == READ;
unsigned long addr = (unsigned long) kbuf;
struct bio *bio;
int ret;
- if (len > (queue_max_hw_sectors(q) << 9))
+ if (len > (queue_max_hw_sectors(rq->q) << SECTOR_SHIFT))
return -EINVAL;
if (!len || !kbuf)
return -EINVAL;
- if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) ||
- blk_queue_may_bounce(q))
- bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
+ if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
+ bio = bio_copy_kern(kbuf, len, req_op(rq), gfp_mask);
else
- bio = bio_map_kern(q, kbuf, len, gfp_mask);
+ bio = bio_map_kern(kbuf, len, req_op(rq), gfp_mask);
if (IS_ERR(bio))
return PTR_ERR(bio);
- bio->bi_opf &= ~REQ_OP_MASK;
- bio->bi_opf |= req_op(rq);
-
ret = blk_rq_append_bio(rq, bio);
if (unlikely(ret)) {
bio_uninit(bio);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index fdd4efb54c6c..3af1d284add5 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -7,7 +7,6 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
-#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>
@@ -226,27 +225,6 @@ static inline unsigned get_max_io_size(struct bio *bio,
}
/**
- * get_max_segment_size() - maximum number of bytes to add as a single segment
- * @lim: Request queue limits.
- * @paddr: address of the range to add
- * @len: maximum length available to add at @paddr
- *
- * Returns the maximum number of bytes of the range starting at @paddr that can
- * be added to a single segment.
- */
-static inline unsigned get_max_segment_size(const struct queue_limits *lim,
- phys_addr_t paddr, unsigned int len)
-{
- /*
- * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
- * after having calculated the minimum.
- */
- return min_t(unsigned long, len,
- min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
- (unsigned long)lim->max_segment_size - 1) + 1);
-}
-
-/**
* bvec_split_segs - verify whether or not a bvec should be split in the middle
* @lim: [in] queue limits to split based on
* @bv: [in] bvec to examine
@@ -473,117 +451,6 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
return nr_phys_segs;
}
-struct phys_vec {
- phys_addr_t paddr;
- u32 len;
-};
-
-static bool blk_map_iter_next(struct request *req,
- struct req_iterator *iter, struct phys_vec *vec)
-{
- unsigned int max_size;
- struct bio_vec bv;
-
- if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
- if (!iter->bio)
- return false;
- vec->paddr = bvec_phys(&req->special_vec);
- vec->len = req->special_vec.bv_len;
- iter->bio = NULL;
- return true;
- }
-
- if (!iter->iter.bi_size)
- return false;
-
- bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
- vec->paddr = bvec_phys(&bv);
- max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
- bv.bv_len = min(bv.bv_len, max_size);
- bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
-
- /*
- * If we are entirely done with this bi_io_vec entry, check if the next
- * one could be merged into it. This typically happens when moving to
- * the next bio, but some callers also don't pack bvecs tight.
- */
- while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
- struct bio_vec next;
-
- if (!iter->iter.bi_size) {
- if (!iter->bio->bi_next)
- break;
- iter->bio = iter->bio->bi_next;
- iter->iter = iter->bio->bi_iter;
- }
-
- next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
- if (bv.bv_len + next.bv_len > max_size ||
- !biovec_phys_mergeable(req->q, &bv, &next))
- break;
-
- bv.bv_len += next.bv_len;
- bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
- }
-
- vec->len = bv.bv_len;
- return true;
-}
-
-static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
- struct scatterlist *sglist)
-{
- if (!*sg)
- return sglist;
-
- /*
- * If the driver previously mapped a shorter list, we could see a
- * termination bit prematurely unless it fully inits the sg table
- * on each mapping. We KNOW that there must be more entries here
- * or the driver would be buggy, so force clear the termination bit
- * to avoid doing a full sg_init_table() in drivers for each command.
- */
- sg_unmark_end(*sg);
- return sg_next(*sg);
-}
-
-/*
- * Map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries.
- */
-int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
- struct scatterlist **last_sg)
-{
- struct req_iterator iter = {
- .bio = rq->bio,
- };
- struct phys_vec vec;
- int nsegs = 0;
-
- /* the internal flush request may not have bio attached */
- if (iter.bio)
- iter.iter = iter.bio->bi_iter;
-
- while (blk_map_iter_next(rq, &iter, &vec)) {
- *last_sg = blk_next_sg(last_sg, sglist);
- sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
- offset_in_page(vec.paddr));
- nsegs++;
- }
-
- if (*last_sg)
- sg_mark_end(*last_sg);
-
- /*
- * Something must have been wrong if the figured number of
- * segment is bigger than number of req's physical segments
- */
- WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
-
- return nsegs;
-}
-EXPORT_SYMBOL(__blk_rq_map_sg);
-
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
sector_t offset)
{
@@ -832,6 +699,8 @@ static struct request *attempt_merge(struct request_queue *q,
if (req->bio->bi_write_hint != next->bio->bi_write_hint)
return NULL;
+ if (req->bio->bi_write_stream != next->bio->bi_write_stream)
+ return NULL;
if (req->bio->bi_ioprio != next->bio->bi_ioprio)
return NULL;
if (!blk_atomic_write_mergeable_rqs(req, next))
@@ -953,6 +822,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;
if (rq->bio->bi_write_hint != bio->bi_write_hint)
return false;
+ if (rq->bio->bi_write_stream != bio->bi_write_stream)
+ return false;
if (rq->bio->bi_ioprio != bio->bi_ioprio)
return false;
if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3421b5521fe2..29b3540dd180 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -93,6 +93,8 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
QUEUE_FLAG_NAME(HCTX_ACTIVE),
QUEUE_FLAG_NAME(SQ_SCHED),
+ QUEUE_FLAG_NAME(DISABLE_WBT_DEF),
+ QUEUE_FLAG_NAME(NO_ELV_SWITCH),
};
#undef QUEUE_FLAG_NAME
@@ -624,20 +626,9 @@ void blk_mq_debugfs_register(struct request_queue *q)
debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
- /*
- * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir
- * didn't exist yet (because we don't know what to name the directory
- * until the queue is registered to a gendisk).
- */
- if (q->elevator && !q->sched_debugfs_dir)
- blk_mq_debugfs_register_sched(q);
-
- /* Similarly, blk_mq_init_hctx() couldn't do this previously. */
queue_for_each_hw_ctx(q, hctx, i) {
if (!hctx->debugfs_dir)
blk_mq_debugfs_register_hctx(q, hctx);
- if (q->elevator && !hctx->sched_debugfs_dir)
- blk_mq_debugfs_register_sched_hctx(q, hctx);
}
if (q->rq_qos) {
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
new file mode 100644
index 000000000000..82bae475dfa4
--- /dev/null
+++ b/block/blk-mq-dma.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Christoph Hellwig
+ */
+#include "blk.h"
+
+struct phys_vec {
+ phys_addr_t paddr;
+ u32 len;
+};
+
+static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
+ struct phys_vec *vec)
+{
+ unsigned int max_size;
+ struct bio_vec bv;
+
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ if (!iter->bio)
+ return false;
+ vec->paddr = bvec_phys(&req->special_vec);
+ vec->len = req->special_vec.bv_len;
+ iter->bio = NULL;
+ return true;
+ }
+
+ if (!iter->iter.bi_size)
+ return false;
+
+ bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ vec->paddr = bvec_phys(&bv);
+ max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
+ bv.bv_len = min(bv.bv_len, max_size);
+ bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
+
+ /*
+ * If we are entirely done with this bi_io_vec entry, check if the next
+ * one could be merged into it. This typically happens when moving to
+ * the next bio, but some callers also don't pack bvecs tight.
+ */
+ while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
+ struct bio_vec next;
+
+ if (!iter->iter.bi_size) {
+ if (!iter->bio->bi_next)
+ break;
+ iter->bio = iter->bio->bi_next;
+ iter->iter = iter->bio->bi_iter;
+ }
+
+ next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ if (bv.bv_len + next.bv_len > max_size ||
+ !biovec_phys_mergeable(req->q, &bv, &next))
+ break;
+
+ bv.bv_len += next.bv_len;
+ bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
+ }
+
+ vec->len = bv.bv_len;
+ return true;
+}
+
+static inline struct scatterlist *
+blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
+{
+ if (!*sg)
+ return sglist;
+
+ /*
+ * If the driver previously mapped a shorter list, we could see a
+ * termination bit prematurely unless it fully inits the sg table
+ * on each mapping. We KNOW that there must be more entries here
+ * or the driver would be buggy, so force clear the termination bit
+ * to avoid doing a full sg_init_table() in drivers for each command.
+ */
+ sg_unmark_end(*sg);
+ return sg_next(*sg);
+}
+
+/*
+ * Map a request to scatterlist, return number of sg entries setup. Caller
+ * must make sure sg can hold rq->nr_phys_segments entries.
+ */
+int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
+ struct scatterlist **last_sg)
+{
+ struct req_iterator iter = {
+ .bio = rq->bio,
+ };
+ struct phys_vec vec;
+ int nsegs = 0;
+
+ /* the internal flush request may not have bio attached */
+ if (iter.bio)
+ iter.iter = iter.bio->bi_iter;
+
+ while (blk_map_iter_next(rq, &iter, &vec)) {
+ *last_sg = blk_next_sg(last_sg, sglist);
+ sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
+ offset_in_page(vec.paddr));
+ nsegs++;
+ }
+
+ if (*last_sg)
+ sg_mark_end(*last_sg);
+
+ /*
+ * Something must have been wrong if the figured number of
+ * segment is bigger than number of req's physical segments
+ */
+ WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
+
+ return nsegs;
+}
+EXPORT_SYMBOL(__blk_rq_map_sg);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 109611445d40..55a0fd105147 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,19 +59,17 @@ static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
struct request *rq;
LIST_HEAD(hctx_list);
- unsigned int count = 0;
list_for_each_entry(rq, rq_list, queuelist) {
if (rq->mq_hctx != hctx) {
list_cut_before(&hctx_list, rq_list, &rq->queuelist);
goto dispatch;
}
- count++;
}
list_splice_tail_init(rq_list, &hctx_list);
dispatch:
- return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
+ return blk_mq_dispatch_rq_list(hctx, &hctx_list, false);
}
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
@@ -167,7 +165,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
} while (!list_empty(&rq_list));
} else {
- dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
+ dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, false);
}
if (busy)
@@ -261,7 +259,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
/* round robin for fair dispatch */
ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
- } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
+ } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, false));
WRITE_ONCE(hctx->dispatch_from, ctx);
return ret;
@@ -298,7 +296,7 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart_hctx(hctx);
- if (!blk_mq_dispatch_rq_list(hctx, &rq_list, 0))
+ if (!blk_mq_dispatch_rq_list(hctx, &rq_list, true))
return 0;
need_dispatch = true;
} else {
@@ -312,7 +310,7 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
if (need_dispatch)
return blk_mq_do_dispatch_ctx(hctx);
blk_mq_flush_busy_ctxs(hctx, &rq_list);
- blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
+ blk_mq_dispatch_rq_list(hctx, &rq_list, true);
return 0;
}
@@ -436,6 +434,30 @@ static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
return 0;
}
+void blk_mq_sched_reg_debugfs(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned long i;
+
+ mutex_lock(&q->debugfs_mutex);
+ blk_mq_debugfs_register_sched(q);
+ queue_for_each_hw_ctx(q, hctx, i)
+ blk_mq_debugfs_register_sched_hctx(q, hctx);
+ mutex_unlock(&q->debugfs_mutex);
+}
+
+void blk_mq_sched_unreg_debugfs(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned long i;
+
+ mutex_lock(&q->debugfs_mutex);
+ queue_for_each_hw_ctx(q, hctx, i)
+ blk_mq_debugfs_unregister_sched_hctx(hctx);
+ blk_mq_debugfs_unregister_sched(q);
+ mutex_unlock(&q->debugfs_mutex);
+}
+
/* caller must have a reference to @e, will grab another one if successful */
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
@@ -469,10 +491,6 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
if (ret)
goto err_free_map_and_rqs;
- mutex_lock(&q->debugfs_mutex);
- blk_mq_debugfs_register_sched(q);
- mutex_unlock(&q->debugfs_mutex);
-
queue_for_each_hw_ctx(q, hctx, i) {
if (e->ops.init_hctx) {
ret = e->ops.init_hctx(hctx, i);
@@ -484,11 +502,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
return ret;
}
}
- mutex_lock(&q->debugfs_mutex);
- blk_mq_debugfs_register_sched_hctx(q, hctx);
- mutex_unlock(&q->debugfs_mutex);
}
-
return 0;
err_free_map_and_rqs:
@@ -527,10 +541,6 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
unsigned int flags = 0;
queue_for_each_hw_ctx(q, hctx, i) {
- mutex_lock(&q->debugfs_mutex);
- blk_mq_debugfs_unregister_sched_hctx(hctx);
- mutex_unlock(&q->debugfs_mutex);
-
if (e->type->ops.exit_hctx && hctx->sched_data) {
e->type->ops.exit_hctx(hctx, i);
hctx->sched_data = NULL;
@@ -538,12 +548,9 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
flags = hctx->flags;
}
- mutex_lock(&q->debugfs_mutex);
- blk_mq_debugfs_unregister_sched(q);
- mutex_unlock(&q->debugfs_mutex);
-
if (e->type->ops.exit_sched)
e->type->ops.exit_sched(e);
blk_mq_sched_tags_teardown(q, flags);
+ set_bit(ELEVATOR_FLAG_DYING, &q->elevator->flags);
q->elevator = NULL;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2697db59109..4806b867e37d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,7 @@ struct mq_inflight {
unsigned int inflight[2];
};
-static bool blk_mq_check_inflight(struct request *rq, void *priv)
+static bool blk_mq_check_in_driver(struct request *rq, void *priv)
{
struct mq_inflight *mi = priv;
@@ -101,24 +101,14 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv)
return true;
}
-unsigned int blk_mq_in_flight(struct request_queue *q,
- struct block_device *part)
+void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2])
{
struct mq_inflight mi = { .part = part };
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
-
- return mi.inflight[0] + mi.inflight[1];
-}
-
-void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
- unsigned int inflight[2])
-{
- struct mq_inflight mi = { .part = part };
-
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
- inflight[0] = mi.inflight[0];
- inflight[1] = mi.inflight[1];
+ blk_mq_queue_tag_busy_iter(bdev_get_queue(part), blk_mq_check_in_driver,
+ &mi);
+ inflight[READ] = mi.inflight[READ];
+ inflight[WRITE] = mi.inflight[WRITE];
}
#ifdef CONFIG_LOCKDEP
@@ -584,9 +574,13 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
+ .shallow_depth = 0,
.cmd_flags = opf,
+ .rq_flags = 0,
.nr_tags = plug->nr_ios,
.cached_rqs = &plug->cached_rqs,
+ .ctx = NULL,
+ .hctx = NULL
};
struct request *rq;
@@ -646,8 +640,13 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
+ .shallow_depth = 0,
.cmd_flags = opf,
+ .rq_flags = 0,
.nr_tags = 1,
+ .cached_rqs = NULL,
+ .ctx = NULL,
+ .hctx = NULL
};
int ret;
@@ -675,8 +674,13 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
+ .shallow_depth = 0,
.cmd_flags = opf,
+ .rq_flags = 0,
.nr_tags = 1,
+ .cached_rqs = NULL,
+ .ctx = NULL,
+ .hctx = NULL
};
u64 alloc_time_ns = 0;
struct request *rq;
@@ -2080,7 +2084,7 @@ static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
* Returns true if we did some work AND can potentially do more.
*/
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
- unsigned int nr_budgets)
+ bool get_budget)
{
enum prep_dispatch prep;
struct request_queue *q = hctx->queue;
@@ -2102,7 +2106,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
rq = list_first_entry(list, struct request, queuelist);
WARN_ON_ONCE(hctx != rq->mq_hctx);
- prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
+ prep = blk_mq_prep_dispatch_rq(rq, get_budget);
if (prep != PREP_DISPATCH_OK)
break;
@@ -2111,12 +2115,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
bd.rq = rq;
bd.last = list_empty(list);
- /*
- * once the request is queued to lld, no need to cover the
- * budget any more
- */
- if (nr_budgets)
- nr_budgets--;
ret = q->mq_ops->queue_rq(hctx, &bd);
switch (ret) {
case BLK_STS_OK:
@@ -2150,7 +2148,11 @@ out:
((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
blk_mq_is_shared_tags(hctx->flags));
- if (nr_budgets)
+ /*
+ * If the caller allocated budgets, free the budgets of the
+ * requests that have not yet been passed to the block driver.
+ */
+ if (!get_budget)
blk_mq_release_budgets(q, list);
spin_lock(&hctx->lock);
@@ -2778,15 +2780,15 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
return __blk_mq_issue_directly(hctx, rq, last);
}
-static void blk_mq_plug_issue_direct(struct blk_plug *plug)
+static void blk_mq_issue_direct(struct rq_list *rqs)
{
struct blk_mq_hw_ctx *hctx = NULL;
struct request *rq;
int queued = 0;
blk_status_t ret = BLK_STS_OK;
- while ((rq = rq_list_pop(&plug->mq_list))) {
- bool last = rq_list_empty(&plug->mq_list);
+ while ((rq = rq_list_pop(rqs))) {
+ bool last = rq_list_empty(rqs);
if (hctx != rq->mq_hctx) {
if (hctx) {
@@ -2817,15 +2819,64 @@ out:
blk_mq_commit_rqs(hctx, queued, false);
}
-static void __blk_mq_flush_plug_list(struct request_queue *q,
- struct blk_plug *plug)
+static void __blk_mq_flush_list(struct request_queue *q, struct rq_list *rqs)
{
if (blk_queue_quiesced(q))
return;
- q->mq_ops->queue_rqs(&plug->mq_list);
+ q->mq_ops->queue_rqs(rqs);
+}
+
+static unsigned blk_mq_extract_queue_requests(struct rq_list *rqs,
+ struct rq_list *queue_rqs)
+{
+ struct request *rq = rq_list_pop(rqs);
+ struct request_queue *this_q = rq->q;
+ struct request **prev = &rqs->head;
+ struct rq_list matched_rqs = {};
+ struct request *last = NULL;
+ unsigned depth = 1;
+
+ rq_list_add_tail(&matched_rqs, rq);
+ while ((rq = *prev)) {
+ if (rq->q == this_q) {
+ /* move rq from rqs to matched_rqs */
+ *prev = rq->rq_next;
+ rq_list_add_tail(&matched_rqs, rq);
+ depth++;
+ } else {
+ /* leave rq in rqs */
+ prev = &rq->rq_next;
+ last = rq;
+ }
+ }
+
+ rqs->tail = last;
+ *queue_rqs = matched_rqs;
+ return depth;
+}
+
+static void blk_mq_dispatch_queue_requests(struct rq_list *rqs, unsigned depth)
+{
+ struct request_queue *q = rq_list_peek(rqs)->q;
+
+ trace_block_unplug(q, depth, true);
+
+ /*
+ * Peek first request and see if we have a ->queue_rqs() hook.
+ * If we do, we can dispatch the whole list in one go.
+ * We already know at this point that all requests belong to the
+ * same queue, caller must ensure that's the case.
+ */
+ if (q->mq_ops->queue_rqs) {
+ blk_mq_run_dispatch_ops(q, __blk_mq_flush_list(q, rqs));
+ if (rq_list_empty(rqs))
+ return;
+ }
+
+ blk_mq_run_dispatch_ops(q, blk_mq_issue_direct(rqs));
}
-static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
+static void blk_mq_dispatch_list(struct rq_list *rqs, bool from_sched)
{
struct blk_mq_hw_ctx *this_hctx = NULL;
struct blk_mq_ctx *this_ctx = NULL;
@@ -2835,7 +2886,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
LIST_HEAD(list);
do {
- struct request *rq = rq_list_pop(&plug->mq_list);
+ struct request *rq = rq_list_pop(rqs);
if (!this_hctx) {
this_hctx = rq->mq_hctx;
@@ -2848,9 +2899,9 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
}
list_add_tail(&rq->queuelist, &list);
depth++;
- } while (!rq_list_empty(&plug->mq_list));
+ } while (!rq_list_empty(rqs));
- plug->mq_list = requeue_list;
+ *rqs = requeue_list;
trace_block_unplug(this_hctx->queue, depth, !from_sched);
percpu_ref_get(&this_hctx->queue->q_usage_counter);
@@ -2870,9 +2921,21 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
percpu_ref_put(&this_hctx->queue->q_usage_counter);
}
+static void blk_mq_dispatch_multiple_queue_requests(struct rq_list *rqs)
+{
+ do {
+ struct rq_list queue_rqs;
+ unsigned depth;
+
+ depth = blk_mq_extract_queue_requests(rqs, &queue_rqs);
+ blk_mq_dispatch_queue_requests(&queue_rqs, depth);
+ while (!rq_list_empty(&queue_rqs))
+ blk_mq_dispatch_list(&queue_rqs, false);
+ } while (!rq_list_empty(rqs));
+}
+
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
- struct request *rq;
unsigned int depth;
/*
@@ -2887,34 +2950,19 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
depth = plug->rq_count;
plug->rq_count = 0;
- if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
- struct request_queue *q;
-
- rq = rq_list_peek(&plug->mq_list);
- q = rq->q;
- trace_block_unplug(q, depth, true);
-
- /*
- * Peek first request and see if we have a ->queue_rqs() hook.
- * If we do, we can dispatch the whole plug list in one go. We
- * already know at this point that all requests belong to the
- * same queue, caller must ensure that's the case.
- */
- if (q->mq_ops->queue_rqs) {
- blk_mq_run_dispatch_ops(q,
- __blk_mq_flush_plug_list(q, plug));
- if (rq_list_empty(&plug->mq_list))
- return;
+ if (!plug->has_elevator && !from_schedule) {
+ if (plug->multiple_queues) {
+ blk_mq_dispatch_multiple_queue_requests(&plug->mq_list);
+ return;
}
- blk_mq_run_dispatch_ops(q,
- blk_mq_plug_issue_direct(plug));
+ blk_mq_dispatch_queue_requests(&plug->mq_list, depth);
if (rq_list_empty(&plug->mq_list))
return;
}
do {
- blk_mq_dispatch_plug_list(plug, from_schedule);
+ blk_mq_dispatch_list(&plug->mq_list, from_schedule);
} while (!rq_list_empty(&plug->mq_list));
}
@@ -2969,8 +3017,14 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
{
struct blk_mq_alloc_data data = {
.q = q,
- .nr_tags = 1,
+ .flags = 0,
+ .shallow_depth = 0,
.cmd_flags = bio->bi_opf,
+ .rq_flags = 0,
+ .nr_tags = 1,
+ .cached_rqs = NULL,
+ .ctx = NULL,
+ .hctx = NULL
};
struct request *rq;
@@ -3080,8 +3134,6 @@ void blk_mq_submit_bio(struct bio *bio)
goto new_request;
}
- bio = blk_queue_bounce(bio, q);
-
/*
* The cached request already holds a q_usage_counter reference and we
* don't have to acquire a new one if we use it.
@@ -4094,8 +4146,6 @@ static void blk_mq_map_swqueue(struct request_queue *q)
struct blk_mq_ctx *ctx;
struct blk_mq_tag_set *set = q->tag_set;
- mutex_lock(&q->elevator_lock);
-
queue_for_each_hw_ctx(q, hctx, i) {
cpumask_clear(hctx->cpumask);
hctx->nr_ctx = 0;
@@ -4200,8 +4250,6 @@ static void blk_mq_map_swqueue(struct request_queue *q)
hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
}
-
- mutex_unlock(&q->elevator_lock);
}
/*
@@ -4505,16 +4553,9 @@ static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
}
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
- struct request_queue *q, bool lock)
+ struct request_queue *q)
{
- if (lock) {
- /* protect against switching io scheduler */
- mutex_lock(&q->elevator_lock);
- __blk_mq_realloc_hw_ctxs(set, q);
- mutex_unlock(&q->elevator_lock);
- } else {
- __blk_mq_realloc_hw_ctxs(set, q);
- }
+ __blk_mq_realloc_hw_ctxs(set, q);
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4546,7 +4587,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
xa_init(&q->hctx_table);
- blk_mq_realloc_hw_ctxs(set, q, false);
+ blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
goto err_hctxs;
@@ -4563,8 +4604,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->nr_requests = set->queue_depth;
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
- blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
+ blk_mq_add_queue_tag_set(set, q);
return 0;
err_hctxs:
@@ -4784,6 +4825,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
goto out_free_srcu;
}
+ init_rwsem(&set->update_nr_hwq_lock);
+
ret = -ENOMEM;
set->tags = kcalloc_node(set->nr_hw_queues,
sizeof(struct blk_mq_tags *), GFP_KERNEL,
@@ -4923,88 +4966,10 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
return ret;
}
-/*
- * request_queue and elevator_type pair.
- * It is just used by __blk_mq_update_nr_hw_queues to cache
- * the elevator_type associated with a request_queue.
- */
-struct blk_mq_qe_pair {
- struct list_head node;
- struct request_queue *q;
- struct elevator_type *type;
-};
-
-/*
- * Cache the elevator_type in qe pair list and switch the
- * io scheduler to 'none'
- */
-static bool blk_mq_elv_switch_none(struct list_head *head,
- struct request_queue *q)
-{
- struct blk_mq_qe_pair *qe;
-
- qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
- if (!qe)
- return false;
-
- /* Accessing q->elevator needs protection from ->elevator_lock. */
- mutex_lock(&q->elevator_lock);
-
- if (!q->elevator) {
- kfree(qe);
- goto unlock;
- }
-
- INIT_LIST_HEAD(&qe->node);
- qe->q = q;
- qe->type = q->elevator->type;
- /* keep a reference to the elevator module as we'll switch back */
- __elevator_get(qe->type);
- list_add(&qe->node, head);
- elevator_disable(q);
-unlock:
- mutex_unlock(&q->elevator_lock);
-
- return true;
-}
-
-static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head,
- struct request_queue *q)
-{
- struct blk_mq_qe_pair *qe;
-
- list_for_each_entry(qe, head, node)
- if (qe->q == q)
- return qe;
-
- return NULL;
-}
-
-static void blk_mq_elv_switch_back(struct list_head *head,
- struct request_queue *q)
-{
- struct blk_mq_qe_pair *qe;
- struct elevator_type *t;
-
- qe = blk_lookup_qe_pair(head, q);
- if (!qe)
- return;
- t = qe->type;
- list_del(&qe->node);
- kfree(qe);
-
- mutex_lock(&q->elevator_lock);
- elevator_switch(q, t);
- /* drop the reference acquired in blk_mq_elv_switch_none */
- elevator_put(t);
- mutex_unlock(&q->elevator_lock);
-}
-
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int nr_hw_queues)
{
struct request_queue *q;
- LIST_HEAD(head);
int prev_nr_hw_queues = set->nr_hw_queues;
unsigned int memflags;
int i;
@@ -5019,30 +4984,24 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
return;
memflags = memalloc_noio_save();
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_freeze_queue_nomemsave(q);
-
- /*
- * Switch IO scheduler to 'none', cleaning up the data associated
- * with the previous scheduler. We will switch back once we are done
- * updating the new sw to hw queue mappings.
- */
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- if (!blk_mq_elv_switch_none(&head, q))
- goto switch_back;
-
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_debugfs_unregister_hctxs(q);
blk_mq_sysfs_unregister_hctxs(q);
}
- if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_freeze_queue_nomemsave(q);
+
+ if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) {
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_unfreeze_queue_nomemrestore(q);
goto reregister;
+ }
fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_realloc_hw_ctxs(set, q, true);
+ __blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues;
@@ -5058,18 +5017,18 @@ fallback:
blk_mq_map_swqueue(q);
}
+ /* elv_update_nr_hw_queues() unfreeze queue for us */
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ elv_update_nr_hw_queues(q);
+
reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register_hctxs(q);
blk_mq_debugfs_register_hctxs(q);
- }
-
-switch_back:
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_elv_switch_back(&head, q);
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_unfreeze_queue_nomemrestore(q);
+ blk_mq_remove_hw_queues_cpuhp(q);
+ blk_mq_add_hw_queues_cpuhp(q);
+ }
memalloc_noio_restore(memflags);
/* Free the excess tags when nr_hw_queues shrink. */
@@ -5079,9 +5038,11 @@ switch_back:
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
{
+ down_write(&set->update_nr_hwq_lock);
mutex_lock(&set->tag_list_lock);
__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
mutex_unlock(&set->tag_list_lock);
+ up_write(&set->update_nr_hwq_lock);
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 3011a78cf16a..affb2e14b56e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -48,7 +48,7 @@ void blk_mq_exit_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
- unsigned int);
+ bool);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
@@ -246,10 +246,7 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
return hctx->nr_ctx && hctx->tags;
}
-unsigned int blk_mq_in_flight(struct request_queue *q,
- struct block_device *part);
-void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
- unsigned int inflight[2]);
+void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2]);
static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
int budget_token)
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 95982bc46ba1..848591fb3c57 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -2,6 +2,8 @@
#include "blk-rq-qos.h"
+__read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos);
+
/*
* Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
* false if 'v' + 1 would be bigger than 'below'.
@@ -317,6 +319,7 @@ void rq_qos_exit(struct request_queue *q)
struct rq_qos *rqos = q->rq_qos;
q->rq_qos = rqos->next;
rqos->ops->exit(rqos);
+ static_branch_dec(&block_rq_qos);
}
mutex_unlock(&q->rq_qos_mutex);
}
@@ -343,6 +346,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
+ static_branch_inc(&block_rq_qos);
blk_mq_unfreeze_queue(q, memflags);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 37245c97ee61..39749f4066fb 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -12,6 +12,7 @@
#include "blk-mq-debugfs.h"
struct blk_mq_debugfs_attr;
+extern struct static_key_false block_rq_qos;
enum rq_qos_id {
RQ_QOS_WBT,
@@ -112,31 +113,33 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos);
static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio)
{
- if (q->rq_qos)
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
__rq_qos_cleanup(q->rq_qos, bio);
}
static inline void rq_qos_done(struct request_queue *q, struct request *rq)
{
- if (q->rq_qos && !blk_rq_is_passthrough(rq))
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos &&
+ !blk_rq_is_passthrough(rq))
__rq_qos_done(q->rq_qos, rq);
}
static inline void rq_qos_issue(struct request_queue *q, struct request *rq)
{
- if (q->rq_qos)
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
__rq_qos_issue(q->rq_qos, rq);
}
static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
{
- if (q->rq_qos)
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
__rq_qos_requeue(q->rq_qos, rq);
}
static inline void rq_qos_done_bio(struct bio *bio)
{
- if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
+ if (static_branch_unlikely(&block_rq_qos) &&
+ bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
bio_flagged(bio, BIO_QOS_MERGED))) {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (q->rq_qos)
@@ -146,7 +149,7 @@ static inline void rq_qos_done_bio(struct bio *bio)
static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
{
- if (q->rq_qos) {
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) {
bio_set_flag(bio, BIO_QOS_THROTTLED);
__rq_qos_throttle(q->rq_qos, bio);
}
@@ -155,14 +158,14 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
static inline void rq_qos_track(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (q->rq_qos)
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
__rq_qos_track(q->rq_qos, rq, bio);
}
static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (q->rq_qos) {
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) {
bio_set_flag(bio, BIO_QOS_MERGED);
__rq_qos_merge(q->rq_qos, rq, bio);
}
@@ -170,7 +173,7 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
static inline void rq_qos_queue_depth_changed(struct request_queue *q)
{
- if (q->rq_qos)
+ if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
__rq_qos_queue_depth_changed(q->rq_qos);
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4817e7ca03f8..a000daafbfb4 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -124,11 +124,6 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
return 0;
}
- if (lim->features & BLK_FEAT_BOUNCE_HIGH) {
- pr_warn("no bounce buffer support for integrity metadata\n");
- return -EINVAL;
- }
-
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
pr_warn("integrity support disabled.\n");
return -EINVAL;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1f9b45b0b9ee..b2b9b89d6967 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -134,6 +134,8 @@ QUEUE_SYSFS_LIMIT_SHOW(max_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
+QUEUE_SYSFS_LIMIT_SHOW(max_write_streams)
+QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity)
QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
@@ -488,6 +490,8 @@ QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
+QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams");
+QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");
QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
@@ -560,7 +564,7 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
ssize_t ret;
struct request_queue *q = disk->queue;
- mutex_lock(&q->elevator_lock);
+ mutex_lock(&disk->rqos_state_mutex);
if (!wbt_rq_qos(q)) {
ret = -EINVAL;
goto out;
@@ -573,7 +577,7 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
out:
- mutex_unlock(&q->elevator_lock);
+ mutex_unlock(&disk->rqos_state_mutex);
return ret;
}
@@ -593,7 +597,6 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
return -EINVAL;
memflags = blk_mq_freeze_queue(q);
- mutex_lock(&q->elevator_lock);
rqos = wbt_rq_qos(q);
if (!rqos) {
@@ -618,11 +621,12 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
*/
blk_mq_quiesce_queue(q);
+ mutex_lock(&disk->rqos_state_mutex);
wbt_set_min_lat(q, val);
+ mutex_unlock(&disk->rqos_state_mutex);
blk_mq_unquiesce_queue(q);
out:
- mutex_unlock(&q->elevator_lock);
blk_mq_unfreeze_queue(q, memflags);
return ret;
@@ -642,6 +646,8 @@ static struct attribute *queue_attrs[] = {
&queue_max_discard_segments_entry.attr,
&queue_max_integrity_segments_entry.attr,
&queue_max_segment_size_entry.attr,
+ &queue_max_write_streams_entry.attr,
+ &queue_write_stream_granularity_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr,
&queue_physical_block_size_entry.attr,
@@ -869,16 +875,9 @@ int blk_register_queue(struct gendisk *disk)
if (ret)
goto out_unregister_ia_ranges;
- mutex_lock(&q->elevator_lock);
- if (q->elevator) {
- ret = elv_register_queue(q, false);
- if (ret) {
- mutex_unlock(&q->elevator_lock);
- goto out_crypto_sysfs_unregister;
- }
- }
+ if (queue_is_mq(q))
+ elevator_set_default(q);
wbt_enable_default(disk);
- mutex_unlock(&q->elevator_lock);
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
@@ -902,8 +901,6 @@ int blk_register_queue(struct gendisk *disk)
return ret;
-out_crypto_sysfs_unregister:
- blk_crypto_sysfs_unregister(disk);
out_unregister_ia_ranges:
disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
@@ -951,10 +948,6 @@ void blk_unregister_queue(struct gendisk *disk)
blk_mq_sysfs_unregister(disk);
blk_crypto_sysfs_unregister(disk);
- mutex_lock(&q->elevator_lock);
- elv_unregister_queue(q);
- mutex_unlock(&q->elevator_lock);
-
mutex_lock(&q->sysfs_lock);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
@@ -963,5 +956,8 @@ void blk_unregister_queue(struct gendisk *disk)
kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
kobject_del(&disk->queue_kobj);
+ if (queue_is_mq(q))
+ elevator_set_none(q);
+
blk_debugfs_remove(disk);
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index d6dd2e047874..bd15357f23bd 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -143,7 +143,8 @@ static inline unsigned int throtl_bio_data_size(struct bio *bio)
static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
INIT_LIST_HEAD(&qn->node);
- bio_list_init(&qn->bios);
+ bio_list_init(&qn->bios_bps);
+ bio_list_init(&qn->bios_iops);
qn->tg = tg;
}
@@ -151,18 +152,32 @@ static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
* throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
* @bio: bio being added
* @qn: qnode to add bio to
- * @queued: the service_queue->queued[] list @qn belongs to
+ * @sq: the service_queue @qn belongs to
*
- * Add @bio to @qn and put @qn on @queued if it's not already on.
+ * Add @bio to @qn and put @qn on @sq->queued if it's not already on.
* @qn->tg's reference count is bumped when @qn is activated. See the
* comment on top of throtl_qnode definition for details.
*/
static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
- struct list_head *queued)
+ struct throtl_service_queue *sq)
{
- bio_list_add(&qn->bios, bio);
+ bool rw = bio_data_dir(bio);
+
+ /*
+ * Split bios have already been throttled by bps, so they are
+ * directly queued into the iops path.
+ */
+ if (bio_flagged(bio, BIO_TG_BPS_THROTTLED) ||
+ bio_flagged(bio, BIO_BPS_THROTTLED)) {
+ bio_list_add(&qn->bios_iops, bio);
+ sq->nr_queued_iops[rw]++;
+ } else {
+ bio_list_add(&qn->bios_bps, bio);
+ sq->nr_queued_bps[rw]++;
+ }
+
if (list_empty(&qn->node)) {
- list_add_tail(&qn->node, queued);
+ list_add_tail(&qn->node, &sq->queued[rw]);
blkg_get(tg_to_blkg(qn->tg));
}
}
@@ -170,6 +185,10 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
/**
* throtl_peek_queued - peek the first bio on a qnode list
* @queued: the qnode list to peek
+ *
+ * Always take a bio from the head of the iops queue first. If the queue is
+ * empty, we then take it from the bps queue to maintain the overall idea of
+ * fetching bios from the head.
*/
static struct bio *throtl_peek_queued(struct list_head *queued)
{
@@ -180,28 +199,33 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
return NULL;
qn = list_first_entry(queued, struct throtl_qnode, node);
- bio = bio_list_peek(&qn->bios);
+ bio = bio_list_peek(&qn->bios_iops);
+ if (!bio)
+ bio = bio_list_peek(&qn->bios_bps);
WARN_ON_ONCE(!bio);
return bio;
}
/**
* throtl_pop_queued - pop the first bio form a qnode list
- * @queued: the qnode list to pop a bio from
+ * @sq: the service_queue to pop a bio from
* @tg_to_put: optional out argument for throtl_grp to put
+ * @rw: read/write
*
- * Pop the first bio from the qnode list @queued. After popping, the first
- * qnode is removed from @queued if empty or moved to the end of @queued so
- * that the popping order is round-robin.
+ * Pop the first bio from the qnode list @sq->queued. Note that we firstly
+ * focus on the iops list because bios are ultimately dispatched from it.
+ * After popping, the first qnode is removed from @sq->queued if empty or moved
+ * to the end of @sq->queued so that the popping order is round-robin.
*
* When the first qnode is removed, its associated throtl_grp should be put
* too. If @tg_to_put is NULL, this function automatically puts it;
* otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
* responsible for putting it.
*/
-static struct bio *throtl_pop_queued(struct list_head *queued,
- struct throtl_grp **tg_to_put)
+static struct bio *throtl_pop_queued(struct throtl_service_queue *sq,
+ struct throtl_grp **tg_to_put, bool rw)
{
+ struct list_head *queued = &sq->queued[rw];
struct throtl_qnode *qn;
struct bio *bio;
@@ -209,10 +233,17 @@ static struct bio *throtl_pop_queued(struct list_head *queued,
return NULL;
qn = list_first_entry(queued, struct throtl_qnode, node);
- bio = bio_list_pop(&qn->bios);
+ bio = bio_list_pop(&qn->bios_iops);
+ if (bio) {
+ sq->nr_queued_iops[rw]--;
+ } else {
+ bio = bio_list_pop(&qn->bios_bps);
+ if (bio)
+ sq->nr_queued_bps[rw]--;
+ }
WARN_ON_ONCE(!bio);
- if (bio_list_empty(&qn->bios)) {
+ if (bio_list_empty(&qn->bios_bps) && bio_list_empty(&qn->bios_iops)) {
list_del_init(&qn->node);
if (tg_to_put)
*tg_to_put = qn->tg;
@@ -520,6 +551,9 @@ static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
unsigned long jiffy_end)
{
+ if (!time_before(tg->slice_end[rw], jiffy_end))
+ return;
+
throtl_set_slice_end(tg, rw, jiffy_end);
throtl_log(&tg->service_queue,
"[%c] extend slice start=%lu end=%lu jiffies=%lu",
@@ -536,6 +570,11 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
return true;
}
+static unsigned int sq_queued(struct throtl_service_queue *sq, int type)
+{
+ return sq->nr_queued_bps[type] + sq->nr_queued_iops[type];
+}
+
static unsigned int calculate_io_allowed(u32 iops_limit,
unsigned long jiffy_elapsed)
{
@@ -571,6 +610,48 @@ static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
}
+static long long throtl_trim_bps(struct throtl_grp *tg, bool rw,
+ unsigned long time_elapsed)
+{
+ u64 bps_limit = tg_bps_limit(tg, rw);
+ long long bytes_trim;
+
+ if (bps_limit == U64_MAX)
+ return 0;
+
+ /* Need to consider the case of bytes_allowed overflow. */
+ bytes_trim = calculate_bytes_allowed(bps_limit, time_elapsed);
+ if (bytes_trim <= 0 || tg->bytes_disp[rw] < bytes_trim) {
+ bytes_trim = tg->bytes_disp[rw];
+ tg->bytes_disp[rw] = 0;
+ } else {
+ tg->bytes_disp[rw] -= bytes_trim;
+ }
+
+ return bytes_trim;
+}
+
+static int throtl_trim_iops(struct throtl_grp *tg, bool rw,
+ unsigned long time_elapsed)
+{
+ u32 iops_limit = tg_iops_limit(tg, rw);
+ int io_trim;
+
+ if (iops_limit == UINT_MAX)
+ return 0;
+
+ /* Need to consider the case of io_allowed overflow. */
+ io_trim = calculate_io_allowed(iops_limit, time_elapsed);
+ if (io_trim <= 0 || tg->io_disp[rw] < io_trim) {
+ io_trim = tg->io_disp[rw];
+ tg->io_disp[rw] = 0;
+ } else {
+ tg->io_disp[rw] -= io_trim;
+ }
+
+ return io_trim;
+}
+
/* Trim the used slices and adjust slice start accordingly */
static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
{
@@ -612,22 +693,11 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
* one extra slice is preserved for deviation.
*/
time_elapsed -= tg->td->throtl_slice;
- bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
- time_elapsed);
- io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed);
- if (bytes_trim <= 0 && io_trim <= 0)
+ bytes_trim = throtl_trim_bps(tg, rw, time_elapsed);
+ io_trim = throtl_trim_iops(tg, rw, time_elapsed);
+ if (!bytes_trim && !io_trim)
return;
- if ((long long)tg->bytes_disp[rw] >= bytes_trim)
- tg->bytes_disp[rw] -= bytes_trim;
- else
- tg->bytes_disp[rw] = 0;
-
- if ((int)tg->io_disp[rw] >= io_trim)
- tg->io_disp[rw] -= io_trim;
- else
- tg->io_disp[rw] = 0;
-
tg->slice_start[rw] += time_elapsed;
throtl_log(&tg->service_queue,
@@ -643,21 +713,41 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw,
unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
u64 bps_limit = tg_bps_limit(tg, rw);
u32 iops_limit = tg_iops_limit(tg, rw);
+ long long bytes_allowed;
+ int io_allowed;
+
+ /*
+ * If the queue is empty, carryover handling is not needed. In such cases,
+ * tg->[bytes/io]_disp should be reset to 0 to avoid impacting the dispatch
+ * of subsequent bios. The same handling applies when the previous BPS/IOPS
+ * limit was set to max.
+ */
+ if (sq_queued(&tg->service_queue, rw) == 0) {
+ tg->bytes_disp[rw] = 0;
+ tg->io_disp[rw] = 0;
+ return;
+ }
/*
* If config is updated while bios are still throttled, calculate and
- * accumulate how many bytes/ios are waited across changes. And
- * carryover_bytes/ios will be used to calculate new wait time under new
- * configuration.
+ * accumulate how many bytes/ios are waited across changes. And use the
+ * calculated carryover (@bytes/@ios) to update [bytes/io]_disp, which
+ * will be used to calculate new wait time under new configuration.
+ * And we need to consider the case of bytes/io_allowed overflow.
*/
- if (bps_limit != U64_MAX)
- *bytes = calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
- tg->bytes_disp[rw];
- if (iops_limit != UINT_MAX)
- *ios = calculate_io_allowed(iops_limit, jiffy_elapsed) -
- tg->io_disp[rw];
- tg->bytes_disp[rw] -= *bytes;
- tg->io_disp[rw] -= *ios;
+ if (bps_limit != U64_MAX) {
+ bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed);
+ if (bytes_allowed > 0)
+ *bytes = bytes_allowed - tg->bytes_disp[rw];
+ }
+ if (iops_limit != UINT_MAX) {
+ io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed);
+ if (io_allowed > 0)
+ *ios = io_allowed - tg->io_disp[rw];
+ }
+
+ tg->bytes_disp[rw] = -*bytes;
+ tg->io_disp[rw] = -*ios;
}
static void tg_update_carryover(struct throtl_grp *tg)
@@ -665,12 +755,10 @@ static void tg_update_carryover(struct throtl_grp *tg)
long long bytes[2] = {0};
int ios[2] = {0};
- if (tg->service_queue.nr_queued[READ])
- __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]);
- if (tg->service_queue.nr_queued[WRITE])
- __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]);
+ __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]);
+ __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]);
- /* see comments in struct throtl_grp for meaning of these fields. */
+ /* see comments in struct throtl_grp for meaning of carryover. */
throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
bytes[READ], bytes[WRITE], ios[READ], ios[WRITE]);
}
@@ -682,10 +770,6 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
int io_allowed;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
- if (iops_limit == UINT_MAX) {
- return 0;
- }
-
jiffy_elapsed = jiffies - tg->slice_start[rw];
/* Round up to the next throttle slice, wait time must be nonzero */
@@ -711,11 +795,6 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio);
- /* no need to throttle if this bio's bytes have been accounted */
- if (bps_limit == U64_MAX || bio_flagged(bio, BIO_BPS_THROTTLED)) {
- return 0;
- }
-
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
/* Slice has just started. Consider one slice interval */
@@ -724,7 +803,9 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd);
- if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
+ /* Need to consider the case of bytes_allowed overflow. */
+ if ((bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
+ || bytes_allowed < 0)
return 0;
/* Calc approx time to dispatch */
@@ -742,17 +823,82 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
return jiffy_wait;
}
+static void throtl_charge_bps_bio(struct throtl_grp *tg, struct bio *bio)
+{
+ unsigned int bio_size = throtl_bio_data_size(bio);
+
+ /* Charge the bio to the group */
+ if (!bio_flagged(bio, BIO_BPS_THROTTLED) &&
+ !bio_flagged(bio, BIO_TG_BPS_THROTTLED)) {
+ bio_set_flag(bio, BIO_TG_BPS_THROTTLED);
+ tg->bytes_disp[bio_data_dir(bio)] += bio_size;
+ }
+}
+
+static void throtl_charge_iops_bio(struct throtl_grp *tg, struct bio *bio)
+{
+ bio_clear_flag(bio, BIO_TG_BPS_THROTTLED);
+ tg->io_disp[bio_data_dir(bio)]++;
+}
+
/*
- * Returns whether one can dispatch a bio or not. Also returns approx number
- * of jiffies to wait before this bio is with-in IO rate and can be dispatched
+ * If previous slice expired, start a new one otherwise renew/extend existing
+ * slice to make sure it is at least throtl_slice interval long since now. New
+ * slice is started only for empty throttle group. If there is queued bio, that
+ * means there should be an active slice and it should be extended instead.
*/
-static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
- unsigned long *wait)
+static void tg_update_slice(struct throtl_grp *tg, bool rw)
+{
+ if (throtl_slice_used(tg, rw) &&
+ sq_queued(&tg->service_queue, rw) == 0)
+ throtl_start_new_slice(tg, rw, true);
+ else
+ throtl_extend_slice(tg, rw, jiffies + tg->td->throtl_slice);
+}
+
+static unsigned long tg_dispatch_bps_time(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
- unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
u64 bps_limit = tg_bps_limit(tg, rw);
+ unsigned long bps_wait;
+
+ /* no need to throttle if this bio's bytes have been accounted */
+ if (bps_limit == U64_MAX || tg->flags & THROTL_TG_CANCELING ||
+ bio_flagged(bio, BIO_BPS_THROTTLED) ||
+ bio_flagged(bio, BIO_TG_BPS_THROTTLED))
+ return 0;
+
+ tg_update_slice(tg, rw);
+ bps_wait = tg_within_bps_limit(tg, bio, bps_limit);
+ throtl_extend_slice(tg, rw, jiffies + bps_wait);
+
+ return bps_wait;
+}
+
+static unsigned long tg_dispatch_iops_time(struct throtl_grp *tg, struct bio *bio)
+{
+ bool rw = bio_data_dir(bio);
u32 iops_limit = tg_iops_limit(tg, rw);
+ unsigned long iops_wait;
+
+ if (iops_limit == UINT_MAX || tg->flags & THROTL_TG_CANCELING)
+ return 0;
+
+ tg_update_slice(tg, rw);
+ iops_wait = tg_within_iops_limit(tg, bio, iops_limit);
+ throtl_extend_slice(tg, rw, jiffies + iops_wait);
+
+ return iops_wait;
+}
+
+/*
+ * Returns approx number of jiffies to wait before this bio is with-in IO rate
+ * and can be moved to other queue or dispatched.
+ */
+static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio)
+{
+ bool rw = bio_data_dir(bio);
+ unsigned long wait;
/*
* Currently whole state machine of group depends on first bio
@@ -760,62 +906,20 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
* this function with a different bio if there are other bios
* queued.
*/
- BUG_ON(tg->service_queue.nr_queued[rw] &&
+ BUG_ON(sq_queued(&tg->service_queue, rw) &&
bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
- /* If tg->bps = -1, then BW is unlimited */
- if ((bps_limit == U64_MAX && iops_limit == UINT_MAX) ||
- tg->flags & THROTL_TG_CANCELING) {
- if (wait)
- *wait = 0;
- return true;
- }
+ wait = tg_dispatch_bps_time(tg, bio);
+ if (wait != 0)
+ return wait;
/*
- * If previous slice expired, start a new one otherwise renew/extend
- * existing slice to make sure it is at least throtl_slice interval
- * long since now. New slice is started only for empty throttle group.
- * If there is queued bio, that means there should be an active
- * slice and it should be extended instead.
+ * Charge bps here because @bio will be directly placed into the
+ * iops queue afterward.
*/
- if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
- throtl_start_new_slice(tg, rw, true);
- else {
- if (time_before(tg->slice_end[rw],
- jiffies + tg->td->throtl_slice))
- throtl_extend_slice(tg, rw,
- jiffies + tg->td->throtl_slice);
- }
-
- bps_wait = tg_within_bps_limit(tg, bio, bps_limit);
- iops_wait = tg_within_iops_limit(tg, bio, iops_limit);
- if (bps_wait + iops_wait == 0) {
- if (wait)
- *wait = 0;
- return true;
- }
-
- max_wait = max(bps_wait, iops_wait);
-
- if (wait)
- *wait = max_wait;
-
- if (time_before(tg->slice_end[rw], jiffies + max_wait))
- throtl_extend_slice(tg, rw, jiffies + max_wait);
-
- return false;
-}
-
-static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
-{
- bool rw = bio_data_dir(bio);
- unsigned int bio_size = throtl_bio_data_size(bio);
+ throtl_charge_bps_bio(tg, bio);
- /* Charge the bio to the group */
- if (!bio_flagged(bio, BIO_BPS_THROTTLED))
- tg->bytes_disp[rw] += bio_size;
-
- tg->io_disp[rw]++;
+ return tg_dispatch_iops_time(tg, bio);
}
/**
@@ -842,28 +946,36 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
* dispatched. Mark that @tg was empty. This is automatically
* cleared on the next tg_update_disptime().
*/
- if (!sq->nr_queued[rw])
+ if (sq_queued(sq, rw) == 0)
tg->flags |= THROTL_TG_WAS_EMPTY;
- throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
+ throtl_qnode_add_bio(bio, qn, sq);
+
+ /*
+ * Since we have split the queues, when the iops queue is
+ * previously empty and a new @bio is added into the first @qn,
+ * we also need to update the @tg->disptime.
+ */
+ if (bio_flagged(bio, BIO_BPS_THROTTLED) &&
+ bio == throtl_peek_queued(&sq->queued[rw]))
+ tg->flags |= THROTL_TG_IOPS_WAS_EMPTY;
- sq->nr_queued[rw]++;
throtl_enqueue_tg(tg);
}
static void tg_update_disptime(struct throtl_grp *tg)
{
struct throtl_service_queue *sq = &tg->service_queue;
- unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
+ unsigned long read_wait = -1, write_wait = -1, min_wait, disptime;
struct bio *bio;
bio = throtl_peek_queued(&sq->queued[READ]);
if (bio)
- tg_may_dispatch(tg, bio, &read_wait);
+ read_wait = tg_dispatch_time(tg, bio);
bio = throtl_peek_queued(&sq->queued[WRITE]);
if (bio)
- tg_may_dispatch(tg, bio, &write_wait);
+ write_wait = tg_dispatch_time(tg, bio);
min_wait = min(read_wait, write_wait);
disptime = jiffies + min_wait;
@@ -875,6 +987,7 @@ static void tg_update_disptime(struct throtl_grp *tg)
/* see throtl_add_bio_tg() */
tg->flags &= ~THROTL_TG_WAS_EMPTY;
+ tg->flags &= ~THROTL_TG_IOPS_WAS_EMPTY;
}
static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
@@ -901,10 +1014,9 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
* getting released prematurely. Remember the tg to put and put it
* after @bio is transferred to @parent_sq.
*/
- bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
- sq->nr_queued[rw]--;
+ bio = throtl_pop_queued(sq, &tg_to_put, rw);
- throtl_charge_bio(tg, bio);
+ throtl_charge_iops_bio(tg, bio);
/*
* If our parent is another tg, we just need to transfer @bio to
@@ -919,7 +1031,7 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
} else {
bio_set_flag(bio, BIO_BPS_THROTTLED);
throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
- &parent_sq->queued[rw]);
+ parent_sq);
BUG_ON(tg->td->nr_queued[rw] <= 0);
tg->td->nr_queued[rw]--;
}
@@ -941,7 +1053,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
/* Try to dispatch 75% READS and 25% WRITES */
while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
- tg_may_dispatch(tg, bio, NULL)) {
+ tg_dispatch_time(tg, bio) == 0) {
tg_dispatch_one_bio(tg, READ);
nr_reads++;
@@ -951,7 +1063,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
}
while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
- tg_may_dispatch(tg, bio, NULL)) {
+ tg_dispatch_time(tg, bio) == 0) {
tg_dispatch_one_bio(tg, WRITE);
nr_writes++;
@@ -984,7 +1096,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
nr_disp += throtl_dispatch_tg(tg);
sq = &tg->service_queue;
- if (sq->nr_queued[READ] || sq->nr_queued[WRITE])
+ if (sq_queued(sq, READ) || sq_queued(sq, WRITE))
tg_update_disptime(tg);
else
throtl_dequeue_tg(tg);
@@ -1037,9 +1149,11 @@ again:
dispatched = false;
while (true) {
+ unsigned int __maybe_unused bio_cnt_r = sq_queued(sq, READ);
+ unsigned int __maybe_unused bio_cnt_w = sq_queued(sq, WRITE);
+
throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
- sq->nr_queued[READ] + sq->nr_queued[WRITE],
- sq->nr_queued[READ], sq->nr_queued[WRITE]);
+ bio_cnt_r + bio_cnt_w, bio_cnt_r, bio_cnt_w);
ret = throtl_select_dispatch(sq);
if (ret) {
@@ -1061,7 +1175,8 @@ again:
if (parent_sq) {
/* @parent_sq is another throl_grp, propagate dispatch */
- if (tg->flags & THROTL_TG_WAS_EMPTY) {
+ if (tg->flags & THROTL_TG_WAS_EMPTY ||
+ tg->flags & THROTL_TG_IOPS_WAS_EMPTY) {
tg_update_disptime(tg);
if (!throtl_schedule_next_dispatch(parent_sq, false)) {
/* window is already open, repeat dispatching */
@@ -1101,7 +1216,7 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
spin_lock_irq(&q->queue_lock);
for (rw = READ; rw <= WRITE; rw++)
- while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
+ while ((bio = throtl_pop_queued(td_sq, NULL, rw)))
bio_list_add(&bio_list_on_stack, bio);
spin_unlock_irq(&q->queue_lock);
@@ -1606,11 +1721,30 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)
{
- /* throtl is FIFO - if bios are already queued, should queue */
- if (tg->service_queue.nr_queued[rw])
+ struct throtl_service_queue *sq = &tg->service_queue;
+
+ /*
+ * For a split bio, we need to specifically distinguish whether the
+ * iops queue is empty.
+ */
+ if (bio_flagged(bio, BIO_BPS_THROTTLED))
+ return sq->nr_queued_iops[rw] == 0 &&
+ tg_dispatch_iops_time(tg, bio) == 0;
+
+ /*
+ * Throtl is FIFO - if bios are already queued, should queue.
+ * If the bps queue is empty and @bio is within the bps limit, charge
+ * bps here for direct placement into the iops queue.
+ */
+ if (sq_queued(&tg->service_queue, rw)) {
+ if (sq->nr_queued_bps[rw] == 0 &&
+ tg_dispatch_bps_time(tg, bio) == 0)
+ throtl_charge_bps_bio(tg, bio);
+
return false;
+ }
- return tg_may_dispatch(tg, bio, NULL);
+ return tg_dispatch_time(tg, bio) == 0;
}
bool __blk_throtl_bio(struct bio *bio)
@@ -1631,7 +1765,7 @@ bool __blk_throtl_bio(struct bio *bio)
while (true) {
if (tg_within_limit(tg, bio, rw)) {
/* within limits, let's charge and dispatch directly */
- throtl_charge_bio(tg, bio);
+ throtl_charge_iops_bio(tg, bio);
/*
* We need to trim slice even when bios are not being
@@ -1654,7 +1788,8 @@ bool __blk_throtl_bio(struct bio *bio)
* control algorithm is adaptive, and extra IO bytes
* will be throttled for paying the debt
*/
- throtl_charge_bio(tg, bio);
+ throtl_charge_bps_bio(tg, bio);
+ throtl_charge_iops_bio(tg, bio);
} else {
/* if above limits, break to queue */
break;
@@ -1680,7 +1815,7 @@ bool __blk_throtl_bio(struct bio *bio)
tg->bytes_disp[rw], bio->bi_iter.bi_size,
tg_bps_limit(tg, rw),
tg->io_disp[rw], tg_iops_limit(tg, rw),
- sq->nr_queued[READ], sq->nr_queued[WRITE]);
+ sq_queued(sq, READ), sq_queued(sq, WRITE));
td->nr_queued[rw]++;
throtl_add_bio_tg(bio, qn, tg);
@@ -1688,11 +1823,13 @@ bool __blk_throtl_bio(struct bio *bio)
/*
* Update @tg's dispatch time and force schedule dispatch if @tg
- * was empty before @bio. The forced scheduling isn't likely to
- * cause undue delay as @bio is likely to be dispatched directly if
- * its @tg's disptime is not in the future.
+ * was empty before @bio, or the iops queue is empty and @bio will
+ * add to. The forced scheduling isn't likely to cause undue
+ * delay as @bio is likely to be dispatched directly if its @tg's
+ * disptime is not in the future.
*/
- if (tg->flags & THROTL_TG_WAS_EMPTY) {
+ if (tg->flags & THROTL_TG_WAS_EMPTY ||
+ tg->flags & THROTL_TG_IOPS_WAS_EMPTY) {
tg_update_disptime(tg);
throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
}
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index f9f8666891ab..3b27755bfbff 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -29,7 +29,8 @@
*/
struct throtl_qnode {
struct list_head node; /* service_queue->queued[] */
- struct bio_list bios; /* queued bios */
+ struct bio_list bios_bps; /* queued bios for bps limit */
+ struct bio_list bios_iops; /* queued bios for iops limit */
struct throtl_grp *tg; /* tg this qnode belongs to */
};
@@ -41,7 +42,8 @@ struct throtl_service_queue {
* children throtl_grp's.
*/
struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */
- unsigned int nr_queued[2]; /* number of queued bios */
+ unsigned int nr_queued_bps[2]; /* number of queued bps bios */
+ unsigned int nr_queued_iops[2]; /* number of queued iops bios */
/*
* RB tree of active children throtl_grp's, which are sorted by
@@ -54,9 +56,14 @@ struct throtl_service_queue {
};
enum tg_state_flags {
- THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
- THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
- THROTL_TG_CANCELING = 1 << 2, /* starts to cancel bio */
+ THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
+ THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
+ /*
+ * The sq's iops queue is empty, and a bio is about to be enqueued
+ * to the first qnode's bios_iops list.
+ */
+ THROTL_TG_IOPS_WAS_EMPTY = 1 << 2,
+ THROTL_TG_CANCELING = 1 << 3, /* starts to cancel bio */
};
struct throtl_grp {
@@ -102,19 +109,16 @@ struct throtl_grp {
/* IOPS limits */
unsigned int iops[2];
- /* Number of bytes dispatched in current slice */
- int64_t bytes_disp[2];
- /* Number of bio's dispatched in current slice */
- int io_disp[2];
-
/*
- * The following two fields are updated when new configuration is
- * submitted while some bios are still throttled, they record how many
- * bytes/ios are waited already in previous configuration, and they will
- * be used to calculate wait time under new configuration.
+ * Number of bytes/bio's dispatched in current slice.
+ * When new configuration is submitted while some bios are still throttled,
+ * first calculate the carryover: the amount of bytes/IOs already waited
+ * under the previous configuration. Then, [bytes/io]_disp are represented
+ * as the negative of the carryover, and they will be used to calculate the
+ * wait time under the new configuration.
*/
- long long carryover_bytes[2];
- int carryover_ios[2];
+ int64_t bytes_disp[2];
+ int io_disp[2];
unsigned long last_check_time;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index f1754d07f7e0..a50d4cd55f41 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -37,7 +37,7 @@
enum wbt_flags {
WBT_TRACKED = 1, /* write, tracked for throttling */
WBT_READ = 2, /* read */
- WBT_SWAP = 4, /* write, from swap_writepage() */
+ WBT_SWAP = 4, /* write, from swap_writeout() */
WBT_DISCARD = 8, /* discard */
WBT_NR_BITS = 4, /* number of bits */
@@ -704,8 +704,9 @@ void wbt_enable_default(struct gendisk *disk)
struct rq_qos *rqos;
bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
- if (q->elevator &&
- test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+ mutex_lock(&disk->rqos_state_mutex);
+
+ if (blk_queue_disable_wbt(q))
enable = false;
/* Throttling already enabled? */
@@ -713,8 +714,10 @@ void wbt_enable_default(struct gendisk *disk)
if (rqos) {
if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
+ mutex_unlock(&disk->rqos_state_mutex);
return;
}
+ mutex_unlock(&disk->rqos_state_mutex);
/* Queue not registered? Maybe shutting down... */
if (!blk_queue_registered(q))
@@ -774,11 +777,13 @@ void wbt_disable_default(struct gendisk *disk)
struct rq_wb *rwb;
if (!rqos)
return;
+ mutex_lock(&disk->rqos_state_mutex);
rwb = RQWB(rqos);
if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
blk_stat_deactivate(rwb->cb);
rwb->enable_state = WBT_STATE_OFF_DEFAULT;
}
+ mutex_unlock(&disk->rqos_state_mutex);
}
EXPORT_SYMBOL_GPL(wbt_disable_default);
diff --git a/block/blk.h b/block/blk.h
index 594eeba7b949..37ec459fe656 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -103,8 +103,7 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
- struct page *page, unsigned len, unsigned offset,
- bool *same_page);
+ struct page *page, unsigned len, unsigned offset);
static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
@@ -322,11 +321,9 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
bool blk_insert_flush(struct request *rq);
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
-void elevator_disable(struct request_queue *q);
-void elevator_exit(struct request_queue *q);
-int elv_register_queue(struct request_queue *q, bool uevent);
-void elv_unregister_queue(struct request_queue *q);
+void elv_update_nr_hw_queues(struct request_queue *q);
+void elevator_set_default(struct request_queue *q);
+void elevator_set_none(struct request_queue *q);
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
char *buf);
@@ -407,6 +404,27 @@ static inline struct bio *__bio_split_to_limits(struct bio *bio,
}
}
+/**
+ * get_max_segment_size() - maximum number of bytes to add as a single segment
+ * @lim: Request queue limits.
+ * @paddr: address of the range to add
+ * @len: maximum length available to add at @paddr
+ *
+ * Returns the maximum number of bytes of the range starting at @paddr that can
+ * be added to a single segment.
+ */
+static inline unsigned get_max_segment_size(const struct queue_limits *lim,
+ phys_addr_t paddr, unsigned int len)
+{
+ /*
+ * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
+ * after having calculated the minimum.
+ */
+ return min_t(unsigned long, len,
+ min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
+ (unsigned long)lim->max_segment_size - 1) + 1);
+}
+
int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
@@ -421,7 +439,6 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
int blk_dev_init(void);
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
-unsigned int part_in_flight(struct block_device *part);
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
@@ -443,23 +460,6 @@ static inline void ioc_clear_queue(struct request_queue *q)
}
#endif /* CONFIG_BLK_ICQ */
-struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
-
-static inline bool blk_queue_may_bounce(struct request_queue *q)
-{
- return IS_ENABLED(CONFIG_BOUNCE) &&
- (q->limits.features & BLK_FEAT_BOUNCE_HIGH) &&
- max_low_pfn >= max_pfn;
-}
-
-static inline struct bio *blk_queue_bounce(struct bio *bio,
- struct request_queue *q)
-{
- if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio)))
- return __blk_queue_bounce(bio, q);
- return bio;
-}
-
#ifdef CONFIG_BLK_DEV_ZONED
void disk_init_zone_resources(struct gendisk *disk);
void disk_free_zone_resources(struct gendisk *disk);
diff --git a/block/bounce.c b/block/bounce.c
deleted file mode 100644
index 09a9616cf209..000000000000
--- a/block/bounce.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* bounce buffer handling for block devices
- *
- * - Split from highmem.c
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/mm.h>
-#include <linux/export.h>
-#include <linux/swap.h>
-#include <linux/gfp.h>
-#include <linux/bio-integrity.h>
-#include <linux/pagemap.h>
-#include <linux/mempool.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include <linux/init.h>
-#include <linux/hash.h>
-#include <linux/highmem.h>
-#include <linux/printk.h>
-#include <asm/tlbflush.h>
-
-#include <trace/events/block.h>
-#include "blk.h"
-#include "blk-cgroup.h"
-
-#define POOL_SIZE 64
-#define ISA_POOL_SIZE 16
-
-static struct bio_set bounce_bio_set, bounce_bio_split;
-static mempool_t page_pool;
-
-static void init_bounce_bioset(void)
-{
- static bool bounce_bs_setup;
- int ret;
-
- if (bounce_bs_setup)
- return;
-
- ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- BUG_ON(ret);
-
- ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
- BUG_ON(ret);
- bounce_bs_setup = true;
-}
-
-static __init int init_emergency_pool(void)
-{
- int ret;
-
-#ifndef CONFIG_MEMORY_HOTPLUG
- if (max_pfn <= max_low_pfn)
- return 0;
-#endif
-
- ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
- BUG_ON(ret);
- pr_info("pool size: %d pages\n", POOL_SIZE);
-
- init_bounce_bioset();
- return 0;
-}
-
-__initcall(init_emergency_pool);
-
-/*
- * Simple bounce buffer support for highmem pages. Depending on the
- * queue gfp mask set, *to may or may not be a highmem page. kmap it
- * always, it will do the Right Thing
- */
-static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
-{
- struct bio_vec tovec, fromvec;
- struct bvec_iter iter;
- /*
- * The bio of @from is created by bounce, so we can iterate
- * its bvec from start to end, but the @from->bi_iter can't be
- * trusted because it might be changed by splitting.
- */
- struct bvec_iter from_iter = BVEC_ITER_ALL_INIT;
-
- bio_for_each_segment(tovec, to, iter) {
- fromvec = bio_iter_iovec(from, from_iter);
- if (tovec.bv_page != fromvec.bv_page) {
- /*
- * fromvec->bv_offset and fromvec->bv_len might have
- * been modified by the block layer, so use the original
- * copy, bounce_copy_vec already uses tovec->bv_len
- */
- memcpy_to_bvec(&tovec, page_address(fromvec.bv_page) +
- tovec.bv_offset);
- }
- bio_advance_iter(from, &from_iter, tovec.bv_len);
- }
-}
-
-static void bounce_end_io(struct bio *bio)
-{
- struct bio *bio_orig = bio->bi_private;
- struct bio_vec *bvec, orig_vec;
- struct bvec_iter orig_iter = bio_orig->bi_iter;
- struct bvec_iter_all iter_all;
-
- /*
- * free up bounce indirect pages used
- */
- bio_for_each_segment_all(bvec, bio, iter_all) {
- orig_vec = bio_iter_iovec(bio_orig, orig_iter);
- if (bvec->bv_page != orig_vec.bv_page) {
- dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
- mempool_free(bvec->bv_page, &page_pool);
- }
- bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len);
- }
-
- bio_orig->bi_status = bio->bi_status;
- bio_endio(bio_orig);
- bio_put(bio);
-}
-
-static void bounce_end_io_write(struct bio *bio)
-{
- bounce_end_io(bio);
-}
-
-static void bounce_end_io_read(struct bio *bio)
-{
- struct bio *bio_orig = bio->bi_private;
-
- if (!bio->bi_status)
- copy_to_high_bio_irq(bio_orig, bio);
-
- bounce_end_io(bio);
-}
-
-static struct bio *bounce_clone_bio(struct bio *bio_src)
-{
- struct bvec_iter iter;
- struct bio_vec bv;
- struct bio *bio;
-
- /*
- * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
- * bio_src->bi_io_vec to bio->bi_io_vec.
- *
- * We can't do that anymore, because:
- *
- * - The point of cloning the biovec is to produce a bio with a biovec
- * the caller can modify: bi_idx and bi_bvec_done should be 0.
- *
- * - The original bio could've had more than BIO_MAX_VECS biovecs; if
- * we tried to clone the whole thing bio_alloc_bioset() would fail.
- * But the clone should succeed as long as the number of biovecs we
- * actually need to allocate is fewer than BIO_MAX_VECS.
- *
- * - Lastly, bi_vcnt should not be looked at or relied upon by code
- * that does not own the bio - reason being drivers don't use it for
- * iterating over the biovec anymore, so expecting it to be kept up
- * to date (i.e. for clones that share the parent biovec) is just
- * asking for trouble and would force extra work.
- */
- bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src),
- bio_src->bi_opf, GFP_NOIO, &bounce_bio_set);
- if (bio_flagged(bio_src, BIO_REMAPPED))
- bio_set_flag(bio, BIO_REMAPPED);
- bio->bi_ioprio = bio_src->bi_ioprio;
- bio->bi_write_hint = bio_src->bi_write_hint;
- bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
- bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
-
- switch (bio_op(bio)) {
- case REQ_OP_DISCARD:
- case REQ_OP_SECURE_ERASE:
- case REQ_OP_WRITE_ZEROES:
- break;
- default:
- bio_for_each_segment(bv, bio_src, iter)
- bio->bi_io_vec[bio->bi_vcnt++] = bv;
- break;
- }
-
- if (bio_crypt_clone(bio, bio_src, GFP_NOIO) < 0)
- goto err_put;
-
- if (bio_integrity(bio_src) &&
- bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0)
- goto err_put;
-
- bio_clone_blkg_association(bio, bio_src);
-
- return bio;
-
-err_put:
- bio_put(bio);
- return NULL;
-}
-
-struct bio *__blk_queue_bounce(struct bio *bio_orig, struct request_queue *q)
-{
- struct bio *bio;
- int rw = bio_data_dir(bio_orig);
- struct bio_vec *to, from;
- struct bvec_iter iter;
- unsigned i = 0, bytes = 0;
- bool bounce = false;
- int sectors;
-
- bio_for_each_segment(from, bio_orig, iter) {
- if (i++ < BIO_MAX_VECS)
- bytes += from.bv_len;
- if (PageHighMem(from.bv_page))
- bounce = true;
- }
- if (!bounce)
- return bio_orig;
-
- /*
- * Individual bvecs might not be logical block aligned. Round down
- * the split size so that each bio is properly block size aligned,
- * even if we do not use the full hardware limits.
- */
- sectors = ALIGN_DOWN(bytes, queue_logical_block_size(q)) >>
- SECTOR_SHIFT;
- if (sectors < bio_sectors(bio_orig)) {
- bio = bio_split(bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
- bio_chain(bio, bio_orig);
- submit_bio_noacct(bio_orig);
- bio_orig = bio;
- }
- bio = bounce_clone_bio(bio_orig);
-
- /*
- * Bvec table can't be updated by bio_for_each_segment_all(),
- * so retrieve bvec from the table directly. This way is safe
- * because the 'bio' is single-page bvec.
- */
- for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) {
- struct page *bounce_page;
-
- if (!PageHighMem(to->bv_page))
- continue;
-
- bounce_page = mempool_alloc(&page_pool, GFP_NOIO);
- inc_zone_page_state(bounce_page, NR_BOUNCE);
-
- if (rw == WRITE) {
- flush_dcache_page(to->bv_page);
- memcpy_from_bvec(page_address(bounce_page), to);
- }
- to->bv_page = bounce_page;
- }
-
- trace_block_bio_bounce(bio_orig);
-
- bio->bi_flags |= (1 << BIO_BOUNCED);
-
- if (rw == READ)
- bio->bi_end_io = bounce_end_io_read;
- else
- bio->bi_end_io = bounce_end_io_write;
-
- bio->bi_private = bio_orig;
- return bio;
-}
diff --git a/block/elevator.c b/block/elevator.c
index b4d08026b02c..ab22542e6cf0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -45,6 +45,17 @@
#include "blk-wbt.h"
#include "blk-cgroup.h"
+/* Holding context data for changing elevator */
+struct elv_change_ctx {
+ const char *name;
+ bool no_uevent;
+
+ /* for unregistering old elevator */
+ struct elevator_queue *old;
+ /* for registering new elevator */
+ struct elevator_queue *new;
+};
+
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
@@ -148,18 +159,18 @@ static void elevator_release(struct kobject *kobj)
kfree(e);
}
-void elevator_exit(struct request_queue *q)
+static void elevator_exit(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
+ lockdep_assert_held(&q->elevator_lock);
+
ioc_clear_queue(q);
blk_mq_sched_free_rqs(q);
mutex_lock(&e->sysfs_lock);
blk_mq_exit_sched(q, e);
mutex_unlock(&e->sysfs_lock);
-
- kobject_put(&e->kobj);
}
static inline void __elv_rqhash_del(struct request *rq)
@@ -412,14 +423,15 @@ elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
const struct elv_fs_entry *entry = to_elv(attr);
struct elevator_queue *e;
- ssize_t error;
+ ssize_t error = -ENODEV;
if (!entry->show)
return -EIO;
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->type ? entry->show(e, page) : -ENOENT;
+ if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
+ error = entry->show(e, page);
mutex_unlock(&e->sysfs_lock);
return error;
}
@@ -430,14 +442,15 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
{
const struct elv_fs_entry *entry = to_elv(attr);
struct elevator_queue *e;
- ssize_t error;
+ ssize_t error = -ENODEV;
if (!entry->store)
return -EIO;
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->type ? entry->store(e, page, length) : -ENOENT;
+ if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
+ error = entry->store(e, page, length);
mutex_unlock(&e->sysfs_lock);
return error;
}
@@ -452,13 +465,12 @@ static const struct kobj_type elv_ktype = {
.release = elevator_release,
};
-int elv_register_queue(struct request_queue *q, bool uevent)
+static int elv_register_queue(struct request_queue *q,
+ struct elevator_queue *e,
+ bool uevent)
{
- struct elevator_queue *e = q->elevator;
int error;
- lockdep_assert_held(&q->elevator_lock);
-
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
const struct elv_fs_entry *attr = e->type->elevator_attrs;
@@ -472,20 +484,25 @@ int elv_register_queue(struct request_queue *q, bool uevent)
if (uevent)
kobject_uevent(&e->kobj, KOBJ_ADD);
+ /*
+ * Sched is initialized, it is ready to export it via
+ * debugfs
+ */
+ blk_mq_sched_reg_debugfs(q);
set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
}
return error;
}
-void elv_unregister_queue(struct request_queue *q)
+static void elv_unregister_queue(struct request_queue *q,
+ struct elevator_queue *e)
{
- struct elevator_queue *e = q->elevator;
-
- lockdep_assert_held(&q->elevator_lock);
-
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
+
+ /* unexport via debugfs before exiting sched */
+ blk_mq_sched_unreg_debugfs(q);
}
}
@@ -548,159 +565,194 @@ void elv_unregister(struct elevator_type *e)
EXPORT_SYMBOL_GPL(elv_unregister);
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
- */
-static struct elevator_type *elevator_get_default(struct request_queue *q)
-{
- if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
- return NULL;
-
- if (q->nr_hw_queues != 1 &&
- !blk_mq_is_shared_tags(q->tag_set->flags))
- return NULL;
-
- return elevator_find_get("mq-deadline");
-}
-
-/*
- * Use the default elevator settings. If the chosen elevator initialization
- * fails, fall back to the "none" elevator (no elevator).
- */
-void elevator_init_mq(struct request_queue *q)
-{
- struct elevator_type *e;
- unsigned int memflags;
- int err;
-
- WARN_ON_ONCE(blk_queue_registered(q));
-
- if (unlikely(q->elevator))
- return;
-
- e = elevator_get_default(q);
- if (!e)
- return;
-
- /*
- * We are called before adding disk, when there isn't any FS I/O,
- * so freezing queue plus canceling dispatch work is enough to
- * drain any dispatch activities originated from passthrough
- * requests, then no need to quiesce queue which may add long boot
- * latency, especially when lots of disks are involved.
- *
- * Disk isn't added yet, so verifying queue lock only manually.
- */
- memflags = blk_mq_freeze_queue(q);
-
- blk_mq_cancel_work_sync(q);
-
- err = blk_mq_init_sched(q, e);
-
- blk_mq_unfreeze_queue(q, memflags);
-
- if (err) {
- pr_warn("\"%s\" elevator initialization failed, "
- "falling back to \"none\"\n", e->elevator_name);
- }
-
- elevator_put(e);
-}
-
-/*
* Switch to new_e io scheduler.
*
* If switching fails, we are most likely running out of memory and not able
* to restore the old io scheduler, so leaving the io scheduler being none.
*/
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
+static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
{
- unsigned int memflags;
- int ret;
+ struct elevator_type *new_e = NULL;
+ int ret = 0;
+ WARN_ON_ONCE(q->mq_freeze_depth == 0);
lockdep_assert_held(&q->elevator_lock);
- memflags = blk_mq_freeze_queue(q);
+ if (strncmp(ctx->name, "none", 4)) {
+ new_e = elevator_find_get(ctx->name);
+ if (!new_e)
+ return -EINVAL;
+ }
+
blk_mq_quiesce_queue(q);
if (q->elevator) {
- elv_unregister_queue(q);
+ ctx->old = q->elevator;
elevator_exit(q);
}
- ret = blk_mq_init_sched(q, new_e);
- if (ret)
- goto out_unfreeze;
-
- ret = elv_register_queue(q, true);
- if (ret) {
- elevator_exit(q);
- goto out_unfreeze;
+ if (new_e) {
+ ret = blk_mq_init_sched(q, new_e);
+ if (ret)
+ goto out_unfreeze;
+ ctx->new = q->elevator;
+ } else {
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
+ q->elevator = NULL;
+ q->nr_requests = q->tag_set->queue_depth;
}
- blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
+ blk_add_trace_msg(q, "elv switch: %s", ctx->name);
out_unfreeze:
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q, memflags);
if (ret) {
pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
new_e->elevator_name);
}
+ if (new_e)
+ elevator_put(new_e);
return ret;
}
-void elevator_disable(struct request_queue *q)
+static void elv_exit_and_release(struct request_queue *q)
{
- unsigned int memflags;
-
- lockdep_assert_held(&q->elevator_lock);
+ struct elevator_queue *e;
+ unsigned memflags;
memflags = blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
-
- elv_unregister_queue(q);
+ mutex_lock(&q->elevator_lock);
+ e = q->elevator;
elevator_exit(q);
- blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
- q->elevator = NULL;
- q->nr_requests = q->tag_set->queue_depth;
- blk_add_trace_msg(q, "elv switch: none");
-
- blk_mq_unquiesce_queue(q);
+ mutex_unlock(&q->elevator_lock);
blk_mq_unfreeze_queue(q, memflags);
+ if (e)
+ kobject_put(&e->kobj);
+}
+
+static int elevator_change_done(struct request_queue *q,
+ struct elv_change_ctx *ctx)
+{
+ int ret = 0;
+
+ if (ctx->old) {
+ bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
+ &ctx->old->flags);
+
+ elv_unregister_queue(q, ctx->old);
+ kobject_put(&ctx->old->kobj);
+ if (enable_wbt)
+ wbt_enable_default(q->disk);
+ }
+ if (ctx->new) {
+ ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
+ if (ret)
+ elv_exit_and_release(q);
+ }
+ return ret;
}
/*
* Switch this queue to the given IO scheduler.
*/
-static int elevator_change(struct request_queue *q, const char *elevator_name)
+static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
{
- struct elevator_type *e;
- int ret;
+ unsigned int memflags;
+ int ret = 0;
- /* Make sure queue is not in the middle of being removed */
- if (!blk_queue_registered(q))
- return -ENOENT;
+ lockdep_assert_held(&q->tag_set->update_nr_hwq_lock);
+
+ memflags = blk_mq_freeze_queue(q);
+ /*
+ * May be called before adding disk, when there isn't any FS I/O,
+ * so freezing queue plus canceling dispatch work is enough to
+ * drain any dispatch activities originated from passthrough
+ * requests, then no need to quiesce queue which may add long boot
+ * latency, especially when lots of disks are involved.
+ *
+ * Disk isn't added yet, so verifying queue lock only manually.
+ */
+ blk_mq_cancel_work_sync(q);
+ mutex_lock(&q->elevator_lock);
+ if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
+ ret = elevator_switch(q, ctx);
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
+ if (!ret)
+ ret = elevator_change_done(q, ctx);
+
+ return ret;
+}
+
+/*
+ * The I/O scheduler depends on the number of hardware queues, this forces a
+ * reattachment when nr_hw_queues changes.
+ */
+void elv_update_nr_hw_queues(struct request_queue *q)
+{
+ struct elv_change_ctx ctx = {};
+ int ret = -ENODEV;
+
+ WARN_ON_ONCE(q->mq_freeze_depth == 0);
- if (!strncmp(elevator_name, "none", 4)) {
- if (q->elevator)
- elevator_disable(q);
- return 0;
+ mutex_lock(&q->elevator_lock);
+ if (q->elevator && !blk_queue_dying(q) && blk_queue_registered(q)) {
+ ctx.name = q->elevator->type->elevator_name;
+
+ /* force to reattach elevator after nr_hw_queue is updated */
+ ret = elevator_switch(q, &ctx);
}
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue_nomemrestore(q);
+ if (!ret)
+ WARN_ON_ONCE(elevator_change_done(q, &ctx));
+}
- if (q->elevator && elevator_match(q->elevator->type, elevator_name))
- return 0;
+/*
+ * Use the default elevator settings. If the chosen elevator initialization
+ * fails, fall back to the "none" elevator (no elevator).
+ */
+void elevator_set_default(struct request_queue *q)
+{
+ struct elv_change_ctx ctx = {
+ .name = "mq-deadline",
+ .no_uevent = true,
+ };
+ int err = 0;
- e = elevator_find_get(elevator_name);
- if (!e)
- return -EINVAL;
- ret = elevator_switch(q, e);
- elevator_put(e);
- return ret;
+ /* now we allow to switch elevator */
+ blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
+
+ if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
+ return;
+
+ /*
+ * For single queue devices, default to using mq-deadline. If we
+ * have multiple queues or mq-deadline is not available, default
+ * to "none".
+ */
+ if (elevator_find_get(ctx.name) && (q->nr_hw_queues == 1 ||
+ blk_mq_is_shared_tags(q->tag_set->flags)))
+ err = elevator_change(q, &ctx);
+ if (err < 0)
+ pr_warn("\"%s\" elevator initialization, failed %d, "
+ "falling back to \"none\"\n", ctx.name, err);
}
-static void elv_iosched_load_module(char *elevator_name)
+void elevator_set_none(struct request_queue *q)
+{
+ struct elv_change_ctx ctx = {
+ .name = "none",
+ };
+ int err;
+
+ err = elevator_change(q, &ctx);
+ if (err < 0)
+ pr_warn("%s: set none elevator failed %d\n", __func__, err);
+}
+
+static void elv_iosched_load_module(const char *elevator_name)
{
struct elevator_type *found;
@@ -716,10 +768,14 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
- char *name;
+ struct elv_change_ctx ctx = {};
int ret;
- unsigned int memflags;
struct request_queue *q = disk->queue;
+ struct blk_mq_tag_set *set = q->tag_set;
+
+ /* Make sure queue is not in the middle of being removed */
+ if (!blk_queue_registered(q))
+ return -ENOENT;
/*
* If the attribute needs to load a module, do it before freezing the
@@ -727,24 +783,25 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
* queue is the one for the device storing the module file.
*/
strscpy(elevator_name, buf, sizeof(elevator_name));
- name = strstrip(elevator_name);
+ ctx.name = strstrip(elevator_name);
- elv_iosched_load_module(name);
+ elv_iosched_load_module(ctx.name);
- memflags = blk_mq_freeze_queue(q);
- mutex_lock(&q->elevator_lock);
- ret = elevator_change(q, name);
- if (!ret)
- ret = count;
- mutex_unlock(&q->elevator_lock);
- blk_mq_unfreeze_queue(q, memflags);
+ down_read(&set->update_nr_hwq_lock);
+ if (!blk_queue_no_elv_switch(q)) {
+ ret = elevator_change(q, &ctx);
+ if (!ret)
+ ret = count;
+ } else {
+ ret = -ENOENT;
+ }
+ up_read(&set->update_nr_hwq_lock);
return ret;
}
ssize_t elv_iosched_show(struct gendisk *disk, char *name)
{
struct request_queue *q = disk->queue;
- struct elevator_queue *eq = q->elevator;
struct elevator_type *cur = NULL, *e;
int len = 0;
@@ -753,7 +810,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
len += sprintf(name+len, "[none] ");
} else {
len += sprintf(name+len, "none ");
- cur = eq->type;
+ cur = q->elevator->type;
}
spin_lock(&elv_list_lock);
diff --git a/block/elevator.h b/block/elevator.h
index e4e44dfac503..a07ce773a38f 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -121,7 +121,8 @@ struct elevator_queue
};
#define ELEVATOR_FLAG_REGISTERED 0
-#define ELEVATOR_FLAG_DISABLE_WBT 1
+#define ELEVATOR_FLAG_DYING 1
+#define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT 2
/*
* block elevator interface
@@ -182,4 +183,7 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist)
+void blk_mq_sched_reg_debugfs(struct request_queue *q);
+void blk_mq_sched_unreg_debugfs(struct request_queue *q);
+
#endif /* _ELEVATOR_H */
diff --git a/block/fops.c b/block/fops.c
index 82b672d15ea4..1309861d4c2c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
+ bio.bi_write_stream = iocb->ki_write_stream;
bio.bi_ioprio = iocb->ki_ioprio;
if (iocb->ki_flags & IOCB_ATOMIC)
bio.bi_opf |= REQ_ATOMIC;
@@ -206,6 +207,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
for (;;) {
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
+ bio->bi_write_stream = iocb->ki_write_stream;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
@@ -333,6 +335,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
dio->iocb = iocb;
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
+ bio->bi_write_stream = iocb->ki_write_stream;
bio->bi_end_io = blkdev_bio_end_io_async;
bio->bi_ioprio = iocb->ki_ioprio;
@@ -398,6 +401,26 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (blkdev_dio_invalid(bdev, iocb, iter))
return -EINVAL;
+ if (iov_iter_rw(iter) == WRITE) {
+ u16 max_write_streams = bdev_max_write_streams(bdev);
+
+ if (iocb->ki_write_stream) {
+ if (iocb->ki_write_stream > max_write_streams)
+ return -EINVAL;
+ } else if (max_write_streams) {
+ enum rw_hint write_hint =
+ file_inode(iocb->ki_filp)->i_write_hint;
+
+ /*
+ * Just use the write hint as write stream for block
+ * device writes. This assumes no file system is
+ * mounted that would use the streams differently.
+ */
+ if (write_hint <= max_write_streams)
+ iocb->ki_write_stream = write_hint;
+ }
+ }
+
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
if (likely(nr_pages <= BIO_MAX_VECS)) {
if (is_sync_kiocb(iocb))
@@ -451,12 +474,13 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock,
static int blkdev_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ struct folio *folio = NULL;
struct blk_plug plug;
int err;
blk_start_plug(&plug);
- err = write_cache_pages(mapping, wbc, block_write_full_folio,
- blkdev_get_block);
+ while ((folio = writeback_iter(mapping, wbc, folio, &err)))
+ err = block_write_full_folio(folio, wbc, blkdev_get_block);
blk_finish_plug(&plug);
return err;
diff --git a/block/genhd.c b/block/genhd.c
index c2bd86cd09de..8171a6bc3210 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -125,37 +125,46 @@ static void part_stat_read_all(struct block_device *part,
}
}
-unsigned int part_in_flight(struct block_device *part)
+static void bdev_count_inflight_rw(struct block_device *part,
+ unsigned int inflight[2], bool mq_driver)
{
- unsigned int inflight = 0;
int cpu;
- for_each_possible_cpu(cpu) {
- inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
- part_stat_local_read_cpu(part, in_flight[1], cpu);
+ if (mq_driver) {
+ blk_mq_in_driver_rw(part, inflight);
+ } else {
+ for_each_possible_cpu(cpu) {
+ inflight[READ] += part_stat_local_read_cpu(
+ part, in_flight[READ], cpu);
+ inflight[WRITE] += part_stat_local_read_cpu(
+ part, in_flight[WRITE], cpu);
+ }
}
- if ((int)inflight < 0)
- inflight = 0;
- return inflight;
+ if (WARN_ON_ONCE((int)inflight[READ] < 0))
+ inflight[READ] = 0;
+ if (WARN_ON_ONCE((int)inflight[WRITE] < 0))
+ inflight[WRITE] = 0;
}
-static void part_in_flight_rw(struct block_device *part,
- unsigned int inflight[2])
+/**
+ * bdev_count_inflight - get the number of inflight IOs for a block device.
+ *
+ * @part: the block device.
+ *
+ * Inflight here means started IO accounting, from bdev_start_io_acct() for
+ * bio-based block device, and from blk_account_io_start() for rq-based block
+ * device.
+ */
+unsigned int bdev_count_inflight(struct block_device *part)
{
- int cpu;
+ unsigned int inflight[2] = {0};
- inflight[0] = 0;
- inflight[1] = 0;
- for_each_possible_cpu(cpu) {
- inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
- inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
- }
- if ((int)inflight[0] < 0)
- inflight[0] = 0;
- if ((int)inflight[1] < 0)
- inflight[1] = 0;
+ bdev_count_inflight_rw(part, inflight, false);
+
+ return inflight[READ] + inflight[WRITE];
}
+EXPORT_SYMBOL_GPL(bdev_count_inflight);
/*
* Can be deleted altogether. Later.
@@ -389,19 +398,35 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
return ret;
}
-/**
- * add_disk_fwnode - add disk information to kernel list with fwnode
- * @parent: parent device for the disk
- * @disk: per-device partitioning information
- * @groups: Additional per-device sysfs groups
- * @fwnode: attached disk fwnode
- *
- * This function registers the partitioning information in @disk
- * with the kernel. Also attach a fwnode to the disk device.
- */
-int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups,
- struct fwnode_handle *fwnode)
+static void add_disk_final(struct gendisk *disk)
+{
+ struct device *ddev = disk_to_dev(disk);
+
+ if (!(disk->flags & GENHD_FL_HIDDEN)) {
+ /* Make sure the first partition scan will be proceed */
+ if (get_capacity(disk) && disk_has_partscan(disk))
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+
+ bdev_add(disk->part0, ddev->devt);
+ if (get_capacity(disk))
+ disk_scan_partitions(disk, BLK_OPEN_READ);
+
+ /*
+ * Announce the disk and partitions after all partitions are
+ * created. (for hidden disks uevents remain suppressed forever)
+ */
+ dev_set_uevent_suppress(ddev, 0);
+ disk_uevent(disk, KOBJ_ADD);
+ }
+
+ blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
+ disk_add_events(disk);
+ set_bit(GD_ADDED, &disk->state);
+}
+
+static int __add_disk(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups,
+ struct fwnode_handle *fwnode)
{
struct device *ddev = disk_to_dev(disk);
@@ -416,12 +441,6 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
*/
if (disk->fops->submit_bio || disk->fops->poll_bio)
return -EINVAL;
-
- /*
- * Initialize the I/O scheduler code and pick a default one if
- * needed.
- */
- elevator_init_mq(disk->queue);
} else {
if (!disk->fops->submit_bio)
return -EINVAL;
@@ -438,7 +457,7 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
ret = -EINVAL;
if (disk->major) {
if (WARN_ON(!disk->minors))
- goto out_exit_elevator;
+ goto out;
if (disk->minors > DISK_MAX_PARTS) {
pr_err("block: can't allocate more than %d partitions\n",
@@ -448,14 +467,14 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
if (disk->first_minor > MINORMASK ||
disk->minors > MINORMASK + 1 ||
disk->first_minor + disk->minors > MINORMASK + 1)
- goto out_exit_elevator;
+ goto out;
} else {
if (WARN_ON(disk->minors))
- goto out_exit_elevator;
+ goto out;
ret = blk_alloc_ext_minor();
if (ret < 0)
- goto out_exit_elevator;
+ goto out;
disk->major = BLOCK_EXT_MAJOR;
disk->first_minor = ret;
}
@@ -516,21 +535,6 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
&disk->bdi->dev->kobj, "bdi");
if (ret)
goto out_unregister_bdi;
-
- /* Make sure the first partition scan will be proceed */
- if (get_capacity(disk) && disk_has_partscan(disk))
- set_bit(GD_NEED_PART_SCAN, &disk->state);
-
- bdev_add(disk->part0, ddev->devt);
- if (get_capacity(disk))
- disk_scan_partitions(disk, BLK_OPEN_READ);
-
- /*
- * Announce the disk and partitions after all partitions are
- * created. (for hidden disks uevents remain suppressed forever)
- */
- dev_set_uevent_suppress(ddev, 0);
- disk_uevent(disk, KOBJ_ADD);
} else {
/*
* Even if the block_device for a hidden gendisk is not
@@ -539,10 +543,6 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
*/
disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
}
-
- blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
- disk_add_events(disk);
- set_bit(GD_ADDED, &disk->state);
return 0;
out_unregister_bdi:
@@ -564,12 +564,46 @@ out_device_del:
out_free_ext_minor:
if (disk->major == BLOCK_EXT_MAJOR)
blk_free_ext_minor(disk->first_minor);
-out_exit_elevator:
- if (disk->queue->elevator) {
- mutex_lock(&disk->queue->elevator_lock);
- elevator_exit(disk->queue);
- mutex_unlock(&disk->queue->elevator_lock);
+out:
+ return ret;
+}
+
+/**
+ * add_disk_fwnode - add disk information to kernel list with fwnode
+ * @parent: parent device for the disk
+ * @disk: per-device partitioning information
+ * @groups: Additional per-device sysfs groups
+ * @fwnode: attached disk fwnode
+ *
+ * This function registers the partitioning information in @disk
+ * with the kernel. Also attach a fwnode to the disk device.
+ */
+int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups,
+ struct fwnode_handle *fwnode)
+{
+ struct blk_mq_tag_set *set;
+ unsigned int memflags;
+ int ret;
+
+ if (queue_is_mq(disk->queue)) {
+ set = disk->queue->tag_set;
+ memflags = memalloc_noio_save();
+ down_read(&set->update_nr_hwq_lock);
+ ret = __add_disk(parent, disk, groups, fwnode);
+ up_read(&set->update_nr_hwq_lock);
+ memalloc_noio_restore(memflags);
+ } else {
+ ret = __add_disk(parent, disk, groups, fwnode);
}
+
+ /*
+ * add_disk_final() needn't to read `nr_hw_queues`, so move it out
+ * of read lock `set->update_nr_hwq_lock` for avoiding unnecessary
+ * lock dependency on `disk->open_mutex` from scanning partition.
+ */
+ if (!ret)
+ add_disk_final(disk);
return ret;
}
EXPORT_SYMBOL_GPL(add_disk_fwnode);
@@ -652,26 +686,7 @@ void blk_mark_disk_dead(struct gendisk *disk)
}
EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
-/**
- * del_gendisk - remove the gendisk
- * @disk: the struct gendisk to remove
- *
- * Removes the gendisk and all its associated resources. This deletes the
- * partitions associated with the gendisk, and unregisters the associated
- * request_queue.
- *
- * This is the counter to the respective __device_add_disk() call.
- *
- * The final removal of the struct gendisk happens when its refcount reaches 0
- * with put_disk(), which should be called after del_gendisk(), if
- * __device_add_disk() was used.
- *
- * Drivers exist which depend on the release of the gendisk to be synchronous,
- * it should not be deferred.
- *
- * Context: can sleep
- */
-void del_gendisk(struct gendisk *disk)
+static void __del_gendisk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct block_device *part;
@@ -743,14 +758,7 @@ void del_gendisk(struct gendisk *disk)
if (queue_is_mq(q))
blk_mq_cancel_work_sync(q);
- blk_mq_quiesce_queue(q);
- if (q->elevator) {
- mutex_lock(&q->elevator_lock);
- elevator_exit(q);
- mutex_unlock(&q->elevator_lock);
- }
rq_qos_exit(q);
- blk_mq_unquiesce_queue(q);
/*
* If the disk does not own the queue, allow using passthrough requests
@@ -764,6 +772,55 @@ void del_gendisk(struct gendisk *disk)
if (start_drain)
blk_unfreeze_release_lock(q);
}
+
+static void disable_elv_switch(struct request_queue *q)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+ WARN_ON_ONCE(!queue_is_mq(q));
+
+ down_write(&set->update_nr_hwq_lock);
+ blk_queue_flag_set(QUEUE_FLAG_NO_ELV_SWITCH, q);
+ up_write(&set->update_nr_hwq_lock);
+}
+
+/**
+ * del_gendisk - remove the gendisk
+ * @disk: the struct gendisk to remove
+ *
+ * Removes the gendisk and all its associated resources. This deletes the
+ * partitions associated with the gendisk, and unregisters the associated
+ * request_queue.
+ *
+ * This is the counter to the respective __device_add_disk() call.
+ *
+ * The final removal of the struct gendisk happens when its refcount reaches 0
+ * with put_disk(), which should be called after del_gendisk(), if
+ * __device_add_disk() was used.
+ *
+ * Drivers exist which depend on the release of the gendisk to be synchronous,
+ * it should not be deferred.
+ *
+ * Context: can sleep
+ */
+void del_gendisk(struct gendisk *disk)
+{
+ struct blk_mq_tag_set *set;
+ unsigned int memflags;
+
+ if (!queue_is_mq(disk->queue)) {
+ __del_gendisk(disk);
+ } else {
+ set = disk->queue->tag_set;
+
+ disable_elv_switch(disk->queue);
+
+ memflags = memalloc_noio_save();
+ down_read(&set->update_nr_hwq_lock);
+ __del_gendisk(disk);
+ up_read(&set->update_nr_hwq_lock);
+ memalloc_noio_restore(memflags);
+ }
+}
EXPORT_SYMBOL(del_gendisk);
/**
@@ -1005,7 +1062,7 @@ ssize_t part_stat_show(struct device *dev,
struct disk_stats stat;
unsigned int inflight;
- inflight = part_in_flight(bdev);
+ inflight = bdev_count_inflight(bdev);
if (inflight) {
part_stat_lock();
update_io_ticks(bdev, jiffies, true);
@@ -1042,19 +1099,21 @@ ssize_t part_stat_show(struct device *dev,
(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
}
+/*
+ * Show the number of IOs issued to driver.
+ * For bio-based device, started from bdev_start_io_acct();
+ * For rq-based device, started from blk_mq_start_request();
+ */
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct block_device *bdev = dev_to_bdev(dev);
struct request_queue *q = bdev_get_queue(bdev);
- unsigned int inflight[2];
+ unsigned int inflight[2] = {0};
- if (queue_is_mq(q))
- blk_mq_in_flight_rw(q, bdev, inflight);
- else
- part_in_flight_rw(bdev, inflight);
+ bdev_count_inflight_rw(bdev, inflight, queue_is_mq(q));
- return sysfs_emit(buf, "%8u %8u\n", inflight[0], inflight[1]);
+ return sysfs_emit(buf, "%8u %8u\n", inflight[READ], inflight[WRITE]);
}
static ssize_t disk_capability_show(struct device *dev,
@@ -1307,7 +1366,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
continue;
- inflight = part_in_flight(hd);
+ inflight = bdev_count_inflight(hd);
if (inflight) {
part_stat_lock();
update_io_ticks(hd, jiffies, true);
@@ -1422,6 +1481,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
INIT_LIST_HEAD(&disk->slave_bdevs);
#endif
+ mutex_init(&disk->rqos_state_mutex);
return disk;
out_erase_part0:
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 754f6b7415cd..2edf1cac06d5 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -715,7 +715,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
}
/*
- * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list().
+ * Called from blk_mq_insert_request() or blk_mq_dispatch_list().
*/
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
struct list_head *list,
diff --git a/crypto/842.c b/crypto/842.c
index 5fb37a925989..8c257c40e2b9 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -23,10 +23,6 @@
#include <linux/module.h>
#include <linux/sw842.h>
-struct crypto842_ctx {
- void *wmem; /* working memory for compress */
-};
-
static void *crypto842_alloc_ctx(void)
{
void *ctx;
@@ -74,7 +70,7 @@ static int __init crypto842_mod_init(void)
{
return crypto_register_scomp(&scomp);
}
-subsys_initcall(crypto842_mod_init);
+module_init(crypto842_mod_init);
static void __exit crypto842_mod_exit(void)
{
diff --git a/crypto/Kconfig b/crypto/Kconfig
index dbf97c4e7c59..e9fee7818e27 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -25,7 +25,7 @@ menu "Crypto core or helper"
config CRYPTO_FIPS
bool "FIPS 200 compliance"
- depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS
+ depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && CRYPTO_SELFTESTS
depends on (MODULE_SIG || !MODULES)
help
This option enables the fips boot option which is
@@ -143,16 +143,17 @@ config CRYPTO_ACOMP
config CRYPTO_HKDF
tristate
- select CRYPTO_SHA256 if !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
- select CRYPTO_SHA512 if !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+ select CRYPTO_SHA256 if CRYPTO_SELFTESTS
+ select CRYPTO_SHA512 if CRYPTO_SELFTESTS
select CRYPTO_HASH2
config CRYPTO_MANAGER
- tristate "Cryptographic algorithm manager"
+ tristate
+ default CRYPTO_ALGAPI if CRYPTO_SELFTESTS
select CRYPTO_MANAGER2
help
- Create default cryptographic template instantiations such as
- cbc(aes).
+ This provides the support for instantiating templates such as
+ cbc(aes), and the support for the crypto self-tests.
config CRYPTO_MANAGER2
def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y)
@@ -173,35 +174,27 @@ config CRYPTO_USER
Userspace configuration for cryptographic instantiations such as
cbc(aes).
-config CRYPTO_MANAGER_DISABLE_TESTS
- bool "Disable run-time self tests"
- default y
+config CRYPTO_SELFTESTS
+ bool "Enable cryptographic self-tests"
+ depends on DEBUG_KERNEL
help
- Disable run-time self tests that normally take place at
- algorithm registration.
+ Enable the cryptographic self-tests.
-config CRYPTO_MANAGER_EXTRA_TESTS
- bool "Enable extra run-time crypto self tests"
- depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS && CRYPTO_MANAGER
- help
- Enable extra run-time self tests of registered crypto algorithms,
- including randomized fuzz tests.
+ The cryptographic self-tests run at boot time, or at algorithm
+ registration time if algorithms are dynamically loaded later.
- This is intended for developer use only, as these tests take much
- longer to run than the normal self tests.
+ This is primarily intended for developer use. It should not be
+ enabled in production kernels, unless you are trying to use these
+ tests to fulfill a FIPS testing requirement.
config CRYPTO_NULL
tristate "Null algorithms"
- select CRYPTO_NULL2
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ select CRYPTO_HASH
help
These are 'Null' algorithms, used by IPsec, which do nothing.
-config CRYPTO_NULL2
- tristate
- select CRYPTO_ALGAPI2
- select CRYPTO_SKCIPHER2
- select CRYPTO_HASH2
-
config CRYPTO_PCRYPT
tristate "Parallel crypto engine"
depends on SMP
@@ -228,7 +221,6 @@ config CRYPTO_AUTHENC
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
select CRYPTO_HASH
- select CRYPTO_NULL
help
Authenc: Combined mode wrapper for IPsec.
@@ -240,18 +232,21 @@ config CRYPTO_KRB5ENC
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
select CRYPTO_HASH
- select CRYPTO_NULL
help
Combined hash and cipher support for Kerberos 5 RFC3961 simplified
profile. This is required for Kerberos 5-style encryption, used by
sunrpc/NFS and rxrpc/AFS.
-config CRYPTO_TEST
- tristate "Testing module"
+config CRYPTO_BENCHMARK
+ tristate "Crypto benchmarking module"
depends on m || EXPERT
select CRYPTO_MANAGER
help
- Quick & dirty crypto test module.
+ Quick & dirty crypto benchmarking module.
+
+ This is mainly intended for use by people developing cryptographic
+ algorithms in the kernel. It should not be enabled in production
+ kernels.
config CRYPTO_SIMD
tristate
@@ -634,8 +629,8 @@ config CRYPTO_ARC4
config CRYPTO_CHACHA20
tristate "ChaCha"
+ select CRYPTO_LIB_CHACHA
select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_LIB_CHACHA_INTERNAL
select CRYPTO_SKCIPHER
help
The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms
@@ -784,8 +779,8 @@ config CRYPTO_AEGIS128_SIMD
config CRYPTO_CHACHA20POLY1305
tristate "ChaCha20-Poly1305"
select CRYPTO_CHACHA20
- select CRYPTO_POLY1305
select CRYPTO_AEAD
+ select CRYPTO_LIB_POLY1305
select CRYPTO_MANAGER
help
ChaCha20 stream cipher and Poly1305 authenticator combined
@@ -806,7 +801,6 @@ config CRYPTO_GCM
select CRYPTO_CTR
select CRYPTO_AEAD
select CRYPTO_GHASH
- select CRYPTO_NULL
select CRYPTO_MANAGER
help
GCM (Galois/Counter Mode) authenticated encryption mode and GMAC
@@ -817,7 +811,6 @@ config CRYPTO_GCM
config CRYPTO_GENIV
tristate
select CRYPTO_AEAD
- select CRYPTO_NULL
select CRYPTO_MANAGER
select CRYPTO_RNG_DEFAULT
@@ -953,18 +946,6 @@ config CRYPTO_POLYVAL
This is used in HCTR2. It is not a general-purpose
cryptographic hash function.
-config CRYPTO_POLY1305
- tristate "Poly1305"
- select CRYPTO_HASH
- select CRYPTO_LIB_POLY1305_GENERIC
- select CRYPTO_LIB_POLY1305_INTERNAL
- help
- Poly1305 authenticator algorithm (RFC7539)
-
- Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
- It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
- in IETF protocols. This is the portable C implementation of Poly1305.
-
config CRYPTO_RMD160
tristate "RIPEMD-160"
select CRYPTO_HASH
@@ -994,6 +975,7 @@ config CRYPTO_SHA256
tristate "SHA-224 and SHA-256"
select CRYPTO_HASH
select CRYPTO_LIB_SHA256
+ select CRYPTO_LIB_SHA256_GENERIC
help
SHA-224 and SHA-256 secure hash algorithms (FIPS 180, ISO/IEC 10118-3)
@@ -1012,13 +994,10 @@ config CRYPTO_SHA3
help
SHA-3 secure hash algorithms (FIPS 202, ISO/IEC 10118-3)
-config CRYPTO_SM3
- tristate
-
config CRYPTO_SM3_GENERIC
tristate "SM3 (ShangMi 3)"
select CRYPTO_HASH
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
help
SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012, ISO/IEC 10118-3)
@@ -1406,7 +1385,6 @@ config CRYPTO_USER_API_AEAD
depends on NET
select CRYPTO_AEAD
select CRYPTO_SKCIPHER
- select CRYPTO_NULL
select CRYPTO_USER_API
help
Enable the userspace interface for AEAD cipher algorithms.
diff --git a/crypto/Makefile b/crypto/Makefile
index 0e6ab5ffd3f7..017df3a2e4bb 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -71,15 +71,15 @@ obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o
-obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
+obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o
obj-$(CONFIG_CRYPTO_MD4) += md4.o
obj-$(CONFIG_CRYPTO_MD5) += md5.o
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
-obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
+obj-$(CONFIG_CRYPTO_SHA256) += sha256.o
+CFLAGS_sha256.o += -DARCH=$(ARCH)
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
-obj-$(CONFIG_CRYPTO_SM3) += sm3.o
obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o
obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
@@ -148,14 +148,16 @@ obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
obj-$(CONFIG_CRYPTO_SEED) += seed.o
obj-$(CONFIG_CRYPTO_ARIA) += aria_generic.o
-obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o
-obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha.o
+CFLAGS_chacha.o += -DARCH=$(ARCH)
obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
-obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
-obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
-CFLAGS_crc32c_generic.o += -DARCH=$(ARCH)
-CFLAGS_crc32_generic.o += -DARCH=$(ARCH)
+obj-$(CONFIG_CRYPTO_CRC32C) += crc32c-cryptoapi.o
+crc32c-cryptoapi-y := crc32c.o
+CFLAGS_crc32c.o += -DARCH=$(ARCH)
+obj-$(CONFIG_CRYPTO_CRC32) += crc32-cryptoapi.o
+crc32-cryptoapi-y := crc32.o
+CFLAGS_crc32.o += -DARCH=$(ARCH)
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_KRB5ENC) += krb5enc.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
@@ -172,7 +174,7 @@ KASAN_SANITIZE_jitterentropy.o = n
UBSAN_SANITIZE_jitterentropy.o = n
jitterentropy_rng-y := jitterentropy.o jitterentropy-kcapi.o
obj-$(CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE) += jitterentropy-testing.o
-obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
+obj-$(CONFIG_CRYPTO_BENCHMARK) += tcrypt.o
obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
obj-$(CONFIG_CRYPTO_POLYVAL) += polyval-generic.o
obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
diff --git a/crypto/acompress.c b/crypto/acompress.c
index f7a3fbe5447e..be28cbfd22e3 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -8,20 +8,32 @@
*/
#include <crypto/internal/acompress.h>
+#include <crypto/scatterwalk.h>
#include <linux/cryptouser.h>
-#include <linux/errno.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/page-flags.h>
+#include <linux/percpu.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
#include <linux/seq_file.h>
-#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/workqueue.h>
#include <net/netlink.h>
#include "compress.h"
struct crypto_scomp;
+enum {
+ ACOMP_WALK_SLEEP = 1 << 0,
+ ACOMP_WALK_SRC_LINEAR = 1 << 1,
+ ACOMP_WALK_DST_LINEAR = 1 << 2,
+};
+
static const struct crypto_type crypto_acomp_type;
static void acomp_reqchain_done(void *data, int err);
@@ -65,7 +77,7 @@ static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm)
alg->exit(acomp);
if (acomp_is_async(acomp))
- crypto_free_acomp(acomp->fb);
+ crypto_free_acomp(crypto_acomp_fb(acomp));
}
static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
@@ -75,8 +87,6 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
struct crypto_acomp *fb = NULL;
int err;
- acomp->fb = acomp;
-
if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
return crypto_init_scomp_ops_async(tfm);
@@ -90,12 +100,12 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
if (crypto_acomp_reqsize(fb) > MAX_SYNC_COMP_REQSIZE)
goto out_free_fb;
- acomp->fb = fb;
+ tfm->fb = crypto_acomp_tfm(fb);
}
acomp->compress = alg->compress;
acomp->decompress = alg->decompress;
- acomp->reqsize = alg->reqsize;
+ acomp->reqsize = alg->base.cra_reqsize;
acomp->base.exit = crypto_acomp_exit_tfm;
@@ -136,6 +146,7 @@ static const struct crypto_type crypto_acomp_type = {
.maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK,
.type = CRYPTO_ALG_TYPE_ACOMPRESS,
.tfmsize = offsetof(struct crypto_acomp, base),
+ .algsize = offsetof(struct acomp_alg, base),
};
struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
@@ -161,7 +172,6 @@ static void acomp_save_req(struct acomp_req *req, crypto_completion_t cplt)
state->data = req->base.data;
req->base.complete = cplt;
req->base.data = state;
- state->req0 = req;
}
static void acomp_restore_req(struct acomp_req *req)
@@ -172,23 +182,16 @@ static void acomp_restore_req(struct acomp_req *req)
req->base.data = state->data;
}
-static void acomp_reqchain_virt(struct acomp_req_chain *state, int err)
+static void acomp_reqchain_virt(struct acomp_req *req)
{
- struct acomp_req *req = state->cur;
+ struct acomp_req_chain *state = &req->chain;
unsigned int slen = req->slen;
unsigned int dlen = req->dlen;
- req->base.err = err;
- state = &req->chain;
-
if (state->flags & CRYPTO_ACOMP_REQ_SRC_VIRT)
acomp_request_set_src_dma(req, state->src, slen);
- else if (state->flags & CRYPTO_ACOMP_REQ_SRC_FOLIO)
- acomp_request_set_src_folio(req, state->sfolio, state->soff, slen);
if (state->flags & CRYPTO_ACOMP_REQ_DST_VIRT)
acomp_request_set_dst_dma(req, state->dst, dlen);
- else if (state->flags & CRYPTO_ACOMP_REQ_DST_FOLIO)
- acomp_request_set_dst_folio(req, state->dfolio, state->doff, dlen);
}
static void acomp_virt_to_sg(struct acomp_req *req)
@@ -196,9 +199,7 @@ static void acomp_virt_to_sg(struct acomp_req *req)
struct acomp_req_chain *state = &req->chain;
state->flags = req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT |
- CRYPTO_ACOMP_REQ_DST_VIRT |
- CRYPTO_ACOMP_REQ_SRC_FOLIO |
- CRYPTO_ACOMP_REQ_DST_FOLIO);
+ CRYPTO_ACOMP_REQ_DST_VIRT);
if (acomp_request_src_isvirt(req)) {
unsigned int slen = req->slen;
@@ -207,17 +208,6 @@ static void acomp_virt_to_sg(struct acomp_req *req)
state->src = svirt;
sg_init_one(&state->ssg, svirt, slen);
acomp_request_set_src_sg(req, &state->ssg, slen);
- } else if (acomp_request_src_isfolio(req)) {
- struct folio *folio = req->sfolio;
- unsigned int slen = req->slen;
- size_t off = req->soff;
-
- state->sfolio = folio;
- state->soff = off;
- sg_init_table(&state->ssg, 1);
- sg_set_page(&state->ssg, folio_page(folio, off / PAGE_SIZE),
- slen, off % PAGE_SIZE);
- acomp_request_set_src_sg(req, &state->ssg, slen);
}
if (acomp_request_dst_isvirt(req)) {
@@ -227,39 +217,15 @@ static void acomp_virt_to_sg(struct acomp_req *req)
state->dst = dvirt;
sg_init_one(&state->dsg, dvirt, dlen);
acomp_request_set_dst_sg(req, &state->dsg, dlen);
- } else if (acomp_request_dst_isfolio(req)) {
- struct folio *folio = req->dfolio;
- unsigned int dlen = req->dlen;
- size_t off = req->doff;
-
- state->dfolio = folio;
- state->doff = off;
- sg_init_table(&state->dsg, 1);
- sg_set_page(&state->dsg, folio_page(folio, off / PAGE_SIZE),
- dlen, off % PAGE_SIZE);
- acomp_request_set_src_sg(req, &state->dsg, dlen);
}
}
-static int acomp_do_nondma(struct acomp_req_chain *state,
- struct acomp_req *req)
+static int acomp_do_nondma(struct acomp_req *req, bool comp)
{
- u32 keep = CRYPTO_ACOMP_REQ_SRC_VIRT |
- CRYPTO_ACOMP_REQ_SRC_NONDMA |
- CRYPTO_ACOMP_REQ_DST_VIRT |
- CRYPTO_ACOMP_REQ_DST_NONDMA;
- ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req));
+ ACOMP_FBREQ_ON_STACK(fbreq, req);
int err;
- acomp_request_set_callback(fbreq, req->base.flags, NULL, NULL);
- fbreq->base.flags &= ~keep;
- fbreq->base.flags |= req->base.flags & keep;
- fbreq->src = req->src;
- fbreq->dst = req->dst;
- fbreq->slen = req->slen;
- fbreq->dlen = req->dlen;
-
- if (state->op == crypto_acomp_reqtfm(req)->compress)
+ if (comp)
err = crypto_acomp_compress(fbreq);
else
err = crypto_acomp_decompress(fbreq);
@@ -268,114 +234,74 @@ static int acomp_do_nondma(struct acomp_req_chain *state,
return err;
}
-static int acomp_do_one_req(struct acomp_req_chain *state,
- struct acomp_req *req)
+static int acomp_do_one_req(struct acomp_req *req, bool comp)
{
- state->cur = req;
-
if (acomp_request_isnondma(req))
- return acomp_do_nondma(state, req);
+ return acomp_do_nondma(req, comp);
acomp_virt_to_sg(req);
- return state->op(req);
+ return comp ? crypto_acomp_reqtfm(req)->compress(req) :
+ crypto_acomp_reqtfm(req)->decompress(req);
}
-static int acomp_reqchain_finish(struct acomp_req *req0, int err, u32 mask)
+static int acomp_reqchain_finish(struct acomp_req *req, int err)
{
- struct acomp_req_chain *state = req0->base.data;
- struct acomp_req *req = state->cur;
- struct acomp_req *n;
-
- acomp_reqchain_virt(state, err);
-
- if (req != req0)
- list_add_tail(&req->base.list, &req0->base.list);
-
- list_for_each_entry_safe(req, n, &state->head, base.list) {
- list_del_init(&req->base.list);
-
- req->base.flags &= mask;
- req->base.complete = acomp_reqchain_done;
- req->base.data = state;
-
- err = acomp_do_one_req(state, req);
-
- if (err == -EINPROGRESS) {
- if (!list_empty(&state->head))
- err = -EBUSY;
- goto out;
- }
-
- if (err == -EBUSY)
- goto out;
-
- acomp_reqchain_virt(state, err);
- list_add_tail(&req->base.list, &req0->base.list);
- }
-
- acomp_restore_req(req0);
-
-out:
+ acomp_reqchain_virt(req);
+ acomp_restore_req(req);
return err;
}
static void acomp_reqchain_done(void *data, int err)
{
- struct acomp_req_chain *state = data;
- crypto_completion_t compl = state->compl;
+ struct acomp_req *req = data;
+ crypto_completion_t compl;
- data = state->data;
+ compl = req->chain.compl;
+ data = req->chain.data;
- if (err == -EINPROGRESS) {
- if (!list_empty(&state->head))
- return;
+ if (err == -EINPROGRESS)
goto notify;
- }
- err = acomp_reqchain_finish(state->req0, err,
- CRYPTO_TFM_REQ_MAY_BACKLOG);
- if (err == -EBUSY)
- return;
+ err = acomp_reqchain_finish(req, err);
notify:
compl(data, err);
}
-static int acomp_do_req_chain(struct acomp_req *req,
- int (*op)(struct acomp_req *req))
+static int acomp_do_req_chain(struct acomp_req *req, bool comp)
{
- struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
- struct acomp_req_chain *state;
int err;
- if (crypto_acomp_req_chain(tfm) ||
- (!acomp_request_chained(req) && acomp_request_issg(req)))
- return op(req);
-
acomp_save_req(req, acomp_reqchain_done);
- state = req->base.data;
-
- state->op = op;
- state->src = NULL;
- INIT_LIST_HEAD(&state->head);
- list_splice_init(&req->base.list, &state->head);
- err = acomp_do_one_req(state, req);
+ err = acomp_do_one_req(req, comp);
if (err == -EBUSY || err == -EINPROGRESS)
- return -EBUSY;
+ return err;
- return acomp_reqchain_finish(req, err, ~0);
+ return acomp_reqchain_finish(req, err);
}
int crypto_acomp_compress(struct acomp_req *req)
{
- return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->compress);
+ struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+ if (acomp_req_on_stack(req) && acomp_is_async(tfm))
+ return -EAGAIN;
+ if (crypto_acomp_req_virt(tfm) || acomp_request_issg(req))
+ return crypto_acomp_reqtfm(req)->compress(req);
+ return acomp_do_req_chain(req, true);
}
EXPORT_SYMBOL_GPL(crypto_acomp_compress);
int crypto_acomp_decompress(struct acomp_req *req)
{
- return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->decompress);
+ struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+ if (acomp_req_on_stack(req) && acomp_is_async(tfm))
+ return -EAGAIN;
+ if (crypto_acomp_req_virt(tfm) || acomp_request_issg(req))
+ return crypto_acomp_reqtfm(req)->decompress(req);
+ return acomp_do_req_chain(req, false);
}
EXPORT_SYMBOL_GPL(crypto_acomp_decompress);
@@ -434,5 +360,229 @@ void crypto_unregister_acomps(struct acomp_alg *algs, int count)
}
EXPORT_SYMBOL_GPL(crypto_unregister_acomps);
+static void acomp_stream_workfn(struct work_struct *work)
+{
+ struct crypto_acomp_streams *s =
+ container_of(work, struct crypto_acomp_streams, stream_work);
+ struct crypto_acomp_stream __percpu *streams = s->streams;
+ int cpu;
+
+ for_each_cpu(cpu, &s->stream_want) {
+ struct crypto_acomp_stream *ps;
+ void *ctx;
+
+ ps = per_cpu_ptr(streams, cpu);
+ if (ps->ctx)
+ continue;
+
+ ctx = s->alloc_ctx();
+ if (IS_ERR(ctx))
+ break;
+
+ spin_lock_bh(&ps->lock);
+ ps->ctx = ctx;
+ spin_unlock_bh(&ps->lock);
+
+ cpumask_clear_cpu(cpu, &s->stream_want);
+ }
+}
+
+void crypto_acomp_free_streams(struct crypto_acomp_streams *s)
+{
+ struct crypto_acomp_stream __percpu *streams = s->streams;
+ void (*free_ctx)(void *);
+ int i;
+
+ s->streams = NULL;
+ if (!streams)
+ return;
+
+ cancel_work_sync(&s->stream_work);
+ free_ctx = s->free_ctx;
+
+ for_each_possible_cpu(i) {
+ struct crypto_acomp_stream *ps = per_cpu_ptr(streams, i);
+
+ if (!ps->ctx)
+ continue;
+
+ free_ctx(ps->ctx);
+ }
+
+ free_percpu(streams);
+}
+EXPORT_SYMBOL_GPL(crypto_acomp_free_streams);
+
+int crypto_acomp_alloc_streams(struct crypto_acomp_streams *s)
+{
+ struct crypto_acomp_stream __percpu *streams;
+ struct crypto_acomp_stream *ps;
+ unsigned int i;
+ void *ctx;
+
+ if (s->streams)
+ return 0;
+
+ streams = alloc_percpu(struct crypto_acomp_stream);
+ if (!streams)
+ return -ENOMEM;
+
+ ctx = s->alloc_ctx();
+ if (IS_ERR(ctx)) {
+ free_percpu(streams);
+ return PTR_ERR(ctx);
+ }
+
+ i = cpumask_first(cpu_possible_mask);
+ ps = per_cpu_ptr(streams, i);
+ ps->ctx = ctx;
+
+ for_each_possible_cpu(i) {
+ ps = per_cpu_ptr(streams, i);
+ spin_lock_init(&ps->lock);
+ }
+
+ s->streams = streams;
+
+ INIT_WORK(&s->stream_work, acomp_stream_workfn);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_acomp_alloc_streams);
+
+struct crypto_acomp_stream *crypto_acomp_lock_stream_bh(
+ struct crypto_acomp_streams *s) __acquires(stream)
+{
+ struct crypto_acomp_stream __percpu *streams = s->streams;
+ int cpu = raw_smp_processor_id();
+ struct crypto_acomp_stream *ps;
+
+ ps = per_cpu_ptr(streams, cpu);
+ spin_lock_bh(&ps->lock);
+ if (likely(ps->ctx))
+ return ps;
+ spin_unlock(&ps->lock);
+
+ cpumask_set_cpu(cpu, &s->stream_want);
+ schedule_work(&s->stream_work);
+
+ ps = per_cpu_ptr(streams, cpumask_first(cpu_possible_mask));
+ spin_lock(&ps->lock);
+ return ps;
+}
+EXPORT_SYMBOL_GPL(crypto_acomp_lock_stream_bh);
+
+void acomp_walk_done_src(struct acomp_walk *walk, int used)
+{
+ walk->slen -= used;
+ if ((walk->flags & ACOMP_WALK_SRC_LINEAR))
+ scatterwalk_advance(&walk->in, used);
+ else
+ scatterwalk_done_src(&walk->in, used);
+
+ if ((walk->flags & ACOMP_WALK_SLEEP))
+ cond_resched();
+}
+EXPORT_SYMBOL_GPL(acomp_walk_done_src);
+
+void acomp_walk_done_dst(struct acomp_walk *walk, int used)
+{
+ walk->dlen -= used;
+ if ((walk->flags & ACOMP_WALK_DST_LINEAR))
+ scatterwalk_advance(&walk->out, used);
+ else
+ scatterwalk_done_dst(&walk->out, used);
+
+ if ((walk->flags & ACOMP_WALK_SLEEP))
+ cond_resched();
+}
+EXPORT_SYMBOL_GPL(acomp_walk_done_dst);
+
+int acomp_walk_next_src(struct acomp_walk *walk)
+{
+ unsigned int slen = walk->slen;
+ unsigned int max = UINT_MAX;
+
+ if (!preempt_model_preemptible() && (walk->flags & ACOMP_WALK_SLEEP))
+ max = PAGE_SIZE;
+ if ((walk->flags & ACOMP_WALK_SRC_LINEAR)) {
+ walk->in.__addr = (void *)(((u8 *)walk->in.sg) +
+ walk->in.offset);
+ return min(slen, max);
+ }
+
+ return slen ? scatterwalk_next(&walk->in, slen) : 0;
+}
+EXPORT_SYMBOL_GPL(acomp_walk_next_src);
+
+int acomp_walk_next_dst(struct acomp_walk *walk)
+{
+ unsigned int dlen = walk->dlen;
+ unsigned int max = UINT_MAX;
+
+ if (!preempt_model_preemptible() && (walk->flags & ACOMP_WALK_SLEEP))
+ max = PAGE_SIZE;
+ if ((walk->flags & ACOMP_WALK_DST_LINEAR)) {
+ walk->out.__addr = (void *)(((u8 *)walk->out.sg) +
+ walk->out.offset);
+ return min(dlen, max);
+ }
+
+ return dlen ? scatterwalk_next(&walk->out, dlen) : 0;
+}
+EXPORT_SYMBOL_GPL(acomp_walk_next_dst);
+
+int acomp_walk_virt(struct acomp_walk *__restrict walk,
+ struct acomp_req *__restrict req, bool atomic)
+{
+ struct scatterlist *src = req->src;
+ struct scatterlist *dst = req->dst;
+
+ walk->slen = req->slen;
+ walk->dlen = req->dlen;
+
+ if (!walk->slen || !walk->dlen)
+ return -EINVAL;
+
+ walk->flags = 0;
+ if ((req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) && !atomic)
+ walk->flags |= ACOMP_WALK_SLEEP;
+ if ((req->base.flags & CRYPTO_ACOMP_REQ_SRC_VIRT))
+ walk->flags |= ACOMP_WALK_SRC_LINEAR;
+ if ((req->base.flags & CRYPTO_ACOMP_REQ_DST_VIRT))
+ walk->flags |= ACOMP_WALK_DST_LINEAR;
+
+ if ((walk->flags & ACOMP_WALK_SRC_LINEAR)) {
+ walk->in.sg = (void *)req->svirt;
+ walk->in.offset = 0;
+ } else
+ scatterwalk_start(&walk->in, src);
+ if ((walk->flags & ACOMP_WALK_DST_LINEAR)) {
+ walk->out.sg = (void *)req->dvirt;
+ walk->out.offset = 0;
+ } else
+ scatterwalk_start(&walk->out, dst);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(acomp_walk_virt);
+
+struct acomp_req *acomp_request_clone(struct acomp_req *req,
+ size_t total, gfp_t gfp)
+{
+ struct acomp_req *nreq;
+
+ nreq = container_of(crypto_request_clone(&req->base, total, gfp),
+ struct acomp_req, base);
+ if (nreq == req)
+ return req;
+
+ if (req->src == &req->chain.ssg)
+ nreq->src = &nreq->chain.ssg;
+ if (req->dst == &req->chain.dsg)
+ nreq->dst = &nreq->chain.dsg;
+ return nreq;
+}
+EXPORT_SYMBOL_GPL(acomp_request_clone);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Asynchronous compression type");
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
index c3ef583598b4..a6bca877c3c7 100644
--- a/crypto/adiantum.c
+++ b/crypto/adiantum.c
@@ -639,7 +639,7 @@ static void __exit adiantum_module_exit(void)
crypto_unregister_template(&adiantum_tmpl);
}
-subsys_initcall(adiantum_module_init);
+module_init(adiantum_module_init);
module_exit(adiantum_module_exit);
MODULE_DESCRIPTION("Adiantum length-preserving encryption mode");
diff --git a/crypto/aead.c b/crypto/aead.c
index 12f5b42171af..5d14b775036e 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -186,6 +186,7 @@ static const struct crypto_type crypto_aead_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_AEAD,
.tfmsize = offsetof(struct crypto_aead, base),
+ .algsize = offsetof(struct aead_alg, base),
};
int crypto_grab_aead(struct crypto_aead_spawn *spawn,
diff --git a/crypto/aegis128-core.c b/crypto/aegis128-core.c
index 72f6ee1345ef..ca80d861345d 100644
--- a/crypto/aegis128-core.c
+++ b/crypto/aegis128-core.c
@@ -566,7 +566,7 @@ static void __exit crypto_aegis128_module_exit(void)
crypto_unregister_aead(&crypto_aegis128_alg_generic);
}
-subsys_initcall(crypto_aegis128_module_init);
+module_init(crypto_aegis128_module_init);
module_exit(crypto_aegis128_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 3c66d425c97b..85d2e78c8ef2 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -1311,7 +1311,7 @@ static void __exit aes_fini(void)
crypto_unregister_alg(&aes_alg);
}
-subsys_initcall(aes_init);
+module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 2d9eec2b2b1c..e10bc2659ae4 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -18,7 +18,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/sched.h>
+#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/string.h>
@@ -42,26 +42,46 @@ struct crypto_hash_walk {
struct scatterlist *sg;
};
-struct ahash_save_req_state {
- struct list_head head;
- struct ahash_request *req0;
- struct ahash_request *cur;
- int (*op)(struct ahash_request *req);
+static int ahash_def_finup(struct ahash_request *req);
+
+static inline bool crypto_ahash_block_only(struct crypto_ahash *tfm)
+{
+ return crypto_ahash_alg(tfm)->halg.base.cra_flags &
+ CRYPTO_AHASH_ALG_BLOCK_ONLY;
+}
+
+static inline bool crypto_ahash_final_nonzero(struct crypto_ahash *tfm)
+{
+ return crypto_ahash_alg(tfm)->halg.base.cra_flags &
+ CRYPTO_AHASH_ALG_FINAL_NONZERO;
+}
+
+static inline bool crypto_ahash_need_fallback(struct crypto_ahash *tfm)
+{
+ return crypto_ahash_alg(tfm)->halg.base.cra_flags &
+ CRYPTO_ALG_NEED_FALLBACK;
+}
+
+static inline void ahash_op_done(void *data, int err,
+ int (*finish)(struct ahash_request *, int))
+{
+ struct ahash_request *areq = data;
crypto_completion_t compl;
- void *data;
- struct scatterlist sg;
- const u8 *src;
- u8 *page;
- unsigned int offset;
- unsigned int nbytes;
-};
-static void ahash_reqchain_done(void *data, int err);
-static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt);
-static void ahash_restore_req(struct ahash_request *req);
-static void ahash_def_finup_done1(void *data, int err);
-static int ahash_def_finup_finish1(struct ahash_request *req, int err);
-static int ahash_def_finup(struct ahash_request *req);
+ compl = areq->saved_complete;
+ data = areq->saved_data;
+ if (err == -EINPROGRESS)
+ goto out;
+
+ areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ err = finish(areq, err);
+ if (err == -EINPROGRESS || err == -EBUSY)
+ return;
+
+out:
+ compl(data, err);
+}
static int hash_walk_next(struct crypto_hash_walk *walk)
{
@@ -266,7 +286,6 @@ static int crypto_init_ahash_using_shash(struct crypto_tfm *tfm)
crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) &
CRYPTO_TFM_NEED_KEY);
- crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash);
return 0;
}
@@ -303,6 +322,9 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
int err;
err = alg->setkey(tfm, key, keylen);
+ if (!err && crypto_ahash_need_fallback(tfm))
+ err = crypto_ahash_setkey(crypto_ahash_fb(tfm),
+ key, keylen);
if (unlikely(err)) {
ahash_set_needkey(tfm, alg);
return err;
@@ -313,421 +335,261 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
}
EXPORT_SYMBOL_GPL(crypto_ahash_setkey);
-static bool ahash_request_hasvirt(struct ahash_request *req)
-{
- return ahash_request_isvirt(req);
-}
-
-static int ahash_reqchain_virt(struct ahash_save_req_state *state,
- int err, u32 mask)
-{
- struct ahash_request *req = state->cur;
-
- for (;;) {
- unsigned len = state->nbytes;
-
- req->base.err = err;
-
- if (!state->offset)
- break;
-
- if (state->offset == len || err) {
- u8 *result = req->result;
-
- ahash_request_set_virt(req, state->src, result, len);
- state->offset = 0;
- break;
- }
-
- len -= state->offset;
-
- len = min(PAGE_SIZE, len);
- memcpy(state->page, state->src + state->offset, len);
- state->offset += len;
- req->nbytes = len;
-
- err = state->op(req);
- if (err == -EINPROGRESS) {
- if (!list_empty(&state->head) ||
- state->offset < state->nbytes)
- err = -EBUSY;
- break;
- }
-
- if (err == -EBUSY)
- break;
- }
-
- return err;
-}
-
-static int ahash_reqchain_finish(struct ahash_request *req0,
- struct ahash_save_req_state *state,
- int err, u32 mask)
-{
- struct ahash_request *req = state->cur;
- struct crypto_ahash *tfm;
- struct ahash_request *n;
- bool update;
- u8 *page;
-
- err = ahash_reqchain_virt(state, err, mask);
- if (err == -EINPROGRESS || err == -EBUSY)
- goto out;
-
- if (req != req0)
- list_add_tail(&req->base.list, &req0->base.list);
-
- tfm = crypto_ahash_reqtfm(req);
- update = state->op == crypto_ahash_alg(tfm)->update;
-
- list_for_each_entry_safe(req, n, &state->head, base.list) {
- list_del_init(&req->base.list);
-
- req->base.flags &= mask;
- req->base.complete = ahash_reqchain_done;
- req->base.data = state;
- state->cur = req;
-
- if (update && ahash_request_isvirt(req) && req->nbytes) {
- unsigned len = req->nbytes;
- u8 *result = req->result;
-
- state->src = req->svirt;
- state->nbytes = len;
-
- len = min(PAGE_SIZE, len);
-
- memcpy(state->page, req->svirt, len);
- state->offset = len;
-
- ahash_request_set_crypt(req, &state->sg, result, len);
- }
-
- err = state->op(req);
-
- if (err == -EINPROGRESS) {
- if (!list_empty(&state->head) ||
- state->offset < state->nbytes)
- err = -EBUSY;
- goto out;
- }
-
- if (err == -EBUSY)
- goto out;
-
- err = ahash_reqchain_virt(state, err, mask);
- if (err == -EINPROGRESS || err == -EBUSY)
- goto out;
-
- list_add_tail(&req->base.list, &req0->base.list);
- }
-
- page = state->page;
- if (page) {
- memset(page, 0, PAGE_SIZE);
- free_page((unsigned long)page);
- }
- ahash_restore_req(req0);
-
-out:
- return err;
-}
-
-static void ahash_reqchain_done(void *data, int err)
-{
- struct ahash_save_req_state *state = data;
- crypto_completion_t compl = state->compl;
-
- data = state->data;
-
- if (err == -EINPROGRESS) {
- if (!list_empty(&state->head) || state->offset < state->nbytes)
- return;
- goto notify;
- }
-
- err = ahash_reqchain_finish(state->req0, state, err,
- CRYPTO_TFM_REQ_MAY_BACKLOG);
- if (err == -EBUSY)
- return;
-
-notify:
- compl(data, err);
-}
-
static int ahash_do_req_chain(struct ahash_request *req,
- int (*op)(struct ahash_request *req))
+ int (*const *op)(struct ahash_request *req))
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- bool update = op == crypto_ahash_alg(tfm)->update;
- struct ahash_save_req_state *state;
- struct ahash_save_req_state state0;
- u8 *page = NULL;
int err;
- if (crypto_ahash_req_chain(tfm) ||
- (!ahash_request_chained(req) &&
- (!update || !ahash_request_isvirt(req))))
- return op(req);
-
- if (update && ahash_request_hasvirt(req)) {
- gfp_t gfp;
- u32 flags;
-
- flags = ahash_request_flags(req);
- gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
- page = (void *)__get_free_page(gfp);
- err = -ENOMEM;
- if (!page)
- goto out_set_chain;
- }
-
- state = &state0;
- if (ahash_is_async(tfm)) {
- err = ahash_save_req(req, ahash_reqchain_done);
- if (err)
- goto out_free_page;
+ if (crypto_ahash_req_virt(tfm) || !ahash_request_isvirt(req))
+ return (*op)(req);
- state = req->base.data;
- }
+ if (crypto_ahash_statesize(tfm) > HASH_MAX_STATESIZE)
+ return -ENOSYS;
- state->op = op;
- state->cur = req;
- state->page = page;
- state->offset = 0;
- state->nbytes = 0;
- INIT_LIST_HEAD(&state->head);
+ {
+ u8 state[HASH_MAX_STATESIZE];
- if (page)
- sg_init_one(&state->sg, page, PAGE_SIZE);
+ if (op == &crypto_ahash_alg(tfm)->digest) {
+ ahash_request_set_tfm(req, crypto_ahash_fb(tfm));
+ err = crypto_ahash_digest(req);
+ goto out_no_state;
+ }
- if (update && ahash_request_isvirt(req) && req->nbytes) {
- unsigned len = req->nbytes;
- u8 *result = req->result;
+ err = crypto_ahash_export(req, state);
+ ahash_request_set_tfm(req, crypto_ahash_fb(tfm));
+ err = err ?: crypto_ahash_import(req, state);
- state->src = req->svirt;
- state->nbytes = len;
+ if (op == &crypto_ahash_alg(tfm)->finup) {
+ err = err ?: crypto_ahash_finup(req);
+ goto out_no_state;
+ }
- len = min(PAGE_SIZE, len);
+ err = err ?:
+ crypto_ahash_update(req) ?:
+ crypto_ahash_export(req, state);
- memcpy(page, req->svirt, len);
- state->offset = len;
+ ahash_request_set_tfm(req, tfm);
+ return err ?: crypto_ahash_import(req, state);
- ahash_request_set_crypt(req, &state->sg, result, len);
+out_no_state:
+ ahash_request_set_tfm(req, tfm);
+ return err;
}
-
- err = op(req);
- if (err == -EBUSY || err == -EINPROGRESS)
- return -EBUSY;
-
- return ahash_reqchain_finish(req, state, err, ~0);
-
-out_free_page:
- free_page((unsigned long)page);
-
-out_set_chain:
- req->base.err = err;
- return err;
}
int crypto_ahash_init(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- if (likely(tfm->using_shash)) {
- int err;
-
- err = crypto_shash_init(prepare_shash_desc(req, tfm));
- req->base.err = err;
- return err;
- }
-
+ if (likely(tfm->using_shash))
+ return crypto_shash_init(prepare_shash_desc(req, tfm));
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
return -ENOKEY;
+ if (ahash_req_on_stack(req) && ahash_is_async(tfm))
+ return -EAGAIN;
+ if (crypto_ahash_block_only(tfm)) {
+ u8 *buf = ahash_request_ctx(req);
- return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->init);
+ buf += crypto_ahash_reqsize(tfm) - 1;
+ *buf = 0;
+ }
+ return crypto_ahash_alg(tfm)->init(req);
}
EXPORT_SYMBOL_GPL(crypto_ahash_init);
-static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
+static void ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ahash_save_req_state *state;
- gfp_t gfp;
- u32 flags;
-
- if (!ahash_is_async(tfm))
- return 0;
-
- flags = ahash_request_flags(req);
- gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC;
- state = kmalloc(sizeof(*state), gfp);
- if (!state)
- return -ENOMEM;
-
- state->compl = req->base.complete;
- state->data = req->base.data;
+ req->saved_complete = req->base.complete;
+ req->saved_data = req->base.data;
req->base.complete = cplt;
- req->base.data = state;
- state->req0 = req;
-
- return 0;
+ req->base.data = req;
}
static void ahash_restore_req(struct ahash_request *req)
{
- struct ahash_save_req_state *state;
- struct crypto_ahash *tfm;
-
- tfm = crypto_ahash_reqtfm(req);
- if (!ahash_is_async(tfm))
- return;
-
- state = req->base.data;
-
- req->base.complete = state->compl;
- req->base.data = state->data;
- kfree(state);
+ req->base.complete = req->saved_complete;
+ req->base.data = req->saved_data;
}
-int crypto_ahash_update(struct ahash_request *req)
+static int ahash_update_finish(struct ahash_request *req, int err)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-
- if (likely(tfm->using_shash)) {
- int err;
-
- err = shash_ahash_update(req, ahash_request_ctx(req));
- req->base.err = err;
- return err;
+ bool nonzero = crypto_ahash_final_nonzero(tfm);
+ int bs = crypto_ahash_blocksize(tfm);
+ u8 *blenp = ahash_request_ctx(req);
+ int blen;
+ u8 *buf;
+
+ blenp += crypto_ahash_reqsize(tfm) - 1;
+ blen = *blenp;
+ buf = blenp - bs;
+
+ if (blen) {
+ req->src = req->sg_head + 1;
+ if (sg_is_chain(req->src))
+ req->src = sg_chain_ptr(req->src);
}
- return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->update);
-}
-EXPORT_SYMBOL_GPL(crypto_ahash_update);
+ req->nbytes += nonzero - blen;
-int crypto_ahash_final(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ blen = err < 0 ? 0 : err + nonzero;
+ if (ahash_request_isvirt(req))
+ memcpy(buf, req->svirt + req->nbytes - blen, blen);
+ else
+ memcpy_from_sglist(buf, req->src, req->nbytes - blen, blen);
+ *blenp = blen;
- if (likely(tfm->using_shash)) {
- int err;
+ ahash_restore_req(req);
- err = crypto_shash_final(ahash_request_ctx(req), req->result);
- req->base.err = err;
- return err;
- }
+ return err;
+}
- return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->final);
+static void ahash_update_done(void *data, int err)
+{
+ ahash_op_done(data, err, ahash_update_finish);
}
-EXPORT_SYMBOL_GPL(crypto_ahash_final);
-int crypto_ahash_finup(struct ahash_request *req)
+int crypto_ahash_update(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ bool nonzero = crypto_ahash_final_nonzero(tfm);
+ int bs = crypto_ahash_blocksize(tfm);
+ u8 *blenp = ahash_request_ctx(req);
+ int blen, err;
+ u8 *buf;
- if (likely(tfm->using_shash)) {
- int err;
-
- err = shash_ahash_finup(req, ahash_request_ctx(req));
- req->base.err = err;
- return err;
- }
+ if (likely(tfm->using_shash))
+ return shash_ahash_update(req, ahash_request_ctx(req));
+ if (ahash_req_on_stack(req) && ahash_is_async(tfm))
+ return -EAGAIN;
+ if (!crypto_ahash_block_only(tfm))
+ return ahash_do_req_chain(req, &crypto_ahash_alg(tfm)->update);
- if (!crypto_ahash_alg(tfm)->finup ||
- (!crypto_ahash_req_chain(tfm) && ahash_request_hasvirt(req)))
- return ahash_def_finup(req);
+ blenp += crypto_ahash_reqsize(tfm) - 1;
+ blen = *blenp;
+ buf = blenp - bs;
- return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->finup);
-}
-EXPORT_SYMBOL_GPL(crypto_ahash_finup);
+ if (blen + req->nbytes < bs + nonzero) {
+ if (ahash_request_isvirt(req))
+ memcpy(buf + blen, req->svirt, req->nbytes);
+ else
+ memcpy_from_sglist(buf + blen, req->src, 0,
+ req->nbytes);
-static int ahash_def_digest_finish(struct ahash_request *req, int err)
-{
- struct crypto_ahash *tfm;
+ *blenp += req->nbytes;
+ return 0;
+ }
- if (err)
- goto out;
+ if (blen) {
+ memset(req->sg_head, 0, sizeof(req->sg_head[0]));
+ sg_set_buf(req->sg_head, buf, blen);
+ if (req->src != req->sg_head + 1)
+ sg_chain(req->sg_head, 2, req->src);
+ req->src = req->sg_head;
+ req->nbytes += blen;
+ }
+ req->nbytes -= nonzero;
- tfm = crypto_ahash_reqtfm(req);
- if (ahash_is_async(tfm))
- req->base.complete = ahash_def_finup_done1;
+ ahash_save_req(req, ahash_update_done);
- err = crypto_ahash_update(req);
+ err = ahash_do_req_chain(req, &crypto_ahash_alg(tfm)->update);
if (err == -EINPROGRESS || err == -EBUSY)
return err;
- return ahash_def_finup_finish1(req, err);
-
-out:
- ahash_restore_req(req);
- return err;
+ return ahash_update_finish(req, err);
}
+EXPORT_SYMBOL_GPL(crypto_ahash_update);
-static void ahash_def_digest_done(void *data, int err)
+static int ahash_finup_finish(struct ahash_request *req, int err)
{
- struct ahash_save_req_state *state0 = data;
- struct ahash_save_req_state state;
- struct ahash_request *areq;
-
- state = *state0;
- areq = state.req0;
- if (err == -EINPROGRESS)
- goto out;
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ u8 *blenp = ahash_request_ctx(req);
+ int blen;
+
+ blenp += crypto_ahash_reqsize(tfm) - 1;
+ blen = *blenp;
+
+ if (blen) {
+ if (sg_is_last(req->src))
+ req->src = NULL;
+ else {
+ req->src = req->sg_head + 1;
+ if (sg_is_chain(req->src))
+ req->src = sg_chain_ptr(req->src);
+ }
+ req->nbytes -= blen;
+ }
- areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_restore_req(req);
- err = ahash_def_digest_finish(areq, err);
- if (err == -EINPROGRESS || err == -EBUSY)
- return;
+ return err;
+}
-out:
- state.compl(state.data, err);
+static void ahash_finup_done(void *data, int err)
+{
+ ahash_op_done(data, err, ahash_finup_finish);
}
-static int ahash_def_digest(struct ahash_request *req)
+int crypto_ahash_finup(struct ahash_request *req)
{
- int err;
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ int bs = crypto_ahash_blocksize(tfm);
+ u8 *blenp = ahash_request_ctx(req);
+ int blen, err;
+ u8 *buf;
- err = ahash_save_req(req, ahash_def_digest_done);
- if (err)
- return err;
+ if (likely(tfm->using_shash))
+ return shash_ahash_finup(req, ahash_request_ctx(req));
+ if (ahash_req_on_stack(req) && ahash_is_async(tfm))
+ return -EAGAIN;
+ if (!crypto_ahash_alg(tfm)->finup)
+ return ahash_def_finup(req);
+ if (!crypto_ahash_block_only(tfm))
+ return ahash_do_req_chain(req, &crypto_ahash_alg(tfm)->finup);
+
+ blenp += crypto_ahash_reqsize(tfm) - 1;
+ blen = *blenp;
+ buf = blenp - bs;
+
+ if (blen) {
+ memset(req->sg_head, 0, sizeof(req->sg_head[0]));
+ sg_set_buf(req->sg_head, buf, blen);
+ if (!req->src)
+ sg_mark_end(req->sg_head);
+ else if (req->src != req->sg_head + 1)
+ sg_chain(req->sg_head, 2, req->src);
+ req->src = req->sg_head;
+ req->nbytes += blen;
+ }
- err = crypto_ahash_init(req);
+ ahash_save_req(req, ahash_finup_done);
+
+ err = ahash_do_req_chain(req, &crypto_ahash_alg(tfm)->finup);
if (err == -EINPROGRESS || err == -EBUSY)
return err;
- return ahash_def_digest_finish(req, err);
+ return ahash_finup_finish(req, err);
}
+EXPORT_SYMBOL_GPL(crypto_ahash_finup);
int crypto_ahash_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- if (likely(tfm->using_shash)) {
- int err;
-
- err = shash_ahash_digest(req, prepare_shash_desc(req, tfm));
- req->base.err = err;
- return err;
- }
-
- if (!crypto_ahash_req_chain(tfm) && ahash_request_hasvirt(req))
- return ahash_def_digest(req);
-
+ if (likely(tfm->using_shash))
+ return shash_ahash_digest(req, prepare_shash_desc(req, tfm));
+ if (ahash_req_on_stack(req) && ahash_is_async(tfm))
+ return -EAGAIN;
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
return -ENOKEY;
-
- return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->digest);
+ return ahash_do_req_chain(req, &crypto_ahash_alg(tfm)->digest);
}
EXPORT_SYMBOL_GPL(crypto_ahash_digest);
static void ahash_def_finup_done2(void *data, int err)
{
- struct ahash_save_req_state *state = data;
- struct ahash_request *areq = state->req0;
+ struct ahash_request *areq = data;
if (err == -EINPROGRESS)
return;
@@ -738,14 +600,10 @@ static void ahash_def_finup_done2(void *data, int err)
static int ahash_def_finup_finish1(struct ahash_request *req, int err)
{
- struct crypto_ahash *tfm;
-
if (err)
goto out;
- tfm = crypto_ahash_reqtfm(req);
- if (ahash_is_async(tfm))
- req->base.complete = ahash_def_finup_done2;
+ req->base.complete = ahash_def_finup_done2;
err = crypto_ahash_final(req);
if (err == -EINPROGRESS || err == -EBUSY)
@@ -758,32 +616,14 @@ out:
static void ahash_def_finup_done1(void *data, int err)
{
- struct ahash_save_req_state *state0 = data;
- struct ahash_save_req_state state;
- struct ahash_request *areq;
-
- state = *state0;
- areq = state.req0;
- if (err == -EINPROGRESS)
- goto out;
-
- areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- err = ahash_def_finup_finish1(areq, err);
- if (err == -EINPROGRESS || err == -EBUSY)
- return;
-
-out:
- state.compl(state.data, err);
+ ahash_op_done(data, err, ahash_def_finup_finish1);
}
static int ahash_def_finup(struct ahash_request *req)
{
int err;
- err = ahash_save_req(req, ahash_def_finup_done1);
- if (err)
- return err;
+ ahash_save_req(req, ahash_def_finup_done1);
err = crypto_ahash_update(req);
if (err == -EINPROGRESS || err == -EBUSY)
@@ -792,16 +632,47 @@ static int ahash_def_finup(struct ahash_request *req)
return ahash_def_finup_finish1(req, err);
}
+int crypto_ahash_export_core(struct ahash_request *req, void *out)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+ if (likely(tfm->using_shash))
+ return crypto_shash_export_core(ahash_request_ctx(req), out);
+ return crypto_ahash_alg(tfm)->export_core(req, out);
+}
+EXPORT_SYMBOL_GPL(crypto_ahash_export_core);
+
int crypto_ahash_export(struct ahash_request *req, void *out)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
if (likely(tfm->using_shash))
return crypto_shash_export(ahash_request_ctx(req), out);
+ if (crypto_ahash_block_only(tfm)) {
+ unsigned int plen = crypto_ahash_blocksize(tfm) + 1;
+ unsigned int reqsize = crypto_ahash_reqsize(tfm);
+ unsigned int ss = crypto_ahash_statesize(tfm);
+ u8 *buf = ahash_request_ctx(req);
+
+ memcpy(out + ss - plen, buf + reqsize - plen, plen);
+ }
return crypto_ahash_alg(tfm)->export(req, out);
}
EXPORT_SYMBOL_GPL(crypto_ahash_export);
+int crypto_ahash_import_core(struct ahash_request *req, const void *in)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+ if (likely(tfm->using_shash))
+ return crypto_shash_import_core(prepare_shash_desc(req, tfm),
+ in);
+ if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+ return -ENOKEY;
+ return crypto_ahash_alg(tfm)->import_core(req, in);
+}
+EXPORT_SYMBOL_GPL(crypto_ahash_import_core);
+
int crypto_ahash_import(struct ahash_request *req, const void *in)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -810,6 +681,12 @@ int crypto_ahash_import(struct ahash_request *req, const void *in)
return crypto_shash_import(prepare_shash_desc(req, tfm), in);
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
return -ENOKEY;
+ if (crypto_ahash_block_only(tfm)) {
+ unsigned int reqsize = crypto_ahash_reqsize(tfm);
+ u8 *buf = ahash_request_ctx(req);
+
+ buf[reqsize - 1] = 0;
+ }
return crypto_ahash_alg(tfm)->import(req, in);
}
EXPORT_SYMBOL_GPL(crypto_ahash_import);
@@ -819,26 +696,73 @@ static void crypto_ahash_exit_tfm(struct crypto_tfm *tfm)
struct crypto_ahash *hash = __crypto_ahash_cast(tfm);
struct ahash_alg *alg = crypto_ahash_alg(hash);
- alg->exit_tfm(hash);
+ if (alg->exit_tfm)
+ alg->exit_tfm(hash);
+ else if (tfm->__crt_alg->cra_exit)
+ tfm->__crt_alg->cra_exit(tfm);
+
+ if (crypto_ahash_need_fallback(hash))
+ crypto_free_ahash(crypto_ahash_fb(hash));
}
static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
{
struct crypto_ahash *hash = __crypto_ahash_cast(tfm);
struct ahash_alg *alg = crypto_ahash_alg(hash);
+ struct crypto_ahash *fb = NULL;
+ int err;
crypto_ahash_set_statesize(hash, alg->halg.statesize);
- crypto_ahash_set_reqsize(hash, alg->reqsize);
+ crypto_ahash_set_reqsize(hash, crypto_tfm_alg_reqsize(tfm));
if (tfm->__crt_alg->cra_type == &crypto_shash_type)
return crypto_init_ahash_using_shash(tfm);
+ if (crypto_ahash_need_fallback(hash)) {
+ fb = crypto_alloc_ahash(crypto_ahash_alg_name(hash),
+ CRYPTO_ALG_REQ_VIRT,
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_REQ_VIRT |
+ CRYPTO_AHASH_ALG_NO_EXPORT_CORE);
+ if (IS_ERR(fb))
+ return PTR_ERR(fb);
+
+ tfm->fb = crypto_ahash_tfm(fb);
+ }
+
ahash_set_needkey(hash, alg);
- if (alg->exit_tfm)
- tfm->exit = crypto_ahash_exit_tfm;
+ tfm->exit = crypto_ahash_exit_tfm;
+
+ if (alg->init_tfm)
+ err = alg->init_tfm(hash);
+ else if (tfm->__crt_alg->cra_init)
+ err = tfm->__crt_alg->cra_init(tfm);
+ else
+ return 0;
+
+ if (err)
+ goto out_free_sync_hash;
+
+ if (!ahash_is_async(hash) && crypto_ahash_reqsize(hash) >
+ MAX_SYNC_HASH_REQSIZE)
+ goto out_exit_tfm;
- return alg->init_tfm ? alg->init_tfm(hash) : 0;
+ BUILD_BUG_ON(HASH_MAX_DESCSIZE > MAX_SYNC_HASH_REQSIZE);
+ if (crypto_ahash_reqsize(hash) < HASH_MAX_DESCSIZE)
+ crypto_ahash_set_reqsize(hash, HASH_MAX_DESCSIZE);
+
+ return 0;
+
+out_exit_tfm:
+ if (alg->exit_tfm)
+ alg->exit_tfm(hash);
+ else if (tfm->__crt_alg->cra_exit)
+ tfm->__crt_alg->cra_exit(tfm);
+ err = -EINVAL;
+out_free_sync_hash:
+ crypto_free_ahash(fb);
+ return err;
}
static unsigned int crypto_ahash_extsize(struct crypto_alg *alg)
@@ -897,6 +821,7 @@ static const struct crypto_type crypto_ahash_type = {
.maskset = CRYPTO_ALG_TYPE_AHASH_MASK,
.type = CRYPTO_ALG_TYPE_AHASH,
.tfmsize = offsetof(struct crypto_ahash, base),
+ .algsize = offsetof(struct ahash_alg, halg.base),
};
int crypto_grab_ahash(struct crypto_ahash_spawn *spawn,
@@ -921,7 +846,7 @@ int crypto_has_ahash(const char *alg_name, u32 type, u32 mask)
}
EXPORT_SYMBOL_GPL(crypto_has_ahash);
-static bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
{
struct crypto_alg *alg = &halg->base;
@@ -930,11 +855,13 @@ static bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
return __crypto_ahash_alg(alg)->setkey != ahash_nosetkey;
}
+EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey);
struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash)
{
struct hash_alg_common *halg = crypto_hash_alg_common(hash);
struct crypto_tfm *tfm = crypto_ahash_tfm(hash);
+ struct crypto_ahash *fb = NULL;
struct crypto_ahash *nhash;
struct ahash_alg *alg;
int err;
@@ -964,28 +891,52 @@ struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash)
err = PTR_ERR(shash);
goto out_free_nhash;
}
+ crypto_ahash_tfm(nhash)->exit = crypto_exit_ahash_using_shash;
nhash->using_shash = true;
*nctx = shash;
return nhash;
}
+ if (crypto_ahash_need_fallback(hash)) {
+ fb = crypto_clone_ahash(crypto_ahash_fb(hash));
+ err = PTR_ERR(fb);
+ if (IS_ERR(fb))
+ goto out_free_nhash;
+
+ crypto_ahash_tfm(nhash)->fb = crypto_ahash_tfm(fb);
+ }
+
err = -ENOSYS;
alg = crypto_ahash_alg(hash);
if (!alg->clone_tfm)
- goto out_free_nhash;
+ goto out_free_fb;
err = alg->clone_tfm(nhash, hash);
if (err)
- goto out_free_nhash;
+ goto out_free_fb;
+
+ crypto_ahash_tfm(nhash)->exit = crypto_ahash_exit_tfm;
return nhash;
+out_free_fb:
+ crypto_free_ahash(fb);
out_free_nhash:
crypto_free_ahash(nhash);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(crypto_clone_ahash);
+static int ahash_default_export_core(struct ahash_request *req, void *out)
+{
+ return -ENOSYS;
+}
+
+static int ahash_default_import_core(struct ahash_request *req, const void *in)
+{
+ return -ENOSYS;
+}
+
static int ahash_prepare_alg(struct ahash_alg *alg)
{
struct crypto_alg *base = &alg->halg.base;
@@ -994,7 +945,11 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
if (alg->halg.statesize == 0)
return -EINVAL;
- if (alg->reqsize && alg->reqsize < alg->halg.statesize)
+ if (base->cra_reqsize && base->cra_reqsize < alg->halg.statesize)
+ return -EINVAL;
+
+ if (!(base->cra_flags & CRYPTO_ALG_ASYNC) &&
+ base->cra_reqsize > MAX_SYNC_HASH_REQSIZE)
return -EINVAL;
err = hash_prepare_alg(&alg->halg);
@@ -1004,9 +959,28 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
base->cra_type = &crypto_ahash_type;
base->cra_flags |= CRYPTO_ALG_TYPE_AHASH;
+ if ((base->cra_flags ^ CRYPTO_ALG_REQ_VIRT) &
+ (CRYPTO_ALG_ASYNC | CRYPTO_ALG_REQ_VIRT))
+ base->cra_flags |= CRYPTO_ALG_NEED_FALLBACK;
+
if (!alg->setkey)
alg->setkey = ahash_nosetkey;
+ if (base->cra_flags & CRYPTO_AHASH_ALG_BLOCK_ONLY) {
+ BUILD_BUG_ON(MAX_ALGAPI_BLOCKSIZE >= 256);
+ if (!alg->finup)
+ return -EINVAL;
+
+ base->cra_reqsize += base->cra_blocksize + 1;
+ alg->halg.statesize += base->cra_blocksize + 1;
+ alg->export_core = alg->export;
+ alg->import_core = alg->import;
+ } else if (!alg->export_core || !alg->import_core) {
+ alg->export_core = ahash_default_export_core;
+ alg->import_core = ahash_default_import_core;
+ base->cra_flags |= CRYPTO_AHASH_ALG_NO_EXPORT_CORE;
+ }
+
return 0;
}
@@ -1074,5 +1048,42 @@ int ahash_register_instance(struct crypto_template *tmpl,
}
EXPORT_SYMBOL_GPL(ahash_register_instance);
+void ahash_request_free(struct ahash_request *req)
+{
+ if (unlikely(!req))
+ return;
+
+ if (!ahash_req_on_stack(req)) {
+ kfree(req);
+ return;
+ }
+
+ ahash_request_zero(req);
+}
+EXPORT_SYMBOL_GPL(ahash_request_free);
+
+int crypto_hash_digest(struct crypto_ahash *tfm, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ HASH_REQUEST_ON_STACK(req, crypto_ahash_fb(tfm));
+ int err;
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+ ahash_request_set_virt(req, data, out, len);
+ err = crypto_ahash_digest(req);
+
+ ahash_request_zero(req);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(crypto_hash_digest);
+
+void ahash_free_singlespawn_instance(struct ahash_instance *inst)
+{
+ crypto_drop_spawn(ahash_instance_ctx(inst));
+ kfree(inst);
+}
+EXPORT_SYMBOL_GPL(ahash_free_singlespawn_instance);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Asynchronous cryptographic hash type");
diff --git a/crypto/akcipher.c b/crypto/akcipher.c
index 72c82d9aa077..a36f50c83827 100644
--- a/crypto/akcipher.c
+++ b/crypto/akcipher.c
@@ -97,6 +97,7 @@ static const struct crypto_type crypto_akcipher_type = {
.maskset = CRYPTO_ALG_TYPE_AHASH_MASK,
.type = CRYPTO_ALG_TYPE_AKCIPHER,
.tfmsize = offsetof(struct crypto_akcipher, base),
+ .algsize = offsetof(struct akcipher_alg, base),
};
int crypto_grab_akcipher(struct crypto_akcipher_spawn *spawn,
diff --git a/crypto/algapi.c b/crypto/algapi.c
index ea9ed9580aa8..e604d0d8b7b4 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -71,12 +71,23 @@ static void crypto_free_instance(struct crypto_instance *inst)
static void crypto_destroy_instance_workfn(struct work_struct *w)
{
- struct crypto_instance *inst = container_of(w, struct crypto_instance,
+ struct crypto_template *tmpl = container_of(w, struct crypto_template,
free_work);
- struct crypto_template *tmpl = inst->tmpl;
+ struct crypto_instance *inst;
+ struct hlist_node *n;
+ HLIST_HEAD(list);
+
+ down_write(&crypto_alg_sem);
+ hlist_for_each_entry_safe(inst, n, &tmpl->dead, list) {
+ if (refcount_read(&inst->alg.cra_refcnt) != -1)
+ continue;
+ hlist_del(&inst->list);
+ hlist_add_head(&inst->list, &list);
+ }
+ up_write(&crypto_alg_sem);
- crypto_free_instance(inst);
- crypto_tmpl_put(tmpl);
+ hlist_for_each_entry_safe(inst, n, &list, list)
+ crypto_free_instance(inst);
}
static void crypto_destroy_instance(struct crypto_alg *alg)
@@ -84,9 +95,10 @@ static void crypto_destroy_instance(struct crypto_alg *alg)
struct crypto_instance *inst = container_of(alg,
struct crypto_instance,
alg);
+ struct crypto_template *tmpl = inst->tmpl;
- INIT_WORK(&inst->free_work, crypto_destroy_instance_workfn);
- schedule_work(&inst->free_work);
+ refcount_set(&alg->cra_refcnt, -1);
+ schedule_work(&tmpl->free_work);
}
/*
@@ -132,14 +144,16 @@ static void crypto_remove_instance(struct crypto_instance *inst,
inst->alg.cra_flags |= CRYPTO_ALG_DEAD;
- if (!tmpl || !crypto_tmpl_get(tmpl))
+ if (!tmpl)
return;
- list_move(&inst->alg.cra_list, list);
+ list_del_init(&inst->alg.cra_list);
hlist_del(&inst->list);
- inst->alg.cra_destroy = crypto_destroy_instance;
+ hlist_add_head(&inst->list, &tmpl->dead);
BUG_ON(!list_empty(&inst->alg.cra_users));
+
+ crypto_alg_put(&inst->alg);
}
/*
@@ -260,8 +274,7 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
{
struct crypto_larval *larval;
- if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) ||
- IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) ||
+ if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) ||
(alg->cra_flags & CRYPTO_ALG_INTERNAL))
return NULL; /* No self-test needed */
@@ -404,6 +417,15 @@ void crypto_remove_final(struct list_head *list)
}
EXPORT_SYMBOL_GPL(crypto_remove_final);
+static void crypto_free_alg(struct crypto_alg *alg)
+{
+ unsigned int algsize = alg->cra_type->algsize;
+ u8 *p = (u8 *)alg - algsize;
+
+ crypto_destroy_alg(alg);
+ kfree(p);
+}
+
int crypto_register_alg(struct crypto_alg *alg)
{
struct crypto_larval *larval;
@@ -416,6 +438,19 @@ int crypto_register_alg(struct crypto_alg *alg)
if (err)
return err;
+ if (alg->cra_flags & CRYPTO_ALG_DUP_FIRST &&
+ !WARN_ON_ONCE(alg->cra_destroy)) {
+ unsigned int algsize = alg->cra_type->algsize;
+ u8 *p = (u8 *)alg - algsize;
+
+ p = kmemdup(p, algsize + sizeof(*alg), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ alg = (void *)(p + algsize);
+ alg->cra_destroy = crypto_free_alg;
+ }
+
down_write(&crypto_alg_sem);
larval = __crypto_register_alg(alg, &algs_to_put);
if (!IS_ERR_OR_NULL(larval)) {
@@ -424,8 +459,10 @@ int crypto_register_alg(struct crypto_alg *alg)
}
up_write(&crypto_alg_sem);
- if (IS_ERR(larval))
+ if (IS_ERR(larval)) {
+ crypto_alg_put(alg);
return PTR_ERR(larval);
+ }
if (test_started)
crypto_schedule_test(larval);
@@ -461,11 +498,9 @@ void crypto_unregister_alg(struct crypto_alg *alg)
if (WARN(ret, "Algorithm %s is not registered", alg->cra_driver_name))
return;
- if (WARN_ON(refcount_read(&alg->cra_refcnt) != 1))
- return;
-
- crypto_alg_put(alg);
+ WARN_ON(!alg->cra_destroy && refcount_read(&alg->cra_refcnt) != 1);
+ list_add(&alg->cra_list, &list);
crypto_remove_final(&list);
}
EXPORT_SYMBOL_GPL(crypto_unregister_alg);
@@ -504,6 +539,8 @@ int crypto_register_template(struct crypto_template *tmpl)
struct crypto_template *q;
int err = -EEXIST;
+ INIT_WORK(&tmpl->free_work, crypto_destroy_instance_workfn);
+
down_write(&crypto_alg_sem);
crypto_check_module_sig(tmpl->module);
@@ -565,6 +602,8 @@ void crypto_unregister_template(struct crypto_template *tmpl)
crypto_free_instance(inst);
}
crypto_remove_final(&users);
+
+ flush_work(&tmpl->free_work);
}
EXPORT_SYMBOL_GPL(crypto_unregister_template);
@@ -618,6 +657,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
inst->alg.cra_module = tmpl->module;
inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
+ inst->alg.cra_destroy = crypto_destroy_instance;
down_write(&crypto_alg_sem);
@@ -883,20 +923,20 @@ const char *crypto_attr_alg_name(struct rtattr *rta)
}
EXPORT_SYMBOL_GPL(crypto_attr_alg_name);
-int crypto_inst_setname(struct crypto_instance *inst, const char *name,
- struct crypto_alg *alg)
+int __crypto_inst_setname(struct crypto_instance *inst, const char *name,
+ const char *driver, struct crypto_alg *alg)
{
if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name,
alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
- name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+ driver, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
return 0;
}
-EXPORT_SYMBOL_GPL(crypto_inst_setname);
+EXPORT_SYMBOL_GPL(__crypto_inst_setname);
void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen)
{
@@ -1018,7 +1058,7 @@ static void __init crypto_start_tests(void)
if (!IS_BUILTIN(CONFIG_CRYPTO_ALGAPI))
return;
- if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+ if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS))
return;
set_crypto_boot_test_finished();
diff --git a/crypto/algboss.c b/crypto/algboss.c
index a20926bfd34e..846f586889ee 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -189,7 +189,7 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
struct task_struct *thread;
struct crypto_test_param *param;
- if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+ if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS))
return NOTIFY_DONE;
if (!try_module_get(THIS_MODULE))
@@ -247,13 +247,7 @@ static void __exit cryptomgr_exit(void)
BUG_ON(err);
}
-/*
- * This is arch_initcall() so that the crypto self-tests are run on algorithms
- * registered early by subsys_initcall(). subsys_initcall() is needed for
- * generic implementations so that they're available for comparison tests when
- * other implementations are registered later by module_init().
- */
-arch_initcall(cryptomgr_init);
+module_init(cryptomgr_init);
module_exit(cryptomgr_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 7d58cbbce4af..79b016a899a1 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -27,7 +27,6 @@
#include <crypto/scatterwalk.h>
#include <crypto/if_alg.h>
#include <crypto/skcipher.h>
-#include <crypto/null.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/kernel.h>
@@ -36,19 +35,13 @@
#include <linux/net.h>
#include <net/sock.h>
-struct aead_tfm {
- struct crypto_aead *aead;
- struct crypto_sync_skcipher *null_tfm;
-};
-
static inline bool aead_sufficient_data(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct sock *psk = ask->parent;
struct alg_sock *pask = alg_sk(psk);
struct af_alg_ctx *ctx = ask->private;
- struct aead_tfm *aeadc = pask->private;
- struct crypto_aead *tfm = aeadc->aead;
+ struct crypto_aead *tfm = pask->private;
unsigned int as = crypto_aead_authsize(tfm);
/*
@@ -64,27 +57,12 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct alg_sock *ask = alg_sk(sk);
struct sock *psk = ask->parent;
struct alg_sock *pask = alg_sk(psk);
- struct aead_tfm *aeadc = pask->private;
- struct crypto_aead *tfm = aeadc->aead;
+ struct crypto_aead *tfm = pask->private;
unsigned int ivsize = crypto_aead_ivsize(tfm);
return af_alg_sendmsg(sock, msg, size, ivsize);
}
-static int crypto_aead_copy_sgl(struct crypto_sync_skcipher *null_tfm,
- struct scatterlist *src,
- struct scatterlist *dst, unsigned int len)
-{
- SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
-
- skcipher_request_set_sync_tfm(skreq, null_tfm);
- skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_SLEEP,
- NULL, NULL);
- skcipher_request_set_crypt(skreq, src, dst, len, NULL);
-
- return crypto_skcipher_encrypt(skreq);
-}
-
static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
@@ -93,9 +71,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
struct sock *psk = ask->parent;
struct alg_sock *pask = alg_sk(psk);
struct af_alg_ctx *ctx = ask->private;
- struct aead_tfm *aeadc = pask->private;
- struct crypto_aead *tfm = aeadc->aead;
- struct crypto_sync_skcipher *null_tfm = aeadc->null_tfm;
+ struct crypto_aead *tfm = pask->private;
unsigned int i, as = crypto_aead_authsize(tfm);
struct af_alg_async_req *areq;
struct af_alg_tsgl *tsgl, *tmp;
@@ -223,11 +199,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
* v v
* RX SGL: AAD || PT || Tag
*/
- err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
- areq->first_rsgl.sgl.sgt.sgl,
- processed);
- if (err)
- goto free;
+ memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src,
+ processed);
af_alg_pull_tsgl(sk, processed, NULL, 0);
} else {
/*
@@ -241,12 +214,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
* RX SGL: AAD || CT ----+
*/
- /* Copy AAD || CT to RX SGL buffer for in-place operation. */
- err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
- areq->first_rsgl.sgl.sgt.sgl,
- outlen);
- if (err)
- goto free;
+ /* Copy AAD || CT to RX SGL buffer for in-place operation. */
+ memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src, outlen);
/* Create TX SGL for tag and chain it to RX SGL. */
areq->tsgl_entries = af_alg_count_tsgl(sk, processed,
@@ -379,7 +348,7 @@ static int aead_check_key(struct socket *sock)
int err = 0;
struct sock *psk;
struct alg_sock *pask;
- struct aead_tfm *tfm;
+ struct crypto_aead *tfm;
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
@@ -393,7 +362,7 @@ static int aead_check_key(struct socket *sock)
err = -ENOKEY;
lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
- if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY)
+ if (crypto_aead_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
goto unlock;
atomic_dec(&pask->nokey_refcnt);
@@ -454,54 +423,22 @@ static struct proto_ops algif_aead_ops_nokey = {
static void *aead_bind(const char *name, u32 type, u32 mask)
{
- struct aead_tfm *tfm;
- struct crypto_aead *aead;
- struct crypto_sync_skcipher *null_tfm;
-
- tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
- if (!tfm)
- return ERR_PTR(-ENOMEM);
-
- aead = crypto_alloc_aead(name, type, mask);
- if (IS_ERR(aead)) {
- kfree(tfm);
- return ERR_CAST(aead);
- }
-
- null_tfm = crypto_get_default_null_skcipher();
- if (IS_ERR(null_tfm)) {
- crypto_free_aead(aead);
- kfree(tfm);
- return ERR_CAST(null_tfm);
- }
-
- tfm->aead = aead;
- tfm->null_tfm = null_tfm;
-
- return tfm;
+ return crypto_alloc_aead(name, type, mask);
}
static void aead_release(void *private)
{
- struct aead_tfm *tfm = private;
-
- crypto_free_aead(tfm->aead);
- crypto_put_default_null_skcipher();
- kfree(tfm);
+ crypto_free_aead(private);
}
static int aead_setauthsize(void *private, unsigned int authsize)
{
- struct aead_tfm *tfm = private;
-
- return crypto_aead_setauthsize(tfm->aead, authsize);
+ return crypto_aead_setauthsize(private, authsize);
}
static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
{
- struct aead_tfm *tfm = private;
-
- return crypto_aead_setkey(tfm->aead, key, keylen);
+ return crypto_aead_setkey(private, key, keylen);
}
static void aead_sock_destruct(struct sock *sk)
@@ -510,8 +447,7 @@ static void aead_sock_destruct(struct sock *sk)
struct af_alg_ctx *ctx = ask->private;
struct sock *psk = ask->parent;
struct alg_sock *pask = alg_sk(psk);
- struct aead_tfm *aeadc = pask->private;
- struct crypto_aead *tfm = aeadc->aead;
+ struct crypto_aead *tfm = pask->private;
unsigned int ivlen = crypto_aead_ivsize(tfm);
af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
@@ -524,10 +460,9 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
{
struct af_alg_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
- struct aead_tfm *tfm = private;
- struct crypto_aead *aead = tfm->aead;
+ struct crypto_aead *tfm = private;
unsigned int len = sizeof(*ctx);
- unsigned int ivlen = crypto_aead_ivsize(aead);
+ unsigned int ivlen = crypto_aead_ivsize(tfm);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
@@ -554,9 +489,9 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
static int aead_accept_parent(void *private, struct sock *sk)
{
- struct aead_tfm *tfm = private;
+ struct crypto_aead *tfm = private;
- if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY)
+ if (crypto_aead_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
return -ENOKEY;
return aead_accept_parent_nokey(private, sk);
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 5498a87249d3..e3f1a4852737 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -265,10 +265,6 @@ static int hash_accept(struct socket *sock, struct socket *newsock,
goto out_free_state;
err = crypto_ahash_import(&ctx2->req, state);
- if (err) {
- sock_orphan(sk2);
- sock_put(sk2);
- }
out_free_state:
kfree_sensitive(state);
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index 64f57c4c4b06..153523ce6076 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -467,7 +467,7 @@ MODULE_DESCRIPTION("Software Pseudo Random Number Generator");
MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>");
module_param(dbg, int, 0);
MODULE_PARM_DESC(dbg, "Boolean to enable debugging (0/1 == off/on)");
-subsys_initcall(prng_mod_init);
+module_init(prng_mod_init);
module_exit(prng_mod_fini);
MODULE_ALIAS_CRYPTO("stdrng");
MODULE_ALIAS_CRYPTO("ansi_cprng");
diff --git a/crypto/anubis.c b/crypto/anubis.c
index 886e7c913688..4268c3833baa 100644
--- a/crypto/anubis.c
+++ b/crypto/anubis.c
@@ -694,7 +694,7 @@ static void __exit anubis_mod_fini(void)
crypto_unregister_alg(&anubis_alg);
}
-subsys_initcall(anubis_mod_init);
+module_init(anubis_mod_init);
module_exit(anubis_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/api.c b/crypto/api.c
index 3416e98128a0..5724d62e9d07 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -31,8 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem);
BLOCKING_NOTIFIER_HEAD(crypto_chain);
EXPORT_SYMBOL_GPL(crypto_chain);
-#if IS_BUILTIN(CONFIG_CRYPTO_ALGAPI) && \
- !IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)
+#if IS_BUILTIN(CONFIG_CRYPTO_ALGAPI) && IS_ENABLED(CONFIG_CRYPTO_SELFTESTS)
DEFINE_STATIC_KEY_FALSE(__crypto_boot_test_finished);
#endif
@@ -220,10 +219,19 @@ again:
if (crypto_is_test_larval(larval))
crypto_larval_kill(larval);
alg = ERR_PTR(-ETIMEDOUT);
- } else if (!alg) {
+ } else if (!alg || PTR_ERR(alg) == -EEXIST) {
+ int err = alg ? -EEXIST : -EAGAIN;
+
+ /*
+ * EEXIST is expected because two probes can be scheduled
+ * at the same time with one using alg_name and the other
+ * using driver_name. Do a re-lookup but do not retry in
+ * case we hit a quirk like gcm_base(ctr(aes),...) which
+ * will never match.
+ */
alg = &larval->alg;
alg = crypto_alg_lookup(alg->cra_name, type, mask) ?:
- ERR_PTR(-EAGAIN);
+ ERR_PTR(err);
} else if (IS_ERR(alg))
;
else if (crypto_is_test_larval(larval) &&
@@ -528,6 +536,7 @@ void *crypto_create_tfm_node(struct crypto_alg *alg,
goto out;
tfm = (struct crypto_tfm *)(mem + frontend->tfmsize);
+ tfm->fb = tfm;
err = frontend->init_tfm(tfm);
if (err)
@@ -569,7 +578,7 @@ void *crypto_clone_tfm(const struct crypto_type *frontend,
tfm = (struct crypto_tfm *)(mem + frontend->tfmsize);
tfm->crt_flags = otfm->crt_flags;
- tfm->exit = otfm->exit;
+ tfm->fb = tfm;
out:
return mem;
@@ -707,11 +716,27 @@ void crypto_destroy_alg(struct crypto_alg *alg)
{
if (alg->cra_type && alg->cra_type->destroy)
alg->cra_type->destroy(alg);
-
if (alg->cra_destroy)
alg->cra_destroy(alg);
}
EXPORT_SYMBOL_GPL(crypto_destroy_alg);
+struct crypto_async_request *crypto_request_clone(
+ struct crypto_async_request *req, size_t total, gfp_t gfp)
+{
+ struct crypto_tfm *tfm = req->tfm;
+ struct crypto_async_request *nreq;
+
+ nreq = kmemdup(req, total, gfp);
+ if (!nreq) {
+ req->tfm = tfm->fb;
+ return req;
+ }
+
+ nreq->flags &= ~CRYPTO_TFM_REQ_ON_STACK;
+ return nreq;
+}
+EXPORT_SYMBOL_GPL(crypto_request_clone);
+
MODULE_DESCRIPTION("Cryptographic core API");
MODULE_LICENSE("GPL");
diff --git a/crypto/arc4.c b/crypto/arc4.c
index 1a4825c97c5a..1608018111d0 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -73,7 +73,7 @@ static void __exit arc4_exit(void)
crypto_unregister_lskcipher(&arc4_alg);
}
-subsys_initcall(arc4_init);
+module_init(arc4_init);
module_exit(arc4_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/aria_generic.c b/crypto/aria_generic.c
index bd359d3313c2..faa7900383f6 100644
--- a/crypto/aria_generic.c
+++ b/crypto/aria_generic.c
@@ -304,7 +304,7 @@ static void __exit aria_fini(void)
crypto_unregister_alg(&aria_alg);
}
-subsys_initcall(aria_init);
+module_init(aria_init);
module_exit(aria_fini);
MODULE_DESCRIPTION("ARIA Cipher Algorithm");
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index bf165d321440..e5b177c8e842 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -163,10 +163,8 @@ static u8 *pkey_pack_u32(u8 *dst, u32 val)
static int software_key_query(const struct kernel_pkey_params *params,
struct kernel_pkey_query *info)
{
- struct crypto_akcipher *tfm;
struct public_key *pkey = params->key->payload.data[asym_crypto];
char alg_name[CRYPTO_MAX_ALG_NAME];
- struct crypto_sig *sig;
u8 *key, *ptr;
int ret, len;
bool issig;
@@ -188,7 +186,11 @@ static int software_key_query(const struct kernel_pkey_params *params,
ptr = pkey_pack_u32(ptr, pkey->paramlen);
memcpy(ptr, pkey->params, pkey->paramlen);
+ memset(info, 0, sizeof(*info));
+
if (issig) {
+ struct crypto_sig *sig;
+
sig = crypto_alloc_sig(alg_name, 0, 0);
if (IS_ERR(sig)) {
ret = PTR_ERR(sig);
@@ -200,9 +202,10 @@ static int software_key_query(const struct kernel_pkey_params *params,
else
ret = crypto_sig_set_pubkey(sig, key, pkey->keylen);
if (ret < 0)
- goto error_free_tfm;
+ goto error_free_sig;
len = crypto_sig_keysize(sig);
+ info->key_size = len;
info->max_sig_size = crypto_sig_maxsize(sig);
info->max_data_size = crypto_sig_digestsize(sig);
@@ -211,11 +214,19 @@ static int software_key_query(const struct kernel_pkey_params *params,
info->supported_ops |= KEYCTL_SUPPORTS_SIGN;
if (strcmp(params->encoding, "pkcs1") == 0) {
+ info->max_enc_size = len / BITS_PER_BYTE;
+ info->max_dec_size = len / BITS_PER_BYTE;
+
info->supported_ops |= KEYCTL_SUPPORTS_ENCRYPT;
if (pkey->key_is_private)
info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT;
}
+
+error_free_sig:
+ crypto_free_sig(sig);
} else {
+ struct crypto_akcipher *tfm;
+
tfm = crypto_alloc_akcipher(alg_name, 0, 0);
if (IS_ERR(tfm)) {
ret = PTR_ERR(tfm);
@@ -227,28 +238,23 @@ static int software_key_query(const struct kernel_pkey_params *params,
else
ret = crypto_akcipher_set_pub_key(tfm, key, pkey->keylen);
if (ret < 0)
- goto error_free_tfm;
+ goto error_free_akcipher;
len = crypto_akcipher_maxsize(tfm);
+ info->key_size = len * BITS_PER_BYTE;
info->max_sig_size = len;
info->max_data_size = len;
+ info->max_enc_size = len;
+ info->max_dec_size = len;
info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT;
if (pkey->key_is_private)
info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT;
- }
-
- info->key_size = len * 8;
- info->max_enc_size = len;
- info->max_dec_size = len;
-
- ret = 0;
-error_free_tfm:
- if (issig)
- crypto_free_sig(sig);
- else
+error_free_akcipher:
crypto_free_akcipher(tfm);
+ }
+
error_free_key:
kfree_sensitive(key);
pr_devel("<==%s() = %d\n", __func__, ret);
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index ee2fdab42334..2ffe4ae90bea 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -372,10 +372,9 @@ static int x509_fabricate_name(struct x509_parse_context *ctx, size_t hdrlen,
/* Empty name string if no material */
if (!ctx->cn_size && !ctx->o_size && !ctx->email_size) {
- buffer = kmalloc(1, GFP_KERNEL);
+ buffer = kzalloc(1, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
- buffer[0] = 0;
goto done;
}
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 3aaf3ab4e360..a723769c8777 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -9,7 +9,6 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/authenc.h>
-#include <crypto/null.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -28,7 +27,6 @@ struct authenc_instance_ctx {
struct crypto_authenc_ctx {
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_sync_skcipher *null;
};
struct authenc_request_ctx {
@@ -170,21 +168,6 @@ out:
authenc_request_complete(areq, err);
}
-static int crypto_authenc_copy_assoc(struct aead_request *req)
-{
- struct crypto_aead *authenc = crypto_aead_reqtfm(req);
- struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
- SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
-
- skcipher_request_set_sync_tfm(skreq, ctx->null);
- skcipher_request_set_callback(skreq, aead_request_flags(req),
- NULL, NULL);
- skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
- NULL);
-
- return crypto_skcipher_encrypt(skreq);
-}
-
static int crypto_authenc_encrypt(struct aead_request *req)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
@@ -203,10 +186,7 @@ static int crypto_authenc_encrypt(struct aead_request *req)
dst = src;
if (req->src != req->dst) {
- err = crypto_authenc_copy_assoc(req);
- if (err)
- return err;
-
+ memcpy_sglist(req->dst, req->src, req->assoclen);
dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen);
}
@@ -303,7 +283,6 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_sync_skcipher *null;
int err;
auth = crypto_spawn_ahash(&ictx->auth);
@@ -315,14 +294,8 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
if (IS_ERR(enc))
goto err_free_ahash;
- null = crypto_get_default_null_skcipher();
- err = PTR_ERR(null);
- if (IS_ERR(null))
- goto err_free_skcipher;
-
ctx->auth = auth;
ctx->enc = enc;
- ctx->null = null;
crypto_aead_set_reqsize(
tfm,
@@ -336,8 +309,6 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
return 0;
-err_free_skcipher:
- crypto_free_skcipher(enc);
err_free_ahash:
crypto_free_ahash(auth);
return err;
@@ -349,7 +320,6 @@ static void crypto_authenc_exit_tfm(struct crypto_aead *tfm)
crypto_free_ahash(ctx->auth);
crypto_free_skcipher(ctx->enc);
- crypto_put_default_null_skcipher();
}
static void crypto_authenc_free(struct aead_instance *inst)
@@ -451,7 +421,7 @@ static void __exit crypto_authenc_module_exit(void)
crypto_unregister_template(&crypto_authenc_tmpl);
}
-subsys_initcall(crypto_authenc_module_init);
+module_init(crypto_authenc_module_init);
module_exit(crypto_authenc_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 2cc933e2f790..d1bf0fda3f2e 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -12,7 +12,6 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/authenc.h>
-#include <crypto/null.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -31,7 +30,6 @@ struct crypto_authenc_esn_ctx {
unsigned int reqoff;
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_sync_skcipher *null;
};
struct authenc_esn_request_ctx {
@@ -158,20 +156,6 @@ static void crypto_authenc_esn_encrypt_done(void *data, int err)
authenc_esn_request_complete(areq, err);
}
-static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len)
-{
- struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
- struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn);
- SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
-
- skcipher_request_set_sync_tfm(skreq, ctx->null);
- skcipher_request_set_callback(skreq, aead_request_flags(req),
- NULL, NULL);
- skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL);
-
- return crypto_skcipher_encrypt(skreq);
-}
-
static int crypto_authenc_esn_encrypt(struct aead_request *req)
{
struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
@@ -190,10 +174,7 @@ static int crypto_authenc_esn_encrypt(struct aead_request *req)
dst = src;
if (req->src != req->dst) {
- err = crypto_authenc_esn_copy(req, assoclen);
- if (err)
- return err;
-
+ memcpy_sglist(req->dst, req->src, assoclen);
sg_init_table(areq_ctx->dst, 2);
dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, assoclen);
}
@@ -277,11 +258,8 @@ static int crypto_authenc_esn_decrypt(struct aead_request *req)
cryptlen -= authsize;
- if (req->src != dst) {
- err = crypto_authenc_esn_copy(req, assoclen + cryptlen);
- if (err)
- return err;
- }
+ if (req->src != dst)
+ memcpy_sglist(dst, req->src, assoclen + cryptlen);
scatterwalk_map_and_copy(ihash, req->src, assoclen + cryptlen,
authsize, 0);
@@ -317,7 +295,6 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_sync_skcipher *null;
int err;
auth = crypto_spawn_ahash(&ictx->auth);
@@ -329,14 +306,8 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
if (IS_ERR(enc))
goto err_free_ahash;
- null = crypto_get_default_null_skcipher();
- err = PTR_ERR(null);
- if (IS_ERR(null))
- goto err_free_skcipher;
-
ctx->auth = auth;
ctx->enc = enc;
- ctx->null = null;
ctx->reqoff = 2 * crypto_ahash_digestsize(auth);
@@ -352,8 +323,6 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
return 0;
-err_free_skcipher:
- crypto_free_skcipher(enc);
err_free_ahash:
crypto_free_ahash(auth);
return err;
@@ -365,7 +334,6 @@ static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm)
crypto_free_ahash(ctx->auth);
crypto_free_skcipher(ctx->enc);
- crypto_put_default_null_skcipher();
}
static void crypto_authenc_esn_free(struct aead_instance *inst)
@@ -465,7 +433,7 @@ static void __exit crypto_authenc_esn_module_exit(void)
crypto_unregister_template(&crypto_authenc_esn_tmpl);
}
-subsys_initcall(crypto_authenc_esn_module_init);
+module_init(crypto_authenc_esn_module_init);
module_exit(crypto_authenc_esn_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c
index 04a712ddfb43..60f056217510 100644
--- a/crypto/blake2b_generic.c
+++ b/crypto/blake2b_generic.c
@@ -15,12 +15,12 @@
* More information about BLAKE2 can be found at https://blake2.net.
*/
-#include <linux/unaligned.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/bitops.h>
#include <crypto/internal/blake2b.h>
#include <crypto/internal/hash.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
static const u8 blake2b_sigma[12][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
@@ -111,8 +111,8 @@ static void blake2b_compress_one_generic(struct blake2b_state *S,
#undef G
#undef ROUND
-void blake2b_compress_generic(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc)
+static void blake2b_compress_generic(struct blake2b_state *state,
+ const u8 *block, size_t nblocks, u32 inc)
{
do {
blake2b_increment_counter(state, inc);
@@ -120,17 +120,19 @@ void blake2b_compress_generic(struct blake2b_state *state,
block += BLAKE2B_BLOCK_SIZE;
} while (--nblocks);
}
-EXPORT_SYMBOL(blake2b_compress_generic);
static int crypto_blake2b_update_generic(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2b_update(desc, in, inlen, blake2b_compress_generic);
+ return crypto_blake2b_update_bo(desc, in, inlen,
+ blake2b_compress_generic);
}
-static int crypto_blake2b_final_generic(struct shash_desc *desc, u8 *out)
+static int crypto_blake2b_finup_generic(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen, u8 *out)
{
- return crypto_blake2b_final(desc, out, blake2b_compress_generic);
+ return crypto_blake2b_finup(desc, in, inlen, out,
+ blake2b_compress_generic);
}
#define BLAKE2B_ALG(name, driver_name, digest_size) \
@@ -138,7 +140,9 @@ static int crypto_blake2b_final_generic(struct shash_desc *desc, u8 *out)
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 100, \
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \
+ CRYPTO_AHASH_ALG_BLOCK_ONLY | \
+ CRYPTO_AHASH_ALG_FINAL_NONZERO, \
.base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
@@ -146,8 +150,9 @@ static int crypto_blake2b_final_generic(struct shash_desc *desc, u8 *out)
.setkey = crypto_blake2b_setkey, \
.init = crypto_blake2b_init, \
.update = crypto_blake2b_update_generic, \
- .final = crypto_blake2b_final_generic, \
- .descsize = sizeof(struct blake2b_state), \
+ .finup = crypto_blake2b_finup_generic, \
+ .descsize = BLAKE2B_DESC_SIZE, \
+ .statesize = BLAKE2B_STATE_SIZE, \
}
static struct shash_alg blake2b_algs[] = {
@@ -171,7 +176,7 @@ static void __exit blake2b_mod_fini(void)
crypto_unregister_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs));
}
-subsys_initcall(blake2b_mod_init);
+module_init(blake2b_mod_init);
module_exit(blake2b_mod_fini);
MODULE_AUTHOR("David Sterba <kdave@kernel.org>");
diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c
index 0146bc762c09..f3c5f9b09850 100644
--- a/crypto/blowfish_generic.c
+++ b/crypto/blowfish_generic.c
@@ -124,7 +124,7 @@ static void __exit blowfish_mod_fini(void)
crypto_unregister_alg(&alg);
}
-subsys_initcall(blowfish_mod_init);
+module_init(blowfish_mod_init);
module_exit(blowfish_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index 197fcf3abc89..ee4336a04b93 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -1064,7 +1064,7 @@ static void __exit camellia_fini(void)
crypto_unregister_alg(&camellia_alg);
}
-subsys_initcall(camellia_init);
+module_init(camellia_init);
module_exit(camellia_fini);
MODULE_DESCRIPTION("Camellia Cipher Algorithm");
diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c
index f3e57775fa02..f68330793e0c 100644
--- a/crypto/cast5_generic.c
+++ b/crypto/cast5_generic.c
@@ -531,7 +531,7 @@ static void __exit cast5_mod_fini(void)
crypto_unregister_alg(&alg);
}
-subsys_initcall(cast5_mod_init);
+module_init(cast5_mod_init);
module_exit(cast5_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index 11b725b12f27..4c08c42646f0 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -271,7 +271,7 @@ static void __exit cast6_mod_fini(void)
crypto_unregister_alg(&alg);
}
-subsys_initcall(cast6_mod_init);
+module_init(cast6_mod_init);
module_exit(cast6_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/cbc.c b/crypto/cbc.c
index e81918ca68b7..ed3df6246765 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -179,7 +179,7 @@ static void __exit crypto_cbc_module_exit(void)
crypto_unregister_template(&crypto_cbc_tmpl);
}
-subsys_initcall(crypto_cbc_module_init);
+module_init(crypto_cbc_module_init);
module_exit(crypto_cbc_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 06476b53b491..2ae929ffdef8 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -10,11 +10,12 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
+#include <crypto/utils.h>
#include <linux/err.h>
-#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/string.h>
struct ccm_instance_ctx {
struct crypto_skcipher_spawn ctr;
@@ -54,11 +55,6 @@ struct cbcmac_tfm_ctx {
struct crypto_cipher *child;
};
-struct cbcmac_desc_ctx {
- unsigned int len;
- u8 dg[];
-};
-
static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx(
struct aead_request *req)
{
@@ -783,12 +779,10 @@ static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
static int crypto_cbcmac_digest_init(struct shash_desc *pdesc)
{
- struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
int bs = crypto_shash_digestsize(pdesc->tfm);
+ u8 *dg = shash_desc_ctx(pdesc);
- ctx->len = 0;
- memset(ctx->dg, 0, bs);
-
+ memset(dg, 0, bs);
return 0;
}
@@ -797,39 +791,34 @@ static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p,
{
struct crypto_shash *parent = pdesc->tfm;
struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
- struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
struct crypto_cipher *tfm = tctx->child;
int bs = crypto_shash_digestsize(parent);
-
- while (len > 0) {
- unsigned int l = min(len, bs - ctx->len);
-
- crypto_xor(&ctx->dg[ctx->len], p, l);
- ctx->len +=l;
- len -= l;
- p += l;
-
- if (ctx->len == bs) {
- crypto_cipher_encrypt_one(tfm, ctx->dg, ctx->dg);
- ctx->len = 0;
- }
- }
-
- return 0;
+ u8 *dg = shash_desc_ctx(pdesc);
+
+ do {
+ crypto_xor(dg, p, bs);
+ crypto_cipher_encrypt_one(tfm, dg, dg);
+ p += bs;
+ len -= bs;
+ } while (len >= bs);
+ return len;
}
-static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out)
+static int crypto_cbcmac_digest_finup(struct shash_desc *pdesc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct crypto_shash *parent = pdesc->tfm;
struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
- struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
struct crypto_cipher *tfm = tctx->child;
int bs = crypto_shash_digestsize(parent);
+ u8 *dg = shash_desc_ctx(pdesc);
- if (ctx->len)
- crypto_cipher_encrypt_one(tfm, ctx->dg, ctx->dg);
-
- memcpy(out, ctx->dg, bs);
+ if (len) {
+ crypto_xor(dg, src, len);
+ crypto_cipher_encrypt_one(tfm, out, dg);
+ return 0;
+ }
+ memcpy(out, dg, bs);
return 0;
}
@@ -883,19 +872,19 @@ static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
goto err_free_inst;
inst->alg.base.cra_priority = alg->cra_priority;
- inst->alg.base.cra_blocksize = 1;
+ inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.digestsize = alg->cra_blocksize;
- inst->alg.descsize = sizeof(struct cbcmac_desc_ctx) +
- alg->cra_blocksize;
+ inst->alg.descsize = alg->cra_blocksize;
+ inst->alg.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY;
inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx);
inst->alg.base.cra_init = cbcmac_init_tfm;
inst->alg.base.cra_exit = cbcmac_exit_tfm;
inst->alg.init = crypto_cbcmac_digest_init;
inst->alg.update = crypto_cbcmac_digest_update;
- inst->alg.final = crypto_cbcmac_digest_final;
+ inst->alg.finup = crypto_cbcmac_digest_finup;
inst->alg.setkey = crypto_cbcmac_digest_setkey;
inst->free = shash_free_singlespawn_instance;
@@ -940,7 +929,7 @@ static void __exit crypto_ccm_module_exit(void)
ARRAY_SIZE(crypto_ccm_tmpls));
}
-subsys_initcall(crypto_ccm_module_init);
+module_init(crypto_ccm_module_init);
module_exit(crypto_ccm_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/chacha.c b/crypto/chacha.c
new file mode 100644
index 000000000000..c3a11f4e2d13
--- /dev/null
+++ b/crypto/chacha.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Crypto API wrappers for the ChaCha20, XChaCha20, and XChaCha12 stream ciphers
+ *
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2018 Google LLC
+ */
+
+#include <linux/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/module.h>
+
+struct chacha_ctx {
+ u32 key[8];
+ int nrounds;
+};
+
+static int chacha_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keysize, int nrounds)
+{
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int i;
+
+ if (keysize != CHACHA_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ ctx->nrounds = nrounds;
+ return 0;
+}
+
+static int chacha20_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keysize)
+{
+ return chacha_setkey(tfm, key, keysize, 20);
+}
+
+static int chacha12_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keysize)
+{
+ return chacha_setkey(tfm, key, keysize, 12);
+}
+
+static int chacha_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx,
+ const u8 iv[CHACHA_IV_SIZE], bool arch)
+{
+ struct skcipher_walk walk;
+ struct chacha_state state;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init(&state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
+
+ if (arch)
+ chacha_crypt(&state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ else
+ chacha_crypt_generic(&state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int crypto_chacha_crypt_generic(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_stream_xor(req, ctx, req->iv, false);
+}
+
+static int crypto_chacha_crypt_arch(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_stream_xor(req, ctx, req->iv, true);
+}
+
+static int crypto_xchacha_crypt(struct skcipher_request *req, bool arch)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ struct chacha_state state;
+ u8 real_iv[16];
+
+ /* Compute the subkey given the original key and first 128 nonce bits */
+ chacha_init(&state, ctx->key, req->iv);
+ if (arch)
+ hchacha_block(&state, subctx.key, ctx->nrounds);
+ else
+ hchacha_block_generic(&state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ /* Build the real IV */
+ memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
+ memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
+
+ /* Generate the stream and XOR it with the data */
+ return chacha_stream_xor(req, &subctx, real_iv, arch);
+}
+
+static int crypto_xchacha_crypt_generic(struct skcipher_request *req)
+{
+ return crypto_xchacha_crypt(req, false);
+}
+
+static int crypto_xchacha_crypt_arch(struct skcipher_request *req)
+{
+ return crypto_xchacha_crypt(req, true);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-generic",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = crypto_chacha_crypt_generic,
+ .decrypt = crypto_chacha_crypt_generic,
+ },
+ {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-generic",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = crypto_xchacha_crypt_generic,
+ .decrypt = crypto_xchacha_crypt_generic,
+ },
+ {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-generic",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = crypto_xchacha_crypt_generic,
+ .decrypt = crypto_xchacha_crypt_generic,
+ },
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-" __stringify(ARCH),
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = crypto_chacha_crypt_arch,
+ .decrypt = crypto_chacha_crypt_arch,
+ },
+ {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-" __stringify(ARCH),
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = crypto_xchacha_crypt_arch,
+ .decrypt = crypto_xchacha_crypt_arch,
+ },
+ {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-" __stringify(ARCH),
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = crypto_xchacha_crypt_arch,
+ .decrypt = crypto_xchacha_crypt_arch,
+ }
+};
+
+static unsigned int num_algs;
+
+static int __init crypto_chacha_mod_init(void)
+{
+ /* register the arch flavours only if they differ from generic */
+ num_algs = ARRAY_SIZE(algs);
+ BUILD_BUG_ON(ARRAY_SIZE(algs) % 2 != 0);
+ if (!chacha_is_arch_optimized())
+ num_algs /= 2;
+
+ return crypto_register_skciphers(algs, num_algs);
+}
+
+static void __exit crypto_chacha_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, num_algs);
+}
+
+module_init(crypto_chacha_mod_init);
+module_exit(crypto_chacha_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("Crypto API wrappers for the ChaCha20, XChaCha20, and XChaCha12 stream ciphers");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-generic");
+MODULE_ALIAS_CRYPTO("chacha20-" __stringify(ARCH));
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha20-" __stringify(ARCH));
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-generic");
+MODULE_ALIAS_CRYPTO("xchacha12-" __stringify(ARCH));
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index d740849f1c19..b4b5a7198d84 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -12,36 +12,23 @@
#include <crypto/chacha.h>
#include <crypto/poly1305.h>
#include <linux/err.h>
-#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/string.h>
struct chachapoly_instance_ctx {
struct crypto_skcipher_spawn chacha;
- struct crypto_ahash_spawn poly;
unsigned int saltlen;
};
struct chachapoly_ctx {
struct crypto_skcipher *chacha;
- struct crypto_ahash *poly;
/* key bytes we use for the ChaCha20 IV */
unsigned int saltlen;
u8 salt[] __counted_by(saltlen);
};
-struct poly_req {
- /* zero byte padding for AD/ciphertext, as needed */
- u8 pad[POLY1305_BLOCK_SIZE];
- /* tail data with AD/ciphertext lengths */
- struct {
- __le64 assoclen;
- __le64 cryptlen;
- } tail;
- struct scatterlist src[1];
- struct ahash_request req; /* must be last member */
-};
-
struct chacha_req {
u8 iv[CHACHA_IV_SIZE];
struct scatterlist src[1];
@@ -62,7 +49,6 @@ struct chachapoly_req_ctx {
/* request flags, with MAY_SLEEP cleared if needed */
u32 flags;
union {
- struct poly_req poly;
struct chacha_req chacha;
} u;
};
@@ -105,16 +91,6 @@ static int poly_verify_tag(struct aead_request *req)
return 0;
}
-static int poly_copy_tag(struct aead_request *req)
-{
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
-
- scatterwalk_map_and_copy(rctx->tag, req->dst,
- req->assoclen + rctx->cryptlen,
- sizeof(rctx->tag), 1);
- return 0;
-}
-
static void chacha_decrypt_done(void *data, int err)
{
async_done_continue(data, err, poly_verify_tag);
@@ -151,210 +127,76 @@ skip:
return poly_verify_tag(req);
}
-static int poly_tail_continue(struct aead_request *req)
-{
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
-
- if (rctx->cryptlen == req->cryptlen) /* encrypting */
- return poly_copy_tag(req);
-
- return chacha_decrypt(req);
-}
-
-static void poly_tail_done(void *data, int err)
-{
- async_done_continue(data, err, poly_tail_continue);
-}
-
-static int poly_tail(struct aead_request *req)
-{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
- int err;
-
- preq->tail.assoclen = cpu_to_le64(rctx->assoclen);
- preq->tail.cryptlen = cpu_to_le64(rctx->cryptlen);
- sg_init_one(preq->src, &preq->tail, sizeof(preq->tail));
-
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_tail_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
- ahash_request_set_crypt(&preq->req, preq->src,
- rctx->tag, sizeof(preq->tail));
-
- err = crypto_ahash_finup(&preq->req);
- if (err)
- return err;
-
- return poly_tail_continue(req);
-}
-
-static void poly_cipherpad_done(void *data, int err)
-{
- async_done_continue(data, err, poly_tail);
-}
-
-static int poly_cipherpad(struct aead_request *req)
+static int poly_hash(struct aead_request *req)
{
- struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
+ const void *zp = page_address(ZERO_PAGE(0));
+ struct scatterlist *sg = req->src;
+ struct poly1305_desc_ctx desc;
+ struct scatter_walk walk;
+ struct {
+ union {
+ struct {
+ __le64 assoclen;
+ __le64 cryptlen;
+ };
+ u8 u8[16];
+ };
+ } tail;
unsigned int padlen;
- int err;
-
- padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE;
- memset(preq->pad, 0, sizeof(preq->pad));
- sg_init_one(preq->src, preq->pad, padlen);
-
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_cipherpad_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
- ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen);
+ unsigned int total;
- err = crypto_ahash_update(&preq->req);
- if (err)
- return err;
-
- return poly_tail(req);
-}
-
-static void poly_cipher_done(void *data, int err)
-{
- async_done_continue(data, err, poly_cipherpad);
-}
-
-static int poly_cipher(struct aead_request *req)
-{
- struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
- struct scatterlist *crypt = req->src;
- int err;
+ if (sg != req->dst)
+ memcpy_sglist(req->dst, sg, req->assoclen);
if (rctx->cryptlen == req->cryptlen) /* encrypting */
- crypt = req->dst;
-
- crypt = scatterwalk_ffwd(rctx->src, crypt, req->assoclen);
-
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_cipher_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
- ahash_request_set_crypt(&preq->req, crypt, NULL, rctx->cryptlen);
-
- err = crypto_ahash_update(&preq->req);
- if (err)
- return err;
+ sg = req->dst;
- return poly_cipherpad(req);
-}
+ poly1305_init(&desc, rctx->key);
+ scatterwalk_start(&walk, sg);
-static void poly_adpad_done(void *data, int err)
-{
- async_done_continue(data, err, poly_cipher);
-}
+ total = rctx->assoclen;
+ while (total) {
+ unsigned int n = scatterwalk_next(&walk, total);
-static int poly_adpad(struct aead_request *req)
-{
- struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
- unsigned int padlen;
- int err;
+ poly1305_update(&desc, walk.addr, n);
+ scatterwalk_done_src(&walk, n);
+ total -= n;
+ }
padlen = -rctx->assoclen % POLY1305_BLOCK_SIZE;
- memset(preq->pad, 0, sizeof(preq->pad));
- sg_init_one(preq->src, preq->pad, padlen);
-
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_adpad_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
- ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen);
-
- err = crypto_ahash_update(&preq->req);
- if (err)
- return err;
-
- return poly_cipher(req);
-}
-
-static void poly_ad_done(void *data, int err)
-{
- async_done_continue(data, err, poly_adpad);
-}
-
-static int poly_ad(struct aead_request *req)
-{
- struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
- int err;
-
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_ad_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
- ahash_request_set_crypt(&preq->req, req->src, NULL, rctx->assoclen);
-
- err = crypto_ahash_update(&preq->req);
- if (err)
- return err;
-
- return poly_adpad(req);
-}
-
-static void poly_setkey_done(void *data, int err)
-{
- async_done_continue(data, err, poly_ad);
-}
+ poly1305_update(&desc, zp, padlen);
-static int poly_setkey(struct aead_request *req)
-{
- struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
- int err;
+ scatterwalk_skip(&walk, req->assoclen - rctx->assoclen);
- sg_init_one(preq->src, rctx->key, sizeof(rctx->key));
+ total = rctx->cryptlen;
+ while (total) {
+ unsigned int n = scatterwalk_next(&walk, total);
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_setkey_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
- ahash_request_set_crypt(&preq->req, preq->src, NULL, sizeof(rctx->key));
-
- err = crypto_ahash_update(&preq->req);
- if (err)
- return err;
-
- return poly_ad(req);
-}
-
-static void poly_init_done(void *data, int err)
-{
- async_done_continue(data, err, poly_setkey);
-}
+ poly1305_update(&desc, walk.addr, n);
+ scatterwalk_done_src(&walk, n);
+ total -= n;
+ }
-static int poly_init(struct aead_request *req)
-{
- struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
- struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- struct poly_req *preq = &rctx->u.poly;
- int err;
+ padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE;
+ poly1305_update(&desc, zp, padlen);
- ahash_request_set_callback(&preq->req, rctx->flags,
- poly_init_done, req);
- ahash_request_set_tfm(&preq->req, ctx->poly);
+ tail.assoclen = cpu_to_le64(rctx->assoclen);
+ tail.cryptlen = cpu_to_le64(rctx->cryptlen);
+ poly1305_update(&desc, tail.u8, sizeof(tail));
+ memzero_explicit(&tail, sizeof(tail));
+ poly1305_final(&desc, rctx->tag);
- err = crypto_ahash_init(&preq->req);
- if (err)
- return err;
+ if (rctx->cryptlen != req->cryptlen)
+ return chacha_decrypt(req);
- return poly_setkey(req);
+ memcpy_to_scatterwalk(&walk, rctx->tag, sizeof(rctx->tag));
+ return 0;
}
static void poly_genkey_done(void *data, int err)
{
- async_done_continue(data, err, poly_init);
+ async_done_continue(data, err, poly_hash);
}
static int poly_genkey(struct aead_request *req)
@@ -388,7 +230,7 @@ static int poly_genkey(struct aead_request *req)
if (err)
return err;
- return poly_init(req);
+ return poly_hash(req);
}
static void chacha_encrypt_done(void *data, int err)
@@ -437,14 +279,7 @@ static int chachapoly_encrypt(struct aead_request *req)
/* encrypt call chain:
* - chacha_encrypt/done()
* - poly_genkey/done()
- * - poly_init/done()
- * - poly_setkey/done()
- * - poly_ad/done()
- * - poly_adpad/done()
- * - poly_cipher/done()
- * - poly_cipherpad/done()
- * - poly_tail/done/continue()
- * - poly_copy_tag()
+ * - poly_hash()
*/
return chacha_encrypt(req);
}
@@ -458,13 +293,7 @@ static int chachapoly_decrypt(struct aead_request *req)
/* decrypt call chain:
* - poly_genkey/done()
- * - poly_init/done()
- * - poly_setkey/done()
- * - poly_ad/done()
- * - poly_adpad/done()
- * - poly_cipher/done()
- * - poly_cipherpad/done()
- * - poly_tail/done/continue()
+ * - poly_hash()
* - chacha_decrypt/done()
* - poly_verify_tag()
*/
@@ -503,21 +332,13 @@ static int chachapoly_init(struct crypto_aead *tfm)
struct chachapoly_instance_ctx *ictx = aead_instance_ctx(inst);
struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_skcipher *chacha;
- struct crypto_ahash *poly;
unsigned long align;
- poly = crypto_spawn_ahash(&ictx->poly);
- if (IS_ERR(poly))
- return PTR_ERR(poly);
-
chacha = crypto_spawn_skcipher(&ictx->chacha);
- if (IS_ERR(chacha)) {
- crypto_free_ahash(poly);
+ if (IS_ERR(chacha))
return PTR_ERR(chacha);
- }
ctx->chacha = chacha;
- ctx->poly = poly;
ctx->saltlen = ictx->saltlen;
align = crypto_aead_alignmask(tfm);
@@ -525,12 +346,9 @@ static int chachapoly_init(struct crypto_aead *tfm)
crypto_aead_set_reqsize(
tfm,
align + offsetof(struct chachapoly_req_ctx, u) +
- max(offsetof(struct chacha_req, req) +
- sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(chacha),
- offsetof(struct poly_req, req) +
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(poly)));
+ offsetof(struct chacha_req, req) +
+ sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(chacha));
return 0;
}
@@ -539,7 +357,6 @@ static void chachapoly_exit(struct crypto_aead *tfm)
{
struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
- crypto_free_ahash(ctx->poly);
crypto_free_skcipher(ctx->chacha);
}
@@ -548,7 +365,6 @@ static void chachapoly_free(struct aead_instance *inst)
struct chachapoly_instance_ctx *ctx = aead_instance_ctx(inst);
crypto_drop_skcipher(&ctx->chacha);
- crypto_drop_ahash(&ctx->poly);
kfree(inst);
}
@@ -559,7 +375,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
struct aead_instance *inst;
struct chachapoly_instance_ctx *ctx;
struct skcipher_alg_common *chacha;
- struct hash_alg_common *poly;
int err;
if (ivsize > CHACHAPOLY_IV_SIZE)
@@ -581,14 +396,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
goto err_free_inst;
chacha = crypto_spawn_skcipher_alg_common(&ctx->chacha);
- err = crypto_grab_ahash(&ctx->poly, aead_crypto_instance(inst),
- crypto_attr_alg_name(tb[2]), 0, mask);
- if (err)
- goto err_free_inst;
- poly = crypto_spawn_ahash_alg(&ctx->poly);
-
err = -EINVAL;
- if (poly->digestsize != POLY1305_DIGEST_SIZE)
+ if (strcmp(crypto_attr_alg_name(tb[2]), "poly1305") &&
+ strcmp(crypto_attr_alg_name(tb[2]), "poly1305-generic"))
goto err_free_inst;
/* Need 16-byte IV size, including Initial Block Counter value */
if (chacha->ivsize != CHACHA_IV_SIZE)
@@ -599,16 +409,15 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
- "%s(%s,%s)", name, chacha->base.cra_name,
- poly->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
+ "%s(%s,poly1305)", name,
+ chacha->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
goto err_free_inst;
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
- "%s(%s,%s)", name, chacha->base.cra_driver_name,
- poly->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+ "%s(%s,poly1305-generic)", name,
+ chacha->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
goto err_free_inst;
- inst->alg.base.cra_priority = (chacha->base.cra_priority +
- poly->base.cra_priority) / 2;
+ inst->alg.base.cra_priority = chacha->base.cra_priority;
inst->alg.base.cra_blocksize = 1;
inst->alg.base.cra_alignmask = chacha->base.cra_alignmask;
inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) +
@@ -667,7 +476,7 @@ static void __exit chacha20poly1305_module_exit(void)
ARRAY_SIZE(rfc7539_tmpls));
}
-subsys_initcall(chacha20poly1305_module_init);
+module_init(chacha20poly1305_module_init);
module_exit(chacha20poly1305_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
deleted file mode 100644
index 1fb9fbd302c6..000000000000
--- a/crypto/chacha_generic.c
+++ /dev/null
@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2015 Martin Willi
- * Copyright (C) 2018 Google LLC
- */
-
-#include <linux/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/module.h>
-
-static int chacha_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- chacha_init(state, ctx->key, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
-
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes, ctx->nrounds);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int crypto_chacha_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_stream_xor(req, ctx, req->iv);
-}
-
-static int crypto_xchacha_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- /* Compute the subkey given the original key and first 128 nonce bits */
- chacha_init(state, ctx->key, req->iv);
- hchacha_block_generic(state, subctx.key, ctx->nrounds);
- subctx.nrounds = ctx->nrounds;
-
- /* Build the real IV */
- memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
- memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
-
- /* Generate the stream and XOR it with the data */
- return chacha_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-generic",
- .base.cra_priority = 100,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = crypto_chacha_crypt,
- .decrypt = crypto_chacha_crypt,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-generic",
- .base.cra_priority = 100,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = crypto_xchacha_crypt,
- .decrypt = crypto_xchacha_crypt,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-generic",
- .base.cra_priority = 100,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha12_setkey,
- .encrypt = crypto_xchacha_crypt,
- .decrypt = crypto_xchacha_crypt,
- }
-};
-
-static int __init chacha_generic_mod_init(void)
-{
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_generic_mod_fini(void)
-{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-subsys_initcall(chacha_generic_mod_init);
-module_exit(chacha_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-generic");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-generic");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-generic");
diff --git a/crypto/cmac.c b/crypto/cmac.c
index c66a0f4d8808..1b03964abe00 100644
--- a/crypto/cmac.c
+++ b/crypto/cmac.c
@@ -13,9 +13,12 @@
#include <crypto/internal/cipher.h>
#include <crypto/internal/hash.h>
+#include <crypto/utils.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
/*
* +------------------------
@@ -31,22 +34,6 @@ struct cmac_tfm_ctx {
__be64 consts[];
};
-/*
- * +------------------------
- * | <shash desc>
- * +------------------------
- * | cmac_desc_ctx
- * +------------------------
- * | odds (block size)
- * +------------------------
- * | prev (block size)
- * +------------------------
- */
-struct cmac_desc_ctx {
- unsigned int len;
- u8 odds[];
-};
-
static int crypto_cmac_digest_setkey(struct crypto_shash *parent,
const u8 *inkey, unsigned int keylen)
{
@@ -102,13 +89,10 @@ static int crypto_cmac_digest_setkey(struct crypto_shash *parent,
static int crypto_cmac_digest_init(struct shash_desc *pdesc)
{
- struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
int bs = crypto_shash_blocksize(pdesc->tfm);
- u8 *prev = &ctx->odds[bs];
+ u8 *prev = shash_desc_ctx(pdesc);
- ctx->len = 0;
memset(prev, 0, bs);
-
return 0;
}
@@ -117,77 +101,36 @@ static int crypto_cmac_digest_update(struct shash_desc *pdesc, const u8 *p,
{
struct crypto_shash *parent = pdesc->tfm;
struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
- struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
struct crypto_cipher *tfm = tctx->child;
int bs = crypto_shash_blocksize(parent);
- u8 *odds = ctx->odds;
- u8 *prev = odds + bs;
-
- /* checking the data can fill the block */
- if ((ctx->len + len) <= bs) {
- memcpy(odds + ctx->len, p, len);
- ctx->len += len;
- return 0;
- }
-
- /* filling odds with new data and encrypting it */
- memcpy(odds + ctx->len, p, bs - ctx->len);
- len -= bs - ctx->len;
- p += bs - ctx->len;
-
- crypto_xor(prev, odds, bs);
- crypto_cipher_encrypt_one(tfm, prev, prev);
+ u8 *prev = shash_desc_ctx(pdesc);
- /* clearing the length */
- ctx->len = 0;
-
- /* encrypting the rest of data */
- while (len > bs) {
+ do {
crypto_xor(prev, p, bs);
crypto_cipher_encrypt_one(tfm, prev, prev);
p += bs;
len -= bs;
- }
-
- /* keeping the surplus of blocksize */
- if (len) {
- memcpy(odds, p, len);
- ctx->len = len;
- }
-
- return 0;
+ } while (len >= bs);
+ return len;
}
-static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out)
+static int crypto_cmac_digest_finup(struct shash_desc *pdesc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct crypto_shash *parent = pdesc->tfm;
struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
- struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
struct crypto_cipher *tfm = tctx->child;
int bs = crypto_shash_blocksize(parent);
- u8 *odds = ctx->odds;
- u8 *prev = odds + bs;
+ u8 *prev = shash_desc_ctx(pdesc);
unsigned int offset = 0;
- if (ctx->len != bs) {
- unsigned int rlen;
- u8 *p = odds + ctx->len;
-
- *p = 0x80;
- p++;
-
- rlen = bs - ctx->len - 1;
- if (rlen)
- memset(p, 0, rlen);
-
+ crypto_xor(prev, src, len);
+ if (len != bs) {
+ prev[len] ^= 0x80;
offset += bs;
}
-
- crypto_xor(prev, odds, bs);
crypto_xor(prev, (const u8 *)tctx->consts + offset, bs);
-
crypto_cipher_encrypt_one(tfm, out, prev);
-
return 0;
}
@@ -269,13 +212,14 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.base.cra_ctxsize = sizeof(struct cmac_tfm_ctx) +
alg->cra_blocksize * 2;
+ inst->alg.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO;
inst->alg.digestsize = alg->cra_blocksize;
- inst->alg.descsize = sizeof(struct cmac_desc_ctx) +
- alg->cra_blocksize * 2;
+ inst->alg.descsize = alg->cra_blocksize;
inst->alg.init = crypto_cmac_digest_init;
inst->alg.update = crypto_cmac_digest_update;
- inst->alg.final = crypto_cmac_digest_final;
+ inst->alg.finup = crypto_cmac_digest_finup;
inst->alg.setkey = crypto_cmac_digest_setkey;
inst->alg.init_tfm = cmac_init_tfm;
inst->alg.clone_tfm = cmac_clone_tfm;
@@ -307,7 +251,7 @@ static void __exit crypto_cmac_module_exit(void)
crypto_unregister_template(&crypto_cmac_tmpl);
}
-subsys_initcall(crypto_cmac_module_init);
+module_init(crypto_cmac_module_init);
module_exit(crypto_cmac_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/crc32_generic.c b/crypto/crc32.c
index 783a30b27398..cc371d42601f 100644
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32.c
@@ -172,7 +172,7 @@ static void __exit crc32_mod_fini(void)
crypto_unregister_shashes(algs, num_algs);
}
-subsys_initcall(crc32_mod_init);
+module_init(crc32_mod_init);
module_exit(crc32_mod_fini);
MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c.c
index b1a36d32dc50..e5377898414a 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c.c
@@ -212,7 +212,7 @@ static void __exit crc32c_mod_fini(void)
crypto_unregister_shashes(algs, num_algs);
}
-subsys_initcall(crc32c_mod_init);
+module_init(crc32c_mod_init);
module_exit(crc32c_mod_fini);
MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 31d022d47f7a..5bb6f8d88cc2 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -1138,7 +1138,7 @@ static void __exit cryptd_exit(void)
crypto_unregister_template(&cryptd_tmpl);
}
-subsys_initcall(cryptd_init);
+module_init(cryptd_init);
module_exit(cryptd_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index c7c16da5e649..445d3c113ee1 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -23,9 +23,6 @@
#define CRYPTO_ENGINE_MAX_QLEN 10
-/* Temporary algorithm flag used to indicate an updated driver. */
-#define CRYPTO_ALG_ENGINE 0x200
-
struct crypto_engine_alg {
struct crypto_alg base;
struct crypto_engine_op op;
@@ -148,16 +145,9 @@ start_request:
}
}
- if (async_req->tfm->__crt_alg->cra_flags & CRYPTO_ALG_ENGINE) {
- alg = container_of(async_req->tfm->__crt_alg,
- struct crypto_engine_alg, base);
- op = &alg->op;
- } else {
- dev_err(engine->dev, "failed to do request\n");
- ret = -EINVAL;
- goto req_err_1;
- }
-
+ alg = container_of(async_req->tfm->__crt_alg,
+ struct crypto_engine_alg, base);
+ op = &alg->op;
ret = op->do_one_request(engine, async_req);
/* Request unsuccessfully executed by hardware */
@@ -569,9 +559,6 @@ int crypto_engine_register_aead(struct aead_engine_alg *alg)
{
if (!alg->op.do_one_request)
return -EINVAL;
-
- alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
-
return crypto_register_aead(&alg->base);
}
EXPORT_SYMBOL_GPL(crypto_engine_register_aead);
@@ -614,9 +601,6 @@ int crypto_engine_register_ahash(struct ahash_engine_alg *alg)
{
if (!alg->op.do_one_request)
return -EINVAL;
-
- alg->base.halg.base.cra_flags |= CRYPTO_ALG_ENGINE;
-
return crypto_register_ahash(&alg->base);
}
EXPORT_SYMBOL_GPL(crypto_engine_register_ahash);
@@ -660,9 +644,6 @@ int crypto_engine_register_akcipher(struct akcipher_engine_alg *alg)
{
if (!alg->op.do_one_request)
return -EINVAL;
-
- alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
-
return crypto_register_akcipher(&alg->base);
}
EXPORT_SYMBOL_GPL(crypto_engine_register_akcipher);
@@ -677,9 +658,6 @@ int crypto_engine_register_kpp(struct kpp_engine_alg *alg)
{
if (!alg->op.do_one_request)
return -EINVAL;
-
- alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
-
return crypto_register_kpp(&alg->base);
}
EXPORT_SYMBOL_GPL(crypto_engine_register_kpp);
@@ -694,9 +672,6 @@ int crypto_engine_register_skcipher(struct skcipher_engine_alg *alg)
{
if (!alg->op.do_one_request)
return -EINVAL;
-
- alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
-
return crypto_register_skcipher(&alg->base);
}
EXPORT_SYMBOL_GPL(crypto_engine_register_skcipher);
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index ced90f88ee07..34588f39fdfc 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -15,15 +15,11 @@
#include <crypto/null.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/spinlock.h>
#include <linux/string.h>
-static DEFINE_SPINLOCK(crypto_default_null_skcipher_lock);
-static struct crypto_sync_skcipher *crypto_default_null_skcipher;
-static int crypto_default_null_skcipher_refcnt;
-
static int null_init(struct shash_desc *desc)
{
return 0;
@@ -65,19 +61,9 @@ static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
static int null_skcipher_crypt(struct skcipher_request *req)
{
- struct skcipher_walk walk;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while (walk.nbytes) {
- if (walk.src.virt.addr != walk.dst.virt.addr)
- memcpy(walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- err = skcipher_walk_done(&walk, 0);
- }
-
- return err;
+ if (req->src != req->dst)
+ memcpy_sglist(req->dst, req->src, req->cryptlen);
+ return 0;
}
static struct shash_alg digest_null = {
@@ -129,54 +115,6 @@ static struct crypto_alg cipher_null = {
MODULE_ALIAS_CRYPTO("digest_null");
MODULE_ALIAS_CRYPTO("cipher_null");
-struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
-{
- struct crypto_sync_skcipher *ntfm = NULL;
- struct crypto_sync_skcipher *tfm;
-
- spin_lock_bh(&crypto_default_null_skcipher_lock);
- tfm = crypto_default_null_skcipher;
-
- if (!tfm) {
- spin_unlock_bh(&crypto_default_null_skcipher_lock);
-
- ntfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
- if (IS_ERR(ntfm))
- return ntfm;
-
- spin_lock_bh(&crypto_default_null_skcipher_lock);
- tfm = crypto_default_null_skcipher;
- if (!tfm) {
- tfm = ntfm;
- ntfm = NULL;
- crypto_default_null_skcipher = tfm;
- }
- }
-
- crypto_default_null_skcipher_refcnt++;
- spin_unlock_bh(&crypto_default_null_skcipher_lock);
-
- crypto_free_sync_skcipher(ntfm);
-
- return tfm;
-}
-EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher);
-
-void crypto_put_default_null_skcipher(void)
-{
- struct crypto_sync_skcipher *tfm = NULL;
-
- spin_lock_bh(&crypto_default_null_skcipher_lock);
- if (!--crypto_default_null_skcipher_refcnt) {
- tfm = crypto_default_null_skcipher;
- crypto_default_null_skcipher = NULL;
- }
- spin_unlock_bh(&crypto_default_null_skcipher_lock);
-
- crypto_free_sync_skcipher(tfm);
-}
-EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher);
-
static int __init crypto_null_mod_init(void)
{
int ret = 0;
@@ -210,7 +148,7 @@ static void __exit crypto_null_mod_fini(void)
crypto_unregister_skcipher(&skcipher_null);
}
-subsys_initcall(crypto_null_mod_init);
+module_init(crypto_null_mod_init);
module_exit(crypto_null_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/ctr.c b/crypto/ctr.c
index 97a947b0a876..a388f0ceb3a0 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -350,7 +350,7 @@ static void __exit crypto_ctr_module_exit(void)
ARRAY_SIZE(crypto_ctr_tmpls));
}
-subsys_initcall(crypto_ctr_module_init);
+module_init(crypto_ctr_module_init);
module_exit(crypto_ctr_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/cts.c b/crypto/cts.c
index f5b42156b6c7..48898d5e24ff 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -402,7 +402,7 @@ static void __exit crypto_cts_module_exit(void)
crypto_unregister_template(&crypto_cts_tmpl);
}
-subsys_initcall(crypto_cts_module_init);
+module_init(crypto_cts_module_init);
module_exit(crypto_cts_module_exit);
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/crypto/curve25519-generic.c b/crypto/curve25519-generic.c
index 68a673262e04..f3e56e73c66c 100644
--- a/crypto/curve25519-generic.c
+++ b/crypto/curve25519-generic.c
@@ -82,7 +82,7 @@ static void __exit curve25519_exit(void)
crypto_unregister_kpp(&curve25519_alg);
}
-subsys_initcall(curve25519_init);
+module_init(curve25519_init);
module_exit(curve25519_exit);
MODULE_ALIAS_CRYPTO("curve25519");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 5c346c544093..fe8e4ad0fee1 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -6,253 +6,250 @@
* by IPCOMP (RFC 3173 & RFC 2394).
*
* Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
- *
- * FIXME: deflate transforms will require up to a total of about 436k of kernel
- * memory on i386 (390k for compression, the rest for decompression), as the
- * current zlib kernel code uses a worst case pre-allocation system by default.
- * This needs to be fixed so that the amount of memory required is properly
- * related to the winbits and memlevel parameters.
- *
- * The default winbits of 11 should suit most packets, and it may be something
- * to configure on a per-tfm basis in the future.
- *
- * Currently, compression history is not maintained between tfm calls, as
- * it is not needed for IPCOMP and keeps the code simpler. It can be
- * implemented if someone wants it.
+ * Copyright (c) 2023 Google, LLC. <ardb@kernel.org>
+ * Copyright (c) 2025 Herbert Xu <herbert@gondor.apana.org.au>
*/
+#include <crypto/internal/acompress.h>
+#include <crypto/scatterwalk.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/crypto.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/zlib.h>
-#include <linux/vmalloc.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/net.h>
-#include <crypto/internal/scompress.h>
#define DEFLATE_DEF_LEVEL Z_DEFAULT_COMPRESSION
#define DEFLATE_DEF_WINBITS 11
#define DEFLATE_DEF_MEMLEVEL MAX_MEM_LEVEL
-struct deflate_ctx {
- struct z_stream_s comp_stream;
- struct z_stream_s decomp_stream;
+struct deflate_stream {
+ struct z_stream_s stream;
+ u8 workspace[];
};
-static int deflate_comp_init(struct deflate_ctx *ctx)
-{
- int ret = 0;
- struct z_stream_s *stream = &ctx->comp_stream;
-
- stream->workspace = vzalloc(zlib_deflate_workspacesize(
- -DEFLATE_DEF_WINBITS, MAX_MEM_LEVEL));
- if (!stream->workspace) {
- ret = -ENOMEM;
- goto out;
- }
- ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
- -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
- Z_DEFAULT_STRATEGY);
- if (ret != Z_OK) {
- ret = -EINVAL;
- goto out_free;
- }
-out:
- return ret;
-out_free:
- vfree(stream->workspace);
- goto out;
-}
+static DEFINE_MUTEX(deflate_stream_lock);
-static int deflate_decomp_init(struct deflate_ctx *ctx)
+static void *deflate_alloc_stream(void)
{
- int ret = 0;
- struct z_stream_s *stream = &ctx->decomp_stream;
+ size_t size = max(zlib_inflate_workspacesize(),
+ zlib_deflate_workspacesize(-DEFLATE_DEF_WINBITS,
+ DEFLATE_DEF_MEMLEVEL));
+ struct deflate_stream *ctx;
- stream->workspace = vzalloc(zlib_inflate_workspacesize());
- if (!stream->workspace) {
- ret = -ENOMEM;
- goto out;
- }
- ret = zlib_inflateInit2(stream, -DEFLATE_DEF_WINBITS);
- if (ret != Z_OK) {
- ret = -EINVAL;
- goto out_free;
- }
-out:
- return ret;
-out_free:
- vfree(stream->workspace);
- goto out;
-}
+ ctx = kvmalloc(sizeof(*ctx) + size, GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
-static void deflate_comp_exit(struct deflate_ctx *ctx)
-{
- zlib_deflateEnd(&ctx->comp_stream);
- vfree(ctx->comp_stream.workspace);
-}
+ ctx->stream.workspace = ctx->workspace;
-static void deflate_decomp_exit(struct deflate_ctx *ctx)
-{
- zlib_inflateEnd(&ctx->decomp_stream);
- vfree(ctx->decomp_stream.workspace);
+ return ctx;
}
-static int __deflate_init(void *ctx)
+static struct crypto_acomp_streams deflate_streams = {
+ .alloc_ctx = deflate_alloc_stream,
+ .cfree_ctx = kvfree,
+};
+
+static int deflate_compress_one(struct acomp_req *req,
+ struct deflate_stream *ds)
{
+ struct z_stream_s *stream = &ds->stream;
+ struct acomp_walk walk;
int ret;
- ret = deflate_comp_init(ctx);
+ ret = acomp_walk_virt(&walk, req, true);
if (ret)
- goto out;
- ret = deflate_decomp_init(ctx);
- if (ret)
- deflate_comp_exit(ctx);
-out:
- return ret;
-}
+ return ret;
-static void *deflate_alloc_ctx(void)
-{
- struct deflate_ctx *ctx;
- int ret;
+ do {
+ unsigned int dcur;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
+ dcur = acomp_walk_next_dst(&walk);
+ if (!dcur)
+ return -ENOSPC;
- ret = __deflate_init(ctx);
- if (ret) {
- kfree(ctx);
- return ERR_PTR(ret);
- }
+ stream->avail_out = dcur;
+ stream->next_out = walk.dst.virt.addr;
- return ctx;
-}
+ do {
+ int flush = Z_FINISH;
+ unsigned int scur;
-static void __deflate_exit(void *ctx)
-{
- deflate_comp_exit(ctx);
- deflate_decomp_exit(ctx);
-}
+ stream->avail_in = 0;
+ stream->next_in = NULL;
-static void deflate_free_ctx(void *ctx)
-{
- __deflate_exit(ctx);
- kfree_sensitive(ctx);
+ scur = acomp_walk_next_src(&walk);
+ if (scur) {
+ if (acomp_walk_more_src(&walk, scur))
+ flush = Z_NO_FLUSH;
+ stream->avail_in = scur;
+ stream->next_in = walk.src.virt.addr;
+ }
+
+ ret = zlib_deflate(stream, flush);
+
+ if (scur) {
+ scur -= stream->avail_in;
+ acomp_walk_done_src(&walk, scur);
+ }
+ } while (ret == Z_OK && stream->avail_out);
+
+ acomp_walk_done_dst(&walk, dcur);
+ } while (ret == Z_OK);
+
+ if (ret != Z_STREAM_END)
+ return -EINVAL;
+
+ req->dlen = stream->total_out;
+ return 0;
}
-static int __deflate_compress(const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen, void *ctx)
+static int deflate_compress(struct acomp_req *req)
{
- int ret = 0;
- struct deflate_ctx *dctx = ctx;
- struct z_stream_s *stream = &dctx->comp_stream;
+ struct crypto_acomp_stream *s;
+ struct deflate_stream *ds;
+ int err;
+
+ s = crypto_acomp_lock_stream_bh(&deflate_streams);
+ ds = s->ctx;
- ret = zlib_deflateReset(stream);
- if (ret != Z_OK) {
- ret = -EINVAL;
+ err = zlib_deflateInit2(&ds->stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+ -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
+ Z_DEFAULT_STRATEGY);
+ if (err != Z_OK) {
+ err = -EINVAL;
goto out;
}
- stream->next_in = (u8 *)src;
- stream->avail_in = slen;
- stream->next_out = (u8 *)dst;
- stream->avail_out = *dlen;
+ err = deflate_compress_one(req, ds);
- ret = zlib_deflate(stream, Z_FINISH);
- if (ret != Z_STREAM_END) {
- ret = -EINVAL;
- goto out;
- }
- ret = 0;
- *dlen = stream->total_out;
out:
- return ret;
+ crypto_acomp_unlock_stream_bh(s);
+
+ return err;
}
-static int deflate_scompress(struct crypto_scomp *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen,
- void *ctx)
+static int deflate_decompress_one(struct acomp_req *req,
+ struct deflate_stream *ds)
{
- return __deflate_compress(src, slen, dst, dlen, ctx);
+ struct z_stream_s *stream = &ds->stream;
+ bool out_of_space = false;
+ struct acomp_walk walk;
+ int ret;
+
+ ret = acomp_walk_virt(&walk, req, true);
+ if (ret)
+ return ret;
+
+ do {
+ unsigned int scur;
+
+ stream->avail_in = 0;
+ stream->next_in = NULL;
+
+ scur = acomp_walk_next_src(&walk);
+ if (scur) {
+ stream->avail_in = scur;
+ stream->next_in = walk.src.virt.addr;
+ }
+
+ do {
+ unsigned int dcur;
+
+ dcur = acomp_walk_next_dst(&walk);
+ if (!dcur) {
+ out_of_space = true;
+ break;
+ }
+
+ stream->avail_out = dcur;
+ stream->next_out = walk.dst.virt.addr;
+
+ ret = zlib_inflate(stream, Z_NO_FLUSH);
+
+ dcur -= stream->avail_out;
+ acomp_walk_done_dst(&walk, dcur);
+ } while (ret == Z_OK && stream->avail_in);
+
+ if (scur)
+ acomp_walk_done_src(&walk, scur);
+
+ if (out_of_space)
+ return -ENOSPC;
+ } while (ret == Z_OK);
+
+ if (ret != Z_STREAM_END)
+ return -EINVAL;
+
+ req->dlen = stream->total_out;
+ return 0;
}
-static int __deflate_decompress(const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen, void *ctx)
+static int deflate_decompress(struct acomp_req *req)
{
+ struct crypto_acomp_stream *s;
+ struct deflate_stream *ds;
+ int err;
- int ret = 0;
- struct deflate_ctx *dctx = ctx;
- struct z_stream_s *stream = &dctx->decomp_stream;
+ s = crypto_acomp_lock_stream_bh(&deflate_streams);
+ ds = s->ctx;
- ret = zlib_inflateReset(stream);
- if (ret != Z_OK) {
- ret = -EINVAL;
+ err = zlib_inflateInit2(&ds->stream, -DEFLATE_DEF_WINBITS);
+ if (err != Z_OK) {
+ err = -EINVAL;
goto out;
}
- stream->next_in = (u8 *)src;
- stream->avail_in = slen;
- stream->next_out = (u8 *)dst;
- stream->avail_out = *dlen;
-
- ret = zlib_inflate(stream, Z_SYNC_FLUSH);
- /*
- * Work around a bug in zlib, which sometimes wants to taste an extra
- * byte when being used in the (undocumented) raw deflate mode.
- * (From USAGI).
- */
- if (ret == Z_OK && !stream->avail_in && stream->avail_out) {
- u8 zerostuff = 0;
- stream->next_in = &zerostuff;
- stream->avail_in = 1;
- ret = zlib_inflate(stream, Z_FINISH);
- }
- if (ret != Z_STREAM_END) {
- ret = -EINVAL;
- goto out;
- }
- ret = 0;
- *dlen = stream->total_out;
+ err = deflate_decompress_one(req, ds);
+
out:
- return ret;
+ crypto_acomp_unlock_stream_bh(s);
+
+ return err;
}
-static int deflate_sdecompress(struct crypto_scomp *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen,
- void *ctx)
+static int deflate_init(struct crypto_acomp *tfm)
{
- return __deflate_decompress(src, slen, dst, dlen, ctx);
+ int ret;
+
+ mutex_lock(&deflate_stream_lock);
+ ret = crypto_acomp_alloc_streams(&deflate_streams);
+ mutex_unlock(&deflate_stream_lock);
+
+ return ret;
}
-static struct scomp_alg scomp = {
- .alloc_ctx = deflate_alloc_ctx,
- .free_ctx = deflate_free_ctx,
- .compress = deflate_scompress,
- .decompress = deflate_sdecompress,
- .base = {
- .cra_name = "deflate",
- .cra_driver_name = "deflate-scomp",
- .cra_module = THIS_MODULE,
- }
+static struct acomp_alg acomp = {
+ .compress = deflate_compress,
+ .decompress = deflate_decompress,
+ .init = deflate_init,
+ .base.cra_name = "deflate",
+ .base.cra_driver_name = "deflate-generic",
+ .base.cra_flags = CRYPTO_ALG_REQ_VIRT,
+ .base.cra_module = THIS_MODULE,
};
static int __init deflate_mod_init(void)
{
- return crypto_register_scomp(&scomp);
+ return crypto_register_acomp(&acomp);
}
static void __exit deflate_mod_fini(void)
{
- crypto_unregister_scomp(&scomp);
+ crypto_unregister_acomp(&acomp);
+ crypto_acomp_free_streams(&deflate_streams);
}
-subsys_initcall(deflate_mod_init);
+module_init(deflate_mod_init);
module_exit(deflate_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Deflate Compression Algorithm for IPCOMP");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
+MODULE_AUTHOR("Herbert Xu <herbert@gondor.apana.org.au>");
MODULE_ALIAS_CRYPTO("deflate");
MODULE_ALIAS_CRYPTO("deflate-generic");
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index 1274e18d3eb9..fce341400914 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -122,7 +122,7 @@ static void __exit des_generic_mod_fini(void)
crypto_unregister_algs(des_algs, ARRAY_SIZE(des_algs));
}
-subsys_initcall(des_generic_mod_init);
+module_init(des_generic_mod_init);
module_exit(des_generic_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/dh.c b/crypto/dh.c
index afc0fd847761..8250eeeebd0f 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -920,7 +920,7 @@ static void __exit dh_exit(void)
crypto_unregister_kpp(&dh);
}
-subsys_initcall(dh_init);
+module_init(dh_init);
module_exit(dh_exit);
MODULE_ALIAS_CRYPTO("dh");
MODULE_LICENSE("GPL");
diff --git a/crypto/drbg.c b/crypto/drbg.c
index f28dfc2511a2..dbe4c8bb5ceb 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -2132,7 +2132,7 @@ static void __exit drbg_exit(void)
crypto_unregister_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
}
-subsys_initcall(drbg_init);
+module_init(drbg_init);
module_exit(drbg_exit);
#ifndef CRYPTO_DRBG_HASH_STRING
#define CRYPTO_DRBG_HASH_STRING ""
diff --git a/crypto/ecb.c b/crypto/ecb.c
index 95d7e972865a..cd1b20456dad 100644
--- a/crypto/ecb.c
+++ b/crypto/ecb.c
@@ -219,7 +219,7 @@ static void __exit crypto_ecb_module_exit(void)
crypto_unregister_template(&crypto_ecb_tmpl);
}
-subsys_initcall(crypto_ecb_module_init);
+module_init(crypto_ecb_module_init);
module_exit(crypto_ecb_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 72cfd1590156..9f0b93b3166d 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -240,7 +240,7 @@ static void __exit ecdh_exit(void)
crypto_unregister_kpp(&ecdh_nist_p384);
}
-subsys_initcall(ecdh_init);
+module_init(ecdh_init);
module_exit(ecdh_exit);
MODULE_ALIAS_CRYPTO("ecdh");
MODULE_LICENSE("GPL");
diff --git a/crypto/ecdsa-p1363.c b/crypto/ecdsa-p1363.c
index 4454f1f8f33f..e0c55c64711c 100644
--- a/crypto/ecdsa-p1363.c
+++ b/crypto/ecdsa-p1363.c
@@ -21,7 +21,8 @@ static int ecdsa_p1363_verify(struct crypto_sig *tfm,
const void *digest, unsigned int dlen)
{
struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm);
- unsigned int keylen = crypto_sig_keysize(ctx->child);
+ unsigned int keylen = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child),
+ BITS_PER_BYTE);
unsigned int ndigits = DIV_ROUND_UP_POW2(keylen, sizeof(u64));
struct ecdsa_raw_sig sig;
@@ -45,7 +46,8 @@ static unsigned int ecdsa_p1363_max_size(struct crypto_sig *tfm)
{
struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm);
- return 2 * crypto_sig_keysize(ctx->child);
+ return 2 * DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child),
+ BITS_PER_BYTE);
}
static unsigned int ecdsa_p1363_digest_size(struct crypto_sig *tfm)
diff --git a/crypto/ecdsa-x962.c b/crypto/ecdsa-x962.c
index 90a04f4b9a2f..ee71594d10a0 100644
--- a/crypto/ecdsa-x962.c
+++ b/crypto/ecdsa-x962.c
@@ -82,7 +82,7 @@ static int ecdsa_x962_verify(struct crypto_sig *tfm,
int err;
sig_ctx.ndigits = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child),
- sizeof(u64));
+ sizeof(u64) * BITS_PER_BYTE);
err = asn1_ber_decoder(&ecdsasignature_decoder, &sig_ctx, src, slen);
if (err < 0)
@@ -103,7 +103,8 @@ static unsigned int ecdsa_x962_max_size(struct crypto_sig *tfm)
{
struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm);
struct sig_alg *alg = crypto_sig_alg(ctx->child);
- int slen = crypto_sig_keysize(ctx->child);
+ int slen = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child),
+ BITS_PER_BYTE);
/*
* Verify takes ECDSA-Sig-Value (described in RFC 5480) as input,
diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c
index 117526d15dde..ce8e4364842f 100644
--- a/crypto/ecdsa.c
+++ b/crypto/ecdsa.c
@@ -167,7 +167,7 @@ static unsigned int ecdsa_key_size(struct crypto_sig *tfm)
{
struct ecc_ctx *ctx = crypto_sig_ctx(tfm);
- return DIV_ROUND_UP(ctx->curve->nbits, 8);
+ return ctx->curve->nbits;
}
static unsigned int ecdsa_digest_size(struct crypto_sig *tfm)
@@ -334,7 +334,7 @@ static void __exit ecdsa_exit(void)
crypto_unregister_sig(&ecdsa_nist_p521);
}
-subsys_initcall(ecdsa_init);
+module_init(ecdsa_init);
module_exit(ecdsa_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index 69686668625e..e0a2d3209938 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -32,7 +32,6 @@ static int echainiv_encrypt(struct aead_request *req)
u64 seqno;
u8 *info;
unsigned int ivsize = crypto_aead_ivsize(geniv);
- int err;
if (req->cryptlen < ivsize)
return -EINVAL;
@@ -41,20 +40,9 @@ static int echainiv_encrypt(struct aead_request *req)
info = req->iv;
- if (req->src != req->dst) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
-
- skcipher_request_set_sync_tfm(nreq, ctx->sknull);
- skcipher_request_set_callback(nreq, req->base.flags,
- NULL, NULL);
- skcipher_request_set_crypt(nreq, req->src, req->dst,
- req->assoclen + req->cryptlen,
- NULL);
-
- err = crypto_skcipher_encrypt(nreq);
- if (err)
- return err;
- }
+ if (req->src != req->dst)
+ memcpy_sglist(req->dst, req->src,
+ req->assoclen + req->cryptlen);
aead_request_set_callback(subreq, req->base.flags,
req->base.complete, req->base.data);
@@ -157,7 +145,7 @@ static void __exit echainiv_module_exit(void)
crypto_unregister_template(&echainiv_tmpl);
}
-subsys_initcall(echainiv_module_init);
+module_init(echainiv_module_init);
module_exit(echainiv_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c
index b3dd8a3ddeb7..2c0602f0cd40 100644
--- a/crypto/ecrdsa.c
+++ b/crypto/ecrdsa.c
@@ -249,7 +249,7 @@ static unsigned int ecrdsa_key_size(struct crypto_sig *tfm)
* Verify doesn't need any output, so it's just informational
* for keyctl to determine the key bit size.
*/
- return ctx->pub_key.ndigits * sizeof(u64);
+ return ctx->pub_key.ndigits * sizeof(u64) * BITS_PER_BYTE;
}
static unsigned int ecrdsa_max_size(struct crypto_sig *tfm)
diff --git a/crypto/essiv.c b/crypto/essiv.c
index ec0ec8992c2d..d003b78fcd85 100644
--- a/crypto/essiv.c
+++ b/crypto/essiv.c
@@ -548,8 +548,7 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
}
/* record the driver name so we can instantiate this exact algo later */
- strscpy(ictx->shash_driver_name, hash_alg->base.cra_driver_name,
- CRYPTO_MAX_ALG_NAME);
+ strscpy(ictx->shash_driver_name, hash_alg->base.cra_driver_name);
/* Instance fields */
@@ -642,7 +641,7 @@ static void __exit essiv_module_exit(void)
crypto_unregister_template(&essiv_tmpl);
}
-subsys_initcall(essiv_module_init);
+module_init(essiv_module_init);
module_exit(essiv_module_exit);
MODULE_DESCRIPTION("ESSIV skcipher/aead wrapper for block encryption");
diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c
index 95a16e88899b..80036835cec5 100644
--- a/crypto/fcrypt.c
+++ b/crypto/fcrypt.c
@@ -411,7 +411,7 @@ static void __exit fcrypt_mod_fini(void)
crypto_unregister_alg(&fcrypt_alg);
}
-subsys_initcall(fcrypt_mod_init);
+module_init(fcrypt_mod_init);
module_exit(fcrypt_mod_fini);
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/crypto/fips.c b/crypto/fips.c
index 2fa3a9ee61a1..e88a604cb42b 100644
--- a/crypto/fips.c
+++ b/crypto/fips.c
@@ -95,5 +95,5 @@ static void __exit fips_exit(void)
crypto_proc_fips_exit();
}
-subsys_initcall(fips_init);
+module_init(fips_init);
module_exit(fips_exit);
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 84f7c23d14e4..97716482bed0 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -9,7 +9,6 @@
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/internal/hash.h>
-#include <crypto/null.h>
#include <crypto/scatterwalk.h>
#include <crypto/gcm.h>
#include <crypto/hash.h>
@@ -46,7 +45,6 @@ struct crypto_rfc4543_instance_ctx {
struct crypto_rfc4543_ctx {
struct crypto_aead *child;
- struct crypto_sync_skcipher *null;
u8 nonce[4];
};
@@ -79,8 +77,6 @@ static struct {
struct scatterlist sg;
} *gcm_zeroes;
-static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc);
-
static inline struct crypto_gcm_req_priv_ctx *crypto_gcm_reqctx(
struct aead_request *req)
{
@@ -930,12 +926,12 @@ static int crypto_rfc4543_crypt(struct aead_request *req, bool enc)
unsigned int authsize = crypto_aead_authsize(aead);
u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child),
crypto_aead_alignmask(ctx->child) + 1);
- int err;
if (req->src != req->dst) {
- err = crypto_rfc4543_copy_src_to_dst(req, enc);
- if (err)
- return err;
+ unsigned int nbytes = req->assoclen + req->cryptlen -
+ (enc ? 0 : authsize);
+
+ memcpy_sglist(req->dst, req->src, nbytes);
}
memcpy(iv, ctx->nonce, 4);
@@ -952,22 +948,6 @@ static int crypto_rfc4543_crypt(struct aead_request *req, bool enc)
return enc ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq);
}
-static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead);
- unsigned int authsize = crypto_aead_authsize(aead);
- unsigned int nbytes = req->assoclen + req->cryptlen -
- (enc ? 0 : authsize);
- SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
-
- skcipher_request_set_sync_tfm(nreq, ctx->null);
- skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL);
- skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL);
-
- return crypto_skcipher_encrypt(nreq);
-}
-
static int crypto_rfc4543_encrypt(struct aead_request *req)
{
return crypto_ipsec_check_assoclen(req->assoclen) ?:
@@ -987,21 +967,13 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
struct crypto_aead_spawn *spawn = &ictx->aead;
struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_aead *aead;
- struct crypto_sync_skcipher *null;
unsigned long align;
- int err = 0;
aead = crypto_spawn_aead(spawn);
if (IS_ERR(aead))
return PTR_ERR(aead);
- null = crypto_get_default_null_skcipher();
- err = PTR_ERR(null);
- if (IS_ERR(null))
- goto err_free_aead;
-
ctx->child = aead;
- ctx->null = null;
align = crypto_aead_alignmask(aead);
align &= ~(crypto_tfm_ctx_alignment() - 1);
@@ -1012,10 +984,6 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
align + GCM_AES_IV_SIZE);
return 0;
-
-err_free_aead:
- crypto_free_aead(aead);
- return err;
}
static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm)
@@ -1023,7 +991,6 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm)
struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
crypto_free_aead(ctx->child);
- crypto_put_default_null_skcipher();
}
static void crypto_rfc4543_free(struct aead_instance *inst)
@@ -1152,7 +1119,7 @@ static void __exit crypto_gcm_module_exit(void)
ARRAY_SIZE(crypto_gcm_tmpls));
}
-subsys_initcall(crypto_gcm_module_init);
+module_init(crypto_gcm_module_init);
module_exit(crypto_gcm_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/geniv.c b/crypto/geniv.c
index bee4621b4f12..42eff6a7387c 100644
--- a/crypto/geniv.c
+++ b/crypto/geniv.c
@@ -9,7 +9,6 @@
#include <crypto/internal/geniv.h>
#include <crypto/internal/rng.h>
-#include <crypto/null.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -125,15 +124,10 @@ int aead_init_geniv(struct crypto_aead *aead)
if (err)
goto out;
- ctx->sknull = crypto_get_default_null_skcipher();
- err = PTR_ERR(ctx->sknull);
- if (IS_ERR(ctx->sknull))
- goto out;
-
child = crypto_spawn_aead(aead_instance_ctx(inst));
err = PTR_ERR(child);
if (IS_ERR(child))
- goto drop_null;
+ goto out;
ctx->child = child;
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(child) +
@@ -143,10 +137,6 @@ int aead_init_geniv(struct crypto_aead *aead)
out:
return err;
-
-drop_null:
- crypto_put_default_null_skcipher();
- goto out;
}
EXPORT_SYMBOL_GPL(aead_init_geniv);
@@ -155,7 +145,6 @@ void aead_exit_geniv(struct crypto_aead *tfm)
struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
crypto_free_aead(ctx->child);
- crypto_put_default_null_skcipher();
}
EXPORT_SYMBOL_GPL(aead_exit_geniv);
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index c70d163c1ac9..e5803c249c12 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -34,14 +34,14 @@
* (https://csrc.nist.gov/publications/detail/sp/800-38d/final)
*/
-#include <crypto/algapi.h>
#include <crypto/gf128mul.h>
#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
+#include <crypto/utils.h>
+#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
static int ghash_init(struct shash_desc *desc)
{
@@ -82,59 +82,36 @@ static int ghash_update(struct shash_desc *desc,
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *dst = dctx->buffer;
- if (dctx->bytes) {
- int n = min(srclen, dctx->bytes);
- u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- dctx->bytes -= n;
- srclen -= n;
-
- while (n--)
- *pos++ ^= *src++;
-
- if (!dctx->bytes)
- gf128mul_4k_lle((be128 *)dst, ctx->gf128);
- }
-
- while (srclen >= GHASH_BLOCK_SIZE) {
+ do {
crypto_xor(dst, src, GHASH_BLOCK_SIZE);
gf128mul_4k_lle((be128 *)dst, ctx->gf128);
src += GHASH_BLOCK_SIZE;
srclen -= GHASH_BLOCK_SIZE;
- }
-
- if (srclen) {
- dctx->bytes = GHASH_BLOCK_SIZE - srclen;
- while (srclen--)
- *dst++ ^= *src++;
- }
+ } while (srclen >= GHASH_BLOCK_SIZE);
- return 0;
+ return srclen;
}
-static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static void ghash_flush(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
{
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
u8 *dst = dctx->buffer;
- if (dctx->bytes) {
- u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- while (dctx->bytes--)
- *tmp++ ^= 0;
-
+ if (len) {
+ crypto_xor(dst, src, len);
gf128mul_4k_lle((be128 *)dst, ctx->gf128);
}
-
- dctx->bytes = 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *buf = dctx->buffer;
- ghash_flush(ctx, dctx);
+ ghash_flush(desc, src, len);
memcpy(dst, buf, GHASH_BLOCK_SIZE);
return 0;
@@ -151,13 +128,14 @@ static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-generic",
.cra_priority = 100,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
.cra_module = THIS_MODULE,
@@ -175,7 +153,7 @@ static void __exit ghash_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-subsys_initcall(ghash_mod_init);
+module_init(ghash_mod_init);
module_exit(ghash_mod_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/hctr2.c b/crypto/hctr2.c
index cbcd673be481..c8932777bba8 100644
--- a/crypto/hctr2.c
+++ b/crypto/hctr2.c
@@ -570,7 +570,7 @@ static void __exit hctr2_module_exit(void)
ARRAY_SIZE(hctr2_tmpls));
}
-subsys_initcall(hctr2_module_init);
+module_init(hctr2_module_init);
module_exit(hctr2_module_exit);
MODULE_DESCRIPTION("HCTR2 length-preserving encryption mode");
diff --git a/crypto/hkdf.c b/crypto/hkdf.c
index 2434c5c42545..f24c2a8d4df9 100644
--- a/crypto/hkdf.c
+++ b/crypto/hkdf.c
@@ -543,7 +543,7 @@ static int __init crypto_hkdf_module_init(void)
{
int ret = 0, i;
- if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+ if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS))
return 0;
for (i = 0; i < ARRAY_SIZE(hkdf_sha256_tv); i++) {
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 7cec25ff9889..148af460ae97 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -13,13 +13,11 @@
#include <crypto/hmac.h>
#include <crypto/internal/hash.h>
-#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/fips.h>
-#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/scatterlist.h>
+#include <linux/slab.h>
#include <linux/string.h>
struct hmac_ctx {
@@ -28,6 +26,12 @@ struct hmac_ctx {
u8 pads[];
};
+struct ahash_hmac_ctx {
+ struct crypto_ahash *hash;
+ /* Contains 'u8 ipad[statesize];', then 'u8 opad[statesize];' */
+ u8 pads[];
+};
+
static int hmac_setkey(struct crypto_shash *parent,
const u8 *inkey, unsigned int keylen)
{
@@ -39,7 +43,7 @@ static int hmac_setkey(struct crypto_shash *parent,
u8 *ipad = &tctx->pads[0];
u8 *opad = &tctx->pads[ss];
SHASH_DESC_ON_STACK(shash, hash);
- unsigned int i;
+ int err, i;
if (fips_enabled && (keylen < 112 / 8))
return -EINVAL;
@@ -65,12 +69,14 @@ static int hmac_setkey(struct crypto_shash *parent,
opad[i] ^= HMAC_OPAD_VALUE;
}
- return crypto_shash_init(shash) ?:
- crypto_shash_update(shash, ipad, bs) ?:
- crypto_shash_export(shash, ipad) ?:
- crypto_shash_init(shash) ?:
- crypto_shash_update(shash, opad, bs) ?:
- crypto_shash_export(shash, opad);
+ err = crypto_shash_init(shash) ?:
+ crypto_shash_update(shash, ipad, bs) ?:
+ crypto_shash_export(shash, ipad) ?:
+ crypto_shash_init(shash) ?:
+ crypto_shash_update(shash, opad, bs) ?:
+ crypto_shash_export(shash, opad);
+ shash_desc_zero(shash);
+ return err;
}
static int hmac_export(struct shash_desc *pdesc, void *out)
@@ -90,6 +96,22 @@ static int hmac_import(struct shash_desc *pdesc, const void *in)
return crypto_shash_import(desc, in);
}
+static int hmac_export_core(struct shash_desc *pdesc, void *out)
+{
+ struct shash_desc *desc = shash_desc_ctx(pdesc);
+
+ return crypto_shash_export_core(desc, out);
+}
+
+static int hmac_import_core(struct shash_desc *pdesc, const void *in)
+{
+ const struct hmac_ctx *tctx = crypto_shash_ctx(pdesc->tfm);
+ struct shash_desc *desc = shash_desc_ctx(pdesc);
+
+ desc->tfm = tctx->hash;
+ return crypto_shash_import_core(desc, in);
+}
+
static int hmac_init(struct shash_desc *pdesc)
{
const struct hmac_ctx *tctx = crypto_shash_ctx(pdesc->tfm);
@@ -105,20 +127,6 @@ static int hmac_update(struct shash_desc *pdesc,
return crypto_shash_update(desc, data, nbytes);
}
-static int hmac_final(struct shash_desc *pdesc, u8 *out)
-{
- struct crypto_shash *parent = pdesc->tfm;
- int ds = crypto_shash_digestsize(parent);
- int ss = crypto_shash_statesize(parent);
- const struct hmac_ctx *tctx = crypto_shash_ctx(parent);
- const u8 *opad = &tctx->pads[ss];
- struct shash_desc *desc = shash_desc_ctx(pdesc);
-
- return crypto_shash_final(desc, out) ?:
- crypto_shash_import(desc, opad) ?:
- crypto_shash_finup(desc, out, ds, out);
-}
-
static int hmac_finup(struct shash_desc *pdesc, const u8 *data,
unsigned int nbytes, u8 *out)
{
@@ -146,9 +154,6 @@ static int hmac_init_tfm(struct crypto_shash *parent)
if (IS_ERR(hash))
return PTR_ERR(hash);
- parent->descsize = sizeof(struct shash_desc) +
- crypto_shash_descsize(hash);
-
tctx->hash = hash;
return 0;
}
@@ -174,26 +179,23 @@ static void hmac_exit_tfm(struct crypto_shash *parent)
crypto_free_shash(tctx->hash);
}
-static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
+static int __hmac_create_shash(struct crypto_template *tmpl,
+ struct rtattr **tb, u32 mask)
{
struct shash_instance *inst;
struct crypto_shash_spawn *spawn;
struct crypto_alg *alg;
struct shash_alg *salg;
- u32 mask;
int err;
int ds;
int ss;
- err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
- if (err)
- return err;
-
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
if (!inst)
return -ENOMEM;
spawn = shash_instance_ctx(inst);
+ mask |= CRYPTO_AHASH_ALG_NO_EXPORT_CORE;
err = crypto_grab_shash(spawn, shash_crypto_instance(inst),
crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
@@ -212,7 +214,8 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
ss < alg->cra_blocksize)
goto err_free_inst;
- err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
+ err = crypto_inst_setname(shash_crypto_instance(inst), "hmac",
+ "hmac-shash", alg);
if (err)
goto err_free_inst;
@@ -222,12 +225,14 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.digestsize = ds;
inst->alg.statesize = ss;
+ inst->alg.descsize = sizeof(struct shash_desc) + salg->descsize;
inst->alg.init = hmac_init;
inst->alg.update = hmac_update;
- inst->alg.final = hmac_final;
inst->alg.finup = hmac_finup;
inst->alg.export = hmac_export;
inst->alg.import = hmac_import;
+ inst->alg.export_core = hmac_export_core;
+ inst->alg.import_core = hmac_import_core;
inst->alg.setkey = hmac_setkey;
inst->alg.init_tfm = hmac_init_tfm;
inst->alg.clone_tfm = hmac_clone_tfm;
@@ -243,23 +248,332 @@ err_free_inst:
return err;
}
-static struct crypto_template hmac_tmpl = {
- .name = "hmac",
- .create = hmac_create,
- .module = THIS_MODULE,
+static int hmac_setkey_ahash(struct crypto_ahash *parent,
+ const u8 *inkey, unsigned int keylen)
+{
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(parent);
+ struct crypto_ahash *fb = crypto_ahash_fb(tctx->hash);
+ int ds = crypto_ahash_digestsize(parent);
+ int bs = crypto_ahash_blocksize(parent);
+ int ss = crypto_ahash_statesize(parent);
+ HASH_REQUEST_ON_STACK(req, fb);
+ u8 *opad = &tctx->pads[ss];
+ u8 *ipad = &tctx->pads[0];
+ int err, i;
+
+ if (fips_enabled && (keylen < 112 / 8))
+ return -EINVAL;
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+
+ if (keylen > bs) {
+ ahash_request_set_virt(req, inkey, ipad, keylen);
+ err = crypto_ahash_digest(req);
+ if (err)
+ goto out_zero_req;
+
+ keylen = ds;
+ } else
+ memcpy(ipad, inkey, keylen);
+
+ memset(ipad + keylen, 0, bs - keylen);
+ memcpy(opad, ipad, bs);
+
+ for (i = 0; i < bs; i++) {
+ ipad[i] ^= HMAC_IPAD_VALUE;
+ opad[i] ^= HMAC_OPAD_VALUE;
+ }
+
+ ahash_request_set_virt(req, ipad, NULL, bs);
+ err = crypto_ahash_init(req) ?:
+ crypto_ahash_update(req) ?:
+ crypto_ahash_export(req, ipad);
+
+ ahash_request_set_virt(req, opad, NULL, bs);
+ err = err ?:
+ crypto_ahash_init(req) ?:
+ crypto_ahash_update(req) ?:
+ crypto_ahash_export(req, opad);
+
+out_zero_req:
+ HASH_REQUEST_ZERO(req);
+ return err;
+}
+
+static int hmac_export_ahash(struct ahash_request *preq, void *out)
+{
+ return crypto_ahash_export(ahash_request_ctx(preq), out);
+}
+
+static int hmac_import_ahash(struct ahash_request *preq, const void *in)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(preq);
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *req = ahash_request_ctx(preq);
+
+ ahash_request_set_tfm(req, tctx->hash);
+ return crypto_ahash_import(req, in);
+}
+
+static int hmac_export_core_ahash(struct ahash_request *preq, void *out)
+{
+ return crypto_ahash_export_core(ahash_request_ctx(preq), out);
+}
+
+static int hmac_import_core_ahash(struct ahash_request *preq, const void *in)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(preq);
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *req = ahash_request_ctx(preq);
+
+ ahash_request_set_tfm(req, tctx->hash);
+ return crypto_ahash_import_core(req, in);
+}
+
+static int hmac_init_ahash(struct ahash_request *preq)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(preq);
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(tfm);
+
+ return hmac_import_ahash(preq, &tctx->pads[0]);
+}
+
+static int hmac_update_ahash(struct ahash_request *preq)
+{
+ struct ahash_request *req = ahash_request_ctx(preq);
+
+ ahash_request_set_callback(req, ahash_request_flags(preq),
+ preq->base.complete, preq->base.data);
+ if (ahash_request_isvirt(preq))
+ ahash_request_set_virt(req, preq->svirt, NULL, preq->nbytes);
+ else
+ ahash_request_set_crypt(req, preq->src, NULL, preq->nbytes);
+ return crypto_ahash_update(req);
+}
+
+static int hmac_finup_finish(struct ahash_request *preq, unsigned int mask)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(preq);
+ struct ahash_request *req = ahash_request_ctx(preq);
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(tfm);
+ int ds = crypto_ahash_digestsize(tfm);
+ int ss = crypto_ahash_statesize(tfm);
+ const u8 *opad = &tctx->pads[ss];
+
+ ahash_request_set_callback(req, ahash_request_flags(preq) & ~mask,
+ preq->base.complete, preq->base.data);
+ ahash_request_set_virt(req, preq->result, preq->result, ds);
+ return crypto_ahash_import(req, opad) ?:
+ crypto_ahash_finup(req);
+
+}
+
+static void hmac_finup_done(void *data, int err)
+{
+ struct ahash_request *preq = data;
+
+ if (err)
+ goto out;
+
+ err = hmac_finup_finish(preq, CRYPTO_TFM_REQ_MAY_SLEEP);
+ if (err == -EINPROGRESS || err == -EBUSY)
+ return;
+
+out:
+ ahash_request_complete(preq, err);
+}
+
+static int hmac_finup_ahash(struct ahash_request *preq)
+{
+ struct ahash_request *req = ahash_request_ctx(preq);
+
+ ahash_request_set_callback(req, ahash_request_flags(preq),
+ hmac_finup_done, preq);
+ if (ahash_request_isvirt(preq))
+ ahash_request_set_virt(req, preq->svirt, preq->result,
+ preq->nbytes);
+ else
+ ahash_request_set_crypt(req, preq->src, preq->result,
+ preq->nbytes);
+ return crypto_ahash_finup(req) ?:
+ hmac_finup_finish(preq, 0);
+}
+
+static int hmac_digest_ahash(struct ahash_request *preq)
+{
+ return hmac_init_ahash(preq) ?:
+ hmac_finup_ahash(preq);
+}
+
+static int hmac_init_ahash_tfm(struct crypto_ahash *parent)
+{
+ struct ahash_instance *inst = ahash_alg_instance(parent);
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(parent);
+ struct crypto_ahash *hash;
+
+ hash = crypto_spawn_ahash(ahash_instance_ctx(inst));
+ if (IS_ERR(hash))
+ return PTR_ERR(hash);
+
+ if (crypto_ahash_reqsize(parent) < sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(hash))
+ return -EINVAL;
+
+ tctx->hash = hash;
+ return 0;
+}
+
+static int hmac_clone_ahash_tfm(struct crypto_ahash *dst,
+ struct crypto_ahash *src)
+{
+ struct ahash_hmac_ctx *sctx = crypto_ahash_ctx(src);
+ struct ahash_hmac_ctx *dctx = crypto_ahash_ctx(dst);
+ struct crypto_ahash *hash;
+
+ hash = crypto_clone_ahash(sctx->hash);
+ if (IS_ERR(hash))
+ return PTR_ERR(hash);
+
+ dctx->hash = hash;
+ return 0;
+}
+
+static void hmac_exit_ahash_tfm(struct crypto_ahash *parent)
+{
+ struct ahash_hmac_ctx *tctx = crypto_ahash_ctx(parent);
+
+ crypto_free_ahash(tctx->hash);
+}
+
+static int hmac_create_ahash(struct crypto_template *tmpl, struct rtattr **tb,
+ u32 mask)
+{
+ struct crypto_ahash_spawn *spawn;
+ struct ahash_instance *inst;
+ struct crypto_alg *alg;
+ struct hash_alg_common *halg;
+ int ds, ss, err;
+
+ inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+ spawn = ahash_instance_ctx(inst);
+
+ mask |= CRYPTO_AHASH_ALG_NO_EXPORT_CORE;
+ err = crypto_grab_ahash(spawn, ahash_crypto_instance(inst),
+ crypto_attr_alg_name(tb[1]), 0, mask);
+ if (err)
+ goto err_free_inst;
+ halg = crypto_spawn_ahash_alg(spawn);
+ alg = &halg->base;
+
+ /* The underlying hash algorithm must not require a key */
+ err = -EINVAL;
+ if (crypto_hash_alg_needs_key(halg))
+ goto err_free_inst;
+
+ ds = halg->digestsize;
+ ss = halg->statesize;
+ if (ds > alg->cra_blocksize || ss < alg->cra_blocksize)
+ goto err_free_inst;
+
+ err = crypto_inst_setname(ahash_crypto_instance(inst), tmpl->name, alg);
+ if (err)
+ goto err_free_inst;
+
+ inst->alg.halg.base.cra_flags = alg->cra_flags &
+ CRYPTO_ALG_INHERITED_FLAGS;
+ inst->alg.halg.base.cra_flags |= CRYPTO_ALG_REQ_VIRT;
+ inst->alg.halg.base.cra_priority = alg->cra_priority + 100;
+ inst->alg.halg.base.cra_blocksize = alg->cra_blocksize;
+ inst->alg.halg.base.cra_ctxsize = sizeof(struct ahash_hmac_ctx) +
+ (ss * 2);
+ inst->alg.halg.base.cra_reqsize = sizeof(struct ahash_request) +
+ alg->cra_reqsize;
+
+ inst->alg.halg.digestsize = ds;
+ inst->alg.halg.statesize = ss;
+ inst->alg.init = hmac_init_ahash;
+ inst->alg.update = hmac_update_ahash;
+ inst->alg.finup = hmac_finup_ahash;
+ inst->alg.digest = hmac_digest_ahash;
+ inst->alg.export = hmac_export_ahash;
+ inst->alg.import = hmac_import_ahash;
+ inst->alg.export_core = hmac_export_core_ahash;
+ inst->alg.import_core = hmac_import_core_ahash;
+ inst->alg.setkey = hmac_setkey_ahash;
+ inst->alg.init_tfm = hmac_init_ahash_tfm;
+ inst->alg.clone_tfm = hmac_clone_ahash_tfm;
+ inst->alg.exit_tfm = hmac_exit_ahash_tfm;
+
+ inst->free = ahash_free_singlespawn_instance;
+
+ err = ahash_register_instance(tmpl, inst);
+ if (err) {
+err_free_inst:
+ ahash_free_singlespawn_instance(inst);
+ }
+ return err;
+}
+
+static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+ struct crypto_attr_type *algt;
+ u32 mask;
+
+ algt = crypto_get_attr_type(tb);
+ if (IS_ERR(algt))
+ return PTR_ERR(algt);
+
+ mask = crypto_algt_inherited_mask(algt);
+
+ if (!((algt->type ^ CRYPTO_ALG_TYPE_AHASH) &
+ algt->mask & CRYPTO_ALG_TYPE_MASK))
+ return hmac_create_ahash(tmpl, tb, mask);
+
+ if ((algt->type ^ CRYPTO_ALG_TYPE_SHASH) &
+ algt->mask & CRYPTO_ALG_TYPE_MASK)
+ return -EINVAL;
+
+ return __hmac_create_shash(tmpl, tb, mask);
+}
+
+static int hmac_create_shash(struct crypto_template *tmpl, struct rtattr **tb)
+{
+ u32 mask;
+ int err;
+
+ err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
+ if (err)
+ return err == -EINVAL ? -ENOENT : err;
+
+ return __hmac_create_shash(tmpl, tb, mask);
+}
+
+static struct crypto_template hmac_tmpls[] = {
+ {
+ .name = "hmac",
+ .create = hmac_create,
+ .module = THIS_MODULE,
+ },
+ {
+ .name = "hmac-shash",
+ .create = hmac_create_shash,
+ .module = THIS_MODULE,
+ },
};
static int __init hmac_module_init(void)
{
- return crypto_register_template(&hmac_tmpl);
+ return crypto_register_templates(hmac_tmpls, ARRAY_SIZE(hmac_tmpls));
}
static void __exit hmac_module_exit(void)
{
- crypto_unregister_template(&hmac_tmpl);
+ crypto_unregister_templates(hmac_tmpls, ARRAY_SIZE(hmac_tmpls));
}
-subsys_initcall(hmac_module_init);
+module_init(hmac_module_init);
module_exit(hmac_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/internal.h b/crypto/internal.h
index 11567ea24fc3..b9afd68767c1 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -46,6 +46,7 @@ struct crypto_type {
unsigned int maskclear;
unsigned int maskset;
unsigned int tfmsize;
+ unsigned int algsize;
};
enum {
@@ -66,8 +67,7 @@ extern struct blocking_notifier_head crypto_chain;
int alg_test(const char *driver, const char *alg, u32 type, u32 mask);
-#if !IS_BUILTIN(CONFIG_CRYPTO_ALGAPI) || \
- IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)
+#if !IS_BUILTIN(CONFIG_CRYPTO_ALGAPI) || !IS_ENABLED(CONFIG_CRYPTO_SELFTESTS)
static inline bool crypto_boot_test_finished(void)
{
return true;
@@ -86,7 +86,7 @@ static inline void set_crypto_boot_test_finished(void)
static_branch_enable(&__crypto_boot_test_finished);
}
#endif /* !IS_BUILTIN(CONFIG_CRYPTO_ALGAPI) ||
- * IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)
+ * !IS_ENABLED(CONFIG_CRYPTO_SELFTESTS)
*/
#ifdef CONFIG_PROC_FS
@@ -128,7 +128,6 @@ void *crypto_create_tfm_node(struct crypto_alg *alg,
const struct crypto_type *frontend, int node);
void *crypto_clone_tfm(const struct crypto_type *frontend,
struct crypto_tfm *otfm);
-void crypto_destroy_alg(struct crypto_alg *alg);
static inline void *crypto_create_tfm(struct crypto_alg *alg,
const struct crypto_type *frontend)
@@ -163,6 +162,8 @@ static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
return alg;
}
+void crypto_destroy_alg(struct crypto_alg *alg);
+
static inline void crypto_alg_put(struct crypto_alg *alg)
{
if (refcount_dec_and_test(&alg->cra_refcnt))
diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c
index c3f9938e1ad2..b7a6bf9da773 100644
--- a/crypto/kdf_sp800108.c
+++ b/crypto/kdf_sp800108.c
@@ -127,7 +127,7 @@ static int __init crypto_kdf108_init(void)
{
int ret;
- if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+ if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS))
return 0;
ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)",
diff --git a/crypto/khazad.c b/crypto/khazad.c
index 7ad338ca2c18..024264ee9cd1 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -871,7 +871,7 @@ static void __exit khazad_mod_fini(void)
}
-subsys_initcall(khazad_mod_init);
+module_init(khazad_mod_init);
module_exit(khazad_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/kpp.c b/crypto/kpp.c
index ecc63a1a948d..2e0cefe7a25f 100644
--- a/crypto/kpp.c
+++ b/crypto/kpp.c
@@ -80,6 +80,7 @@ static const struct crypto_type crypto_kpp_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_KPP,
.tfmsize = offsetof(struct crypto_kpp, base),
+ .algsize = offsetof(struct kpp_alg, base),
};
struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask)
diff --git a/crypto/krb5enc.c b/crypto/krb5enc.c
index d07769bf149e..a1de55994d92 100644
--- a/crypto/krb5enc.c
+++ b/crypto/krb5enc.c
@@ -496,7 +496,7 @@ static void __exit crypto_krb5enc_module_exit(void)
crypto_unregister_template(&crypto_krb5enc_tmpl);
}
-subsys_initcall(crypto_krb5enc_module_init);
+module_init(crypto_krb5enc_module_init);
module_exit(crypto_krb5enc_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 391ae0f7641f..dd403b800513 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -322,7 +322,7 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb)
err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
cipher_name, 0, mask);
- if (err == -ENOENT) {
+ if (err == -ENOENT && memcmp(cipher_name, "ecb(", 4)) {
err = -ENAMETOOLONG;
if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
cipher_name) >= CRYPTO_MAX_ALG_NAME)
@@ -356,7 +356,7 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb)
/* Alas we screwed up the naming so we have to mangle the
* cipher name.
*/
- if (!strncmp(cipher_name, "ecb(", 4)) {
+ if (!memcmp(cipher_name, "ecb(", 4)) {
int len;
len = strscpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
@@ -420,7 +420,7 @@ static void __exit lrw_module_exit(void)
crypto_unregister_template(&lrw_tmpl);
}
-subsys_initcall(lrw_module_init);
+module_init(lrw_module_init);
module_exit(lrw_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c
index cdb4897c63e6..c2e2c38b5aa8 100644
--- a/crypto/lskcipher.c
+++ b/crypto/lskcipher.c
@@ -294,6 +294,7 @@ static const struct crypto_type crypto_lskcipher_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_LSKCIPHER,
.tfmsize = offsetof(struct crypto_lskcipher, base),
+ .algsize = offsetof(struct lskcipher_alg, co.base),
};
static void crypto_lskcipher_exit_tfm_sg(struct crypto_tfm *tfm)
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 82588607fb2e..7a984ae5ae52 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -12,10 +12,6 @@
#include <linux/lz4.h>
#include <crypto/internal/scompress.h>
-struct lz4_ctx {
- void *lz4_comp_mem;
-};
-
static void *lz4_alloc_ctx(void)
{
void *ctx;
@@ -93,7 +89,7 @@ static void __exit lz4_mod_fini(void)
crypto_unregister_scomp(&scomp);
}
-subsys_initcall(lz4_mod_init);
+module_init(lz4_mod_init);
module_exit(lz4_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 997e76c0183a..9c61d05b6214 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -10,10 +10,6 @@
#include <linux/vmalloc.h>
#include <linux/lz4.h>
-struct lz4hc_ctx {
- void *lz4hc_comp_mem;
-};
-
static void *lz4hc_alloc_ctx(void)
{
void *ctx;
@@ -91,7 +87,7 @@ static void __exit lz4hc_mod_fini(void)
crypto_unregister_scomp(&scomp);
}
-subsys_initcall(lz4hc_mod_init);
+module_init(lz4hc_mod_init);
module_exit(lz4hc_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/lzo-rle.c b/crypto/lzo-rle.c
index b1350ae278b8..ba013f2d5090 100644
--- a/crypto/lzo-rle.c
+++ b/crypto/lzo-rle.c
@@ -9,10 +9,6 @@
#include <linux/module.h>
#include <linux/slab.h>
-struct lzorle_ctx {
- void *lzorle_comp_mem;
-};
-
static void *lzorle_alloc_ctx(void)
{
void *ctx;
@@ -95,7 +91,7 @@ static void __exit lzorle_mod_fini(void)
crypto_unregister_scomp(&scomp);
}
-subsys_initcall(lzorle_mod_init);
+module_init(lzorle_mod_init);
module_exit(lzorle_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/lzo.c b/crypto/lzo.c
index dfe5a07ca35f..7867e2c67c4e 100644
--- a/crypto/lzo.c
+++ b/crypto/lzo.c
@@ -9,10 +9,6 @@
#include <linux/module.h>
#include <linux/slab.h>
-struct lzo_ctx {
- void *lzo_comp_mem;
-};
-
static void *lzo_alloc_ctx(void)
{
void *ctx;
@@ -95,7 +91,7 @@ static void __exit lzo_mod_fini(void)
crypto_unregister_scomp(&scomp);
}
-subsys_initcall(lzo_mod_init);
+module_init(lzo_mod_init);
module_exit(lzo_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/md4.c b/crypto/md4.c
index 2e7f2f319f95..55bf47e23c13 100644
--- a/crypto/md4.c
+++ b/crypto/md4.c
@@ -233,7 +233,7 @@ static void __exit md4_mod_fini(void)
crypto_unregister_shash(&alg);
}
-subsys_initcall(md4_mod_init);
+module_init(md4_mod_init);
module_exit(md4_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/md5.c b/crypto/md5.c
index 72c0c46fb5ee..32c0819f5118 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -17,11 +17,9 @@
*/
#include <crypto/internal/hash.h>
#include <crypto/md5.h>
-#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = {
0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
@@ -120,10 +118,11 @@ static void md5_transform(__u32 *hash, __u32 const *in)
hash[3] += d;
}
-static inline void md5_transform_helper(struct md5_state *ctx)
+static inline void md5_transform_helper(struct md5_state *ctx,
+ u32 block[MD5_BLOCK_WORDS])
{
- le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(u32));
- md5_transform(ctx->hash, ctx->block);
+ le32_to_cpu_array(block, MD5_BLOCK_WORDS);
+ md5_transform(ctx->hash, block);
}
static int md5_init(struct shash_desc *desc)
@@ -142,76 +141,53 @@ static int md5_init(struct shash_desc *desc)
static int md5_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
struct md5_state *mctx = shash_desc_ctx(desc);
- const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
+ u32 block[MD5_BLOCK_WORDS];
mctx->byte_count += len;
-
- if (avail > len) {
- memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
- data, len);
- return 0;
- }
-
- memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
- data, avail);
-
- md5_transform_helper(mctx);
- data += avail;
- len -= avail;
-
- while (len >= sizeof(mctx->block)) {
- memcpy(mctx->block, data, sizeof(mctx->block));
- md5_transform_helper(mctx);
- data += sizeof(mctx->block);
- len -= sizeof(mctx->block);
- }
-
- memcpy(mctx->block, data, len);
-
- return 0;
+ do {
+ memcpy(block, data, sizeof(block));
+ md5_transform_helper(mctx, block);
+ data += sizeof(block);
+ len -= sizeof(block);
+ } while (len >= sizeof(block));
+ memzero_explicit(block, sizeof(block));
+ mctx->byte_count -= len;
+ return len;
}
-static int md5_final(struct shash_desc *desc, u8 *out)
+static int md5_finup(struct shash_desc *desc, const u8 *data, unsigned int len,
+ u8 *out)
{
struct md5_state *mctx = shash_desc_ctx(desc);
- const unsigned int offset = mctx->byte_count & 0x3f;
- char *p = (char *)mctx->block + offset;
- int padding = 56 - (offset + 1);
+ u32 block[MD5_BLOCK_WORDS];
+ unsigned int offset;
+ int padding;
+ char *p;
+
+ memcpy(block, data, len);
+
+ offset = len;
+ p = (char *)block + offset;
+ padding = 56 - (offset + 1);
*p++ = 0x80;
if (padding < 0) {
memset(p, 0x00, padding + sizeof (u64));
- md5_transform_helper(mctx);
- p = (char *)mctx->block;
+ md5_transform_helper(mctx, block);
+ p = (char *)block;
padding = 56;
}
memset(p, 0, padding);
- mctx->block[14] = mctx->byte_count << 3;
- mctx->block[15] = mctx->byte_count >> 29;
- le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
- sizeof(u64)) / sizeof(u32));
- md5_transform(mctx->hash, mctx->block);
+ mctx->byte_count += len;
+ block[14] = mctx->byte_count << 3;
+ block[15] = mctx->byte_count >> 29;
+ le32_to_cpu_array(block, (sizeof(block) - sizeof(u64)) / sizeof(u32));
+ md5_transform(mctx->hash, block);
+ memzero_explicit(block, sizeof(block));
cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(u32));
memcpy(out, mctx->hash, sizeof(mctx->hash));
- memset(mctx, 0, sizeof(*mctx));
-
- return 0;
-}
-
-static int md5_export(struct shash_desc *desc, void *out)
-{
- struct md5_state *ctx = shash_desc_ctx(desc);
-
- memcpy(out, ctx, sizeof(*ctx));
- return 0;
-}
-
-static int md5_import(struct shash_desc *desc, const void *in)
-{
- struct md5_state *ctx = shash_desc_ctx(desc);
- memcpy(ctx, in, sizeof(*ctx));
return 0;
}
@@ -219,14 +195,12 @@ static struct shash_alg alg = {
.digestsize = MD5_DIGEST_SIZE,
.init = md5_init,
.update = md5_update,
- .final = md5_final,
- .export = md5_export,
- .import = md5_import,
- .descsize = sizeof(struct md5_state),
- .statesize = sizeof(struct md5_state),
+ .finup = md5_finup,
+ .descsize = MD5_STATE_SIZE,
.base = {
.cra_name = "md5",
.cra_driver_name = "md5-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -242,7 +216,7 @@ static void __exit md5_mod_fini(void)
crypto_unregister_shash(&alg);
}
-subsys_initcall(md5_mod_init);
+module_init(md5_mod_init);
module_exit(md5_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c
index 0d14e980d4d6..69ad35f524d7 100644
--- a/crypto/michael_mic.c
+++ b/crypto/michael_mic.c
@@ -167,7 +167,7 @@ static void __exit michael_mic_exit(void)
}
-subsys_initcall(michael_mic_init);
+module_init(michael_mic_init);
module_exit(michael_mic_exit);
MODULE_LICENSE("GPL v2");
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
index a661d4f667cd..2b648615b5ec 100644
--- a/crypto/nhpoly1305.c
+++ b/crypto/nhpoly1305.c
@@ -245,7 +245,7 @@ static void __exit nhpoly1305_mod_exit(void)
crypto_unregister_shash(&nhpoly1305_alg);
}
-subsys_initcall(nhpoly1305_mod_init);
+module_init(nhpoly1305_mod_init);
module_exit(nhpoly1305_mod_exit);
MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function");
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index 9d2e56d6744a..d092717ea4fc 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -186,7 +186,7 @@ static void __exit crypto_pcbc_module_exit(void)
crypto_unregister_template(&crypto_pcbc_tmpl);
}
-subsys_initcall(crypto_pcbc_module_init);
+module_init(crypto_pcbc_module_init);
module_exit(crypto_pcbc_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 7fc79e7dce44..c33d29a523e0 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -381,7 +381,7 @@ static void __exit pcrypt_exit(void)
kset_unregister(pcrypt_kset);
}
-subsys_initcall(pcrypt_init);
+module_init(pcrypt_init);
module_exit(pcrypt_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
deleted file mode 100644
index e6f29a98725a..000000000000
--- a/crypto/poly1305_generic.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/poly1305.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/unaligned.h>
-
-static int crypto_poly1305_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- poly1305_core_init(&dctx->h);
- dctx->buflen = 0;
- dctx->rset = 0;
- dctx->sset = false;
-
- return 0;
-}
-
-static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- if (!dctx->sset) {
- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_core_setkey(&dctx->core_r, src);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->rset = 2;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- dctx->s[0] = get_unaligned_le32(src + 0);
- dctx->s[1] = get_unaligned_le32(src + 4);
- dctx->s[2] = get_unaligned_le32(src + 8);
- dctx->s[3] = get_unaligned_le32(src + 12);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- }
- return srclen;
-}
-
-static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
- unsigned int srclen)
-{
- unsigned int datalen;
-
- if (unlikely(!dctx->sset)) {
- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
- src += srclen - datalen;
- srclen = datalen;
- }
-
- poly1305_core_blocks(&dctx->h, &dctx->core_r, src,
- srclen / POLY1305_BLOCK_SIZE, 1);
-}
-
-static int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int bytes;
-
- if (unlikely(dctx->buflen)) {
- bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- srclen -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
- dctx->buflen = 0;
- }
- }
-
- if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- poly1305_blocks(dctx, src, srclen);
- src += srclen - (srclen % POLY1305_BLOCK_SIZE);
- srclen %= POLY1305_BLOCK_SIZE;
- }
-
- if (unlikely(srclen)) {
- dctx->buflen = srclen;
- memcpy(dctx->buf, src, srclen);
- }
-
- return 0;
-}
-
-static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- poly1305_final_generic(dctx, dst);
- return 0;
-}
-
-static struct shash_alg poly1305_alg = {
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = crypto_poly1305_init,
- .update = crypto_poly1305_update,
- .final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_desc_ctx),
- .base = {
- .cra_name = "poly1305",
- .cra_driver_name = "poly1305-generic",
- .cra_priority = 100,
- .cra_blocksize = POLY1305_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init poly1305_mod_init(void)
-{
- return crypto_register_shash(&poly1305_alg);
-}
-
-static void __exit poly1305_mod_exit(void)
-{
- crypto_unregister_shash(&poly1305_alg);
-}
-
-subsys_initcall(poly1305_mod_init);
-module_exit(poly1305_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("Poly1305 authenticator");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-generic");
diff --git a/crypto/polyval-generic.c b/crypto/polyval-generic.c
index 4f98910bcdb5..db8adb56e4ca 100644
--- a/crypto/polyval-generic.c
+++ b/crypto/polyval-generic.c
@@ -44,15 +44,15 @@
*
*/
-#include <linux/unaligned.h>
-#include <crypto/algapi.h>
#include <crypto/gf128mul.h>
-#include <crypto/polyval.h>
#include <crypto/internal/hash.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
+#include <crypto/polyval.h>
+#include <crypto/utils.h>
+#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
struct polyval_tfm_ctx {
struct gf128mul_4k *gf128;
@@ -63,7 +63,6 @@ struct polyval_desc_ctx {
u8 buffer[POLYVAL_BLOCK_SIZE];
be128 buffer128;
};
- u32 bytes;
};
static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
@@ -76,46 +75,6 @@ static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
put_unaligned(swab64(b), (u64 *)&dst[0]);
}
-/*
- * Performs multiplication in the POLYVAL field using the GHASH field as a
- * subroutine. This function is used as a fallback for hardware accelerated
- * implementations when simd registers are unavailable.
- *
- * Note: This function is not used for polyval-generic, instead we use the 4k
- * lookup table implementation for finite field multiplication.
- */
-void polyval_mul_non4k(u8 *op1, const u8 *op2)
-{
- be128 a, b;
-
- // Assume one argument is in Montgomery form and one is not.
- copy_and_reverse((u8 *)&a, op1);
- copy_and_reverse((u8 *)&b, op2);
- gf128mul_x_lle(&a, &a);
- gf128mul_lle(&a, &b);
- copy_and_reverse(op1, (u8 *)&a);
-}
-EXPORT_SYMBOL_GPL(polyval_mul_non4k);
-
-/*
- * Perform a POLYVAL update using non4k multiplication. This function is used
- * as a fallback for hardware accelerated implementations when simd registers
- * are unavailable.
- *
- * Note: This function is not used for polyval-generic, instead we use the 4k
- * lookup table implementation of finite field multiplication.
- */
-void polyval_update_non4k(const u8 *key, const u8 *in,
- size_t nblocks, u8 *accumulator)
-{
- while (nblocks--) {
- crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
- polyval_mul_non4k(accumulator, key);
- in += POLYVAL_BLOCK_SIZE;
- }
-}
-EXPORT_SYMBOL_GPL(polyval_update_non4k);
-
static int polyval_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
@@ -154,56 +113,53 @@ static int polyval_update(struct shash_desc *desc,
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
- u8 *pos;
u8 tmp[POLYVAL_BLOCK_SIZE];
- int n;
-
- if (dctx->bytes) {
- n = min(srclen, dctx->bytes);
- pos = dctx->buffer + dctx->bytes - 1;
-
- dctx->bytes -= n;
- srclen -= n;
-
- while (n--)
- *pos-- ^= *src++;
- if (!dctx->bytes)
- gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
- }
-
- while (srclen >= POLYVAL_BLOCK_SIZE) {
+ do {
copy_and_reverse(tmp, src);
crypto_xor(dctx->buffer, tmp, POLYVAL_BLOCK_SIZE);
gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
src += POLYVAL_BLOCK_SIZE;
srclen -= POLYVAL_BLOCK_SIZE;
- }
+ } while (srclen >= POLYVAL_BLOCK_SIZE);
+
+ return srclen;
+}
- if (srclen) {
- dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
- pos = dctx->buffer + POLYVAL_BLOCK_SIZE - 1;
- while (srclen--)
- *pos-- ^= *src++;
+static int polyval_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
+{
+ struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (len) {
+ u8 tmp[POLYVAL_BLOCK_SIZE] = {};
+
+ memcpy(tmp, src, len);
+ polyval_update(desc, tmp, POLYVAL_BLOCK_SIZE);
}
+ copy_and_reverse(dst, dctx->buffer);
+ return 0;
+}
+
+static int polyval_export(struct shash_desc *desc, void *out)
+{
+ struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+ copy_and_reverse(out, dctx->buffer);
return 0;
}
-static int polyval_final(struct shash_desc *desc, u8 *dst)
+static int polyval_import(struct shash_desc *desc, const void *in)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
- if (dctx->bytes)
- gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
- copy_and_reverse(dst, dctx->buffer);
+ copy_and_reverse(dctx->buffer, in);
return 0;
}
-static void polyval_exit_tfm(struct crypto_tfm *tfm)
+static void polyval_exit_tfm(struct crypto_shash *tfm)
{
- struct polyval_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct polyval_tfm_ctx *ctx = crypto_shash_ctx(tfm);
gf128mul_free_4k(ctx->gf128);
}
@@ -212,17 +168,21 @@ static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_init,
.update = polyval_update,
- .final = polyval_final,
+ .finup = polyval_finup,
.setkey = polyval_setkey,
+ .export = polyval_export,
+ .import = polyval_import,
+ .exit_tfm = polyval_exit_tfm,
+ .statesize = sizeof(struct polyval_desc_ctx),
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-generic",
.cra_priority = 100,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
.cra_module = THIS_MODULE,
- .cra_exit = polyval_exit_tfm,
},
};
@@ -236,7 +196,7 @@ static void __exit polyval_mod_exit(void)
crypto_unregister_shash(&polyval_alg);
}
-subsys_initcall(polyval_mod_init);
+module_init(polyval_mod_init);
module_exit(polyval_mod_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/rmd160.c b/crypto/rmd160.c
index c5fe4034b153..9860b60c9be4 100644
--- a/crypto/rmd160.c
+++ b/crypto/rmd160.c
@@ -9,18 +9,14 @@
* Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-
+#include <linux/string.h>
#include "ripemd.h"
struct rmd160_ctx {
u64 byte_count;
u32 state[5];
- __le32 buffer[16];
};
#define K1 RMD_K1
@@ -265,72 +261,59 @@ static int rmd160_init(struct shash_desc *desc)
rctx->state[3] = RMD_H3;
rctx->state[4] = RMD_H4;
- memset(rctx->buffer, 0, sizeof(rctx->buffer));
-
return 0;
}
static int rmd160_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
+ int remain = len - round_down(len, RMD160_BLOCK_SIZE);
struct rmd160_ctx *rctx = shash_desc_ctx(desc);
- const u32 avail = sizeof(rctx->buffer) - (rctx->byte_count & 0x3f);
-
- rctx->byte_count += len;
+ __le32 buffer[RMD160_BLOCK_SIZE / 4];
- /* Enough space in buffer? If so copy and we're done */
- if (avail > len) {
- memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail),
- data, len);
- goto out;
- }
-
- memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail),
- data, avail);
+ rctx->byte_count += len - remain;
- rmd160_transform(rctx->state, rctx->buffer);
- data += avail;
- len -= avail;
-
- while (len >= sizeof(rctx->buffer)) {
- memcpy(rctx->buffer, data, sizeof(rctx->buffer));
- rmd160_transform(rctx->state, rctx->buffer);
- data += sizeof(rctx->buffer);
- len -= sizeof(rctx->buffer);
- }
+ do {
+ memcpy(buffer, data, sizeof(buffer));
+ rmd160_transform(rctx->state, buffer);
+ data += sizeof(buffer);
+ len -= sizeof(buffer);
+ } while (len >= sizeof(buffer));
- memcpy(rctx->buffer, data, len);
-
-out:
- return 0;
+ memzero_explicit(buffer, sizeof(buffer));
+ return remain;
}
/* Add padding and return the message digest. */
-static int rmd160_final(struct shash_desc *desc, u8 *out)
+static int rmd160_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
+ unsigned int bit_offset = RMD160_BLOCK_SIZE / 8 - 1;
struct rmd160_ctx *rctx = shash_desc_ctx(desc);
- u32 i, index, padlen;
- __le64 bits;
+ union {
+ __le64 l64[RMD160_BLOCK_SIZE / 4];
+ __le32 l32[RMD160_BLOCK_SIZE / 2];
+ u8 u8[RMD160_BLOCK_SIZE * 2];
+ } block = {};
__le32 *dst = (__le32 *)out;
- static const u8 padding[64] = { 0x80, };
-
- bits = cpu_to_le64(rctx->byte_count << 3);
-
- /* Pad out to 56 mod 64 */
- index = rctx->byte_count & 0x3f;
- padlen = (index < 56) ? (56 - index) : ((64+56) - index);
- rmd160_update(desc, padding, padlen);
+ u32 i;
- /* Append length */
- rmd160_update(desc, (const u8 *)&bits, sizeof(bits));
+ rctx->byte_count += len;
+ if (len >= bit_offset * 8)
+ bit_offset += RMD160_BLOCK_SIZE / 8;
+ memcpy(&block, src, len);
+ block.u8[len] = 0x80;
+ block.l64[bit_offset] = cpu_to_le64(rctx->byte_count << 3);
+
+ rmd160_transform(rctx->state, block.l32);
+ if (bit_offset > RMD160_BLOCK_SIZE / 8)
+ rmd160_transform(rctx->state,
+ block.l32 + RMD160_BLOCK_SIZE / 4);
+ memzero_explicit(&block, sizeof(block));
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_le32p(&rctx->state[i]);
-
- /* Wipe context */
- memset(rctx, 0, sizeof(*rctx));
-
return 0;
}
@@ -338,11 +321,12 @@ static struct shash_alg alg = {
.digestsize = RMD160_DIGEST_SIZE,
.init = rmd160_init,
.update = rmd160_update,
- .final = rmd160_final,
+ .finup = rmd160_finup,
.descsize = sizeof(struct rmd160_ctx),
.base = {
.cra_name = "rmd160",
.cra_driver_name = "rmd160-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = RMD160_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -358,7 +342,7 @@ static void __exit rmd160_mod_fini(void)
crypto_unregister_shash(&alg);
}
-subsys_initcall(rmd160_mod_init);
+module_init(rmd160_mod_init);
module_exit(rmd160_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/rng.c b/crypto/rng.c
index 9d8804e46422..b8ae6ebc091d 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -98,6 +98,7 @@ static const struct crypto_type crypto_rng_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_RNG,
.tfmsize = offsetof(struct crypto_rng, base),
+ .algsize = offsetof(struct rng_alg, base),
};
struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask)
diff --git a/crypto/rsa.c b/crypto/rsa.c
index b7d21529c552..6c7734083c98 100644
--- a/crypto/rsa.c
+++ b/crypto/rsa.c
@@ -430,7 +430,7 @@ static void __exit rsa_exit(void)
crypto_unregister_akcipher(&rsa);
}
-subsys_initcall(rsa_init);
+module_init(rsa_init);
module_exit(rsa_exit);
MODULE_ALIAS_CRYPTO("rsa");
MODULE_LICENSE("GPL");
diff --git a/crypto/rsassa-pkcs1.c b/crypto/rsassa-pkcs1.c
index d01ac75635e0..94fa5e9600e7 100644
--- a/crypto/rsassa-pkcs1.c
+++ b/crypto/rsassa-pkcs1.c
@@ -301,7 +301,7 @@ static unsigned int rsassa_pkcs1_key_size(struct crypto_sig *tfm)
{
struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm);
- return ctx->key_size;
+ return ctx->key_size * BITS_PER_BYTE;
}
static int rsassa_pkcs1_set_pub_key(struct crypto_sig *tfm,
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 8225801488d5..1d010e2a1b1a 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -10,10 +10,25 @@
*/
#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+enum {
+ SKCIPHER_WALK_SLOW = 1 << 0,
+ SKCIPHER_WALK_COPY = 1 << 1,
+ SKCIPHER_WALK_DIFF = 1 << 2,
+ SKCIPHER_WALK_SLEEP = 1 << 3,
+};
+
+static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)
+{
+ return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+}
void scatterwalk_skip(struct scatter_walk *walk, unsigned int nbytes)
{
@@ -89,27 +104,23 @@ EXPORT_SYMBOL_GPL(memcpy_to_sglist);
void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct scatter_walk swalk;
- struct scatter_walk dwalk;
+ struct skcipher_walk walk = {};
if (unlikely(nbytes == 0)) /* in case sg == NULL */
return;
- scatterwalk_start(&swalk, src);
- scatterwalk_start(&dwalk, dst);
+ walk.total = nbytes;
+
+ scatterwalk_start(&walk.in, src);
+ scatterwalk_start(&walk.out, dst);
+ skcipher_walk_first(&walk, true);
do {
- unsigned int slen, dlen;
- unsigned int len;
-
- slen = scatterwalk_next(&swalk, nbytes);
- dlen = scatterwalk_next(&dwalk, nbytes);
- len = min(slen, dlen);
- memcpy(dwalk.addr, swalk.addr, len);
- scatterwalk_done_dst(&dwalk, len);
- scatterwalk_done_src(&swalk, len);
- nbytes -= len;
- } while (nbytes);
+ if (walk.src.virt.addr != walk.dst.virt.addr)
+ memcpy(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes);
+ skcipher_walk_done(&walk, 0);
+ } while (walk.nbytes);
}
EXPORT_SYMBOL_GPL(memcpy_sglist);
@@ -135,3 +146,236 @@ struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
return dst;
}
EXPORT_SYMBOL_GPL(scatterwalk_ffwd);
+
+static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+ unsigned alignmask = walk->alignmask;
+ unsigned n;
+ void *buffer;
+
+ if (!walk->buffer)
+ walk->buffer = walk->page;
+ buffer = walk->buffer;
+ if (!buffer) {
+ /* Min size for a buffer of bsize bytes aligned to alignmask */
+ n = bsize + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+
+ buffer = kzalloc(n, skcipher_walk_gfp(walk));
+ if (!buffer)
+ return skcipher_walk_done(walk, -ENOMEM);
+ walk->buffer = buffer;
+ }
+
+ buffer = PTR_ALIGN(buffer, alignmask + 1);
+ memcpy_from_scatterwalk(buffer, &walk->in, bsize);
+ walk->out.__addr = buffer;
+ walk->in.__addr = walk->out.addr;
+
+ walk->nbytes = bsize;
+ walk->flags |= SKCIPHER_WALK_SLOW;
+
+ return 0;
+}
+
+static int skcipher_next_copy(struct skcipher_walk *walk)
+{
+ void *tmp = walk->page;
+
+ scatterwalk_map(&walk->in);
+ memcpy(tmp, walk->in.addr, walk->nbytes);
+ scatterwalk_unmap(&walk->in);
+ /*
+ * walk->in is advanced later when the number of bytes actually
+ * processed (which might be less than walk->nbytes) is known.
+ */
+
+ walk->in.__addr = tmp;
+ walk->out.__addr = tmp;
+ return 0;
+}
+
+static int skcipher_next_fast(struct skcipher_walk *walk)
+{
+ unsigned long diff;
+
+ diff = offset_in_page(walk->in.offset) -
+ offset_in_page(walk->out.offset);
+ diff |= (u8 *)(sg_page(walk->in.sg) + (walk->in.offset >> PAGE_SHIFT)) -
+ (u8 *)(sg_page(walk->out.sg) + (walk->out.offset >> PAGE_SHIFT));
+
+ scatterwalk_map(&walk->out);
+ walk->in.__addr = walk->out.__addr;
+
+ if (diff) {
+ walk->flags |= SKCIPHER_WALK_DIFF;
+ scatterwalk_map(&walk->in);
+ }
+
+ return 0;
+}
+
+static int skcipher_walk_next(struct skcipher_walk *walk)
+{
+ unsigned int bsize;
+ unsigned int n;
+
+ n = walk->total;
+ bsize = min(walk->stride, max(n, walk->blocksize));
+ n = scatterwalk_clamp(&walk->in, n);
+ n = scatterwalk_clamp(&walk->out, n);
+
+ if (unlikely(n < bsize)) {
+ if (unlikely(walk->total < walk->blocksize))
+ return skcipher_walk_done(walk, -EINVAL);
+
+slow_path:
+ return skcipher_next_slow(walk, bsize);
+ }
+ walk->nbytes = n;
+
+ if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) {
+ if (!walk->page) {
+ gfp_t gfp = skcipher_walk_gfp(walk);
+
+ walk->page = (void *)__get_free_page(gfp);
+ if (!walk->page)
+ goto slow_path;
+ }
+ walk->flags |= SKCIPHER_WALK_COPY;
+ return skcipher_next_copy(walk);
+ }
+
+ return skcipher_next_fast(walk);
+}
+
+static int skcipher_copy_iv(struct skcipher_walk *walk)
+{
+ unsigned alignmask = walk->alignmask;
+ unsigned ivsize = walk->ivsize;
+ unsigned aligned_stride = ALIGN(walk->stride, alignmask + 1);
+ unsigned size;
+ u8 *iv;
+
+ /* Min size for a buffer of stride + ivsize, aligned to alignmask */
+ size = aligned_stride + ivsize +
+ (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+
+ walk->buffer = kmalloc(size, skcipher_walk_gfp(walk));
+ if (!walk->buffer)
+ return -ENOMEM;
+
+ iv = PTR_ALIGN(walk->buffer, alignmask + 1) + aligned_stride;
+
+ walk->iv = memcpy(iv, walk->iv, walk->ivsize);
+ return 0;
+}
+
+int skcipher_walk_first(struct skcipher_walk *walk, bool atomic)
+{
+ if (WARN_ON_ONCE(in_hardirq()))
+ return -EDEADLK;
+
+ walk->flags = atomic ? 0 : SKCIPHER_WALK_SLEEP;
+
+ walk->buffer = NULL;
+ if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+ int err = skcipher_copy_iv(walk);
+ if (err)
+ return err;
+ }
+
+ walk->page = NULL;
+
+ return skcipher_walk_next(walk);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_first);
+
+/**
+ * skcipher_walk_done() - finish one step of a skcipher_walk
+ * @walk: the skcipher_walk
+ * @res: number of bytes *not* processed (>= 0) from walk->nbytes,
+ * or a -errno value to terminate the walk due to an error
+ *
+ * This function cleans up after one step of walking through the source and
+ * destination scatterlists, and advances to the next step if applicable.
+ * walk->nbytes is set to the number of bytes available in the next step,
+ * walk->total is set to the new total number of bytes remaining, and
+ * walk->{src,dst}.virt.addr is set to the next pair of data pointers. If there
+ * is no more data, or if an error occurred (i.e. -errno return), then
+ * walk->nbytes and walk->total are set to 0 and all resources owned by the
+ * skcipher_walk are freed.
+ *
+ * Return: 0 or a -errno value. If @res was a -errno value then it will be
+ * returned, but other errors may occur too.
+ */
+int skcipher_walk_done(struct skcipher_walk *walk, int res)
+{
+ unsigned int n = walk->nbytes; /* num bytes processed this step */
+ unsigned int total = 0; /* new total remaining */
+
+ if (!n)
+ goto finish;
+
+ if (likely(res >= 0)) {
+ n -= res; /* subtract num bytes *not* processed */
+ total = walk->total - n;
+ }
+
+ if (likely(!(walk->flags & (SKCIPHER_WALK_SLOW |
+ SKCIPHER_WALK_COPY |
+ SKCIPHER_WALK_DIFF)))) {
+ scatterwalk_advance(&walk->in, n);
+ } else if (walk->flags & SKCIPHER_WALK_DIFF) {
+ scatterwalk_done_src(&walk->in, n);
+ } else if (walk->flags & SKCIPHER_WALK_COPY) {
+ scatterwalk_advance(&walk->in, n);
+ scatterwalk_map(&walk->out);
+ memcpy(walk->out.addr, walk->page, n);
+ } else { /* SKCIPHER_WALK_SLOW */
+ if (res > 0) {
+ /*
+ * Didn't process all bytes. Either the algorithm is
+ * broken, or this was the last step and it turned out
+ * the message wasn't evenly divisible into blocks but
+ * the algorithm requires it.
+ */
+ res = -EINVAL;
+ total = 0;
+ } else
+ memcpy_to_scatterwalk(&walk->out, walk->out.addr, n);
+ goto dst_done;
+ }
+
+ scatterwalk_done_dst(&walk->out, n);
+dst_done:
+
+ if (res > 0)
+ res = 0;
+
+ walk->total = total;
+ walk->nbytes = 0;
+
+ if (total) {
+ if (walk->flags & SKCIPHER_WALK_SLEEP)
+ cond_resched();
+ walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY |
+ SKCIPHER_WALK_DIFF);
+ return skcipher_walk_next(walk);
+ }
+
+finish:
+ /* Short-circuit for the common/fast path. */
+ if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
+ goto out;
+
+ if (walk->iv != walk->oiv)
+ memcpy(walk->oiv, walk->iv, walk->ivsize);
+ if (walk->buffer != walk->page)
+ kfree(walk->buffer);
+ if (walk->page)
+ free_page((unsigned long)walk->page);
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_done);
diff --git a/crypto/scompress.c b/crypto/scompress.c
index ffeedcf20b0f..c651e7f2197a 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -7,9 +7,9 @@
* Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
*/
-#include <crypto/internal/acompress.h>
#include <crypto/internal/scompress.h>
#include <crypto/scatterwalk.h>
+#include <linux/cpumask.h>
#include <linux/cryptouser.h>
#include <linux/err.h>
#include <linux/highmem.h>
@@ -20,20 +20,17 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
#include <net/netlink.h>
#include "compress.h"
-#define SCOMP_SCRATCH_SIZE 65400
-
struct scomp_scratch {
spinlock_t lock;
union {
void *src;
unsigned long saddr;
};
- void *dst;
};
static DEFINE_PER_CPU(struct scomp_scratch, scomp_scratch) = {
@@ -44,6 +41,10 @@ static const struct crypto_type crypto_scomp_type;
static int scomp_scratch_users;
static DEFINE_MUTEX(scomp_lock);
+static cpumask_t scomp_scratch_want;
+static void scomp_scratch_workfn(struct work_struct *work);
+static DECLARE_WORK(scomp_scratch_work, scomp_scratch_workfn);
+
static int __maybe_unused crypto_scomp_report(
struct sk_buff *skb, struct crypto_alg *alg)
{
@@ -74,82 +75,48 @@ static void crypto_scomp_free_scratches(void)
scratch = per_cpu_ptr(&scomp_scratch, i);
free_page(scratch->saddr);
- vfree(scratch->dst);
scratch->src = NULL;
- scratch->dst = NULL;
}
}
-static int crypto_scomp_alloc_scratches(void)
+static int scomp_alloc_scratch(struct scomp_scratch *scratch, int cpu)
{
- struct scomp_scratch *scratch;
- int i;
-
- for_each_possible_cpu(i) {
- struct page *page;
- void *mem;
-
- scratch = per_cpu_ptr(&scomp_scratch, i);
+ int node = cpu_to_node(cpu);
+ struct page *page;
- page = alloc_pages_node(cpu_to_node(i), GFP_KERNEL, 0);
- if (!page)
- goto error;
- scratch->src = page_address(page);
- mem = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
- if (!mem)
- goto error;
- scratch->dst = mem;
- }
+ page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!page)
+ return -ENOMEM;
+ spin_lock_bh(&scratch->lock);
+ scratch->src = page_address(page);
+ spin_unlock_bh(&scratch->lock);
return 0;
-error:
- crypto_scomp_free_scratches();
- return -ENOMEM;
}
-static void scomp_free_streams(struct scomp_alg *alg)
+static void scomp_scratch_workfn(struct work_struct *work)
{
- struct crypto_acomp_stream __percpu *stream = alg->stream;
- int i;
+ int cpu;
- alg->stream = NULL;
- if (!stream)
- return;
+ for_each_cpu(cpu, &scomp_scratch_want) {
+ struct scomp_scratch *scratch;
- for_each_possible_cpu(i) {
- struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i);
-
- if (IS_ERR_OR_NULL(ps->ctx))
+ scratch = per_cpu_ptr(&scomp_scratch, cpu);
+ if (scratch->src)
+ continue;
+ if (scomp_alloc_scratch(scratch, cpu))
break;
- alg->free_ctx(ps->ctx);
+ cpumask_clear_cpu(cpu, &scomp_scratch_want);
}
-
- free_percpu(stream);
}
-static int scomp_alloc_streams(struct scomp_alg *alg)
+static int crypto_scomp_alloc_scratches(void)
{
- struct crypto_acomp_stream __percpu *stream;
- int i;
-
- stream = alloc_percpu(struct crypto_acomp_stream);
- if (!stream)
- return -ENOMEM;
-
- alg->stream = stream;
-
- for_each_possible_cpu(i) {
- struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i);
-
- ps->ctx = alg->alloc_ctx();
- if (IS_ERR(ps->ctx)) {
- scomp_free_streams(alg);
- return PTR_ERR(ps->ctx);
- }
+ unsigned int i = cpumask_first(cpu_possible_mask);
+ struct scomp_scratch *scratch;
- spin_lock_init(&ps->lock);
- }
- return 0;
+ scratch = per_cpu_ptr(&scomp_scratch, i);
+ return scomp_alloc_scratch(scratch, i);
}
static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
@@ -158,11 +125,9 @@ static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
int ret = 0;
mutex_lock(&scomp_lock);
- if (!alg->stream) {
- ret = scomp_alloc_streams(alg);
- if (ret)
- goto unlock;
- }
+ ret = crypto_acomp_alloc_streams(&alg->streams);
+ if (ret)
+ goto unlock;
if (!scomp_scratch_users++) {
ret = crypto_scomp_alloc_scratches();
if (ret)
@@ -174,13 +139,40 @@ unlock:
return ret;
}
+static struct scomp_scratch *scomp_lock_scratch(void) __acquires(scratch)
+{
+ int cpu = raw_smp_processor_id();
+ struct scomp_scratch *scratch;
+
+ scratch = per_cpu_ptr(&scomp_scratch, cpu);
+ spin_lock(&scratch->lock);
+ if (likely(scratch->src))
+ return scratch;
+ spin_unlock(&scratch->lock);
+
+ cpumask_set_cpu(cpu, &scomp_scratch_want);
+ schedule_work(&scomp_scratch_work);
+
+ scratch = per_cpu_ptr(&scomp_scratch, cpumask_first(cpu_possible_mask));
+ spin_lock(&scratch->lock);
+ return scratch;
+}
+
+static inline void scomp_unlock_scratch(struct scomp_scratch *scratch)
+ __releases(scratch)
+{
+ spin_unlock(&scratch->lock);
+}
+
static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
{
- struct scomp_scratch *scratch = raw_cpu_ptr(&scomp_scratch);
struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
struct crypto_scomp **tfm_ctx = acomp_tfm_ctx(tfm);
+ bool src_isvirt = acomp_request_src_isvirt(req);
+ bool dst_isvirt = acomp_request_dst_isvirt(req);
struct crypto_scomp *scomp = *tfm_ctx;
struct crypto_acomp_stream *stream;
+ struct scomp_scratch *scratch;
unsigned int slen = req->slen;
unsigned int dlen = req->dlen;
struct page *spage, *dpage;
@@ -197,15 +189,32 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
if (!req->dst || !dlen)
return -EINVAL;
- if (acomp_request_src_isvirt(req))
+ if (dst_isvirt)
+ dst = req->dvirt;
+ else {
+ if (dlen <= req->dst->length) {
+ dpage = sg_page(req->dst);
+ doff = req->dst->offset;
+ } else
+ return -ENOSYS;
+
+ dpage = nth_page(dpage, doff / PAGE_SIZE);
+ doff = offset_in_page(doff);
+
+ n = (dlen - 1) / PAGE_SIZE;
+ n += (offset_in_page(dlen - 1) + doff) / PAGE_SIZE;
+ if (PageHighMem(dpage + n) &&
+ size_add(doff, dlen) > PAGE_SIZE)
+ return -ENOSYS;
+ dst = kmap_local_page(dpage) + doff;
+ }
+
+ if (src_isvirt)
src = req->svirt;
else {
- src = scratch->src;
+ src = NULL;
do {
- if (acomp_request_src_isfolio(req)) {
- spage = folio_page(req->sfolio, 0);
- soff = req->soff;
- } else if (slen <= req->src->length) {
+ if (slen <= req->src->length) {
spage = sg_page(req->src);
soff = req->src->offset;
} else
@@ -223,59 +232,37 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
} while (0);
}
- if (acomp_request_dst_isvirt(req))
- dst = req->dvirt;
- else {
- unsigned int max = SCOMP_SCRATCH_SIZE;
-
- dst = scratch->dst;
- do {
- if (acomp_request_dst_isfolio(req)) {
- dpage = folio_page(req->dfolio, 0);
- doff = req->doff;
- } else if (dlen <= req->dst->length) {
- dpage = sg_page(req->dst);
- doff = req->dst->offset;
- } else
- break;
-
- dpage = nth_page(dpage, doff / PAGE_SIZE);
- doff = offset_in_page(doff);
+ stream = crypto_acomp_lock_stream_bh(&crypto_scomp_alg(scomp)->streams);
- n = (dlen - 1) / PAGE_SIZE;
- n += (offset_in_page(dlen - 1) + doff) / PAGE_SIZE;
- if (PageHighMem(nth_page(dpage, n)) &&
- size_add(doff, dlen) > PAGE_SIZE)
- break;
- dst = kmap_local_page(dpage) + doff;
- max = dlen;
- } while (0);
- dlen = min(dlen, max);
- }
-
- spin_lock_bh(&scratch->lock);
+ if (!src_isvirt && !src) {
+ const u8 *src;
- if (src == scratch->src)
+ scratch = scomp_lock_scratch();
+ src = scratch->src;
memcpy_from_sglist(scratch->src, req->src, 0, slen);
- stream = raw_cpu_ptr(crypto_scomp_alg(scomp)->stream);
- spin_lock(&stream->lock);
- if (dir)
+ if (dir)
+ ret = crypto_scomp_compress(scomp, src, slen,
+ dst, &dlen, stream->ctx);
+ else
+ ret = crypto_scomp_decompress(scomp, src, slen,
+ dst, &dlen, stream->ctx);
+
+ scomp_unlock_scratch(scratch);
+ } else if (dir)
ret = crypto_scomp_compress(scomp, src, slen,
dst, &dlen, stream->ctx);
else
ret = crypto_scomp_decompress(scomp, src, slen,
dst, &dlen, stream->ctx);
- if (dst == scratch->dst)
- memcpy_to_sglist(req->dst, 0, dst, dlen);
-
- spin_unlock(&stream->lock);
- spin_unlock_bh(&scratch->lock);
+ crypto_acomp_unlock_stream_bh(stream);
req->dlen = dlen;
- if (!acomp_request_dst_isvirt(req) && dst != scratch->dst) {
+ if (!src_isvirt && src)
+ kunmap_local(src);
+ if (!dst_isvirt) {
kunmap_local(dst);
dlen += doff;
for (;;) {
@@ -286,34 +273,18 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
dpage = nth_page(dpage, 1);
}
}
- if (!acomp_request_src_isvirt(req) && src != scratch->src)
- kunmap_local(src);
return ret;
}
-static int scomp_acomp_chain(struct acomp_req *req, int dir)
-{
- struct acomp_req *r2;
- int err;
-
- err = scomp_acomp_comp_decomp(req, dir);
- req->base.err = err;
-
- list_for_each_entry(r2, &req->base.list, base.list)
- r2->base.err = scomp_acomp_comp_decomp(r2, dir);
-
- return err;
-}
-
static int scomp_acomp_compress(struct acomp_req *req)
{
- return scomp_acomp_chain(req, 1);
+ return scomp_acomp_comp_decomp(req, 1);
}
static int scomp_acomp_decompress(struct acomp_req *req)
{
- return scomp_acomp_chain(req, 0);
+ return scomp_acomp_comp_decomp(req, 0);
}
static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
@@ -322,6 +293,7 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
crypto_free_scomp(*ctx);
+ flush_work(&scomp_scratch_work);
mutex_lock(&scomp_lock);
if (!--scomp_scratch_users)
crypto_scomp_free_scratches();
@@ -355,7 +327,9 @@ int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
static void crypto_scomp_destroy(struct crypto_alg *alg)
{
- scomp_free_streams(__crypto_scomp_alg(alg));
+ struct scomp_alg *scomp = __crypto_scomp_alg(alg);
+
+ crypto_acomp_free_streams(&scomp->streams);
}
static const struct crypto_type crypto_scomp_type = {
@@ -372,6 +346,7 @@ static const struct crypto_type crypto_scomp_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_SCOMPRESS,
.tfmsize = offsetof(struct crypto_scomp, base),
+ .algsize = offsetof(struct scomp_alg, base),
};
static void scomp_prepare_alg(struct scomp_alg *alg)
@@ -380,7 +355,7 @@ static void scomp_prepare_alg(struct scomp_alg *alg)
comp_prepare_alg(&alg->calg);
- base->cra_flags |= CRYPTO_ALG_REQ_CHAIN;
+ base->cra_flags |= CRYPTO_ALG_REQ_VIRT;
}
int crypto_register_scomp(struct scomp_alg *alg)
diff --git a/crypto/seed.c b/crypto/seed.c
index d05d8ed909fa..815391f213de 100644
--- a/crypto/seed.c
+++ b/crypto/seed.c
@@ -460,7 +460,7 @@ static void __exit seed_fini(void)
crypto_unregister_alg(&seed_alg);
}
-subsys_initcall(seed_init);
+module_init(seed_init);
module_exit(seed_fini);
MODULE_DESCRIPTION("SEED Cipher Algorithm");
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index 17e11d51ddc3..2bae99e33526 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -64,20 +64,9 @@ static int seqiv_aead_encrypt(struct aead_request *req)
data = req->base.data;
info = req->iv;
- if (req->src != req->dst) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
-
- skcipher_request_set_sync_tfm(nreq, ctx->sknull);
- skcipher_request_set_callback(nreq, req->base.flags,
- NULL, NULL);
- skcipher_request_set_crypt(nreq, req->src, req->dst,
- req->assoclen + req->cryptlen,
- NULL);
-
- err = crypto_skcipher_encrypt(nreq);
- if (err)
- return err;
- }
+ if (req->src != req->dst)
+ memcpy_sglist(req->dst, req->src,
+ req->assoclen + req->cryptlen);
if (unlikely(!IS_ALIGNED((unsigned long)info,
crypto_aead_alignmask(geniv) + 1))) {
@@ -179,7 +168,7 @@ static void __exit seqiv_module_exit(void)
crypto_unregister_template(&seqiv_tmpl);
}
-subsys_initcall(seqiv_module_init);
+module_init(seqiv_module_init);
module_exit(seqiv_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index f6ef187be6fe..b21e7606c652 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -599,7 +599,7 @@ static void __exit serpent_mod_fini(void)
crypto_unregister_alg(&srp_alg);
}
-subsys_initcall(serpent_mod_init);
+module_init(serpent_mod_init);
module_exit(serpent_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index 325b57fe28dc..024e8043bab0 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -12,13 +12,11 @@
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE] = {
0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
@@ -39,38 +37,31 @@ static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
memzero_explicit(temp, sizeof(temp));
}
-int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
+ return sha1_base_do_update_blocks(desc, data, len,
+ sha1_generic_block_fn);
}
-EXPORT_SYMBOL(crypto_sha1_update);
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- sha1_base_do_finalize(desc, sha1_generic_block_fn);
+ sha1_base_do_finup(desc, data, len, sha1_generic_block_fn);
return sha1_base_finish(desc, out);
}
-int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
- return sha1_final(desc, out);
-}
-EXPORT_SYMBOL(crypto_sha1_finup);
-
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = crypto_sha1_update,
- .final = sha1_final,
.finup = crypto_sha1_finup,
- .descsize = sizeof(struct sha1_state),
+ .descsize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-generic",
.cra_priority = 100,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -86,7 +77,7 @@ static void __exit sha1_generic_mod_fini(void)
crypto_unregister_shash(&alg);
}
-subsys_initcall(sha1_generic_mod_init);
+module_init(sha1_generic_mod_init);
module_exit(sha1_generic_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/sha256.c b/crypto/sha256.c
new file mode 100644
index 000000000000..4aeb213bab11
--- /dev/null
+++ b/crypto/sha256.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Crypto API wrapper for the SHA-256 and SHA-224 library functions
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ */
+#include <crypto/internal/hash.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = {
+ 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
+ 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
+ 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
+ 0x2f
+};
+EXPORT_SYMBOL_GPL(sha224_zero_message_hash);
+
+const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = {
+ 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
+ 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
+ 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
+ 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55
+};
+EXPORT_SYMBOL_GPL(sha256_zero_message_hash);
+
+static int crypto_sha256_init(struct shash_desc *desc)
+{
+ sha256_block_init(shash_desc_ctx(desc));
+ return 0;
+}
+
+static inline int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, bool force_generic)
+{
+ struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
+ int remain = len % SHA256_BLOCK_SIZE;
+
+ sctx->count += len - remain;
+ sha256_choose_blocks(sctx->state, data, len / SHA256_BLOCK_SIZE,
+ force_generic, !force_generic);
+ return remain;
+}
+
+static int crypto_sha256_update_generic(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ return crypto_sha256_update(desc, data, len, true);
+}
+
+static int crypto_sha256_update_lib(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ sha256_update(shash_desc_ctx(desc), data, len);
+ return 0;
+}
+
+static int crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ return crypto_sha256_update(desc, data, len, false);
+}
+
+static int crypto_sha256_final_lib(struct shash_desc *desc, u8 *out)
+{
+ sha256_final(shash_desc_ctx(desc), out);
+ return 0;
+}
+
+static __always_inline int crypto_sha256_finup(struct shash_desc *desc,
+ const u8 *data,
+ unsigned int len, u8 *out,
+ bool force_generic)
+{
+ struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int remain = len;
+ u8 *buf;
+
+ if (len >= SHA256_BLOCK_SIZE)
+ remain = crypto_sha256_update(desc, data, len, force_generic);
+ sctx->count += remain;
+ buf = memcpy(sctx + 1, data + len - remain, remain);
+ sha256_finup(sctx, buf, remain, out,
+ crypto_shash_digestsize(desc->tfm), force_generic,
+ !force_generic);
+ return 0;
+}
+
+static int crypto_sha256_finup_generic(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return crypto_sha256_finup(desc, data, len, out, true);
+}
+
+static int crypto_sha256_finup_arch(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return crypto_sha256_finup(desc, data, len, out, false);
+}
+
+static int crypto_sha256_digest_generic(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ crypto_sha256_init(desc);
+ return crypto_sha256_finup_generic(desc, data, len, out);
+}
+
+static int crypto_sha256_digest_lib(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ sha256(data, len, out);
+ return 0;
+}
+
+static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ crypto_sha256_init(desc);
+ return crypto_sha256_finup_arch(desc, data, len, out);
+}
+
+static int crypto_sha224_init(struct shash_desc *desc)
+{
+ sha224_block_init(shash_desc_ctx(desc));
+ return 0;
+}
+
+static int crypto_sha224_final_lib(struct shash_desc *desc, u8 *out)
+{
+ sha224_final(shash_desc_ctx(desc), out);
+ return 0;
+}
+
+static int crypto_sha256_import_lib(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ const u8 *p = in;
+
+ memcpy(sctx, p, sizeof(*sctx));
+ p += sizeof(*sctx);
+ sctx->count += *p;
+ return 0;
+}
+
+static int crypto_sha256_export_lib(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx0 = shash_desc_ctx(desc);
+ struct sha256_state sctx = *sctx0;
+ unsigned int partial;
+ u8 *p = out;
+
+ partial = sctx.count % SHA256_BLOCK_SIZE;
+ sctx.count -= partial;
+ memcpy(p, &sctx, sizeof(sctx));
+ p += sizeof(sctx);
+ *p = partial;
+ return 0;
+}
+
+static struct shash_alg algs[] = {
+ {
+ .base.cra_name = "sha256",
+ .base.cra_driver_name = "sha256-generic",
+ .base.cra_priority = 100,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
+ .base.cra_blocksize = SHA256_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = crypto_sha256_init,
+ .update = crypto_sha256_update_generic,
+ .finup = crypto_sha256_finup_generic,
+ .digest = crypto_sha256_digest_generic,
+ .descsize = sizeof(struct crypto_sha256_state),
+ },
+ {
+ .base.cra_name = "sha224",
+ .base.cra_driver_name = "sha224-generic",
+ .base.cra_priority = 100,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
+ .base.cra_blocksize = SHA224_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = crypto_sha224_init,
+ .update = crypto_sha256_update_generic,
+ .finup = crypto_sha256_finup_generic,
+ .descsize = sizeof(struct crypto_sha256_state),
+ },
+ {
+ .base.cra_name = "sha256",
+ .base.cra_driver_name = "sha256-lib",
+ .base.cra_blocksize = SHA256_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = crypto_sha256_init,
+ .update = crypto_sha256_update_lib,
+ .final = crypto_sha256_final_lib,
+ .digest = crypto_sha256_digest_lib,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct crypto_sha256_state) +
+ SHA256_BLOCK_SIZE + 1,
+ .import = crypto_sha256_import_lib,
+ .export = crypto_sha256_export_lib,
+ },
+ {
+ .base.cra_name = "sha224",
+ .base.cra_driver_name = "sha224-lib",
+ .base.cra_blocksize = SHA224_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = crypto_sha224_init,
+ .update = crypto_sha256_update_lib,
+ .final = crypto_sha224_final_lib,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct crypto_sha256_state) +
+ SHA256_BLOCK_SIZE + 1,
+ .import = crypto_sha256_import_lib,
+ .export = crypto_sha256_export_lib,
+ },
+ {
+ .base.cra_name = "sha256",
+ .base.cra_driver_name = "sha256-" __stringify(ARCH),
+ .base.cra_priority = 300,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
+ .base.cra_blocksize = SHA256_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = crypto_sha256_init,
+ .update = crypto_sha256_update_arch,
+ .finup = crypto_sha256_finup_arch,
+ .digest = crypto_sha256_digest_arch,
+ .descsize = sizeof(struct crypto_sha256_state),
+ },
+ {
+ .base.cra_name = "sha224",
+ .base.cra_driver_name = "sha224-" __stringify(ARCH),
+ .base.cra_priority = 300,
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
+ .base.cra_blocksize = SHA224_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = crypto_sha224_init,
+ .update = crypto_sha256_update_arch,
+ .finup = crypto_sha256_finup_arch,
+ .descsize = sizeof(struct crypto_sha256_state),
+ },
+};
+
+static unsigned int num_algs;
+
+static int __init crypto_sha256_mod_init(void)
+{
+ /* register the arch flavours only if they differ from generic */
+ num_algs = ARRAY_SIZE(algs);
+ BUILD_BUG_ON(ARRAY_SIZE(algs) <= 2);
+ if (!sha256_is_arch_optimized())
+ num_algs -= 2;
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+module_init(crypto_sha256_mod_init);
+
+static void __exit crypto_sha256_mod_exit(void)
+{
+ crypto_unregister_shashes(algs, num_algs);
+}
+module_exit(crypto_sha256_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crypto API wrapper for the SHA-256 and SHA-224 library functions");
+
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-generic");
+MODULE_ALIAS_CRYPTO("sha256-" __stringify(ARCH));
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-generic");
+MODULE_ALIAS_CRYPTO("sha224-" __stringify(ARCH));
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
deleted file mode 100644
index b00521f1a6d4..000000000000
--- a/crypto/sha256_generic.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Crypto API wrapper for the generic SHA256 code from lib/crypto/sha256.c
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <asm/byteorder.h>
-#include <linux/unaligned.h>
-
-const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = {
- 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
- 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
- 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
- 0x2f
-};
-EXPORT_SYMBOL_GPL(sha224_zero_message_hash);
-
-const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = {
- 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
- 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
- 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
- 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55
-};
-EXPORT_SYMBOL_GPL(sha256_zero_message_hash);
-
-int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- sha256_update(shash_desc_ctx(desc), data, len);
- return 0;
-}
-EXPORT_SYMBOL(crypto_sha256_update);
-
-static int crypto_sha256_final(struct shash_desc *desc, u8 *out)
-{
- if (crypto_shash_digestsize(desc->tfm) == SHA224_DIGEST_SIZE)
- sha224_final(shash_desc_ctx(desc), out);
- else
- sha256_final(shash_desc_ctx(desc), out);
- return 0;
-}
-
-int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash)
-{
- sha256_update(shash_desc_ctx(desc), data, len);
- return crypto_sha256_final(desc, hash);
-}
-EXPORT_SYMBOL(crypto_sha256_finup);
-
-static struct shash_alg sha256_algs[2] = { {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_base_init,
- .update = crypto_sha256_update,
- .final = crypto_sha256_final,
- .finup = crypto_sha256_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "sha256-generic",
- .cra_priority = 100,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_base_init,
- .update = crypto_sha256_update,
- .final = crypto_sha256_final,
- .finup = crypto_sha256_finup,
- .descsize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "sha224-generic",
- .cra_priority = 100,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init sha256_generic_mod_init(void)
-{
- return crypto_register_shashes(sha256_algs, ARRAY_SIZE(sha256_algs));
-}
-
-static void __exit sha256_generic_mod_fini(void)
-{
- crypto_unregister_shashes(sha256_algs, ARRAY_SIZE(sha256_algs));
-}
-
-subsys_initcall(sha256_generic_mod_init);
-module_exit(sha256_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm");
-
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha224-generic");
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha256-generic");
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index b103642b56ea..41d1e506e6de 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -9,10 +9,10 @@
* Ard Biesheuvel <ard.biesheuvel@linaro.org>
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
#include <crypto/sha3.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include <linux/unaligned.h>
/*
@@ -161,68 +161,51 @@ static void keccakf(u64 st[25])
int crypto_sha3_init(struct shash_desc *desc)
{
struct sha3_state *sctx = shash_desc_ctx(desc);
- unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
-
- sctx->rsiz = 200 - 2 * digest_size;
- sctx->rsizw = sctx->rsiz / 8;
- sctx->partial = 0;
memset(sctx->st, 0, sizeof(sctx->st));
return 0;
}
EXPORT_SYMBOL(crypto_sha3_init);
-int crypto_sha3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int crypto_sha3_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
+ unsigned int rsiz = crypto_shash_blocksize(desc->tfm);
struct sha3_state *sctx = shash_desc_ctx(desc);
- unsigned int done;
- const u8 *src;
-
- done = 0;
- src = data;
-
- if ((sctx->partial + len) > (sctx->rsiz - 1)) {
- if (sctx->partial) {
- done = -sctx->partial;
- memcpy(sctx->buf + sctx->partial, data,
- done + sctx->rsiz);
- src = sctx->buf;
- }
+ unsigned int rsizw = rsiz / 8;
- do {
- unsigned int i;
+ do {
+ int i;
- for (i = 0; i < sctx->rsizw; i++)
- sctx->st[i] ^= get_unaligned_le64(src + 8 * i);
- keccakf(sctx->st);
+ for (i = 0; i < rsizw; i++)
+ sctx->st[i] ^= get_unaligned_le64(data + 8 * i);
+ keccakf(sctx->st);
- done += sctx->rsiz;
- src = data + done;
- } while (done + (sctx->rsiz - 1) < len);
-
- sctx->partial = 0;
- }
- memcpy(sctx->buf + sctx->partial, src, len - done);
- sctx->partial += (len - done);
-
- return 0;
+ data += rsiz;
+ len -= rsiz;
+ } while (len >= rsiz);
+ return len;
}
-EXPORT_SYMBOL(crypto_sha3_update);
-int crypto_sha3_final(struct shash_desc *desc, u8 *out)
+static int crypto_sha3_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- unsigned int i, inlen = sctx->partial;
unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+ unsigned int rsiz = crypto_shash_blocksize(desc->tfm);
+ struct sha3_state *sctx = shash_desc_ctx(desc);
+ __le64 block[SHA3_224_BLOCK_SIZE / 8] = {};
__le64 *digest = (__le64 *)out;
+ unsigned int rsizw = rsiz / 8;
+ u8 *p;
+ int i;
- sctx->buf[inlen++] = 0x06;
- memset(sctx->buf + inlen, 0, sctx->rsiz - inlen);
- sctx->buf[sctx->rsiz - 1] |= 0x80;
+ p = memcpy(block, src, len);
+ p[len++] = 0x06;
+ p[rsiz - 1] |= 0x80;
- for (i = 0; i < sctx->rsizw; i++)
- sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i);
+ for (i = 0; i < rsizw; i++)
+ sctx->st[i] ^= le64_to_cpu(block[i]);
+ memzero_explicit(block, sizeof(block));
keccakf(sctx->st);
@@ -232,49 +215,51 @@ int crypto_sha3_final(struct shash_desc *desc, u8 *out)
if (digest_size & 4)
put_unaligned_le32(sctx->st[i], (__le32 *)digest);
- memset(sctx, 0, sizeof(*sctx));
return 0;
}
-EXPORT_SYMBOL(crypto_sha3_final);
static struct shash_alg algs[] = { {
.digestsize = SHA3_224_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = crypto_sha3_update,
- .final = crypto_sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = crypto_sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-224",
.base.cra_driver_name = "sha3-224-generic",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_224_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.digestsize = SHA3_256_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = crypto_sha3_update,
- .final = crypto_sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = crypto_sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-256",
.base.cra_driver_name = "sha3-256-generic",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_256_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.digestsize = SHA3_384_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = crypto_sha3_update,
- .final = crypto_sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = crypto_sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-384",
.base.cra_driver_name = "sha3-384-generic",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_384_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.digestsize = SHA3_512_DIGEST_SIZE,
.init = crypto_sha3_init,
.update = crypto_sha3_update,
- .final = crypto_sha3_final,
- .descsize = sizeof(struct sha3_state),
+ .finup = crypto_sha3_finup,
+ .descsize = SHA3_STATE_SIZE,
.base.cra_name = "sha3-512",
.base.cra_driver_name = "sha3-512-generic",
+ .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = SHA3_512_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
} };
@@ -289,7 +274,7 @@ static void __exit sha3_generic_mod_fini(void)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
-subsys_initcall(sha3_generic_mod_init);
+module_init(sha3_generic_mod_init);
module_exit(sha3_generic_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index ed81813bd420..7368173f545e 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -6,16 +6,10 @@
* Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
*/
#include <crypto/internal/hash.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/crypto.h>
-#include <linux/types.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
-#include <linux/percpu.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/unaligned.h>
const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE] = {
@@ -145,47 +139,42 @@ sha512_transform(u64 *state, const u8 *input)
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
}
-static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
- int blocks)
+void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
+ int blocks)
{
- while (blocks--) {
+ do {
sha512_transform(sst->state, src);
src += SHA512_BLOCK_SIZE;
- }
+ } while (--blocks);
}
+EXPORT_SYMBOL_GPL(sha512_generic_block_fn);
-int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- return sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
+ return sha512_base_do_update_blocks(desc, data, len,
+ sha512_generic_block_fn);
}
-EXPORT_SYMBOL(crypto_sha512_update);
-static int sha512_final(struct shash_desc *desc, u8 *hash)
+static int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash)
{
- sha512_base_do_finalize(desc, sha512_generic_block_fn);
+ sha512_base_do_finup(desc, data, len, sha512_generic_block_fn);
return sha512_base_finish(desc, hash);
}
-int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash)
-{
- sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
- return sha512_final(desc, hash);
-}
-EXPORT_SYMBOL(crypto_sha512_finup);
-
static struct shash_alg sha512_algs[2] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = crypto_sha512_update,
- .final = sha512_final,
.finup = crypto_sha512_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-generic",
.cra_priority = 100,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -193,13 +182,14 @@ static struct shash_alg sha512_algs[2] = { {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = crypto_sha512_update,
- .final = sha512_final,
.finup = crypto_sha512_finup,
- .descsize = sizeof(struct sha512_state),
+ .descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-generic",
.cra_priority = 100,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -215,7 +205,7 @@ static void __exit sha512_generic_mod_fini(void)
crypto_unregister_shashes(sha512_algs, ARRAY_SIZE(sha512_algs));
}
-subsys_initcall(sha512_generic_mod_init);
+module_init(sha512_generic_mod_init);
module_exit(sha512_generic_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/shash.c b/crypto/shash.c
index 301ab42bf849..37537d7995c7 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -16,6 +16,24 @@
#include "hash.h"
+static inline bool crypto_shash_block_only(struct crypto_shash *tfm)
+{
+ return crypto_shash_alg(tfm)->base.cra_flags &
+ CRYPTO_AHASH_ALG_BLOCK_ONLY;
+}
+
+static inline bool crypto_shash_final_nonzero(struct crypto_shash *tfm)
+{
+ return crypto_shash_alg(tfm)->base.cra_flags &
+ CRYPTO_AHASH_ALG_FINAL_NONZERO;
+}
+
+static inline bool crypto_shash_finup_max(struct crypto_shash *tfm)
+{
+ return crypto_shash_alg(tfm)->base.cra_flags &
+ CRYPTO_AHASH_ALG_FINUP_MAX;
+}
+
int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
{
@@ -46,18 +64,27 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
}
EXPORT_SYMBOL_GPL(crypto_shash_setkey);
-int crypto_shash_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int __crypto_shash_init(struct shash_desc *desc)
{
- return crypto_shash_alg(desc->tfm)->update(desc, data, len);
+ struct crypto_shash *tfm = desc->tfm;
+
+ if (crypto_shash_block_only(tfm)) {
+ u8 *buf = shash_desc_ctx(desc);
+
+ buf += crypto_shash_descsize(tfm) - 1;
+ *buf = 0;
+ }
+
+ return crypto_shash_alg(tfm)->init(desc);
}
-EXPORT_SYMBOL_GPL(crypto_shash_update);
-int crypto_shash_final(struct shash_desc *desc, u8 *out)
+int crypto_shash_init(struct shash_desc *desc)
{
- return crypto_shash_alg(desc->tfm)->final(desc, out);
+ if (crypto_shash_get_flags(desc->tfm) & CRYPTO_TFM_NEED_KEY)
+ return -ENOKEY;
+ return __crypto_shash_init(desc);
}
-EXPORT_SYMBOL_GPL(crypto_shash_final);
+EXPORT_SYMBOL_GPL(crypto_shash_init);
static int shash_default_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
@@ -68,20 +95,89 @@ static int shash_default_finup(struct shash_desc *desc, const u8 *data,
shash->final(desc, out);
}
-int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int crypto_shash_op_and_zero(
+ int (*op)(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out),
+ struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
{
- return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out);
+ int err;
+
+ err = op(desc, data, len, out);
+ memset(shash_desc_ctx(desc), 0, crypto_shash_descsize(desc->tfm));
+ return err;
+}
+
+int crypto_shash_finup(struct shash_desc *restrict desc, const u8 *data,
+ unsigned int len, u8 *restrict out)
+{
+ struct crypto_shash *tfm = desc->tfm;
+ u8 *blenp = shash_desc_ctx(desc);
+ bool finup_max, nonzero;
+ unsigned int bs;
+ int err;
+ u8 *buf;
+
+ if (!crypto_shash_block_only(tfm)) {
+ if (out)
+ goto finup;
+ return crypto_shash_alg(tfm)->update(desc, data, len);
+ }
+
+ finup_max = out && crypto_shash_finup_max(tfm);
+
+ /* Retain extra block for final nonzero algorithms. */
+ nonzero = crypto_shash_final_nonzero(tfm);
+
+ /*
+ * The partial block buffer follows the algorithm desc context.
+ * The byte following that contains the length.
+ */
+ blenp += crypto_shash_descsize(tfm) - 1;
+ bs = crypto_shash_blocksize(tfm);
+ buf = blenp - bs;
+
+ if (likely(!*blenp && finup_max))
+ goto finup;
+
+ while ((*blenp + len) >= bs + nonzero) {
+ unsigned int nbytes = len - nonzero;
+ const u8 *src = data;
+
+ if (*blenp) {
+ memcpy(buf + *blenp, data, bs - *blenp);
+ nbytes = bs;
+ src = buf;
+ }
+
+ err = crypto_shash_alg(tfm)->update(desc, src, nbytes);
+ if (err < 0)
+ return err;
+
+ data += nbytes - err - *blenp;
+ len -= nbytes - err - *blenp;
+ *blenp = 0;
+ }
+
+ if (*blenp || !out) {
+ memcpy(buf + *blenp, data, len);
+ *blenp += len;
+ if (!out)
+ return 0;
+ data = buf;
+ len = *blenp;
+ }
+
+finup:
+ return crypto_shash_op_and_zero(crypto_shash_alg(tfm)->finup, desc,
+ data, len, out);
}
EXPORT_SYMBOL_GPL(crypto_shash_finup);
static int shash_default_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- struct shash_alg *shash = crypto_shash_alg(desc->tfm);
-
- return shash->init(desc) ?:
- shash->finup(desc, data, len, out);
+ return __crypto_shash_init(desc) ?:
+ crypto_shash_finup(desc, data, len, out);
}
int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
@@ -92,7 +188,8 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
return -ENOKEY;
- return crypto_shash_alg(tfm)->digest(desc, data, len, out);
+ return crypto_shash_op_and_zero(crypto_shash_alg(tfm)->digest, desc,
+ data, len, out);
}
EXPORT_SYMBOL_GPL(crypto_shash_digest);
@@ -100,44 +197,104 @@ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data,
unsigned int len, u8 *out)
{
SHASH_DESC_ON_STACK(desc, tfm);
- int err;
desc->tfm = tfm;
+ return crypto_shash_digest(desc, data, len, out);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_tfm_digest);
- err = crypto_shash_digest(desc, data, len, out);
+static int __crypto_shash_export(struct shash_desc *desc, void *out,
+ int (*export)(struct shash_desc *desc,
+ void *out))
+{
+ struct crypto_shash *tfm = desc->tfm;
+ u8 *buf = shash_desc_ctx(desc);
+ unsigned int plen, ss;
+
+ plen = crypto_shash_blocksize(tfm) + 1;
+ ss = crypto_shash_statesize(tfm);
+ if (crypto_shash_block_only(tfm))
+ ss -= plen;
+ if (!export) {
+ memcpy(out, buf, ss);
+ return 0;
+ }
- shash_desc_zero(desc);
+ return export(desc, out);
+}
- return err;
+int crypto_shash_export_core(struct shash_desc *desc, void *out)
+{
+ return __crypto_shash_export(desc, out,
+ crypto_shash_alg(desc->tfm)->export_core);
}
-EXPORT_SYMBOL_GPL(crypto_shash_tfm_digest);
+EXPORT_SYMBOL_GPL(crypto_shash_export_core);
int crypto_shash_export(struct shash_desc *desc, void *out)
{
struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
- if (shash->export)
- return shash->export(desc, out);
+ if (crypto_shash_block_only(tfm)) {
+ unsigned int plen = crypto_shash_blocksize(tfm) + 1;
+ unsigned int descsize = crypto_shash_descsize(tfm);
+ unsigned int ss = crypto_shash_statesize(tfm);
+ u8 *buf = shash_desc_ctx(desc);
- memcpy(out, shash_desc_ctx(desc), crypto_shash_descsize(tfm));
- return 0;
+ memcpy(out + ss - plen, buf + descsize - plen, plen);
+ }
+ return __crypto_shash_export(desc, out, crypto_shash_alg(tfm)->export);
}
EXPORT_SYMBOL_GPL(crypto_shash_export);
-int crypto_shash_import(struct shash_desc *desc, const void *in)
+static int __crypto_shash_import(struct shash_desc *desc, const void *in,
+ int (*import)(struct shash_desc *desc,
+ const void *in))
{
struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
+ unsigned int descsize, plen, ss;
+ u8 *buf = shash_desc_ctx(desc);
if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
return -ENOKEY;
- if (shash->import)
- return shash->import(desc, in);
+ plen = crypto_shash_blocksize(tfm) + 1;
+ descsize = crypto_shash_descsize(tfm);
+ ss = crypto_shash_statesize(tfm);
+ buf[descsize - 1] = 0;
+ if (crypto_shash_block_only(tfm))
+ ss -= plen;
+ if (!import) {
+ memcpy(buf, in, ss);
+ return 0;
+ }
- memcpy(shash_desc_ctx(desc), in, crypto_shash_descsize(tfm));
- return 0;
+ return import(desc, in);
+}
+
+int crypto_shash_import_core(struct shash_desc *desc, const void *in)
+{
+ return __crypto_shash_import(desc, in,
+ crypto_shash_alg(desc->tfm)->import_core);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_import_core);
+
+int crypto_shash_import(struct shash_desc *desc, const void *in)
+{
+ struct crypto_shash *tfm = desc->tfm;
+ int err;
+
+ err = __crypto_shash_import(desc, in, crypto_shash_alg(tfm)->import);
+ if (crypto_shash_block_only(tfm)) {
+ unsigned int plen = crypto_shash_blocksize(tfm) + 1;
+ unsigned int descsize = crypto_shash_descsize(tfm);
+ unsigned int ss = crypto_shash_statesize(tfm);
+ u8 *buf = shash_desc_ctx(desc);
+
+ memcpy(buf + descsize - plen, in + ss - plen, plen);
+ if (buf[descsize - 1] >= plen)
+ err = -EOVERFLOW;
+ }
+ return err;
}
EXPORT_SYMBOL_GPL(crypto_shash_import);
@@ -153,9 +310,6 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
{
struct crypto_shash *hash = __crypto_shash_cast(tfm);
struct shash_alg *alg = crypto_shash_alg(hash);
- int err;
-
- hash->descsize = alg->descsize;
shash_set_needkey(hash, alg);
@@ -165,18 +319,7 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
if (!alg->init_tfm)
return 0;
- err = alg->init_tfm(hash);
- if (err)
- return err;
-
- /* ->init_tfm() may have increased the descsize. */
- if (WARN_ON_ONCE(hash->descsize > HASH_MAX_DESCSIZE)) {
- if (alg->exit_tfm)
- alg->exit_tfm(hash);
- return -EINVAL;
- }
-
- return 0;
+ return alg->init_tfm(hash);
}
static void crypto_shash_free_instance(struct crypto_instance *inst)
@@ -227,6 +370,7 @@ const struct crypto_type crypto_shash_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_SHASH,
.tfmsize = offsetof(struct crypto_shash, base),
+ .algsize = offsetof(struct shash_alg, base),
};
int crypto_grab_shash(struct crypto_shash_spawn *spawn,
@@ -273,8 +417,6 @@ struct crypto_shash *crypto_clone_shash(struct crypto_shash *hash)
if (IS_ERR(nhash))
return nhash;
- nhash->descsize = hash->descsize;
-
if (alg->clone_tfm) {
err = alg->clone_tfm(nhash, hash);
if (err) {
@@ -283,6 +425,9 @@ struct crypto_shash *crypto_clone_shash(struct crypto_shash *hash)
}
}
+ if (alg->exit_tfm)
+ crypto_shash_tfm(nhash)->exit = crypto_shash_exit_tfm;
+
return nhash;
}
EXPORT_SYMBOL_GPL(crypto_clone_shash);
@@ -303,14 +448,21 @@ int hash_prepare_alg(struct hash_alg_common *alg)
return 0;
}
+static int shash_default_export_core(struct shash_desc *desc, void *out)
+{
+ return -ENOSYS;
+}
+
+static int shash_default_import_core(struct shash_desc *desc, const void *in)
+{
+ return -ENOSYS;
+}
+
static int shash_prepare_alg(struct shash_alg *alg)
{
struct crypto_alg *base = &alg->halg.base;
int err;
- if (alg->descsize > HASH_MAX_DESCSIZE)
- return -EINVAL;
-
if ((alg->export && !alg->import) || (alg->import && !alg->export))
return -EINVAL;
@@ -320,6 +472,7 @@ static int shash_prepare_alg(struct shash_alg *alg)
base->cra_type = &crypto_shash_type;
base->cra_flags |= CRYPTO_ALG_TYPE_SHASH;
+ base->cra_flags |= CRYPTO_ALG_REQ_VIRT;
/*
* Handle missing optional functions. For each one we can either
@@ -336,11 +489,30 @@ static int shash_prepare_alg(struct shash_alg *alg)
alg->finup = shash_default_finup;
if (!alg->digest)
alg->digest = shash_default_digest;
- if (!alg->export)
+ if (!alg->export && !alg->halg.statesize)
alg->halg.statesize = alg->descsize;
if (!alg->setkey)
alg->setkey = shash_no_setkey;
+ if (base->cra_flags & CRYPTO_AHASH_ALG_BLOCK_ONLY) {
+ BUILD_BUG_ON(MAX_ALGAPI_BLOCKSIZE >= 256);
+ alg->descsize += base->cra_blocksize + 1;
+ alg->statesize += base->cra_blocksize + 1;
+ alg->export_core = alg->export;
+ alg->import_core = alg->import;
+ } else if (!alg->export_core || !alg->import_core) {
+ alg->export_core = shash_default_export_core;
+ alg->import_core = shash_default_import_core;
+ base->cra_flags |= CRYPTO_AHASH_ALG_NO_EXPORT_CORE;
+ }
+
+ if (alg->descsize > HASH_MAX_DESCSIZE)
+ return -EINVAL;
+ if (alg->statesize > HASH_MAX_STATESIZE)
+ return -EINVAL;
+
+ base->cra_reqsize = sizeof(struct shash_desc) + alg->descsize;
+
return 0;
}
diff --git a/crypto/sig.c b/crypto/sig.c
index dfc7cae90802..beba745b6405 100644
--- a/crypto/sig.c
+++ b/crypto/sig.c
@@ -74,6 +74,7 @@ static const struct crypto_type crypto_sig_type = {
.maskset = CRYPTO_ALG_TYPE_MASK,
.type = CRYPTO_ALG_TYPE_SIG,
.tfmsize = offsetof(struct crypto_sig, base),
+ .algsize = offsetof(struct sig_alg, base),
};
struct crypto_sig *crypto_alloc_sig(const char *alg_name, u32 type, u32 mask)
@@ -102,6 +103,11 @@ static int sig_default_set_key(struct crypto_sig *tfm,
return -ENOSYS;
}
+static unsigned int sig_default_size(struct crypto_sig *tfm)
+{
+ return DIV_ROUND_UP_POW2(crypto_sig_keysize(tfm), BITS_PER_BYTE);
+}
+
static int sig_prepare_alg(struct sig_alg *alg)
{
struct crypto_alg *base = &alg->base;
@@ -117,9 +123,9 @@ static int sig_prepare_alg(struct sig_alg *alg)
if (!alg->key_size)
return -EINVAL;
if (!alg->max_size)
- alg->max_size = alg->key_size;
+ alg->max_size = sig_default_size;
if (!alg->digest_size)
- alg->digest_size = alg->key_size;
+ alg->digest_size = sig_default_size;
base->cra_type = &crypto_sig_type;
base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 132075a905d9..de5fc91bba26 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -17,7 +17,6 @@
#include <linux/cryptouser.h>
#include <linux/err.h>
#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -28,258 +27,14 @@
#define CRYPTO_ALG_TYPE_SKCIPHER_MASK 0x0000000e
-enum {
- SKCIPHER_WALK_SLOW = 1 << 0,
- SKCIPHER_WALK_COPY = 1 << 1,
- SKCIPHER_WALK_DIFF = 1 << 2,
- SKCIPHER_WALK_SLEEP = 1 << 3,
-};
-
static const struct crypto_type crypto_skcipher_type;
-static int skcipher_walk_next(struct skcipher_walk *walk);
-
-static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)
-{
- return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
-}
-
static inline struct skcipher_alg *__crypto_skcipher_alg(
struct crypto_alg *alg)
{
return container_of(alg, struct skcipher_alg, base);
}
-/**
- * skcipher_walk_done() - finish one step of a skcipher_walk
- * @walk: the skcipher_walk
- * @res: number of bytes *not* processed (>= 0) from walk->nbytes,
- * or a -errno value to terminate the walk due to an error
- *
- * This function cleans up after one step of walking through the source and
- * destination scatterlists, and advances to the next step if applicable.
- * walk->nbytes is set to the number of bytes available in the next step,
- * walk->total is set to the new total number of bytes remaining, and
- * walk->{src,dst}.virt.addr is set to the next pair of data pointers. If there
- * is no more data, or if an error occurred (i.e. -errno return), then
- * walk->nbytes and walk->total are set to 0 and all resources owned by the
- * skcipher_walk are freed.
- *
- * Return: 0 or a -errno value. If @res was a -errno value then it will be
- * returned, but other errors may occur too.
- */
-int skcipher_walk_done(struct skcipher_walk *walk, int res)
-{
- unsigned int n = walk->nbytes; /* num bytes processed this step */
- unsigned int total = 0; /* new total remaining */
-
- if (!n)
- goto finish;
-
- if (likely(res >= 0)) {
- n -= res; /* subtract num bytes *not* processed */
- total = walk->total - n;
- }
-
- if (likely(!(walk->flags & (SKCIPHER_WALK_SLOW |
- SKCIPHER_WALK_COPY |
- SKCIPHER_WALK_DIFF)))) {
- scatterwalk_advance(&walk->in, n);
- } else if (walk->flags & SKCIPHER_WALK_DIFF) {
- scatterwalk_done_src(&walk->in, n);
- } else if (walk->flags & SKCIPHER_WALK_COPY) {
- scatterwalk_advance(&walk->in, n);
- scatterwalk_map(&walk->out);
- memcpy(walk->out.addr, walk->page, n);
- } else { /* SKCIPHER_WALK_SLOW */
- if (res > 0) {
- /*
- * Didn't process all bytes. Either the algorithm is
- * broken, or this was the last step and it turned out
- * the message wasn't evenly divisible into blocks but
- * the algorithm requires it.
- */
- res = -EINVAL;
- total = 0;
- } else
- memcpy_to_scatterwalk(&walk->out, walk->out.addr, n);
- goto dst_done;
- }
-
- scatterwalk_done_dst(&walk->out, n);
-dst_done:
-
- if (res > 0)
- res = 0;
-
- walk->total = total;
- walk->nbytes = 0;
-
- if (total) {
- if (walk->flags & SKCIPHER_WALK_SLEEP)
- cond_resched();
- walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY |
- SKCIPHER_WALK_DIFF);
- return skcipher_walk_next(walk);
- }
-
-finish:
- /* Short-circuit for the common/fast path. */
- if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
- goto out;
-
- if (walk->iv != walk->oiv)
- memcpy(walk->oiv, walk->iv, walk->ivsize);
- if (walk->buffer != walk->page)
- kfree(walk->buffer);
- if (walk->page)
- free_page((unsigned long)walk->page);
-
-out:
- return res;
-}
-EXPORT_SYMBOL_GPL(skcipher_walk_done);
-
-static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize)
-{
- unsigned alignmask = walk->alignmask;
- unsigned n;
- void *buffer;
-
- if (!walk->buffer)
- walk->buffer = walk->page;
- buffer = walk->buffer;
- if (!buffer) {
- /* Min size for a buffer of bsize bytes aligned to alignmask */
- n = bsize + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
-
- buffer = kzalloc(n, skcipher_walk_gfp(walk));
- if (!buffer)
- return skcipher_walk_done(walk, -ENOMEM);
- walk->buffer = buffer;
- }
-
- buffer = PTR_ALIGN(buffer, alignmask + 1);
- memcpy_from_scatterwalk(buffer, &walk->in, bsize);
- walk->out.__addr = buffer;
- walk->in.__addr = walk->out.addr;
-
- walk->nbytes = bsize;
- walk->flags |= SKCIPHER_WALK_SLOW;
-
- return 0;
-}
-
-static int skcipher_next_copy(struct skcipher_walk *walk)
-{
- void *tmp = walk->page;
-
- scatterwalk_map(&walk->in);
- memcpy(tmp, walk->in.addr, walk->nbytes);
- scatterwalk_unmap(&walk->in);
- /*
- * walk->in is advanced later when the number of bytes actually
- * processed (which might be less than walk->nbytes) is known.
- */
-
- walk->in.__addr = tmp;
- walk->out.__addr = tmp;
- return 0;
-}
-
-static int skcipher_next_fast(struct skcipher_walk *walk)
-{
- unsigned long diff;
-
- diff = offset_in_page(walk->in.offset) -
- offset_in_page(walk->out.offset);
- diff |= (u8 *)(sg_page(walk->in.sg) + (walk->in.offset >> PAGE_SHIFT)) -
- (u8 *)(sg_page(walk->out.sg) + (walk->out.offset >> PAGE_SHIFT));
-
- scatterwalk_map(&walk->out);
- walk->in.__addr = walk->out.__addr;
-
- if (diff) {
- walk->flags |= SKCIPHER_WALK_DIFF;
- scatterwalk_map(&walk->in);
- }
-
- return 0;
-}
-
-static int skcipher_walk_next(struct skcipher_walk *walk)
-{
- unsigned int bsize;
- unsigned int n;
-
- n = walk->total;
- bsize = min(walk->stride, max(n, walk->blocksize));
- n = scatterwalk_clamp(&walk->in, n);
- n = scatterwalk_clamp(&walk->out, n);
-
- if (unlikely(n < bsize)) {
- if (unlikely(walk->total < walk->blocksize))
- return skcipher_walk_done(walk, -EINVAL);
-
-slow_path:
- return skcipher_next_slow(walk, bsize);
- }
- walk->nbytes = n;
-
- if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) {
- if (!walk->page) {
- gfp_t gfp = skcipher_walk_gfp(walk);
-
- walk->page = (void *)__get_free_page(gfp);
- if (!walk->page)
- goto slow_path;
- }
- walk->flags |= SKCIPHER_WALK_COPY;
- return skcipher_next_copy(walk);
- }
-
- return skcipher_next_fast(walk);
-}
-
-static int skcipher_copy_iv(struct skcipher_walk *walk)
-{
- unsigned alignmask = walk->alignmask;
- unsigned ivsize = walk->ivsize;
- unsigned aligned_stride = ALIGN(walk->stride, alignmask + 1);
- unsigned size;
- u8 *iv;
-
- /* Min size for a buffer of stride + ivsize, aligned to alignmask */
- size = aligned_stride + ivsize +
- (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
-
- walk->buffer = kmalloc(size, skcipher_walk_gfp(walk));
- if (!walk->buffer)
- return -ENOMEM;
-
- iv = PTR_ALIGN(walk->buffer, alignmask + 1) + aligned_stride;
-
- walk->iv = memcpy(iv, walk->iv, walk->ivsize);
- return 0;
-}
-
-static int skcipher_walk_first(struct skcipher_walk *walk)
-{
- if (WARN_ON_ONCE(in_hardirq()))
- return -EDEADLK;
-
- walk->buffer = NULL;
- if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
- int err = skcipher_copy_iv(walk);
- if (err)
- return err;
- }
-
- walk->page = NULL;
-
- return skcipher_walk_next(walk);
-}
-
int skcipher_walk_virt(struct skcipher_walk *__restrict walk,
struct skcipher_request *__restrict req, bool atomic)
{
@@ -294,10 +49,8 @@ int skcipher_walk_virt(struct skcipher_walk *__restrict walk,
walk->nbytes = 0;
walk->iv = req->iv;
walk->oiv = req->iv;
- if ((req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) && !atomic)
- walk->flags = SKCIPHER_WALK_SLEEP;
- else
- walk->flags = 0;
+ if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP))
+ atomic = true;
if (unlikely(!walk->total))
return 0;
@@ -314,7 +67,7 @@ int skcipher_walk_virt(struct skcipher_walk *__restrict walk,
else
walk->stride = alg->walksize;
- return skcipher_walk_first(walk);
+ return skcipher_walk_first(walk, atomic);
}
EXPORT_SYMBOL_GPL(skcipher_walk_virt);
@@ -327,10 +80,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *__restrict walk,
walk->nbytes = 0;
walk->iv = req->iv;
walk->oiv = req->iv;
- if ((req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) && !atomic)
- walk->flags = SKCIPHER_WALK_SLEEP;
- else
- walk->flags = 0;
+ if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP))
+ atomic = true;
if (unlikely(!walk->total))
return 0;
@@ -343,7 +94,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *__restrict walk,
walk->ivsize = crypto_aead_ivsize(tfm);
walk->alignmask = crypto_aead_alignmask(tfm);
- return skcipher_walk_first(walk);
+ return skcipher_walk_first(walk, atomic);
}
int skcipher_walk_aead_encrypt(struct skcipher_walk *__restrict walk,
@@ -620,6 +371,7 @@ static const struct crypto_type crypto_skcipher_type = {
.maskset = CRYPTO_ALG_TYPE_SKCIPHER_MASK,
.type = CRYPTO_ALG_TYPE_SKCIPHER,
.tfmsize = offsetof(struct crypto_skcipher, base),
+ .algsize = offsetof(struct skcipher_alg, base),
};
int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn,
diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c
index a2d23a46924e..7529139fcc96 100644
--- a/crypto/sm3_generic.c
+++ b/crypto/sm3_generic.c
@@ -9,15 +9,10 @@
*/
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/types.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
-#include <linux/bitops.h>
-#include <asm/byteorder.h>
-#include <linux/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = {
0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F,
@@ -30,38 +25,28 @@ EXPORT_SYMBOL_GPL(sm3_zero_message_hash);
static int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- sm3_update(shash_desc_ctx(desc), data, len);
- return 0;
-}
-
-static int crypto_sm3_final(struct shash_desc *desc, u8 *out)
-{
- sm3_final(shash_desc_ctx(desc), out);
- return 0;
+ return sm3_base_do_update_blocks(desc, data, len, sm3_block_generic);
}
static int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash)
{
- struct sm3_state *sctx = shash_desc_ctx(desc);
-
- if (len)
- sm3_update(sctx, data, len);
- sm3_final(sctx, hash);
- return 0;
+ sm3_base_do_finup(desc, data, len, sm3_block_generic);
+ return sm3_base_finish(desc, hash);
}
static struct shash_alg sm3_alg = {
.digestsize = SM3_DIGEST_SIZE,
.init = sm3_base_init,
.update = crypto_sm3_update,
- .final = crypto_sm3_final,
.finup = crypto_sm3_finup,
- .descsize = sizeof(struct sm3_state),
+ .descsize = SM3_STATE_SIZE,
.base = {
.cra_name = "sm3",
.cra_driver_name = "sm3-generic",
.cra_priority = 100,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SM3_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -77,7 +62,7 @@ static void __exit sm3_generic_mod_fini(void)
crypto_unregister_shash(&sm3_alg);
}
-subsys_initcall(sm3_generic_mod_init);
+module_init(sm3_generic_mod_init);
module_exit(sm3_generic_mod_fini);
MODULE_LICENSE("GPL v2");
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
index 7df86369ac00..d57444e8428c 100644
--- a/crypto/sm4_generic.c
+++ b/crypto/sm4_generic.c
@@ -83,7 +83,7 @@ static void __exit sm4_fini(void)
crypto_unregister_alg(&sm4_alg);
}
-subsys_initcall(sm4_init);
+module_init(sm4_init);
module_exit(sm4_fini);
MODULE_DESCRIPTION("SM4 Cipher Algorithm");
diff --git a/crypto/streebog_generic.c b/crypto/streebog_generic.c
index dc625ffc54ad..57bbf70f4c22 100644
--- a/crypto/streebog_generic.c
+++ b/crypto/streebog_generic.c
@@ -13,9 +13,10 @@
*/
#include <crypto/internal/hash.h>
-#include <linux/module.h>
-#include <linux/crypto.h>
#include <crypto/streebog.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
static const struct streebog_uint512 buffer0 = { {
0, 0, 0, 0, 0, 0, 0, 0
@@ -919,17 +920,6 @@ static int streebog_init(struct shash_desc *desc)
return 0;
}
-static void streebog_pad(struct streebog_state *ctx)
-{
- if (ctx->fillsize >= STREEBOG_BLOCK_SIZE)
- return;
-
- memset(ctx->buffer + ctx->fillsize, 0,
- sizeof(ctx->buffer) - ctx->fillsize);
-
- ctx->buffer[ctx->fillsize] = 1;
-}
-
static void streebog_add512(const struct streebog_uint512 *x,
const struct streebog_uint512 *y,
struct streebog_uint512 *r)
@@ -984,16 +974,23 @@ static void streebog_stage2(struct streebog_state *ctx, const u8 *data)
streebog_add512(&ctx->Sigma, &m, &ctx->Sigma);
}
-static void streebog_stage3(struct streebog_state *ctx)
+static void streebog_stage3(struct streebog_state *ctx, const u8 *src,
+ unsigned int len)
{
struct streebog_uint512 buf = { { 0 } };
+ union {
+ u8 buffer[STREEBOG_BLOCK_SIZE];
+ struct streebog_uint512 m;
+ } u = {};
- buf.qword[0] = cpu_to_le64(ctx->fillsize << 3);
- streebog_pad(ctx);
+ buf.qword[0] = cpu_to_le64(len << 3);
+ memcpy(u.buffer, src, len);
+ u.buffer[len] = 1;
- streebog_g(&ctx->h, &ctx->N, &ctx->m);
+ streebog_g(&ctx->h, &ctx->N, &u.m);
streebog_add512(&ctx->N, &buf, &ctx->N);
- streebog_add512(&ctx->Sigma, &ctx->m, &ctx->Sigma);
+ streebog_add512(&ctx->Sigma, &u.m, &ctx->Sigma);
+ memzero_explicit(&u, sizeof(u));
streebog_g(&ctx->h, &buffer0, &ctx->N);
streebog_g(&ctx->h, &buffer0, &ctx->Sigma);
memcpy(&ctx->hash, &ctx->h, sizeof(struct streebog_uint512));
@@ -1003,42 +1000,22 @@ static int streebog_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct streebog_state *ctx = shash_desc_ctx(desc);
- size_t chunksize;
- if (ctx->fillsize) {
- chunksize = STREEBOG_BLOCK_SIZE - ctx->fillsize;
- if (chunksize > len)
- chunksize = len;
- memcpy(&ctx->buffer[ctx->fillsize], data, chunksize);
- ctx->fillsize += chunksize;
- len -= chunksize;
- data += chunksize;
-
- if (ctx->fillsize == STREEBOG_BLOCK_SIZE) {
- streebog_stage2(ctx, ctx->buffer);
- ctx->fillsize = 0;
- }
- }
-
- while (len >= STREEBOG_BLOCK_SIZE) {
+ do {
streebog_stage2(ctx, data);
data += STREEBOG_BLOCK_SIZE;
len -= STREEBOG_BLOCK_SIZE;
- }
+ } while (len >= STREEBOG_BLOCK_SIZE);
- if (len) {
- memcpy(&ctx->buffer, data, len);
- ctx->fillsize = len;
- }
- return 0;
+ return len;
}
-static int streebog_final(struct shash_desc *desc, u8 *digest)
+static int streebog_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *digest)
{
struct streebog_state *ctx = shash_desc_ctx(desc);
- streebog_stage3(ctx);
- ctx->fillsize = 0;
+ streebog_stage3(ctx, src, len);
if (crypto_shash_digestsize(desc->tfm) == STREEBOG256_DIGEST_SIZE)
memcpy(digest, &ctx->hash.qword[4], STREEBOG256_DIGEST_SIZE);
else
@@ -1050,11 +1027,12 @@ static struct shash_alg algs[2] = { {
.digestsize = STREEBOG256_DIGEST_SIZE,
.init = streebog_init,
.update = streebog_update,
- .final = streebog_final,
+ .finup = streebog_finup,
.descsize = sizeof(struct streebog_state),
.base = {
.cra_name = "streebog256",
.cra_driver_name = "streebog256-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = STREEBOG_BLOCK_SIZE,
.cra_module = THIS_MODULE,
},
@@ -1062,11 +1040,12 @@ static struct shash_alg algs[2] = { {
.digestsize = STREEBOG512_DIGEST_SIZE,
.init = streebog_init,
.update = streebog_update,
- .final = streebog_final,
+ .finup = streebog_finup,
.descsize = sizeof(struct streebog_state),
.base = {
.cra_name = "streebog512",
.cra_driver_name = "streebog512-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = STREEBOG_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -1082,7 +1061,7 @@ static void __exit streebog_mod_fini(void)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
-subsys_initcall(streebog_mod_init);
+module_init(streebog_mod_init);
module_exit(streebog_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 96f4a66be14c..d1d88debbd71 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Quick & dirty crypto testing module.
+ * Quick & dirty crypto benchmarking module.
*
- * This will only exist until we have a better testing mechanism
+ * This will only exist until we have a better benchmarking mechanism
* (e.g. a char device).
*
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
@@ -39,7 +39,7 @@
#include "tcrypt.h"
/*
- * Need slab memory for testing (size in number of pages).
+ * Need slab memory for benchmarking (size in number of pages).
*/
#define TVMEMSIZE 4
@@ -716,207 +716,6 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret)
return crypto_wait_req(ret, wait);
}
-struct test_mb_ahash_data {
- struct scatterlist sg[XBUFSIZE];
- char result[64];
- struct ahash_request *req;
- struct crypto_wait wait;
- char *xbuf[XBUFSIZE];
-};
-
-static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
- int *rc)
-{
- int i, err;
-
- /* Fire up a bunch of concurrent requests */
- err = crypto_ahash_digest(data[0].req);
-
- /* Wait for all requests to finish */
- err = crypto_wait_req(err, &data[0].wait);
- if (num_mb < 2)
- return err;
-
- for (i = 0; i < num_mb; i++) {
- rc[i] = ahash_request_err(data[i].req);
- if (rc[i]) {
- pr_info("concurrent request %d error %d\n", i, rc[i]);
- err = rc[i];
- }
- }
-
- return err;
-}
-
-static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
- int secs, u32 num_mb)
-{
- unsigned long start, end;
- int bcount;
- int ret = 0;
- int *rc;
-
- rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
- if (!rc)
- return -ENOMEM;
-
- for (start = jiffies, end = start + secs * HZ, bcount = 0;
- time_before(jiffies, end); bcount++) {
- ret = do_mult_ahash_op(data, num_mb, rc);
- if (ret)
- goto out;
- }
-
- pr_cont("%d operations in %d seconds (%llu bytes)\n",
- bcount * num_mb, secs, (u64)bcount * blen * num_mb);
-
-out:
- kfree(rc);
- return ret;
-}
-
-static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
- u32 num_mb)
-{
- unsigned long cycles = 0;
- int ret = 0;
- int i;
- int *rc;
-
- rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
- if (!rc)
- return -ENOMEM;
-
- /* Warm-up run. */
- for (i = 0; i < 4; i++) {
- ret = do_mult_ahash_op(data, num_mb, rc);
- if (ret)
- goto out;
- }
-
- /* The real thing. */
- for (i = 0; i < 8; i++) {
- cycles_t start, end;
-
- start = get_cycles();
- ret = do_mult_ahash_op(data, num_mb, rc);
- end = get_cycles();
-
- if (ret)
- goto out;
-
- cycles += end - start;
- }
-
- pr_cont("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / (8 * num_mb), blen);
-
-out:
- kfree(rc);
- return ret;
-}
-
-static void test_mb_ahash_speed(const char *algo, unsigned int secs,
- struct hash_speed *speed, u32 num_mb)
-{
- struct test_mb_ahash_data *data;
- struct crypto_ahash *tfm;
- unsigned int i, j, k;
- int ret;
-
- data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
- if (!data)
- return;
-
- tfm = crypto_alloc_ahash(algo, 0, 0);
- if (IS_ERR(tfm)) {
- pr_err("failed to load transform for %s: %ld\n",
- algo, PTR_ERR(tfm));
- goto free_data;
- }
-
- for (i = 0; i < num_mb; ++i) {
- if (testmgr_alloc_buf(data[i].xbuf))
- goto out;
-
- crypto_init_wait(&data[i].wait);
-
- data[i].req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!data[i].req) {
- pr_err("alg: hash: Failed to allocate request for %s\n",
- algo);
- goto out;
- }
-
-
- if (i) {
- ahash_request_set_callback(data[i].req, 0, NULL, NULL);
- ahash_request_chain(data[i].req, data[0].req);
- } else
- ahash_request_set_callback(data[0].req, 0,
- crypto_req_done,
- &data[0].wait);
-
- sg_init_table(data[i].sg, XBUFSIZE);
- for (j = 0; j < XBUFSIZE; j++) {
- sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE);
- memset(data[i].xbuf[j], 0xff, PAGE_SIZE);
- }
- }
-
- pr_info("\ntesting speed of multibuffer %s (%s)\n", algo,
- get_driver_name(crypto_ahash, tfm));
-
- for (i = 0; speed[i].blen != 0; i++) {
- /* For some reason this only tests digests. */
- if (speed[i].blen != speed[i].plen)
- continue;
-
- if (speed[i].blen > XBUFSIZE * PAGE_SIZE) {
- pr_err("template (%u) too big for tvmem (%lu)\n",
- speed[i].blen, XBUFSIZE * PAGE_SIZE);
- goto out;
- }
-
- if (klen)
- crypto_ahash_setkey(tfm, tvmem[0], klen);
-
- for (k = 0; k < num_mb; k++)
- ahash_request_set_crypt(data[k].req, data[k].sg,
- data[k].result, speed[i].blen);
-
- pr_info("test%3u "
- "(%5u byte blocks,%5u bytes per update,%4u updates): ",
- i, speed[i].blen, speed[i].plen,
- speed[i].blen / speed[i].plen);
-
- if (secs) {
- ret = test_mb_ahash_jiffies(data, speed[i].blen, secs,
- num_mb);
- cond_resched();
- } else {
- ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb);
- }
-
-
- if (ret) {
- pr_err("At least one hashing failed ret=%d\n", ret);
- break;
- }
- }
-
-out:
- ahash_request_free(data[0].req);
-
- for (k = 0; k < num_mb; ++k)
- testmgr_free_buf(data[k].xbuf);
-
- crypto_free_ahash(tfm);
-
-free_data:
- kfree(data);
-}
-
static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
char *out, int secs)
{
@@ -2584,36 +2383,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_ahash_speed("sm3", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
fallthrough;
- case 450:
- test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 451:
- test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 452:
- test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 453:
- test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 454:
- test_mb_ahash_speed("streebog256", sec,
- generic_hash_speed_template, num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 455:
- test_mb_ahash_speed("streebog512", sec,
- generic_hash_speed_template, num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
case 499:
break;
@@ -3099,5 +2868,5 @@ module_param(klen, uint, 0);
MODULE_PARM_DESC(klen, "Key length (defaults to 0)");
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Quick & dirty crypto testing module");
+MODULE_DESCRIPTION("Quick & dirty crypto benchmarking module");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 96c843a24607..7f938ac93e58 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * Quick & dirty crypto testing module.
+ * Quick & dirty crypto benchmarking module.
*
- * This will only exist until we have a better testing mechanism
+ * This will only exist until we have a better benchmarking mechanism
* (e.g. a char device).
*
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
diff --git a/crypto/tea.c b/crypto/tea.c
index b315da8c89eb..cb05140e3470 100644
--- a/crypto/tea.c
+++ b/crypto/tea.c
@@ -255,7 +255,7 @@ MODULE_ALIAS_CRYPTO("tea");
MODULE_ALIAS_CRYPTO("xtea");
MODULE_ALIAS_CRYPTO("xeta");
-subsys_initcall(tea_mod_init);
+module_init(tea_mod_init);
module_exit(tea_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 82977ea25db3..72005074a5c2 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -43,22 +43,17 @@ MODULE_IMPORT_NS("CRYPTO_INTERNAL");
static bool notests;
module_param(notests, bool, 0644);
-MODULE_PARM_DESC(notests, "disable crypto self-tests");
+MODULE_PARM_DESC(notests, "disable all crypto self-tests");
-static bool panic_on_fail;
-module_param(panic_on_fail, bool, 0444);
-
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
-static bool noextratests;
-module_param(noextratests, bool, 0644);
-MODULE_PARM_DESC(noextratests, "disable expensive crypto self-tests");
+static bool noslowtests;
+module_param(noslowtests, bool, 0644);
+MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests");
static unsigned int fuzz_iterations = 100;
module_param(fuzz_iterations, uint, 0644);
MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations");
-#endif
-#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+#ifndef CONFIG_CRYPTO_SELFTESTS
/* a perfect nop */
int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
@@ -324,10 +319,9 @@ struct testvec_config {
/*
* The following are the lists of testvec_configs to test for each algorithm
- * type when the basic crypto self-tests are enabled, i.e. when
- * CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is unset. They aim to provide good test
- * coverage, while keeping the test time much shorter than the full fuzz tests
- * so that the basic tests can be enabled in a wider range of circumstances.
+ * type when the fast crypto self-tests are enabled. They aim to provide good
+ * test coverage, while keeping the test time much shorter than the full tests
+ * so that the fast tests can be used to fulfill FIPS 140 testing requirements.
*/
/* Configs for skciphers and aeads */
@@ -876,8 +870,6 @@ static int prepare_keybuf(const u8 *key, unsigned int ksize,
err; \
})
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
-
/*
* The fuzz tests use prandom instead of the normal Linux RNG since they don't
* need cryptographically secure random numbers. This greatly improves the
@@ -1242,15 +1234,6 @@ too_long:
algname);
return -ENAMETOOLONG;
}
-#else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
-static void crypto_disable_simd_for_test(void)
-{
-}
-
-static void crypto_reenable_simd_for_test(void)
-{
-}
-#endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
static int build_hash_sglist(struct test_sglist *tsgl,
const struct hash_testvec *vec,
@@ -1691,8 +1674,7 @@ static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num,
return err;
}
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
- if (!noextratests) {
+ if (!noslowtests) {
struct rnd_state rng;
struct testvec_config cfg;
char cfgname[TESTVEC_CONFIG_NAMELEN];
@@ -1709,17 +1691,15 @@ static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num,
cond_resched();
}
}
-#endif
return 0;
}
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
/*
* Generate a hash test vector from the given implementation.
* Assumes the buffers in 'vec' were already allocated.
*/
static void generate_random_hash_testvec(struct rnd_state *rng,
- struct shash_desc *desc,
+ struct ahash_request *req,
struct hash_testvec *vec,
unsigned int maxkeysize,
unsigned int maxdatasize,
@@ -1741,16 +1721,17 @@ static void generate_random_hash_testvec(struct rnd_state *rng,
vec->ksize = prandom_u32_inclusive(rng, 1, maxkeysize);
generate_random_bytes(rng, (u8 *)vec->key, vec->ksize);
- vec->setkey_error = crypto_shash_setkey(desc->tfm, vec->key,
- vec->ksize);
+ vec->setkey_error = crypto_ahash_setkey(
+ crypto_ahash_reqtfm(req), vec->key, vec->ksize);
/* If the key couldn't be set, no need to continue to digest. */
if (vec->setkey_error)
goto done;
}
/* Digest */
- vec->digest_error = crypto_shash_digest(desc, vec->plaintext,
- vec->psize, (u8 *)vec->digest);
+ vec->digest_error = crypto_hash_digest(
+ crypto_ahash_reqtfm(req), vec->plaintext,
+ vec->psize, (u8 *)vec->digest);
done:
snprintf(name, max_namelen, "\"random: psize=%u ksize=%u\"",
vec->psize, vec->ksize);
@@ -1775,8 +1756,8 @@ static int test_hash_vs_generic_impl(const char *generic_driver,
const char *driver = crypto_ahash_driver_name(tfm);
struct rnd_state rng;
char _generic_driver[CRYPTO_MAX_ALG_NAME];
- struct crypto_shash *generic_tfm = NULL;
- struct shash_desc *generic_desc = NULL;
+ struct ahash_request *generic_req = NULL;
+ struct crypto_ahash *generic_tfm = NULL;
unsigned int i;
struct hash_testvec vec = { 0 };
char vec_name[64];
@@ -1784,7 +1765,7 @@ static int test_hash_vs_generic_impl(const char *generic_driver,
char cfgname[TESTVEC_CONFIG_NAMELEN];
int err;
- if (noextratests)
+ if (noslowtests)
return 0;
init_rnd_state(&rng);
@@ -1799,7 +1780,7 @@ static int test_hash_vs_generic_impl(const char *generic_driver,
if (strcmp(generic_driver, driver) == 0) /* Already the generic impl? */
return 0;
- generic_tfm = crypto_alloc_shash(generic_driver, 0, 0);
+ generic_tfm = crypto_alloc_ahash(generic_driver, 0, 0);
if (IS_ERR(generic_tfm)) {
err = PTR_ERR(generic_tfm);
if (err == -ENOENT) {
@@ -1818,27 +1799,25 @@ static int test_hash_vs_generic_impl(const char *generic_driver,
goto out;
}
- generic_desc = kzalloc(sizeof(*desc) +
- crypto_shash_descsize(generic_tfm), GFP_KERNEL);
- if (!generic_desc) {
+ generic_req = ahash_request_alloc(generic_tfm, GFP_KERNEL);
+ if (!generic_req) {
err = -ENOMEM;
goto out;
}
- generic_desc->tfm = generic_tfm;
/* Check the algorithm properties for consistency. */
- if (digestsize != crypto_shash_digestsize(generic_tfm)) {
+ if (digestsize != crypto_ahash_digestsize(generic_tfm)) {
pr_err("alg: hash: digestsize for %s (%u) doesn't match generic impl (%u)\n",
driver, digestsize,
- crypto_shash_digestsize(generic_tfm));
+ crypto_ahash_digestsize(generic_tfm));
err = -EINVAL;
goto out;
}
- if (blocksize != crypto_shash_blocksize(generic_tfm)) {
+ if (blocksize != crypto_ahash_blocksize(generic_tfm)) {
pr_err("alg: hash: blocksize for %s (%u) doesn't match generic impl (%u)\n",
- driver, blocksize, crypto_shash_blocksize(generic_tfm));
+ driver, blocksize, crypto_ahash_blocksize(generic_tfm));
err = -EINVAL;
goto out;
}
@@ -1857,7 +1836,7 @@ static int test_hash_vs_generic_impl(const char *generic_driver,
}
for (i = 0; i < fuzz_iterations * 8; i++) {
- generate_random_hash_testvec(&rng, generic_desc, &vec,
+ generate_random_hash_testvec(&rng, generic_req, &vec,
maxkeysize, maxdatasize,
vec_name, sizeof(vec_name));
generate_random_testvec_config(&rng, cfg, cfgname,
@@ -1875,21 +1854,10 @@ out:
kfree(vec.key);
kfree(vec.plaintext);
kfree(vec.digest);
- crypto_free_shash(generic_tfm);
- kfree_sensitive(generic_desc);
+ ahash_request_free(generic_req);
+ crypto_free_ahash(generic_tfm);
return err;
}
-#else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
-static int test_hash_vs_generic_impl(const char *generic_driver,
- unsigned int maxkeysize,
- struct ahash_request *req,
- struct shash_desc *desc,
- struct test_sglist *tsgl,
- u8 *hashstate)
-{
- return 0;
-}
-#endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
static int alloc_shash(const char *driver, u32 type, u32 mask,
struct crypto_shash **tfm_ret,
@@ -1900,7 +1868,7 @@ static int alloc_shash(const char *driver, u32 type, u32 mask,
tfm = crypto_alloc_shash(driver, type, mask);
if (IS_ERR(tfm)) {
- if (PTR_ERR(tfm) == -ENOENT) {
+ if (PTR_ERR(tfm) == -ENOENT || PTR_ERR(tfm) == -EEXIST) {
/*
* This algorithm is only available through the ahash
* API, not the shash API, so skip the shash tests.
@@ -2263,8 +2231,7 @@ static int test_aead_vec(int enc, const struct aead_testvec *vec,
return err;
}
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
- if (!noextratests) {
+ if (!noslowtests) {
struct rnd_state rng;
struct testvec_config cfg;
char cfgname[TESTVEC_CONFIG_NAMELEN];
@@ -2281,13 +2248,10 @@ static int test_aead_vec(int enc, const struct aead_testvec *vec,
cond_resched();
}
}
-#endif
return 0;
}
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
-
-struct aead_extra_tests_ctx {
+struct aead_slow_tests_ctx {
struct rnd_state rng;
struct aead_request *req;
struct crypto_aead *tfm;
@@ -2462,8 +2426,7 @@ static void generate_random_aead_testvec(struct rnd_state *rng,
vec->alen, vec->plen, authsize, vec->klen, vec->novrfy);
}
-static void try_to_generate_inauthentic_testvec(
- struct aead_extra_tests_ctx *ctx)
+static void try_to_generate_inauthentic_testvec(struct aead_slow_tests_ctx *ctx)
{
int i;
@@ -2482,7 +2445,7 @@ static void try_to_generate_inauthentic_testvec(
* Generate inauthentic test vectors (i.e. ciphertext, AAD pairs that aren't the
* result of an encryption with the key) and verify that decryption fails.
*/
-static int test_aead_inauthentic_inputs(struct aead_extra_tests_ctx *ctx)
+static int test_aead_inauthentic_inputs(struct aead_slow_tests_ctx *ctx)
{
unsigned int i;
int err;
@@ -2517,7 +2480,7 @@ static int test_aead_inauthentic_inputs(struct aead_extra_tests_ctx *ctx)
* Test the AEAD algorithm against the corresponding generic implementation, if
* one is available.
*/
-static int test_aead_vs_generic_impl(struct aead_extra_tests_ctx *ctx)
+static int test_aead_vs_generic_impl(struct aead_slow_tests_ctx *ctx)
{
struct crypto_aead *tfm = ctx->tfm;
const char *algname = crypto_aead_alg(tfm)->base.cra_name;
@@ -2621,15 +2584,15 @@ out:
return err;
}
-static int test_aead_extra(const struct alg_test_desc *test_desc,
- struct aead_request *req,
- struct cipher_test_sglists *tsgls)
+static int test_aead_slow(const struct alg_test_desc *test_desc,
+ struct aead_request *req,
+ struct cipher_test_sglists *tsgls)
{
- struct aead_extra_tests_ctx *ctx;
+ struct aead_slow_tests_ctx *ctx;
unsigned int i;
int err;
- if (noextratests)
+ if (noslowtests)
return 0;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -2671,14 +2634,6 @@ out:
kfree(ctx);
return err;
}
-#else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
-static int test_aead_extra(const struct alg_test_desc *test_desc,
- struct aead_request *req,
- struct cipher_test_sglists *tsgls)
-{
- return 0;
-}
-#endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
static int test_aead(int enc, const struct aead_test_suite *suite,
struct aead_request *req,
@@ -2744,7 +2699,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
if (err)
goto out;
- err = test_aead_extra(desc, req, tsgls);
+ err = test_aead_slow(desc, req, tsgls);
out:
free_cipher_test_sglists(tsgls);
aead_request_free(req);
@@ -3018,8 +2973,7 @@ static int test_skcipher_vec(int enc, const struct cipher_testvec *vec,
return err;
}
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
- if (!noextratests) {
+ if (!noslowtests) {
struct rnd_state rng;
struct testvec_config cfg;
char cfgname[TESTVEC_CONFIG_NAMELEN];
@@ -3036,11 +2990,9 @@ static int test_skcipher_vec(int enc, const struct cipher_testvec *vec,
cond_resched();
}
}
-#endif
return 0;
}
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
/*
* Generate a symmetric cipher test vector from the given implementation.
* Assumes the buffers in 'vec' were already allocated.
@@ -3123,7 +3075,7 @@ static int test_skcipher_vs_generic_impl(const char *generic_driver,
char cfgname[TESTVEC_CONFIG_NAMELEN];
int err;
- if (noextratests)
+ if (noslowtests)
return 0;
init_rnd_state(&rng);
@@ -3239,14 +3191,6 @@ out:
skcipher_request_free(generic_req);
return err;
}
-#else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
-static int test_skcipher_vs_generic_impl(const char *generic_driver,
- struct skcipher_request *req,
- struct cipher_test_sglists *tsgls)
-{
- return 0;
-}
-#endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
static int test_skcipher(int enc, const struct cipher_test_suite *suite,
struct skcipher_request *req,
@@ -5407,12 +5351,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.test = alg_test_null,
.fips_allowed = 1,
}, {
- .alg = "poly1305",
- .test = alg_test_hash,
- .suite = {
- .hash = __VECS(poly1305_tv_template)
- }
- }, {
.alg = "polyval",
.test = alg_test_hash,
.suite = {
@@ -5769,9 +5707,8 @@ static void testmgr_onetime_init(void)
alg_check_test_descs_order();
alg_check_testvec_configs();
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
- pr_warn("alg: extra crypto tests enabled. This is intended for developer use only.\n");
-#endif
+ if (!noslowtests)
+ pr_warn("alg: full crypto tests enabled. This is intended for developer use only.\n");
}
static int alg_find_test(const char *alg)
@@ -5860,11 +5797,10 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
test_done:
if (rc) {
- if (fips_enabled || panic_on_fail) {
+ if (fips_enabled) {
fips_fail_notify();
- panic("alg: self-tests for %s (%s) failed in %s mode!\n",
- driver, alg,
- fips_enabled ? "fips" : "panic_on_fail");
+ panic("alg: self-tests for %s (%s) failed in fips mode!\n",
+ driver, alg);
}
pr_warn("alg: self-tests for %s using %s failed (rc=%d)",
alg, driver, rc);
@@ -5909,6 +5845,6 @@ non_fips_alg:
return alg_fips_disabled(driver, alg);
}
-#endif /* CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */
+#endif /* CONFIG_CRYPTO_SELFTESTS */
EXPORT_SYMBOL_GPL(alg_test);
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index afc10af59b0a..32d099ac9e73 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -8836,294 +8836,6 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = {
},
};
-/*
- * Poly1305 test vectors from RFC7539 A.3.
- */
-
-static const struct hash_testvec poly1305_tv_template[] = {
- { /* Test Vector #1 */
- .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .psize = 96,
- .digest = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Test Vector #2 */
- .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70"
- "\xf0\xef\xca\x96\x22\x7a\x86\x3e"
- "\x41\x6e\x79\x20\x73\x75\x62\x6d"
- "\x69\x73\x73\x69\x6f\x6e\x20\x74"
- "\x6f\x20\x74\x68\x65\x20\x49\x45"
- "\x54\x46\x20\x69\x6e\x74\x65\x6e"
- "\x64\x65\x64\x20\x62\x79\x20\x74"
- "\x68\x65\x20\x43\x6f\x6e\x74\x72"
- "\x69\x62\x75\x74\x6f\x72\x20\x66"
- "\x6f\x72\x20\x70\x75\x62\x6c\x69"
- "\x63\x61\x74\x69\x6f\x6e\x20\x61"
- "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
- "\x20\x70\x61\x72\x74\x20\x6f\x66"
- "\x20\x61\x6e\x20\x49\x45\x54\x46"
- "\x20\x49\x6e\x74\x65\x72\x6e\x65"
- "\x74\x2d\x44\x72\x61\x66\x74\x20"
- "\x6f\x72\x20\x52\x46\x43\x20\x61"
- "\x6e\x64\x20\x61\x6e\x79\x20\x73"
- "\x74\x61\x74\x65\x6d\x65\x6e\x74"
- "\x20\x6d\x61\x64\x65\x20\x77\x69"
- "\x74\x68\x69\x6e\x20\x74\x68\x65"
- "\x20\x63\x6f\x6e\x74\x65\x78\x74"
- "\x20\x6f\x66\x20\x61\x6e\x20\x49"
- "\x45\x54\x46\x20\x61\x63\x74\x69"
- "\x76\x69\x74\x79\x20\x69\x73\x20"
- "\x63\x6f\x6e\x73\x69\x64\x65\x72"
- "\x65\x64\x20\x61\x6e\x20\x22\x49"
- "\x45\x54\x46\x20\x43\x6f\x6e\x74"
- "\x72\x69\x62\x75\x74\x69\x6f\x6e"
- "\x22\x2e\x20\x53\x75\x63\x68\x20"
- "\x73\x74\x61\x74\x65\x6d\x65\x6e"
- "\x74\x73\x20\x69\x6e\x63\x6c\x75"
- "\x64\x65\x20\x6f\x72\x61\x6c\x20"
- "\x73\x74\x61\x74\x65\x6d\x65\x6e"
- "\x74\x73\x20\x69\x6e\x20\x49\x45"
- "\x54\x46\x20\x73\x65\x73\x73\x69"
- "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
- "\x77\x65\x6c\x6c\x20\x61\x73\x20"
- "\x77\x72\x69\x74\x74\x65\x6e\x20"
- "\x61\x6e\x64\x20\x65\x6c\x65\x63"
- "\x74\x72\x6f\x6e\x69\x63\x20\x63"
- "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
- "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
- "\x64\x65\x20\x61\x74\x20\x61\x6e"
- "\x79\x20\x74\x69\x6d\x65\x20\x6f"
- "\x72\x20\x70\x6c\x61\x63\x65\x2c"
- "\x20\x77\x68\x69\x63\x68\x20\x61"
- "\x72\x65\x20\x61\x64\x64\x72\x65"
- "\x73\x73\x65\x64\x20\x74\x6f",
- .psize = 407,
- .digest = "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70"
- "\xf0\xef\xca\x96\x22\x7a\x86\x3e",
- }, { /* Test Vector #3 */
- .plaintext = "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70"
- "\xf0\xef\xca\x96\x22\x7a\x86\x3e"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x41\x6e\x79\x20\x73\x75\x62\x6d"
- "\x69\x73\x73\x69\x6f\x6e\x20\x74"
- "\x6f\x20\x74\x68\x65\x20\x49\x45"
- "\x54\x46\x20\x69\x6e\x74\x65\x6e"
- "\x64\x65\x64\x20\x62\x79\x20\x74"
- "\x68\x65\x20\x43\x6f\x6e\x74\x72"
- "\x69\x62\x75\x74\x6f\x72\x20\x66"
- "\x6f\x72\x20\x70\x75\x62\x6c\x69"
- "\x63\x61\x74\x69\x6f\x6e\x20\x61"
- "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
- "\x20\x70\x61\x72\x74\x20\x6f\x66"
- "\x20\x61\x6e\x20\x49\x45\x54\x46"
- "\x20\x49\x6e\x74\x65\x72\x6e\x65"
- "\x74\x2d\x44\x72\x61\x66\x74\x20"
- "\x6f\x72\x20\x52\x46\x43\x20\x61"
- "\x6e\x64\x20\x61\x6e\x79\x20\x73"
- "\x74\x61\x74\x65\x6d\x65\x6e\x74"
- "\x20\x6d\x61\x64\x65\x20\x77\x69"
- "\x74\x68\x69\x6e\x20\x74\x68\x65"
- "\x20\x63\x6f\x6e\x74\x65\x78\x74"
- "\x20\x6f\x66\x20\x61\x6e\x20\x49"
- "\x45\x54\x46\x20\x61\x63\x74\x69"
- "\x76\x69\x74\x79\x20\x69\x73\x20"
- "\x63\x6f\x6e\x73\x69\x64\x65\x72"
- "\x65\x64\x20\x61\x6e\x20\x22\x49"
- "\x45\x54\x46\x20\x43\x6f\x6e\x74"
- "\x72\x69\x62\x75\x74\x69\x6f\x6e"
- "\x22\x2e\x20\x53\x75\x63\x68\x20"
- "\x73\x74\x61\x74\x65\x6d\x65\x6e"
- "\x74\x73\x20\x69\x6e\x63\x6c\x75"
- "\x64\x65\x20\x6f\x72\x61\x6c\x20"
- "\x73\x74\x61\x74\x65\x6d\x65\x6e"
- "\x74\x73\x20\x69\x6e\x20\x49\x45"
- "\x54\x46\x20\x73\x65\x73\x73\x69"
- "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
- "\x77\x65\x6c\x6c\x20\x61\x73\x20"
- "\x77\x72\x69\x74\x74\x65\x6e\x20"
- "\x61\x6e\x64\x20\x65\x6c\x65\x63"
- "\x74\x72\x6f\x6e\x69\x63\x20\x63"
- "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
- "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
- "\x64\x65\x20\x61\x74\x20\x61\x6e"
- "\x79\x20\x74\x69\x6d\x65\x20\x6f"
- "\x72\x20\x70\x6c\x61\x63\x65\x2c"
- "\x20\x77\x68\x69\x63\x68\x20\x61"
- "\x72\x65\x20\x61\x64\x64\x72\x65"
- "\x73\x73\x65\x64\x20\x74\x6f",
- .psize = 407,
- .digest = "\xf3\x47\x7e\x7c\xd9\x54\x17\xaf"
- "\x89\xa6\xb8\x79\x4c\x31\x0c\xf0",
- }, { /* Test Vector #4 */
- .plaintext = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
- "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
- "\x47\x39\x17\xc1\x40\x2b\x80\x09"
- "\x9d\xca\x5c\xbc\x20\x70\x75\xc0"
- "\x27\x54\x77\x61\x73\x20\x62\x72"
- "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
- "\x6e\x64\x20\x74\x68\x65\x20\x73"
- "\x6c\x69\x74\x68\x79\x20\x74\x6f"
- "\x76\x65\x73\x0a\x44\x69\x64\x20"
- "\x67\x79\x72\x65\x20\x61\x6e\x64"
- "\x20\x67\x69\x6d\x62\x6c\x65\x20"
- "\x69\x6e\x20\x74\x68\x65\x20\x77"
- "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
- "\x20\x6d\x69\x6d\x73\x79\x20\x77"
- "\x65\x72\x65\x20\x74\x68\x65\x20"
- "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
- "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
- "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
- "\x72\x61\x74\x68\x73\x20\x6f\x75"
- "\x74\x67\x72\x61\x62\x65\x2e",
- .psize = 159,
- .digest = "\x45\x41\x66\x9a\x7e\xaa\xee\x61"
- "\xe7\x08\xdc\x7c\xbc\xc5\xeb\x62",
- }, { /* Test Vector #5 */
- .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff",
- .psize = 48,
- .digest = "\x03\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Test Vector #6 */
- .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\x02\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .psize = 48,
- .digest = "\x03\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Test Vector #7 */
- .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xf0\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\x11\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .psize = 80,
- .digest = "\x05\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Test Vector #8 */
- .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xfb\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
- "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
- "\x01\x01\x01\x01\x01\x01\x01\x01"
- "\x01\x01\x01\x01\x01\x01\x01\x01",
- .psize = 80,
- .digest = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Test Vector #9 */
- .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xfd\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff",
- .psize = 48,
- .digest = "\xfa\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff",
- }, { /* Test Vector #10 */
- .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x04\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xe3\x35\x94\xd7\x50\x5e\x43\xb9"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x33\x94\xd7\x50\x5e\x43\x79\xcd"
- "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .psize = 96,
- .digest = "\x14\x00\x00\x00\x00\x00\x00\x00"
- "\x55\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Test Vector #11 */
- .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x04\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\xe3\x35\x94\xd7\x50\x5e\x43\xb9"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x33\x94\xd7\x50\x5e\x43\x79\xcd"
- "\x01\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .psize = 80,
- .digest = "\x13\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- }, { /* Regression test for overflow in AVX2 implementation */
- .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff",
- .psize = 300,
- .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
- "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
- }
-};
-
/* NHPoly1305 test vectors from https://github.com/google/adiantum */
static const struct hash_testvec nhpoly1305_tv_template[] = {
{
diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c
index 19f2b365e140..368018cfa9bf 100644
--- a/crypto/twofish_generic.c
+++ b/crypto/twofish_generic.c
@@ -187,7 +187,7 @@ static void __exit twofish_mod_fini(void)
crypto_unregister_alg(&alg);
}
-subsys_initcall(twofish_mod_init);
+module_init(twofish_mod_init);
module_exit(twofish_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/wp512.c b/crypto/wp512.c
index 07994e5ebf4e..41f13d490333 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1169,7 +1169,7 @@ MODULE_ALIAS_CRYPTO("wp512");
MODULE_ALIAS_CRYPTO("wp384");
MODULE_ALIAS_CRYPTO("wp256");
-subsys_initcall(wp512_mod_init);
+module_init(wp512_mod_init);
module_exit(wp512_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index fc785667b134..6c5f6766fdd6 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -8,9 +8,12 @@
#include <crypto/internal/cipher.h>
#include <crypto/internal/hash.h>
+#include <crypto/utils.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
static u_int32_t ks[12] = {0x01010101, 0x01010101, 0x01010101, 0x01010101,
0x02020202, 0x02020202, 0x02020202, 0x02020202,
@@ -30,22 +33,6 @@ struct xcbc_tfm_ctx {
u8 consts[];
};
-/*
- * +------------------------
- * | <shash desc>
- * +------------------------
- * | xcbc_desc_ctx
- * +------------------------
- * | odds (block size)
- * +------------------------
- * | prev (block size)
- * +------------------------
- */
-struct xcbc_desc_ctx {
- unsigned int len;
- u8 odds[];
-};
-
#define XCBC_BLOCKSIZE 16
static int crypto_xcbc_digest_setkey(struct crypto_shash *parent,
@@ -70,13 +57,10 @@ static int crypto_xcbc_digest_setkey(struct crypto_shash *parent,
static int crypto_xcbc_digest_init(struct shash_desc *pdesc)
{
- struct xcbc_desc_ctx *ctx = shash_desc_ctx(pdesc);
int bs = crypto_shash_blocksize(pdesc->tfm);
- u8 *prev = &ctx->odds[bs];
+ u8 *prev = shash_desc_ctx(pdesc);
- ctx->len = 0;
memset(prev, 0, bs);
-
return 0;
}
@@ -85,77 +69,36 @@ static int crypto_xcbc_digest_update(struct shash_desc *pdesc, const u8 *p,
{
struct crypto_shash *parent = pdesc->tfm;
struct xcbc_tfm_ctx *tctx = crypto_shash_ctx(parent);
- struct xcbc_desc_ctx *ctx = shash_desc_ctx(pdesc);
struct crypto_cipher *tfm = tctx->child;
int bs = crypto_shash_blocksize(parent);
- u8 *odds = ctx->odds;
- u8 *prev = odds + bs;
-
- /* checking the data can fill the block */
- if ((ctx->len + len) <= bs) {
- memcpy(odds + ctx->len, p, len);
- ctx->len += len;
- return 0;
- }
-
- /* filling odds with new data and encrypting it */
- memcpy(odds + ctx->len, p, bs - ctx->len);
- len -= bs - ctx->len;
- p += bs - ctx->len;
-
- crypto_xor(prev, odds, bs);
- crypto_cipher_encrypt_one(tfm, prev, prev);
+ u8 *prev = shash_desc_ctx(pdesc);
- /* clearing the length */
- ctx->len = 0;
-
- /* encrypting the rest of data */
- while (len > bs) {
+ do {
crypto_xor(prev, p, bs);
crypto_cipher_encrypt_one(tfm, prev, prev);
p += bs;
len -= bs;
- }
-
- /* keeping the surplus of blocksize */
- if (len) {
- memcpy(odds, p, len);
- ctx->len = len;
- }
-
- return 0;
+ } while (len >= bs);
+ return len;
}
-static int crypto_xcbc_digest_final(struct shash_desc *pdesc, u8 *out)
+static int crypto_xcbc_digest_finup(struct shash_desc *pdesc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct crypto_shash *parent = pdesc->tfm;
struct xcbc_tfm_ctx *tctx = crypto_shash_ctx(parent);
- struct xcbc_desc_ctx *ctx = shash_desc_ctx(pdesc);
struct crypto_cipher *tfm = tctx->child;
int bs = crypto_shash_blocksize(parent);
- u8 *odds = ctx->odds;
- u8 *prev = odds + bs;
+ u8 *prev = shash_desc_ctx(pdesc);
unsigned int offset = 0;
- if (ctx->len != bs) {
- unsigned int rlen;
- u8 *p = odds + ctx->len;
-
- *p = 0x80;
- p++;
-
- rlen = bs - ctx->len -1;
- if (rlen)
- memset(p, 0, rlen);
-
+ crypto_xor(prev, src, len);
+ if (len != bs) {
+ prev[len] ^= 0x80;
offset += bs;
}
-
- crypto_xor(prev, odds, bs);
crypto_xor(prev, &tctx->consts[offset], bs);
-
crypto_cipher_encrypt_one(tfm, out, prev);
-
return 0;
}
@@ -216,17 +159,18 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.base.cra_ctxsize = sizeof(struct xcbc_tfm_ctx) +
alg->cra_blocksize * 2;
+ inst->alg.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO;
inst->alg.digestsize = alg->cra_blocksize;
- inst->alg.descsize = sizeof(struct xcbc_desc_ctx) +
- alg->cra_blocksize * 2;
+ inst->alg.descsize = alg->cra_blocksize;
inst->alg.base.cra_init = xcbc_init_tfm;
inst->alg.base.cra_exit = xcbc_exit_tfm;
inst->alg.init = crypto_xcbc_digest_init;
inst->alg.update = crypto_xcbc_digest_update;
- inst->alg.final = crypto_xcbc_digest_final;
+ inst->alg.finup = crypto_xcbc_digest_finup;
inst->alg.setkey = crypto_xcbc_digest_setkey;
inst->free = shash_free_singlespawn_instance;
@@ -255,7 +199,7 @@ static void __exit crypto_xcbc_module_exit(void)
crypto_unregister_template(&crypto_xcbc_tmpl);
}
-subsys_initcall(crypto_xcbc_module_init);
+module_init(crypto_xcbc_module_init);
module_exit(crypto_xcbc_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/xctr.c b/crypto/xctr.c
index 9c536ab6d2e5..607ab82cb19b 100644
--- a/crypto/xctr.c
+++ b/crypto/xctr.c
@@ -182,7 +182,7 @@ static void __exit crypto_xctr_module_exit(void)
crypto_unregister_template(&crypto_xctr_tmpl);
}
-subsys_initcall(crypto_xctr_module_init);
+module_init(crypto_xctr_module_init);
module_exit(crypto_xctr_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/xts.c b/crypto/xts.c
index 31529c9ef08f..3da8f5e053d6 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -363,7 +363,7 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb)
err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
cipher_name, 0, mask);
- if (err == -ENOENT) {
+ if (err == -ENOENT && memcmp(cipher_name, "ecb(", 4)) {
err = -ENAMETOOLONG;
if (snprintf(name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
cipher_name) >= CRYPTO_MAX_ALG_NAME)
@@ -397,7 +397,7 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb)
/* Alas we screwed up the naming so we have to mangle the
* cipher name.
*/
- if (!strncmp(cipher_name, "ecb(", 4)) {
+ if (!memcmp(cipher_name, "ecb(", 4)) {
int len;
len = strscpy(name, cipher_name + 4, sizeof(name));
@@ -466,7 +466,7 @@ static void __exit xts_module_exit(void)
crypto_unregister_template(&xts_tmpl);
}
-subsys_initcall(xts_module_init);
+module_init(xts_module_init);
module_exit(xts_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/xxhash_generic.c b/crypto/xxhash_generic.c
index ac206ad4184d..175bb7ae0fcd 100644
--- a/crypto/xxhash_generic.c
+++ b/crypto/xxhash_generic.c
@@ -96,7 +96,7 @@ static void __exit xxhash_mod_fini(void)
crypto_unregister_shash(&alg);
}
-subsys_initcall(xxhash_mod_init);
+module_init(xxhash_mod_init);
module_exit(xxhash_mod_fini);
MODULE_AUTHOR("Nikolay Borisov <nborisov@suse.com>");
diff --git a/crypto/zstd.c b/crypto/zstd.c
index 90bb4f36f846..7570e11b4ee6 100644
--- a/crypto/zstd.c
+++ b/crypto/zstd.c
@@ -196,7 +196,7 @@ static void __exit zstd_mod_fini(void)
crypto_unregister_scomp(&scomp);
}
-subsys_initcall(zstd_mod_init);
+module_init(zstd_mod_init);
module_exit(zstd_mod_fini);
MODULE_LICENSE("GPL");
diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig
index be85336107f9..1919fbb169c7 100644
--- a/drivers/accel/habanalabs/Kconfig
+++ b/drivers/accel/habanalabs/Kconfig
@@ -6,7 +6,7 @@
config DRM_ACCEL_HABANALABS
tristate "HabanaLabs AI accelerators"
depends on DRM_ACCEL
- depends on X86_64
+ depends on X86 && X86_64
depends on PCI && HAS_IOMEM
select GENERIC_ALLOCATOR
select HWMON
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8729a0c57d78..dc80ca921d90 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,8 +17,6 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <asm/msr.h>
-
/* make sure there is space for all the signed info */
static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index f0dad0c9ce33..cd24ccd20ba6 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t
if (ret < 0)
return ret;
- buf[size] = '\0';
+ buf[ret] = '\0';
ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
&process_quantum);
if (ret != 4)
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 7f10aa38269d..7bc40c2735ac 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -576,6 +576,9 @@ config ACPI_FFH
Enable this feature if you want to set up and install the FFH Address
Space handler to handle FFH OpRegion in the firmware.
+config ACPI_MRRM
+ bool
+
source "drivers/acpi/pmic/Kconfig"
config ACPI_VIOT
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 797070fc9a3f..d1b0affb844f 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -66,6 +66,7 @@ acpi-$(CONFIG_ACPI_WATCHDOG) += acpi_watchdog.o
acpi-$(CONFIG_ACPI_PRMT) += prmt.o
acpi-$(CONFIG_ACPI_PCC) += acpi_pcc.o
acpi-$(CONFIG_ACPI_FFH) += acpi_ffh.o
+acpi-$(CONFIG_ACPI_MRRM) += acpi_mrrm.o
# Address translation
acpi-$(CONFIG_ACPI_ADXL) += acpi_adxl.o
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index f7fb7205028d..f6b9562779de 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -15,6 +15,7 @@
#include <acpi/ghes.h>
#include <asm/cpu.h>
#include <asm/mce.h>
+#include <asm/msr.h>
#include "apei/apei-internal.h"
#include <ras/ras_event.h>
@@ -234,7 +235,7 @@ static int __init extlog_init(void)
u64 cap;
int rc;
- if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap) ||
+ if (rdmsrq_safe(MSR_IA32_MCG_CAP, &cap) ||
!(cap & MCG_ELOG_P) ||
!extlog_get_l1addr())
return -ENODEV;
diff --git a/drivers/acpi/acpi_lpit.c b/drivers/acpi/acpi_lpit.c
index 794962c5c88e..b8d98b1b48ae 100644
--- a/drivers/acpi/acpi_lpit.c
+++ b/drivers/acpi/acpi_lpit.c
@@ -39,7 +39,7 @@ static int lpit_read_residency_counter_us(u64 *counter, bool io_mem)
return 0;
}
- err = rdmsrl_safe(residency_info_ffh.gaddr.address, counter);
+ err = rdmsrq_safe(residency_info_ffh.gaddr.address, counter);
if (!err) {
u64 mask = GENMASK_ULL(residency_info_ffh.gaddr.bit_offset +
residency_info_ffh.gaddr. bit_width - 1,
diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c
new file mode 100644
index 000000000000..26c1a4e6b6ec
--- /dev/null
+++ b/drivers/acpi/acpi_mrrm.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025, Intel Corporation.
+ *
+ * Memory Range and Region Mapping (MRRM) structure
+ *
+ * Parse and report the platform's MRRM table in /sys.
+ */
+
+#define pr_fmt(fmt) "acpi/mrrm: " fmt
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+
+/* Default assume one memory region covering all system memory, per the spec */
+static int max_mem_region = 1;
+
+/* Access for use by resctrl file system */
+int acpi_mrrm_max_mem_region(void)
+{
+ return max_mem_region;
+}
+
+struct mrrm_mem_range_entry {
+ u64 base;
+ u64 length;
+ int node;
+ u8 local_region_id;
+ u8 remote_region_id;
+};
+
+static struct mrrm_mem_range_entry *mrrm_mem_range_entry;
+static u32 mrrm_mem_entry_num;
+
+static int get_node_num(struct mrrm_mem_range_entry *e)
+{
+ unsigned int nid;
+
+ for_each_online_node(nid) {
+ for (int z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = NODE_DATA(nid)->node_zones + z;
+
+ if (!populated_zone(zone))
+ continue;
+ if (zone_intersects(zone, PHYS_PFN(e->base), PHYS_PFN(e->length)))
+ return zone_to_nid(zone);
+ }
+ }
+
+ return -ENOENT;
+}
+
+static __init int acpi_parse_mrrm(struct acpi_table_header *table)
+{
+ struct acpi_mrrm_mem_range_entry *mre_entry;
+ struct acpi_table_mrrm *mrrm;
+ void *mre, *mrrm_end;
+ int mre_count = 0;
+
+ mrrm = (struct acpi_table_mrrm *)table;
+ if (!mrrm)
+ return -ENODEV;
+
+ if (mrrm->flags & ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS)
+ return -EOPNOTSUPP;
+
+ mrrm_end = (void *)mrrm + mrrm->header.length - 1;
+ mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
+ while (mre < mrrm_end) {
+ mre_entry = mre;
+ mre_count++;
+ mre += mre_entry->header.length;
+ }
+ if (!mre_count) {
+ pr_info(FW_BUG "No ranges listed in MRRM table\n");
+ return -EINVAL;
+ }
+
+ mrrm_mem_range_entry = kmalloc_array(mre_count, sizeof(*mrrm_mem_range_entry),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mrrm_mem_range_entry)
+ return -ENOMEM;
+
+ mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
+ while (mre < mrrm_end) {
+ struct mrrm_mem_range_entry *e;
+
+ mre_entry = mre;
+ e = mrrm_mem_range_entry + mrrm_mem_entry_num;
+
+ e->base = mre_entry->addr_base;
+ e->length = mre_entry->addr_len;
+ e->node = get_node_num(e);
+
+ if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_LOCAL)
+ e->local_region_id = mre_entry->local_region_id;
+ else
+ e->local_region_id = -1;
+ if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_REMOTE)
+ e->remote_region_id = mre_entry->remote_region_id;
+ else
+ e->remote_region_id = -1;
+
+ mrrm_mem_entry_num++;
+ mre += mre_entry->header.length;
+ }
+
+ max_mem_region = mrrm->max_mem_region;
+
+ return 0;
+}
+
+#define RANGE_ATTR(name, fmt) \
+static ssize_t name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ struct mrrm_mem_range_entry *mre; \
+ const char *kname = kobject_name(kobj); \
+ int n, ret; \
+ \
+ ret = kstrtoint(kname + 5, 10, &n); \
+ if (ret) \
+ return ret; \
+ \
+ mre = mrrm_mem_range_entry + n; \
+ \
+ return sysfs_emit(buf, fmt, mre->name); \
+} \
+static struct kobj_attribute name##_attr = __ATTR_RO(name)
+
+RANGE_ATTR(base, "0x%llx\n");
+RANGE_ATTR(length, "0x%llx\n");
+RANGE_ATTR(node, "%d\n");
+RANGE_ATTR(local_region_id, "%d\n");
+RANGE_ATTR(remote_region_id, "%d\n");
+
+static struct attribute *memory_range_attrs[] = {
+ &base_attr.attr,
+ &length_attr.attr,
+ &node_attr.attr,
+ &local_region_id_attr.attr,
+ &remote_region_id_attr.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(memory_range);
+
+static __init int add_boot_memory_ranges(void)
+{
+ struct kobject *pkobj, *kobj;
+ int ret = -EINVAL;
+ char *name;
+
+ pkobj = kobject_create_and_add("memory_ranges", acpi_kobj);
+
+ for (int i = 0; i < mrrm_mem_entry_num; i++) {
+ name = kasprintf(GFP_KERNEL, "range%d", i);
+ if (!name)
+ break;
+
+ kobj = kobject_create_and_add(name, pkobj);
+
+ ret = sysfs_create_groups(kobj, memory_range_groups);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static __init int mrrm_init(void)
+{
+ int ret;
+
+ ret = acpi_table_parse(ACPI_SIG_MRRM, acpi_parse_mrrm);
+ if (ret < 0)
+ return ret;
+
+ return add_boot_memory_ranges();
+}
+device_initcall(mrrm_init);
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 3fde4496f8a2..6f8bbe1247a5 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -19,7 +19,7 @@
#include <linux/acpi.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/mwait.h>
#include <xen/xen.h>
diff --git a/drivers/acpi/acpi_pcc.c b/drivers/acpi/acpi_pcc.c
index 07a034a53aca..97064e943768 100644
--- a/drivers/acpi/acpi_pcc.c
+++ b/drivers/acpi/acpi_pcc.c
@@ -31,7 +31,6 @@
struct pcc_data {
struct pcc_mbox_chan *pcc_chan;
- void __iomem *pcc_comm_addr;
struct completion done;
struct mbox_client cl;
struct acpi_pcc_info ctx;
@@ -81,14 +80,6 @@ acpi_pcc_address_space_setup(acpi_handle region_handle, u32 function,
ret = AE_SUPPORT;
goto err_free_channel;
}
- data->pcc_comm_addr = acpi_os_ioremap(pcc_chan->shmem_base_addr,
- pcc_chan->shmem_size);
- if (!data->pcc_comm_addr) {
- pr_err("Failed to ioremap PCC comm region mem for %d\n",
- ctx->subspace_id);
- ret = AE_NO_MEMORY;
- goto err_free_channel;
- }
*region_context = data;
return AE_OK;
@@ -113,7 +104,7 @@ acpi_pcc_address_space_handler(u32 function, acpi_physical_address addr,
reinit_completion(&data->done);
/* Write to Shared Memory */
- memcpy_toio(data->pcc_comm_addr, (void *)value, data->ctx.length);
+ memcpy_toio(data->pcc_chan->shmem, (void *)value, data->ctx.length);
ret = mbox_send_message(data->pcc_chan->mchan, NULL);
if (ret < 0)
@@ -134,7 +125,7 @@ acpi_pcc_address_space_handler(u32 function, acpi_physical_address addr,
mbox_chan_txdone(data->pcc_chan->mchan, ret);
- memcpy_fromio(value, data->pcc_comm_addr, data->ctx.length);
+ memcpy_fromio(value, data->pcc_chan->shmem, data->ctx.length);
return AE_OK;
}
diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h
index 9d4cbd956627..d7d4649ce66f 100644
--- a/drivers/acpi/acpica/acapps.h
+++ b/drivers/acpi/acpica/acapps.h
@@ -3,7 +3,7 @@
*
* Module Name: acapps - common include for ACPI applications/tools
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -17,7 +17,7 @@
/* Common info for tool signons */
#define ACPICA_NAME "Intel ACPI Component Architecture"
-#define ACPICA_COPYRIGHT "Copyright (c) 2000 - 2022 Intel Corporation"
+#define ACPICA_COPYRIGHT "Copyright (c) 2000 - 2025 Intel Corporation"
#if ACPI_MACHINE_WIDTH == 64
#define ACPI_WIDTH " (64-bit version)"
diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h
index 4536dc9d3979..662231f4f881 100644
--- a/drivers/acpi/acpica/accommon.h
+++ b/drivers/acpi/acpica/accommon.h
@@ -3,7 +3,7 @@
*
* Name: accommon.h - Common include files for generation of ACPICA source
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acconvert.h b/drivers/acpi/acpica/acconvert.h
index c6ba6a36cfb5..24998f2d7539 100644
--- a/drivers/acpi/acpica/acconvert.h
+++ b/drivers/acpi/acpica/acconvert.h
@@ -3,7 +3,7 @@
*
* Module Name: acapps - common include for ACPI applications/tools
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 911875c5a5f1..fe6d38b43c9a 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -3,7 +3,7 @@
*
* Name: acdebug.h - ACPI/AML debugger
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -37,7 +37,7 @@ struct acpi_db_argument_info {
struct acpi_db_execute_walk {
u32 count;
u32 max_count;
- char name_seg[ACPI_NAMESEG_SIZE + 1];
+ char name_seg[ACPI_NAMESEG_SIZE + 1] ACPI_NONSTRING;
};
#define PARAM_LIST(pl) pl
diff --git a/drivers/acpi/acpica/acdispat.h b/drivers/acpi/acpica/acdispat.h
index 73eecbf62f06..5d48a344b35f 100644
--- a/drivers/acpi/acpica/acdispat.h
+++ b/drivers/acpi/acpica/acdispat.h
@@ -3,7 +3,7 @@
*
* Name: acdispat.h - dispatcher (parser to interpreter interface)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index 1c5218b79fc2..b40fb3a5ac8a 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -3,7 +3,7 @@
*
* Name: acevents.h - Event subcomponent prototypes and defines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 309ce8efb4f6..c8a750d2674c 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -3,7 +3,7 @@
*
* Name: acglobal.h - Declarations for global variables
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index b8543a34caea..6aec56c65fa0 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -3,7 +3,7 @@
*
* Name: achware.h -- hardware specific interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h
index 955114c926bd..1ee6ac9b2baf 100644
--- a/drivers/acpi/acpica/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
@@ -3,7 +3,7 @@
*
* Name: acinterp.h - Interpreter subcomponent prototypes and defines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -120,6 +120,9 @@ void
acpi_ex_trace_point(acpi_trace_event_type type,
u8 begin, u8 *aml, char *pathname);
+void
+acpi_ex_trace_args(union acpi_operand_object **params, u32 count);
+
/*
* exfield - ACPI AML (p-code) execution - field manipulation
*/
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 6481c48c22bb..0c41f0097e8d 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -3,7 +3,7 @@
*
* Name: aclocal.h - Internal data types used across the ACPI subsystem
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state);
* expected_return_btypes - Allowed type(s) for the return value
*/
struct acpi_name_info {
- char name[ACPI_NAMESEG_SIZE] __nonstring;
+ char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
u16 argument_list;
u8 expected_btypes;
};
@@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node *
converted_object);
struct acpi_simple_repair_info {
- char name[ACPI_NAMESEG_SIZE] __nonstring;
+ char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
u32 unexpected_btypes;
u32 package_index;
acpi_object_converter object_converter;
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index de83dd22292b..4e9402c02410 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -3,7 +3,7 @@
*
* Name: acmacros.h - C macros for the entire subsystem.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 9448bc026b9b..13f050fecb49 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -3,7 +3,7 @@
*
* Name: acnamesp.h - Namespace subcomponent prototypes and defines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h
index 8fc02946d3cd..6ffcc7a0a0c2 100644
--- a/drivers/acpi/acpica/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
@@ -3,7 +3,7 @@
*
* Name: acobject.h - Definition of union acpi_operand_object (Internal object only)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acopcode.h b/drivers/acpi/acpica/acopcode.h
index da96d80e6b3a..a2a9e51d7ac6 100644
--- a/drivers/acpi/acpica/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
@@ -3,7 +3,7 @@
*
* Name: acopcode.h - AML opcode information for the AML parser and interpreter
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index 6dad786a382c..65a15dee092b 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -3,7 +3,7 @@
*
* Module Name: acparser.h - AML Parser subcomponent prototypes and defines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index ef068f4c864a..76c5ed02e916 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -3,7 +3,7 @@
*
* Name: acpredef - Information table for ACPI predefined methods and objects
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acresrc.h b/drivers/acpi/acpica/acresrc.h
index d772ff9ca07d..e8a92be5adae 100644
--- a/drivers/acpi/acpica/acresrc.h
+++ b/drivers/acpi/acpica/acresrc.h
@@ -3,7 +3,7 @@
*
* Name: acresrc.h - Resource Manager function prototypes
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h
index f8fee94ba708..e690f604cfa0 100644
--- a/drivers/acpi/acpica/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
@@ -3,7 +3,7 @@
*
* Name: acstruct.h - Internal structs
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index b6ae979b01b6..ebef72bf58d0 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -3,7 +3,7 @@
*
* Name: actables.h - ACPI table management
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index edfdbbef81c1..3990d509bbab 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -3,7 +3,7 @@
*
* Name: acutils.h -- prototypes for the common (subsystem-wide) procedures
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index effe52b40dce..c5b544a006c5 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -5,7 +5,7 @@
* Declarations and definitions contained herein are derived
* directly from the ACPI specification.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/amlresrc.h b/drivers/acpi/acpica/amlresrc.h
index 4e88f9fc2a28..54d6e51e0b9a 100644
--- a/drivers/acpi/acpica/amlresrc.h
+++ b/drivers/acpi/acpica/amlresrc.h
@@ -3,7 +3,7 @@
*
* Module Name: amlresrc.h - AML resource descriptors
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -504,10 +504,6 @@ struct aml_resource_pin_group_config {
#define AML_RESOURCE_PIN_GROUP_CONFIG_REVISION 1 /* ACPI 6.2 */
-/* restore default alignment */
-
-#pragma pack()
-
/* Union of all resource descriptors, so we can allocate the worst case */
union aml_resource {
@@ -562,6 +558,10 @@ union aml_resource {
u8 byte_item;
};
+/* restore default alignment */
+
+#pragma pack()
+
/* Interfaces used by both the disassembler and compiler */
void
diff --git a/drivers/acpi/acpica/dbhistry.c b/drivers/acpi/acpica/dbhistry.c
index e874c1dddefa..554ae35108bd 100644
--- a/drivers/acpi/acpica/dbhistry.c
+++ b/drivers/acpi/acpica/dbhistry.c
@@ -3,7 +3,7 @@
*
* Module Name: dbhistry - debugger HISTORY command
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsargs.c b/drivers/acpi/acpica/dsargs.c
index 4354c175e12e..e2f00c54cb36 100644
--- a/drivers/acpi/acpica/dsargs.c
+++ b/drivers/acpi/acpica/dsargs.c
@@ -4,7 +4,7 @@
* Module Name: dsargs - Support for execution of dynamic arguments for static
* objects (regions, fields, buffer fields, etc.)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index 80c69af06948..c1f79d7a2026 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -4,7 +4,7 @@
* Module Name: dscontrol - Support for execution control opcodes -
* if/else/while/return
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsdebug.c b/drivers/acpi/acpica/dsdebug.c
index c5c8380a3114..274b74255551 100644
--- a/drivers/acpi/acpica/dsdebug.c
+++ b/drivers/acpi/acpica/dsdebug.c
@@ -3,7 +3,7 @@
*
* Module Name: dsdebug - Parser/Interpreter interface - debugging
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index 532401ecdab0..df132c9089c7 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -3,7 +3,7 @@
*
* Module Name: dsfield - Dispatcher field routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c
index 6e0e362e461f..57cd9e2d1109 100644
--- a/drivers/acpi/acpica/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -3,7 +3,7 @@
*
* Module Name: dsinit - Object initialization namespace walk
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index e809c2aed78a..c8f37f4e6626 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -3,7 +3,7 @@
*
* Module Name: dsmethod - Parser/Interpreter interface - control method parsing
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsmthdat.c b/drivers/acpi/acpica/dsmthdat.c
index eca50517ad82..5393de4dbc4c 100644
--- a/drivers/acpi/acpica/dsmthdat.c
+++ b/drivers/acpi/acpica/dsmthdat.c
@@ -188,6 +188,7 @@ acpi_ds_method_data_init_args(union acpi_operand_object **params,
index++;
}
+ acpi_ex_trace_args(params, index);
ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "%u args passed to method\n", index));
return_ACPI_STATUS(AE_OK);
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 555f148d666b..1bf7eec49899 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -3,7 +3,7 @@
*
* Module Name: dsobject - Dispatcher object management routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index dd3059000885..5699b0872848 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -3,7 +3,7 @@
*
* Module Name: dsopcode - Dispatcher support for regions and fields
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dspkginit.c b/drivers/acpi/acpica/dspkginit.c
index ecf793fe9919..1ed2386fab82 100644
--- a/drivers/acpi/acpica/dspkginit.c
+++ b/drivers/acpi/acpica/dspkginit.c
@@ -3,7 +3,7 @@
*
* Module Name: dspkginit - Completion of deferred package initialization
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index fb9ed5e1da89..baf6a1f27605 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -668,6 +668,8 @@ acpi_ds_create_operands(struct acpi_walk_state *walk_state,
union acpi_parse_object *arguments[ACPI_OBJ_NUM_OPERANDS];
u32 arg_count = 0;
u32 index = walk_state->num_operands;
+ u32 prev_num_operands = walk_state->num_operands;
+ u32 new_num_operands;
u32 i;
ACPI_FUNCTION_TRACE_PTR(ds_create_operands, first_arg);
@@ -696,6 +698,7 @@ acpi_ds_create_operands(struct acpi_walk_state *walk_state,
/* Create the interpreter arguments, in reverse order */
+ new_num_operands = index;
index--;
for (i = 0; i < arg_count; i++) {
arg = arguments[index];
@@ -720,7 +723,11 @@ cleanup:
* pop everything off of the operand stack and delete those
* objects
*/
- acpi_ds_obj_stack_pop_and_delete(arg_count, walk_state);
+ walk_state->num_operands = (u8)(i);
+ acpi_ds_obj_stack_pop_and_delete(new_num_operands, walk_state);
+
+ /* Restore operand count */
+ walk_state->num_operands = (u8)(prev_num_operands);
ACPI_EXCEPTION((AE_INFO, status, "While creating Arg %u", index));
return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index a43336f05206..5c5c6d8a4e48 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -4,7 +4,7 @@
* Module Name: dswexec - Dispatcher method execution callbacks;
* dispatch to interpreter.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index f7b8496c8bdd..666419b6a5c6 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -3,7 +3,7 @@
*
* Module Name: dswload - Dispatcher first pass namespace load callbacks
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 541235f498c2..bfc54c914757 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -3,7 +3,7 @@
*
* Module Name: dswload2 - Dispatcher second pass namespace load callbacks
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dswscope.c b/drivers/acpi/acpica/dswscope.c
index 1fdd07ae862c..375a8fa43d9d 100644
--- a/drivers/acpi/acpica/dswscope.c
+++ b/drivers/acpi/acpica/dswscope.c
@@ -3,7 +3,7 @@
*
* Module Name: dswscope - Scope stack manipulation
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
index 75338a13c802..02aaddb89df9 100644
--- a/drivers/acpi/acpica/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -3,7 +3,7 @@
*
* Module Name: dswstate - Dispatcher parse tree walk management routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c
index 9e78c5b9ad52..6cdd39c987b8 100644
--- a/drivers/acpi/acpica/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -3,7 +3,7 @@
*
* Module Name: evevent - Fixed Event handling and dispatch
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c
index 989dc01af03f..fa3e0d00d1ca 100644
--- a/drivers/acpi/acpica/evglock.c
+++ b/drivers/acpi/acpica/evglock.c
@@ -3,7 +3,7 @@
*
* Module Name: evglock - Global Lock support
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 934b201d3820..ba65b2ea49b2 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -3,7 +3,7 @@
*
* Module Name: evgpe - General Purpose Event handling and dispatch
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 58e1890ab25b..fadd93caf1d5 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -3,7 +3,7 @@
*
* Module Name: evgpeblk - GPE block creation and initialization.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 38f408cf13ce..eb769739420e 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -3,7 +3,7 @@
*
* Module Name: evgpeinit - System GPE initialization and update
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evgpeutil.c b/drivers/acpi/acpica/evgpeutil.c
index ee3b1ea656d4..d15b1d75c8ec 100644
--- a/drivers/acpi/acpica/evgpeutil.c
+++ b/drivers/acpi/acpica/evgpeutil.c
@@ -3,7 +3,7 @@
*
* Module Name: evgpeutil - GPE utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c
index 1c8cb6d924df..5a35dae945e2 100644
--- a/drivers/acpi/acpica/evhandler.c
+++ b/drivers/acpi/acpica/evhandler.c
@@ -3,7 +3,7 @@
*
* Module Name: evhandler - Support for Address Space handlers
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index e68e876d3b84..04a23a6c3bb1 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -3,7 +3,7 @@
*
* Module Name: evmisc - Miscellaneous event manager support functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index cf53b9535f18..fa3475da7ea9 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -3,7 +3,7 @@
*
* Module Name: evregion - Operation Region support
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 46d1b3f5582d..b03952798af5 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -3,7 +3,7 @@
*
* Module Name: evrgnini- ACPI address_space (op_region) init
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evxface.c b/drivers/acpi/acpica/evxface.c
index 24fa6433d562..86a8d41c079c 100644
--- a/drivers/acpi/acpica/evxface.c
+++ b/drivers/acpi/acpica/evxface.c
@@ -3,7 +3,7 @@
*
* Module Name: evxface - External interfaces for ACPI events
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index 48bf845191d2..4b052908d2e7 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -3,7 +3,7 @@
*
* Module Name: evxfevnt - External Interfaces, ACPI event disable/enable
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 4eeeb3b7ab7e..60dacec1b121 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -3,7 +3,7 @@
*
* Module Name: evxfgpe - External Interfaces for General Purpose Events (GPEs)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c
index bff2d099f469..bccc672c934c 100644
--- a/drivers/acpi/acpica/evxfregn.c
+++ b/drivers/acpi/acpica/evxfregn.c
@@ -4,7 +4,7 @@
* Module Name: evxfregn - External Interfaces, ACPI Operation Regions and
* Address Spaces.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
index 2fb78b35565b..c248c9b162fa 100644
--- a/drivers/acpi/acpica/exconcat.c
+++ b/drivers/acpi/acpica/exconcat.c
@@ -3,7 +3,7 @@
*
* Module Name: exconcat - Concatenate-type AML operators
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 473115309860..4d7dd0fc6b07 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -3,7 +3,7 @@
*
* Module Name: exconfig - Namespace reconfiguration (Load/Unload opcodes)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index bb1be42daee1..fded9bfc2436 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -3,7 +3,7 @@
*
* Module Name: exconvrt - Object conversion routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -226,8 +226,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc,
/* Copy the string to the buffer */
new_buf = return_desc->buffer.pointer;
- strncpy((char *)new_buf, (char *)obj_desc->string.pointer,
- obj_desc->string.length);
+ memcpy((char *)new_buf, (char *)obj_desc->string.pointer,
+ obj_desc->string.length);
break;
default:
diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index 1bea9d97652c..052c69567997 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -3,7 +3,7 @@
*
* Module Name: excreate - Named object creation
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exdebug.c b/drivers/acpi/acpica/exdebug.c
index 3f86bfada510..81a07a52b73c 100644
--- a/drivers/acpi/acpica/exdebug.c
+++ b/drivers/acpi/acpica/exdebug.c
@@ -3,7 +3,7 @@
*
* Module Name: exdebug - Support for stores to the AML Debug Object
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index 2e2da8790224..d8aeebaab70a 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -3,7 +3,7 @@
*
* Module Name: exdump - Interpreter debug output routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index 61ff36189ace..ced3ff9d0a86 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -3,7 +3,7 @@
*
* Module Name: exfield - AML execution - field_unit read/write
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c
index cf6c812a8b6d..0771934c0455 100644
--- a/drivers/acpi/acpica/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -3,7 +3,7 @@
*
* Module Name: exfldio - Aml Field I/O
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index c6f2a9166ac0..07cbac58ed21 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -3,7 +3,7 @@
*
* Module Name: exmisc - ACPI AML (p-code) execution - specific opcodes
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exmutex.c b/drivers/acpi/acpica/exmutex.c
index 65c487facdda..1fa013197fcf 100644
--- a/drivers/acpi/acpica/exmutex.c
+++ b/drivers/acpi/acpica/exmutex.c
@@ -3,7 +3,7 @@
*
* Module Name: exmutex - ASL Mutex Acquire/Release functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index 9a448165bfeb..76ab73c37e90 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -3,7 +3,7 @@
*
* Module Name: exnames - interpreter/scanner name load/execute
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index 20fb34b68bee..6ac7e0ca5c9d 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -3,7 +3,7 @@
*
* Module Name: exoparg1 - AML execution - opcodes with 1 argument
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 743c258bf2e8..a94fa4d70e99 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -3,7 +3,7 @@
*
* Module Name: exoparg2 - AML execution - opcodes with 2 arguments
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index d3091f619909..bf08110ed6d2 100644
--- a/drivers/acpi/acpica/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -3,7 +3,7 @@
*
* Module Name: exoparg3 - AML execution - opcodes with 3 arguments
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 1af35e143ba9..cb078e39abf7 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -3,7 +3,7 @@
*
* Module Name: exoparg6 - AML execution - opcodes with 6 arguments
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exprep.c b/drivers/acpi/acpica/exprep.c
index 82b1fa2d201f..1b1a006e82de 100644
--- a/drivers/acpi/acpica/exprep.c
+++ b/drivers/acpi/acpica/exprep.c
@@ -3,7 +3,7 @@
*
* Module Name: exprep - ACPI AML field prep utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c
index c49b9f8de723..a390a1c2b0ab 100644
--- a/drivers/acpi/acpica/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -3,7 +3,7 @@
*
* Module Name: exregion - ACPI default op_region (address space) handlers
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exresnte.c b/drivers/acpi/acpica/exresnte.c
index 873de01b8ad2..dd83631090fc 100644
--- a/drivers/acpi/acpica/exresnte.c
+++ b/drivers/acpi/acpica/exresnte.c
@@ -3,7 +3,7 @@
*
* Module Name: exresnte - AML Interpreter object resolution
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 24a78b5e266c..4589de3f3012 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -3,7 +3,7 @@
*
* Module Name: exresolv - AML Interpreter object resolution
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index 3a437e6ace5c..782ee353a709 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -3,7 +3,7 @@
*
* Module Name: exresop - AML Interpreter operand/object resolution
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index 5241f4c01c76..6d2581ec22ad 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -3,7 +3,7 @@
*
* Module Name: exserial - field_unit support for serial address spaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -201,6 +201,12 @@ acpi_ex_read_serial_bus(union acpi_operand_object *obj_desc,
function = ACPI_READ;
break;
+ case ACPI_ADR_SPACE_FIXED_HARDWARE:
+
+ buffer_length = ACPI_FFH_INPUT_BUFFER_SIZE;
+ function = ACPI_READ;
+ break;
+
default:
return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID);
}
diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index 575c7a39f1aa..cbc42207496d 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -3,7 +3,7 @@
*
* Module Name: exstore - AML Interpreter object store support
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exstoren.c b/drivers/acpi/acpica/exstoren.c
index b01ae015e1b5..0470b2639831 100644
--- a/drivers/acpi/acpica/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -4,7 +4,7 @@
* Module Name: exstoren - AML Interpreter object store support,
* Store to Node (namespace object)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c
index 37c3131a82fa..5b168fbc03e8 100644
--- a/drivers/acpi/acpica/exstorob.c
+++ b/drivers/acpi/acpica/exstorob.c
@@ -3,7 +3,7 @@
*
* Module Name: exstorob - AML object store support, store to object
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/exsystem.c b/drivers/acpi/acpica/exsystem.c
index 2c384bd52b9c..7f843c9d8a06 100644
--- a/drivers/acpi/acpica/exsystem.c
+++ b/drivers/acpi/acpica/exsystem.c
@@ -3,7 +3,7 @@
*
* Module Name: exsystem - Interface to OS services
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/extrace.c b/drivers/acpi/acpica/extrace.c
index f1730221ff13..d34497f3576a 100644
--- a/drivers/acpi/acpica/extrace.c
+++ b/drivers/acpi/acpica/extrace.c
@@ -3,7 +3,7 @@
*
* Module Name: extrace - Support for interpreter execution tracing
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -149,6 +149,57 @@ acpi_ex_trace_point(acpi_trace_event_type type,
/*******************************************************************************
*
+ * FUNCTION: acpi_ex_trace_args
+ *
+ * PARAMETERS: params - AML method arguments
+ * count - numer of method arguments
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Trace any arguments
+ *
+ ******************************************************************************/
+
+void
+acpi_ex_trace_args(union acpi_operand_object **params, u32 count)
+{
+ u32 i;
+
+ ACPI_FUNCTION_NAME(ex_trace_args);
+
+ for (i = 0; i < count; i++) {
+ union acpi_operand_object *obj_desc = params[i];
+
+ if (!i) {
+ ACPI_DEBUG_PRINT((ACPI_DB_TRACE_POINT, " "));
+ }
+
+ switch (obj_desc->common.type) {
+ case ACPI_TYPE_INTEGER:
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%llx", obj_desc->integer.value));
+ break;
+ case ACPI_TYPE_STRING:
+ if (!obj_desc->string.length) {
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "NULL"));
+ continue;
+ }
+ if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_TRACE_POINT, _COMPONENT))
+ acpi_ut_print_string(obj_desc->string.pointer, ACPI_UINT8_MAX);
+ break;
+ default:
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "Unknown"));
+ break;
+ }
+ if (i+1 == count) {
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "\n"));
+ } else {
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, ", "));
+ }
+ }
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ex_start_trace_method
*
* PARAMETERS: method_node - Node of the method
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index f4d4a033f166..cc10c0732218 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -3,7 +3,7 @@
*
* Module Name: exutils - interpreter/scanner utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwacpi.c b/drivers/acpi/acpica/hwacpi.c
index 790f342dcd25..a1e1fa787566 100644
--- a/drivers/acpi/acpica/hwacpi.c
+++ b/drivers/acpi/acpica/hwacpi.c
@@ -3,7 +3,7 @@
*
* Module Name: hwacpi - ACPI Hardware Initialization/Mode Interface
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwesleep.c b/drivers/acpi/acpica/hwesleep.c
index a9ba9190408b..631fd8e2b774 100644
--- a/drivers/acpi/acpica/hwesleep.c
+++ b/drivers/acpi/acpica/hwesleep.c
@@ -4,7 +4,7 @@
* Name: hwesleep.c - ACPI Hardware Sleep/Wake Support functions for the
* extended FADT-V5 sleep registers.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index e0c847ab8324..386f4759c317 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -3,7 +3,7 @@
*
* Module Name: hwgpe - Low level GPE enable/disable/clear functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index e0921f08b71a..87d78bef6323 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -4,7 +4,7 @@
* Name: hwsleep.c - ACPI Hardware Sleep/Wake Support functions for the
* original/legacy sleep/PM registers.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index 192c04b5a599..a5e0bccae6a4 100644
--- a/drivers/acpi/acpica/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -3,7 +3,7 @@
*
* Name: hwtimer.c - ACPI Power Management Timer Interface
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index b8de458f0368..496fd9e49f0b 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -3,7 +3,7 @@
*
* Module Name: hwvalid - I/O request validation
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index c31f803995c6..847cd1b2493d 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -3,7 +3,7 @@
*
* Module Name: hwxface - Public ACPICA hardware interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c
index 8dbf83aeb455..9aabe30416da 100644
--- a/drivers/acpi/acpica/hwxfsleep.c
+++ b/drivers/acpi/acpica/hwxfsleep.c
@@ -3,7 +3,7 @@
*
* Name: hwxfsleep.c - ACPI Hardware Sleep/Wake External Interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsarguments.c b/drivers/acpi/acpica/nsarguments.c
index 3efb46f0dc54..366d54a1d157 100644
--- a/drivers/acpi/acpica/nsarguments.c
+++ b/drivers/acpi/acpica/nsarguments.c
@@ -3,7 +3,7 @@
*
* Module Name: nsarguments - Validation of args for ACPI predefined methods
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index 7e5a683ae957..f05a92b88642 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -4,7 +4,7 @@
* Module Name: nsconvert - Object conversions for objects returned by
* predefined methods
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index 90a26cb0c472..6dc20486ad51 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -3,7 +3,7 @@
*
* Module Name: nsdump - table dumping routines for debug
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c
index fa116ebe49a3..d5b16aaec233 100644
--- a/drivers/acpi/acpica/nsdumpdv.c
+++ b/drivers/acpi/acpica/nsdumpdv.c
@@ -3,7 +3,7 @@
*
* Module Name: nsdump - table dumping routines for debug
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 86d126fdb27d..03373e7f7978 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -3,7 +3,7 @@
*
* Module Name: nsinit - namespace initialization
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index fcb9de0f77a2..6ec4c646fff7 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -3,7 +3,7 @@
*
* Module Name: nsload - namespace loading/expanding/contracting procedures
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c
index d91153f65700..22aeeeb56cff 100644
--- a/drivers/acpi/acpica/nsnames.c
+++ b/drivers/acpi/acpica/nsnames.c
@@ -194,7 +194,7 @@ acpi_ns_build_normalized_path(struct acpi_namespace_node *node,
char *full_path, u32 path_size, u8 no_trailing)
{
u32 length = 0, i;
- char name[ACPI_NAMESEG_SIZE];
+ char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
u8 do_no_trailing;
char c, *left, *right;
struct acpi_namespace_node *next_node;
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index 31e551cf4ea6..959e6379bc4c 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -3,7 +3,7 @@
*
* Module Name: nsparse - namespace interface to AML parser
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index cf57bd69616d..81995ee48c49 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -3,7 +3,7 @@
*
* Module Name: nspredef - Validation of ACPI predefined methods and objects
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsprepkg.c b/drivers/acpi/acpica/nsprepkg.c
index dd37fc108fce..ca137ce5674f 100644
--- a/drivers/acpi/acpica/nsprepkg.c
+++ b/drivers/acpi/acpica/nsprepkg.c
@@ -3,7 +3,7 @@
*
* Module Name: nsprepkg - Validation of package objects for predefined names
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index b8657004190d..accfdcfb7e62 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -3,7 +3,7 @@
*
* Module Name: nsrepair - Repair for objects returned by predefined methods
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 330b5e4711da..8dbb870f40d2 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -4,7 +4,7 @@
* Module Name: nsrepair2 - Repair for objects returned by specific
* predefined methods
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info,
return_object_ptr);
typedef struct acpi_repair_info {
- char name[ACPI_NAMESEG_SIZE] __nonstring;
+ char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
acpi_repair_function repair_function;
} acpi_repair_info;
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 06ffdb6808f5..49cc07e2ac5a 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -4,7 +4,7 @@
* Module Name: nsutils - Utilities for accessing ACPI namespace, accessing
* parents and siblings and Scope manipulation
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
index eee396a77bae..a2ac06a26e92 100644
--- a/drivers/acpi/acpica/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -3,7 +3,7 @@
*
* Module Name: nswalk - Functions for walking the ACPI namespace
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index 5d5bcf165298..1db831545ec8 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -4,7 +4,7 @@
* Module Name: nsxfname - Public interfaces to the ACPI subsystem
* ACPI Namespace oriented interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 28582adfc0ac..6f6ae38ec044 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -3,7 +3,7 @@
*
* Module Name: psargs - Parse AML opcode arguments
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index d0fd55636129..c989cadf271c 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -3,7 +3,7 @@
*
* Module Name: psloop - Main AML parse loop
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c
index 54471083ba54..496a1c1d5b0b 100644
--- a/drivers/acpi/acpica/psobject.c
+++ b/drivers/acpi/acpica/psobject.c
@@ -3,7 +3,7 @@
*
* Module Name: psobject - Support for parse objects
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -636,7 +636,8 @@ acpi_status
acpi_ps_complete_final_op(struct acpi_walk_state *walk_state,
union acpi_parse_object *op, acpi_status status)
{
- acpi_status status2;
+ acpi_status return_status = status;
+ u8 ascending = TRUE;
ACPI_FUNCTION_TRACE_PTR(ps_complete_final_op, walk_state);
@@ -650,7 +651,7 @@ acpi_ps_complete_final_op(struct acpi_walk_state *walk_state,
op));
do {
if (op) {
- if (walk_state->ascending_callback != NULL) {
+ if (ascending && walk_state->ascending_callback != NULL) {
walk_state->op = op;
walk_state->op_info =
acpi_ps_get_opcode_info(op->common.
@@ -672,49 +673,26 @@ acpi_ps_complete_final_op(struct acpi_walk_state *walk_state,
}
if (status == AE_CTRL_TERMINATE) {
- status = AE_OK;
-
- /* Clean up */
- do {
- if (op) {
- status2 =
- acpi_ps_complete_this_op
- (walk_state, op);
- if (ACPI_FAILURE
- (status2)) {
- return_ACPI_STATUS
- (status2);
- }
- }
-
- acpi_ps_pop_scope(&
- (walk_state->
- parser_state),
- &op,
- &walk_state->
- arg_types,
- &walk_state->
- arg_count);
-
- } while (op);
-
- return_ACPI_STATUS(status);
+ ascending = FALSE;
+ return_status = AE_CTRL_TERMINATE;
}
else if (ACPI_FAILURE(status)) {
/* First error is most important */
- (void)
- acpi_ps_complete_this_op(walk_state,
- op);
- return_ACPI_STATUS(status);
+ ascending = FALSE;
+ return_status = status;
}
}
- status2 = acpi_ps_complete_this_op(walk_state, op);
- if (ACPI_FAILURE(status2)) {
- return_ACPI_STATUS(status2);
+ status = acpi_ps_complete_this_op(walk_state, op);
+ if (ACPI_FAILURE(status)) {
+ ascending = FALSE;
+ if (ACPI_SUCCESS(return_status) ||
+ return_status == AE_CTRL_TERMINATE) {
+ return_status = status;
+ }
}
}
@@ -724,5 +702,5 @@ acpi_ps_complete_final_op(struct acpi_walk_state *walk_state,
} while (op);
- return_ACPI_STATUS(status);
+ return_ACPI_STATUS(return_status);
}
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 39e31030e5f4..bf6103986f48 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -3,7 +3,7 @@
*
* Module Name: psopcode - Parser/Interpreter opcode information table
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index bccf606e08b4..1c8044ffcb97 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -3,7 +3,7 @@
*
* Module Name: psopinfo - AML opcode information functions and dispatch tables
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 10a072953d78..55a416e56fd8 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -3,7 +3,7 @@
*
* Module Name: psparse - Parser top level AML parse routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psscope.c b/drivers/acpi/acpica/psscope.c
index a0035bde7556..c4e4483f0a0b 100644
--- a/drivers/acpi/acpica/psscope.c
+++ b/drivers/acpi/acpica/psscope.c
@@ -3,7 +3,7 @@
*
* Module Name: psscope - Parser scope stack management routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index 7f7f5ecd4011..5a285d3f2cdb 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -3,7 +3,7 @@
*
* Module Name: pstree - Parser op tree manipulation/traversal/search
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index d550c4af4702..ada1dc304d25 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -3,7 +3,7 @@
*
* Module Name: psutils - Parser miscellaneous utilities (Parser only)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/pswalk.c b/drivers/acpi/acpica/pswalk.c
index d92817c72b8d..2f3ebcd8aebe 100644
--- a/drivers/acpi/acpica/pswalk.c
+++ b/drivers/acpi/acpica/pswalk.c
@@ -3,7 +3,7 @@
*
* Module Name: pswalk - Parser routines to walk parsed op tree(s)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index 6f4eace0ba69..d480de075a90 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -3,7 +3,7 @@
*
* Module Name: psxface - Parser external interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/rsaddr.c b/drivers/acpi/acpica/rsaddr.c
index 27384ee245f0..f92010e667cd 100644
--- a/drivers/acpi/acpica/rsaddr.c
+++ b/drivers/acpi/acpica/rsaddr.c
@@ -272,18 +272,13 @@ u8
acpi_rs_get_address_common(struct acpi_resource *resource,
union aml_resource *aml)
{
- struct aml_resource_address address;
-
ACPI_FUNCTION_ENTRY();
- /* Avoid undefined behavior: member access within misaligned address */
-
- memcpy(&address, aml, sizeof(address));
-
/* Validate the Resource Type */
- if ((address.resource_type > 2) &&
- (address.resource_type < 0xC0) && (address.resource_type != 0x0A)) {
+ if ((aml->address.resource_type > 2) &&
+ (aml->address.resource_type < 0xC0) &&
+ (aml->address.resource_type != 0x0A)) {
return (FALSE);
}
@@ -304,7 +299,7 @@ acpi_rs_get_address_common(struct acpi_resource *resource,
/* Generic resource type, just grab the type_specific byte */
resource->data.address.info.type_specific =
- address.specific_flags;
+ aml->address.specific_flags;
}
return (TRUE);
diff --git a/drivers/acpi/acpica/rscalc.c b/drivers/acpi/acpica/rscalc.c
index 6e7a152d6459..242daf45e20e 100644
--- a/drivers/acpi/acpica/rscalc.c
+++ b/drivers/acpi/acpica/rscalc.c
@@ -608,18 +608,12 @@ acpi_rs_get_list_length(u8 *aml_buffer,
case ACPI_RESOURCE_NAME_SERIAL_BUS:{
- /* Avoid undefined behavior: member access within misaligned address */
-
- struct aml_resource_common_serialbus
- common_serial_bus;
- memcpy(&common_serial_bus, aml_resource,
- sizeof(common_serial_bus));
-
minimum_aml_resource_length =
acpi_gbl_resource_aml_serial_bus_sizes
- [common_serial_bus.type];
+ [aml_resource->common_serial_bus.type];
extra_struct_bytes +=
- common_serial_bus.resource_length -
+ aml_resource->common_serial_bus.
+ resource_length -
minimum_aml_resource_length;
break;
}
@@ -688,16 +682,10 @@ acpi_rs_get_list_length(u8 *aml_buffer,
*/
if (acpi_ut_get_resource_type(aml_buffer) ==
ACPI_RESOURCE_NAME_SERIAL_BUS) {
-
- /* Avoid undefined behavior: member access within misaligned address */
-
- struct aml_resource_common_serialbus common_serial_bus;
- memcpy(&common_serial_bus, aml_resource,
- sizeof(common_serial_bus));
-
buffer_size =
acpi_gbl_resource_struct_serial_bus_sizes
- [common_serial_bus.type] + extra_struct_bytes;
+ [aml_resource->common_serial_bus.type] +
+ extra_struct_bytes;
} else {
buffer_size =
acpi_gbl_resource_struct_sizes[resource_index] +
diff --git a/drivers/acpi/acpica/rslist.c b/drivers/acpi/acpica/rslist.c
index 164c96e063c6..e46efaa889cd 100644
--- a/drivers/acpi/acpica/rslist.c
+++ b/drivers/acpi/acpica/rslist.c
@@ -55,21 +55,15 @@ acpi_rs_convert_aml_to_resources(u8 * aml,
aml_resource = ACPI_CAST_PTR(union aml_resource, aml);
if (acpi_ut_get_resource_type(aml) == ACPI_RESOURCE_NAME_SERIAL_BUS) {
-
- /* Avoid undefined behavior: member access within misaligned address */
-
- struct aml_resource_common_serialbus common_serial_bus;
- memcpy(&common_serial_bus, aml_resource,
- sizeof(common_serial_bus));
-
- if (common_serial_bus.type > AML_RESOURCE_MAX_SERIALBUSTYPE) {
+ if (aml_resource->common_serial_bus.type >
+ AML_RESOURCE_MAX_SERIALBUSTYPE) {
conversion_table = NULL;
} else {
/* This is an I2C, SPI, UART, or CSI2 serial_bus descriptor */
conversion_table =
acpi_gbl_convert_resource_serial_bus_dispatch
- [common_serial_bus.type];
+ [aml_resource->common_serial_bus.type];
}
} else {
conversion_table =
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index a1f10e4409a3..5b98e09fff76 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -3,7 +3,7 @@
*
* Module Name: tbdata - Table manager data structure functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 3c126c6d306b..c6658b2f3027 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -3,7 +3,7 @@
*
* Module Name: tbfadt - FADT table utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index 1c1b2e284bd9..d71a73216380 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -3,7 +3,7 @@
*
* Module Name: tbfind - find table
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -57,8 +57,8 @@ acpi_tb_find_table(char *signature,
memset(&header, 0, sizeof(struct acpi_table_header));
ACPI_COPY_NAMESEG(header.signature, signature);
- strncpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE);
- strncpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
+ memcpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE);
+ memcpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
/* Search for the table */
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 0dc003c20e4d..ee9b85bc238b 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -3,7 +3,7 @@
*
* Module Name: tbinstal - ACPI table installation and removal
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c
index 58b02e4b254b..fd64460a2e26 100644
--- a/drivers/acpi/acpica/tbprint.c
+++ b/drivers/acpi/acpica/tbprint.c
@@ -3,7 +3,7 @@
*
* Module Name: tbprint - Table output utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index dad7425fce3f..fa64851c7b62 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -3,7 +3,7 @@
*
* Module Name: tbutils - ACPI Table utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 275b52dc42e9..a8f07d2641b6 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -3,7 +3,7 @@
*
* Module Name: tbxface - ACPI table-oriented external interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 0f2a7343de3a..2a17c60a9a39 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -3,7 +3,7 @@
*
* Module Name: tbxfload - Table load/unload external interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index 5b413bbab338..961577ba9486 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -3,7 +3,7 @@
*
* Module Name: tbxfroot - Find the root ACPI table (RSDT)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c
index be94d2fd99a7..c673d6c95e0a 100644
--- a/drivers/acpi/acpica/utaddress.c
+++ b/drivers/acpi/acpica/utaddress.c
@@ -3,7 +3,7 @@
*
* Module Name: utaddress - op_region address range check
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index c1fb70457e20..2418a312733a 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -3,7 +3,7 @@
*
* Module Name: utalloc - local memory allocation routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utascii.c b/drivers/acpi/acpica/utascii.c
index 2be37676edd7..259c28d3fecd 100644
--- a/drivers/acpi/acpica/utascii.c
+++ b/drivers/acpi/acpica/utascii.c
@@ -3,7 +3,7 @@
*
* Module Name: utascii - Utility ascii functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index b054bb5eeaf0..f6e6e98e9523 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -3,7 +3,7 @@
*
* Module Name: utbuffer - Buffer dump routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index 85a85f7cf750..cabec193febb 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -3,7 +3,7 @@
*
* Module Name: utcache - local cache allocation routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -251,9 +251,9 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache)
} else {
/* The cache is empty, create a new object */
+#ifdef ACPI_DBG_TRACK_ALLOCATIONS
ACPI_MEM_TRACKING(cache->total_allocated++);
-#ifdef ACPI_DBG_TRACK_ALLOCATIONS
if ((cache->total_allocated - cache->total_freed) >
cache->max_occupied) {
cache->max_occupied =
diff --git a/drivers/acpi/acpica/utcksum.c b/drivers/acpi/acpica/utcksum.c
index b483894c3629..e6f6030b3a3f 100644
--- a/drivers/acpi/acpica/utcksum.c
+++ b/drivers/acpi/acpica/utcksum.c
@@ -3,7 +3,7 @@
*
* Module Name: utcksum - Support generating table checksums
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
index 2e17e657dfa4..80458e70ac2b 100644
--- a/drivers/acpi/acpica/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -3,7 +3,7 @@
*
* Module Name: utcopy - Internal to external object translation utilities
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 3d71bd9245c7..9f197e293c7e 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -3,7 +3,7 @@
*
* Module Name: utdebug - Debug print/trace routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index 95a4b7509e01..b82130d1a8bc 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -3,7 +3,7 @@
*
* Module Name: utdecode - Utility decoding routines (value-to-string)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c
index c85bfa13ac1e..e8180099d01f 100644
--- a/drivers/acpi/acpica/utdelete.c
+++ b/drivers/acpi/acpica/utdelete.c
@@ -404,7 +404,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action)
object, object->common.type,
acpi_ut_get_object_type_name(object),
new_count));
- message = "Incremement";
+ message = "Increment";
break;
case REF_DECREMENT:
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 3e5173d03953..abc6583ed369 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -3,7 +3,7 @@
*
* Module Name: uteval - Object evaluation
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index 820820ea8119..97c55a113bae 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -3,7 +3,7 @@
*
* Module Name: utglobal - Global variables for the ACPI subsystem
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/uthex.c b/drivers/acpi/acpica/uthex.c
index e62802791dcf..8cd050e9cad5 100644
--- a/drivers/acpi/acpica/uthex.c
+++ b/drivers/acpi/acpica/uthex.c
@@ -3,7 +3,7 @@
*
* Module Name: uthex -- Hex/ASCII support functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c
index 15c2ce91d403..eb88335dea2c 100644
--- a/drivers/acpi/acpica/utids.c
+++ b/drivers/acpi/acpica/utids.c
@@ -3,7 +3,7 @@
*
* Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c
index 6d78504e9fbc..4bef97e8223a 100644
--- a/drivers/acpi/acpica/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -3,7 +3,7 @@
*
* Module Name: utinit - Common ACPI subsystem initialization
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utlock.c b/drivers/acpi/acpica/utlock.c
index ee6d72385c5c..123dbcbc60bc 100644
--- a/drivers/acpi/acpica/utlock.c
+++ b/drivers/acpi/acpica/utlock.c
@@ -3,7 +3,7 @@
*
* Module Name: utlock - Reader/Writer lock interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c
index ff0802ace19b..803e3e893825 100644
--- a/drivers/acpi/acpica/utnonansi.c
+++ b/drivers/acpi/acpica/utnonansi.c
@@ -168,7 +168,7 @@ void acpi_ut_safe_strncpy(char *dest, char *source, acpi_size dest_size)
{
/* Always terminate destination string */
- strncpy(dest, source, dest_size);
+ memcpy(dest, source, dest_size);
dest[dest_size - 1] = 0;
}
diff --git a/drivers/acpi/acpica/utobject.c b/drivers/acpi/acpica/utobject.c
index f4aae8f0d3a8..272e46208263 100644
--- a/drivers/acpi/acpica/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -3,7 +3,7 @@
*
* Module Name: utobject - ACPI object create/delete/size/cache routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index 99b85fd6eccf..f6ac16729e42 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -3,7 +3,7 @@
*
* Module Name: utosi - Support for the _OSI predefined control method
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c
index 29d2977d0746..d9bd80e2d32a 100644
--- a/drivers/acpi/acpica/utpredef.c
+++ b/drivers/acpi/acpica/utpredef.c
@@ -3,7 +3,7 @@
*
* Module Name: utpredef - support functions for predefined names
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index 42b30b9f9312..423d10569736 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -3,7 +3,7 @@
*
* Module Name: utprint - Formatted printing routines
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -333,11 +333,8 @@ int vsnprintf(char *string, acpi_size size, const char *format, va_list args)
pos = string;
- if (size != ACPI_UINT32_MAX) {
- end = string + size;
- } else {
- end = ACPI_CAST_PTR(char, ACPI_UINT32_MAX);
- }
+ size = ACPI_MIN(size, ACPI_PTR_DIFF(ACPI_MAX_PTR, string));
+ end = string + size;
for (; *format; ++format) {
if (*format != '%') {
diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c
index cff7901f7866..e1cc3d348750 100644
--- a/drivers/acpi/acpica/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -361,20 +361,16 @@ acpi_ut_validate_resource(struct acpi_walk_state *walk_state,
aml_resource = ACPI_CAST_PTR(union aml_resource, aml);
if (resource_type == ACPI_RESOURCE_NAME_SERIAL_BUS) {
- /* Avoid undefined behavior: member access within misaligned address */
-
- struct aml_resource_common_serialbus common_serial_bus;
- memcpy(&common_serial_bus, aml_resource,
- sizeof(common_serial_bus));
-
/* Validate the bus_type field */
- if ((common_serial_bus.type == 0) ||
- (common_serial_bus.type > AML_RESOURCE_MAX_SERIALBUSTYPE)) {
+ if ((aml_resource->common_serial_bus.type == 0) ||
+ (aml_resource->common_serial_bus.type >
+ AML_RESOURCE_MAX_SERIALBUSTYPE)) {
if (walk_state) {
ACPI_ERROR((AE_INFO,
"Invalid/unsupported SerialBus resource descriptor: BusType 0x%2.2X",
- common_serial_bus.type));
+ aml_resource->common_serial_bus.
+ type));
}
return (AE_AML_INVALID_RESOURCE_TYPE);
}
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index f5f5da441458..a99c4c9e3d39 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -3,7 +3,7 @@
*
* Module Name: uttrack - Memory allocation tracking routines (debug only)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utuuid.c b/drivers/acpi/acpica/utuuid.c
index 8f10b413e928..0682554934ca 100644
--- a/drivers/acpi/acpica/utuuid.c
+++ b/drivers/acpi/acpica/utuuid.c
@@ -3,7 +3,7 @@
*
* Module Name: utuuid -- UUID support functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index aa2e923462b7..56942b5f026b 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -3,7 +3,7 @@
*
* Module Name: utxface - External interfaces, miscellaneous utility functions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c
index 70ae0afa7939..c1702f8fba67 100644
--- a/drivers/acpi/acpica/utxfinit.c
+++ b/drivers/acpi/acpica/utxfinit.c
@@ -3,7 +3,7 @@
*
* Module Name: utxfinit - External interfaces for ACPICA initialization
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index 04731a5b01fa..ca3484dac5c4 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -21,7 +21,7 @@
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/mm.h>
-#include <linux/platform_device.h>
+#include <linux/device/faux.h>
#include <linux/unaligned.h>
#include "apei-internal.h"
@@ -83,6 +83,8 @@ static struct debugfs_blob_wrapper vendor_blob;
static struct debugfs_blob_wrapper vendor_errors;
static char vendor_dev[64];
+static u32 available_error_type;
+
/*
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
@@ -648,14 +650,9 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
static int available_error_type_show(struct seq_file *m, void *v)
{
- int rc;
- u32 error_type = 0;
- rc = einj_get_available_error_type(&error_type);
- if (rc)
- return rc;
for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
- if (error_type & einj_error_type_string[pos].mask)
+ if (available_error_type & einj_error_type_string[pos].mask)
seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
einj_error_type_string[pos].str);
@@ -678,8 +675,7 @@ bool einj_is_cxl_error_type(u64 type)
int einj_validate_error_type(u64 type)
{
- u32 tval, vendor, available_error_type = 0;
- int rc;
+ u32 tval, vendor;
/* Only low 32 bits for error type are valid */
if (type & GENMASK_ULL(63, 32))
@@ -695,13 +691,9 @@ int einj_validate_error_type(u64 type)
/* Only one error type can be specified */
if (tval & (tval - 1))
return -EINVAL;
- if (!vendor) {
- rc = einj_get_available_error_type(&available_error_type);
- if (rc)
- return rc;
+ if (!vendor)
if (!(type & available_error_type))
return -EINVAL;
- }
return 0;
}
@@ -749,17 +741,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
return 0;
}
-static int __init einj_probe(struct platform_device *pdev)
+static int __init einj_probe(struct faux_device *fdev)
{
int rc;
acpi_status status;
struct apei_exec_context ctx;
- if (acpi_disabled) {
- pr_debug("ACPI disabled.\n");
- return -ENODEV;
- }
-
status = acpi_get_table(ACPI_SIG_EINJ, 0,
(struct acpi_table_header **)&einj_tab);
if (status == AE_NOT_FOUND) {
@@ -777,6 +764,10 @@ static int __init einj_probe(struct platform_device *pdev)
goto err_put_table;
}
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+
rc = -ENOMEM;
einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
@@ -851,7 +842,7 @@ err_put_table:
return rc;
}
-static void __exit einj_remove(struct platform_device *pdev)
+static void __exit einj_remove(struct faux_device *fdev)
{
struct apei_exec_context ctx;
@@ -872,34 +863,30 @@ static void __exit einj_remove(struct platform_device *pdev)
acpi_put_table((struct acpi_table_header *)einj_tab);
}
-static struct platform_device *einj_dev;
+static struct faux_device *einj_dev;
/*
* einj_remove() lives in .exit.text. For drivers registered via
* platform_driver_probe() this is ok because they cannot get unbound at
* runtime. So mark the driver struct with __refdata to prevent modpost
* triggering a section mismatch warning.
*/
-static struct platform_driver einj_driver __refdata = {
+static struct faux_device_ops einj_device_ops __refdata = {
+ .probe = einj_probe,
.remove = __exit_p(einj_remove),
- .driver = {
- .name = "acpi-einj",
- },
};
static int __init einj_init(void)
{
- struct platform_device_info einj_dev_info = {
- .name = "acpi-einj",
- .id = -1,
- };
- int rc;
+ if (acpi_disabled) {
+ pr_debug("ACPI disabled.\n");
+ return -ENODEV;
+ }
- einj_dev = platform_device_register_full(&einj_dev_info);
- if (IS_ERR(einj_dev))
- return PTR_ERR(einj_dev);
+ einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops);
+ if (!einj_dev)
+ return -ENODEV;
- rc = platform_driver_probe(&einj_driver, einj_probe);
- einj_initialized = rc == 0;
+ einj_initialized = true;
return 0;
}
@@ -907,9 +894,8 @@ static int __init einj_init(void)
static void __exit einj_exit(void)
{
if (einj_initialized)
- platform_driver_unregister(&einj_driver);
+ faux_device_destroy(einj_dev);
- platform_device_unregister(einj_dev);
}
module_init(einj_init);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 6760330a8af5..45593612a4db 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -243,10 +243,23 @@ static int acpi_battery_get_property(struct power_supply *psy,
break;
case POWER_SUPPLY_PROP_CURRENT_NOW:
case POWER_SUPPLY_PROP_POWER_NOW:
- if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN)
+ if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) {
ret = -ENODEV;
- else
- val->intval = battery->rate_now * 1000;
+ break;
+ }
+
+ val->intval = battery->rate_now * 1000;
+ /*
+ * When discharging, the current should be reported as a
+ * negative number as per the power supply class interface
+ * definition.
+ */
+ if (psp == POWER_SUPPLY_PROP_CURRENT_NOW &&
+ (battery->state & ACPI_BATTERY_STATE_DISCHARGING) &&
+ acpi_battery_handle_discharging(battery)
+ == POWER_SUPPLY_STATUS_DISCHARGING)
+ val->intval = -val->intval;
+
break;
case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
@@ -279,8 +292,8 @@ static int acpi_battery_get_property(struct power_supply *psy,
full_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
ret = -ENODEV;
else
- val->intval = battery->capacity_now * 100/
- full_capacity;
+ val->intval = DIV_ROUND_CLOSEST_ULL(battery->capacity_now * 100ULL,
+ full_capacity);
break;
case POWER_SUPPLY_PROP_CAPACITY_LEVEL:
if (battery->state & ACPI_BATTERY_STATE_CRITICAL)
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 058910af82bc..c2ab2783303f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1446,8 +1446,10 @@ static int __init acpi_init(void)
}
acpi_kobj = kobject_create_and_add("acpi", firmware_kobj);
- if (!acpi_kobj)
- pr_debug("%s: kset create error\n", __func__);
+ if (!acpi_kobj) {
+ pr_err("Failed to register kobject\n");
+ return -ENOMEM;
+ }
init_prmt();
acpi_init_pcc();
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index f193e713825a..a9ae2fd62863 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -47,7 +47,6 @@
struct cppc_pcc_data {
struct pcc_mbox_chan *pcc_channel;
- void __iomem *pcc_comm_addr;
bool pcc_channel_acquired;
unsigned int deadline_us;
unsigned int pcc_mpar, pcc_mrtt, pcc_nominal;
@@ -95,7 +94,7 @@ static DEFINE_PER_CPU(int, cpu_pcc_subspace_idx);
static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
/* pcc mapped address + header size + offset within PCC subspace */
-#define GET_PCC_VADDR(offs, pcc_ss_id) (pcc_data[pcc_ss_id]->pcc_comm_addr + \
+#define GET_PCC_VADDR(offs, pcc_ss_id) (pcc_data[pcc_ss_id]->pcc_channel->shmem + \
0x8 + (offs))
/* Check if a CPC register is in PCC */
@@ -129,6 +128,20 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
#define CPC_SUPPORTED(cpc) ((cpc)->type == ACPI_TYPE_INTEGER ? \
!!(cpc)->cpc_entry.int_value : \
!IS_NULL_REG(&(cpc)->cpc_entry.reg))
+
+/*
+ * Each bit indicates the optionality of the register in per-cpu
+ * cpc_regs[] with the corresponding index. 0 means mandatory and 1
+ * means optional.
+ */
+#define REG_OPTIONAL (0x1FC7D0)
+
+/*
+ * Use the index of the register in per-cpu cpc_regs[] to check if
+ * it's an optional one.
+ */
+#define IS_OPTIONAL_CPC_REG(reg_idx) (REG_OPTIONAL & (1U << (reg_idx)))
+
/*
* Arbitrary Retries in case the remote processor is slow to respond
* to PCC commands. Keeping it high enough to cover emulators where
@@ -223,7 +236,7 @@ static int check_pcc_chan(int pcc_ss_id, bool chk_err_bit)
int ret, status;
struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
struct acpi_pcct_shared_memory __iomem *generic_comm_base =
- pcc_ss_data->pcc_comm_addr;
+ pcc_ss_data->pcc_channel->shmem;
if (!pcc_ss_data->platform_owns_pcc)
return 0;
@@ -258,7 +271,7 @@ static int send_pcc_cmd(int pcc_ss_id, u16 cmd)
int ret = -EIO, i;
struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
struct acpi_pcct_shared_memory __iomem *generic_comm_base =
- pcc_ss_data->pcc_comm_addr;
+ pcc_ss_data->pcc_channel->shmem;
unsigned int time_delta;
/*
@@ -571,15 +584,6 @@ static int register_pcc_channel(int pcc_ss_idx)
pcc_data[pcc_ss_idx]->pcc_mpar = pcc_chan->max_access_rate;
pcc_data[pcc_ss_idx]->pcc_nominal = pcc_chan->latency;
- pcc_data[pcc_ss_idx]->pcc_comm_addr =
- acpi_os_ioremap(pcc_chan->shmem_base_addr,
- pcc_chan->shmem_size);
- if (!pcc_data[pcc_ss_idx]->pcc_comm_addr) {
- pr_err("Failed to ioremap PCC comm region mem for %d\n",
- pcc_ss_idx);
- return -ENOMEM;
- }
-
/* Set flag so that we don't come here for each CPU. */
pcc_data[pcc_ss_idx]->pcc_channel_acquired = true;
}
@@ -1175,43 +1179,106 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
return ret_val;
}
-static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
+static int cppc_get_reg_val_in_pcc(int cpu, struct cpc_register_resource *reg, u64 *val)
{
- struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
+ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
+ struct cppc_pcc_data *pcc_ss_data = NULL;
+ int ret;
+
+ if (pcc_ss_id < 0) {
+ pr_debug("Invalid pcc_ss_id\n");
+ return -ENODEV;
+ }
+
+ pcc_ss_data = pcc_data[pcc_ss_id];
+
+ down_write(&pcc_ss_data->pcc_lock);
+
+ if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
+ ret = cpc_read(cpu, reg, val);
+ else
+ ret = -EIO;
+
+ up_write(&pcc_ss_data->pcc_lock);
+
+ return ret;
+}
+
+static int cppc_get_reg_val(int cpu, enum cppc_regs reg_idx, u64 *val)
+{
+ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
struct cpc_register_resource *reg;
+ if (val == NULL)
+ return -EINVAL;
+
if (!cpc_desc) {
- pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
+ pr_debug("No CPC descriptor for CPU:%d\n", cpu);
return -ENODEV;
}
reg = &cpc_desc->cpc_regs[reg_idx];
- if (CPC_IN_PCC(reg)) {
- int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
- struct cppc_pcc_data *pcc_ss_data = NULL;
- int ret = 0;
+ if ((reg->type == ACPI_TYPE_INTEGER && IS_OPTIONAL_CPC_REG(reg_idx) &&
+ !reg->cpc_entry.int_value) || (reg->type != ACPI_TYPE_INTEGER &&
+ IS_NULL_REG(&reg->cpc_entry.reg))) {
+ pr_debug("CPC register is not supported\n");
+ return -EOPNOTSUPP;
+ }
- if (pcc_ss_id < 0)
- return -EIO;
+ if (CPC_IN_PCC(reg))
+ return cppc_get_reg_val_in_pcc(cpu, reg, val);
- pcc_ss_data = pcc_data[pcc_ss_id];
-
- down_write(&pcc_ss_data->pcc_lock);
+ return cpc_read(cpu, reg, val);
+}
- if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
- cpc_read(cpunum, reg, perf);
- else
- ret = -EIO;
+static int cppc_set_reg_val_in_pcc(int cpu, struct cpc_register_resource *reg, u64 val)
+{
+ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
+ struct cppc_pcc_data *pcc_ss_data = NULL;
+ int ret;
- up_write(&pcc_ss_data->pcc_lock);
+ if (pcc_ss_id < 0) {
+ pr_debug("Invalid pcc_ss_id\n");
+ return -ENODEV;
+ }
+ ret = cpc_write(cpu, reg, val);
+ if (ret)
return ret;
+
+ pcc_ss_data = pcc_data[pcc_ss_id];
+
+ down_write(&pcc_ss_data->pcc_lock);
+ /* after writing CPC, transfer the ownership of PCC to platform */
+ ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE);
+ up_write(&pcc_ss_data->pcc_lock);
+
+ return ret;
+}
+
+static int cppc_set_reg_val(int cpu, enum cppc_regs reg_idx, u64 val)
+{
+ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
+ struct cpc_register_resource *reg;
+
+ if (!cpc_desc) {
+ pr_debug("No CPC descriptor for CPU:%d\n", cpu);
+ return -ENODEV;
}
- cpc_read(cpunum, reg, perf);
+ reg = &cpc_desc->cpc_regs[reg_idx];
- return 0;
+ /* if a register is writeable, it must be a buffer and not null */
+ if ((reg->type != ACPI_TYPE_BUFFER) || IS_NULL_REG(&reg->cpc_entry.reg)) {
+ pr_debug("CPC register is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (CPC_IN_PCC(reg))
+ return cppc_set_reg_val_in_pcc(cpu, reg, val);
+
+ return cpc_write(cpu, reg, val);
}
/**
@@ -1223,7 +1290,7 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
*/
int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
- return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf);
+ return cppc_get_reg_val(cpunum, DESIRED_PERF, desired_perf);
}
EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
@@ -1236,7 +1303,7 @@ EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
*/
int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
- return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
+ return cppc_get_reg_val(cpunum, NOMINAL_PERF, nominal_perf);
}
/**
@@ -1248,7 +1315,7 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
*/
int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
{
- return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
+ return cppc_get_reg_val(cpunum, HIGHEST_PERF, highest_perf);
}
EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
@@ -1261,7 +1328,7 @@ EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
*/
int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
{
- return cppc_get_perf(cpunum, ENERGY_PERF, epp_perf);
+ return cppc_get_reg_val(cpunum, ENERGY_PERF, epp_perf);
}
EXPORT_SYMBOL_GPL(cppc_get_epp_perf);
@@ -1535,53 +1602,110 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
EXPORT_SYMBOL_GPL(cppc_set_epp_perf);
/**
- * cppc_get_auto_sel_caps - Read autonomous selection register.
- * @cpunum : CPU from which to read register.
- * @perf_caps : struct where autonomous selection register value is updated.
+ * cppc_set_epp() - Write the EPP register.
+ * @cpu: CPU on which to write register.
+ * @epp_val: Value to write to the EPP register.
*/
-int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps)
+int cppc_set_epp(int cpu, u64 epp_val)
{
- struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
- struct cpc_register_resource *auto_sel_reg;
- u64 auto_sel;
+ if (epp_val > CPPC_ENERGY_PERF_MAX)
+ return -EINVAL;
- if (!cpc_desc) {
- pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
- return -ENODEV;
- }
+ return cppc_set_reg_val(cpu, ENERGY_PERF, epp_val);
+}
+EXPORT_SYMBOL_GPL(cppc_set_epp);
- auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE];
+/**
+ * cppc_get_auto_act_window() - Read autonomous activity window register.
+ * @cpu: CPU from which to read register.
+ * @auto_act_window: Return address.
+ *
+ * According to ACPI 6.5, s8.4.6.1.6, the value read from the autonomous
+ * activity window register consists of two parts: a 7 bits value indicate
+ * significand and a 3 bits value indicate exponent.
+ */
+int cppc_get_auto_act_window(int cpu, u64 *auto_act_window)
+{
+ unsigned int exp;
+ u64 val, sig;
+ int ret;
- if (!CPC_SUPPORTED(auto_sel_reg))
- pr_warn_once("Autonomous mode is not unsupported!\n");
+ if (auto_act_window == NULL)
+ return -EINVAL;
- if (CPC_IN_PCC(auto_sel_reg)) {
- int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
- struct cppc_pcc_data *pcc_ss_data = NULL;
- int ret = 0;
+ ret = cppc_get_reg_val(cpu, AUTO_ACT_WINDOW, &val);
+ if (ret)
+ return ret;
- if (pcc_ss_id < 0)
- return -ENODEV;
+ sig = val & CPPC_AUTO_ACT_WINDOW_MAX_SIG;
+ exp = (val >> CPPC_AUTO_ACT_WINDOW_SIG_BIT_SIZE) & CPPC_AUTO_ACT_WINDOW_MAX_EXP;
+ *auto_act_window = sig * int_pow(10, exp);
- pcc_ss_data = pcc_data[pcc_ss_id];
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cppc_get_auto_act_window);
- down_write(&pcc_ss_data->pcc_lock);
+/**
+ * cppc_set_auto_act_window() - Write autonomous activity window register.
+ * @cpu: CPU on which to write register.
+ * @auto_act_window: usec value to write to the autonomous activity window register.
+ *
+ * According to ACPI 6.5, s8.4.6.1.6, the value to write to the autonomous
+ * activity window register consists of two parts: a 7 bits value indicate
+ * significand and a 3 bits value indicate exponent.
+ */
+int cppc_set_auto_act_window(int cpu, u64 auto_act_window)
+{
+ /* The max value to store is 1270000000 */
+ u64 max_val = CPPC_AUTO_ACT_WINDOW_MAX_SIG * int_pow(10, CPPC_AUTO_ACT_WINDOW_MAX_EXP);
+ int exp = 0;
+ u64 val;
- if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) {
- cpc_read(cpunum, auto_sel_reg, &auto_sel);
- perf_caps->auto_sel = (bool)auto_sel;
- } else {
- ret = -EIO;
- }
+ if (auto_act_window > max_val)
+ return -EINVAL;
- up_write(&pcc_ss_data->pcc_lock);
+ /*
+ * The max significand is 127, when auto_act_window is larger than
+ * 129, discard the precision of the last digit and increase the
+ * exponent by 1.
+ */
+ while (auto_act_window > CPPC_AUTO_ACT_WINDOW_SIG_CARRY_THRESH) {
+ auto_act_window /= 10;
+ exp += 1;
+ }
+
+ /* For 128 and 129, cut it to 127. */
+ if (auto_act_window > CPPC_AUTO_ACT_WINDOW_MAX_SIG)
+ auto_act_window = CPPC_AUTO_ACT_WINDOW_MAX_SIG;
+
+ val = (exp << CPPC_AUTO_ACT_WINDOW_SIG_BIT_SIZE) + auto_act_window;
+ return cppc_set_reg_val(cpu, AUTO_ACT_WINDOW, val);
+}
+EXPORT_SYMBOL_GPL(cppc_set_auto_act_window);
+
+/**
+ * cppc_get_auto_sel() - Read autonomous selection register.
+ * @cpu: CPU from which to read register.
+ * @enable: Return address.
+ */
+int cppc_get_auto_sel(int cpu, bool *enable)
+{
+ u64 auto_sel;
+ int ret;
+
+ if (enable == NULL)
+ return -EINVAL;
+
+ ret = cppc_get_reg_val(cpu, AUTO_SEL_ENABLE, &auto_sel);
+ if (ret)
return ret;
- }
+
+ *enable = (bool)auto_sel;
return 0;
}
-EXPORT_SYMBOL_GPL(cppc_get_auto_sel_caps);
+EXPORT_SYMBOL_GPL(cppc_get_auto_sel);
/**
* cppc_set_auto_sel - Write autonomous selection register.
@@ -1590,43 +1714,7 @@ EXPORT_SYMBOL_GPL(cppc_get_auto_sel_caps);
*/
int cppc_set_auto_sel(int cpu, bool enable)
{
- int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
- struct cpc_register_resource *auto_sel_reg;
- struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
- struct cppc_pcc_data *pcc_ss_data = NULL;
- int ret = -EINVAL;
-
- if (!cpc_desc) {
- pr_debug("No CPC descriptor for CPU:%d\n", cpu);
- return -ENODEV;
- }
-
- auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE];
-
- if (CPC_IN_PCC(auto_sel_reg)) {
- if (pcc_ss_id < 0) {
- pr_debug("Invalid pcc_ss_id\n");
- return -ENODEV;
- }
-
- if (CPC_SUPPORTED(auto_sel_reg)) {
- ret = cpc_write(cpu, auto_sel_reg, enable);
- if (ret)
- return ret;
- }
-
- pcc_ss_data = pcc_data[pcc_ss_id];
-
- down_write(&pcc_ss_data->pcc_lock);
- /* after writing CPC, transfer the ownership of PCC to platform */
- ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE);
- up_write(&pcc_ss_data->pcc_lock);
- } else {
- ret = -ENOTSUPP;
- pr_debug("_CPC in PCC is not supported\n");
- }
-
- return ret;
+ return cppc_set_reg_val(cpu, AUTO_SEL_ENABLE, enable);
}
EXPORT_SYMBOL_GPL(cppc_set_auto_sel);
@@ -1640,38 +1728,7 @@ EXPORT_SYMBOL_GPL(cppc_set_auto_sel);
*/
int cppc_set_enable(int cpu, bool enable)
{
- int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
- struct cpc_register_resource *enable_reg;
- struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
- struct cppc_pcc_data *pcc_ss_data = NULL;
- int ret = -EINVAL;
-
- if (!cpc_desc) {
- pr_debug("No CPC descriptor for CPU:%d\n", cpu);
- return -EINVAL;
- }
-
- enable_reg = &cpc_desc->cpc_regs[ENABLE];
-
- if (CPC_IN_PCC(enable_reg)) {
-
- if (pcc_ss_id < 0)
- return -EIO;
-
- ret = cpc_write(cpu, enable_reg, enable);
- if (ret)
- return ret;
-
- pcc_ss_data = pcc_data[pcc_ss_id];
-
- down_write(&pcc_ss_data->pcc_lock);
- /* after writing CPC, transfer the ownership of PCC to platfrom */
- ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE);
- up_write(&pcc_ss_data->pcc_lock);
- return ret;
- }
-
- return cpc_write(cpu, enable_reg, enable);
+ return cppc_set_reg_val(cpu, ENABLE, enable);
}
EXPORT_SYMBOL_GPL(cppc_set_enable);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 3c5f34892734..6f4203716b53 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -2329,6 +2329,12 @@ static const struct dmi_system_id acpi_ec_no_wakeup[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"),
}
},
+ {
+ // TUXEDO InfinityBook Pro AMD Gen9
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GXxHRXx"),
+ },
+ },
{ },
};
diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
index df9328c850bd..f2c943b934be 100644
--- a/drivers/acpi/osi.c
+++ b/drivers/acpi/osi.c
@@ -42,7 +42,6 @@ static struct acpi_osi_entry
osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = {
{"Module Device", true},
{"Processor Device", true},
- {"3.0 _SCP Extensions", true},
{"Processor Aggregator Device", true},
};
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d0b6a024daae..74ade4160314 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -858,7 +858,7 @@ next:
}
}
-static void acpi_pci_root_remap_iospace(struct fwnode_handle *fwnode,
+static void acpi_pci_root_remap_iospace(const struct fwnode_handle *fwnode,
struct resource_entry *entry)
{
#ifdef PCI_IOBASE
diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c
index ffbfd32f4cf1..b43f4459a4f6 100644
--- a/drivers/acpi/platform_profile.c
+++ b/drivers/acpi/platform_profile.c
@@ -688,6 +688,9 @@ static int __init platform_profile_init(void)
{
int err;
+ if (acpi_disabled)
+ return -EOPNOTSUPP;
+
err = class_register(&platform_profile_class);
if (err)
return err;
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index f73ce6e13065..54676e3d82dd 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -231,16 +231,18 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
sizeof(struct acpi_table_pptt));
proc_sz = sizeof(struct acpi_pptt_processor);
- while ((unsigned long)entry + proc_sz < table_end) {
+ /* ignore subtable types that are smaller than a processor node */
+ while ((unsigned long)entry + proc_sz <= table_end) {
cpu_node = (struct acpi_pptt_processor *)entry;
+
if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
cpu_node->parent == node_entry)
return 0;
if (entry->length == 0)
return 0;
+
entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
entry->length);
-
}
return 1;
}
@@ -273,15 +275,18 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he
proc_sz = sizeof(struct acpi_pptt_processor);
/* find the processor structure associated with this cpuid */
- while ((unsigned long)entry + proc_sz < table_end) {
+ while ((unsigned long)entry + proc_sz <= table_end) {
cpu_node = (struct acpi_pptt_processor *)entry;
if (entry->length == 0) {
pr_warn("Invalid zero length subtable\n");
break;
}
+ /* entry->length may not equal proc_sz, revalidate the processor structure length */
if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
acpi_cpu_id == cpu_node->acpi_processor_id &&
+ (unsigned long)entry + entry->length <= table_end &&
+ entry->length == proc_sz + cpu_node->number_of_priv_resources * sizeof(u32) &&
acpi_pptt_leaf_node(table_hdr, cpu_node)) {
return (struct acpi_pptt_processor *)entry;
}
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index b181f7fc2090..e2febca2ec13 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -461,10 +461,8 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
static int acpi_processor_get_cstate_info(struct acpi_processor *pr)
{
- unsigned int i;
int result;
-
/* NOTE: the idle thread may not be running while calling
* this function */
@@ -481,17 +479,7 @@ static int acpi_processor_get_cstate_info(struct acpi_processor *pr)
acpi_processor_get_power_info_default(pr);
pr->power.count = acpi_processor_power_verify(pr);
-
- /*
- * if one state of type C2 or C3 is available, mark this
- * CPU as being "idle manageable"
- */
- for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
- if (pr->power.states[i].valid) {
- pr->power.count = i;
- pr->flags.power = 1;
- }
- }
+ pr->flags.power = 1;
return 0;
}
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 53996f1a2d80..64b8d1e19594 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -20,6 +20,7 @@
#include <acpi/processor.h>
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
+#include <asm/msr.h>
#endif
#define ACPI_PROCESSOR_FILE_PERFORMANCE "performance"
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index 00d045e5f524..d1541a386fbc 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -18,9 +18,12 @@
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/acpi.h>
+#include <linux/uaccess.h>
#include <acpi/processor.h>
#include <asm/io.h>
-#include <linux/uaccess.h>
+#ifdef CONFIG_X86
+#include <asm/msr.h>
+#endif
/* ignore_tpc:
* 0 -> acpi processor driver doesn't ignore _TPC values
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 14c7bac4100b..7d59c6c9185f 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -534,7 +534,7 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
*/
static const struct dmi_system_id irq1_edge_low_force_override[] = {
{
- /* MECHREV Jiaolong17KS Series GM7XG0M */
+ /* MECHREVO Jiaolong17KS Series GM7XG0M */
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "GM7XG0M"),
},
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2295abbecd14..fa9bb8c8ce95 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length)
}
/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
-static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = {
+static const char table_sigs[][ACPI_NAMESEG_SIZE] __nonstring_array __initconst = {
ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT,
ACPI_SIG_EINJ, ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT,
ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT,
@@ -719,8 +719,12 @@ int __init acpi_locate_initial_tables(void)
}
status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
- if (ACPI_FAILURE(status))
+ if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+
+ pr_warn("Failed to initialize tables, status=0x%x (%s)", status, msg);
return -EINVAL;
+ }
return 0;
}
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 0c874186f8ae..5c2defe55898 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -803,6 +803,12 @@ static int acpi_thermal_add(struct acpi_device *device)
acpi_thermal_aml_dependency_fix(tz);
+ /*
+ * Set the cooling mode [_SCP] to active cooling. This needs to happen before
+ * we retrieve the trip point values.
+ */
+ acpi_execute_simple_method(tz->device->handle, "_SCP", ACPI_THERMAL_MODE_ACTIVE);
+
/* Get trip points [_ACi, _PSV, etc.] (required). */
acpi_thermal_get_trip_points(tz);
@@ -814,10 +820,6 @@ static int acpi_thermal_add(struct acpi_device *device)
if (result)
goto free_memory;
- /* Set the cooling mode [_SCP] to active cooling. */
- acpi_execute_simple_method(tz->device->handle, "_SCP",
- ACPI_THERMAL_MODE_ACTIVE);
-
/* Determine the default polling frequency [_TZP]. */
if (tzp)
tz->polling_frequency = tzp;
diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c
index 2aa69a2fba73..c13a20365c2c 100644
--- a/drivers/acpi/viot.c
+++ b/drivers/acpi/viot.c
@@ -19,11 +19,11 @@
#define pr_fmt(fmt) "ACPI: VIOT: " fmt
#include <linux/acpi_viot.h>
-#include <linux/fwnode.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
struct viot_iommu {
/* Node offset within the table */
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 94c6446604fc..98da8c4eea59 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -187,7 +187,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
inode_lock(d_inode(root));
/* look it up */
- dentry = lookup_one_len(name, root, name_len);
+ dentry = lookup_noperm(&QSTR(name), root);
if (IS_ERR(dentry)) {
inode_unlock(d_inode(root));
ret = PTR_ERR(dentry);
@@ -487,7 +487,7 @@ static struct dentry *binderfs_create_dentry(struct dentry *parent,
{
struct dentry *dentry;
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (IS_ERR(dentry))
return dentry;
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index af0029d30dbe..1037169abb45 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -154,14 +154,6 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
per_cpu(arch_freq_scale, i) = scale;
}
-DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
-
-void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
-{
- per_cpu(cpu_scale, cpu) = capacity;
-}
-
DEFINE_PER_CPU(unsigned long, hw_pressure);
/**
@@ -207,53 +199,9 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
}
EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
-static ssize_t cpu_capacity_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
-}
-
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
-static DEVICE_ATTR_RO(cpu_capacity);
-
-static int cpu_capacity_sysctl_add(unsigned int cpu)
-{
- struct device *cpu_dev = get_cpu_device(cpu);
-
- if (!cpu_dev)
- return -ENOENT;
-
- device_create_file(cpu_dev, &dev_attr_cpu_capacity);
-
- return 0;
-}
-
-static int cpu_capacity_sysctl_remove(unsigned int cpu)
-{
- struct device *cpu_dev = get_cpu_device(cpu);
-
- if (!cpu_dev)
- return -ENOENT;
-
- device_remove_file(cpu_dev, &dev_attr_cpu_capacity);
-
- return 0;
-}
-
-static int register_cpu_capacity_sysctl(void)
-{
- cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity",
- cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove);
-
- return 0;
-}
-subsys_initcall(register_cpu_capacity_sysctl);
-
static int update_topology;
int topology_update_cpu_topology(void)
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index a7e511849875..7779ab0ca7ce 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -600,6 +600,8 @@ CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow);
CPU_SHOW_VULN_FALLBACK(gds);
CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling);
CPU_SHOW_VULN_FALLBACK(ghostwrite);
+CPU_SHOW_VULN_FALLBACK(old_microcode);
+CPU_SHOW_VULN_FALLBACK(indirect_target_selection);
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -616,6 +618,8 @@ static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NU
static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL);
static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL);
+static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL);
+static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -633,6 +637,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_gather_data_sampling.attr,
&dev_attr_reg_file_data_sampling.attr,
&dev_attr_ghostwrite.attr,
+ &dev_attr_old_microcode.attr,
+ &dev_attr_indirect_target_selection.attr,
NULL
};
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index d8a733ea5e1a..7c20517a52c2 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -759,6 +759,17 @@ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, co
}
EXPORT_SYMBOL_GPL(__devm_add_action);
+bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data)
+{
+ struct action_devres devres = {
+ .data = data,
+ .action = action,
+ };
+
+ return devres_find(dev, devm_action_release, devm_action_match, &devres);
+}
+EXPORT_SYMBOL_GPL(devm_is_action_added);
+
/**
* devm_remove_action_nowarn() - removes previously added custom action
* @dev: Device that owns the action
diff --git a/drivers/base/node.c b/drivers/base/node.c
index cd13ef287011..618712071a1e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -468,7 +468,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
nid, 0UL,
- nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
+ nid, 0UL,
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(sreclaimable +
node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 0e60dd650b5e..70db08f3ac6f 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -95,5 +95,6 @@ EXPORT_SYMBOL_GPL(platform_device_msi_init_and_alloc_irqs);
void platform_device_msi_free_irqs_all(struct device *dev)
{
msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
+ msi_remove_device_irq_domain(dev, MSI_DEFAULT_DOMAIN);
}
EXPORT_SYMBOL_GPL(platform_device_msi_free_irqs_all);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index c8b0a9e29ed8..19fd55b8ac77 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -63,6 +63,7 @@ static LIST_HEAD(dpm_noirq_list);
static DEFINE_MUTEX(dpm_list_mtx);
static pm_message_t pm_transition;
+static DEFINE_MUTEX(async_wip_mtx);
static int async_error;
static const char *pm_verb(int event)
@@ -560,7 +561,7 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd)
struct timer_list *timer = &wd->timer;
timer_delete_sync(timer);
- destroy_timer_on_stack(timer);
+ timer_destroy_on_stack(timer);
}
#else
#define DECLARE_DPM_WATCHDOG_ON_STACK(wd)
@@ -597,8 +598,11 @@ static bool is_async(struct device *dev)
&& !pm_trace_is_enabled();
}
-static bool dpm_async_fn(struct device *dev, async_func_t func)
+static bool __dpm_async(struct device *dev, async_func_t func)
{
+ if (dev->power.work_in_progress)
+ return true;
+
if (!is_async(dev))
return false;
@@ -611,14 +615,37 @@ static bool dpm_async_fn(struct device *dev, async_func_t func)
put_device(dev);
+ return false;
+}
+
+static bool dpm_async_fn(struct device *dev, async_func_t func)
+{
+ guard(mutex)(&async_wip_mtx);
+
+ return __dpm_async(dev, func);
+}
+
+static int dpm_async_with_cleanup(struct device *dev, void *fn)
+{
+ guard(mutex)(&async_wip_mtx);
+
+ if (!__dpm_async(dev, fn))
+ dev->power.work_in_progress = false;
+
+ return 0;
+}
+
+static void dpm_async_resume_children(struct device *dev, async_func_t func)
+{
/*
- * async_schedule_dev_nocall() above has returned false, so func() is
- * not running and it is safe to update power.work_in_progress without
- * extra synchronization.
+ * Start processing "async" children of the device unless it's been
+ * started already for them.
+ *
+ * This could have been done for the device's "async" consumers too, but
+ * they either need to wait for their parents or the processing has
+ * already started for them after their parents were processed.
*/
- dev->power.work_in_progress = false;
-
- return false;
+ device_for_each_child(dev, func, dpm_async_with_cleanup);
}
static void dpm_clear_async_state(struct device *dev)
@@ -627,6 +654,13 @@ static void dpm_clear_async_state(struct device *dev)
dev->power.work_in_progress = false;
}
+static bool dpm_root_device(struct device *dev)
+{
+ return !dev->parent;
+}
+
+static void async_resume_noirq(void *data, async_cookie_t cookie);
+
/**
* device_resume_noirq - Execute a "noirq resume" callback for given device.
* @dev: Device to handle.
@@ -710,6 +744,8 @@ Out:
dpm_save_failed_dev(dev_name(dev));
pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
}
+
+ dpm_async_resume_children(dev, async_resume_noirq);
}
static void async_resume_noirq(void *data, async_cookie_t cookie)
@@ -733,19 +769,20 @@ static void dpm_noirq_resume_devices(pm_message_t state)
mutex_lock(&dpm_list_mtx);
/*
- * Trigger the resume of "async" devices upfront so they don't have to
- * wait for the "non-async" ones they don't depend on.
+ * Start processing "async" root devices upfront so they don't wait for
+ * the "sync" devices they don't depend on.
*/
list_for_each_entry(dev, &dpm_noirq_list, power.entry) {
dpm_clear_async_state(dev);
- dpm_async_fn(dev, async_resume_noirq);
+ if (dpm_root_device(dev))
+ dpm_async_with_cleanup(dev, async_resume_noirq);
}
while (!list_empty(&dpm_noirq_list)) {
dev = to_device(dpm_noirq_list.next);
list_move_tail(&dev->power.entry, &dpm_late_early_list);
- if (!dev->power.work_in_progress) {
+ if (!dpm_async_fn(dev, async_resume_noirq)) {
get_device(dev);
mutex_unlock(&dpm_list_mtx);
@@ -781,6 +818,8 @@ void dpm_resume_noirq(pm_message_t state)
device_wakeup_disarm_wake_irqs();
}
+static void async_resume_early(void *data, async_cookie_t cookie);
+
/**
* device_resume_early - Execute an "early resume" callback for given device.
* @dev: Device to handle.
@@ -848,6 +887,8 @@ Out:
dpm_save_failed_dev(dev_name(dev));
pm_dev_err(dev, state, async ? " async early" : " early", error);
}
+
+ dpm_async_resume_children(dev, async_resume_early);
}
static void async_resume_early(void *data, async_cookie_t cookie)
@@ -875,19 +916,20 @@ void dpm_resume_early(pm_message_t state)
mutex_lock(&dpm_list_mtx);
/*
- * Trigger the resume of "async" devices upfront so they don't have to
- * wait for the "non-async" ones they don't depend on.
+ * Start processing "async" root devices upfront so they don't wait for
+ * the "sync" devices they don't depend on.
*/
list_for_each_entry(dev, &dpm_late_early_list, power.entry) {
dpm_clear_async_state(dev);
- dpm_async_fn(dev, async_resume_early);
+ if (dpm_root_device(dev))
+ dpm_async_with_cleanup(dev, async_resume_early);
}
while (!list_empty(&dpm_late_early_list)) {
dev = to_device(dpm_late_early_list.next);
list_move_tail(&dev->power.entry, &dpm_suspended_list);
- if (!dev->power.work_in_progress) {
+ if (!dpm_async_fn(dev, async_resume_early)) {
get_device(dev);
mutex_unlock(&dpm_list_mtx);
@@ -919,6 +961,8 @@ void dpm_resume_start(pm_message_t state)
}
EXPORT_SYMBOL_GPL(dpm_resume_start);
+static void async_resume(void *data, async_cookie_t cookie);
+
/**
* device_resume - Execute "resume" callbacks for given device.
* @dev: Device to handle.
@@ -1018,6 +1062,8 @@ static void device_resume(struct device *dev, pm_message_t state, bool async)
dpm_save_failed_dev(dev_name(dev));
pm_dev_err(dev, state, async ? " async" : "", error);
}
+
+ dpm_async_resume_children(dev, async_resume);
}
static void async_resume(void *data, async_cookie_t cookie)
@@ -1049,19 +1095,20 @@ void dpm_resume(pm_message_t state)
mutex_lock(&dpm_list_mtx);
/*
- * Trigger the resume of "async" devices upfront so they don't have to
- * wait for the "non-async" ones they don't depend on.
+ * Start processing "async" root devices upfront so they don't wait for
+ * the "sync" devices they don't depend on.
*/
list_for_each_entry(dev, &dpm_suspended_list, power.entry) {
dpm_clear_async_state(dev);
- dpm_async_fn(dev, async_resume);
+ if (dpm_root_device(dev))
+ dpm_async_with_cleanup(dev, async_resume);
}
while (!list_empty(&dpm_suspended_list)) {
dev = to_device(dpm_suspended_list.next);
list_move_tail(&dev->power.entry, &dpm_prepared_list);
- if (!dev->power.work_in_progress) {
+ if (!dpm_async_fn(dev, async_resume)) {
get_device(dev);
mutex_unlock(&dpm_list_mtx);
@@ -1189,6 +1236,41 @@ EXPORT_SYMBOL_GPL(dpm_resume_end);
/*------------------------- Suspend routines -------------------------*/
+static bool dpm_leaf_device(struct device *dev)
+{
+ struct device *child;
+
+ lockdep_assert_held(&dpm_list_mtx);
+
+ child = device_find_any_child(dev);
+ if (child) {
+ put_device(child);
+
+ return false;
+ }
+
+ return true;
+}
+
+static void dpm_async_suspend_parent(struct device *dev, async_func_t func)
+{
+ guard(mutex)(&dpm_list_mtx);
+
+ /*
+ * If the device is suspended asynchronously and the parent's callback
+ * deletes both the device and the parent itself, the parent object may
+ * be freed while this function is running, so avoid that by checking
+ * if the device has been deleted already as the parent cannot be
+ * deleted before it.
+ */
+ if (!device_pm_initialized(dev))
+ return;
+
+ /* Start processing the device's parent if it is "async". */
+ if (dev->parent)
+ dpm_async_with_cleanup(dev->parent, func);
+}
+
/**
* resume_event - Return a "resume" message for given "suspend" sleep state.
* @sleep_state: PM message representing a sleep state.
@@ -1226,6 +1308,8 @@ static void dpm_superior_set_must_resume(struct device *dev)
device_links_read_unlock(idx);
}
+static void async_suspend_noirq(void *data, async_cookie_t cookie);
+
/**
* device_suspend_noirq - Execute a "noirq suspend" callback for given device.
* @dev: Device to handle.
@@ -1304,7 +1388,13 @@ Skip:
Complete:
complete_all(&dev->power.completion);
TRACE_SUSPEND(error);
- return error;
+
+ if (error || async_error)
+ return error;
+
+ dpm_async_suspend_parent(dev, async_suspend_noirq);
+
+ return 0;
}
static void async_suspend_noirq(void *data, async_cookie_t cookie)
@@ -1318,6 +1408,7 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie)
static int dpm_noirq_suspend_devices(pm_message_t state)
{
ktime_t starttime = ktime_get();
+ struct device *dev;
int error = 0;
trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true);
@@ -1327,12 +1418,21 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
mutex_lock(&dpm_list_mtx);
+ /*
+ * Start processing "async" leaf devices upfront so they don't need to
+ * wait for the "sync" devices they don't depend on.
+ */
+ list_for_each_entry_reverse(dev, &dpm_late_early_list, power.entry) {
+ dpm_clear_async_state(dev);
+ if (dpm_leaf_device(dev))
+ dpm_async_with_cleanup(dev, async_suspend_noirq);
+ }
+
while (!list_empty(&dpm_late_early_list)) {
- struct device *dev = to_device(dpm_late_early_list.prev);
+ dev = to_device(dpm_late_early_list.prev);
list_move(&dev->power.entry, &dpm_noirq_list);
- dpm_clear_async_state(dev);
if (dpm_async_fn(dev, async_suspend_noirq))
continue;
@@ -1346,8 +1446,14 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
mutex_lock(&dpm_list_mtx);
- if (error || async_error)
+ if (error || async_error) {
+ /*
+ * Move all devices to the target list to resume them
+ * properly.
+ */
+ list_splice(&dpm_late_early_list, &dpm_noirq_list);
break;
+ }
}
mutex_unlock(&dpm_list_mtx);
@@ -1400,6 +1506,8 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev)
spin_unlock_irq(&parent->power.lock);
}
+static void async_suspend_late(void *data, async_cookie_t cookie);
+
/**
* device_suspend_late - Execute a "late suspend" callback for given device.
* @dev: Device to handle.
@@ -1476,7 +1584,13 @@ Skip:
Complete:
TRACE_SUSPEND(error);
complete_all(&dev->power.completion);
- return error;
+
+ if (error || async_error)
+ return error;
+
+ dpm_async_suspend_parent(dev, async_suspend_late);
+
+ return 0;
}
static void async_suspend_late(void *data, async_cookie_t cookie)
@@ -1494,6 +1608,7 @@ static void async_suspend_late(void *data, async_cookie_t cookie)
int dpm_suspend_late(pm_message_t state)
{
ktime_t starttime = ktime_get();
+ struct device *dev;
int error = 0;
trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true);
@@ -1505,12 +1620,21 @@ int dpm_suspend_late(pm_message_t state)
mutex_lock(&dpm_list_mtx);
+ /*
+ * Start processing "async" leaf devices upfront so they don't need to
+ * wait for the "sync" devices they don't depend on.
+ */
+ list_for_each_entry_reverse(dev, &dpm_suspended_list, power.entry) {
+ dpm_clear_async_state(dev);
+ if (dpm_leaf_device(dev))
+ dpm_async_with_cleanup(dev, async_suspend_late);
+ }
+
while (!list_empty(&dpm_suspended_list)) {
- struct device *dev = to_device(dpm_suspended_list.prev);
+ dev = to_device(dpm_suspended_list.prev);
list_move(&dev->power.entry, &dpm_late_early_list);
- dpm_clear_async_state(dev);
if (dpm_async_fn(dev, async_suspend_late))
continue;
@@ -1524,8 +1648,14 @@ int dpm_suspend_late(pm_message_t state)
mutex_lock(&dpm_list_mtx);
- if (error || async_error)
+ if (error || async_error) {
+ /*
+ * Move all devices to the target list to resume them
+ * properly.
+ */
+ list_splice(&dpm_suspended_list, &dpm_late_early_list);
break;
+ }
}
mutex_unlock(&dpm_list_mtx);
@@ -1614,6 +1744,8 @@ static void dpm_clear_superiors_direct_complete(struct device *dev)
device_links_read_unlock(idx);
}
+static void async_suspend(void *data, async_cookie_t cookie);
+
/**
* device_suspend - Execute "suspend" callbacks for given device.
* @dev: Device to handle.
@@ -1743,7 +1875,13 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async)
complete_all(&dev->power.completion);
TRACE_SUSPEND(error);
- return error;
+
+ if (error || async_error)
+ return error;
+
+ dpm_async_suspend_parent(dev, async_suspend);
+
+ return 0;
}
static void async_suspend(void *data, async_cookie_t cookie)
@@ -1761,6 +1899,7 @@ static void async_suspend(void *data, async_cookie_t cookie)
int dpm_suspend(pm_message_t state)
{
ktime_t starttime = ktime_get();
+ struct device *dev;
int error = 0;
trace_suspend_resume(TPS("dpm_suspend"), state.event, true);
@@ -1774,12 +1913,21 @@ int dpm_suspend(pm_message_t state)
mutex_lock(&dpm_list_mtx);
+ /*
+ * Start processing "async" leaf devices upfront so they don't need to
+ * wait for the "sync" devices they don't depend on.
+ */
+ list_for_each_entry_reverse(dev, &dpm_prepared_list, power.entry) {
+ dpm_clear_async_state(dev);
+ if (dpm_leaf_device(dev))
+ dpm_async_with_cleanup(dev, async_suspend);
+ }
+
while (!list_empty(&dpm_prepared_list)) {
- struct device *dev = to_device(dpm_prepared_list.prev);
+ dev = to_device(dpm_prepared_list.prev);
list_move(&dev->power.entry, &dpm_suspended_list);
- dpm_clear_async_state(dev);
if (dpm_async_fn(dev, async_suspend))
continue;
@@ -1793,8 +1941,14 @@ int dpm_suspend(pm_message_t state)
mutex_lock(&dpm_list_mtx);
- if (error || async_error)
+ if (error || async_error) {
+ /*
+ * Move all devices to the target list to resume them
+ * properly.
+ */
+ list_splice(&dpm_prepared_list, &dpm_suspended_list);
break;
+ }
}
mutex_unlock(&dpm_list_mtx);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 0e127b0329c0..c55a7c70bc1a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1011,7 +1011,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer)
* If 'expires' is after the current time, we've been called
* too early.
*/
- if (expires > 0 && expires < ktime_get_mono_fast_ns()) {
+ if (expires > 0 && expires <= ktime_get_mono_fast_ns()) {
dev->power.timer_expires = 0;
rpm_suspend(dev, dev->power.timer_autosuspends ?
(RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
@@ -1568,6 +1568,32 @@ out:
}
EXPORT_SYMBOL_GPL(pm_runtime_enable);
+static void pm_runtime_set_suspended_action(void *data)
+{
+ pm_runtime_set_suspended(data);
+}
+
+/**
+ * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable.
+ *
+ * @dev: Device to handle.
+ */
+int devm_pm_runtime_set_active_enabled(struct device *dev)
+{
+ int err;
+
+ err = pm_runtime_set_active(dev);
+ if (err)
+ return err;
+
+ err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev);
+ if (err)
+ return err;
+
+ return devm_pm_runtime_enable(dev);
+}
+EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled);
+
static void pm_runtime_disable_action(void *data)
{
pm_runtime_dont_use_autosuspend(data);
@@ -1590,6 +1616,24 @@ int devm_pm_runtime_enable(struct device *dev)
}
EXPORT_SYMBOL_GPL(devm_pm_runtime_enable);
+static void pm_runtime_put_noidle_action(void *data)
+{
+ pm_runtime_put_noidle(data);
+}
+
+/**
+ * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume.
+ *
+ * @dev: Device to handle.
+ */
+int devm_pm_runtime_get_noresume(struct device *dev)
+{
+ pm_runtime_get_noresume(dev);
+
+ return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev);
+}
+EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume);
+
/**
* pm_runtime_forbid - Block runtime PM of a device.
* @dev: Device to handle.
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index f84018125b46..13b31a3adc77 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -611,15 +611,9 @@ static DEVICE_ATTR_RW(async);
#endif /* CONFIG_PM_ADVANCED_DEBUG */
static struct attribute *power_attrs[] = {
-#ifdef CONFIG_PM_ADVANCED_DEBUG
-#ifdef CONFIG_PM_SLEEP
+#if defined(CONFIG_PM_ADVANCED_DEBUG) && defined(CONFIG_PM_SLEEP)
&dev_attr_async.attr,
#endif
- &dev_attr_runtime_status.attr,
- &dev_attr_runtime_usage.attr,
- &dev_attr_runtime_active_kids.attr,
- &dev_attr_runtime_enabled.attr,
-#endif /* CONFIG_PM_ADVANCED_DEBUG */
NULL,
};
static const struct attribute_group pm_attr_group = {
@@ -650,13 +644,16 @@ static const struct attribute_group pm_wakeup_attr_group = {
};
static struct attribute *runtime_attrs[] = {
-#ifndef CONFIG_PM_ADVANCED_DEBUG
&dev_attr_runtime_status.attr,
-#endif
&dev_attr_control.attr,
&dev_attr_runtime_suspended_time.attr,
&dev_attr_runtime_active_time.attr,
&dev_attr_autosuspend_delay_ms.attr,
+#ifdef CONFIG_PM_ADVANCED_DEBUG
+ &dev_attr_runtime_usage.attr,
+ &dev_attr_runtime_active_kids.attr,
+ &dev_attr_runtime_enabled.attr,
+#endif
NULL,
};
static const struct attribute_group pm_runtime_attr_group = {
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 63bf914a4d44..7e612977be1b 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -337,7 +337,7 @@ int device_wakeup_enable(struct device *dev)
if (!dev || !dev->power.can_wakeup)
return -EINVAL;
- if (pm_suspend_target_state != PM_SUSPEND_ON)
+ if (pm_sleep_transition_in_progress())
dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__);
ws = wakeup_source_register(dev, dev_name(dev));
diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c
index 6732ed2869f9..3ffd427248e8 100644
--- a/drivers/base/power/wakeup_stats.c
+++ b/drivers/base/power/wakeup_stats.c
@@ -34,6 +34,7 @@ wakeup_attr(active_count);
wakeup_attr(event_count);
wakeup_attr(wakeup_count);
wakeup_attr(expire_count);
+wakeup_attr(relax_count);
static ssize_t active_time_ms_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -119,6 +120,7 @@ static struct attribute *wakeup_source_attrs[] = {
&dev_attr_event_count.attr,
&dev_attr_wakeup_count.attr,
&dev_attr_expire_count.attr,
+ &dev_attr_relax_count.attr,
&dev_attr_active_time_ms.attr,
&dev_attr_total_time_ms.attr,
&dev_attr_max_time_ms.attr,
diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index b1affac70d5d..ffb2ef488298 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -6,8 +6,6 @@
config REGMAP
bool
default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO || REGMAP_FSI)
- select IRQ_DOMAIN if REGMAP_IRQ
- select MDIO_BUS if REGMAP_MDIO
help
Enable support for the Register Map (regmap) access API.
@@ -58,12 +56,14 @@ config REGMAP_W1
config REGMAP_MDIO
tristate
+ select MDIO_BUS
config REGMAP_MMIO
tristate
config REGMAP_IRQ
bool
+ select IRQ_DOMAIN
config REGMAP_RAM
tristate
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index f7fcf2de1301..c7650fa434ad 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -34,21 +34,10 @@ static int regcache_defaults_cmp(const void *a, const void *b)
return 0;
}
-static void regcache_defaults_swap(void *a, void *b, int size)
-{
- struct reg_default *x = a;
- struct reg_default *y = b;
- struct reg_default tmp;
-
- tmp = *x;
- *x = *y;
- *y = tmp;
-}
-
void regcache_sort_defaults(struct reg_default *defaults, unsigned int ndefaults)
{
sort(defaults, ndefaults, sizeof(*defaults),
- regcache_defaults_cmp, regcache_defaults_swap);
+ regcache_defaults_cmp, NULL);
}
EXPORT_SYMBOL_GPL(regcache_sort_defaults);
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 6c6869188c31..d1585f073776 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -6,11 +6,13 @@
//
// Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+#include <linux/array_size.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
+#include <linux/overflow.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/slab.h>
@@ -33,6 +35,7 @@ struct regmap_irq_chip_data {
void *status_reg_buf;
unsigned int *main_status_buf;
unsigned int *status_buf;
+ unsigned int *prev_status_buf;
unsigned int *mask_buf;
unsigned int *mask_buf_def;
unsigned int *wake_buf;
@@ -193,10 +196,10 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
/* If we've changed our wakeup count propagate it to the parent */
if (d->wake_count < 0)
for (i = d->wake_count; i < 0; i++)
- irq_set_irq_wake(d->irq, 0);
+ disable_irq_wake(d->irq);
else if (d->wake_count > 0)
for (i = 0; i < d->wake_count; i++)
- irq_set_irq_wake(d->irq, 1);
+ enable_irq_wake(d->irq);
d->wake_count = 0;
@@ -332,27 +335,13 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data,
return ret;
}
-static irqreturn_t regmap_irq_thread(int irq, void *d)
+static int read_irq_data(struct regmap_irq_chip_data *data)
{
- struct regmap_irq_chip_data *data = d;
const struct regmap_irq_chip *chip = data->chip;
struct regmap *map = data->map;
int ret, i;
- bool handled = false;
u32 reg;
- if (chip->handle_pre_irq)
- chip->handle_pre_irq(chip->irq_drv_data);
-
- if (chip->runtime_pm) {
- ret = pm_runtime_get_sync(map->dev);
- if (ret < 0) {
- dev_err(map->dev, "IRQ thread failed to resume: %d\n",
- ret);
- goto exit;
- }
- }
-
/*
* Read only registers with active IRQs if the chip has 'main status
* register'. Else read in the statuses, using a single bulk read if
@@ -379,10 +368,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
reg = data->get_irq_reg(data, chip->main_status, i);
ret = regmap_read(map, reg, &data->main_status_buf[i]);
if (ret) {
- dev_err(map->dev,
- "Failed to read IRQ status %d\n",
- ret);
- goto exit;
+ dev_err(map->dev, "Failed to read IRQ status %d\n", ret);
+ return ret;
}
}
@@ -398,10 +385,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
ret = read_sub_irq_data(data, b);
if (ret != 0) {
- dev_err(map->dev,
- "Failed to read IRQ status %d\n",
- ret);
- goto exit;
+ dev_err(map->dev, "Failed to read IRQ status %d\n", ret);
+ return ret;
}
}
@@ -418,9 +403,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
data->status_reg_buf,
chip->num_regs);
if (ret != 0) {
- dev_err(map->dev, "Failed to read IRQ status: %d\n",
- ret);
- goto exit;
+ dev_err(map->dev, "Failed to read IRQ status: %d\n", ret);
+ return ret;
}
for (i = 0; i < data->chip->num_regs; i++) {
@@ -436,7 +420,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
break;
default:
BUG();
- goto exit;
+ return -EIO;
}
}
@@ -447,10 +431,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
ret = regmap_read(map, reg, &data->status_buf[i]);
if (ret != 0) {
- dev_err(map->dev,
- "Failed to read IRQ status: %d\n",
- ret);
- goto exit;
+ dev_err(map->dev, "Failed to read IRQ status: %d\n", ret);
+ return ret;
}
}
}
@@ -459,6 +441,42 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
for (i = 0; i < data->chip->num_regs; i++)
data->status_buf[i] = ~data->status_buf[i];
+ return 0;
+}
+
+static irqreturn_t regmap_irq_thread(int irq, void *d)
+{
+ struct regmap_irq_chip_data *data = d;
+ const struct regmap_irq_chip *chip = data->chip;
+ struct regmap *map = data->map;
+ int ret, i;
+ bool handled = false;
+ u32 reg;
+
+ if (chip->handle_pre_irq)
+ chip->handle_pre_irq(chip->irq_drv_data);
+
+ if (chip->runtime_pm) {
+ ret = pm_runtime_get_sync(map->dev);
+ if (ret < 0) {
+ dev_err(map->dev, "IRQ thread failed to resume: %d\n", ret);
+ goto exit;
+ }
+ }
+
+ ret = read_irq_data(data);
+ if (ret < 0)
+ goto exit;
+
+ if (chip->status_is_level) {
+ for (i = 0; i < data->chip->num_regs; i++) {
+ unsigned int val = data->status_buf[i];
+
+ data->status_buf[i] ^= data->prev_status_buf[i];
+ data->prev_status_buf[i] = val;
+ }
+ }
+
/*
* Ignore masked IRQs and ack if we need to; we ack early so
* there is no race between handling and acknowledging the
@@ -705,6 +723,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
if (!d->status_buf)
goto err_alloc;
+ if (chip->status_is_level) {
+ d->prev_status_buf = kcalloc(chip->num_regs, sizeof(*d->prev_status_buf),
+ GFP_KERNEL);
+ if (!d->prev_status_buf)
+ goto err_alloc;
+ }
+
d->mask_buf = kcalloc(chip->num_regs, sizeof(*d->mask_buf),
GFP_KERNEL);
if (!d->mask_buf)
@@ -881,6 +906,16 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
}
}
+ /* Store current levels */
+ if (chip->status_is_level) {
+ ret = read_irq_data(d);
+ if (ret < 0)
+ goto err_alloc;
+
+ memcpy(d->prev_status_buf, d->status_buf,
+ array_size(d->chip->num_regs, sizeof(d->prev_status_buf[0])));
+ }
+
ret = regmap_irq_create_domain(fwnode, irq_base, chip, d);
if (ret)
goto err_alloc;
@@ -908,6 +943,7 @@ err_alloc:
kfree(d->mask_buf);
kfree(d->main_status_buf);
kfree(d->status_buf);
+ kfree(d->prev_status_buf);
kfree(d->status_reg_buf);
if (d->config_buf) {
for (i = 0; i < chip->num_config_bases; i++)
@@ -985,6 +1021,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d)
kfree(d->main_status_buf);
kfree(d->status_reg_buf);
kfree(d->status_buf);
+ kfree(d->prev_status_buf);
if (d->config_buf) {
for (i = 0; i < d->chip->num_config_bases; i++)
kfree(d->config_buf[i]);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index b962da263eee..8b42df05feff 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -208,3 +208,55 @@ static int __init topology_sysfs_init(void)
}
device_initcall(topology_sysfs_init);
+
+DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
+
+void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
+{
+ per_cpu(cpu_scale, cpu) = capacity;
+}
+
+static ssize_t cpu_capacity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
+}
+
+static DEVICE_ATTR_RO(cpu_capacity);
+
+static int cpu_capacity_sysctl_add(unsigned int cpu)
+{
+ struct device *cpu_dev = get_cpu_device(cpu);
+
+ if (!cpu_dev)
+ return -ENOENT;
+
+ device_create_file(cpu_dev, &dev_attr_cpu_capacity);
+
+ return 0;
+}
+
+static int cpu_capacity_sysctl_remove(unsigned int cpu)
+{
+ struct device *cpu_dev = get_cpu_device(cpu);
+
+ if (!cpu_dev)
+ return -ENOENT;
+
+ device_remove_file(cpu_dev, &dev_attr_cpu_capacity);
+
+ return 0;
+}
+
+static int register_cpu_capacity_sysctl(void)
+{
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity",
+ cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove);
+
+ return 0;
+}
+subsys_initcall(register_cpu_capacity_sysctl);
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index e48b24be45ee..0f70e2374e7f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -407,4 +407,23 @@ config BLKDEV_UBLK_LEGACY_OPCODES
source "drivers/block/rnbd/Kconfig"
+config BLK_DEV_ZONED_LOOP
+ tristate "Zoned loopback device support"
+ depends on BLK_DEV_ZONED
+ help
+ Saying Y here will allow you to use create a zoned block device using
+ regular files for zones (one file per zones). This is useful to test
+ file systems, device mapper and applications that support zoned block
+ devices. To create a zoned loop device, no user utility is needed, a
+ zoned loop device can be created (or re-started) using a command
+ like:
+
+ echo "add id=0,zone_size_mb=256,capacity_mb=16384,conv_zones=11" > \
+ /dev/zloop-control
+
+ See Documentation/admin-guide/blockdev/zoned_loop.rst for usage
+ details.
+
+ If unsure, say N.
+
endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 1105a2d4fdcb..097707aca725 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -41,5 +41,6 @@ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/
obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o
+obj-$(CONFIG_BLK_DEV_ZONED_LOOP) += zloop.o
swim_mod-y := swim.o swim_asm.o
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 292f127cae0a..b1be6c510372 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -54,32 +54,33 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
+static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
+ blk_opf_t opf)
+ __releases(rcu)
+ __acquires(rcu)
{
- pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
- struct page *page;
- int ret = 0;
-
- page = brd_lookup_page(brd, sector);
- if (page)
- return 0;
+ gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
+ struct page *page, *ret;
+ rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
+ rcu_read_lock();
if (!page)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
xa_lock(&brd->brd_pages);
- ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
- if (!ret)
- brd->brd_nr_pages++;
- xa_unlock(&brd->brd_pages);
-
- if (ret < 0) {
+ ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
+ page, gfp);
+ if (ret) {
+ xa_unlock(&brd->brd_pages);
__free_page(page);
- if (ret == -EBUSY)
- ret = 0;
+ if (xa_is_err(ret))
+ return ERR_PTR(xa_err(ret));
+ return ret;
}
- return ret;
+ brd->brd_nr_pages++;
+ xa_unlock(&brd->brd_pages);
+ return page;
}
/*
@@ -100,143 +101,77 @@ static void brd_free_pages(struct brd_device *brd)
}
/*
- * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
+ * Process a single segment. The segment is capped to not cross page boundaries
+ * in both the bio and the brd backing memory.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
- gfp_t gfp)
-{
- unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
- size_t copy;
- int ret;
-
- copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector, gfp);
- if (ret)
- return ret;
- if (copy < n) {
- sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector, gfp);
- }
- return ret;
-}
-
-/*
- * Copy n bytes from src to the brd starting at sector. Does not sleep.
- */
-static void copy_to_brd(struct brd_device *brd, const void *src,
- sector_t sector, size_t n)
+static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
{
+ struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
+ sector_t sector = bio->bi_iter.bi_sector;
+ u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT;
+ blk_opf_t opf = bio->bi_opf;
struct page *page;
- void *dst;
- unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
- size_t copy;
+ void *kaddr;
- copy = min_t(size_t, n, PAGE_SIZE - offset);
- page = brd_lookup_page(brd, sector);
- BUG_ON(!page);
-
- dst = kmap_atomic(page);
- memcpy(dst + offset, src, copy);
- kunmap_atomic(dst);
-
- if (copy < n) {
- src += copy;
- sector += copy >> SECTOR_SHIFT;
- copy = n - copy;
- page = brd_lookup_page(brd, sector);
- BUG_ON(!page);
-
- dst = kmap_atomic(page);
- memcpy(dst, src, copy);
- kunmap_atomic(dst);
- }
-}
+ bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
-/*
- * Copy n bytes to dst from the brd starting at sector. Does not sleep.
- */
-static void copy_from_brd(void *dst, struct brd_device *brd,
- sector_t sector, size_t n)
-{
- struct page *page;
- void *src;
- unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
- size_t copy;
-
- copy = min_t(size_t, n, PAGE_SIZE - offset);
+ rcu_read_lock();
page = brd_lookup_page(brd, sector);
- if (page) {
- src = kmap_atomic(page);
- memcpy(dst, src + offset, copy);
- kunmap_atomic(src);
- } else
- memset(dst, 0, copy);
-
- if (copy < n) {
- dst += copy;
- sector += copy >> SECTOR_SHIFT;
- copy = n - copy;
- page = brd_lookup_page(brd, sector);
- if (page) {
- src = kmap_atomic(page);
- memcpy(dst, src, copy);
- kunmap_atomic(src);
- } else
- memset(dst, 0, copy);
+ if (!page && op_is_write(opf)) {
+ page = brd_insert_page(brd, sector, opf);
+ if (IS_ERR(page))
+ goto out_error;
}
-}
-
-/*
- * Process a single bvec of a bio.
- */
-static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, blk_opf_t opf,
- sector_t sector)
-{
- void *mem;
- int err = 0;
+ kaddr = bvec_kmap_local(&bv);
if (op_is_write(opf)) {
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
-
- err = copy_to_brd_setup(brd, sector, len, gfp);
- if (err)
- goto out;
- }
-
- mem = kmap_atomic(page);
- if (!op_is_write(opf)) {
- copy_from_brd(mem + off, brd, sector, len);
- flush_dcache_page(page);
+ memcpy_to_page(page, offset, kaddr, bv.bv_len);
} else {
- flush_dcache_page(page);
- copy_to_brd(brd, mem + off, sector, len);
+ if (page)
+ memcpy_from_page(kaddr, page, offset, bv.bv_len);
+ else
+ memset(kaddr, 0, bv.bv_len);
}
- kunmap_atomic(mem);
+ kunmap_local(kaddr);
+ rcu_read_unlock();
+
+ bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+ return true;
+
+out_error:
+ rcu_read_unlock();
+ if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
+ return false;
+}
-out:
- return err;
+static void brd_free_one_page(struct rcu_head *head)
+{
+ struct page *page = container_of(head, struct page, rcu_head);
+
+ __free_page(page);
}
static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
{
- sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS;
+ sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
+ sector_t aligned_end = round_down(
+ sector + (size >> SECTOR_SHIFT), PAGE_SECTORS);
struct page *page;
- size -= (aligned_sector - sector) * SECTOR_SIZE;
+ if (aligned_end <= aligned_sector)
+ return;
+
xa_lock(&brd->brd_pages);
- while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
+ while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
if (page) {
- __free_page(page);
+ call_rcu(&page->rcu_head, brd_free_one_page);
brd->brd_nr_pages--;
}
aligned_sector += PAGE_SECTORS;
- size -= PAGE_SIZE;
}
xa_unlock(&brd->brd_pages);
}
@@ -244,36 +179,18 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
static void brd_submit_bio(struct bio *bio)
{
struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
- sector_t sector = bio->bi_iter.bi_sector;
- struct bio_vec bvec;
- struct bvec_iter iter;
if (unlikely(op_is_discard(bio->bi_opf))) {
- brd_do_discard(brd, sector, bio->bi_iter.bi_size);
+ brd_do_discard(brd, bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size);
bio_endio(bio);
return;
}
- bio_for_each_segment(bvec, bio, iter) {
- unsigned int len = bvec.bv_len;
- int err;
-
- /* Don't support un-aligned buffer */
- WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
- (len & (SECTOR_SIZE - 1)));
-
- err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio->bi_opf, sector);
- if (err) {
- if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
- bio_wouldblock_error(bio);
- return;
- }
- bio_io_error(bio);
+ do {
+ if (!brd_rw_bvec(brd, bio))
return;
- }
- sector += len >> SECTOR_SHIFT;
- }
+ } while (bio->bi_iter.bi_size);
bio_endio(bio);
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b8ba7de08753..e2b1f377f585 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -979,9 +979,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
if (!file)
return -EBADF;
- if ((mode & BLK_OPEN_WRITE) && !file->f_op->write_iter)
- return -EINVAL;
-
error = loop_check_backing_file(file);
if (error)
return error;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 65b96c083b3c..d5cc7bd2875c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -725,7 +725,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
scmd = blk_mq_rq_to_pdu(rq);
if (cgc->buflen) {
- ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+ ret = blk_rq_map_kern(rq, cgc->buffer, cgc->buflen,
GFP_NOIO);
if (ret)
goto out;
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 2ee6e9bd4e28..2df8941a6b14 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -147,12 +147,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1,
rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
- if (bio_add_page(bio, virt_to_page(data), datalen,
- offset_in_page(data)) != datalen) {
- rnbd_srv_err_rl(sess_dev, "Failed to map data to bio\n");
- err = -EINVAL;
- goto bio_put;
- }
+ bio_add_virt_nofail(bio, data, datalen);
bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
if (bio_has_data(bio) &&
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index f9032076bc06..6f51072776f1 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -50,6 +50,8 @@
/* private ioctl command mirror */
#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
+#define UBLK_CMD_UPDATE_SIZE _IOC_NR(UBLK_U_CMD_UPDATE_SIZE)
+#define UBLK_CMD_QUIESCE_DEV _IOC_NR(UBLK_U_CMD_QUIESCE_DEV)
#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
@@ -64,7 +66,10 @@
| UBLK_F_CMD_IOCTL_ENCODE \
| UBLK_F_USER_COPY \
| UBLK_F_ZONED \
- | UBLK_F_USER_RECOVERY_FAIL_IO)
+ | UBLK_F_USER_RECOVERY_FAIL_IO \
+ | UBLK_F_UPDATE_SIZE \
+ | UBLK_F_AUTO_BUF_REG \
+ | UBLK_F_QUIESCE)
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
@@ -77,7 +82,11 @@
UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
struct ublk_rq_data {
- struct kref ref;
+ refcount_t ref;
+
+ /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */
+ u16 buf_index;
+ void *buf_ctx_handle;
};
struct ublk_uring_cmd_pdu {
@@ -99,6 +108,9 @@ struct ublk_uring_cmd_pdu {
* setup in ublk uring_cmd handler
*/
struct ublk_queue *ubq;
+
+ struct ublk_auto_buf_reg buf;
+
u16 tag;
};
@@ -131,6 +143,14 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+/*
+ * request buffer is registered automatically, so we have to unregister it
+ * before completing this request.
+ *
+ * io_uring will unregister buffer automatically for us during exiting.
+ */
+#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10
+
/* atomic RW with ubq->cancel_lock */
#define UBLK_IO_FLAG_CANCELED 0x80000000
@@ -140,7 +160,12 @@ struct ublk_io {
unsigned int flags;
int res;
- struct io_uring_cmd *cmd;
+ union {
+ /* valid if UBLK_IO_FLAG_ACTIVE is set */
+ struct io_uring_cmd *cmd;
+ /* valid if UBLK_IO_FLAG_OWNED_BY_SRV is set */
+ struct request *req;
+ };
};
struct ublk_queue {
@@ -198,13 +223,19 @@ struct ublk_params_header {
__u32 types;
};
+static void ublk_io_release(void *priv);
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
const struct ublk_queue *ubq, int tag, size_t offset);
static inline unsigned int ublk_req_build_flags(struct request *req);
-static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
- int tag);
+
+static inline struct ublksrv_io_desc *
+ublk_get_iod(const struct ublk_queue *ubq, unsigned tag)
+{
+ return &ubq->io_cmd_buf[tag];
+}
+
static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
{
return ub->dev_info.flags & UBLK_F_ZONED;
@@ -356,8 +387,7 @@ static int ublk_report_zones(struct gendisk *disk, sector_t sector,
if (ret)
goto free_req;
- ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length,
- GFP_KERNEL);
+ ret = blk_rq_map_kern(req, buffer, buffer_length, GFP_KERNEL);
if (ret)
goto erase_desc;
@@ -477,7 +507,6 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
#endif
static inline void __ublk_complete_rq(struct request *req);
-static void ublk_complete_rq(struct kref *ref);
static dev_t ublk_chr_devt;
static const struct class ublk_chr_class = {
@@ -609,6 +638,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
}
+static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
+{
+ return ubq->flags & UBLK_F_AUTO_BUF_REG;
+}
+
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_USER_COPY;
@@ -616,7 +650,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
{
- return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq);
+ return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) &&
+ !ublk_support_auto_buf_reg(ubq);
}
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
@@ -627,8 +662,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
*
* for zero copy, request buffer need to be registered to io_uring
* buffer table, so reference is needed
+ *
+ * For auto buffer register, ublk server still may issue
+ * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up,
+ * so reference is required too.
*/
- return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq);
+ return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) ||
+ ublk_support_auto_buf_reg(ubq);
}
static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
@@ -637,7 +677,7 @@ static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- kref_init(&data->ref);
+ refcount_set(&data->ref, 1);
}
}
@@ -647,7 +687,7 @@ static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- return kref_get_unless_zero(&data->ref);
+ return refcount_inc_not_zero(&data->ref);
}
return true;
@@ -659,7 +699,8 @@ static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- kref_put(&data->ref, ublk_complete_rq);
+ if (refcount_dec_and_test(&data->ref))
+ __ublk_complete_rq(req);
} else {
__ublk_complete_rq(req);
}
@@ -695,12 +736,6 @@ static inline bool ublk_rq_has_data(const struct request *rq)
return bio_has_data(rq->bio);
}
-static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
- int tag)
-{
- return &ubq->io_cmd_buf[tag];
-}
-
static inline struct ublksrv_io_desc *
ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)
{
@@ -1117,18 +1152,12 @@ exit:
blk_mq_end_request(req, res);
}
-static void ublk_complete_rq(struct kref *ref)
+static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
+ int res, unsigned issue_flags)
{
- struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
- ref);
- struct request *req = blk_mq_rq_from_pdu(data);
+ /* read cmd first because req will overwrite it */
+ struct io_uring_cmd *cmd = io->cmd;
- __ublk_complete_rq(req);
-}
-
-static void ubq_complete_io_cmd(struct ublk_io *io, int res,
- unsigned issue_flags)
-{
/* mark this cmd owned by ublksrv */
io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
@@ -1138,8 +1167,10 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res,
*/
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+ io->req = req;
+
/* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, res, 0, issue_flags);
+ io_uring_cmd_done(cmd, res, 0, issue_flags);
}
#define UBLK_REQUEUE_DELAY_MS 3
@@ -1154,16 +1185,91 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
+static void ublk_auto_buf_reg_fallback(struct request *req)
+{
+ const struct ublk_queue *ubq = req->mq_hctx->driver_data;
+ struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
+ refcount_set(&data->ref, 1);
+}
+
+static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
+ unsigned int issue_flags)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+ int ret;
+
+ ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
+ pdu->buf.index, issue_flags);
+ if (ret) {
+ if (pdu->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) {
+ ublk_auto_buf_reg_fallback(req);
+ return true;
+ }
+ blk_mq_end_request(req, BLK_STS_IOERR);
+ return false;
+ }
+ /* one extra reference is dropped by ublk_io_release */
+ refcount_set(&data->ref, 2);
+
+ data->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd);
+ /* store buffer index in request payload */
+ data->buf_index = pdu->buf.index;
+ io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
+ return true;
+}
+
+static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
+ struct request *req, struct ublk_io *io,
+ unsigned int issue_flags)
+{
+ if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
+ return ublk_auto_buf_reg(req, io, issue_flags);
+
+ ublk_init_req_ref(ubq, req);
+ return true;
+}
+
+static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
+ struct ublk_io *io)
+{
+ unsigned mapped_bytes = ublk_map_io(ubq, req, io);
+
+ /* partially mapped, update io descriptor */
+ if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
+ /*
+ * Nothing mapped, retry until we succeed.
+ *
+ * We may never succeed in mapping any bytes here because
+ * of OOM. TODO: reserve one buffer with single page pinned
+ * for providing forward progress guarantee.
+ */
+ if (unlikely(!mapped_bytes)) {
+ blk_mq_requeue_request(req, false);
+ blk_mq_delay_kick_requeue_list(req->q,
+ UBLK_REQUEUE_DELAY_MS);
+ return false;
+ }
+
+ ublk_get_iod(ubq, req->tag)->nr_sectors =
+ mapped_bytes >> 9;
+ }
+
+ return true;
+}
+
static void ublk_dispatch_req(struct ublk_queue *ubq,
struct request *req,
unsigned int issue_flags)
{
int tag = req->tag;
struct ublk_io *io = &ubq->ios[tag];
- unsigned int mapped_bytes;
- pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n",
- __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
+ pr_devel("%s: complete: qid %d tag %d io_flags %x addr %llx\n",
+ __func__, ubq->q_id, req->tag, io->flags,
ublk_get_iod(ubq, req->tag)->addr);
/*
@@ -1183,54 +1289,22 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) {
/*
* We have not handled UBLK_IO_NEED_GET_DATA command yet,
- * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
+ * so immediately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
* and notify it.
*/
- if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
- io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
- pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
- __func__, io->cmd->cmd_op, ubq->q_id,
- req->tag, io->flags);
- ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags);
- return;
- }
- /*
- * We have handled UBLK_IO_NEED_GET_DATA command,
- * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
- * do the copy work.
- */
- io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
- /* update iod->addr because ublksrv may have passed a new io buffer */
- ublk_get_iod(ubq, req->tag)->addr = io->addr;
- pr_devel("%s: update iod->addr: op %d, qid %d tag %d io_flags %x addr %llx\n",
- __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
- ublk_get_iod(ubq, req->tag)->addr);
+ io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+ pr_devel("%s: need get data. qid %d tag %d io_flags %x\n",
+ __func__, ubq->q_id, req->tag, io->flags);
+ ublk_complete_io_cmd(io, req, UBLK_IO_RES_NEED_GET_DATA,
+ issue_flags);
+ return;
}
- mapped_bytes = ublk_map_io(ubq, req, io);
-
- /* partially mapped, update io descriptor */
- if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
- /*
- * Nothing mapped, retry until we succeed.
- *
- * We may never succeed in mapping any bytes here because
- * of OOM. TODO: reserve one buffer with single page pinned
- * for providing forward progress guarantee.
- */
- if (unlikely(!mapped_bytes)) {
- blk_mq_requeue_request(req, false);
- blk_mq_delay_kick_requeue_list(req->q,
- UBLK_REQUEUE_DELAY_MS);
- return;
- }
-
- ublk_get_iod(ubq, req->tag)->nr_sectors =
- mapped_bytes >> 9;
- }
+ if (!ublk_start_io(ubq, req, io))
+ return;
- ublk_init_req_ref(ubq, req);
- ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
+ if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
+ ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
}
static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
@@ -1590,30 +1664,6 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}
-static void ublk_commit_completion(struct ublk_device *ub,
- const struct ublksrv_io_cmd *ub_cmd)
-{
- u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
- struct ublk_queue *ubq = ublk_get_queue(ub, qid);
- struct ublk_io *io = &ubq->ios[tag];
- struct request *req;
-
- /* now this cmd slot is owned by nbd driver */
- io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
- io->res = ub_cmd->result;
-
- /* find the io request and complete */
- req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
- if (WARN_ON_ONCE(unlikely(!req)))
- return;
-
- if (req_op(req) == REQ_OP_ZONE_APPEND)
- req->__sector = ub_cmd->zone_append_lba;
-
- if (likely(!blk_should_fake_timeout(req->q)))
- ublk_put_req_ref(ubq, req);
-}
-
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
struct request *req)
{
@@ -1642,17 +1692,8 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
- if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) {
- struct request *rq;
-
- /*
- * Either we fail the request or ublk_rq_task_work_cb
- * will do it
- */
- rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
- if (rq && blk_mq_request_started(rq))
- __ublk_fail_req(ubq, io, rq);
- }
+ if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
+ __ublk_fail_req(ubq, io, io->req);
}
}
@@ -1708,7 +1749,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
* that ublk_dispatch_req() is always called
*/
req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
- if (req && blk_mq_request_started(req))
+ if (req && blk_mq_request_started(req) && req->tag == tag)
return;
spin_lock(&ubq->cancel_lock);
@@ -1940,6 +1981,20 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
io_uring_cmd_mark_cancelable(cmd, issue_flags);
}
+static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
+
+ if (pdu->buf.reserved0 || pdu->buf.reserved1)
+ return -EINVAL;
+
+ if (pdu->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK)
+ return -EINVAL;
+ return 0;
+}
+
static void ublk_io_release(void *priv)
{
struct request *rq = priv;
@@ -1953,16 +2008,12 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
unsigned int index, unsigned int issue_flags)
{
struct ublk_device *ub = cmd->file->private_data;
- const struct ublk_io *io = &ubq->ios[tag];
struct request *req;
int ret;
if (!ublk_support_zero_copy(ubq))
return -EINVAL;
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- return -EINVAL;
-
req = __ublk_check_and_get_req(ub, ubq, tag, 0);
if (!req)
return -EINVAL;
@@ -1978,17 +2029,12 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
}
static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
- const struct ublk_queue *ubq, unsigned int tag,
+ const struct ublk_queue *ubq,
unsigned int index, unsigned int issue_flags)
{
- const struct ublk_io *io = &ubq->ios[tag];
-
if (!ublk_support_zero_copy(ubq))
return -EINVAL;
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- return -EINVAL;
-
return io_buffer_unregister_bvec(cmd, index, issue_flags);
}
@@ -2031,6 +2077,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
goto out;
}
+ if (ublk_support_auto_buf_reg(ubq)) {
+ ret = ublk_set_auto_buf_reg(cmd);
+ if (ret)
+ goto out;
+ }
+
ublk_fill_io_cmd(io, cmd, buf_addr);
ublk_mark_io_ready(ub, ubq);
out:
@@ -2038,6 +2090,90 @@ out:
return ret;
}
+static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
+ struct ublk_io *io, struct io_uring_cmd *cmd,
+ const struct ublksrv_io_cmd *ub_cmd,
+ unsigned int issue_flags)
+{
+ struct request *req = io->req;
+
+ if (ublk_need_map_io(ubq)) {
+ /*
+ * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+ * NEED GET DATA is not enabled or it is Read IO.
+ */
+ if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
+ req_op(req) == REQ_OP_READ))
+ return -EINVAL;
+ } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
+ /*
+ * User copy requires addr to be unset when command is
+ * not zone append
+ */
+ return -EINVAL;
+ }
+
+ if (ublk_support_auto_buf_reg(ubq)) {
+ int ret;
+
+ /*
+ * `UBLK_F_AUTO_BUF_REG` only works iff `UBLK_IO_FETCH_REQ`
+ * and `UBLK_IO_COMMIT_AND_FETCH_REQ` are issued from same
+ * `io_ring_ctx`.
+ *
+ * If this uring_cmd's io_ring_ctx isn't same with the
+ * one for registering the buffer, it is ublk server's
+ * responsibility for unregistering the buffer, otherwise
+ * this ublk request gets stuck.
+ */
+ if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ if (data->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd))
+ io_buffer_unregister_bvec(cmd, data->buf_index,
+ issue_flags);
+ io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
+ }
+
+ ret = ublk_set_auto_buf_reg(cmd);
+ if (ret)
+ return ret;
+ }
+
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+
+ /* now this cmd slot is owned by ublk driver */
+ io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
+ io->res = ub_cmd->result;
+
+ if (req_op(req) == REQ_OP_ZONE_APPEND)
+ req->__sector = ub_cmd->zone_append_lba;
+
+ if (likely(!blk_should_fake_timeout(req->q)))
+ ublk_put_req_ref(ubq, req);
+
+ return 0;
+}
+
+static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io)
+{
+ struct request *req = io->req;
+
+ /*
+ * We have handled UBLK_IO_NEED_GET_DATA command,
+ * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
+ * do the copy work.
+ */
+ io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
+ /* update iod->addr because ublksrv may have passed a new io buffer */
+ ublk_get_iod(ubq, req->tag)->addr = io->addr;
+ pr_devel("%s: update iod->addr: qid %d tag %d io_flags %x addr %llx\n",
+ __func__, ubq->q_id, req->tag, io->flags,
+ ublk_get_iod(ubq, req->tag)->addr);
+
+ return ublk_start_io(ubq, req, io);
+}
+
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
const struct ublksrv_io_cmd *ub_cmd)
@@ -2048,7 +2184,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
u32 cmd_op = cmd->cmd_op;
unsigned tag = ub_cmd->tag;
int ret = -EINVAL;
- struct request *req;
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
@@ -2058,9 +2193,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
ubq = ublk_get_queue(ub, ub_cmd->q_id);
- if (!ubq || ub_cmd->q_id != ubq->q_id)
- goto out;
-
if (ubq->ubq_daemon && ubq->ubq_daemon != current)
goto out;
@@ -2075,6 +2207,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
}
+ /* only UBLK_IO_FETCH_REQ is allowed if io is not OWNED_BY_SRV */
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) &&
+ _IOC_NR(cmd_op) != UBLK_IO_FETCH_REQ)
+ goto out;
+
/*
* ensure that the user issues UBLK_IO_NEED_GET_DATA
* iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
@@ -2092,45 +2229,23 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_IO_REGISTER_IO_BUF:
return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
case UBLK_IO_UNREGISTER_IO_BUF:
- return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
+ return ublk_unregister_io_buf(cmd, ubq, ub_cmd->addr, issue_flags);
case UBLK_IO_FETCH_REQ:
ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr);
if (ret)
goto out;
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
- req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
-
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- goto out;
-
- if (ublk_need_map_io(ubq)) {
- /*
- * COMMIT_AND_FETCH_REQ has to provide IO buffer if
- * NEED GET DATA is not enabled or it is Read IO.
- */
- if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
- req_op(req) == REQ_OP_READ))
- goto out;
- } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
- /*
- * User copy requires addr to be unset when command is
- * not zone append
- */
- ret = -EINVAL;
+ ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags);
+ if (ret)
goto out;
- }
-
- ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
- ublk_commit_completion(ub, ub_cmd);
break;
case UBLK_IO_NEED_GET_DATA:
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- goto out;
- ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
- req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
- ublk_dispatch_req(ubq, req, issue_flags);
- return -EIOCBQUEUED;
+ io->addr = ub_cmd->addr;
+ if (!ublk_get_data(ubq, io))
+ return -EIOCBQUEUED;
+
+ return UBLK_IO_RES_OK;
default:
goto out;
}
@@ -2728,6 +2843,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
return -EINVAL;
}
+ if ((info.flags & UBLK_F_QUIESCE) && !(info.flags & UBLK_F_USER_RECOVERY)) {
+ pr_warn("UBLK_F_QUIESCE requires UBLK_F_USER_RECOVERY\n");
+ return -EINVAL;
+ }
+
/*
* unprivileged device can't be trusted, but RECOVERY and
* RECOVERY_REISSUE still may hang error handling, so can't
@@ -2744,8 +2864,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
* For USER_COPY, we depends on userspace to fill request
* buffer by pwrite() to ublk char device, which can't be
* used for unprivileged device
+ *
+ * Same with zero copy or auto buffer register.
*/
- if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
return -EINVAL;
}
@@ -2803,7 +2926,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
UBLK_F_URING_CMD_COMP_IN_TASK;
/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
- if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
/*
@@ -3106,6 +3230,127 @@ static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header)
return 0;
}
+static void ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header)
+{
+ struct ublk_param_basic *p = &ub->params.basic;
+ u64 new_size = header->data[0];
+
+ mutex_lock(&ub->mutex);
+ p->dev_sectors = new_size;
+ set_capacity_and_notify(ub->ub_disk, p->dev_sectors);
+ mutex_unlock(&ub->mutex);
+}
+
+struct count_busy {
+ const struct ublk_queue *ubq;
+ unsigned int nr_busy;
+};
+
+static bool ublk_count_busy_req(struct request *rq, void *data)
+{
+ struct count_busy *idle = data;
+
+ if (!blk_mq_request_started(rq) && rq->mq_hctx->driver_data == idle->ubq)
+ idle->nr_busy += 1;
+ return true;
+}
+
+/* uring_cmd is guaranteed to be active if the associated request is idle */
+static bool ubq_has_idle_io(const struct ublk_queue *ubq)
+{
+ struct count_busy data = {
+ .ubq = ubq,
+ };
+
+ blk_mq_tagset_busy_iter(&ubq->dev->tag_set, ublk_count_busy_req, &data);
+ return data.nr_busy < ubq->q_depth;
+}
+
+/* Wait until each hw queue has at least one idle IO */
+static int ublk_wait_for_idle_io(struct ublk_device *ub,
+ unsigned int timeout_ms)
+{
+ unsigned int elapsed = 0;
+ int ret;
+
+ while (elapsed < timeout_ms && !signal_pending(current)) {
+ unsigned int queues_cancelable = 0;
+ int i;
+
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+ queues_cancelable += !!ubq_has_idle_io(ubq);
+ }
+
+ /*
+ * Each queue needs at least one active command for
+ * notifying ublk server
+ */
+ if (queues_cancelable == ub->dev_info.nr_hw_queues)
+ break;
+
+ msleep(UBLK_REQUEUE_DELAY_MS);
+ elapsed += UBLK_REQUEUE_DELAY_MS;
+ }
+
+ if (signal_pending(current))
+ ret = -EINTR;
+ else if (elapsed >= timeout_ms)
+ ret = -EBUSY;
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static int ublk_ctrl_quiesce_dev(struct ublk_device *ub,
+ const struct ublksrv_ctrl_cmd *header)
+{
+ /* zero means wait forever */
+ u64 timeout_ms = header->data[0];
+ struct gendisk *disk;
+ int i, ret = -ENODEV;
+
+ if (!(ub->dev_info.flags & UBLK_F_QUIESCE))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&ub->mutex);
+ disk = ublk_get_disk(ub);
+ if (!disk)
+ goto unlock;
+ if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+ goto put_disk;
+
+ ret = 0;
+ /* already in expected state */
+ if (ub->dev_info.state != UBLK_S_DEV_LIVE)
+ goto put_disk;
+
+ /* Mark all queues as canceling */
+ blk_mq_quiesce_queue(disk->queue);
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+ ubq->canceling = true;
+ }
+ blk_mq_unquiesce_queue(disk->queue);
+
+ if (!timeout_ms)
+ timeout_ms = UINT_MAX;
+ ret = ublk_wait_for_idle_io(ub, timeout_ms);
+
+put_disk:
+ ublk_put_disk(disk);
+unlock:
+ mutex_unlock(&ub->mutex);
+
+ /* Cancel pending uring_cmd */
+ if (!ret)
+ ublk_cancel_dev(ub);
+ return ret;
+}
+
/*
* All control commands are sent via /dev/ublk-control, so we have to check
* the destination device's permission
@@ -3191,6 +3436,8 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
case UBLK_CMD_SET_PARAMS:
case UBLK_CMD_START_USER_RECOVERY:
case UBLK_CMD_END_USER_RECOVERY:
+ case UBLK_CMD_UPDATE_SIZE:
+ case UBLK_CMD_QUIESCE_DEV:
mask = MAY_READ | MAY_WRITE;
break;
default:
@@ -3282,6 +3529,13 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_CMD_END_USER_RECOVERY:
ret = ublk_ctrl_end_recovery(ub, header);
break;
+ case UBLK_CMD_UPDATE_SIZE:
+ ublk_ctrl_set_size(ub, header);
+ ret = 0;
+ break;
+ case UBLK_CMD_QUIESCE_DEV:
+ ret = ublk_ctrl_quiesce_dev(ub, header);
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -3315,6 +3569,7 @@ static int __init ublk_init(void)
BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+ BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8);
init_waitqueue_head(&ublk_idr_wq);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7cffea01d868..30bca8cb7106 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -571,7 +571,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk,
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);
- err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL);
+ err = blk_rq_map_kern(req, report_buf, report_len, GFP_KERNEL);
if (err)
goto out;
@@ -817,7 +817,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
vbr->out_hdr.sector = 0;
- err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
+ err = blk_rq_map_kern(req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
if (err)
goto out;
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
new file mode 100644
index 000000000000..553b1a713ab9
--- /dev/null
+++ b/drivers/block/zloop.c
@@ -0,0 +1,1385 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2025, Christoph Hellwig.
+ * Copyright (c) 2025, Western Digital Corporation or its affiliates.
+ *
+ * Zoned Loop Device driver - exports a zoned block device using one file per
+ * zone as backing storage.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/blk-mq.h>
+#include <linux/blkzoned.h>
+#include <linux/pagemap.h>
+#include <linux/miscdevice.h>
+#include <linux/falloc.h>
+#include <linux/mutex.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+
+/*
+ * Options for adding (and removing) a device.
+ */
+enum {
+ ZLOOP_OPT_ERR = 0,
+ ZLOOP_OPT_ID = (1 << 0),
+ ZLOOP_OPT_CAPACITY = (1 << 1),
+ ZLOOP_OPT_ZONE_SIZE = (1 << 2),
+ ZLOOP_OPT_ZONE_CAPACITY = (1 << 3),
+ ZLOOP_OPT_NR_CONV_ZONES = (1 << 4),
+ ZLOOP_OPT_BASE_DIR = (1 << 5),
+ ZLOOP_OPT_NR_QUEUES = (1 << 6),
+ ZLOOP_OPT_QUEUE_DEPTH = (1 << 7),
+ ZLOOP_OPT_BUFFERED_IO = (1 << 8),
+};
+
+static const match_table_t zloop_opt_tokens = {
+ { ZLOOP_OPT_ID, "id=%d" },
+ { ZLOOP_OPT_CAPACITY, "capacity_mb=%u" },
+ { ZLOOP_OPT_ZONE_SIZE, "zone_size_mb=%u" },
+ { ZLOOP_OPT_ZONE_CAPACITY, "zone_capacity_mb=%u" },
+ { ZLOOP_OPT_NR_CONV_ZONES, "conv_zones=%u" },
+ { ZLOOP_OPT_BASE_DIR, "base_dir=%s" },
+ { ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" },
+ { ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" },
+ { ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
+ { ZLOOP_OPT_ERR, NULL }
+};
+
+/* Default values for the "add" operation. */
+#define ZLOOP_DEF_ID -1
+#define ZLOOP_DEF_ZONE_SIZE ((256ULL * SZ_1M) >> SECTOR_SHIFT)
+#define ZLOOP_DEF_NR_ZONES 64
+#define ZLOOP_DEF_NR_CONV_ZONES 8
+#define ZLOOP_DEF_BASE_DIR "/var/local/zloop"
+#define ZLOOP_DEF_NR_QUEUES 1
+#define ZLOOP_DEF_QUEUE_DEPTH 128
+#define ZLOOP_DEF_BUFFERED_IO false
+
+/* Arbitrary limit on the zone size (16GB). */
+#define ZLOOP_MAX_ZONE_SIZE_MB 16384
+
+struct zloop_options {
+ unsigned int mask;
+ int id;
+ sector_t capacity;
+ sector_t zone_size;
+ sector_t zone_capacity;
+ unsigned int nr_conv_zones;
+ char *base_dir;
+ unsigned int nr_queues;
+ unsigned int queue_depth;
+ bool buffered_io;
+};
+
+/*
+ * Device states.
+ */
+enum {
+ Zlo_creating = 0,
+ Zlo_live,
+ Zlo_deleting,
+};
+
+enum zloop_zone_flags {
+ ZLOOP_ZONE_CONV = 0,
+ ZLOOP_ZONE_SEQ_ERROR,
+};
+
+struct zloop_zone {
+ struct file *file;
+
+ unsigned long flags;
+ struct mutex lock;
+ enum blk_zone_cond cond;
+ sector_t start;
+ sector_t wp;
+
+ gfp_t old_gfp_mask;
+};
+
+struct zloop_device {
+ unsigned int id;
+ unsigned int state;
+
+ struct blk_mq_tag_set tag_set;
+ struct gendisk *disk;
+
+ struct workqueue_struct *workqueue;
+ bool buffered_io;
+
+ const char *base_dir;
+ struct file *data_dir;
+
+ unsigned int zone_shift;
+ sector_t zone_size;
+ sector_t zone_capacity;
+ unsigned int nr_zones;
+ unsigned int nr_conv_zones;
+ unsigned int block_size;
+
+ struct zloop_zone zones[] __counted_by(nr_zones);
+};
+
+struct zloop_cmd {
+ struct work_struct work;
+ atomic_t ref;
+ sector_t sector;
+ sector_t nr_sectors;
+ long ret;
+ struct kiocb iocb;
+ struct bio_vec *bvec;
+};
+
+static DEFINE_IDR(zloop_index_idr);
+static DEFINE_MUTEX(zloop_ctl_mutex);
+
+static unsigned int rq_zone_no(struct request *rq)
+{
+ struct zloop_device *zlo = rq->q->queuedata;
+
+ return blk_rq_pos(rq) >> zlo->zone_shift;
+}
+
+static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
+{
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ struct kstat stat;
+ sector_t file_sectors;
+ int ret;
+
+ lockdep_assert_held(&zone->lock);
+
+ ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
+ if (ret < 0) {
+ pr_err("Failed to get zone %u file stat (err=%d)\n",
+ zone_no, ret);
+ set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ return ret;
+ }
+
+ file_sectors = stat.size >> SECTOR_SHIFT;
+ if (file_sectors > zlo->zone_capacity) {
+ pr_err("Zone %u file too large (%llu sectors > %llu)\n",
+ zone_no, file_sectors, zlo->zone_capacity);
+ return -EINVAL;
+ }
+
+ if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
+ pr_err("Zone %u file size not aligned to block size %u\n",
+ zone_no, zlo->block_size);
+ return -EINVAL;
+ }
+
+ if (!file_sectors) {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ } else if (file_sectors == zlo->zone_capacity) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zlo->zone_size;
+ } else {
+ zone->cond = BLK_ZONE_COND_CLOSED;
+ zone->wp = zone->start + file_sectors;
+ }
+
+ return 0;
+}
+
+static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
+{
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ int ret = 0;
+
+ if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
+ return -EIO;
+
+ mutex_lock(&zone->lock);
+
+ if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+ ret = zloop_update_seq_zone(zlo, zone_no);
+ if (ret)
+ goto unlock;
+ }
+
+ switch (zone->cond) {
+ case BLK_ZONE_COND_EXP_OPEN:
+ break;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_CLOSED:
+ case BLK_ZONE_COND_IMP_OPEN:
+ zone->cond = BLK_ZONE_COND_EXP_OPEN;
+ break;
+ case BLK_ZONE_COND_FULL:
+ default:
+ ret = -EIO;
+ break;
+ }
+
+unlock:
+ mutex_unlock(&zone->lock);
+
+ return ret;
+}
+
+static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
+{
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ int ret = 0;
+
+ if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
+ return -EIO;
+
+ mutex_lock(&zone->lock);
+
+ if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+ ret = zloop_update_seq_zone(zlo, zone_no);
+ if (ret)
+ goto unlock;
+ }
+
+ switch (zone->cond) {
+ case BLK_ZONE_COND_CLOSED:
+ break;
+ case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ if (zone->wp == zone->start)
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ else
+ zone->cond = BLK_ZONE_COND_CLOSED;
+ break;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_FULL:
+ default:
+ ret = -EIO;
+ break;
+ }
+
+unlock:
+ mutex_unlock(&zone->lock);
+
+ return ret;
+}
+
+static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
+{
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ int ret = 0;
+
+ if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
+ return -EIO;
+
+ mutex_lock(&zone->lock);
+
+ if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
+ zone->cond == BLK_ZONE_COND_EMPTY)
+ goto unlock;
+
+ if (vfs_truncate(&zone->file->f_path, 0)) {
+ set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ ret = -EIO;
+ goto unlock;
+ }
+
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+
+unlock:
+ mutex_unlock(&zone->lock);
+
+ return ret;
+}
+
+static int zloop_reset_all_zones(struct zloop_device *zlo)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
+ ret = zloop_reset_zone(zlo, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
+{
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ int ret = 0;
+
+ if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
+ return -EIO;
+
+ mutex_lock(&zone->lock);
+
+ if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
+ zone->cond == BLK_ZONE_COND_FULL)
+ goto unlock;
+
+ if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
+ set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ ret = -EIO;
+ goto unlock;
+ }
+
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zlo->zone_size;
+ clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+
+ unlock:
+ mutex_unlock(&zone->lock);
+
+ return ret;
+}
+
+static void zloop_put_cmd(struct zloop_cmd *cmd)
+{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+
+ if (!atomic_dec_and_test(&cmd->ref))
+ return;
+ kfree(cmd->bvec);
+ cmd->bvec = NULL;
+ if (likely(!blk_should_fake_timeout(rq->q)))
+ blk_mq_complete_request(rq);
+}
+
+static void zloop_rw_complete(struct kiocb *iocb, long ret)
+{
+ struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);
+
+ cmd->ret = ret;
+ zloop_put_cmd(cmd);
+}
+
+static void zloop_rw(struct zloop_cmd *cmd)
+{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+ struct zloop_device *zlo = rq->q->queuedata;
+ unsigned int zone_no = rq_zone_no(rq);
+ sector_t sector = blk_rq_pos(rq);
+ sector_t nr_sectors = blk_rq_sectors(rq);
+ bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+ bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
+ int rw = is_write ? ITER_SOURCE : ITER_DEST;
+ struct req_iterator rq_iter;
+ struct zloop_zone *zone;
+ struct iov_iter iter;
+ struct bio_vec tmp;
+ sector_t zone_end;
+ int nr_bvec = 0;
+ int ret;
+
+ atomic_set(&cmd->ref, 2);
+ cmd->sector = sector;
+ cmd->nr_sectors = nr_sectors;
+ cmd->ret = 0;
+
+ /* We should never get an I/O beyond the device capacity. */
+ if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
+ ret = -EIO;
+ goto out;
+ }
+ zone = &zlo->zones[zone_no];
+ zone_end = zone->start + zlo->zone_capacity;
+
+ /*
+ * The block layer should never send requests that are not fully
+ * contained within the zone.
+ */
+ if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+ mutex_lock(&zone->lock);
+ ret = zloop_update_seq_zone(zlo, zone_no);
+ mutex_unlock(&zone->lock);
+ if (ret)
+ goto out;
+ }
+
+ if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
+ mutex_lock(&zone->lock);
+
+ if (is_append) {
+ sector = zone->wp;
+ cmd->sector = sector;
+ }
+
+ /*
+ * Write operations must be aligned to the write pointer and
+ * fully contained within the zone capacity.
+ */
+ if (sector != zone->wp || zone->wp + nr_sectors > zone_end) {
+ pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
+ zone_no, sector, zone->wp);
+ ret = -EIO;
+ goto unlock;
+ }
+
+ /* Implicitly open the target zone. */
+ if (zone->cond == BLK_ZONE_COND_CLOSED ||
+ zone->cond == BLK_ZONE_COND_EMPTY)
+ zone->cond = BLK_ZONE_COND_IMP_OPEN;
+
+ /*
+ * Advance the write pointer of sequential zones. If the write
+ * fails, the wp position will be corrected when the next I/O
+ * copmpletes.
+ */
+ zone->wp += nr_sectors;
+ if (zone->wp == zone_end)
+ zone->cond = BLK_ZONE_COND_FULL;
+ }
+
+ rq_for_each_bvec(tmp, rq, rq_iter)
+ nr_bvec++;
+
+ if (rq->bio != rq->biotail) {
+ struct bio_vec *bvec;
+
+ cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO);
+ if (!cmd->bvec) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ /*
+ * The bios of the request may be started from the middle of
+ * the 'bvec' because of bio splitting, so we can't directly
+ * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
+ * API will take care of all details for us.
+ */
+ bvec = cmd->bvec;
+ rq_for_each_bvec(tmp, rq, rq_iter) {
+ *bvec = tmp;
+ bvec++;
+ }
+ iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
+ } else {
+ /*
+ * Same here, this bio may be started from the middle of the
+ * 'bvec' because of bio splitting, so offset from the bvec
+ * must be passed to iov iterator
+ */
+ iov_iter_bvec(&iter, rw,
+ __bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
+ nr_bvec, blk_rq_bytes(rq));
+ iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
+ }
+
+ cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
+ cmd->iocb.ki_filp = zone->file;
+ cmd->iocb.ki_complete = zloop_rw_complete;
+ if (!zlo->buffered_io)
+ cmd->iocb.ki_flags = IOCB_DIRECT;
+ cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+
+ if (rw == ITER_SOURCE)
+ ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
+ else
+ ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
+unlock:
+ if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
+ mutex_unlock(&zone->lock);
+out:
+ if (ret != -EIOCBQUEUED)
+ zloop_rw_complete(&cmd->iocb, ret);
+ zloop_put_cmd(cmd);
+}
+
+static void zloop_handle_cmd(struct zloop_cmd *cmd)
+{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+ struct zloop_device *zlo = rq->q->queuedata;
+
+ switch (req_op(rq)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ case REQ_OP_ZONE_APPEND:
+ /*
+ * zloop_rw() always executes asynchronously or completes
+ * directly.
+ */
+ zloop_rw(cmd);
+ return;
+ case REQ_OP_FLUSH:
+ /*
+ * Sync the entire FS containing the zone files instead of
+ * walking all files
+ */
+ cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb);
+ break;
+ case REQ_OP_ZONE_RESET:
+ cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
+ break;
+ case REQ_OP_ZONE_RESET_ALL:
+ cmd->ret = zloop_reset_all_zones(zlo);
+ break;
+ case REQ_OP_ZONE_FINISH:
+ cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
+ break;
+ case REQ_OP_ZONE_OPEN:
+ cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ pr_err("Unsupported operation %d\n", req_op(rq));
+ cmd->ret = -EOPNOTSUPP;
+ break;
+ }
+
+ blk_mq_complete_request(rq);
+}
+
+static void zloop_cmd_workfn(struct work_struct *work)
+{
+ struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
+ int orig_flags = current->flags;
+
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+ zloop_handle_cmd(cmd);
+ current->flags = orig_flags;
+}
+
+static void zloop_complete_rq(struct request *rq)
+{
+ struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct zloop_device *zlo = rq->q->queuedata;
+ unsigned int zone_no = cmd->sector >> zlo->zone_shift;
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ blk_status_t sts = BLK_STS_OK;
+
+ switch (req_op(rq)) {
+ case REQ_OP_READ:
+ if (cmd->ret < 0)
+ pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
+ zone_no, cmd->sector, cmd->nr_sectors);
+
+ if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
+ /* short read */
+ struct bio *bio;
+
+ __rq_for_each_bio(bio, rq)
+ zero_fill_bio(bio);
+ }
+ break;
+ case REQ_OP_WRITE:
+ case REQ_OP_ZONE_APPEND:
+ if (cmd->ret < 0)
+ pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
+ zone_no,
+ req_op(rq) == REQ_OP_WRITE ? "" : "append ",
+ cmd->sector, cmd->nr_sectors);
+
+ if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
+ pr_err("Zone %u: partial write %ld/%u B\n",
+ zone_no, cmd->ret, blk_rq_bytes(rq));
+ cmd->ret = -EIO;
+ }
+
+ if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
+ /*
+ * A write to a sequential zone file failed: mark the
+ * zone as having an error. This will be corrected and
+ * cleared when the next IO is submitted.
+ */
+ set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ break;
+ }
+ if (req_op(rq) == REQ_OP_ZONE_APPEND)
+ rq->__sector = cmd->sector;
+
+ break;
+ default:
+ break;
+ }
+
+ if (cmd->ret < 0)
+ sts = errno_to_blk_status(cmd->ret);
+ blk_mq_end_request(rq, sts);
+}
+
+static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct request *rq = bd->rq;
+ struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct zloop_device *zlo = rq->q->queuedata;
+
+ if (zlo->state == Zlo_deleting)
+ return BLK_STS_IOERR;
+
+ blk_mq_start_request(rq);
+
+ INIT_WORK(&cmd->work, zloop_cmd_workfn);
+ queue_work(zlo->workqueue, &cmd->work);
+
+ return BLK_STS_OK;
+}
+
+static const struct blk_mq_ops zloop_mq_ops = {
+ .queue_rq = zloop_queue_rq,
+ .complete = zloop_complete_rq,
+};
+
+static int zloop_open(struct gendisk *disk, blk_mode_t mode)
+{
+ struct zloop_device *zlo = disk->private_data;
+ int ret;
+
+ ret = mutex_lock_killable(&zloop_ctl_mutex);
+ if (ret)
+ return ret;
+
+ if (zlo->state != Zlo_live)
+ ret = -ENXIO;
+ mutex_unlock(&zloop_ctl_mutex);
+ return ret;
+}
+
+static int zloop_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data)
+{
+ struct zloop_device *zlo = disk->private_data;
+ struct blk_zone blkz = {};
+ unsigned int first, i;
+ int ret;
+
+ first = disk_zone_no(disk, sector);
+ if (first >= zlo->nr_zones)
+ return 0;
+ nr_zones = min(nr_zones, zlo->nr_zones - first);
+
+ for (i = 0; i < nr_zones; i++) {
+ unsigned int zone_no = first + i;
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+
+ mutex_lock(&zone->lock);
+
+ if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+ ret = zloop_update_seq_zone(zlo, zone_no);
+ if (ret) {
+ mutex_unlock(&zone->lock);
+ return ret;
+ }
+ }
+
+ blkz.start = zone->start;
+ blkz.len = zlo->zone_size;
+ blkz.wp = zone->wp;
+ blkz.cond = zone->cond;
+ if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
+ blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
+ blkz.capacity = zlo->zone_size;
+ } else {
+ blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
+ blkz.capacity = zlo->zone_capacity;
+ }
+
+ mutex_unlock(&zone->lock);
+
+ ret = cb(&blkz, i, data);
+ if (ret)
+ return ret;
+ }
+
+ return nr_zones;
+}
+
+static void zloop_free_disk(struct gendisk *disk)
+{
+ struct zloop_device *zlo = disk->private_data;
+ unsigned int i;
+
+ for (i = 0; i < zlo->nr_zones; i++) {
+ struct zloop_zone *zone = &zlo->zones[i];
+
+ mapping_set_gfp_mask(zone->file->f_mapping,
+ zone->old_gfp_mask);
+ fput(zone->file);
+ }
+
+ fput(zlo->data_dir);
+ destroy_workqueue(zlo->workqueue);
+ kfree(zlo->base_dir);
+ kvfree(zlo);
+}
+
+static const struct block_device_operations zloop_fops = {
+ .owner = THIS_MODULE,
+ .open = zloop_open,
+ .report_zones = zloop_report_zones,
+ .free_disk = zloop_free_disk,
+};
+
+__printf(3, 4)
+static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
+ const char *fmt, ...)
+{
+ struct file *file;
+ va_list ap;
+ char *p;
+
+ va_start(ap, fmt);
+ p = kvasprintf(GFP_KERNEL, fmt, ap);
+ va_end(ap);
+
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ file = filp_open(p, oflags, mode);
+ kfree(p);
+ return file;
+}
+
+static int zloop_get_block_size(struct zloop_device *zlo,
+ struct zloop_zone *zone)
+{
+ struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
+ struct kstat st;
+
+ /*
+ * If the FS block size is lower than or equal to 4K, use that as the
+ * device block size. Otherwise, fallback to the FS direct IO alignment
+ * constraint if that is provided, and to the FS underlying device
+ * physical block size if the direct IO alignment is unknown.
+ */
+ if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
+ zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
+ else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
+ (st.result_mask & STATX_DIOALIGN))
+ zlo->block_size = st.dio_offset_align;
+ else if (sb_bdev)
+ zlo->block_size = bdev_physical_block_size(sb_bdev);
+ else
+ zlo->block_size = SECTOR_SIZE;
+
+ if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
+ pr_err("Zone capacity is not aligned to block size %u\n",
+ zlo->block_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
+ unsigned int zone_no, bool restore)
+{
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ int oflags = O_RDWR;
+ struct kstat stat;
+ sector_t file_sectors;
+ int ret;
+
+ mutex_init(&zone->lock);
+ zone->start = (sector_t)zone_no << zlo->zone_shift;
+
+ if (!restore)
+ oflags |= O_CREAT;
+
+ if (!opts->buffered_io)
+ oflags |= O_DIRECT;
+
+ if (zone_no < zlo->nr_conv_zones) {
+ /* Conventional zone file. */
+ set_bit(ZLOOP_ZONE_CONV, &zone->flags);
+ zone->cond = BLK_ZONE_COND_NOT_WP;
+ zone->wp = U64_MAX;
+
+ zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
+ zlo->base_dir, zlo->id, zone_no);
+ if (IS_ERR(zone->file)) {
+ pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)",
+ zone_no, zlo->base_dir, zlo->id, zone_no,
+ PTR_ERR(zone->file));
+ return PTR_ERR(zone->file);
+ }
+
+ if (!zlo->block_size) {
+ ret = zloop_get_block_size(zlo, zone);
+ if (ret)
+ return ret;
+ }
+
+ ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
+ if (ret < 0) {
+ pr_err("Failed to get zone %u file stat\n", zone_no);
+ return ret;
+ }
+ file_sectors = stat.size >> SECTOR_SHIFT;
+
+ if (restore && file_sectors != zlo->zone_size) {
+ pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
+ zone_no, file_sectors, zlo->zone_capacity);
+ return ret;
+ }
+
+ ret = vfs_truncate(&zone->file->f_path,
+ zlo->zone_size << SECTOR_SHIFT);
+ if (ret < 0) {
+ pr_err("Failed to truncate zone %u file (err=%d)\n",
+ zone_no, ret);
+ return ret;
+ }
+
+ return 0;
+ }
+
+ /* Sequential zone file. */
+ zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
+ zlo->base_dir, zlo->id, zone_no);
+ if (IS_ERR(zone->file)) {
+ pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)",
+ zone_no, zlo->base_dir, zlo->id, zone_no,
+ PTR_ERR(zone->file));
+ return PTR_ERR(zone->file);
+ }
+
+ if (!zlo->block_size) {
+ ret = zloop_get_block_size(zlo, zone);
+ if (ret)
+ return ret;
+ }
+
+ zloop_get_block_size(zlo, zone);
+
+ mutex_lock(&zone->lock);
+ ret = zloop_update_seq_zone(zlo, zone_no);
+ mutex_unlock(&zone->lock);
+
+ return ret;
+}
+
+static bool zloop_dev_exists(struct zloop_device *zlo)
+{
+ struct file *cnv, *seq;
+ bool exists;
+
+ cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
+ zlo->base_dir, zlo->id, 0);
+ seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
+ zlo->base_dir, zlo->id, 0);
+ exists = !IS_ERR(cnv) || !IS_ERR(seq);
+
+ if (!IS_ERR(cnv))
+ fput(cnv);
+ if (!IS_ERR(seq))
+ fput(seq);
+
+ return exists;
+}
+
+static int zloop_ctl_add(struct zloop_options *opts)
+{
+ struct queue_limits lim = {
+ .max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
+ .max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT,
+ .chunk_sectors = opts->zone_size,
+ .features = BLK_FEAT_ZONED,
+ };
+ unsigned int nr_zones, i, j;
+ struct zloop_device *zlo;
+ int ret = -EINVAL;
+ bool restore;
+
+ __module_get(THIS_MODULE);
+
+ nr_zones = opts->capacity >> ilog2(opts->zone_size);
+ if (opts->nr_conv_zones >= nr_zones) {
+ pr_err("Invalid number of conventional zones %u\n",
+ opts->nr_conv_zones);
+ goto out;
+ }
+
+ zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL);
+ if (!zlo) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ zlo->state = Zlo_creating;
+
+ ret = mutex_lock_killable(&zloop_ctl_mutex);
+ if (ret)
+ goto out_free_dev;
+
+ /* Allocate id, if @opts->id >= 0, we're requesting that specific id */
+ if (opts->id >= 0) {
+ ret = idr_alloc(&zloop_index_idr, zlo,
+ opts->id, opts->id + 1, GFP_KERNEL);
+ if (ret == -ENOSPC)
+ ret = -EEXIST;
+ } else {
+ ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
+ }
+ mutex_unlock(&zloop_ctl_mutex);
+ if (ret < 0)
+ goto out_free_dev;
+
+ zlo->id = ret;
+ zlo->zone_shift = ilog2(opts->zone_size);
+ zlo->zone_size = opts->zone_size;
+ if (opts->zone_capacity)
+ zlo->zone_capacity = opts->zone_capacity;
+ else
+ zlo->zone_capacity = zlo->zone_size;
+ zlo->nr_zones = nr_zones;
+ zlo->nr_conv_zones = opts->nr_conv_zones;
+ zlo->buffered_io = opts->buffered_io;
+
+ zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
+ opts->nr_queues * opts->queue_depth, zlo->id);
+ if (!zlo->workqueue) {
+ ret = -ENOMEM;
+ goto out_free_idr;
+ }
+
+ if (opts->base_dir)
+ zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
+ else
+ zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
+ if (!zlo->base_dir) {
+ ret = -ENOMEM;
+ goto out_destroy_workqueue;
+ }
+
+ zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
+ zlo->base_dir, zlo->id);
+ if (IS_ERR(zlo->data_dir)) {
+ ret = PTR_ERR(zlo->data_dir);
+ pr_warn("Failed to open directory %s/%u (err=%d)\n",
+ zlo->base_dir, zlo->id, ret);
+ goto out_free_base_dir;
+ }
+
+ /*
+ * If we already have zone files, we are restoring a device created by a
+ * previous add operation. In this case, zloop_init_zone() will check
+ * that the zone files are consistent with the zone configuration given.
+ */
+ restore = zloop_dev_exists(zlo);
+ for (i = 0; i < nr_zones; i++) {
+ ret = zloop_init_zone(zlo, opts, i, restore);
+ if (ret)
+ goto out_close_files;
+ }
+
+ lim.physical_block_size = zlo->block_size;
+ lim.logical_block_size = zlo->block_size;
+
+ zlo->tag_set.ops = &zloop_mq_ops;
+ zlo->tag_set.nr_hw_queues = opts->nr_queues;
+ zlo->tag_set.queue_depth = opts->queue_depth;
+ zlo->tag_set.numa_node = NUMA_NO_NODE;
+ zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
+ zlo->tag_set.driver_data = zlo;
+
+ ret = blk_mq_alloc_tag_set(&zlo->tag_set);
+ if (ret) {
+ pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
+ goto out_close_files;
+ }
+
+ zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
+ if (IS_ERR(zlo->disk)) {
+ pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
+ ret = PTR_ERR(zlo->disk);
+ goto out_cleanup_tags;
+ }
+ zlo->disk->flags = GENHD_FL_NO_PART;
+ zlo->disk->fops = &zloop_fops;
+ zlo->disk->private_data = zlo;
+ sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
+ set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);
+
+ ret = blk_revalidate_disk_zones(zlo->disk);
+ if (ret)
+ goto out_cleanup_disk;
+
+ ret = add_disk(zlo->disk);
+ if (ret) {
+ pr_err("add_disk failed (err=%d)\n", ret);
+ goto out_cleanup_disk;
+ }
+
+ mutex_lock(&zloop_ctl_mutex);
+ zlo->state = Zlo_live;
+ mutex_unlock(&zloop_ctl_mutex);
+
+ pr_info("Added device %d: %u zones of %llu MB, %u B block size\n",
+ zlo->id, zlo->nr_zones,
+ ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
+ zlo->block_size);
+
+ return 0;
+
+out_cleanup_disk:
+ put_disk(zlo->disk);
+out_cleanup_tags:
+ blk_mq_free_tag_set(&zlo->tag_set);
+out_close_files:
+ for (j = 0; j < i; j++) {
+ struct zloop_zone *zone = &zlo->zones[j];
+
+ if (!IS_ERR_OR_NULL(zone->file))
+ fput(zone->file);
+ }
+ fput(zlo->data_dir);
+out_free_base_dir:
+ kfree(zlo->base_dir);
+out_destroy_workqueue:
+ destroy_workqueue(zlo->workqueue);
+out_free_idr:
+ mutex_lock(&zloop_ctl_mutex);
+ idr_remove(&zloop_index_idr, zlo->id);
+ mutex_unlock(&zloop_ctl_mutex);
+out_free_dev:
+ kvfree(zlo);
+out:
+ module_put(THIS_MODULE);
+ if (ret == -ENOENT)
+ ret = -EINVAL;
+ return ret;
+}
+
+static int zloop_ctl_remove(struct zloop_options *opts)
+{
+ struct zloop_device *zlo;
+ int ret;
+
+ if (!(opts->mask & ZLOOP_OPT_ID)) {
+ pr_err("No ID specified\n");
+ return -EINVAL;
+ }
+
+ ret = mutex_lock_killable(&zloop_ctl_mutex);
+ if (ret)
+ return ret;
+
+ zlo = idr_find(&zloop_index_idr, opts->id);
+ if (!zlo || zlo->state == Zlo_creating) {
+ ret = -ENODEV;
+ } else if (zlo->state == Zlo_deleting) {
+ ret = -EINVAL;
+ } else {
+ idr_remove(&zloop_index_idr, zlo->id);
+ zlo->state = Zlo_deleting;
+ }
+
+ mutex_unlock(&zloop_ctl_mutex);
+ if (ret)
+ return ret;
+
+ del_gendisk(zlo->disk);
+ put_disk(zlo->disk);
+ blk_mq_free_tag_set(&zlo->tag_set);
+
+ pr_info("Removed device %d\n", opts->id);
+
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+static int zloop_parse_options(struct zloop_options *opts, const char *buf)
+{
+ substring_t args[MAX_OPT_ARGS];
+ char *options, *o, *p;
+ unsigned int token;
+ int ret = 0;
+
+ /* Set defaults. */
+ opts->mask = 0;
+ opts->id = ZLOOP_DEF_ID;
+ opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
+ opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
+ opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
+ opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
+ opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
+ opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
+
+ if (!buf)
+ return 0;
+
+ /* Skip leading spaces before the options. */
+ while (isspace(*buf))
+ buf++;
+
+ options = o = kstrdup(buf, GFP_KERNEL);
+ if (!options)
+ return -ENOMEM;
+
+ /* Parse the options, doing only some light invalid value checks. */
+ while ((p = strsep(&o, ",\n")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, zloop_opt_tokens, args);
+ opts->mask |= token;
+ switch (token) {
+ case ZLOOP_OPT_ID:
+ if (match_int(args, &opts->id)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case ZLOOP_OPT_CAPACITY:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!token) {
+ pr_err("Invalid capacity\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->capacity =
+ ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
+ break;
+ case ZLOOP_OPT_ZONE_SIZE:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
+ !is_power_of_2(token)) {
+ pr_err("Invalid zone size %u\n", token);
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->zone_size =
+ ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
+ break;
+ case ZLOOP_OPT_ZONE_CAPACITY:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!token) {
+ pr_err("Invalid zone capacity\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->zone_capacity =
+ ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
+ break;
+ case ZLOOP_OPT_NR_CONV_ZONES:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->nr_conv_zones = token;
+ break;
+ case ZLOOP_OPT_BASE_DIR:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ kfree(opts->base_dir);
+ opts->base_dir = p;
+ break;
+ case ZLOOP_OPT_NR_QUEUES:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!token) {
+ pr_err("Invalid number of queues\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->nr_queues = min(token, num_online_cpus());
+ break;
+ case ZLOOP_OPT_QUEUE_DEPTH:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!token) {
+ pr_err("Invalid queue depth\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->queue_depth = token;
+ break;
+ case ZLOOP_OPT_BUFFERED_IO:
+ opts->buffered_io = true;
+ break;
+ case ZLOOP_OPT_ERR:
+ default:
+ pr_warn("unknown parameter or missing value '%s'\n", p);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ ret = -EINVAL;
+ if (opts->capacity <= opts->zone_size) {
+ pr_err("Invalid capacity\n");
+ goto out;
+ }
+
+ if (opts->zone_capacity > opts->zone_size) {
+ pr_err("Invalid zone capacity\n");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ kfree(options);
+ return ret;
+}
+
+enum {
+ ZLOOP_CTL_ADD,
+ ZLOOP_CTL_REMOVE,
+};
+
+static struct zloop_ctl_op {
+ int code;
+ const char *name;
+} zloop_ctl_ops[] = {
+ { ZLOOP_CTL_ADD, "add" },
+ { ZLOOP_CTL_REMOVE, "remove" },
+ { -1, NULL },
+};
+
+static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *pos)
+{
+ struct zloop_options opts = { };
+ struct zloop_ctl_op *op;
+ const char *buf, *opts_buf;
+ int i, ret;
+
+ if (count > PAGE_SIZE)
+ return -ENOMEM;
+
+ buf = memdup_user_nul(ubuf, count);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
+ op = &zloop_ctl_ops[i];
+ if (!op->name) {
+ pr_err("Invalid operation\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!strncmp(buf, op->name, strlen(op->name)))
+ break;
+ }
+
+ if (count <= strlen(op->name))
+ opts_buf = NULL;
+ else
+ opts_buf = buf + strlen(op->name);
+
+ ret = zloop_parse_options(&opts, opts_buf);
+ if (ret) {
+ pr_err("Failed to parse options\n");
+ goto out;
+ }
+
+ switch (op->code) {
+ case ZLOOP_CTL_ADD:
+ ret = zloop_ctl_add(&opts);
+ break;
+ case ZLOOP_CTL_REMOVE:
+ ret = zloop_ctl_remove(&opts);
+ break;
+ default:
+ pr_err("Invalid operation\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+out:
+ kfree(opts.base_dir);
+ kfree(buf);
+ return ret ? ret : count;
+}
+
+static int zloop_ctl_show(struct seq_file *seq_file, void *private)
+{
+ const struct match_token *tok;
+ int i;
+
+ /* Add operation */
+ seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
+ for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
+ tok = &zloop_opt_tokens[i];
+ if (!tok->pattern)
+ break;
+ if (i)
+ seq_putc(seq_file, ',');
+ seq_puts(seq_file, tok->pattern);
+ }
+ seq_putc(seq_file, '\n');
+
+ /* Remove operation */
+ seq_puts(seq_file, zloop_ctl_ops[1].name);
+ seq_puts(seq_file, " id=%d\n");
+
+ return 0;
+}
+
+static int zloop_ctl_open(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+ return single_open(file, zloop_ctl_show, NULL);
+}
+
+static int zloop_ctl_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations zloop_ctl_fops = {
+ .owner = THIS_MODULE,
+ .open = zloop_ctl_open,
+ .release = zloop_ctl_release,
+ .write = zloop_ctl_write,
+ .read = seq_read,
+};
+
+static struct miscdevice zloop_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "zloop-control",
+ .fops = &zloop_ctl_fops,
+};
+
+static int __init zloop_init(void)
+{
+ int ret;
+
+ ret = misc_register(&zloop_misc);
+ if (ret) {
+ pr_err("Failed to register misc device: %d\n", ret);
+ return ret;
+ }
+ pr_info("Module loaded\n");
+
+ return 0;
+}
+
+static void __exit zloop_exit(void)
+{
+ misc_deregister(&zloop_misc);
+ idr_destroy(&zloop_index_idr);
+}
+
+module_init(zloop_init);
+module_exit(zloop_exit);
+
+MODULE_DESCRIPTION("Zoned loopback device");
+MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a42dedb78e0a..256b451bbe06 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -3014,9 +3014,8 @@ static void btusb_coredump_qca(struct hci_dev *hdev)
static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb)
{
int ret = 0;
+ unsigned int skip = 0;
u8 pkt_type;
- u8 *sk_ptr;
- unsigned int sk_len;
u16 seqno;
u32 dump_size;
@@ -3025,18 +3024,13 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb)
struct usb_device *udev = btdata->udev;
pkt_type = hci_skb_pkt_type(skb);
- sk_ptr = skb->data;
- sk_len = skb->len;
+ skip = sizeof(struct hci_event_hdr);
+ if (pkt_type == HCI_ACLDATA_PKT)
+ skip += sizeof(struct hci_acl_hdr);
- if (pkt_type == HCI_ACLDATA_PKT) {
- sk_ptr += HCI_ACL_HDR_SIZE;
- sk_len -= HCI_ACL_HDR_SIZE;
- }
-
- sk_ptr += HCI_EVENT_HDR_SIZE;
- sk_len -= HCI_EVENT_HDR_SIZE;
+ skb_pull(skb, skip);
+ dump_hdr = (struct qca_dump_hdr *)skb->data;
- dump_hdr = (struct qca_dump_hdr *)sk_ptr;
seqno = le16_to_cpu(dump_hdr->seqno);
if (seqno == 0) {
set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags);
@@ -3056,16 +3050,15 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb)
btdata->qca_dump.ram_dump_size = dump_size;
btdata->qca_dump.ram_dump_seqno = 0;
- sk_ptr += offsetof(struct qca_dump_hdr, data0);
- sk_len -= offsetof(struct qca_dump_hdr, data0);
+
+ skb_pull(skb, offsetof(struct qca_dump_hdr, data0));
usb_disable_autosuspend(udev);
bt_dev_info(hdev, "%s memdump size(%u)\n",
(pkt_type == HCI_ACLDATA_PKT) ? "ACL" : "event",
dump_size);
} else {
- sk_ptr += offsetof(struct qca_dump_hdr, data);
- sk_len -= offsetof(struct qca_dump_hdr, data);
+ skb_pull(skb, offsetof(struct qca_dump_hdr, data));
}
if (!btdata->qca_dump.ram_dump_size) {
@@ -3085,7 +3078,6 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb)
return ret;
}
- skb_pull(skb, skb->len - sk_len);
hci_devcd_append(hdev, skb);
btdata->qca_dump.ram_dump_seqno++;
if (seqno == QCA_LAST_SEQUENCE_NUM) {
@@ -3113,68 +3105,58 @@ out:
/* Return: true if the ACL packet is a dump packet, false otherwise. */
static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb)
{
- u8 *sk_ptr;
- unsigned int sk_len;
-
struct hci_event_hdr *event_hdr;
struct hci_acl_hdr *acl_hdr;
struct qca_dump_hdr *dump_hdr;
+ struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+ bool is_dump = false;
- sk_ptr = skb->data;
- sk_len = skb->len;
-
- acl_hdr = hci_acl_hdr(skb);
- if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE)
+ if (!clone)
return false;
- sk_ptr += HCI_ACL_HDR_SIZE;
- sk_len -= HCI_ACL_HDR_SIZE;
- event_hdr = (struct hci_event_hdr *)sk_ptr;
-
- if ((event_hdr->evt != HCI_VENDOR_PKT) ||
- (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE)))
- return false;
+ acl_hdr = skb_pull_data(clone, sizeof(*acl_hdr));
+ if (!acl_hdr || (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE))
+ goto out;
- sk_ptr += HCI_EVENT_HDR_SIZE;
- sk_len -= HCI_EVENT_HDR_SIZE;
+ event_hdr = skb_pull_data(clone, sizeof(*event_hdr));
+ if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT))
+ goto out;
- dump_hdr = (struct qca_dump_hdr *)sk_ptr;
- if ((sk_len < offsetof(struct qca_dump_hdr, data)) ||
- (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) ||
- (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE))
- return false;
+ dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr));
+ if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) ||
+ (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE))
+ goto out;
- return true;
+ is_dump = true;
+out:
+ consume_skb(clone);
+ return is_dump;
}
/* Return: true if the event packet is a dump packet, false otherwise. */
static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb)
{
- u8 *sk_ptr;
- unsigned int sk_len;
-
struct hci_event_hdr *event_hdr;
struct qca_dump_hdr *dump_hdr;
+ struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+ bool is_dump = false;
- sk_ptr = skb->data;
- sk_len = skb->len;
-
- event_hdr = hci_event_hdr(skb);
-
- if ((event_hdr->evt != HCI_VENDOR_PKT)
- || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE)))
+ if (!clone)
return false;
- sk_ptr += HCI_EVENT_HDR_SIZE;
- sk_len -= HCI_EVENT_HDR_SIZE;
+ event_hdr = skb_pull_data(clone, sizeof(*event_hdr));
+ if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT))
+ goto out;
- dump_hdr = (struct qca_dump_hdr *)sk_ptr;
- if ((sk_len < offsetof(struct qca_dump_hdr, data)) ||
- (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) ||
- (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE))
- return false;
+ dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr));
+ if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) ||
+ (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE))
+ goto out;
- return true;
+ is_dump = true;
+out:
+ consume_skb(clone);
+ return is_dump;
}
static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
index 1e57ebfb7622..6c3e5c5dae10 100644
--- a/drivers/bus/moxtet.c
+++ b/drivers/bus/moxtet.c
@@ -737,9 +737,9 @@ static int moxtet_irq_setup(struct moxtet *moxtet)
{
int i, ret;
- moxtet->irq.domain = irq_domain_add_simple(moxtet->dev->of_node,
- MOXTET_NIRQS, 0,
- &moxtet_irq_domain, moxtet);
+ moxtet->irq.domain = irq_domain_create_simple(of_fwnode_handle(moxtet->dev->of_node),
+ MOXTET_NIRQS, 0,
+ &moxtet_irq_domain, moxtet);
if (moxtet->irq.domain == NULL) {
dev_err(moxtet->dev, "Could not add IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index b163e043c687..21a10552da61 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3677,8 +3677,7 @@ static void cdrom_sysctl_register(void)
static void cdrom_sysctl_unregister(void)
{
- if (cdrom_sysctl_header)
- unregister_sysctl_table(cdrom_sysctl_header);
+ unregister_sysctl_table(cdrom_sysctl_header);
}
#else /* CONFIG_SYSCTL */
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 8e41731d3642..bf490967241a 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -16,7 +16,7 @@
#include <linux/mmzone.h>
#include <asm/page.h> /* PAGE_SIZE */
#include <asm/e820/api.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/gart.h>
#include "agp.h"
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index e424360fb4a1..4787391bb6b4 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -11,6 +11,7 @@
#include <linux/page-flags.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
+#include <asm/msr.h>
#include "agp.h"
/* NVIDIA registers */
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 143406bc6939..d2b00458761e 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -37,6 +37,7 @@ struct atmel_trng {
struct clk *clk;
void __iomem *base;
struct hwrng rng;
+ struct device *dev;
bool has_half_rate;
};
@@ -59,9 +60,9 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
u32 *data = buf;
int ret;
- ret = pm_runtime_get_sync((struct device *)trng->rng.priv);
+ ret = pm_runtime_get_sync(trng->dev);
if (ret < 0) {
- pm_runtime_put_sync((struct device *)trng->rng.priv);
+ pm_runtime_put_sync(trng->dev);
return ret;
}
@@ -79,8 +80,8 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
ret = 4;
out:
- pm_runtime_mark_last_busy((struct device *)trng->rng.priv);
- pm_runtime_put_sync_autosuspend((struct device *)trng->rng.priv);
+ pm_runtime_mark_last_busy(trng->dev);
+ pm_runtime_put_sync_autosuspend(trng->dev);
return ret;
}
@@ -134,9 +135,9 @@ static int atmel_trng_probe(struct platform_device *pdev)
return -ENODEV;
trng->has_half_rate = data->has_half_rate;
+ trng->dev = &pdev->dev;
trng->rng.name = pdev->name;
trng->rng.read = atmel_trng_read;
- trng->rng.priv = (unsigned long)&pdev->dev;
platform_set_drvdata(pdev, trng);
#ifndef CONFIG_PM
diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index 1e3048f2bb38..b7fa1bc1122b 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -36,6 +36,7 @@ struct mtk_rng {
void __iomem *base;
struct clk *clk;
struct hwrng rng;
+ struct device *dev;
};
static int mtk_rng_init(struct hwrng *rng)
@@ -85,7 +86,7 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
struct mtk_rng *priv = to_mtk_rng(rng);
int retval = 0;
- pm_runtime_get_sync((struct device *)priv->rng.priv);
+ pm_runtime_get_sync(priv->dev);
while (max >= sizeof(u32)) {
if (!mtk_rng_wait_ready(rng, wait))
@@ -97,8 +98,8 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
max -= sizeof(u32);
}
- pm_runtime_mark_last_busy((struct device *)priv->rng.priv);
- pm_runtime_put_sync_autosuspend((struct device *)priv->rng.priv);
+ pm_runtime_mark_last_busy(priv->dev);
+ pm_runtime_put_sync_autosuspend(priv->dev);
return retval || !wait ? retval : -EIO;
}
@@ -112,13 +113,13 @@ static int mtk_rng_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
+ priv->dev = &pdev->dev;
priv->rng.name = pdev->name;
#ifndef CONFIG_PM
priv->rng.init = mtk_rng_init;
priv->rng.cleanup = mtk_rng_cleanup;
#endif
priv->rng.read = mtk_rng_read;
- priv->rng.priv = (unsigned long)&pdev->dev;
priv->rng.quality = 900;
priv->clk = devm_clk_get(&pdev->dev, "rng");
diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c
index 9ff00f096f38..3e308c890bd2 100644
--- a/drivers/char/hw_random/npcm-rng.c
+++ b/drivers/char/hw_random/npcm-rng.c
@@ -32,6 +32,7 @@
struct npcm_rng {
void __iomem *base;
struct hwrng rng;
+ struct device *dev;
u32 clkp;
};
@@ -57,7 +58,7 @@ static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
int retval = 0;
int ready;
- pm_runtime_get_sync((struct device *)priv->rng.priv);
+ pm_runtime_get_sync(priv->dev);
while (max) {
if (wait) {
@@ -79,8 +80,8 @@ static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
max--;
}
- pm_runtime_mark_last_busy((struct device *)priv->rng.priv);
- pm_runtime_put_sync_autosuspend((struct device *)priv->rng.priv);
+ pm_runtime_mark_last_busy(priv->dev);
+ pm_runtime_put_sync_autosuspend(priv->dev);
return retval || !wait ? retval : -EIO;
}
@@ -109,7 +110,7 @@ static int npcm_rng_probe(struct platform_device *pdev)
#endif
priv->rng.name = pdev->name;
priv->rng.read = npcm_rng_read;
- priv->rng.priv = (unsigned long)&pdev->dev;
+ priv->dev = &pdev->dev;
priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev);
writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG);
diff --git a/drivers/char/hw_random/rockchip-rng.c b/drivers/char/hw_random/rockchip-rng.c
index 161050591663..fb4a30b95507 100644
--- a/drivers/char/hw_random/rockchip-rng.c
+++ b/drivers/char/hw_random/rockchip-rng.c
@@ -93,6 +93,30 @@
#define TRNG_v1_VERSION_CODE 0x46bc
/* end of TRNG_V1 register definitions */
+/*
+ * RKRNG register definitions
+ * The RKRNG IP is a stand-alone TRNG implementation (not part of a crypto IP)
+ * and can be found in the Rockchip RK3576, Rockchip RK3562 and Rockchip RK3528
+ * SoCs. It can either output true randomness (TRNG) or "deterministic"
+ * randomness derived from hashing the true entropy (DRNG). This driver
+ * implementation uses just the true entropy, and leaves stretching the entropy
+ * up to Linux.
+ */
+#define RKRNG_CFG 0x0000
+#define RKRNG_CTRL 0x0010
+#define RKRNG_CTRL_REQ_TRNG BIT(4)
+#define RKRNG_STATE 0x0014
+#define RKRNG_STATE_TRNG_RDY BIT(4)
+#define RKRNG_TRNG_DATA0 0x0050
+#define RKRNG_TRNG_DATA1 0x0054
+#define RKRNG_TRNG_DATA2 0x0058
+#define RKRNG_TRNG_DATA3 0x005C
+#define RKRNG_TRNG_DATA4 0x0060
+#define RKRNG_TRNG_DATA5 0x0064
+#define RKRNG_TRNG_DATA6 0x0068
+#define RKRNG_TRNG_DATA7 0x006C
+#define RKRNG_READ_LEN 32
+
/* Before removing this assert, give rk3588_rng_read an upper bound of 32 */
static_assert(RK_RNG_MAX_BYTE <= (TRNG_V1_RAND7 + 4 - TRNG_V1_RAND0),
"You raised RK_RNG_MAX_BYTE and broke rk3588-rng, congrats.");
@@ -205,6 +229,46 @@ out:
return (ret < 0) ? ret : to_read;
}
+static int rk3576_rng_init(struct hwrng *rng)
+{
+ struct rk_rng *rk_rng = container_of(rng, struct rk_rng, rng);
+
+ return rk_rng_enable_clks(rk_rng);
+}
+
+static int rk3576_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+ struct rk_rng *rk_rng = container_of(rng, struct rk_rng, rng);
+ size_t to_read = min_t(size_t, max, RKRNG_READ_LEN);
+ int ret = 0;
+ u32 val;
+
+ ret = pm_runtime_resume_and_get(rk_rng->dev);
+ if (ret < 0)
+ return ret;
+
+ rk_rng_writel(rk_rng, RKRNG_CTRL_REQ_TRNG | (RKRNG_CTRL_REQ_TRNG << 16),
+ RKRNG_CTRL);
+
+ if (readl_poll_timeout(rk_rng->base + RKRNG_STATE, val,
+ (val & RKRNG_STATE_TRNG_RDY), RK_RNG_POLL_PERIOD_US,
+ RK_RNG_POLL_TIMEOUT_US)) {
+ dev_err(rk_rng->dev, "timed out waiting for data\n");
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+
+ rk_rng_writel(rk_rng, RKRNG_STATE_TRNG_RDY, RKRNG_STATE);
+
+ memcpy_fromio(buf, rk_rng->base + RKRNG_TRNG_DATA0, to_read);
+
+out:
+ pm_runtime_mark_last_busy(rk_rng->dev);
+ pm_runtime_put_sync_autosuspend(rk_rng->dev);
+
+ return (ret < 0) ? ret : to_read;
+}
+
static int rk3588_rng_init(struct hwrng *rng)
{
struct rk_rng *rk_rng = container_of(rng, struct rk_rng, rng);
@@ -305,6 +369,14 @@ static const struct rk_rng_soc_data rk3568_soc_data = {
.reset_optional = false,
};
+static const struct rk_rng_soc_data rk3576_soc_data = {
+ .rk_rng_init = rk3576_rng_init,
+ .rk_rng_read = rk3576_rng_read,
+ .rk_rng_cleanup = rk3588_rng_cleanup,
+ .quality = 999, /* as determined by actual testing */
+ .reset_optional = true,
+};
+
static const struct rk_rng_soc_data rk3588_soc_data = {
.rk_rng_init = rk3588_rng_init,
.rk_rng_read = rk3588_rng_read,
@@ -397,6 +469,7 @@ static const struct dev_pm_ops rk_rng_pm_ops = {
static const struct of_device_id rk_rng_dt_match[] = {
{ .compatible = "rockchip,rk3568-rng", .data = (void *)&rk3568_soc_data },
+ { .compatible = "rockchip,rk3576-rng", .data = (void *)&rk3576_soc_data },
{ .compatible = "rockchip,rk3588-rng", .data = (void *)&rk3588_soc_data },
{ /* sentinel */ },
};
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 169eed162a7f..48839958b0b1 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -61,29 +61,11 @@ static inline int page_is_allowed(unsigned long pfn)
{
return devmem_is_allowed(pfn);
}
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- u64 from = ((u64)pfn) << PAGE_SHIFT;
- u64 to = from + size;
- u64 cursor = from;
-
- while (cursor < to) {
- if (!devmem_is_allowed(pfn))
- return 0;
- cursor += PAGE_SIZE;
- pfn++;
- }
- return 1;
-}
#else
static inline int page_is_allowed(unsigned long pfn)
{
return 1;
}
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- return 1;
-}
#endif
static inline bool should_stop_iteration(void)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 38f2fab29c56..b8b24b6ed3fe 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -309,11 +309,11 @@ static void crng_reseed(struct work_struct *work)
* key value, at index 4, so the state should always be zeroed out
* immediately after using in order to maintain forward secrecy.
* If the state cannot be erased in a timely manner, then it is
- * safer to set the random_data parameter to &chacha_state[4] so
- * that this function overwrites it before returning.
+ * safer to set the random_data parameter to &chacha_state->x[4]
+ * so that this function overwrites it before returning.
*/
static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
- u32 chacha_state[CHACHA_STATE_WORDS],
+ struct chacha_state *chacha_state,
u8 *random_data, size_t random_data_len)
{
u8 first_block[CHACHA_BLOCK_SIZE];
@@ -321,8 +321,8 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
BUG_ON(random_data_len > 32);
chacha_init_consts(chacha_state);
- memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
- memset(&chacha_state[12], 0, sizeof(u32) * 4);
+ memcpy(&chacha_state->x[4], key, CHACHA_KEY_SIZE);
+ memset(&chacha_state->x[12], 0, sizeof(u32) * 4);
chacha20_block(chacha_state, first_block);
memcpy(key, first_block, CHACHA_KEY_SIZE);
@@ -335,7 +335,7 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
* random data. It also returns up to 32 bytes on its own of random data
* that may be used; random_data_len may not be greater than 32.
*/
-static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
+static void crng_make_state(struct chacha_state *chacha_state,
u8 *random_data, size_t random_data_len)
{
unsigned long flags;
@@ -395,7 +395,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
static void _get_random_bytes(void *buf, size_t len)
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u8 tmp[CHACHA_BLOCK_SIZE];
size_t first_block_len;
@@ -403,26 +403,26 @@ static void _get_random_bytes(void *buf, size_t len)
return;
first_block_len = min_t(size_t, 32, len);
- crng_make_state(chacha_state, buf, first_block_len);
+ crng_make_state(&chacha_state, buf, first_block_len);
len -= first_block_len;
buf += first_block_len;
while (len) {
if (len < CHACHA_BLOCK_SIZE) {
- chacha20_block(chacha_state, tmp);
+ chacha20_block(&chacha_state, tmp);
memcpy(buf, tmp, len);
memzero_explicit(tmp, sizeof(tmp));
break;
}
- chacha20_block(chacha_state, buf);
- if (unlikely(chacha_state[12] == 0))
- ++chacha_state[13];
+ chacha20_block(&chacha_state, buf);
+ if (unlikely(chacha_state.x[12] == 0))
+ ++chacha_state.x[13];
len -= CHACHA_BLOCK_SIZE;
buf += CHACHA_BLOCK_SIZE;
}
- memzero_explicit(chacha_state, sizeof(chacha_state));
+ chacha_zeroize_state(&chacha_state);
}
/*
@@ -441,7 +441,7 @@ EXPORT_SYMBOL(get_random_bytes);
static ssize_t get_random_bytes_user(struct iov_iter *iter)
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u8 block[CHACHA_BLOCK_SIZE];
size_t ret = 0, copied;
@@ -453,21 +453,22 @@ static ssize_t get_random_bytes_user(struct iov_iter *iter)
* bytes, in case userspace causes copy_to_iter() below to sleep
* forever, so that we still retain forward secrecy in that case.
*/
- crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE);
+ crng_make_state(&chacha_state, (u8 *)&chacha_state.x[4],
+ CHACHA_KEY_SIZE);
/*
* However, if we're doing a read of len <= 32, we don't need to
* use chacha_state after, so we can simply return those bytes to
* the user directly.
*/
if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) {
- ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter);
+ ret = copy_to_iter(&chacha_state.x[4], CHACHA_KEY_SIZE, iter);
goto out_zero_chacha;
}
for (;;) {
- chacha20_block(chacha_state, block);
- if (unlikely(chacha_state[12] == 0))
- ++chacha_state[13];
+ chacha20_block(&chacha_state, block);
+ if (unlikely(chacha_state.x[12] == 0))
+ ++chacha_state.x[13];
copied = copy_to_iter(block, sizeof(block), iter);
ret += copied;
@@ -484,7 +485,7 @@ static ssize_t get_random_bytes_user(struct iov_iter *iter)
memzero_explicit(block, sizeof(block));
out_zero_chacha:
- memzero_explicit(chacha_state, sizeof(chacha_state));
+ chacha_zeroize_state(&chacha_state);
return ret ? ret : -EFAULT;
}
@@ -726,6 +727,7 @@ static void __cold _credit_init_bits(size_t bits)
static DECLARE_WORK(set_ready, crng_set_ready);
unsigned int new, orig, add;
unsigned long flags;
+ int m;
if (!bits)
return;
@@ -748,9 +750,9 @@ static void __cold _credit_init_bits(size_t bits)
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
pr_notice("crng init done\n");
- if (urandom_warning.missed)
- pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
- urandom_warning.missed);
+ m = ratelimit_state_get_miss(&urandom_warning);
+ if (m)
+ pr_notice("%d urandom warning(s) missed due to ratelimiting\n", m);
} else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) {
spin_lock_irqsave(&base_crng.lock, flags);
/* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */
@@ -1311,9 +1313,9 @@ static void __cold try_to_generate_entropy(void)
while (!crng_ready() && !signal_pending(current)) {
/*
* Check !timer_pending() and then ensure that any previous callback has finished
- * executing by checking try_to_del_timer_sync(), before queueing the next one.
+ * executing by checking timer_delete_sync_try(), before queueing the next one.
*/
- if (!timer_pending(&stack->timer) && try_to_del_timer_sync(&stack->timer) >= 0) {
+ if (!timer_pending(&stack->timer) && timer_delete_sync_try(&stack->timer) >= 0) {
struct cpumask timer_cpus;
unsigned int num_cpus;
@@ -1353,7 +1355,7 @@ static void __cold try_to_generate_entropy(void)
mix_pool_bytes(&stack->entropy, sizeof(stack->entropy));
timer_delete_sync(&stack->timer);
- destroy_timer_on_stack(&stack->timer);
+ timer_destroy_on_stack(&stack->timer);
}
@@ -1466,7 +1468,7 @@ static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
if (!crng_ready()) {
if (!ratelimit_disable && maxwarn <= 0)
- ++urandom_warning.missed;
+ ratelimit_state_inc_miss(&urandom_warning);
else if (ratelimit_disable || __ratelimit(&urandom_warning)) {
--maxwarn;
pr_notice("%s: uninitialized urandom read (%zu bytes read)\n",
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index fe4f3a609934..dddd702b2454 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -234,5 +234,15 @@ config TCG_FTPM_TEE
help
This driver proxies for firmware TPM running in TEE.
+config TCG_SVSM
+ tristate "SNP SVSM vTPM interface"
+ depends on AMD_MEM_ENCRYPT
+ help
+ This is a driver for the AMD SVSM vTPM protocol that a SEV-SNP guest
+ OS can use to discover and talk to a vTPM emulated by the Secure VM
+ Service Module (SVSM) in the guest context, but at a more privileged
+ level (usually VMPL0). To compile this driver as a module, choose M
+ here; the module will be called tpm_svsm.
+
source "drivers/char/tpm/st33zp24/Kconfig"
endif # TCG_TPM
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 2b004df8c04b..9de1b3ea34a9 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -45,3 +45,4 @@ obj-$(CONFIG_TCG_CRB) += tpm_crb.o
obj-$(CONFIG_TCG_ARM_CRB_FFA) += tpm_crb_ffa.o
obj-$(CONFIG_TCG_VTPM_PROXY) += tpm_vtpm_proxy.o
obj-$(CONFIG_TCG_FTPM_TEE) += tpm_ftpm_tee.o
+obj-$(CONFIG_TCG_SVSM) += tpm_svsm.o
diff --git a/drivers/char/tpm/eventlog/tpm1.c b/drivers/char/tpm/eventlog/tpm1.c
index 12ee42a31c71..e7913b2853d5 100644
--- a/drivers/char/tpm/eventlog/tpm1.c
+++ b/drivers/char/tpm/eventlog/tpm1.c
@@ -257,11 +257,8 @@ static int tpm1_ascii_bios_measurements_show(struct seq_file *m, void *v)
(unsigned char *)(v + sizeof(struct tcpa_event));
eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
- if (!eventname) {
- printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
- __func__);
- return -EFAULT;
- }
+ if (!eventname)
+ return -ENOMEM;
/* 1st: PCR */
seq_printf(m, "%2d ", do_endian_conversion(event->pcr_index));
diff --git a/drivers/char/tpm/tpm-buf.c b/drivers/char/tpm/tpm-buf.c
index e49a19fea3bd..dc882fc9fa9e 100644
--- a/drivers/char/tpm/tpm-buf.c
+++ b/drivers/char/tpm/tpm-buf.c
@@ -201,7 +201,7 @@ static void tpm_buf_read(struct tpm_buf *buf, off_t *offset, size_t count, void
*/
u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset)
{
- u8 value;
+ u8 value = 0;
tpm_buf_read(buf, offset, sizeof(value), &value);
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(tpm_buf_read_u8);
*/
u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset)
{
- u16 value;
+ u16 value = 0;
tpm_buf_read(buf, offset, sizeof(value), &value);
@@ -235,7 +235,7 @@ EXPORT_SYMBOL_GPL(tpm_buf_read_u16);
*/
u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset)
{
- u32 value;
+ u32 value = 0;
tpm_buf_read(buf, offset, sizeof(value), &value);
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 3f89635ba5e8..7b5049b3d476 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -40,11 +40,6 @@
*
* These are the usage functions:
*
- * tpm2_start_auth_session() which allocates the opaque auth structure
- * and gets a session from the TPM. This must be called before
- * any of the following functions. The session is protected by a
- * session_key which is derived from a random salt value
- * encrypted to the NULL seed.
* tpm2_end_auth_session() kills the session and frees the resources.
* Under normal operation this function is done by
* tpm_buf_check_hmac_response(), so this is only to be used on
@@ -963,16 +958,13 @@ err:
}
/**
- * tpm2_start_auth_session() - create a HMAC authentication session with the TPM
- * @chip: the TPM chip structure to create the session with
+ * tpm2_start_auth_session() - Create an a HMAC authentication session
+ * @chip: A TPM chip
*
- * This function loads the NULL seed from its saved context and starts
- * an authentication session on the null seed, fills in the
- * @chip->auth structure to contain all the session details necessary
- * for performing the HMAC, encrypt and decrypt operations and
- * returns. The NULL seed is flushed before this function returns.
+ * Loads the ephemeral key (null seed), and starts an HMAC authenticated
+ * session. The null seed is flushed before the return.
*
- * Return: zero on success or actual error encountered.
+ * Returns zero on success, or a POSIX error code.
*/
int tpm2_start_auth_session(struct tpm_chip *chip)
{
@@ -1024,7 +1016,7 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
/* hash algorithm for session */
tpm_buf_append_u16(&buf, TPM_ALG_SHA256);
- rc = tpm_transmit_cmd(chip, &buf, 0, "start auth session");
+ rc = tpm_ret_to_err(tpm_transmit_cmd(chip, &buf, 0, "StartAuthSession"));
tpm2_flush_context(chip, null_key);
if (rc == TPM2_RC_SUCCESS)
diff --git a/drivers/char/tpm/tpm_crb_ffa.c b/drivers/char/tpm/tpm_crb_ffa.c
index 3169a87a56b6..4ead61f01299 100644
--- a/drivers/char/tpm/tpm_crb_ffa.c
+++ b/drivers/char/tpm/tpm_crb_ffa.c
@@ -38,9 +38,11 @@
* messages.
*
* All requests with FFA_MSG_SEND_DIRECT_REQ and FFA_MSG_SEND_DIRECT_RESP
- * are using the AArch32 SMC calling convention with register usage as
- * defined in FF-A specification:
- * w0: Function ID (0x8400006F or 0x84000070)
+ * are using the AArch32 or AArch64 SMC calling convention with register usage
+ * as defined in FF-A specification:
+ * w0: Function ID
+ * -for 32-bit: 0x8400006F or 0x84000070
+ * -for 64-bit: 0xC400006F or 0xC4000070
* w1: Source/Destination IDs
* w2: Reserved (MBZ)
* w3-w7: Implementation defined, free to be used below
@@ -68,7 +70,8 @@
#define CRB_FFA_GET_INTERFACE_VERSION 0x0f000001
/*
- * Return information on a given feature of the TPM service
+ * Notifies the TPM service that a TPM command or TPM locality request is
+ * ready to be processed, and allows the TPM service to process it.
* Call register usage:
* w3: Not used (MBZ)
* w4: TPM service function ID, CRB_FFA_START
@@ -105,7 +108,10 @@ struct tpm_crb_ffa {
u16 minor_version;
/* lock to protect sending of FF-A messages: */
struct mutex msg_data_lock;
- struct ffa_send_direct_data direct_msg_data;
+ union {
+ struct ffa_send_direct_data direct_msg_data;
+ struct ffa_send_direct_data2 direct_msg_data2;
+ };
};
static struct tpm_crb_ffa *tpm_crb_ffa;
@@ -185,18 +191,34 @@ static int __tpm_crb_ffa_send_recieve(unsigned long func_id,
msg_ops = tpm_crb_ffa->ffa_dev->ops->msg_ops;
- memset(&tpm_crb_ffa->direct_msg_data, 0x00,
- sizeof(struct ffa_send_direct_data));
-
- tpm_crb_ffa->direct_msg_data.data1 = func_id;
- tpm_crb_ffa->direct_msg_data.data2 = a0;
- tpm_crb_ffa->direct_msg_data.data3 = a1;
- tpm_crb_ffa->direct_msg_data.data4 = a2;
+ if (ffa_partition_supports_direct_req2_recv(tpm_crb_ffa->ffa_dev)) {
+ memset(&tpm_crb_ffa->direct_msg_data2, 0x00,
+ sizeof(struct ffa_send_direct_data2));
+
+ tpm_crb_ffa->direct_msg_data2.data[0] = func_id;
+ tpm_crb_ffa->direct_msg_data2.data[1] = a0;
+ tpm_crb_ffa->direct_msg_data2.data[2] = a1;
+ tpm_crb_ffa->direct_msg_data2.data[3] = a2;
+
+ ret = msg_ops->sync_send_receive2(tpm_crb_ffa->ffa_dev,
+ &tpm_crb_ffa->direct_msg_data2);
+ if (!ret)
+ ret = tpm_crb_ffa_to_linux_errno(tpm_crb_ffa->direct_msg_data2.data[0]);
+ } else {
+ memset(&tpm_crb_ffa->direct_msg_data, 0x00,
+ sizeof(struct ffa_send_direct_data));
+
+ tpm_crb_ffa->direct_msg_data.data1 = func_id;
+ tpm_crb_ffa->direct_msg_data.data2 = a0;
+ tpm_crb_ffa->direct_msg_data.data3 = a1;
+ tpm_crb_ffa->direct_msg_data.data4 = a2;
+
+ ret = msg_ops->sync_send_receive(tpm_crb_ffa->ffa_dev,
+ &tpm_crb_ffa->direct_msg_data);
+ if (!ret)
+ ret = tpm_crb_ffa_to_linux_errno(tpm_crb_ffa->direct_msg_data.data1);
+ }
- ret = msg_ops->sync_send_receive(tpm_crb_ffa->ffa_dev,
- &tpm_crb_ffa->direct_msg_data);
- if (!ret)
- ret = tpm_crb_ffa_to_linux_errno(tpm_crb_ffa->direct_msg_data.data1);
return ret;
}
@@ -231,8 +253,13 @@ int tpm_crb_ffa_get_interface_version(u16 *major, u16 *minor)
rc = __tpm_crb_ffa_send_recieve(CRB_FFA_GET_INTERFACE_VERSION, 0x00, 0x00, 0x00);
if (!rc) {
- *major = CRB_FFA_MAJOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
- *minor = CRB_FFA_MINOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
+ if (ffa_partition_supports_direct_req2_recv(tpm_crb_ffa->ffa_dev)) {
+ *major = CRB_FFA_MAJOR_VERSION(tpm_crb_ffa->direct_msg_data2.data[1]);
+ *minor = CRB_FFA_MINOR_VERSION(tpm_crb_ffa->direct_msg_data2.data[1]);
+ } else {
+ *major = CRB_FFA_MAJOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
+ *minor = CRB_FFA_MINOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
+ }
}
return rc;
@@ -277,8 +304,9 @@ static int tpm_crb_ffa_probe(struct ffa_device *ffa_dev)
tpm_crb_ffa = ERR_PTR(-ENODEV); // set tpm_crb_ffa so we can detect probe failure
- if (!ffa_partition_supports_direct_recv(ffa_dev)) {
- pr_err("TPM partition doesn't support direct message receive.\n");
+ if (!ffa_partition_supports_direct_recv(ffa_dev) &&
+ !ffa_partition_supports_direct_req2_recv(ffa_dev)) {
+ dev_warn(&ffa_dev->dev, "partition doesn't support direct message receive.\n");
return -EINVAL;
}
@@ -299,17 +327,17 @@ static int tpm_crb_ffa_probe(struct ffa_device *ffa_dev)
rc = tpm_crb_ffa_get_interface_version(&tpm_crb_ffa->major_version,
&tpm_crb_ffa->minor_version);
if (rc) {
- pr_err("failed to get crb interface version. rc:%d", rc);
+ dev_err(&ffa_dev->dev, "failed to get crb interface version. rc:%d\n", rc);
goto out;
}
- pr_info("ABI version %u.%u", tpm_crb_ffa->major_version,
+ dev_info(&ffa_dev->dev, "ABI version %u.%u\n", tpm_crb_ffa->major_version,
tpm_crb_ffa->minor_version);
if (tpm_crb_ffa->major_version != CRB_FFA_VERSION_MAJOR ||
(tpm_crb_ffa->minor_version > 0 &&
tpm_crb_ffa->minor_version < CRB_FFA_VERSION_MINOR)) {
- pr_err("Incompatible ABI version");
+ dev_warn(&ffa_dev->dev, "Incompatible ABI version\n");
goto out;
}
diff --git a/drivers/char/tpm/tpm_svsm.c b/drivers/char/tpm/tpm_svsm.c
new file mode 100644
index 000000000000..4280edf427d6
--- /dev/null
+++ b/drivers/char/tpm/tpm_svsm.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ *
+ * Driver for the vTPM defined by the AMD SVSM spec [1].
+ *
+ * The specification defines a protocol that a SEV-SNP guest OS can use to
+ * discover and talk to a vTPM emulated by the Secure VM Service Module (SVSM)
+ * in the guest context, but at a more privileged level (usually VMPL0).
+ *
+ * [1] "Secure VM Service Module for SEV-SNP Guests"
+ * Publication # 58019 Revision: 1.00
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/tpm_svsm.h>
+
+#include <asm/sev.h>
+
+#include "tpm.h"
+
+struct tpm_svsm_priv {
+ void *buffer;
+};
+
+static int tpm_svsm_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+ struct tpm_svsm_priv *priv = dev_get_drvdata(&chip->dev);
+ int ret;
+
+ ret = svsm_vtpm_cmd_request_fill(priv->buffer, 0, buf, len);
+ if (ret)
+ return ret;
+
+ /*
+ * The SVSM call uses the same buffer for the command and for the
+ * response, so after this call, the buffer will contain the response
+ * that can be used by .recv() op.
+ */
+ return snp_svsm_vtpm_send_command(priv->buffer);
+}
+
+static int tpm_svsm_recv(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+ struct tpm_svsm_priv *priv = dev_get_drvdata(&chip->dev);
+
+ /*
+ * The internal buffer contains the response after we send the command
+ * to SVSM.
+ */
+ return svsm_vtpm_cmd_response_parse(priv->buffer, buf, len);
+}
+
+static struct tpm_class_ops tpm_chip_ops = {
+ .flags = TPM_OPS_AUTO_STARTUP,
+ .recv = tpm_svsm_recv,
+ .send = tpm_svsm_send,
+};
+
+static int __init tpm_svsm_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct tpm_svsm_priv *priv;
+ struct tpm_chip *chip;
+ int err;
+
+ priv = devm_kmalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ /*
+ * The maximum buffer supported is one page (see SVSM_VTPM_MAX_BUFFER
+ * in tpm_svsm.h).
+ */
+ priv->buffer = (void *)devm_get_free_pages(dev, GFP_KERNEL, 0);
+ if (!priv->buffer)
+ return -ENOMEM;
+
+ chip = tpmm_chip_alloc(dev, &tpm_chip_ops);
+ if (IS_ERR(chip))
+ return PTR_ERR(chip);
+
+ dev_set_drvdata(&chip->dev, priv);
+
+ err = tpm2_probe(chip);
+ if (err)
+ return err;
+
+ err = tpm_chip_register(chip);
+ if (err)
+ return err;
+
+ dev_info(dev, "SNP SVSM vTPM %s device\n",
+ (chip->flags & TPM_CHIP_FLAG_TPM2) ? "2.0" : "1.2");
+
+ return 0;
+}
+
+static void __exit tpm_svsm_remove(struct platform_device *pdev)
+{
+ struct tpm_chip *chip = platform_get_drvdata(pdev);
+
+ tpm_chip_unregister(chip);
+}
+
+/*
+ * tpm_svsm_remove() lives in .exit.text. For drivers registered via
+ * module_platform_driver_probe() this is ok because they cannot get unbound
+ * at runtime. So mark the driver struct with __refdata to prevent modpost
+ * triggering a section mismatch warning.
+ */
+static struct platform_driver tpm_svsm_driver __refdata = {
+ .remove = __exit_p(tpm_svsm_remove),
+ .driver = {
+ .name = "tpm-svsm",
+ },
+};
+
+module_platform_driver_probe(tpm_svsm_driver, tpm_svsm_probe);
+
+MODULE_DESCRIPTION("SNP SVSM vTPM Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:tpm-svsm");
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 970d02c337c7..6c3aa480396b 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -54,7 +54,7 @@ enum tis_int_flags {
enum tis_defaults {
TIS_MEM_LEN = 0x5000,
TIS_SHORT_TIMEOUT = 750, /* ms */
- TIS_LONG_TIMEOUT = 2000, /* 2 sec */
+ TIS_LONG_TIMEOUT = 4000, /* 4 secs */
TIS_TIMEOUT_MIN_ATML = 14700, /* usecs */
TIS_TIMEOUT_MAX_ATML = 15000, /* usecs */
};
diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
index 014db6386624..8ddf3a9a53df 100644
--- a/drivers/clk/clk-s2mps11.c
+++ b/drivers/clk/clk-s2mps11.c
@@ -137,6 +137,8 @@ static int s2mps11_clk_probe(struct platform_device *pdev)
if (!clk_data)
return -ENOMEM;
+ clk_data->num = S2MPS11_CLKS_NUM;
+
switch (hwid) {
case S2MPS11X:
s2mps11_reg = S2MPS11_REG_RTC_CTRL;
@@ -186,7 +188,6 @@ static int s2mps11_clk_probe(struct platform_device *pdev)
clk_data->hws[i] = &s2mps11_clks[i].hw;
}
- clk_data->num = S2MPS11_CLKS_NUM;
of_clk_add_hw_provider(s2mps11_clks->clk_np, of_clk_hw_onecell_get,
clk_data);
diff --git a/drivers/clk/rockchip/clk-rk3576.c b/drivers/clk/rockchip/clk-rk3576.c
index 595e010341f7..be703f250197 100644
--- a/drivers/clk/rockchip/clk-rk3576.c
+++ b/drivers/clk/rockchip/clk-rk3576.c
@@ -541,6 +541,8 @@ static struct rockchip_clk_branch rk3576_clk_branches[] __initdata = {
RK3576_CLKGATE_CON(5), 14, GFLAGS),
GATE(CLK_OTPC_AUTO_RD_G, "clk_otpc_auto_rd_g", "xin24m", 0,
RK3576_CLKGATE_CON(5), 15, GFLAGS),
+ GATE(CLK_OTP_PHY_G, "clk_otp_phy_g", "xin24m", 0,
+ RK3576_CLKGATE_CON(6), 0, GFLAGS),
COMPOSITE(CLK_MIPI_CAMERAOUT_M0, "clk_mipi_cameraout_m0", mux_24m_spll_gpll_cpll_p, 0,
RK3576_CLKSEL_CON(38), 8, 2, MFLAGS, 0, 8, DFLAGS,
RK3576_CLKGATE_CON(6), 3, GFLAGS),
diff --git a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c
index bb66c906ebbb..e83d4fd40240 100644
--- a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c
+++ b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c
@@ -412,19 +412,23 @@ static const struct clk_parent_data mmc0_mmc1_parents[] = {
{ .hw = &pll_periph0_2x_clk.common.hw },
{ .hw = &pll_audio1_div2_clk.common.hw },
};
-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc0_mmc1_parents, 0x830,
- 0, 4, /* M */
- 8, 2, /* P */
- 24, 3, /* mux */
- BIT(31), /* gate */
- 0);
-
-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc0_mmc1_parents, 0x834,
- 0, 4, /* M */
- 8, 2, /* P */
- 24, 3, /* mux */
- BIT(31), /* gate */
- 0);
+static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0",
+ mmc0_mmc1_parents, 0x830,
+ 0, 4, /* M */
+ 8, 2, /* P */
+ 24, 3, /* mux */
+ BIT(31), /* gate */
+ 2, /* post-div */
+ 0);
+
+static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1",
+ mmc0_mmc1_parents, 0x834,
+ 0, 4, /* M */
+ 8, 2, /* P */
+ 24, 3, /* mux */
+ BIT(31), /* gate */
+ 2, /* post-div */
+ 0);
static const struct clk_parent_data mmc2_parents[] = {
{ .fw_name = "hosc" },
@@ -433,12 +437,14 @@ static const struct clk_parent_data mmc2_parents[] = {
{ .hw = &pll_periph0_800M_clk.common.hw },
{ .hw = &pll_audio1_div2_clk.common.hw },
};
-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc2_parents, 0x838,
- 0, 4, /* M */
- 8, 2, /* P */
- 24, 3, /* mux */
- BIT(31), /* gate */
- 0);
+static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc2_parents,
+ 0x838,
+ 0, 4, /* M */
+ 8, 2, /* P */
+ 24, 3, /* mux */
+ BIT(31), /* gate */
+ 2, /* post-div */
+ 0);
static SUNXI_CCU_GATE_HWS(bus_mmc0_clk, "bus-mmc0", psi_ahb_hws,
0x84c, BIT(0), 0);
diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h
index b35aeec70484..bb09c649bfa3 100644
--- a/drivers/clk/sunxi-ng/ccu_mp.h
+++ b/drivers/clk/sunxi-ng/ccu_mp.h
@@ -52,6 +52,28 @@ struct ccu_mp {
} \
}
+#define SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(_struct, _name, _parents, \
+ _reg, \
+ _mshift, _mwidth, \
+ _pshift, _pwidth, \
+ _muxshift, _muxwidth, \
+ _gate, _postdiv, _flags)\
+ struct ccu_mp _struct = { \
+ .enable = _gate, \
+ .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \
+ .p = _SUNXI_CCU_DIV(_pshift, _pwidth), \
+ .mux = _SUNXI_CCU_MUX(_muxshift, _muxwidth), \
+ .fixed_post_div = _postdiv, \
+ .common = { \
+ .reg = _reg, \
+ .features = CCU_FEATURE_FIXED_POSTDIV, \
+ .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, \
+ _parents, \
+ &ccu_mp_ops, \
+ _flags), \
+ } \
+ }
+
#define SUNXI_CCU_MP_WITH_MUX_GATE(_struct, _name, _parents, _reg, \
_mshift, _mwidth, \
_pshift, _pwidth, \
@@ -109,8 +131,7 @@ struct ccu_mp {
_mshift, _mwidth, \
_pshift, _pwidth, \
_muxshift, _muxwidth, \
- _gate, _features, \
- _flags) \
+ _gate, _flags, _features) \
struct ccu_mp _struct = { \
.enable = _gate, \
.m = _SUNXI_CCU_DIV(_mshift, _mwidth), \
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 487c85259967..645f517a1ac2 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -73,6 +73,14 @@ config DW_APB_TIMER_OF
select DW_APB_TIMER
select TIMER_OF
+config ECONET_EN751221_TIMER
+ bool "EcoNet EN751221 High Precision Timer" if COMPILE_TEST
+ depends on HAS_IOMEM
+ select CLKSRC_MMIO
+ select TIMER_OF
+ help
+ Support for CPU timer found on EcoNet MIPS based SoCs.
+
config FTTMR010_TIMER
bool "Faraday Technology timer driver" if COMPILE_TEST
depends on HAS_IOMEM
@@ -437,8 +445,8 @@ config ATMEL_ST
config ATMEL_TCB_CLKSRC
bool "Atmel TC Block timer driver" if COMPILE_TEST
- depends on ARM && HAS_IOMEM
- select TIMER_OF if OF
+ depends on ARM && OF && HAS_IOMEM
+ select TIMER_OF
help
Support for Timer Counter Blocks on Atmel SoCs.
@@ -763,4 +771,12 @@ config RALINK_TIMER
Enables support for system tick counter present on
Ralink SoCs RT3352 and MT7620.
+config NXP_STM_TIMER
+ bool "NXP System Timer Module driver"
+ depends on ARCH_S32 || COMPILE_TEST
+ select CLKSRC_MMIO
+ help
+ Enables the support for NXP System Timer Module found in the
+ s32g NXP platform series.
+
endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 43ef16a4efa6..205bf3b0a8f3 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_CLKBLD_I8253) += i8253.o
obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
obj-$(CONFIG_DAVINCI_TIMER) += timer-davinci.o
obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o
+obj-$(CONFIG_ECONET_EN751221_TIMER) += timer-econet-en751221.o
obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o
obj-$(CONFIG_OMAP_DM_SYSTIMER) += timer-ti-dm-systimer.o
obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
@@ -92,3 +93,4 @@ obj-$(CONFIG_GXP_TIMER) += timer-gxp.o
obj-$(CONFIG_CLKSRC_LOONGSON1_PWM) += timer-loongson1-pwm.o
obj-$(CONFIG_EP93XX_TIMER) += timer-ep93xx.o
obj-$(CONFIG_RALINK_TIMER) += timer-ralink.o
+obj-$(CONFIG_NXP_STM_TIMER) += timer-nxp-stm.o
diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c
index 3fcbd02b2483..2089aeaae225 100644
--- a/drivers/clocksource/renesas-ostm.c
+++ b/drivers/clocksource/renesas-ostm.c
@@ -225,7 +225,6 @@ err_free:
TIMER_OF_DECLARE(ostm, "renesas,ostm", ostm_init);
-#if defined(CONFIG_ARCH_RZG2L) || defined(CONFIG_ARCH_R9A09G057)
static int __init ostm_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -233,7 +232,7 @@ static int __init ostm_probe(struct platform_device *pdev)
return ostm_init(dev->of_node);
}
-static const struct of_device_id ostm_of_table[] = {
+static const struct of_device_id __maybe_unused ostm_of_table[] = {
{ .compatible = "renesas,ostm", },
{ /* sentinel */ }
};
@@ -246,4 +245,3 @@ static struct platform_driver ostm_device_driver = {
},
};
builtin_platform_driver_probe(ostm_device_driver, ostm_probe);
-#endif
diff --git a/drivers/clocksource/timer-econet-en751221.c b/drivers/clocksource/timer-econet-en751221.c
new file mode 100644
index 000000000000..3b449fdaafee
--- /dev/null
+++ b/drivers/clocksource/timer-econet-en751221.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Timer present on EcoNet EN75xx MIPS based SoCs.
+ *
+ * Copyright (C) 2025 by Caleb James DeLisle <cjd@cjdns.fr>
+ */
+
+#include <linux/io.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/clockchips.h>
+#include <linux/sched_clock.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/cpuhotplug.h>
+#include <linux/clk.h>
+
+#define ECONET_BITS 32
+#define ECONET_MIN_DELTA 0x00001000
+#define ECONET_MAX_DELTA GENMASK(ECONET_BITS - 2, 0)
+/* 34Kc hardware has 1 block and 1004Kc has 2. */
+#define ECONET_NUM_BLOCKS DIV_ROUND_UP(NR_CPUS, 2)
+
+static struct {
+ void __iomem *membase[ECONET_NUM_BLOCKS];
+ u32 freq_hz;
+} econet_timer __ro_after_init;
+
+static DEFINE_PER_CPU(struct clock_event_device, econet_timer_pcpu);
+
+/* Each memory block has 2 timers, the order of registers is:
+ * CTL, CMR0, CNT0, CMR1, CNT1
+ */
+static inline void __iomem *reg_ctl(u32 timer_n)
+{
+ return econet_timer.membase[timer_n >> 1];
+}
+
+static inline void __iomem *reg_compare(u32 timer_n)
+{
+ return econet_timer.membase[timer_n >> 1] + (timer_n & 1) * 0x08 + 0x04;
+}
+
+static inline void __iomem *reg_count(u32 timer_n)
+{
+ return econet_timer.membase[timer_n >> 1] + (timer_n & 1) * 0x08 + 0x08;
+}
+
+static inline u32 ctl_bit_enabled(u32 timer_n)
+{
+ return 1U << (timer_n & 1);
+}
+
+static inline u32 ctl_bit_pending(u32 timer_n)
+{
+ return 1U << ((timer_n & 1) + 16);
+}
+
+static bool cevt_is_pending(int cpu_id)
+{
+ return ioread32(reg_ctl(cpu_id)) & ctl_bit_pending(cpu_id);
+}
+
+static irqreturn_t cevt_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *dev = this_cpu_ptr(&econet_timer_pcpu);
+ int cpu = cpumask_first(dev->cpumask);
+
+ /* Each VPE has its own events,
+ * so this will only happen on spurious interrupt.
+ */
+ if (!cevt_is_pending(cpu))
+ return IRQ_NONE;
+
+ iowrite32(ioread32(reg_count(cpu)), reg_compare(cpu));
+ dev->event_handler(dev);
+ return IRQ_HANDLED;
+}
+
+static int cevt_set_next_event(ulong delta, struct clock_event_device *dev)
+{
+ u32 next;
+ int cpu;
+
+ cpu = cpumask_first(dev->cpumask);
+ next = ioread32(reg_count(cpu)) + delta;
+ iowrite32(next, reg_compare(cpu));
+
+ if ((s32)(next - ioread32(reg_count(cpu))) < ECONET_MIN_DELTA / 2)
+ return -ETIME;
+
+ return 0;
+}
+
+static int cevt_init_cpu(uint cpu)
+{
+ struct clock_event_device *cd = &per_cpu(econet_timer_pcpu, cpu);
+ u32 reg;
+
+ pr_debug("%s: Setting up clockevent for CPU %d\n", cd->name, cpu);
+
+ reg = ioread32(reg_ctl(cpu)) | ctl_bit_enabled(cpu);
+ iowrite32(reg, reg_ctl(cpu));
+
+ enable_percpu_irq(cd->irq, IRQ_TYPE_NONE);
+
+ /* Do this last because it synchronously configures the timer */
+ clockevents_config_and_register(cd, econet_timer.freq_hz,
+ ECONET_MIN_DELTA, ECONET_MAX_DELTA);
+
+ return 0;
+}
+
+static u64 notrace sched_clock_read(void)
+{
+ /* Always read from clock zero no matter the CPU */
+ return (u64)ioread32(reg_count(0));
+}
+
+/* Init */
+
+static void __init cevt_dev_init(uint cpu)
+{
+ iowrite32(0, reg_count(cpu));
+ iowrite32(U32_MAX, reg_compare(cpu));
+}
+
+static int __init cevt_init(struct device_node *np)
+{
+ int i, irq, ret;
+
+ irq = irq_of_parse_and_map(np, 0);
+ if (irq <= 0) {
+ pr_err("%pOFn: irq_of_parse_and_map failed", np);
+ return -EINVAL;
+ }
+
+ ret = request_percpu_irq(irq, cevt_interrupt, np->name, &econet_timer_pcpu);
+
+ if (ret < 0) {
+ pr_err("%pOFn: IRQ %d setup failed (%d)\n", np, irq, ret);
+ goto err_unmap_irq;
+ }
+
+ for_each_possible_cpu(i) {
+ struct clock_event_device *cd = &per_cpu(econet_timer_pcpu, i);
+
+ cd->rating = 310,
+ cd->features = CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_C3STOP |
+ CLOCK_EVT_FEAT_PERCPU;
+ cd->set_next_event = cevt_set_next_event;
+ cd->irq = irq;
+ cd->cpumask = cpumask_of(i);
+ cd->name = np->name;
+
+ cevt_dev_init(i);
+ }
+
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "clockevents/econet/timer:starting",
+ cevt_init_cpu, NULL);
+ return 0;
+
+err_unmap_irq:
+ irq_dispose_mapping(irq);
+ return ret;
+}
+
+static int __init timer_init(struct device_node *np)
+{
+ int num_blocks = DIV_ROUND_UP(num_possible_cpus(), 2);
+ struct clk *clk;
+ int ret;
+
+ clk = of_clk_get(np, 0);
+ if (IS_ERR(clk)) {
+ pr_err("%pOFn: Failed to get CPU clock from DT %ld\n", np, PTR_ERR(clk));
+ return PTR_ERR(clk);
+ }
+
+ econet_timer.freq_hz = clk_get_rate(clk);
+
+ for (int i = 0; i < num_blocks; i++) {
+ econet_timer.membase[i] = of_iomap(np, i);
+ if (!econet_timer.membase[i]) {
+ pr_err("%pOFn: failed to map register [%d]\n", np, i);
+ return -ENXIO;
+ }
+ }
+
+ /* For clocksource purposes always read clock zero, whatever the CPU */
+ ret = clocksource_mmio_init(reg_count(0), np->name,
+ econet_timer.freq_hz, 301, ECONET_BITS,
+ clocksource_mmio_readl_up);
+ if (ret) {
+ pr_err("%pOFn: clocksource_mmio_init failed: %d", np, ret);
+ return ret;
+ }
+
+ ret = cevt_init(np);
+ if (ret < 0)
+ return ret;
+
+ sched_clock_register(sched_clock_read, ECONET_BITS,
+ econet_timer.freq_hz);
+
+ pr_info("%pOFn: using %u.%03u MHz high precision timer\n", np,
+ econet_timer.freq_hz / 1000000,
+ (econet_timer.freq_hz / 1000) % 1000);
+
+ return 0;
+}
+
+TIMER_OF_DECLARE(econet_timer_hpt, "econet,en751221-timer", timer_init);
diff --git a/drivers/clocksource/timer-nxp-stm.c b/drivers/clocksource/timer-nxp-stm.c
new file mode 100644
index 000000000000..d7ccf9001729
--- /dev/null
+++ b/drivers/clocksource/timer-nxp-stm.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ * Copyright 2018,2021-2025 NXP
+ *
+ * NXP System Timer Module:
+ *
+ * STM supports commonly required system and application software
+ * timing functions. STM includes a 32-bit count-up timer and four
+ * 32-bit compare channels with a separate interrupt source for each
+ * channel. The timer is driven by the STM module clock divided by an
+ * 8-bit prescale value (1 to 256). It has ability to stop the timer
+ * in Debug mode
+ */
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/sched_clock.h>
+#include <linux/units.h>
+
+#define STM_CR(__base) (__base)
+
+#define STM_CR_TEN BIT(0)
+#define STM_CR_FRZ BIT(1)
+#define STM_CR_CPS_OFFSET 8u
+#define STM_CR_CPS_MASK GENMASK(15, STM_CR_CPS_OFFSET)
+
+#define STM_CNT(__base) ((__base) + 0x04)
+
+#define STM_CCR0(__base) ((__base) + 0x10)
+#define STM_CCR1(__base) ((__base) + 0x20)
+#define STM_CCR2(__base) ((__base) + 0x30)
+#define STM_CCR3(__base) ((__base) + 0x40)
+
+#define STM_CCR_CEN BIT(0)
+
+#define STM_CIR0(__base) ((__base) + 0x14)
+#define STM_CIR1(__base) ((__base) + 0x24)
+#define STM_CIR2(__base) ((__base) + 0x34)
+#define STM_CIR3(__base) ((__base) + 0x44)
+
+#define STM_CIR_CIF BIT(0)
+
+#define STM_CMP0(__base) ((__base) + 0x18)
+#define STM_CMP1(__base) ((__base) + 0x28)
+#define STM_CMP2(__base) ((__base) + 0x38)
+#define STM_CMP3(__base) ((__base) + 0x48)
+
+#define STM_ENABLE_MASK (STM_CR_FRZ | STM_CR_TEN)
+
+struct stm_timer {
+ void __iomem *base;
+ unsigned long rate;
+ unsigned long delta;
+ unsigned long counter;
+ struct clock_event_device ced;
+ struct clocksource cs;
+ atomic_t refcnt;
+};
+
+static DEFINE_PER_CPU(struct stm_timer *, stm_timers);
+
+static struct stm_timer *stm_sched_clock;
+
+/*
+ * Global structure for multiple STMs initialization
+ */
+static int stm_instances;
+
+/*
+ * This global lock is used to prevent race conditions with the
+ * stm_instances in case the driver is using the ASYNC option
+ */
+static DEFINE_MUTEX(stm_instances_lock);
+
+DEFINE_GUARD(stm_instances, struct mutex *, mutex_lock(_T), mutex_unlock(_T))
+
+static struct stm_timer *cs_to_stm(struct clocksource *cs)
+{
+ return container_of(cs, struct stm_timer, cs);
+}
+
+static struct stm_timer *ced_to_stm(struct clock_event_device *ced)
+{
+ return container_of(ced, struct stm_timer, ced);
+}
+
+static u64 notrace nxp_stm_read_sched_clock(void)
+{
+ return readl(STM_CNT(stm_sched_clock->base));
+}
+
+static u32 nxp_stm_clocksource_getcnt(struct stm_timer *stm_timer)
+{
+ return readl(STM_CNT(stm_timer->base));
+}
+
+static void nxp_stm_clocksource_setcnt(struct stm_timer *stm_timer, u32 cnt)
+{
+ writel(cnt, STM_CNT(stm_timer->base));
+}
+
+static u64 nxp_stm_clocksource_read(struct clocksource *cs)
+{
+ struct stm_timer *stm_timer = cs_to_stm(cs);
+
+ return (u64)nxp_stm_clocksource_getcnt(stm_timer);
+}
+
+static void nxp_stm_module_enable(struct stm_timer *stm_timer)
+{
+ u32 reg;
+
+ reg = readl(STM_CR(stm_timer->base));
+
+ reg |= STM_ENABLE_MASK;
+
+ writel(reg, STM_CR(stm_timer->base));
+}
+
+static void nxp_stm_module_disable(struct stm_timer *stm_timer)
+{
+ u32 reg;
+
+ reg = readl(STM_CR(stm_timer->base));
+
+ reg &= ~STM_ENABLE_MASK;
+
+ writel(reg, STM_CR(stm_timer->base));
+}
+
+static void nxp_stm_module_put(struct stm_timer *stm_timer)
+{
+ if (atomic_dec_and_test(&stm_timer->refcnt))
+ nxp_stm_module_disable(stm_timer);
+}
+
+static void nxp_stm_module_get(struct stm_timer *stm_timer)
+{
+ if (atomic_inc_return(&stm_timer->refcnt) == 1)
+ nxp_stm_module_enable(stm_timer);
+}
+
+static int nxp_stm_clocksource_enable(struct clocksource *cs)
+{
+ struct stm_timer *stm_timer = cs_to_stm(cs);
+
+ nxp_stm_module_get(stm_timer);
+
+ return 0;
+}
+
+static void nxp_stm_clocksource_disable(struct clocksource *cs)
+{
+ struct stm_timer *stm_timer = cs_to_stm(cs);
+
+ nxp_stm_module_put(stm_timer);
+}
+
+static void nxp_stm_clocksource_suspend(struct clocksource *cs)
+{
+ struct stm_timer *stm_timer = cs_to_stm(cs);
+
+ nxp_stm_clocksource_disable(cs);
+ stm_timer->counter = nxp_stm_clocksource_getcnt(stm_timer);
+}
+
+static void nxp_stm_clocksource_resume(struct clocksource *cs)
+{
+ struct stm_timer *stm_timer = cs_to_stm(cs);
+
+ nxp_stm_clocksource_setcnt(stm_timer, stm_timer->counter);
+ nxp_stm_clocksource_enable(cs);
+}
+
+static void __init devm_clocksource_unregister(void *data)
+{
+ struct stm_timer *stm_timer = data;
+
+ clocksource_unregister(&stm_timer->cs);
+}
+
+static int __init nxp_stm_clocksource_init(struct device *dev, struct stm_timer *stm_timer,
+ const char *name, void __iomem *base, struct clk *clk)
+{
+ int ret;
+
+ stm_timer->base = base;
+ stm_timer->rate = clk_get_rate(clk);
+
+ stm_timer->cs.name = name;
+ stm_timer->cs.rating = 460;
+ stm_timer->cs.read = nxp_stm_clocksource_read;
+ stm_timer->cs.enable = nxp_stm_clocksource_enable;
+ stm_timer->cs.disable = nxp_stm_clocksource_disable;
+ stm_timer->cs.suspend = nxp_stm_clocksource_suspend;
+ stm_timer->cs.resume = nxp_stm_clocksource_resume;
+ stm_timer->cs.mask = CLOCKSOURCE_MASK(32);
+ stm_timer->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+ ret = clocksource_register_hz(&stm_timer->cs, stm_timer->rate);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(dev, devm_clocksource_unregister, stm_timer);
+ if (ret) {
+ clocksource_unregister(&stm_timer->cs);
+ return ret;
+ }
+
+ stm_sched_clock = stm_timer;
+
+ sched_clock_register(nxp_stm_read_sched_clock, 32, stm_timer->rate);
+
+ dev_dbg(dev, "Registered clocksource %s\n", name);
+
+ return 0;
+}
+
+static int nxp_stm_clockevent_read_counter(struct stm_timer *stm_timer)
+{
+ return readl(STM_CNT(stm_timer->base));
+}
+
+static void nxp_stm_clockevent_disable(struct stm_timer *stm_timer)
+{
+ writel(0, STM_CCR0(stm_timer->base));
+}
+
+static void nxp_stm_clockevent_enable(struct stm_timer *stm_timer)
+{
+ writel(STM_CCR_CEN, STM_CCR0(stm_timer->base));
+}
+
+static int nxp_stm_clockevent_shutdown(struct clock_event_device *ced)
+{
+ struct stm_timer *stm_timer = ced_to_stm(ced);
+
+ nxp_stm_clockevent_disable(stm_timer);
+
+ return 0;
+}
+
+static int nxp_stm_clockevent_set_next_event(unsigned long delta, struct clock_event_device *ced)
+{
+ struct stm_timer *stm_timer = ced_to_stm(ced);
+ u32 val;
+
+ nxp_stm_clockevent_disable(stm_timer);
+
+ stm_timer->delta = delta;
+
+ val = nxp_stm_clockevent_read_counter(stm_timer) + delta;
+
+ writel(val, STM_CMP0(stm_timer->base));
+
+ /*
+ * The counter is shared across the channels and can not be
+ * stopped while we are setting the next event. If the delta
+ * is very small it is possible the counter increases above
+ * the computed 'val'. The min_delta value specified when
+ * registering the clockevent will prevent that. The second
+ * case is if the counter wraps while we compute the 'val' and
+ * before writing the comparator register. We read the counter,
+ * check if we are back in time and abort the timer with -ETIME.
+ */
+ if (val > nxp_stm_clockevent_read_counter(stm_timer) + delta)
+ return -ETIME;
+
+ nxp_stm_clockevent_enable(stm_timer);
+
+ return 0;
+}
+
+static int nxp_stm_clockevent_set_periodic(struct clock_event_device *ced)
+{
+ struct stm_timer *stm_timer = ced_to_stm(ced);
+
+ return nxp_stm_clockevent_set_next_event(stm_timer->rate, ced);
+}
+
+static void nxp_stm_clockevent_suspend(struct clock_event_device *ced)
+{
+ struct stm_timer *stm_timer = ced_to_stm(ced);
+
+ nxp_stm_module_put(stm_timer);
+}
+
+static void nxp_stm_clockevent_resume(struct clock_event_device *ced)
+{
+ struct stm_timer *stm_timer = ced_to_stm(ced);
+
+ nxp_stm_module_get(stm_timer);
+}
+
+static int __init nxp_stm_clockevent_per_cpu_init(struct device *dev, struct stm_timer *stm_timer,
+ const char *name, void __iomem *base, int irq,
+ struct clk *clk, int cpu)
+{
+ stm_timer->base = base;
+ stm_timer->rate = clk_get_rate(clk);
+
+ stm_timer->ced.name = name;
+ stm_timer->ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+ stm_timer->ced.set_state_shutdown = nxp_stm_clockevent_shutdown;
+ stm_timer->ced.set_state_periodic = nxp_stm_clockevent_set_periodic;
+ stm_timer->ced.set_next_event = nxp_stm_clockevent_set_next_event;
+ stm_timer->ced.suspend = nxp_stm_clockevent_suspend;
+ stm_timer->ced.resume = nxp_stm_clockevent_resume;
+ stm_timer->ced.cpumask = cpumask_of(cpu);
+ stm_timer->ced.rating = 460;
+ stm_timer->ced.irq = irq;
+
+ per_cpu(stm_timers, cpu) = stm_timer;
+
+ nxp_stm_module_get(stm_timer);
+
+ dev_dbg(dev, "Initialized per cpu clockevent name=%s, irq=%d, cpu=%d\n", name, irq, cpu);
+
+ return 0;
+}
+
+static int nxp_stm_clockevent_starting_cpu(unsigned int cpu)
+{
+ struct stm_timer *stm_timer = per_cpu(stm_timers, cpu);
+ int ret;
+
+ if (WARN_ON(!stm_timer))
+ return -EFAULT;
+
+ ret = irq_force_affinity(stm_timer->ced.irq, cpumask_of(cpu));
+ if (ret)
+ return ret;
+
+ /*
+ * The timings measurement show reading the counter register
+ * and writing to the comparator register takes as a maximum
+ * value 1100 ns at 133MHz rate frequency. The timer must be
+ * set above this value and to be secure we set the minimum
+ * value equal to 2000ns, so 2us.
+ *
+ * minimum ticks = (rate / MICRO) * 2
+ */
+ clockevents_config_and_register(&stm_timer->ced, stm_timer->rate,
+ (stm_timer->rate / MICRO) * 2, ULONG_MAX);
+
+ return 0;
+}
+
+static irqreturn_t nxp_stm_module_interrupt(int irq, void *dev_id)
+{
+ struct stm_timer *stm_timer = dev_id;
+ struct clock_event_device *ced = &stm_timer->ced;
+ u32 val;
+
+ /*
+ * The interrupt is shared across the channels in the
+ * module. But this one is configured to run only one channel,
+ * consequently it is pointless to test the interrupt flags
+ * before and we can directly reset the channel 0 irq flag
+ * register.
+ */
+ writel(STM_CIR_CIF, STM_CIR0(stm_timer->base));
+
+ /*
+ * Update STM_CMP value using the counter value
+ */
+ val = nxp_stm_clockevent_read_counter(stm_timer) + stm_timer->delta;
+
+ writel(val, STM_CMP0(stm_timer->base));
+
+ /*
+ * stm hardware doesn't support oneshot, it will generate an
+ * interrupt and start the counter again so software needs to
+ * disable the timer to stop the counter loop in ONESHOT mode.
+ */
+ if (likely(clockevent_state_oneshot(ced)))
+ nxp_stm_clockevent_disable(stm_timer);
+
+ ced->event_handler(ced);
+
+ return IRQ_HANDLED;
+}
+
+static int __init nxp_stm_timer_probe(struct platform_device *pdev)
+{
+ struct stm_timer *stm_timer;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ const char *name = of_node_full_name(np);
+ struct clk *clk;
+ void __iomem *base;
+ int irq, ret;
+
+ /*
+ * The device tree can have multiple STM nodes described, so
+ * it makes this driver a good candidate for the async probe.
+ * It is still unclear if the time framework correctly handles
+ * parallel loading of the timers but at least this driver is
+ * ready to support the option.
+ */
+ guard(stm_instances)(&stm_instances_lock);
+
+ /*
+ * The S32Gx are SoCs featuring a diverse set of cores. Linux
+ * is expected to run on Cortex-A53 cores, while other
+ * software stacks will operate on Cortex-M cores. The number
+ * of STM instances has been sized to include at most one
+ * instance per core.
+ *
+ * As we need a clocksource and a clockevent per cpu, we
+ * simply initialize a clocksource per cpu along with the
+ * clockevent which makes the resulting code simpler.
+ *
+ * However if the device tree is describing more STM instances
+ * than the number of cores, then we ignore them.
+ */
+ if (stm_instances >= num_possible_cpus())
+ return 0;
+
+ base = devm_of_iomap(dev, np, 0, NULL);
+ if (IS_ERR(base))
+ return dev_err_probe(dev, PTR_ERR(base), "Failed to iomap %pOFn\n", np);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return dev_err_probe(dev, irq, "Failed to get IRQ\n");
+
+ clk = devm_clk_get_enabled(dev, NULL);
+ if (IS_ERR(clk))
+ return dev_err_probe(dev, PTR_ERR(clk), "Clock not found\n");
+
+ stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL);
+ if (!stm_timer)
+ return -ENOMEM;
+
+ ret = devm_request_irq(dev, irq, nxp_stm_module_interrupt,
+ IRQF_TIMER | IRQF_NOBALANCING, name, stm_timer);
+ if (ret)
+ return dev_err_probe(dev, ret, "Unable to allocate interrupt line\n");
+
+ ret = nxp_stm_clocksource_init(dev, stm_timer, name, base, clk);
+ if (ret)
+ return ret;
+
+ /*
+ * Next probed STM will be a per CPU clockevent, until we
+ * probe as many as we have CPUs available on the system, we
+ * do a partial initialization
+ */
+ ret = nxp_stm_clockevent_per_cpu_init(dev, stm_timer, name,
+ base, irq, clk,
+ stm_instances);
+ if (ret)
+ return ret;
+
+ stm_instances++;
+
+ /*
+ * The number of probed STMs for per CPU clockevent is
+ * equal to the number of available CPUs on the
+ * system. We install the cpu hotplug to finish the
+ * initialization by registering the clockevents
+ */
+ if (stm_instances == num_possible_cpus()) {
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "STM timer:starting",
+ nxp_stm_clockevent_starting_cpu, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id nxp_stm_of_match[] = {
+ { .compatible = "nxp,s32g2-stm" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, nxp_stm_of_match);
+
+static struct platform_driver nxp_stm_probe = {
+ .probe = nxp_stm_timer_probe,
+ .driver = {
+ .name = "nxp-stm",
+ .of_match_table = nxp_stm_of_match,
+ },
+};
+module_platform_driver(nxp_stm_probe);
+
+MODULE_DESCRIPTION("NXP System Timer Module driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/clocksource/timer-tegra186.c b/drivers/clocksource/timer-tegra186.c
index 5d4cf5237a11..e5394f98a02e 100644
--- a/drivers/clocksource/timer-tegra186.c
+++ b/drivers/clocksource/timer-tegra186.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) 2019-2020 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2019-2025 NVIDIA Corporation. All rights reserved.
*/
+#include <linux/bitfield.h>
#include <linux/clocksource.h>
#include <linux/module.h>
#include <linux/interrupt.h>
@@ -29,6 +30,7 @@
#define TMRSR 0x004
#define TMRSR_INTR_CLR BIT(30)
+#define TMRSR_PCV GENMASK(28, 0)
#define TMRCSSR 0x008
#define TMRCSSR_SRC_USEC (0 << 0)
@@ -45,6 +47,9 @@
#define WDTCR_TIMER_SOURCE_MASK 0xf
#define WDTCR_TIMER_SOURCE(x) ((x) & 0xf)
+#define WDTSR 0x004
+#define WDTSR_CURRENT_EXPIRATION_COUNT GENMASK(14, 12)
+
#define WDTCMDR 0x008
#define WDTCMDR_DISABLE_COUNTER BIT(1)
#define WDTCMDR_START_COUNTER BIT(0)
@@ -169,18 +174,6 @@ static void tegra186_wdt_enable(struct tegra186_wdt *wdt)
value &= ~WDTCR_PERIOD_MASK;
value |= WDTCR_PERIOD(1);
- /* enable local interrupt for WDT petting */
- value |= WDTCR_LOCAL_INT_ENABLE;
-
- /* enable local FIQ and remote interrupt for debug dump */
- if (0)
- value |= WDTCR_REMOTE_INT_ENABLE |
- WDTCR_LOCAL_FIQ_ENABLE;
-
- /* enable system debug reset (doesn't properly reboot) */
- if (0)
- value |= WDTCR_SYSTEM_DEBUG_RESET_ENABLE;
-
/* enable system POR reset */
value |= WDTCR_SYSTEM_POR_RESET_ENABLE;
@@ -234,12 +227,69 @@ static int tegra186_wdt_set_timeout(struct watchdog_device *wdd,
return 0;
}
+static unsigned int tegra186_wdt_get_timeleft(struct watchdog_device *wdd)
+{
+ struct tegra186_wdt *wdt = to_tegra186_wdt(wdd);
+ u32 expiration, val;
+ u64 timeleft;
+
+ if (!watchdog_active(&wdt->base)) {
+ /* return zero if the watchdog timer is not activated. */
+ return 0;
+ }
+
+ /*
+ * Reset occurs on the fifth expiration of the
+ * watchdog timer and so when the watchdog timer is configured,
+ * the actual value programmed into the counter is 1/5 of the
+ * timeout value. Once the counter reaches 0, expiration count
+ * will be increased by 1 and the down counter restarts.
+ * Hence to get the time left before system reset we must
+ * combine 2 parts:
+ * 1. value of the current down counter
+ * 2. (number of counter expirations remaining) * (timeout/5)
+ */
+
+ /* Get the current number of counter expirations. Should be a
+ * value between 0 and 4
+ */
+ val = readl_relaxed(wdt->regs + WDTSR);
+ expiration = FIELD_GET(WDTSR_CURRENT_EXPIRATION_COUNT, val);
+ if (WARN_ON_ONCE(expiration > 4))
+ return 0;
+
+ /* Get the current counter value in microsecond. */
+ val = readl_relaxed(wdt->tmr->regs + TMRSR);
+ timeleft = FIELD_GET(TMRSR_PCV, val);
+
+ /*
+ * Calculate the time remaining by adding the time for the
+ * counter value to the time of the counter expirations that
+ * remain.
+ */
+ timeleft += (((u64)wdt->base.timeout * USEC_PER_SEC) / 5) * (4 - expiration);
+
+ /*
+ * Convert the current counter value to seconds,
+ * rounding up to the nearest second. Cast u64 to
+ * u32 under the assumption that no overflow happens
+ * when coverting to seconds.
+ */
+ timeleft = DIV_ROUND_CLOSEST_ULL(timeleft, USEC_PER_SEC);
+
+ if (WARN_ON_ONCE(timeleft > U32_MAX))
+ return U32_MAX;
+
+ return lower_32_bits(timeleft);
+}
+
static const struct watchdog_ops tegra186_wdt_ops = {
.owner = THIS_MODULE,
.start = tegra186_wdt_start,
.stop = tegra186_wdt_stop,
.ping = tegra186_wdt_ping,
.set_timeout = tegra186_wdt_set_timeout,
+ .get_timeleft = tegra186_wdt_get_timeleft,
};
static struct tegra186_wdt *tegra186_wdt_create(struct tegra186_timer *tegra,
@@ -365,23 +415,10 @@ static int tegra186_timer_usec_init(struct tegra186_timer *tegra)
return clocksource_register_hz(&tegra->usec, USEC_PER_SEC);
}
-static irqreturn_t tegra186_timer_irq(int irq, void *data)
-{
- struct tegra186_timer *tegra = data;
-
- if (watchdog_active(&tegra->wdt->base)) {
- tegra186_wdt_disable(tegra->wdt);
- tegra186_wdt_enable(tegra->wdt);
- }
-
- return IRQ_HANDLED;
-}
-
static int tegra186_timer_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct tegra186_timer *tegra;
- unsigned int irq;
int err;
tegra = devm_kzalloc(dev, sizeof(*tegra), GFP_KERNEL);
@@ -400,8 +437,6 @@ static int tegra186_timer_probe(struct platform_device *pdev)
if (err < 0)
return err;
- irq = err;
-
/* create a watchdog using a preconfigured timer */
tegra->wdt = tegra186_wdt_create(tegra, 0);
if (IS_ERR(tegra->wdt)) {
@@ -428,17 +463,8 @@ static int tegra186_timer_probe(struct platform_device *pdev)
goto unregister_osc;
}
- err = devm_request_irq(dev, irq, tegra186_timer_irq, 0,
- "tegra186-timer", tegra);
- if (err < 0) {
- dev_err(dev, "failed to request IRQ#%u: %d\n", irq, err);
- goto unregister_usec;
- }
-
return 0;
-unregister_usec:
- clocksource_unregister(&tegra->usec);
unregister_osc:
clocksource_unregister(&tegra->osc);
unregister_tsc:
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index d26b610e4f24..ea4b8f220a05 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -79,11 +79,11 @@ static bool boost_state(unsigned int cpu)
case X86_VENDOR_INTEL:
case X86_VENDOR_CENTAUR:
case X86_VENDOR_ZHAOXIN:
- rdmsrl_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &msr);
+ rdmsrq_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &msr);
return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
- rdmsrl_on_cpu(cpu, MSR_K7_HWCR, &msr);
+ rdmsrq_on_cpu(cpu, MSR_K7_HWCR, &msr);
return !(msr & MSR_K7_HWCR_CPB_DIS);
}
return false;
@@ -110,14 +110,14 @@ static int boost_set_msr(bool enable)
return -EINVAL;
}
- rdmsrl(msr_addr, val);
+ rdmsrq(msr_addr, val);
if (enable)
val &= ~msr_mask;
else
val |= msr_mask;
- wrmsrl(msr_addr, val);
+ wrmsrq(msr_addr, val);
return 0;
}
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index e671bc7d1550..447b9aa5ce40 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -31,6 +31,8 @@
#include <acpi/cppc_acpi.h>
+#include <asm/msr.h>
+
#include "amd-pstate.h"
@@ -90,9 +92,9 @@ static int amd_pstate_ut_check_enabled(u32 index)
if (get_shared_mem())
return 0;
- ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable);
+ ret = rdmsrq_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable);
if (ret) {
- pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret);
+ pr_err("%s rdmsrq_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret);
return ret;
}
@@ -137,7 +139,7 @@ static int amd_pstate_ut_check_perf(u32 index)
lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
lowest_perf = cppc_perf.lowest_perf;
} else {
- ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
+ ret = rdmsrq_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
if (ret) {
pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
return ret;
@@ -242,25 +244,30 @@ static int amd_pstate_set_mode(enum amd_pstate_mode mode)
static int amd_pstate_ut_check_driver(u32 index)
{
enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
+ enum amd_pstate_mode orig_mode = amd_pstate_get_status();
+ int ret;
for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
- int ret = amd_pstate_set_mode(mode1);
+ ret = amd_pstate_set_mode(mode1);
if (ret)
return ret;
for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) {
if (mode1 == mode2)
continue;
ret = amd_pstate_set_mode(mode2);
- if (ret) {
- pr_err("%s: failed to update status for %s->%s\n", __func__,
- amd_pstate_get_mode_string(mode1),
- amd_pstate_get_mode_string(mode2));
- return ret;
- }
+ if (ret)
+ goto out;
}
}
- return 0;
+out:
+ if (ret)
+ pr_warn("%s: failed to update status for %s->%s: %d\n", __func__,
+ amd_pstate_get_mode_string(mode1),
+ amd_pstate_get_mode_string(mode2), ret);
+
+ amd_pstate_set_mode(orig_mode);
+ return ret;
}
static int __init amd_pstate_ut_init(void)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index b961f3a3b580..f3477ab37742 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -197,7 +197,7 @@ static u8 msr_get_epp(struct amd_cpudata *cpudata)
u64 value;
int ret;
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+ ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
if (ret < 0) {
pr_debug("Could not retrieve energy perf value (%d)\n", ret);
return ret;
@@ -258,10 +258,10 @@ static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf,
return 0;
if (fast_switch) {
- wrmsrl(MSR_AMD_CPPC_REQ, value);
+ wrmsrq(MSR_AMD_CPPC_REQ, value);
return 0;
} else {
- int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ int ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
if (ret)
return ret;
@@ -309,7 +309,7 @@ static int msr_set_epp(struct cpufreq_policy *policy, u8 epp)
if (value == prev)
return 0;
- ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
if (ret) {
pr_err("failed to set energy perf value (%d)\n", ret);
return ret;
@@ -371,7 +371,7 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp)
static inline int msr_cppc_enable(struct cpufreq_policy *policy)
{
- return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1);
+ return wrmsrq_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1);
}
static int shmem_cppc_enable(struct cpufreq_policy *policy)
@@ -389,9 +389,10 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
static int msr_init_perf(struct amd_cpudata *cpudata)
{
union perf_cached perf = READ_ONCE(cpudata->perf);
- u64 cap1, numerator;
+ u64 cap1, numerator, cppc_req;
+ u8 min_perf;
- int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
+ int ret = rdmsrq_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
&cap1);
if (ret)
return ret;
@@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
if (ret)
return ret;
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
+ if (ret)
+ return ret;
+
+ WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
+ min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
+
+ /*
+ * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
+ * indication that the min_perf value is the one specified through the BIOS option
+ */
+ cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
+
+ if (!cppc_req)
+ perf.bios_min_perf = min_perf;
+
perf.highest_perf = numerator;
perf.max_limit_perf = numerator;
perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
@@ -417,6 +434,7 @@ static int shmem_init_perf(struct amd_cpudata *cpudata)
struct cppc_perf_caps cppc_perf;
union perf_cached perf = READ_ONCE(cpudata->perf);
u64 numerator;
+ bool auto_sel;
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
@@ -438,7 +456,7 @@ static int shmem_init_perf(struct amd_cpudata *cpudata)
if (cppc_state == AMD_PSTATE_ACTIVE)
return 0;
- ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
+ ret = cppc_get_auto_sel(cpudata->cpu, &auto_sel);
if (ret) {
pr_warn("failed to get auto_sel, ret: %d\n", ret);
return 0;
@@ -518,8 +536,8 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
unsigned long flags;
local_irq_save(flags);
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
+ rdmsrq(MSR_IA32_APERF, aperf);
+ rdmsrq(MSR_IA32_MPERF, mperf);
tsc = rdtsc();
if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
@@ -554,6 +572,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
if (!policy)
return;
+ /* limit the max perf when core performance boost feature is disabled */
+ if (!cpudata->boost_supported)
+ max_perf = min_t(u8, perf.nominal_perf, max_perf);
+
des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf);
@@ -563,10 +585,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
des_perf = 0;
}
- /* limit the max perf when core performance boost feature is disabled */
- if (!cpudata->boost_supported)
- max_perf = min_t(u8, perf.nominal_perf, max_perf);
-
if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
@@ -580,20 +598,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{
/*
* Initialize lower frequency limit (i.e.policy->min) with
- * lowest_nonlinear_frequency which is the most energy efficient
- * frequency. Override the initial value set by cpufreq core and
- * amd-pstate qos_requests.
+ * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
+ * Override the initial value set by cpufreq core and amd-pstate qos_requests.
*/
if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) =
cpufreq_cpu_get(policy_data->cpu);
struct amd_cpudata *cpudata;
+ union perf_cached perf;
if (!policy)
return -EINVAL;
cpudata = policy->driver_data;
- policy_data->min = cpudata->lowest_nonlinear_freq;
+ perf = READ_ONCE(cpudata->perf);
+
+ if (perf.bios_min_perf)
+ policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
+ perf.bios_min_perf);
+ else
+ policy_data->min = cpudata->lowest_nonlinear_freq;
}
cpufreq_verify_within_cpu_limits(policy_data);
@@ -772,7 +796,7 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
goto exit_err;
}
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
+ ret = rdmsrq_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
if (ret) {
pr_err_once("failed to read initial CPU boost state!\n");
ret = -EIO;
@@ -791,7 +815,7 @@ exit_err:
static void amd_perf_ctl_reset(unsigned int cpu)
{
- wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
+ wrmsrq_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
#define CPPC_MAX_PERF U8_MAX
@@ -808,19 +832,16 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
}
-static void amd_pstate_update_limits(unsigned int cpu)
+static void amd_pstate_update_limits(struct cpufreq_policy *policy)
{
- struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
u32 prev_high = 0, cur_high = 0;
bool highest_perf_changed = false;
+ unsigned int cpu = policy->cpu;
if (!amd_pstate_prefcore)
return;
- if (!policy)
- return;
-
if (amd_get_highest_perf(cpu, &cur_high))
return;
@@ -831,8 +852,10 @@ static void amd_pstate_update_limits(unsigned int cpu)
if (highest_perf_changed) {
WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
- if (cur_high < CPPC_MAX_PERF)
+ if (cur_high < CPPC_MAX_PERF) {
sched_set_itmt_core_prio((int)cur_high, cpu);
+ sched_update_asym_prefer_cpu(cpu, prev_high, cur_high);
+ }
}
}
@@ -1024,6 +1047,10 @@ free_cpudata1:
static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ /* Reset CPPC_REQ MSR to the BIOS value */
+ amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
freq_qos_remove_request(&cpudata->req[1]);
freq_qos_remove_request(&cpudata->req[0]);
@@ -1305,6 +1332,12 @@ static ssize_t amd_pstate_show_status(char *buf)
return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}
+int amd_pstate_get_status(void)
+{
+ return cppc_state;
+}
+EXPORT_SYMBOL_GPL(amd_pstate_get_status);
+
int amd_pstate_update_status(const char *buf, size_t size)
{
int mode_idx;
@@ -1419,7 +1452,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
struct amd_cpudata *cpudata;
union perf_cached perf;
struct device *dev;
- u64 value;
int ret;
/*
@@ -1484,12 +1516,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
}
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
- if (ret)
- return ret;
- WRITE_ONCE(cpudata->cppc_req_cached, value);
- }
ret = amd_pstate_set_epp(policy, cpudata->epp_default);
if (ret)
return ret;
@@ -1509,6 +1535,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata) {
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ /* Reset CPPC_REQ MSR to the BIOS value */
+ amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
+
kfree(cpudata);
policy->driver_data = NULL;
}
@@ -1559,21 +1590,38 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
return 0;
}
-static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
+static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
{
- pr_debug("AMD CPU Core %d going online\n", policy->cpu);
-
return amd_pstate_cppc_enable(policy);
}
-static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
+static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
{
- return 0;
+ struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ /*
+ * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
+ * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
+ * limits, epp and desired perf will get reset to the cached values in cpudata struct
+ */
+ return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
}
-static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
+static int amd_pstate_suspend(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+ int ret;
+
+ /*
+ * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
+ * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
+ * the limits, epp and desired perf will get reset to the cached values in cpudata struct
+ */
+ ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
+ if (ret)
+ return ret;
/* invalidate to ensure it's rewritten during resume */
cpudata->cppc_req_cached = 0;
@@ -1584,6 +1632,17 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
return 0;
}
+static int amd_pstate_resume(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+ int cur_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->cur);
+
+ /* Set CPPC_REQ to last sane value until the governor updates it */
+ return amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf,
+ 0U, false);
+}
+
static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
@@ -1609,6 +1668,10 @@ static struct cpufreq_driver amd_pstate_driver = {
.fast_switch = amd_pstate_fast_switch,
.init = amd_pstate_cpu_init,
.exit = amd_pstate_cpu_exit,
+ .online = amd_pstate_cpu_online,
+ .offline = amd_pstate_cpu_offline,
+ .suspend = amd_pstate_suspend,
+ .resume = amd_pstate_resume,
.set_boost = amd_pstate_set_boost,
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
@@ -1621,9 +1684,9 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.setpolicy = amd_pstate_epp_set_policy,
.init = amd_pstate_epp_cpu_init,
.exit = amd_pstate_epp_cpu_exit,
- .offline = amd_pstate_epp_cpu_offline,
- .online = amd_pstate_epp_cpu_online,
- .suspend = amd_pstate_epp_suspend,
+ .offline = amd_pstate_cpu_offline,
+ .online = amd_pstate_cpu_online,
+ .suspend = amd_pstate_suspend,
.resume = amd_pstate_epp_resume,
.update_limits = amd_pstate_update_limits,
.set_boost = amd_pstate_set_boost,
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index fbe1c08d3f06..cb45fdca27a6 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -30,6 +30,7 @@
* @lowest_perf: the absolute lowest performance level of the processor
* @min_limit_perf: Cached value of the performance corresponding to policy->min
* @max_limit_perf: Cached value of the performance corresponding to policy->max
+ * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
*/
union perf_cached {
struct {
@@ -39,6 +40,7 @@ union perf_cached {
u8 lowest_perf;
u8 min_limit_perf;
u8 max_limit_perf;
+ u8 bios_min_perf;
};
u64 val;
};
@@ -119,6 +121,7 @@ enum amd_pstate_mode {
AMD_PSTATE_MAX,
};
const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode);
+int amd_pstate_get_status(void);
int amd_pstate_update_status(const char *buf, size_t size);
#endif /* _LINUX_AMD_PSTATE_H */
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index 59b19b9975e8..13fed4b9e02b 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -129,7 +129,7 @@ static int __init amd_freq_sensitivity_init(void)
pci_dev_put(pcidev);
}
- if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val))
+ if (rdmsrq_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val))
return -ENODEV;
if (!(val >> CLASS_CODE_SHIFT))
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index cb93f00bafdb..b7c688a5659c 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -808,10 +808,119 @@ static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf);
}
+
+static ssize_t show_auto_select(struct cpufreq_policy *policy, char *buf)
+{
+ bool val;
+ int ret;
+
+ ret = cppc_get_auto_sel(policy->cpu, &val);
+
+ /* show "<unsupported>" when this register is not supported by cpc */
+ if (ret == -EOPNOTSUPP)
+ return sysfs_emit(buf, "<unsupported>\n");
+
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%d\n", val);
+}
+
+static ssize_t store_auto_select(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
+{
+ bool val;
+ int ret;
+
+ ret = kstrtobool(buf, &val);
+ if (ret)
+ return ret;
+
+ ret = cppc_set_auto_sel(policy->cpu, val);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t show_auto_act_window(struct cpufreq_policy *policy, char *buf)
+{
+ u64 val;
+ int ret;
+
+ ret = cppc_get_auto_act_window(policy->cpu, &val);
+
+ /* show "<unsupported>" when this register is not supported by cpc */
+ if (ret == -EOPNOTSUPP)
+ return sysfs_emit(buf, "<unsupported>\n");
+
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%llu\n", val);
+}
+
+static ssize_t store_auto_act_window(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
+{
+ u64 usec;
+ int ret;
+
+ ret = kstrtou64(buf, 0, &usec);
+ if (ret)
+ return ret;
+
+ ret = cppc_set_auto_act_window(policy->cpu, usec);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t show_energy_performance_preference_val(struct cpufreq_policy *policy, char *buf)
+{
+ u64 val;
+ int ret;
+
+ ret = cppc_get_epp_perf(policy->cpu, &val);
+
+ /* show "<unsupported>" when this register is not supported by cpc */
+ if (ret == -EOPNOTSUPP)
+ return sysfs_emit(buf, "<unsupported>\n");
+
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%llu\n", val);
+}
+
+static ssize_t store_energy_performance_preference_val(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
+{
+ u64 val;
+ int ret;
+
+ ret = kstrtou64(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ ret = cppc_set_epp(policy->cpu, val);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
cpufreq_freq_attr_ro(freqdomain_cpus);
+cpufreq_freq_attr_rw(auto_select);
+cpufreq_freq_attr_rw(auto_act_window);
+cpufreq_freq_attr_rw(energy_performance_preference_val);
static struct freq_attr *cppc_cpufreq_attr[] = {
&freqdomain_cpus,
+ &auto_select,
+ &auto_act_window,
+ &energy_performance_preference_val,
NULL,
};
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index f45ded62b0e0..d7426e1d8bdd 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -255,51 +255,6 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy)
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
-/**
- * cpufreq_cpu_release - Unlock a policy and decrement its usage counter.
- * @policy: cpufreq policy returned by cpufreq_cpu_acquire().
- */
-void cpufreq_cpu_release(struct cpufreq_policy *policy)
-{
- if (WARN_ON(!policy))
- return;
-
- lockdep_assert_held(&policy->rwsem);
-
- up_write(&policy->rwsem);
-
- cpufreq_cpu_put(policy);
-}
-
-/**
- * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it.
- * @cpu: CPU to find the policy for.
- *
- * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and
- * if the policy returned by it is not NULL, acquire its rwsem for writing.
- * Return the policy if it is active or release it and return NULL otherwise.
- *
- * The policy returned by this function has to be released with the help of
- * cpufreq_cpu_release() in order to release its rwsem and balance its usage
- * counter properly.
- */
-struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu)
-{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-
- if (!policy)
- return NULL;
-
- down_write(&policy->rwsem);
-
- if (policy_is_inactive(policy)) {
- cpufreq_cpu_release(policy);
- return NULL;
- }
-
- return policy;
-}
-
/*********************************************************************
* EXTERNALLY AFFECTING FREQUENCY CHANGES *
*********************************************************************/
@@ -636,6 +591,22 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf)
return sysfs_emit(buf, "%d\n", policy->boost_enabled);
}
+static int policy_set_boost(struct cpufreq_policy *policy, bool enable)
+{
+ int ret;
+
+ if (policy->boost_enabled == enable)
+ return 0;
+
+ policy->boost_enabled = enable;
+
+ ret = cpufreq_driver->set_boost(policy, enable);
+ if (ret)
+ policy->boost_enabled = !policy->boost_enabled;
+
+ return ret;
+}
+
static ssize_t store_local_boost(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
@@ -651,21 +622,11 @@ static ssize_t store_local_boost(struct cpufreq_policy *policy,
if (!policy->boost_supported)
return -EINVAL;
- if (policy->boost_enabled == enable)
+ ret = policy_set_boost(policy, enable);
+ if (!ret)
return count;
- policy->boost_enabled = enable;
-
- cpus_read_lock();
- ret = cpufreq_driver->set_boost(policy, enable);
- cpus_read_unlock();
-
- if (ret) {
- policy->boost_enabled = !policy->boost_enabled;
- return ret;
- }
-
- return count;
+ return ret;
}
static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
@@ -845,7 +806,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
- char str_governor[16];
+ char str_governor[CPUFREQ_NAME_LEN];
int ret;
ret = sscanf(buf, "%15s", str_governor);
@@ -956,9 +917,9 @@ static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
if (!policy->governor || !policy->governor->store_setspeed)
return -EINVAL;
- ret = sscanf(buf, "%u", &freq);
- if (ret != 1)
- return -EINVAL;
+ ret = kstrtouint(buf, 0, &freq);
+ if (ret)
+ return ret;
policy->governor->store_setspeed(policy, freq);
@@ -1025,17 +986,16 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
- ssize_t ret = -EBUSY;
if (!fattr->show)
return -EIO;
- down_read(&policy->rwsem);
+ guard(cpufreq_policy_read)(policy);
+
if (likely(!policy_is_inactive(policy)))
- ret = fattr->show(policy, buf);
- up_read(&policy->rwsem);
+ return fattr->show(policy, buf);
- return ret;
+ return -EBUSY;
}
static ssize_t store(struct kobject *kobj, struct attribute *attr,
@@ -1043,17 +1003,16 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
{
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
- ssize_t ret = -EBUSY;
if (!fattr->store)
return -EIO;
- down_write(&policy->rwsem);
+ guard(cpufreq_policy_write)(policy);
+
if (likely(!policy_is_inactive(policy)))
- ret = fattr->store(policy, buf, count);
- up_write(&policy->rwsem);
+ return fattr->store(policy, buf, count);
- return ret;
+ return -EBUSY;
}
static void cpufreq_sysfs_release(struct kobject *kobj)
@@ -1211,7 +1170,8 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
if (cpumask_test_cpu(cpu, policy->cpus))
return 0;
- down_write(&policy->rwsem);
+ guard(cpufreq_policy_write)(policy);
+
if (has_target())
cpufreq_stop_governor(policy);
@@ -1222,7 +1182,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
if (ret)
pr_err("%s: Failed to start governor\n", __func__);
}
- up_write(&policy->rwsem);
+
return ret;
}
@@ -1242,9 +1202,10 @@ static void handle_update(struct work_struct *work)
container_of(work, struct cpufreq_policy, update);
pr_debug("handle_update for cpu %u called\n", policy->cpu);
- down_write(&policy->rwsem);
+
+ guard(cpufreq_policy_write)(policy);
+
refresh_frequency_limits(policy);
- up_write(&policy->rwsem);
}
static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq,
@@ -1270,11 +1231,11 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
struct kobject *kobj;
struct completion *cmp;
- down_write(&policy->rwsem);
- cpufreq_stats_free_table(policy);
- kobj = &policy->kobj;
- cmp = &policy->kobj_unregister;
- up_write(&policy->rwsem);
+ scoped_guard(cpufreq_policy_write, policy) {
+ cpufreq_stats_free_table(policy);
+ kobj = &policy->kobj;
+ cmp = &policy->kobj_unregister;
+ }
kobject_put(kobj);
/*
@@ -1350,7 +1311,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
init_waitqueue_head(&policy->transition_wait);
INIT_WORK(&policy->update, handle_update);
- policy->cpu = cpu;
return policy;
err_min_qos_notifier:
@@ -1419,35 +1379,17 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
kfree(policy);
}
-static int cpufreq_online(unsigned int cpu)
+static int cpufreq_policy_online(struct cpufreq_policy *policy,
+ unsigned int cpu, bool new_policy)
{
- struct cpufreq_policy *policy;
- bool new_policy;
unsigned long flags;
unsigned int j;
int ret;
- pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
-
- /* Check if this CPU already has a policy to manage it */
- policy = per_cpu(cpufreq_cpu_data, cpu);
- if (policy) {
- WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
- if (!policy_is_inactive(policy))
- return cpufreq_add_policy_cpu(policy, cpu);
+ guard(cpufreq_policy_write)(policy);
- /* This is the only online CPU for the policy. Start over. */
- new_policy = false;
- down_write(&policy->rwsem);
- policy->cpu = cpu;
- policy->governor = NULL;
- } else {
- new_policy = true;
- policy = cpufreq_policy_alloc(cpu);
- if (!policy)
- return -ENOMEM;
- down_write(&policy->rwsem);
- }
+ policy->cpu = cpu;
+ policy->governor = NULL;
if (!new_policy && cpufreq_driver->online) {
/* Recover policy->cpus using related_cpus */
@@ -1470,7 +1412,7 @@ static int cpufreq_online(unsigned int cpu)
if (ret) {
pr_debug("%s: %d: initialization failed\n", __func__,
__LINE__);
- goto out_free_policy;
+ goto out_clear_policy;
}
/*
@@ -1621,7 +1563,55 @@ static int cpufreq_online(unsigned int cpu)
goto out_destroy_policy;
}
- up_write(&policy->rwsem);
+ return 0;
+
+out_destroy_policy:
+ for_each_cpu(j, policy->real_cpus)
+ remove_cpu_dev_symlink(policy, j, get_cpu_device(j));
+
+out_offline_policy:
+ if (cpufreq_driver->offline)
+ cpufreq_driver->offline(policy);
+
+out_exit_policy:
+ if (cpufreq_driver->exit)
+ cpufreq_driver->exit(policy);
+
+out_clear_policy:
+ cpumask_clear(policy->cpus);
+
+ return ret;
+}
+
+static int cpufreq_online(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+ bool new_policy;
+ int ret;
+
+ pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
+
+ /* Check if this CPU already has a policy to manage it */
+ policy = per_cpu(cpufreq_cpu_data, cpu);
+ if (policy) {
+ WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
+ if (!policy_is_inactive(policy))
+ return cpufreq_add_policy_cpu(policy, cpu);
+
+ /* This is the only online CPU for the policy. Start over. */
+ new_policy = false;
+ } else {
+ new_policy = true;
+ policy = cpufreq_policy_alloc(cpu);
+ if (!policy)
+ return -ENOMEM;
+ }
+
+ ret = cpufreq_policy_online(policy, cpu, new_policy);
+ if (ret) {
+ cpufreq_policy_free(policy);
+ return ret;
+ }
kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -1633,41 +1623,24 @@ static int cpufreq_online(unsigned int cpu)
if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
policy->cdev = of_cpufreq_cooling_register(policy);
- /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
+ /*
+ * Let the per-policy boost flag mirror the cpufreq_driver boost during
+ * initialization for a new policy. For an existing policy, maintain the
+ * previous boost value unless global boost is disabled.
+ */
if (cpufreq_driver->set_boost && policy->boost_supported &&
- policy->boost_enabled != cpufreq_boost_enabled()) {
- policy->boost_enabled = cpufreq_boost_enabled();
- ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
+ (new_policy || !cpufreq_boost_enabled())) {
+ ret = policy_set_boost(policy, cpufreq_boost_enabled());
if (ret) {
/* If the set_boost fails, the online operation is not affected */
pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu,
- str_enable_disable(policy->boost_enabled));
- policy->boost_enabled = !policy->boost_enabled;
+ str_enable_disable(cpufreq_boost_enabled()));
}
}
pr_debug("initialization complete\n");
return 0;
-
-out_destroy_policy:
- for_each_cpu(j, policy->real_cpus)
- remove_cpu_dev_symlink(policy, j, get_cpu_device(j));
-
-out_offline_policy:
- if (cpufreq_driver->offline)
- cpufreq_driver->offline(policy);
-
-out_exit_policy:
- if (cpufreq_driver->exit)
- cpufreq_driver->exit(policy);
-
-out_free_policy:
- cpumask_clear(policy->cpus);
- up_write(&policy->rwsem);
-
- cpufreq_policy_free(policy);
- return ret;
}
/**
@@ -1757,11 +1730,10 @@ static int cpufreq_offline(unsigned int cpu)
return 0;
}
- down_write(&policy->rwsem);
+ guard(cpufreq_policy_write)(policy);
__cpufreq_offline(cpu, policy);
- up_write(&policy->rwsem);
return 0;
}
@@ -1778,33 +1750,29 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
if (!policy)
return;
- down_write(&policy->rwsem);
+ scoped_guard(cpufreq_policy_write, policy) {
+ if (cpu_online(cpu))
+ __cpufreq_offline(cpu, policy);
- if (cpu_online(cpu))
- __cpufreq_offline(cpu, policy);
+ remove_cpu_dev_symlink(policy, cpu, dev);
- remove_cpu_dev_symlink(policy, cpu, dev);
+ if (!cpumask_empty(policy->real_cpus))
+ return;
- if (!cpumask_empty(policy->real_cpus)) {
- up_write(&policy->rwsem);
- return;
- }
+ /*
+ * Unregister cpufreq cooling once all the CPUs of the policy
+ * are removed.
+ */
+ if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
+ cpufreq_cooling_unregister(policy->cdev);
+ policy->cdev = NULL;
+ }
- /*
- * Unregister cpufreq cooling once all the CPUs of the policy are
- * removed.
- */
- if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
- cpufreq_cooling_unregister(policy->cdev);
- policy->cdev = NULL;
+ /* We did light-weight exit earlier, do full tear down now */
+ if (cpufreq_driver->offline && cpufreq_driver->exit)
+ cpufreq_driver->exit(policy);
}
- /* We did light-weight exit earlier, do full tear down now */
- if (cpufreq_driver->offline && cpufreq_driver->exit)
- cpufreq_driver->exit(policy);
-
- up_write(&policy->rwsem);
-
cpufreq_policy_free(policy);
}
@@ -1874,27 +1842,26 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b
*/
unsigned int cpufreq_quick_get(unsigned int cpu)
{
- struct cpufreq_policy *policy;
- unsigned int ret_freq = 0;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
unsigned long flags;
read_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) {
- ret_freq = cpufreq_driver->get(cpu);
+ unsigned int ret_freq = cpufreq_driver->get(cpu);
+
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
return ret_freq;
}
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
policy = cpufreq_cpu_get(cpu);
- if (policy) {
- ret_freq = policy->cur;
- cpufreq_cpu_put(policy);
- }
+ if (policy)
+ return policy->cur;
- return ret_freq;
+ return 0;
}
EXPORT_SYMBOL(cpufreq_quick_get);
@@ -1906,15 +1873,13 @@ EXPORT_SYMBOL(cpufreq_quick_get);
*/
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- unsigned int ret_freq = 0;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
- if (policy) {
- ret_freq = policy->max;
- cpufreq_cpu_put(policy);
- }
+ policy = cpufreq_cpu_get(cpu);
+ if (policy)
+ return policy->max;
- return ret_freq;
+ return 0;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);
@@ -1926,15 +1891,13 @@ EXPORT_SYMBOL(cpufreq_quick_get_max);
*/
__weak unsigned int cpufreq_get_hw_max_freq(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- unsigned int ret_freq = 0;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
- if (policy) {
- ret_freq = policy->cpuinfo.max_freq;
- cpufreq_cpu_put(policy);
- }
+ policy = cpufreq_cpu_get(cpu);
+ if (policy)
+ return policy->cpuinfo.max_freq;
- return ret_freq;
+ return 0;
}
EXPORT_SYMBOL(cpufreq_get_hw_max_freq);
@@ -1954,19 +1917,18 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy)
*/
unsigned int cpufreq_get(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- unsigned int ret_freq = 0;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
- if (policy) {
- down_read(&policy->rwsem);
- if (cpufreq_driver->get)
- ret_freq = __cpufreq_get(policy);
- up_read(&policy->rwsem);
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return 0;
- cpufreq_cpu_put(policy);
- }
+ guard(cpufreq_policy_read)(policy);
+
+ if (cpufreq_driver->get)
+ return __cpufreq_get(policy);
- return ret_freq;
+ return 0;
}
EXPORT_SYMBOL(cpufreq_get);
@@ -2025,9 +1987,9 @@ void cpufreq_suspend(void)
for_each_active_policy(policy) {
if (has_target()) {
- down_write(&policy->rwsem);
- cpufreq_stop_governor(policy);
- up_write(&policy->rwsem);
+ scoped_guard(cpufreq_policy_write, policy) {
+ cpufreq_stop_governor(policy);
+ }
}
if (cpufreq_driver->suspend && cpufreq_driver->suspend(policy))
@@ -2068,9 +2030,9 @@ void cpufreq_resume(void)
pr_err("%s: Failed to resume driver: %s\n", __func__,
cpufreq_driver->name);
} else if (has_target()) {
- down_write(&policy->rwsem);
- ret = cpufreq_start_governor(policy);
- up_write(&policy->rwsem);
+ scoped_guard(cpufreq_policy_write, policy) {
+ ret = cpufreq_start_governor(policy);
+ }
if (ret)
pr_err("%s: Failed to start governor for CPU%u's policy\n",
@@ -2438,15 +2400,9 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
- int ret;
-
- down_write(&policy->rwsem);
+ guard(cpufreq_policy_write)(policy);
- ret = __cpufreq_driver_target(policy, target_freq, relation);
-
- up_write(&policy->rwsem);
-
- return ret;
+ return __cpufreq_driver_target(policy, target_freq, relation);
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
@@ -2618,31 +2574,6 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
* POLICY INTERFACE *
*********************************************************************/
-/**
- * cpufreq_get_policy - get the current cpufreq_policy
- * @policy: struct cpufreq_policy into which the current cpufreq_policy
- * is written
- * @cpu: CPU to find the policy for
- *
- * Reads the current cpufreq policy.
- */
-int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
-{
- struct cpufreq_policy *cpu_policy;
- if (!policy)
- return -EINVAL;
-
- cpu_policy = cpufreq_cpu_get(cpu);
- if (!cpu_policy)
- return -EINVAL;
-
- memcpy(policy, cpu_policy, sizeof(*policy));
-
- cpufreq_cpu_put(cpu_policy);
- return 0;
-}
-EXPORT_SYMBOL(cpufreq_get_policy);
-
DEFINE_PER_CPU(unsigned long, cpufreq_pressure);
/**
@@ -2793,6 +2724,21 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
return ret;
}
+static void cpufreq_policy_refresh(struct cpufreq_policy *policy)
+{
+ guard(cpufreq_policy_write)(policy);
+
+ /*
+ * BIOS might change freq behind our back
+ * -> ask driver for current freq and notify governors about a change
+ */
+ if (cpufreq_driver->get && has_target() &&
+ (cpufreq_suspended || WARN_ON(!cpufreq_verify_current_freq(policy, false))))
+ return;
+
+ refresh_frequency_limits(policy);
+}
+
/**
* cpufreq_update_policy - Re-evaluate an existing cpufreq policy.
* @cpu: CPU to re-evaluate the policy for.
@@ -2804,23 +2750,13 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
*/
void cpufreq_update_policy(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
+ policy = cpufreq_cpu_get(cpu);
if (!policy)
return;
- /*
- * BIOS might change freq behind our back
- * -> ask driver for current freq and notify governors about a change
- */
- if (cpufreq_driver->get && has_target() &&
- (cpufreq_suspended || WARN_ON(!cpufreq_verify_current_freq(policy, false))))
- goto unlock;
-
- refresh_frequency_limits(policy);
-
-unlock:
- cpufreq_cpu_release(policy);
+ cpufreq_policy_refresh(policy);
}
EXPORT_SYMBOL(cpufreq_update_policy);
@@ -2829,7 +2765,7 @@ EXPORT_SYMBOL(cpufreq_update_policy);
* @cpu: CPU to update the policy limits for.
*
* Invoke the driver's ->update_limits callback if present or call
- * cpufreq_update_policy() for @cpu.
+ * cpufreq_policy_refresh() for @cpu.
*/
void cpufreq_update_limits(unsigned int cpu)
{
@@ -2840,9 +2776,9 @@ void cpufreq_update_limits(unsigned int cpu)
return;
if (cpufreq_driver->update_limits)
- cpufreq_driver->update_limits(cpu);
+ cpufreq_driver->update_limits(policy);
else
- cpufreq_update_policy(cpu);
+ cpufreq_policy_refresh(policy);
}
EXPORT_SYMBOL_GPL(cpufreq_update_limits);
@@ -2876,8 +2812,10 @@ static int cpufreq_boost_trigger_state(int state)
unsigned long flags;
int ret = 0;
- if (cpufreq_driver->boost_enabled == state)
- return 0;
+ /*
+ * Don't compare 'cpufreq_driver->boost_enabled' with 'state' here to
+ * make sure all policies are in sync with global boost flag.
+ */
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver->boost_enabled = state;
@@ -2888,12 +2826,9 @@ static int cpufreq_boost_trigger_state(int state)
if (!policy->boost_supported)
continue;
- policy->boost_enabled = state;
- ret = cpufreq_driver->set_boost(policy, state);
- if (ret) {
- policy->boost_enabled = !policy->boost_enabled;
+ ret = policy_set_boost(policy, state);
+ if (ret)
goto err_reset_state;
- }
}
cpus_read_unlock();
@@ -3118,6 +3053,36 @@ static int __init cpufreq_core_init(void)
return 0;
}
+
+static bool cpufreq_policy_is_good_for_eas(unsigned int cpu)
+{
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ pr_debug("cpufreq policy not set for CPU: %d\n", cpu);
+ return false;
+ }
+
+ return sugov_is_governor(policy);
+}
+
+bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask)
+{
+ unsigned int cpu;
+
+ /* Do not attempt EAS if schedutil is not being used. */
+ for_each_cpu(cpu, cpu_mask) {
+ if (!cpufreq_policy_is_good_for_eas(cpu)) {
+ pr_debug("rd %*pbl: schedutil is mandatory for EAS\n",
+ cpumask_pr_args(cpu_mask));
+ return false;
+ }
+ }
+
+ return true;
+}
+
module_param(off, int, 0444);
module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444);
core_initcall(cpufreq_core_init);
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index d23a97ba6478..320a0af2266a 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -225,12 +225,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
return -ENODEV;
}
/* Enable Enhanced PowerSaver */
- rdmsrl(MSR_IA32_MISC_ENABLE, val);
+ rdmsrq(MSR_IA32_MISC_ENABLE, val);
if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
- wrmsrl(MSR_IA32_MISC_ENABLE, val);
+ wrmsrq(MSR_IA32_MISC_ENABLE, val);
/* Can be locked at 0 */
- rdmsrl(MSR_IA32_MISC_ENABLE, val);
+ rdmsrq(MSR_IA32_MISC_ENABLE, val);
if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
pr_info("Can't enable Enhanced PowerSaver\n");
return -ENODEV;
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index 36494b855e41..fc5a58088b35 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -21,7 +21,6 @@
#include <linux/cpufreq.h>
#include <asm/cpu_device_id.h>
-#include <asm/msr.h>
#include <linux/timex.h>
#include <linux/io.h>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ba9bf06f1c77..64587d318267 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -221,6 +221,7 @@ struct global_params {
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
+ * @pd_registered: Set when a perf domain is registered for this CPU.
* @hwp_notify_work: workqueue for HWP notifications.
*
* This structure stores per CPU instance data for all CPUs.
@@ -260,6 +261,9 @@ struct cpudata {
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
+#ifdef CONFIG_ENERGY_MODEL
+ bool pd_registered;
+#endif
struct delayed_work hwp_notify_work;
};
@@ -303,6 +307,7 @@ static bool hwp_is_hybrid;
static struct cpufreq_driver *intel_pstate_driver __read_mostly;
+#define INTEL_PSTATE_CORE_SCALING 100000
#define HYBRID_SCALING_FACTOR_ADL 78741
#define HYBRID_SCALING_FACTOR_MTL 80000
#define HYBRID_SCALING_FACTOR_LNL 86957
@@ -311,7 +316,7 @@ static int hybrid_scaling_factor;
static inline int core_get_scaling(void)
{
- return 100000;
+ return INTEL_PSTATE_CORE_SCALING;
}
#ifdef CONFIG_ACPI
@@ -601,7 +606,7 @@ static bool turbo_is_disabled(void)
if (!cpu_feature_enabled(X86_FEATURE_IDA))
return true;
- rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
+ rdmsrq(MSR_IA32_MISC_ENABLE, misc_en);
return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}
@@ -623,7 +628,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
if (!boot_cpu_has(X86_FEATURE_EPB))
return -ENXIO;
- ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ ret = rdmsrq_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
if (ret)
return (s16)ret;
@@ -640,7 +645,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
* MSR_HWP_REQUEST, so need to read and get EPP.
*/
if (!hwp_req_data) {
- epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
+ epp = rdmsrq_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
&hwp_req_data);
if (epp)
return epp;
@@ -662,12 +667,12 @@ static int intel_pstate_set_epb(int cpu, s16 pref)
if (!boot_cpu_has(X86_FEATURE_EPB))
return -ENXIO;
- ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
if (ret)
return ret;
epb = (epb & ~0x0f) | pref;
- wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
+ wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
return 0;
}
@@ -765,7 +770,7 @@ static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
* function, so it cannot run in parallel with the update below.
*/
WRITE_ONCE(cpu->hwp_req_cached, value);
- ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ ret = wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
if (!ret)
cpu->epp_cached = epp;
@@ -919,7 +924,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
if (ratio <= 0) {
u64 cap;
- rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
+ rdmsrq_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
ratio = HWP_GUARANTEED_PERF(cap);
}
@@ -948,12 +953,124 @@ static struct cpudata *hybrid_max_perf_cpu __read_mostly;
*/
static DEFINE_MUTEX(hybrid_capacity_lock);
+#ifdef CONFIG_ENERGY_MODEL
+#define HYBRID_EM_STATE_COUNT 4
+
+static int hybrid_active_power(struct device *dev, unsigned long *power,
+ unsigned long *freq)
+{
+ /*
+ * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
+ * of the maximum capacity such that two CPUs of the same type will be
+ * regarded as equally attractive if the utilization of each of them
+ * falls into the same bin, which should prevent tasks from being
+ * migrated between them too often.
+ *
+ * For this purpose, return the "frequency" of 2 for the first
+ * performance level and otherwise leave the value set by the caller.
+ */
+ if (!*freq)
+ *freq = 2;
+
+ /* No power information. */
+ *power = EM_MAX_POWER;
+
+ return 0;
+}
+
+static int hybrid_get_cost(struct device *dev, unsigned long freq,
+ unsigned long *cost)
+{
+ struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
+ struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id);
+
+ /*
+ * The smaller the perf-to-frequency scaling factor, the larger the IPC
+ * ratio between the given CPU and the least capable CPU in the system.
+ * Regard that IPC ratio as the primary cost component and assume that
+ * the scaling factors for different CPU types will differ by at least
+ * 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
+ *
+ * Add the freq value to the cost, so that the cost of running on CPUs
+ * of the same type in different "utilization bins" is different.
+ */
+ *cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
+ /*
+ * Increase the cost slightly for CPUs able to access L3 to avoid
+ * touching it in case some other CPUs of the same type can do the work
+ * without it.
+ */
+ if (cacheinfo) {
+ unsigned int i;
+
+ /* Check if L3 cache is there. */
+ for (i = 0; i < cacheinfo->num_leaves; i++) {
+ if (cacheinfo->info_list[i].level == 3) {
+ *cost += 2;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static bool hybrid_register_perf_domain(unsigned int cpu)
+{
+ static const struct em_data_callback cb
+ = EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost);
+ struct cpudata *cpudata = all_cpu_data[cpu];
+ struct device *cpu_dev;
+
+ /*
+ * Registering EM perf domains without enabling asymmetric CPU capacity
+ * support is not really useful and one domain should not be registered
+ * more than once.
+ */
+ if (!hybrid_max_perf_cpu || cpudata->pd_registered)
+ return false;
+
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev)
+ return false;
+
+ if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+ cpumask_of(cpu), false))
+ return false;
+
+ cpudata->pd_registered = true;
+
+ return true;
+}
+
+static void hybrid_register_all_perf_domains(void)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ hybrid_register_perf_domain(cpu);
+}
+
+static void hybrid_update_perf_domain(struct cpudata *cpu)
+{
+ if (cpu->pd_registered)
+ em_adjust_cpu_capacity(cpu->cpu);
+}
+#else /* !CONFIG_ENERGY_MODEL */
+static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; }
+static inline void hybrid_register_all_perf_domains(void) {}
+static inline void hybrid_update_perf_domain(struct cpudata *cpu) {}
+#endif /* CONFIG_ENERGY_MODEL */
+
static void hybrid_set_cpu_capacity(struct cpudata *cpu)
{
arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
hybrid_max_perf_cpu->capacity_perf,
cpu->capacity_perf,
cpu->pstate.max_pstate_physical);
+ hybrid_update_perf_domain(cpu);
+
+ topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));
pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
@@ -1042,6 +1159,11 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
guard(mutex)(&hybrid_capacity_lock);
__hybrid_refresh_cpu_capacity_scaling();
+ /*
+ * Perf domains are not registered before setting hybrid_max_perf_cpu,
+ * so register them all after setting up CPU capacity scaling.
+ */
+ hybrid_register_all_perf_domains();
}
static void hybrid_init_cpu_capacity_scaling(bool refresh)
@@ -1069,7 +1191,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh)
hybrid_refresh_cpu_capacity_scaling();
/*
* Disabling ITMT causes sched domains to be rebuilt to disable asym
- * packing and enable asym capacity.
+ * packing and enable asym capacity and EAS.
*/
sched_clear_itmt_support();
}
@@ -1091,7 +1213,7 @@ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
u64 cap;
- rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
+ rdmsrq_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
WRITE_ONCE(cpu->hwp_cap_cached, cap);
cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
@@ -1147,6 +1269,14 @@ static void hybrid_update_capacity(struct cpudata *cpu)
}
hybrid_set_cpu_capacity(cpu);
+ /*
+ * If the CPU was offline to start with and it is going online for the
+ * first time, a perf domain needs to be registered for it if hybrid
+ * capacity scaling has been enabled already. In that case, sched
+ * domains need to be rebuilt to take the new perf domain into account.
+ */
+ if (hybrid_register_perf_domain(cpu->cpu))
+ em_rebuild_sched_domains();
unlock:
mutex_unlock(&hybrid_capacity_lock);
@@ -1165,7 +1295,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
min = max;
- rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+ rdmsrq_on_cpu(cpu, MSR_HWP_REQUEST, &value);
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
@@ -1212,7 +1342,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
}
skip_epp:
WRITE_ONCE(cpu_data->hwp_req_cached, value);
- wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata);
@@ -1259,7 +1389,7 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
if (boot_cpu_has(X86_FEATURE_HWP_EPP))
value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
mutex_lock(&hybrid_capacity_lock);
@@ -1288,7 +1418,7 @@ static void set_power_ctl_ee_state(bool input)
u64 power_ctl;
mutex_lock(&intel_pstate_driver_lock);
- rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
+ rdmsrq(MSR_IA32_POWER_CTL, power_ctl);
if (input) {
power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE);
power_ctl_ee_state = POWER_CTL_EE_ENABLE;
@@ -1296,7 +1426,7 @@ static void set_power_ctl_ee_state(bool input)
power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
power_ctl_ee_state = POWER_CTL_EE_DISABLE;
}
- wrmsrl(MSR_IA32_POWER_CTL, power_ctl);
+ wrmsrq(MSR_IA32_POWER_CTL, power_ctl);
mutex_unlock(&intel_pstate_driver_lock);
}
@@ -1305,7 +1435,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata);
static void intel_pstate_hwp_reenable(struct cpudata *cpu)
{
intel_pstate_hwp_enable(cpu);
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
}
static int intel_pstate_suspend(struct cpufreq_policy *policy)
@@ -1356,9 +1486,11 @@ static void intel_pstate_update_policies(void)
cpufreq_update_policy(cpu);
}
-static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
- struct cpufreq_policy *policy)
+static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy,
+ struct cpudata *cpudata)
{
+ guard(cpufreq_policy_write)(policy);
+
if (hwp_active)
intel_pstate_get_hwp_cap(cpudata);
@@ -1368,42 +1500,34 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
refresh_frequency_limits(policy);
}
-static void intel_pstate_update_limits(unsigned int cpu)
+static bool intel_pstate_update_max_freq(struct cpudata *cpudata)
{
- struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
- struct cpudata *cpudata;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
+ policy = cpufreq_cpu_get(cpudata->cpu);
if (!policy)
- return;
+ return false;
- cpudata = all_cpu_data[cpu];
+ __intel_pstate_update_max_freq(policy, cpudata);
- __intel_pstate_update_max_freq(cpudata, policy);
+ return true;
+}
- /* Prevent the driver from being unregistered now. */
- mutex_lock(&intel_pstate_driver_lock);
+static void intel_pstate_update_limits(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpudata = all_cpu_data[policy->cpu];
- cpufreq_cpu_release(policy);
+ __intel_pstate_update_max_freq(policy, cpudata);
hybrid_update_capacity(cpudata);
-
- mutex_unlock(&intel_pstate_driver_lock);
}
static void intel_pstate_update_limits_for_all(void)
{
int cpu;
- for_each_possible_cpu(cpu) {
- struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
-
- if (!policy)
- continue;
-
- __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
-
- cpufreq_cpu_release(policy);
- }
+ for_each_possible_cpu(cpu)
+ intel_pstate_update_max_freq(all_cpu_data[cpu]);
mutex_lock(&hybrid_capacity_lock);
@@ -1706,7 +1830,7 @@ static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribut
u64 power_ctl;
int enable;
- rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
+ rdmsrq(MSR_IA32_POWER_CTL, power_ctl);
enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE));
return sprintf(buf, "%d\n", !enable);
}
@@ -1843,13 +1967,8 @@ static void intel_pstate_notify_work(struct work_struct *work)
{
struct cpudata *cpudata =
container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
- struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
-
- if (policy) {
- __intel_pstate_update_max_freq(cpudata, policy);
-
- cpufreq_cpu_release(policy);
+ if (intel_pstate_update_max_freq(cpudata)) {
/*
* The driver will not be unregistered while this function is
* running, so update the capacity without acquiring the driver
@@ -1858,7 +1977,7 @@ static void intel_pstate_notify_work(struct work_struct *work)
hybrid_update_capacity(cpudata);
}
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
}
static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
@@ -1880,7 +1999,7 @@ void notify_hwp_interrupt(void)
if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS;
- rdmsrl_safe(MSR_HWP_STATUS, &value);
+ rdmsrq_safe(MSR_HWP_STATUS, &value);
if (!(value & status_mask))
return;
@@ -1897,7 +2016,7 @@ void notify_hwp_interrupt(void)
return;
ack_intr:
- wrmsrl_safe(MSR_HWP_STATUS, 0);
+ wrmsrq_safe(MSR_HWP_STATUS, 0);
raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
}
@@ -1908,8 +2027,8 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY))
return;
- /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+ /* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
raw_spin_lock_irq(&hwp_notify_lock);
cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
@@ -1936,9 +2055,9 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
- /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+ /* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
}
}
@@ -1977,9 +2096,9 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt till we activate again */
if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
- wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
intel_pstate_enable_hwp_interrupt(cpudata);
@@ -1993,7 +2112,7 @@ static int atom_get_min_pstate(int not_used)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ rdmsrq(MSR_ATOM_CORE_RATIOS, value);
return (value >> 8) & 0x7F;
}
@@ -2001,7 +2120,7 @@ static int atom_get_max_pstate(int not_used)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ rdmsrq(MSR_ATOM_CORE_RATIOS, value);
return (value >> 16) & 0x7F;
}
@@ -2009,7 +2128,7 @@ static int atom_get_turbo_pstate(int not_used)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
+ rdmsrq(MSR_ATOM_CORE_TURBO_RATIOS, value);
return value & 0x7F;
}
@@ -2044,7 +2163,7 @@ static int silvermont_get_scaling(void)
static int silvermont_freq_table[] = {
83300, 100000, 133300, 116700, 80000};
- rdmsrl(MSR_FSB_FREQ, value);
+ rdmsrq(MSR_FSB_FREQ, value);
i = value & 0x7;
WARN_ON(i > 4);
@@ -2060,7 +2179,7 @@ static int airmont_get_scaling(void)
83300, 100000, 133300, 116700, 80000,
93300, 90000, 88900, 87500};
- rdmsrl(MSR_FSB_FREQ, value);
+ rdmsrq(MSR_FSB_FREQ, value);
i = value & 0xF;
WARN_ON(i > 8);
@@ -2071,7 +2190,7 @@ static void atom_get_vid(struct cpudata *cpudata)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_VIDS, value);
+ rdmsrq(MSR_ATOM_CORE_VIDS, value);
cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
cpudata->vid.ratio = div_fp(
@@ -2079,7 +2198,7 @@ static void atom_get_vid(struct cpudata *cpudata)
int_tofp(cpudata->pstate.max_pstate -
cpudata->pstate.min_pstate));
- rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
+ rdmsrq(MSR_ATOM_CORE_TURBO_VIDS, value);
cpudata->vid.turbo = value & 0x7f;
}
@@ -2087,7 +2206,7 @@ static int core_get_min_pstate(int cpu)
{
u64 value;
- rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+ rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
return (value >> 40) & 0xFF;
}
@@ -2095,7 +2214,7 @@ static int core_get_max_pstate_physical(int cpu)
{
u64 value;
- rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+ rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
return (value >> 8) & 0xFF;
}
@@ -2109,13 +2228,13 @@ static int core_get_tdp_ratio(int cpu, u64 plat_info)
int err;
/* Get the TDP level (0, 1, 2) to get ratios */
- err = rdmsrl_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+ err = rdmsrq_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
if (err)
return err;
/* TDP MSR are continuous starting at 0x648 */
tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
- err = rdmsrl_safe_on_cpu(cpu, tdp_msr, &tdp_ratio);
+ err = rdmsrq_safe_on_cpu(cpu, tdp_msr, &tdp_ratio);
if (err)
return err;
@@ -2140,7 +2259,7 @@ static int core_get_max_pstate(int cpu)
int tdp_ratio;
int err;
- rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
+ rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
max_pstate = (plat_info >> 8) & 0xFF;
tdp_ratio = core_get_tdp_ratio(cpu, plat_info);
@@ -2152,7 +2271,7 @@ static int core_get_max_pstate(int cpu)
return tdp_ratio;
}
- err = rdmsrl_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar);
+ err = rdmsrq_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar);
if (!err) {
int tar_levels;
@@ -2172,7 +2291,7 @@ static int core_get_turbo_pstate(int cpu)
u64 value;
int nont, ret;
- rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
+ rdmsrq_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
nont = core_get_max_pstate(cpu);
ret = (value) & 255;
if (ret <= nont)
@@ -2201,7 +2320,7 @@ static int knl_get_turbo_pstate(int cpu)
u64 value;
int nont, ret;
- rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
+ rdmsrq_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
nont = core_get_max_pstate(cpu);
ret = (((value) >> 8) & 0xFF);
if (ret <= nont)
@@ -2247,7 +2366,7 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
* the CPU being updated, so force the register update to run on the
* right CPU.
*/
- wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+ wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, pstate));
}
@@ -2354,7 +2473,7 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
return;
hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
- wrmsrl(MSR_HWP_REQUEST, hwp_req);
+ wrmsrq(MSR_HWP_REQUEST, hwp_req);
cpu->last_update = cpu->sample.time;
}
@@ -2367,7 +2486,7 @@ static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
expired = time_after64(cpu->sample.time, cpu->last_update +
hwp_boost_hold_time_ns);
if (expired) {
- wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+ wrmsrq(MSR_HWP_REQUEST, cpu->hwp_req_cached);
cpu->hwp_boost_min = 0;
}
}
@@ -2428,8 +2547,8 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
u64 tsc;
local_irq_save(flags);
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
+ rdmsrq(MSR_IA32_APERF, aperf);
+ rdmsrq(MSR_IA32_MPERF, mperf);
tsc = rdtsc();
if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
local_irq_restore(flags);
@@ -2523,7 +2642,7 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
return;
cpu->pstate.current_pstate = pstate;
- wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+ wrmsrq(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}
static void intel_pstate_adjust_pstate(struct cpudata *cpu)
@@ -3103,19 +3222,19 @@ static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,
WRITE_ONCE(cpu->hwp_req_cached, value);
if (fast_switch)
- wrmsrl(MSR_HWP_REQUEST, value);
+ wrmsrq(MSR_HWP_REQUEST, value);
else
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}
static void intel_cpufreq_perf_ctl_update(struct cpudata *cpu,
u32 target_pstate, bool fast_switch)
{
if (fast_switch)
- wrmsrl(MSR_IA32_PERF_CTL,
+ wrmsrq(MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, target_pstate));
else
- wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+ wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, target_pstate));
}
@@ -3259,7 +3378,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
intel_pstate_get_hwp_cap(cpu);
- rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
+ rdmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
WRITE_ONCE(cpu->hwp_req_cached, value);
cpu->epp_cached = intel_pstate_get_epp(cpu, value);
@@ -3326,7 +3445,7 @@ static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
* written by it may not be suitable.
*/
value &= ~HWP_DESIRED_PERF(~0L);
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
WRITE_ONCE(cpu->hwp_req_cached, value);
}
@@ -3576,7 +3695,7 @@ static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
id = x86_match_cpu(intel_pstate_cpu_oob_ids);
if (id) {
- rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
+ rdmsrq(MSR_MISC_PWR_MGMT, misc_pwr);
if (misc_pwr & BITMASK_OOB) {
pr_debug("Bit 8 or 18 in the MISC_PWR_MGMT MSR set\n");
pr_debug("P states are controlled in Out of Band mode by the firmware/hardware\n");
@@ -3632,7 +3751,7 @@ static bool intel_pstate_hwp_is_enabled(void)
{
u64 value;
- rdmsrl(MSR_PM_ENABLE, value);
+ rdmsrq(MSR_PM_ENABLE, value);
return !!(value & 0x1);
}
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 68ccd73c8129..ba0e08c8486a 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -136,7 +136,7 @@ static void do_longhaul1(unsigned int mults_index)
{
union msr_bcr2 bcr2;
- rdmsrl(MSR_VIA_BCR2, bcr2.val);
+ rdmsrq(MSR_VIA_BCR2, bcr2.val);
/* Enable software clock multiplier */
bcr2.bits.ESOFTBF = 1;
bcr2.bits.CLOCKMUL = mults_index & 0xff;
@@ -144,16 +144,16 @@ static void do_longhaul1(unsigned int mults_index)
/* Sync to timer tick */
safe_halt();
/* Change frequency on next halt or sleep */
- wrmsrl(MSR_VIA_BCR2, bcr2.val);
+ wrmsrq(MSR_VIA_BCR2, bcr2.val);
/* Invoke transition */
ACPI_FLUSH_CPU_CACHE();
halt();
/* Disable software clock multiplier */
local_irq_disable();
- rdmsrl(MSR_VIA_BCR2, bcr2.val);
+ rdmsrq(MSR_VIA_BCR2, bcr2.val);
bcr2.bits.ESOFTBF = 0;
- wrmsrl(MSR_VIA_BCR2, bcr2.val);
+ wrmsrq(MSR_VIA_BCR2, bcr2.val);
}
/* For processor with Longhaul MSR */
@@ -164,7 +164,7 @@ static void do_powersaver(int cx_address, unsigned int mults_index,
union msr_longhaul longhaul;
u32 t;
- rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ rdmsrq(MSR_VIA_LONGHAUL, longhaul.val);
/* Setup new frequency */
if (!revid_errata)
longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
@@ -180,7 +180,7 @@ static void do_powersaver(int cx_address, unsigned int mults_index,
/* Raise voltage if necessary */
if (can_scale_voltage && dir) {
longhaul.bits.EnableSoftVID = 1;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ wrmsrq(MSR_VIA_LONGHAUL, longhaul.val);
/* Change voltage */
if (!cx_address) {
ACPI_FLUSH_CPU_CACHE();
@@ -194,12 +194,12 @@ static void do_powersaver(int cx_address, unsigned int mults_index,
t = inl(acpi_gbl_FADT.xpm_timer_block.address);
}
longhaul.bits.EnableSoftVID = 0;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ wrmsrq(MSR_VIA_LONGHAUL, longhaul.val);
}
/* Change frequency on next halt or sleep */
longhaul.bits.EnableSoftBusRatio = 1;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ wrmsrq(MSR_VIA_LONGHAUL, longhaul.val);
if (!cx_address) {
ACPI_FLUSH_CPU_CACHE();
halt();
@@ -212,12 +212,12 @@ static void do_powersaver(int cx_address, unsigned int mults_index,
}
/* Disable bus ratio bit */
longhaul.bits.EnableSoftBusRatio = 0;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ wrmsrq(MSR_VIA_LONGHAUL, longhaul.val);
/* Reduce voltage if necessary */
if (can_scale_voltage && !dir) {
longhaul.bits.EnableSoftVID = 1;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ wrmsrq(MSR_VIA_LONGHAUL, longhaul.val);
/* Change voltage */
if (!cx_address) {
ACPI_FLUSH_CPU_CACHE();
@@ -231,7 +231,7 @@ static void do_powersaver(int cx_address, unsigned int mults_index,
t = inl(acpi_gbl_FADT.xpm_timer_block.address);
}
longhaul.bits.EnableSoftVID = 0;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ wrmsrq(MSR_VIA_LONGHAUL, longhaul.val);
}
}
@@ -534,7 +534,7 @@ static void longhaul_setup_voltagescaling(void)
unsigned int j, speed, pos, kHz_step, numvscales;
int min_vid_speed;
- rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ rdmsrq(MSR_VIA_LONGHAUL, longhaul.val);
if (!(longhaul.bits.RevisionID & 1)) {
pr_info("Voltage scaling not supported by CPU\n");
return;
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index fb2197dc170f..31039330a3ba 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -219,13 +219,13 @@ static void change_FID(int fid)
{
union msr_fidvidctl fidvidctl;
- rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ rdmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val);
if (fidvidctl.bits.FID != fid) {
fidvidctl.bits.SGTC = latency;
fidvidctl.bits.FID = fid;
fidvidctl.bits.VIDC = 0;
fidvidctl.bits.FIDC = 1;
- wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ wrmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val);
}
}
@@ -234,13 +234,13 @@ static void change_VID(int vid)
{
union msr_fidvidctl fidvidctl;
- rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ rdmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val);
if (fidvidctl.bits.VID != vid) {
fidvidctl.bits.SGTC = latency;
fidvidctl.bits.VID = vid;
fidvidctl.bits.FIDC = 0;
fidvidctl.bits.VIDC = 1;
- wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ wrmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val);
}
}
@@ -260,7 +260,7 @@ static int powernow_target(struct cpufreq_policy *policy, unsigned int index)
fid = powernow_table[index].driver_data & 0xFF;
vid = (powernow_table[index].driver_data & 0xFF00) >> 8;
- rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ rdmsrq(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
cfid = fidvidstatus.bits.CFID;
freqs.old = fsb * fid_codes[cfid] / 10;
@@ -557,7 +557,7 @@ static unsigned int powernow_get(unsigned int cpu)
if (cpu)
return 0;
- rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ rdmsrq(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
cfid = fidvidstatus.bits.CFID;
return fsb * fid_codes[cfid] / 10;
@@ -598,7 +598,7 @@ static int powernow_cpu_init(struct cpufreq_policy *policy)
if (policy->cpu != 0)
return -ENODEV;
- rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ rdmsrq(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
recalibrate_cpu_khz();
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index 103d2519dff7..b360f03a116f 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -21,7 +21,6 @@
#include <linux/io.h>
#include <asm/cpu_device_id.h>
-#include <asm/msr.h>
#define MMCR_BASE 0xfffef000 /* The default base address */
#define OFFS_CPUCTL 0x2 /* CPU Control Register */
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 5fb5228f6bf1..2041f59116ce 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -43,7 +43,7 @@ static int psci_pd_power_off(struct generic_pm_domain *pd)
/* OSI mode is enabled, set the corresponding domain state. */
pd_state = state->data;
- psci_set_domain_state(*pd_state);
+ psci_set_domain_state(pd, pd->state_idx, *pd_state);
return 0;
}
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index b46a83f5ffe4..3c2756a539c4 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -16,7 +16,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/platform_device.h>
+#include <linux/device/faux.h>
#include <linux/psci.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
@@ -36,19 +36,30 @@ struct psci_cpuidle_data {
struct device *dev;
};
+struct psci_cpuidle_domain_state {
+ struct generic_pm_domain *pd;
+ unsigned int state_idx;
+ u32 state;
+};
+
static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data);
-static DEFINE_PER_CPU(u32, domain_state);
+static DEFINE_PER_CPU(struct psci_cpuidle_domain_state, psci_domain_state);
static bool psci_cpuidle_use_syscore;
static bool psci_cpuidle_use_cpuhp;
-void psci_set_domain_state(u32 state)
+void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx,
+ u32 state)
{
- __this_cpu_write(domain_state, state);
+ struct psci_cpuidle_domain_state *ds = this_cpu_ptr(&psci_domain_state);
+
+ ds->pd = pd;
+ ds->state_idx = state_idx;
+ ds->state = state;
}
-static inline u32 psci_get_domain_state(void)
+static inline void psci_clear_domain_state(void)
{
- return __this_cpu_read(domain_state);
+ __this_cpu_write(psci_domain_state.state, 0);
}
static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
@@ -58,7 +69,8 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
u32 *states = data->psci_states;
struct device *pd_dev = data->dev;
- u32 state;
+ struct psci_cpuidle_domain_state *ds;
+ u32 state = states[idx];
int ret;
ret = cpu_pm_enter();
@@ -71,9 +83,9 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
else
pm_runtime_put_sync_suspend(pd_dev);
- state = psci_get_domain_state();
- if (!state)
- state = states[idx];
+ ds = this_cpu_ptr(&psci_domain_state);
+ if (ds->state)
+ state = ds->state;
trace_psci_domain_idle_enter(dev->cpu, state, s2idle);
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
@@ -86,8 +98,12 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
cpu_pm_exit();
+ /* Correct domain-idlestate statistics if we failed to enter. */
+ if (ret == -1 && ds->state)
+ pm_genpd_inc_rejected(ds->pd, ds->state_idx);
+
/* Clear the domain state to start fresh when back from idle. */
- psci_set_domain_state(0);
+ psci_clear_domain_state();
return ret;
}
@@ -121,7 +137,7 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
if (pd_dev) {
pm_runtime_put_sync(pd_dev);
/* Clear domain state to start fresh at next online. */
- psci_set_domain_state(0);
+ psci_clear_domain_state();
}
return 0;
@@ -147,7 +163,7 @@ static void psci_idle_syscore_switch(bool suspend)
/* Clear domain state to re-start fresh. */
if (!cleared) {
- psci_set_domain_state(0);
+ psci_clear_domain_state();
cleared = true;
}
}
@@ -407,14 +423,14 @@ deinit:
* to register cpuidle driver then rollback to cancel all CPUs
* registration.
*/
-static int psci_cpuidle_probe(struct platform_device *pdev)
+static int psci_cpuidle_probe(struct faux_device *fdev)
{
int cpu, ret;
struct cpuidle_driver *drv;
struct cpuidle_device *dev;
for_each_present_cpu(cpu) {
- ret = psci_idle_init_cpu(&pdev->dev, cpu);
+ ret = psci_idle_init_cpu(&fdev->dev, cpu);
if (ret)
goto out_fail;
}
@@ -434,26 +450,37 @@ out_fail:
return ret;
}
-static struct platform_driver psci_cpuidle_driver = {
+static struct faux_device_ops psci_cpuidle_ops = {
.probe = psci_cpuidle_probe,
- .driver = {
- .name = "psci-cpuidle",
- },
};
+static bool __init dt_idle_state_present(void)
+{
+ struct device_node *cpu_node __free(device_node);
+ struct device_node *state_node __free(device_node);
+
+ cpu_node = of_cpu_device_node_get(cpumask_first(cpu_possible_mask));
+ if (!cpu_node)
+ return false;
+
+ state_node = of_get_cpu_state_node(cpu_node, 0);
+ if (!state_node)
+ return false;
+
+ return !!of_match_node(psci_idle_state_match, state_node);
+}
+
static int __init psci_idle_init(void)
{
- struct platform_device *pdev;
- int ret;
+ struct faux_device *fdev;
- ret = platform_driver_register(&psci_cpuidle_driver);
- if (ret)
- return ret;
+ if (!dt_idle_state_present())
+ return 0;
- pdev = platform_device_register_simple("psci-cpuidle", -1, NULL, 0);
- if (IS_ERR(pdev)) {
- platform_driver_unregister(&psci_cpuidle_driver);
- return PTR_ERR(pdev);
+ fdev = faux_device_create("psci-cpuidle", NULL, &psci_cpuidle_ops);
+ if (!fdev) {
+ pr_err("Failed to create psci-cpuidle device\n");
+ return -ENODEV;
}
return 0;
diff --git a/drivers/cpuidle/cpuidle-psci.h b/drivers/cpuidle/cpuidle-psci.h
index ef004ec7a7c5..d29cbd796cd5 100644
--- a/drivers/cpuidle/cpuidle-psci.h
+++ b/drivers/cpuidle/cpuidle-psci.h
@@ -4,8 +4,10 @@
#define __CPUIDLE_PSCI_H
struct device_node;
+struct generic_pm_domain;
-void psci_set_domain_state(u32 state);
+void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx,
+ u32 state);
int psci_dt_parse_state_node(struct device_node *np, u32 *state);
#endif /* __CPUIDLE_PSCI_H */
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 39aa0aea61c6..52d5d26fc7c6 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -255,7 +255,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
data->next_timer_ns = KTIME_MAX;
delta_tick = TICK_NSEC / 2;
- data->bucket = which_bucket(KTIME_MAX);
+ data->bucket = BUCKETS - 1;
}
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 8fe5e1b47ef9..bfa55c1eab5b 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -19,7 +19,7 @@
*
* Of course, non-timer wakeup sources are more important in some use cases,
* but even then it is generally unnecessary to consider idle duration values
- * greater than the time time till the next timer event, referred as the sleep
+ * greater than the time till the next timer event, referred as the sleep
* length in what follows, because the closest timer will ultimately wake up the
* CPU anyway unless it is woken up earlier.
*
@@ -311,7 +311,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
struct cpuidle_state *s = &drv->states[i];
/*
- * Update the sums of idle state mertics for all of the states
+ * Update the sums of idle state metrics for all of the states
* shallower than the current one.
*/
intercept_sum += prev_bin->intercepts;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 47082782008a..5686369779be 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -530,13 +530,6 @@ source "drivers/crypto/cavium/nitrox/Kconfig"
source "drivers/crypto/marvell/Kconfig"
source "drivers/crypto/intel/Kconfig"
-config CRYPTO_DEV_CAVIUM_ZIP
- tristate "Cavium ZIP driver"
- depends on PCI && 64BIT && (ARM64 || COMPILE_TEST)
- help
- Select this option if you want to enable compression/decompression
- acceleration on Cavium's ARM based SoCs
-
config CRYPTO_DEV_QCE
tristate "Qualcomm crypto engine accelerator"
depends on ARCH_QCOM || COMPILE_TEST
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index c97f0ebc55ec..22eadcc8f4a2 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -8,12 +8,9 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_I2C) += atmel-i2c.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA204A) += atmel-sha204a.o
-obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
-obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
-obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/
obj-$(CONFIG_CRYPTO_DEV_EXYNOS_RNG) += exynos-rng.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_COMMON) += caam/
obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
@@ -50,3 +47,4 @@ obj-y += hisilicon/
obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/
obj-y += intel/
obj-y += starfive/
+obj-y += cavium/
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 19b7fb4a93e8..f9cf00d690e2 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -33,22 +33,30 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
if (sg_nents_for_len(areq->src, areq->cryptlen) > MAX_SG ||
sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) {
- algt->stat_fb_maxsg++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_maxsg++;
+
return true;
}
if (areq->cryptlen < crypto_skcipher_ivsize(tfm)) {
- algt->stat_fb_leniv++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_leniv++;
+
return true;
}
if (areq->cryptlen == 0) {
- algt->stat_fb_len0++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_len0++;
+
return true;
}
if (areq->cryptlen % 16) {
- algt->stat_fb_mod16++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_mod16++;
+
return true;
}
@@ -56,12 +64,16 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
sg = areq->src;
while (sg) {
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
- algt->stat_fb_srcali++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_srcali++;
+
return true;
}
todo = min(len, sg->length);
if (todo % 4) {
- algt->stat_fb_srclen++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_srclen++;
+
return true;
}
len -= todo;
@@ -72,12 +84,16 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
sg = areq->dst;
while (sg) {
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
- algt->stat_fb_dstali++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_dstali++;
+
return true;
}
todo = min(len, sg->length);
if (todo % 4) {
- algt->stat_fb_dstlen++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_dstlen++;
+
return true;
}
len -= todo;
@@ -100,9 +116,7 @@ static int sun8i_ce_cipher_fallback(struct skcipher_request *areq)
algt = container_of(alg, struct sun8i_ce_alg_template,
alg.skcipher.base);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
algt->stat_fb++;
-#endif
}
skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
@@ -146,9 +160,8 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm),
op->keylen);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
- algt->stat_req++;
-#endif
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_req++;
flow = rctx->flow;
@@ -275,13 +288,16 @@ theend_sgs:
} else {
if (nr_sgs > 0)
dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
- dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE);
+
+ if (nr_sgd > 0)
+ dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE);
}
theend_iv:
if (areq->iv && ivsize > 0) {
- if (rctx->addr_iv)
+ if (!dma_mapping_error(ce->dev, rctx->addr_iv))
dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE);
+
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & CE_DECRYPTION) {
memcpy(areq->iv, chan->backup_iv, ivsize);
@@ -434,17 +450,17 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
crypto_skcipher_set_reqsize(sktfm, sizeof(struct sun8i_cipher_req_ctx) +
crypto_skcipher_reqsize(op->fallback_tfm));
- memcpy(algt->fbname,
- crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
- CRYPTO_MAX_ALG_NAME);
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ memcpy(algt->fbname,
+ crypto_skcipher_driver_name(op->fallback_tfm),
+ CRYPTO_MAX_ALG_NAME);
- err = pm_runtime_get_sync(op->ce->dev);
+ err = pm_runtime_resume_and_get(op->ce->dev);
if (err < 0)
goto error_pm;
return 0;
error_pm:
- pm_runtime_put_noidle(op->ce->dev);
crypto_free_skcipher(op->fallback_tfm);
return err;
}
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index ec1ffda9ea32..658f520cee0c 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -832,13 +832,12 @@ static int sun8i_ce_pm_init(struct sun8i_ce_dev *ce)
err = pm_runtime_set_suspended(ce->dev);
if (err)
return err;
- pm_runtime_enable(ce->dev);
- return err;
-}
-static void sun8i_ce_pm_exit(struct sun8i_ce_dev *ce)
-{
- pm_runtime_disable(ce->dev);
+ err = devm_pm_runtime_enable(ce->dev);
+ if (err)
+ return err;
+
+ return 0;
}
static int sun8i_ce_get_clks(struct sun8i_ce_dev *ce)
@@ -1041,7 +1040,7 @@ static int sun8i_ce_probe(struct platform_device *pdev)
"sun8i-ce-ns", ce);
if (err) {
dev_err(ce->dev, "Cannot request CryptoEngine Non-secure IRQ (err=%d)\n", err);
- goto error_irq;
+ goto error_pm;
}
err = sun8i_ce_register_algs(ce);
@@ -1082,8 +1081,6 @@ static int sun8i_ce_probe(struct platform_device *pdev)
return 0;
error_alg:
sun8i_ce_unregister_algs(ce);
-error_irq:
- sun8i_ce_pm_exit(ce);
error_pm:
sun8i_ce_free_chanlist(ce, MAXFLOW - 1);
return err;
@@ -1104,8 +1101,6 @@ static void sun8i_ce_remove(struct platform_device *pdev)
#endif
sun8i_ce_free_chanlist(ce, MAXFLOW - 1);
-
- sun8i_ce_pm_exit(ce);
}
static const struct of_device_id sun8i_ce_crypto_of_match_table[] = {
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 6072dd9f390b..bef44f350167 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -23,6 +23,18 @@
#include <linux/string.h>
#include "sun8i-ce.h"
+static void sun8i_ce_hash_stat_fb_inc(struct crypto_ahash *tfm)
+{
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
+ struct sun8i_ce_alg_template *algt __maybe_unused;
+ struct ahash_alg *alg = crypto_ahash_alg(tfm);
+
+ algt = container_of(alg, struct sun8i_ce_alg_template,
+ alg.hash.base);
+ algt->stat_fb++;
+ }
+}
+
int sun8i_ce_hash_init_tfm(struct crypto_ahash *tfm)
{
struct sun8i_ce_hash_tfm_ctx *op = crypto_ahash_ctx(tfm);
@@ -48,15 +60,16 @@ int sun8i_ce_hash_init_tfm(struct crypto_ahash *tfm)
sizeof(struct sun8i_ce_hash_reqctx) +
crypto_ahash_reqsize(op->fallback_tfm));
- memcpy(algt->fbname, crypto_ahash_driver_name(op->fallback_tfm),
- CRYPTO_MAX_ALG_NAME);
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ memcpy(algt->fbname,
+ crypto_ahash_driver_name(op->fallback_tfm),
+ CRYPTO_MAX_ALG_NAME);
- err = pm_runtime_get_sync(op->ce->dev);
+ err = pm_runtime_resume_and_get(op->ce->dev);
if (err < 0)
goto error_pm;
return 0;
error_pm:
- pm_runtime_put_noidle(op->ce->dev);
crypto_free_ahash(op->fallback_tfm);
return err;
}
@@ -78,7 +91,9 @@ int sun8i_ce_hash_init(struct ahash_request *areq)
memset(rctx, 0, sizeof(struct sun8i_ce_hash_reqctx));
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
return crypto_ahash_init(&rctx->fallback_req);
}
@@ -90,7 +105,9 @@ int sun8i_ce_hash_export(struct ahash_request *areq, void *out)
struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
return crypto_ahash_export(&rctx->fallback_req, out);
}
@@ -102,7 +119,9 @@ int sun8i_ce_hash_import(struct ahash_request *areq, const void *in)
struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
return crypto_ahash_import(&rctx->fallback_req, in);
}
@@ -113,21 +132,13 @@ int sun8i_ce_hash_final(struct ahash_request *areq)
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
- ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.result = areq->result;
-
- if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
- struct sun8i_ce_alg_template *algt __maybe_unused;
- struct ahash_alg *alg = crypto_ahash_alg(tfm);
+ sun8i_ce_hash_stat_fb_inc(tfm);
- algt = container_of(alg, struct sun8i_ce_alg_template,
- alg.hash.base);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
- algt->stat_fb++;
-#endif
- }
+ ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, areq->result, 0);
return crypto_ahash_final(&rctx->fallback_req);
}
@@ -139,10 +150,10 @@ int sun8i_ce_hash_update(struct ahash_request *areq)
struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, NULL, areq->nbytes);
return crypto_ahash_update(&rctx->fallback_req);
}
@@ -153,24 +164,14 @@ int sun8i_ce_hash_finup(struct ahash_request *areq)
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
- ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
- rctx->fallback_req.result = areq->result;
+ sun8i_ce_hash_stat_fb_inc(tfm);
- if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
- struct sun8i_ce_alg_template *algt __maybe_unused;
- struct ahash_alg *alg = crypto_ahash_alg(tfm);
-
- algt = container_of(alg, struct sun8i_ce_alg_template,
- alg.hash.base);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
- algt->stat_fb++;
-#endif
- }
+ ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, areq->result,
+ areq->nbytes);
return crypto_ahash_finup(&rctx->fallback_req);
}
@@ -181,24 +182,14 @@ static int sun8i_ce_hash_digest_fb(struct ahash_request *areq)
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
- ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
- rctx->fallback_req.result = areq->result;
-
- if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
- struct sun8i_ce_alg_template *algt __maybe_unused;
- struct ahash_alg *alg = crypto_ahash_alg(tfm);
+ sun8i_ce_hash_stat_fb_inc(tfm);
- algt = container_of(alg, struct sun8i_ce_alg_template,
- alg.hash.base);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
- algt->stat_fb++;
-#endif
- }
+ ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, areq->result,
+ areq->nbytes);
return crypto_ahash_digest(&rctx->fallback_req);
}
@@ -213,22 +204,30 @@ static bool sun8i_ce_hash_need_fallback(struct ahash_request *areq)
algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base);
if (areq->nbytes == 0) {
- algt->stat_fb_len0++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_len0++;
+
return true;
}
/* we need to reserve one SG for padding one */
if (sg_nents_for_len(areq->src, areq->nbytes) > MAX_SG - 1) {
- algt->stat_fb_maxsg++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_maxsg++;
+
return true;
}
sg = areq->src;
while (sg) {
if (sg->length % 4) {
- algt->stat_fb_srclen++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_srclen++;
+
return true;
}
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
- algt->stat_fb_srcali++;
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_fb_srcali++;
+
return true;
}
sg = sg_next(sg);
@@ -244,21 +243,11 @@ int sun8i_ce_hash_digest(struct ahash_request *areq)
struct sun8i_ce_alg_template *algt;
struct sun8i_ce_dev *ce;
struct crypto_engine *engine;
- struct scatterlist *sg;
- int nr_sgs, e, i;
+ int e;
if (sun8i_ce_hash_need_fallback(areq))
return sun8i_ce_hash_digest_fb(areq);
- nr_sgs = sg_nents_for_len(areq->src, areq->nbytes);
- if (nr_sgs > MAX_SG - 1)
- return sun8i_ce_hash_digest_fb(areq);
-
- for_each_sg(areq->src, sg, nr_sgs, i) {
- if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
- return sun8i_ce_hash_digest_fb(areq);
- }
-
algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base);
ce = algt->ce;
@@ -343,9 +332,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
u32 common;
u64 byte_count;
__le32 *bf;
- void *buf = NULL;
+ void *buf, *result;
int j, i, todo;
- void *result = NULL;
u64 bs;
int digestsize;
dma_addr_t addr_res, addr_pad;
@@ -365,22 +353,22 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA);
if (!buf) {
err = -ENOMEM;
- goto theend;
+ goto err_out;
}
bf = (__le32 *)buf;
result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA);
if (!result) {
err = -ENOMEM;
- goto theend;
+ goto err_free_buf;
}
flow = rctx->flow;
chan = &ce->chanlist[flow];
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
- algt->stat_req++;
-#endif
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG))
+ algt->stat_req++;
+
dev_dbg(ce->dev, "%s %s len=%d\n", __func__, crypto_tfm_alg_name(areq->base.tfm), areq->nbytes);
cet = chan->tl;
@@ -398,7 +386,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
- goto theend;
+ goto err_free_result;
}
len = areq->nbytes;
@@ -411,7 +399,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
if (len > 0) {
dev_err(ce->dev, "remaining len %d\n", len);
err = -EINVAL;
- goto theend;
+ goto err_unmap_src;
}
addr_res = dma_map_single(ce->dev, result, digestsize, DMA_FROM_DEVICE);
cet->t_dst[0].addr = desc_addr_val_le32(ce, addr_res);
@@ -419,7 +407,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
if (dma_mapping_error(ce->dev, addr_res)) {
dev_err(ce->dev, "DMA map dest\n");
err = -EINVAL;
- goto theend;
+ goto err_unmap_src;
}
byte_count = areq->nbytes;
@@ -441,7 +429,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
}
if (!j) {
err = -EINVAL;
- goto theend;
+ goto err_unmap_result;
}
addr_pad = dma_map_single(ce->dev, buf, j * 4, DMA_TO_DEVICE);
@@ -450,7 +438,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
if (dma_mapping_error(ce->dev, addr_pad)) {
dev_err(ce->dev, "DMA error on padding SG\n");
err = -EINVAL;
- goto theend;
+ goto err_unmap_result;
}
if (ce->variant->hash_t_dlen_in_bits)
@@ -463,16 +451,25 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
err = sun8i_ce_run_task(ce, flow, crypto_ahash_alg_name(tfm));
dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE);
- dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
+
+err_unmap_result:
dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE);
+ if (!err)
+ memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize);
+err_unmap_src:
+ dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
- memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize);
-theend:
- kfree(buf);
+err_free_result:
kfree(result);
+
+err_free_buf:
+ kfree(buf);
+
+err_out:
local_bh_disable();
crypto_finalize_hash_request(engine, breq, err);
local_bh_enable();
+
return 0;
}
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
index 3b5c2af013d0..83df4d719053 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
@@ -308,8 +308,8 @@ struct sun8i_ce_hash_tfm_ctx {
* @flow: the flow to use for this request
*/
struct sun8i_ce_hash_reqctx {
- struct ahash_request fallback_req;
int flow;
+ struct ahash_request fallback_req; // keep at the end
};
/*
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 9b9605ce8ee6..8831bcb230c2 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -141,7 +141,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
/* we need to copy all IVs from source in case DMA is bi-directionnal */
while (sg && len) {
- if (sg_dma_len(sg) == 0) {
+ if (sg->length == 0) {
sg = sg_next(sg);
continue;
}
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index 753f67a36dc5..8bc08089f044 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -150,7 +150,9 @@ int sun8i_ss_hash_init(struct ahash_request *areq)
memset(rctx, 0, sizeof(struct sun8i_ss_hash_reqctx));
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
return crypto_ahash_init(&rctx->fallback_req);
}
@@ -162,7 +164,9 @@ int sun8i_ss_hash_export(struct ahash_request *areq, void *out)
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
return crypto_ahash_export(&rctx->fallback_req, out);
}
@@ -174,7 +178,9 @@ int sun8i_ss_hash_import(struct ahash_request *areq, const void *in)
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
return crypto_ahash_import(&rctx->fallback_req, in);
}
@@ -186,9 +192,10 @@ int sun8i_ss_hash_final(struct ahash_request *areq)
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.result = areq->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, areq->result, 0);
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
struct ahash_alg *alg = crypto_ahash_alg(tfm);
@@ -212,10 +219,10 @@ int sun8i_ss_hash_update(struct ahash_request *areq)
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, NULL, areq->nbytes);
return crypto_ahash_update(&rctx->fallback_req);
}
@@ -227,12 +234,11 @@ int sun8i_ss_hash_finup(struct ahash_request *areq)
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
- rctx->fallback_req.result = areq->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, areq->result,
+ areq->nbytes);
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
struct ahash_alg *alg = crypto_ahash_alg(tfm);
@@ -256,12 +262,11 @@ static int sun8i_ss_hash_digest_fb(struct ahash_request *areq)
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
- rctx->fallback_req.result = areq->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, areq->result,
+ areq->nbytes);
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
struct ahash_alg *alg = crypto_ahash_alg(tfm);
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c
index e0af611a95d8..38e8a61e9166 100644
--- a/drivers/crypto/amcc/crypto4xx_alg.c
+++ b/drivers/crypto/amcc/crypto4xx_alg.c
@@ -12,9 +12,6 @@
#include <linux/interrupt.h>
#include <linux/spinlock_types.h>
#include <linux/scatterlist.h>
-#include <linux/crypto.h>
-#include <linux/hash.h>
-#include <crypto/internal/hash.h>
#include <linux/dma-mapping.h>
#include <crypto/algapi.h>
#include <crypto/aead.h>
@@ -72,7 +69,7 @@ static inline int crypto4xx_crypt(struct skcipher_request *req,
{
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
- __le32 iv[AES_IV_SIZE];
+ __le32 iv[AES_IV_SIZE / 4];
if (check_blocksize && !IS_ALIGNED(req->cryptlen, AES_BLOCK_SIZE))
return -EINVAL;
@@ -429,7 +426,7 @@ static int crypto4xx_crypt_aes_ccm(struct aead_request *req, bool decrypt)
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
struct crypto_aead *aead = crypto_aead_reqtfm(req);
- __le32 iv[16];
+ __le32 iv[4];
u32 tmp_sa[SA_AES128_CCM_LEN + 4];
struct dynamic_sa_ctl *sa = (struct dynamic_sa_ctl *)tmp_sa;
unsigned int len = req->cryptlen;
@@ -602,106 +599,3 @@ int crypto4xx_decrypt_aes_gcm(struct aead_request *req)
{
return crypto4xx_crypt_aes_gcm(req, true);
}
-
-/*
- * HASH SHA1 Functions
- */
-static int crypto4xx_hash_alg_init(struct crypto_tfm *tfm,
- unsigned int sa_len,
- unsigned char ha,
- unsigned char hm)
-{
- struct crypto_alg *alg = tfm->__crt_alg;
- struct crypto4xx_alg *my_alg;
- struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
- struct dynamic_sa_hash160 *sa;
- int rc;
-
- my_alg = container_of(__crypto_ahash_alg(alg), struct crypto4xx_alg,
- alg.u.hash);
- ctx->dev = my_alg->dev;
-
- /* Create SA */
- if (ctx->sa_in || ctx->sa_out)
- crypto4xx_free_sa(ctx);
-
- rc = crypto4xx_alloc_sa(ctx, sa_len);
- if (rc)
- return rc;
-
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct crypto4xx_ctx));
- sa = (struct dynamic_sa_hash160 *)ctx->sa_in;
- set_dynamic_sa_command_0(&sa->ctrl, SA_SAVE_HASH, SA_NOT_SAVE_IV,
- SA_NOT_LOAD_HASH, SA_LOAD_IV_FROM_SA,
- SA_NO_HEADER_PROC, ha, SA_CIPHER_ALG_NULL,
- SA_PAD_TYPE_ZERO, SA_OP_GROUP_BASIC,
- SA_OPCODE_HASH, DIR_INBOUND);
- set_dynamic_sa_command_1(&sa->ctrl, 0, SA_HASH_MODE_HASH,
- CRYPTO_FEEDBACK_MODE_NO_FB, SA_EXTENDED_SN_OFF,
- SA_SEQ_MASK_OFF, SA_MC_ENABLE,
- SA_NOT_COPY_PAD, SA_NOT_COPY_PAYLOAD,
- SA_NOT_COPY_HDR);
- /* Need to zero hash digest in SA */
- memset(sa->inner_digest, 0, sizeof(sa->inner_digest));
- memset(sa->outer_digest, 0, sizeof(sa->outer_digest));
-
- return 0;
-}
-
-int crypto4xx_hash_init(struct ahash_request *req)
-{
- struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
- int ds;
- struct dynamic_sa_ctl *sa;
-
- sa = ctx->sa_in;
- ds = crypto_ahash_digestsize(
- __crypto_ahash_cast(req->base.tfm));
- sa->sa_command_0.bf.digest_len = ds >> 2;
- sa->sa_command_0.bf.load_hash_state = SA_LOAD_HASH_FROM_SA;
-
- return 0;
-}
-
-int crypto4xx_hash_update(struct ahash_request *req)
-{
- struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
- struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
- struct scatterlist dst;
- unsigned int ds = crypto_ahash_digestsize(ahash);
-
- sg_init_one(&dst, req->result, ds);
-
- return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
- req->nbytes, NULL, 0, ctx->sa_in,
- ctx->sa_len, 0, NULL);
-}
-
-int crypto4xx_hash_final(struct ahash_request *req)
-{
- return 0;
-}
-
-int crypto4xx_hash_digest(struct ahash_request *req)
-{
- struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
- struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
- struct scatterlist dst;
- unsigned int ds = crypto_ahash_digestsize(ahash);
-
- sg_init_one(&dst, req->result, ds);
-
- return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
- req->nbytes, NULL, 0, ctx->sa_in,
- ctx->sa_len, 0, NULL);
-}
-
-/*
- * SHA1 Algorithm
- */
-int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm)
-{
- return crypto4xx_hash_alg_init(tfm, SA_HASH160_LEN, SA_HASH_ALG_SHA1,
- SA_HASH_MODE_HASH);
-}
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index ec3ccfa60445..8cdc66d520c9 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -485,18 +485,6 @@ static void crypto4xx_copy_pkt_to_dst(struct crypto4xx_device *dev,
}
}
-static void crypto4xx_copy_digest_to_dst(void *dst,
- struct pd_uinfo *pd_uinfo,
- struct crypto4xx_ctx *ctx)
-{
- struct dynamic_sa_ctl *sa = (struct dynamic_sa_ctl *) ctx->sa_in;
-
- if (sa->sa_command_0.bf.hash_alg == SA_HASH_ALG_SHA1) {
- memcpy(dst, pd_uinfo->sr_va->save_digest,
- SA_HASH_ALG_SHA1_DIGEST_SIZE);
- }
-}
-
static void crypto4xx_ret_sg_desc(struct crypto4xx_device *dev,
struct pd_uinfo *pd_uinfo)
{
@@ -549,23 +537,6 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
skcipher_request_complete(req, 0);
}
-static void crypto4xx_ahash_done(struct crypto4xx_device *dev,
- struct pd_uinfo *pd_uinfo)
-{
- struct crypto4xx_ctx *ctx;
- struct ahash_request *ahash_req;
-
- ahash_req = ahash_request_cast(pd_uinfo->async_req);
- ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(ahash_req));
-
- crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, ctx);
- crypto4xx_ret_sg_desc(dev, pd_uinfo);
-
- if (pd_uinfo->state & PD_ENTRY_BUSY)
- ahash_request_complete(ahash_req, -EINPROGRESS);
- ahash_request_complete(ahash_req, 0);
-}
-
static void crypto4xx_aead_done(struct crypto4xx_device *dev,
struct pd_uinfo *pd_uinfo,
struct ce_pd *pd)
@@ -642,9 +613,6 @@ static void crypto4xx_pd_done(struct crypto4xx_device *dev, u32 idx)
case CRYPTO_ALG_TYPE_AEAD:
crypto4xx_aead_done(dev, pd_uinfo, pd);
break;
- case CRYPTO_ALG_TYPE_AHASH:
- crypto4xx_ahash_done(dev, pd_uinfo);
- break;
}
}
@@ -676,7 +644,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
struct scatterlist *src,
struct scatterlist *dst,
const unsigned int datalen,
- const __le32 *iv, const u32 iv_len,
+ const void *iv, const u32 iv_len,
const struct dynamic_sa_ctl *req_sa,
const unsigned int sa_len,
const unsigned int assoclen,
@@ -912,8 +880,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
}
pd->pd_ctl.w = PD_CTL_HOST_READY |
- ((crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AHASH) ||
- (crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AEAD) ?
+ ((crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AEAD) ?
PD_CTL_HASH_FINAL : 0);
pd->pd_ctl_len.w = 0x00400000 | (assoclen + datalen);
pd_uinfo->state = PD_ENTRY_INUSE | (is_busy ? PD_ENTRY_BUSY : 0);
@@ -1019,10 +986,6 @@ static int crypto4xx_register_alg(struct crypto4xx_device *sec_dev,
rc = crypto_register_aead(&alg->alg.u.aead);
break;
- case CRYPTO_ALG_TYPE_AHASH:
- rc = crypto_register_ahash(&alg->alg.u.hash);
- break;
-
case CRYPTO_ALG_TYPE_RNG:
rc = crypto_register_rng(&alg->alg.u.rng);
break;
@@ -1048,10 +1011,6 @@ static void crypto4xx_unregister_alg(struct crypto4xx_device *sec_dev)
list_for_each_entry_safe(alg, tmp, &sec_dev->alg_list, entry) {
list_del(&alg->entry);
switch (alg->alg.type) {
- case CRYPTO_ALG_TYPE_AHASH:
- crypto_unregister_ahash(&alg->alg.u.hash);
- break;
-
case CRYPTO_ALG_TYPE_AEAD:
crypto_unregister_aead(&alg->alg.u.aead);
break;
diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
index 3adcc5e65694..ee36630c670f 100644
--- a/drivers/crypto/amcc/crypto4xx_core.h
+++ b/drivers/crypto/amcc/crypto4xx_core.h
@@ -16,7 +16,6 @@
#include <linux/ratelimit.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
-#include <crypto/internal/hash.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/rng.h>
#include <crypto/internal/skcipher.h>
@@ -135,7 +134,6 @@ struct crypto4xx_alg_common {
u32 type;
union {
struct skcipher_alg cipher;
- struct ahash_alg hash;
struct aead_alg aead;
struct rng_alg rng;
} u;
@@ -147,6 +145,12 @@ struct crypto4xx_alg {
struct crypto4xx_device *dev;
};
+#if IS_ENABLED(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION >= 120000
+#define BUILD_PD_ACCESS __attribute__((access(read_only, 6, 7)))
+#else
+#define BUILD_PD_ACCESS
+#endif
+
int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size);
void crypto4xx_free_sa(struct crypto4xx_ctx *ctx);
int crypto4xx_build_pd(struct crypto_async_request *req,
@@ -154,11 +158,11 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
struct scatterlist *src,
struct scatterlist *dst,
const unsigned int datalen,
- const __le32 *iv, const u32 iv_len,
+ const void *iv, const u32 iv_len,
const struct dynamic_sa_ctl *sa,
const unsigned int sa_len,
const unsigned int assoclen,
- struct scatterlist *dst_tmp);
+ struct scatterlist *dst_tmp) BUILD_PD_ACCESS;
int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
@@ -177,11 +181,6 @@ int crypto4xx_encrypt_noiv_block(struct skcipher_request *req);
int crypto4xx_decrypt_noiv_block(struct skcipher_request *req);
int crypto4xx_rfc3686_encrypt(struct skcipher_request *req);
int crypto4xx_rfc3686_decrypt(struct skcipher_request *req);
-int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm);
-int crypto4xx_hash_digest(struct ahash_request *req);
-int crypto4xx_hash_final(struct ahash_request *req);
-int crypto4xx_hash_update(struct ahash_request *req);
-int crypto4xx_hash_init(struct ahash_request *req);
/*
* Note: Only use this function to copy items that is word aligned.
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 14bf86957d31..27c5d000b4b2 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -1743,7 +1743,8 @@ static struct skcipher_alg aes_xts_alg = {
.base.cra_driver_name = "atmel-xts-aes",
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_KERN_DRIVER_ONLY,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
@@ -2220,7 +2221,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
static void atmel_aes_crypto_alg_init(struct crypto_alg *alg)
{
- alg->cra_flags |= CRYPTO_ALG_ASYNC;
+ alg->cra_flags |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
alg->cra_alignmask = 0xf;
alg->cra_priority = ATMEL_AES_PRIORITY;
alg->cra_module = THIS_MODULE;
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 67a170608566..2cc36da163e8 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1254,7 +1254,8 @@ static int atmel_sha_cra_init(struct crypto_tfm *tfm)
static void atmel_sha_alg_init(struct ahash_alg *alg)
{
alg->halg.base.cra_priority = ATMEL_SHA_PRIORITY;
- alg->halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+ alg->halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_KERN_DRIVER_ONLY;
alg->halg.base.cra_ctxsize = sizeof(struct atmel_sha_ctx);
alg->halg.base.cra_module = THIS_MODULE;
alg->halg.base.cra_init = atmel_sha_cra_init;
@@ -2041,7 +2042,8 @@ static void atmel_sha_hmac_cra_exit(struct crypto_tfm *tfm)
static void atmel_sha_hmac_alg_init(struct ahash_alg *alg)
{
alg->halg.base.cra_priority = ATMEL_SHA_PRIORITY;
- alg->halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+ alg->halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_KERN_DRIVER_ONLY;
alg->halg.base.cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx);
alg->halg.base.cra_module = THIS_MODULE;
alg->halg.base.cra_init = atmel_sha_hmac_cra_init;
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index de9717e221e4..098f5532f389 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -785,7 +785,7 @@ static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm)
static void atmel_tdes_skcipher_alg_init(struct skcipher_alg *alg)
{
alg->base.cra_priority = ATMEL_TDES_PRIORITY;
- alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+ alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
alg->base.cra_ctxsize = sizeof(struct atmel_tdes_ctx);
alg->base.cra_module = THIS_MODULE;
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index d4b39184dbdb..38ff931059b4 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -573,6 +573,7 @@ static const struct soc_device_attribute caam_imx_soc_table[] = {
{ .soc_id = "i.MX7*", .data = &caam_imx7_data },
{ .soc_id = "i.MX8M*", .data = &caam_imx7_data },
{ .soc_id = "i.MX8ULP", .data = &caam_imx8ulp_data },
+ { .soc_id = "i.MX8QM", .data = &caam_imx8ulp_data },
{ .soc_id = "VF*", .data = &caam_vf610_data },
{ .family = "Freescale i.MX" },
{ /* sentinel */ }
diff --git a/drivers/crypto/cavium/Makefile b/drivers/crypto/cavium/Makefile
index 4679c06b611f..75227c587ed0 100644
--- a/drivers/crypto/cavium/Makefile
+++ b/drivers/crypto/cavium/Makefile
@@ -2,4 +2,5 @@
#
# Makefile for Cavium crypto device drivers
#
-obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += zip/
+obj-$(CONFIG_CRYPTO_DEV_CPT) += cpt/
+obj-$(CONFIG_CRYPTO_DEV_NITROX) += nitrox/
diff --git a/drivers/crypto/cavium/zip/Makefile b/drivers/crypto/cavium/zip/Makefile
deleted file mode 100644
index 020d189d793d..000000000000
--- a/drivers/crypto/cavium/zip/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for Cavium's ZIP Driver.
-#
-
-obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += thunderx_zip.o
-thunderx_zip-y := zip_main.o \
- zip_device.o \
- zip_crypto.o \
- zip_mem.o \
- zip_deflate.o \
- zip_inflate.o
diff --git a/drivers/crypto/cavium/zip/common.h b/drivers/crypto/cavium/zip/common.h
deleted file mode 100644
index 54f6fb054119..000000000000
--- a/drivers/crypto/cavium/zip/common.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __COMMON_H__
-#define __COMMON_H__
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/seq_file.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-/* Device specific zlib function definitions */
-#include "zip_device.h"
-
-/* ZIP device definitions */
-#include "zip_main.h"
-
-/* ZIP memory allocation/deallocation related definitions */
-#include "zip_mem.h"
-
-/* Device specific structure definitions */
-#include "zip_regs.h"
-
-#define ZIP_ERROR -1
-
-#define ZIP_FLUSH_FINISH 4
-
-#define RAW_FORMAT 0 /* for rawpipe */
-#define ZLIB_FORMAT 1 /* for zpipe */
-#define GZIP_FORMAT 2 /* for gzpipe */
-#define LZS_FORMAT 3 /* for lzspipe */
-
-/* Max number of ZIP devices supported */
-#define MAX_ZIP_DEVICES 2
-
-/* Configures the number of zip queues to be used */
-#define ZIP_NUM_QUEUES 2
-
-#define DYNAMIC_STOP_EXCESS 1024
-
-/* Maximum buffer sizes in direct mode */
-#define MAX_INPUT_BUFFER_SIZE (64 * 1024)
-#define MAX_OUTPUT_BUFFER_SIZE (64 * 1024)
-
-/**
- * struct zip_operation - common data structure for comp and decomp operations
- * @input: Next input byte is read from here
- * @output: Next output byte written here
- * @ctx_addr: Inflate context buffer address
- * @history: Pointer to the history buffer
- * @input_len: Number of bytes available at next_in
- * @input_total_len: Total number of input bytes read
- * @output_len: Remaining free space at next_out
- * @output_total_len: Total number of bytes output so far
- * @csum: Checksum value of the uncompressed data
- * @flush: Flush flag
- * @format: Format (depends on stream's wrap)
- * @speed: Speed depends on stream's level
- * @ccode: Compression code ( stream's strategy)
- * @lzs_flag: Flag for LZS support
- * @begin_file: Beginning of file indication for inflate
- * @history_len: Size of the history data
- * @end_file: Ending of the file indication for inflate
- * @compcode: Completion status of the ZIP invocation
- * @bytes_read: Input bytes read in current instruction
- * @bits_processed: Total bits processed for entire file
- * @sizeofptr: To distinguish between ILP32 and LP64
- * @sizeofzops: Optional just for padding
- *
- * This structure is used to maintain the required meta data for the
- * comp and decomp operations.
- */
-struct zip_operation {
- u8 *input;
- u8 *output;
- u64 ctx_addr;
- u64 history;
-
- u32 input_len;
- u32 input_total_len;
-
- u32 output_len;
- u32 output_total_len;
-
- u32 csum;
- u32 flush;
-
- u32 format;
- u32 speed;
- u32 ccode;
- u32 lzs_flag;
-
- u32 begin_file;
- u32 history_len;
-
- u32 end_file;
- u32 compcode;
- u32 bytes_read;
- u32 bits_processed;
-
- u32 sizeofptr;
- u32 sizeofzops;
-};
-
-static inline int zip_poll_result(union zip_zres_s *result)
-{
- int retries = 1000;
-
- while (!result->s.compcode) {
- if (!--retries) {
- pr_err("ZIP ERR: request timed out");
- return -ETIMEDOUT;
- }
- udelay(10);
- /*
- * Force re-reading of compcode which is updated
- * by the ZIP coprocessor.
- */
- rmb();
- }
- return 0;
-}
-
-/* error messages */
-#define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
- fmt "\n", __func__, __LINE__, ## args)
-
-#ifdef MSG_ENABLE
-/* Enable all messages */
-#define zip_msg(fmt, args...) pr_info("ZIP_MSG:" fmt "\n", ## args)
-#else
-#define zip_msg(fmt, args...)
-#endif
-
-#if defined(ZIP_DEBUG_ENABLE) && defined(MSG_ENABLE)
-
-#ifdef DEBUG_LEVEL
-
-#define FILE_NAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : \
- strrchr(__FILE__, '\\') ? strrchr(__FILE__, '\\') + 1 : __FILE__)
-
-#if DEBUG_LEVEL >= 4
-
-#define zip_dbg(fmt, args...) pr_info("ZIP DBG: %s: %s() : %d: " \
- fmt "\n", FILE_NAME, __func__, __LINE__, ## args)
-
-#elif DEBUG_LEVEL >= 3
-
-#define zip_dbg(fmt, args...) pr_info("ZIP DBG: %s: %s() : %d: " \
- fmt "\n", FILE_NAME, __func__, __LINE__, ## args)
-
-#elif DEBUG_LEVEL >= 2
-
-#define zip_dbg(fmt, args...) pr_info("ZIP DBG: %s() : %d: " \
- fmt "\n", __func__, __LINE__, ## args)
-
-#else
-
-#define zip_dbg(fmt, args...) pr_info("ZIP DBG:" fmt "\n", ## args)
-
-#endif /* DEBUG LEVEL >=4 */
-
-#else
-
-#define zip_dbg(fmt, args...) pr_info("ZIP DBG:" fmt "\n", ## args)
-
-#endif /* DEBUG_LEVEL */
-#else
-
-#define zip_dbg(fmt, args...)
-
-#endif /* ZIP_DEBUG_ENABLE && MSG_ENABLE*/
-
-#endif
diff --git a/drivers/crypto/cavium/zip/zip_crypto.c b/drivers/crypto/cavium/zip/zip_crypto.c
deleted file mode 100644
index 02e87f2d50db..000000000000
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#include "zip_crypto.h"
-
-static void zip_static_init_zip_ops(struct zip_operation *zip_ops,
- int lzs_flag)
-{
- zip_ops->flush = ZIP_FLUSH_FINISH;
-
- /* equivalent to level 6 of opensource zlib */
- zip_ops->speed = 1;
-
- if (!lzs_flag) {
- zip_ops->ccode = 0; /* Auto Huffman */
- zip_ops->lzs_flag = 0;
- zip_ops->format = ZLIB_FORMAT;
- } else {
- zip_ops->ccode = 3; /* LZS Encoding */
- zip_ops->lzs_flag = 1;
- zip_ops->format = LZS_FORMAT;
- }
- zip_ops->begin_file = 1;
- zip_ops->history_len = 0;
- zip_ops->end_file = 1;
- zip_ops->compcode = 0;
- zip_ops->csum = 1; /* Adler checksum desired */
-}
-
-static int zip_ctx_init(struct zip_kernel_ctx *zip_ctx, int lzs_flag)
-{
- struct zip_operation *comp_ctx = &zip_ctx->zip_comp;
- struct zip_operation *decomp_ctx = &zip_ctx->zip_decomp;
-
- zip_static_init_zip_ops(comp_ctx, lzs_flag);
- zip_static_init_zip_ops(decomp_ctx, lzs_flag);
-
- comp_ctx->input = zip_data_buf_alloc(MAX_INPUT_BUFFER_SIZE);
- if (!comp_ctx->input)
- return -ENOMEM;
-
- comp_ctx->output = zip_data_buf_alloc(MAX_OUTPUT_BUFFER_SIZE);
- if (!comp_ctx->output)
- goto err_comp_input;
-
- decomp_ctx->input = zip_data_buf_alloc(MAX_INPUT_BUFFER_SIZE);
- if (!decomp_ctx->input)
- goto err_comp_output;
-
- decomp_ctx->output = zip_data_buf_alloc(MAX_OUTPUT_BUFFER_SIZE);
- if (!decomp_ctx->output)
- goto err_decomp_input;
-
- return 0;
-
-err_decomp_input:
- zip_data_buf_free(decomp_ctx->input, MAX_INPUT_BUFFER_SIZE);
-
-err_comp_output:
- zip_data_buf_free(comp_ctx->output, MAX_OUTPUT_BUFFER_SIZE);
-
-err_comp_input:
- zip_data_buf_free(comp_ctx->input, MAX_INPUT_BUFFER_SIZE);
-
- return -ENOMEM;
-}
-
-static void zip_ctx_exit(struct zip_kernel_ctx *zip_ctx)
-{
- struct zip_operation *comp_ctx = &zip_ctx->zip_comp;
- struct zip_operation *dec_ctx = &zip_ctx->zip_decomp;
-
- zip_data_buf_free(comp_ctx->input, MAX_INPUT_BUFFER_SIZE);
- zip_data_buf_free(comp_ctx->output, MAX_OUTPUT_BUFFER_SIZE);
-
- zip_data_buf_free(dec_ctx->input, MAX_INPUT_BUFFER_SIZE);
- zip_data_buf_free(dec_ctx->output, MAX_OUTPUT_BUFFER_SIZE);
-}
-
-static int zip_compress(const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen,
- struct zip_kernel_ctx *zip_ctx)
-{
- struct zip_operation *zip_ops = NULL;
- struct zip_state *zip_state;
- struct zip_device *zip = NULL;
- int ret;
-
- if (!zip_ctx || !src || !dst || !dlen)
- return -ENOMEM;
-
- zip = zip_get_device(zip_get_node_id());
- if (!zip)
- return -ENODEV;
-
- zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
- if (!zip_state)
- return -ENOMEM;
-
- zip_ops = &zip_ctx->zip_comp;
-
- zip_ops->input_len = slen;
- zip_ops->output_len = *dlen;
- memcpy(zip_ops->input, src, slen);
-
- ret = zip_deflate(zip_ops, zip_state, zip);
-
- if (!ret) {
- *dlen = zip_ops->output_len;
- memcpy(dst, zip_ops->output, *dlen);
- }
- kfree(zip_state);
- return ret;
-}
-
-static int zip_decompress(const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen,
- struct zip_kernel_ctx *zip_ctx)
-{
- struct zip_operation *zip_ops = NULL;
- struct zip_state *zip_state;
- struct zip_device *zip = NULL;
- int ret;
-
- if (!zip_ctx || !src || !dst || !dlen)
- return -ENOMEM;
-
- zip = zip_get_device(zip_get_node_id());
- if (!zip)
- return -ENODEV;
-
- zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
- if (!zip_state)
- return -ENOMEM;
-
- zip_ops = &zip_ctx->zip_decomp;
- memcpy(zip_ops->input, src, slen);
-
- /* Work around for a bug in zlib which needs an extra bytes sometimes */
- if (zip_ops->ccode != 3) /* Not LZS Encoding */
- zip_ops->input[slen++] = 0;
-
- zip_ops->input_len = slen;
- zip_ops->output_len = *dlen;
-
- ret = zip_inflate(zip_ops, zip_state, zip);
-
- if (!ret) {
- *dlen = zip_ops->output_len;
- memcpy(dst, zip_ops->output, *dlen);
- }
- kfree(zip_state);
- return ret;
-}
-
-/* SCOMP framework start */
-void *zip_alloc_scomp_ctx_deflate(void)
-{
- int ret;
- struct zip_kernel_ctx *zip_ctx;
-
- zip_ctx = kzalloc(sizeof(*zip_ctx), GFP_KERNEL);
- if (!zip_ctx)
- return ERR_PTR(-ENOMEM);
-
- ret = zip_ctx_init(zip_ctx, 0);
-
- if (ret) {
- kfree_sensitive(zip_ctx);
- return ERR_PTR(ret);
- }
-
- return zip_ctx;
-}
-
-void *zip_alloc_scomp_ctx_lzs(void)
-{
- int ret;
- struct zip_kernel_ctx *zip_ctx;
-
- zip_ctx = kzalloc(sizeof(*zip_ctx), GFP_KERNEL);
- if (!zip_ctx)
- return ERR_PTR(-ENOMEM);
-
- ret = zip_ctx_init(zip_ctx, 1);
-
- if (ret) {
- kfree_sensitive(zip_ctx);
- return ERR_PTR(ret);
- }
-
- return zip_ctx;
-}
-
-void zip_free_scomp_ctx(void *ctx)
-{
- struct zip_kernel_ctx *zip_ctx = ctx;
-
- zip_ctx_exit(zip_ctx);
- kfree_sensitive(zip_ctx);
-}
-
-int zip_scomp_compress(struct crypto_scomp *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen, void *ctx)
-{
- struct zip_kernel_ctx *zip_ctx = ctx;
-
- return zip_compress(src, slen, dst, dlen, zip_ctx);
-}
-
-int zip_scomp_decompress(struct crypto_scomp *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen, void *ctx)
-{
- struct zip_kernel_ctx *zip_ctx = ctx;
-
- return zip_decompress(src, slen, dst, dlen, zip_ctx);
-} /* SCOMP framework end */
diff --git a/drivers/crypto/cavium/zip/zip_crypto.h b/drivers/crypto/cavium/zip/zip_crypto.h
deleted file mode 100644
index 10899ece2d1f..000000000000
--- a/drivers/crypto/cavium/zip/zip_crypto.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_CRYPTO_H__
-#define __ZIP_CRYPTO_H__
-
-#include <crypto/internal/scompress.h>
-#include "common.h"
-#include "zip_deflate.h"
-#include "zip_inflate.h"
-
-struct zip_kernel_ctx {
- struct zip_operation zip_comp;
- struct zip_operation zip_decomp;
-};
-
-void *zip_alloc_scomp_ctx_deflate(void);
-void *zip_alloc_scomp_ctx_lzs(void);
-void zip_free_scomp_ctx(void *zip_ctx);
-int zip_scomp_compress(struct crypto_scomp *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen, void *ctx);
-int zip_scomp_decompress(struct crypto_scomp *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen, void *ctx);
-#endif
diff --git a/drivers/crypto/cavium/zip/zip_deflate.c b/drivers/crypto/cavium/zip/zip_deflate.c
deleted file mode 100644
index d7133f857d67..000000000000
--- a/drivers/crypto/cavium/zip/zip_deflate.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#include <linux/delay.h>
-#include <linux/sched.h>
-
-#include "common.h"
-#include "zip_deflate.h"
-
-/* Prepares the deflate zip command */
-static int prepare_zip_command(struct zip_operation *zip_ops,
- struct zip_state *s, union zip_inst_s *zip_cmd)
-{
- union zip_zres_s *result_ptr = &s->result;
-
- memset(zip_cmd, 0, sizeof(s->zip_cmd));
- memset(result_ptr, 0, sizeof(s->result));
-
- /* IWORD #0 */
- /* History gather */
- zip_cmd->s.hg = 0;
- /* compression enable = 1 for deflate */
- zip_cmd->s.ce = 1;
- /* sf (sync flush) */
- zip_cmd->s.sf = 1;
- /* ef (end of file) */
- if (zip_ops->flush == ZIP_FLUSH_FINISH) {
- zip_cmd->s.ef = 1;
- zip_cmd->s.sf = 0;
- }
-
- zip_cmd->s.cc = zip_ops->ccode;
- /* ss (compression speed/storage) */
- zip_cmd->s.ss = zip_ops->speed;
-
- /* IWORD #1 */
- /* adler checksum */
- zip_cmd->s.adlercrc32 = zip_ops->csum;
- zip_cmd->s.historylength = zip_ops->history_len;
- zip_cmd->s.dg = 0;
-
- /* IWORD # 6 and 7 - compression input/history pointer */
- zip_cmd->s.inp_ptr_addr.s.addr = __pa(zip_ops->input);
- zip_cmd->s.inp_ptr_ctl.s.length = (zip_ops->input_len +
- zip_ops->history_len);
- zip_cmd->s.ds = 0;
-
- /* IWORD # 8 and 9 - Output pointer */
- zip_cmd->s.out_ptr_addr.s.addr = __pa(zip_ops->output);
- zip_cmd->s.out_ptr_ctl.s.length = zip_ops->output_len;
- /* maximum number of output-stream bytes that can be written */
- zip_cmd->s.totaloutputlength = zip_ops->output_len;
-
- /* IWORD # 10 and 11 - Result pointer */
- zip_cmd->s.res_ptr_addr.s.addr = __pa(result_ptr);
- /* Clearing completion code */
- result_ptr->s.compcode = 0;
-
- return 0;
-}
-
-/**
- * zip_deflate - API to offload deflate operation to hardware
- * @zip_ops: Pointer to zip operation structure
- * @s: Pointer to the structure representing zip state
- * @zip_dev: Pointer to zip device structure
- *
- * This function prepares the zip deflate command and submits it to the zip
- * engine for processing.
- *
- * Return: 0 if successful or error code
- */
-int zip_deflate(struct zip_operation *zip_ops, struct zip_state *s,
- struct zip_device *zip_dev)
-{
- union zip_inst_s *zip_cmd = &s->zip_cmd;
- union zip_zres_s *result_ptr = &s->result;
- u32 queue;
-
- /* Prepares zip command based on the input parameters */
- prepare_zip_command(zip_ops, s, zip_cmd);
-
- atomic64_add(zip_ops->input_len, &zip_dev->stats.comp_in_bytes);
- /* Loads zip command into command queues and rings door bell */
- queue = zip_load_instr(zip_cmd, zip_dev);
-
- /* Stats update for compression requests submitted */
- atomic64_inc(&zip_dev->stats.comp_req_submit);
-
- /* Wait for completion or error */
- zip_poll_result(result_ptr);
-
- /* Stats update for compression requests completed */
- atomic64_inc(&zip_dev->stats.comp_req_complete);
-
- zip_ops->compcode = result_ptr->s.compcode;
- switch (zip_ops->compcode) {
- case ZIP_CMD_NOTDONE:
- zip_dbg("Zip instruction not yet completed");
- return ZIP_ERROR;
-
- case ZIP_CMD_SUCCESS:
- zip_dbg("Zip instruction completed successfully");
- zip_update_cmd_bufs(zip_dev, queue);
- break;
-
- case ZIP_CMD_DTRUNC:
- zip_dbg("Output Truncate error");
- /* Returning ZIP_ERROR to avoid copy to user */
- return ZIP_ERROR;
-
- default:
- zip_err("Zip instruction failed. Code:%d", zip_ops->compcode);
- return ZIP_ERROR;
- }
-
- /* Update the CRC depending on the format */
- switch (zip_ops->format) {
- case RAW_FORMAT:
- zip_dbg("RAW Format: %d ", zip_ops->format);
- /* Get checksum from engine, need to feed it again */
- zip_ops->csum = result_ptr->s.adler32;
- break;
-
- case ZLIB_FORMAT:
- zip_dbg("ZLIB Format: %d ", zip_ops->format);
- zip_ops->csum = result_ptr->s.adler32;
- break;
-
- case GZIP_FORMAT:
- zip_dbg("GZIP Format: %d ", zip_ops->format);
- zip_ops->csum = result_ptr->s.crc32;
- break;
-
- case LZS_FORMAT:
- zip_dbg("LZS Format: %d ", zip_ops->format);
- break;
-
- default:
- zip_err("Unknown Format:%d\n", zip_ops->format);
- }
-
- atomic64_add(result_ptr->s.totalbyteswritten,
- &zip_dev->stats.comp_out_bytes);
-
- /* Update output_len */
- if (zip_ops->output_len < result_ptr->s.totalbyteswritten) {
- /* Dynamic stop && strm->output_len < zipconstants[onfsize] */
- zip_err("output_len (%d) < total bytes written(%d)\n",
- zip_ops->output_len, result_ptr->s.totalbyteswritten);
- zip_ops->output_len = 0;
-
- } else {
- zip_ops->output_len = result_ptr->s.totalbyteswritten;
- }
-
- return 0;
-}
diff --git a/drivers/crypto/cavium/zip/zip_deflate.h b/drivers/crypto/cavium/zip/zip_deflate.h
deleted file mode 100644
index 1d32e76edc4d..000000000000
--- a/drivers/crypto/cavium/zip/zip_deflate.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_DEFLATE_H__
-#define __ZIP_DEFLATE_H__
-
-/**
- * zip_deflate - API to offload deflate operation to hardware
- * @zip_ops: Pointer to zip operation structure
- * @s: Pointer to the structure representing zip state
- * @zip_dev: Pointer to the structure representing zip device
- *
- * This function prepares the zip deflate command and submits it to the zip
- * engine by ringing the doorbell.
- *
- * Return: 0 if successful or error code
- */
-int zip_deflate(struct zip_operation *zip_ops, struct zip_state *s,
- struct zip_device *zip_dev);
-#endif
diff --git a/drivers/crypto/cavium/zip/zip_device.c b/drivers/crypto/cavium/zip/zip_device.c
deleted file mode 100644
index f174ec29ed69..000000000000
--- a/drivers/crypto/cavium/zip/zip_device.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#include "common.h"
-#include "zip_deflate.h"
-
-/**
- * zip_cmd_queue_consumed - Calculates the space consumed in the command queue.
- *
- * @zip_dev: Pointer to zip device structure
- * @queue: Queue number
- *
- * Return: Bytes consumed in the command queue buffer.
- */
-static inline u32 zip_cmd_queue_consumed(struct zip_device *zip_dev, int queue)
-{
- return ((zip_dev->iq[queue].sw_head - zip_dev->iq[queue].sw_tail) *
- sizeof(u64 *));
-}
-
-/**
- * zip_load_instr - Submits the instruction into the ZIP command queue
- * @instr: Pointer to the instruction to be submitted
- * @zip_dev: Pointer to ZIP device structure to which the instruction is to
- * be submitted
- *
- * This function copies the ZIP instruction to the command queue and rings the
- * doorbell to notify the engine of the instruction submission. The command
- * queue is maintained in a circular fashion. When there is space for exactly
- * one instruction in the queue, next chunk pointer of the queue is made to
- * point to the head of the queue, thus maintaining a circular queue.
- *
- * Return: Queue number to which the instruction was submitted
- */
-u32 zip_load_instr(union zip_inst_s *instr,
- struct zip_device *zip_dev)
-{
- union zip_quex_doorbell dbell;
- u32 queue = 0;
- u32 consumed = 0;
- u64 *ncb_ptr = NULL;
- union zip_nptr_s ncp;
-
- /*
- * Distribute the instructions between the enabled queues based on
- * the CPU id.
- */
- if (raw_smp_processor_id() % 2 == 0)
- queue = 0;
- else
- queue = 1;
-
- zip_dbg("CPU Core: %d Queue number:%d", raw_smp_processor_id(), queue);
-
- /* Take cmd buffer lock */
- spin_lock(&zip_dev->iq[queue].lock);
-
- /*
- * Command Queue implementation
- * 1. If there is place for new instructions, push the cmd at sw_head.
- * 2. If there is place for exactly one instruction, push the new cmd
- * at the sw_head. Make sw_head point to the sw_tail to make it
- * circular. Write sw_head's physical address to the "Next-Chunk
- * Buffer Ptr" to make it cmd_hw_tail.
- * 3. Ring the door bell.
- */
- zip_dbg("sw_head : %lx", zip_dev->iq[queue].sw_head);
- zip_dbg("sw_tail : %lx", zip_dev->iq[queue].sw_tail);
-
- consumed = zip_cmd_queue_consumed(zip_dev, queue);
- /* Check if there is space to push just one cmd */
- if ((consumed + 128) == (ZIP_CMD_QBUF_SIZE - 8)) {
- zip_dbg("Cmd queue space available for single command");
- /* Space for one cmd, pust it and make it circular queue */
- memcpy((u8 *)zip_dev->iq[queue].sw_head, (u8 *)instr,
- sizeof(union zip_inst_s));
- zip_dev->iq[queue].sw_head += 16; /* 16 64_bit words = 128B */
-
- /* Now, point the "Next-Chunk Buffer Ptr" to sw_head */
- ncb_ptr = zip_dev->iq[queue].sw_head;
-
- zip_dbg("ncb addr :0x%lx sw_head addr :0x%lx",
- ncb_ptr, zip_dev->iq[queue].sw_head - 16);
-
- /* Using Circular command queue */
- zip_dev->iq[queue].sw_head = zip_dev->iq[queue].sw_tail;
- /* Mark this buffer for free */
- zip_dev->iq[queue].free_flag = 1;
-
- /* Write new chunk buffer address at "Next-Chunk Buffer Ptr" */
- ncp.u_reg64 = 0ull;
- ncp.s.addr = __pa(zip_dev->iq[queue].sw_head);
- *ncb_ptr = ncp.u_reg64;
- zip_dbg("*ncb_ptr :0x%lx sw_head[phys] :0x%lx",
- *ncb_ptr, __pa(zip_dev->iq[queue].sw_head));
-
- zip_dev->iq[queue].pend_cnt++;
-
- } else {
- zip_dbg("Enough space is available for commands");
- /* Push this cmd to cmd queue buffer */
- memcpy((u8 *)zip_dev->iq[queue].sw_head, (u8 *)instr,
- sizeof(union zip_inst_s));
- zip_dev->iq[queue].sw_head += 16; /* 16 64_bit words = 128B */
-
- zip_dev->iq[queue].pend_cnt++;
- }
- zip_dbg("sw_head :0x%lx sw_tail :0x%lx hw_tail :0x%lx",
- zip_dev->iq[queue].sw_head, zip_dev->iq[queue].sw_tail,
- zip_dev->iq[queue].hw_tail);
-
- zip_dbg(" Pushed the new cmd : pend_cnt : %d",
- zip_dev->iq[queue].pend_cnt);
-
- /* Ring the doorbell */
- dbell.u_reg64 = 0ull;
- dbell.s.dbell_cnt = 1;
- zip_reg_write(dbell.u_reg64,
- (zip_dev->reg_base + ZIP_QUEX_DOORBELL(queue)));
-
- /* Unlock cmd buffer lock */
- spin_unlock(&zip_dev->iq[queue].lock);
-
- return queue;
-}
-
-/**
- * zip_update_cmd_bufs - Updates the queue statistics after posting the
- * instruction
- * @zip_dev: Pointer to zip device structure
- * @queue: Queue number
- */
-void zip_update_cmd_bufs(struct zip_device *zip_dev, u32 queue)
-{
- /* Take cmd buffer lock */
- spin_lock(&zip_dev->iq[queue].lock);
-
- /* Check if the previous buffer can be freed */
- if (zip_dev->iq[queue].free_flag == 1) {
- zip_dbg("Free flag. Free cmd buffer, adjust sw head and tail");
- /* Reset the free flag */
- zip_dev->iq[queue].free_flag = 0;
-
- /* Point the hw_tail to start of the new chunk buffer */
- zip_dev->iq[queue].hw_tail = zip_dev->iq[queue].sw_head;
- } else {
- zip_dbg("Free flag not set. increment hw tail");
- zip_dev->iq[queue].hw_tail += 16; /* 16 64_bit words = 128B */
- }
-
- zip_dev->iq[queue].done_cnt++;
- zip_dev->iq[queue].pend_cnt--;
-
- zip_dbg("sw_head :0x%lx sw_tail :0x%lx hw_tail :0x%lx",
- zip_dev->iq[queue].sw_head, zip_dev->iq[queue].sw_tail,
- zip_dev->iq[queue].hw_tail);
- zip_dbg(" Got CC : pend_cnt : %d\n", zip_dev->iq[queue].pend_cnt);
-
- spin_unlock(&zip_dev->iq[queue].lock);
-}
diff --git a/drivers/crypto/cavium/zip/zip_device.h b/drivers/crypto/cavium/zip/zip_device.h
deleted file mode 100644
index 9e18b3b93d38..000000000000
--- a/drivers/crypto/cavium/zip/zip_device.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_DEVICE_H__
-#define __ZIP_DEVICE_H__
-
-#include <linux/types.h>
-#include "zip_main.h"
-
-struct sg_info {
- /*
- * Pointer to the input data when scatter_gather == 0 and
- * pointer to the input gather list buffer when scatter_gather == 1
- */
- union zip_zptr_s *gather;
-
- /*
- * Pointer to the output data when scatter_gather == 0 and
- * pointer to the output scatter list buffer when scatter_gather == 1
- */
- union zip_zptr_s *scatter;
-
- /*
- * Holds size of the output buffer pointed by scatter list
- * when scatter_gather == 1
- */
- u64 scatter_buf_size;
-
- /* for gather data */
- u64 gather_enable;
-
- /* for scatter data */
- u64 scatter_enable;
-
- /* Number of gather list pointers for gather data */
- u32 gbuf_cnt;
-
- /* Number of scatter list pointers for scatter data */
- u32 sbuf_cnt;
-
- /* Buffers allocation state */
- u8 alloc_state;
-};
-
-/**
- * struct zip_state - Structure representing the required information related
- * to a command
- * @zip_cmd: Pointer to zip instruction structure
- * @result: Pointer to zip result structure
- * @ctx: Context pointer for inflate
- * @history: Decompression history pointer
- * @sginfo: Scatter-gather info structure
- */
-struct zip_state {
- union zip_inst_s zip_cmd;
- union zip_zres_s result;
- union zip_zptr_s *ctx;
- union zip_zptr_s *history;
- struct sg_info sginfo;
-};
-
-#define ZIP_CONTEXT_SIZE 2048
-#define ZIP_INFLATE_HISTORY_SIZE 32768
-#define ZIP_DEFLATE_HISTORY_SIZE 32768
-
-#endif
diff --git a/drivers/crypto/cavium/zip/zip_inflate.c b/drivers/crypto/cavium/zip/zip_inflate.c
deleted file mode 100644
index 7e0d73e2f89e..000000000000
--- a/drivers/crypto/cavium/zip/zip_inflate.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#include <linux/delay.h>
-#include <linux/sched.h>
-
-#include "common.h"
-#include "zip_inflate.h"
-
-static int prepare_inflate_zcmd(struct zip_operation *zip_ops,
- struct zip_state *s, union zip_inst_s *zip_cmd)
-{
- union zip_zres_s *result_ptr = &s->result;
-
- memset(zip_cmd, 0, sizeof(s->zip_cmd));
- memset(result_ptr, 0, sizeof(s->result));
-
- /* IWORD#0 */
-
- /* Decompression History Gather list - no gather list */
- zip_cmd->s.hg = 0;
- /* For decompression, CE must be 0x0. */
- zip_cmd->s.ce = 0;
- /* For decompression, SS must be 0x0. */
- zip_cmd->s.ss = 0;
- /* For decompression, SF should always be set. */
- zip_cmd->s.sf = 1;
-
- /* Begin File */
- if (zip_ops->begin_file == 0)
- zip_cmd->s.bf = 0;
- else
- zip_cmd->s.bf = 1;
-
- zip_cmd->s.ef = 1;
- /* 0: for Deflate decompression, 3: for LZS decompression */
- zip_cmd->s.cc = zip_ops->ccode;
-
- /* IWORD #1*/
-
- /* adler checksum */
- zip_cmd->s.adlercrc32 = zip_ops->csum;
-
- /*
- * HISTORYLENGTH must be 0x0 for any ZIP decompress operation.
- * History data is added to a decompression operation via IWORD3.
- */
- zip_cmd->s.historylength = 0;
- zip_cmd->s.ds = 0;
-
- /* IWORD # 8 and 9 - Output pointer */
- zip_cmd->s.out_ptr_addr.s.addr = __pa(zip_ops->output);
- zip_cmd->s.out_ptr_ctl.s.length = zip_ops->output_len;
-
- /* Maximum number of output-stream bytes that can be written */
- zip_cmd->s.totaloutputlength = zip_ops->output_len;
-
- zip_dbg("Data Direct Input case ");
-
- /* IWORD # 6 and 7 - input pointer */
- zip_cmd->s.dg = 0;
- zip_cmd->s.inp_ptr_addr.s.addr = __pa((u8 *)zip_ops->input);
- zip_cmd->s.inp_ptr_ctl.s.length = zip_ops->input_len;
-
- /* IWORD # 10 and 11 - Result pointer */
- zip_cmd->s.res_ptr_addr.s.addr = __pa(result_ptr);
-
- /* Clearing completion code */
- result_ptr->s.compcode = 0;
-
- /* Returning 0 for time being.*/
- return 0;
-}
-
-/**
- * zip_inflate - API to offload inflate operation to hardware
- * @zip_ops: Pointer to zip operation structure
- * @s: Pointer to the structure representing zip state
- * @zip_dev: Pointer to zip device structure
- *
- * This function prepares the zip inflate command and submits it to the zip
- * engine for processing.
- *
- * Return: 0 if successful or error code
- */
-int zip_inflate(struct zip_operation *zip_ops, struct zip_state *s,
- struct zip_device *zip_dev)
-{
- union zip_inst_s *zip_cmd = &s->zip_cmd;
- union zip_zres_s *result_ptr = &s->result;
- u32 queue;
-
- /* Prepare inflate zip command */
- prepare_inflate_zcmd(zip_ops, s, zip_cmd);
-
- atomic64_add(zip_ops->input_len, &zip_dev->stats.decomp_in_bytes);
-
- /* Load inflate command to zip queue and ring the doorbell */
- queue = zip_load_instr(zip_cmd, zip_dev);
-
- /* Decompression requests submitted stats update */
- atomic64_inc(&zip_dev->stats.decomp_req_submit);
-
- /* Wait for completion or error */
- zip_poll_result(result_ptr);
-
- /* Decompression requests completed stats update */
- atomic64_inc(&zip_dev->stats.decomp_req_complete);
-
- zip_ops->compcode = result_ptr->s.compcode;
- switch (zip_ops->compcode) {
- case ZIP_CMD_NOTDONE:
- zip_dbg("Zip Instruction not yet completed\n");
- return ZIP_ERROR;
-
- case ZIP_CMD_SUCCESS:
- zip_dbg("Zip Instruction completed successfully\n");
- break;
-
- case ZIP_CMD_DYNAMIC_STOP:
- zip_dbg(" Dynamic stop Initiated\n");
- break;
-
- default:
- zip_dbg("Instruction failed. Code = %d\n", zip_ops->compcode);
- atomic64_inc(&zip_dev->stats.decomp_bad_reqs);
- zip_update_cmd_bufs(zip_dev, queue);
- return ZIP_ERROR;
- }
-
- zip_update_cmd_bufs(zip_dev, queue);
-
- if ((zip_ops->ccode == 3) && (zip_ops->flush == 4) &&
- (zip_ops->compcode != ZIP_CMD_DYNAMIC_STOP))
- result_ptr->s.ef = 1;
-
- zip_ops->csum = result_ptr->s.adler32;
-
- atomic64_add(result_ptr->s.totalbyteswritten,
- &zip_dev->stats.decomp_out_bytes);
-
- if (zip_ops->output_len < result_ptr->s.totalbyteswritten) {
- zip_err("output_len (%d) < total bytes written (%d)\n",
- zip_ops->output_len, result_ptr->s.totalbyteswritten);
- zip_ops->output_len = 0;
- } else {
- zip_ops->output_len = result_ptr->s.totalbyteswritten;
- }
-
- zip_ops->bytes_read = result_ptr->s.totalbytesread;
- zip_ops->bits_processed = result_ptr->s.totalbitsprocessed;
- zip_ops->end_file = result_ptr->s.ef;
- if (zip_ops->end_file) {
- switch (zip_ops->format) {
- case RAW_FORMAT:
- zip_dbg("RAW Format: %d ", zip_ops->format);
- /* Get checksum from engine */
- zip_ops->csum = result_ptr->s.adler32;
- break;
-
- case ZLIB_FORMAT:
- zip_dbg("ZLIB Format: %d ", zip_ops->format);
- zip_ops->csum = result_ptr->s.adler32;
- break;
-
- case GZIP_FORMAT:
- zip_dbg("GZIP Format: %d ", zip_ops->format);
- zip_ops->csum = result_ptr->s.crc32;
- break;
-
- case LZS_FORMAT:
- zip_dbg("LZS Format: %d ", zip_ops->format);
- break;
-
- default:
- zip_err("Format error:%d\n", zip_ops->format);
- }
- }
-
- return 0;
-}
diff --git a/drivers/crypto/cavium/zip/zip_inflate.h b/drivers/crypto/cavium/zip/zip_inflate.h
deleted file mode 100644
index 6b20f179978e..000000000000
--- a/drivers/crypto/cavium/zip/zip_inflate.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_INFLATE_H__
-#define __ZIP_INFLATE_H__
-
-/**
- * zip_inflate - API to offload inflate operation to hardware
- * @zip_ops: Pointer to zip operation structure
- * @s: Pointer to the structure representing zip state
- * @zip_dev: Pointer to the structure representing zip device
- *
- * This function prepares the zip inflate command and submits it to the zip
- * engine for processing.
- *
- * Return: 0 if successful or error code
- */
-int zip_inflate(struct zip_operation *zip_ops, struct zip_state *s,
- struct zip_device *zip_dev);
-#endif
diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c
deleted file mode 100644
index abd58de4343d..000000000000
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ /dev/null
@@ -1,603 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#include "common.h"
-#include "zip_crypto.h"
-
-#define DRV_NAME "ThunderX-ZIP"
-
-static struct zip_device *zip_dev[MAX_ZIP_DEVICES];
-
-static const struct pci_device_id zip_id_table[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDERX_ZIP) },
- { 0, }
-};
-
-static void zip_debugfs_init(void);
-static void zip_debugfs_exit(void);
-static int zip_register_compression_device(void);
-static void zip_unregister_compression_device(void);
-
-void zip_reg_write(u64 val, u64 __iomem *addr)
-{
- writeq(val, addr);
-}
-
-u64 zip_reg_read(u64 __iomem *addr)
-{
- return readq(addr);
-}
-
-/*
- * Allocates new ZIP device structure
- * Returns zip_device pointer or NULL if cannot allocate memory for zip_device
- */
-static struct zip_device *zip_alloc_device(struct pci_dev *pdev)
-{
- struct zip_device *zip = NULL;
- int idx;
-
- for (idx = 0; idx < MAX_ZIP_DEVICES; idx++) {
- if (!zip_dev[idx])
- break;
- }
-
- /* To ensure that the index is within the limit */
- if (idx < MAX_ZIP_DEVICES)
- zip = devm_kzalloc(&pdev->dev, sizeof(*zip), GFP_KERNEL);
-
- if (!zip)
- return NULL;
-
- zip_dev[idx] = zip;
- zip->index = idx;
- return zip;
-}
-
-/**
- * zip_get_device - Get ZIP device based on node id of cpu
- *
- * @node: Node id of the current cpu
- * Return: Pointer to Zip device structure
- */
-struct zip_device *zip_get_device(int node)
-{
- if ((node < MAX_ZIP_DEVICES) && (node >= 0))
- return zip_dev[node];
-
- zip_err("ZIP device not found for node id %d\n", node);
- return NULL;
-}
-
-/**
- * zip_get_node_id - Get the node id of the current cpu
- *
- * Return: Node id of the current cpu
- */
-int zip_get_node_id(void)
-{
- return cpu_to_node(raw_smp_processor_id());
-}
-
-/* Initializes the ZIP h/w sub-system */
-static int zip_init_hw(struct zip_device *zip)
-{
- union zip_cmd_ctl cmd_ctl;
- union zip_constants constants;
- union zip_que_ena que_ena;
- union zip_quex_map que_map;
- union zip_que_pri que_pri;
-
- union zip_quex_sbuf_addr que_sbuf_addr;
- union zip_quex_sbuf_ctl que_sbuf_ctl;
-
- int q = 0;
-
- /* Enable the ZIP Engine(Core) Clock */
- cmd_ctl.u_reg64 = zip_reg_read(zip->reg_base + ZIP_CMD_CTL);
- cmd_ctl.s.forceclk = 1;
- zip_reg_write(cmd_ctl.u_reg64 & 0xFF, (zip->reg_base + ZIP_CMD_CTL));
-
- zip_msg("ZIP_CMD_CTL : 0x%016llx",
- zip_reg_read(zip->reg_base + ZIP_CMD_CTL));
-
- constants.u_reg64 = zip_reg_read(zip->reg_base + ZIP_CONSTANTS);
- zip->depth = constants.s.depth;
- zip->onfsize = constants.s.onfsize;
- zip->ctxsize = constants.s.ctxsize;
-
- zip_msg("depth: 0x%016llx , onfsize : 0x%016llx , ctxsize : 0x%016llx",
- zip->depth, zip->onfsize, zip->ctxsize);
-
- /*
- * Program ZIP_QUE(0..7)_SBUF_ADDR and ZIP_QUE(0..7)_SBUF_CTL to
- * have the correct buffer pointer and size configured for each
- * instruction queue.
- */
- for (q = 0; q < ZIP_NUM_QUEUES; q++) {
- que_sbuf_ctl.u_reg64 = 0ull;
- que_sbuf_ctl.s.size = (ZIP_CMD_QBUF_SIZE / sizeof(u64));
- que_sbuf_ctl.s.inst_be = 0;
- que_sbuf_ctl.s.stream_id = 0;
- zip_reg_write(que_sbuf_ctl.u_reg64,
- (zip->reg_base + ZIP_QUEX_SBUF_CTL(q)));
-
- zip_msg("QUEX_SBUF_CTL[%d]: 0x%016llx", q,
- zip_reg_read(zip->reg_base + ZIP_QUEX_SBUF_CTL(q)));
- }
-
- for (q = 0; q < ZIP_NUM_QUEUES; q++) {
- memset(&zip->iq[q], 0x0, sizeof(struct zip_iq));
-
- spin_lock_init(&zip->iq[q].lock);
-
- if (zip_cmd_qbuf_alloc(zip, q)) {
- while (q != 0) {
- q--;
- zip_cmd_qbuf_free(zip, q);
- }
- return -ENOMEM;
- }
-
- /* Initialize tail ptr to head */
- zip->iq[q].sw_tail = zip->iq[q].sw_head;
- zip->iq[q].hw_tail = zip->iq[q].sw_head;
-
- /* Write the physical addr to register */
- que_sbuf_addr.u_reg64 = 0ull;
- que_sbuf_addr.s.ptr = (__pa(zip->iq[q].sw_head) >>
- ZIP_128B_ALIGN);
-
- zip_msg("QUE[%d]_PTR(PHYS): 0x%016llx", q,
- (u64)que_sbuf_addr.s.ptr);
-
- zip_reg_write(que_sbuf_addr.u_reg64,
- (zip->reg_base + ZIP_QUEX_SBUF_ADDR(q)));
-
- zip_msg("QUEX_SBUF_ADDR[%d]: 0x%016llx", q,
- zip_reg_read(zip->reg_base + ZIP_QUEX_SBUF_ADDR(q)));
-
- zip_dbg("sw_head :0x%lx sw_tail :0x%lx hw_tail :0x%lx",
- zip->iq[q].sw_head, zip->iq[q].sw_tail,
- zip->iq[q].hw_tail);
- zip_dbg("sw_head phy addr : 0x%lx", que_sbuf_addr.s.ptr);
- }
-
- /*
- * Queue-to-ZIP core mapping
- * If a queue is not mapped to a particular core, it is equivalent to
- * the ZIP core being disabled.
- */
- que_ena.u_reg64 = 0x0ull;
- /* Enabling queues based on ZIP_NUM_QUEUES */
- for (q = 0; q < ZIP_NUM_QUEUES; q++)
- que_ena.s.ena |= (0x1 << q);
- zip_reg_write(que_ena.u_reg64, (zip->reg_base + ZIP_QUE_ENA));
-
- zip_msg("QUE_ENA : 0x%016llx",
- zip_reg_read(zip->reg_base + ZIP_QUE_ENA));
-
- for (q = 0; q < ZIP_NUM_QUEUES; q++) {
- que_map.u_reg64 = 0ull;
- /* Mapping each queue to two ZIP cores */
- que_map.s.zce = 0x3;
- zip_reg_write(que_map.u_reg64,
- (zip->reg_base + ZIP_QUEX_MAP(q)));
-
- zip_msg("QUE_MAP(%d) : 0x%016llx", q,
- zip_reg_read(zip->reg_base + ZIP_QUEX_MAP(q)));
- }
-
- que_pri.u_reg64 = 0ull;
- for (q = 0; q < ZIP_NUM_QUEUES; q++)
- que_pri.s.pri |= (0x1 << q); /* Higher Priority RR */
- zip_reg_write(que_pri.u_reg64, (zip->reg_base + ZIP_QUE_PRI));
-
- zip_msg("QUE_PRI %016llx", zip_reg_read(zip->reg_base + ZIP_QUE_PRI));
-
- return 0;
-}
-
-static void zip_reset(struct zip_device *zip)
-{
- union zip_cmd_ctl cmd_ctl;
-
- cmd_ctl.u_reg64 = 0x0ull;
- cmd_ctl.s.reset = 1; /* Forces ZIP cores to do reset */
- zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
-}
-
-static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- struct device *dev = &pdev->dev;
- struct zip_device *zip = NULL;
- int err;
-
- zip = zip_alloc_device(pdev);
- if (!zip)
- return -ENOMEM;
-
- dev_info(dev, "Found ZIP device %d %x:%x on Node %d\n", zip->index,
- pdev->vendor, pdev->device, dev_to_node(dev));
-
- pci_set_drvdata(pdev, zip);
- zip->pdev = pdev;
-
- err = pci_enable_device(pdev);
- if (err) {
- dev_err(dev, "Failed to enable PCI device");
- goto err_free_device;
- }
-
- err = pci_request_regions(pdev, DRV_NAME);
- if (err) {
- dev_err(dev, "PCI request regions failed 0x%x", err);
- goto err_disable_device;
- }
-
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
- if (err) {
- dev_err(dev, "Unable to get usable 48-bit DMA configuration\n");
- goto err_release_regions;
- }
-
- /* MAP configuration registers */
- zip->reg_base = pci_ioremap_bar(pdev, PCI_CFG_ZIP_PF_BAR0);
- if (!zip->reg_base) {
- dev_err(dev, "ZIP: Cannot map BAR0 CSR memory space, aborting");
- err = -ENOMEM;
- goto err_release_regions;
- }
-
- /* Initialize ZIP Hardware */
- err = zip_init_hw(zip);
- if (err)
- goto err_release_regions;
-
- /* Register with the Kernel Crypto Interface */
- err = zip_register_compression_device();
- if (err < 0) {
- zip_err("ZIP: Kernel Crypto Registration failed\n");
- goto err_register;
- }
-
- /* comp-decomp statistics are handled with debugfs interface */
- zip_debugfs_init();
-
- return 0;
-
-err_register:
- zip_reset(zip);
-
-err_release_regions:
- if (zip->reg_base)
- iounmap(zip->reg_base);
- pci_release_regions(pdev);
-
-err_disable_device:
- pci_disable_device(pdev);
-
-err_free_device:
- pci_set_drvdata(pdev, NULL);
-
- /* Remove zip_dev from zip_device list, free the zip_device memory */
- zip_dev[zip->index] = NULL;
- devm_kfree(dev, zip);
-
- return err;
-}
-
-static void zip_remove(struct pci_dev *pdev)
-{
- struct zip_device *zip = pci_get_drvdata(pdev);
- int q = 0;
-
- if (!zip)
- return;
-
- zip_debugfs_exit();
-
- zip_unregister_compression_device();
-
- if (zip->reg_base) {
- zip_reset(zip);
- iounmap(zip->reg_base);
- }
-
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-
- /*
- * Free Command Queue buffers. This free should be called for all
- * the enabled Queues.
- */
- for (q = 0; q < ZIP_NUM_QUEUES; q++)
- zip_cmd_qbuf_free(zip, q);
-
- pci_set_drvdata(pdev, NULL);
- /* remove zip device from zip device list */
- zip_dev[zip->index] = NULL;
-}
-
-/* PCI Sub-System Interface */
-static struct pci_driver zip_driver = {
- .name = DRV_NAME,
- .id_table = zip_id_table,
- .probe = zip_probe,
- .remove = zip_remove,
-};
-
-/* Kernel Crypto Subsystem Interface */
-
-static struct scomp_alg zip_scomp_deflate = {
- .alloc_ctx = zip_alloc_scomp_ctx_deflate,
- .free_ctx = zip_free_scomp_ctx,
- .compress = zip_scomp_compress,
- .decompress = zip_scomp_decompress,
- .base = {
- .cra_name = "deflate",
- .cra_driver_name = "deflate-scomp-cavium",
- .cra_module = THIS_MODULE,
- .cra_priority = 300,
- }
-};
-
-static struct scomp_alg zip_scomp_lzs = {
- .alloc_ctx = zip_alloc_scomp_ctx_lzs,
- .free_ctx = zip_free_scomp_ctx,
- .compress = zip_scomp_compress,
- .decompress = zip_scomp_decompress,
- .base = {
- .cra_name = "lzs",
- .cra_driver_name = "lzs-scomp-cavium",
- .cra_module = THIS_MODULE,
- .cra_priority = 300,
- }
-};
-
-static int zip_register_compression_device(void)
-{
- int ret;
-
- ret = crypto_register_scomp(&zip_scomp_deflate);
- if (ret < 0) {
- zip_err("Deflate scomp algorithm registration failed\n");
- return ret;
- }
-
- ret = crypto_register_scomp(&zip_scomp_lzs);
- if (ret < 0) {
- zip_err("LZS scomp algorithm registration failed\n");
- goto err_unregister_scomp_deflate;
- }
-
- return ret;
-
-err_unregister_scomp_deflate:
- crypto_unregister_scomp(&zip_scomp_deflate);
-
- return ret;
-}
-
-static void zip_unregister_compression_device(void)
-{
- crypto_unregister_scomp(&zip_scomp_deflate);
- crypto_unregister_scomp(&zip_scomp_lzs);
-}
-
-/*
- * debugfs functions
- */
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-
-/* Displays ZIP device statistics */
-static int zip_stats_show(struct seq_file *s, void *unused)
-{
- u64 val = 0ull;
- u64 avg_chunk = 0ull, avg_cr = 0ull;
- u32 q = 0;
-
- int index = 0;
- struct zip_device *zip;
- struct zip_stats *st;
-
- for (index = 0; index < MAX_ZIP_DEVICES; index++) {
- u64 pending = 0;
-
- if (zip_dev[index]) {
- zip = zip_dev[index];
- st = &zip->stats;
-
- /* Get all the pending requests */
- for (q = 0; q < ZIP_NUM_QUEUES; q++) {
- val = zip_reg_read((zip->reg_base +
- ZIP_DBG_QUEX_STA(q)));
- pending += val >> 32 & 0xffffff;
- }
-
- val = atomic64_read(&st->comp_req_complete);
- avg_chunk = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
-
- val = atomic64_read(&st->comp_out_bytes);
- avg_cr = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
- seq_printf(s, " ZIP Device %d Stats\n"
- "-----------------------------------\n"
- "Comp Req Submitted : \t%lld\n"
- "Comp Req Completed : \t%lld\n"
- "Compress In Bytes : \t%lld\n"
- "Compressed Out Bytes : \t%lld\n"
- "Average Chunk size : \t%llu\n"
- "Average Compression ratio : \t%llu\n"
- "Decomp Req Submitted : \t%lld\n"
- "Decomp Req Completed : \t%lld\n"
- "Decompress In Bytes : \t%lld\n"
- "Decompressed Out Bytes : \t%lld\n"
- "Decompress Bad requests : \t%lld\n"
- "Pending Req : \t%lld\n"
- "---------------------------------\n",
- index,
- (u64)atomic64_read(&st->comp_req_submit),
- (u64)atomic64_read(&st->comp_req_complete),
- (u64)atomic64_read(&st->comp_in_bytes),
- (u64)atomic64_read(&st->comp_out_bytes),
- avg_chunk,
- avg_cr,
- (u64)atomic64_read(&st->decomp_req_submit),
- (u64)atomic64_read(&st->decomp_req_complete),
- (u64)atomic64_read(&st->decomp_in_bytes),
- (u64)atomic64_read(&st->decomp_out_bytes),
- (u64)atomic64_read(&st->decomp_bad_reqs),
- pending);
- }
- }
- return 0;
-}
-
-/* Clears stats data */
-static int zip_clear_show(struct seq_file *s, void *unused)
-{
- int index = 0;
-
- for (index = 0; index < MAX_ZIP_DEVICES; index++) {
- if (zip_dev[index]) {
- memset(&zip_dev[index]->stats, 0,
- sizeof(struct zip_stats));
- seq_printf(s, "Cleared stats for zip %d\n", index);
- }
- }
-
- return 0;
-}
-
-static struct zip_registers zipregs[64] = {
- {"ZIP_CMD_CTL ", 0x0000ull},
- {"ZIP_THROTTLE ", 0x0010ull},
- {"ZIP_CONSTANTS ", 0x00A0ull},
- {"ZIP_QUE0_MAP ", 0x1400ull},
- {"ZIP_QUE1_MAP ", 0x1408ull},
- {"ZIP_QUE_ENA ", 0x0500ull},
- {"ZIP_QUE_PRI ", 0x0508ull},
- {"ZIP_QUE0_DONE ", 0x2000ull},
- {"ZIP_QUE1_DONE ", 0x2008ull},
- {"ZIP_QUE0_DOORBELL ", 0x4000ull},
- {"ZIP_QUE1_DOORBELL ", 0x4008ull},
- {"ZIP_QUE0_SBUF_ADDR ", 0x1000ull},
- {"ZIP_QUE1_SBUF_ADDR ", 0x1008ull},
- {"ZIP_QUE0_SBUF_CTL ", 0x1200ull},
- {"ZIP_QUE1_SBUF_CTL ", 0x1208ull},
- { NULL, 0}
-};
-
-/* Prints registers' contents */
-static int zip_regs_show(struct seq_file *s, void *unused)
-{
- u64 val = 0;
- int i = 0, index = 0;
-
- for (index = 0; index < MAX_ZIP_DEVICES; index++) {
- if (zip_dev[index]) {
- seq_printf(s, "--------------------------------\n"
- " ZIP Device %d Registers\n"
- "--------------------------------\n",
- index);
-
- i = 0;
-
- while (zipregs[i].reg_name) {
- val = zip_reg_read((zip_dev[index]->reg_base +
- zipregs[i].reg_offset));
- seq_printf(s, "%s: 0x%016llx\n",
- zipregs[i].reg_name, val);
- i++;
- }
- }
- }
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(zip_stats);
-DEFINE_SHOW_ATTRIBUTE(zip_clear);
-DEFINE_SHOW_ATTRIBUTE(zip_regs);
-
-/* Root directory for thunderx_zip debugfs entry */
-static struct dentry *zip_debugfs_root;
-
-static void zip_debugfs_init(void)
-{
- if (!debugfs_initialized())
- return;
-
- zip_debugfs_root = debugfs_create_dir("thunderx_zip", NULL);
-
- /* Creating files for entries inside thunderx_zip directory */
- debugfs_create_file("zip_stats", 0444, zip_debugfs_root, NULL,
- &zip_stats_fops);
-
- debugfs_create_file("zip_clear", 0444, zip_debugfs_root, NULL,
- &zip_clear_fops);
-
- debugfs_create_file("zip_regs", 0444, zip_debugfs_root, NULL,
- &zip_regs_fops);
-
-}
-
-static void zip_debugfs_exit(void)
-{
- debugfs_remove_recursive(zip_debugfs_root);
-}
-
-#else
-static void __init zip_debugfs_init(void) { }
-static void __exit zip_debugfs_exit(void) { }
-#endif
-/* debugfs - end */
-
-module_pci_driver(zip_driver);
-
-MODULE_AUTHOR("Cavium Inc");
-MODULE_DESCRIPTION("Cavium Inc ThunderX ZIP Driver");
-MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, zip_id_table);
diff --git a/drivers/crypto/cavium/zip/zip_main.h b/drivers/crypto/cavium/zip/zip_main.h
deleted file mode 100644
index e1e4fa92ce80..000000000000
--- a/drivers/crypto/cavium/zip/zip_main.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_MAIN_H__
-#define __ZIP_MAIN_H__
-
-#include "zip_device.h"
-#include "zip_regs.h"
-
-/* PCI device IDs */
-#define PCI_DEVICE_ID_THUNDERX_ZIP 0xA01A
-
-/* ZIP device BARs */
-#define PCI_CFG_ZIP_PF_BAR0 0 /* Base addr for normal regs */
-
-/* Maximum available zip queues */
-#define ZIP_MAX_NUM_QUEUES 8
-
-#define ZIP_128B_ALIGN 7
-
-/* Command queue buffer size */
-#define ZIP_CMD_QBUF_SIZE (8064 + 8)
-
-struct zip_registers {
- char *reg_name;
- u64 reg_offset;
-};
-
-/* ZIP Compression - Decompression stats */
-struct zip_stats {
- atomic64_t comp_req_submit;
- atomic64_t comp_req_complete;
- atomic64_t decomp_req_submit;
- atomic64_t decomp_req_complete;
- atomic64_t comp_in_bytes;
- atomic64_t comp_out_bytes;
- atomic64_t decomp_in_bytes;
- atomic64_t decomp_out_bytes;
- atomic64_t decomp_bad_reqs;
-};
-
-/* ZIP Instruction Queue */
-struct zip_iq {
- u64 *sw_head;
- u64 *sw_tail;
- u64 *hw_tail;
- u64 done_cnt;
- u64 pend_cnt;
- u64 free_flag;
-
- /* ZIP IQ lock */
- spinlock_t lock;
-};
-
-/* ZIP Device */
-struct zip_device {
- u32 index;
- void __iomem *reg_base;
- struct pci_dev *pdev;
-
- /* Different ZIP Constants */
- u64 depth;
- u64 onfsize;
- u64 ctxsize;
-
- struct zip_iq iq[ZIP_MAX_NUM_QUEUES];
- struct zip_stats stats;
-};
-
-/* Prototypes */
-struct zip_device *zip_get_device(int node_id);
-int zip_get_node_id(void);
-void zip_reg_write(u64 val, u64 __iomem *addr);
-u64 zip_reg_read(u64 __iomem *addr);
-void zip_update_cmd_bufs(struct zip_device *zip_dev, u32 queue);
-u32 zip_load_instr(union zip_inst_s *instr, struct zip_device *zip_dev);
-
-#endif /* ZIP_MAIN_H */
diff --git a/drivers/crypto/cavium/zip/zip_mem.c b/drivers/crypto/cavium/zip/zip_mem.c
deleted file mode 100644
index b3e0843a9169..000000000000
--- a/drivers/crypto/cavium/zip/zip_mem.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#include <linux/types.h>
-#include <linux/vmalloc.h>
-
-#include "common.h"
-
-/**
- * zip_cmd_qbuf_alloc - Allocates a cmd buffer for ZIP Instruction Queue
- * @zip: Pointer to zip device structure
- * @q: Queue number to allocate bufffer to
- * Return: 0 if successful, -ENOMEM otherwise
- */
-int zip_cmd_qbuf_alloc(struct zip_device *zip, int q)
-{
- zip->iq[q].sw_head = (u64 *)__get_free_pages((GFP_KERNEL | GFP_DMA),
- get_order(ZIP_CMD_QBUF_SIZE));
-
- if (!zip->iq[q].sw_head)
- return -ENOMEM;
-
- memset(zip->iq[q].sw_head, 0, ZIP_CMD_QBUF_SIZE);
-
- zip_dbg("cmd_qbuf_alloc[%d] Success : %p\n", q, zip->iq[q].sw_head);
- return 0;
-}
-
-/**
- * zip_cmd_qbuf_free - Frees the cmd Queue buffer
- * @zip: Pointer to zip device structure
- * @q: Queue number to free buffer of
- */
-void zip_cmd_qbuf_free(struct zip_device *zip, int q)
-{
- zip_dbg("Freeing cmd_qbuf 0x%lx\n", zip->iq[q].sw_tail);
-
- free_pages((u64)zip->iq[q].sw_tail, get_order(ZIP_CMD_QBUF_SIZE));
-}
-
-/**
- * zip_data_buf_alloc - Allocates memory for a data bufffer
- * @size: Size of the buffer to allocate
- * Returns: Pointer to the buffer allocated
- */
-u8 *zip_data_buf_alloc(u64 size)
-{
- u8 *ptr;
-
- ptr = (u8 *)__get_free_pages((GFP_KERNEL | GFP_DMA),
- get_order(size));
-
- if (!ptr)
- return NULL;
-
- memset(ptr, 0, size);
-
- zip_dbg("Data buffer allocation success\n");
- return ptr;
-}
-
-/**
- * zip_data_buf_free - Frees the memory of a data buffer
- * @ptr: Pointer to the buffer
- * @size: Buffer size
- */
-void zip_data_buf_free(u8 *ptr, u64 size)
-{
- zip_dbg("Freeing data buffer 0x%lx\n", ptr);
-
- free_pages((u64)ptr, get_order(size));
-}
diff --git a/drivers/crypto/cavium/zip/zip_mem.h b/drivers/crypto/cavium/zip/zip_mem.h
deleted file mode 100644
index f8f2f08c4a5c..000000000000
--- a/drivers/crypto/cavium/zip/zip_mem.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_MEM_H__
-#define __ZIP_MEM_H__
-
-/**
- * zip_cmd_qbuf_free - Frees the cmd Queue buffer
- * @zip: Pointer to zip device structure
- * @q: Queue nmber to free buffer of
- */
-void zip_cmd_qbuf_free(struct zip_device *zip, int q);
-
-/**
- * zip_cmd_qbuf_alloc - Allocates a Chunk/cmd buffer for ZIP Inst(cmd) Queue
- * @zip: Pointer to zip device structure
- * @q: Queue number to allocate bufffer to
- * Return: 0 if successful, 1 otherwise
- */
-int zip_cmd_qbuf_alloc(struct zip_device *zip, int q);
-
-/**
- * zip_data_buf_alloc - Allocates memory for a data bufffer
- * @size: Size of the buffer to allocate
- * Returns: Pointer to the buffer allocated
- */
-u8 *zip_data_buf_alloc(u64 size);
-
-/**
- * zip_data_buf_free - Frees the memory of a data buffer
- * @ptr: Pointer to the buffer
- * @size: Buffer size
- */
-void zip_data_buf_free(u8 *ptr, u64 size);
-
-#endif
diff --git a/drivers/crypto/cavium/zip/zip_regs.h b/drivers/crypto/cavium/zip/zip_regs.h
deleted file mode 100644
index 874e0236c87e..000000000000
--- a/drivers/crypto/cavium/zip/zip_regs.h
+++ /dev/null
@@ -1,1347 +0,0 @@
-/***********************license start************************************
- * Copyright (c) 2003-2017 Cavium, Inc.
- * All rights reserved.
- *
- * License: one of 'Cavium License' or 'GNU General Public License Version 2'
- *
- * This file is provided under the terms of the Cavium License (see below)
- * or under the terms of GNU General Public License, Version 2, as
- * published by the Free Software Foundation. When using or redistributing
- * this file, you may do so under either license.
- *
- * Cavium License: Redistribution and use in source and binary forms, with
- * or without modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * * Neither the name of Cavium Inc. nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * This Software, including technical data, may be subject to U.S. export
- * control laws, including the U.S. Export Administration Act and its
- * associated regulations, and may be subject to export or import
- * regulations in other countries.
- *
- * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
- * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS
- * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
- * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
- * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
- * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY)
- * WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A
- * PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET
- * ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE
- * ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES
- * WITH YOU.
- ***********************license end**************************************/
-
-#ifndef __ZIP_REGS_H__
-#define __ZIP_REGS_H__
-
-/*
- * Configuration and status register (CSR) address and type definitions for
- * Cavium ZIP.
- */
-
-#include <linux/kern_levels.h>
-
-/* ZIP invocation result completion status codes */
-#define ZIP_CMD_NOTDONE 0x0
-
-/* Successful completion. */
-#define ZIP_CMD_SUCCESS 0x1
-
-/* Output truncated */
-#define ZIP_CMD_DTRUNC 0x2
-
-/* Dynamic Stop */
-#define ZIP_CMD_DYNAMIC_STOP 0x3
-
-/* Uncompress ran out of input data when IWORD0[EF] was set */
-#define ZIP_CMD_ITRUNC 0x4
-
-/* Uncompress found the reserved block type 3 */
-#define ZIP_CMD_RBLOCK 0x5
-
-/*
- * Uncompress found LEN != ZIP_CMD_NLEN in an uncompressed block in the input.
- */
-#define ZIP_CMD_NLEN 0x6
-
-/* Uncompress found a bad code in the main Huffman codes. */
-#define ZIP_CMD_BADCODE 0x7
-
-/* Uncompress found a bad code in the 19 Huffman codes encoding lengths. */
-#define ZIP_CMD_BADCODE2 0x8
-
-/* Compress found a zero-length input. */
-#define ZIP_CMD_ZERO_LEN 0x9
-
-/* The compress or decompress encountered an internal parity error. */
-#define ZIP_CMD_PARITY 0xA
-
-/*
- * Uncompress found a string identifier that precedes the uncompressed data and
- * decompression history.
- */
-#define ZIP_CMD_FATAL 0xB
-
-/**
- * enum zip_int_vec_e - ZIP MSI-X Vector Enumeration, enumerates the MSI-X
- * interrupt vectors.
- */
-enum zip_int_vec_e {
- ZIP_INT_VEC_E_ECCE = 0x10,
- ZIP_INT_VEC_E_FIFE = 0x11,
- ZIP_INT_VEC_E_QUE0_DONE = 0x0,
- ZIP_INT_VEC_E_QUE0_ERR = 0x8,
- ZIP_INT_VEC_E_QUE1_DONE = 0x1,
- ZIP_INT_VEC_E_QUE1_ERR = 0x9,
- ZIP_INT_VEC_E_QUE2_DONE = 0x2,
- ZIP_INT_VEC_E_QUE2_ERR = 0xa,
- ZIP_INT_VEC_E_QUE3_DONE = 0x3,
- ZIP_INT_VEC_E_QUE3_ERR = 0xb,
- ZIP_INT_VEC_E_QUE4_DONE = 0x4,
- ZIP_INT_VEC_E_QUE4_ERR = 0xc,
- ZIP_INT_VEC_E_QUE5_DONE = 0x5,
- ZIP_INT_VEC_E_QUE5_ERR = 0xd,
- ZIP_INT_VEC_E_QUE6_DONE = 0x6,
- ZIP_INT_VEC_E_QUE6_ERR = 0xe,
- ZIP_INT_VEC_E_QUE7_DONE = 0x7,
- ZIP_INT_VEC_E_QUE7_ERR = 0xf,
- ZIP_INT_VEC_E_ENUM_LAST = 0x12,
-};
-
-/**
- * union zip_zptr_addr_s - ZIP Generic Pointer Structure for ADDR.
- *
- * It is the generic format of pointers in ZIP_INST_S.
- */
-union zip_zptr_addr_s {
- u64 u_reg64;
- struct {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_49_63 : 15;
- u64 addr : 49;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 addr : 49;
- u64 reserved_49_63 : 15;
-#endif
- } s;
-
-};
-
-/**
- * union zip_zptr_ctl_s - ZIP Generic Pointer Structure for CTL.
- *
- * It is the generic format of pointers in ZIP_INST_S.
- */
-union zip_zptr_ctl_s {
- u64 u_reg64;
- struct {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_112_127 : 16;
- u64 length : 16;
- u64 reserved_67_95 : 29;
- u64 fw : 1;
- u64 nc : 1;
- u64 data_be : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 data_be : 1;
- u64 nc : 1;
- u64 fw : 1;
- u64 reserved_67_95 : 29;
- u64 length : 16;
- u64 reserved_112_127 : 16;
-#endif
- } s;
-};
-
-/**
- * union zip_inst_s - ZIP Instruction Structure.
- * Each ZIP instruction has 16 words (they are called IWORD0 to IWORD15 within
- * the structure).
- */
-union zip_inst_s {
- u64 u_reg64[16];
- struct {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 doneint : 1;
- u64 reserved_56_62 : 7;
- u64 totaloutputlength : 24;
- u64 reserved_27_31 : 5;
- u64 exn : 3;
- u64 reserved_23_23 : 1;
- u64 exbits : 7;
- u64 reserved_12_15 : 4;
- u64 sf : 1;
- u64 ss : 2;
- u64 cc : 2;
- u64 ef : 1;
- u64 bf : 1;
- u64 ce : 1;
- u64 reserved_3_3 : 1;
- u64 ds : 1;
- u64 dg : 1;
- u64 hg : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 hg : 1;
- u64 dg : 1;
- u64 ds : 1;
- u64 reserved_3_3 : 1;
- u64 ce : 1;
- u64 bf : 1;
- u64 ef : 1;
- u64 cc : 2;
- u64 ss : 2;
- u64 sf : 1;
- u64 reserved_12_15 : 4;
- u64 exbits : 7;
- u64 reserved_23_23 : 1;
- u64 exn : 3;
- u64 reserved_27_31 : 5;
- u64 totaloutputlength : 24;
- u64 reserved_56_62 : 7;
- u64 doneint : 1;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 historylength : 16;
- u64 reserved_96_111 : 16;
- u64 adlercrc32 : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 adlercrc32 : 32;
- u64 reserved_96_111 : 16;
- u64 historylength : 16;
-#endif
- union zip_zptr_addr_s ctx_ptr_addr;
- union zip_zptr_ctl_s ctx_ptr_ctl;
- union zip_zptr_addr_s his_ptr_addr;
- union zip_zptr_ctl_s his_ptr_ctl;
- union zip_zptr_addr_s inp_ptr_addr;
- union zip_zptr_ctl_s inp_ptr_ctl;
- union zip_zptr_addr_s out_ptr_addr;
- union zip_zptr_ctl_s out_ptr_ctl;
- union zip_zptr_addr_s res_ptr_addr;
- union zip_zptr_ctl_s res_ptr_ctl;
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_817_831 : 15;
- u64 wq_ptr : 49;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 wq_ptr : 49;
- u64 reserved_817_831 : 15;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_882_895 : 14;
- u64 tt : 2;
- u64 reserved_874_879 : 6;
- u64 grp : 10;
- u64 tag : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 tag : 32;
- u64 grp : 10;
- u64 reserved_874_879 : 6;
- u64 tt : 2;
- u64 reserved_882_895 : 14;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_896_959 : 64;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 reserved_896_959 : 64;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_960_1023 : 64;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 reserved_960_1023 : 64;
-#endif
- } s;
-};
-
-/**
- * union zip_nptr_s - ZIP Instruction Next-Chunk-Buffer Pointer (NPTR)
- * Structure
- *
- * ZIP_NPTR structure is used to chain all the zip instruction buffers
- * together. ZIP instruction buffers are managed (allocated and released) by
- * the software.
- */
-union zip_nptr_s {
- u64 u_reg64;
- struct {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_49_63 : 15;
- u64 addr : 49;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 addr : 49;
- u64 reserved_49_63 : 15;
-#endif
- } s;
-};
-
-/**
- * union zip_zptr_s - ZIP Generic Pointer Structure.
- *
- * It is the generic format of pointers in ZIP_INST_S.
- */
-union zip_zptr_s {
- u64 u_reg64[2];
- struct {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_49_63 : 15;
- u64 addr : 49;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 addr : 49;
- u64 reserved_49_63 : 15;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_112_127 : 16;
- u64 length : 16;
- u64 reserved_67_95 : 29;
- u64 fw : 1;
- u64 nc : 1;
- u64 data_be : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 data_be : 1;
- u64 nc : 1;
- u64 fw : 1;
- u64 reserved_67_95 : 29;
- u64 length : 16;
- u64 reserved_112_127 : 16;
-#endif
- } s;
-};
-
-/**
- * union zip_zres_s - ZIP Result Structure
- *
- * The ZIP coprocessor writes the result structure after it completes the
- * invocation. The result structure is exactly 24 bytes, and each invocation of
- * the ZIP coprocessor produces exactly one result structure.
- */
-union zip_zres_s {
- u64 u_reg64[3];
- struct {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 crc32 : 32;
- u64 adler32 : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 adler32 : 32;
- u64 crc32 : 32;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 totalbyteswritten : 32;
- u64 totalbytesread : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 totalbytesread : 32;
- u64 totalbyteswritten : 32;
-#endif
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 totalbitsprocessed : 32;
- u64 doneint : 1;
- u64 reserved_155_158 : 4;
- u64 exn : 3;
- u64 reserved_151_151 : 1;
- u64 exbits : 7;
- u64 reserved_137_143 : 7;
- u64 ef : 1;
-
- volatile u64 compcode : 8;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
-
- volatile u64 compcode : 8;
- u64 ef : 1;
- u64 reserved_137_143 : 7;
- u64 exbits : 7;
- u64 reserved_151_151 : 1;
- u64 exn : 3;
- u64 reserved_155_158 : 4;
- u64 doneint : 1;
- u64 totalbitsprocessed : 32;
-#endif
- } s;
-};
-
-/**
- * union zip_cmd_ctl - Structure representing the register that controls
- * clock and reset.
- */
-union zip_cmd_ctl {
- u64 u_reg64;
- struct zip_cmd_ctl_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_2_63 : 62;
- u64 forceclk : 1;
- u64 reset : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 reset : 1;
- u64 forceclk : 1;
- u64 reserved_2_63 : 62;
-#endif
- } s;
-};
-
-#define ZIP_CMD_CTL 0x0ull
-
-/**
- * union zip_constants - Data structure representing the register that contains
- * all of the current implementation-related parameters of the zip core in this
- * chip.
- */
-union zip_constants {
- u64 u_reg64;
- struct zip_constants_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 nexec : 8;
- u64 reserved_49_55 : 7;
- u64 syncflush_capable : 1;
- u64 depth : 16;
- u64 onfsize : 12;
- u64 ctxsize : 12;
- u64 reserved_1_7 : 7;
- u64 disabled : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 disabled : 1;
- u64 reserved_1_7 : 7;
- u64 ctxsize : 12;
- u64 onfsize : 12;
- u64 depth : 16;
- u64 syncflush_capable : 1;
- u64 reserved_49_55 : 7;
- u64 nexec : 8;
-#endif
- } s;
-};
-
-#define ZIP_CONSTANTS 0x00A0ull
-
-/**
- * union zip_corex_bist_status - Represents registers which have the BIST
- * status of memories in zip cores.
- *
- * Each bit is the BIST result of an individual memory
- * (per bit, 0 = pass and 1 = fail).
- */
-union zip_corex_bist_status {
- u64 u_reg64;
- struct zip_corex_bist_status_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_53_63 : 11;
- u64 bstatus : 53;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 bstatus : 53;
- u64 reserved_53_63 : 11;
-#endif
- } s;
-};
-
-static inline u64 ZIP_COREX_BIST_STATUS(u64 param1)
-{
- if (param1 <= 1)
- return 0x0520ull + (param1 & 1) * 0x8ull;
- pr_err("ZIP_COREX_BIST_STATUS: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_ctl_bist_status - Represents register that has the BIST status of
- * memories in ZIP_CTL (instruction buffer, G/S pointer FIFO, input data
- * buffer, output data buffers).
- *
- * Each bit is the BIST result of an individual memory
- * (per bit, 0 = pass and 1 = fail).
- */
-union zip_ctl_bist_status {
- u64 u_reg64;
- struct zip_ctl_bist_status_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_9_63 : 55;
- u64 bstatus : 9;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 bstatus : 9;
- u64 reserved_9_63 : 55;
-#endif
- } s;
-};
-
-#define ZIP_CTL_BIST_STATUS 0x0510ull
-
-/**
- * union zip_ctl_cfg - Represents the register that controls the behavior of
- * the ZIP DMA engines.
- *
- * It is recommended to keep default values for normal operation. Changing the
- * values of the fields may be useful for diagnostics.
- */
-union zip_ctl_cfg {
- u64 u_reg64;
- struct zip_ctl_cfg_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_52_63 : 12;
- u64 ildf : 4;
- u64 reserved_36_47 : 12;
- u64 drtf : 4;
- u64 reserved_27_31 : 5;
- u64 stcf : 3;
- u64 reserved_19_23 : 5;
- u64 ldf : 3;
- u64 reserved_2_15 : 14;
- u64 busy : 1;
- u64 reserved_0_0 : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 reserved_0_0 : 1;
- u64 busy : 1;
- u64 reserved_2_15 : 14;
- u64 ldf : 3;
- u64 reserved_19_23 : 5;
- u64 stcf : 3;
- u64 reserved_27_31 : 5;
- u64 drtf : 4;
- u64 reserved_36_47 : 12;
- u64 ildf : 4;
- u64 reserved_52_63 : 12;
-#endif
- } s;
-};
-
-#define ZIP_CTL_CFG 0x0560ull
-
-/**
- * union zip_dbg_corex_inst - Represents the registers that reflect the status
- * of the current instruction that the ZIP core is executing or has executed.
- *
- * These registers are only for debug use.
- */
-union zip_dbg_corex_inst {
- u64 u_reg64;
- struct zip_dbg_corex_inst_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 busy : 1;
- u64 reserved_35_62 : 28;
- u64 qid : 3;
- u64 iid : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 iid : 32;
- u64 qid : 3;
- u64 reserved_35_62 : 28;
- u64 busy : 1;
-#endif
- } s;
-};
-
-static inline u64 ZIP_DBG_COREX_INST(u64 param1)
-{
- if (param1 <= 1)
- return 0x0640ull + (param1 & 1) * 0x8ull;
- pr_err("ZIP_DBG_COREX_INST: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_dbg_corex_sta - Represents registers that reflect the status of
- * the zip cores.
- *
- * They are for debug use only.
- */
-union zip_dbg_corex_sta {
- u64 u_reg64;
- struct zip_dbg_corex_sta_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 busy : 1;
- u64 reserved_37_62 : 26;
- u64 ist : 5;
- u64 nie : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 nie : 32;
- u64 ist : 5;
- u64 reserved_37_62 : 26;
- u64 busy : 1;
-#endif
- } s;
-};
-
-static inline u64 ZIP_DBG_COREX_STA(u64 param1)
-{
- if (param1 <= 1)
- return 0x0680ull + (param1 & 1) * 0x8ull;
- pr_err("ZIP_DBG_COREX_STA: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_dbg_quex_sta - Represets registers that reflect status of the zip
- * instruction queues.
- *
- * They are for debug use only.
- */
-union zip_dbg_quex_sta {
- u64 u_reg64;
- struct zip_dbg_quex_sta_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 busy : 1;
- u64 reserved_56_62 : 7;
- u64 rqwc : 24;
- u64 nii : 32;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 nii : 32;
- u64 rqwc : 24;
- u64 reserved_56_62 : 7;
- u64 busy : 1;
-#endif
- } s;
-};
-
-static inline u64 ZIP_DBG_QUEX_STA(u64 param1)
-{
- if (param1 <= 7)
- return 0x1800ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_DBG_QUEX_STA: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_ecc_ctl - Represents the register that enables ECC for each
- * individual internal memory that requires ECC.
- *
- * For debug purpose, it can also flip one or two bits in the ECC data.
- */
-union zip_ecc_ctl {
- u64 u_reg64;
- struct zip_ecc_ctl_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_19_63 : 45;
- u64 vmem_cdis : 1;
- u64 vmem_fs : 2;
- u64 reserved_15_15 : 1;
- u64 idf1_cdis : 1;
- u64 idf1_fs : 2;
- u64 reserved_11_11 : 1;
- u64 idf0_cdis : 1;
- u64 idf0_fs : 2;
- u64 reserved_7_7 : 1;
- u64 gspf_cdis : 1;
- u64 gspf_fs : 2;
- u64 reserved_3_3 : 1;
- u64 iqf_cdis : 1;
- u64 iqf_fs : 2;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 iqf_fs : 2;
- u64 iqf_cdis : 1;
- u64 reserved_3_3 : 1;
- u64 gspf_fs : 2;
- u64 gspf_cdis : 1;
- u64 reserved_7_7 : 1;
- u64 idf0_fs : 2;
- u64 idf0_cdis : 1;
- u64 reserved_11_11 : 1;
- u64 idf1_fs : 2;
- u64 idf1_cdis : 1;
- u64 reserved_15_15 : 1;
- u64 vmem_fs : 2;
- u64 vmem_cdis : 1;
- u64 reserved_19_63 : 45;
-#endif
- } s;
-};
-
-#define ZIP_ECC_CTL 0x0568ull
-
-/* NCB - zip_ecce_ena_w1c */
-union zip_ecce_ena_w1c {
- u64 u_reg64;
- struct zip_ecce_ena_w1c_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_37_63 : 27;
- u64 dbe : 5;
- u64 reserved_5_31 : 27;
- u64 sbe : 5;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 sbe : 5;
- u64 reserved_5_31 : 27;
- u64 dbe : 5;
- u64 reserved_37_63 : 27;
-#endif
- } s;
-};
-
-#define ZIP_ECCE_ENA_W1C 0x0598ull
-
-/* NCB - zip_ecce_ena_w1s */
-union zip_ecce_ena_w1s {
- u64 u_reg64;
- struct zip_ecce_ena_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_37_63 : 27;
- u64 dbe : 5;
- u64 reserved_5_31 : 27;
- u64 sbe : 5;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 sbe : 5;
- u64 reserved_5_31 : 27;
- u64 dbe : 5;
- u64 reserved_37_63 : 27;
-#endif
- } s;
-};
-
-#define ZIP_ECCE_ENA_W1S 0x0590ull
-
-/**
- * union zip_ecce_int - Represents the register that contains the status of the
- * ECC interrupt sources.
- */
-union zip_ecce_int {
- u64 u_reg64;
- struct zip_ecce_int_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_37_63 : 27;
- u64 dbe : 5;
- u64 reserved_5_31 : 27;
- u64 sbe : 5;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 sbe : 5;
- u64 reserved_5_31 : 27;
- u64 dbe : 5;
- u64 reserved_37_63 : 27;
-#endif
- } s;
-};
-
-#define ZIP_ECCE_INT 0x0580ull
-
-/* NCB - zip_ecce_int_w1s */
-union zip_ecce_int_w1s {
- u64 u_reg64;
- struct zip_ecce_int_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_37_63 : 27;
- u64 dbe : 5;
- u64 reserved_5_31 : 27;
- u64 sbe : 5;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 sbe : 5;
- u64 reserved_5_31 : 27;
- u64 dbe : 5;
- u64 reserved_37_63 : 27;
-#endif
- } s;
-};
-
-#define ZIP_ECCE_INT_W1S 0x0588ull
-
-/* NCB - zip_fife_ena_w1c */
-union zip_fife_ena_w1c {
- u64 u_reg64;
- struct zip_fife_ena_w1c_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_42_63 : 22;
- u64 asserts : 42;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 asserts : 42;
- u64 reserved_42_63 : 22;
-#endif
- } s;
-};
-
-#define ZIP_FIFE_ENA_W1C 0x0090ull
-
-/* NCB - zip_fife_ena_w1s */
-union zip_fife_ena_w1s {
- u64 u_reg64;
- struct zip_fife_ena_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_42_63 : 22;
- u64 asserts : 42;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 asserts : 42;
- u64 reserved_42_63 : 22;
-#endif
- } s;
-};
-
-#define ZIP_FIFE_ENA_W1S 0x0088ull
-
-/* NCB - zip_fife_int */
-union zip_fife_int {
- u64 u_reg64;
- struct zip_fife_int_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_42_63 : 22;
- u64 asserts : 42;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 asserts : 42;
- u64 reserved_42_63 : 22;
-#endif
- } s;
-};
-
-#define ZIP_FIFE_INT 0x0078ull
-
-/* NCB - zip_fife_int_w1s */
-union zip_fife_int_w1s {
- u64 u_reg64;
- struct zip_fife_int_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_42_63 : 22;
- u64 asserts : 42;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 asserts : 42;
- u64 reserved_42_63 : 22;
-#endif
- } s;
-};
-
-#define ZIP_FIFE_INT_W1S 0x0080ull
-
-/**
- * union zip_msix_pbax - Represents the register that is the MSI-X PBA table
- *
- * The bit number is indexed by the ZIP_INT_VEC_E enumeration.
- */
-union zip_msix_pbax {
- u64 u_reg64;
- struct zip_msix_pbax_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 pend : 64;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 pend : 64;
-#endif
- } s;
-};
-
-static inline u64 ZIP_MSIX_PBAX(u64 param1)
-{
- if (param1 == 0)
- return 0x0000838000FF0000ull;
- pr_err("ZIP_MSIX_PBAX: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_msix_vecx_addr - Represents the register that is the MSI-X vector
- * table, indexed by the ZIP_INT_VEC_E enumeration.
- */
-union zip_msix_vecx_addr {
- u64 u_reg64;
- struct zip_msix_vecx_addr_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_49_63 : 15;
- u64 addr : 47;
- u64 reserved_1_1 : 1;
- u64 secvec : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 secvec : 1;
- u64 reserved_1_1 : 1;
- u64 addr : 47;
- u64 reserved_49_63 : 15;
-#endif
- } s;
-};
-
-static inline u64 ZIP_MSIX_VECX_ADDR(u64 param1)
-{
- if (param1 <= 17)
- return 0x0000838000F00000ull + (param1 & 31) * 0x10ull;
- pr_err("ZIP_MSIX_VECX_ADDR: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_msix_vecx_ctl - Represents the register that is the MSI-X vector
- * table, indexed by the ZIP_INT_VEC_E enumeration.
- */
-union zip_msix_vecx_ctl {
- u64 u_reg64;
- struct zip_msix_vecx_ctl_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_33_63 : 31;
- u64 mask : 1;
- u64 reserved_20_31 : 12;
- u64 data : 20;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 data : 20;
- u64 reserved_20_31 : 12;
- u64 mask : 1;
- u64 reserved_33_63 : 31;
-#endif
- } s;
-};
-
-static inline u64 ZIP_MSIX_VECX_CTL(u64 param1)
-{
- if (param1 <= 17)
- return 0x0000838000F00008ull + (param1 & 31) * 0x10ull;
- pr_err("ZIP_MSIX_VECX_CTL: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_done - Represents the registers that contain the per-queue
- * instruction done count.
- */
-union zip_quex_done {
- u64 u_reg64;
- struct zip_quex_done_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_20_63 : 44;
- u64 done : 20;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 done : 20;
- u64 reserved_20_63 : 44;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_DONE(u64 param1)
-{
- if (param1 <= 7)
- return 0x2000ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_DONE: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_done_ack - Represents the registers on write to which will
- * decrement the per-queue instructiona done count.
- */
-union zip_quex_done_ack {
- u64 u_reg64;
- struct zip_quex_done_ack_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_20_63 : 44;
- u64 done_ack : 20;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 done_ack : 20;
- u64 reserved_20_63 : 44;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_DONE_ACK(u64 param1)
-{
- if (param1 <= 7)
- return 0x2200ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_DONE_ACK: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_done_ena_w1c - Represents the register which when written
- * 1 to will disable the DONEINT interrupt for the queue.
- */
-union zip_quex_done_ena_w1c {
- u64 u_reg64;
- struct zip_quex_done_ena_w1c_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_1_63 : 63;
- u64 done_ena : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 done_ena : 1;
- u64 reserved_1_63 : 63;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_DONE_ENA_W1C(u64 param1)
-{
- if (param1 <= 7)
- return 0x2600ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_DONE_ENA_W1C: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_done_ena_w1s - Represents the register that when written 1 to
- * will enable the DONEINT interrupt for the queue.
- */
-union zip_quex_done_ena_w1s {
- u64 u_reg64;
- struct zip_quex_done_ena_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_1_63 : 63;
- u64 done_ena : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 done_ena : 1;
- u64 reserved_1_63 : 63;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_DONE_ENA_W1S(u64 param1)
-{
- if (param1 <= 7)
- return 0x2400ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_DONE_ENA_W1S: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_done_wait - Represents the register that specifies the per
- * queue interrupt coalescing settings.
- */
-union zip_quex_done_wait {
- u64 u_reg64;
- struct zip_quex_done_wait_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_48_63 : 16;
- u64 time_wait : 16;
- u64 reserved_20_31 : 12;
- u64 num_wait : 20;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 num_wait : 20;
- u64 reserved_20_31 : 12;
- u64 time_wait : 16;
- u64 reserved_48_63 : 16;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_DONE_WAIT(u64 param1)
-{
- if (param1 <= 7)
- return 0x2800ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_DONE_WAIT: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_doorbell - Represents doorbell registers for the ZIP
- * instruction queues.
- */
-union zip_quex_doorbell {
- u64 u_reg64;
- struct zip_quex_doorbell_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_20_63 : 44;
- u64 dbell_cnt : 20;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 dbell_cnt : 20;
- u64 reserved_20_63 : 44;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_DOORBELL(u64 param1)
-{
- if (param1 <= 7)
- return 0x4000ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_DOORBELL: %llu\n", param1);
- return 0;
-}
-
-union zip_quex_err_ena_w1c {
- u64 u_reg64;
- struct zip_quex_err_ena_w1c_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_5_63 : 59;
- u64 mdbe : 1;
- u64 nwrp : 1;
- u64 nrrp : 1;
- u64 irde : 1;
- u64 dovf : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 dovf : 1;
- u64 irde : 1;
- u64 nrrp : 1;
- u64 nwrp : 1;
- u64 mdbe : 1;
- u64 reserved_5_63 : 59;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_ERR_ENA_W1C(u64 param1)
-{
- if (param1 <= 7)
- return 0x3600ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_ERR_ENA_W1C: %llu\n", param1);
- return 0;
-}
-
-union zip_quex_err_ena_w1s {
- u64 u_reg64;
- struct zip_quex_err_ena_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_5_63 : 59;
- u64 mdbe : 1;
- u64 nwrp : 1;
- u64 nrrp : 1;
- u64 irde : 1;
- u64 dovf : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 dovf : 1;
- u64 irde : 1;
- u64 nrrp : 1;
- u64 nwrp : 1;
- u64 mdbe : 1;
- u64 reserved_5_63 : 59;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_ERR_ENA_W1S(u64 param1)
-{
- if (param1 <= 7)
- return 0x3400ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_ERR_ENA_W1S: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_err_int - Represents registers that contain the per-queue
- * error interrupts.
- */
-union zip_quex_err_int {
- u64 u_reg64;
- struct zip_quex_err_int_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_5_63 : 59;
- u64 mdbe : 1;
- u64 nwrp : 1;
- u64 nrrp : 1;
- u64 irde : 1;
- u64 dovf : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 dovf : 1;
- u64 irde : 1;
- u64 nrrp : 1;
- u64 nwrp : 1;
- u64 mdbe : 1;
- u64 reserved_5_63 : 59;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_ERR_INT(u64 param1)
-{
- if (param1 <= 7)
- return 0x3000ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_ERR_INT: %llu\n", param1);
- return 0;
-}
-
-/* NCB - zip_que#_err_int_w1s */
-union zip_quex_err_int_w1s {
- u64 u_reg64;
- struct zip_quex_err_int_w1s_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_5_63 : 59;
- u64 mdbe : 1;
- u64 nwrp : 1;
- u64 nrrp : 1;
- u64 irde : 1;
- u64 dovf : 1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 dovf : 1;
- u64 irde : 1;
- u64 nrrp : 1;
- u64 nwrp : 1;
- u64 mdbe : 1;
- u64 reserved_5_63 : 59;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_ERR_INT_W1S(u64 param1)
-{
- if (param1 <= 7)
- return 0x3200ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_ERR_INT_W1S: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_gcfg - Represents the registers that reflect status of the
- * zip instruction queues,debug use only.
- */
-union zip_quex_gcfg {
- u64 u_reg64;
- struct zip_quex_gcfg_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_4_63 : 60;
- u64 iqb_ldwb : 1;
- u64 cbw_sty : 1;
- u64 l2ld_cmd : 2;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 l2ld_cmd : 2;
- u64 cbw_sty : 1;
- u64 iqb_ldwb : 1;
- u64 reserved_4_63 : 60;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_GCFG(u64 param1)
-{
- if (param1 <= 7)
- return 0x1A00ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_GCFG: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_map - Represents the registers that control how each
- * instruction queue maps to zip cores.
- */
-union zip_quex_map {
- u64 u_reg64;
- struct zip_quex_map_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_2_63 : 62;
- u64 zce : 2;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 zce : 2;
- u64 reserved_2_63 : 62;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_MAP(u64 param1)
-{
- if (param1 <= 7)
- return 0x1400ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_MAP: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_sbuf_addr - Represents the registers that set the buffer
- * parameters for the instruction queues.
- *
- * When quiescent (i.e. outstanding doorbell count is 0), it is safe to rewrite
- * this register to effectively reset the command buffer state machine.
- * These registers must be programmed after SW programs the corresponding
- * ZIP_QUE(0..7)_SBUF_CTL.
- */
-union zip_quex_sbuf_addr {
- u64 u_reg64;
- struct zip_quex_sbuf_addr_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_49_63 : 15;
- u64 ptr : 42;
- u64 off : 7;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 off : 7;
- u64 ptr : 42;
- u64 reserved_49_63 : 15;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_SBUF_ADDR(u64 param1)
-{
- if (param1 <= 7)
- return 0x1000ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_SBUF_ADDR: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_quex_sbuf_ctl - Represents the registers that set the buffer
- * parameters for the instruction queues.
- *
- * When quiescent (i.e. outstanding doorbell count is 0), it is safe to rewrite
- * this register to effectively reset the command buffer state machine.
- * These registers must be programmed before SW programs the corresponding
- * ZIP_QUE(0..7)_SBUF_ADDR.
- */
-union zip_quex_sbuf_ctl {
- u64 u_reg64;
- struct zip_quex_sbuf_ctl_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_45_63 : 19;
- u64 size : 13;
- u64 inst_be : 1;
- u64 reserved_24_30 : 7;
- u64 stream_id : 8;
- u64 reserved_12_15 : 4;
- u64 aura : 12;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 aura : 12;
- u64 reserved_12_15 : 4;
- u64 stream_id : 8;
- u64 reserved_24_30 : 7;
- u64 inst_be : 1;
- u64 size : 13;
- u64 reserved_45_63 : 19;
-#endif
- } s;
-};
-
-static inline u64 ZIP_QUEX_SBUF_CTL(u64 param1)
-{
- if (param1 <= 7)
- return 0x1200ull + (param1 & 7) * 0x8ull;
- pr_err("ZIP_QUEX_SBUF_CTL: %llu\n", param1);
- return 0;
-}
-
-/**
- * union zip_que_ena - Represents queue enable register
- *
- * If a queue is disabled, ZIP_CTL stops fetching instructions from the queue.
- */
-union zip_que_ena {
- u64 u_reg64;
- struct zip_que_ena_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_8_63 : 56;
- u64 ena : 8;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 ena : 8;
- u64 reserved_8_63 : 56;
-#endif
- } s;
-};
-
-#define ZIP_QUE_ENA 0x0500ull
-
-/**
- * union zip_que_pri - Represents the register that defines the priority
- * between instruction queues.
- */
-union zip_que_pri {
- u64 u_reg64;
- struct zip_que_pri_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_8_63 : 56;
- u64 pri : 8;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 pri : 8;
- u64 reserved_8_63 : 56;
-#endif
- } s;
-};
-
-#define ZIP_QUE_PRI 0x0508ull
-
-/**
- * union zip_throttle - Represents the register that controls the maximum
- * number of in-flight X2I data fetch transactions.
- *
- * Writing 0 to this register causes the ZIP module to temporarily suspend NCB
- * accesses; it is not recommended for normal operation, but may be useful for
- * diagnostics.
- */
-union zip_throttle {
- u64 u_reg64;
- struct zip_throttle_s {
-#if defined(__BIG_ENDIAN_BITFIELD)
- u64 reserved_6_63 : 58;
- u64 ld_infl : 6;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u64 ld_infl : 6;
- u64 reserved_6_63 : 58;
-#endif
- } s;
-};
-
-#define ZIP_THROTTLE 0x0010ull
-
-#endif /* _CSRS_ZIP__ */
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index d11daaf47f06..685d42ec7ade 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -7,15 +7,16 @@
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/scatterlist.h>
-#include <linux/crypto.h>
-#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
-#include <crypto/scatterwalk.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
#include "ccp-crypto.h"
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
index afae30adb703..91b1189c47de 100644
--- a/drivers/crypto/ccp/ccp-crypto-des3.c
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -7,14 +7,15 @@
* Author: Gary R Hook <ghook@amd.com>
*/
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
#include <linux/scatterlist.h>
-#include <linux/crypto.h>
-#include <crypto/algapi.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/des.h>
+#include <linux/slab.h>
+#include <linux/string.h>
#include "ccp-crypto.h"
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index ecd58b38c46e..bc90aba5162a 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -7,14 +7,17 @@
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/ccp.h>
+#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/list.h>
-#include <linux/ccp.h>
+#include <linux/module.h>
#include <linux/scatterlist.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/akcipher.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
#include "ccp-crypto.h"
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index cb8e99936abb..109b5aef4034 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -8,13 +8,14 @@
* Author: Gary R Hook <gary.hook@amd.com>
*/
-#include <linux/dma-mapping.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <crypto/scatterwalk.h>
#include <crypto/des.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/utils.h>
#include <linux/ccp.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include "ccp-dev.h"
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 2e87ca0e292a..3451bada884e 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -33,6 +33,7 @@
#include <asm/cacheflush.h>
#include <asm/e820/types.h>
#include <asm/sev.h>
+#include <asm/msr.h>
#include "psp-dev.h"
#include "sev-dev.h"
@@ -109,6 +110,15 @@ static void *sev_init_ex_buffer;
*/
static struct sev_data_range_list *snp_range_list;
+static void __sev_firmware_shutdown(struct sev_device *sev, bool panic);
+
+static int snp_shutdown_on_panic(struct notifier_block *nb,
+ unsigned long reason, void *arg);
+
+static struct notifier_block snp_panic_notifier = {
+ .notifier_call = snp_shutdown_on_panic,
+};
+
static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
{
struct sev_device *sev = psp_master->sev_data;
@@ -1060,7 +1070,7 @@ static inline int __sev_do_init_locked(int *psp_ret)
static void snp_set_hsave_pa(void *arg)
{
- wrmsrl(MSR_VM_HSAVE_PA, 0);
+ wrmsrq(MSR_VM_HSAVE_PA, 0);
}
static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)
@@ -1112,7 +1122,7 @@ static int __sev_snp_init_locked(int *error)
if (!sev_version_greater_or_equal(SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR)) {
dev_dbg(sev->dev, "SEV-SNP support requires firmware version >= %d:%d\n",
SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR);
- return 0;
+ return -EOPNOTSUPP;
}
/* SNP_INIT requires MSR_VM_HSAVE_PA to be cleared on all CPUs. */
@@ -1176,21 +1186,34 @@ static int __sev_snp_init_locked(int *error)
wbinvd_on_all_cpus();
rc = __sev_do_cmd_locked(cmd, arg, error);
- if (rc)
+ if (rc) {
+ dev_err(sev->dev, "SEV-SNP: %s failed rc %d, error %#x\n",
+ cmd == SEV_CMD_SNP_INIT_EX ? "SNP_INIT_EX" : "SNP_INIT",
+ rc, *error);
return rc;
+ }
/* Prepare for first SNP guest launch after INIT. */
wbinvd_on_all_cpus();
rc = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, error);
- if (rc)
+ if (rc) {
+ dev_err(sev->dev, "SEV-SNP: SNP_DF_FLUSH failed rc %d, error %#x\n",
+ rc, *error);
return rc;
+ }
sev->snp_initialized = true;
dev_dbg(sev->dev, "SEV-SNP firmware initialized\n");
+ dev_info(sev->dev, "SEV-SNP API:%d.%d build:%d\n", sev->api_major,
+ sev->api_minor, sev->build);
+
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &snp_panic_notifier);
+
sev_es_tmr_size = SNP_TMR_SIZE;
- return rc;
+ return 0;
}
static void __sev_platform_init_handle_tmr(struct sev_device *sev)
@@ -1287,16 +1310,22 @@ static int __sev_platform_init_locked(int *error)
if (error)
*error = psp_ret;
- if (rc)
+ if (rc) {
+ dev_err(sev->dev, "SEV: %s failed %#x, rc %d\n",
+ sev_init_ex_buffer ? "INIT_EX" : "INIT", psp_ret, rc);
return rc;
+ }
sev->state = SEV_STATE_INIT;
/* Prepare for first SEV guest launch after INIT */
wbinvd_on_all_cpus();
rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error);
- if (rc)
+ if (rc) {
+ dev_err(sev->dev, "SEV: DF_FLUSH failed %#x, rc %d\n",
+ *error, rc);
return rc;
+ }
dev_dbg(sev->dev, "SEV firmware initialized\n");
@@ -1319,19 +1348,9 @@ static int _sev_platform_init_locked(struct sev_platform_init_args *args)
if (sev->state == SEV_STATE_INIT)
return 0;
- /*
- * Legacy guests cannot be running while SNP_INIT(_EX) is executing,
- * so perform SEV-SNP initialization at probe time.
- */
rc = __sev_snp_init_locked(&args->error);
- if (rc && rc != -ENODEV) {
- /*
- * Don't abort the probe if SNP INIT failed,
- * continue to initialize the legacy SEV firmware.
- */
- dev_err(sev->dev, "SEV-SNP: failed to INIT rc %d, error %#x\n",
- rc, args->error);
- }
+ if (rc && rc != -ENODEV)
+ return rc;
/* Defer legacy SEV/SEV-ES support if allowed by caller/module. */
if (args->probe && !psp_init_on_probe)
@@ -1367,8 +1386,11 @@ static int __sev_platform_shutdown_locked(int *error)
return 0;
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
- if (ret)
+ if (ret) {
+ dev_err(sev->dev, "SEV: failed to SHUTDOWN error %#x, rc %d\n",
+ *error, ret);
return ret;
+ }
sev->state = SEV_STATE_UNINIT;
dev_dbg(sev->dev, "SEV firmware shutdown\n");
@@ -1389,6 +1411,37 @@ static int sev_get_platform_state(int *state, int *error)
return rc;
}
+static int sev_move_to_init_state(struct sev_issue_cmd *argp, bool *shutdown_required)
+{
+ struct sev_platform_init_args init_args = {0};
+ int rc;
+
+ rc = _sev_platform_init_locked(&init_args);
+ if (rc) {
+ argp->error = SEV_RET_INVALID_PLATFORM_STATE;
+ return rc;
+ }
+
+ *shutdown_required = true;
+
+ return 0;
+}
+
+static int snp_move_to_init_state(struct sev_issue_cmd *argp, bool *shutdown_required)
+{
+ int error, rc;
+
+ rc = __sev_snp_init_locked(&error);
+ if (rc) {
+ argp->error = SEV_RET_INVALID_PLATFORM_STATE;
+ return rc;
+ }
+
+ *shutdown_required = true;
+
+ return 0;
+}
+
static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable)
{
int state, rc;
@@ -1441,24 +1494,31 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
+ bool shutdown_required = false;
int rc;
if (!writable)
return -EPERM;
if (sev->state == SEV_STATE_UNINIT) {
- rc = __sev_platform_init_locked(&argp->error);
+ rc = sev_move_to_init_state(argp, &shutdown_required);
if (rc)
return rc;
}
- return __sev_do_cmd_locked(cmd, NULL, &argp->error);
+ rc = __sev_do_cmd_locked(cmd, NULL, &argp->error);
+
+ if (shutdown_required)
+ __sev_firmware_shutdown(sev, false);
+
+ return rc;
}
static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_pek_csr input;
+ bool shutdown_required = false;
struct sev_data_pek_csr data;
void __user *input_address;
void *blob = NULL;
@@ -1490,7 +1550,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
cmd:
if (sev->state == SEV_STATE_UNINIT) {
- ret = __sev_platform_init_locked(&argp->error);
+ ret = sev_move_to_init_state(argp, &shutdown_required);
if (ret)
goto e_free_blob;
}
@@ -1511,6 +1571,9 @@ cmd:
}
e_free_blob:
+ if (shutdown_required)
+ __sev_firmware_shutdown(sev, false);
+
kfree(blob);
return ret;
}
@@ -1682,9 +1745,12 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error);
/* SHUTDOWN may require DF_FLUSH */
if (*error == SEV_RET_DFFLUSH_REQUIRED) {
- ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, NULL);
+ int dfflush_error = SEV_RET_NO_FW_CALL;
+
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, &dfflush_error);
if (ret) {
- dev_err(sev->dev, "SEV-SNP DF_FLUSH failed\n");
+ dev_err(sev->dev, "SEV-SNP DF_FLUSH failed, ret = %d, error = %#x\n",
+ ret, dfflush_error);
return ret;
}
/* reissue the shutdown command */
@@ -1692,7 +1758,8 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
error);
}
if (ret) {
- dev_err(sev->dev, "SEV-SNP firmware shutdown failed\n");
+ dev_err(sev->dev, "SEV-SNP firmware shutdown failed, rc %d, error %#x\n",
+ ret, *error);
return ret;
}
@@ -1718,6 +1785,12 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
sev->snp_initialized = false;
dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &snp_panic_notifier);
+
+ /* Reset TMR size back to default */
+ sev_es_tmr_size = SEV_TMR_SIZE;
+
return ret;
}
@@ -1726,6 +1799,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_pek_cert_import input;
struct sev_data_pek_cert_import data;
+ bool shutdown_required = false;
void *pek_blob, *oca_blob;
int ret;
@@ -1756,7 +1830,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
/* If platform is not in INIT state then transition it to INIT */
if (sev->state != SEV_STATE_INIT) {
- ret = __sev_platform_init_locked(&argp->error);
+ ret = sev_move_to_init_state(argp, &shutdown_required);
if (ret)
goto e_free_oca;
}
@@ -1764,6 +1838,9 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
ret = __sev_do_cmd_locked(SEV_CMD_PEK_CERT_IMPORT, &data, &argp->error);
e_free_oca:
+ if (shutdown_required)
+ __sev_firmware_shutdown(sev, false);
+
kfree(oca_blob);
e_free_pek:
kfree(pek_blob);
@@ -1880,32 +1957,23 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
struct sev_data_pdh_cert_export data;
void __user *input_cert_chain_address;
void __user *input_pdh_cert_address;
+ bool shutdown_required = false;
int ret;
- /* If platform is not in INIT state then transition it to INIT. */
- if (sev->state != SEV_STATE_INIT) {
- if (!writable)
- return -EPERM;
-
- ret = __sev_platform_init_locked(&argp->error);
- if (ret)
- return ret;
- }
-
if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
return -EFAULT;
memset(&data, 0, sizeof(data));
+ input_pdh_cert_address = (void __user *)input.pdh_cert_address;
+ input_cert_chain_address = (void __user *)input.cert_chain_address;
+
/* Userspace wants to query the certificate length. */
if (!input.pdh_cert_address ||
!input.pdh_cert_len ||
!input.cert_chain_address)
goto cmd;
- input_pdh_cert_address = (void __user *)input.pdh_cert_address;
- input_cert_chain_address = (void __user *)input.cert_chain_address;
-
/* Allocate a physically contiguous buffer to store the PDH blob. */
if (input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE)
return -EFAULT;
@@ -1931,6 +1999,17 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
data.cert_chain_len = input.cert_chain_len;
cmd:
+ /* If platform is not in INIT state then transition it to INIT. */
+ if (sev->state != SEV_STATE_INIT) {
+ if (!writable) {
+ ret = -EPERM;
+ goto e_free_cert;
+ }
+ ret = sev_move_to_init_state(argp, &shutdown_required);
+ if (ret)
+ goto e_free_cert;
+ }
+
ret = __sev_do_cmd_locked(SEV_CMD_PDH_CERT_EXPORT, &data, &argp->error);
/* If we query the length, FW responded with expected data. */
@@ -1957,6 +2036,9 @@ cmd:
}
e_free_cert:
+ if (shutdown_required)
+ __sev_firmware_shutdown(sev, false);
+
kfree(cert_blob);
e_free_pdh:
kfree(pdh_blob);
@@ -1966,12 +2048,13 @@ e_free_pdh:
static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
{
struct sev_device *sev = psp_master->sev_data;
+ bool shutdown_required = false;
struct sev_data_snp_addr buf;
struct page *status_page;
+ int ret, error;
void *data;
- int ret;
- if (!sev->snp_initialized || !argp->data)
+ if (!argp->data)
return -EINVAL;
status_page = alloc_page(GFP_KERNEL_ACCOUNT);
@@ -1980,6 +2063,12 @@ static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
data = page_address(status_page);
+ if (!sev->snp_initialized) {
+ ret = snp_move_to_init_state(argp, &shutdown_required);
+ if (ret)
+ goto cleanup;
+ }
+
/*
* Firmware expects status page to be in firmware-owned state, otherwise
* it will report firmware error code INVALID_PAGE_STATE (0x1A).
@@ -2008,6 +2097,9 @@ static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
ret = -EFAULT;
cleanup:
+ if (shutdown_required)
+ __sev_snp_shutdown_locked(&error, false);
+
__free_pages(status_page, 0);
return ret;
}
@@ -2016,21 +2108,33 @@ static int sev_ioctl_do_snp_commit(struct sev_issue_cmd *argp)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_data_snp_commit buf;
+ bool shutdown_required = false;
+ int ret, error;
- if (!sev->snp_initialized)
- return -EINVAL;
+ if (!sev->snp_initialized) {
+ ret = snp_move_to_init_state(argp, &shutdown_required);
+ if (ret)
+ return ret;
+ }
buf.len = sizeof(buf);
- return __sev_do_cmd_locked(SEV_CMD_SNP_COMMIT, &buf, &argp->error);
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_COMMIT, &buf, &argp->error);
+
+ if (shutdown_required)
+ __sev_snp_shutdown_locked(&error, false);
+
+ return ret;
}
static int sev_ioctl_do_snp_set_config(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_snp_config config;
+ bool shutdown_required = false;
+ int ret, error;
- if (!sev->snp_initialized || !argp->data)
+ if (!argp->data)
return -EINVAL;
if (!writable)
@@ -2039,17 +2143,29 @@ static int sev_ioctl_do_snp_set_config(struct sev_issue_cmd *argp, bool writable
if (copy_from_user(&config, (void __user *)argp->data, sizeof(config)))
return -EFAULT;
- return __sev_do_cmd_locked(SEV_CMD_SNP_CONFIG, &config, &argp->error);
+ if (!sev->snp_initialized) {
+ ret = snp_move_to_init_state(argp, &shutdown_required);
+ if (ret)
+ return ret;
+ }
+
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_CONFIG, &config, &argp->error);
+
+ if (shutdown_required)
+ __sev_snp_shutdown_locked(&error, false);
+
+ return ret;
}
static int sev_ioctl_do_snp_vlek_load(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_snp_vlek_load input;
+ bool shutdown_required = false;
+ int ret, error;
void *blob;
- int ret;
- if (!sev->snp_initialized || !argp->data)
+ if (!argp->data)
return -EINVAL;
if (!writable)
@@ -2068,8 +2184,18 @@ static int sev_ioctl_do_snp_vlek_load(struct sev_issue_cmd *argp, bool writable)
input.vlek_wrapped_address = __psp_pa(blob);
+ if (!sev->snp_initialized) {
+ ret = snp_move_to_init_state(argp, &shutdown_required);
+ if (ret)
+ goto cleanup;
+ }
+
ret = __sev_do_cmd_locked(SEV_CMD_SNP_VLEK_LOAD, &input, &argp->error);
+ if (shutdown_required)
+ __sev_snp_shutdown_locked(&error, false);
+
+cleanup:
kfree(blob);
return ret;
@@ -2339,6 +2465,15 @@ static void sev_firmware_shutdown(struct sev_device *sev)
mutex_unlock(&sev_cmd_mutex);
}
+void sev_platform_shutdown(void)
+{
+ if (!psp_master || !psp_master->sev_data)
+ return;
+
+ sev_firmware_shutdown(psp_master->sev_data);
+}
+EXPORT_SYMBOL_GPL(sev_platform_shutdown);
+
void sev_dev_destroy(struct psp_device *psp)
{
struct sev_device *sev = psp->sev_data;
@@ -2373,10 +2508,6 @@ static int snp_shutdown_on_panic(struct notifier_block *nb,
return NOTIFY_DONE;
}
-static struct notifier_block snp_panic_notifier = {
- .notifier_call = snp_shutdown_on_panic,
-};
-
int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
void *data, int *error)
{
@@ -2390,9 +2521,7 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
void sev_pci_init(void)
{
struct sev_device *sev = psp_master->sev_data;
- struct sev_platform_init_args args = {0};
u8 api_major, api_minor, build;
- int rc;
if (!sev)
return;
@@ -2415,18 +2544,6 @@ void sev_pci_init(void)
api_major, api_minor, build,
sev->api_major, sev->api_minor, sev->build);
- /* Initialize the platform */
- args.probe = true;
- rc = sev_platform_init(&args);
- if (rc)
- dev_err(sev->dev, "SEV: failed to INIT error %#x, rc %d\n",
- args.error, rc);
-
- dev_info(sev->dev, "SEV%s API:%d.%d build:%d\n", sev->snp_initialized ?
- "-SNP" : "", sev->api_major, sev->api_minor, sev->build);
-
- atomic_notifier_chain_register(&panic_notifier_list,
- &snp_panic_notifier);
return;
err:
@@ -2443,7 +2560,4 @@ void sev_pci_exit(void)
return;
sev_firmware_shutdown(sev);
-
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &snp_panic_notifier);
}
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 2ebc878da160..e1be2072d680 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -375,6 +375,7 @@ static const struct tee_vdata teev1 = {
static const struct tee_vdata teev2 = {
.ring_wptr_reg = 0x10950, /* C2PMSG_20 */
.ring_rptr_reg = 0x10954, /* C2PMSG_21 */
+ .info_reg = 0x109e8, /* C2PMSG_58 */
};
static const struct platform_access_vdata pa_v1 = {
@@ -440,6 +441,7 @@ static const struct psp_vdata pspv5 = {
.cmdresp_reg = 0x10944, /* C2PMSG_17 */
.cmdbuff_addr_lo_reg = 0x10948, /* C2PMSG_18 */
.cmdbuff_addr_hi_reg = 0x1094c, /* C2PMSG_19 */
+ .bootloader_info_reg = 0x109ec, /* C2PMSG_59 */
.feature_reg = 0x109fc, /* C2PMSG_63 */
.inten_reg = 0x10510, /* P2CMSG_INTEN */
.intsts_reg = 0x10514, /* P2CMSG_INTSTS */
@@ -535,6 +537,7 @@ static const struct pci_device_id sp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1134), (kernel_ulong_t)&dev_vdata[7] },
{ PCI_VDEVICE(AMD, 0x17E0), (kernel_ulong_t)&dev_vdata[7] },
{ PCI_VDEVICE(AMD, 0x156E), (kernel_ulong_t)&dev_vdata[8] },
+ { PCI_VDEVICE(AMD, 0x17D8), (kernel_ulong_t)&dev_vdata[8] },
/* Last entry must be zero */
{ 0, }
};
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index d3f5d108b898..7c41f9593d03 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -862,7 +862,7 @@ int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *d
return -EINVAL;
}
- algs = devm_kzalloc(dev, QM_DEV_ALG_MAX_LEN * sizeof(char), GFP_KERNEL);
+ algs = devm_kzalloc(dev, QM_DEV_ALG_MAX_LEN, GFP_KERNEL);
if (!algs)
return -ENOMEM;
@@ -5224,7 +5224,7 @@ static int qm_pre_store_caps(struct hisi_qm *qm)
size_t i, size;
size = ARRAY_SIZE(qm_cap_query_info);
- qm_cap = devm_kzalloc(&pdev->dev, sizeof(*qm_cap) * size, GFP_KERNEL);
+ qm_cap = devm_kcalloc(&pdev->dev, sizeof(*qm_cap), size, GFP_KERNEL);
if (!qm_cap)
return -ENOMEM;
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index 1dc2378aa88b..e050f5ff5efb 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -491,8 +491,9 @@ static int img_hash_init(struct ahash_request *req)
struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
- rctx->fallback_req.base.flags = req->base.flags
- & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_init(&rctx->fallback_req);
}
@@ -555,10 +556,10 @@ static int img_hash_update(struct ahash_request *req)
struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
- rctx->fallback_req.base.flags = req->base.flags
- & CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, NULL, req->nbytes);
return crypto_ahash_update(&rctx->fallback_req);
}
@@ -570,9 +571,10 @@ static int img_hash_final(struct ahash_request *req)
struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
- rctx->fallback_req.base.flags = req->base.flags
- & CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, req->result, 0);
return crypto_ahash_final(&rctx->fallback_req);
}
@@ -584,11 +586,12 @@ static int img_hash_finup(struct ahash_request *req)
struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
- rctx->fallback_req.base.flags = req->base.flags
- & CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, req->result,
+ req->nbytes);
+
return crypto_ahash_finup(&rctx->fallback_req);
}
@@ -600,8 +603,9 @@ static int img_hash_import(struct ahash_request *req, const void *in)
struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
- rctx->fallback_req.base.flags = req->base.flags
- & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_import(&rctx->fallback_req, in);
}
@@ -613,8 +617,9 @@ static int img_hash_export(struct ahash_request *req, void *out)
struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
- rctx->fallback_req.base.flags = req->base.flags
- & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_export(&rctx->fallback_req, out);
}
diff --git a/drivers/crypto/inside-secure/eip93/eip93-hash.c b/drivers/crypto/inside-secure/eip93/eip93-hash.c
index df1b05ac5a57..ac13d90a2b7c 100644
--- a/drivers/crypto/inside-secure/eip93/eip93-hash.c
+++ b/drivers/crypto/inside-secure/eip93/eip93-hash.c
@@ -97,12 +97,20 @@ void eip93_hash_handle_result(struct crypto_async_request *async, int err)
static void eip93_hash_init_sa_state_digest(u32 hash, u8 *digest)
{
- u32 sha256_init[] = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
- SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 };
- u32 sha224_init[] = { SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
- SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7 };
- u32 sha1_init[] = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
- u32 md5_init[] = { MD5_H0, MD5_H1, MD5_H2, MD5_H3 };
+ static const u32 sha256_init[] = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7
+ };
+ static const u32 sha224_init[] = {
+ SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+ SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7
+ };
+ static const u32 sha1_init[] = {
+ SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4
+ };
+ static const u32 md5_init[] = {
+ MD5_H0, MD5_H1, MD5_H2, MD5_H3
+ };
/* Init HASH constant */
switch (hash) {
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index f44c08f5f5ec..d2b632193beb 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -2043,7 +2043,7 @@ struct safexcel_alg_template safexcel_alg_cbcmac = {
.cra_flags = CRYPTO_ALG_ASYNC |
CRYPTO_ALG_ALLOCATES_MEMORY |
CRYPTO_ALG_KERN_DRIVER_ONLY,
- .cra_blocksize = 1,
+ .cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
.cra_init = safexcel_ahash_cra_init,
.cra_exit = safexcel_ahash_cra_exit,
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 09d9589f2d68..23f585219fb4 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -725,7 +725,7 @@ static int alloc_wq_table(int max_wqs)
for (cpu = 0; cpu < nr_cpus; cpu++) {
entry = per_cpu_ptr(wq_table, cpu);
- entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL);
+ entry->wqs = kcalloc(max_wqs, sizeof(*entry->wqs), GFP_KERNEL);
if (!entry->wqs) {
free_wq_table();
return -ENOMEM;
@@ -894,7 +894,7 @@ out:
static void rebalance_wq_table(void)
{
const struct cpumask *node_cpus;
- int node, cpu, iaa = -1;
+ int node_cpu, node, cpu, iaa = 0;
if (nr_iaa == 0)
return;
@@ -905,36 +905,29 @@ static void rebalance_wq_table(void)
clear_wq_table();
if (nr_iaa == 1) {
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- if (WARN_ON(wq_table_add_wqs(0, cpu))) {
- pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
- return;
- }
+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(wq_table_add_wqs(0, cpu)))
+ goto err;
}
return;
}
for_each_node_with_cpus(node) {
+ cpu = 0;
node_cpus = cpumask_of_node(node);
- for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
- int node_cpu = cpumask_nth(cpu, node_cpus);
-
- if (WARN_ON(node_cpu >= nr_cpu_ids)) {
- pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
- return;
- }
-
- if ((cpu % cpus_per_iaa) == 0)
- iaa++;
-
- if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
- pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
- return;
- }
+ for_each_cpu(node_cpu, node_cpus) {
+ iaa = cpu / cpus_per_iaa;
+ if (WARN_ON(wq_table_add_wqs(iaa, node_cpu)))
+ goto err;
+ cpu++;
}
}
+
+ return;
+err:
+ pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
}
static inline int check_completion(struct device *dev,
@@ -999,12 +992,9 @@ out:
static int deflate_generic_decompress(struct acomp_req *req)
{
- ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req));
+ ACOMP_FBREQ_ON_STACK(fbreq, req);
int ret;
- acomp_request_set_callback(fbreq, 0, NULL, NULL);
- acomp_request_set_params(fbreq, req->src, req->dst, req->slen,
- req->dlen);
ret = crypto_acomp_decompress(fbreq);
req->dlen = fbreq->dlen;
@@ -1020,8 +1010,7 @@ static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
struct idxd_wq *wq,
dma_addr_t src_addr, unsigned int slen,
- dma_addr_t dst_addr, unsigned int *dlen,
- u32 compression_crc);
+ dma_addr_t dst_addr, unsigned int *dlen);
static void iaa_desc_complete(struct idxd_desc *idxd_desc,
enum idxd_complete_type comp_type,
@@ -1087,10 +1076,10 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc,
}
if (ctx->compress && compression_ctx->verify_compress) {
+ u32 *compression_crc = acomp_request_ctx(ctx->req);
dma_addr_t src_addr, dst_addr;
- u32 compression_crc;
- compression_crc = idxd_desc->iax_completion->crc;
+ *compression_crc = idxd_desc->iax_completion->crc;
ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
if (ret) {
@@ -1100,8 +1089,7 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc,
}
ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
- ctx->req->slen, dst_addr, &ctx->req->dlen,
- compression_crc);
+ ctx->req->slen, dst_addr, &ctx->req->dlen);
if (ret) {
dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
err = -EIO;
@@ -1130,11 +1118,11 @@ out:
static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
struct idxd_wq *wq,
dma_addr_t src_addr, unsigned int slen,
- dma_addr_t dst_addr, unsigned int *dlen,
- u32 *compression_crc)
+ dma_addr_t dst_addr, unsigned int *dlen)
{
struct iaa_device_compression_mode *active_compression_mode;
struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 *compression_crc = acomp_request_ctx(req);
struct iaa_device *iaa_device;
struct idxd_desc *idxd_desc;
struct iax_hw_desc *desc;
@@ -1187,8 +1175,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
" src_addr %llx, dst_addr %llx\n", __func__,
active_compression_mode->name,
src_addr, dst_addr);
- } else if (ctx->async_mode)
- req->base.data = idxd_desc;
+ }
dev_dbg(dev, "%s: compression mode %s,"
" desc->src1_addr %llx, desc->src1_size %d,"
@@ -1282,11 +1269,11 @@ out:
static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
struct idxd_wq *wq,
dma_addr_t src_addr, unsigned int slen,
- dma_addr_t dst_addr, unsigned int *dlen,
- u32 compression_crc)
+ dma_addr_t dst_addr, unsigned int *dlen)
{
struct iaa_device_compression_mode *active_compression_mode;
struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 *compression_crc = acomp_request_ctx(req);
struct iaa_device *iaa_device;
struct idxd_desc *idxd_desc;
struct iax_hw_desc *desc;
@@ -1346,10 +1333,10 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
goto err;
}
- if (compression_crc != idxd_desc->iax_completion->crc) {
+ if (*compression_crc != idxd_desc->iax_completion->crc) {
ret = -EINVAL;
dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
- " comp=0x%x, decomp=0x%x\n", compression_crc,
+ " comp=0x%x, decomp=0x%x\n", *compression_crc,
idxd_desc->iax_completion->crc);
print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
8, 1, idxd_desc->iax_completion, 64, 0);
@@ -1369,8 +1356,7 @@ err:
static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
struct idxd_wq *wq,
dma_addr_t src_addr, unsigned int slen,
- dma_addr_t dst_addr, unsigned int *dlen,
- bool disable_async)
+ dma_addr_t dst_addr, unsigned int *dlen)
{
struct iaa_device_compression_mode *active_compression_mode;
struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -1412,7 +1398,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
desc->src1_size = slen;
desc->completion_addr = idxd_desc->compl_dma;
- if (ctx->use_irq && !disable_async) {
+ if (ctx->use_irq) {
desc->flags |= IDXD_OP_FLAG_RCI;
idxd_desc->crypto.req = req;
@@ -1425,8 +1411,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
" src_addr %llx, dst_addr %llx\n", __func__,
active_compression_mode->name,
src_addr, dst_addr);
- } else if (ctx->async_mode && !disable_async)
- req->base.data = idxd_desc;
+ }
dev_dbg(dev, "%s: decompression mode %s,"
" desc->src1_addr %llx, desc->src1_size %d,"
@@ -1446,7 +1431,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
update_total_decomp_calls();
update_wq_decomp_calls(wq);
- if (ctx->async_mode && !disable_async) {
+ if (ctx->async_mode) {
ret = -EINPROGRESS;
dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
goto out;
@@ -1474,7 +1459,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
*dlen = req->dlen;
- if (!ctx->async_mode || disable_async)
+ if (!ctx->async_mode)
idxd_free_desc(wq, idxd_desc);
/* Update stats */
@@ -1496,7 +1481,6 @@ static int iaa_comp_acompress(struct acomp_req *req)
dma_addr_t src_addr, dst_addr;
int nr_sgs, cpu, ret = 0;
struct iaa_wq *iaa_wq;
- u32 compression_crc;
struct idxd_wq *wq;
struct device *dev;
@@ -1557,7 +1541,7 @@ static int iaa_comp_acompress(struct acomp_req *req)
req->dst, req->dlen, sg_dma_len(req->dst));
ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
- &req->dlen, &compression_crc);
+ &req->dlen);
if (ret == -EINPROGRESS)
return ret;
@@ -1569,7 +1553,7 @@ static int iaa_comp_acompress(struct acomp_req *req)
}
ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
- dst_addr, &req->dlen, compression_crc);
+ dst_addr, &req->dlen);
if (ret)
dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
@@ -1655,7 +1639,7 @@ static int iaa_comp_adecompress(struct acomp_req *req)
req->dst, req->dlen, sg_dma_len(req->dst));
ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
- dst_addr, &req->dlen, false);
+ dst_addr, &req->dlen);
if (ret == -EINPROGRESS)
return ret;
@@ -1699,6 +1683,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = {
.cra_driver_name = "deflate-iaa",
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_ctxsize = sizeof(struct iaa_compression_ctx),
+ .cra_reqsize = sizeof(u32),
.cra_module = THIS_MODULE,
.cra_priority = IAA_ALG_PRIORITY,
}
diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig
index 02fb8abe4e6e..359c61f0c8a1 100644
--- a/drivers/crypto/intel/qat/Kconfig
+++ b/drivers/crypto/intel/qat/Kconfig
@@ -70,6 +70,18 @@ config CRYPTO_DEV_QAT_420XX
To compile this as a module, choose M here: the module
will be called qat_420xx.
+config CRYPTO_DEV_QAT_6XXX
+ tristate "Support for Intel(R) QuickAssist Technology QAT_6XXX"
+ depends on (X86 || COMPILE_TEST)
+ depends on PCI
+ select CRYPTO_DEV_QAT
+ help
+ Support for Intel(R) QuickAssist Technology QAT_6xxx
+ for accelerating crypto and compression workloads.
+
+ To compile this as a module, choose M here: the module
+ will be called qat_6xxx.
+
config CRYPTO_DEV_QAT_DH895xCCVF
tristate "Support for Intel(R) DH895xCC Virtual Function"
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
diff --git a/drivers/crypto/intel/qat/Makefile b/drivers/crypto/intel/qat/Makefile
index 235b69f4f3f7..abef14207afa 100644
--- a/drivers/crypto/intel/qat/Makefile
+++ b/drivers/crypto/intel/qat/Makefile
@@ -1,10 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
+subdir-ccflags-y := -I$(src)/qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT) += qat_common/
obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCC) += qat_dh895xcc/
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXX) += qat_c3xxx/
obj-$(CONFIG_CRYPTO_DEV_QAT_C62X) += qat_c62x/
obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx/
obj-$(CONFIG_CRYPTO_DEV_QAT_420XX) += qat_420xx/
+obj-$(CONFIG_CRYPTO_DEV_QAT_6XXX) += qat_6xxx/
obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCCVF) += qat_dh895xccvf/
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXXVF) += qat_c3xxxvf/
obj-$(CONFIG_CRYPTO_DEV_QAT_C62XVF) += qat_c62xvf/
diff --git a/drivers/crypto/intel/qat/qat_420xx/Makefile b/drivers/crypto/intel/qat/qat_420xx/Makefile
index 72b24b1804cf..f6df54d2993e 100644
--- a/drivers/crypto/intel/qat/qat_420xx/Makefile
+++ b/drivers/crypto/intel/qat/qat_420xx/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_420XX) += qat_420xx.o
qat_420xx-y := adf_drv.o adf_420xx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
index 4feeef83f7a3..7c3c0f561c95 100644
--- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
@@ -9,15 +9,14 @@
#include <adf_common_drv.h>
#include <adf_fw_config.h>
#include <adf_gen4_config.h>
-#include <adf_gen4_dc.h>
#include <adf_gen4_hw_csr_data.h>
#include <adf_gen4_hw_data.h>
#include <adf_gen4_pfvf.h>
#include <adf_gen4_pm.h>
#include <adf_gen4_ras.h>
-#include <adf_gen4_timer.h>
#include <adf_gen4_tl.h>
#include <adf_gen4_vf_mig.h>
+#include <adf_timer.h>
#include "adf_420xx_hw_data.h"
#include "icp_qat_hw.h"
@@ -93,7 +92,6 @@ static const struct adf_fw_config adf_fw_dcc_config[] = {
static struct adf_hw_device_class adf_420xx_class = {
.name = ADF_420XX_DEVICE_NAME,
.type = DEV_420XX,
- .instances = 0,
};
static u32 get_ae_mask(struct adf_hw_device_data *self)
@@ -469,8 +467,8 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->enable_pm = adf_gen4_enable_pm;
hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
hw_data->dev_config = adf_gen4_dev_config;
- hw_data->start_timer = adf_gen4_timer_start;
- hw_data->stop_timer = adf_gen4_timer_stop;
+ hw_data->start_timer = adf_timer_start;
+ hw_data->stop_timer = adf_timer_stop;
hw_data->get_hb_clock = adf_gen4_get_heartbeat_clock;
hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
hw_data->clock_frequency = ADF_420XX_AE_FREQ;
diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c
index 8084aa0f7f41..cfa00daeb4fb 100644
--- a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c
@@ -14,7 +14,7 @@
#include "adf_420xx_hw_data.h"
static const struct pci_device_id adf_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, ADF_420XX_PCI_DEVICE_ID), },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_420XX) },
{ }
};
MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
@@ -186,11 +186,19 @@ static void adf_remove(struct pci_dev *pdev)
adf_cleanup_accel(accel_dev);
}
+static void adf_shutdown(struct pci_dev *pdev)
+{
+ struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+
+ adf_dev_down(accel_dev);
+}
+
static struct pci_driver adf_driver = {
.id_table = adf_pci_tbl,
.name = ADF_420XX_DEVICE_NAME,
.probe = adf_probe,
.remove = adf_remove,
+ .shutdown = adf_shutdown,
.sriov_configure = adf_sriov_configure,
.err_handler = &adf_err_handler,
};
diff --git a/drivers/crypto/intel/qat/qat_4xxx/Makefile b/drivers/crypto/intel/qat/qat_4xxx/Makefile
index e8480bb80dee..188b611445e6 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/Makefile
+++ b/drivers/crypto/intel/qat/qat_4xxx/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx.o
qat_4xxx-y := adf_drv.o adf_4xxx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 4eb6ef99efdd..bd0b1b1015c0 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -9,15 +9,14 @@
#include <adf_common_drv.h>
#include <adf_fw_config.h>
#include <adf_gen4_config.h>
-#include <adf_gen4_dc.h>
#include <adf_gen4_hw_csr_data.h>
#include <adf_gen4_hw_data.h>
#include <adf_gen4_pfvf.h>
#include <adf_gen4_pm.h>
#include "adf_gen4_ras.h"
-#include <adf_gen4_timer.h>
#include <adf_gen4_tl.h>
#include <adf_gen4_vf_mig.h>
+#include <adf_timer.h>
#include "adf_4xxx_hw_data.h"
#include "icp_qat_hw.h"
@@ -96,7 +95,6 @@ static_assert(ARRAY_SIZE(adf_fw_cy_config) == ARRAY_SIZE(adf_fw_dcc_config));
static struct adf_hw_device_class adf_4xxx_class = {
.name = ADF_4XXX_DEVICE_NAME,
.type = DEV_4XXX,
- .instances = 0,
};
static u32 get_ae_mask(struct adf_hw_device_data *self)
@@ -422,13 +420,13 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
hw_data->num_rps = ADF_GEN4_MAX_RPS;
switch (dev_id) {
- case ADF_402XX_PCI_DEVICE_ID:
+ case PCI_DEVICE_ID_INTEL_QAT_402XX:
hw_data->fw_name = ADF_402XX_FW;
hw_data->fw_mmp_name = ADF_402XX_MMP;
hw_data->uof_get_name = uof_get_name_402xx;
hw_data->get_ena_thd_mask = get_ena_thd_mask;
break;
- case ADF_401XX_PCI_DEVICE_ID:
+ case PCI_DEVICE_ID_INTEL_QAT_401XX:
hw_data->fw_name = ADF_4XXX_FW;
hw_data->fw_mmp_name = ADF_4XXX_MMP;
hw_data->uof_get_name = uof_get_name_4xxx;
@@ -455,8 +453,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->enable_pm = adf_gen4_enable_pm;
hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
hw_data->dev_config = adf_gen4_dev_config;
- hw_data->start_timer = adf_gen4_timer_start;
- hw_data->stop_timer = adf_gen4_timer_stop;
+ hw_data->start_timer = adf_timer_start;
+ hw_data->stop_timer = adf_timer_stop;
hw_data->get_hb_clock = adf_gen4_get_heartbeat_clock;
hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
hw_data->clock_frequency = ADF_4XXX_AE_FREQ;
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
index 5537a9991e4e..c9be5dcddb27 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
@@ -14,9 +14,9 @@
#include "adf_4xxx_hw_data.h"
static const struct pci_device_id adf_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), },
- { PCI_VDEVICE(INTEL, ADF_401XX_PCI_DEVICE_ID), },
- { PCI_VDEVICE(INTEL, ADF_402XX_PCI_DEVICE_ID), },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_4XXX) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_401XX) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_402XX) },
{ }
};
MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
@@ -188,11 +188,19 @@ static void adf_remove(struct pci_dev *pdev)
adf_cleanup_accel(accel_dev);
}
+static void adf_shutdown(struct pci_dev *pdev)
+{
+ struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+
+ adf_dev_down(accel_dev);
+}
+
static struct pci_driver adf_driver = {
.id_table = adf_pci_tbl,
.name = ADF_4XXX_DEVICE_NAME,
.probe = adf_probe,
.remove = adf_remove,
+ .shutdown = adf_shutdown,
.sriov_configure = adf_sriov_configure,
.err_handler = &adf_err_handler,
};
diff --git a/drivers/crypto/intel/qat/qat_6xxx/Makefile b/drivers/crypto/intel/qat/qat_6xxx/Makefile
new file mode 100644
index 000000000000..4b4de67cb0c2
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_6xxx/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_CRYPTO_DEV_QAT_6XXX) += qat_6xxx.o
+qat_6xxx-y := adf_drv.o adf_6xxx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
new file mode 100644
index 000000000000..359a6447ccb8
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
@@ -0,0 +1,845 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 Intel Corporation */
+#include <linux/array_size.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+#include <adf_accel_devices.h>
+#include <adf_admin.h>
+#include <adf_cfg.h>
+#include <adf_cfg_services.h>
+#include <adf_clock.h>
+#include <adf_common_drv.h>
+#include <adf_fw_config.h>
+#include <adf_gen6_pm.h>
+#include <adf_gen6_ras.h>
+#include <adf_gen6_shared.h>
+#include <adf_timer.h>
+#include "adf_6xxx_hw_data.h"
+#include "icp_qat_fw_comp.h"
+#include "icp_qat_hw_51_comp.h"
+
+#define RP_GROUP_0_MASK (BIT(0) | BIT(2))
+#define RP_GROUP_1_MASK (BIT(1) | BIT(3))
+#define RP_GROUP_ALL_MASK (RP_GROUP_0_MASK | RP_GROUP_1_MASK)
+
+#define ADF_AE_GROUP_0 GENMASK(3, 0)
+#define ADF_AE_GROUP_1 GENMASK(7, 4)
+#define ADF_AE_GROUP_2 BIT(8)
+
+struct adf_ring_config {
+ u32 ring_mask;
+ enum adf_cfg_service_type ring_type;
+ const unsigned long *thrd_mask;
+};
+
+static u32 rmask_two_services[] = {
+ RP_GROUP_0_MASK,
+ RP_GROUP_1_MASK,
+};
+
+enum adf_gen6_rps {
+ RP0 = 0,
+ RP1 = 1,
+ RP2 = 2,
+ RP3 = 3,
+ RP_MAX = RP3
+};
+
+/*
+ * thrd_mask_[sym|asym|cpr|dcc]: these static arrays define the thread
+ * configuration for handling requests of specific services across the
+ * accelerator engines. Each element in an array corresponds to an
+ * accelerator engine, with the value being a bitmask that specifies which
+ * threads within that engine are capable of processing the particular service.
+ *
+ * For example, a value of 0x0C means that threads 2 and 3 are enabled for the
+ * service in the respective accelerator engine.
+ */
+static const unsigned long thrd_mask_sym[ADF_6XXX_MAX_ACCELENGINES] = {
+ 0x0C, 0x0C, 0x0C, 0x0C, 0x1C, 0x1C, 0x1C, 0x1C, 0x00
+};
+
+static const unsigned long thrd_mask_asym[ADF_6XXX_MAX_ACCELENGINES] = {
+ 0x70, 0x70, 0x70, 0x70, 0x60, 0x60, 0x60, 0x60, 0x00
+};
+
+static const unsigned long thrd_mask_cpr[ADF_6XXX_MAX_ACCELENGINES] = {
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00
+};
+
+static const unsigned long thrd_mask_dcc[ADF_6XXX_MAX_ACCELENGINES] = {
+ 0x00, 0x00, 0x00, 0x00, 0x07, 0x07, 0x03, 0x03, 0x00
+};
+
+static const char *const adf_6xxx_fw_objs[] = {
+ [ADF_FW_CY_OBJ] = ADF_6XXX_CY_OBJ,
+ [ADF_FW_DC_OBJ] = ADF_6XXX_DC_OBJ,
+ [ADF_FW_ADMIN_OBJ] = ADF_6XXX_ADMIN_OBJ,
+};
+
+static const struct adf_fw_config adf_default_fw_config[] = {
+ { ADF_AE_GROUP_1, ADF_FW_DC_OBJ },
+ { ADF_AE_GROUP_0, ADF_FW_CY_OBJ },
+ { ADF_AE_GROUP_2, ADF_FW_ADMIN_OBJ },
+};
+
+static struct adf_hw_device_class adf_6xxx_class = {
+ .name = ADF_6XXX_DEVICE_NAME,
+ .type = DEV_6XXX,
+};
+
+static bool services_supported(unsigned long mask)
+{
+ int num_svc;
+
+ if (mask >= BIT(SVC_BASE_COUNT))
+ return false;
+
+ num_svc = hweight_long(mask);
+ switch (num_svc) {
+ case ADF_ONE_SERVICE:
+ return true;
+ case ADF_TWO_SERVICES:
+ case ADF_THREE_SERVICES:
+ return !test_bit(SVC_DCC, &mask);
+ default:
+ return false;
+ }
+}
+
+static int get_service(unsigned long *mask)
+{
+ if (test_and_clear_bit(SVC_ASYM, mask))
+ return SVC_ASYM;
+
+ if (test_and_clear_bit(SVC_SYM, mask))
+ return SVC_SYM;
+
+ if (test_and_clear_bit(SVC_DC, mask))
+ return SVC_DC;
+
+ if (test_and_clear_bit(SVC_DCC, mask))
+ return SVC_DCC;
+
+ return -EINVAL;
+}
+
+static enum adf_cfg_service_type get_ring_type(enum adf_services service)
+{
+ switch (service) {
+ case SVC_SYM:
+ return SYM;
+ case SVC_ASYM:
+ return ASYM;
+ case SVC_DC:
+ case SVC_DCC:
+ return COMP;
+ default:
+ return UNUSED;
+ }
+}
+
+static const unsigned long *get_thrd_mask(enum adf_services service)
+{
+ switch (service) {
+ case SVC_SYM:
+ return thrd_mask_sym;
+ case SVC_ASYM:
+ return thrd_mask_asym;
+ case SVC_DC:
+ return thrd_mask_cpr;
+ case SVC_DCC:
+ return thrd_mask_dcc;
+ default:
+ return NULL;
+ }
+}
+
+static int get_rp_config(struct adf_accel_dev *accel_dev, struct adf_ring_config *rp_config,
+ unsigned int *num_services)
+{
+ unsigned int i, nservices;
+ unsigned long mask;
+ int ret, service;
+
+ ret = adf_get_service_mask(accel_dev, &mask);
+ if (ret)
+ return ret;
+
+ nservices = hweight_long(mask);
+ if (nservices > MAX_NUM_CONCURR_SVC)
+ return -EINVAL;
+
+ for (i = 0; i < nservices; i++) {
+ service = get_service(&mask);
+ if (service < 0)
+ return service;
+
+ rp_config[i].ring_type = get_ring_type(service);
+ rp_config[i].thrd_mask = get_thrd_mask(service);
+
+ /*
+ * If there is only one service enabled, use all ring pairs for
+ * that service.
+ * If there are two services enabled, use ring pairs 0 and 2 for
+ * one service and ring pairs 1 and 3 for the other service.
+ */
+ switch (nservices) {
+ case ADF_ONE_SERVICE:
+ rp_config[i].ring_mask = RP_GROUP_ALL_MASK;
+ break;
+ case ADF_TWO_SERVICES:
+ rp_config[i].ring_mask = rmask_two_services[i];
+ break;
+ case ADF_THREE_SERVICES:
+ rp_config[i].ring_mask = BIT(i);
+
+ /* If ASYM is enabled, use additional ring pair */
+ if (service == SVC_ASYM)
+ rp_config[i].ring_mask |= BIT(RP3);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ *num_services = nservices;
+
+ return 0;
+}
+
+static u32 adf_gen6_get_arb_mask(struct adf_accel_dev *accel_dev, unsigned int ae)
+{
+ struct adf_ring_config rp_config[MAX_NUM_CONCURR_SVC];
+ unsigned int num_services, i, thrd;
+ u32 ring_mask, thd2arb_mask = 0;
+ const unsigned long *p_mask;
+
+ if (get_rp_config(accel_dev, rp_config, &num_services))
+ return 0;
+
+ /*
+ * The thd2arb_mask maps ring pairs to threads within an accelerator engine.
+ * It ensures that jobs submitted to ring pairs are scheduled on threads capable
+ * of handling the specified service type.
+ *
+ * Each group of 4 bits in the mask corresponds to a thread, with each bit
+ * indicating whether a job from a ring pair can be scheduled on that thread.
+ * The use of 4 bits is due to the organization of ring pairs into groups of
+ * four, where each group shares the same configuration.
+ */
+ for (i = 0; i < num_services; i++) {
+ p_mask = &rp_config[i].thrd_mask[ae];
+ ring_mask = rp_config[i].ring_mask;
+
+ for_each_set_bit(thrd, p_mask, ADF_NUM_THREADS_PER_AE)
+ thd2arb_mask |= ring_mask << (thrd * 4);
+ }
+
+ return thd2arb_mask;
+}
+
+static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
+{
+ enum adf_cfg_service_type rps[ADF_GEN6_NUM_BANKS_PER_VF] = { };
+ struct adf_ring_config rp_config[MAX_NUM_CONCURR_SVC];
+ unsigned int num_services, rp_num, i;
+ unsigned long cfg_mask;
+ u16 ring_to_svc_map;
+
+ if (get_rp_config(accel_dev, rp_config, &num_services))
+ return 0;
+
+ /*
+ * Loop through the configured services and populate the `rps` array that
+ * contains what service that particular ring pair can handle (i.e. symmetric
+ * crypto, asymmetric crypto, data compression or compression chaining).
+ */
+ for (i = 0; i < num_services; i++) {
+ cfg_mask = rp_config[i].ring_mask;
+ for_each_set_bit(rp_num, &cfg_mask, ADF_GEN6_NUM_BANKS_PER_VF)
+ rps[rp_num] = rp_config[i].ring_type;
+ }
+
+ /*
+ * The ring_mask is structured into segments of 3 bits, with each
+ * segment representing the service configuration for a specific ring pair.
+ * Since ring pairs are organized into groups of 4, the ring_mask contains 4
+ * such 3-bit segments, each corresponding to one ring pair.
+ *
+ * The device has 64 ring pairs, which are organized in groups of 4, namely
+ * 16 groups. Each group has the same configuration, represented here by
+ * `ring_to_svc_map`.
+ */
+ ring_to_svc_map = rps[RP0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
+ rps[RP1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
+ rps[RP2] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
+ rps[RP3] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
+
+ return ring_to_svc_map;
+}
+
+static u32 get_accel_mask(struct adf_hw_device_data *self)
+{
+ return ADF_GEN6_ACCELERATORS_MASK;
+}
+
+static u32 get_num_accels(struct adf_hw_device_data *self)
+{
+ return ADF_GEN6_MAX_ACCELERATORS;
+}
+
+static u32 get_num_aes(struct adf_hw_device_data *self)
+{
+ return self ? hweight32(self->ae_mask) : 0;
+}
+
+static u32 get_misc_bar_id(struct adf_hw_device_data *self)
+{
+ return ADF_GEN6_PMISC_BAR;
+}
+
+static u32 get_etr_bar_id(struct adf_hw_device_data *self)
+{
+ return ADF_GEN6_ETR_BAR;
+}
+
+static u32 get_sram_bar_id(struct adf_hw_device_data *self)
+{
+ return ADF_GEN6_SRAM_BAR;
+}
+
+static enum dev_sku_info get_sku(struct adf_hw_device_data *self)
+{
+ return DEV_SKU_1;
+}
+
+static void get_arb_info(struct arb_info *arb_info)
+{
+ arb_info->arb_cfg = ADF_GEN6_ARB_CONFIG;
+ arb_info->arb_offset = ADF_GEN6_ARB_OFFSET;
+ arb_info->wt2sam_offset = ADF_GEN6_ARB_WRK_2_SER_MAP_OFFSET;
+}
+
+static void get_admin_info(struct admin_info *admin_csrs_info)
+{
+ admin_csrs_info->mailbox_offset = ADF_GEN6_MAILBOX_BASE_OFFSET;
+ admin_csrs_info->admin_msg_ur = ADF_GEN6_ADMINMSGUR_OFFSET;
+ admin_csrs_info->admin_msg_lr = ADF_GEN6_ADMINMSGLR_OFFSET;
+}
+
+static u32 get_heartbeat_clock(struct adf_hw_device_data *self)
+{
+ return ADF_GEN6_COUNTER_FREQ;
+}
+
+static void enable_error_correction(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+
+ /*
+ * Enable all error notification bits in errsou3 except VFLR
+ * notification on host.
+ */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK3, ADF_GEN6_VFLNOTIFY);
+}
+
+static void enable_ints(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *addr = adf_get_pmisc_base(accel_dev);
+
+ /* Enable bundle interrupts */
+ ADF_CSR_WR(addr, ADF_GEN6_SMIAPF_RP_X0_MASK_OFFSET, 0);
+ ADF_CSR_WR(addr, ADF_GEN6_SMIAPF_RP_X1_MASK_OFFSET, 0);
+
+ /* Enable misc interrupts */
+ ADF_CSR_WR(addr, ADF_GEN6_SMIAPF_MASK_OFFSET, 0);
+}
+
+static void set_ssm_wdtimer(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *addr = adf_get_pmisc_base(accel_dev);
+ u64 val_pke = ADF_SSM_WDT_PKE_DEFAULT_VALUE;
+ u64 val = ADF_SSM_WDT_DEFAULT_VALUE;
+
+ /* Enable watchdog timer for sym and dc */
+ ADF_CSR_WR64_LO_HI(addr, ADF_SSMWDTATHL_OFFSET, ADF_SSMWDTATHH_OFFSET, val);
+ ADF_CSR_WR64_LO_HI(addr, ADF_SSMWDTCNVL_OFFSET, ADF_SSMWDTCNVH_OFFSET, val);
+ ADF_CSR_WR64_LO_HI(addr, ADF_SSMWDTUCSL_OFFSET, ADF_SSMWDTUCSH_OFFSET, val);
+ ADF_CSR_WR64_LO_HI(addr, ADF_SSMWDTDCPRL_OFFSET, ADF_SSMWDTDCPRH_OFFSET, val);
+
+ /* Enable watchdog timer for pke */
+ ADF_CSR_WR64_LO_HI(addr, ADF_SSMWDTPKEL_OFFSET, ADF_SSMWDTPKEH_OFFSET, val_pke);
+}
+
+/*
+ * The vector routing table is used to select the MSI-X entry to use for each
+ * interrupt source.
+ * The first ADF_GEN6_ETR_MAX_BANKS entries correspond to ring interrupts.
+ * The final entry corresponds to VF2PF or error interrupts.
+ * This vector table could be used to configure one MSI-X entry to be shared
+ * between multiple interrupt sources.
+ *
+ * The default routing is set to have a one to one correspondence between the
+ * interrupt source and the MSI-X entry used.
+ */
+static void set_msix_default_rttable(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+ unsigned int i;
+
+ for (i = 0; i <= ADF_GEN6_ETR_MAX_BANKS; i++)
+ ADF_CSR_WR(csr, ADF_GEN6_MSIX_RTTABLE_OFFSET(i), i);
+}
+
+static int reset_ring_pair(void __iomem *csr, u32 bank_number)
+{
+ u32 status;
+ int ret;
+
+ /*
+ * Write rpresetctl register BIT(0) as 1.
+ * Since rpresetctl registers have no RW fields, no need to preserve
+ * values for other bits. Just write directly.
+ */
+ ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETCTL(bank_number),
+ ADF_WQM_CSR_RPRESETCTL_RESET);
+
+ /* Read rpresetsts register and wait for rp reset to complete */
+ ret = read_poll_timeout(ADF_CSR_RD, status,
+ status & ADF_WQM_CSR_RPRESETSTS_STATUS,
+ ADF_RPRESET_POLL_DELAY_US,
+ ADF_RPRESET_POLL_TIMEOUT_US, true,
+ csr, ADF_WQM_CSR_RPRESETSTS(bank_number));
+ if (ret)
+ return ret;
+
+ /* When ring pair reset is done, clear rpresetsts */
+ ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETSTS(bank_number), ADF_WQM_CSR_RPRESETSTS_STATUS);
+
+ return 0;
+}
+
+static int ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ void __iomem *csr = adf_get_etr_base(accel_dev);
+ int ret;
+
+ if (bank_number >= hw_data->num_banks)
+ return -EINVAL;
+
+ dev_dbg(&GET_DEV(accel_dev), "ring pair reset for bank:%d\n", bank_number);
+
+ ret = reset_ring_pair(csr, bank_number);
+ if (ret)
+ dev_err(&GET_DEV(accel_dev), "ring pair reset failed (timeout)\n");
+ else
+ dev_dbg(&GET_DEV(accel_dev), "ring pair reset successful\n");
+
+ return ret;
+}
+
+static int build_comp_block(void *ctx, enum adf_dc_algo algo)
+{
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
+ struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+ struct icp_qat_hw_comp_51_config_csr_lower hw_comp_lower_csr = { };
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+ u32 lower_val;
+
+ switch (algo) {
+ case QAT_DEFLATE:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_51_LLLBD_CTRL_LLLBD_DISABLED;
+ hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_51_SEARCH_DEPTH_LEVEL_1;
+ lower_val = ICP_QAT_FW_COMP_51_BUILD_CONFIG_LOWER(hw_comp_lower_csr);
+ cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
+ cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
+
+ return 0;
+}
+
+static int build_decomp_block(void *ctx, enum adf_dc_algo algo)
+{
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
+ struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+
+ switch (algo) {
+ case QAT_DEFLATE:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ cd_pars->u.sl.comp_slice_cfg_word[0] = 0;
+ cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
+
+ return 0;
+}
+
+static void adf_gen6_init_dc_ops(struct adf_dc_ops *dc_ops)
+{
+ dc_ops->build_comp_block = build_comp_block;
+ dc_ops->build_decomp_block = build_decomp_block;
+}
+
+static int adf_gen6_init_thd2arb_map(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ u32 *thd2arb_map = hw_data->thd_to_arb_map;
+ unsigned int i;
+
+ for (i = 0; i < hw_data->num_engines; i++) {
+ thd2arb_map[i] = adf_gen6_get_arb_mask(accel_dev, i);
+ dev_dbg(&GET_DEV(accel_dev), "ME:%d arb_mask:%#x\n", i, thd2arb_map[i]);
+ }
+
+ return 0;
+}
+
+static void set_vc_csr_for_bank(void __iomem *csr, u32 bank_number)
+{
+ u32 value;
+
+ /*
+ * After each PF FLR, for each of the 64 ring pairs in the PF, the
+ * driver must program the ringmodectl CSRs.
+ */
+ value = ADF_CSR_RD(csr, ADF_GEN6_CSR_RINGMODECTL(bank_number));
+ value |= FIELD_PREP(ADF_GEN6_RINGMODECTL_TC_MASK, ADF_GEN6_RINGMODECTL_TC_DEFAULT);
+ value |= FIELD_PREP(ADF_GEN6_RINGMODECTL_TC_EN_MASK, ADF_GEN6_RINGMODECTL_TC_EN_OP1);
+ ADF_CSR_WR(csr, ADF_GEN6_CSR_RINGMODECTL(bank_number), value);
+}
+
+static int set_vc_config(struct adf_accel_dev *accel_dev)
+{
+ struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
+ u32 value;
+ int err;
+
+ /*
+ * After each PF FLR, the driver must program the Port Virtual Channel (VC)
+ * Control Registers.
+ * Read PVC0CTL then write the masked values.
+ */
+ pci_read_config_dword(pdev, ADF_GEN6_PVC0CTL_OFFSET, &value);
+ value |= FIELD_PREP(ADF_GEN6_PVC0CTL_TCVCMAP_MASK, ADF_GEN6_PVC0CTL_TCVCMAP_DEFAULT);
+ err = pci_write_config_dword(pdev, ADF_GEN6_PVC0CTL_OFFSET, value);
+ if (err) {
+ dev_err(&GET_DEV(accel_dev), "pci write to PVC0CTL failed\n");
+ return pcibios_err_to_errno(err);
+ }
+
+ /* Read PVC1CTL then write masked values */
+ pci_read_config_dword(pdev, ADF_GEN6_PVC1CTL_OFFSET, &value);
+ value |= FIELD_PREP(ADF_GEN6_PVC1CTL_TCVCMAP_MASK, ADF_GEN6_PVC1CTL_TCVCMAP_DEFAULT);
+ value |= FIELD_PREP(ADF_GEN6_PVC1CTL_VCEN_MASK, ADF_GEN6_PVC1CTL_VCEN_ON);
+ err = pci_write_config_dword(pdev, ADF_GEN6_PVC1CTL_OFFSET, value);
+ if (err)
+ dev_err(&GET_DEV(accel_dev), "pci write to PVC1CTL failed\n");
+
+ return pcibios_err_to_errno(err);
+}
+
+static int adf_gen6_set_vc(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ void __iomem *csr = adf_get_etr_base(accel_dev);
+ u32 i;
+
+ for (i = 0; i < hw_data->num_banks; i++) {
+ dev_dbg(&GET_DEV(accel_dev), "set virtual channels for bank:%d\n", i);
+ set_vc_csr_for_bank(csr, i);
+ }
+
+ return set_vc_config(accel_dev);
+}
+
+static u32 get_ae_mask(struct adf_hw_device_data *self)
+{
+ unsigned long fuses = self->fuses[ADF_FUSECTL4];
+ u32 mask = ADF_6XXX_ACCELENGINES_MASK;
+
+ /*
+ * If bit 0 is set in the fuses, the first 4 engines are disabled.
+ * If bit 4 is set, the second group of 4 engines are disabled.
+ * If bit 8 is set, the admin engine (bit 8) is disabled.
+ */
+ if (test_bit(0, &fuses))
+ mask &= ~ADF_AE_GROUP_0;
+
+ if (test_bit(4, &fuses))
+ mask &= ~ADF_AE_GROUP_1;
+
+ if (test_bit(8, &fuses))
+ mask &= ~ADF_AE_GROUP_2;
+
+ return mask;
+}
+
+static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
+{
+ u32 capabilities_sym, capabilities_asym;
+ u32 capabilities_dc;
+ unsigned long mask;
+ u32 caps = 0;
+ u32 fusectl1;
+
+ fusectl1 = GET_HW_DATA(accel_dev)->fuses[ADF_FUSECTL1];
+
+ /* Read accelerator capabilities mask */
+ capabilities_sym = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC |
+ ICP_ACCEL_CAPABILITIES_CIPHER |
+ ICP_ACCEL_CAPABILITIES_AUTHENTICATION |
+ ICP_ACCEL_CAPABILITIES_SHA3 |
+ ICP_ACCEL_CAPABILITIES_SHA3_EXT |
+ ICP_ACCEL_CAPABILITIES_CHACHA_POLY |
+ ICP_ACCEL_CAPABILITIES_AESGCM_SPC |
+ ICP_ACCEL_CAPABILITIES_AES_V2;
+
+ /* A set bit in fusectl1 means the corresponding feature is OFF in this SKU */
+ if (fusectl1 & ICP_ACCEL_GEN6_MASK_UCS_SLICE) {
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CHACHA_POLY;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AESGCM_SPC;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AES_V2;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
+ }
+ if (fusectl1 & ICP_ACCEL_GEN6_MASK_AUTH_SLICE) {
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SHA3;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SHA3_EXT;
+ capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
+ }
+
+ capabilities_asym = 0;
+
+ capabilities_dc = ICP_ACCEL_CAPABILITIES_COMPRESSION |
+ ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION |
+ ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION |
+ ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64;
+
+ if (fusectl1 & ICP_ACCEL_GEN6_MASK_CPR_SLICE) {
+ capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION;
+ capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION;
+ capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION;
+ capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64;
+ }
+
+ if (adf_get_service_mask(accel_dev, &mask))
+ return 0;
+
+ if (test_bit(SVC_ASYM, &mask))
+ caps |= capabilities_asym;
+ if (test_bit(SVC_SYM, &mask))
+ caps |= capabilities_sym;
+ if (test_bit(SVC_DC, &mask))
+ caps |= capabilities_dc;
+ if (test_bit(SVC_DCC, &mask)) {
+ /*
+ * Sym capabilities are available for chaining operations,
+ * but sym crypto instances cannot be supported
+ */
+ caps = capabilities_dc | capabilities_sym;
+ caps &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC;
+ }
+
+ return caps;
+}
+
+static u32 uof_get_num_objs(struct adf_accel_dev *accel_dev)
+{
+ return ARRAY_SIZE(adf_default_fw_config);
+}
+
+static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ int num_fw_objs = ARRAY_SIZE(adf_6xxx_fw_objs);
+ int id;
+
+ id = adf_default_fw_config[obj_num].obj;
+ if (id >= num_fw_objs)
+ return NULL;
+
+ return adf_6xxx_fw_objs[id];
+}
+
+static const char *uof_get_name_6xxx(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ return uof_get_name(accel_dev, obj_num);
+}
+
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ if (obj_num >= uof_get_num_objs(accel_dev))
+ return -EINVAL;
+
+ return adf_default_fw_config[obj_num].obj;
+}
+
+static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ return adf_default_fw_config[obj_num].ae_mask;
+}
+
+static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev)
+{
+ if (adf_gen6_init_thd2arb_map(accel_dev))
+ dev_warn(&GET_DEV(accel_dev),
+ "Failed to generate thread to arbiter mapping");
+
+ return GET_HW_DATA(accel_dev)->thd_to_arb_map;
+}
+
+static int adf_init_device(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *addr = adf_get_pmisc_base(accel_dev);
+ u32 status;
+ u32 csr;
+ int ret;
+
+ /* Temporarily mask PM interrupt */
+ csr = ADF_CSR_RD(addr, ADF_GEN6_ERRMSK2);
+ csr |= ADF_GEN6_PM_SOU;
+ ADF_CSR_WR(addr, ADF_GEN6_ERRMSK2, csr);
+
+ /* Set DRV_ACTIVE bit to power up the device */
+ ADF_CSR_WR(addr, ADF_GEN6_PM_INTERRUPT, ADF_GEN6_PM_DRV_ACTIVE);
+
+ /* Poll status register to make sure the device is powered up */
+ ret = read_poll_timeout(ADF_CSR_RD, status,
+ status & ADF_GEN6_PM_INIT_STATE,
+ ADF_GEN6_PM_POLL_DELAY_US,
+ ADF_GEN6_PM_POLL_TIMEOUT_US, true, addr,
+ ADF_GEN6_PM_STATUS);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n");
+ return ret;
+ }
+
+ dev_dbg(&GET_DEV(accel_dev), "Setting virtual channels for device qat_dev%d\n",
+ accel_dev->accel_id);
+
+ ret = adf_gen6_set_vc(accel_dev);
+ if (ret)
+ dev_err(&GET_DEV(accel_dev), "Failed to set virtual channels\n");
+
+ return ret;
+}
+
+static int enable_pm(struct adf_accel_dev *accel_dev)
+{
+ return adf_init_admin_pm(accel_dev, ADF_GEN6_PM_DEFAULT_IDLE_FILTER);
+}
+
+static int dev_config(struct adf_accel_dev *accel_dev)
+{
+ int ret;
+
+ ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
+ if (ret)
+ return ret;
+
+ ret = adf_cfg_section_add(accel_dev, "Accelerator0");
+ if (ret)
+ return ret;
+
+ switch (adf_get_service_enabled(accel_dev)) {
+ case SVC_DC:
+ case SVC_DCC:
+ ret = adf_gen6_comp_dev_config(accel_dev);
+ break;
+ default:
+ ret = adf_gen6_no_dev_config(accel_dev);
+ break;
+ }
+ if (ret)
+ return ret;
+
+ __set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+
+ return ret;
+}
+
+void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data)
+{
+ hw_data->dev_class = &adf_6xxx_class;
+ hw_data->instance_id = adf_6xxx_class.instances++;
+ hw_data->num_banks = ADF_GEN6_ETR_MAX_BANKS;
+ hw_data->num_banks_per_vf = ADF_GEN6_NUM_BANKS_PER_VF;
+ hw_data->num_rings_per_bank = ADF_GEN6_NUM_RINGS_PER_BANK;
+ hw_data->num_accel = ADF_GEN6_MAX_ACCELERATORS;
+ hw_data->num_engines = ADF_6XXX_MAX_ACCELENGINES;
+ hw_data->num_logical_accel = 1;
+ hw_data->tx_rx_gap = ADF_GEN6_RX_RINGS_OFFSET;
+ hw_data->tx_rings_mask = ADF_GEN6_TX_RINGS_MASK;
+ hw_data->ring_to_svc_map = 0;
+ hw_data->alloc_irq = adf_isr_resource_alloc;
+ hw_data->free_irq = adf_isr_resource_free;
+ hw_data->enable_error_correction = enable_error_correction;
+ hw_data->get_accel_mask = get_accel_mask;
+ hw_data->get_ae_mask = get_ae_mask;
+ hw_data->get_num_accels = get_num_accels;
+ hw_data->get_num_aes = get_num_aes;
+ hw_data->get_sram_bar_id = get_sram_bar_id;
+ hw_data->get_etr_bar_id = get_etr_bar_id;
+ hw_data->get_misc_bar_id = get_misc_bar_id;
+ hw_data->get_arb_info = get_arb_info;
+ hw_data->get_admin_info = get_admin_info;
+ hw_data->get_accel_cap = get_accel_cap;
+ hw_data->get_sku = get_sku;
+ hw_data->init_admin_comms = adf_init_admin_comms;
+ hw_data->exit_admin_comms = adf_exit_admin_comms;
+ hw_data->send_admin_init = adf_send_admin_init;
+ hw_data->init_arb = adf_init_arb;
+ hw_data->exit_arb = adf_exit_arb;
+ hw_data->get_arb_mapping = adf_get_arbiter_mapping;
+ hw_data->enable_ints = enable_ints;
+ hw_data->reset_device = adf_reset_flr;
+ hw_data->admin_ae_mask = ADF_6XXX_ADMIN_AE_MASK;
+ hw_data->fw_name = ADF_6XXX_FW;
+ hw_data->fw_mmp_name = ADF_6XXX_MMP;
+ hw_data->uof_get_name = uof_get_name_6xxx;
+ hw_data->uof_get_num_objs = uof_get_num_objs;
+ hw_data->uof_get_obj_type = uof_get_obj_type;
+ hw_data->uof_get_ae_mask = uof_get_ae_mask;
+ hw_data->set_msix_rttable = set_msix_default_rttable;
+ hw_data->set_ssm_wdtimer = set_ssm_wdtimer;
+ hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->ring_pair_reset = ring_pair_reset;
+ hw_data->dev_config = dev_config;
+ hw_data->get_hb_clock = get_heartbeat_clock;
+ hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
+ hw_data->start_timer = adf_timer_start;
+ hw_data->stop_timer = adf_timer_stop;
+ hw_data->init_device = adf_init_device;
+ hw_data->enable_pm = enable_pm;
+ hw_data->services_supported = services_supported;
+
+ adf_gen6_init_hw_csr_ops(&hw_data->csr_ops);
+ adf_gen6_init_pf_pfvf_ops(&hw_data->pfvf_ops);
+ adf_gen6_init_dc_ops(&hw_data->dc_ops);
+ adf_gen6_init_ras_ops(&hw_data->ras_ops);
+}
+
+void adf_clean_hw_data_6xxx(struct adf_hw_device_data *hw_data)
+{
+ if (hw_data->dev_class->instances)
+ hw_data->dev_class->instances--;
+}
diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h
new file mode 100644
index 000000000000..78e2e2c5816e
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ADF_6XXX_HW_DATA_H_
+#define ADF_6XXX_HW_DATA_H_
+
+#include <linux/bits.h>
+#include <linux/time.h>
+#include <linux/units.h>
+
+#include "adf_accel_devices.h"
+#include "adf_cfg_common.h"
+#include "adf_dc.h"
+
+/* PCIe configuration space */
+#define ADF_GEN6_BAR_MASK (BIT(0) | BIT(2) | BIT(4))
+#define ADF_GEN6_SRAM_BAR 0
+#define ADF_GEN6_PMISC_BAR 1
+#define ADF_GEN6_ETR_BAR 2
+#define ADF_6XXX_MAX_ACCELENGINES 9
+
+/* Clocks frequency */
+#define ADF_GEN6_COUNTER_FREQ (100 * HZ_PER_MHZ)
+
+/* Physical function fuses */
+#define ADF_GEN6_FUSECTL0_OFFSET 0x2C8
+#define ADF_GEN6_FUSECTL1_OFFSET 0x2CC
+#define ADF_GEN6_FUSECTL4_OFFSET 0x2D8
+
+/* Accelerators */
+#define ADF_GEN6_ACCELERATORS_MASK 0x1
+#define ADF_GEN6_MAX_ACCELERATORS 1
+
+/* MSI-X interrupt */
+#define ADF_GEN6_SMIAPF_RP_X0_MASK_OFFSET 0x41A040
+#define ADF_GEN6_SMIAPF_RP_X1_MASK_OFFSET 0x41A044
+#define ADF_GEN6_SMIAPF_MASK_OFFSET 0x41A084
+#define ADF_GEN6_MSIX_RTTABLE_OFFSET(i) (0x409000 + ((i) * 4))
+
+/* Bank and ring configuration */
+#define ADF_GEN6_NUM_RINGS_PER_BANK 2
+#define ADF_GEN6_NUM_BANKS_PER_VF 4
+#define ADF_GEN6_ETR_MAX_BANKS 64
+#define ADF_GEN6_RX_RINGS_OFFSET 1
+#define ADF_GEN6_TX_RINGS_MASK 0x1
+
+/* Arbiter configuration */
+#define ADF_GEN6_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0))
+#define ADF_GEN6_ARB_OFFSET 0x000
+#define ADF_GEN6_ARB_WRK_2_SER_MAP_OFFSET 0x400
+
+/* Admin interface configuration */
+#define ADF_GEN6_ADMINMSGUR_OFFSET 0x500574
+#define ADF_GEN6_ADMINMSGLR_OFFSET 0x500578
+#define ADF_GEN6_MAILBOX_BASE_OFFSET 0x600970
+
+/*
+ * Watchdog timers
+ * Timeout is in cycles. Clock speed may vary across products but this
+ * value should be a few milli-seconds.
+ */
+#define ADF_SSM_WDT_DEFAULT_VALUE 0x7000000ULL
+#define ADF_SSM_WDT_PKE_DEFAULT_VALUE 0x8000000ULL
+#define ADF_SSMWDTATHL_OFFSET 0x5208
+#define ADF_SSMWDTATHH_OFFSET 0x520C
+#define ADF_SSMWDTCNVL_OFFSET 0x5408
+#define ADF_SSMWDTCNVH_OFFSET 0x540C
+#define ADF_SSMWDTUCSL_OFFSET 0x5808
+#define ADF_SSMWDTUCSH_OFFSET 0x580C
+#define ADF_SSMWDTDCPRL_OFFSET 0x5A08
+#define ADF_SSMWDTDCPRH_OFFSET 0x5A0C
+#define ADF_SSMWDTPKEL_OFFSET 0x5E08
+#define ADF_SSMWDTPKEH_OFFSET 0x5E0C
+
+/* Ring reset */
+#define ADF_RPRESET_POLL_TIMEOUT_US (5 * USEC_PER_SEC)
+#define ADF_RPRESET_POLL_DELAY_US 20
+#define ADF_WQM_CSR_RPRESETCTL_RESET BIT(0)
+#define ADF_WQM_CSR_RPRESETCTL(bank) (0x6000 + (bank) * 8)
+#define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0)
+#define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4)
+
+/* Controls and sets up the corresponding ring mode of operation */
+#define ADF_GEN6_CSR_RINGMODECTL(bank) (0x9000 + (bank) * 4)
+
+/* Specifies the traffic class to use for the transactions to/from the ring */
+#define ADF_GEN6_RINGMODECTL_TC_MASK GENMASK(18, 16)
+#define ADF_GEN6_RINGMODECTL_TC_DEFAULT 0x7
+
+/* Specifies usage of tc for the transactions to/from this ring */
+#define ADF_GEN6_RINGMODECTL_TC_EN_MASK GENMASK(20, 19)
+
+/*
+ * Use the value programmed in the tc field for request descriptor
+ * and metadata read transactions
+ */
+#define ADF_GEN6_RINGMODECTL_TC_EN_OP1 0x1
+
+/* VC0 Resource Control Register */
+#define ADF_GEN6_PVC0CTL_OFFSET 0x204
+#define ADF_GEN6_PVC0CTL_TCVCMAP_OFFSET 1
+#define ADF_GEN6_PVC0CTL_TCVCMAP_MASK GENMASK(7, 1)
+#define ADF_GEN6_PVC0CTL_TCVCMAP_DEFAULT 0x7F
+
+/* VC1 Resource Control Register */
+#define ADF_GEN6_PVC1CTL_OFFSET 0x210
+#define ADF_GEN6_PVC1CTL_TCVCMAP_OFFSET 1
+#define ADF_GEN6_PVC1CTL_TCVCMAP_MASK GENMASK(7, 1)
+#define ADF_GEN6_PVC1CTL_TCVCMAP_DEFAULT 0x40
+#define ADF_GEN6_PVC1CTL_VCEN_OFFSET 31
+#define ADF_GEN6_PVC1CTL_VCEN_MASK BIT(31)
+/* RW bit: 0x1 - enables a Virtual Channel, 0x0 - disables */
+#define ADF_GEN6_PVC1CTL_VCEN_ON 0x1
+
+/* Error source mask registers */
+#define ADF_GEN6_ERRMSK0 0x41A210
+#define ADF_GEN6_ERRMSK1 0x41A214
+#define ADF_GEN6_ERRMSK2 0x41A218
+#define ADF_GEN6_ERRMSK3 0x41A21C
+
+#define ADF_GEN6_VFLNOTIFY BIT(7)
+
+/* Number of heartbeat counter pairs */
+#define ADF_NUM_HB_CNT_PER_AE ADF_NUM_THREADS_PER_AE
+
+/* Physical function fuses */
+#define ADF_6XXX_ACCELENGINES_MASK GENMASK(8, 0)
+#define ADF_6XXX_ADMIN_AE_MASK GENMASK(8, 8)
+
+/* Firmware binaries */
+#define ADF_6XXX_FW "qat_6xxx.bin"
+#define ADF_6XXX_MMP "qat_6xxx_mmp.bin"
+#define ADF_6XXX_CY_OBJ "qat_6xxx_cy.bin"
+#define ADF_6XXX_DC_OBJ "qat_6xxx_dc.bin"
+#define ADF_6XXX_ADMIN_OBJ "qat_6xxx_admin.bin"
+
+enum icp_qat_gen6_slice_mask {
+ ICP_ACCEL_GEN6_MASK_UCS_SLICE = BIT(0),
+ ICP_ACCEL_GEN6_MASK_AUTH_SLICE = BIT(1),
+ ICP_ACCEL_GEN6_MASK_PKE_SLICE = BIT(2),
+ ICP_ACCEL_GEN6_MASK_CPR_SLICE = BIT(3),
+ ICP_ACCEL_GEN6_MASK_DCPRZ_SLICE = BIT(4),
+ ICP_ACCEL_GEN6_MASK_WCP_WAT_SLICE = BIT(6),
+};
+
+void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data);
+void adf_clean_hw_data_6xxx(struct adf_hw_device_data *hw_data);
+
+#endif /* ADF_6XXX_HW_DATA_H_ */
diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
new file mode 100644
index 000000000000..c1dc9c56fdf5
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 Intel Corporation */
+#include <linux/array_size.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+#include <adf_accel_devices.h>
+#include <adf_cfg.h>
+#include <adf_common_drv.h>
+#include <adf_dbgfs.h>
+
+#include "adf_gen6_shared.h"
+#include "adf_6xxx_hw_data.h"
+
+static int bar_map[] = {
+ 0, /* SRAM */
+ 2, /* PMISC */
+ 4, /* ETR */
+};
+
+static void adf_device_down(void *accel_dev)
+{
+ adf_dev_down(accel_dev);
+}
+
+static void adf_dbgfs_cleanup(void *accel_dev)
+{
+ adf_dbgfs_exit(accel_dev);
+}
+
+static void adf_cfg_device_remove(void *accel_dev)
+{
+ adf_cfg_dev_remove(accel_dev);
+}
+
+static void adf_cleanup_hw_data(void *accel_dev)
+{
+ struct adf_accel_dev *accel_device = accel_dev;
+
+ if (accel_device->hw_device) {
+ adf_clean_hw_data_6xxx(accel_device->hw_device);
+ accel_device->hw_device = NULL;
+ }
+}
+
+static void adf_devmgr_remove(void *accel_dev)
+{
+ adf_devmgr_rm_dev(accel_dev, NULL);
+}
+
+static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct adf_accel_pci *accel_pci_dev;
+ struct adf_hw_device_data *hw_data;
+ struct device *dev = &pdev->dev;
+ struct adf_accel_dev *accel_dev;
+ struct adf_bar *bar;
+ unsigned int i;
+ int ret;
+
+ if (num_possible_nodes() > 1 && dev_to_node(dev) < 0) {
+ /*
+ * If the accelerator is connected to a node with no memory
+ * there is no point in using the accelerator since the remote
+ * memory transaction will be very slow.
+ */
+ return dev_err_probe(dev, -EINVAL, "Invalid NUMA configuration.\n");
+ }
+
+ accel_dev = devm_kzalloc(dev, sizeof(*accel_dev), GFP_KERNEL);
+ if (!accel_dev)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&accel_dev->crypto_list);
+ INIT_LIST_HEAD(&accel_dev->list);
+ accel_pci_dev = &accel_dev->accel_pci_dev;
+ accel_pci_dev->pci_dev = pdev;
+ accel_dev->owner = THIS_MODULE;
+
+ hw_data = devm_kzalloc(dev, sizeof(*hw_data), GFP_KERNEL);
+ if (!hw_data)
+ return -ENOMEM;
+
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid);
+ pci_read_config_dword(pdev, ADF_GEN6_FUSECTL4_OFFSET, &hw_data->fuses[ADF_FUSECTL4]);
+ pci_read_config_dword(pdev, ADF_GEN6_FUSECTL0_OFFSET, &hw_data->fuses[ADF_FUSECTL0]);
+ pci_read_config_dword(pdev, ADF_GEN6_FUSECTL1_OFFSET, &hw_data->fuses[ADF_FUSECTL1]);
+
+ if (!(hw_data->fuses[ADF_FUSECTL1] & ICP_ACCEL_GEN6_MASK_WCP_WAT_SLICE))
+ return dev_err_probe(dev, -EFAULT, "Wireless mode is not supported.\n");
+
+ /* Enable PCI device */
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return dev_err_probe(dev, ret, "Cannot enable PCI device.\n");
+
+ ret = adf_devmgr_add_dev(accel_dev, NULL);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to add new accelerator device.\n");
+
+ ret = devm_add_action_or_reset(dev, adf_devmgr_remove, accel_dev);
+ if (ret)
+ return ret;
+
+ accel_dev->hw_device = hw_data;
+ adf_init_hw_data_6xxx(accel_dev->hw_device);
+
+ ret = devm_add_action_or_reset(dev, adf_cleanup_hw_data, accel_dev);
+ if (ret)
+ return ret;
+
+ /* Get Accelerators and Accelerator Engine masks */
+ hw_data->accel_mask = hw_data->get_accel_mask(hw_data);
+ hw_data->ae_mask = hw_data->get_ae_mask(hw_data);
+ accel_pci_dev->sku = hw_data->get_sku(hw_data);
+
+ /* If the device has no acceleration engines then ignore it */
+ if (!hw_data->accel_mask || !hw_data->ae_mask ||
+ (~hw_data->ae_mask & ADF_GEN6_ACCELERATORS_MASK)) {
+ ret = -EFAULT;
+ return dev_err_probe(dev, ret, "No acceleration units were found.\n");
+ }
+
+ /* Create device configuration table */
+ ret = adf_cfg_dev_add(accel_dev);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(dev, adf_cfg_device_remove, accel_dev);
+ if (ret)
+ return ret;
+
+ /* Set DMA identifier */
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ if (ret)
+ return dev_err_probe(dev, ret, "No usable DMA configuration.\n");
+
+ ret = adf_gen6_cfg_dev_init(accel_dev);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to initialize configuration.\n");
+
+ /* Get accelerator capability mask */
+ hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev);
+ if (!hw_data->accel_capabilities_mask) {
+ ret = -EINVAL;
+ return dev_err_probe(dev, ret, "Failed to get capabilities mask.\n");
+ }
+
+ for (i = 0; i < ARRAY_SIZE(bar_map); i++) {
+ bar = &accel_pci_dev->pci_bars[i];
+
+ /* Map 64-bit PCIe BAR */
+ bar->virt_addr = pcim_iomap_region(pdev, bar_map[i], pci_name(pdev));
+ if (IS_ERR(bar->virt_addr)) {
+ ret = PTR_ERR(bar->virt_addr);
+ return dev_err_probe(dev, ret, "Failed to ioremap PCI region.\n");
+ }
+ }
+
+ pci_set_master(pdev);
+
+ /*
+ * The PCI config space is saved at this point and will be restored
+ * after a Function Level Reset (FLR) as the FLR does not completely
+ * restore it.
+ */
+ ret = pci_save_state(pdev);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to save pci state.\n");
+
+ accel_dev->ras_errors.enabled = true;
+
+ adf_dbgfs_init(accel_dev);
+
+ ret = devm_add_action_or_reset(dev, adf_dbgfs_cleanup, accel_dev);
+ if (ret)
+ return ret;
+
+ ret = adf_dev_up(accel_dev, true);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(dev, adf_device_down, accel_dev);
+ if (ret)
+ return ret;
+
+ ret = adf_sysfs_init(accel_dev);
+
+ return ret;
+}
+
+static void adf_shutdown(struct pci_dev *pdev)
+{
+ struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+
+ adf_dev_down(accel_dev);
+}
+
+static const struct pci_device_id adf_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_6XXX) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
+
+static struct pci_driver adf_driver = {
+ .id_table = adf_pci_tbl,
+ .name = ADF_6XXX_DEVICE_NAME,
+ .probe = adf_probe,
+ .shutdown = adf_shutdown,
+ .sriov_configure = adf_sriov_configure,
+ .err_handler = &adf_err_handler,
+};
+module_pci_driver(adf_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel");
+MODULE_FIRMWARE(ADF_6XXX_FW);
+MODULE_FIRMWARE(ADF_6XXX_MMP);
+MODULE_DESCRIPTION("Intel(R) QuickAssist Technology for GEN6 Devices");
+MODULE_SOFTDEP("pre: crypto-intel_qat");
+MODULE_IMPORT_NS("CRYPTO_QAT");
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/Makefile b/drivers/crypto/intel/qat/qat_c3xxx/Makefile
index d9e568572da8..43604c025f0c 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/Makefile
+++ b/drivers/crypto/intel/qat/qat_c3xxx/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXX) += qat_c3xxx.o
qat_c3xxx-y := adf_drv.o adf_c3xxx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index e78f7bfd30b8..07f2c42a68f5 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -5,7 +5,6 @@
#include <adf_clock.h>
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
-#include <adf_gen2_dc.h>
#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
@@ -22,7 +21,6 @@ static const u32 thrd_to_arb_map[ADF_C3XXX_MAX_ACCELENGINES] = {
static struct adf_hw_device_class c3xxx_class = {
.name = ADF_C3XXX_DEVICE_NAME,
.type = DEV_C3XXX,
- .instances = 0
};
static u32 get_accel_mask(struct adf_hw_device_data *self)
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_drv.c
index b825b35ab4bf..bceb5dd8b148 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_drv.c
@@ -19,24 +19,6 @@
#include <adf_dbgfs.h>
#include "adf_c3xxx_hw_data.h"
-static const struct pci_device_id adf_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_C3XXX), },
- { }
-};
-MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
-
-static int adf_probe(struct pci_dev *dev, const struct pci_device_id *ent);
-static void adf_remove(struct pci_dev *dev);
-
-static struct pci_driver adf_driver = {
- .id_table = adf_pci_tbl,
- .name = ADF_C3XXX_DEVICE_NAME,
- .probe = adf_probe,
- .remove = adf_remove,
- .sriov_configure = adf_sriov_configure,
- .err_handler = &adf_err_handler,
-};
-
static void adf_cleanup_pci_dev(struct adf_accel_dev *accel_dev)
{
pci_release_regions(accel_dev->accel_pci_dev.pci_dev);
@@ -227,6 +209,29 @@ static void adf_remove(struct pci_dev *pdev)
kfree(accel_dev);
}
+static void adf_shutdown(struct pci_dev *pdev)
+{
+ struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+
+ adf_dev_down(accel_dev);
+}
+
+static const struct pci_device_id adf_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_C3XXX) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
+
+static struct pci_driver adf_driver = {
+ .id_table = adf_pci_tbl,
+ .name = ADF_C3XXX_DEVICE_NAME,
+ .probe = adf_probe,
+ .remove = adf_remove,
+ .shutdown = adf_shutdown,
+ .sriov_configure = adf_sriov_configure,
+ .err_handler = &adf_err_handler,
+};
+
static int __init adfdrv_init(void)
{
request_module("intel_qat");
diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile b/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile
index 31a908a211ac..03f6745b4aa2 100644
--- a/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile
+++ b/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXXVF) += qat_c3xxxvf.o
qat_c3xxxvf-y := adf_drv.o adf_c3xxxvf_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
index a512ca4efd3f..db3c33fa1881 100644
--- a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
@@ -3,7 +3,6 @@
#include <adf_accel_devices.h>
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
-#include <adf_gen2_dc.h>
#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
@@ -13,7 +12,6 @@
static struct adf_hw_device_class c3xxxiov_class = {
.name = ADF_C3XXXVF_DEVICE_NAME,
.type = DEV_C3XXXVF,
- .instances = 0
};
static u32 get_accel_mask(struct adf_hw_device_data *self)
diff --git a/drivers/crypto/intel/qat/qat_c62x/Makefile b/drivers/crypto/intel/qat/qat_c62x/Makefile
index cbdaaa135e84..f3d722bef088 100644
--- a/drivers/crypto/intel/qat/qat_c62x/Makefile
+++ b/drivers/crypto/intel/qat/qat_c62x/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C62X) += qat_c62x.o
qat_c62x-y := adf_drv.o adf_c62x_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
index 32ebe09477a8..0b410b41474d 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
@@ -5,7 +5,6 @@
#include <adf_clock.h>
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
-#include <adf_gen2_dc.h>
#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
@@ -22,7 +21,6 @@ static const u32 thrd_to_arb_map[ADF_C62X_MAX_ACCELENGINES] = {
static struct adf_hw_device_class c62x_class = {
.name = ADF_C62X_DEVICE_NAME,
.type = DEV_C62X,
- .instances = 0
};
static u32 get_accel_mask(struct adf_hw_device_data *self)
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_drv.c b/drivers/crypto/intel/qat/qat_c62x/adf_drv.c
index 8a7bdec358d6..23ccb72b6ea2 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_drv.c
@@ -19,24 +19,6 @@
#include <adf_dbgfs.h>
#include "adf_c62x_hw_data.h"
-static const struct pci_device_id adf_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_C62X), },
- { }
-};
-MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
-
-static int adf_probe(struct pci_dev *dev, const struct pci_device_id *ent);
-static void adf_remove(struct pci_dev *dev);
-
-static struct pci_driver adf_driver = {
- .id_table = adf_pci_tbl,
- .name = ADF_C62X_DEVICE_NAME,
- .probe = adf_probe,
- .remove = adf_remove,
- .sriov_configure = adf_sriov_configure,
- .err_handler = &adf_err_handler,
-};
-
static void adf_cleanup_pci_dev(struct adf_accel_dev *accel_dev)
{
pci_release_regions(accel_dev->accel_pci_dev.pci_dev);
@@ -227,6 +209,29 @@ static void adf_remove(struct pci_dev *pdev)
kfree(accel_dev);
}
+static void adf_shutdown(struct pci_dev *pdev)
+{
+ struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+
+ adf_dev_down(accel_dev);
+}
+
+static const struct pci_device_id adf_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_C62X) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
+
+static struct pci_driver adf_driver = {
+ .id_table = adf_pci_tbl,
+ .name = ADF_C62X_DEVICE_NAME,
+ .probe = adf_probe,
+ .remove = adf_remove,
+ .shutdown = adf_shutdown,
+ .sriov_configure = adf_sriov_configure,
+ .err_handler = &adf_err_handler,
+};
+
static int __init adfdrv_init(void)
{
request_module("intel_qat");
diff --git a/drivers/crypto/intel/qat/qat_c62xvf/Makefile b/drivers/crypto/intel/qat/qat_c62xvf/Makefile
index 60e499b041ec..ed7f3f722d99 100644
--- a/drivers/crypto/intel/qat/qat_c62xvf/Makefile
+++ b/drivers/crypto/intel/qat/qat_c62xvf/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C62XVF) += qat_c62xvf.o
qat_c62xvf-y := adf_drv.o adf_c62xvf_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index 4aaaaf921734..7f00035d3661 100644
--- a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -3,7 +3,6 @@
#include <adf_accel_devices.h>
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
-#include <adf_gen2_dc.h>
#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
@@ -13,7 +12,6 @@
static struct adf_hw_device_class c62xiov_class = {
.name = ADF_C62XVF_DEVICE_NAME,
.type = DEV_C62XVF,
- .instances = 0
};
static u32 get_accel_mask(struct adf_hw_device_data *self)
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index af5df29fd2e3..66bb295ace28 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -8,19 +8,19 @@ intel_qat-y := adf_accel_engine.o \
adf_cfg_services.o \
adf_clock.o \
adf_ctl_drv.o \
+ adf_dc.o \
adf_dev_mgr.o \
adf_gen2_config.o \
- adf_gen2_dc.o \
adf_gen2_hw_csr_data.o \
adf_gen2_hw_data.o \
adf_gen4_config.o \
- adf_gen4_dc.o \
adf_gen4_hw_csr_data.o \
adf_gen4_hw_data.o \
adf_gen4_pm.o \
adf_gen4_ras.o \
- adf_gen4_timer.o \
adf_gen4_vf_mig.o \
+ adf_gen6_ras.o \
+ adf_gen6_shared.o \
adf_hw_arbiter.o \
adf_init.o \
adf_isr.o \
@@ -30,6 +30,7 @@ intel_qat-y := adf_accel_engine.o \
adf_sysfs.o \
adf_sysfs_ras_counters.o \
adf_sysfs_rl.o \
+ adf_timer.o \
adf_transport.o \
qat_algs.o \
qat_algs_send.o \
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index dc21551153cb..2ee526063213 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -12,6 +12,7 @@
#include <linux/qat/qat_mig_dev.h>
#include <linux/wordpart.h>
#include "adf_cfg_common.h"
+#include "adf_dc.h"
#include "adf_rl.h"
#include "adf_telemetry.h"
#include "adf_pfvf_msg.h"
@@ -25,14 +26,18 @@
#define ADF_C3XXXVF_DEVICE_NAME "c3xxxvf"
#define ADF_4XXX_DEVICE_NAME "4xxx"
#define ADF_420XX_DEVICE_NAME "420xx"
-#define ADF_4XXX_PCI_DEVICE_ID 0x4940
-#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
-#define ADF_401XX_PCI_DEVICE_ID 0x4942
-#define ADF_401XXIOV_PCI_DEVICE_ID 0x4943
-#define ADF_402XX_PCI_DEVICE_ID 0x4944
-#define ADF_402XXIOV_PCI_DEVICE_ID 0x4945
-#define ADF_420XX_PCI_DEVICE_ID 0x4946
-#define ADF_420XXIOV_PCI_DEVICE_ID 0x4947
+#define ADF_6XXX_DEVICE_NAME "6xxx"
+#define PCI_DEVICE_ID_INTEL_QAT_4XXX 0x4940
+#define PCI_DEVICE_ID_INTEL_QAT_4XXXIOV 0x4941
+#define PCI_DEVICE_ID_INTEL_QAT_401XX 0x4942
+#define PCI_DEVICE_ID_INTEL_QAT_401XXIOV 0x4943
+#define PCI_DEVICE_ID_INTEL_QAT_402XX 0x4944
+#define PCI_DEVICE_ID_INTEL_QAT_402XXIOV 0x4945
+#define PCI_DEVICE_ID_INTEL_QAT_420XX 0x4946
+#define PCI_DEVICE_ID_INTEL_QAT_420XXIOV 0x4947
+#define PCI_DEVICE_ID_INTEL_QAT_6XXX 0x4948
+#define PCI_DEVICE_ID_INTEL_QAT_6XXX_IOV 0x4949
+
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
@@ -267,7 +272,8 @@ struct adf_pfvf_ops {
};
struct adf_dc_ops {
- void (*build_deflate_ctx)(void *ctx);
+ int (*build_comp_block)(void *ctx, enum adf_dc_algo algo);
+ int (*build_decomp_block)(void *ctx, enum adf_dc_algo algo);
};
struct qat_migdev_ops {
diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.c b/drivers/crypto/intel/qat/qat_common/adf_admin.c
index acad526eb741..573388c37100 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_admin.c
@@ -449,6 +449,7 @@ int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay)
return adf_send_admin(accel_dev, &req, &resp, ae_mask);
}
+EXPORT_SYMBOL_GPL(adf_init_admin_pm);
int adf_get_pm_info(struct adf_accel_dev *accel_dev, dma_addr_t p_state_addr,
size_t buff_size)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h
index 89df3888d7ea..15fdf9854b81 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h
@@ -48,6 +48,7 @@ enum adf_device_type {
DEV_C3XXXVF,
DEV_4XXX,
DEV_420XX,
+ DEV_6XXX,
};
struct adf_dev_status_info {
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c
index 30abcd9e1283..c39871291da7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c
@@ -116,7 +116,7 @@ int adf_parse_service_string(struct adf_accel_dev *accel_dev, const char *in,
return adf_service_mask_to_string(mask, out, out_len);
}
-static int adf_get_service_mask(struct adf_accel_dev *accel_dev, unsigned long *mask)
+int adf_get_service_mask(struct adf_accel_dev *accel_dev, unsigned long *mask)
{
char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { };
size_t len;
@@ -138,6 +138,7 @@ static int adf_get_service_mask(struct adf_accel_dev *accel_dev, unsigned long *
return ret;
}
+EXPORT_SYMBOL_GPL(adf_get_service_mask);
int adf_get_service_enabled(struct adf_accel_dev *accel_dev)
{
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h
index f6bafc15cbc6..3742c450878f 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h
@@ -32,5 +32,6 @@ enum {
int adf_parse_service_string(struct adf_accel_dev *accel_dev, const char *in,
size_t in_len, char *out, size_t out_len);
int adf_get_service_enabled(struct adf_accel_dev *accel_dev);
+int adf_get_service_mask(struct adf_accel_dev *accel_dev, unsigned long *mask);
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_dc.c b/drivers/crypto/intel/qat/qat_common/adf_dc.c
index 47261b1c1da6..3e8fb4e3ed97 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_dc.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_dc.c
@@ -1,22 +1,21 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation */
#include "adf_accel_devices.h"
-#include "adf_gen2_dc.h"
+#include "adf_dc.h"
#include "icp_qat_fw_comp.h"
-static void qat_comp_build_deflate_ctx(void *ctx)
+int qat_comp_build_ctx(struct adf_accel_dev *accel_dev, void *ctx, enum adf_dc_algo algo)
{
- struct icp_qat_fw_comp_req *req_tmpl = (struct icp_qat_fw_comp_req *)ctx;
- struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
- struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
- struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars;
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
struct icp_qat_fw_comp_cd_hdr *comp_cd_ctrl = &req_tmpl->comp_cd_ctrl;
+ struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars;
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+ int ret;
memset(req_tmpl, 0, sizeof(*req_tmpl));
header->hdr_flags =
ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP;
- header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC;
header->comn_req_flags =
ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA,
QAT_COMN_PTR_TYPE_SGL);
@@ -26,12 +25,14 @@ static void qat_comp_build_deflate_ctx(void *ctx)
ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST,
ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST,
ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF);
- cd_pars->u.sl.comp_slice_cfg_word[0] =
- ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_COMPRESS,
- ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED,
- ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE,
- ICP_QAT_HW_COMPRESSION_DEPTH_1,
- ICP_QAT_HW_COMPRESSION_FILE_TYPE_0);
+
+ /* Build HW config block for compression */
+ ret = GET_DC_OPS(accel_dev)->build_comp_block(ctx, algo);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev), "Failed to build compression block\n");
+ return ret;
+ }
+
req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER;
req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC;
req_pars->req_par_flags =
@@ -45,26 +46,19 @@ static void qat_comp_build_deflate_ctx(void *ctx)
ICP_QAT_FW_COMP_NO_XXHASH_ACC,
ICP_QAT_FW_COMP_CNV_ERROR_NONE,
ICP_QAT_FW_COMP_NO_APPEND_CRC,
- ICP_QAT_FW_COMP_NO_DROP_DATA);
+ ICP_QAT_FW_COMP_NO_DROP_DATA,
+ ICP_QAT_FW_COMP_NO_PARTIAL_DECOMPRESS);
ICP_QAT_FW_COMN_NEXT_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_DRAM_WR);
ICP_QAT_FW_COMN_CURR_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_COMP);
/* Fill second half of the template for decompression */
memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl));
req_tmpl++;
- header = &req_tmpl->comn_hdr;
- header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
- cd_pars = &req_tmpl->cd_pars;
- cd_pars->u.sl.comp_slice_cfg_word[0] =
- ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS,
- ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED,
- ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE,
- ICP_QAT_HW_COMPRESSION_DEPTH_1,
- ICP_QAT_HW_COMPRESSION_FILE_TYPE_0);
-}
-void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops)
-{
- dc_ops->build_deflate_ctx = qat_comp_build_deflate_ctx;
+ /* Build HW config block for decompression */
+ ret = GET_DC_OPS(accel_dev)->build_decomp_block(req_tmpl, algo);
+ if (ret)
+ dev_err(&GET_DEV(accel_dev), "Failed to build decompression block\n");
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(adf_gen2_init_dc_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_dc.h b/drivers/crypto/intel/qat/qat_common/adf_dc.h
new file mode 100644
index 000000000000..6cb5e09054a6
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_dc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ADF_DC_H
+#define ADF_DC_H
+
+struct adf_accel_dev;
+
+enum adf_dc_algo {
+ QAT_DEFLATE,
+ QAT_LZ4,
+ QAT_LZ4S,
+ QAT_ZSTD,
+};
+
+int qat_comp_build_ctx(struct adf_accel_dev *accel_dev, void *ctx, enum adf_dc_algo algo);
+
+#endif /* ADF_DC_H */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_fw_config.h b/drivers/crypto/intel/qat/qat_common/adf_fw_config.h
index 4f86696800c9..78957fa900b7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_fw_config.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_fw_config.h
@@ -8,6 +8,7 @@ enum adf_fw_objs {
ADF_FW_ASYM_OBJ,
ADF_FW_DC_OBJ,
ADF_FW_ADMIN_OBJ,
+ ADF_FW_CY_OBJ,
};
struct adf_fw_config {
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_dc.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_dc.h
deleted file mode 100644
index 6eae023354d7..000000000000
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_dc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright(c) 2022 Intel Corporation */
-#ifndef ADF_GEN2_DC_H
-#define ADF_GEN2_DC_H
-
-#include "adf_accel_devices.h"
-
-void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops);
-
-#endif /* ADF_GEN2_DC_H */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c
index 2b263442c856..6a505e9a5cf9 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2020 Intel Corporation */
#include "adf_common_drv.h"
+#include "adf_dc.h"
#include "adf_gen2_hw_data.h"
+#include "icp_qat_fw_comp.h"
#include "icp_qat_hw.h"
#include <linux/pci.h>
@@ -169,3 +171,58 @@ void adf_gen2_set_ssm_wdtimer(struct adf_accel_dev *accel_dev)
}
}
EXPORT_SYMBOL_GPL(adf_gen2_set_ssm_wdtimer);
+
+static int adf_gen2_build_comp_block(void *ctx, enum adf_dc_algo algo)
+{
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
+ struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+
+ switch (algo) {
+ case QAT_DEFLATE:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ cd_pars->u.sl.comp_slice_cfg_word[0] =
+ ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_COMPRESS,
+ ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED,
+ ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE,
+ ICP_QAT_HW_COMPRESSION_DEPTH_1,
+ ICP_QAT_HW_COMPRESSION_FILE_TYPE_0);
+
+ return 0;
+}
+
+static int adf_gen2_build_decomp_block(void *ctx, enum adf_dc_algo algo)
+{
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
+ struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+
+ switch (algo) {
+ case QAT_DEFLATE:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ cd_pars->u.sl.comp_slice_cfg_word[0] =
+ ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS,
+ ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED,
+ ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE,
+ ICP_QAT_HW_COMPRESSION_DEPTH_1,
+ ICP_QAT_HW_COMPRESSION_FILE_TYPE_0);
+
+ return 0;
+}
+
+void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops)
+{
+ dc_ops->build_comp_block = adf_gen2_build_comp_block;
+ dc_ops->build_decomp_block = adf_gen2_build_decomp_block;
+}
+EXPORT_SYMBOL_GPL(adf_gen2_init_dc_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
index 708e9186127b..59bad368a921 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
@@ -88,5 +88,6 @@ void adf_gen2_get_arb_info(struct arb_info *arb_info);
void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev);
u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev);
void adf_gen2_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
+void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops);
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.h
index a716545a764c..34a63cf40db2 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
#define ADF_GEN2_ERRSOU3 (0x3A000 + 0x0C)
#define ADF_GEN2_ERRSOU5 (0x3A000 + 0xD8)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c
index f97e7a880f3a..afcdfdd0a37a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c
@@ -11,7 +11,7 @@
#include "qat_compression.h"
#include "qat_crypto.h"
-static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
+int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
{
char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
int banks = GET_MAX_BANKS(accel_dev);
@@ -117,7 +117,7 @@ err:
return ret;
}
-static int adf_comp_dev_config(struct adf_accel_dev *accel_dev)
+int adf_comp_dev_config(struct adf_accel_dev *accel_dev)
{
char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
int banks = GET_MAX_BANKS(accel_dev);
@@ -187,7 +187,7 @@ err:
return ret;
}
-static int adf_no_dev_config(struct adf_accel_dev *accel_dev)
+int adf_no_dev_config(struct adf_accel_dev *accel_dev)
{
unsigned long val;
int ret;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h
index bb87655f69a8..38a674c27e40 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h
@@ -7,5 +7,8 @@
int adf_gen4_dev_config(struct adf_accel_dev *accel_dev);
int adf_gen4_cfg_dev_init(struct adf_accel_dev *accel_dev);
+int adf_crypto_dev_config(struct adf_accel_dev *accel_dev);
+int adf_comp_dev_config(struct adf_accel_dev *accel_dev);
+int adf_no_dev_config(struct adf_accel_dev *accel_dev);
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_dc.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_dc.c
deleted file mode 100644
index 5859238e37de..000000000000
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_dc.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2022 Intel Corporation */
-#include "adf_accel_devices.h"
-#include "icp_qat_fw_comp.h"
-#include "icp_qat_hw_20_comp.h"
-#include "adf_gen4_dc.h"
-
-static void qat_comp_build_deflate(void *ctx)
-{
- struct icp_qat_fw_comp_req *req_tmpl =
- (struct icp_qat_fw_comp_req *)ctx;
- struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
- struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
- struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars;
- struct icp_qat_hw_comp_20_config_csr_upper hw_comp_upper_csr = {0};
- struct icp_qat_hw_comp_20_config_csr_lower hw_comp_lower_csr = {0};
- struct icp_qat_hw_decomp_20_config_csr_lower hw_decomp_lower_csr = {0};
- u32 upper_val;
- u32 lower_val;
-
- memset(req_tmpl, 0, sizeof(*req_tmpl));
- header->hdr_flags =
- ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
- header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP;
- header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC;
- header->comn_req_flags =
- ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA,
- QAT_COMN_PTR_TYPE_SGL);
- header->serv_specif_flags =
- ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION,
- ICP_QAT_FW_COMP_AUTO_SELECT_BEST,
- ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST,
- ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST,
- ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF);
- hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL;
- hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77;
- hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED;
- hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1;
- hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW;
- hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED;
- hw_comp_upper_csr.nice = ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL;
- hw_comp_upper_csr.lazy = ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL;
-
- upper_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(hw_comp_upper_csr);
- lower_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(hw_comp_lower_csr);
-
- cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
- cd_pars->u.sl.comp_slice_cfg_word[1] = upper_val;
-
- req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER;
- req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC;
- req_pars->req_par_flags =
- ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP,
- ICP_QAT_FW_COMP_EOP,
- ICP_QAT_FW_COMP_BFINAL,
- ICP_QAT_FW_COMP_CNV,
- ICP_QAT_FW_COMP_CNV_RECOVERY,
- ICP_QAT_FW_COMP_NO_CNV_DFX,
- ICP_QAT_FW_COMP_CRC_MODE_LEGACY,
- ICP_QAT_FW_COMP_NO_XXHASH_ACC,
- ICP_QAT_FW_COMP_CNV_ERROR_NONE,
- ICP_QAT_FW_COMP_NO_APPEND_CRC,
- ICP_QAT_FW_COMP_NO_DROP_DATA);
-
- /* Fill second half of the template for decompression */
- memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl));
- req_tmpl++;
- header = &req_tmpl->comn_hdr;
- header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
- cd_pars = &req_tmpl->cd_pars;
-
- hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE;
- lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr);
-
- cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
- cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
-}
-
-void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops)
-{
- dc_ops->build_deflate_ctx = qat_comp_build_deflate;
-}
-EXPORT_SYMBOL_GPL(adf_gen4_init_dc_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_dc.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_dc.h
deleted file mode 100644
index 0b1a6774412e..000000000000
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_dc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright(c) 2022 Intel Corporation */
-#ifndef ADF_GEN4_DC_H
-#define ADF_GEN4_DC_H
-
-#include "adf_accel_devices.h"
-
-void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops);
-
-#endif /* ADF_GEN4_DC_H */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
index 099949a2421c..0406cb09c5bb 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
@@ -9,6 +9,8 @@
#include "adf_fw_config.h"
#include "adf_gen4_hw_data.h"
#include "adf_gen4_pm.h"
+#include "icp_qat_fw_comp.h"
+#include "icp_qat_hw_20_comp.h"
u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self)
{
@@ -663,3 +665,71 @@ int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number
return ret;
}
EXPORT_SYMBOL_GPL(adf_gen4_bank_state_restore);
+
+static int adf_gen4_build_comp_block(void *ctx, enum adf_dc_algo algo)
+{
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
+ struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+ struct icp_qat_hw_comp_20_config_csr_upper hw_comp_upper_csr = { };
+ struct icp_qat_hw_comp_20_config_csr_lower hw_comp_lower_csr = { };
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+ u32 upper_val;
+ u32 lower_val;
+
+ switch (algo) {
+ case QAT_DEFLATE:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL;
+ hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77;
+ hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED;
+ hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1;
+ hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW;
+ hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED;
+ hw_comp_upper_csr.nice = ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL;
+ hw_comp_upper_csr.lazy = ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL;
+
+ upper_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(hw_comp_upper_csr);
+ lower_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(hw_comp_lower_csr);
+
+ cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
+ cd_pars->u.sl.comp_slice_cfg_word[1] = upper_val;
+
+ return 0;
+}
+
+static int adf_gen4_build_decomp_block(void *ctx, enum adf_dc_algo algo)
+{
+ struct icp_qat_fw_comp_req *req_tmpl = ctx;
+ struct icp_qat_hw_decomp_20_config_csr_lower hw_decomp_lower_csr = { };
+ struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+ struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+ u32 lower_val;
+
+ switch (algo) {
+ case QAT_DEFLATE:
+ header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE;
+ lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr);
+
+ cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
+ cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
+
+ return 0;
+}
+
+void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops)
+{
+ dc_ops->build_comp_block = adf_gen4_build_comp_block;
+ dc_ops->build_decomp_block = adf_gen4_build_decomp_block;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_init_dc_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
index 51fc2eaa263e..e4f4d5fa616d 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
@@ -7,6 +7,7 @@
#include "adf_accel_devices.h"
#include "adf_cfg_common.h"
+#include "adf_dc.h"
/* PCIe configuration space */
#define ADF_GEN4_BAR_MASK (BIT(0) | BIT(2) | BIT(4))
@@ -180,5 +181,6 @@ int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number,
int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev,
u32 bank_number, struct bank_state *state);
bool adf_gen4_services_supported(unsigned long service_mask);
+void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops);
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.h
index 17d1b774d4a8..2c8708117f70 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.h
@@ -4,6 +4,7 @@
#define ADF_GEN4_PFVF_H
#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
#ifdef CONFIG_PCI_IOV
void adf_gen4_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h b/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h
new file mode 100644
index 000000000000..9a5b995f7ada
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ADF_GEN6_PM_H
+#define ADF_GEN6_PM_H
+
+#include <linux/bits.h>
+#include <linux/time.h>
+
+struct adf_accel_dev;
+
+/* Power management */
+#define ADF_GEN6_PM_POLL_DELAY_US 20
+#define ADF_GEN6_PM_POLL_TIMEOUT_US USEC_PER_SEC
+#define ADF_GEN6_PM_STATUS 0x50A00C
+#define ADF_GEN6_PM_INTERRUPT 0x50A028
+
+/* Power management source in ERRSOU2 and ERRMSK2 */
+#define ADF_GEN6_PM_SOU BIT(18)
+
+/* cpm_pm_interrupt bitfields */
+#define ADF_GEN6_PM_DRV_ACTIVE BIT(20)
+
+#define ADF_GEN6_PM_DEFAULT_IDLE_FILTER 0x6
+
+/* cpm_pm_status bitfields */
+#define ADF_GEN6_PM_INIT_STATE BIT(21)
+
+#endif /* ADF_GEN6_PM_H */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_ras.c b/drivers/crypto/intel/qat/qat_common/adf_gen6_ras.c
new file mode 100644
index 000000000000..967253082a98
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_ras.c
@@ -0,0 +1,818 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 Intel Corporation */
+#include <linux/bitfield.h>
+#include <linux/types.h>
+
+#include "adf_common_drv.h"
+#include "adf_gen6_ras.h"
+#include "adf_sysfs_ras_counters.h"
+
+static void enable_errsou_reporting(void __iomem *csr)
+{
+ /* Enable correctable error reporting in ERRSOU0 */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK0, 0);
+
+ /* Enable uncorrectable error reporting in ERRSOU1 */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK1, 0);
+
+ /*
+ * Enable uncorrectable error reporting in ERRSOU2
+ * but disable PM interrupt by default
+ */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK2, ADF_GEN6_ERRSOU2_PM_INT_BIT);
+
+ /* Enable uncorrectable error reporting in ERRSOU3 */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK3, 0);
+}
+
+static void enable_ae_error_reporting(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ae_mask = GET_HW_DATA(accel_dev)->ae_mask;
+
+ /* Enable acceleration engine correctable error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_HIAECORERRLOGENABLE_CPP0, ae_mask);
+
+ /* Enable acceleration engine uncorrectable error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_HIAEUNCERRLOGENABLE_CPP0, ae_mask);
+}
+
+static void enable_cpp_error_reporting(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ /* Enable HI CPP agents command parity error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_HICPPAGENTCMDPARERRLOGENABLE,
+ ADF_6XXX_HICPPAGENTCMDPARERRLOG_MASK);
+
+ ADF_CSR_WR(csr, ADF_GEN6_CPP_CFC_ERR_CTRL, ADF_GEN6_CPP_CFC_ERR_CTRL_MASK);
+}
+
+static void enable_ti_ri_error_reporting(void __iomem *csr)
+{
+ u32 reg, mask;
+
+ /* Enable RI memory error reporting */
+ mask = ADF_GEN6_RIMEM_PARERR_FATAL_MASK | ADF_GEN6_RIMEM_PARERR_CERR_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_RI_MEM_PAR_ERR_EN0, mask);
+
+ /* Enable IOSF primary command parity error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_RIMISCCTL, ADF_GEN6_RIMISCSTS_BIT);
+
+ /* Enable TI internal memory parity error reporting */
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TI_CI_PAR_ERR_MASK);
+ reg &= ~ADF_GEN6_TI_CI_PAR_STS_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TI_CI_PAR_ERR_MASK, reg);
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TI_PULL0FUB_PAR_ERR_MASK);
+ reg &= ~ADF_GEN6_TI_PULL0FUB_PAR_STS_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TI_PULL0FUB_PAR_ERR_MASK, reg);
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TI_PUSHFUB_PAR_ERR_MASK);
+ reg &= ~ADF_GEN6_TI_PUSHFUB_PAR_STS_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TI_PUSHFUB_PAR_ERR_MASK, reg);
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TI_CD_PAR_ERR_MASK);
+ reg &= ~ADF_GEN6_TI_CD_PAR_STS_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TI_CD_PAR_ERR_MASK, reg);
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TI_TRNSB_PAR_ERR_MASK);
+ reg &= ~ADF_GEN6_TI_TRNSB_PAR_STS_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TI_TRNSB_PAR_ERR_MASK, reg);
+
+ /* Enable error handling in RI, TI CPP interface control registers */
+ ADF_CSR_WR(csr, ADF_GEN6_RICPPINTCTL, ADF_GEN6_RICPPINTCTL_MASK);
+ ADF_CSR_WR(csr, ADF_GEN6_TICPPINTCTL, ADF_GEN6_TICPPINTCTL_MASK);
+
+ /*
+ * Enable error detection and reporting in TIMISCSTS
+ * with bits 1, 2 and 30 value preserved
+ */
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TIMISCCTL);
+ reg &= ADF_GEN6_TIMSCCTL_RELAY_MASK;
+ reg |= ADF_GEN6_TIMISCCTL_BIT;
+ ADF_CSR_WR(csr, ADF_GEN6_TIMISCCTL, reg);
+}
+
+static void enable_ssm_error_reporting(struct adf_accel_dev *accel_dev,
+ void __iomem *csr)
+{
+ /* Enable SSM interrupts */
+ ADF_CSR_WR(csr, ADF_GEN6_INTMASKSSM, 0);
+}
+
+static void adf_gen6_enable_ras(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+
+ enable_errsou_reporting(csr);
+ enable_ae_error_reporting(accel_dev, csr);
+ enable_cpp_error_reporting(accel_dev, csr);
+ enable_ti_ri_error_reporting(csr);
+ enable_ssm_error_reporting(accel_dev, csr);
+}
+
+static void disable_errsou_reporting(void __iomem *csr)
+{
+ u32 val;
+
+ /* Disable correctable error reporting in ERRSOU0 */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK0, ADF_GEN6_ERRSOU0_MASK);
+
+ /* Disable uncorrectable error reporting in ERRSOU1 */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK1, ADF_GEN6_ERRMSK1_MASK);
+
+ /* Disable uncorrectable error reporting in ERRSOU2 */
+ val = ADF_CSR_RD(csr, ADF_GEN6_ERRMSK2);
+ val |= ADF_GEN6_ERRSOU2_DIS_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK2, val);
+
+ /* Disable uncorrectable error reporting in ERRSOU3 */
+ ADF_CSR_WR(csr, ADF_GEN6_ERRMSK3, ADF_GEN6_ERRSOU3_DIS_MASK);
+}
+
+static void disable_ae_error_reporting(void __iomem *csr)
+{
+ /* Disable acceleration engine correctable error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_HIAECORERRLOGENABLE_CPP0, 0);
+
+ /* Disable acceleration engine uncorrectable error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_HIAEUNCERRLOGENABLE_CPP0, 0);
+}
+
+static void disable_cpp_error_reporting(void __iomem *csr)
+{
+ /* Disable HI CPP agents command parity error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_HICPPAGENTCMDPARERRLOGENABLE, 0);
+
+ ADF_CSR_WR(csr, ADF_GEN6_CPP_CFC_ERR_CTRL, ADF_GEN6_CPP_CFC_ERR_CTRL_DIS_MASK);
+}
+
+static void disable_ti_ri_error_reporting(void __iomem *csr)
+{
+ u32 reg;
+
+ /* Disable RI memory error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_RI_MEM_PAR_ERR_EN0, 0);
+
+ /* Disable IOSF primary command parity error reporting */
+ reg = ADF_CSR_RD(csr, ADF_GEN6_RIMISCCTL);
+ reg &= ~ADF_GEN6_RIMISCSTS_BIT;
+ ADF_CSR_WR(csr, ADF_GEN6_RIMISCCTL, reg);
+
+ /* Disable TI internal memory parity error reporting */
+ ADF_CSR_WR(csr, ADF_GEN6_TI_CI_PAR_ERR_MASK, ADF_GEN6_TI_CI_PAR_STS_MASK);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_PULL0FUB_PAR_ERR_MASK, ADF_GEN6_TI_PULL0FUB_PAR_STS_MASK);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_PUSHFUB_PAR_ERR_MASK, ADF_GEN6_TI_PUSHFUB_PAR_STS_MASK);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_CD_PAR_ERR_MASK, ADF_GEN6_TI_CD_PAR_STS_MASK);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_TRNSB_PAR_ERR_MASK, ADF_GEN6_TI_TRNSB_PAR_STS_MASK);
+
+ /* Disable error handling in RI, TI CPP interface control registers */
+ reg = ADF_CSR_RD(csr, ADF_GEN6_RICPPINTCTL);
+ reg &= ~ADF_GEN6_RICPPINTCTL_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_RICPPINTCTL, reg);
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TICPPINTCTL);
+ reg &= ~ADF_GEN6_TICPPINTCTL_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TICPPINTCTL, reg);
+
+ /*
+ * Disable error detection and reporting in TIMISCSTS
+ * with bits 1, 2 and 30 value preserved
+ */
+ reg = ADF_CSR_RD(csr, ADF_GEN6_TIMISCCTL);
+ reg &= ADF_GEN6_TIMSCCTL_RELAY_MASK;
+ ADF_CSR_WR(csr, ADF_GEN6_TIMISCCTL, reg);
+}
+
+static void disable_ssm_error_reporting(void __iomem *csr)
+{
+ /* Disable SSM interrupts */
+ ADF_CSR_WR(csr, ADF_GEN6_INTMASKSSM, ADF_GEN6_INTMASKSSM_MASK);
+}
+
+static void adf_gen6_disable_ras(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+
+ disable_errsou_reporting(csr);
+ disable_ae_error_reporting(csr);
+ disable_cpp_error_reporting(csr);
+ disable_ti_ri_error_reporting(csr);
+ disable_ssm_error_reporting(csr);
+}
+
+static void adf_gen6_process_errsou0(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ae, errsou;
+
+ ae = ADF_CSR_RD(csr, ADF_GEN6_HIAECORERRLOG_CPP0);
+ ae &= GET_HW_DATA(accel_dev)->ae_mask;
+
+ dev_warn(&GET_DEV(accel_dev), "Correctable error detected: %#x\n", ae);
+
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_CORR);
+
+ /* Clear interrupt from ERRSOU0 */
+ ADF_CSR_WR(csr, ADF_GEN6_HIAECORERRLOG_CPP0, ae);
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU0);
+ if (errsou & ADF_GEN6_ERRSOU0_MASK)
+ dev_warn(&GET_DEV(accel_dev), "errsou0 still set: %#x\n", errsou);
+}
+
+static void adf_handle_cpp_ae_unc(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 ae;
+
+ if (!(errsou & ADF_GEN6_ERRSOU1_CPP0_MEUNC_BIT))
+ return;
+
+ ae = ADF_CSR_RD(csr, ADF_GEN6_HIAEUNCERRLOG_CPP0);
+ ae &= GET_HW_DATA(accel_dev)->ae_mask;
+ if (ae) {
+ dev_err(&GET_DEV(accel_dev), "Uncorrectable error detected: %#x\n", ae);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_HIAEUNCERRLOG_CPP0, ae);
+ }
+}
+
+static void adf_handle_cpp_cmd_par_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 cmd_par_err;
+
+ if (!(errsou & ADF_GEN6_ERRSOU1_CPP_CMDPARERR_BIT))
+ return;
+
+ cmd_par_err = ADF_CSR_RD(csr, ADF_GEN6_HICPPAGENTCMDPARERRLOG);
+ cmd_par_err &= ADF_6XXX_HICPPAGENTCMDPARERRLOG_MASK;
+ if (cmd_par_err) {
+ dev_err(&GET_DEV(accel_dev), "HI CPP agent command parity error: %#x\n",
+ cmd_par_err);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_HICPPAGENTCMDPARERRLOG, cmd_par_err);
+ }
+}
+
+static void adf_handle_ri_mem_par_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 rimem_parerr_sts;
+
+ if (!(errsou & ADF_GEN6_ERRSOU1_RIMEM_PARERR_STS_BIT))
+ return;
+
+ rimem_parerr_sts = ADF_CSR_RD(csr, ADF_GEN6_RIMEM_PARERR_STS);
+ rimem_parerr_sts &= ADF_GEN6_RIMEM_PARERR_CERR_MASK |
+ ADF_GEN6_RIMEM_PARERR_FATAL_MASK;
+ if (rimem_parerr_sts & ADF_GEN6_RIMEM_PARERR_CERR_MASK) {
+ dev_err(&GET_DEV(accel_dev), "RI memory parity correctable error: %#x\n",
+ rimem_parerr_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_CORR);
+ }
+
+ if (rimem_parerr_sts & ADF_GEN6_RIMEM_PARERR_FATAL_MASK) {
+ dev_err(&GET_DEV(accel_dev), "RI memory parity fatal error: %#x\n",
+ rimem_parerr_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ }
+
+ ADF_CSR_WR(csr, ADF_GEN6_RIMEM_PARERR_STS, rimem_parerr_sts);
+}
+
+static void adf_handle_ti_ci_par_sts(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ti_ci_par_sts;
+
+ ti_ci_par_sts = ADF_CSR_RD(csr, ADF_GEN6_TI_CI_PAR_STS);
+ ti_ci_par_sts &= ADF_GEN6_TI_CI_PAR_STS_MASK;
+ if (ti_ci_par_sts) {
+ dev_err(&GET_DEV(accel_dev), "TI memory parity error: %#x\n", ti_ci_par_sts);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_CI_PAR_STS, ti_ci_par_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ }
+}
+
+static void adf_handle_ti_pullfub_par_sts(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ti_pullfub_par_sts;
+
+ ti_pullfub_par_sts = ADF_CSR_RD(csr, ADF_GEN6_TI_PULL0FUB_PAR_STS);
+ ti_pullfub_par_sts &= ADF_GEN6_TI_PULL0FUB_PAR_STS_MASK;
+ if (ti_pullfub_par_sts) {
+ dev_err(&GET_DEV(accel_dev), "TI pull parity error: %#x\n", ti_pullfub_par_sts);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_PULL0FUB_PAR_STS, ti_pullfub_par_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ }
+}
+
+static void adf_handle_ti_pushfub_par_sts(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ti_pushfub_par_sts;
+
+ ti_pushfub_par_sts = ADF_CSR_RD(csr, ADF_GEN6_TI_PUSHFUB_PAR_STS);
+ ti_pushfub_par_sts &= ADF_GEN6_TI_PUSHFUB_PAR_STS_MASK;
+ if (ti_pushfub_par_sts) {
+ dev_err(&GET_DEV(accel_dev), "TI push parity error: %#x\n", ti_pushfub_par_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_PUSHFUB_PAR_STS, ti_pushfub_par_sts);
+ }
+}
+
+static void adf_handle_ti_cd_par_sts(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ti_cd_par_sts;
+
+ ti_cd_par_sts = ADF_CSR_RD(csr, ADF_GEN6_TI_CD_PAR_STS);
+ ti_cd_par_sts &= ADF_GEN6_TI_CD_PAR_STS_MASK;
+ if (ti_cd_par_sts) {
+ dev_err(&GET_DEV(accel_dev), "TI CD parity error: %#x\n", ti_cd_par_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_CD_PAR_STS, ti_cd_par_sts);
+ }
+}
+
+static void adf_handle_ti_trnsb_par_sts(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 ti_trnsb_par_sts;
+
+ ti_trnsb_par_sts = ADF_CSR_RD(csr, ADF_GEN6_TI_TRNSB_PAR_STS);
+ ti_trnsb_par_sts &= ADF_GEN6_TI_TRNSB_PAR_STS_MASK;
+ if (ti_trnsb_par_sts) {
+ dev_err(&GET_DEV(accel_dev), "TI TRNSB parity error: %#x\n", ti_trnsb_par_sts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_TI_TRNSB_PAR_STS, ti_trnsb_par_sts);
+ }
+}
+
+static void adf_handle_iosfp_cmd_parerr(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 rimiscsts;
+
+ rimiscsts = ADF_CSR_RD(csr, ADF_GEN6_RIMISCSTS);
+ rimiscsts &= ADF_GEN6_RIMISCSTS_BIT;
+ if (rimiscsts) {
+ dev_err(&GET_DEV(accel_dev), "Command parity error detected on IOSFP: %#x\n",
+ rimiscsts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_RIMISCSTS, rimiscsts);
+ }
+}
+
+static void adf_handle_ti_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU1_TIMEM_PARERR_STS_BIT))
+ return;
+
+ adf_handle_ti_ci_par_sts(accel_dev, csr);
+ adf_handle_ti_pullfub_par_sts(accel_dev, csr);
+ adf_handle_ti_pushfub_par_sts(accel_dev, csr);
+ adf_handle_ti_cd_par_sts(accel_dev, csr);
+ adf_handle_ti_trnsb_par_sts(accel_dev, csr);
+ adf_handle_iosfp_cmd_parerr(accel_dev, csr);
+}
+
+static void adf_handle_sfi_cmd_parerr(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU1_SFICMD_PARERR_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev),
+ "Command parity error detected on streaming fabric interface\n");
+
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+}
+
+static void adf_gen6_process_errsou1(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ adf_handle_cpp_ae_unc(accel_dev, csr, errsou);
+ adf_handle_cpp_cmd_par_err(accel_dev, csr, errsou);
+ adf_handle_ri_mem_par_err(accel_dev, csr, errsou);
+ adf_handle_ti_err(accel_dev, csr, errsou);
+ adf_handle_sfi_cmd_parerr(accel_dev, csr, errsou);
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU1);
+ if (errsou & ADF_GEN6_ERRSOU1_MASK)
+ dev_warn(&GET_DEV(accel_dev), "errsou1 still set: %#x\n", errsou);
+}
+
+static void adf_handle_cerrssmsh(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 reg;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_CERRSSMSH);
+ reg &= ADF_GEN6_CERRSSMSH_ERROR_BIT;
+ if (reg) {
+ dev_warn(&GET_DEV(accel_dev),
+ "Correctable error on ssm shared memory: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_CORR);
+ ADF_CSR_WR(csr, ADF_GEN6_CERRSSMSH, reg);
+ }
+}
+
+static void adf_handle_uerrssmsh(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 iastatssm)
+{
+ u32 reg;
+
+ if (!(iastatssm & ADF_GEN6_IAINTSTATSSM_SH_ERR_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_UERRSSMSH);
+ reg &= ADF_GEN6_UERRSSMSH_MASK;
+ if (reg) {
+ dev_err(&GET_DEV(accel_dev),
+ "Fatal error on ssm shared memory: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_UERRSSMSH, reg);
+ }
+}
+
+static void adf_handle_pperr_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 iastatssm)
+{
+ u32 reg;
+
+ if (!(iastatssm & ADF_GEN6_IAINTSTATSSM_PPERR_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_PPERR);
+ reg &= ADF_GEN6_PPERR_MASK;
+ if (reg) {
+ dev_err(&GET_DEV(accel_dev),
+ "Fatal push or pull data error: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_PPERR, reg);
+ }
+}
+
+static void adf_handle_scmpar_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 iastatssm)
+{
+ u32 reg;
+
+ if (!(iastatssm & ADF_GEN6_IAINTSTATSSM_SCMPAR_ERR_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_SSM_FERR_STATUS);
+ reg &= ADF_GEN6_SCM_PAR_ERR_MASK;
+ if (reg) {
+ dev_err(&GET_DEV(accel_dev), "Fatal error on SCM: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_SSM_FERR_STATUS, reg);
+ }
+}
+
+static void adf_handle_cpppar_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 iastatssm)
+{
+ u32 reg;
+
+ if (!(iastatssm & ADF_GEN6_IAINTSTATSSM_CPPPAR_ERR_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_SSM_FERR_STATUS);
+ reg &= ADF_GEN6_CPP_PAR_ERR_MASK;
+ if (reg) {
+ dev_err(&GET_DEV(accel_dev), "Fatal error on CPP: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_SSM_FERR_STATUS, reg);
+ }
+}
+
+static void adf_handle_rfpar_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 iastatssm)
+{
+ u32 reg;
+
+ if (!(iastatssm & ADF_GEN6_IAINTSTATSSM_RFPAR_ERR_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_SSM_FERR_STATUS);
+ reg &= ADF_GEN6_RF_PAR_ERR_MASK;
+ if (reg) {
+ dev_err(&GET_DEV(accel_dev), "Fatal error on RF Parity: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_SSM_FERR_STATUS, reg);
+ }
+}
+
+static void adf_handle_unexp_cpl_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 iastatssm)
+{
+ u32 reg;
+
+ if (!(iastatssm & ADF_GEN6_IAINTSTATSSM_UNEXP_CPL_ERR_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_SSM_FERR_STATUS);
+ reg &= ADF_GEN6_UNEXP_CPL_ERR_MASK;
+ if (reg) {
+ dev_err(&GET_DEV(accel_dev),
+ "Fatal error for AXI unexpected tag/length: %#x\n", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_SSM_FERR_STATUS, reg);
+ }
+}
+
+static void adf_handle_iaintstatssm(struct adf_accel_dev *accel_dev, void __iomem *csr)
+{
+ u32 iastatssm = ADF_CSR_RD(csr, ADF_GEN6_IAINTSTATSSM);
+
+ iastatssm &= ADF_GEN6_IAINTSTATSSM_MASK;
+ if (!iastatssm)
+ return;
+
+ adf_handle_uerrssmsh(accel_dev, csr, iastatssm);
+ adf_handle_pperr_err(accel_dev, csr, iastatssm);
+ adf_handle_scmpar_err(accel_dev, csr, iastatssm);
+ adf_handle_cpppar_err(accel_dev, csr, iastatssm);
+ adf_handle_rfpar_err(accel_dev, csr, iastatssm);
+ adf_handle_unexp_cpl_err(accel_dev, csr, iastatssm);
+
+ ADF_CSR_WR(csr, ADF_GEN6_IAINTSTATSSM, iastatssm);
+}
+
+static void adf_handle_ssm(struct adf_accel_dev *accel_dev, void __iomem *csr, u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU2_SSM_ERR_BIT))
+ return;
+
+ adf_handle_cerrssmsh(accel_dev, csr);
+ adf_handle_iaintstatssm(accel_dev, csr);
+}
+
+static void adf_handle_cpp_cfc_err(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 reg;
+
+ if (!(errsou & ADF_GEN6_ERRSOU2_CPP_CFC_ERR_STATUS_BIT))
+ return;
+
+ reg = ADF_CSR_RD(csr, ADF_GEN6_CPP_CFC_ERR_STATUS);
+ if (reg & ADF_GEN6_CPP_CFC_ERR_STATUS_DATAPAR_BIT) {
+ dev_err(&GET_DEV(accel_dev), "CPP_CFC_ERR: data parity: %#x", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ }
+
+ if (reg & ADF_GEN6_CPP_CFC_ERR_STATUS_CMDPAR_BIT) {
+ dev_err(&GET_DEV(accel_dev), "CPP_CFC_ERR: command parity: %#x", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ }
+
+ if (reg & ADF_GEN6_CPP_CFC_FATAL_ERR_BIT) {
+ dev_err(&GET_DEV(accel_dev), "CPP_CFC_ERR: errors: %#x", reg);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ }
+
+ ADF_CSR_WR(csr, ADF_GEN6_CPP_CFC_ERR_STATUS_CLR,
+ ADF_GEN6_CPP_CFC_ERR_STATUS_CLR_MASK);
+}
+
+static void adf_gen6_process_errsou2(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ adf_handle_ssm(accel_dev, csr, errsou);
+ adf_handle_cpp_cfc_err(accel_dev, csr, errsou);
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU2);
+ if (errsou & ADF_GEN6_ERRSOU2_MASK)
+ dev_warn(&GET_DEV(accel_dev), "errsou2 still set: %#x\n", errsou);
+}
+
+static void adf_handle_timiscsts(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 timiscsts;
+
+ if (!(errsou & ADF_GEN6_ERRSOU3_TIMISCSTS_BIT))
+ return;
+
+ timiscsts = ADF_CSR_RD(csr, ADF_GEN6_TIMISCSTS);
+ if (timiscsts) {
+ dev_err(&GET_DEV(accel_dev), "Fatal error in transmit interface: %#x\n",
+ timiscsts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ }
+}
+
+static void adf_handle_ricppintsts(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 ricppintsts;
+
+ if (!(errsou & ADF_GEN6_ERRSOU3_RICPPINTSTS_MASK))
+ return;
+
+ ricppintsts = ADF_CSR_RD(csr, ADF_GEN6_RICPPINTSTS);
+ ricppintsts &= ADF_GEN6_RICPPINTSTS_MASK;
+ if (ricppintsts) {
+ dev_err(&GET_DEV(accel_dev), "RI push pull error: %#x\n", ricppintsts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_RICPPINTSTS, ricppintsts);
+ }
+}
+
+static void adf_handle_ticppintsts(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 ticppintsts;
+
+ if (!(errsou & ADF_GEN6_ERRSOU3_TICPPINTSTS_MASK))
+ return;
+
+ ticppintsts = ADF_CSR_RD(csr, ADF_GEN6_TICPPINTSTS);
+ ticppintsts &= ADF_GEN6_TICPPINTSTS_MASK;
+ if (ticppintsts) {
+ dev_err(&GET_DEV(accel_dev), "TI push pull error: %#x\n", ticppintsts);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+ ADF_CSR_WR(csr, ADF_GEN6_TICPPINTSTS, ticppintsts);
+ }
+}
+
+static void adf_handle_atufaultstatus(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 max_rp_num = GET_HW_DATA(accel_dev)->num_banks;
+ u32 atufaultstatus;
+ u32 i;
+
+ if (!(errsou & ADF_GEN6_ERRSOU3_ATUFAULTSTATUS_BIT))
+ return;
+
+ for (i = 0; i < max_rp_num; i++) {
+ atufaultstatus = ADF_CSR_RD(csr, ADF_GEN6_ATUFAULTSTATUS(i));
+
+ atufaultstatus &= ADF_GEN6_ATUFAULTSTATUS_BIT;
+ if (atufaultstatus) {
+ dev_err(&GET_DEV(accel_dev), "Ring pair (%u) ATU detected fault: %#x\n", i,
+ atufaultstatus);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_ATUFAULTSTATUS(i), atufaultstatus);
+ }
+ }
+}
+
+static void adf_handle_rlterror(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ u32 rlterror;
+
+ if (!(errsou & ADF_GEN6_ERRSOU3_RLTERROR_BIT))
+ return;
+
+ rlterror = ADF_CSR_RD(csr, ADF_GEN6_RLT_ERRLOG);
+ rlterror &= ADF_GEN6_RLT_ERRLOG_MASK;
+ if (rlterror) {
+ dev_err(&GET_DEV(accel_dev), "Error in rate limiting block: %#x\n", rlterror);
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+ ADF_CSR_WR(csr, ADF_GEN6_RLT_ERRLOG, rlterror);
+ }
+}
+
+static void adf_handle_vflr(struct adf_accel_dev *accel_dev, void __iomem *csr, u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU3_VFLRNOTIFY_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev), "Uncorrectable error in VF\n");
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_UNCORR);
+}
+
+static void adf_handle_tc_vc_map_error(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU3_TC_VC_MAP_ERROR_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev), "Violation of PCIe TC VC mapping\n");
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+}
+
+static void adf_handle_pcie_devhalt(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU3_PCIE_DEVHALT_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev),
+ "DEVHALT due to an error in an incoming transaction\n");
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+}
+
+static void adf_handle_pg_req_devhalt(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU3_PG_REQ_DEVHALT_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev),
+ "Error due to response failure in response to a page request\n");
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+}
+
+static void adf_handle_xlt_cpl_devhalt(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU3_XLT_CPL_DEVHALT_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev), "Error status for a address translation request\n");
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+}
+
+static void adf_handle_ti_int_err_devhalt(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ if (!(errsou & ADF_GEN6_ERRSOU3_TI_INT_ERR_DEVHALT_BIT))
+ return;
+
+ dev_err(&GET_DEV(accel_dev), "DEVHALT due to a TI internal memory error\n");
+ ADF_RAS_ERR_CTR_INC(accel_dev->ras_errors, ADF_RAS_FATAL);
+}
+
+static void adf_gen6_process_errsou3(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ u32 errsou)
+{
+ adf_handle_timiscsts(accel_dev, csr, errsou);
+ adf_handle_ricppintsts(accel_dev, csr, errsou);
+ adf_handle_ticppintsts(accel_dev, csr, errsou);
+ adf_handle_atufaultstatus(accel_dev, csr, errsou);
+ adf_handle_rlterror(accel_dev, csr, errsou);
+ adf_handle_vflr(accel_dev, csr, errsou);
+ adf_handle_tc_vc_map_error(accel_dev, csr, errsou);
+ adf_handle_pcie_devhalt(accel_dev, csr, errsou);
+ adf_handle_pg_req_devhalt(accel_dev, csr, errsou);
+ adf_handle_xlt_cpl_devhalt(accel_dev, csr, errsou);
+ adf_handle_ti_int_err_devhalt(accel_dev, csr, errsou);
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU3);
+ if (errsou & ADF_GEN6_ERRSOU3_MASK)
+ dev_warn(&GET_DEV(accel_dev), "errsou3 still set: %#x\n", errsou);
+}
+
+static void adf_gen6_is_reset_required(struct adf_accel_dev *accel_dev, void __iomem *csr,
+ bool *reset_required)
+{
+ u8 reset, dev_state;
+ u32 gensts;
+
+ gensts = ADF_CSR_RD(csr, ADF_GEN6_GENSTS);
+ dev_state = FIELD_GET(ADF_GEN6_GENSTS_DEVICE_STATE_MASK, gensts);
+ reset = FIELD_GET(ADF_GEN6_GENSTS_RESET_TYPE_MASK, gensts);
+ if (dev_state == ADF_GEN6_GENSTS_DEVHALT && reset == ADF_GEN6_GENSTS_PFLR) {
+ *reset_required = true;
+ return;
+ }
+
+ if (reset == ADF_GEN6_GENSTS_COLD_RESET)
+ dev_err(&GET_DEV(accel_dev), "Fatal error, cold reset required\n");
+
+ *reset_required = false;
+}
+
+static bool adf_gen6_handle_interrupt(struct adf_accel_dev *accel_dev, bool *reset_required)
+{
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+ bool handled = false;
+ u32 errsou;
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU0);
+ if (errsou & ADF_GEN6_ERRSOU0_MASK) {
+ adf_gen6_process_errsou0(accel_dev, csr);
+ handled = true;
+ }
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU1);
+ if (errsou & ADF_GEN6_ERRSOU1_MASK) {
+ adf_gen6_process_errsou1(accel_dev, csr, errsou);
+ handled = true;
+ }
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU2);
+ if (errsou & ADF_GEN6_ERRSOU2_MASK) {
+ adf_gen6_process_errsou2(accel_dev, csr, errsou);
+ handled = true;
+ }
+
+ errsou = ADF_CSR_RD(csr, ADF_GEN6_ERRSOU3);
+ if (errsou & ADF_GEN6_ERRSOU3_MASK) {
+ adf_gen6_process_errsou3(accel_dev, csr, errsou);
+ handled = true;
+ }
+
+ adf_gen6_is_reset_required(accel_dev, csr, reset_required);
+
+ return handled;
+}
+
+void adf_gen6_init_ras_ops(struct adf_ras_ops *ras_ops)
+{
+ ras_ops->enable_ras_errors = adf_gen6_enable_ras;
+ ras_ops->disable_ras_errors = adf_gen6_disable_ras;
+ ras_ops->handle_interrupt = adf_gen6_handle_interrupt;
+}
+EXPORT_SYMBOL_GPL(adf_gen6_init_ras_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_ras.h b/drivers/crypto/intel/qat/qat_common/adf_gen6_ras.h
new file mode 100644
index 000000000000..66ced271d173
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_ras.h
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ADF_GEN6_RAS_H_
+#define ADF_GEN6_RAS_H_
+
+#include <linux/bits.h>
+
+struct adf_ras_ops;
+
+/* Error source registers */
+#define ADF_GEN6_ERRSOU0 0x41A200
+#define ADF_GEN6_ERRSOU1 0x41A204
+#define ADF_GEN6_ERRSOU2 0x41A208
+#define ADF_GEN6_ERRSOU3 0x41A20C
+
+/* Error source mask registers */
+#define ADF_GEN6_ERRMSK0 0x41A210
+#define ADF_GEN6_ERRMSK1 0x41A214
+#define ADF_GEN6_ERRMSK2 0x41A218
+#define ADF_GEN6_ERRMSK3 0x41A21C
+
+/* ERRSOU0 Correctable error mask */
+#define ADF_GEN6_ERRSOU0_MASK BIT(0)
+
+#define ADF_GEN6_ERRSOU1_CPP0_MEUNC_BIT BIT(0)
+#define ADF_GEN6_ERRSOU1_CPP_CMDPARERR_BIT BIT(1)
+#define ADF_GEN6_ERRSOU1_RIMEM_PARERR_STS_BIT BIT(2)
+#define ADF_GEN6_ERRSOU1_TIMEM_PARERR_STS_BIT BIT(3)
+#define ADF_GEN6_ERRSOU1_SFICMD_PARERR_BIT BIT(4)
+
+#define ADF_GEN6_ERRSOU1_MASK ( \
+ (ADF_GEN6_ERRSOU1_CPP0_MEUNC_BIT) | \
+ (ADF_GEN6_ERRSOU1_CPP_CMDPARERR_BIT) | \
+ (ADF_GEN6_ERRSOU1_RIMEM_PARERR_STS_BIT) | \
+ (ADF_GEN6_ERRSOU1_TIMEM_PARERR_STS_BIT) | \
+ (ADF_GEN6_ERRSOU1_SFICMD_PARERR_BIT))
+
+#define ADF_GEN6_ERRMSK1_CPP0_MEUNC_BIT BIT(0)
+#define ADF_GEN6_ERRMSK1_CPP_CMDPARERR_BIT BIT(1)
+#define ADF_GEN6_ERRMSK1_RIMEM_PARERR_STS_BIT BIT(2)
+#define ADF_GEN6_ERRMSK1_TIMEM_PARERR_STS_BIT BIT(3)
+#define ADF_GEN6_ERRMSK1_IOSFCMD_PARERR_BIT BIT(4)
+
+#define ADF_GEN6_ERRMSK1_MASK ( \
+ (ADF_GEN6_ERRMSK1_CPP0_MEUNC_BIT) | \
+ (ADF_GEN6_ERRMSK1_CPP_CMDPARERR_BIT) | \
+ (ADF_GEN6_ERRMSK1_RIMEM_PARERR_STS_BIT) | \
+ (ADF_GEN6_ERRMSK1_TIMEM_PARERR_STS_BIT) | \
+ (ADF_GEN6_ERRMSK1_IOSFCMD_PARERR_BIT))
+
+/* HI AE Uncorrectable error log */
+#define ADF_GEN6_HIAEUNCERRLOG_CPP0 0x41A300
+
+/* HI AE Uncorrectable error log enable */
+#define ADF_GEN6_HIAEUNCERRLOGENABLE_CPP0 0x41A320
+
+/* HI AE Correctable error log */
+#define ADF_GEN6_HIAECORERRLOG_CPP0 0x41A308
+
+/* HI AE Correctable error log enable */
+#define ADF_GEN6_HIAECORERRLOGENABLE_CPP0 0x41A318
+
+/* HI CPP Agent Command parity error log */
+#define ADF_GEN6_HICPPAGENTCMDPARERRLOG 0x41A310
+
+/* HI CPP Agent command parity error logging enable */
+#define ADF_GEN6_HICPPAGENTCMDPARERRLOGENABLE 0x41A314
+
+#define ADF_6XXX_HICPPAGENTCMDPARERRLOG_MASK 0x1B
+
+/* RI Memory parity error status register */
+#define ADF_GEN6_RIMEM_PARERR_STS 0x41B128
+
+/* RI Memory parity error reporting enable */
+#define ADF_GEN6_RI_MEM_PAR_ERR_EN0 0x41B12C
+
+/*
+ * RI Memory parity error mask
+ * BIT(4) - ri_tlq_phdr parity error
+ * BIT(5) - ri_tlq_pdata parity error
+ * BIT(6) - ri_tlq_nphdr parity error
+ * BIT(7) - ri_tlq_npdata parity error
+ * BIT(8) - ri_tlq_cplhdr parity error
+ * BIT(10) - BIT(13) - ri_tlq_cpldata[0:3] parity error
+ * BIT(19) - ri_cds_cmd_fifo parity error
+ * BIT(20) - ri_obc_ricpl_fifo parity error
+ * BIT(21) - ri_obc_tiricpl_fifo parity error
+ * BIT(22) - ri_obc_cppcpl_fifo parity error
+ * BIT(23) - ri_obc_pendcpl_fifo parity error
+ * BIT(24) - ri_cpp_cmd_fifo parity error
+ * BIT(25) - ri_cds_ticmd_fifo parity error
+ * BIT(26) - riti_cmd_fifo parity error
+ * BIT(27) - ri_int_msixtbl parity error
+ * BIT(28) - ri_int_imstbl parity error
+ * BIT(30) - ri_kpt_fuses parity error
+ */
+#define ADF_GEN6_RIMEM_PARERR_FATAL_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(4) | BIT(5) | BIT(6) | \
+ BIT(7) | BIT(8) | BIT(18) | BIT(19) | BIT(20) | BIT(21) | \
+ BIT(22) | BIT(23) | BIT(24) | BIT(25) | BIT(26) | BIT(27) | \
+ BIT(28) | BIT(30))
+
+#define ADF_GEN6_RIMEM_PARERR_CERR_MASK \
+ (BIT(10) | BIT(11) | BIT(12) | BIT(13))
+
+/* TI CI parity status */
+#define ADF_GEN6_TI_CI_PAR_STS 0x50060C
+
+/* TI CI parity reporting mask */
+#define ADF_GEN6_TI_CI_PAR_ERR_MASK 0x500608
+
+/*
+ * TI CI parity status mask
+ * BIT(0) - CdCmdQ_sts patiry error status
+ * BIT(1) - CdDataQ_sts parity error status
+ * BIT(3) - CPP_SkidQ_sts parity error status
+ */
+#define ADF_GEN6_TI_CI_PAR_STS_MASK \
+ (BIT(0) | BIT(1) | BIT(3))
+
+/* TI PULLFUB parity status */
+#define ADF_GEN6_TI_PULL0FUB_PAR_STS 0x500618
+
+/* TI PULLFUB parity error reporting mask */
+#define ADF_GEN6_TI_PULL0FUB_PAR_ERR_MASK 0x500614
+
+/*
+ * TI PULLFUB parity status mask
+ * BIT(0) - TrnPullReqQ_sts parity status
+ * BIT(1) - TrnSharedDataQ_sts parity status
+ * BIT(2) - TrnPullReqDataQ_sts parity status
+ * BIT(4) - CPP_CiPullReqQ_sts parity status
+ * BIT(5) - CPP_TrnPullReqQ_sts parity status
+ * BIT(6) - CPP_PullidQ_sts parity status
+ * BIT(7) - CPP_WaitDataQ_sts parity status
+ * BIT(8) - CPP_CdDataQ_sts parity status
+ * BIT(9) - CPP_TrnDataQP0_sts parity status
+ * BIT(10) - BIT(11) - CPP_TrnDataQRF[00:01]_sts parity status
+ * BIT(12) - CPP_TrnDataQP1_sts parity status
+ * BIT(13) - BIT(14) - CPP_TrnDataQRF[10:11]_sts parity status
+ */
+#define ADF_GEN6_TI_PULL0FUB_PAR_STS_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | \
+ BIT(8) | BIT(9) | BIT(10) | BIT(11) | BIT(12) | BIT(13) | BIT(14))
+
+/* TI PUSHUB parity status */
+#define ADF_GEN6_TI_PUSHFUB_PAR_STS 0x500630
+
+/* TI PUSHFUB parity error reporting mask */
+#define ADF_GEN6_TI_PUSHFUB_PAR_ERR_MASK 0x50062C
+
+/*
+ * TI PUSHUB parity status mask
+ * BIT(0) - SbPushReqQ_sts parity status
+ * BIT(1) - BIT(2) - SbPushDataQ[0:1]_sts parity status
+ * BIT(4) - CPP_CdPushReqQ_sts parity status
+ * BIT(5) - BIT(6) - CPP_CdPushDataQ[0:1]_sts parity status
+ * BIT(7) - CPP_SbPushReqQ_sts parity status
+ * BIT(8) - CPP_SbPushDataQP_sts parity status
+ * BIT(9) - BIT(10) - CPP_SbPushDataQRF[0:1]_sts parity status
+ */
+#define ADF_GEN6_TI_PUSHFUB_PAR_STS_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(4) | BIT(5) | \
+ BIT(6) | BIT(7) | BIT(8) | BIT(9) | BIT(10))
+
+/* TI CD parity status */
+#define ADF_GEN6_TI_CD_PAR_STS 0x50063C
+
+/* TI CD parity error mask */
+#define ADF_GEN6_TI_CD_PAR_ERR_MASK 0x500638
+
+/*
+ * TI CD parity status mask
+ * BIT(0) - BIT(15) - CtxMdRam[0:15]_sts parity status
+ * BIT(16) - Leaf2ClusterRam_sts parity status
+ * BIT(17) - BIT(18) - Ring2LeafRam[0:1]_sts parity status
+ * BIT(19) - VirtualQ_sts parity status
+ * BIT(20) - DtRdQ_sts parity status
+ * BIT(21) - DtWrQ_sts parity status
+ * BIT(22) - RiCmdQ_sts parity status
+ * BIT(23) - BypassQ_sts parity status
+ * BIT(24) - DtRdQ_sc_sts parity status
+ * BIT(25) - DtWrQ_sc_sts parity status
+ */
+#define ADF_GEN6_TI_CD_PAR_STS_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | \
+ BIT(7) | BIT(8) | BIT(9) | BIT(10) | BIT(11) | BIT(12) | BIT(13) | \
+ BIT(14) | BIT(15) | BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(20) | \
+ BIT(21) | BIT(22) | BIT(23) | BIT(24) | BIT(25))
+
+/* TI TRNSB parity status */
+#define ADF_GEN6_TI_TRNSB_PAR_STS 0x500648
+
+/* TI TRNSB parity error reporting mask */
+#define ADF_GEN6_TI_TRNSB_PAR_ERR_MASK 0x500644
+
+/*
+ * TI TRNSB parity status mask
+ * BIT(0) - TrnPHdrQP_sts parity status
+ * BIT(1) - TrnPHdrQRF_sts parity status
+ * BIT(2) - TrnPDataQP_sts parity status
+ * BIT(3) - BIT(6) - TrnPDataQRF[0:3]_sts parity status
+ * BIT(7) - TrnNpHdrQP_sts parity status
+ * BIT(8) - BIT(9) - TrnNpHdrQRF[0:1]_sts parity status
+ * BIT(10) - TrnCplHdrQ_sts parity status
+ * BIT(11) - TrnPutObsReqQ_sts parity status
+ * BIT(12) - TrnPushReqQ_sts parity status
+ * BIT(13) - SbSplitIdRam_sts parity status
+ * BIT(14) - SbReqCountQ_sts parity status
+ * BIT(15) - SbCplTrkRam_sts parity status
+ * BIT(16) - SbGetObsReqQ_sts parity status
+ * BIT(17) - SbEpochIdQ_sts parity status
+ * BIT(18) - SbAtCplHdrQ_sts parity status
+ * BIT(19) - SbAtCplDataQ_sts parity status
+ * BIT(20) - SbReqCountRam_sts parity status
+ * BIT(21) - SbAtCplHdrQ_sc_sts parity status
+ */
+#define ADF_GEN6_TI_TRNSB_PAR_STS_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | \
+ BIT(7) | BIT(8) | BIT(9) | BIT(10) | BIT(11) | BIT(12) | \
+ BIT(13) | BIT(14) | BIT(15) | BIT(16) | BIT(17) | BIT(18) | \
+ BIT(19) | BIT(20) | BIT(21))
+
+/* Status register to log misc error on RI */
+#define ADF_GEN6_RIMISCSTS 0x41B1B8
+
+/* Status control register to log misc RI error */
+#define ADF_GEN6_RIMISCCTL 0x41B1BC
+
+/*
+ * ERRSOU2 bit mask
+ * BIT(0) - SSM Interrupt Mask
+ * BIT(1) - CFC on CPP. ORed of CFC Push error and Pull error
+ * BIT(2) - BIT(4) - CPP attention interrupts
+ * BIT(18) - PM interrupt
+ */
+#define ADF_GEN6_ERRSOU2_SSM_ERR_BIT BIT(0)
+#define ADF_GEN6_ERRSOU2_CPP_CFC_ERR_STATUS_BIT BIT(1)
+#define ADF_GEN6_ERRSOU2_CPP_CFC_ATT_INT_MASK \
+ (BIT(2) | BIT(3) | BIT(4))
+
+#define ADF_GEN6_ERRSOU2_PM_INT_BIT BIT(18)
+
+#define ADF_GEN6_ERRSOU2_MASK \
+ (ADF_GEN6_ERRSOU2_SSM_ERR_BIT | \
+ ADF_GEN6_ERRSOU2_CPP_CFC_ERR_STATUS_BIT)
+
+#define ADF_GEN6_ERRSOU2_DIS_MASK \
+ (ADF_GEN6_ERRSOU2_SSM_ERR_BIT | \
+ ADF_GEN6_ERRSOU2_CPP_CFC_ERR_STATUS_BIT | \
+ ADF_GEN6_ERRSOU2_CPP_CFC_ATT_INT_MASK)
+
+#define ADF_GEN6_IAINTSTATSSM 0x28
+
+/* IAINTSTATSSM error bit mask definitions */
+#define ADF_GEN6_IAINTSTATSSM_SH_ERR_BIT BIT(0)
+#define ADF_GEN6_IAINTSTATSSM_PPERR_BIT BIT(2)
+#define ADF_GEN6_IAINTSTATSSM_SCMPAR_ERR_BIT BIT(4)
+#define ADF_GEN6_IAINTSTATSSM_CPPPAR_ERR_BIT BIT(5)
+#define ADF_GEN6_IAINTSTATSSM_RFPAR_ERR_BIT BIT(6)
+#define ADF_GEN6_IAINTSTATSSM_UNEXP_CPL_ERR_BIT BIT(7)
+
+#define ADF_GEN6_IAINTSTATSSM_MASK \
+ (ADF_GEN6_IAINTSTATSSM_SH_ERR_BIT | \
+ ADF_GEN6_IAINTSTATSSM_PPERR_BIT | \
+ ADF_GEN6_IAINTSTATSSM_SCMPAR_ERR_BIT | \
+ ADF_GEN6_IAINTSTATSSM_CPPPAR_ERR_BIT | \
+ ADF_GEN6_IAINTSTATSSM_RFPAR_ERR_BIT | \
+ ADF_GEN6_IAINTSTATSSM_UNEXP_CPL_ERR_BIT)
+
+#define ADF_GEN6_UERRSSMSH 0x18
+
+/*
+ * UERRSSMSH error bit mask definitions
+ *
+ * BIT(0) - Indicates one uncorrectable error
+ * BIT(15) - Indicates multiple uncorrectable errors
+ * in device shared memory
+ */
+#define ADF_GEN6_UERRSSMSH_MASK (BIT(0) | BIT(15))
+
+/*
+ * CERRSSMSH error bit
+ * BIT(0) - Indicates one correctable error
+ */
+#define ADF_GEN6_CERRSSMSH_ERROR_BIT (BIT(0) | BIT(15) | BIT(24))
+#define ADF_GEN6_CERRSSMSH 0x10
+
+#define ADF_GEN6_INTMASKSSM 0x0
+
+/*
+ * Error reporting mask in INTMASKSSM
+ * BIT(0) - Shared memory uncorrectable interrupt mask
+ * BIT(2) - PPERR interrupt mask
+ * BIT(4) - SCM parity error interrupt mask
+ * BIT(5) - CPP parity error interrupt mask
+ * BIT(6) - SHRAM RF parity error interrupt mask
+ * BIT(7) - AXI unexpected completion error mask
+ */
+#define ADF_GEN6_INTMASKSSM_MASK \
+ (BIT(0) | BIT(2) | BIT(4) | BIT(5) | BIT(6) | BIT(7))
+
+/* CPP push or pull error */
+#define ADF_GEN6_PPERR 0x8
+
+#define ADF_GEN6_PPERR_MASK (BIT(0) | BIT(1))
+
+/*
+ * SSM_FERR_STATUS error bit mask definitions
+ */
+#define ADF_GEN6_SCM_PAR_ERR_MASK BIT(5)
+#define ADF_GEN6_CPP_PAR_ERR_MASK (BIT(0) | BIT(1) | BIT(2))
+#define ADF_GEN6_UNEXP_CPL_ERR_MASK (BIT(3) | BIT(4) | BIT(10) | BIT(11))
+#define ADF_GEN6_RF_PAR_ERR_MASK BIT(16)
+
+#define ADF_GEN6_SSM_FERR_STATUS 0x9C
+
+#define ADF_GEN6_CPP_CFC_ERR_STATUS 0x640C04
+
+/*
+ * BIT(0) - Indicates one or more CPP CFC errors
+ * BIT(1) - Indicates multiple CPP CFC errors
+ * BIT(7) - Indicates CPP CFC command parity error type
+ * BIT(8) - Indicates CPP CFC data parity error type
+ */
+#define ADF_GEN6_CPP_CFC_ERR_STATUS_ERR_BIT BIT(0)
+#define ADF_GEN6_CPP_CFC_ERR_STATUS_MERR_BIT BIT(1)
+#define ADF_GEN6_CPP_CFC_ERR_STATUS_CMDPAR_BIT BIT(7)
+#define ADF_GEN6_CPP_CFC_ERR_STATUS_DATAPAR_BIT BIT(8)
+#define ADF_GEN6_CPP_CFC_FATAL_ERR_BIT \
+ (ADF_GEN6_CPP_CFC_ERR_STATUS_ERR_BIT | \
+ ADF_GEN6_CPP_CFC_ERR_STATUS_MERR_BIT)
+
+/*
+ * BIT(0) - Enables CFC to detect and log a push/pull data error
+ * BIT(1) - Enables CFC to generate interrupt to PCIEP for a CPP error
+ * BIT(4) - When 1 parity detection is disabled
+ * BIT(5) - When 1 parity detection is disabled on CPP command bus
+ * BIT(6) - When 1 parity detection is disabled on CPP push/pull bus
+ * BIT(9) - When 1 RF parity error detection is disabled
+ */
+#define ADF_GEN6_CPP_CFC_ERR_CTRL_MASK (BIT(0) | BIT(1))
+
+#define ADF_GEN6_CPP_CFC_ERR_CTRL_DIS_MASK \
+ (BIT(4) | BIT(5) | BIT(6) | BIT(9) | BIT(10))
+
+#define ADF_GEN6_CPP_CFC_ERR_CTRL 0x640C00
+
+/*
+ * BIT(0) - Clears bit(0) of ADF_GEN6_CPP_CFC_ERR_STATUS
+ * when an error is reported on CPP
+ * BIT(1) - Clears bit(1) of ADF_GEN6_CPP_CFC_ERR_STATUS
+ * when multiple errors are reported on CPP
+ * BIT(2) - Clears bit(2) of ADF_GEN6_CPP_CFC_ERR_STATUS
+ * when attention interrupt is reported
+ */
+#define ADF_GEN6_CPP_CFC_ERR_STATUS_CLR_MASK (BIT(0) | BIT(1) | BIT(2))
+#define ADF_GEN6_CPP_CFC_ERR_STATUS_CLR 0x640C08
+
+/*
+ * ERRSOU3 bit masks
+ * BIT(0) - indicates error response order overflow and/or BME error
+ * BIT(1) - indicates RI push/pull error
+ * BIT(2) - indicates TI push/pull error
+ * BIT(5) - indicates TI pull parity error
+ * BIT(6) - indicates RI push parity error
+ * BIT(7) - indicates VFLR interrupt
+ * BIT(8) - indicates ring pair interrupts for ATU detected fault
+ * BIT(9) - indicates rate limiting error
+ */
+#define ADF_GEN6_ERRSOU3_TIMISCSTS_BIT BIT(0)
+#define ADF_GEN6_ERRSOU3_RICPPINTSTS_MASK (BIT(1) | BIT(6))
+#define ADF_GEN6_ERRSOU3_TICPPINTSTS_MASK (BIT(2) | BIT(5))
+#define ADF_GEN6_ERRSOU3_VFLRNOTIFY_BIT BIT(7)
+#define ADF_GEN6_ERRSOU3_ATUFAULTSTATUS_BIT BIT(8)
+#define ADF_GEN6_ERRSOU3_RLTERROR_BIT BIT(9)
+#define ADF_GEN6_ERRSOU3_TC_VC_MAP_ERROR_BIT BIT(16)
+#define ADF_GEN6_ERRSOU3_PCIE_DEVHALT_BIT BIT(17)
+#define ADF_GEN6_ERRSOU3_PG_REQ_DEVHALT_BIT BIT(18)
+#define ADF_GEN6_ERRSOU3_XLT_CPL_DEVHALT_BIT BIT(19)
+#define ADF_GEN6_ERRSOU3_TI_INT_ERR_DEVHALT_BIT BIT(20)
+
+#define ADF_GEN6_ERRSOU3_MASK ( \
+ (ADF_GEN6_ERRSOU3_TIMISCSTS_BIT) | \
+ (ADF_GEN6_ERRSOU3_RICPPINTSTS_MASK) | \
+ (ADF_GEN6_ERRSOU3_TICPPINTSTS_MASK) | \
+ (ADF_GEN6_ERRSOU3_VFLRNOTIFY_BIT) | \
+ (ADF_GEN6_ERRSOU3_ATUFAULTSTATUS_BIT) | \
+ (ADF_GEN6_ERRSOU3_RLTERROR_BIT) | \
+ (ADF_GEN6_ERRSOU3_TC_VC_MAP_ERROR_BIT) | \
+ (ADF_GEN6_ERRSOU3_PCIE_DEVHALT_BIT) | \
+ (ADF_GEN6_ERRSOU3_PG_REQ_DEVHALT_BIT) | \
+ (ADF_GEN6_ERRSOU3_XLT_CPL_DEVHALT_BIT) | \
+ (ADF_GEN6_ERRSOU3_TI_INT_ERR_DEVHALT_BIT))
+
+#define ADF_GEN6_ERRSOU3_DIS_MASK ( \
+ (ADF_GEN6_ERRSOU3_TIMISCSTS_BIT) | \
+ (ADF_GEN6_ERRSOU3_RICPPINTSTS_MASK) | \
+ (ADF_GEN6_ERRSOU3_TICPPINTSTS_MASK) | \
+ (ADF_GEN6_ERRSOU3_VFLRNOTIFY_BIT) | \
+ (ADF_GEN6_ERRSOU3_ATUFAULTSTATUS_BIT) | \
+ (ADF_GEN6_ERRSOU3_RLTERROR_BIT) | \
+ (ADF_GEN6_ERRSOU3_TC_VC_MAP_ERROR_BIT))
+
+/* Rate limiting error log register */
+#define ADF_GEN6_RLT_ERRLOG 0x508814
+
+#define ADF_GEN6_RLT_ERRLOG_MASK (BIT(0) | BIT(1) | BIT(2) | BIT(3))
+
+/* TI misc status register */
+#define ADF_GEN6_TIMISCSTS 0x50054C
+
+/* TI misc error reporting mask */
+#define ADF_GEN6_TIMISCCTL 0x500548
+
+/*
+ * TI Misc error reporting control mask
+ * BIT(0) - Enables error detection and logging in TIMISCSTS register
+ * BIT(1) - It has effect only when SRIOV enabled, this bit is 0 by default
+ * BIT(2) - Enables the D-F-x counter within the dispatch arbiter
+ * to start based on the command triggered from
+ * BIT(30) - Disables VFLR functionality
+ * bits 1, 2 and 30 value should be preserved and not meant to be changed
+ * within RAS.
+ */
+#define ADF_GEN6_TIMISCCTL_BIT BIT(0)
+#define ADF_GEN6_TIMSCCTL_RELAY_MASK (BIT(1) | BIT(2) | BIT(30))
+
+/* RI CPP interface status register */
+#define ADF_GEN6_RICPPINTSTS 0x41A330
+
+/*
+ * Uncorrectable error mask in RICPPINTSTS register
+ * BIT(0) - RI asserted the CPP error signal during a push
+ * BIT(1) - RI detected the CPP error signal asserted during a pull
+ * BIT(2) - RI detected a push data parity error
+ * BIT(3) - RI detected a push valid parity error
+ */
+#define ADF_GEN6_RICPPINTSTS_MASK (BIT(0) | BIT(1) | BIT(2) | BIT(3))
+
+/* RI CPP interface register control */
+#define ADF_GEN6_RICPPINTCTL 0x41A32C
+
+/*
+ * Control bit mask for RICPPINTCTL register
+ * BIT(0) - value of 1 enables error detection and reporting
+ * on the RI CPP Push interface
+ * BIT(1) - value of 1 enables error detection and reporting
+ * on the RI CPP Pull interface
+ * BIT(2) - value of 1 enables error detection and reporting
+ * on the RI Parity
+ * BIT(3) - value of 1 enable checking parity on CPP
+ */
+#define ADF_GEN6_RICPPINTCTL_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(3) | BIT(4))
+
+/* TI CPP interface status register */
+#define ADF_GEN6_TICPPINTSTS 0x50053C
+
+/*
+ * Uncorrectable error mask in TICPPINTSTS register
+ * BIT(0) - value of 1 indicates that the TI asserted
+ * the CPP error signal during a push
+ * BIT(1) - value of 1 indicates that the TI detected
+ * the CPP error signal asserted during a pull
+ * BIT(2) - value of 1 indicates that the TI detected
+ * a pull data parity error
+ */
+#define ADF_GEN6_TICPPINTSTS_MASK (BIT(0) | BIT(1) | BIT(2))
+
+/* TI CPP interface status register control */
+#define ADF_GEN6_TICPPINTCTL 0x500538
+
+/*
+ * Control bit mask for TICPPINTCTL register
+ * BIT(0) - value of 1 enables error detection and reporting on
+ * the TI CPP Push interface
+ * BIT(1) - value of 1 enables error detection and reporting on
+ * the TI CPP Push interface
+ * BIT(2) - value of 1 enables parity error detection and logging on
+ * the TI CPP Pull interface
+ * BIT(3) - value of 1 enables CPP CMD and Pull Data parity checking
+ */
+#define ADF_GEN6_TICPPINTCTL_MASK \
+ (BIT(0) | BIT(1) | BIT(2) | BIT(3) | BIT(4))
+
+/* ATU fault status register */
+#define ADF_GEN6_ATUFAULTSTATUS(i) (0x506000 + ((i) * 0x4))
+
+#define ADF_GEN6_ATUFAULTSTATUS_BIT BIT(0)
+
+/* Command parity error detected on IOSFP command to QAT */
+#define ADF_GEN6_RIMISCSTS_BIT BIT(0)
+
+#define ADF_GEN6_GENSTS 0x41A220
+#define ADF_GEN6_GENSTS_DEVICE_STATE_MASK GENMASK(1, 0)
+#define ADF_GEN6_GENSTS_RESET_TYPE_MASK GENMASK(3, 2)
+#define ADF_GEN6_GENSTS_PFLR 0x1
+#define ADF_GEN6_GENSTS_COLD_RESET 0x3
+#define ADF_GEN6_GENSTS_DEVHALT 0x1
+
+void adf_gen6_init_ras_ops(struct adf_ras_ops *ras_ops);
+
+#endif /* ADF_GEN6_RAS_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c
new file mode 100644
index 000000000000..58a072e2f936
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 Intel Corporation */
+#include <linux/export.h>
+
+#include "adf_gen4_config.h"
+#include "adf_gen4_hw_csr_data.h"
+#include "adf_gen4_pfvf.h"
+#include "adf_gen6_shared.h"
+
+struct adf_accel_dev;
+struct adf_pfvf_ops;
+struct adf_hw_csr_ops;
+
+/*
+ * QAT GEN4 and GEN6 devices often differ in terms of supported features,
+ * options and internal logic. However, some of the mechanisms and register
+ * layout are shared between those two GENs. This file serves as an abstraction
+ * layer that allows to use existing GEN4 implementation that is also
+ * applicable to GEN6 without additional overhead and complexity.
+ */
+void adf_gen6_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops)
+{
+ adf_gen4_init_pf_pfvf_ops(pfvf_ops);
+}
+EXPORT_SYMBOL_GPL(adf_gen6_init_pf_pfvf_ops);
+
+void adf_gen6_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops)
+{
+ return adf_gen4_init_hw_csr_ops(csr_ops);
+}
+EXPORT_SYMBOL_GPL(adf_gen6_init_hw_csr_ops);
+
+int adf_gen6_cfg_dev_init(struct adf_accel_dev *accel_dev)
+{
+ return adf_gen4_cfg_dev_init(accel_dev);
+}
+EXPORT_SYMBOL_GPL(adf_gen6_cfg_dev_init);
+
+int adf_gen6_comp_dev_config(struct adf_accel_dev *accel_dev)
+{
+ return adf_comp_dev_config(accel_dev);
+}
+EXPORT_SYMBOL_GPL(adf_gen6_comp_dev_config);
+
+int adf_gen6_no_dev_config(struct adf_accel_dev *accel_dev)
+{
+ return adf_no_dev_config(accel_dev);
+}
+EXPORT_SYMBOL_GPL(adf_gen6_no_dev_config);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h
new file mode 100644
index 000000000000..bc8e71e984fc
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ADF_GEN6_SHARED_H_
+#define ADF_GEN6_SHARED_H_
+
+struct adf_hw_csr_ops;
+struct adf_accel_dev;
+struct adf_pfvf_ops;
+
+void adf_gen6_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops);
+void adf_gen6_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
+int adf_gen6_cfg_dev_init(struct adf_accel_dev *accel_dev);
+int adf_gen6_comp_dev_config(struct adf_accel_dev *accel_dev);
+int adf_gen6_no_dev_config(struct adf_accel_dev *accel_dev);
+#endif/* ADF_GEN6_SHARED_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c b/drivers/crypto/intel/qat/qat_common/adf_timer.c
index 35ccb91d6ec1..8962a49f145a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_timer.c
@@ -12,9 +12,9 @@
#include "adf_admin.h"
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
-#include "adf_gen4_timer.h"
+#include "adf_timer.h"
-#define ADF_GEN4_TIMER_PERIOD_MS 200
+#define ADF_DEFAULT_TIMER_PERIOD_MS 200
/* This periodic update is used to trigger HB, RL & TL fw events */
static void work_handler(struct work_struct *work)
@@ -27,16 +27,16 @@ static void work_handler(struct work_struct *work)
accel_dev = timer_ctx->accel_dev;
adf_misc_wq_queue_delayed_work(&timer_ctx->work_ctx,
- msecs_to_jiffies(ADF_GEN4_TIMER_PERIOD_MS));
+ msecs_to_jiffies(ADF_DEFAULT_TIMER_PERIOD_MS));
time_periods = div_u64(ktime_ms_delta(ktime_get_real(), timer_ctx->initial_ktime),
- ADF_GEN4_TIMER_PERIOD_MS);
+ ADF_DEFAULT_TIMER_PERIOD_MS);
if (adf_send_admin_tim_sync(accel_dev, time_periods))
dev_err(&GET_DEV(accel_dev), "Failed to synchronize qat timer\n");
}
-int adf_gen4_timer_start(struct adf_accel_dev *accel_dev)
+int adf_timer_start(struct adf_accel_dev *accel_dev)
{
struct adf_timer *timer_ctx;
@@ -50,13 +50,13 @@ int adf_gen4_timer_start(struct adf_accel_dev *accel_dev)
INIT_DELAYED_WORK(&timer_ctx->work_ctx, work_handler);
adf_misc_wq_queue_delayed_work(&timer_ctx->work_ctx,
- msecs_to_jiffies(ADF_GEN4_TIMER_PERIOD_MS));
+ msecs_to_jiffies(ADF_DEFAULT_TIMER_PERIOD_MS));
return 0;
}
-EXPORT_SYMBOL_GPL(adf_gen4_timer_start);
+EXPORT_SYMBOL_GPL(adf_timer_start);
-void adf_gen4_timer_stop(struct adf_accel_dev *accel_dev)
+void adf_timer_stop(struct adf_accel_dev *accel_dev)
{
struct adf_timer *timer_ctx = accel_dev->timer;
@@ -68,4 +68,4 @@ void adf_gen4_timer_stop(struct adf_accel_dev *accel_dev)
kfree(timer_ctx);
accel_dev->timer = NULL;
}
-EXPORT_SYMBOL_GPL(adf_gen4_timer_stop);
+EXPORT_SYMBOL_GPL(adf_timer_stop);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h b/drivers/crypto/intel/qat/qat_common/adf_timer.h
index 66a709e7b358..68e5136d6ba1 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_timer.h
@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2023 Intel Corporation */
-#ifndef ADF_GEN4_TIMER_H_
-#define ADF_GEN4_TIMER_H_
+#ifndef ADF_TIMER_H_
+#define ADF_TIMER_H_
#include <linux/ktime.h>
#include <linux/workqueue.h>
@@ -15,7 +15,7 @@ struct adf_timer {
ktime_t initial_ktime;
};
-int adf_gen4_timer_start(struct adf_accel_dev *accel_dev);
-void adf_gen4_timer_stop(struct adf_accel_dev *accel_dev);
+int adf_timer_start(struct adf_accel_dev *accel_dev);
+void adf_timer_stop(struct adf_accel_dev *accel_dev);
-#endif /* ADF_GEN4_TIMER_H_ */
+#endif /* ADF_TIMER_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
index 04f645957e28..81969c515a17 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
@@ -44,6 +44,7 @@ enum icp_qat_fw_comp_20_cmd_id {
#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK 0x1
#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS 7
#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK 0x1
+#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MAX_VALUE 0xFFFFFFFF
#define ICP_QAT_FW_COMP_FLAGS_BUILD(sesstype, autoselect, enhanced_asb, \
ret_uncomp, secure_ram) \
@@ -117,7 +118,7 @@ struct icp_qat_fw_comp_req_params {
#define ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(sop, eop, bfinal, cnv, cnvnr, \
cnvdfx, crc, xxhash_acc, \
cnv_error_type, append_crc, \
- drop_data) \
+ drop_data, partial_decomp) \
((((sop) & ICP_QAT_FW_COMP_SOP_MASK) << \
ICP_QAT_FW_COMP_SOP_BITPOS) | \
(((eop) & ICP_QAT_FW_COMP_EOP_MASK) << \
@@ -139,7 +140,9 @@ struct icp_qat_fw_comp_req_params {
(((append_crc) & ICP_QAT_FW_COMP_APPEND_CRC_MASK) \
<< ICP_QAT_FW_COMP_APPEND_CRC_BITPOS) | \
(((drop_data) & ICP_QAT_FW_COMP_DROP_DATA_MASK) \
- << ICP_QAT_FW_COMP_DROP_DATA_BITPOS))
+ << ICP_QAT_FW_COMP_DROP_DATA_BITPOS) | \
+ (((partial_decomp) & ICP_QAT_FW_COMP_PARTIAL_DECOMP_MASK) \
+ << ICP_QAT_FW_COMP_PARTIAL_DECOMP_BITPOS))
#define ICP_QAT_FW_COMP_NOT_SOP 0
#define ICP_QAT_FW_COMP_SOP 1
@@ -161,6 +164,8 @@ struct icp_qat_fw_comp_req_params {
#define ICP_QAT_FW_COMP_NO_APPEND_CRC 0
#define ICP_QAT_FW_COMP_DROP_DATA 1
#define ICP_QAT_FW_COMP_NO_DROP_DATA 0
+#define ICP_QAT_FW_COMP_PARTIAL_DECOMPRESS 1
+#define ICP_QAT_FW_COMP_NO_PARTIAL_DECOMPRESS 0
#define ICP_QAT_FW_COMP_SOP_BITPOS 0
#define ICP_QAT_FW_COMP_SOP_MASK 0x1
#define ICP_QAT_FW_COMP_EOP_BITPOS 1
@@ -189,6 +194,8 @@ struct icp_qat_fw_comp_req_params {
#define ICP_QAT_FW_COMP_APPEND_CRC_MASK 0x1
#define ICP_QAT_FW_COMP_DROP_DATA_BITPOS 25
#define ICP_QAT_FW_COMP_DROP_DATA_MASK 0x1
+#define ICP_QAT_FW_COMP_PARTIAL_DECOMP_BITPOS 27
+#define ICP_QAT_FW_COMP_PARTIAL_DECOMP_MASK 0x1
#define ICP_QAT_FW_COMP_SOP_GET(flags) \
QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SOP_BITPOS, \
@@ -281,8 +288,18 @@ struct icp_qat_fw_comp_req {
union {
struct icp_qat_fw_xlt_req_params xlt_pars;
__u32 resrvd1[ICP_QAT_FW_NUM_LONGWORDS_2];
+ struct {
+ __u32 partial_decompress_length;
+ __u32 partial_decompress_offset;
+ } partial_decompress;
} u1;
- __u32 resrvd2[ICP_QAT_FW_NUM_LONGWORDS_2];
+ union {
+ __u32 resrvd2[ICP_QAT_FW_NUM_LONGWORDS_2];
+ struct {
+ __u32 asb_value;
+ __u32 reserved;
+ } asb_threshold;
+ } u3;
struct icp_qat_fw_comp_cd_hdr comp_cd_ctrl;
union {
struct icp_qat_fw_xlt_cd_hdr xlt_cd_ctrl;
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h
index 7eb5daef4f88..6887930c7995 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h
@@ -35,6 +35,7 @@ struct icp_qat_fw_loader_chip_info {
u32 wakeup_event_val;
bool fw_auth;
bool css_3k;
+ bool dual_sign;
bool tgroup_share_ustore;
u32 fcu_ctl_csr;
u32 fcu_sts_csr;
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp.h b/drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp.h
new file mode 100644
index 000000000000..dce639152345
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ICP_QAT_HW_51_COMP_H_
+#define ICP_QAT_HW_51_COMP_H_
+
+#include <linux/types.h>
+
+#include "icp_qat_fw.h"
+#include "icp_qat_hw_51_comp_defs.h"
+
+struct icp_qat_hw_comp_51_config_csr_lower {
+ enum icp_qat_hw_comp_51_abd abd;
+ enum icp_qat_hw_comp_51_lllbd_ctrl lllbd;
+ enum icp_qat_hw_comp_51_search_depth sd;
+ enum icp_qat_hw_comp_51_min_match_control mmctrl;
+ enum icp_qat_hw_comp_51_lz4_block_checksum lbc;
+};
+
+static inline u32
+ICP_QAT_FW_COMP_51_BUILD_CONFIG_LOWER(struct icp_qat_hw_comp_51_config_csr_lower csr)
+{
+ u32 val32 = 0;
+
+ QAT_FIELD_SET(val32, csr.abd,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_ABD_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_ABD_MASK);
+ QAT_FIELD_SET(val32, csr.lllbd,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_LLLBD_CTRL_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_LLLBD_CTRL_MASK);
+ QAT_FIELD_SET(val32, csr.sd,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_SEARCH_DEPTH_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_SEARCH_DEPTH_MASK);
+ QAT_FIELD_SET(val32, csr.mmctrl,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_MIN_MATCH_CONTROL_MASK);
+ QAT_FIELD_SET(val32, csr.lbc,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_MASK);
+
+ return val32;
+}
+
+struct icp_qat_hw_comp_51_config_csr_upper {
+ enum icp_qat_hw_comp_51_dmm_algorithm edmm;
+ enum icp_qat_hw_comp_51_bms bms;
+ enum icp_qat_hw_comp_51_scb_mode_reset_mask scb_mode_reset;
+};
+
+static inline u32
+ICP_QAT_FW_COMP_51_BUILD_CONFIG_UPPER(struct icp_qat_hw_comp_51_config_csr_upper csr)
+{
+ u32 val32 = 0;
+
+ QAT_FIELD_SET(val32, csr.edmm,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_DMM_ALGORITHM_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_DMM_ALGORITHM_MASK);
+ QAT_FIELD_SET(val32, csr.bms,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_BMS_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_BMS_MASK);
+ QAT_FIELD_SET(val32, csr.scb_mode_reset,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS,
+ ICP_QAT_HW_COMP_51_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK);
+
+ return val32;
+}
+
+struct icp_qat_hw_decomp_51_config_csr_lower {
+ enum icp_qat_hw_decomp_51_lz4_block_checksum lbc;
+};
+
+static inline u32
+ICP_QAT_FW_DECOMP_51_BUILD_CONFIG_LOWER(struct icp_qat_hw_decomp_51_config_csr_lower csr)
+{
+ u32 val32 = 0;
+
+ QAT_FIELD_SET(val32, csr.lbc,
+ ICP_QAT_HW_DECOMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_BITPOS,
+ ICP_QAT_HW_DECOMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_MASK);
+
+ return val32;
+}
+
+struct icp_qat_hw_decomp_51_config_csr_upper {
+ enum icp_qat_hw_decomp_51_bms bms;
+};
+
+static inline u32
+ICP_QAT_FW_DECOMP_51_BUILD_CONFIG_UPPER(struct icp_qat_hw_decomp_51_config_csr_upper csr)
+{
+ u32 val32 = 0;
+
+ QAT_FIELD_SET(val32, csr.bms,
+ ICP_QAT_HW_DECOMP_51_CONFIG_CSR_BMS_BITPOS,
+ ICP_QAT_HW_DECOMP_51_CONFIG_CSR_BMS_MASK);
+
+ return val32;
+}
+
+#endif /* ICP_QAT_HW_51_COMP_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp_defs.h b/drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp_defs.h
new file mode 100644
index 000000000000..e745688c5da4
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_hw_51_comp_defs.h
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef ICP_QAT_HW_51_COMP_DEFS_H_
+#define ICP_QAT_HW_51_COMP_DEFS_H_
+
+#include <linux/bits.h>
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SOM_CONTROL_BITPOS 28
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SOM_CONTROL_MASK GENMASK(1, 0)
+enum icp_qat_hw_comp_51_som_control {
+ ICP_QAT_HW_COMP_51_SOM_CONTROL_NORMAL_MODE = 0x0,
+ ICP_QAT_HW_COMP_51_SOM_CONTROL_DICTIONARY_MODE = 0x1,
+ ICP_QAT_HW_COMP_51_SOM_CONTROL_INPUT_CRC = 0x2,
+ ICP_QAT_HW_COMP_51_SOM_CONTROL_RESERVED_MODE = 0x3,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SOM_CONTROL_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SOM_CONTROL_NORMAL_MODE
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS 27
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_skip_hash_rd_control {
+ ICP_QAT_HW_COMP_51_SKIP_HASH_RD_CONTROL_NO_SKIP = 0x0,
+ ICP_QAT_HW_COMP_51_SKIP_HASH_RD_CONTROL_SKIP_HASH_READS = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_RD_CONTROL_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SKIP_HASH_RD_CONTROL_NO_SKIP
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BYPASS_COMPRESSION_BITPOS 25
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BYPASS_COMPRESSION_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_bypass_compression {
+ ICP_QAT_HW_COMP_51_BYPASS_COMPRESSION_DISABLED = 0x0,
+ ICP_QAT_HW_COMP_51_BYPASS_COMPRESSION_ENABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BYPASS_COMPRESSION_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_BYPASS_COMPRESSION_DISABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_DMM_ALGORITHM_BITPOS 22
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_DMM_ALGORITHM_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_dmm_algorithm {
+ ICP_QAT_HW_COMP_51_DMM_ALGORITHM_EDMM_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_DMM_ALGORITHM_ZSTD_DMM_LITE = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_DMM_ALGORITHM_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_DMM_ALGORITHM_EDMM_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_TOKEN_FUSION_INTERNAL_ONLY_BITPOS 21
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_TOKEN_FUSION_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_token_fusion_internal_only {
+ ICP_QAT_HW_COMP_51_TOKEN_FUSION_INTERNAL_ONLY_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_TOKEN_FUSION_INTERNAL_ONLY_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_TOKEN_FUSION_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_TOKEN_FUSION_INTERNAL_ONLY_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BMS_BITPOS 19
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BMS_MASK GENMASK(1, 0)
+enum icp_qat_hw_comp_51_bms {
+ ICP_QAT_HW_COMP_51_BMS_BMS_64KB = 0x0,
+ ICP_QAT_HW_COMP_51_BMS_BMS_256KB = 0x1,
+ ICP_QAT_HW_COMP_51_BMS_BMS_1MB = 0x2,
+ ICP_QAT_HW_COMP_51_BMS_BMS_4MB = 0x3,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BMS_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_BMS_BMS_64KB
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS 18
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_scb_mode_reset_mask {
+ ICP_QAT_HW_COMP_51_SCB_MODE_RESET_MASK_DO_NOT_RESET_HB_HT = 0x0,
+ ICP_QAT_HW_COMP_51_SCB_MODE_RESET_MASK_RESET_HB_HT = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SCB_MODE_RESET_MASK_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SCB_MODE_RESET_MASK_DO_NOT_RESET_HB_HT
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ZSTD_FRAME_GEN_DEC_EN_BITPOS 2
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ZSTD_FRAME_GEN_DEC_EN_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_zstd_frame_gen_dec_en {
+ ICP_QAT_HW_COMP_51_ZSTD_FRAME_GEN_DEC_EN_ZSTD_FRAME_HDR_DISABLE = 0x0,
+ ICP_QAT_HW_COMP_51_ZSTD_FRAME_GEN_DEC_EN_ZSTD_FRAME_HDR_ENABLE = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ZSTD_FRAME_GEN_DEC_EN_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_ZSTD_FRAME_GEN_DEC_EN_ZSTD_FRAME_HDR_ENABLE
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_CNV_DISABLE_BITPOS 1
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_CNV_DISABLE_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_cnv_disable {
+ ICP_QAT_HW_COMP_51_CNV_DISABLE_CNV_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_CNV_DISABLE_CNV_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_CNV_DISABLE_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_CNV_DISABLE_CNV_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ASB_DISABLE_BITPOS 0
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ASB_DISABLE_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_asb_disable {
+ ICP_QAT_HW_COMP_51_ASB_DISABLE_ASB_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_ASB_DISABLE_ASB_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ASB_DISABLE_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_ASB_DISABLE_ASB_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SPEC_DECODER_INTERNAL_ONLY_BITPOS 21
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SPEC_DECODER_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_spec_decoder_internal_only {
+ ICP_QAT_HW_COMP_51_SPEC_DECODER_INTERNAL_ONLY_NORMAL = 0x0,
+ ICP_QAT_HW_COMP_51_SPEC_DECODER_INTERNAL_ONLY_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SPEC_DECODER_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SPEC_DECODER_INTERNAL_ONLY_NORMAL
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_MINI_XCAM_INTERNAL_ONLY_BITPOS 20
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_MINI_XCAM_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_mini_xcam_internal_only {
+ ICP_QAT_HW_COMP_51_MINI_XCAM_INTERNAL_ONLY_NORMAL = 0x0,
+ ICP_QAT_HW_COMP_51_MINI_XCAM_INTERNAL_ONLY_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_MINI_XCAM_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_MINI_XCAM_INTERNAL_ONLY_NORMAL
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_REP_OFF_ENC_INTERNAL_ONLY_BITPOS 19
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_REP_OFF_ENC_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_rep_off_enc_internal_only {
+ ICP_QAT_HW_COMP_51_REP_OFF_ENC_INTERNAL_ONLY_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_REP_OFF_ENC_INTERNAL_ONLY_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_REP_OFF_ENC_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_REP_OFF_ENC_INTERNAL_ONLY_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_PROG_BLOCK_DROP_INTERNAL_ONLY_BITPOS 18
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_PROG_BLOCK_DROP_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_prog_block_drop_internal_only {
+ ICP_QAT_HW_COMP_51_PROG_BLOCK_DROP_INTERNAL_ONLY_DISABLE = 0x0,
+ ICP_QAT_HW_COMP_51_PROG_BLOCK_DROP_INTERNAL_ONLY_ENABLE = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_PROG_BLOCK_DROP_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_PROG_BLOCK_DROP_INTERNAL_ONLY_DISABLE
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_OVERRIDE_INTERNAL_ONLY_BITPOS 17
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_OVERRIDE_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_skip_hash_override_internal_only {
+ ICP_QAT_HW_COMP_51_SKIP_HASH_OVERRIDE_INTERNAL_ONLY_DETERMINE_HASH_PARAMS = 0x0,
+ ICP_QAT_HW_COMP_51_SKIP_HASH_OVERRIDE_INTERNAL_ONLY_OVERRIDE_HASH_PARAMS = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_OVERRIDE_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SKIP_HASH_OVERRIDE_INTERNAL_ONLY_DETERMINE_HASH_PARAMS
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_HBS_BITPOS 14
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_HBS_MASK GENMASK(2, 0)
+enum icp_qat_hw_comp_51_hbs {
+ ICP_QAT_HW_COMP_51_HBS_32KB = 0x0,
+ ICP_QAT_HW_COMP_51_HBS_64KB = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_HBS_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_HBS_32KB
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ABD_BITPOS 13
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ABD_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_abd {
+ ICP_QAT_HW_COMP_51_ABD_ABD_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_ABD_ABD_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_ABD_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_ABD_ABD_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_LLLBD_CTRL_BITPOS 12
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_LLLBD_CTRL_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_lllbd_ctrl {
+ ICP_QAT_HW_COMP_51_LLLBD_CTRL_LLLBD_ENABLED = 0x0,
+ ICP_QAT_HW_COMP_51_LLLBD_CTRL_LLLBD_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_LLLBD_CTRL_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_LLLBD_CTRL_LLLBD_ENABLED
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SEARCH_DEPTH_BITPOS 8
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SEARCH_DEPTH_MASK GENMASK(3, 0)
+enum icp_qat_hw_comp_51_search_depth {
+ ICP_QAT_HW_COMP_51_SEARCH_DEPTH_LEVEL_1 = 0x1,
+ ICP_QAT_HW_COMP_51_SEARCH_DEPTH_LEVEL_6 = 0x3,
+ ICP_QAT_HW_COMP_51_SEARCH_DEPTH_LEVEL_9 = 0x4,
+ ICP_QAT_HW_COMP_51_SEARCH_DEPTH_LEVEL_10 = 0x4,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SEARCH_DEPTH_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SEARCH_DEPTH_LEVEL_1
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_FORMAT_BITPOS 5
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_FORMAT_MASK GENMASK(2, 0)
+enum icp_qat_hw_comp_51_format {
+ ICP_QAT_HW_COMP_51_FORMAT_ILZ77 = 0x1,
+ ICP_QAT_HW_COMP_51_FORMAT_LZ4 = 0x2,
+ ICP_QAT_HW_COMP_51_FORMAT_LZ4s = 0x3,
+ ICP_QAT_HW_COMP_51_FORMAT_ZSTD = 0x4,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_FORMAT_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_FORMAT_ILZ77
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_MIN_MATCH_CONTROL_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_min_match_control {
+ ICP_QAT_HW_COMP_51_MIN_MATCH_CONTROL_MATCH_3B = 0x0,
+ ICP_QAT_HW_COMP_51_MIN_MATCH_CONTROL_MATCH_4B = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_MIN_MATCH_CONTROL_MATCH_3B
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS 3
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_COLLISION_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_skip_hash_collision {
+ ICP_QAT_HW_COMP_51_SKIP_HASH_COLLISION_ALLOW = 0x0,
+ ICP_QAT_HW_COMP_51_SKIP_HASH_COLLISION_DONT_ALLOW = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_COLLISION_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SKIP_HASH_COLLISION_ALLOW
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS 2
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_UPDATE_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_skip_hash_update {
+ ICP_QAT_HW_COMP_51_SKIP_HASH_UPDATE_ALLOW = 0x0,
+ ICP_QAT_HW_COMP_51_SKIP_HASH_UPDATE_DONT_ALLOW = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_SKIP_HASH_UPDATE_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_SKIP_HASH_UPDATE_ALLOW
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BYTE_SKIP_BITPOS 1
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BYTE_SKIP_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_byte_skip {
+ ICP_QAT_HW_COMP_51_BYTE_SKIP_3BYTE_TOKEN = 0x0,
+ ICP_QAT_HW_COMP_51_BYTE_SKIP_3BYTE_LITERAL = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_BYTE_SKIP_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_BYTE_SKIP_3BYTE_TOKEN
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_BITPOS 0
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_MASK GENMASK(0, 0)
+enum icp_qat_hw_comp_51_lz4_block_checksum {
+ ICP_QAT_HW_COMP_51_LZ4_BLOCK_CHECKSUM_ABSENT = 0x0,
+ ICP_QAT_HW_COMP_51_LZ4_BLOCK_CHECKSUM_PRESENT = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_DEFAULT_VAL \
+ ICP_QAT_HW_COMP_51_LZ4_BLOCK_CHECKSUM_ABSENT
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_DISCARD_DATA_BITPOS 26
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_DISCARD_DATA_MASK GENMASK(0, 0)
+enum icp_qat_hw_decomp_51_discard_data {
+ ICP_QAT_HW_DECOMP_51_DISCARD_DATA_DISABLED = 0x0,
+ ICP_QAT_HW_DECOMP_51_DISCARD_DATA_ENABLED = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_DISCARD_DATA_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_DISCARD_DATA_DISABLED
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_BMS_BITPOS 19
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_BMS_MASK GENMASK(1, 0)
+enum icp_qat_hw_decomp_51_bms {
+ ICP_QAT_HW_DECOMP_51_BMS_BMS_64KB = 0x0,
+ ICP_QAT_HW_DECOMP_51_BMS_BMS_256KB = 0x1,
+ ICP_QAT_HW_DECOMP_51_BMS_BMS_1MB = 0x2,
+ ICP_QAT_HW_DECOMP_51_BMS_BMS_4MB = 0x3,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_BMS_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_BMS_BMS_64KB
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_ZSTD_FRAME_GEN_DEC_EN_BITPOS 2
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_ZSTD_FRAME_GEN_DEC_EN_MASK GENMASK(0, 0)
+enum icp_qat_hw_decomp_51_zstd_frame_gen_dec_en {
+ ICP_QAT_HW_DECOMP_51_ZSTD_FRAME_GEN_DEC_EN_ZSTD_FRAME_HDR_DISABLE = 0x0,
+ ICP_QAT_HW_DECOMP_51_ZSTD_FRAME_GEN_DEC_EN_ZSTD_FRAME_HDR_ENABLE = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_ZSTD_FRAME_GEN_DEC_EN_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_ZSTD_FRAME_GEN_DEC_EN_ZSTD_FRAME_HDR_ENABLE
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_SPEC_DECODER_INTERNAL_ONLY_BITPOS 21
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_SPEC_DECODER_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_decomp_51_spec_decoder_internal_only {
+ ICP_QAT_HW_DECOMP_51_SPEC_DECODER_INTERNAL_ONLY_NORMAL = 0x0,
+ ICP_QAT_HW_DECOMP_51_SPEC_DECODER_INTERNAL_ONLY_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_SPEC_DECODER_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_SPEC_DECODER_INTERNAL_ONLY_NORMAL
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_MINI_XCAM_INTERNAL_ONLY_BITPOS 20
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_MINI_XCAM_INTERNAL_ONLY_MASK GENMASK(0, 0)
+enum icp_qat_hw_decomp_51_mini_xcam_internal_only {
+ ICP_QAT_HW_DECOMP_51_MINI_XCAM_INTERNAL_ONLY_NORMAL = 0x0,
+ ICP_QAT_HW_DECOMP_51_MINI_XCAM_INTERNAL_ONLY_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_MINI_XCAM_INTERNAL_ONLY_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_MINI_XCAM_INTERNAL_ONLY_NORMAL
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_HBS_BITPOS 14
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_HBS_MASK GENMASK(2, 0)
+enum icp_qat_hw_decomp_51_hbs {
+ ICP_QAT_HW_DECOMP_51_HBS_32KB = 0x0,
+ ICP_QAT_HW_DECOMP_51_HBS_64KB = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_HBS_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_HBS_32KB
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_FORMAT_BITPOS 5
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_FORMAT_MASK GENMASK(2, 0)
+enum icp_qat_hw_decomp_51_format {
+ ICP_QAT_HW_DECOMP_51_FORMAT_ILZ77 = 0x1,
+ ICP_QAT_HW_DECOMP_51_FORMAT_LZ4 = 0x2,
+ ICP_QAT_HW_DECOMP_51_FORMAT_RESERVED = 0x3,
+ ICP_QAT_HW_DECOMP_51_FORMAT_ZSTD = 0x4,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_FORMAT_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_FORMAT_ILZ77
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_BITPOS 0
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_MASK GENMASK(0, 0)
+enum icp_qat_hw_decomp_51_lz4_block_checksum {
+ ICP_QAT_HW_DECOMP_51_LZ4_BLOCK_CHECKSUM_ABSENT = 0x0,
+ ICP_QAT_HW_DECOMP_51_LZ4_BLOCK_CHECKSUM_PRESENT = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_51_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_DEFAULT_VAL \
+ ICP_QAT_HW_DECOMP_51_LZ4_BLOCK_CHECKSUM_ABSENT
+
+#endif /* ICP_QAT_HW_51_COMP_DEFS_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h b/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h
index 1c7bcd8e4055..6313c35eff0c 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h
@@ -7,6 +7,7 @@
#define ICP_QAT_AC_C62X_DEV_TYPE 0x01000000
#define ICP_QAT_AC_C3XXX_DEV_TYPE 0x02000000
#define ICP_QAT_AC_4XXX_A_DEV_TYPE 0x08000000
+#define ICP_QAT_AC_6XXX_DEV_TYPE 0x80000000
#define ICP_QAT_UCLO_MAX_AE 17
#define ICP_QAT_UCLO_MAX_CTX 8
#define ICP_QAT_UCLO_MAX_UIMAGE (ICP_QAT_UCLO_MAX_AE * ICP_QAT_UCLO_MAX_CTX)
@@ -81,6 +82,21 @@
#define ICP_QAT_CSS_RSA4K_MAX_IMAGE_LEN 0x40000
#define ICP_QAT_CSS_RSA3K_MAX_IMAGE_LEN 0x30000
+/* All lengths below are in bytes */
+#define ICP_QAT_DUALSIGN_OPAQUE_HDR_LEN 12
+#define ICP_QAT_DUALSIGN_OPAQUE_HDR_ALIGN_LEN 16
+#define ICP_QAT_DUALSIGN_OPAQUE_DATA_LEN 3540
+#define ICP_QAT_DUALSIGN_XMSS_PUBKEY_LEN 64
+#define ICP_QAT_DUALSIGN_XMSS_SIG_LEN 2692
+#define ICP_QAT_DUALSIGN_XMSS_SIG_ALIGN_LEN 2696
+#define ICP_QAT_DUALSIGN_MISC_INFO_LEN 16
+#define ICP_QAT_DUALSIGN_FW_TYPE_LEN 7
+#define ICP_QAT_DUALSIGN_MODULE_TYPE 0x14
+#define ICP_QAT_DUALSIGN_HDR_LEN 0x375
+#define ICP_QAT_DUALSIGN_HDR_VER 0x40001
+#define ICP_QAT_DUALSIGN_HDR_LEN_OFFSET 4
+#define ICP_QAT_DUALSIGN_HDR_VER_OFFSET 8
+
#define ICP_QAT_CTX_MODE(ae_mode) ((ae_mode) & 0xf)
#define ICP_QAT_NN_MODE(ae_mode) (((ae_mode) >> 0x4) & 0xf)
#define ICP_QAT_SHARED_USTORE_MODE(ae_mode) (((ae_mode) >> 0xb) & 0x1)
@@ -440,6 +456,13 @@ struct icp_qat_fw_auth_desc {
unsigned int img_ae_init_data_low;
unsigned int img_ae_insts_high;
unsigned int img_ae_insts_low;
+ unsigned int cpp_mask;
+ unsigned int reserved;
+ unsigned int xmss_pubkey_high;
+ unsigned int xmss_pubkey_low;
+ unsigned int xmss_sig_high;
+ unsigned int xmss_sig_low;
+ unsigned int reserved2[2];
};
struct icp_qat_auth_chunk {
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
index a6e02405d402..8b123472b71c 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
@@ -8,6 +8,7 @@
#include <linux/workqueue.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
+#include "adf_dc.h"
#include "qat_bl.h"
#include "qat_comp_req.h"
#include "qat_compression.h"
@@ -145,9 +146,7 @@ static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm)
return -EINVAL;
ctx->inst = inst;
- ctx->inst->build_deflate_ctx(ctx->comp_ctx);
-
- return 0;
+ return qat_comp_build_ctx(inst->accel_dev, ctx->comp_ctx, QAT_DEFLATE);
}
static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm)
@@ -241,13 +240,13 @@ static struct acomp_alg qat_acomp[] = { {
.cra_priority = 4001,
.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
.cra_ctxsize = sizeof(struct qat_compression_ctx),
+ .cra_reqsize = sizeof(struct qat_compression_req),
.cra_module = THIS_MODULE,
},
.init = qat_comp_alg_init_tfm,
.exit = qat_comp_alg_exit_tfm,
.compress = qat_comp_alg_compress,
.decompress = qat_comp_alg_decompress,
- .reqsize = sizeof(struct qat_compression_req),
}};
int qat_comp_algs_register(void)
diff --git a/drivers/crypto/intel/qat/qat_common/qat_compression.c b/drivers/crypto/intel/qat/qat_common/qat_compression.c
index 7842a9f22178..c285b45b8679 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_compression.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_compression.c
@@ -144,7 +144,6 @@ static int qat_compression_create_instances(struct adf_accel_dev *accel_dev)
inst->id = i;
atomic_set(&inst->refctr, 0);
inst->accel_dev = accel_dev;
- inst->build_deflate_ctx = GET_DC_OPS(accel_dev)->build_deflate_ctx;
snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i);
ret = adf_cfg_get_param_value(accel_dev, SEC, key, val);
diff --git a/drivers/crypto/intel/qat/qat_common/qat_compression.h b/drivers/crypto/intel/qat/qat_common/qat_compression.h
index aebac2302dcf..5ced3ed0e5ea 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_compression.h
+++ b/drivers/crypto/intel/qat/qat_common/qat_compression.h
@@ -20,7 +20,6 @@ struct qat_compression_instance {
atomic_t refctr;
struct qat_instance_backlog backlog;
struct adf_dc_data *dc_data;
- void (*build_deflate_ctx)(void *ctx);
};
static inline bool adf_hw_dev_has_compression(struct adf_accel_dev *accel_dev)
diff --git a/drivers/crypto/intel/qat/qat_common/qat_hal.c b/drivers/crypto/intel/qat/qat_common/qat_hal.c
index ef8a9cf74f0c..da4eca6e1633 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_hal.c
@@ -694,16 +694,17 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
handle->pci_dev = pci_info->pci_dev;
switch (handle->pci_dev->device) {
- case ADF_4XXX_PCI_DEVICE_ID:
- case ADF_401XX_PCI_DEVICE_ID:
- case ADF_402XX_PCI_DEVICE_ID:
- case ADF_420XX_PCI_DEVICE_ID:
+ case PCI_DEVICE_ID_INTEL_QAT_4XXX:
+ case PCI_DEVICE_ID_INTEL_QAT_401XX:
+ case PCI_DEVICE_ID_INTEL_QAT_402XX:
+ case PCI_DEVICE_ID_INTEL_QAT_420XX:
+ case PCI_DEVICE_ID_INTEL_QAT_6XXX:
handle->chip_info->mmp_sram_size = 0;
handle->chip_info->nn = false;
handle->chip_info->lm2lm3 = true;
handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X;
handle->chip_info->icp_rst_csr = ICP_RESET_CPP0;
- if (handle->pci_dev->device == ADF_420XX_PCI_DEVICE_ID)
+ if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_420XX)
handle->chip_info->icp_rst_mask = 0x100155;
else
handle->chip_info->icp_rst_mask = 0x100015;
@@ -712,6 +713,8 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
handle->chip_info->wakeup_event_val = 0x80000000;
handle->chip_info->fw_auth = true;
handle->chip_info->css_3k = true;
+ if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_6XXX)
+ handle->chip_info->dual_sign = true;
handle->chip_info->tgroup_share_ustore = true;
handle->chip_info->fcu_ctl_csr = FCU_CONTROL_4XXX;
handle->chip_info->fcu_sts_csr = FCU_STATUS_4XXX;
diff --git a/drivers/crypto/intel/qat/qat_common/qat_uclo.c b/drivers/crypto/intel/qat/qat_common/qat_uclo.c
index 7678a93c6853..21d652a1c8ef 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_uclo.c
@@ -1,11 +1,16 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2014 - 2020 Intel Corporation */
+
+#define pr_fmt(fmt) "QAT: " fmt
+
#include <linux/align.h>
+#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/pci_ids.h>
+#include <linux/wordpart.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
#include "icp_qat_uclo.h"
@@ -59,7 +64,7 @@ static int qat_uclo_free_ae_data(struct icp_qat_uclo_aedata *ae_data)
unsigned int i;
if (!ae_data) {
- pr_err("QAT: bad argument, ae_data is NULL\n");
+ pr_err("bad argument, ae_data is NULL\n");
return -EINVAL;
}
@@ -86,12 +91,11 @@ static int qat_uclo_check_uof_format(struct icp_qat_uof_filehdr *hdr)
int min = hdr->min_ver & 0xff;
if (hdr->file_id != ICP_QAT_UOF_FID) {
- pr_err("QAT: Invalid header 0x%x\n", hdr->file_id);
+ pr_err("Invalid header 0x%x\n", hdr->file_id);
return -EINVAL;
}
if (min != ICP_QAT_UOF_MINVER || maj != ICP_QAT_UOF_MAJVER) {
- pr_err("QAT: bad UOF version, major 0x%x, minor 0x%x\n",
- maj, min);
+ pr_err("bad UOF version, major 0x%x, minor 0x%x\n", maj, min);
return -EINVAL;
}
return 0;
@@ -103,20 +107,19 @@ static int qat_uclo_check_suof_format(struct icp_qat_suof_filehdr *suof_hdr)
int min = suof_hdr->min_ver & 0xff;
if (suof_hdr->file_id != ICP_QAT_SUOF_FID) {
- pr_err("QAT: invalid header 0x%x\n", suof_hdr->file_id);
+ pr_err("invalid header 0x%x\n", suof_hdr->file_id);
return -EINVAL;
}
if (suof_hdr->fw_type != 0) {
- pr_err("QAT: unsupported firmware type\n");
+ pr_err("unsupported firmware type\n");
return -EINVAL;
}
if (suof_hdr->num_chunks <= 0x1) {
- pr_err("QAT: SUOF chunk amount is incorrect\n");
+ pr_err("SUOF chunk amount is incorrect\n");
return -EINVAL;
}
if (maj != ICP_QAT_SUOF_MAJVER || min != ICP_QAT_SUOF_MINVER) {
- pr_err("QAT: bad SUOF version, major 0x%x, minor 0x%x\n",
- maj, min);
+ pr_err("bad SUOF version, major 0x%x, minor 0x%x\n", maj, min);
return -EINVAL;
}
return 0;
@@ -223,24 +226,24 @@ static int qat_uclo_fetch_initmem_ae(struct icp_qat_fw_loader_handle *handle,
char *str;
if ((init_mem->addr + init_mem->num_in_bytes) > (size_range << 0x2)) {
- pr_err("QAT: initmem is out of range");
+ pr_err("initmem is out of range");
return -EINVAL;
}
if (init_mem->scope != ICP_QAT_UOF_LOCAL_SCOPE) {
- pr_err("QAT: Memory scope for init_mem error\n");
+ pr_err("Memory scope for init_mem error\n");
return -EINVAL;
}
str = qat_uclo_get_string(&obj_handle->str_table, init_mem->sym_name);
if (!str) {
- pr_err("QAT: AE name assigned in UOF init table is NULL\n");
+ pr_err("AE name assigned in UOF init table is NULL\n");
return -EINVAL;
}
if (qat_uclo_parse_num(str, ae)) {
- pr_err("QAT: Parse num for AE number failed\n");
+ pr_err("Parse num for AE number failed\n");
return -EINVAL;
}
if (*ae >= ICP_QAT_UCLO_MAX_AE) {
- pr_err("QAT: ae %d out of range\n", *ae);
+ pr_err("ae %d out of range\n", *ae);
return -EINVAL;
}
return 0;
@@ -356,8 +359,7 @@ static int qat_uclo_init_ae_memory(struct icp_qat_fw_loader_handle *handle,
return -EINVAL;
break;
default:
- pr_err("QAT: initmem region error. region type=0x%x\n",
- init_mem->region);
+ pr_err("initmem region error. region type=0x%x\n", init_mem->region);
return -EINVAL;
}
return 0;
@@ -431,7 +433,7 @@ static int qat_uclo_init_memory(struct icp_qat_fw_loader_handle *handle)
for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) {
if (qat_hal_batch_wr_lm(handle, ae,
obj_handle->lm_init_tab[ae])) {
- pr_err("QAT: fail to batch init lmem for AE %d\n", ae);
+ pr_err("fail to batch init lmem for AE %d\n", ae);
return -EINVAL;
}
qat_uclo_cleanup_batch_init_list(handle,
@@ -539,26 +541,26 @@ qat_uclo_check_image_compat(struct icp_qat_uof_encap_obj *encap_uof_obj,
code_page->imp_expr_tab_offset);
if (uc_var_tab->entry_num || imp_var_tab->entry_num ||
imp_expr_tab->entry_num) {
- pr_err("QAT: UOF can't contain imported variable to be parsed\n");
+ pr_err("UOF can't contain imported variable to be parsed\n");
return -EINVAL;
}
neigh_reg_tab = (struct icp_qat_uof_objtable *)
(encap_uof_obj->beg_uof +
code_page->neigh_reg_tab_offset);
if (neigh_reg_tab->entry_num) {
- pr_err("QAT: UOF can't contain neighbor register table\n");
+ pr_err("UOF can't contain neighbor register table\n");
return -EINVAL;
}
if (image->numpages > 1) {
- pr_err("QAT: UOF can't contain multiple pages\n");
+ pr_err("UOF can't contain multiple pages\n");
return -EINVAL;
}
if (ICP_QAT_SHARED_USTORE_MODE(image->ae_mode)) {
- pr_err("QAT: UOF can't use shared control store feature\n");
+ pr_err("UOF can't use shared control store feature\n");
return -EFAULT;
}
if (RELOADABLE_CTX_SHARED_MODE(image->ae_mode)) {
- pr_err("QAT: UOF can't use reloadable feature\n");
+ pr_err("UOF can't use reloadable feature\n");
return -EFAULT;
}
return 0;
@@ -677,7 +679,7 @@ static int qat_uclo_map_ae(struct icp_qat_fw_loader_handle *handle, int max_ae)
}
}
if (!mflag) {
- pr_err("QAT: uimage uses AE not set\n");
+ pr_err("uimage uses AE not set\n");
return -EINVAL;
}
return 0;
@@ -731,14 +733,15 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle)
return ICP_QAT_AC_C62X_DEV_TYPE;
case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
return ICP_QAT_AC_C3XXX_DEV_TYPE;
- case ADF_4XXX_PCI_DEVICE_ID:
- case ADF_401XX_PCI_DEVICE_ID:
- case ADF_402XX_PCI_DEVICE_ID:
- case ADF_420XX_PCI_DEVICE_ID:
+ case PCI_DEVICE_ID_INTEL_QAT_4XXX:
+ case PCI_DEVICE_ID_INTEL_QAT_401XX:
+ case PCI_DEVICE_ID_INTEL_QAT_402XX:
+ case PCI_DEVICE_ID_INTEL_QAT_420XX:
return ICP_QAT_AC_4XXX_A_DEV_TYPE;
+ case PCI_DEVICE_ID_INTEL_QAT_6XXX:
+ return ICP_QAT_AC_6XXX_DEV_TYPE;
default:
- pr_err("QAT: unsupported device 0x%x\n",
- handle->pci_dev->device);
+ pr_err("unsupported device 0x%x\n", handle->pci_dev->device);
return 0;
}
}
@@ -748,7 +751,7 @@ static int qat_uclo_check_uof_compat(struct icp_qat_uclo_objhandle *obj_handle)
unsigned int maj_ver, prod_type = obj_handle->prod_type;
if (!(prod_type & obj_handle->encap_uof_obj.obj_hdr->ac_dev_type)) {
- pr_err("QAT: UOF type 0x%x doesn't match with platform 0x%x\n",
+ pr_err("UOF type 0x%x doesn't match with platform 0x%x\n",
obj_handle->encap_uof_obj.obj_hdr->ac_dev_type,
prod_type);
return -EINVAL;
@@ -756,7 +759,7 @@ static int qat_uclo_check_uof_compat(struct icp_qat_uclo_objhandle *obj_handle)
maj_ver = obj_handle->prod_rev & 0xff;
if (obj_handle->encap_uof_obj.obj_hdr->max_cpu_ver < maj_ver ||
obj_handle->encap_uof_obj.obj_hdr->min_cpu_ver > maj_ver) {
- pr_err("QAT: UOF majVer 0x%x out of range\n", maj_ver);
+ pr_err("UOF majVer 0x%x out of range\n", maj_ver);
return -EINVAL;
}
return 0;
@@ -799,7 +802,7 @@ static int qat_uclo_init_reg(struct icp_qat_fw_loader_handle *handle,
case ICP_NEIGH_REL:
return qat_hal_init_nn(handle, ae, ctx_mask, reg_addr, value);
default:
- pr_err("QAT: UOF uses not supported reg type 0x%x\n", reg_type);
+ pr_err("UOF uses not supported reg type 0x%x\n", reg_type);
return -EFAULT;
}
return 0;
@@ -835,8 +838,7 @@ static int qat_uclo_init_reg_sym(struct icp_qat_fw_loader_handle *handle,
case ICP_QAT_UOF_INIT_REG_CTX:
/* check if ctx is appropriate for the ctxMode */
if (!((1 << init_regsym->ctx) & ctx_mask)) {
- pr_err("QAT: invalid ctx num = 0x%x\n",
- init_regsym->ctx);
+ pr_err("invalid ctx num = 0x%x\n", init_regsym->ctx);
return -EINVAL;
}
qat_uclo_init_reg(handle, ae,
@@ -848,10 +850,10 @@ static int qat_uclo_init_reg_sym(struct icp_qat_fw_loader_handle *handle,
exp_res);
break;
case ICP_QAT_UOF_INIT_EXPR:
- pr_err("QAT: INIT_EXPR feature not supported\n");
+ pr_err("INIT_EXPR feature not supported\n");
return -EINVAL;
case ICP_QAT_UOF_INIT_EXPR_ENDIAN_SWAP:
- pr_err("QAT: INIT_EXPR_ENDIAN_SWAP feature not supported\n");
+ pr_err("INIT_EXPR_ENDIAN_SWAP feature not supported\n");
return -EINVAL;
default:
break;
@@ -871,7 +873,7 @@ static int qat_uclo_init_globals(struct icp_qat_fw_loader_handle *handle)
return 0;
if (obj_handle->init_mem_tab.entry_num) {
if (qat_uclo_init_memory(handle)) {
- pr_err("QAT: initialize memory failed\n");
+ pr_err("initialize memory failed\n");
return -EINVAL;
}
}
@@ -900,40 +902,40 @@ static int qat_hal_set_modes(struct icp_qat_fw_loader_handle *handle,
mode = ICP_QAT_CTX_MODE(uof_image->ae_mode);
ret = qat_hal_set_ae_ctx_mode(handle, ae, mode);
if (ret) {
- pr_err("QAT: qat_hal_set_ae_ctx_mode error\n");
+ pr_err("qat_hal_set_ae_ctx_mode error\n");
return ret;
}
if (handle->chip_info->nn) {
mode = ICP_QAT_NN_MODE(uof_image->ae_mode);
ret = qat_hal_set_ae_nn_mode(handle, ae, mode);
if (ret) {
- pr_err("QAT: qat_hal_set_ae_nn_mode error\n");
+ pr_err("qat_hal_set_ae_nn_mode error\n");
return ret;
}
}
mode = ICP_QAT_LOC_MEM0_MODE(uof_image->ae_mode);
ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM0, mode);
if (ret) {
- pr_err("QAT: qat_hal_set_ae_lm_mode LMEM0 error\n");
+ pr_err("qat_hal_set_ae_lm_mode LMEM0 error\n");
return ret;
}
mode = ICP_QAT_LOC_MEM1_MODE(uof_image->ae_mode);
ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM1, mode);
if (ret) {
- pr_err("QAT: qat_hal_set_ae_lm_mode LMEM1 error\n");
+ pr_err("qat_hal_set_ae_lm_mode LMEM1 error\n");
return ret;
}
if (handle->chip_info->lm2lm3) {
mode = ICP_QAT_LOC_MEM2_MODE(uof_image->ae_mode);
ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM2, mode);
if (ret) {
- pr_err("QAT: qat_hal_set_ae_lm_mode LMEM2 error\n");
+ pr_err("qat_hal_set_ae_lm_mode LMEM2 error\n");
return ret;
}
mode = ICP_QAT_LOC_MEM3_MODE(uof_image->ae_mode);
ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM3, mode);
if (ret) {
- pr_err("QAT: qat_hal_set_ae_lm_mode LMEM3 error\n");
+ pr_err("qat_hal_set_ae_lm_mode LMEM3 error\n");
return ret;
}
mode = ICP_QAT_LOC_TINDEX_MODE(uof_image->ae_mode);
@@ -997,7 +999,7 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
obj_handle->prod_rev = PID_MAJOR_REV |
(PID_MINOR_REV & handle->hal_handle->revision_id);
if (qat_uclo_check_uof_compat(obj_handle)) {
- pr_err("QAT: UOF incompatible\n");
+ pr_err("UOF incompatible\n");
return -EINVAL;
}
obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(u64),
@@ -1008,7 +1010,7 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
if (!obj_handle->obj_hdr->file_buff ||
!qat_uclo_map_str_table(obj_handle->obj_hdr, ICP_QAT_UOF_STRT,
&obj_handle->str_table)) {
- pr_err("QAT: UOF doesn't have effective images\n");
+ pr_err("UOF doesn't have effective images\n");
goto out_err;
}
obj_handle->uimage_num =
@@ -1017,7 +1019,7 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
if (!obj_handle->uimage_num)
goto out_err;
if (qat_uclo_map_ae(handle, handle->hal_handle->ae_max_num)) {
- pr_err("QAT: Bad object\n");
+ pr_err("Bad object\n");
goto out_check_uof_aemask_err;
}
qat_uclo_init_uword_num(handle);
@@ -1034,6 +1036,36 @@ out_err:
return -EFAULT;
}
+static unsigned int qat_uclo_simg_hdr2sign_len(struct icp_qat_fw_loader_handle *handle)
+{
+ if (handle->chip_info->dual_sign)
+ return ICP_QAT_DUALSIGN_OPAQUE_DATA_LEN;
+
+ return ICP_QAT_AE_IMG_OFFSET(handle);
+}
+
+static unsigned int qat_uclo_simg_hdr2cont_len(struct icp_qat_fw_loader_handle *handle)
+{
+ if (handle->chip_info->dual_sign)
+ return ICP_QAT_DUALSIGN_OPAQUE_DATA_LEN + ICP_QAT_DUALSIGN_MISC_INFO_LEN;
+
+ return ICP_QAT_AE_IMG_OFFSET(handle);
+}
+
+static unsigned int qat_uclo_simg_fw_type(struct icp_qat_fw_loader_handle *handle, void *img_ptr)
+{
+ struct icp_qat_css_hdr *hdr = img_ptr;
+ char *fw_hdr = img_ptr;
+ unsigned int offset;
+
+ if (handle->chip_info->dual_sign) {
+ offset = qat_uclo_simg_hdr2sign_len(handle) + ICP_QAT_DUALSIGN_FW_TYPE_LEN;
+ return *(fw_hdr + offset);
+ }
+
+ return hdr->fw_type;
+}
+
static int qat_uclo_map_suof_file_hdr(struct icp_qat_fw_loader_handle *handle,
struct icp_qat_suof_filehdr *suof_ptr,
int suof_size)
@@ -1050,7 +1082,7 @@ static int qat_uclo_map_suof_file_hdr(struct icp_qat_fw_loader_handle *handle,
check_sum = qat_uclo_calc_str_checksum((char *)&suof_ptr->min_ver,
min_ver_offset);
if (check_sum != suof_ptr->check_sum) {
- pr_err("QAT: incorrect SUOF checksum\n");
+ pr_err("incorrect SUOF checksum\n");
return -EINVAL;
}
suof_handle->check_sum = suof_ptr->check_sum;
@@ -1065,9 +1097,9 @@ static void qat_uclo_map_simg(struct icp_qat_fw_loader_handle *handle,
struct icp_qat_suof_chunk_hdr *suof_chunk_hdr)
{
struct icp_qat_suof_handle *suof_handle = handle->sobj_handle;
- unsigned int offset = ICP_QAT_AE_IMG_OFFSET(handle);
- struct icp_qat_simg_ae_mode *ae_mode;
+ unsigned int offset = qat_uclo_simg_hdr2cont_len(handle);
struct icp_qat_suof_objhdr *suof_objhdr;
+ struct icp_qat_simg_ae_mode *ae_mode;
suof_img_hdr->simg_buf = (suof_handle->suof_buf +
suof_chunk_hdr->offset +
@@ -1112,14 +1144,13 @@ static int qat_uclo_check_simg_compat(struct icp_qat_fw_loader_handle *handle,
prod_rev = PID_MAJOR_REV |
(PID_MINOR_REV & handle->hal_handle->revision_id);
if (img_ae_mode->dev_type != prod_type) {
- pr_err("QAT: incompatible product type %x\n",
- img_ae_mode->dev_type);
+ pr_err("incompatible product type %x\n", img_ae_mode->dev_type);
return -EINVAL;
}
maj_ver = prod_rev & 0xff;
if (maj_ver > img_ae_mode->devmax_ver ||
maj_ver < img_ae_mode->devmin_ver) {
- pr_err("QAT: incompatible device majver 0x%x\n", maj_ver);
+ pr_err("incompatible device majver 0x%x\n", maj_ver);
return -EINVAL;
}
return 0;
@@ -1162,7 +1193,7 @@ static int qat_uclo_map_suof(struct icp_qat_fw_loader_handle *handle,
struct icp_qat_suof_img_hdr img_header;
if (!suof_ptr || suof_size == 0) {
- pr_err("QAT: input parameter SUOF pointer/size is NULL\n");
+ pr_err("input parameter SUOF pointer/size is NULL\n");
return -EINVAL;
}
if (qat_uclo_check_suof_format(suof_ptr))
@@ -1205,7 +1236,6 @@ static int qat_uclo_map_suof(struct icp_qat_fw_loader_handle *handle,
}
#define ADD_ADDR(high, low) ((((u64)high) << 32) + low)
-#define BITS_IN_DWORD 32
static int qat_uclo_auth_fw(struct icp_qat_fw_loader_handle *handle,
struct icp_qat_fw_auth_desc *desc)
@@ -1223,7 +1253,7 @@ static int qat_uclo_auth_fw(struct icp_qat_fw_loader_handle *handle,
fcu_dram_hi_csr = handle->chip_info->fcu_dram_addr_hi;
fcu_dram_lo_csr = handle->chip_info->fcu_dram_addr_lo;
- SET_CAP_CSR(handle, fcu_dram_hi_csr, (bus_addr >> BITS_IN_DWORD));
+ SET_CAP_CSR(handle, fcu_dram_hi_csr, bus_addr >> BITS_PER_TYPE(u32));
SET_CAP_CSR(handle, fcu_dram_lo_csr, bus_addr);
SET_CAP_CSR(handle, fcu_ctl_csr, FCU_CTRL_CMD_AUTH);
@@ -1237,7 +1267,7 @@ static int qat_uclo_auth_fw(struct icp_qat_fw_loader_handle *handle,
return 0;
} while (retry++ < FW_AUTH_MAX_RETRY);
auth_fail:
- pr_err("QAT: authentication error (FCU_STATUS = 0x%x),retry = %d\n",
+ pr_err("authentication error (FCU_STATUS = 0x%x),retry = %d\n",
fcu_sts & FCU_AUTH_STS_MASK, retry);
return -EINVAL;
}
@@ -1273,14 +1303,13 @@ static int qat_uclo_broadcast_load_fw(struct icp_qat_fw_loader_handle *handle,
fcu_sts_csr = handle->chip_info->fcu_sts_csr;
fcu_loaded_csr = handle->chip_info->fcu_loaded_ae_csr;
} else {
- pr_err("Chip 0x%x doesn't support broadcast load\n",
- handle->pci_dev->device);
+ pr_err("Chip 0x%x doesn't support broadcast load\n", handle->pci_dev->device);
return -EINVAL;
}
for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) {
if (qat_hal_check_ae_active(handle, (unsigned char)ae)) {
- pr_err("QAT: Broadcast load failed. AE is not enabled or active.\n");
+ pr_err("Broadcast load failed. AE is not enabled or active.\n");
return -EINVAL;
}
@@ -1312,7 +1341,7 @@ static int qat_uclo_broadcast_load_fw(struct icp_qat_fw_loader_handle *handle,
} while (retry++ < FW_AUTH_MAX_RETRY);
if (retry > FW_AUTH_MAX_RETRY) {
- pr_err("QAT: broadcast load failed timeout %d\n", retry);
+ pr_err("broadcast load failed timeout %d\n", retry);
return -EINVAL;
}
}
@@ -1366,24 +1395,38 @@ static void qat_uclo_ummap_auth_fw(struct icp_qat_fw_loader_handle *handle,
}
static int qat_uclo_check_image(struct icp_qat_fw_loader_handle *handle,
- char *image, unsigned int size,
+ void *image, unsigned int size,
unsigned int fw_type)
{
char *fw_type_name = fw_type ? "MMP" : "AE";
unsigned int css_dword_size = sizeof(u32);
+ unsigned int header_len, simg_type;
+ struct icp_qat_css_hdr *css_hdr;
if (handle->chip_info->fw_auth) {
- struct icp_qat_css_hdr *css_hdr = (struct icp_qat_css_hdr *)image;
- unsigned int header_len = ICP_QAT_AE_IMG_OFFSET(handle);
+ header_len = qat_uclo_simg_hdr2sign_len(handle);
+ simg_type = qat_uclo_simg_fw_type(handle, image);
+ css_hdr = image;
+
+ if (handle->chip_info->dual_sign) {
+ if (css_hdr->module_type != ICP_QAT_DUALSIGN_MODULE_TYPE)
+ goto err;
+ if (css_hdr->header_len != ICP_QAT_DUALSIGN_HDR_LEN)
+ goto err;
+ if (css_hdr->header_ver != ICP_QAT_DUALSIGN_HDR_VER)
+ goto err;
+ } else {
+ if (css_hdr->header_len * css_dword_size != header_len)
+ goto err;
+ if (css_hdr->size * css_dword_size != size)
+ goto err;
+ if (size <= header_len)
+ goto err;
+ }
- if ((css_hdr->header_len * css_dword_size) != header_len)
- goto err;
- if ((css_hdr->size * css_dword_size) != size)
- goto err;
- if (fw_type != css_hdr->fw_type)
- goto err;
- if (size <= header_len)
+ if (fw_type != simg_type)
goto err;
+
size -= header_len;
}
@@ -1397,123 +1440,95 @@ static int qat_uclo_check_image(struct icp_qat_fw_loader_handle *handle,
if (size > ICP_QAT_CSS_RSA3K_MAX_IMAGE_LEN)
goto err;
} else {
- pr_err("QAT: Unsupported firmware type\n");
+ pr_err("Unsupported firmware type\n");
return -EINVAL;
}
return 0;
err:
- pr_err("QAT: Invalid %s firmware image\n", fw_type_name);
+ pr_err("Invalid %s firmware image\n", fw_type_name);
return -EINVAL;
}
-static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle,
- char *image, unsigned int size,
- struct icp_qat_fw_auth_desc **desc)
+static int qat_uclo_build_auth_desc_RSA(struct icp_qat_fw_loader_handle *handle,
+ char *image, unsigned int size,
+ struct icp_firml_dram_desc *dram_desc,
+ unsigned int fw_type, struct icp_qat_fw_auth_desc **desc)
{
struct icp_qat_css_hdr *css_hdr = (struct icp_qat_css_hdr *)image;
- struct icp_qat_fw_auth_desc *auth_desc;
- struct icp_qat_auth_chunk *auth_chunk;
- u64 virt_addr, bus_addr, virt_base;
- unsigned int simg_offset = sizeof(*auth_chunk);
struct icp_qat_simg_ae_mode *simg_ae_mode;
- struct icp_firml_dram_desc img_desc;
- int ret;
-
- ret = qat_uclo_simg_alloc(handle, &img_desc, ICP_QAT_CSS_RSA4K_MAX_IMAGE_LEN);
- if (ret) {
- pr_err("QAT: error, allocate continuous dram fail\n");
- return ret;
- }
-
- if (!IS_ALIGNED(img_desc.dram_size, 8) || !img_desc.dram_bus_addr) {
- pr_debug("QAT: invalid address\n");
- qat_uclo_simg_free(handle, &img_desc);
- return -EINVAL;
- }
+ struct icp_qat_fw_auth_desc *auth_desc;
+ char *virt_addr, *virt_base;
+ u64 bus_addr;
- auth_chunk = img_desc.dram_base_addr_v;
- auth_chunk->chunk_size = img_desc.dram_size;
- auth_chunk->chunk_bus_addr = img_desc.dram_bus_addr;
- virt_base = (uintptr_t)img_desc.dram_base_addr_v + simg_offset;
- bus_addr = img_desc.dram_bus_addr + simg_offset;
- auth_desc = img_desc.dram_base_addr_v;
- auth_desc->css_hdr_high = (unsigned int)(bus_addr >> BITS_IN_DWORD);
- auth_desc->css_hdr_low = (unsigned int)bus_addr;
+ virt_base = dram_desc->dram_base_addr_v;
+ virt_base += sizeof(struct icp_qat_auth_chunk);
+ bus_addr = dram_desc->dram_bus_addr + sizeof(struct icp_qat_auth_chunk);
+ auth_desc = dram_desc->dram_base_addr_v;
+ auth_desc->css_hdr_high = upper_32_bits(bus_addr);
+ auth_desc->css_hdr_low = lower_32_bits(bus_addr);
virt_addr = virt_base;
- memcpy((void *)(uintptr_t)virt_addr, image, sizeof(*css_hdr));
+ memcpy(virt_addr, image, sizeof(*css_hdr));
/* pub key */
bus_addr = ADD_ADDR(auth_desc->css_hdr_high, auth_desc->css_hdr_low) +
sizeof(*css_hdr);
virt_addr = virt_addr + sizeof(*css_hdr);
- auth_desc->fwsk_pub_high = (unsigned int)(bus_addr >> BITS_IN_DWORD);
- auth_desc->fwsk_pub_low = (unsigned int)bus_addr;
+ auth_desc->fwsk_pub_high = upper_32_bits(bus_addr);
+ auth_desc->fwsk_pub_low = lower_32_bits(bus_addr);
- memcpy((void *)(uintptr_t)virt_addr,
- (void *)(image + sizeof(*css_hdr)),
- ICP_QAT_CSS_FWSK_MODULUS_LEN(handle));
+ memcpy(virt_addr, image + sizeof(*css_hdr), ICP_QAT_CSS_FWSK_MODULUS_LEN(handle));
/* padding */
memset((void *)(uintptr_t)(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle)),
0, ICP_QAT_CSS_FWSK_PAD_LEN(handle));
/* exponent */
- memcpy((void *)(uintptr_t)(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) +
- ICP_QAT_CSS_FWSK_PAD_LEN(handle)),
- (void *)(image + sizeof(*css_hdr) +
- ICP_QAT_CSS_FWSK_MODULUS_LEN(handle)),
- sizeof(unsigned int));
+ memcpy(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) +
+ ICP_QAT_CSS_FWSK_PAD_LEN(handle), image + sizeof(*css_hdr) +
+ ICP_QAT_CSS_FWSK_MODULUS_LEN(handle), sizeof(unsigned int));
/* signature */
bus_addr = ADD_ADDR(auth_desc->fwsk_pub_high,
auth_desc->fwsk_pub_low) +
ICP_QAT_CSS_FWSK_PUB_LEN(handle);
virt_addr = virt_addr + ICP_QAT_CSS_FWSK_PUB_LEN(handle);
- auth_desc->signature_high = (unsigned int)(bus_addr >> BITS_IN_DWORD);
- auth_desc->signature_low = (unsigned int)bus_addr;
+ auth_desc->signature_high = upper_32_bits(bus_addr);
+ auth_desc->signature_low = lower_32_bits(bus_addr);
- memcpy((void *)(uintptr_t)virt_addr,
- (void *)(image + sizeof(*css_hdr) +
- ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) +
- ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle)),
- ICP_QAT_CSS_SIGNATURE_LEN(handle));
+ memcpy(virt_addr, image + sizeof(*css_hdr) + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) +
+ ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle), ICP_QAT_CSS_SIGNATURE_LEN(handle));
bus_addr = ADD_ADDR(auth_desc->signature_high,
auth_desc->signature_low) +
ICP_QAT_CSS_SIGNATURE_LEN(handle);
virt_addr += ICP_QAT_CSS_SIGNATURE_LEN(handle);
- auth_desc->img_high = (unsigned int)(bus_addr >> BITS_IN_DWORD);
- auth_desc->img_low = (unsigned int)bus_addr;
- auth_desc->img_len = size - ICP_QAT_AE_IMG_OFFSET(handle);
- if (bus_addr + auth_desc->img_len > img_desc.dram_bus_addr +
- ICP_QAT_CSS_RSA4K_MAX_IMAGE_LEN) {
- pr_err("QAT: insufficient memory size for authentication data\n");
- qat_uclo_simg_free(handle, &img_desc);
+ auth_desc->img_high = upper_32_bits(bus_addr);
+ auth_desc->img_low = lower_32_bits(bus_addr);
+ auth_desc->img_len = size - qat_uclo_simg_hdr2sign_len(handle);
+ if (bus_addr + auth_desc->img_len >
+ dram_desc->dram_bus_addr + ICP_QAT_CSS_RSA4K_MAX_IMAGE_LEN) {
+ pr_err("insufficient memory size for authentication data\n");
+ qat_uclo_simg_free(handle, dram_desc);
return -ENOMEM;
}
- memcpy((void *)(uintptr_t)virt_addr,
- (void *)(image + ICP_QAT_AE_IMG_OFFSET(handle)),
- auth_desc->img_len);
+ memcpy(virt_addr, image + qat_uclo_simg_hdr2sign_len(handle), auth_desc->img_len);
virt_addr = virt_base;
/* AE firmware */
- if (((struct icp_qat_css_hdr *)(uintptr_t)virt_addr)->fw_type ==
- CSS_AE_FIRMWARE) {
+ if (fw_type == CSS_AE_FIRMWARE) {
auth_desc->img_ae_mode_data_high = auth_desc->img_high;
auth_desc->img_ae_mode_data_low = auth_desc->img_low;
bus_addr = ADD_ADDR(auth_desc->img_ae_mode_data_high,
auth_desc->img_ae_mode_data_low) +
sizeof(struct icp_qat_simg_ae_mode);
- auth_desc->img_ae_init_data_high = (unsigned int)
- (bus_addr >> BITS_IN_DWORD);
- auth_desc->img_ae_init_data_low = (unsigned int)bus_addr;
+ auth_desc->img_ae_init_data_high = upper_32_bits(bus_addr);
+ auth_desc->img_ae_init_data_low = lower_32_bits(bus_addr);
bus_addr += ICP_QAT_SIMG_AE_INIT_SEQ_LEN;
- auth_desc->img_ae_insts_high = (unsigned int)
- (bus_addr >> BITS_IN_DWORD);
- auth_desc->img_ae_insts_low = (unsigned int)bus_addr;
+ auth_desc->img_ae_insts_high = upper_32_bits(bus_addr);
+ auth_desc->img_ae_insts_low = lower_32_bits(bus_addr);
virt_addr += sizeof(struct icp_qat_css_hdr);
virt_addr += ICP_QAT_CSS_FWSK_PUB_LEN(handle);
virt_addr += ICP_QAT_CSS_SIGNATURE_LEN(handle);
@@ -1527,6 +1542,141 @@ static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle,
return 0;
}
+static int qat_uclo_build_auth_desc_dualsign(struct icp_qat_fw_loader_handle *handle,
+ char *image, unsigned int size,
+ struct icp_firml_dram_desc *dram_desc,
+ unsigned int fw_type,
+ struct icp_qat_fw_auth_desc **desc)
+{
+ struct icp_qat_simg_ae_mode *simg_ae_mode;
+ struct icp_qat_fw_auth_desc *auth_desc;
+ unsigned int chunk_offset, img_offset;
+ u64 bus_addr, addr;
+ char *virt_addr;
+
+ virt_addr = dram_desc->dram_base_addr_v;
+ virt_addr += sizeof(struct icp_qat_auth_chunk);
+ bus_addr = dram_desc->dram_bus_addr + sizeof(struct icp_qat_auth_chunk);
+
+ auth_desc = dram_desc->dram_base_addr_v;
+ auth_desc->img_len = size - qat_uclo_simg_hdr2sign_len(handle);
+ auth_desc->css_hdr_high = upper_32_bits(bus_addr);
+ auth_desc->css_hdr_low = lower_32_bits(bus_addr);
+ memcpy(virt_addr, image, ICP_QAT_DUALSIGN_OPAQUE_HDR_LEN);
+
+ img_offset = ICP_QAT_DUALSIGN_OPAQUE_HDR_LEN;
+ chunk_offset = ICP_QAT_DUALSIGN_OPAQUE_HDR_ALIGN_LEN;
+
+ /* RSA pub key */
+ addr = bus_addr + chunk_offset;
+ auth_desc->fwsk_pub_high = upper_32_bits(addr);
+ auth_desc->fwsk_pub_low = lower_32_bits(addr);
+ memcpy(virt_addr + chunk_offset, image + img_offset, ICP_QAT_CSS_FWSK_MODULUS_LEN(handle));
+
+ img_offset += ICP_QAT_CSS_FWSK_MODULUS_LEN(handle);
+ chunk_offset += ICP_QAT_CSS_FWSK_MODULUS_LEN(handle);
+ /* RSA padding */
+ memset(virt_addr + chunk_offset, 0, ICP_QAT_CSS_FWSK_PAD_LEN(handle));
+
+ chunk_offset += ICP_QAT_CSS_FWSK_PAD_LEN(handle);
+ /* RSA exponent */
+ memcpy(virt_addr + chunk_offset, image + img_offset, ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle));
+
+ img_offset += ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle);
+ chunk_offset += ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle);
+ /* RSA signature */
+ addr = bus_addr + chunk_offset;
+ auth_desc->signature_high = upper_32_bits(addr);
+ auth_desc->signature_low = lower_32_bits(addr);
+ memcpy(virt_addr + chunk_offset, image + img_offset, ICP_QAT_CSS_SIGNATURE_LEN(handle));
+
+ img_offset += ICP_QAT_CSS_SIGNATURE_LEN(handle);
+ chunk_offset += ICP_QAT_CSS_SIGNATURE_LEN(handle);
+ /* XMSS pubkey */
+ addr = bus_addr + chunk_offset;
+ auth_desc->xmss_pubkey_high = upper_32_bits(addr);
+ auth_desc->xmss_pubkey_low = lower_32_bits(addr);
+ memcpy(virt_addr + chunk_offset, image + img_offset, ICP_QAT_DUALSIGN_XMSS_PUBKEY_LEN);
+
+ img_offset += ICP_QAT_DUALSIGN_XMSS_PUBKEY_LEN;
+ chunk_offset += ICP_QAT_DUALSIGN_XMSS_PUBKEY_LEN;
+ /* XMSS signature */
+ addr = bus_addr + chunk_offset;
+ auth_desc->xmss_sig_high = upper_32_bits(addr);
+ auth_desc->xmss_sig_low = lower_32_bits(addr);
+ memcpy(virt_addr + chunk_offset, image + img_offset, ICP_QAT_DUALSIGN_XMSS_SIG_LEN);
+
+ img_offset += ICP_QAT_DUALSIGN_XMSS_SIG_LEN;
+ chunk_offset += ICP_QAT_DUALSIGN_XMSS_SIG_ALIGN_LEN;
+
+ if (dram_desc->dram_size < (chunk_offset + auth_desc->img_len)) {
+ pr_err("auth chunk memory size is not enough to store data\n");
+ return -ENOMEM;
+ }
+
+ /* Signed data */
+ addr = bus_addr + chunk_offset;
+ auth_desc->img_high = upper_32_bits(addr);
+ auth_desc->img_low = lower_32_bits(addr);
+ memcpy(virt_addr + chunk_offset, image + img_offset, auth_desc->img_len);
+
+ chunk_offset += ICP_QAT_DUALSIGN_MISC_INFO_LEN;
+ /* AE firmware */
+ if (fw_type == CSS_AE_FIRMWARE) {
+ /* AE mode data */
+ addr = bus_addr + chunk_offset;
+ auth_desc->img_ae_mode_data_high = upper_32_bits(addr);
+ auth_desc->img_ae_mode_data_low = lower_32_bits(addr);
+ simg_ae_mode =
+ (struct icp_qat_simg_ae_mode *)(virt_addr + chunk_offset);
+ auth_desc->ae_mask = simg_ae_mode->ae_mask & handle->cfg_ae_mask;
+
+ chunk_offset += sizeof(struct icp_qat_simg_ae_mode);
+ /* AE init seq */
+ addr = bus_addr + chunk_offset;
+ auth_desc->img_ae_init_data_high = upper_32_bits(addr);
+ auth_desc->img_ae_init_data_low = lower_32_bits(addr);
+
+ chunk_offset += ICP_QAT_SIMG_AE_INIT_SEQ_LEN;
+ /* AE instructions */
+ addr = bus_addr + chunk_offset;
+ auth_desc->img_ae_insts_high = upper_32_bits(addr);
+ auth_desc->img_ae_insts_low = lower_32_bits(addr);
+ } else {
+ addr = bus_addr + chunk_offset;
+ auth_desc->img_ae_insts_high = upper_32_bits(addr);
+ auth_desc->img_ae_insts_low = lower_32_bits(addr);
+ }
+ *desc = auth_desc;
+ return 0;
+}
+
+static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle,
+ char *image, unsigned int size,
+ struct icp_qat_fw_auth_desc **desc)
+{
+ struct icp_qat_auth_chunk *auth_chunk;
+ struct icp_firml_dram_desc img_desc;
+ unsigned int simg_fw_type;
+ int ret;
+
+ ret = qat_uclo_simg_alloc(handle, &img_desc, ICP_QAT_CSS_RSA4K_MAX_IMAGE_LEN);
+ if (ret)
+ return ret;
+
+ simg_fw_type = qat_uclo_simg_fw_type(handle, image);
+ auth_chunk = img_desc.dram_base_addr_v;
+ auth_chunk->chunk_size = img_desc.dram_size;
+ auth_chunk->chunk_bus_addr = img_desc.dram_bus_addr;
+
+ if (handle->chip_info->dual_sign)
+ return qat_uclo_build_auth_desc_dualsign(handle, image, size, &img_desc,
+ simg_fw_type, desc);
+
+ return qat_uclo_build_auth_desc_RSA(handle, image, size, &img_desc,
+ simg_fw_type, desc);
+}
+
static int qat_uclo_load_fw(struct icp_qat_fw_loader_handle *handle,
struct icp_qat_fw_auth_desc *desc)
{
@@ -1546,7 +1696,7 @@ static int qat_uclo_load_fw(struct icp_qat_fw_loader_handle *handle,
if (!((desc->ae_mask >> i) & 0x1))
continue;
if (qat_hal_check_ae_active(handle, i)) {
- pr_err("QAT: AE %d is active\n", i);
+ pr_err("AE %d is active\n", i);
return -EINVAL;
}
SET_CAP_CSR(handle, fcu_ctl_csr,
@@ -1566,7 +1716,7 @@ static int qat_uclo_load_fw(struct icp_qat_fw_loader_handle *handle,
}
} while (retry++ < FW_AUTH_MAX_RETRY);
if (retry > FW_AUTH_MAX_RETRY) {
- pr_err("QAT: firmware load failed timeout %x\n", retry);
+ pr_err("firmware load failed timeout %x\n", retry);
return -EINVAL;
}
}
@@ -1584,7 +1734,7 @@ static int qat_uclo_map_suof_obj(struct icp_qat_fw_loader_handle *handle,
handle->sobj_handle = suof_handle;
if (qat_uclo_map_suof(handle, addr_ptr, mem_size)) {
qat_uclo_del_suof(handle);
- pr_err("QAT: map SUOF failed\n");
+ pr_err("map SUOF failed\n");
return -EINVAL;
}
return 0;
@@ -1608,7 +1758,7 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
qat_uclo_ummap_auth_fw(handle, &desc);
} else {
if (handle->chip_info->mmp_sram_size < mem_size) {
- pr_err("QAT: MMP size is too large: 0x%x\n", mem_size);
+ pr_err("MMP size is too large: 0x%x\n", mem_size);
return -EFBIG;
}
qat_uclo_wr_sram_by_words(handle, 0, addr_ptr, mem_size);
@@ -1634,7 +1784,7 @@ static int qat_uclo_map_uof_obj(struct icp_qat_fw_loader_handle *handle,
objhdl->obj_hdr = qat_uclo_map_chunk((char *)objhdl->obj_buf, filehdr,
ICP_QAT_UOF_OBJS);
if (!objhdl->obj_hdr) {
- pr_err("QAT: object file chunk is null\n");
+ pr_err("object file chunk is null\n");
goto out_objhdr_err;
}
handle->obj_handle = objhdl;
@@ -1669,7 +1819,7 @@ static int qat_uclo_map_mof_file_hdr(struct icp_qat_fw_loader_handle *handle,
checksum = qat_uclo_calc_str_checksum(&mof_ptr->min_ver,
min_ver_offset);
if (checksum != mof_ptr->checksum) {
- pr_err("QAT: incorrect MOF checksum\n");
+ pr_err("incorrect MOF checksum\n");
return -EINVAL;
}
@@ -1705,7 +1855,7 @@ static int qat_uclo_seek_obj_inside_mof(struct icp_qat_mof_handle *mobj_handle,
}
}
- pr_err("QAT: object %s is not found inside MOF\n", obj_name);
+ pr_err("object %s is not found inside MOF\n", obj_name);
return -EINVAL;
}
@@ -1722,7 +1872,7 @@ static int qat_uclo_map_obj_from_mof(struct icp_qat_mof_handle *mobj_handle,
ICP_QAT_MOF_OBJ_CHUNKID_LEN)) {
obj = mobj_handle->sobjs_hdr + obj_chunkhdr->offset;
} else {
- pr_err("QAT: unsupported chunk id\n");
+ pr_err("unsupported chunk id\n");
return -EINVAL;
}
mobj_hdr->obj_buf = obj;
@@ -1783,7 +1933,7 @@ static int qat_uclo_map_objs_from_mof(struct icp_qat_mof_handle *mobj_handle)
}
if ((uobj_chunk_num + sobj_chunk_num) != *valid_chunk) {
- pr_err("QAT: inconsistent UOF/SUOF chunk amount\n");
+ pr_err("inconsistent UOF/SUOF chunk amount\n");
return -EINVAL;
}
return 0;
@@ -1824,17 +1974,16 @@ static int qat_uclo_check_mof_format(struct icp_qat_mof_file_hdr *mof_hdr)
int min = mof_hdr->min_ver & 0xff;
if (mof_hdr->file_id != ICP_QAT_MOF_FID) {
- pr_err("QAT: invalid header 0x%x\n", mof_hdr->file_id);
+ pr_err("invalid header 0x%x\n", mof_hdr->file_id);
return -EINVAL;
}
if (mof_hdr->num_chunks <= 0x1) {
- pr_err("QAT: MOF chunk amount is incorrect\n");
+ pr_err("MOF chunk amount is incorrect\n");
return -EINVAL;
}
if (maj != ICP_QAT_MOF_MAJVER || min != ICP_QAT_MOF_MINVER) {
- pr_err("QAT: bad MOF version, major 0x%x, minor 0x%x\n",
- maj, min);
+ pr_err("bad MOF version, major 0x%x, minor 0x%x\n", maj, min);
return -EINVAL;
}
return 0;
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/Makefile b/drivers/crypto/intel/qat/qat_dh895xcc/Makefile
index 5bf5c890c362..1427fe76f171 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/Makefile
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCC) += qat_dh895xcc.o
qat_dh895xcc-y := adf_drv.o adf_dh895xcc_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index e48bcf1818cd..5b4bd0ba1ccb 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -4,7 +4,6 @@
#include <adf_admin.h>
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
-#include <adf_gen2_dc.h>
#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
@@ -24,7 +23,6 @@ static const u32 thrd_to_arb_map[ADF_DH895XCC_MAX_ACCELENGINES] = {
static struct adf_hw_device_class dh895xcc_class = {
.name = ADF_DH895XCC_DEVICE_NAME,
.type = DEV_DH895XCC,
- .instances = 0
};
static u32 get_accel_mask(struct adf_hw_device_data *self)
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_drv.c
index 07e9d7e52861..b59e0cc49e52 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_drv.c
@@ -19,24 +19,6 @@
#include <adf_dbgfs.h>
#include "adf_dh895xcc_hw_data.h"
-static const struct pci_device_id adf_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_DH895XCC), },
- { }
-};
-MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
-
-static int adf_probe(struct pci_dev *dev, const struct pci_device_id *ent);
-static void adf_remove(struct pci_dev *dev);
-
-static struct pci_driver adf_driver = {
- .id_table = adf_pci_tbl,
- .name = ADF_DH895XCC_DEVICE_NAME,
- .probe = adf_probe,
- .remove = adf_remove,
- .sriov_configure = adf_sriov_configure,
- .err_handler = &adf_err_handler,
-};
-
static void adf_cleanup_pci_dev(struct adf_accel_dev *accel_dev)
{
pci_release_regions(accel_dev->accel_pci_dev.pci_dev);
@@ -227,6 +209,29 @@ static void adf_remove(struct pci_dev *pdev)
kfree(accel_dev);
}
+static void adf_shutdown(struct pci_dev *pdev)
+{
+ struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+
+ adf_dev_down(accel_dev);
+}
+
+static const struct pci_device_id adf_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_DH895XCC) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
+
+static struct pci_driver adf_driver = {
+ .id_table = adf_pci_tbl,
+ .name = ADF_DH895XCC_DEVICE_NAME,
+ .probe = adf_probe,
+ .remove = adf_remove,
+ .shutdown = adf_shutdown,
+ .sriov_configure = adf_sriov_configure,
+ .err_handler = &adf_err_handler,
+};
+
static int __init adfdrv_init(void)
{
request_module("intel_qat");
diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile b/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile
index 93f9c81edf09..c2fdb6e0f68f 100644
--- a/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile
+++ b/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCCVF) += qat_dh895xccvf.o
qat_dh895xccvf-y := adf_drv.o adf_dh895xccvf_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index f4ee4c2e00da..828456c43b76 100644
--- a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -3,7 +3,6 @@
#include <adf_accel_devices.h>
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
-#include <adf_gen2_dc.h>
#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
@@ -13,7 +12,6 @@
static struct adf_hw_device_class dh895xcciov_class = {
.name = ADF_DH895XCCVF_DEVICE_NAME,
.type = DEV_DH895XCCVF,
- .instances = 0
};
static u32 get_accel_mask(struct adf_hw_device_data *self)
diff --git a/drivers/crypto/marvell/cesa/cesa.c b/drivers/crypto/marvell/cesa/cesa.c
index fa08f10e6f3f..9c21f5d835d2 100644
--- a/drivers/crypto/marvell/cesa/cesa.c
+++ b/drivers/crypto/marvell/cesa/cesa.c
@@ -94,7 +94,7 @@ static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status)
static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status)
{
- if (engine->chain.first && engine->chain.last)
+ if (engine->chain_hw.first && engine->chain_hw.last)
return mv_cesa_tdma_process(engine, status);
return mv_cesa_std_process(engine, status);
diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h
index d215a6bed6bc..50ca1039fdaa 100644
--- a/drivers/crypto/marvell/cesa/cesa.h
+++ b/drivers/crypto/marvell/cesa/cesa.h
@@ -440,8 +440,10 @@ struct mv_cesa_dev {
* SRAM
* @queue: fifo of the pending crypto requests
* @load: engine load counter, useful for load balancing
- * @chain: list of the current tdma descriptors being processed
- * by this engine.
+ * @chain_hw: list of the current tdma descriptors being processed
+ * by the hardware.
+ * @chain_sw: list of the current tdma descriptors that will be
+ * submitted to the hardware.
* @complete_queue: fifo of the processed requests by the engine
*
* Structure storing CESA engine information.
@@ -463,7 +465,8 @@ struct mv_cesa_engine {
struct gen_pool *pool;
struct crypto_queue queue;
atomic_t load;
- struct mv_cesa_tdma_chain chain;
+ struct mv_cesa_tdma_chain chain_hw;
+ struct mv_cesa_tdma_chain chain_sw;
struct list_head complete_queue;
int irq;
};
diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c
index cf62db50f958..48c5c8ea8c43 100644
--- a/drivers/crypto/marvell/cesa/cipher.c
+++ b/drivers/crypto/marvell/cesa/cipher.c
@@ -459,6 +459,9 @@ static int mv_cesa_skcipher_queue_req(struct skcipher_request *req,
struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req);
struct mv_cesa_engine *engine;
+ if (!req->cryptlen)
+ return 0;
+
ret = mv_cesa_skcipher_req_init(req, tmpl);
if (ret)
return ret;
diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c
index f150861ceaf6..6815eddc9068 100644
--- a/drivers/crypto/marvell/cesa/hash.c
+++ b/drivers/crypto/marvell/cesa/hash.c
@@ -663,7 +663,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
if (ret)
goto err_free_tdma;
- if (iter.src.sg) {
+ if (iter.base.len > iter.src.op_offset) {
/*
* Add all the new data, inserting an operation block and
* launch command between each full SRAM block-worth of
diff --git a/drivers/crypto/marvell/cesa/tdma.c b/drivers/crypto/marvell/cesa/tdma.c
index 388a06e180d6..243305354420 100644
--- a/drivers/crypto/marvell/cesa/tdma.c
+++ b/drivers/crypto/marvell/cesa/tdma.c
@@ -38,6 +38,15 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq)
{
struct mv_cesa_engine *engine = dreq->engine;
+ spin_lock_bh(&engine->lock);
+ if (engine->chain_sw.first == dreq->chain.first) {
+ engine->chain_sw.first = NULL;
+ engine->chain_sw.last = NULL;
+ }
+ engine->chain_hw.first = dreq->chain.first;
+ engine->chain_hw.last = dreq->chain.last;
+ spin_unlock_bh(&engine->lock);
+
writel_relaxed(0, engine->regs + CESA_SA_CFG);
mv_cesa_set_int_mask(engine, CESA_SA_INT_ACC0_IDMA_DONE);
@@ -96,25 +105,27 @@ void mv_cesa_dma_prepare(struct mv_cesa_req *dreq,
void mv_cesa_tdma_chain(struct mv_cesa_engine *engine,
struct mv_cesa_req *dreq)
{
- if (engine->chain.first == NULL && engine->chain.last == NULL) {
- engine->chain.first = dreq->chain.first;
- engine->chain.last = dreq->chain.last;
- } else {
- struct mv_cesa_tdma_desc *last;
+ struct mv_cesa_tdma_desc *last = engine->chain_sw.last;
- last = engine->chain.last;
+ /*
+ * Break the DMA chain if the request being queued needs the IV
+ * regs to be set before lauching the request.
+ */
+ if (!last || dreq->chain.first->flags & CESA_TDMA_SET_STATE)
+ engine->chain_sw.first = dreq->chain.first;
+ else {
last->next = dreq->chain.first;
- engine->chain.last = dreq->chain.last;
-
- /*
- * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
- * the last element of the current chain, or if the request
- * being queued needs the IV regs to be set before lauching
- * the request.
- */
- if (!(last->flags & CESA_TDMA_BREAK_CHAIN) &&
- !(dreq->chain.first->flags & CESA_TDMA_SET_STATE))
- last->next_dma = cpu_to_le32(dreq->chain.first->cur_dma);
+ last->next_dma = cpu_to_le32(dreq->chain.first->cur_dma);
+ }
+ last = dreq->chain.last;
+ engine->chain_sw.last = last;
+ /*
+ * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
+ * the last element of the current chain.
+ */
+ if (last->flags & CESA_TDMA_BREAK_CHAIN) {
+ engine->chain_sw.first = NULL;
+ engine->chain_sw.last = NULL;
}
}
@@ -127,7 +138,7 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status)
tdma_cur = readl(engine->regs + CESA_TDMA_CUR);
- for (tdma = engine->chain.first; tdma; tdma = next) {
+ for (tdma = engine->chain_hw.first; tdma; tdma = next) {
spin_lock_bh(&engine->lock);
next = tdma->next;
spin_unlock_bh(&engine->lock);
@@ -149,12 +160,12 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status)
&backlog);
/* Re-chaining to the next request */
- engine->chain.first = tdma->next;
+ engine->chain_hw.first = tdma->next;
tdma->next = NULL;
/* If this is the last request, clear the chain */
- if (engine->chain.first == NULL)
- engine->chain.last = NULL;
+ if (engine->chain_hw.first == NULL)
+ engine->chain_hw.last = NULL;
spin_unlock_bh(&engine->lock);
ctx = crypto_tfm_ctx(req->tfm);
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
index 5cae8fafa151..d4aab9e20f2a 100644
--- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
@@ -6,6 +6,7 @@
#include "otx2_cptvf.h"
#include "otx2_cptlf.h"
#include "cn10k_cpt.h"
+#include "otx2_cpt_common.h"
static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
struct otx2_cptlf_info *lf);
@@ -27,7 +28,7 @@ static struct cpt_hw_ops cn10k_hw_ops = {
static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
struct otx2_cptlf_info *lf)
{
- void __iomem *lmtline = lf->lmtline;
+ void *lmtline = lf->lfs->lmt_info.base + (lf->slot * LMTLINE_SIZE);
u64 val = (lf->slot & 0x7FF);
u64 tar_addr = 0;
@@ -41,15 +42,49 @@ static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
dma_wmb();
/* Copy CPT command to LMTLINE */
- memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE);
+ memcpy(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE);
cn10k_lmt_flush(val, tar_addr);
}
+void cn10k_cpt_lmtst_free(struct pci_dev *pdev, struct otx2_cptlfs_info *lfs)
+{
+ struct otx2_lmt_info *lmt_info = &lfs->lmt_info;
+
+ if (!lmt_info->base)
+ return;
+
+ dma_free_attrs(&pdev->dev, lmt_info->size,
+ lmt_info->base - lmt_info->align,
+ lmt_info->iova - lmt_info->align,
+ DMA_ATTR_FORCE_CONTIGUOUS);
+}
+EXPORT_SYMBOL_NS_GPL(cn10k_cpt_lmtst_free, "CRYPTO_DEV_OCTEONTX2_CPT");
+
+static int cn10k_cpt_lmtst_alloc(struct pci_dev *pdev,
+ struct otx2_cptlfs_info *lfs, u32 size)
+{
+ struct otx2_lmt_info *lmt_info = &lfs->lmt_info;
+ dma_addr_t align_iova;
+ dma_addr_t iova;
+
+ lmt_info->base = dma_alloc_attrs(&pdev->dev, size, &iova, GFP_KERNEL,
+ DMA_ATTR_FORCE_CONTIGUOUS);
+ if (!lmt_info->base)
+ return -ENOMEM;
+
+ align_iova = ALIGN((u64)iova, LMTLINE_ALIGN);
+ lmt_info->iova = align_iova;
+ lmt_info->align = align_iova - iova;
+ lmt_info->size = size;
+ lmt_info->base += lmt_info->align;
+ return 0;
+}
+
int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf)
{
struct pci_dev *pdev = cptpf->pdev;
- resource_size_t size;
- u64 lmt_base;
+ u32 size;
+ int ret;
if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) {
cptpf->lfs.ops = &otx2_hw_ops;
@@ -57,18 +92,19 @@ int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf)
}
cptpf->lfs.ops = &cn10k_hw_ops;
- lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR);
- if (!lmt_base) {
- dev_err(&pdev->dev, "PF LMTLINE address not configured\n");
- return -ENOMEM;
+ size = OTX2_CPT_MAX_VFS_NUM * LMTLINE_SIZE + LMTLINE_ALIGN;
+ ret = cn10k_cpt_lmtst_alloc(pdev, &cptpf->lfs, size);
+ if (ret) {
+ dev_err(&pdev->dev, "PF-%d LMTLINE memory allocation failed\n",
+ cptpf->pf_id);
+ return ret;
}
- size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
- size -= ((1 + cptpf->max_vfs) * MBOX_SIZE);
- cptpf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, lmt_base, size);
- if (!cptpf->lfs.lmt_base) {
- dev_err(&pdev->dev,
- "Mapping of PF LMTLINE address failed\n");
- return -ENOMEM;
+
+ ret = otx2_cpt_lmtst_tbl_setup_msg(&cptpf->lfs);
+ if (ret) {
+ dev_err(&pdev->dev, "PF-%d: LMTST Table setup failed\n",
+ cptpf->pf_id);
+ cn10k_cpt_lmtst_free(pdev, &cptpf->lfs);
}
return 0;
@@ -78,18 +114,25 @@ EXPORT_SYMBOL_NS_GPL(cn10k_cptpf_lmtst_init, "CRYPTO_DEV_OCTEONTX2_CPT");
int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf)
{
struct pci_dev *pdev = cptvf->pdev;
- resource_size_t offset, size;
+ u32 size;
+ int ret;
if (!test_bit(CN10K_LMTST, &cptvf->cap_flag))
return 0;
- offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
- size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
- /* Map VF LMILINE region */
- cptvf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, offset, size);
- if (!cptvf->lfs.lmt_base) {
- dev_err(&pdev->dev, "Unable to map BAR4\n");
- return -ENOMEM;
+ size = cptvf->lfs.lfs_num * LMTLINE_SIZE + LMTLINE_ALIGN;
+ ret = cn10k_cpt_lmtst_alloc(pdev, &cptvf->lfs, size);
+ if (ret) {
+ dev_err(&pdev->dev, "VF-%d LMTLINE memory allocation failed\n",
+ cptvf->vf_id);
+ return ret;
+ }
+
+ ret = otx2_cpt_lmtst_tbl_setup_msg(&cptvf->lfs);
+ if (ret) {
+ dev_err(&pdev->dev, "VF-%d: LMTST Table setup failed\n",
+ cptvf->vf_id);
+ cn10k_cpt_lmtst_free(pdev, &cptvf->lfs);
}
return 0;
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
index 92be3ecf570f..ea5990048c21 100644
--- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
@@ -50,6 +50,7 @@ static inline u8 otx2_cpt_get_uc_compcode(union otx2_cpt_res_s *result)
int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf);
int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf);
+void cn10k_cpt_lmtst_free(struct pci_dev *pdev, struct otx2_cptlfs_info *lfs);
void cn10k_cpt_ctx_flush(struct pci_dev *pdev, u64 cptr, bool inval);
int cn10k_cpt_hw_ctx_init(struct pci_dev *pdev,
struct cn10k_cpt_errata_ctx *er_ctx);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index c5b7c57574ef..d529bcb03775 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -145,11 +145,8 @@ static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot,
static inline bool is_dev_otx2(struct pci_dev *pdev)
{
- if (pdev->device == OTX2_CPT_PCI_PF_DEVICE_ID ||
- pdev->device == OTX2_CPT_PCI_VF_DEVICE_ID)
- return true;
-
- return false;
+ return pdev->device == OTX2_CPT_PCI_PF_DEVICE_ID ||
+ pdev->device == OTX2_CPT_PCI_VF_DEVICE_ID;
}
static inline bool is_dev_cn10ka(struct pci_dev *pdev)
@@ -159,12 +156,10 @@ static inline bool is_dev_cn10ka(struct pci_dev *pdev)
static inline bool is_dev_cn10ka_ax(struct pci_dev *pdev)
{
- if (pdev->subsystem_device == CPT_PCI_SUBSYS_DEVID_CN10K_A &&
- ((pdev->revision & 0xFF) == 4 || (pdev->revision & 0xFF) == 0x50 ||
- (pdev->revision & 0xff) == 0x51))
- return true;
-
- return false;
+ return pdev->subsystem_device == CPT_PCI_SUBSYS_DEVID_CN10K_A &&
+ ((pdev->revision & 0xFF) == 4 ||
+ (pdev->revision & 0xFF) == 0x50 ||
+ (pdev->revision & 0xFF) == 0x51);
}
static inline bool is_dev_cn10kb(struct pci_dev *pdev)
@@ -174,11 +169,8 @@ static inline bool is_dev_cn10kb(struct pci_dev *pdev)
static inline bool is_dev_cn10ka_b0(struct pci_dev *pdev)
{
- if (pdev->subsystem_device == CPT_PCI_SUBSYS_DEVID_CN10K_A &&
- (pdev->revision & 0xFF) == 0x54)
- return true;
-
- return false;
+ return pdev->subsystem_device == CPT_PCI_SUBSYS_DEVID_CN10K_A &&
+ (pdev->revision & 0xFF) == 0x54;
}
static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev,
@@ -192,18 +184,12 @@ static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev,
static inline bool cpt_is_errata_38550_exists(struct pci_dev *pdev)
{
- if (is_dev_otx2(pdev) || is_dev_cn10ka_ax(pdev))
- return true;
-
- return false;
+ return is_dev_otx2(pdev) || is_dev_cn10ka_ax(pdev);
}
static inline bool cpt_feature_sgv2(struct pci_dev *pdev)
{
- if (!is_dev_otx2(pdev) && !is_dev_cn10ka_ax(pdev))
- return true;
-
- return false;
+ return !is_dev_otx2(pdev) && !is_dev_cn10ka_ax(pdev);
}
int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev);
@@ -223,5 +209,6 @@ int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs);
int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs);
int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox);
int otx2_cpt_lf_reset_msg(struct otx2_cptlfs_info *lfs, int slot);
+int otx2_cpt_lmtst_tbl_setup_msg(struct otx2_cptlfs_info *lfs);
#endif /* __OTX2_CPT_COMMON_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
index b8b7c8a3c0ca..95f3de3a34eb 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
@@ -255,3 +255,28 @@ int otx2_cpt_lf_reset_msg(struct otx2_cptlfs_info *lfs, int slot)
return ret;
}
EXPORT_SYMBOL_NS_GPL(otx2_cpt_lf_reset_msg, "CRYPTO_DEV_OCTEONTX2_CPT");
+
+int otx2_cpt_lmtst_tbl_setup_msg(struct otx2_cptlfs_info *lfs)
+{
+ struct otx2_mbox *mbox = lfs->mbox;
+ struct pci_dev *pdev = lfs->pdev;
+ struct lmtst_tbl_setup_req *req;
+
+ req = (struct lmtst_tbl_setup_req *)
+ otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
+ sizeof(struct msg_rsp));
+ if (!req) {
+ dev_err(&pdev->dev, "RVU MBOX failed to alloc message.\n");
+ return -EFAULT;
+ }
+
+ req->hdr.id = MBOX_MSG_LMTST_TBL_SETUP;
+ req->hdr.sig = OTX2_MBOX_REQ_SIG;
+ req->hdr.pcifunc = 0;
+
+ req->use_local_lmt_region = true;
+ req->lmt_iova = lfs->lmt_info.iova;
+
+ return otx2_cpt_send_mbox_msg(mbox, pdev);
+}
+EXPORT_SYMBOL_NS_GPL(otx2_cpt_lmtst_tbl_setup_msg, "CRYPTO_DEV_OCTEONTX2_CPT");
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
index b5d66afcc030..dc7c7a2650a5 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
@@ -433,10 +433,7 @@ int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri,
for (slot = 0; slot < lfs->lfs_num; slot++) {
lfs->lf[slot].lfs = lfs;
lfs->lf[slot].slot = slot;
- if (lfs->lmt_base)
- lfs->lf[slot].lmtline = lfs->lmt_base +
- (slot * LMTLINE_SIZE);
- else
+ if (!lfs->lmt_info.base)
lfs->lf[slot].lmtline = lfs->reg_base +
OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot,
OTX2_CPT_LMT_LF_LMTLINEX(0));
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index bd8604be2952..6e004a5568d8 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -105,11 +105,19 @@ struct cpt_hw_ops {
gfp_t gfp);
};
+#define LMTLINE_SIZE 128
+#define LMTLINE_ALIGN 128
+struct otx2_lmt_info {
+ void *base;
+ dma_addr_t iova;
+ u32 size;
+ u8 align;
+};
+
struct otx2_cptlfs_info {
/* Registers start address of VF/PF LFs are attached to */
void __iomem *reg_base;
-#define LMTLINE_SIZE 128
- void __iomem *lmt_base;
+ struct otx2_lmt_info lmt_info;
struct pci_dev *pdev; /* Device LFs are attached to */
struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM];
struct otx2_mbox *mbox;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 12971300296d..1c5c262af48d 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -639,6 +639,12 @@ static int cptpf_device_init(struct otx2_cptpf_dev *cptpf)
/* Disable all cores */
ret = otx2_cpt_disable_all_cores(cptpf);
+ otx2_cptlf_set_dev_info(&cptpf->lfs, cptpf->pdev, cptpf->reg_base,
+ &cptpf->afpf_mbox, BLKADDR_CPT0);
+ if (cptpf->has_cpt1)
+ otx2_cptlf_set_dev_info(&cptpf->cpt1_lfs, cptpf->pdev,
+ cptpf->reg_base, &cptpf->afpf_mbox,
+ BLKADDR_CPT1);
return ret;
}
@@ -786,19 +792,19 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
cptpf->max_vfs = pci_sriov_get_totalvfs(pdev);
cptpf->kvf_limits = 1;
- err = cn10k_cptpf_lmtst_init(cptpf);
+ /* Initialize CPT PF device */
+ err = cptpf_device_init(cptpf);
if (err)
goto unregister_intr;
- /* Initialize CPT PF device */
- err = cptpf_device_init(cptpf);
+ err = cn10k_cptpf_lmtst_init(cptpf);
if (err)
goto unregister_intr;
/* Initialize engine groups */
err = otx2_cpt_init_eng_grps(pdev, &cptpf->eng_grps);
if (err)
- goto unregister_intr;
+ goto free_lmtst;
err = sysfs_create_group(&dev->kobj, &cptpf_sysfs_group);
if (err)
@@ -814,6 +820,8 @@ sysfs_grp_del:
sysfs_remove_group(&dev->kobj, &cptpf_sysfs_group);
cleanup_eng_grps:
otx2_cpt_cleanup_eng_grps(pdev, &cptpf->eng_grps);
+free_lmtst:
+ cn10k_cpt_lmtst_free(pdev, &cptpf->lfs);
unregister_intr:
cptpf_disable_afpf_mbox_intr(cptpf);
destroy_afpf_mbox:
@@ -848,6 +856,8 @@ static void otx2_cptpf_remove(struct pci_dev *pdev)
cptpf_disable_afpf_mbox_intr(cptpf);
/* Destroy AF-PF mbox */
cptpf_afpf_mbox_destroy(cptpf);
+ /* Free LMTST memory */
+ cn10k_cpt_lmtst_free(pdev, &cptpf->lfs);
pci_set_drvdata(pdev, NULL);
}
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
index ec1ac7e836a3..12c0e966fa65 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
@@ -264,8 +264,6 @@ static int handle_msg_rx_inline_ipsec_lf_cfg(struct otx2_cptpf_dev *cptpf,
return -ENOENT;
}
- otx2_cptlf_set_dev_info(&cptpf->lfs, cptpf->pdev, cptpf->reg_base,
- &cptpf->afpf_mbox, BLKADDR_CPT0);
cptpf->lfs.global_slot = 0;
cptpf->lfs.ctx_ilen_ovrd = cfg_req->ctx_ilen_valid;
cptpf->lfs.ctx_ilen = cfg_req->ctx_ilen;
@@ -278,9 +276,6 @@ static int handle_msg_rx_inline_ipsec_lf_cfg(struct otx2_cptpf_dev *cptpf,
if (cptpf->has_cpt1) {
cptpf->rsrc_req_blkaddr = BLKADDR_CPT1;
- otx2_cptlf_set_dev_info(&cptpf->cpt1_lfs, cptpf->pdev,
- cptpf->reg_base, &cptpf->afpf_mbox,
- BLKADDR_CPT1);
cptpf->cpt1_lfs.global_slot = num_lfs;
cptpf->cpt1_lfs.ctx_ilen_ovrd = cfg_req->ctx_ilen_valid;
cptpf->cpt1_lfs.ctx_ilen = cfg_req->ctx_ilen;
@@ -507,6 +502,7 @@ static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf,
case MBOX_MSG_CPT_INLINE_IPSEC_CFG:
case MBOX_MSG_NIX_INLINE_IPSEC_CFG:
case MBOX_MSG_CPT_LF_RESET:
+ case MBOX_MSG_LMTST_TBL_SETUP:
break;
default:
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 42c5484ce66a..78367849c3d5 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1513,8 +1513,6 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
if (ret)
goto delete_grps;
- otx2_cptlf_set_dev_info(lfs, cptpf->pdev, cptpf->reg_base,
- &cptpf->afpf_mbox, BLKADDR_CPT0);
ret = otx2_cptlf_init(lfs, OTX2_CPT_ALL_ENG_GRPS_MASK,
OTX2_CPT_QUEUE_HI_PRIO, 1);
if (ret)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
index d84eebdf2fa8..56904bdfd6e8 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
@@ -283,8 +283,6 @@ static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf)
lfs_num = cptvf->lfs.kvf_limits;
- otx2_cptlf_set_dev_info(lfs, cptvf->pdev, cptvf->reg_base,
- &cptvf->pfvf_mbox, cptvf->blkaddr);
ret = otx2_cptlf_init(lfs, eng_grp_msk, OTX2_CPT_QUEUE_HI_PRIO,
lfs_num);
if (ret)
@@ -378,10 +376,6 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag);
- ret = cn10k_cptvf_lmtst_init(cptvf);
- if (ret)
- goto clear_drvdata;
-
/* Initialize PF<=>VF mailbox */
ret = cptvf_pfvf_mbox_init(cptvf);
if (ret)
@@ -396,6 +390,9 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
cptvf_hw_ops_get(cptvf);
+ otx2_cptlf_set_dev_info(&cptvf->lfs, cptvf->pdev, cptvf->reg_base,
+ &cptvf->pfvf_mbox, cptvf->blkaddr);
+
ret = otx2_cptvf_send_caps_msg(cptvf);
if (ret) {
dev_err(&pdev->dev, "Couldn't get CPT engine capabilities.\n");
@@ -404,13 +401,19 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
if (cptvf->eng_caps[OTX2_CPT_SE_TYPES] & BIT_ULL(35))
cptvf->lfs.ops->cpt_sg_info_create = cn10k_sgv2_info_create;
+ ret = cn10k_cptvf_lmtst_init(cptvf);
+ if (ret)
+ goto unregister_interrupts;
+
/* Initialize CPT LFs */
ret = cptvf_lf_init(cptvf);
if (ret)
- goto unregister_interrupts;
+ goto free_lmtst;
return 0;
+free_lmtst:
+ cn10k_cpt_lmtst_free(pdev, &cptvf->lfs);
unregister_interrupts:
cptvf_disable_pfvf_mbox_intrs(cptvf);
destroy_pfvf_mbox:
@@ -434,6 +437,8 @@ static void otx2_cptvf_remove(struct pci_dev *pdev)
cptvf_disable_pfvf_mbox_intrs(cptvf);
/* Destroy PF-VF mbox */
cptvf_pfvf_mbox_destroy(cptvf);
+ /* Free LMTST memory */
+ cn10k_cpt_lmtst_free(pdev, &cptvf->lfs);
pci_set_drvdata(pdev, NULL);
}
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
index d9fa5f6e204d..931b72580fd9 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
@@ -134,6 +134,7 @@ static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf,
sizeof(cptvf->eng_caps));
break;
case MBOX_MSG_CPT_LF_RESET:
+ case MBOX_MSG_LMTST_TBL_SETUP:
break;
default:
dev_err(&cptvf->pdev->dev, "Unsupported msg %d received.\n",
diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c
index 0e440f704a8f..35fa5bad1d9f 100644
--- a/drivers/crypto/nx/nx-aes-cbc.c
+++ b/drivers/crypto/nx/nx-aes-cbc.c
@@ -8,10 +8,12 @@
*/
#include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
#include <asm/vio.h>
#include "nx_csbcpb.h"
diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index dfa3ad1a12f2..709b3ee74657 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -9,10 +9,12 @@
#include <crypto/aes.h>
#include <crypto/ctr.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
#include <asm/vio.h>
#include "nx_csbcpb.h"
diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c
index 502a565074e9..4039cf3b22d4 100644
--- a/drivers/crypto/nx/nx-aes-ecb.c
+++ b/drivers/crypto/nx/nx-aes-ecb.c
@@ -8,10 +8,12 @@
*/
#include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
#include <asm/vio.h>
#include "nx_csbcpb.h"
diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c
index eb5c8f689360..bf465d824e2c 100644
--- a/drivers/crypto/nx/nx-aes-xcbc.c
+++ b/drivers/crypto/nx/nx-aes-xcbc.c
@@ -7,13 +7,14 @@
* Author: Kent Yoder <yoder1@us.ibm.com>
*/
-#include <crypto/internal/hash.h>
#include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <linux/atomic.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <asm/vio.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
#include "nx_csbcpb.h"
#include "nx.h"
@@ -21,8 +22,6 @@
struct xcbc_state {
u8 state[AES_BLOCK_SIZE];
- unsigned int count;
- u8 buffer[AES_BLOCK_SIZE];
};
static int nx_xcbc_set_key(struct crypto_shash *desc,
@@ -58,7 +57,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc,
*/
static int nx_xcbc_empty(struct shash_desc *desc, u8 *out)
{
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
struct nx_sg *in_sg, *out_sg;
u8 keys[2][AES_BLOCK_SIZE];
@@ -135,9 +134,9 @@ out:
return rc;
}
-static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm)
+static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_shash *tfm)
{
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(tfm);
struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
int err;
@@ -166,31 +165,24 @@ static int nx_xcbc_update(struct shash_desc *desc,
const u8 *data,
unsigned int len)
{
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct xcbc_state *sctx = shash_desc_ctx(desc);
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
struct nx_sg *in_sg;
struct nx_sg *out_sg;
- u32 to_process = 0, leftover, total;
unsigned int max_sg_len;
unsigned long irq_flags;
+ u32 to_process, total;
int rc = 0;
int data_len;
spin_lock_irqsave(&nx_ctx->lock, irq_flags);
+ memcpy(csbcpb->cpb.aes_xcbc.out_cv_mac, sctx->state, AES_BLOCK_SIZE);
+ NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
+ NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
- total = sctx->count + len;
-
- /* 2 cases for total data len:
- * 1: <= AES_BLOCK_SIZE: copy into state, return 0
- * 2: > AES_BLOCK_SIZE: process X blocks, copy in leftover
- */
- if (total <= AES_BLOCK_SIZE) {
- memcpy(sctx->buffer + sctx->count, data, len);
- sctx->count += len;
- goto out;
- }
+ total = len;
in_sg = nx_ctx->in_sg;
max_sg_len = min_t(u64, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
@@ -200,7 +192,7 @@ static int nx_xcbc_update(struct shash_desc *desc,
data_len = AES_BLOCK_SIZE;
out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state,
- &len, nx_ctx->ap->sglen);
+ &data_len, nx_ctx->ap->sglen);
if (data_len != AES_BLOCK_SIZE) {
rc = -EINVAL;
@@ -210,56 +202,21 @@ static int nx_xcbc_update(struct shash_desc *desc,
nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg);
do {
- to_process = total - to_process;
- to_process = to_process & ~(AES_BLOCK_SIZE - 1);
-
- leftover = total - to_process;
-
- /* the hardware will not accept a 0 byte operation for this
- * algorithm and the operation MUST be finalized to be correct.
- * So if we happen to get an update that falls on a block sized
- * boundary, we must save off the last block to finalize with
- * later. */
- if (!leftover) {
- to_process -= AES_BLOCK_SIZE;
- leftover = AES_BLOCK_SIZE;
- }
-
- if (sctx->count) {
- data_len = sctx->count;
- in_sg = nx_build_sg_list(nx_ctx->in_sg,
- (u8 *) sctx->buffer,
- &data_len,
- max_sg_len);
- if (data_len != sctx->count) {
- rc = -EINVAL;
- goto out;
- }
- }
+ to_process = total & ~(AES_BLOCK_SIZE - 1);
- data_len = to_process - sctx->count;
in_sg = nx_build_sg_list(in_sg,
(u8 *) data,
- &data_len,
+ &to_process,
max_sg_len);
- if (data_len != to_process - sctx->count) {
- rc = -EINVAL;
- goto out;
- }
-
nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) *
sizeof(struct nx_sg);
/* we've hit the nx chip previously and we're updating again,
* so copy over the partial digest */
- if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) {
- memcpy(csbcpb->cpb.aes_xcbc.cv,
- csbcpb->cpb.aes_xcbc.out_cv_mac,
- AES_BLOCK_SIZE);
- }
+ memcpy(csbcpb->cpb.aes_xcbc.cv,
+ csbcpb->cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE);
- NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) {
rc = -EINVAL;
goto out;
@@ -271,28 +228,24 @@ static int nx_xcbc_update(struct shash_desc *desc,
atomic_inc(&(nx_ctx->stats->aes_ops));
- /* everything after the first update is continuation */
- NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
-
total -= to_process;
- data += to_process - sctx->count;
- sctx->count = 0;
+ data += to_process;
in_sg = nx_ctx->in_sg;
- } while (leftover > AES_BLOCK_SIZE);
+ } while (total >= AES_BLOCK_SIZE);
- /* copy the leftover back into the state struct */
- memcpy(sctx->buffer, data, leftover);
- sctx->count = leftover;
+ rc = total;
+ memcpy(sctx->state, csbcpb->cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE);
out:
spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
return rc;
}
-static int nx_xcbc_final(struct shash_desc *desc, u8 *out)
+static int nx_xcbc_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int nbytes, u8 *out)
{
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct xcbc_state *sctx = shash_desc_ctx(desc);
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
struct nx_sg *in_sg, *out_sg;
unsigned long irq_flags;
@@ -301,12 +254,10 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out)
spin_lock_irqsave(&nx_ctx->lock, irq_flags);
- if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) {
- /* we've hit the nx chip previously, now we're finalizing,
- * so copy over the partial digest */
- memcpy(csbcpb->cpb.aes_xcbc.cv,
- csbcpb->cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE);
- } else if (sctx->count == 0) {
+ if (nbytes) {
+ /* non-zero final, so copy over the partial digest */
+ memcpy(csbcpb->cpb.aes_xcbc.cv, sctx->state, AES_BLOCK_SIZE);
+ } else {
/*
* we've never seen an update, so this is a 0 byte op. The
* hardware cannot handle a 0 byte op, so just ECB to
@@ -320,11 +271,11 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out)
* this is not an intermediate operation */
NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
- len = sctx->count;
- in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer,
- &len, nx_ctx->ap->sglen);
+ len = nbytes;
+ in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)src, &len,
+ nx_ctx->ap->sglen);
- if (len != sctx->count) {
+ if (len != nbytes) {
rc = -EINVAL;
goto out;
}
@@ -362,18 +313,19 @@ struct shash_alg nx_shash_aes_xcbc_alg = {
.digestsize = AES_BLOCK_SIZE,
.init = nx_xcbc_init,
.update = nx_xcbc_update,
- .final = nx_xcbc_final,
+ .finup = nx_xcbc_finup,
.setkey = nx_xcbc_set_key,
.descsize = sizeof(struct xcbc_state),
- .statesize = sizeof(struct xcbc_state),
+ .init_tfm = nx_crypto_ctx_aes_xcbc_init2,
+ .exit_tfm = nx_crypto_ctx_shash_exit,
.base = {
.cra_name = "xcbc(aes)",
.cra_driver_name = "xcbc-aes-nx",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINAL_NONZERO,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct nx_crypto_ctx),
- .cra_init = nx_crypto_ctx_aes_xcbc_init2,
- .cra_exit = nx_crypto_ctx_exit,
}
};
diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c
index c3bebf0feabe..5b29dd026df2 100644
--- a/drivers/crypto/nx/nx-sha256.c
+++ b/drivers/crypto/nx/nx-sha256.c
@@ -9,9 +9,12 @@
#include <crypto/internal/hash.h>
#include <crypto/sha2.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/vio.h>
-#include <asm/byteorder.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
#include "nx_csbcpb.h"
#include "nx.h"
@@ -19,12 +22,11 @@
struct sha256_state_be {
__be32 state[SHA256_DIGEST_SIZE / 4];
u64 count;
- u8 buf[SHA256_BLOCK_SIZE];
};
-static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
+static int nx_crypto_ctx_sha256_init(struct crypto_shash *tfm)
{
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(tfm);
int err;
err = nx_crypto_ctx_sha_init(tfm);
@@ -40,11 +42,10 @@ static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
return 0;
}
-static int nx_sha256_init(struct shash_desc *desc) {
+static int nx_sha256_init(struct shash_desc *desc)
+{
struct sha256_state_be *sctx = shash_desc_ctx(desc);
- memset(sctx, 0, sizeof *sctx);
-
sctx->state[0] = __cpu_to_be32(SHA256_H0);
sctx->state[1] = __cpu_to_be32(SHA256_H1);
sctx->state[2] = __cpu_to_be32(SHA256_H2);
@@ -61,30 +62,18 @@ static int nx_sha256_init(struct shash_desc *desc) {
static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct sha256_state_be *sctx = shash_desc_ctx(desc);
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
+ u64 to_process, leftover, total = len;
struct nx_sg *out_sg;
- u64 to_process = 0, leftover, total;
unsigned long irq_flags;
int rc = 0;
int data_len;
u32 max_sg_len;
- u64 buf_len = (sctx->count % SHA256_BLOCK_SIZE);
spin_lock_irqsave(&nx_ctx->lock, irq_flags);
- /* 2 cases for total data len:
- * 1: < SHA256_BLOCK_SIZE: copy into state, return 0
- * 2: >= SHA256_BLOCK_SIZE: process X blocks, copy in leftover
- */
- total = (sctx->count % SHA256_BLOCK_SIZE) + len;
- if (total < SHA256_BLOCK_SIZE) {
- memcpy(sctx->buf + buf_len, data, len);
- sctx->count += len;
- goto out;
- }
-
memcpy(csbcpb->cpb.sha256.message_digest, sctx->state, SHA256_DIGEST_SIZE);
NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
@@ -105,41 +94,17 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
}
do {
- int used_sgs = 0;
struct nx_sg *in_sg = nx_ctx->in_sg;
- if (buf_len) {
- data_len = buf_len;
- in_sg = nx_build_sg_list(in_sg,
- (u8 *) sctx->buf,
- &data_len,
- max_sg_len);
-
- if (data_len != buf_len) {
- rc = -EINVAL;
- goto out;
- }
- used_sgs = in_sg - nx_ctx->in_sg;
- }
+ to_process = total & ~(SHA256_BLOCK_SIZE - 1);
- /* to_process: SHA256_BLOCK_SIZE aligned chunk to be
- * processed in this iteration. This value is restricted
- * by sg list limits and number of sgs we already used
- * for leftover data. (see above)
- * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len,
- * but because data may not be aligned, we need to account
- * for that too. */
- to_process = min_t(u64, total,
- (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE);
- to_process = to_process & ~(SHA256_BLOCK_SIZE - 1);
-
- data_len = to_process - buf_len;
+ data_len = to_process;
in_sg = nx_build_sg_list(in_sg, (u8 *) data,
&data_len, max_sg_len);
nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);
- to_process = data_len + buf_len;
+ to_process = data_len;
leftover = total - to_process;
/*
@@ -162,26 +127,22 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
atomic_inc(&(nx_ctx->stats->sha256_ops));
total -= to_process;
- data += to_process - buf_len;
- buf_len = 0;
-
+ data += to_process;
+ sctx->count += to_process;
} while (leftover >= SHA256_BLOCK_SIZE);
- /* copy the leftover back into the state struct */
- if (leftover)
- memcpy(sctx->buf, data, leftover);
-
- sctx->count += len;
+ rc = leftover;
memcpy(sctx->state, csbcpb->cpb.sha256.message_digest, SHA256_DIGEST_SIZE);
out:
spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
return rc;
}
-static int nx_sha256_final(struct shash_desc *desc, u8 *out)
+static int nx_sha256_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int nbytes, u8 *out)
{
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct sha256_state_be *sctx = shash_desc_ctx(desc);
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
struct nx_sg *in_sg, *out_sg;
unsigned long irq_flags;
@@ -197,25 +158,19 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out)
nx_ctx->ap->databytelen/NX_PAGE_SIZE);
/* final is represented by continuing the operation and indicating that
- * this is not an intermediate operation */
- if (sctx->count >= SHA256_BLOCK_SIZE) {
- /* we've hit the nx chip previously, now we're finalizing,
- * so copy over the partial digest */
- memcpy(csbcpb->cpb.sha256.input_partial_digest, sctx->state, SHA256_DIGEST_SIZE);
- NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
- NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
- } else {
- NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
- NX_CPB_FDM(csbcpb) &= ~NX_FDM_CONTINUATION;
- }
+ * this is not an intermediate operation
+ * copy over the partial digest */
+ memcpy(csbcpb->cpb.sha256.input_partial_digest, sctx->state, SHA256_DIGEST_SIZE);
+ NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
+ NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
+ sctx->count += nbytes;
csbcpb->cpb.sha256.message_bit_length = (u64) (sctx->count * 8);
- len = sctx->count & (SHA256_BLOCK_SIZE - 1);
- in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) sctx->buf,
- &len, max_sg_len);
+ len = nbytes;
+ in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)src, &len, max_sg_len);
- if (len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) {
+ if (len != nbytes) {
rc = -EINVAL;
goto out;
}
@@ -251,18 +206,34 @@ out:
static int nx_sha256_export(struct shash_desc *desc, void *out)
{
struct sha256_state_be *sctx = shash_desc_ctx(desc);
+ union {
+ u8 *u8;
+ u32 *u32;
+ u64 *u64;
+ } p = { .u8 = out };
+ int i;
- memcpy(out, sctx, sizeof(*sctx));
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(*p.u32); i++)
+ put_unaligned(be32_to_cpu(sctx->state[i]), p.u32++);
+ put_unaligned(sctx->count, p.u64++);
return 0;
}
static int nx_sha256_import(struct shash_desc *desc, const void *in)
{
struct sha256_state_be *sctx = shash_desc_ctx(desc);
+ union {
+ const u8 *u8;
+ const u32 *u32;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int i;
- memcpy(sctx, in, sizeof(*sctx));
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(*p.u32); i++)
+ sctx->state[i] = cpu_to_be32(get_unaligned(p.u32++));
+ sctx->count = get_unaligned(p.u64++);
return 0;
}
@@ -270,19 +241,20 @@ struct shash_alg nx_shash_sha256_alg = {
.digestsize = SHA256_DIGEST_SIZE,
.init = nx_sha256_init,
.update = nx_sha256_update,
- .final = nx_sha256_final,
+ .finup = nx_sha256_finup,
.export = nx_sha256_export,
.import = nx_sha256_import,
+ .init_tfm = nx_crypto_ctx_sha256_init,
+ .exit_tfm = nx_crypto_ctx_shash_exit,
.descsize = sizeof(struct sha256_state_be),
.statesize = sizeof(struct sha256_state_be),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-nx",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct nx_crypto_ctx),
- .cra_init = nx_crypto_ctx_sha256_init,
- .cra_exit = nx_crypto_ctx_exit,
}
};
diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c
index 1ffb40d2c324..f74776b7d7d7 100644
--- a/drivers/crypto/nx/nx-sha512.c
+++ b/drivers/crypto/nx/nx-sha512.c
@@ -9,8 +9,12 @@
#include <crypto/internal/hash.h>
#include <crypto/sha2.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/vio.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
#include "nx_csbcpb.h"
#include "nx.h"
@@ -18,12 +22,11 @@
struct sha512_state_be {
__be64 state[SHA512_DIGEST_SIZE / 8];
u64 count[2];
- u8 buf[SHA512_BLOCK_SIZE];
};
-static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
+static int nx_crypto_ctx_sha512_init(struct crypto_shash *tfm)
{
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(tfm);
int err;
err = nx_crypto_ctx_sha_init(tfm);
@@ -43,8 +46,6 @@ static int nx_sha512_init(struct shash_desc *desc)
{
struct sha512_state_be *sctx = shash_desc_ctx(desc);
- memset(sctx, 0, sizeof *sctx);
-
sctx->state[0] = __cpu_to_be64(SHA512_H0);
sctx->state[1] = __cpu_to_be64(SHA512_H1);
sctx->state[2] = __cpu_to_be64(SHA512_H2);
@@ -54,6 +55,7 @@ static int nx_sha512_init(struct shash_desc *desc)
sctx->state[6] = __cpu_to_be64(SHA512_H6);
sctx->state[7] = __cpu_to_be64(SHA512_H7);
sctx->count[0] = 0;
+ sctx->count[1] = 0;
return 0;
}
@@ -61,30 +63,18 @@ static int nx_sha512_init(struct shash_desc *desc)
static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct sha512_state_be *sctx = shash_desc_ctx(desc);
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
+ u64 to_process, leftover, total = len;
struct nx_sg *out_sg;
- u64 to_process, leftover = 0, total;
unsigned long irq_flags;
int rc = 0;
int data_len;
u32 max_sg_len;
- u64 buf_len = (sctx->count[0] % SHA512_BLOCK_SIZE);
spin_lock_irqsave(&nx_ctx->lock, irq_flags);
- /* 2 cases for total data len:
- * 1: < SHA512_BLOCK_SIZE: copy into state, return 0
- * 2: >= SHA512_BLOCK_SIZE: process X blocks, copy in leftover
- */
- total = (sctx->count[0] % SHA512_BLOCK_SIZE) + len;
- if (total < SHA512_BLOCK_SIZE) {
- memcpy(sctx->buf + buf_len, data, len);
- sctx->count[0] += len;
- goto out;
- }
-
memcpy(csbcpb->cpb.sha512.message_digest, sctx->state, SHA512_DIGEST_SIZE);
NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
@@ -105,45 +95,17 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
}
do {
- int used_sgs = 0;
struct nx_sg *in_sg = nx_ctx->in_sg;
- if (buf_len) {
- data_len = buf_len;
- in_sg = nx_build_sg_list(in_sg,
- (u8 *) sctx->buf,
- &data_len, max_sg_len);
-
- if (data_len != buf_len) {
- rc = -EINVAL;
- goto out;
- }
- used_sgs = in_sg - nx_ctx->in_sg;
- }
+ to_process = total & ~(SHA512_BLOCK_SIZE - 1);
- /* to_process: SHA512_BLOCK_SIZE aligned chunk to be
- * processed in this iteration. This value is restricted
- * by sg list limits and number of sgs we already used
- * for leftover data. (see above)
- * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len,
- * but because data may not be aligned, we need to account
- * for that too. */
- to_process = min_t(u64, total,
- (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE);
- to_process = to_process & ~(SHA512_BLOCK_SIZE - 1);
-
- data_len = to_process - buf_len;
+ data_len = to_process;
in_sg = nx_build_sg_list(in_sg, (u8 *) data,
&data_len, max_sg_len);
nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);
- if (data_len != (to_process - buf_len)) {
- rc = -EINVAL;
- goto out;
- }
-
- to_process = data_len + buf_len;
+ to_process = data_len;
leftover = total - to_process;
/*
@@ -166,30 +128,29 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
atomic_inc(&(nx_ctx->stats->sha512_ops));
total -= to_process;
- data += to_process - buf_len;
- buf_len = 0;
-
+ data += to_process;
+ sctx->count[0] += to_process;
+ if (sctx->count[0] < to_process)
+ sctx->count[1]++;
} while (leftover >= SHA512_BLOCK_SIZE);
- /* copy the leftover back into the state struct */
- if (leftover)
- memcpy(sctx->buf, data, leftover);
- sctx->count[0] += len;
+ rc = leftover;
memcpy(sctx->state, csbcpb->cpb.sha512.message_digest, SHA512_DIGEST_SIZE);
out:
spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
return rc;
}
-static int nx_sha512_final(struct shash_desc *desc, u8 *out)
+static int nx_sha512_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int nbytes, u8 *out)
{
struct sha512_state_be *sctx = shash_desc_ctx(desc);
- struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
+ struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc->tfm);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
struct nx_sg *in_sg, *out_sg;
u32 max_sg_len;
- u64 count0;
unsigned long irq_flags;
+ u64 count0, count1;
int rc = 0;
int len;
@@ -201,30 +162,23 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out)
nx_ctx->ap->databytelen/NX_PAGE_SIZE);
/* final is represented by continuing the operation and indicating that
- * this is not an intermediate operation */
- if (sctx->count[0] >= SHA512_BLOCK_SIZE) {
- /* we've hit the nx chip previously, now we're finalizing,
- * so copy over the partial digest */
- memcpy(csbcpb->cpb.sha512.input_partial_digest, sctx->state,
- SHA512_DIGEST_SIZE);
- NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
- NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
- } else {
- NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
- NX_CPB_FDM(csbcpb) &= ~NX_FDM_CONTINUATION;
- }
-
+ * this is not an intermediate operation
+ * copy over the partial digest */
+ memcpy(csbcpb->cpb.sha512.input_partial_digest, sctx->state, SHA512_DIGEST_SIZE);
NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE;
+ NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
- count0 = sctx->count[0] * 8;
+ count0 = sctx->count[0] + nbytes;
+ count1 = sctx->count[1];
- csbcpb->cpb.sha512.message_bit_length_lo = count0;
+ csbcpb->cpb.sha512.message_bit_length_lo = count0 << 3;
+ csbcpb->cpb.sha512.message_bit_length_hi = (count1 << 3) |
+ (count0 >> 61);
- len = sctx->count[0] & (SHA512_BLOCK_SIZE - 1);
- in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, &len,
- max_sg_len);
+ len = nbytes;
+ in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)src, &len, max_sg_len);
- if (len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) {
+ if (len != nbytes) {
rc = -EINVAL;
goto out;
}
@@ -246,7 +200,7 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out)
goto out;
atomic_inc(&(nx_ctx->stats->sha512_ops));
- atomic64_add(sctx->count[0], &(nx_ctx->stats->sha512_bytes));
+ atomic64_add(count0, &(nx_ctx->stats->sha512_bytes));
memcpy(out, csbcpb->cpb.sha512.message_digest, SHA512_DIGEST_SIZE);
out:
@@ -257,18 +211,34 @@ out:
static int nx_sha512_export(struct shash_desc *desc, void *out)
{
struct sha512_state_be *sctx = shash_desc_ctx(desc);
+ union {
+ u8 *u8;
+ u64 *u64;
+ } p = { .u8 = out };
+ int i;
- memcpy(out, sctx, sizeof(*sctx));
+ for (i = 0; i < SHA512_DIGEST_SIZE / sizeof(*p.u64); i++)
+ put_unaligned(be64_to_cpu(sctx->state[i]), p.u64++);
+ put_unaligned(sctx->count[0], p.u64++);
+ put_unaligned(sctx->count[1], p.u64++);
return 0;
}
static int nx_sha512_import(struct shash_desc *desc, const void *in)
{
struct sha512_state_be *sctx = shash_desc_ctx(desc);
+ union {
+ const u8 *u8;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int i;
- memcpy(sctx, in, sizeof(*sctx));
+ for (i = 0; i < SHA512_DIGEST_SIZE / sizeof(*p.u64); i++)
+ sctx->state[i] = cpu_to_be64(get_unaligned(p.u64++));
+ sctx->count[0] = get_unaligned(p.u64++);
+ sctx->count[1] = get_unaligned(p.u64++);
return 0;
}
@@ -276,19 +246,20 @@ struct shash_alg nx_shash_sha512_alg = {
.digestsize = SHA512_DIGEST_SIZE,
.init = nx_sha512_init,
.update = nx_sha512_update,
- .final = nx_sha512_final,
+ .finup = nx_sha512_finup,
.export = nx_sha512_export,
.import = nx_sha512_import,
+ .init_tfm = nx_crypto_ctx_sha512_init,
+ .exit_tfm = nx_crypto_ctx_shash_exit,
.descsize = sizeof(struct sha512_state_be),
.statesize = sizeof(struct sha512_state_be),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-nx",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct nx_crypto_ctx),
- .cra_init = nx_crypto_ctx_sha512_init,
- .cra_exit = nx_crypto_ctx_exit,
}
};
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index a3b979193d9b..78135fb13f5c 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -7,11 +7,11 @@
* Author: Kent Yoder <yoder1@us.ibm.com>
*/
+#include <crypto/aes.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
-#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/sha2.h>
-#include <crypto/algapi.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -124,8 +124,6 @@ struct nx_sg *nx_build_sg_list(struct nx_sg *sg_head,
}
if ((sg - sg_head) == sgmax) {
- pr_err("nx: scatter/gather list overflow, pid: %d\n",
- current->pid);
sg++;
break;
}
@@ -702,14 +700,14 @@ int nx_crypto_ctx_aes_ecb_init(struct crypto_skcipher *tfm)
NX_MODE_AES_ECB);
}
-int nx_crypto_ctx_sha_init(struct crypto_tfm *tfm)
+int nx_crypto_ctx_sha_init(struct crypto_shash *tfm)
{
- return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_SHA, NX_MODE_SHA);
+ return nx_crypto_ctx_init(crypto_shash_ctx(tfm), NX_FC_SHA, NX_MODE_SHA);
}
-int nx_crypto_ctx_aes_xcbc_init(struct crypto_tfm *tfm)
+int nx_crypto_ctx_aes_xcbc_init(struct crypto_shash *tfm)
{
- return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES,
+ return nx_crypto_ctx_init(crypto_shash_ctx(tfm), NX_FC_AES,
NX_MODE_AES_XCBC_MAC);
}
@@ -744,6 +742,11 @@ void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm)
kfree_sensitive(nx_ctx->kmem);
}
+void nx_crypto_ctx_shash_exit(struct crypto_shash *tfm)
+{
+ nx_crypto_ctx_exit(crypto_shash_ctx(tfm));
+}
+
static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id)
{
dev_dbg(&viodev->dev, "driver probed: %s resource id: 0x%x\n",
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index e1b4b6927bec..36974f08490a 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -3,7 +3,11 @@
#ifndef __NX_H__
#define __NX_H__
+#include <asm/vio.h>
#include <crypto/ctr.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
#define NX_NAME "nx-crypto"
#define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver"
@@ -139,19 +143,20 @@ struct nx_crypto_ctx {
} priv;
};
-struct crypto_aead;
+struct scatterlist;
/* prototypes */
int nx_crypto_ctx_aes_ccm_init(struct crypto_aead *tfm);
int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm);
-int nx_crypto_ctx_aes_xcbc_init(struct crypto_tfm *tfm);
+int nx_crypto_ctx_aes_xcbc_init(struct crypto_shash *tfm);
int nx_crypto_ctx_aes_ctr_init(struct crypto_skcipher *tfm);
int nx_crypto_ctx_aes_cbc_init(struct crypto_skcipher *tfm);
int nx_crypto_ctx_aes_ecb_init(struct crypto_skcipher *tfm);
-int nx_crypto_ctx_sha_init(struct crypto_tfm *tfm);
+int nx_crypto_ctx_sha_init(struct crypto_shash *tfm);
void nx_crypto_ctx_exit(struct crypto_tfm *tfm);
void nx_crypto_ctx_skcipher_exit(struct crypto_skcipher *tfm);
void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm);
+void nx_crypto_ctx_shash_exit(struct crypto_shash *tfm);
void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function);
int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op,
u32 may_sleep);
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 551dd32a8db0..1ecf5f6ac04e 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1086,10 +1086,7 @@ static struct attribute *omap_aes_attrs[] = {
&dev_attr_fallback.attr,
NULL,
};
-
-static const struct attribute_group omap_aes_attr_group = {
- .attrs = omap_aes_attrs,
-};
+ATTRIBUTE_GROUPS(omap_aes);
static int omap_aes_probe(struct platform_device *pdev)
{
@@ -1215,12 +1212,6 @@ static int omap_aes_probe(struct platform_device *pdev)
}
}
- err = sysfs_create_group(&dev->kobj, &omap_aes_attr_group);
- if (err) {
- dev_err(dev, "could not create sysfs device attrs\n");
- goto err_aead_algs;
- }
-
return 0;
err_aead_algs:
for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) {
@@ -1277,8 +1268,6 @@ static void omap_aes_remove(struct platform_device *pdev)
tasklet_kill(&dd->done_task);
omap_aes_dma_cleanup(dd);
pm_runtime_disable(dd->dev);
-
- sysfs_remove_group(&dd->dev->kobj, &omap_aes_attr_group);
}
#ifdef CONFIG_PM_SLEEP
@@ -1304,6 +1293,7 @@ static struct platform_driver omap_aes_driver = {
.name = "omap-aes",
.pm = &omap_aes_pm_ops,
.of_match_table = omap_aes_of_match,
+ .dev_groups = omap_aes_groups,
},
};
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 7021481bf027..56f192cb976d 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -2039,10 +2039,7 @@ static struct attribute *omap_sham_attrs[] = {
&dev_attr_fallback.attr,
NULL,
};
-
-static const struct attribute_group omap_sham_attr_group = {
- .attrs = omap_sham_attrs,
-};
+ATTRIBUTE_GROUPS(omap_sham);
static int omap_sham_probe(struct platform_device *pdev)
{
@@ -2158,12 +2155,6 @@ static int omap_sham_probe(struct platform_device *pdev)
}
}
- err = sysfs_create_group(&dev->kobj, &omap_sham_attr_group);
- if (err) {
- dev_err(dev, "could not create sysfs device attrs\n");
- goto err_algs;
- }
-
return 0;
err_algs:
@@ -2210,8 +2201,6 @@ static void omap_sham_remove(struct platform_device *pdev)
if (!dd->polling_mode)
dma_release_channel(dd->dma_lch);
-
- sysfs_remove_group(&dd->dev->kobj, &omap_sham_attr_group);
}
static struct platform_driver omap_sham_driver = {
@@ -2220,6 +2209,7 @@ static struct platform_driver omap_sham_driver = {
.driver = {
.name = "omap-sham",
.of_match_table = omap_sham_of_match,
+ .dev_groups = omap_sham_groups,
},
};
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index db9e84c0c9fb..329f60ad422e 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -7,59 +7,89 @@
* Copyright (c) 2006 Michal Ludvig <michal@logix.cz>
*/
+#include <asm/cpu_device_id.h>
#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha1.h>
#include <crypto/sha2.h>
+#include <linux/cpufeature.h>
#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
#include <linux/kernel.h>
-#include <linux/scatterlist.h>
-#include <asm/cpu_device_id.h>
-#include <asm/fpu/api.h>
+#include <linux/module.h>
-struct padlock_sha_desc {
- struct shash_desc fallback;
-};
+#define PADLOCK_SHA_DESCSIZE (128 + ((PADLOCK_ALIGNMENT - 1) & \
+ ~(CRYPTO_MINALIGN - 1)))
struct padlock_sha_ctx {
- struct crypto_shash *fallback;
+ struct crypto_ahash *fallback;
};
-static int padlock_sha_init(struct shash_desc *desc)
+static inline void *padlock_shash_desc_ctx(struct shash_desc *desc)
{
- struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
- struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ return PTR_ALIGN(shash_desc_ctx(desc), PADLOCK_ALIGNMENT);
+}
+
+static int padlock_sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = padlock_shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
+ return 0;
+}
+
+static int padlock_sha256_init(struct shash_desc *desc)
+{
+ struct crypto_sha256_state *sctx = padlock_shash_desc_ctx(desc);
- dctx->fallback.tfm = ctx->fallback;
- return crypto_shash_init(&dctx->fallback);
+ sha256_block_init(sctx);
+ return 0;
}
static int padlock_sha_update(struct shash_desc *desc,
const u8 *data, unsigned int length)
{
- struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
+ u8 *state = padlock_shash_desc_ctx(desc);
+ struct crypto_shash *tfm = desc->tfm;
+ int err, remain;
+
+ remain = length - round_down(length, crypto_shash_blocksize(tfm));
+ {
+ struct padlock_sha_ctx *ctx = crypto_shash_ctx(tfm);
+ HASH_REQUEST_ON_STACK(req, ctx->fallback);
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+ ahash_request_set_virt(req, data, NULL, length - remain);
+ err = crypto_ahash_import_core(req, state) ?:
+ crypto_ahash_update(req) ?:
+ crypto_ahash_export_core(req, state);
+ HASH_REQUEST_ZERO(req);
+ }
- return crypto_shash_update(&dctx->fallback, data, length);
+ return err ?: remain;
}
static int padlock_sha_export(struct shash_desc *desc, void *out)
{
- struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
-
- return crypto_shash_export(&dctx->fallback, out);
+ memcpy(out, padlock_shash_desc_ctx(desc),
+ crypto_shash_coresize(desc->tfm));
+ return 0;
}
static int padlock_sha_import(struct shash_desc *desc, const void *in)
{
- struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
- struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ss = crypto_shash_coresize(desc->tfm);
+ u64 *state = padlock_shash_desc_ctx(desc);
+
+ memcpy(state, in, ss);
+
+ /* Stop evil imports from generating a fault. */
+ state[ss / 8 - 1] &= ~(bs - 1);
- dctx->fallback.tfm = ctx->fallback;
- return crypto_shash_import(&dctx->fallback, in);
+ return 0;
}
static inline void padlock_output_block(uint32_t *src,
@@ -69,65 +99,38 @@ static inline void padlock_output_block(uint32_t *src,
*dst++ = swab32(*src++);
}
+static int padlock_sha_finup(struct shash_desc *desc, const u8 *in,
+ unsigned int count, u8 *out)
+{
+ struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ HASH_REQUEST_ON_STACK(req, ctx->fallback);
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+ ahash_request_set_virt(req, in, out, count);
+ return crypto_ahash_import_core(req, padlock_shash_desc_ctx(desc)) ?:
+ crypto_ahash_finup(req);
+}
+
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
unsigned int count, u8 *out)
{
/* We can't store directly to *out as it may be unaligned. */
/* BTW Don't reduce the buffer size below 128 Bytes!
* PadLock microcode needs it that big. */
- char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
- ((aligned(STACK_ALIGN)));
- char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
- struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
- struct sha1_state state;
- unsigned int space;
- unsigned int leftover;
- int err;
-
- err = crypto_shash_export(&dctx->fallback, &state);
- if (err)
- goto out;
+ struct sha1_state *state = padlock_shash_desc_ctx(desc);
+ u64 start = state->count;
- if (state.count + count > ULONG_MAX)
- return crypto_shash_finup(&dctx->fallback, in, count, out);
-
- leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
- space = SHA1_BLOCK_SIZE - leftover;
- if (space) {
- if (count > space) {
- err = crypto_shash_update(&dctx->fallback, in, space) ?:
- crypto_shash_export(&dctx->fallback, &state);
- if (err)
- goto out;
- count -= space;
- in += space;
- } else {
- memcpy(state.buffer + leftover, in, count);
- in = state.buffer;
- count += leftover;
- state.count &= ~(SHA1_BLOCK_SIZE - 1);
- }
- }
-
- memcpy(result, &state.state, SHA1_DIGEST_SIZE);
+ if (start + count > ULONG_MAX)
+ return padlock_sha_finup(desc, in, count, out);
asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
: \
- : "c"((unsigned long)state.count + count), \
- "a"((unsigned long)state.count), \
- "S"(in), "D"(result));
-
- padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
+ : "c"((unsigned long)start + count), \
+ "a"((unsigned long)start), \
+ "S"(in), "D"(state));
-out:
- return err;
-}
-
-static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
-{
- const u8 *buf = (void *)desc;
-
- return padlock_sha1_finup(desc, buf, 0, out);
+ padlock_output_block(state->state, (uint32_t *)out, 5);
+ return 0;
}
static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
@@ -136,78 +139,46 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
/* We can't store directly to *out as it may be unaligned. */
/* BTW Don't reduce the buffer size below 128 Bytes!
* PadLock microcode needs it that big. */
- char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
- ((aligned(STACK_ALIGN)));
- char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
- struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
- struct sha256_state state;
- unsigned int space;
- unsigned int leftover;
- int err;
-
- err = crypto_shash_export(&dctx->fallback, &state);
- if (err)
- goto out;
+ struct sha256_state *state = padlock_shash_desc_ctx(desc);
+ u64 start = state->count;
- if (state.count + count > ULONG_MAX)
- return crypto_shash_finup(&dctx->fallback, in, count, out);
-
- leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
- space = SHA256_BLOCK_SIZE - leftover;
- if (space) {
- if (count > space) {
- err = crypto_shash_update(&dctx->fallback, in, space) ?:
- crypto_shash_export(&dctx->fallback, &state);
- if (err)
- goto out;
- count -= space;
- in += space;
- } else {
- memcpy(state.buf + leftover, in, count);
- in = state.buf;
- count += leftover;
- state.count &= ~(SHA1_BLOCK_SIZE - 1);
- }
- }
-
- memcpy(result, &state.state, SHA256_DIGEST_SIZE);
+ if (start + count > ULONG_MAX)
+ return padlock_sha_finup(desc, in, count, out);
asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
: \
- : "c"((unsigned long)state.count + count), \
- "a"((unsigned long)state.count), \
- "S"(in), "D"(result));
+ : "c"((unsigned long)start + count), \
+ "a"((unsigned long)start), \
+ "S"(in), "D"(state));
- padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
-
-out:
- return err;
-}
-
-static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
-{
- const u8 *buf = (void *)desc;
-
- return padlock_sha256_finup(desc, buf, 0, out);
+ padlock_output_block(state->state, (uint32_t *)out, 8);
+ return 0;
}
static int padlock_init_tfm(struct crypto_shash *hash)
{
const char *fallback_driver_name = crypto_shash_alg_name(hash);
struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
- struct crypto_shash *fallback_tfm;
+ struct crypto_ahash *fallback_tfm;
/* Allocate a fallback and abort if it failed. */
- fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(fallback_tfm)) {
printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
fallback_driver_name);
return PTR_ERR(fallback_tfm);
}
+ if (crypto_shash_statesize(hash) !=
+ crypto_ahash_statesize(fallback_tfm)) {
+ crypto_free_ahash(fallback_tfm);
+ return -EINVAL;
+ }
+
ctx->fallback = fallback_tfm;
- hash->descsize += crypto_shash_descsize(fallback_tfm);
+
return 0;
}
@@ -215,26 +186,27 @@ static void padlock_exit_tfm(struct crypto_shash *hash)
{
struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
- crypto_free_shash(ctx->fallback);
+ crypto_free_ahash(ctx->fallback);
}
static struct shash_alg sha1_alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = padlock_sha_init,
+ .init = padlock_sha1_init,
.update = padlock_sha_update,
.finup = padlock_sha1_finup,
- .final = padlock_sha1_final,
.export = padlock_sha_export,
.import = padlock_sha_import,
.init_tfm = padlock_init_tfm,
.exit_tfm = padlock_exit_tfm,
- .descsize = sizeof(struct padlock_sha_desc),
- .statesize = sizeof(struct sha1_state),
+ .descsize = PADLOCK_SHA_DESCSIZE,
+ .statesize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-padlock",
.cra_priority = PADLOCK_CRA_PRIORITY,
- .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct padlock_sha_ctx),
.cra_module = THIS_MODULE,
@@ -243,21 +215,22 @@ static struct shash_alg sha1_alg = {
static struct shash_alg sha256_alg = {
.digestsize = SHA256_DIGEST_SIZE,
- .init = padlock_sha_init,
+ .init = padlock_sha256_init,
.update = padlock_sha_update,
.finup = padlock_sha256_finup,
- .final = padlock_sha256_final,
+ .init_tfm = padlock_init_tfm,
.export = padlock_sha_export,
.import = padlock_sha_import,
- .init_tfm = padlock_init_tfm,
.exit_tfm = padlock_exit_tfm,
- .descsize = sizeof(struct padlock_sha_desc),
- .statesize = sizeof(struct sha256_state),
+ .descsize = PADLOCK_SHA_DESCSIZE,
+ .statesize = sizeof(struct crypto_sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-padlock",
.cra_priority = PADLOCK_CRA_PRIORITY,
- .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct padlock_sha_ctx),
.cra_module = THIS_MODULE,
@@ -266,207 +239,58 @@ static struct shash_alg sha256_alg = {
/* Add two shash_alg instance for hardware-implemented *
* multiple-parts hash supported by VIA Nano Processor.*/
-static int padlock_sha1_init_nano(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
-}
static int padlock_sha1_update_nano(struct shash_desc *desc,
- const u8 *data, unsigned int len)
+ const u8 *src, unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial, done;
- const u8 *src;
/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
- u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
- ((aligned(STACK_ALIGN)));
- u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
-
- partial = sctx->count & 0x3f;
- sctx->count += len;
- done = 0;
- src = data;
- memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
-
- if ((partial + len) >= SHA1_BLOCK_SIZE) {
-
- /* Append the bytes in state's buffer to a block to handle */
- if (partial) {
- done = -partial;
- memcpy(sctx->buffer + partial, data,
- done + SHA1_BLOCK_SIZE);
- src = sctx->buffer;
- asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
- : "+S"(src), "+D"(dst) \
- : "a"((long)-1), "c"((unsigned long)1));
- done += SHA1_BLOCK_SIZE;
- src = data + done;
- }
-
- /* Process the left bytes from the input data */
- if (len - done >= SHA1_BLOCK_SIZE) {
- asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
- : "+S"(src), "+D"(dst)
- : "a"((long)-1),
- "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
- done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
- src = data + done;
- }
- partial = 0;
- }
- memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
- memcpy(sctx->buffer + partial, src, len - done);
-
- return 0;
-}
-
-static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
- unsigned int partial, padlen;
- __be64 bits;
- static const u8 padding[64] = { 0x80, };
-
- bits = cpu_to_be64(state->count << 3);
-
- /* Pad out to 56 mod 64 */
- partial = state->count & 0x3f;
- padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
- padlock_sha1_update_nano(desc, padding, padlen);
-
- /* Append length field bytes */
- padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
-
- /* Swap to output */
- padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
-
- return 0;
-}
-
-static int padlock_sha256_init_nano(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha256_state){
- .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
- SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
- };
-
- return 0;
+ struct sha1_state *state = padlock_shash_desc_ctx(desc);
+ int blocks = len / SHA1_BLOCK_SIZE;
+
+ len -= blocks * SHA1_BLOCK_SIZE;
+ state->count += blocks * SHA1_BLOCK_SIZE;
+
+ /* Process the left bytes from the input data */
+ asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
+ : "+S"(src), "+D"(state)
+ : "a"((long)-1),
+ "c"((unsigned long)blocks));
+ return len;
}
-static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
+static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int partial, done;
- const u8 *src;
/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
- u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
- ((aligned(STACK_ALIGN)));
- u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
-
- partial = sctx->count & 0x3f;
- sctx->count += len;
- done = 0;
- src = data;
- memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
-
- if ((partial + len) >= SHA256_BLOCK_SIZE) {
-
- /* Append the bytes in state's buffer to a block to handle */
- if (partial) {
- done = -partial;
- memcpy(sctx->buf + partial, data,
- done + SHA256_BLOCK_SIZE);
- src = sctx->buf;
- asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
- : "+S"(src), "+D"(dst)
- : "a"((long)-1), "c"((unsigned long)1));
- done += SHA256_BLOCK_SIZE;
- src = data + done;
- }
-
- /* Process the left bytes from input data*/
- if (len - done >= SHA256_BLOCK_SIZE) {
- asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
- : "+S"(src), "+D"(dst)
- : "a"((long)-1),
- "c"((unsigned long)((len - done) / 64)));
- done += ((len - done) - (len - done) % 64);
- src = data + done;
- }
- partial = 0;
- }
- memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
- memcpy(sctx->buf + partial, src, len - done);
-
- return 0;
-}
-
-static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *state =
- (struct sha256_state *)shash_desc_ctx(desc);
- unsigned int partial, padlen;
- __be64 bits;
- static const u8 padding[64] = { 0x80, };
-
- bits = cpu_to_be64(state->count << 3);
-
- /* Pad out to 56 mod 64 */
- partial = state->count & 0x3f;
- padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
- padlock_sha256_update_nano(desc, padding, padlen);
-
- /* Append length field bytes */
- padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
-
- /* Swap to output */
- padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
-
- return 0;
-}
-
-static int padlock_sha_export_nano(struct shash_desc *desc,
- void *out)
-{
- int statesize = crypto_shash_statesize(desc->tfm);
- void *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, statesize);
- return 0;
-}
-
-static int padlock_sha_import_nano(struct shash_desc *desc,
- const void *in)
-{
- int statesize = crypto_shash_statesize(desc->tfm);
- void *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, statesize);
- return 0;
+ struct crypto_sha256_state *state = padlock_shash_desc_ctx(desc);
+ int blocks = len / SHA256_BLOCK_SIZE;
+
+ len -= blocks * SHA256_BLOCK_SIZE;
+ state->count += blocks * SHA256_BLOCK_SIZE;
+
+ /* Process the left bytes from input data*/
+ asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
+ : "+S"(src), "+D"(state)
+ : "a"((long)-1),
+ "c"((unsigned long)blocks));
+ return len;
}
static struct shash_alg sha1_alg_nano = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = padlock_sha1_init_nano,
+ .init = padlock_sha1_init,
.update = padlock_sha1_update_nano,
- .final = padlock_sha1_final_nano,
- .export = padlock_sha_export_nano,
- .import = padlock_sha_import_nano,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
+ .finup = padlock_sha1_finup,
+ .export = padlock_sha_export,
+ .import = padlock_sha_import,
+ .descsize = PADLOCK_SHA_DESCSIZE,
+ .statesize = SHA1_STATE_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-padlock-nano",
.cra_priority = PADLOCK_CRA_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -474,17 +298,19 @@ static struct shash_alg sha1_alg_nano = {
static struct shash_alg sha256_alg_nano = {
.digestsize = SHA256_DIGEST_SIZE,
- .init = padlock_sha256_init_nano,
+ .init = padlock_sha256_init,
.update = padlock_sha256_update_nano,
- .final = padlock_sha256_final_nano,
- .export = padlock_sha_export_nano,
- .import = padlock_sha_import_nano,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
+ .finup = padlock_sha256_finup,
+ .export = padlock_sha_export,
+ .import = padlock_sha_import,
+ .descsize = PADLOCK_SHA_DESCSIZE,
+ .statesize = sizeof(struct crypto_sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-padlock-nano",
.cra_priority = PADLOCK_CRA_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
+ CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 69d6019d8abc..d6928ebe9526 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -52,12 +52,11 @@ static int rk_ahash_digest_fb(struct ahash_request *areq)
algt->stat_fb++;
ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
- rctx->fallback_req.base.flags = areq->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = areq->nbytes;
- rctx->fallback_req.src = areq->src;
- rctx->fallback_req.result = areq->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ areq->base.complete, areq->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, areq->src, areq->result,
+ areq->nbytes);
return crypto_ahash_digest(&rctx->fallback_req);
}
@@ -124,8 +123,9 @@ static int rk_ahash_init(struct ahash_request *req)
struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_init(&rctx->fallback_req);
}
@@ -137,10 +137,10 @@ static int rk_ahash_update(struct ahash_request *req)
struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, NULL, req->nbytes);
return crypto_ahash_update(&rctx->fallback_req);
}
@@ -152,9 +152,10 @@ static int rk_ahash_final(struct ahash_request *req)
struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, req->result, 0);
return crypto_ahash_final(&rctx->fallback_req);
}
@@ -166,12 +167,11 @@ static int rk_ahash_finup(struct ahash_request *req)
struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, req->result,
+ req->nbytes);
return crypto_ahash_finup(&rctx->fallback_req);
}
@@ -183,8 +183,9 @@ static int rk_ahash_import(struct ahash_request *req, const void *in)
struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_import(&rctx->fallback_req, in);
}
@@ -196,8 +197,9 @@ static int rk_ahash_export(struct ahash_request *req, void *out)
struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_export(&rctx->fallback_req, out);
}
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index b4c3c14dafd5..b829c84f60f2 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -9,11 +9,17 @@
//
// Hash part based on omap-sham.c driver.
+#include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/md5.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
#include <linux/clk.h>
-#include <linux/crypto.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
-#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -22,17 +28,9 @@
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/scatterlist.h>
-
-#include <crypto/ctr.h>
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <crypto/scatterwalk.h>
-
-#include <crypto/hash.h>
-#include <crypto/md5.h>
-#include <crypto/sha1.h>
-#include <crypto/sha2.h>
-#include <crypto/internal/hash.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
#define _SBF(s, v) ((v) << (s))
diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 091612b066f1..fdc0b2486069 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -1415,22 +1415,13 @@ static int sa_sha_run(struct ahash_request *req)
(auth_len >= SA_UNSAFE_DATA_SZ_MIN &&
auth_len <= SA_UNSAFE_DATA_SZ_MAX)) {
struct ahash_request *subreq = &rctx->fallback_req;
- int ret = 0;
+ int ret;
ahash_request_set_tfm(subreq, ctx->fallback.ahash);
- subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
-
- crypto_ahash_init(subreq);
-
- subreq->nbytes = auth_len;
- subreq->src = req->src;
- subreq->result = req->result;
-
- ret |= crypto_ahash_update(subreq);
-
- subreq->nbytes = 0;
+ ahash_request_set_callback(subreq, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ ahash_request_set_crypt(subreq, req->src, req->result, auth_len);
- ret |= crypto_ahash_final(subreq);
+ ret = crypto_ahash_digest(subreq);
return ret;
}
@@ -1502,8 +1493,7 @@ static int sa_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
return ret;
if (alg_base) {
- ctx->shash = crypto_alloc_shash(alg_base, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ ctx->shash = crypto_alloc_shash(alg_base, 0, 0);
if (IS_ERR(ctx->shash)) {
dev_err(sa_k3_dev, "base driver %s couldn't be loaded\n",
alg_base);
@@ -1511,8 +1501,7 @@ static int sa_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
}
/* for fallback */
ctx->fallback.ahash =
- crypto_alloc_ahash(alg_base, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ crypto_alloc_ahash(alg_base, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->fallback.ahash)) {
dev_err(ctx->dev_data->dev,
"Could not load fallback driver\n");
@@ -1546,54 +1535,38 @@ static int sa_sha_init(struct ahash_request *req)
crypto_ahash_digestsize(tfm), rctx);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback.ahash);
- rctx->fallback_req.base.flags =
- req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, NULL, 0);
return crypto_ahash_init(&rctx->fallback_req);
}
static int sa_sha_update(struct ahash_request *req)
{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
- struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback.ahash);
- rctx->fallback_req.base.flags =
- req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
+ ahash_request_set_callback(&rctx->fallback_req, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, NULL, req->nbytes);
return crypto_ahash_update(&rctx->fallback_req);
}
static int sa_sha_final(struct ahash_request *req)
{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
- struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback.ahash);
- rctx->fallback_req.base.flags =
- req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, req->result, 0);
return crypto_ahash_final(&rctx->fallback_req);
}
static int sa_sha_finup(struct ahash_request *req)
{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
- struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback.ahash);
- rctx->fallback_req.base.flags =
- req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, req->result, req->nbytes);
return crypto_ahash_finup(&rctx->fallback_req);
}
@@ -1605,8 +1578,7 @@ static int sa_sha_import(struct ahash_request *req, const void *in)
struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback.ahash);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
return crypto_ahash_import(&rctx->fallback_req, in);
}
@@ -1614,12 +1586,9 @@ static int sa_sha_import(struct ahash_request *req, const void *in)
static int sa_sha_export(struct ahash_request *req, void *out)
{
struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *subreq = &rctx->fallback_req;
- ahash_request_set_tfm(subreq, ctx->fallback.ahash);
- subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(subreq, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
return crypto_ahash_export(subreq, out);
}
diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c
index 42d007b7af45..d09b4aaeecef 100644
--- a/drivers/crypto/tegra/tegra-se-hash.c
+++ b/drivers/crypto/tegra/tegra-se-hash.c
@@ -117,8 +117,9 @@ static int tegra_sha_fallback_init(struct ahash_request *req)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_init(&rctx->fallback_req);
}
@@ -130,10 +131,10 @@ static int tegra_sha_fallback_update(struct ahash_request *req)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, NULL, req->nbytes);
return crypto_ahash_update(&rctx->fallback_req);
}
@@ -145,9 +146,10 @@ static int tegra_sha_fallback_final(struct ahash_request *req)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, NULL, req->result, 0);
return crypto_ahash_final(&rctx->fallback_req);
}
@@ -159,12 +161,11 @@ static int tegra_sha_fallback_finup(struct ahash_request *req)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, req->result,
+ req->nbytes);
return crypto_ahash_finup(&rctx->fallback_req);
}
@@ -176,12 +177,11 @@ static int tegra_sha_fallback_digest(struct ahash_request *req)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
-
- rctx->fallback_req.nbytes = req->nbytes;
- rctx->fallback_req.src = req->src;
- rctx->fallback_req.result = req->result;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
+ ahash_request_set_crypt(&rctx->fallback_req, req->src, req->result,
+ req->nbytes);
return crypto_ahash_digest(&rctx->fallback_req);
}
@@ -193,8 +193,9 @@ static int tegra_sha_fallback_import(struct ahash_request *req, const void *in)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_import(&rctx->fallback_req, in);
}
@@ -206,8 +207,9 @@ static int tegra_sha_fallback_export(struct ahash_request *req, void *out)
struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
- rctx->fallback_req.base.flags = req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(&rctx->fallback_req,
+ req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP,
+ req->base.complete, req->base.data);
return crypto_ahash_export(&rctx->fallback_req, out);
}
diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c
index 580649f9bff8..5813017b6b79 100644
--- a/drivers/crypto/xilinx/zynqmp-sha.c
+++ b/drivers/crypto/xilinx/zynqmp-sha.c
@@ -3,18 +3,18 @@
* Xilinx ZynqMP SHA Driver.
* Copyright (c) 2022 Xilinx Inc.
*/
-#include <linux/cacheflush.h>
-#include <crypto/hash.h>
#include <crypto/internal/hash.h>
#include <crypto/sha3.h>
-#include <linux/crypto.h>
+#include <linux/cacheflush.h>
+#include <linux/cleanup.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
+#include <linux/err.h>
#include <linux/firmware/xlnx-zynqmp.h>
-#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/spinlock.h>
#include <linux/platform_device.h>
#define ZYNQMP_DMA_BIT_MASK 32U
@@ -36,13 +36,11 @@ struct zynqmp_sha_tfm_ctx {
struct crypto_shash *fbk_tfm;
};
-struct zynqmp_sha_desc_ctx {
- struct shash_desc fbk_req;
-};
-
static dma_addr_t update_dma_addr, final_dma_addr;
static char *ubuf, *fbuf;
+static DEFINE_SPINLOCK(zynqmp_sha_lock);
+
static int zynqmp_sha_init_tfm(struct crypto_shash *hash)
{
const char *fallback_driver_name = crypto_shash_alg_name(hash);
@@ -60,8 +58,13 @@ static int zynqmp_sha_init_tfm(struct crypto_shash *hash)
if (IS_ERR(fallback_tfm))
return PTR_ERR(fallback_tfm);
+ if (crypto_shash_descsize(hash) <
+ crypto_shash_statesize(tfm_ctx->fbk_tfm)) {
+ crypto_free_shash(fallback_tfm);
+ return -EINVAL;
+ }
+
tfm_ctx->fbk_tfm = fallback_tfm;
- hash->descsize += crypto_shash_descsize(tfm_ctx->fbk_tfm);
return 0;
}
@@ -70,61 +73,55 @@ static void zynqmp_sha_exit_tfm(struct crypto_shash *hash)
{
struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
- if (tfm_ctx->fbk_tfm) {
- crypto_free_shash(tfm_ctx->fbk_tfm);
- tfm_ctx->fbk_tfm = NULL;
- }
+ crypto_free_shash(tfm_ctx->fbk_tfm);
+}
- memzero_explicit(tfm_ctx, sizeof(struct zynqmp_sha_tfm_ctx));
+static int zynqmp_sha_continue(struct shash_desc *desc,
+ struct shash_desc *fbdesc, int err)
+{
+ err = err ?: crypto_shash_export(fbdesc, shash_desc_ctx(desc));
+ shash_desc_zero(fbdesc);
+ return err;
}
static int zynqmp_sha_init(struct shash_desc *desc)
{
- struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct crypto_shash *fbtfm = tctx->fbk_tfm;
+ SHASH_DESC_ON_STACK(fbdesc, fbtfm);
+ int err;
- dctx->fbk_req.tfm = tctx->fbk_tfm;
- return crypto_shash_init(&dctx->fbk_req);
+ fbdesc->tfm = fbtfm;
+ err = crypto_shash_init(fbdesc);
+ return zynqmp_sha_continue(desc, fbdesc, err);
}
static int zynqmp_sha_update(struct shash_desc *desc, const u8 *data, unsigned int length)
{
- struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
-
- return crypto_shash_update(&dctx->fbk_req, data, length);
-}
-
-static int zynqmp_sha_final(struct shash_desc *desc, u8 *out)
-{
- struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct crypto_shash *fbtfm = tctx->fbk_tfm;
+ SHASH_DESC_ON_STACK(fbdesc, fbtfm);
+ int err;
- return crypto_shash_final(&dctx->fbk_req, out);
+ fbdesc->tfm = fbtfm;
+ err = crypto_shash_import(fbdesc, shash_desc_ctx(desc)) ?:
+ crypto_shash_update(fbdesc, data, length);
+ return zynqmp_sha_continue(desc, fbdesc, err);
}
static int zynqmp_sha_finup(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
{
- struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
-
- return crypto_shash_finup(&dctx->fbk_req, data, length, out);
-}
-
-static int zynqmp_sha_import(struct shash_desc *desc, const void *in)
-{
- struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct crypto_shash *fbtfm = tctx->fbk_tfm;
+ SHASH_DESC_ON_STACK(fbdesc, fbtfm);
- dctx->fbk_req.tfm = tctx->fbk_tfm;
- return crypto_shash_import(&dctx->fbk_req, in);
+ fbdesc->tfm = fbtfm;
+ return crypto_shash_import(fbdesc, shash_desc_ctx(desc)) ?:
+ crypto_shash_finup(fbdesc, data, length, out);
}
-static int zynqmp_sha_export(struct shash_desc *desc, void *out)
-{
- struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
-
- return crypto_shash_export(&dctx->fbk_req, out);
-}
-
-static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+static int __zynqmp_sha_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
unsigned int remaining_len = len;
int update_size;
@@ -159,26 +156,27 @@ static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned i
return ret;
}
+static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+ scoped_guard(spinlock_bh, &zynqmp_sha_lock)
+ return __zynqmp_sha_digest(desc, data, len, out);
+}
+
static struct zynqmp_sha_drv_ctx sha3_drv_ctx = {
.sha3_384 = {
.init = zynqmp_sha_init,
.update = zynqmp_sha_update,
- .final = zynqmp_sha_final,
.finup = zynqmp_sha_finup,
.digest = zynqmp_sha_digest,
- .export = zynqmp_sha_export,
- .import = zynqmp_sha_import,
.init_tfm = zynqmp_sha_init_tfm,
.exit_tfm = zynqmp_sha_exit_tfm,
- .descsize = sizeof(struct zynqmp_sha_desc_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = SHA3_384_EXPORT_SIZE,
.digestsize = SHA3_384_DIGEST_SIZE,
.base = {
.cra_name = "sha3-384",
.cra_driver_name = "zynqmp-sha3-384",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
- CRYPTO_ALG_ALLOCATES_MEMORY |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA3_384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct zynqmp_sha_tfm_ctx),
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 5f8d010516f0..b1ef4546346d 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -320,8 +320,9 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
count++;
dma_resv_list_set(fobj, i, fence, usage);
- /* pointer update must be visible before we extend the num_fences */
- smp_store_mb(fobj->num_fences, count);
+ /* fence update must be visible before we extend the num_fences */
+ smp_wmb();
+ fobj->num_fences = count;
}
EXPORT_SYMBOL(dma_resv_add_fence);
diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c
index 9f80a45498f0..261b38816226 100644
--- a/drivers/dma-buf/st-dma-fence.c
+++ b/drivers/dma-buf/st-dma-fence.c
@@ -413,7 +413,7 @@ static int test_wait_timeout(void *arg)
err = 0;
err_free:
timer_delete_sync(&wt.timer);
- destroy_timer_on_stack(&wt.timer);
+ timer_destroy_on_stack(&wt.timer);
dma_fence_signal(wt.f);
dma_fence_put(wt.f);
return err;
diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
index 715ac3ae067b..81339664036f 100644
--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
@@ -342,6 +342,9 @@ static void pt_cmd_callback_work(void *data, int err)
struct pt_dma_chan *chan;
unsigned long flags;
+ if (!desc)
+ return;
+
dma_chan = desc->vd.tx.chan;
chan = to_pt_chan(dma_chan);
@@ -355,16 +358,14 @@ static void pt_cmd_callback_work(void *data, int err)
desc->status = DMA_ERROR;
spin_lock_irqsave(&chan->vc.lock, flags);
- if (desc) {
- if (desc->status != DMA_COMPLETE) {
- if (desc->status != DMA_ERROR)
- desc->status = DMA_COMPLETE;
+ if (desc->status != DMA_COMPLETE) {
+ if (desc->status != DMA_ERROR)
+ desc->status = DMA_COMPLETE;
- dma_cookie_complete(tx_desc);
- dma_descriptor_unmap(tx_desc);
- } else {
- tx_desc = NULL;
- }
+ dma_cookie_complete(tx_desc);
+ dma_descriptor_unmap(tx_desc);
+ } else {
+ tx_desc = NULL;
}
spin_unlock_irqrestore(&chan->vc.lock, flags);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index d891dfca358e..91b2fbc0b864 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -841,9 +841,9 @@ static int dmatest_func(void *data)
} else {
dma_async_issue_pending(chan);
- wait_event_timeout(thread->done_wait,
- done->done,
- msecs_to_jiffies(params->timeout));
+ wait_event_freezable_timeout(thread->done_wait,
+ done->done,
+ msecs_to_jiffies(params->timeout));
status = dma_async_is_tx_complete(chan, cookie, NULL,
NULL);
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 756d67325db5..66bfa28d984e 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -57,7 +57,7 @@ static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id)
intr = edma_readl_chreg(fsl_chan, ch_int);
if (!intr)
- return IRQ_HANDLED;
+ return IRQ_NONE;
edma_writel_chreg(fsl_chan, 1, ch_int);
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index ff94ee892339..6d12033649f8 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -222,7 +222,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
struct idxd_wq *wq;
struct device *dev, *fdev;
int rc = 0;
- struct iommu_sva *sva;
+ struct iommu_sva *sva = NULL;
unsigned int pasid;
struct idxd_cdev *idxd_cdev;
@@ -317,7 +317,7 @@ failed_set_pasid:
if (device_user_pasid_enabled(idxd))
idxd_xa_pasid_remove(ctx);
failed_get_pasid:
- if (device_user_pasid_enabled(idxd))
+ if (device_user_pasid_enabled(idxd) && !IS_ERR_OR_NULL(sva))
iommu_sva_unbind_device(sva);
failed:
mutex_unlock(&wq->wq_lock);
@@ -407,6 +407,9 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO))
return -EPERM;
+ if (current->mm != ctx->mm)
+ return -EPERM;
+
rc = check_vma(wq, vma, __func__);
if (rc < 0)
return rc;
@@ -473,6 +476,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t
ssize_t written = 0;
int i;
+ if (current->mm != ctx->mm)
+ return -EPERM;
+
for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) {
int rc = idxd_submit_user_descriptor(ctx, udesc + i);
@@ -493,6 +499,9 @@ static __poll_t idxd_cdev_poll(struct file *filp,
struct idxd_device *idxd = wq->idxd;
__poll_t out = 0;
+ if (current->mm != ctx->mm)
+ return POLLNVAL;
+
poll_wait(filp, &wq->err_queue, wait);
spin_lock(&idxd->dev_lock);
if (idxd->sw_err.valid)
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index fca1d2924999..760b7d81fcd8 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -155,6 +155,25 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd)
pci_free_irq_vectors(pdev);
}
+static void idxd_clean_wqs(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ struct device *conf_dev;
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ if (idxd->hw.wq_cap.op_config)
+ bitmap_free(wq->opcap_bmap);
+ kfree(wq->wqcfg);
+ conf_dev = wq_confdev(wq);
+ put_device(conf_dev);
+ kfree(wq);
+ }
+ bitmap_free(idxd->wq_enable_map);
+ kfree(idxd->wqs);
+}
+
static int idxd_setup_wqs(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
@@ -169,8 +188,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev));
if (!idxd->wq_enable_map) {
- kfree(idxd->wqs);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto err_bitmap;
}
for (i = 0; i < idxd->max_wqs; i++) {
@@ -189,10 +208,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
conf_dev->bus = &dsa_bus_type;
conf_dev->type = &idxd_wq_device_type;
rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
- if (rc < 0) {
- put_device(conf_dev);
+ if (rc < 0)
goto err;
- }
mutex_init(&wq->wq_lock);
init_waitqueue_head(&wq->err_queue);
@@ -203,7 +220,6 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
if (!wq->wqcfg) {
- put_device(conf_dev);
rc = -ENOMEM;
goto err;
}
@@ -211,9 +227,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
if (idxd->hw.wq_cap.op_config) {
wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
if (!wq->opcap_bmap) {
- put_device(conf_dev);
rc = -ENOMEM;
- goto err;
+ goto err_opcap_bmap;
}
bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
}
@@ -224,15 +239,46 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
return 0;
- err:
+err_opcap_bmap:
+ kfree(wq->wqcfg);
+
+err:
+ put_device(conf_dev);
+ kfree(wq);
+
while (--i >= 0) {
wq = idxd->wqs[i];
+ if (idxd->hw.wq_cap.op_config)
+ bitmap_free(wq->opcap_bmap);
+ kfree(wq->wqcfg);
conf_dev = wq_confdev(wq);
put_device(conf_dev);
+ kfree(wq);
+
}
+ bitmap_free(idxd->wq_enable_map);
+
+err_bitmap:
+ kfree(idxd->wqs);
+
return rc;
}
+static void idxd_clean_engines(struct idxd_device *idxd)
+{
+ struct idxd_engine *engine;
+ struct device *conf_dev;
+ int i;
+
+ for (i = 0; i < idxd->max_engines; i++) {
+ engine = idxd->engines[i];
+ conf_dev = engine_confdev(engine);
+ put_device(conf_dev);
+ kfree(engine);
+ }
+ kfree(idxd->engines);
+}
+
static int idxd_setup_engines(struct idxd_device *idxd)
{
struct idxd_engine *engine;
@@ -263,6 +309,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
if (rc < 0) {
put_device(conf_dev);
+ kfree(engine);
goto err;
}
@@ -276,10 +323,26 @@ static int idxd_setup_engines(struct idxd_device *idxd)
engine = idxd->engines[i];
conf_dev = engine_confdev(engine);
put_device(conf_dev);
+ kfree(engine);
}
+ kfree(idxd->engines);
+
return rc;
}
+static void idxd_clean_groups(struct idxd_device *idxd)
+{
+ struct idxd_group *group;
+ int i;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = idxd->groups[i];
+ put_device(group_confdev(group));
+ kfree(group);
+ }
+ kfree(idxd->groups);
+}
+
static int idxd_setup_groups(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
@@ -310,6 +373,7 @@ static int idxd_setup_groups(struct idxd_device *idxd)
rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
if (rc < 0) {
put_device(conf_dev);
+ kfree(group);
goto err;
}
@@ -334,20 +398,18 @@ static int idxd_setup_groups(struct idxd_device *idxd)
while (--i >= 0) {
group = idxd->groups[i];
put_device(group_confdev(group));
+ kfree(group);
}
+ kfree(idxd->groups);
+
return rc;
}
static void idxd_cleanup_internals(struct idxd_device *idxd)
{
- int i;
-
- for (i = 0; i < idxd->max_groups; i++)
- put_device(group_confdev(idxd->groups[i]));
- for (i = 0; i < idxd->max_engines; i++)
- put_device(engine_confdev(idxd->engines[i]));
- for (i = 0; i < idxd->max_wqs; i++)
- put_device(wq_confdev(idxd->wqs[i]));
+ idxd_clean_groups(idxd);
+ idxd_clean_engines(idxd);
+ idxd_clean_wqs(idxd);
destroy_workqueue(idxd->wq);
}
@@ -390,7 +452,7 @@ static int idxd_init_evl(struct idxd_device *idxd)
static int idxd_setup_internals(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
- int rc, i;
+ int rc;
init_waitqueue_head(&idxd->cmd_waitq);
@@ -421,14 +483,11 @@ static int idxd_setup_internals(struct idxd_device *idxd)
err_evl:
destroy_workqueue(idxd->wq);
err_wkq_create:
- for (i = 0; i < idxd->max_groups; i++)
- put_device(group_confdev(idxd->groups[i]));
+ idxd_clean_groups(idxd);
err_group:
- for (i = 0; i < idxd->max_engines; i++)
- put_device(engine_confdev(idxd->engines[i]));
+ idxd_clean_engines(idxd);
err_engine:
- for (i = 0; i < idxd->max_wqs; i++)
- put_device(wq_confdev(idxd->wqs[i]));
+ idxd_clean_wqs(idxd);
err_wqs:
return rc;
}
@@ -528,6 +587,17 @@ static void idxd_read_caps(struct idxd_device *idxd)
idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET);
}
+static void idxd_free(struct idxd_device *idxd)
+{
+ if (!idxd)
+ return;
+
+ put_device(idxd_confdev(idxd));
+ bitmap_free(idxd->opcap_bmap);
+ ida_free(&idxd_ida, idxd->id);
+ kfree(idxd);
+}
+
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
struct device *dev = &pdev->dev;
@@ -545,28 +615,34 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
if (idxd->id < 0)
- return NULL;
+ goto err_ida;
idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev));
- if (!idxd->opcap_bmap) {
- ida_free(&idxd_ida, idxd->id);
- return NULL;
- }
+ if (!idxd->opcap_bmap)
+ goto err_opcap;
device_initialize(conf_dev);
conf_dev->parent = dev;
conf_dev->bus = &dsa_bus_type;
conf_dev->type = idxd->data->dev_type;
rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
- if (rc < 0) {
- put_device(conf_dev);
- return NULL;
- }
+ if (rc < 0)
+ goto err_name;
spin_lock_init(&idxd->dev_lock);
spin_lock_init(&idxd->cmd_lock);
return idxd;
+
+err_name:
+ put_device(conf_dev);
+ bitmap_free(idxd->opcap_bmap);
+err_opcap:
+ ida_free(&idxd_ida, idxd->id);
+err_ida:
+ kfree(idxd);
+
+ return NULL;
}
static int idxd_enable_system_pasid(struct idxd_device *idxd)
@@ -1190,7 +1266,7 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev,
err:
pci_iounmap(pdev, idxd->reg_base);
err_iomap:
- put_device(idxd_confdev(idxd));
+ idxd_free(idxd);
err_idxd_alloc:
pci_disable_device(pdev);
return rc;
@@ -1232,7 +1308,6 @@ static void idxd_shutdown(struct pci_dev *pdev)
static void idxd_remove(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
- struct idxd_irq_entry *irq_entry;
idxd_unregister_devices(idxd);
/*
@@ -1245,20 +1320,12 @@ static void idxd_remove(struct pci_dev *pdev)
get_device(idxd_confdev(idxd));
device_unregister(idxd_confdev(idxd));
idxd_shutdown(pdev);
- if (device_pasid_enabled(idxd))
- idxd_disable_system_pasid(idxd);
idxd_device_remove_debugfs(idxd);
-
- irq_entry = idxd_get_ie(idxd, 0);
- free_irq(irq_entry->vector, irq_entry);
- pci_free_irq_vectors(pdev);
+ idxd_cleanup(idxd);
pci_iounmap(pdev, idxd->reg_base);
- if (device_user_pasid_enabled(idxd))
- idxd_disable_sva(pdev);
- pci_disable_device(pdev);
- destroy_workqueue(idxd->wq);
- perfmon_pmu_remove(idxd);
put_device(idxd_confdev(idxd));
+ idxd_free(idxd);
+ pci_disable_device(pdev);
}
static struct pci_driver idxd_pci_driver = {
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
index c9aba2304de7..5d3c0ae6b342 100644
--- a/drivers/dma/ioat/dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -10,7 +10,7 @@
#include <linux/interrupt.h>
#include <linux/dca.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
/* either a kernel change is needed, or we need something like this in kernel */
#ifndef CONFIG_SMP
diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c
index d5ddb4e30e71..47c8adfdc155 100644
--- a/drivers/dma/mediatek/mtk-cqdma.c
+++ b/drivers/dma/mediatek/mtk-cqdma.c
@@ -420,15 +420,11 @@ static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c,
{
struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c);
struct virt_dma_desc *vd;
- unsigned long flags;
- spin_lock_irqsave(&cvc->pc->lock, flags);
list_for_each_entry(vd, &cvc->pc->queue, node)
if (vd->tx.cookie == cookie) {
- spin_unlock_irqrestore(&cvc->pc->lock, flags);
return vd;
}
- spin_unlock_irqrestore(&cvc->pc->lock, flags);
list_for_each_entry(vd, &cvc->vc.desc_issued, node)
if (vd->tx.cookie == cookie)
@@ -452,9 +448,11 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c,
if (ret == DMA_COMPLETE || !txstate)
return ret;
+ spin_lock_irqsave(&cvc->pc->lock, flags);
spin_lock_irqsave(&cvc->vc.lock, flags);
vd = mtk_cqdma_find_active_desc(c, cookie);
spin_unlock_irqrestore(&cvc->vc.lock, flags);
+ spin_unlock_irqrestore(&cvc->pc->lock, flags);
if (vd) {
cvd = to_cqdma_vdesc(vd);
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index b223a7aacb0c..b6255c0601bb 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -1091,8 +1091,11 @@ static void udma_check_tx_completion(struct work_struct *work)
u32 residue_diff;
ktime_t time_diff;
unsigned long delay;
+ unsigned long flags;
while (1) {
+ spin_lock_irqsave(&uc->vc.lock, flags);
+
if (uc->desc) {
/* Get previous residue and time stamp */
residue_diff = uc->tx_drain.residue;
@@ -1127,6 +1130,8 @@ static void udma_check_tx_completion(struct work_struct *work)
break;
}
+ spin_unlock_irqrestore(&uc->vc.lock, flags);
+
usleep_range(ktime_to_us(delay),
ktime_to_us(delay) + 10);
continue;
@@ -1143,6 +1148,8 @@ static void udma_check_tx_completion(struct work_struct *work)
break;
}
+
+ spin_unlock_irqrestore(&uc->vc.lock, flags);
}
static irqreturn_t udma_ring_irq_handler(int irq, void *data)
@@ -4246,7 +4253,6 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec,
struct of_dma *ofdma)
{
struct udma_dev *ud = ofdma->of_dma_data;
- dma_cap_mask_t mask = ud->ddev.cap_mask;
struct udma_filter_param filter_param;
struct dma_chan *chan;
@@ -4278,7 +4284,7 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec,
}
}
- chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param,
+ chan = __dma_request_channel(&ud->ddev.cap_mask, udma_dma_filter_fn, &filter_param,
ofdma->of_node);
if (!chan) {
dev_err(ud->dev, "get channel fail in %s.\n", __func__);
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index dcd7008fe06b..20333608b983 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -2131,8 +2131,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
edac->irq_chip.name = pdev->dev.of_node->name;
edac->irq_chip.irq_mask = a10_eccmgr_irq_mask;
edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask;
- edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64,
- &a10_eccmgr_ic_ops, edac);
+ edac->domain = irq_domain_create_linear(of_fwnode_handle(pdev->dev.of_node),
+ 64, &a10_eccmgr_ic_ops, edac);
if (!edac->domain) {
dev_err(&pdev->dev, "Error adding IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 90f0eb7cc5b9..58b1482a0fbb 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2,8 +2,8 @@
#include <linux/ras.h>
#include <linux/string_choices.h>
#include "amd64_edac.h"
-#include <asm/amd_nb.h>
-#include <asm/amd_node.h>
+#include <asm/amd/nb.h>
+#include <asm/amd/node.h>
static struct edac_pci_ctl_info *pci_ctl;
@@ -2942,13 +2942,13 @@ static void dct_read_mc_regs(struct amd64_pvt *pvt)
* Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
* those are Read-As-Zero.
*/
- rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
+ rdmsrq(MSR_K8_TOP_MEM1, pvt->top_mem);
edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
/* Check first whether TOP_MEM2 is enabled: */
- rdmsrl(MSR_AMD64_SYSCFG, msr_val);
+ rdmsrq(MSR_AMD64_SYSCFG, msr_val);
if (msr_val & BIT(21)) {
- rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
+ rdmsrq(MSR_K8_TOP_MEM2, pvt->top_mem2);
edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
} else {
edac_dbg(0, " TOP_MEM2 disabled\n");
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
index 4942a240c30f..ae3bb7afa103 100644
--- a/drivers/edac/bluefield_edac.c
+++ b/drivers/edac/bluefield_edac.c
@@ -199,8 +199,10 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
* error without the detailed information.
*/
err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
- if (err)
+ if (err) {
dev_err(priv->dev, "DRAM syndrom read failed.\n");
+ return;
+ }
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
@@ -213,20 +215,26 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
}
err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
- if (err)
+ if (err) {
dev_err(priv->dev, "DRAM additional info read failed.\n");
+ return;
+ }
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
- if (err)
+ if (err) {
dev_err(priv->dev, "Error addr 0 read failed.\n");
+ return;
+ }
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
- if (err)
+ if (err) {
dev_err(priv->dev, "Error addr 1 read failed.\n");
+ return;
+ }
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
@@ -250,8 +258,10 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
return;
err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
- if (err)
+ if (err) {
dev_err(priv->dev, "ECC count read failed.\n");
+ return;
+ }
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 355a977019e9..a3fca2567752 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -72,12 +72,6 @@
#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
-#define RETRY_RD_ERR_LOG_UC BIT(1)
-#define RETRY_RD_ERR_LOG_NOOVER BIT(14)
-#define RETRY_RD_ERR_LOG_EN BIT(15)
-#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
-#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))
-
static struct list_head *i10nm_edac_list;
static struct res_config *res_cfg;
@@ -85,227 +79,319 @@ static int retry_rd_err_log;
static int decoding_via_mca;
static bool mem_cfg_2lm;
-static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
-static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
-static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
-static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
-static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
-static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
-static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
-static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
-static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
-
-static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
- u32 *offsets_scrub, u32 *offsets_demand,
- u32 *offsets_demand2)
+static struct reg_rrl icx_reg_rrl_ddr = {
+ .set_num = 2,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8},
+ {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0},
+ },
+ .widths = {4, 4, 4, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl spr_reg_rrl_ddr = {
+ .set_num = 3,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND},
+ .offsets = {
+ {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8},
+ {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0},
+ {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl spr_reg_rrl_hbm_pch0 = {
+ .set_num = 2,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8},
+ {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl spr_reg_rrl_hbm_pch1 = {
+ .set_num = 2,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8},
+ {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl gnr_reg_rrl_ddr = {
+ .set_num = 4,
+ .reg_num = 6,
+ .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0},
+ {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8},
+ {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0},
+ {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(14),
+ .noover_mask = BIT(15),
+ .en_mask = BIT(12),
+
+ .cecnt_num = 8,
+ .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c},
+ .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4},
+};
+
+static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
{
- u32 s, d, d2;
+ switch (width) {
+ case 4:
+ return I10NM_GET_REG32(imc, chan, offset);
+ case 8:
+ return I10NM_GET_REG64(imc, chan, offset);
+ default:
+ i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width);
+ return 0;
+ }
+}
+
+static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val)
+{
+ switch (width) {
+ case 4:
+ return I10NM_SET_REG32(imc, chan, offset, (u32)val);
+ default:
+ i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width);
+ }
+}
- s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
- d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
- if (offsets_demand2)
- d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);
+static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
+ int rrl_set, bool enable, u32 *rrl_ctl)
+{
+ enum rrl_mode mode = rrl->modes[rrl_set];
+ u32 offset = rrl->offsets[rrl_set][0], v;
+ u8 width = rrl->widths[0];
+ bool first, scrub;
+
+ /* First or last read error. */
+ first = (mode == FRE_SCRUB || mode == FRE_DEMAND);
+ /* Patrol scrub or on-demand read error. */
+ scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB);
+
+ v = read_imc_reg(imc, chan, offset, width);
if (enable) {
- /* Save default configurations */
- imc->chan[chan].retry_rd_err_log_s = s;
- imc->chan[chan].retry_rd_err_log_d = d;
- if (offsets_demand2)
- imc->chan[chan].retry_rd_err_log_d2 = d2;
-
- s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
- s |= RETRY_RD_ERR_LOG_EN;
- d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
- d |= RETRY_RD_ERR_LOG_EN;
-
- if (offsets_demand2) {
- d2 &= ~RETRY_RD_ERR_LOG_UC;
- d2 |= RETRY_RD_ERR_LOG_NOOVER;
- d2 |= RETRY_RD_ERR_LOG_EN;
- }
+ /* Save default configurations. */
+ *rrl_ctl = v;
+ v &= ~rrl->uc_mask;
+
+ if (first)
+ v |= rrl->noover_mask;
+ else
+ v &= ~rrl->noover_mask;
+
+ if (scrub)
+ v |= rrl->en_patspr_mask;
+ else
+ v &= ~rrl->en_patspr_mask;
+
+ v |= rrl->en_mask;
} else {
- /* Restore default configurations */
- if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
- s |= RETRY_RD_ERR_LOG_UC;
- if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
- s |= RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
- s &= ~RETRY_RD_ERR_LOG_EN;
- if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
- d |= RETRY_RD_ERR_LOG_UC;
- if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
- d |= RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
- d &= ~RETRY_RD_ERR_LOG_EN;
-
- if (offsets_demand2) {
- if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
- d2 |= RETRY_RD_ERR_LOG_UC;
- if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
- d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
- d2 &= ~RETRY_RD_ERR_LOG_EN;
+ /* Restore default configurations. */
+ if (*rrl_ctl & rrl->uc_mask)
+ v |= rrl->uc_mask;
+
+ if (first) {
+ if (!(*rrl_ctl & rrl->noover_mask))
+ v &= ~rrl->noover_mask;
+ } else {
+ if (*rrl_ctl & rrl->noover_mask)
+ v |= rrl->noover_mask;
}
+
+ if (scrub) {
+ if (!(*rrl_ctl & rrl->en_patspr_mask))
+ v &= ~rrl->en_patspr_mask;
+ } else {
+ if (*rrl_ctl & rrl->en_patspr_mask)
+ v |= rrl->en_patspr_mask;
+ }
+
+ if (!(*rrl_ctl & rrl->en_mask))
+ v &= ~rrl->en_mask;
}
- I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
- I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
- if (offsets_demand2)
- I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2);
+ write_imc_reg(imc, chan, offset, width, v);
+}
+
+static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
+ bool enable, u32 *rrl_ctl)
+{
+ for (int i = 0; i < rrl->set_num; i++)
+ enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i);
+}
+
+static void enable_rrls_ddr(struct skx_imc *imc, bool enable)
+{
+ struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr;
+ int i, chan_num = res_cfg->ddr_chan_num;
+ struct skx_channel *chan = imc->chan;
+
+ if (!imc->mbase)
+ return;
+
+ for (i = 0; i < chan_num; i++)
+ enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]);
+}
+
+static void enable_rrls_hbm(struct skx_imc *imc, bool enable)
+{
+ struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm;
+ int i, chan_num = res_cfg->hbm_chan_num;
+ struct skx_channel *chan = imc->chan;
+
+ if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1])
+ return;
+
+ for (i = 0; i < chan_num; i++) {
+ enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]);
+ enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]);
+ }
}
static void enable_retry_rd_err_log(bool enable)
{
- int i, j, imc_num, chan_num;
- struct skx_imc *imc;
struct skx_dev *d;
+ int i, imc_num;
edac_dbg(2, "\n");
list_for_each_entry(d, i10nm_edac_list, list) {
imc_num = res_cfg->ddr_imc_num;
- chan_num = res_cfg->ddr_chan_num;
-
- for (i = 0; i < imc_num; i++) {
- imc = &d->imc[i];
- if (!imc->mbase)
- continue;
-
- for (j = 0; j < chan_num; j++)
- __enable_retry_rd_err_log(imc, j, enable,
- res_cfg->offsets_scrub,
- res_cfg->offsets_demand,
- res_cfg->offsets_demand2);
- }
+ for (i = 0; i < imc_num; i++)
+ enable_rrls_ddr(&d->imc[i], enable);
imc_num += res_cfg->hbm_imc_num;
- chan_num = res_cfg->hbm_chan_num;
-
- for (; i < imc_num; i++) {
- imc = &d->imc[i];
- if (!imc->mbase || !imc->hbm_mc)
- continue;
-
- for (j = 0; j < chan_num; j++) {
- __enable_retry_rd_err_log(imc, j, enable,
- res_cfg->offsets_scrub_hbm0,
- res_cfg->offsets_demand_hbm0,
- NULL);
- __enable_retry_rd_err_log(imc, j, enable,
- res_cfg->offsets_scrub_hbm1,
- res_cfg->offsets_demand_hbm1,
- NULL);
- }
- }
+ for (; i < imc_num; i++)
+ enable_rrls_hbm(&d->imc[i], enable);
}
}
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
int len, bool scrub_err)
{
+ int i, j, n, ch = res->channel, pch = res->cs & 1;
struct skx_imc *imc = &res->dev->imc[res->imc];
- u32 log0, log1, log2, log3, log4;
- u32 corr0, corr1, corr2, corr3;
- u32 lxg0, lxg1, lxg3, lxg4;
- u32 *xffsets = NULL;
- u64 log2a, log5;
- u64 lxg2a, lxg5;
- u32 *offsets;
- int n, pch;
+ u64 log, corr, status_mask;
+ struct reg_rrl *rrl;
+ bool scrub;
+ u32 offset;
+ u8 width;
if (!imc->mbase)
return;
- if (imc->hbm_mc) {
- pch = res->cs & 1;
+ rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr;
- if (pch)
- offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
- res_cfg->offsets_demand_hbm1;
- else
- offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
- res_cfg->offsets_demand_hbm0;
- } else {
- if (scrub_err) {
- offsets = res_cfg->offsets_scrub;
- } else {
- offsets = res_cfg->offsets_demand;
- xffsets = res_cfg->offsets_demand2;
- }
- }
+ if (!rrl)
+ return;
- log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
- log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
- log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
- log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
- log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
-
- if (xffsets) {
- lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
- lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
- lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
- lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
- lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
- }
+ status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask;
- if (res_cfg->type == SPR) {
- log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
- n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
- log0, log1, log2a, log3, log4, log5);
+ n = snprintf(msg, len, " retry_rd_err_log[");
+ for (i = 0; i < rrl->set_num; i++) {
+ scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB);
+ if (scrub_err != scrub)
+ continue;
- if (len - n > 0) {
- if (xffsets) {
- lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
- n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
- lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
- } else {
- n += snprintf(msg + n, len - n, "]");
- }
- }
- } else {
- log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
- n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
- log0, log1, log2, log3, log4, log5);
- }
+ for (j = 0; j < rrl->reg_num && len - n > 0; j++) {
+ offset = rrl->offsets[i][j];
+ width = rrl->widths[j];
+ log = read_imc_reg(imc, ch, offset, width);
- if (imc->hbm_mc) {
- if (pch) {
- corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
- corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
- corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
- corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
- } else {
- corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
- corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
- corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
- corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
+ if (width == 4)
+ n += snprintf(msg + n, len - n, "%.8llx ", log);
+ else
+ n += snprintf(msg + n, len - n, "%.16llx ", log);
+
+ /* Clear RRL status if RRL in Linux control mode. */
+ if (retry_rd_err_log == 2 && !j && (log & status_mask))
+ write_imc_reg(imc, ch, offset, width, log & ~status_mask);
}
- } else {
- corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
- corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
- corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
- corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
}
- if (len - n > 0)
- snprintf(msg + n, len - n,
- " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
- corr0 & 0xffff, corr0 >> 16,
- corr1 & 0xffff, corr1 >> 16,
- corr2 & 0xffff, corr2 >> 16,
- corr3 & 0xffff, corr3 >> 16);
-
- /* Clear status bits */
- if (retry_rd_err_log == 2) {
- if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
- log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
- I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
+ /* Move back one space. */
+ n--;
+ n += snprintf(msg + n, len - n, "]");
+
+ if (len - n > 0) {
+ n += snprintf(msg + n, len - n, " correrrcnt[");
+ for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) {
+ offset = rrl->cecnt_offsets[i];
+ width = rrl->cecnt_widths[i];
+ corr = read_imc_reg(imc, ch, offset, width);
+
+ /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */
+ if (res_cfg->type <= SPR) {
+ n += snprintf(msg + n, len - n, "%.4llx %.4llx ",
+ corr & 0xffff, corr >> 16);
+ } else {
+ /* CPUs {GNR} encode one counter per CORRERRCNT register. */
+ if (width == 4)
+ n += snprintf(msg + n, len - n, "%.8llx ", corr);
+ else
+ n += snprintf(msg + n, len - n, "%.16llx ", corr);
+ }
}
- if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
- lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
- I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
- }
+ /* Move back one space. */
+ n--;
+ n += snprintf(msg + n, len - n, "]");
}
}
@@ -870,8 +956,7 @@ static struct res_config i10nm_cfg0 = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
- .offsets_scrub = offsets_scrub_icx,
- .offsets_demand = offsets_demand_icx,
+ .reg_rrl_ddr = &icx_reg_rrl_ddr,
};
static struct res_config i10nm_cfg1 = {
@@ -889,8 +974,7 @@ static struct res_config i10nm_cfg1 = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
- .offsets_scrub = offsets_scrub_icx,
- .offsets_demand = offsets_demand_icx,
+ .reg_rrl_ddr = &icx_reg_rrl_ddr,
};
static struct res_config spr_cfg = {
@@ -913,13 +997,9 @@ static struct res_config spr_cfg = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x300,
- .offsets_scrub = offsets_scrub_spr,
- .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
- .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1,
- .offsets_demand = offsets_demand_spr,
- .offsets_demand2 = offsets_demand2_spr,
- .offsets_demand_hbm0 = offsets_demand_spr_hbm0,
- .offsets_demand_hbm1 = offsets_demand_spr_hbm1,
+ .reg_rrl_ddr = &spr_reg_rrl_ddr,
+ .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0,
+ .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1,
};
static struct res_config gnr_cfg = {
@@ -937,6 +1017,7 @@ static struct res_config gnr_cfg = {
.uracu_bdf = {0, 0, 1},
.ddr_mdev_bdf = {0, 5, 1},
.sad_all_offset = 0x300,
+ .reg_rrl_ddr = &gnr_reg_rrl_ddr,
};
static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -1108,7 +1189,7 @@ static int __init i10nm_init(void)
mce_register_decode_chain(&i10nm_mce_dec);
skx_setup_debug("i10nm_test");
- if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+ if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(true);
@@ -1128,7 +1209,7 @@ static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");
- if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+ if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
skx_set_decode(NULL, NULL);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(false);
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 204834149579..a53612be4b2f 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -52,6 +52,7 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/mce.h>
+#include <asm/msr.h>
#include "edac_module.h"
#define EDAC_MOD_STR "ie31200_edac"
@@ -89,6 +90,10 @@
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4 0xa700
+
+/* Alder Lake-S */
+#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660
#define IE31200_RANKS_PER_CHANNEL 8
#define IE31200_DIMMS_PER_CHANNEL 2
@@ -734,6 +739,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
{ 0, } /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 5807517ee32d..1930dc00c791 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -127,6 +127,7 @@
static const struct res_config {
bool machine_check;
+ /* The number of present memory controllers. */
int num_imc;
u32 imc_base;
u32 cmf_base;
@@ -240,6 +241,12 @@ static struct work_struct ecclog_work;
#define DID_ADL_N_SKU11 0x467c
#define DID_ADL_N_SKU12 0x4632
+/* Compute die IDs for Arizona Beach with IBECC */
+#define DID_AZB_SKU1 0x4676
+
+/* Compute did IDs for Amston Lake with IBECC */
+#define DID_ASL_SKU1 0x464a
+
/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1 0xa706
#define DID_RPL_P_SKU2 0xa707
@@ -595,6 +602,8 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
@@ -1201,23 +1210,21 @@ static void igen6_check(struct mem_ctl_info *mci)
irq_work_queue(&ecclog_irq_work);
}
-static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
+/* Check whether the memory controller is absent. */
+static bool igen6_imc_absent(void __iomem *window)
+{
+ return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
+}
+
+static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct igen6_imc *imc;
- void __iomem *window;
int rc;
edac_dbg(2, "\n");
- mchbar += mc * MCHBAR_SIZE;
- window = ioremap(mchbar, MCHBAR_SIZE);
- if (!window) {
- igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
- return -ENODEV;
- }
-
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
@@ -1283,7 +1290,6 @@ fail3:
fail2:
edac_mc_free(mci);
fail:
- iounmap(window);
return rc;
}
@@ -1309,6 +1315,56 @@ static void igen6_unregister_mcis(void)
}
}
+static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
+{
+ void __iomem *window;
+ int lmc, pmc, rc;
+ u64 base;
+
+ for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
+ base = mchbar + pmc * MCHBAR_SIZE;
+ window = ioremap(base, MCHBAR_SIZE);
+ if (!window) {
+ igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
+ rc = -ENOMEM;
+ goto out_unregister_mcis;
+ }
+
+ if (igen6_imc_absent(window)) {
+ iounmap(window);
+ edac_dbg(2, "Skip absent mc%d\n", pmc);
+ continue;
+ }
+
+ rc = igen6_register_mci(lmc, window, pdev);
+ if (rc)
+ goto out_iounmap;
+
+ /* Done, if all present MCs are detected and registered. */
+ if (++lmc >= res_cfg->num_imc)
+ break;
+ }
+
+ if (!lmc) {
+ igen6_printk(KERN_ERR, "No mc found.\n");
+ return -ENODEV;
+ }
+
+ if (lmc < res_cfg->num_imc)
+ igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.",
+ res_cfg->num_imc, lmc);
+
+ return 0;
+
+out_iounmap:
+ iounmap(window);
+
+out_unregister_mcis:
+ igen6_unregister_mcis();
+
+ return rc;
+}
+
static int igen6_mem_slice_setup(u64 mchbar)
{
struct igen6_imc *imc = &igen6_pvt->imc[0];
@@ -1405,7 +1461,7 @@ static void opstate_set(const struct res_config *cfg, const struct pci_device_id
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
- int i, rc;
+ int rc;
edac_dbg(2, "\n");
@@ -1421,11 +1477,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
opstate_set(res_cfg, ent);
- for (i = 0; i < res_cfg->num_imc; i++) {
- rc = igen6_register_mci(i, mchbar, pdev);
- if (rc)
- goto fail2;
- }
+ rc = igen6_register_mcis(pdev, mchbar);
+ if (rc)
+ goto fail;
if (res_cfg->num_imc > 1) {
rc = igen6_mem_slice_setup(mchbar);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 50d74d3bf0f5..af3c12284a1e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/cpu.h>
+#include <asm/msr.h>
#include "mce_amd.h"
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index fa5b442b1844..c9ade45c1a99 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -116,6 +116,7 @@ EXPORT_SYMBOL_GPL(skx_adxl_get);
void skx_adxl_put(void)
{
+ adxl_component_count = 0;
kfree(adxl_values);
kfree(adxl_msg);
}
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index ca5408803f87..ec4966f7ea40 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -79,6 +79,47 @@
*/
#define MCACOD_EXT_MEM_ERR 0x280
+/* Max RRL register sets per {,sub-,pseudo-}channel. */
+#define NUM_RRL_SET 4
+/* Max RRL registers per set. */
+#define NUM_RRL_REG 6
+/* Max correctable error count registers. */
+#define NUM_CECNT_REG 8
+
+/* Modes of RRL register set. */
+enum rrl_mode {
+ /* Last read error from patrol scrub. */
+ LRE_SCRUB,
+ /* Last read error from demand. */
+ LRE_DEMAND,
+ /* First read error from patrol scrub. */
+ FRE_SCRUB,
+ /* First read error from demand. */
+ FRE_DEMAND,
+};
+
+/* RRL registers per {,sub-,pseudo-}channel. */
+struct reg_rrl {
+ /* RRL register parts. */
+ int set_num, reg_num;
+ enum rrl_mode modes[NUM_RRL_SET];
+ u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
+ /* RRL register widths in byte per set. */
+ u8 widths[NUM_RRL_REG];
+ /* RRL control bits of the first register per set. */
+ u32 v_mask;
+ u32 uc_mask;
+ u32 over_mask;
+ u32 en_patspr_mask;
+ u32 noover_mask;
+ u32 en_mask;
+
+ /* CORRERRCNT register parts. */
+ int cecnt_num;
+ u32 cecnt_offsets[NUM_CECNT_REG];
+ u8 cecnt_widths[NUM_CECNT_REG];
+};
+
/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
@@ -117,9 +158,11 @@ struct skx_dev {
struct skx_channel {
struct pci_dev *cdev;
struct pci_dev *edev;
- u32 retry_rd_err_log_s;
- u32 retry_rd_err_log_d;
- u32 retry_rd_err_log_d2;
+ /*
+ * Two groups of RRL control registers per channel to save default RRL
+ * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
+ */
+ u32 rrl_ctl[2][NUM_RRL_SET];
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
@@ -232,14 +275,10 @@ struct res_config {
/* HBM mdev device BDF */
struct pci_bdf hbm_mdev_bdf;
int sad_all_offset;
- /* Offsets of retry_rd_err_log registers */
- u32 *offsets_scrub;
- u32 *offsets_scrub_hbm0;
- u32 *offsets_scrub_hbm1;
- u32 *offsets_demand;
- u32 *offsets_demand2;
- u32 *offsets_demand_hbm0;
- u32 *offsets_demand_hbm1;
+ /* RRL register sets per DDR channel */
+ struct reg_rrl *reg_rrl_ddr;
+ /* RRL register sets per HBM channel */
+ struct reg_rrl *reg_rrl_hbm[2];
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index b0f9ef6ac6df..18cacb9edbbc 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -431,7 +431,7 @@ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
fw_send_request(card, &t, tcode, destination_id, generation, speed,
offset, payload, length, transaction_callback, &d);
wait_for_completion(&d.done);
- destroy_timer_on_stack(&t.split_timeout_timer);
+ timer_destroy_on_stack(&t.split_timeout_timer);
return d.rcode;
}
diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c b/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c
index 49d84f7e59e6..3f8777ee4dc0 100644
--- a/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c
+++ b/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c
@@ -96,10 +96,11 @@ static void cs_dsp_mock_bin_add_name_or_info(struct cs_dsp_mock_bin_builder *bui
if (info_len % 4) {
/* Create a padded string with length a multiple of 4 */
+ size_t copy_len = info_len;
info_len = round_up(info_len, 4);
tmp = kunit_kzalloc(builder->test_priv->test, info_len, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, tmp);
- memcpy(tmp, info, info_len);
+ memcpy(tmp, info, copy_len);
info = tmp;
}
@@ -176,6 +177,9 @@ struct cs_dsp_mock_bin_builder *cs_dsp_mock_bin_init(struct cs_dsp_test *priv,
struct cs_dsp_mock_bin_builder *builder;
struct wmfw_coeff_hdr *hdr;
+ KUNIT_ASSERT_LE(priv->test, format_version, 0xff);
+ KUNIT_ASSERT_LE(priv->test, fw_version, 0xffffff);
+
builder = kunit_kzalloc(priv->test, sizeof(*builder), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(priv->test, builder);
builder->test_priv = priv;
diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c
index 73412bcef50c..95946fac5563 100644
--- a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c
+++ b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c
@@ -505,9 +505,11 @@ void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv)
* Could be one 32-bit register or two 16-bit registers.
* A raw read will read the requested number of bytes.
*/
- regmap_raw_read(priv->dsp->regmap,
- xm + (offsetof(struct wmfw_adsp2_id_hdr, n_algs) / 2),
- &num_algs_be32, sizeof(num_algs_be32));
+ KUNIT_ASSERT_GE(priv->test, 0,
+ regmap_raw_read(priv->dsp->regmap,
+ xm +
+ (offsetof(struct wmfw_adsp2_id_hdr, n_algs) / 2),
+ &num_algs_be32, sizeof(num_algs_be32)));
num_algs = be32_to_cpu(num_algs_be32);
bytes = sizeof(struct wmfw_adsp2_id_hdr) +
(num_algs * sizeof(struct wmfw_adsp2_alg_hdr)) +
@@ -516,9 +518,10 @@ void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv)
regcache_drop_region(priv->dsp->regmap, xm, xm + (bytes / 2) - 1);
break;
case WMFW_HALO:
- regmap_read(priv->dsp->regmap,
- xm + offsetof(struct wmfw_halo_id_hdr, n_algs),
- &num_algs);
+ KUNIT_ASSERT_GE(priv->test, 0,
+ regmap_read(priv->dsp->regmap,
+ xm + offsetof(struct wmfw_halo_id_hdr, n_algs),
+ &num_algs));
bytes = sizeof(struct wmfw_halo_id_hdr) +
(num_algs * sizeof(struct wmfw_halo_alg_hdr)) +
4 /* terminator word */;
diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c
index 5a3ac03ac37f..934d40a4d709 100644
--- a/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c
+++ b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c
@@ -178,6 +178,8 @@ void cs_dsp_mock_wmfw_start_alg_info_block(struct cs_dsp_mock_wmfw_builder *buil
size_t bytes_needed, name_len, description_len;
int offset;
+ KUNIT_ASSERT_LE(builder->test_priv->test, alg_id, 0xffffff);
+
/* Bytes needed for region header */
bytes_needed = offsetof(struct wmfw_region, data);
@@ -435,6 +437,8 @@ struct cs_dsp_mock_wmfw_builder *cs_dsp_mock_wmfw_init(struct cs_dsp_test *priv,
{
struct cs_dsp_mock_wmfw_builder *builder;
+ KUNIT_ASSERT_LE(priv->test, format_version, 0xff);
+
/* If format version isn't given use the default for the target core */
if (format_version < 0) {
switch (priv->dsp->type) {
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index d23a1b9fed75..2f173391b63d 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -85,7 +85,6 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += kaslr.o arm64.o arm64-stub.o smbios.o
lib-$(CONFIG_X86) += x86-stub.o smbios.o
-lib-$(CONFIG_EFI_MIXED) += x86-mixed.o
lib-$(CONFIG_X86_64) += x86-5lvl.o
lib-$(CONFIG_RISCV) += kaslr.o riscv.o riscv-stub.o
lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o
diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c
index 77359e802181..f1c5fb45d5f7 100644
--- a/drivers/firmware/efi/libstub/x86-5lvl.c
+++ b/drivers/firmware/efi/libstub/x86-5lvl.c
@@ -62,7 +62,7 @@ efi_status_t efi_setup_5level_paging(void)
void efi_5level_switch(void)
{
- bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+ bool want_la57 = !efi_no5lvl;
bool have_la57 = native_read_cr4() & X86_CR4_LA57;
bool need_toggle = want_la57 ^ have_la57;
u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c
index b662b7e28b80..df02a4ec3398 100644
--- a/drivers/firmware/psci/psci_checker.c
+++ b/drivers/firmware/psci/psci_checker.c
@@ -343,7 +343,7 @@ static int suspend_test_thread(void *arg)
* later.
*/
timer_delete(&wakeup_timer);
- destroy_timer_on_stack(&wakeup_timer);
+ timer_destroy_on_stack(&wakeup_timer);
if (atomic_dec_return_relaxed(&nb_active_threads) == 0)
complete(&suspend_threads_done);
diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c
index a85b2dbdd9f0..15e991b99f5a 100644
--- a/drivers/firmware/samsung/exynos-acpm.c
+++ b/drivers/firmware/samsung/exynos-acpm.c
@@ -185,6 +185,29 @@ struct acpm_match_data {
#define handle_to_acpm_info(h) container_of(h, struct acpm_info, handle)
/**
+ * acpm_get_saved_rx() - get the response if it was already saved.
+ * @achan: ACPM channel info.
+ * @xfer: reference to the transfer to get response for.
+ * @tx_seqnum: xfer TX sequence number.
+ */
+static void acpm_get_saved_rx(struct acpm_chan *achan,
+ const struct acpm_xfer *xfer, u32 tx_seqnum)
+{
+ const struct acpm_rx_data *rx_data = &achan->rx_data[tx_seqnum - 1];
+ u32 rx_seqnum;
+
+ if (!rx_data->response)
+ return;
+
+ rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, rx_data->cmd[0]);
+
+ if (rx_seqnum == tx_seqnum) {
+ memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen);
+ clear_bit(rx_seqnum - 1, achan->bitmap_seqnum);
+ }
+}
+
+/**
* acpm_get_rx() - get response from RX queue.
* @achan: ACPM channel info.
* @xfer: reference to the transfer to get response for.
@@ -204,15 +227,16 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
rx_front = readl(achan->rx.front);
i = readl(achan->rx.rear);
- /* Bail out if RX is empty. */
- if (i == rx_front)
+ tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]);
+
+ if (i == rx_front) {
+ acpm_get_saved_rx(achan, xfer, tx_seqnum);
return 0;
+ }
base = achan->rx.base;
mlen = achan->mlen;
- tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]);
-
/* Drain RX queue. */
do {
/* Read RX seqnum. */
@@ -259,16 +283,8 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
* If the response was not in this iteration of the queue, check if the
* RX data was previously saved.
*/
- rx_data = &achan->rx_data[tx_seqnum - 1];
- if (!rx_set && rx_data->response) {
- rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM,
- rx_data->cmd[0]);
-
- if (rx_seqnum == tx_seqnum) {
- memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen);
- clear_bit(rx_seqnum - 1, achan->bitmap_seqnum);
- }
- }
+ if (!rx_set)
+ acpm_get_saved_rx(achan, xfer, tx_seqnum);
return 0;
}
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index f2c39bbff83a..44f922e10db2 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -201,6 +201,7 @@ config GPIO_RASPBERRYPI_EXP
config GPIO_BCM_KONA
bool "Broadcom Kona GPIO"
depends on ARCH_BCM_MOBILE || COMPILE_TEST
+ select GPIOLIB_IRQCHIP
help
Turn on GPIO support for Broadcom "Kona" chips.
@@ -213,6 +214,18 @@ config GPIO_BCM_XGS_IPROC
help
Say yes here to enable GPIO support for Broadcom XGS iProc SoCs.
+config GPIO_BLZP1600
+ tristate "Blaize BLZP1600 GPIO support"
+ default y if ARCH_BLAIZE
+ depends on ARCH_BLAIZE || COMPILE_TEST
+ depends on OF_GPIO
+ select GPIO_GENERIC
+ select GPIOLIB_IRQCHIP
+ help
+ Say Y or M here to add support for the Blaize BLZP1600 GPIO device.
+ The controller is based on the Verisilicon Microelectronics GPIO APB v0.2
+ IP block.
+
config GPIO_BRCMSTB
tristate "BRCMSTB GPIO support"
default y if (ARCH_BRCMSTB || BMIPS_GENERIC)
@@ -241,6 +254,7 @@ config GPIO_DAVINCI
tristate "TI Davinci/Keystone GPIO support"
default y if ARCH_DAVINCI
depends on ((ARM || ARM64) && (ARCH_DAVINCI || ARCH_KEYSTONE || ARCH_K3)) || COMPILE_TEST
+ select GPIOLIB_IRQCHIP
help
Say yes here to enable GPIO support for TI Davinci/Keystone SoCs.
@@ -340,7 +354,7 @@ config GPIO_GRGPIO
tristate "Aeroflex Gaisler GRGPIO support"
depends on OF || COMPILE_TEST
select GPIO_GENERIC
- select IRQ_DOMAIN
+ select GPIOLIB_IRQCHIP
help
Select this to support Aeroflex Gaisler GRGPIO cores from the GRLIB
VHDL IP core library.
@@ -368,8 +382,7 @@ config GPIO_HLWD
config GPIO_ICH
tristate "Intel ICH GPIO"
- depends on X86
- depends on LPC_ICH
+ depends on (X86 && LPC_ICH) || (COMPILE_TEST && HAS_IOPORT)
help
Say yes here to support the GPIO functionality of a number of Intel
ICH-based chipsets. Currently supported devices: ICH6, ICH7, ICH8
@@ -425,6 +438,7 @@ config GPIO_LPC18XX
default y if ARCH_LPC18XX
depends on OF_GPIO && (ARCH_LPC18XX || COMPILE_TEST)
select IRQ_DOMAIN_HIERARCHY
+ select GPIOLIB_IRQCHIP
help
Select this option to enable GPIO driver for
NXP LPC18XX/43XX devices.
@@ -468,7 +482,7 @@ config GPIO_MPC8XXX
FSL_SOC_BOOKE || PPC_86xx || ARCH_LAYERSCAPE || ARM || \
COMPILE_TEST
select GPIO_GENERIC
- select IRQ_DOMAIN
+ select GPIOLIB_IRQCHIP
help
Say Y here if you're going to use hardware that connects to the
MPC512x/831x/834x/837x/8572/8610/QorIQ GPIOs.
@@ -540,7 +554,7 @@ config GPIO_OMAP
config GPIO_PL061
tristate "PrimeCell PL061 GPIO support"
- depends on ARM_AMBA
+ depends on ARM_AMBA || COMPILE_TEST
select IRQ_DOMAIN
select GPIOLIB_IRQCHIP
help
@@ -555,6 +569,7 @@ config GPIO_POLARFIRE_SOC
config GPIO_PXA
bool "PXA GPIO support"
depends on ARCH_PXA || ARCH_MMP || COMPILE_TEST
+ select GPIOLIB_IRQCHIP
help
Say yes here to support the PXA GPIO device.
@@ -604,7 +619,7 @@ config GPIO_ROCKCHIP
config GPIO_RTD
tristate "Realtek DHC GPIO support"
- depends on ARCH_REALTEK
+ depends on ARCH_REALTEK || COMPILE_TEST
default y
select GPIOLIB_IRQCHIP
help
@@ -656,6 +671,15 @@ config GPIO_SNPS_CREG
where only several fields in register belong to GPIO lines and
each GPIO line owns a field with different length and on/off value.
+config GPIO_SPACEMIT_K1
+ tristate "SPACEMIT K1 GPIO support"
+ depends on ARCH_SPACEMIT || COMPILE_TEST
+ depends on OF_GPIO
+ select GPIO_GENERIC
+ select GPIOLIB_IRQCHIP
+ help
+ Say yes here to support the SpacemiT's K1 GPIO device.
+
config GPIO_SPEAR_SPICS
bool "ST SPEAr13xx SPI Chip Select as GPIO support"
depends on PLAT_SPEAR
@@ -753,7 +777,7 @@ config GPIO_UNIPHIER
Say yes here to support UniPhier GPIOs.
config GPIO_VF610
- bool "VF610 GPIO support"
+ tristate "VF610 GPIO support"
default y if SOC_VF610
depends on ARCH_MXC || COMPILE_TEST
select GPIOLIB_IRQCHIP
@@ -830,14 +854,14 @@ config GPIO_ZEVIO
config GPIO_ZYNQ
tristate "Xilinx Zynq GPIO support"
- depends on ARCH_ZYNQ || ARCH_ZYNQMP
+ depends on ARCH_ZYNQ || ARCH_ZYNQMP || COMPILE_TEST
select GPIOLIB_IRQCHIP
help
Say yes here to support Xilinx Zynq GPIO controller.
config GPIO_ZYNQMP_MODEPIN
tristate "ZynqMP ps-mode pin GPIO configuration driver"
- depends on ZYNQMP_FIRMWARE
+ depends on ZYNQMP_FIRMWARE || COMPILE_TEST
default ZYNQMP_FIRMWARE
help
Say yes here to support the ZynqMP ps-mode pin GPIO configuration
@@ -866,7 +890,7 @@ config GPIO_AMD_FCH
config GPIO_MSC313
bool "MStar MSC313 GPIO support"
- depends on ARCH_MSTARV7
+ depends on ARCH_MSTARV7 || COMPILE_TEST
default ARCH_MSTARV7
select GPIOLIB_IRQCHIP
select IRQ_DOMAIN_HIERARCHY
@@ -1365,7 +1389,7 @@ config GPIO_DLN2
config HTC_EGPIO
bool "HTC EGPIO support"
- depends on ARM
+ depends on ARM || COMPILE_TEST
help
This driver supports the CPLD egpio chip present on
several HTC phones. It provides basic support for input
@@ -1463,6 +1487,19 @@ config GPIO_MAX77650
GPIO driver for MAX77650/77651 PMIC from Maxim Semiconductor.
These chips have a single pin that can be configured as GPIO.
+config GPIO_MAX77759
+ tristate "Maxim Integrated MAX77759 GPIO support"
+ depends on MFD_MAX77759
+ default MFD_MAX77759
+ select GPIOLIB_IRQCHIP
+ help
+ GPIO driver for MAX77759 PMIC from Maxim Integrated.
+ There are two GPIOs available on these chips in total, both of
+ which can also generate interrupts.
+
+ This driver can also be built as a module. If so, the module will be
+ called gpio-max77759.
+
config GPIO_PALMAS
bool "TI PALMAS series PMICs GPIO"
depends on MFD_PALMAS
@@ -1520,12 +1557,13 @@ config GPIO_TC3589X
config GPIO_TIMBERDALE
bool "Support for timberdale GPIO IP"
depends on MFD_TIMBERDALE
+ select GPIOLIB_IRQCHIP
help
Add support for the GPIO IP in the timberdale FPGA.
config GPIO_TN48M_CPLD
tristate "Delta Networks TN48M switch CPLD GPIO driver"
- depends on MFD_TN48M_CPLD
+ depends on MFD_TN48M_CPLD || COMPILE_TEST
select GPIO_REGMAP
help
This enables support for the GPIOs found on the Delta
@@ -1869,6 +1907,8 @@ menu "Virtual GPIO drivers"
config GPIO_AGGREGATOR
tristate "GPIO Aggregator"
+ select CONFIGFS_FS
+ select DEV_SYNC_PROBE
help
Say yes here to enable the GPIO Aggregator, which provides a way to
aggregate existing GPIO lines into a new virtual GPIO chip.
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index af130882ffee..88dedd298256 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_OF_GPIO) += gpiolib-of.o
obj-$(CONFIG_GPIO_CDEV) += gpiolib-cdev.o
obj-$(CONFIG_GPIO_SYSFS) += gpiolib-sysfs.o
obj-$(CONFIG_GPIO_ACPI) += gpiolib-acpi.o
+gpiolib-acpi-y := gpiolib-acpi-core.o gpiolib-acpi-quirks.o
obj-$(CONFIG_GPIOLIB) += gpiolib-swnode.o
# Device drivers. Generally keep list sorted alphabetically
@@ -45,6 +46,7 @@ obj-$(CONFIG_GPIO_BCM_XGS_IPROC) += gpio-xgs-iproc.o
obj-$(CONFIG_GPIO_BD71815) += gpio-bd71815.o
obj-$(CONFIG_GPIO_BD71828) += gpio-bd71828.o
obj-$(CONFIG_GPIO_BD9571MWV) += gpio-bd9571mwv.o
+obj-$(CONFIG_GPIO_BLZP1600) += gpio-blzp1600.o
obj-$(CONFIG_GPIO_BRCMSTB) += gpio-brcmstb.o
obj-$(CONFIG_GPIO_BT8XX) += gpio-bt8xx.o
obj-$(CONFIG_GPIO_CADENCE) += gpio-cadence.o
@@ -105,6 +107,7 @@ obj-$(CONFIG_GPIO_MAX730X) += gpio-max730x.o
obj-$(CONFIG_GPIO_MAX732X) += gpio-max732x.o
obj-$(CONFIG_GPIO_MAX77620) += gpio-max77620.o
obj-$(CONFIG_GPIO_MAX77650) += gpio-max77650.o
+obj-$(CONFIG_GPIO_MAX77759) += gpio-max77759.o
obj-$(CONFIG_GPIO_MB86S7X) += gpio-mb86s7x.o
obj-$(CONFIG_GPIO_MC33880) += gpio-mc33880.o
obj-$(CONFIG_GPIO_MENZ127) += gpio-menz127.o
@@ -159,6 +162,7 @@ obj-$(CONFIG_GPIO_SIOX) += gpio-siox.o
obj-$(CONFIG_GPIO_SL28CPLD) += gpio-sl28cpld.o
obj-$(CONFIG_GPIO_SLOPPY_LOGIC_ANALYZER) += gpio-sloppy-logic-analyzer.o
obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o
+obj-$(CONFIG_GPIO_SPACEMIT_K1) += gpio-spacemit-k1.o
obj-$(CONFIG_GPIO_SPEAR_SPICS) += gpio-spear-spics.o
obj-$(CONFIG_GPIO_SPRD) += gpio-sprd.o
obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o
diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO
index 4b70cbaa1caa..4a8b349f2483 100644
--- a/drivers/gpio/TODO
+++ b/drivers/gpio/TODO
@@ -44,6 +44,13 @@ Work items:
to a machine description such as device tree, ACPI or fwnode that
implicitly does not use global GPIO numbers.
+- Fix drivers to not read back struct gpio_chip::base. Some drivers do
+ that and would be broken by attempts to poison it or make it dynamic.
+ Example in AT91 pinctrl driver:
+ https://lore.kernel.org/all/1d00c056-3d61-4c22-bedd-3bae0bf1ddc4@pengutronix.de/
+ This particular driver is also DT-only, so with the above fixed, the
+ base can be made dynamic (set to -1) if CONFIG_GPIO_SYSFS is disabled.
+
- When this work is complete (will require some of the items in the
following ongoing work as well) we can delete the old global
numberspace accessors from <linux/gpio.h> and eventually delete
diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c
index d232ea865356..6f941db02c04 100644
--- a/drivers/gpio/gpio-aggregator.c
+++ b/drivers/gpio/gpio-aggregator.c
@@ -9,10 +9,13 @@
#include <linux/bitmap.h>
#include <linux/bitops.h>
+#include <linux/configfs.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/idr.h>
#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -27,226 +30,200 @@
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>
+#include "dev-sync-probe.h"
+
#define AGGREGATOR_MAX_GPIOS 512
+#define AGGREGATOR_LEGACY_PREFIX "_sysfs"
/*
* GPIO Aggregator sysfs interface
*/
struct gpio_aggregator {
+ struct dev_sync_probe_data probe_data;
+ struct config_group group;
struct gpiod_lookup_table *lookups;
- struct platform_device *pdev;
+ struct mutex lock;
+ int id;
+
+ /* List of gpio_aggregator_line. Always added in order */
+ struct list_head list_head;
+
+ /* used by legacy sysfs interface only */
+ bool init_via_sysfs;
char args[];
};
+struct gpio_aggregator_line {
+ struct config_group group;
+ struct gpio_aggregator *parent;
+ struct list_head entry;
+
+ /* Line index within the aggregator device */
+ unsigned int idx;
+
+ /* Custom name for the virtual line */
+ const char *name;
+ /* GPIO chip label or line name */
+ const char *key;
+ /* Can be negative to indicate lookup by line name */
+ int offset;
+
+ enum gpio_lookup_flags flags;
+};
+
+struct gpio_aggregator_pdev_meta {
+ bool init_via_sysfs;
+};
+
static DEFINE_MUTEX(gpio_aggregator_lock); /* protects idr */
static DEFINE_IDR(gpio_aggregator_idr);
-static int aggr_add_gpio(struct gpio_aggregator *aggr, const char *key,
- int hwnum, unsigned int *n)
+static int gpio_aggregator_alloc(struct gpio_aggregator **aggr, size_t arg_size)
{
- struct gpiod_lookup_table *lookups;
+ int ret;
- lookups = krealloc(aggr->lookups, struct_size(lookups, table, *n + 2),
- GFP_KERNEL);
- if (!lookups)
+ struct gpio_aggregator *new __free(kfree) = kzalloc(
+ sizeof(*new) + arg_size, GFP_KERNEL);
+ if (!new)
return -ENOMEM;
- lookups->table[*n] = GPIO_LOOKUP_IDX(key, hwnum, NULL, *n, 0);
+ scoped_guard(mutex, &gpio_aggregator_lock)
+ ret = idr_alloc(&gpio_aggregator_idr, new, 0, 0, GFP_KERNEL);
- (*n)++;
- memset(&lookups->table[*n], 0, sizeof(lookups->table[*n]));
+ if (ret < 0)
+ return ret;
- aggr->lookups = lookups;
+ new->id = ret;
+ INIT_LIST_HEAD(&new->list_head);
+ mutex_init(&new->lock);
+ *aggr = no_free_ptr(new);
return 0;
}
-static int aggr_parse(struct gpio_aggregator *aggr)
+static void gpio_aggregator_free(struct gpio_aggregator *aggr)
{
- char *args = skip_spaces(aggr->args);
- char *name, *offsets, *p;
- unsigned int i, n = 0;
- int error = 0;
-
- unsigned long *bitmap __free(bitmap) =
- bitmap_alloc(AGGREGATOR_MAX_GPIOS, GFP_KERNEL);
- if (!bitmap)
- return -ENOMEM;
-
- args = next_arg(args, &name, &p);
- while (*args) {
- args = next_arg(args, &offsets, &p);
-
- p = get_options(offsets, 0, &error);
- if (error == 0 || *p) {
- /* Named GPIO line */
- error = aggr_add_gpio(aggr, name, U16_MAX, &n);
- if (error)
- return error;
+ scoped_guard(mutex, &gpio_aggregator_lock)
+ idr_remove(&gpio_aggregator_idr, aggr->id);
- name = offsets;
- continue;
- }
+ mutex_destroy(&aggr->lock);
+ kfree(aggr);
+}
- /* GPIO chip + offset(s) */
- error = bitmap_parselist(offsets, bitmap, AGGREGATOR_MAX_GPIOS);
- if (error) {
- pr_err("Cannot parse %s: %d\n", offsets, error);
- return error;
- }
+static int gpio_aggregator_add_gpio(struct gpio_aggregator *aggr,
+ const char *key, int hwnum, unsigned int *n)
+{
+ struct gpiod_lookup_table *lookups;
- for_each_set_bit(i, bitmap, AGGREGATOR_MAX_GPIOS) {
- error = aggr_add_gpio(aggr, name, i, &n);
- if (error)
- return error;
- }
+ lookups = krealloc(aggr->lookups, struct_size(lookups, table, *n + 2),
+ GFP_KERNEL);
+ if (!lookups)
+ return -ENOMEM;
- args = next_arg(args, &name, &p);
- }
+ lookups->table[*n] = GPIO_LOOKUP_IDX(key, hwnum, NULL, *n, 0);
- if (!n) {
- pr_err("No GPIOs specified\n");
- return -EINVAL;
- }
+ (*n)++;
+ memset(&lookups->table[*n], 0, sizeof(lookups->table[*n]));
+ aggr->lookups = lookups;
return 0;
}
-static ssize_t new_device_store(struct device_driver *driver, const char *buf,
- size_t count)
+static bool gpio_aggregator_is_active(struct gpio_aggregator *aggr)
{
- struct gpio_aggregator *aggr;
- struct platform_device *pdev;
- int res, id;
+ lockdep_assert_held(&aggr->lock);
- if (!try_module_get(THIS_MODULE))
- return -ENOENT;
-
- /* kernfs guarantees string termination, so count + 1 is safe */
- aggr = kzalloc(sizeof(*aggr) + count + 1, GFP_KERNEL);
- if (!aggr) {
- res = -ENOMEM;
- goto put_module;
- }
-
- memcpy(aggr->args, buf, count + 1);
+ return aggr->probe_data.pdev && platform_get_drvdata(aggr->probe_data.pdev);
+}
- aggr->lookups = kzalloc(struct_size(aggr->lookups, table, 1),
- GFP_KERNEL);
- if (!aggr->lookups) {
- res = -ENOMEM;
- goto free_ga;
- }
+/* Only aggregators created via legacy sysfs can be "activating". */
+static bool gpio_aggregator_is_activating(struct gpio_aggregator *aggr)
+{
+ lockdep_assert_held(&aggr->lock);
- mutex_lock(&gpio_aggregator_lock);
- id = idr_alloc(&gpio_aggregator_idr, aggr, 0, 0, GFP_KERNEL);
- mutex_unlock(&gpio_aggregator_lock);
+ return aggr->probe_data.pdev && !platform_get_drvdata(aggr->probe_data.pdev);
+}
- if (id < 0) {
- res = id;
- goto free_table;
- }
+static size_t gpio_aggregator_count_lines(struct gpio_aggregator *aggr)
+{
+ lockdep_assert_held(&aggr->lock);
- aggr->lookups->dev_id = kasprintf(GFP_KERNEL, "%s.%d", DRV_NAME, id);
- if (!aggr->lookups->dev_id) {
- res = -ENOMEM;
- goto remove_idr;
- }
+ return list_count_nodes(&aggr->list_head);
+}
- res = aggr_parse(aggr);
- if (res)
- goto free_dev_id;
+static struct gpio_aggregator_line *
+gpio_aggregator_line_alloc(struct gpio_aggregator *parent, unsigned int idx,
+ char *key, int offset)
+{
+ struct gpio_aggregator_line *line;
- gpiod_add_lookup_table(aggr->lookups);
+ line = kzalloc(sizeof(*line), GFP_KERNEL);
+ if (!line)
+ return ERR_PTR(-ENOMEM);
- pdev = platform_device_register_simple(DRV_NAME, id, NULL, 0);
- if (IS_ERR(pdev)) {
- res = PTR_ERR(pdev);
- goto remove_table;
+ if (key) {
+ line->key = kstrdup(key, GFP_KERNEL);
+ if (!line->key) {
+ kfree(line);
+ return ERR_PTR(-ENOMEM);
+ }
}
- aggr->pdev = pdev;
- module_put(THIS_MODULE);
- return count;
+ line->flags = GPIO_LOOKUP_FLAGS_DEFAULT;
+ line->parent = parent;
+ line->idx = idx;
+ line->offset = offset;
+ INIT_LIST_HEAD(&line->entry);
-remove_table:
- gpiod_remove_lookup_table(aggr->lookups);
-free_dev_id:
- kfree(aggr->lookups->dev_id);
-remove_idr:
- mutex_lock(&gpio_aggregator_lock);
- idr_remove(&gpio_aggregator_idr, id);
- mutex_unlock(&gpio_aggregator_lock);
-free_table:
- kfree(aggr->lookups);
-free_ga:
- kfree(aggr);
-put_module:
- module_put(THIS_MODULE);
- return res;
-}
-
-static DRIVER_ATTR_WO(new_device);
-
-static void gpio_aggregator_free(struct gpio_aggregator *aggr)
-{
- platform_device_unregister(aggr->pdev);
- gpiod_remove_lookup_table(aggr->lookups);
- kfree(aggr->lookups->dev_id);
- kfree(aggr->lookups);
- kfree(aggr);
+ return line;
}
-static ssize_t delete_device_store(struct device_driver *driver,
- const char *buf, size_t count)
+static void gpio_aggregator_line_add(struct gpio_aggregator *aggr,
+ struct gpio_aggregator_line *line)
{
- struct gpio_aggregator *aggr;
- unsigned int id;
- int error;
+ struct gpio_aggregator_line *tmp;
- if (!str_has_prefix(buf, DRV_NAME "."))
- return -EINVAL;
+ lockdep_assert_held(&aggr->lock);
- error = kstrtouint(buf + strlen(DRV_NAME "."), 10, &id);
- if (error)
- return error;
-
- if (!try_module_get(THIS_MODULE))
- return -ENOENT;
-
- mutex_lock(&gpio_aggregator_lock);
- aggr = idr_remove(&gpio_aggregator_idr, id);
- mutex_unlock(&gpio_aggregator_lock);
- if (!aggr) {
- module_put(THIS_MODULE);
- return -ENOENT;
+ list_for_each_entry(tmp, &aggr->list_head, entry) {
+ if (tmp->idx > line->idx) {
+ list_add_tail(&line->entry, &tmp->entry);
+ return;
+ }
}
-
- gpio_aggregator_free(aggr);
- module_put(THIS_MODULE);
- return count;
+ list_add_tail(&line->entry, &aggr->list_head);
}
-static DRIVER_ATTR_WO(delete_device);
-
-static struct attribute *gpio_aggregator_attrs[] = {
- &driver_attr_new_device.attr,
- &driver_attr_delete_device.attr,
- NULL
-};
-ATTRIBUTE_GROUPS(gpio_aggregator);
-static int __exit gpio_aggregator_idr_remove(int id, void *p, void *data)
+static void gpio_aggregator_line_del(struct gpio_aggregator *aggr,
+ struct gpio_aggregator_line *line)
{
- gpio_aggregator_free(p);
- return 0;
+ lockdep_assert_held(&aggr->lock);
+
+ list_del(&line->entry);
}
-static void __exit gpio_aggregator_remove_all(void)
+static void gpio_aggregator_free_lines(struct gpio_aggregator *aggr)
{
- mutex_lock(&gpio_aggregator_lock);
- idr_for_each(&gpio_aggregator_idr, gpio_aggregator_idr_remove, NULL);
- idr_destroy(&gpio_aggregator_idr);
- mutex_unlock(&gpio_aggregator_lock);
+ struct gpio_aggregator_line *line, *tmp;
+
+ list_for_each_entry_safe(line, tmp, &aggr->list_head, entry) {
+ configfs_unregister_group(&line->group);
+ /*
+ * Normally, we acquire aggr->lock within the configfs
+ * callback. However, in the legacy sysfs interface case,
+ * calling configfs_(un)register_group while holding
+ * aggr->lock could cause a deadlock. Fortunately, this is
+ * unnecessary because the new_device/delete_device path
+ * and the module unload path are mutually exclusive,
+ * thanks to an explicit try_module_get. That's why this
+ * minimal scoped_guard suffices.
+ */
+ scoped_guard(mutex, &aggr->lock)
+ gpio_aggregator_line_del(aggr, line);
+ kfree(line->key);
+ kfree(line->name);
+ kfree(line);
+ }
}
@@ -582,6 +559,728 @@ static struct gpiochip_fwd *gpiochip_fwd_create(struct device *dev,
return fwd;
}
+/*
+ * Configfs interface
+ */
+
+static struct gpio_aggregator *
+to_gpio_aggregator(struct config_item *item)
+{
+ struct config_group *group = to_config_group(item);
+
+ return container_of(group, struct gpio_aggregator, group);
+}
+
+static struct gpio_aggregator_line *
+to_gpio_aggregator_line(struct config_item *item)
+{
+ struct config_group *group = to_config_group(item);
+
+ return container_of(group, struct gpio_aggregator_line, group);
+}
+
+static struct fwnode_handle *
+gpio_aggregator_make_device_sw_node(struct gpio_aggregator *aggr)
+{
+ struct property_entry properties[2];
+ struct gpio_aggregator_line *line;
+ size_t num_lines;
+ int n = 0;
+
+ memset(properties, 0, sizeof(properties));
+
+ num_lines = gpio_aggregator_count_lines(aggr);
+ if (num_lines == 0)
+ return NULL;
+
+ const char **line_names __free(kfree) = kcalloc(
+ num_lines, sizeof(*line_names), GFP_KERNEL);
+ if (!line_names)
+ return ERR_PTR(-ENOMEM);
+
+ /* The list is always sorted as new elements are inserted in order. */
+ list_for_each_entry(line, &aggr->list_head, entry)
+ line_names[n++] = line->name ?: "";
+
+ properties[0] = PROPERTY_ENTRY_STRING_ARRAY_LEN(
+ "gpio-line-names",
+ line_names, num_lines);
+
+ return fwnode_create_software_node(properties, NULL);
+}
+
+static int gpio_aggregator_activate(struct gpio_aggregator *aggr)
+{
+ struct platform_device_info pdevinfo;
+ struct gpio_aggregator_line *line;
+ struct fwnode_handle *swnode;
+ unsigned int n = 0;
+ int ret = 0;
+
+ if (gpio_aggregator_count_lines(aggr) == 0)
+ return -EINVAL;
+
+ aggr->lookups = kzalloc(struct_size(aggr->lookups, table, 1),
+ GFP_KERNEL);
+ if (!aggr->lookups)
+ return -ENOMEM;
+
+ swnode = gpio_aggregator_make_device_sw_node(aggr);
+ if (IS_ERR(swnode)) {
+ ret = PTR_ERR(swnode);
+ goto err_remove_lookups;
+ }
+
+ memset(&pdevinfo, 0, sizeof(pdevinfo));
+ pdevinfo.name = DRV_NAME;
+ pdevinfo.id = aggr->id;
+ pdevinfo.fwnode = swnode;
+
+ /* The list is always sorted as new elements are inserted in order. */
+ list_for_each_entry(line, &aggr->list_head, entry) {
+ /*
+ * - Either GPIO chip label or line name must be configured
+ * (i.e. line->key must be non-NULL)
+ * - Line directories must be named with sequential numeric
+ * suffixes starting from 0. (i.e. ./line0, ./line1, ...)
+ */
+ if (!line->key || line->idx != n) {
+ ret = -EINVAL;
+ goto err_remove_swnode;
+ }
+
+ if (line->offset < 0)
+ ret = gpio_aggregator_add_gpio(aggr, line->key,
+ U16_MAX, &n);
+ else
+ ret = gpio_aggregator_add_gpio(aggr, line->key,
+ line->offset, &n);
+ if (ret)
+ goto err_remove_swnode;
+ }
+
+ aggr->lookups->dev_id = kasprintf(GFP_KERNEL, "%s.%d", DRV_NAME, aggr->id);
+ if (!aggr->lookups->dev_id) {
+ ret = -ENOMEM;
+ goto err_remove_swnode;
+ }
+
+ gpiod_add_lookup_table(aggr->lookups);
+
+ ret = dev_sync_probe_register(&aggr->probe_data, &pdevinfo);
+ if (ret)
+ goto err_remove_lookup_table;
+
+ return 0;
+
+err_remove_lookup_table:
+ kfree(aggr->lookups->dev_id);
+ gpiod_remove_lookup_table(aggr->lookups);
+err_remove_swnode:
+ fwnode_remove_software_node(swnode);
+err_remove_lookups:
+ kfree(aggr->lookups);
+
+ return ret;
+}
+
+static void gpio_aggregator_deactivate(struct gpio_aggregator *aggr)
+{
+ dev_sync_probe_unregister(&aggr->probe_data);
+ gpiod_remove_lookup_table(aggr->lookups);
+ kfree(aggr->lookups->dev_id);
+ kfree(aggr->lookups);
+}
+
+static void gpio_aggregator_lockup_configfs(struct gpio_aggregator *aggr,
+ bool lock)
+{
+ struct configfs_subsystem *subsys = aggr->group.cg_subsys;
+ struct gpio_aggregator_line *line;
+
+ /*
+ * The device only needs to depend on leaf lines. This is
+ * sufficient to lock up all the configfs entries that the
+ * instantiated, alive device depends on.
+ */
+ list_for_each_entry(line, &aggr->list_head, entry) {
+ if (lock)
+ configfs_depend_item_unlocked(
+ subsys, &line->group.cg_item);
+ else
+ configfs_undepend_item_unlocked(
+ &line->group.cg_item);
+ }
+}
+
+static ssize_t
+gpio_aggregator_line_key_show(struct config_item *item, char *page)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+
+ guard(mutex)(&aggr->lock);
+
+ return sysfs_emit(page, "%s\n", line->key ?: "");
+}
+
+static ssize_t
+gpio_aggregator_line_key_store(struct config_item *item, const char *page,
+ size_t count)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+
+ char *key __free(kfree) = kstrndup(skip_spaces(page), count,
+ GFP_KERNEL);
+ if (!key)
+ return -ENOMEM;
+
+ strim(key);
+
+ guard(mutex)(&aggr->lock);
+
+ if (gpio_aggregator_is_activating(aggr) ||
+ gpio_aggregator_is_active(aggr))
+ return -EBUSY;
+
+ kfree(line->key);
+ line->key = no_free_ptr(key);
+
+ return count;
+}
+CONFIGFS_ATTR(gpio_aggregator_line_, key);
+
+static ssize_t
+gpio_aggregator_line_name_show(struct config_item *item, char *page)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+
+ guard(mutex)(&aggr->lock);
+
+ return sysfs_emit(page, "%s\n", line->name ?: "");
+}
+
+static ssize_t
+gpio_aggregator_line_name_store(struct config_item *item, const char *page,
+ size_t count)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+
+ char *name __free(kfree) = kstrndup(skip_spaces(page), count,
+ GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ strim(name);
+
+ guard(mutex)(&aggr->lock);
+
+ if (gpio_aggregator_is_activating(aggr) ||
+ gpio_aggregator_is_active(aggr))
+ return -EBUSY;
+
+ kfree(line->name);
+ line->name = no_free_ptr(name);
+
+ return count;
+}
+CONFIGFS_ATTR(gpio_aggregator_line_, name);
+
+static ssize_t
+gpio_aggregator_line_offset_show(struct config_item *item, char *page)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+
+ guard(mutex)(&aggr->lock);
+
+ return sysfs_emit(page, "%d\n", line->offset);
+}
+
+static ssize_t
+gpio_aggregator_line_offset_store(struct config_item *item, const char *page,
+ size_t count)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+ int offset, ret;
+
+ ret = kstrtoint(page, 0, &offset);
+ if (ret)
+ return ret;
+
+ /*
+ * When offset == -1, 'key' represents a line name to lookup.
+ * When 0 <= offset < 65535, 'key' represents the label of the chip with
+ * the 'offset' value representing the line within that chip.
+ *
+ * GPIOLIB uses the U16_MAX value to indicate lookup by line name so
+ * the greatest offset we can accept is (U16_MAX - 1).
+ */
+ if (offset > (U16_MAX - 1) || offset < -1)
+ return -EINVAL;
+
+ guard(mutex)(&aggr->lock);
+
+ if (gpio_aggregator_is_activating(aggr) ||
+ gpio_aggregator_is_active(aggr))
+ return -EBUSY;
+
+ line->offset = offset;
+
+ return count;
+}
+CONFIGFS_ATTR(gpio_aggregator_line_, offset);
+
+static struct configfs_attribute *gpio_aggregator_line_attrs[] = {
+ &gpio_aggregator_line_attr_key,
+ &gpio_aggregator_line_attr_name,
+ &gpio_aggregator_line_attr_offset,
+ NULL
+};
+
+static ssize_t
+gpio_aggregator_device_dev_name_show(struct config_item *item, char *page)
+{
+ struct gpio_aggregator *aggr = to_gpio_aggregator(item);
+ struct platform_device *pdev;
+
+ guard(mutex)(&aggr->lock);
+
+ pdev = aggr->probe_data.pdev;
+ if (pdev)
+ return sysfs_emit(page, "%s\n", dev_name(&pdev->dev));
+
+ return sysfs_emit(page, "%s.%d\n", DRV_NAME, aggr->id);
+}
+CONFIGFS_ATTR_RO(gpio_aggregator_device_, dev_name);
+
+static ssize_t
+gpio_aggregator_device_live_show(struct config_item *item, char *page)
+{
+ struct gpio_aggregator *aggr = to_gpio_aggregator(item);
+
+ guard(mutex)(&aggr->lock);
+
+ return sysfs_emit(page, "%c\n",
+ gpio_aggregator_is_active(aggr) ? '1' : '0');
+}
+
+static ssize_t
+gpio_aggregator_device_live_store(struct config_item *item, const char *page,
+ size_t count)
+{
+ struct gpio_aggregator *aggr = to_gpio_aggregator(item);
+ int ret = 0;
+ bool live;
+
+ ret = kstrtobool(page, &live);
+ if (ret)
+ return ret;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENOENT;
+
+ if (live && !aggr->init_via_sysfs)
+ gpio_aggregator_lockup_configfs(aggr, true);
+
+ scoped_guard(mutex, &aggr->lock) {
+ if (gpio_aggregator_is_activating(aggr) ||
+ (live == gpio_aggregator_is_active(aggr)))
+ ret = -EPERM;
+ else if (live)
+ ret = gpio_aggregator_activate(aggr);
+ else
+ gpio_aggregator_deactivate(aggr);
+ }
+
+ /*
+ * Undepend is required only if device disablement (live == 0)
+ * succeeds or if device enablement (live == 1) fails.
+ */
+ if (live == !!ret && !aggr->init_via_sysfs)
+ gpio_aggregator_lockup_configfs(aggr, false);
+
+ module_put(THIS_MODULE);
+
+ return ret ?: count;
+}
+CONFIGFS_ATTR(gpio_aggregator_device_, live);
+
+static struct configfs_attribute *gpio_aggregator_device_attrs[] = {
+ &gpio_aggregator_device_attr_dev_name,
+ &gpio_aggregator_device_attr_live,
+ NULL
+};
+
+static void
+gpio_aggregator_line_release(struct config_item *item)
+{
+ struct gpio_aggregator_line *line = to_gpio_aggregator_line(item);
+ struct gpio_aggregator *aggr = line->parent;
+
+ guard(mutex)(&aggr->lock);
+
+ gpio_aggregator_line_del(aggr, line);
+ kfree(line->key);
+ kfree(line->name);
+ kfree(line);
+}
+
+static struct configfs_item_operations gpio_aggregator_line_item_ops = {
+ .release = gpio_aggregator_line_release,
+};
+
+static const struct config_item_type gpio_aggregator_line_type = {
+ .ct_item_ops = &gpio_aggregator_line_item_ops,
+ .ct_attrs = gpio_aggregator_line_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static void gpio_aggregator_device_release(struct config_item *item)
+{
+ struct gpio_aggregator *aggr = to_gpio_aggregator(item);
+
+ /*
+ * At this point, aggr is neither active nor activating,
+ * so calling gpio_aggregator_deactivate() is always unnecessary.
+ */
+ gpio_aggregator_free(aggr);
+}
+
+static struct configfs_item_operations gpio_aggregator_device_item_ops = {
+ .release = gpio_aggregator_device_release,
+};
+
+static struct config_group *
+gpio_aggregator_device_make_group(struct config_group *group, const char *name)
+{
+ struct gpio_aggregator *aggr = to_gpio_aggregator(&group->cg_item);
+ struct gpio_aggregator_line *line;
+ unsigned int idx;
+ int ret, nchar;
+
+ ret = sscanf(name, "line%u%n", &idx, &nchar);
+ if (ret != 1 || nchar != strlen(name))
+ return ERR_PTR(-EINVAL);
+
+ if (aggr->init_via_sysfs)
+ /*
+ * Aggregators created via legacy sysfs interface are exposed as
+ * default groups, which means rmdir(2) is prohibited for them.
+ * For simplicity, and to avoid confusion, we also prohibit
+ * mkdir(2).
+ */
+ return ERR_PTR(-EPERM);
+
+ guard(mutex)(&aggr->lock);
+
+ if (gpio_aggregator_is_active(aggr))
+ return ERR_PTR(-EBUSY);
+
+ list_for_each_entry(line, &aggr->list_head, entry)
+ if (line->idx == idx)
+ return ERR_PTR(-EINVAL);
+
+ line = gpio_aggregator_line_alloc(aggr, idx, NULL, -1);
+ if (IS_ERR(line))
+ return ERR_CAST(line);
+
+ config_group_init_type_name(&line->group, name, &gpio_aggregator_line_type);
+
+ gpio_aggregator_line_add(aggr, line);
+
+ return &line->group;
+}
+
+static struct configfs_group_operations gpio_aggregator_device_group_ops = {
+ .make_group = gpio_aggregator_device_make_group,
+};
+
+static const struct config_item_type gpio_aggregator_device_type = {
+ .ct_group_ops = &gpio_aggregator_device_group_ops,
+ .ct_item_ops = &gpio_aggregator_device_item_ops,
+ .ct_attrs = gpio_aggregator_device_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group *
+gpio_aggregator_make_group(struct config_group *group, const char *name)
+{
+ struct gpio_aggregator *aggr;
+ int ret;
+
+ /*
+ * "_sysfs" prefix is reserved for auto-generated config group
+ * for devices create via legacy sysfs interface.
+ */
+ if (strncmp(name, AGGREGATOR_LEGACY_PREFIX,
+ sizeof(AGGREGATOR_LEGACY_PREFIX) - 1) == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* arg space is unneeded */
+ ret = gpio_aggregator_alloc(&aggr, 0);
+ if (ret)
+ return ERR_PTR(ret);
+
+ config_group_init_type_name(&aggr->group, name, &gpio_aggregator_device_type);
+ dev_sync_probe_init(&aggr->probe_data);
+
+ return &aggr->group;
+}
+
+static struct configfs_group_operations gpio_aggregator_group_ops = {
+ .make_group = gpio_aggregator_make_group,
+};
+
+static const struct config_item_type gpio_aggregator_type = {
+ .ct_group_ops = &gpio_aggregator_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem gpio_aggregator_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = DRV_NAME,
+ .ci_type = &gpio_aggregator_type,
+ },
+ },
+};
+
+/*
+ * Sysfs interface
+ */
+static int gpio_aggregator_parse(struct gpio_aggregator *aggr)
+{
+ char *args = skip_spaces(aggr->args);
+ struct gpio_aggregator_line *line;
+ char name[CONFIGFS_ITEM_NAME_LEN];
+ char *key, *offsets, *p;
+ unsigned int i, n = 0;
+ int error = 0;
+
+ unsigned long *bitmap __free(bitmap) =
+ bitmap_alloc(AGGREGATOR_MAX_GPIOS, GFP_KERNEL);
+ if (!bitmap)
+ return -ENOMEM;
+
+ args = next_arg(args, &key, &p);
+ while (*args) {
+ args = next_arg(args, &offsets, &p);
+
+ p = get_options(offsets, 0, &error);
+ if (error == 0 || *p) {
+ /* Named GPIO line */
+ scnprintf(name, sizeof(name), "line%u", n);
+ line = gpio_aggregator_line_alloc(aggr, n, key, -1);
+ if (IS_ERR(line)) {
+ error = PTR_ERR(line);
+ goto err;
+ }
+ config_group_init_type_name(&line->group, name,
+ &gpio_aggregator_line_type);
+ error = configfs_register_group(&aggr->group,
+ &line->group);
+ if (error)
+ goto err;
+ scoped_guard(mutex, &aggr->lock)
+ gpio_aggregator_line_add(aggr, line);
+
+ error = gpio_aggregator_add_gpio(aggr, key, U16_MAX, &n);
+ if (error)
+ goto err;
+
+ key = offsets;
+ continue;
+ }
+
+ /* GPIO chip + offset(s) */
+ error = bitmap_parselist(offsets, bitmap, AGGREGATOR_MAX_GPIOS);
+ if (error) {
+ pr_err("Cannot parse %s: %d\n", offsets, error);
+ goto err;
+ }
+
+ for_each_set_bit(i, bitmap, AGGREGATOR_MAX_GPIOS) {
+ scnprintf(name, sizeof(name), "line%u", n);
+ line = gpio_aggregator_line_alloc(aggr, n, key, i);
+ if (IS_ERR(line)) {
+ error = PTR_ERR(line);
+ goto err;
+ }
+ config_group_init_type_name(&line->group, name,
+ &gpio_aggregator_line_type);
+ error = configfs_register_group(&aggr->group,
+ &line->group);
+ if (error)
+ goto err;
+ scoped_guard(mutex, &aggr->lock)
+ gpio_aggregator_line_add(aggr, line);
+
+ error = gpio_aggregator_add_gpio(aggr, key, i, &n);
+ if (error)
+ goto err;
+ }
+
+ args = next_arg(args, &key, &p);
+ }
+
+ if (!n) {
+ pr_err("No GPIOs specified\n");
+ error = -EINVAL;
+ goto err;
+ }
+
+ return 0;
+
+err:
+ gpio_aggregator_free_lines(aggr);
+ return error;
+}
+
+static ssize_t gpio_aggregator_new_device_store(struct device_driver *driver,
+ const char *buf, size_t count)
+{
+ struct gpio_aggregator_pdev_meta meta = { .init_via_sysfs = true };
+ char name[CONFIGFS_ITEM_NAME_LEN];
+ struct gpio_aggregator *aggr;
+ struct platform_device *pdev;
+ int res;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENOENT;
+
+ /* kernfs guarantees string termination, so count + 1 is safe */
+ res = gpio_aggregator_alloc(&aggr, count + 1);
+ if (res)
+ goto put_module;
+
+ memcpy(aggr->args, buf, count + 1);
+
+ aggr->init_via_sysfs = true;
+ aggr->lookups = kzalloc(struct_size(aggr->lookups, table, 1),
+ GFP_KERNEL);
+ if (!aggr->lookups) {
+ res = -ENOMEM;
+ goto free_ga;
+ }
+
+ aggr->lookups->dev_id = kasprintf(GFP_KERNEL, "%s.%d", DRV_NAME, aggr->id);
+ if (!aggr->lookups->dev_id) {
+ res = -ENOMEM;
+ goto free_table;
+ }
+
+ scnprintf(name, sizeof(name), "%s.%d", AGGREGATOR_LEGACY_PREFIX, aggr->id);
+ config_group_init_type_name(&aggr->group, name, &gpio_aggregator_device_type);
+
+ /*
+ * Since the device created by sysfs might be toggled via configfs
+ * 'live' attribute later, this initialization is needed.
+ */
+ dev_sync_probe_init(&aggr->probe_data);
+
+ /* Expose to configfs */
+ res = configfs_register_group(&gpio_aggregator_subsys.su_group,
+ &aggr->group);
+ if (res)
+ goto free_dev_id;
+
+ res = gpio_aggregator_parse(aggr);
+ if (res)
+ goto unregister_group;
+
+ gpiod_add_lookup_table(aggr->lookups);
+
+ pdev = platform_device_register_data(NULL, DRV_NAME, aggr->id, &meta, sizeof(meta));
+ if (IS_ERR(pdev)) {
+ res = PTR_ERR(pdev);
+ goto remove_table;
+ }
+
+ aggr->probe_data.pdev = pdev;
+ module_put(THIS_MODULE);
+ return count;
+
+remove_table:
+ gpiod_remove_lookup_table(aggr->lookups);
+unregister_group:
+ configfs_unregister_group(&aggr->group);
+free_dev_id:
+ kfree(aggr->lookups->dev_id);
+free_table:
+ kfree(aggr->lookups);
+free_ga:
+ gpio_aggregator_free(aggr);
+put_module:
+ module_put(THIS_MODULE);
+ return res;
+}
+
+static struct driver_attribute driver_attr_gpio_aggregator_new_device =
+ __ATTR(new_device, 0200, NULL, gpio_aggregator_new_device_store);
+
+static void gpio_aggregator_destroy(struct gpio_aggregator *aggr)
+{
+ scoped_guard(mutex, &aggr->lock) {
+ if (gpio_aggregator_is_activating(aggr) ||
+ gpio_aggregator_is_active(aggr))
+ gpio_aggregator_deactivate(aggr);
+ }
+ gpio_aggregator_free_lines(aggr);
+ configfs_unregister_group(&aggr->group);
+ kfree(aggr);
+}
+
+static ssize_t gpio_aggregator_delete_device_store(struct device_driver *driver,
+ const char *buf, size_t count)
+{
+ struct gpio_aggregator *aggr;
+ unsigned int id;
+ int error;
+
+ if (!str_has_prefix(buf, DRV_NAME "."))
+ return -EINVAL;
+
+ error = kstrtouint(buf + strlen(DRV_NAME "."), 10, &id);
+ if (error)
+ return error;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENOENT;
+
+ mutex_lock(&gpio_aggregator_lock);
+ aggr = idr_find(&gpio_aggregator_idr, id);
+ /*
+ * For simplicity, devices created via configfs cannot be deleted
+ * via sysfs.
+ */
+ if (aggr && aggr->init_via_sysfs)
+ idr_remove(&gpio_aggregator_idr, id);
+ else {
+ mutex_unlock(&gpio_aggregator_lock);
+ module_put(THIS_MODULE);
+ return -ENOENT;
+ }
+ mutex_unlock(&gpio_aggregator_lock);
+
+ gpio_aggregator_destroy(aggr);
+ module_put(THIS_MODULE);
+ return count;
+}
+
+static struct driver_attribute driver_attr_gpio_aggregator_delete_device =
+ __ATTR(delete_device, 0200, NULL, gpio_aggregator_delete_device_store);
+
+static struct attribute *gpio_aggregator_attrs[] = {
+ &driver_attr_gpio_aggregator_new_device.attr,
+ &driver_attr_gpio_aggregator_delete_device.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(gpio_aggregator);
/*
* GPIO Aggregator platform device
@@ -589,7 +1288,9 @@ static struct gpiochip_fwd *gpiochip_fwd_create(struct device *dev,
static int gpio_aggregator_probe(struct platform_device *pdev)
{
+ struct gpio_aggregator_pdev_meta *meta;
struct device *dev = &pdev->dev;
+ bool init_via_sysfs = false;
struct gpio_desc **descs;
struct gpiochip_fwd *fwd;
unsigned long features;
@@ -603,10 +1304,28 @@ static int gpio_aggregator_probe(struct platform_device *pdev)
if (!descs)
return -ENOMEM;
+ meta = dev_get_platdata(&pdev->dev);
+ if (meta && meta->init_via_sysfs)
+ init_via_sysfs = true;
+
for (i = 0; i < n; i++) {
descs[i] = devm_gpiod_get_index(dev, NULL, i, GPIOD_ASIS);
- if (IS_ERR(descs[i]))
+ if (IS_ERR(descs[i])) {
+ /*
+ * Deferred probing is not suitable when the aggregator
+ * is created via configfs. They should just retry later
+ * whenever they like. For device creation via sysfs,
+ * error is propagated without overriding for backward
+ * compatibility. .prevent_deferred_probe is kept unset
+ * for other cases.
+ */
+ if (!init_via_sysfs && !dev_of_node(dev) &&
+ descs[i] == ERR_PTR(-EPROBE_DEFER)) {
+ pr_warn("Deferred probe canceled for creation via configfs.\n");
+ return -ENODEV;
+ }
return PTR_ERR(descs[i]);
+ }
}
features = (uintptr_t)device_get_match_data(dev);
@@ -640,9 +1359,63 @@ static struct platform_driver gpio_aggregator_driver = {
},
};
+static int __exit gpio_aggregator_idr_remove(int id, void *p, void *data)
+{
+ /*
+ * There should be no aggregator created via configfs, as their
+ * presence would prevent module unloading.
+ */
+ gpio_aggregator_destroy(p);
+ return 0;
+}
+
+static void __exit gpio_aggregator_remove_all(void)
+{
+ /*
+ * Configfs callbacks acquire gpio_aggregator_lock when accessing
+ * gpio_aggregator_idr, so to prevent lock inversion deadlock, we
+ * cannot protect idr_for_each invocation here with
+ * gpio_aggregator_lock, as gpio_aggregator_idr_remove() accesses
+ * configfs groups. Fortunately, the new_device/delete_device path
+ * and the module unload path are mutually exclusive, thanks to an
+ * explicit try_module_get inside of those driver attr handlers.
+ * Also, when we reach here, no configfs entries present or being
+ * created. Therefore, no need to protect with gpio_aggregator_lock
+ * below.
+ */
+ idr_for_each(&gpio_aggregator_idr, gpio_aggregator_idr_remove, NULL);
+ idr_destroy(&gpio_aggregator_idr);
+}
+
static int __init gpio_aggregator_init(void)
{
- return platform_driver_register(&gpio_aggregator_driver);
+ int ret = 0;
+
+ config_group_init(&gpio_aggregator_subsys.su_group);
+ mutex_init(&gpio_aggregator_subsys.su_mutex);
+ ret = configfs_register_subsystem(&gpio_aggregator_subsys);
+ if (ret) {
+ pr_err("Failed to register the '%s' configfs subsystem: %d\n",
+ gpio_aggregator_subsys.su_group.cg_item.ci_namebuf, ret);
+ mutex_destroy(&gpio_aggregator_subsys.su_mutex);
+ return ret;
+ }
+
+ /*
+ * CAVEAT: This must occur after configfs registration. Otherwise,
+ * a race condition could arise: driver attribute groups might be
+ * exposed and accessed by users before configfs registration
+ * completes. new_device_store() does not expect a partially
+ * initialized configfs state.
+ */
+ ret = platform_driver_register(&gpio_aggregator_driver);
+ if (ret) {
+ pr_err("Failed to register the platform driver: %d\n", ret);
+ mutex_destroy(&gpio_aggregator_subsys.su_mutex);
+ configfs_unregister_subsystem(&gpio_aggregator_subsys);
+ }
+
+ return ret;
}
module_init(gpio_aggregator_init);
@@ -650,6 +1423,7 @@ static void __exit gpio_aggregator_exit(void)
{
gpio_aggregator_remove_all();
platform_driver_unregister(&gpio_aggregator_driver);
+ configfs_unregister_subsystem(&gpio_aggregator_subsys);
}
module_exit(gpio_aggregator_exit);
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 17c287dc7471..8f22cb36004d 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -516,6 +516,7 @@ static struct irq_chip bcm_gpio_irq_chip = {
.irq_set_type = bcm_kona_gpio_irq_set_type,
.irq_request_resources = bcm_kona_gpio_irq_reqres,
.irq_release_resources = bcm_kona_gpio_irq_relres,
+ .flags = IRQCHIP_IMMUTABLE,
};
static struct of_device_id const bcm_kona_gpio_of_match[] = {
diff --git a/drivers/gpio/gpio-blzp1600.c b/drivers/gpio/gpio-blzp1600.c
new file mode 100644
index 000000000000..055cb296ae54
--- /dev/null
+++ b/drivers/gpio/gpio-blzp1600.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 VeriSilicon Limited.
+ * Copyright (C) 2025 Blaize, Inc.
+ */
+
+#include <linux/errno.h>
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define GPIO_DIR_REG 0x00
+#define GPIO_CTRL_REG 0x04
+#define GPIO_SET_REG 0x08
+#define GPIO_CLR_REG 0x0C
+#define GPIO_ODATA_REG 0x10
+#define GPIO_IDATA_REG 0x14
+#define GPIO_IEN_REG 0x18
+#define GPIO_IS_REG 0x1C
+#define GPIO_IBE_REG 0x20
+#define GPIO_IEV_REG 0x24
+#define GPIO_RIS_REG 0x28
+#define GPIO_IM_REG 0x2C
+#define GPIO_MIS_REG 0x30
+#define GPIO_IC_REG 0x34
+#define GPIO_DB_REG 0x38
+#define GPIO_DFG_REG 0x3C
+
+#define DRIVER_NAME "blzp1600-gpio"
+
+struct blzp1600_gpio {
+ void __iomem *base;
+ struct gpio_chip gc;
+ int irq;
+};
+
+static inline struct blzp1600_gpio *get_blzp1600_gpio_from_irq_data(struct irq_data *d)
+{
+ return gpiochip_get_data(irq_data_get_irq_chip_data(d));
+}
+
+static inline struct blzp1600_gpio *get_blzp1600_gpio_from_irq_desc(struct irq_desc *d)
+{
+ return gpiochip_get_data(irq_desc_get_handler_data(d));
+}
+
+static inline u32 blzp1600_gpio_read(struct blzp1600_gpio *chip, unsigned int offset)
+{
+ return readl_relaxed(chip->base + offset);
+}
+
+static inline void blzp1600_gpio_write(struct blzp1600_gpio *chip, unsigned int offset, u32 val)
+{
+ writel_relaxed(val, chip->base + offset);
+}
+
+static inline void blzp1600_gpio_rmw(void __iomem *reg, u32 mask, bool set)
+{
+ u32 val = readl_relaxed(reg);
+
+ if (set)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ writel_relaxed(val, reg);
+}
+
+static void blzp1600_gpio_irq_mask(struct irq_data *d)
+{
+ struct blzp1600_gpio *chip = get_blzp1600_gpio_from_irq_data(d);
+
+ guard(raw_spinlock_irqsave)(&chip->gc.bgpio_lock);
+ blzp1600_gpio_rmw(chip->base + GPIO_IM_REG, BIT(d->hwirq), 1);
+}
+
+static void blzp1600_gpio_irq_unmask(struct irq_data *d)
+{
+ struct blzp1600_gpio *chip = get_blzp1600_gpio_from_irq_data(d);
+
+ guard(raw_spinlock_irqsave)(&chip->gc.bgpio_lock);
+ blzp1600_gpio_rmw(chip->base + GPIO_IM_REG, BIT(d->hwirq), 0);
+}
+
+static void blzp1600_gpio_irq_ack(struct irq_data *d)
+{
+ struct blzp1600_gpio *chip = get_blzp1600_gpio_from_irq_data(d);
+
+ blzp1600_gpio_write(chip, GPIO_IC_REG, BIT(d->hwirq));
+}
+
+static void blzp1600_gpio_irq_enable(struct irq_data *d)
+{
+ struct blzp1600_gpio *chip = get_blzp1600_gpio_from_irq_data(d);
+
+ gpiochip_enable_irq(&chip->gc, irqd_to_hwirq(d));
+
+ guard(raw_spinlock_irqsave)(&chip->gc.bgpio_lock);
+ blzp1600_gpio_rmw(chip->base + GPIO_DIR_REG, BIT(d->hwirq), 0);
+ blzp1600_gpio_rmw(chip->base + GPIO_IEN_REG, BIT(d->hwirq), 1);
+}
+
+static void blzp1600_gpio_irq_disable(struct irq_data *d)
+{
+ struct blzp1600_gpio *chip = get_blzp1600_gpio_from_irq_data(d);
+
+ guard(raw_spinlock_irqsave)(&chip->gc.bgpio_lock);
+ blzp1600_gpio_rmw(chip->base + GPIO_IEN_REG, BIT(d->hwirq), 0);
+ gpiochip_disable_irq(&chip->gc, irqd_to_hwirq(d));
+}
+
+static int blzp1600_gpio_irq_set_type(struct irq_data *d, u32 type)
+{
+ struct blzp1600_gpio *chip = get_blzp1600_gpio_from_irq_data(d);
+ u32 edge_level, single_both, fall_rise;
+ int mask = BIT(d->hwirq);
+
+ guard(raw_spinlock_irqsave)(&chip->gc.bgpio_lock);
+ edge_level = blzp1600_gpio_read(chip, GPIO_IS_REG);
+ single_both = blzp1600_gpio_read(chip, GPIO_IBE_REG);
+ fall_rise = blzp1600_gpio_read(chip, GPIO_IEV_REG);
+
+ switch (type) {
+ case IRQ_TYPE_EDGE_BOTH:
+ edge_level &= ~mask;
+ single_both |= mask;
+ break;
+ case IRQ_TYPE_EDGE_RISING:
+ edge_level &= ~mask;
+ single_both &= ~mask;
+ fall_rise |= mask;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ edge_level &= ~mask;
+ single_both &= ~mask;
+ fall_rise &= ~mask;
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ edge_level |= mask;
+ fall_rise |= mask;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ edge_level |= mask;
+ fall_rise &= ~mask;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ blzp1600_gpio_write(chip, GPIO_IS_REG, edge_level);
+ blzp1600_gpio_write(chip, GPIO_IBE_REG, single_both);
+ blzp1600_gpio_write(chip, GPIO_IEV_REG, fall_rise);
+
+ if (type & IRQ_TYPE_LEVEL_MASK)
+ irq_set_handler_locked(d, handle_level_irq);
+ else
+ irq_set_handler_locked(d, handle_edge_irq);
+
+ return 0;
+}
+
+static const struct irq_chip blzp1600_gpio_irqchip = {
+ .name = DRIVER_NAME,
+ .irq_ack = blzp1600_gpio_irq_ack,
+ .irq_mask = blzp1600_gpio_irq_mask,
+ .irq_unmask = blzp1600_gpio_irq_unmask,
+ .irq_set_type = blzp1600_gpio_irq_set_type,
+ .irq_enable = blzp1600_gpio_irq_enable,
+ .irq_disable = blzp1600_gpio_irq_disable,
+ .flags = IRQCHIP_IMMUTABLE | IRQCHIP_MASK_ON_SUSPEND,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
+static void blzp1600_gpio_irqhandler(struct irq_desc *desc)
+{
+ struct blzp1600_gpio *gpio = get_blzp1600_gpio_from_irq_desc(desc);
+ struct irq_chip *irqchip = irq_desc_get_chip(desc);
+ unsigned long irq_status;
+ int hwirq = 0;
+
+ chained_irq_enter(irqchip, desc);
+ irq_status = blzp1600_gpio_read(gpio, GPIO_RIS_REG);
+ for_each_set_bit(hwirq, &irq_status, gpio->gc.ngpio)
+ generic_handle_domain_irq(gpio->gc.irq.domain, hwirq);
+
+ chained_irq_exit(irqchip, desc);
+}
+
+static int blzp1600_gpio_set_debounce(struct gpio_chip *gc, unsigned int offset,
+ unsigned int debounce)
+{
+ struct blzp1600_gpio *chip = gpiochip_get_data(gc);
+
+ guard(raw_spinlock_irqsave)(&chip->gc.bgpio_lock);
+ blzp1600_gpio_rmw(chip->base + GPIO_DB_REG, BIT(offset), debounce);
+
+ return 0;
+}
+
+static int blzp1600_gpio_set_config(struct gpio_chip *gc, unsigned int offset, unsigned long config)
+{
+ u32 debounce;
+
+ if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
+ return -ENOTSUPP;
+
+ debounce = pinconf_to_config_argument(config);
+ return blzp1600_gpio_set_debounce(gc, offset, debounce);
+}
+
+static int blzp1600_gpio_probe(struct platform_device *pdev)
+{
+ struct blzp1600_gpio *chip;
+ struct gpio_chip *gc;
+ int ret;
+
+ chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(chip->base))
+ return PTR_ERR(chip->base);
+
+ ret = bgpio_init(&chip->gc, &pdev->dev, 4, chip->base + GPIO_IDATA_REG,
+ chip->base + GPIO_SET_REG, chip->base + GPIO_CLR_REG,
+ chip->base + GPIO_DIR_REG, NULL, 0);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to register generic gpio\n");
+
+ /* configure the gpio chip */
+ gc = &chip->gc;
+ gc->set_config = blzp1600_gpio_set_config;
+
+ if (device_property_present(&pdev->dev, "interrupt-controller")) {
+ struct gpio_irq_chip *girq;
+
+ chip->irq = platform_get_irq(pdev, 0);
+ if (chip->irq < 0)
+ return chip->irq;
+
+ girq = &gc->irq;
+ gpio_irq_chip_set_chip(girq, &blzp1600_gpio_irqchip);
+ girq->parent_handler = blzp1600_gpio_irqhandler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(&pdev->dev, 1, sizeof(*girq->parents), GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+
+ girq->parents[0] = chip->irq;
+ girq->default_type = IRQ_TYPE_NONE;
+ }
+
+ return devm_gpiochip_add_data(&pdev->dev, gc, chip);
+}
+
+static const struct of_device_id blzp1600_gpio_of_match[] = {
+ { .compatible = "blaize,blzp1600-gpio", },
+ { /* Sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, blzp1600_gpio_of_match);
+
+static struct platform_driver blzp1600_gpio_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = blzp1600_gpio_of_match,
+ },
+ .probe = blzp1600_gpio_probe,
+};
+
+module_platform_driver(blzp1600_gpio_driver);
+
+MODULE_AUTHOR("Nikolaos Pasaloukos <nikolaos.pasaloukos@blaize.com>");
+MODULE_DESCRIPTION("Blaize BLZP1600 GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index ca3472977431..e7671bcd5c07 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -437,7 +437,7 @@ static int brcmstb_gpio_irq_setup(struct platform_device *pdev,
int err;
priv->irq_domain =
- irq_domain_add_linear(np, priv->num_gpios,
+ irq_domain_create_linear(of_fwnode_handle(np), priv->num_gpios,
&brcmstb_gpio_irq_domain_ops,
priv);
if (!priv->irq_domain) {
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 63fc7888c1d4..80a82492171e 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -68,15 +68,6 @@ static inline u32 __gpio_mask(unsigned gpio)
return 1 << (gpio % 32);
}
-static inline struct davinci_gpio_regs __iomem *irq2regs(struct irq_data *d)
-{
- struct davinci_gpio_regs __iomem *g;
-
- g = (__force struct davinci_gpio_regs __iomem *)irq_data_get_irq_chip_data(d);
-
- return g;
-}
-
static int davinci_gpio_irq_setup(struct platform_device *pdev);
/*--------------------------------------------------------------------------*/
@@ -255,19 +246,27 @@ static int davinci_gpio_probe(struct platform_device *pdev)
static void gpio_irq_mask(struct irq_data *d)
{
- struct davinci_gpio_regs __iomem *g = irq2regs(d);
+ struct davinci_gpio_controller *chips = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+ struct davinci_gpio_regs __iomem *g = chips->regs[hwirq / 32];
uintptr_t mask = (uintptr_t)irq_data_get_irq_handler_data(d);
writel_relaxed(mask, &g->clr_falling);
writel_relaxed(mask, &g->clr_rising);
+
+ gpiochip_disable_irq(&chips->chip, hwirq);
}
static void gpio_irq_unmask(struct irq_data *d)
{
- struct davinci_gpio_regs __iomem *g = irq2regs(d);
+ struct davinci_gpio_controller *chips = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+ struct davinci_gpio_regs __iomem *g = chips->regs[hwirq / 32];
uintptr_t mask = (uintptr_t)irq_data_get_irq_handler_data(d);
unsigned status = irqd_get_trigger_type(d);
+ gpiochip_enable_irq(&chips->chip, hwirq);
+
status &= IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING;
if (!status)
status = IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING;
@@ -286,12 +285,13 @@ static int gpio_irq_type(struct irq_data *d, unsigned trigger)
return 0;
}
-static struct irq_chip gpio_irqchip = {
+static const struct irq_chip gpio_irqchip = {
.name = "GPIO",
.irq_unmask = gpio_irq_unmask,
.irq_mask = gpio_irq_mask,
.irq_set_type = gpio_irq_type,
- .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_IMMUTABLE | IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static void gpio_irq_handler(struct irq_desc *desc)
@@ -399,12 +399,11 @@ davinci_gpio_irq_map(struct irq_domain *d, unsigned int irq,
{
struct davinci_gpio_controller *chips =
(struct davinci_gpio_controller *)d->host_data;
- struct davinci_gpio_regs __iomem *g = chips->regs[hw / 32];
irq_set_chip_and_handler_name(irq, &gpio_irqchip, handle_simple_irq,
"davinci_gpio");
irq_set_irq_type(irq, IRQ_TYPE_NONE);
- irq_set_chip_data(irq, (__force void *)g);
+ irq_set_chip_data(irq, (__force void *)chips);
irq_set_handler_data(irq, (void *)(uintptr_t)__gpio_mask(hw));
return 0;
@@ -479,9 +478,8 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev)
return irq;
}
- irq_domain = irq_domain_add_legacy(dev->of_node, ngpio, irq, 0,
- &davinci_gpio_irq_ops,
- chips);
+ irq_domain = irq_domain_create_legacy(of_fwnode_handle(dev->of_node), ngpio, irq, 0,
+ &davinci_gpio_irq_ops, chips);
if (!irq_domain) {
dev_err(dev, "Couldn't register an IRQ domain\n");
return -ENODEV;
diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
index 596da59d4b13..4bd3c47eaf93 100644
--- a/drivers/gpio/gpio-dln2.c
+++ b/drivers/gpio/gpio-dln2.c
@@ -220,11 +220,12 @@ static int dln2_gpio_get(struct gpio_chip *chip, unsigned int offset)
return dln2_gpio_pin_get_out_val(dln2, offset);
}
-static void dln2_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int dln2_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct dln2_gpio *dln2 = gpiochip_get_data(chip);
- dln2_gpio_pin_set_out_val(dln2, offset, value);
+ return dln2_gpio_pin_set_out_val(dln2, offset, value);
}
static int dln2_gpio_set_direction(struct gpio_chip *chip, unsigned offset,
@@ -468,7 +469,7 @@ static int dln2_gpio_probe(struct platform_device *pdev)
dln2->gpio.base = -1;
dln2->gpio.ngpio = pins;
dln2->gpio.can_sleep = true;
- dln2->gpio.set = dln2_gpio_set;
+ dln2->gpio.set_rv = dln2_gpio_set;
dln2->gpio.get = dln2_gpio_get;
dln2->gpio.request = dln2_gpio_request;
dln2->gpio.free = dln2_gpio_free;
diff --git a/drivers/gpio/gpio-ds4520.c b/drivers/gpio/gpio-ds4520.c
index 1903deaef3e9..f52ecae382a4 100644
--- a/drivers/gpio/gpio-ds4520.c
+++ b/drivers/gpio/gpio-ds4520.c
@@ -25,7 +25,6 @@ static int ds4520_gpio_probe(struct i2c_client *client)
struct gpio_regmap_config config = { };
struct device *dev = &client->dev;
struct regmap *regmap;
- u32 ngpio;
u32 base;
int ret;
@@ -33,10 +32,6 @@ static int ds4520_gpio_probe(struct i2c_client *client)
if (ret)
return dev_err_probe(dev, ret, "Missing 'reg' property.\n");
- ret = device_property_read_u32(dev, "ngpios", &ngpio);
- if (ret)
- return dev_err_probe(dev, ret, "Missing 'ngpios' property.\n");
-
regmap = devm_regmap_init_i2c(client, &ds4520_regmap_config);
if (IS_ERR(regmap))
return dev_err_probe(dev, PTR_ERR(regmap),
@@ -44,7 +39,6 @@ static int ds4520_gpio_probe(struct i2c_client *client)
config.regmap = regmap;
config.parent = dev;
- config.ngpio = ngpio;
config.reg_dat_base = base + DS4520_IO_STATUS0;
config.reg_set_base = base + DS4520_PULLUP0;
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index d4bf8d187e16..f2973d0b7138 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -203,9 +203,10 @@ static int sprd_eic_direction_input(struct gpio_chip *chip, unsigned int offset)
return 0;
}
-static void sprd_eic_set(struct gpio_chip *chip, unsigned int offset, int value)
+static int sprd_eic_set(struct gpio_chip *chip, unsigned int offset, int value)
{
/* EICs are always input, nothing need to do here. */
+ return 0;
}
static int sprd_eic_set_debounce(struct gpio_chip *chip, unsigned int offset,
@@ -662,7 +663,7 @@ static int sprd_eic_probe(struct platform_device *pdev)
sprd_eic->chip.request = sprd_eic_request;
sprd_eic->chip.free = sprd_eic_free;
sprd_eic->chip.set_config = sprd_eic_set_config;
- sprd_eic->chip.set = sprd_eic_set;
+ sprd_eic->chip.set_rv = sprd_eic_set;
fallthrough;
case SPRD_EIC_ASYNC:
case SPRD_EIC_SYNC:
diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index 6c862c572322..a5e6e446f39c 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -204,13 +204,15 @@ static void __em_gio_set(struct gpio_chip *chip, unsigned int reg,
(BIT(shift + 16)) | (value << shift));
}
-static void em_gio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int em_gio_set(struct gpio_chip *chip, unsigned int offset, int value)
{
/* output is split into two registers */
if (offset < 16)
__em_gio_set(chip, GIO_OL, offset, value);
else
__em_gio_set(chip, GIO_OH, offset - 16, value);
+
+ return 0;
}
static int em_gio_direction_output(struct gpio_chip *chip, unsigned offset,
@@ -304,7 +306,7 @@ static int em_gio_probe(struct platform_device *pdev)
gpio_chip->direction_input = em_gio_direction_input;
gpio_chip->get = em_gio_get;
gpio_chip->direction_output = em_gio_direction_output;
- gpio_chip->set = em_gio_set;
+ gpio_chip->set_rv = em_gio_set;
gpio_chip->to_irq = em_gio_to_irq;
gpio_chip->request = pinctrl_gpio_request;
gpio_chip->free = em_gio_free;
@@ -323,8 +325,9 @@ static int em_gio_probe(struct platform_device *pdev)
irq_chip->irq_release_resources = em_gio_irq_relres;
irq_chip->flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND;
- p->irq_domain = irq_domain_add_simple(dev->of_node, ngpios, 0,
- &em_gio_irq_domain_ops, p);
+ p->irq_domain = irq_domain_create_simple(of_fwnode_handle(dev->of_node),
+ ngpios, 0,
+ &em_gio_irq_domain_ops, p);
if (!p->irq_domain) {
dev_err(dev, "cannot initialize irq domain\n");
return -ENXIO;
diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c
index d5909a4f0433..beb98286d13e 100644
--- a/drivers/gpio/gpio-exar.c
+++ b/drivers/gpio/gpio-exar.c
@@ -93,8 +93,8 @@ static int exar_get_value(struct gpio_chip *chip, unsigned int offset)
return !!(regmap_test_bits(exar_gpio->regmap, addr, BIT(bit)));
}
-static void exar_set_value(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int exar_set_value(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
unsigned int addr = exar_offset_to_lvl_addr(exar_gpio, offset);
@@ -105,7 +105,7 @@ static void exar_set_value(struct gpio_chip *chip, unsigned int offset,
* regmap_write_bits() forces value to be written when an external
* pull up/down might otherwise indicate value was already set.
*/
- regmap_write_bits(exar_gpio->regmap, addr, BIT(bit), bit_value);
+ return regmap_write_bits(exar_gpio->regmap, addr, BIT(bit), bit_value);
}
static int exar_direction_output(struct gpio_chip *chip, unsigned int offset,
@@ -114,11 +114,13 @@ static int exar_direction_output(struct gpio_chip *chip, unsigned int offset,
struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
unsigned int addr = exar_offset_to_sel_addr(exar_gpio, offset);
unsigned int bit = exar_offset_to_bit(exar_gpio, offset);
+ int ret;
- exar_set_value(chip, offset, value);
- regmap_clear_bits(exar_gpio->regmap, addr, BIT(bit));
+ ret = exar_set_value(chip, offset, value);
+ if (ret)
+ return ret;
- return 0;
+ return regmap_clear_bits(exar_gpio->regmap, addr, BIT(bit));
}
static int exar_direction_input(struct gpio_chip *chip, unsigned int offset)
@@ -209,7 +211,7 @@ static int gpio_exar_probe(struct platform_device *pdev)
exar_gpio->gpio_chip.direction_input = exar_direction_input;
exar_gpio->gpio_chip.get_direction = exar_get_direction;
exar_gpio->gpio_chip.get = exar_get_value;
- exar_gpio->gpio_chip.set = exar_set_value;
+ exar_gpio->gpio_chip.set_rv = exar_set_value;
exar_gpio->gpio_chip.base = -1;
exar_gpio->gpio_chip.ngpio = ngpios;
exar_gpio->index = index;
diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c
index 3875fd940ccb..dfcd3634f279 100644
--- a/drivers/gpio/gpio-f7188x.c
+++ b/drivers/gpio/gpio-f7188x.c
@@ -159,7 +159,8 @@ static int f7188x_gpio_direction_in(struct gpio_chip *chip, unsigned offset);
static int f7188x_gpio_get(struct gpio_chip *chip, unsigned offset);
static int f7188x_gpio_direction_out(struct gpio_chip *chip,
unsigned offset, int value);
-static void f7188x_gpio_set(struct gpio_chip *chip, unsigned offset, int value);
+static int f7188x_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value);
static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset,
unsigned long config);
@@ -172,7 +173,7 @@ static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset,
.direction_input = f7188x_gpio_direction_in, \
.get = f7188x_gpio_get, \
.direction_output = f7188x_gpio_direction_out, \
- .set = f7188x_gpio_set, \
+ .set_rv = f7188x_gpio_set, \
.set_config = f7188x_gpio_set_config, \
.base = -1, \
.ngpio = _ngpio, \
@@ -391,7 +392,8 @@ static int f7188x_gpio_direction_out(struct gpio_chip *chip,
return 0;
}
-static void f7188x_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int f7188x_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
int err;
struct f7188x_gpio_bank *bank = gpiochip_get_data(chip);
@@ -400,7 +402,8 @@ static void f7188x_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
err = superio_enter(sio->addr);
if (err)
- return;
+ return err;
+
superio_select(sio->addr, sio->device);
data_out = superio_inb(sio->addr, f7188x_gpio_data_out(bank->regbase));
@@ -411,6 +414,8 @@ static void f7188x_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
superio_outb(sio->addr, f7188x_gpio_data_out(bank->regbase), data_out);
superio_exit(sio->addr);
+
+ return 0;
}
static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset,
diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c
index ad6a045fd3d2..f25283e5239d 100644
--- a/drivers/gpio/gpio-graniterapids.c
+++ b/drivers/gpio/gpio-graniterapids.c
@@ -116,7 +116,7 @@ static int gnr_gpio_get(struct gpio_chip *gc, unsigned int gpio)
return !!(dw & GNR_CFG_DW_RXSTATE);
}
-static void gnr_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int gnr_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
u32 clear = 0;
u32 set = 0;
@@ -126,7 +126,7 @@ static void gnr_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
else
clear = GNR_CFG_DW_TXSTATE;
- gnr_gpio_configure_line(gc, gpio, clear, set);
+ return gnr_gpio_configure_line(gc, gpio, clear, set);
}
static int gnr_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio)
@@ -159,7 +159,7 @@ static const struct gpio_chip gnr_gpio_chip = {
.owner = THIS_MODULE,
.request = gnr_gpio_request,
.get = gnr_gpio_get,
- .set = gnr_gpio_set,
+ .set_rv = gnr_gpio_set,
.get_direction = gnr_gpio_get_direction,
.direction_input = gnr_gpio_direction_input,
.direction_output = gnr_gpio_direction_output,
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
index 30a0522ae735..d38a2d9854ca 100644
--- a/drivers/gpio/gpio-grgpio.c
+++ b/drivers/gpio/gpio-grgpio.c
@@ -170,6 +170,8 @@ static void grgpio_irq_mask(struct irq_data *d)
grgpio_set_imask(priv, offset, 0);
raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+
+ gpiochip_disable_irq(&priv->gc, d->hwirq);
}
static void grgpio_irq_unmask(struct irq_data *d)
@@ -178,6 +180,7 @@ static void grgpio_irq_unmask(struct irq_data *d)
int offset = d->hwirq;
unsigned long flags;
+ gpiochip_enable_irq(&priv->gc, d->hwirq);
raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
grgpio_set_imask(priv, offset, 1);
@@ -185,11 +188,13 @@ static void grgpio_irq_unmask(struct irq_data *d)
raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
}
-static struct irq_chip grgpio_irq_chip = {
+static const struct irq_chip grgpio_irq_chip = {
.name = "grgpio",
.irq_mask = grgpio_irq_mask,
.irq_unmask = grgpio_irq_unmask,
.irq_set_type = grgpio_irq_set_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static irqreturn_t grgpio_irq_handler(int irq, void *dev)
@@ -397,7 +402,7 @@ static int grgpio_probe(struct platform_device *ofdev)
return -EINVAL;
}
- priv->domain = irq_domain_add_linear(np, gc->ngpio,
+ priv->domain = irq_domain_create_linear(of_fwnode_handle(np), gc->ngpio,
&grgpio_irq_domain_ops,
priv);
if (!priv->domain) {
diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c
index 7e29a2d8de1a..a40ba99a3aea 100644
--- a/drivers/gpio/gpio-gw-pld.c
+++ b/drivers/gpio/gpio-gw-pld.c
@@ -62,9 +62,9 @@ static int gw_pld_output8(struct gpio_chip *gc, unsigned offset, int value)
return i2c_smbus_write_byte(gw->client, gw->out);
}
-static void gw_pld_set8(struct gpio_chip *gc, unsigned offset, int value)
+static int gw_pld_set8(struct gpio_chip *gc, unsigned int offset, int value)
{
- gw_pld_output8(gc, offset, value);
+ return gw_pld_output8(gc, offset, value);
}
static int gw_pld_probe(struct i2c_client *client)
@@ -86,7 +86,7 @@ static int gw_pld_probe(struct i2c_client *client)
gw->chip.direction_input = gw_pld_input8;
gw->chip.get = gw_pld_get8;
gw->chip.direction_output = gw_pld_output8;
- gw->chip.set = gw_pld_set8;
+ gw->chip.set_rv = gw_pld_set8;
gw->client = client;
/*
diff --git a/drivers/gpio/gpio-htc-egpio.c b/drivers/gpio/gpio-htc-egpio.c
index a40bd56673fe..b1844a676c7c 100644
--- a/drivers/gpio/gpio-htc-egpio.c
+++ b/drivers/gpio/gpio-htc-egpio.c
@@ -170,7 +170,7 @@ static int egpio_direction_input(struct gpio_chip *chip, unsigned offset)
* Output pins
*/
-static void egpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int egpio_set(struct gpio_chip *chip, unsigned int offset, int value)
{
unsigned long flag;
struct egpio_chip *egpio;
@@ -198,6 +198,8 @@ static void egpio_set(struct gpio_chip *chip, unsigned offset, int value)
egpio->cached_values &= ~(1 << offset);
egpio_writew((egpio->cached_values >> shift) & ei->reg_mask, ei, reg);
spin_unlock_irqrestore(&ei->lock, flag);
+
+ return 0;
}
static int egpio_direction_output(struct gpio_chip *chip,
@@ -206,12 +208,10 @@ static int egpio_direction_output(struct gpio_chip *chip,
struct egpio_chip *egpio;
egpio = gpiochip_get_data(chip);
- if (test_bit(offset, &egpio->is_out)) {
- egpio_set(chip, offset, value);
- return 0;
- } else {
- return -EINVAL;
- }
+ if (test_bit(offset, &egpio->is_out))
+ return egpio_set(chip, offset, value);
+
+ return -EINVAL;
}
static int egpio_get_direction(struct gpio_chip *chip, unsigned offset)
@@ -324,7 +324,7 @@ static int __init egpio_probe(struct platform_device *pdev)
chip->parent = &pdev->dev;
chip->owner = THIS_MODULE;
chip->get = egpio_get;
- chip->set = egpio_set;
+ chip->set_rv = egpio_set;
chip->direction_input = egpio_direction_input;
chip->direction_output = egpio_direction_output;
chip->get_direction = egpio_get_direction;
diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c
index 0be9285efebc..67089b2423d8 100644
--- a/drivers/gpio/gpio-ich.c
+++ b/drivers/gpio/gpio-ich.c
@@ -175,12 +175,16 @@ static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned int nr)
static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned int nr,
int val)
{
+ int ret;
+
/* Disable blink hardware which is available for GPIOs from 0 to 31. */
if (nr < 32 && ichx_priv.desc->have_blink)
ichx_write_bit(GPO_BLINK, nr, 0, 0);
/* Set GPIO output value. */
- ichx_write_bit(GPIO_LVL, nr, val, 0);
+ ret = ichx_write_bit(GPIO_LVL, nr, val, 0);
+ if (ret)
+ return ret;
/*
* Try setting pin as an output and verify it worked since many pins
@@ -252,9 +256,9 @@ static int ich6_gpio_request(struct gpio_chip *chip, unsigned int nr)
return ichx_gpio_request(chip, nr);
}
-static void ichx_gpio_set(struct gpio_chip *chip, unsigned int nr, int val)
+static int ichx_gpio_set(struct gpio_chip *chip, unsigned int nr, int val)
{
- ichx_write_bit(GPIO_LVL, nr, val, 0);
+ return ichx_write_bit(GPIO_LVL, nr, val, 0);
}
static void ichx_gpiolib_setup(struct gpio_chip *chip)
@@ -269,7 +273,7 @@ static void ichx_gpiolib_setup(struct gpio_chip *chip)
chip->get = ichx_priv.desc->get ?
ichx_priv.desc->get : ichx_gpio_get;
- chip->set = ichx_gpio_set;
+ chip->set_rv = ichx_gpio_set;
chip->get_direction = ichx_gpio_get_direction;
chip->direction_input = ichx_gpio_direction_input;
chip->direction_output = ichx_gpio_direction_output;
diff --git a/drivers/gpio/gpio-idt3243x.c b/drivers/gpio/gpio-idt3243x.c
index 00f547d26254..535f25514455 100644
--- a/drivers/gpio/gpio-idt3243x.c
+++ b/drivers/gpio/gpio-idt3243x.c
@@ -37,7 +37,7 @@ static void idt_gpio_dispatch(struct irq_desc *desc)
pending = readl(ctrl->pic + IDT_PIC_IRQ_PEND);
pending &= ~ctrl->mask_cache;
for_each_set_bit(bit, &pending, gc->ngpio) {
- virq = irq_linear_revmap(gc->irq.domain, bit);
+ virq = irq_find_mapping(gc->irq.domain, bit);
if (virq)
generic_handle_irq(virq);
}
diff --git a/drivers/gpio/gpio-imx-scu.c b/drivers/gpio/gpio-imx-scu.c
index 13baf465aedf..1693dbf1b777 100644
--- a/drivers/gpio/gpio-imx-scu.c
+++ b/drivers/gpio/gpio-imx-scu.c
@@ -6,8 +6,10 @@
* to control the PIN resources on SCU domain.
*/
+#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/gpio/driver.h>
#include <linux/platform_device.h>
#include <linux/firmware/imx/svc/rm.h>
@@ -37,16 +39,11 @@ static int imx_scu_gpio_get(struct gpio_chip *chip, unsigned int offset)
int level;
int err;
- if (offset >= chip->ngpio)
- return -EINVAL;
-
- mutex_lock(&priv->lock);
-
- /* to read PIN state via scu api */
- err = imx_sc_misc_get_control(priv->handle,
- scu_rsrc_arr[offset], 0, &level);
- mutex_unlock(&priv->lock);
-
+ scoped_guard(mutex, &priv->lock) {
+ /* to read PIN state via scu api */
+ err = imx_sc_misc_get_control(priv->handle,
+ scu_rsrc_arr[offset], 0, &level);
+ }
if (err) {
dev_err(priv->dev, "SCU get failed: %d\n", err);
return err;
@@ -55,31 +52,26 @@ static int imx_scu_gpio_get(struct gpio_chip *chip, unsigned int offset)
return level;
}
-static void imx_scu_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+static int imx_scu_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct scu_gpio_priv *priv = gpiochip_get_data(chip);
int err;
- if (offset >= chip->ngpio)
- return;
-
- mutex_lock(&priv->lock);
-
- /* to set PIN output level via scu api */
- err = imx_sc_misc_set_control(priv->handle,
- scu_rsrc_arr[offset], 0, value);
- mutex_unlock(&priv->lock);
-
+ scoped_guard(mutex, &priv->lock) {
+ /* to set PIN output level via scu api */
+ err = imx_sc_misc_set_control(priv->handle,
+ scu_rsrc_arr[offset], 0, value);
+ }
if (err)
dev_err(priv->dev, "SCU set (%d) failed: %d\n",
scu_rsrc_arr[offset], err);
+
+ return err;
}
static int imx_scu_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
{
- if (offset >= chip->ngpio)
- return -EINVAL;
-
return GPIO_LINE_DIRECTION_OUT;
}
@@ -99,7 +91,10 @@ static int imx_scu_gpio_probe(struct platform_device *pdev)
return ret;
priv->dev = dev;
- mutex_init(&priv->lock);
+
+ ret = devm_mutex_init(&pdev->dev, &priv->lock);
+ if (ret)
+ return ret;
gc = &priv->chip;
gc->base = -1;
@@ -107,7 +102,7 @@ static int imx_scu_gpio_probe(struct platform_device *pdev)
gc->ngpio = ARRAY_SIZE(scu_rsrc_arr);
gc->label = dev_name(dev);
gc->get = imx_scu_gpio_get;
- gc->set = imx_scu_gpio_set;
+ gc->set_rv = imx_scu_gpio_set;
gc->get_direction = imx_scu_gpio_get_direction;
platform_set_drvdata(pdev, priv);
diff --git a/drivers/gpio/gpio-it87.c b/drivers/gpio/gpio-it87.c
index f332341fd4c8..d8184b527bac 100644
--- a/drivers/gpio/gpio-it87.c
+++ b/drivers/gpio/gpio-it87.c
@@ -213,8 +213,7 @@ exit:
return rc;
}
-static void it87_gpio_set(struct gpio_chip *chip,
- unsigned gpio_num, int val)
+static int it87_gpio_set(struct gpio_chip *chip, unsigned int gpio_num, int val)
{
u8 mask, curr_vals;
u16 reg;
@@ -228,6 +227,8 @@ static void it87_gpio_set(struct gpio_chip *chip,
outb(curr_vals | mask, reg);
else
outb(curr_vals & ~mask, reg);
+
+ return 0;
}
static int it87_gpio_direction_out(struct gpio_chip *chip,
@@ -249,7 +250,9 @@ static int it87_gpio_direction_out(struct gpio_chip *chip,
/* set the output enable bit */
superio_set_mask(mask, group + it87_gpio->output_base);
- it87_gpio_set(chip, gpio_num, val);
+ rc = it87_gpio_set(chip, gpio_num, val);
+ if (rc)
+ goto exit;
superio_exit();
@@ -264,7 +267,7 @@ static const struct gpio_chip it87_template_chip = {
.request = it87_gpio_request,
.get = it87_gpio_get,
.direction_input = it87_gpio_direction_in,
- .set = it87_gpio_set,
+ .set_rv = it87_gpio_set,
.direction_output = it87_gpio_direction_out,
.base = -1
};
diff --git a/drivers/gpio/gpio-janz-ttl.c b/drivers/gpio/gpio-janz-ttl.c
index cdf50e4ea165..9f548eda3888 100644
--- a/drivers/gpio/gpio-janz-ttl.c
+++ b/drivers/gpio/gpio-janz-ttl.c
@@ -76,7 +76,7 @@ static int ttl_get_value(struct gpio_chip *gpio, unsigned offset)
return !!ret;
}
-static void ttl_set_value(struct gpio_chip *gpio, unsigned offset, int value)
+static int ttl_set_value(struct gpio_chip *gpio, unsigned int offset, int value)
{
struct ttl_module *mod = dev_get_drvdata(gpio->parent);
void __iomem *port;
@@ -103,6 +103,8 @@ static void ttl_set_value(struct gpio_chip *gpio, unsigned offset, int value)
iowrite16be(*shadow, port);
spin_unlock(&mod->lock);
+
+ return 0;
}
static void ttl_write_reg(struct ttl_module *mod, u8 reg, u16 val)
@@ -169,7 +171,7 @@ static int ttl_probe(struct platform_device *pdev)
gpio->parent = &pdev->dev;
gpio->label = pdev->name;
gpio->get = ttl_get_value;
- gpio->set = ttl_set_value;
+ gpio->set_rv = ttl_set_value;
gpio->owner = THIS_MODULE;
/* request dynamic allocation */
diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c
index 4ea15f08e0f4..e38e604baa22 100644
--- a/drivers/gpio/gpio-kempld.c
+++ b/drivers/gpio/gpio-kempld.c
@@ -63,7 +63,8 @@ static int kempld_gpio_get(struct gpio_chip *chip, unsigned offset)
return !!kempld_gpio_get_bit(pld, KEMPLD_GPIO_LVL_NUM(offset), offset);
}
-static void kempld_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int kempld_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
struct kempld_device_data *pld = gpio->pld;
@@ -71,6 +72,8 @@ static void kempld_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
kempld_get_mutex(pld);
kempld_gpio_bitop(pld, KEMPLD_GPIO_LVL_NUM(offset), offset, value);
kempld_release_mutex(pld);
+
+ return 0;
}
static int kempld_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
@@ -166,7 +169,7 @@ static int kempld_gpio_probe(struct platform_device *pdev)
chip->direction_output = kempld_gpio_direction_output;
chip->get_direction = kempld_gpio_get_direction;
chip->get = kempld_gpio_get;
- chip->set = kempld_gpio_set;
+ chip->set_rv = kempld_gpio_set;
chip->ngpio = kempld_gpio_pincount(pld);
if (chip->ngpio == 0) {
dev_err(dev, "No GPIO pins detected\n");
diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c
index 817ecb12d550..61524a9ba765 100644
--- a/drivers/gpio/gpio-ljca.c
+++ b/drivers/gpio/gpio-ljca.c
@@ -144,8 +144,8 @@ static int ljca_gpio_get_value(struct gpio_chip *chip, unsigned int offset)
return ljca_gpio_read(ljca_gpio, offset);
}
-static void ljca_gpio_set_value(struct gpio_chip *chip, unsigned int offset,
- int val)
+static int ljca_gpio_set_value(struct gpio_chip *chip, unsigned int offset,
+ int val)
{
struct ljca_gpio_dev *ljca_gpio = gpiochip_get_data(chip);
int ret;
@@ -155,6 +155,8 @@ static void ljca_gpio_set_value(struct gpio_chip *chip, unsigned int offset,
dev_err(chip->parent,
"set value failed offset: %u val: %d ret: %d\n",
offset, val, ret);
+
+ return ret;
}
static int ljca_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
@@ -183,7 +185,10 @@ static int ljca_gpio_direction_output(struct gpio_chip *chip,
if (ret)
return ret;
- ljca_gpio_set_value(chip, offset, val);
+ ret = ljca_gpio_set_value(chip, offset, val);
+ if (ret)
+ return ret;
+
set_bit(offset, ljca_gpio->output_enabled);
return 0;
@@ -432,7 +437,7 @@ static int ljca_gpio_probe(struct auxiliary_device *auxdev,
ljca_gpio->gc.direction_output = ljca_gpio_direction_output;
ljca_gpio->gc.get_direction = ljca_gpio_get_direction;
ljca_gpio->gc.get = ljca_gpio_get_value;
- ljca_gpio->gc.set = ljca_gpio_set_value;
+ ljca_gpio->gc.set_rv = ljca_gpio_set_value;
ljca_gpio->gc.set_config = ljca_gpio_set_config;
ljca_gpio->gc.init_valid_mask = ljca_gpio_init_valid_mask;
ljca_gpio->gc.can_sleep = true;
diff --git a/drivers/gpio/gpio-logicvc.c b/drivers/gpio/gpio-logicvc.c
index 05d62011f335..19cd2847467c 100644
--- a/drivers/gpio/gpio-logicvc.c
+++ b/drivers/gpio/gpio-logicvc.c
@@ -61,23 +61,22 @@ static int logicvc_gpio_get(struct gpio_chip *chip, unsigned offset)
return !!(value & bit);
}
-static void logicvc_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int logicvc_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct logicvc_gpio *logicvc = gpiochip_get_data(chip);
unsigned int reg, bit;
logicvc_gpio_offset(logicvc, offset, &reg, &bit);
- regmap_update_bits(logicvc->regmap, reg, bit, value ? bit : 0);
+ return regmap_update_bits(logicvc->regmap, reg, bit, value ? bit : 0);
}
static int logicvc_gpio_direction_output(struct gpio_chip *chip,
unsigned offset, int value)
{
/* Pins are always configured as output, so just set the value. */
- logicvc_gpio_set(chip, offset, value);
-
- return 0;
+ return logicvc_gpio_set(chip, offset, value);
}
static struct regmap_config logicvc_gpio_regmap_config = {
@@ -135,7 +134,7 @@ static int logicvc_gpio_probe(struct platform_device *pdev)
logicvc->chip.ngpio = LOGICVC_CTRL_GPIO_BITS +
LOGICVC_POWER_CTRL_GPIO_BITS;
logicvc->chip.get = logicvc_gpio_get;
- logicvc->chip.set = logicvc_gpio_set;
+ logicvc->chip.set_rv = logicvc_gpio_set;
logicvc->chip.direction_output = logicvc_gpio_direction_output;
return devm_gpiochip_add_data(dev, &logicvc->chip, logicvc);
diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c
index a9a93036f08f..26227669f026 100644
--- a/drivers/gpio/gpio-loongson-64bit.c
+++ b/drivers/gpio/gpio-loongson-64bit.c
@@ -105,7 +105,7 @@ static int loongson_gpio_get_direction(struct gpio_chip *chip, unsigned int pin)
return GPIO_LINE_DIRECTION_OUT;
}
-static void loongson_gpio_set(struct gpio_chip *chip, unsigned int pin, int value)
+static int loongson_gpio_set(struct gpio_chip *chip, unsigned int pin, int value)
{
unsigned long flags;
struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip);
@@ -113,6 +113,8 @@ static void loongson_gpio_set(struct gpio_chip *chip, unsigned int pin, int valu
spin_lock_irqsave(&lgpio->lock, flags);
loongson_commit_level(lgpio, pin, value);
spin_unlock_irqrestore(&lgpio->lock, flags);
+
+ return 0;
}
static int loongson_gpio_to_irq(struct gpio_chip *chip, unsigned int offset)
@@ -155,7 +157,7 @@ static int loongson_gpio_init(struct device *dev, struct loongson_gpio_chip *lgp
lgpio->chip.get = loongson_gpio_get;
lgpio->chip.get_direction = loongson_gpio_get_direction;
lgpio->chip.direction_output = loongson_gpio_direction_output;
- lgpio->chip.set = loongson_gpio_set;
+ lgpio->chip.set_rv = loongson_gpio_set;
lgpio->chip.parent = dev;
spin_lock_init(&lgpio->lock);
}
diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c
index a42145873cc9..8f3668169ebf 100644
--- a/drivers/gpio/gpio-loongson.c
+++ b/drivers/gpio/gpio-loongson.c
@@ -48,8 +48,8 @@ static int loongson_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
return !!(val & BIT(gpio + LOONGSON_GPIO_IN_OFFSET));
}
-static void loongson_gpio_set_value(struct gpio_chip *chip,
- unsigned gpio, int value)
+static int loongson_gpio_set_value(struct gpio_chip *chip, unsigned int gpio,
+ int value)
{
u32 val;
@@ -61,6 +61,8 @@ static void loongson_gpio_set_value(struct gpio_chip *chip,
val &= ~BIT(gpio);
LOONGSON_GPIODATA = val;
spin_unlock(&gpio_lock);
+
+ return 0;
}
static int loongson_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
@@ -104,7 +106,7 @@ static int loongson_gpio_probe(struct platform_device *pdev)
gc->base = 0;
gc->ngpio = LOONGSON_N_GPIO;
gc->get = loongson_gpio_get_value;
- gc->set = loongson_gpio_set_value;
+ gc->set_rv = loongson_gpio_set_value;
gc->direction_input = loongson_gpio_direction_input;
gc->direction_output = loongson_gpio_direction_output;
diff --git a/drivers/gpio/gpio-lp3943.c b/drivers/gpio/gpio-lp3943.c
index 8e58242f5123..52ab3ac4844c 100644
--- a/drivers/gpio/gpio-lp3943.c
+++ b/drivers/gpio/gpio-lp3943.c
@@ -147,7 +147,8 @@ static int lp3943_gpio_get(struct gpio_chip *chip, unsigned int offset)
return lp3943_get_gpio_out_status(lp3943_gpio, chip, offset);
}
-static void lp3943_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+static int lp3943_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct lp3943_gpio *lp3943_gpio = gpiochip_get_data(chip);
u8 data;
@@ -157,15 +158,19 @@ static void lp3943_gpio_set(struct gpio_chip *chip, unsigned int offset, int val
else
data = LP3943_GPIO_OUT_LOW;
- lp3943_gpio_set_mode(lp3943_gpio, offset, data);
+ return lp3943_gpio_set_mode(lp3943_gpio, offset, data);
}
static int lp3943_gpio_direction_output(struct gpio_chip *chip, unsigned int offset,
int value)
{
struct lp3943_gpio *lp3943_gpio = gpiochip_get_data(chip);
+ int ret;
+
+ ret = lp3943_gpio_set(chip, offset, value);
+ if (ret)
+ return ret;
- lp3943_gpio_set(chip, offset, value);
lp3943_gpio->input_mask &= ~BIT(offset);
return 0;
@@ -179,7 +184,7 @@ static const struct gpio_chip lp3943_gpio_chip = {
.direction_input = lp3943_gpio_direction_input,
.get = lp3943_gpio_get,
.direction_output = lp3943_gpio_direction_output,
- .set = lp3943_gpio_set,
+ .set_rv = lp3943_gpio_set,
.base = -1,
.ngpio = LP3943_MAX_GPIO,
.can_sleep = 1,
diff --git a/drivers/gpio/gpio-lp873x.c b/drivers/gpio/gpio-lp873x.c
index 5c79ba1f229c..1908ed302e92 100644
--- a/drivers/gpio/gpio-lp873x.c
+++ b/drivers/gpio/gpio-lp873x.c
@@ -58,14 +58,14 @@ static int lp873x_gpio_get(struct gpio_chip *chip, unsigned int offset)
return val & BIT(offset * BITS_PER_GPO);
}
-static void lp873x_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int lp873x_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct lp873x_gpio *gpio = gpiochip_get_data(chip);
- regmap_update_bits(gpio->lp873->regmap, LP873X_REG_GPO_CTRL,
- BIT(offset * BITS_PER_GPO),
- value ? BIT(offset * BITS_PER_GPO) : 0);
+ return regmap_update_bits(gpio->lp873->regmap, LP873X_REG_GPO_CTRL,
+ BIT(offset * BITS_PER_GPO),
+ value ? BIT(offset * BITS_PER_GPO) : 0);
}
static int lp873x_gpio_request(struct gpio_chip *gc, unsigned int offset)
@@ -124,7 +124,7 @@ static const struct gpio_chip template_chip = {
.direction_input = lp873x_gpio_direction_input,
.direction_output = lp873x_gpio_direction_output,
.get = lp873x_gpio_get,
- .set = lp873x_gpio_set,
+ .set_rv = lp873x_gpio_set,
.set_config = lp873x_gpio_set_config,
.base = -1,
.ngpio = 2,
diff --git a/drivers/gpio/gpio-lp87565.c b/drivers/gpio/gpio-lp87565.c
index d3ce027de081..8ea687d5d028 100644
--- a/drivers/gpio/gpio-lp87565.c
+++ b/drivers/gpio/gpio-lp87565.c
@@ -30,13 +30,13 @@ static int lp87565_gpio_get(struct gpio_chip *chip, unsigned int offset)
return !!(val & BIT(offset));
}
-static void lp87565_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int lp87565_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct lp87565_gpio *gpio = gpiochip_get_data(chip);
- regmap_update_bits(gpio->map, LP87565_REG_GPIO_OUT,
- BIT(offset), value ? BIT(offset) : 0);
+ return regmap_update_bits(gpio->map, LP87565_REG_GPIO_OUT,
+ BIT(offset), value ? BIT(offset) : 0);
}
static int lp87565_gpio_get_direction(struct gpio_chip *chip,
@@ -69,8 +69,11 @@ static int lp87565_gpio_direction_output(struct gpio_chip *chip,
unsigned int offset, int value)
{
struct lp87565_gpio *gpio = gpiochip_get_data(chip);
+ int ret;
- lp87565_gpio_set(chip, offset, value);
+ ret = lp87565_gpio_set(chip, offset, value);
+ if (ret)
+ return ret;
return regmap_update_bits(gpio->map,
LP87565_REG_GPIO_CONFIG,
@@ -136,7 +139,7 @@ static const struct gpio_chip template_chip = {
.direction_input = lp87565_gpio_direction_input,
.direction_output = lp87565_gpio_direction_output,
.get = lp87565_gpio_get,
- .set = lp87565_gpio_set,
+ .set_rv = lp87565_gpio_set,
.set_config = lp87565_gpio_set_config,
.base = -1,
.ngpio = 3,
diff --git a/drivers/gpio/gpio-lpc18xx.c b/drivers/gpio/gpio-lpc18xx.c
index 2cf9fb4637a2..b0a8da5c058d 100644
--- a/drivers/gpio/gpio-lpc18xx.c
+++ b/drivers/gpio/gpio-lpc18xx.c
@@ -42,6 +42,7 @@ struct lpc18xx_gpio_pin_ic {
void __iomem *base;
struct irq_domain *domain;
struct raw_spinlock lock;
+ struct gpio_chip *gpio;
};
struct lpc18xx_gpio_chip {
@@ -74,6 +75,7 @@ static void lpc18xx_gpio_pin_ic_mask(struct irq_data *d)
{
struct lpc18xx_gpio_pin_ic *ic = d->chip_data;
u32 type = irqd_get_trigger_type(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
raw_spin_lock(&ic->lock);
@@ -88,12 +90,17 @@ static void lpc18xx_gpio_pin_ic_mask(struct irq_data *d)
raw_spin_unlock(&ic->lock);
irq_chip_mask_parent(d);
+
+ gpiochip_disable_irq(ic->gpio, hwirq);
}
static void lpc18xx_gpio_pin_ic_unmask(struct irq_data *d)
{
struct lpc18xx_gpio_pin_ic *ic = d->chip_data;
u32 type = irqd_get_trigger_type(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+ gpiochip_enable_irq(ic->gpio, hwirq);
raw_spin_lock(&ic->lock);
@@ -149,13 +156,14 @@ static int lpc18xx_gpio_pin_ic_set_type(struct irq_data *d, unsigned int type)
return 0;
}
-static struct irq_chip lpc18xx_gpio_pin_ic = {
+static const struct irq_chip lpc18xx_gpio_pin_ic = {
.name = "LPC18xx GPIO pin",
.irq_mask = lpc18xx_gpio_pin_ic_mask,
.irq_unmask = lpc18xx_gpio_pin_ic_unmask,
.irq_eoi = lpc18xx_gpio_pin_ic_eoi,
.irq_set_type = lpc18xx_gpio_pin_ic_set_type,
- .flags = IRQCHIP_SET_TYPE_MASKED,
+ .flags = IRQCHIP_IMMUTABLE | IRQCHIP_SET_TYPE_MASKED,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static int lpc18xx_gpio_pin_ic_domain_alloc(struct irq_domain *domain,
@@ -240,17 +248,16 @@ static int lpc18xx_gpio_pin_ic_probe(struct lpc18xx_gpio_chip *gc)
raw_spin_lock_init(&ic->lock);
- ic->domain = irq_domain_add_hierarchy(parent_domain, 0,
- NR_LPC18XX_GPIO_PIN_IC_IRQS,
- dev->of_node,
- &lpc18xx_gpio_pin_ic_domain_ops,
- ic);
+ ic->domain = irq_domain_create_hierarchy(parent_domain, 0, NR_LPC18XX_GPIO_PIN_IC_IRQS,
+ of_fwnode_handle(dev->of_node),
+ &lpc18xx_gpio_pin_ic_domain_ops, ic);
if (!ic->domain) {
pr_err("unable to add irq domain\n");
ret = -ENODEV;
goto free_iomap;
}
+ ic->gpio = &gc->gpio;
gc->pin_ic = ic;
return 0;
@@ -263,10 +270,14 @@ free_ic:
return ret;
}
-static void lpc18xx_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int lpc18xx_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct lpc18xx_gpio_chip *gc = gpiochip_get_data(chip);
+
writeb(value ? 1 : 0, gc->base + offset);
+
+ return 0;
}
static int lpc18xx_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -316,7 +327,7 @@ static const struct gpio_chip lpc18xx_chip = {
.free = gpiochip_generic_free,
.direction_input = lpc18xx_gpio_direction_input,
.direction_output = lpc18xx_gpio_direction_output,
- .set = lpc18xx_gpio_set,
+ .set_rv = lpc18xx_gpio_set,
.get = lpc18xx_gpio_get,
.ngpio = LPC18XX_MAX_PORTS * LPC18XX_PINS_PER_PORT,
.owner = THIS_MODULE,
diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c
index c097e310c9e8..6668b8bd9f1e 100644
--- a/drivers/gpio/gpio-lpc32xx.c
+++ b/drivers/gpio/gpio-lpc32xx.c
@@ -340,28 +340,34 @@ static int lpc32xx_gpio_dir_out_always(struct gpio_chip *chip, unsigned pin,
return 0;
}
-static void lpc32xx_gpio_set_value_p012(struct gpio_chip *chip, unsigned pin,
- int value)
+static int lpc32xx_gpio_set_value_p012(struct gpio_chip *chip,
+ unsigned int pin, int value)
{
struct lpc32xx_gpio_chip *group = gpiochip_get_data(chip);
__set_gpio_level_p012(group, pin, value);
+
+ return 0;
}
-static void lpc32xx_gpio_set_value_p3(struct gpio_chip *chip, unsigned pin,
- int value)
+static int lpc32xx_gpio_set_value_p3(struct gpio_chip *chip,
+ unsigned int pin, int value)
{
struct lpc32xx_gpio_chip *group = gpiochip_get_data(chip);
__set_gpio_level_p3(group, pin, value);
+
+ return 0;
}
-static void lpc32xx_gpo_set_value(struct gpio_chip *chip, unsigned pin,
- int value)
+static int lpc32xx_gpo_set_value(struct gpio_chip *chip, unsigned int pin,
+ int value)
{
struct lpc32xx_gpio_chip *group = gpiochip_get_data(chip);
__set_gpo_level_p3(group, pin, value);
+
+ return 0;
}
static int lpc32xx_gpo_get_value(struct gpio_chip *chip, unsigned pin)
@@ -401,7 +407,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = {
.direction_input = lpc32xx_gpio_dir_input_p012,
.get = lpc32xx_gpio_get_value_p012,
.direction_output = lpc32xx_gpio_dir_output_p012,
- .set = lpc32xx_gpio_set_value_p012,
+ .set_rv = lpc32xx_gpio_set_value_p012,
.request = lpc32xx_gpio_request,
.to_irq = lpc32xx_gpio_to_irq_p01,
.base = LPC32XX_GPIO_P0_GRP,
@@ -417,7 +423,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = {
.direction_input = lpc32xx_gpio_dir_input_p012,
.get = lpc32xx_gpio_get_value_p012,
.direction_output = lpc32xx_gpio_dir_output_p012,
- .set = lpc32xx_gpio_set_value_p012,
+ .set_rv = lpc32xx_gpio_set_value_p012,
.request = lpc32xx_gpio_request,
.to_irq = lpc32xx_gpio_to_irq_p01,
.base = LPC32XX_GPIO_P1_GRP,
@@ -433,7 +439,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = {
.direction_input = lpc32xx_gpio_dir_input_p012,
.get = lpc32xx_gpio_get_value_p012,
.direction_output = lpc32xx_gpio_dir_output_p012,
- .set = lpc32xx_gpio_set_value_p012,
+ .set_rv = lpc32xx_gpio_set_value_p012,
.request = lpc32xx_gpio_request,
.base = LPC32XX_GPIO_P2_GRP,
.ngpio = LPC32XX_GPIO_P2_MAX,
@@ -448,7 +454,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = {
.direction_input = lpc32xx_gpio_dir_input_p3,
.get = lpc32xx_gpio_get_value_p3,
.direction_output = lpc32xx_gpio_dir_output_p3,
- .set = lpc32xx_gpio_set_value_p3,
+ .set_rv = lpc32xx_gpio_set_value_p3,
.request = lpc32xx_gpio_request,
.to_irq = lpc32xx_gpio_to_irq_gpio_p3,
.base = LPC32XX_GPIO_P3_GRP,
@@ -476,7 +482,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = {
.chip = {
.label = "gpo_p3",
.direction_output = lpc32xx_gpio_dir_out_always,
- .set = lpc32xx_gpo_set_value,
+ .set_rv = lpc32xx_gpo_set_value,
.get = lpc32xx_gpo_get_value,
.request = lpc32xx_gpio_request,
.base = LPC32XX_GPO_P3_GRP,
diff --git a/drivers/gpio/gpio-madera.c b/drivers/gpio/gpio-madera.c
index 8f38303fcbc4..e73e72d62bc8 100644
--- a/drivers/gpio/gpio-madera.c
+++ b/drivers/gpio/gpio-madera.c
@@ -87,23 +87,17 @@ static int madera_gpio_direction_out(struct gpio_chip *chip,
MADERA_GP1_LVL_MASK, reg_val);
}
-static void madera_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int madera_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct madera_gpio *madera_gpio = gpiochip_get_data(chip);
struct madera *madera = madera_gpio->madera;
unsigned int reg_offset = 2 * offset;
unsigned int reg_val = value ? MADERA_GP1_LVL : 0;
- int ret;
-
- ret = regmap_update_bits(madera->regmap,
- MADERA_GPIO1_CTRL_1 + reg_offset,
- MADERA_GP1_LVL_MASK, reg_val);
- /* set() doesn't return an error so log a warning */
- if (ret)
- dev_warn(madera->dev, "Failed to write to 0x%x (%d)\n",
- MADERA_GPIO1_CTRL_1 + reg_offset, ret);
+ return regmap_update_bits(madera->regmap,
+ MADERA_GPIO1_CTRL_1 + reg_offset,
+ MADERA_GP1_LVL_MASK, reg_val);
}
static const struct gpio_chip madera_gpio_chip = {
@@ -115,7 +109,7 @@ static const struct gpio_chip madera_gpio_chip = {
.direction_input = madera_gpio_direction_in,
.get = madera_gpio_get,
.direction_output = madera_gpio_direction_out,
- .set = madera_gpio_set,
+ .set_rv = madera_gpio_set,
.set_config = gpiochip_generic_config,
.can_sleep = true,
};
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index fc0708ab5192..6e6504ab740a 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -103,19 +103,6 @@ static int max3191x_direction_input(struct gpio_chip *gpio, unsigned int offset)
return 0;
}
-static int max3191x_direction_output(struct gpio_chip *gpio,
- unsigned int offset, int value)
-{
- return -EINVAL;
-}
-
-static void max3191x_set(struct gpio_chip *gpio, unsigned int offset, int value)
-{ }
-
-static void max3191x_set_multiple(struct gpio_chip *gpio, unsigned long *mask,
- unsigned long *bits)
-{ }
-
static unsigned int max3191x_wordlen(struct max3191x_chip *max3191x)
{
return max3191x->mode == STATUS_BYTE_ENABLED ? 2 : 1;
@@ -421,9 +408,6 @@ static int max3191x_probe(struct spi_device *spi)
max3191x->gpio.get_direction = max3191x_get_direction;
max3191x->gpio.direction_input = max3191x_direction_input;
- max3191x->gpio.direction_output = max3191x_direction_output;
- max3191x->gpio.set = max3191x_set;
- max3191x->gpio.set_multiple = max3191x_set_multiple;
max3191x->gpio.get = max3191x_get;
max3191x->gpio.get_multiple = max3191x_get_multiple;
max3191x->gpio.set_config = max3191x_set_config;
diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c
index e688c13c8cc3..75d414d8c992 100644
--- a/drivers/gpio/gpio-max730x.c
+++ b/drivers/gpio/gpio-max730x.c
@@ -143,18 +143,21 @@ static int max7301_get(struct gpio_chip *chip, unsigned offset)
return level;
}
-static void max7301_set(struct gpio_chip *chip, unsigned offset, int value)
+static int max7301_set(struct gpio_chip *chip, unsigned int offset, int value)
{
struct max7301 *ts = gpiochip_get_data(chip);
+ int ret;
/* First 4 pins are unused in the controller */
offset += 4;
mutex_lock(&ts->lock);
- __max7301_set(ts, offset, value);
+ ret = __max7301_set(ts, offset, value);
mutex_unlock(&ts->lock);
+
+ return ret;
}
int __max730x_probe(struct max7301 *ts)
@@ -185,7 +188,7 @@ int __max730x_probe(struct max7301 *ts)
ts->chip.direction_input = max7301_direction_input;
ts->chip.get = max7301_get;
ts->chip.direction_output = max7301_direction_output;
- ts->chip.set = max7301_set;
+ ts->chip.set_rv = max7301_set;
ts->chip.ngpio = PIN_NUMBER;
ts->chip.can_sleep = true;
diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index 49d362907bc7..d5ffedb086af 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -225,16 +225,19 @@ out:
mutex_unlock(&chip->lock);
}
-static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+static int max732x_gpio_set_value(struct gpio_chip *gc, unsigned int off,
+ int val)
{
unsigned base = off & ~0x7;
uint8_t mask = 1u << (off & 0x7);
max732x_gpio_set_mask(gc, base, mask, val << (off & 0x7));
+
+ return 0;
}
-static void max732x_gpio_set_multiple(struct gpio_chip *gc,
- unsigned long *mask, unsigned long *bits)
+static int max732x_gpio_set_multiple(struct gpio_chip *gc,
+ unsigned long *mask, unsigned long *bits)
{
unsigned mask_lo = mask[0] & 0xff;
unsigned mask_hi = (mask[0] >> 8) & 0xff;
@@ -243,6 +246,8 @@ static void max732x_gpio_set_multiple(struct gpio_chip *gc,
max732x_gpio_set_mask(gc, 0, mask_lo, bits[0] & 0xff);
if (mask_hi)
max732x_gpio_set_mask(gc, 8, mask_hi, (bits[0] >> 8) & 0xff);
+
+ return 0;
}
static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
@@ -580,8 +585,8 @@ static int max732x_setup_gpio(struct max732x_chip *chip,
gc->direction_input = max732x_gpio_direction_input;
if (chip->dir_output) {
gc->direction_output = max732x_gpio_direction_output;
- gc->set = max732x_gpio_set_value;
- gc->set_multiple = max732x_gpio_set_multiple;
+ gc->set_rv = max732x_gpio_set_value;
+ gc->set_multiple_rv = max732x_gpio_set_multiple;
}
gc->get = max732x_gpio_get_value;
gc->can_sleep = true;
diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c
index 8c2a5609161f..af7af8e40afe 100644
--- a/drivers/gpio/gpio-max77620.c
+++ b/drivers/gpio/gpio-max77620.c
@@ -223,20 +223,17 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio,
return ret;
}
-static void max77620_gpio_set(struct gpio_chip *gc, unsigned int offset,
- int value)
+static int max77620_gpio_set(struct gpio_chip *gc, unsigned int offset,
+ int value)
{
struct max77620_gpio *mgpio = gpiochip_get_data(gc);
u8 val;
- int ret;
val = (value) ? MAX77620_CNFG_GPIO_OUTPUT_VAL_HIGH :
MAX77620_CNFG_GPIO_OUTPUT_VAL_LOW;
- ret = regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset),
- MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK, val);
- if (ret < 0)
- dev_err(mgpio->dev, "CNFG_GPIO_OUT update failed: %d\n", ret);
+ return regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset),
+ MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK, val);
}
static int max77620_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
@@ -314,7 +311,7 @@ static int max77620_gpio_probe(struct platform_device *pdev)
mgpio->gpio_chip.direction_input = max77620_gpio_dir_input;
mgpio->gpio_chip.get = max77620_gpio_get;
mgpio->gpio_chip.direction_output = max77620_gpio_dir_output;
- mgpio->gpio_chip.set = max77620_gpio_set;
+ mgpio->gpio_chip.set_rv = max77620_gpio_set;
mgpio->gpio_chip.set_config = max77620_gpio_set_config;
mgpio->gpio_chip.ngpio = MAX77620_GPIO_NR;
mgpio->gpio_chip.can_sleep = 1;
diff --git a/drivers/gpio/gpio-max77759.c b/drivers/gpio/gpio-max77759.c
new file mode 100644
index 000000000000..7fe8e6f697d0
--- /dev/null
+++ b/drivers/gpio/gpio-max77759.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright 2020 Google Inc
+// Copyright 2025 Linaro Ltd.
+//
+// GPIO driver for Maxim MAX77759
+
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/device/driver.h>
+#include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqreturn.h>
+#include <linux/lockdep.h>
+#include <linux/mfd/max77759.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/seq_file.h>
+
+#define MAX77759_N_GPIOS ARRAY_SIZE(max77759_gpio_line_names)
+static const char * const max77759_gpio_line_names[] = { "GPIO5", "GPIO6" };
+
+struct max77759_gpio_chip {
+ struct regmap *map;
+ struct max77759 *max77759;
+ struct gpio_chip gc;
+ struct mutex maxq_lock; /* protect MaxQ r/m/w operations */
+
+ struct mutex irq_lock; /* protect irq bus */
+ int irq_mask;
+ int irq_mask_changed;
+ int irq_trig;
+ int irq_trig_changed;
+};
+
+#define MAX77759_GPIOx_TRIGGER(offs, val) (((val) & 1) << (offs))
+#define MAX77759_GPIOx_TRIGGER_MASK(offs) MAX77759_GPIOx_TRIGGER(offs, ~0)
+enum max77759_trigger_gpio_type {
+ MAX77759_GPIO_TRIGGER_RISING = 0,
+ MAX77759_GPIO_TRIGGER_FALLING = 1
+};
+
+#define MAX77759_GPIOx_DIR(offs, dir) (((dir) & 1) << (2 + (3 * (offs))))
+#define MAX77759_GPIOx_DIR_MASK(offs) MAX77759_GPIOx_DIR(offs, ~0)
+enum max77759_control_gpio_dir {
+ MAX77759_GPIO_DIR_IN = 0,
+ MAX77759_GPIO_DIR_OUT = 1
+};
+
+#define MAX77759_GPIOx_OUTVAL(offs, val) (((val) & 1) << (3 + (3 * (offs))))
+#define MAX77759_GPIOx_OUTVAL_MASK(offs) MAX77759_GPIOx_OUTVAL(offs, ~0)
+
+#define MAX77759_GPIOx_INVAL_MASK(offs) (BIT(4) << (3 * (offs)))
+
+static int max77759_gpio_maxq_gpio_trigger_read(struct max77759_gpio_chip *chip)
+{
+ DEFINE_FLEX(struct max77759_maxq_command, cmd, cmd, length, 1);
+ DEFINE_FLEX(struct max77759_maxq_response, rsp, rsp, length, 2);
+ int ret;
+
+ cmd->cmd[0] = MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_READ;
+
+ ret = max77759_maxq_command(chip->max77759, cmd, rsp);
+ if (ret < 0)
+ return ret;
+
+ return rsp->rsp[1];
+}
+
+static int max77759_gpio_maxq_gpio_trigger_write(struct max77759_gpio_chip *chip,
+ u8 trigger)
+{
+ DEFINE_FLEX(struct max77759_maxq_command, cmd, cmd, length, 2);
+
+ cmd->cmd[0] = MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_WRITE;
+ cmd->cmd[1] = trigger;
+
+ return max77759_maxq_command(chip->max77759, cmd, NULL);
+}
+
+static int max77759_gpio_maxq_gpio_control_read(struct max77759_gpio_chip *chip)
+{
+ DEFINE_FLEX(struct max77759_maxq_command, cmd, cmd, length, 1);
+ DEFINE_FLEX(struct max77759_maxq_response, rsp, rsp, length, 2);
+ int ret;
+
+ cmd->cmd[0] = MAX77759_MAXQ_OPCODE_GPIO_CONTROL_READ;
+
+ ret = max77759_maxq_command(chip->max77759, cmd, rsp);
+ if (ret < 0)
+ return ret;
+
+ return rsp->rsp[1];
+}
+
+static int max77759_gpio_maxq_gpio_control_write(struct max77759_gpio_chip *chip,
+ u8 ctrl)
+{
+ DEFINE_FLEX(struct max77759_maxq_command, cmd, cmd, length, 2);
+
+ cmd->cmd[0] = MAX77759_MAXQ_OPCODE_GPIO_CONTROL_WRITE;
+ cmd->cmd[1] = ctrl;
+
+ return max77759_maxq_command(chip->max77759, cmd, NULL);
+}
+
+static int
+max77759_gpio_direction_from_control(int ctrl, unsigned int offset)
+{
+ enum max77759_control_gpio_dir dir;
+
+ dir = !!(ctrl & MAX77759_GPIOx_DIR_MASK(offset));
+ return ((dir == MAX77759_GPIO_DIR_OUT)
+ ? GPIO_LINE_DIRECTION_OUT
+ : GPIO_LINE_DIRECTION_IN);
+}
+
+static int max77759_gpio_get_direction(struct gpio_chip *gc,
+ unsigned int offset)
+{
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ int ctrl;
+
+ ctrl = max77759_gpio_maxq_gpio_control_read(chip);
+ if (ctrl < 0)
+ return ctrl;
+
+ return max77759_gpio_direction_from_control(ctrl, offset);
+}
+
+static int max77759_gpio_direction_helper(struct gpio_chip *gc,
+ unsigned int offset,
+ enum max77759_control_gpio_dir dir,
+ int value)
+{
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ int ctrl, new_ctrl;
+
+ guard(mutex)(&chip->maxq_lock);
+
+ ctrl = max77759_gpio_maxq_gpio_control_read(chip);
+ if (ctrl < 0)
+ return ctrl;
+
+ new_ctrl = ctrl & ~MAX77759_GPIOx_DIR_MASK(offset);
+ new_ctrl |= MAX77759_GPIOx_DIR(offset, dir);
+
+ if (dir == MAX77759_GPIO_DIR_OUT) {
+ new_ctrl &= ~MAX77759_GPIOx_OUTVAL_MASK(offset);
+ new_ctrl |= MAX77759_GPIOx_OUTVAL(offset, value);
+ }
+
+ if (new_ctrl == ctrl)
+ return 0;
+
+ return max77759_gpio_maxq_gpio_control_write(chip, new_ctrl);
+}
+
+static int max77759_gpio_direction_input(struct gpio_chip *gc,
+ unsigned int offset)
+{
+ return max77759_gpio_direction_helper(gc, offset,
+ MAX77759_GPIO_DIR_IN, -1);
+}
+
+static int max77759_gpio_direction_output(struct gpio_chip *gc,
+ unsigned int offset, int value)
+{
+ return max77759_gpio_direction_helper(gc, offset,
+ MAX77759_GPIO_DIR_OUT, value);
+}
+
+static int max77759_gpio_get_value(struct gpio_chip *gc, unsigned int offset)
+{
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ int ctrl, mask;
+
+ ctrl = max77759_gpio_maxq_gpio_control_read(chip);
+ if (ctrl < 0)
+ return ctrl;
+
+ /*
+ * The input status bit doesn't reflect the pin state when the GPIO is
+ * configured as an output. Check the direction, and inspect the input
+ * or output bit accordingly.
+ */
+ mask = ((max77759_gpio_direction_from_control(ctrl, offset)
+ == GPIO_LINE_DIRECTION_IN)
+ ? MAX77759_GPIOx_INVAL_MASK(offset)
+ : MAX77759_GPIOx_OUTVAL_MASK(offset));
+
+ return !!(ctrl & mask);
+}
+
+static int max77759_gpio_set_value(struct gpio_chip *gc,
+ unsigned int offset, int value)
+{
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ int ctrl, new_ctrl;
+
+ guard(mutex)(&chip->maxq_lock);
+
+ ctrl = max77759_gpio_maxq_gpio_control_read(chip);
+ if (ctrl < 0)
+ return ctrl;
+
+ new_ctrl = ctrl & ~MAX77759_GPIOx_OUTVAL_MASK(offset);
+ new_ctrl |= MAX77759_GPIOx_OUTVAL(offset, value);
+
+ if (new_ctrl == ctrl)
+ return 0;
+
+ return max77759_gpio_maxq_gpio_control_write(chip, new_ctrl);
+}
+
+static void max77759_gpio_irq_mask(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+ chip->irq_mask &= ~MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(hwirq);
+ chip->irq_mask |= MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(hwirq, 1);
+ chip->irq_mask_changed |= MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(hwirq, 1);
+
+ gpiochip_disable_irq(gc, hwirq);
+}
+
+static void max77759_gpio_irq_unmask(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+ gpiochip_enable_irq(gc, hwirq);
+
+ chip->irq_mask &= ~MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(hwirq);
+ chip->irq_mask |= MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(hwirq, 0);
+ chip->irq_mask_changed |= MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(hwirq, 1);
+}
+
+static int max77759_gpio_set_irq_type(struct irq_data *d, unsigned int type)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+ chip->irq_trig &= ~MAX77759_GPIOx_TRIGGER_MASK(hwirq);
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ chip->irq_trig |= MAX77759_GPIOx_TRIGGER(hwirq,
+ MAX77759_GPIO_TRIGGER_RISING);
+ break;
+
+ case IRQ_TYPE_EDGE_FALLING:
+ chip->irq_trig |= MAX77759_GPIOx_TRIGGER(hwirq,
+ MAX77759_GPIO_TRIGGER_FALLING);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ chip->irq_trig_changed |= MAX77759_GPIOx_TRIGGER(hwirq, 1);
+
+ return 0;
+}
+
+static void max77759_gpio_bus_lock(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+
+ mutex_lock(&chip->irq_lock);
+}
+
+static int max77759_gpio_bus_sync_unlock_helper(struct gpio_chip *gc,
+ struct max77759_gpio_chip *chip)
+ __must_hold(&chip->maxq_lock)
+{
+ int ctrl, trigger, new_trigger, new_ctrl;
+ unsigned long irq_trig_changed;
+ int offset;
+ int ret;
+
+ lockdep_assert_held(&chip->maxq_lock);
+
+ ctrl = max77759_gpio_maxq_gpio_control_read(chip);
+ trigger = max77759_gpio_maxq_gpio_trigger_read(chip);
+ if (ctrl < 0 || trigger < 0) {
+ dev_err(gc->parent, "failed to read current state: %d / %d\n",
+ ctrl, trigger);
+ return (ctrl < 0) ? ctrl : trigger;
+ }
+
+ new_trigger = trigger & ~chip->irq_trig_changed;
+ new_trigger |= (chip->irq_trig & chip->irq_trig_changed);
+
+ /* change GPIO direction if required */
+ new_ctrl = ctrl;
+ irq_trig_changed = chip->irq_trig_changed;
+ for_each_set_bit(offset, &irq_trig_changed, MAX77759_N_GPIOS) {
+ new_ctrl &= ~MAX77759_GPIOx_DIR_MASK(offset);
+ new_ctrl |= MAX77759_GPIOx_DIR(offset, MAX77759_GPIO_DIR_IN);
+ }
+
+ if (new_trigger != trigger) {
+ ret = max77759_gpio_maxq_gpio_trigger_write(chip, new_trigger);
+ if (ret) {
+ dev_err(gc->parent,
+ "failed to write new trigger: %d\n", ret);
+ return ret;
+ }
+ }
+
+ if (new_ctrl != ctrl) {
+ ret = max77759_gpio_maxq_gpio_control_write(chip, new_ctrl);
+ if (ret) {
+ dev_err(gc->parent,
+ "failed to write new control: %d\n", ret);
+ return ret;
+ }
+ }
+
+ chip->irq_trig_changed = 0;
+
+ return 0;
+}
+
+static void max77759_gpio_bus_sync_unlock(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct max77759_gpio_chip *chip = gpiochip_get_data(gc);
+ int ret;
+
+ scoped_guard(mutex, &chip->maxq_lock) {
+ ret = max77759_gpio_bus_sync_unlock_helper(gc, chip);
+ if (ret)
+ goto out_unlock;
+ }
+
+ ret = regmap_update_bits(chip->map,
+ MAX77759_MAXQ_REG_UIC_INT1_M,
+ chip->irq_mask_changed, chip->irq_mask);
+ if (ret) {
+ dev_err(gc->parent,
+ "failed to update UIC_INT1 irq mask: %d\n", ret);
+ goto out_unlock;
+ }
+
+ chip->irq_mask_changed = 0;
+
+out_unlock:
+ mutex_unlock(&chip->irq_lock);
+}
+
+static void max77759_gpio_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+ seq_puts(p, dev_name(gc->parent));
+}
+
+static const struct irq_chip max77759_gpio_irq_chip = {
+ .irq_mask = max77759_gpio_irq_mask,
+ .irq_unmask = max77759_gpio_irq_unmask,
+ .irq_set_type = max77759_gpio_set_irq_type,
+ .irq_bus_lock = max77759_gpio_bus_lock,
+ .irq_bus_sync_unlock = max77759_gpio_bus_sync_unlock,
+ .irq_print_chip = max77759_gpio_irq_print_chip,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
+static irqreturn_t max77759_gpio_irqhandler(int irq, void *data)
+{
+ struct max77759_gpio_chip *chip = data;
+ struct gpio_chip *gc = &chip->gc;
+ bool handled = false;
+
+ /* iterate until no interrupt is pending */
+ while (true) {
+ unsigned int uic_int1;
+ int ret;
+ unsigned long pending;
+ int offset;
+
+ ret = regmap_read(chip->map, MAX77759_MAXQ_REG_UIC_INT1,
+ &uic_int1);
+ if (ret < 0) {
+ dev_err_ratelimited(gc->parent,
+ "failed to read IRQ status: %d\n",
+ ret);
+ /*
+ * If !handled, we have looped not even once, which
+ * means we should return IRQ_NONE in that case (and
+ * of course IRQ_HANDLED otherwise).
+ */
+ return IRQ_RETVAL(handled);
+ }
+
+ pending = uic_int1;
+ pending &= (MAX77759_MAXQ_REG_UIC_INT1_GPIO6I
+ | MAX77759_MAXQ_REG_UIC_INT1_GPIO5I);
+ if (!pending)
+ break;
+
+ for_each_set_bit(offset, &pending, MAX77759_N_GPIOS) {
+ /*
+ * ACK interrupt by writing 1 to bit 'offset', all
+ * others need to be written as 0. This needs to be
+ * done unconditionally hence regmap_set_bits() is
+ * inappropriate here.
+ */
+ regmap_write(chip->map, MAX77759_MAXQ_REG_UIC_INT1,
+ BIT(offset));
+
+ handle_nested_irq(irq_find_mapping(gc->irq.domain,
+ offset));
+
+ handled = true;
+ }
+ }
+
+ return IRQ_RETVAL(handled);
+}
+
+static int max77759_gpio_probe(struct platform_device *pdev)
+{
+ struct max77759_gpio_chip *chip;
+ int irq;
+ struct gpio_irq_chip *girq;
+ int ret;
+ unsigned long irq_flags;
+ struct irq_data *irqd;
+
+ chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->map = dev_get_regmap(pdev->dev.parent, "maxq");
+ if (!chip->map)
+ return dev_err_probe(&pdev->dev, -ENODEV, "Missing regmap\n");
+
+ irq = platform_get_irq_byname(pdev, "GPI");
+ if (irq < 0)
+ return dev_err_probe(&pdev->dev, irq, "Failed to get IRQ\n");
+
+ chip->max77759 = dev_get_drvdata(pdev->dev.parent);
+ ret = devm_mutex_init(&pdev->dev, &chip->maxq_lock);
+ if (ret)
+ return ret;
+ ret = devm_mutex_init(&pdev->dev, &chip->irq_lock);
+ if (ret)
+ return ret;
+
+ chip->gc.base = -1;
+ chip->gc.label = dev_name(&pdev->dev);
+ chip->gc.parent = &pdev->dev;
+ chip->gc.can_sleep = true;
+
+ chip->gc.names = max77759_gpio_line_names;
+ chip->gc.ngpio = MAX77759_N_GPIOS;
+ chip->gc.get_direction = max77759_gpio_get_direction;
+ chip->gc.direction_input = max77759_gpio_direction_input;
+ chip->gc.direction_output = max77759_gpio_direction_output;
+ chip->gc.get = max77759_gpio_get_value;
+ chip->gc.set_rv = max77759_gpio_set_value;
+
+ girq = &chip->gc.irq;
+ gpio_irq_chip_set_chip(girq, &max77759_gpio_irq_chip);
+ /* This will let us handle the parent IRQ in the driver */
+ girq->parent_handler = NULL;
+ girq->num_parents = 0;
+ girq->parents = NULL;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_simple_irq;
+ girq->threaded = true;
+
+ ret = devm_gpiochip_add_data(&pdev->dev, &chip->gc, chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to add GPIO chip\n");
+
+ irq_flags = IRQF_ONESHOT | IRQF_SHARED;
+ irqd = irq_get_irq_data(irq);
+ if (irqd)
+ irq_flags |= irqd_get_trigger_type(irqd);
+
+ ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+ max77759_gpio_irqhandler, irq_flags,
+ dev_name(&pdev->dev), chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to request IRQ\n");
+
+ return ret;
+}
+
+static const struct of_device_id max77759_gpio_of_id[] = {
+ { .compatible = "maxim,max77759-gpio", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, max77759_gpio_of_id);
+
+static const struct platform_device_id max77759_gpio_platform_id[] = {
+ { "max77759-gpio", },
+ { }
+};
+MODULE_DEVICE_TABLE(platform, max77759_gpio_platform_id);
+
+static struct platform_driver max77759_gpio_driver = {
+ .driver = {
+ .name = "max77759-gpio",
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .of_match_table = max77759_gpio_of_id,
+ },
+ .probe = max77759_gpio_probe,
+ .id_table = max77759_gpio_platform_id,
+};
+
+module_platform_driver(max77759_gpio_driver);
+
+MODULE_AUTHOR("André Draszik <andre.draszik@linaro.org>");
+MODULE_DESCRIPTION("GPIO driver for Maxim MAX77759");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c
index 7ee891ef6905..5ee2991ecdfd 100644
--- a/drivers/gpio/gpio-mb86s7x.c
+++ b/drivers/gpio/gpio-mb86s7x.c
@@ -119,7 +119,7 @@ static int mb86s70_gpio_get(struct gpio_chip *gc, unsigned gpio)
return !!(readl(gchip->base + PDR(gpio)) & OFFSET(gpio));
}
-static void mb86s70_gpio_set(struct gpio_chip *gc, unsigned gpio, int value)
+static int mb86s70_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct mb86s70_gpio_chip *gchip = gpiochip_get_data(gc);
unsigned long flags;
@@ -135,6 +135,8 @@ static void mb86s70_gpio_set(struct gpio_chip *gc, unsigned gpio, int value)
writel(val, gchip->base + PDR(gpio));
spin_unlock_irqrestore(&gchip->lock, flags);
+
+ return 0;
}
static int mb86s70_gpio_to_irq(struct gpio_chip *gc, unsigned int offset)
@@ -178,7 +180,7 @@ static int mb86s70_gpio_probe(struct platform_device *pdev)
gchip->gc.request = mb86s70_gpio_request;
gchip->gc.free = mb86s70_gpio_free;
gchip->gc.get = mb86s70_gpio_get;
- gchip->gc.set = mb86s70_gpio_set;
+ gchip->gc.set_rv = mb86s70_gpio_set;
gchip->gc.to_irq = mb86s70_gpio_to_irq;
gchip->gc.label = dev_name(&pdev->dev);
gchip->gc.ngpio = 32;
diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c
index 5fb357d7b78a..e68956104161 100644
--- a/drivers/gpio/gpio-mc33880.c
+++ b/drivers/gpio/gpio-mc33880.c
@@ -57,15 +57,18 @@ static int __mc33880_set(struct mc33880 *mc, unsigned offset, int value)
}
-static void mc33880_set(struct gpio_chip *chip, unsigned offset, int value)
+static int mc33880_set(struct gpio_chip *chip, unsigned int offset, int value)
{
struct mc33880 *mc = gpiochip_get_data(chip);
+ int ret;
mutex_lock(&mc->lock);
- __mc33880_set(mc, offset, value);
+ ret = __mc33880_set(mc, offset, value);
mutex_unlock(&mc->lock);
+
+ return ret;
}
static int mc33880_probe(struct spi_device *spi)
@@ -100,7 +103,7 @@ static int mc33880_probe(struct spi_device *spi)
mc->spi = spi;
mc->chip.label = DRIVER_NAME;
- mc->chip.set = mc33880_set;
+ mc->chip.set_rv = mc33880_set;
mc->chip.base = pdata->base;
mc->chip.ngpio = PIN_NUMBER;
mc->chip.can_sleep = true;
diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c
index 48e3768a830e..12cf36f9ca63 100644
--- a/drivers/gpio/gpio-ml-ioh.c
+++ b/drivers/gpio/gpio-ml-ioh.c
@@ -89,7 +89,7 @@ struct ioh_gpio {
static const int num_ports[] = {6, 12, 16, 16, 15, 16, 16, 12};
-static void ioh_gpio_set(struct gpio_chip *gpio, unsigned nr, int val)
+static int ioh_gpio_set(struct gpio_chip *gpio, unsigned int nr, int val)
{
u32 reg_val;
struct ioh_gpio *chip = gpiochip_get_data(gpio);
@@ -104,6 +104,8 @@ static void ioh_gpio_set(struct gpio_chip *gpio, unsigned nr, int val)
iowrite32(reg_val, &chip->reg->regs[chip->ch].po);
spin_unlock_irqrestore(&chip->spinlock, flags);
+
+ return 0;
}
static int ioh_gpio_get(struct gpio_chip *gpio, unsigned nr)
@@ -222,7 +224,7 @@ static void ioh_gpio_setup(struct ioh_gpio *chip, int num_port)
gpio->direction_input = ioh_gpio_direction_input;
gpio->get = ioh_gpio_get;
gpio->direction_output = ioh_gpio_direction_output;
- gpio->set = ioh_gpio_set;
+ gpio->set_rv = ioh_gpio_set;
gpio->dbg_show = NULL;
gpio->base = -1;
gpio->ngpio = num_port;
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 541517536489..121efdd71e45 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -123,9 +123,12 @@ static irqreturn_t mpc8xxx_gpio_irq_cascade(int irq, void *data)
static void mpc8xxx_irq_unmask(struct irq_data *d)
{
struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
struct gpio_chip *gc = &mpc8xxx_gc->gc;
unsigned long flags;
+ gpiochip_enable_irq(gc, hwirq);
+
raw_spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
gc->write_reg(mpc8xxx_gc->regs + GPIO_IMR,
@@ -138,6 +141,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d)
static void mpc8xxx_irq_mask(struct irq_data *d)
{
struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
struct gpio_chip *gc = &mpc8xxx_gc->gc;
unsigned long flags;
@@ -148,6 +152,8 @@ static void mpc8xxx_irq_mask(struct irq_data *d)
& ~mpc_pin2mask(irqd_to_hwirq(d)));
raw_spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
+
+ gpiochip_disable_irq(gc, hwirq);
}
static void mpc8xxx_irq_ack(struct irq_data *d)
@@ -244,6 +250,8 @@ static struct irq_chip mpc8xxx_irq_chip = {
.irq_ack = mpc8xxx_irq_ack,
/* this might get overwritten in mpc8xxx_probe() */
.irq_set_type = mpc8xxx_irq_set_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static int mpc8xxx_gpio_irq_map(struct irq_domain *h, unsigned int irq,
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 3604abcb6fec..57633a7b4270 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -408,9 +408,8 @@ static void mvebu_gpio_irq_ack(struct irq_data *d)
struct mvebu_gpio_chip *mvchip = gc->private;
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
mvebu_gpio_write_edge_cause(mvchip, ~mask);
- irq_gc_unlock(gc);
}
static void mvebu_gpio_edge_irq_mask(struct irq_data *d)
@@ -420,10 +419,9 @@ static void mvebu_gpio_edge_irq_mask(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
ct->mask_cache_priv &= ~mask;
mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv);
- irq_gc_unlock(gc);
}
static void mvebu_gpio_edge_irq_unmask(struct irq_data *d)
@@ -433,11 +431,10 @@ static void mvebu_gpio_edge_irq_unmask(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
mvebu_gpio_write_edge_cause(mvchip, ~mask);
ct->mask_cache_priv |= mask;
mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv);
- irq_gc_unlock(gc);
}
static void mvebu_gpio_level_irq_mask(struct irq_data *d)
@@ -447,10 +444,9 @@ static void mvebu_gpio_level_irq_mask(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
ct->mask_cache_priv &= ~mask;
mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv);
- irq_gc_unlock(gc);
}
static void mvebu_gpio_level_irq_unmask(struct irq_data *d)
@@ -460,10 +456,9 @@ static void mvebu_gpio_level_irq_unmask(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
ct->mask_cache_priv |= mask;
mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv);
- irq_gc_unlock(gc);
}
/*****************************************************************************
@@ -1242,7 +1237,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
return 0;
mvchip->domain =
- irq_domain_add_linear(np, ngpios, &irq_generic_chip_ops, NULL);
+ irq_domain_create_linear(of_fwnode_handle(np), ngpios, &irq_generic_chip_ops, NULL);
if (!mvchip->domain) {
dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
mvchip->chip.label);
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 619b6fb9d833..fae1a30f8ae6 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -490,7 +490,14 @@ static int mxc_gpio_probe(struct platform_device *pdev)
port->gc.request = mxc_gpio_request;
port->gc.free = mxc_gpio_free;
port->gc.to_irq = mxc_gpio_to_irq;
- port->gc.base = of_alias_get_id(np, "gpio") * 32;
+ /*
+ * Driver is DT-only, so a fixed base needs only be maintained for legacy
+ * userspace with sysfs interface.
+ */
+ if (IS_ENABLED(CONFIG_GPIO_SYSFS))
+ port->gc.base = of_alias_get_id(np, "gpio") * 32;
+ else /* silence boot time warning */
+ port->gc.base = -1;
err = devm_gpiochip_add_data(&pdev->dev, &port->gc, port);
if (err)
@@ -502,7 +509,7 @@ static int mxc_gpio_probe(struct platform_device *pdev)
goto out_bgio;
}
- port->domain = irq_domain_add_legacy(np, 32, irq_base, 0,
+ port->domain = irq_domain_create_legacy(of_fwnode_handle(np), 32, irq_base, 0,
&irq_domain_simple_ops, NULL);
if (!port->domain) {
err = -ENODEV;
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 024ad077e98d..b418fbccb26c 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -303,8 +303,8 @@ static int mxs_gpio_probe(struct platform_device *pdev)
goto out_iounmap;
}
- port->domain = irq_domain_add_legacy(np, 32, irq_base, 0,
- &irq_domain_simple_ops, NULL);
+ port->domain = irq_domain_create_legacy(of_fwnode_handle(np), 32, irq_base, 0,
+ &irq_domain_simple_ops, NULL);
if (!port->domain) {
err = -ENODEV;
goto out_iounmap;
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 442435ded020..b852e4997629 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -215,6 +215,8 @@ struct pca953x_chip {
DECLARE_BITMAP(irq_stat, MAX_LINE);
DECLARE_BITMAP(irq_trig_raise, MAX_LINE);
DECLARE_BITMAP(irq_trig_fall, MAX_LINE);
+ DECLARE_BITMAP(irq_trig_level_high, MAX_LINE);
+ DECLARE_BITMAP(irq_trig_level_low, MAX_LINE);
#endif
atomic_t wakeup_path;
@@ -774,6 +776,8 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio);
+ bitmap_or(irq_mask, irq_mask, chip->irq_trig_level_high, gc->ngpio);
+ bitmap_or(irq_mask, irq_mask, chip->irq_trig_level_low, gc->ngpio);
bitmap_complement(reg_direction, reg_direction, gc->ngpio);
bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio);
@@ -791,13 +795,15 @@ static int pca953x_irq_set_type(struct irq_data *d, unsigned int type)
struct device *dev = &chip->client->dev;
irq_hw_number_t hwirq = irqd_to_hwirq(d);
- if (!(type & IRQ_TYPE_EDGE_BOTH)) {
+ if (!(type & IRQ_TYPE_SENSE_MASK)) {
dev_err(dev, "irq %d: unsupported type %d\n", d->irq, type);
return -EINVAL;
}
assign_bit(hwirq, chip->irq_trig_fall, type & IRQ_TYPE_EDGE_FALLING);
assign_bit(hwirq, chip->irq_trig_raise, type & IRQ_TYPE_EDGE_RISING);
+ assign_bit(hwirq, chip->irq_trig_level_low, type & IRQ_TYPE_LEVEL_LOW);
+ assign_bit(hwirq, chip->irq_trig_level_high, type & IRQ_TYPE_LEVEL_HIGH);
return 0;
}
@@ -810,6 +816,8 @@ static void pca953x_irq_shutdown(struct irq_data *d)
clear_bit(hwirq, chip->irq_trig_raise);
clear_bit(hwirq, chip->irq_trig_fall);
+ clear_bit(hwirq, chip->irq_trig_level_low);
+ clear_bit(hwirq, chip->irq_trig_level_high);
}
static void pca953x_irq_print_chip(struct irq_data *data, struct seq_file *p)
@@ -840,6 +848,7 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
DECLARE_BITMAP(cur_stat, MAX_LINE);
DECLARE_BITMAP(new_stat, MAX_LINE);
DECLARE_BITMAP(trigger, MAX_LINE);
+ DECLARE_BITMAP(edges, MAX_LINE);
int ret;
ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
@@ -857,13 +866,26 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
- if (bitmap_empty(trigger, gc->ngpio))
- return false;
+ if (bitmap_empty(chip->irq_trig_level_high, gc->ngpio) &&
+ bitmap_empty(chip->irq_trig_level_low, gc->ngpio)) {
+ if (bitmap_empty(trigger, gc->ngpio))
+ return false;
+ }
bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
- bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
- bitmap_and(pending, new_stat, trigger, gc->ngpio);
+ bitmap_or(edges, old_stat, cur_stat, gc->ngpio);
+ bitmap_and(pending, edges, trigger, gc->ngpio);
+
+ bitmap_and(cur_stat, new_stat, chip->irq_trig_level_high, gc->ngpio);
+ bitmap_and(cur_stat, cur_stat, chip->irq_mask, gc->ngpio);
+ bitmap_or(pending, pending, cur_stat, gc->ngpio);
+
+ bitmap_complement(cur_stat, new_stat, gc->ngpio);
+ bitmap_and(cur_stat, cur_stat, reg_direction, gc->ngpio);
+ bitmap_and(old_stat, cur_stat, chip->irq_trig_level_low, gc->ngpio);
+ bitmap_and(old_stat, old_stat, chip->irq_mask, gc->ngpio);
+ bitmap_or(pending, pending, old_stat, gc->ngpio);
return !bitmap_empty(pending, gc->ngpio);
}
@@ -1204,6 +1226,8 @@ static int pca953x_restore_context(struct pca953x_chip *chip)
guard(mutex)(&chip->i2c_lock);
+ if (chip->client->irq > 0)
+ enable_irq(chip->client->irq);
regcache_cache_only(chip->regmap, false);
regcache_mark_dirty(chip->regmap);
ret = pca953x_regcache_sync(chip);
@@ -1216,6 +1240,10 @@ static int pca953x_restore_context(struct pca953x_chip *chip)
static void pca953x_save_context(struct pca953x_chip *chip)
{
guard(mutex)(&chip->i2c_lock);
+
+ /* Disable IRQ to prevent early triggering while regmap "cache only" is on */
+ if (chip->client->irq > 0)
+ disable_irq(chip->client->irq);
regcache_cache_only(chip->regmap, true);
}
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index 91cea97255fa..aead35ea090e 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -497,6 +497,8 @@ static void pxa_mask_muxed_gpio(struct irq_data *d)
gfer = readl_relaxed(base + GFER_OFFSET) & ~GPIO_bit(gpio);
writel_relaxed(grer, base + GRER_OFFSET);
writel_relaxed(gfer, base + GFER_OFFSET);
+
+ gpiochip_disable_irq(&pchip->chip, gpio);
}
static int pxa_gpio_set_wake(struct irq_data *d, unsigned int on)
@@ -516,17 +518,21 @@ static void pxa_unmask_muxed_gpio(struct irq_data *d)
unsigned int gpio = irqd_to_hwirq(d);
struct pxa_gpio_bank *c = gpio_to_pxabank(&pchip->chip, gpio);
+ gpiochip_enable_irq(&pchip->chip, gpio);
+
c->irq_mask |= GPIO_bit(gpio);
update_edge_detect(c);
}
-static struct irq_chip pxa_muxed_gpio_chip = {
+static const struct irq_chip pxa_muxed_gpio_chip = {
.name = "GPIO",
.irq_ack = pxa_ack_muxed_gpio,
.irq_mask = pxa_mask_muxed_gpio,
.irq_unmask = pxa_unmask_muxed_gpio,
.irq_set_type = pxa_gpio_irq_type,
.irq_set_wake = pxa_gpio_set_wake,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static int pxa_gpio_nums(struct platform_device *pdev)
@@ -636,9 +642,9 @@ static int pxa_gpio_probe(struct platform_device *pdev)
if (!pxa_last_gpio)
return -EINVAL;
- pchip->irqdomain = irq_domain_add_legacy(pdev->dev.of_node,
- pxa_last_gpio + 1, irq_base,
- 0, &pxa_irq_domain_ops, pchip);
+ pchip->irqdomain = irq_domain_create_legacy(of_fwnode_handle(pdev->dev.of_node),
+ pxa_last_gpio + 1, irq_base, 0,
+ &pxa_irq_domain_ops, pchip);
if (!pchip->irqdomain)
return -ENOMEM;
diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
index 01a3b3dac58b..c63352f2f1ec 100644
--- a/drivers/gpio/gpio-rockchip.c
+++ b/drivers/gpio/gpio-rockchip.c
@@ -521,7 +521,7 @@ static int rockchip_interrupts_register(struct rockchip_pin_bank *bank)
struct irq_chip_generic *gc;
int ret;
- bank->domain = irq_domain_add_linear(bank->of_node, 32,
+ bank->domain = irq_domain_create_linear(of_fwnode_handle(bank->of_node), 32,
&irq_generic_chip_ops, NULL);
if (!bank->domain) {
dev_warn(bank->dev, "could not init irq domain for bank %s\n",
diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c
index 242dad763ac4..3f3ee36bc3cb 100644
--- a/drivers/gpio/gpio-sa1100.c
+++ b/drivers/gpio/gpio-sa1100.c
@@ -319,7 +319,7 @@ void __init sa1100_init_gpio(void)
gpiochip_add_data(&sa1100_gpio_chip.chip, NULL);
- sa1100_gpio_irqdomain = irq_domain_add_simple(NULL,
+ sa1100_gpio_irqdomain = irq_domain_create_simple(NULL,
28, IRQ_GPIO0,
&sa1100_gpio_irqdomain_ops, sgc);
diff --git a/drivers/gpio/gpio-sodaville.c b/drivers/gpio/gpio-sodaville.c
index c2a2c76c1652..6a3c4c625138 100644
--- a/drivers/gpio/gpio-sodaville.c
+++ b/drivers/gpio/gpio-sodaville.c
@@ -169,7 +169,7 @@ static int sdv_register_irqsupport(struct sdv_gpio_chip_data *sd,
IRQ_GC_INIT_MASK_CACHE, IRQ_NOREQUEST,
IRQ_LEVEL | IRQ_NOPROBE);
- sd->id = irq_domain_add_legacy(pdev->dev.of_node, SDV_NUM_PUB_GPIOS,
+ sd->id = irq_domain_create_legacy(of_fwnode_handle(pdev->dev.of_node), SDV_NUM_PUB_GPIOS,
sd->irq_base, 0, &irq_domain_sdv_ops, sd);
if (!sd->id)
return -ENODEV;
diff --git a/drivers/gpio/gpio-spacemit-k1.c b/drivers/gpio/gpio-spacemit-k1.c
new file mode 100644
index 000000000000..f027066365ff
--- /dev/null
+++ b/drivers/gpio/gpio-spacemit-k1.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2023-2025 SpacemiT (Hangzhou) Technology Co. Ltd
+ * Copyright (C) 2025 Yixun Lan <dlan@gentoo.org>
+ */
+
+#include <linux/clk.h>
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+
+/* register offset */
+#define SPACEMIT_GPLR 0x00 /* port level - R */
+#define SPACEMIT_GPDR 0x0c /* port direction - R/W */
+#define SPACEMIT_GPSR 0x18 /* port set - W */
+#define SPACEMIT_GPCR 0x24 /* port clear - W */
+#define SPACEMIT_GRER 0x30 /* port rising edge R/W */
+#define SPACEMIT_GFER 0x3c /* port falling edge R/W */
+#define SPACEMIT_GEDR 0x48 /* edge detect status - R/W1C */
+#define SPACEMIT_GSDR 0x54 /* (set) direction - W */
+#define SPACEMIT_GCDR 0x60 /* (clear) direction - W */
+#define SPACEMIT_GSRER 0x6c /* (set) rising edge detect enable - W */
+#define SPACEMIT_GCRER 0x78 /* (clear) rising edge detect enable - W */
+#define SPACEMIT_GSFER 0x84 /* (set) falling edge detect enable - W */
+#define SPACEMIT_GCFER 0x90 /* (clear) falling edge detect enable - W */
+#define SPACEMIT_GAPMASK 0x9c /* interrupt mask , 0 disable, 1 enable - R/W */
+
+#define SPACEMIT_NR_BANKS 4
+#define SPACEMIT_NR_GPIOS_PER_BANK 32
+
+#define to_spacemit_gpio_bank(x) container_of((x), struct spacemit_gpio_bank, gc)
+
+struct spacemit_gpio;
+
+struct spacemit_gpio_bank {
+ struct gpio_chip gc;
+ struct spacemit_gpio *sg;
+ void __iomem *base;
+ u32 irq_mask;
+ u32 irq_rising_edge;
+ u32 irq_falling_edge;
+};
+
+struct spacemit_gpio {
+ struct device *dev;
+ struct spacemit_gpio_bank sgb[SPACEMIT_NR_BANKS];
+};
+
+static u32 spacemit_gpio_bank_index(struct spacemit_gpio_bank *gb)
+{
+ return (u32)(gb - gb->sg->sgb);
+}
+
+static irqreturn_t spacemit_gpio_irq_handler(int irq, void *dev_id)
+{
+ struct spacemit_gpio_bank *gb = dev_id;
+ unsigned long pending;
+ u32 n, gedr;
+
+ gedr = readl(gb->base + SPACEMIT_GEDR);
+ if (!gedr)
+ return IRQ_NONE;
+ writel(gedr, gb->base + SPACEMIT_GEDR);
+
+ pending = gedr & gb->irq_mask;
+ if (!pending)
+ return IRQ_NONE;
+
+ for_each_set_bit(n, &pending, BITS_PER_LONG)
+ handle_nested_irq(irq_find_mapping(gb->gc.irq.domain, n));
+
+ return IRQ_HANDLED;
+}
+
+static void spacemit_gpio_irq_ack(struct irq_data *d)
+{
+ struct spacemit_gpio_bank *gb = irq_data_get_irq_chip_data(d);
+
+ writel(BIT(irqd_to_hwirq(d)), gb->base + SPACEMIT_GEDR);
+}
+
+static void spacemit_gpio_irq_mask(struct irq_data *d)
+{
+ struct spacemit_gpio_bank *gb = irq_data_get_irq_chip_data(d);
+ u32 bit = BIT(irqd_to_hwirq(d));
+
+ gb->irq_mask &= ~bit;
+ writel(gb->irq_mask, gb->base + SPACEMIT_GAPMASK);
+
+ if (bit & gb->irq_rising_edge)
+ writel(bit, gb->base + SPACEMIT_GCRER);
+
+ if (bit & gb->irq_falling_edge)
+ writel(bit, gb->base + SPACEMIT_GCFER);
+}
+
+static void spacemit_gpio_irq_unmask(struct irq_data *d)
+{
+ struct spacemit_gpio_bank *gb = irq_data_get_irq_chip_data(d);
+ u32 bit = BIT(irqd_to_hwirq(d));
+
+ gb->irq_mask |= bit;
+
+ if (bit & gb->irq_rising_edge)
+ writel(bit, gb->base + SPACEMIT_GSRER);
+
+ if (bit & gb->irq_falling_edge)
+ writel(bit, gb->base + SPACEMIT_GSFER);
+
+ writel(gb->irq_mask, gb->base + SPACEMIT_GAPMASK);
+}
+
+static int spacemit_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ struct spacemit_gpio_bank *gb = irq_data_get_irq_chip_data(d);
+ u32 bit = BIT(irqd_to_hwirq(d));
+
+ if (type & IRQ_TYPE_EDGE_RISING) {
+ gb->irq_rising_edge |= bit;
+ writel(bit, gb->base + SPACEMIT_GSRER);
+ } else {
+ gb->irq_rising_edge &= ~bit;
+ writel(bit, gb->base + SPACEMIT_GCRER);
+ }
+
+ if (type & IRQ_TYPE_EDGE_FALLING) {
+ gb->irq_falling_edge |= bit;
+ writel(bit, gb->base + SPACEMIT_GSFER);
+ } else {
+ gb->irq_falling_edge &= ~bit;
+ writel(bit, gb->base + SPACEMIT_GCFER);
+ }
+
+ return 0;
+}
+
+static void spacemit_gpio_irq_print_chip(struct irq_data *data, struct seq_file *p)
+{
+ struct spacemit_gpio_bank *gb = irq_data_get_irq_chip_data(data);
+
+ seq_printf(p, "%s-%d", dev_name(gb->gc.parent), spacemit_gpio_bank_index(gb));
+}
+
+static struct irq_chip spacemit_gpio_chip = {
+ .name = "k1-gpio-irqchip",
+ .irq_ack = spacemit_gpio_irq_ack,
+ .irq_mask = spacemit_gpio_irq_mask,
+ .irq_unmask = spacemit_gpio_irq_unmask,
+ .irq_set_type = spacemit_gpio_irq_set_type,
+ .irq_print_chip = spacemit_gpio_irq_print_chip,
+ .flags = IRQCHIP_IMMUTABLE | IRQCHIP_SKIP_SET_WAKE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
+static bool spacemit_of_node_instance_match(struct gpio_chip *gc, unsigned int i)
+{
+ struct spacemit_gpio_bank *gb = gpiochip_get_data(gc);
+ struct spacemit_gpio *sg = gb->sg;
+
+ if (i >= SPACEMIT_NR_BANKS)
+ return false;
+
+ return (gc == &sg->sgb[i].gc);
+}
+
+static int spacemit_gpio_add_bank(struct spacemit_gpio *sg,
+ void __iomem *regs,
+ int index, int irq)
+{
+ struct spacemit_gpio_bank *gb = &sg->sgb[index];
+ struct gpio_chip *gc = &gb->gc;
+ struct device *dev = sg->dev;
+ struct gpio_irq_chip *girq;
+ void __iomem *dat, *set, *clr, *dirin, *dirout;
+ int ret, bank_base[] = { 0x0, 0x4, 0x8, 0x100 };
+
+ gb->base = regs + bank_base[index];
+
+ dat = gb->base + SPACEMIT_GPLR;
+ set = gb->base + SPACEMIT_GPSR;
+ clr = gb->base + SPACEMIT_GPCR;
+ dirin = gb->base + SPACEMIT_GCDR;
+ dirout = gb->base + SPACEMIT_GSDR;
+
+ /* This registers 32 GPIO lines per bank */
+ ret = bgpio_init(gc, dev, 4, dat, set, clr, dirout, dirin,
+ BGPIOF_UNREADABLE_REG_SET | BGPIOF_UNREADABLE_REG_DIR);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to init gpio chip\n");
+
+ gb->sg = sg;
+
+ gc->label = dev_name(dev);
+ gc->request = gpiochip_generic_request;
+ gc->free = gpiochip_generic_free;
+ gc->ngpio = SPACEMIT_NR_GPIOS_PER_BANK;
+ gc->base = -1;
+ gc->of_gpio_n_cells = 3;
+ gc->of_node_instance_match = spacemit_of_node_instance_match;
+
+ girq = &gc->irq;
+ girq->threaded = true;
+ girq->handler = handle_simple_irq;
+
+ gpio_irq_chip_set_chip(girq, &spacemit_gpio_chip);
+
+ /* Disable Interrupt */
+ writel(0, gb->base + SPACEMIT_GAPMASK);
+ /* Disable Edge Detection Settings */
+ writel(0x0, gb->base + SPACEMIT_GRER);
+ writel(0x0, gb->base + SPACEMIT_GFER);
+ /* Clear Interrupt */
+ writel(0xffffffff, gb->base + SPACEMIT_GCRER);
+ writel(0xffffffff, gb->base + SPACEMIT_GCFER);
+
+ ret = devm_request_threaded_irq(dev, irq, NULL,
+ spacemit_gpio_irq_handler,
+ IRQF_ONESHOT | IRQF_SHARED,
+ gb->gc.label, gb);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "failed to register IRQ\n");
+
+ ret = devm_gpiochip_add_data(dev, gc, gb);
+ if (ret)
+ return ret;
+
+ /* Distuingish IRQ domain, for selecting threecells mode */
+ irq_domain_update_bus_token(girq->domain, DOMAIN_BUS_WIRED);
+
+ return 0;
+}
+
+static int spacemit_gpio_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct spacemit_gpio *sg;
+ struct clk *core_clk, *bus_clk;
+ void __iomem *regs;
+ int i, irq, ret;
+
+ sg = devm_kzalloc(dev, sizeof(*sg), GFP_KERNEL);
+ if (!sg)
+ return -ENOMEM;
+
+ regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ sg->dev = dev;
+
+ core_clk = devm_clk_get_enabled(dev, "core");
+ if (IS_ERR(core_clk))
+ return dev_err_probe(dev, PTR_ERR(core_clk), "failed to get clock\n");
+
+ bus_clk = devm_clk_get_enabled(dev, "bus");
+ if (IS_ERR(bus_clk))
+ return dev_err_probe(dev, PTR_ERR(bus_clk), "failed to get bus clock\n");
+
+ for (i = 0; i < SPACEMIT_NR_BANKS; i++) {
+ ret = spacemit_gpio_add_bank(sg, regs, i, irq);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id spacemit_gpio_dt_ids[] = {
+ { .compatible = "spacemit,k1-gpio" },
+ { /* sentinel */ }
+};
+
+static struct platform_driver spacemit_gpio_driver = {
+ .probe = spacemit_gpio_probe,
+ .driver = {
+ .name = "k1-gpio",
+ .of_match_table = spacemit_gpio_dt_ids,
+ },
+};
+module_platform_driver(spacemit_gpio_driver);
+
+MODULE_AUTHOR("Yixun Lan <dlan@gentoo.org>");
+MODULE_DESCRIPTION("GPIO driver for SpacemiT K1 SoC");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c
index b6335cde455f..8cf676fd0a0b 100644
--- a/drivers/gpio/gpio-tb10x.c
+++ b/drivers/gpio/gpio-tb10x.c
@@ -183,7 +183,7 @@ static int tb10x_gpio_probe(struct platform_device *pdev)
if (ret != 0)
return ret;
- tb10x_gpio->domain = irq_domain_add_linear(np,
+ tb10x_gpio->domain = irq_domain_create_linear(of_fwnode_handle(np),
tb10x_gpio->gc.ngpio,
&irq_generic_chip_ops, NULL);
if (!tb10x_gpio->domain) {
diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c
index fad979797486..cb303a26f4d3 100644
--- a/drivers/gpio/gpio-timberdale.c
+++ b/drivers/gpio/gpio-timberdale.c
@@ -103,20 +103,26 @@ static void timbgpio_irq_disable(struct irq_data *d)
{
struct timbgpio *tgpio = irq_data_get_irq_chip_data(d);
int offset = d->irq - tgpio->irq_base;
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
unsigned long flags;
spin_lock_irqsave(&tgpio->lock, flags);
tgpio->last_ier &= ~(1UL << offset);
iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER);
spin_unlock_irqrestore(&tgpio->lock, flags);
+
+ gpiochip_disable_irq(&tgpio->gpio, hwirq);
}
static void timbgpio_irq_enable(struct irq_data *d)
{
struct timbgpio *tgpio = irq_data_get_irq_chip_data(d);
int offset = d->irq - tgpio->irq_base;
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
unsigned long flags;
+ gpiochip_enable_irq(&tgpio->gpio, hwirq);
+
spin_lock_irqsave(&tgpio->lock, flags);
tgpio->last_ier |= 1UL << offset;
iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER);
@@ -205,11 +211,13 @@ static void timbgpio_irq(struct irq_desc *desc)
iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER);
}
-static struct irq_chip timbgpio_irqchip = {
+static const struct irq_chip timbgpio_irqchip = {
.name = "GPIO",
.irq_enable = timbgpio_irq_enable,
.irq_disable = timbgpio_irq_disable,
.irq_set_type = timbgpio_irq_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static int timbgpio_probe(struct platform_device *pdev)
diff --git a/drivers/gpio/gpio-twl4030.c b/drivers/gpio/gpio-twl4030.c
index bcd692229c7c..0d17985a5fdc 100644
--- a/drivers/gpio/gpio-twl4030.c
+++ b/drivers/gpio/gpio-twl4030.c
@@ -502,7 +502,6 @@ static void gpio_twl4030_power_off_action(void *data)
static int gpio_twl4030_probe(struct platform_device *pdev)
{
struct twl4030_gpio_platform_data *pdata;
- struct device_node *node = pdev->dev.of_node;
struct gpio_twl4030_priv *priv;
int ret, irq_base;
@@ -524,8 +523,8 @@ static int gpio_twl4030_probe(struct platform_device *pdev)
return irq_base;
}
- irq_domain_add_legacy(node, TWL4030_GPIO_MAX, irq_base, 0,
- &irq_domain_simple_ops, NULL);
+ irq_domain_create_legacy(of_fwnode_handle(pdev->dev.of_node), TWL4030_GPIO_MAX, irq_base, 0,
+ &irq_domain_simple_ops, NULL);
ret = twl4030_sih_setup(&pdev->dev, TWL4030_MODULE_GPIO, irq_base);
if (ret < 0)
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 4dad7ce0c4dc..7de0d5b53d56 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -345,4 +345,6 @@ static struct platform_driver vf610_gpio_driver = {
.probe = vf610_gpio_probe,
};
-builtin_platform_driver(vf610_gpio_driver);
+module_platform_driver(vf610_gpio_driver);
+MODULE_DESCRIPTION("VF610 GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c
index 13407fd4f0eb..eab6726953b4 100644
--- a/drivers/gpio/gpio-virtuser.c
+++ b/drivers/gpio/gpio-virtuser.c
@@ -401,10 +401,15 @@ static ssize_t gpio_virtuser_direction_do_write(struct file *file,
char buf[32], *trimmed;
int ret, dir, val = 0;
- ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
if (ret < 0)
return ret;
+ buf[ret] = '\0';
+
trimmed = strim(buf);
if (strcmp(trimmed, "input") == 0) {
@@ -623,12 +628,15 @@ static ssize_t gpio_virtuser_consumer_write(struct file *file,
char buf[GPIO_VIRTUSER_NAME_BUF_LEN + 2];
int ret;
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
ret = simple_write_to_buffer(buf, GPIO_VIRTUSER_NAME_BUF_LEN, ppos,
user_buf, count);
if (ret < 0)
return ret;
- buf[strlen(buf) - 1] = '\0';
+ buf[ret] = '\0';
ret = gpiod_set_consumer_name(data->ad.desc, buf);
if (ret)
diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c
index 48b829733b15..b51b1fa726bb 100644
--- a/drivers/gpio/gpio-xgene-sb.c
+++ b/drivers/gpio/gpio-xgene-sb.c
@@ -103,12 +103,32 @@ static int xgene_gpio_sb_irq_set_type(struct irq_data *d, unsigned int type)
return irq_chip_set_type_parent(d, IRQ_TYPE_LEVEL_HIGH);
}
-static struct irq_chip xgene_gpio_sb_irq_chip = {
+static void xgene_gpio_sb_irq_mask(struct irq_data *d)
+{
+ struct xgene_gpio_sb *priv = irq_data_get_irq_chip_data(d);
+
+ irq_chip_mask_parent(d);
+
+ gpiochip_disable_irq(&priv->gc, d->hwirq);
+}
+
+static void xgene_gpio_sb_irq_unmask(struct irq_data *d)
+{
+ struct xgene_gpio_sb *priv = irq_data_get_irq_chip_data(d);
+
+ gpiochip_enable_irq(&priv->gc, d->hwirq);
+
+ irq_chip_unmask_parent(d);
+}
+
+static const struct irq_chip xgene_gpio_sb_irq_chip = {
.name = "sbgpio",
.irq_eoi = irq_chip_eoi_parent,
- .irq_mask = irq_chip_mask_parent,
- .irq_unmask = irq_chip_unmask_parent,
+ .irq_mask = xgene_gpio_sb_irq_mask,
+ .irq_unmask = xgene_gpio_sb_irq_unmask,
.irq_set_type = xgene_gpio_sb_irq_set_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi-core.c
index 69caa35c58df..12b24a717e43 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi-core.c
@@ -23,29 +23,6 @@
#include "gpiolib.h"
#include "gpiolib-acpi.h"
-static int run_edge_events_on_boot = -1;
-module_param(run_edge_events_on_boot, int, 0444);
-MODULE_PARM_DESC(run_edge_events_on_boot,
- "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto");
-
-static char *ignore_wake;
-module_param(ignore_wake, charp, 0444);
-MODULE_PARM_DESC(ignore_wake,
- "controller@pin combos on which to ignore the ACPI wake flag "
- "ignore_wake=controller@pin[,controller@pin[,...]]");
-
-static char *ignore_interrupt;
-module_param(ignore_interrupt, charp, 0444);
-MODULE_PARM_DESC(ignore_interrupt,
- "controller@pin combos on which to ignore interrupt "
- "ignore_interrupt=controller@pin[,controller@pin[,...]]");
-
-struct acpi_gpiolib_dmi_quirk {
- bool no_edge_events_on_boot;
- char *ignore_wake;
- char *ignore_interrupt;
-};
-
/**
* struct acpi_gpio_event - ACPI GPIO event handler data
*
@@ -96,10 +73,10 @@ struct acpi_gpio_chip {
* @adev: reference to ACPI device which consumes GPIO resource
* @flags: GPIO initialization flags
* @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
+ * @wake_capable: wake capability as provided by ACPI
* @pin_config: pin bias as provided by ACPI
* @polarity: interrupt polarity as provided by ACPI
* @triggering: triggering type as provided by ACPI
- * @wake_capable: wake capability as provided by ACPI
* @debounce: debounce timeout as provided by ACPI
* @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping
*/
@@ -107,25 +84,14 @@ struct acpi_gpio_info {
struct acpi_device *adev;
enum gpiod_flags flags;
bool gpioint;
+ bool wake_capable;
int pin_config;
int polarity;
int triggering;
- bool wake_capable;
unsigned int debounce;
unsigned int quirks;
};
-/*
- * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init
- * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a
- * late_initcall_sync() handler, so that other builtin drivers can register their
- * OpRegions before the event handlers can run. This list contains GPIO chips
- * for which the acpi_gpiochip_request_irqs() call has been deferred.
- */
-static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock);
-static LIST_HEAD(acpi_gpio_deferred_req_irqs_list);
-static bool acpi_gpio_deferred_req_irqs_done;
-
static int acpi_gpiochip_find(struct gpio_chip *gc, const void *data)
{
/* First check the actual GPIO device */
@@ -268,7 +234,7 @@ static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio,
event->irq_requested = true;
/* Make sure we trigger the initial state of edge-triggered IRQs */
- if (run_edge_events_on_boot &&
+ if (acpi_gpio_need_run_edge_events_on_boot() &&
(event->irqflags & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING))) {
value = gpiod_get_raw_value_cansleep(event->desc);
if (((event->irqflags & IRQF_TRIGGER_RISING) && value == 1) ||
@@ -350,42 +316,6 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
return desc;
}
-static bool acpi_gpio_in_ignore_list(const char *ignore_list, const char *controller_in,
- unsigned int pin_in)
-{
- const char *controller, *pin_str;
- unsigned int pin;
- char *endp;
- int len;
-
- controller = ignore_list;
- while (controller) {
- pin_str = strchr(controller, '@');
- if (!pin_str)
- goto err;
-
- len = pin_str - controller;
- if (len == strlen(controller_in) &&
- strncmp(controller, controller_in, len) == 0) {
- pin = simple_strtoul(pin_str + 1, &endp, 10);
- if (*endp != 0 && *endp != ',')
- goto err;
-
- if (pin == pin_in)
- return true;
- }
-
- controller = strchr(controller, ',');
- if (controller)
- controller++;
- }
-
- return false;
-err:
- pr_err_once("Error: Invalid value for gpiolib_acpi.ignore_...: %s\n", ignore_list);
- return false;
-}
-
static bool acpi_gpio_irq_is_wake(struct device *parent,
const struct acpi_resource_gpio *agpio)
{
@@ -394,7 +324,7 @@ static bool acpi_gpio_irq_is_wake(struct device *parent,
if (agpio->wake_capable != ACPI_WAKE_CAPABLE)
return false;
- if (acpi_gpio_in_ignore_list(ignore_wake, dev_name(parent), pin)) {
+ if (acpi_gpio_in_ignore_list(ACPI_GPIO_IGNORE_WAKE, dev_name(parent), pin)) {
dev_info(parent, "Ignoring wakeup on pin %u\n", pin);
return false;
}
@@ -437,7 +367,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
if (!handler)
return AE_OK;
- if (acpi_gpio_in_ignore_list(ignore_interrupt, dev_name(chip->parent), pin)) {
+ if (acpi_gpio_in_ignore_list(ACPI_GPIO_IGNORE_INTERRUPT, dev_name(chip->parent), pin)) {
dev_info(chip->parent, "Ignoring interrupt on pin %u\n", pin);
return AE_OK;
}
@@ -525,7 +455,6 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
struct acpi_gpio_chip *acpi_gpio;
acpi_handle handle;
acpi_status status;
- bool defer;
if (!chip->parent || !chip->to_irq)
return;
@@ -544,14 +473,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
acpi_walk_resources(handle, METHOD_NAME__AEI,
acpi_gpiochip_alloc_event, acpi_gpio);
- mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
- defer = !acpi_gpio_deferred_req_irqs_done;
- if (defer)
- list_add(&acpi_gpio->deferred_req_irqs_list_entry,
- &acpi_gpio_deferred_req_irqs_list);
- mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
-
- if (defer)
+ if (acpi_gpio_add_to_deferred_list(&acpi_gpio->deferred_req_irqs_list_entry))
return;
acpi_gpiochip_request_irqs(acpi_gpio);
@@ -583,10 +505,7 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
if (ACPI_FAILURE(status))
return;
- mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
- if (!list_empty(&acpi_gpio->deferred_req_irqs_list_entry))
- list_del_init(&acpi_gpio->deferred_req_irqs_list_entry);
- mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
+ acpi_gpio_remove_from_deferred_list(&acpi_gpio->deferred_req_irqs_list_entry);
list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) {
if (event->irq_requested) {
@@ -604,6 +523,14 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
}
EXPORT_SYMBOL_GPL(acpi_gpiochip_free_interrupts);
+void __init acpi_gpio_process_deferred_list(struct list_head *list)
+{
+ struct acpi_gpio_chip *acpi_gpio, *tmp;
+
+ list_for_each_entry_safe(acpi_gpio, tmp, list, deferred_req_irqs_list_entry)
+ acpi_gpiochip_request_irqs(acpi_gpio);
+}
+
int acpi_dev_add_driver_gpios(struct acpi_device *adev,
const struct acpi_gpio_mapping *gpios)
{
@@ -653,12 +580,12 @@ static bool acpi_get_driver_gpio_data(struct acpi_device *adev,
for (gm = adev->driver_gpios; gm->name; gm++)
if (!strcmp(name, gm->name) && gm->data && index < gm->size) {
- const struct acpi_gpio_params *par = gm->data + index;
+ const struct acpi_gpio_params *params = gm->data + index;
args->fwnode = acpi_fwnode_handle(adev);
- args->args[0] = par->crs_entry_index;
- args->args[1] = par->line_index;
- args->args[2] = par->active_low;
+ args->args[0] = params->crs_entry_index;
+ args->args[1] = params->line_index;
+ args->args[2] = params->active_low;
args->nargs = 3;
*quirks = gm->quirks;
@@ -743,10 +670,8 @@ static int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
}
struct acpi_gpio_lookup {
- struct acpi_gpio_info info;
- int index;
- u16 pin_index;
- bool active_low;
+ struct acpi_gpio_params params;
+ struct acpi_gpio_info *info;
struct gpio_desc *desc;
int n;
};
@@ -754,6 +679,8 @@ struct acpi_gpio_lookup {
static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
{
struct acpi_gpio_lookup *lookup = data;
+ struct acpi_gpio_params *params = &lookup->params;
+ struct acpi_gpio_info *info = lookup->info;
if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
return 1;
@@ -764,26 +691,26 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
struct gpio_desc *desc;
u16 pin_index;
- if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint)
- lookup->index++;
+ if (info->quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint)
+ params->crs_entry_index++;
- if (lookup->n++ != lookup->index)
+ if (lookup->n++ != params->crs_entry_index)
return 1;
- pin_index = lookup->pin_index;
+ pin_index = params->line_index;
if (pin_index >= agpio->pin_table_length)
return 1;
- if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER)
+ if (info->quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER)
desc = gpio_to_desc(agpio->pin_table[pin_index]);
else
desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
agpio->pin_table[pin_index]);
lookup->desc = desc;
- lookup->info.pin_config = agpio->pin_config;
- lookup->info.debounce = agpio->debounce_timeout;
- lookup->info.gpioint = gpioint;
- lookup->info.wake_capable = acpi_gpio_irq_is_wake(&lookup->info.adev->dev, agpio);
+ info->pin_config = agpio->pin_config;
+ info->debounce = agpio->debounce_timeout;
+ info->gpioint = gpioint;
+ info->wake_capable = acpi_gpio_irq_is_wake(&info->adev->dev, agpio);
/*
* Polarity and triggering are only specified for GpioInt
@@ -792,23 +719,23 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
* - ACPI_ACTIVE_LOW == GPIO_ACTIVE_LOW
* - ACPI_ACTIVE_HIGH == GPIO_ACTIVE_HIGH
*/
- if (lookup->info.gpioint) {
- lookup->info.polarity = agpio->polarity;
- lookup->info.triggering = agpio->triggering;
+ if (info->gpioint) {
+ info->polarity = agpio->polarity;
+ info->triggering = agpio->triggering;
} else {
- lookup->info.polarity = lookup->active_low;
+ info->polarity = params->active_low;
}
- lookup->info.flags = acpi_gpio_to_gpiod_flags(agpio, lookup->info.polarity);
+ info->flags = acpi_gpio_to_gpiod_flags(agpio, info->polarity);
}
return 1;
}
-static int acpi_gpio_resource_lookup(struct acpi_gpio_lookup *lookup,
- struct acpi_gpio_info *info)
+static int acpi_gpio_resource_lookup(struct acpi_gpio_lookup *lookup)
{
- struct acpi_device *adev = lookup->info.adev;
+ struct acpi_gpio_info *info = lookup->info;
+ struct acpi_device *adev = info->adev;
struct list_head res_list;
int ret;
@@ -825,22 +752,22 @@ static int acpi_gpio_resource_lookup(struct acpi_gpio_lookup *lookup,
if (!lookup->desc)
return -ENOENT;
- if (info)
- *info = lookup->info;
return 0;
}
-static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
- const char *propname, int index,
+static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode, const char *propname,
struct acpi_gpio_lookup *lookup)
{
struct fwnode_reference_args args;
+ struct acpi_gpio_params *params = &lookup->params;
+ struct acpi_gpio_info *info = lookup->info;
+ unsigned int index = params->crs_entry_index;
unsigned int quirks = 0;
int ret;
memset(&args, 0, sizeof(args));
- ret = __acpi_node_get_property_reference(fwnode, propname, index, 3,
- &args);
+
+ ret = __acpi_node_get_property_reference(fwnode, propname, index, 3, &args);
if (ret) {
struct acpi_device *adev;
@@ -857,12 +784,12 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
if (args.nargs != 3)
return -EPROTO;
- lookup->index = args.args[0];
- lookup->pin_index = args.args[1];
- lookup->active_low = !!args.args[2];
+ params->crs_entry_index = args.args[0];
+ params->line_index = args.args[1];
+ params->active_low = !!args.args[2];
- lookup->info.adev = to_acpi_device_node(args.fwnode);
- lookup->info.quirks = quirks;
+ info->adev = to_acpi_device_node(args.fwnode);
+ info->quirks = quirks;
return 0;
}
@@ -871,96 +798,83 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
* acpi_get_gpiod_by_index() - get a GPIO descriptor from device resources
* @adev: pointer to a ACPI device to get GPIO from
* @propname: Property name of the GPIO (optional)
- * @index: index of GpioIo/GpioInt resource (starting from %0)
- * @info: info pointer to fill in (optional)
+ * @lookup: pointer to struct acpi_gpio_lookup to fill in
*
- * Function goes through ACPI resources for @adev and based on @index looks
+ * Function goes through ACPI resources for @adev and based on @lookup.index looks
* up a GpioIo/GpioInt resource, translates it to the Linux GPIO descriptor,
- * and returns it. @index matches GpioIo/GpioInt resources only so if there
- * are total %3 GPIO resources, the index goes from %0 to %2.
+ * and returns it. @lookup.index matches GpioIo/GpioInt resources only so if there
+ * are total 3 GPIO resources, the index goes from 0 to 2.
*
* If @propname is specified the GPIO is looked using device property. In
* that case @index is used to select the GPIO entry in the property value
* (in case of multiple).
*
* Returns:
- * GPIO descriptor to use with Linux generic GPIO API.
- * If the GPIO cannot be translated or there is an error an ERR_PTR is
- * returned.
+ * 0 on success, negative errno on failure.
+ *
+ * The @lookup is filled with GPIO descriptor to use with Linux generic GPIO API.
+ * If the GPIO cannot be translated an error will be returned.
*
* Note: if the GPIO resource has multiple entries in the pin list, this
* function only returns the first.
*/
-static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev,
- const char *propname,
- int index,
- struct acpi_gpio_info *info)
+static int acpi_get_gpiod_by_index(struct acpi_device *adev, const char *propname,
+ struct acpi_gpio_lookup *lookup)
{
- struct acpi_gpio_lookup lookup;
+ struct acpi_gpio_params *params = &lookup->params;
+ struct acpi_gpio_info *info = lookup->info;
int ret;
- memset(&lookup, 0, sizeof(lookup));
- lookup.index = index;
-
if (propname) {
dev_dbg(&adev->dev, "GPIO: looking up %s\n", propname);
- ret = acpi_gpio_property_lookup(acpi_fwnode_handle(adev),
- propname, index, &lookup);
+ ret = acpi_gpio_property_lookup(acpi_fwnode_handle(adev), propname, lookup);
if (ret)
- return ERR_PTR(ret);
+ return ret;
- dev_dbg(&adev->dev, "GPIO: _DSD returned %s %d %u %u\n",
- dev_name(&lookup.info.adev->dev), lookup.index,
- lookup.pin_index, lookup.active_low);
+ dev_dbg(&adev->dev, "GPIO: _DSD returned %s %u %u %u\n",
+ dev_name(&info->adev->dev),
+ params->crs_entry_index, params->line_index, params->active_low);
} else {
- dev_dbg(&adev->dev, "GPIO: looking up %d in _CRS\n", index);
- lookup.info.adev = adev;
+ dev_dbg(&adev->dev, "GPIO: looking up %u in _CRS\n", params->crs_entry_index);
+ info->adev = adev;
}
- ret = acpi_gpio_resource_lookup(&lookup, info);
- return ret ? ERR_PTR(ret) : lookup.desc;
+ return acpi_gpio_resource_lookup(lookup);
}
/**
* acpi_get_gpiod_from_data() - get a GPIO descriptor from ACPI data node
* @fwnode: pointer to an ACPI firmware node to get the GPIO information from
* @propname: Property name of the GPIO
- * @index: index of GpioIo/GpioInt resource (starting from %0)
- * @info: info pointer to fill in (optional)
+ * @lookup: pointer to struct acpi_gpio_lookup to fill in
*
* This function uses the property-based GPIO lookup to get to the GPIO
* resource with the relevant information from a data-only ACPI firmware node
* and uses that to obtain the GPIO descriptor to return.
*
* Returns:
- * GPIO descriptor to use with Linux generic GPIO API.
- * If the GPIO cannot be translated or there is an error an ERR_PTR is
- * returned.
+ * 0 on success, negative errno on failure.
+ *
+ * The @lookup is filled with GPIO descriptor to use with Linux generic GPIO API.
+ * If the GPIO cannot be translated an error will be returned.
*/
-static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode,
- const char *propname,
- int index,
- struct acpi_gpio_info *info)
+static int acpi_get_gpiod_from_data(struct fwnode_handle *fwnode, const char *propname,
+ struct acpi_gpio_lookup *lookup)
{
- struct acpi_gpio_lookup lookup;
int ret;
if (!is_acpi_data_node(fwnode))
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
if (!propname)
- return ERR_PTR(-EINVAL);
-
- memset(&lookup, 0, sizeof(lookup));
- lookup.index = index;
+ return -EINVAL;
- ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup);
+ ret = acpi_gpio_property_lookup(fwnode, propname, lookup);
if (ret)
- return ERR_PTR(ret);
+ return ret;
- ret = acpi_gpio_resource_lookup(&lookup, info);
- return ret ? ERR_PTR(ret) : lookup.desc;
+ return acpi_gpio_resource_lookup(lookup);
}
static bool acpi_can_fallback_to_crs(struct acpi_device *adev,
@@ -982,17 +896,25 @@ __acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int
bool can_fallback, struct acpi_gpio_info *info)
{
struct acpi_device *adev = to_acpi_device_node(fwnode);
+ struct acpi_gpio_lookup lookup;
struct gpio_desc *desc;
char propname[32];
+ int ret;
+
+ memset(&lookup, 0, sizeof(lookup));
+ lookup.params.crs_entry_index = idx;
+ lookup.info = info;
/* Try first from _DSD */
for_each_gpio_property_name(propname, con_id) {
if (adev)
- desc = acpi_get_gpiod_by_index(adev,
- propname, idx, info);
+ ret = acpi_get_gpiod_by_index(adev, propname, &lookup);
else
- desc = acpi_get_gpiod_from_data(fwnode,
- propname, idx, info);
+ ret = acpi_get_gpiod_from_data(fwnode, propname, &lookup);
+ if (ret)
+ continue;
+
+ desc = lookup.desc;
if (PTR_ERR(desc) == -EPROBE_DEFER)
return desc;
@@ -1001,8 +923,13 @@ __acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int
}
/* Then from plain _CRS GPIOs */
- if (can_fallback)
- return acpi_get_gpiod_by_index(adev, NULL, idx, info);
+ if (can_fallback) {
+ ret = acpi_get_gpiod_by_index(adev, NULL, &lookup);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return lookup.desc;
+ }
return ERR_PTR(-ENOENT);
}
@@ -1488,248 +1415,3 @@ int acpi_gpio_count(const struct fwnode_handle *fwnode, const char *con_id)
}
return count ? count : -ENOENT;
}
-
-/* Run deferred acpi_gpiochip_request_irqs() */
-static int __init acpi_gpio_handle_deferred_request_irqs(void)
-{
- struct acpi_gpio_chip *acpi_gpio, *tmp;
-
- mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
- list_for_each_entry_safe(acpi_gpio, tmp,
- &acpi_gpio_deferred_req_irqs_list,
- deferred_req_irqs_list_entry)
- acpi_gpiochip_request_irqs(acpi_gpio);
-
- acpi_gpio_deferred_req_irqs_done = true;
- mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
-
- return 0;
-}
-/* We must use _sync so that this runs after the first deferred_probe run */
-late_initcall_sync(acpi_gpio_handle_deferred_request_irqs);
-
-static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = {
- {
- /*
- * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for
- * a non existing micro-USB-B connector which puts the HDMI
- * DDC pins in GPIO mode, breaking HDMI support.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .no_edge_events_on_boot = true,
- },
- },
- {
- /*
- * The Terra Pad 1061 has a micro-USB-B id-pin handler, which
- * instead of controlling the actual micro-USB-B turns the 5V
- * boost for its USB-A connector off. The actual micro-USB-B
- * connector is wired for charging only.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"),
- DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .no_edge_events_on_boot = true,
- },
- },
- {
- /*
- * The Dell Venue 10 Pro 5055, with Bay Trail SoC + TI PMIC uses an
- * external embedded-controller connected via I2C + an ACPI GPIO
- * event handler on INT33FFC:02 pin 12, causing spurious wakeups.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Venue 10 Pro 5055"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "INT33FC:02@12",
- },
- },
- {
- /*
- * HP X2 10 models with Cherry Trail SoC + TI PMIC use an
- * external embedded-controller connected via I2C + an ACPI GPIO
- * event handler on INT33FF:01 pin 0, causing spurious wakeups.
- * When suspending by closing the LID, the power to the USB
- * keyboard is turned off, causing INT0002 ACPI events to
- * trigger once the XHCI controller notices the keyboard is
- * gone. So INT0002 events cause spurious wakeups too. Ignoring
- * EC wakes breaks wakeup when opening the lid, the user needs
- * to press the power-button to wakeup the system. The
- * alternative is suspend simply not working, which is worse.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "HP"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "INT33FF:01@0,INT0002:00@2",
- },
- },
- {
- /*
- * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an
- * external embedded-controller connected via I2C + an ACPI GPIO
- * event handler on INT33FC:02 pin 28, causing spurious wakeups.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
- DMI_MATCH(DMI_BOARD_NAME, "815D"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "INT33FC:02@28",
- },
- },
- {
- /*
- * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an
- * external embedded-controller connected via I2C + an ACPI GPIO
- * event handler on INT33FF:01 pin 0, causing spurious wakeups.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "HP"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
- DMI_MATCH(DMI_BOARD_NAME, "813E"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "INT33FF:01@0",
- },
- },
- {
- /*
- * Interrupt storm caused from edge triggered floating pin
- * Found in BIOS UX325UAZ.300
- * https://bugzilla.kernel.org/show_bug.cgi?id=216208
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UAZ_UM325UAZ"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_interrupt = "AMDI0030:00@18",
- },
- },
- {
- /*
- * Spurious wakeups from TP_ATTN# pin
- * Found in BIOS 1.7.8
- * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627
- */
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "ELAN0415:00@9",
- },
- },
- {
- /*
- * Spurious wakeups from TP_ATTN# pin
- * Found in BIOS 1.7.8
- * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627
- */
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "ELAN0415:00@9",
- },
- },
- {
- /*
- * Spurious wakeups from TP_ATTN# pin
- * Found in BIOS 1.7.7
- */
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "SYNA1202:00@16",
- },
- },
- {
- /*
- * On the Peaq C1010 2-in-1 INT33FC:00 pin 3 is connected to
- * a "dolby" button. At the ACPI level an _AEI event-handler
- * is connected which sets an ACPI variable to 1 on both
- * edges. This variable can be polled + cleared to 0 using
- * WMI. But since the variable is set on both edges the WMI
- * interface is pretty useless even when polling.
- * So instead the x86-android-tablets code instantiates
- * a gpio-keys platform device for it.
- * Ignore the _AEI handler for the pin, so that it is not busy.
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "PEAQ"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PEAQ PMM C1010 MD99187"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_interrupt = "INT33FC:00@3",
- },
- },
- {
- /*
- * Spurious wakeups from TP_ATTN# pin
- * Found in BIOS 0.35
- * https://gitlab.freedesktop.org/drm/amd/-/issues/3073
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "GPD"),
- DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_wake = "PNP0C50:00@8",
- },
- },
- {
- /*
- * Spurious wakeups from GPIO 11
- * Found in BIOS 1.04
- * https://gitlab.freedesktop.org/drm/amd/-/issues/3954
- */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
- DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 14"),
- },
- .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
- .ignore_interrupt = "AMDI0030:00@11",
- },
- },
- {} /* Terminating entry */
-};
-
-static int __init acpi_gpio_setup_params(void)
-{
- const struct acpi_gpiolib_dmi_quirk *quirk = NULL;
- const struct dmi_system_id *id;
-
- id = dmi_first_match(gpiolib_acpi_quirks);
- if (id)
- quirk = id->driver_data;
-
- if (run_edge_events_on_boot < 0) {
- if (quirk && quirk->no_edge_events_on_boot)
- run_edge_events_on_boot = 0;
- else
- run_edge_events_on_boot = 1;
- }
-
- if (ignore_wake == NULL && quirk && quirk->ignore_wake)
- ignore_wake = quirk->ignore_wake;
-
- if (ignore_interrupt == NULL && quirk && quirk->ignore_interrupt)
- ignore_interrupt = quirk->ignore_interrupt;
-
- return 0;
-}
-
-/* Directly after dmi_setup() which runs as core_initcall() */
-postcore_initcall(acpi_gpio_setup_params);
diff --git a/drivers/gpio/gpiolib-acpi-quirks.c b/drivers/gpio/gpiolib-acpi-quirks.c
new file mode 100644
index 000000000000..219667315b2c
--- /dev/null
+++ b/drivers/gpio/gpiolib-acpi-quirks.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACPI quirks for GPIO ACPI helpers
+ *
+ * Author: Hans de Goede <hdegoede@redhat.com>
+ */
+
+#include <linux/dmi.h>
+#include <linux/kstrtox.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/printk.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "gpiolib-acpi.h"
+
+static int run_edge_events_on_boot = -1;
+module_param(run_edge_events_on_boot, int, 0444);
+MODULE_PARM_DESC(run_edge_events_on_boot,
+ "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto");
+
+static char *ignore_wake;
+module_param(ignore_wake, charp, 0444);
+MODULE_PARM_DESC(ignore_wake,
+ "controller@pin combos on which to ignore the ACPI wake flag "
+ "ignore_wake=controller@pin[,controller@pin[,...]]");
+
+static char *ignore_interrupt;
+module_param(ignore_interrupt, charp, 0444);
+MODULE_PARM_DESC(ignore_interrupt,
+ "controller@pin combos on which to ignore interrupt "
+ "ignore_interrupt=controller@pin[,controller@pin[,...]]");
+
+/*
+ * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init
+ * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a
+ * late_initcall_sync() handler, so that other builtin drivers can register their
+ * OpRegions before the event handlers can run. This list contains GPIO chips
+ * for which the acpi_gpiochip_request_irqs() call has been deferred.
+ */
+static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock);
+static LIST_HEAD(acpi_gpio_deferred_req_irqs_list);
+static bool acpi_gpio_deferred_req_irqs_done;
+
+bool acpi_gpio_add_to_deferred_list(struct list_head *list)
+{
+ bool defer;
+
+ mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
+ defer = !acpi_gpio_deferred_req_irqs_done;
+ if (defer)
+ list_add(list, &acpi_gpio_deferred_req_irqs_list);
+ mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
+
+ return defer;
+}
+
+void acpi_gpio_remove_from_deferred_list(struct list_head *list)
+{
+ mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
+ if (!list_empty(list))
+ list_del_init(list);
+ mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
+}
+
+int acpi_gpio_need_run_edge_events_on_boot(void)
+{
+ return run_edge_events_on_boot;
+}
+
+bool acpi_gpio_in_ignore_list(enum acpi_gpio_ignore_list list,
+ const char *controller_in, unsigned int pin_in)
+{
+ const char *ignore_list, *controller, *pin_str;
+ unsigned int pin;
+ char *endp;
+ int len;
+
+ switch (list) {
+ case ACPI_GPIO_IGNORE_WAKE:
+ ignore_list = ignore_wake;
+ break;
+ case ACPI_GPIO_IGNORE_INTERRUPT:
+ ignore_list = ignore_interrupt;
+ break;
+ default:
+ return false;
+ }
+
+ controller = ignore_list;
+ while (controller) {
+ pin_str = strchr(controller, '@');
+ if (!pin_str)
+ goto err;
+
+ len = pin_str - controller;
+ if (len == strlen(controller_in) &&
+ strncmp(controller, controller_in, len) == 0) {
+ pin = simple_strtoul(pin_str + 1, &endp, 10);
+ if (*endp != 0 && *endp != ',')
+ goto err;
+
+ if (pin == pin_in)
+ return true;
+ }
+
+ controller = strchr(controller, ',');
+ if (controller)
+ controller++;
+ }
+
+ return false;
+err:
+ pr_err_once("Error: Invalid value for gpiolib_acpi.ignore_...: %s\n", ignore_list);
+ return false;
+}
+
+/* Run deferred acpi_gpiochip_request_irqs() */
+static int __init acpi_gpio_handle_deferred_request_irqs(void)
+{
+ mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
+ acpi_gpio_process_deferred_list(&acpi_gpio_deferred_req_irqs_list);
+ acpi_gpio_deferred_req_irqs_done = true;
+ mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
+
+ return 0;
+}
+/* We must use _sync so that this runs after the first deferred_probe run */
+late_initcall_sync(acpi_gpio_handle_deferred_request_irqs);
+
+struct acpi_gpiolib_dmi_quirk {
+ bool no_edge_events_on_boot;
+ char *ignore_wake;
+ char *ignore_interrupt;
+};
+
+static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = {
+ {
+ /*
+ * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for
+ * a non existing micro-USB-B connector which puts the HDMI
+ * DDC pins in GPIO mode, breaking HDMI support.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .no_edge_events_on_boot = true,
+ },
+ },
+ {
+ /*
+ * The Terra Pad 1061 has a micro-USB-B id-pin handler, which
+ * instead of controlling the actual micro-USB-B turns the 5V
+ * boost for its USB-A connector off. The actual micro-USB-B
+ * connector is wired for charging only.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .no_edge_events_on_boot = true,
+ },
+ },
+ {
+ /*
+ * The Dell Venue 10 Pro 5055, with Bay Trail SoC + TI PMIC uses an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FFC:02 pin 12, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Venue 10 Pro 5055"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FC:02@12",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Cherry Trail SoC + TI PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FF:01 pin 0, causing spurious wakeups.
+ * When suspending by closing the LID, the power to the USB
+ * keyboard is turned off, causing INT0002 ACPI events to
+ * trigger once the XHCI controller notices the keyboard is
+ * gone. So INT0002 events cause spurious wakeups too. Ignoring
+ * EC wakes breaks wakeup when opening the lid, the user needs
+ * to press the power-button to wakeup the system. The
+ * alternative is suspend simply not working, which is worse.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FF:01@0,INT0002:00@2",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FC:02 pin 28, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+ DMI_MATCH(DMI_BOARD_NAME, "815D"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FC:02@28",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FF:01 pin 0, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+ DMI_MATCH(DMI_BOARD_NAME, "813E"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FF:01@0",
+ },
+ },
+ {
+ /*
+ * Interrupt storm caused from edge triggered floating pin
+ * Found in BIOS UX325UAZ.300
+ * https://bugzilla.kernel.org/show_bug.cgi?id=216208
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UAZ_UM325UAZ"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_interrupt = "AMDI0030:00@18",
+ },
+ },
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 1.7.8
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "ELAN0415:00@9",
+ },
+ },
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 1.7.8
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "ELAN0415:00@9",
+ },
+ },
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 1.7.7
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "SYNA1202:00@16",
+ },
+ },
+ {
+ /*
+ * On the Peaq C1010 2-in-1 INT33FC:00 pin 3 is connected to
+ * a "dolby" button. At the ACPI level an _AEI event-handler
+ * is connected which sets an ACPI variable to 1 on both
+ * edges. This variable can be polled + cleared to 0 using
+ * WMI. But since the variable is set on both edges the WMI
+ * interface is pretty useless even when polling.
+ * So instead the x86-android-tablets code instantiates
+ * a gpio-keys platform device for it.
+ * Ignore the _AEI handler for the pin, so that it is not busy.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PEAQ"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PEAQ PMM C1010 MD99187"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_interrupt = "INT33FC:00@3",
+ },
+ },
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 0.35
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/3073
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "GPD"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "PNP0C50:00@8",
+ },
+ },
+ {
+ /*
+ * Spurious wakeups from GPIO 11
+ * Found in BIOS 1.04
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/3954
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 14"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_interrupt = "AMDI0030:00@11",
+ },
+ },
+ {} /* Terminating entry */
+};
+
+static int __init acpi_gpio_setup_params(void)
+{
+ const struct acpi_gpiolib_dmi_quirk *quirk = NULL;
+ const struct dmi_system_id *id;
+
+ id = dmi_first_match(gpiolib_acpi_quirks);
+ if (id)
+ quirk = id->driver_data;
+
+ if (run_edge_events_on_boot < 0) {
+ if (quirk && quirk->no_edge_events_on_boot)
+ run_edge_events_on_boot = 0;
+ else
+ run_edge_events_on_boot = 1;
+ }
+
+ if (ignore_wake == NULL && quirk && quirk->ignore_wake)
+ ignore_wake = quirk->ignore_wake;
+
+ if (ignore_interrupt == NULL && quirk && quirk->ignore_interrupt)
+ ignore_interrupt = quirk->ignore_interrupt;
+
+ return 0;
+}
+
+/* Directly after dmi_setup() which runs as core_initcall() */
+postcore_initcall(acpi_gpio_setup_params);
diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h
index 7e1c51d04040..a90267470a4e 100644
--- a/drivers/gpio/gpiolib-acpi.h
+++ b/drivers/gpio/gpiolib-acpi.h
@@ -58,4 +58,19 @@ static inline int acpi_gpio_count(const struct fwnode_handle *fwnode,
}
#endif
+void acpi_gpio_process_deferred_list(struct list_head *list);
+
+bool acpi_gpio_add_to_deferred_list(struct list_head *list);
+void acpi_gpio_remove_from_deferred_list(struct list_head *list);
+
+int acpi_gpio_need_run_edge_events_on_boot(void);
+
+enum acpi_gpio_ignore_list {
+ ACPI_GPIO_IGNORE_WAKE,
+ ACPI_GPIO_IGNORE_INTERRUPT,
+};
+
+bool acpi_gpio_in_ignore_list(enum acpi_gpio_ignore_list list,
+ const char *controller_in, unsigned int pin_in);
+
#endif /* GPIOLIB_ACPI_H */
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 107d75558b5a..e6a289fa0f8f 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1366,9 +1366,6 @@ static long linereq_set_values(struct linereq *lr, void __user *ip)
/* scan requested lines to determine the subset to be set */
for (num_set = 0, i = 0; i < lr->num_lines; i++) {
if (lv.mask & BIT_ULL(i)) {
- /* setting inputs is not allowed */
- if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
- return -EPERM;
/* add to compacted values */
if (lv.bits & BIT_ULL(i))
__set_bit(num_set, vals);
diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c
index 120d1ec5af3b..4d5f83b17624 100644
--- a/drivers/gpio/gpiolib-devres.c
+++ b/drivers/gpio/gpiolib-devres.c
@@ -6,7 +6,7 @@
* Copyright (c) 2011 John Crispin <john@phrozen.org>
*/
-#include <linux/device.h>
+#include <linux/device/devres.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/gfp.h>
@@ -19,32 +19,14 @@
struct fwnode_handle;
struct lock_class_key;
-static void devm_gpiod_release(struct device *dev, void *res)
+static void devm_gpiod_release(void *desc)
{
- struct gpio_desc **desc = res;
-
- gpiod_put(*desc);
-}
-
-static int devm_gpiod_match(struct device *dev, void *res, void *data)
-{
- struct gpio_desc **this = res, **gpio = data;
-
- return *this == *gpio;
+ gpiod_put(desc);
}
-static void devm_gpiod_release_array(struct device *dev, void *res)
+static void devm_gpiod_release_array(void *descs)
{
- struct gpio_descs **descs = res;
-
- gpiod_put_array(*descs);
-}
-
-static int devm_gpiod_match_array(struct device *dev, void *res, void *data)
-{
- struct gpio_descs **this = res, **gpios = data;
-
- return *this == *gpios;
+ gpiod_put_array(descs);
}
/**
@@ -114,8 +96,8 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
unsigned int idx,
enum gpiod_flags flags)
{
- struct gpio_desc **dr;
struct gpio_desc *desc;
+ int ret;
desc = gpiod_get_index(dev, con_id, idx, flags);
if (IS_ERR(desc))
@@ -126,23 +108,16 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
* already under resource management by this device.
*/
if (flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE) {
- struct devres *dres;
+ bool dres;
- dres = devres_find(dev, devm_gpiod_release,
- devm_gpiod_match, &desc);
+ dres = devm_is_action_added(dev, devm_gpiod_release, desc);
if (dres)
return desc;
}
- dr = devres_alloc(devm_gpiod_release, sizeof(struct gpio_desc *),
- GFP_KERNEL);
- if (!dr) {
- gpiod_put(desc);
- return ERR_PTR(-ENOMEM);
- }
-
- *dr = desc;
- devres_add(dev, dr);
+ ret = devm_add_action_or_reset(dev, devm_gpiod_release, desc);
+ if (ret)
+ return ERR_PTR(ret);
return desc;
}
@@ -171,22 +146,16 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev,
enum gpiod_flags flags,
const char *label)
{
- struct gpio_desc **dr;
struct gpio_desc *desc;
-
- dr = devres_alloc(devm_gpiod_release, sizeof(struct gpio_desc *),
- GFP_KERNEL);
- if (!dr)
- return ERR_PTR(-ENOMEM);
+ int ret;
desc = gpiod_find_and_request(dev, fwnode, con_id, index, flags, label, false);
- if (IS_ERR(desc)) {
- devres_free(dr);
+ if (IS_ERR(desc))
return desc;
- }
- *dr = desc;
- devres_add(dev, dr);
+ ret = devm_add_action_or_reset(dev, devm_gpiod_release, desc);
+ if (ret)
+ return ERR_PTR(ret);
return desc;
}
@@ -244,22 +213,16 @@ struct gpio_descs *__must_check devm_gpiod_get_array(struct device *dev,
const char *con_id,
enum gpiod_flags flags)
{
- struct gpio_descs **dr;
struct gpio_descs *descs;
-
- dr = devres_alloc(devm_gpiod_release_array,
- sizeof(struct gpio_descs *), GFP_KERNEL);
- if (!dr)
- return ERR_PTR(-ENOMEM);
+ int ret;
descs = gpiod_get_array(dev, con_id, flags);
- if (IS_ERR(descs)) {
- devres_free(dr);
+ if (IS_ERR(descs))
return descs;
- }
- *dr = descs;
- devres_add(dev, dr);
+ ret = devm_add_action_or_reset(dev, devm_gpiod_release_array, descs);
+ if (ret)
+ return ERR_PTR(ret);
return descs;
}
@@ -307,8 +270,7 @@ EXPORT_SYMBOL_GPL(devm_gpiod_get_array_optional);
*/
void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
{
- WARN_ON(devres_release(dev, devm_gpiod_release, devm_gpiod_match,
- &desc));
+ devm_release_action(dev, devm_gpiod_release, desc);
}
EXPORT_SYMBOL_GPL(devm_gpiod_put);
@@ -332,13 +294,13 @@ void devm_gpiod_unhinge(struct device *dev, struct gpio_desc *desc)
if (IS_ERR_OR_NULL(desc))
return;
- ret = devres_destroy(dev, devm_gpiod_release,
- devm_gpiod_match, &desc);
+
/*
* If the GPIO descriptor is requested as nonexclusive, we
* may call this function several times on the same descriptor
* so it is OK if devres_destroy() returns -ENOENT.
*/
+ ret = devm_remove_action_nowarn(dev, devm_gpiod_release, desc);
if (ret == -ENOENT)
return;
/* Anything else we should warn about */
@@ -357,8 +319,7 @@ EXPORT_SYMBOL_GPL(devm_gpiod_unhinge);
*/
void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs)
{
- WARN_ON(devres_release(dev, devm_gpiod_release_array,
- devm_gpiod_match_array, &descs));
+ devm_remove_action(dev, devm_gpiod_release_array, descs);
}
EXPORT_SYMBOL_GPL(devm_gpiod_put_array);
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 65f6a7177b78..73ba73b31cb1 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -224,6 +224,15 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
*/
{ "lantiq,pci-xway", "gpio-reset", false },
#endif
+#if IS_ENABLED(CONFIG_REGULATOR_S5M8767)
+ /*
+ * According to S5M8767, the DVS and DS pin are
+ * active-high signals. However, exynos5250-spring.dts use
+ * active-low setting.
+ */
+ { "samsung,s5m8767-pmic", "s5m8767,pmic-buck-dvs-gpios", true },
+ { "samsung,s5m8767-pmic", "s5m8767,pmic-buck-ds-gpios", true },
+#endif
#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005)
/*
* DTS for Nokia N900 incorrectly specified "active high"
@@ -1278,3 +1287,11 @@ void of_gpiochip_remove(struct gpio_chip *chip)
{
of_node_put(dev_of_node(&chip->gpiodev->dev));
}
+
+bool of_gpiochip_instance_match(struct gpio_chip *gc, unsigned int index)
+{
+ if (gc->of_node_instance_match)
+ return gc->of_node_instance_match(gc, index);
+
+ return false;
+}
diff --git a/drivers/gpio/gpiolib-of.h b/drivers/gpio/gpiolib-of.h
index 16d6ac8cb156..3eebfac290c5 100644
--- a/drivers/gpio/gpiolib-of.h
+++ b/drivers/gpio/gpiolib-of.h
@@ -22,6 +22,7 @@ struct gpio_desc *of_find_gpio(struct device_node *np,
unsigned long *lookupflags);
int of_gpiochip_add(struct gpio_chip *gc);
void of_gpiochip_remove(struct gpio_chip *gc);
+bool of_gpiochip_instance_match(struct gpio_chip *gc, unsigned int index);
int of_gpio_count(const struct fwnode_handle *fwnode, const char *con_id);
#else
static inline struct gpio_desc *of_find_gpio(struct device_node *np,
@@ -33,6 +34,11 @@ static inline struct gpio_desc *of_find_gpio(struct device_node *np,
}
static inline int of_gpiochip_add(struct gpio_chip *gc) { return 0; }
static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
+static inline bool of_gpiochip_instance_match(struct gpio_chip *gc,
+ unsigned int index)
+{
+ return false;
+}
static inline int of_gpio_count(const struct fwnode_handle *fwnode,
const char *con_id)
{
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index 1acfa43bf1ab..4a3aa09dad9d 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -134,17 +134,15 @@ static ssize_t value_store(struct device *dev,
long value;
status = kstrtol(buf, 0, &value);
+ if (status)
+ return status;
guard(mutex)(&data->mutex);
- if (!test_bit(FLAG_IS_OUT, &desc->flags))
- return -EPERM;
-
+ status = gpiod_set_value_cansleep(desc, value);
if (status)
return status;
- gpiod_set_value_cansleep(desc, value);
-
return size;
}
static DEVICE_ATTR_PREALLOC(value, S_IWUSR | S_IRUGO, value_show, value_store);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index cd4fecbb41f2..fdafa0df1b43 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -266,6 +266,20 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc)
EXPORT_SYMBOL_GPL(gpiod_to_gpio_device);
/**
+ * gpiod_is_equal() - Check if two GPIO descriptors refer to the same pin.
+ * @desc: Descriptor to compare.
+ * @other: The second descriptor to compare against.
+ *
+ * Returns:
+ * True if the descriptors refer to the same physical pin. False otherwise.
+ */
+bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other)
+{
+ return desc == other;
+}
+EXPORT_SYMBOL_GPL(gpiod_is_equal);
+
+/**
* gpio_device_get_base() - Get the base GPIO number allocated by this device
* @gdev: GPIO device
*
@@ -342,6 +356,37 @@ static int gpiochip_find_base_unlocked(u16 ngpio)
}
}
+/*
+ * This descriptor validation needs to be inserted verbatim into each
+ * function taking a descriptor, so we need to use a preprocessor
+ * macro to avoid endless duplication. If the desc is NULL it is an
+ * optional GPIO and calls should just bail out.
+ */
+static int validate_desc(const struct gpio_desc *desc, const char *func)
+{
+ if (!desc)
+ return 0;
+
+ if (IS_ERR(desc)) {
+ pr_warn("%s: invalid GPIO (errorpointer: %pe)\n", func, desc);
+ return PTR_ERR(desc);
+ }
+
+ return 1;
+}
+
+#define VALIDATE_DESC(desc) do { \
+ int __valid = validate_desc(desc, __func__); \
+ if (__valid <= 0) \
+ return __valid; \
+ } while (0)
+
+#define VALIDATE_DESC_VOID(desc) do { \
+ int __valid = validate_desc(desc, __func__); \
+ if (__valid <= 0) \
+ return; \
+ } while (0)
+
static int gpiochip_get_direction(struct gpio_chip *gc, unsigned int offset)
{
int ret;
@@ -376,11 +421,8 @@ int gpiod_get_direction(struct gpio_desc *desc)
unsigned int offset;
int ret;
- /*
- * We cannot use VALIDATE_DESC() as we must not return 0 for a NULL
- * descriptor like we usually do.
- */
- if (IS_ERR_OR_NULL(desc))
+ ret = validate_desc(desc, __func__);
+ if (ret <= 0)
return -EINVAL;
CLASS(gpio_chip_guard, guard)(desc);
@@ -742,6 +784,12 @@ EXPORT_SYMBOL_GPL(gpiochip_query_valid_mask);
bool gpiochip_line_is_valid(const struct gpio_chip *gc,
unsigned int offset)
{
+ /*
+ * hog pins are requested before registering GPIO chip
+ */
+ if (!gc->gpiodev)
+ return true;
+
/* No mask means all valid */
if (likely(!gc->gpiodev->valid_mask))
return true;
@@ -874,14 +922,12 @@ static void machine_gpiochip_add(struct gpio_chip *gc)
{
struct gpiod_hog *hog;
- mutex_lock(&gpio_machine_hogs_mutex);
+ guard(mutex)(&gpio_machine_hogs_mutex);
list_for_each_entry(hog, &gpio_machine_hogs, list) {
if (!strcmp(gc->label, hog->chip_label))
gpiochip_machine_hog(gc, hog);
}
-
- mutex_unlock(&gpio_machine_hogs_mutex);
}
static void gpiochip_setup_devs(void)
@@ -975,7 +1021,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
struct gpio_device *gdev;
unsigned int desc_index;
int base = 0;
- int ret = 0;
+ int ret;
/* Only allow one set() and one set_multiple(). */
if ((gc->set && gc->set_rv) ||
@@ -1000,11 +1046,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
device_set_node(&gdev->dev, gpiochip_choose_fwnode(gc));
- gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL);
- if (gdev->id < 0) {
- ret = gdev->id;
+ ret = ida_alloc(&gpio_ida, GFP_KERNEL);
+ if (ret < 0)
goto err_free_gdev;
- }
+ gdev->id = ret;
ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id);
if (ret)
@@ -1507,9 +1552,8 @@ static int gpiochip_hierarchy_irq_domain_translate(struct irq_domain *d,
unsigned int *type)
{
/* We support standard DT translation */
- if (is_of_node(fwspec->fwnode) && fwspec->param_count == 2) {
- return irq_domain_translate_twocell(d, fwspec, hwirq, type);
- }
+ if (is_of_node(fwspec->fwnode))
+ return irq_domain_translate_twothreecell(d, fwspec, hwirq, type);
/* This is for board files and others not using DT */
if (is_fwnode_irqchip(fwspec->fwnode)) {
@@ -1811,11 +1855,26 @@ static void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq)
irq_set_chip_data(irq, NULL);
}
+static int gpiochip_irq_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ struct fwnode_handle *fwnode = fwspec->fwnode;
+ struct gpio_chip *gc = d->host_data;
+ unsigned int index = fwspec->param[0];
+
+ if (fwspec->param_count == 3 && is_of_node(fwnode))
+ return of_gpiochip_instance_match(gc, index);
+
+ /* Fallback for twocells */
+ return (fwnode && (d->fwnode == fwnode) && (d->bus_token == bus_token));
+}
+
static const struct irq_domain_ops gpiochip_domain_ops = {
.map = gpiochip_irq_map,
.unmap = gpiochip_irq_unmap,
+ .select = gpiochip_irq_select,
/* Virtually all GPIO irqchips are twocell:ed */
- .xlate = irq_domain_xlate_twocell,
+ .xlate = irq_domain_xlate_twothreecell,
};
static struct irq_domain *gpiochip_simple_create_domain(struct gpio_chip *gc)
@@ -1835,7 +1894,6 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
{
struct irq_domain *domain = gc->irq.domain;
-#ifdef CONFIG_GPIOLIB_IRQCHIP
/*
* Avoid race condition with other code, which tries to lookup
* an IRQ before the irqchip has been properly registered,
@@ -1843,7 +1901,6 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
*/
if (!gc->irq.initialized)
return -EPROBE_DEFER;
-#endif
if (!gpiochip_irqchip_irq_valid(gc, offset))
return -ENXIO;
@@ -2405,37 +2462,6 @@ out_clear_bit:
return ret;
}
-/*
- * This descriptor validation needs to be inserted verbatim into each
- * function taking a descriptor, so we need to use a preprocessor
- * macro to avoid endless duplication. If the desc is NULL it is an
- * optional GPIO and calls should just bail out.
- */
-static int validate_desc(const struct gpio_desc *desc, const char *func)
-{
- if (!desc)
- return 0;
-
- if (IS_ERR(desc)) {
- pr_warn("%s: invalid GPIO (errorpointer)\n", func);
- return PTR_ERR(desc);
- }
-
- return 1;
-}
-
-#define VALIDATE_DESC(desc) do { \
- int __valid = validate_desc(desc, __func__); \
- if (__valid <= 0) \
- return __valid; \
- } while (0)
-
-#define VALIDATE_DESC_VOID(desc) do { \
- int __valid = validate_desc(desc, __func__); \
- if (__valid <= 0) \
- return; \
- } while (0)
-
int gpiod_request(struct gpio_desc *desc, const char *label)
{
int ret = -EPROBE_DEFER;
@@ -3045,7 +3071,7 @@ set_output_flag:
*/
int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags)
{
- int ret = 0;
+ int ret;
VALIDATE_DESC(desc);
@@ -3078,7 +3104,7 @@ EXPORT_SYMBOL_GPL(gpiod_enable_hw_timestamp_ns);
*/
int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags)
{
- int ret = 0;
+ int ret;
VALIDATE_DESC(desc);
@@ -3593,6 +3619,9 @@ static int gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value)
static int gpiod_set_raw_value_commit(struct gpio_desc *desc, bool value)
{
+ if (unlikely(!test_bit(FLAG_IS_OUT, &desc->flags)))
+ return -EPERM;
+
CLASS(gpio_chip_guard, guard)(desc);
if (!guard.gc)
return -ENODEV;
@@ -3664,6 +3693,12 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
if (!can_sleep)
WARN_ON(array_info->gdev->can_sleep);
+ for (i = 0; i < array_size; i++) {
+ if (unlikely(!test_bit(FLAG_IS_OUT,
+ &desc_array[i]->flags)))
+ return -EPERM;
+ }
+
guard(srcu)(&array_info->gdev->srcu);
gc = srcu_dereference(array_info->gdev->chip,
&array_info->gdev->srcu);
@@ -3723,6 +3758,9 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
int hwgpio = gpio_chip_hwgpio(desc);
int value = test_bit(i, value_bitmap);
+ if (unlikely(!test_bit(FLAG_IS_OUT, &desc->flags)))
+ return -EPERM;
+
/*
* Pins applicable for fast input but not for
* fast output processing may have been already
@@ -3944,13 +3982,10 @@ int gpiod_to_irq(const struct gpio_desc *desc)
struct gpio_device *gdev;
struct gpio_chip *gc;
int offset;
+ int ret;
- /*
- * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
- * requires this function to not return zero on an invalid descriptor
- * but rather a negative error number.
- */
- if (IS_ERR_OR_NULL(desc))
+ ret = validate_desc(desc, __func__);
+ if (ret <= 0)
return -EINVAL;
gdev = desc->gdev;
@@ -3962,13 +3997,12 @@ int gpiod_to_irq(const struct gpio_desc *desc)
offset = gpio_chip_hwgpio(desc);
if (gc->to_irq) {
- int retirq = gc->to_irq(gc, offset);
+ ret = gc->to_irq(gc, offset);
+ if (ret)
+ return ret;
/* Zero means NO_IRQ */
- if (!retirq)
- return -ENXIO;
-
- return retirq;
+ return -ENXIO;
}
#ifdef CONFIG_GPIOLIB_IRQCHIP
if (gc->irq.chip) {
@@ -4323,12 +4357,10 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n)
{
unsigned int i;
- mutex_lock(&gpio_lookup_lock);
+ guard(mutex)(&gpio_lookup_lock);
for (i = 0; i < n; i++)
list_add_tail(&tables[i]->list, &gpio_lookup_list);
-
- mutex_unlock(&gpio_lookup_lock);
}
/**
@@ -4387,11 +4419,9 @@ void gpiod_remove_lookup_table(struct gpiod_lookup_table *table)
if (!table)
return;
- mutex_lock(&gpio_lookup_lock);
+ guard(mutex)(&gpio_lookup_lock);
list_del(&table->list);
-
- mutex_unlock(&gpio_lookup_lock);
}
EXPORT_SYMBOL_GPL(gpiod_remove_lookup_table);
@@ -4403,7 +4433,7 @@ void gpiod_add_hogs(struct gpiod_hog *hogs)
{
struct gpiod_hog *hog;
- mutex_lock(&gpio_machine_hogs_mutex);
+ guard(mutex)(&gpio_machine_hogs_mutex);
for (hog = &hogs[0]; hog->chip_label; hog++) {
list_add_tail(&hog->list, &gpio_machine_hogs);
@@ -4417,8 +4447,6 @@ void gpiod_add_hogs(struct gpiod_hog *hogs)
if (gdev)
gpiochip_machine_hog(gpio_device_get_chip(gdev), hog);
}
-
- mutex_unlock(&gpio_machine_hogs_mutex);
}
EXPORT_SYMBOL_GPL(gpiod_add_hogs);
@@ -4426,10 +4454,10 @@ void gpiod_remove_hogs(struct gpiod_hog *hogs)
{
struct gpiod_hog *hog;
- mutex_lock(&gpio_machine_hogs_mutex);
+ guard(mutex)(&gpio_machine_hogs_mutex);
+
for (hog = &hogs[0]; hog->chip_label; hog++)
list_del(&hog->list);
- mutex_unlock(&gpio_machine_hogs_mutex);
}
EXPORT_SYMBOL_GPL(gpiod_remove_hogs);
@@ -5108,8 +5136,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_array_optional);
*/
void gpiod_put(struct gpio_desc *desc)
{
- if (desc)
- gpiod_free(desc);
+ gpiod_free(desc);
}
EXPORT_SYMBOL_GPL(gpiod_put);
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ed54a546bbe2..d21d0cd2c752 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -236,7 +236,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \
quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
cmd_hdrtest = \
$(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \
- $(srctree)/scripts/kernel-doc -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
+ PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
touch $@
$(obj)/%.hdrtest: $(src)/%.h FORCE
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index cfdf558b48b6..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 19ce4da285e8..38e7043016e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -725,8 +725,8 @@ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
- adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
- &amdgpu_hw_irqdomain_ops, adev);
+ adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
if (!adev->irq.domain) {
DRM_ERROR("GPU irq add domain failed\n");
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e74e26b6a4f2..fec9a007533a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -752,6 +752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
+ /* The mall_size is already calculated as mall_size_per_umc * num_umc.
+ * However, for gfx1151, which features a 2-to-1 UMC mapping,
+ * the result must be multiplied by 2 to determine the actual mall size.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 5, 1):
+ adev->gmc.mall_size *= 2;
+ break;
+ default:
+ break;
+ }
+
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index a1171e6152ed..f11df9c2ec13 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1023,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
return 0;
}
@@ -1205,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 64df8ca448b3..a187cdb43e7e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
struct dm_crtc_state *new_state)
{
+ if (new_state->stream->adjust.timing_adjust_pending)
+ return true;
if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED)
return true;
else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state))
@@ -3467,11 +3469,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
return 0;
}
-
- /* leave display off for S4 sequence */
- if (adev->in_s4)
- return 0;
-
/* Recreate dc_state - DC invalidates it when setting power state to S3. */
dc_state_release(dm_state->context);
dm_state->context = dc_state_create(dm->dc, NULL);
@@ -12763,7 +12760,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(
/* The reply is stored in the top nibble of the command. */
payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF;
- if (!payload->write && p_notify->aux_reply.length)
+ /*write req may receive a byte indicating partially written number as well*/
+ if (p_notify->aux_reply.length)
memcpy(payload->data, p_notify->aux_reply.data,
p_notify->aux_reply.length);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 074b79fd5822..5cdbc86ef8f5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -62,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
enum aux_return_code_type operation_result;
struct amdgpu_device *adev;
struct ddc_service *ddc;
+ uint8_t copy[16];
if (WARN_ON(msg->size > 16))
return -E2BIG;
@@ -77,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
(msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0;
payload.defer_delay = 0;
+ if (payload.write) {
+ memcpy(copy, msg->buffer, msg->size);
+ payload.data = copy;
+ }
+
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
@@ -100,9 +106,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
*/
if (payload.write && result >= 0) {
if (result) {
- /*one byte indicating partially written bytes. Force 0 to retry*/
- drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n");
- result = 0;
+ /*one byte indicating partially written bytes*/
+ drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n");
+ result = payload.data[0];
} else if (!payload.reply[0])
/*I2C_ACK|AUX_ACK*/
result = msg->size;
@@ -127,11 +133,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
break;
}
- drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result);
+ drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result);
}
if (payload.reply[0])
- drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.",
+ drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.",
payload.reply[0]);
return result;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 28d1353f403d..ba4ce8a63158 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
* Don't adjust DRR while there's bandwidth optimizations pending to
* avoid conflicting with firmware updates.
*/
- if (dc->ctx->dce_version > DCE_VERSION_MAX)
- if (dc->optimized_required || dc->wm_optimized_required)
+ if (dc->ctx->dce_version > DCE_VERSION_MAX) {
+ if (dc->optimized_required || dc->wm_optimized_required) {
+ stream->adjust.timing_adjust_pending = true;
return false;
+ }
+ }
dc_exit_ips_for_hw_access(dc);
@@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->crtc_timing_adjust) {
if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min ||
- stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max)
+ stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max ||
+ stream->adjust.timing_adjust_pending)
update->crtc_timing_adjust->timing_adjust_pending = true;
stream->adjust = *update->crtc_timing_adjust;
update->crtc_timing_adjust->timing_adjust_pending = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index d9159ca55412..92f0a099d089 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.dcn_downspread_percent = 0.5,
.gpuvm_min_page_size_bytes = 4096,
.hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = 1,
+ .do_urgent_latency_adjustment = 0,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 0c8ec30ea672..731fbd4bc600 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -910,7 +910,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
}
//TODO : Could be possibly moved to a common helper layer.
-static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id)
+static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id)
{
int i, j;
@@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str
return false;
for (i = 0; i < context->stream_count; i++) {
- for (j = 0; j < context->stream_status[i].plane_count; j++) {
- if (context->stream_status[i].plane_states[j] == plane) {
- *plane_id = (i << 16) | j;
- return true;
+ if (context->streams[i]->stream_id == stream_id) {
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ if (context->stream_status[i].plane_states[j] == plane) {
+ *plane_id = (i << 16) | j;
+ return true;
+ }
}
}
}
@@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d
return location;
}
-static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx,
+static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id,
const struct dc_plane_state *plane, const struct dc_state *context)
{
unsigned int plane_id;
int i = 0;
int location = -1;
- if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) {
+ if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) {
ASSERT(false);
return -1;
}
@@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
} else {
for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) {
- disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context);
+ disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context);
if (disp_cfg_plane_location < 0)
disp_cfg_plane_location = dml_dispcfg->num_planes++;
@@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index);
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
- if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
+ if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
/* apply forced pstate policy */
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
index 1236e0f9a256..712aff7e17f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
@@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes(
enum dc_cursor_color_format color_format = cursor_attributes->color_format;
int cur_rom_en = 0;
- // DCN4 should always do Cursor degamma for Cursor Color modes
if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
- cur_rom_en = 1;
+ if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
+ cur_rom_en = 1;
+ }
}
REG_UPDATE_3(CURSOR0_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index 5489f3d431f6..3af6a3402b89 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -1980,9 +1980,9 @@ void dcn401_program_pipe(
dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size);
}
- if (pipe_ctx->update_flags.raw ||
- (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) ||
- pipe_ctx->stream->update_flags.raw)
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw ||
+ pipe_ctx->plane_state->update_flags.raw ||
+ pipe_ctx->stream->update_flags.raw))
dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context);
if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 268626e73c54..53c961f86d43 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init)
void link_set_all_streams_dpms_off_for_link(struct dc_link *link)
{
struct pipe_ctx *pipes[MAX_PIPES];
+ struct dc_stream_state *streams[MAX_PIPES];
struct dc_state *state = link->dc->current_state;
uint8_t count;
int i;
@@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link)
link_get_master_pipes_with_dpms_on(link, state, &count, pipes);
+ /* The subsequent call to dc_commit_updates_for_stream for a full update
+ * will release the current state and swap to a new state. Releasing the
+ * current state results in the stream pointers in the pipe_ctx structs
+ * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream.
+ */
+ for (i = 0; i < count; i++)
+ streams[i] = pipes[i]->stream;
+
for (i = 0; i < count; i++) {
- stream_update.stream = pipes[i]->stream;
+ stream_update.stream = streams[i];
dc_commit_updates_for_stream(link->ctx->dc, NULL, 0,
- pipes[i]->stream, &stream_update,
+ streams[i], &stream_update,
state);
}
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 922def51685b..d533c79f7e21 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1606,7 +1606,6 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
long throttling_logging_interval;
- unsigned long flags;
int ret = 0;
ret = kstrtol(buf, 0, &throttling_logging_interval);
@@ -1617,18 +1616,12 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
return -EINVAL;
if (throttling_logging_interval > 0) {
- raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags);
/*
* Reset the ratelimit timer internals.
* This can effectively restart the timer.
*/
- adev->throttling_logging_rs.interval =
- (throttling_logging_interval - 1) * HZ;
- adev->throttling_logging_rs.begin = 0;
- adev->throttling_logging_rs.printed = 0;
- adev->throttling_logging_rs.missed = 0;
- raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags);
-
+ ratelimit_state_reset_interval(&adev->throttling_logging_rs,
+ (throttling_logging_interval - 1) * HZ);
atomic_set(&adev->throttling_logging_enabled, 1);
} else {
atomic_set(&adev->throttling_logging_enabled, 0);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 13bc4c290b17..9edb3247c767 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -6596,6 +6596,7 @@ static void drm_reset_display_info(struct drm_connector *connector)
info->has_hdmi_infoframe = false;
info->rgb_quant_range_selectable = false;
memset(&info->hdmi, 0, sizeof(info->hdmi));
+ memset(&connector->hdr_sink_metadata, 0, sizeof(connector->hdr_sink_metadata));
info->edid_hdmi_rgb444_dc_modes = 0;
info->edid_hdmi_ycbcr444_dc_modes = 0;
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index de424e670995..4b2f32889f00 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
lockdep_assert_held(&gpusvm->notifier_lock);
if (range->flags.has_dma_mapping) {
+ struct drm_gpusvm_range_flags flags = {
+ .__flags = range->flags.__flags,
+ };
+
for (i = 0, j = 0; i < npages; j++) {
struct drm_pagemap_device_addr *addr = &range->dma_addr[j];
@@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
dev, *addr);
i += 1 << addr->order;
}
- range->flags.has_devmem_pages = false;
- range->flags.has_dma_mapping = false;
+
+ /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
+ flags.has_devmem_pages = false;
+ flags.has_dma_mapping = false;
+ WRITE_ONCE(range->flags.__flags, flags.__flags);
+
range->dpagemap = NULL;
}
}
@@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
int err = 0;
struct dev_pagemap *pagemap;
struct drm_pagemap *dpagemap;
+ struct drm_gpusvm_range_flags flags;
retry:
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
@@ -1378,7 +1387,8 @@ map_pages:
*/
drm_gpusvm_notifier_lock(gpusvm);
- if (range->flags.unmapped) {
+ flags.__flags = range->flags.__flags;
+ if (flags.unmapped) {
drm_gpusvm_notifier_unlock(gpusvm);
err = -EFAULT;
goto err_free;
@@ -1454,6 +1464,11 @@ map_pages:
goto err_unmap;
}
+ if (ctx->devmem_only) {
+ err = -EFAULT;
+ goto err_unmap;
+ }
+
addr = dma_map_page(gpusvm->drm->dev,
page, 0,
PAGE_SIZE << order,
@@ -1469,14 +1484,17 @@ map_pages:
}
i += 1 << order;
num_dma_mapped = i;
- range->flags.has_dma_mapping = true;
+ flags.has_dma_mapping = true;
}
if (zdd) {
- range->flags.has_devmem_pages = true;
+ flags.has_devmem_pages = true;
range->dpagemap = dpagemap;
}
+ /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
+ WRITE_ONCE(range->flags.__flags, flags.__flags);
+
drm_gpusvm_notifier_unlock(gpusvm);
kvfree(pfns);
set_seqno:
@@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
goto err_finalize;
/* Upon success bind devmem allocation to range and zdd */
+ devmem_allocation->timeslice_expiration = get_jiffies_64() +
+ msecs_to_jiffies(ctx->timeslice_ms);
zdd->devmem_allocation = devmem_allocation; /* Owns ref */
err_finalize:
@@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;
+ if (page) {
+ zdd = page->zone_device_data;
+ if (time_before64(get_jiffies_64(),
+ zdd->devmem_allocation->timeslice_expiration))
+ return 0;
+ }
+
start = ALIGN_DOWN(fault_addr, size);
end = ALIGN(fault_addr + 1, size);
diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c
index 77cfcf37ddd2..feff73cc0005 100644
--- a/drivers/gpu/drm/gud/gud_pipe.c
+++ b/drivers/gpu/drm/gud/gud_pipe.c
@@ -261,7 +261,7 @@ static int gud_usb_bulk(struct gud_device *gdrm, size_t len)
else if (ctx.sgr.bytes != len)
ret = -EIO;
- destroy_timer_on_stack(&ctx.timer);
+ timer_destroy_on_stack(&ctx.timer);
return ret;
}
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index ed05b131ed3a..ab6b89a163e7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -408,7 +408,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += kvmgt.o
#
# Enable locally for CONFIG_DRM_I915_WERROR=y. See also scripts/Makefile.build
ifdef CONFIG_DRM_I915_WERROR
- cmd_checkdoc = $(srctree)/scripts/kernel-doc -none -Werror $<
+ cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none -Werror $<
endif
# header test
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index ae3343c81a64..5e784db9f315 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -305,36 +305,20 @@ void __shmem_writeback(size_t size, struct address_space *mapping)
.range_end = LLONG_MAX,
.for_reclaim = 1,
};
- unsigned long i;
+ struct folio *folio = NULL;
+ int error = 0;
/*
* Leave mmapings intact (GTT will have been revoked on unbinding,
- * leaving only CPU mmapings around) and add those pages to the LRU
+ * leaving only CPU mmapings around) and add those folios to the LRU
* instead of invoking writeback so they are aged and paged out
* as normal.
*/
-
- /* Begin writeback on each dirty page */
- for (i = 0; i < size >> PAGE_SHIFT; i++) {
- struct page *page;
-
- page = find_lock_page(mapping, i);
- if (!page)
- continue;
-
- if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
- int ret;
-
- SetPageReclaim(page);
- ret = mapping->a_ops->writepage(page, &wbc);
- if (!PageWriteback(page))
- ClearPageReclaim(page);
- if (!ret)
- goto put;
- }
- unlock_page(page);
-put:
- put_page(page);
+ while ((folio = writeback_iter(mapping, &wbc, folio, &error))) {
+ if (folio_mapped(folio))
+ folio_redirty_for_writepage(&wbc, folio);
+ else
+ error = shmem_writeout(folio, &wbc);
}
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 401bee030dbc..32c762eb79ed 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -661,7 +661,7 @@ static int live_emit_pte_full_ring(void *arg)
out_rq:
i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
timer_delete_sync(&st.timer);
- destroy_timer_on_stack(&st.timer);
+ timer_destroy_on_stack(&st.timer);
out_unpin:
intel_context_unpin(ce);
out_put:
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index de0b413600a1..1658f1246c6f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1666,6 +1666,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
struct i915_perf *perf = stream->perf;
struct intel_gt *gt = stream->engine->gt;
struct i915_perf_group *g = stream->engine->oa_group;
+ int m;
if (WARN_ON(stream != g->exclusive_stream))
return;
@@ -1690,10 +1691,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
free_oa_configs(stream);
free_noa_wait(stream);
- if (perf->spurious_report_rs.missed) {
- gt_notice(gt, "%d spurious OA report notices suppressed due to ratelimiting\n",
- perf->spurious_report_rs.missed);
- }
+ m = ratelimit_state_get_miss(&perf->spurious_report_rs);
+ if (m)
+ gt_notice(gt, "%d spurious OA report notices suppressed due to ratelimiting\n", m);
}
static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index d5ecc68155da..522ad49406ce 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -77,7 +77,7 @@ void timed_fence_fini(struct timed_fence *tf)
if (timer_delete_sync(&tf->timer))
i915_sw_fence_commit(&tf->fence);
- destroy_timer_on_stack(&tf->timer);
+ timer_destroy_on_stack(&tf->timer);
i915_sw_fence_fini(&tf->fence);
}
diff --git a/drivers/gpu/drm/i915/selftests/librapl.c b/drivers/gpu/drm/i915/selftests/librapl.c
index eb03b5b28bad..25b8726b9dff 100644
--- a/drivers/gpu/drm/i915/selftests/librapl.c
+++ b/drivers/gpu/drm/i915/selftests/librapl.c
@@ -22,12 +22,12 @@ u64 librapl_energy_uJ(void)
unsigned long long power;
u32 units;
- if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power))
+ if (rdmsrq_safe(MSR_RAPL_POWER_UNIT, &power))
return 0;
units = (power & 0x1f00) >> 8;
- if (rdmsrl_safe(MSR_PP1_ENERGY_STATUS, &power))
+ if (rdmsrq_safe(MSR_PP1_ENERGY_STATUS, &power))
return 0;
return (1000000 * power) >> units; /* convert to uJ */
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
index 7752d8ac85f0..c08fa93e50a3 100644
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -75,7 +75,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi,
unsigned long long venc_freq;
unsigned long long hdmi_freq;
- vclk_freq = mode->clock * 1000;
+ vclk_freq = mode->clock * 1000ULL;
/* For 420, pixel clock is half unlike venc clock */
if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
@@ -123,7 +123,7 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri
struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
struct meson_drm *priv = encoder_hdmi->priv;
bool is_hdmi2_sink = display_info->hdmi.scdc.supported;
- unsigned long long clock = mode->clock * 1000;
+ unsigned long long clock = mode->clock * 1000ULL;
unsigned long long phy_freq;
unsigned long long vclk_freq;
unsigned long long venc_freq;
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index dcb49fd30402..9d006ee88a8a 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -150,7 +150,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss)
dev = msm_mdss->dev;
- domain = irq_domain_add_linear(dev->of_node, 32,
+ domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), 32,
&msm_mdss_irqdomain_ops, msm_mdss);
if (!domain) {
dev_err(dev, "failed to add irq_domain\n");
diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c
index 0460ecaef4bd..23914a9f7fd3 100644
--- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c
+++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c
@@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi)
spi_set_drvdata(spi, drm);
- drm_client_setup(drm, NULL);
+ if (bpp == 16)
+ drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565);
+ else
+ drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888);
return 0;
}
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
index f8f20d2f6174..f24866823d95 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -201,7 +201,7 @@ static int threaded_ttm_bo_reserve(void *arg)
err = ttm_bo_reserve(bo, interruptible, no_wait, &ctx);
timer_delete_sync(&s_timer.timer);
- destroy_timer_on_stack(&s_timer.timer);
+ timer_destroy_on_stack(&s_timer.timer);
ww_acquire_fini(&ctx);
diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c
index 9e2d72c447ee..ffaab68bd5dd 100644
--- a/drivers/gpu/drm/ttm/ttm_backup.c
+++ b/drivers/gpu/drm/ttm/ttm_backup.c
@@ -120,13 +120,13 @@ ttm_backup_backup_page(struct file *backup, struct page *page,
.for_reclaim = 1,
};
folio_set_reclaim(to_folio);
- ret = mapping->a_ops->writepage(folio_file_page(to_folio, idx), &wbc);
+ ret = shmem_writeout(to_folio, &wbc);
if (!folio_test_writeback(to_folio))
folio_clear_reclaim(to_folio);
/*
- * If writepage succeeds, it unlocks the folio.
- * writepage() errors are otherwise dropped, since writepage()
- * is only best effort here.
+ * If writeout succeeds, it unlocks the folio. errors
+ * are otherwise dropped, since writeout is only best
+ * effort here.
*/
if (ret)
folio_unlock(to_folio);
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 167fb0f742de..5a47991b4b81 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -47,6 +47,10 @@
#define MI_LRI_FORCE_POSTED REG_BIT(12)
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4))
+#define MI_SRM_USE_GGTT REG_BIT(22)
+#define MI_SRM_ADD_CS_OFFSET REG_BIT(19)
+
#define MI_FLUSH_DW __MI_INSTR(0x26)
#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index fb8ec317b6ee..891f928d80ce 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -43,6 +43,10 @@
#define XEHPC_BCS8_RING_BASE 0x3ee000
#define GSCCS_RING_BASE 0x11a000
+#define ENGINE_ID(base) XE_REG((base) + 0x8c)
+#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4)
+#define ENGINE_CLASS_ID REG_GENMASK(2, 0)
+
#define RING_TAIL(base) XE_REG((base) + 0x30)
#define TAIL_ADDR REG_GENMASK(20, 3)
@@ -154,6 +158,7 @@
#define STOP_RING REG_BIT(8)
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
+#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac)
#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)
#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index da1f198ac107..181913967ac9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -157,6 +157,7 @@
#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)
#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 57944f90bbf6..994af591a2e8 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -11,7 +11,9 @@
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
+#define CTX_BB_PER_CTX_PTR (0x12 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
+#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9f8667ebba85..0482f26aa480 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -330,6 +330,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
+ /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
+ u8 has_64bit_timestamp:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
/**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 606922d9dd73..cd9b1c32f30f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
- u32 old_ts, new_ts;
+ u64 old_ts, new_ts;
int idx;
/*
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 31bc2022bfc2..769781d577df 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
return xe_sched_invalidate_job(job, 2);
}
- ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+ ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
/*
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index df3ceddede07..03bfba696b37 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -24,6 +24,7 @@
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
+#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
@@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
#define LRC_PPHWSP_SIZE SZ_4K
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
@@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
+ /* This is stored in the driver-defined portion of PPHWSP */
return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}
@@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
+static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
+{
+ return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
+}
+
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}
+static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
+{
+ return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
+}
+
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
/* Indirect ring state page is at the very end of LRC */
@@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
+DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
+DECL_MAP_ADDR_HELPERS(engine_id)
#undef DECL_MAP_ADDR_HELPERS
@@ -743,18 +757,37 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
}
/**
+ * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: ctx timestamp udw GGTT address
+ */
+u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
+}
+
+/**
* xe_lrc_ctx_timestamp() - Read ctx timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp value
*/
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
+u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
+ u32 ldw, udw = 0;
map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
+ ldw = xe_map_read32(xe, &map);
+
+ if (xe->info.has_64bit_timestamp) {
+ map = __xe_lrc_ctx_timestamp_udw_map(lrc);
+ udw = xe_map_read32(xe, &map);
+ }
+
+ return (u64)udw << 32 | ldw;
}
/**
@@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
+ u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
@@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_unpin(lrc->bo);
xe_bo_unlock(lrc->bo);
xe_bo_put(lrc->bo);
+ xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+}
+
+/*
+ * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
+ * context run ticks.
+ * @lrc: Pointer to the lrc.
+ *
+ * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
+ * context, but only gets updated when the context switches out. In order to
+ * check how long a context has been active before it switches out, two things
+ * are required:
+ *
+ * (1) Determine if the context is running:
+ * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
+ * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
+ * initialized. During a query, we just check for this value to determine if the
+ * context is active. If the context switched out, it would overwrite this
+ * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
+ * the last part of context restore, so reusing this LRC location will not
+ * clobber anything.
+ *
+ * (2) Calculate the time that the context has been active for:
+ * The CTX_TIMESTAMP ticks only when the context is active. If a context is
+ * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
+ * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
+ * engine instance. Since we do not know which instance the context is running
+ * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
+ * store it in the PPHSWP.
+ */
+#define CONTEXT_ACTIVE 1ULL
+static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
+{
+ u32 *cmd;
+
+ cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+
+ *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
+ *cmd++ = ENGINE_ID(0).addr;
+ *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
+ *cmd++ = 0;
+
+ *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
+ *cmd++ = 0;
+ *cmd++ = lower_32_bits(CONTEXT_ACTIVE);
+
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
+ *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
+ *cmd++ = 0;
+ *cmd++ = upper_32_bits(CONTEXT_ACTIVE);
+ }
+
+ *cmd++ = MI_BATCH_BUFFER_END;
+
+ xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
+ xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+
}
#define PVC_CTX_ASID (0x2e + 1)
@@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
+ u32 bo_flags;
int err;
kref_init(&lrc->refcount);
+ lrc->gt = gt;
lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
+ bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE;
+
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
+ lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ ttm_bo_type_kernel,
+ bo_flags);
+ if (IS_ERR(lrc->bb_per_ctx_bo)) {
+ err = PTR_ERR(lrc->bb_per_ctx_bo);
+ goto err_lrc_finish;
+ }
+
lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
@@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
_MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));
+ lrc->ctx_timestamp = 0;
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
+ xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
@@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+ xe_lrc_setup_utilization(lrc);
+
return 0;
err_lrc_finish:
@@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
return __xe_lrc_parallel_map(lrc);
}
+/**
+ * xe_lrc_engine_id() - Read engine id value
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: context id value
+ */
+static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_engine_id_map(lrc);
+ return xe_map_read32(xe, &map);
+}
+
static int instr_dw(u32 cmd_header)
{
/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
@@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
@@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
kfree(snapshot);
}
+static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
+{
+ u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
+ u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
+ struct xe_hw_engine *hwe;
+ u64 val;
+
+ hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
+ if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
+ "Unexpected engine class:instance %d:%d for context utilization\n",
+ class, instance))
+ return -1;
+
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
+ val = xe_mmio_read64_2x32(&hwe->gt->mmio,
+ RING_CTX_TIMESTAMP(hwe->mmio_base));
+ else
+ val = xe_mmio_read32(&hwe->gt->mmio,
+ RING_CTX_TIMESTAMP(hwe->mmio_base));
+
+ *reg_ctx_ts = val;
+
+ return 0;
+}
+
/**
* xe_lrc_update_timestamp() - Update ctx timestamp
* @lrc: Pointer to the lrc.
* @old_ts: Old timestamp value
*
* Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
- * update saved value.
+ * update saved value. With support for active contexts, the calculation may be
+ * slightly racy, so follow a read-again logic to ensure that the context is
+ * still active before returning the right timestamp.
*
* Returns: New ctx timestamp value
*/
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
+u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
+ u64 lrc_ts, reg_ts;
+ u32 engine_id;
+
*old_ts = lrc->ctx_timestamp;
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ lrc_ts = xe_lrc_ctx_timestamp(lrc);
+ /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
+ if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
+ lrc->ctx_timestamp = lrc_ts;
+ goto done;
+ }
+
+ if (lrc_ts == CONTEXT_ACTIVE) {
+ engine_id = xe_lrc_engine_id(lrc);
+ if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
+ lrc->ctx_timestamp = reg_ts;
+
+ /* read lrc again to ensure context is still active */
+ lrc_ts = xe_lrc_ctx_timestamp(lrc);
+ }
+
+ /*
+ * If context switched out, just use the lrc_ts. Note that this needs to
+ * be a separate if condition.
+ */
+ if (lrc_ts != CONTEXT_ACTIVE)
+ lrc->ctx_timestamp = lrc_ts;
+done:
trace_xe_lrc_update_timestamp(lrc, *old_ts);
return lrc->ctx_timestamp;
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 0b40f349ab95..eb6e8de8c939 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc);
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
+u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
+u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
@@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
*
* Returns the current LRC timestamp
*/
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts);
+u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts);
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 71ecb453f811..ae24cf6f8dd9 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -25,8 +25,8 @@ struct xe_lrc {
/** @size: size of lrc including any indirect ring state page */
u32 size;
- /** @tile: tile which this LRC belongs to */
- struct xe_tile *tile;
+ /** @gt: gt which this LRC belongs to */
+ struct xe_gt *gt;
/** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
@@ -52,7 +52,10 @@ struct xe_lrc {
struct xe_hw_fence_ctx fence_ctx;
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
- u32 ctx_timestamp;
+ u64 ctx_timestamp;
+
+ /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
+ struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 70a36e777546..46301f341773 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -75,12 +75,12 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
* is fine as it's going to the root tile's mmio, that's
* guaranteed to be initialized earlier in xe_mmio_probe_early()
*/
- mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR);
+ mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR);
tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
if (tile_count < xe->info.tile_count) {
drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
- xe->info.tile_count, tile_count);
+ xe->info.tile_count, tile_count);
xe->info.tile_count = tile_count;
/*
@@ -128,7 +128,7 @@ int xe_mmio_probe_early(struct xe_device *xe)
*/
xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0);
- if (xe->mmio.regs == NULL) {
+ if (!xe->mmio.regs) {
drm_err(&xe->drm, "failed to map registers\n");
return -EIO;
}
@@ -309,8 +309,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
return (u64)udw << 32 | ldw;
}
-static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic, bool expect_match)
+static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val,
+ u32 timeout_us, u32 *out_val, bool atomic, bool expect_match)
{
ktime_t cur = ktime_get_raw();
const ktime_t end = ktime_add_us(cur, timeout_us);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 31dade91a089..0c737413fcb6 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -775,22 +775,23 @@ void xe_mocs_init(struct xe_gt *gt)
void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
+ enum xe_force_wake_domains domain;
struct xe_mocs_info table;
unsigned int fw_ref, flags;
flags = get_mocs_settings(xe, &table);
+ domain = flags & HAS_LNCF_MOCS ? XE_FORCEWAKE_ALL : XE_FW_GT;
xe_pm_runtime_get_noresume(xe);
- fw_ref = xe_force_wake_get(gt_to_fw(gt),
- flags & HAS_LNCF_MOCS ?
- XE_FORCEWAKE_ALL : XE_FW_GT);
- if (!fw_ref)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), domain);
+
+ if (!xe_force_wake_ref_has_domain(fw_ref, domain))
goto err_fw;
table.ops->dump(&table, flags, gt, p);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_fw:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 9f4632e39a1a..e861c694f336 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = {
module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
-module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
-MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");
-
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 84339e509c80..5a3bfea8b7b4 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -12,7 +12,6 @@
struct xe_modparam {
bool force_execlist;
bool probe_display;
- bool always_migrate_to_vram;
u32 force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 818f023166d5..f4d108dc49b1 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
+ .has_64bit_timestamp = 1, \
.va_bits = 48, \
.vm_max_level = 4, \
.hw_engine_mask = \
@@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
+ xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
for_each_remote_tile(tile, xe, id) {
int err;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index e9b9bbc138d3..ca6b10d35573 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -21,6 +21,7 @@ struct xe_graphics_desc {
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1;
u8 has_usm:1;
+ u8 has_64bit_timestamp:1;
};
struct xe_media_desc {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 7b6b754ad6eb..cb7fbf74138e 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -641,7 +641,7 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
return dev->power.runtime_status == RPM_SUSPENDING ||
dev->power.runtime_status == RPM_RESUMING ||
- pm_suspend_target_state != PM_SUSPEND_ON;
+ pm_suspend_in_progress();
#else
return false;
#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ffaf0d02dc7d..856038553b81 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm,
}
case DRM_GPUVA_OP_DRIVER:
{
+ /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */
+
if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
- op->map_range.range->tile_present |= BIT(tile->id);
- op->map_range.range->tile_invalidated &= ~BIT(tile->id);
+ WRITE_ONCE(op->map_range.range->tile_present,
+ op->map_range.range->tile_present |
+ BIT(tile->id));
+ WRITE_ONCE(op->map_range.range->tile_invalidated,
+ op->map_range.range->tile_invalidated &
+ ~BIT(tile->id));
} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
- op->unmap_range.range->tile_present &= ~BIT(tile->id);
+ WRITE_ONCE(op->unmap_range.range->tile_present,
+ op->unmap_range.range->tile_present &
+ ~BIT(tile->id));
}
break;
}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index a7582b097ae6..bc1689db4cd7 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
{
- dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT |
- MI_COPY_MEM_MEM_DST_GGTT;
+ dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
+ dw[i++] = RING_CTX_TIMESTAMP(0).addr;
dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
- dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc);
- dw[i++] = 0;
- dw[i++] = MI_NOOP;
return i;
}
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 8184390f9c7b..86d47aaf0358 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
if (!shrinker)
return ERR_PTR(-ENOMEM);
- shrinker->shrink = shrinker_alloc(0, "xe system shrinker");
+ shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique);
if (!shrinker->shrink) {
kfree(shrinker);
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 24c578e1170e..975094c1a582 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -15,8 +15,17 @@
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
- /* Not reliable without notifier lock */
- return range->base.flags.has_devmem_pages;
+ /*
+ * Advisory only check whether the range is currently backed by VRAM
+ * memory.
+ */
+
+ struct drm_gpusvm_range_flags flags = {
+ /* Pairs with WRITE_ONCE in drm_gpusvm.c */
+ .__flags = READ_ONCE(range->base.flags.__flags),
+ };
+
+ return flags.has_devmem_pages;
}
static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
@@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm)
}
static bool xe_svm_range_is_valid(struct xe_svm_range *range,
- struct xe_tile *tile)
+ struct xe_tile *tile,
+ bool devmem_only)
{
- return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
+ /*
+ * Advisory only check whether the range currently has a valid mapping,
+ * READ_ONCE pairs with WRITE_ONCE in xe_pt.c
+ */
+ return ((READ_ONCE(range->tile_present) &
+ ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
+ (!devmem_only || xe_svm_range_in_vram(range));
}
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
@@ -712,6 +728,36 @@ unlock:
return err;
}
+static bool supports_4K_migration(struct xe_device *xe)
+{
+ if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ return false;
+
+ return true;
+}
+
+static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
+ struct xe_vma *vma)
+{
+ struct xe_vm *vm = range_to_vm(&range->base);
+ u64 range_size = xe_svm_range_size(range);
+
+ if (!range->base.flags.migrate_devmem)
+ return false;
+
+ if (xe_svm_range_in_vram(range)) {
+ drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
+ return false;
+ }
+
+ if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
+ drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
+ return false;
+ }
+
+ return true;
+}
+
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
@@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
+ .devmem_only = atomic && IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
+ .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
};
struct xe_svm_range *range;
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
+ int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
int err;
@@ -758,24 +809,31 @@ retry:
if (IS_ERR(r))
return PTR_ERR(r);
+ if (ctx.devmem_only && !r->flags.migrate_devmem)
+ return -EACCES;
+
range = to_xe_range(r);
- if (xe_svm_range_is_valid(range, tile))
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
return 0;
range_debug(range, "PAGE FAULT");
- /* XXX: Add migration policy, for now migrate range once */
- if (!range->skip_migrate && range->base.flags.migrate_devmem &&
- xe_svm_range_size(range) >= SZ_64K) {
- range->skip_migrate = true;
-
+ if (--migrate_try_count >= 0 &&
+ xe_svm_range_needs_migrate_to_vram(range, vma)) {
err = xe_svm_alloc_vram(vm, tile, range, &ctx);
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
- drm_dbg(&vm->xe->drm,
- "VRAM allocation failed, falling back to "
- "retrying fault, asid=%u, errno=%pe\n",
- vm->usm.asid, ERR_PTR(err));
- goto retry;
+ if (migrate_try_count || !ctx.devmem_only) {
+ drm_dbg(&vm->xe->drm,
+ "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
+ vm->usm.asid, ERR_PTR(err));
+ goto retry;
+ } else {
+ drm_err(&vm->xe->drm,
+ "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
+ vm->usm.asid, ERR_PTR(err));
+ return err;
+ }
}
}
@@ -783,15 +841,23 @@ retry:
err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
/* Corner where CPU mappings have changed */
if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
- if (err == -EOPNOTSUPP) {
- range_debug(range, "PAGE FAULT - EVICT PAGES");
- drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+ if (migrate_try_count > 0 || !ctx.devmem_only) {
+ if (err == -EOPNOTSUPP) {
+ range_debug(range, "PAGE FAULT - EVICT PAGES");
+ drm_gpusvm_range_evict(&vm->svm.gpusvm,
+ &range->base);
+ }
+ drm_dbg(&vm->xe->drm,
+ "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ range_debug(range, "PAGE FAULT - RETRY PAGES");
+ goto retry;
+ } else {
+ drm_err(&vm->xe->drm,
+ "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
}
- drm_dbg(&vm->xe->drm,
- "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- range_debug(range, "PAGE FAULT - RETRY PAGES");
- goto retry;
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
@@ -815,6 +881,7 @@ retry_bind:
drm_exec_fini(&exec);
err = PTR_ERR(fence);
if (err == -EAGAIN) {
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
range_debug(range, "PAGE FAULT - RETRY BIND");
goto retry;
}
@@ -825,9 +892,6 @@ retry_bind:
}
drm_exec_fini(&exec);
- if (xe_modparam.always_migrate_to_vram)
- range->skip_migrate = false;
-
dma_fence_wait(fence, false);
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index be306fe7aaa4..fe58ac2f4baa 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -36,11 +36,6 @@ struct xe_svm_range {
* range. Protected by GPU SVM notifier lock.
*/
u8 tile_invalidated;
- /**
- * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
- * locking.
- */
- u8 skip_migrate :1;
};
#if IS_ENABLED(CONFIG_DRM_GPUSVM)
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h
index 5c669a0b2180..d525cbee1e34 100644
--- a/drivers/gpu/drm/xe/xe_trace_lrc.h
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.h
@@ -19,12 +19,12 @@
#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)
TRACE_EVENT(xe_lrc_update_timestamp,
- TP_PROTO(struct xe_lrc *lrc, uint32_t old),
+ TP_PROTO(struct xe_lrc *lrc, uint64_t old),
TP_ARGS(lrc, old),
TP_STRUCT__entry(
__field(struct xe_lrc *, lrc)
- __field(u32, old)
- __field(u32, new)
+ __field(u64, old)
+ __field(u64, new)
__string(name, lrc->fence_ctx.name)
__string(device_id, __dev_name_lrc(lrc))
),
@@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp,
__assign_str(name);
__assign_str(device_id);
),
- TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
+ TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s",
__entry->lrc, __get_str(name),
__entry->old, __entry->new,
__get_str(device_id))
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 24f644c0a673..2f833f0d575f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
+ { XE_RTP_NAME("22021007897"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ },
/* Xe3_LPG */
{ XE_RTP_NAME("14021490052"),
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index fa77e4e64f12..333f36e0a715 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1008,7 +1008,7 @@ int ipu_map_irq(struct ipu_soc *ipu, int irq)
{
int virq;
- virq = irq_linear_revmap(ipu->domain, irq);
+ virq = irq_find_mapping(ipu->domain, irq);
if (!virq)
virq = irq_create_mapping(ipu->domain, irq);
@@ -1169,8 +1169,8 @@ static int ipu_irq_init(struct ipu_soc *ipu)
};
int ret, i;
- ipu->domain = irq_domain_add_linear(ipu->dev->of_node, IPU_NUM_IRQS,
- &irq_generic_chip_ops, ipu);
+ ipu->domain = irq_domain_create_linear(of_fwnode_handle(ipu->dev->of_node), IPU_NUM_IRQS,
+ &irq_generic_chip_ops, ipu);
if (!ipu->domain) {
dev_err(ipu->dev, "failed to add irq domain\n");
return -ENODEV;
@@ -1219,7 +1219,7 @@ static void ipu_irq_exit(struct ipu_soc *ipu)
/* TODO: remove irq_domain_generic_chips */
for (i = 0; i < IPU_NUM_IRQS; i++) {
- irq = irq_linear_revmap(ipu->domain, i);
+ irq = irq_find_mapping(ipu->domain, i);
if (irq)
irq_dispose_mapping(irq);
}
diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
index 25f0ebfcbd5f..0a9b44ce4904 100644
--- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
+++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
@@ -83,6 +83,9 @@ static int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
case ALS_IDX:
privdata->dev_en.is_als_present = false;
break;
+ case SRA_IDX:
+ privdata->dev_en.is_sra_present = false;
+ break;
}
if (cl_data->sensor_sts[i] == SENSOR_ENABLED) {
@@ -134,9 +137,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
for (i = 0; i < cl_data->num_hid_devices; i++) {
cl_data->sensor_sts[i] = SENSOR_DISABLED;
- if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX)
- break;
-
if (cl_data->sensor_idx[i] == SRA_IDX) {
info.sensor_idx = cl_data->sensor_idx[i];
writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0));
@@ -145,8 +145,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
(privdata, cl_data->sensor_idx[i], ENABLE_SENSOR);
cl_data->sensor_sts[i] = (status == 0) ? SENSOR_ENABLED : SENSOR_DISABLED;
- if (cl_data->sensor_sts[i] == SENSOR_ENABLED)
+ if (cl_data->sensor_sts[i] == SENSOR_ENABLED) {
+ cl_data->is_any_sensor_enabled = true;
privdata->dev_en.is_sra_present = true;
+ }
continue;
}
@@ -238,6 +240,8 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
cleanup:
amd_sfh_hid_client_deinit(privdata);
for (i = 0; i < cl_data->num_hid_devices; i++) {
+ if (cl_data->sensor_idx[i] == SRA_IDX)
+ continue;
devm_kfree(dev, cl_data->feature_report[i]);
devm_kfree(dev, in_data->input_report[i]);
devm_kfree(dev, cl_data->report_descr[i]);
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index 2e96ec6a3073..9a06f9b0e4ef 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -38,6 +38,9 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type
struct hid_bpf_ops *e;
int ret;
+ if (unlikely(hdev->bpf.destroyed))
+ return ERR_PTR(-ENODEV);
+
if (type >= HID_REPORT_TYPES)
return ERR_PTR(-EINVAL);
@@ -93,6 +96,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
struct hid_bpf_ops *e;
int ret, idx;
+ if (unlikely(hdev->bpf.destroyed))
+ return -ENODEV;
+
if (rtype >= HID_REPORT_TYPES)
return -EINVAL;
@@ -130,6 +136,9 @@ int dispatch_hid_bpf_output_report(struct hid_device *hdev,
struct hid_bpf_ops *e;
int ret, idx;
+ if (unlikely(hdev->bpf.destroyed))
+ return -ENODEV;
+
idx = srcu_read_lock(&hdev->bpf.srcu);
list_for_each_entry_srcu(e, &hdev->bpf.prog_list, list,
srcu_read_lock_held(&hdev->bpf.srcu)) {
diff --git a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c
index 1a0aeea6a081..a754710fc90b 100644
--- a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c
+++ b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c
@@ -157,6 +157,7 @@ static const __u8 fixed_rdesc_vendor[] = {
ReportCount(5) // padding
Input(Const)
// Byte 4 in report - just exists so we get to be a tablet pad
+ UsagePage_Digitizers
Usage_Dig_BarrelSwitch // BTN_STYLUS
ReportCount(1)
ReportSize(1)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 288a2b864cc4..1062731315a2 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -41,6 +41,10 @@
#define USB_VENDOR_ID_ACTIONSTAR 0x2101
#define USB_DEVICE_ID_ACTIONSTAR_1011 0x1011
+#define USB_VENDOR_ID_ADATA_XPG 0x125f
+#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE 0x7505
+#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE 0x7506
+
#define USB_VENDOR_ID_ADS_TECH 0x06e1
#define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X 0xa155
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 646171598e41..0731473cc9b1 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -27,6 +27,8 @@
static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_GAMEPAD), HID_QUIRK_BADPAD },
{ HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR), HID_QUIRK_BADPAD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016), HID_QUIRK_FULLSPEED_INTERVAL },
{ HID_USB_DEVICE(USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX), HID_QUIRK_NO_INIT_REPORTS },
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
index dfd9d22ed559..949d307c66a8 100644
--- a/drivers/hid/hid-steam.c
+++ b/drivers/hid/hid-steam.c
@@ -1150,11 +1150,9 @@ static void steam_client_ll_close(struct hid_device *hdev)
struct steam_device *steam = hdev->driver_data;
unsigned long flags;
- bool connected;
spin_lock_irqsave(&steam->lock, flags);
steam->client_opened--;
- connected = steam->connected && !steam->client_opened;
spin_unlock_irqrestore(&steam->lock, flags);
schedule_work(&steam->unregister_work);
diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index 3b81468a1df2..0bf70664c35e 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -174,6 +174,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev)
u8 ep_addr[2] = {b_ep, 0};
if (!usb_check_int_endpoints(usbif, ep_addr)) {
+ kfree(send_buf);
hid_err(hdev, "Unexpected non-int endpoint\n");
return;
}
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index a367df6ea01f..61a4019ddc74 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -142,11 +142,12 @@ static int uclogic_input_configured(struct hid_device *hdev,
suffix = "System Control";
break;
}
- }
-
- if (suffix)
+ } else {
hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
"%s %s", hdev->name, suffix);
+ if (!hi->input->name)
+ return -ENOMEM;
+ }
return 0;
}
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 1556d4287fa5..eaf099b2efdb 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -70,10 +70,16 @@ static void wacom_wac_queue_flush(struct hid_device *hdev,
{
while (!kfifo_is_empty(fifo)) {
int size = kfifo_peek_len(fifo);
- u8 *buf = kzalloc(size, GFP_KERNEL);
+ u8 *buf;
unsigned int count;
int err;
+ buf = kzalloc(size, GFP_KERNEL);
+ if (!buf) {
+ kfifo_skip(fifo);
+ continue;
+ }
+
count = kfifo_out(fifo, buf, size);
if (count != size) {
// Hard to say what is the "right" action in this
@@ -81,6 +87,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev,
// to flush seems reasonable enough, however.
hid_warn(hdev, "%s: removed fifo entry with unexpected size\n",
__func__);
+ kfree(buf);
continue;
}
err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false);
@@ -2361,6 +2368,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
unsigned int connect_mask = HID_CONNECT_HIDRAW;
features->pktlen = wacom_compute_pktlen(hdev);
+ if (!features->pktlen)
+ return -ENODEV;
if (!devres_open_group(&hdev->dev, wacom, GFP_KERNEL))
return -ENOMEM;
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index fb8cd8469328..35f26fa1ffe7 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -1077,68 +1077,10 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
EXPORT_SYMBOL(vmbus_sendpacket);
/*
- * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
- * packets using a GPADL Direct packet type. This interface allows you
- * to control notifying the host. This will be useful for sending
- * batched data. Also the sender can control the send flags
- * explicitly.
- */
-int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount, void *buffer, u32 bufferlen,
- u64 requestid)
-{
- int i;
- struct vmbus_channel_packet_page_buffer desc;
- u32 descsize;
- u32 packetlen;
- u32 packetlen_aligned;
- struct kvec bufferlist[3];
- u64 aligned_data = 0;
-
- if (pagecount > MAX_PAGE_BUFFER_COUNT)
- return -EINVAL;
-
- /*
- * Adjust the size down since vmbus_channel_packet_page_buffer is the
- * largest size we support
- */
- descsize = sizeof(struct vmbus_channel_packet_page_buffer) -
- ((MAX_PAGE_BUFFER_COUNT - pagecount) *
- sizeof(struct hv_page_buffer));
- packetlen = descsize + bufferlen;
- packetlen_aligned = ALIGN(packetlen, sizeof(u64));
-
- /* Setup the descriptor */
- desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
- desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
- desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
- desc.length8 = (u16)(packetlen_aligned >> 3);
- desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
- desc.reserved = 0;
- desc.rangecount = pagecount;
-
- for (i = 0; i < pagecount; i++) {
- desc.range[i].len = pagebuffers[i].len;
- desc.range[i].offset = pagebuffers[i].offset;
- desc.range[i].pfn = pagebuffers[i].pfn;
- }
-
- bufferlist[0].iov_base = &desc;
- bufferlist[0].iov_len = descsize;
- bufferlist[1].iov_base = buffer;
- bufferlist[1].iov_len = bufferlen;
- bufferlist[2].iov_base = &aligned_data;
- bufferlist[2].iov_len = (packetlen_aligned - packetlen);
-
- return hv_ringbuffer_write(channel, bufferlist, 3, requestid, NULL);
-}
-EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
-
-/*
- * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
+ * vmbus_sendpacket_mpb_desc - Send one or more multi-page buffer packets
* using a GPADL Direct packet type.
- * The buffer includes the vmbus descriptor.
+ * The desc argument must include space for the VMBus descriptor. The
+ * rangecount field must already be set.
*/
int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct vmbus_packet_mpb_array *desc,
@@ -1160,7 +1102,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
desc->length8 = (u16)(packetlen_aligned >> 3);
desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
desc->reserved = 0;
- desc->rangecount = 1;
bufferlist[0].iov_base = desc;
bufferlist[0].iov_len = desc_size;
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 9ed2c4b6734e..8ecebea53651 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -143,8 +143,8 @@ static void do_read_registers_on_cu(void *_data)
*/
cu = topology_core_id(smp_processor_id());
- rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
- rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
+ rdmsrq_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+ rdmsrq_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
data->cu_on[cu] = 1;
}
@@ -424,7 +424,7 @@ static int fam15h_power_init_data(struct pci_dev *f4,
*/
data->cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
- if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) {
+ if (rdmsrq_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) {
pr_err("Failed to read max compute unit power accumulator MSR\n");
return -ENODEV;
}
diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index 6d1175a51832..2df4956296ed 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -15,6 +15,10 @@
#include <linux/kernel.h>
#include <linux/hwmon-vid.h>
+#ifdef CONFIG_X86
+#include <asm/msr.h>
+#endif
+
/*
* Common code for decoding VID pins.
*
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 3685906cc57c..472bcf6092f6 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -20,7 +20,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include <asm/processor.h>
MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor");
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 83c88c79afe2..bbbd6240fa6e 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -200,7 +200,7 @@ config I2C_ISMT
config I2C_PIIX4
tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)"
- depends on PCI && HAS_IOPORT
+ depends on PCI && HAS_IOPORT && X86
select I2C_SMBUS
help
If you say yes to this option, support will be included for the Intel
diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
index 26a36a65521e..606ac071cb80 100644
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -467,7 +467,7 @@ static int cht_wc_i2c_adap_i2c_probe(struct platform_device *pdev)
return ret;
/* Alloc and register client IRQ */
- adap->irq_domain = irq_domain_add_linear(NULL, 1, &irq_domain_simple_ops, NULL);
+ adap->irq_domain = irq_domain_create_linear(NULL, 1, &irq_domain_simple_ops, NULL);
if (!adap->irq_domain)
return -ENOMEM;
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index 8e0267c7cc29..f21f9877c040 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -278,9 +278,11 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
if ((dev->flags & MODEL_MASK) == MODEL_AMD_NAVI_GPU) {
dev->slave = i2c_new_ccgx_ucsi(&dev->adapter, dev->irq, &dgpu_node);
- if (IS_ERR(dev->slave))
+ if (IS_ERR(dev->slave)) {
+ i2c_del_adapter(&dev->adapter);
return dev_err_probe(device, PTR_ERR(dev->slave),
"register UCSI failed\n");
+ }
}
pm_runtime_set_autosuspend_delay(device, 1000);
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index dd75916157f0..59ecaa990bce 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -34,6 +34,7 @@
#include <linux/dmi.h>
#include <linux/acpi.h>
#include <linux/io.h>
+#include <asm/amd/fch.h>
#include "i2c-piix4.h"
@@ -80,12 +81,11 @@
#define SB800_PIIX4_PORT_IDX_MASK 0x06
#define SB800_PIIX4_PORT_IDX_SHIFT 1
-/* On kerncz and Hudson2, SmBus0Sel is at bit 20:19 of PMx00 DecodeEn */
-#define SB800_PIIX4_PORT_IDX_KERNCZ 0x02
-#define SB800_PIIX4_PORT_IDX_MASK_KERNCZ 0x18
+/* SmBus0Sel is at bit 20:19 of PMx00 DecodeEn */
+#define SB800_PIIX4_PORT_IDX_KERNCZ (FCH_PM_DECODEEN + 0x02)
+#define SB800_PIIX4_PORT_IDX_MASK_KERNCZ (FCH_PM_DECODEEN_SMBUS0SEL >> 16)
#define SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ 3
-#define SB800_PIIX4_FCH_PM_ADDR 0xFED80300
#define SB800_PIIX4_FCH_PM_SIZE 8
#define SB800_ASF_ACPI_PATH "\\_SB.ASFC"
@@ -162,19 +162,19 @@ int piix4_sb800_region_request(struct device *dev, struct sb800_mmio_cfg *mmio_c
if (mmio_cfg->use_mmio) {
void __iomem *addr;
- if (!request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR,
+ if (!request_mem_region_muxed(FCH_PM_BASE,
SB800_PIIX4_FCH_PM_SIZE,
"sb800_piix4_smb")) {
dev_err(dev,
"SMBus base address memory region 0x%x already in use.\n",
- SB800_PIIX4_FCH_PM_ADDR);
+ FCH_PM_BASE);
return -EBUSY;
}
- addr = ioremap(SB800_PIIX4_FCH_PM_ADDR,
+ addr = ioremap(FCH_PM_BASE,
SB800_PIIX4_FCH_PM_SIZE);
if (!addr) {
- release_mem_region(SB800_PIIX4_FCH_PM_ADDR,
+ release_mem_region(FCH_PM_BASE,
SB800_PIIX4_FCH_PM_SIZE);
dev_err(dev, "SMBus base address mapping failed.\n");
return -ENOMEM;
@@ -201,7 +201,7 @@ void piix4_sb800_region_release(struct device *dev, struct sb800_mmio_cfg *mmio_
{
if (mmio_cfg->use_mmio) {
iounmap(mmio_cfg->addr);
- release_mem_region(SB800_PIIX4_FCH_PM_ADDR,
+ release_mem_region(FCH_PM_BASE,
SB800_PIIX4_FCH_PM_SIZE);
return;
}
diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
index db95113a5b49..5bb26af0f532 100644
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -442,9 +442,9 @@ static int pca954x_irq_setup(struct i2c_mux_core *muxc)
raw_spin_lock_init(&data->lock);
- data->irq = irq_domain_add_linear(client->dev.of_node,
- data->chip->nchans,
- &irq_domain_simple_ops, data);
+ data->irq = irq_domain_create_linear(of_fwnode_handle(client->dev.of_node),
+ data->chip->nchans,
+ &irq_domain_simple_ops, data);
if (!data->irq)
return -ENODEV;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 976f5be54e36..8ccb483204fa 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -48,14 +48,17 @@
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
+#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
-#include <asm/cpuid.h>
+#include <linux/sysfs.h>
+#include <asm/cpuid/api.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
+#include <asm/msr.h>
#include <asm/tsc.h>
#include <asm/fpu/api.h>
#include <asm/smp.h>
@@ -92,9 +95,15 @@ struct idle_cpu {
*/
unsigned long auto_demotion_disable_flags;
bool disable_promotion_to_c1e;
+ bool c1_demotion_supported;
bool use_acpi;
};
+static bool c1_demotion_supported;
+static DEFINE_MUTEX(c1_demotion_mutex);
+
+static struct device *sysfs_root __initdata;
+
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;
@@ -1549,18 +1558,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = {
static const struct idle_cpu idle_cpu_spr __initconst = {
.state_table = spr_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
static const struct idle_cpu idle_cpu_gnr __initconst = {
.state_table = gnr_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
static const struct idle_cpu idle_cpu_gnrd __initconst = {
.state_table = gnrd_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
@@ -1599,12 +1611,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = {
static const struct idle_cpu idle_cpu_grr __initconst = {
.state_table = grr_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
static const struct idle_cpu idle_cpu_srf __initconst = {
.state_table = srf_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
@@ -1928,35 +1942,35 @@ static void __init bxt_idle_state_table_update(void)
unsigned long long msr;
unsigned int usec;
- rdmsrl(MSR_PKGC6_IRTL, msr);
+ rdmsrq(MSR_PKGC6_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[2].exit_latency = usec;
bxt_cstates[2].target_residency = usec;
}
- rdmsrl(MSR_PKGC7_IRTL, msr);
+ rdmsrq(MSR_PKGC7_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[3].exit_latency = usec;
bxt_cstates[3].target_residency = usec;
}
- rdmsrl(MSR_PKGC8_IRTL, msr);
+ rdmsrq(MSR_PKGC8_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[4].exit_latency = usec;
bxt_cstates[4].target_residency = usec;
}
- rdmsrl(MSR_PKGC9_IRTL, msr);
+ rdmsrq(MSR_PKGC9_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[5].exit_latency = usec;
bxt_cstates[5].target_residency = usec;
}
- rdmsrl(MSR_PKGC10_IRTL, msr);
+ rdmsrq(MSR_PKGC10_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[6].exit_latency = usec;
@@ -1984,7 +1998,7 @@ static void __init sklh_idle_state_table_update(void)
if ((mwait_substates & (0xF << 28)) == 0)
return;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
/* PC10 is not enabled in PKG C-state limit */
if ((msr & 0xF) != 8)
@@ -1996,7 +2010,7 @@ static void __init sklh_idle_state_table_update(void)
/* if SGX is present */
if (ebx & (1 << 2)) {
- rdmsrl(MSR_IA32_FEAT_CTL, msr);
+ rdmsrq(MSR_IA32_FEAT_CTL, msr);
/* if SGX is enabled */
if (msr & (1 << 18))
@@ -2015,7 +2029,7 @@ static void __init skx_idle_state_table_update(void)
{
unsigned long long msr;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
/*
* 000b: C0/C1 (no package C-state support)
@@ -2068,7 +2082,7 @@ static void __init spr_idle_state_table_update(void)
* C6. However, if PC6 is disabled, we update the numbers to match
* core C6.
*/
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
/* Limit value 2 and above allow for PC6. */
if ((msr & 0x7) < 2) {
@@ -2082,8 +2096,8 @@ static void __init spr_idle_state_table_update(void)
*/
static void __init byt_cht_auto_demotion_disable(void)
{
- wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
- wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
+ wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
+ wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
@@ -2241,27 +2255,27 @@ static void auto_demotion_disable(void)
{
unsigned long long msr_bits;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
msr_bits &= ~auto_demotion_disable_flags;
- wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
+ wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}
static void c1e_promotion_enable(void)
{
unsigned long long msr_bits;
- rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
msr_bits |= 0x2;
- wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
}
static void c1e_promotion_disable(void)
{
unsigned long long msr_bits;
- rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
msr_bits &= ~0x2;
- wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
}
/**
@@ -2324,6 +2338,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
+static void intel_c1_demotion_toggle(void *enable)
+{
+ unsigned long long msr_val;
+
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
+ /*
+ * Enable/disable C1 undemotion along with C1 demotion, as this is the
+ * most sensible configuration in general.
+ */
+ if (enable)
+ msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
+ else
+ msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
+ wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
+}
+
+static ssize_t intel_c1_demotion_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool enable;
+ int err;
+
+ err = kstrtobool(buf, &enable);
+ if (err)
+ return err;
+
+ mutex_lock(&c1_demotion_mutex);
+ /* Enable/disable C1 demotion on all CPUs */
+ on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1);
+ mutex_unlock(&c1_demotion_mutex);
+
+ return count;
+}
+
+static ssize_t intel_c1_demotion_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ unsigned long long msr_val;
+
+ /*
+ * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
+ * configuration would be a BIOS bug.
+ */
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
+ return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
+}
+static DEVICE_ATTR_RW(intel_c1_demotion);
+
+static int __init intel_idle_sysfs_init(void)
+{
+ int err;
+
+ if (!c1_demotion_supported)
+ return 0;
+
+ sysfs_root = bus_get_dev_root(&cpu_subsys);
+ if (!sysfs_root)
+ return 0;
+
+ err = sysfs_add_file_to_group(&sysfs_root->kobj,
+ &dev_attr_intel_c1_demotion.attr,
+ "cpuidle");
+ if (err) {
+ put_device(sysfs_root);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __init intel_idle_sysfs_uninit(void)
+{
+ if (!sysfs_root)
+ return;
+
+ sysfs_remove_file_from_group(&sysfs_root->kobj,
+ &dev_attr_intel_c1_demotion.attr,
+ "cpuidle");
+ put_device(sysfs_root);
+}
+
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2374,6 +2470,8 @@ static int __init intel_idle_init(void)
auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
if (icpu->disable_promotion_to_c1e)
c1e_promotion = C1E_PROMOTION_DISABLE;
+ if (icpu->c1_demotion_supported)
+ c1_demotion_supported = true;
if (icpu->use_acpi || force_use_acpi)
intel_idle_acpi_cst_extract();
} else if (!intel_idle_acpi_cst_extract()) {
@@ -2387,6 +2485,10 @@ static int __init intel_idle_init(void)
if (!intel_idle_cpuidle_devices)
return -ENOMEM;
+ retval = intel_idle_sysfs_init();
+ if (retval)
+ pr_warn("failed to initialized sysfs");
+
intel_idle_cpuidle_driver_init(&intel_idle_driver);
retval = cpuidle_register_driver(&intel_idle_driver);
@@ -2411,6 +2513,7 @@ hp_setup_fail:
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
+ intel_idle_sysfs_uninit();
free_percpu(intel_idle_cpuidle_devices);
return retval;
diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c
index 0914148d1a22..bd3458965bff 100644
--- a/drivers/iio/adc/stm32-adc-core.c
+++ b/drivers/iio/adc/stm32-adc-core.c
@@ -421,9 +421,10 @@ static int stm32_adc_irq_probe(struct platform_device *pdev,
return priv->irq[i];
}
- priv->domain = irq_domain_add_simple(np, STM32_ADC_MAX_ADCS, 0,
- &stm32_adc_domain_ops,
- priv);
+ priv->domain = irq_domain_create_simple(of_fwnode_handle(np),
+ STM32_ADC_MAX_ADCS, 0,
+ &stm32_adc_domain_ops,
+ priv);
if (!priv->domain) {
dev_err(&pdev->dev, "Failed to add irq domain\n");
return -ENOMEM;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b4e3e4beb7f4..d4263385850a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1352,6 +1352,9 @@ static void ib_device_notify_register(struct ib_device *device)
down_read(&devices_rwsem);
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
if (ret)
goto out;
@@ -1468,10 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name,
return ret;
}
dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_notify_register(device);
+
ib_device_put(device);
return 0;
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 1ee8969595d3..7599e31b5743 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -221,7 +221,7 @@ static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
break;
if (i < IRDMA_MIN_MSIX) {
- for (; i > 0; i--)
+ while (--i >= 0)
ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
kfree(rf->msix_entries);
@@ -255,6 +255,8 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false);
irdma_deinit_interrupts(iwdev->rf, pf);
+ kfree(iwdev->rf);
+
pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
}
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index eeb932e58730..1e8c92826de2 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev)
irdma_rt_deinit_hw(iwdev);
irdma_ctrl_deinit_hw(iwdev->rf);
- kfree(iwdev->rf);
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index b9f4a2937c3a..2098de762bf5 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -90,7 +90,7 @@ static int create_file(const char *name, umode_t mode,
int error;
inode_lock(d_inode(parent));
- *dentry = lookup_one_len(name, parent, strlen(name));
+ *dentry = lookup_noperm(&QSTR(name), parent);
if (!IS_ERR(*dentry))
error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
@@ -433,7 +433,7 @@ static int remove_device_files(struct super_block *sb,
char unit[10];
snprintf(unit, sizeof(unit), "%u", dd->unit);
- dir = lookup_one_len_unlocked(unit, sb->s_root, strlen(unit));
+ dir = lookup_noperm_unlocked(&QSTR(unit), sb->s_root);
if (IS_ERR(dir)) {
pr_err("Lookup of %s failed\n", unit);
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index fec87c9030ab..fffd144d509e 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata,
cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
- if (err) {
- vfree(cq->queue->buf);
- kfree(cq->queue);
+ if (err)
return err;
- }
cq->is_user = uresp;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 57a5ff3d1992..1008858f78e2 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -290,6 +290,8 @@ static const struct xpad_device {
{ 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 },
{ 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 },
{ 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE },
+ { 0x10f5, 0x7008, "Turtle Beach Recon Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE },
+ { 0x10f5, 0x7073, "Turtle Beach Stealth Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE },
{ 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 },
{ 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 },
{ 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 },
@@ -354,6 +356,7 @@ static const struct xpad_device {
{ 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 },
{ 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE },
{ 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE },
+ { 0x20d6, 0x2064, "PowerA Wired Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE },
{ 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 },
{ 0x20d6, 0x400b, "PowerA FUSION Pro 4 Wired Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE },
{ 0x20d6, 0x890b, "PowerA MOGA XP-Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE },
diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c
index d760af4cc12e..f1947f03b06a 100644
--- a/drivers/input/rmi4/rmi_f34.c
+++ b/drivers/input/rmi4/rmi_f34.c
@@ -4,6 +4,7 @@
* Copyright (C) 2016 Zodiac Inflight Innovations
*/
+#include "linux/device.h"
#include <linux/kernel.h>
#include <linux/rmi.h>
#include <linux/firmware.h>
@@ -289,39 +290,30 @@ static int rmi_f34_update_firmware(struct f34_data *f34,
return rmi_f34_flash_firmware(f34, syn_fw);
}
-static int rmi_f34_status(struct rmi_function *fn)
-{
- struct f34_data *f34 = dev_get_drvdata(&fn->dev);
-
- /*
- * The status is the percentage complete, or once complete,
- * zero for success or a negative return code.
- */
- return f34->update_status;
-}
-
static ssize_t rmi_driver_bootloader_id_show(struct device *dev,
struct device_attribute *dattr,
char *buf)
{
struct rmi_driver_data *data = dev_get_drvdata(dev);
- struct rmi_function *fn = data->f34_container;
+ struct rmi_function *fn;
struct f34_data *f34;
- if (fn) {
- f34 = dev_get_drvdata(&fn->dev);
-
- if (f34->bl_version == 5)
- return sysfs_emit(buf, "%c%c\n",
- f34->bootloader_id[0],
- f34->bootloader_id[1]);
- else
- return sysfs_emit(buf, "V%d.%d\n",
- f34->bootloader_id[1],
- f34->bootloader_id[0]);
- }
+ fn = data->f34_container;
+ if (!fn)
+ return -ENODEV;
- return 0;
+ f34 = dev_get_drvdata(&fn->dev);
+ if (!f34)
+ return -ENODEV;
+
+ if (f34->bl_version == 5)
+ return sysfs_emit(buf, "%c%c\n",
+ f34->bootloader_id[0],
+ f34->bootloader_id[1]);
+ else
+ return sysfs_emit(buf, "V%d.%d\n",
+ f34->bootloader_id[1],
+ f34->bootloader_id[0]);
}
static DEVICE_ATTR(bootloader_id, 0444, rmi_driver_bootloader_id_show, NULL);
@@ -334,13 +326,16 @@ static ssize_t rmi_driver_configuration_id_show(struct device *dev,
struct rmi_function *fn = data->f34_container;
struct f34_data *f34;
- if (fn) {
- f34 = dev_get_drvdata(&fn->dev);
+ fn = data->f34_container;
+ if (!fn)
+ return -ENODEV;
+
+ f34 = dev_get_drvdata(&fn->dev);
+ if (!f34)
+ return -ENODEV;
- return sysfs_emit(buf, "%s\n", f34->configuration_id);
- }
- return 0;
+ return sysfs_emit(buf, "%s\n", f34->configuration_id);
}
static DEVICE_ATTR(configuration_id, 0444,
@@ -356,10 +351,14 @@ static int rmi_firmware_update(struct rmi_driver_data *data,
if (!data->f34_container) {
dev_warn(dev, "%s: No F34 present!\n", __func__);
- return -EINVAL;
+ return -ENODEV;
}
f34 = dev_get_drvdata(&data->f34_container->dev);
+ if (!f34) {
+ dev_warn(dev, "%s: No valid F34 present!\n", __func__);
+ return -ENODEV;
+ }
if (f34->bl_version >= 7) {
if (data->pdt_props & HAS_BSR) {
@@ -485,10 +484,18 @@ static ssize_t rmi_driver_update_fw_status_show(struct device *dev,
char *buf)
{
struct rmi_driver_data *data = dev_get_drvdata(dev);
- int update_status = 0;
+ struct f34_data *f34;
+ int update_status = -ENODEV;
- if (data->f34_container)
- update_status = rmi_f34_status(data->f34_container);
+ /*
+ * The status is the percentage complete, or once complete,
+ * zero for success or a negative return code.
+ */
+ if (data->f34_container) {
+ f34 = dev_get_drvdata(&data->f34_container->dev);
+ if (f34)
+ update_status = f34->update_status;
+ }
return sysfs_emit(buf, "%d\n", update_status);
}
@@ -508,33 +515,21 @@ static const struct attribute_group rmi_firmware_attr_group = {
.attrs = rmi_firmware_attrs,
};
-static int rmi_f34_probe(struct rmi_function *fn)
+static int rmi_f34v5_probe(struct f34_data *f34)
{
- struct f34_data *f34;
- unsigned char f34_queries[9];
+ struct rmi_function *fn = f34->fn;
+ u8 f34_queries[9];
bool has_config_id;
- u8 version = fn->fd.function_version;
- int ret;
-
- f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL);
- if (!f34)
- return -ENOMEM;
-
- f34->fn = fn;
- dev_set_drvdata(&fn->dev, f34);
-
- /* v5 code only supported version 0, try V7 probe */
- if (version > 0)
- return rmi_f34v7_probe(f34);
+ int error;
f34->bl_version = 5;
- ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
- f34_queries, sizeof(f34_queries));
- if (ret) {
+ error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
+ f34_queries, sizeof(f34_queries));
+ if (error) {
dev_err(&fn->dev, "%s: Failed to query properties\n",
__func__);
- return ret;
+ return error;
}
snprintf(f34->bootloader_id, sizeof(f34->bootloader_id),
@@ -560,11 +555,11 @@ static int rmi_f34_probe(struct rmi_function *fn)
f34->v5.config_blocks);
if (has_config_id) {
- ret = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr,
- f34_queries, sizeof(f34_queries));
- if (ret) {
+ error = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr,
+ f34_queries, sizeof(f34_queries));
+ if (error) {
dev_err(&fn->dev, "Failed to read F34 config ID\n");
- return ret;
+ return error;
}
snprintf(f34->configuration_id, sizeof(f34->configuration_id),
@@ -573,12 +568,34 @@ static int rmi_f34_probe(struct rmi_function *fn)
f34_queries[2], f34_queries[3]);
rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Configuration ID: %s\n",
- f34->configuration_id);
+ f34->configuration_id);
}
return 0;
}
+static int rmi_f34_probe(struct rmi_function *fn)
+{
+ struct f34_data *f34;
+ u8 version = fn->fd.function_version;
+ int error;
+
+ f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL);
+ if (!f34)
+ return -ENOMEM;
+
+ f34->fn = fn;
+
+ /* v5 code only supported version 0 */
+ error = version == 0 ? rmi_f34v5_probe(f34) : rmi_f34v7_probe(f34);
+ if (error)
+ return error;
+
+ dev_set_drvdata(&fn->dev, f34);
+
+ return 0;
+}
+
int rmi_f34_create_sysfs(struct rmi_device *rmi_dev)
{
return sysfs_create_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a775e4dbe06f..98f7205ec8fb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -27,6 +27,7 @@
#include <linux/msi.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/swiotlb.h>
@@ -1137,6 +1138,54 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}
+static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
+
+ if (!is_swiotlb_active(dev)) {
+ dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ trace_swiotlb_bounced(dev, phys, size);
+
+ phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir,
+ attrs);
+
+ /*
+ * Untrusted devices should not see padding areas with random leftover
+ * kernel data, so zero the pre- and post-padding.
+ * swiotlb_tbl_map_single() has initialized the bounce buffer proper to
+ * the contents of the original memory buffer.
+ */
+ if (phys != (phys_addr_t)DMA_MAPPING_ERROR && dev_is_untrusted(dev)) {
+ size_t start, virt = (size_t)phys_to_virt(phys);
+
+ /* Pre-padding */
+ start = iova_align_down(iovad, virt);
+ memset((void *)start, 0, virt - start);
+
+ /* Post-padding */
+ start = virt + size;
+ memset((void *)start, 0, iova_align(iovad, start) - start);
+ }
+
+ return phys;
+}
+
+/*
+ * Checks if a physical buffer has unaligned boundaries with respect to
+ * the IOMMU granule. Returns non-zero if either the start or end
+ * address is not aligned to the granule boundary.
+ */
+static inline size_t iova_unaligned(struct iova_domain *iovad, phys_addr_t phys,
+ size_t size)
+{
+ return iova_offset(iovad, phys | size);
+}
+
dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
@@ -1150,42 +1199,14 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
dma_addr_t iova, dma_mask = dma_get_mask(dev);
/*
- * If both the physical buffer start address and size are
- * page aligned, we don't need to use a bounce page.
+ * If both the physical buffer start address and size are page aligned,
+ * we don't need to use a bounce page.
*/
if (dev_use_swiotlb(dev, size, dir) &&
- iova_offset(iovad, phys | size)) {
- if (!is_swiotlb_active(dev)) {
- dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
+ iova_unaligned(iovad, phys, size)) {
+ phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
+ if (phys == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
- }
-
- trace_swiotlb_bounced(dev, phys, size);
-
- phys = swiotlb_tbl_map_single(dev, phys, size,
- iova_mask(iovad), dir, attrs);
-
- if (phys == DMA_MAPPING_ERROR)
- return DMA_MAPPING_ERROR;
-
- /*
- * Untrusted devices should not see padding areas with random
- * leftover kernel data, so zero the pre- and post-padding.
- * swiotlb_tbl_map_single() has initialized the bounce buffer
- * proper to the contents of the original memory buffer.
- */
- if (dev_is_untrusted(dev)) {
- size_t start, virt = (size_t)phys_to_virt(phys);
-
- /* Pre-padding */
- start = iova_align_down(iovad, virt);
- memset((void *)start, 0, virt - start);
-
- /* Post-padding */
- start = virt + size;
- memset((void *)start, 0,
- iova_align(iovad, start) - start);
- }
}
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1359,7 +1380,6 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
struct scatterlist *s, *prev = NULL;
int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
struct pci_p2pdma_map_state p2pdma_state = {};
- enum pci_p2pdma_map_type map;
dma_addr_t iova;
size_t iova_len = 0;
unsigned long mask = dma_get_seg_boundary(dev);
@@ -1389,28 +1409,30 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
size_t s_length = s->length;
size_t pad_len = (mask - iova_len + 1) & mask;
- if (is_pci_p2pdma_page(sg_page(s))) {
- map = pci_p2pdma_map_segment(&p2pdma_state, dev, s);
- switch (map) {
- case PCI_P2PDMA_MAP_BUS_ADDR:
- /*
- * iommu_map_sg() will skip this segment as
- * it is marked as a bus address,
- * __finalise_sg() will copy the dma address
- * into the output segment.
- */
- continue;
- case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
- /*
- * Mapping through host bridge should be
- * mapped with regular IOVAs, thus we
- * do nothing here and continue below.
- */
- break;
- default:
- ret = -EREMOTEIO;
- goto out_restore_sg;
- }
+ switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(s))) {
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ /*
+ * Mapping through host bridge should be mapped with
+ * regular IOVAs, thus we do nothing here and continue
+ * below.
+ */
+ break;
+ case PCI_P2PDMA_MAP_NONE:
+ break;
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ /*
+ * iommu_map_sg() will skip this segment as it is marked
+ * as a bus address, __finalise_sg() will copy the dma
+ * address into the output segment.
+ */
+ s->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state,
+ sg_phys(s));
+ sg_dma_len(s) = sg->length;
+ sg_dma_mark_bus_address(s);
+ continue;
+ default:
+ ret = -EREMOTEIO;
+ goto out_restore_sg;
}
sg_dma_address(s) = s_iova_off;
@@ -1721,6 +1743,354 @@ size_t iommu_dma_max_mapping_size(struct device *dev)
return SIZE_MAX;
}
+/**
+ * dma_iova_try_alloc - Try to allocate an IOVA space
+ * @dev: Device to allocate the IOVA space for
+ * @state: IOVA state
+ * @phys: physical address
+ * @size: IOVA size
+ *
+ * Check if @dev supports the IOVA-based DMA API, and if yes allocate IOVA space
+ * for the given base address and size.
+ *
+ * Note: @phys is only used to calculate the IOVA alignment. Callers that always
+ * do PAGE_SIZE aligned transfers can safely pass 0 here.
+ *
+ * Returns %true if the IOVA-based DMA API can be used and IOVA space has been
+ * allocated, or %false if the regular DMA API should be used.
+ */
+bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t size)
+{
+ struct iommu_dma_cookie *cookie;
+ struct iommu_domain *domain;
+ struct iova_domain *iovad;
+ size_t iova_off;
+ dma_addr_t addr;
+
+ memset(state, 0, sizeof(*state));
+ if (!use_dma_iommu(dev))
+ return false;
+
+ domain = iommu_get_dma_domain(dev);
+ cookie = domain->iova_cookie;
+ iovad = &cookie->iovad;
+ iova_off = iova_offset(iovad, phys);
+
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
+ iommu_deferred_attach(dev, iommu_get_domain_for_dev(dev)))
+ return false;
+
+ if (WARN_ON_ONCE(!size))
+ return false;
+
+ /*
+ * DMA_IOVA_USE_SWIOTLB is flag which is set by dma-iommu
+ * internals, make sure that caller didn't set it and/or
+ * didn't use this interface to map SIZE_MAX.
+ */
+ if (WARN_ON_ONCE((u64)size & DMA_IOVA_USE_SWIOTLB))
+ return false;
+
+ addr = iommu_dma_alloc_iova(domain,
+ iova_align(iovad, size + iova_off),
+ dma_get_mask(dev), dev);
+ if (!addr)
+ return false;
+
+ state->addr = addr + iova_off;
+ state->__size = size;
+ return true;
+}
+EXPORT_SYMBOL_GPL(dma_iova_try_alloc);
+
+/**
+ * dma_iova_free - Free an IOVA space
+ * @dev: Device to free the IOVA space for
+ * @state: IOVA state
+ *
+ * Undoes a successful dma_try_iova_alloc().
+ *
+ * Note that all dma_iova_link() calls need to be undone first. For callers
+ * that never call dma_iova_unlink(), dma_iova_destroy() can be used instead
+ * which unlinks all ranges and frees the IOVA space in a single efficient
+ * operation.
+ */
+void dma_iova_free(struct device *dev, struct dma_iova_state *state)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, state->addr);
+ size_t size = dma_iova_size(state);
+
+ iommu_dma_free_iova(domain, state->addr - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad), NULL);
+}
+EXPORT_SYMBOL_GPL(dma_iova_free);
+
+static int __dma_iova_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ bool coherent = dev_is_dma_coherent(dev);
+
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, dir);
+
+ return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
+ dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC);
+}
+
+static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t bounce_len,
+ enum dma_data_direction dir, unsigned long attrs,
+ size_t iova_start_pad)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
+ phys_addr_t bounce_phys;
+ int error;
+
+ bounce_phys = iommu_dma_map_swiotlb(dev, phys, bounce_len, dir, attrs);
+ if (bounce_phys == DMA_MAPPING_ERROR)
+ return -ENOMEM;
+
+ error = __dma_iova_link(dev, addr - iova_start_pad,
+ bounce_phys - iova_start_pad,
+ iova_align(iovad, bounce_len), dir, attrs);
+ if (error)
+ swiotlb_tbl_unmap_single(dev, bounce_phys, bounce_len, dir,
+ attrs);
+ return error;
+}
+
+static int iommu_dma_iova_link_swiotlb(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+ size_t iova_end_pad = iova_offset(iovad, phys + size);
+ dma_addr_t addr = state->addr + offset;
+ size_t mapped = 0;
+ int error;
+
+ if (iova_start_pad) {
+ size_t bounce_len = min(size, iovad->granule - iova_start_pad);
+
+ error = iommu_dma_iova_bounce_and_link(dev, addr, phys,
+ bounce_len, dir, attrs, iova_start_pad);
+ if (error)
+ return error;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+
+ mapped += bounce_len;
+ size -= bounce_len;
+ if (!size)
+ return 0;
+ }
+
+ size -= iova_end_pad;
+ error = __dma_iova_link(dev, addr + mapped, phys + mapped, size, dir,
+ attrs);
+ if (error)
+ goto out_unmap;
+ mapped += size;
+
+ if (iova_end_pad) {
+ error = iommu_dma_iova_bounce_and_link(dev, addr + mapped,
+ phys + mapped, iova_end_pad, dir, attrs, 0);
+ if (error)
+ goto out_unmap;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+ }
+
+ return 0;
+
+out_unmap:
+ dma_iova_unlink(dev, state, 0, mapped, dir, attrs);
+ return error;
+}
+
+/**
+ * dma_iova_link - Link a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @phys: physical address to link
+ * @offset: offset into the IOVA state to map into
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Link a range of IOVA space for the given IOVA state without IOTLB sync.
+ * This function is used to link multiple physical addresses in contiguous
+ * IOVA space without performing costly IOTLB sync.
+ *
+ * The caller is responsible to call to dma_iova_sync() to sync IOTLB at
+ * the end of linkage.
+ */
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+
+ if (WARN_ON_ONCE(iova_start_pad && offset > 0))
+ return -EIO;
+
+ if (dev_use_swiotlb(dev, size, dir) &&
+ iova_unaligned(iovad, phys, size))
+ return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
+ size, dir, attrs);
+
+ return __dma_iova_link(dev, state->addr + offset - iova_start_pad,
+ phys - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad), dir, attrs);
+}
+EXPORT_SYMBOL_GPL(dma_iova_link);
+
+/**
+ * dma_iova_sync - Sync IOTLB
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to sync
+ * @size: size of the buffer
+ *
+ * Sync IOTLB for the given IOVA state. This function should be called on
+ * the IOVA-contiguous range created by one ore more dma_iova_link() calls
+ * to sync the IOTLB.
+ */
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+
+ return iommu_sync_map(domain, addr - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad));
+}
+EXPORT_SYMBOL_GPL(dma_iova_sync);
+
+static void iommu_dma_iova_unlink_range_slow(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ dma_addr_t end = addr + size;
+
+ do {
+ phys_addr_t phys;
+ size_t len;
+
+ phys = iommu_iova_to_phys(domain, addr);
+ if (WARN_ON(!phys))
+ /* Something very horrible happen here */
+ return;
+
+ len = min_t(size_t,
+ end - addr, iovad->granule - iova_start_pad);
+
+ if (!dev_is_dma_coherent(dev) &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_cpu(phys, len, dir);
+
+ swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs);
+
+ addr += len;
+ iova_start_pad = 0;
+ } while (addr < end);
+}
+
+static void __iommu_dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs,
+ bool free_iova)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ struct iommu_iotlb_gather iotlb_gather;
+ size_t unmapped;
+
+ if ((state->__size & DMA_IOVA_USE_SWIOTLB) ||
+ (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)))
+ iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs);
+
+ iommu_iotlb_gather_init(&iotlb_gather);
+ iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain);
+
+ size = iova_align(iovad, size + iova_start_pad);
+ addr -= iova_start_pad;
+ unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather);
+ WARN_ON(unmapped != size);
+
+ if (!iotlb_gather.queued)
+ iommu_iotlb_sync(domain, &iotlb_gather);
+ if (free_iova)
+ iommu_dma_free_iova(domain, addr, size, &iotlb_gather);
+}
+
+/**
+ * dma_iova_unlink - Unlink a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to unlink
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink a range of IOVA space for the given IOVA state.
+ */
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ __iommu_dma_iova_unlink(dev, state, offset, size, dir, attrs, false);
+}
+EXPORT_SYMBOL_GPL(dma_iova_unlink);
+
+/**
+ * dma_iova_destroy - Finish a DMA mapping transaction
+ * @dev: DMA device
+ * @state: IOVA state
+ * @mapped_len: number of bytes to unmap
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink the IOVA range up to @mapped_len and free the entire IOVA space. The
+ * range of IOVA from dma_addr to @mapped_len must all be linked, and be the
+ * only linked IOVA in state.
+ */
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (mapped_len)
+ __iommu_dma_iova_unlink(dev, state, 0, mapped_len, dir, attrs,
+ true);
+ else
+ /*
+ * We can be here if first call to dma_iova_link() failed and
+ * there is nothing to unlink, so let's be more clear.
+ */
+ dma_iova_free(dev, state);
+}
+EXPORT_SYMBOL_GPL(dma_iova_destroy);
+
void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9d728800a862..6c02f93422ce 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2443,8 +2443,8 @@ out_set_count:
return pgsize;
}
-static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
unsigned long orig_iova = iova;
@@ -2453,12 +2453,19 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
return -ENODEV;
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -2506,31 +2513,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
{
const struct iommu_domain_ops *ops = domain->ops;
- int ret;
- might_sleep_if(gfpflags_allow_blocking(gfp));
-
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
+ if (!ops->iotlb_sync_map)
+ return 0;
+ return ops->iotlb_sync_map(domain, iova, size);
+}
- ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
- if (ret == 0 && ops->iotlb_sync_map) {
- ret = ops->iotlb_sync_map(domain, iova, size);
- if (ret)
- goto out_err;
- }
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ int ret;
- return ret;
+ ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp);
+ if (ret)
+ return ret;
-out_err:
- /* undo mappings already done */
- iommu_unmap(domain, iova, size);
+ ret = iommu_sync_map(domain, iova, size);
+ if (ret)
+ iommu_unmap(domain, iova, size);
return ret;
}
@@ -2618,6 +2621,25 @@ size_t iommu_unmap(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap);
+/**
+ * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync
+ * @domain: Domain to manipulate
+ * @iova: IO virtual address to start
+ * @size: Length of the range starting from @iova
+ * @iotlb_gather: range information for a pending IOTLB flush
+ *
+ * iommu_unmap_fast() will remove a translation created by iommu_map().
+ * It can't subdivide a mapping created by iommu_map(), so it should be
+ * called with IOVA ranges that match what was passed to iommu_map(). The
+ * range can aggregate contiguous iommu_map() calls so long as no individual
+ * range is split.
+ *
+ * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers
+ * which manage the IOTLB flushing externally to perform a batched sync.
+ *
+ * Returns: Number of bytes of IOVA unmapped. iova + res will be the point
+ * unmapping stopped.
+ */
size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2630,26 +2652,17 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot,
gfp_t gfp)
{
- const struct iommu_domain_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
- might_sleep_if(gfpflags_allow_blocking(gfp));
-
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
-
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
if (len && s_phys != start + len) {
- ret = __iommu_map(domain, iova + mapped, start,
+ ret = iommu_map_nosync(domain, iova + mapped, start,
len, prot, gfp);
-
if (ret)
goto out_err;
@@ -2672,11 +2685,10 @@ next:
sg = sg_next(sg);
}
- if (ops->iotlb_sync_map) {
- ret = ops->iotlb_sync_map(domain, iova, mapped);
- if (ret)
- goto out_err;
- }
+ ret = iommu_sync_map(domain, iova, mapped);
+ if (ret)
+ goto out_err;
+
return mapped;
out_err:
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 08bb3b031f23..0d196e447142 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -166,6 +166,11 @@ config DW_APB_ICTL
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN_HIERARCHY
+config ECONET_EN751221_INTC
+ bool
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN
+
config FARADAY_FTINTC010
bool
select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 365bcea9a61f..23ca4959e6ce 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2836.o
obj-$(CONFIG_ARCH_ACTIONS) += irq-owl-sirq.o
obj-$(CONFIG_DAVINCI_CP_INTC) += irq-davinci-cp-intc.o
obj-$(CONFIG_EXYNOS_IRQ_COMBINER) += exynos-combiner.o
+obj-$(CONFIG_ECONET_EN751221_INTC) += irq-econet-en751221.o
obj-$(CONFIG_FARADAY_FTINTC010) += irq-ftintc010.o
obj-$(CONFIG_ARCH_HIP04) += irq-hip04.o
obj-$(CONFIG_ARCH_LPC32XX) += irq-lpc32xx.o
diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c
index 552aa04ff063..e7dfcf0cda43 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -180,7 +180,7 @@ static void __init combiner_init(void __iomem *combiner_base,
if (!combiner_data)
return;
- combiner_irq_domain = irq_domain_add_linear(np, nr_irq,
+ combiner_irq_domain = irq_domain_create_linear(of_fwnode_handle(np), nr_irq,
&combiner_irq_domain_ops, combiner_data);
if (WARN_ON(!combiner_irq_domain)) {
pr_warn("%s: irq domain init failed\n", __func__);
diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c
index dfb761e86c9c..8f300843bbca 100644
--- a/drivers/irqchip/irq-al-fic.c
+++ b/drivers/irqchip/irq-al-fic.c
@@ -65,15 +65,13 @@ static int al_fic_irq_set_type(struct irq_data *data, unsigned int flow_type)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
struct al_fic *fic = gc->private;
enum al_fic_state new_state;
- int ret = 0;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
if (((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH) &&
((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_EDGE_RISING)) {
pr_debug("fic doesn't support flow type %d\n", flow_type);
- ret = -EINVAL;
- goto err;
+ return -EINVAL;
}
new_state = (flow_type & IRQ_TYPE_LEVEL_HIGH) ?
@@ -91,16 +89,10 @@ static int al_fic_irq_set_type(struct irq_data *data, unsigned int flow_type)
if (fic->state == AL_FIC_UNCONFIGURED) {
al_fic_set_trigger(fic, gc, new_state);
} else if (fic->state != new_state) {
- pr_debug("fic %s state already configured to %d\n",
- fic->name, fic->state);
- ret = -EINVAL;
- goto err;
+ pr_debug("fic %s state already configured to %d\n", fic->name, fic->state);
+ return -EINVAL;
}
-
-err:
- irq_gc_unlock(gc);
-
- return ret;
+ return 0;
}
static void al_fic_irq_handler(struct irq_desc *desc)
@@ -139,7 +131,7 @@ static int al_fic_register(struct device_node *node,
struct irq_chip_generic *gc;
int ret;
- fic->domain = irq_domain_add_linear(node,
+ fic->domain = irq_domain_create_linear(of_fwnode_handle(node),
NR_FIC_IRQS,
&irq_generic_chip_ops,
fic);
diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c
index a1430ab60a8a..a5289dc26dca 100644
--- a/drivers/irqchip/irq-alpine-msi.c
+++ b/drivers/irqchip/irq-alpine-msi.c
@@ -205,15 +205,14 @@ static int alpine_msix_init_domains(struct alpine_msix_data *priv,
return -ENXIO;
}
- middle_domain = irq_domain_add_hierarchy(gic_domain, 0, 0, NULL,
- &alpine_msix_middle_domain_ops,
- priv);
+ middle_domain = irq_domain_create_hierarchy(gic_domain, 0, 0, NULL,
+ &alpine_msix_middle_domain_ops, priv);
if (!middle_domain) {
pr_err("Failed to create the MSIX middle domain\n");
return -ENOMEM;
}
- msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
+ msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(node),
&alpine_msix_domain_info,
middle_domain);
if (!msi_domain) {
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 974dc088c853..032d66dceb8e 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -1014,7 +1014,7 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
irqc->info.die_stride = off - start_off;
- irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node),
+ irqc->hw_domain = irq_domain_create_tree(of_fwnode_handle(node),
&aic_irq_domain_ops, irqc);
if (WARN_ON(!irqc->hw_domain))
goto err_unmap;
@@ -1067,7 +1067,7 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
if (is_kernel_in_hyp_mode()) {
struct irq_fwspec mi = {
- .fwnode = of_node_to_fwnode(node),
+ .fwnode = of_fwnode_handle(node),
.param_count = 3,
.param = {
[0] = AIC_FIQ, /* This is a lie */
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 2aa6a51e05d0..67b672a78862 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -348,12 +348,12 @@ static int __init mpic_msi_init(struct mpic *mpic, struct device_node *node,
mpic->msi_doorbell_mask = PCI_MSI_FULL_DOORBELL_MASK;
}
- mpic->msi_inner_domain = irq_domain_add_linear(NULL, mpic->msi_doorbell_size,
+ mpic->msi_inner_domain = irq_domain_create_linear(NULL, mpic->msi_doorbell_size,
&mpic_msi_domain_ops, mpic);
if (!mpic->msi_inner_domain)
return -ENOMEM;
- mpic->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node), &mpic_msi_domain_info,
+ mpic->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(node), &mpic_msi_domain_info,
mpic->msi_inner_domain);
if (!mpic->msi_domain) {
irq_domain_remove(mpic->msi_inner_domain);
@@ -492,7 +492,7 @@ static int __init mpic_ipi_init(struct mpic *mpic, struct device_node *node)
{
int base_ipi;
- mpic->ipi_domain = irq_domain_create_linear(of_node_to_fwnode(node), IPI_DOORBELL_NR,
+ mpic->ipi_domain = irq_domain_create_linear(of_fwnode_handle(node), IPI_DOORBELL_NR,
&mpic_ipi_domain_ops, mpic);
if (WARN_ON(!mpic->ipi_domain))
return -ENOMEM;
@@ -546,7 +546,7 @@ static void mpic_reenable_percpu(struct mpic *mpic)
{
/* Re-enable per-CPU interrupts that were enabled before suspend */
for (irq_hw_number_t i = 0; i < MPIC_PER_CPU_IRQS_NR; i++) {
- unsigned int virq = irq_linear_revmap(mpic->domain, i);
+ unsigned int virq = irq_find_mapping(mpic->domain, i);
struct irq_data *d;
if (!virq || !irq_percpu_is_enabled(virq))
@@ -740,7 +740,7 @@ static void mpic_resume(void)
/* Re-enable interrupts */
for (irq_hw_number_t i = 0; i < mpic->domain->hwirq_max; i++) {
- unsigned int virq = irq_linear_revmap(mpic->domain, i);
+ unsigned int virq = irq_find_mapping(mpic->domain, i);
struct irq_data *d;
if (!virq)
@@ -861,7 +861,7 @@ static int __init mpic_of_init(struct device_node *node, struct device_node *par
if (!mpic_is_ipi_available(mpic))
nr_irqs = MPIC_PER_CPU_IRQS_NR;
- mpic->domain = irq_domain_add_linear(node, nr_irqs, &mpic_irq_ops, mpic);
+ mpic->domain = irq_domain_create_linear(of_fwnode_handle(node), nr_irqs, &mpic_irq_ops, mpic);
if (!mpic->domain) {
pr_err("%pOF: Unable to add IRQ domain\n", node);
return -ENOMEM;
diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c
index 9c9fc3e2967e..87c1feb999ff 100644
--- a/drivers/irqchip/irq-aspeed-i2c-ic.c
+++ b/drivers/irqchip/irq-aspeed-i2c-ic.c
@@ -82,7 +82,7 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node,
goto err_iounmap;
}
- i2c_ic->irq_domain = irq_domain_add_linear(node, ASPEED_I2C_IC_NUM_BUS,
+ i2c_ic->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), ASPEED_I2C_IC_NUM_BUS,
&aspeed_i2c_ic_irq_domain_ops,
NULL);
if (!i2c_ic->irq_domain) {
diff --git a/drivers/irqchip/irq-aspeed-intc.c b/drivers/irqchip/irq-aspeed-intc.c
index bd3b759b4b2c..8330221799a0 100644
--- a/drivers/irqchip/irq-aspeed-intc.c
+++ b/drivers/irqchip/irq-aspeed-intc.c
@@ -102,7 +102,7 @@ static int __init aspeed_intc_ic_of_init(struct device_node *node,
writel(0xffffffff, intc_ic->base + INTC_INT_STATUS_REG);
writel(0x0, intc_ic->base + INTC_INT_ENABLE_REG);
- intc_ic->irq_domain = irq_domain_add_linear(node, INTC_IRQS_PER_WORD,
+ intc_ic->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), INTC_IRQS_PER_WORD,
&aspeed_intc_ic_irq_domain_ops, intc_ic);
if (!intc_ic->irq_domain) {
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c
index 94a7223e95df..1c7045467c48 100644
--- a/drivers/irqchip/irq-aspeed-scu-ic.c
+++ b/drivers/irqchip/irq-aspeed-scu-ic.c
@@ -165,7 +165,7 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
goto err;
}
- scu_ic->irq_domain = irq_domain_add_linear(node, scu_ic->num_irqs,
+ scu_ic->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), scu_ic->num_irqs,
&aspeed_scu_ic_domain_ops,
scu_ic);
if (!scu_ic->irq_domain) {
diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c
index 62ccf2c0c414..9b665b5bb531 100644
--- a/drivers/irqchip/irq-aspeed-vic.c
+++ b/drivers/irqchip/irq-aspeed-vic.c
@@ -211,8 +211,8 @@ static int __init avic_of_init(struct device_node *node,
set_handle_irq(avic_handle_irq);
/* Register our domain */
- vic->dom = irq_domain_add_simple(node, NUM_IRQS, 0,
- &avic_dom_ops, vic);
+ vic->dom = irq_domain_create_simple(of_fwnode_handle(node), NUM_IRQS, 0,
+ &avic_dom_ops, vic);
return 0;
}
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
index 92f001a5ff8d..268cc18b781f 100644
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -147,7 +147,7 @@ static int __init ath79_misc_intc_of_init(
return -ENOMEM;
}
- domain = irq_domain_add_linear(node, ATH79_MISC_IRQ_COUNT,
+ domain = irq_domain_create_linear(of_fwnode_handle(node), ATH79_MISC_IRQ_COUNT,
&misc_irq_domain_ops, base);
if (!domain) {
pr_err("Failed to add MISC irqdomain\n");
@@ -188,7 +188,7 @@ void __init ath79_misc_irq_init(void __iomem *regs, int irq,
else
ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- domain = irq_domain_add_legacy(NULL, ATH79_MISC_IRQ_COUNT,
+ domain = irq_domain_create_legacy(NULL, ATH79_MISC_IRQ_COUNT,
irq_base, 0, &misc_irq_domain_ops, regs);
if (!domain)
panic("Failed to create MISC irqdomain");
diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c
index 4525366d16d6..3cad30a40c19 100644
--- a/drivers/irqchip/irq-atmel-aic-common.c
+++ b/drivers/irqchip/irq-atmel-aic-common.c
@@ -228,7 +228,7 @@ struct irq_domain *__init aic_common_of_init(struct device_node *node,
goto err_iounmap;
}
- domain = irq_domain_add_linear(node, nchips * 32, ops, aic);
+ domain = irq_domain_create_linear(of_fwnode_handle(node), nchips * 32, ops, aic);
if (!domain) {
ret = -ENOMEM;
goto err_free_aic;
diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
index 3839ad79ad31..03aeed39a4d2 100644
--- a/drivers/irqchip/irq-atmel-aic.c
+++ b/drivers/irqchip/irq-atmel-aic.c
@@ -78,9 +78,8 @@ static int aic_retrigger(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
/* Enable interrupt on AIC5 */
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, d->mask, AT91_AIC_ISCR);
- irq_gc_unlock(gc);
return 1;
}
@@ -106,30 +105,27 @@ static void aic_suspend(struct irq_data *d)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IDCR);
irq_reg_writel(gc, gc->wake_active, AT91_AIC_IECR);
- irq_gc_unlock(gc);
}
static void aic_resume(struct irq_data *d)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, gc->wake_active, AT91_AIC_IDCR);
irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IECR);
- irq_gc_unlock(gc);
}
static void aic_pm_shutdown(struct irq_data *d)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR);
irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR);
- irq_gc_unlock(gc);
}
#else
#define aic_suspend NULL
@@ -175,10 +171,8 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
{
struct irq_domain_chip_generic *dgc = d->gc;
struct irq_chip_generic *gc;
- unsigned long flags;
unsigned smr;
- int idx;
- int ret;
+ int idx, ret;
if (!dgc)
return -EINVAL;
@@ -194,11 +188,10 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
gc = dgc->gc[idx];
- irq_gc_lock_irqsave(gc, flags);
+ guard(raw_spinlock_irq)(&gc->lock);
smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq));
aic_common_set_priority(intspec[2], &smr);
irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
- irq_gc_unlock_irqrestore(gc, flags);
return ret;
}
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index e98c2875af9e..60b00d2c3d7a 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c
@@ -92,11 +92,10 @@ static void aic5_mask(struct irq_data *d)
* Disable interrupt on AIC5. We always take the lock of the
* first irq chip as all chips share the same registers.
*/
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR);
irq_reg_writel(gc, 1, AT91_AIC5_IDCR);
gc->mask_cache &= ~d->mask;
- irq_gc_unlock(bgc);
}
static void aic5_unmask(struct irq_data *d)
@@ -109,11 +108,10 @@ static void aic5_unmask(struct irq_data *d)
* Enable interrupt on AIC5. We always take the lock of the
* first irq chip as all chips share the same registers.
*/
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR);
irq_reg_writel(gc, 1, AT91_AIC5_IECR);
gc->mask_cache |= d->mask;
- irq_gc_unlock(bgc);
}
static int aic5_retrigger(struct irq_data *d)
@@ -122,11 +120,9 @@ static int aic5_retrigger(struct irq_data *d)
struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0);
/* Enable interrupt on AIC5 */
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR);
irq_reg_writel(bgc, 1, AT91_AIC5_ISCR);
- irq_gc_unlock(bgc);
-
return 1;
}
@@ -137,14 +133,12 @@ static int aic5_set_type(struct irq_data *d, unsigned type)
unsigned int smr;
int ret;
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR);
smr = irq_reg_readl(bgc, AT91_AIC5_SMR);
ret = aic_common_set_type(d, type, &smr);
if (!ret)
irq_reg_writel(bgc, smr, AT91_AIC5_SMR);
- irq_gc_unlock(bgc);
-
return ret;
}
@@ -166,7 +160,7 @@ static void aic5_suspend(struct irq_data *d)
smr_cache[i] = irq_reg_readl(bgc, AT91_AIC5_SMR);
}
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
for (i = 0; i < dgc->irqs_per_chip; i++) {
mask = 1 << i;
if ((mask & gc->mask_cache) == (mask & gc->wake_active))
@@ -178,7 +172,6 @@ static void aic5_suspend(struct irq_data *d)
else
irq_reg_writel(bgc, 1, AT91_AIC5_IDCR);
}
- irq_gc_unlock(bgc);
}
static void aic5_resume(struct irq_data *d)
@@ -190,7 +183,7 @@ static void aic5_resume(struct irq_data *d)
int i;
u32 mask;
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
if (smr_cache) {
irq_reg_writel(bgc, 0xffffffff, AT91_AIC5_SPU);
@@ -214,7 +207,6 @@ static void aic5_resume(struct irq_data *d)
else
irq_reg_writel(bgc, 1, AT91_AIC5_IDCR);
}
- irq_gc_unlock(bgc);
}
static void aic5_pm_shutdown(struct irq_data *d)
@@ -225,13 +217,12 @@ static void aic5_pm_shutdown(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
int i;
- irq_gc_lock(bgc);
+ guard(raw_spinlock)(&bgc->lock);
for (i = 0; i < dgc->irqs_per_chip; i++) {
irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR);
irq_reg_writel(bgc, 1, AT91_AIC5_IDCR);
irq_reg_writel(bgc, 1, AT91_AIC5_ICCR);
}
- irq_gc_unlock(bgc);
}
#else
#define aic5_suspend NULL
@@ -277,7 +268,6 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
unsigned int *out_type)
{
struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0);
- unsigned long flags;
unsigned smr;
int ret;
@@ -289,13 +279,11 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
if (ret)
return ret;
- irq_gc_lock_irqsave(bgc, flags);
+ guard(raw_spinlock_irq)(&bgc->lock);
irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR);
smr = irq_reg_readl(bgc, AT91_AIC5_SMR);
aic_common_set_priority(intspec[2], &smr);
irq_reg_writel(bgc, smr, AT91_AIC5_SMR);
- irq_gc_unlock_irqrestore(bgc, flags);
-
return ret;
}
diff --git a/drivers/irqchip/irq-bcm2712-mip.c b/drivers/irqchip/irq-bcm2712-mip.c
index 4cce24233f0f..63de5ef6cf2d 100644
--- a/drivers/irqchip/irq-bcm2712-mip.c
+++ b/drivers/irqchip/irq-bcm2712-mip.c
@@ -11,7 +11,7 @@
#include <linux/of_address.h>
#include <linux/of_platform.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#define MIP_INT_RAISE 0x00
#define MIP_INT_CLEAR 0x10
@@ -174,8 +174,8 @@ static int mip_init_domains(struct mip_priv *mip, struct device_node *np)
{
struct irq_domain *middle;
- middle = irq_domain_add_hierarchy(mip->parent, 0, mip->num_msis, np,
- &mip_middle_domain_ops, mip);
+ middle = irq_domain_create_hierarchy(mip->parent, 0, mip->num_msis, of_fwnode_handle(np),
+ &mip_middle_domain_ops, mip);
if (!middle)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index 6c20604c2242..1e384c870350 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -144,7 +144,7 @@ static int __init armctrl_of_init(struct device_node *node,
if (!base)
panic("%pOF: unable to map IC registers\n", node);
- intc.domain = irq_domain_add_linear(node, MAKE_HWIRQ(NR_BANKS, 0),
+ intc.domain = irq_domain_create_linear(of_fwnode_handle(node), MAKE_HWIRQ(NR_BANKS, 0),
&armctrl_ops, NULL);
if (!intc.domain)
panic("%pOF: unable to create IRQ domain\n", node);
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index e366257684b5..fafd1f71348e 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -325,7 +325,7 @@ static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
bcm2835_init_local_timer_frequency();
- intc.domain = irq_domain_add_linear(node, LAST_IRQ + 1,
+ intc.domain = irq_domain_create_linear(of_fwnode_handle(node), LAST_IRQ + 1,
&bcm2836_arm_irqchip_intc_ops,
NULL);
if (!intc.domain)
diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
index 90daa274ef23..ca4e141c5bc2 100644
--- a/drivers/irqchip/irq-bcm6345-l1.c
+++ b/drivers/irqchip/irq-bcm6345-l1.c
@@ -316,7 +316,7 @@ static int __init bcm6345_l1_of_init(struct device_node *dn,
raw_spin_lock_init(&intc->lock);
- intc->domain = irq_domain_add_linear(dn, IRQS_PER_WORD * intc->n_words,
+ intc->domain = irq_domain_create_linear(of_fwnode_handle(dn), IRQS_PER_WORD * intc->n_words,
&bcm6345_l1_domain_ops,
intc);
if (!intc->domain) {
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 36e71af054e9..04fac0cc857f 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -416,7 +416,7 @@ static int __init bcm7038_l1_of_init(struct device_node *dn,
}
}
- intc->domain = irq_domain_add_linear(dn, IRQS_PER_WORD * intc->n_words,
+ intc->domain = irq_domain_create_linear(of_fwnode_handle(dn), IRQS_PER_WORD * intc->n_words,
&bcm7038_l1_domain_ops,
intc);
if (!intc->domain) {
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index 1e9dab6e0d86..ff22c3104401 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -63,16 +63,15 @@ static void bcm7120_l2_intc_irq_handle(struct irq_desc *desc)
for (idx = 0; idx < b->n_words; idx++) {
int base = idx * IRQS_PER_WORD;
- struct irq_chip_generic *gc =
- irq_get_domain_generic_chip(b->domain, base);
+ struct irq_chip_generic *gc;
unsigned long pending;
int hwirq;
- irq_gc_lock(gc);
- pending = irq_reg_readl(gc, b->stat_offset[idx]) &
- gc->mask_cache &
- data->irq_map_mask[idx];
- irq_gc_unlock(gc);
+ gc = irq_get_domain_generic_chip(b->domain, base);
+ scoped_guard (raw_spinlock, &gc->lock) {
+ pending = irq_reg_readl(gc, b->stat_offset[idx]) & gc->mask_cache &
+ data->irq_map_mask[idx];
+ }
for_each_set_bit(hwirq, &pending, IRQS_PER_WORD)
generic_handle_domain_irq(b->domain, base + hwirq);
@@ -86,11 +85,9 @@ static void bcm7120_l2_intc_suspend(struct irq_chip_generic *gc)
struct bcm7120_l2_intc_data *b = gc->private;
struct irq_chip_type *ct = gc->chip_types;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
if (b->can_wake)
- irq_reg_writel(gc, gc->mask_cache | gc->wake_active,
- ct->regs.mask);
- irq_gc_unlock(gc);
+ irq_reg_writel(gc, gc->mask_cache | gc->wake_active, ct->regs.mask);
}
static void bcm7120_l2_intc_resume(struct irq_chip_generic *gc)
@@ -98,9 +95,8 @@ static void bcm7120_l2_intc_resume(struct irq_chip_generic *gc)
struct irq_chip_type *ct = gc->chip_types;
/* Restore the saved mask */
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, gc->mask_cache, ct->regs.mask);
- irq_gc_unlock(gc);
}
static int bcm7120_l2_intc_init_one(struct device_node *dn,
@@ -264,7 +260,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
goto out_free_l1_data;
}
- data->domain = irq_domain_add_linear(dn, IRQS_PER_WORD * data->n_words,
+ data->domain = irq_domain_create_linear(of_fwnode_handle(dn), IRQS_PER_WORD * data->n_words,
&irq_generic_chip_ops, NULL);
if (!data->domain) {
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index db4c9721fcf2..1bec5b2cd3f0 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -97,9 +97,8 @@ static void __brcmstb_l2_intc_suspend(struct irq_data *d, bool save)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct irq_chip_type *ct = irq_data_get_chip_type(d);
struct brcmstb_l2_intc_data *b = gc->private;
- unsigned long flags;
- irq_gc_lock_irqsave(gc, flags);
+ guard(raw_spinlock_irqsave)(&gc->lock);
/* Save the current mask */
if (save)
b->saved_mask = irq_reg_readl(gc, ct->regs.mask);
@@ -109,7 +108,6 @@ static void __brcmstb_l2_intc_suspend(struct irq_data *d, bool save)
irq_reg_writel(gc, ~gc->wake_active, ct->regs.disable);
irq_reg_writel(gc, gc->wake_active, ct->regs.enable);
}
- irq_gc_unlock_irqrestore(gc, flags);
}
static void brcmstb_l2_intc_shutdown(struct irq_data *d)
@@ -127,9 +125,8 @@ static void brcmstb_l2_intc_resume(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct irq_chip_type *ct = irq_data_get_chip_type(d);
struct brcmstb_l2_intc_data *b = gc->private;
- unsigned long flags;
- irq_gc_lock_irqsave(gc, flags);
+ guard(raw_spinlock_irqsave)(&gc->lock);
if (ct->chip.irq_ack) {
/* Clear unmasked non-wakeup interrupts */
irq_reg_writel(gc, ~b->saved_mask & ~gc->wake_active,
@@ -139,7 +136,6 @@ static void brcmstb_l2_intc_resume(struct irq_data *d)
/* Restore the saved mask */
irq_reg_writel(gc, b->saved_mask, ct->regs.disable);
irq_reg_writel(gc, ~b->saved_mask, ct->regs.enable);
- irq_gc_unlock_irqrestore(gc, flags);
}
static int __init brcmstb_l2_intc_of_init(struct device_node *np,
@@ -182,7 +178,7 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
goto out_unmap;
}
- data->domain = irq_domain_add_linear(np, 32,
+ data->domain = irq_domain_create_linear(of_fwnode_handle(np), 32,
&irq_generic_chip_ops, NULL);
if (!data->domain) {
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-clps711x.c b/drivers/irqchip/irq-clps711x.c
index 48c73c948ddf..c4b73ba2323b 100644
--- a/drivers/irqchip/irq-clps711x.c
+++ b/drivers/irqchip/irq-clps711x.c
@@ -184,8 +184,8 @@ static int __init _clps711x_intc_init(struct device_node *np,
clps711x_intc->ops.map = clps711x_intc_irq_map;
clps711x_intc->ops.xlate = irq_domain_xlate_onecell;
clps711x_intc->domain =
- irq_domain_add_legacy(np, ARRAY_SIZE(clps711x_irqs),
- 0, 0, &clps711x_intc->ops, NULL);
+ irq_domain_create_legacy(of_fwnode_handle(np), ARRAY_SIZE(clps711x_irqs), 0, 0,
+ &clps711x_intc->ops, NULL);
if (!clps711x_intc->domain) {
err = -ENOMEM;
goto out_irqfree;
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index a05a7501e107..66bb39e24a52 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -351,10 +351,8 @@ static int __init irqcrossbar_init(struct device_node *node,
if (err)
return err;
- domain = irq_domain_add_hierarchy(parent_domain, 0,
- cb->max_crossbar_sources,
- node, &crossbar_domain_ops,
- NULL);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, cb->max_crossbar_sources,
+ of_fwnode_handle(node), &crossbar_domain_ops, NULL);
if (!domain) {
pr_err("%pOF: failed to allocated domain\n", node);
return -ENOMEM;
diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c
index 6710691e4c25..5b7150705d29 100644
--- a/drivers/irqchip/irq-csky-apb-intc.c
+++ b/drivers/irqchip/irq-csky-apb-intc.c
@@ -50,11 +50,10 @@ static void irq_ck_mask_set_bit(struct irq_data *d)
unsigned long ifr = ct->regs.mask - 8;
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
*ct->mask_cache |= mask;
irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
irq_reg_writel(gc, irq_reg_readl(gc, ifr) & ~mask, ifr);
- irq_gc_unlock(gc);
}
static void __init ck_set_gc(struct device_node *node, void __iomem *reg_base,
@@ -114,7 +113,7 @@ ck_intc_init_comm(struct device_node *node, struct device_node *parent)
return -EINVAL;
}
- root_domain = irq_domain_add_linear(node, nr_irq,
+ root_domain = irq_domain_create_linear(of_fwnode_handle(node), nr_irq,
&irq_generic_chip_ops, NULL);
if (!root_domain) {
pr_err("C-SKY Intc irq_domain_add failed.\n");
diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c
index 4aebd67d4f8f..1d1f5091f26f 100644
--- a/drivers/irqchip/irq-csky-mpintc.c
+++ b/drivers/irqchip/irq-csky-mpintc.c
@@ -255,7 +255,7 @@ csky_mpintc_init(struct device_node *node, struct device_node *parent)
writel_relaxed(BIT(0), INTCG_base + INTCG_ICTLR);
}
- root_domain = irq_domain_add_linear(node, nr_irq, &csky_irqdomain_ops,
+ root_domain = irq_domain_create_linear(of_fwnode_handle(node), nr_irq, &csky_irqdomain_ops,
NULL);
if (!root_domain)
return -ENXIO;
diff --git a/drivers/irqchip/irq-davinci-cp-intc.c b/drivers/irqchip/irq-davinci-cp-intc.c
index d7948c55f542..00cdcc90f614 100644
--- a/drivers/irqchip/irq-davinci-cp-intc.c
+++ b/drivers/irqchip/irq-davinci-cp-intc.c
@@ -204,8 +204,10 @@ static int __init davinci_cp_intc_do_init(struct resource *res, unsigned int num
return irq_base;
}
- davinci_cp_intc_irq_domain = irq_domain_add_legacy(node, num_irqs, irq_base, 0,
- &davinci_cp_intc_irq_domain_ops, NULL);
+ davinci_cp_intc_irq_domain = irq_domain_create_legacy(of_fwnode_handle(node), num_irqs,
+ irq_base, 0,
+ &davinci_cp_intc_irq_domain_ops,
+ NULL);
if (!davinci_cp_intc_irq_domain) {
pr_err("%s: unable to create an interrupt domain\n", __func__);
diff --git a/drivers/irqchip/irq-digicolor.c b/drivers/irqchip/irq-digicolor.c
index 3b0d78aac13b..eb5a8de82751 100644
--- a/drivers/irqchip/irq-digicolor.c
+++ b/drivers/irqchip/irq-digicolor.c
@@ -95,7 +95,7 @@ static int __init digicolor_of_init(struct device_node *node,
regmap_write(ucregs, UC_IRQ_CONTROL, 1);
digicolor_irq_domain =
- irq_domain_add_linear(node, 64, &irq_generic_chip_ops, NULL);
+ irq_domain_create_linear(of_fwnode_handle(node), 64, &irq_generic_chip_ops, NULL);
if (!digicolor_irq_domain) {
pr_err("%pOF: unable to create IRQ domain\n", node);
return -ENOMEM;
diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c
index d5c1c750c8d2..4240a0dbf627 100644
--- a/drivers/irqchip/irq-dw-apb-ictl.c
+++ b/drivers/irqchip/irq-dw-apb-ictl.c
@@ -101,10 +101,9 @@ static void dw_apb_ictl_resume(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct irq_chip_type *ct = irq_data_get_chip_type(d);
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
writel_relaxed(~0, gc->reg_base + ct->regs.enable);
writel_relaxed(*ct->mask_cache, gc->reg_base + ct->regs.mask);
- irq_gc_unlock(gc);
}
#else
#define dw_apb_ictl_resume NULL
@@ -173,7 +172,7 @@ static int __init dw_apb_ictl_init(struct device_node *np,
else
nrirqs = fls(readl_relaxed(iobase + APB_INT_ENABLE_L));
- domain = irq_domain_add_linear(np, nrirqs, domain_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(np), nrirqs, domain_ops, NULL);
if (!domain) {
pr_err("%pOF: unable to add irq domain\n", np);
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-econet-en751221.c b/drivers/irqchip/irq-econet-en751221.c
new file mode 100644
index 000000000000..d83d5eb12795
--- /dev/null
+++ b/drivers/irqchip/irq-econet-en751221.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * EN751221 Interrupt Controller Driver.
+ *
+ * The EcoNet EN751221 Interrupt Controller is a simple interrupt controller
+ * designed for the MIPS 34Kc MT SMP processor with 2 VPEs. Each interrupt can
+ * be routed to either VPE but not both, so to support per-CPU interrupts, a
+ * secondary IRQ number is allocated to control masking/unmasking on VPE#1. In
+ * this driver, these are called "shadow interrupts". The assignment of shadow
+ * interrupts is defined by the SoC integrator when wiring the interrupt lines,
+ * so they are configurable in the device tree.
+ *
+ * If an interrupt (say 30) needs per-CPU capability, the SoC integrator
+ * allocates another IRQ number (say 29) to be its shadow. The device tree
+ * reflects this by adding the pair <30 29> to the "econet,shadow-interrupts"
+ * property.
+ *
+ * When VPE#1 requests IRQ 30, the driver manipulates the mask bit for IRQ 29,
+ * telling the hardware to mask VPE#1's view of IRQ 30.
+ *
+ * Copyright (C) 2025 Caleb James DeLisle <cjd@cjdns.fr>
+ */
+
+#include <linux/cleanup.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+
+#define IRQ_COUNT 40
+
+#define NOT_PERCPU 0xff
+#define IS_SHADOW 0xfe
+
+#define REG_MASK0 0x04
+#define REG_MASK1 0x50
+#define REG_PENDING0 0x08
+#define REG_PENDING1 0x54
+
+/**
+ * @membase: Base address of the interrupt controller registers
+ * @interrupt_shadows: Array of all interrupts, for each value,
+ * - NOT_PERCPU: This interrupt is not per-cpu, so it has no shadow
+ * - IS_SHADOW: This interrupt is a shadow of another per-cpu interrupt
+ * - else: This is a per-cpu interrupt whose shadow is the value
+ */
+static struct {
+ void __iomem *membase;
+ u8 interrupt_shadows[IRQ_COUNT];
+} econet_intc __ro_after_init;
+
+static DEFINE_RAW_SPINLOCK(irq_lock);
+
+/* IRQs must be disabled */
+static void econet_wreg(u32 reg, u32 val, u32 mask)
+{
+ u32 v;
+
+ guard(raw_spinlock)(&irq_lock);
+
+ v = ioread32(econet_intc.membase + reg);
+ v &= ~mask;
+ v |= val & mask;
+ iowrite32(v, econet_intc.membase + reg);
+}
+
+/* IRQs must be disabled */
+static void econet_chmask(u32 hwirq, bool unmask)
+{
+ u32 reg, mask;
+ u8 shadow;
+
+ /*
+ * If the IRQ is a shadow, it should never be manipulated directly.
+ * It should only be masked/unmasked as a result of the "real" per-cpu
+ * irq being manipulated by a thread running on VPE#1.
+ * If it is per-cpu (has a shadow), and we're on VPE#1, the shadow is what we mask.
+ * This is single processor only, so smp_processor_id() never exceeds 1.
+ */
+ shadow = econet_intc.interrupt_shadows[hwirq];
+ if (WARN_ON_ONCE(shadow == IS_SHADOW))
+ return;
+ else if (shadow != NOT_PERCPU && smp_processor_id() == 1)
+ hwirq = shadow;
+
+ if (hwirq >= 32) {
+ reg = REG_MASK1;
+ mask = BIT(hwirq - 32);
+ } else {
+ reg = REG_MASK0;
+ mask = BIT(hwirq);
+ }
+
+ econet_wreg(reg, unmask ? mask : 0, mask);
+}
+
+/* IRQs must be disabled */
+static void econet_intc_mask(struct irq_data *d)
+{
+ econet_chmask(d->hwirq, false);
+}
+
+/* IRQs must be disabled */
+static void econet_intc_unmask(struct irq_data *d)
+{
+ econet_chmask(d->hwirq, true);
+}
+
+static void econet_mask_all(void)
+{
+ /* IRQs are generally disabled during init, but guarding here makes it non-obligatory. */
+ guard(irqsave)();
+ econet_wreg(REG_MASK0, 0, ~0);
+ econet_wreg(REG_MASK1, 0, ~0);
+}
+
+static void econet_intc_handle_pending(struct irq_domain *d, u32 pending, u32 offset)
+{
+ int hwirq;
+
+ while (pending) {
+ hwirq = fls(pending) - 1;
+ generic_handle_domain_irq(d, hwirq + offset);
+ pending &= ~BIT(hwirq);
+ }
+}
+
+static void econet_intc_from_parent(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irq_domain *domain;
+ u32 pending0, pending1;
+
+ chained_irq_enter(chip, desc);
+
+ pending0 = ioread32(econet_intc.membase + REG_PENDING0);
+ pending1 = ioread32(econet_intc.membase + REG_PENDING1);
+
+ if (unlikely(!(pending0 | pending1))) {
+ spurious_interrupt();
+ } else {
+ domain = irq_desc_get_handler_data(desc);
+ econet_intc_handle_pending(domain, pending0, 0);
+ econet_intc_handle_pending(domain, pending1, 32);
+ }
+
+ chained_irq_exit(chip, desc);
+}
+
+static const struct irq_chip econet_irq_chip;
+
+static int econet_intc_map(struct irq_domain *d, u32 irq, irq_hw_number_t hwirq)
+{
+ int ret;
+
+ if (hwirq >= IRQ_COUNT) {
+ pr_err("%s: hwirq %lu out of range\n", __func__, hwirq);
+ return -EINVAL;
+ } else if (econet_intc.interrupt_shadows[hwirq] == IS_SHADOW) {
+ pr_err("%s: can't map hwirq %lu, it is a shadow interrupt\n", __func__, hwirq);
+ return -EINVAL;
+ }
+
+ if (econet_intc.interrupt_shadows[hwirq] == NOT_PERCPU) {
+ irq_set_chip_and_handler(irq, &econet_irq_chip, handle_level_irq);
+ } else {
+ irq_set_chip_and_handler(irq, &econet_irq_chip, handle_percpu_devid_irq);
+ ret = irq_set_percpu_devid(irq);
+ if (ret)
+ pr_warn("%s: Failed irq_set_percpu_devid for %u: %d\n", d->name, irq, ret);
+ }
+
+ irq_set_chip_data(irq, NULL);
+ return 0;
+}
+
+static const struct irq_chip econet_irq_chip = {
+ .name = "en751221-intc",
+ .irq_unmask = econet_intc_unmask,
+ .irq_mask = econet_intc_mask,
+ .irq_mask_ack = econet_intc_mask,
+};
+
+static const struct irq_domain_ops econet_domain_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ .map = econet_intc_map
+};
+
+static int __init get_shadow_interrupts(struct device_node *node)
+{
+ const char *field = "econet,shadow-interrupts";
+ int num_shadows;
+
+ num_shadows = of_property_count_u32_elems(node, field);
+
+ memset(econet_intc.interrupt_shadows, NOT_PERCPU,
+ sizeof(econet_intc.interrupt_shadows));
+
+ if (num_shadows <= 0) {
+ return 0;
+ } else if (num_shadows % 2) {
+ pr_err("%pOF: %s count is odd, ignoring\n", node, field);
+ return 0;
+ }
+
+ u32 *shadows __free(kfree) = kmalloc_array(num_shadows, sizeof(u32), GFP_KERNEL);
+ if (!shadows)
+ return -ENOMEM;
+
+ if (of_property_read_u32_array(node, field, shadows, num_shadows)) {
+ pr_err("%pOF: Failed to read %s\n", node, field);
+ return -EINVAL;
+ }
+
+ for (int i = 0; i < num_shadows; i += 2) {
+ u32 shadow = shadows[i + 1];
+ u32 target = shadows[i];
+
+ if (shadow > IRQ_COUNT) {
+ pr_err("%pOF: %s[%d] shadow(%d) out of range\n",
+ node, field, i + 1, shadow);
+ continue;
+ }
+
+ if (target >= IRQ_COUNT) {
+ pr_err("%pOF: %s[%d] target(%d) out of range\n", node, field, i, target);
+ continue;
+ }
+
+ if (econet_intc.interrupt_shadows[target] != NOT_PERCPU) {
+ pr_err("%pOF: %s[%d] target(%d) already has a shadow\n",
+ node, field, i, target);
+ continue;
+ }
+
+ if (econet_intc.interrupt_shadows[shadow] != NOT_PERCPU) {
+ pr_err("%pOF: %s[%d] shadow(%d) already has a target\n",
+ node, field, i + 1, shadow);
+ continue;
+ }
+
+ econet_intc.interrupt_shadows[target] = shadow;
+ econet_intc.interrupt_shadows[shadow] = IS_SHADOW;
+ }
+
+ return 0;
+}
+
+static int __init econet_intc_of_init(struct device_node *node, struct device_node *parent)
+{
+ struct irq_domain *domain;
+ struct resource res;
+ int ret, irq;
+
+ ret = get_shadow_interrupts(node);
+ if (ret)
+ return ret;
+
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq) {
+ pr_err("%pOF: DT: Failed to get IRQ from 'interrupts'\n", node);
+ return -EINVAL;
+ }
+
+ if (of_address_to_resource(node, 0, &res)) {
+ pr_err("%pOF: DT: Failed to get 'reg'\n", node);
+ ret = -EINVAL;
+ goto err_dispose_mapping;
+ }
+
+ if (!request_mem_region(res.start, resource_size(&res), res.name)) {
+ pr_err("%pOF: Failed to request memory\n", node);
+ ret = -EBUSY;
+ goto err_dispose_mapping;
+ }
+
+ econet_intc.membase = ioremap(res.start, resource_size(&res));
+ if (!econet_intc.membase) {
+ pr_err("%pOF: Failed to remap membase\n", node);
+ ret = -ENOMEM;
+ goto err_release;
+ }
+
+ econet_mask_all();
+
+ domain = irq_domain_create_linear(of_fwnode_handle(node), IRQ_COUNT,
+ &econet_domain_ops, NULL);
+ if (!domain) {
+ pr_err("%pOF: Failed to add irqdomain\n", node);
+ ret = -ENOMEM;
+ goto err_unmap;
+ }
+
+ irq_set_chained_handler_and_data(irq, econet_intc_from_parent, domain);
+
+ return 0;
+
+err_unmap:
+ iounmap(econet_intc.membase);
+err_release:
+ release_mem_region(res.start, resource_size(&res));
+err_dispose_mapping:
+ irq_dispose_mapping(irq);
+ return ret;
+}
+
+IRQCHIP_DECLARE(econet_en751221_intc, "econet,en751221-intc", econet_intc_of_init);
diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c
index b91c358ea6db..a59a66d79da6 100644
--- a/drivers/irqchip/irq-ftintc010.c
+++ b/drivers/irqchip/irq-ftintc010.c
@@ -180,8 +180,9 @@ static int __init ft010_of_init_irq(struct device_node *node,
writel(0, FT010_IRQ_MASK(f->base));
writel(0, FT010_FIQ_MASK(f->base));
- f->domain = irq_domain_add_simple(node, FT010_NUM_IRQS, 0,
- &ft010_irqdomain_ops, f);
+ f->domain = irq_domain_create_simple(of_fwnode_handle(node),
+ FT010_NUM_IRQS, 0,
+ &ft010_irqdomain_ops, f);
set_handle_irq(ft010_irqchip_handle_irq);
return 0;
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index dc98c39d2b20..24ef5af569fe 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -26,7 +26,7 @@
#include <linux/irqchip/arm-gic.h>
#include <linux/irqchip/arm-gic-common.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
/*
* MSI_TYPER:
@@ -252,7 +252,7 @@ static void __init gicv2m_teardown(void)
static struct msi_parent_ops gicv2m_msi_parent_ops = {
.supported_flags = GICV2M_MSI_FLAGS_SUPPORTED,
.required_flags = GICV2M_MSI_FLAGS_REQUIRED,
- .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
.prefix = "GICv2m-",
@@ -261,23 +261,23 @@ static struct msi_parent_ops gicv2m_msi_parent_ops = {
static __init int gicv2m_allocate_domains(struct irq_domain *parent)
{
- struct irq_domain *inner_domain;
+ struct irq_domain_info info = {
+ .ops = &gicv2m_domain_ops,
+ .parent = parent,
+ };
struct v2m_data *v2m;
v2m = list_first_entry_or_null(&v2m_nodes, struct v2m_data, entry);
if (!v2m)
return 0;
- inner_domain = irq_domain_create_hierarchy(parent, 0, 0, v2m->fwnode,
- &gicv2m_domain_ops, v2m);
- if (!inner_domain) {
+ info.host_data = v2m;
+ info.fwnode = v2m->fwnode;
+
+ if (!msi_create_parent_irq_domain(&info, &gicv2m_msi_parent_ops)) {
pr_err("Failed to create GICv2m domain\n");
return -ENOMEM;
}
-
- irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);
- inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
- inner_domain->msi_parent_ops = &gicv2m_msi_parent_ops;
return 0;
}
diff --git a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
index 8e87fc35f8aa..11549d85f23b 100644
--- a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
@@ -152,7 +152,7 @@ static void __init its_fsl_mc_of_msi_init(void)
if (!of_property_read_bool(np, "msi-controller"))
continue;
- its_fsl_mc_msi_init_one(of_node_to_fwnode(np),
+ its_fsl_mc_msi_init_one(of_fwnode_handle(np),
np->full_name);
}
}
diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-v3-its-msi-parent.c
index bdb04c808148..a5e110ffdd88 100644
--- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c
+++ b/drivers/irqchip/irq-gic-v3-its-msi-parent.c
@@ -8,7 +8,7 @@
#include <linux/pci.h>
#include "irq-gic-common.h"
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#define ITS_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
MSI_FLAG_USE_DEF_CHIP_OPS | \
@@ -68,17 +68,6 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain->parent, pdev);
/*
- * @domain->msi_domain_info->hwsize contains the size of the
- * MSI[-X] domain, but vector allocation happens one by one. This
- * needs some thought when MSI comes into play as the size of MSI
- * might be unknown at domain creation time and therefore set to
- * MSI_MAX_INDEX.
- */
- msi_info = msi_get_domain_info(domain);
- if (msi_info->hwsize > nvec)
- nvec = msi_info->hwsize;
-
- /*
* Always allocate a power of 2, and special case device 0 for
* broken systems where the DevID is not wired (and all devices
* appear as DevID 0). For that reason, we generously allocate a
@@ -118,6 +107,14 @@ static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev,
index++;
} while (!ret);
+ if (ret) {
+ struct device_node *np = NULL;
+
+ ret = of_map_id(dev->of_node, dev->id, "msi-map", "msi-map-mask", &np, dev_id);
+ if (np)
+ of_node_put(np);
+ }
+
return ret;
}
@@ -143,14 +140,6 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev,
/* ITS specific DeviceID, as the core ITS ignores dev. */
info->scratchpad[0].ul = dev_id;
- /*
- * @domain->msi_domain_info->hwsize contains the size of the device
- * domain, but vector allocation happens one by one.
- */
- msi_info = msi_get_domain_info(domain);
- if (msi_info->hwsize > nvec)
- nvec = msi_info->hwsize;
-
/* Allocate at least 32 MSIs, and always as a power of 2 */
nvec = max_t(int, 32, roundup_pow_of_two(nvec));
@@ -159,6 +148,14 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev,
dev, nvec, info);
}
+static void its_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *info)
+{
+ struct msi_domain_info *msi_info;
+
+ msi_info = msi_get_domain_info(domain->parent);
+ msi_info->ops->msi_teardown(domain->parent, info);
+}
+
static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
struct irq_domain *real_parent, struct msi_domain_info *info)
{
@@ -182,6 +179,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
* %MSI_MAX_INDEX.
*/
info->ops->msi_prepare = its_pci_msi_prepare;
+ info->ops->msi_teardown = its_msi_teardown;
break;
case DOMAIN_BUS_DEVICE_MSI:
case DOMAIN_BUS_WIRED_TO_MSI:
@@ -190,6 +188,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
* size is also known at domain creation time.
*/
info->ops->msi_prepare = its_pmsi_prepare;
+ info->ops->msi_teardown = its_msi_teardown;
break;
default:
/* Confused. How did the lib return true? */
@@ -203,7 +202,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
const struct msi_parent_ops gic_v3_its_msi_parent_ops = {
.supported_flags = ITS_MSI_FLAGS_SUPPORTED,
.required_flags = ITS_MSI_FLAGS_REQUIRED,
- .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
.prefix = "ITS-",
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0115ad6c8259..d54fa0638dc4 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -41,7 +41,7 @@
#include <asm/exception.h>
#include "irq-gic-common.h"
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0)
#define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1)
@@ -125,6 +125,8 @@ struct its_node {
int vlpi_redist_offset;
};
+static DEFINE_PER_CPU(struct its_node *, local_4_1_its);
+
#define is_v4(its) (!!((its)->typer & GITS_TYPER_VLPIS))
#define is_v4_1(its) (!!((its)->typer & GITS_TYPER_VMAPP))
#define device_ids(its) (FIELD_GET(GITS_TYPER_DEVBITS, (its)->typer) + 1)
@@ -2778,6 +2780,7 @@ static u64 inherit_vpe_l1_table_from_its(void)
}
val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);
+ *this_cpu_ptr(&local_4_1_its) = its;
return val;
}
@@ -2815,6 +2818,7 @@ static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask)
gic_data_rdist()->vpe_l1_base = gic_data_rdist_cpu(cpu)->vpe_l1_base;
*mask = gic_data_rdist_cpu(cpu)->vpe_table_mask;
+ *this_cpu_ptr(&local_4_1_its) = *per_cpu_ptr(&local_4_1_its, cpu);
return val;
}
@@ -3620,8 +3624,33 @@ out:
return err;
}
+static void its_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *info)
+{
+ struct its_device *its_dev = info->scratchpad[0].ptr;
+
+ guard(mutex)(&its_dev->its->dev_alloc_lock);
+
+ /* If the device is shared, keep everything around */
+ if (its_dev->shared)
+ return;
+
+ /* LPIs should have been already unmapped at this stage */
+ if (WARN_ON_ONCE(!bitmap_empty(its_dev->event_map.lpi_map,
+ its_dev->event_map.nr_lpis)))
+ return;
+
+ its_lpi_free(its_dev->event_map.lpi_map,
+ its_dev->event_map.lpi_base,
+ its_dev->event_map.nr_lpis);
+
+ /* Unmap device/itt, and get rid of the tracking */
+ its_send_mapd(its_dev, 0);
+ its_free_device(its_dev);
+}
+
static struct msi_domain_ops its_msi_domain_ops = {
.msi_prepare = its_msi_prepare,
+ .msi_teardown = its_msi_teardown,
};
static int its_irq_gic_domain_alloc(struct irq_domain *domain,
@@ -3722,7 +3751,6 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
- struct its_node *its = its_dev->its;
int i;
bitmap_release_region(its_dev->event_map.lpi_map,
@@ -3736,26 +3764,6 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_reset_irq_data(data);
}
- mutex_lock(&its->dev_alloc_lock);
-
- /*
- * If all interrupts have been freed, start mopping the
- * floor. This is conditioned on the device not being shared.
- */
- if (!its_dev->shared &&
- bitmap_empty(its_dev->event_map.lpi_map,
- its_dev->event_map.nr_lpis)) {
- its_lpi_free(its_dev->event_map.lpi_map,
- its_dev->event_map.lpi_base,
- its_dev->event_map.nr_lpis);
-
- /* Unmap device/itt */
- its_send_mapd(its_dev, 0);
- its_free_device(its_dev);
- }
-
- mutex_unlock(&its->dev_alloc_lock);
-
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
@@ -4180,7 +4188,7 @@ static struct irq_chip its_vpe_irq_chip = {
static struct its_node *find_4_1_its(void)
{
- static struct its_node *its = NULL;
+ struct its_node *its = *this_cpu_ptr(&local_4_1_its);
if (!its) {
list_for_each_entry(its, &its_nodes, entry) {
@@ -5118,7 +5126,12 @@ out_unmap:
static int its_init_domain(struct its_node *its)
{
- struct irq_domain *inner_domain;
+ struct irq_domain_info dom_info = {
+ .fwnode = its->fwnode_handle,
+ .ops = &its_domain_ops,
+ .domain_flags = its->msi_domain_flags,
+ .parent = its_parent,
+ };
struct msi_domain_info *info;
info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -5127,21 +5140,12 @@ static int its_init_domain(struct its_node *its)
info->ops = &its_msi_domain_ops;
info->data = its;
+ dom_info.host_data = info;
- inner_domain = irq_domain_create_hierarchy(its_parent,
- its->msi_domain_flags, 0,
- its->fwnode_handle, &its_domain_ops,
- info);
- if (!inner_domain) {
+ if (!msi_create_parent_irq_domain(&dom_info, &gic_v3_its_msi_parent_ops)) {
kfree(info);
return -ENOMEM;
}
-
- irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);
-
- inner_domain->msi_parent_ops = &gic_v3_its_msi_parent_ops;
- inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
-
return 0;
}
@@ -5518,7 +5522,7 @@ static struct its_node __init *its_node_init(struct resource *res,
its->base = its_base;
its->phys_base = res->start;
its->get_msi_base = its_irq_get_msi_base;
- its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI;
+ its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI | IRQ_DOMAIN_FLAG_MSI_IMMUTABLE;
its->numa_node = numa_node;
its->fwnode_handle = handle;
diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c
index 34e9ca77a8c3..aa11bbe8026a 100644
--- a/drivers/irqchip/irq-gic-v3-mbi.c
+++ b/drivers/irqchip/irq-gic-v3-mbi.c
@@ -18,7 +18,7 @@
#include <linux/irqchip/arm-gic-v3.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
struct mbi_range {
u32 spi_start;
@@ -197,7 +197,7 @@ static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = {
.supported_flags = MBI_MSI_FLAGS_SUPPORTED,
.required_flags = MBI_MSI_FLAGS_REQUIRED,
- .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
.prefix = "MBI-",
@@ -206,17 +206,13 @@ static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = {
static int mbi_allocate_domain(struct irq_domain *parent)
{
- struct irq_domain *nexus_domain;
+ struct irq_domain_info info = {
+ .fwnode = parent->fwnode,
+ .ops = &mbi_domain_ops,
+ .parent = parent,
+ };
- nexus_domain = irq_domain_create_hierarchy(parent, 0, 0, parent->fwnode,
- &mbi_domain_ops, NULL);
- if (!nexus_domain)
- return -ENOMEM;
-
- irq_domain_update_bus_token(nexus_domain, DOMAIN_BUS_NEXUS);
- nexus_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
- nexus_domain->msi_parent_ops = &gic_v3_mbi_msi_parent_ops;
- return 0;
+ return msi_create_parent_irq_domain(&info, &gic_v3_mbi_msi_parent_ops) ? 0 : -ENOMEM;
}
int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 270d7a4d85a6..efc791c43d44 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1826,7 +1826,7 @@ static int partition_domain_translate(struct irq_domain *d,
ppi_idx = __gic_get_ppi_index(ppi_intid);
ret = partition_translate_id(gic_data.ppi_descs[ppi_idx],
- of_node_to_fwnode(np));
+ of_fwnode_handle(np));
if (ret < 0)
return ret;
@@ -2192,7 +2192,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
part = &parts[part_idx];
- part->partition_id = of_node_to_fwnode(child_part);
+ part->partition_id = of_fwnode_handle(child_part);
pr_info("GIC: PPI partition %pOFn[%d] { ",
child_part, part_idx);
diff --git a/drivers/irqchip/irq-goldfish-pic.c b/drivers/irqchip/irq-goldfish-pic.c
index 513f6edbbe95..a8b23b507ecd 100644
--- a/drivers/irqchip/irq-goldfish-pic.c
+++ b/drivers/irqchip/irq-goldfish-pic.c
@@ -101,10 +101,9 @@ static int __init goldfish_pic_of_init(struct device_node *of_node,
irq_setup_generic_chip(gc, IRQ_MSK(GFPIC_NR_IRQS), 0,
IRQ_NOPROBE | IRQ_LEVEL, 0);
- gfpic->irq_domain = irq_domain_add_legacy(of_node, GFPIC_NR_IRQS,
- GFPIC_IRQ_BASE, 0,
- &goldfish_irq_domain_ops,
- NULL);
+ gfpic->irq_domain = irq_domain_create_legacy(of_fwnode_handle(of_node), GFPIC_NR_IRQS,
+ GFPIC_IRQ_BASE, 0, &goldfish_irq_domain_ops,
+ NULL);
if (!gfpic->irq_domain) {
pr_err("Failed to add irqdomain!\n");
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c
index 31c3f70a5d5e..b7958c5a1221 100644
--- a/drivers/irqchip/irq-hip04.c
+++ b/drivers/irqchip/irq-hip04.c
@@ -386,10 +386,8 @@ hip04_of_init(struct device_node *node, struct device_node *parent)
return -EINVAL;
}
- hip04_data.domain = irq_domain_add_legacy(node, nr_irqs, irq_base,
- 0,
- &hip04_irq_domain_ops,
- &hip04_data);
+ hip04_data.domain = irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0,
+ &hip04_irq_domain_ops, &hip04_data);
if (WARN_ON(!hip04_data.domain))
return -EINVAL;
diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c
index 115bdcffab24..91b2f587119c 100644
--- a/drivers/irqchip/irq-i8259.c
+++ b/drivers/irqchip/irq-i8259.c
@@ -313,8 +313,8 @@ struct irq_domain * __init __init_i8259_irqs(struct device_node *node)
init_8259A(0);
- domain = irq_domain_add_legacy(node, 16, I8259A_IRQ_BASE, 0,
- &i8259A_ops, NULL);
+ domain = irq_domain_create_legacy(of_fwnode_handle(node), 16, I8259A_IRQ_BASE, 0,
+ &i8259A_ops, NULL);
if (!domain)
panic("Failed to add i8259 IRQ domain");
diff --git a/drivers/irqchip/irq-idt3243x.c b/drivers/irqchip/irq-idt3243x.c
index 0732a0e9af62..f8324fb1fe8f 100644
--- a/drivers/irqchip/irq-idt3243x.c
+++ b/drivers/irqchip/irq-idt3243x.c
@@ -72,7 +72,7 @@ static int idt_pic_init(struct device_node *of_node, struct device_node *parent)
goto out_unmap_irq;
}
- domain = irq_domain_add_linear(of_node, IDT_PIC_NR_IRQS,
+ domain = irq_domain_create_linear(of_fwnode_handle(of_node), IDT_PIC_NR_IRQS,
&irq_generic_chip_ops, NULL);
if (!domain) {
pr_err("Failed to add irqdomain!\n");
diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 85f80bac0961..f0410d5d7315 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -372,7 +372,7 @@ static int pdc_intc_probe(struct platform_device *pdev)
priv->syswake_irq = irq;
/* Set up an IRQ domain */
- priv->domain = irq_domain_add_linear(node, 16, &irq_generic_chip_ops,
+ priv->domain = irq_domain_create_linear(of_fwnode_handle(node), 16, &irq_generic_chip_ops,
priv);
if (unlikely(!priv->domain)) {
dev_err(&pdev->dev, "cannot add IRQ domain\n");
diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c
index 095ae8e3217e..b91f5c14b405 100644
--- a/drivers/irqchip/irq-imx-gpcv2.c
+++ b/drivers/irqchip/irq-imx-gpcv2.c
@@ -240,8 +240,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node,
return -ENOMEM;
}
- domain = irq_domain_add_hierarchy(parent_domain, 0, GPC_MAX_IRQS,
- node, &gpcv2_irqchip_data_domain_ops, cd);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, GPC_MAX_IRQS,
+ of_fwnode_handle(node), &gpcv2_irqchip_data_domain_ops, cd);
if (!domain) {
iounmap(cd->gpc_base);
kfree(cd);
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index 787543d07565..5f9b204d350b 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -254,7 +254,7 @@ static int imx_intmux_probe(struct platform_device *pdev)
goto out;
}
- domain = irq_domain_add_linear(np, 32, &imx_intmux_domain_ops,
+ domain = irq_domain_create_linear(of_fwnode_handle(np), 32, &imx_intmux_domain_ops,
&data->irqchip_data[i]);
if (!domain) {
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
index afbfcce3b1e3..6dc9ac48fee5 100644
--- a/drivers/irqchip/irq-imx-irqsteer.c
+++ b/drivers/irqchip/irq-imx-irqsteer.c
@@ -212,7 +212,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
/* steer all IRQs into configured channel */
writel_relaxed(BIT(data->channel), data->regs + CHANCTRL);
- data->domain = irq_domain_add_linear(np, data->reg_num * 32,
+ data->domain = irq_domain_create_linear(of_fwnode_handle(np), data->reg_num * 32,
&imx_irqsteer_domain_ops, data);
if (!data->domain) {
dev_err(&pdev->dev, "failed to create IRQ domain\n");
diff --git a/drivers/irqchip/irq-imx-mu-msi.c b/drivers/irqchip/irq-imx-mu-msi.c
index 69aacdfc8bef..137da1927d14 100644
--- a/drivers/irqchip/irq-imx-mu-msi.c
+++ b/drivers/irqchip/irq-imx-mu-msi.c
@@ -24,7 +24,7 @@
#include <linux/pm_domain.h>
#include <linux/spinlock.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#define IMX_MU_CHANS 4
diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c
index 3363f83bd7e9..794ecba717c9 100644
--- a/drivers/irqchip/irq-ingenic-tcu.c
+++ b/drivers/irqchip/irq-ingenic-tcu.c
@@ -52,11 +52,10 @@ static void ingenic_tcu_gc_unmask_enable_reg(struct irq_data *d)
struct regmap *map = gc->private;
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
regmap_write(map, ct->regs.ack, mask);
regmap_write(map, ct->regs.enable, mask);
*ct->mask_cache |= mask;
- irq_gc_unlock(gc);
}
static void ingenic_tcu_gc_mask_disable_reg(struct irq_data *d)
@@ -66,10 +65,9 @@ static void ingenic_tcu_gc_mask_disable_reg(struct irq_data *d)
struct regmap *map = gc->private;
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
regmap_write(map, ct->regs.disable, mask);
*ct->mask_cache &= ~mask;
- irq_gc_unlock(gc);
}
static void ingenic_tcu_gc_mask_disable_reg_and_ack(struct irq_data *d)
@@ -79,10 +77,9 @@ static void ingenic_tcu_gc_mask_disable_reg_and_ack(struct irq_data *d)
struct regmap *map = gc->private;
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
regmap_write(map, ct->regs.ack, mask);
regmap_write(map, ct->regs.disable, mask);
- irq_gc_unlock(gc);
}
static int __init ingenic_tcu_irq_init(struct device_node *np,
@@ -114,8 +111,8 @@ static int __init ingenic_tcu_irq_init(struct device_node *np,
tcu->nb_parent_irqs = irqs;
- tcu->domain = irq_domain_add_linear(np, 32, &irq_generic_chip_ops,
- NULL);
+ tcu->domain = irq_domain_create_linear(of_fwnode_handle(np), 32, &irq_generic_chip_ops,
+ NULL);
if (!tcu->domain) {
ret = -ENOMEM;
goto err_free_tcu;
diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c
index cee839ca627e..52393724f213 100644
--- a/drivers/irqchip/irq-ingenic.c
+++ b/drivers/irqchip/irq-ingenic.c
@@ -90,8 +90,8 @@ static int __init ingenic_intc_of_init(struct device_node *node,
goto out_unmap_irq;
}
- domain = irq_domain_add_linear(node, num_chips * 32,
- &irq_generic_chip_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(node), num_chips * 32,
+ &irq_generic_chip_ops, NULL);
if (!domain) {
err = -ENOMEM;
goto out_unmap_base;
diff --git a/drivers/irqchip/irq-ixp4xx.c b/drivers/irqchip/irq-ixp4xx.c
index f23b02f62a5c..a9a5a52b818a 100644
--- a/drivers/irqchip/irq-ixp4xx.c
+++ b/drivers/irqchip/irq-ixp4xx.c
@@ -261,7 +261,7 @@ static int __init ixp4xx_of_init_irq(struct device_node *np,
pr_crit("IXP4XX: could not ioremap interrupt controller\n");
return -ENODEV;
}
- fwnode = of_node_to_fwnode(np);
+ fwnode = of_fwnode_handle(np);
/* These chip variants have 64 interrupts */
is_356 = of_device_is_compatible(np, "intel,ixp43x-interrupt") ||
diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c
index 1f613eb7b7f0..94c05cf974be 100644
--- a/drivers/irqchip/irq-jcore-aic.c
+++ b/drivers/irqchip/irq-jcore-aic.c
@@ -107,9 +107,8 @@ static int __init aic_irq_of_init(struct device_node *node,
if (ret < 0)
return ret;
- domain = irq_domain_add_legacy(node, dom_sz - min_irq, min_irq, min_irq,
- &jcore_aic_irqdomain_ops,
- &jcore_aic);
+ domain = irq_domain_create_legacy(of_fwnode_handle(node), dom_sz - min_irq, min_irq,
+ min_irq, &jcore_aic_irqdomain_ops, &jcore_aic);
if (!domain)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index 37e1a03fcbb4..c9e902b7bf48 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -157,8 +157,8 @@ static int keystone_irq_probe(struct platform_device *pdev)
kirq->chip.irq_mask = keystone_irq_setmask;
kirq->chip.irq_unmask = keystone_irq_unmask;
- kirq->irqd = irq_domain_add_linear(np, KEYSTONE_N_IRQ,
- &keystone_irq_ops, kirq);
+ kirq->irqd = irq_domain_create_linear(of_fwnode_handle(np), KEYSTONE_N_IRQ,
+ &keystone_irq_ops, kirq);
if (!kirq->irqd) {
dev_err(dev, "IRQ domain registration failed\n");
return -ENODEV;
diff --git a/drivers/irqchip/irq-lan966x-oic.c b/drivers/irqchip/irq-lan966x-oic.c
index 41ac880e3b87..11d3a0ffa261 100644
--- a/drivers/irqchip/irq-lan966x-oic.c
+++ b/drivers/irqchip/irq-lan966x-oic.c
@@ -71,14 +71,12 @@ static unsigned int lan966x_oic_irq_startup(struct irq_data *data)
struct lan966x_oic_chip_regs *chip_regs = gc->private;
u32 map;
- irq_gc_lock(gc);
-
- /* Map the source interrupt to the destination */
- map = irq_reg_readl(gc, chip_regs->reg_off_map);
- map |= data->mask;
- irq_reg_writel(gc, map, chip_regs->reg_off_map);
-
- irq_gc_unlock(gc);
+ scoped_guard (raw_spinlock, &gc->lock) {
+ /* Map the source interrupt to the destination */
+ map = irq_reg_readl(gc, chip_regs->reg_off_map);
+ map |= data->mask;
+ irq_reg_writel(gc, map, chip_regs->reg_off_map);
+ }
ct->chip.irq_ack(data);
ct->chip.irq_unmask(data);
@@ -95,14 +93,12 @@ static void lan966x_oic_irq_shutdown(struct irq_data *data)
ct->chip.irq_mask(data);
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
/* Unmap the interrupt */
map = irq_reg_readl(gc, chip_regs->reg_off_map);
map &= ~data->mask;
irq_reg_writel(gc, map, chip_regs->reg_off_map);
-
- irq_gc_unlock(gc);
}
static int lan966x_oic_irq_set_type(struct irq_data *data,
@@ -224,7 +220,7 @@ static int lan966x_oic_probe(struct platform_device *pdev)
.exit = lan966x_oic_chip_exit,
};
struct irq_domain_info d_info = {
- .fwnode = of_node_to_fwnode(pdev->dev.of_node),
+ .fwnode = of_fwnode_handle(pdev->dev.of_node),
.domain_flags = IRQ_DOMAIN_FLAG_DESTROY_GC,
.size = LAN966X_OIC_NR_IRQ,
.hwirq_max = LAN966X_OIC_NR_IRQ,
diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c
index 80e55955a29f..bf52dc8345f5 100644
--- a/drivers/irqchip/irq-loongarch-avec.c
+++ b/drivers/irqchip/irq-loongarch-avec.c
@@ -18,7 +18,7 @@
#include <asm/loongarch.h>
#include <asm/setup.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#include "irq-loongson.h"
#define VECTORS_PER_REG 64
diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c
index e62dab4c97fc..950bc087e388 100644
--- a/drivers/irqchip/irq-loongarch-cpu.c
+++ b/drivers/irqchip/irq-loongarch-cpu.c
@@ -100,7 +100,7 @@ static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = {
static int __init cpuintc_of_init(struct device_node *of_node,
struct device_node *parent)
{
- cpuintc_handle = of_node_to_fwnode(of_node);
+ cpuintc_handle = of_fwnode_handle(of_node);
irq_domain = irq_domain_create_linear(cpuintc_handle, EXCCODE_INT_NUM,
&loongarch_cpu_intc_irq_domain_ops, NULL);
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index bb79e19dfb59..b2860eb2d32c 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -554,7 +554,7 @@ static int __init eiointc_of_init(struct device_node *of_node,
priv->vec_count = VEC_COUNT;
priv->node = 0;
- priv->domain_handle = of_node_to_fwnode(of_node);
+ priv->domain_handle = of_fwnode_handle(of_node);
ret = eiointc_init(priv, parent_irq, 0);
if (ret < 0)
diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c
index 5da02c7ad0b3..d8558eb35044 100644
--- a/drivers/irqchip/irq-loongson-htvec.c
+++ b/drivers/irqchip/irq-loongson-htvec.c
@@ -248,7 +248,7 @@ static int htvec_of_init(struct device_node *node,
}
err = htvec_init(res.start, resource_size(&res),
- num_parents, parent_irq, of_node_to_fwnode(node));
+ num_parents, parent_irq, of_fwnode_handle(node));
if (err < 0)
return err;
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index 2b1bd4a96665..0033c2188abc 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -116,9 +116,8 @@ static int liointc_set_type(struct irq_data *data, unsigned int type)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
u32 mask = data->mask;
- unsigned long flags;
- irq_gc_lock_irqsave(gc, flags);
+ guard(raw_spinlock)(&gc->lock);
switch (type) {
case IRQ_TYPE_LEVEL_HIGH:
liointc_set_bit(gc, LIOINTC_REG_INTC_EDGE, mask, false);
@@ -137,10 +136,8 @@ static int liointc_set_type(struct irq_data *data, unsigned int type)
liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, true);
break;
default:
- irq_gc_unlock_irqrestore(gc, flags);
return -EINVAL;
}
- irq_gc_unlock_irqrestore(gc, flags);
irqd_set_trigger_type(data, type);
return 0;
@@ -157,10 +154,9 @@ static void liointc_suspend(struct irq_chip_generic *gc)
static void liointc_resume(struct irq_chip_generic *gc)
{
struct liointc_priv *priv = gc->private;
- unsigned long flags;
int i;
- irq_gc_lock_irqsave(gc, flags);
+ guard(raw_spinlock_irqsave)(&gc->lock);
/* Disable all at first */
writel(0xffffffff, gc->reg_base + LIOINTC_REG_INTC_DISABLE);
/* Restore map cache */
@@ -170,7 +166,6 @@ static void liointc_resume(struct irq_chip_generic *gc)
writel(priv->int_edge, gc->reg_base + LIOINTC_REG_INTC_EDGE);
/* Restore mask cache */
writel(gc->mask_cache, gc->reg_base + LIOINTC_REG_INTC_ENABLE);
- irq_gc_unlock_irqrestore(gc, flags);
}
static int parent_irq[LIOINTC_NUM_PARENT];
@@ -363,7 +358,7 @@ static int __init liointc_of_init(struct device_node *node,
}
err = liointc_init(res.start, resource_size(&res),
- revision, of_node_to_fwnode(node), node);
+ revision, of_fwnode_handle(node), node);
if (err < 0)
return err;
diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index 9c62108b3ad5..a0257c7bef10 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -15,7 +15,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#include "irq-loongson.h"
static int nr_pics;
@@ -243,7 +243,7 @@ static int pch_msi_of_init(struct device_node *node, struct device_node *parent)
return -EINVAL;
}
- err = pch_msi_init(res.start, irq_base, irq_count, parent_domain, of_node_to_fwnode(node));
+ err = pch_msi_init(res.start, irq_base, irq_count, parent_domain, of_fwnode_handle(node));
if (err < 0)
return err;
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index 69efda35a8e7..62e6bf3a0611 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -392,7 +392,7 @@ static int pch_pic_of_init(struct device_node *node,
}
err = pch_pic_init(res.start, resource_size(&res), vec_base,
- parent_domain, of_node_to_fwnode(node), 0);
+ parent_domain, of_fwnode_handle(node), 0);
if (err < 0)
return err;
diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index 4d70a857133f..14cca44baa14 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -210,8 +210,8 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
return -EINVAL;
}
- irqc->domain = irq_domain_add_linear(node, NR_LPC32XX_IC_IRQS,
- &lpc32xx_irq_domain_ops, irqc);
+ irqc->domain = irq_domain_create_linear(of_fwnode_handle(node), NR_LPC32XX_IC_IRQS,
+ &lpc32xx_irq_domain_ops, irqc);
if (!irqc->domain) {
pr_err("unable to add irq domain\n");
iounmap(irqc->base);
diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c
index 139f26b0a6ef..50a7b38381b9 100644
--- a/drivers/irqchip/irq-ls-extirq.c
+++ b/drivers/irqchip/irq-ls-extirq.c
@@ -208,8 +208,8 @@ ls_extirq_of_init(struct device_node *node, struct device_node *parent)
of_device_is_compatible(node, "fsl,ls1043a-extirq");
raw_spin_lock_init(&priv->lock);
- domain = irq_domain_add_hierarchy(parent_domain, 0, priv->nirq, node,
- &extirq_domain_ops, priv);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, priv->nirq, of_fwnode_handle(node),
+ &extirq_domain_ops, priv);
if (!domain) {
ret = -ENOMEM;
goto err_add_hierarchy;
diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index 3cb80796cc7c..84bc5e4b47cf 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -215,17 +215,17 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc)
static int ls_scfg_msi_domains_init(struct ls_scfg_msi *msi_data)
{
/* Initialize MSI domain parent */
- msi_data->parent = irq_domain_add_linear(NULL,
- msi_data->irqs_num,
- &ls_scfg_msi_domain_ops,
- msi_data);
+ msi_data->parent = irq_domain_create_linear(NULL,
+ msi_data->irqs_num,
+ &ls_scfg_msi_domain_ops,
+ msi_data);
if (!msi_data->parent) {
dev_err(&msi_data->pdev->dev, "failed to create IRQ domain\n");
return -ENOMEM;
}
msi_data->msi_domain = pci_msi_create_irq_domain(
- of_node_to_fwnode(msi_data->pdev->dev.of_node),
+ of_fwnode_handle(msi_data->pdev->dev.of_node),
&ls_scfg_msi_domain_info,
msi_data->parent);
if (!msi_data->msi_domain) {
diff --git a/drivers/irqchip/irq-ls1x.c b/drivers/irqchip/irq-ls1x.c
index 77a3f7dfaaf0..589d32007fca 100644
--- a/drivers/irqchip/irq-ls1x.c
+++ b/drivers/irqchip/irq-ls1x.c
@@ -126,8 +126,8 @@ static int __init ls1x_intc_of_init(struct device_node *node,
}
/* Set up an IRQ domain */
- priv->domain = irq_domain_add_linear(node, 32, &irq_generic_chip_ops,
- NULL);
+ priv->domain = irq_domain_create_linear(of_fwnode_handle(node), 32, &irq_generic_chip_ops,
+ NULL);
if (!priv->domain) {
pr_err("ls1x-irq: cannot add IRQ domain\n");
err = -ENOMEM;
diff --git a/drivers/irqchip/irq-mchp-eic.c b/drivers/irqchip/irq-mchp-eic.c
index 5dcd94c000a2..516a3a0e359c 100644
--- a/drivers/irqchip/irq-mchp-eic.c
+++ b/drivers/irqchip/irq-mchp-eic.c
@@ -248,8 +248,9 @@ static int mchp_eic_init(struct device_node *node, struct device_node *parent)
eic->irqs[i] = irq.args[1];
}
- eic->domain = irq_domain_add_hierarchy(parent_domain, 0, MCHP_EIC_NIRQ,
- node, &mchp_eic_domain_ops, eic);
+ eic->domain = irq_domain_create_hierarchy(parent_domain, 0, MCHP_EIC_NIRQ,
+ of_fwnode_handle(node), &mchp_eic_domain_ops,
+ eic);
if (!eic->domain) {
pr_err("%pOF: Failed to add domain\n", node);
ret = -ENODEV;
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 0a25536a5d07..7d177626d64b 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -607,7 +607,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node *
domain = irq_domain_create_hierarchy(parent_domain, 0,
ctl->params->nr_hwirq,
- of_node_to_fwnode(node),
+ of_fwnode_handle(node),
&meson_gpio_irq_domain_ops,
ctl);
if (!domain) {
diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c
index 0c7ae71a0af0..ac784ef3ed4b 100644
--- a/drivers/irqchip/irq-mips-cpu.c
+++ b/drivers/irqchip/irq-mips-cpu.c
@@ -238,11 +238,9 @@ static void mips_cpu_register_ipi_domain(struct device_node *of_node)
struct cpu_ipi_domain_state *ipi_domain_state;
ipi_domain_state = kzalloc(sizeof(*ipi_domain_state), GFP_KERNEL);
- ipi_domain = irq_domain_add_hierarchy(irq_domain,
- IRQ_DOMAIN_FLAG_IPI_SINGLE,
- 2, of_node,
- &mips_cpu_ipi_chip_ops,
- ipi_domain_state);
+ ipi_domain = irq_domain_create_hierarchy(irq_domain, IRQ_DOMAIN_FLAG_IPI_SINGLE, 2,
+ of_fwnode_handle(of_node),
+ &mips_cpu_ipi_chip_ops, ipi_domain_state);
if (!ipi_domain)
panic("Failed to add MIPS CPU IPI domain");
irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
@@ -260,9 +258,8 @@ static void __init __mips_cpu_irq_init(struct device_node *of_node)
clear_c0_status(ST0_IM);
clear_c0_cause(CAUSEF_IP);
- irq_domain = irq_domain_add_legacy(of_node, 8, MIPS_CPU_IRQ_BASE, 0,
- &mips_cpu_intc_irq_domain_ops,
- NULL);
+ irq_domain = irq_domain_create_legacy(of_fwnode_handle(of_node), 8, MIPS_CPU_IRQ_BASE, 0,
+ &mips_cpu_intc_irq_domain_ops, NULL);
if (!irq_domain)
panic("Failed to add irqdomain for MIPS CPU");
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index bca8053864b2..34e8d09c12a0 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -841,10 +841,10 @@ static int gic_register_ipi_domain(struct device_node *node)
struct irq_domain *gic_ipi_domain;
unsigned int v[2], num_ipis;
- gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
- IRQ_DOMAIN_FLAG_IPI_PER_CPU,
- GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
- node, &gic_ipi_domain_ops, NULL);
+ gic_ipi_domain = irq_domain_create_hierarchy(gic_irq_domain, IRQ_DOMAIN_FLAG_IPI_PER_CPU,
+ GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
+ of_fwnode_handle(node), &gic_ipi_domain_ops,
+ NULL);
if (!gic_ipi_domain) {
pr_err("Failed to add IPI domain");
return -ENXIO;
@@ -963,9 +963,10 @@ static int __init gic_of_init(struct device_node *node,
gic_irq_dispatch);
}
- gic_irq_domain = irq_domain_add_simple(node, GIC_NUM_LOCAL_INTRS +
- gic_shared_intrs, 0,
- &gic_irq_domain_ops, NULL);
+ gic_irq_domain = irq_domain_create_simple(of_fwnode_handle(node),
+ GIC_NUM_LOCAL_INTRS +
+ gic_shared_intrs, 0,
+ &gic_irq_domain_ops, NULL);
if (!gic_irq_domain) {
pr_err("Failed to add IRQ domain");
return -ENXIO;
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c
index 25cf4f80e767..09e640430208 100644
--- a/drivers/irqchip/irq-mmp.c
+++ b/drivers/irqchip/irq-mmp.c
@@ -261,9 +261,9 @@ static int __init mmp_init_bases(struct device_node *node)
}
icu_data[0].virq_base = 0;
- icu_data[0].domain = irq_domain_add_linear(node, nr_irqs,
- &mmp_irq_domain_ops,
- &icu_data[0]);
+ icu_data[0].domain = irq_domain_create_linear(of_fwnode_handle(node), nr_irqs,
+ &mmp_irq_domain_ops,
+ &icu_data[0]);
for (irq = 0; irq < nr_irqs; irq++) {
ret = irq_create_mapping(icu_data[0].domain, irq);
if (!ret) {
@@ -391,9 +391,9 @@ static int __init mmp2_mux_of_init(struct device_node *node,
return -EINVAL;
icu_data[i].virq_base = 0;
- icu_data[i].domain = irq_domain_add_linear(node, nr_irqs,
- &mmp_irq_domain_ops,
- &icu_data[i]);
+ icu_data[i].domain = irq_domain_create_linear(of_fwnode_handle(node), nr_irqs,
+ &mmp_irq_domain_ops,
+ &icu_data[i]);
for (irq = 0; irq < nr_irqs; irq++) {
ret = irq_create_mapping(icu_data[i].domain, irq);
if (!ret) {
diff --git a/drivers/irqchip/irq-mscc-ocelot.c b/drivers/irqchip/irq-mscc-ocelot.c
index 3dc745b14caf..8cbc191f750b 100644
--- a/drivers/irqchip/irq-mscc-ocelot.c
+++ b/drivers/irqchip/irq-mscc-ocelot.c
@@ -83,7 +83,7 @@ static void ocelot_irq_unmask(struct irq_data *data)
unsigned int mask = data->mask;
u32 val;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
/*
* Clear sticky bits for edge mode interrupts.
* Serval has only one trigger register replication, but the adjacent
@@ -97,7 +97,6 @@ static void ocelot_irq_unmask(struct irq_data *data)
*ct->mask_cache &= ~mask;
irq_reg_writel(gc, mask, p->reg_off_ena_set);
- irq_gc_unlock(gc);
}
static void ocelot_irq_handler(struct irq_desc *desc)
@@ -132,8 +131,8 @@ static int __init vcoreiii_irq_init(struct device_node *node,
if (!parent_irq)
return -EINVAL;
- domain = irq_domain_add_linear(node, p->n_irq,
- &irq_generic_chip_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(node), p->n_irq,
+ &irq_generic_chip_ops, NULL);
if (!domain) {
pr_err("%pOFn: unable to add irq domain\n", node);
return -ENOMEM;
diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c
index 51464c6257f3..246c30205af4 100644
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -4,7 +4,7 @@
#include <linux/export.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
/**
* msi_lib_init_dev_msi_info - Domain info setup for MSI domains
@@ -105,8 +105,13 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
* MSI message into the hardware which is the whole purpose of the
* device MSI domain aside of mask/unmask which is provided e.g. by
* PCI/MSI device domains.
+ *
+ * The exception to the rule is when the underlying domain
+ * tells you that affinity is not a thing -- for example when
+ * everything is muxed behind a single interrupt.
*/
- chip->irq_set_affinity = msi_domain_set_affinity;
+ if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
+ chip->irq_set_affinity = msi_domain_set_affinity;
return true;
}
EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info);
diff --git a/drivers/irqchip/irq-mst-intc.c b/drivers/irqchip/irq-mst-intc.c
index f6133ae28155..9643cc3a77d7 100644
--- a/drivers/irqchip/irq-mst-intc.c
+++ b/drivers/irqchip/irq-mst-intc.c
@@ -273,8 +273,8 @@ static int __init mst_intc_of_init(struct device_node *dn,
raw_spin_lock_init(&cd->lock);
cd->irq_start = irq_start;
cd->nr_irqs = irq_end - irq_start + 1;
- domain = irq_domain_add_hierarchy(domain_parent, 0, cd->nr_irqs, dn,
- &mst_intc_domain_ops, cd);
+ domain = irq_domain_create_hierarchy(domain_parent, 0, cd->nr_irqs, of_fwnode_handle(dn),
+ &mst_intc_domain_ops, cd);
if (!domain) {
iounmap(cd->base);
kfree(cd);
diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c
index 76bc0283e3b9..de481ba340f8 100644
--- a/drivers/irqchip/irq-mtk-cirq.c
+++ b/drivers/irqchip/irq-mtk-cirq.c
@@ -336,9 +336,8 @@ static int __init mtk_cirq_of_init(struct device_node *node,
cirq_data->offsets = match->data;
irq_num = cirq_data->ext_irq_end - cirq_data->ext_irq_start + 1;
- domain = irq_domain_add_hierarchy(domain_parent, 0,
- irq_num, node,
- &cirq_domain_ops, cirq_data);
+ domain = irq_domain_create_hierarchy(domain_parent, 0, irq_num, of_fwnode_handle(node),
+ &cirq_domain_ops, cirq_data);
if (!domain) {
ret = -ENOMEM;
goto out_unmap;
diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c
index 586e52d5442b..6895e7096b27 100644
--- a/drivers/irqchip/irq-mtk-sysirq.c
+++ b/drivers/irqchip/irq-mtk-sysirq.c
@@ -207,8 +207,8 @@ static int __init mtk_sysirq_of_init(struct device_node *node,
chip_data->which_word[i] = word;
}
- domain = irq_domain_add_hierarchy(domain_parent, 0, intpol_num, node,
- &sysirq_domain_ops, chip_data);
+ domain = irq_domain_create_hierarchy(domain_parent, 0, intpol_num, of_fwnode_handle(node),
+ &sysirq_domain_ops, chip_data);
if (!domain) {
ret = -ENOMEM;
goto out_free_which_word;
diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c
index d67f93f6d750..d3232d6d8dce 100644
--- a/drivers/irqchip/irq-mvebu-gicp.c
+++ b/drivers/irqchip/irq-mvebu-gicp.c
@@ -17,7 +17,7 @@
#include <linux/of_platform.h>
#include <linux/platform_device.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
@@ -161,7 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = {
static const struct msi_parent_ops gicp_msi_parent_ops = {
.supported_flags = GICP_MSI_FLAGS_SUPPORTED,
.required_flags = GICP_MSI_FLAGS_REQUIRED,
- .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "GICP-",
@@ -170,9 +170,12 @@ static const struct msi_parent_ops gicp_msi_parent_ops = {
static int mvebu_gicp_probe(struct platform_device *pdev)
{
- struct irq_domain *inner_domain, *parent_domain;
struct device_node *node = pdev->dev.of_node;
struct device_node *irq_parent_dn;
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(node),
+ .ops = &gicp_domain_ops,
+ };
struct mvebu_gicp *gicp;
int ret, i;
@@ -217,30 +220,23 @@ static int mvebu_gicp_probe(struct platform_device *pdev)
if (!gicp->spi_bitmap)
return -ENOMEM;
+ info.size = gicp->spi_cnt;
+ info.host_data = gicp;
+
irq_parent_dn = of_irq_find_parent(node);
if (!irq_parent_dn) {
dev_err(&pdev->dev, "failed to find parent IRQ node\n");
return -ENODEV;
}
- parent_domain = irq_find_host(irq_parent_dn);
+ info.parent = irq_find_host(irq_parent_dn);
of_node_put(irq_parent_dn);
- if (!parent_domain) {
+ if (!info.parent) {
dev_err(&pdev->dev, "failed to find parent IRQ domain\n");
return -ENODEV;
}
- inner_domain = irq_domain_create_hierarchy(parent_domain, 0,
- gicp->spi_cnt,
- of_node_to_fwnode(node),
- &gicp_domain_ops, gicp);
- if (!inner_domain)
- return -ENOMEM;
-
- irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_GENERIC_MSI);
- inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
- inner_domain->msi_parent_ops = &gicp_msi_parent_ops;
- return 0;
+ return msi_create_parent_irq_domain(&info, &gicp_msi_parent_ops) ? 0 : -ENOMEM;
}
static const struct of_device_id mvebu_gicp_of_match[] = {
diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index 4eebed39880a..db5dbc6e88b0 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -20,7 +20,7 @@
#include <linux/of_platform.h>
#include <linux/platform_device.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#include <dt-bindings/interrupt-controller/mvebu-icu.h>
diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c
index 28f7e81df94f..e5b2bde3d933 100644
--- a/drivers/irqchip/irq-mvebu-odmi.c
+++ b/drivers/irqchip/irq-mvebu-odmi.c
@@ -18,7 +18,7 @@
#include <linux/of_address.h>
#include <linux/slab.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
@@ -157,7 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = {
static const struct msi_parent_ops odmi_msi_parent_ops = {
.supported_flags = ODMI_MSI_FLAGS_SUPPORTED,
.required_flags = ODMI_MSI_FLAGS_REQUIRED,
- .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "ODMI-",
@@ -167,7 +167,12 @@ static const struct msi_parent_ops odmi_msi_parent_ops = {
static int __init mvebu_odmi_init(struct device_node *node,
struct device_node *parent)
{
- struct irq_domain *parent_domain, *inner_domain;
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(node),
+ .ops = &odmi_domain_ops,
+ .size = odmis_count * NODMIS_PER_FRAME,
+ .parent = irq_find_host(parent),
+ };
int ret, i;
if (of_property_read_u32(node, "marvell,odmi-frames", &odmis_count))
@@ -203,22 +208,10 @@ static int __init mvebu_odmi_init(struct device_node *node,
}
}
- parent_domain = irq_find_host(parent);
+ if (msi_create_parent_irq_domain(&info, &odmi_msi_parent_ops))
+ return 0;
- inner_domain = irq_domain_create_hierarchy(parent_domain, 0,
- odmis_count * NODMIS_PER_FRAME,
- of_node_to_fwnode(node),
- &odmi_domain_ops, NULL);
- if (!inner_domain) {
- ret = -ENOMEM;
- goto err_unmap;
- }
-
- irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_GENERIC_MSI);
- inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
- inner_domain->msi_parent_ops = &odmi_msi_parent_ops;
-
- return 0;
+ ret = -ENOMEM;
err_unmap:
for (i = 0; i < odmis_count; i++) {
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index 3888b7585981..8db638aa21d2 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -150,8 +150,8 @@ static int mvebu_pic_probe(struct platform_device *pdev)
return -EINVAL;
}
- pic->domain = irq_domain_add_linear(node, PIC_MAX_IRQS,
- &mvebu_pic_domain_ops, pic);
+ pic->domain = irq_domain_create_linear(of_fwnode_handle(node), PIC_MAX_IRQS,
+ &mvebu_pic_domain_ops, pic);
if (!pic->domain) {
dev_err(&pdev->dev, "Failed to allocate irq domain\n");
return -ENOMEM;
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index ebd4a9014e8d..5822ea864765 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -14,7 +14,7 @@
#include <linux/of_irq.h>
#include <linux/of_platform.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
/* Cause register */
#define GICP_SECR(idx) (0x0 + ((idx) * 0x4))
@@ -366,6 +366,10 @@ static const struct msi_parent_ops sei_msi_parent_ops = {
static int mvebu_sei_probe(struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(node),
+ .ops = &mvebu_sei_cp_domain_ops,
+ };
struct mvebu_sei *sei;
u32 parent_irq;
int ret;
@@ -402,7 +406,7 @@ static int mvebu_sei_probe(struct platform_device *pdev)
}
/* Create the root SEI domain */
- sei->sei_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+ sei->sei_domain = irq_domain_create_linear(of_fwnode_handle(node),
(sei->caps->ap_range.size +
sei->caps->cp_range.size),
&mvebu_sei_domain_ops,
@@ -418,7 +422,7 @@ static int mvebu_sei_probe(struct platform_device *pdev)
/* Create the 'wired' domain */
sei->ap_domain = irq_domain_create_hierarchy(sei->sei_domain, 0,
sei->caps->ap_range.size,
- of_node_to_fwnode(node),
+ of_fwnode_handle(node),
&mvebu_sei_ap_domain_ops,
sei);
if (!sei->ap_domain) {
@@ -430,21 +434,17 @@ static int mvebu_sei_probe(struct platform_device *pdev)
irq_domain_update_bus_token(sei->ap_domain, DOMAIN_BUS_WIRED);
/* Create the 'MSI' domain */
- sei->cp_domain = irq_domain_create_hierarchy(sei->sei_domain, 0,
- sei->caps->cp_range.size,
- of_node_to_fwnode(node),
- &mvebu_sei_cp_domain_ops,
- sei);
+ info.size = sei->caps->cp_range.size;
+ info.host_data = sei;
+ info.parent = sei->sei_domain;
+
+ sei->cp_domain = msi_create_parent_irq_domain(&info, &sei_msi_parent_ops);
if (!sei->cp_domain) {
pr_err("Failed to create CPs IRQ domain\n");
ret = -ENOMEM;
goto remove_ap_domain;
}
- irq_domain_update_bus_token(sei->cp_domain, DOMAIN_BUS_GENERIC_MSI);
- sei->cp_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
- sei->cp_domain->msi_parent_ops = &sei_msi_parent_ops;
-
mvebu_sei_reset(sei);
irq_set_chained_handler_and_data(parent_irq, mvebu_sei_handle_cascade_irq, sei);
diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index d67b5da38982..0bb423dd5280 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -162,8 +162,8 @@ static const struct irq_domain_ops icoll_irq_domain_ops = {
static void __init icoll_add_domain(struct device_node *np,
int num)
{
- icoll_domain = irq_domain_add_linear(np, num,
- &icoll_irq_domain_ops, NULL);
+ icoll_domain = irq_domain_create_linear(of_fwnode_handle(np), num,
+ &icoll_irq_domain_ops, NULL);
if (!icoll_domain)
panic("%pOF: unable to create irq domain", np);
diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index ba6332b00a0a..76e11cac9631 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -90,7 +90,7 @@ static int __init nvic_of_init(struct device_node *node,
irqs = NVIC_MAX_IRQ;
nvic_irq_domain =
- irq_domain_add_linear(node, irqs, &nvic_irq_domain_ops, NULL);
+ irq_domain_create_linear(of_fwnode_handle(node), irqs, &nvic_irq_domain_ops, NULL);
if (!nvic_irq_domain) {
pr_warn("Failed to allocate irq domain\n");
diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c
index ad84a2f03368..16f00db570e7 100644
--- a/drivers/irqchip/irq-omap-intc.c
+++ b/drivers/irqchip/irq-omap-intc.c
@@ -248,7 +248,7 @@ static int __init omap_init_irq_of(struct device_node *node)
if (WARN_ON(!omap_irq_base))
return -ENOMEM;
- domain = irq_domain_add_linear(node, omap_nr_irqs,
+ domain = irq_domain_create_linear(of_fwnode_handle(node), omap_nr_irqs,
&irq_generic_chip_ops, NULL);
omap_irq_soft_reset();
@@ -274,7 +274,7 @@ static int __init omap_init_irq_legacy(u32 base, struct device_node *node)
irq_base = 0;
}
- domain = irq_domain_add_legacy(node, omap_nr_irqs, irq_base, 0,
+ domain = irq_domain_create_legacy(of_fwnode_handle(node), omap_nr_irqs, irq_base, 0,
&irq_domain_simple_ops, NULL);
omap_irq_soft_reset();
diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c
index f289ccd95291..48126067c54b 100644
--- a/drivers/irqchip/irq-or1k-pic.c
+++ b/drivers/irqchip/irq-or1k-pic.c
@@ -144,8 +144,8 @@ static int __init or1k_pic_init(struct device_node *node,
/* Disable all interrupts until explicitly requested */
mtspr(SPR_PICMR, (0UL));
- root_domain = irq_domain_add_linear(node, 32, &or1k_irq_domain_ops,
- pic);
+ root_domain = irq_domain_create_linear(of_fwnode_handle(node), 32, &or1k_irq_domain_ops,
+ pic);
set_handle_irq(or1k_pic_handle_irq);
diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index 4e4e874e09a8..dddbc05917c0 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -59,7 +59,7 @@ static int __init orion_irq_init(struct device_node *np,
/* count number of irq chips by valid reg addresses */
num_chips = of_address_count(np);
- orion_irq_domain = irq_domain_add_linear(np,
+ orion_irq_domain = irq_domain_create_linear(of_fwnode_handle(np),
num_chips * ORION_IRQS_PER_CHIP,
&irq_generic_chip_ops, NULL);
if (!orion_irq_domain)
@@ -146,8 +146,8 @@ static int __init orion_bridge_irq_init(struct device_node *np,
/* get optional number of interrupts provided */
of_property_read_u32(np, "marvell,#interrupts", &nrirqs);
- domain = irq_domain_add_linear(np, nrirqs,
- &irq_generic_chip_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(np), nrirqs,
+ &irq_generic_chip_ops, NULL);
if (!domain) {
pr_err("%pOFn: unable to add irq domain\n", np);
return -ENOMEM;
diff --git a/drivers/irqchip/irq-owl-sirq.c b/drivers/irqchip/irq-owl-sirq.c
index 6e4127465094..3d93d21f6732 100644
--- a/drivers/irqchip/irq-owl-sirq.c
+++ b/drivers/irqchip/irq-owl-sirq.c
@@ -323,8 +323,8 @@ static int __init owl_sirq_init(const struct owl_sirq_params *params,
owl_sirq_clear_set_extctl(chip_data, 0, INTC_EXTCTL_CLK_SEL, i);
}
- domain = irq_domain_add_hierarchy(parent_domain, 0, NUM_SIRQ, node,
- &owl_sirq_domain_ops, chip_data);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, NUM_SIRQ, of_fwnode_handle(node),
+ &owl_sirq_domain_ops, chip_data);
if (!domain) {
pr_err("%pOF: failed to add domain\n", node);
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c
index b546b1036e12..5dfda8e8df10 100644
--- a/drivers/irqchip/irq-pic32-evic.c
+++ b/drivers/irqchip/irq-pic32-evic.c
@@ -227,9 +227,9 @@ static int __init pic32_of_init(struct device_node *node,
goto err_iounmap;
}
- evic_irq_domain = irq_domain_add_linear(node, nchips * 32,
- &pic32_irq_domain_ops,
- priv);
+ evic_irq_domain = irq_domain_create_linear(of_fwnode_handle(node), nchips * 32,
+ &pic32_irq_domain_ops,
+ priv);
if (!evic_irq_domain) {
ret = -ENOMEM;
goto err_free_priv;
diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c
index bee01980b463..87a5813fd835 100644
--- a/drivers/irqchip/irq-pruss-intc.c
+++ b/drivers/irqchip/irq-pruss-intc.c
@@ -555,8 +555,8 @@ static int pruss_intc_probe(struct platform_device *pdev)
mutex_init(&intc->lock);
- intc->domain = irq_domain_add_linear(dev->of_node, max_system_events,
- &pruss_intc_irq_domain_ops, intc);
+ intc->domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), max_system_events,
+ &pruss_intc_irq_domain_ops, intc);
if (!intc->domain)
return -ENOMEM;
@@ -581,8 +581,7 @@ static int pruss_intc_probe(struct platform_device *pdev)
host_data->intc = intc;
host_data->host_irq = i;
- irq_set_handler_data(irq, host_data);
- irq_set_chained_handler(irq, pruss_intc_irq_handler);
+ irq_set_chained_handler_and_data(irq, pruss_intc_irq_handler, host_data);
}
return 0;
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
index f772deb9cba5..8d569f7c5a7a 100644
--- a/drivers/irqchip/irq-qcom-mpm.c
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -450,7 +450,7 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
priv->domain = irq_domain_create_hierarchy(parent_domain,
IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP, pin_cnt,
- of_node_to_fwnode(np), &qcom_mpm_ops, priv);
+ of_fwnode_handle(np), &qcom_mpm_ops, priv);
if (!priv->domain) {
dev_err(dev, "failed to create MPM domain\n");
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
index 2a349082af81..942c1f8c363d 100644
--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -162,7 +162,7 @@ static int __init realtek_rtl_of_init(struct device_node *node, struct device_no
else if (!parent_irq)
return -ENODEV;
- domain = irq_domain_add_linear(node, RTL_ICTL_NUM_INPUTS, &irq_domain_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(node), RTL_ICTL_NUM_INPUTS, &irq_domain_ops, NULL);
if (!domain)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 954419f2460d..0959ed43b1a9 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -513,8 +513,10 @@ static int intc_irqpin_probe(struct platform_device *pdev)
irq_chip->irq_set_wake = intc_irqpin_irq_set_wake;
irq_chip->flags = IRQCHIP_MASK_ON_SUSPEND;
- p->irq_domain = irq_domain_add_simple(dev->of_node, nirqs, 0,
- &intc_irqpin_irq_domain_ops, p);
+ p->irq_domain = irq_domain_create_simple(of_fwnode_handle(dev->of_node),
+ nirqs, 0,
+ &intc_irqpin_irq_domain_ops,
+ p);
if (!p->irq_domain) {
ret = -ENXIO;
dev_err(dev, "cannot initialize irq domain\n");
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index cbce8ffc7de4..5c3196e5a437 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -168,8 +168,8 @@ static int irqc_probe(struct platform_device *pdev)
p->cpu_int_base = p->iomem + IRQC_INT_CPU_BASE(0); /* SYS-SPI */
- p->irq_domain = irq_domain_add_linear(dev->of_node, p->number_of_irqs,
- &irq_generic_chip_ops, p);
+ p->irq_domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), p->number_of_irqs,
+ &irq_generic_chip_ops, p);
if (!p->irq_domain) {
ret = -ENXIO;
dev_err(dev, "cannot initialize irq domain\n");
diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c
index d4e6a68889ec..0a9640ba0adb 100644
--- a/drivers/irqchip/irq-renesas-rza1.c
+++ b/drivers/irqchip/irq-renesas-rza1.c
@@ -231,9 +231,9 @@ static int rza1_irqc_probe(struct platform_device *pdev)
priv->chip.irq_set_type = rza1_irqc_set_type;
priv->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
- priv->irq_domain = irq_domain_add_hierarchy(parent, 0, IRQC_NUM_IRQ,
- np, &rza1_irqc_domain_ops,
- priv);
+ priv->irq_domain = irq_domain_create_hierarchy(parent, 0, IRQC_NUM_IRQ,
+ of_fwnode_handle(np), &rza1_irqc_domain_ops,
+ priv);
if (!priv->irq_domain) {
dev_err(dev, "cannot initialize irq domain\n");
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c
index 6a2e41f02446..1e861bd64f97 100644
--- a/drivers/irqchip/irq-renesas-rzg2l.c
+++ b/drivers/irqchip/irq-renesas-rzg2l.c
@@ -574,9 +574,9 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node *
raw_spin_lock_init(&rzg2l_irqc_data->lock);
- irq_domain = irq_domain_add_hierarchy(parent_domain, 0, IRQC_NUM_IRQ,
- node, &rzg2l_irqc_domain_ops,
- rzg2l_irqc_data);
+ irq_domain = irq_domain_create_hierarchy(parent_domain, 0, IRQC_NUM_IRQ,
+ of_fwnode_handle(node), &rzg2l_irqc_domain_ops,
+ rzg2l_irqc_data);
if (!irq_domain) {
pm_runtime_put(dev);
return dev_err_probe(dev, -ENOMEM, "failed to add irq domain\n");
diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c
index 0f0fd7d4dfdf..1c12e6ec1370 100644
--- a/drivers/irqchip/irq-renesas-rzv2h.c
+++ b/drivers/irqchip/irq-renesas-rzv2h.c
@@ -522,8 +522,9 @@ static int rzv2h_icu_init_common(struct device_node *node, struct device_node *p
raw_spin_lock_init(&rzv2h_icu_data->lock);
- irq_domain = irq_domain_add_hierarchy(parent_domain, 0, ICU_NUM_IRQ, node,
- &rzv2h_icu_domain_ops, rzv2h_icu_data);
+ irq_domain = irq_domain_create_hierarchy(parent_domain, 0, ICU_NUM_IRQ,
+ of_fwnode_handle(node), &rzv2h_icu_domain_ops,
+ rzv2h_icu_data);
if (!irq_domain) {
dev_err(&pdev->dev, "failed to add irq domain\n");
ret = -ENOMEM;
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index b8ae67c25b37..1b9fbfce9581 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -20,7 +20,7 @@
#include <linux/spinlock.h>
#include <linux/smp.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
#include "irq-riscv-imsic-state.h"
static bool imsic_cpu_page_phys(unsigned int cpu, unsigned int guest_index,
diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
index bdf5cd2037f2..77670dd645ac 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.c
+++ b/drivers/irqchip/irq-riscv-imsic-state.c
@@ -208,17 +208,17 @@ skip:
}
#ifdef CONFIG_SMP
-static void __imsic_local_timer_start(struct imsic_local_priv *lpriv)
+static void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu)
{
lockdep_assert_held(&lpriv->lock);
if (!timer_pending(&lpriv->timer)) {
lpriv->timer.expires = jiffies + 1;
- add_timer_on(&lpriv->timer, smp_processor_id());
+ add_timer_on(&lpriv->timer, cpu);
}
}
#else
-static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv)
+static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu)
{
}
#endif
@@ -233,7 +233,7 @@ void imsic_local_sync_all(bool force_all)
if (force_all)
bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1);
if (!__imsic_local_sync(lpriv))
- __imsic_local_timer_start(lpriv);
+ __imsic_local_timer_start(lpriv, smp_processor_id());
raw_spin_unlock_irqrestore(&lpriv->lock, flags);
}
@@ -278,7 +278,7 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu
return;
}
- __imsic_local_timer_start(lpriv);
+ __imsic_local_timer_start(lpriv, cpu);
}
}
#else
@@ -564,7 +564,7 @@ void imsic_state_offline(void)
struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
raw_spin_lock_irqsave(&lpriv->lock, flags);
- WARN_ON_ONCE(try_to_del_timer_sync(&lpriv->timer) < 0);
+ WARN_ON_ONCE(timer_delete_sync_try(&lpriv->timer) < 0);
raw_spin_unlock_irqrestore(&lpriv->lock, flags);
#endif
}
diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
index f653c13de62b..e5805885394e 100644
--- a/drivers/irqchip/irq-riscv-intc.c
+++ b/drivers/irqchip/irq-riscv-intc.c
@@ -242,7 +242,7 @@ static int __init riscv_intc_init(struct device_node *node,
chip = &andes_intc_chip;
}
- return riscv_intc_init_common(of_node_to_fwnode(node), chip);
+ return riscv_intc_init_common(of_fwnode_handle(node), chip);
}
IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init);
diff --git a/drivers/irqchip/irq-sa11x0.c b/drivers/irqchip/irq-sa11x0.c
index 9d0b80271949..d8d4dff16276 100644
--- a/drivers/irqchip/irq-sa11x0.c
+++ b/drivers/irqchip/irq-sa11x0.c
@@ -162,7 +162,7 @@ void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start)
*/
writel_relaxed(1, iobase + ICCR);
- sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
+ sa1100_normal_irqdomain = irq_domain_create_simple(NULL,
32, irq_start,
&sa1100_normal_irqdomain_ops, NULL);
diff --git a/drivers/irqchip/irq-sg2042-msi.c b/drivers/irqchip/irq-sg2042-msi.c
index 375b55aa0acd..af16bc5a3c8b 100644
--- a/drivers/irqchip/irq-sg2042-msi.c
+++ b/drivers/irqchip/irq-sg2042-msi.c
@@ -17,23 +17,38 @@
#include <linux/property.h>
#include <linux/slab.h>
-#include "irq-msi-lib.h"
+#include <linux/irqchip/irq-msi-lib.h>
-#define SG2042_MAX_MSI_VECTOR 32
+struct sg204x_msi_chip_info {
+ const struct irq_chip *irqchip;
+ const struct msi_parent_ops *parent_ops;
+};
+
+/**
+ * struct sg204x_msi_chipdata - chip data for the SG204x MSI IRQ controller
+ * @reg_clr: clear reg, see TRM, 10.1.33, GP_INTR0_CLR
+ * @doorbell_addr: see TRM, 10.1.32, GP_INTR0_SET
+ * @irq_first: First vectors number that MSIs starts
+ * @num_irqs: Number of vectors for MSIs
+ * @msi_map: mapping for allocated MSI vectors.
+ * @msi_map_lock: Lock for msi_map
+ * @chip_info: chip specific infomations
+ */
+struct sg204x_msi_chipdata {
+ void __iomem *reg_clr;
-struct sg2042_msi_chipdata {
- void __iomem *reg_clr; // clear reg, see TRM, 10.1.33, GP_INTR0_CLR
+ phys_addr_t doorbell_addr;
- phys_addr_t doorbell_addr; // see TRM, 10.1.32, GP_INTR0_SET
+ u32 irq_first;
+ u32 num_irqs;
- u32 irq_first; // The vector number that MSIs starts
- u32 num_irqs; // The number of vectors for MSIs
+ unsigned long *msi_map;
+ struct mutex msi_map_lock;
- DECLARE_BITMAP(msi_map, SG2042_MAX_MSI_VECTOR);
- struct mutex msi_map_lock; // lock for msi_map
+ const struct sg204x_msi_chip_info *chip_info;
};
-static int sg2042_msi_allocate_hwirq(struct sg2042_msi_chipdata *data, int num_req)
+static int sg204x_msi_allocate_hwirq(struct sg204x_msi_chipdata *data, int num_req)
{
int first;
@@ -43,7 +58,7 @@ static int sg2042_msi_allocate_hwirq(struct sg2042_msi_chipdata *data, int num_r
return first >= 0 ? first : -ENOSPC;
}
-static void sg2042_msi_free_hwirq(struct sg2042_msi_chipdata *data, int hwirq, int num_req)
+static void sg204x_msi_free_hwirq(struct sg204x_msi_chipdata *data, int hwirq, int num_req)
{
guard(mutex)(&data->msi_map_lock);
bitmap_release_region(data->msi_map, hwirq, get_count_order(num_req));
@@ -51,7 +66,7 @@ static void sg2042_msi_free_hwirq(struct sg2042_msi_chipdata *data, int hwirq, i
static void sg2042_msi_irq_ack(struct irq_data *d)
{
- struct sg2042_msi_chipdata *data = irq_data_get_irq_chip_data(d);
+ struct sg204x_msi_chipdata *data = irq_data_get_irq_chip_data(d);
int bit_off = d->hwirq;
writel(1 << bit_off, data->reg_clr);
@@ -61,7 +76,7 @@ static void sg2042_msi_irq_ack(struct irq_data *d)
static void sg2042_msi_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
- struct sg2042_msi_chipdata *data = irq_data_get_irq_chip_data(d);
+ struct sg204x_msi_chipdata *data = irq_data_get_irq_chip_data(d);
msg->address_hi = upper_32_bits(data->doorbell_addr);
msg->address_lo = lower_32_bits(data->doorbell_addr);
@@ -79,9 +94,38 @@ static const struct irq_chip sg2042_msi_middle_irq_chip = {
.irq_compose_msi_msg = sg2042_msi_irq_compose_msi_msg,
};
-static int sg2042_msi_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, int hwirq)
+static void sg2044_msi_irq_ack(struct irq_data *d)
+{
+ struct sg204x_msi_chipdata *data = irq_data_get_irq_chip_data(d);
+
+ writel(0, (u32 __iomem *)data->reg_clr + d->hwirq);
+ irq_chip_ack_parent(d);
+}
+
+static void sg2044_msi_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ struct sg204x_msi_chipdata *data = irq_data_get_irq_chip_data(d);
+ phys_addr_t doorbell = data->doorbell_addr + 4 * (d->hwirq / 32);
+
+ msg->address_lo = lower_32_bits(doorbell);
+ msg->address_hi = upper_32_bits(doorbell);
+ msg->data = d->hwirq % 32;
+}
+
+static struct irq_chip sg2044_msi_middle_irq_chip = {
+ .name = "SG2044 MSI",
+ .irq_ack = sg2044_msi_irq_ack,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+#endif
+ .irq_compose_msi_msg = sg2044_msi_irq_compose_msi_msg,
+};
+
+static int sg204x_msi_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, int hwirq)
{
- struct sg2042_msi_chipdata *data = domain->host_data;
+ struct sg204x_msi_chipdata *data = domain->host_data;
struct irq_fwspec fwspec;
struct irq_data *d;
int ret;
@@ -99,47 +143,45 @@ static int sg2042_msi_parent_domain_alloc(struct irq_domain *domain, unsigned in
return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
}
-static int sg2042_msi_middle_domain_alloc(struct irq_domain *domain, unsigned int virq,
+static int sg204x_msi_middle_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *args)
{
- struct sg2042_msi_chipdata *data = domain->host_data;
+ struct sg204x_msi_chipdata *data = domain->host_data;
int hwirq, err, i;
- hwirq = sg2042_msi_allocate_hwirq(data, nr_irqs);
+ hwirq = sg204x_msi_allocate_hwirq(data, nr_irqs);
if (hwirq < 0)
return hwirq;
for (i = 0; i < nr_irqs; i++) {
- err = sg2042_msi_parent_domain_alloc(domain, virq + i, hwirq + i);
+ err = sg204x_msi_parent_domain_alloc(domain, virq + i, hwirq + i);
if (err)
goto err_hwirq;
irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
- &sg2042_msi_middle_irq_chip, data);
+ data->chip_info->irqchip, data);
}
-
return 0;
err_hwirq:
- sg2042_msi_free_hwirq(data, hwirq, nr_irqs);
+ sg204x_msi_free_hwirq(data, hwirq, nr_irqs);
irq_domain_free_irqs_parent(domain, virq, i);
-
return err;
}
-static void sg2042_msi_middle_domain_free(struct irq_domain *domain, unsigned int virq,
+static void sg204x_msi_middle_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
- struct sg2042_msi_chipdata *data = irq_data_get_irq_chip_data(d);
+ struct sg204x_msi_chipdata *data = irq_data_get_irq_chip_data(d);
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
- sg2042_msi_free_hwirq(data, d->hwirq, nr_irqs);
+ sg204x_msi_free_hwirq(data, d->hwirq, nr_irqs);
}
-static const struct irq_domain_ops sg2042_msi_middle_domain_ops = {
- .alloc = sg2042_msi_middle_domain_alloc,
- .free = sg2042_msi_middle_domain_free,
+static const struct irq_domain_ops sg204x_msi_middle_domain_ops = {
+ .alloc = sg204x_msi_middle_domain_alloc,
+ .free = sg204x_msi_middle_domain_free,
.select = msi_lib_irq_domain_select,
};
@@ -158,14 +200,30 @@ static const struct msi_parent_ops sg2042_msi_parent_ops = {
.init_dev_msi_info = msi_lib_init_dev_msi_info,
};
-static int sg2042_msi_init_domains(struct sg2042_msi_chipdata *data,
+#define SG2044_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
+ MSI_FLAG_USE_DEF_CHIP_OPS)
+
+#define SG2044_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \
+ MSI_FLAG_PCI_MSIX)
+
+static const struct msi_parent_ops sg2044_msi_parent_ops = {
+ .required_flags = SG2044_MSI_FLAGS_REQUIRED,
+ .supported_flags = SG2044_MSI_FLAGS_SUPPORTED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
+ .bus_select_mask = MATCH_PCI_MSI,
+ .bus_select_token = DOMAIN_BUS_NEXUS,
+ .prefix = "SG2044-",
+ .init_dev_msi_info = msi_lib_init_dev_msi_info,
+};
+
+static int sg204x_msi_init_domains(struct sg204x_msi_chipdata *data,
struct irq_domain *plic_domain, struct device *dev)
{
struct fwnode_handle *fwnode = dev_fwnode(dev);
struct irq_domain *middle_domain;
middle_domain = irq_domain_create_hierarchy(plic_domain, 0, data->num_irqs, fwnode,
- &sg2042_msi_middle_domain_ops, data);
+ &sg204x_msi_middle_domain_ops, data);
if (!middle_domain) {
pr_err("Failed to create the MSI middle domain\n");
return -ENOMEM;
@@ -174,24 +232,29 @@ static int sg2042_msi_init_domains(struct sg2042_msi_chipdata *data,
irq_domain_update_bus_token(middle_domain, DOMAIN_BUS_NEXUS);
middle_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
- middle_domain->msi_parent_ops = &sg2042_msi_parent_ops;
-
+ middle_domain->msi_parent_ops = data->chip_info->parent_ops;
return 0;
}
static int sg2042_msi_probe(struct platform_device *pdev)
{
struct fwnode_reference_args args = { };
- struct sg2042_msi_chipdata *data;
+ struct sg204x_msi_chipdata *data;
struct device *dev = &pdev->dev;
struct irq_domain *plic_domain;
struct resource *res;
int ret;
- data = devm_kzalloc(dev, sizeof(struct sg2042_msi_chipdata), GFP_KERNEL);
+ data = devm_kzalloc(dev, sizeof(struct sg204x_msi_chipdata), GFP_KERNEL);
if (!data)
return -ENOMEM;
+ data->chip_info = device_get_match_data(&pdev->dev);
+ if (!data->chip_info) {
+ dev_err(&pdev->dev, "Failed to get irqchip\n");
+ return -EINVAL;
+ }
+
data->reg_clr = devm_platform_ioremap_resource_byname(pdev, "clr");
if (IS_ERR(data->reg_clr)) {
dev_err(dev, "Failed to map clear register\n");
@@ -232,11 +295,28 @@ static int sg2042_msi_probe(struct platform_device *pdev)
mutex_init(&data->msi_map_lock);
- return sg2042_msi_init_domains(data, plic_domain, dev);
+ data->msi_map = devm_bitmap_zalloc(&pdev->dev, data->num_irqs, GFP_KERNEL);
+ if (!data->msi_map) {
+ dev_err(&pdev->dev, "Unable to allocate msi mapping\n");
+ return -ENOMEM;
+ }
+
+ return sg204x_msi_init_domains(data, plic_domain, dev);
}
+static const struct sg204x_msi_chip_info sg2042_chip_info = {
+ .irqchip = &sg2042_msi_middle_irq_chip,
+ .parent_ops = &sg2042_msi_parent_ops,
+};
+
+static const struct sg204x_msi_chip_info sg2044_chip_info = {
+ .irqchip = &sg2044_msi_middle_irq_chip,
+ .parent_ops = &sg2044_msi_parent_ops,
+};
+
static const struct of_device_id sg2042_msi_of_match[] = {
- { .compatible = "sophgo,sg2042-msi" },
+ { .compatible = "sophgo,sg2042-msi", .data = &sg2042_chip_info },
+ { .compatible = "sophgo,sg2044-msi", .data = &sg2044_chip_info },
{ }
};
diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c
index c7db617e1a2f..0cad68aa8388 100644
--- a/drivers/irqchip/irq-sni-exiu.c
+++ b/drivers/irqchip/irq-sni-exiu.c
@@ -249,12 +249,12 @@ static int __init exiu_dt_init(struct device_node *node,
return -ENXIO;
}
- data = exiu_init(of_node_to_fwnode(node), &res);
+ data = exiu_init(of_fwnode_handle(node), &res);
if (IS_ERR(data))
return PTR_ERR(data);
- domain = irq_domain_add_hierarchy(parent_domain, 0, NUM_IRQS, node,
- &exiu_domain_ops, data);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, NUM_IRQS, of_fwnode_handle(node),
+ &exiu_domain_ops, data);
if (!domain) {
pr_err("%pOF: failed to allocate domain\n", node);
goto out_unmap;
diff --git a/drivers/irqchip/irq-sp7021-intc.c b/drivers/irqchip/irq-sp7021-intc.c
index bed78d1def3d..2a6eda9ab62e 100644
--- a/drivers/irqchip/irq-sp7021-intc.c
+++ b/drivers/irqchip/irq-sp7021-intc.c
@@ -256,8 +256,8 @@ static int __init sp_intc_init_dt(struct device_node *node, struct device_node *
writel_relaxed(~0, REG_INTR_CLEAR + i * 4);
}
- sp_intc.domain = irq_domain_add_linear(node, SP_INTC_NR_IRQS,
- &sp_intc_dm_ops, &sp_intc);
+ sp_intc.domain = irq_domain_create_linear(of_fwnode_handle(node), SP_INTC_NR_IRQS,
+ &sp_intc_dm_ops, &sp_intc);
if (!sp_intc.domain) {
ret = -ENOMEM;
goto out_unmap1;
diff --git a/drivers/irqchip/irq-starfive-jh8100-intc.c b/drivers/irqchip/irq-starfive-jh8100-intc.c
index 0f5837176e53..2460798ec158 100644
--- a/drivers/irqchip/irq-starfive-jh8100-intc.c
+++ b/drivers/irqchip/irq-starfive-jh8100-intc.c
@@ -158,8 +158,8 @@ static int __init starfive_intc_init(struct device_node *intc,
raw_spin_lock_init(&irqc->lock);
- irqc->domain = irq_domain_add_linear(intc, STARFIVE_INTC_SRC_IRQ_NUM,
- &starfive_intc_domain_ops, irqc);
+ irqc->domain = irq_domain_create_linear(of_fwnode_handle(intc), STARFIVE_INTC_SRC_IRQ_NUM,
+ &starfive_intc_domain_ops, irqc);
if (!irqc->domain) {
pr_err("Unable to create IRQ domain\n");
ret = -EINVAL;
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 7c6a0080c330..978811f2abe8 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -169,22 +169,18 @@ static int stm32_irq_set_type(struct irq_data *d, unsigned int type)
u32 rtsr, ftsr;
int err;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
rtsr = irq_reg_readl(gc, stm32_bank->rtsr_ofst);
ftsr = irq_reg_readl(gc, stm32_bank->ftsr_ofst);
err = stm32_exti_set_type(d, type, &rtsr, &ftsr);
if (err)
- goto unlock;
+ return err;
irq_reg_writel(gc, rtsr, stm32_bank->rtsr_ofst);
irq_reg_writel(gc, ftsr, stm32_bank->ftsr_ofst);
-
-unlock:
- irq_gc_unlock(gc);
-
- return err;
+ return 0;
}
static void stm32_chip_suspend(struct stm32_exti_chip_data *chip_data,
@@ -217,18 +213,16 @@ static void stm32_irq_suspend(struct irq_chip_generic *gc)
{
struct stm32_exti_chip_data *chip_data = gc->private;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
stm32_chip_suspend(chip_data, gc->wake_active);
- irq_gc_unlock(gc);
}
static void stm32_irq_resume(struct irq_chip_generic *gc)
{
struct stm32_exti_chip_data *chip_data = gc->private;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
stm32_chip_resume(chip_data, gc->mask_cache);
- irq_gc_unlock(gc);
}
static int stm32_exti_alloc(struct irq_domain *d, unsigned int virq,
@@ -265,11 +259,8 @@ static void stm32_irq_ack(struct irq_data *d)
struct stm32_exti_chip_data *chip_data = gc->private;
const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
- irq_gc_lock(gc);
-
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, d->mask, stm32_bank->rpr_ofst);
-
- irq_gc_unlock(gc);
}
static struct
@@ -344,8 +335,8 @@ static int __init stm32_exti_init(const struct stm32_exti_drv_data *drv_data,
if (!host_data)
return -ENOMEM;
- domain = irq_domain_add_linear(node, drv_data->bank_nr * IRQS_PER_BANK,
- &irq_exti_domain_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(node), drv_data->bank_nr * IRQS_PER_BANK,
+ &irq_exti_domain_ops, NULL);
if (!domain) {
pr_err("%pOFn: Could not register interrupt domain.\n",
node);
diff --git a/drivers/irqchip/irq-stm32mp-exti.c b/drivers/irqchip/irq-stm32mp-exti.c
index cb83d6cc6113..c6b4407d05f9 100644
--- a/drivers/irqchip/irq-stm32mp-exti.c
+++ b/drivers/irqchip/irq-stm32mp-exti.c
@@ -531,7 +531,7 @@ static int stm32mp_exti_domain_alloc(struct irq_domain *dm,
if (ret)
return ret;
/* we only support one parent, so far */
- if (of_node_to_fwnode(out_irq.np) != dm->parent->fwnode)
+ if (of_fwnode_handle(out_irq.np) != dm->parent->fwnode)
return -EINVAL;
of_phandle_args_to_fwspec(out_irq.np, out_irq.args,
@@ -682,10 +682,9 @@ static int stm32mp_exti_probe(struct platform_device *pdev)
return -EINVAL;
}
- domain = irq_domain_add_hierarchy(parent_domain, 0,
- drv_data->bank_nr * IRQS_PER_BANK,
- np, &stm32mp_exti_domain_ops,
- host_data);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, drv_data->bank_nr * IRQS_PER_BANK,
+ of_fwnode_handle(np), &stm32mp_exti_domain_ops,
+ host_data);
if (!domain) {
dev_err(dev, "Could not register exti domain\n");
diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
index dd506ebfdacb..9c2c9caeca2a 100644
--- a/drivers/irqchip/irq-sun4i.c
+++ b/drivers/irqchip/irq-sun4i.c
@@ -133,7 +133,7 @@ static int __init sun4i_of_init(struct device_node *node,
/* Configure the external interrupt source type */
writel(0x00, irq_ic_data->irq_base + SUN4I_IRQ_NMI_CTRL_REG);
- irq_ic_data->irq_domain = irq_domain_add_linear(node, 3 * 32,
+ irq_ic_data->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), 3 * 32,
&sun4i_irq_ops, NULL);
if (!irq_ic_data->irq_domain)
panic("%pOF: unable to create IRQ domain\n", node);
diff --git a/drivers/irqchip/irq-sun6i-r.c b/drivers/irqchip/irq-sun6i-r.c
index 99958d470d62..37d4b29763bc 100644
--- a/drivers/irqchip/irq-sun6i-r.c
+++ b/drivers/irqchip/irq-sun6i-r.c
@@ -338,8 +338,8 @@ static int __init sun6i_r_intc_init(struct device_node *node,
return PTR_ERR(base);
}
- domain = irq_domain_add_hierarchy(parent_domain, 0, 0, node,
- &sun6i_r_intc_domain_ops, NULL);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, 0, of_fwnode_handle(node),
+ &sun6i_r_intc_domain_ops, NULL);
if (!domain) {
pr_err("%pOF: Failed to allocate domain\n", node);
iounmap(base);
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 01b0d8321728..fe32dfdc2dd0 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -111,7 +111,7 @@ static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type)
unsigned int src_type;
unsigned int i;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
switch (flow_type & IRQF_TRIGGER_MASK) {
case IRQ_TYPE_EDGE_FALLING:
@@ -128,9 +128,7 @@ static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type)
src_type = SUNXI_SRC_TYPE_LEVEL_LOW;
break;
default:
- irq_gc_unlock(gc);
- pr_err("Cannot assign multiple trigger modes to IRQ %d.\n",
- data->irq);
+ pr_err("Cannot assign multiple trigger modes to IRQ %d.\n", data->irq);
return -EBADR;
}
@@ -145,9 +143,6 @@ static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type)
src_type_reg &= ~SUNXI_NMI_SRC_TYPE_MASK;
src_type_reg |= src_type;
sunxi_sc_nmi_write(gc, ctrl_off, src_type_reg);
-
- irq_gc_unlock(gc);
-
return IRQ_SET_MASK_OK;
}
@@ -159,7 +154,7 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node,
struct irq_domain *domain;
int ret;
- domain = irq_domain_add_linear(node, 1, &irq_generic_chip_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(node), 1, &irq_generic_chip_ops, NULL);
if (!domain) {
pr_err("Could not register interrupt domain.\n");
return -ENOMEM;
diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c
index d59bfbe8c6d0..94cbc5111d7e 100644
--- a/drivers/irqchip/irq-tb10x.c
+++ b/drivers/irqchip/irq-tb10x.c
@@ -41,11 +41,9 @@ static inline u32 ab_irqctl_readreg(struct irq_chip_generic *gc, u32 reg)
static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
- uint32_t im, mod, pol;
+ uint32_t mod, pol, im = data->mask;
- im = data->mask;
-
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
mod = ab_irqctl_readreg(gc, AB_IRQCTL_SRC_MODE) | im;
pol = ab_irqctl_readreg(gc, AB_IRQCTL_SRC_POLARITY) | im;
@@ -67,9 +65,7 @@ static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type)
case IRQ_TYPE_EDGE_RISING:
break;
default:
- irq_gc_unlock(gc);
- pr_err("%s: Cannot assign multiple trigger modes to IRQ %d.\n",
- __func__, data->irq);
+ pr_err("%s: Cannot assign multiple trigger modes to IRQ %d.\n", __func__, data->irq);
return -EBADR;
}
@@ -79,9 +75,6 @@ static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type)
ab_irqctl_writereg(gc, AB_IRQCTL_SRC_MODE, mod);
ab_irqctl_writereg(gc, AB_IRQCTL_SRC_POLARITY, pol);
ab_irqctl_writereg(gc, AB_IRQCTL_INT_STATUS, im);
-
- irq_gc_unlock(gc);
-
return IRQ_SET_MASK_OK;
}
@@ -121,13 +114,13 @@ static int __init of_tb10x_init_irq(struct device_node *ictl,
goto ioremap_fail;
}
- domain = irq_domain_add_linear(ictl, AB_IRQCTL_MAXIRQ,
- &irq_generic_chip_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(ictl), AB_IRQCTL_MAXIRQ,
+ &irq_generic_chip_ops, NULL);
if (!domain) {
ret = -ENOMEM;
pr_err("%pOFn: Could not register interrupt domain.\n",
ictl);
- goto irq_domain_add_fail;
+ goto irq_domain_create_fail;
}
ret = irq_alloc_domain_generic_chips(domain, AB_IRQCTL_MAXIRQ,
@@ -174,7 +167,7 @@ static int __init of_tb10x_init_irq(struct device_node *ictl,
gc_alloc_fail:
irq_domain_remove(domain);
-irq_domain_add_fail:
+irq_domain_create_fail:
iounmap(reg_base);
ioremap_fail:
release_mem_region(mem.start, resource_size(&mem));
diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c
index ad3e2c1b3c87..66cbb9f77ff3 100644
--- a/drivers/irqchip/irq-tegra.c
+++ b/drivers/irqchip/irq-tegra.c
@@ -330,9 +330,8 @@ static int __init tegra_ictlr_init(struct device_node *node,
node, num_ictlrs, soc->num_ictlrs);
- domain = irq_domain_add_hierarchy(parent_domain, 0, num_ictlrs * 32,
- node, &tegra_ictlr_domain_ops,
- lic);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, num_ictlrs * 32,
+ of_fwnode_handle(node), &tegra_ictlr_domain_ops, lic);
if (!domain) {
pr_err("%pOF: failed to allocated domain\n", node);
err = -ENOMEM;
diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
index a887efba262c..7de59238e6b0 100644
--- a/drivers/irqchip/irq-ti-sci-inta.c
+++ b/drivers/irqchip/irq-ti-sci-inta.c
@@ -233,7 +233,7 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom
INIT_LIST_HEAD(&vint_desc->list);
parent_node = of_irq_find_parent(dev_of_node(&inta->pdev->dev));
- parent_fwspec.fwnode = of_node_to_fwnode(parent_node);
+ parent_fwspec.fwnode = of_fwnode_handle(parent_node);
if (of_device_is_compatible(parent_node, "arm,gic-v3")) {
/* Parent is GIC */
@@ -701,15 +701,15 @@ static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev)
if (ret)
return ret;
- domain = irq_domain_add_linear(dev_of_node(dev),
- ti_sci_get_num_resources(inta->vint),
- &ti_sci_inta_irq_domain_ops, inta);
+ domain = irq_domain_create_linear(of_fwnode_handle(dev_of_node(dev)),
+ ti_sci_get_num_resources(inta->vint),
+ &ti_sci_inta_irq_domain_ops, inta);
if (!domain) {
dev_err(dev, "Failed to allocate IRQ domain\n");
return -ENOMEM;
}
- msi_domain = ti_sci_inta_msi_create_irq_domain(of_node_to_fwnode(node),
+ msi_domain = ti_sci_inta_msi_create_irq_domain(of_fwnode_handle(node),
&ti_sci_inta_msi_domain_info,
domain);
if (!msi_domain) {
diff --git a/drivers/irqchip/irq-ti-sci-intr.c b/drivers/irqchip/irq-ti-sci-intr.c
index b49a73106c69..07fff5ae5ce0 100644
--- a/drivers/irqchip/irq-ti-sci-intr.c
+++ b/drivers/irqchip/irq-ti-sci-intr.c
@@ -149,7 +149,7 @@ static int ti_sci_intr_alloc_parent_irq(struct irq_domain *domain,
goto err_irqs;
parent_node = of_irq_find_parent(dev_of_node(intr->dev));
- fwspec.fwnode = of_node_to_fwnode(parent_node);
+ fwspec.fwnode = of_fwnode_handle(parent_node);
if (of_device_is_compatible(parent_node, "arm,gic-v3")) {
/* Parent is GIC */
@@ -274,8 +274,9 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev)
return PTR_ERR(intr->out_irqs);
}
- domain = irq_domain_add_hierarchy(parent_domain, 0, 0, dev_of_node(dev),
- &ti_sci_intr_irq_domain_ops, intr);
+ domain = irq_domain_create_hierarchy(parent_domain, 0, 0,
+ of_fwnode_handle(dev_of_node(dev)),
+ &ti_sci_intr_irq_domain_ops, intr);
if (!domain) {
dev_err(dev, "Failed to allocate IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index 960c343d5781..e625f4fb2bb8 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -125,7 +125,7 @@ static int ts4800_ic_probe(struct platform_device *pdev)
return -EINVAL;
}
- data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data);
+ data->domain = irq_domain_create_linear(of_fwnode_handle(node), 8, &ts4800_ic_ops, data);
if (!data->domain) {
dev_err(&pdev->dev, "cannot add IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/irqchip/irq-uniphier-aidet.c b/drivers/irqchip/irq-uniphier-aidet.c
index 601f9343d5b3..6005c2d28dd9 100644
--- a/drivers/irqchip/irq-uniphier-aidet.c
+++ b/drivers/irqchip/irq-uniphier-aidet.c
@@ -188,7 +188,7 @@ static int uniphier_aidet_probe(struct platform_device *pdev)
priv->domain = irq_domain_create_hierarchy(
parent_domain, 0,
UNIPHIER_AIDET_NR_IRQS,
- of_node_to_fwnode(dev->of_node),
+ of_fwnode_handle(dev->of_node),
&uniphier_aidet_domain_ops, priv);
if (!priv->domain)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index 0abc8934c2ee..034ce6afe170 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -176,8 +176,8 @@ static void __init fpga_irq_init(void __iomem *base, int parent_irq,
f);
}
- f->domain = irq_domain_add_linear(node, fls(valid),
- &fpga_irqdomain_ops, f);
+ f->domain = irq_domain_create_linear(of_fwnode_handle(node), fls(valid),
+ &fpga_irqdomain_ops, f);
/* This will allocate all valid descriptors in the linear case */
for (i = 0; i < fls(valid); i++)
diff --git a/drivers/irqchip/irq-vf610-mscm-ir.c b/drivers/irqchip/irq-vf610-mscm-ir.c
index 2b9a8ba58e26..5d9c7503aa7f 100644
--- a/drivers/irqchip/irq-vf610-mscm-ir.c
+++ b/drivers/irqchip/irq-vf610-mscm-ir.c
@@ -209,9 +209,9 @@ static int __init vf610_mscm_ir_of_init(struct device_node *node,
regmap_read(mscm_cp_regmap, MSCM_CPxNUM, &cpuid);
mscm_ir_data->cpu_mask = 0x1 << cpuid;
- domain = irq_domain_add_hierarchy(domain_parent, 0,
- MSCM_IRSPRC_NUM, node,
- &mscm_irq_domain_ops, mscm_ir_data);
+ domain = irq_domain_create_hierarchy(domain_parent, 0, MSCM_IRSPRC_NUM,
+ of_fwnode_handle(node), &mscm_irq_domain_ops,
+ mscm_ir_data);
if (!domain) {
ret = -ENOMEM;
goto out_unmap;
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index ea93e7236c4a..2bcdf216a000 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -289,8 +289,9 @@ static void __init vic_register(void __iomem *base, unsigned int parent_irq,
vic_handle_irq_cascaded, v);
}
- v->domain = irq_domain_add_simple(node, fls(valid_sources), irq,
- &vic_irqdomain_ops, v);
+ v->domain = irq_domain_create_simple(of_fwnode_handle(node),
+ fls(valid_sources), irq,
+ &vic_irqdomain_ops, v);
/* create an IRQ mapping for each valid IRQ */
for (i = 0; i < fls(valid_sources); i++)
if (valid_sources & (1 << i))
diff --git a/drivers/irqchip/irq-vt8500.c b/drivers/irqchip/irq-vt8500.c
index e17dd3a8c2d5..3b742590aec8 100644
--- a/drivers/irqchip/irq-vt8500.c
+++ b/drivers/irqchip/irq-vt8500.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
@@ -63,29 +64,28 @@ struct vt8500_irq_data {
struct irq_domain *domain; /* Domain for this controller */
};
-/* Global variable for accessing io-mem addresses */
-static struct vt8500_irq_data intc[VT8500_INTC_MAX];
-static u32 active_cnt = 0;
+/* Primary interrupt controller data */
+static struct vt8500_irq_data *primary_intc;
-static void vt8500_irq_mask(struct irq_data *d)
+static void vt8500_irq_ack(struct irq_data *d)
{
struct vt8500_irq_data *priv = d->domain->host_data;
void __iomem *base = priv->base;
void __iomem *stat_reg = base + VT8500_ICIS + (d->hwirq < 32 ? 0 : 4);
- u8 edge, dctr;
- u32 status;
+ u32 status = (1 << (d->hwirq & 0x1f));
- edge = readb(base + VT8500_ICDC + d->hwirq) & VT8500_EDGE;
- if (edge) {
- status = readl(stat_reg);
+ writel(status, stat_reg);
+}
- status |= (1 << (d->hwirq & 0x1f));
- writel(status, stat_reg);
- } else {
- dctr = readb(base + VT8500_ICDC + d->hwirq);
- dctr &= ~VT8500_INT_ENABLE;
- writeb(dctr, base + VT8500_ICDC + d->hwirq);
- }
+static void vt8500_irq_mask(struct irq_data *d)
+{
+ struct vt8500_irq_data *priv = d->domain->host_data;
+ void __iomem *base = priv->base;
+ u8 dctr;
+
+ dctr = readb(base + VT8500_ICDC + d->hwirq);
+ dctr &= ~VT8500_INT_ENABLE;
+ writeb(dctr, base + VT8500_ICDC + d->hwirq);
}
static void vt8500_irq_unmask(struct irq_data *d)
@@ -130,11 +130,11 @@ static int vt8500_irq_set_type(struct irq_data *d, unsigned int flow_type)
}
static struct irq_chip vt8500_irq_chip = {
- .name = "vt8500",
- .irq_ack = vt8500_irq_mask,
- .irq_mask = vt8500_irq_mask,
- .irq_unmask = vt8500_irq_unmask,
- .irq_set_type = vt8500_irq_set_type,
+ .name = "vt8500",
+ .irq_ack = vt8500_irq_ack,
+ .irq_mask = vt8500_irq_mask,
+ .irq_unmask = vt8500_irq_unmask,
+ .irq_set_type = vt8500_irq_set_type,
};
static void __init vt8500_init_irq_hw(void __iomem *base)
@@ -163,82 +163,89 @@ static const struct irq_domain_ops vt8500_irq_domain_ops = {
.xlate = irq_domain_xlate_onecell,
};
+static inline void vt8500_handle_irq_common(struct vt8500_irq_data *intc)
+{
+ unsigned long irqnr = readl_relaxed(intc->base) & 0x3F;
+ unsigned long stat;
+
+ /*
+ * Highest Priority register default = 63, so check that this
+ * is a real interrupt by checking the status register
+ */
+ if (irqnr == 63) {
+ stat = readl_relaxed(intc->base + VT8500_ICIS + 4);
+ if (!(stat & BIT(31)))
+ return;
+ }
+
+ generic_handle_domain_irq(intc->domain, irqnr);
+}
+
static void __exception_irq_entry vt8500_handle_irq(struct pt_regs *regs)
{
- u32 stat, i;
- int irqnr;
- void __iomem *base;
-
- /* Loop through each active controller */
- for (i=0; i<active_cnt; i++) {
- base = intc[i].base;
- irqnr = readl_relaxed(base) & 0x3F;
- /*
- Highest Priority register default = 63, so check that this
- is a real interrupt by checking the status register
- */
- if (irqnr == 63) {
- stat = readl_relaxed(base + VT8500_ICIS + 4);
- if (!(stat & BIT(31)))
- continue;
- }
+ vt8500_handle_irq_common(primary_intc);
+}
- generic_handle_domain_irq(intc[i].domain, irqnr);
- }
+static void vt8500_handle_irq_chained(struct irq_desc *desc)
+{
+ struct irq_domain *d = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct vt8500_irq_data *intc = d->host_data;
+
+ chained_irq_enter(chip, desc);
+ vt8500_handle_irq_common(intc);
+ chained_irq_exit(chip, desc);
}
static int __init vt8500_irq_init(struct device_node *node,
struct device_node *parent)
{
- int irq, i;
- struct device_node *np = node;
+ struct vt8500_irq_data *intc;
+ int irq, i, ret = 0;
- if (active_cnt == VT8500_INTC_MAX) {
- pr_err("%s: Interrupt controllers > VT8500_INTC_MAX\n",
- __func__);
- goto out;
- }
+ intc = kzalloc(sizeof(*intc), GFP_KERNEL);
+ if (!intc)
+ return -ENOMEM;
- intc[active_cnt].base = of_iomap(np, 0);
- intc[active_cnt].domain = irq_domain_add_linear(node, 64,
- &vt8500_irq_domain_ops, &intc[active_cnt]);
-
- if (!intc[active_cnt].base) {
+ intc->base = of_iomap(node, 0);
+ if (!intc->base) {
pr_err("%s: Unable to map IO memory\n", __func__);
- goto out;
+ ret = -ENOMEM;
+ goto err_free;
}
- if (!intc[active_cnt].domain) {
+ intc->domain = irq_domain_create_linear(of_fwnode_handle(node), 64,
+ &vt8500_irq_domain_ops, intc);
+ if (!intc->domain) {
pr_err("%s: Unable to add irq domain!\n", __func__);
- goto out;
+ ret = -ENOMEM;
+ goto err_unmap;
}
- set_handle_irq(vt8500_handle_irq);
-
- vt8500_init_irq_hw(intc[active_cnt].base);
+ vt8500_init_irq_hw(intc->base);
pr_info("vt8500-irq: Added interrupt controller\n");
- active_cnt++;
-
- /* check if this is a slaved controller */
- if (of_irq_count(np) != 0) {
- /* check that we have the correct number of interrupts */
- if (of_irq_count(np) != 8) {
- pr_err("%s: Incorrect IRQ map for slaved controller\n",
- __func__);
- return -EINVAL;
- }
-
- for (i = 0; i < 8; i++) {
- irq = irq_of_parse_and_map(np, i);
- enable_irq(irq);
+ /* check if this is a chained controller */
+ if (of_irq_count(node) != 0) {
+ for (i = 0; i < of_irq_count(node); i++) {
+ irq = irq_of_parse_and_map(node, i);
+ irq_set_chained_handler_and_data(irq, vt8500_handle_irq_chained,
+ intc);
}
pr_info("vt8500-irq: Enabled slave->parent interrupts\n");
+ } else {
+ primary_intc = intc;
+ set_handle_irq(vt8500_handle_irq);
}
-out:
return 0;
+
+err_unmap:
+ iounmap(intc->base);
+err_free:
+ kfree(intc);
+ return ret;
}
IRQCHIP_DECLARE(vt8500_irq, "via,vt8500-intc", vt8500_irq_init);
diff --git a/drivers/irqchip/irq-wpcm450-aic.c b/drivers/irqchip/irq-wpcm450-aic.c
index 91df62a64cd9..a8ed4894d29e 100644
--- a/drivers/irqchip/irq-wpcm450-aic.c
+++ b/drivers/irqchip/irq-wpcm450-aic.c
@@ -154,7 +154,7 @@ static int __init wpcm450_aic_of_init(struct device_node *node,
set_handle_irq(wpcm450_aic_handle_irq);
- aic->domain = irq_domain_add_linear(node, AIC_NUM_IRQS, &wpcm450_aic_ops, aic);
+ aic->domain = irq_domain_create_linear(of_fwnode_handle(node), AIC_NUM_IRQS, &wpcm450_aic_ops, aic);
return 0;
}
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
index 38727e9cc713..92dcb9fdcb25 100644
--- a/drivers/irqchip/irq-xilinx-intc.c
+++ b/drivers/irqchip/irq-xilinx-intc.c
@@ -212,8 +212,8 @@ static int __init xilinx_intc_of_init(struct device_node *intc,
xintc_write(irqc, MER, MER_HIE | MER_ME);
}
- irqc->root_domain = irq_domain_add_linear(intc, irqc->nr_irq,
- &xintc_irq_domain_ops, irqc);
+ irqc->root_domain = irq_domain_create_linear(of_fwnode_handle(intc), irqc->nr_irq,
+ &xintc_irq_domain_ops, irqc);
if (!irqc->root_domain) {
pr_err("irq-xilinx: Unable to create IRQ domain\n");
ret = -EINVAL;
diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c
index 9b441d180299..9fdacbd89a63 100644
--- a/drivers/irqchip/irq-xtensa-mx.c
+++ b/drivers/irqchip/irq-xtensa-mx.c
@@ -167,8 +167,7 @@ static void __init xtensa_mx_init_common(struct irq_domain *root_domain)
int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
{
struct irq_domain *root_domain =
- irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
- &xtensa_mx_irq_domain_ops,
+ irq_domain_create_legacy(NULL, NR_IRQS - 1, 1, 0, &xtensa_mx_irq_domain_ops,
&xtensa_mx_irq_chip);
xtensa_mx_init_common(root_domain);
return 0;
@@ -178,7 +177,7 @@ static int __init xtensa_mx_init(struct device_node *np,
struct device_node *interrupt_parent)
{
struct irq_domain *root_domain =
- irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops,
+ irq_domain_create_linear(of_fwnode_handle(np), NR_IRQS, &xtensa_mx_irq_domain_ops,
&xtensa_mx_irq_chip);
xtensa_mx_init_common(root_domain);
return 0;
diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c
index 9be7b7c5cd23..44e7be051a2e 100644
--- a/drivers/irqchip/irq-xtensa-pic.c
+++ b/drivers/irqchip/irq-xtensa-pic.c
@@ -85,7 +85,7 @@ static struct irq_chip xtensa_irq_chip = {
int __init xtensa_pic_init_legacy(struct device_node *interrupt_parent)
{
struct irq_domain *root_domain =
- irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
+ irq_domain_create_legacy(NULL, NR_IRQS - 1, 1, 0,
&xtensa_irq_domain_ops, &xtensa_irq_chip);
irq_set_default_domain(root_domain);
return 0;
@@ -95,7 +95,7 @@ static int __init xtensa_pic_init(struct device_node *np,
struct device_node *interrupt_parent)
{
struct irq_domain *root_domain =
- irq_domain_add_linear(np, NR_IRQS, &xtensa_irq_domain_ops,
+ irq_domain_create_linear(of_fwnode_handle(np), NR_IRQS, &xtensa_irq_domain_ops,
&xtensa_irq_chip);
irq_set_default_domain(root_domain);
return 0;
diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c
index 7a72620fc478..22d46c246594 100644
--- a/drivers/irqchip/irq-zevio.c
+++ b/drivers/irqchip/irq-zevio.c
@@ -92,8 +92,8 @@ static int __init zevio_of_init(struct device_node *node,
zevio_init_irq_base(zevio_irq_io + IO_IRQ_BASE);
zevio_init_irq_base(zevio_irq_io + IO_FIQ_BASE);
- zevio_irq_domain = irq_domain_add_linear(node, MAX_INTRS,
- &irq_generic_chip_ops, NULL);
+ zevio_irq_domain = irq_domain_create_linear(of_fwnode_handle(node), MAX_INTRS,
+ &irq_generic_chip_ops, NULL);
BUG_ON(!zevio_irq_domain);
ret = irq_alloc_domain_generic_chips(zevio_irq_domain, MAX_INTRS, 1,
diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c
index 7c17a6f643ef..576e55569d77 100644
--- a/drivers/irqchip/spear-shirq.c
+++ b/drivers/irqchip/spear-shirq.c
@@ -239,7 +239,7 @@ static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr,
goto err_unmap;
}
- shirq_domain = irq_domain_add_legacy(np, nr_irqs, virq_base, 0,
+ shirq_domain = irq_domain_create_legacy(of_fwnode_handle(np), nr_irqs, virq_base, 0,
&irq_domain_simple_ops, NULL);
if (WARN_ON(!shirq_domain)) {
pr_warn("%s: irq domain init failed\n", __func__);
diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c
index 0b17a38ea6bf..ea44ffb5ce1a 100644
--- a/drivers/mailbox/qcom-ipcc.c
+++ b/drivers/mailbox/qcom-ipcc.c
@@ -312,8 +312,8 @@ static int qcom_ipcc_probe(struct platform_device *pdev)
if (!name)
return -ENOMEM;
- ipcc->irq_domain = irq_domain_add_tree(pdev->dev.of_node,
- &qcom_ipcc_irq_ops, ipcc);
+ ipcc->irq_domain = irq_domain_create_tree(of_fwnode_handle(pdev->dev.of_node),
+ &qcom_ipcc_irq_ops, ipcc);
if (!ipcc->irq_domain)
return -ENOMEM;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 813b38aec3e4..c40db9c161c1 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -293,8 +293,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
bio->bi_iter.bi_sector = SB_SECTOR;
- __bio_add_page(bio, virt_to_page(out), SB_SIZE,
- offset_in_page(out));
+ bio_add_virt_nofail(bio, out, SB_SIZE);
out->offset = cpu_to_le64(sb->offset);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index f0b5a6931161..d098e75e3461 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1364,7 +1364,7 @@ static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
ptr = (char *)b->data + offset;
len = n_sectors << SECTOR_SHIFT;
- __bio_add_page(bio, virt_to_page(ptr), len, offset_in_page(ptr));
+ bio_add_virt_nofail(bio, ptr, len);
submit_bio(bio);
}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index cc3d3897ef42..1f626066e8cc 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2557,14 +2557,8 @@ static void dm_integrity_inline_recheck(struct work_struct *w)
char *mem;
outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios);
-
- r = bio_add_page(outgoing_bio, virt_to_page(outgoing_data), ic->sectors_per_block << SECTOR_SHIFT, 0);
- if (unlikely(r != (ic->sectors_per_block << SECTOR_SHIFT))) {
- bio_put(outgoing_bio);
- bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(bio);
- return;
- }
+ bio_add_virt_nofail(outgoing_bio, outgoing_data,
+ ic->sectors_per_block << SECTOR_SHIFT);
bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1);
if (IS_ERR(bip)) {
@@ -3211,7 +3205,8 @@ next_chunk:
bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios);
bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector;
- __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer));
+ bio_add_virt_nofail(bio, recalc_buffer,
+ range.n_sectors << SECTOR_SHIFT);
r = submit_bio_wait(bio);
bio_put(bio);
if (unlikely(r)) {
@@ -3228,7 +3223,8 @@ next_chunk:
bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios);
bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector;
- __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer));
+ bio_add_virt_nofail(bio, recalc_buffer,
+ range.n_sectors << SECTOR_SHIFT);
bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
if (unlikely(IS_ERR(bip))) {
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 6adc55fd90d3..127138c61be5 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -14,6 +14,7 @@
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"
+#include "dm-core.h"
#include <linux/device-mapper.h>
@@ -3308,6 +3309,7 @@ size_check:
/* Disable/enable discard support on raid set. */
configure_discard_support(rs);
+ rs->md.dm_gendisk = ti->table->md->disk;
mddev_unlock(&rs->md);
return 0;
@@ -3327,6 +3329,7 @@ static void raid_dtr(struct dm_target *ti)
mddev_lock_nointr(&rs->md);
md_stop(&rs->md);
+ rs->md.dm_gendisk = NULL;
mddev_unlock(&rs->md);
if (work_pending(&rs->md.event_work))
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9daa78c5fe33..0fde115e921f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -111,32 +111,48 @@ static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
/* Default safemode delay: 200 msec */
#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
/*
- * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
- * is 1000 KB/sec, so the extra system load does not show up that much.
- * Increase it if you want to have more _guaranteed_ speed. Note that
- * the RAID driver will use the maximum available bandwidth if the IO
- * subsystem is idle. There is also an 'absolute maximum' reconstruction
- * speed limit - in case reconstruction slows down your system despite
- * idle IO detection.
+ * Current RAID-1,4,5,6,10 parallel reconstruction 'guaranteed speed limit'
+ * is sysctl_speed_limit_min, 1000 KB/sec by default, so the extra system load
+ * does not show up that much. Increase it if you want to have more guaranteed
+ * speed. Note that the RAID driver will use the maximum bandwidth
+ * sysctl_speed_limit_max, 200 MB/sec by default, if the IO subsystem is idle.
*
- * you can change it via /proc/sys/dev/raid/speed_limit_min and _max.
- * or /sys/block/mdX/md/sync_speed_{min,max}
+ * Background sync IO speed control:
+ *
+ * - below speed min:
+ * no limit;
+ * - above speed min and below speed max:
+ * a) if mddev is idle, then no limit;
+ * b) if mddev is busy handling normal IO, then limit inflight sync IO
+ * to sync_io_depth;
+ * - above speed max:
+ * sync IO can't be issued;
+ *
+ * Following configurations can be changed via /proc/sys/dev/raid/ for system
+ * or /sys/block/mdX/md/ for one array.
*/
-
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
-static inline int speed_min(struct mddev *mddev)
+static int sysctl_sync_io_depth = 32;
+
+static int speed_min(struct mddev *mddev)
{
return mddev->sync_speed_min ?
mddev->sync_speed_min : sysctl_speed_limit_min;
}
-static inline int speed_max(struct mddev *mddev)
+static int speed_max(struct mddev *mddev)
{
return mddev->sync_speed_max ?
mddev->sync_speed_max : sysctl_speed_limit_max;
}
+static int sync_io_depth(struct mddev *mddev)
+{
+ return mddev->sync_io_depth ?
+ mddev->sync_io_depth : sysctl_sync_io_depth;
+}
+
static void rdev_uninit_serial(struct md_rdev *rdev)
{
if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
@@ -293,14 +309,21 @@ static const struct ctl_table raid_table[] = {
.procname = "speed_limit_min",
.data = &sysctl_speed_limit_min,
.maxlen = sizeof(int),
- .mode = S_IRUGO|S_IWUSR,
+ .mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "speed_limit_max",
.data = &sysctl_speed_limit_max,
.maxlen = sizeof(int),
- .mode = S_IRUGO|S_IWUSR,
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_io_depth",
+ .data = &sysctl_sync_io_depth,
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = proc_dointvec,
},
};
@@ -5091,7 +5114,7 @@ static ssize_t
sync_min_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d (%s)\n", speed_min(mddev),
- mddev->sync_speed_min ? "local": "system");
+ mddev->sync_speed_min ? "local" : "system");
}
static ssize_t
@@ -5100,7 +5123,7 @@ sync_min_store(struct mddev *mddev, const char *buf, size_t len)
unsigned int min;
int rv;
- if (strncmp(buf, "system", 6)==0) {
+ if (strncmp(buf, "system", 6) == 0) {
min = 0;
} else {
rv = kstrtouint(buf, 10, &min);
@@ -5120,7 +5143,7 @@ static ssize_t
sync_max_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d (%s)\n", speed_max(mddev),
- mddev->sync_speed_max ? "local": "system");
+ mddev->sync_speed_max ? "local" : "system");
}
static ssize_t
@@ -5129,7 +5152,7 @@ sync_max_store(struct mddev *mddev, const char *buf, size_t len)
unsigned int max;
int rv;
- if (strncmp(buf, "system", 6)==0) {
+ if (strncmp(buf, "system", 6) == 0) {
max = 0;
} else {
rv = kstrtouint(buf, 10, &max);
@@ -5146,6 +5169,35 @@ static struct md_sysfs_entry md_sync_max =
__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
static ssize_t
+sync_io_depth_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%d (%s)\n", sync_io_depth(mddev),
+ mddev->sync_io_depth ? "local" : "system");
+}
+
+static ssize_t
+sync_io_depth_store(struct mddev *mddev, const char *buf, size_t len)
+{
+ unsigned int max;
+ int rv;
+
+ if (strncmp(buf, "system", 6) == 0) {
+ max = 0;
+ } else {
+ rv = kstrtouint(buf, 10, &max);
+ if (rv < 0)
+ return rv;
+ if (max == 0)
+ return -EINVAL;
+ }
+ mddev->sync_io_depth = max;
+ return len;
+}
+
+static struct md_sysfs_entry md_sync_io_depth =
+__ATTR_RW(sync_io_depth);
+
+static ssize_t
degraded_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d\n", mddev->degraded);
@@ -5671,6 +5723,7 @@ static struct attribute *md_redundancy_attrs[] = {
&md_mismatches.attr,
&md_sync_min.attr,
&md_sync_max.attr,
+ &md_sync_io_depth.attr,
&md_sync_speed.attr,
&md_sync_force_parallel.attr,
&md_sync_completed.attr,
@@ -8572,50 +8625,55 @@ void md_cluster_stop(struct mddev *mddev)
put_cluster_ops(mddev);
}
-static int is_mddev_idle(struct mddev *mddev, int init)
+static bool is_rdev_holder_idle(struct md_rdev *rdev, bool init)
{
+ unsigned long last_events = rdev->last_events;
+
+ if (!bdev_is_partition(rdev->bdev))
+ return true;
+
+ /*
+ * If rdev is partition, and user doesn't issue IO to the array, the
+ * array is still not idle if user issues IO to other partitions.
+ */
+ rdev->last_events = part_stat_read_accum(rdev->bdev->bd_disk->part0,
+ sectors) -
+ part_stat_read_accum(rdev->bdev, sectors);
+
+ return init || rdev->last_events <= last_events;
+}
+
+/*
+ * mddev is idle if following conditions are matched since last check:
+ * 1) mddev doesn't have normal IO completed;
+ * 2) mddev doesn't have inflight normal IO;
+ * 3) if any member disk is partition, and other partitions don't have IO
+ * completed;
+ *
+ * Noted this checking rely on IO accounting is enabled.
+ */
+static bool is_mddev_idle(struct mddev *mddev, int init)
+{
+ unsigned long last_events = mddev->normal_io_events;
+ struct gendisk *disk;
struct md_rdev *rdev;
- int idle;
- int curr_events;
+ bool idle = true;
- idle = 1;
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev) {
- struct gendisk *disk = rdev->bdev->bd_disk;
+ disk = mddev_is_dm(mddev) ? mddev->dm_gendisk : mddev->gendisk;
+ if (!disk)
+ return true;
- if (!init && !blk_queue_io_stat(disk->queue))
- continue;
+ mddev->normal_io_events = part_stat_read_accum(disk->part0, sectors);
+ if (!init && (mddev->normal_io_events > last_events ||
+ bdev_count_inflight(disk->part0)))
+ idle = false;
- curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
- atomic_read(&disk->sync_io);
- /* sync IO will cause sync_io to increase before the disk_stats
- * as sync_io is counted when a request starts, and
- * disk_stats is counted when it completes.
- * So resync activity will cause curr_events to be smaller than
- * when there was no such activity.
- * non-sync IO will cause disk_stat to increase without
- * increasing sync_io so curr_events will (eventually)
- * be larger than it was before. Once it becomes
- * substantially larger, the test below will cause
- * the array to appear non-idle, and resync will slow
- * down.
- * If there is a lot of outstanding resync activity when
- * we set last_event to curr_events, then all that activity
- * completing might cause the array to appear non-idle
- * and resync will be slowed down even though there might
- * not have been non-resync activity. This will only
- * happen once though. 'last_events' will soon reflect
- * the state where there is little or no outstanding
- * resync requests, and further resync activity will
- * always make curr_events less than last_events.
- *
- */
- if (init || curr_events - rdev->last_events > 64) {
- rdev->last_events = curr_events;
- idle = 0;
- }
- }
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (!is_rdev_holder_idle(rdev, init))
+ idle = false;
rcu_read_unlock();
+
return idle;
}
@@ -8927,6 +8985,23 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
}
}
+static bool sync_io_within_limit(struct mddev *mddev)
+{
+ int io_sectors;
+
+ /*
+ * For raid456, sync IO is stripe(4k) per IO, for other levels, it's
+ * RESYNC_PAGES(64k) per IO.
+ */
+ if (mddev->level == 4 || mddev->level == 5 || mddev->level == 6)
+ io_sectors = 8;
+ else
+ io_sectors = 128;
+
+ return atomic_read(&mddev->recovery_active) <
+ io_sectors * sync_io_depth(mddev);
+}
+
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
#define UPDATE_FREQUENCY (5*60*HZ)
@@ -9195,7 +9270,8 @@ void md_do_sync(struct md_thread *thread)
msleep(500);
goto repeat;
}
- if (!is_mddev_idle(mddev, 0)) {
+ if (!sync_io_within_limit(mddev) &&
+ !is_mddev_idle(mddev, 0)) {
/*
* Give other IO more of a chance.
* The faster the devices, the less we wait.
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1cf00a04bcdd..d45a9e6ead80 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -132,7 +132,7 @@ struct md_rdev {
sector_t sectors; /* Device size (in 512bytes sectors) */
struct mddev *mddev; /* RAID array if running */
- int last_events; /* IO event timestamp */
+ unsigned long last_events; /* IO event timestamp */
/*
* If meta_bdev is non-NULL, it means that a separate device is
@@ -404,7 +404,8 @@ struct mddev {
* are happening, so run/
* takeover/stop are not safe
*/
- struct gendisk *gendisk;
+ struct gendisk *gendisk; /* mdraid gendisk */
+ struct gendisk *dm_gendisk; /* dm-raid gendisk */
struct kobject kobj;
int hold_active;
@@ -483,6 +484,7 @@ struct mddev {
/* if zero, use the system-wide default */
int sync_speed_min;
int sync_speed_max;
+ int sync_io_depth;
/* resync even though the same disks are shared among md-devices */
int parallel_resync;
@@ -518,6 +520,7 @@ struct mddev {
* adding a spare
*/
+ unsigned long normal_io_events; /* IO event timestamp */
atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait;
sector_t recovery_cp;
@@ -714,17 +717,6 @@ static inline int mddev_trylock(struct mddev *mddev)
}
extern void mddev_unlock(struct mddev *mddev);
-static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
-{
- if (blk_queue_io_stat(bdev->bd_disk->queue))
- atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
-}
-
-static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
-{
- md_sync_acct(bio->bi_bdev, nr_sectors);
-}
-
struct md_personality
{
struct md_submodule_head head;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index de9bccbe7337..657d481525be 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2382,7 +2382,6 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining);
- md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
submit_bio_noacct(wbio);
}
@@ -3055,7 +3054,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r1_bio->bios[i];
if (bio->bi_end_io == end_sync_read) {
read_targets--;
- md_sync_acct_bio(bio, nr_sectors);
if (read_targets == 1)
bio->bi_opf &= ~MD_FAILFAST;
submit_bio_noacct(bio);
@@ -3064,7 +3062,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
} else {
atomic_set(&r1_bio->remaining, 1);
bio = r1_bio->bios[r1_bio->read_disk];
- md_sync_acct_bio(bio, nr_sectors);
if (read_targets == 1)
bio->bi_opf &= ~MD_FAILFAST;
submit_bio_noacct(bio);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ba32bac975b8..dce06bf65016 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2426,7 +2426,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
- md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
tbio->bi_opf |= MD_FAILFAST;
@@ -2448,8 +2447,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
bio_copy_data(tbio, fbio);
d = r10_bio->devs[i].devnum;
atomic_inc(&r10_bio->remaining);
- md_sync_acct(conf->mirrors[d].replacement->bdev,
- bio_sectors(tbio));
submit_bio_noacct(tbio);
}
@@ -2583,13 +2580,10 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
d = r10_bio->devs[1].devnum;
if (wbio->bi_end_io) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
- md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
submit_bio_noacct(wbio);
}
if (wbio2) {
atomic_inc(&conf->mirrors[d].replacement->nr_pending);
- md_sync_acct(conf->mirrors[d].replacement->bdev,
- bio_sectors(wbio2));
submit_bio_noacct(wbio2);
}
}
@@ -3757,7 +3751,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->sectors = nr_sectors;
if (bio->bi_end_io == end_sync_read) {
- md_sync_acct_bio(bio, nr_sectors);
bio->bi_status = 0;
submit_bio_noacct(bio);
}
@@ -4880,7 +4873,6 @@ read_more:
r10_bio->sectors = nr_sectors;
/* Now submit the read */
- md_sync_acct_bio(read_bio, r10_bio->sectors);
atomic_inc(&r10_bio->remaining);
read_bio->bi_next = NULL;
submit_bio_noacct(read_bio);
@@ -4940,7 +4932,6 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
continue;
atomic_inc(&rdev->nr_pending);
- md_sync_acct_bio(b, r10_bio->sectors);
atomic_inc(&r10_bio->remaining);
b->bi_next = NULL;
submit_bio_noacct(b);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6389383166c0..ca5b0e8ba707 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1240,10 +1240,6 @@ again:
}
if (rdev) {
- if (s->syncing || s->expanding || s->expanded
- || s->replacing)
- md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
-
set_bit(STRIPE_IO_STARTED, &sh->state);
bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags);
@@ -1300,10 +1296,6 @@ again:
submit_bio_noacct(bi);
}
if (rrdev) {
- if (s->syncing || s->expanding || s->expanded
- || s->replacing)
- md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
-
set_bit(STRIPE_IO_STARTED, &sh->state);
bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags);
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
index 9a583eeaa329..e23b0de1e0aa 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
@@ -3806,7 +3806,7 @@ status);
if ((status < 0) && (!probe_fl)) {
pvr2_hdw_render_useless(hdw);
}
- destroy_timer_on_stack(&timer.timer);
+ timer_destroy_on_stack(&timer.timer);
return status;
}
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 53f1888cc84f..d5bf3243fe78 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -1455,10 +1455,8 @@ static int gpmc_setup_irq(struct gpmc_device *gpmc)
gpmc->irq_chip.irq_unmask = gpmc_irq_unmask;
gpmc->irq_chip.irq_set_type = gpmc_irq_set_type;
- gpmc_irq_domain = irq_domain_add_linear(gpmc->dev->of_node,
- gpmc->nirqs,
- &gpmc_irq_domain_ops,
- gpmc);
+ gpmc_irq_domain = irq_domain_create_linear(of_fwnode_handle(gpmc->dev->of_node),
+ gpmc->nirqs, &gpmc_irq_domain_ops, gpmc);
if (!gpmc_irq_domain) {
dev_err(gpmc->dev, "IRQ domain add failed\n");
return -ENODEV;
diff --git a/drivers/memory/renesas-rpc-if-regs.h b/drivers/memory/renesas-rpc-if-regs.h
new file mode 100644
index 000000000000..e6b33f7c40a8
--- /dev/null
+++ b/drivers/memory/renesas-rpc-if-regs.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * R-Car RPC Interface Registers Definitions
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation
+ */
+
+#ifndef __RENESAS_RPC_IF_REGS_H__
+#define __RENESAS_RPC_IF_REGS_H__
+
+#include <linux/bits.h>
+
+#define RPCIF_CMNCR 0x0000 /* R/W */
+#define RPCIF_CMNCR_MD BIT(31)
+#define RPCIF_CMNCR_MOIIO3(val) (((val) & 0x3) << 22)
+#define RPCIF_CMNCR_MOIIO2(val) (((val) & 0x3) << 20)
+#define RPCIF_CMNCR_MOIIO1(val) (((val) & 0x3) << 18)
+#define RPCIF_CMNCR_MOIIO0(val) (((val) & 0x3) << 16)
+#define RPCIF_CMNCR_MOIIO(val) (RPCIF_CMNCR_MOIIO0(val) | RPCIF_CMNCR_MOIIO1(val) | \
+ RPCIF_CMNCR_MOIIO2(val) | RPCIF_CMNCR_MOIIO3(val))
+#define RPCIF_CMNCR_IO3FV(val) (((val) & 0x3) << 14) /* documented for RZ/G2L */
+#define RPCIF_CMNCR_IO2FV(val) (((val) & 0x3) << 12) /* documented for RZ/G2L */
+#define RPCIF_CMNCR_IO0FV(val) (((val) & 0x3) << 8)
+#define RPCIF_CMNCR_IOFV(val) (RPCIF_CMNCR_IO0FV(val) | RPCIF_CMNCR_IO2FV(val) | \
+ RPCIF_CMNCR_IO3FV(val))
+#define RPCIF_CMNCR_BSZ(val) (((val) & 0x3) << 0)
+
+#define RPCIF_SSLDR 0x0004 /* R/W */
+#define RPCIF_SSLDR_SPNDL(d) (((d) & 0x7) << 16)
+#define RPCIF_SSLDR_SLNDL(d) (((d) & 0x7) << 8)
+#define RPCIF_SSLDR_SCKDL(d) (((d) & 0x7) << 0)
+
+#define RPCIF_DRCR 0x000C /* R/W */
+#define RPCIF_DRCR_SSLN BIT(24)
+#define RPCIF_DRCR_RBURST(v) ((((v) - 1) & 0x1F) << 16)
+#define RPCIF_DRCR_RCF BIT(9)
+#define RPCIF_DRCR_RBE BIT(8)
+#define RPCIF_DRCR_SSLE BIT(0)
+
+#define RPCIF_DRCMR 0x0010 /* R/W */
+#define RPCIF_DRCMR_CMD(c) (((c) & 0xFF) << 16)
+#define RPCIF_DRCMR_OCMD(c) (((c) & 0xFF) << 0)
+
+#define RPCIF_DREAR 0x0014 /* R/W */
+#define RPCIF_DREAR_EAV(c) (((c) & 0xF) << 16)
+#define RPCIF_DREAR_EAC(c) (((c) & 0x7) << 0)
+
+#define RPCIF_DROPR 0x0018 /* R/W */
+
+#define RPCIF_DRENR 0x001C /* R/W */
+#define RPCIF_DRENR_CDB(o) (((u32)((o) & 0x3)) << 30)
+#define RPCIF_DRENR_OCDB(o) (((o) & 0x3) << 28)
+#define RPCIF_DRENR_ADB(o) (((o) & 0x3) << 24)
+#define RPCIF_DRENR_OPDB(o) (((o) & 0x3) << 20)
+#define RPCIF_DRENR_DRDB(o) (((o) & 0x3) << 16)
+#define RPCIF_DRENR_DME BIT(15)
+#define RPCIF_DRENR_CDE BIT(14)
+#define RPCIF_DRENR_OCDE BIT(12)
+#define RPCIF_DRENR_ADE(v) (((v) & 0xF) << 8)
+#define RPCIF_DRENR_OPDE(v) (((v) & 0xF) << 4)
+
+#define RPCIF_SMCR 0x0020 /* R/W */
+#define RPCIF_SMCR_SSLKP BIT(8)
+#define RPCIF_SMCR_SPIRE BIT(2)
+#define RPCIF_SMCR_SPIWE BIT(1)
+#define RPCIF_SMCR_SPIE BIT(0)
+
+#define RPCIF_SMCMR 0x0024 /* R/W */
+#define RPCIF_SMCMR_CMD(c) (((c) & 0xFF) << 16)
+#define RPCIF_SMCMR_OCMD(c) (((c) & 0xFF) << 0)
+
+#define RPCIF_SMADR 0x0028 /* R/W */
+
+#define RPCIF_SMOPR 0x002C /* R/W */
+#define RPCIF_SMOPR_OPD3(o) (((o) & 0xFF) << 24)
+#define RPCIF_SMOPR_OPD2(o) (((o) & 0xFF) << 16)
+#define RPCIF_SMOPR_OPD1(o) (((o) & 0xFF) << 8)
+#define RPCIF_SMOPR_OPD0(o) (((o) & 0xFF) << 0)
+
+#define RPCIF_SMENR 0x0030 /* R/W */
+#define RPCIF_SMENR_CDB(o) (((o) & 0x3) << 30)
+#define RPCIF_SMENR_OCDB(o) (((o) & 0x3) << 28)
+#define RPCIF_SMENR_ADB(o) (((o) & 0x3) << 24)
+#define RPCIF_SMENR_OPDB(o) (((o) & 0x3) << 20)
+#define RPCIF_SMENR_SPIDB(o) (((o) & 0x3) << 16)
+#define RPCIF_SMENR_DME BIT(15)
+#define RPCIF_SMENR_CDE BIT(14)
+#define RPCIF_SMENR_OCDE BIT(12)
+#define RPCIF_SMENR_ADE(v) (((v) & 0xF) << 8)
+#define RPCIF_SMENR_OPDE(v) (((v) & 0xF) << 4)
+#define RPCIF_SMENR_SPIDE(v) (((v) & 0xF) << 0)
+
+#define RPCIF_SMRDR0 0x0038 /* R */
+#define RPCIF_SMRDR1 0x003C /* R */
+#define RPCIF_SMWDR0 0x0040 /* W */
+#define RPCIF_SMWDR1 0x0044 /* W */
+
+#define RPCIF_CMNSR 0x0048 /* R */
+#define RPCIF_CMNSR_SSLF BIT(1)
+#define RPCIF_CMNSR_TEND BIT(0)
+
+#define RPCIF_DRDMCR 0x0058 /* R/W */
+#define RPCIF_DMDMCR_DMCYC(v) ((((v) - 1) & 0x1F) << 0)
+
+#define RPCIF_DRDRENR 0x005C /* R/W */
+#define RPCIF_DRDRENR_HYPE(v) (((v) & 0x7) << 12)
+#define RPCIF_DRDRENR_ADDRE BIT(8)
+#define RPCIF_DRDRENR_OPDRE BIT(4)
+#define RPCIF_DRDRENR_DRDRE BIT(0)
+
+#define RPCIF_SMDMCR 0x0060 /* R/W */
+#define RPCIF_SMDMCR_DMCYC(v) ((((v) - 1) & 0x1F) << 0)
+
+#define RPCIF_SMDRENR 0x0064 /* R/W */
+#define RPCIF_SMDRENR_HYPE(v) (((v) & 0x7) << 12)
+#define RPCIF_SMDRENR_ADDRE BIT(8)
+#define RPCIF_SMDRENR_OPDRE BIT(4)
+#define RPCIF_SMDRENR_SPIDRE BIT(0)
+
+#define RPCIF_PHYADD 0x0070 /* R/W available on R-Car E3/D3/V3M and RZ/G2{E,L} */
+#define RPCIF_PHYWR 0x0074 /* R/W available on R-Car E3/D3/V3M and RZ/G2{E,L} */
+
+#define RPCIF_PHYCNT 0x007C /* R/W */
+#define RPCIF_PHYCNT_CAL BIT(31)
+#define RPCIF_PHYCNT_OCTA(v) (((v) & 0x3) << 22)
+#define RPCIF_PHYCNT_EXDS BIT(21)
+#define RPCIF_PHYCNT_OCT BIT(20)
+#define RPCIF_PHYCNT_DDRCAL BIT(19)
+#define RPCIF_PHYCNT_HS BIT(18)
+#define RPCIF_PHYCNT_CKSEL(v) (((v) & 0x3) << 16) /* valid only for RZ/G2L */
+#define RPCIF_PHYCNT_STRTIM(v) (((v) & 0x7) << 15 | ((v) & 0x8) << 24) /* valid for R-Car and RZ/G2{E,H,M,N} */
+
+#define RPCIF_PHYCNT_WBUF2 BIT(4)
+#define RPCIF_PHYCNT_WBUF BIT(2)
+#define RPCIF_PHYCNT_PHYMEM(v) (((v) & 0x3) << 0)
+#define RPCIF_PHYCNT_PHYMEM_MASK GENMASK(1, 0)
+
+#define RPCIF_PHYOFFSET1 0x0080 /* R/W */
+#define RPCIF_PHYOFFSET1_DDRTMG(v) (((v) & 0x3) << 28)
+
+#define RPCIF_PHYOFFSET2 0x0084 /* R/W */
+#define RPCIF_PHYOFFSET2_OCTTMG(v) (((v) & 0x7) << 8)
+
+#define RPCIF_PHYINT 0x0088 /* R/W */
+#define RPCIF_PHYINT_WPVAL BIT(1)
+
+#endif /* __RENESAS_RPC_IF_REGS_H__ */
diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c
index 15b4706aafee..4a417b693080 100644
--- a/drivers/memory/renesas-rpc-if.c
+++ b/drivers/memory/renesas-rpc-if.c
@@ -18,139 +18,8 @@
#include <memory/renesas-rpc-if.h>
-#define RPCIF_CMNCR 0x0000 /* R/W */
-#define RPCIF_CMNCR_MD BIT(31)
-#define RPCIF_CMNCR_MOIIO3(val) (((val) & 0x3) << 22)
-#define RPCIF_CMNCR_MOIIO2(val) (((val) & 0x3) << 20)
-#define RPCIF_CMNCR_MOIIO1(val) (((val) & 0x3) << 18)
-#define RPCIF_CMNCR_MOIIO0(val) (((val) & 0x3) << 16)
-#define RPCIF_CMNCR_MOIIO(val) (RPCIF_CMNCR_MOIIO0(val) | RPCIF_CMNCR_MOIIO1(val) | \
- RPCIF_CMNCR_MOIIO2(val) | RPCIF_CMNCR_MOIIO3(val))
-#define RPCIF_CMNCR_IO3FV(val) (((val) & 0x3) << 14) /* documented for RZ/G2L */
-#define RPCIF_CMNCR_IO2FV(val) (((val) & 0x3) << 12) /* documented for RZ/G2L */
-#define RPCIF_CMNCR_IO0FV(val) (((val) & 0x3) << 8)
-#define RPCIF_CMNCR_IOFV(val) (RPCIF_CMNCR_IO0FV(val) | RPCIF_CMNCR_IO2FV(val) | \
- RPCIF_CMNCR_IO3FV(val))
-#define RPCIF_CMNCR_BSZ(val) (((val) & 0x3) << 0)
-
-#define RPCIF_SSLDR 0x0004 /* R/W */
-#define RPCIF_SSLDR_SPNDL(d) (((d) & 0x7) << 16)
-#define RPCIF_SSLDR_SLNDL(d) (((d) & 0x7) << 8)
-#define RPCIF_SSLDR_SCKDL(d) (((d) & 0x7) << 0)
-
-#define RPCIF_DRCR 0x000C /* R/W */
-#define RPCIF_DRCR_SSLN BIT(24)
-#define RPCIF_DRCR_RBURST(v) ((((v) - 1) & 0x1F) << 16)
-#define RPCIF_DRCR_RCF BIT(9)
-#define RPCIF_DRCR_RBE BIT(8)
-#define RPCIF_DRCR_SSLE BIT(0)
-
-#define RPCIF_DRCMR 0x0010 /* R/W */
-#define RPCIF_DRCMR_CMD(c) (((c) & 0xFF) << 16)
-#define RPCIF_DRCMR_OCMD(c) (((c) & 0xFF) << 0)
-
-#define RPCIF_DREAR 0x0014 /* R/W */
-#define RPCIF_DREAR_EAV(c) (((c) & 0xF) << 16)
-#define RPCIF_DREAR_EAC(c) (((c) & 0x7) << 0)
-
-#define RPCIF_DROPR 0x0018 /* R/W */
-
-#define RPCIF_DRENR 0x001C /* R/W */
-#define RPCIF_DRENR_CDB(o) (u32)((((o) & 0x3) << 30))
-#define RPCIF_DRENR_OCDB(o) (((o) & 0x3) << 28)
-#define RPCIF_DRENR_ADB(o) (((o) & 0x3) << 24)
-#define RPCIF_DRENR_OPDB(o) (((o) & 0x3) << 20)
-#define RPCIF_DRENR_DRDB(o) (((o) & 0x3) << 16)
-#define RPCIF_DRENR_DME BIT(15)
-#define RPCIF_DRENR_CDE BIT(14)
-#define RPCIF_DRENR_OCDE BIT(12)
-#define RPCIF_DRENR_ADE(v) (((v) & 0xF) << 8)
-#define RPCIF_DRENR_OPDE(v) (((v) & 0xF) << 4)
-
-#define RPCIF_SMCR 0x0020 /* R/W */
-#define RPCIF_SMCR_SSLKP BIT(8)
-#define RPCIF_SMCR_SPIRE BIT(2)
-#define RPCIF_SMCR_SPIWE BIT(1)
-#define RPCIF_SMCR_SPIE BIT(0)
-
-#define RPCIF_SMCMR 0x0024 /* R/W */
-#define RPCIF_SMCMR_CMD(c) (((c) & 0xFF) << 16)
-#define RPCIF_SMCMR_OCMD(c) (((c) & 0xFF) << 0)
-
-#define RPCIF_SMADR 0x0028 /* R/W */
-
-#define RPCIF_SMOPR 0x002C /* R/W */
-#define RPCIF_SMOPR_OPD3(o) (((o) & 0xFF) << 24)
-#define RPCIF_SMOPR_OPD2(o) (((o) & 0xFF) << 16)
-#define RPCIF_SMOPR_OPD1(o) (((o) & 0xFF) << 8)
-#define RPCIF_SMOPR_OPD0(o) (((o) & 0xFF) << 0)
-
-#define RPCIF_SMENR 0x0030 /* R/W */
-#define RPCIF_SMENR_CDB(o) (((o) & 0x3) << 30)
-#define RPCIF_SMENR_OCDB(o) (((o) & 0x3) << 28)
-#define RPCIF_SMENR_ADB(o) (((o) & 0x3) << 24)
-#define RPCIF_SMENR_OPDB(o) (((o) & 0x3) << 20)
-#define RPCIF_SMENR_SPIDB(o) (((o) & 0x3) << 16)
-#define RPCIF_SMENR_DME BIT(15)
-#define RPCIF_SMENR_CDE BIT(14)
-#define RPCIF_SMENR_OCDE BIT(12)
-#define RPCIF_SMENR_ADE(v) (((v) & 0xF) << 8)
-#define RPCIF_SMENR_OPDE(v) (((v) & 0xF) << 4)
-#define RPCIF_SMENR_SPIDE(v) (((v) & 0xF) << 0)
-
-#define RPCIF_SMRDR0 0x0038 /* R */
-#define RPCIF_SMRDR1 0x003C /* R */
-#define RPCIF_SMWDR0 0x0040 /* W */
-#define RPCIF_SMWDR1 0x0044 /* W */
-
-#define RPCIF_CMNSR 0x0048 /* R */
-#define RPCIF_CMNSR_SSLF BIT(1)
-#define RPCIF_CMNSR_TEND BIT(0)
-
-#define RPCIF_DRDMCR 0x0058 /* R/W */
-#define RPCIF_DMDMCR_DMCYC(v) ((((v) - 1) & 0x1F) << 0)
-
-#define RPCIF_DRDRENR 0x005C /* R/W */
-#define RPCIF_DRDRENR_HYPE(v) (((v) & 0x7) << 12)
-#define RPCIF_DRDRENR_ADDRE BIT(8)
-#define RPCIF_DRDRENR_OPDRE BIT(4)
-#define RPCIF_DRDRENR_DRDRE BIT(0)
-
-#define RPCIF_SMDMCR 0x0060 /* R/W */
-#define RPCIF_SMDMCR_DMCYC(v) ((((v) - 1) & 0x1F) << 0)
-
-#define RPCIF_SMDRENR 0x0064 /* R/W */
-#define RPCIF_SMDRENR_HYPE(v) (((v) & 0x7) << 12)
-#define RPCIF_SMDRENR_ADDRE BIT(8)
-#define RPCIF_SMDRENR_OPDRE BIT(4)
-#define RPCIF_SMDRENR_SPIDRE BIT(0)
-
-#define RPCIF_PHYADD 0x0070 /* R/W available on R-Car E3/D3/V3M and RZ/G2{E,L} */
-#define RPCIF_PHYWR 0x0074 /* R/W available on R-Car E3/D3/V3M and RZ/G2{E,L} */
-
-#define RPCIF_PHYCNT 0x007C /* R/W */
-#define RPCIF_PHYCNT_CAL BIT(31)
-#define RPCIF_PHYCNT_OCTA(v) (((v) & 0x3) << 22)
-#define RPCIF_PHYCNT_EXDS BIT(21)
-#define RPCIF_PHYCNT_OCT BIT(20)
-#define RPCIF_PHYCNT_DDRCAL BIT(19)
-#define RPCIF_PHYCNT_HS BIT(18)
-#define RPCIF_PHYCNT_CKSEL(v) (((v) & 0x3) << 16) /* valid only for RZ/G2L */
-#define RPCIF_PHYCNT_STRTIM(v) (((v) & 0x7) << 15 | ((v) & 0x8) << 24) /* valid for R-Car and RZ/G2{E,H,M,N} */
-
-#define RPCIF_PHYCNT_WBUF2 BIT(4)
-#define RPCIF_PHYCNT_WBUF BIT(2)
-#define RPCIF_PHYCNT_PHYMEM(v) (((v) & 0x3) << 0)
-#define RPCIF_PHYCNT_PHYMEM_MASK GENMASK(1, 0)
-
-#define RPCIF_PHYOFFSET1 0x0080 /* R/W */
-#define RPCIF_PHYOFFSET1_DDRTMG(v) (((v) & 0x3) << 28)
-
-#define RPCIF_PHYOFFSET2 0x0084 /* R/W */
-#define RPCIF_PHYOFFSET2_OCTTMG(v) (((v) & 0x7) << 8)
-
-#define RPCIF_PHYINT 0x0088 /* R/W */
-#define RPCIF_PHYINT_WPVAL BIT(1)
+#include "renesas-rpc-if-regs.h"
+#include "renesas-xspi-if-regs.h"
static const struct regmap_range rpcif_volatile_ranges[] = {
regmap_reg_range(RPCIF_SMRDR0, RPCIF_SMRDR1),
@@ -163,7 +32,31 @@ static const struct regmap_access_table rpcif_volatile_table = {
.n_yes_ranges = ARRAY_SIZE(rpcif_volatile_ranges),
};
+static const struct regmap_range xspi_volatile_ranges[] = {
+ regmap_reg_range(XSPI_CDD0BUF0, XSPI_CDD0BUF0),
+};
+
+static const struct regmap_access_table xspi_volatile_table = {
+ .yes_ranges = xspi_volatile_ranges,
+ .n_yes_ranges = ARRAY_SIZE(xspi_volatile_ranges),
+};
+
+struct rpcif_priv;
+
+struct rpcif_impl {
+ int (*hw_init)(struct rpcif_priv *rpc, bool hyperflash);
+ void (*prepare)(struct rpcif_priv *rpc, const struct rpcif_op *op,
+ u64 *offs, size_t *len);
+ int (*manual_xfer)(struct rpcif_priv *rpc);
+ size_t (*dirmap_read)(struct rpcif_priv *rpc, u64 offs, size_t len,
+ void *buf);
+ u32 status_reg;
+ u32 status_mask;
+};
+
struct rpcif_info {
+ const struct regmap_config *regmap_config;
+ const struct rpcif_impl *impl;
enum rpcif_type type;
u8 strtim;
};
@@ -180,6 +73,8 @@ struct rpcif_priv {
enum rpcif_data_dir dir;
u8 bus_size;
u8 xfer_size;
+ u8 addr_nbytes; /* Specified for xSPI */
+ u32 proto; /* Specified for xSPI */
void *buffer;
u32 xferlen;
u32 smcr;
@@ -191,26 +86,6 @@ struct rpcif_priv {
u32 ddr; /* DRDRENR or SMDRENR */
};
-static const struct rpcif_info rpcif_info_r8a7796 = {
- .type = RPCIF_RCAR_GEN3,
- .strtim = 6,
-};
-
-static const struct rpcif_info rpcif_info_gen3 = {
- .type = RPCIF_RCAR_GEN3,
- .strtim = 7,
-};
-
-static const struct rpcif_info rpcif_info_rz_g2l = {
- .type = RPCIF_RZ_G2L,
- .strtim = 7,
-};
-
-static const struct rpcif_info rpcif_info_gen4 = {
- .type = RPCIF_RCAR_GEN4,
- .strtim = 15,
-};
-
/*
* Custom accessor functions to ensure SM[RW]DR[01] are always accessed with
* proper width. Requires rpcif_priv.xfer_size to be correctly set before!
@@ -300,6 +175,33 @@ static const struct regmap_config rpcif_regmap_config = {
.volatile_table = &rpcif_volatile_table,
};
+static int xspi_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+ struct rpcif_priv *xspi = context;
+
+ *val = readl(xspi->base + reg);
+ return 0;
+}
+
+static int xspi_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+ struct rpcif_priv *xspi = context;
+
+ writel(val, xspi->base + reg);
+ return 0;
+}
+
+static const struct regmap_config xspi_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .reg_read = xspi_reg_read,
+ .reg_write = xspi_reg_write,
+ .fast_io = true,
+ .max_register = XSPI_INTE,
+ .volatile_table = &xspi_volatile_table,
+};
+
int rpcif_sw_init(struct rpcif *rpcif, struct device *dev)
{
struct rpcif_priv *rpc = dev_get_drvdata(dev);
@@ -307,6 +209,7 @@ int rpcif_sw_init(struct rpcif *rpcif, struct device *dev)
rpcif->dev = dev;
rpcif->dirmap = rpc->dirmap;
rpcif->size = rpc->size;
+ rpcif->xspi = rpc->info->type == XSPI_RZ_G3E;
return 0;
}
EXPORT_SYMBOL(rpcif_sw_init);
@@ -325,16 +228,11 @@ static void rpcif_rzg2l_timing_adjust_sdr(struct rpcif_priv *rpc)
regmap_write(rpc->regmap, RPCIF_PHYADD, 0x80000032);
}
-int rpcif_hw_init(struct device *dev, bool hyperflash)
+static int rpcif_hw_init_impl(struct rpcif_priv *rpc, bool hyperflash)
{
- struct rpcif_priv *rpc = dev_get_drvdata(dev);
u32 dummy;
int ret;
- ret = pm_runtime_resume_and_get(dev);
- if (ret)
- return ret;
-
if (rpc->info->type == RPCIF_RZ_G2L) {
ret = reset_control_reset(rpc->rstc);
if (ret)
@@ -382,21 +280,62 @@ int rpcif_hw_init(struct device *dev, bool hyperflash)
regmap_write(rpc->regmap, RPCIF_SSLDR, RPCIF_SSLDR_SPNDL(7) |
RPCIF_SSLDR_SLNDL(7) | RPCIF_SSLDR_SCKDL(7));
- pm_runtime_put(dev);
-
rpc->bus_size = hyperflash ? 2 : 1;
return 0;
}
+
+static int xspi_hw_init_impl(struct rpcif_priv *xspi, bool hyperflash)
+{
+ int ret;
+
+ ret = reset_control_reset(xspi->rstc);
+ if (ret)
+ return ret;
+
+ regmap_write(xspi->regmap, XSPI_WRAPCFG, 0x0);
+
+ regmap_update_bits(xspi->regmap, XSPI_LIOCFGCS0,
+ XSPI_LIOCFG_PRTMD(0x3ff) | XSPI_LIOCFG_CSMIN(0xf) |
+ XSPI_LIOCFG_CSASTEX | XSPI_LIOCFG_CSNEGEX,
+ XSPI_LIOCFG_PRTMD(0) | XSPI_LIOCFG_CSMIN(0) |
+ XSPI_LIOCFG_CSASTEX | XSPI_LIOCFG_CSNEGEX);
+
+ regmap_update_bits(xspi->regmap, XSPI_CCCTL0CS0, XSPI_CCCTL0_CAEN, 0);
+
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0,
+ XSPI_CDCTL0_TRREQ | XSPI_CDCTL0_CSSEL, 0);
+
+ regmap_update_bits(xspi->regmap, XSPI_INTE, XSPI_INTE_CMDCMPE,
+ XSPI_INTE_CMDCMPE);
+
+ return 0;
+}
+
+int rpcif_hw_init(struct device *dev, bool hyperflash)
+{
+ struct rpcif_priv *rpc = dev_get_drvdata(dev);
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ ret = rpc->info->impl->hw_init(rpc, hyperflash);
+
+ pm_runtime_put(dev);
+
+ return ret;
+}
EXPORT_SYMBOL(rpcif_hw_init);
static int wait_msg_xfer_end(struct rpcif_priv *rpc)
{
u32 sts;
- return regmap_read_poll_timeout(rpc->regmap, RPCIF_CMNSR, sts,
- sts & RPCIF_CMNSR_TEND, 0,
- USEC_PER_SEC);
+ return regmap_read_poll_timeout(rpc->regmap, rpc->info->impl->status_reg,
+ sts, sts & rpc->info->impl->status_mask,
+ 0, USEC_PER_SEC);
}
static u8 rpcif_bits_set(struct rpcif_priv *rpc, u32 nbytes)
@@ -412,11 +351,9 @@ static u8 rpcif_bit_size(u8 buswidth)
return buswidth > 4 ? 2 : ilog2(buswidth);
}
-void rpcif_prepare(struct device *dev, const struct rpcif_op *op, u64 *offs,
- size_t *len)
+static void rpcif_prepare_impl(struct rpcif_priv *rpc, const struct rpcif_op *op,
+ u64 *offs, size_t *len)
{
- struct rpcif_priv *rpc = dev_get_drvdata(dev);
-
rpc->smcr = 0;
rpc->smadr = 0;
rpc->enable = 0;
@@ -497,18 +434,76 @@ void rpcif_prepare(struct device *dev, const struct rpcif_op *op, u64 *offs,
rpc->enable |= RPCIF_SMENR_SPIDB(rpcif_bit_size(op->data.buswidth));
}
}
-EXPORT_SYMBOL(rpcif_prepare);
-int rpcif_manual_xfer(struct device *dev)
+static void xspi_prepare_impl(struct rpcif_priv *xspi, const struct rpcif_op *op,
+ u64 *offs, size_t *len)
+{
+ xspi->smadr = 0;
+ xspi->addr_nbytes = 0;
+ xspi->command = 0;
+ xspi->option = 0;
+ xspi->dummy = 0;
+ xspi->xferlen = 0;
+ xspi->proto = 0;
+
+ if (op->cmd.buswidth)
+ xspi->command = op->cmd.opcode;
+
+ if (op->ocmd.buswidth)
+ xspi->command = (xspi->command << 8) | op->ocmd.opcode;
+
+ if (op->addr.buswidth) {
+ xspi->addr_nbytes = op->addr.nbytes;
+ if (offs && len)
+ xspi->smadr = *offs;
+ else
+ xspi->smadr = op->addr.val;
+ }
+
+ if (op->dummy.buswidth)
+ xspi->dummy = op->dummy.ncycles;
+
+ xspi->dir = op->data.dir;
+ if (op->data.buswidth) {
+ u32 nbytes;
+
+ xspi->buffer = op->data.buf.in;
+
+ if (offs && len)
+ nbytes = *len;
+ else
+ nbytes = op->data.nbytes;
+ xspi->xferlen = nbytes;
+ }
+
+ if (op->cmd.buswidth == 1) {
+ if (op->addr.buswidth == 2 || op->data.buswidth == 2)
+ xspi->proto = PROTO_1S_2S_2S;
+ else if (op->addr.buswidth == 4 || op->data.buswidth == 4)
+ xspi->proto = PROTO_1S_4S_4S;
+ } else if (op->cmd.buswidth == 2 &&
+ (op->addr.buswidth == 2 || op->data.buswidth == 2)) {
+ xspi->proto = PROTO_2S_2S_2S;
+ } else if (op->cmd.buswidth == 4 &&
+ (op->addr.buswidth == 4 || op->data.buswidth == 4)) {
+ xspi->proto = PROTO_4S_4S_4S;
+ }
+}
+
+void rpcif_prepare(struct device *dev, const struct rpcif_op *op, u64 *offs,
+ size_t *len)
{
struct rpcif_priv *rpc = dev_get_drvdata(dev);
+
+ rpc->info->impl->prepare(rpc, op, offs, len);
+}
+EXPORT_SYMBOL(rpcif_prepare);
+
+static int rpcif_manual_xfer_impl(struct rpcif_priv *rpc)
+{
u32 smenr, smcr, pos = 0, max = rpc->bus_size == 2 ? 8 : 4;
int ret = 0;
- ret = pm_runtime_resume_and_get(dev);
- if (ret < 0)
- return ret;
-
regmap_update_bits(rpc->regmap, RPCIF_PHYCNT,
RPCIF_PHYCNT_CAL, RPCIF_PHYCNT_CAL);
regmap_update_bits(rpc->regmap, RPCIF_CMNCR,
@@ -616,15 +611,169 @@ int rpcif_manual_xfer(struct device *dev)
goto err_out;
}
-exit:
- pm_runtime_put(dev);
return ret;
err_out:
if (reset_control_reset(rpc->rstc))
- dev_err(dev, "Failed to reset HW\n");
- rpcif_hw_init(dev, rpc->bus_size == 2);
- goto exit;
+ dev_err(rpc->dev, "Failed to reset HW\n");
+ rpcif_hw_init_impl(rpc, rpc->bus_size == 2);
+ return ret;
+}
+
+static int xspi_manual_xfer_impl(struct rpcif_priv *xspi)
+{
+ u32 pos = 0, max = 8;
+ int ret = 0;
+
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0, XSPI_CDCTL0_TRNUM(0x3),
+ XSPI_CDCTL0_TRNUM(0));
+
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0, XSPI_CDCTL0_TRREQ, 0);
+
+ regmap_write(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_CMDSIZE(0x1) | XSPI_CDTBUF_CMD_FIELD(xspi->command));
+
+ regmap_write(xspi->regmap, XSPI_CDABUF0, 0);
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0, XSPI_CDTBUF_ADDSIZE(0x7),
+ XSPI_CDTBUF_ADDSIZE(xspi->addr_nbytes));
+
+ regmap_write(xspi->regmap, XSPI_CDABUF0, xspi->smadr);
+
+ regmap_update_bits(xspi->regmap, XSPI_LIOCFGCS0, XSPI_LIOCFG_PRTMD(0x3ff),
+ XSPI_LIOCFG_PRTMD(xspi->proto));
+
+ switch (xspi->dir) {
+ case RPCIF_DATA_OUT:
+ while (pos < xspi->xferlen) {
+ u32 bytes_left = xspi->xferlen - pos;
+ u32 nbytes, data[2], *p = data;
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_TRTYPE, XSPI_CDTBUF_TRTYPE);
+
+ nbytes = bytes_left >= max ? max : bytes_left;
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_DATASIZE(0xf),
+ XSPI_CDTBUF_DATASIZE(nbytes));
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_ADDSIZE(0x7),
+ XSPI_CDTBUF_ADDSIZE(xspi->addr_nbytes));
+
+ memcpy(data, xspi->buffer + pos, nbytes);
+
+ if (nbytes > 4) {
+ regmap_write(xspi->regmap, XSPI_CDD0BUF0, *p++);
+ regmap_write(xspi->regmap, XSPI_CDD1BUF0, *p);
+ } else {
+ regmap_write(xspi->regmap, XSPI_CDD0BUF0, *p);
+ }
+
+ regmap_write(xspi->regmap, XSPI_CDABUF0, xspi->smadr + pos);
+
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0,
+ XSPI_CDCTL0_TRREQ, XSPI_CDCTL0_TRREQ);
+
+ ret = wait_msg_xfer_end(xspi);
+ if (ret)
+ goto err_out;
+
+ regmap_update_bits(xspi->regmap, XSPI_INTC,
+ XSPI_INTC_CMDCMPC, XSPI_INTC_CMDCMPC);
+
+ pos += nbytes;
+ }
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0, XSPI_CDCTL0_TRREQ, 0);
+ break;
+ case RPCIF_DATA_IN:
+ while (pos < xspi->xferlen) {
+ u32 bytes_left = xspi->xferlen - pos;
+ u32 nbytes, data[2], *p = data;
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_TRTYPE,
+ ~(u32)XSPI_CDTBUF_TRTYPE);
+
+ /* nbytes can be up to 8 bytes */
+ nbytes = bytes_left >= max ? max : bytes_left;
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_DATASIZE(0xf),
+ XSPI_CDTBUF_DATASIZE(nbytes));
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_ADDSIZE(0x7),
+ XSPI_CDTBUF_ADDSIZE(xspi->addr_nbytes));
+
+ if (xspi->addr_nbytes)
+ regmap_write(xspi->regmap, XSPI_CDABUF0,
+ xspi->smadr + pos);
+
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_LATE(0x1f),
+ XSPI_CDTBUF_LATE(xspi->dummy));
+
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0,
+ XSPI_CDCTL0_TRREQ, XSPI_CDCTL0_TRREQ);
+
+ ret = wait_msg_xfer_end(xspi);
+ if (ret)
+ goto err_out;
+
+ if (nbytes > 4) {
+ regmap_read(xspi->regmap, XSPI_CDD0BUF0, p++);
+ regmap_read(xspi->regmap, XSPI_CDD1BUF0, p);
+ } else {
+ regmap_read(xspi->regmap, XSPI_CDD0BUF0, p);
+ }
+
+ memcpy(xspi->buffer + pos, data, nbytes);
+
+ regmap_update_bits(xspi->regmap, XSPI_INTC,
+ XSPI_INTC_CMDCMPC, XSPI_INTC_CMDCMPC);
+
+ pos += nbytes;
+ }
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0,
+ XSPI_CDCTL0_TRREQ, 0);
+ break;
+ default:
+ regmap_update_bits(xspi->regmap, XSPI_CDTBUF0,
+ XSPI_CDTBUF_TRTYPE, XSPI_CDTBUF_TRTYPE);
+ regmap_update_bits(xspi->regmap, XSPI_CDCTL0,
+ XSPI_CDCTL0_TRREQ, XSPI_CDCTL0_TRREQ);
+
+ ret = wait_msg_xfer_end(xspi);
+ if (ret)
+ goto err_out;
+
+ regmap_update_bits(xspi->regmap, XSPI_INTC,
+ XSPI_INTC_CMDCMPC, XSPI_INTC_CMDCMPC);
+ }
+
+ return ret;
+
+err_out:
+ xspi_hw_init_impl(xspi, false);
+ return ret;
+}
+
+int rpcif_manual_xfer(struct device *dev)
+{
+ struct rpcif_priv *rpc = dev_get_drvdata(dev);
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ ret = rpc->info->impl->manual_xfer(rpc);
+
+ pm_runtime_put(dev);
+
+ return ret;
}
EXPORT_SYMBOL(rpcif_manual_xfer);
@@ -670,20 +819,15 @@ static void memcpy_fromio_readw(void *to,
}
}
-ssize_t rpcif_dirmap_read(struct device *dev, u64 offs, size_t len, void *buf)
+static size_t rpcif_dirmap_read_impl(struct rpcif_priv *rpc, u64 offs,
+ size_t len, void *buf)
{
- struct rpcif_priv *rpc = dev_get_drvdata(dev);
loff_t from = offs & (rpc->size - 1);
size_t size = rpc->size - from;
- int ret;
if (len > size)
len = size;
- ret = pm_runtime_resume_and_get(dev);
- if (ret < 0)
- return ret;
-
regmap_update_bits(rpc->regmap, RPCIF_CMNCR, RPCIF_CMNCR_MD, 0);
regmap_write(rpc->regmap, RPCIF_DRCR, 0);
regmap_write(rpc->regmap, RPCIF_DRCMR, rpc->command);
@@ -700,12 +844,129 @@ ssize_t rpcif_dirmap_read(struct device *dev, u64 offs, size_t len, void *buf)
else
memcpy_fromio(buf, rpc->dirmap + from, len);
- pm_runtime_put(dev);
+ return len;
+}
+
+static size_t xspi_dirmap_read_impl(struct rpcif_priv *xspi, u64 offs,
+ size_t len, void *buf)
+{
+ loff_t from = offs & (xspi->size - 1);
+ size_t size = xspi->size - from;
+ u8 addsize = xspi->addr_nbytes - 1;
+
+ if (len > size)
+ len = size;
+
+ regmap_update_bits(xspi->regmap, XSPI_CMCFG0CS0,
+ XSPI_CMCFG0_FFMT(0x3) | XSPI_CMCFG0_ADDSIZE(0x3),
+ XSPI_CMCFG0_FFMT(0) | XSPI_CMCFG0_ADDSIZE(addsize));
+
+ regmap_update_bits(xspi->regmap, XSPI_CMCFG1CS0,
+ XSPI_CMCFG1_RDCMD(0xffff) | XSPI_CMCFG1_RDLATE(0x1f),
+ XSPI_CMCFG1_RDCMD_UPPER_BYTE(xspi->command) |
+ XSPI_CMCFG1_RDLATE(xspi->dummy));
+
+ regmap_update_bits(xspi->regmap, XSPI_BMCTL0, XSPI_BMCTL0_CS0ACC(0xff),
+ XSPI_BMCTL0_CS0ACC(0x01));
+
+ regmap_update_bits(xspi->regmap, XSPI_BMCFG,
+ XSPI_BMCFG_WRMD | XSPI_BMCFG_MWRCOMB |
+ XSPI_BMCFG_MWRSIZE(0xff) | XSPI_BMCFG_PREEN,
+ 0 | XSPI_BMCFG_MWRCOMB | XSPI_BMCFG_MWRSIZE(0x0f) |
+ XSPI_BMCFG_PREEN);
+
+ regmap_update_bits(xspi->regmap, XSPI_LIOCFGCS0, XSPI_LIOCFG_PRTMD(0x3ff),
+ XSPI_LIOCFG_PRTMD(xspi->proto));
+
+ memcpy_fromio(buf, xspi->dirmap + from, len);
return len;
}
+
+ssize_t rpcif_dirmap_read(struct device *dev, u64 offs, size_t len, void *buf)
+{
+ struct rpcif_priv *rpc = dev_get_drvdata(dev);
+ size_t read;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ read = rpc->info->impl->dirmap_read(rpc, offs, len, buf);
+
+ pm_runtime_put(dev);
+
+ return read;
+}
EXPORT_SYMBOL(rpcif_dirmap_read);
+/**
+ * xspi_dirmap_write - Write data to xspi memory.
+ * @dev: xspi device
+ * @offs: offset
+ * @len: Number of bytes to be written.
+ * @buf: Buffer holding write data.
+ *
+ * This function writes data into xspi memory.
+ *
+ * Returns number of bytes written on success, else negative errno.
+ */
+ssize_t xspi_dirmap_write(struct device *dev, u64 offs, size_t len, const void *buf)
+{
+ struct rpcif_priv *xspi = dev_get_drvdata(dev);
+ loff_t from = offs & (xspi->size - 1);
+ u8 addsize = xspi->addr_nbytes - 1;
+ size_t size = xspi->size - from;
+ ssize_t writebytes;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ if (len > size)
+ len = size;
+
+ if (len > MWRSIZE_MAX)
+ writebytes = MWRSIZE_MAX;
+ else
+ writebytes = len;
+
+ regmap_update_bits(xspi->regmap, XSPI_CMCFG0CS0,
+ XSPI_CMCFG0_FFMT(0x3) | XSPI_CMCFG0_ADDSIZE(0x3),
+ XSPI_CMCFG0_FFMT(0) | XSPI_CMCFG0_ADDSIZE(addsize));
+
+ regmap_update_bits(xspi->regmap, XSPI_CMCFG2CS0,
+ XSPI_CMCFG2_WRCMD_UPPER(0xff) | XSPI_CMCFG2_WRLATE(0x1f),
+ XSPI_CMCFG2_WRCMD_UPPER(xspi->command) |
+ XSPI_CMCFG2_WRLATE(xspi->dummy));
+
+ regmap_update_bits(xspi->regmap, XSPI_BMCTL0,
+ XSPI_BMCTL0_CS0ACC(0xff), XSPI_BMCTL0_CS0ACC(0x03));
+
+ regmap_update_bits(xspi->regmap, XSPI_BMCFG,
+ XSPI_BMCFG_WRMD | XSPI_BMCFG_MWRCOMB |
+ XSPI_BMCFG_MWRSIZE(0xff) | XSPI_BMCFG_PREEN,
+ 0 | XSPI_BMCFG_MWRCOMB | XSPI_BMCFG_MWRSIZE(0x0f) |
+ XSPI_BMCFG_PREEN);
+
+ regmap_update_bits(xspi->regmap, XSPI_LIOCFGCS0, XSPI_LIOCFG_PRTMD(0x3ff),
+ XSPI_LIOCFG_PRTMD(xspi->proto));
+
+ memcpy_toio(xspi->dirmap + from, buf, writebytes);
+
+ /* Request to push the pending data */
+ if (writebytes < MWRSIZE_MAX)
+ regmap_update_bits(xspi->regmap, XSPI_BMCTL1,
+ XSPI_BMCTL1_MWRPUSH, XSPI_BMCTL1_MWRPUSH);
+
+ pm_runtime_put(dev);
+
+ return writebytes;
+}
+EXPORT_SYMBOL_GPL(xspi_dirmap_write);
+
static int rpcif_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -740,8 +1001,8 @@ static int rpcif_probe(struct platform_device *pdev)
rpc->base = devm_platform_ioremap_resource_byname(pdev, "regs");
if (IS_ERR(rpc->base))
return PTR_ERR(rpc->base);
-
- rpc->regmap = devm_regmap_init(dev, NULL, rpc, &rpcif_regmap_config);
+ rpc->info = of_device_get_match_data(dev);
+ rpc->regmap = devm_regmap_init(dev, NULL, rpc, rpc->info->regmap_config);
if (IS_ERR(rpc->regmap)) {
dev_err(dev, "failed to init regmap for rpcif, error %ld\n",
PTR_ERR(rpc->regmap));
@@ -754,11 +1015,29 @@ static int rpcif_probe(struct platform_device *pdev)
return PTR_ERR(rpc->dirmap);
rpc->size = resource_size(res);
- rpc->info = of_device_get_match_data(dev);
- rpc->rstc = devm_reset_control_get_exclusive(dev, NULL);
+ rpc->rstc = devm_reset_control_array_get_exclusive(dev);
if (IS_ERR(rpc->rstc))
return PTR_ERR(rpc->rstc);
+ /*
+ * The enabling/disabling of spi/spix2 clocks at runtime leading to
+ * flash write failure. So, enable these clocks during probe() and
+ * disable it in remove().
+ */
+ if (rpc->info->type == XSPI_RZ_G3E) {
+ struct clk *spi_clk;
+
+ spi_clk = devm_clk_get_enabled(dev, "spix2");
+ if (IS_ERR(spi_clk))
+ return dev_err_probe(dev, PTR_ERR(spi_clk),
+ "cannot get enabled spix2 clk\n");
+
+ spi_clk = devm_clk_get_enabled(dev, "spi");
+ if (IS_ERR(spi_clk))
+ return dev_err_probe(dev, PTR_ERR(spi_clk),
+ "cannot get enabled spi clk\n");
+ }
+
vdev = platform_device_alloc(name, pdev->id);
if (!vdev)
return -ENOMEM;
@@ -784,8 +1063,61 @@ static void rpcif_remove(struct platform_device *pdev)
platform_device_unregister(rpc->vdev);
}
+static const struct rpcif_impl rpcif_impl = {
+ .hw_init = rpcif_hw_init_impl,
+ .prepare = rpcif_prepare_impl,
+ .manual_xfer = rpcif_manual_xfer_impl,
+ .dirmap_read = rpcif_dirmap_read_impl,
+ .status_reg = RPCIF_CMNSR,
+ .status_mask = RPCIF_CMNSR_TEND,
+};
+
+static const struct rpcif_impl xspi_impl = {
+ .hw_init = xspi_hw_init_impl,
+ .prepare = xspi_prepare_impl,
+ .manual_xfer = xspi_manual_xfer_impl,
+ .dirmap_read = xspi_dirmap_read_impl,
+ .status_reg = XSPI_INTS,
+ .status_mask = XSPI_INTS_CMDCMP,
+};
+
+static const struct rpcif_info rpcif_info_r8a7796 = {
+ .regmap_config = &rpcif_regmap_config,
+ .impl = &rpcif_impl,
+ .type = RPCIF_RCAR_GEN3,
+ .strtim = 6,
+};
+
+static const struct rpcif_info rpcif_info_gen3 = {
+ .regmap_config = &rpcif_regmap_config,
+ .impl = &rpcif_impl,
+ .type = RPCIF_RCAR_GEN3,
+ .strtim = 7,
+};
+
+static const struct rpcif_info rpcif_info_rz_g2l = {
+ .regmap_config = &rpcif_regmap_config,
+ .impl = &rpcif_impl,
+ .type = RPCIF_RZ_G2L,
+ .strtim = 7,
+};
+
+static const struct rpcif_info rpcif_info_gen4 = {
+ .regmap_config = &rpcif_regmap_config,
+ .impl = &rpcif_impl,
+ .type = RPCIF_RCAR_GEN4,
+ .strtim = 15,
+};
+
+static const struct rpcif_info xspi_info_r9a09g047 = {
+ .regmap_config = &xspi_regmap_config,
+ .impl = &xspi_impl,
+ .type = XSPI_RZ_G3E,
+};
+
static const struct of_device_id rpcif_of_match[] = {
{ .compatible = "renesas,r8a7796-rpc-if", .data = &rpcif_info_r8a7796 },
+ { .compatible = "renesas,r9a09g047-xspi", .data = &xspi_info_r9a09g047 },
{ .compatible = "renesas,rcar-gen3-rpc-if", .data = &rpcif_info_gen3 },
{ .compatible = "renesas,rcar-gen4-rpc-if", .data = &rpcif_info_gen4 },
{ .compatible = "renesas,rzg2l-rpc-if", .data = &rpcif_info_rz_g2l },
diff --git a/drivers/memory/renesas-xspi-if-regs.h b/drivers/memory/renesas-xspi-if-regs.h
new file mode 100644
index 000000000000..53f801d591f2
--- /dev/null
+++ b/drivers/memory/renesas-xspi-if-regs.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RZ xSPI Interface Registers Definitions
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation
+ */
+
+#ifndef __RENESAS_XSPI_IF_REGS_H__
+#define __RENESAS_XSPI_IF_REGS_H__
+
+#include <linux/bits.h>
+
+/* xSPI Wrapper Configuration Register */
+#define XSPI_WRAPCFG 0x0000
+
+/* xSPI Bridge Configuration Register */
+#define XSPI_BMCFG 0x0008
+#define XSPI_BMCFG_WRMD BIT(0)
+#define XSPI_BMCFG_MWRCOMB BIT(7)
+#define XSPI_BMCFG_MWRSIZE(val) (((val) & 0xff) << 8)
+#define XSPI_BMCFG_PREEN BIT(16)
+
+/* xSPI Command Map Configuration Register 0 CS0 */
+#define XSPI_CMCFG0CS0 0x0010
+#define XSPI_CMCFG0_FFMT(val) (((val) & 0x03) << 0)
+#define XSPI_CMCFG0_ADDSIZE(val) (((val) & 0x03) << 2)
+
+/* xSPI Command Map Configuration Register 1 CS0 */
+#define XSPI_CMCFG1CS0 0x0014
+#define XSPI_CMCFG1_RDCMD(val) (((val) & 0xffff) << 0)
+#define XSPI_CMCFG1_RDCMD_UPPER_BYTE(val) (((val) & 0xff) << 8)
+#define XSPI_CMCFG1_RDLATE(val) (((val) & 0x1f) << 16)
+
+/* xSPI Command Map Configuration Register 2 CS0 */
+#define XSPI_CMCFG2CS0 0x0018
+#define XSPI_CMCFG2_WRCMD(val) (((val) & 0xffff) << 0)
+#define XSPI_CMCFG2_WRCMD_UPPER(val) (((val) & 0xff) << 8)
+#define XSPI_CMCFG2_WRLATE(val) (((val) & 0x1f) << 16)
+
+/* xSPI Link I/O Configuration Register CS0 */
+#define XSPI_LIOCFGCS0 0x0050
+#define XSPI_LIOCFG_PRTMD(val) (((val) & 0x3ff) << 0)
+#define XSPI_LIOCFG_CSMIN(val) (((val) & 0x0f) << 16)
+#define XSPI_LIOCFG_CSASTEX BIT(20)
+#define XSPI_LIOCFG_CSNEGEX BIT(21)
+
+/* xSPI Bridge Map Control Register 0 */
+#define XSPI_BMCTL0 0x0060
+#define XSPI_BMCTL0_CS0ACC(val) (((val) & 0x03) << 0)
+
+/* xSPI Bridge Map Control Register 1 */
+#define XSPI_BMCTL1 0x0064
+#define XSPI_BMCTL1_MWRPUSH BIT(8)
+
+/* xSPI Command Manual Control Register 0 */
+#define XSPI_CDCTL0 0x0070
+#define XSPI_CDCTL0_TRREQ BIT(0)
+#define XSPI_CDCTL0_CSSEL BIT(3)
+#define XSPI_CDCTL0_TRNUM(val) (((val) & 0x03) << 4)
+
+/* xSPI Command Manual Type Buf */
+#define XSPI_CDTBUF0 0x0080
+#define XSPI_CDTBUF_CMDSIZE(val) (((val) & 0x03) << 0)
+#define XSPI_CDTBUF_ADDSIZE(val) (((val) & 0x07) << 2)
+#define XSPI_CDTBUF_DATASIZE(val) (((val) & 0x0f) << 5)
+#define XSPI_CDTBUF_LATE(val) (((val) & 0x1f) << 9)
+#define XSPI_CDTBUF_TRTYPE BIT(15)
+#define XSPI_CDTBUF_CMD(val) (((val) & 0xffff) << 16)
+#define XSPI_CDTBUF_CMD_FIELD(val) (((val) & 0xff) << 24)
+
+/* xSPI Command Manual Address Buff */
+#define XSPI_CDABUF0 0x0084
+
+/* xSPI Command Manual Data 0 Buf */
+#define XSPI_CDD0BUF0 0x0088
+
+/* xSPI Command Manual Data 1 Buf */
+#define XSPI_CDD1BUF0 0x008c
+
+/* xSPI Command Calibration Control Register 0 CS0 */
+#define XSPI_CCCTL0CS0 0x0130
+#define XSPI_CCCTL0_CAEN BIT(0)
+
+/* xSPI Interrupt Status Register */
+#define XSPI_INTS 0x0190
+#define XSPI_INTS_CMDCMP BIT(0)
+
+/* xSPI Interrupt Clear Register */
+#define XSPI_INTC 0x0194
+#define XSPI_INTC_CMDCMPC BIT(0)
+
+/* xSPI Interrupt Enable Register */
+#define XSPI_INTE 0x0198
+#define XSPI_INTE_CMDCMPE BIT(0)
+
+/* Maximum data size of MWRSIZE*/
+#define MWRSIZE_MAX 64
+
+/* xSPI Protocol mode */
+#define PROTO_1S_2S_2S 0x48
+#define PROTO_2S_2S_2S 0x49
+#define PROTO_1S_4S_4S 0x090
+#define PROTO_4S_4S_4S 0x092
+
+#endif /* __RENESAS_XSPI_IF_REGS_H__ */
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 8e68b64bd7f8..488e346047c1 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -624,8 +624,8 @@ static int device_irq_init(struct pm860x_chip *chip,
ret = -EBUSY;
goto out;
}
- irq_domain_add_legacy(node, nr_irqs, chip->irq_base, 0,
- &pm860x_irq_domain_ops, chip);
+ irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0,
+ &pm860x_irq_domain_ops, chip);
chip->core_irq = i2c->irq;
if (!chip->core_irq)
goto out;
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 22b936310039..96992af22565 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -943,6 +943,26 @@ config MFD_MAX77714
drivers must be enabled in order to use each functionality of the
device.
+config MFD_MAX77759
+ tristate "Maxim Integrated MAX77759 PMIC"
+ depends on I2C
+ depends on OF
+ select IRQ_DOMAIN
+ select MFD_CORE
+ select REGMAP_I2C
+ select REGMAP_IRQ
+ help
+ Say yes here to add support for Maxim Integrated MAX77759.
+ This is a companion Power Management IC for USB Type-C applications
+ with Battery Charger, Fuel Gauge, temperature sensors, USB Type-C
+ Port Controller (TCPC), NVMEM, and additional GPIO interfaces.
+ This driver provides common support for accessing the device;
+ additional drivers must be enabled in order to use the functionality
+ of the device.
+
+ To compile this driver as a module, choose M here: the module will be
+ called max77759.
+
config MFD_MAX77843
bool "Maxim Semiconductor MAX77843 PMIC Support"
depends on I2C=y
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 948cbdf42a18..5e5cc279af60 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -169,6 +169,7 @@ obj-$(CONFIG_MFD_MAX77686) += max77686.o
obj-$(CONFIG_MFD_MAX77693) += max77693.o
obj-$(CONFIG_MFD_MAX77705) += max77705.o
obj-$(CONFIG_MFD_MAX77714) += max77714.o
+obj-$(CONFIG_MFD_MAX77759) += max77759.o
obj-$(CONFIG_MFD_MAX77843) += max77843.o
obj-$(CONFIG_MFD_MAX8907) += max8907.o
max8925-objs := max8925-core.o max8925-i2c.o
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 15c95828b09a..049abcbd71ce 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -580,9 +580,9 @@ static int ab8500_irq_init(struct ab8500 *ab8500, struct device_node *np)
num_irqs = AB8500_NR_IRQS;
/* If ->irq_base is zero this will give a linear mapping */
- ab8500->domain = irq_domain_add_simple(ab8500->dev->of_node,
- num_irqs, 0,
- &ab8500_irq_ops, ab8500);
+ ab8500->domain = irq_domain_create_simple(of_fwnode_handle(ab8500->dev->of_node),
+ num_irqs, 0,
+ &ab8500_irq_ops, ab8500);
if (!ab8500->domain) {
dev_err(ab8500->dev, "Failed to create irqdomain\n");
diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index d919ae9691e2..ac2139597fab 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -312,8 +312,7 @@ int arizona_irq_init(struct arizona *arizona)
flags |= arizona->pdata.irq_flags;
/* Allocate a virtual IRQ domain to distribute to the regmap domains */
- arizona->virq = irq_domain_add_linear(NULL, 2, &arizona_domain_ops,
- arizona);
+ arizona->virq = irq_domain_create_linear(NULL, 2, &arizona_domain_ops, arizona);
if (!arizona->virq) {
dev_err(arizona->dev, "Failed to add core IRQ domain\n");
ret = -EINVAL;
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 5b3e355e78f6..21e68a382b11 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -2607,9 +2607,9 @@ static int db8500_irq_init(struct device_node *np)
{
int i;
- db8500_irq_domain = irq_domain_add_simple(
- np, NUM_PRCMU_WAKEUPS, 0,
- &db8500_irq_ops, NULL);
+ db8500_irq_domain = irq_domain_create_simple(of_fwnode_handle(np),
+ NUM_PRCMU_WAKEUPS, 0,
+ &db8500_irq_ops, NULL);
if (!db8500_irq_domain) {
pr_err("Failed to create irqdomain\n");
diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c
index 6fe388da6fb6..d47152467951 100644
--- a/drivers/mfd/fsl-imx25-tsadc.c
+++ b/drivers/mfd/fsl-imx25-tsadc.c
@@ -65,15 +65,14 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev,
struct mx25_tsadc *tsadc)
{
struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
int irq;
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
- tsadc->domain = irq_domain_add_simple(np, 2, 0, &mx25_tsadc_domain_ops,
- tsadc);
+ tsadc->domain = irq_domain_create_simple(of_fwnode_handle(dev->of_node), 2, 0,
+ &mx25_tsadc_domain_ops, tsadc);
if (!tsadc->domain) {
dev_err(dev, "Failed to add irq domain\n");
return -ENOMEM;
diff --git a/drivers/mfd/lp8788-irq.c b/drivers/mfd/lp8788-irq.c
index 39006297f3d2..ea0fdf7a4b6e 100644
--- a/drivers/mfd/lp8788-irq.c
+++ b/drivers/mfd/lp8788-irq.c
@@ -161,7 +161,7 @@ int lp8788_irq_init(struct lp8788 *lp, int irq)
return -ENOMEM;
irqd->lp = lp;
- irqd->domain = irq_domain_add_linear(lp->dev->of_node, LP8788_INT_MAX,
+ irqd->domain = irq_domain_create_linear(of_fwnode_handle(lp->dev->of_node), LP8788_INT_MAX,
&lp8788_domain_ops, irqd);
if (!irqd->domain) {
dev_err(lp->dev, "failed to add irq domain err\n");
diff --git a/drivers/mfd/max77759.c b/drivers/mfd/max77759.c
new file mode 100644
index 000000000000..6cf6306c4a3b
--- /dev/null
+++ b/drivers/mfd/max77759.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2020 Google Inc
+ * Copyright 2025 Linaro Ltd.
+ *
+ * Core driver for Maxim MAX77759 companion PMIC for USB Type-C
+ */
+
+#include <linux/array_size.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/cleanup.h>
+#include <linux/completion.h>
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/jiffies.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/max77759.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/overflow.h>
+#include <linux/regmap.h>
+
+/* Chip ID as per MAX77759_PMIC_REG_PMIC_ID */
+enum {
+ MAX77759_CHIP_ID = 59,
+};
+
+enum max77759_i2c_subdev_id {
+ /*
+ * These are arbitrary and simply used to match struct
+ * max77759_i2c_subdev entries to the regmap pointers in struct
+ * max77759 during probe().
+ */
+ MAX77759_I2C_SUBDEV_ID_MAXQ,
+ MAX77759_I2C_SUBDEV_ID_CHARGER,
+};
+
+struct max77759_i2c_subdev {
+ enum max77759_i2c_subdev_id id;
+ const struct regmap_config *cfg;
+ u16 i2c_address;
+};
+
+static const struct regmap_range max77759_top_registers[] = {
+ regmap_reg_range(0x00, 0x02), /* PMIC_ID / PMIC_REVISION / OTP_REVISION */
+ regmap_reg_range(0x22, 0x24), /* INTSRC / INTSRCMASK / TOPSYS_INT */
+ regmap_reg_range(0x26, 0x26), /* TOPSYS_INT_MASK */
+ regmap_reg_range(0x40, 0x40), /* I2C_CNFG */
+ regmap_reg_range(0x50, 0x51), /* SWRESET / CONTROL_FG */
+};
+
+static const struct regmap_range max77759_top_ro_registers[] = {
+ regmap_reg_range(0x00, 0x02),
+ regmap_reg_range(0x22, 0x22),
+};
+
+static const struct regmap_range max77759_top_volatile_registers[] = {
+ regmap_reg_range(0x22, 0x22),
+ regmap_reg_range(0x24, 0x24),
+};
+
+static const struct regmap_access_table max77759_top_wr_table = {
+ .yes_ranges = max77759_top_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_top_registers),
+ .no_ranges = max77759_top_ro_registers,
+ .n_no_ranges = ARRAY_SIZE(max77759_top_ro_registers),
+};
+
+static const struct regmap_access_table max77759_top_rd_table = {
+ .yes_ranges = max77759_top_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_top_registers),
+};
+
+static const struct regmap_access_table max77759_top_volatile_table = {
+ .yes_ranges = max77759_top_volatile_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_top_volatile_registers),
+};
+
+static const struct regmap_config max77759_regmap_config_top = {
+ .name = "top",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX77759_PMIC_REG_CONTROL_FG,
+ .wr_table = &max77759_top_wr_table,
+ .rd_table = &max77759_top_rd_table,
+ .volatile_table = &max77759_top_volatile_table,
+ .num_reg_defaults_raw = MAX77759_PMIC_REG_CONTROL_FG + 1,
+ .cache_type = REGCACHE_FLAT,
+};
+
+static const struct regmap_range max77759_maxq_registers[] = {
+ regmap_reg_range(0x60, 0x73), /* Device ID, Rev, INTx, STATUSx, MASKx */
+ regmap_reg_range(0x81, 0xa1), /* AP_DATAOUTx */
+ regmap_reg_range(0xb1, 0xd1), /* AP_DATAINx */
+ regmap_reg_range(0xe0, 0xe0), /* UIC_SWRST */
+};
+
+static const struct regmap_range max77759_maxq_ro_registers[] = {
+ regmap_reg_range(0x60, 0x63), /* Device ID, Rev */
+ regmap_reg_range(0x68, 0x6f), /* STATUSx */
+ regmap_reg_range(0xb1, 0xd1),
+};
+
+static const struct regmap_range max77759_maxq_volatile_registers[] = {
+ regmap_reg_range(0x64, 0x6f), /* INTx, STATUSx */
+ regmap_reg_range(0xb1, 0xd1),
+ regmap_reg_range(0xe0, 0xe0),
+};
+
+static const struct regmap_access_table max77759_maxq_wr_table = {
+ .yes_ranges = max77759_maxq_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_maxq_registers),
+ .no_ranges = max77759_maxq_ro_registers,
+ .n_no_ranges = ARRAY_SIZE(max77759_maxq_ro_registers),
+};
+
+static const struct regmap_access_table max77759_maxq_rd_table = {
+ .yes_ranges = max77759_maxq_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_maxq_registers),
+};
+
+static const struct regmap_access_table max77759_maxq_volatile_table = {
+ .yes_ranges = max77759_maxq_volatile_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_maxq_volatile_registers),
+};
+
+static const struct regmap_config max77759_regmap_config_maxq = {
+ .name = "maxq",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX77759_MAXQ_REG_UIC_SWRST,
+ .wr_table = &max77759_maxq_wr_table,
+ .rd_table = &max77759_maxq_rd_table,
+ .volatile_table = &max77759_maxq_volatile_table,
+ .num_reg_defaults_raw = MAX77759_MAXQ_REG_UIC_SWRST + 1,
+ .cache_type = REGCACHE_FLAT,
+};
+
+static const struct regmap_range max77759_charger_registers[] = {
+ regmap_reg_range(0xb0, 0xcc),
+};
+
+static const struct regmap_range max77759_charger_ro_registers[] = {
+ regmap_reg_range(0xb4, 0xb8), /* INT_OK, DETAILS_0x */
+};
+
+static const struct regmap_range max77759_charger_volatile_registers[] = {
+ regmap_reg_range(0xb0, 0xb1), /* INTx */
+ regmap_reg_range(0xb4, 0xb8),
+};
+
+static const struct regmap_access_table max77759_charger_wr_table = {
+ .yes_ranges = max77759_charger_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_charger_registers),
+ .no_ranges = max77759_charger_ro_registers,
+ .n_no_ranges = ARRAY_SIZE(max77759_charger_ro_registers),
+};
+
+static const struct regmap_access_table max77759_charger_rd_table = {
+ .yes_ranges = max77759_charger_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_charger_registers),
+};
+
+static const struct regmap_access_table max77759_charger_volatile_table = {
+ .yes_ranges = max77759_charger_volatile_registers,
+ .n_yes_ranges = ARRAY_SIZE(max77759_charger_volatile_registers),
+};
+
+static const struct regmap_config max77759_regmap_config_charger = {
+ .name = "charger",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX77759_CHGR_REG_CHG_CNFG_19,
+ .wr_table = &max77759_charger_wr_table,
+ .rd_table = &max77759_charger_rd_table,
+ .volatile_table = &max77759_charger_volatile_table,
+ .num_reg_defaults_raw = MAX77759_CHGR_REG_CHG_CNFG_19 + 1,
+ .cache_type = REGCACHE_FLAT,
+};
+
+/*
+ * Interrupts - with the following interrupt hierarchy:
+ * pmic IRQs (INTSRC)
+ * - MAXQ_INT: MaxQ IRQs
+ * - UIC_INT1
+ * - APCmdResI
+ * - SysMsgI
+ * - GPIOxI
+ * - TOPSYS_INT: topsys
+ * - TOPSYS_INT
+ * - TSHDN_INT
+ * - SYSOVLO_INT
+ * - SYSUVLO_INT
+ * - FSHIP_NOT_RD
+ * - CHGR_INT: charger
+ * - CHG_INT
+ * - CHG_INT2
+ */
+enum {
+ MAX77759_INT_MAXQ,
+ MAX77759_INT_TOPSYS,
+ MAX77759_INT_CHGR,
+};
+
+enum {
+ MAX77759_TOPSYS_INT_TSHDN,
+ MAX77759_TOPSYS_INT_SYSOVLO,
+ MAX77759_TOPSYS_INT_SYSUVLO,
+ MAX77759_TOPSYS_INT_FSHIP_NOT_RD,
+};
+
+enum {
+ MAX77759_MAXQ_INT_APCMDRESI,
+ MAX77759_MAXQ_INT_SYSMSGI,
+ MAX77759_MAXQ_INT_GPIO,
+ MAX77759_MAXQ_INT_UIC1,
+ MAX77759_MAXQ_INT_UIC2,
+ MAX77759_MAXQ_INT_UIC3,
+ MAX77759_MAXQ_INT_UIC4,
+};
+
+enum {
+ MAX77759_CHARGER_INT_1,
+ MAX77759_CHARGER_INT_2,
+};
+
+static const struct regmap_irq max77759_pmic_irqs[] = {
+ REGMAP_IRQ_REG(MAX77759_INT_MAXQ, 0, MAX77759_PMIC_REG_INTSRC_MAXQ),
+ REGMAP_IRQ_REG(MAX77759_INT_TOPSYS, 0, MAX77759_PMIC_REG_INTSRC_TOPSYS),
+ REGMAP_IRQ_REG(MAX77759_INT_CHGR, 0, MAX77759_PMIC_REG_INTSRC_CHGR),
+};
+
+static const struct regmap_irq max77759_maxq_irqs[] = {
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_APCMDRESI, 0, MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI),
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_SYSMSGI, 0, MAX77759_MAXQ_REG_UIC_INT1_SYSMSGI),
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_GPIO, 0, GENMASK(1, 0)),
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_UIC1, 0, GENMASK(5, 2)),
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_UIC2, 1, GENMASK(7, 0)),
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_UIC3, 2, GENMASK(7, 0)),
+ REGMAP_IRQ_REG(MAX77759_MAXQ_INT_UIC4, 3, GENMASK(7, 0)),
+};
+
+static const struct regmap_irq max77759_topsys_irqs[] = {
+ REGMAP_IRQ_REG(MAX77759_TOPSYS_INT_TSHDN, 0, MAX77759_PMIC_REG_TOPSYS_INT_TSHDN),
+ REGMAP_IRQ_REG(MAX77759_TOPSYS_INT_SYSOVLO, 0, MAX77759_PMIC_REG_TOPSYS_INT_SYSOVLO),
+ REGMAP_IRQ_REG(MAX77759_TOPSYS_INT_SYSUVLO, 0, MAX77759_PMIC_REG_TOPSYS_INT_SYSUVLO),
+ REGMAP_IRQ_REG(MAX77759_TOPSYS_INT_FSHIP_NOT_RD, 0, MAX77759_PMIC_REG_TOPSYS_INT_FSHIP),
+};
+
+static const struct regmap_irq max77759_chgr_irqs[] = {
+ REGMAP_IRQ_REG(MAX77759_CHARGER_INT_1, 0, GENMASK(7, 0)),
+ REGMAP_IRQ_REG(MAX77759_CHARGER_INT_2, 1, GENMASK(7, 0)),
+};
+
+static const struct regmap_irq_chip max77759_pmic_irq_chip = {
+ .name = "max77759-pmic",
+ /* INTSRC is read-only and doesn't require clearing */
+ .status_base = MAX77759_PMIC_REG_INTSRC,
+ .mask_base = MAX77759_PMIC_REG_INTSRCMASK,
+ .num_regs = 1,
+ .irqs = max77759_pmic_irqs,
+ .num_irqs = ARRAY_SIZE(max77759_pmic_irqs),
+};
+
+/*
+ * We can let regmap-irq auto-ack the topsys interrupt bits as required, but
+ * for all others the individual drivers need to know which interrupt bit
+ * exactly is set inside their interrupt handlers, and therefore we can not set
+ * .ack_base for those.
+ */
+static const struct regmap_irq_chip max77759_maxq_irq_chip = {
+ .name = "max77759-maxq",
+ .domain_suffix = "MAXQ",
+ .status_base = MAX77759_MAXQ_REG_UIC_INT1,
+ .mask_base = MAX77759_MAXQ_REG_UIC_INT1_M,
+ .num_regs = 4,
+ .irqs = max77759_maxq_irqs,
+ .num_irqs = ARRAY_SIZE(max77759_maxq_irqs),
+};
+
+static const struct regmap_irq_chip max77759_topsys_irq_chip = {
+ .name = "max77759-topsys",
+ .domain_suffix = "TOPSYS",
+ .status_base = MAX77759_PMIC_REG_TOPSYS_INT,
+ .mask_base = MAX77759_PMIC_REG_TOPSYS_INT_MASK,
+ .ack_base = MAX77759_PMIC_REG_TOPSYS_INT,
+ .num_regs = 1,
+ .irqs = max77759_topsys_irqs,
+ .num_irqs = ARRAY_SIZE(max77759_topsys_irqs),
+};
+
+static const struct regmap_irq_chip max77759_chrg_irq_chip = {
+ .name = "max77759-chgr",
+ .domain_suffix = "CHGR",
+ .status_base = MAX77759_CHGR_REG_CHG_INT,
+ .mask_base = MAX77759_CHGR_REG_CHG_INT_MASK,
+ .num_regs = 2,
+ .irqs = max77759_chgr_irqs,
+ .num_irqs = ARRAY_SIZE(max77759_chgr_irqs),
+};
+
+static const struct max77759_i2c_subdev max77759_i2c_subdevs[] = {
+ {
+ .id = MAX77759_I2C_SUBDEV_ID_MAXQ,
+ .cfg = &max77759_regmap_config_maxq,
+ /* I2C address is same as for sub-block 'top' */
+ },
+ {
+ .id = MAX77759_I2C_SUBDEV_ID_CHARGER,
+ .cfg = &max77759_regmap_config_charger,
+ .i2c_address = 0x69,
+ },
+};
+
+static const struct resource max77759_gpio_resources[] = {
+ DEFINE_RES_IRQ_NAMED(MAX77759_MAXQ_INT_GPIO, "GPI"),
+};
+
+static const struct resource max77759_charger_resources[] = {
+ DEFINE_RES_IRQ_NAMED(MAX77759_CHARGER_INT_1, "INT1"),
+ DEFINE_RES_IRQ_NAMED(MAX77759_CHARGER_INT_2, "INT2"),
+};
+
+static const struct mfd_cell max77759_cells[] = {
+ MFD_CELL_OF("max77759-nvmem", NULL, NULL, 0, 0,
+ "maxim,max77759-nvmem"),
+};
+
+static const struct mfd_cell max77759_maxq_cells[] = {
+ MFD_CELL_OF("max77759-gpio", max77759_gpio_resources, NULL, 0, 0,
+ "maxim,max77759-gpio"),
+};
+
+static const struct mfd_cell max77759_charger_cells[] = {
+ MFD_CELL_RES("max77759-charger", max77759_charger_resources),
+};
+
+int max77759_maxq_command(struct max77759 *max77759,
+ const struct max77759_maxq_command *cmd,
+ struct max77759_maxq_response *rsp)
+{
+ DEFINE_FLEX(struct max77759_maxq_response, _rsp, rsp, length, 1);
+ struct device *dev = regmap_get_device(max77759->regmap_maxq);
+ static const unsigned int timeout_ms = 200;
+ int ret;
+
+ if (cmd->length > MAX77759_MAXQ_OPCODE_MAXLENGTH)
+ return -EINVAL;
+
+ /*
+ * As a convenience for API users when issuing simple commands, rsp is
+ * allowed to be NULL. In that case we need a temporary here to write
+ * the response to, as we need to verify that the command was indeed
+ * completed correctly.
+ */
+ if (!rsp)
+ rsp = _rsp;
+
+ if (!rsp->length || rsp->length > MAX77759_MAXQ_OPCODE_MAXLENGTH)
+ return -EINVAL;
+
+ guard(mutex)(&max77759->maxq_lock);
+
+ reinit_completion(&max77759->cmd_done);
+
+ /*
+ * MaxQ latches the message when the DATAOUT32 register is written. If
+ * cmd->length is shorter we still need to write 0 to it.
+ */
+ ret = regmap_bulk_write(max77759->regmap_maxq,
+ MAX77759_MAXQ_REG_AP_DATAOUT0, cmd->cmd,
+ cmd->length);
+ if (!ret && cmd->length < MAX77759_MAXQ_OPCODE_MAXLENGTH)
+ ret = regmap_write(max77759->regmap_maxq,
+ MAX77759_MAXQ_REG_AP_DATAOUT32, 0);
+ if (ret) {
+ dev_err(dev, "writing command failed: %d\n", ret);
+ return ret;
+ }
+
+ /* Wait for response from MaxQ */
+ if (!wait_for_completion_timeout(&max77759->cmd_done,
+ msecs_to_jiffies(timeout_ms))) {
+ dev_err(dev, "timed out waiting for response\n");
+ return -ETIMEDOUT;
+ }
+
+ ret = regmap_bulk_read(max77759->regmap_maxq,
+ MAX77759_MAXQ_REG_AP_DATAIN0,
+ rsp->rsp, rsp->length);
+ if (ret) {
+ dev_err(dev, "reading response failed: %d\n", ret);
+ return ret;
+ }
+
+ /*
+ * As per the protocol, the first byte of the reply will match the
+ * request.
+ */
+ if (cmd->cmd[0] != rsp->rsp[0]) {
+ dev_err(dev, "unexpected opcode response for %#.2x: %*ph\n",
+ cmd->cmd[0], (int)rsp->length, rsp->rsp);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(max77759_maxq_command);
+
+static irqreturn_t apcmdres_irq_handler(int irq, void *irq_data)
+{
+ struct max77759 *max77759 = irq_data;
+
+ regmap_write(max77759->regmap_maxq, MAX77759_MAXQ_REG_UIC_INT1,
+ MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI);
+
+ complete(&max77759->cmd_done);
+
+ return IRQ_HANDLED;
+}
+
+static int max77759_create_i2c_subdev(struct i2c_client *client,
+ struct max77759 *max77759,
+ const struct max77759_i2c_subdev *sd)
+{
+ struct i2c_client *sub;
+ struct regmap *regmap;
+ int ret;
+
+ /*
+ * If 'sd' has an I2C address, 'sub' will be assigned a new 'dummy'
+ * device, otherwise use it as-is.
+ */
+ sub = client;
+ if (sd->i2c_address) {
+ sub = devm_i2c_new_dummy_device(&client->dev,
+ client->adapter,
+ sd->i2c_address);
+
+ if (IS_ERR(sub))
+ return dev_err_probe(&client->dev, PTR_ERR(sub),
+ "failed to claim I2C device %s\n",
+ sd->cfg->name);
+ }
+
+ regmap = devm_regmap_init_i2c(sub, sd->cfg);
+ if (IS_ERR(regmap))
+ return dev_err_probe(&sub->dev, PTR_ERR(regmap),
+ "regmap init for '%s' failed\n",
+ sd->cfg->name);
+
+ ret = regmap_attach_dev(&client->dev, regmap, sd->cfg);
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "regmap attach of '%s' failed\n",
+ sd->cfg->name);
+
+ if (sd->id == MAX77759_I2C_SUBDEV_ID_MAXQ)
+ max77759->regmap_maxq = regmap;
+ else if (sd->id == MAX77759_I2C_SUBDEV_ID_CHARGER)
+ max77759->regmap_charger = regmap;
+
+ return 0;
+}
+
+static int max77759_add_chained_irq_chip(struct device *dev,
+ struct regmap *regmap,
+ int pirq,
+ struct regmap_irq_chip_data *parent,
+ const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data)
+{
+ int irq, ret;
+
+ irq = regmap_irq_get_virq(parent, pirq);
+ if (irq < 0)
+ return dev_err_probe(dev, irq,
+ "failed to get parent vIRQ(%d) for chip %s\n",
+ pirq, chip->name);
+
+ ret = devm_regmap_add_irq_chip(dev, regmap, irq,
+ IRQF_ONESHOT | IRQF_SHARED, 0, chip,
+ data);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to add %s IRQ chip\n",
+ chip->name);
+
+ return 0;
+}
+
+static int max77759_add_chained_maxq(struct i2c_client *client,
+ struct max77759 *max77759,
+ struct regmap_irq_chip_data *parent)
+{
+ struct regmap_irq_chip_data *irq_chip_data;
+ int apcmdres_irq;
+ int ret;
+
+ ret = max77759_add_chained_irq_chip(&client->dev,
+ max77759->regmap_maxq,
+ MAX77759_INT_MAXQ,
+ parent,
+ &max77759_maxq_irq_chip,
+ &irq_chip_data);
+ if (ret)
+ return ret;
+
+ init_completion(&max77759->cmd_done);
+ apcmdres_irq = regmap_irq_get_virq(irq_chip_data,
+ MAX77759_MAXQ_INT_APCMDRESI);
+
+ ret = devm_request_threaded_irq(&client->dev, apcmdres_irq,
+ NULL, apcmdres_irq_handler,
+ IRQF_ONESHOT | IRQF_SHARED,
+ dev_name(&client->dev), max77759);
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "MAX77759_MAXQ_INT_APCMDRESI failed\n");
+
+ ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_AUTO,
+ max77759_maxq_cells,
+ ARRAY_SIZE(max77759_maxq_cells),
+ NULL, 0,
+ regmap_irq_get_domain(irq_chip_data));
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "failed to add child devices (MaxQ)\n");
+
+ return 0;
+}
+
+static int max77759_add_chained_topsys(struct i2c_client *client,
+ struct max77759 *max77759,
+ struct regmap_irq_chip_data *parent)
+{
+ struct regmap_irq_chip_data *irq_chip_data;
+ int ret;
+
+ ret = max77759_add_chained_irq_chip(&client->dev,
+ max77759->regmap_top,
+ MAX77759_INT_TOPSYS,
+ parent,
+ &max77759_topsys_irq_chip,
+ &irq_chip_data);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int max77759_add_chained_charger(struct i2c_client *client,
+ struct max77759 *max77759,
+ struct regmap_irq_chip_data *parent)
+{
+ struct regmap_irq_chip_data *irq_chip_data;
+ int ret;
+
+ ret = max77759_add_chained_irq_chip(&client->dev,
+ max77759->regmap_charger,
+ MAX77759_INT_CHGR,
+ parent,
+ &max77759_chrg_irq_chip,
+ &irq_chip_data);
+ if (ret)
+ return ret;
+
+ ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_AUTO,
+ max77759_charger_cells,
+ ARRAY_SIZE(max77759_charger_cells),
+ NULL, 0,
+ regmap_irq_get_domain(irq_chip_data));
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "failed to add child devices (charger)\n");
+
+ return 0;
+}
+
+static int max77759_probe(struct i2c_client *client)
+{
+ struct regmap_irq_chip_data *irq_chip_data_pmic;
+ struct irq_data *irq_data;
+ struct max77759 *max77759;
+ unsigned long irq_flags;
+ unsigned int pmic_id;
+ int ret;
+
+ max77759 = devm_kzalloc(&client->dev, sizeof(*max77759), GFP_KERNEL);
+ if (!max77759)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, max77759);
+
+ max77759->regmap_top = devm_regmap_init_i2c(client,
+ &max77759_regmap_config_top);
+ if (IS_ERR(max77759->regmap_top))
+ return dev_err_probe(&client->dev, PTR_ERR(max77759->regmap_top),
+ "regmap init for '%s' failed\n",
+ max77759_regmap_config_top.name);
+
+ ret = regmap_read(max77759->regmap_top,
+ MAX77759_PMIC_REG_PMIC_ID, &pmic_id);
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "unable to read device ID\n");
+
+ if (pmic_id != MAX77759_CHIP_ID)
+ return dev_err_probe(&client->dev, -ENODEV,
+ "unsupported device ID %#.2x (%d)\n",
+ pmic_id, pmic_id);
+
+ ret = devm_mutex_init(&client->dev, &max77759->maxq_lock);
+ if (ret)
+ return ret;
+
+ for (int i = 0; i < ARRAY_SIZE(max77759_i2c_subdevs); i++) {
+ ret = max77759_create_i2c_subdev(client, max77759,
+ &max77759_i2c_subdevs[i]);
+ if (ret)
+ return ret;
+ }
+
+ irq_data = irq_get_irq_data(client->irq);
+ if (!irq_data)
+ return dev_err_probe(&client->dev, -EINVAL,
+ "invalid IRQ: %d\n", client->irq);
+
+ irq_flags = IRQF_ONESHOT | IRQF_SHARED;
+ irq_flags |= irqd_get_trigger_type(irq_data);
+
+ ret = devm_regmap_add_irq_chip(&client->dev, max77759->regmap_top,
+ client->irq, irq_flags, 0,
+ &max77759_pmic_irq_chip,
+ &irq_chip_data_pmic);
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "failed to add IRQ chip '%s'\n",
+ max77759_pmic_irq_chip.name);
+
+ ret = max77759_add_chained_maxq(client, max77759, irq_chip_data_pmic);
+ if (ret)
+ return ret;
+
+ ret = max77759_add_chained_topsys(client, max77759, irq_chip_data_pmic);
+ if (ret)
+ return ret;
+
+ ret = max77759_add_chained_charger(client, max77759, irq_chip_data_pmic);
+ if (ret)
+ return ret;
+
+ return devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_AUTO,
+ max77759_cells, ARRAY_SIZE(max77759_cells),
+ NULL, 0,
+ regmap_irq_get_domain(irq_chip_data_pmic));
+}
+
+static const struct i2c_device_id max77759_i2c_id[] = {
+ { "max77759" },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, max77759_i2c_id);
+
+static const struct of_device_id max77759_of_id[] = {
+ { .compatible = "maxim,max77759", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, max77759_of_id);
+
+static struct i2c_driver max77759_i2c_driver = {
+ .driver = {
+ .name = "max77759",
+ .of_match_table = max77759_of_id,
+ },
+ .probe = max77759_probe,
+ .id_table = max77759_i2c_id,
+};
+module_i2c_driver(max77759_i2c_driver);
+
+MODULE_AUTHOR("André Draszik <andre.draszik@linaro.org>");
+MODULE_DESCRIPTION("Maxim MAX77759 core driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 105d79b91493..78b16c67a5fc 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -682,8 +682,8 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
return -EBUSY;
}
- irq_domain_add_legacy(node, MAX8925_NR_IRQS, chip->irq_base, 0,
- &max8925_irq_domain_ops, chip);
+ irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0,
+ &max8925_irq_domain_ops, chip);
/* request irq handler for pmic main irq*/
chip->core_irq = irq;
diff --git a/drivers/mfd/max8997-irq.c b/drivers/mfd/max8997-irq.c
index 92e348df03d1..cc87571c9af5 100644
--- a/drivers/mfd/max8997-irq.c
+++ b/drivers/mfd/max8997-irq.c
@@ -327,8 +327,8 @@ int max8997_irq_init(struct max8997_dev *max8997)
true : false;
}
- domain = irq_domain_add_linear(NULL, MAX8997_IRQ_NR,
- &max8997_irq_domain_ops, max8997);
+ domain = irq_domain_create_linear(NULL, MAX8997_IRQ_NR,
+ &max8997_irq_domain_ops, max8997);
if (!domain) {
dev_err(max8997->dev, "could not create irq domain\n");
return -ENODEV;
diff --git a/drivers/mfd/max8998-irq.c b/drivers/mfd/max8998-irq.c
index 83b6f510bc05..b0773fa6e07f 100644
--- a/drivers/mfd/max8998-irq.c
+++ b/drivers/mfd/max8998-irq.c
@@ -230,7 +230,7 @@ int max8998_irq_init(struct max8998_dev *max8998)
max8998_write_reg(max8998->i2c, MAX8998_REG_STATUSM1, 0xff);
max8998_write_reg(max8998->i2c, MAX8998_REG_STATUSM2, 0xff);
- domain = irq_domain_add_simple(NULL, MAX8998_IRQ_NR,
+ domain = irq_domain_create_simple(NULL, MAX8998_IRQ_NR,
max8998->irq_base, &max8998_irq_domain_ops, max8998);
if (!domain) {
dev_err(max8998->dev, "could not create irq domain\n");
diff --git a/drivers/mfd/mt6358-irq.c b/drivers/mfd/mt6358-irq.c
index 49830b526ee8..9f0bcc3ad7a1 100644
--- a/drivers/mfd/mt6358-irq.c
+++ b/drivers/mfd/mt6358-irq.c
@@ -272,9 +272,9 @@ int mt6358_irq_init(struct mt6397_chip *chip)
irqd->pmic_ints[i].en_reg_shift * j, 0);
}
- chip->irq_domain = irq_domain_add_linear(chip->dev->of_node,
- irqd->num_pmic_irqs,
- &mt6358_irq_domain_ops, chip);
+ chip->irq_domain = irq_domain_create_linear(of_fwnode_handle(chip->dev->of_node),
+ irqd->num_pmic_irqs,
+ &mt6358_irq_domain_ops, chip);
if (!chip->irq_domain) {
dev_err(chip->dev, "Could not create IRQ domain\n");
return -ENODEV;
diff --git a/drivers/mfd/mt6397-irq.c b/drivers/mfd/mt6397-irq.c
index 1310665200ed..badc614b4345 100644
--- a/drivers/mfd/mt6397-irq.c
+++ b/drivers/mfd/mt6397-irq.c
@@ -216,10 +216,8 @@ int mt6397_irq_init(struct mt6397_chip *chip)
regmap_write(chip->regmap, chip->int_con[2], 0x0);
chip->pm_nb.notifier_call = mt6397_irq_pm_notifier;
- chip->irq_domain = irq_domain_add_linear(chip->dev->of_node,
- MT6397_IRQ_NR,
- &mt6397_irq_domain_ops,
- chip);
+ chip->irq_domain = irq_domain_create_linear(of_fwnode_handle(chip->dev->of_node),
+ MT6397_IRQ_NR, &mt6397_irq_domain_ops, chip);
if (!chip->irq_domain) {
dev_err(chip->dev, "could not create irq domain\n");
return -ENOMEM;
diff --git a/drivers/mfd/qcom-pm8xxx.c b/drivers/mfd/qcom-pm8xxx.c
index f9ebdf5845b8..c96ea6fbede8 100644
--- a/drivers/mfd/qcom-pm8xxx.c
+++ b/drivers/mfd/qcom-pm8xxx.c
@@ -559,10 +559,8 @@ static int pm8xxx_probe(struct platform_device *pdev)
chip->pm_irq_data = data;
spin_lock_init(&chip->pm_irq_lock);
- chip->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
- data->num_irqs,
- &pm8xxx_irq_domain_ops,
- chip);
+ chip->irqdomain = irq_domain_create_linear(of_fwnode_handle(pdev->dev.of_node),
+ data->num_irqs, &pm8xxx_irq_domain_ops, chip);
if (!chip->irqdomain)
return -ENODEV;
diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c
index f391c2ccaa72..823b1d29389e 100644
--- a/drivers/mfd/stmfx.c
+++ b/drivers/mfd/stmfx.c
@@ -269,7 +269,7 @@ static int stmfx_irq_init(struct i2c_client *client)
u32 irqoutpin = 0, irqtrigger;
int ret;
- stmfx->irq_domain = irq_domain_add_simple(stmfx->dev->of_node,
+ stmfx->irq_domain = irq_domain_create_simple(of_fwnode_handle(stmfx->dev->of_node),
STMFX_REG_IRQ_SRC_MAX, 0,
&stmfx_irq_ops, stmfx);
if (!stmfx->irq_domain) {
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 9c3cf58457a7..819d19dc9b4a 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -1219,8 +1219,8 @@ static int stmpe_irq_init(struct stmpe *stmpe, struct device_node *np)
int base = 0;
int num_irqs = stmpe->variant->num_irqs;
- stmpe->domain = irq_domain_add_simple(np, num_irqs, base,
- &stmpe_irq_ops, stmpe);
+ stmpe->domain = irq_domain_create_simple(of_fwnode_handle(np), num_irqs,
+ base, &stmpe_irq_ops, stmpe);
if (!stmpe->domain) {
dev_err(stmpe->dev, "Failed to create irqdomain\n");
return -ENOSYS;
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index ef953ee73145..2d4eb771e230 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -234,9 +234,9 @@ static const struct irq_domain_ops tc3589x_irq_ops = {
static int tc3589x_irq_init(struct tc3589x *tc3589x, struct device_node *np)
{
- tc3589x->domain = irq_domain_add_simple(
- np, TC3589x_NR_INTERNAL_IRQS, 0,
- &tc3589x_irq_ops, tc3589x);
+ tc3589x->domain = irq_domain_create_simple(of_fwnode_handle(np),
+ TC3589x_NR_INTERNAL_IRQS, 0,
+ &tc3589x_irq_ops, tc3589x);
if (!tc3589x->domain) {
dev_err(tc3589x->dev, "Failed to create irqdomain\n");
diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 029ecc32f078..4e9669d327b4 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -158,7 +158,7 @@ static int tps65217_irq_init(struct tps65217 *tps, int irq)
tps65217_set_bits(tps, TPS65217_REG_INT, TPS65217_INT_MASK,
TPS65217_INT_MASK, TPS65217_PROTECT_NONE);
- tps->irq_domain = irq_domain_add_linear(tps->dev->of_node,
+ tps->irq_domain = irq_domain_create_linear(of_fwnode_handle(tps->dev->of_node),
TPS65217_NUM_IRQ, &tps65217_irq_domain_ops, tps);
if (!tps->irq_domain) {
dev_err(tps->dev, "Could not create IRQ domain\n");
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 82714899efb2..853c48286071 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -363,7 +363,7 @@ static int tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
new_irq_base = 0;
}
- tps6586x->irq_domain = irq_domain_add_simple(tps6586x->dev->of_node,
+ tps6586x->irq_domain = irq_domain_create_simple(of_fwnode_handle(tps6586x->dev->of_node),
irq_num, new_irq_base, &tps6586x_domain_ops,
tps6586x);
if (!tps6586x->irq_domain) {
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 87496c1cb8bc..232c2bfe8c18 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -691,8 +691,8 @@ int twl4030_init_irq(struct device *dev, int irq_num)
return irq_base;
}
- irq_domain_add_legacy(node, nr_irqs, irq_base, 0,
- &irq_domain_simple_ops, NULL);
+ irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0,
+ &irq_domain_simple_ops, NULL);
irq_end = irq_base + TWL4030_CORE_NR_IRQS;
diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c
index 3c03681c124c..00b14cef1dfb 100644
--- a/drivers/mfd/twl6030-irq.c
+++ b/drivers/mfd/twl6030-irq.c
@@ -364,7 +364,6 @@ static const struct of_device_id twl6030_of_match[] __maybe_unused = {
int twl6030_init_irq(struct device *dev, int irq_num)
{
- struct device_node *node = dev->of_node;
int nr_irqs;
int status;
u8 mask[3];
@@ -412,8 +411,8 @@ int twl6030_init_irq(struct device *dev, int irq_num)
twl6030_irq->irq_mapping_tbl = of_id->data;
twl6030_irq->irq_domain =
- irq_domain_add_linear(node, nr_irqs,
- &twl6030_irq_domain_ops, twl6030_irq);
+ irq_domain_create_linear(of_fwnode_handle(dev->of_node), nr_irqs,
+ &twl6030_irq_domain_ops, twl6030_irq);
if (!twl6030_irq->irq_domain) {
dev_err(dev, "Can't add irq_domain\n");
return -ENOMEM;
diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index f1f58e3149ae..b3883fa5dd9f 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c
@@ -587,16 +587,13 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
}
if (irq_base)
- domain = irq_domain_add_legacy(wm831x->dev->of_node,
- ARRAY_SIZE(wm831x_irqs),
- irq_base, 0,
- &wm831x_irq_domain_ops,
- wm831x);
+ domain = irq_domain_create_legacy(of_fwnode_handle(wm831x->dev->of_node),
+ ARRAY_SIZE(wm831x_irqs), irq_base, 0,
+ &wm831x_irq_domain_ops, wm831x);
else
- domain = irq_domain_add_linear(wm831x->dev->of_node,
- ARRAY_SIZE(wm831x_irqs),
- &wm831x_irq_domain_ops,
- wm831x);
+ domain = irq_domain_create_linear(of_fwnode_handle(wm831x->dev->of_node),
+ ARRAY_SIZE(wm831x_irqs), &wm831x_irq_domain_ops,
+ wm831x);
if (!domain) {
dev_warn(wm831x->dev, "Failed to allocate IRQ domain\n");
diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
index 651a028bc519..1475b1ac6983 100644
--- a/drivers/mfd/wm8994-irq.c
+++ b/drivers/mfd/wm8994-irq.c
@@ -213,9 +213,7 @@ int wm8994_irq_init(struct wm8994 *wm8994)
return ret;
}
- wm8994->edge_irq = irq_domain_add_linear(NULL, 1,
- &wm8994_edge_irq_ops,
- wm8994);
+ wm8994->edge_irq = irq_domain_create_linear(NULL, 1, &wm8994_edge_irq_ops, wm8994);
ret = regmap_add_irq_chip(wm8994->regmap,
irq_create_mapping(wm8994->edge_irq,
diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c
index 18fc1aaa5cdd..2b6778d8d166 100644
--- a/drivers/misc/cs5535-mfgpt.c
+++ b/drivers/misc/cs5535-mfgpt.c
@@ -16,6 +16,7 @@
#include <linux/platform_device.h>
#include <linux/cs5535.h>
#include <linux/slab.h>
+#include <asm/msr.h>
#define DRV_NAME "cs5535-mfgpt"
diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
index 69ee4f39af2a..187c5bc91e31 100644
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -254,8 +254,9 @@ static int hi6421v600_irq_probe(struct platform_device *pdev)
if (!priv->irqs)
return -ENOMEM;
- priv->domain = irq_domain_add_simple(np, PMIC_IRQ_LIST_MAX, 0,
- &hi6421v600_domain_ops, priv);
+ priv->domain = irq_domain_create_simple(of_fwnode_handle(np),
+ PMIC_IRQ_LIST_MAX, 0,
+ &hi6421v600_domain_ops, priv);
if (!priv->domain) {
dev_err(dev, "Failed to create IRQ domain\n");
return -ENODEV;
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 4830628510e6..9cc47bf94804 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1220,7 +1220,7 @@ static void mmc_blk_issue_erase_rq(struct mmc_queue *mq, struct request *req,
int err = 0;
blk_status_t status = BLK_STS_OK;
- if (!mmc_can_erase(card)) {
+ if (!mmc_card_can_erase(card)) {
status = BLK_STS_NOTSUPP;
goto fail;
}
@@ -1276,7 +1276,7 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
int err = 0, type = MMC_BLK_SECDISCARD;
blk_status_t status = BLK_STS_OK;
- if (!(mmc_can_secure_erase_trim(card))) {
+ if (!(mmc_card_can_secure_erase_trim(card))) {
status = BLK_STS_NOTSUPP;
goto out;
}
@@ -1284,7 +1284,7 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
from = blk_rq_pos(req);
nr = blk_rq_sectors(req);
- if (mmc_can_trim(card) && !mmc_erase_group_aligned(card, from, nr))
+ if (mmc_card_can_trim(card) && !mmc_erase_group_aligned(card, from, nr))
arg = MMC_SECURE_TRIM1_ARG;
else
arg = MMC_SECURE_ERASE_ARG;
@@ -2278,7 +2278,7 @@ void mmc_blk_mq_recovery(struct mmc_queue *mq)
static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq,
struct request **prev_req)
{
- if (mmc_host_done_complete(mq->card->host))
+ if (mmc_host_can_done_complete(mq->card->host))
return;
mutex_lock(&mq->complete_lock);
@@ -2317,7 +2317,7 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
struct mmc_host *host = mq->card->host;
unsigned long flags;
- if (!mmc_host_done_complete(host)) {
+ if (!mmc_host_can_done_complete(host)) {
bool waiting;
/*
@@ -2430,7 +2430,7 @@ static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq,
mq->rw_wait = false;
/* Release re-tuning here where there is no synchronization required */
- if (err || mmc_host_done_complete(host))
+ if (err || mmc_host_can_done_complete(host))
mmc_retune_release(host);
out_post_req:
@@ -2618,7 +2618,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
*/
md->read_only = mmc_blk_readonly(card);
- if (mmc_host_cmd23(card->host)) {
+ if (mmc_host_can_cmd23(card->host)) {
if ((mmc_card_mmc(card) &&
card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
(mmc_card_sd(card) && !mmc_card_ult_capacity(card) &&
@@ -2655,7 +2655,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
md->disk->private_data = md;
md->parent = parent;
set_disk_ro(md->disk, md->read_only || default_ro);
- if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
+ if (area_type & MMC_BLK_DATA_AREA_RPMB)
md->disk->flags |= GENHD_FL_NO_PART;
/*
diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index 3205feb1e8ff..9cbdd240c3a7 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -89,6 +89,7 @@ struct mmc_fixup {
#define CID_MANFID_MICRON 0x13
#define CID_MANFID_SAMSUNG 0x15
#define CID_MANFID_APACER 0x27
+#define CID_MANFID_SWISSBIT 0x5D
#define CID_MANFID_KINGSTON 0x70
#define CID_MANFID_HYNIX 0x90
#define CID_MANFID_KINGSTON_SD 0x9F
@@ -294,4 +295,9 @@ static inline int mmc_card_broken_sd_poweroff_notify(const struct mmc_card *c)
return c->quirks & MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY;
}
+static inline int mmc_card_no_uhs_ddr50_tuning(const struct mmc_card *c)
+{
+ return c->quirks & MMC_QUIRK_NO_UHS_DDR50_TUNING;
+}
+
#endif
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index ce08e0ea7fc1..a0e2dce70434 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1837,52 +1837,44 @@ int mmc_erase(struct mmc_card *card, sector_t from, unsigned int nr,
}
EXPORT_SYMBOL(mmc_erase);
-int mmc_can_erase(struct mmc_card *card)
+bool mmc_card_can_erase(struct mmc_card *card)
{
- if (card->csd.cmdclass & CCC_ERASE && card->erase_size)
- return 1;
- return 0;
+ return (card->csd.cmdclass & CCC_ERASE && card->erase_size);
}
-EXPORT_SYMBOL(mmc_can_erase);
+EXPORT_SYMBOL(mmc_card_can_erase);
-int mmc_can_trim(struct mmc_card *card)
+bool mmc_card_can_trim(struct mmc_card *card)
{
- if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN) &&
- (!(card->quirks & MMC_QUIRK_TRIM_BROKEN)))
- return 1;
- return 0;
+ return ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN) &&
+ (!(card->quirks & MMC_QUIRK_TRIM_BROKEN)));
}
-EXPORT_SYMBOL(mmc_can_trim);
+EXPORT_SYMBOL(mmc_card_can_trim);
-int mmc_can_discard(struct mmc_card *card)
+bool mmc_card_can_discard(struct mmc_card *card)
{
/*
* As there's no way to detect the discard support bit at v4.5
* use the s/w feature support filed.
*/
- if (card->ext_csd.feature_support & MMC_DISCARD_FEATURE)
- return 1;
- return 0;
+ return (card->ext_csd.feature_support & MMC_DISCARD_FEATURE);
}
-EXPORT_SYMBOL(mmc_can_discard);
+EXPORT_SYMBOL(mmc_card_can_discard);
-int mmc_can_sanitize(struct mmc_card *card)
+bool mmc_card_can_sanitize(struct mmc_card *card)
{
- if (!mmc_can_trim(card) && !mmc_can_erase(card))
- return 0;
+ if (!mmc_card_can_trim(card) && !mmc_card_can_erase(card))
+ return false;
if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_SANITIZE)
- return 1;
- return 0;
+ return true;
+ return false;
}
-int mmc_can_secure_erase_trim(struct mmc_card *card)
+bool mmc_card_can_secure_erase_trim(struct mmc_card *card)
{
- if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) &&
- !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
- return 1;
- return 0;
+ return ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) &&
+ !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN));
}
-EXPORT_SYMBOL(mmc_can_secure_erase_trim);
+EXPORT_SYMBOL(mmc_card_can_secure_erase_trim);
int mmc_erase_group_aligned(struct mmc_card *card, sector_t from,
unsigned int nr)
@@ -1987,7 +1979,7 @@ unsigned int mmc_calc_max_discard(struct mmc_card *card)
return card->pref_erase;
max_discard = mmc_do_calc_max_discard(card, MMC_ERASE_ARG);
- if (mmc_can_trim(card)) {
+ if (mmc_card_can_trim(card)) {
max_trim = mmc_do_calc_max_discard(card, MMC_TRIM_ARG);
if (max_trim < max_discard || max_discard == 0)
max_discard = max_trim;
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index fc9c066e6468..622085cd766f 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -118,11 +118,11 @@ bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq);
int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq);
int mmc_erase(struct mmc_card *card, sector_t from, unsigned int nr, unsigned int arg);
-int mmc_can_erase(struct mmc_card *card);
-int mmc_can_trim(struct mmc_card *card);
-int mmc_can_discard(struct mmc_card *card);
-int mmc_can_sanitize(struct mmc_card *card);
-int mmc_can_secure_erase_trim(struct mmc_card *card);
+bool mmc_card_can_erase(struct mmc_card *card);
+bool mmc_card_can_trim(struct mmc_card *card);
+bool mmc_card_can_discard(struct mmc_card *card);
+bool mmc_card_can_sanitize(struct mmc_card *card);
+bool mmc_card_can_secure_erase_trim(struct mmc_card *card);
int mmc_erase_group_aligned(struct mmc_card *card, sector_t from, unsigned int nr);
unsigned int mmc_calc_max_discard(struct mmc_card *card);
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index 48c4952512a5..5941d68ff989 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -39,22 +39,22 @@ static inline void mmc_retune_recheck(struct mmc_host *host)
host->retune_now = 1;
}
-static inline int mmc_host_cmd23(struct mmc_host *host)
+static inline int mmc_host_can_cmd23(struct mmc_host *host)
{
return host->caps & MMC_CAP_CMD23;
}
-static inline bool mmc_host_done_complete(struct mmc_host *host)
+static inline bool mmc_host_can_done_complete(struct mmc_host *host)
{
return host->caps & MMC_CAP_DONE_COMPLETE;
}
-static inline int mmc_boot_partition_access(struct mmc_host *host)
+static inline int mmc_host_can_access_boot(struct mmc_host *host)
{
return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
}
-static inline int mmc_host_uhs(struct mmc_host *host)
+static inline int mmc_host_can_uhs(struct mmc_host *host)
{
return host->caps &
(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1522fd2b517d..5be9b42d5057 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -33,6 +33,12 @@
#define MIN_CACHE_EN_TIMEOUT_MS 1600
#define CACHE_FLUSH_TIMEOUT_MS 30000 /* 30s */
+enum mmc_poweroff_type {
+ MMC_POWEROFF_SUSPEND,
+ MMC_POWEROFF_SHUTDOWN,
+ MMC_POWEROFF_UNBIND,
+};
+
static const unsigned int tran_exp[] = {
10000, 100000, 1000000, 10000000,
0, 0, 0, 0
@@ -453,7 +459,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
* There are two boot regions of equal size, defined in
* multiples of 128K.
*/
- if (ext_csd[EXT_CSD_BOOT_MULT] && mmc_boot_partition_access(card->host)) {
+ if (ext_csd[EXT_CSD_BOOT_MULT] && mmc_host_can_access_boot(card->host)) {
for (idx = 0; idx < MMC_NUM_BOOT_PARTITION; idx++) {
part_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
mmc_part_add(card, part_size,
@@ -572,7 +578,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
* RPMB regions are defined in multiples of 128K.
*/
card->ext_csd.raw_rpmb_size_mult = ext_csd[EXT_CSD_RPMB_MULT];
- if (ext_csd[EXT_CSD_RPMB_MULT] && mmc_host_cmd23(card->host)) {
+ if (ext_csd[EXT_CSD_RPMB_MULT] && mmc_host_can_cmd23(card->host)) {
mmc_part_add(card, ext_csd[EXT_CSD_RPMB_MULT] << 17,
EXT_CSD_PART_CONFIG_ACC_RPMB,
"rpmb", 0, false,
@@ -674,7 +680,7 @@ static int mmc_read_ext_csd(struct mmc_card *card)
u8 *ext_csd;
int err;
- if (!mmc_can_ext_csd(card))
+ if (!mmc_card_can_ext_csd(card))
return 0;
err = mmc_get_ext_csd(card, &ext_csd);
@@ -953,7 +959,7 @@ static int mmc_select_powerclass(struct mmc_card *card)
int err, ddr;
/* Power class selection is supported for versions >= 4.0 */
- if (!mmc_can_ext_csd(card))
+ if (!mmc_card_can_ext_csd(card))
return 0;
bus_width = host->ios.bus_width;
@@ -1016,7 +1022,7 @@ static int mmc_select_bus_width(struct mmc_card *card)
unsigned idx, bus_width = 0;
int err = 0;
- if (!mmc_can_ext_csd(card) ||
+ if (!mmc_card_can_ext_csd(card) ||
!(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
return 0;
@@ -1537,7 +1543,7 @@ static int mmc_select_timing(struct mmc_card *card)
{
int err = 0;
- if (!mmc_can_ext_csd(card))
+ if (!mmc_card_can_ext_csd(card))
goto bus_speed;
if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400ES) {
@@ -1798,9 +1804,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
}
/* set erase_arg */
- if (mmc_can_discard(card))
+ if (mmc_card_can_discard(card))
card->erase_arg = MMC_DISCARD_ARG;
- else if (mmc_can_trim(card))
+ else if (mmc_card_can_trim(card))
card->erase_arg = MMC_TRIM_ARG;
else
card->erase_arg = MMC_ERASE_ARG;
@@ -1949,7 +1955,7 @@ err:
return err;
}
-static int mmc_can_sleep(struct mmc_card *card)
+static bool mmc_card_can_sleep(struct mmc_card *card)
{
return card->ext_csd.rev >= 3;
}
@@ -2007,13 +2013,26 @@ out_release:
return err;
}
-static int mmc_can_poweroff_notify(const struct mmc_card *card)
+static bool mmc_card_can_poweroff_notify(const struct mmc_card *card)
{
return card &&
mmc_card_mmc(card) &&
(card->ext_csd.power_off_notification == EXT_CSD_POWER_ON);
}
+static bool mmc_host_can_poweroff_notify(const struct mmc_host *host,
+ enum mmc_poweroff_type pm_type)
+{
+ if (host->caps2 & MMC_CAP2_FULL_PWR_CYCLE)
+ return true;
+
+ if (host->caps2 & MMC_CAP2_FULL_PWR_CYCLE_IN_SUSPEND &&
+ pm_type == MMC_POWEROFF_SUSPEND)
+ return true;
+
+ return pm_type == MMC_POWEROFF_SHUTDOWN;
+}
+
static int mmc_poweroff_notify(struct mmc_card *card, unsigned int notify_type)
{
unsigned int timeout = card->ext_csd.generic_cmd6_time;
@@ -2037,15 +2056,6 @@ static int mmc_poweroff_notify(struct mmc_card *card, unsigned int notify_type)
}
/*
- * Host is being removed. Free up the current card.
- */
-static void mmc_remove(struct mmc_host *host)
-{
- mmc_remove_card(host->card);
- host->card = NULL;
-}
-
-/*
* Card detection - card is alive.
*/
static int mmc_alive(struct mmc_host *host)
@@ -2070,7 +2080,8 @@ static void mmc_detect(struct mmc_host *host)
mmc_put_card(host->card, NULL);
if (err) {
- mmc_remove(host);
+ mmc_remove_card(host->card);
+ host->card = NULL;
mmc_claim_host(host);
mmc_detach_bus(host);
@@ -2108,11 +2119,13 @@ static int _mmc_flush_cache(struct mmc_host *host)
return err;
}
-static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
+static int _mmc_suspend(struct mmc_host *host, enum mmc_poweroff_type pm_type)
{
+ unsigned int notify_type = EXT_CSD_POWER_OFF_SHORT;
int err = 0;
- unsigned int notify_type = is_suspend ? EXT_CSD_POWER_OFF_SHORT :
- EXT_CSD_POWER_OFF_LONG;
+
+ if (pm_type == MMC_POWEROFF_SHUTDOWN)
+ notify_type = EXT_CSD_POWER_OFF_LONG;
mmc_claim_host(host);
@@ -2123,11 +2136,10 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
if (err)
goto out;
- if (mmc_can_poweroff_notify(host->card) &&
- ((host->caps2 & MMC_CAP2_FULL_PWR_CYCLE) || !is_suspend ||
- (host->caps2 & MMC_CAP2_FULL_PWR_CYCLE_IN_SUSPEND)))
+ if (mmc_card_can_poweroff_notify(host->card) &&
+ mmc_host_can_poweroff_notify(host, pm_type))
err = mmc_poweroff_notify(host->card, notify_type);
- else if (mmc_can_sleep(host->card))
+ else if (mmc_card_can_sleep(host->card))
err = mmc_sleep(host);
else if (!mmc_host_is_spi(host))
err = mmc_deselect_cards(host);
@@ -2142,13 +2154,27 @@ out:
}
/*
+ * Host is being removed. Free up the current card and do a graceful power-off.
+ */
+static void mmc_remove(struct mmc_host *host)
+{
+ get_device(&host->card->dev);
+ mmc_remove_card(host->card);
+
+ _mmc_suspend(host, MMC_POWEROFF_UNBIND);
+
+ put_device(&host->card->dev);
+ host->card = NULL;
+}
+
+/*
* Suspend callback
*/
static int mmc_suspend(struct mmc_host *host)
{
int err;
- err = _mmc_suspend(host, true);
+ err = _mmc_suspend(host, MMC_POWEROFF_SUSPEND);
if (!err) {
pm_runtime_disable(&host->card->dev);
pm_runtime_set_suspended(&host->card->dev);
@@ -2187,15 +2213,16 @@ static int mmc_shutdown(struct mmc_host *host)
int err = 0;
/*
- * In a specific case for poweroff notify, we need to resume the card
- * before we can shutdown it properly.
+ * If the card remains suspended at this point and it was done by using
+ * the sleep-cmd (CMD5), we may need to re-initialize it first, to allow
+ * us to send the preferred poweroff-notification cmd at shutdown.
*/
- if (mmc_can_poweroff_notify(host->card) &&
- !(host->caps2 & MMC_CAP2_FULL_PWR_CYCLE))
+ if (mmc_card_can_poweroff_notify(host->card) &&
+ !mmc_host_can_poweroff_notify(host, MMC_POWEROFF_SUSPEND))
err = _mmc_resume(host);
if (!err)
- err = _mmc_suspend(host, false);
+ err = _mmc_suspend(host, MMC_POWEROFF_SHUTDOWN);
return err;
}
@@ -2219,7 +2246,7 @@ static int mmc_runtime_suspend(struct mmc_host *host)
if (!(host->caps & MMC_CAP_AGGRESSIVE_PM))
return 0;
- err = _mmc_suspend(host, true);
+ err = _mmc_suspend(host, MMC_POWEROFF_SUSPEND);
if (err)
pr_err("%s: error %d doing aggressive suspend\n",
mmc_hostname(host), err);
@@ -2242,14 +2269,12 @@ static int mmc_runtime_resume(struct mmc_host *host)
return 0;
}
-static int mmc_can_reset(struct mmc_card *card)
+static bool mmc_card_can_reset(struct mmc_card *card)
{
u8 rst_n_function;
rst_n_function = card->ext_csd.rst_n_function;
- if ((rst_n_function & EXT_CSD_RST_N_EN_MASK) != EXT_CSD_RST_N_ENABLED)
- return 0;
- return 1;
+ return ((rst_n_function & EXT_CSD_RST_N_EN_MASK) == EXT_CSD_RST_N_ENABLED);
}
static int _mmc_hw_reset(struct mmc_host *host)
@@ -2263,7 +2288,7 @@ static int _mmc_hw_reset(struct mmc_host *host)
_mmc_flush_cache(host);
if ((host->caps & MMC_CAP_HW_RESET) && host->ops->card_hw_reset &&
- mmc_can_reset(card)) {
+ mmc_card_can_reset(card)) {
/* If the card accept RST_n signal, send it. */
mmc_set_clock(host, host->f_init);
host->ops->card_hw_reset(host);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 5c8e62e8f331..66283825513c 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -383,7 +383,7 @@ int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd)
if (!card || !new_ext_csd)
return -EINVAL;
- if (!mmc_can_ext_csd(card))
+ if (!mmc_card_can_ext_csd(card))
return -EOPNOTSUPP;
/*
@@ -944,7 +944,7 @@ out:
return err;
}
-int mmc_can_ext_csd(struct mmc_card *card)
+bool mmc_card_can_ext_csd(struct mmc_card *card)
{
return (card && card->csd.mmca_vsn > CSD_SPEC_VER_3);
}
@@ -1046,7 +1046,7 @@ int mmc_sanitize(struct mmc_card *card, unsigned int timeout_ms)
struct mmc_host *host = card->host;
int err;
- if (!mmc_can_sanitize(card)) {
+ if (!mmc_card_can_sanitize(card)) {
pr_warn("%s: Sanitize not supported\n", mmc_hostname(host));
return -EOPNOTSUPP;
}
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 0df3ebd900d1..514c40ff4b4e 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -37,7 +37,7 @@ int mmc_send_cid(struct mmc_host *host, u32 *cid);
int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
int mmc_spi_set_crc(struct mmc_host *host, int use_crc);
int mmc_bus_test(struct mmc_card *card, u8 bus_width);
-int mmc_can_ext_csd(struct mmc_card *card);
+bool mmc_card_can_ext_csd(struct mmc_card *card);
int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal);
bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
unsigned int timeout_ms);
diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
index 4f4286b8e0f2..80e5d87a5e50 100644
--- a/drivers/mmc/core/mmc_test.c
+++ b/drivers/mmc/core/mmc_test.c
@@ -191,7 +191,7 @@ static void mmc_test_prepare_sbc(struct mmc_test_card *test,
{
struct mmc_card *card = test->card;
- if (!mrq->sbc || !mmc_host_cmd23(card->host) ||
+ if (!mrq->sbc || !mmc_host_can_cmd23(card->host) ||
!mmc_test_card_cmd23(card) || !mmc_op_multi(mrq->cmd->opcode) ||
(card->quirks & MMC_QUIRK_BLK_NO_CMD23)) {
mrq->sbc = NULL;
@@ -1510,7 +1510,7 @@ static int mmc_test_area_erase(struct mmc_test_card *test)
{
struct mmc_test_area *t = &test->area;
- if (!mmc_can_erase(test->card))
+ if (!mmc_card_can_erase(test->card))
return 0;
return mmc_erase(test->card, t->dev_addr, t->max_sz >> 9,
@@ -1746,10 +1746,10 @@ static int mmc_test_profile_trim_perf(struct mmc_test_card *test)
struct timespec64 ts1, ts2;
int ret;
- if (!mmc_can_trim(test->card))
+ if (!mmc_card_can_trim(test->card))
return RESULT_UNSUP_CARD;
- if (!mmc_can_erase(test->card))
+ if (!mmc_card_can_erase(test->card))
return RESULT_UNSUP_HOST;
for (sz = 512; sz < t->max_sz; sz <<= 1) {
@@ -1863,10 +1863,10 @@ static int mmc_test_profile_seq_trim_perf(struct mmc_test_card *test)
struct timespec64 ts1, ts2;
int ret;
- if (!mmc_can_trim(test->card))
+ if (!mmc_card_can_trim(test->card))
return RESULT_UNSUP_CARD;
- if (!mmc_can_erase(test->card))
+ if (!mmc_card_can_erase(test->card))
return RESULT_UNSUP_HOST;
for (sz = 512; sz <= t->max_sz; sz <<= 1) {
@@ -2114,7 +2114,7 @@ static int mmc_test_rw_multiple(struct mmc_test_card *test,
return 0;
/* prepare test area */
- if (mmc_can_erase(test->card) &&
+ if (mmc_card_can_erase(test->card) &&
tdata->prepare & MMC_TEST_PREP_ERASE) {
ret = mmc_erase(test->card, dev_addr,
size / 512, test->card->erase_arg);
@@ -2390,7 +2390,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
512, write);
if (use_sbc && t->blocks > 1 && !mrq->sbc) {
- ret = mmc_host_cmd23(host) ?
+ ret = mmc_host_can_cmd23(host) ?
RESULT_UNSUP_CARD :
RESULT_UNSUP_HOST;
goto out_free;
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 3ba62f825b84..284856c8f655 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -184,9 +184,9 @@ static void mmc_queue_setup_discard(struct mmc_card *card,
return;
lim->max_hw_discard_sectors = max_discard;
- if (mmc_can_secure_erase_trim(card))
+ if (mmc_card_can_secure_erase_trim(card))
lim->max_secure_erase_sectors = max_discard;
- if (mmc_can_trim(card) && card->erased_byte == 0)
+ if (mmc_card_can_trim(card) && card->erased_byte == 0)
lim->max_write_zeroes_sectors = max_discard;
/* granularity must not be greater than max. discard */
@@ -352,7 +352,7 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
};
struct gendisk *disk;
- if (mmc_can_erase(card))
+ if (mmc_card_can_erase(card))
mmc_queue_setup_discard(card, &lim);
lim.max_hw_sectors = min(host->max_blk_count, host->max_req_size / 512);
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 89b512905be1..7f893bafaa60 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -34,6 +34,16 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = {
MMC_QUIRK_BROKEN_SD_CACHE | MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY,
EXT_CSD_REV_ANY),
+ /*
+ * Swissbit series S46-u cards throw I/O errors during tuning requests
+ * after the initial tuning request expectedly times out. This has
+ * only been observed on cards manufactured on 01/2019 that are using
+ * Bay Trail host controllers.
+ */
+ _FIXUP_EXT("0016G", CID_MANFID_SWISSBIT, 0x5342, 2019, 1,
+ 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd,
+ MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY),
+
END_FIXUP
};
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 8eba697d3d86..ec02067f03c5 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -455,7 +455,7 @@ static void sd_update_bus_speed_mode(struct mmc_card *card)
* If the host doesn't support any of the UHS-I modes, fallback on
* default speed.
*/
- if (!mmc_host_uhs(card->host)) {
+ if (!mmc_host_can_uhs(card->host)) {
card->sd_bus_speed = 0;
return;
}
@@ -618,6 +618,29 @@ static int sd_set_current_limit(struct mmc_card *card, u8 *status)
}
/*
+ * Determine if the card should tune or not.
+ */
+static bool mmc_sd_use_tuning(struct mmc_card *card)
+{
+ /*
+ * SPI mode doesn't define CMD19 and tuning is only valid for SDR50 and
+ * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
+ */
+ if (mmc_host_is_spi(card->host))
+ return false;
+
+ switch (card->host->ios.timing) {
+ case MMC_TIMING_UHS_SDR50:
+ case MMC_TIMING_UHS_SDR104:
+ return true;
+ case MMC_TIMING_UHS_DDR50:
+ return !mmc_card_no_uhs_ddr50_tuning(card);
+ }
+
+ return false;
+}
+
+/*
* UHS-I specific initialization procedure
*/
static int mmc_sd_init_uhs_card(struct mmc_card *card)
@@ -660,14 +683,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
if (err)
goto out;
- /*
- * SPI mode doesn't define CMD19 and tuning is only valid for SDR50 and
- * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
- */
- if (!mmc_host_is_spi(card->host) &&
- (card->host->ios.timing == MMC_TIMING_UHS_SDR50 ||
- card->host->ios.timing == MMC_TIMING_UHS_DDR50 ||
- card->host->ios.timing == MMC_TIMING_UHS_SDR104)) {
+ if (mmc_sd_use_tuning(card)) {
err = mmc_execute_tuning(card);
/*
@@ -851,7 +867,7 @@ try_again:
* to switch to 1.8V signaling level. If the card has failed
* repeatedly to switch however, skip this.
*/
- if (retries && mmc_host_uhs(host))
+ if (retries && mmc_host_can_uhs(host))
ocr |= SD_OCR_S18R;
/*
@@ -1493,7 +1509,7 @@ retry:
* signaling. Detect that situation and try to initialize a UHS-I (1.8V)
* transfer mode.
*/
- if (!v18_fixup_failed && !mmc_host_is_spi(host) && mmc_host_uhs(host) &&
+ if (!v18_fixup_failed && !mmc_host_is_spi(host) && mmc_host_can_uhs(host) &&
mmc_sd_card_using_v18(card) &&
host->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_180) {
if (mmc_host_set_uhs_voltage(host) ||
@@ -1508,7 +1524,7 @@ retry:
}
/* Initialization sequence for UHS-I cards */
- if (rocr & SD_ROCR_S18A && mmc_host_uhs(host)) {
+ if (rocr & SD_ROCR_S18A && mmc_host_can_uhs(host)) {
err = mmc_sd_init_uhs_card(card);
if (err)
goto free_card;
@@ -1597,15 +1613,6 @@ free_card:
}
/*
- * Host is being removed. Free up the current card.
- */
-static void mmc_sd_remove(struct mmc_host *host)
-{
- mmc_remove_card(host->card);
- host->card = NULL;
-}
-
-/*
* Card detection - card is alive.
*/
static int mmc_sd_alive(struct mmc_host *host)
@@ -1630,7 +1637,8 @@ static void mmc_sd_detect(struct mmc_host *host)
mmc_put_card(host->card, NULL);
if (err) {
- mmc_sd_remove(host);
+ mmc_remove_card(host->card);
+ host->card = NULL;
mmc_claim_host(host);
mmc_detach_bus(host);
@@ -1731,6 +1739,19 @@ out:
}
/*
+ * Host is being removed. Free up the current card and do a graceful power-off.
+ */
+static void mmc_sd_remove(struct mmc_host *host)
+{
+ get_device(&host->card->dev);
+ mmc_remove_card(host->card);
+
+ _mmc_sd_suspend(host);
+
+ put_device(&host->card->dev);
+ host->card = NULL;
+}
+/*
* Callback for suspend
*/
static int mmc_sd_suspend(struct mmc_host *host)
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 4b19b8a16b09..0f753367aec1 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -198,7 +198,7 @@ static int sdio_read_cccr(struct mmc_card *card, u32 ocr)
if (ret)
goto out;
- if (mmc_host_uhs(card->host)) {
+ if (mmc_host_can_uhs(card->host)) {
if (data & SDIO_UHS_DDR50)
card->sw_caps.sd3_bus_mode
|= SD_MODE_UHS_DDR50 | SD_MODE_UHS_SDR50
@@ -527,7 +527,7 @@ static int sdio_set_bus_speed_mode(struct mmc_card *card)
* If the host doesn't support any of the UHS-I modes, fallback on
* default speed.
*/
- if (!mmc_host_uhs(card->host))
+ if (!mmc_host_can_uhs(card->host))
return 0;
bus_speed = SDIO_SPEED_SDR12;
@@ -669,7 +669,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
WARN_ON(!host->claimed);
/* to query card if 1.8V signalling is supported */
- if (mmc_host_uhs(host))
+ if (mmc_host_can_uhs(host))
ocr |= R4_18V_PRESENT;
try_again:
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 5fd455816393..c5bc6268803e 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -228,13 +228,13 @@ int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config)
}
EXPORT_SYMBOL(mmc_gpiod_set_cd_config);
-bool mmc_can_gpio_cd(struct mmc_host *host)
+bool mmc_host_can_gpio_cd(struct mmc_host *host)
{
struct mmc_gpio *ctx = host->slot.handler_priv;
return ctx->cd_gpio ? true : false;
}
-EXPORT_SYMBOL(mmc_can_gpio_cd);
+EXPORT_SYMBOL(mmc_host_can_gpio_cd);
/**
* mmc_gpiod_request_ro - request a gpio descriptor for write protection
@@ -275,10 +275,10 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
}
EXPORT_SYMBOL(mmc_gpiod_request_ro);
-bool mmc_can_gpio_ro(struct mmc_host *host)
+bool mmc_host_can_gpio_ro(struct mmc_host *host)
{
struct mmc_gpio *ctx = host->slot.handler_priv;
return ctx->ro_gpio ? true : false;
}
-EXPORT_SYMBOL(mmc_can_gpio_ro);
+EXPORT_SYMBOL(mmc_host_can_gpio_ro);
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 264e11fa58ea..c3f0f41a426d 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -250,6 +250,20 @@ config MMC_SDHCI_OF_DWCMSHC
If you have a controller with this interface, say Y or M here.
If unsure, say N.
+config MMC_SDHCI_OF_K1
+ tristate "SDHCI OF support for the SpacemiT K1 SoC"
+ depends on ARCH_SPACEMIT || COMPILE_TEST
+ depends on MMC_SDHCI_PLTFM
+ depends on OF
+ depends on COMMON_CLK
+ help
+ This selects the Secure Digital Host Controller Interface (SDHCI)
+ found in the SpacemiT K1 SoC.
+
+ If you have a controller with this interface, say Y or M here.
+
+ If unsure, say N.
+
config MMC_SDHCI_OF_SPARX5
tristate "SDHCI OF support for the MCHP Sparx5 SoC"
depends on MMC_SDHCI_PLTFM
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 5147467ec825..75bafc7b162b 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_AT91) += sdhci-of-at91.o
obj-$(CONFIG_MMC_SDHCI_OF_ESDHC) += sdhci-of-esdhc.o
obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o
obj-$(CONFIG_MMC_SDHCI_OF_DWCMSHC) += sdhci-of-dwcmshc.o
+obj-$(CONFIG_MMC_SDHCI_OF_K1) += sdhci-of-k1.o
obj-$(CONFIG_MMC_SDHCI_OF_SPARX5) += sdhci-of-sparx5.o
obj-$(CONFIG_MMC_SDHCI_OF_MA35D1) += sdhci-of-ma35d1.o
obj-$(CONFIG_MMC_SDHCI_BCM_KONA) += sdhci-bcm-kona.o
diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c
index b6b6dd677ae5..24abd3a93da9 100644
--- a/drivers/mmc/host/alcor.c
+++ b/drivers/mmc/host/alcor.c
@@ -20,6 +20,7 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
+#include <linux/string_choices.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
@@ -208,7 +209,7 @@ static void alcor_trf_block_pio(struct alcor_sdmmc_host *host, bool read)
len = min(host->sg_miter.length, blksize);
dev_dbg(host->dev, "PIO, %s block size: 0x%zx\n",
- read ? "read" : "write", blksize);
+ str_read_write(read), blksize);
host->sg_miter.consumed = len;
host->blocks--;
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index e5f151d092cd..def054ddd256 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -44,6 +44,7 @@
#include <linux/scatterlist.h>
#include <linux/time.h>
#include <linux/workqueue.h>
+#include <linux/string_choices.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
@@ -391,8 +392,7 @@ static void bcm2835_transfer_block_pio(struct bcm2835_host *host, bool is_read)
if (time_after(jiffies, wait_max)) {
dev_err(dev, "PIO %s timeout - EDM %08x\n",
- is_read ? "read" : "write",
- edm);
+ str_read_write(is_read), edm);
hsts = SDHSTS_REW_TIME_OUT;
break;
}
@@ -435,12 +435,12 @@ static void bcm2835_transfer_pio(struct bcm2835_host *host)
SDHSTS_CRC7_ERROR |
SDHSTS_FIFO_ERROR)) {
dev_err(dev, "%s transfer error - HSTS %08x\n",
- is_read ? "read" : "write", sdhsts);
+ str_read_write(is_read), sdhsts);
host->data->error = -EILSEQ;
} else if ((sdhsts & (SDHSTS_CMD_TIME_OUT |
SDHSTS_REW_TIME_OUT))) {
dev_err(dev, "%s timeout error - HSTS %08x\n",
- is_read ? "read" : "write", sdhsts);
+ str_read_write(is_read), sdhsts);
host->data->error = -ETIMEDOUT;
}
}
diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
index 2e2ff984f0b3..1373deb3f531 100644
--- a/drivers/mmc/host/cavium-thunderx.c
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -72,7 +72,7 @@ static int thunder_mmc_probe(struct pci_dev *pdev,
if (ret)
return ret;
- ret = pci_request_regions(pdev, KBUILD_MODNAME);
+ ret = pcim_request_all_regions(pdev, KBUILD_MODNAME);
if (ret)
return ret;
@@ -164,7 +164,6 @@ error:
}
}
clk_disable_unprepare(host->clk);
- pci_release_regions(pdev);
return ret;
}
@@ -183,7 +182,6 @@ static void thunder_mmc_remove(struct pci_dev *pdev)
writeq(dma_cfg, host->dma_base + MIO_EMM_DMA_CFG(host));
clk_disable_unprepare(host->clk);
- pci_release_regions(pdev);
}
static const struct pci_device_id thunder_mmc_id_table[] = {
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 578290015e5b..2bfcc47dcf3e 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -3622,7 +3622,7 @@ int dw_mci_runtime_suspend(struct device *dev)
clk_disable_unprepare(host->ciu_clk);
if (host->slot &&
- (mmc_can_gpio_cd(host->slot->mmc) ||
+ (mmc_host_can_gpio_cd(host->slot->mmc) ||
!mmc_card_is_removable(host->slot->mmc)))
clk_disable_unprepare(host->biu_clk);
@@ -3636,7 +3636,7 @@ int dw_mci_runtime_resume(struct device *dev)
struct dw_mci *host = dev_get_drvdata(dev);
if (host->slot &&
- (mmc_can_gpio_cd(host->slot->mmc) ||
+ (mmc_host_can_gpio_cd(host->slot->mmc) ||
!mmc_card_is_removable(host->slot->mmc))) {
ret = clk_prepare_enable(host->biu_clk);
if (ret)
@@ -3690,7 +3690,7 @@ int dw_mci_runtime_resume(struct device *dev)
err:
if (host->slot &&
- (mmc_can_gpio_cd(host->slot->mmc) ||
+ (mmc_host_can_gpio_cd(host->slot->mmc) ||
!mmc_card_is_removable(host->slot->mmc)))
clk_disable_unprepare(host->biu_clk);
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 345ea91629e0..31eb90536bce 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -5,6 +5,7 @@
*/
#include <linux/module.h>
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/delay.h>
@@ -83,6 +84,7 @@
#define EMMC51_CFG0 0x204
#define EMMC50_CFG0 0x208
#define EMMC50_CFG1 0x20c
+#define EMMC50_CFG2 0x21c
#define EMMC50_CFG3 0x220
#define SDC_FIFO_CFG 0x228
#define CQHCI_SETTING 0x7fc
@@ -233,7 +235,9 @@
/* MSDC_PATCH_BIT mask */
#define MSDC_PATCH_BIT_ODDSUPP BIT(1) /* RW */
+#define MSDC_PATCH_BIT_DIS_WRMON BIT(2) /* RW */
#define MSDC_PATCH_BIT_RD_DAT_SEL BIT(3) /* RW */
+#define MSDC_PATCH_BIT_DESCUP_SEL BIT(6) /* RW */
#define MSDC_INT_DAT_LATCH_CK_SEL GENMASK(9, 7)
#define MSDC_CKGEN_MSDC_DLY_SEL GENMASK(14, 10)
#define MSDC_PATCH_BIT_IODSSEL BIT(16) /* RW */
@@ -246,10 +250,22 @@
#define MSDC_PATCH_BIT_SPCPUSH BIT(29) /* RW */
#define MSDC_PATCH_BIT_DECRCTMO BIT(30) /* RW */
-#define MSDC_PATCH_BIT1_CMDTA GENMASK(5, 3) /* RW */
+/* MSDC_PATCH_BIT1 mask */
+#define MSDC_PB1_WRDAT_CRC_TACNTR GENMASK(2, 0) /* RW */
+#define MSDC_PATCH_BIT1_CMDTA GENMASK(5, 3) /* RW */
#define MSDC_PB1_BUSY_CHECK_SEL BIT(7) /* RW */
#define MSDC_PATCH_BIT1_STOP_DLY GENMASK(11, 8) /* RW */
-
+#define MSDC_PB1_DDR_CMD_FIX_SEL BIT(14) /* RW */
+#define MSDC_PB1_SINGLE_BURST BIT(16) /* RW */
+#define MSDC_PB1_RSVD20 GENMASK(18, 17) /* RW */
+#define MSDC_PB1_AUTO_SYNCST_CLR BIT(19) /* RW */
+#define MSDC_PB1_MARK_POP_WATER BIT(20) /* RW */
+#define MSDC_PB1_LP_DCM_EN BIT(21) /* RW */
+#define MSDC_PB1_RSVD3 BIT(22) /* RW */
+#define MSDC_PB1_AHB_GDMA_HCLK BIT(23) /* RW */
+#define MSDC_PB1_MSDC_CLK_ENFEAT GENMASK(31, 24) /* RW */
+
+/* MSDC_PATCH_BIT2 mask */
#define MSDC_PATCH_BIT2_CFGRESP BIT(15) /* RW */
#define MSDC_PATCH_BIT2_CFGCRCSTS BIT(28) /* RW */
#define MSDC_PB2_SUPPORT_64G BIT(1) /* RW */
@@ -291,7 +307,10 @@
/* EMMC50_CFG1 mask */
#define EMMC50_CFG1_DS_CFG BIT(28) /* RW */
-#define EMMC50_CFG3_OUTS_WR GENMASK(4, 0) /* RW */
+/* EMMC50_CFG2 mask */
+#define EMMC50_CFG2_AXI_SET_LEN GENMASK(27, 24) /* RW */
+
+#define EMMC50_CFG3_OUTS_WR GENMASK(4, 0) /* RW */
#define SDC_FIFO_CFG_WRVALIDSEL BIT(24) /* RW */
#define SDC_FIFO_CFG_RDVALIDSEL BIT(25) /* RW */
@@ -927,15 +946,15 @@ static int msdc_ungate_clock(struct msdc_host *host)
static void msdc_new_tx_setting(struct msdc_host *host)
{
+ u32 val;
+
if (!host->top_base)
return;
- sdr_set_bits(host->top_base + LOOP_TEST_CONTROL,
- TEST_LOOP_DSCLK_MUX_SEL);
- sdr_set_bits(host->top_base + LOOP_TEST_CONTROL,
- TEST_LOOP_LATCH_MUX_SEL);
- sdr_clr_bits(host->top_base + LOOP_TEST_CONTROL,
- TEST_HS400_CMD_LOOP_MUX_SEL);
+ val = readl(host->top_base + LOOP_TEST_CONTROL);
+ val |= TEST_LOOP_DSCLK_MUX_SEL;
+ val |= TEST_LOOP_LATCH_MUX_SEL;
+ val &= ~TEST_HS400_CMD_LOOP_MUX_SEL;
switch (host->timing) {
case MMC_TIMING_LEGACY:
@@ -945,19 +964,18 @@ static void msdc_new_tx_setting(struct msdc_host *host)
case MMC_TIMING_UHS_SDR25:
case MMC_TIMING_UHS_DDR50:
case MMC_TIMING_MMC_DDR52:
- sdr_clr_bits(host->top_base + LOOP_TEST_CONTROL,
- LOOP_EN_SEL_CLK);
+ val &= ~LOOP_EN_SEL_CLK;
break;
case MMC_TIMING_UHS_SDR50:
case MMC_TIMING_UHS_SDR104:
case MMC_TIMING_MMC_HS200:
case MMC_TIMING_MMC_HS400:
- sdr_set_bits(host->top_base + LOOP_TEST_CONTROL,
- LOOP_EN_SEL_CLK);
+ val |= LOOP_EN_SEL_CLK;
break;
default:
break;
}
+ writel(val, host->top_base + LOOP_TEST_CONTROL);
}
static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
@@ -1816,7 +1834,7 @@ static irqreturn_t msdc_irq(int irq, void *dev_id)
static void msdc_init_hw(struct msdc_host *host)
{
- u32 val;
+ u32 val, pb1_val, pb2_val;
u32 tune_reg = host->dev_comp->pad_tune_reg;
struct mmc_host *mmc = mmc_from_priv(host);
@@ -1869,71 +1887,115 @@ static void msdc_init_hw(struct msdc_host *host)
}
writel(0, host->base + MSDC_IOCON);
sdr_set_field(host->base + MSDC_IOCON, MSDC_IOCON_DDLSEL, 0);
- writel(0x403c0046, host->base + MSDC_PATCH_BIT);
- sdr_set_field(host->base + MSDC_PATCH_BIT, MSDC_CKGEN_MSDC_DLY_SEL, 1);
- writel(0xffff4089, host->base + MSDC_PATCH_BIT1);
- sdr_set_bits(host->base + EMMC50_CFG0, EMMC50_CFG_CFCSTS_SEL);
+
+ /*
+ * Patch bit 0 and 1 are completely rewritten, but for patch bit 2
+ * defaults are retained and, if necessary, only some bits are fixed
+ * up: read the PB2 register here for later usage in this function.
+ */
+ pb2_val = readl(host->base + MSDC_PATCH_BIT2);
+
+ /* Enable odd number support for 8-bit data bus */
+ val = MSDC_PATCH_BIT_ODDSUPP;
+
+ /* Disable SD command register write monitor */
+ val |= MSDC_PATCH_BIT_DIS_WRMON;
+
+ /* Issue transfer done interrupt after GPD update */
+ val |= MSDC_PATCH_BIT_DESCUP_SEL;
+
+ /* Extend R1B busy detection delay (in clock cycles) */
+ val |= FIELD_PREP(MSDC_PATCH_BIT_BUSYDLY, 15);
+
+ /* Enable CRC phase timeout during data write operation */
+ val |= MSDC_PATCH_BIT_DECRCTMO;
+
+ /* Set CKGEN delay to one stage */
+ val |= FIELD_PREP(MSDC_CKGEN_MSDC_DLY_SEL, 1);
+
+ /* First MSDC_PATCH_BIT setup is done: pull the trigger! */
+ writel(val, host->base + MSDC_PATCH_BIT);
+
+ /* Set wr data, crc status, cmd response turnaround period for UHS104 */
+ pb1_val = FIELD_PREP(MSDC_PB1_WRDAT_CRC_TACNTR, 1);
+ pb1_val |= FIELD_PREP(MSDC_PATCH_BIT1_CMDTA, 1);
+ pb1_val |= MSDC_PB1_DDR_CMD_FIX_SEL;
+
+ /* Support 'single' burst type only when AXI_LEN is 0 */
+ sdr_get_field(host->base + EMMC50_CFG2, EMMC50_CFG2_AXI_SET_LEN, &val);
+ if (!val)
+ pb1_val |= MSDC_PB1_SINGLE_BURST;
+
+ /* Set auto sync state clear, block gap stop clk */
+ pb1_val |= MSDC_PB1_RSVD20 | MSDC_PB1_AUTO_SYNCST_CLR | MSDC_PB1_MARK_POP_WATER;
+
+ /* Set low power DCM, use HCLK for GDMA, use MSDC CLK for everything else */
+ pb1_val |= MSDC_PB1_LP_DCM_EN | MSDC_PB1_RSVD3 |
+ MSDC_PB1_AHB_GDMA_HCLK | MSDC_PB1_MSDC_CLK_ENFEAT;
+
+ /* If needed, enable R1b command busy check at controller init time */
+ if (!host->dev_comp->busy_check)
+ pb1_val |= MSDC_PB1_BUSY_CHECK_SEL;
if (host->dev_comp->stop_clk_fix) {
if (host->dev_comp->stop_dly_sel)
- sdr_set_field(host->base + MSDC_PATCH_BIT1,
- MSDC_PATCH_BIT1_STOP_DLY,
- host->dev_comp->stop_dly_sel);
+ pb1_val |= FIELD_PREP(MSDC_PATCH_BIT1_STOP_DLY,
+ host->dev_comp->stop_dly_sel);
- if (host->dev_comp->pop_en_cnt)
- sdr_set_field(host->base + MSDC_PATCH_BIT2,
- MSDC_PB2_POP_EN_CNT,
- host->dev_comp->pop_en_cnt);
+ if (host->dev_comp->pop_en_cnt) {
+ pb2_val &= ~MSDC_PB2_POP_EN_CNT;
+ pb2_val |= FIELD_PREP(MSDC_PB2_POP_EN_CNT,
+ host->dev_comp->pop_en_cnt);
+ }
- sdr_clr_bits(host->base + SDC_FIFO_CFG,
- SDC_FIFO_CFG_WRVALIDSEL);
- sdr_clr_bits(host->base + SDC_FIFO_CFG,
- SDC_FIFO_CFG_RDVALIDSEL);
+ sdr_clr_bits(host->base + SDC_FIFO_CFG, SDC_FIFO_CFG_WRVALIDSEL);
+ sdr_clr_bits(host->base + SDC_FIFO_CFG, SDC_FIFO_CFG_RDVALIDSEL);
}
- if (host->dev_comp->busy_check)
- sdr_clr_bits(host->base + MSDC_PATCH_BIT1, BIT(7));
-
if (host->dev_comp->async_fifo) {
- sdr_set_field(host->base + MSDC_PATCH_BIT2,
- MSDC_PB2_RESPWAIT, 3);
- if (host->dev_comp->enhance_rx) {
- if (host->top_base)
- sdr_set_bits(host->top_base + EMMC_TOP_CONTROL,
- SDC_RX_ENH_EN);
- else
- sdr_set_bits(host->base + SDC_ADV_CFG0,
- SDC_RX_ENHANCE_EN);
+ /* Set CMD response timeout multiplier to 65 + (16 * 3) cycles */
+ pb2_val &= ~MSDC_PB2_RESPWAIT;
+ pb2_val |= FIELD_PREP(MSDC_PB2_RESPWAIT, 3);
+
+ /* eMMC4.5: Select async FIFO path for CMD resp and CRC status */
+ pb2_val &= ~MSDC_PATCH_BIT2_CFGRESP;
+ pb2_val |= MSDC_PATCH_BIT2_CFGCRCSTS;
+
+ if (!host->dev_comp->enhance_rx) {
+ /* eMMC4.5: Delay 2T for CMD resp and CRC status EN signals */
+ pb2_val &= ~(MSDC_PB2_RESPSTSENSEL | MSDC_PB2_CRCSTSENSEL);
+ pb2_val |= FIELD_PREP(MSDC_PB2_RESPSTSENSEL, 2);
+ pb2_val |= FIELD_PREP(MSDC_PB2_CRCSTSENSEL, 2);
+ } else if (host->top_base) {
+ sdr_set_bits(host->top_base + EMMC_TOP_CONTROL, SDC_RX_ENH_EN);
} else {
- sdr_set_field(host->base + MSDC_PATCH_BIT2,
- MSDC_PB2_RESPSTSENSEL, 2);
- sdr_set_field(host->base + MSDC_PATCH_BIT2,
- MSDC_PB2_CRCSTSENSEL, 2);
+ sdr_set_bits(host->base + SDC_ADV_CFG0, SDC_RX_ENHANCE_EN);
}
- /* use async fifo, then no need tune internal delay */
- sdr_clr_bits(host->base + MSDC_PATCH_BIT2,
- MSDC_PATCH_BIT2_CFGRESP);
- sdr_set_bits(host->base + MSDC_PATCH_BIT2,
- MSDC_PATCH_BIT2_CFGCRCSTS);
}
if (host->dev_comp->support_64g)
- sdr_set_bits(host->base + MSDC_PATCH_BIT2,
- MSDC_PB2_SUPPORT_64G);
+ pb2_val |= MSDC_PB2_SUPPORT_64G;
+
+ /* Patch Bit 1/2 setup is done: pull the trigger! */
+ writel(pb1_val, host->base + MSDC_PATCH_BIT1);
+ writel(pb2_val, host->base + MSDC_PATCH_BIT2);
+ sdr_set_bits(host->base + EMMC50_CFG0, EMMC50_CFG_CFCSTS_SEL);
+
if (host->dev_comp->data_tune) {
if (host->top_base) {
- sdr_set_bits(host->top_base + EMMC_TOP_CONTROL,
- PAD_DAT_RD_RXDLY_SEL);
- sdr_clr_bits(host->top_base + EMMC_TOP_CONTROL,
- DATA_K_VALUE_SEL);
- sdr_set_bits(host->top_base + EMMC_TOP_CMD,
- PAD_CMD_RD_RXDLY_SEL);
+ u32 top_ctl_val = readl(host->top_base + EMMC_TOP_CONTROL);
+ u32 top_cmd_val = readl(host->top_base + EMMC_TOP_CMD);
+
+ top_cmd_val |= PAD_CMD_RD_RXDLY_SEL;
+ top_ctl_val |= PAD_DAT_RD_RXDLY_SEL;
+ top_ctl_val &= ~DATA_K_VALUE_SEL;
if (host->tuning_step > PAD_DELAY_HALF) {
- sdr_set_bits(host->top_base + EMMC_TOP_CONTROL,
- PAD_DAT_RD_RXDLY2_SEL);
- sdr_set_bits(host->top_base + EMMC_TOP_CMD,
- PAD_CMD_RD_RXDLY2_SEL);
+ top_cmd_val |= PAD_CMD_RD_RXDLY2_SEL;
+ top_ctl_val |= PAD_DAT_RD_RXDLY2_SEL;
}
+
+ writel(top_ctl_val, host->top_base + EMMC_TOP_CONTROL);
+ writel(top_cmd_val, host->top_base + EMMC_TOP_CMD);
} else {
sdr_set_bits(host->base + tune_reg,
MSDC_PAD_TUNE_RD_SEL |
@@ -2143,15 +2205,17 @@ static inline void msdc_set_cmd_delay(struct msdc_host *host, u32 value)
u32 tune_reg = host->dev_comp->pad_tune_reg;
if (host->top_base) {
+ u32 regval = readl(host->top_base + EMMC_TOP_CMD);
+
+ regval &= ~(PAD_CMD_RXDLY | PAD_CMD_RXDLY2);
+
if (value < PAD_DELAY_HALF) {
- sdr_set_field(host->top_base + EMMC_TOP_CMD, PAD_CMD_RXDLY, value);
- sdr_set_field(host->top_base + EMMC_TOP_CMD, PAD_CMD_RXDLY2, 0);
+ regval |= FIELD_PREP(PAD_CMD_RXDLY, value);
} else {
- sdr_set_field(host->top_base + EMMC_TOP_CMD, PAD_CMD_RXDLY,
- PAD_DELAY_HALF - 1);
- sdr_set_field(host->top_base + EMMC_TOP_CMD, PAD_CMD_RXDLY2,
- value - PAD_DELAY_HALF);
+ regval |= FIELD_PREP(PAD_CMD_RXDLY, PAD_DELAY_HALF - 1);
+ regval |= FIELD_PREP(PAD_CMD_RXDLY2, value - PAD_DELAY_HALF);
}
+ writel(regval, host->top_base + EMMC_TOP_CMD);
} else {
if (value < PAD_DELAY_HALF) {
sdr_set_field(host->base + tune_reg, MSDC_PAD_TUNE_CMDRDLY, value);
@@ -2171,17 +2235,18 @@ static inline void msdc_set_data_delay(struct msdc_host *host, u32 value)
u32 tune_reg = host->dev_comp->pad_tune_reg;
if (host->top_base) {
+ u32 regval = readl(host->top_base + EMMC_TOP_CONTROL);
+
+ regval &= ~(PAD_DAT_RD_RXDLY | PAD_DAT_RD_RXDLY2);
+
if (value < PAD_DELAY_HALF) {
- sdr_set_field(host->top_base + EMMC_TOP_CONTROL,
- PAD_DAT_RD_RXDLY, value);
- sdr_set_field(host->top_base + EMMC_TOP_CONTROL,
- PAD_DAT_RD_RXDLY2, 0);
+ regval |= FIELD_PREP(PAD_DAT_RD_RXDLY, value);
+ regval |= FIELD_PREP(PAD_DAT_RD_RXDLY2, value);
} else {
- sdr_set_field(host->top_base + EMMC_TOP_CONTROL,
- PAD_DAT_RD_RXDLY, PAD_DELAY_HALF - 1);
- sdr_set_field(host->top_base + EMMC_TOP_CONTROL,
- PAD_DAT_RD_RXDLY2, value - PAD_DELAY_HALF);
+ regval |= FIELD_PREP(PAD_DAT_RD_RXDLY, PAD_DELAY_HALF - 1);
+ regval |= FIELD_PREP(PAD_DAT_RD_RXDLY2, value - PAD_DELAY_HALF);
}
+ writel(regval, host->top_base + EMMC_TOP_CONTROL);
} else {
if (value < PAD_DELAY_HALF) {
sdr_set_field(host->base + tune_reg, MSDC_PAD_TUNE_DATRRDLY, value);
@@ -2977,7 +3042,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 4095);
if (!(mmc->caps & MMC_CAP_NONREMOVABLE) &&
- !mmc_can_gpio_cd(mmc) &&
+ !mmc_host_can_gpio_cd(mmc) &&
host->dev_comp->use_internal_cd) {
/*
* Is removable but no GPIO declared, so
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 8c83e203c516..e6fa3ed42560 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -1112,7 +1112,7 @@ int renesas_sdhi_probe(struct platform_device *pdev,
host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES);
/* For some SoC, we disable internal WP. GPIO may override this */
- if (mmc_can_gpio_ro(host->mmc))
+ if (mmc_host_can_gpio_ro(host->mmc))
mmc_data->capabilities2 &= ~MMC_CAP2_NO_WRITE_PROTECT;
/* SDR speeds are only available on Gen2+ */
@@ -1166,12 +1166,7 @@ int renesas_sdhi_probe(struct platform_device *pdev,
if (ret)
goto efree;
- rcfg.of_node = of_get_child_by_name(dev->of_node, "vqmmc-regulator");
- if (!of_device_is_available(rcfg.of_node)) {
- of_node_put(rcfg.of_node);
- rcfg.of_node = NULL;
- }
-
+ rcfg.of_node = of_get_available_child_by_name(dev->of_node, "vqmmc-regulator");
if (rcfg.of_node) {
rcfg.driver_data = priv->host;
rdev = devm_regulator_register(dev, &renesas_sdhi_vqmmc_regulator, &rcfg);
@@ -1240,15 +1235,10 @@ int renesas_sdhi_probe(struct platform_device *pdev,
sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask_all);
- num_irqs = platform_irq_count(pdev);
- if (num_irqs < 0) {
- ret = num_irqs;
- goto edisclk;
- }
-
/* There must be at least one IRQ source */
- if (!num_irqs) {
- ret = -ENXIO;
+ num_irqs = platform_irq_count(pdev);
+ if (num_irqs <= 0) {
+ ret = num_irqs ?: -ENOENT;
goto edisclk;
}
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index ff78a7c6a04c..ac187a8798b7 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -31,7 +31,9 @@
#include "cqhci.h"
#define ESDHC_SYS_CTRL_DTOCV_MASK GENMASK(19, 16)
+#define ESDHC_SYS_CTRL_RST_FIFO BIT(22)
#define ESDHC_SYS_CTRL_IPP_RST_N BIT(23)
+#define ESDHC_SYS_CTRL_RESET_TUNING BIT(28)
#define ESDHC_CTRL_D3CD 0x08
#define ESDHC_BURST_LEN_EN_INCR (1 << 27)
/* VENDOR SPEC register */
@@ -81,7 +83,11 @@
#define ESDHC_TUNE_CTRL_STEP 1
#define ESDHC_TUNE_CTRL_MIN 0
#define ESDHC_TUNE_CTRL_MAX ((1 << 7) - 1)
-
+#define ESDHC_TUNE_CTRL_STATUS_TAP_SEL_MASK GENMASK(30, 16)
+#define ESDHC_TUNE_CTRL_STATUS_TAP_SEL_PRE_MASK GENMASK(30, 24)
+#define ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_PRE_MASK GENMASK(14, 8)
+#define ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_OUT_MASK GENMASK(7, 4)
+#define ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_POST_MASK GENMASK(3, 0)
/* strobe dll register */
#define ESDHC_STROBE_DLL_CTRL 0x70
#define ESDHC_STROBE_DLL_CTRL_ENABLE (1 << 0)
@@ -104,6 +110,7 @@
#define ESDHC_TUNING_CTRL 0xcc
#define ESDHC_STD_TUNING_EN (1 << 24)
+#define ESDHC_TUNING_WINDOW_MASK GENMASK(22, 20)
/* NOTE: the minimum valid tuning start tap for mx6sl is 1 */
#define ESDHC_TUNING_START_TAP_DEFAULT 0x1
#define ESDHC_TUNING_START_TAP_MASK 0x7f
@@ -205,6 +212,8 @@
/* The IP does not have GPIO CD wake capabilities */
#define ESDHC_FLAG_SKIP_CD_WAKE BIT(18)
+#define ESDHC_AUTO_TUNING_WINDOW 3
+
enum wp_types {
ESDHC_WP_NONE, /* no WP, neither controller nor gpio */
ESDHC_WP_CONTROLLER, /* mmc controller internal WP */
@@ -235,6 +244,8 @@ struct esdhc_platform_data {
unsigned int tuning_step; /* The delay cell steps in tuning procedure */
unsigned int tuning_start_tap; /* The start delay cell point in tuning procedure */
unsigned int strobe_dll_delay_target; /* The delay cell for strobe pad (read clock) */
+ unsigned int saved_tuning_delay_cell; /* save the value of tuning delay cell */
+ unsigned int saved_auto_tuning_window; /* save the auto tuning window width */
};
struct esdhc_soc_data {
@@ -264,35 +275,35 @@ static const struct esdhc_soc_data usdhc_imx6q_data = {
};
static const struct esdhc_soc_data usdhc_imx6sl_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_ERR004536
| ESDHC_FLAG_HS200
| ESDHC_FLAG_BROKEN_AUTO_CMD23,
};
static const struct esdhc_soc_data usdhc_imx6sll_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_HS400
| ESDHC_FLAG_STATE_LOST_IN_LPMODE,
};
static const struct esdhc_soc_data usdhc_imx6sx_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_STATE_LOST_IN_LPMODE
| ESDHC_FLAG_BROKEN_AUTO_CMD23,
};
static const struct esdhc_soc_data usdhc_imx6ull_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_ERR010450
| ESDHC_FLAG_STATE_LOST_IN_LPMODE,
};
static const struct esdhc_soc_data usdhc_imx7d_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_HS400
| ESDHC_FLAG_STATE_LOST_IN_LPMODE
@@ -308,7 +319,7 @@ static struct esdhc_soc_data usdhc_s32g2_data = {
};
static struct esdhc_soc_data usdhc_imx7ulp_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_PMQOS | ESDHC_FLAG_HS400
| ESDHC_FLAG_STATE_LOST_IN_LPMODE,
@@ -321,7 +332,7 @@ static struct esdhc_soc_data usdhc_imxrt1050_data = {
};
static struct esdhc_soc_data usdhc_imx8qxp_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
| ESDHC_FLAG_STATE_LOST_IN_LPMODE
@@ -330,7 +341,7 @@ static struct esdhc_soc_data usdhc_imx8qxp_data = {
};
static struct esdhc_soc_data usdhc_imx8mm_data = {
- .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_MAN_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
| ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
| ESDHC_FLAG_STATE_LOST_IN_LPMODE,
@@ -870,6 +881,11 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
esdhc_clrset_le(host, mask, new_val, reg);
return;
+ case SDHCI_TIMEOUT_CONTROL:
+ esdhc_clrset_le(host, ESDHC_SYS_CTRL_DTOCV_MASK,
+ FIELD_PREP(ESDHC_SYS_CTRL_DTOCV_MASK, val),
+ ESDHC_SYSTEM_CONTROL);
+ return;
case SDHCI_SOFTWARE_RESET:
if (val & SDHCI_RESET_DATA)
new_val = readl(host->ioaddr + SDHCI_HOST_CONTROL);
@@ -1057,7 +1073,7 @@ static void esdhc_reset_tuning(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
- u32 ctrl;
+ u32 ctrl, tuning_ctrl, sys_ctrl;
int ret;
/* Reset the tuning circuit */
@@ -1071,6 +1087,21 @@ static void esdhc_reset_tuning(struct sdhci_host *host)
writel(0, host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
} else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
writel(ctrl, host->ioaddr + ESDHC_MIX_CTRL);
+ /*
+ * enable the std tuning just in case it cleared in
+ * sdhc_esdhc_tuning_restore.
+ */
+ tuning_ctrl = readl(host->ioaddr + ESDHC_TUNING_CTRL);
+ if (!(tuning_ctrl & ESDHC_STD_TUNING_EN)) {
+ tuning_ctrl |= ESDHC_STD_TUNING_EN;
+ writel(tuning_ctrl, host->ioaddr + ESDHC_TUNING_CTRL);
+ }
+
+ /* set the reset tuning bit */
+ sys_ctrl = readl(host->ioaddr + ESDHC_SYSTEM_CONTROL);
+ sys_ctrl |= ESDHC_SYS_CTRL_RESET_TUNING;
+ writel(sys_ctrl, host->ioaddr + ESDHC_SYSTEM_CONTROL);
+
ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS);
ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL;
ctrl &= ~ESDHC_MIX_CTRL_EXE_TUNE;
@@ -1130,7 +1161,7 @@ static int usdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
static void esdhc_prepare_tuning(struct sdhci_host *host, u32 val)
{
- u32 reg;
+ u32 reg, sys_ctrl;
u8 sw_rst;
int ret;
@@ -1149,10 +1180,21 @@ static void esdhc_prepare_tuning(struct sdhci_host *host, u32 val)
reg |= ESDHC_MIX_CTRL_EXE_TUNE | ESDHC_MIX_CTRL_SMPCLK_SEL |
ESDHC_MIX_CTRL_FBCLK_SEL;
writel(reg, host->ioaddr + ESDHC_MIX_CTRL);
- writel(val << 8, host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
+ writel(FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_PRE_MASK, val),
+ host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
dev_dbg(mmc_dev(host->mmc),
"tuning with delay 0x%x ESDHC_TUNE_CTRL_STATUS 0x%x\n",
val, readl(host->ioaddr + ESDHC_TUNE_CTRL_STATUS));
+
+ /* set RST_FIFO to reset the async FIFO, and wat it to self-clear */
+ sys_ctrl = readl(host->ioaddr + ESDHC_SYSTEM_CONTROL);
+ sys_ctrl |= ESDHC_SYS_CTRL_RST_FIFO;
+ writel(sys_ctrl, host->ioaddr + ESDHC_SYSTEM_CONTROL);
+ ret = readl_poll_timeout(host->ioaddr + ESDHC_SYSTEM_CONTROL, sys_ctrl,
+ !(sys_ctrl & ESDHC_SYS_CTRL_RST_FIFO), 10, 100);
+ if (ret == -ETIMEDOUT)
+ dev_warn(mmc_dev(host->mmc),
+ "warning! RST_FIFO not clear in 100us\n");
}
static void esdhc_post_tuning(struct sdhci_host *host)
@@ -1172,9 +1214,10 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
{
int min, max, avg, ret;
int win_length, target_min, target_max, target_win_length;
+ u32 clk_tune_ctrl_status, temp;
- min = ESDHC_TUNE_CTRL_MIN;
- max = ESDHC_TUNE_CTRL_MIN;
+ min = target_min = ESDHC_TUNE_CTRL_MIN;
+ max = target_max = ESDHC_TUNE_CTRL_MIN;
target_win_length = 0;
while (max < ESDHC_TUNE_CTRL_MAX) {
/* find the mininum delay first which can pass tuning */
@@ -1211,6 +1254,30 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
/* use average delay to get the best timing */
avg = (target_min + target_max) / 2;
esdhc_prepare_tuning(host, avg);
+
+ /*
+ * adjust the delay according to tuning window, make preparation
+ * for the auto-tuning logic. According to hardware suggest, need
+ * to config the auto tuning window width to 3, to make the auto
+ * tuning logic have enough space to handle the sample point shift
+ * caused by temperature change.
+ */
+ clk_tune_ctrl_status = FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_PRE_MASK,
+ avg - ESDHC_AUTO_TUNING_WINDOW) |
+ FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_OUT_MASK,
+ ESDHC_AUTO_TUNING_WINDOW) |
+ FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_POST_MASK,
+ ESDHC_AUTO_TUNING_WINDOW);
+
+ writel(clk_tune_ctrl_status, host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
+ ret = readl_poll_timeout(host->ioaddr + ESDHC_TUNE_CTRL_STATUS, temp,
+ clk_tune_ctrl_status ==
+ FIELD_GET(ESDHC_TUNE_CTRL_STATUS_TAP_SEL_MASK, temp),
+ 1, 10);
+ if (ret == -ETIMEDOUT)
+ dev_warn(mmc_dev(host->mmc),
+ "clock tuning control status not set in 10us\n");
+
ret = mmc_send_tuning(host->mmc, opcode, NULL);
esdhc_post_tuning(host);
@@ -1385,17 +1452,6 @@ static unsigned int esdhc_get_max_timeout_count(struct sdhci_host *host)
return esdhc_is_usdhc(imx_data) ? 1 << 29 : 1 << 27;
}
-static void esdhc_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
-{
- struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
- struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
-
- /* use maximum timeout counter */
- esdhc_clrset_le(host, ESDHC_SYS_CTRL_DTOCV_MASK,
- esdhc_is_usdhc(imx_data) ? 0xF0000 : 0xE0000,
- ESDHC_SYSTEM_CONTROL);
-}
-
static u32 esdhc_cqhci_irq(struct sdhci_host *host, u32 intmask)
{
int cmd_error = 0;
@@ -1432,7 +1488,6 @@ static struct sdhci_ops sdhci_esdhc_ops = {
.get_min_clock = esdhc_pltfm_get_min_clock,
.get_max_timeout_count = esdhc_get_max_timeout_count,
.get_ro = esdhc_pltfm_get_ro,
- .set_timeout = esdhc_set_timeout,
.set_bus_width = esdhc_pltfm_set_bus_width,
.set_uhs_signaling = esdhc_set_uhs_signaling,
.reset = esdhc_reset,
@@ -1529,6 +1584,16 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
<< ESDHC_TUNING_STEP_SHIFT;
}
+ /*
+ * Config the tuning window to the hardware suggested value 3.
+ * This tuning window is used for auto tuning logic. The default
+ * tuning window is 2, here change to 3 make the window a bit
+ * wider, give auto tuning enough space to handle the sample
+ * point shift cause by temperature change.
+ */
+ tmp &= ~ESDHC_TUNING_WINDOW_MASK;
+ tmp |= FIELD_PREP(ESDHC_TUNING_WINDOW_MASK, ESDHC_AUTO_TUNING_WINDOW);
+
/* Disable the CMD CRC check for tuning, if not, need to
* add some delay after every tuning command, because
* hardware standard tuning logic will directly go to next
@@ -1569,6 +1634,63 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
}
}
+#ifdef CONFIG_PM_SLEEP
+static void sdhc_esdhc_tuning_save(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
+ u32 reg;
+
+ /*
+ * SD/eMMC do not need this tuning save because it will re-init
+ * after system resume back.
+ * Here save the tuning delay value for SDIO device since it may
+ * keep power during system PM. And for usdhc, only SDR50 and
+ * SDR104 mode for SDIO device need to do tuning, and need to
+ * save/restore.
+ */
+ if (host->timing == MMC_TIMING_UHS_SDR50 ||
+ host->timing == MMC_TIMING_UHS_SDR104) {
+ reg = readl(host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
+ reg = FIELD_GET(ESDHC_TUNE_CTRL_STATUS_TAP_SEL_PRE_MASK, reg);
+ imx_data->boarddata.saved_tuning_delay_cell = reg;
+ }
+}
+
+static void sdhc_esdhc_tuning_restore(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
+ u32 reg;
+
+ if (host->timing == MMC_TIMING_UHS_SDR50 ||
+ host->timing == MMC_TIMING_UHS_SDR104) {
+ /*
+ * restore the tuning delay value actually is a
+ * manual tuning method, so clear the standard
+ * tuning enable bit here. Will set back this
+ * ESDHC_STD_TUNING_EN in esdhc_reset_tuning()
+ * when trigger re-tuning.
+ */
+ reg = readl(host->ioaddr + ESDHC_TUNING_CTRL);
+ reg &= ~ESDHC_STD_TUNING_EN;
+ writel(reg, host->ioaddr + ESDHC_TUNING_CTRL);
+
+ reg = readl(host->ioaddr + ESDHC_MIX_CTRL);
+ reg |= ESDHC_MIX_CTRL_SMPCLK_SEL | ESDHC_MIX_CTRL_FBCLK_SEL;
+ writel(reg, host->ioaddr + ESDHC_MIX_CTRL);
+
+ writel(FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_PRE_MASK,
+ imx_data->boarddata.saved_tuning_delay_cell) |
+ FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_OUT_MASK,
+ ESDHC_AUTO_TUNING_WINDOW) |
+ FIELD_PREP(ESDHC_TUNE_CTRL_STATUS_DLY_CELL_SET_POST_MASK,
+ ESDHC_AUTO_TUNING_WINDOW),
+ host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
+ }
+}
+#endif
+
static void esdhc_cqe_enable(struct mmc_host *mmc)
{
struct sdhci_host *host = mmc_priv(mmc);
@@ -1777,6 +1899,8 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
* to distinguish the card type.
*/
host->mmc_host_ops.init_card = usdhc_init_card;
+
+ host->max_timeout_count = 0xF;
}
if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING)
@@ -1885,11 +2009,14 @@ static int sdhci_esdhc_suspend(struct device *dev)
struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
int ret;
- if (host->mmc->caps2 & MMC_CAP2_CQE) {
- ret = cqhci_suspend(host->mmc);
- if (ret)
- return ret;
- }
+ /*
+ * Switch to runtime resume for two reasons:
+ * 1, there is register access (e.g., wakeup control register), so
+ * need to make sure gate on ipg clock.
+ * 2, make sure the pm_runtime_force_resume() in sdhci_esdhc_resume() really
+ * invoke its ->runtime_resume callback (needs_force_resume = 1).
+ */
+ pm_runtime_get_sync(dev);
if ((imx_data->socdata->flags & ESDHC_FLAG_STATE_LOST_IN_LPMODE) &&
(host->tuning_mode != SDHCI_TUNING_MODE_1)) {
@@ -1897,12 +2024,22 @@ static int sdhci_esdhc_suspend(struct device *dev)
mmc_retune_needed(host->mmc);
}
- if (host->tuning_mode != SDHCI_TUNING_MODE_3)
- mmc_retune_needed(host->mmc);
-
- ret = sdhci_suspend_host(host);
- if (ret)
- return ret;
+ /*
+ * For the device need to keep power during system PM, need
+ * to save the tuning delay value just in case the usdhc
+ * lost power during system PM.
+ */
+ if (mmc_card_keep_power(host->mmc) && mmc_card_wake_sdio_irq(host->mmc) &&
+ esdhc_is_usdhc(imx_data))
+ sdhc_esdhc_tuning_save(host);
+
+ if (device_may_wakeup(dev)) {
+ /* The irqs of imx are not shared. It is safe to disable */
+ disable_irq(host->irq);
+ ret = sdhci_enable_irq_wakeups(host);
+ if (!ret)
+ dev_warn(dev, "Failed to enable irq wakeup\n");
+ }
ret = pinctrl_pm_select_sleep_state(dev);
if (ret)
@@ -1910,30 +2047,46 @@ static int sdhci_esdhc_suspend(struct device *dev)
ret = mmc_gpio_set_cd_wake(host->mmc, true);
+ /*
+ * Make sure invoke runtime_suspend to gate off clock.
+ * uSDHC IP supports in-band SDIO wakeup even without clock.
+ */
+ pm_runtime_force_suspend(dev);
+
return ret;
}
static int sdhci_esdhc_resume(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
int ret;
- ret = pinctrl_pm_select_default_state(dev);
+ pm_runtime_force_resume(dev);
+
+ ret = mmc_gpio_set_cd_wake(host->mmc, false);
if (ret)
return ret;
/* re-initialize hw state in case it's lost in low power mode */
sdhci_esdhc_imx_hwinit(host);
- ret = sdhci_resume_host(host);
- if (ret)
- return ret;
+ if (host->irq_wake_enabled) {
+ sdhci_disable_irq_wakeups(host);
+ enable_irq(host->irq);
+ }
- if (host->mmc->caps2 & MMC_CAP2_CQE)
- ret = cqhci_resume(host->mmc);
+ /*
+ * restore the saved tuning delay value for the device which keep
+ * power during system PM.
+ */
+ if (mmc_card_keep_power(host->mmc) && mmc_card_wake_sdio_irq(host->mmc) &&
+ esdhc_is_usdhc(imx_data))
+ sdhc_esdhc_tuning_restore(host);
- if (!ret)
- ret = mmc_gpio_set_cd_wake(host->mmc, false);
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
return ret;
}
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 09b9ab15e499..a20d03fdd6a9 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/sizes.h>
@@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv
}
}
+static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
+{
+ struct device *dev = mmc_dev(host->mmc);
+ int ret;
+
+ /*
+ * This works around the design of the RK3576's power domains, which
+ * makes the PD_NVM power domain, which the sdhci controller on the
+ * RK3576 is in, never come back the same way once it's run-time
+ * suspended once. This can happen during early kernel boot if no driver
+ * is using either PD_NVM or its child power domain PD_SDGMAC for a
+ * short moment, leading to it being turned off to save power. By
+ * keeping it on, sdhci suspending won't lead to PD_NVM becoming a
+ * candidate for getting turned off.
+ */
+ ret = dev_pm_genpd_rpm_always_on(dev, true);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n",
+ ERR_PTR(ret));
+
+ dwcmshc_rk35xx_postinit(host, dwc_priv);
+}
+
static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
.postinit = dwcmshc_rk35xx_postinit,
};
+static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = {
+ .pdata = {
+ .ops = &sdhci_dwcmshc_rk35xx_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+ },
+ .init = dwcmshc_rk35xx_init,
+ .postinit = dwcmshc_rk3576_postinit,
+};
+
static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = {
.pdata = {
.ops = &sdhci_dwcmshc_th1520_ops,
@@ -1275,6 +1311,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
.data = &sdhci_dwcmshc_rk35xx_pdata,
},
{
+ .compatible = "rockchip,rk3576-dwcmshc",
+ .data = &sdhci_dwcmshc_rk3576_pdata,
+ },
+ {
.compatible = "rockchip,rk3568-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
},
diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c
new file mode 100644
index 000000000000..6880d3e9ab62
--- /dev/null
+++ b/drivers/mmc/host/sdhci-of-k1.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2025 SpacemiT (Hangzhou) Technology Co. Ltd
+ * Copyright (c) 2025 Yixun Lan <dlan@gentoo.org>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/init.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+#define SDHC_MMC_CTRL_REG 0x114
+#define MISC_INT_EN BIT(1)
+#define MISC_INT BIT(2)
+#define ENHANCE_STROBE_EN BIT(8)
+#define MMC_HS400 BIT(9)
+#define MMC_HS200 BIT(10)
+#define MMC_CARD_MODE BIT(12)
+
+#define SDHC_TX_CFG_REG 0x11C
+#define TX_INT_CLK_SEL BIT(30)
+#define TX_MUX_SEL BIT(31)
+
+#define SDHC_PHY_CTRL_REG 0x160
+#define PHY_FUNC_EN BIT(0)
+#define PHY_PLL_LOCK BIT(1)
+#define HOST_LEGACY_MODE BIT(31)
+
+#define SDHC_PHY_FUNC_REG 0x164
+#define PHY_TEST_EN BIT(7)
+#define HS200_USE_RFIFO BIT(15)
+
+#define SDHC_PHY_DLLCFG 0x168
+#define DLL_PREDLY_NUM GENMASK(3, 2)
+#define DLL_FULLDLY_RANGE GENMASK(5, 4)
+#define DLL_VREG_CTRL GENMASK(7, 6)
+#define DLL_ENABLE BIT(31)
+
+#define SDHC_PHY_DLLCFG1 0x16C
+#define DLL_REG1_CTRL GENMASK(7, 0)
+#define DLL_REG2_CTRL GENMASK(15, 8)
+#define DLL_REG3_CTRL GENMASK(23, 16)
+#define DLL_REG4_CTRL GENMASK(31, 24)
+
+#define SDHC_PHY_DLLSTS 0x170
+#define DLL_LOCK_STATE BIT(0)
+
+#define SDHC_PHY_PADCFG_REG 0x178
+#define PHY_DRIVE_SEL GENMASK(2, 0)
+#define RX_BIAS_CTRL BIT(5)
+
+struct spacemit_sdhci_host {
+ struct clk *clk_core;
+ struct clk *clk_io;
+};
+
+/* All helper functions will update clr/set while preserve rest bits */
+static inline void spacemit_sdhci_setbits(struct sdhci_host *host, u32 val, int reg)
+{
+ sdhci_writel(host, sdhci_readl(host, reg) | val, reg);
+}
+
+static inline void spacemit_sdhci_clrbits(struct sdhci_host *host, u32 val, int reg)
+{
+ sdhci_writel(host, sdhci_readl(host, reg) & ~val, reg);
+}
+
+static inline void spacemit_sdhci_clrsetbits(struct sdhci_host *host, u32 clr, u32 set, int reg)
+{
+ u32 val = sdhci_readl(host, reg);
+
+ val = (val & ~clr) | set;
+ sdhci_writel(host, val, reg);
+}
+
+static void spacemit_sdhci_reset(struct sdhci_host *host, u8 mask)
+{
+ sdhci_reset(host, mask);
+
+ if (mask != SDHCI_RESET_ALL)
+ return;
+
+ spacemit_sdhci_setbits(host, PHY_FUNC_EN | PHY_PLL_LOCK, SDHC_PHY_CTRL_REG);
+
+ spacemit_sdhci_clrsetbits(host, PHY_DRIVE_SEL,
+ RX_BIAS_CTRL | FIELD_PREP(PHY_DRIVE_SEL, 4),
+ SDHC_PHY_PADCFG_REG);
+
+ if (!(host->mmc->caps2 & MMC_CAP2_NO_MMC))
+ spacemit_sdhci_setbits(host, MMC_CARD_MODE, SDHC_MMC_CTRL_REG);
+}
+
+static void spacemit_sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
+{
+ if (timing == MMC_TIMING_MMC_HS200)
+ spacemit_sdhci_setbits(host, MMC_HS200, SDHC_MMC_CTRL_REG);
+
+ if (timing == MMC_TIMING_MMC_HS400)
+ spacemit_sdhci_setbits(host, MMC_HS400, SDHC_MMC_CTRL_REG);
+
+ sdhci_set_uhs_signaling(host, timing);
+
+ if (!(host->mmc->caps2 & MMC_CAP2_NO_SDIO))
+ spacemit_sdhci_setbits(host, SDHCI_CTRL_VDD_180, SDHCI_HOST_CONTROL2);
+}
+
+static void spacemit_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ struct mmc_host *mmc = host->mmc;
+
+ if (mmc->ios.timing <= MMC_TIMING_UHS_SDR50)
+ spacemit_sdhci_setbits(host, TX_INT_CLK_SEL, SDHC_TX_CFG_REG);
+ else
+ spacemit_sdhci_clrbits(host, TX_INT_CLK_SEL, SDHC_TX_CFG_REG);
+
+ sdhci_set_clock(host, clock);
+};
+
+static void spacemit_sdhci_phy_dll_init(struct sdhci_host *host)
+{
+ u32 state;
+ int ret;
+
+ spacemit_sdhci_clrsetbits(host, DLL_PREDLY_NUM | DLL_FULLDLY_RANGE | DLL_VREG_CTRL,
+ FIELD_PREP(DLL_PREDLY_NUM, 1) |
+ FIELD_PREP(DLL_FULLDLY_RANGE, 1) |
+ FIELD_PREP(DLL_VREG_CTRL, 1),
+ SDHC_PHY_DLLCFG);
+
+ spacemit_sdhci_clrsetbits(host, DLL_REG1_CTRL,
+ FIELD_PREP(DLL_REG1_CTRL, 0x92),
+ SDHC_PHY_DLLCFG1);
+
+ spacemit_sdhci_setbits(host, DLL_ENABLE, SDHC_PHY_DLLCFG);
+
+ ret = readl_poll_timeout(host->ioaddr + SDHC_PHY_DLLSTS, state,
+ state & DLL_LOCK_STATE, 2, 100);
+ if (ret == -ETIMEDOUT)
+ dev_warn(mmc_dev(host->mmc), "fail to lock phy dll in 100us!\n");
+}
+
+static void spacemit_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+
+ if (!ios->enhanced_strobe) {
+ spacemit_sdhci_clrbits(host, ENHANCE_STROBE_EN, SDHC_MMC_CTRL_REG);
+ return;
+ }
+
+ spacemit_sdhci_setbits(host, ENHANCE_STROBE_EN, SDHC_MMC_CTRL_REG);
+ spacemit_sdhci_phy_dll_init(host);
+}
+
+static unsigned int spacemit_sdhci_clk_get_max_clock(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ return clk_get_rate(pltfm_host->clk);
+}
+
+static int spacemit_sdhci_pre_select_hs400(struct mmc_host *mmc)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+
+ spacemit_sdhci_setbits(host, MMC_HS400, SDHC_MMC_CTRL_REG);
+ host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
+
+ return 0;
+}
+
+static void spacemit_sdhci_post_select_hs400(struct mmc_host *mmc)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+
+ spacemit_sdhci_phy_dll_init(host);
+ host->mmc->caps &= ~MMC_CAP_WAIT_WHILE_BUSY;
+}
+
+static void spacemit_sdhci_pre_hs400_to_hs200(struct mmc_host *mmc)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+
+ spacemit_sdhci_clrbits(host, PHY_FUNC_EN | PHY_PLL_LOCK, SDHC_PHY_CTRL_REG);
+ spacemit_sdhci_clrbits(host, MMC_HS400 | MMC_HS200 | ENHANCE_STROBE_EN, SDHC_MMC_CTRL_REG);
+ spacemit_sdhci_clrbits(host, HS200_USE_RFIFO, SDHC_PHY_FUNC_REG);
+
+ udelay(5);
+
+ spacemit_sdhci_setbits(host, PHY_FUNC_EN | PHY_PLL_LOCK, SDHC_PHY_CTRL_REG);
+}
+
+static inline int spacemit_sdhci_get_clocks(struct device *dev,
+ struct sdhci_pltfm_host *pltfm_host)
+{
+ struct spacemit_sdhci_host *sdhst = sdhci_pltfm_priv(pltfm_host);
+
+ sdhst->clk_core = devm_clk_get_enabled(dev, "core");
+ if (IS_ERR(sdhst->clk_core))
+ return -EINVAL;
+
+ sdhst->clk_io = devm_clk_get_enabled(dev, "io");
+ if (IS_ERR(sdhst->clk_io))
+ return -EINVAL;
+
+ pltfm_host->clk = sdhst->clk_io;
+
+ return 0;
+}
+
+static const struct sdhci_ops spacemit_sdhci_ops = {
+ .get_max_clock = spacemit_sdhci_clk_get_max_clock,
+ .reset = spacemit_sdhci_reset,
+ .set_bus_width = sdhci_set_bus_width,
+ .set_clock = spacemit_sdhci_set_clock,
+ .set_uhs_signaling = spacemit_sdhci_set_uhs_signaling,
+};
+
+static const struct sdhci_pltfm_data spacemit_sdhci_k1_pdata = {
+ .ops = &spacemit_sdhci_ops,
+ .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+ SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_32BIT_ADMA_SIZE |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_CARD_DETECTION |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_BROKEN_64_BIT_DMA |
+ SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+};
+
+static const struct of_device_id spacemit_sdhci_of_match[] = {
+ { .compatible = "spacemit,k1-sdhci" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, spacemit_sdhci_of_match);
+
+static int spacemit_sdhci_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct spacemit_sdhci_host *sdhst;
+ struct sdhci_pltfm_host *pltfm_host;
+ struct sdhci_host *host;
+ struct mmc_host_ops *mops;
+ int ret;
+
+ host = sdhci_pltfm_init(pdev, &spacemit_sdhci_k1_pdata, sizeof(*sdhst));
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+ pltfm_host = sdhci_priv(host);
+
+ ret = mmc_of_parse(host->mmc);
+ if (ret)
+ goto err_pltfm;
+
+ sdhci_get_of_property(pdev);
+
+ if (!(host->mmc->caps2 & MMC_CAP2_NO_MMC)) {
+ mops = &host->mmc_host_ops;
+ mops->hs400_prepare_ddr = spacemit_sdhci_pre_select_hs400;
+ mops->hs400_complete = spacemit_sdhci_post_select_hs400;
+ mops->hs400_downgrade = spacemit_sdhci_pre_hs400_to_hs200;
+ mops->hs400_enhanced_strobe = spacemit_sdhci_hs400_enhanced_strobe;
+ }
+
+ host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
+
+ if (spacemit_sdhci_get_clocks(dev, pltfm_host))
+ goto err_pltfm;
+
+ ret = sdhci_add_host(host);
+ if (ret)
+ goto err_pltfm;
+
+ return 0;
+
+err_pltfm:
+ sdhci_pltfm_free(pdev);
+ return ret;
+}
+
+static struct platform_driver spacemit_sdhci_driver = {
+ .driver = {
+ .name = "sdhci-spacemit",
+ .of_match_table = spacemit_sdhci_of_match,
+ },
+ .probe = spacemit_sdhci_probe,
+ .remove = sdhci_pltfm_remove,
+};
+module_platform_driver(spacemit_sdhci_driver);
+
+MODULE_DESCRIPTION("SpacemiT SDHCI platform driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 26a9a8b5682a..8897839ab2aa 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1270,7 +1270,7 @@ static int sdhci_omap_probe(struct platform_device *pdev)
mmc->f_max = 48000000;
}
- if (!mmc_can_gpio_ro(mmc))
+ if (!mmc_host_can_gpio_ro(mmc))
mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT;
pltfm_host->clk = devm_clk_get(dev, "fck");
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5f78be7ae16d..32fa0b2bb912 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -158,7 +158,7 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
u32 present;
if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) ||
- !mmc_card_is_removable(host->mmc) || mmc_can_gpio_cd(host->mmc))
+ !mmc_card_is_removable(host->mmc) || mmc_host_can_gpio_cd(host->mmc))
return;
if (enable) {
@@ -2571,7 +2571,7 @@ int sdhci_get_ro(struct mmc_host *mmc)
is_readonly = 0;
} else if (host->ops->get_ro) {
is_readonly = host->ops->get_ro(host);
- } else if (mmc_can_gpio_ro(mmc)) {
+ } else if (mmc_host_can_gpio_ro(mmc)) {
is_readonly = mmc_gpio_get_ro(mmc);
/* Do not invert twice */
allow_invert = !(mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH);
@@ -3744,7 +3744,7 @@ static bool sdhci_cd_irq_can_wakeup(struct sdhci_host *host)
{
return mmc_card_is_removable(host->mmc) &&
!(host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) &&
- !mmc_can_gpio_cd(host->mmc);
+ !mmc_host_can_gpio_cd(host->mmc);
}
/*
@@ -3755,7 +3755,7 @@ static bool sdhci_cd_irq_can_wakeup(struct sdhci_host *host)
* sdhci_disable_irq_wakeups() since it will be set by
* sdhci_enable_card_detection() or sdhci_init().
*/
-static bool sdhci_enable_irq_wakeups(struct sdhci_host *host)
+bool sdhci_enable_irq_wakeups(struct sdhci_host *host)
{
u8 mask = SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE |
SDHCI_WAKE_ON_INT;
@@ -3787,8 +3787,9 @@ static bool sdhci_enable_irq_wakeups(struct sdhci_host *host)
return host->irq_wake_enabled;
}
+EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups);
-static void sdhci_disable_irq_wakeups(struct sdhci_host *host)
+void sdhci_disable_irq_wakeups(struct sdhci_host *host)
{
u8 val;
u8 mask = SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE
@@ -3802,6 +3803,7 @@ static void sdhci_disable_irq_wakeups(struct sdhci_host *host)
host->irq_wake_enabled = false;
}
+EXPORT_SYMBOL_GPL(sdhci_disable_irq_wakeups);
int sdhci_suspend_host(struct sdhci_host *host)
{
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index cd0e35a80542..f9d65dd0f2b2 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -875,6 +875,8 @@ void sdhci_adma_write_desc(struct sdhci_host *host, void **desc,
dma_addr_t addr, int len, unsigned int cmd);
#ifdef CONFIG_PM
+bool sdhci_enable_irq_wakeups(struct sdhci_host *host);
+void sdhci_disable_irq_wakeups(struct sdhci_host *host);
int sdhci_suspend_host(struct sdhci_host *host);
int sdhci_resume_host(struct sdhci_host *host);
int sdhci_runtime_suspend_host(struct sdhci_host *host);
diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index f75c31815ab0..73385ff4c0f3 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -155,6 +155,7 @@ struct sdhci_am654_data {
u32 tuning_loop;
#define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0)
+#define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1)
};
struct window {
@@ -166,6 +167,7 @@ struct window {
struct sdhci_am654_driver_data {
const struct sdhci_pltfm_data *pdata;
u32 flags;
+ u32 quirks;
#define IOMUX_PRESENT (1 << 0)
#define FREQSEL_2_BIT (1 << 1)
#define STRBSEL_4_BIT (1 << 2)
@@ -356,6 +358,29 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host,
sdhci_set_clock(host, clock);
}
+static int sdhci_am654_start_signal_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
+ int ret;
+
+ if ((sdhci_am654->quirks & SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA) &&
+ ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) {
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = mmc_regulator_set_vqmmc(mmc, ios);
+ if (ret < 0) {
+ pr_err("%s: Switching to 1.8V signalling voltage failed,\n",
+ mmc_hostname(mmc));
+ return -EIO;
+ }
+ }
+ return 0;
+ }
+
+ return sdhci_start_signal_voltage_switch(mmc, ios);
+}
+
static u8 sdhci_am654_write_power_on(struct sdhci_host *host, u8 val, int reg)
{
writeb(val, host->ioaddr + reg);
@@ -650,6 +675,12 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = {
.flags = IOMUX_PRESENT,
};
+static const struct sdhci_am654_driver_data sdhci_am62_4bit_drvdata = {
+ .pdata = &sdhci_j721e_4bit_pdata,
+ .flags = IOMUX_PRESENT,
+ .quirks = SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA,
+};
+
static const struct soc_device_attribute sdhci_am654_devices[] = {
{ .family = "AM65X",
.revision = "SR1.0",
@@ -872,7 +903,7 @@ static const struct of_device_id sdhci_am654_of_match[] = {
},
{
.compatible = "ti,am62-sdhci",
- .data = &sdhci_j721e_4bit_drvdata,
+ .data = &sdhci_am62_4bit_drvdata,
},
{ /* sentinel */ }
};
@@ -906,6 +937,7 @@ static int sdhci_am654_probe(struct platform_device *pdev)
pltfm_host = sdhci_priv(host);
sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
sdhci_am654->flags = drvdata->flags;
+ sdhci_am654->quirks = drvdata->quirks;
clk_xin = devm_clk_get(dev, "clk_xin");
if (IS_ERR(clk_xin)) {
@@ -940,6 +972,7 @@ static int sdhci_am654_probe(struct platform_device *pdev)
goto err_pltfm_free;
}
+ host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch;
host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning;
pm_runtime_get_noresume(dev);
diff --git a/drivers/mmc/host/sunplus-mmc.c b/drivers/mmc/host/sunplus-mmc.c
index 1cddea615a27..63279760239c 100644
--- a/drivers/mmc/host/sunplus-mmc.c
+++ b/drivers/mmc/host/sunplus-mmc.c
@@ -791,7 +791,7 @@ static int spmmc_get_cd(struct mmc_host *mmc)
{
int ret = 0;
- if (mmc_can_gpio_cd(mmc))
+ if (mmc_host_can_gpio_cd(mmc))
ret = mmc_gpio_get_cd(mmc);
if (ret < 0)
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 04c1c54df791..b71241f55df5 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -1176,14 +1176,14 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
dma_max_mapping_size(&pdev->dev));
mmc->max_seg_size = mmc->max_req_size;
- if (mmc_can_gpio_ro(mmc))
+ if (mmc_host_can_gpio_ro(mmc))
_host->ops.get_ro = mmc_gpio_get_ro;
- if (mmc_can_gpio_cd(mmc))
+ if (mmc_host_can_gpio_cd(mmc))
_host->ops.get_cd = mmc_gpio_get_cd;
/* must be set before tmio_mmc_reset() */
- _host->native_hotplug = !(mmc_can_gpio_cd(mmc) ||
+ _host->native_hotplug = !(mmc_host_can_gpio_cd(mmc) ||
mmc->caps & MMC_CAP_NEEDS_POLL ||
!mmc_card_is_removable(mmc));
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 341318024a19..ec95d787001b 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -351,20 +351,20 @@ static int __init cs553x_init(void)
return -ENXIO;
/* If it doesn't have the CS553[56], abort */
- rdmsrl(MSR_DIVIL_GLD_CAP, val);
+ rdmsrq(MSR_DIVIL_GLD_CAP, val);
val &= ~0xFFULL;
if (val != CAP_CS5535 && val != CAP_CS5536)
return -ENXIO;
/* If it doesn't have the NAND controller enabled, abort */
- rdmsrl(MSR_DIVIL_BALL_OPTS, val);
+ rdmsrq(MSR_DIVIL_BALL_OPTS, val);
if (val & PIN_OPT_IDE) {
pr_info("CS553x NAND controller: Flash I/O not enabled in MSR_DIVIL_BALL_OPTS.\n");
return -ENXIO;
}
for (i = 0; i < NR_CS553X_CONTROLLERS; i++) {
- rdmsrl(MSR_DIVIL_LBAR_FLSH0 + i, val);
+ rdmsrq(MSR_DIVIL_LBAR_FLSH0 + i, val);
if ((val & (FLSH_LBAR_EN|FLSH_NOR_NAND)) == (FLSH_LBAR_EN|FLSH_NOR_NAND))
err = cs553x_init_one(i, !!(val & FLSH_MEM_IO), val & 0xFFFFFFFF);
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index cf0d51805272..f6921368cd14 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -16,6 +16,7 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/timer.h>
+#include <net/netdev_queues.h>
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Kvaser AB <support@kvaser.com>");
@@ -410,10 +411,13 @@ struct kvaser_pciefd_can {
void __iomem *reg_base;
struct can_berr_counter bec;
u8 cmd_seq;
+ u8 tx_max_count;
+ u8 tx_idx;
+ u8 ack_idx;
int err_rep_cnt;
- int echo_idx;
+ unsigned int completed_tx_pkts;
+ unsigned int completed_tx_bytes;
spinlock_t lock; /* Locks sensitive registers (e.g. MODE) */
- spinlock_t echo_lock; /* Locks the message echo buffer */
struct timer_list bec_poll_timer;
struct completion start_comp, flush_comp;
};
@@ -714,6 +718,9 @@ static int kvaser_pciefd_open(struct net_device *netdev)
int ret;
struct kvaser_pciefd_can *can = netdev_priv(netdev);
+ can->tx_idx = 0;
+ can->ack_idx = 0;
+
ret = open_candev(netdev);
if (ret)
return ret;
@@ -745,21 +752,26 @@ static int kvaser_pciefd_stop(struct net_device *netdev)
timer_delete(&can->bec_poll_timer);
}
can->can.state = CAN_STATE_STOPPED;
+ netdev_reset_queue(netdev);
close_candev(netdev);
return ret;
}
+static unsigned int kvaser_pciefd_tx_avail(const struct kvaser_pciefd_can *can)
+{
+ return can->tx_max_count - (READ_ONCE(can->tx_idx) - READ_ONCE(can->ack_idx));
+}
+
static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p,
- struct kvaser_pciefd_can *can,
+ struct can_priv *can, u8 seq,
struct sk_buff *skb)
{
struct canfd_frame *cf = (struct canfd_frame *)skb->data;
int packet_size;
- int seq = can->echo_idx;
memset(p, 0, sizeof(*p));
- if (can->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+ if (can->ctrlmode & CAN_CTRLMODE_ONE_SHOT)
p->header[1] |= KVASER_PCIEFD_TPACKET_SMS;
if (cf->can_id & CAN_RTR_FLAG)
@@ -782,7 +794,7 @@ static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p,
} else {
p->header[1] |=
FIELD_PREP(KVASER_PCIEFD_RPACKET_DLC_MASK,
- can_get_cc_dlc((struct can_frame *)cf, can->can.ctrlmode));
+ can_get_cc_dlc((struct can_frame *)cf, can->ctrlmode));
}
p->header[1] |= FIELD_PREP(KVASER_PCIEFD_PACKET_SEQ_MASK, seq);
@@ -797,22 +809,24 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct kvaser_pciefd_can *can = netdev_priv(netdev);
- unsigned long irq_flags;
struct kvaser_pciefd_tx_packet packet;
+ unsigned int seq = can->tx_idx & (can->can.echo_skb_max - 1);
+ unsigned int frame_len;
int nr_words;
- u8 count;
if (can_dev_dropped_skb(netdev, skb))
return NETDEV_TX_OK;
+ if (!netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1))
+ return NETDEV_TX_BUSY;
- nr_words = kvaser_pciefd_prepare_tx_packet(&packet, can, skb);
+ nr_words = kvaser_pciefd_prepare_tx_packet(&packet, &can->can, seq, skb);
- spin_lock_irqsave(&can->echo_lock, irq_flags);
/* Prepare and save echo skb in internal slot */
- can_put_echo_skb(skb, netdev, can->echo_idx, 0);
-
- /* Move echo index to the next slot */
- can->echo_idx = (can->echo_idx + 1) % can->can.echo_skb_max;
+ WRITE_ONCE(can->can.echo_skb[seq], NULL);
+ frame_len = can_skb_get_frame_len(skb);
+ can_put_echo_skb(skb, netdev, seq, frame_len);
+ netdev_sent_queue(netdev, frame_len);
+ WRITE_ONCE(can->tx_idx, can->tx_idx + 1);
/* Write header to fifo */
iowrite32(packet.header[0],
@@ -836,14 +850,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb,
KVASER_PCIEFD_KCAN_FIFO_LAST_REG);
}
- count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK,
- ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG));
- /* No room for a new message, stop the queue until at least one
- * successful transmit
- */
- if (count >= can->can.echo_skb_max || can->can.echo_skb[can->echo_idx])
- netif_stop_queue(netdev);
- spin_unlock_irqrestore(&can->echo_lock, irq_flags);
+ netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1);
return NETDEV_TX_OK;
}
@@ -970,6 +977,8 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
can->kv_pcie = pcie;
can->cmd_seq = 0;
can->err_rep_cnt = 0;
+ can->completed_tx_pkts = 0;
+ can->completed_tx_bytes = 0;
can->bec.txerr = 0;
can->bec.rxerr = 0;
@@ -983,11 +992,10 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
tx_nr_packets_max =
FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_MAX_MASK,
ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG));
+ can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1);
can->can.clock.freq = pcie->freq;
- can->can.echo_skb_max = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1);
- can->echo_idx = 0;
- spin_lock_init(&can->echo_lock);
+ can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count);
spin_lock_init(&can->lock);
can->can.bittiming_const = &kvaser_pciefd_bittiming_const;
@@ -1201,7 +1209,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie,
skb = alloc_canfd_skb(priv->dev, &cf);
if (!skb) {
priv->dev->stats.rx_dropped++;
- return -ENOMEM;
+ return 0;
}
cf->len = can_fd_dlc2len(dlc);
@@ -1213,7 +1221,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie,
skb = alloc_can_skb(priv->dev, (struct can_frame **)&cf);
if (!skb) {
priv->dev->stats.rx_dropped++;
- return -ENOMEM;
+ return 0;
}
can_frame_set_cc_len((struct can_frame *)cf, dlc, priv->ctrlmode);
}
@@ -1231,7 +1239,9 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie,
priv->dev->stats.rx_packets++;
kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp);
- return netif_rx(skb);
+ netif_rx(skb);
+
+ return 0;
}
static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can,
@@ -1510,19 +1520,21 @@ static int kvaser_pciefd_handle_ack_packet(struct kvaser_pciefd *pcie,
netdev_dbg(can->can.dev, "Packet was flushed\n");
} else {
int echo_idx = FIELD_GET(KVASER_PCIEFD_PACKET_SEQ_MASK, p->header[0]);
- int len;
- u8 count;
+ unsigned int len, frame_len = 0;
struct sk_buff *skb;
+ if (echo_idx != (can->ack_idx & (can->can.echo_skb_max - 1)))
+ return 0;
skb = can->can.echo_skb[echo_idx];
- if (skb)
- kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp);
- len = can_get_echo_skb(can->can.dev, echo_idx, NULL);
- count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK,
- ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG));
+ if (!skb)
+ return 0;
+ kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp);
+ len = can_get_echo_skb(can->can.dev, echo_idx, &frame_len);
- if (count < can->can.echo_skb_max && netif_queue_stopped(can->can.dev))
- netif_wake_queue(can->can.dev);
+ /* Pairs with barrier in kvaser_pciefd_start_xmit() */
+ smp_store_release(&can->ack_idx, can->ack_idx + 1);
+ can->completed_tx_pkts++;
+ can->completed_tx_bytes += frame_len;
if (!one_shot_fail) {
can->can.dev->stats.tx_bytes += len;
@@ -1638,32 +1650,51 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf)
{
int pos = 0;
int res = 0;
+ unsigned int i;
do {
res = kvaser_pciefd_read_packet(pcie, &pos, dma_buf);
} while (!res && pos > 0 && pos < KVASER_PCIEFD_DMA_SIZE);
+ /* Report ACKs in this buffer to BQL en masse for correct periods */
+ for (i = 0; i < pcie->nr_channels; ++i) {
+ struct kvaser_pciefd_can *can = pcie->can[i];
+
+ if (!can->completed_tx_pkts)
+ continue;
+ netif_subqueue_completed_wake(can->can.dev, 0,
+ can->completed_tx_pkts,
+ can->completed_tx_bytes,
+ kvaser_pciefd_tx_avail(can), 1);
+ can->completed_tx_pkts = 0;
+ can->completed_tx_bytes = 0;
+ }
+
return res;
}
-static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie)
+static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie)
{
+ void __iomem *srb_cmd_reg = KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG;
u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
- if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0)
+ iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
+
+ if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) {
kvaser_pciefd_read_buffer(pcie, 0);
+ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, srb_cmd_reg); /* Rearm buffer */
+ }
- if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1)
+ if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) {
kvaser_pciefd_read_buffer(pcie, 1);
+ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, srb_cmd_reg); /* Rearm buffer */
+ }
if (unlikely(irq & KVASER_PCIEFD_SRB_IRQ_DOF0 ||
irq & KVASER_PCIEFD_SRB_IRQ_DOF1 ||
irq & KVASER_PCIEFD_SRB_IRQ_DUF0 ||
irq & KVASER_PCIEFD_SRB_IRQ_DUF1))
dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq);
-
- iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
- return irq;
}
static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can)
@@ -1691,29 +1722,22 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev)
struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev;
const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask;
u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie));
- u32 srb_irq = 0;
- u32 srb_release = 0;
int i;
if (!(pci_irq & irq_mask->all))
return IRQ_NONE;
+ iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
+
if (pci_irq & irq_mask->kcan_rx0)
- srb_irq = kvaser_pciefd_receive_irq(pcie);
+ kvaser_pciefd_receive_irq(pcie);
for (i = 0; i < pcie->nr_channels; i++) {
if (pci_irq & irq_mask->kcan_tx[i])
kvaser_pciefd_transmit_irq(pcie->can[i]);
}
- if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0)
- srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0;
-
- if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1)
- srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1;
-
- if (srb_release)
- iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
+ iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
return IRQ_HANDLED;
}
@@ -1733,13 +1757,22 @@ static void kvaser_pciefd_teardown_can_ctrls(struct kvaser_pciefd *pcie)
}
}
+static void kvaser_pciefd_disable_irq_srcs(struct kvaser_pciefd *pcie)
+{
+ unsigned int i;
+
+ /* Masking PCI_IRQ is insufficient as running ISR will unmask it */
+ iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG);
+ for (i = 0; i < pcie->nr_channels; ++i)
+ iowrite32(0, pcie->can[i]->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+}
+
static int kvaser_pciefd_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
int ret;
struct kvaser_pciefd *pcie;
const struct kvaser_pciefd_irq_mask *irq_mask;
- void __iomem *irq_en_base;
pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
if (!pcie)
@@ -1805,8 +1838,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG);
/* Enable PCI interrupts */
- irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie);
- iowrite32(irq_mask->all, irq_en_base);
+ iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
/* Ready the DMA buffers */
iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0,
KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
@@ -1820,8 +1852,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
return 0;
err_free_irq:
- /* Disable PCI interrupts */
- iowrite32(0, irq_en_base);
+ kvaser_pciefd_disable_irq_srcs(pcie);
free_irq(pcie->pci->irq, pcie);
err_pci_free_irq_vectors:
@@ -1844,35 +1875,26 @@ err_disable_pci:
return ret;
}
-static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie)
-{
- int i;
-
- for (i = 0; i < pcie->nr_channels; i++) {
- struct kvaser_pciefd_can *can = pcie->can[i];
-
- if (can) {
- iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
- unregister_candev(can->can.dev);
- timer_delete(&can->bec_poll_timer);
- kvaser_pciefd_pwm_stop(can);
- free_candev(can->can.dev);
- }
- }
-}
-
static void kvaser_pciefd_remove(struct pci_dev *pdev)
{
struct kvaser_pciefd *pcie = pci_get_drvdata(pdev);
+ unsigned int i;
- kvaser_pciefd_remove_all_ctrls(pcie);
+ for (i = 0; i < pcie->nr_channels; ++i) {
+ struct kvaser_pciefd_can *can = pcie->can[i];
- /* Disable interrupts */
- iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG);
- iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
+ unregister_candev(can->can.dev);
+ timer_delete(&can->bec_poll_timer);
+ kvaser_pciefd_pwm_stop(can);
+ }
+ kvaser_pciefd_disable_irq_srcs(pcie);
free_irq(pcie->pci->irq, pcie);
pci_free_irq_vectors(pcie->pci);
+
+ for (i = 0; i < pcie->nr_channels; ++i)
+ free_candev(pcie->can[i]->can.dev);
+
pci_iounmap(pdev, pcie->reg_base);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c
index 24c6622d36bd..58ff2ec1d975 100644
--- a/drivers/net/can/slcan/slcan-core.c
+++ b/drivers/net/can/slcan/slcan-core.c
@@ -71,12 +71,21 @@ MODULE_AUTHOR("Dario Binacchi <dario.binacchi@amarulasolutions.com>");
#define SLCAN_CMD_LEN 1
#define SLCAN_SFF_ID_LEN 3
#define SLCAN_EFF_ID_LEN 8
+#define SLCAN_DATA_LENGTH_LEN 1
+#define SLCAN_ERROR_LEN 1
#define SLCAN_STATE_LEN 1
#define SLCAN_STATE_BE_RXCNT_LEN 3
#define SLCAN_STATE_BE_TXCNT_LEN 3
-#define SLCAN_STATE_FRAME_LEN (1 + SLCAN_CMD_LEN + \
- SLCAN_STATE_BE_RXCNT_LEN + \
- SLCAN_STATE_BE_TXCNT_LEN)
+#define SLCAN_STATE_MSG_LEN (SLCAN_CMD_LEN + \
+ SLCAN_STATE_LEN + \
+ SLCAN_STATE_BE_RXCNT_LEN + \
+ SLCAN_STATE_BE_TXCNT_LEN)
+#define SLCAN_ERROR_MSG_LEN_MIN (SLCAN_CMD_LEN + \
+ SLCAN_ERROR_LEN + \
+ SLCAN_DATA_LENGTH_LEN)
+#define SLCAN_FRAME_MSG_LEN_MIN (SLCAN_CMD_LEN + \
+ SLCAN_SFF_ID_LEN + \
+ SLCAN_DATA_LENGTH_LEN)
struct slcan {
struct can_priv can;
@@ -176,6 +185,9 @@ static void slcan_bump_frame(struct slcan *sl)
u32 tmpid;
char *cmd = sl->rbuff;
+ if (sl->rcount < SLCAN_FRAME_MSG_LEN_MIN)
+ return;
+
skb = alloc_can_skb(sl->dev, &cf);
if (unlikely(!skb)) {
sl->dev->stats.rx_dropped++;
@@ -281,7 +293,7 @@ static void slcan_bump_state(struct slcan *sl)
return;
}
- if (state == sl->can.state || sl->rcount < SLCAN_STATE_FRAME_LEN)
+ if (state == sl->can.state || sl->rcount != SLCAN_STATE_MSG_LEN)
return;
cmd += SLCAN_STATE_BE_RXCNT_LEN + SLCAN_CMD_LEN + 1;
@@ -328,6 +340,9 @@ static void slcan_bump_err(struct slcan *sl)
bool rx_errors = false, tx_errors = false, rx_over_errors = false;
int i, len;
+ if (sl->rcount < SLCAN_ERROR_MSG_LEN_MIN)
+ return;
+
/* get len from sanitized ASCII value */
len = cmd[1];
if (len >= '0' && len < '9')
@@ -456,8 +471,7 @@ static void slcan_bump(struct slcan *sl)
static void slcan_unesc(struct slcan *sl, unsigned char s)
{
if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */
- if (!test_and_clear_bit(SLF_ERROR, &sl->flags) &&
- sl->rcount > 4)
+ if (!test_and_clear_bit(SLF_ERROR, &sl->flags))
slcan_bump(sl);
sl->rcount = 0;
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 9eb39cfa5fb2..7216eb8f9493 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -326,6 +326,26 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid,
}
}
+static void b53_set_eap_mode(struct b53_device *dev, int port, int mode)
+{
+ u64 eap_conf;
+
+ if (is5325(dev) || is5365(dev) || dev->chip_id == BCM5389_DEVICE_ID)
+ return;
+
+ b53_read64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), &eap_conf);
+
+ if (is63xx(dev)) {
+ eap_conf &= ~EAP_MODE_MASK_63XX;
+ eap_conf |= (u64)mode << EAP_MODE_SHIFT_63XX;
+ } else {
+ eap_conf &= ~EAP_MODE_MASK;
+ eap_conf |= (u64)mode << EAP_MODE_SHIFT;
+ }
+
+ b53_write64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), eap_conf);
+}
+
static void b53_set_forwarding(struct b53_device *dev, int enable)
{
u8 mgmt;
@@ -586,6 +606,13 @@ int b53_setup_port(struct dsa_switch *ds, int port)
b53_port_set_mcast_flood(dev, port, true);
b53_port_set_learning(dev, port, false);
+ /* Force all traffic to go to the CPU port to prevent the ASIC from
+ * trying to forward to bridged ports on matching FDB entries, then
+ * dropping frames because it isn't allowed to forward there.
+ */
+ if (dsa_is_user_port(ds, port))
+ b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED);
+
return 0;
}
EXPORT_SYMBOL(b53_setup_port);
@@ -2042,6 +2069,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge,
pvlan |= BIT(i);
}
+ /* Disable redirection of unknown SA to the CPU port */
+ b53_set_eap_mode(dev, port, EAP_MODE_BASIC);
+
/* Configure the local port VLAN control membership to include
* remote ports and update the local port bitmask
*/
@@ -2077,6 +2107,9 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge)
pvlan &= ~BIT(i);
}
+ /* Enable redirection of unknown SA to the CPU port */
+ b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED);
+
b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan);
dev->ports[port].vlan_ctl_mask = pvlan;
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index bfbcb66bef66..5f7a0e5c5709 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -50,6 +50,9 @@
/* Jumbo Frame Registers */
#define B53_JUMBO_PAGE 0x40
+/* EAP Registers */
+#define B53_EAP_PAGE 0x42
+
/* EEE Control Registers Page */
#define B53_EEE_PAGE 0x92
@@ -481,6 +484,17 @@
#define JMS_MAX_SIZE 9724
/*************************************************************************
+ * EAP Page Registers
+ *************************************************************************/
+#define B53_PORT_EAP_CONF(i) (0x20 + 8 * (i))
+#define EAP_MODE_SHIFT 51
+#define EAP_MODE_SHIFT_63XX 50
+#define EAP_MODE_MASK (0x3ull << EAP_MODE_SHIFT)
+#define EAP_MODE_MASK_63XX (0x3ull << EAP_MODE_SHIFT_63XX)
+#define EAP_MODE_BASIC 0
+#define EAP_MODE_SIMPLIFIED 3
+
+/*************************************************************************
* EEE Configuration Page Registers
*************************************************************************/
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 89f0796894af..d66da5cbbeb9 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -265,16 +265,70 @@ static void ksz_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface);
+/**
+ * ksz_phylink_mac_disable_tx_lpi() - Callback to signal LPI support (Dummy)
+ * @config: phylink config structure
+ *
+ * This function is a dummy handler. See ksz_phylink_mac_enable_tx_lpi() for
+ * a detailed explanation of EEE/LPI handling in KSZ switches.
+ */
+static void ksz_phylink_mac_disable_tx_lpi(struct phylink_config *config)
+{
+}
+
+/**
+ * ksz_phylink_mac_enable_tx_lpi() - Callback to signal LPI support (Dummy)
+ * @config: phylink config structure
+ * @timer: timer value before entering LPI (unused)
+ * @tx_clock_stop: whether to stop the TX clock in LPI mode (unused)
+ *
+ * This function signals to phylink that the driver architecture supports
+ * LPI management, enabling phylink to control EEE advertisement during
+ * negotiation according to IEEE Std 802.3 (Clause 78).
+ *
+ * Hardware Management of EEE/LPI State:
+ * For KSZ switch ports with integrated PHYs (e.g., KSZ9893R ports 1-2),
+ * observation and testing suggest that the actual EEE / Low Power Idle (LPI)
+ * state transitions are managed autonomously by the hardware based on
+ * the auto-negotiation results. (Note: While the datasheet describes EEE
+ * operation based on negotiation, it doesn't explicitly detail the internal
+ * MAC/PHY interaction, so autonomous hardware management of the MAC state
+ * for LPI is inferred from observed behavior).
+ * This hardware control, consistent with the switch's ability to operate
+ * autonomously via strapping, means MAC-level software intervention is not
+ * required or exposed for managing the LPI state once EEE is negotiated.
+ * (Ref: KSZ9893R Data Sheet DS00002420D, primarily Section 4.7.5 explaining
+ * EEE, also Sections 4.1.7 on Auto-Negotiation and 3.2.1 on Configuration
+ * Straps).
+ *
+ * Additionally, ports configured as MAC interfaces (e.g., KSZ9893R port 3)
+ * lack documented MAC-level LPI control.
+ *
+ * Therefore, this callback performs no action and serves primarily to inform
+ * phylink of LPI awareness and to document the inferred hardware behavior.
+ *
+ * Returns: 0 (Always success)
+ */
+static int ksz_phylink_mac_enable_tx_lpi(struct phylink_config *config,
+ u32 timer, bool tx_clock_stop)
+{
+ return 0;
+}
+
static const struct phylink_mac_ops ksz88x3_phylink_mac_ops = {
.mac_config = ksz88x3_phylink_mac_config,
.mac_link_down = ksz_phylink_mac_link_down,
.mac_link_up = ksz8_phylink_mac_link_up,
+ .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi,
+ .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi,
};
static const struct phylink_mac_ops ksz8_phylink_mac_ops = {
.mac_config = ksz_phylink_mac_config,
.mac_link_down = ksz_phylink_mac_link_down,
.mac_link_up = ksz8_phylink_mac_link_up,
+ .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi,
+ .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi,
};
static const struct ksz_dev_ops ksz88xx_dev_ops = {
@@ -358,6 +412,8 @@ static const struct phylink_mac_ops ksz9477_phylink_mac_ops = {
.mac_config = ksz_phylink_mac_config,
.mac_link_down = ksz_phylink_mac_link_down,
.mac_link_up = ksz9477_phylink_mac_link_up,
+ .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi,
+ .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi,
};
static const struct ksz_dev_ops ksz9477_dev_ops = {
@@ -401,6 +457,8 @@ static const struct phylink_mac_ops lan937x_phylink_mac_ops = {
.mac_config = ksz_phylink_mac_config,
.mac_link_down = ksz_phylink_mac_link_down,
.mac_link_up = ksz9477_phylink_mac_link_up,
+ .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi,
+ .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi,
};
static const struct ksz_dev_ops lan937x_dev_ops = {
@@ -2016,6 +2074,18 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
if (dev->dev_ops->get_caps)
dev->dev_ops->get_caps(dev, port, config);
+
+ if (ds->ops->support_eee && ds->ops->support_eee(ds, port)) {
+ memcpy(config->lpi_interfaces, config->supported_interfaces,
+ sizeof(config->lpi_interfaces));
+
+ config->lpi_capabilities = MAC_100FD;
+ if (dev->info->gbit_capable[port])
+ config->lpi_capabilities |= MAC_1000FD;
+
+ /* EEE is fully operational */
+ config->eee_enabled_default = true;
+ }
}
void ksz_r_mib_stats64(struct ksz_device *dev, int port)
@@ -2697,8 +2767,9 @@ static int ksz_irq_common_setup(struct ksz_device *dev, struct ksz_irq *kirq)
kirq->dev = dev;
kirq->masked = ~0;
- kirq->domain = irq_domain_add_simple(dev->dev->of_node, kirq->nirqs, 0,
- &ksz_irq_domain_ops, kirq);
+ kirq->domain = irq_domain_create_simple(of_fwnode_handle(dev->dev->of_node),
+ kirq->nirqs, 0,
+ &ksz_irq_domain_ops, kirq);
if (!kirq->domain)
return -ENOMEM;
@@ -3008,31 +3079,6 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port)
if (!port)
return MICREL_KSZ8_P1_ERRATA;
break;
- case KSZ8567_CHIP_ID:
- /* KSZ8567R Errata DS80000752C Module 4 */
- case KSZ8765_CHIP_ID:
- case KSZ8794_CHIP_ID:
- case KSZ8795_CHIP_ID:
- /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */
- case KSZ9477_CHIP_ID:
- /* KSZ9477S Errata DS80000754A Module 4 */
- case KSZ9567_CHIP_ID:
- /* KSZ9567S Errata DS80000756A Module 4 */
- case KSZ9896_CHIP_ID:
- /* KSZ9896C Errata DS80000757A Module 3 */
- case KSZ9897_CHIP_ID:
- case LAN9646_CHIP_ID:
- /* KSZ9897R Errata DS80000758C Module 4 */
- /* Energy Efficient Ethernet (EEE) feature select must be manually disabled
- * The EEE feature is enabled by default, but it is not fully
- * operational. It must be manually disabled through register
- * controls. If not disabled, the PHY ports can auto-negotiate
- * to enable EEE, and this feature can cause link drops when
- * linked to another device supporting EEE.
- *
- * The same item appears in the errata for all switches above.
- */
- return MICREL_NO_EEE;
}
return 0;
@@ -3466,6 +3512,20 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port)
return -EOPNOTSUPP;
}
+/**
+ * ksz_support_eee - Determine Energy Efficient Ethernet (EEE) support for a
+ * port
+ * @ds: Pointer to the DSA switch structure
+ * @port: Port number to check
+ *
+ * This function also documents devices where EEE was initially advertised but
+ * later withdrawn due to reliability issues, as described in official errata
+ * documents. These devices are explicitly listed to record known limitations,
+ * even if there is no technical necessity for runtime checks.
+ *
+ * Returns: true if the internal PHY on the given port supports fully
+ * operational EEE, false otherwise.
+ */
static bool ksz_support_eee(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
@@ -3475,15 +3535,35 @@ static bool ksz_support_eee(struct dsa_switch *ds, int port)
switch (dev->chip_id) {
case KSZ8563_CHIP_ID:
+ case KSZ9563_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ return true;
case KSZ8567_CHIP_ID:
+ /* KSZ8567R Errata DS80000752C Module 4 */
+ case KSZ8765_CHIP_ID:
+ case KSZ8794_CHIP_ID:
+ case KSZ8795_CHIP_ID:
+ /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */
case KSZ9477_CHIP_ID:
- case KSZ9563_CHIP_ID:
+ /* KSZ9477S Errata DS80000754A Module 4 */
case KSZ9567_CHIP_ID:
- case KSZ9893_CHIP_ID:
+ /* KSZ9567S Errata DS80000756A Module 4 */
case KSZ9896_CHIP_ID:
+ /* KSZ9896C Errata DS80000757A Module 3 */
case KSZ9897_CHIP_ID:
case LAN9646_CHIP_ID:
- return true;
+ /* KSZ9897R Errata DS80000758C Module 4 */
+ /* Energy Efficient Ethernet (EEE) feature select must be
+ * manually disabled
+ * The EEE feature is enabled by default, but it is not fully
+ * operational. It must be manually disabled through register
+ * controls. If not disabled, the PHY ports can auto-negotiate
+ * to enable EEE, and this feature can cause link drops when
+ * linked to another device supporting EEE.
+ *
+ * The same item appears in the errata for all switches above.
+ */
+ break;
}
return false;
diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c
index 22fb9ef4645c..992101e4bdee 100644
--- a/drivers/net/dsa/microchip/ksz_ptp.c
+++ b/drivers/net/dsa/microchip/ksz_ptp.c
@@ -1136,8 +1136,8 @@ int ksz_ptp_irq_setup(struct dsa_switch *ds, u8 p)
init_completion(&port->tstamp_msg_comp);
- ptpirq->domain = irq_domain_add_linear(dev->dev->of_node, ptpirq->nirqs,
- &ksz_ptp_irq_domain_ops, ptpirq);
+ ptpirq->domain = irq_domain_create_linear(of_fwnode_handle(dev->dev->of_node),
+ ptpirq->nirqs, &ksz_ptp_irq_domain_ops, ptpirq);
if (!ptpirq->domain)
return -ENOMEM;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 08db846cda8d..2281d6ab8c9a 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -297,7 +297,7 @@ static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
u16 reg, mask;
chip->g1_irq.nirqs = chip->info->g1_irqs;
- chip->g1_irq.domain = irq_domain_add_simple(
+ chip->g1_irq.domain = irq_domain_create_simple(
NULL, chip->g1_irq.nirqs, 0,
&mv88e6xxx_g1_irq_domain_ops, chip);
if (!chip->g1_irq.domain)
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index b2b5f6ba438f..aaf97c1e3167 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -1154,8 +1154,10 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
if (err)
return err;
- chip->g2_irq.domain = irq_domain_add_simple(
- chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip);
+ chip->g2_irq.domain = irq_domain_create_simple(of_fwnode_handle(chip->dev->of_node),
+ 16, 0,
+ &mv88e6xxx_g2_irq_domain_ops,
+ chip);
if (!chip->g2_irq.domain)
return -ENOMEM;
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index e9f2c67bc15f..79a29676ca6f 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -821,8 +821,8 @@ static int ar9331_sw_irq_init(struct ar9331_sw_priv *priv)
return ret;
}
- priv->irqdomain = irq_domain_add_linear(np, 1, &ar9331_sw_irqdomain_ops,
- priv);
+ priv->irqdomain = irq_domain_create_linear(of_fwnode_handle(np), 1,
+ &ar9331_sw_irqdomain_ops, priv);
if (!priv->irqdomain) {
dev_err(dev, "failed to create IRQ domain\n");
return -EINVAL;
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index 7e96355c28bd..964a56ee16cc 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -1719,8 +1719,8 @@ static int rtl8365mb_irq_setup(struct realtek_priv *priv)
goto out_put_node;
}
- priv->irqdomain = irq_domain_add_linear(intc, priv->num_ports,
- &rtl8365mb_irqdomain_ops, priv);
+ priv->irqdomain = irq_domain_create_linear(of_fwnode_handle(intc), priv->num_ports,
+ &rtl8365mb_irqdomain_ops, priv);
if (!priv->irqdomain) {
dev_err(priv->dev, "failed to add irq domain\n");
ret = -ENOMEM;
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index f54771cab56d..8bdb52b5fdcb 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -550,10 +550,8 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_priv *priv)
dev_err(priv->dev, "unable to request irq: %d\n", ret);
goto out_put_node;
}
- priv->irqdomain = irq_domain_add_linear(intc,
- RTL8366RB_NUM_INTERRUPT,
- &rtl8366rb_irqdomain_ops,
- priv);
+ priv->irqdomain = irq_domain_create_linear(of_fwnode_handle(intc), RTL8366RB_NUM_INTERRUPT,
+ &rtl8366rb_irqdomain_ops, priv);
if (!priv->irqdomain) {
dev_err(priv->dev, "failed to create IRQ domain\n");
ret = -EINVAL;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index f8454f3b6f9c..f674c400f05b 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2081,6 +2081,7 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
switch (state) {
case BR_STATE_DISABLED:
case BR_STATE_BLOCKING:
+ case BR_STATE_LISTENING:
/* From UM10944 description of DRPDTAG (why put this there?):
* "Management traffic flows to the port regardless of the state
* of the INGRESS flag". So BPDUs are still be allowed to pass.
@@ -2090,11 +2091,6 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
mac[port].egress = false;
mac[port].dyn_learn = false;
break;
- case BR_STATE_LISTENING:
- mac[port].ingress = true;
- mac[port].egress = false;
- mac[port].dyn_learn = false;
- break;
case BR_STATE_LEARNING:
mac[port].ingress = true;
mac[port].egress = false;
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index d748dc6de923..1e9ab65218ff 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -614,7 +614,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
struct airoha_queue_entry *e = &q->entry[q->tail];
struct airoha_qdma_desc *desc = &q->desc[q->tail];
u32 hash, reason, msg1 = le32_to_cpu(desc->msg1);
- dma_addr_t dma_addr = le32_to_cpu(desc->addr);
struct page *page = virt_to_head_page(e->buf);
u32 desc_ctrl = le32_to_cpu(desc->ctrl);
struct airoha_gdm_port *port;
@@ -623,22 +622,16 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
if (!(desc_ctrl & QDMA_DESC_DONE_MASK))
break;
- if (!dma_addr)
- break;
-
- len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
- if (!len)
- break;
-
q->tail = (q->tail + 1) % q->ndesc;
q->queued--;
- dma_sync_single_for_cpu(eth->dev, dma_addr,
+ dma_sync_single_for_cpu(eth->dev, e->dma_addr,
SKB_WITH_OVERHEAD(q->buf_size), dir);
+ len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
data_len = q->skb ? q->buf_size
: SKB_WITH_OVERHEAD(q->buf_size);
- if (data_len < len)
+ if (!len || data_len < len)
goto free_frag;
p = airoha_qdma_get_gdm_port(eth, desc);
@@ -701,9 +694,12 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
q->skb = NULL;
continue;
free_frag:
- page_pool_put_full_page(q->page_pool, page, true);
- dev_kfree_skb(q->skb);
- q->skb = NULL;
+ if (q->skb) {
+ dev_kfree_skb(q->skb);
+ q->skb = NULL;
+ } else {
+ page_pool_put_full_page(q->page_pool, page, true);
+ }
}
airoha_qdma_fill_rx_queue(q);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 86a5de44b6f3..6afc2ab6fad2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
netdev_unlock(bp->dev);
}
+/* Same as bnxt_lock_sp() with additional rtnl_lock */
+static void bnxt_rtnl_lock_sp(struct bnxt *bp)
+{
+ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ rtnl_lock();
+ netdev_lock(bp->dev);
+}
+
+static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
+{
+ set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ netdev_unlock(bp->dev);
+ rtnl_unlock();
+}
+
/* Only called from bnxt_sp_task() */
static void bnxt_reset(struct bnxt *bp, bool silent)
{
- bnxt_lock_sp(bp);
+ bnxt_rtnl_lock_sp(bp);
if (test_bit(BNXT_STATE_OPEN, &bp->state))
bnxt_reset_task(bp, silent);
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
}
/* Only called from bnxt_sp_task() */
@@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
{
int i;
- bnxt_lock_sp(bp);
+ bnxt_rtnl_lock_sp(bp);
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
return;
}
/* Disable and flush TPA before resetting the RX ring */
@@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
}
if (bp->flags & BNXT_FLAG_TPA)
bnxt_set_tpa(bp, true);
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
}
static void bnxt_fw_fatal_close(struct bnxt *bp)
@@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
fallthrough;
case BNXT_FW_RESET_STATE_OPENING:
- while (!netdev_trylock(bp->dev)) {
+ while (!rtnl_trylock()) {
bnxt_queue_fw_reset_work(bp, HZ / 10);
return;
}
+ netdev_lock(bp->dev);
rc = bnxt_open(bp->dev);
if (rc) {
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
bnxt_fw_reset_abort(bp, rc);
netdev_unlock(bp->dev);
+ rtnl_unlock();
goto ulp_start;
}
@@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_dl_health_fw_status_update(bp, true);
}
netdev_unlock(bp->dev);
+ rtnl_unlock();
bnxt_ulp_start(bp, 0);
bnxt_reenable_sriov(bp);
netdev_lock(bp->dev);
@@ -15936,7 +15954,7 @@ err_reset:
rc);
napi_enable_locked(&bnapi->napi);
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
- bnxt_reset_task(bp, true);
+ netif_close(dev);
return rc;
}
@@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device)
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
+ rtnl_lock();
netdev_lock(dev);
rc = pci_enable_device(bp->pdev);
if (rc) {
@@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device)
resume_exit:
netdev_unlock(bp->dev);
+ rtnl_unlock();
bnxt_ulp_start(bp, rc);
if (!rc)
bnxt_reenable_sriov(bp);
@@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
int err;
netdev_info(bp->dev, "PCI Slot Resume\n");
+ rtnl_lock();
netdev_lock(netdev);
err = bnxt_hwrm_func_qcaps(bp);
@@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
netif_device_attach(netdev);
netdev_unlock(netdev);
+ rtnl_unlock();
bnxt_ulp_start(bp, err);
if (!err)
bnxt_reenable_sriov(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index a8e930d5dbb0..7564705d6478 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -20,6 +20,7 @@
#include <asm/byteorder.h>
#include <linux/bitmap.h>
#include <linux/auxiliary_bus.h>
+#include <net/netdev_lock.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
@@ -309,14 +310,12 @@ void bnxt_ulp_irq_stop(struct bnxt *bp)
if (!ulp->msix_requested)
return;
- netdev_lock(bp->dev);
- ops = rcu_dereference(ulp->ulp_ops);
+ ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev);
if (!ops || !ops->ulp_irq_stop)
return;
if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state))
reset = true;
ops->ulp_irq_stop(ulp->handle, reset);
- netdev_unlock(bp->dev);
}
}
@@ -335,8 +334,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err)
if (!ulp->msix_requested)
return;
- netdev_lock(bp->dev);
- ops = rcu_dereference(ulp->ulp_ops);
+ ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev);
if (!ops || !ops->ulp_irq_restart)
return;
@@ -348,7 +346,6 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err)
bnxt_fill_msix_vecs(bp, ent);
}
ops->ulp_irq_restart(ulp->handle, ent);
- netdev_unlock(bp->dev);
kfree(ent);
}
}
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 1fe8ec37491b..e1e8bd2ec155 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -997,22 +997,15 @@ static void macb_update_stats(struct macb *bp)
static int macb_halt_tx(struct macb *bp)
{
- unsigned long halt_time, timeout;
- u32 status;
+ u32 status;
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT));
- timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT);
- do {
- halt_time = jiffies;
- status = macb_readl(bp, TSR);
- if (!(status & MACB_BIT(TGO)))
- return 0;
-
- udelay(250);
- } while (time_before(halt_time, timeout));
-
- return -ETIMEDOUT;
+ /* Poll TSR until TGO is cleared or timeout. */
+ return read_poll_timeout_atomic(macb_readl, status,
+ !(status & MACB_BIT(TGO)),
+ 250, MACB_HALT_TIMEOUT, false,
+ bp, TSR);
}
static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budget)
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 625245b0845c..eba73246f986 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -67,6 +67,8 @@
#define TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE (TSNEP_TX_TYPE_XDP_NDO | TSNEP_TX_TYPE_MAP_PAGE)
#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO)
#define TSNEP_TX_TYPE_XSK BIT(12)
+#define TSNEP_TX_TYPE_TSTAMP BIT(13)
+#define TSNEP_TX_TYPE_SKB_TSTAMP (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_TSTAMP)
#define TSNEP_XDP_TX BIT(0)
#define TSNEP_XDP_REDIRECT BIT(1)
@@ -386,8 +388,7 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
if (entry->skb) {
entry->properties = length & TSNEP_DESC_LENGTH_MASK;
entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
- if ((entry->type & TSNEP_TX_TYPE_SKB) &&
- (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS))
+ if ((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP)
entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG;
/* toggle user flag to prevent false acknowledge
@@ -479,7 +480,8 @@ static int tsnep_tx_map_frag(skb_frag_t *frag, struct tsnep_tx_entry *entry,
return mapped;
}
-static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
+static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count,
+ bool do_tstamp)
{
struct device *dmadev = tx->adapter->dmadev;
struct tsnep_tx_entry *entry;
@@ -505,6 +507,9 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
entry->type = TSNEP_TX_TYPE_SKB_INLINE;
mapped = 0;
}
+
+ if (do_tstamp)
+ entry->type |= TSNEP_TX_TYPE_TSTAMP;
} else {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
@@ -558,11 +563,12 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
struct tsnep_tx *tx)
{
- int count = 1;
struct tsnep_tx_entry *entry;
+ bool do_tstamp = false;
+ int count = 1;
int length;
- int i;
int retval;
+ int i;
if (skb_shinfo(skb)->nr_frags > 0)
count += skb_shinfo(skb)->nr_frags;
@@ -579,7 +585,13 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
entry = &tx->entry[tx->write];
entry->skb = skb;
- retval = tsnep_tx_map(skb, tx, count);
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ tx->adapter->hwtstamp_config.tx_type == HWTSTAMP_TX_ON) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ do_tstamp = true;
+ }
+
+ retval = tsnep_tx_map(skb, tx, count, do_tstamp);
if (retval < 0) {
tsnep_tx_unmap(tx, tx->write, count);
dev_kfree_skb_any(entry->skb);
@@ -591,9 +603,6 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
}
length = retval;
- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-
for (i = 0; i < count; i++)
tsnep_tx_activate(tx, (tx->write + i) & TSNEP_RING_MASK, length,
i == count - 1);
@@ -844,8 +853,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
length = tsnep_tx_unmap(tx, tx->read, count);
- if ((entry->type & TSNEP_TX_TYPE_SKB) &&
- (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) &&
+ if (((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) &&
(__le32_to_cpu(entry->desc_wb->properties) &
TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) {
struct skb_shared_hwtstamps hwtstamps;
diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
index a0bcfb5a713d..ff3295b60a69 100644
--- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
+++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
@@ -61,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type)
return -EBUSY;
}
+ netif_device_detach(priv->netdev);
+
priv->reset_type = type;
set_bit(HBG_NIC_STATE_RESETTING, &priv->state);
clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state);
@@ -91,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type)
return ret;
}
+ netif_device_attach(priv->netdev);
+
dev_info(&priv->pdev->dev, "reset done\n");
return ret;
}
@@ -117,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv)
if (running)
dev_close(priv->netdev);
- hbg_reset(priv);
-
- /* in hbg_pci_err_detected(), we will detach first,
- * so we need to attach before open
- */
- if (!netif_device_present(priv->netdev))
- netif_device_attach(priv->netdev);
+ if (hbg_reset(priv))
+ goto err_unlock;
if (running)
dev_open(priv->netdev, NULL);
+
+err_unlock:
rtnl_unlock();
}
@@ -160,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev)
pci_save_state(pdev);
hbg_err_reset(priv);
- netif_device_attach(netdev);
return PCI_ERS_RESULT_RECOVERED;
}
diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c
index 8f1107b85fbb..55520053270a 100644
--- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c
@@ -317,6 +317,9 @@ static void hbg_update_stats_by_info(struct hbg_priv *priv,
const struct hbg_ethtool_stats *stats;
u32 i;
+ if (test_bit(HBG_NIC_STATE_RESETTING, &priv->state))
+ return;
+
for (i = 0; i < info_len; i++) {
stats = &info[i];
if (!stats->reg)
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index 22371011c249..2410aee59fb2 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -1321,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
*/
if (!primary_lag) {
lag->primary = true;
+ if (!ice_is_switchdev_running(lag->pf))
+ return;
+
/* Configure primary's SWID to be shared */
ice_lag_primary_swid(lag, true);
primary_lag = lag;
} else {
u16 swid;
+ if (!ice_is_switchdev_running(primary_lag->pf))
+ return;
+
swid = primary_lag->pf->hw.port_info->sw_id;
ice_lag_set_swid(swid, lag, true);
ice_lag_add_prune_list(primary_lag, lag->pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 7c3006eb68dd..6446d0fcc052 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -4275,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
}
ice_vfhw_mac_add(vf, &al->list[i]);
- vf->num_mac++;
break;
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index aef0e9775a33..70dbf80f3bb7 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -143,6 +143,7 @@ enum idpf_vport_state {
* @vport_id: Vport identifier
* @link_speed_mbps: Link speed in mbps
* @vport_idx: Relative vport index
+ * @max_tx_hdr_size: Max header length hardware can support
* @state: See enum idpf_vport_state
* @netstats: Packet and byte stats
* @stats_lock: Lock to protect stats update
@@ -153,6 +154,7 @@ struct idpf_netdev_priv {
u32 vport_id;
u32 link_speed_mbps;
u16 vport_idx;
+ u16 max_tx_hdr_size;
enum idpf_vport_state state;
struct rtnl_link_stats64 netstats;
spinlock_t stats_lock;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 82f09b4030bc..3a033ce19cda 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -723,6 +723,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
np->vport = vport;
np->vport_idx = vport->idx;
np->vport_id = vport->vport_id;
+ np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter);
vport->netdev = netdev;
return idpf_init_mac_addr(vport, netdev);
@@ -740,6 +741,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
np->adapter = adapter;
np->vport_idx = vport->idx;
np->vport_id = vport->vport_id;
+ np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter);
spin_lock_init(&np->stats_lock);
@@ -2203,8 +2205,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb,
struct net_device *netdev,
netdev_features_t features)
{
- struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
- struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ u16 max_tx_hdr_size = np->max_tx_hdr_size;
size_t len;
/* No point in doing any of this if neither checksum nor GSO are
@@ -2227,7 +2229,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb,
goto unsupported;
len = skb_network_header_len(skb);
- if (unlikely(len > idpf_get_max_tx_hdr_size(adapter)))
+ if (unlikely(len > max_tx_hdr_size))
goto unsupported;
if (!skb->encapsulation)
@@ -2240,7 +2242,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb,
/* IPLEN can support at most 127 dwords */
len = skb_inner_network_header_len(skb);
- if (unlikely(len > idpf_get_max_tx_hdr_size(adapter)))
+ if (unlikely(len > max_tx_hdr_size))
goto unsupported;
/* No need to validate L4LEN as TCP is the only protocol with a
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index bdf52cef3891..2d5f5c9f91ce 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -4025,6 +4025,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
return budget;
}
+ /* Switch to poll mode in the tear-down path after sending disable
+ * queues virtchnl message, as the interrupts will be disabled after
+ * that.
+ */
+ if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
+ q_vector->tx[0])))
+ return budget;
+
work_done = min_t(int, work_done, budget - 1);
/* Exit the polling mode, but don't re-enable interrupts if stack might
@@ -4035,15 +4043,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
else
idpf_vport_intr_set_wb_on_itr(q_vector);
- /* Switch to poll mode in the tear-down path after sending disable
- * queues virtchnl message, as the interrupts will be disabled after
- * that
- */
- if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
- q_vector->tx[0])))
- return budget;
- else
- return work_done;
+ return work_done;
}
/**
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 0b27a695008b..971993586fb4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -717,6 +717,11 @@ int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat)
if (!is_lmac_valid(cgx, lmac_id))
return -ENODEV;
+
+ /* pass lmac as 0 for CGX_CMR_RX_STAT9-12 */
+ if (idx >= CGX_RX_STAT_GLOBAL_INDEX)
+ lmac_id = 0;
+
*rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8));
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
index 7fa98aeb3663..4a3370a40dd8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
@@ -13,19 +13,26 @@
/* RVU LMTST */
#define LMT_TBL_OP_READ 0
#define LMT_TBL_OP_WRITE 1
-#define LMT_MAP_TABLE_SIZE (128 * 1024)
#define LMT_MAPTBL_ENTRY_SIZE 16
+#define LMT_MAX_VFS 256
+
+#define LMT_MAP_ENTRY_ENA BIT_ULL(20)
+#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16)
/* Function to perform operations (read/write) on lmtst map table */
static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
int lmt_tbl_op)
{
void __iomem *lmt_map_base;
- u64 tbl_base;
+ u64 tbl_base, cfg;
+ int pfs, vfs;
tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
+ cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG);
+ vfs = 1 << (cfg & 0xF);
+ pfs = 1 << ((cfg >> 4) & 0x7);
- lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE);
+ lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE);
if (!lmt_map_base) {
dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n");
return -ENOMEM;
@@ -35,6 +42,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
*val = readq(lmt_map_base + index);
} else {
writeq((*val), (lmt_map_base + index));
+
+ cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1);
+ /* 2048 LMTLINES */
+ cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6);
+
+ writeq(cfg, (lmt_map_base + (index + 8)));
+
/* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S
* changes effective. Write 1 for flush and read is being used as a
* barrier and sets up a data dependency. Write to 0 after a write
@@ -52,7 +66,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
#define LMT_MAP_TBL_W1_OFF 8
static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc)
{
- return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) +
+ return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) +
(pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE;
}
@@ -69,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
mutex_lock(&rvu->rsrc_lock);
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova);
- pf = rvu_get_pf(pcifunc) & 0x1F;
+ pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK;
val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 |
((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF);
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index a1f9ec03c2ce..c827da626471 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
u64 lmt_addr, val, tbl_base;
int pf, vf, num_vfs, hw_vfs;
void __iomem *lmt_map_base;
+ int apr_pfs, apr_vfs;
int buf_size = 10240;
size_t off = 0;
int index = 0;
@@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
return -ENOMEM;
tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
+ val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG);
+ apr_vfs = 1 << (val & 0xF);
+ apr_pfs = 1 << ((val >> 4) & 0x7);
- lmt_map_base = ioremap_wc(tbl_base, 128 * 1024);
+ lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs *
+ LMT_MAPTBL_ENTRY_SIZE);
if (!lmt_map_base) {
dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n");
kfree(buf);
@@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t",
pf);
- index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE;
+ index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE;
off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t",
(tbl_base + index));
lmt_addr = readq(lmt_map_base + index);
@@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
/* Reading num of VFs per PF */
rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs);
for (vf = 0; vf < num_vfs; vf++) {
- index = (pf * rvu->hw->total_vfs * 16) +
+ index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) +
((vf + 1) * LMT_MAPTBL_ENTRY_SIZE);
off += scnprintf(&buf[off], buf_size - 1 - off,
"PF%d:VF%d \t\t", pf, vf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
index f3b9daffaec3..4c7e0f345cb5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
@@ -531,7 +531,8 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf,
if (sw_tx_sc->encrypt)
sectag_tci |= (MCS_TCI_E | MCS_TCI_C);
- policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, secy->netdev->mtu);
+ policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU,
+ pfvf->netdev->mtu + OTX2_ETH_HLEN);
/* Write SecTag excluding AN bits(1..0) */
policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_TCI, sectag_tci >> 2);
policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 1e88422825be..d6b4b74e4002 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -356,6 +356,7 @@ struct otx2_flow_config {
struct list_head flow_list_tc;
u8 ucast_flt_cnt;
bool ntuple;
+ u16 ntuple_cnt;
};
struct dev_hw_ops {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
index 33ec9a7f7c03..e13ae5484c19 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
@@ -41,6 +41,7 @@ static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id,
if (!pfvf->flow_cfg)
return 0;
+ pfvf->flow_cfg->ntuple_cnt = ctx->val.vu16;
otx2_alloc_mcam_entries(pfvf, ctx->val.vu16);
return 0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 010385b29988..45b8c9230184 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -315,7 +315,7 @@ static void otx2_get_pauseparam(struct net_device *netdev,
struct otx2_nic *pfvf = netdev_priv(netdev);
struct cgx_pause_frm_cfg *req, *rsp;
- if (is_otx2_lbkvf(pfvf->pdev))
+ if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev))
return;
mutex_lock(&pfvf->mbox.lock);
@@ -347,7 +347,7 @@ static int otx2_set_pauseparam(struct net_device *netdev,
if (pause->autoneg)
return -EOPNOTSUPP;
- if (is_otx2_lbkvf(pfvf->pdev))
+ if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev))
return -EOPNOTSUPP;
if (pause->rx_pause)
@@ -941,8 +941,8 @@ static u32 otx2_get_link(struct net_device *netdev)
{
struct otx2_nic *pfvf = netdev_priv(netdev);
- /* LBK link is internal and always UP */
- if (is_otx2_lbkvf(pfvf->pdev))
+ /* LBK and SDP links are internal and always UP */
+ if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev))
return 1;
return pfvf->linfo.link_up;
}
@@ -1413,7 +1413,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev,
{
struct otx2_nic *pfvf = netdev_priv(netdev);
- if (is_otx2_lbkvf(pfvf->pdev)) {
+ if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) {
cmd->base.duplex = DUPLEX_FULL;
cmd->base.speed = SPEED_100000;
} else {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 47bfd1fb37d4..64c6d9162ef6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -247,7 +247,7 @@ int otx2_mcam_entry_init(struct otx2_nic *pfvf)
mutex_unlock(&pfvf->mbox.lock);
/* Allocate entries for Ntuple filters */
- count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT);
+ count = otx2_alloc_mcam_entries(pfvf, flow_cfg->ntuple_cnt);
if (count <= 0) {
otx2_clear_ntuple_flow_info(pfvf, flow_cfg);
return 0;
@@ -307,6 +307,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
INIT_LIST_HEAD(&pf->flow_cfg->flow_list_tc);
pf->flow_cfg->ucast_flt_cnt = OTX2_DEFAULT_UNICAST_FLOWS;
+ pf->flow_cfg->ntuple_cnt = OTX2_DEFAULT_FLOWCOUNT;
/* Allocate bare minimum number of MCAM entries needed for
* unicast and ntuple filters.
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 7ef3ba477d49..9b28be4c4a5d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
#ifdef CONFIG_DCB
- err = otx2_dcbnl_set_ops(netdev);
- if (err)
- goto err_free_zc_bmap;
+ /* Priority flow control is not supported for LBK and SDP vf(s) */
+ if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) {
+ err = otx2_dcbnl_set_ops(netdev);
+ if (err)
+ goto err_free_zc_bmap;
+ }
#endif
otx2_qos_init(vf, qos_txqs);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 22a532695fb0..6c92072b4c28 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -4748,7 +4748,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
}
if (mtk_is_netsys_v3_or_greater(mac->hw) &&
- MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW_BIT) &&
+ MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW) &&
id == MTK_GMAC1_ID) {
mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE |
MAC_SYM_PAUSE |
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3506024c2453..9bd166f489e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4349,6 +4349,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n");
+ features &= ~NETIF_F_HW_MACSEC;
+ if (netdev->features & NETIF_F_HW_MACSEC)
+ netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n");
+
return features;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 464821dd492d..a2033837182e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3014,6 +3014,9 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
.rif = rif,
};
+ if (!mlxsw_sp_dev_lower_is_port(mlxsw_sp_rif_dev(rif)))
+ return 0;
+
neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
if (rms.err)
goto err_arp;
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index e2d6bfb5d693..a70b88037a20 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3495,6 +3495,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
struct pci_dev *pdev)
{
struct lan743x_tx *tx;
+ u32 sgmii_ctl;
int index;
int ret;
@@ -3507,6 +3508,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
spin_lock_init(&adapter->eth_syslock_spinlock);
mutex_init(&adapter->sgmii_rw_lock);
pci11x1x_set_rfe_rd_fifo_threshold(adapter);
+ sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL);
+ if (adapter->is_sgmii_en) {
+ sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_;
+ sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_;
+ } else {
+ sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_;
+ sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_;
+ }
+ lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl);
} else {
adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
@@ -3558,7 +3568,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
{
- u32 sgmii_ctl;
int ret;
adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev);
@@ -3570,10 +3579,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
adapter->mdiobus->priv = (void *)adapter;
if (adapter->is_pci11x1x) {
if (adapter->is_sgmii_en) {
- sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL);
- sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_;
- sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_;
- lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl);
netif_dbg(adapter, drv, adapter->netdev,
"SGMII operation\n");
adapter->mdiobus->read = lan743x_mdiobus_read_c22;
@@ -3584,10 +3589,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
netif_dbg(adapter, drv, adapter->netdev,
"lan743x-mdiobus-c45\n");
} else {
- sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL);
- sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_;
- sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_;
- lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl);
netif_dbg(adapter, drv, adapter->netdev,
"RGMII operation\n");
// Only C22 support when RGMII I/F
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 99df00c30b8c..b5d744d2586f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -203,7 +203,7 @@ static struct pci_driver qede_pci_driver = {
};
static struct qed_eth_cb_ops qede_ll_ops = {
- {
+ .common = {
#ifdef CONFIG_RFS_ACCEL
.arfs_filter_op = qede_arfs_filter_op,
#endif
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 28d24d59efb8..d57b976b9040 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -1484,8 +1484,11 @@ static int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_o
}
cmd_op = (cmd.rsp.arg[0] & 0xff);
- if (cmd.rsp.arg[0] >> 25 == 2)
- return 2;
+ if (cmd.rsp.arg[0] >> 25 == 2) {
+ ret = 2;
+ goto out;
+ }
+
if (cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT)
set_bit(QLC_BC_VF_STATE, &vf->state);
else
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 85723a78793a..6c7e8655a7eb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -964,7 +964,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
/* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
* address. No need to mask it again.
*/
- reg |= 1 << H3_EPHY_ADDR_SHIFT;
+ reg |= ret << H3_EPHY_ADDR_SHIFT;
} else {
/* For SoCs without internal PHY the PHY selection bit should be
* set to 0 (external PHY).
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 1e6d2335293d..30665ffe78cf 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2685,7 +2685,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
port->slave.mac_addr);
if (!is_valid_ether_addr(port->slave.mac_addr)) {
eth_random_addr(port->slave.mac_addr);
- dev_err(dev, "Use random MAC address\n");
+ dev_info(dev, "Use random MAC address\n");
}
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index aed45abafb1b..490d34233d38 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -434,14 +434,20 @@ static int wx_host_interface_command_r(struct wx *wx, u32 *buffer,
wr32m(wx, WX_SW2FW_MBOX_CMD, WX_SW2FW_MBOX_CMD_VLD, WX_SW2FW_MBOX_CMD_VLD);
/* polling reply from FW */
- err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 1000, 50000,
- true, wx, buffer, send_cmd);
+ err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 2000,
+ timeout * 1000, true, wx, buffer, send_cmd);
if (err) {
wx_err(wx, "Polling from FW messages timeout, cmd: 0x%x, index: %d\n",
send_cmd, wx->swfw_index);
goto rel_out;
}
+ if (hdr->cmd_or_resp.ret_status == 0x80) {
+ wx_err(wx, "Unknown FW command: 0x%x\n", send_cmd);
+ err = -EINVAL;
+ goto rel_out;
+ }
+
/* expect no reply from FW then return */
if (!return_data)
goto rel_out;
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
index 4b9921b7bb11..a054b259d435 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
@@ -99,9 +99,15 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum)
}
local_buffer = eeprom_ptrs;
- for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++)
+ for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) {
+ if (wx->mac.type == wx_mac_aml) {
+ if (i >= TXGBE_EEPROM_I2C_SRART_PTR &&
+ i < TXGBE_EEPROM_I2C_END_PTR)
+ local_buffer[i] = 0xffff;
+ }
if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM)
*checksum += local_buffer[i];
+ }
kvfree(eeprom_ptrs);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
index 8658a51ee810..f2c2bd257e39 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -184,8 +184,8 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe)
goto skip_sp_irq;
txgbe->misc.nirqs = 1;
- txgbe->misc.domain = irq_domain_add_simple(NULL, txgbe->misc.nirqs, 0,
- &txgbe_misc_irq_domain_ops, txgbe);
+ txgbe->misc.domain = irq_domain_create_simple(NULL, txgbe->misc.nirqs, 0,
+ &txgbe_misc_irq_domain_ops, txgbe);
if (!txgbe->misc.domain)
return -ENOMEM;
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 9c1c26234cad..f423012dec22 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -158,6 +158,8 @@
#define TXGBE_EEPROM_VERSION_L 0x1D
#define TXGBE_EEPROM_VERSION_H 0x1E
#define TXGBE_ISCSI_BOOT_CONFIG 0x07
+#define TXGBE_EEPROM_I2C_SRART_PTR 0x580
+#define TXGBE_EEPROM_I2C_END_PTR 0x800
#define TXGBE_MAX_MSIX_VECTORS 64
#define TXGBE_MAX_FDIR_INDICES 63
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 70f7cb383228..cb6f5482d203 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -158,7 +158,6 @@ struct hv_netvsc_packet {
u8 cp_partial; /* partial copy into send buffer */
u8 rmsg_size; /* RNDIS header and PPI size */
- u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */
u8 page_buf_cnt;
u16 q_idx;
@@ -893,6 +892,18 @@ struct nvsp_message {
sizeof(struct nvsp_message))
#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor)
+/* Maximum # of contiguous data ranges that can make up a trasmitted packet.
+ * Typically it's the max SKB fragments plus 2 for the rndis packet and the
+ * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may
+ * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries
+ * in a GPA direct packet sent to netvsp over VMBus.
+ */
+#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT
+#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2)
+#else
+#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT
+#endif
+
/* Estimated requestor size:
* out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size
*/
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d6f5b9ea3109..720104661d7f 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -953,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ pend_size;
int i;
u32 padding = 0;
- u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
- packet->page_buf_cnt;
+ u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt;
u32 remain;
/* Add padding */
@@ -1055,6 +1054,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
return 0;
}
+/* Build an "array" of mpb entries describing the data to be transferred
+ * over VMBus. After the desc header fields, each "array" entry is variable
+ * size, and each entry starts after the end of the previous entry. The
+ * "offset" and "len" fields for each entry imply the size of the entry.
+ *
+ * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V
+ * uses that granularity, even if the system page size of the guest is larger.
+ * Each entry in the input "pb" array must describe a contiguous range of
+ * guest physical memory so that the pfns are sequential if the range crosses
+ * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE.
+ */
+static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb,
+ u32 page_buffer_count,
+ struct vmbus_packet_mpb_array *desc,
+ u32 *desc_size)
+{
+ struct hv_mpb_array *mpb_entry = &desc->range;
+ int i, j;
+
+ for (i = 0; i < page_buffer_count; i++) {
+ u32 offset = pb[i].offset;
+ u32 len = pb[i].len;
+
+ mpb_entry->offset = offset;
+ mpb_entry->len = len;
+
+ for (j = 0; j < HVPFN_UP(offset + len); j++)
+ mpb_entry->pfn_array[j] = pb[i].pfn + j;
+
+ mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j];
+ }
+
+ desc->rangecount = page_buffer_count;
+ *desc_size = (char *)mpb_entry - (char *)desc;
+}
+
static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
@@ -1097,8 +1132,11 @@ static inline int netvsc_send_pkt(
packet->dma_range = NULL;
if (packet->page_buf_cnt) {
+ struct vmbus_channel_packet_page_buffer desc;
+ u32 desc_size;
+
if (packet->cp_partial)
- pb += packet->rmsg_pgcnt;
+ pb++;
ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
if (ret) {
@@ -1106,11 +1144,12 @@ static inline int netvsc_send_pkt(
goto exit;
}
- ret = vmbus_sendpacket_pagebuffer(out_channel,
- pb, packet->page_buf_cnt,
- &nvmsg, sizeof(nvmsg),
- req_id);
-
+ netvsc_build_mpb_array(pb, packet->page_buf_cnt,
+ (struct vmbus_packet_mpb_array *)&desc,
+ &desc_size);
+ ret = vmbus_sendpacket_mpb_desc(out_channel,
+ (struct vmbus_packet_mpb_array *)&desc,
+ desc_size, &nvmsg, sizeof(nvmsg), req_id);
if (ret)
netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
} else {
@@ -1259,7 +1298,7 @@ int netvsc_send(struct net_device *ndev,
packet->send_buf_index = section_index;
if (packet->cp_partial) {
- packet->page_buf_cnt -= packet->rmsg_pgcnt;
+ packet->page_buf_cnt--;
packet->total_data_buflen = msd_len + packet->rmsg_size;
} else {
packet->page_buf_cnt = 0;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c51b318b8a72..d8b169ac0343 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -326,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
return txq;
}
-static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len,
- struct hv_page_buffer *pb)
-{
- int j = 0;
-
- hvpfn += offset >> HV_HYP_PAGE_SHIFT;
- offset = offset & ~HV_HYP_PAGE_MASK;
-
- while (len > 0) {
- unsigned long bytes;
-
- bytes = HV_HYP_PAGE_SIZE - offset;
- if (bytes > len)
- bytes = len;
- pb[j].pfn = hvpfn;
- pb[j].offset = offset;
- pb[j].len = bytes;
-
- offset += bytes;
- len -= bytes;
-
- if (offset == HV_HYP_PAGE_SIZE && len) {
- hvpfn++;
- offset = 0;
- j++;
- }
- }
-
- return j + 1;
-}
-
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
struct hv_netvsc_packet *packet,
struct hv_page_buffer *pb)
{
- u32 slots_used = 0;
- char *data = skb->data;
int frags = skb_shinfo(skb)->nr_frags;
int i;
@@ -371,28 +338,27 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
* 2. skb linear data
* 3. skb fragment data
*/
- slots_used += fill_pg_buf(virt_to_hvpfn(hdr),
- offset_in_hvpage(hdr),
- len,
- &pb[slots_used]);
+ pb[0].offset = offset_in_hvpage(hdr);
+ pb[0].len = len;
+ pb[0].pfn = virt_to_hvpfn(hdr);
packet->rmsg_size = len;
- packet->rmsg_pgcnt = slots_used;
- slots_used += fill_pg_buf(virt_to_hvpfn(data),
- offset_in_hvpage(data),
- skb_headlen(skb),
- &pb[slots_used]);
+ pb[1].offset = offset_in_hvpage(skb->data);
+ pb[1].len = skb_headlen(skb);
+ pb[1].pfn = virt_to_hvpfn(skb->data);
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+ struct hv_page_buffer *cur_pb = &pb[i + 2];
+ u64 pfn = page_to_hvpfn(skb_frag_page(frag));
+ u32 offset = skb_frag_off(frag);
- slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)),
- skb_frag_off(frag),
- skb_frag_size(frag),
- &pb[slots_used]);
+ cur_pb->offset = offset_in_hvpage(offset);
+ cur_pb->len = skb_frag_size(frag);
+ cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT);
}
- return slots_used;
+ return frags + 2;
}
static int count_skb_frag_slots(struct sk_buff *skb)
@@ -483,7 +449,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
struct net_device *vf_netdev;
u32 rndis_msg_size;
u32 hash;
- struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+ struct hv_page_buffer pb[MAX_DATA_RANGES];
/* If VF is present and up then redirect packets to it.
* Skip the VF if it is marked down or has no carrier.
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 82747dfacd70..9e73959e61ee 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
struct rndis_request *req)
{
struct hv_netvsc_packet *packet;
- struct hv_page_buffer page_buf[2];
- struct hv_page_buffer *pb = page_buf;
+ struct hv_page_buffer pb;
int ret;
/* Setup the packet to send it */
@@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev,
packet->total_data_buflen = req->request_msg.msg_len;
packet->page_buf_cnt = 1;
- pb[0].pfn = virt_to_phys(&req->request_msg) >>
- HV_HYP_PAGE_SHIFT;
- pb[0].len = req->request_msg.msg_len;
- pb[0].offset = offset_in_hvpage(&req->request_msg);
-
- /* Add one page_buf when request_msg crossing page boundary */
- if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) {
- packet->page_buf_cnt++;
- pb[0].len = HV_HYP_PAGE_SIZE -
- pb[0].offset;
- pb[1].pfn = virt_to_phys((void *)&req->request_msg
- + pb[0].len) >> HV_HYP_PAGE_SHIFT;
- pb[1].offset = 0;
- pb[1].len = req->request_msg.msg_len -
- pb[0].len;
- }
+ pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT;
+ pb.len = req->request_msg.msg_len;
+ pb.offset = offset_in_hvpage(&req->request_msg);
trace_rndis_send(dev->ndev, 0, &req->request_msg);
rcu_read_lock_bh();
- ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false);
+ ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false);
rcu_read_unlock_bh();
return ret;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 24882d30f685..e2c6569d8c45 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -2027,12 +2027,6 @@ static int ksz9477_config_init(struct phy_device *phydev)
return err;
}
- /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
- * in this switch shall be regarded as broken.
- */
- if (phydev->dev_flags & MICREL_NO_EEE)
- phy_disable_eee(phydev);
-
return kszphy_config_init(phydev);
}
@@ -5705,7 +5699,6 @@ static struct phy_driver ksphy_driver[] = {
.handle_interrupt = kszphy_handle_interrupt,
.suspend = genphy_suspend,
.resume = ksz9477_resume,
- .get_features = ksz9477_get_features,
} };
module_phy_driver(ksphy_driver);
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index d8fc0c79745d..b75ceb90359f 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -1778,8 +1778,8 @@ static void team_change_rx_flags(struct net_device *dev, int change)
struct team_port *port;
int inc;
- rcu_read_lock();
- list_for_each_entry_rcu(port, &team->port_list, list) {
+ mutex_lock(&team->lock);
+ list_for_each_entry(port, &team->port_list, list) {
if (change & IFF_PROMISC) {
inc = dev->flags & IFF_PROMISC ? 1 : -1;
dev_set_promiscuity(port->dev, inc);
@@ -1789,7 +1789,7 @@ static void team_change_rx_flags(struct net_device *dev, int change)
dev_set_allmulti(port->dev, inc);
}
}
- rcu_read_unlock();
+ mutex_unlock(&team->lock);
}
static void team_set_rx_mode(struct net_device *dev)
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index e4f1663b6204..3e8025a71fcb 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2456,14 +2456,11 @@ static struct irq_chip lan78xx_irqchip = {
static int lan78xx_setup_irq_domain(struct lan78xx_net *dev)
{
- struct device_node *of_node;
struct irq_domain *irqdomain;
unsigned int irqmap = 0;
u32 buf;
int ret = 0;
- of_node = dev->udev->dev.parent->of_node;
-
mutex_init(&dev->domain_data.irq_lock);
ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
@@ -2475,8 +2472,10 @@ static int lan78xx_setup_irq_domain(struct lan78xx_net *dev)
dev->domain_data.irqchip = &lan78xx_irqchip;
dev->domain_data.irq_handler = handle_simple_irq;
- irqdomain = irq_domain_add_simple(of_node, MAX_INT_EP, 0,
- &chip_domain_ops, &dev->domain_data);
+ irqdomain = irq_domain_create_simple(of_fwnode_handle(dev->udev->dev.parent->of_node),
+ MAX_INT_EP, 0,
+ &chip_domain_ops,
+ &dev->domain_data);
if (irqdomain) {
/* create mapping for PHY interrupt */
irqmap = irq_create_mapping(irqdomain, INT_EP_PHY);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3df6aabc7e33..2440e30c5bd1 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -27,6 +27,10 @@
#include <linux/module.h>
#include <net/ip6_checksum.h>
+#ifdef CONFIG_X86
+#include <asm/msr.h>
+#endif
+
#include "vmxnet3_int.h"
#include "vmxnet3_xdp.h"
@@ -3607,8 +3611,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int err = 0;
- WRITE_ONCE(netdev->mtu, new_mtu);
-
/*
* Reset_work may be in the middle of resetting the device, wait for its
* completion.
@@ -3622,6 +3624,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
/* we need to re-create the rx queue based on the new mtu */
vmxnet3_rq_destroy_all(adapter);
+ WRITE_ONCE(netdev->mtu, new_mtu);
vmxnet3_adjust_rx_ring_size(adapter);
err = vmxnet3_rq_create_all(adapter);
if (err) {
@@ -3638,6 +3641,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
"Closing it\n", err);
goto out;
}
+ } else {
+ WRITE_ONCE(netdev->mtu, new_mtu);
}
out:
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 844af16ee551..35b4ec91979e 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -1011,6 +1011,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev)
int i;
mt76_worker_disable(&dev->tx_worker);
+ napi_disable(&dev->tx_napi);
netif_napi_del(&dev->tx_napi);
for (i = 0; i < ARRAY_SIZE(dev->phys); i++) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
index e61da76b2097..14b1f603fb62 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
@@ -1924,14 +1924,14 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy,
mt7925_mcu_sta_mld_tlv(skb, info->vif, info->link_sta->sta);
mt7925_mcu_sta_eht_mld_tlv(skb, info->vif, info->link_sta->sta);
}
-
- mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta);
}
if (!info->enable) {
mt7925_mcu_sta_remove_tlv(skb);
mt76_connac_mcu_add_tlv(skb, STA_REC_MLD_OFF,
sizeof(struct tlv));
+ } else {
+ mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta);
}
return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true);
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
index 6295e55ef85e..368f6d894bba 100644
--- a/drivers/ntb/msi.c
+++ b/drivers/ntb/msi.c
@@ -106,10 +106,10 @@ int ntb_msi_setup_mws(struct ntb_dev *ntb)
if (!ntb->msi)
return -EINVAL;
- msi_lock_descs(&ntb->pdev->dev);
- desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED);
- addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
- msi_unlock_descs(&ntb->pdev->dev);
+ scoped_guard (msi_descs_lock, &ntb->pdev->dev) {
+ desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED);
+ addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
+ }
for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
@@ -289,7 +289,7 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
if (!ntb->msi)
return -EINVAL;
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
msi_for_each_desc(entry, dev, MSI_DESC_ASSOCIATED) {
if (irq_has_action(entry->irq))
continue;
@@ -307,17 +307,11 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
ret = ntbm_msi_setup_callback(ntb, entry, msi_desc);
if (ret) {
devm_free_irq(&ntb->dev, entry->irq, dev_id);
- goto unlock;
+ return ret;
}
-
- ret = entry->irq;
- goto unlock;
+ return entry->irq;
}
- ret = -ENODEV;
-
-unlock:
- msi_unlock_descs(dev);
- return ret;
+ return -ENODEV;
}
EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index 2c092ec8c0a9..3b6d759bcdf2 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -242,7 +242,7 @@ struct nvme_dhchap_key *nvme_auth_transform_key(
{
const char *hmac_name;
struct crypto_shash *key_tfm;
- struct shash_desc *shash;
+ SHASH_DESC_ON_STACK(shash, key_tfm);
struct nvme_dhchap_key *transformed_key;
int ret, key_len;
@@ -267,19 +267,11 @@ struct nvme_dhchap_key *nvme_auth_transform_key(
if (IS_ERR(key_tfm))
return ERR_CAST(key_tfm);
- shash = kmalloc(sizeof(struct shash_desc) +
- crypto_shash_descsize(key_tfm),
- GFP_KERNEL);
- if (!shash) {
- ret = -ENOMEM;
- goto out_free_key;
- }
-
key_len = crypto_shash_digestsize(key_tfm);
transformed_key = nvme_auth_alloc_key(key_len, key->hash);
if (!transformed_key) {
ret = -ENOMEM;
- goto out_free_shash;
+ goto out_free_key;
}
shash->tfm = key_tfm;
@@ -299,15 +291,12 @@ struct nvme_dhchap_key *nvme_auth_transform_key(
if (ret < 0)
goto out_free_transformed_key;
- kfree(shash);
crypto_free_shash(key_tfm);
return transformed_key;
out_free_transformed_key:
nvme_auth_free_key(transformed_key);
-out_free_shash:
- kfree(shash);
out_free_key:
crypto_free_shash(key_tfm);
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 6115fef74c1e..f6ddbe553289 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -31,6 +31,7 @@ struct nvme_dhchap_queue_context {
u32 s1;
u32 s2;
bool bi_directional;
+ bool authenticated;
u16 transaction;
u8 status;
u8 dhgroup_id;
@@ -682,6 +683,7 @@ static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap)
static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap)
{
nvme_auth_reset_dhchap(chap);
+ chap->authenticated = false;
if (chap->shash_tfm)
crypto_free_shash(chap->shash_tfm);
if (chap->dh_tfm)
@@ -930,12 +932,14 @@ static void nvme_queue_auth_work(struct work_struct *work)
}
if (!ret) {
chap->error = 0;
+ chap->authenticated = true;
if (ctrl->opts->concat &&
(ret = nvme_auth_secure_concat(ctrl, chap))) {
dev_warn(ctrl->device,
"%s: qid %d failed to enable secure concatenation\n",
__func__, chap->qid);
chap->error = ret;
+ chap->authenticated = false;
}
return;
}
@@ -1023,13 +1027,16 @@ static void nvme_ctrl_auth_work(struct work_struct *work)
return;
for (q = 1; q < ctrl->queue_count; q++) {
- ret = nvme_auth_negotiate(ctrl, q);
- if (ret) {
- dev_warn(ctrl->device,
- "qid %d: error %d setting up authentication\n",
- q, ret);
- break;
- }
+ struct nvme_dhchap_queue_context *chap =
+ &ctrl->dhchap_ctxs[q];
+ /*
+ * Skip re-authentication if the queue had
+ * not been authenticated initially.
+ */
+ if (!chap->authenticated)
+ continue;
+ cancel_work_sync(&chap->auth_work);
+ queue_work(nvme_auth_wq, &chap->auth_work);
}
/*
@@ -1037,7 +1044,13 @@ static void nvme_ctrl_auth_work(struct work_struct *work)
* the controller terminates the connection.
*/
for (q = 1; q < ctrl->queue_count; q++) {
- ret = nvme_auth_wait(ctrl, q);
+ struct nvme_dhchap_queue_context *chap =
+ &ctrl->dhchap_ctxs[q];
+ if (!chap->authenticated)
+ continue;
+ flush_work(&chap->auth_work);
+ ret = chap->error;
+ nvme_auth_reset_dhchap(chap);
if (ret)
dev_warn(ctrl->device,
"qid %d: authentication failed\n", q);
@@ -1076,6 +1089,7 @@ int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
chap = &ctrl->dhchap_ctxs[i];
chap->qid = i;
chap->ctrl = ctrl;
+ chap->authenticated = false;
INIT_WORK(&chap->auth_work, nvme_queue_auth_work);
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ac53629fce68..f69a232a000a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -38,6 +38,8 @@ struct nvme_ns_info {
u32 nsid;
__le32 anagrpid;
u8 pi_offset;
+ u16 endgid;
+ u64 runs;
bool is_shared;
bool is_readonly;
bool is_ready;
@@ -150,6 +152,8 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
struct nvme_command *cmd);
+static int nvme_get_log_lsi(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page,
+ u8 lsp, u8 csi, void *log, size_t size, u64 offset, u16 lsi);
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
@@ -664,10 +668,11 @@ static void nvme_free_ns_head(struct kref *ref)
struct nvme_ns_head *head =
container_of(ref, struct nvme_ns_head, ref);
- nvme_mpath_remove_disk(head);
+ nvme_mpath_put_disk(head);
ida_free(&head->subsys->ns_ida, head->instance);
cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
+ kfree(head->plids);
kfree(head);
}
@@ -991,6 +996,18 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+ if (op == nvme_cmd_write && ns->head->nr_plids) {
+ u16 write_stream = req->bio->bi_write_stream;
+
+ if (WARN_ON_ONCE(write_stream > ns->head->nr_plids))
+ return BLK_STS_INVAL;
+
+ if (write_stream) {
+ dsmgmt |= ns->head->plids[write_stream - 1] << 16;
+ control |= NVME_RW_DTYPE_DPLCMT;
+ }
+ }
+
if (req->cmd_flags & REQ_ATOMIC && !nvme_valid_atomic_write(req))
return BLK_STS_INVAL;
@@ -1157,7 +1174,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
req->cmd_flags &= ~REQ_FAILFAST_DRIVER;
if (buffer && bufflen) {
- ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
+ ret = blk_rq_map_kern(req, buffer, bufflen, GFP_KERNEL);
if (ret)
goto out;
}
@@ -1609,6 +1626,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = true;
+ info->endgid = le16_to_cpu(id->endgid);
if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
dev_info(ctrl->device,
"Ignoring bogus Namespace Identifiers\n");
@@ -1649,6 +1667,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
+ info->endgid = le16_to_cpu(id->endgid);
}
kfree(id);
return ret;
@@ -1674,7 +1693,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
- u32 *result)
+ void *result)
{
return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer,
buflen, result);
@@ -1683,7 +1702,7 @@ EXPORT_SYMBOL_GPL(nvme_set_features);
int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
- u32 *result)
+ void *result)
{
return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer,
buflen, result);
@@ -2059,7 +2078,21 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
- atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+ atomic_bs = (1 + ns->ctrl->awupf) * bs;
+
+ /*
+ * Set subsystem atomic bs.
+ */
+ if (ns->ctrl->subsys->atomic_bs) {
+ if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
+ dev_err_ratelimited(ns->ctrl->device,
+ "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
+ ns->disk ? ns->disk->disk_name : "?",
+ ns->ctrl->subsys->atomic_bs,
+ atomic_bs);
+ }
+ } else
+ ns->ctrl->subsys->atomic_bs = atomic_bs;
nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
}
@@ -2153,6 +2186,148 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
return ret;
}
+static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl,
+ struct nvme_ns_info *info, u8 fdp_idx)
+{
+ struct nvme_fdp_config_log hdr, *h;
+ struct nvme_fdp_config_desc *desc;
+ size_t size = sizeof(hdr);
+ void *log, *end;
+ int i, n, ret;
+
+ ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
+ NVME_CSI_NVM, &hdr, size, 0, info->endgid);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "FDP configs log header status:0x%x endgid:%d\n", ret,
+ info->endgid);
+ return ret;
+ }
+
+ size = le32_to_cpu(hdr.sze);
+ if (size > PAGE_SIZE * MAX_ORDER_NR_PAGES) {
+ dev_warn(ctrl->device, "FDP config size too large:%zu\n",
+ size);
+ return 0;
+ }
+
+ h = kvmalloc(size, GFP_KERNEL);
+ if (!h)
+ return -ENOMEM;
+
+ ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
+ NVME_CSI_NVM, h, size, 0, info->endgid);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "FDP configs log status:0x%x endgid:%d\n", ret,
+ info->endgid);
+ goto out;
+ }
+
+ n = le16_to_cpu(h->numfdpc) + 1;
+ if (fdp_idx > n) {
+ dev_warn(ctrl->device, "FDP index:%d out of range:%d\n",
+ fdp_idx, n);
+ /* Proceed without registering FDP streams */
+ ret = 0;
+ goto out;
+ }
+
+ log = h + 1;
+ desc = log;
+ end = log + size - sizeof(*h);
+ for (i = 0; i < fdp_idx; i++) {
+ log += le16_to_cpu(desc->dsze);
+ desc = log;
+ if (log >= end) {
+ dev_warn(ctrl->device,
+ "FDP invalid config descriptor list\n");
+ ret = 0;
+ goto out;
+ }
+ }
+
+ if (le32_to_cpu(desc->nrg) > 1) {
+ dev_warn(ctrl->device, "FDP NRG > 1 not supported\n");
+ ret = 0;
+ goto out;
+ }
+
+ info->runs = le64_to_cpu(desc->runs);
+out:
+ kvfree(h);
+ return ret;
+}
+
+static int nvme_query_fdp_info(struct nvme_ns *ns, struct nvme_ns_info *info)
+{
+ struct nvme_ns_head *head = ns->head;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_fdp_ruh_status *ruhs;
+ struct nvme_fdp_config fdp;
+ struct nvme_command c = {};
+ size_t size;
+ int i, ret;
+
+ /*
+ * The FDP configuration is static for the lifetime of the namespace,
+ * so return immediately if we've already registered this namespace's
+ * streams.
+ */
+ if (head->nr_plids)
+ return 0;
+
+ ret = nvme_get_features(ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0,
+ &fdp);
+ if (ret) {
+ dev_warn(ctrl->device, "FDP get feature status:0x%x\n", ret);
+ return ret;
+ }
+
+ if (!(fdp.flags & FDPCFG_FDPE))
+ return 0;
+
+ ret = nvme_query_fdp_granularity(ctrl, info, fdp.fdpcidx);
+ if (!info->runs)
+ return ret;
+
+ size = struct_size(ruhs, ruhsd, S8_MAX - 1);
+ ruhs = kzalloc(size, GFP_KERNEL);
+ if (!ruhs)
+ return -ENOMEM;
+
+ c.imr.opcode = nvme_cmd_io_mgmt_recv;
+ c.imr.nsid = cpu_to_le32(head->ns_id);
+ c.imr.mo = NVME_IO_MGMT_RECV_MO_RUHS;
+ c.imr.numd = cpu_to_le32(nvme_bytes_to_numd(size));
+ ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
+ if (ret) {
+ dev_warn(ctrl->device, "FDP io-mgmt status:0x%x\n", ret);
+ goto free;
+ }
+
+ head->nr_plids = le16_to_cpu(ruhs->nruhsd);
+ if (!head->nr_plids)
+ goto free;
+
+ head->plids = kcalloc(head->nr_plids, sizeof(*head->plids),
+ GFP_KERNEL);
+ if (!head->plids) {
+ dev_warn(ctrl->device,
+ "failed to allocate %u FDP placement IDs\n",
+ head->nr_plids);
+ head->nr_plids = 0;
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ for (i = 0; i < head->nr_plids; i++)
+ head->plids[i] = le16_to_cpu(ruhs->ruhsd[i].pid);
+free:
+ kfree(ruhs);
+ return ret;
+}
+
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
@@ -2190,6 +2365,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
goto out;
}
+ if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
+ ret = nvme_query_fdp_info(ns, info);
+ if (ret < 0)
+ goto out;
+ }
+
lim = queue_limits_start_update(ns->disk->queue);
memflags = blk_mq_freeze_queue(ns->disk->queue);
@@ -2201,6 +2382,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
nvme_set_chunk_sectors(ns, id, &lim);
if (!nvme_update_disk_info(ns, id, &lim))
capacity = 0;
+
+ /*
+ * Validate the max atomic write size fits within the subsystem's
+ * atomic write capabilities.
+ */
+ if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
+ ret = -ENXIO;
+ goto out;
+ }
+
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
@@ -2223,6 +2415,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (!nvme_init_integrity(ns->head, &lim, info))
capacity = 0;
+ lim.max_write_streams = ns->head->nr_plids;
+ if (lim.max_write_streams)
+ lim.write_stream_granularity = min(info->runs, U32_MAX);
+ else
+ lim.write_stream_granularity = 0;
+
ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue, memflags);
@@ -2326,6 +2524,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
ns->head->disk->flags |= GENHD_FL_HIDDEN;
else
nvme_init_integrity(ns->head, &lim, info);
+ lim.max_write_streams = ns_lim->max_write_streams;
+ lim.write_stream_granularity = ns_lim->write_stream_granularity;
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
@@ -3031,7 +3231,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
kfree(subsys);
return -EINVAL;
}
- subsys->awupf = le16_to_cpu(id->awupf);
nvme_mpath_default_iopolicy(subsys);
subsys->dev.class = &nvme_subsys_class;
@@ -3084,8 +3283,8 @@ out_unlock:
return ret;
}
-int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
- void *log, size_t size, u64 offset)
+static int nvme_get_log_lsi(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page,
+ u8 lsp, u8 csi, void *log, size_t size, u64 offset, u16 lsi)
{
struct nvme_command c = { };
u32 dwlen = nvme_bytes_to_numd(size);
@@ -3099,10 +3298,18 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset));
c.get_log_page.csi = csi;
+ c.get_log_page.lsi = cpu_to_le16(lsi);
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
+ void *log, size_t size, u64 offset)
+{
+ return nvme_get_log_lsi(ctrl, nsid, log_page, lsp, csi, log, size,
+ offset, 0);
+}
+
static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
struct nvme_effects_log **log)
{
@@ -3441,7 +3648,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
-
+ ctrl->awupf = le16_to_cpu(id->awupf);
out_free:
kfree(id);
return ret;
@@ -3560,7 +3767,7 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl,
*/
if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h))
continue;
- if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
+ if (nvme_tryget_ns_head(h))
return h;
}
@@ -3804,7 +4011,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
}
} else {
ret = -EINVAL;
- if (!info->is_shared || !head->shared) {
+ if ((!info->is_shared || !head->shared) &&
+ !list_empty(&head->list)) {
dev_err(ctrl->device,
"Duplicate unshared namespace %d\n",
info->nsid);
@@ -4008,7 +4216,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
if (list_empty(&ns->head->list)) {
- list_del_init(&ns->head->entry);
+ if (!nvme_mpath_queue_if_no_path(ns->head))
+ list_del_init(&ns->head->entry);
last_path = true;
}
mutex_unlock(&ns->ctrl->subsys->lock);
@@ -4029,7 +4238,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
synchronize_srcu(&ns->ctrl->srcu);
if (last_path)
- nvme_mpath_shutdown_disk(ns->head);
+ nvme_mpath_remove_disk(ns->head);
nvme_put_ns(ns);
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 2257c3c96dd2..fdafa3e9e66f 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1410,9 +1410,8 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
}
static void
-nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
+nvme_fc_xmt_ls_rsp_free(struct nvmefc_ls_rcv_op *lsop)
{
- struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
struct nvme_fc_rport *rport = lsop->rport;
struct nvme_fc_lport *lport = rport->lport;
unsigned long flags;
@@ -1434,6 +1433,14 @@ nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
}
static void
+nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
+{
+ struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
+
+ nvme_fc_xmt_ls_rsp_free(lsop);
+}
+
+static void
nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
{
struct nvme_fc_rport *rport = lsop->rport;
@@ -1450,7 +1457,7 @@ nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
dev_warn(lport->dev,
"LLDD rejected LS RSP xmt: LS %d status %d\n",
w0->ls_cmd, ret);
- nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
+ nvme_fc_xmt_ls_rsp_free(lsop);
return;
}
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 250f3da67cc9..878ea8b1a0ac 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -10,10 +10,61 @@
#include "nvme.h"
bool multipath = true;
-module_param(multipath, bool, 0444);
+static bool multipath_always_on;
+
+static int multipath_param_set(const char *val, const struct kernel_param *kp)
+{
+ int ret;
+ bool *arg = kp->arg;
+
+ ret = param_set_bool(val, kp);
+ if (ret)
+ return ret;
+
+ if (multipath_always_on && !*arg) {
+ pr_err("Can't disable multipath when multipath_always_on is configured.\n");
+ *arg = true;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct kernel_param_ops multipath_param_ops = {
+ .set = multipath_param_set,
+ .get = param_get_bool,
+};
+
+module_param_cb(multipath, &multipath_param_ops, &multipath, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
+static int multipath_always_on_set(const char *val,
+ const struct kernel_param *kp)
+{
+ int ret;
+ bool *arg = kp->arg;
+
+ ret = param_set_bool(val, kp);
+ if (ret < 0)
+ return ret;
+
+ if (*arg)
+ multipath = true;
+
+ return 0;
+}
+
+static const struct kernel_param_ops multipath_always_on_ops = {
+ .set = multipath_always_on_set,
+ .get = param_get_bool,
+};
+
+module_param_cb(multipath_always_on, &multipath_always_on_ops,
+ &multipath_always_on, 0444);
+MODULE_PARM_DESC(multipath_always_on,
+ "create multipath node always except for private namespace with non-unique nsid; note that this also implicitly enables native multipath support");
+
static const char *nvme_iopolicy_names[] = {
[NVME_IOPOLICY_NUMA] = "numa",
[NVME_IOPOLICY_RR] = "round-robin",
@@ -442,7 +493,17 @@ static bool nvme_available_path(struct nvme_ns_head *head)
break;
}
}
- return false;
+
+ /*
+ * If "head->delayed_removal_secs" is configured (i.e., non-zero), do
+ * not immediately fail I/O. Instead, requeue the I/O for the configured
+ * duration, anticipating that if there's a transient link failure then
+ * it may recover within this time window. This parameter is exported to
+ * userspace via sysfs, and its default value is zero. It is internally
+ * mapped to NVME_NSHEAD_QUEUE_IF_NO_PATH. When delayed_removal_secs is
+ * non-zero, this flag is set to true. When zero, the flag is cleared.
+ */
+ return nvme_mpath_queue_if_no_path(head);
}
static void nvme_ns_head_submit_bio(struct bio *bio)
@@ -617,6 +678,40 @@ static void nvme_requeue_work(struct work_struct *work)
}
}
+static void nvme_remove_head(struct nvme_ns_head *head)
+{
+ if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+ /*
+ * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+ * to allow multipath to fail all I/O.
+ */
+ kblockd_schedule_work(&head->requeue_work);
+
+ nvme_cdev_del(&head->cdev, &head->cdev_device);
+ synchronize_srcu(&head->srcu);
+ del_gendisk(head->disk);
+ nvme_put_ns_head(head);
+ }
+}
+
+static void nvme_remove_head_work(struct work_struct *work)
+{
+ struct nvme_ns_head *head = container_of(to_delayed_work(work),
+ struct nvme_ns_head, remove_work);
+ bool remove = false;
+
+ mutex_lock(&head->subsys->lock);
+ if (list_empty(&head->list)) {
+ list_del_init(&head->entry);
+ remove = true;
+ }
+ mutex_unlock(&head->subsys->lock);
+ if (remove)
+ nvme_remove_head(head);
+
+ module_put(THIS_MODULE);
+}
+
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
struct queue_limits lim;
@@ -626,19 +721,31 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
spin_lock_init(&head->requeue_lock);
INIT_WORK(&head->requeue_work, nvme_requeue_work);
INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
+ INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work);
+ head->delayed_removal_secs = 0;
/*
- * Add a multipath node if the subsystems supports multiple controllers.
- * We also do this for private namespaces as the namespace sharing flag
- * could change after a rescan.
+ * If "multipath_always_on" is enabled, a multipath node is added
+ * regardless of whether the disk is single/multi ported, and whether
+ * the namespace is shared or private. If "multipath_always_on" is not
+ * enabled, a multipath node is added only if the subsystem supports
+ * multiple controllers and the "multipath" option is configured. In
+ * either case, for private namespaces, we ensure that the NSID is
+ * unique.
*/
- if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
- !nvme_is_unique_nsid(ctrl, head) || !multipath)
+ if (!multipath_always_on) {
+ if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
+ !multipath)
+ return 0;
+ }
+
+ if (!nvme_is_unique_nsid(ctrl, head))
return 0;
blk_set_stacking_limits(&lim);
lim.dma_alignment = 3;
- lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
+ lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
+ BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
if (head->ids.csi == NVME_CSI_ZNS)
lim.features |= BLK_FEAT_ZONED;
@@ -659,6 +766,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance);
+ nvme_tryget_ns_head(head);
return 0;
}
@@ -1015,6 +1123,49 @@ static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr
}
DEVICE_ATTR_RO(numa_nodes);
+static ssize_t delayed_removal_secs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct nvme_ns_head *head = disk->private_data;
+ int ret;
+
+ mutex_lock(&head->subsys->lock);
+ ret = sysfs_emit(buf, "%u\n", head->delayed_removal_secs);
+ mutex_unlock(&head->subsys->lock);
+ return ret;
+}
+
+static ssize_t delayed_removal_secs_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct nvme_ns_head *head = disk->private_data;
+ unsigned int sec;
+ int ret;
+
+ ret = kstrtouint(buf, 0, &sec);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&head->subsys->lock);
+ head->delayed_removal_secs = sec;
+ if (sec)
+ set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
+ else
+ clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
+ mutex_unlock(&head->subsys->lock);
+ /*
+ * Ensure that update to NVME_NSHEAD_QUEUE_IF_NO_PATH is seen
+ * by its reader.
+ */
+ synchronize_srcu(&head->srcu);
+
+ return count;
+}
+
+DEVICE_ATTR_RW(delayed_removal_secs);
+
static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
{
@@ -1136,23 +1287,43 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
#endif
}
-void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
+void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
- if (!head->disk)
- return;
- if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
- nvme_cdev_del(&head->cdev, &head->cdev_device);
+ bool remove = false;
+
+ mutex_lock(&head->subsys->lock);
+ /*
+ * We are called when all paths have been removed, and at that point
+ * head->list is expected to be empty. However, nvme_remove_ns() and
+ * nvme_init_ns_head() can run concurrently and so if head->delayed_
+ * removal_secs is configured, it is possible that by the time we reach
+ * this point, head->list may no longer be empty. Therefore, we recheck
+ * head->list here. If it is no longer empty then we skip enqueuing the
+ * delayed head removal work.
+ */
+ if (!list_empty(&head->list))
+ goto out;
+
+ if (head->delayed_removal_secs) {
/*
- * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
- * to allow multipath to fail all I/O.
+ * Ensure that no one could remove this module while the head
+ * remove work is pending.
*/
- synchronize_srcu(&head->srcu);
- kblockd_schedule_work(&head->requeue_work);
- del_gendisk(head->disk);
+ if (!try_module_get(THIS_MODULE))
+ goto out;
+ queue_delayed_work(nvme_wq, &head->remove_work,
+ head->delayed_removal_secs * HZ);
+ } else {
+ list_del_init(&head->entry);
+ remove = true;
}
+out:
+ mutex_unlock(&head->subsys->lock);
+ if (remove)
+ nvme_remove_head(head);
}
-void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+void nvme_mpath_put_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 51e078642127..ad0c1f834f09 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -410,6 +410,7 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
+ u16 awupf; /* 0's based value. */
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -442,11 +443,11 @@ struct nvme_subsystem {
u8 cmic;
enum nvme_subsys_type subtype;
u16 vendor_id;
- u16 awupf; /* 0's based awupf value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
#endif
+ u32 atomic_bs;
};
/*
@@ -496,6 +497,9 @@ struct nvme_ns_head {
struct device cdev_device;
struct gendisk *disk;
+
+ u16 nr_plids;
+ u16 *plids;
#ifdef CONFIG_NVME_MULTIPATH
struct bio_list requeue_list;
spinlock_t requeue_lock;
@@ -503,7 +507,10 @@ struct nvme_ns_head {
struct work_struct partition_scan_work;
struct mutex lock;
unsigned long flags;
-#define NVME_NSHEAD_DISK_LIVE 0
+ struct delayed_work remove_work;
+ unsigned int delayed_removal_secs;
+#define NVME_NSHEAD_DISK_LIVE 0
+#define NVME_NSHEAD_QUEUE_IF_NO_PATH 1
struct nvme_ns __rcu *current_path[];
#endif
};
@@ -896,10 +903,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
int qid, nvme_submit_flags_t flags);
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
- u32 *result);
+ void *result);
int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
- u32 *result);
+ void *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
@@ -960,7 +967,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns);
void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns);
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
-void nvme_mpath_remove_disk(struct nvme_ns_head *head);
+void nvme_mpath_put_disk(struct nvme_ns_head *head);
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
void nvme_mpath_update(struct nvme_ctrl *ctrl);
@@ -969,7 +976,7 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
-void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
+void nvme_mpath_remove_disk(struct nvme_ns_head *head);
void nvme_mpath_start_request(struct request *rq);
void nvme_mpath_end_request(struct request *rq);
@@ -986,12 +993,19 @@ extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute dev_attr_queue_depth;
extern struct device_attribute dev_attr_numa_nodes;
+extern struct device_attribute dev_attr_delayed_removal_secs;
extern struct device_attribute subsys_attr_iopolicy;
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
return disk->fops == &nvme_ns_head_ops;
}
+static inline bool nvme_mpath_queue_if_no_path(struct nvme_ns_head *head)
+{
+ if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags))
+ return true;
+ return false;
+}
#else
#define multipath false
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
@@ -1012,7 +1026,7 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
{
}
-static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+static inline void nvme_mpath_put_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns)
@@ -1031,7 +1045,7 @@ static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
-static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
+static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_trace_bio_complete(struct request *req)
@@ -1079,6 +1093,10 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
return false;
}
+static inline bool nvme_mpath_queue_if_no_path(struct nvme_ns_head *head)
+{
+ return false;
+}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2e30e9be7408..e0bfe04a2bc2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/nodemask.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/suspend.h>
@@ -34,16 +35,31 @@
#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes)
#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion))
-#define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc))
+/* Optimisation for I/Os between 4k and 128k */
+#define NVME_SMALL_POOL_SIZE 256
/*
* These can be higher, but we need to ensure that any command doesn't
* require an sg allocation that needs more than a page of data.
*/
#define NVME_MAX_KB_SZ 8192
-#define NVME_MAX_SEGS 128
-#define NVME_MAX_META_SEGS 15
-#define NVME_MAX_NR_ALLOCATIONS 5
+#define NVME_MAX_NR_DESCRIPTORS 5
+
+/*
+ * For data SGLs we support a single descriptors worth of SGL entries, but for
+ * now we also limit it to avoid an allocation larger than PAGE_SIZE for the
+ * scatterlist.
+ */
+#define NVME_MAX_SEGS \
+ min(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc), \
+ (PAGE_SIZE / sizeof(struct scatterlist)))
+
+/*
+ * For metadata SGLs, only the small descriptor is supported, and the first
+ * entry is the segment descriptor, which for the data pointer sits in the SQE.
+ */
+#define NVME_MAX_META_SEGS \
+ ((NVME_SMALL_POOL_SIZE / sizeof(struct nvme_sgl_desc)) - 1)
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0444);
@@ -112,6 +128,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
static void nvme_delete_io_queues(struct nvme_dev *dev);
static void nvme_update_attrs(struct nvme_dev *dev);
+struct nvme_descriptor_pools {
+ struct dma_pool *large;
+ struct dma_pool *small;
+};
+
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
@@ -121,8 +142,6 @@ struct nvme_dev {
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
struct device *dev;
- struct dma_pool *prp_page_pool;
- struct dma_pool *prp_small_pool;
unsigned online_queues;
unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES];
@@ -162,6 +181,7 @@ struct nvme_dev {
unsigned int nr_allocated_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
+ struct nvme_descriptor_pools descriptor_pools[];
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
@@ -191,6 +211,7 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
*/
struct nvme_queue {
struct nvme_dev *dev;
+ struct nvme_descriptor_pools descriptor_pools;
spinlock_t sq_lock;
void *sq_cmds;
/* only used for poll queues: */
@@ -219,30 +240,30 @@ struct nvme_queue {
struct completion delete_done;
};
-union nvme_descriptor {
- struct nvme_sgl_desc *sg_list;
- __le64 *prp_list;
+/* bits for iod->flags */
+enum nvme_iod_flags {
+ /* this command has been aborted by the timeout handler */
+ IOD_ABORTED = 1U << 0,
+
+ /* uses the small descriptor pool */
+ IOD_SMALL_DESCRIPTOR = 1U << 1,
};
/*
* The nvme_iod describes the data in an I/O.
- *
- * The sg pointer contains the list of PRP/SGL chunk allocations in addition
- * to the actual struct scatterlist.
*/
struct nvme_iod {
struct nvme_request req;
struct nvme_command cmd;
- bool aborted;
- s8 nr_allocations; /* PRP list pool allocations. 0 means small
- pool in use */
+ u8 flags;
+ u8 nr_descriptors;
unsigned int dma_len; /* length of single DMA segment mapping */
dma_addr_t first_dma;
dma_addr_t meta_dma;
struct sg_table sgt;
struct sg_table meta_sgt;
- union nvme_descriptor meta_list;
- union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
+ struct nvme_sgl_desc *meta_descriptor;
+ void *descriptors[NVME_MAX_NR_DESCRIPTORS];
};
static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
@@ -390,37 +411,85 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
* as it only leads to a small amount of wasted memory for the lifetime of
* the I/O.
*/
-static int nvme_pci_npages_prp(void)
+static __always_inline int nvme_pci_npages_prp(void)
{
unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
}
-static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
- unsigned int hctx_idx)
+static struct nvme_descriptor_pools *
+nvme_setup_descriptor_pools(struct nvme_dev *dev, unsigned numa_node)
{
- struct nvme_dev *dev = to_nvme_dev(data);
- struct nvme_queue *nvmeq = &dev->queues[0];
+ struct nvme_descriptor_pools *pools = &dev->descriptor_pools[numa_node];
+ size_t small_align = NVME_SMALL_POOL_SIZE;
- WARN_ON(hctx_idx != 0);
- WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
+ if (pools->small)
+ return pools; /* already initialized */
- hctx->driver_data = nvmeq;
- return 0;
+ pools->large = dma_pool_create_node("nvme descriptor page", dev->dev,
+ NVME_CTRL_PAGE_SIZE, NVME_CTRL_PAGE_SIZE, 0, numa_node);
+ if (!pools->large)
+ return ERR_PTR(-ENOMEM);
+
+ if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512)
+ small_align = 512;
+
+ pools->small = dma_pool_create_node("nvme descriptor small", dev->dev,
+ NVME_SMALL_POOL_SIZE, small_align, 0, numa_node);
+ if (!pools->small) {
+ dma_pool_destroy(pools->large);
+ pools->large = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return pools;
}
-static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
- unsigned int hctx_idx)
+static void nvme_release_descriptor_pools(struct nvme_dev *dev)
+{
+ unsigned i;
+
+ for (i = 0; i < nr_node_ids; i++) {
+ struct nvme_descriptor_pools *pools = &dev->descriptor_pools[i];
+
+ dma_pool_destroy(pools->large);
+ dma_pool_destroy(pools->small);
+ }
+}
+
+static int nvme_init_hctx_common(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned qid)
{
struct nvme_dev *dev = to_nvme_dev(data);
- struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+ struct nvme_descriptor_pools *pools;
+ struct blk_mq_tags *tags;
+
+ tags = qid ? dev->tagset.tags[qid - 1] : dev->admin_tagset.tags[0];
+ WARN_ON(tags != hctx->tags);
+ pools = nvme_setup_descriptor_pools(dev, hctx->numa_node);
+ if (IS_ERR(pools))
+ return PTR_ERR(pools);
- WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
+ nvmeq->descriptor_pools = *pools;
hctx->driver_data = nvmeq;
return 0;
}
+static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+{
+ WARN_ON(hctx_idx != 0);
+ return nvme_init_hctx_common(hctx, data, 0);
+}
+
+static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+{
+ return nvme_init_hctx_common(hctx, data, hctx_idx + 1);
+}
+
static int nvme_pci_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
unsigned int numa_node)
@@ -537,23 +606,39 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
return true;
}
-static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
+static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq,
+ struct nvme_iod *iod)
+{
+ if (iod->flags & IOD_SMALL_DESCRIPTOR)
+ return nvmeq->descriptor_pools.small;
+ return nvmeq->descriptor_pools.large;
+}
+
+static void nvme_free_descriptors(struct nvme_queue *nvmeq, struct request *req)
{
const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_addr_t dma_addr = iod->first_dma;
int i;
- for (i = 0; i < iod->nr_allocations; i++) {
- __le64 *prp_list = iod->list[i].prp_list;
+ if (iod->nr_descriptors == 1) {
+ dma_pool_free(nvme_dma_pool(nvmeq, iod), iod->descriptors[0],
+ dma_addr);
+ return;
+ }
+
+ for (i = 0; i < iod->nr_descriptors; i++) {
+ __le64 *prp_list = iod->descriptors[i];
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
- dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
+ dma_pool_free(nvmeq->descriptor_pools.large, prp_list,
+ dma_addr);
dma_addr = next_dma_addr;
}
}
-static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+ struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -566,15 +651,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
WARN_ON_ONCE(!iod->sgt.nents);
dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);
-
- if (iod->nr_allocations == 0)
- dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list,
- iod->first_dma);
- else if (iod->nr_allocations == 1)
- dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list,
- iod->first_dma);
- else
- nvme_free_prps(dev, req);
+ nvme_free_descriptors(nvmeq, req);
mempool_free(iod->sgt.sgl, dev->iod_mempool);
}
@@ -592,11 +669,10 @@ static void nvme_print_sgl(struct scatterlist *sgl, int nents)
}
}
-static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
+static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq,
struct request *req, struct nvme_rw_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct dma_pool *pool;
int length = blk_rq_payload_bytes(req);
struct scatterlist *sg = iod->sgt.sgl;
int dma_len = sg_dma_len(sg);
@@ -604,7 +680,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
__le64 *prp_list;
dma_addr_t prp_dma;
- int nprps, i;
+ int i;
length -= (NVME_CTRL_PAGE_SIZE - offset);
if (length <= 0) {
@@ -626,30 +702,26 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
goto done;
}
- nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
- if (nprps <= (256 / 8)) {
- pool = dev->prp_small_pool;
- iod->nr_allocations = 0;
- } else {
- pool = dev->prp_page_pool;
- iod->nr_allocations = 1;
- }
+ if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <=
+ NVME_SMALL_POOL_SIZE / sizeof(__le64))
+ iod->flags |= IOD_SMALL_DESCRIPTOR;
- prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
- if (!prp_list) {
- iod->nr_allocations = -1;
+ prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
+ &prp_dma);
+ if (!prp_list)
return BLK_STS_RESOURCE;
- }
- iod->list[0].prp_list = prp_list;
+ iod->descriptors[iod->nr_descriptors++] = prp_list;
iod->first_dma = prp_dma;
i = 0;
for (;;) {
if (i == NVME_CTRL_PAGE_SIZE >> 3) {
__le64 *old_prp_list = prp_list;
- prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
+
+ prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large,
+ GFP_ATOMIC, &prp_dma);
if (!prp_list)
goto free_prps;
- iod->list[iod->nr_allocations++].prp_list = prp_list;
+ iod->descriptors[iod->nr_descriptors++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
i = 1;
@@ -673,7 +745,7 @@ done:
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
return BLK_STS_OK;
free_prps:
- nvme_free_prps(dev, req);
+ nvme_free_descriptors(nvmeq, req);
return BLK_STS_RESOURCE;
bad_sgl:
WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents),
@@ -698,11 +770,10 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
}
-static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
+static blk_status_t nvme_pci_setup_sgls(struct nvme_queue *nvmeq,
struct request *req, struct nvme_rw_command *cmd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct dma_pool *pool;
struct nvme_sgl_desc *sg_list;
struct scatterlist *sg = iod->sgt.sgl;
unsigned int entries = iod->sgt.nents;
@@ -717,21 +788,14 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
return BLK_STS_OK;
}
- if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
- pool = dev->prp_small_pool;
- iod->nr_allocations = 0;
- } else {
- pool = dev->prp_page_pool;
- iod->nr_allocations = 1;
- }
+ if (entries <= NVME_SMALL_POOL_SIZE / sizeof(*sg_list))
+ iod->flags |= IOD_SMALL_DESCRIPTOR;
- sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
- if (!sg_list) {
- iod->nr_allocations = -1;
+ sg_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
+ &sgl_dma);
+ if (!sg_list)
return BLK_STS_RESOURCE;
- }
-
- iod->list[0].sg_list = sg_list;
+ iod->descriptors[iod->nr_descriptors++] = sg_list;
iod->first_dma = sgl_dma;
nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
@@ -785,12 +849,12 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
blk_status_t ret = BLK_STS_RESOURCE;
int rc;
if (blk_rq_nr_phys_segments(req) == 1) {
- struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct bio_vec bv = req_bvec(req);
if (!is_pci_p2pdma_page(bv.bv_page)) {
@@ -825,9 +889,9 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
}
if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
- ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
+ ret = nvme_pci_setup_sgls(nvmeq, req, &cmnd->rw);
else
- ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
+ ret = nvme_pci_setup_prps(nvmeq, req, &cmnd->rw);
if (ret != BLK_STS_OK)
goto out_unmap_sg;
return BLK_STS_OK;
@@ -842,6 +906,7 @@ out_free_sg:
static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev,
struct request *req)
{
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_rw_command *cmnd = &iod->cmd.rw;
struct nvme_sgl_desc *sg_list;
@@ -865,12 +930,13 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev,
if (rc)
goto out_free_sg;
- sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma);
+ sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC,
+ &sgl_dma);
if (!sg_list)
goto out_unmap_sg;
entries = iod->meta_sgt.nents;
- iod->meta_list.sg_list = sg_list;
+ iod->meta_descriptor = sg_list;
iod->meta_dma = sgl_dma;
cmnd->flags = NVME_CMD_SGL_METASEG;
@@ -912,7 +978,10 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct nvme_dev *dev,
static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req)
{
- if (nvme_pci_metadata_use_sgls(dev, req))
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+ if ((iod->cmd.common.flags & NVME_CMD_SGL_METABUF) &&
+ nvme_pci_metadata_use_sgls(dev, req))
return nvme_pci_setup_meta_sgls(dev, req);
return nvme_pci_setup_meta_mptr(dev, req);
}
@@ -922,8 +991,8 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
blk_status_t ret;
- iod->aborted = false;
- iod->nr_allocations = -1;
+ iod->flags = 0;
+ iod->nr_descriptors = 0;
iod->sgt.nents = 0;
iod->meta_sgt.nents = 0;
@@ -947,7 +1016,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
return BLK_STS_OK;
out_unmap_data:
if (blk_rq_nr_phys_segments(req))
- nvme_unmap_data(dev, req);
+ nvme_unmap_data(dev, req->mq_hctx->driver_data, req);
out_free_cmd:
nvme_cleanup_cmd(req);
return ret;
@@ -1037,6 +1106,7 @@ static void nvme_queue_rqs(struct rq_list *rqlist)
}
static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev,
+ struct nvme_queue *nvmeq,
struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -1048,8 +1118,8 @@ static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev,
return;
}
- dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list,
- iod->meta_dma);
+ dma_pool_free(nvmeq->descriptor_pools.small, iod->meta_descriptor,
+ iod->meta_dma);
dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
}
@@ -1060,10 +1130,10 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_dev *dev = nvmeq->dev;
if (blk_integrity_rq(req))
- nvme_unmap_metadata(dev, req);
+ nvme_unmap_metadata(dev, nvmeq, req);
if (blk_rq_nr_phys_segments(req))
- nvme_unmap_data(dev, req);
+ nvme_unmap_data(dev, nvmeq, req);
}
static void nvme_pci_complete_rq(struct request *req)
@@ -1202,7 +1272,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
+ spin_lock(&nvmeq->cq_poll_lock);
nvme_poll_cq(nvmeq, NULL);
+ spin_unlock(&nvmeq->cq_poll_lock);
enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}
@@ -1488,7 +1560,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* returned to the driver, or if this is the admin queue.
*/
opcode = nvme_req(req)->cmd->common.opcode;
- if (!nvmeq->qid || iod->aborted) {
+ if (!nvmeq->qid || (iod->flags & IOD_ABORTED)) {
dev_warn(dev->ctrl.device,
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
req->tag, nvme_cid(req), opcode,
@@ -1501,7 +1573,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
atomic_inc(&dev->ctrl.abort_limit);
return BLK_EH_RESET_TIMER;
}
- iod->aborted = true;
+ iod->flags |= IOD_ABORTED;
cmd.abort.opcode = nvme_admin_abort_cmd;
cmd.abort.cid = nvme_cid(req);
@@ -2840,35 +2912,6 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
return 0;
}
-static int nvme_setup_prp_pools(struct nvme_dev *dev)
-{
- size_t small_align = 256;
-
- dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
- NVME_CTRL_PAGE_SIZE,
- NVME_CTRL_PAGE_SIZE, 0);
- if (!dev->prp_page_pool)
- return -ENOMEM;
-
- if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512)
- small_align = 512;
-
- /* Optimisation for I/Os between 4k and 128k */
- dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
- 256, small_align, 0);
- if (!dev->prp_small_pool) {
- dma_pool_destroy(dev->prp_page_pool);
- return -ENOMEM;
- }
- return 0;
-}
-
-static void nvme_release_prp_pools(struct nvme_dev *dev)
-{
- dma_pool_destroy(dev->prp_page_pool);
- dma_pool_destroy(dev->prp_small_pool);
-}
-
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
@@ -3183,7 +3226,8 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
struct nvme_dev *dev;
int ret = -ENOMEM;
- dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
+ dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids),
+ GFP_KERNEL, node);
if (!dev)
return ERR_PTR(-ENOMEM);
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
@@ -3258,13 +3302,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto out_uninit_ctrl;
- result = nvme_setup_prp_pools(dev);
- if (result)
- goto out_dev_unmap;
-
result = nvme_pci_alloc_iod_mempool(dev);
if (result)
- goto out_release_prp_pools;
+ goto out_dev_unmap;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
@@ -3340,8 +3380,6 @@ out_disable:
out_release_iod_mempool:
mempool_destroy(dev->iod_mempool);
mempool_destroy(dev->iod_meta_mempool);
-out_release_prp_pools:
- nvme_release_prp_pools(dev);
out_dev_unmap:
nvme_dev_unmap(dev);
out_uninit_ctrl:
@@ -3406,7 +3444,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_queues(dev, 0);
mempool_destroy(dev->iod_mempool);
mempool_destroy(dev->iod_meta_mempool);
- nvme_release_prp_pools(dev);
+ nvme_release_descriptor_pools(dev);
nvme_dev_unmap(dev);
nvme_uninit_ctrl(&dev->ctrl);
}
@@ -3737,6 +3775,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */
@@ -3805,9 +3845,7 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
- BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE);
- BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE);
- BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS);
+ BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_DESCRIPTORS);
return pci_register_driver(&nvme_driver);
}
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 6d31226f7a4f..29430949ce2f 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -260,6 +260,7 @@ static struct attribute *nvme_ns_attrs[] = {
&dev_attr_ana_state.attr,
&dev_attr_queue_depth.attr,
&dev_attr_numa_nodes.attr,
+ &dev_attr_delayed_removal_secs.attr,
#endif
&dev_attr_io_passthru_err_log_enabled.attr,
NULL,
@@ -296,6 +297,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
if (nvme_disk_is_ns_head(dev_to_disk(dev)))
return 0;
}
+ if (a == &dev_attr_delayed_removal_secs.attr) {
+ struct gendisk *disk = dev_to_disk(dev);
+
+ if (!nvme_disk_is_ns_head(disk))
+ return 0;
+ }
#endif
return a->mode;
}
@@ -306,13 +313,41 @@ static const struct attribute_group nvme_ns_attr_group = {
};
#ifdef CONFIG_NVME_MULTIPATH
+/*
+ * NOTE: The dummy attribute does not appear in sysfs. It exists solely to allow
+ * control over the visibility of the multipath sysfs node. Without at least one
+ * attribute defined in nvme_ns_mpath_attrs[], the sysfs implementation does not
+ * invoke the multipath_sysfs_group_visible() method. As a result, we would not
+ * be able to control the visibility of the multipath sysfs node.
+ */
+static struct attribute dummy_attr = {
+ .name = "dummy",
+};
+
static struct attribute *nvme_ns_mpath_attrs[] = {
+ &dummy_attr,
NULL,
};
+static bool multipath_sysfs_group_visible(struct kobject *kobj)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+
+ return nvme_disk_is_ns_head(dev_to_disk(dev));
+}
+
+static bool multipath_sysfs_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ return false;
+}
+
+DEFINE_SYSFS_GROUP_VISIBLE(multipath_sysfs)
+
const struct attribute_group nvme_ns_mpath_attr_group = {
.name = "multipath",
.attrs = nvme_ns_mpath_attrs,
+ .is_visible = SYSFS_GROUP_VISIBLE(multipath_sysfs),
};
#endif
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aba365f97cf6..853bc67d045c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -403,7 +403,7 @@ static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
}
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
- bool sync, bool last)
+ bool last)
{
struct nvme_tcp_queue *queue = req->queue;
bool empty;
@@ -417,7 +417,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
* are on the same cpu, so we don't introduce contention.
*/
if (queue->io_cpu == raw_smp_processor_id() &&
- sync && empty && mutex_trylock(&queue->send_mutex)) {
+ empty && mutex_trylock(&queue->send_mutex)) {
nvme_tcp_send_all(queue);
mutex_unlock(&queue->send_mutex);
}
@@ -770,7 +770,9 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
req->ttag = pdu->ttag;
nvme_tcp_setup_h2c_data_pdu(req);
- nvme_tcp_queue_request(req, false, true);
+
+ llist_add(&req->lentry, &queue->req_list);
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
return 0;
}
@@ -2385,7 +2387,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
if (ret)
return ret;
- if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) {
+ if (ctrl->opts->concat && !ctrl->tls_pskid) {
/* See comments for nvme_tcp_key_revoke_needed() */
dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n");
nvme_stop_keep_alive(ctrl);
@@ -2637,7 +2639,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0;
- nvme_tcp_queue_request(&ctrl->async_req, true, true);
+ nvme_tcp_queue_request(&ctrl->async_req, true);
}
static void nvme_tcp_complete_timed_out(struct request *rq)
@@ -2789,7 +2791,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
nvme_start_request(rq);
- nvme_tcp_queue_request(req, true, bd->last);
+ nvme_tcp_queue_request(req, bd->last);
return BLK_STS_OK;
}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index acc138bbf8f2..c7317299078d 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -63,14 +63,9 @@ static void nvmet_execute_create_sq(struct nvmet_req *req)
if (status != NVME_SC_SUCCESS)
goto complete;
- /*
- * Note: The NVMe specification allows multiple SQs to use the same CQ.
- * However, the target code does not really support that. So for now,
- * prevent this and fail the command if sqid and cqid are different.
- */
- if (!cqid || cqid != sqid) {
- pr_err("SQ %u: Unsupported CQID %u\n", sqid, cqid);
- status = NVME_SC_CQ_INVALID | NVME_STATUS_DNR;
+ status = nvmet_check_io_cqid(ctrl, cqid, false);
+ if (status != NVME_SC_SUCCESS) {
+ pr_err("SQ %u: Invalid CQID %u\n", sqid, cqid);
goto complete;
}
@@ -79,7 +74,7 @@ static void nvmet_execute_create_sq(struct nvmet_req *req)
goto complete;
}
- status = ctrl->ops->create_sq(ctrl, sqid, sq_flags, qsize, prp1);
+ status = ctrl->ops->create_sq(ctrl, sqid, cqid, sq_flags, qsize, prp1);
complete:
nvmet_req_complete(req, status);
@@ -96,14 +91,15 @@ static void nvmet_execute_delete_cq(struct nvmet_req *req)
goto complete;
}
- if (!cqid) {
- status = NVME_SC_QID_INVALID | NVME_STATUS_DNR;
+ status = nvmet_check_io_cqid(ctrl, cqid, false);
+ if (status != NVME_SC_SUCCESS)
goto complete;
- }
- status = nvmet_check_cqid(ctrl, cqid);
- if (status != NVME_SC_SUCCESS)
+ if (!ctrl->cqs[cqid] || nvmet_cq_in_use(ctrl->cqs[cqid])) {
+ /* Some SQs are still using this CQ */
+ status = NVME_SC_QID_INVALID | NVME_STATUS_DNR;
goto complete;
+ }
status = ctrl->ops->delete_cq(ctrl, cqid);
@@ -127,12 +123,7 @@ static void nvmet_execute_create_cq(struct nvmet_req *req)
goto complete;
}
- if (!cqid) {
- status = NVME_SC_QID_INVALID | NVME_STATUS_DNR;
- goto complete;
- }
-
- status = nvmet_check_cqid(ctrl, cqid);
+ status = nvmet_check_io_cqid(ctrl, cqid, true);
if (status != NVME_SC_SUCCESS)
goto complete;
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 9429b8218408..b340380f3892 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -280,9 +280,12 @@ void nvmet_destroy_auth(struct nvmet_ctrl *ctrl)
bool nvmet_check_auth_status(struct nvmet_req *req)
{
- if (req->sq->ctrl->host_key &&
- !req->sq->authenticated)
- return false;
+ if (req->sq->ctrl->host_key) {
+ if (req->sq->qid > 0)
+ return true;
+ if (!req->sq->authenticated)
+ return false;
+ }
return true;
}
@@ -290,7 +293,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
unsigned int shash_len)
{
struct crypto_shash *shash_tfm;
- struct shash_desc *shash;
+ SHASH_DESC_ON_STACK(shash, shash_tfm);
struct nvmet_ctrl *ctrl = req->sq->ctrl;
const char *hash_name;
u8 *challenge = req->sq->dhchap_c1;
@@ -342,19 +345,13 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
req->sq->dhchap_c1,
challenge, shash_len);
if (ret)
- goto out_free_challenge;
+ goto out;
}
pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
req->sq->dhchap_tid);
- shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
- GFP_KERNEL);
- if (!shash) {
- ret = -ENOMEM;
- goto out_free_challenge;
- }
shash->tfm = shash_tfm;
ret = crypto_shash_init(shash);
if (ret)
@@ -389,8 +386,6 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
goto out;
ret = crypto_shash_final(shash, response);
out:
- kfree(shash);
-out_free_challenge:
if (challenge != req->sq->dhchap_c1)
kfree(challenge);
out_free_response:
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 245475c43127..db7b17d1094e 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -813,11 +813,43 @@ void nvmet_req_complete(struct nvmet_req *req, u16 status)
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);
+void nvmet_cq_init(struct nvmet_cq *cq)
+{
+ refcount_set(&cq->ref, 1);
+}
+EXPORT_SYMBOL_GPL(nvmet_cq_init);
+
+bool nvmet_cq_get(struct nvmet_cq *cq)
+{
+ return refcount_inc_not_zero(&cq->ref);
+}
+EXPORT_SYMBOL_GPL(nvmet_cq_get);
+
+void nvmet_cq_put(struct nvmet_cq *cq)
+{
+ if (refcount_dec_and_test(&cq->ref))
+ nvmet_cq_destroy(cq);
+}
+EXPORT_SYMBOL_GPL(nvmet_cq_put);
+
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
u16 qid, u16 size)
{
cq->qid = qid;
cq->size = size;
+
+ ctrl->cqs[qid] = cq;
+}
+
+void nvmet_cq_destroy(struct nvmet_cq *cq)
+{
+ struct nvmet_ctrl *ctrl = cq->ctrl;
+
+ if (ctrl) {
+ ctrl->cqs[cq->qid] = NULL;
+ nvmet_ctrl_put(cq->ctrl);
+ cq->ctrl = NULL;
+ }
}
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
@@ -837,37 +869,47 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
complete(&sq->confirm_done);
}
-u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid)
+u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
- if (!ctrl->sqs)
+ if (!ctrl->cqs)
return NVME_SC_INTERNAL | NVME_STATUS_DNR;
if (cqid > ctrl->subsys->max_qid)
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
- /*
- * Note: For PCI controllers, the NVMe specifications allows multiple
- * SQs to share a single CQ. However, we do not support this yet, so
- * check that there is no SQ defined for a CQ. If one exist, then the
- * CQ ID is invalid for creation as well as when the CQ is being
- * deleted (as that would mean that the SQ was not deleted before the
- * CQ).
- */
- if (ctrl->sqs[cqid])
+ if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid]))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
return NVME_SC_SUCCESS;
}
+u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
+{
+ if (!cqid)
+ return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
+ return nvmet_check_cqid(ctrl, cqid, create);
+}
+
+bool nvmet_cq_in_use(struct nvmet_cq *cq)
+{
+ return refcount_read(&cq->ref) > 1;
+}
+EXPORT_SYMBOL_GPL(nvmet_cq_in_use);
+
u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
u16 qid, u16 size)
{
u16 status;
- status = nvmet_check_cqid(ctrl, qid);
+ status = nvmet_check_cqid(ctrl, qid, true);
if (status != NVME_SC_SUCCESS)
return status;
+ if (!kref_get_unless_zero(&ctrl->ref))
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
+ cq->ctrl = ctrl;
+
+ nvmet_cq_init(cq);
nvmet_cq_setup(ctrl, cq, qid, size);
return NVME_SC_SUCCESS;
@@ -891,7 +933,7 @@ u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
}
u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
- u16 sqid, u16 size)
+ struct nvmet_cq *cq, u16 sqid, u16 size)
{
u16 status;
int ret;
@@ -903,7 +945,7 @@ u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
if (status != NVME_SC_SUCCESS)
return status;
- ret = nvmet_sq_init(sq);
+ ret = nvmet_sq_init(sq, cq);
if (ret) {
status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
goto ctrl_put;
@@ -935,6 +977,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
wait_for_completion(&sq->free_done);
percpu_ref_exit(&sq->ref);
nvmet_auth_sq_free(sq);
+ nvmet_cq_put(sq->cq);
/*
* we must reference the ctrl again after waiting for inflight IO
@@ -967,18 +1010,23 @@ static void nvmet_sq_free(struct percpu_ref *ref)
complete(&sq->free_done);
}
-int nvmet_sq_init(struct nvmet_sq *sq)
+int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
{
int ret;
+ if (!nvmet_cq_get(cq))
+ return -EINVAL;
+
ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
if (ret) {
pr_err("percpu_ref init failed!\n");
+ nvmet_cq_put(cq);
return ret;
}
init_completion(&sq->free_done);
init_completion(&sq->confirm_done);
nvmet_auth_sq_init(sq);
+ sq->cq = cq;
return 0;
}
@@ -1108,13 +1156,13 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return ret;
}
-bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
- struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
+bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq,
+ const struct nvmet_fabrics_ops *ops)
{
u8 flags = req->cmd->common.flags;
u16 status;
- req->cq = cq;
+ req->cq = sq->cq;
req->sq = sq;
req->ops = ops;
req->sg = NULL;
@@ -1612,12 +1660,17 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
if (!ctrl->sqs)
goto out_free_changed_ns_list;
+ ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *),
+ GFP_KERNEL);
+ if (!ctrl->cqs)
+ goto out_free_sqs;
+
ret = ida_alloc_range(&cntlid_ida,
subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
- goto out_free_sqs;
+ goto out_free_cqs;
}
ctrl->cntlid = ret;
@@ -1676,6 +1729,8 @@ init_pr_fail:
mutex_unlock(&subsys->lock);
nvmet_stop_keep_alive_timer(ctrl);
ida_free(&cntlid_ida, ctrl->cntlid);
+out_free_cqs:
+ kfree(ctrl->cqs);
out_free_sqs:
kfree(ctrl->sqs);
out_free_changed_ns_list:
@@ -1712,6 +1767,7 @@ static void nvmet_ctrl_free(struct kref *ref)
nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
+ kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);
kfree(ctrl);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index df7207640506..c06f3e04296c 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -119,7 +119,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE);
memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
- strncpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
+ strscpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
}
/*
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index f012bdf89850..7b8d8b397802 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -208,6 +208,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
}
+ kref_get(&ctrl->ref);
+ old = cmpxchg(&req->cq->ctrl, NULL, ctrl);
+ if (old) {
+ pr_warn("queue already connected!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, opcode);
+ return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
+ }
+
/* note: convert queue size from 0's-based value to 1's-based value */
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
@@ -239,8 +247,8 @@ static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
bool needs_auth = nvmet_has_auth(ctrl, sq);
key_serial_t keyid = nvmet_queue_tls_keyid(sq);
- /* Do not authenticate I/O queues for secure concatenation */
- if (ctrl->concat && sq->qid)
+ /* Do not authenticate I/O queues */
+ if (sq->qid)
needs_auth = false;
if (keyid)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 7b50130f10f6..254537b93e63 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -816,7 +816,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
nvmet_fc_prep_fcp_iodlist(assoc->tgtport, queue);
- ret = nvmet_sq_init(&queue->nvme_sq);
+ nvmet_cq_init(&queue->nvme_cq);
+ ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
if (ret)
goto out_fail_iodlist;
@@ -826,6 +827,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
return queue;
out_fail_iodlist:
+ nvmet_cq_put(&queue->nvme_cq);
nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
destroy_workqueue(queue->work_q);
out_free_queue:
@@ -934,6 +936,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
flush_workqueue(queue->work_q);
nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_cq_put(&queue->nvme_cq);
nvmet_fc_tgt_q_put(queue);
}
@@ -1254,6 +1257,7 @@ nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport,
{
lockdep_assert_held(&nvmet_fc_tgtlock);
+ nvmet_fc_tgtport_get(tgtport);
pe->tgtport = tgtport;
tgtport->pe = pe;
@@ -1273,8 +1277,10 @@ nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe)
unsigned long flags;
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
- if (pe->tgtport)
+ if (pe->tgtport) {
+ nvmet_fc_tgtport_put(pe->tgtport);
pe->tgtport->pe = NULL;
+ }
list_del(&pe->pe_list);
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
}
@@ -1292,8 +1298,10 @@ nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport)
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
pe = tgtport->pe;
- if (pe)
+ if (pe) {
+ nvmet_fc_tgtport_put(pe->tgtport);
pe->tgtport = NULL;
+ }
tgtport->pe = NULL;
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
}
@@ -1316,6 +1324,9 @@ nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport)
list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) {
if (tgtport->fc_target_port.node_name == pe->node_name &&
tgtport->fc_target_port.port_name == pe->port_name) {
+ if (!nvmet_fc_tgtport_get(tgtport))
+ continue;
+
WARN_ON(pe->tgtport);
tgtport->pe = pe;
pe->tgtport = tgtport;
@@ -1580,6 +1591,39 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
}
+static void
+nvmet_fc_free_pending_reqs(struct nvmet_fc_tgtport *tgtport)
+{
+ struct nvmet_fc_ls_req_op *lsop;
+ struct nvmefc_ls_req *lsreq;
+ struct nvmet_fc_ls_iod *iod;
+ int i;
+
+ iod = tgtport->iod;
+ for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++)
+ cancel_work(&iod->work);
+
+ /*
+ * After this point the connection is lost and thus any pending
+ * request can't be processed by the normal completion path. This
+ * is likely a request from nvmet_fc_send_ls_req_async.
+ */
+ while ((lsop = list_first_entry_or_null(&tgtport->ls_req_list,
+ struct nvmet_fc_ls_req_op, lsreq_list))) {
+ list_del(&lsop->lsreq_list);
+
+ if (!lsop->req_queued)
+ continue;
+
+ lsreq = &lsop->ls_req;
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL);
+ nvmet_fc_tgtport_put(tgtport);
+ kfree(lsop);
+ }
+}
+
/**
* nvmet_fc_unregister_targetport - transport entry point called by an
* LLDD to deregister/remove a previously
@@ -1608,13 +1652,7 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
flush_workqueue(nvmet_wq);
- /*
- * should terminate LS's as well. However, LS's will be generated
- * at the tail end of association termination, so they likely don't
- * exist yet. And even if they did, it's worthwhile to just let
- * them finish and targetport ref counting will clean things up.
- */
-
+ nvmet_fc_free_pending_reqs(tgtport);
nvmet_fc_tgtport_put(tgtport);
return 0;
@@ -2531,10 +2569,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
fod->data_sg = NULL;
fod->data_sg_cnt = 0;
- ret = nvmet_req_init(&fod->req,
- &fod->queue->nvme_cq,
- &fod->queue->nvme_sq,
- &nvmet_fc_tgt_fcp_ops);
+ ret = nvmet_req_init(&fod->req, &fod->queue->nvme_sq,
+ &nvmet_fc_tgt_fcp_ops);
if (!ret) {
/* bad SQE content or invalid ctrl state */
/* nvmet layer has already called op done to send rsp. */
@@ -2860,12 +2896,17 @@ nvmet_fc_add_port(struct nvmet_port *port)
list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) {
if ((tgtport->fc_target_port.node_name == traddr.nn) &&
(tgtport->fc_target_port.port_name == traddr.pn)) {
+ if (!nvmet_fc_tgtport_get(tgtport))
+ continue;
+
/* a FC port can only be 1 nvmet port id */
if (!tgtport->pe) {
nvmet_fc_portentry_bind(tgtport, pe, port);
ret = 0;
} else
ret = -EALREADY;
+
+ nvmet_fc_tgtport_put(tgtport);
break;
}
}
@@ -2881,11 +2922,21 @@ static void
nvmet_fc_remove_port(struct nvmet_port *port)
{
struct nvmet_fc_port_entry *pe = port->priv;
+ struct nvmet_fc_tgtport *tgtport = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ if (pe->tgtport && nvmet_fc_tgtport_get(pe->tgtport))
+ tgtport = pe->tgtport;
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
nvmet_fc_portentry_unbind(pe);
- /* terminate any outstanding associations */
- __nvmet_fc_free_assocs(pe->tgtport);
+ if (tgtport) {
+ /* terminate any outstanding associations */
+ __nvmet_fc_free_assocs(tgtport);
+ nvmet_fc_tgtport_put(tgtport);
+ }
kfree(pe);
}
@@ -2894,10 +2945,21 @@ static void
nvmet_fc_discovery_chg(struct nvmet_port *port)
{
struct nvmet_fc_port_entry *pe = port->priv;
- struct nvmet_fc_tgtport *tgtport = pe->tgtport;
+ struct nvmet_fc_tgtport *tgtport = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ if (pe->tgtport && nvmet_fc_tgtport_get(pe->tgtport))
+ tgtport = pe->tgtport;
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+ if (!tgtport)
+ return;
if (tgtport && tgtport->ops->discovery_event)
tgtport->ops->discovery_event(&tgtport->fc_target_port);
+
+ nvmet_fc_tgtport_put(tgtport);
}
static ssize_t
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 641201e62c1b..257b497d515a 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -207,7 +207,6 @@ static LIST_HEAD(fcloop_nports);
struct fcloop_lport {
struct nvme_fc_local_port *localport;
struct list_head lport_list;
- struct completion unreg_done;
refcount_t ref;
};
@@ -215,6 +214,9 @@ struct fcloop_lport_priv {
struct fcloop_lport *lport;
};
+/* The port is already being removed, avoid double free */
+#define PORT_DELETED 0
+
struct fcloop_rport {
struct nvme_fc_remote_port *remoteport;
struct nvmet_fc_target_port *targetport;
@@ -223,6 +225,7 @@ struct fcloop_rport {
spinlock_t lock;
struct list_head ls_list;
struct work_struct ls_work;
+ unsigned long flags;
};
struct fcloop_tport {
@@ -233,6 +236,7 @@ struct fcloop_tport {
spinlock_t lock;
struct list_head ls_list;
struct work_struct ls_work;
+ unsigned long flags;
};
struct fcloop_nport {
@@ -288,6 +292,9 @@ struct fcloop_ini_fcpreq {
spinlock_t inilock;
};
+/* SLAB cache for fcloop_lsreq structures */
+static struct kmem_cache *lsreq_cache;
+
static inline struct fcloop_lsreq *
ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp)
{
@@ -338,6 +345,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work)
* callee may free memory containing tls_req.
* do not reference lsreq after this.
*/
+ kmem_cache_free(lsreq_cache, tls_req);
spin_lock(&rport->lock);
}
@@ -349,10 +357,13 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
- struct fcloop_lsreq *tls_req = lsreq->private;
struct fcloop_rport *rport = remoteport->private;
+ struct fcloop_lsreq *tls_req;
int ret = 0;
+ tls_req = kmem_cache_alloc(lsreq_cache, GFP_KERNEL);
+ if (!tls_req)
+ return -ENOMEM;
tls_req->lsreq = lsreq;
INIT_LIST_HEAD(&tls_req->ls_list);
@@ -389,14 +400,17 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
lsrsp->done(lsrsp);
- if (remoteport) {
- rport = remoteport->private;
- spin_lock(&rport->lock);
- list_add_tail(&tls_req->ls_list, &rport->ls_list);
- spin_unlock(&rport->lock);
- queue_work(nvmet_wq, &rport->ls_work);
+ if (!remoteport) {
+ kmem_cache_free(lsreq_cache, tls_req);
+ return 0;
}
+ rport = remoteport->private;
+ spin_lock(&rport->lock);
+ list_add_tail(&tls_req->ls_list, &rport->ls_list);
+ spin_unlock(&rport->lock);
+ queue_work(nvmet_wq, &rport->ls_work);
+
return 0;
}
@@ -422,6 +436,7 @@ fcloop_tport_lsrqst_work(struct work_struct *work)
* callee may free memory containing tls_req.
* do not reference lsreq after this.
*/
+ kmem_cache_free(lsreq_cache, tls_req);
spin_lock(&tport->lock);
}
@@ -432,8 +447,8 @@ static int
fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
struct nvmefc_ls_req *lsreq)
{
- struct fcloop_lsreq *tls_req = lsreq->private;
struct fcloop_tport *tport = targetport->private;
+ struct fcloop_lsreq *tls_req;
int ret = 0;
/*
@@ -441,6 +456,10 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
* hosthandle ignored as fcloop currently is
* 1:1 tgtport vs remoteport
*/
+
+ tls_req = kmem_cache_alloc(lsreq_cache, GFP_KERNEL);
+ if (!tls_req)
+ return -ENOMEM;
tls_req->lsreq = lsreq;
INIT_LIST_HEAD(&tls_req->ls_list);
@@ -457,6 +476,9 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp,
lsreq->rqstaddr, lsreq->rqstlen);
+ if (ret)
+ kmem_cache_free(lsreq_cache, tls_req);
+
return ret;
}
@@ -471,18 +493,30 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
struct nvmet_fc_target_port *targetport = rport->targetport;
struct fcloop_tport *tport;
+ if (!targetport) {
+ /*
+ * The target port is gone. The target doesn't expect any
+ * response anymore and the ->done call is not valid
+ * because the resources have been freed by
+ * nvmet_fc_free_pending_reqs.
+ *
+ * We end up here from delete association exchange:
+ * nvmet_fc_xmt_disconnect_assoc sends an async request.
+ */
+ kmem_cache_free(lsreq_cache, tls_req);
+ return 0;
+ }
+
memcpy(lsreq->rspaddr, lsrsp->rspbuf,
((lsreq->rsplen < lsrsp->rsplen) ?
lsreq->rsplen : lsrsp->rsplen));
lsrsp->done(lsrsp);
- if (targetport) {
- tport = targetport->private;
- spin_lock(&tport->lock);
- list_add_tail(&tls_req->ls_list, &tport->ls_list);
- spin_unlock(&tport->lock);
- queue_work(nvmet_wq, &tport->ls_work);
- }
+ tport = targetport->private;
+ spin_lock(&tport->lock);
+ list_add_tail(&tls_req->ls_list, &tport->ls_list);
+ spin_unlock(&tport->lock);
+ queue_work(nvmet_wq, &tport->ls_work);
return 0;
}
@@ -566,7 +600,8 @@ fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq,
}
/* release original io reference on tgt struct */
- fcloop_tfcp_req_put(tfcp_req);
+ if (tfcp_req)
+ fcloop_tfcp_req_put(tfcp_req);
}
static bool drop_fabric_opcode;
@@ -618,12 +653,13 @@ fcloop_fcp_recv_work(struct work_struct *work)
{
struct fcloop_fcpreq *tfcp_req =
container_of(work, struct fcloop_fcpreq, fcp_rcv_work);
- struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+ struct nvmefc_fcp_req *fcpreq;
unsigned long flags;
int ret = 0;
bool aborted = false;
spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ fcpreq = tfcp_req->fcpreq;
switch (tfcp_req->inistate) {
case INI_IO_START:
tfcp_req->inistate = INI_IO_ACTIVE;
@@ -638,16 +674,19 @@ fcloop_fcp_recv_work(struct work_struct *work)
}
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
- if (unlikely(aborted))
- ret = -ECANCELED;
- else {
- if (likely(!check_for_drop(tfcp_req)))
- ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport,
- &tfcp_req->tgt_fcp_req,
- fcpreq->cmdaddr, fcpreq->cmdlen);
- else
- pr_info("%s: dropped command ********\n", __func__);
+ if (unlikely(aborted)) {
+ /* the abort handler will call fcloop_call_host_done */
+ return;
+ }
+
+ if (unlikely(check_for_drop(tfcp_req))) {
+ pr_info("%s: dropped command ********\n", __func__);
+ return;
}
+
+ ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport,
+ &tfcp_req->tgt_fcp_req,
+ fcpreq->cmdaddr, fcpreq->cmdlen);
if (ret)
fcloop_call_host_done(fcpreq, tfcp_req, ret);
}
@@ -662,15 +701,17 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&tfcp_req->reqlock, flags);
- fcpreq = tfcp_req->fcpreq;
switch (tfcp_req->inistate) {
case INI_IO_ABORTED:
+ fcpreq = tfcp_req->fcpreq;
+ tfcp_req->fcpreq = NULL;
break;
case INI_IO_COMPLETED:
completed = true;
break;
default:
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+ fcloop_tfcp_req_put(tfcp_req);
WARN_ON(1);
return;
}
@@ -686,10 +727,6 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
&tfcp_req->tgt_fcp_req);
- spin_lock_irqsave(&tfcp_req->reqlock, flags);
- tfcp_req->fcpreq = NULL;
- spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
-
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
/* call_host_done releases reference for abort downcall */
}
@@ -958,13 +995,16 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
spin_lock(&inireq->inilock);
tfcp_req = inireq->tfcp_req;
- if (tfcp_req)
- fcloop_tfcp_req_get(tfcp_req);
+ if (tfcp_req) {
+ if (!fcloop_tfcp_req_get(tfcp_req))
+ tfcp_req = NULL;
+ }
spin_unlock(&inireq->inilock);
- if (!tfcp_req)
+ if (!tfcp_req) {
/* abort has already been called */
- return;
+ goto out_host_done;
+ }
/* break initiator/target relationship for io */
spin_lock_irqsave(&tfcp_req->reqlock, flags);
@@ -979,7 +1019,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
default:
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
WARN_ON(1);
- return;
+ goto out_host_done;
}
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
@@ -993,6 +1033,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
*/
fcloop_tfcp_req_put(tfcp_req);
}
+
+ return;
+
+out_host_done:
+ fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
}
static void
@@ -1019,9 +1064,18 @@ fcloop_lport_get(struct fcloop_lport *lport)
static void
fcloop_nport_put(struct fcloop_nport *nport)
{
+ unsigned long flags;
+
if (!refcount_dec_and_test(&nport->ref))
return;
+ spin_lock_irqsave(&fcloop_lock, flags);
+ list_del(&nport->nport_list);
+ spin_unlock_irqrestore(&fcloop_lock, flags);
+
+ if (nport->lport)
+ fcloop_lport_put(nport->lport);
+
kfree(nport);
}
@@ -1037,9 +1091,6 @@ fcloop_localport_delete(struct nvme_fc_local_port *localport)
struct fcloop_lport_priv *lport_priv = localport->private;
struct fcloop_lport *lport = lport_priv->lport;
- /* release any threads waiting for the unreg to complete */
- complete(&lport->unreg_done);
-
fcloop_lport_put(lport);
}
@@ -1047,18 +1098,38 @@ static void
fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
{
struct fcloop_rport *rport = remoteport->private;
+ bool put_port = false;
+ unsigned long flags;
flush_work(&rport->ls_work);
- fcloop_nport_put(rport->nport);
+
+ spin_lock_irqsave(&fcloop_lock, flags);
+ if (!test_and_set_bit(PORT_DELETED, &rport->flags))
+ put_port = true;
+ rport->nport->rport = NULL;
+ spin_unlock_irqrestore(&fcloop_lock, flags);
+
+ if (put_port)
+ fcloop_nport_put(rport->nport);
}
static void
fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
{
struct fcloop_tport *tport = targetport->private;
+ bool put_port = false;
+ unsigned long flags;
flush_work(&tport->ls_work);
- fcloop_nport_put(tport->nport);
+
+ spin_lock_irqsave(&fcloop_lock, flags);
+ if (!test_and_set_bit(PORT_DELETED, &tport->flags))
+ put_port = true;
+ tport->nport->tport = NULL;
+ spin_unlock_irqrestore(&fcloop_lock, flags);
+
+ if (put_port)
+ fcloop_nport_put(tport->nport);
}
#define FCLOOP_HW_QUEUES 4
@@ -1082,7 +1153,6 @@ static struct nvme_fc_port_template fctemplate = {
/* sizes of additional private data for data structures */
.local_priv_sz = sizeof(struct fcloop_lport_priv),
.remote_priv_sz = sizeof(struct fcloop_rport),
- .lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
.fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq),
};
@@ -1105,7 +1175,6 @@ static struct nvmet_fc_target_template tgttemplate = {
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct fcloop_tport),
- .lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
};
static ssize_t
@@ -1170,51 +1239,92 @@ out_free_lport:
}
static int
-__wait_localport_unreg(struct fcloop_lport *lport)
+__localport_unreg(struct fcloop_lport *lport)
{
- int ret;
+ return nvme_fc_unregister_localport(lport->localport);
+}
- init_completion(&lport->unreg_done);
+static struct fcloop_nport *
+__fcloop_nport_lookup(u64 node_name, u64 port_name)
+{
+ struct fcloop_nport *nport;
- ret = nvme_fc_unregister_localport(lport->localport);
+ list_for_each_entry(nport, &fcloop_nports, nport_list) {
+ if (nport->node_name != node_name ||
+ nport->port_name != port_name)
+ continue;
- if (!ret)
- wait_for_completion(&lport->unreg_done);
+ if (fcloop_nport_get(nport))
+ return nport;
- return ret;
+ break;
+ }
+
+ return NULL;
}
+static struct fcloop_nport *
+fcloop_nport_lookup(u64 node_name, u64 port_name)
+{
+ struct fcloop_nport *nport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fcloop_lock, flags);
+ nport = __fcloop_nport_lookup(node_name, port_name);
+ spin_unlock_irqrestore(&fcloop_lock, flags);
+
+ return nport;
+}
+
+static struct fcloop_lport *
+__fcloop_lport_lookup(u64 node_name, u64 port_name)
+{
+ struct fcloop_lport *lport;
+
+ list_for_each_entry(lport, &fcloop_lports, lport_list) {
+ if (lport->localport->node_name != node_name ||
+ lport->localport->port_name != port_name)
+ continue;
+
+ if (fcloop_lport_get(lport))
+ return lport;
+
+ break;
+ }
+
+ return NULL;
+}
+
+static struct fcloop_lport *
+fcloop_lport_lookup(u64 node_name, u64 port_name)
+{
+ struct fcloop_lport *lport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fcloop_lock, flags);
+ lport = __fcloop_lport_lookup(node_name, port_name);
+ spin_unlock_irqrestore(&fcloop_lock, flags);
+
+ return lport;
+}
static ssize_t
fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- struct fcloop_lport *tlport, *lport = NULL;
+ struct fcloop_lport *lport;
u64 nodename, portname;
- unsigned long flags;
int ret;
ret = fcloop_parse_nm_options(dev, &nodename, &portname, buf);
if (ret)
return ret;
- spin_lock_irqsave(&fcloop_lock, flags);
-
- list_for_each_entry(tlport, &fcloop_lports, lport_list) {
- if (tlport->localport->node_name == nodename &&
- tlport->localport->port_name == portname) {
- if (!fcloop_lport_get(tlport))
- break;
- lport = tlport;
- break;
- }
- }
- spin_unlock_irqrestore(&fcloop_lock, flags);
-
+ lport = fcloop_lport_lookup(nodename, portname);
if (!lport)
return -ENOENT;
- ret = __wait_localport_unreg(lport);
+ ret = __localport_unreg(lport);
fcloop_lport_put(lport);
return ret ? ret : count;
@@ -1223,8 +1333,8 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
static struct fcloop_nport *
fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
{
- struct fcloop_nport *newnport, *nport = NULL;
- struct fcloop_lport *tmplport, *lport = NULL;
+ struct fcloop_nport *newnport, *nport;
+ struct fcloop_lport *lport;
struct fcloop_ctrl_options *opts;
unsigned long flags;
u32 opts_mask = (remoteport) ? RPORT_OPTS : TGTPORT_OPTS;
@@ -1239,10 +1349,8 @@ fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
goto out_free_opts;
/* everything there ? */
- if ((opts->mask & opts_mask) != opts_mask) {
- ret = -EINVAL;
+ if ((opts->mask & opts_mask) != opts_mask)
goto out_free_opts;
- }
newnport = kzalloc(sizeof(*newnport), GFP_KERNEL);
if (!newnport)
@@ -1258,60 +1366,61 @@ fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
refcount_set(&newnport->ref, 1);
spin_lock_irqsave(&fcloop_lock, flags);
-
- list_for_each_entry(tmplport, &fcloop_lports, lport_list) {
- if (tmplport->localport->node_name == opts->wwnn &&
- tmplport->localport->port_name == opts->wwpn)
- goto out_invalid_opts;
-
- if (tmplport->localport->node_name == opts->lpwwnn &&
- tmplport->localport->port_name == opts->lpwwpn)
- lport = tmplport;
+ lport = __fcloop_lport_lookup(opts->wwnn, opts->wwpn);
+ if (lport) {
+ /* invalid configuration */
+ fcloop_lport_put(lport);
+ goto out_free_newnport;
}
if (remoteport) {
- if (!lport)
- goto out_invalid_opts;
- newnport->lport = lport;
- }
-
- list_for_each_entry(nport, &fcloop_nports, nport_list) {
- if (nport->node_name == opts->wwnn &&
- nport->port_name == opts->wwpn) {
- if ((remoteport && nport->rport) ||
- (!remoteport && nport->tport)) {
- nport = NULL;
- goto out_invalid_opts;
- }
-
- fcloop_nport_get(nport);
-
- spin_unlock_irqrestore(&fcloop_lock, flags);
-
- if (remoteport)
- nport->lport = lport;
- if (opts->mask & NVMF_OPT_ROLES)
- nport->port_role = opts->roles;
- if (opts->mask & NVMF_OPT_FCADDR)
- nport->port_id = opts->fcaddr;
+ lport = __fcloop_lport_lookup(opts->lpwwnn, opts->lpwwpn);
+ if (!lport) {
+ /* invalid configuration */
goto out_free_newnport;
}
}
- list_add_tail(&newnport->nport_list, &fcloop_nports);
+ nport = __fcloop_nport_lookup(opts->wwnn, opts->wwpn);
+ if (nport) {
+ if ((remoteport && nport->rport) ||
+ (!remoteport && nport->tport)) {
+ /* invalid configuration */
+ goto out_put_nport;
+ }
+
+ /* found existing nport, discard the new nport */
+ kfree(newnport);
+ } else {
+ list_add_tail(&newnport->nport_list, &fcloop_nports);
+ nport = newnport;
+ }
+ if (opts->mask & NVMF_OPT_ROLES)
+ nport->port_role = opts->roles;
+ if (opts->mask & NVMF_OPT_FCADDR)
+ nport->port_id = opts->fcaddr;
+ if (lport) {
+ if (!nport->lport)
+ nport->lport = lport;
+ else
+ fcloop_lport_put(lport);
+ }
spin_unlock_irqrestore(&fcloop_lock, flags);
kfree(opts);
- return newnport;
+ return nport;
-out_invalid_opts:
- spin_unlock_irqrestore(&fcloop_lock, flags);
+out_put_nport:
+ if (lport)
+ fcloop_lport_put(lport);
+ fcloop_nport_put(nport);
out_free_newnport:
+ spin_unlock_irqrestore(&fcloop_lock, flags);
kfree(newnport);
out_free_opts:
kfree(opts);
- return nport;
+ return NULL;
}
static ssize_t
@@ -1352,6 +1461,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
rport->nport = nport;
rport->lport = nport->lport;
nport->rport = rport;
+ rport->flags = 0;
spin_lock_init(&rport->lock);
INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work);
INIT_LIST_HEAD(&rport->ls_list);
@@ -1365,21 +1475,18 @@ __unlink_remote_port(struct fcloop_nport *nport)
{
struct fcloop_rport *rport = nport->rport;
+ lockdep_assert_held(&fcloop_lock);
+
if (rport && nport->tport)
nport->tport->remoteport = NULL;
nport->rport = NULL;
- list_del(&nport->nport_list);
-
return rport;
}
static int
__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
{
- if (!rport)
- return -EALREADY;
-
return nvme_fc_unregister_remoteport(rport->remoteport);
}
@@ -1387,8 +1494,8 @@ static ssize_t
fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- struct fcloop_nport *nport = NULL, *tmpport;
- static struct fcloop_rport *rport;
+ struct fcloop_nport *nport;
+ struct fcloop_rport *rport;
u64 nodename, portname;
unsigned long flags;
int ret;
@@ -1397,24 +1504,24 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
- spin_lock_irqsave(&fcloop_lock, flags);
-
- list_for_each_entry(tmpport, &fcloop_nports, nport_list) {
- if (tmpport->node_name == nodename &&
- tmpport->port_name == portname && tmpport->rport) {
- nport = tmpport;
- rport = __unlink_remote_port(nport);
- break;
- }
- }
+ nport = fcloop_nport_lookup(nodename, portname);
+ if (!nport)
+ return -ENOENT;
+ spin_lock_irqsave(&fcloop_lock, flags);
+ rport = __unlink_remote_port(nport);
spin_unlock_irqrestore(&fcloop_lock, flags);
- if (!nport)
- return -ENOENT;
+ if (!rport) {
+ ret = -ENOENT;
+ goto out_nport_put;
+ }
ret = __remoteport_unreg(nport, rport);
+out_nport_put:
+ fcloop_nport_put(nport);
+
return ret ? ret : count;
}
@@ -1452,6 +1559,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
tport->nport = nport;
tport->lport = nport->lport;
nport->tport = tport;
+ tport->flags = 0;
spin_lock_init(&tport->lock);
INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work);
INIT_LIST_HEAD(&tport->ls_list);
@@ -1465,6 +1573,8 @@ __unlink_target_port(struct fcloop_nport *nport)
{
struct fcloop_tport *tport = nport->tport;
+ lockdep_assert_held(&fcloop_lock);
+
if (tport && nport->rport)
nport->rport->targetport = NULL;
nport->tport = NULL;
@@ -1475,9 +1585,6 @@ __unlink_target_port(struct fcloop_nport *nport)
static int
__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
{
- if (!tport)
- return -EALREADY;
-
return nvmet_fc_unregister_targetport(tport->targetport);
}
@@ -1485,8 +1592,8 @@ static ssize_t
fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- struct fcloop_nport *nport = NULL, *tmpport;
- struct fcloop_tport *tport = NULL;
+ struct fcloop_nport *nport;
+ struct fcloop_tport *tport;
u64 nodename, portname;
unsigned long flags;
int ret;
@@ -1495,24 +1602,24 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
- spin_lock_irqsave(&fcloop_lock, flags);
-
- list_for_each_entry(tmpport, &fcloop_nports, nport_list) {
- if (tmpport->node_name == nodename &&
- tmpport->port_name == portname && tmpport->tport) {
- nport = tmpport;
- tport = __unlink_target_port(nport);
- break;
- }
- }
+ nport = fcloop_nport_lookup(nodename, portname);
+ if (!nport)
+ return -ENOENT;
+ spin_lock_irqsave(&fcloop_lock, flags);
+ tport = __unlink_target_port(nport);
spin_unlock_irqrestore(&fcloop_lock, flags);
- if (!nport)
- return -ENOENT;
+ if (!tport) {
+ ret = -ENOENT;
+ goto out_nport_put;
+ }
ret = __targetport_unreg(nport, tport);
+out_nport_put:
+ fcloop_nport_put(nport);
+
return ret ? ret : count;
}
@@ -1578,15 +1685,20 @@ static const struct class fcloop_class = {
};
static struct device *fcloop_device;
-
static int __init fcloop_init(void)
{
int ret;
+ lsreq_cache = kmem_cache_create("lsreq_cache",
+ sizeof(struct fcloop_lsreq), 0,
+ 0, NULL);
+ if (!lsreq_cache)
+ return -ENOMEM;
+
ret = class_register(&fcloop_class);
if (ret) {
pr_err("couldn't register class fcloop\n");
- return ret;
+ goto out_destroy_cache;
}
fcloop_device = device_create_with_groups(
@@ -1604,13 +1716,15 @@ static int __init fcloop_init(void)
out_destroy_class:
class_unregister(&fcloop_class);
+out_destroy_cache:
+ kmem_cache_destroy(lsreq_cache);
return ret;
}
static void __exit fcloop_exit(void)
{
- struct fcloop_lport *lport = NULL;
- struct fcloop_nport *nport = NULL;
+ struct fcloop_lport *lport;
+ struct fcloop_nport *nport;
struct fcloop_tport *tport;
struct fcloop_rport *rport;
unsigned long flags;
@@ -1621,7 +1735,7 @@ static void __exit fcloop_exit(void)
for (;;) {
nport = list_first_entry_or_null(&fcloop_nports,
typeof(*nport), nport_list);
- if (!nport)
+ if (!nport || !fcloop_nport_get(nport))
break;
tport = __unlink_target_port(nport);
@@ -1629,13 +1743,21 @@ static void __exit fcloop_exit(void)
spin_unlock_irqrestore(&fcloop_lock, flags);
- ret = __targetport_unreg(nport, tport);
- if (ret)
- pr_warn("%s: Failed deleting target port\n", __func__);
+ if (tport) {
+ ret = __targetport_unreg(nport, tport);
+ if (ret)
+ pr_warn("%s: Failed deleting target port\n",
+ __func__);
+ }
- ret = __remoteport_unreg(nport, rport);
- if (ret)
- pr_warn("%s: Failed deleting remote port\n", __func__);
+ if (rport) {
+ ret = __remoteport_unreg(nport, rport);
+ if (ret)
+ pr_warn("%s: Failed deleting remote port\n",
+ __func__);
+ }
+
+ fcloop_nport_put(nport);
spin_lock_irqsave(&fcloop_lock, flags);
}
@@ -1648,7 +1770,7 @@ static void __exit fcloop_exit(void)
spin_unlock_irqrestore(&fcloop_lock, flags);
- ret = __wait_localport_unreg(lport);
+ ret = __localport_unreg(lport);
if (ret)
pr_warn("%s: Failed deleting local port\n", __func__);
@@ -1663,6 +1785,7 @@ static void __exit fcloop_exit(void)
device_destroy(&fcloop_class, MKDEV(0, 0));
class_unregister(&fcloop_class);
+ kmem_cache_destroy(lsreq_cache);
}
module_init(fcloop_init);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index a5c41144667c..f85a8441bcc6 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -33,10 +33,12 @@ struct nvme_loop_ctrl {
struct list_head list;
struct blk_mq_tag_set tag_set;
- struct nvme_loop_iod async_event_iod;
struct nvme_ctrl ctrl;
struct nvmet_port *port;
+
+ /* Must be last --ends in a flexible-array member. */
+ struct nvme_loop_iod async_event_iod;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -148,8 +150,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
nvme_start_request(req);
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
iod->req.port = queue->ctrl->port;
- if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
- &queue->nvme_sq, &nvme_loop_ops))
+ if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops))
return BLK_STS_OK;
if (blk_rq_nr_phys_segments(req)) {
@@ -181,8 +182,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg)
iod->cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
- if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq,
- &nvme_loop_ops)) {
+ if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops)) {
dev_err(ctrl->ctrl.device, "failed async event work\n");
return;
}
@@ -273,6 +273,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
nvme_unquiesce_admin_queue(&ctrl->ctrl);
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
+ nvmet_cq_put(&ctrl->queues[0].nvme_cq);
nvme_remove_admin_tag_set(&ctrl->ctrl);
}
@@ -302,6 +303,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+ nvmet_cq_put(&ctrl->queues[i].nvme_cq);
}
ctrl->ctrl.queue_count = 1;
/*
@@ -327,9 +329,13 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
for (i = 1; i <= nr_io_queues; i++) {
ctrl->queues[i].ctrl = ctrl;
- ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
- if (ret)
+ nvmet_cq_init(&ctrl->queues[i].nvme_cq);
+ ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq,
+ &ctrl->queues[i].nvme_cq);
+ if (ret) {
+ nvmet_cq_put(&ctrl->queues[i].nvme_cq);
goto out_destroy_queues;
+ }
ctrl->ctrl.queue_count++;
}
@@ -360,9 +366,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
int error;
ctrl->queues[0].ctrl = ctrl;
- error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
- if (error)
+ nvmet_cq_init(&ctrl->queues[0].nvme_cq);
+ error = nvmet_sq_init(&ctrl->queues[0].nvme_sq,
+ &ctrl->queues[0].nvme_cq);
+ if (error) {
+ nvmet_cq_put(&ctrl->queues[0].nvme_cq);
return error;
+ }
ctrl->ctrl.queue_count = 1;
error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
@@ -401,6 +411,7 @@ out_cleanup_tagset:
nvme_remove_admin_tag_set(&ctrl->ctrl);
out_free_sq:
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
+ nvmet_cq_put(&ctrl->queues[0].nvme_cq);
return error;
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index b6db8b74dc4a..df69a9dee71c 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -141,13 +141,16 @@ static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
}
struct nvmet_cq {
+ struct nvmet_ctrl *ctrl;
u16 qid;
u16 size;
+ refcount_t ref;
};
struct nvmet_sq {
struct nvmet_ctrl *ctrl;
struct percpu_ref ref;
+ struct nvmet_cq *cq;
u16 qid;
u16 size;
u32 sqhd;
@@ -247,6 +250,7 @@ struct nvmet_pr_log_mgr {
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_sq **sqs;
+ struct nvmet_cq **cqs;
void *drvdata;
@@ -424,7 +428,7 @@ struct nvmet_fabrics_ops {
u16 (*get_max_queue_size)(const struct nvmet_ctrl *ctrl);
/* Operations mandatory for PCI target controllers */
- u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 flags,
+ u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 cqid, u16 flags,
u16 qsize, u64 prp1);
u16 (*delete_sq)(struct nvmet_ctrl *ctrl, u16 sqid);
u16 (*create_cq)(struct nvmet_ctrl *ctrl, u16 cqid, u16 flags,
@@ -557,8 +561,8 @@ u32 nvmet_fabrics_admin_cmd_data_len(struct nvmet_req *req);
u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req);
u32 nvmet_fabrics_io_cmd_data_len(struct nvmet_req *req);
-bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
- struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
+bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq,
+ const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
size_t nvmet_req_transfer_len(struct nvmet_req *req);
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
@@ -571,18 +575,24 @@ void nvmet_execute_set_features(struct nvmet_req *req);
void nvmet_execute_get_features(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
-u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid);
+u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
+u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
+void nvmet_cq_init(struct nvmet_cq *cq);
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
+void nvmet_cq_destroy(struct nvmet_cq *cq);
+bool nvmet_cq_get(struct nvmet_cq *cq);
+void nvmet_cq_put(struct nvmet_cq *cq);
+bool nvmet_cq_in_use(struct nvmet_cq *cq);
u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, bool create);
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
u16 size);
-u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
- u16 size);
+u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
+ struct nvmet_cq *cq, u16 qid, u16 size);
void nvmet_sq_destroy(struct nvmet_sq *sq);
-int nvmet_sq_init(struct nvmet_sq *sq);
+int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq);
void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index 7fab7f3d79b7..a4295a5b8d28 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex);
#define NVMET_PCI_EPF_CQ_RETRY_INTERVAL msecs_to_jiffies(1)
enum nvmet_pci_epf_queue_flags {
- NVMET_PCI_EPF_Q_IS_SQ = 0, /* The queue is a submission queue */
- NVMET_PCI_EPF_Q_LIVE, /* The queue is live */
+ NVMET_PCI_EPF_Q_LIVE = 0, /* The queue is live */
NVMET_PCI_EPF_Q_IRQ_ENABLED, /* IRQ is enabled for this queue */
};
@@ -596,9 +595,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_irq_vector *iv = cq->iv;
bool ret;
- if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
- return false;
-
/* IRQ coalescing for the admin queue is not allowed. */
if (!cq->qid)
return true;
@@ -625,7 +621,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
struct pci_epf *epf = nvme_epf->epf;
int ret = 0;
- if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
+ if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) ||
+ !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
return;
mutex_lock(&ctrl->irq_lock);
@@ -636,14 +633,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
switch (nvme_epf->irq_type) {
case PCI_IRQ_MSIX:
case PCI_IRQ_MSI:
+ /*
+ * If we fail to raise an MSI or MSI-X interrupt, it is likely
+ * because the host is using legacy INTX IRQs (e.g. BIOS,
+ * grub), but we can fallback to the INTX type only if the
+ * endpoint controller supports this type.
+ */
ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no,
nvme_epf->irq_type, cq->vector + 1);
- if (!ret)
+ if (!ret || !nvme_epf->epc_features->intx_capable)
break;
- /*
- * If we got an error, it is likely because the host is using
- * legacy IRQs (e.g. BIOS, grub).
- */
fallthrough;
case PCI_IRQ_INTX:
ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no,
@@ -656,7 +655,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
}
if (ret)
- dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret);
+ dev_err_ratelimited(ctrl->dev,
+ "CQ[%u]: Failed to raise IRQ (err=%d)\n",
+ cq->qid, ret);
unlock:
mutex_unlock(&ctrl->irq_lock);
@@ -1319,8 +1320,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
- dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
- cqid, qsize, cq->qes, cq->vector);
+ if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
+ dev_dbg(ctrl->dev,
+ "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
+ cqid, qsize, cq->qes, cq->vector);
+ else
+ dev_dbg(ctrl->dev,
+ "CQ[%u]: %u entries of %zu B, IRQ disabled\n",
+ cqid, qsize, cq->qes);
return NVME_SC_SUCCESS;
@@ -1344,17 +1351,20 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
cancel_delayed_work_sync(&cq->work);
nvmet_pci_epf_drain_queue(cq);
- nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+ if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
+ nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
+ nvmet_cq_put(&cq->nvme_cq);
return NVME_SC_SUCCESS;
}
static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
- u16 sqid, u16 flags, u16 qsize, u64 pci_addr)
+ u16 sqid, u16 cqid, u16 flags, u16 qsize, u64 pci_addr)
{
struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid];
+ struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
@@ -1377,7 +1387,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
sq->qes = ctrl->io_sqes;
sq->pci_size = sq->qes * sq->depth;
- status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth);
+ status = nvmet_sq_create(tctrl, &sq->nvme_sq, &cq->nvme_cq, sqid,
+ sq->depth);
if (status != NVME_SC_SUCCESS)
return status;
@@ -1533,7 +1544,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl,
if (sq) {
queue = &ctrl->sq[qid];
- set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags);
} else {
queue = &ctrl->cq[qid];
INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work);
@@ -1594,8 +1604,7 @@ static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
goto complete;
}
- if (!nvmet_req_init(req, &iod->cq->nvme_cq, &iod->sq->nvme_sq,
- &nvmet_pci_epf_fabrics_ops))
+ if (!nvmet_req_init(req, &iod->sq->nvme_sq, &nvmet_pci_epf_fabrics_ops))
goto complete;
iod->data_len = nvmet_req_transfer_len(req);
@@ -1872,8 +1881,8 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
qsize = aqa & 0x00000fff;
pci_addr = asq & GENMASK_ULL(63, 12);
- status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, NVME_QUEUE_PHYS_CONTIG,
- qsize, pci_addr);
+ status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, 0,
+ NVME_QUEUE_PHYS_CONTIG, qsize, pci_addr);
if (status != NVME_SC_SUCCESS) {
dev_err(ctrl->dev, "Failed to create admin submission queue\n");
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 2a4536ef6184..432bdf7cd49e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -976,8 +976,7 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
cmd->send_sge.addr, cmd->send_sge.length,
DMA_TO_DEVICE);
- if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
- &queue->nvme_sq, &nvmet_rdma_ops))
+ if (!nvmet_req_init(&cmd->req, &queue->nvme_sq, &nvmet_rdma_ops))
return;
status = nvmet_rdma_map_sgl(cmd);
@@ -1353,6 +1352,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
pr_debug("freeing queue %d\n", queue->idx);
nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_cq_put(&queue->nvme_cq);
nvmet_rdma_destroy_queue_ib(queue);
if (!queue->nsrq) {
@@ -1436,7 +1436,8 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
goto out_reject;
}
- ret = nvmet_sq_init(&queue->nvme_sq);
+ nvmet_cq_init(&queue->nvme_cq);
+ ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
if (ret) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_free_queue;
@@ -1517,6 +1518,7 @@ out_ida_remove:
out_destroy_sq:
nvmet_sq_destroy(&queue->nvme_sq);
out_free_queue:
+ nvmet_cq_put(&queue->nvme_cq);
kfree(queue);
out_reject:
nvmet_rdma_cm_reject(cm_id, ret);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 12a5cb8641ca..c6603bd9c95e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/crc32c.h>
#include <linux/err.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
@@ -17,7 +18,6 @@
#include <net/handshake.h>
#include <linux/inet.h>
#include <linux/llist.h>
-#include <crypto/hash.h>
#include <trace/events/sock.h>
#include "nvmet.h"
@@ -172,8 +172,6 @@ struct nvmet_tcp_queue {
/* digest state */
bool hdr_digest;
bool data_digest;
- struct ahash_request *snd_hash;
- struct ahash_request *rcv_hash;
/* TLS state */
key_serial_t tls_pskid;
@@ -294,14 +292,9 @@ static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}
-static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
- void *pdu, size_t len)
+static inline void nvmet_tcp_hdgst(void *pdu, size_t len)
{
- struct scatterlist sg;
-
- sg_init_one(&sg, pdu, len);
- ahash_request_set_crypt(hash, &sg, pdu + len, len);
- crypto_ahash_digest(hash);
+ put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
}
static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
@@ -318,7 +311,7 @@ static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
}
recv_digest = *(__le32 *)(pdu + hdr->hlen);
- nvmet_tcp_hdgst(queue->rcv_hash, pdu, len);
+ nvmet_tcp_hdgst(pdu, len);
exp_digest = *(__le32 *)(pdu + hdr->hlen);
if (recv_digest != exp_digest) {
pr_err("queue %d: header digest error: recv %#x expected %#x\n",
@@ -441,12 +434,24 @@ err:
return NVME_SC_INTERNAL;
}
-static void nvmet_tcp_calc_ddgst(struct ahash_request *hash,
- struct nvmet_tcp_cmd *cmd)
+static void nvmet_tcp_calc_ddgst(struct nvmet_tcp_cmd *cmd)
{
- ahash_request_set_crypt(hash, cmd->req.sg,
- (void *)&cmd->exp_ddgst, cmd->req.transfer_len);
- crypto_ahash_digest(hash);
+ size_t total_len = cmd->req.transfer_len;
+ struct scatterlist *sg = cmd->req.sg;
+ u32 crc = ~0;
+
+ while (total_len) {
+ size_t len = min_t(size_t, total_len, sg->length);
+
+ /*
+ * Note that the scatterlist does not contain any highmem pages,
+ * as it was allocated by sgl_alloc() with GFP_KERNEL.
+ */
+ crc = crc32c(crc, sg_virt(sg), len);
+ total_len -= len;
+ sg = sg_next(sg);
+ }
+ cmd->exp_ddgst = cpu_to_le32(~crc);
}
static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
@@ -473,19 +478,18 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
if (queue->data_digest) {
pdu->hdr.flags |= NVME_TCP_F_DDGST;
- nvmet_tcp_calc_ddgst(queue->snd_hash, cmd);
+ nvmet_tcp_calc_ddgst(cmd);
}
if (cmd->queue->hdr_digest) {
pdu->hdr.flags |= NVME_TCP_F_HDGST;
- nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ nvmet_tcp_hdgst(pdu, sizeof(*pdu));
}
}
static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd)
{
struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu;
- struct nvmet_tcp_queue *queue = cmd->queue;
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
cmd->offset = 0;
@@ -503,14 +507,13 @@ static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd)
pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done);
if (cmd->queue->hdr_digest) {
pdu->hdr.flags |= NVME_TCP_F_HDGST;
- nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ nvmet_tcp_hdgst(pdu, sizeof(*pdu));
}
}
static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
{
struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu;
- struct nvmet_tcp_queue *queue = cmd->queue;
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
cmd->offset = 0;
@@ -523,7 +526,7 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
if (cmd->queue->hdr_digest) {
pdu->hdr.flags |= NVME_TCP_F_HDGST;
- nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ nvmet_tcp_hdgst(pdu, sizeof(*pdu));
}
}
@@ -857,42 +860,6 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue)
smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU);
}
-static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
-
- ahash_request_free(queue->rcv_hash);
- ahash_request_free(queue->snd_hash);
- crypto_free_ahash(tfm);
-}
-
-static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue)
-{
- struct crypto_ahash *tfm;
-
- tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!queue->snd_hash)
- goto free_tfm;
- ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
-
- queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!queue->rcv_hash)
- goto free_snd_hash;
- ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
-
- return 0;
-free_snd_hash:
- ahash_request_free(queue->snd_hash);
-free_tfm:
- crypto_free_ahash(tfm);
- return -ENOMEM;
-}
-
-
static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
{
struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq;
@@ -921,11 +888,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE);
queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE);
- if (queue->hdr_digest || queue->data_digest) {
- ret = nvmet_tcp_alloc_crypto(queue);
- if (ret)
- return ret;
- }
memset(icresp, 0, sizeof(*icresp));
icresp->hdr.type = nvme_tcp_icresp;
@@ -1077,8 +1039,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
req = &queue->cmd->req;
memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd));
- if (unlikely(!nvmet_req_init(req, &queue->nvme_cq,
- &queue->nvme_sq, &nvmet_tcp_ops))) {
+ if (unlikely(!nvmet_req_init(req, &queue->nvme_sq, &nvmet_tcp_ops))) {
pr_err("failed cmd %p id %d opcode %d, data_len: %d, status: %04x\n",
req->cmd, req->cmd->common.command_id,
req->cmd->common.opcode,
@@ -1247,7 +1208,7 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd)
{
struct nvmet_tcp_queue *queue = cmd->queue;
- nvmet_tcp_calc_ddgst(queue->rcv_hash, cmd);
+ nvmet_tcp_calc_ddgst(cmd);
queue->offset = 0;
queue->left = NVME_TCP_DIGEST_LENGTH;
queue->rcv_state = NVMET_TCP_RECV_DDGST;
@@ -1615,13 +1576,12 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
nvmet_sq_put_tls_key(&queue->nvme_sq);
nvmet_tcp_uninit_data_in_cmds(queue);
nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_cq_put(&queue->nvme_cq);
cancel_work_sync(&queue->io_work);
nvmet_tcp_free_cmd_data_in_buffers(queue);
/* ->sock will be released by fput() */
fput(queue->sock->file);
nvmet_tcp_free_cmds(queue);
- if (queue->hdr_digest || queue->data_digest)
- nvmet_tcp_free_crypto(queue);
ida_free(&nvmet_tcp_queue_ida, queue->idx);
page_frag_cache_drain(&queue->pf_cache);
kfree(queue);
@@ -1950,7 +1910,8 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_ida_remove;
- ret = nvmet_sq_init(&queue->nvme_sq);
+ nvmet_cq_init(&queue->nvme_cq);
+ ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
if (ret)
goto out_free_connect;
@@ -1993,6 +1954,7 @@ out_destroy_sq:
mutex_unlock(&nvmet_tcp_queue_mutex);
nvmet_sq_destroy(&queue->nvme_sq);
out_free_connect:
+ nvmet_cq_put(&queue->nvme_cq);
nvmet_tcp_free_cmd(&queue->connect);
out_ida_remove:
ida_free(&nvmet_tcp_queue_ida, queue->idx);
diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig
index 8671b7c974b9..3de07ef52490 100644
--- a/drivers/nvmem/Kconfig
+++ b/drivers/nvmem/Kconfig
@@ -154,6 +154,18 @@ config NVMEM_LPC18XX_OTP
To compile this driver as a module, choose M here: the module
will be called nvmem_lpc18xx_otp.
+config NVMEM_MAX77759
+ tristate "Maxim Integrated MAX77759 NVMEM Support"
+ depends on MFD_MAX77759
+ default MFD_MAX77759
+ help
+ Say Y here to include support for the user-accessible storage found
+ in Maxim Integrated MAX77759 PMICs. This IC provides space for 30
+ bytes of storage.
+
+ This driver can also be built as a module. If so, the module
+ will be called nvmem-max77759.
+
config NVMEM_MESON_EFUSE
tristate "Amlogic Meson GX eFuse Support"
depends on (ARCH_MESON || COMPILE_TEST) && MESON_SM
diff --git a/drivers/nvmem/Makefile b/drivers/nvmem/Makefile
index 5b77bbb6488b..a9d03cfbbd27 100644
--- a/drivers/nvmem/Makefile
+++ b/drivers/nvmem/Makefile
@@ -34,6 +34,8 @@ obj-$(CONFIG_NVMEM_LPC18XX_EEPROM) += nvmem_lpc18xx_eeprom.o
nvmem_lpc18xx_eeprom-y := lpc18xx_eeprom.o
obj-$(CONFIG_NVMEM_LPC18XX_OTP) += nvmem_lpc18xx_otp.o
nvmem_lpc18xx_otp-y := lpc18xx_otp.o
+obj-$(CONFIG_NVMEM_MAX77759) += nvmem-max77759.o
+nvmem-max77759-y := max77759-nvmem.o
obj-$(CONFIG_NVMEM_MESON_EFUSE) += nvmem_meson_efuse.o
nvmem_meson_efuse-y := meson-efuse.o
obj-$(CONFIG_NVMEM_MESON_MX_EFUSE) += nvmem_meson_mx_efuse.o
diff --git a/drivers/nvmem/max77759-nvmem.c b/drivers/nvmem/max77759-nvmem.c
new file mode 100644
index 000000000000..c9961ad0e232
--- /dev/null
+++ b/drivers/nvmem/max77759-nvmem.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright 2020 Google Inc
+// Copyright 2025 Linaro Ltd.
+//
+// NVMEM driver for Maxim MAX77759
+
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/device/driver.h>
+#include <linux/err.h>
+#include <linux/mfd/max77759.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/nvmem-provider.h>
+#include <linux/overflow.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+#define MAX77759_NVMEM_OPCODE_HEADER_LEN 3
+/*
+ * NVMEM commands have a three byte header (which becomes part of the command),
+ * so we need to subtract that.
+ */
+#define MAX77759_NVMEM_SIZE (MAX77759_MAXQ_OPCODE_MAXLENGTH \
+ - MAX77759_NVMEM_OPCODE_HEADER_LEN)
+
+struct max77759_nvmem {
+ struct device *dev;
+ struct max77759 *max77759;
+};
+
+static int max77759_nvmem_reg_read(void *priv, unsigned int offset,
+ void *val, size_t bytes)
+{
+ struct max77759_nvmem *nvmem = priv;
+ DEFINE_FLEX(struct max77759_maxq_command, cmd, cmd, length,
+ MAX77759_NVMEM_OPCODE_HEADER_LEN);
+ DEFINE_FLEX(struct max77759_maxq_response, rsp, rsp, length,
+ MAX77759_MAXQ_OPCODE_MAXLENGTH);
+ int ret;
+
+ cmd->cmd[0] = MAX77759_MAXQ_OPCODE_USER_SPACE_READ;
+ cmd->cmd[1] = offset;
+ cmd->cmd[2] = bytes;
+ rsp->length = bytes + MAX77759_NVMEM_OPCODE_HEADER_LEN;
+
+ ret = max77759_maxq_command(nvmem->max77759, cmd, rsp);
+ if (ret < 0)
+ return ret;
+
+ if (memcmp(cmd->cmd, rsp->rsp, MAX77759_NVMEM_OPCODE_HEADER_LEN)) {
+ dev_warn(nvmem->dev, "protocol error (read)\n");
+ return -EIO;
+ }
+
+ memcpy(val, &rsp->rsp[MAX77759_NVMEM_OPCODE_HEADER_LEN], bytes);
+
+ return 0;
+}
+
+static int max77759_nvmem_reg_write(void *priv, unsigned int offset,
+ void *val, size_t bytes)
+{
+ struct max77759_nvmem *nvmem = priv;
+ DEFINE_FLEX(struct max77759_maxq_command, cmd, cmd, length,
+ MAX77759_MAXQ_OPCODE_MAXLENGTH);
+ DEFINE_FLEX(struct max77759_maxq_response, rsp, rsp, length,
+ MAX77759_MAXQ_OPCODE_MAXLENGTH);
+ int ret;
+
+ cmd->cmd[0] = MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE;
+ cmd->cmd[1] = offset;
+ cmd->cmd[2] = bytes;
+ memcpy(&cmd->cmd[MAX77759_NVMEM_OPCODE_HEADER_LEN], val, bytes);
+ cmd->length = bytes + MAX77759_NVMEM_OPCODE_HEADER_LEN;
+ rsp->length = cmd->length;
+
+ ret = max77759_maxq_command(nvmem->max77759, cmd, rsp);
+ if (ret < 0)
+ return ret;
+
+ if (memcmp(cmd->cmd, rsp->rsp, cmd->length)) {
+ dev_warn(nvmem->dev, "protocol error (write)\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int max77759_nvmem_probe(struct platform_device *pdev)
+{
+ struct nvmem_config config = {
+ .dev = &pdev->dev,
+ .name = dev_name(&pdev->dev),
+ .id = NVMEM_DEVID_NONE,
+ .type = NVMEM_TYPE_EEPROM,
+ .ignore_wp = true,
+ .size = MAX77759_NVMEM_SIZE,
+ .word_size = sizeof(u8),
+ .stride = sizeof(u8),
+ .reg_read = max77759_nvmem_reg_read,
+ .reg_write = max77759_nvmem_reg_write,
+ };
+ struct max77759_nvmem *nvmem;
+
+ nvmem = devm_kzalloc(&pdev->dev, sizeof(*nvmem), GFP_KERNEL);
+ if (!nvmem)
+ return -ENOMEM;
+
+ nvmem->dev = &pdev->dev;
+ nvmem->max77759 = dev_get_drvdata(pdev->dev.parent);
+
+ config.priv = nvmem;
+
+ return PTR_ERR_OR_ZERO(devm_nvmem_register(config.dev, &config));
+}
+
+static const struct of_device_id max77759_nvmem_of_id[] = {
+ { .compatible = "maxim,max77759-nvmem", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, max77759_nvmem_of_id);
+
+static const struct platform_device_id max77759_nvmem_platform_id[] = {
+ { "max77759-nvmem", },
+ { }
+};
+MODULE_DEVICE_TABLE(platform, max77759_nvmem_platform_id);
+
+static struct platform_driver max77759_nvmem_driver = {
+ .driver = {
+ .name = "max77759-nvmem",
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .of_match_table = max77759_nvmem_of_id,
+ },
+ .probe = max77759_nvmem_probe,
+ .id_table = max77759_nvmem_platform_id,
+};
+
+module_platform_driver(max77759_nvmem_driver);
+
+MODULE_AUTHOR("André Draszik <andre.draszik@linaro.org>");
+MODULE_DESCRIPTION("NVMEM driver for Maxim MAX77759");
+MODULE_LICENSE("GPL");
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 73e9a3b2f29b..edbd60501cf0 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -40,17 +40,14 @@ static DEFINE_XARRAY_ALLOC1(opp_configs);
static bool _find_opp_dev(const struct device *dev, struct opp_table *opp_table)
{
struct opp_device *opp_dev;
- bool found = false;
- mutex_lock(&opp_table->lock);
+ guard(mutex)(&opp_table->lock);
+
list_for_each_entry(opp_dev, &opp_table->dev_list, node)
- if (opp_dev->dev == dev) {
- found = true;
- break;
- }
+ if (opp_dev->dev == dev)
+ return true;
- mutex_unlock(&opp_table->lock);
- return found;
+ return false;
}
static struct opp_table *_find_opp_table_unlocked(struct device *dev)
@@ -58,10 +55,8 @@ static struct opp_table *_find_opp_table_unlocked(struct device *dev)
struct opp_table *opp_table;
list_for_each_entry(opp_table, &opp_tables, node) {
- if (_find_opp_dev(dev, opp_table)) {
- _get_opp_table_kref(opp_table);
- return opp_table;
- }
+ if (_find_opp_dev(dev, opp_table))
+ return dev_pm_opp_get_opp_table_ref(opp_table);
}
return ERR_PTR(-ENODEV);
@@ -80,18 +75,13 @@ static struct opp_table *_find_opp_table_unlocked(struct device *dev)
*/
struct opp_table *_find_opp_table(struct device *dev)
{
- struct opp_table *opp_table;
-
if (IS_ERR_OR_NULL(dev)) {
pr_err("%s: Invalid parameters\n", __func__);
return ERR_PTR(-EINVAL);
}
- mutex_lock(&opp_table_lock);
- opp_table = _find_opp_table_unlocked(dev);
- mutex_unlock(&opp_table_lock);
-
- return opp_table;
+ guard(mutex)(&opp_table_lock);
+ return _find_opp_table_unlocked(dev);
}
/*
@@ -319,18 +309,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo);
*/
unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
{
- struct opp_table *opp_table;
- unsigned long clock_latency_ns;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return 0;
- clock_latency_ns = opp_table->clock_latency_ns_max;
-
- dev_pm_opp_put_opp_table(opp_table);
-
- return clock_latency_ns;
+ return opp_table->clock_latency_ns_max;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
@@ -342,7 +327,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
*/
unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
{
- struct opp_table *opp_table;
+ struct opp_table *opp_table __free(put_opp_table);
struct dev_pm_opp *opp;
struct regulator *reg;
unsigned long latency_ns = 0;
@@ -358,33 +343,31 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
/* Regulator may not be required for the device */
if (!opp_table->regulators)
- goto put_opp_table;
+ return 0;
count = opp_table->regulator_count;
uV = kmalloc_array(count, sizeof(*uV), GFP_KERNEL);
if (!uV)
- goto put_opp_table;
-
- mutex_lock(&opp_table->lock);
+ return 0;
- for (i = 0; i < count; i++) {
- uV[i].min = ~0;
- uV[i].max = 0;
+ scoped_guard(mutex, &opp_table->lock) {
+ for (i = 0; i < count; i++) {
+ uV[i].min = ~0;
+ uV[i].max = 0;
- list_for_each_entry(opp, &opp_table->opp_list, node) {
- if (!opp->available)
- continue;
+ list_for_each_entry(opp, &opp_table->opp_list, node) {
+ if (!opp->available)
+ continue;
- if (opp->supplies[i].u_volt_min < uV[i].min)
- uV[i].min = opp->supplies[i].u_volt_min;
- if (opp->supplies[i].u_volt_max > uV[i].max)
- uV[i].max = opp->supplies[i].u_volt_max;
+ if (opp->supplies[i].u_volt_min < uV[i].min)
+ uV[i].min = opp->supplies[i].u_volt_min;
+ if (opp->supplies[i].u_volt_max > uV[i].max)
+ uV[i].max = opp->supplies[i].u_volt_max;
+ }
}
}
- mutex_unlock(&opp_table->lock);
-
/*
* The caller needs to ensure that opp_table (and hence the regulator)
* isn't freed, while we are executing this routine.
@@ -397,8 +380,6 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
}
kfree(uV);
-put_opp_table:
- dev_pm_opp_put_opp_table(opp_table);
return latency_ns;
}
@@ -428,7 +409,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency);
*/
unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
{
- struct opp_table *opp_table;
+ struct opp_table *opp_table __free(put_opp_table);
unsigned long freq = 0;
opp_table = _find_opp_table(dev);
@@ -438,8 +419,6 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
if (opp_table->suspend_opp && opp_table->suspend_opp->available)
freq = dev_pm_opp_get_freq(opp_table->suspend_opp);
- dev_pm_opp_put_opp_table(opp_table);
-
return freq;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp_freq);
@@ -449,15 +428,13 @@ int _get_opp_count(struct opp_table *opp_table)
struct dev_pm_opp *opp;
int count = 0;
- mutex_lock(&opp_table->lock);
+ guard(mutex)(&opp_table->lock);
list_for_each_entry(opp, &opp_table->opp_list, node) {
if (opp->available)
count++;
}
- mutex_unlock(&opp_table->lock);
-
return count;
}
@@ -470,21 +447,16 @@ int _get_opp_count(struct opp_table *opp_table)
*/
int dev_pm_opp_get_opp_count(struct device *dev)
{
- struct opp_table *opp_table;
- int count;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
- count = PTR_ERR(opp_table);
- dev_dbg(dev, "%s: OPP table not found (%d)\n",
- __func__, count);
- return count;
+ dev_dbg(dev, "%s: OPP table not found (%ld)\n",
+ __func__, PTR_ERR(opp_table));
+ return PTR_ERR(opp_table);
}
- count = _get_opp_count(opp_table);
- dev_pm_opp_put_opp_table(opp_table);
-
- return count;
+ return _get_opp_count(opp_table);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
@@ -551,7 +523,7 @@ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table,
if (assert && !assert(opp_table, index))
return ERR_PTR(-EINVAL);
- mutex_lock(&opp_table->lock);
+ guard(mutex)(&opp_table->lock);
list_for_each_entry(temp_opp, &opp_table->opp_list, node) {
if (temp_opp->available == available) {
@@ -566,8 +538,6 @@ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table,
dev_pm_opp_get(opp);
}
- mutex_unlock(&opp_table->lock);
-
return opp;
}
@@ -578,8 +548,7 @@ _find_key(struct device *dev, unsigned long *key, int index, bool available,
unsigned long opp_key, unsigned long key),
bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
- struct opp_table *opp_table;
- struct dev_pm_opp *opp;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
@@ -588,12 +557,8 @@ _find_key(struct device *dev, unsigned long *key, int index, bool available,
return ERR_CAST(opp_table);
}
- opp = _opp_table_find_key(opp_table, key, index, available, read,
- compare, assert);
-
- dev_pm_opp_put_opp_table(opp_table);
-
- return opp;
+ return _opp_table_find_key(opp_table, key, index, available, read,
+ compare, assert);
}
static struct dev_pm_opp *_find_key_exact(struct device *dev,
@@ -1187,10 +1152,9 @@ static void _find_current_opp(struct device *dev, struct opp_table *opp_table)
* make special checks to validate current_opp.
*/
if (IS_ERR(opp)) {
- mutex_lock(&opp_table->lock);
- opp = list_first_entry(&opp_table->opp_list, struct dev_pm_opp, node);
- dev_pm_opp_get(opp);
- mutex_unlock(&opp_table->lock);
+ guard(mutex)(&opp_table->lock);
+ opp = dev_pm_opp_get(list_first_entry(&opp_table->opp_list,
+ struct dev_pm_opp, node));
}
opp_table->current_opp = opp;
@@ -1329,8 +1293,7 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
dev_pm_opp_put(old_opp);
/* Make sure current_opp doesn't get freed */
- dev_pm_opp_get(opp);
- opp_table->current_opp = opp;
+ opp_table->current_opp = dev_pm_opp_get(opp);
return ret;
}
@@ -1348,11 +1311,10 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
*/
int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
{
- struct opp_table *opp_table;
+ struct opp_table *opp_table __free(put_opp_table);
+ struct dev_pm_opp *opp __free(put_opp) = NULL;
unsigned long freq = 0, temp_freq;
- struct dev_pm_opp *opp = NULL;
bool forced = false;
- int ret;
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
@@ -1369,9 +1331,8 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
* equivalent to a clk_set_rate()
*/
if (!_get_opp_count(opp_table)) {
- ret = opp_table->config_clks(dev, opp_table, NULL,
- &target_freq, false);
- goto put_opp_table;
+ return opp_table->config_clks(dev, opp_table, NULL,
+ &target_freq, false);
}
freq = clk_round_rate(opp_table->clk, target_freq);
@@ -1386,10 +1347,9 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
temp_freq = freq;
opp = _find_freq_ceil(opp_table, &temp_freq);
if (IS_ERR(opp)) {
- ret = PTR_ERR(opp);
- dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n",
- __func__, freq, ret);
- goto put_opp_table;
+ dev_err(dev, "%s: failed to find OPP for freq %lu (%ld)\n",
+ __func__, freq, PTR_ERR(opp));
+ return PTR_ERR(opp);
}
/*
@@ -1402,14 +1362,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
forced = opp_table->current_rate_single_clk != freq;
}
- ret = _set_opp(dev, opp_table, opp, &freq, forced);
-
- if (freq)
- dev_pm_opp_put(opp);
-
-put_opp_table:
- dev_pm_opp_put_opp_table(opp_table);
- return ret;
+ return _set_opp(dev, opp_table, opp, &freq, forced);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
@@ -1425,8 +1378,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
*/
int dev_pm_opp_set_opp(struct device *dev, struct dev_pm_opp *opp)
{
- struct opp_table *opp_table;
- int ret;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
@@ -1434,10 +1386,7 @@ int dev_pm_opp_set_opp(struct device *dev, struct dev_pm_opp *opp)
return PTR_ERR(opp_table);
}
- ret = _set_opp(dev, opp_table, opp, NULL, false);
- dev_pm_opp_put_opp_table(opp_table);
-
- return ret;
+ return _set_opp(dev, opp_table, opp, NULL, false);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_set_opp);
@@ -1462,9 +1411,8 @@ struct opp_device *_add_opp_dev(const struct device *dev,
/* Initialize opp-dev */
opp_dev->dev = dev;
- mutex_lock(&opp_table->lock);
- list_add(&opp_dev->node, &opp_table->dev_list);
- mutex_unlock(&opp_table->lock);
+ scoped_guard(mutex, &opp_table->lock)
+ list_add(&opp_dev->node, &opp_table->dev_list);
/* Create debugfs entries for the opp_table */
opp_debug_register(opp_dev, opp_table);
@@ -1688,14 +1636,10 @@ static void _opp_table_kref_release(struct kref *kref)
kfree(opp_table);
}
-void _get_opp_table_kref(struct opp_table *opp_table)
+struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table)
{
kref_get(&opp_table->kref);
-}
-
-void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table)
-{
- _get_opp_table_kref(opp_table);
+ return opp_table;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_table_ref);
@@ -1729,9 +1673,10 @@ static void _opp_kref_release(struct kref *kref)
kfree(opp);
}
-void dev_pm_opp_get(struct dev_pm_opp *opp)
+struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp)
{
kref_get(&opp->kref);
+ return opp;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get);
@@ -1750,27 +1695,25 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put);
*/
void dev_pm_opp_remove(struct device *dev, unsigned long freq)
{
+ struct opp_table *opp_table __free(put_opp_table);
struct dev_pm_opp *opp = NULL, *iter;
- struct opp_table *opp_table;
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return;
if (!assert_single_clk(opp_table, 0))
- goto put_table;
-
- mutex_lock(&opp_table->lock);
+ return;
- list_for_each_entry(iter, &opp_table->opp_list, node) {
- if (iter->rates[0] == freq) {
- opp = iter;
- break;
+ scoped_guard(mutex, &opp_table->lock) {
+ list_for_each_entry(iter, &opp_table->opp_list, node) {
+ if (iter->rates[0] == freq) {
+ opp = iter;
+ break;
+ }
}
}
- mutex_unlock(&opp_table->lock);
-
if (opp) {
dev_pm_opp_put(opp);
@@ -1780,32 +1723,26 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq)
dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n",
__func__, freq);
}
-
-put_table:
- /* Drop the reference taken by _find_opp_table() */
- dev_pm_opp_put_opp_table(opp_table);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table,
bool dynamic)
{
- struct dev_pm_opp *opp = NULL, *temp;
+ struct dev_pm_opp *opp;
+
+ guard(mutex)(&opp_table->lock);
- mutex_lock(&opp_table->lock);
- list_for_each_entry(temp, &opp_table->opp_list, node) {
+ list_for_each_entry(opp, &opp_table->opp_list, node) {
/*
* Refcount must be dropped only once for each OPP by OPP core,
* do that with help of "removed" flag.
*/
- if (!temp->removed && dynamic == temp->dynamic) {
- opp = temp;
- break;
- }
+ if (!opp->removed && dynamic == opp->dynamic)
+ return opp;
}
- mutex_unlock(&opp_table->lock);
- return opp;
+ return NULL;
}
/*
@@ -1829,20 +1766,14 @@ static void _opp_remove_all(struct opp_table *opp_table, bool dynamic)
bool _opp_remove_all_static(struct opp_table *opp_table)
{
- mutex_lock(&opp_table->lock);
-
- if (!opp_table->parsed_static_opps) {
- mutex_unlock(&opp_table->lock);
- return false;
- }
+ scoped_guard(mutex, &opp_table->lock) {
+ if (!opp_table->parsed_static_opps)
+ return false;
- if (--opp_table->parsed_static_opps) {
- mutex_unlock(&opp_table->lock);
- return true;
+ if (--opp_table->parsed_static_opps)
+ return true;
}
- mutex_unlock(&opp_table->lock);
-
_opp_remove_all(opp_table, false);
return true;
}
@@ -1855,16 +1786,13 @@ bool _opp_remove_all_static(struct opp_table *opp_table)
*/
void dev_pm_opp_remove_all_dynamic(struct device *dev)
{
- struct opp_table *opp_table;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return;
_opp_remove_all(opp_table, true);
-
- /* Drop the reference taken by _find_opp_table() */
- dev_pm_opp_put_opp_table(opp_table);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove_all_dynamic);
@@ -2049,17 +1977,15 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
struct list_head *head;
int ret;
- mutex_lock(&opp_table->lock);
- head = &opp_table->opp_list;
+ scoped_guard(mutex, &opp_table->lock) {
+ head = &opp_table->opp_list;
- ret = _opp_is_duplicate(dev, new_opp, opp_table, &head);
- if (ret) {
- mutex_unlock(&opp_table->lock);
- return ret;
- }
+ ret = _opp_is_duplicate(dev, new_opp, opp_table, &head);
+ if (ret)
+ return ret;
- list_add(&new_opp->node, head);
- mutex_unlock(&opp_table->lock);
+ list_add(&new_opp->node, head);
+ }
new_opp->opp_table = opp_table;
kref_init(&new_opp->kref);
@@ -2161,8 +2087,8 @@ static int _opp_set_supported_hw(struct opp_table *opp_table,
if (opp_table->supported_hw)
return 0;
- opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions),
- GFP_KERNEL);
+ opp_table->supported_hw = kmemdup_array(versions, count,
+ sizeof(*versions), GFP_KERNEL);
if (!opp_table->supported_hw)
return -ENOMEM;
@@ -2706,18 +2632,16 @@ struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table,
return ERR_PTR(-EBUSY);
for (i = 0; i < src_table->required_opp_count; i++) {
- if (src_table->required_opp_tables[i] == dst_table) {
- mutex_lock(&src_table->lock);
+ if (src_table->required_opp_tables[i] != dst_table)
+ continue;
+ scoped_guard(mutex, &src_table->lock) {
list_for_each_entry(opp, &src_table->opp_list, node) {
if (opp == src_opp) {
- dest_opp = opp->required_opps[i];
- dev_pm_opp_get(dest_opp);
+ dest_opp = dev_pm_opp_get(opp->required_opps[i]);
break;
}
}
-
- mutex_unlock(&src_table->lock);
break;
}
}
@@ -2749,7 +2673,6 @@ int dev_pm_opp_xlate_performance_state(struct opp_table *src_table,
unsigned int pstate)
{
struct dev_pm_opp *opp;
- int dest_pstate = -EINVAL;
int i;
/*
@@ -2783,22 +2706,17 @@ int dev_pm_opp_xlate_performance_state(struct opp_table *src_table,
return -EINVAL;
}
- mutex_lock(&src_table->lock);
+ guard(mutex)(&src_table->lock);
list_for_each_entry(opp, &src_table->opp_list, node) {
- if (opp->level == pstate) {
- dest_pstate = opp->required_opps[i]->level;
- goto unlock;
- }
+ if (opp->level == pstate)
+ return opp->required_opps[i]->level;
}
pr_err("%s: Couldn't find matching OPP (%p: %p)\n", __func__, src_table,
dst_table);
-unlock:
- mutex_unlock(&src_table->lock);
-
- return dest_pstate;
+ return -EINVAL;
}
/**
@@ -2853,46 +2771,38 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add_dynamic);
static int _opp_set_availability(struct device *dev, unsigned long freq,
bool availability_req)
{
- struct opp_table *opp_table;
- struct dev_pm_opp *tmp_opp, *opp = ERR_PTR(-ENODEV);
- int r = 0;
+ struct dev_pm_opp *opp __free(put_opp) = ERR_PTR(-ENODEV), *tmp_opp;
+ struct opp_table *opp_table __free(put_opp_table);
/* Find the opp_table */
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
- r = PTR_ERR(opp_table);
- dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
- return r;
+ dev_warn(dev, "%s: Device OPP not found (%ld)\n", __func__,
+ PTR_ERR(opp_table));
+ return PTR_ERR(opp_table);
}
- if (!assert_single_clk(opp_table, 0)) {
- r = -EINVAL;
- goto put_table;
- }
+ if (!assert_single_clk(opp_table, 0))
+ return -EINVAL;
- mutex_lock(&opp_table->lock);
+ scoped_guard(mutex, &opp_table->lock) {
+ /* Do we have the frequency? */
+ list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
+ if (tmp_opp->rates[0] == freq) {
+ opp = dev_pm_opp_get(tmp_opp);
- /* Do we have the frequency? */
- list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
- if (tmp_opp->rates[0] == freq) {
- opp = tmp_opp;
- break;
- }
- }
+ /* Is update really needed? */
+ if (opp->available == availability_req)
+ return 0;
- if (IS_ERR(opp)) {
- r = PTR_ERR(opp);
- goto unlock;
+ opp->available = availability_req;
+ break;
+ }
+ }
}
- /* Is update really needed? */
- if (opp->available == availability_req)
- goto unlock;
-
- opp->available = availability_req;
-
- dev_pm_opp_get(opp);
- mutex_unlock(&opp_table->lock);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
/* Notify the change of the OPP availability */
if (availability_req)
@@ -2902,14 +2812,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
blocking_notifier_call_chain(&opp_table->head,
OPP_EVENT_DISABLE, opp);
- dev_pm_opp_put(opp);
- goto put_table;
-
-unlock:
- mutex_unlock(&opp_table->lock);
-put_table:
- dev_pm_opp_put_opp_table(opp_table);
- return r;
+ return 0;
}
/**
@@ -2929,9 +2832,9 @@ int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
unsigned long u_volt_max)
{
- struct opp_table *opp_table;
- struct dev_pm_opp *tmp_opp, *opp = ERR_PTR(-ENODEV);
- int r = 0;
+ struct dev_pm_opp *opp __free(put_opp) = ERR_PTR(-ENODEV), *tmp_opp;
+ struct opp_table *opp_table __free(put_opp_table);
+ int r;
/* Find the opp_table */
opp_table = _find_opp_table(dev);
@@ -2941,49 +2844,36 @@ int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
return r;
}
- if (!assert_single_clk(opp_table, 0)) {
- r = -EINVAL;
- goto put_table;
- }
+ if (!assert_single_clk(opp_table, 0))
+ return -EINVAL;
- mutex_lock(&opp_table->lock);
+ scoped_guard(mutex, &opp_table->lock) {
+ /* Do we have the frequency? */
+ list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
+ if (tmp_opp->rates[0] == freq) {
+ opp = dev_pm_opp_get(tmp_opp);
- /* Do we have the frequency? */
- list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
- if (tmp_opp->rates[0] == freq) {
- opp = tmp_opp;
- break;
- }
- }
+ /* Is update really needed? */
+ if (opp->supplies->u_volt == u_volt)
+ return 0;
- if (IS_ERR(opp)) {
- r = PTR_ERR(opp);
- goto adjust_unlock;
- }
-
- /* Is update really needed? */
- if (opp->supplies->u_volt == u_volt)
- goto adjust_unlock;
+ opp->supplies->u_volt = u_volt;
+ opp->supplies->u_volt_min = u_volt_min;
+ opp->supplies->u_volt_max = u_volt_max;
- opp->supplies->u_volt = u_volt;
- opp->supplies->u_volt_min = u_volt_min;
- opp->supplies->u_volt_max = u_volt_max;
+ break;
+ }
+ }
+ }
- dev_pm_opp_get(opp);
- mutex_unlock(&opp_table->lock);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
/* Notify the voltage change of the OPP */
blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ADJUST_VOLTAGE,
opp);
- dev_pm_opp_put(opp);
- goto put_table;
-
-adjust_unlock:
- mutex_unlock(&opp_table->lock);
-put_table:
- dev_pm_opp_put_opp_table(opp_table);
- return r;
+ return 0;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_adjust_voltage);
@@ -2997,9 +2887,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_adjust_voltage);
*/
int dev_pm_opp_sync_regulators(struct device *dev)
{
- struct opp_table *opp_table;
+ struct opp_table *opp_table __free(put_opp_table);
struct regulator *reg;
- int i, ret = 0;
+ int ret, i;
/* Device may not have OPP table */
opp_table = _find_opp_table(dev);
@@ -3008,23 +2898,20 @@ int dev_pm_opp_sync_regulators(struct device *dev)
/* Regulator may not be required for the device */
if (unlikely(!opp_table->regulators))
- goto put_table;
+ return 0;
/* Nothing to sync if voltage wasn't changed */
if (!opp_table->enabled)
- goto put_table;
+ return 0;
for (i = 0; i < opp_table->regulator_count; i++) {
reg = opp_table->regulators[i];
ret = regulator_sync_voltage(reg);
if (ret)
- break;
+ return ret;
}
-put_table:
- /* Drop reference taken by _find_opp_table() */
- dev_pm_opp_put_opp_table(opp_table);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_sync_regulators);
@@ -3076,18 +2963,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
*/
int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb)
{
- struct opp_table *opp_table;
- int ret;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
- ret = blocking_notifier_chain_register(&opp_table->head, nb);
-
- dev_pm_opp_put_opp_table(opp_table);
-
- return ret;
+ return blocking_notifier_chain_register(&opp_table->head, nb);
}
EXPORT_SYMBOL(dev_pm_opp_register_notifier);
@@ -3101,18 +2983,13 @@ EXPORT_SYMBOL(dev_pm_opp_register_notifier);
int dev_pm_opp_unregister_notifier(struct device *dev,
struct notifier_block *nb)
{
- struct opp_table *opp_table;
- int ret;
+ struct opp_table *opp_table __free(put_opp_table);
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
- ret = blocking_notifier_chain_unregister(&opp_table->head, nb);
-
- dev_pm_opp_put_opp_table(opp_table);
-
- return ret;
+ return blocking_notifier_chain_unregister(&opp_table->head, nb);
}
EXPORT_SYMBOL(dev_pm_opp_unregister_notifier);
@@ -3125,7 +3002,7 @@ EXPORT_SYMBOL(dev_pm_opp_unregister_notifier);
*/
void dev_pm_opp_remove_table(struct device *dev)
{
- struct opp_table *opp_table;
+ struct opp_table *opp_table __free(put_opp_table);
/* Check for existing table for 'dev' */
opp_table = _find_opp_table(dev);
@@ -3146,8 +3023,5 @@ void dev_pm_opp_remove_table(struct device *dev)
**/
if (_opp_remove_all_static(opp_table))
dev_pm_opp_put_opp_table(opp_table);
-
- /* Drop reference taken by _find_opp_table() */
- dev_pm_opp_put_opp_table(opp_table);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table);
diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 12c429b407ca..97989d4fe336 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -43,7 +43,6 @@
int dev_pm_opp_init_cpufreq_table(struct device *dev,
struct cpufreq_frequency_table **opp_table)
{
- struct dev_pm_opp *opp;
struct cpufreq_frequency_table *freq_table = NULL;
int i, max_opps, ret = 0;
unsigned long rate;
@@ -57,6 +56,8 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
return -ENOMEM;
for (i = 0, rate = 0; i < max_opps; i++, rate++) {
+ struct dev_pm_opp *opp __free(put_opp);
+
/* find next rate */
opp = dev_pm_opp_find_freq_ceil(dev, &rate);
if (IS_ERR(opp)) {
@@ -69,8 +70,6 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
/* Is Boost/turbo opp ? */
if (dev_pm_opp_is_turbo(opp))
freq_table[i].flags = CPUFREQ_BOOST_FREQ;
-
- dev_pm_opp_put(opp);
}
freq_table[i].driver_data = i;
@@ -155,10 +154,10 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
const struct cpumask *cpumask)
{
+ struct opp_table *opp_table __free(put_opp_table);
struct opp_device *opp_dev;
- struct opp_table *opp_table;
struct device *dev;
- int cpu, ret = 0;
+ int cpu;
opp_table = _find_opp_table(cpu_dev);
if (IS_ERR(opp_table))
@@ -186,9 +185,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
}
- dev_pm_opp_put_opp_table(opp_table);
-
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
@@ -204,33 +201,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
*/
int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
{
+ struct opp_table *opp_table __free(put_opp_table);
struct opp_device *opp_dev;
- struct opp_table *opp_table;
- int ret = 0;
opp_table = _find_opp_table(cpu_dev);
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
- if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN) {
- ret = -EINVAL;
- goto put_opp_table;
- }
+ if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN)
+ return -EINVAL;
cpumask_clear(cpumask);
if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
- mutex_lock(&opp_table->lock);
+ guard(mutex)(&opp_table->lock);
list_for_each_entry(opp_dev, &opp_table->dev_list, node)
cpumask_set_cpu(opp_dev->dev->id, cpumask);
- mutex_unlock(&opp_table->lock);
} else {
cpumask_set_cpu(cpu_dev->id, cpumask);
}
-put_opp_table:
- dev_pm_opp_put_opp_table(opp_table);
-
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_sharing_cpus);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index a24f76f5fd01..505d79821584 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
struct opp_table *_managed_opp(struct device *dev, int index)
{
struct opp_table *opp_table, *managed_table = NULL;
- struct device_node *np;
+ struct device_node *np __free(device_node);
np = _opp_of_get_opp_desc_node(dev->of_node, index);
if (!np)
@@ -60,17 +60,13 @@ struct opp_table *_managed_opp(struct device *dev, int index)
* But the OPPs will be considered as shared only if the
* OPP table contains a "opp-shared" property.
*/
- if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
- _get_opp_table_kref(opp_table);
- managed_table = opp_table;
- }
+ if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED)
+ managed_table = dev_pm_opp_get_opp_table_ref(opp_table);
break;
}
}
- of_node_put(np);
-
return managed_table;
}
@@ -80,18 +76,13 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
{
struct dev_pm_opp *opp;
- mutex_lock(&opp_table->lock);
+ guard(mutex)(&opp_table->lock);
list_for_each_entry(opp, &opp_table->opp_list, node) {
- if (opp->np == opp_np) {
- dev_pm_opp_get(opp);
- mutex_unlock(&opp_table->lock);
- return opp;
- }
+ if (opp->np == opp_np)
+ return dev_pm_opp_get(opp);
}
- mutex_unlock(&opp_table->lock);
-
return NULL;
}
@@ -104,27 +95,20 @@ static struct device_node *of_parse_required_opp(struct device_node *np,
/* The caller must call dev_pm_opp_put_opp_table() after the table is used */
static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np)
{
+ struct device_node *opp_table_np __free(device_node);
struct opp_table *opp_table;
- struct device_node *opp_table_np;
opp_table_np = of_get_parent(opp_np);
if (!opp_table_np)
- goto err;
+ return ERR_PTR(-ENODEV);
- /* It is safe to put the node now as all we need now is its address */
- of_node_put(opp_table_np);
+ guard(mutex)(&opp_table_lock);
- mutex_lock(&opp_table_lock);
list_for_each_entry(opp_table, &opp_tables, node) {
- if (opp_table_np == opp_table->np) {
- _get_opp_table_kref(opp_table);
- mutex_unlock(&opp_table_lock);
- return opp_table;
- }
+ if (opp_table_np == opp_table->np)
+ return dev_pm_opp_get_opp_table_ref(opp_table);
}
- mutex_unlock(&opp_table_lock);
-err:
return ERR_PTR(-ENODEV);
}
@@ -149,9 +133,8 @@ static void _opp_table_free_required_tables(struct opp_table *opp_table)
opp_table->required_opp_count = 0;
opp_table->required_opp_tables = NULL;
- mutex_lock(&opp_table_lock);
+ guard(mutex)(&opp_table_lock);
list_del(&opp_table->lazy);
- mutex_unlock(&opp_table_lock);
}
/*
@@ -163,7 +146,7 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
struct device_node *opp_np)
{
struct opp_table **required_opp_tables;
- struct device_node *required_np, *np;
+ struct device_node *np __free(device_node);
bool lazy = false;
int count, i, size;
@@ -171,30 +154,32 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
np = of_get_next_available_child(opp_np, NULL);
if (!np) {
dev_warn(dev, "Empty OPP table\n");
-
return;
}
count = of_count_phandle_with_args(np, "required-opps", NULL);
if (count <= 0)
- goto put_np;
+ return;
size = sizeof(*required_opp_tables) + sizeof(*opp_table->required_devs);
required_opp_tables = kcalloc(count, size, GFP_KERNEL);
if (!required_opp_tables)
- goto put_np;
+ return;
opp_table->required_opp_tables = required_opp_tables;
opp_table->required_devs = (void *)(required_opp_tables + count);
opp_table->required_opp_count = count;
for (i = 0; i < count; i++) {
+ struct device_node *required_np __free(device_node);
+
required_np = of_parse_required_opp(np, i);
- if (!required_np)
- goto free_required_tables;
+ if (!required_np) {
+ _opp_table_free_required_tables(opp_table);
+ return;
+ }
required_opp_tables[i] = _find_table_of_opp_np(required_np);
- of_node_put(required_np);
if (IS_ERR(required_opp_tables[i]))
lazy = true;
@@ -206,23 +191,15 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
* The OPP table is not held while allocating the table, take it
* now to avoid corruption to the lazy_opp_tables list.
*/
- mutex_lock(&opp_table_lock);
+ guard(mutex)(&opp_table_lock);
list_add(&opp_table->lazy, &lazy_opp_tables);
- mutex_unlock(&opp_table_lock);
}
-
- goto put_np;
-
-free_required_tables:
- _opp_table_free_required_tables(opp_table);
-put_np:
- of_node_put(np);
}
void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
int index)
{
- struct device_node *np, *opp_np;
+ struct device_node *np __free(device_node), *opp_np;
u32 val;
/*
@@ -243,8 +220,6 @@ void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
/* Get OPP table node */
opp_np = _opp_of_get_opp_desc_node(np, index);
- of_node_put(np);
-
if (!opp_np)
return;
@@ -298,15 +273,13 @@ void _of_clear_opp(struct opp_table *opp_table, struct dev_pm_opp *opp)
static int _link_required_opps(struct dev_pm_opp *opp,
struct opp_table *required_table, int index)
{
- struct device_node *np;
+ struct device_node *np __free(device_node);
np = of_parse_required_opp(opp->np, index);
if (unlikely(!np))
return -ENODEV;
opp->required_opps[index] = _find_opp_of_np(required_table, np);
- of_node_put(np);
-
if (!opp->required_opps[index]) {
pr_err("%s: Unable to find required OPP node: %pOF (%d)\n",
__func__, opp->np, index);
@@ -370,19 +343,22 @@ static int lazy_link_required_opps(struct opp_table *opp_table,
static void lazy_link_required_opp_table(struct opp_table *new_table)
{
struct opp_table *opp_table, *temp, **required_opp_tables;
- struct device_node *required_np, *opp_np, *required_table_np;
struct dev_pm_opp *opp;
int i, ret;
- mutex_lock(&opp_table_lock);
+ guard(mutex)(&opp_table_lock);
list_for_each_entry_safe(opp_table, temp, &lazy_opp_tables, lazy) {
+ struct device_node *opp_np __free(device_node);
bool lazy = false;
/* opp_np can't be invalid here */
opp_np = of_get_next_available_child(opp_table->np, NULL);
for (i = 0; i < opp_table->required_opp_count; i++) {
+ struct device_node *required_np __free(device_node) = NULL;
+ struct device_node *required_table_np __free(device_node) = NULL;
+
required_opp_tables = opp_table->required_opp_tables;
/* Required opp-table is already parsed */
@@ -393,9 +369,6 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
required_np = of_parse_required_opp(opp_np, i);
required_table_np = of_get_parent(required_np);
- of_node_put(required_table_np);
- of_node_put(required_np);
-
/*
* Newly added table isn't the required opp-table for
* opp_table.
@@ -405,8 +378,7 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
continue;
}
- required_opp_tables[i] = new_table;
- _get_opp_table_kref(new_table);
+ required_opp_tables[i] = dev_pm_opp_get_opp_table_ref(new_table);
/* Link OPPs now */
ret = lazy_link_required_opps(opp_table, new_table, i);
@@ -417,8 +389,6 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
}
}
- of_node_put(opp_np);
-
/* All required opp-tables found, remove from lazy list */
if (!lazy) {
list_del_init(&opp_table->lazy);
@@ -427,22 +397,22 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
_required_opps_available(opp, opp_table->required_opp_count);
}
}
-
- mutex_unlock(&opp_table_lock);
}
static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
{
- struct device_node *np, *opp_np;
+ struct device_node *opp_np __free(device_node) = NULL;
+ struct device_node *np __free(device_node) = NULL;
struct property *prop;
if (!opp_table) {
+ struct device_node *np __free(device_node);
+
np = of_node_get(dev->of_node);
if (!np)
return -ENODEV;
opp_np = _opp_of_get_opp_desc_node(np, 0);
- of_node_put(np);
} else {
opp_np = of_node_get(opp_table->np);
}
@@ -453,15 +423,12 @@ static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
/* Checking only first OPP is sufficient */
np = of_get_next_available_child(opp_np, NULL);
- of_node_put(opp_np);
if (!np) {
dev_err(dev, "OPP table empty\n");
return -EINVAL;
}
prop = of_find_property(np, "opp-peak-kBps", NULL);
- of_node_put(np);
-
if (!prop || !prop->length)
return 0;
@@ -471,7 +438,7 @@ static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
int dev_pm_opp_of_find_icc_paths(struct device *dev,
struct opp_table *opp_table)
{
- struct device_node *np;
+ struct device_node *np __free(device_node) = of_node_get(dev->of_node);
int ret, i, count, num_paths;
struct icc_path **paths;
@@ -481,15 +448,13 @@ int dev_pm_opp_of_find_icc_paths(struct device *dev,
else if (ret <= 0)
return ret;
- ret = 0;
-
- np = of_node_get(dev->of_node);
if (!np)
return 0;
+ ret = 0;
+
count = of_count_phandle_with_args(np, "interconnects",
"#interconnect-cells");
- of_node_put(np);
if (count < 0)
return 0;
@@ -992,15 +957,14 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
struct dev_pm_opp *opp;
/* OPP table is already initialized for the device */
- mutex_lock(&opp_table->lock);
- if (opp_table->parsed_static_opps) {
- opp_table->parsed_static_opps++;
- mutex_unlock(&opp_table->lock);
- return 0;
- }
+ scoped_guard(mutex, &opp_table->lock) {
+ if (opp_table->parsed_static_opps) {
+ opp_table->parsed_static_opps++;
+ return 0;
+ }
- opp_table->parsed_static_opps = 1;
- mutex_unlock(&opp_table->lock);
+ opp_table->parsed_static_opps = 1;
+ }
/* We have opp-table node now, iterate over it and add OPPs */
for_each_available_child_of_node(opp_table->np, np) {
@@ -1040,15 +1004,14 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table)
const __be32 *val;
int nr, ret = 0;
- mutex_lock(&opp_table->lock);
- if (opp_table->parsed_static_opps) {
- opp_table->parsed_static_opps++;
- mutex_unlock(&opp_table->lock);
- return 0;
- }
+ scoped_guard(mutex, &opp_table->lock) {
+ if (opp_table->parsed_static_opps) {
+ opp_table->parsed_static_opps++;
+ return 0;
+ }
- opp_table->parsed_static_opps = 1;
- mutex_unlock(&opp_table->lock);
+ opp_table->parsed_static_opps = 1;
+ }
prop = of_find_property(dev->of_node, "operating-points", NULL);
if (!prop) {
@@ -1306,8 +1269,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
struct cpumask *cpumask)
{
- struct device_node *np, *tmp_np, *cpu_np;
- int cpu, ret = 0;
+ struct device_node *np __free(device_node);
+ int cpu;
/* Get OPP descriptor node */
np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
@@ -1320,9 +1283,12 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
/* OPPs are shared ? */
if (!of_property_read_bool(np, "opp-shared"))
- goto put_cpu_node;
+ return 0;
for_each_possible_cpu(cpu) {
+ struct device_node *cpu_np __free(device_node) = NULL;
+ struct device_node *tmp_np __free(device_node) = NULL;
+
if (cpu == cpu_dev->id)
continue;
@@ -1330,29 +1296,22 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
if (!cpu_np) {
dev_err(cpu_dev, "%s: failed to get cpu%d node\n",
__func__, cpu);
- ret = -ENOENT;
- goto put_cpu_node;
+ return -ENOENT;
}
/* Get OPP descriptor node */
tmp_np = _opp_of_get_opp_desc_node(cpu_np, 0);
- of_node_put(cpu_np);
if (!tmp_np) {
pr_err("%pOF: Couldn't find opp node\n", cpu_np);
- ret = -ENOENT;
- goto put_cpu_node;
+ return -ENOENT;
}
/* CPUs are sharing opp node */
if (np == tmp_np)
cpumask_set_cpu(cpu, cpumask);
-
- of_node_put(tmp_np);
}
-put_cpu_node:
- of_node_put(np);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
@@ -1369,9 +1328,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
*/
int of_get_required_opp_performance_state(struct device_node *np, int index)
{
- struct dev_pm_opp *opp;
- struct device_node *required_np;
- struct opp_table *opp_table;
+ struct device_node *required_np __free(device_node);
+ struct opp_table *opp_table __free(put_opp_table) = NULL;
+ struct dev_pm_opp *opp __free(put_opp) = NULL;
int pstate = -EINVAL;
required_np = of_parse_required_opp(np, index);
@@ -1382,13 +1341,13 @@ int of_get_required_opp_performance_state(struct device_node *np, int index)
if (IS_ERR(opp_table)) {
pr_err("%s: Failed to find required OPP table %pOF: %ld\n",
__func__, np, PTR_ERR(opp_table));
- goto put_required_np;
+ return PTR_ERR(opp_table);
}
/* The OPP tables must belong to a genpd */
if (unlikely(!opp_table->is_genpd)) {
pr_err("%s: Performance state is only valid for genpds.\n", __func__);
- goto put_required_np;
+ return -EINVAL;
}
opp = _find_opp_of_np(opp_table, required_np);
@@ -1399,15 +1358,8 @@ int of_get_required_opp_performance_state(struct device_node *np, int index)
} else {
pstate = opp->level;
}
- dev_pm_opp_put(opp);
-
}
- dev_pm_opp_put_opp_table(opp_table);
-
-put_required_np:
- of_node_put(required_np);
-
return pstate;
}
EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state);
@@ -1424,7 +1376,7 @@ EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state);
*/
bool dev_pm_opp_of_has_required_opp(struct device *dev)
{
- struct device_node *opp_np, *np;
+ struct device_node *np __free(device_node) = NULL, *opp_np __free(device_node);
int count;
opp_np = _opp_of_get_opp_desc_node(dev->of_node, 0);
@@ -1432,14 +1384,12 @@ bool dev_pm_opp_of_has_required_opp(struct device *dev)
return false;
np = of_get_next_available_child(opp_np, NULL);
- of_node_put(opp_np);
if (!np) {
dev_warn(dev, "Empty OPP table\n");
return false;
}
count = of_count_phandle_with_args(np, "required-opps", NULL);
- of_node_put(np);
return count > 0;
}
@@ -1475,7 +1425,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
static int __maybe_unused
_get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
{
- struct dev_pm_opp *opp;
+ struct dev_pm_opp *opp __free(put_opp);
unsigned long opp_freq, opp_power;
/* Find the right frequency and related OPP */
@@ -1485,7 +1435,6 @@ _get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
return -EINVAL;
opp_power = dev_pm_opp_get_power(opp);
- dev_pm_opp_put(opp);
if (!opp_power)
return -EINVAL;
@@ -1516,8 +1465,8 @@ _get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW,
unsigned long *kHz)
{
- struct dev_pm_opp *opp;
- struct device_node *np;
+ struct dev_pm_opp *opp __free(put_opp) = NULL;
+ struct device_node *np __free(device_node);
unsigned long mV, Hz;
u32 cap;
u64 tmp;
@@ -1528,7 +1477,6 @@ int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW,
return -EINVAL;
ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
- of_node_put(np);
if (ret)
return -EINVAL;
@@ -1538,7 +1486,6 @@ int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW,
return -EINVAL;
mV = dev_pm_opp_get_voltage(opp) / 1000;
- dev_pm_opp_put(opp);
if (!mV)
return -EINVAL;
@@ -1555,20 +1502,15 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_calc_power);
static bool _of_has_opp_microwatt_property(struct device *dev)
{
- unsigned long power, freq = 0;
- struct dev_pm_opp *opp;
+ struct dev_pm_opp *opp __free(put_opp);
+ unsigned long freq = 0;
/* Check if at least one OPP has needed property */
opp = dev_pm_opp_find_freq_ceil(dev, &freq);
if (IS_ERR(opp))
return false;
- power = dev_pm_opp_get_power(opp);
- dev_pm_opp_put(opp);
- if (!power)
- return false;
-
- return true;
+ return !!dev_pm_opp_get_power(opp);
}
/**
@@ -1584,8 +1526,8 @@ static bool _of_has_opp_microwatt_property(struct device *dev)
*/
int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus)
{
+ struct device_node *np __free(device_node) = NULL;
struct em_data_callback em_cb;
- struct device_node *np;
int ret, nr_opp;
u32 cap;
@@ -1620,7 +1562,6 @@ int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus)
* user about the inconsistent configuration.
*/
ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
- of_node_put(np);
if (ret || !cap) {
dev_dbg(dev, "Couldn't find proper 'dynamic-power-coefficient' in DT\n");
ret = -EINVAL;
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 5c7c81190e41..9eba63e01a9e 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -251,7 +251,6 @@ struct opp_table {
/* Routines internal to opp core */
bool _opp_remove_all_static(struct opp_table *opp_table);
-void _get_opp_table_kref(struct opp_table *opp_table);
int _get_opp_count(struct opp_table *opp_table);
struct opp_table *_find_opp_table(struct device *dev);
struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 9800b7681054..eb3cc28d43f8 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -40,6 +40,7 @@ config PCIE_APPLE
depends on OF
depends on PCI_MSI
select PCI_HOST_COMMON
+ select IRQ_MSI_LIB
help
Say Y here if you want to enable PCIe controller support on Apple
system-on-chips, like the Apple M1. This is required for the USB
@@ -227,6 +228,7 @@ config PCI_TEGRA
bool "NVIDIA Tegra PCIe controller"
depends on ARCH_TEGRA || COMPILE_TEST
depends on PCI_MSI
+ select IRQ_MSI_LIB
help
Say Y here if you want support for the PCIe host controller found
on NVIDIA Tegra SoCs.
@@ -303,6 +305,7 @@ config PCI_XGENE_MSI
bool "X-Gene v1 PCIe MSI feature"
depends on PCI_XGENE
depends on PCI_MSI
+ select IRQ_MSI_LIB
default y
help
Say Y here if you want PCIe MSI support for the APM X-Gene v1 SoC.
diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 33d6bf460ffe..3219704aba0e 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -359,8 +359,8 @@ static int dra7xx_pcie_init_irq_domain(struct dw_pcie_rp *pp)
irq_set_chained_handler_and_data(pp->irq, dra7xx_pcie_msi_irq_handler,
pp);
- dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &intx_domain_ops, pp);
+ dra7xx->irq_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node),
+ PCI_NUM_INTX, &intx_domain_ops, pp);
of_node_put(pcie_intc_node);
if (!dra7xx->irq_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index 76a37368ae4f..1385d9db7b32 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -761,7 +761,7 @@ static int ks_pcie_config_intx_irq(struct keystone_pcie *ks_pcie)
ks_pcie);
}
- intx_irq_domain = irq_domain_add_linear(intc_np, PCI_NUM_INTX,
+ intx_irq_domain = irq_domain_create_linear(of_fwnode_handle(intc_np), PCI_NUM_INTX,
&ks_pcie_intx_irq_domain_ops, NULL);
if (!intx_irq_domain) {
dev_err(dev, "Failed to add irq domain for INTX irqs\n");
diff --git a/drivers/pci/controller/dwc/pcie-amd-mdb.c b/drivers/pci/controller/dwc/pcie-amd-mdb.c
index 4eb2a4e8189d..9f7251a16d32 100644
--- a/drivers/pci/controller/dwc/pcie-amd-mdb.c
+++ b/drivers/pci/controller/dwc/pcie-amd-mdb.c
@@ -290,8 +290,8 @@ static int amd_mdb_pcie_init_irq_domains(struct amd_mdb_pcie *pcie,
return -ENODEV;
}
- pcie->mdb_domain = irq_domain_add_linear(pcie_intc_node, 32,
- &event_domain_ops, pcie);
+ pcie->mdb_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), 32,
+ &event_domain_ops, pcie);
if (!pcie->mdb_domain) {
err = -ENOMEM;
dev_err(dev, "Failed to add MDB domain\n");
@@ -300,8 +300,8 @@ static int amd_mdb_pcie_init_irq_domains(struct amd_mdb_pcie *pcie,
irq_domain_update_bus_token(pcie->mdb_domain, DOMAIN_BUS_NEXUS);
- pcie->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &amd_intx_domain_ops, pcie);
+ pcie->intx_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node),
+ PCI_NUM_INTX, &amd_intx_domain_ops, pcie);
if (!pcie->intx_domain) {
err = -ENOMEM;
dev_err(dev, "Failed to add INTx domain\n");
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index ecc33f6789e3..d1cd48efad43 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -227,7 +227,7 @@ static const struct irq_domain_ops dw_pcie_msi_domain_ops = {
int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(pci->dev->of_node);
pp->irq_domain = irq_domain_create_linear(fwnode, pp->num_vectors,
&dw_pcie_msi_domain_ops, pp);
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index c624b7ebd118..678d510a261d 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -144,8 +144,8 @@ static int rockchip_pcie_init_irq_domain(struct rockchip_pcie *rockchip)
return -EINVAL;
}
- rockchip->irq_domain = irq_domain_add_linear(intc, PCI_NUM_INTX,
- &intx_domain_ops, rockchip);
+ rockchip->irq_domain = irq_domain_create_linear(of_fwnode_handle(intc), PCI_NUM_INTX,
+ &intx_domain_ops, rockchip);
of_node_put(intc);
if (!rockchip->irq_domain) {
dev_err(dev, "failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c b/drivers/pci/controller/dwc/pcie-uniphier.c
index 5757ca3803c9..43b28f826edd 100644
--- a/drivers/pci/controller/dwc/pcie-uniphier.c
+++ b/drivers/pci/controller/dwc/pcie-uniphier.c
@@ -279,7 +279,7 @@ static int uniphier_pcie_config_intx_irq(struct dw_pcie_rp *pp)
goto out_put_node;
}
- pcie->intx_irq_domain = irq_domain_add_linear(np_intc, PCI_NUM_INTX,
+ pcie->intx_irq_domain = irq_domain_create_linear(of_fwnode_handle(np_intc), PCI_NUM_INTX,
&uniphier_intx_domain_ops, pp);
if (!pcie->intx_irq_domain) {
dev_err(pci->dev, "Failed to get INTx domain\n");
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
index 0e088e74155d..a600f46ee3c3 100644
--- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
@@ -435,12 +435,12 @@ static const struct irq_domain_ops msi_domain_ops = {
static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie)
{
struct device *dev = &pcie->pdev->dev;
- struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node);
struct mobiveil_msi *msi = &pcie->rp.msi;
mutex_init(&msi->lock);
- msi->dev_domain = irq_domain_add_linear(NULL, msi->num_of_vectors,
- &msi_domain_ops, pcie);
+ msi->dev_domain = irq_domain_create_linear(NULL, msi->num_of_vectors,
+ &msi_domain_ops, pcie);
if (!msi->dev_domain) {
dev_err(dev, "failed to create IRQ domain\n");
return -ENOMEM;
@@ -461,12 +461,11 @@ static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie)
static int mobiveil_pcie_init_irq_domain(struct mobiveil_pcie *pcie)
{
struct device *dev = &pcie->pdev->dev;
- struct device_node *node = dev->of_node;
struct mobiveil_root_port *rp = &pcie->rp;
/* setup INTx */
- rp->intx_domain = irq_domain_add_linear(node, PCI_NUM_INTX,
- &intx_domain_ops, pcie);
+ rp->intx_domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), PCI_NUM_INTX,
+ &intx_domain_ops, pcie);
if (!rp->intx_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index a29796cce420..7bac64533b14 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -1456,9 +1456,8 @@ static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie)
raw_spin_lock_init(&pcie->msi_irq_lock);
mutex_init(&pcie->msi_used_lock);
- pcie->msi_inner_domain =
- irq_domain_add_linear(NULL, MSI_IRQ_NUM,
- &advk_msi_domain_ops, pcie);
+ pcie->msi_inner_domain = irq_domain_create_linear(NULL, MSI_IRQ_NUM,
+ &advk_msi_domain_ops, pcie);
if (!pcie->msi_inner_domain)
return -ENOMEM;
@@ -1508,9 +1507,8 @@ static int advk_pcie_init_irq_domain(struct advk_pcie *pcie)
irq_chip->irq_mask = advk_pcie_irq_mask;
irq_chip->irq_unmask = advk_pcie_irq_unmask;
- pcie->irq_domain =
- irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &advk_pcie_irq_domain_ops, pcie);
+ pcie->irq_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), PCI_NUM_INTX,
+ &advk_pcie_irq_domain_ops, pcie);
if (!pcie->irq_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
ret = -ENOMEM;
@@ -1549,9 +1547,7 @@ static const struct irq_domain_ops advk_pcie_rp_irq_domain_ops = {
static int advk_pcie_init_rp_irq_domain(struct advk_pcie *pcie)
{
- pcie->rp_irq_domain = irq_domain_add_linear(NULL, 1,
- &advk_pcie_rp_irq_domain_ops,
- pcie);
+ pcie->rp_irq_domain = irq_domain_create_linear(NULL, 1, &advk_pcie_rp_irq_domain_ops, pcie);
if (!pcie->rp_irq_domain) {
dev_err(&pcie->pdev->dev, "Failed to add Root Port IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c
index ffdeed25e961..28e43831c0f1 100644
--- a/drivers/pci/controller/pci-ftpci100.c
+++ b/drivers/pci/controller/pci-ftpci100.c
@@ -345,8 +345,8 @@ static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p)
return irq ?: -EINVAL;
}
- p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX,
- &faraday_pci_irqdomain_ops, p);
+ p->irqdomain = irq_domain_create_linear(of_fwnode_handle(intc), PCI_NUM_INTX,
+ &faraday_pci_irqdomain_ops, p);
of_node_put(intc);
if (!p->irqdomain) {
dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index ac27bda5ba26..e1eaa24559a2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -3975,24 +3975,18 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg)
{
struct irq_data *irq_data;
struct msi_desc *entry;
- int ret = 0;
if (!pdev->msi_enabled && !pdev->msix_enabled)
return 0;
- msi_lock_descs(&pdev->dev);
+ guard(msi_descs_lock)(&pdev->dev);
msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
irq_data = irq_get_irq_data(entry->irq);
- if (WARN_ON_ONCE(!irq_data)) {
- ret = -EINVAL;
- break;
- }
-
+ if (WARN_ON_ONCE(!irq_data))
+ return -EINVAL;
hv_compose_msi_msg(irq_data, &entry->msg);
}
- msi_unlock_descs(&pdev->dev);
-
- return ret;
+ return 0;
}
/*
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index b0e3bce10aa4..60da24ba0a19 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -1078,9 +1078,9 @@ static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
return -ENODEV;
}
- port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &mvebu_pcie_intx_irq_domain_ops,
- port);
+ port->intx_irq_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node),
+ PCI_NUM_INTX,
+ &mvebu_pcie_intx_irq_domain_ops, port);
of_node_put(pcie_intc_node);
if (!port->intx_irq_domain) {
dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index d2f88997283a..467ddc701adc 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -22,6 +22,7 @@
#include <linux/iopoll.h>
#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -1547,7 +1548,7 @@ static void tegra_pcie_msi_irq(struct irq_desc *desc)
unsigned int index = i * 32 + offset;
int ret;
- ret = generic_handle_domain_irq(msi->domain->parent, index);
+ ret = generic_handle_domain_irq(msi->domain, index);
if (ret) {
/*
* that's weird who triggered this?
@@ -1565,30 +1566,6 @@ static void tegra_pcie_msi_irq(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-static void tegra_msi_top_irq_ack(struct irq_data *d)
-{
- irq_chip_ack_parent(d);
-}
-
-static void tegra_msi_top_irq_mask(struct irq_data *d)
-{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
-
-static void tegra_msi_top_irq_unmask(struct irq_data *d)
-{
- pci_msi_unmask_irq(d);
- irq_chip_unmask_parent(d);
-}
-
-static struct irq_chip tegra_msi_top_chip = {
- .name = "Tegra PCIe MSI",
- .irq_ack = tegra_msi_top_irq_ack,
- .irq_mask = tegra_msi_top_irq_mask,
- .irq_unmask = tegra_msi_top_irq_unmask,
-};
-
static void tegra_msi_irq_ack(struct irq_data *d)
{
struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
@@ -1690,42 +1667,40 @@ static const struct irq_domain_ops tegra_msi_domain_ops = {
.free = tegra_msi_domain_free,
};
-static struct msi_domain_info tegra_msi_info = {
- .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX,
- .chip = &tegra_msi_top_chip,
+static const struct msi_parent_ops tegra_msi_parent_ops = {
+ .supported_flags = (MSI_GENERIC_FLAGS_MASK |
+ MSI_FLAG_PCI_MSIX),
+ .required_flags = (MSI_FLAG_USE_DEF_DOM_OPS |
+ MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSI_MASK_PARENT |
+ MSI_FLAG_NO_AFFINITY),
+ .chip_flags = MSI_CHIP_FLAG_SET_ACK,
+ .bus_select_token = DOMAIN_BUS_PCI_MSI,
+ .init_dev_msi_info = msi_lib_init_dev_msi_info,
};
static int tegra_allocate_domains(struct tegra_msi *msi)
{
struct tegra_pcie *pcie = msi_to_pcie(msi);
struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
- struct irq_domain *parent;
-
- parent = irq_domain_create_linear(fwnode, INT_PCI_MSI_NR,
- &tegra_msi_domain_ops, msi);
- if (!parent) {
- dev_err(pcie->dev, "failed to create IRQ domain\n");
- return -ENOMEM;
- }
- irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .ops = &tegra_msi_domain_ops,
+ .size = INT_PCI_MSI_NR,
+ .host_data = msi,
+ };
- msi->domain = pci_msi_create_irq_domain(fwnode, &tegra_msi_info, parent);
+ msi->domain = msi_create_parent_irq_domain(&info, &tegra_msi_parent_ops);
if (!msi->domain) {
dev_err(pcie->dev, "failed to create MSI domain\n");
- irq_domain_remove(parent);
return -ENOMEM;
}
-
return 0;
}
static void tegra_free_domains(struct tegra_msi *msi)
{
- struct irq_domain *parent = msi->domain->parent;
-
irq_domain_remove(msi->domain);
- irq_domain_remove(parent);
}
static int tegra_pcie_msi_setup(struct tegra_pcie *pcie)
diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c
index 7bce327897c9..b05ec8b0bb93 100644
--- a/drivers/pci/controller/pci-xgene-msi.c
+++ b/drivers/pci/controller/pci-xgene-msi.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/irqchip/chained_irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/of_pci.h>
@@ -32,7 +33,6 @@ struct xgene_msi_group {
struct xgene_msi {
struct device_node *node;
struct irq_domain *inner_domain;
- struct irq_domain *msi_domain;
u64 msi_addr;
void __iomem *msi_regs;
unsigned long *bitmap;
@@ -44,20 +44,6 @@ struct xgene_msi {
/* Global data */
static struct xgene_msi xgene_msi_ctrl;
-static struct irq_chip xgene_msi_top_irq_chip = {
- .name = "X-Gene1 MSI",
- .irq_enable = pci_msi_unmask_irq,
- .irq_disable = pci_msi_mask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_unmask = pci_msi_unmask_irq,
-};
-
-static struct msi_domain_info xgene_msi_domain_info = {
- .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX),
- .chip = &xgene_msi_top_irq_chip,
-};
-
/*
* X-Gene v1 has 16 groups of MSI termination registers MSInIRx, where
* n is group number (0..F), x is index of registers in each group (0..7)
@@ -235,34 +221,35 @@ static void xgene_irq_domain_free(struct irq_domain *domain,
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
-static const struct irq_domain_ops msi_domain_ops = {
+static const struct irq_domain_ops xgene_msi_domain_ops = {
.alloc = xgene_irq_domain_alloc,
.free = xgene_irq_domain_free,
};
+static const struct msi_parent_ops xgene_msi_parent_ops = {
+ .supported_flags = (MSI_GENERIC_FLAGS_MASK |
+ MSI_FLAG_PCI_MSIX),
+ .required_flags = (MSI_FLAG_USE_DEF_DOM_OPS |
+ MSI_FLAG_USE_DEF_CHIP_OPS),
+ .bus_select_token = DOMAIN_BUS_PCI_MSI,
+ .init_dev_msi_info = msi_lib_init_dev_msi_info,
+};
+
static int xgene_allocate_domains(struct xgene_msi *msi)
{
- msi->inner_domain = irq_domain_add_linear(NULL, NR_MSI_VEC,
- &msi_domain_ops, msi);
- if (!msi->inner_domain)
- return -ENOMEM;
-
- msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(msi->node),
- &xgene_msi_domain_info,
- msi->inner_domain);
-
- if (!msi->msi_domain) {
- irq_domain_remove(msi->inner_domain);
- return -ENOMEM;
- }
-
- return 0;
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(msi->node),
+ .ops = &xgene_msi_domain_ops,
+ .size = NR_MSI_VEC,
+ .host_data = msi,
+ };
+
+ msi->inner_domain = msi_create_parent_irq_domain(&info, &xgene_msi_parent_ops);
+ return msi->inner_domain ? 0 : -ENOMEM;
}
static void xgene_free_domains(struct xgene_msi *msi)
{
- if (msi->msi_domain)
- irq_domain_remove(msi->msi_domain);
if (msi->inner_domain)
irq_domain_remove(msi->inner_domain);
}
diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c
index e1cee3c0575f..a43f21eb8fbb 100644
--- a/drivers/pci/controller/pcie-altera-msi.c
+++ b/drivers/pci/controller/pcie-altera-msi.c
@@ -164,9 +164,9 @@ static const struct irq_domain_ops msi_domain_ops = {
static int altera_allocate_domains(struct altera_msi *msi)
{
- struct fwnode_handle *fwnode = of_node_to_fwnode(msi->pdev->dev.of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(msi->pdev->dev.of_node);
- msi->inner_domain = irq_domain_add_linear(NULL, msi->num_of_vectors,
+ msi->inner_domain = irq_domain_create_linear(NULL, msi->num_of_vectors,
&msi_domain_ops, msi);
if (!msi->inner_domain) {
dev_err(&msi->pdev->dev, "failed to create IRQ domain\n");
diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c
index 70409e71a18f..0fc77176a52e 100644
--- a/drivers/pci/controller/pcie-altera.c
+++ b/drivers/pci/controller/pcie-altera.c
@@ -855,7 +855,7 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie)
struct device_node *node = dev->of_node;
/* Setup INTx */
- pcie->irq_domain = irq_domain_add_linear(node, PCI_NUM_INTX,
+ pcie->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), PCI_NUM_INTX,
&intx_domain_ops, pcie);
if (!pcie->irq_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index 18e11b9a7f46..3d412a931774 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -22,6 +22,7 @@
#include <linux/kernel.h>
#include <linux/iopoll.h>
#include <linux/irqchip/chained_irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/irqdomain.h>
#include <linux/list.h>
#include <linux/module.h>
@@ -133,7 +134,6 @@ struct apple_pcie {
struct mutex lock;
struct device *dev;
void __iomem *base;
- struct irq_domain *domain;
unsigned long *bitmap;
struct list_head ports;
struct completion event;
@@ -162,27 +162,6 @@ static void rmw_clear(u32 clr, void __iomem *addr)
writel_relaxed(readl_relaxed(addr) & ~clr, addr);
}
-static void apple_msi_top_irq_mask(struct irq_data *d)
-{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
-
-static void apple_msi_top_irq_unmask(struct irq_data *d)
-{
- pci_msi_unmask_irq(d);
- irq_chip_unmask_parent(d);
-}
-
-static struct irq_chip apple_msi_top_chip = {
- .name = "PCIe MSI",
- .irq_mask = apple_msi_top_irq_mask,
- .irq_unmask = apple_msi_top_irq_unmask,
- .irq_eoi = irq_chip_eoi_parent,
- .irq_set_affinity = irq_chip_set_affinity_parent,
- .irq_set_type = irq_chip_set_type_parent,
-};
-
static void apple_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
msg->address_hi = upper_32_bits(DOORBELL_ADDR);
@@ -226,8 +205,7 @@ static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
for (i = 0; i < nr_irqs; i++) {
irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
- &apple_msi_bottom_chip,
- domain->host_data);
+ &apple_msi_bottom_chip, pcie);
}
return 0;
@@ -251,12 +229,6 @@ static const struct irq_domain_ops apple_msi_domain_ops = {
.free = apple_msi_domain_free,
};
-static struct msi_domain_info apple_msi_info = {
- .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
- .chip = &apple_msi_top_chip,
-};
-
static void apple_port_irq_mask(struct irq_data *data)
{
struct apple_pcie_port *port = irq_data_get_irq_chip_data(data);
@@ -595,11 +567,28 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie,
return 0;
}
+static const struct msi_parent_ops apple_msi_parent_ops = {
+ .supported_flags = (MSI_GENERIC_FLAGS_MASK |
+ MSI_FLAG_PCI_MSIX |
+ MSI_FLAG_MULTI_PCI_MSI),
+ .required_flags = (MSI_FLAG_USE_DEF_DOM_OPS |
+ MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSI_MASK_PARENT),
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
+ .bus_select_token = DOMAIN_BUS_PCI_MSI,
+ .init_dev_msi_info = msi_lib_init_dev_msi_info,
+};
+
static int apple_msi_init(struct apple_pcie *pcie)
{
struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .ops = &apple_msi_domain_ops,
+ .size = pcie->nvecs,
+ .host_data = pcie,
+ };
struct of_phandle_args args = {};
- struct irq_domain *parent;
int ret;
ret = of_parse_phandle_with_args(to_of_node(fwnode), "msi-ranges",
@@ -619,28 +608,16 @@ static int apple_msi_init(struct apple_pcie *pcie)
if (!pcie->bitmap)
return -ENOMEM;
- parent = irq_find_matching_fwspec(&pcie->fwspec, DOMAIN_BUS_WIRED);
- if (!parent) {
+ info.parent = irq_find_matching_fwspec(&pcie->fwspec, DOMAIN_BUS_WIRED);
+ if (!info.parent) {
dev_err(pcie->dev, "failed to find parent domain\n");
return -ENXIO;
}
- parent = irq_domain_create_hierarchy(parent, 0, pcie->nvecs, fwnode,
- &apple_msi_domain_ops, pcie);
- if (!parent) {
+ if (!msi_create_parent_irq_domain(&info, &apple_msi_parent_ops)) {
dev_err(pcie->dev, "failed to create IRQ domain\n");
return -ENOMEM;
}
- irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
-
- pcie->domain = pci_msi_create_irq_domain(fwnode, &apple_msi_info,
- parent);
- if (!pcie->domain) {
- dev_err(pcie->dev, "failed to create MSI domain\n");
- irq_domain_remove(parent);
- return -ENOMEM;
- }
-
return 0;
}
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index e19628e13898..92887b394eb4 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -581,10 +581,10 @@ static const struct irq_domain_ops msi_domain_ops = {
static int brcm_allocate_domains(struct brcm_msi *msi)
{
- struct fwnode_handle *fwnode = of_node_to_fwnode(msi->np);
+ struct fwnode_handle *fwnode = of_fwnode_handle(msi->np);
struct device *dev = msi->dev;
- msi->inner_domain = irq_domain_add_linear(NULL, msi->nr, &msi_domain_ops, msi);
+ msi->inner_domain = irq_domain_create_linear(NULL, msi->nr, &msi_domain_ops, msi);
if (!msi->inner_domain) {
dev_err(dev, "failed to create IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c
index 649fcb449f34..d2cb4c4f821a 100644
--- a/drivers/pci/controller/pcie-iproc-msi.c
+++ b/drivers/pci/controller/pcie-iproc-msi.c
@@ -446,12 +446,12 @@ static void iproc_msi_disable(struct iproc_msi *msi)
static int iproc_msi_alloc_domains(struct device_node *node,
struct iproc_msi *msi)
{
- msi->inner_domain = irq_domain_add_linear(NULL, msi->nr_msi_vecs,
- &msi_domain_ops, msi);
+ msi->inner_domain = irq_domain_create_linear(NULL, msi->nr_msi_vecs,
+ &msi_domain_ops, msi);
if (!msi->inner_domain)
return -ENOMEM;
- msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
+ msi->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(node),
&iproc_msi_domain_info,
msi->inner_domain);
if (!msi->msi_domain) {
diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
index 9d52504acae4..b55f5973414c 100644
--- a/drivers/pci/controller/pcie-mediatek-gen3.c
+++ b/drivers/pci/controller/pcie-mediatek-gen3.c
@@ -745,8 +745,8 @@ static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie)
return -ENODEV;
}
- pcie->intx_domain = irq_domain_add_linear(intc_node, PCI_NUM_INTX,
- &intx_domain_ops, pcie);
+ pcie->intx_domain = irq_domain_create_linear(of_fwnode_handle(intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, pcie);
if (!pcie->intx_domain) {
dev_err(dev, "failed to create INTx IRQ domain\n");
ret = -ENODEV;
@@ -756,8 +756,9 @@ static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie)
/* Setup MSI */
mutex_init(&pcie->lock);
- pcie->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM,
- &mtk_msi_bottom_domain_ops, pcie);
+ pcie->msi_bottom_domain = irq_domain_create_linear(of_fwnode_handle(node),
+ PCIE_MSI_IRQS_NUM,
+ &mtk_msi_bottom_domain_ops, pcie);
if (!pcie->msi_bottom_domain) {
dev_err(dev, "failed to create MSI bottom domain\n");
ret = -ENODEV;
diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
index 811a8b4acd50..e1934aa06c8d 100644
--- a/drivers/pci/controller/pcie-mediatek.c
+++ b/drivers/pci/controller/pcie-mediatek.c
@@ -485,7 +485,7 @@ static struct msi_domain_info mtk_msi_domain_info = {
static int mtk_pcie_allocate_msi_domains(struct mtk_pcie_port *port)
{
- struct fwnode_handle *fwnode = of_node_to_fwnode(port->pcie->dev->of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(port->pcie->dev->of_node);
mutex_init(&port->lock);
@@ -569,8 +569,8 @@ static int mtk_pcie_init_irq_domain(struct mtk_pcie_port *port,
return -ENODEV;
}
- port->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &intx_domain_ops, port);
+ port->irq_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, port);
of_node_put(pcie_intc_node);
if (!port->irq_domain) {
dev_err(dev, "failed to get INTx IRQ domain\n");
diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
index 6a46be17aa91..b9e7a8710cf0 100644
--- a/drivers/pci/controller/pcie-rockchip-host.c
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -693,8 +693,8 @@ static int rockchip_pcie_init_irq_domain(struct rockchip_pcie *rockchip)
return -EINVAL;
}
- rockchip->irq_domain = irq_domain_add_linear(intc, PCI_NUM_INTX,
- &intx_domain_ops, rockchip);
+ rockchip->irq_domain = irq_domain_create_linear(of_fwnode_handle(intc), PCI_NUM_INTX,
+ &intx_domain_ops, rockchip);
of_node_put(intc);
if (!rockchip->irq_domain) {
dev_err(dev, "failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c
index 13ca493d22bd..d38f27e20761 100644
--- a/drivers/pci/controller/pcie-xilinx-cpm.c
+++ b/drivers/pci/controller/pcie-xilinx-cpm.c
@@ -395,17 +395,15 @@ static int xilinx_cpm_pcie_init_irq_domain(struct xilinx_cpm_pcie *port)
return -EINVAL;
}
- port->cpm_domain = irq_domain_add_linear(pcie_intc_node, 32,
- &event_domain_ops,
- port);
+ port->cpm_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), 32,
+ &event_domain_ops, port);
if (!port->cpm_domain)
goto out;
irq_domain_update_bus_token(port->cpm_domain, DOMAIN_BUS_NEXUS);
- port->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &intx_domain_ops,
- port);
+ port->intx_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, port);
if (!port->intx_domain)
goto out;
diff --git a/drivers/pci/controller/pcie-xilinx-dma-pl.c b/drivers/pci/controller/pcie-xilinx-dma-pl.c
index dd117f07fc95..dc9690a535e1 100644
--- a/drivers/pci/controller/pcie-xilinx-dma-pl.c
+++ b/drivers/pci/controller/pcie-xilinx-dma-pl.c
@@ -470,10 +470,10 @@ static int xilinx_pl_dma_pcie_init_msi_irq_domain(struct pl_dma_pcie *port)
struct device *dev = port->dev;
struct xilinx_msi *msi = &port->msi;
int size = BITS_TO_LONGS(XILINX_NUM_MSI_IRQS) * sizeof(long);
- struct fwnode_handle *fwnode = of_node_to_fwnode(port->dev->of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(port->dev->of_node);
- msi->dev_domain = irq_domain_add_linear(NULL, XILINX_NUM_MSI_IRQS,
- &dev_msi_domain_ops, port);
+ msi->dev_domain = irq_domain_create_linear(NULL, XILINX_NUM_MSI_IRQS,
+ &dev_msi_domain_ops, port);
if (!msi->dev_domain)
goto out;
@@ -585,15 +585,15 @@ static int xilinx_pl_dma_pcie_init_irq_domain(struct pl_dma_pcie *port)
return -EINVAL;
}
- port->pldma_domain = irq_domain_add_linear(pcie_intc_node, 32,
- &event_domain_ops, port);
+ port->pldma_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), 32,
+ &event_domain_ops, port);
if (!port->pldma_domain)
return -ENOMEM;
irq_domain_update_bus_token(port->pldma_domain, DOMAIN_BUS_NEXUS);
- port->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &intx_domain_ops, port);
+ port->intx_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, port);
if (!port->intx_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
return -ENOMEM;
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 8d6e2a89b067..c8b05477b719 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -495,11 +495,10 @@ static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie)
{
#ifdef CONFIG_PCI_MSI
struct device *dev = pcie->dev;
- struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node);
struct nwl_msi *msi = &pcie->msi;
- msi->dev_domain = irq_domain_add_linear(NULL, INT_PCI_MSI_NR,
- &dev_msi_domain_ops, pcie);
+ msi->dev_domain = irq_domain_create_linear(NULL, INT_PCI_MSI_NR, &dev_msi_domain_ops, pcie);
if (!msi->dev_domain) {
dev_err(dev, "failed to create dev IRQ domain\n");
return -ENOMEM;
@@ -582,10 +581,8 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
return -EINVAL;
}
- pcie->intx_irq_domain = irq_domain_add_linear(intc_node,
- PCI_NUM_INTX,
- &intx_domain_ops,
- pcie);
+ pcie->intx_irq_domain = irq_domain_create_linear(of_fwnode_handle(intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, pcie);
of_node_put(intc_node);
if (!pcie->intx_irq_domain) {
dev_err(dev, "failed to create IRQ domain\n");
diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c
index 0b534f73a942..e36aa874bae9 100644
--- a/drivers/pci/controller/pcie-xilinx.c
+++ b/drivers/pci/controller/pcie-xilinx.c
@@ -461,9 +461,8 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie *pcie)
return -ENODEV;
}
- pcie->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &intx_domain_ops,
- pcie);
+ pcie->leg_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, pcie);
of_node_put(pcie_intc_node);
if (!pcie->leg_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/controller/plda/pcie-plda-host.c b/drivers/pci/controller/plda/pcie-plda-host.c
index 4153214ca410..3abedf723215 100644
--- a/drivers/pci/controller/plda/pcie-plda-host.c
+++ b/drivers/pci/controller/plda/pcie-plda-host.c
@@ -150,13 +150,12 @@ static struct msi_domain_info plda_msi_domain_info = {
static int plda_allocate_msi_domains(struct plda_pcie_rp *port)
{
struct device *dev = port->dev;
- struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+ struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node);
struct plda_msi *msi = &port->msi;
mutex_init(&port->msi.lock);
- msi->dev_domain = irq_domain_add_linear(NULL, msi->num_vectors,
- &msi_domain_ops, port);
+ msi->dev_domain = irq_domain_create_linear(NULL, msi->num_vectors, &msi_domain_ops, port);
if (!msi->dev_domain) {
dev_err(dev, "failed to create IRQ domain\n");
return -ENOMEM;
@@ -393,10 +392,9 @@ static int plda_pcie_init_irq_domains(struct plda_pcie_rp *port)
return -EINVAL;
}
- port->event_domain = irq_domain_add_linear(pcie_intc_node,
- port->num_events,
- &plda_event_domain_ops,
- port);
+ port->event_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node),
+ port->num_events, &plda_event_domain_ops,
+ port);
if (!port->event_domain) {
dev_err(dev, "failed to get event domain\n");
of_node_put(pcie_intc_node);
@@ -405,8 +403,8 @@ static int plda_pcie_init_irq_domains(struct plda_pcie_rp *port)
irq_domain_update_bus_token(port->event_domain, DOMAIN_BUS_NEXUS);
- port->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
- &intx_domain_ops, port);
+ port->intx_domain = irq_domain_create_linear(of_fwnode_handle(pcie_intc_node), PCI_NUM_INTX,
+ &intx_domain_ops, port);
if (!port->intx_domain) {
dev_err(dev, "failed to get an INTx IRQ domain\n");
of_node_put(pcie_intc_node);
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index e9e9aaa91770..d9996516f49e 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -65,9 +65,9 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
rc = zpci_deconfigure_device(zdev);
out:
- mutex_unlock(&zdev->state_lock);
if (pdev)
pci_dev_put(pdev);
+ mutex_unlock(&zdev->state_lock);
return rc;
}
diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c
index 17ec6332cb1d..818d55fbad0d 100644
--- a/drivers/pci/msi/api.c
+++ b/drivers/pci/msi/api.c
@@ -53,10 +53,9 @@ void pci_disable_msi(struct pci_dev *dev)
if (!pci_msi_enabled() || !dev || !dev->msi_enabled)
return;
- msi_lock_descs(&dev->dev);
+ guard(msi_descs_lock)(&dev->dev);
pci_msi_shutdown(dev);
pci_free_msi_irqs(dev);
- msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msi);
@@ -196,10 +195,9 @@ void pci_disable_msix(struct pci_dev *dev)
if (!pci_msi_enabled() || !dev || !dev->msix_enabled)
return;
- msi_lock_descs(&dev->dev);
+ guard(msi_descs_lock)(&dev->dev);
pci_msix_shutdown(dev);
pci_free_msi_irqs(dev);
- msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msix);
@@ -401,7 +399,7 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state);
* Return: true if MSI has not been globally disabled through ACPI FADT,
* PCI bridge quirks, or the "pci=nomsi" kernel command-line option.
*/
-int pci_msi_enabled(void)
+bool pci_msi_enabled(void)
{
return pci_msi_enable;
}
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 8b8848788618..d6ce04054702 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -15,7 +15,7 @@
#include "../pci.h"
#include "msi.h"
-int pci_msi_enable = 1;
+bool pci_msi_enable = true;
/**
* pci_msi_supported - check whether MSI may be enabled on a device
@@ -335,43 +335,13 @@ static int msi_verify_entries(struct pci_dev *dev)
return !entry ? 0 : -EIO;
}
-/**
- * msi_capability_init - configure device's MSI capability structure
- * @dev: pointer to the pci_dev data structure of MSI device function
- * @nvec: number of interrupts to allocate
- * @affd: description of automatic IRQ affinity assignments (may be %NULL)
- *
- * Setup the MSI capability structure of the device with the requested
- * number of interrupts. A return value of zero indicates the successful
- * setup of an entry with the new MSI IRQ. A negative return value indicates
- * an error, and a positive return value indicates the number of interrupts
- * which could have been allocated.
- */
-static int msi_capability_init(struct pci_dev *dev, int nvec,
- struct irq_affinity *affd)
+static int __msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks)
{
- struct irq_affinity_desc *masks = NULL;
+ int ret = msi_setup_msi_desc(dev, nvec, masks);
struct msi_desc *entry, desc;
- int ret;
- /* Reject multi-MSI early on irq domain enabled architectures */
- if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY))
- return 1;
-
- /*
- * Disable MSI during setup in the hardware, but mark it enabled
- * so that setup code can evaluate it.
- */
- pci_msi_set_enable(dev, 0);
- dev->msi_enabled = 1;
-
- if (affd)
- masks = irq_create_affinity_masks(nvec, affd);
-
- msi_lock_descs(&dev->dev);
- ret = msi_setup_msi_desc(dev, nvec, masks);
if (ret)
- goto fail;
+ return ret;
/* All MSIs are unmasked by default; mask them all */
entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
@@ -393,24 +363,51 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
goto err;
/* Set MSI enabled bits */
+ dev->msi_enabled = 1;
pci_intx_for_msi(dev, 0);
pci_msi_set_enable(dev, 1);
pcibios_free_irq(dev);
dev->irq = entry->irq;
- goto unlock;
-
+ return 0;
err:
pci_msi_unmask(&desc, msi_multi_mask(&desc));
pci_free_msi_irqs(dev);
-fail:
- dev->msi_enabled = 0;
-unlock:
- msi_unlock_descs(&dev->dev);
- kfree(masks);
return ret;
}
+/**
+ * msi_capability_init - configure device's MSI capability structure
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: number of interrupts to allocate
+ * @affd: description of automatic IRQ affinity assignments (may be %NULL)
+ *
+ * Setup the MSI capability structure of the device with the requested
+ * number of interrupts. A return value of zero indicates the successful
+ * setup of an entry with the new MSI IRQ. A negative return value indicates
+ * an error, and a positive return value indicates the number of interrupts
+ * which could have been allocated.
+ */
+static int msi_capability_init(struct pci_dev *dev, int nvec,
+ struct irq_affinity *affd)
+{
+ /* Reject multi-MSI early on irq domain enabled architectures */
+ if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY))
+ return 1;
+
+ /*
+ * Disable MSI during setup in the hardware, but mark it enabled
+ * so that setup code can evaluate it.
+ */
+ pci_msi_set_enable(dev, 0);
+
+ struct irq_affinity_desc *masks __free(kfree) =
+ affd ? irq_create_affinity_masks(nvec, affd) : NULL;
+
+ guard(msi_descs_lock)(&dev->dev);
+ return __msi_capability_init(dev, nvec, masks);
+}
+
int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
struct irq_affinity *affd)
{
@@ -666,38 +663,39 @@ static void msix_mask_all(void __iomem *base, int tsize)
writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
}
-static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries,
- int nvec, struct irq_affinity *affd)
-{
- struct irq_affinity_desc *masks = NULL;
- int ret;
+DEFINE_FREE(free_msi_irqs, struct pci_dev *, if (_T) pci_free_msi_irqs(_T));
- if (affd)
- masks = irq_create_affinity_masks(nvec, affd);
+static int __msix_setup_interrupts(struct pci_dev *__dev, struct msix_entry *entries,
+ int nvec, struct irq_affinity_desc *masks)
+{
+ struct pci_dev *dev __free(free_msi_irqs) = __dev;
- msi_lock_descs(&dev->dev);
- ret = msix_setup_msi_descs(dev, entries, nvec, masks);
+ int ret = msix_setup_msi_descs(dev, entries, nvec, masks);
if (ret)
- goto out_free;
+ return ret;
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
if (ret)
- goto out_free;
+ return ret;
/* Check if all MSI entries honor device restrictions */
ret = msi_verify_entries(dev);
if (ret)
- goto out_free;
+ return ret;
msix_update_entries(dev, entries);
- goto out_unlock;
+ retain_and_null_ptr(dev);
+ return 0;
+}
-out_free:
- pci_free_msi_irqs(dev);
-out_unlock:
- msi_unlock_descs(&dev->dev);
- kfree(masks);
- return ret;
+static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries,
+ int nvec, struct irq_affinity *affd)
+{
+ struct irq_affinity_desc *masks __free(kfree) =
+ affd ? irq_create_affinity_masks(nvec, affd) : NULL;
+
+ guard(msi_descs_lock)(&dev->dev);
+ return __msix_setup_interrupts(dev, entries, nvec, masks);
}
/**
@@ -873,13 +871,13 @@ void __pci_restore_msix_state(struct pci_dev *dev)
write_msg = arch_restore_msi_irqs(dev);
- msi_lock_descs(&dev->dev);
- msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
- if (write_msg)
- __pci_write_msi_msg(entry, &entry->msg);
- pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
+ scoped_guard (msi_descs_lock, &dev->dev) {
+ msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
+ if (write_msg)
+ __pci_write_msi_msg(entry, &entry->msg);
+ pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
+ }
}
- msi_unlock_descs(&dev->dev);
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}
@@ -918,6 +916,53 @@ void pci_free_msi_irqs(struct pci_dev *dev)
}
}
+#ifdef CONFIG_PCIE_TPH
+/**
+ * pci_msix_write_tph_tag - Update the TPH tag for a given MSI-X vector
+ * @pdev: The PCIe device to update
+ * @index: The MSI-X index to update
+ * @tag: The tag to write
+ *
+ * Returns: 0 on success, error code on failure
+ */
+int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag)
+{
+ struct msi_desc *msi_desc;
+ struct irq_desc *irq_desc;
+ unsigned int virq;
+
+ if (!pdev->msix_enabled)
+ return -ENXIO;
+
+ guard(msi_descs_lock)(&pdev->dev);
+ virq = msi_get_virq(&pdev->dev, index);
+ if (!virq)
+ return -ENXIO;
+ /*
+ * This is a horrible hack, but short of implementing a PCI
+ * specific interrupt chip callback and a huge pile of
+ * infrastructure, this is the minor nuissance. It provides the
+ * protection against concurrent operations on this entry and keeps
+ * the control word cache in sync.
+ */
+ irq_desc = irq_to_desc(virq);
+ if (!irq_desc)
+ return -ENXIO;
+
+ guard(raw_spinlock_irq)(&irq_desc->lock);
+ msi_desc = irq_data_get_msi_desc(&irq_desc->irq_data);
+ if (!msi_desc || msi_desc->pci.msi_attrib.is_virtual)
+ return -ENXIO;
+
+ msi_desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_ST;
+ msi_desc->pci.msix_ctrl |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag);
+ pci_msix_write_vector_ctrl(msi_desc, msi_desc->pci.msix_ctrl);
+ /* Flush the write */
+ readl(pci_msix_desc_addr(msi_desc));
+ return 0;
+}
+#endif
+
/* Misc. infrastructure */
struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
@@ -928,5 +973,5 @@ EXPORT_SYMBOL(msi_desc_to_pci_dev);
void pci_no_msi(void)
{
- pci_msi_enable = 0;
+ pci_msi_enable = false;
}
diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h
index ee53cf079f4e..fc70b601e942 100644
--- a/drivers/pci/msi/msi.h
+++ b/drivers/pci/msi/msi.h
@@ -87,7 +87,7 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc)
void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc);
/* Subsystem variables */
-extern int pci_msi_enable;
+extern bool pci_msi_enable;
/* MSI internal functions invoked from the public APIs */
void pci_msi_shutdown(struct pci_dev *dev);
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 19214ec81fbb..8d955c25aed3 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -1004,40 +1004,12 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap,
return type;
}
-/**
- * pci_p2pdma_map_segment - map an sg segment determining the mapping type
- * @state: State structure that should be declared outside of the for_each_sg()
- * loop and initialized to zero.
- * @dev: DMA device that's doing the mapping operation
- * @sg: scatterlist segment to map
- *
- * This is a helper to be used by non-IOMMU dma_map_sg() implementations where
- * the sg segment is the same for the page_link and the dma_address.
- *
- * Attempt to map a single segment in an SGL with the PCI bus address.
- * The segment must point to a PCI P2PDMA page and thus must be
- * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check.
- *
- * Returns the type of mapping used and maps the page if the type is
- * PCI_P2PDMA_MAP_BUS_ADDR.
- */
-enum pci_p2pdma_map_type
-pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
- struct scatterlist *sg)
+void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
+ struct device *dev, struct page *page)
{
- if (state->pgmap != page_pgmap(sg_page(sg))) {
- state->pgmap = page_pgmap(sg_page(sg));
- state->map = pci_p2pdma_map_type(state->pgmap, dev);
- state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
- }
-
- if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) {
- sg->dma_address = sg_phys(sg) + state->bus_off;
- sg_dma_len(sg) = sg->length;
- sg_dma_mark_bus_address(sg);
- }
-
- return state->map;
+ state->pgmap = page_pgmap(page);
+ state->map = pci_p2pdma_map_type(state->pgmap, dev);
+ state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
}
/**
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b81e99cd4b62..39f368d2f26d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1064,6 +1064,15 @@ int pcim_request_region_exclusive(struct pci_dev *pdev, int bar,
const char *name);
void pcim_release_region(struct pci_dev *pdev, int bar);
+#ifdef CONFIG_PCI_MSI
+int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag);
+#else
+static inline int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag)
+{
+ return -ENODEV;
+}
+#endif
+
/*
* Config Address for PCI Configuration Mechanism #1
*
diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
index 07de59ca2ebf..77fce5e1b830 100644
--- a/drivers/pci/tph.c
+++ b/drivers/pci/tph.c
@@ -204,48 +204,6 @@ static u8 get_rp_completer_type(struct pci_dev *pdev)
return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg);
}
-/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */
-static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag)
-{
-#ifdef CONFIG_PCI_MSI
- struct msi_desc *msi_desc = NULL;
- void __iomem *vec_ctrl;
- u32 val;
- int err = 0;
-
- msi_lock_descs(&pdev->dev);
-
- /* Find the msi_desc entry with matching msix_idx */
- msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) {
- if (msi_desc->msi_index == msix_idx)
- break;
- }
-
- if (!msi_desc) {
- err = -ENXIO;
- goto err_out;
- }
-
- /* Get the vector control register (offset 0xc) pointed by msix_idx */
- vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE;
- vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL;
-
- val = readl(vec_ctrl);
- val &= ~PCI_MSIX_ENTRY_CTRL_ST;
- val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag);
- writel(val, vec_ctrl);
-
- /* Read back to flush the update */
- val = readl(vec_ctrl);
-
-err_out:
- msi_unlock_descs(&pdev->dev);
- return err;
-#else
- return -ENODEV;
-#endif
-}
-
/* Write tag to ST table - Return 0 if OK, otherwise -errno */
static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag)
{
@@ -346,7 +304,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag)
switch (loc) {
case PCI_TPH_LOC_MSIX:
- err = write_tag_to_msix(pdev, index, tag);
+ err = pci_msix_write_tph_tag(pdev, index, tag);
break;
case PCI_TPH_LOC_CAP:
err = write_tag_to_st_table(pdev, index, tag);
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index df9a28ba69dc..81b6f1a62349 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -474,8 +474,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- m1_pmu_disable_event(event);
+ perf_event_overflow(event, &data, regs);
}
cpu_pmu->start(cpu_pmu);
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index e506d59654e7..3db9f4ed17e8 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -887,8 +887,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
* an irq_work which will be taken care of in the handling of
* IPI_IRQ_WORK.
*/
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
}
armv8pmu_start(cpu_pmu);
diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c
index b09615bb2bb2..7cb12c8e06c7 100644
--- a/drivers/perf/arm_v6_pmu.c
+++ b/drivers/perf/arm_v6_pmu.c
@@ -276,8 +276,7 @@ armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c
index 17831e1920bd..a1e438101114 100644
--- a/drivers/perf/arm_v7_pmu.c
+++ b/drivers/perf/arm_v7_pmu.c
@@ -930,8 +930,7 @@ static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c
index 638fea9b1263..c2ac41dd9e19 100644
--- a/drivers/perf/arm_xscale_pmu.c
+++ b/drivers/perf/arm_xscale_pmu.c
@@ -186,8 +186,7 @@ xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
}
irq_work_run();
@@ -519,8 +518,7 @@ xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
}
irq_work_run();
diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c
index 2bec70615449..f59caff4b3d4 100644
--- a/drivers/phy/phy-can-transceiver.c
+++ b/drivers/phy/phy-can-transceiver.c
@@ -93,6 +93,16 @@ static const struct of_device_id can_transceiver_phy_ids[] = {
};
MODULE_DEVICE_TABLE(of, can_transceiver_phy_ids);
+/* Temporary wrapper until the multiplexer subsystem supports optional muxes */
+static inline struct mux_state *
+devm_mux_state_get_optional(struct device *dev, const char *mux_name)
+{
+ if (!of_property_present(dev->of_node, "mux-states"))
+ return NULL;
+
+ return devm_mux_state_get(dev, mux_name);
+}
+
static int can_transceiver_phy_probe(struct platform_device *pdev)
{
struct phy_provider *phy_provider;
@@ -114,13 +124,11 @@ static int can_transceiver_phy_probe(struct platform_device *pdev)
match = of_match_node(can_transceiver_phy_ids, pdev->dev.of_node);
drvdata = match->data;
- mux_state = devm_mux_state_get(dev, NULL);
- if (IS_ERR(mux_state)) {
- if (PTR_ERR(mux_state) == -EPROBE_DEFER)
- return PTR_ERR(mux_state);
- } else {
- can_transceiver_phy->mux_state = mux_state;
- }
+ mux_state = devm_mux_state_get_optional(dev, NULL);
+ if (IS_ERR(mux_state))
+ return PTR_ERR(mux_state);
+
+ can_transceiver_phy->mux_state = mux_state;
phy = devm_phy_create(dev, dev->of_node,
&can_transceiver_phy_ops);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 45b3b792696e..b33e2e2b5014 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -1754,7 +1754,8 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg
qmp_ufs_init_all(qmp, &cfg->tbls_hs_overlay[i]);
}
- qmp_ufs_init_all(qmp, &cfg->tbls_hs_b);
+ if (qmp->mode == PHY_MODE_UFS_HS_B)
+ qmp_ufs_init_all(qmp, &cfg->tbls_hs_b);
}
static int qmp_ufs_com_init(struct qmp_ufs *qmp)
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index 775f4f973a6c..9fdf17e0848a 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -9,6 +9,7 @@
* Copyright (C) 2014 Cogent Embedded, Inc.
*/
+#include <linux/cleanup.h>
#include <linux/extcon-provider.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -107,7 +108,6 @@ struct rcar_gen3_phy {
struct rcar_gen3_chan *ch;
u32 int_enable_bits;
bool initialized;
- bool otg_initialized;
bool powered;
};
@@ -119,9 +119,8 @@ struct rcar_gen3_chan {
struct regulator *vbus;
struct reset_control *rstc;
struct work_struct work;
- struct mutex lock; /* protects rphys[...].powered */
+ spinlock_t lock; /* protects access to hardware and driver data structure. */
enum usb_dr_mode dr_mode;
- int irq;
u32 obint_enable_bits;
bool extcon_host;
bool is_otg_channel;
@@ -320,16 +319,15 @@ static bool rcar_gen3_is_any_rphy_initialized(struct rcar_gen3_chan *ch)
return false;
}
-static bool rcar_gen3_needs_init_otg(struct rcar_gen3_chan *ch)
+static bool rcar_gen3_is_any_otg_rphy_initialized(struct rcar_gen3_chan *ch)
{
- int i;
-
- for (i = 0; i < NUM_OF_PHYS; i++) {
- if (ch->rphys[i].otg_initialized)
- return false;
+ for (enum rcar_gen3_phy_index i = PHY_INDEX_BOTH_HC; i <= PHY_INDEX_EHCI;
+ i++) {
+ if (ch->rphys[i].initialized)
+ return true;
}
- return true;
+ return false;
}
static bool rcar_gen3_are_all_rphys_power_off(struct rcar_gen3_chan *ch)
@@ -351,7 +349,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr,
bool is_b_device;
enum phy_mode cur_mode, new_mode;
- if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch))
+ guard(spinlock_irqsave)(&ch->lock);
+
+ if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch))
return -EIO;
if (sysfs_streq(buf, "host"))
@@ -389,7 +389,7 @@ static ssize_t role_show(struct device *dev, struct device_attribute *attr,
{
struct rcar_gen3_chan *ch = dev_get_drvdata(dev);
- if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch))
+ if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch))
return -EIO;
return sprintf(buf, "%s\n", rcar_gen3_is_host(ch) ? "host" :
@@ -402,6 +402,9 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch)
void __iomem *usb2_base = ch->base;
u32 val;
+ if (!ch->is_otg_channel || rcar_gen3_is_any_otg_rphy_initialized(ch))
+ return;
+
/* Should not use functions of read-modify-write a register */
val = readl(usb2_base + USB2_LINECTRL1);
val = (val & ~USB2_LINECTRL1_DP_RPD) | USB2_LINECTRL1_DPRPD_EN |
@@ -415,7 +418,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch)
val = readl(usb2_base + USB2_ADPCTRL);
writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL);
}
- msleep(20);
+ mdelay(20);
writel(0xffffffff, usb2_base + USB2_OBINTSTA);
writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN);
@@ -427,16 +430,27 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch)
{
struct rcar_gen3_chan *ch = _ch;
void __iomem *usb2_base = ch->base;
- u32 status = readl(usb2_base + USB2_OBINTSTA);
+ struct device *dev = ch->dev;
irqreturn_t ret = IRQ_NONE;
+ u32 status;
+
+ pm_runtime_get_noresume(dev);
+
+ if (pm_runtime_suspended(dev))
+ goto rpm_put;
- if (status & ch->obint_enable_bits) {
- dev_vdbg(ch->dev, "%s: %08x\n", __func__, status);
- writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA);
- rcar_gen3_device_recognition(ch);
- ret = IRQ_HANDLED;
+ scoped_guard(spinlock, &ch->lock) {
+ status = readl(usb2_base + USB2_OBINTSTA);
+ if (status & ch->obint_enable_bits) {
+ dev_vdbg(dev, "%s: %08x\n", __func__, status);
+ writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA);
+ rcar_gen3_device_recognition(ch);
+ ret = IRQ_HANDLED;
+ }
}
+rpm_put:
+ pm_runtime_put_noidle(dev);
return ret;
}
@@ -446,32 +460,23 @@ static int rcar_gen3_phy_usb2_init(struct phy *p)
struct rcar_gen3_chan *channel = rphy->ch;
void __iomem *usb2_base = channel->base;
u32 val;
- int ret;
- if (!rcar_gen3_is_any_rphy_initialized(channel) && channel->irq >= 0) {
- INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work);
- ret = request_irq(channel->irq, rcar_gen3_phy_usb2_irq,
- IRQF_SHARED, dev_name(channel->dev), channel);
- if (ret < 0) {
- dev_err(channel->dev, "No irq handler (%d)\n", channel->irq);
- return ret;
- }
- }
+ guard(spinlock_irqsave)(&channel->lock);
/* Initialize USB2 part */
val = readl(usb2_base + USB2_INT_ENABLE);
val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits;
writel(val, usb2_base + USB2_INT_ENABLE);
- writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET);
- writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET);
-
- /* Initialize otg part */
- if (channel->is_otg_channel) {
- if (rcar_gen3_needs_init_otg(channel))
- rcar_gen3_init_otg(channel);
- rphy->otg_initialized = true;
+
+ if (!rcar_gen3_is_any_rphy_initialized(channel)) {
+ writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET);
+ writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET);
}
+ /* Initialize otg part (only if we initialize a PHY with IRQs). */
+ if (rphy->int_enable_bits)
+ rcar_gen3_init_otg(channel);
+
rphy->initialized = true;
return 0;
@@ -484,10 +489,9 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p)
void __iomem *usb2_base = channel->base;
u32 val;
- rphy->initialized = false;
+ guard(spinlock_irqsave)(&channel->lock);
- if (channel->is_otg_channel)
- rphy->otg_initialized = false;
+ rphy->initialized = false;
val = readl(usb2_base + USB2_INT_ENABLE);
val &= ~rphy->int_enable_bits;
@@ -495,9 +499,6 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p)
val &= ~USB2_INT_ENABLE_UCOM_INTEN;
writel(val, usb2_base + USB2_INT_ENABLE);
- if (channel->irq >= 0 && !rcar_gen3_is_any_rphy_initialized(channel))
- free_irq(channel->irq, channel);
-
return 0;
}
@@ -509,16 +510,17 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p)
u32 val;
int ret = 0;
- mutex_lock(&channel->lock);
- if (!rcar_gen3_are_all_rphys_power_off(channel))
- goto out;
-
if (channel->vbus) {
ret = regulator_enable(channel->vbus);
if (ret)
- goto out;
+ return ret;
}
+ guard(spinlock_irqsave)(&channel->lock);
+
+ if (!rcar_gen3_are_all_rphys_power_off(channel))
+ goto out;
+
val = readl(usb2_base + USB2_USBCTR);
val |= USB2_USBCTR_PLL_RST;
writel(val, usb2_base + USB2_USBCTR);
@@ -528,7 +530,6 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p)
out:
/* The powered flag should be set for any other phys anyway */
rphy->powered = true;
- mutex_unlock(&channel->lock);
return 0;
}
@@ -539,18 +540,20 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p)
struct rcar_gen3_chan *channel = rphy->ch;
int ret = 0;
- mutex_lock(&channel->lock);
- rphy->powered = false;
+ scoped_guard(spinlock_irqsave, &channel->lock) {
+ rphy->powered = false;
- if (!rcar_gen3_are_all_rphys_power_off(channel))
- goto out;
+ if (rcar_gen3_are_all_rphys_power_off(channel)) {
+ u32 val = readl(channel->base + USB2_USBCTR);
+
+ val |= USB2_USBCTR_PLL_RST;
+ writel(val, channel->base + USB2_USBCTR);
+ }
+ }
if (channel->vbus)
ret = regulator_disable(channel->vbus);
-out:
- mutex_unlock(&channel->lock);
-
return ret;
}
@@ -703,7 +706,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct rcar_gen3_chan *channel;
struct phy_provider *provider;
- int ret = 0, i;
+ int ret = 0, i, irq;
if (!dev->of_node) {
dev_err(dev, "This driver needs device tree\n");
@@ -719,8 +722,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
return PTR_ERR(channel->base);
channel->obint_enable_bits = USB2_OBINT_BITS;
- /* get irq number here and request_irq for OTG in phy_init */
- channel->irq = platform_get_irq_optional(pdev, 0);
channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node);
if (channel->dr_mode != USB_DR_MODE_UNKNOWN) {
channel->is_otg_channel = true;
@@ -763,7 +764,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
if (phy_data->no_adp_ctrl)
channel->obint_enable_bits = USB2_OBINT_IDCHG_EN;
- mutex_init(&channel->lock);
+ spin_lock_init(&channel->lock);
for (i = 0; i < NUM_OF_PHYS; i++) {
channel->rphys[i].phy = devm_phy_create(dev, NULL,
phy_data->phy_usb2_ops);
@@ -789,6 +790,20 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
channel->vbus = NULL;
}
+ irq = platform_get_irq_optional(pdev, 0);
+ if (irq < 0 && irq != -ENXIO) {
+ ret = irq;
+ goto error;
+ } else if (irq > 0) {
+ INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work);
+ ret = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq,
+ IRQF_SHARED, dev_name(dev), channel);
+ if (ret < 0) {
+ dev_err(dev, "Failed to request irq (%d)\n", irq);
+ goto error;
+ }
+ }
+
provider = devm_of_phy_provider_register(dev, rcar_gen3_phy_usb2_xlate);
if (IS_ERR(provider)) {
dev_err(dev, "Failed to register PHY provider\n");
diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c
index 08c78c1bafc9..28a052e17366 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c
@@ -1653,7 +1653,7 @@ static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev)
return ret;
}
- clk_prepare_enable(samsung->ref_clk);
+ ret = clk_prepare_enable(samsung->ref_clk);
if (ret) {
dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret);
clk_disable_unprepare(samsung->pclk);
diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
index fe7c05748356..77236f012a1f 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
@@ -476,6 +476,8 @@ static const struct ropll_config ropll_tmds_cfg[] = {
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
{ 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+ { 502500, 84, 84, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5,
+ 4, 11, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
{ 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5,
1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
{ 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
diff --git a/drivers/phy/starfive/phy-jh7110-usb.c b/drivers/phy/starfive/phy-jh7110-usb.c
index cb5454fbe2c8..b505d89860b4 100644
--- a/drivers/phy/starfive/phy-jh7110-usb.c
+++ b/drivers/phy/starfive/phy-jh7110-usb.c
@@ -18,6 +18,8 @@
#include <linux/usb/of.h>
#define USB_125M_CLK_RATE 125000000
+#define USB_CLK_MODE_OFF 0x0
+#define USB_CLK_MODE_RX_NORMAL_PWR BIT(1)
#define USB_LS_KEEPALIVE_OFF 0x4
#define USB_LS_KEEPALIVE_ENABLE BIT(4)
@@ -78,6 +80,7 @@ static int jh7110_usb2_phy_init(struct phy *_phy)
{
struct jh7110_usb2_phy *phy = phy_get_drvdata(_phy);
int ret;
+ unsigned int val;
ret = clk_set_rate(phy->usb_125m_clk, USB_125M_CLK_RATE);
if (ret)
@@ -87,6 +90,10 @@ static int jh7110_usb2_phy_init(struct phy *_phy)
if (ret)
return ret;
+ val = readl(phy->regs + USB_CLK_MODE_OFF);
+ val |= USB_CLK_MODE_RX_NORMAL_PWR;
+ writel(val, phy->regs + USB_CLK_MODE_OFF);
+
return 0;
}
diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c
index fae6242aa730..23a23f2d64e5 100644
--- a/drivers/phy/tegra/xusb-tegra186.c
+++ b/drivers/phy/tegra/xusb-tegra186.c
@@ -237,6 +237,8 @@
#define DATA0_VAL_PD BIT(1)
#define USE_XUSB_AO BIT(4)
+#define TEGRA_UTMI_PAD_MAX 4
+
#define TEGRA186_LANE(_name, _offset, _shift, _mask, _type) \
{ \
.name = _name, \
@@ -269,7 +271,7 @@ struct tegra186_xusb_padctl {
/* UTMI bias and tracking */
struct clk *usb2_trk_clk;
- unsigned int bias_pad_enable;
+ DECLARE_BITMAP(utmi_pad_enabled, TEGRA_UTMI_PAD_MAX);
/* padctl context */
struct tegra186_xusb_padctl_context context;
@@ -603,12 +605,8 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl)
u32 value;
int err;
- mutex_lock(&padctl->lock);
-
- if (priv->bias_pad_enable++ > 0) {
- mutex_unlock(&padctl->lock);
+ if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX))
return;
- }
err = clk_prepare_enable(priv->usb2_trk_clk);
if (err < 0)
@@ -658,8 +656,6 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl)
} else {
clk_disable_unprepare(priv->usb2_trk_clk);
}
-
- mutex_unlock(&padctl->lock);
}
static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl)
@@ -667,17 +663,8 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl)
struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl);
u32 value;
- mutex_lock(&padctl->lock);
-
- if (WARN_ON(priv->bias_pad_enable == 0)) {
- mutex_unlock(&padctl->lock);
- return;
- }
-
- if (--priv->bias_pad_enable > 0) {
- mutex_unlock(&padctl->lock);
+ if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX))
return;
- }
value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL1);
value |= USB2_PD_TRK;
@@ -690,13 +677,13 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl)
clk_disable_unprepare(priv->usb2_trk_clk);
}
- mutex_unlock(&padctl->lock);
}
static void tegra186_utmi_pad_power_on(struct phy *phy)
{
struct tegra_xusb_lane *lane = phy_get_drvdata(phy);
struct tegra_xusb_padctl *padctl = lane->pad->padctl;
+ struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl);
struct tegra_xusb_usb2_port *port;
struct device *dev = padctl->dev;
unsigned int index = lane->index;
@@ -705,9 +692,16 @@ static void tegra186_utmi_pad_power_on(struct phy *phy)
if (!phy)
return;
+ mutex_lock(&padctl->lock);
+ if (test_bit(index, priv->utmi_pad_enabled)) {
+ mutex_unlock(&padctl->lock);
+ return;
+ }
+
port = tegra_xusb_find_usb2_port(padctl, index);
if (!port) {
dev_err(dev, "no port found for USB2 lane %u\n", index);
+ mutex_unlock(&padctl->lock);
return;
}
@@ -724,18 +718,28 @@ static void tegra186_utmi_pad_power_on(struct phy *phy)
value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index));
value &= ~USB2_OTG_PD_DR;
padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index));
+
+ set_bit(index, priv->utmi_pad_enabled);
+ mutex_unlock(&padctl->lock);
}
static void tegra186_utmi_pad_power_down(struct phy *phy)
{
struct tegra_xusb_lane *lane = phy_get_drvdata(phy);
struct tegra_xusb_padctl *padctl = lane->pad->padctl;
+ struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl);
unsigned int index = lane->index;
u32 value;
if (!phy)
return;
+ mutex_lock(&padctl->lock);
+ if (!test_bit(index, priv->utmi_pad_enabled)) {
+ mutex_unlock(&padctl->lock);
+ return;
+ }
+
dev_dbg(padctl->dev, "power down UTMI pad %u\n", index);
value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL0(index));
@@ -748,7 +752,11 @@ static void tegra186_utmi_pad_power_down(struct phy *phy)
udelay(2);
+ clear_bit(index, priv->utmi_pad_enabled);
+
tegra186_utmi_bias_pad_power_off(padctl);
+
+ mutex_unlock(&padctl->lock);
}
static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl,
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 79d4814d758d..c89df95aa6ca 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port,
err = dev_set_name(&port->dev, "%s-%u", name, index);
if (err < 0)
- goto unregister;
+ goto put_device;
err = device_add(&port->dev);
if (err < 0)
- goto unregister;
+ goto put_device;
return 0;
-unregister:
- device_unregister(&port->dev);
+put_device:
+ put_device(&port->dev);
return err;
}
diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c
index b4eb2beab691..e20aaba0a33a 100644
--- a/drivers/pinctrl/mediatek/mtk-eint.c
+++ b/drivers/pinctrl/mediatek/mtk-eint.c
@@ -565,9 +565,8 @@ int mtk_eint_do_init(struct mtk_eint *eint)
goto err_eint;
}
- eint->domain = irq_domain_add_linear(eint->dev->of_node,
- eint->hw->ap_num,
- &irq_domain_simple_ops, NULL);
+ eint->domain = irq_domain_create_linear(of_fwnode_handle(eint->dev->of_node),
+ eint->hw->ap_num, &irq_domain_simple_ops, NULL);
if (!eint->domain)
goto err_eint;
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 1d7fdcdec4c8..5da8c48695e6 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -37,6 +37,10 @@
#include "pinctrl-utils.h"
#include "pinctrl-amd.h"
+#ifdef CONFIG_SUSPEND
+static struct amd_gpio *pinctrl_dev;
+#endif
+
static int amd_gpio_get_direction(struct gpio_chip *gc, unsigned offset)
{
unsigned long flags;
@@ -890,6 +894,44 @@ static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
}
}
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_ACPI)
+static void amd_gpio_check_pending(void)
+{
+ struct amd_gpio *gpio_dev = pinctrl_dev;
+ struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+ int i;
+
+ if (!pm_debug_messages_on)
+ return;
+
+ for (i = 0; i < desc->npins; i++) {
+ int pin = desc->pins[i].number;
+ u32 tmp;
+
+ tmp = readl(gpio_dev->base + pin * 4);
+ if (tmp & PIN_IRQ_PENDING)
+ pm_pr_dbg("%s: GPIO %d is active: 0x%x.\n", __func__, pin, tmp);
+ }
+}
+
+static struct acpi_s2idle_dev_ops pinctrl_amd_s2idle_dev_ops = {
+ .check = amd_gpio_check_pending,
+};
+
+static void amd_gpio_register_s2idle_ops(void)
+{
+ acpi_register_lps0_dev(&pinctrl_amd_s2idle_dev_ops);
+}
+
+static void amd_gpio_unregister_s2idle_ops(void)
+{
+ acpi_unregister_lps0_dev(&pinctrl_amd_s2idle_dev_ops);
+}
+#else
+static inline void amd_gpio_register_s2idle_ops(void) {}
+static inline void amd_gpio_unregister_s2idle_ops(void) {}
+#endif
+
#ifdef CONFIG_PM_SLEEP
static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin)
{
@@ -942,6 +984,9 @@ static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend
static int amd_gpio_suspend(struct device *dev)
{
+#ifdef CONFIG_SUSPEND
+ pinctrl_dev = dev_get_drvdata(dev);
+#endif
return amd_gpio_suspend_hibernate_common(dev, true);
}
@@ -1115,7 +1160,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
if (gpio_dev->irq < 0)
return gpio_dev->irq;
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_SUSPEND
gpio_dev->saved_regs = devm_kcalloc(&pdev->dev, amd_pinctrl_desc.npins,
sizeof(*gpio_dev->saved_regs),
GFP_KERNEL);
@@ -1181,6 +1226,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, gpio_dev);
acpi_register_wakeup_handler(gpio_dev->irq, amd_gpio_check_wake, gpio_dev);
+ amd_gpio_register_s2idle_ops();
dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
return ret;
@@ -1199,6 +1245,7 @@ static void amd_gpio_remove(struct platform_device *pdev)
gpiochip_remove(&gpio_dev->gc);
acpi_unregister_wakeup_handler(amd_gpio_check_wake, gpio_dev);
+ amd_gpio_unregister_s2idle_ops();
}
#ifdef CONFIG_ACPI
diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 8b01d312305a..e57ac4ea91dd 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -1206,7 +1206,7 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
dev_dbg(dev, "bank %i: irq=%d\n", i, ret);
}
- atmel_pioctrl->irq_domain = irq_domain_add_linear(dev->of_node,
+ atmel_pioctrl->irq_domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node),
atmel_pioctrl->gpio_chip->ngpio,
&irq_domain_simple_ops, NULL);
if (!atmel_pioctrl->irq_domain)
diff --git a/drivers/pinctrl/pinctrl-keembay.c b/drivers/pinctrl/pinctrl-keembay.c
index b693f4787044..0d7cc8280ea2 100644
--- a/drivers/pinctrl/pinctrl-keembay.c
+++ b/drivers/pinctrl/pinctrl-keembay.c
@@ -1268,7 +1268,7 @@ static void keembay_gpio_irq_handler(struct irq_desc *desc)
for_each_set_clump8(bit, clump, &reg, BITS_PER_TYPE(typeof(reg))) {
pin = clump & ~KEEMBAY_GPIO_IRQ_ENABLE;
val = keembay_read_pin(kpc->base0 + KEEMBAY_GPIO_DATA_IN, pin);
- kmb_irq = irq_linear_revmap(gc->irq.domain, pin);
+ kmb_irq = irq_find_mapping(gc->irq.domain, pin);
/* Checks if the interrupt is enabled */
if (val && (clump & KEEMBAY_GPIO_IRQ_ENABLE))
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 5be14dc979e2..5cda6201b60f 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1611,15 +1611,16 @@ static int pcs_irq_init_chained_handler(struct pcs_device *pcs,
/*
* We can use the register offset as the hardirq
- * number as irq_domain_add_simple maps them lazily.
+ * number as irq_domain_create_simple maps them lazily.
* This way we can easily support more than one
* interrupt per function if needed.
*/
num_irqs = pcs->size;
- pcs->domain = irq_domain_add_simple(np, num_irqs, 0,
- &pcs_irqdomain_ops,
- pcs_soc);
+ pcs->domain = irq_domain_create_simple(of_fwnode_handle(np),
+ num_irqs, 0,
+ &pcs_irqdomain_ops,
+ pcs_soc);
if (!pcs->domain) {
irq_set_chained_handler(pcs_soc->irq, NULL);
return -EINVAL;
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 82f0cc43bbf4..0eb816395dc6 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -44,7 +44,6 @@
* @pctrl: pinctrl handle.
* @chip: gpiochip handle.
* @desc: pin controller descriptor
- * @restart_nb: restart notifier block.
* @irq: parent irq for the TLMM irq_chip.
* @intr_target_use_scm: route irq to application cpu using scm calls
* @lock: Spinlock to protect register resources as well
@@ -64,7 +63,6 @@ struct msm_pinctrl {
struct pinctrl_dev *pctrl;
struct gpio_chip chip;
struct pinctrl_desc desc;
- struct notifier_block restart_nb;
int irq;
@@ -1471,10 +1469,9 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
return 0;
}
-static int msm_ps_hold_restart(struct notifier_block *nb, unsigned long action,
- void *data)
+static int msm_ps_hold_restart(struct sys_off_data *data)
{
- struct msm_pinctrl *pctrl = container_of(nb, struct msm_pinctrl, restart_nb);
+ struct msm_pinctrl *pctrl = data->cb_data;
writel(0, pctrl->regs[0] + PS_HOLD_OFFSET);
mdelay(1000);
@@ -1485,7 +1482,11 @@ static struct msm_pinctrl *poweroff_pctrl;
static void msm_ps_hold_poweroff(void)
{
- msm_ps_hold_restart(&poweroff_pctrl->restart_nb, 0, NULL);
+ struct sys_off_data data = {
+ .cb_data = poweroff_pctrl,
+ };
+
+ msm_ps_hold_restart(&data);
}
static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl)
@@ -1495,9 +1496,11 @@ static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl)
for (i = 0; i < pctrl->soc->nfunctions; i++)
if (!strcmp(func[i].name, "ps_hold")) {
- pctrl->restart_nb.notifier_call = msm_ps_hold_restart;
- pctrl->restart_nb.priority = 128;
- if (register_restart_handler(&pctrl->restart_nb))
+ if (devm_register_sys_off_handler(pctrl->dev,
+ SYS_OFF_MODE_RESTART,
+ 128,
+ msm_ps_hold_restart,
+ pctrl))
dev_err(pctrl->dev,
"failed to setup restart handler.\n");
poweroff_pctrl = pctrl;
@@ -1599,8 +1602,6 @@ void msm_pinctrl_remove(struct platform_device *pdev)
struct msm_pinctrl *pctrl = platform_get_drvdata(pdev);
gpiochip_remove(&pctrl->chip);
-
- unregister_restart_handler(&pctrl->restart_nb);
}
EXPORT_SYMBOL(msm_pinctrl_remove);
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index f1c5a991cf7b..bf8612d72daa 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -1646,10 +1646,9 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev,
}
}
- pctl->domain = irq_domain_add_linear(node,
- pctl->desc->irq_banks * IRQ_PER_BANK,
- &sunxi_pinctrl_irq_domain_ops,
- pctl);
+ pctl->domain = irq_domain_create_linear(of_fwnode_handle(node),
+ pctl->desc->irq_banks * IRQ_PER_BANK,
+ &sunxi_pinctrl_irq_domain_ops, pctl);
if (!pctl->domain) {
dev_err(&pdev->dev, "Couldn't register IRQ domain\n");
ret = -ENOMEM;
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 1b2f2bd09662..10941ac37305 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -155,13 +155,14 @@ config CROS_EC_LPC
module will be called cros_ec_lpcs.
config CROS_EC_PROTO
- bool
+ tristate
help
ChromeOS EC communication protocol helpers.
config CROS_KBD_LED_BACKLIGHT
tristate "Backlight LED support for Chrome OS keyboards"
- depends on LEDS_CLASS && (ACPI || CROS_EC || MFD_CROS_EC_DEV)
+ depends on LEDS_CLASS
+ depends on MFD_CROS_EC_DEV || (MFD_CROS_EC_DEV=n && ACPI)
help
This option enables support for the keyboard backlight LEDs on
select Chrome OS systems.
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 1a5a484563cc..b981a1bb5bd8 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -25,7 +25,8 @@ endif
obj-$(CONFIG_CROS_EC_TYPEC) += cros-ec-typec.o
obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o
-obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o cros_ec_trace.o
+cros-ec-proto-objs := cros_ec_proto.o cros_ec_trace.o
+obj-$(CONFIG_CROS_EC_PROTO) += cros-ec-proto.o
obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o
obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_chardev.o
obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o
diff --git a/drivers/platform/chrome/chromeos_of_hw_prober.c b/drivers/platform/chrome/chromeos_of_hw_prober.c
index c6992f5cdc76..f3cd612e5584 100644
--- a/drivers/platform/chrome/chromeos_of_hw_prober.c
+++ b/drivers/platform/chrome/chromeos_of_hw_prober.c
@@ -57,7 +57,9 @@ static int chromeos_i2c_component_prober(struct device *dev, const void *_data)
}
DEFINE_CHROMEOS_I2C_PROBE_DATA_DUMB_BY_TYPE(touchscreen);
+DEFINE_CHROMEOS_I2C_PROBE_DATA_DUMB_BY_TYPE(trackpad);
+DEFINE_CHROMEOS_I2C_PROBE_CFG_SIMPLE_BY_TYPE(touchscreen);
DEFINE_CHROMEOS_I2C_PROBE_CFG_SIMPLE_BY_TYPE(trackpad);
static const struct chromeos_i2c_probe_data chromeos_i2c_probe_hana_trackpad = {
@@ -75,6 +77,17 @@ static const struct chromeos_i2c_probe_data chromeos_i2c_probe_hana_trackpad = {
},
};
+static const struct chromeos_i2c_probe_data chromeos_i2c_probe_squirtle_touchscreen = {
+ .cfg = &chromeos_i2c_probe_simple_touchscreen_cfg,
+ .opts = &(const struct i2c_of_probe_simple_opts) {
+ .res_node_compatible = "elan,ekth6a12nay",
+ .supply_name = "vcc33",
+ .gpio_name = "reset",
+ .post_power_on_delay_ms = 10,
+ .post_gpio_config_delay_ms = 300,
+ },
+};
+
static const struct hw_prober_entry hw_prober_platforms[] = {
{
.compatible = "google,hana",
@@ -84,6 +97,26 @@ static const struct hw_prober_entry hw_prober_platforms[] = {
.compatible = "google,hana",
.prober = chromeos_i2c_component_prober,
.data = &chromeos_i2c_probe_hana_trackpad,
+ }, {
+ .compatible = "google,spherion",
+ .prober = chromeos_i2c_component_prober,
+ .data = &chromeos_i2c_probe_hana_trackpad,
+ }, {
+ .compatible = "google,squirtle",
+ .prober = chromeos_i2c_component_prober,
+ .data = &chromeos_i2c_probe_dumb_trackpad,
+ }, {
+ .compatible = "google,squirtle",
+ .prober = chromeos_i2c_component_prober,
+ .data = &chromeos_i2c_probe_squirtle_touchscreen,
+ }, {
+ .compatible = "google,steelix",
+ .prober = chromeos_i2c_component_prober,
+ .data = &chromeos_i2c_probe_dumb_trackpad,
+ }, {
+ .compatible = "google,voltorb",
+ .prober = chromeos_i2c_component_prober,
+ .data = &chromeos_i2c_probe_dumb_trackpad,
},
};
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 92ac9a2f9c88..d10f9561990c 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -207,22 +207,15 @@ static ssize_t cros_ec_pdinfo_read(struct file *file,
char read_buf[EC_USB_PD_MAX_PORTS * 40], *p = read_buf;
struct cros_ec_debugfs *debug_info = file->private_data;
struct cros_ec_device *ec_dev = debug_info->ec->ec_dev;
- struct {
- struct cros_ec_command msg;
- union {
- struct ec_response_usb_pd_control_v1 resp;
- struct ec_params_usb_pd_control params;
- };
- } __packed ec_buf;
- struct cros_ec_command *msg;
- struct ec_response_usb_pd_control_v1 *resp;
- struct ec_params_usb_pd_control *params;
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ MAX(sizeof(struct ec_response_usb_pd_control_v1),
+ sizeof(struct ec_params_usb_pd_control)));
+ struct ec_response_usb_pd_control_v1 *resp =
+ (struct ec_response_usb_pd_control_v1 *)msg->data;
+ struct ec_params_usb_pd_control *params =
+ (struct ec_params_usb_pd_control *)msg->data;
int i;
- msg = &ec_buf.msg;
- params = (struct ec_params_usb_pd_control *)msg->data;
- resp = (struct ec_response_usb_pd_control_v1 *)msg->data;
-
msg->command = EC_CMD_USB_PD_CONTROL;
msg->version = 1;
msg->insize = sizeof(*resp);
@@ -253,17 +246,15 @@ static ssize_t cros_ec_pdinfo_read(struct file *file,
static bool cros_ec_uptime_is_supported(struct cros_ec_device *ec_dev)
{
- struct {
- struct cros_ec_command cmd;
- struct ec_response_uptime_info resp;
- } __packed msg = {};
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ sizeof(struct ec_response_uptime_info));
int ret;
- msg.cmd.command = EC_CMD_GET_UPTIME_INFO;
- msg.cmd.insize = sizeof(msg.resp);
+ msg->command = EC_CMD_GET_UPTIME_INFO;
+ msg->insize = sizeof(struct ec_response_uptime_info);
- ret = cros_ec_cmd_xfer_status(ec_dev, &msg.cmd);
- if (ret == -EPROTO && msg.cmd.result == EC_RES_INVALID_COMMAND)
+ ret = cros_ec_cmd_xfer_status(ec_dev, msg);
+ if (ret == -EPROTO && msg->result == EC_RES_INVALID_COMMAND)
return false;
/* Other errors maybe a transient error, do not rule about support. */
@@ -275,20 +266,17 @@ static ssize_t cros_ec_uptime_read(struct file *file, char __user *user_buf,
{
struct cros_ec_debugfs *debug_info = file->private_data;
struct cros_ec_device *ec_dev = debug_info->ec->ec_dev;
- struct {
- struct cros_ec_command cmd;
- struct ec_response_uptime_info resp;
- } __packed msg = {};
- struct ec_response_uptime_info *resp;
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ sizeof(struct ec_response_uptime_info));
+ struct ec_response_uptime_info *resp =
+ (struct ec_response_uptime_info *)msg->data;
char read_buf[32];
int ret;
- resp = (struct ec_response_uptime_info *)&msg.resp;
-
- msg.cmd.command = EC_CMD_GET_UPTIME_INFO;
- msg.cmd.insize = sizeof(*resp);
+ msg->command = EC_CMD_GET_UPTIME_INFO;
+ msg->insize = sizeof(*resp);
- ret = cros_ec_cmd_xfer_status(ec_dev, &msg.cmd);
+ ret = cros_ec_cmd_xfer_status(ec_dev, msg);
if (ret < 0)
return ret;
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 877b107fee4b..3e94a0a82173 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -139,12 +139,10 @@ static int cros_ec_xfer_command(struct cros_ec_device *ec_dev, struct cros_ec_co
static int cros_ec_wait_until_complete(struct cros_ec_device *ec_dev, uint32_t *result)
{
- struct {
- struct cros_ec_command msg;
- struct ec_response_get_comms_status status;
- } __packed buf;
- struct cros_ec_command *msg = &buf.msg;
- struct ec_response_get_comms_status *status = &buf.status;
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ sizeof(struct ec_response_get_comms_status));
+ struct ec_response_get_comms_status *status =
+ (struct ec_response_get_comms_status *)msg->data;
int ret = 0, i;
msg->version = 0;
@@ -757,16 +755,13 @@ static int get_next_event_xfer(struct cros_ec_device *ec_dev,
static int get_next_event(struct cros_ec_device *ec_dev)
{
- struct {
- struct cros_ec_command msg;
- struct ec_response_get_next_event_v3 event;
- } __packed buf;
- struct cros_ec_command *msg = &buf.msg;
- struct ec_response_get_next_event_v3 *event = &buf.event;
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ sizeof(struct ec_response_get_next_event_v3));
+ struct ec_response_get_next_event_v3 *event =
+ (struct ec_response_get_next_event_v3 *)msg->data;
int cmd_version = ec_dev->mkbp_event_supported - 1;
u32 size;
- memset(msg, 0, sizeof(*msg));
if (ec_dev->suspended) {
dev_dbg(ec_dev->dev, "Device suspended.\n");
return -EHOSTDOWN;
@@ -1157,3 +1152,6 @@ int cros_ec_get_cmd_versions(struct cros_ec_device *ec_dev, u16 cmd)
return resp.version_mask;
}
EXPORT_SYMBOL_GPL(cros_ec_get_cmd_versions);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC communication protocol helpers");
diff --git a/drivers/platform/chrome/cros_ec_proto_test_util.h b/drivers/platform/chrome/cros_ec_proto_test_util.h
index 414002271c9c..b17239f052c2 100644
--- a/drivers/platform/chrome/cros_ec_proto_test_util.h
+++ b/drivers/platform/chrome/cros_ec_proto_test_util.h
@@ -13,7 +13,6 @@ struct ec_xfer_mock {
struct kunit *test;
/* input */
- struct cros_ec_command msg;
void *i_data;
/* output */
@@ -21,6 +20,10 @@ struct ec_xfer_mock {
int result;
void *o_data;
u32 o_data_len;
+
+ /* input */
+ /* Must be last -ends in a flexible-array member. */
+ struct cros_ec_command msg;
};
extern int cros_kunit_ec_xfer_mock_default_result;
diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
index d2228720991f..7678e3d05fd3 100644
--- a/drivers/platform/chrome/cros_ec_typec.c
+++ b/drivers/platform/chrome/cros_ec_typec.c
@@ -22,8 +22,10 @@
#define DRV_NAME "cros-ec-typec"
-#define DP_PORT_VDO (DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_C) | BIT(DP_PIN_ASSIGN_D)) | \
- DP_CAP_DFP_D | DP_CAP_RECEPTACLE)
+#define DP_PORT_VDO (DP_CAP_DFP_D | DP_CAP_RECEPTACLE | \
+ DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_C) | \
+ BIT(DP_PIN_ASSIGN_D) | \
+ BIT(DP_PIN_ASSIGN_E)))
static void cros_typec_role_switch_quirk(struct fwnode_handle *fwnode)
{
diff --git a/drivers/platform/chrome/cros_kbd_led_backlight.c b/drivers/platform/chrome/cros_kbd_led_backlight.c
index fc27bd7fc4b9..f4c2282129f5 100644
--- a/drivers/platform/chrome/cros_kbd_led_backlight.c
+++ b/drivers/platform/chrome/cros_kbd_led_backlight.c
@@ -137,16 +137,12 @@ static int
keyboard_led_set_brightness_ec_pwm(struct led_classdev *cdev,
enum led_brightness brightness)
{
- struct {
- struct cros_ec_command msg;
- struct ec_params_pwm_set_keyboard_backlight params;
- } __packed buf;
- struct ec_params_pwm_set_keyboard_backlight *params = &buf.params;
- struct cros_ec_command *msg = &buf.msg;
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ sizeof(struct ec_params_pwm_set_keyboard_backlight));
+ struct ec_params_pwm_set_keyboard_backlight *params =
+ (struct ec_params_pwm_set_keyboard_backlight *)msg->data;
struct keyboard_led *keyboard_led = container_of(cdev, struct keyboard_led, cdev);
- memset(&buf, 0, sizeof(buf));
-
msg->command = EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT;
msg->outsize = sizeof(*params);
@@ -158,17 +154,13 @@ keyboard_led_set_brightness_ec_pwm(struct led_classdev *cdev,
static enum led_brightness
keyboard_led_get_brightness_ec_pwm(struct led_classdev *cdev)
{
- struct {
- struct cros_ec_command msg;
- struct ec_response_pwm_get_keyboard_backlight resp;
- } __packed buf;
- struct ec_response_pwm_get_keyboard_backlight *resp = &buf.resp;
- struct cros_ec_command *msg = &buf.msg;
+ DEFINE_RAW_FLEX(struct cros_ec_command, msg, data,
+ sizeof(struct ec_response_pwm_get_keyboard_backlight));
+ struct ec_response_pwm_get_keyboard_backlight *resp =
+ (struct ec_response_pwm_get_keyboard_backlight *)msg->data;
struct keyboard_led *keyboard_led = container_of(cdev, struct keyboard_led, cdev);
int ret;
- memset(&buf, 0, sizeof(buf));
-
msg->command = EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT;
msg->insize = sizeof(*resp);
diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c
index c1eccb3c80c5..73ca3f48c5cf 100644
--- a/drivers/platform/x86/amd/hsmp/acpi.c
+++ b/drivers/platform/x86/amd/hsmp/acpi.c
@@ -9,7 +9,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/amd_hsmp.h>
+#include <asm/amd/hsmp.h>
#include <linux/acpi.h>
#include <linux/device.h>
@@ -23,13 +23,12 @@
#include <uapi/asm-generic/errno-base.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "hsmp.h"
-#define DRIVER_NAME "amd_hsmp"
+#define DRIVER_NAME "hsmp_acpi"
#define DRIVER_VERSION "2.3"
-#define ACPI_HSMP_DEVICE_HID "AMDI0097"
/* These are the strings specified in ACPI table */
#define MSG_IDOFF_STR "MsgIdOffset"
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c
index a3ac09a90de4..e262e8a97b45 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp/hsmp.c
@@ -7,7 +7,7 @@
* This file provides a device implementation for HSMP interface
*/
-#include <asm/amd_hsmp.h>
+#include <asm/amd/hsmp.h>
#include <linux/acpi.h>
#include <linux/delay.h>
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h
index af8b21f821d6..d58d4f0c20d5 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.h
+++ b/drivers/platform/x86/amd/hsmp/hsmp.h
@@ -23,6 +23,7 @@
#define HSMP_CDEV_NAME "hsmp_cdev"
#define HSMP_DEVNODE_NAME "hsmp"
+#define ACPI_HSMP_DEVICE_HID "AMDI0097"
struct hsmp_mbaddr_info {
u32 base_addr;
diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c
index b9782a078dbd..62bf9547631e 100644
--- a/drivers/platform/x86/amd/hsmp/plat.c
+++ b/drivers/platform/x86/amd/hsmp/plat.c
@@ -9,8 +9,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/amd_hsmp.h>
+#include <asm/amd/hsmp.h>
+#include <linux/acpi.h>
#include <linux/build_bug.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -18,7 +19,7 @@
#include <linux/platform_device.h>
#include <linux/sysfs.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "hsmp.h"
@@ -266,7 +267,7 @@ static bool legacy_hsmp_support(void)
}
case 0x1A:
switch (boot_cpu_data.x86_model) {
- case 0x00 ... 0x1F:
+ case 0x00 ... 0x0F:
return true;
default:
return false;
@@ -288,6 +289,9 @@ static int __init hsmp_plt_init(void)
return ret;
}
+ if (acpi_dev_present(ACPI_HSMP_DEVICE_HID, NULL, -1))
+ return -ENODEV;
+
hsmp_pdev = get_hsmp_pdev();
if (!hsmp_pdev)
return -ENOMEM;
diff --git a/drivers/platform/x86/amd/pmc/mp1_stb.c b/drivers/platform/x86/amd/pmc/mp1_stb.c
index c005f00988f7..3b9b9f30faa3 100644
--- a/drivers/platform/x86/amd/pmc/mp1_stb.c
+++ b/drivers/platform/x86/amd/pmc/mp1_stb.c
@@ -11,7 +11,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index b4f49720c87f..5c7c01f66cde 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -11,6 +11,7 @@
#include <linux/dmi.h>
#include <linux/io.h>
#include <linux/ioport.h>
+#include <asm/amd/fch.h>
#include "pmc.h"
@@ -20,7 +21,7 @@ struct quirk_entry {
};
static struct quirk_entry quirk_s2idle_bug = {
- .s2idle_bug_mmio = 0xfed80380,
+ .s2idle_bug_mmio = FCH_PM_BASE + FCH_PM_SCRATCH,
};
static struct quirk_entry quirk_spurious_8042 = {
@@ -217,6 +218,13 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
}
},
+ {
+ .ident = "MECHREVO Wujie 14X (GX4HRXL)",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"),
+ }
+ },
{}
};
diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
index 0329fafe14eb..37c7a57afee5 100644
--- a/drivers/platform/x86/amd/pmc/pmc.c
+++ b/drivers/platform/x86/amd/pmc/pmc.c
@@ -28,7 +28,7 @@
#include <linux/seq_file.h>
#include <linux/uaccess.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "pmc.h"
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index 96821101ec77..76910601cac8 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -14,7 +14,7 @@
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/power_supply.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "pmf.h"
/* PMF-SMU communication registers */
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index 14b99d8b63d2..d3bd12ad036a 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -334,6 +334,11 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
return 0;
}
+static inline bool amd_pmf_pb_valid(struct amd_pmf_dev *dev)
+{
+ return memchr_inv(dev->policy_buf, 0xff, dev->policy_sz);
+}
+
#ifdef CONFIG_AMD_PMF_DEBUG
static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev)
{
@@ -361,12 +366,22 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf,
dev->policy_buf = new_policy_buf;
dev->policy_sz = length;
+ if (!amd_pmf_pb_valid(dev)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
amd_pmf_hex_dump_pb(dev);
ret = amd_pmf_start_policy_engine(dev);
if (ret < 0)
- return ret;
+ goto cleanup;
return length;
+
+cleanup:
+ kfree(dev->policy_buf);
+ dev->policy_buf = NULL;
+ return ret;
}
static const struct file_operations pb_fops = {
@@ -528,6 +543,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz);
+ if (!amd_pmf_pb_valid(dev)) {
+ dev_info(dev->dev, "No Smart PC policy present\n");
+ ret = -EINVAL;
+ goto err_free_policy;
+ }
+
amd_pmf_hex_dump_pb(dev);
dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 0c697b46f436..47cc766624d7 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -4779,7 +4779,8 @@ static int asus_wmi_add(struct platform_device *pdev)
goto fail_leds;
asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
- if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+ if ((result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) ==
+ (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
asus->driver->wlan_ctrl_by_user = 1;
if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
index 230e6ee96636..d8f1bf5e58a0 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
@@ -45,7 +45,7 @@ static ssize_t current_password_store(struct kobject *kobj,
int length;
length = strlen(buf);
- if (buf[length-1] == '\n')
+ if (length && buf[length - 1] == '\n')
length--;
/* firmware does verifiation of min/max password length,
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index a0eae24ca9e6..162809140f68 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -17,13 +17,13 @@
/*
* fujitsu-laptop.c - Fujitsu laptop support, providing access to additional
* features made available on a range of Fujitsu laptops including the
- * P2xxx/P5xxx/S6xxx/S7xxx series.
+ * P2xxx/P5xxx/S2xxx/S6xxx/S7xxx series.
*
* This driver implements a vendor-specific backlight control interface for
* Fujitsu laptops and provides support for hotkeys present on certain Fujitsu
* laptops.
*
- * This driver has been tested on a Fujitsu Lifebook S6410, S7020 and
+ * This driver has been tested on a Fujitsu Lifebook S2110, S6410, S7020 and
* P8010. It should work on most P-series and S-series Lifebooks, but
* YMMV.
*
@@ -107,7 +107,11 @@
#define KEY2_CODE 0x411
#define KEY3_CODE 0x412
#define KEY4_CODE 0x413
-#define KEY5_CODE 0x420
+#define KEY5_CODE 0x414
+#define KEY6_CODE 0x415
+#define KEY7_CODE 0x416
+#define KEY8_CODE 0x417
+#define KEY9_CODE 0x420
/* Hotkey ringbuffer limits */
#define MAX_HOTKEY_RINGBUFFER_SIZE 100
@@ -560,7 +564,7 @@ static const struct key_entry keymap_default[] = {
{ KE_KEY, KEY2_CODE, { KEY_PROG2 } },
{ KE_KEY, KEY3_CODE, { KEY_PROG3 } },
{ KE_KEY, KEY4_CODE, { KEY_PROG4 } },
- { KE_KEY, KEY5_CODE, { KEY_RFKILL } },
+ { KE_KEY, KEY9_CODE, { KEY_RFKILL } },
/* Soft keys read from status flags */
{ KE_KEY, FLAG_RFKILL, { KEY_RFKILL } },
{ KE_KEY, FLAG_TOUCHPAD_TOGGLE, { KEY_TOUCHPAD_TOGGLE } },
@@ -584,6 +588,18 @@ static const struct key_entry keymap_p8010[] = {
{ KE_END, 0 }
};
+static const struct key_entry keymap_s2110[] = {
+ { KE_KEY, KEY1_CODE, { KEY_PROG1 } }, /* "A" */
+ { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, /* "B" */
+ { KE_KEY, KEY3_CODE, { KEY_WWW } }, /* "Internet" */
+ { KE_KEY, KEY4_CODE, { KEY_EMAIL } }, /* "E-mail" */
+ { KE_KEY, KEY5_CODE, { KEY_STOPCD } },
+ { KE_KEY, KEY6_CODE, { KEY_PLAYPAUSE } },
+ { KE_KEY, KEY7_CODE, { KEY_PREVIOUSSONG } },
+ { KE_KEY, KEY8_CODE, { KEY_NEXTSONG } },
+ { KE_END, 0 }
+};
+
static const struct key_entry *keymap = keymap_default;
static int fujitsu_laptop_dmi_keymap_override(const struct dmi_system_id *id)
@@ -621,6 +637,15 @@ static const struct dmi_system_id fujitsu_laptop_dmi_table[] = {
},
.driver_data = (void *)keymap_p8010
},
+ {
+ .callback = fujitsu_laptop_dmi_keymap_override,
+ .ident = "Fujitsu LifeBook S2110",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S2110"),
+ },
+ .driver_data = (void *)keymap_s2110
+ },
{}
};
diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c
index 1ae50702bdb7..b73e582128c9 100644
--- a/drivers/platform/x86/intel/ifs/core.c
+++ b/drivers/platform/x86/intel/ifs/core.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "ifs.h"
@@ -115,13 +116,13 @@ static int __init ifs_init(void)
if (!m)
return -ENODEV;
- if (rdmsrl_safe(MSR_IA32_CORE_CAPS, &msrval))
+ if (rdmsrq_safe(MSR_IA32_CORE_CAPS, &msrval))
return -ENODEV;
if (!(msrval & MSR_IA32_CORE_CAPS_INTEGRITY_CAPS))
return -ENODEV;
- if (rdmsrl_safe(MSR_INTEGRITY_CAPS, &msrval))
+ if (rdmsrq_safe(MSR_INTEGRITY_CAPS, &msrval))
return -ENODEV;
ifs_pkg_auth = kmalloc_array(topology_max_packages(), sizeof(bool), GFP_KERNEL);
diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c
index de54bd1a5970..50f1fdf7dfed 100644
--- a/drivers/platform/x86/intel/ifs/load.c
+++ b/drivers/platform/x86/intel/ifs/load.c
@@ -5,6 +5,7 @@
#include <linux/sizes.h>
#include <asm/cpu.h>
#include <asm/microcode.h>
+#include <asm/msr.h>
#include "ifs.h"
@@ -127,8 +128,8 @@ static void copy_hashes_authenticate_chunks(struct work_struct *work)
ifsd = ifs_get_data(dev);
msrs = ifs_get_test_msrs(dev);
/* run scan hash copy */
- wrmsrl(msrs->copy_hashes, ifs_hash_ptr);
- rdmsrl(msrs->copy_hashes_status, hashes_status.data);
+ wrmsrq(msrs->copy_hashes, ifs_hash_ptr);
+ rdmsrq(msrs->copy_hashes_status, hashes_status.data);
/* enumerate the scan image information */
num_chunks = hashes_status.num_chunks;
@@ -149,8 +150,8 @@ static void copy_hashes_authenticate_chunks(struct work_struct *work)
linear_addr = base + i * chunk_size;
linear_addr |= i;
- wrmsrl(msrs->copy_chunks, linear_addr);
- rdmsrl(msrs->copy_chunks_status, chunk_status.data);
+ wrmsrq(msrs->copy_chunks, linear_addr);
+ rdmsrq(msrs->copy_chunks_status, chunk_status.data);
ifsd->valid_chunks = chunk_status.valid_chunks;
err_code = chunk_status.error_code;
@@ -195,8 +196,8 @@ static int copy_hashes_authenticate_chunks_gen2(struct device *dev)
msrs = ifs_get_test_msrs(dev);
if (need_copy_scan_hashes(ifsd)) {
- wrmsrl(msrs->copy_hashes, ifs_hash_ptr);
- rdmsrl(msrs->copy_hashes_status, hashes_status.data);
+ wrmsrq(msrs->copy_hashes, ifs_hash_ptr);
+ rdmsrq(msrs->copy_hashes_status, hashes_status.data);
/* enumerate the scan image information */
chunk_size = hashes_status.chunk_size * SZ_1K;
@@ -216,8 +217,8 @@ static int copy_hashes_authenticate_chunks_gen2(struct device *dev)
}
if (ifsd->generation >= IFS_GEN_STRIDE_AWARE) {
- wrmsrl(msrs->test_ctrl, INVALIDATE_STRIDE);
- rdmsrl(msrs->copy_chunks_status, chunk_status.data);
+ wrmsrq(msrs->test_ctrl, INVALIDATE_STRIDE);
+ rdmsrq(msrs->copy_chunks_status, chunk_status.data);
if (chunk_status.valid_chunks != 0) {
dev_err(dev, "Couldn't invalidate installed stride - %d\n",
chunk_status.valid_chunks);
@@ -238,9 +239,9 @@ static int copy_hashes_authenticate_chunks_gen2(struct device *dev)
chunk_table[1] = linear_addr;
do {
local_irq_disable();
- wrmsrl(msrs->copy_chunks, (u64)chunk_table);
+ wrmsrq(msrs->copy_chunks, (u64)chunk_table);
local_irq_enable();
- rdmsrl(msrs->copy_chunks_status, chunk_status.data);
+ rdmsrq(msrs->copy_chunks_status, chunk_status.data);
err_code = chunk_status.error_code;
} while (err_code == AUTH_INTERRUPTED_ERROR && --retry_count);
diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c
index f978dd05d4d8..dfc119d7354d 100644
--- a/drivers/platform/x86/intel/ifs/runtest.c
+++ b/drivers/platform/x86/intel/ifs/runtest.c
@@ -7,6 +7,7 @@
#include <linux/nmi.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <asm/msr.h>
#include "ifs.h"
@@ -209,8 +210,8 @@ static int doscan(void *data)
* take up to 200 milliseconds (in the case where all chunks
* are processed in a single pass) before it retires.
*/
- wrmsrl(MSR_ACTIVATE_SCAN, params->activate->data);
- rdmsrl(MSR_SCAN_STATUS, status.data);
+ wrmsrq(MSR_ACTIVATE_SCAN, params->activate->data);
+ rdmsrq(MSR_SCAN_STATUS, status.data);
trace_ifs_status(ifsd->cur_batch, start, stop, status.data);
@@ -321,9 +322,9 @@ static int do_array_test(void *data)
first = cpumask_first(cpu_smt_mask(cpu));
if (cpu == first) {
- wrmsrl(MSR_ARRAY_BIST, command->data);
+ wrmsrq(MSR_ARRAY_BIST, command->data);
/* Pass back the result of the test */
- rdmsrl(MSR_ARRAY_BIST, command->data);
+ rdmsrq(MSR_ARRAY_BIST, command->data);
}
return 0;
@@ -374,8 +375,8 @@ static int do_array_test_gen1(void *status)
first = cpumask_first(cpu_smt_mask(cpu));
if (cpu == first) {
- wrmsrl(MSR_ARRAY_TRIGGER, ARRAY_GEN1_TEST_ALL_ARRAYS);
- rdmsrl(MSR_ARRAY_STATUS, *((u64 *)status));
+ wrmsrq(MSR_ARRAY_TRIGGER, ARRAY_GEN1_TEST_ALL_ARRAYS);
+ rdmsrq(MSR_ARRAY_STATUS, *((u64 *)status));
}
return 0;
@@ -526,8 +527,8 @@ static int dosbaf(void *data)
* starts scan of each requested bundle. The core test happens
* during the "execution" of the WRMSR.
*/
- wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data);
- rdmsrl(MSR_SBAF_STATUS, status.data);
+ wrmsrq(MSR_ACTIVATE_SBAF, run_params->activate->data);
+ rdmsrq(MSR_SBAF_STATUS, status.data);
trace_ifs_sbaf(ifsd->cur_batch, *run_params->activate, status);
/* Pass back the result of the test */
diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c
index 3b48cd7a4075..b7b98343fdc6 100644
--- a/drivers/platform/x86/intel/int0002_vgpio.c
+++ b/drivers/platform/x86/intel/int0002_vgpio.c
@@ -23,7 +23,7 @@
* ACPI mechanisms, this is not a real GPIO at all.
*
* This driver will bind to the INT0002 device, and register as a GPIO
- * controller, letting gpiolib-acpi.c call the _L02 handler as it would
+ * controller, letting gpiolib-acpi call the _L02 handler as it would
* for a real GPIO controller.
*/
diff --git a/drivers/platform/x86/intel/pmc/arl.c b/drivers/platform/x86/intel/pmc/arl.c
index 320993bd6d31..f9c48738b853 100644
--- a/drivers/platform/x86/intel/pmc/arl.c
+++ b/drivers/platform/x86/intel/pmc/arl.c
@@ -681,6 +681,7 @@ static struct pmc_info arl_pmc_info_list[] = {
#define ARL_NPU_PCI_DEV 0xad1d
#define ARL_GNA_PCI_DEV 0xae4c
+#define ARL_H_NPU_PCI_DEV 0x7d1d
#define ARL_H_GNA_PCI_DEV 0x774c
/*
* Set power state of select devices that do not have drivers to D3
@@ -694,7 +695,7 @@ static void arl_d3_fixup(void)
static void arl_h_d3_fixup(void)
{
- pmc_core_set_device_d3(ARL_NPU_PCI_DEV);
+ pmc_core_set_device_d3(ARL_H_NPU_PCI_DEV);
pmc_core_set_device_d3(ARL_H_GNA_PCI_DEV);
}
diff --git a/drivers/platform/x86/intel/pmc/cnp.c b/drivers/platform/x86/intel/pmc/cnp.c
index 2c5af158bbe2..efea4e1ba52b 100644
--- a/drivers/platform/x86/intel/pmc/cnp.c
+++ b/drivers/platform/x86/intel/pmc/cnp.c
@@ -10,6 +10,7 @@
#include <linux/smp.h>
#include <linux/suspend.h>
+#include <asm/msr.h>
#include "core.h"
/* Cannon Lake: PGD PFET Enable Ack Status Register(s) bitmap */
@@ -227,10 +228,10 @@ static void disable_c1_auto_demote(void *unused)
int cpunum = smp_processor_id();
u64 val;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, val);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, val);
per_cpu(pkg_cst_config, cpunum) = val;
val &= ~NHM_C1_AUTO_DEMOTE;
- wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, val);
+ wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, val);
pr_debug("%s: cpu:%d cst %llx\n", __func__, cpunum, val);
}
@@ -239,7 +240,7 @@ static void restore_c1_auto_demote(void *unused)
{
int cpunum = smp_processor_id();
- wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, per_cpu(pkg_cst_config, cpunum));
+ wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, per_cpu(pkg_cst_config, cpunum));
pr_debug("%s: cpu:%d cst %llx\n", __func__, cpunum,
per_cpu(pkg_cst_config, cpunum));
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 7a1d11f2914f..9f678c753dfa 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -22,7 +22,7 @@
#include <linux/suspend.h>
#include <linux/units.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/msr.h>
@@ -1082,7 +1082,7 @@ static int pmc_core_pkgc_show(struct seq_file *s, void *unused)
unsigned int index;
for (index = 0; map[index].name ; index++) {
- if (rdmsrl_safe(map[index].bit_mask, &pcstate_count))
+ if (rdmsrq_safe(map[index].bit_mask, &pcstate_count))
continue;
pcstate_count *= 1000;
@@ -1587,7 +1587,7 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
/* Save PKGC residency for checking later */
for (i = 0; i < pmcdev->num_of_pkgc; i++) {
- if (rdmsrl_safe(msr_map[i].bit_mask, &pmcdev->pkgc_res_cnt[i]))
+ if (rdmsrq_safe(msr_map[i].bit_mask, &pmcdev->pkgc_res_cnt[i]))
return -EIO;
}
@@ -1603,7 +1603,7 @@ static inline bool pmc_core_is_deepest_pkgc_failed(struct pmc_dev *pmcdev)
u32 deepest_pkgc_msr = msr_map[pmcdev->num_of_pkgc - 1].bit_mask;
u64 deepest_pkgc_residency;
- if (rdmsrl_safe(deepest_pkgc_msr, &deepest_pkgc_residency))
+ if (rdmsrq_safe(deepest_pkgc_msr, &deepest_pkgc_residency))
return false;
if (deepest_pkgc_residency == pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1])
@@ -1655,7 +1655,7 @@ int pmc_core_resume_common(struct pmc_dev *pmcdev)
for (i = 0; i < pmcdev->num_of_pkgc; i++) {
u64 pc_cnt;
- if (!rdmsrl_safe(msr_map[i].bit_mask, &pc_cnt)) {
+ if (!rdmsrq_safe(msr_map[i].bit_mask, &pc_cnt)) {
dev_info(dev, "Prev %s cnt = 0x%llx, Current %s cnt = 0x%llx\n",
msr_map[i].name, pmcdev->pkgc_res_cnt[i],
msr_map[i].name, pc_cnt);
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 31239a93dd71..8a5713593811 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -21,6 +21,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "isst_if_common.h"
@@ -191,7 +192,7 @@ void isst_resume_common(void)
if (cb->registered)
isst_mbox_resume_command(cb, sst_cmd);
} else {
- wrmsrl_safe_on_cpu(sst_cmd->cpu, sst_cmd->cmd,
+ wrmsrq_safe_on_cpu(sst_cmd->cpu, sst_cmd->cmd,
sst_cmd->data);
}
}
@@ -211,7 +212,7 @@ static void isst_restore_msr_local(int cpu)
hash_for_each_possible(isst_hash, sst_cmd, hnode,
punit_msr_white_list[i]) {
if (!sst_cmd->mbox_cmd_type && sst_cmd->cpu == cpu)
- wrmsrl_safe(sst_cmd->cmd, sst_cmd->data);
+ wrmsrq_safe(sst_cmd->cmd, sst_cmd->data);
}
}
mutex_unlock(&isst_hash_lock);
@@ -406,7 +407,7 @@ static int isst_if_cpu_online(unsigned int cpu)
isst_cpu_info[cpu].numa_node = cpu_to_node(cpu);
- ret = rdmsrl_safe(MSR_CPU_BUS_NUMBER, &data);
+ ret = rdmsrq_safe(MSR_CPU_BUS_NUMBER, &data);
if (ret) {
/* This is not a fatal error on MSR mailbox only I/F */
isst_cpu_info[cpu].bus_info[0] = -1;
@@ -420,12 +421,12 @@ static int isst_if_cpu_online(unsigned int cpu)
if (isst_hpm_support) {
- ret = rdmsrl_safe(MSR_PM_LOGICAL_ID, &data);
+ ret = rdmsrq_safe(MSR_PM_LOGICAL_ID, &data);
if (!ret)
goto set_punit_id;
}
- ret = rdmsrl_safe(MSR_THREAD_ID_INFO, &data);
+ ret = rdmsrq_safe(MSR_THREAD_ID_INFO, &data);
if (ret) {
isst_cpu_info[cpu].punit_cpu_id = -1;
return ret;
@@ -524,7 +525,7 @@ static long isst_if_msr_cmd_req(u8 *cmd_ptr, int *write_only, int resume)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- ret = wrmsrl_safe_on_cpu(msr_cmd->logical_cpu,
+ ret = wrmsrq_safe_on_cpu(msr_cmd->logical_cpu,
msr_cmd->msr,
msr_cmd->data);
*write_only = 1;
@@ -535,7 +536,7 @@ static long isst_if_msr_cmd_req(u8 *cmd_ptr, int *write_only, int resume)
} else {
u64 data;
- ret = rdmsrl_safe_on_cpu(msr_cmd->logical_cpu,
+ ret = rdmsrq_safe_on_cpu(msr_cmd->logical_cpu,
msr_cmd->msr, &data);
if (!ret) {
msr_cmd->data = data;
@@ -831,8 +832,8 @@ static int __init isst_if_common_init(void)
u64 data;
/* Can fail only on some Skylake-X generations */
- if (rdmsrl_safe(MSR_OS_MAILBOX_INTERFACE, &data) ||
- rdmsrl_safe(MSR_OS_MAILBOX_DATA, &data))
+ if (rdmsrq_safe(MSR_OS_MAILBOX_INTERFACE, &data) ||
+ rdmsrq_safe(MSR_OS_MAILBOX_DATA, &data))
return -ENODEV;
}
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c b/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c
index c4b7af00352b..22745b217c6f 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c
@@ -18,6 +18,7 @@
#include <uapi/linux/isst_if.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "isst_if_common.h"
@@ -39,7 +40,7 @@ static int isst_if_send_mbox_cmd(u8 command, u8 sub_command, u32 parameter,
/* Poll for rb bit == 0 */
retries = OS_MAILBOX_RETRY_COUNT;
do {
- rdmsrl(MSR_OS_MAILBOX_INTERFACE, data);
+ rdmsrq(MSR_OS_MAILBOX_INTERFACE, data);
if (data & BIT_ULL(MSR_OS_MAILBOX_BUSY_BIT)) {
ret = -EBUSY;
continue;
@@ -52,19 +53,19 @@ static int isst_if_send_mbox_cmd(u8 command, u8 sub_command, u32 parameter,
return ret;
/* Write DATA register */
- wrmsrl(MSR_OS_MAILBOX_DATA, command_data);
+ wrmsrq(MSR_OS_MAILBOX_DATA, command_data);
/* Write command register */
data = BIT_ULL(MSR_OS_MAILBOX_BUSY_BIT) |
(parameter & GENMASK_ULL(13, 0)) << 16 |
(sub_command << 8) |
command;
- wrmsrl(MSR_OS_MAILBOX_INTERFACE, data);
+ wrmsrq(MSR_OS_MAILBOX_INTERFACE, data);
/* Poll for rb bit == 0 */
retries = OS_MAILBOX_RETRY_COUNT;
do {
- rdmsrl(MSR_OS_MAILBOX_INTERFACE, data);
+ rdmsrq(MSR_OS_MAILBOX_INTERFACE, data);
if (data & BIT_ULL(MSR_OS_MAILBOX_BUSY_BIT)) {
ret = -EBUSY;
continue;
@@ -74,7 +75,7 @@ static int isst_if_send_mbox_cmd(u8 command, u8 sub_command, u32 parameter,
return -ENXIO;
if (response_data) {
- rdmsrl(MSR_OS_MAILBOX_DATA, data);
+ rdmsrq(MSR_OS_MAILBOX_DATA, data);
*response_data = data;
}
ret = 0;
@@ -176,11 +177,11 @@ static int __init isst_if_mbox_init(void)
return -ENODEV;
/* Check presence of mailbox MSRs */
- ret = rdmsrl_safe(MSR_OS_MAILBOX_INTERFACE, &data);
+ ret = rdmsrq_safe(MSR_OS_MAILBOX_INTERFACE, &data);
if (ret)
return ret;
- ret = rdmsrl_safe(MSR_OS_MAILBOX_DATA, &data);
+ ret = rdmsrq_safe(MSR_OS_MAILBOX_DATA, &data);
if (ret)
return ret;
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
index 9978cdd19851..4d30d5360c8f 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
@@ -27,6 +27,7 @@
#include <linux/kernel.h>
#include <linux/minmax.h>
#include <linux/module.h>
+#include <asm/msr.h>
#include <uapi/linux/isst_if.h>
#include "isst_tpmi_core.h"
@@ -556,7 +557,7 @@ static bool disable_dynamic_sst_features(void)
{
u64 value;
- rdmsrl(MSR_PM_ENABLE, value);
+ rdmsrq(MSR_PM_ENABLE, value);
return !(value & 0x1);
}
diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c
index 2f01cd22a6ee..c21b3cb99b7c 100644
--- a/drivers/platform/x86/intel/tpmi_power_domains.c
+++ b/drivers/platform/x86/intel/tpmi_power_domains.c
@@ -157,7 +157,7 @@ static int tpmi_get_logical_id(unsigned int cpu, struct tpmi_cpu_info *info)
u64 data;
int ret;
- ret = rdmsrl_safe(MSR_PM_LOGICAL_ID, &data);
+ ret = rdmsrq_safe(MSR_PM_LOGICAL_ID, &data);
if (ret)
return ret;
@@ -203,7 +203,7 @@ static int __init tpmi_init(void)
return -ENODEV;
/* Check for MSR 0x54 presence */
- ret = rdmsrl_safe(MSR_PM_LOGICAL_ID, &data);
+ ret = rdmsrq_safe(MSR_PM_LOGICAL_ID, &data);
if (ret)
return ret;
diff --git a/drivers/platform/x86/intel/turbo_max_3.c b/drivers/platform/x86/intel/turbo_max_3.c
index 79a0bcdeffb8..b5af3e91ba04 100644
--- a/drivers/platform/x86/intel/turbo_max_3.c
+++ b/drivers/platform/x86/intel/turbo_max_3.c
@@ -17,6 +17,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#define MSR_OC_MAILBOX 0x150
#define MSR_OC_MAILBOX_CMD_OFFSET 32
@@ -41,14 +42,14 @@ static int get_oc_core_priority(unsigned int cpu)
value = cmd << MSR_OC_MAILBOX_CMD_OFFSET;
/* Set the busy bit to indicate OS is trying to issue command */
value |= BIT_ULL(MSR_OC_MAILBOX_BUSY_BIT);
- ret = wrmsrl_safe(MSR_OC_MAILBOX, value);
+ ret = wrmsrq_safe(MSR_OC_MAILBOX, value);
if (ret) {
pr_debug("cpu %d OC mailbox write failed\n", cpu);
return ret;
}
for (i = 0; i < OC_MAILBOX_RETRY_COUNT; ++i) {
- ret = rdmsrl_safe(MSR_OC_MAILBOX, &value);
+ ret = rdmsrq_safe(MSR_OC_MAILBOX, &value);
if (ret) {
pr_debug("cpu %d OC mailbox read failed\n", cpu);
break;
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
index bdee5d00f30b..2a6897035150 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
@@ -21,6 +21,7 @@
#include <linux/suspend.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
#include "uncore-frequency-common.h"
@@ -51,7 +52,7 @@ static int uncore_read_control_freq(struct uncore_data *data, unsigned int *valu
if (data->control_cpu < 0)
return -ENXIO;
- ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
+ ret = rdmsrq_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
if (ret)
return ret;
@@ -76,7 +77,7 @@ static int uncore_write_control_freq(struct uncore_data *data, unsigned int inpu
if (data->control_cpu < 0)
return -ENXIO;
- ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
+ ret = rdmsrq_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
if (ret)
return ret;
@@ -88,7 +89,7 @@ static int uncore_write_control_freq(struct uncore_data *data, unsigned int inpu
cap |= FIELD_PREP(UNCORE_MIN_RATIO_MASK, input);
}
- ret = wrmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, cap);
+ ret = wrmsrq_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, cap);
if (ret)
return ret;
@@ -105,7 +106,7 @@ static int uncore_read_freq(struct uncore_data *data, unsigned int *freq)
if (data->control_cpu < 0)
return -ENXIO;
- ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_PERF_STATUS, &ratio);
+ ret = rdmsrq_on_cpu(data->control_cpu, MSR_UNCORE_PERF_STATUS, &ratio);
if (ret)
return ret;
@@ -212,7 +213,7 @@ static int uncore_pm_notify(struct notifier_block *nb, unsigned long mode,
if (!data || !data->valid || !data->stored_uncore_data)
return 0;
- wrmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT,
+ wrmsrq_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT,
data->stored_uncore_data);
}
break;
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index 5d717b1c23cf..9506f28fb7d8 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -370,7 +370,7 @@ static void ips_cpu_raise(struct ips_driver *ips)
if (!ips->cpu_turbo_enabled)
return;
- rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ rdmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
@@ -382,12 +382,12 @@ static void ips_cpu_raise(struct ips_driver *ips)
thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
- wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ wrmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
turbo_override &= ~TURBO_TDP_MASK;
turbo_override |= new_tdp_limit;
- wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ wrmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
}
/**
@@ -405,7 +405,7 @@ static void ips_cpu_lower(struct ips_driver *ips)
u64 turbo_override;
u16 cur_limit, new_limit;
- rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ rdmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
cur_limit = turbo_override & TURBO_TDP_MASK;
new_limit = cur_limit - 8; /* 1W decrease */
@@ -417,12 +417,12 @@ static void ips_cpu_lower(struct ips_driver *ips)
thm_writew(THM_MPCPC, (new_limit * 10) / 8);
turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
- wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ wrmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
turbo_override &= ~TURBO_TDP_MASK;
turbo_override |= new_limit;
- wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ wrmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
}
/**
@@ -437,10 +437,10 @@ static void do_enable_cpu_turbo(void *data)
{
u64 perf_ctl;
- rdmsrl(IA32_PERF_CTL, perf_ctl);
+ rdmsrq(IA32_PERF_CTL, perf_ctl);
if (perf_ctl & IA32_PERF_TURBO_DIS) {
perf_ctl &= ~IA32_PERF_TURBO_DIS;
- wrmsrl(IA32_PERF_CTL, perf_ctl);
+ wrmsrq(IA32_PERF_CTL, perf_ctl);
}
}
@@ -475,10 +475,10 @@ static void do_disable_cpu_turbo(void *data)
{
u64 perf_ctl;
- rdmsrl(IA32_PERF_CTL, perf_ctl);
+ rdmsrq(IA32_PERF_CTL, perf_ctl);
if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
perf_ctl |= IA32_PERF_TURBO_DIS;
- wrmsrl(IA32_PERF_CTL, perf_ctl);
+ wrmsrq(IA32_PERF_CTL, perf_ctl);
}
}
@@ -1215,7 +1215,7 @@ static int cpu_clamp_show(struct seq_file *m, void *data)
u64 turbo_override;
int tdp, tdc;
- rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ rdmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
tdp = (int)(turbo_override & TURBO_TDP_MASK);
tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
@@ -1290,7 +1290,7 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
return NULL;
}
- rdmsrl(IA32_MISC_ENABLE, misc_en);
+ rdmsrq(IA32_MISC_ENABLE, misc_en);
/*
* If the turbo enable bit isn't set, we shouldn't try to enable/disable
* turbo manually or we'll get an illegal MSR access, even though
@@ -1312,7 +1312,7 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
return NULL;
}
- rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
+ rdmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_power);
tdp = turbo_power & TURBO_TDP_MASK;
/* Sanity check TDP against CPU */
@@ -1496,7 +1496,7 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
* Check PLATFORM_INFO MSR to make sure this chip is
* turbo capable.
*/
- rdmsrl(PLATFORM_INFO, platform_info);
+ rdmsrq(PLATFORM_INFO, platform_info);
if (!(platform_info & PLATFORM_TDP)) {
dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
return -ENODEV;
@@ -1529,7 +1529,7 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
ips->mgta_val = thm_readw(THM_MGTA);
/* Save turbo limits & ratios */
- rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
+ rdmsrq(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
ips_disable_cpu_turbo(ips);
ips->cpu_turbo_enabled = false;
@@ -1596,10 +1596,10 @@ static void ips_remove(struct pci_dev *dev)
if (ips->gpu_turbo_disable)
symbol_put(i915_gpu_turbo_disable);
- rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ rdmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
- wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
- wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
+ wrmsrq(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+ wrmsrq(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
free_irq(ips->irq, ips);
pci_free_irq_vectors(dev);
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 0fc275e461be..00b1e7c79a3d 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -1061,8 +1061,8 @@ static ssize_t current_value_store(struct kobject *kobj,
ret = -EINVAL;
goto out;
}
- set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name,
- new_setting, tlmi_priv.pwd_admin->signature);
+ set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name,
+ new_setting, tlmi_priv.pwd_admin->signature);
if (!set_str) {
ret = -ENOMEM;
goto out;
@@ -1092,7 +1092,7 @@ static ssize_t current_value_store(struct kobject *kobj,
goto out;
}
- set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name,
+ set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name,
new_setting);
if (!set_str) {
ret = -ENOMEM;
@@ -1120,11 +1120,11 @@ static ssize_t current_value_store(struct kobject *kobj,
}
if (auth_str)
- set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name,
- new_setting, auth_str);
+ set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name,
+ new_setting, auth_str);
else
- set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name,
- new_setting);
+ set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name,
+ new_setting);
if (!set_str) {
ret = -ENOMEM;
goto out;
@@ -1629,9 +1629,6 @@ static int tlmi_analyze(struct wmi_device *wdev)
continue;
}
- /* It is not allowed to have '/' for file name. Convert it into '\'. */
- strreplace(item, '/', '\\');
-
/* Remove the value part */
strreplace(item, ',', '\0');
@@ -1644,11 +1641,16 @@ static int tlmi_analyze(struct wmi_device *wdev)
}
setting->wdev = wdev;
setting->index = i;
+
+ strscpy(setting->name, item);
+ /* It is not allowed to have '/' for file name. Convert it into '\'. */
+ strreplace(item, '/', '\\');
strscpy(setting->display_name, item);
+
/* If BIOS selections supported, load those */
if (tlmi_priv.can_get_bios_selections) {
- ret = tlmi_get_bios_selections(setting->display_name,
- &setting->possible_values);
+ ret = tlmi_get_bios_selections(setting->name,
+ &setting->possible_values);
if (ret || !setting->possible_values)
pr_info("Error retrieving possible values for %d : %s\n",
i, setting->display_name);
diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
index a80452482227..9b014644d316 100644
--- a/drivers/platform/x86/think-lmi.h
+++ b/drivers/platform/x86/think-lmi.h
@@ -90,6 +90,7 @@ struct tlmi_attr_setting {
struct kobject kobj;
struct wmi_device *wdev;
int index;
+ char name[TLMI_SETTINGS_MAXLEN];
char display_name[TLMI_SETTINGS_MAXLEN];
char *possible_values;
};
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 5790095c175e..657625dd60a0 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -231,6 +231,7 @@ enum tpacpi_hkey_event_t {
/* Thermal events */
TP_HKEY_EV_ALARM_BAT_HOT = 0x6011, /* battery too hot */
TP_HKEY_EV_ALARM_BAT_XHOT = 0x6012, /* battery critically hot */
+ TP_HKEY_EV_ALARM_BAT_LIM_CHANGE = 0x6013, /* battery charge limit changed*/
TP_HKEY_EV_ALARM_SENSOR_HOT = 0x6021, /* sensor too hot */
TP_HKEY_EV_ALARM_SENSOR_XHOT = 0x6022, /* sensor critically hot */
TP_HKEY_EV_THM_TABLE_CHANGED = 0x6030, /* windows; thermal table changed */
@@ -3777,6 +3778,10 @@ static bool hotkey_notify_6xxx(const u32 hkey, bool *send_acpi_ev)
pr_alert("THERMAL EMERGENCY: battery is extremely hot!\n");
/* recommended action: immediate sleep/hibernate */
break;
+ case TP_HKEY_EV_ALARM_BAT_LIM_CHANGE:
+ pr_debug("Battery Info: battery charge threshold changed\n");
+ /* User changed charging threshold. No action needed */
+ return true;
case TP_HKEY_EV_ALARM_SENSOR_HOT:
pr_crit("THERMAL ALARM: a sensor reports something is too hot!\n");
/* recommended action: warn user through gui, that */
@@ -11478,6 +11483,8 @@ static int __must_check __init get_thinkpad_model_data(
tp->vendor = PCI_VENDOR_ID_IBM;
else if (dmi_name_in_vendors("LENOVO"))
tp->vendor = PCI_VENDOR_ID_LENOVO;
+ else if (dmi_name_in_vendors("NEC"))
+ tp->vendor = PCI_VENDOR_ID_LENOVO;
else
return 0;
diff --git a/drivers/pmdomain/amlogic/meson-ee-pwrc.c b/drivers/pmdomain/amlogic/meson-ee-pwrc.c
index fbb2b4103930..55c8c9f66a1b 100644
--- a/drivers/pmdomain/amlogic/meson-ee-pwrc.c
+++ b/drivers/pmdomain/amlogic/meson-ee-pwrc.c
@@ -69,27 +69,27 @@ struct meson_ee_pwrc_domain_desc {
char *name;
unsigned int reset_names_count;
unsigned int clk_names_count;
- struct meson_ee_pwrc_top_domain *top_pd;
+ const struct meson_ee_pwrc_top_domain *top_pd;
unsigned int mem_pd_count;
- struct meson_ee_pwrc_mem_domain *mem_pd;
+ const struct meson_ee_pwrc_mem_domain *mem_pd;
bool (*is_powered_off)(struct meson_ee_pwrc_domain *pwrc_domain);
};
struct meson_ee_pwrc_domain_data {
unsigned int count;
- struct meson_ee_pwrc_domain_desc *domains;
+ const struct meson_ee_pwrc_domain_desc *domains;
};
/* TOP Power Domains */
-static struct meson_ee_pwrc_top_domain gx_pwrc_vpu = {
+static const struct meson_ee_pwrc_top_domain gx_pwrc_vpu = {
.sleep_reg = GX_AO_RTI_GEN_PWR_SLEEP0,
.sleep_mask = BIT(8),
.iso_reg = GX_AO_RTI_GEN_PWR_SLEEP0,
.iso_mask = BIT(9),
};
-static struct meson_ee_pwrc_top_domain meson8_pwrc_vpu = {
+static const struct meson_ee_pwrc_top_domain meson8_pwrc_vpu = {
.sleep_reg = MESON8_AO_RTI_GEN_PWR_SLEEP0,
.sleep_mask = BIT(8),
.iso_reg = MESON8_AO_RTI_GEN_PWR_SLEEP0,
@@ -104,20 +104,20 @@ static struct meson_ee_pwrc_top_domain meson8_pwrc_vpu = {
.iso_mask = BIT(__bit), \
}
-static struct meson_ee_pwrc_top_domain sm1_pwrc_vpu = SM1_EE_PD(8);
-static struct meson_ee_pwrc_top_domain sm1_pwrc_nna = SM1_EE_PD(16);
-static struct meson_ee_pwrc_top_domain sm1_pwrc_usb = SM1_EE_PD(17);
-static struct meson_ee_pwrc_top_domain sm1_pwrc_pci = SM1_EE_PD(18);
-static struct meson_ee_pwrc_top_domain sm1_pwrc_ge2d = SM1_EE_PD(19);
+static const struct meson_ee_pwrc_top_domain sm1_pwrc_vpu = SM1_EE_PD(8);
+static const struct meson_ee_pwrc_top_domain sm1_pwrc_nna = SM1_EE_PD(16);
+static const struct meson_ee_pwrc_top_domain sm1_pwrc_usb = SM1_EE_PD(17);
+static const struct meson_ee_pwrc_top_domain sm1_pwrc_pci = SM1_EE_PD(18);
+static const struct meson_ee_pwrc_top_domain sm1_pwrc_ge2d = SM1_EE_PD(19);
-static struct meson_ee_pwrc_top_domain g12a_pwrc_nna = {
+static const struct meson_ee_pwrc_top_domain g12a_pwrc_nna = {
.sleep_reg = GX_AO_RTI_GEN_PWR_SLEEP0,
.sleep_mask = BIT(16) | BIT(17),
.iso_reg = GX_AO_RTI_GEN_PWR_ISO0,
.iso_mask = BIT(16) | BIT(17),
};
-static struct meson_ee_pwrc_top_domain g12a_pwrc_isp = {
+static const struct meson_ee_pwrc_top_domain g12a_pwrc_isp = {
.sleep_reg = GX_AO_RTI_GEN_PWR_SLEEP0,
.sleep_mask = BIT(18) | BIT(19),
.iso_reg = GX_AO_RTI_GEN_PWR_ISO0,
@@ -154,39 +154,39 @@ static struct meson_ee_pwrc_top_domain g12a_pwrc_isp = {
{ __reg, BIT(14) }, \
{ __reg, BIT(15) }
-static struct meson_ee_pwrc_mem_domain axg_pwrc_mem_vpu[] = {
+static const struct meson_ee_pwrc_mem_domain axg_pwrc_mem_vpu[] = {
VPU_MEMPD(HHI_VPU_MEM_PD_REG0),
VPU_HHI_MEMPD(HHI_MEM_PD_REG0),
};
-static struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_vpu[] = {
+static const struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_vpu[] = {
VPU_MEMPD(HHI_VPU_MEM_PD_REG0),
VPU_MEMPD(HHI_VPU_MEM_PD_REG1),
VPU_MEMPD(HHI_VPU_MEM_PD_REG2),
VPU_HHI_MEMPD(HHI_MEM_PD_REG0),
};
-static struct meson_ee_pwrc_mem_domain gxbb_pwrc_mem_vpu[] = {
+static const struct meson_ee_pwrc_mem_domain gxbb_pwrc_mem_vpu[] = {
VPU_MEMPD(HHI_VPU_MEM_PD_REG0),
VPU_MEMPD(HHI_VPU_MEM_PD_REG1),
VPU_HHI_MEMPD(HHI_MEM_PD_REG0),
};
-static struct meson_ee_pwrc_mem_domain meson_pwrc_mem_eth[] = {
+static const struct meson_ee_pwrc_mem_domain meson_pwrc_mem_eth[] = {
{ HHI_MEM_PD_REG0, GENMASK(3, 2) },
};
-static struct meson_ee_pwrc_mem_domain meson8_pwrc_audio_dsp_mem[] = {
+static const struct meson_ee_pwrc_mem_domain meson8_pwrc_audio_dsp_mem[] = {
{ HHI_MEM_PD_REG0, GENMASK(1, 0) },
};
-static struct meson_ee_pwrc_mem_domain meson8_pwrc_mem_vpu[] = {
+static const struct meson_ee_pwrc_mem_domain meson8_pwrc_mem_vpu[] = {
VPU_MEMPD(HHI_VPU_MEM_PD_REG0),
VPU_MEMPD(HHI_VPU_MEM_PD_REG1),
VPU_HHI_MEMPD(HHI_MEM_PD_REG0),
};
-static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_vpu[] = {
+static const struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_vpu[] = {
VPU_MEMPD(HHI_VPU_MEM_PD_REG0),
VPU_MEMPD(HHI_VPU_MEM_PD_REG1),
VPU_MEMPD(HHI_VPU_MEM_PD_REG2),
@@ -198,28 +198,28 @@ static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_vpu[] = {
VPU_HHI_MEMPD(HHI_MEM_PD_REG0),
};
-static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_nna[] = {
+static const struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_nna[] = {
{ HHI_NANOQ_MEM_PD_REG0, 0xff },
{ HHI_NANOQ_MEM_PD_REG1, 0xff },
};
-static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_usb[] = {
+static const struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_usb[] = {
{ HHI_MEM_PD_REG0, GENMASK(31, 30) },
};
-static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_pcie[] = {
+static const struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_pcie[] = {
{ HHI_MEM_PD_REG0, GENMASK(29, 26) },
};
-static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_ge2d[] = {
+static const struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_ge2d[] = {
{ HHI_MEM_PD_REG0, GENMASK(25, 18) },
};
-static struct meson_ee_pwrc_mem_domain axg_pwrc_mem_audio[] = {
+static const struct meson_ee_pwrc_mem_domain axg_pwrc_mem_audio[] = {
{ HHI_MEM_PD_REG0, GENMASK(5, 4) },
};
-static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_audio[] = {
+static const struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_audio[] = {
{ HHI_MEM_PD_REG0, GENMASK(5, 4) },
{ HHI_AUDIO_MEM_PD_REG0, GENMASK(1, 0) },
{ HHI_AUDIO_MEM_PD_REG0, GENMASK(3, 2) },
@@ -235,12 +235,12 @@ static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_audio[] = {
{ HHI_AUDIO_MEM_PD_REG0, GENMASK(27, 26) },
};
-static struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_nna[] = {
+static const struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_nna[] = {
{ G12A_HHI_NANOQ_MEM_PD_REG0, GENMASK(31, 0) },
{ G12A_HHI_NANOQ_MEM_PD_REG1, GENMASK(31, 0) },
};
-static struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_isp[] = {
+static const struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_isp[] = {
{ G12A_HHI_ISP_MEM_PD_REG0, GENMASK(31, 0) },
{ G12A_HHI_ISP_MEM_PD_REG1, GENMASK(31, 0) },
};
@@ -270,14 +270,14 @@ static struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_isp[] = {
static bool pwrc_ee_is_powered_off(struct meson_ee_pwrc_domain *pwrc_domain);
-static struct meson_ee_pwrc_domain_desc axg_pwrc_domains[] = {
+static const struct meson_ee_pwrc_domain_desc axg_pwrc_domains[] = {
[PWRC_AXG_VPU_ID] = VPU_PD("VPU", &gx_pwrc_vpu, axg_pwrc_mem_vpu,
pwrc_ee_is_powered_off, 5, 2),
[PWRC_AXG_ETHERNET_MEM_ID] = MEM_PD("ETH", meson_pwrc_mem_eth),
[PWRC_AXG_AUDIO_ID] = MEM_PD("AUDIO", axg_pwrc_mem_audio),
};
-static struct meson_ee_pwrc_domain_desc g12a_pwrc_domains[] = {
+static const struct meson_ee_pwrc_domain_desc g12a_pwrc_domains[] = {
[PWRC_G12A_VPU_ID] = VPU_PD("VPU", &gx_pwrc_vpu, g12a_pwrc_mem_vpu,
pwrc_ee_is_powered_off, 11, 2),
[PWRC_G12A_ETH_ID] = MEM_PD("ETH", meson_pwrc_mem_eth),
@@ -287,13 +287,13 @@ static struct meson_ee_pwrc_domain_desc g12a_pwrc_domains[] = {
pwrc_ee_is_powered_off),
};
-static struct meson_ee_pwrc_domain_desc gxbb_pwrc_domains[] = {
+static const struct meson_ee_pwrc_domain_desc gxbb_pwrc_domains[] = {
[PWRC_GXBB_VPU_ID] = VPU_PD("VPU", &gx_pwrc_vpu, gxbb_pwrc_mem_vpu,
pwrc_ee_is_powered_off, 12, 2),
[PWRC_GXBB_ETHERNET_MEM_ID] = MEM_PD("ETH", meson_pwrc_mem_eth),
};
-static struct meson_ee_pwrc_domain_desc meson8_pwrc_domains[] = {
+static const struct meson_ee_pwrc_domain_desc meson8_pwrc_domains[] = {
[PWRC_MESON8_VPU_ID] = VPU_PD("VPU", &meson8_pwrc_vpu,
meson8_pwrc_mem_vpu,
pwrc_ee_is_powered_off, 0, 1),
@@ -303,7 +303,7 @@ static struct meson_ee_pwrc_domain_desc meson8_pwrc_domains[] = {
meson8_pwrc_audio_dsp_mem),
};
-static struct meson_ee_pwrc_domain_desc meson8b_pwrc_domains[] = {
+static const struct meson_ee_pwrc_domain_desc meson8b_pwrc_domains[] = {
[PWRC_MESON8_VPU_ID] = VPU_PD("VPU", &meson8_pwrc_vpu,
meson8_pwrc_mem_vpu,
pwrc_ee_is_powered_off, 11, 1),
@@ -313,7 +313,7 @@ static struct meson_ee_pwrc_domain_desc meson8b_pwrc_domains[] = {
meson8_pwrc_audio_dsp_mem),
};
-static struct meson_ee_pwrc_domain_desc sm1_pwrc_domains[] = {
+static const struct meson_ee_pwrc_domain_desc sm1_pwrc_domains[] = {
[PWRC_SM1_VPU_ID] = VPU_PD("VPU", &sm1_pwrc_vpu, sm1_pwrc_mem_vpu,
pwrc_ee_is_powered_off, 11, 2),
[PWRC_SM1_NNA_ID] = TOP_PD("NNA", &sm1_pwrc_nna, sm1_pwrc_mem_nna,
@@ -576,32 +576,32 @@ static void meson_ee_pwrc_shutdown(struct platform_device *pdev)
}
}
-static struct meson_ee_pwrc_domain_data meson_ee_g12a_pwrc_data = {
+static const struct meson_ee_pwrc_domain_data meson_ee_g12a_pwrc_data = {
.count = ARRAY_SIZE(g12a_pwrc_domains),
.domains = g12a_pwrc_domains,
};
-static struct meson_ee_pwrc_domain_data meson_ee_axg_pwrc_data = {
+static const struct meson_ee_pwrc_domain_data meson_ee_axg_pwrc_data = {
.count = ARRAY_SIZE(axg_pwrc_domains),
.domains = axg_pwrc_domains,
};
-static struct meson_ee_pwrc_domain_data meson_ee_gxbb_pwrc_data = {
+static const struct meson_ee_pwrc_domain_data meson_ee_gxbb_pwrc_data = {
.count = ARRAY_SIZE(gxbb_pwrc_domains),
.domains = gxbb_pwrc_domains,
};
-static struct meson_ee_pwrc_domain_data meson_ee_m8_pwrc_data = {
+static const struct meson_ee_pwrc_domain_data meson_ee_m8_pwrc_data = {
.count = ARRAY_SIZE(meson8_pwrc_domains),
.domains = meson8_pwrc_domains,
};
-static struct meson_ee_pwrc_domain_data meson_ee_m8b_pwrc_data = {
+static const struct meson_ee_pwrc_domain_data meson_ee_m8b_pwrc_data = {
.count = ARRAY_SIZE(meson8b_pwrc_domains),
.domains = meson8b_pwrc_domains,
};
-static struct meson_ee_pwrc_domain_data meson_ee_sm1_pwrc_data = {
+static const struct meson_ee_pwrc_domain_data meson_ee_sm1_pwrc_data = {
.count = ARRAY_SIZE(sm1_pwrc_domains),
.domains = sm1_pwrc_domains,
};
diff --git a/drivers/pmdomain/arm/Kconfig b/drivers/pmdomain/arm/Kconfig
index efa139c34e08..afed10d382ad 100644
--- a/drivers/pmdomain/arm/Kconfig
+++ b/drivers/pmdomain/arm/Kconfig
@@ -2,7 +2,7 @@
config ARM_SCMI_PERF_DOMAIN
tristate "SCMI performance domain driver"
depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
- default y
+ default ARM_SCMI_PROTOCOL
select PM_GENERIC_DOMAINS if PM
help
This enables support for the SCMI performance domains which can be
@@ -14,7 +14,7 @@ config ARM_SCMI_PERF_DOMAIN
config ARM_SCMI_POWER_DOMAIN
tristate "SCMI power domain driver"
depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
- default y
+ default ARM_SCMI_PROTOCOL
select PM_GENERIC_DOMAINS if PM
help
This enables support for the SCMI power domains which can be
@@ -27,7 +27,7 @@ config ARM_SCMI_POWER_DOMAIN
config ARM_SCPI_POWER_DOMAIN
tristate "SCPI power domain driver"
depends on ARM_SCPI_PROTOCOL || (COMPILE_TEST && OF)
- default y
+ default ARM_SCPI_PROTOCOL
select PM_GENERIC_DOMAINS if PM
help
This enables support for the SCPI power domains which can be
diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c
index d3cd816979ac..f5289fd184d0 100644
--- a/drivers/pmdomain/bcm/bcm2835-power.c
+++ b/drivers/pmdomain/bcm/bcm2835-power.c
@@ -506,18 +506,10 @@ bcm2835_init_power_domain(struct bcm2835_power *power,
struct device *dev = power->dev;
struct bcm2835_power_domain *dom = &power->domains[pd_xlate_index];
- dom->clk = devm_clk_get(dev->parent, name);
- if (IS_ERR(dom->clk)) {
- int ret = PTR_ERR(dom->clk);
-
- if (ret == -EPROBE_DEFER)
- return ret;
-
- /* Some domains don't have a clk, so make sure that we
- * don't deref an error pointer later.
- */
- dom->clk = NULL;
- }
+ dom->clk = devm_clk_get_optional(dev->parent, name);
+ if (IS_ERR(dom->clk))
+ return dev_err_probe(dev, PTR_ERR(dom->clk), "Failed to get clock %s\n",
+ name);
dom->base.name = name;
dom->base.flags = GENPD_FLAG_ACTIVE_WAKEUP;
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 9b2f28b34bb5..ff5c7f2b69ce 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -304,10 +304,40 @@ static void genpd_update_accounting(struct generic_pm_domain *genpd)
genpd->accounting_time = now;
}
+
+static void genpd_reflect_residency(struct generic_pm_domain *genpd)
+{
+ struct genpd_governor_data *gd = genpd->gd;
+ struct genpd_power_state *state, *next_state;
+ unsigned int state_idx;
+ s64 sleep_ns, target_ns;
+
+ if (!gd || !gd->reflect_residency)
+ return;
+
+ sleep_ns = ktime_to_ns(ktime_sub(ktime_get(), gd->last_enter));
+ state_idx = genpd->state_idx;
+ state = &genpd->states[state_idx];
+ target_ns = state->power_off_latency_ns + state->residency_ns;
+
+ if (sleep_ns < target_ns) {
+ state->above++;
+ } else if (state_idx < (genpd->state_count -1)) {
+ next_state = &genpd->states[state_idx + 1];
+ target_ns = next_state->power_off_latency_ns +
+ next_state->residency_ns;
+
+ if (sleep_ns >= target_ns)
+ state->below++;
+ }
+
+ gd->reflect_residency = false;
+}
#else
static inline void genpd_debug_add(struct generic_pm_domain *genpd) {}
static inline void genpd_debug_remove(struct generic_pm_domain *genpd) {}
static inline void genpd_update_accounting(struct generic_pm_domain *genpd) {}
+static inline void genpd_reflect_residency(struct generic_pm_domain *genpd) {}
#endif
static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd,
@@ -728,6 +758,31 @@ int dev_pm_genpd_rpm_always_on(struct device *dev, bool on)
}
EXPORT_SYMBOL_GPL(dev_pm_genpd_rpm_always_on);
+/**
+ * pm_genpd_inc_rejected() - Adjust the rejected/usage counts for an idle-state.
+ *
+ * @genpd: The PM domain the idle-state belongs to.
+ * @state_idx: The index of the idle-state that failed.
+ *
+ * In some special cases the ->power_off() callback is asynchronously powering
+ * off the PM domain, leading to that it may return zero to indicate success,
+ * even though the actual power-off could fail. To account for this correctly in
+ * the rejected/usage counts for the idle-state statistics, users can call this
+ * function to adjust the values.
+ *
+ * It is assumed that the users guarantee that the genpd doesn't get removed
+ * while this routine is getting called.
+ */
+void pm_genpd_inc_rejected(struct generic_pm_domain *genpd,
+ unsigned int state_idx)
+{
+ genpd_lock(genpd);
+ genpd->states[genpd->state_idx].rejected++;
+ genpd->states[genpd->state_idx].usage--;
+ genpd_unlock(genpd);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_inc_rejected);
+
static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
{
unsigned int state_idx = genpd->state_idx;
@@ -853,31 +908,24 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
* If all of the @genpd's devices have been suspended and all of its subdomains
* have been powered down, remove power from @genpd.
*/
-static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
- unsigned int depth)
+static void genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
+ unsigned int depth)
{
struct pm_domain_data *pdd;
struct gpd_link *link;
unsigned int not_suspended = 0;
- int ret;
/*
* Do not try to power off the domain in the following situations:
- * (1) The domain is already in the "power off" state.
- * (2) System suspend is in progress.
+ * The domain is already in the "power off" state.
+ * System suspend is in progress.
+ * The domain is configured as always on.
+ * The domain has a subdomain being powered on.
*/
- if (!genpd_status_on(genpd) || genpd->prepared_count > 0)
- return 0;
-
- /*
- * Abort power off for the PM domain in the following situations:
- * (1) The domain is configured as always on.
- * (2) When the domain has a subdomain being powered on.
- */
- if (genpd_is_always_on(genpd) ||
- genpd_is_rpm_always_on(genpd) ||
- atomic_read(&genpd->sd_count) > 0)
- return -EBUSY;
+ if (!genpd_status_on(genpd) || genpd->prepared_count > 0 ||
+ genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd) ||
+ atomic_read(&genpd->sd_count) > 0)
+ return;
/*
* The children must be in their deepest (powered-off) states to allow
@@ -888,7 +936,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
list_for_each_entry(link, &genpd->parent_links, parent_node) {
struct generic_pm_domain *child = link->child;
if (child->state_idx < child->state_count - 1)
- return -EBUSY;
+ return;
}
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
@@ -902,15 +950,15 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
/* The device may need its PM domain to stay powered on. */
if (to_gpd_data(pdd)->rpm_always_on)
- return -EBUSY;
+ return;
}
if (not_suspended > 1 || (not_suspended == 1 && !one_dev_on))
- return -EBUSY;
+ return;
if (genpd->gov && genpd->gov->power_down_ok) {
if (!genpd->gov->power_down_ok(&genpd->domain))
- return -EAGAIN;
+ return;
}
/* Default to shallowest state. */
@@ -919,12 +967,11 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
/* Don't power off, if a child domain is waiting to power on. */
if (atomic_read(&genpd->sd_count) > 0)
- return -EBUSY;
+ return;
- ret = _genpd_power_off(genpd, true);
- if (ret) {
+ if (_genpd_power_off(genpd, true)) {
genpd->states[genpd->state_idx].rejected++;
- return ret;
+ return;
}
genpd->status = GENPD_STATE_OFF;
@@ -937,8 +984,6 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
genpd_power_off(link->parent, false, depth + 1);
genpd_unlock(link->parent);
}
-
- return 0;
}
/**
@@ -957,6 +1002,9 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth)
if (genpd_status_on(genpd))
return 0;
+ /* Reflect over the entered idle-states residency for debugfs. */
+ genpd_reflect_residency(genpd);
+
/*
* The list is guaranteed not to change while the loop below is being
* executed, unless one of the parents' .power_on() callbacks fiddles
@@ -1450,7 +1498,7 @@ static int genpd_finish_suspend(struct device *dev,
if (ret)
return ret;
- if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd))
+ if (device_awake_path(dev) && genpd_is_active_wakeup(genpd))
return 0;
if (genpd->dev_ops.stop && genpd->dev_ops.start &&
@@ -1505,7 +1553,7 @@ static int genpd_finish_resume(struct device *dev,
if (IS_ERR(genpd))
return -EINVAL;
- if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd))
+ if (device_awake_path(dev) && genpd_is_active_wakeup(genpd))
return resume_noirq(dev);
genpd_lock(genpd);
@@ -2229,8 +2277,10 @@ static int genpd_alloc_data(struct generic_pm_domain *genpd)
return 0;
put:
put_device(&genpd->dev);
- if (genpd->free_states == genpd_free_default_power_state)
+ if (genpd->free_states == genpd_free_default_power_state) {
kfree(genpd->states);
+ genpd->states = NULL;
+ }
free:
if (genpd_is_cpu_domain(genpd))
free_cpumask_var(genpd->cpus);
@@ -2293,6 +2343,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
genpd->provider = NULL;
genpd->device_id = -ENXIO;
genpd->has_provider = false;
+ genpd->opp_table = NULL;
genpd->accounting_time = ktime_get_mono_fast_ns();
genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
genpd->domain.ops.runtime_resume = genpd_runtime_resume;
@@ -2567,7 +2618,7 @@ int of_genpd_add_provider_simple(struct device_node *np,
ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
if (ret) {
- if (!genpd_is_opp_table_fw(genpd) && genpd->set_performance_state) {
+ if (genpd->opp_table) {
dev_pm_opp_put_opp_table(genpd->opp_table);
dev_pm_opp_of_remove_table(&genpd->dev);
}
@@ -2647,7 +2698,7 @@ error:
genpd->provider = NULL;
genpd->has_provider = false;
- if (!genpd_is_opp_table_fw(genpd) && genpd->set_performance_state) {
+ if (genpd->opp_table) {
dev_pm_opp_put_opp_table(genpd->opp_table);
dev_pm_opp_of_remove_table(&genpd->dev);
}
@@ -2679,11 +2730,10 @@ void of_genpd_del_provider(struct device_node *np)
if (gpd->provider == &np->fwnode) {
gpd->has_provider = false;
- if (genpd_is_opp_table_fw(gpd) || !gpd->set_performance_state)
- continue;
-
- dev_pm_opp_put_opp_table(gpd->opp_table);
- dev_pm_opp_of_remove_table(&gpd->dev);
+ if (gpd->opp_table) {
+ dev_pm_opp_put_opp_table(gpd->opp_table);
+ dev_pm_opp_of_remove_table(&gpd->dev);
+ }
}
}
@@ -3126,7 +3176,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
/* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (index >= num_domains)
+ if (num_domains < 0 || index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
@@ -3492,7 +3542,7 @@ static int idle_states_show(struct seq_file *s, void *data)
if (ret)
return -ERESTARTSYS;
- seq_puts(s, "State Time Spent(ms) Usage Rejected\n");
+ seq_puts(s, "State Time Spent(ms) Usage Rejected Above Below\n");
for (i = 0; i < genpd->state_count; i++) {
struct genpd_power_state *state = &genpd->states[i];
@@ -3512,9 +3562,10 @@ static int idle_states_show(struct seq_file *s, void *data)
snprintf(state_name, ARRAY_SIZE(state_name), "S%-13d", i);
do_div(idle_time, NSEC_PER_MSEC);
- seq_printf(s, "%-14s %-14llu %-14llu %llu\n",
+ seq_printf(s, "%-14s %-14llu %-10llu %-10llu %-10llu %llu\n",
state->name ?: state_name, idle_time,
- state->usage, state->rejected);
+ state->usage, state->rejected, state->above,
+ state->below);
}
genpd_unlock(genpd);
diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
index d1a10eeebd16..c1e148657c87 100644
--- a/drivers/pmdomain/governor.c
+++ b/drivers/pmdomain/governor.c
@@ -392,6 +392,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (idle_duration_ns >= (genpd->states[i].residency_ns +
genpd->states[i].power_off_latency_ns)) {
genpd->state_idx = i;
+ genpd->gd->last_enter = now;
+ genpd->gd->reflect_residency = true;
return true;
}
} while (--i >= 0);
diff --git a/drivers/pmdomain/mediatek/mt6893-pm-domains.h b/drivers/pmdomain/mediatek/mt6893-pm-domains.h
new file mode 100644
index 000000000000..c9e2aa511448
--- /dev/null
+++ b/drivers/pmdomain/mediatek/mt6893-pm-domains.h
@@ -0,0 +1,585 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025 Collabora Ltd
+ * AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef __PMDOMAIN_MEDIATEK_MT6893_PM_DOMAINS_H
+#define __PMDOMAIN_MEDIATEK_MT6893_PM_DOMAINS_H
+
+#include <linux/soc/mediatek/infracfg.h>
+#include <dt-bindings/power/mediatek,mt6893-power.h>
+#include "mtk-pm-domains.h"
+
+#define MT6893_TOP_AXI_PROT_EN_MCU_STA1 0x2e4
+#define MT6893_TOP_AXI_PROT_EN_MCU_SET 0x2c4
+#define MT6893_TOP_AXI_PROT_EN_MCU_CLR 0x2c8
+#define MT6893_TOP_AXI_PROT_EN_VDNR_1_SET 0xba4
+#define MT6893_TOP_AXI_PROT_EN_VDNR_1_CLR 0xba8
+#define MT6893_TOP_AXI_PROT_EN_VDNR_1_STA1 0xbb0
+#define MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET 0xbb8
+#define MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR 0xbbc
+#define MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA1 0xbc4
+
+#define MT6893_TOP_AXI_PROT_EN_1_MFG1_STEP1 GENMASK(21, 19)
+#define MT6893_TOP_AXI_PROT_EN_2_MFG1_STEP2 GENMASK(6, 5)
+#define MT6893_TOP_AXI_PROT_EN_MFG1_STEP3 GENMASK(22, 21)
+#define MT6893_TOP_AXI_PROT_EN_2_MFG1_STEP4 BIT(7)
+#define MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MFG1_STEP5 GENMASK(19, 17)
+#define MT6893_TOP_AXI_PROT_EN_MM_VDEC0_STEP1 BIT(24)
+#define MT6893_TOP_AXI_PROT_EN_MM_2_VDEC0_STEP2 BIT(25)
+#define MT6893_TOP_AXI_PROT_EN_MM_VDEC1_STEP1 BIT(6)
+#define MT6893_TOP_AXI_PROT_EN_MM_VDEC1_STEP2 BIT(7)
+#define MT6893_TOP_AXI_PROT_EN_MM_VENC0_STEP1 BIT(26)
+#define MT6893_TOP_AXI_PROT_EN_MM_2_VENC0_STEP2 BIT(0)
+#define MT6893_TOP_AXI_PROT_EN_MM_VENC0_STEP3 BIT(27)
+#define MT6893_TOP_AXI_PROT_EN_MM_2_VENC0_STEP4 BIT(1)
+#define MT6893_TOP_AXI_PROT_EN_MM_VENC1_STEP1 GENMASK(30, 28)
+#define MT6893_TOP_AXI_PROT_EN_MM_VENC1_STEP2 GENMASK(31, 29)
+#define MT6893_TOP_AXI_PROT_EN_MDP_STEP1 BIT(10)
+#define MT6893_TOP_AXI_PROT_EN_MM_MDP_STEP2 (BIT(2) | BIT(4) | BIT(6) | \
+ BIT(8) | BIT(18) | BIT(22) | \
+ BIT(28) | BIT(30))
+#define MT6893_TOP_AXI_PROT_EN_MM_2_MDP_STEP3 (BIT(0) | BIT(2) | BIT(4) | \
+ BIT(6) | BIT(8))
+#define MT6893_TOP_AXI_PROT_EN_MDP_STEP4 BIT(23)
+#define MT6893_TOP_AXI_PROT_EN_MM_MDP_STEP5 (BIT(3) | BIT(5) | BIT(7) | \
+ BIT(9) | BIT(19) | BIT(23) | \
+ BIT(29) | BIT(31))
+#define MT6893_TOP_AXI_PROT_EN_MM_2_MDP_STEP6 (BIT(1) | BIT(7) | BIT(9) | BIT(11))
+#define MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MDP_STEP7 BIT(20)
+#define MT6893_TOP_AXI_PROT_EN_MM_DISP_STEP1 (BIT(0) | BIT(6) | BIT(8) | \
+ BIT(10) | BIT(12) | BIT(14) | \
+ BIT(16) | BIT(20) | BIT(24) | \
+ BIT(26))
+#define MT6893_TOP_AXI_PROT_EN_DISP_STEP2 BIT(6)
+#define MT6893_TOP_AXI_PROT_EN_MM_DISP_STEP3 (BIT(1) | BIT(7) | BIT(9) | \
+ BIT(15) | BIT(17) | BIT(21) | \
+ BIT(25) | BIT(27))
+#define MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_DISP_STEP4 BIT(21)
+#define MT6893_TOP_AXI_PROT_EN_2_ADSP BIT(3)
+#define MT6893_TOP_AXI_PROT_EN_2_CAM_STEP1 BIT(1)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_STEP2 (BIT(0) | BIT(2) | BIT(4))
+#define MT6893_TOP_AXI_PROT_EN_1_CAM_STEP3 BIT(22)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_STEP4 (BIT(1) | BIT(3) | BIT(5))
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWA_STEP1 BIT(18)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWA_STEP2 BIT(19)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWB_STEP1 BIT(20)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWB_STEP2 BIT(21)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWC_STEP1 BIT(22)
+#define MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWC_STEP2 BIT(23)
+
+/*
+ * MT6893 Power Domain (MTCMOS) support
+ *
+ * The register layout for this IP is very similar to MT8192 so where possible
+ * the same definitions are reused to avoid duplication.
+ * Where the bus protection bits are also the same, the entire set is reused.
+ */
+static const struct scpsys_domain_data scpsys_domain_data_mt6893[] = {
+ [MT6893_POWER_DOMAIN_CONN] = {
+ .name = "conn",
+ .sta_mask = BIT(1),
+ .ctl_offs = 0x304,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = 0,
+ .sram_pdn_ack_bits = 0,
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_CONN,
+ MT8192_TOP_AXI_PROT_EN_SET,
+ MT8192_TOP_AXI_PROT_EN_CLR,
+ MT8192_TOP_AXI_PROT_EN_STA1),
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_CONN_2ND,
+ MT8192_TOP_AXI_PROT_EN_SET,
+ MT8192_TOP_AXI_PROT_EN_CLR,
+ MT8192_TOP_AXI_PROT_EN_STA1),
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_1_CONN,
+ MT8192_TOP_AXI_PROT_EN_1_SET,
+ MT8192_TOP_AXI_PROT_EN_1_CLR,
+ MT8192_TOP_AXI_PROT_EN_1_STA1),
+ },
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_MFG0] = {
+ .name = "mfg0",
+ .sta_mask = BIT(2),
+ .ctl_offs = 0x308,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF | MTK_SCPD_DOMAIN_SUPPLY,
+ },
+ [MT6893_POWER_DOMAIN_MFG1] = {
+ .name = "mfg1",
+ .sta_mask = BIT(3),
+ .ctl_offs = 0x30c,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_1_MFG1_STEP1,
+ MT8192_TOP_AXI_PROT_EN_1_SET,
+ MT8192_TOP_AXI_PROT_EN_1_CLR,
+ MT8192_TOP_AXI_PROT_EN_1_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_2_MFG1_STEP2,
+ MT8192_TOP_AXI_PROT_EN_2_SET,
+ MT8192_TOP_AXI_PROT_EN_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MFG1_STEP3,
+ MT8192_TOP_AXI_PROT_EN_SET,
+ MT8192_TOP_AXI_PROT_EN_CLR,
+ MT8192_TOP_AXI_PROT_EN_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_2_MFG1_STEP4,
+ MT8192_TOP_AXI_PROT_EN_2_SET,
+ MT8192_TOP_AXI_PROT_EN_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MFG1_STEP5,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA1),
+ },
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF | MTK_SCPD_DOMAIN_SUPPLY,
+ },
+ [MT6893_POWER_DOMAIN_MFG2] = {
+ .name = "mfg2",
+ .sta_mask = BIT(4),
+ .ctl_offs = 0x310,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_MFG3] = {
+ .name = "mfg3",
+ .sta_mask = BIT(5),
+ .ctl_offs = 0x314,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_MFG4] = {
+ .name = "mfg4",
+ .sta_mask = BIT(6),
+ .ctl_offs = 0x318,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_MFG5] = {
+ .name = "mfg5",
+ .sta_mask = BIT(7),
+ .ctl_offs = 0x31c,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_MFG6] = {
+ .name = "mfg6",
+ .sta_mask = BIT(8),
+ .ctl_offs = 0x320,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_ISP] = {
+ .name = "isp",
+ .sta_mask = BIT(12),
+ .ctl_offs = 0x330,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_MM_2_ISP,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_MM_2_ISP_2ND,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_2_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_ISP2] = {
+ .name = "isp2",
+ .sta_mask = BIT(13),
+ .ctl_offs = 0x334,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_MM_ISP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_MM_ISP2_2ND,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_IPE] = {
+ .name = "ipe",
+ .sta_mask = BIT(14),
+ .ctl_offs = 0x338,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_MM_IPE,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_MM_IPE_2ND,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_VDEC0] = {
+ .name = "vdec0",
+ .sta_mask = BIT(15),
+ .ctl_offs = 0x33c,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VDEC0_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_2_VDEC0_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_VDEC1] = {
+ .name = "vdec1",
+ .sta_mask = BIT(16),
+ .ctl_offs = 0x340,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VDEC1_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VDEC1_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_VENC0] = {
+ .name = "venc0",
+ .sta_mask = BIT(17),
+ .ctl_offs = 0x344,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VENC0_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_2_VENC0_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VENC0_STEP3,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_2_VENC0_STEP4,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_2_STA1),
+ },
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_VENC1] = {
+ .name = "venc1",
+ .sta_mask = BIT(18),
+ .ctl_offs = 0x348,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VENC1_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_VENC1_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+ [MT6893_POWER_DOMAIN_MDP] = {
+ .name = "mdp",
+ .sta_mask = BIT(19),
+ .ctl_offs = 0x34c,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MDP_STEP1,
+ MT8192_TOP_AXI_PROT_EN_SET,
+ MT8192_TOP_AXI_PROT_EN_CLR,
+ MT8192_TOP_AXI_PROT_EN_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_MDP_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_2_MDP_STEP3,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MDP_STEP4,
+ MT8192_TOP_AXI_PROT_EN_SET,
+ MT8192_TOP_AXI_PROT_EN_CLR,
+ MT8192_TOP_AXI_PROT_EN_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_MDP_STEP5,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_2_MDP_STEP6,
+ MT8192_TOP_AXI_PROT_EN_MM_2_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MDP_STEP7,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_DISP] = {
+ .name = "disp",
+ .sta_mask = BIT(20),
+ .ctl_offs = 0x350,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_DISP_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_DISP_STEP2,
+ MT8192_TOP_AXI_PROT_EN_SET,
+ MT8192_TOP_AXI_PROT_EN_CLR,
+ MT8192_TOP_AXI_PROT_EN_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_DISP_STEP3,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_DISP_STEP4,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR,
+ MT6893_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_AUDIO] = {
+ .name = "audio",
+ .sta_mask = BIT(21),
+ .ctl_offs = 0x354,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT8192_TOP_AXI_PROT_EN_2_AUDIO,
+ MT8192_TOP_AXI_PROT_EN_2_SET,
+ MT8192_TOP_AXI_PROT_EN_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_2_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_ADSP] = {
+ .name = "audio",
+ .sta_mask = BIT(22),
+ .ctl_offs = 0x358,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(9),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_2_ADSP,
+ MT8192_TOP_AXI_PROT_EN_2_SET,
+ MT8192_TOP_AXI_PROT_EN_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_2_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_CAM] = {
+ .name = "cam",
+ .sta_mask = BIT(23),
+ .ctl_offs = 0x35c,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_2_CAM_STEP1,
+ MT8192_TOP_AXI_PROT_EN_2_SET,
+ MT8192_TOP_AXI_PROT_EN_2_CLR,
+ MT8192_TOP_AXI_PROT_EN_2_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_1_CAM_STEP3,
+ MT8192_TOP_AXI_PROT_EN_1_SET,
+ MT8192_TOP_AXI_PROT_EN_1_CLR,
+ MT8192_TOP_AXI_PROT_EN_1_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_STEP4,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_CAM_RAWA] = {
+ .name = "cam_rawa",
+ .sta_mask = BIT(24),
+ .ctl_offs = 0x360,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWA_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWA_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_CAM_RAWB] = {
+ .name = "cam_rawb",
+ .sta_mask = BIT(25),
+ .ctl_offs = 0x364,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWB_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWB_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_CAM_RAWC] = {
+ .name = "cam_rawc",
+ .sta_mask = BIT(26),
+ .ctl_offs = 0x368,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .bp_cfg = {
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWC_STEP1,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ BUS_PROT_WR(INFRA,
+ MT6893_TOP_AXI_PROT_EN_MM_CAM_RAWC_STEP2,
+ MT8192_TOP_AXI_PROT_EN_MM_SET,
+ MT8192_TOP_AXI_PROT_EN_MM_CLR,
+ MT8192_TOP_AXI_PROT_EN_MM_STA1),
+ },
+ },
+ [MT6893_POWER_DOMAIN_DP_TX] = {
+ .name = "dp_tx",
+ .sta_mask = BIT(27),
+ .ctl_offs = 0x3ac,
+ .pwr_sta_offs = 0x16c,
+ .pwr_sta2nd_offs = 0x170,
+ .sram_pdn_bits = BIT(8),
+ .sram_pdn_ack_bits = BIT(12),
+ .caps = MTK_SCPD_KEEP_DEFAULT_OFF,
+ },
+};
+
+static const struct scpsys_soc_data mt6893_scpsys_data = {
+ .domains_data = scpsys_domain_data_mt6893,
+ .num_domains = ARRAY_SIZE(scpsys_domain_data_mt6893),
+};
+
+#endif /* __PMDOMAIN_MEDIATEK_MT6893_PM_DOMAINS_H */
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c
index b866b006af69..a58ed7e2d9a4 100644
--- a/drivers/pmdomain/mediatek/mtk-pm-domains.c
+++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c
@@ -18,6 +18,7 @@
#include "mt6735-pm-domains.h"
#include "mt6795-pm-domains.h"
+#include "mt6893-pm-domains.h"
#include "mt8167-pm-domains.h"
#include "mt8173-pm-domains.h"
#include "mt8183-pm-domains.h"
@@ -397,20 +398,26 @@ generic_pm_domain *scpsys_add_one_domain(struct scpsys *scpsys, struct device_no
pd->infracfg = syscon_regmap_lookup_by_phandle_optional(node, "mediatek,infracfg");
if (IS_ERR(pd->infracfg))
- return ERR_CAST(pd->infracfg);
+ return dev_err_cast_probe(scpsys->dev, pd->infracfg,
+ "%pOF: failed to get infracfg regmap\n",
+ node);
smi_node = of_parse_phandle(node, "mediatek,smi", 0);
if (smi_node) {
pd->smi = device_node_to_regmap(smi_node);
of_node_put(smi_node);
if (IS_ERR(pd->smi))
- return ERR_CAST(pd->smi);
+ return dev_err_cast_probe(scpsys->dev, pd->smi,
+ "%pOF: failed to get SMI regmap\n",
+ node);
}
if (MTK_SCPD_CAPS(pd, MTK_SCPD_HAS_INFRA_NAO)) {
pd->infracfg_nao = syscon_regmap_lookup_by_phandle(node, "mediatek,infracfg-nao");
if (IS_ERR(pd->infracfg_nao))
- return ERR_CAST(pd->infracfg_nao);
+ return dev_err_cast_probe(scpsys->dev, pd->infracfg_nao,
+ "%pOF: failed to get infracfg-nao regmap\n",
+ node);
} else {
pd->infracfg_nao = NULL;
}
@@ -618,6 +625,10 @@ static const struct of_device_id scpsys_of_match[] = {
.data = &mt6795_scpsys_data,
},
{
+ .compatible = "mediatek,mt6893-power-controller",
+ .data = &mt6893_scpsys_data,
+ },
+ {
.compatible = "mediatek,mt8167-power-controller",
.data = &mt8167_scpsys_data,
},
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.h b/drivers/pmdomain/mediatek/mtk-pm-domains.h
index 2ac96804b985..7085fa2976e9 100644
--- a/drivers/pmdomain/mediatek/mtk-pm-domains.h
+++ b/drivers/pmdomain/mediatek/mtk-pm-domains.h
@@ -44,7 +44,7 @@
#define PWR_STATUS_AUDIO BIT(24)
#define PWR_STATUS_USB BIT(25)
-#define SPM_MAX_BUS_PROT_DATA 6
+#define SPM_MAX_BUS_PROT_DATA 7
enum scpsys_bus_prot_flags {
BUS_PROT_REG_UPDATE = BIT(1),
diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c
index dfd0f80154e4..078323b85b56 100644
--- a/drivers/pmdomain/qcom/rpmhpd.c
+++ b/drivers/pmdomain/qcom/rpmhpd.c
@@ -360,6 +360,21 @@ static const struct rpmhpd_desc sdx75_desc = {
.num_pds = ARRAY_SIZE(sdx75_rpmhpds),
};
+/* SM4450 RPMH powerdomains */
+static struct rpmhpd *sm4450_rpmhpds[] = {
+ [RPMHPD_CX] = &cx,
+ [RPMHPD_CX_AO] = &cx_ao,
+ [RPMHPD_EBI] = &ebi,
+ [RPMHPD_LMX] = &lmx,
+ [RPMHPD_MSS] = &mss,
+ [RPMHPD_MX] = &mx,
+};
+
+static const struct rpmhpd_desc sm4450_desc = {
+ .rpmhpds = sm4450_rpmhpds,
+ .num_pds = ARRAY_SIZE(sm4450_rpmhpds),
+};
+
/* SM6350 RPMH powerdomains */
static struct rpmhpd *sm6350_rpmhpds[] = {
[SM6350_CX] = &cx_w_mx_parent,
@@ -724,6 +739,7 @@ static const struct of_device_id rpmhpd_match_table[] = {
{ .compatible = "qcom,sdx55-rpmhpd", .data = &sdx55_desc},
{ .compatible = "qcom,sdx65-rpmhpd", .data = &sdx65_desc},
{ .compatible = "qcom,sdx75-rpmhpd", .data = &sdx75_desc},
+ { .compatible = "qcom,sm4450-rpmhpd", .data = &sm4450_desc },
{ .compatible = "qcom,sm6350-rpmhpd", .data = &sm6350_desc },
{ .compatible = "qcom,sm7150-rpmhpd", .data = &sm7150_desc },
{ .compatible = "qcom,sm8150-rpmhpd", .data = &sm8150_desc },
diff --git a/drivers/pmdomain/renesas/rcar-gen4-sysc.c b/drivers/pmdomain/renesas/rcar-gen4-sysc.c
index 66409cff2083..e001b5c25bed 100644
--- a/drivers/pmdomain/renesas/rcar-gen4-sysc.c
+++ b/drivers/pmdomain/renesas/rcar-gen4-sysc.c
@@ -338,11 +338,6 @@ static int __init rcar_gen4_sysc_pd_init(void)
struct rcar_gen4_sysc_pd *pd;
size_t n;
- if (!area->name) {
- /* Skip NULLified area */
- continue;
- }
-
n = strlen(area->name) + 1;
pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL);
if (!pd) {
diff --git a/drivers/pmdomain/renesas/rcar-sysc.c b/drivers/pmdomain/renesas/rcar-sysc.c
index dce1a6d37e80..047495f54e8a 100644
--- a/drivers/pmdomain/renesas/rcar-sysc.c
+++ b/drivers/pmdomain/renesas/rcar-sysc.c
@@ -396,11 +396,6 @@ static int __init rcar_sysc_pd_init(void)
struct rcar_sysc_pd *pd;
size_t n;
- if (!area->name) {
- /* Skip NULLified area */
- continue;
- }
-
n = strlen(area->name) + 1;
pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL);
if (!pd) {
diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
index 03bcf79a461f..4cce407bb1eb 100644
--- a/drivers/pmdomain/rockchip/pm-domains.c
+++ b/drivers/pmdomain/rockchip/pm-domains.c
@@ -2,7 +2,7 @@
/*
* Rockchip Generic power domain support.
*
- * Copyright (c) 2015 ROCKCHIP, Co. Ltd.
+ * Copyright (c) 2015 Rockchip Electronics Co., Ltd.
*/
#include <linux/arm-smccc.h>
@@ -35,6 +35,7 @@
#include <dt-bindings/power/rk3366-power.h>
#include <dt-bindings/power/rk3368-power.h>
#include <dt-bindings/power/rk3399-power.h>
+#include <dt-bindings/power/rockchip,rk3562-power.h>
#include <dt-bindings/power/rk3568-power.h>
#include <dt-bindings/power/rockchip,rk3576-power.h>
#include <dt-bindings/power/rk3588-power.h>
@@ -135,6 +136,20 @@ struct rockchip_pmu {
.active_wakeup = wakeup, \
}
+#define DOMAIN_M_G_SD(_name, pwr, status, req, idle, ack, g_mask, mem, wakeup, keepon) \
+{ \
+ .name = _name, \
+ .pwr_w_mask = (pwr) << 16, \
+ .pwr_mask = (pwr), \
+ .status_mask = (status), \
+ .req_w_mask = (req) << 16, \
+ .req_mask = (req), \
+ .idle_mask = (idle), \
+ .ack_mask = (ack), \
+ .clk_ungate_mask = (g_mask), \
+ .active_wakeup = wakeup, \
+}
+
#define DOMAIN_M_O_R(_name, p_offset, pwr, status, m_offset, m_status, r_status, r_offset, req, idle, ack, wakeup, regulator) \
{ \
.name = _name, \
@@ -201,6 +216,9 @@ struct rockchip_pmu {
#define DOMAIN_RK3399(name, pwr, status, req, wakeup) \
DOMAIN(name, pwr, status, req, req, req, wakeup)
+#define DOMAIN_RK3562(name, pwr, req, g_mask, mem, wakeup) \
+ DOMAIN_M_G_SD(name, pwr, pwr, req, req, req, g_mask, mem, wakeup, false)
+
#define DOMAIN_RK3568(name, pwr, req, wakeup) \
DOMAIN_M(name, pwr, pwr, req, req, req, wakeup)
@@ -1197,6 +1215,18 @@ static const struct rockchip_domain_info rk3399_pm_domains[] = {
[RK3399_PD_SDIOAUDIO] = DOMAIN_RK3399("sdioaudio", BIT(31), BIT(31), BIT(29), true),
};
+static const struct rockchip_domain_info rk3562_pm_domains[] = {
+ /* name pwr req g_mask mem wakeup */
+ [RK3562_PD_GPU] = DOMAIN_RK3562("gpu", BIT(0), BIT(1), BIT(1), 0, false),
+ [RK3562_PD_NPU] = DOMAIN_RK3562("npu", BIT(1), BIT(2), BIT(2), 0, false),
+ [RK3562_PD_VDPU] = DOMAIN_RK3562("vdpu", BIT(2), BIT(6), BIT(6), 0, false),
+ [RK3562_PD_VEPU] = DOMAIN_RK3562("vepu", BIT(3), BIT(7), BIT(7) | BIT(3), 0, false),
+ [RK3562_PD_RGA] = DOMAIN_RK3562("rga", BIT(4), BIT(5), BIT(5) | BIT(4), 0, false),
+ [RK3562_PD_VI] = DOMAIN_RK3562("vi", BIT(5), BIT(3), BIT(3), 0, false),
+ [RK3562_PD_VO] = DOMAIN_RK3562("vo", BIT(6), BIT(4), BIT(4), 16, false),
+ [RK3562_PD_PHP] = DOMAIN_RK3562("php", BIT(7), BIT(8), BIT(8), 0, false),
+};
+
static const struct rockchip_domain_info rk3568_pm_domains[] = {
[RK3568_PD_NPU] = DOMAIN_RK3568("npu", BIT(1), BIT(2), false),
[RK3568_PD_GPU] = DOMAIN_RK3568("gpu", BIT(0), BIT(1), false),
@@ -1398,6 +1428,18 @@ static const struct rockchip_pmu_info rk3399_pmu = {
.domain_info = rk3399_pm_domains,
};
+static const struct rockchip_pmu_info rk3562_pmu = {
+ .pwr_offset = 0x210,
+ .status_offset = 0x230,
+ .req_offset = 0x110,
+ .idle_offset = 0x128,
+ .ack_offset = 0x120,
+ .clk_ungate_offset = 0x140,
+
+ .num_domains = ARRAY_SIZE(rk3562_pm_domains),
+ .domain_info = rk3562_pm_domains,
+};
+
static const struct rockchip_pmu_info rk3568_pmu = {
.pwr_offset = 0xa0,
.status_offset = 0x98,
@@ -1497,6 +1539,10 @@ static const struct of_device_id rockchip_pm_domain_dt_match[] = {
.data = (void *)&rk3399_pmu,
},
{
+ .compatible = "rockchip,rk3562-power-controller",
+ .data = (void *)&rk3562_pmu,
+ },
+ {
.compatible = "rockchip,rk3568-power-controller",
.data = (void *)&rk3568_pmu,
},
diff --git a/drivers/pmdomain/sunxi/Kconfig b/drivers/pmdomain/sunxi/Kconfig
index 17781bf8d86d..43eecb3ea981 100644
--- a/drivers/pmdomain/sunxi/Kconfig
+++ b/drivers/pmdomain/sunxi/Kconfig
@@ -8,3 +8,13 @@ config SUN20I_PPU
help
Say y to enable the PPU power domain driver. This saves power
when certain peripherals, such as the video engine, are idle.
+
+config SUN50I_H6_PRCM_PPU
+ tristate "Allwinner H6 PRCM power domain driver"
+ depends on ARCH_SUNXI || COMPILE_TEST
+ depends on PM
+ select PM_GENERIC_DOMAINS
+ help
+ Say y to enable the Allwinner H6/H616 PRCM power domain driver.
+ This is required to enable the Mali GPU in the H616 SoC, it is
+ optional for the H6.
diff --git a/drivers/pmdomain/sunxi/Makefile b/drivers/pmdomain/sunxi/Makefile
index ec1d7a2fb21d..c1343e123759 100644
--- a/drivers/pmdomain/sunxi/Makefile
+++ b/drivers/pmdomain/sunxi/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SUN20I_PPU) += sun20i-ppu.o
+obj-$(CONFIG_SUN50I_H6_PRCM_PPU) += sun50i-h6-prcm-ppu.o
diff --git a/drivers/pmdomain/sunxi/sun50i-h6-prcm-ppu.c b/drivers/pmdomain/sunxi/sun50i-h6-prcm-ppu.c
new file mode 100644
index 000000000000..d59644499dfe
--- /dev/null
+++ b/drivers/pmdomain/sunxi/sun50i-h6-prcm-ppu.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Arm Ltd. 2024
+ *
+ * Allwinner H6/H616 PRCM power domain driver.
+ * This covers a few registers inside the PRCM (Power Reset Clock Management)
+ * block that control some power rails, most prominently for the Mali GPU.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/reset.h>
+
+/*
+ * The PRCM block covers multiple devices, starting with some clocks,
+ * then followed by the power rails.
+ * The clocks are covered by a different driver, so this driver's MMIO range
+ * starts later in the PRCM MMIO frame, not at the beginning of it.
+ * To keep the register offsets consistent with other PRCM documentation,
+ * express the registers relative to the beginning of the whole PRCM, and
+ * subtract the PPU offset this driver is bound to.
+ */
+#define PD_H6_PPU_OFFSET 0x250
+#define PD_H6_VDD_SYS_REG 0x250
+#define PD_H616_ANA_VDD_GATE BIT(4)
+#define PD_H6_CPUS_VDD_GATE BIT(3)
+#define PD_H6_AVCC_VDD_GATE BIT(2)
+#define PD_H6_GPU_REG 0x254
+#define PD_H6_GPU_GATE BIT(0)
+
+struct sun50i_h6_ppu_pd {
+ struct generic_pm_domain genpd;
+ void __iomem *reg;
+ u32 gate_mask;
+ bool negated;
+};
+
+#define FLAG_PPU_ALWAYS_ON BIT(0)
+#define FLAG_PPU_NEGATED BIT(1)
+
+struct sun50i_h6_ppu_desc {
+ const char *name;
+ u32 offset;
+ u32 mask;
+ unsigned int flags;
+};
+
+static const struct sun50i_h6_ppu_desc sun50i_h6_ppus[] = {
+ { "AVCC", PD_H6_VDD_SYS_REG, PD_H6_AVCC_VDD_GATE },
+ { "CPUS", PD_H6_VDD_SYS_REG, PD_H6_CPUS_VDD_GATE },
+ { "GPU", PD_H6_GPU_REG, PD_H6_GPU_GATE },
+};
+static const struct sun50i_h6_ppu_desc sun50i_h616_ppus[] = {
+ { "PLL", PD_H6_VDD_SYS_REG, PD_H6_AVCC_VDD_GATE,
+ FLAG_PPU_ALWAYS_ON | FLAG_PPU_NEGATED },
+ { "ANA", PD_H6_VDD_SYS_REG, PD_H616_ANA_VDD_GATE, FLAG_PPU_ALWAYS_ON },
+ { "GPU", PD_H6_GPU_REG, PD_H6_GPU_GATE, FLAG_PPU_NEGATED },
+};
+
+struct sun50i_h6_ppu_data {
+ const struct sun50i_h6_ppu_desc *descs;
+ int nr_domains;
+};
+
+static const struct sun50i_h6_ppu_data sun50i_h6_ppu_data = {
+ .descs = sun50i_h6_ppus,
+ .nr_domains = ARRAY_SIZE(sun50i_h6_ppus),
+};
+
+static const struct sun50i_h6_ppu_data sun50i_h616_ppu_data = {
+ .descs = sun50i_h616_ppus,
+ .nr_domains = ARRAY_SIZE(sun50i_h616_ppus),
+};
+
+#define to_sun50i_h6_ppu_pd(_genpd) \
+ container_of(_genpd, struct sun50i_h6_ppu_pd, genpd)
+
+static bool sun50i_h6_ppu_power_status(const struct sun50i_h6_ppu_pd *pd)
+{
+ bool bit = readl(pd->reg) & pd->gate_mask;
+
+ return bit ^ pd->negated;
+}
+
+static int sun50i_h6_ppu_pd_set_power(const struct sun50i_h6_ppu_pd *pd,
+ bool set_bit)
+{
+ u32 reg = readl(pd->reg);
+
+ if (set_bit)
+ writel(reg | pd->gate_mask, pd->reg);
+ else
+ writel(reg & ~pd->gate_mask, pd->reg);
+
+ return 0;
+}
+
+static int sun50i_h6_ppu_pd_power_on(struct generic_pm_domain *genpd)
+{
+ const struct sun50i_h6_ppu_pd *pd = to_sun50i_h6_ppu_pd(genpd);
+
+ return sun50i_h6_ppu_pd_set_power(pd, !pd->negated);
+}
+
+static int sun50i_h6_ppu_pd_power_off(struct generic_pm_domain *genpd)
+{
+ const struct sun50i_h6_ppu_pd *pd = to_sun50i_h6_ppu_pd(genpd);
+
+ return sun50i_h6_ppu_pd_set_power(pd, pd->negated);
+}
+
+static int sun50i_h6_ppu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct genpd_onecell_data *ppu;
+ struct sun50i_h6_ppu_pd *pds;
+ const struct sun50i_h6_ppu_data *data;
+ void __iomem *base;
+ int ret, i;
+
+ data = of_device_get_match_data(dev);
+ if (!data)
+ return -EINVAL;
+
+ pds = devm_kcalloc(dev, data->nr_domains, sizeof(*pds), GFP_KERNEL);
+ if (!pds)
+ return -ENOMEM;
+
+ ppu = devm_kzalloc(dev, sizeof(*ppu), GFP_KERNEL);
+ if (!ppu)
+ return -ENOMEM;
+
+ ppu->num_domains = data->nr_domains;
+ ppu->domains = devm_kcalloc(dev, data->nr_domains,
+ sizeof(*ppu->domains), GFP_KERNEL);
+ if (!ppu->domains)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, ppu);
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ for (i = 0; i < data->nr_domains; i++) {
+ struct sun50i_h6_ppu_pd *pd = &pds[i];
+ const struct sun50i_h6_ppu_desc *desc = &data->descs[i];
+
+ pd->genpd.name = desc->name;
+ pd->genpd.power_off = sun50i_h6_ppu_pd_power_off;
+ pd->genpd.power_on = sun50i_h6_ppu_pd_power_on;
+ if (desc->flags & FLAG_PPU_ALWAYS_ON)
+ pd->genpd.flags = GENPD_FLAG_ALWAYS_ON;
+ pd->negated = !!(desc->flags & FLAG_PPU_NEGATED);
+ pd->reg = base + desc->offset - PD_H6_PPU_OFFSET;
+ pd->gate_mask = desc->mask;
+
+ ret = pm_genpd_init(&pd->genpd, NULL,
+ !sun50i_h6_ppu_power_status(pd));
+ if (ret) {
+ dev_warn(dev, "Failed to add %s power domain: %d\n",
+ desc->name, ret);
+ goto out_remove_pds;
+ }
+ ppu->domains[i] = &pd->genpd;
+ }
+
+ ret = of_genpd_add_provider_onecell(dev->of_node, ppu);
+ if (!ret)
+ return 0;
+
+ dev_warn(dev, "Failed to add provider: %d\n", ret);
+out_remove_pds:
+ for (i--; i >= 0; i--)
+ pm_genpd_remove(&pds[i].genpd);
+
+ return ret;
+}
+
+static const struct of_device_id sun50i_h6_ppu_of_match[] = {
+ { .compatible = "allwinner,sun50i-h6-prcm-ppu",
+ .data = &sun50i_h6_ppu_data },
+ { .compatible = "allwinner,sun50i-h616-prcm-ppu",
+ .data = &sun50i_h616_ppu_data },
+ { }
+};
+MODULE_DEVICE_TABLE(of, sun50i_h6_ppu_of_match);
+
+static struct platform_driver sun50i_h6_ppu_driver = {
+ .probe = sun50i_h6_ppu_probe,
+ .driver = {
+ .name = "sun50i-h6-prcm-ppu",
+ .of_match_table = sun50i_h6_ppu_of_match,
+ /* Power domains cannot be removed while they are in use. */
+ .suppress_bind_attrs = true,
+ },
+};
+module_platform_driver(sun50i_h6_ppu_driver);
+
+MODULE_AUTHOR("Andre Przywara <andre.przywara@arm.com>");
+MODULE_DESCRIPTION("Allwinner H6 PRCM power domain driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pmdomain/ti/omap_prm.c b/drivers/pmdomain/ti/omap_prm.c
index 79d165331d8c..5142f064bf5c 100644
--- a/drivers/pmdomain/ti/omap_prm.c
+++ b/drivers/pmdomain/ti/omap_prm.c
@@ -18,7 +18,9 @@
#include <linux/pm_domain.h>
#include <linux/reset-controller.h>
#include <linux/delay.h>
-
+#if IS_ENABLED(CONFIG_SUSPEND)
+#include <linux/suspend.h>
+#endif
#include <linux/platform_data/ti-prm.h>
enum omap_prm_domain_mode {
@@ -88,6 +90,7 @@ struct omap_reset_data {
#define OMAP_PRM_HAS_RSTST BIT(1)
#define OMAP_PRM_HAS_NO_CLKDM BIT(2)
#define OMAP_PRM_RET_WHEN_IDLE BIT(3)
+#define OMAP_PRM_ON_WHEN_STANDBY BIT(4)
#define OMAP_PRM_HAS_RESETS (OMAP_PRM_HAS_RSTCTRL | OMAP_PRM_HAS_RSTST)
@@ -404,7 +407,8 @@ static const struct omap_prm_data am3_prm_data[] = {
.name = "per", .base = 0x44e00c00,
.pwrstctrl = 0xc, .pwrstst = 0x8, .dmap = &omap_prm_noinact,
.rstctrl = 0x0, .rstmap = am3_per_rst_map,
- .flags = OMAP_PRM_HAS_RSTCTRL, .clkdm_name = "pruss_ocp"
+ .flags = OMAP_PRM_HAS_RSTCTRL | OMAP_PRM_ON_WHEN_STANDBY,
+ .clkdm_name = "pruss_ocp",
},
{
.name = "wkup", .base = 0x44e00d00,
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index 6085a1471de2..6e1d4bfd28ac 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -290,7 +290,7 @@ static void quirk_system_pci_resources(struct pnp_dev *dev)
#ifdef CONFIG_AMD_NB
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
{
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 60bf0ca64cf3..e71f0af4e378 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -216,6 +216,19 @@ config POWER_RESET_ST
help
Reset support for STMicroelectronics boards.
+config POWER_RESET_TORADEX_EC
+ tristate "Toradex Embedded Controller power-off and reset driver"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ This driver supports power-off and reset for SMARC Toradex SoMs,
+ for example the SMARC iMX8MP and SMARC iMX95, using Toradex
+ Embedded Controller (EC).
+
+ Say Y here if you have a Toradex SMARC SoM.
+
+ If unsure, say N.
+
config POWER_RESET_TPS65086
bool "TPS65086 restart driver"
depends on MFD_TPS65086
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
index 10782d32e1da..1b9b63a1a873 100644
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_POWER_RESET_QNAP) += qnap-poweroff.o
obj-$(CONFIG_POWER_RESET_REGULATOR) += regulator-poweroff.o
obj-$(CONFIG_POWER_RESET_RESTART) += restart-poweroff.o
obj-$(CONFIG_POWER_RESET_ST) += st-poweroff.o
+obj-$(CONFIG_POWER_RESET_TORADEX_EC) += tdx-ec-poweroff.o
obj-$(CONFIG_POWER_RESET_TPS65086) += tps65086-restart.o
obj-$(CONFIG_POWER_RESET_VERSATILE) += arm-versatile-reboot.o
obj-$(CONFIG_POWER_RESET_VEXPRESS) += vexpress-poweroff.o
diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c
index 036b18a1f90f..511f5a8f8961 100644
--- a/drivers/power/reset/at91-reset.c
+++ b/drivers/power/reset/at91-reset.c
@@ -129,12 +129,11 @@ static int at91_reset(struct notifier_block *this, unsigned long mode,
" str %4, [%0, %6]\n\t"
/* Disable SDRAM1 accesses */
"1: tst %1, #0\n\t"
- " beq 2f\n\t"
" strne %3, [%1, #" __stringify(AT91_DDRSDRC_RTR) "]\n\t"
/* Power down SDRAM1 */
" strne %4, [%1, %6]\n\t"
/* Reset CPU */
- "2: str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t"
+ " str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t"
" b .\n\t"
:
@@ -145,7 +144,7 @@ static int at91_reset(struct notifier_block *this, unsigned long mode,
"r" cpu_to_le32(AT91_DDRSDRC_LPCB_POWER_DOWN),
"r" (reset->data->reset_args),
"r" (reset->ramc_lpr)
- : "r4");
+ );
return NOTIFY_DONE;
}
diff --git a/drivers/power/reset/reboot-mode.c b/drivers/power/reset/reboot-mode.c
index b4076b10b893..fba53f638da0 100644
--- a/drivers/power/reset/reboot-mode.c
+++ b/drivers/power/reset/reboot-mode.c
@@ -23,20 +23,29 @@ static unsigned int get_reboot_mode_magic(struct reboot_mode_driver *reboot,
const char *cmd)
{
const char *normal = "normal";
- int magic = 0;
struct mode_info *info;
+ char cmd_[110];
if (!cmd)
cmd = normal;
- list_for_each_entry(info, &reboot->head, list) {
- if (!strcmp(info->mode, cmd)) {
- magic = info->magic;
- break;
- }
- }
+ list_for_each_entry(info, &reboot->head, list)
+ if (!strcmp(info->mode, cmd))
+ return info->magic;
+
+ /* try to match again, replacing characters impossible in DT */
+ if (strscpy(cmd_, cmd, sizeof(cmd_)) == -E2BIG)
+ return 0;
- return magic;
+ strreplace(cmd_, ' ', '-');
+ strreplace(cmd_, ',', '-');
+ strreplace(cmd_, '/', '-');
+
+ list_for_each_entry(info, &reboot->head, list)
+ if (!strcmp(info->mode, cmd_))
+ return info->magic;
+
+ return 0;
}
static int reboot_mode_notify(struct notifier_block *this,
diff --git a/drivers/power/reset/syscon-reboot.c b/drivers/power/reset/syscon-reboot.c
index d623d77e657e..2e2cf5f62d73 100644
--- a/drivers/power/reset/syscon-reboot.c
+++ b/drivers/power/reset/syscon-reboot.c
@@ -14,11 +14,24 @@
#include <linux/reboot.h>
#include <linux/regmap.h>
-struct syscon_reboot_context {
- struct regmap *map;
+struct reboot_mode_bits {
u32 offset;
- u32 value;
u32 mask;
+ u32 value;
+ bool valid;
+};
+
+struct reboot_data {
+ struct reboot_mode_bits mode_bits[REBOOT_SOFT + 1];
+ struct reboot_mode_bits catchall;
+};
+
+struct syscon_reboot_context {
+ struct regmap *map;
+
+ const struct reboot_data *rd; /* from of match data, if any */
+ struct reboot_mode_bits catchall; /* from DT */
+
struct notifier_block restart_handler;
};
@@ -28,9 +41,21 @@ static int syscon_restart_handle(struct notifier_block *this,
struct syscon_reboot_context *ctx =
container_of(this, struct syscon_reboot_context,
restart_handler);
+ const struct reboot_mode_bits *mode_bits;
+
+ if (ctx->rd) {
+ if (mode < ARRAY_SIZE(ctx->rd->mode_bits) &&
+ ctx->rd->mode_bits[mode].valid)
+ mode_bits = &ctx->rd->mode_bits[mode];
+ else
+ mode_bits = &ctx->rd->catchall;
+ } else {
+ mode_bits = &ctx->catchall;
+ }
/* Issue the reboot */
- regmap_update_bits(ctx->map, ctx->offset, ctx->mask, ctx->value);
+ regmap_update_bits(ctx->map, mode_bits->offset, mode_bits->mask,
+ mode_bits->value);
mdelay(1000);
@@ -42,7 +67,6 @@ static int syscon_reboot_probe(struct platform_device *pdev)
{
struct syscon_reboot_context *ctx;
struct device *dev = &pdev->dev;
- int mask_err, value_err;
int priority;
int err;
@@ -60,24 +84,33 @@ static int syscon_reboot_probe(struct platform_device *pdev)
if (of_property_read_s32(pdev->dev.of_node, "priority", &priority))
priority = 192;
- if (of_property_read_u32(pdev->dev.of_node, "offset", &ctx->offset))
- if (of_property_read_u32(pdev->dev.of_node, "reg", &ctx->offset))
- return -EINVAL;
+ ctx->rd = of_device_get_match_data(dev);
+ if (!ctx->rd) {
+ int mask_err, value_err;
- value_err = of_property_read_u32(pdev->dev.of_node, "value", &ctx->value);
- mask_err = of_property_read_u32(pdev->dev.of_node, "mask", &ctx->mask);
- if (value_err && mask_err) {
- dev_err(dev, "unable to read 'value' and 'mask'");
- return -EINVAL;
- }
+ if (of_property_read_u32(pdev->dev.of_node, "offset",
+ &ctx->catchall.offset) &&
+ of_property_read_u32(pdev->dev.of_node, "reg",
+ &ctx->catchall.offset))
+ return -EINVAL;
- if (value_err) {
- /* support old binding */
- ctx->value = ctx->mask;
- ctx->mask = 0xFFFFFFFF;
- } else if (mask_err) {
- /* support value without mask*/
- ctx->mask = 0xFFFFFFFF;
+ value_err = of_property_read_u32(pdev->dev.of_node, "value",
+ &ctx->catchall.value);
+ mask_err = of_property_read_u32(pdev->dev.of_node, "mask",
+ &ctx->catchall.mask);
+ if (value_err && mask_err) {
+ dev_err(dev, "unable to read 'value' and 'mask'");
+ return -EINVAL;
+ }
+
+ if (value_err) {
+ /* support old binding */
+ ctx->catchall.value = ctx->catchall.mask;
+ ctx->catchall.mask = 0xFFFFFFFF;
+ } else if (mask_err) {
+ /* support value without mask */
+ ctx->catchall.mask = 0xFFFFFFFF;
+ }
}
ctx->restart_handler.notifier_call = syscon_restart_handle;
@@ -89,7 +122,30 @@ static int syscon_reboot_probe(struct platform_device *pdev)
return err;
}
+static const struct reboot_data gs101_reboot_data = {
+ .mode_bits = {
+ [REBOOT_WARM] = {
+ .offset = 0x3a00, /* SYSTEM_CONFIGURATION */
+ .mask = 0x00000002, /* SWRESET_SYSTEM */
+ .value = 0x00000002,
+ .valid = true,
+ },
+ [REBOOT_SOFT] = {
+ .offset = 0x3a00, /* SYSTEM_CONFIGURATION */
+ .mask = 0x00000002, /* SWRESET_SYSTEM */
+ .value = 0x00000002,
+ .valid = true,
+ },
+ },
+ .catchall = {
+ .offset = 0x3e9c, /* PAD_CTRL_PWR_HOLD */
+ .mask = 0x00000100,
+ .value = 0x00000000,
+ },
+};
+
static const struct of_device_id syscon_reboot_of_match[] = {
+ { .compatible = "google,gs101-reboot", .data = &gs101_reboot_data },
{ .compatible = "syscon-reboot" },
{}
};
diff --git a/drivers/power/reset/tdx-ec-poweroff.c b/drivers/power/reset/tdx-ec-poweroff.c
new file mode 100644
index 000000000000..3302a127fce5
--- /dev/null
+++ b/drivers/power/reset/tdx-ec-poweroff.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Toradex Embedded Controller driver
+ *
+ * Copyright (C) 2025 Toradex
+ *
+ * Author: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
+ */
+
+#include <linux/array_size.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+
+#define EC_CHIP_ID_REG 0x00
+#define EC_CHIP_ID_SMARC_IMX95 0x11
+#define EC_CHIP_ID_SMARC_IMX8MP 0x12
+
+#define EC_VERSION_REG_MAJOR 0x01
+#define EC_VERSION_REG_MINOR 0x02
+#define EC_ID_VERSION_LEN 3
+
+#define EC_CMD_REG 0xD0
+#define EC_CMD_POWEROFF 0x01
+#define EC_CMD_RESET 0x02
+
+#define EC_REG_MAX 0xD0
+
+static const struct regmap_range volatile_ranges[] = {
+ regmap_reg_range(EC_CMD_REG, EC_CMD_REG),
+};
+
+static const struct regmap_access_table volatile_table = {
+ .yes_ranges = volatile_ranges,
+ .n_yes_ranges = ARRAY_SIZE(volatile_ranges),
+};
+
+static const struct regmap_range read_ranges[] = {
+ regmap_reg_range(EC_CHIP_ID_REG, EC_VERSION_REG_MINOR),
+};
+
+static const struct regmap_access_table read_table = {
+ .yes_ranges = read_ranges,
+ .n_yes_ranges = ARRAY_SIZE(read_ranges),
+};
+
+static const struct regmap_config regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = EC_REG_MAX,
+ .cache_type = REGCACHE_RBTREE,
+ .rd_table = &read_table,
+ .volatile_table = &volatile_table,
+};
+
+static int tdx_ec_cmd(struct regmap *regmap, u8 cmd)
+{
+ int err = regmap_write(regmap, EC_CMD_REG, cmd);
+
+ if (err)
+ dev_err(regmap_get_device(regmap), "Failed to send command 0x%02X: %d\n", cmd, err);
+
+ return err;
+}
+
+static int tdx_ec_power_off(struct sys_off_data *data)
+{
+ struct regmap *regmap = data->cb_data;
+ int err;
+
+ err = tdx_ec_cmd(regmap, EC_CMD_POWEROFF);
+
+ return err ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+static int tdx_ec_restart(struct sys_off_data *data)
+{
+ struct regmap *regmap = data->cb_data;
+ int err;
+
+ err = tdx_ec_cmd(regmap, EC_CMD_RESET);
+
+ return err ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+static int tdx_ec_register_power_off_restart(struct device *dev, struct regmap *regmap)
+{
+ int err;
+
+ err = devm_register_sys_off_handler(dev, SYS_OFF_MODE_RESTART,
+ SYS_OFF_PRIO_FIRMWARE,
+ tdx_ec_restart, regmap);
+ if (err)
+ return err;
+
+ return devm_register_sys_off_handler(dev, SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_FIRMWARE,
+ tdx_ec_power_off, regmap);
+}
+
+static int tdx_ec_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ u8 reg_val[EC_ID_VERSION_LEN];
+ struct regmap *regmap;
+ int err;
+
+ regmap = devm_regmap_init_i2c(client, &regmap_config);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ err = regmap_bulk_read(regmap, EC_CHIP_ID_REG, &reg_val, EC_ID_VERSION_LEN);
+ if (err)
+ return dev_err_probe(dev, err,
+ "Cannot read id and version registers\n");
+
+ dev_info(dev, "Toradex Embedded Controller id %x - Firmware %u.%u\n",
+ reg_val[0], reg_val[1], reg_val[2]);
+
+ err = tdx_ec_register_power_off_restart(dev, regmap);
+ if (err)
+ return dev_err_probe(dev, err,
+ "Cannot register system restart handler\n");
+
+ return 0;
+}
+
+static const struct of_device_id __maybe_unused of_tdx_ec_match[] = {
+ { .compatible = "toradex,smarc-ec" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, of_tdx_ec_match);
+
+static struct i2c_driver tdx_ec_driver = {
+ .probe = tdx_ec_probe,
+ .driver = {
+ .name = "toradex-smarc-ec",
+ .of_match_table = of_tdx_ec_match,
+ },
+};
+module_i2c_driver(tdx_ec_driver);
+
+MODULE_AUTHOR("Emanuele Ghidoli <emanuele.ghidoli@toradex.com>");
+MODULE_DESCRIPTION("Toradex SMARC Embedded Controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index 8dbd39afa43c..79ddb006e2da 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -107,6 +107,18 @@ config BATTERY_ACT8945A
Say Y here to enable support for power supply provided by
Active-semi ActivePath ACT8945A charger.
+config BATTERY_CHAGALL
+ tristate "Pegatron Chagall battery driver"
+ depends on I2C
+ depends on LEDS_CLASS
+ help
+ Say Y to include support for Cypress CG7153AM IC based battery
+ fuel gauge with custom firmware found in Pegatron Chagall based
+ tablet line.
+
+ This driver can also be built as a module. If so, the module will be
+ called chagall-battery.
+
config BATTERY_CPCAP
tristate "Motorola CPCAP PMIC battery driver"
depends on MFD_CPCAP && IIO
@@ -161,6 +173,16 @@ config BATTERY_DS2782
Say Y here to enable support for the DS2782/DS2786 standalone battery
gas-gauge.
+config BATTERY_HUAWEI_GAOKUN
+ tristate "Huawei Matebook E Go power supply"
+ depends on EC_HUAWEI_GAOKUN
+ help
+ This driver enables battery and adapter support on the Huawei Matebook
+ E Go, which is a sc8280xp-based 2-in-1 tablet.
+
+ To compile the driver as a module, choose M here: the module will be
+ called huawei-gaokun-battery.
+
config BATTERY_LEGO_EV3
tristate "LEGO MINDSTORMS EV3 battery"
depends on OF && IIO && GPIOLIB && (ARCH_DAVINCI_DA850 || COMPILE_TEST)
@@ -595,6 +617,21 @@ config CHARGER_MAX77976
This driver can also be built as a module. If so, the module will be
called max77976_charger.
+config CHARGER_MAX8971
+ tristate "Maxim MAX8971 battery charger driver"
+ depends on I2C
+ depends on EXTCON || !EXTCON
+ select REGMAP_I2C
+ help
+ The MAX8971 is a compact, high-frequency, high-efficiency switch-mode
+ charger for a one-cell lithium-ion (Li+) battery. It delivers up to
+ 1.55A of current to the battery from inputs up to 7.5V and withstands
+ transient inputs up to 22V.
+
+ Say Y to enable support for the Maxim MAX8971 battery charger.
+ This driver can also be built as a module. If so, the module will be
+ called max8971_charger.
+
config CHARGER_MAX8997
tristate "Maxim MAX8997/MAX8966 PMIC battery charger driver"
depends on MFD_MAX8997 && REGULATOR_MAX8997
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
index 61677be328b0..4f5f8e3507f8 100644
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_CHARGER_ADP5061) += adp5061.o
obj-$(CONFIG_BATTERY_ACT8945A) += act8945a_charger.o
obj-$(CONFIG_BATTERY_AXP20X) += axp20x_battery.o
obj-$(CONFIG_CHARGER_AXP20X) += axp20x_ac_power.o
+obj-$(CONFIG_BATTERY_CHAGALL) += chagall-battery.o
obj-$(CONFIG_BATTERY_CPCAP) += cpcap-battery.o
obj-$(CONFIG_BATTERY_CW2015) += cw2015_battery.o
obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o
@@ -31,6 +32,7 @@ obj-$(CONFIG_BATTERY_DS2781) += ds2781_battery.o
obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o
obj-$(CONFIG_BATTERY_GAUGE_LTC2941) += ltc2941-battery-gauge.o
obj-$(CONFIG_BATTERY_GOLDFISH) += goldfish_battery.o
+obj-$(CONFIG_BATTERY_HUAWEI_GAOKUN) += huawei-gaokun-battery.o
obj-$(CONFIG_BATTERY_LEGO_EV3) += lego_ev3_battery.o
obj-$(CONFIG_BATTERY_LENOVO_YOGA_C630) += lenovo_yoga_c630_battery.o
obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o
@@ -81,6 +83,7 @@ obj-$(CONFIG_CHARGER_MAX77650) += max77650-charger.o
obj-$(CONFIG_CHARGER_MAX77693) += max77693_charger.o
obj-$(CONFIG_CHARGER_MAX77705) += max77705_charger.o
obj-$(CONFIG_CHARGER_MAX77976) += max77976_charger.o
+obj-$(CONFIG_CHARGER_MAX8971) += max8971_charger.o
obj-$(CONFIG_CHARGER_MAX8997) += max8997_charger.o
obj-$(CONFIG_CHARGER_MAX8998) += max8998_charger.o
obj-$(CONFIG_CHARGER_MP2629) += mp2629_charger.o
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index f0d97ab45bd8..1867beadd7af 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -207,6 +207,7 @@ enum bq24190_chip {
BQ24190,
BQ24192,
BQ24192i,
+ BQ24193,
BQ24196,
BQ24296,
BQ24297,
@@ -2021,6 +2022,17 @@ static const struct bq24190_chip_info bq24190_chip_info_tbl[] = {
.get_ntc_status = bq24190_charger_get_ntc_status,
.set_otg_vbus = bq24190_set_otg_vbus,
},
+ [BQ24193] = {
+ .ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values),
+#ifdef CONFIG_REGULATOR
+ .vbus_desc = &bq24190_vbus_desc,
+#endif
+ .check_chip = bq24190_check_chip,
+ .set_chg_config = bq24190_battery_set_chg_config,
+ .ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK,
+ .get_ntc_status = bq24190_charger_get_ntc_status,
+ .set_otg_vbus = bq24190_set_otg_vbus,
+ },
[BQ24196] = {
.ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values),
#ifdef CONFIG_REGULATOR
@@ -2308,6 +2320,7 @@ static const struct i2c_device_id bq24190_i2c_ids[] = {
{ "bq24190", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24190] },
{ "bq24192", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24192] },
{ "bq24192i", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24192i] },
+ { "bq24193", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24193] },
{ "bq24196", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24196] },
{ "bq24296", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24296] },
{ "bq24297", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24297] },
@@ -2319,6 +2332,7 @@ static const struct of_device_id bq24190_of_match[] = {
{ .compatible = "ti,bq24190", .data = &bq24190_chip_info_tbl[BQ24190] },
{ .compatible = "ti,bq24192", .data = &bq24190_chip_info_tbl[BQ24192] },
{ .compatible = "ti,bq24192i", .data = &bq24190_chip_info_tbl[BQ24192i] },
+ { .compatible = "ti,bq24193", .data = &bq24190_chip_info_tbl[BQ24193] },
{ .compatible = "ti,bq24196", .data = &bq24190_chip_info_tbl[BQ24196] },
{ .compatible = "ti,bq24296", .data = &bq24190_chip_info_tbl[BQ24296] },
{ .compatible = "ti,bq24297", .data = &bq24190_chip_info_tbl[BQ24297] },
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 2f31d750a4c1..93dcebbe1141 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -2131,7 +2131,7 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
mutex_unlock(&di->lock);
if (psp != POWER_SUPPLY_PROP_PRESENT && di->cache.flags < 0)
- return -ENODEV;
+ return di->cache.flags;
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index ba0d22d90429..868e95f0887e 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -6,6 +6,7 @@
* Andrew F. Davis <afd@ti.com>
*/
+#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/module.h>
@@ -31,6 +32,7 @@ static int bq27xxx_battery_i2c_read(struct bq27xxx_device_info *di, u8 reg,
struct i2c_msg msg[2];
u8 data[2];
int ret;
+ int retry = 0;
if (!client->adapter)
return -ENODEV;
@@ -47,7 +49,16 @@ static int bq27xxx_battery_i2c_read(struct bq27xxx_device_info *di, u8 reg,
else
msg[1].len = 2;
- ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+ do {
+ ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+ if (ret == -EBUSY && ++retry < 3) {
+ /* sleep 10 milliseconds when busy */
+ usleep_range(10000, 11000);
+ continue;
+ }
+ break;
+ } while (1);
+
if (ret < 0)
return ret;
diff --git a/drivers/power/supply/chagall-battery.c b/drivers/power/supply/chagall-battery.c
new file mode 100644
index 000000000000..8b05422aca6f
--- /dev/null
+++ b/drivers/power/supply/chagall-battery.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/array_size.h>
+#include <linux/delay.h>
+#include <linux/devm-helpers.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+
+#define CHAGALL_REG_LED_AMBER 0x60
+#define CHAGALL_REG_LED_WHITE 0x70
+#define CHAGALL_REG_BATTERY_TEMPERATURE 0xa2
+#define CHAGALL_REG_BATTERY_VOLTAGE 0xa4
+#define CHAGALL_REG_BATTERY_CURRENT 0xa6
+#define CHAGALL_REG_BATTERY_CAPACITY 0xa8
+#define CHAGALL_REG_BATTERY_CHARGING_CURRENT 0xaa
+#define CHAGALL_REG_BATTERY_CHARGING_VOLTAGE 0xac
+#define CHAGALL_REG_BATTERY_STATUS 0xae
+#define BATTERY_DISCHARGING BIT(6)
+#define BATTERY_FULL_CHARGED BIT(5)
+#define BATTERY_FULL_DISCHARGED BIT(4)
+#define CHAGALL_REG_BATTERY_REMAIN_CAPACITY 0xb0
+#define CHAGALL_REG_BATTERY_FULL_CAPACITY 0xb2
+#define CHAGALL_REG_MAX_COUNT 0xb4
+
+#define CHAGALL_BATTERY_DATA_REFRESH 5000
+#define TEMP_CELSIUS_OFFSET 2731
+
+static const struct regmap_config chagall_battery_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = CHAGALL_REG_MAX_COUNT,
+ .reg_format_endian = REGMAP_ENDIAN_LITTLE,
+ .val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+struct chagall_battery_data {
+ struct regmap *regmap;
+ struct led_classdev amber_led;
+ struct led_classdev white_led;
+ struct power_supply *battery;
+ struct delayed_work poll_work;
+ u16 last_state;
+};
+
+static void chagall_led_set_brightness_amber(struct led_classdev *led,
+ enum led_brightness brightness)
+{
+ struct chagall_battery_data *cg =
+ container_of(led, struct chagall_battery_data, amber_led);
+
+ regmap_write(cg->regmap, CHAGALL_REG_LED_AMBER, brightness);
+}
+
+static void chagall_led_set_brightness_white(struct led_classdev *led,
+ enum led_brightness brightness)
+{
+ struct chagall_battery_data *cg =
+ container_of(led, struct chagall_battery_data, white_led);
+
+ regmap_write(cg->regmap, CHAGALL_REG_LED_WHITE, brightness);
+}
+
+static const enum power_supply_property chagall_battery_properties[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_CURRENT_MAX,
+ POWER_SUPPLY_PROP_CAPACITY,
+ POWER_SUPPLY_PROP_TEMP,
+ POWER_SUPPLY_PROP_CHARGE_FULL,
+ POWER_SUPPLY_PROP_CHARGE_NOW,
+};
+
+static const unsigned int chagall_battery_prop_offs[] = {
+ [POWER_SUPPLY_PROP_STATUS] = CHAGALL_REG_BATTERY_STATUS,
+ [POWER_SUPPLY_PROP_VOLTAGE_NOW] = CHAGALL_REG_BATTERY_VOLTAGE,
+ [POWER_SUPPLY_PROP_VOLTAGE_MAX] = CHAGALL_REG_BATTERY_CHARGING_VOLTAGE,
+ [POWER_SUPPLY_PROP_CURRENT_NOW] = CHAGALL_REG_BATTERY_CURRENT,
+ [POWER_SUPPLY_PROP_CURRENT_MAX] = CHAGALL_REG_BATTERY_CHARGING_CURRENT,
+ [POWER_SUPPLY_PROP_CAPACITY] = CHAGALL_REG_BATTERY_CAPACITY,
+ [POWER_SUPPLY_PROP_TEMP] = CHAGALL_REG_BATTERY_TEMPERATURE,
+ [POWER_SUPPLY_PROP_CHARGE_FULL] = CHAGALL_REG_BATTERY_FULL_CAPACITY,
+ [POWER_SUPPLY_PROP_CHARGE_NOW] = CHAGALL_REG_BATTERY_REMAIN_CAPACITY,
+};
+
+static int chagall_battery_get_value(struct chagall_battery_data *cg,
+ enum power_supply_property psp, u32 *val)
+{
+ if (psp >= ARRAY_SIZE(chagall_battery_prop_offs))
+ return -EINVAL;
+ if (!chagall_battery_prop_offs[psp])
+ return -EINVAL;
+
+ /* Battery data is stored in 2 consecutive registers with little-endian */
+ return regmap_bulk_read(cg->regmap, chagall_battery_prop_offs[psp], val, 2);
+}
+
+static int chagall_battery_get_status(u32 status_reg)
+{
+ if (status_reg & BATTERY_FULL_CHARGED)
+ return POWER_SUPPLY_STATUS_FULL;
+ else if (status_reg & BATTERY_DISCHARGING)
+ return POWER_SUPPLY_STATUS_DISCHARGING;
+ else
+ return POWER_SUPPLY_STATUS_CHARGING;
+}
+
+static int chagall_battery_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct chagall_battery_data *cg = power_supply_get_drvdata(psy);
+ int ret;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_PRESENT:
+ val->intval = 1;
+ break;
+
+ default:
+ ret = chagall_battery_get_value(cg, psp, &val->intval);
+ if (ret)
+ return ret;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_TEMP:
+ val->intval -= TEMP_CELSIUS_OFFSET;
+ break;
+
+ case POWER_SUPPLY_PROP_VOLTAGE_MAX:
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ case POWER_SUPPLY_PROP_CURRENT_MAX:
+ case POWER_SUPPLY_PROP_CURRENT_NOW:
+ case POWER_SUPPLY_PROP_CHARGE_FULL:
+ case POWER_SUPPLY_PROP_CHARGE_NOW:
+ val->intval *= 1000;
+ break;
+
+ case POWER_SUPPLY_PROP_STATUS:
+ val->intval = chagall_battery_get_status(val->intval);
+ break;
+
+ default:
+ break;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static void chagall_battery_poll_work(struct work_struct *work)
+{
+ struct chagall_battery_data *cg =
+ container_of(work, struct chagall_battery_data, poll_work.work);
+ u32 state;
+ int ret;
+
+ ret = chagall_battery_get_value(cg, POWER_SUPPLY_PROP_STATUS, &state);
+ if (ret)
+ return;
+
+ state = chagall_battery_get_status(state);
+
+ if (cg->last_state != state) {
+ cg->last_state = state;
+ power_supply_changed(cg->battery);
+ }
+
+ /* continuously send uevent notification */
+ schedule_delayed_work(&cg->poll_work,
+ msecs_to_jiffies(CHAGALL_BATTERY_DATA_REFRESH));
+}
+
+static const struct power_supply_desc chagall_battery_desc = {
+ .name = "chagall-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = chagall_battery_properties,
+ .num_properties = ARRAY_SIZE(chagall_battery_properties),
+ .get_property = chagall_battery_get_property,
+ .external_power_changed = power_supply_changed,
+};
+
+static int chagall_battery_probe(struct i2c_client *client)
+{
+ struct chagall_battery_data *cg;
+ struct device *dev = &client->dev;
+ struct power_supply_config cfg = { };
+ int ret;
+
+ cg = devm_kzalloc(dev, sizeof(*cg), GFP_KERNEL);
+ if (!cg)
+ return -ENOMEM;
+
+ cfg.drv_data = cg;
+ cfg.fwnode = dev_fwnode(dev);
+
+ i2c_set_clientdata(client, cg);
+
+ cg->regmap = devm_regmap_init_i2c(client, &chagall_battery_regmap_config);
+ if (IS_ERR(cg->regmap))
+ return dev_err_probe(dev, PTR_ERR(cg->regmap), "cannot allocate regmap\n");
+
+ cg->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
+ cg->battery = devm_power_supply_register(dev, &chagall_battery_desc, &cfg);
+ if (IS_ERR(cg->battery))
+ return dev_err_probe(dev, PTR_ERR(cg->battery),
+ "failed to register power supply\n");
+
+ cg->amber_led.name = "power::amber";
+ cg->amber_led.max_brightness = 1;
+ cg->amber_led.flags = LED_CORE_SUSPENDRESUME;
+ cg->amber_led.brightness_set = chagall_led_set_brightness_amber;
+ cg->amber_led.default_trigger = "chagall-battery-charging";
+
+ ret = devm_led_classdev_register(dev, &cg->amber_led);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to register amber LED\n");
+
+ cg->white_led.name = "power::white";
+ cg->white_led.max_brightness = 1;
+ cg->white_led.flags = LED_CORE_SUSPENDRESUME;
+ cg->white_led.brightness_set = chagall_led_set_brightness_white;
+ cg->white_led.default_trigger = "chagall-battery-full";
+
+ ret = devm_led_classdev_register(dev, &cg->white_led);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to register white LED\n");
+
+ led_set_brightness(&cg->amber_led, LED_OFF);
+ led_set_brightness(&cg->white_led, LED_OFF);
+
+ ret = devm_delayed_work_autocancel(dev, &cg->poll_work, chagall_battery_poll_work);
+ if (ret)
+ return ret;
+
+ schedule_delayed_work(&cg->poll_work, msecs_to_jiffies(CHAGALL_BATTERY_DATA_REFRESH));
+
+ return 0;
+}
+
+static int __maybe_unused chagall_battery_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct chagall_battery_data *cg = i2c_get_clientdata(client);
+
+ cancel_delayed_work_sync(&cg->poll_work);
+
+ return 0;
+}
+
+static int __maybe_unused chagall_battery_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct chagall_battery_data *cg = i2c_get_clientdata(client);
+
+ schedule_delayed_work(&cg->poll_work, msecs_to_jiffies(CHAGALL_BATTERY_DATA_REFRESH));
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(chagall_battery_pm_ops,
+ chagall_battery_suspend, chagall_battery_resume);
+
+static const struct of_device_id chagall_of_match[] = {
+ { .compatible = "pegatron,chagall-ec" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, chagall_of_match);
+
+static struct i2c_driver chagall_battery_driver = {
+ .driver = {
+ .name = "chagall-battery",
+ .pm = &chagall_battery_pm_ops,
+ .of_match_table = chagall_of_match,
+ },
+ .probe = chagall_battery_probe,
+};
+module_i2c_driver(chagall_battery_driver);
+
+MODULE_AUTHOR("Svyatoslav Ryhel <clamor95@gmail.com>");
+MODULE_DESCRIPTION("Pegatron Chagall fuel gauge driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/collie_battery.c b/drivers/power/supply/collie_battery.c
index 68390bd1004f..3daf7befc0bf 100644
--- a/drivers/power/supply/collie_battery.c
+++ b/drivers/power/supply/collie_battery.c
@@ -440,6 +440,7 @@ err_put_gpio_full:
static void collie_bat_remove(struct ucb1x00_dev *dev)
{
+ device_init_wakeup(&ucb->dev, 0);
free_irq(gpiod_to_irq(collie_bat_main.gpio_full), &collie_bat_main);
power_supply_unregister(collie_bat_bu.psy);
power_supply_unregister(collie_bat_main.psy);
diff --git a/drivers/power/supply/cros_charge-control.c b/drivers/power/supply/cros_charge-control.c
index 02d5bdbe2e8d..53e6a77e03fc 100644
--- a/drivers/power/supply/cros_charge-control.c
+++ b/drivers/power/supply/cros_charge-control.c
@@ -47,29 +47,20 @@ struct cros_chctl_priv {
static int cros_chctl_send_charge_control_cmd(struct cros_ec_device *cros_ec,
u8 cmd_version, struct ec_params_charge_control *req)
{
+ int ret;
static const u8 outsizes[] = {
[1] = offsetof(struct ec_params_charge_control, cmd),
[2] = sizeof(struct ec_params_charge_control),
[3] = sizeof(struct ec_params_charge_control),
};
- struct {
- struct cros_ec_command msg;
- union {
- struct ec_params_charge_control req;
- struct ec_response_charge_control resp;
- } __packed data;
- } __packed buf = {
- .msg = {
- .command = EC_CMD_CHARGE_CONTROL,
- .version = cmd_version,
- .insize = 0,
- .outsize = outsizes[cmd_version],
- },
- .data.req = *req,
- };
+ ret = cros_ec_cmd(cros_ec, cmd_version, EC_CMD_CHARGE_CONTROL, req,
+ outsizes[cmd_version], NULL, 0);
+
+ if (ret < 0)
+ return ret;
- return cros_ec_cmd_xfer_status(cros_ec, &buf.msg);
+ return 0;
}
static int cros_chctl_configure_ec(struct cros_chctl_priv *priv)
diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c
index 1dfd5b0cb30d..1b2da9b5fb65 100644
--- a/drivers/power/supply/gpio-charger.c
+++ b/drivers/power/supply/gpio-charger.c
@@ -366,7 +366,9 @@ static int gpio_charger_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, gpio_charger);
- device_init_wakeup(dev, 1);
+ ret = devm_device_init_wakeup(dev);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to init wakeup\n");
return 0;
}
diff --git a/drivers/power/supply/huawei-gaokun-battery.c b/drivers/power/supply/huawei-gaokun-battery.c
new file mode 100644
index 000000000000..e4dfec3b4241
--- /dev/null
+++ b/drivers/power/supply/huawei-gaokun-battery.c
@@ -0,0 +1,645 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * huawei-gaokun-battery - A power supply driver for HUAWEI Matebook E Go
+ *
+ * Copyright (C) 2024 Pengyu Luo <mitltlatltl@gmail.com>
+ */
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bits.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/platform_data/huawei-gaokun-ec.h>
+#include <linux/power_supply.h>
+#include <linux/sprintf.h>
+
+/* -------------------------------------------------------------------------- */
+/* String Data Reg */
+
+#define EC_BAT_VENDOR 0x01 /* from 0x01 to 0x0F, SUNWODA */
+#define EC_BAT_MODEL 0x11 /* from 0x11 to 0x1F, HB30A8P9ECW-22T */
+
+#define EC_ADP_STATUS 0x81
+#define EC_AC_STATUS BIT(0)
+#define EC_BAT_PRESENT BIT(1) /* BATC._STA */
+
+#define EC_BAT_STATUS 0x82 /* _BST */
+#define EC_BAT_DISCHARGING BIT(0)
+#define EC_BAT_CHARGING BIT(1)
+#define EC_BAT_CRITICAL BIT(2) /* Low Battery Level */
+#define EC_BAT_FULL BIT(3)
+
+/* -------------------------------------------------------------------------- */
+/* Word Data Reg */
+
+/* 0x5A: ?
+ * 0x5C: ?
+ * 0x5E: ?
+ * 0X60: ?
+ * 0x84: ?
+ */
+
+#define EC_BAT_STATUS_START 0x90
+#define EC_BAT_PERCENTAGE 0x90
+#define EC_BAT_VOLTAGE 0x92
+#define EC_BAT_CAPACITY 0x94
+#define EC_BAT_FULL_CAPACITY 0x96
+/* 0x98: ? */
+#define EC_BAT_CURRENT 0x9A
+/* 0x9C: ? */
+
+#define EC_BAT_INFO_START 0xA0
+/* 0xA0: POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT? */
+#define EC_BAT_DESIGN_CAPACITY 0xA2
+#define EC_BAT_DESIGN_VOLTAGE 0xA4
+#define EC_BAT_SERIAL_NUMBER 0xA6
+#define EC_BAT_CYCLE_COUNT 0xAA
+
+/* -------------------------------------------------------------------------- */
+/* Battery Event ID */
+
+#define EC_EVENT_BAT_A0 0xA0
+#define EC_EVENT_BAT_A1 0xA1
+#define EC_EVENT_BAT_A2 0xA2
+#define EC_EVENT_BAT_A3 0xA3
+#define EC_EVENT_BAT_B1 0xB1
+/* EVENT B1 A0 A1 repeat about every 1s 2s 3s respectively */
+
+/* ACPI _BIX field, Min sampling time, the duration between two _BST */
+#define CACHE_TIME 2000 /* cache time in milliseconds */
+
+#define MILLI_TO_MICRO 1000
+
+#define SMART_CHARGE_MODE 0
+#define SMART_CHARGE_DELAY 1
+#define SMART_CHARGE_START 2
+#define SMART_CHARGE_END 3
+
+#define NO_DELAY_MODE 1
+#define DELAY_MODE 4
+
+struct gaokun_psy_bat_status {
+ __le16 percentage_now; /* 0x90 */
+ __le16 voltage_now;
+ __le16 capacity_now;
+ __le16 full_capacity;
+ __le16 unknown1;
+ __le16 rate_now;
+ __le16 unknown2; /* 0x9C */
+} __packed;
+
+struct gaokun_psy_bat_info {
+ __le16 unknown3; /* 0xA0 */
+ __le16 design_capacity;
+ __le16 design_voltage;
+ __le16 serial_number;
+ __le16 padding2;
+ __le16 cycle_count; /* 0xAA */
+} __packed;
+
+struct gaokun_psy {
+ struct gaokun_ec *ec;
+ struct device *dev;
+ struct notifier_block nb;
+
+ struct power_supply *bat_psy;
+ struct power_supply *adp_psy;
+
+ unsigned long update_time;
+ struct gaokun_psy_bat_status status;
+ struct gaokun_psy_bat_info info;
+
+ char battery_model[0x10]; /* HB30A8P9ECW-22T, the real one is XXX-22A */
+ char battery_serial[0x10];
+ char battery_vendor[0x10];
+
+ int charge_now;
+ int online;
+ int bat_present;
+};
+
+/* -------------------------------------------------------------------------- */
+/* Adapter */
+
+static int gaokun_psy_get_adp_status(struct gaokun_psy *ecbat)
+{
+ /* _PSR */
+ int ret;
+ u8 online;
+
+ ret = gaokun_ec_psy_read_byte(ecbat->ec, EC_ADP_STATUS, &online);
+ if (ret)
+ return ret;
+
+ ecbat->online = !!(online & EC_AC_STATUS);
+
+ return 0;
+}
+
+static int gaokun_psy_get_adp_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ int ret;
+
+ ret = gaokun_psy_get_adp_status(ecbat);
+ if (ret)
+ return ret;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_ONLINE:
+ val->intval = ecbat->online;
+ break;
+ case POWER_SUPPLY_PROP_USB_TYPE:
+ val->intval = POWER_SUPPLY_USB_TYPE_C;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static enum power_supply_property gaokun_psy_adp_props[] = {
+ POWER_SUPPLY_PROP_ONLINE,
+ POWER_SUPPLY_PROP_USB_TYPE,
+};
+
+static const struct power_supply_desc gaokun_psy_adp_desc = {
+ .name = "gaokun-ec-adapter",
+ .type = POWER_SUPPLY_TYPE_USB,
+ .usb_types = BIT(POWER_SUPPLY_USB_TYPE_C),
+ .get_property = gaokun_psy_get_adp_property,
+ .properties = gaokun_psy_adp_props,
+ .num_properties = ARRAY_SIZE(gaokun_psy_adp_props),
+};
+
+/* -------------------------------------------------------------------------- */
+/* Battery */
+
+static inline void gaokun_psy_get_bat_present(struct gaokun_psy *ecbat)
+{
+ int ret;
+ u8 present;
+
+ /* Some kind of initialization */
+ gaokun_ec_write(ecbat->ec, (u8 []){0x02, 0xB2, 1, 0x90});
+
+ ret = gaokun_ec_psy_read_byte(ecbat->ec, EC_ADP_STATUS, &present);
+
+ ecbat->bat_present = ret ? false : !!(present & EC_BAT_PRESENT);
+}
+
+static inline int gaokun_psy_bat_present(struct gaokun_psy *ecbat)
+{
+ return ecbat->bat_present;
+}
+
+static int gaokun_psy_get_bat_info(struct gaokun_psy *ecbat)
+{
+ /* _BIX */
+ if (!gaokun_psy_bat_present(ecbat))
+ return 0;
+
+ return gaokun_ec_psy_multi_read(ecbat->ec, EC_BAT_INFO_START,
+ sizeof(ecbat->info), (u8 *)&ecbat->info);
+}
+
+static void gaokun_psy_update_bat_charge(struct gaokun_psy *ecbat)
+{
+ u8 charge;
+
+ gaokun_ec_psy_read_byte(ecbat->ec, EC_BAT_STATUS, &charge);
+
+ switch (charge) {
+ case EC_BAT_CHARGING:
+ ecbat->charge_now = POWER_SUPPLY_STATUS_CHARGING;
+ break;
+ case EC_BAT_DISCHARGING:
+ ecbat->charge_now = POWER_SUPPLY_STATUS_DISCHARGING;
+ break;
+ case EC_BAT_FULL:
+ ecbat->charge_now = POWER_SUPPLY_STATUS_FULL;
+ break;
+ default:
+ dev_warn(ecbat->dev, "unknown charge state %d\n", charge);
+ }
+}
+
+static int gaokun_psy_get_bat_status(struct gaokun_psy *ecbat)
+{
+ /* _BST */
+ int ret;
+
+ if (time_before(jiffies, ecbat->update_time +
+ msecs_to_jiffies(CACHE_TIME)))
+ return 0;
+
+ gaokun_psy_update_bat_charge(ecbat);
+ ret = gaokun_ec_psy_multi_read(ecbat->ec, EC_BAT_STATUS_START,
+ sizeof(ecbat->status), (u8 *)&ecbat->status);
+
+ ecbat->update_time = jiffies;
+
+ return ret;
+}
+
+static void gaokun_psy_init(struct gaokun_psy *ecbat)
+{
+ gaokun_psy_get_bat_present(ecbat);
+ if (!gaokun_psy_bat_present(ecbat))
+ return;
+
+ gaokun_psy_get_bat_info(ecbat);
+
+ snprintf(ecbat->battery_serial, sizeof(ecbat->battery_serial),
+ "%d", le16_to_cpu(ecbat->info.serial_number));
+
+ gaokun_ec_psy_multi_read(ecbat->ec, EC_BAT_VENDOR,
+ sizeof(ecbat->battery_vendor) - 1,
+ ecbat->battery_vendor);
+
+ gaokun_ec_psy_multi_read(ecbat->ec, EC_BAT_MODEL,
+ sizeof(ecbat->battery_model) - 1,
+ ecbat->battery_model);
+
+ ecbat->battery_model[14] = 'A'; /* FIX UP */
+}
+
+static int gaokun_psy_get_bat_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ u8 buf[GAOKUN_SMART_CHARGE_DATA_SIZE];
+ int ret;
+
+ if (gaokun_psy_bat_present(ecbat))
+ gaokun_psy_get_bat_status(ecbat);
+ else if (psp != POWER_SUPPLY_PROP_PRESENT)
+ return -ENODEV;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+ val->intval = ecbat->charge_now;
+ break;
+
+ case POWER_SUPPLY_PROP_PRESENT:
+ val->intval = ecbat->bat_present;
+ break;
+
+ case POWER_SUPPLY_PROP_TECHNOLOGY:
+ val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+ break;
+
+ case POWER_SUPPLY_PROP_CYCLE_COUNT:
+ val->intval = le16_to_cpu(ecbat->info.cycle_count);
+ break;
+
+ case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+ val->intval = le16_to_cpu(ecbat->info.design_voltage) * MILLI_TO_MICRO;
+ break;
+
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ val->intval = le16_to_cpu(ecbat->status.voltage_now) * MILLI_TO_MICRO;
+ break;
+
+ case POWER_SUPPLY_PROP_CURRENT_NOW:
+ val->intval = (s16)le16_to_cpu(ecbat->status.rate_now) * MILLI_TO_MICRO;
+ break;
+
+ case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+ val->intval = le16_to_cpu(ecbat->info.design_capacity) * MILLI_TO_MICRO;
+ break;
+
+ case POWER_SUPPLY_PROP_CHARGE_FULL:
+ val->intval = le16_to_cpu(ecbat->status.full_capacity) * MILLI_TO_MICRO;
+ break;
+
+ case POWER_SUPPLY_PROP_CHARGE_NOW:
+ val->intval = le16_to_cpu(ecbat->status.capacity_now) * MILLI_TO_MICRO;
+ break;
+
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD:
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD:
+ ret = gaokun_ec_psy_get_smart_charge(ecbat->ec, buf);
+ if (ret)
+ return ret;
+
+ if (psp == POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD)
+ val->intval = buf[SMART_CHARGE_START];
+ else
+ val->intval = buf[SMART_CHARGE_END];
+ break;
+
+ case POWER_SUPPLY_PROP_CAPACITY:
+ val->intval = le16_to_cpu(ecbat->status.percentage_now);
+ break;
+
+ case POWER_SUPPLY_PROP_MODEL_NAME:
+ val->strval = ecbat->battery_model;
+ break;
+
+ case POWER_SUPPLY_PROP_MANUFACTURER:
+ val->strval = ecbat->battery_vendor;
+ break;
+
+ case POWER_SUPPLY_PROP_SERIAL_NUMBER:
+ val->strval = ecbat->battery_serial;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gaokun_psy_set_bat_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ const union power_supply_propval *val)
+{
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ u8 buf[GAOKUN_SMART_CHARGE_DATA_SIZE];
+ int ret;
+
+ if (!gaokun_psy_bat_present(ecbat))
+ return -ENODEV;
+
+ ret = gaokun_ec_psy_get_smart_charge(ecbat->ec, buf);
+ if (ret)
+ return ret;
+
+ switch (psp) {
+ /*
+ * Resetting another thershold makes single thersold setting more likely
+ * to succeed. But setting start = end makes thing strange(failure).
+ */
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD:
+ buf[SMART_CHARGE_START] = val->intval;
+ if (buf[SMART_CHARGE_START] > buf[SMART_CHARGE_END])
+ buf[SMART_CHARGE_END] = buf[SMART_CHARGE_START] + 1;
+ return gaokun_ec_psy_set_smart_charge(ecbat->ec, buf);
+
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD:
+ buf[SMART_CHARGE_END] = val->intval;
+ if (buf[SMART_CHARGE_END] < buf[SMART_CHARGE_START])
+ buf[SMART_CHARGE_START] = buf[SMART_CHARGE_END] - 1;
+ return gaokun_ec_psy_set_smart_charge(ecbat->ec, buf);
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gaokun_psy_is_bat_property_writeable(struct power_supply *psy,
+ enum power_supply_property psp)
+{
+ return psp == POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD ||
+ psp == POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD;
+}
+
+static enum power_supply_property gaokun_psy_bat_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_CYCLE_COUNT,
+ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+ POWER_SUPPLY_PROP_CHARGE_FULL,
+ POWER_SUPPLY_PROP_CHARGE_NOW,
+ POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD,
+ POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD,
+ POWER_SUPPLY_PROP_CAPACITY,
+ POWER_SUPPLY_PROP_MODEL_NAME,
+ POWER_SUPPLY_PROP_MANUFACTURER,
+ POWER_SUPPLY_PROP_SERIAL_NUMBER,
+};
+
+static const struct power_supply_desc gaokun_psy_bat_desc = {
+ .name = "gaokun-ec-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .get_property = gaokun_psy_get_bat_property,
+ .set_property = gaokun_psy_set_bat_property,
+ .property_is_writeable = gaokun_psy_is_bat_property_writeable,
+ .properties = gaokun_psy_bat_props,
+ .num_properties = ARRAY_SIZE(gaokun_psy_bat_props),
+};
+
+/* -------------------------------------------------------------------------- */
+/* Sysfs */
+
+/*
+ * Note that, HUAWEI calls them SBAC/GBAC and SBCM/GBCM in DSDT, they are likely
+ * Set/Get Battery Adaptive Charging and Set/Get Battery Charging Mode.
+ */
+
+/* battery adaptive charge */
+static ssize_t battery_adaptive_charge_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ int ret;
+ bool on;
+
+ ret = gaokun_ec_psy_get_smart_charge_enable(ecbat->ec, &on);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%d\n", on);
+}
+
+static ssize_t battery_adaptive_charge_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ int ret;
+ bool on;
+
+ if (kstrtobool(buf, &on))
+ return -EINVAL;
+
+ ret = gaokun_ec_psy_set_smart_charge_enable(ecbat->ec, on);
+ if (ret)
+ return ret;
+
+ return size;
+}
+
+static DEVICE_ATTR_RW(battery_adaptive_charge);
+
+static inline int get_charge_delay(u8 buf[GAOKUN_SMART_CHARGE_DATA_SIZE])
+{
+ return buf[SMART_CHARGE_MODE] == NO_DELAY_MODE ? 0 : buf[SMART_CHARGE_DELAY];
+}
+
+static inline void
+set_charge_delay(u8 buf[GAOKUN_SMART_CHARGE_DATA_SIZE], u8 delay)
+{
+ if (delay) {
+ buf[SMART_CHARGE_DELAY] = delay;
+ buf[SMART_CHARGE_MODE] = DELAY_MODE;
+ } else {
+ /* No writing zero, there is a specific mode for it. */
+ buf[SMART_CHARGE_MODE] = NO_DELAY_MODE;
+ }
+}
+
+/* Smart charge */
+static ssize_t smart_charge_delay_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ u8 bf[GAOKUN_SMART_CHARGE_DATA_SIZE];
+ int ret;
+
+ ret = gaokun_ec_psy_get_smart_charge(ecbat->ec, bf);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%d\n", get_charge_delay(bf));
+}
+
+static ssize_t smart_charge_delay_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct gaokun_psy *ecbat = power_supply_get_drvdata(psy);
+ u8 bf[GAOKUN_SMART_CHARGE_DATA_SIZE];
+ u8 delay;
+ int ret;
+
+ if (kstrtou8(buf, 10, &delay))
+ return -EINVAL;
+
+ ret = gaokun_ec_psy_get_smart_charge(ecbat->ec, bf);
+ if (ret)
+ return ret;
+
+ set_charge_delay(bf, delay);
+
+ ret = gaokun_ec_psy_set_smart_charge(ecbat->ec, bf);
+ if (ret)
+ return ret;
+
+ return size;
+}
+
+static DEVICE_ATTR_RW(smart_charge_delay);
+
+static struct attribute *gaokun_psy_features_attrs[] = {
+ &dev_attr_battery_adaptive_charge.attr,
+ &dev_attr_smart_charge_delay.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(gaokun_psy_features);
+
+static int gaokun_psy_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct gaokun_psy *ecbat = container_of(nb, struct gaokun_psy, nb);
+
+ switch (action) {
+ case EC_EVENT_BAT_A2:
+ case EC_EVENT_BAT_B1:
+ gaokun_psy_get_bat_info(ecbat);
+ return NOTIFY_OK;
+
+ case EC_EVENT_BAT_A0:
+ gaokun_psy_get_adp_status(ecbat);
+ power_supply_changed(ecbat->adp_psy);
+ msleep(10);
+ fallthrough;
+
+ case EC_EVENT_BAT_A1:
+ case EC_EVENT_BAT_A3:
+ if (action == EC_EVENT_BAT_A3) {
+ gaokun_psy_get_bat_info(ecbat);
+ msleep(100);
+ }
+ gaokun_psy_get_bat_status(ecbat);
+ power_supply_changed(ecbat->bat_psy);
+ return NOTIFY_OK;
+
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static int gaokun_psy_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct gaokun_ec *ec = adev->dev.platform_data;
+ struct power_supply_config psy_cfg = {};
+ struct device *dev = &adev->dev;
+ struct gaokun_psy *ecbat;
+
+ ecbat = devm_kzalloc(&adev->dev, sizeof(*ecbat), GFP_KERNEL);
+ if (!ecbat)
+ return -ENOMEM;
+
+ ecbat->ec = ec;
+ ecbat->dev = dev;
+ ecbat->nb.notifier_call = gaokun_psy_notify;
+
+ auxiliary_set_drvdata(adev, ecbat);
+
+ psy_cfg.drv_data = ecbat;
+ ecbat->adp_psy = devm_power_supply_register(dev, &gaokun_psy_adp_desc,
+ &psy_cfg);
+ if (IS_ERR(ecbat->adp_psy))
+ return dev_err_probe(dev, PTR_ERR(ecbat->adp_psy),
+ "Failed to register AC power supply\n");
+
+ psy_cfg.supplied_to = (char **)&gaokun_psy_bat_desc.name;
+ psy_cfg.num_supplicants = 1;
+ psy_cfg.no_wakeup_source = true;
+ psy_cfg.attr_grp = gaokun_psy_features_groups;
+ ecbat->bat_psy = devm_power_supply_register(dev, &gaokun_psy_bat_desc,
+ &psy_cfg);
+ if (IS_ERR(ecbat->bat_psy))
+ return dev_err_probe(dev, PTR_ERR(ecbat->bat_psy),
+ "Failed to register battery power supply\n");
+ gaokun_psy_init(ecbat);
+
+ return gaokun_ec_register_notify(ec, &ecbat->nb);
+}
+
+static void gaokun_psy_remove(struct auxiliary_device *adev)
+{
+ struct gaokun_psy *ecbat = auxiliary_get_drvdata(adev);
+
+ gaokun_ec_unregister_notify(ecbat->ec, &ecbat->nb);
+}
+
+static const struct auxiliary_device_id gaokun_psy_id_table[] = {
+ { .name = GAOKUN_MOD_NAME "." GAOKUN_DEV_PSY, },
+ {}
+};
+MODULE_DEVICE_TABLE(auxiliary, gaokun_psy_id_table);
+
+static struct auxiliary_driver gaokun_psy_driver = {
+ .name = GAOKUN_DEV_PSY,
+ .id_table = gaokun_psy_id_table,
+ .probe = gaokun_psy_probe,
+ .remove = gaokun_psy_remove,
+};
+
+module_auxiliary_driver(gaokun_psy_driver);
+
+MODULE_DESCRIPTION("HUAWEI Matebook E Go psy driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c
index 51310f6e4803..c1640bc6accd 100644
--- a/drivers/power/supply/max17040_battery.c
+++ b/drivers/power/supply/max17040_battery.c
@@ -410,8 +410,9 @@ static int max17040_get_property(struct power_supply *psy,
if (!chip->channel_temp)
return -ENODATA;
- iio_read_channel_processed_scale(chip->channel_temp,
- &val->intval, 10);
+ iio_read_channel_processed(chip->channel_temp, &val->intval);
+ val->intval /= 100; /* Convert from milli- to deci-degree */
+
break;
default:
return -EINVAL;
diff --git a/drivers/power/supply/max77705_charger.c b/drivers/power/supply/max77705_charger.c
index eec5e9ef795e..329b430d0e50 100644
--- a/drivers/power/supply/max77705_charger.c
+++ b/drivers/power/supply/max77705_charger.c
@@ -545,20 +545,28 @@ static int max77705_charger_probe(struct i2c_client *i2c)
return dev_err_probe(dev, ret, "failed to add irq chip\n");
chg->wqueue = create_singlethread_workqueue(dev_name(dev));
- if (IS_ERR(chg->wqueue))
- return dev_err_probe(dev, PTR_ERR(chg->wqueue), "failed to create workqueue\n");
+ if (!chg->wqueue)
+ return dev_err_probe(dev, -ENOMEM, "failed to create workqueue\n");
ret = devm_work_autocancel(dev, &chg->chgin_work, max77705_chgin_isr_work);
- if (ret)
- return dev_err_probe(dev, ret, "failed to initialize interrupt work\n");
+ if (ret) {
+ dev_err_probe(dev, ret, "failed to initialize interrupt work\n");
+ goto destroy_wq;
+ }
max77705_charger_initialize(chg);
ret = max77705_charger_enable(chg);
- if (ret)
- return dev_err_probe(dev, ret, "failed to enable charge\n");
+ if (ret) {
+ dev_err_probe(dev, ret, "failed to enable charge\n");
+ goto destroy_wq;
+ }
return devm_add_action_or_reset(dev, max77705_charger_disable, chg);
+
+destroy_wq:
+ destroy_workqueue(chg->wqueue);
+ return ret;
}
static const struct of_device_id max77705_charger_of_match[] = {
diff --git a/drivers/power/supply/max8971_charger.c b/drivers/power/supply/max8971_charger.c
new file mode 100644
index 000000000000..26416d26f235
--- /dev/null
+++ b/drivers/power/supply/max8971_charger.c
@@ -0,0 +1,752 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/devm-helpers.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/extcon.h>
+#include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_graph.h>
+#include <linux/property.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define MAX8971_REG_CHGINT 0x0f
+#define MAX8971_REG_CHG_RST BIT(0)
+#define MAX8971_REG_CHGINT_MASK 0x01
+#define MAX8971_AICL_MASK BIT(7)
+#define MAX8971_REG_CHG_STAT 0x02
+#define MAX8971_CHG_MASK BIT(3)
+#define MAX8971_REG_DETAILS1 0x03
+#define MAX8971_REG_DETAILS2 0x04
+#define MAX8971_REG_CHGCNTL1 0x05
+#define MAX8971_REG_FCHGCRNT 0x06
+#define MAX8971_REG_DCCRNT 0x07
+#define MAX8971_CHGRSTRT_MASK BIT(6)
+#define MAX8971_REG_TOPOFF 0x08
+#define MAX8971_REG_TEMPREG 0x09
+#define MAX8971_REG_PROTCMD 0x0a
+#define MAX8971_CHGPROT_LOCKED 0x00
+#define MAX8971_CHGPROT_UNLOCKED 0x03
+
+#define MAX8971_FCHGT_DEFAULT 2
+#define MAX8971_TOPOFFT_DEFAULT 3
+
+static const char *max8971_manufacturer = "Maxim Integrated";
+static const char *max8971_model = "MAX8971";
+
+enum max8971_charging_state {
+ MAX8971_CHARGING_DEAD_BATTERY,
+ MAX8971_CHARGING_PREQUALIFICATION,
+ MAX8971_CHARGING_FAST_CONST_CURRENT,
+ MAX8971_CHARGING_FAST_CONST_VOLTAGE,
+ MAX8971_CHARGING_TOP_OFF,
+ MAX8971_CHARGING_DONE,
+ MAX8971_CHARGING_TIMER_FAULT,
+ MAX8971_CHARGING_SUSPENDED_THERMAL,
+ MAX8971_CHARGING_OFF,
+ MAX8971_CHARGING_THERMAL_LOOP,
+};
+
+enum max8971_health_state {
+ MAX8971_HEALTH_UNKNOWN,
+ MAX8971_HEALTH_COLD,
+ MAX8971_HEALTH_COOL,
+ MAX8971_HEALTH_WARM,
+ MAX8971_HEALTH_HOT,
+ MAX8971_HEALTH_OVERHEAT,
+};
+
+/* Fast-Charge current limit, 250..1550 mA, 50 mA steps */
+#define MAX8971_CHG_CC_STEP 50000U
+#define MAX8971_CHG_CC_MIN 250000U
+#define MAX8971_CHG_CC_MAX 1550000U
+
+/* Input current limit, 250..1500 mA, 25 mA steps */
+#define MAX8971_DCILMT_STEP 25000U
+#define MAX8971_DCILMT_MIN 250000U
+#define MAX8971_DCILMT_MAX 1500000U
+
+enum max8971_field_idx {
+ THM_DTLS, /* DETAILS1 */
+ BAT_DTLS, CHG_DTLS, /* DETAILS2 */
+ CHG_CC, FCHG_T, /* FCHGCRNT */
+ DCI_LMT, /* DCCRNT */
+ TOPOFF_T, TOPOFF_S, /* TOPOFF */
+ CPROT, /* PROTCMD */
+ MAX8971_N_REGMAP_FIELDS
+};
+
+static const struct reg_field max8971_reg_field[MAX8971_N_REGMAP_FIELDS] = {
+ [THM_DTLS] = REG_FIELD(MAX8971_REG_DETAILS1, 0, 2),
+ [BAT_DTLS] = REG_FIELD(MAX8971_REG_DETAILS2, 4, 5),
+ [CHG_DTLS] = REG_FIELD(MAX8971_REG_DETAILS2, 0, 3),
+ [CHG_CC] = REG_FIELD(MAX8971_REG_FCHGCRNT, 0, 4),
+ [FCHG_T] = REG_FIELD(MAX8971_REG_FCHGCRNT, 5, 7),
+ [DCI_LMT] = REG_FIELD(MAX8971_REG_DCCRNT, 0, 5),
+ [TOPOFF_T] = REG_FIELD(MAX8971_REG_TOPOFF, 5, 7),
+ [TOPOFF_S] = REG_FIELD(MAX8971_REG_TOPOFF, 2, 3),
+ [CPROT] = REG_FIELD(MAX8971_REG_PROTCMD, 2, 3),
+};
+
+static const struct regmap_config max8971_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX8971_REG_CHGINT,
+};
+
+struct max8971_data {
+ struct device *dev;
+ struct power_supply *psy_mains;
+
+ struct extcon_dev *edev;
+ struct notifier_block extcon_nb;
+ struct delayed_work extcon_work;
+
+ struct regmap *regmap;
+ struct regmap_field *rfield[MAX8971_N_REGMAP_FIELDS];
+
+ enum power_supply_usb_type usb_type;
+
+ u32 fchgt;
+ u32 tofft;
+ u32 toffs;
+
+ bool present;
+};
+
+static int max8971_get_status(struct max8971_data *priv, int *val)
+{
+ u32 regval;
+ int err;
+
+ err = regmap_field_read(priv->rfield[CHG_DTLS], &regval);
+ if (err)
+ return err;
+
+ switch (regval) {
+ case MAX8971_CHARGING_DEAD_BATTERY:
+ case MAX8971_CHARGING_PREQUALIFICATION:
+ case MAX8971_CHARGING_FAST_CONST_CURRENT:
+ case MAX8971_CHARGING_FAST_CONST_VOLTAGE:
+ case MAX8971_CHARGING_TOP_OFF:
+ case MAX8971_CHARGING_THERMAL_LOOP:
+ *val = POWER_SUPPLY_STATUS_CHARGING;
+ break;
+ case MAX8971_CHARGING_DONE:
+ *val = POWER_SUPPLY_STATUS_FULL;
+ break;
+ case MAX8971_CHARGING_TIMER_FAULT:
+ *val = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ break;
+ case MAX8971_CHARGING_OFF:
+ case MAX8971_CHARGING_SUSPENDED_THERMAL:
+ *val = POWER_SUPPLY_STATUS_DISCHARGING;
+ break;
+ default:
+ *val = POWER_SUPPLY_STATUS_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static int max8971_get_charge_type(struct max8971_data *priv, int *val)
+{
+ u32 regval;
+ int err;
+
+ err = regmap_field_read(priv->rfield[CHG_DTLS], &regval);
+ if (err)
+ return err;
+
+ switch (regval) {
+ case MAX8971_CHARGING_DEAD_BATTERY:
+ case MAX8971_CHARGING_PREQUALIFICATION:
+ *val = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+ break;
+ case MAX8971_CHARGING_FAST_CONST_CURRENT:
+ case MAX8971_CHARGING_FAST_CONST_VOLTAGE:
+ *val = POWER_SUPPLY_CHARGE_TYPE_FAST;
+ break;
+ case MAX8971_CHARGING_TOP_OFF:
+ case MAX8971_CHARGING_THERMAL_LOOP:
+ *val = POWER_SUPPLY_CHARGE_TYPE_STANDARD;
+ break;
+ case MAX8971_CHARGING_DONE:
+ case MAX8971_CHARGING_TIMER_FAULT:
+ case MAX8971_CHARGING_SUSPENDED_THERMAL:
+ case MAX8971_CHARGING_OFF:
+ *val = POWER_SUPPLY_CHARGE_TYPE_NONE;
+ break;
+ default:
+ *val = POWER_SUPPLY_CHARGE_TYPE_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static int max8971_get_health(struct max8971_data *priv, int *val)
+{
+ u32 regval;
+ int err;
+
+ err = regmap_field_read(priv->rfield[THM_DTLS], &regval);
+ if (err)
+ return err;
+
+ switch (regval) {
+ case MAX8971_HEALTH_COLD:
+ *val = POWER_SUPPLY_HEALTH_COLD;
+ break;
+ case MAX8971_HEALTH_COOL:
+ *val = POWER_SUPPLY_HEALTH_COOL;
+ break;
+ case MAX8971_HEALTH_WARM:
+ *val = POWER_SUPPLY_HEALTH_GOOD;
+ break;
+ case MAX8971_HEALTH_HOT:
+ *val = POWER_SUPPLY_HEALTH_HOT;
+ break;
+ case MAX8971_HEALTH_OVERHEAT:
+ *val = POWER_SUPPLY_HEALTH_OVERHEAT;
+ break;
+ case MAX8971_HEALTH_UNKNOWN:
+ default:
+ *val = POWER_SUPPLY_HEALTH_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static int max8971_get_online(struct max8971_data *priv, int *val)
+{
+ u32 regval;
+ int err;
+
+ err = regmap_read(priv->regmap, MAX8971_REG_CHG_STAT, &regval);
+ if (err)
+ return err;
+
+ if (priv->present)
+ /* CHG_OK bit is 0 when charger is online */
+ *val = !(regval & MAX8971_CHG_MASK);
+ else
+ *val = priv->present;
+
+ return 0;
+}
+
+static int max8971_get_integer(struct max8971_data *priv, enum max8971_field_idx fidx,
+ u32 clamp_min, u32 clamp_max, u32 mult, int *val)
+{
+ u32 regval;
+ int err;
+
+ err = regmap_field_read(priv->rfield[fidx], &regval);
+ if (err)
+ return err;
+
+ *val = clamp_val(regval * mult, clamp_min, clamp_max);
+
+ return 0;
+}
+
+static int max8971_set_integer(struct max8971_data *priv, enum max8971_field_idx fidx,
+ u32 clamp_min, u32 clamp_max, u32 div, int val)
+{
+ u32 regval;
+
+ regval = clamp_val(val, clamp_min, clamp_max) / div;
+
+ return regmap_field_write(priv->rfield[fidx], regval);
+}
+
+static int max8971_get_property(struct power_supply *psy, enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ int err = 0;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+ err = max8971_get_status(priv, &val->intval);
+ break;
+ case POWER_SUPPLY_PROP_CHARGE_TYPE:
+ err = max8971_get_charge_type(priv, &val->intval);
+ break;
+ case POWER_SUPPLY_PROP_USB_TYPE:
+ val->intval = priv->usb_type;
+ break;
+ case POWER_SUPPLY_PROP_HEALTH:
+ err = max8971_get_health(priv, &val->intval);
+ break;
+ case POWER_SUPPLY_PROP_ONLINE:
+ err = max8971_get_online(priv, &val->intval);
+ break;
+ case POWER_SUPPLY_PROP_PRESENT:
+ val->intval = priv->present;
+ break;
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX:
+ val->intval = MAX8971_CHG_CC_MAX;
+ break;
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT:
+ err = max8971_get_integer(priv, CHG_CC, MAX8971_CHG_CC_MIN, MAX8971_CHG_CC_MAX,
+ MAX8971_CHG_CC_STEP, &val->intval);
+ break;
+ case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+ err = max8971_get_integer(priv, DCI_LMT, MAX8971_DCILMT_MIN, MAX8971_DCILMT_MAX,
+ MAX8971_DCILMT_STEP, &val->intval);
+ break;
+ case POWER_SUPPLY_PROP_MODEL_NAME:
+ val->strval = max8971_model;
+ break;
+ case POWER_SUPPLY_PROP_MANUFACTURER:
+ val->strval = max8971_manufacturer;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int max8971_set_property(struct power_supply *psy, enum power_supply_property psp,
+ const union power_supply_propval *val)
+{
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ int err = 0;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT:
+ err = max8971_set_integer(priv, CHG_CC, MAX8971_CHG_CC_MIN, MAX8971_CHG_CC_MAX,
+ MAX8971_CHG_CC_STEP, val->intval);
+ break;
+ case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+ err = max8971_set_integer(priv, DCI_LMT, MAX8971_DCILMT_MIN, MAX8971_DCILMT_MAX,
+ MAX8971_DCILMT_STEP, val->intval);
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+};
+
+static int max8971_property_is_writeable(struct power_supply *psy,
+ enum power_supply_property psp)
+{
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT:
+ case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static enum power_supply_property max8971_properties[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_CHARGE_TYPE,
+ POWER_SUPPLY_PROP_USB_TYPE,
+ POWER_SUPPLY_PROP_HEALTH,
+ POWER_SUPPLY_PROP_ONLINE,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT,
+ POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX,
+ POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
+ POWER_SUPPLY_PROP_MODEL_NAME,
+ POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
+static const struct power_supply_desc max8971_charger_desc = {
+ .name = "max8971-charger",
+ .type = POWER_SUPPLY_TYPE_USB,
+ .usb_types = BIT(POWER_SUPPLY_USB_TYPE_UNKNOWN) |
+ BIT(POWER_SUPPLY_USB_TYPE_SDP) |
+ BIT(POWER_SUPPLY_USB_TYPE_DCP) |
+ BIT(POWER_SUPPLY_USB_TYPE_CDP) |
+ BIT(POWER_SUPPLY_USB_TYPE_ACA),
+ .properties = max8971_properties,
+ .num_properties = ARRAY_SIZE(max8971_properties),
+ .get_property = max8971_get_property,
+ .set_property = max8971_set_property,
+ .property_is_writeable = max8971_property_is_writeable,
+};
+
+static void max8971_update_config(struct max8971_data *priv)
+{
+ regmap_field_write(priv->rfield[CPROT], MAX8971_CHGPROT_UNLOCKED);
+
+ if (priv->fchgt != MAX8971_FCHGT_DEFAULT)
+ regmap_field_write(priv->rfield[FCHG_T], priv->fchgt);
+
+ regmap_write_bits(priv->regmap, MAX8971_REG_DCCRNT, MAX8971_CHGRSTRT_MASK,
+ MAX8971_CHGRSTRT_MASK);
+
+ if (priv->tofft != MAX8971_TOPOFFT_DEFAULT)
+ regmap_field_write(priv->rfield[TOPOFF_T], priv->tofft);
+
+ if (priv->toffs)
+ regmap_field_write(priv->rfield[TOPOFF_S], priv->toffs);
+
+ regmap_field_write(priv->rfield[CPROT], MAX8971_CHGPROT_LOCKED);
+}
+
+static ssize_t fast_charge_timer_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ u32 regval;
+ int err;
+
+ err = regmap_field_read(priv->rfield[FCHG_T], &regval);
+ if (err)
+ return err;
+
+ switch (regval) {
+ case 0x1 ... 0x7:
+ /* Time is off by 3 hours comparing to value */
+ regval += 3;
+ break;
+ case 0x0:
+ default:
+ regval = 0;
+ break;
+ }
+
+ return sysfs_emit(buf, "%u\n", regval);
+}
+
+static ssize_t fast_charge_timer_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ unsigned long hours;
+ int val, err;
+
+ err = kstrtoul(buf, 10, &hours);
+ if (err)
+ return err;
+
+ val = hours - 3;
+ if (val <= 0 || val > 7)
+ priv->fchgt = 0;
+ else
+ priv->fchgt = val;
+
+ max8971_update_config(priv);
+
+ return count;
+}
+
+static ssize_t top_off_threshold_current_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ u32 regval, val;
+ int err;
+
+ err = regmap_field_read(priv->rfield[TOPOFF_S], &regval);
+ if (err)
+ return err;
+
+ /* 50uA start with 50uA step */
+ val = regval * 50 + 50;
+ val *= 1000;
+
+ return sysfs_emit(buf, "%u\n", val);
+}
+
+static ssize_t top_off_threshold_current_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ unsigned long uamp;
+ int err;
+
+ err = kstrtoul(buf, 10, &uamp);
+ if (err)
+ return err;
+
+ if (uamp < 50000 || uamp > 200000)
+ return -EINVAL;
+
+ priv->toffs = uamp / 50000 - 1;
+
+ max8971_update_config(priv);
+
+ return count;
+}
+
+static ssize_t top_off_timer_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ u32 regval;
+ int err;
+
+ err = regmap_field_read(priv->rfield[TOPOFF_T], &regval);
+ if (err)
+ return err;
+
+ /* 10 min intervals */
+ regval *= 10;
+
+ return sysfs_emit(buf, "%u\n", regval);
+}
+
+static ssize_t top_off_timer_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct power_supply *psy = to_power_supply(dev);
+ struct max8971_data *priv = power_supply_get_drvdata(psy);
+ unsigned long minutes;
+ int err;
+
+ err = kstrtoul(buf, 10, &minutes);
+ if (err)
+ return err;
+
+ if (minutes > 70)
+ return -EINVAL;
+
+ priv->tofft = minutes / 10;
+
+ max8971_update_config(priv);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(fast_charge_timer);
+static DEVICE_ATTR_RW(top_off_threshold_current);
+static DEVICE_ATTR_RW(top_off_timer);
+
+static struct attribute *max8971_attrs[] = {
+ &dev_attr_fast_charge_timer.attr,
+ &dev_attr_top_off_threshold_current.attr,
+ &dev_attr_top_off_timer.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(max8971);
+
+static void max8971_extcon_evt_worker(struct work_struct *work)
+{
+ struct max8971_data *priv =
+ container_of(work, struct max8971_data, extcon_work.work);
+ struct device *dev = priv->dev;
+ struct extcon_dev *edev = priv->edev;
+ u32 chgcc, dcilmt;
+
+ if (extcon_get_state(edev, EXTCON_CHG_USB_SDP) > 0) {
+ dev_dbg(dev, "USB SDP charger is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_SDP;
+ chgcc = 500000;
+ dcilmt = 500000;
+ } else if (extcon_get_state(edev, EXTCON_USB) > 0) {
+ dev_dbg(dev, "USB charger is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_SDP;
+ chgcc = 500000;
+ dcilmt = 500000;
+ } else if (extcon_get_state(edev, EXTCON_DISP_MHL) > 0) {
+ dev_dbg(dev, "MHL plug is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_SDP;
+ chgcc = 500000;
+ dcilmt = 500000;
+ } else if (extcon_get_state(edev, EXTCON_CHG_USB_DCP) > 0) {
+ dev_dbg(dev, "USB DCP charger is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_DCP;
+ chgcc = 900000;
+ dcilmt = 1200000;
+ } else if (extcon_get_state(edev, EXTCON_CHG_USB_FAST) > 0) {
+ dev_dbg(dev, "USB FAST charger is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_ACA;
+ chgcc = 900000;
+ dcilmt = 1200000;
+ } else if (extcon_get_state(edev, EXTCON_CHG_USB_SLOW) > 0) {
+ dev_dbg(dev, "USB SLOW charger is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_ACA;
+ chgcc = 900000;
+ dcilmt = 1200000;
+ } else if (extcon_get_state(edev, EXTCON_CHG_USB_CDP) > 0) {
+ dev_dbg(dev, "USB CDP charger is connected\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_CDP;
+ chgcc = 900000;
+ dcilmt = 1200000;
+ } else {
+ dev_dbg(dev, "USB state is unknown\n");
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_UNKNOWN;
+ return;
+ }
+
+ regmap_field_write(priv->rfield[CPROT], MAX8971_CHGPROT_UNLOCKED);
+
+ max8971_set_integer(priv, CHG_CC, MAX8971_CHG_CC_MIN, MAX8971_CHG_CC_MAX,
+ MAX8971_CHG_CC_STEP, chgcc);
+ max8971_set_integer(priv, DCI_LMT, MAX8971_DCILMT_MIN, MAX8971_DCILMT_MAX,
+ MAX8971_DCILMT_STEP, dcilmt);
+
+ regmap_field_write(priv->rfield[CPROT], MAX8971_CHGPROT_LOCKED);
+}
+
+static int extcon_get_charger_type(struct notifier_block *nb,
+ unsigned long state, void *data)
+{
+ struct max8971_data *priv =
+ container_of(nb, struct max8971_data, extcon_nb);
+ schedule_delayed_work(&priv->extcon_work, 0);
+
+ return NOTIFY_OK;
+}
+
+static irqreturn_t max8971_interrupt(int irq, void *dev_id)
+{
+ struct max8971_data *priv = dev_id;
+ struct device *dev = priv->dev;
+ int err, state;
+
+ err = regmap_read(priv->regmap, MAX8971_REG_CHGINT, &state);
+ if (err)
+ dev_err(dev, "interrupt reg read failed %d\n", err);
+
+ err = regmap_write_bits(priv->regmap, MAX8971_REG_CHGINT_MASK,
+ MAX8971_AICL_MASK, MAX8971_AICL_MASK);
+ if (err)
+ dev_err(dev, "failed to mask IRQ\n");
+
+ /* set presence prop */
+ priv->present = state & MAX8971_REG_CHG_RST;
+
+ /* on every plug chip resets to default */
+ if (priv->present)
+ max8971_update_config(priv);
+
+ /* update supply status */
+ power_supply_changed(priv->psy_mains);
+
+ return IRQ_HANDLED;
+}
+
+static int max8971_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct max8971_data *priv;
+ struct device_node *extcon;
+ struct power_supply_config cfg = { };
+ int err, i;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ priv->usb_type = POWER_SUPPLY_USB_TYPE_UNKNOWN;
+
+ i2c_set_clientdata(client, priv);
+
+ priv->regmap = devm_regmap_init_i2c(client, &max8971_regmap_config);
+ if (IS_ERR(priv->regmap))
+ return dev_err_probe(dev, PTR_ERR(priv->regmap), "cannot allocate regmap\n");
+
+ for (i = 0; i < MAX8971_N_REGMAP_FIELDS; i++) {
+ priv->rfield[i] = devm_regmap_field_alloc(dev, priv->regmap, max8971_reg_field[i]);
+ if (IS_ERR(priv->rfield[i]))
+ return dev_err_probe(dev, PTR_ERR(priv->rfield[i]),
+ "cannot allocate regmap field\n");
+ }
+
+ cfg.attr_grp = max8971_groups;
+ cfg.drv_data = priv;
+ cfg.fwnode = dev_fwnode(dev);
+
+ priv->psy_mains = devm_power_supply_register(dev, &max8971_charger_desc, &cfg);
+ if (IS_ERR(priv->psy_mains))
+ return dev_err_probe(dev, PTR_ERR(priv->psy_mains),
+ "failed to register mains supply\n");
+
+ err = regmap_write_bits(priv->regmap, MAX8971_REG_CHGINT_MASK, MAX8971_AICL_MASK,
+ MAX8971_AICL_MASK);
+ if (err)
+ return dev_err_probe(dev, err, "failed to mask IRQ\n");
+
+ err = devm_request_threaded_irq(dev, client->irq, NULL, &max8971_interrupt,
+ IRQF_ONESHOT | IRQF_SHARED, client->name, priv);
+ if (err)
+ return dev_err_probe(dev, err, "failed to register IRQ %d\n", client->irq);
+
+ extcon = of_graph_get_remote_node(dev->of_node, -1, -1);
+ if (!extcon)
+ return 0;
+
+ priv->edev = extcon_find_edev_by_node(extcon);
+ of_node_put(extcon);
+ if (IS_ERR(priv->edev))
+ return dev_err_probe(dev, PTR_ERR(priv->edev), "failed to find extcon\n");
+
+ err = devm_delayed_work_autocancel(dev, &priv->extcon_work,
+ max8971_extcon_evt_worker);
+ if (err)
+ return dev_err_probe(dev, err, "failed to add extcon evt stop action\n");
+
+ priv->extcon_nb.notifier_call = extcon_get_charger_type;
+
+ err = devm_extcon_register_notifier_all(dev, priv->edev, &priv->extcon_nb);
+ if (err)
+ return dev_err_probe(dev, err, "failed to register notifier\n");
+
+ /* Initial configuration work with 1 sec delay */
+ schedule_delayed_work(&priv->extcon_work, msecs_to_jiffies(1000));
+
+ return 0;
+}
+
+static int __maybe_unused max8971_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct max8971_data *priv = i2c_get_clientdata(client);
+
+ irq_wake_thread(client->irq, priv);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(max8971_pm_ops, NULL, max8971_resume);
+
+static const struct of_device_id max8971_match_ids[] = {
+ { .compatible = "maxim,max8971" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, max8971_match_ids);
+
+static const struct i2c_device_id max8971_i2c_id[] = {
+ { "max8971" },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, max8971_i2c_id);
+
+static struct i2c_driver max8971_driver = {
+ .driver = {
+ .name = "max8971-charger",
+ .of_match_table = max8971_match_ids,
+ .pm = &max8971_pm_ops,
+ },
+ .probe = max8971_probe,
+ .id_table = max8971_i2c_id,
+};
+module_i2c_driver(max8971_driver);
+
+MODULE_AUTHOR("Svyatoslav Ryhel <clamor95@gmail.com>");
+MODULE_DESCRIPTION("MAX8971 Charger Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index 439dd0bf8644..a438f7983d4f 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -321,6 +321,27 @@ static ssize_t power_supply_show_charge_behaviour(struct device *dev,
value->intval, buf);
}
+static ssize_t power_supply_show_charge_types(struct device *dev,
+ struct power_supply *psy,
+ enum power_supply_charge_type current_type,
+ char *buf)
+{
+ struct power_supply_ext_registration *reg;
+
+ scoped_guard(rwsem_read, &psy->extensions_sem) {
+ power_supply_for_each_extension(reg, psy) {
+ if (power_supply_ext_has_property(reg->ext,
+ POWER_SUPPLY_PROP_CHARGE_TYPES))
+ return power_supply_charge_types_show(dev,
+ reg->ext->charge_types,
+ current_type, buf);
+ }
+ }
+
+ return power_supply_charge_types_show(dev, psy->desc->charge_types,
+ current_type, buf);
+}
+
static ssize_t power_supply_format_property(struct device *dev,
bool uevent,
struct device_attribute *attr,
@@ -365,7 +386,7 @@ static ssize_t power_supply_format_property(struct device *dev,
case POWER_SUPPLY_PROP_CHARGE_TYPES:
if (uevent) /* no possible values in uevents */
goto default_format;
- ret = power_supply_charge_types_show(dev, psy->desc->charge_types,
+ ret = power_supply_show_charge_types(dev, psy,
value.intval, buf);
break;
case POWER_SUPPLY_PROP_MODEL_NAME ... POWER_SUPPLY_PROP_SERIAL_NUMBER:
diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c
index 945c7720c4ae..1251022eb052 100644
--- a/drivers/power/supply/rk817_charger.c
+++ b/drivers/power/supply/rk817_charger.c
@@ -1088,7 +1088,7 @@ static int rk817_charger_probe(struct platform_device *pdev)
rk817_bat_calib_vol(charger);
pscfg.drv_data = charger;
- pscfg.fwnode = node ? &node->fwnode : NULL;
+ pscfg.fwnode = &node->fwnode;
/*
* Get sample resistor value. Note only values of 10000 or 20000
diff --git a/drivers/power/supply/rt9471.c b/drivers/power/supply/rt9471.c
index bd966abb4df5..e7f843f12c98 100644
--- a/drivers/power/supply/rt9471.c
+++ b/drivers/power/supply/rt9471.c
@@ -192,12 +192,12 @@ static const struct reg_field rt9471_reg_fields[F_MAX_FIELDS] = {
};
static const struct linear_range rt9471_chg_ranges[RT9471_MAX_RANGES] = {
- [RT9471_RANGE_AICR] = { .min = 50000, .min_sel = 1, .max_sel = 63, .step = 50000 },
- [RT9471_RANGE_MIVR] = { .min = 3900000, .min_sel = 0, .max_sel = 15, .step = 100000 },
- [RT9471_RANGE_IPRE] = { .min = 50000, .min_sel = 0, .max_sel = 15, .step = 50000 },
- [RT9471_RANGE_VCHG] = { .min = 3900000, .min_sel = 0, .max_sel = 80, .step = 10000 },
- [RT9471_RANGE_ICHG] = { .min = 0, .min_sel = 0, .max_sel = 63, .step = 50000 },
- [RT9471_RANGE_IEOC] = { .min = 50000, .min_sel = 0, .max_sel = 15, .step = 50000 },
+ [RT9471_RANGE_AICR] = LINEAR_RANGE(50000, 1, 63, 50000),
+ [RT9471_RANGE_MIVR] = LINEAR_RANGE(3900000, 0, 15, 100000),
+ [RT9471_RANGE_IPRE] = LINEAR_RANGE(50000, 0, 15, 50000),
+ [RT9471_RANGE_VCHG] = LINEAR_RANGE(3900000, 0, 80, 10000),
+ [RT9471_RANGE_ICHG] = LINEAR_RANGE(0, 0, 63, 50000),
+ [RT9471_RANGE_IEOC] = LINEAR_RANGE(50000, 0, 15, 50000),
};
static int rt9471_set_value_by_field_range(struct rt9471_chip *chip,
diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c
index 2a975a110f48..b5f148081c51 100644
--- a/drivers/power/supply/test_power.c
+++ b/drivers/power/supply/test_power.c
@@ -37,6 +37,8 @@ static int battery_charge_counter = -1000;
static int battery_current = -1600;
static enum power_supply_charge_behaviour battery_charge_behaviour =
POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO;
+static enum power_supply_charge_type battery_charge_types =
+ POWER_SUPPLY_CHARGE_TYPE_STANDARD;
static bool battery_extension;
static bool module_initialized;
@@ -87,7 +89,7 @@ static int test_power_get_battery_property(struct power_supply *psy,
val->intval = battery_status;
break;
case POWER_SUPPLY_PROP_CHARGE_TYPE:
- val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
+ val->intval = battery_charge_types;
break;
case POWER_SUPPLY_PROP_HEALTH:
val->intval = battery_health;
@@ -129,6 +131,9 @@ static int test_power_get_battery_property(struct power_supply *psy,
case POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR:
val->intval = battery_charge_behaviour;
break;
+ case POWER_SUPPLY_PROP_CHARGE_TYPES:
+ val->intval = battery_charge_types;
+ break;
default:
pr_info("%s: some properties deliberately report errors.\n",
__func__);
@@ -140,7 +145,7 @@ static int test_power_get_battery_property(struct power_supply *psy,
static int test_power_battery_property_is_writeable(struct power_supply *psy,
enum power_supply_property psp)
{
- return psp == POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR;
+ return psp == POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR || psp == POWER_SUPPLY_PROP_CHARGE_TYPES;
}
static int test_power_set_battery_property(struct power_supply *psy,
@@ -156,6 +161,14 @@ static int test_power_set_battery_property(struct power_supply *psy,
}
battery_charge_behaviour = val->intval;
break;
+ case POWER_SUPPLY_PROP_CHARGE_TYPES:
+ if (val->intval < 0 ||
+ val->intval >= BITS_PER_TYPE(typeof(psy->desc->charge_types)) ||
+ !(BIT(val->intval) & psy->desc->charge_types)) {
+ return -EINVAL;
+ }
+ battery_charge_types = val->intval;
+ break;
default:
return -EINVAL;
}
@@ -188,6 +201,7 @@ static enum power_supply_property test_power_battery_props[] = {
POWER_SUPPLY_PROP_CURRENT_AVG,
POWER_SUPPLY_PROP_CURRENT_NOW,
POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR,
+ POWER_SUPPLY_PROP_CHARGE_TYPES,
};
static char *test_power_ac_supplied_to[] = {
@@ -215,6 +229,8 @@ static const struct power_supply_desc test_power_desc[] = {
.charge_behaviours = BIT(POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO)
| BIT(POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE)
| BIT(POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE),
+ .charge_types = BIT(POWER_SUPPLY_CHARGE_TYPE_STANDARD)
+ | BIT(POWER_SUPPLY_CHARGE_TYPE_LONGLIFE)
},
[TEST_USB] = {
.name = "test_usb",
diff --git a/drivers/power/supply/wm831x_power.c b/drivers/power/supply/wm831x_power.c
index 538055b29dec..6acdba7885ca 100644
--- a/drivers/power/supply/wm831x_power.c
+++ b/drivers/power/supply/wm831x_power.c
@@ -89,7 +89,7 @@ static int wm831x_wall_get_prop(struct power_supply *psy,
return ret;
}
-static enum power_supply_property wm831x_wall_props[] = {
+static const enum power_supply_property wm831x_wall_props[] = {
POWER_SUPPLY_PROP_ONLINE,
POWER_SUPPLY_PROP_VOLTAGE_NOW,
};
@@ -120,7 +120,7 @@ static int wm831x_usb_get_prop(struct power_supply *psy,
return ret;
}
-static enum power_supply_property wm831x_usb_props[] = {
+static const enum power_supply_property wm831x_usb_props[] = {
POWER_SUPPLY_PROP_ONLINE,
POWER_SUPPLY_PROP_VOLTAGE_NOW,
};
@@ -171,21 +171,21 @@ struct chg_map {
int reg_val;
};
-static struct chg_map trickle_ilims[] = {
+static const struct chg_map trickle_ilims[] = {
{ 50, 0 << WM831X_CHG_TRKL_ILIM_SHIFT },
{ 100, 1 << WM831X_CHG_TRKL_ILIM_SHIFT },
{ 150, 2 << WM831X_CHG_TRKL_ILIM_SHIFT },
{ 200, 3 << WM831X_CHG_TRKL_ILIM_SHIFT },
};
-static struct chg_map vsels[] = {
+static const struct chg_map vsels[] = {
{ 4050, 0 << WM831X_CHG_VSEL_SHIFT },
{ 4100, 1 << WM831X_CHG_VSEL_SHIFT },
{ 4150, 2 << WM831X_CHG_VSEL_SHIFT },
{ 4200, 3 << WM831X_CHG_VSEL_SHIFT },
};
-static struct chg_map fast_ilims[] = {
+static const struct chg_map fast_ilims[] = {
{ 0, 0 << WM831X_CHG_FAST_ILIM_SHIFT },
{ 50, 1 << WM831X_CHG_FAST_ILIM_SHIFT },
{ 100, 2 << WM831X_CHG_FAST_ILIM_SHIFT },
@@ -204,7 +204,7 @@ static struct chg_map fast_ilims[] = {
{ 1000, 15 << WM831X_CHG_FAST_ILIM_SHIFT },
};
-static struct chg_map eoc_iterms[] = {
+static const struct chg_map eoc_iterms[] = {
{ 20, 0 << WM831X_CHG_ITERM_SHIFT },
{ 30, 1 << WM831X_CHG_ITERM_SHIFT },
{ 40, 2 << WM831X_CHG_ITERM_SHIFT },
@@ -215,7 +215,7 @@ static struct chg_map eoc_iterms[] = {
{ 90, 7 << WM831X_CHG_ITERM_SHIFT },
};
-static struct chg_map chg_times[] = {
+static const struct chg_map chg_times[] = {
{ 60, 0 << WM831X_CHG_TIME_SHIFT },
{ 90, 1 << WM831X_CHG_TIME_SHIFT },
{ 120, 2 << WM831X_CHG_TIME_SHIFT },
@@ -235,7 +235,7 @@ static struct chg_map chg_times[] = {
};
static void wm831x_battery_apply_config(struct wm831x *wm831x,
- struct chg_map *map, int count, int val,
+ const struct chg_map *map, int count, int val,
int *reg, const char *name,
const char *units)
{
@@ -462,7 +462,7 @@ static int wm831x_bat_get_prop(struct power_supply *psy,
return ret;
}
-static enum power_supply_property wm831x_bat_props[] = {
+static const enum power_supply_property wm831x_bat_props[] = {
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_ONLINE,
POWER_SUPPLY_PROP_VOLTAGE_NOW,
@@ -470,7 +470,7 @@ static enum power_supply_property wm831x_bat_props[] = {
POWER_SUPPLY_PROP_CHARGE_TYPE,
};
-static const char *wm831x_bat_irqs[] = {
+static const char * const wm831x_bat_irqs[] = {
"BATT HOT",
"BATT COLD",
"BATT FAIL",
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 5ab3feb29686..e3be40adc0d7 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -28,6 +28,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/iosf_mbi.h>
+#include <asm/msr.h>
/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 2b81aabdb0db..8ad2115d65f6 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -24,6 +24,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/msr.h>
/* Local defines */
#define MSR_PLATFORM_POWER_LIMIT 0x0000065C
@@ -103,7 +104,7 @@ static int rapl_cpu_down_prep(unsigned int cpu)
static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
{
- if (rdmsrl_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
+ if (rdmsrq_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
return -EIO;
}
@@ -116,14 +117,14 @@ static void rapl_msr_update_func(void *info)
struct reg_action *ra = info;
u64 val;
- ra->err = rdmsrl_safe(ra->reg.msr, &val);
+ ra->err = rdmsrq_safe(ra->reg.msr, &val);
if (ra->err)
return;
val &= ~ra->mask;
val |= ra->value;
- ra->err = wrmsrl_safe(ra->reg.msr, val);
+ ra->err = wrmsrq_safe(ra->reg.msr, val);
}
static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 2ccdca4f6960..e63481f24238 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -315,6 +315,8 @@ struct ptp_ocp_serial_port {
#define OCP_BOARD_ID_LEN 13
#define OCP_SERIAL_LEN 6
#define OCP_SMA_NUM 4
+#define OCP_SIGNAL_NUM 4
+#define OCP_FREQ_NUM 4
enum {
PORT_GNSS,
@@ -342,8 +344,8 @@ struct ptp_ocp {
struct dcf_master_reg __iomem *dcf_out;
struct dcf_slave_reg __iomem *dcf_in;
struct tod_reg __iomem *nmea_out;
- struct frequency_reg __iomem *freq_in[4];
- struct ptp_ocp_ext_src *signal_out[4];
+ struct frequency_reg __iomem *freq_in[OCP_FREQ_NUM];
+ struct ptp_ocp_ext_src *signal_out[OCP_SIGNAL_NUM];
struct ptp_ocp_ext_src *pps;
struct ptp_ocp_ext_src *ts0;
struct ptp_ocp_ext_src *ts1;
@@ -378,10 +380,12 @@ struct ptp_ocp {
u32 utc_tai_offset;
u32 ts_window_adjust;
u64 fw_cap;
- struct ptp_ocp_signal signal[4];
+ struct ptp_ocp_signal signal[OCP_SIGNAL_NUM];
struct ptp_ocp_sma_connector sma[OCP_SMA_NUM];
const struct ocp_sma_op *sma_op;
struct dpll_device *dpll;
+ int signals_nr;
+ int freq_in_nr;
};
#define OCP_REQ_TIMESTAMP BIT(0)
@@ -2697,6 +2701,8 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
bp->eeprom_map = fb_eeprom_map;
bp->fw_version = ioread32(&bp->image->version);
bp->sma_op = &ocp_fb_sma_op;
+ bp->signals_nr = 4;
+ bp->freq_in_nr = 4;
ptp_ocp_fb_set_version(bp);
@@ -2862,6 +2868,8 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
bp->fw_version = ioread32(&bp->reg->version);
bp->fw_tag = 2;
bp->sma_op = &ocp_art_sma_op;
+ bp->signals_nr = 4;
+ bp->freq_in_nr = 4;
/* Enable MAC serial port during initialisation */
iowrite32(1, &bp->board_config->mro50_serial_activate);
@@ -2888,6 +2896,8 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
bp->flash_start = 0xA00000;
bp->eeprom_map = fb_eeprom_map;
bp->sma_op = &ocp_adva_sma_op;
+ bp->signals_nr = 2;
+ bp->freq_in_nr = 2;
version = ioread32(&bp->image->version);
/* if lower 16 bits are empty, this is the fw loader. */
@@ -4008,7 +4018,7 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr)
{
struct signal_reg __iomem *reg = bp->signal_out[nr]->mem;
struct ptp_ocp_signal *signal = &bp->signal[nr];
- char label[8];
+ char label[16];
bool on;
u32 val;
@@ -4031,7 +4041,7 @@ static void
_frequency_summary_show(struct seq_file *s, int nr,
struct frequency_reg __iomem *reg)
{
- char label[8];
+ char label[16];
bool on;
u32 val;
@@ -4175,11 +4185,11 @@ ptp_ocp_summary_show(struct seq_file *s, void *data)
}
if (bp->fw_cap & OCP_CAP_SIGNAL)
- for (i = 0; i < 4; i++)
+ for (i = 0; i < bp->signals_nr; i++)
_signal_summary_show(s, bp, i);
if (bp->fw_cap & OCP_CAP_FREQ)
- for (i = 0; i < 4; i++)
+ for (i = 0; i < bp->freq_in_nr; i++)
_frequency_summary_show(s, i, bp->freq_in[i]);
if (bp->irig_out) {
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 4731d5b90d7e..d9bcd1e8413e 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -114,6 +114,16 @@ config PWM_AXI_PWMGEN
To compile this driver as a module, choose M here: the module will be
called pwm-axi-pwmgen.
+config PWM_BCM2835
+ tristate "BCM2835 PWM support"
+ depends on ARCH_BCM2835 || ARCH_BRCMSTB || COMPILE_TEST
+ depends on HAS_IOMEM
+ help
+ PWM framework driver for BCM2835 controller (Raspberry Pi)
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-bcm2835.
+
config PWM_BCM_IPROC
tristate "iProc PWM support"
depends on ARCH_BCM_IPROC || COMPILE_TEST
@@ -137,16 +147,6 @@ config PWM_BCM_KONA
To compile this driver as a module, choose M here: the module
will be called pwm-bcm-kona.
-config PWM_BCM2835
- tristate "BCM2835 PWM support"
- depends on ARCH_BCM2835 || ARCH_BRCMSTB || COMPILE_TEST
- depends on HAS_IOMEM
- help
- PWM framework driver for BCM2835 controller (Raspberry Pi)
-
- To compile this driver as a module, choose M here: the module
- will be called pwm-bcm2835.
-
config PWM_BERLIN
tristate "Marvell Berlin PWM support"
depends on ARCH_BERLIN || COMPILE_TEST
@@ -351,6 +351,18 @@ config PWM_KEEMBAY
To compile this driver as a module, choose M here: the module
will be called pwm-keembay.
+config PWM_LOONGSON
+ tristate "Loongson PWM support"
+ depends on MACH_LOONGSON64 || COMPILE_TEST
+ depends on COMMON_CLK
+ help
+ Generic PWM framework driver for Loongson family.
+ It can be found on Loongson-2K series cpus and Loongson LS7A
+ bridge chips.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-loongson.
+
config PWM_LP3943
tristate "TI/National Semiconductor LP3943 PWM support"
depends on MFD_LP3943
@@ -411,26 +423,17 @@ config PWM_LPSS_PLATFORM
To compile this driver as a module, choose M here: the module
will be called pwm-lpss-platform.
-config PWM_MESON
- tristate "Amlogic Meson PWM driver"
- depends on ARCH_MESON || COMPILE_TEST
- depends on COMMON_CLK && HAS_IOMEM
- help
- The platform driver for Amlogic Meson PWM controller.
-
- To compile this driver as a module, choose M here: the module
- will be called pwm-meson.
-
-config PWM_MTK_DISP
- tristate "MediaTek display PWM driver"
- depends on ARCH_MEDIATEK || COMPILE_TEST
- depends on HAS_IOMEM
+config PWM_MC33XS2410
+ tristate "MC33XS2410 PWM support"
+ depends on OF
+ depends on SPI
help
- Generic PWM framework driver for MediaTek disp-pwm device.
- The PWM is used to control the backlight brightness for display.
+ NXP MC33XS2410 high-side switch driver. The MC33XS2410 is a four
+ channel high-side switch. The device is operational from 3.0 V
+ to 60 V. The device is controlled by SPI port for configuration.
To compile this driver as a module, choose M here: the module
- will be called pwm-mtk-disp.
+ will be called pwm-mc33xs2410.
config PWM_MEDIATEK
tristate "MediaTek PWM support"
@@ -442,6 +445,16 @@ config PWM_MEDIATEK
To compile this driver as a module, choose M here: the module
will be called pwm-mediatek.
+config PWM_MESON
+ tristate "Amlogic Meson PWM driver"
+ depends on ARCH_MESON || COMPILE_TEST
+ depends on COMMON_CLK && HAS_IOMEM
+ help
+ The platform driver for Amlogic Meson PWM controller.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-meson.
+
config PWM_MICROCHIP_CORE
tristate "Microchip corePWM PWM support"
depends on ARCH_MICROCHIP_POLARFIRE || COMPILE_TEST
@@ -452,6 +465,17 @@ config PWM_MICROCHIP_CORE
To compile this driver as a module, choose M here: the module
will be called pwm-microchip-core.
+config PWM_MTK_DISP
+ tristate "MediaTek display PWM driver"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ depends on HAS_IOMEM
+ help
+ Generic PWM framework driver for MediaTek disp-pwm device.
+ The PWM is used to control the backlight brightness for display.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-mtk-disp.
+
config PWM_MXS
tristate "Freescale MXS PWM support"
depends on ARCH_MXS || COMPILE_TEST
@@ -510,7 +534,7 @@ config PWM_RASPBERRYPI_POE
Enable Raspberry Pi firmware controller PWM bus used to control the
official RPI PoE hat
-config PWM_RCAR
+config PWM_RENESAS_RCAR
tristate "Renesas R-Car PWM support"
depends on ARCH_RENESAS || COMPILE_TEST
depends on HAS_IOMEM
@@ -521,6 +545,28 @@ config PWM_RCAR
To compile this driver as a module, choose M here: the module
will be called pwm-rcar.
+config PWM_RENESAS_RZG2L_GPT
+ tristate "Renesas RZ/G2L General PWM Timer support"
+ depends on ARCH_RZG2L || COMPILE_TEST
+ depends on HAS_IOMEM
+ help
+ This driver exposes the General PWM Timer controller found in Renesas
+ RZ/G2L like chips through the PWM API.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-rzg2l-gpt.
+
+config PWM_RENESAS_RZ_MTU3
+ tristate "Renesas RZ/G2L MTU3a PWM Timer support"
+ depends on RZ_MTU3
+ depends on HAS_IOMEM
+ help
+ This driver exposes the MTU3a PWM Timer controller found in Renesas
+ RZ/G2L like chips through the PWM API.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-rz-mtu3.
+
config PWM_RENESAS_TPU
tristate "Renesas TPU PWM support"
depends on ARCH_RENESAS || COMPILE_TEST
@@ -540,17 +586,6 @@ config PWM_ROCKCHIP
Generic PWM framework driver for the PWM controller found on
Rockchip SoCs.
-config PWM_RZ_MTU3
- tristate "Renesas RZ/G2L MTU3a PWM Timer support"
- depends on RZ_MTU3
- depends on HAS_IOMEM
- help
- This driver exposes the MTU3a PWM Timer controller found in Renesas
- RZ/G2L like chips through the PWM API.
-
- To compile this driver as a module, choose M here: the module
- will be called pwm-rz-mtu3.
-
config PWM_SAMSUNG
tristate "Samsung PWM support"
depends on PLAT_SAMSUNG || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index 539e0def3f82..96160f4257fc 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -7,9 +7,9 @@ obj-$(CONFIG_PWM_ATMEL) += pwm-atmel.o
obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM) += pwm-atmel-hlcdc.o
obj-$(CONFIG_PWM_ATMEL_TCB) += pwm-atmel-tcb.o
obj-$(CONFIG_PWM_AXI_PWMGEN) += pwm-axi-pwmgen.o
+obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o
obj-$(CONFIG_PWM_BCM_IPROC) += pwm-bcm-iproc.o
obj-$(CONFIG_PWM_BCM_KONA) += pwm-bcm-kona.o
-obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o
obj-$(CONFIG_PWM_BERLIN) += pwm-berlin.o
obj-$(CONFIG_PWM_BRCMSTB) += pwm-brcmstb.o
obj-$(CONFIG_PWM_CLK) += pwm-clk.o
@@ -30,14 +30,16 @@ obj-$(CONFIG_PWM_INTEL_LGM) += pwm-intel-lgm.o
obj-$(CONFIG_PWM_IQS620A) += pwm-iqs620a.o
obj-$(CONFIG_PWM_JZ4740) += pwm-jz4740.o
obj-$(CONFIG_PWM_KEEMBAY) += pwm-keembay.o
+obj-$(CONFIG_PWM_LOONGSON) += pwm-loongson.o
obj-$(CONFIG_PWM_LP3943) += pwm-lp3943.o
obj-$(CONFIG_PWM_LPC18XX_SCT) += pwm-lpc18xx-sct.o
obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o
obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o
obj-$(CONFIG_PWM_LPSS_PCI) += pwm-lpss-pci.o
obj-$(CONFIG_PWM_LPSS_PLATFORM) += pwm-lpss-platform.o
-obj-$(CONFIG_PWM_MESON) += pwm-meson.o
+obj-$(CONFIG_PWM_MC33XS2410) += pwm-mc33xs2410.o
obj-$(CONFIG_PWM_MEDIATEK) += pwm-mediatek.o
+obj-$(CONFIG_PWM_MESON) += pwm-meson.o
obj-$(CONFIG_PWM_MICROCHIP_CORE) += pwm-microchip-core.o
obj-$(CONFIG_PWM_MTK_DISP) += pwm-mtk-disp.o
obj-$(CONFIG_PWM_MXS) += pwm-mxs.o
@@ -46,10 +48,11 @@ obj-$(CONFIG_PWM_OMAP_DMTIMER) += pwm-omap-dmtimer.o
obj-$(CONFIG_PWM_PCA9685) += pwm-pca9685.o
obj-$(CONFIG_PWM_PXA) += pwm-pxa.o
obj-$(CONFIG_PWM_RASPBERRYPI_POE) += pwm-raspberrypi-poe.o
-obj-$(CONFIG_PWM_RCAR) += pwm-rcar.o
+obj-$(CONFIG_PWM_RENESAS_RCAR) += pwm-rcar.o
+obj-$(CONFIG_PWM_RENESAS_RZG2L_GPT) += pwm-rzg2l-gpt.o
+obj-$(CONFIG_PWM_RENESAS_RZ_MTU3) += pwm-rz-mtu3.o
obj-$(CONFIG_PWM_RENESAS_TPU) += pwm-renesas-tpu.o
obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o
-obj-$(CONFIG_PWM_RZ_MTU3) += pwm-rz-mtu3.o
obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o
obj-$(CONFIG_PWM_SIFIVE) += pwm-sifive.o
obj-$(CONFIG_PWM_SL28CPLD) += pwm-sl28cpld.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 0387bd838487..4d842c692194 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -216,21 +216,28 @@ static int __pwm_write_waveform(struct pwm_chip *chip, struct pwm_device *pwm, c
*
* Typically a given waveform cannot be implemented exactly by hardware, e.g.
* because hardware only supports coarse period resolution or no duty_offset.
- * This function returns the actually implemented waveform if you pass wf to
- * pwm_set_waveform_might_sleep now.
+ * This function returns the actually implemented waveform if you pass @wf to
+ * pwm_set_waveform_might_sleep() now.
*
* Note however that the world doesn't stop turning when you call it, so when
- * doing
+ * doing::
*
- * pwm_round_waveform_might_sleep(mypwm, &wf);
- * pwm_set_waveform_might_sleep(mypwm, &wf, true);
+ * pwm_round_waveform_might_sleep(mypwm, &wf);
+ * pwm_set_waveform_might_sleep(mypwm, &wf, true);
*
* the latter might fail, e.g. because an input clock changed its rate between
* these two calls and the waveform determined by
* pwm_round_waveform_might_sleep() cannot be implemented any more.
*
- * Returns 0 on success, 1 if there is no valid hardware configuration matching
- * the input waveform under the PWM rounding rules or a negative errno.
+ * Usually all values passed in @wf are rounded down to the nearest possible
+ * value (in the order period_length_ns, duty_length_ns and then
+ * duty_offset_ns). Only if this isn't possible, a value might grow. See the
+ * documentation for pwm_set_waveform_might_sleep() for a more formal
+ * description.
+ *
+ * Returns: 0 on success, 1 if at least one value had to be rounded up or a
+ * negative errno.
+ * Context: May sleep.
*/
int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf)
{
@@ -270,10 +277,10 @@ int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *
wf_req.duty_length_ns, wf_req.period_length_ns, wf_req.duty_offset_ns, ret_tohw);
if (IS_ENABLED(CONFIG_PWM_DEBUG) &&
- ret_tohw == 0 && !pwm_check_rounding(&wf_req, wf))
- dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n",
+ (ret_tohw == 0) != pwm_check_rounding(&wf_req, wf))
+ dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu], ret: %d\n",
wf_req.duty_length_ns, wf_req.period_length_ns, wf_req.duty_offset_ns,
- wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns);
+ wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, ret_tohw);
return ret_tohw;
}
@@ -287,6 +294,9 @@ EXPORT_SYMBOL_GPL(pwm_round_waveform_might_sleep);
*
* Stores the current configuration of the PWM in @wf. Note this is the
* equivalent of pwm_get_state_hw() (and not pwm_get_state()) for pwm_waveform.
+ *
+ * Returns: 0 on success or a negative errno
+ * Context: May sleep.
*/
int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf)
{
@@ -341,10 +351,10 @@ static int __pwm_set_waveform(struct pwm_device *pwm,
if (err)
return err;
- if (IS_ENABLED(CONFIG_PWM_DEBUG) && ret_tohw == 0 && !pwm_check_rounding(wf, &wf_rounded))
- dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n",
+ if (IS_ENABLED(CONFIG_PWM_DEBUG) && (ret_tohw == 0) != pwm_check_rounding(wf, &wf_rounded))
+ dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu], ret: %d\n",
wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns,
- wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns);
+ wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns, ret_tohw);
if (exact && pwmwfcmp(wf, &wf_rounded)) {
dev_dbg(&chip->dev, "Requested no rounding, but %llu/%llu [+%llu] -> %llu/%llu [+%llu]\n",
@@ -395,13 +405,37 @@ static int __pwm_set_waveform(struct pwm_device *pwm,
*
* Typically a requested waveform cannot be implemented exactly, e.g. because
* you requested .period_length_ns = 100 ns, but the hardware can only set
- * periods that are a multiple of 8.5 ns. With that hardware passing exact =
- * true results in pwm_set_waveform_might_sleep() failing and returning 1. If
- * exact = false you get a period of 93.5 ns (i.e. the biggest period not bigger
- * than the requested value).
- * Note that even with exact = true, some rounding by less than 1 is
+ * periods that are a multiple of 8.5 ns. With that hardware passing @exact =
+ * true results in pwm_set_waveform_might_sleep() failing and returning -EDOM.
+ * If @exact = false you get a period of 93.5 ns (i.e. the biggest period not
+ * bigger than the requested value).
+ * Note that even with @exact = true, some rounding by less than 1 ns is
* possible/needed. In the above example requesting .period_length_ns = 94 and
- * exact = true, you get the hardware configured with period = 93.5 ns.
+ * @exact = true, you get the hardware configured with period = 93.5 ns.
+ *
+ * Let C be the set of possible hardware configurations for a given PWM device,
+ * consisting of tuples (p, d, o) where p is the period length, d is the duty
+ * length and o the duty offset.
+ *
+ * The following algorithm is implemented to pick the hardware setting
+ * (p, d, o) ∈ C for a given request (p', d', o') with @exact = false::
+ *
+ * p = max( { ṗ | ∃ ḋ, ȯ : (ṗ, ḋ, ȯ) ∈ C ∧ ṗ ≤ p' } ∪ { min({ ṗ | ∃ ḋ, ȯ : (ṗ, ḋ, ȯ) ∈ C }) })
+ * d = max( { ḋ | ∃ ȯ : (p, ḋ, ȯ) ∈ C ∧ ḋ ≤ d' } ∪ { min({ ḋ | ∃ ȯ : (p, ḋ, ȯ) ∈ C }) })
+ * o = max( { ȯ | (p, d, ȯ) ∈ C ∧ ȯ ≤ o' } ∪ { min({ ȯ | (p, d, ȯ) ∈ C }) })
+ *
+ * In words: The chosen period length is the maximal possible period length not
+ * bigger than the requested period length and if that doesn't exist, the
+ * minimal period length. The chosen duty length is the maximal possible duty
+ * length that is compatible with the chosen period length and isn't bigger than
+ * the requested duty length. Again if such a value doesn't exist, the minimal
+ * duty length compatible with the chosen period is picked. After that the duty
+ * offset compatible with the chosen period and duty length is chosen in the
+ * same way.
+ *
+ * Returns: 0 on success, -EDOM if setting failed due to the exact waveform not
+ * being possible (if @exact), or a different negative errno on failure.
+ * Context: May sleep.
*/
int pwm_set_waveform_might_sleep(struct pwm_device *pwm,
const struct pwm_waveform *wf, bool exact)
@@ -428,6 +462,19 @@ int pwm_set_waveform_might_sleep(struct pwm_device *pwm,
err = __pwm_set_waveform(pwm, wf, exact);
}
+ /*
+ * map err == 1 to -EDOM for exact requests and 0 for !exact ones. Also
+ * make sure that -EDOM is only returned in exactly that case. Note that
+ * __pwm_set_waveform() should never return -EDOM which justifies the
+ * unlikely().
+ */
+ if (unlikely(err == -EDOM))
+ err = -EINVAL;
+ else if (exact && err == 1)
+ err = -EDOM;
+ else if (err == 1)
+ err = 0;
+
return err;
}
EXPORT_SYMBOL_GPL(pwm_set_waveform_might_sleep);
@@ -561,11 +608,6 @@ static bool pwm_state_valid(const struct pwm_state *state)
return true;
}
-/**
- * __pwm_apply() - atomically apply a new state to a PWM device
- * @pwm: PWM device
- * @state: new state to apply
- */
static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state)
{
struct pwm_chip *chip;
@@ -674,6 +716,9 @@ static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state)
* Cannot be used in atomic context.
* @pwm: PWM device
* @state: new state to apply
+ *
+ * Returns: 0 on success, or a negative errno
+ * Context: May sleep.
*/
int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
{
@@ -715,6 +760,9 @@ EXPORT_SYMBOL_GPL(pwm_apply_might_sleep);
* Not all PWM devices support this function, check with pwm_might_sleep().
* @pwm: PWM device
* @state: new state to apply
+ *
+ * Returns: 0 on success, or a negative errno
+ * Context: Any
*/
int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state)
{
@@ -788,6 +836,9 @@ EXPORT_SYMBOL_GPL(pwm_get_state_hw);
* This function will adjust the PWM config to the PWM arguments provided
* by the DT or PWM lookup table. This is particularly useful to adapt
* the bootloader config to the Linux one.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ * Context: May sleep.
*/
int pwm_adjust_config(struct pwm_device *pwm)
{
@@ -2221,25 +2272,28 @@ static void pwm_dbg_show(struct pwm_chip *chip, struct seq_file *s)
for (i = 0; i < chip->npwm; i++) {
struct pwm_device *pwm = &chip->pwms[i];
- struct pwm_state state;
+ struct pwm_state state, hwstate;
pwm_get_state(pwm, &state);
+ pwm_get_state_hw(pwm, &hwstate);
seq_printf(s, " pwm-%-3d (%-20.20s):", i, pwm->label);
if (test_bit(PWMF_REQUESTED, &pwm->flags))
seq_puts(s, " requested");
- if (state.enabled)
- seq_puts(s, " enabled");
+ seq_puts(s, "\n");
- seq_printf(s, " period: %llu ns", state.period);
- seq_printf(s, " duty: %llu ns", state.duty_cycle);
- seq_printf(s, " polarity: %s",
+ seq_printf(s, " requested configuration: %3sabled, %llu/%llu ns, %s polarity",
+ state.enabled ? "en" : "dis", state.duty_cycle, state.period,
state.polarity ? "inverse" : "normal");
-
if (state.usage_power)
- seq_puts(s, " usage_power");
+ seq_puts(s, ", usage_power");
+ seq_puts(s, "\n");
+
+ seq_printf(s, " actual configuration: %3sabled, %llu/%llu ns, %s polarity",
+ hwstate.enabled ? "en" : "dis", hwstate.duty_cycle, hwstate.period,
+ hwstate.polarity ? "inverse" : "normal");
seq_puts(s, "\n");
}
diff --git a/drivers/pwm/pwm-adp5585.c b/drivers/pwm/pwm-adp5585.c
index 40472ac5db64..d79106d12181 100644
--- a/drivers/pwm/pwm-adp5585.c
+++ b/drivers/pwm/pwm-adp5585.c
@@ -20,6 +20,7 @@
#include <linux/mfd/adp5585.h>
#include <linux/minmax.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/regmap.h>
diff --git a/drivers/pwm/pwm-loongson.c b/drivers/pwm/pwm-loongson.c
new file mode 100644
index 000000000000..1ba16168cbb4
--- /dev/null
+++ b/drivers/pwm/pwm-loongson.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2025 Loongson Technology Corporation Limited.
+ *
+ * Loongson PWM driver
+ *
+ * For Loongson's PWM IP block documentation please refer Chapter 11 of
+ * Reference Manual: https://loongson.github.io/LoongArch-Documentation/Loongson-7A1000-usermanual-EN.pdf
+ *
+ * Author: Juxin Gao <gaojuxin@loongson.cn>
+ * Further cleanup and restructuring by:
+ * Binbin Zhou <zhoubinbin@loongson.cn>
+ *
+ * Limitations:
+ * - If both DUTY and PERIOD are set to 0, the output is a constant low signal.
+ * - When disabled the output is driven to 0 independent of the configured
+ * polarity.
+ * - If the register is reconfigured while PWM is running, it does not complete
+ * the currently running period.
+ * - Disabling the PWM stops the output immediately (without waiting for current
+ * period to complete first).
+ */
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/units.h>
+
+/* Loongson PWM registers */
+#define LOONGSON_PWM_REG_DUTY 0x4 /* Low Pulse Buffer Register */
+#define LOONGSON_PWM_REG_PERIOD 0x8 /* Pulse Period Buffer Register */
+#define LOONGSON_PWM_REG_CTRL 0xc /* Control Register */
+
+/* Control register bits */
+#define LOONGSON_PWM_CTRL_REG_EN BIT(0) /* Counter Enable Bit */
+#define LOONGSON_PWM_CTRL_REG_OE BIT(3) /* Pulse Output Enable Control Bit, Valid Low */
+#define LOONGSON_PWM_CTRL_REG_SINGLE BIT(4) /* Single Pulse Control Bit */
+#define LOONGSON_PWM_CTRL_REG_INTE BIT(5) /* Interrupt Enable Bit */
+#define LOONGSON_PWM_CTRL_REG_INT BIT(6) /* Interrupt Bit */
+#define LOONGSON_PWM_CTRL_REG_RST BIT(7) /* Counter Reset Bit */
+#define LOONGSON_PWM_CTRL_REG_CAPTE BIT(8) /* Measurement Pulse Enable Bit */
+#define LOONGSON_PWM_CTRL_REG_INVERT BIT(9) /* Output flip-flop Enable Bit */
+#define LOONGSON_PWM_CTRL_REG_DZONE BIT(10) /* Anti-dead Zone Enable Bit */
+
+/* default input clk frequency for the ACPI case */
+#define LOONGSON_PWM_FREQ_DEFAULT 50000 /* Hz */
+
+struct pwm_loongson_ddata {
+ struct clk *clk;
+ void __iomem *base;
+ u64 clk_rate;
+};
+
+static inline __pure struct pwm_loongson_ddata *to_pwm_loongson_ddata(struct pwm_chip *chip)
+{
+ return pwmchip_get_drvdata(chip);
+}
+
+static inline u32 pwm_loongson_readl(struct pwm_loongson_ddata *ddata, u32 offset)
+{
+ return readl(ddata->base + offset);
+}
+
+static inline void pwm_loongson_writel(struct pwm_loongson_ddata *ddata,
+ u32 val, u32 offset)
+{
+ writel(val, ddata->base + offset);
+}
+
+static int pwm_loongson_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
+ enum pwm_polarity polarity)
+{
+ u16 val;
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+
+ val = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL);
+
+ if (polarity == PWM_POLARITY_INVERSED)
+ /* Duty cycle defines LOW period of PWM */
+ val |= LOONGSON_PWM_CTRL_REG_INVERT;
+ else
+ /* Duty cycle defines HIGH period of PWM */
+ val &= ~LOONGSON_PWM_CTRL_REG_INVERT;
+
+ pwm_loongson_writel(ddata, val, LOONGSON_PWM_REG_CTRL);
+
+ return 0;
+}
+
+static void pwm_loongson_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+ u32 val;
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+
+ val = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL);
+ val &= ~LOONGSON_PWM_CTRL_REG_EN;
+ pwm_loongson_writel(ddata, val, LOONGSON_PWM_REG_CTRL);
+}
+
+static int pwm_loongson_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+ u32 val;
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+
+ val = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL);
+ val |= LOONGSON_PWM_CTRL_REG_EN;
+ pwm_loongson_writel(ddata, val, LOONGSON_PWM_REG_CTRL);
+
+ return 0;
+}
+
+static int pwm_loongson_config(struct pwm_chip *chip, struct pwm_device *pwm,
+ u64 duty_ns, u64 period_ns)
+{
+ u64 duty, period;
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+
+ /* duty = duty_ns * ddata->clk_rate / NSEC_PER_SEC */
+ duty = mul_u64_u64_div_u64(duty_ns, ddata->clk_rate, NSEC_PER_SEC);
+ if (duty > U32_MAX)
+ duty = U32_MAX;
+
+ /* period = period_ns * ddata->clk_rate / NSEC_PER_SEC */
+ period = mul_u64_u64_div_u64(period_ns, ddata->clk_rate, NSEC_PER_SEC);
+ if (period > U32_MAX)
+ period = U32_MAX;
+
+ pwm_loongson_writel(ddata, duty, LOONGSON_PWM_REG_DUTY);
+ pwm_loongson_writel(ddata, period, LOONGSON_PWM_REG_PERIOD);
+
+ return 0;
+}
+
+static int pwm_loongson_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int ret;
+ bool enabled = pwm->state.enabled;
+
+ if (!state->enabled) {
+ if (enabled)
+ pwm_loongson_disable(chip, pwm);
+ return 0;
+ }
+
+ ret = pwm_loongson_set_polarity(chip, pwm, state->polarity);
+ if (ret)
+ return ret;
+
+ ret = pwm_loongson_config(chip, pwm, state->duty_cycle, state->period);
+ if (ret)
+ return ret;
+
+ if (!enabled && state->enabled)
+ ret = pwm_loongson_enable(chip, pwm);
+
+ return ret;
+}
+
+static int pwm_loongson_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ u32 duty, period, ctrl;
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+
+ duty = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_DUTY);
+ period = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_PERIOD);
+ ctrl = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL);
+
+ /* duty & period have a max of 2^32, so we can't overflow */
+ state->duty_cycle = DIV64_U64_ROUND_UP((u64)duty * NSEC_PER_SEC, ddata->clk_rate);
+ state->period = DIV64_U64_ROUND_UP((u64)period * NSEC_PER_SEC, ddata->clk_rate);
+ state->polarity = (ctrl & LOONGSON_PWM_CTRL_REG_INVERT) ? PWM_POLARITY_INVERSED :
+ PWM_POLARITY_NORMAL;
+ state->enabled = (ctrl & LOONGSON_PWM_CTRL_REG_EN) ? true : false;
+
+ return 0;
+}
+
+static const struct pwm_ops pwm_loongson_ops = {
+ .apply = pwm_loongson_apply,
+ .get_state = pwm_loongson_get_state,
+};
+
+static int pwm_loongson_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct pwm_chip *chip;
+ struct pwm_loongson_ddata *ddata;
+ struct device *dev = &pdev->dev;
+
+ chip = devm_pwmchip_alloc(dev, 1, sizeof(*ddata));
+ if (IS_ERR(chip))
+ return PTR_ERR(chip);
+ ddata = to_pwm_loongson_ddata(chip);
+
+ ddata->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ddata->base))
+ return PTR_ERR(ddata->base);
+
+ ddata->clk = devm_clk_get_optional_enabled(dev, NULL);
+ if (IS_ERR(ddata->clk))
+ return dev_err_probe(dev, PTR_ERR(ddata->clk),
+ "Failed to get pwm clock\n");
+ if (ddata->clk) {
+ ret = devm_clk_rate_exclusive_get(dev, ddata->clk);
+ if (ret)
+ return dev_err_probe(dev, ret,
+ "Failed to get exclusive rate\n");
+
+ ddata->clk_rate = clk_get_rate(ddata->clk);
+ if (!ddata->clk_rate)
+ return dev_err_probe(dev, -EINVAL,
+ "Failed to get frequency\n");
+ } else {
+ ddata->clk_rate = LOONGSON_PWM_FREQ_DEFAULT;
+ }
+
+ /* This check is done to prevent an overflow in .apply */
+ if (ddata->clk_rate > NSEC_PER_SEC)
+ return dev_err_probe(dev, -EINVAL, "PWM clock out of range\n");
+
+ chip->ops = &pwm_loongson_ops;
+ chip->atomic = true;
+ dev_set_drvdata(dev, chip);
+
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Failed to add PWM chip\n");
+
+ return 0;
+}
+
+static int pwm_loongson_suspend(struct device *dev)
+{
+ struct pwm_chip *chip = dev_get_drvdata(dev);
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+ struct pwm_device *pwm = &chip->pwms[0];
+
+ if (pwm->state.enabled)
+ return -EBUSY;
+
+ clk_disable_unprepare(ddata->clk);
+
+ return 0;
+}
+
+static int pwm_loongson_resume(struct device *dev)
+{
+ struct pwm_chip *chip = dev_get_drvdata(dev);
+ struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip);
+
+ return clk_prepare_enable(ddata->clk);
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(pwm_loongson_pm_ops, pwm_loongson_suspend,
+ pwm_loongson_resume);
+
+static const struct of_device_id pwm_loongson_of_ids[] = {
+ { .compatible = "loongson,ls7a-pwm" },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, pwm_loongson_of_ids);
+
+static const struct acpi_device_id pwm_loongson_acpi_ids[] = {
+ { "LOON0006" },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, pwm_loongson_acpi_ids);
+
+static struct platform_driver pwm_loongson_driver = {
+ .probe = pwm_loongson_probe,
+ .driver = {
+ .name = "loongson-pwm",
+ .pm = pm_ptr(&pwm_loongson_pm_ops),
+ .of_match_table = pwm_loongson_of_ids,
+ .acpi_match_table = pwm_loongson_acpi_ids,
+ },
+};
+module_platform_driver(pwm_loongson_driver);
+
+MODULE_DESCRIPTION("Loongson PWM driver");
+MODULE_AUTHOR("Loongson Technology Corporation Limited.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-mc33xs2410.c b/drivers/pwm/pwm-mc33xs2410.c
new file mode 100644
index 000000000000..a1ac3445ccdb
--- /dev/null
+++ b/drivers/pwm/pwm-mc33xs2410.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Liebherr-Electronics and Drives GmbH
+ *
+ * Reference Manual : https://www.nxp.com/docs/en/data-sheet/MC33XS2410.pdf
+ *
+ * Limitations:
+ * - Supports frequencies between 0.5Hz and 2048Hz with following steps:
+ * - 0.5 Hz steps from 0.5 Hz to 32 Hz
+ * - 2 Hz steps from 2 Hz to 128 Hz
+ * - 8 Hz steps from 8 Hz to 512 Hz
+ * - 32 Hz steps from 32 Hz to 2048 Hz
+ * - Cannot generate a 0 % duty cycle.
+ * - Always produces low output if disabled.
+ * - Configuration isn't atomic. When changing polarity, duty cycle or period
+ * the data is taken immediately, counters not being affected, resulting in a
+ * behavior of the output pin that is neither the old nor the new state,
+ * rather something in between.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/math64.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pwm.h>
+
+#include <linux/spi/spi.h>
+
+#define MC33XS2410_GLB_CTRL 0x00
+#define MC33XS2410_GLB_CTRL_MODE GENMASK(7, 6)
+#define MC33XS2410_GLB_CTRL_MODE_NORMAL FIELD_PREP(MC33XS2410_GLB_CTRL_MODE, 1)
+
+#define MC33XS2410_PWM_CTRL1 0x05
+/* chan in { 1 ... 4 } */
+#define MC33XS2410_PWM_CTRL1_POL_INV(chan) BIT((chan) + 1)
+
+#define MC33XS2410_PWM_CTRL3 0x07
+/* chan in { 1 ... 4 } */
+#define MC33XS2410_PWM_CTRL3_EN(chan) BIT(4 + (chan) - 1)
+
+/* chan in { 1 ... 4 } */
+#define MC33XS2410_PWM_FREQ(chan) (0x08 + (chan) - 1)
+#define MC33XS2410_PWM_FREQ_STEP GENMASK(7, 6)
+#define MC33XS2410_PWM_FREQ_COUNT GENMASK(5, 0)
+
+/* chan in { 1 ... 4 } */
+#define MC33XS2410_PWM_DC(chan) (0x0c + (chan) - 1)
+
+#define MC33XS2410_WDT 0x14
+
+#define MC33XS2410_PWM_MIN_PERIOD 488282
+/* step in { 0 ... 3 } */
+#define MC33XS2410_PWM_MAX_PERIOD(step) (2000000000 >> (2 * (step)))
+
+#define MC33XS2410_FRAME_IN_ADDR GENMASK(15, 8)
+#define MC33XS2410_FRAME_IN_DATA GENMASK(7, 0)
+#define MC33XS2410_FRAME_IN_ADDR_WR BIT(7)
+#define MC33XS2410_FRAME_IN_DATA_RD BIT(7)
+#define MC33XS2410_FRAME_OUT_DATA GENMASK(13, 0)
+
+#define MC33XS2410_MAX_TRANSFERS 5
+
+static int mc33xs2410_write_regs(struct spi_device *spi, u8 *reg, u8 *val,
+ unsigned int len)
+{
+ u16 tx[MC33XS2410_MAX_TRANSFERS];
+ int i;
+
+ if (len > MC33XS2410_MAX_TRANSFERS)
+ return -EINVAL;
+
+ for (i = 0; i < len; i++)
+ tx[i] = FIELD_PREP(MC33XS2410_FRAME_IN_DATA, val[i]) |
+ FIELD_PREP(MC33XS2410_FRAME_IN_ADDR,
+ MC33XS2410_FRAME_IN_ADDR_WR | reg[i]);
+
+ return spi_write(spi, tx, len * 2);
+}
+
+static int mc33xs2410_read_regs(struct spi_device *spi, u8 *reg, u8 flag,
+ u16 *val, unsigned int len)
+{
+ u16 tx[MC33XS2410_MAX_TRANSFERS];
+ u16 rx[MC33XS2410_MAX_TRANSFERS];
+ struct spi_transfer t = {
+ .tx_buf = tx,
+ .rx_buf = rx,
+ };
+ int i, ret;
+
+ len++;
+ if (len > MC33XS2410_MAX_TRANSFERS)
+ return -EINVAL;
+
+ t.len = len * 2;
+ for (i = 0; i < len - 1; i++)
+ tx[i] = FIELD_PREP(MC33XS2410_FRAME_IN_DATA, flag) |
+ FIELD_PREP(MC33XS2410_FRAME_IN_ADDR, reg[i]);
+
+ ret = spi_sync_transfer(spi, &t, 1);
+ if (ret < 0)
+ return ret;
+
+ for (i = 1; i < len; i++)
+ val[i - 1] = FIELD_GET(MC33XS2410_FRAME_OUT_DATA, rx[i]);
+
+ return 0;
+}
+
+static int mc33xs2410_write_reg(struct spi_device *spi, u8 reg, u8 val)
+{
+ return mc33xs2410_write_regs(spi, &reg, &val, 1);
+}
+
+static int mc33xs2410_read_reg(struct spi_device *spi, u8 reg, u16 *val, u8 flag)
+{
+ return mc33xs2410_read_regs(spi, &reg, flag, val, 1);
+}
+
+static int mc33xs2410_read_reg_ctrl(struct spi_device *spi, u8 reg, u16 *val)
+{
+ return mc33xs2410_read_reg(spi, reg, val, MC33XS2410_FRAME_IN_DATA_RD);
+}
+
+static int mc33xs2410_modify_reg(struct spi_device *spi, u8 reg, u8 mask, u8 val)
+{
+ u16 tmp;
+ int ret;
+
+ ret = mc33xs2410_read_reg_ctrl(spi, reg, &tmp);
+ if (ret < 0)
+ return ret;
+
+ tmp &= ~mask;
+ tmp |= val & mask;
+
+ return mc33xs2410_write_reg(spi, reg, tmp);
+}
+
+static u8 mc33xs2410_pwm_get_freq(u64 period)
+{
+ u8 step, count;
+
+ /*
+ * Check which step [0 .. 3] is appropriate for the given period. The
+ * period ranges for the different step values overlap. Prefer big step
+ * values as these allow more finegrained period and duty cycle
+ * selection.
+ */
+
+ switch (period) {
+ case MC33XS2410_PWM_MIN_PERIOD ... MC33XS2410_PWM_MAX_PERIOD(3):
+ step = 3;
+ break;
+ case MC33XS2410_PWM_MAX_PERIOD(3) + 1 ... MC33XS2410_PWM_MAX_PERIOD(2):
+ step = 2;
+ break;
+ case MC33XS2410_PWM_MAX_PERIOD(2) + 1 ... MC33XS2410_PWM_MAX_PERIOD(1):
+ step = 1;
+ break;
+ case MC33XS2410_PWM_MAX_PERIOD(1) + 1 ... MC33XS2410_PWM_MAX_PERIOD(0):
+ step = 0;
+ break;
+ }
+
+ /*
+ * Round up here because a higher count results in a higher frequency
+ * and so a smaller period.
+ */
+ count = DIV_ROUND_UP((u32)MC33XS2410_PWM_MAX_PERIOD(step), (u32)period);
+ return FIELD_PREP(MC33XS2410_PWM_FREQ_STEP, step) |
+ FIELD_PREP(MC33XS2410_PWM_FREQ_COUNT, count - 1);
+}
+
+static u64 mc33xs2410_pwm_get_period(u8 reg)
+{
+ u32 doubled_freq, code, doubled_steps;
+
+ /*
+ * steps:
+ * - 0 = 0.5Hz
+ * - 1 = 2Hz
+ * - 2 = 8Hz
+ * - 3 = 32Hz
+ * frequency = (code + 1) x steps.
+ *
+ * To avoid losing precision in case steps value is zero, scale the
+ * steps value for now by two and keep it in mind when calculating the
+ * period that the frequency had been doubled.
+ */
+ doubled_steps = 1 << (FIELD_GET(MC33XS2410_PWM_FREQ_STEP, reg) * 2);
+ code = FIELD_GET(MC33XS2410_PWM_FREQ_COUNT, reg);
+ doubled_freq = (code + 1) * doubled_steps;
+
+ /* Convert frequency to period, considering the doubled frequency. */
+ return DIV_ROUND_UP(2 * NSEC_PER_SEC, doubled_freq);
+}
+
+/*
+ * The hardware cannot generate a 0% relative duty cycle for normal and inversed
+ * polarity. For normal polarity, the channel must be disabled, the device then
+ * emits a constant low signal.
+ * For inverted polarity, the channel must be enabled, the polarity must be set
+ * to normal and the relative duty cylce must be set to 100%. The device then
+ * emits a constant high signal.
+ */
+static int mc33xs2410_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ struct spi_device *spi = pwmchip_get_drvdata(chip);
+ u8 reg[4] = {
+ MC33XS2410_PWM_FREQ(pwm->hwpwm + 1),
+ MC33XS2410_PWM_DC(pwm->hwpwm + 1),
+ MC33XS2410_PWM_CTRL1,
+ MC33XS2410_PWM_CTRL3
+ };
+ u64 period, duty_cycle;
+ int ret, rel_dc;
+ u16 rd_val[2];
+ u8 wr_val[4];
+ u8 mask;
+
+ period = min(state->period, MC33XS2410_PWM_MAX_PERIOD(0));
+ if (period < MC33XS2410_PWM_MIN_PERIOD)
+ return -EINVAL;
+
+ ret = mc33xs2410_read_regs(spi, &reg[2], MC33XS2410_FRAME_IN_DATA_RD, rd_val, 2);
+ if (ret < 0)
+ return ret;
+
+ /* Frequency */
+ wr_val[0] = mc33xs2410_pwm_get_freq(period);
+ /* Continue calculations with the possibly truncated period */
+ period = mc33xs2410_pwm_get_period(wr_val[0]);
+
+ /* Duty cycle */
+ duty_cycle = min(period, state->duty_cycle);
+ rel_dc = div64_u64(duty_cycle * 256, period) - 1;
+ if (rel_dc >= 0)
+ wr_val[1] = rel_dc;
+ else if (state->polarity == PWM_POLARITY_NORMAL)
+ wr_val[1] = 0;
+ else
+ wr_val[1] = 255;
+
+ /* Polarity */
+ mask = MC33XS2410_PWM_CTRL1_POL_INV(pwm->hwpwm + 1);
+ if (state->polarity == PWM_POLARITY_INVERSED && rel_dc >= 0)
+ wr_val[2] = rd_val[0] | mask;
+ else
+ wr_val[2] = rd_val[0] & ~mask;
+
+ /* Enable */
+ mask = MC33XS2410_PWM_CTRL3_EN(pwm->hwpwm + 1);
+ if (state->enabled &&
+ !(state->polarity == PWM_POLARITY_NORMAL && rel_dc < 0))
+ wr_val[3] = rd_val[1] | mask;
+ else
+ wr_val[3] = rd_val[1] & ~mask;
+
+ return mc33xs2410_write_regs(spi, reg, wr_val, 4);
+}
+
+static int mc33xs2410_pwm_get_state(struct pwm_chip *chip,
+ struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct spi_device *spi = pwmchip_get_drvdata(chip);
+ u8 reg[4] = {
+ MC33XS2410_PWM_FREQ(pwm->hwpwm + 1),
+ MC33XS2410_PWM_DC(pwm->hwpwm + 1),
+ MC33XS2410_PWM_CTRL1,
+ MC33XS2410_PWM_CTRL3,
+ };
+ u16 val[4];
+ int ret;
+
+ ret = mc33xs2410_read_regs(spi, reg, MC33XS2410_FRAME_IN_DATA_RD, val,
+ ARRAY_SIZE(reg));
+ if (ret < 0)
+ return ret;
+
+ state->period = mc33xs2410_pwm_get_period(val[0]);
+ state->polarity = (val[2] & MC33XS2410_PWM_CTRL1_POL_INV(pwm->hwpwm + 1)) ?
+ PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
+ state->enabled = !!(val[3] & MC33XS2410_PWM_CTRL3_EN(pwm->hwpwm + 1));
+ state->duty_cycle = DIV_ROUND_UP_ULL((val[1] + 1) * state->period, 256);
+
+ return 0;
+}
+
+static const struct pwm_ops mc33xs2410_pwm_ops = {
+ .apply = mc33xs2410_pwm_apply,
+ .get_state = mc33xs2410_pwm_get_state,
+};
+
+static int mc33xs2410_reset(struct device *dev)
+{
+ struct gpio_desc *reset_gpio;
+
+ reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR_OR_NULL(reset_gpio))
+ return PTR_ERR_OR_ZERO(reset_gpio);
+
+ /* Wake-up time */
+ fsleep(10000);
+
+ return 0;
+}
+
+static int mc33xs2410_probe(struct spi_device *spi)
+{
+ struct device *dev = &spi->dev;
+ struct pwm_chip *chip;
+ int ret;
+
+ chip = devm_pwmchip_alloc(dev, 4, 0);
+ if (IS_ERR(chip))
+ return PTR_ERR(chip);
+
+ spi->bits_per_word = 16;
+ spi->mode |= SPI_CS_WORD;
+ ret = spi_setup(spi);
+ if (ret < 0)
+ return ret;
+
+ pwmchip_set_drvdata(chip, spi);
+ chip->ops = &mc33xs2410_pwm_ops;
+
+ /*
+ * Deasserts the reset of the device. Shouldn't change the output signal
+ * if the device was setup prior to probing.
+ */
+ ret = mc33xs2410_reset(dev);
+ if (ret)
+ return ret;
+
+ /*
+ * Disable watchdog and keep in mind that the watchdog won't trigger a
+ * reset of the machine when running into an timeout, instead the
+ * control over the outputs is handed over to the INx input logic
+ * signals of the device. Disabling it here just deactivates this
+ * feature until a proper solution is found.
+ */
+ ret = mc33xs2410_write_reg(spi, MC33XS2410_WDT, 0x0);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Failed to disable watchdog\n");
+
+ /* Transition to normal mode */
+ ret = mc33xs2410_modify_reg(spi, MC33XS2410_GLB_CTRL,
+ MC33XS2410_GLB_CTRL_MODE,
+ MC33XS2410_GLB_CTRL_MODE_NORMAL);
+ if (ret < 0)
+ return dev_err_probe(dev, ret,
+ "Failed to transition to normal mode\n");
+
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Failed to add pwm chip\n");
+
+ return 0;
+}
+
+static const struct spi_device_id mc33xs2410_spi_id[] = {
+ { "mc33xs2410" },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, mc33xs2410_spi_id);
+
+static const struct of_device_id mc33xs2410_of_match[] = {
+ { .compatible = "nxp,mc33xs2410" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, mc33xs2410_of_match);
+
+static struct spi_driver mc33xs2410_driver = {
+ .driver = {
+ .name = "mc33xs2410-pwm",
+ .of_match_table = mc33xs2410_of_match,
+ },
+ .probe = mc33xs2410_probe,
+ .id_table = mc33xs2410_spi_id,
+};
+module_spi_driver(mc33xs2410_driver);
+
+MODULE_DESCRIPTION("NXP MC33XS2410 high-side switch driver");
+MODULE_AUTHOR("Dimitri Fedrau <dimitri.fedrau@liebherr.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 98e6c1533312..8c6bf3d49753 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -6,7 +6,7 @@
* PWM output is achieved by calculating a clock that permits calculating
* two periods (low and high). The counter then has to be set to switch after
* N cycles for the first half period.
- * The hardware has no "polarity" setting. This driver reverses the period
+ * Partly the hardware has no "polarity" setting. This driver reverses the period
* cycles (the low length is inverted with the high length) for
* PWM_POLARITY_INVERSED. This means that .get_state cannot read the polarity
* from the hardware.
@@ -56,6 +56,10 @@
#define MISC_B_CLK_SEL_SHIFT 6
#define MISC_A_CLK_SEL_SHIFT 4
#define MISC_CLK_SEL_MASK 0x3
+#define MISC_B_CONSTANT_EN BIT(29)
+#define MISC_A_CONSTANT_EN BIT(28)
+#define MISC_B_INVERT_EN BIT(27)
+#define MISC_A_INVERT_EN BIT(26)
#define MISC_B_EN BIT(1)
#define MISC_A_EN BIT(0)
@@ -68,6 +72,8 @@ static struct meson_pwm_channel_data {
u8 clk_div_shift;
u8 clk_en_shift;
u32 pwm_en_mask;
+ u32 const_en_mask;
+ u32 inv_en_mask;
} meson_pwm_per_channel_data[MESON_NUM_PWMS] = {
{
.reg_offset = REG_PWM_A,
@@ -75,6 +81,8 @@ static struct meson_pwm_channel_data {
.clk_div_shift = MISC_A_CLK_DIV_SHIFT,
.clk_en_shift = MISC_A_CLK_EN_SHIFT,
.pwm_en_mask = MISC_A_EN,
+ .const_en_mask = MISC_A_CONSTANT_EN,
+ .inv_en_mask = MISC_A_INVERT_EN,
},
{
.reg_offset = REG_PWM_B,
@@ -82,6 +90,8 @@ static struct meson_pwm_channel_data {
.clk_div_shift = MISC_B_CLK_DIV_SHIFT,
.clk_en_shift = MISC_B_CLK_EN_SHIFT,
.pwm_en_mask = MISC_B_EN,
+ .const_en_mask = MISC_B_CONSTANT_EN,
+ .inv_en_mask = MISC_B_INVERT_EN,
}
};
@@ -89,6 +99,8 @@ struct meson_pwm_channel {
unsigned long rate;
unsigned int hi;
unsigned int lo;
+ bool constant;
+ bool inverted;
struct clk_mux mux;
struct clk_divider div;
@@ -99,6 +111,8 @@ struct meson_pwm_channel {
struct meson_pwm_data {
const char *const parent_names[MESON_NUM_MUX_PARENTS];
int (*channels_init)(struct pwm_chip *chip);
+ bool has_constant;
+ bool has_polarity;
};
struct meson_pwm {
@@ -160,7 +174,7 @@ static int meson_pwm_calc(struct pwm_chip *chip, struct pwm_device *pwm,
* Fixing this needs some care however as some machines might rely on
* this.
*/
- if (state->polarity == PWM_POLARITY_INVERSED)
+ if (state->polarity == PWM_POLARITY_INVERSED && !meson->data->has_polarity)
duty = period - duty;
freq = div64_u64(NSEC_PER_SEC * 0xffffULL, period);
@@ -187,9 +201,11 @@ static int meson_pwm_calc(struct pwm_chip *chip, struct pwm_device *pwm,
if (duty == period) {
channel->hi = cnt;
channel->lo = 0;
+ channel->constant = true;
} else if (duty == 0) {
channel->hi = 0;
channel->lo = cnt;
+ channel->constant = true;
} else {
duty_cnt = mul_u64_u64_div_u64(fin_freq, duty, NSEC_PER_SEC);
@@ -197,6 +213,7 @@ static int meson_pwm_calc(struct pwm_chip *chip, struct pwm_device *pwm,
channel->hi = duty_cnt;
channel->lo = cnt - duty_cnt;
+ channel->constant = false;
}
channel->rate = fin_freq;
@@ -227,6 +244,19 @@ static void meson_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
value = readl(meson->base + REG_MISC_AB);
value |= channel_data->pwm_en_mask;
+
+ if (meson->data->has_constant) {
+ value &= ~channel_data->const_en_mask;
+ if (channel->constant)
+ value |= channel_data->const_en_mask;
+ }
+
+ if (meson->data->has_polarity) {
+ value &= ~channel_data->inv_en_mask;
+ if (channel->inverted)
+ value |= channel_data->inv_en_mask;
+ }
+
writel(value, meson->base + REG_MISC_AB);
spin_unlock_irqrestore(&meson->lock, flags);
@@ -235,13 +265,24 @@ static void meson_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
static void meson_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct meson_pwm *meson = to_meson_pwm(chip);
+ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm];
+ struct meson_pwm_channel_data *channel_data;
unsigned long flags;
u32 value;
+ channel_data = &meson_pwm_per_channel_data[pwm->hwpwm];
+
spin_lock_irqsave(&meson->lock, flags);
value = readl(meson->base + REG_MISC_AB);
- value &= ~meson_pwm_per_channel_data[pwm->hwpwm].pwm_en_mask;
+ value &= ~channel_data->pwm_en_mask;
+
+ if (meson->data->has_polarity) {
+ value &= ~channel_data->inv_en_mask;
+ if (channel->inverted)
+ value |= channel_data->inv_en_mask;
+ }
+
writel(value, meson->base + REG_MISC_AB);
spin_unlock_irqrestore(&meson->lock, flags);
@@ -254,10 +295,12 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm];
int err = 0;
+ channel->inverted = (state->polarity == PWM_POLARITY_INVERSED);
+
if (!state->enabled) {
- if (state->polarity == PWM_POLARITY_INVERSED) {
+ if (channel->inverted && !meson->data->has_polarity) {
/*
- * This IP block revision doesn't have an "always high"
+ * Some of IP block revisions don't have an "always high"
* setting which we can use for "inverted disabled".
* Instead we achieve this by setting mux parent with
* highest rate and minimum divider value, resulting
@@ -271,6 +314,7 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
channel->rate = ULONG_MAX;
channel->hi = ~0;
channel->lo = 0;
+ channel->constant = true;
meson_pwm_enable(chip, pwm);
} else {
@@ -287,21 +331,9 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
return 0;
}
-static u64 meson_pwm_cnt_to_ns(struct pwm_chip *chip, struct pwm_device *pwm,
- u32 cnt)
+static u64 meson_pwm_cnt_to_ns(unsigned long fin_freq, u32 cnt)
{
- struct meson_pwm *meson = to_meson_pwm(chip);
- struct meson_pwm_channel *channel;
- unsigned long fin_freq;
-
- /* to_meson_pwm() can only be used after .get_state() is called */
- channel = &meson->channels[pwm->hwpwm];
-
- fin_freq = clk_get_rate(channel->clk);
- if (fin_freq == 0)
- return 0;
-
- return div64_ul(NSEC_PER_SEC * (u64)cnt, fin_freq);
+ return fin_freq ? div64_ul(NSEC_PER_SEC * (u64)cnt, fin_freq) : 0;
}
static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
@@ -309,23 +341,27 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
{
struct meson_pwm *meson = to_meson_pwm(chip);
struct meson_pwm_channel_data *channel_data;
- struct meson_pwm_channel *channel;
+ unsigned long fin_freq;
+ unsigned int hi, lo;
u32 value;
- channel = &meson->channels[pwm->hwpwm];
channel_data = &meson_pwm_per_channel_data[pwm->hwpwm];
+ fin_freq = clk_get_rate(meson->channels[pwm->hwpwm].clk);
value = readl(meson->base + REG_MISC_AB);
state->enabled = value & channel_data->pwm_en_mask;
- value = readl(meson->base + channel_data->reg_offset);
- channel->lo = FIELD_GET(PWM_LOW_MASK, value);
- channel->hi = FIELD_GET(PWM_HIGH_MASK, value);
+ if (meson->data->has_polarity && (value & channel_data->inv_en_mask))
+ state->polarity = PWM_POLARITY_INVERSED;
+ else
+ state->polarity = PWM_POLARITY_NORMAL;
- state->period = meson_pwm_cnt_to_ns(chip, pwm, channel->lo + channel->hi);
- state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, channel->hi);
+ value = readl(meson->base + channel_data->reg_offset);
+ lo = FIELD_GET(PWM_LOW_MASK, value);
+ hi = FIELD_GET(PWM_HIGH_MASK, value);
- state->polarity = PWM_POLARITY_NORMAL;
+ state->period = meson_pwm_cnt_to_ns(fin_freq, lo + hi);
+ state->duty_cycle = meson_pwm_cnt_to_ns(fin_freq, hi);
return 0;
}
@@ -508,29 +544,52 @@ static const struct meson_pwm_data pwm_gxbb_ao_data = {
static const struct meson_pwm_data pwm_axg_ee_data = {
.parent_names = { "xtal", "fclk_div5", "fclk_div4", "fclk_div3" },
.channels_init = meson_pwm_init_channels_meson8b_legacy,
+ .has_constant = true,
+ .has_polarity = true,
};
static const struct meson_pwm_data pwm_axg_ao_data = {
.parent_names = { "xtal", "axg_ao_clk81", "fclk_div4", "fclk_div5" },
.channels_init = meson_pwm_init_channels_meson8b_legacy,
+ .has_constant = true,
+ .has_polarity = true,
+};
+
+static const struct meson_pwm_data pwm_g12a_ee_data = {
+ .parent_names = { "xtal", NULL, "fclk_div4", "fclk_div3" },
+ .channels_init = meson_pwm_init_channels_meson8b_legacy,
+ .has_constant = true,
+ .has_polarity = true,
};
static const struct meson_pwm_data pwm_g12a_ao_ab_data = {
.parent_names = { "xtal", "g12a_ao_clk81", "fclk_div4", "fclk_div5" },
.channels_init = meson_pwm_init_channels_meson8b_legacy,
+ .has_constant = true,
+ .has_polarity = true,
};
static const struct meson_pwm_data pwm_g12a_ao_cd_data = {
.parent_names = { "xtal", "g12a_ao_clk81", NULL, NULL },
.channels_init = meson_pwm_init_channels_meson8b_legacy,
+ .has_constant = true,
+ .has_polarity = true,
};
static const struct meson_pwm_data pwm_meson8_v2_data = {
.channels_init = meson_pwm_init_channels_meson8b_v2,
};
+static const struct meson_pwm_data pwm_meson_axg_v2_data = {
+ .channels_init = meson_pwm_init_channels_meson8b_v2,
+ .has_constant = true,
+ .has_polarity = true,
+};
+
static const struct meson_pwm_data pwm_s4_data = {
.channels_init = meson_pwm_init_channels_s4,
+ .has_constant = true,
+ .has_polarity = true,
};
static const struct of_device_id meson_pwm_matches[] = {
@@ -538,6 +597,14 @@ static const struct of_device_id meson_pwm_matches[] = {
.compatible = "amlogic,meson8-pwm-v2",
.data = &pwm_meson8_v2_data
},
+ {
+ .compatible = "amlogic,meson-axg-pwm-v2",
+ .data = &pwm_meson_axg_v2_data
+ },
+ {
+ .compatible = "amlogic,meson-g12-pwm-v2",
+ .data = &pwm_meson_axg_v2_data
+ },
/* The following compatibles are obsolete */
{
.compatible = "amlogic,meson8b-pwm",
@@ -561,7 +628,7 @@ static const struct of_device_id meson_pwm_matches[] = {
},
{
.compatible = "amlogic,meson-g12a-ee-pwm",
- .data = &pwm_meson8b_data
+ .data = &pwm_g12a_ee_data
},
{
.compatible = "amlogic,meson-g12a-ao-pwm-ab",
diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c
index 5162f3991644..eb03ccd5b688 100644
--- a/drivers/pwm/pwm-pca9685.c
+++ b/drivers/pwm/pwm-pca9685.c
@@ -263,12 +263,14 @@ static int pca9685_pwm_gpio_get(struct gpio_chip *gpio, unsigned int offset)
return pca9685_pwm_get_duty(chip, offset) != 0;
}
-static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset,
- int value)
+static int pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset,
+ int value)
{
struct pwm_chip *chip = gpiochip_get_data(gpio);
pca9685_pwm_set_duty(chip, offset, value ? PCA9685_COUNTER_RANGE : 0);
+
+ return 0;
}
static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset)
@@ -321,7 +323,7 @@ static int pca9685_pwm_gpio_probe(struct pwm_chip *chip)
pca->gpio.direction_input = pca9685_pwm_gpio_direction_input;
pca->gpio.direction_output = pca9685_pwm_gpio_direction_output;
pca->gpio.get = pca9685_pwm_gpio_get;
- pca->gpio.set = pca9685_pwm_gpio_set;
+ pca->gpio.set_rv = pca9685_pwm_gpio_set;
pca->gpio.base = -1;
pca->gpio.ngpio = PCA9685_MAXCHAN;
pca->gpio.can_sleep = true;
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index 430bd6a709e9..8a4a3d2df30d 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -160,24 +160,24 @@ static int pwm_probe(struct platform_device *pdev)
const struct platform_device_id *id = platform_get_device_id(pdev);
struct pwm_chip *chip;
struct pxa_pwm_chip *pc;
+ struct device *dev = &pdev->dev;
int ret = 0;
if (IS_ENABLED(CONFIG_OF) && id == NULL)
- id = of_device_get_match_data(&pdev->dev);
+ id = of_device_get_match_data(dev);
if (id == NULL)
return -EINVAL;
- chip = devm_pwmchip_alloc(&pdev->dev,
- (id->driver_data & HAS_SECONDARY_PWM) ? 2 : 1,
+ chip = devm_pwmchip_alloc(dev, (id->driver_data & HAS_SECONDARY_PWM) ? 2 : 1,
sizeof(*pc));
if (IS_ERR(chip))
return PTR_ERR(chip);
pc = to_pxa_pwm_chip(chip);
- pc->clk = devm_clk_get(&pdev->dev, NULL);
+ pc->clk = devm_clk_get(dev, NULL);
if (IS_ERR(pc->clk))
- return PTR_ERR(pc->clk);
+ return dev_err_probe(dev, PTR_ERR(pc->clk), "Failed to get clock\n");
chip->ops = &pxa_pwm_ops;
@@ -188,11 +188,9 @@ static int pwm_probe(struct platform_device *pdev)
if (IS_ERR(pc->mmio_base))
return PTR_ERR(pc->mmio_base);
- ret = devm_pwmchip_add(&pdev->dev, chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
- return ret;
- }
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "pwmchip_add() failed\n");
return 0;
}
diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c
new file mode 100644
index 000000000000..360c8bf3b190
--- /dev/null
+++ b/drivers/pwm/pwm-rzg2l-gpt.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L General PWM Timer (GPT) driver
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation
+ *
+ * Hardware manual for this IP can be found here
+ * https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-users-manual-hardware-0?language=en
+ *
+ * Limitations:
+ * - Counter must be stopped before modifying Mode and Prescaler.
+ * - When PWM is disabled, the output is driven to inactive.
+ * - While the hardware supports both polarities, the driver (for now)
+ * only handles normal polarity.
+ * - General PWM Timer (GPT) has 8 HW channels for PWM operations and
+ * each HW channel have 2 IOs.
+ * - Each IO is modelled as an independent PWM channel.
+ * - When both channels are used, disabling the channel on one stops the
+ * other.
+ * - When both channels are used, the period of both IOs in the HW channel
+ * must be same (for now).
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/reset.h>
+#include <linux/time.h>
+#include <linux/units.h>
+
+#define RZG2L_GET_CH(hwpwm) ((hwpwm) / 2)
+#define RZG2L_GET_CH_OFFS(ch) (0x100 * (ch))
+
+#define RZG2L_GTCR(ch) (0x2c + RZG2L_GET_CH_OFFS(ch))
+#define RZG2L_GTUDDTYC(ch) (0x30 + RZG2L_GET_CH_OFFS(ch))
+#define RZG2L_GTIOR(ch) (0x34 + RZG2L_GET_CH_OFFS(ch))
+#define RZG2L_GTBER(ch) (0x40 + RZG2L_GET_CH_OFFS(ch))
+#define RZG2L_GTCNT(ch) (0x48 + RZG2L_GET_CH_OFFS(ch))
+#define RZG2L_GTCCR(ch, sub_ch) (0x4c + RZG2L_GET_CH_OFFS(ch) + 4 * (sub_ch))
+#define RZG2L_GTPR(ch) (0x64 + RZG2L_GET_CH_OFFS(ch))
+
+#define RZG2L_GTCR_CST BIT(0)
+#define RZG2L_GTCR_MD GENMASK(18, 16)
+#define RZG2L_GTCR_TPCS GENMASK(26, 24)
+
+#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE FIELD_PREP(RZG2L_GTCR_MD, 0)
+
+#define RZG2L_GTUDDTYC_UP BIT(0)
+#define RZG2L_GTUDDTYC_UDF BIT(1)
+#define RZG2L_GTUDDTYC_UP_COUNTING (RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF)
+
+#define RZG2L_GTIOR_GTIOA GENMASK(4, 0)
+#define RZG2L_GTIOR_GTIOB GENMASK(20, 16)
+#define RZG2L_GTIOR_GTIOx(sub_ch) ((sub_ch) ? RZG2L_GTIOR_GTIOB : RZG2L_GTIOR_GTIOA)
+#define RZG2L_GTIOR_OAE BIT(8)
+#define RZG2L_GTIOR_OBE BIT(24)
+#define RZG2L_GTIOR_OxE(sub_ch) ((sub_ch) ? RZG2L_GTIOR_OBE : RZG2L_GTIOR_OAE)
+
+#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE 0x1b
+#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \
+ (RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE)
+#define RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \
+ (FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE) | RZG2L_GTIOR_OBE)
+
+#define RZG2L_GTIOR_GTIOx_OUT_HI_END_TOGGLE_CMP_MATCH(sub_ch) \
+ ((sub_ch) ? RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH : \
+ RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH)
+
+#define RZG2L_MAX_HW_CHANNELS 8
+#define RZG2L_CHANNELS_PER_IO 2
+#define RZG2L_MAX_PWM_CHANNELS (RZG2L_MAX_HW_CHANNELS * RZG2L_CHANNELS_PER_IO)
+#define RZG2L_MAX_SCALE_FACTOR 1024
+#define RZG2L_MAX_TICKS ((u64)U32_MAX * RZG2L_MAX_SCALE_FACTOR)
+
+struct rzg2l_gpt_chip {
+ void __iomem *mmio;
+ struct mutex lock; /* lock to protect shared channel resources */
+ unsigned long rate_khz;
+ u32 period_ticks[RZG2L_MAX_HW_CHANNELS];
+ u32 channel_request_count[RZG2L_MAX_HW_CHANNELS];
+ u32 channel_enable_count[RZG2L_MAX_HW_CHANNELS];
+};
+
+static inline struct rzg2l_gpt_chip *to_rzg2l_gpt_chip(struct pwm_chip *chip)
+{
+ return pwmchip_get_drvdata(chip);
+}
+
+static inline unsigned int rzg2l_gpt_subchannel(unsigned int hwpwm)
+{
+ return hwpwm & 0x1;
+}
+
+static void rzg2l_gpt_write(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg, u32 data)
+{
+ writel(data, rzg2l_gpt->mmio + reg);
+}
+
+static u32 rzg2l_gpt_read(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg)
+{
+ return readl(rzg2l_gpt->mmio + reg);
+}
+
+static void rzg2l_gpt_modify(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg, u32 clr,
+ u32 set)
+{
+ rzg2l_gpt_write(rzg2l_gpt, reg,
+ (rzg2l_gpt_read(rzg2l_gpt, reg) & ~clr) | set);
+}
+
+static u8 rzg2l_gpt_calculate_prescale(struct rzg2l_gpt_chip *rzg2l_gpt,
+ u64 period_ticks)
+{
+ u32 prescaled_period_ticks;
+ u8 prescale;
+
+ prescaled_period_ticks = period_ticks >> 32;
+ if (prescaled_period_ticks >= 256)
+ prescale = 5;
+ else
+ prescale = (fls(prescaled_period_ticks) + 1) / 2;
+
+ return prescale;
+}
+
+static int rzg2l_gpt_request(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+ struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+ u32 ch = RZG2L_GET_CH(pwm->hwpwm);
+
+ guard(mutex)(&rzg2l_gpt->lock);
+ rzg2l_gpt->channel_request_count[ch]++;
+
+ return 0;
+}
+
+static void rzg2l_gpt_free(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+ struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+ u32 ch = RZG2L_GET_CH(pwm->hwpwm);
+
+ guard(mutex)(&rzg2l_gpt->lock);
+ rzg2l_gpt->channel_request_count[ch]--;
+}
+
+static bool rzg2l_gpt_is_ch_enabled(struct rzg2l_gpt_chip *rzg2l_gpt, u8 hwpwm)
+{
+ u8 ch = RZG2L_GET_CH(hwpwm);
+ u32 val;
+
+ val = rzg2l_gpt_read(rzg2l_gpt, RZG2L_GTCR(ch));
+ if (!(val & RZG2L_GTCR_CST))
+ return false;
+
+ val = rzg2l_gpt_read(rzg2l_gpt, RZG2L_GTIOR(ch));
+
+ return val & RZG2L_GTIOR_OxE(rzg2l_gpt_subchannel(hwpwm));
+}
+
+/* Caller holds the lock while calling rzg2l_gpt_enable() */
+static void rzg2l_gpt_enable(struct rzg2l_gpt_chip *rzg2l_gpt,
+ struct pwm_device *pwm)
+{
+ u8 sub_ch = rzg2l_gpt_subchannel(pwm->hwpwm);
+ u32 val = RZG2L_GTIOR_GTIOx(sub_ch) | RZG2L_GTIOR_OxE(sub_ch);
+ u8 ch = RZG2L_GET_CH(pwm->hwpwm);
+
+ /* Enable pin output */
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTIOR(ch), val,
+ RZG2L_GTIOR_GTIOx_OUT_HI_END_TOGGLE_CMP_MATCH(sub_ch));
+
+ if (!rzg2l_gpt->channel_enable_count[ch])
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTCR(ch), 0, RZG2L_GTCR_CST);
+
+ rzg2l_gpt->channel_enable_count[ch]++;
+}
+
+/* Caller holds the lock while calling rzg2l_gpt_disable() */
+static void rzg2l_gpt_disable(struct rzg2l_gpt_chip *rzg2l_gpt,
+ struct pwm_device *pwm)
+{
+ u8 sub_ch = rzg2l_gpt_subchannel(pwm->hwpwm);
+ u8 ch = RZG2L_GET_CH(pwm->hwpwm);
+
+ /* Stop count, Output low on GTIOCx pin when counting stops */
+ rzg2l_gpt->channel_enable_count[ch]--;
+
+ if (!rzg2l_gpt->channel_enable_count[ch])
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTCR(ch), RZG2L_GTCR_CST, 0);
+
+ /* Disable pin output */
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTIOR(ch), RZG2L_GTIOR_OxE(sub_ch), 0);
+}
+
+static u64 rzg2l_gpt_calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt,
+ u32 val, u8 prescale)
+{
+ u64 tmp;
+
+ /*
+ * The calculation doesn't overflow an u64 because prescale ≤ 5 and so
+ * tmp = val << (2 * prescale) * USEC_PER_SEC
+ * < 2^32 * 2^10 * 10^6
+ * < 2^32 * 2^10 * 2^20
+ * = 2^62
+ */
+ tmp = (u64)val << (2 * prescale);
+ tmp *= USEC_PER_SEC;
+
+ return DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate_khz);
+}
+
+static int rzg2l_gpt_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+
+ state->enabled = rzg2l_gpt_is_ch_enabled(rzg2l_gpt, pwm->hwpwm);
+ if (state->enabled) {
+ u32 sub_ch = rzg2l_gpt_subchannel(pwm->hwpwm);
+ u32 ch = RZG2L_GET_CH(pwm->hwpwm);
+ u8 prescale;
+ u32 val;
+
+ val = rzg2l_gpt_read(rzg2l_gpt, RZG2L_GTCR(ch));
+ prescale = FIELD_GET(RZG2L_GTCR_TPCS, val);
+
+ val = rzg2l_gpt_read(rzg2l_gpt, RZG2L_GTPR(ch));
+ state->period = rzg2l_gpt_calculate_period_or_duty(rzg2l_gpt, val, prescale);
+
+ val = rzg2l_gpt_read(rzg2l_gpt, RZG2L_GTCCR(ch, sub_ch));
+ state->duty_cycle = rzg2l_gpt_calculate_period_or_duty(rzg2l_gpt, val, prescale);
+ if (state->duty_cycle > state->period)
+ state->duty_cycle = state->period;
+ }
+
+ state->polarity = PWM_POLARITY_NORMAL;
+
+ return 0;
+}
+
+static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8 prescale)
+{
+ return min_t(u64, DIV_ROUND_DOWN_ULL(period_or_duty_cycle, 1 << (2 * prescale)),
+ U32_MAX);
+}
+
+/* Caller holds the lock while calling rzg2l_gpt_config() */
+static int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+ u8 sub_ch = rzg2l_gpt_subchannel(pwm->hwpwm);
+ u8 ch = RZG2L_GET_CH(pwm->hwpwm);
+ u64 period_ticks, duty_ticks;
+ unsigned long pv, dc;
+ u8 prescale;
+
+ /* Limit period/duty cycle to max value supported by the HW */
+ period_ticks = mul_u64_u64_div_u64(state->period, rzg2l_gpt->rate_khz, USEC_PER_SEC);
+ if (period_ticks > RZG2L_MAX_TICKS)
+ period_ticks = RZG2L_MAX_TICKS;
+ /*
+ * GPT counter is shared by the two IOs of a single channel, so
+ * prescale and period can NOT be modified when there are multiple IOs
+ * in use with different settings.
+ */
+ if (rzg2l_gpt->channel_request_count[ch] > 1) {
+ if (period_ticks < rzg2l_gpt->period_ticks[ch])
+ return -EBUSY;
+ else
+ period_ticks = rzg2l_gpt->period_ticks[ch];
+ }
+
+ prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_ticks);
+ pv = rzg2l_gpt_calculate_pv_or_dc(period_ticks, prescale);
+
+ duty_ticks = mul_u64_u64_div_u64(state->duty_cycle, rzg2l_gpt->rate_khz, USEC_PER_SEC);
+ if (duty_ticks > period_ticks)
+ duty_ticks = period_ticks;
+ dc = rzg2l_gpt_calculate_pv_or_dc(duty_ticks, prescale);
+
+ /*
+ * GPT counter is shared by multiple channels, we cache the period ticks
+ * from the first enabled channel and use the same value for both
+ * channels.
+ */
+ rzg2l_gpt->period_ticks[ch] = period_ticks;
+
+ /*
+ * Counter must be stopped before modifying mode, prescaler, timer
+ * counter and buffer enable registers. These registers are shared
+ * between both channels. So allow updating these registers only for the
+ * first enabled channel.
+ */
+ if (rzg2l_gpt->channel_enable_count[ch] <= 1) {
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTCR(ch), RZG2L_GTCR_CST, 0);
+
+ /* GPT set operating mode (saw-wave up-counting) */
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTCR(ch), RZG2L_GTCR_MD,
+ RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE);
+
+ /* Set count direction */
+ rzg2l_gpt_write(rzg2l_gpt, RZG2L_GTUDDTYC(ch), RZG2L_GTUDDTYC_UP_COUNTING);
+
+ /* Select count clock */
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTCR(ch), RZG2L_GTCR_TPCS,
+ FIELD_PREP(RZG2L_GTCR_TPCS, prescale));
+
+ /* Set period */
+ rzg2l_gpt_write(rzg2l_gpt, RZG2L_GTPR(ch), pv);
+ }
+
+ /* Set duty cycle */
+ rzg2l_gpt_write(rzg2l_gpt, RZG2L_GTCCR(ch, sub_ch), dc);
+
+ if (rzg2l_gpt->channel_enable_count[ch] <= 1) {
+ /* Set initial value for counter */
+ rzg2l_gpt_write(rzg2l_gpt, RZG2L_GTCNT(ch), 0);
+
+ /* Set no buffer operation */
+ rzg2l_gpt_write(rzg2l_gpt, RZG2L_GTBER(ch), 0);
+
+ /* Restart the counter after updating the registers */
+ rzg2l_gpt_modify(rzg2l_gpt, RZG2L_GTCR(ch),
+ RZG2L_GTCR_CST, RZG2L_GTCR_CST);
+ }
+
+ return 0;
+}
+
+static int rzg2l_gpt_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+ bool enabled = pwm->state.enabled;
+ int ret;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ guard(mutex)(&rzg2l_gpt->lock);
+ if (!state->enabled) {
+ if (enabled)
+ rzg2l_gpt_disable(rzg2l_gpt, pwm);
+
+ return 0;
+ }
+
+ ret = rzg2l_gpt_config(chip, pwm, state);
+ if (!ret && !enabled)
+ rzg2l_gpt_enable(rzg2l_gpt, pwm);
+
+ return ret;
+}
+
+static const struct pwm_ops rzg2l_gpt_ops = {
+ .request = rzg2l_gpt_request,
+ .free = rzg2l_gpt_free,
+ .get_state = rzg2l_gpt_get_state,
+ .apply = rzg2l_gpt_apply,
+};
+
+static int rzg2l_gpt_probe(struct platform_device *pdev)
+{
+ struct rzg2l_gpt_chip *rzg2l_gpt;
+ struct device *dev = &pdev->dev;
+ struct reset_control *rstc;
+ struct pwm_chip *chip;
+ unsigned long rate;
+ struct clk *clk;
+ int ret;
+
+ chip = devm_pwmchip_alloc(dev, RZG2L_MAX_PWM_CHANNELS, sizeof(*rzg2l_gpt));
+ if (IS_ERR(chip))
+ return PTR_ERR(chip);
+ rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+
+ rzg2l_gpt->mmio = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(rzg2l_gpt->mmio))
+ return PTR_ERR(rzg2l_gpt->mmio);
+
+ rstc = devm_reset_control_get_exclusive_deasserted(dev, NULL);
+ if (IS_ERR(rstc))
+ return dev_err_probe(dev, PTR_ERR(rstc), "Cannot deassert reset control\n");
+
+ clk = devm_clk_get_enabled(dev, NULL);
+ if (IS_ERR(clk))
+ return dev_err_probe(dev, PTR_ERR(clk), "Cannot get clock\n");
+
+ ret = devm_clk_rate_exclusive_get(dev, clk);
+ if (ret)
+ return ret;
+
+ rate = clk_get_rate(clk);
+ if (!rate)
+ return dev_err_probe(dev, -EINVAL, "The gpt clk rate is 0");
+
+ /*
+ * Refuse clk rates > 1 GHz to prevent overflow later for computing
+ * period and duty cycle.
+ */
+ if (rate > NSEC_PER_SEC)
+ return dev_err_probe(dev, -EINVAL, "The gpt clk rate is > 1GHz");
+
+ /*
+ * Rate is in MHz and is always integer for peripheral clk
+ * 2^32 * 2^10 (prescalar) * 10^6 (rate_khz) < 2^64
+ * So make sure rate is multiple of 1000.
+ */
+ rzg2l_gpt->rate_khz = rate / KILO;
+ if (rzg2l_gpt->rate_khz * KILO != rate)
+ return dev_err_probe(dev, -EINVAL, "Rate is not multiple of 1000");
+
+ mutex_init(&rzg2l_gpt->lock);
+
+ chip->ops = &rzg2l_gpt_ops;
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to add PWM chip\n");
+
+ return 0;
+}
+
+static const struct of_device_id rzg2l_gpt_of_table[] = {
+ { .compatible = "renesas,rzg2l-gpt", },
+ { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rzg2l_gpt_of_table);
+
+static struct platform_driver rzg2l_gpt_driver = {
+ .driver = {
+ .name = "pwm-rzg2l-gpt",
+ .of_match_table = rzg2l_gpt_of_table,
+ },
+ .probe = rzg2l_gpt_probe,
+};
+module_platform_driver(rzg2l_gpt_driver);
+
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_DESCRIPTION("Renesas RZ/G2L General PWM Timer (GPT) Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index ec2c05c9ee7a..4b148f0afeb9 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -88,7 +88,7 @@ static int stm32_pwm_round_waveform_tohw(struct pwm_chip *chip,
rate = clk_get_rate(priv->clk);
- if (active_channels(priv) & ~(1 << ch * 4)) {
+ if (active_channels(priv) & ~TIM_CCER_CCxE(ch + 1)) {
u64 arr;
/*
@@ -180,11 +180,11 @@ static int stm32_pwm_round_waveform_tohw(struct pwm_chip *chip,
wfhw->ccr = min_t(u64, ccr, wfhw->arr + 1);
+out:
dev_dbg(&chip->dev, "pwm#%u: %lld/%lld [+%lld] @%lu -> CCER: %08x, PSC: %08x, ARR: %08x, CCR: %08x\n",
pwm->hwpwm, wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns,
rate, wfhw->ccer, wfhw->psc, wfhw->arr, wfhw->ccr);
-out:
clk_disable(priv->clk);
return ret;
@@ -213,10 +213,10 @@ static int stm32_pwm_round_waveform_fromhw(struct pwm_chip *chip,
{
const struct stm32_pwm_waveform *wfhw = _wfhw;
struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
+ unsigned long rate = clk_get_rate(priv->clk);
unsigned int ch = pwm->hwpwm;
if (wfhw->ccer & TIM_CCER_CCxE(ch + 1)) {
- unsigned long rate = clk_get_rate(priv->clk);
u64 ccr_ns;
/* The result doesn't overflow for rate >= 15259 */
@@ -236,17 +236,16 @@ static int stm32_pwm_round_waveform_fromhw(struct pwm_chip *chip,
wf->duty_length_ns = ccr_ns;
wf->duty_offset_ns = 0;
}
-
- dev_dbg(&chip->dev, "pwm#%u: CCER: %08x, PSC: %08x, ARR: %08x, CCR: %08x @%lu -> %lld/%lld [+%lld]\n",
- pwm->hwpwm, wfhw->ccer, wfhw->psc, wfhw->arr, wfhw->ccr, rate,
- wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns);
-
} else {
*wf = (struct pwm_waveform){
.period_length_ns = 0,
};
}
+ dev_dbg(&chip->dev, "pwm#%u: CCER: %08x, PSC: %08x, ARR: %08x, CCR: %08x @%lu -> %lld/%lld [+%lld]\n",
+ pwm->hwpwm, wfhw->ccer, wfhw->psc, wfhw->arr, wfhw->ccr, rate,
+ wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns);
+
return 0;
}
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index d096b58cd0ae..2b6279d32774 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -17,8 +17,8 @@
#include <linux/bitops.h>
#include <linux/ras.h>
-#include <asm/amd_nb.h>
-#include <asm/amd_node.h>
+#include <asm/amd/nb.h>
+#include <asm/amd/node.h>
#include "reg_fields.h"
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 05e32d764028..6d8988387da4 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -122,6 +122,17 @@ config REGULATOR_AD5398
This driver supports AD5398 and AD5821 current regulator chips.
If building into module, its name is ad5398.ko.
+config REGULATOR_ADP5055
+ tristate "Analog Devices ADP5055 Triple Buck Regulator"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ This driver controls an Analog Devices ADP5055 with triple buck
+ regulators using an I2C interface.
+
+ Say M here if you want to include support for the regulator as a
+ module.
+
config REGULATOR_ANATOP
tristate "Freescale i.MX on-chip ANATOP LDO regulators"
depends on ARCH_MXC || COMPILE_TEST
@@ -1579,10 +1590,16 @@ config REGULATOR_TPS65219
tristate "TI TPS65219 Power regulators"
depends on MFD_TPS65219 && OF
help
- This driver supports TPS65219 voltage regulator chips.
+ This driver supports TPS65219, TPS65215, and TPS65214 voltage
+ regulator chips.
TPS65219 series of PMICs have 3 single phase BUCKs & 4 LDOs
- voltage regulators. It supports software based voltage control
- for different voltage domains.
+ voltage regulators.
+ TPS65215 PMIC has 3 single phase BUCKs & 2 LDOs.
+ TPS65214 PMIC has 3 synchronous stepdown DC-DC converters & 2
+ LDOs. One LDO supports a maximum output current of 300 mA and the
+ other a maximum of 500 mA
+ All 3 PMICs support software based voltage control for different
+ voltage domains.
config REGULATOR_TPS6594
tristate "TI TPS6594 Power regulators"
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 524e026c0273..c0bc7a0f4e67 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_REGULATOR_AB8500) += ab8500-ext.o ab8500.o
obj-$(CONFIG_REGULATOR_ACT8865) += act8865-regulator.o
obj-$(CONFIG_REGULATOR_ACT8945A) += act8945a-regulator.o
obj-$(CONFIG_REGULATOR_AD5398) += ad5398.o
+obj-$(CONFIG_REGULATOR_ADP5055) += adp5055-regulator.o
obj-$(CONFIG_REGULATOR_ANATOP) += anatop-regulator.o
obj-$(CONFIG_REGULATOR_ARIZONA_LDO1) += arizona-ldo1.o
obj-$(CONFIG_REGULATOR_ARIZONA_MICSUPP) += arizona-micsupp.o
diff --git a/drivers/regulator/adp5055-regulator.c b/drivers/regulator/adp5055-regulator.c
new file mode 100644
index 000000000000..4b004a6b2f84
--- /dev/null
+++ b/drivers/regulator/adp5055-regulator.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Regulator driver for Analog Devices ADP5055
+//
+// Copyright (C) 2025 Analog Devices, Inc.
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+// ADP5055 Register Map.
+
+#define ADP5055_CTRL123 0xD1
+#define ADP5055_CTRL_MODE1 0xD3
+#define ADP5055_CTRL_MODE2 0xD4
+#define ADP5055_DLY0 0xD5
+#define ADP5055_DLY1 0xD6
+#define ADP5055_DLY2 0xD7
+#define ADP5055_VID0 0xD8
+#define ADP5055_VID1 0xD9
+#define ADP5055_VID2 0xDA
+#define ADP5055_DVS_LIM0 0xDC
+#define ADP5055_DVS_LIM1 0xDD
+#define ADP5055_DVS_LIM2 0xDE
+#define ADP5055_FT_CFG 0xDF
+#define ADP5055_PG_CFG 0xE0
+
+// ADP5055 Field Masks.
+
+#define ADP5055_MASK_EN_MODE BIT(0)
+#define ADP5055_MASK_OCP_BLANKING BIT(7)
+#define ADP5055_MASK_PSM BIT(4)
+#define ADP5055_MASK_DIS2 BIT(2)
+#define ADP5055_MASK_DIS1 BIT(1)
+#define ADP5055_MASK_DIS0 BIT(0)
+#define ADP5055_MASK_DIS_DLY GENMASK(6, 4)
+#define ADP5055_MASK_EN_DLY GENMASK(2, 0)
+#define ADP5055_MASK_DVS_LIM_UPPER GENMASK(7, 4)
+#define ADP5055_MASK_DVS_LIM_LOWER GENMASK(3, 0)
+#define ADP5055_MASK_FAST_TRANSIENT2 GENMASK(5, 4)
+#define ADP5055_MASK_FAST_TRANSIENT1 GENMASK(3, 2)
+#define ADP5055_MASK_FAST_TRANSIENT0 GENMASK(1, 0)
+#define ADP5055_MASK_DLY_PWRGD BIT(4)
+#define ADP5055_MASK_PWRGD2 BIT(2)
+#define ADP5055_MASK_PWRGD1 BIT(1)
+#define ADP5055_MASK_PWRGD0 BIT(0)
+
+#define ADP5055_MIN_VOUT 408000
+#define ADP5055_NUM_CH 3
+
+struct adp5055 {
+ struct device *dev;
+ struct regmap *regmap;
+ u32 tset;
+ struct gpio_desc *en_gpiod[ADP5055_NUM_CH];
+ bool en_mode_software;
+ int dvs_limit_upper[ADP5055_NUM_CH];
+ int dvs_limit_lower[ADP5055_NUM_CH];
+ u32 fast_transient[ADP5055_NUM_CH];
+ bool mask_power_good[ADP5055_NUM_CH];
+};
+
+static const unsigned int adp5055_tset_vals[] = {
+ 2600,
+ 20800,
+};
+
+static const unsigned int adp5055_enable_delay_vals_2_6[] = {
+ 0,
+ 2600,
+ 5200,
+ 7800,
+ 10400,
+ 13000,
+ 15600,
+ 18200,
+};
+
+static const unsigned int adp5055_enable_delay_vals_20_8[] = {
+ 0,
+ 20800,
+ 41600,
+ 62400,
+ 83200,
+ 104000,
+ 124800,
+ 145600,
+};
+
+static const char * const adp5055_fast_transient_vals[] = {
+ "none",
+ "3G_1.5%",
+ "5G_1.5%",
+ "5G_2.5%",
+};
+
+static int adp5055_get_prop_index(const u32 *table, size_t table_size,
+ u32 value)
+{
+ int i;
+
+ for (i = 0; i < table_size; i++)
+ if (table[i] == value)
+ return i;
+
+ return -EINVAL;
+}
+
+static const struct regmap_range adp5055_reg_ranges[] = {
+ regmap_reg_range(0xD1, 0xE0),
+};
+
+static const struct regmap_access_table adp5055_access_ranges_table = {
+ .yes_ranges = adp5055_reg_ranges,
+ .n_yes_ranges = ARRAY_SIZE(adp5055_reg_ranges),
+};
+
+static const struct regmap_config adp5055_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0xE0,
+ .wr_table = &adp5055_access_ranges_table,
+ .rd_table = &adp5055_access_ranges_table,
+};
+
+static const struct linear_range adp5055_voltage_ranges[] = {
+ REGULATOR_LINEAR_RANGE(ADP5055_MIN_VOUT, 0, 255, 1500),
+};
+
+static int adp5055_parse_fw(struct device *dev, struct adp5055 *adp5055)
+{
+ int i, ret;
+ struct regmap *regmap = adp5055->regmap;
+ int val;
+ bool ocp_blanking;
+ bool delay_power_good;
+
+ ret = device_property_read_u32(dev, "adi,tset-us", &adp5055->tset);
+ if (!ret) {
+ ret = adp5055_get_prop_index(adp5055_tset_vals,
+ ARRAY_SIZE(adp5055_tset_vals), adp5055->tset);
+ if (ret < 0)
+ return dev_err_probe(dev, ret,
+ "Failed to initialize tset.");
+ adp5055->tset = adp5055_tset_vals[ret];
+ }
+
+ ocp_blanking = device_property_read_bool(dev, "adi,ocp-blanking");
+
+ delay_power_good = device_property_read_bool(dev,
+ "adi,delay-power-good");
+
+ for (i = 0; i < ADP5055_NUM_CH; i++) {
+ val = FIELD_PREP(ADP5055_MASK_DVS_LIM_UPPER,
+ DIV_ROUND_CLOSEST_ULL(192000 - adp5055->dvs_limit_upper[i], 12000));
+ val |= FIELD_PREP(ADP5055_MASK_DVS_LIM_LOWER,
+ DIV_ROUND_CLOSEST_ULL(adp5055->dvs_limit_lower[i] + 190500, 12000));
+ ret = regmap_write(regmap, ADP5055_DVS_LIM0 + i, val);
+ if (ret)
+ return ret;
+ }
+
+ val = FIELD_PREP(ADP5055_MASK_EN_MODE, adp5055->en_mode_software);
+ ret = regmap_write(regmap, ADP5055_CTRL_MODE1, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(ADP5055_MASK_OCP_BLANKING, ocp_blanking);
+ ret = regmap_update_bits(regmap, ADP5055_CTRL_MODE2,
+ ADP5055_MASK_OCP_BLANKING, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(ADP5055_MASK_FAST_TRANSIENT2, adp5055->fast_transient[2]);
+ val |= FIELD_PREP(ADP5055_MASK_FAST_TRANSIENT1, adp5055->fast_transient[1]);
+ val |= FIELD_PREP(ADP5055_MASK_FAST_TRANSIENT0, adp5055->fast_transient[0]);
+ ret = regmap_write(regmap, ADP5055_FT_CFG, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(ADP5055_MASK_DLY_PWRGD, delay_power_good);
+ val |= FIELD_PREP(ADP5055_MASK_PWRGD2, adp5055->mask_power_good[2]);
+ val |= FIELD_PREP(ADP5055_MASK_PWRGD1, adp5055->mask_power_good[1]);
+ val |= FIELD_PREP(ADP5055_MASK_PWRGD0, adp5055->mask_power_good[0]);
+ ret = regmap_write(regmap, ADP5055_PG_CFG, val);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int adp5055_of_parse_cb(struct device_node *np,
+ const struct regulator_desc *desc,
+ struct regulator_config *config)
+{
+ struct adp5055 *adp5055 = config->driver_data;
+ int id, ret, pval, i;
+
+ id = desc->id;
+
+ if (of_property_read_bool(np, "enable-gpios")) {
+ adp5055->en_gpiod[id] = devm_fwnode_gpiod_get(config->dev,
+ of_fwnode_handle(np), "enable",
+ GPIOD_OUT_LOW, "enable");
+ if (IS_ERR(adp5055->en_gpiod[id]))
+ return dev_err_probe(config->dev, PTR_ERR(adp5055->en_gpiod[id]),
+ "Failed to get enable GPIO\n");
+
+ config->ena_gpiod = adp5055->en_gpiod[id];
+ } else {
+ adp5055->en_mode_software = true;
+ }
+
+ ret = of_property_read_u32(np, "adi,dvs-limit-upper-microvolt", &pval);
+ if (ret)
+ adp5055->dvs_limit_upper[id] = 192000;
+ else
+ adp5055->dvs_limit_upper[id] = pval;
+
+ if (adp5055->dvs_limit_upper[id] > 192000 || adp5055->dvs_limit_upper[id] < 12000)
+ return dev_err_probe(config->dev, adp5055->dvs_limit_upper[id],
+ "Out of range - dvs-limit-upper-microvolt value.");
+
+ ret = of_property_read_u32(np, "adi,dvs-limit-lower-microvolt", &pval);
+ if (ret)
+ adp5055->dvs_limit_lower[id] = -190500;
+ else
+ adp5055->dvs_limit_lower[id] = pval;
+
+ if (adp5055->dvs_limit_lower[id] > -10500 || adp5055->dvs_limit_lower[id] < -190500)
+ return dev_err_probe(config->dev, adp5055->dvs_limit_lower[id],
+ "Out of range - dvs-limit-lower-microvolt value.");
+
+ for (i = 0; i < 4; i++) {
+ ret = of_property_match_string(np, "adi,fast-transient",
+ adp5055_fast_transient_vals[i]);
+ if (!ret)
+ break;
+ }
+
+ if (ret < 0)
+ adp5055->fast_transient[id] = 3;
+ else
+ adp5055->fast_transient[id] = i;
+
+ adp5055->mask_power_good[id] = of_property_read_bool(np, "adi,mask-power-good");
+
+ return 0;
+}
+
+static int adp5055_set_mode(struct regulator_dev *rdev, u32 mode)
+{
+ struct adp5055 *adp5055 = rdev_get_drvdata(rdev);
+ int id, ret;
+
+ id = rdev_get_id(rdev);
+
+ switch (mode) {
+ case REGULATOR_MODE_NORMAL:
+ ret = regmap_update_bits(adp5055->regmap, ADP5055_CTRL_MODE2,
+ ADP5055_MASK_PSM << id, 0);
+ break;
+ case REGULATOR_MODE_IDLE:
+ ret = regmap_update_bits(adp5055->regmap, ADP5055_CTRL_MODE2,
+ ADP5055_MASK_PSM << id, ADP5055_MASK_PSM << id);
+ break;
+ default:
+ return dev_err_probe(&rdev->dev, -EINVAL,
+ "Unsupported mode: %d\n", mode);
+ }
+
+ return ret;
+}
+
+static unsigned int adp5055_get_mode(struct regulator_dev *rdev)
+{
+ struct adp5055 *adp5055 = rdev_get_drvdata(rdev);
+ int id, ret, regval;
+
+ id = rdev_get_id(rdev);
+
+ ret = regmap_read(adp5055->regmap, ADP5055_CTRL_MODE2, &regval);
+ if (ret)
+ return ret;
+
+ if (regval & (ADP5055_MASK_PSM << id))
+ return REGULATOR_MODE_IDLE;
+ else
+ return REGULATOR_MODE_NORMAL;
+}
+
+static const struct regulator_ops adp5055_ops = {
+ .list_voltage = regulator_list_voltage_linear_range,
+ .map_voltage = regulator_map_voltage_linear_range,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_active_discharge = regulator_set_active_discharge_regmap,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_mode = adp5055_set_mode,
+ .get_mode = adp5055_get_mode,
+ .set_ramp_delay = regulator_set_ramp_delay_regmap,
+};
+
+#define ADP5055_REG_(_name, _id, _ch, _ops) \
+ [_id] = { \
+ .name = _name, \
+ .of_match = of_match_ptr(_name), \
+ .of_parse_cb = adp5055_of_parse_cb, \
+ .id = _id, \
+ .ops = _ops, \
+ .linear_ranges = adp5055_voltage_ranges, \
+ .n_linear_ranges = ARRAY_SIZE(adp5055_voltage_ranges), \
+ .vsel_reg = ADP5055_VID##_ch, \
+ .vsel_mask = GENMASK(7, 0), \
+ .enable_reg = ADP5055_CTRL123, \
+ .enable_mask = BIT(_ch), \
+ .active_discharge_on = ADP5055_MASK_DIS##_id, \
+ .active_discharge_off = 0, \
+ .active_discharge_mask = ADP5055_MASK_DIS##_id, \
+ .active_discharge_reg = ADP5055_CTRL_MODE2, \
+ .ramp_reg = ADP5055_DLY##_ch, \
+ .ramp_mask = ADP5055_MASK_EN_DLY, \
+ .n_ramp_values = ARRAY_SIZE(adp5055_enable_delay_vals_2_6), \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ }
+
+#define ADP5055_REG(_name, _id, _ch) \
+ ADP5055_REG_(_name, _id, _ch, &adp5055_ops)
+
+static struct regulator_desc adp5055_regulators[] = {
+ ADP5055_REG("buck0", 0, 0),
+ ADP5055_REG("buck1", 1, 1),
+ ADP5055_REG("buck2", 2, 2),
+};
+
+static int adp5055_probe(struct i2c_client *client)
+{
+ struct regulator_init_data *init_data;
+ struct device *dev = &client->dev;
+ struct adp5055 *adp5055;
+ int i, ret;
+
+ init_data = of_get_regulator_init_data(dev, client->dev.of_node,
+ &adp5055_regulators[0]);
+ if (!init_data)
+ return -EINVAL;
+
+ adp5055 = devm_kzalloc(dev, sizeof(struct adp5055), GFP_KERNEL);
+ if (!adp5055)
+ return -ENOMEM;
+
+ adp5055->tset = 2600;
+ adp5055->en_mode_software = false;
+
+ adp5055->regmap = devm_regmap_init_i2c(client, &adp5055_regmap_config);
+ if (IS_ERR(adp5055->regmap))
+ return dev_err_probe(dev, PTR_ERR(adp5055->regmap), "Failed to allocate reg map");
+
+ for (i = 0; i < ADP5055_NUM_CH; i++) {
+ const struct regulator_desc *desc;
+ struct regulator_config config = { };
+ struct regulator_dev *rdev;
+
+ if (adp5055->tset == 2600)
+ adp5055_regulators[i].ramp_delay_table = adp5055_enable_delay_vals_2_6;
+ else
+ adp5055_regulators[i].ramp_delay_table = adp5055_enable_delay_vals_20_8;
+
+ desc = &adp5055_regulators[i];
+
+ config.dev = dev;
+ config.driver_data = adp5055;
+ config.regmap = adp5055->regmap;
+ config.init_data = init_data;
+
+ rdev = devm_regulator_register(dev, desc, &config);
+ if (IS_ERR(rdev)) {
+ return dev_err_probe(dev, PTR_ERR(rdev),
+ "Failed to register %s\n", desc->name);
+ }
+ }
+
+ ret = adp5055_parse_fw(dev, adp5055);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static const struct of_device_id adp5055_of_match[] = {
+ { .compatible = "adi,adp5055", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, adp5055_of_match);
+
+static const struct i2c_device_id adp5055_ids[] = {
+ { .name = "adp5055"},
+ { },
+};
+MODULE_DEVICE_TABLE(i2c, adp5055_ids);
+
+static struct i2c_driver adp5055_driver = {
+ .driver = {
+ .name = "adp5055",
+ .of_match_table = adp5055_of_match,
+ },
+ .probe = adp5055_probe,
+ .id_table = adp5055_ids,
+};
+module_i2c_driver(adp5055_driver);
+
+MODULE_DESCRIPTION("ADP5055 Voltage Regulator Driver");
+MODULE_AUTHOR("Alexis Czezar Torreno <alexisczezar.torreno@analog.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 90629a756693..7a248dc8d2e2 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2617,7 +2617,7 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev,
mutex_lock(&regulator_list_mutex);
list_for_each_entry(pin, &regulator_ena_gpio_list, list) {
- if (pin->gpiod == gpiod) {
+ if (gpiod_is_equal(pin->gpiod, gpiod)) {
rdev_dbg(rdev, "GPIO is already used\n");
goto update_ena_gpio_to_rdev;
}
diff --git a/drivers/regulator/da9121-regulator.c b/drivers/regulator/da9121-regulator.c
index 17527a3f53b4..ef161eb0ca27 100644
--- a/drivers/regulator/da9121-regulator.c
+++ b/drivers/regulator/da9121-regulator.c
@@ -1129,7 +1129,7 @@ static int da9121_i2c_probe(struct i2c_client *i2c)
}
chip->pdata = i2c->dev.platform_data;
- chip->subvariant_id = (enum da9121_subvariant)i2c_get_match_data(i2c);
+ chip->subvariant_id = (kernel_ulong_t)i2c_get_match_data(i2c);
ret = da9121_assign_chip_model(i2c, chip);
if (ret < 0)
diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c
index 65927fa2ef16..75bd53445ba7 100644
--- a/drivers/regulator/gpio-regulator.c
+++ b/drivers/regulator/gpio-regulator.c
@@ -240,7 +240,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
struct regulator_config cfg = { };
struct regulator_dev *rdev;
enum gpiod_flags gflags;
- int ptr, ret, state, i;
+ int ptr, state, i;
drvdata = devm_kzalloc(dev, sizeof(struct gpio_regulator_data),
GFP_KERNEL);
@@ -345,11 +345,9 @@ static int gpio_regulator_probe(struct platform_device *pdev)
return PTR_ERR(cfg.ena_gpiod);
rdev = devm_regulator_register(dev, &drvdata->desc, &cfg);
- if (IS_ERR(rdev)) {
- ret = PTR_ERR(rdev);
- dev_err(dev, "Failed to register regulator: %d\n", ret);
- return ret;
- }
+ if (IS_ERR(rdev))
+ return dev_err_probe(dev, PTR_ERR(rdev),
+ "Failed to register regulator\n");
platform_set_drvdata(pdev, drvdata);
diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c
index 59eb23d467ec..b4fe76e33ff2 100644
--- a/drivers/regulator/max20086-regulator.c
+++ b/drivers/regulator/max20086-regulator.c
@@ -28,7 +28,7 @@
#define MAX20086_REG_ADC4 0x09
/* DEVICE IDs */
-#define MAX20086_DEVICE_ID_MAX20086 0x40
+#define MAX20086_DEVICE_ID_MAX20086 0x30
#define MAX20086_DEVICE_ID_MAX20087 0x20
#define MAX20086_DEVICE_ID_MAX20088 0x10
#define MAX20086_DEVICE_ID_MAX20089 0x00
@@ -132,7 +132,7 @@ static int max20086_regulators_register(struct max20086 *chip)
static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on)
{
- struct of_regulator_match matches[MAX20086_MAX_REGULATORS] = { };
+ struct of_regulator_match *matches;
struct device_node *node;
unsigned int i;
int ret;
@@ -143,6 +143,11 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on)
return -ENODEV;
}
+ matches = devm_kcalloc(chip->dev, chip->info->num_outputs,
+ sizeof(*matches), GFP_KERNEL);
+ if (!matches)
+ return -ENOMEM;
+
for (i = 0; i < chip->info->num_outputs; ++i)
matches[i].name = max20086_output_names[i];
@@ -259,7 +264,7 @@ static int max20086_i2c_probe(struct i2c_client *i2c)
* shutdown.
*/
flags = boot_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
- chip->ena_gpiod = devm_gpiod_get(chip->dev, "enable", flags);
+ chip->ena_gpiod = devm_gpiod_get_optional(chip->dev, "enable", flags);
if (IS_ERR(chip->ena_gpiod)) {
ret = PTR_ERR(chip->ena_gpiod);
dev_err(chip->dev, "Failed to get enable GPIO: %d\n", ret);
diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c
index a56f3ab754fa..14d19a6d6655 100644
--- a/drivers/regulator/pca9450-regulator.c
+++ b/drivers/regulator/pca9450-regulator.c
@@ -9,6 +9,7 @@
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/reboot.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
@@ -33,6 +34,7 @@ struct pca9450 {
struct device *dev;
struct regmap *regmap;
struct gpio_desc *sd_vsel_gpio;
+ struct notifier_block restart_nb;
enum pca9450_chip_type type;
unsigned int rcnt;
int irq;
@@ -965,6 +967,25 @@ static irqreturn_t pca9450_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static int pca9450_i2c_restart_handler(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct pca9450 *pca9450 = container_of(nb, struct pca9450, restart_nb);
+ struct i2c_client *i2c = container_of(pca9450->dev, struct i2c_client, dev);
+
+ dev_dbg(&i2c->dev, "Restarting device..\n");
+ if (i2c_smbus_write_byte_data(i2c, PCA9450_REG_SWRST, SW_RST_COMMAND) == 0) {
+ /* tRESTART is 250ms, so 300 should be enough to make sure it happened */
+ mdelay(300);
+ /* When we get here, the PMIC didn't power cycle for some reason. so warn.*/
+ dev_warn(&i2c->dev, "Device didn't respond to restart command\n");
+ } else {
+ dev_err(&i2c->dev, "Restart command failed\n");
+ }
+
+ return 0;
+}
+
static int pca9450_i2c_probe(struct i2c_client *i2c)
{
enum pca9450_chip_type type = (unsigned int)(uintptr_t)
@@ -1107,6 +1128,12 @@ static int pca9450_i2c_probe(struct i2c_client *i2c)
pca9450->sd_vsel_fixed_low =
of_property_read_bool(ldo5->dev.of_node, "nxp,sd-vsel-fixed-low");
+ pca9450->restart_nb.notifier_call = pca9450_i2c_restart_handler;
+ pca9450->restart_nb.priority = PCA9450_RESTART_HANDLER_PRIORITY;
+
+ if (register_restart_handler(&pca9450->restart_nb))
+ dev_warn(&i2c->dev, "Failed to register restart handler\n");
+
dev_info(&i2c->dev, "%s probed.\n",
type == PCA9450_TYPE_PCA9450A ? "pca9450a" :
(type == PCA9450_TYPE_PCA9451A ? "pca9451a" : "pca9450bc"));
diff --git a/drivers/regulator/pf9453-regulator.c b/drivers/regulator/pf9453-regulator.c
index ed6bf0f6c4fe..be627f49b617 100644
--- a/drivers/regulator/pf9453-regulator.c
+++ b/drivers/regulator/pf9453-regulator.c
@@ -214,7 +214,7 @@ static const struct regmap_config pf9453_regmap_config = {
.val_bits = 8,
.volatile_table = &pf9453_volatile_regs,
.max_register = PF9453_MAX_REG - 1,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
};
/*
@@ -412,6 +412,7 @@ static int find_closest_bigger(unsigned int target, const unsigned int *table,
* pf9453_regulator_set_ramp_delay_regmap
*
* @rdev: regulator to operate on
+ * @ramp_delay: desired ramp delay value in microseconds
*
* Regulators that use regmap for their register I/O can set the ramp_reg
* and ramp_mask fields in their descriptor and then use this as their
diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
index d66a0f61637e..c1a41ce70b36 100644
--- a/drivers/regulator/qcom_spmi-regulator.c
+++ b/drivers/regulator/qcom_spmi-regulator.c
@@ -400,7 +400,7 @@ struct spmi_voltage_range {
* so that range[i].set_point_max_uV < range[i+1].set_point_min_uV.
*/
struct spmi_voltage_set_points {
- struct spmi_voltage_range *range;
+ const struct spmi_voltage_range *range;
int count;
unsigned n_voltages;
};
@@ -474,6 +474,9 @@ struct spmi_regulator_data {
.set_point_max_uV = _set_point_max_uV, \
.step_uV = _step_uV, \
.range_sel = _range_sel, \
+ .n_voltages = (_set_point_max_uV != 0) ? \
+ ((_set_point_max_uV - _set_point_min_uV) / _step_uV) + 1 : \
+ 0, \
}
#define DEFINE_SPMI_SET_POINTS(name) \
@@ -489,110 +492,110 @@ struct spmi_voltage_set_points name##_set_points = { \
* increasing and unique. The set_voltage callback functions expect these
* properties to hold.
*/
-static struct spmi_voltage_range pldo_ranges[] = {
+static const struct spmi_voltage_range pldo_ranges[] = {
SPMI_VOLTAGE_RANGE(2, 750000, 750000, 1537500, 1537500, 12500),
SPMI_VOLTAGE_RANGE(3, 1500000, 1550000, 3075000, 3075000, 25000),
SPMI_VOLTAGE_RANGE(4, 1750000, 3100000, 4900000, 4900000, 50000),
};
-static struct spmi_voltage_range nldo1_ranges[] = {
+static const struct spmi_voltage_range nldo1_ranges[] = {
SPMI_VOLTAGE_RANGE(2, 750000, 750000, 1537500, 1537500, 12500),
};
-static struct spmi_voltage_range nldo2_ranges[] = {
+static const struct spmi_voltage_range nldo2_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 375000, 0, 0, 1537500, 12500),
SPMI_VOLTAGE_RANGE(1, 375000, 375000, 768750, 768750, 6250),
SPMI_VOLTAGE_RANGE(2, 750000, 775000, 1537500, 1537500, 12500),
};
-static struct spmi_voltage_range nldo3_ranges[] = {
+static const struct spmi_voltage_range nldo3_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 375000, 375000, 1537500, 1537500, 12500),
SPMI_VOLTAGE_RANGE(1, 375000, 0, 0, 1537500, 12500),
SPMI_VOLTAGE_RANGE(2, 750000, 0, 0, 1537500, 12500),
};
-static struct spmi_voltage_range ln_ldo_ranges[] = {
+static const struct spmi_voltage_range ln_ldo_ranges[] = {
SPMI_VOLTAGE_RANGE(1, 690000, 690000, 1110000, 1110000, 60000),
SPMI_VOLTAGE_RANGE(0, 1380000, 1380000, 2220000, 2220000, 120000),
};
-static struct spmi_voltage_range smps_ranges[] = {
+static const struct spmi_voltage_range smps_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 375000, 375000, 1562500, 1562500, 12500),
SPMI_VOLTAGE_RANGE(1, 1550000, 1575000, 3125000, 3125000, 25000),
};
-static struct spmi_voltage_range ftsmps_ranges[] = {
+static const struct spmi_voltage_range ftsmps_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 0, 350000, 1275000, 1275000, 5000),
SPMI_VOLTAGE_RANGE(1, 0, 1280000, 2040000, 2040000, 10000),
};
-static struct spmi_voltage_range ftsmps2p5_ranges[] = {
+static const struct spmi_voltage_range ftsmps2p5_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 80000, 350000, 1355000, 1355000, 5000),
SPMI_VOLTAGE_RANGE(1, 160000, 1360000, 2200000, 2200000, 10000),
};
-static struct spmi_voltage_range ftsmps426_ranges[] = {
+static const struct spmi_voltage_range ftsmps426_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 0, 320000, 1352000, 1352000, 4000),
};
-static struct spmi_voltage_range boost_ranges[] = {
+static const struct spmi_voltage_range boost_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 4000000, 4000000, 5550000, 5550000, 50000),
};
-static struct spmi_voltage_range boost_byp_ranges[] = {
+static const struct spmi_voltage_range boost_byp_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 2500000, 2500000, 5200000, 5650000, 50000),
};
-static struct spmi_voltage_range ult_lo_smps_ranges[] = {
+static const struct spmi_voltage_range ult_lo_smps_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 375000, 375000, 1562500, 1562500, 12500),
SPMI_VOLTAGE_RANGE(1, 750000, 0, 0, 1525000, 25000),
};
-static struct spmi_voltage_range ult_ho_smps_ranges[] = {
+static const struct spmi_voltage_range ult_ho_smps_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1550000, 1550000, 2325000, 2325000, 25000),
};
-static struct spmi_voltage_range ult_nldo_ranges[] = {
+static const struct spmi_voltage_range ult_nldo_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 375000, 375000, 1537500, 1537500, 12500),
};
-static struct spmi_voltage_range ult_pldo_ranges[] = {
+static const struct spmi_voltage_range ult_pldo_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1750000, 1750000, 3337500, 3337500, 12500),
};
-static struct spmi_voltage_range pldo660_ranges[] = {
+static const struct spmi_voltage_range pldo660_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1504000, 1504000, 3544000, 3544000, 8000),
};
-static struct spmi_voltage_range nldo660_ranges[] = {
+static const struct spmi_voltage_range nldo660_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 320000, 320000, 1304000, 1304000, 8000),
};
-static struct spmi_voltage_range ht_lvpldo_ranges[] = {
+static const struct spmi_voltage_range ht_lvpldo_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1504000, 1504000, 2000000, 2000000, 8000),
};
-static struct spmi_voltage_range ht_nldo_ranges[] = {
+static const struct spmi_voltage_range ht_nldo_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 312000, 312000, 1304000, 1304000, 8000),
};
-static struct spmi_voltage_range hfs430_ranges[] = {
+static const struct spmi_voltage_range hfs430_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 320000, 320000, 2040000, 2040000, 8000),
};
-static struct spmi_voltage_range ht_p150_ranges[] = {
+static const struct spmi_voltage_range ht_p150_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1616000, 1616000, 3304000, 3304000, 8000),
};
-static struct spmi_voltage_range ht_p600_ranges[] = {
+static const struct spmi_voltage_range ht_p600_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1704000, 1704000, 1896000, 1896000, 8000),
};
-static struct spmi_voltage_range nldo_510_ranges[] = {
+static const struct spmi_voltage_range nldo_510_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 320000, 320000, 1304000, 1304000, 8000),
};
-static struct spmi_voltage_range ftsmps510_ranges[] = {
+static const struct spmi_voltage_range ftsmps510_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 300000, 300000, 1372000, 1372000, 4000),
};
@@ -1676,18 +1679,10 @@ static const struct spmi_regulator_mapping supported_regulators[] = {
static void spmi_calculate_num_voltages(struct spmi_voltage_set_points *points)
{
- unsigned int n;
- struct spmi_voltage_range *range = points->range;
-
- for (; range < points->range + points->count; range++) {
- n = 0;
- if (range->set_point_max_uV) {
- n = range->set_point_max_uV - range->set_point_min_uV;
- n = (n / range->step_uV) + 1;
- }
- range->n_voltages = n;
- points->n_voltages += n;
- }
+ const struct spmi_voltage_range *range = points->range;
+
+ for (; range < points->range + points->count; range++)
+ points->n_voltages += range->n_voltages;
}
static int spmi_regulator_match(struct spmi_regulator *vreg, u16 force_type)
diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 6c3b6bfac961..58dbf8bffa5d 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -6,12 +6,14 @@
*/
#include <linux/backlight.h>
+#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/gpio/driver.h>
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/regmap.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/machine.h>
@@ -93,7 +95,7 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev)
{
struct attiny_lcd *state = rdev_get_drvdata(rdev);
- mutex_lock(&state->lock);
+ guard(mutex)(&state->lock);
/* Ensure bridge, and tp stay in reset */
attiny_set_port_state(state, REG_PORTC, 0);
@@ -114,8 +116,6 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev)
msleep(80);
- mutex_unlock(&state->lock);
-
return 0;
}
@@ -123,7 +123,7 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev)
{
struct attiny_lcd *state = rdev_get_drvdata(rdev);
- mutex_lock(&state->lock);
+ guard(mutex)(&state->lock);
regmap_write(rdev->regmap, REG_PWM, 0);
usleep_range(5000, 10000);
@@ -135,8 +135,6 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev)
attiny_set_port_state(state, REG_PORTC, 0);
msleep(30);
- mutex_unlock(&state->lock);
-
return 0;
}
@@ -144,19 +142,17 @@ static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
{
struct attiny_lcd *state = rdev_get_drvdata(rdev);
unsigned int data;
- int ret, i;
-
- mutex_lock(&state->lock);
-
- for (i = 0; i < 10; i++) {
- ret = regmap_read(rdev->regmap, REG_PORTC, &data);
- if (!ret)
- break;
- usleep_range(10000, 12000);
+ int ret = 0, i;
+
+ scoped_guard(mutex, &state->lock) {
+ for (i = 0; i < 10; i++) {
+ ret = regmap_read(rdev->regmap, REG_PORTC, &data);
+ if (!ret)
+ break;
+ usleep_range(10000, 12000);
+ }
}
- mutex_unlock(&state->lock);
-
if (ret < 0)
return ret;
@@ -189,7 +185,7 @@ static int attiny_update_status(struct backlight_device *bl)
int brightness = backlight_get_brightness(bl);
int ret, i;
- mutex_lock(&state->lock);
+ guard(mutex)(&state->lock);
for (i = 0; i < 10; i++) {
ret = regmap_write(regmap, REG_PWM, brightness);
@@ -197,8 +193,6 @@ static int attiny_update_status(struct backlight_device *bl)
break;
}
- mutex_unlock(&state->lock);
-
return ret;
}
@@ -211,15 +205,12 @@ static int attiny_gpio_get_direction(struct gpio_chip *gc, unsigned int off)
return GPIO_LINE_DIRECTION_OUT;
}
-static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val)
+static int attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val)
{
struct attiny_lcd *state = gpiochip_get_data(gc);
u8 last_val;
- if (off >= NUM_GPIO)
- return;
-
- mutex_lock(&state->lock);
+ guard(mutex)(&state->lock);
last_val = attiny_get_port_state(state, mappings[off].reg);
if (val)
@@ -242,7 +233,7 @@ static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val)
msleep(100);
}
- mutex_unlock(&state->lock);
+ return 0;
}
static int attiny_i2c_read(struct i2c_client *client, u8 reg, unsigned int *buf)
@@ -296,7 +287,10 @@ static int attiny_i2c_probe(struct i2c_client *i2c)
if (!state)
return -ENOMEM;
- mutex_init(&state->lock);
+ ret = devm_mutex_init(&i2c->dev, &state->lock);
+ if (ret)
+ return ret;
+
i2c_set_clientdata(i2c, state);
regmap = devm_regmap_init_i2c(i2c, &attiny_regmap_config);
@@ -304,13 +298,13 @@ static int attiny_i2c_probe(struct i2c_client *i2c)
ret = PTR_ERR(regmap);
dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
ret);
- goto error;
+ return ret;
}
ret = attiny_i2c_read(i2c, REG_ID, &data);
if (ret < 0) {
dev_err(&i2c->dev, "Failed to read REG_ID reg: %d\n", ret);
- goto error;
+ return ret;
}
switch (data) {
@@ -319,8 +313,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c)
break;
default:
dev_err(&i2c->dev, "Unknown Atmel firmware revision: 0x%02x\n", data);
- ret = -ENODEV;
- goto error;
+ return -ENODEV;
}
regmap_write(regmap, REG_POWERON, 0);
@@ -336,8 +329,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c)
rdev = devm_regulator_register(&i2c->dev, &attiny_regulator, &config);
if (IS_ERR(rdev)) {
dev_err(&i2c->dev, "Failed to register ATTINY regulator\n");
- ret = PTR_ERR(rdev);
- goto error;
+ return PTR_ERR(rdev);
}
props.type = BACKLIGHT_RAW;
@@ -348,10 +340,8 @@ static int attiny_i2c_probe(struct i2c_client *i2c)
bl = devm_backlight_device_register(&i2c->dev, dev_name(&i2c->dev),
&i2c->dev, state, &attiny_bl,
&props);
- if (IS_ERR(bl)) {
- ret = PTR_ERR(bl);
- goto error;
- }
+ if (IS_ERR(bl))
+ return PTR_ERR(bl);
bl->props.brightness = 0xff;
@@ -361,31 +351,17 @@ static int attiny_i2c_probe(struct i2c_client *i2c)
state->gc.base = -1;
state->gc.ngpio = NUM_GPIO;
- state->gc.set = attiny_gpio_set;
+ state->gc.set_rv = attiny_gpio_set;
state->gc.get_direction = attiny_gpio_get_direction;
state->gc.can_sleep = true;
ret = devm_gpiochip_add_data(&i2c->dev, &state->gc, state);
- if (ret) {
+ if (ret)
dev_err(&i2c->dev, "Failed to create gpiochip: %d\n", ret);
- goto error;
- }
-
- return 0;
-
-error:
- mutex_destroy(&state->lock);
return ret;
}
-static void attiny_i2c_remove(struct i2c_client *client)
-{
- struct attiny_lcd *state = i2c_get_clientdata(client);
-
- mutex_destroy(&state->lock);
-}
-
static const struct of_device_id attiny_dt_ids[] = {
{ .compatible = "raspberrypi,7inch-touchscreen-panel-regulator" },
{},
@@ -399,7 +375,6 @@ static struct i2c_driver attiny_regulator_driver = {
.of_match_table = attiny_dt_ids,
},
.probe = attiny_i2c_probe,
- .remove = attiny_i2c_remove,
};
module_i2c_driver(attiny_regulator_driver);
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index d25cd81e3f36..fe2631378ccd 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -5,7 +5,7 @@
#include <linux/cleanup.h>
#include <linux/err.h>
-#include <linux/of_gpio.h>
+#include <linux/of.h>
#include <linux/gpio/consumer.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -35,8 +35,8 @@ struct s5m8767_info {
u8 buck2_vol[8];
u8 buck3_vol[8];
u8 buck4_vol[8];
- int buck_gpios[3];
- int buck_ds[3];
+ struct gpio_desc *buck_gpios[3];
+ struct gpio_desc *buck_ds[3];
int buck_gpioindex;
};
@@ -272,9 +272,9 @@ static inline int s5m8767_set_high(struct s5m8767_info *s5m8767)
{
int temp_index = s5m8767->buck_gpioindex;
- gpio_set_value(s5m8767->buck_gpios[0], (temp_index >> 2) & 0x1);
- gpio_set_value(s5m8767->buck_gpios[1], (temp_index >> 1) & 0x1);
- gpio_set_value(s5m8767->buck_gpios[2], temp_index & 0x1);
+ gpiod_set_value(s5m8767->buck_gpios[0], !!(temp_index & BIT(2)));
+ gpiod_set_value(s5m8767->buck_gpios[1], !!(temp_index & BIT(1)));
+ gpiod_set_value(s5m8767->buck_gpios[2], !!(temp_index & BIT(0)));
return 0;
}
@@ -283,9 +283,9 @@ static inline int s5m8767_set_low(struct s5m8767_info *s5m8767)
{
int temp_index = s5m8767->buck_gpioindex;
- gpio_set_value(s5m8767->buck_gpios[2], temp_index & 0x1);
- gpio_set_value(s5m8767->buck_gpios[1], (temp_index >> 1) & 0x1);
- gpio_set_value(s5m8767->buck_gpios[0], (temp_index >> 2) & 0x1);
+ gpiod_set_value(s5m8767->buck_gpios[2], !!(temp_index & BIT(0)));
+ gpiod_set_value(s5m8767->buck_gpios[1], !!(temp_index & BIT(1)));
+ gpiod_set_value(s5m8767->buck_gpios[0], !!(temp_index & BIT(2)));
return 0;
}
@@ -482,42 +482,6 @@ static int s5m8767_enable_ext_control(struct s5m8767_info *s5m8767,
#ifdef CONFIG_OF
-static int s5m8767_pmic_dt_parse_dvs_gpio(struct sec_pmic_dev *iodev,
- struct sec_platform_data *pdata,
- struct device_node *pmic_np)
-{
- int i, gpio;
-
- for (i = 0; i < 3; i++) {
- gpio = of_get_named_gpio(pmic_np,
- "s5m8767,pmic-buck-dvs-gpios", i);
- if (!gpio_is_valid(gpio)) {
- dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
- return -EINVAL;
- }
- pdata->buck_gpios[i] = gpio;
- }
- return 0;
-}
-
-static int s5m8767_pmic_dt_parse_ds_gpio(struct sec_pmic_dev *iodev,
- struct sec_platform_data *pdata,
- struct device_node *pmic_np)
-{
- int i, gpio;
-
- for (i = 0; i < 3; i++) {
- gpio = of_get_named_gpio(pmic_np,
- "s5m8767,pmic-buck-ds-gpios", i);
- if (!gpio_is_valid(gpio)) {
- dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
- return -EINVAL;
- }
- pdata->buck_ds[i] = gpio;
- }
- return 0;
-}
-
static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev,
struct sec_platform_data *pdata)
{
@@ -525,7 +489,7 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev,
struct device_node *pmic_np, *reg_np;
struct sec_regulator_data *rdata;
struct sec_opmode_data *rmode;
- unsigned int i, dvs_voltage_nr = 8, ret;
+ unsigned int i, dvs_voltage_nr = 8;
pmic_np = iodev->dev->of_node;
if (!pmic_np) {
@@ -635,10 +599,6 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev,
if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs ||
pdata->buck4_gpiodvs) {
- ret = s5m8767_pmic_dt_parse_dvs_gpio(iodev, pdata, pmic_np);
- if (ret)
- return -EINVAL;
-
if (of_property_read_u32(pmic_np,
"s5m8767,pmic-buck-default-dvs-idx",
&pdata->buck_default_idx)) {
@@ -652,10 +612,6 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev,
}
}
- ret = s5m8767_pmic_dt_parse_ds_gpio(iodev, pdata, pmic_np);
- if (ret)
- return -EINVAL;
-
pdata->buck2_ramp_enable = of_property_read_bool(pmic_np, "s5m8767,pmic-buck2-ramp-enable");
pdata->buck3_ramp_enable = of_property_read_bool(pmic_np, "s5m8767,pmic-buck3-ramp-enable");
pdata->buck4_ramp_enable = of_property_read_bool(pmic_np, "s5m8767,pmic-buck4-ramp-enable");
@@ -684,6 +640,8 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
struct regulator_config config = { };
struct s5m8767_info *s5m8767;
int i, ret, buck_init;
+ const char *gpiods_names[3] = { "S5M8767 DS2", "S5M8767 DS3", "S5M8767 DS4" };
+ const char *gpiodvs_names[3] = { "S5M8767 SET1", "S5M8767 SET2", "S5M8767 SET3" };
if (!pdata) {
dev_err(pdev->dev.parent, "Platform data not supplied\n");
@@ -731,12 +689,6 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
s5m8767->buck2_gpiodvs = pdata->buck2_gpiodvs;
s5m8767->buck3_gpiodvs = pdata->buck3_gpiodvs;
s5m8767->buck4_gpiodvs = pdata->buck4_gpiodvs;
- s5m8767->buck_gpios[0] = pdata->buck_gpios[0];
- s5m8767->buck_gpios[1] = pdata->buck_gpios[1];
- s5m8767->buck_gpios[2] = pdata->buck_gpios[2];
- s5m8767->buck_ds[0] = pdata->buck_ds[0];
- s5m8767->buck_ds[1] = pdata->buck_ds[1];
- s5m8767->buck_ds[2] = pdata->buck_ds[2];
s5m8767->ramp_delay = pdata->buck_ramp_delay;
s5m8767->buck2_ramp = pdata->buck2_ramp_enable;
@@ -787,58 +739,36 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs ||
pdata->buck4_gpiodvs) {
+ for (i = 0; i < 3; i++) {
+ enum gpiod_flags flags;
- if (!gpio_is_valid(pdata->buck_gpios[0]) ||
- !gpio_is_valid(pdata->buck_gpios[1]) ||
- !gpio_is_valid(pdata->buck_gpios[2])) {
- dev_err(&pdev->dev, "GPIO NOT VALID\n");
- return -EINVAL;
- }
-
- ret = devm_gpio_request(&pdev->dev, pdata->buck_gpios[0],
- "S5M8767 SET1");
- if (ret)
- return ret;
-
- ret = devm_gpio_request(&pdev->dev, pdata->buck_gpios[1],
- "S5M8767 SET2");
- if (ret)
- return ret;
-
- ret = devm_gpio_request(&pdev->dev, pdata->buck_gpios[2],
- "S5M8767 SET3");
- if (ret)
- return ret;
+ if (s5m8767->buck_gpioindex & BIT(2 - i))
+ flags = GPIOD_OUT_HIGH;
+ else
+ flags = GPIOD_OUT_LOW;
+
+ s5m8767->buck_gpios[i] = devm_gpiod_get_index(iodev->dev,
+ "s5m8767,pmic-buck-dvs", i,
+ flags);
+ if (IS_ERR(s5m8767->buck_gpios[i])) {
+ return dev_err_probe(iodev->dev, PTR_ERR(s5m8767->buck_gpios[i]),
+ "invalid gpio[%d]\n", i);
+ }
- /* SET1 GPIO */
- gpio_direction_output(pdata->buck_gpios[0],
- (s5m8767->buck_gpioindex >> 2) & 0x1);
- /* SET2 GPIO */
- gpio_direction_output(pdata->buck_gpios[1],
- (s5m8767->buck_gpioindex >> 1) & 0x1);
- /* SET3 GPIO */
- gpio_direction_output(pdata->buck_gpios[2],
- (s5m8767->buck_gpioindex >> 0) & 0x1);
+ gpiod_set_consumer_name(s5m8767->buck_gpios[i], gpiodvs_names[i]);
+ }
}
- ret = devm_gpio_request(&pdev->dev, pdata->buck_ds[0], "S5M8767 DS2");
- if (ret)
- return ret;
-
- ret = devm_gpio_request(&pdev->dev, pdata->buck_ds[1], "S5M8767 DS3");
- if (ret)
- return ret;
-
- ret = devm_gpio_request(&pdev->dev, pdata->buck_ds[2], "S5M8767 DS4");
- if (ret)
- return ret;
-
- /* DS2 GPIO */
- gpio_direction_output(pdata->buck_ds[0], 0x0);
- /* DS3 GPIO */
- gpio_direction_output(pdata->buck_ds[1], 0x0);
- /* DS4 GPIO */
- gpio_direction_output(pdata->buck_ds[2], 0x0);
+ for (i = 0; i < 3; i++) {
+ s5m8767->buck_ds[i] = devm_gpiod_get_index(iodev->dev,
+ "s5m8767,pmic-buck-ds", i,
+ GPIOD_OUT_LOW);
+ if (IS_ERR(s5m8767->buck_ds[i])) {
+ return dev_err_probe(iodev->dev, PTR_ERR(s5m8767->buck_ds[i]),
+ "can't get GPIO %d\n", i);
+ }
+ gpiod_set_consumer_name(s5m8767->buck_ds[i], gpiods_names[i]);
+ }
regmap_update_bits(s5m8767->iodev->regmap_pmic,
S5M8767_REG_BUCK2CTRL, 1 << 1,
diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c
index aa65077f9d41..b16b300d7f45 100644
--- a/drivers/regulator/tps65219-regulator.c
+++ b/drivers/regulator/tps65219-regulator.c
@@ -1,10 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
//
-// tps65219-regulator.c
-//
-// Regulator driver for TPS65219 PMIC
+// TPS65214/TPS65215/TPS65219 PMIC Regulator Driver
//
// Copyright (C) 2022 BayLibre Incorporated - https://www.baylibre.com/
+// Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
//
// This implementation derived from tps65218 authored by
// "J Keerthy <j-keerthy@ti.com>"
@@ -30,6 +29,11 @@ struct tps65219_regulator_irq_type {
unsigned long event;
};
+static struct tps65219_regulator_irq_type tps65215_regulator_irq_types[] = {
+ { "SENSOR_3_WARM", "SENSOR3", "warm temperature", REGULATOR_EVENT_OVER_TEMP_WARN},
+ { "SENSOR_3_HOT", "SENSOR3", "hot temperature", REGULATOR_EVENT_OVER_TEMP},
+};
+
static struct tps65219_regulator_irq_type tps65219_regulator_irq_types[] = {
{ "LDO3_SCG", "LDO3", "short circuit to ground", REGULATOR_EVENT_REGULATION_OUT },
{ "LDO3_OC", "LDO3", "overcurrent", REGULATOR_EVENT_OVER_CURRENT },
@@ -37,6 +41,16 @@ static struct tps65219_regulator_irq_type tps65219_regulator_irq_types[] = {
{ "LDO4_SCG", "LDO4", "short circuit to ground", REGULATOR_EVENT_REGULATION_OUT },
{ "LDO4_OC", "LDO4", "overcurrent", REGULATOR_EVENT_OVER_CURRENT },
{ "LDO4_UV", "LDO4", "undervoltage", REGULATOR_EVENT_UNDER_VOLTAGE },
+ { "LDO3_RV", "LDO3", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
+ { "LDO4_RV", "LDO4", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
+ { "LDO3_RV_SD", "LDO3", "residual voltage on shutdown", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
+ { "LDO4_RV_SD", "LDO4", "residual voltage on shutdown", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
+ { "SENSOR_3_WARM", "SENSOR3", "warm temperature", REGULATOR_EVENT_OVER_TEMP_WARN},
+ { "SENSOR_3_HOT", "SENSOR3", "hot temperature", REGULATOR_EVENT_OVER_TEMP},
+};
+
+/* All of TPS65214's irq types are the same as common_regulator_irq_types */
+static struct tps65219_regulator_irq_type common_regulator_irq_types[] = {
{ "LDO1_SCG", "LDO1", "short circuit to ground", REGULATOR_EVENT_REGULATION_OUT },
{ "LDO1_OC", "LDO1", "overcurrent", REGULATOR_EVENT_OVER_CURRENT },
{ "LDO1_UV", "LDO1", "undervoltage", REGULATOR_EVENT_UNDER_VOLTAGE },
@@ -60,8 +74,6 @@ static struct tps65219_regulator_irq_type tps65219_regulator_irq_types[] = {
{ "BUCK3_RV", "BUCK3", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
{ "LDO1_RV", "LDO1", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
{ "LDO2_RV", "LDO2", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
- { "LDO3_RV", "LDO3", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
- { "LDO4_RV", "LDO4", "residual voltage", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
{ "BUCK1_RV_SD", "BUCK1", "residual voltage on shutdown",
REGULATOR_EVENT_OVER_VOLTAGE_WARN },
{ "BUCK2_RV_SD", "BUCK2", "residual voltage on shutdown",
@@ -70,13 +82,9 @@ static struct tps65219_regulator_irq_type tps65219_regulator_irq_types[] = {
REGULATOR_EVENT_OVER_VOLTAGE_WARN },
{ "LDO1_RV_SD", "LDO1", "residual voltage on shutdown", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
{ "LDO2_RV_SD", "LDO2", "residual voltage on shutdown", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
- { "LDO3_RV_SD", "LDO3", "residual voltage on shutdown", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
- { "LDO4_RV_SD", "LDO4", "residual voltage on shutdown", REGULATOR_EVENT_OVER_VOLTAGE_WARN },
- { "SENSOR_3_WARM", "SENSOR3", "warm temperature", REGULATOR_EVENT_OVER_TEMP_WARN},
{ "SENSOR_2_WARM", "SENSOR2", "warm temperature", REGULATOR_EVENT_OVER_TEMP_WARN },
{ "SENSOR_1_WARM", "SENSOR1", "warm temperature", REGULATOR_EVENT_OVER_TEMP_WARN },
{ "SENSOR_0_WARM", "SENSOR0", "warm temperature", REGULATOR_EVENT_OVER_TEMP_WARN },
- { "SENSOR_3_HOT", "SENSOR3", "hot temperature", REGULATOR_EVENT_OVER_TEMP},
{ "SENSOR_2_HOT", "SENSOR2", "hot temperature", REGULATOR_EVENT_OVER_TEMP },
{ "SENSOR_1_HOT", "SENSOR1", "hot temperature", REGULATOR_EVENT_OVER_TEMP },
{ "SENSOR_0_HOT", "SENSOR0", "hot temperature", REGULATOR_EVENT_OVER_TEMP },
@@ -125,12 +133,28 @@ static const struct linear_range bucks_ranges[] = {
REGULATOR_LINEAR_RANGE(3400000, 0x34, 0x3f, 0),
};
-static const struct linear_range ldos_1_2_ranges[] = {
+static const struct linear_range ldo_1_range[] = {
REGULATOR_LINEAR_RANGE(600000, 0x0, 0x37, 50000),
REGULATOR_LINEAR_RANGE(3400000, 0x38, 0x3f, 0),
};
-static const struct linear_range ldos_3_4_ranges[] = {
+static const struct linear_range tps65214_ldo_1_2_range[] = {
+ REGULATOR_LINEAR_RANGE(600000, 0x0, 0x2, 0),
+ REGULATOR_LINEAR_RANGE(650000, 0x3, 0x37, 50000),
+ REGULATOR_LINEAR_RANGE(3300000, 0x38, 0x3F, 0),
+};
+
+static const struct linear_range tps65215_ldo_2_range[] = {
+ REGULATOR_LINEAR_RANGE(1200000, 0x0, 0xC, 50000),
+ REGULATOR_LINEAR_RANGE(3300000, 0x36, 0x3F, 0),
+};
+
+static const struct linear_range tps65219_ldo_2_range[] = {
+ REGULATOR_LINEAR_RANGE(600000, 0x0, 0x37, 50000),
+ REGULATOR_LINEAR_RANGE(3400000, 0x38, 0x3f, 0),
+};
+
+static const struct linear_range tps65219_ldos_3_4_range[] = {
REGULATOR_LINEAR_RANGE(1200000, 0x0, 0xC, 0),
REGULATOR_LINEAR_RANGE(1250000, 0xD, 0x35, 50000),
REGULATOR_LINEAR_RANGE(3300000, 0x36, 0x3F, 0),
@@ -174,7 +198,7 @@ static unsigned int tps65219_get_mode(struct regulator_dev *dev)
}
/* Operations permitted on BUCK1/2/3 */
-static const struct regulator_ops tps65219_bucks_ops = {
+static const struct regulator_ops bucks_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -189,7 +213,7 @@ static const struct regulator_ops tps65219_bucks_ops = {
};
/* Operations permitted on LDO1/2 */
-static const struct regulator_ops tps65219_ldos_1_2_ops = {
+static const struct regulator_ops ldos_1_2_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -204,7 +228,7 @@ static const struct regulator_ops tps65219_ldos_1_2_ops = {
};
/* Operations permitted on LDO3/4 */
-static const struct regulator_ops tps65219_ldos_3_4_ops = {
+static const struct regulator_ops ldos_3_4_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -216,55 +240,98 @@ static const struct regulator_ops tps65219_ldos_3_4_ops = {
.map_voltage = regulator_map_voltage_linear_range,
};
-static const struct regulator_desc regulators[] = {
+static const struct regulator_desc common_regs[] = {
TPS65219_REGULATOR("BUCK1", "buck1", TPS65219_BUCK_1,
- REGULATOR_VOLTAGE, tps65219_bucks_ops, 64,
+ REGULATOR_VOLTAGE, bucks_ops, 64,
TPS65219_REG_BUCK1_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
TPS65219_ENABLE_BUCK1_EN_MASK, 0, 0, bucks_ranges,
3, 4000, 0, NULL, 0, 0),
TPS65219_REGULATOR("BUCK2", "buck2", TPS65219_BUCK_2,
- REGULATOR_VOLTAGE, tps65219_bucks_ops, 64,
+ REGULATOR_VOLTAGE, bucks_ops, 64,
TPS65219_REG_BUCK2_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
TPS65219_ENABLE_BUCK2_EN_MASK, 0, 0, bucks_ranges,
3, 4000, 0, NULL, 0, 0),
TPS65219_REGULATOR("BUCK3", "buck3", TPS65219_BUCK_3,
- REGULATOR_VOLTAGE, tps65219_bucks_ops, 64,
+ REGULATOR_VOLTAGE, bucks_ops, 64,
TPS65219_REG_BUCK3_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
TPS65219_ENABLE_BUCK3_EN_MASK, 0, 0, bucks_ranges,
3, 0, 0, NULL, 0, 0),
+};
+
+static const struct regulator_desc tps65214_regs[] = {
+ // TPS65214's LDO3 pin maps to TPS65219's LDO3 pin
+ TPS65219_REGULATOR("LDO1", "ldo1", TPS65214_LDO_1,
+ REGULATOR_VOLTAGE, ldos_3_4_ops, 64,
+ TPS65214_REG_LDO1_VOUT,
+ TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
+ TPS65219_REG_ENABLE_CTRL,
+ TPS65219_ENABLE_LDO3_EN_MASK, 0, 0, tps65214_ldo_1_2_range,
+ 3, 0, 0, NULL, 0, 0),
+ TPS65219_REGULATOR("LDO2", "ldo2", TPS65214_LDO_2,
+ REGULATOR_VOLTAGE, ldos_3_4_ops, 64,
+ TPS65214_REG_LDO2_VOUT,
+ TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
+ TPS65219_REG_ENABLE_CTRL,
+ TPS65219_ENABLE_LDO2_EN_MASK, 0, 0, tps65214_ldo_1_2_range,
+ 3, 0, 0, NULL, 0, 0),
+};
+
+static const struct regulator_desc tps65215_regs[] = {
+ /*
+ * TPS65215's LDO1 is the same as TPS65219's LDO1. LDO1 is
+ * configurable as load switch and bypass-mode.
+ * TPS65215's LDO2 is the same as TPS65219's LDO3
+ */
TPS65219_REGULATOR("LDO1", "ldo1", TPS65219_LDO_1,
- REGULATOR_VOLTAGE, tps65219_ldos_1_2_ops, 64,
+ REGULATOR_VOLTAGE, ldos_1_2_ops, 64,
TPS65219_REG_LDO1_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
- TPS65219_ENABLE_LDO1_EN_MASK, 0, 0, ldos_1_2_ranges,
+ TPS65219_ENABLE_LDO1_EN_MASK, 0, 0, ldo_1_range,
+ 2, 0, 0, NULL, 0, TPS65219_LDOS_BYP_CONFIG_MASK),
+ TPS65219_REGULATOR("LDO2", "ldo2", TPS65215_LDO_2,
+ REGULATOR_VOLTAGE, ldos_3_4_ops, 64,
+ TPS65215_REG_LDO2_VOUT,
+ TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
+ TPS65219_REG_ENABLE_CTRL,
+ TPS65215_ENABLE_LDO2_EN_MASK, 0, 0, tps65215_ldo_2_range,
+ 2, 0, 0, NULL, 0, 0),
+};
+
+static const struct regulator_desc tps65219_regs[] = {
+ TPS65219_REGULATOR("LDO1", "ldo1", TPS65219_LDO_1,
+ REGULATOR_VOLTAGE, ldos_1_2_ops, 64,
+ TPS65219_REG_LDO1_VOUT,
+ TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
+ TPS65219_REG_ENABLE_CTRL,
+ TPS65219_ENABLE_LDO1_EN_MASK, 0, 0, ldo_1_range,
2, 0, 0, NULL, 0, TPS65219_LDOS_BYP_CONFIG_MASK),
TPS65219_REGULATOR("LDO2", "ldo2", TPS65219_LDO_2,
- REGULATOR_VOLTAGE, tps65219_ldos_1_2_ops, 64,
+ REGULATOR_VOLTAGE, ldos_1_2_ops, 64,
TPS65219_REG_LDO2_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
- TPS65219_ENABLE_LDO2_EN_MASK, 0, 0, ldos_1_2_ranges,
+ TPS65219_ENABLE_LDO2_EN_MASK, 0, 0, tps65219_ldo_2_range,
2, 0, 0, NULL, 0, TPS65219_LDOS_BYP_CONFIG_MASK),
TPS65219_REGULATOR("LDO3", "ldo3", TPS65219_LDO_3,
- REGULATOR_VOLTAGE, tps65219_ldos_3_4_ops, 64,
+ REGULATOR_VOLTAGE, ldos_3_4_ops, 64,
TPS65219_REG_LDO3_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
- TPS65219_ENABLE_LDO3_EN_MASK, 0, 0, ldos_3_4_ranges,
+ TPS65219_ENABLE_LDO3_EN_MASK, 0, 0, tps65219_ldos_3_4_range,
3, 0, 0, NULL, 0, 0),
TPS65219_REGULATOR("LDO4", "ldo4", TPS65219_LDO_4,
- REGULATOR_VOLTAGE, tps65219_ldos_3_4_ops, 64,
+ REGULATOR_VOLTAGE, ldos_3_4_ops, 64,
TPS65219_REG_LDO4_VOUT,
TPS65219_BUCKS_LDOS_VOUT_VSET_MASK,
TPS65219_REG_ENABLE_CTRL,
- TPS65219_ENABLE_LDO4_EN_MASK, 0, 0, ldos_3_4_ranges,
+ TPS65219_ENABLE_LDO4_EN_MASK, 0, 0, tps65219_ldos_3_4_range,
3, 0, 0, NULL, 0, 0),
};
@@ -287,64 +354,141 @@ static irqreturn_t tps65219_regulator_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
+struct tps65219_chip_data {
+ size_t rdesc_size;
+ size_t common_rdesc_size;
+ size_t dev_irq_size;
+ size_t common_irq_size;
+ const struct regulator_desc *rdesc;
+ const struct regulator_desc *common_rdesc;
+ struct tps65219_regulator_irq_type *irq_types;
+ struct tps65219_regulator_irq_type *common_irq_types;
+};
+
+static struct tps65219_chip_data chip_info_table[] = {
+ [TPS65214] = {
+ .rdesc = tps65214_regs,
+ .rdesc_size = ARRAY_SIZE(tps65214_regs),
+ .common_rdesc = common_regs,
+ .common_rdesc_size = ARRAY_SIZE(common_regs),
+ .irq_types = NULL,
+ .dev_irq_size = 0,
+ .common_irq_types = common_regulator_irq_types,
+ .common_irq_size = ARRAY_SIZE(common_regulator_irq_types),
+ },
+ [TPS65215] = {
+ .rdesc = tps65215_regs,
+ .rdesc_size = ARRAY_SIZE(tps65215_regs),
+ .common_rdesc = common_regs,
+ .common_rdesc_size = ARRAY_SIZE(common_regs),
+ .irq_types = tps65215_regulator_irq_types,
+ .dev_irq_size = ARRAY_SIZE(tps65215_regulator_irq_types),
+ .common_irq_types = common_regulator_irq_types,
+ .common_irq_size = ARRAY_SIZE(common_regulator_irq_types),
+ },
+ [TPS65219] = {
+ .rdesc = tps65219_regs,
+ .rdesc_size = ARRAY_SIZE(tps65219_regs),
+ .common_rdesc = common_regs,
+ .common_rdesc_size = ARRAY_SIZE(common_regs),
+ .irq_types = tps65219_regulator_irq_types,
+ .dev_irq_size = ARRAY_SIZE(tps65219_regulator_irq_types),
+ .common_irq_types = common_regulator_irq_types,
+ .common_irq_size = ARRAY_SIZE(common_regulator_irq_types),
+ },
+};
+
static int tps65219_regulator_probe(struct platform_device *pdev)
{
- struct tps65219 *tps = dev_get_drvdata(pdev->dev.parent);
+ struct tps65219_regulator_irq_data *irq_data;
+ struct tps65219_regulator_irq_type *irq_type;
+ struct tps65219_chip_data *pmic;
struct regulator_dev *rdev;
- struct regulator_config config = { };
- int i;
int error;
int irq;
- struct tps65219_regulator_irq_data *irq_data;
- struct tps65219_regulator_irq_type *irq_type;
+ int i;
+
+ struct tps65219 *tps = dev_get_drvdata(pdev->dev.parent);
+ struct regulator_config config = { };
+ enum pmic_id chip = platform_get_device_id(pdev)->driver_data;
+
+ pmic = &chip_info_table[chip];
config.dev = tps->dev;
config.driver_data = tps;
config.regmap = tps->regmap;
- for (i = 0; i < ARRAY_SIZE(regulators); i++) {
- rdev = devm_regulator_register(&pdev->dev, &regulators[i],
+ for (i = 0; i < pmic->common_rdesc_size; i++) {
+ rdev = devm_regulator_register(&pdev->dev, &pmic->common_rdesc[i],
+ &config);
+ if (IS_ERR(rdev))
+ return dev_err_probe(tps->dev, PTR_ERR(rdev),
+ "Failed to register %s regulator\n",
+ pmic->common_rdesc[i].name);
+ }
+
+ for (i = 0; i < pmic->rdesc_size; i++) {
+ rdev = devm_regulator_register(&pdev->dev, &pmic->rdesc[i],
&config);
if (IS_ERR(rdev))
return dev_err_probe(tps->dev, PTR_ERR(rdev),
- "Failed to register %s regulator\n",
- regulators[i].name);
+ "Failed to register %s regulator\n",
+ pmic->rdesc[i].name);
}
- irq_data = devm_kmalloc(tps->dev,
- ARRAY_SIZE(tps65219_regulator_irq_types) *
- sizeof(struct tps65219_regulator_irq_data),
- GFP_KERNEL);
+ irq_data = devm_kmalloc(tps->dev, pmic->common_irq_size, GFP_KERNEL);
if (!irq_data)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(tps65219_regulator_irq_types); ++i) {
- irq_type = &tps65219_regulator_irq_types[i];
-
+ for (i = 0; i < pmic->common_irq_size; ++i) {
+ irq_type = &pmic->common_irq_types[i];
irq = platform_get_irq_byname(pdev, irq_type->irq_name);
if (irq < 0)
return -EINVAL;
irq_data[i].dev = tps->dev;
irq_data[i].type = irq_type;
+ error = devm_request_threaded_irq(tps->dev, irq, NULL,
+ tps65219_regulator_irq_handler,
+ IRQF_ONESHOT,
+ irq_type->irq_name,
+ &irq_data[i]);
+ if (error)
+ return dev_err_probe(tps->dev, PTR_ERR(rdev),
+ "Failed to request %s IRQ %d: %d\n",
+ irq_type->irq_name, irq, error);
+ }
+
+ irq_data = devm_kmalloc(tps->dev, pmic->dev_irq_size, GFP_KERNEL);
+ if (!irq_data)
+ return -ENOMEM;
+ for (i = 0; i < pmic->dev_irq_size; ++i) {
+ irq_type = &pmic->irq_types[i];
+ irq = platform_get_irq_byname(pdev, irq_type->irq_name);
+ if (irq < 0)
+ return -EINVAL;
+
+ irq_data[i].dev = tps->dev;
+ irq_data[i].type = irq_type;
error = devm_request_threaded_irq(tps->dev, irq, NULL,
tps65219_regulator_irq_handler,
IRQF_ONESHOT,
irq_type->irq_name,
&irq_data[i]);
- if (error) {
- dev_err(tps->dev, "failed to request %s IRQ %d: %d\n",
- irq_type->irq_name, irq, error);
- return error;
- }
+ if (error)
+ return dev_err_probe(tps->dev, PTR_ERR(rdev),
+ "Failed to request %s IRQ %d: %d\n",
+ irq_type->irq_name, irq, error);
}
return 0;
}
static const struct platform_device_id tps65219_regulator_id_table[] = {
- { "tps65219-regulator", },
+ { "tps65214-regulator", TPS65214 },
+ { "tps65215-regulator", TPS65215 },
+ { "tps65219-regulator", TPS65219 },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(platform, tps65219_regulator_id_table);
@@ -361,5 +505,5 @@ static struct platform_driver tps65219_regulator_driver = {
module_platform_driver(tps65219_regulator_driver);
MODULE_AUTHOR("Jerome Neanne <j-neanne@baylibre.com>");
-MODULE_DESCRIPTION("TPS65219 voltage regulator driver");
+MODULE_DESCRIPTION("TPS65214/TPS65215/TPS65219 Regulator driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index 775b056d795a..2c7e519a2254 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -456,7 +456,8 @@ static int wcnss_init_regulators(struct qcom_wcnss *wcnss,
if (wcnss->num_pds) {
info += wcnss->num_pds;
/* Handle single power domain case */
- num_vregs += num_pd_vregs - wcnss->num_pds;
+ if (wcnss->num_pds < num_pd_vregs)
+ num_vregs += num_pd_vregs - wcnss->num_pds;
} else {
num_vregs += num_pd_vregs;
}
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 7248e547fefb..cdc7b2f16b88 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -314,7 +314,7 @@ dcssblk_load_segment(char *name, struct segment_info **seg_info)
if (*seg_info == NULL)
return -ENOMEM;
- strcpy((*seg_info)->segment_name, name);
+ strscpy((*seg_info)->segment_name, name);
/* load the segment */
rc = segment_load(name, SEGMENT_SHARED,
@@ -612,7 +612,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
rc = -ENOMEM;
goto out;
}
- strcpy(dev_info->segment_name, local_buf);
+ strscpy(dev_info->segment_name, local_buf);
dev_info->segment_type = seg_info->segment_type;
INIT_LIST_HEAD(&dev_info->seg_list);
}
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 34f3820d7f74..8402a0042c0d 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -102,6 +102,7 @@ struct tty3270 {
/* Input stuff. */
char *prompt; /* Output string for input area. */
+ size_t prompt_sz; /* Size of output string. */
char *input; /* Input string for read request. */
struct raw3270_request *read; /* Single read request. */
struct raw3270_request *kreset; /* Single keyboard reset request. */
@@ -206,7 +207,7 @@ static int tty3270_input_size(int cols)
static void tty3270_update_prompt(struct tty3270 *tp, char *input)
{
- strcpy(tp->prompt, input);
+ strscpy(tp->prompt, input, tp->prompt_sz);
tp->update_flags |= TTY_UPDATE_INPUT;
tty3270_set_timer(tp, 1);
}
@@ -971,6 +972,7 @@ static void tty3270_resize(struct raw3270_view *view,
char *old_input, *new_input;
struct tty_struct *tty;
struct winsize ws;
+ size_t prompt_sz;
int new_allocated, old_allocated = tp->allocated_lines;
if (old_model == new_model &&
@@ -982,10 +984,11 @@ static void tty3270_resize(struct raw3270_view *view,
return;
}
- new_input = kzalloc(tty3270_input_size(new_cols), GFP_KERNEL | GFP_DMA);
+ prompt_sz = tty3270_input_size(new_cols);
+ new_input = kzalloc(prompt_sz, GFP_KERNEL | GFP_DMA);
if (!new_input)
return;
- new_prompt = kzalloc(tty3270_input_size(new_cols), GFP_KERNEL);
+ new_prompt = kzalloc(prompt_sz, GFP_KERNEL);
if (!new_prompt)
goto out_input;
screen = tty3270_alloc_screen(tp, new_rows, new_cols, &new_allocated);
@@ -1010,6 +1013,7 @@ static void tty3270_resize(struct raw3270_view *view,
old_rcl_lines = tp->rcl_lines;
tp->input = new_input;
tp->prompt = new_prompt;
+ tp->prompt_sz = prompt_sz;
tp->rcl_lines = new_rcl_lines;
tp->rcl_read_index = 0;
tp->rcl_write_index = 0;
@@ -1096,6 +1100,7 @@ static int
tty3270_create_view(int index, struct tty3270 **newtp)
{
struct tty3270 *tp;
+ size_t prompt_sz;
int rc;
if (tty3270_max_index < index + 1)
@@ -1125,17 +1130,19 @@ tty3270_create_view(int index, struct tty3270 **newtp)
goto out_free_screen;
}
- tp->input = kzalloc(tty3270_input_size(tp->view.cols), GFP_KERNEL | GFP_DMA);
+ prompt_sz = tty3270_input_size(tp->view.cols);
+ tp->input = kzalloc(prompt_sz, GFP_KERNEL | GFP_DMA);
if (!tp->input) {
rc = -ENOMEM;
goto out_free_converted_line;
}
- tp->prompt = kzalloc(tty3270_input_size(tp->view.cols), GFP_KERNEL);
+ tp->prompt = kzalloc(prompt_sz, GFP_KERNEL);
if (!tp->prompt) {
rc = -ENOMEM;
goto out_free_input;
}
+ tp->prompt_sz = prompt_sz;
tp->rcl_lines = tty3270_alloc_recall(tp->view.cols);
if (!tp->rcl_lines) {
diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c
index 711f6982438e..f41b39c9d267 100644
--- a/drivers/s390/char/diag_ftp.c
+++ b/drivers/s390/char/diag_ftp.c
@@ -159,7 +159,7 @@ ssize_t diag_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize)
goto out;
}
- len = strscpy(ldfpl->fident, ftp->fname, sizeof(ldfpl->fident));
+ len = strscpy(ldfpl->fident, ftp->fname);
if (len < 0) {
len = -EINVAL;
goto out_free;
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 1564cd7e3f59..288734cd8f4b 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -41,6 +41,7 @@
#include <linux/module.h>
#include <asm/uv.h>
#include <asm/chsc.h>
+#include <linux/mempool.h>
#include "ap_bus.h"
#include "ap_debug.h"
@@ -103,6 +104,27 @@ static struct ap_config_info *const ap_qci_info_old = &qci[1];
debug_info_t *ap_dbf_info;
/*
+ * There is a need for a do-not-allocate-memory path through the AP bus
+ * layer. The pkey layer may be triggered via the in-kernel interface from
+ * a protected key crypto algorithm (namely PAES) to convert a secure key
+ * into a protected key. This happens in a workqueue context, so sleeping
+ * is allowed but memory allocations causing IO operations are not permitted.
+ * To accomplish this, an AP message memory pool with pre-allocated space
+ * is established. When ap_init_apmsg() with use_mempool set to true is
+ * called, instead of kmalloc() the ap message buffer is allocated from
+ * the ap_msg_pool. This pool only holds a limited amount of buffers:
+ * ap_msg_pool_min_items with the item size AP_DEFAULT_MAX_MSG_SIZE and
+ * exactly one of these items (if available) is returned if ap_init_apmsg()
+ * with the use_mempool arg set to true is called. When this pool is exhausted
+ * and use_mempool is set true, ap_init_apmsg() returns -ENOMEM without
+ * any attempt to allocate memory and the caller has to deal with that.
+ */
+static mempool_t *ap_msg_pool;
+static unsigned int ap_msg_pool_min_items = 8;
+module_param_named(msgpool_min_items, ap_msg_pool_min_items, uint, 0440);
+MODULE_PARM_DESC(msgpool_min_items, "AP message pool minimal items");
+
+/*
* AP bus rescan related things.
*/
static bool ap_scan_bus(void);
@@ -547,6 +569,48 @@ static void ap_poll_thread_stop(void)
#define is_card_dev(x) ((x)->parent == ap_root_device)
#define is_queue_dev(x) ((x)->parent != ap_root_device)
+/*
+ * ap_init_apmsg() - Initialize ap_message.
+ */
+int ap_init_apmsg(struct ap_message *ap_msg, u32 flags)
+{
+ unsigned int maxmsgsize;
+
+ memset(ap_msg, 0, sizeof(*ap_msg));
+ ap_msg->flags = flags;
+
+ if (flags & AP_MSG_FLAG_MEMPOOL) {
+ ap_msg->msg = mempool_alloc_preallocated(ap_msg_pool);
+ if (!ap_msg->msg)
+ return -ENOMEM;
+ ap_msg->bufsize = AP_DEFAULT_MAX_MSG_SIZE;
+ return 0;
+ }
+
+ maxmsgsize = atomic_read(&ap_max_msg_size);
+ ap_msg->msg = kmalloc(maxmsgsize, GFP_KERNEL);
+ if (!ap_msg->msg)
+ return -ENOMEM;
+ ap_msg->bufsize = maxmsgsize;
+
+ return 0;
+}
+EXPORT_SYMBOL(ap_init_apmsg);
+
+/*
+ * ap_release_apmsg() - Release ap_message.
+ */
+void ap_release_apmsg(struct ap_message *ap_msg)
+{
+ if (ap_msg->flags & AP_MSG_FLAG_MEMPOOL) {
+ memzero_explicit(ap_msg->msg, ap_msg->bufsize);
+ mempool_free(ap_msg->msg, ap_msg_pool);
+ } else {
+ kfree_sensitive(ap_msg->msg);
+ }
+}
+EXPORT_SYMBOL(ap_release_apmsg);
+
/**
* ap_bus_match()
* @dev: Pointer to device
@@ -2431,6 +2495,14 @@ static int __init ap_module_init(void)
/* init ap_queue hashtable */
hash_init(ap_queues);
+ /* create ap msg buffer memory pool */
+ ap_msg_pool = mempool_create_kmalloc_pool(ap_msg_pool_min_items,
+ AP_DEFAULT_MAX_MSG_SIZE);
+ if (!ap_msg_pool) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
/* set up the AP permissions (ioctls, ap and aq masks) */
ap_perms_init();
@@ -2477,6 +2549,7 @@ out_device:
out_bus:
bus_unregister(&ap_bus_type);
out:
+ mempool_destroy(ap_msg_pool);
ap_debug_exit();
return rc;
}
@@ -2487,6 +2560,7 @@ static void __exit ap_module_exit(void)
ap_irq_exit();
root_device_unregister(ap_root_device);
bus_unregister(&ap_bus_type);
+ mempool_destroy(ap_msg_pool);
ap_debug_exit();
}
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index f4622ee4d894..88b625ba1978 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -214,6 +214,11 @@ struct ap_queue {
typedef enum ap_sm_wait (ap_func_t)(struct ap_queue *queue);
+struct ap_response_type {
+ struct completion work;
+ int type;
+};
+
struct ap_message {
struct list_head list; /* Request queueing. */
unsigned long psmid; /* Message id. */
@@ -222,7 +227,7 @@ struct ap_message {
size_t bufsize; /* allocated msg buffer size */
u16 flags; /* Flags, see AP_MSG_FLAG_xxx */
int rc; /* Return code for this message */
- void *private; /* ap driver private pointer. */
+ struct ap_response_type response;
/* receive is called from tasklet context */
void (*receive)(struct ap_queue *, struct ap_message *,
struct ap_message *);
@@ -231,27 +236,10 @@ struct ap_message {
#define AP_MSG_FLAG_SPECIAL 0x0001 /* flag msg as 'special' with NQAP */
#define AP_MSG_FLAG_USAGE 0x0002 /* CCA, EP11: usage (no admin) msg */
#define AP_MSG_FLAG_ADMIN 0x0004 /* CCA, EP11: admin (=control) msg */
+#define AP_MSG_FLAG_MEMPOOL 0x0008 /* ap msg buffer allocated via mempool */
-/**
- * ap_init_message() - Initialize ap_message.
- * Initialize a message before using. Otherwise this might result in
- * unexpected behaviour.
- */
-static inline void ap_init_message(struct ap_message *ap_msg)
-{
- memset(ap_msg, 0, sizeof(*ap_msg));
-}
-
-/**
- * ap_release_message() - Release ap_message.
- * Releases all memory used internal within the ap_message struct
- * Currently this is the message and private field.
- */
-static inline void ap_release_message(struct ap_message *ap_msg)
-{
- kfree_sensitive(ap_msg->msg);
- kfree_sensitive(ap_msg->private);
-}
+int ap_init_apmsg(struct ap_message *ap_msg, u32 flags);
+void ap_release_apmsg(struct ap_message *ap_msg);
enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event);
enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event);
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 3a39e167bdbf..cef60770f68b 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -24,7 +24,8 @@
*/
static int key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
const u8 *key, size_t keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags)
{
int rc;
@@ -32,14 +33,14 @@ static int key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
rc = pkey_handler_key_to_protkey(apqns, nr_apqns,
key, keylen,
protkey, protkeylen,
- protkeytype);
+ protkeytype, xflags);
/* if this did not work, try the slowpath way */
if (rc == -ENODEV) {
rc = pkey_handler_slowpath_key_to_protkey(apqns, nr_apqns,
key, keylen,
protkey, protkeylen,
- protkeytype);
+ protkeytype, xflags);
if (rc)
rc = -ENODEV;
}
@@ -52,16 +53,16 @@ static int key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
* In-Kernel function: Transform a key blob (of any type) into a protected key
*/
int pkey_key2protkey(const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype, u32 xflags)
{
int rc;
rc = key2protkey(NULL, 0, key, keylen,
- protkey, protkeylen, protkeytype);
+ protkey, protkeylen, protkeytype, xflags);
if (rc == -ENODEV) {
pkey_handler_request_modules();
rc = key2protkey(NULL, 0, key, keylen,
- protkey, protkeylen, protkeytype);
+ protkey, protkeylen, protkeytype, xflags);
}
return rc;
@@ -103,7 +104,7 @@ static int pkey_ioctl_genseck(struct pkey_genseck __user *ugs)
keybuflen = sizeof(kgs.seckey.seckey);
rc = pkey_handler_gen_key(&apqn, 1,
kgs.keytype, PKEY_TYPE_CCA_DATA, 0, 0,
- kgs.seckey.seckey, &keybuflen, NULL);
+ kgs.seckey.seckey, &keybuflen, NULL, 0);
pr_debug("gen_key()=%d\n", rc);
if (!rc && copy_to_user(ugs, &kgs, sizeof(kgs)))
rc = -EFAULT;
@@ -129,7 +130,7 @@ static int pkey_ioctl_clr2seck(struct pkey_clr2seck __user *ucs)
kcs.keytype, PKEY_TYPE_CCA_DATA, 0, 0,
kcs.clrkey.clrkey,
pkey_keytype_aes_to_size(kcs.keytype),
- kcs.seckey.seckey, &keybuflen, NULL);
+ kcs.seckey.seckey, &keybuflen, NULL, 0);
pr_debug("clr_to_key()=%d\n", rc);
if (!rc && copy_to_user(ucs, &kcs, sizeof(kcs)))
rc = -EFAULT;
@@ -154,7 +155,8 @@ static int pkey_ioctl_sec2protk(struct pkey_sec2protk __user *usp)
ksp.seckey.seckey,
sizeof(ksp.seckey.seckey),
ksp.protkey.protkey,
- &ksp.protkey.len, &ksp.protkey.type);
+ &ksp.protkey.len, &ksp.protkey.type,
+ 0);
pr_debug("key_to_protkey()=%d\n", rc);
if (!rc && copy_to_user(usp, &ksp, sizeof(ksp)))
rc = -EFAULT;
@@ -198,7 +200,7 @@ static int pkey_ioctl_clr2protk(struct pkey_clr2protk __user *ucp)
rc = key2protkey(NULL, 0,
tmpbuf, sizeof(*t) + keylen,
kcp.protkey.protkey,
- &kcp.protkey.len, &kcp.protkey.type);
+ &kcp.protkey.len, &kcp.protkey.type, 0);
pr_debug("key2protkey()=%d\n", rc);
kfree_sensitive(tmpbuf);
@@ -228,12 +230,12 @@ static int pkey_ioctl_findcard(struct pkey_findcard __user *ufc)
rc = pkey_handler_apqns_for_key(kfc.seckey.seckey,
sizeof(kfc.seckey.seckey),
PKEY_FLAGS_MATCH_CUR_MKVP,
- apqns, &nr_apqns);
+ apqns, &nr_apqns, 0);
if (rc == -ENODEV)
rc = pkey_handler_apqns_for_key(kfc.seckey.seckey,
sizeof(kfc.seckey.seckey),
PKEY_FLAGS_MATCH_ALT_MKVP,
- apqns, &nr_apqns);
+ apqns, &nr_apqns, 0);
pr_debug("apqns_for_key()=%d\n", rc);
if (rc) {
kfree(apqns);
@@ -262,7 +264,7 @@ static int pkey_ioctl_skey2pkey(struct pkey_skey2pkey __user *usp)
sizeof(ksp.seckey.seckey),
ksp.protkey.protkey,
&ksp.protkey.len,
- &ksp.protkey.type);
+ &ksp.protkey.type, 0);
pr_debug("key_to_protkey()=%d\n", rc);
if (!rc && copy_to_user(usp, &ksp, sizeof(ksp)))
rc = -EFAULT;
@@ -285,7 +287,7 @@ static int pkey_ioctl_verifykey(struct pkey_verifykey __user *uvk)
rc = pkey_handler_verify_key(kvk.seckey.seckey,
sizeof(kvk.seckey.seckey),
&kvk.cardnr, &kvk.domain,
- &keytype, &keybitsize, &flags);
+ &keytype, &keybitsize, &flags, 0);
pr_debug("verify_key()=%d\n", rc);
if (!rc && keytype != PKEY_TYPE_CCA_DATA)
rc = -EINVAL;
@@ -312,7 +314,7 @@ static int pkey_ioctl_genprotk(struct pkey_genprotk __user *ugp)
rc = pkey_handler_gen_key(NULL, 0, kgp.keytype,
PKEY_TYPE_PROTKEY, 0, 0,
kgp.protkey.protkey, &kgp.protkey.len,
- &kgp.protkey.type);
+ &kgp.protkey.type, 0);
pr_debug("gen_key()=%d\n", rc);
if (!rc && copy_to_user(ugp, &kgp, sizeof(kgp)))
rc = -EFAULT;
@@ -354,7 +356,7 @@ static int pkey_ioctl_verifyprotk(struct pkey_verifyprotk __user *uvp)
memcpy(t->protkey, kvp.protkey.protkey, kvp.protkey.len);
rc = pkey_handler_verify_key(tmpbuf, sizeof(*t),
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, 0);
pr_debug("verify_key()=%d\n", rc);
kfree_sensitive(tmpbuf);
@@ -377,7 +379,7 @@ static int pkey_ioctl_kblob2protk(struct pkey_kblob2pkey __user *utp)
ktp.protkey.len = sizeof(ktp.protkey.protkey);
rc = key2protkey(NULL, 0, kkey, ktp.keylen,
ktp.protkey.protkey, &ktp.protkey.len,
- &ktp.protkey.type);
+ &ktp.protkey.type, 0);
pr_debug("key2protkey()=%d\n", rc);
kfree_sensitive(kkey);
if (!rc && copy_to_user(utp, &ktp, sizeof(ktp)))
@@ -414,7 +416,7 @@ static int pkey_ioctl_genseck2(struct pkey_genseck2 __user *ugs)
}
rc = pkey_handler_gen_key(apqns, kgs.apqn_entries,
u, kgs.type, kgs.size, kgs.keygenflags,
- kkey, &klen, NULL);
+ kkey, &klen, NULL, 0);
pr_debug("gen_key()=%d\n", rc);
kfree(apqns);
if (rc) {
@@ -471,7 +473,7 @@ static int pkey_ioctl_clr2seck2(struct pkey_clr2seck2 __user *ucs)
rc = pkey_handler_clr_to_key(apqns, kcs.apqn_entries,
u, kcs.type, kcs.size, kcs.keygenflags,
kcs.clrkey.clrkey, kcs.size / 8,
- kkey, &klen, NULL);
+ kkey, &klen, NULL, 0);
pr_debug("clr_to_key()=%d\n", rc);
kfree(apqns);
if (rc) {
@@ -514,7 +516,7 @@ static int pkey_ioctl_verifykey2(struct pkey_verifykey2 __user *uvk)
rc = pkey_handler_verify_key(kkey, kvk.keylen,
&kvk.cardnr, &kvk.domain,
- &kvk.type, &kvk.size, &kvk.flags);
+ &kvk.type, &kvk.size, &kvk.flags, 0);
pr_debug("verify_key()=%d\n", rc);
kfree_sensitive(kkey);
@@ -544,7 +546,7 @@ static int pkey_ioctl_kblob2protk2(struct pkey_kblob2pkey2 __user *utp)
ktp.protkey.len = sizeof(ktp.protkey.protkey);
rc = key2protkey(apqns, ktp.apqn_entries, kkey, ktp.keylen,
ktp.protkey.protkey, &ktp.protkey.len,
- &ktp.protkey.type);
+ &ktp.protkey.type, 0);
pr_debug("key2protkey()=%d\n", rc);
kfree(apqns);
kfree_sensitive(kkey);
@@ -579,7 +581,7 @@ static int pkey_ioctl_apqns4k(struct pkey_apqns4key __user *uak)
return PTR_ERR(kkey);
}
rc = pkey_handler_apqns_for_key(kkey, kak.keylen, kak.flags,
- apqns, &nr_apqns);
+ apqns, &nr_apqns, 0);
pr_debug("apqns_for_key()=%d\n", rc);
kfree_sensitive(kkey);
if (rc && rc != -ENOSPC) {
@@ -626,7 +628,7 @@ static int pkey_ioctl_apqns4kt(struct pkey_apqns4keytype __user *uat)
}
rc = pkey_handler_apqns_for_keytype(kat.type,
kat.cur_mkvp, kat.alt_mkvp,
- kat.flags, apqns, &nr_apqns);
+ kat.flags, apqns, &nr_apqns, 0);
pr_debug("apqns_for_keytype()=%d\n", rc);
if (rc && rc != -ENOSPC) {
kfree(apqns);
@@ -678,7 +680,7 @@ static int pkey_ioctl_kblob2protk3(struct pkey_kblob2pkey3 __user *utp)
return -ENOMEM;
}
rc = key2protkey(apqns, ktp.apqn_entries, kkey, ktp.keylen,
- protkey, &protkeylen, &ktp.pkeytype);
+ protkey, &protkeylen, &ktp.pkeytype, 0);
pr_debug("key2protkey()=%d\n", rc);
kfree(apqns);
kfree_sensitive(kkey);
diff --git a/drivers/s390/crypto/pkey_base.c b/drivers/s390/crypto/pkey_base.c
index 64a376501d26..9e6f319acc63 100644
--- a/drivers/s390/crypto/pkey_base.c
+++ b/drivers/s390/crypto/pkey_base.c
@@ -150,7 +150,8 @@ EXPORT_SYMBOL(pkey_handler_put);
int pkey_handler_key_to_protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags)
{
const struct pkey_handler *h;
int rc = -ENODEV;
@@ -159,7 +160,7 @@ int pkey_handler_key_to_protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
if (h && h->key_to_protkey) {
rc = h->key_to_protkey(apqns, nr_apqns, key, keylen,
protkey, protkeylen,
- protkeytype);
+ protkeytype, xflags);
}
pkey_handler_put(h);
@@ -177,7 +178,7 @@ int pkey_handler_slowpath_key_to_protkey(const struct pkey_apqn *apqns,
size_t nr_apqns,
const u8 *key, u32 keylen,
u8 *protkey, u32 *protkeylen,
- u32 *protkeytype)
+ u32 *protkeytype, u32 xflags)
{
const struct pkey_handler *h, *htmp[10];
int i, n = 0, rc = -ENODEV;
@@ -199,7 +200,7 @@ int pkey_handler_slowpath_key_to_protkey(const struct pkey_apqn *apqns,
rc = h->slowpath_key_to_protkey(apqns, nr_apqns,
key, keylen,
protkey, protkeylen,
- protkeytype);
+ protkeytype, xflags);
module_put(h->module);
}
@@ -210,7 +211,7 @@ EXPORT_SYMBOL(pkey_handler_slowpath_key_to_protkey);
int pkey_handler_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 keysubtype,
u32 keybitsize, u32 flags,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo, u32 xflags)
{
const struct pkey_handler *h;
int rc = -ENODEV;
@@ -219,7 +220,7 @@ int pkey_handler_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
if (h && h->gen_key) {
rc = h->gen_key(apqns, nr_apqns, keytype, keysubtype,
keybitsize, flags,
- keybuf, keybuflen, keyinfo);
+ keybuf, keybuflen, keyinfo, xflags);
}
pkey_handler_put(h);
@@ -231,7 +232,8 @@ int pkey_handler_clr_to_key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 keysubtype,
u32 keybitsize, u32 flags,
const u8 *clrkey, u32 clrkeylen,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo,
+ u32 xflags)
{
const struct pkey_handler *h;
int rc = -ENODEV;
@@ -240,7 +242,7 @@ int pkey_handler_clr_to_key(const struct pkey_apqn *apqns, size_t nr_apqns,
if (h && h->clr_to_key) {
rc = h->clr_to_key(apqns, nr_apqns, keytype, keysubtype,
keybitsize, flags, clrkey, clrkeylen,
- keybuf, keybuflen, keyinfo);
+ keybuf, keybuflen, keyinfo, xflags);
}
pkey_handler_put(h);
@@ -250,7 +252,8 @@ EXPORT_SYMBOL(pkey_handler_clr_to_key);
int pkey_handler_verify_key(const u8 *key, u32 keylen,
u16 *card, u16 *dom,
- u32 *keytype, u32 *keybitsize, u32 *flags)
+ u32 *keytype, u32 *keybitsize, u32 *flags,
+ u32 xflags)
{
const struct pkey_handler *h;
int rc = -ENODEV;
@@ -258,7 +261,7 @@ int pkey_handler_verify_key(const u8 *key, u32 keylen,
h = pkey_handler_get_keybased(key, keylen);
if (h && h->verify_key) {
rc = h->verify_key(key, keylen, card, dom,
- keytype, keybitsize, flags);
+ keytype, keybitsize, flags, xflags);
}
pkey_handler_put(h);
@@ -267,14 +270,16 @@ int pkey_handler_verify_key(const u8 *key, u32 keylen,
EXPORT_SYMBOL(pkey_handler_verify_key);
int pkey_handler_apqns_for_key(const u8 *key, u32 keylen, u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns)
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 xflags)
{
const struct pkey_handler *h;
int rc = -ENODEV;
h = pkey_handler_get_keybased(key, keylen);
if (h && h->apqns_for_key)
- rc = h->apqns_for_key(key, keylen, flags, apqns, nr_apqns);
+ rc = h->apqns_for_key(key, keylen, flags, apqns, nr_apqns,
+ xflags);
pkey_handler_put(h);
return rc;
@@ -283,7 +288,8 @@ EXPORT_SYMBOL(pkey_handler_apqns_for_key);
int pkey_handler_apqns_for_keytype(enum pkey_key_type keysubtype,
u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns)
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 xflags)
{
const struct pkey_handler *h;
int rc = -ENODEV;
@@ -292,7 +298,7 @@ int pkey_handler_apqns_for_keytype(enum pkey_key_type keysubtype,
if (h && h->apqns_for_keytype) {
rc = h->apqns_for_keytype(keysubtype,
cur_mkvp, alt_mkvp, flags,
- apqns, nr_apqns);
+ apqns, nr_apqns, xflags);
}
pkey_handler_put(h);
diff --git a/drivers/s390/crypto/pkey_base.h b/drivers/s390/crypto/pkey_base.h
index 7347647dfaa7..9cdb3e74477f 100644
--- a/drivers/s390/crypto/pkey_base.h
+++ b/drivers/s390/crypto/pkey_base.h
@@ -159,29 +159,33 @@ struct pkey_handler {
bool (*is_supported_keytype)(enum pkey_key_type);
int (*key_to_protkey)(const struct pkey_apqn *apqns, size_t nr_apqns,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
int (*slowpath_key_to_protkey)(const struct pkey_apqn *apqns,
size_t nr_apqns,
const u8 *key, u32 keylen,
u8 *protkey, u32 *protkeylen,
- u32 *protkeytype);
+ u32 *protkeytype, u32 xflags);
int (*gen_key)(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 keysubtype,
u32 keybitsize, u32 flags,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo);
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo, u32 xflags);
int (*clr_to_key)(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 keysubtype,
u32 keybitsize, u32 flags,
const u8 *clrkey, u32 clrkeylen,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo);
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo, u32 xflags);
int (*verify_key)(const u8 *key, u32 keylen,
u16 *card, u16 *dom,
- u32 *keytype, u32 *keybitsize, u32 *flags);
+ u32 *keytype, u32 *keybitsize, u32 *flags,
+ u32 xflags);
int (*apqns_for_key)(const u8 *key, u32 keylen, u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns);
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 xflags);
int (*apqns_for_keytype)(enum pkey_key_type ktype,
u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns);
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 xflags);
/* used internal by pkey base */
struct list_head list;
};
@@ -199,29 +203,34 @@ void pkey_handler_put(const struct pkey_handler *handler);
int pkey_handler_key_to_protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
int pkey_handler_slowpath_key_to_protkey(const struct pkey_apqn *apqns,
size_t nr_apqns,
const u8 *key, u32 keylen,
u8 *protkey, u32 *protkeylen,
- u32 *protkeytype);
+ u32 *protkeytype, u32 xflags);
int pkey_handler_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 keysubtype,
u32 keybitsize, u32 flags,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo);
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo, u32 xflags);
int pkey_handler_clr_to_key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 keysubtype,
u32 keybitsize, u32 flags,
const u8 *clrkey, u32 clrkeylen,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo);
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo,
+ u32 xflags);
int pkey_handler_verify_key(const u8 *key, u32 keylen,
u16 *card, u16 *dom,
- u32 *keytype, u32 *keybitsize, u32 *flags);
+ u32 *keytype, u32 *keybitsize, u32 *flags,
+ u32 xflags);
int pkey_handler_apqns_for_key(const u8 *key, u32 keylen, u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns);
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 xflags);
int pkey_handler_apqns_for_keytype(enum pkey_key_type ktype,
u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns);
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 xflags);
/*
* Unconditional try to load all handler modules
diff --git a/drivers/s390/crypto/pkey_cca.c b/drivers/s390/crypto/pkey_cca.c
index cda22db31f6c..6c7897a93f27 100644
--- a/drivers/s390/crypto/pkey_cca.c
+++ b/drivers/s390/crypto/pkey_cca.c
@@ -70,12 +70,15 @@ static bool is_cca_keytype(enum pkey_key_type key_type)
}
static int cca_apqns4key(const u8 *key, u32 keylen, u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns)
+ struct pkey_apqn *apqns, size_t *nr_apqns, u32 pflags)
{
struct keytoken_header *hdr = (struct keytoken_header *)key;
- u32 _nr_apqns, *_apqns = NULL;
+ u32 _apqns[MAXAPQNSINLIST], _nr_apqns = ARRAY_SIZE(_apqns);
+ u32 xflags;
int rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
if (!flags)
flags = PKEY_FLAGS_MATCH_CUR_MKVP | PKEY_FLAGS_MATCH_ALT_MKVP;
@@ -107,9 +110,9 @@ static int cca_apqns4key(const u8 *key, u32 keylen, u32 flags,
/* unknown CCA internal token type */
return -EINVAL;
}
- rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ rc = cca_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
minhwtype, AES_MK_SET,
- cur_mkvp, old_mkvp, 1);
+ cur_mkvp, old_mkvp, xflags);
if (rc)
goto out;
@@ -126,9 +129,9 @@ static int cca_apqns4key(const u8 *key, u32 keylen, u32 flags,
/* unknown CCA internal 2 token type */
return -EINVAL;
}
- rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ rc = cca_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
ZCRYPT_CEX7, APKA_MK_SET,
- cur_mkvp, old_mkvp, 1);
+ cur_mkvp, old_mkvp, xflags);
if (rc)
goto out;
@@ -147,18 +150,21 @@ static int cca_apqns4key(const u8 *key, u32 keylen, u32 flags,
*nr_apqns = _nr_apqns;
out:
- kfree(_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
static int cca_apqns4type(enum pkey_key_type ktype,
u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns)
+ struct pkey_apqn *apqns, size_t *nr_apqns,
+ u32 pflags)
{
- u32 _nr_apqns, *_apqns = NULL;
+ u32 _apqns[MAXAPQNSINLIST], _nr_apqns = ARRAY_SIZE(_apqns);
+ u32 xflags;
int rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
zcrypt_wait_api_operational();
if (ktype == PKEY_TYPE_CCA_DATA || ktype == PKEY_TYPE_CCA_CIPHER) {
@@ -171,9 +177,9 @@ static int cca_apqns4type(enum pkey_key_type ktype,
old_mkvp = *((u64 *)alt_mkvp);
if (ktype == PKEY_TYPE_CCA_CIPHER)
minhwtype = ZCRYPT_CEX6;
- rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ rc = cca_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
minhwtype, AES_MK_SET,
- cur_mkvp, old_mkvp, 1);
+ cur_mkvp, old_mkvp, xflags);
if (rc)
goto out;
@@ -184,9 +190,9 @@ static int cca_apqns4type(enum pkey_key_type ktype,
cur_mkvp = *((u64 *)cur_mkvp);
if (flags & PKEY_FLAGS_MATCH_ALT_MKVP)
old_mkvp = *((u64 *)alt_mkvp);
- rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ rc = cca_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
ZCRYPT_CEX7, APKA_MK_SET,
- cur_mkvp, old_mkvp, 1);
+ cur_mkvp, old_mkvp, xflags);
if (rc)
goto out;
@@ -205,19 +211,22 @@ static int cca_apqns4type(enum pkey_key_type ktype,
*nr_apqns = _nr_apqns;
out:
- kfree(_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
static int cca_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 pflags)
{
struct keytoken_header *hdr = (struct keytoken_header *)key;
- struct pkey_apqn *local_apqns = NULL;
+ struct pkey_apqn _apqns[MAXAPQNSINLIST];
+ u32 xflags;
int i, rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
if (keylen < sizeof(*hdr))
return -EINVAL;
@@ -253,14 +262,10 @@ static int cca_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
if (!apqns || (nr_apqns == 1 &&
apqns[0].card == 0xFFFF && apqns[0].domain == 0xFFFF)) {
nr_apqns = MAXAPQNSINLIST;
- local_apqns = kmalloc_array(nr_apqns, sizeof(struct pkey_apqn),
- GFP_KERNEL);
- if (!local_apqns)
- return -ENOMEM;
- rc = cca_apqns4key(key, keylen, 0, local_apqns, &nr_apqns);
+ rc = cca_apqns4key(key, keylen, 0, _apqns, &nr_apqns, pflags);
if (rc)
goto out;
- apqns = local_apqns;
+ apqns = _apqns;
}
for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
@@ -268,16 +273,16 @@ static int cca_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
hdr->version == TOKVER_CCA_AES) {
rc = cca_sec2protkey(apqns[i].card, apqns[i].domain,
key, protkey,
- protkeylen, protkeytype);
+ protkeylen, protkeytype, xflags);
} else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
hdr->version == TOKVER_CCA_VLSC) {
rc = cca_cipher2protkey(apqns[i].card, apqns[i].domain,
key, protkey,
- protkeylen, protkeytype);
+ protkeylen, protkeytype, xflags);
} else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) {
rc = cca_ecc2protkey(apqns[i].card, apqns[i].domain,
key, protkey,
- protkeylen, protkeytype);
+ protkeylen, protkeytype, xflags);
} else {
rc = -EINVAL;
break;
@@ -285,7 +290,6 @@ static int cca_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
}
out:
- kfree(local_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
@@ -302,10 +306,13 @@ out:
static int cca_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 subtype,
u32 keybitsize, u32 flags,
- u8 *keybuf, u32 *keybuflen, u32 *_keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *_keyinfo, u32 pflags)
{
- struct pkey_apqn *local_apqns = NULL;
+ struct pkey_apqn _apqns[MAXAPQNSINLIST];
int i, len, rc;
+ u32 xflags;
+
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
/* check keytype, subtype, keybitsize */
switch (keytype) {
@@ -340,32 +347,27 @@ static int cca_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
if (!apqns || (nr_apqns == 1 &&
apqns[0].card == 0xFFFF && apqns[0].domain == 0xFFFF)) {
nr_apqns = MAXAPQNSINLIST;
- local_apqns = kmalloc_array(nr_apqns, sizeof(struct pkey_apqn),
- GFP_KERNEL);
- if (!local_apqns)
- return -ENOMEM;
rc = cca_apqns4type(subtype, NULL, NULL, 0,
- local_apqns, &nr_apqns);
+ _apqns, &nr_apqns, pflags);
if (rc)
goto out;
- apqns = local_apqns;
+ apqns = _apqns;
}
for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
if (subtype == PKEY_TYPE_CCA_CIPHER) {
rc = cca_gencipherkey(apqns[i].card, apqns[i].domain,
keybitsize, flags,
- keybuf, keybuflen);
+ keybuf, keybuflen, xflags);
} else {
/* PKEY_TYPE_CCA_DATA */
rc = cca_genseckey(apqns[i].card, apqns[i].domain,
- keybitsize, keybuf);
+ keybitsize, keybuf, xflags);
*keybuflen = (rc ? 0 : SECKEYBLOBSIZE);
}
}
out:
- kfree(local_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
@@ -383,10 +385,13 @@ static int cca_clr2key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 subtype,
u32 keybitsize, u32 flags,
const u8 *clrkey, u32 clrkeylen,
- u8 *keybuf, u32 *keybuflen, u32 *_keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *_keyinfo, u32 pflags)
{
- struct pkey_apqn *local_apqns = NULL;
+ struct pkey_apqn _apqns[MAXAPQNSINLIST];
int i, len, rc;
+ u32 xflags;
+
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
/* check keytype, subtype, clrkeylen, keybitsize */
switch (keytype) {
@@ -426,44 +431,42 @@ static int cca_clr2key(const struct pkey_apqn *apqns, size_t nr_apqns,
if (!apqns || (nr_apqns == 1 &&
apqns[0].card == 0xFFFF && apqns[0].domain == 0xFFFF)) {
nr_apqns = MAXAPQNSINLIST;
- local_apqns = kmalloc_array(nr_apqns, sizeof(struct pkey_apqn),
- GFP_KERNEL);
- if (!local_apqns)
- return -ENOMEM;
rc = cca_apqns4type(subtype, NULL, NULL, 0,
- local_apqns, &nr_apqns);
+ _apqns, &nr_apqns, pflags);
if (rc)
goto out;
- apqns = local_apqns;
+ apqns = _apqns;
}
for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
if (subtype == PKEY_TYPE_CCA_CIPHER) {
rc = cca_clr2cipherkey(apqns[i].card, apqns[i].domain,
keybitsize, flags, clrkey,
- keybuf, keybuflen);
+ keybuf, keybuflen, xflags);
} else {
/* PKEY_TYPE_CCA_DATA */
rc = cca_clr2seckey(apqns[i].card, apqns[i].domain,
- keybitsize, clrkey, keybuf);
+ keybitsize, clrkey, keybuf, xflags);
*keybuflen = (rc ? 0 : SECKEYBLOBSIZE);
}
}
out:
- kfree(local_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
static int cca_verifykey(const u8 *key, u32 keylen,
u16 *card, u16 *dom,
- u32 *keytype, u32 *keybitsize, u32 *flags)
+ u32 *keytype, u32 *keybitsize, u32 *flags, u32 pflags)
{
struct keytoken_header *hdr = (struct keytoken_header *)key;
- u32 nr_apqns, *apqns = NULL;
+ u32 apqns[MAXAPQNSINLIST], nr_apqns = ARRAY_SIZE(apqns);
+ u32 xflags;
int rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
if (keylen < sizeof(*hdr))
return -EINVAL;
@@ -478,15 +481,16 @@ static int cca_verifykey(const u8 *key, u32 keylen,
goto out;
*keytype = PKEY_TYPE_CCA_DATA;
*keybitsize = t->bitsize;
- rc = cca_findcard2(&apqns, &nr_apqns, *card, *dom,
+ rc = cca_findcard2(apqns, &nr_apqns, *card, *dom,
ZCRYPT_CEX3C, AES_MK_SET,
- t->mkvp, 0, 1);
+ t->mkvp, 0, xflags);
if (!rc)
*flags = PKEY_FLAGS_MATCH_CUR_MKVP;
if (rc == -ENODEV) {
- rc = cca_findcard2(&apqns, &nr_apqns, *card, *dom,
+ nr_apqns = ARRAY_SIZE(apqns);
+ rc = cca_findcard2(apqns, &nr_apqns, *card, *dom,
ZCRYPT_CEX3C, AES_MK_SET,
- 0, t->mkvp, 1);
+ 0, t->mkvp, xflags);
if (!rc)
*flags = PKEY_FLAGS_MATCH_ALT_MKVP;
}
@@ -511,15 +515,16 @@ static int cca_verifykey(const u8 *key, u32 keylen,
*keybitsize = PKEY_SIZE_AES_192;
else if (!t->plfver && t->wpllen == 640)
*keybitsize = PKEY_SIZE_AES_256;
- rc = cca_findcard2(&apqns, &nr_apqns, *card, *dom,
+ rc = cca_findcard2(apqns, &nr_apqns, *card, *dom,
ZCRYPT_CEX6, AES_MK_SET,
- t->mkvp0, 0, 1);
+ t->mkvp0, 0, xflags);
if (!rc)
*flags = PKEY_FLAGS_MATCH_CUR_MKVP;
if (rc == -ENODEV) {
- rc = cca_findcard2(&apqns, &nr_apqns, *card, *dom,
+ nr_apqns = ARRAY_SIZE(apqns);
+ rc = cca_findcard2(apqns, &nr_apqns, *card, *dom,
ZCRYPT_CEX6, AES_MK_SET,
- 0, t->mkvp0, 1);
+ 0, t->mkvp0, xflags);
if (!rc)
*flags = PKEY_FLAGS_MATCH_ALT_MKVP;
}
@@ -535,7 +540,6 @@ static int cca_verifykey(const u8 *key, u32 keylen,
}
out:
- kfree(apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
@@ -551,12 +555,12 @@ static int cca_slowpath_key2protkey(const struct pkey_apqn *apqns,
size_t nr_apqns,
const u8 *key, u32 keylen,
u8 *protkey, u32 *protkeylen,
- u32 *protkeytype)
+ u32 *protkeytype, u32 pflags)
{
const struct keytoken_header *hdr = (const struct keytoken_header *)key;
const struct clearkeytoken *t = (const struct clearkeytoken *)key;
+ u8 tmpbuf[SECKEYBLOBSIZE]; /* 64 bytes */
u32 tmplen, keysize = 0;
- u8 *tmpbuf;
int i, rc;
if (keylen < sizeof(*hdr))
@@ -568,26 +572,20 @@ static int cca_slowpath_key2protkey(const struct pkey_apqn *apqns,
if (!keysize || t->len != keysize)
return -EINVAL;
- /* alloc tmp key buffer */
- tmpbuf = kmalloc(SECKEYBLOBSIZE, GFP_ATOMIC);
- if (!tmpbuf)
- return -ENOMEM;
-
/* try two times in case of failure */
for (i = 0, rc = -ENODEV; i < 2 && rc; i++) {
tmplen = SECKEYBLOBSIZE;
rc = cca_clr2key(NULL, 0, t->keytype, PKEY_TYPE_CCA_DATA,
8 * keysize, 0, t->clearkey, t->len,
- tmpbuf, &tmplen, NULL);
+ tmpbuf, &tmplen, NULL, pflags);
pr_debug("cca_clr2key()=%d\n", rc);
if (rc)
continue;
rc = cca_key2protkey(NULL, 0, tmpbuf, tmplen,
- protkey, protkeylen, protkeytype);
+ protkey, protkeylen, protkeytype, pflags);
pr_debug("cca_key2protkey()=%d\n", rc);
}
- kfree(tmpbuf);
pr_debug("rc=%d\n", rc);
return rc;
}
diff --git a/drivers/s390/crypto/pkey_ep11.c b/drivers/s390/crypto/pkey_ep11.c
index 5b033ca3e828..6b23adc560c8 100644
--- a/drivers/s390/crypto/pkey_ep11.c
+++ b/drivers/s390/crypto/pkey_ep11.c
@@ -70,12 +70,15 @@ static bool is_ep11_keytype(enum pkey_key_type key_type)
}
static int ep11_apqns4key(const u8 *key, u32 keylen, u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns)
+ struct pkey_apqn *apqns, size_t *nr_apqns, u32 pflags)
{
struct keytoken_header *hdr = (struct keytoken_header *)key;
- u32 _nr_apqns, *_apqns = NULL;
+ u32 _apqns[MAXAPQNSINLIST], _nr_apqns = ARRAY_SIZE(_apqns);
+ u32 xflags;
int rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
if (!flags)
flags = PKEY_FLAGS_MATCH_CUR_MKVP;
@@ -98,8 +101,8 @@ static int ep11_apqns4key(const u8 *key, u32 keylen, u32 flags,
minhwtype = ZCRYPT_CEX7;
api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
}
- rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
- minhwtype, api, kb->wkvp);
+ rc = ep11_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ minhwtype, api, kb->wkvp, xflags);
if (rc)
goto out;
@@ -115,8 +118,8 @@ static int ep11_apqns4key(const u8 *key, u32 keylen, u32 flags,
minhwtype = ZCRYPT_CEX7;
api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
}
- rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
- minhwtype, api, kb->wkvp);
+ rc = ep11_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ minhwtype, api, kb->wkvp, xflags);
if (rc)
goto out;
@@ -135,18 +138,20 @@ static int ep11_apqns4key(const u8 *key, u32 keylen, u32 flags,
*nr_apqns = _nr_apqns;
out:
- kfree(_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
static int ep11_apqns4type(enum pkey_key_type ktype,
u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags,
- struct pkey_apqn *apqns, size_t *nr_apqns)
+ struct pkey_apqn *apqns, size_t *nr_apqns, u32 pflags)
{
- u32 _nr_apqns, *_apqns = NULL;
+ u32 _apqns[MAXAPQNSINLIST], _nr_apqns = ARRAY_SIZE(_apqns);
+ u32 xflags;
int rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
zcrypt_wait_api_operational();
if (ktype == PKEY_TYPE_EP11 ||
@@ -158,8 +163,8 @@ static int ep11_apqns4type(enum pkey_key_type ktype,
if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
wkvp = cur_mkvp;
api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
- rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
- ZCRYPT_CEX7, api, wkvp);
+ rc = ep11_findcard2(_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ ZCRYPT_CEX7, api, wkvp, xflags);
if (rc)
goto out;
@@ -178,19 +183,22 @@ static int ep11_apqns4type(enum pkey_key_type ktype,
*nr_apqns = _nr_apqns;
out:
- kfree(_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
static int ep11_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 pflags)
{
struct keytoken_header *hdr = (struct keytoken_header *)key;
- struct pkey_apqn *local_apqns = NULL;
+ struct pkey_apqn _apqns[MAXAPQNSINLIST];
+ u32 xflags;
int i, rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
if (keylen < sizeof(*hdr))
return -EINVAL;
@@ -225,14 +233,10 @@ static int ep11_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
if (!apqns || (nr_apqns == 1 &&
apqns[0].card == 0xFFFF && apqns[0].domain == 0xFFFF)) {
nr_apqns = MAXAPQNSINLIST;
- local_apqns = kmalloc_array(nr_apqns, sizeof(struct pkey_apqn),
- GFP_KERNEL);
- if (!local_apqns)
- return -ENOMEM;
- rc = ep11_apqns4key(key, keylen, 0, local_apqns, &nr_apqns);
+ rc = ep11_apqns4key(key, keylen, 0, _apqns, &nr_apqns, pflags);
if (rc)
goto out;
- apqns = local_apqns;
+ apqns = _apqns;
}
for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
@@ -241,19 +245,19 @@ static int ep11_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
rc = ep11_kblob2protkey(apqns[i].card, apqns[i].domain,
key, hdr->len, protkey,
- protkeylen, protkeytype);
+ protkeylen, protkeytype, xflags);
} else if (hdr->type == TOKTYPE_NON_CCA &&
hdr->version == TOKVER_EP11_ECC_WITH_HEADER &&
is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
rc = ep11_kblob2protkey(apqns[i].card, apqns[i].domain,
key, hdr->len, protkey,
- protkeylen, protkeytype);
+ protkeylen, protkeytype, xflags);
} else if (hdr->type == TOKTYPE_NON_CCA &&
hdr->version == TOKVER_EP11_AES &&
is_ep11_keyblob(key)) {
rc = ep11_kblob2protkey(apqns[i].card, apqns[i].domain,
key, hdr->len, protkey,
- protkeylen, protkeytype);
+ protkeylen, protkeytype, xflags);
} else {
rc = -EINVAL;
break;
@@ -261,7 +265,6 @@ static int ep11_key2protkey(const struct pkey_apqn *apqns, size_t nr_apqns,
}
out:
- kfree(local_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
@@ -278,10 +281,13 @@ out:
static int ep11_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 subtype,
u32 keybitsize, u32 flags,
- u8 *keybuf, u32 *keybuflen, u32 *_keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *_keyinfo, u32 pflags)
{
- struct pkey_apqn *local_apqns = NULL;
+ struct pkey_apqn _apqns[MAXAPQNSINLIST];
int i, len, rc;
+ u32 xflags;
+
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
/* check keytype, subtype, keybitsize */
switch (keytype) {
@@ -316,25 +322,20 @@ static int ep11_gen_key(const struct pkey_apqn *apqns, size_t nr_apqns,
if (!apqns || (nr_apqns == 1 &&
apqns[0].card == 0xFFFF && apqns[0].domain == 0xFFFF)) {
nr_apqns = MAXAPQNSINLIST;
- local_apqns = kmalloc_array(nr_apqns, sizeof(struct pkey_apqn),
- GFP_KERNEL);
- if (!local_apqns)
- return -ENOMEM;
rc = ep11_apqns4type(subtype, NULL, NULL, 0,
- local_apqns, &nr_apqns);
+ _apqns, &nr_apqns, pflags);
if (rc)
goto out;
- apqns = local_apqns;
+ apqns = _apqns;
}
for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
rc = ep11_genaeskey(apqns[i].card, apqns[i].domain,
keybitsize, flags,
- keybuf, keybuflen, subtype);
+ keybuf, keybuflen, subtype, xflags);
}
out:
- kfree(local_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
@@ -352,10 +353,13 @@ static int ep11_clr2key(const struct pkey_apqn *apqns, size_t nr_apqns,
u32 keytype, u32 subtype,
u32 keybitsize, u32 flags,
const u8 *clrkey, u32 clrkeylen,
- u8 *keybuf, u32 *keybuflen, u32 *_keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *_keyinfo, u32 pflags)
{
- struct pkey_apqn *local_apqns = NULL;
+ struct pkey_apqn _apqns[MAXAPQNSINLIST];
int i, len, rc;
+ u32 xflags;
+
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
/* check keytype, subtype, clrkeylen, keybitsize */
switch (keytype) {
@@ -395,37 +399,35 @@ static int ep11_clr2key(const struct pkey_apqn *apqns, size_t nr_apqns,
if (!apqns || (nr_apqns == 1 &&
apqns[0].card == 0xFFFF && apqns[0].domain == 0xFFFF)) {
nr_apqns = MAXAPQNSINLIST;
- local_apqns = kmalloc_array(nr_apqns, sizeof(struct pkey_apqn),
- GFP_KERNEL);
- if (!local_apqns)
- return -ENOMEM;
rc = ep11_apqns4type(subtype, NULL, NULL, 0,
- local_apqns, &nr_apqns);
+ _apqns, &nr_apqns, pflags);
if (rc)
goto out;
- apqns = local_apqns;
+ apqns = _apqns;
}
for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
rc = ep11_clr2keyblob(apqns[i].card, apqns[i].domain,
keybitsize, flags, clrkey,
- keybuf, keybuflen, subtype);
+ keybuf, keybuflen, subtype, xflags);
}
out:
- kfree(local_apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
static int ep11_verifykey(const u8 *key, u32 keylen,
u16 *card, u16 *dom,
- u32 *keytype, u32 *keybitsize, u32 *flags)
+ u32 *keytype, u32 *keybitsize, u32 *flags, u32 pflags)
{
struct keytoken_header *hdr = (struct keytoken_header *)key;
- u32 nr_apqns, *apqns = NULL;
+ u32 apqns[MAXAPQNSINLIST], nr_apqns = ARRAY_SIZE(apqns);
+ u32 xflags;
int rc;
+ xflags = pflags & PKEY_XFLAG_NOMEMALLOC ? ZCRYPT_XFLAG_NOMEMALLOC : 0;
+
if (keylen < sizeof(*hdr))
return -EINVAL;
@@ -443,9 +445,9 @@ static int ep11_verifykey(const u8 *key, u32 keylen,
*keybitsize = kb->head.bitlen;
api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
- rc = ep11_findcard2(&apqns, &nr_apqns, *card, *dom,
+ rc = ep11_findcard2(apqns, &nr_apqns, *card, *dom,
ZCRYPT_CEX7, api,
- ep11_kb_wkvp(key, keylen));
+ ep11_kb_wkvp(key, keylen), xflags);
if (rc)
goto out;
@@ -467,9 +469,9 @@ static int ep11_verifykey(const u8 *key, u32 keylen,
*keybitsize = kh->bitlen;
api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
- rc = ep11_findcard2(&apqns, &nr_apqns, *card, *dom,
+ rc = ep11_findcard2(apqns, &nr_apqns, *card, *dom,
ZCRYPT_CEX7, api,
- ep11_kb_wkvp(key, keylen));
+ ep11_kb_wkvp(key, keylen), xflags);
if (rc)
goto out;
@@ -484,7 +486,6 @@ static int ep11_verifykey(const u8 *key, u32 keylen,
}
out:
- kfree(apqns);
pr_debug("rc=%d\n", rc);
return rc;
}
@@ -500,12 +501,12 @@ static int ep11_slowpath_key2protkey(const struct pkey_apqn *apqns,
size_t nr_apqns,
const u8 *key, u32 keylen,
u8 *protkey, u32 *protkeylen,
- u32 *protkeytype)
+ u32 *protkeytype, u32 pflags)
{
const struct keytoken_header *hdr = (const struct keytoken_header *)key;
const struct clearkeytoken *t = (const struct clearkeytoken *)key;
+ u8 tmpbuf[MAXEP11AESKEYBLOBSIZE]; /* 336 bytes */
u32 tmplen, keysize = 0;
- u8 *tmpbuf;
int i, rc;
if (keylen < sizeof(*hdr))
@@ -517,26 +518,20 @@ static int ep11_slowpath_key2protkey(const struct pkey_apqn *apqns,
if (!keysize || t->len != keysize)
return -EINVAL;
- /* alloc tmp key buffer */
- tmpbuf = kmalloc(MAXEP11AESKEYBLOBSIZE, GFP_ATOMIC);
- if (!tmpbuf)
- return -ENOMEM;
-
/* try two times in case of failure */
for (i = 0, rc = -ENODEV; i < 2 && rc; i++) {
tmplen = MAXEP11AESKEYBLOBSIZE;
rc = ep11_clr2key(NULL, 0, t->keytype, PKEY_TYPE_EP11,
8 * keysize, 0, t->clearkey, t->len,
- tmpbuf, &tmplen, NULL);
+ tmpbuf, &tmplen, NULL, pflags);
pr_debug("ep11_clr2key()=%d\n", rc);
if (rc)
continue;
rc = ep11_key2protkey(NULL, 0, tmpbuf, tmplen,
- protkey, protkeylen, protkeytype);
+ protkey, protkeylen, protkeytype, pflags);
pr_debug("ep11_key2protkey()=%d\n", rc);
}
- kfree(tmpbuf);
pr_debug("rc=%d\n", rc);
return rc;
}
diff --git a/drivers/s390/crypto/pkey_pckmo.c b/drivers/s390/crypto/pkey_pckmo.c
index 835d59f4fbc5..7eca9f1340bd 100644
--- a/drivers/s390/crypto/pkey_pckmo.c
+++ b/drivers/s390/crypto/pkey_pckmo.c
@@ -406,7 +406,8 @@ out:
static int pkey_pckmo_key2protkey(const struct pkey_apqn *_apqns,
size_t _nr_apqns,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *keyinfo)
+ u8 *protkey, u32 *protkeylen, u32 *keyinfo,
+ u32 _xflags __always_unused)
{
return pckmo_key2protkey(key, keylen,
protkey, protkeylen, keyinfo);
@@ -415,7 +416,8 @@ static int pkey_pckmo_key2protkey(const struct pkey_apqn *_apqns,
static int pkey_pckmo_gen_key(const struct pkey_apqn *_apqns, size_t _nr_apqns,
u32 keytype, u32 keysubtype,
u32 _keybitsize, u32 _flags,
- u8 *keybuf, u32 *keybuflen, u32 *keyinfo)
+ u8 *keybuf, u32 *keybuflen, u32 *keyinfo,
+ u32 _xflags __always_unused)
{
return pckmo_gen_protkey(keytype, keysubtype,
keybuf, keybuflen, keyinfo);
@@ -423,7 +425,8 @@ static int pkey_pckmo_gen_key(const struct pkey_apqn *_apqns, size_t _nr_apqns,
static int pkey_pckmo_verifykey(const u8 *key, u32 keylen,
u16 *_card, u16 *_dom,
- u32 *_keytype, u32 *_keybitsize, u32 *_flags)
+ u32 *_keytype, u32 *_keybitsize,
+ u32 *_flags, u32 _xflags __always_unused)
{
return pckmo_verify_key(key, keylen);
}
diff --git a/drivers/s390/crypto/pkey_sysfs.c b/drivers/s390/crypto/pkey_sysfs.c
index 57edc97bafd2..cea772973649 100644
--- a/drivers/s390/crypto/pkey_sysfs.c
+++ b/drivers/s390/crypto/pkey_sysfs.c
@@ -29,13 +29,13 @@ static int sys_pkey_handler_gen_key(u32 keytype, u32 keysubtype,
rc = pkey_handler_gen_key(NULL, 0,
keytype, keysubtype,
keybitsize, flags,
- keybuf, keybuflen, keyinfo);
+ keybuf, keybuflen, keyinfo, 0);
if (rc == -ENODEV) {
pkey_handler_request_modules();
rc = pkey_handler_gen_key(NULL, 0,
keytype, keysubtype,
keybitsize, flags,
- keybuf, keybuflen, keyinfo);
+ keybuf, keybuflen, keyinfo, 0);
}
return rc;
diff --git a/drivers/s390/crypto/pkey_uv.c b/drivers/s390/crypto/pkey_uv.c
index 805817b14354..e5c6e01acaf3 100644
--- a/drivers/s390/crypto/pkey_uv.c
+++ b/drivers/s390/crypto/pkey_uv.c
@@ -21,6 +21,12 @@ MODULE_AUTHOR("IBM Corporation");
MODULE_DESCRIPTION("s390 protected key UV handler");
/*
+ * One pre-allocated uv_secret_list for use with uv_find_secret()
+ */
+static struct uv_secret_list *uv_list;
+static DEFINE_MUTEX(uv_list_mutex);
+
+/*
* UV secret token struct and defines.
*/
@@ -85,13 +91,26 @@ static bool is_uv_keytype(enum pkey_key_type keytype)
}
}
+static int get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list_item_hdr *secret)
+{
+ int rc;
+
+ mutex_lock(&uv_list_mutex);
+ memset(uv_list, 0, sizeof(*uv_list));
+ rc = uv_find_secret(secret_id, uv_list, secret);
+ mutex_unlock(&uv_list_mutex);
+
+ return rc;
+}
+
static int retrieve_secret(const u8 secret_id[UV_SECRET_ID_LEN],
u16 *secret_type, u8 *buf, u32 *buflen)
{
struct uv_secret_list_item_hdr secret_meta_data;
int rc;
- rc = uv_get_secret_metadata(secret_id, &secret_meta_data);
+ rc = get_secret_metadata(secret_id, &secret_meta_data);
if (rc)
return rc;
@@ -172,7 +191,8 @@ static int uv_get_size_and_type(u16 secret_type, u32 *pkeysize, u32 *pkeytype)
static int uv_key2protkey(const struct pkey_apqn *_apqns __always_unused,
size_t _nr_apqns __always_unused,
const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *keyinfo)
+ u8 *protkey, u32 *protkeylen, u32 *keyinfo,
+ u32 _xflags __always_unused)
{
struct uvsecrettoken *t = (struct uvsecrettoken *)key;
u32 pkeysize, pkeytype;
@@ -214,7 +234,8 @@ out:
static int uv_verifykey(const u8 *key, u32 keylen,
u16 *_card __always_unused,
u16 *_dom __always_unused,
- u32 *keytype, u32 *keybitsize, u32 *flags)
+ u32 *keytype, u32 *keybitsize, u32 *flags,
+ u32 xflags __always_unused)
{
struct uvsecrettoken *t = (struct uvsecrettoken *)key;
struct uv_secret_list_item_hdr secret_meta_data;
@@ -225,7 +246,7 @@ static int uv_verifykey(const u8 *key, u32 keylen,
if (rc)
goto out;
- rc = uv_get_secret_metadata(t->secret_id, &secret_meta_data);
+ rc = get_secret_metadata(t->secret_id, &secret_meta_data);
if (rc)
goto out;
@@ -263,13 +284,23 @@ static struct pkey_handler uv_handler = {
*/
static int __init pkey_uv_init(void)
{
+ int rc;
+
if (!is_prot_virt_guest())
return -ENODEV;
if (!test_bit_inv(BIT_UVC_CMD_RETR_SECRET, uv_info.inst_calls_list))
return -ENODEV;
- return pkey_handler_register(&uv_handler);
+ uv_list = kmalloc(sizeof(*uv_list), GFP_KERNEL);
+ if (!uv_list)
+ return -ENOMEM;
+
+ rc = pkey_handler_register(&uv_handler);
+ if (rc)
+ kfree(uv_list);
+
+ return rc;
}
/*
@@ -278,6 +309,9 @@ static int __init pkey_uv_init(void)
static void __exit pkey_uv_exit(void)
{
pkey_handler_unregister(&uv_handler);
+ mutex_lock(&uv_list_mutex);
+ kvfree(uv_list);
+ mutex_unlock(&uv_list_mutex);
}
module_cpu_feature_match(S390_CPU_FEATURE_UV, pkey_uv_init);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 5020696f1379..89baa87a13fc 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -50,6 +50,10 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor interface, " \
"Copyright IBM Corp. 2001, 2012");
MODULE_LICENSE("GPL");
+unsigned int zcrypt_mempool_threshold = 5;
+module_param_named(mempool_threshold, zcrypt_mempool_threshold, uint, 0440);
+MODULE_PARM_DESC(mempool_threshold, "CCA and EP11 request/reply mempool minimal items (min: 1)");
+
/*
* zcrypt tracepoint functions
*/
@@ -642,16 +646,17 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
struct zcrypt_queue *zq, *pref_zq;
struct ap_message ap_msg;
unsigned int wgt = 0, pref_wgt = 0;
- unsigned int func_code;
- int cpen, qpen, qid = 0, rc = -ENODEV;
+ unsigned int func_code = 0;
+ int cpen, qpen, qid = 0, rc;
struct module *mod;
trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO);
- ap_init_message(&ap_msg);
+ rc = ap_init_apmsg(&ap_msg, 0);
+ if (rc)
+ goto out;
if (mex->outputdatalength < mex->inputdatalength) {
- func_code = 0;
rc = -EINVAL;
goto out;
}
@@ -728,7 +733,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
spin_unlock(&zcrypt_list_lock);
out:
- ap_release_message(&ap_msg);
+ ap_release_apmsg(&ap_msg);
if (tr) {
tr->last_rc = rc;
tr->last_qid = qid;
@@ -746,16 +751,17 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
struct zcrypt_queue *zq, *pref_zq;
struct ap_message ap_msg;
unsigned int wgt = 0, pref_wgt = 0;
- unsigned int func_code;
- int cpen, qpen, qid = 0, rc = -ENODEV;
+ unsigned int func_code = 0;
+ int cpen, qpen, qid = 0, rc;
struct module *mod;
trace_s390_zcrypt_req(crt, TP_ICARSACRT);
- ap_init_message(&ap_msg);
+ rc = ap_init_apmsg(&ap_msg, 0);
+ if (rc)
+ goto out;
if (crt->outputdatalength < crt->inputdatalength) {
- func_code = 0;
rc = -EINVAL;
goto out;
}
@@ -832,7 +838,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
spin_unlock(&zcrypt_list_lock);
out:
- ap_release_message(&ap_msg);
+ ap_release_apmsg(&ap_msg);
if (tr) {
tr->last_rc = rc;
tr->last_qid = qid;
@@ -842,23 +848,28 @@ out:
return rc;
}
-static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
+static long _zcrypt_send_cprb(u32 xflags, struct ap_perms *perms,
struct zcrypt_track *tr,
struct ica_xcRB *xcrb)
{
+ bool userspace = xflags & ZCRYPT_XFLAG_USERSPACE;
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
struct ap_message ap_msg;
unsigned int wgt = 0, pref_wgt = 0;
- unsigned int func_code;
+ unsigned int func_code = 0;
unsigned short *domain, tdom;
- int cpen, qpen, qid = 0, rc = -ENODEV;
+ int cpen, qpen, qid = 0, rc;
struct module *mod;
trace_s390_zcrypt_req(xcrb, TB_ZSECSENDCPRB);
xcrb->status = 0;
- ap_init_message(&ap_msg);
+
+ rc = ap_init_apmsg(&ap_msg, xflags & ZCRYPT_XFLAG_NOMEMALLOC ?
+ AP_MSG_FLAG_MEMPOOL : 0);
+ if (rc)
+ goto out;
rc = prep_cca_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain);
if (rc)
@@ -962,7 +973,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
spin_unlock(&zcrypt_list_lock);
out:
- ap_release_message(&ap_msg);
+ ap_release_apmsg(&ap_msg);
if (tr) {
tr->last_rc = rc;
tr->last_qid = qid;
@@ -972,7 +983,7 @@ out:
return rc;
}
-long zcrypt_send_cprb(struct ica_xcRB *xcrb)
+long zcrypt_send_cprb(struct ica_xcRB *xcrb, u32 xflags)
{
struct zcrypt_track tr;
int rc;
@@ -980,13 +991,13 @@ long zcrypt_send_cprb(struct ica_xcRB *xcrb)
memset(&tr, 0, sizeof(tr));
do {
- rc = _zcrypt_send_cprb(false, &ap_perms, &tr, xcrb);
+ rc = _zcrypt_send_cprb(xflags, &ap_perms, &tr, xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
/* on ENODEV failure: retry once again after a requested rescan */
if (rc == -ENODEV && zcrypt_process_rescan())
do {
- rc = _zcrypt_send_cprb(false, &ap_perms, &tr, xcrb);
+ rc = _zcrypt_send_cprb(xflags, &ap_perms, &tr, xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
@@ -1024,50 +1035,50 @@ static bool is_desired_ep11_queue(unsigned int dev_qid,
return false;
}
-static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
+static long _zcrypt_send_ep11_cprb(u32 xflags, struct ap_perms *perms,
struct zcrypt_track *tr,
struct ep11_urb *xcrb)
{
+ bool userspace = xflags & ZCRYPT_XFLAG_USERSPACE;
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
- struct ep11_target_dev *targets;
+ struct ep11_target_dev *targets = NULL;
unsigned short target_num;
unsigned int wgt = 0, pref_wgt = 0;
- unsigned int func_code, domain;
+ unsigned int func_code = 0, domain;
struct ap_message ap_msg;
- int cpen, qpen, qid = 0, rc = -ENODEV;
+ int cpen, qpen, qid = 0, rc;
struct module *mod;
trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB);
- ap_init_message(&ap_msg);
+ rc = ap_init_apmsg(&ap_msg, xflags & ZCRYPT_XFLAG_NOMEMALLOC ?
+ AP_MSG_FLAG_MEMPOOL : 0);
+ if (rc)
+ goto out;
target_num = (unsigned short)xcrb->targets_num;
/* empty list indicates autoselect (all available targets) */
- targets = NULL;
+ rc = -ENOMEM;
if (target_num != 0) {
- struct ep11_target_dev __user *uptr;
-
- targets = kcalloc(target_num, sizeof(*targets), GFP_KERNEL);
- if (!targets) {
- func_code = 0;
- rc = -ENOMEM;
- goto out;
- }
-
- uptr = (struct ep11_target_dev __force __user *)xcrb->targets;
- if (z_copy_from_user(userspace, targets, uptr,
- target_num * sizeof(*targets))) {
- func_code = 0;
- rc = -EFAULT;
- goto out_free;
+ if (userspace) {
+ targets = kcalloc(target_num, sizeof(*targets), GFP_KERNEL);
+ if (!targets)
+ goto out;
+ if (copy_from_user(targets, xcrb->targets,
+ target_num * sizeof(*targets))) {
+ rc = -EFAULT;
+ goto out;
+ }
+ } else {
+ targets = (struct ep11_target_dev __force __kernel *)xcrb->targets;
}
}
rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain);
if (rc)
- goto out_free;
+ goto out;
print_hex_dump_debug("ep11req: ", DUMP_PREFIX_ADDRESS, 16, 1,
ap_msg.msg, ap_msg.len, false);
@@ -1075,11 +1086,11 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
if (ap_msg.flags & AP_MSG_FLAG_ADMIN) {
if (!test_bit_inv(domain, perms->adm)) {
rc = -ENODEV;
- goto out_free;
+ goto out;
}
} else if ((ap_msg.flags & AP_MSG_FLAG_USAGE) == 0) {
rc = -EOPNOTSUPP;
- goto out_free;
+ goto out;
}
}
@@ -1147,7 +1158,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
pr_debug("no match for address ff.ffff => ENODEV\n");
}
rc = -ENODEV;
- goto out_free;
+ goto out;
}
qid = pref_zq->queue->qid;
@@ -1161,10 +1172,10 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
spin_unlock(&zcrypt_list_lock);
-out_free:
- kfree(targets);
out:
- ap_release_message(&ap_msg);
+ if (userspace)
+ kfree(targets);
+ ap_release_apmsg(&ap_msg);
if (tr) {
tr->last_rc = rc;
tr->last_qid = qid;
@@ -1174,7 +1185,7 @@ out:
return rc;
}
-long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
+long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb, u32 xflags)
{
struct zcrypt_track tr;
int rc;
@@ -1182,13 +1193,13 @@ long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
memset(&tr, 0, sizeof(tr));
do {
- rc = _zcrypt_send_ep11_cprb(false, &ap_perms, &tr, xcrb);
+ rc = _zcrypt_send_ep11_cprb(xflags, &ap_perms, &tr, xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
/* on ENODEV failure: retry once again after a requested rescan */
if (rc == -ENODEV && zcrypt_process_rescan())
do {
- rc = _zcrypt_send_ep11_cprb(false, &ap_perms, &tr, xcrb);
+ rc = _zcrypt_send_ep11_cprb(xflags, &ap_perms, &tr, xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
@@ -1204,7 +1215,7 @@ static long zcrypt_rng(char *buffer)
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
unsigned int wgt = 0, pref_wgt = 0;
- unsigned int func_code;
+ unsigned int func_code = 0;
struct ap_message ap_msg;
unsigned int domain;
int qid = 0, rc = -ENODEV;
@@ -1212,7 +1223,9 @@ static long zcrypt_rng(char *buffer)
trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB);
- ap_init_message(&ap_msg);
+ rc = ap_init_apmsg(&ap_msg, 0);
+ if (rc)
+ goto out;
rc = prep_rng_ap_msg(&ap_msg, &func_code, &domain);
if (rc)
goto out;
@@ -1258,7 +1271,7 @@ static long zcrypt_rng(char *buffer)
spin_unlock(&zcrypt_list_lock);
out:
- ap_release_message(&ap_msg);
+ ap_release_apmsg(&ap_msg);
trace_s390_zcrypt_rep(buffer, func_code, rc,
AP_QID_CARD(qid), AP_QID_QUEUE(qid));
return rc;
@@ -1291,19 +1304,25 @@ static void zcrypt_device_status_mask(struct zcrypt_device_status *devstatus)
spin_unlock(&zcrypt_list_lock);
}
-void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus)
+void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus,
+ int maxcard, int maxqueue)
{
struct zcrypt_card *zc;
struct zcrypt_queue *zq;
struct zcrypt_device_status_ext *stat;
int card, queue;
+ maxcard = min_t(int, maxcard, MAX_ZDEV_CARDIDS_EXT);
+ maxqueue = min_t(int, maxqueue, MAX_ZDEV_DOMAINS_EXT);
+
spin_lock(&zcrypt_list_lock);
for_each_zcrypt_card(zc) {
for_each_zcrypt_queue(zq, zc) {
card = AP_QID_CARD(zq->queue->qid);
queue = AP_QID_QUEUE(zq->queue->qid);
- stat = &devstatus[card * AP_DOMAINS + queue];
+ if (card >= maxcard || queue >= maxqueue)
+ continue;
+ stat = &devstatus[card * maxqueue + queue];
stat->hwtype = zc->card->ap_dev.device_type;
stat->functions = zc->card->hwinfo.fac >> 26;
stat->qid = zq->queue->qid;
@@ -1523,6 +1542,7 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
int rc;
struct ica_xcRB xcrb;
struct zcrypt_track tr;
+ u32 xflags = ZCRYPT_XFLAG_USERSPACE;
struct ica_xcRB __user *uxcrb = (void __user *)arg;
memset(&tr, 0, sizeof(tr));
@@ -1530,13 +1550,13 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
return -EFAULT;
do {
- rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb);
+ rc = _zcrypt_send_cprb(xflags, perms, &tr, &xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
/* on ENODEV failure: retry once again after a requested rescan */
if (rc == -ENODEV && zcrypt_process_rescan())
do {
- rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb);
+ rc = _zcrypt_send_cprb(xflags, perms, &tr, &xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
@@ -1553,6 +1573,7 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg)
int rc;
struct ep11_urb xcrb;
struct zcrypt_track tr;
+ u32 xflags = ZCRYPT_XFLAG_USERSPACE;
struct ep11_urb __user *uxcrb = (void __user *)arg;
memset(&tr, 0, sizeof(tr));
@@ -1560,13 +1581,13 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg)
return -EFAULT;
do {
- rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb);
+ rc = _zcrypt_send_ep11_cprb(xflags, perms, &tr, &xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
/* on ENODEV failure: retry once again after a requested rescan */
if (rc == -ENODEV && zcrypt_process_rescan())
do {
- rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb);
+ rc = _zcrypt_send_ep11_cprb(xflags, perms, &tr, &xcrb);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
@@ -1607,7 +1628,9 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
GFP_KERNEL);
if (!device_status)
return -ENOMEM;
- zcrypt_device_status_mask_ext(device_status);
+ zcrypt_device_status_mask_ext(device_status,
+ MAX_ZDEV_CARDIDS_EXT,
+ MAX_ZDEV_DOMAINS_EXT);
if (copy_to_user((char __user *)arg, device_status,
total_size))
rc = -EFAULT;
@@ -1827,6 +1850,7 @@ static long trans_xcrb32(struct ap_perms *perms, struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct compat_ica_xcrb __user *uxcrb32 = compat_ptr(arg);
+ u32 xflags = ZCRYPT_XFLAG_USERSPACE;
struct compat_ica_xcrb xcrb32;
struct zcrypt_track tr;
struct ica_xcRB xcrb64;
@@ -1856,13 +1880,13 @@ static long trans_xcrb32(struct ap_perms *perms, struct file *filp,
xcrb64.priority_window = xcrb32.priority_window;
xcrb64.status = xcrb32.status;
do {
- rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb64);
+ rc = _zcrypt_send_cprb(xflags, perms, &tr, &xcrb64);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
/* on ENODEV failure: retry once again after a requested rescan */
if (rc == -ENODEV && zcrypt_process_rescan())
do {
- rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb64);
+ rc = _zcrypt_send_cprb(xflags, perms, &tr, &xcrb64);
} while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
@@ -2132,13 +2156,27 @@ int __init zcrypt_api_init(void)
{
int rc;
+ /* make sure the mempool threshold is >= 1 */
+ if (zcrypt_mempool_threshold < 1) {
+ rc = -EINVAL;
+ goto out;
+ }
+
rc = zcrypt_debug_init();
if (rc)
goto out;
rc = zcdn_init();
if (rc)
- goto out;
+ goto out_zcdn_init_failed;
+
+ rc = zcrypt_ccamisc_init();
+ if (rc)
+ goto out_ccamisc_init_failed;
+
+ rc = zcrypt_ep11misc_init();
+ if (rc)
+ goto out_ep11misc_init_failed;
/* Register the request sprayer. */
rc = misc_register(&zcrypt_misc_device);
@@ -2151,7 +2189,12 @@ int __init zcrypt_api_init(void)
return 0;
out_misc_register_failed:
+ zcrypt_ep11misc_exit();
+out_ep11misc_init_failed:
+ zcrypt_ccamisc_exit();
+out_ccamisc_init_failed:
zcdn_exit();
+out_zcdn_init_failed:
zcrypt_debug_exit();
out:
return rc;
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 4ed481df57ca..6ef8850a42df 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -76,6 +76,13 @@ struct zcrypt_track {
#define TRACK_AGAIN_CARD_WEIGHT_PENALTY 1000
#define TRACK_AGAIN_QUEUE_WEIGHT_PENALTY 10000
+/*
+ * xflags - to be used with zcrypt_send_cprb() and
+ * zcrypt_send_ep11_cprb() for the xflags parameter.
+ */
+#define ZCRYPT_XFLAG_USERSPACE 0x0001 /* data ptrs address userspace */
+#define ZCRYPT_XFLAG_NOMEMALLOC 0x0002 /* do not allocate memory via kmalloc */
+
struct zcrypt_ops {
long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *,
struct ap_message *);
@@ -132,6 +139,8 @@ extern atomic_t zcrypt_rescan_req;
extern spinlock_t zcrypt_list_lock;
extern struct list_head zcrypt_card_list;
+extern unsigned int zcrypt_mempool_threshold;
+
#define for_each_zcrypt_card(_zc) \
list_for_each_entry(_zc, &zcrypt_card_list, list)
@@ -161,9 +170,10 @@ void zcrypt_msgtype_unregister(struct zcrypt_ops *);
struct zcrypt_ops *zcrypt_msgtype(unsigned char *, int);
int zcrypt_api_init(void);
void zcrypt_api_exit(void);
-long zcrypt_send_cprb(struct ica_xcRB *xcRB);
-long zcrypt_send_ep11_cprb(struct ep11_urb *urb);
-void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus);
+long zcrypt_send_cprb(struct ica_xcRB *xcRB, u32 xflags);
+long zcrypt_send_ep11_cprb(struct ep11_urb *urb, u32 xflags);
+void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus,
+ int maxcard, int maxqueue);
int zcrypt_device_status_ext(int card, int queue,
struct zcrypt_device_status_ext *devstatus);
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c
index 43a27cb3db84..b975a3728c23 100644
--- a/drivers/s390/crypto/zcrypt_ccamisc.c
+++ b/drivers/s390/crypto/zcrypt_ccamisc.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/init.h>
+#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/random.h>
@@ -29,16 +30,31 @@
/* Size of vardata block used for some of the cca requests/replies */
#define VARDATASIZE 4096
-struct cca_info_list_entry {
- struct list_head list;
- u16 cardnr;
- u16 domain;
- struct cca_info info;
-};
+/*
+ * Cprb memory pool held for urgent cases where no memory
+ * can be allocated via kmalloc. This pool is only used
+ * when alloc_and_prep_cprbmem() is called with the xflag
+ * ZCRYPT_XFLAG_NOMEMALLOC. The cprb memory needs to hold
+ * space for request AND reply!
+ */
+#define CPRB_MEMPOOL_ITEM_SIZE (16 * 1024)
+static mempool_t *cprb_mempool;
-/* a list with cca_info_list_entry entries */
-static LIST_HEAD(cca_info_list);
-static DEFINE_SPINLOCK(cca_info_list_lock);
+/*
+ * This is a pre-allocated memory for the device status array
+ * used within the findcard() functions. It is currently
+ * 128 * 128 * 4 bytes = 64 KB big. Usage of this memory is
+ * controlled via dev_status_mem_mutex. Needs adaption if more
+ * than 128 cards or domains to be are supported.
+ */
+#define ZCRYPT_DEV_STATUS_CARD_MAX 128
+#define ZCRYPT_DEV_STATUS_QUEUE_MAX 128
+#define ZCRYPT_DEV_STATUS_ENTRIES (ZCRYPT_DEV_STATUS_CARD_MAX * \
+ ZCRYPT_DEV_STATUS_QUEUE_MAX)
+#define ZCRYPT_DEV_STATUS_EXT_SIZE (ZCRYPT_DEV_STATUS_ENTRIES * \
+ sizeof(struct zcrypt_device_status_ext))
+static void *dev_status_mem;
+static DEFINE_MUTEX(dev_status_mem_mutex);
/*
* Simple check if the token is a valid CCA secure AES data key
@@ -219,19 +235,27 @@ EXPORT_SYMBOL(cca_check_sececckeytoken);
static int alloc_and_prep_cprbmem(size_t paramblen,
u8 **p_cprb_mem,
struct CPRBX **p_req_cprb,
- struct CPRBX **p_rep_cprb)
+ struct CPRBX **p_rep_cprb,
+ u32 xflags)
{
- u8 *cprbmem;
+ u8 *cprbmem = NULL;
size_t cprbplusparamblen = sizeof(struct CPRBX) + paramblen;
+ size_t len = 2 * cprbplusparamblen;
struct CPRBX *preqcblk, *prepcblk;
/*
* allocate consecutive memory for request CPRB, request param
* block, reply CPRB and reply param block
*/
- cprbmem = kcalloc(2, cprbplusparamblen, GFP_KERNEL);
+ if (xflags & ZCRYPT_XFLAG_NOMEMALLOC) {
+ if (len <= CPRB_MEMPOOL_ITEM_SIZE)
+ cprbmem = mempool_alloc_preallocated(cprb_mempool);
+ } else {
+ cprbmem = kmalloc(len, GFP_KERNEL);
+ }
if (!cprbmem)
return -ENOMEM;
+ memset(cprbmem, 0, len);
preqcblk = (struct CPRBX *)cprbmem;
prepcblk = (struct CPRBX *)(cprbmem + cprbplusparamblen);
@@ -261,11 +285,15 @@ static int alloc_and_prep_cprbmem(size_t paramblen,
* with zeros before freeing (useful if there was some
* clear key material in there).
*/
-static void free_cprbmem(void *mem, size_t paramblen, int scrub)
+static void free_cprbmem(void *mem, size_t paramblen, bool scrub, u32 xflags)
{
- if (scrub)
+ if (mem && scrub)
memzero_explicit(mem, 2 * (sizeof(struct CPRBX) + paramblen));
- kfree(mem);
+
+ if (xflags & ZCRYPT_XFLAG_NOMEMALLOC)
+ mempool_free(mem, cprb_mempool);
+ else
+ kfree(mem);
}
/*
@@ -290,7 +318,7 @@ static inline void prep_xcrb(struct ica_xcRB *pxcrb,
* Generate (random) CCA AES DATA secure key.
*/
int cca_genseckey(u16 cardnr, u16 domain,
- u32 keybitsize, u8 *seckey)
+ u32 keybitsize, u8 *seckey, u32 xflags)
{
int i, rc, keysize;
int seckeysize;
@@ -332,7 +360,8 @@ int cca_genseckey(u16 cardnr, u16 domain,
} __packed * prepparm;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -379,7 +408,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, errno %d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -424,7 +453,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
memcpy(seckey, prepparm->lv3.keyblock.tok, SECKEYBLOBSIZE);
out:
- free_cprbmem(mem, PARMBSIZE, 0);
+ free_cprbmem(mem, PARMBSIZE, false, xflags);
return rc;
}
EXPORT_SYMBOL(cca_genseckey);
@@ -433,7 +462,7 @@ EXPORT_SYMBOL(cca_genseckey);
* Generate an CCA AES DATA secure key with given key value.
*/
int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
- const u8 *clrkey, u8 *seckey)
+ const u8 *clrkey, u8 *seckey, u32 xflags)
{
int rc, keysize, seckeysize;
u8 *mem, *ptr;
@@ -473,7 +502,8 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
} __packed * prepparm;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -517,7 +547,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -563,7 +593,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
memcpy(seckey, prepparm->lv3.keyblock.tok, SECKEYBLOBSIZE);
out:
- free_cprbmem(mem, PARMBSIZE, 1);
+ free_cprbmem(mem, PARMBSIZE, true, xflags);
return rc;
}
EXPORT_SYMBOL(cca_clr2seckey);
@@ -573,7 +603,7 @@ EXPORT_SYMBOL(cca_clr2seckey);
*/
int cca_sec2protkey(u16 cardnr, u16 domain,
const u8 *seckey, u8 *protkey, u32 *protkeylen,
- u32 *protkeytype)
+ u32 *protkeytype, u32 xflags)
{
int rc;
u8 *mem, *ptr;
@@ -619,7 +649,8 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
} __packed * prepparm;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -644,7 +675,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -712,7 +743,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
*protkeylen = prepparm->lv3.ckb.len;
out:
- free_cprbmem(mem, PARMBSIZE, 0);
+ free_cprbmem(mem, PARMBSIZE, true, xflags);
return rc;
}
EXPORT_SYMBOL(cca_sec2protkey);
@@ -737,7 +768,7 @@ static const u8 aes_cipher_key_skeleton[] = {
* Generate (random) CCA AES CIPHER secure key.
*/
int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
- u8 *keybuf, u32 *keybufsize)
+ u8 *keybuf, u32 *keybufsize, u32 xflags)
{
int rc;
u8 *mem, *ptr;
@@ -813,7 +844,8 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
struct cipherkeytoken *t;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -872,7 +904,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -923,7 +955,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
*keybufsize = t->len;
out:
- free_cprbmem(mem, PARMBSIZE, 0);
+ free_cprbmem(mem, PARMBSIZE, false, xflags);
return rc;
}
EXPORT_SYMBOL(cca_gencipherkey);
@@ -938,7 +970,8 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
const u8 *clr_key_value,
int clr_key_bit_size,
u8 *key_token,
- int *key_token_size)
+ int *key_token_size,
+ u32 xflags)
{
int rc, n;
u8 *mem, *ptr;
@@ -989,7 +1022,8 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
int complete = strncmp(rule_array_2, "COMPLETE", 8) ? 0 : 1;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -1038,7 +1072,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -1077,7 +1111,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
*key_token_size = t->len;
out:
- free_cprbmem(mem, PARMBSIZE, 0);
+ free_cprbmem(mem, PARMBSIZE, false, xflags);
return rc;
}
@@ -1085,23 +1119,31 @@ out:
* Build CCA AES CIPHER secure key with a given clear key value.
*/
int cca_clr2cipherkey(u16 card, u16 dom, u32 keybitsize, u32 keygenflags,
- const u8 *clrkey, u8 *keybuf, u32 *keybufsize)
+ const u8 *clrkey, u8 *keybuf, u32 *keybufsize, u32 xflags)
{
int rc;
- u8 *token;
+ void *mem;
int tokensize;
- u8 exorbuf[32];
+ u8 *token, exorbuf[32];
struct cipherkeytoken *t;
/* fill exorbuf with random data */
get_random_bytes(exorbuf, sizeof(exorbuf));
- /* allocate space for the key token to build */
- token = kmalloc(MAXCCAVLSCTOKENSIZE, GFP_KERNEL);
- if (!token)
+ /*
+ * Allocate space for the key token to build.
+ * Also we only need up to MAXCCAVLSCTOKENSIZE bytes for this
+ * we use the already existing cprb mempool to solve this
+ * short term memory requirement.
+ */
+ mem = (xflags & ZCRYPT_XFLAG_NOMEMALLOC) ?
+ mempool_alloc_preallocated(cprb_mempool) :
+ mempool_alloc(cprb_mempool, GFP_KERNEL);
+ if (!mem)
return -ENOMEM;
/* prepare the token with the key skeleton */
+ token = (u8 *)mem;
tokensize = SIZEOF_SKELETON;
memcpy(token, aes_cipher_key_skeleton, tokensize);
@@ -1120,28 +1162,28 @@ int cca_clr2cipherkey(u16 card, u16 dom, u32 keybitsize, u32 keygenflags,
* 4/4 COMPLETE the secure cipher key import
*/
rc = _ip_cprb_helper(card, dom, "AES ", "FIRST ", "MIN3PART",
- exorbuf, keybitsize, token, &tokensize);
+ exorbuf, keybitsize, token, &tokensize, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s clear key import 1/4 with CSNBKPI2 failed, rc=%d\n",
__func__, rc);
goto out;
}
rc = _ip_cprb_helper(card, dom, "AES ", "ADD-PART", NULL,
- clrkey, keybitsize, token, &tokensize);
+ clrkey, keybitsize, token, &tokensize, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s clear key import 2/4 with CSNBKPI2 failed, rc=%d\n",
__func__, rc);
goto out;
}
rc = _ip_cprb_helper(card, dom, "AES ", "ADD-PART", NULL,
- exorbuf, keybitsize, token, &tokensize);
+ exorbuf, keybitsize, token, &tokensize, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s clear key import 3/4 with CSNBKPI2 failed, rc=%d\n",
__func__, rc);
goto out;
}
rc = _ip_cprb_helper(card, dom, "AES ", "COMPLETE", NULL,
- NULL, keybitsize, token, &tokensize);
+ NULL, keybitsize, token, &tokensize, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s clear key import 4/4 with CSNBKPI2 failed, rc=%d\n",
__func__, rc);
@@ -1158,7 +1200,7 @@ int cca_clr2cipherkey(u16 card, u16 dom, u32 keybitsize, u32 keygenflags,
*keybufsize = tokensize;
out:
- kfree(token);
+ mempool_free(mem, cprb_mempool);
return rc;
}
EXPORT_SYMBOL(cca_clr2cipherkey);
@@ -1167,7 +1209,8 @@ EXPORT_SYMBOL(cca_clr2cipherkey);
* Derive proteced key from CCA AES cipher secure key.
*/
int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags)
{
int rc;
u8 *mem, *ptr;
@@ -1219,7 +1262,8 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
int keytoklen = ((struct cipherkeytoken *)ckey)->len;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -1249,7 +1293,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -1323,7 +1367,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
*protkeylen = prepparm->vud.ckb.keylen;
out:
- free_cprbmem(mem, PARMBSIZE, 0);
+ free_cprbmem(mem, PARMBSIZE, true, xflags);
return rc;
}
EXPORT_SYMBOL(cca_cipher2protkey);
@@ -1332,7 +1376,7 @@ EXPORT_SYMBOL(cca_cipher2protkey);
* Derive protected key from CCA ECC secure private key.
*/
int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype, u32 xflags)
{
int rc;
u8 *mem, *ptr;
@@ -1382,7 +1426,8 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
int keylen = ((struct eccprivkeytoken *)key)->len;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -1412,7 +1457,7 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -1470,7 +1515,7 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
*protkeytype = PKEY_KEYTYPE_ECC;
out:
- free_cprbmem(mem, PARMBSIZE, 0);
+ free_cprbmem(mem, PARMBSIZE, true, xflags);
return rc;
}
EXPORT_SYMBOL(cca_ecc2protkey);
@@ -1481,7 +1526,8 @@ EXPORT_SYMBOL(cca_ecc2protkey);
int cca_query_crypto_facility(u16 cardnr, u16 domain,
const char *keyword,
u8 *rarray, size_t *rarraylen,
- u8 *varray, size_t *varraylen)
+ u8 *varray, size_t *varraylen,
+ u32 xflags)
{
int rc;
u16 len;
@@ -1505,7 +1551,8 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
} __packed * prepparm;
/* get already prepared memory for 2 cprbs with param block each */
- rc = alloc_and_prep_cprbmem(parmbsize, &mem, &preqcblk, &prepcblk);
+ rc = alloc_and_prep_cprbmem(parmbsize, &mem,
+ &preqcblk, &prepcblk, xflags);
if (rc)
return rc;
@@ -1526,7 +1573,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -1573,94 +1620,21 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
}
out:
- free_cprbmem(mem, parmbsize, 0);
+ free_cprbmem(mem, parmbsize, false, xflags);
return rc;
}
EXPORT_SYMBOL(cca_query_crypto_facility);
-static int cca_info_cache_fetch(u16 cardnr, u16 domain, struct cca_info *ci)
-{
- int rc = -ENOENT;
- struct cca_info_list_entry *ptr;
-
- spin_lock_bh(&cca_info_list_lock);
- list_for_each_entry(ptr, &cca_info_list, list) {
- if (ptr->cardnr == cardnr && ptr->domain == domain) {
- memcpy(ci, &ptr->info, sizeof(*ci));
- rc = 0;
- break;
- }
- }
- spin_unlock_bh(&cca_info_list_lock);
-
- return rc;
-}
-
-static void cca_info_cache_update(u16 cardnr, u16 domain,
- const struct cca_info *ci)
-{
- int found = 0;
- struct cca_info_list_entry *ptr;
-
- spin_lock_bh(&cca_info_list_lock);
- list_for_each_entry(ptr, &cca_info_list, list) {
- if (ptr->cardnr == cardnr &&
- ptr->domain == domain) {
- memcpy(&ptr->info, ci, sizeof(*ci));
- found = 1;
- break;
- }
- }
- if (!found) {
- ptr = kmalloc(sizeof(*ptr), GFP_ATOMIC);
- if (!ptr) {
- spin_unlock_bh(&cca_info_list_lock);
- return;
- }
- ptr->cardnr = cardnr;
- ptr->domain = domain;
- memcpy(&ptr->info, ci, sizeof(*ci));
- list_add(&ptr->list, &cca_info_list);
- }
- spin_unlock_bh(&cca_info_list_lock);
-}
-
-static void cca_info_cache_scrub(u16 cardnr, u16 domain)
-{
- struct cca_info_list_entry *ptr;
-
- spin_lock_bh(&cca_info_list_lock);
- list_for_each_entry(ptr, &cca_info_list, list) {
- if (ptr->cardnr == cardnr &&
- ptr->domain == domain) {
- list_del(&ptr->list);
- kfree(ptr);
- break;
- }
- }
- spin_unlock_bh(&cca_info_list_lock);
-}
-
-static void __exit mkvp_cache_free(void)
-{
- struct cca_info_list_entry *ptr, *pnext;
-
- spin_lock_bh(&cca_info_list_lock);
- list_for_each_entry_safe(ptr, pnext, &cca_info_list, list) {
- list_del(&ptr->list);
- kfree(ptr);
- }
- spin_unlock_bh(&cca_info_list_lock);
-}
-
/*
- * Fetch cca_info values via query_crypto_facility from adapter.
+ * Fetch cca_info values about a CCA queue via
+ * query_crypto_facility from adapter.
*/
-static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
+int cca_get_info(u16 cardnr, u16 domain, struct cca_info *ci, u32 xflags)
{
+ void *mem;
int rc, found = 0;
size_t rlen, vlen;
- u8 *rarray, *varray, *pg;
+ u8 *rarray, *varray;
struct zcrypt_device_status_ext devstat;
memset(ci, 0, sizeof(*ci));
@@ -1671,17 +1645,22 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
return rc;
ci->hwtype = devstat.hwtype;
- /* prep page for rule array and var array use */
- pg = (u8 *)__get_free_page(GFP_KERNEL);
- if (!pg)
+ /*
+ * Prep memory for rule array and var array use.
+ * Use the cprb mempool for this.
+ */
+ mem = (xflags & ZCRYPT_XFLAG_NOMEMALLOC) ?
+ mempool_alloc_preallocated(cprb_mempool) :
+ mempool_alloc(cprb_mempool, GFP_KERNEL);
+ if (!mem)
return -ENOMEM;
- rarray = pg;
- varray = pg + PAGE_SIZE / 2;
+ rarray = (u8 *)mem;
+ varray = (u8 *)mem + PAGE_SIZE / 2;
rlen = vlen = PAGE_SIZE / 2;
/* QF for this card/domain */
rc = cca_query_crypto_facility(cardnr, domain, "STATICSA",
- rarray, &rlen, varray, &vlen);
+ rarray, &rlen, varray, &vlen, xflags);
if (rc == 0 && rlen >= 10 * 8 && vlen >= 204) {
memcpy(ci->serial, rarray, 8);
ci->new_asym_mk_state = (char)rarray[4 * 8];
@@ -1708,7 +1687,7 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
goto out;
rlen = vlen = PAGE_SIZE / 2;
rc = cca_query_crypto_facility(cardnr, domain, "STATICSB",
- rarray, &rlen, varray, &vlen);
+ rarray, &rlen, varray, &vlen, xflags);
if (rc == 0 && rlen >= 13 * 8 && vlen >= 240) {
ci->new_apka_mk_state = (char)rarray[10 * 8];
ci->cur_apka_mk_state = (char)rarray[11 * 8];
@@ -1723,177 +1702,32 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
}
out:
- free_page((unsigned long)pg);
+ mempool_free(mem, cprb_mempool);
return found == 2 ? 0 : -ENOENT;
}
-
-/*
- * Fetch cca information about a CCA queue.
- */
-int cca_get_info(u16 card, u16 dom, struct cca_info *ci, int verify)
-{
- int rc;
-
- rc = cca_info_cache_fetch(card, dom, ci);
- if (rc || verify) {
- rc = fetch_cca_info(card, dom, ci);
- if (rc == 0)
- cca_info_cache_update(card, dom, ci);
- }
-
- return rc;
-}
EXPORT_SYMBOL(cca_get_info);
-/*
- * Search for a matching crypto card based on the
- * Master Key Verification Pattern given.
- */
-static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain,
- int verify, int minhwtype)
-{
- struct zcrypt_device_status_ext *device_status;
- u16 card, dom;
- struct cca_info ci;
- int i, rc, oi = -1;
-
- /* mkvp must not be zero, minhwtype needs to be >= 0 */
- if (mkvp == 0 || minhwtype < 0)
- return -EINVAL;
-
- /* fetch status of all crypto cards */
- device_status = kvcalloc(MAX_ZDEV_ENTRIES_EXT,
- sizeof(struct zcrypt_device_status_ext),
- GFP_KERNEL);
- if (!device_status)
- return -ENOMEM;
- zcrypt_device_status_mask_ext(device_status);
-
- /* walk through all crypto cards */
- for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
- card = AP_QID_CARD(device_status[i].qid);
- dom = AP_QID_QUEUE(device_status[i].qid);
- if (device_status[i].online &&
- device_status[i].functions & 0x04) {
- /* enabled CCA card, check current mkvp from cache */
- if (cca_info_cache_fetch(card, dom, &ci) == 0 &&
- ci.hwtype >= minhwtype &&
- ci.cur_aes_mk_state == '2' &&
- ci.cur_aes_mkvp == mkvp) {
- if (!verify)
- break;
- /* verify: refresh card info */
- if (fetch_cca_info(card, dom, &ci) == 0) {
- cca_info_cache_update(card, dom, &ci);
- if (ci.hwtype >= minhwtype &&
- ci.cur_aes_mk_state == '2' &&
- ci.cur_aes_mkvp == mkvp)
- break;
- }
- }
- } else {
- /* Card is offline and/or not a CCA card. */
- /* del mkvp entry from cache if it exists */
- cca_info_cache_scrub(card, dom);
- }
- }
- if (i >= MAX_ZDEV_ENTRIES_EXT) {
- /* nothing found, so this time without cache */
- for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
- if (!(device_status[i].online &&
- device_status[i].functions & 0x04))
- continue;
- card = AP_QID_CARD(device_status[i].qid);
- dom = AP_QID_QUEUE(device_status[i].qid);
- /* fresh fetch mkvp from adapter */
- if (fetch_cca_info(card, dom, &ci) == 0) {
- cca_info_cache_update(card, dom, &ci);
- if (ci.hwtype >= minhwtype &&
- ci.cur_aes_mk_state == '2' &&
- ci.cur_aes_mkvp == mkvp)
- break;
- if (ci.hwtype >= minhwtype &&
- ci.old_aes_mk_state == '2' &&
- ci.old_aes_mkvp == mkvp &&
- oi < 0)
- oi = i;
- }
- }
- if (i >= MAX_ZDEV_ENTRIES_EXT && oi >= 0) {
- /* old mkvp matched, use this card then */
- card = AP_QID_CARD(device_status[oi].qid);
- dom = AP_QID_QUEUE(device_status[oi].qid);
- }
- }
- if (i < MAX_ZDEV_ENTRIES_EXT || oi >= 0) {
- if (pcardnr)
- *pcardnr = card;
- if (pdomain)
- *pdomain = dom;
- rc = (i < MAX_ZDEV_ENTRIES_EXT ? 0 : 1);
- } else {
- rc = -ENODEV;
- }
-
- kvfree(device_status);
- return rc;
-}
-
-/*
- * Search for a matching crypto card based on the Master Key
- * Verification Pattern provided inside a secure key token.
- */
-int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify)
-{
- u64 mkvp;
- int minhwtype = 0;
- const struct keytoken_header *hdr = (struct keytoken_header *)key;
-
- if (hdr->type != TOKTYPE_CCA_INTERNAL)
- return -EINVAL;
-
- switch (hdr->version) {
- case TOKVER_CCA_AES:
- mkvp = ((struct secaeskeytoken *)key)->mkvp;
- break;
- case TOKVER_CCA_VLSC:
- mkvp = ((struct cipherkeytoken *)key)->mkvp0;
- minhwtype = AP_DEVICE_TYPE_CEX6;
- break;
- default:
- return -EINVAL;
- }
-
- return findcard(mkvp, pcardnr, pdomain, verify, minhwtype);
-}
-EXPORT_SYMBOL(cca_findcard);
-
-int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
+int cca_findcard2(u32 *apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
int minhwtype, int mktype, u64 cur_mkvp, u64 old_mkvp,
- int verify)
+ u32 xflags)
{
struct zcrypt_device_status_ext *device_status;
- u32 *_apqns = NULL, _nr_apqns = 0;
- int i, card, dom, curmatch, oldmatch, rc = 0;
+ int i, card, dom, curmatch, oldmatch;
struct cca_info ci;
+ u32 _nr_apqns = 0;
- /* fetch status of all crypto cards */
- device_status = kvcalloc(MAX_ZDEV_ENTRIES_EXT,
- sizeof(struct zcrypt_device_status_ext),
- GFP_KERNEL);
- if (!device_status)
- return -ENOMEM;
- zcrypt_device_status_mask_ext(device_status);
+ /* occupy the device status memory */
+ mutex_lock(&dev_status_mem_mutex);
+ memset(dev_status_mem, 0, ZCRYPT_DEV_STATUS_EXT_SIZE);
+ device_status = (struct zcrypt_device_status_ext *)dev_status_mem;
- /* allocate 1k space for up to 256 apqns */
- _apqns = kmalloc_array(256, sizeof(u32), GFP_KERNEL);
- if (!_apqns) {
- kvfree(device_status);
- return -ENOMEM;
- }
+ /* fetch crypto device status into this struct */
+ zcrypt_device_status_mask_ext(device_status,
+ ZCRYPT_DEV_STATUS_CARD_MAX,
+ ZCRYPT_DEV_STATUS_QUEUE_MAX);
/* walk through all the crypto apqnss */
- for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
+ for (i = 0; i < ZCRYPT_DEV_STATUS_ENTRIES; i++) {
card = AP_QID_CARD(device_status[i].qid);
dom = AP_QID_QUEUE(device_status[i].qid);
/* check online state */
@@ -1909,7 +1743,7 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
if (domain != 0xFFFF && dom != domain)
continue;
/* get cca info on this apqn */
- if (cca_get_info(card, dom, &ci, verify))
+ if (cca_get_info(card, dom, &ci, xflags))
continue;
/* current master key needs to be valid */
if (mktype == AES_MK_SET && ci.cur_aes_mk_state != '2')
@@ -1939,27 +1773,41 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
continue;
}
/* apqn passed all filtering criterons, add to the array */
- if (_nr_apqns < 256)
- _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16)dom);
+ if (_nr_apqns < *nr_apqns)
+ apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16)dom);
}
- /* nothing found ? */
- if (!_nr_apqns) {
- kfree(_apqns);
- rc = -ENODEV;
- } else {
- /* no re-allocation, simple return the _apqns array */
- *apqns = _apqns;
- *nr_apqns = _nr_apqns;
- rc = 0;
- }
+ *nr_apqns = _nr_apqns;
- kvfree(device_status);
- return rc;
+ /* release the device status memory */
+ mutex_unlock(&dev_status_mem_mutex);
+
+ return _nr_apqns ? 0 : -ENODEV;
}
EXPORT_SYMBOL(cca_findcard2);
-void __exit zcrypt_ccamisc_exit(void)
+int __init zcrypt_ccamisc_init(void)
+{
+ /* Pre-allocate a small memory pool for cca cprbs. */
+ cprb_mempool = mempool_create_kmalloc_pool(zcrypt_mempool_threshold,
+ CPRB_MEMPOOL_ITEM_SIZE);
+ if (!cprb_mempool)
+ return -ENOMEM;
+
+ /* Pre-allocate one crypto status card struct used in findcard() */
+ dev_status_mem = kvmalloc(ZCRYPT_DEV_STATUS_EXT_SIZE, GFP_KERNEL);
+ if (!dev_status_mem) {
+ mempool_destroy(cprb_mempool);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void zcrypt_ccamisc_exit(void)
{
- mkvp_cache_free();
+ mutex_lock(&dev_status_mem_mutex);
+ kvfree(dev_status_mem);
+ mutex_unlock(&dev_status_mem_mutex);
+ mempool_destroy(cprb_mempool);
}
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h
index 26bdca702523..1ecc4e37e9ad 100644
--- a/drivers/s390/crypto/zcrypt_ccamisc.h
+++ b/drivers/s390/crypto/zcrypt_ccamisc.h
@@ -160,44 +160,47 @@ int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl,
/*
* Generate (random) CCA AES DATA secure key.
*/
-int cca_genseckey(u16 cardnr, u16 domain, u32 keybitsize, u8 *seckey);
+int cca_genseckey(u16 cardnr, u16 domain, u32 keybitsize, u8 *seckey,
+ u32 xflags);
/*
* Generate CCA AES DATA secure key with given clear key value.
*/
int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
- const u8 *clrkey, u8 *seckey);
+ const u8 *clrkey, u8 *seckey, u32 xflags);
/*
* Derive proteced key from an CCA AES DATA secure key.
*/
int cca_sec2protkey(u16 cardnr, u16 domain,
const u8 *seckey, u8 *protkey, u32 *protkeylen,
- u32 *protkeytype);
+ u32 *protkeytype, u32 xflags);
/*
* Generate (random) CCA AES CIPHER secure key.
*/
int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
- u8 *keybuf, u32 *keybufsize);
+ u8 *keybuf, u32 *keybufsize, u32 xflags);
/*
* Derive proteced key from CCA AES cipher secure key.
*/
int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
/*
* Build CCA AES CIPHER secure key with a given clear key value.
*/
int cca_clr2cipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
- const u8 *clrkey, u8 *keybuf, u32 *keybufsize);
+ const u8 *clrkey, u8 *keybuf, u32 *keybufsize,
+ u32 xflags);
/*
* Derive proteced key from CCA ECC secure private key.
*/
int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype, u32 xflags);
/*
* Query cryptographic facility from CCA adapter
@@ -205,16 +208,8 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
int cca_query_crypto_facility(u16 cardnr, u16 domain,
const char *keyword,
u8 *rarray, size_t *rarraylen,
- u8 *varray, size_t *varraylen);
-
-/*
- * Search for a matching crypto card based on the Master Key
- * Verification Pattern provided inside a secure key.
- * Works with CCA AES data and cipher keys.
- * Returns < 0 on failure, 0 if CURRENT MKVP matches and
- * 1 if OLD MKVP matches.
- */
-int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify);
+ u8 *varray, size_t *varraylen,
+ u32 xflags);
/*
* Build a list of cca apqns meeting the following constrains:
@@ -224,21 +219,16 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify);
* - if minhwtype > 0 only apqns with hwtype >= minhwtype
* - if cur_mkvp != 0 only apqns where cur_mkvp == mkvp
* - if old_mkvp != 0 only apqns where old_mkvp == mkvp
- * - if verify is enabled and a cur_mkvp and/or old_mkvp
- * value is given, then refetch the cca_info and make sure the current
- * cur_mkvp or old_mkvp values of the apqn are used.
* The mktype determines which set of master keys to use:
* 0 = AES_MK_SET - AES MK set, 1 = APKA MK_SET - APKA MK set
- * The array of apqn entries is allocated with kmalloc and returned in *apqns;
- * the number of apqns stored into the list is returned in *nr_apqns. One apqn
- * entry is simple a 32 bit value with 16 bit cardnr and 16 bit domain nr and
- * may be casted to struct pkey_apqn. The return value is either 0 for success
- * or a negative errno value. If no apqn meeting the criteria is found,
- * -ENODEV is returned.
+ * The caller should set *nr_apqns to the nr of elements available in *apqns.
+ * On return *nr_apqns is then updated with the nr of apqns filled into *apqns.
+ * The return value is either 0 for success or a negative errno value.
+ * If no apqn meeting the criteria is found, -ENODEV is returned.
*/
-int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
+int cca_findcard2(u32 *apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
int minhwtype, int mktype, u64 cur_mkvp, u64 old_mkvp,
- int verify);
+ u32 xflags);
#define AES_MK_SET 0
#define APKA_MK_SET 1
@@ -270,8 +260,9 @@ struct cca_info {
/*
* Fetch cca information about an CCA queue.
*/
-int cca_get_info(u16 card, u16 dom, struct cca_info *ci, int verify);
+int cca_get_info(u16 card, u16 dom, struct cca_info *ci, u32 xflags);
+int zcrypt_ccamisc_init(void);
void zcrypt_ccamisc_exit(void);
#endif /* _ZCRYPT_CCAMISC_H_ */
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index 64df7d2f6266..6ba7fbddd3f7 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -79,14 +79,13 @@ static ssize_t cca_serialnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = dev_get_drvdata(dev);
- struct cca_info ci;
struct ap_card *ac = to_ap_card(dev);
+ struct cca_info ci;
memset(&ci, 0, sizeof(ci));
if (ap_domain_index >= 0)
- cca_get_info(ac->id, ap_domain_index, &ci, zc->online);
+ cca_get_info(ac->id, ap_domain_index, &ci, 0);
return sysfs_emit(buf, "%s\n", ci.serial);
}
@@ -110,17 +109,17 @@ static ssize_t cca_mkvps_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ static const char * const new_state[] = { "empty", "partial", "full" };
+ static const char * const cao_state[] = { "invalid", "valid" };
struct zcrypt_queue *zq = dev_get_drvdata(dev);
- int n = 0;
struct cca_info ci;
- static const char * const cao_state[] = { "invalid", "valid" };
- static const char * const new_state[] = { "empty", "partial", "full" };
+ int n = 0;
memset(&ci, 0, sizeof(ci));
cca_get_info(AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
- &ci, zq->online);
+ &ci, 0);
if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3')
n += sysfs_emit_at(buf, n, "AES NEW: %s 0x%016llx\n",
@@ -210,13 +209,12 @@ static ssize_t ep11_api_ordinalnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = dev_get_drvdata(dev);
- struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
+ struct ep11_card_info ci;
memset(&ci, 0, sizeof(ci));
- ep11_get_card_info(ac->id, &ci, zc->online);
+ ep11_get_card_info(ac->id, &ci, 0);
if (ci.API_ord_nr > 0)
return sysfs_emit(buf, "%u\n", ci.API_ord_nr);
@@ -231,13 +229,12 @@ static ssize_t ep11_fw_version_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = dev_get_drvdata(dev);
- struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
+ struct ep11_card_info ci;
memset(&ci, 0, sizeof(ci));
- ep11_get_card_info(ac->id, &ci, zc->online);
+ ep11_get_card_info(ac->id, &ci, 0);
if (ci.FW_version > 0)
return sysfs_emit(buf, "%d.%d\n",
@@ -254,13 +251,12 @@ static ssize_t ep11_serialnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = dev_get_drvdata(dev);
- struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
+ struct ep11_card_info ci;
memset(&ci, 0, sizeof(ci));
- ep11_get_card_info(ac->id, &ci, zc->online);
+ ep11_get_card_info(ac->id, &ci, 0);
if (ci.serial[0])
return sysfs_emit(buf, "%16.16s\n", ci.serial);
@@ -291,14 +287,13 @@ static ssize_t ep11_card_op_modes_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = dev_get_drvdata(dev);
- int i, n = 0;
- struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
+ struct ep11_card_info ci;
+ int i, n = 0;
memset(&ci, 0, sizeof(ci));
- ep11_get_card_info(ac->id, &ci, zc->online);
+ ep11_get_card_info(ac->id, &ci, 0);
for (i = 0; ep11_op_modes[i].mode_txt; i++) {
if (ci.op_mode & (1ULL << ep11_op_modes[i].mode_bit)) {
@@ -348,7 +343,7 @@ static ssize_t ep11_mkvps_show(struct device *dev,
if (zq->online)
ep11_get_domain_info(AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
- &di);
+ &di, 0);
if (di.cur_wk_state == '0') {
n = sysfs_emit(buf, "WK CUR: %s -\n",
@@ -395,7 +390,7 @@ static ssize_t ep11_queue_op_modes_show(struct device *dev,
if (zq->online)
ep11_get_domain_info(AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
- &di);
+ &di, 0);
for (i = 0; ep11_op_modes[i].mode_txt; i++) {
if (di.op_mode & (1ULL << ep11_op_modes[i].mode_bit)) {
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index cb7e6da43602..2f50fc7b8f61 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -10,9 +10,10 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/init.h>
+#include <linux/mempool.h>
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include <asm/zcrypt.h>
#include <asm/pkey.h>
#include <crypto/aes.h>
@@ -30,85 +31,29 @@
static const u8 def_iv[16] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };
-/* ep11 card info cache */
-struct card_list_entry {
- struct list_head list;
- u16 cardnr;
- struct ep11_card_info info;
-};
-static LIST_HEAD(card_list);
-static DEFINE_SPINLOCK(card_list_lock);
-
-static int card_cache_fetch(u16 cardnr, struct ep11_card_info *ci)
-{
- int rc = -ENOENT;
- struct card_list_entry *ptr;
-
- spin_lock_bh(&card_list_lock);
- list_for_each_entry(ptr, &card_list, list) {
- if (ptr->cardnr == cardnr) {
- memcpy(ci, &ptr->info, sizeof(*ci));
- rc = 0;
- break;
- }
- }
- spin_unlock_bh(&card_list_lock);
-
- return rc;
-}
-
-static void card_cache_update(u16 cardnr, const struct ep11_card_info *ci)
-{
- int found = 0;
- struct card_list_entry *ptr;
-
- spin_lock_bh(&card_list_lock);
- list_for_each_entry(ptr, &card_list, list) {
- if (ptr->cardnr == cardnr) {
- memcpy(&ptr->info, ci, sizeof(*ci));
- found = 1;
- break;
- }
- }
- if (!found) {
- ptr = kmalloc(sizeof(*ptr), GFP_ATOMIC);
- if (!ptr) {
- spin_unlock_bh(&card_list_lock);
- return;
- }
- ptr->cardnr = cardnr;
- memcpy(&ptr->info, ci, sizeof(*ci));
- list_add(&ptr->list, &card_list);
- }
- spin_unlock_bh(&card_list_lock);
-}
-
-static void card_cache_scrub(u16 cardnr)
-{
- struct card_list_entry *ptr;
-
- spin_lock_bh(&card_list_lock);
- list_for_each_entry(ptr, &card_list, list) {
- if (ptr->cardnr == cardnr) {
- list_del(&ptr->list);
- kfree(ptr);
- break;
- }
- }
- spin_unlock_bh(&card_list_lock);
-}
-
-static void __exit card_cache_free(void)
-{
- struct card_list_entry *ptr, *pnext;
+/*
+ * Cprb memory pool held for urgent cases where no memory
+ * can be allocated via kmalloc. This pool is only used when
+ * alloc_cprbmem() is called with the xflag ZCRYPT_XFLAG_NOMEMALLOC.
+ */
+#define CPRB_MEMPOOL_ITEM_SIZE (8 * 1024)
+static mempool_t *cprb_mempool;
- spin_lock_bh(&card_list_lock);
- list_for_each_entry_safe(ptr, pnext, &card_list, list) {
- list_del(&ptr->list);
- kfree(ptr);
- }
- spin_unlock_bh(&card_list_lock);
-}
+/*
+ * This is a pre-allocated memory for the device status array
+ * used within the ep11_findcard2() function. It is currently
+ * 128 * 128 * 4 bytes = 64 KB big. Usage of this memory is
+ * controlled via dev_status_mem_mutex. Needs adaption if more
+ * than 128 cards or domains to be are supported.
+ */
+#define ZCRYPT_DEV_STATUS_CARD_MAX 128
+#define ZCRYPT_DEV_STATUS_QUEUE_MAX 128
+#define ZCRYPT_DEV_STATUS_ENTRIES (ZCRYPT_DEV_STATUS_CARD_MAX * \
+ ZCRYPT_DEV_STATUS_QUEUE_MAX)
+#define ZCRYPT_DEV_STATUS_EXT_SIZE (ZCRYPT_DEV_STATUS_ENTRIES * \
+ sizeof(struct zcrypt_device_status_ext))
+static void *dev_status_mem;
+static DEFINE_MUTEX(dev_status_mem_mutex);
static int ep11_kb_split(const u8 *kb, size_t kblen, u32 kbver,
struct ep11kblob_header **kbhdr, size_t *kbhdrsize,
@@ -411,14 +356,20 @@ EXPORT_SYMBOL(ep11_check_aes_key);
/*
* Allocate and prepare ep11 cprb plus additional payload.
*/
-static inline struct ep11_cprb *alloc_cprb(size_t payload_len)
+static void *alloc_cprbmem(size_t payload_len, u32 xflags)
{
size_t len = sizeof(struct ep11_cprb) + payload_len;
- struct ep11_cprb *cprb;
+ struct ep11_cprb *cprb = NULL;
- cprb = kzalloc(len, GFP_KERNEL);
+ if (xflags & ZCRYPT_XFLAG_NOMEMALLOC) {
+ if (len <= CPRB_MEMPOOL_ITEM_SIZE)
+ cprb = mempool_alloc_preallocated(cprb_mempool);
+ } else {
+ cprb = kmalloc(len, GFP_KERNEL);
+ }
if (!cprb)
return NULL;
+ memset(cprb, 0, len);
cprb->cprb_len = sizeof(struct ep11_cprb);
cprb->cprb_ver_id = 0x04;
@@ -430,6 +381,20 @@ static inline struct ep11_cprb *alloc_cprb(size_t payload_len)
}
/*
+ * Free ep11 cprb buffer space.
+ */
+static void free_cprbmem(void *mem, size_t payload_len, bool scrub, u32 xflags)
+{
+ if (mem && scrub)
+ memzero_explicit(mem, sizeof(struct ep11_cprb) + payload_len);
+
+ if (xflags & ZCRYPT_XFLAG_NOMEMALLOC)
+ mempool_free(mem, cprb_mempool);
+ else
+ kfree(mem);
+}
+
+/*
* Some helper functions related to ASN1 encoding.
* Limited to length info <= 2 byte.
*/
@@ -489,6 +454,7 @@ static inline void prep_urb(struct ep11_urb *u,
struct ep11_cprb *req, size_t req_len,
struct ep11_cprb *rep, size_t rep_len)
{
+ memset(u, 0, sizeof(*u));
u->targets = (u8 __user *)t;
u->targets_num = nt;
u->req = (u8 __user *)req;
@@ -583,7 +549,7 @@ static int check_reply_cprb(const struct ep11_cprb *rep, const char *func)
* Helper function which does an ep11 query with given query type.
*/
static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
- size_t buflen, u8 *buf)
+ size_t buflen, u8 *buf, u32 xflags)
{
struct ep11_info_req_pl {
struct pl_head head;
@@ -605,11 +571,11 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
} __packed * rep_pl;
struct ep11_cprb *req = NULL, *rep = NULL;
struct ep11_target_dev target;
- struct ep11_urb *urb = NULL;
+ struct ep11_urb urb;
int api = EP11_API_V1, rc = -ENOMEM;
/* request cprb and payload */
- req = alloc_cprb(sizeof(struct ep11_info_req_pl));
+ req = alloc_cprbmem(sizeof(struct ep11_info_req_pl), xflags);
if (!req)
goto out;
req_pl = (struct ep11_info_req_pl *)(((u8 *)req) + sizeof(*req));
@@ -621,22 +587,19 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
req_pl->query_subtype_len = sizeof(u32);
/* reply cprb and payload */
- rep = alloc_cprb(sizeof(struct ep11_info_rep_pl) + buflen);
+ rep = alloc_cprbmem(sizeof(struct ep11_info_rep_pl) + buflen, xflags);
if (!rep)
goto out;
rep_pl = (struct ep11_info_rep_pl *)(((u8 *)rep) + sizeof(*rep));
/* urb and target */
- urb = kmalloc(sizeof(*urb), GFP_KERNEL);
- if (!urb)
- goto out;
target.ap_id = cardnr;
target.dom_id = domain;
- prep_urb(urb, &target, 1,
+ prep_urb(&urb, &target, 1,
req, sizeof(*req) + sizeof(*req_pl),
rep, sizeof(*rep) + sizeof(*rep_pl) + buflen);
- rc = zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(&urb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
__func__, (int)cardnr, (int)domain, rc);
@@ -667,16 +630,15 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
memcpy(buf, ((u8 *)rep_pl) + sizeof(*rep_pl), rep_pl->data_len);
out:
- kfree(req);
- kfree(rep);
- kfree(urb);
+ free_cprbmem(req, 0, false, xflags);
+ free_cprbmem(rep, 0, false, xflags);
return rc;
}
/*
* Provide information about an EP11 card.
*/
-int ep11_get_card_info(u16 card, struct ep11_card_info *info, int verify)
+int ep11_get_card_info(u16 card, struct ep11_card_info *info, u32 xflags)
{
int rc;
struct ep11_module_query_info {
@@ -706,30 +668,26 @@ int ep11_get_card_info(u16 card, struct ep11_card_info *info, int verify)
u32 max_CP_index;
} __packed * pmqi = NULL;
- rc = card_cache_fetch(card, info);
- if (rc || verify) {
- pmqi = kmalloc(sizeof(*pmqi), GFP_KERNEL);
- if (!pmqi)
- return -ENOMEM;
- rc = ep11_query_info(card, AUTOSEL_DOM,
- 0x01 /* module info query */,
- sizeof(*pmqi), (u8 *)pmqi);
- if (rc) {
- if (rc == -ENODEV)
- card_cache_scrub(card);
- goto out;
- }
- memset(info, 0, sizeof(*info));
- info->API_ord_nr = pmqi->API_ord_nr;
- info->FW_version =
- (pmqi->FW_major_vers << 8) + pmqi->FW_minor_vers;
- memcpy(info->serial, pmqi->serial, sizeof(info->serial));
- info->op_mode = pmqi->op_mode;
- card_cache_update(card, info);
- }
+ /* use the cprb mempool to satisfy this short term mem alloc */
+ pmqi = (xflags & ZCRYPT_XFLAG_NOMEMALLOC) ?
+ mempool_alloc_preallocated(cprb_mempool) :
+ mempool_alloc(cprb_mempool, GFP_KERNEL);
+ if (!pmqi)
+ return -ENOMEM;
+ rc = ep11_query_info(card, AUTOSEL_DOM,
+ 0x01 /* module info query */,
+ sizeof(*pmqi), (u8 *)pmqi, xflags);
+ if (rc)
+ goto out;
+
+ memset(info, 0, sizeof(*info));
+ info->API_ord_nr = pmqi->API_ord_nr;
+ info->FW_version = (pmqi->FW_major_vers << 8) + pmqi->FW_minor_vers;
+ memcpy(info->serial, pmqi->serial, sizeof(info->serial));
+ info->op_mode = pmqi->op_mode;
out:
- kfree(pmqi);
+ mempool_free(pmqi, cprb_mempool);
return rc;
}
EXPORT_SYMBOL(ep11_get_card_info);
@@ -737,7 +695,8 @@ EXPORT_SYMBOL(ep11_get_card_info);
/*
* Provide information about a domain within an EP11 card.
*/
-int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info)
+int ep11_get_domain_info(u16 card, u16 domain,
+ struct ep11_domain_info *info, u32 xflags)
{
int rc;
struct ep11_domain_query_info {
@@ -746,36 +705,32 @@ int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info)
u8 new_WK_VP[32];
u32 dom_flags;
u64 op_mode;
- } __packed * p_dom_info;
-
- p_dom_info = kmalloc(sizeof(*p_dom_info), GFP_KERNEL);
- if (!p_dom_info)
- return -ENOMEM;
+ } __packed dom_query_info;
rc = ep11_query_info(card, domain, 0x03 /* domain info query */,
- sizeof(*p_dom_info), (u8 *)p_dom_info);
+ sizeof(dom_query_info), (u8 *)&dom_query_info,
+ xflags);
if (rc)
goto out;
memset(info, 0, sizeof(*info));
info->cur_wk_state = '0';
info->new_wk_state = '0';
- if (p_dom_info->dom_flags & 0x10 /* left imprint mode */) {
- if (p_dom_info->dom_flags & 0x02 /* cur wk valid */) {
+ if (dom_query_info.dom_flags & 0x10 /* left imprint mode */) {
+ if (dom_query_info.dom_flags & 0x02 /* cur wk valid */) {
info->cur_wk_state = '1';
- memcpy(info->cur_wkvp, p_dom_info->cur_WK_VP, 32);
+ memcpy(info->cur_wkvp, dom_query_info.cur_WK_VP, 32);
}
- if (p_dom_info->dom_flags & 0x04 || /* new wk present */
- p_dom_info->dom_flags & 0x08 /* new wk committed */) {
+ if (dom_query_info.dom_flags & 0x04 || /* new wk present */
+ dom_query_info.dom_flags & 0x08 /* new wk committed */) {
info->new_wk_state =
- p_dom_info->dom_flags & 0x08 ? '2' : '1';
- memcpy(info->new_wkvp, p_dom_info->new_WK_VP, 32);
+ dom_query_info.dom_flags & 0x08 ? '2' : '1';
+ memcpy(info->new_wkvp, dom_query_info.new_WK_VP, 32);
}
}
- info->op_mode = p_dom_info->op_mode;
+ info->op_mode = dom_query_info.op_mode;
out:
- kfree(p_dom_info);
return rc;
}
EXPORT_SYMBOL(ep11_get_domain_info);
@@ -788,7 +743,7 @@ EXPORT_SYMBOL(ep11_get_domain_info);
static int _ep11_genaeskey(u16 card, u16 domain,
u32 keybitsize, u32 keygenflags,
- u8 *keybuf, size_t *keybufsize)
+ u8 *keybuf, size_t *keybufsize, u32 xflags)
{
struct keygen_req_pl {
struct pl_head head;
@@ -823,7 +778,7 @@ static int _ep11_genaeskey(u16 card, u16 domain,
struct ep11_cprb *req = NULL, *rep = NULL;
size_t req_pl_size, pinblob_size = 0;
struct ep11_target_dev target;
- struct ep11_urb *urb = NULL;
+ struct ep11_urb urb;
int api, rc = -ENOMEM;
u8 *p;
@@ -851,7 +806,7 @@ static int _ep11_genaeskey(u16 card, u16 domain,
pinblob_size = EP11_PINBLOB_V1_BYTES;
}
req_pl_size = sizeof(struct keygen_req_pl) + ASN1TAGLEN(pinblob_size);
- req = alloc_cprb(req_pl_size);
+ req = alloc_cprbmem(req_pl_size, xflags);
if (!req)
goto out;
req_pl = (struct keygen_req_pl *)(((u8 *)req) + sizeof(*req));
@@ -877,22 +832,19 @@ static int _ep11_genaeskey(u16 card, u16 domain,
*p++ = pinblob_size;
/* reply cprb and payload */
- rep = alloc_cprb(sizeof(struct keygen_rep_pl));
+ rep = alloc_cprbmem(sizeof(struct keygen_rep_pl), xflags);
if (!rep)
goto out;
rep_pl = (struct keygen_rep_pl *)(((u8 *)rep) + sizeof(*rep));
/* urb and target */
- urb = kmalloc(sizeof(*urb), GFP_KERNEL);
- if (!urb)
- goto out;
target.ap_id = card;
target.dom_id = domain;
- prep_urb(urb, &target, 1,
+ prep_urb(&urb, &target, 1,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + sizeof(*rep_pl));
- rc = zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(&urb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
__func__, (int)card, (int)domain, rc);
@@ -925,14 +877,13 @@ static int _ep11_genaeskey(u16 card, u16 domain,
*keybufsize = rep_pl->data_len;
out:
- kfree(req);
- kfree(rep);
- kfree(urb);
+ free_cprbmem(req, 0, false, xflags);
+ free_cprbmem(rep, sizeof(struct keygen_rep_pl), true, xflags);
return rc;
}
int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
- u8 *keybuf, u32 *keybufsize, u32 keybufver)
+ u8 *keybuf, u32 *keybufsize, u32 keybufver, u32 xflags)
{
struct ep11kblob_header *hdr;
size_t hdr_size, pl_size;
@@ -953,7 +904,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
return rc;
rc = _ep11_genaeskey(card, domain, keybitsize, keygenflags,
- pl, &pl_size);
+ pl, &pl_size, xflags);
if (rc)
return rc;
@@ -973,7 +924,8 @@ static int ep11_cryptsingle(u16 card, u16 domain,
u16 mode, u32 mech, const u8 *iv,
const u8 *key, size_t keysize,
const u8 *inbuf, size_t inbufsize,
- u8 *outbuf, size_t *outbufsize)
+ u8 *outbuf, size_t *outbufsize,
+ u32 xflags)
{
struct crypt_req_pl {
struct pl_head head;
@@ -1000,8 +952,8 @@ static int ep11_cryptsingle(u16 card, u16 domain,
} __packed * rep_pl;
struct ep11_cprb *req = NULL, *rep = NULL;
struct ep11_target_dev target;
- struct ep11_urb *urb = NULL;
- size_t req_pl_size, rep_pl_size;
+ struct ep11_urb urb;
+ size_t req_pl_size, rep_pl_size = 0;
int n, api = EP11_API_V1, rc = -ENOMEM;
u8 *p;
@@ -1012,7 +964,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
/* request cprb and payload */
req_pl_size = sizeof(struct crypt_req_pl) + (iv ? 16 : 0)
+ ASN1TAGLEN(keysize) + ASN1TAGLEN(inbufsize);
- req = alloc_cprb(req_pl_size);
+ req = alloc_cprbmem(req_pl_size, xflags);
if (!req)
goto out;
req_pl = (struct crypt_req_pl *)(((u8 *)req) + sizeof(*req));
@@ -1034,22 +986,19 @@ static int ep11_cryptsingle(u16 card, u16 domain,
/* reply cprb and payload, assume out data size <= in data size + 32 */
rep_pl_size = sizeof(struct crypt_rep_pl) + ASN1TAGLEN(inbufsize + 32);
- rep = alloc_cprb(rep_pl_size);
+ rep = alloc_cprbmem(rep_pl_size, xflags);
if (!rep)
goto out;
rep_pl = (struct crypt_rep_pl *)(((u8 *)rep) + sizeof(*rep));
/* urb and target */
- urb = kmalloc(sizeof(*urb), GFP_KERNEL);
- if (!urb)
- goto out;
target.ap_id = card;
target.dom_id = domain;
- prep_urb(urb, &target, 1,
+ prep_urb(&urb, &target, 1,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + rep_pl_size);
- rc = zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(&urb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
__func__, (int)card, (int)domain, rc);
@@ -1095,9 +1044,8 @@ static int ep11_cryptsingle(u16 card, u16 domain,
*outbufsize = n;
out:
- kfree(req);
- kfree(rep);
- kfree(urb);
+ free_cprbmem(req, req_pl_size, true, xflags);
+ free_cprbmem(rep, rep_pl_size, true, xflags);
return rc;
}
@@ -1106,7 +1054,7 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
const u8 *enckey, size_t enckeysize,
u32 mech, const u8 *iv,
u32 keybitsize, u32 keygenflags,
- u8 *keybuf, size_t *keybufsize)
+ u8 *keybuf, size_t *keybufsize, u32 xflags)
{
struct uw_req_pl {
struct pl_head head;
@@ -1143,7 +1091,7 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
struct ep11_cprb *req = NULL, *rep = NULL;
size_t req_pl_size, pinblob_size = 0;
struct ep11_target_dev target;
- struct ep11_urb *urb = NULL;
+ struct ep11_urb urb;
int api, rc = -ENOMEM;
u8 *p;
@@ -1161,7 +1109,7 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
req_pl_size = sizeof(struct uw_req_pl) + (iv ? 16 : 0)
+ ASN1TAGLEN(keksize) + ASN1TAGLEN(0)
+ ASN1TAGLEN(pinblob_size) + ASN1TAGLEN(enckeysize);
- req = alloc_cprb(req_pl_size);
+ req = alloc_cprbmem(req_pl_size, xflags);
if (!req)
goto out;
req_pl = (struct uw_req_pl *)(((u8 *)req) + sizeof(*req));
@@ -1197,22 +1145,19 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
p += asn1tag_write(p, 0x04, enckey, enckeysize);
/* reply cprb and payload */
- rep = alloc_cprb(sizeof(struct uw_rep_pl));
+ rep = alloc_cprbmem(sizeof(struct uw_rep_pl), xflags);
if (!rep)
goto out;
rep_pl = (struct uw_rep_pl *)(((u8 *)rep) + sizeof(*rep));
/* urb and target */
- urb = kmalloc(sizeof(*urb), GFP_KERNEL);
- if (!urb)
- goto out;
target.ap_id = card;
target.dom_id = domain;
- prep_urb(urb, &target, 1,
+ prep_urb(&urb, &target, 1,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + sizeof(*rep_pl));
- rc = zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(&urb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
__func__, (int)card, (int)domain, rc);
@@ -1245,9 +1190,8 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
*keybufsize = rep_pl->data_len;
out:
- kfree(req);
- kfree(rep);
- kfree(urb);
+ free_cprbmem(req, req_pl_size, true, xflags);
+ free_cprbmem(rep, sizeof(struct uw_rep_pl), true, xflags);
return rc;
}
@@ -1257,7 +1201,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
u32 mech, const u8 *iv,
u32 keybitsize, u32 keygenflags,
u8 *keybuf, u32 *keybufsize,
- u8 keybufver)
+ u8 keybufver, u32 xflags)
{
struct ep11kblob_header *hdr;
size_t hdr_size, pl_size;
@@ -1271,7 +1215,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
rc = _ep11_unwrapkey(card, domain, kek, keksize, enckey, enckeysize,
mech, iv, keybitsize, keygenflags,
- pl, &pl_size);
+ pl, &pl_size, xflags);
if (rc)
return rc;
@@ -1290,7 +1234,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
static int _ep11_wrapkey(u16 card, u16 domain,
const u8 *key, size_t keysize,
u32 mech, const u8 *iv,
- u8 *databuf, size_t *datasize)
+ u8 *databuf, size_t *datasize, u32 xflags)
{
struct wk_req_pl {
struct pl_head head;
@@ -1319,7 +1263,7 @@ static int _ep11_wrapkey(u16 card, u16 domain,
} __packed * rep_pl;
struct ep11_cprb *req = NULL, *rep = NULL;
struct ep11_target_dev target;
- struct ep11_urb *urb = NULL;
+ struct ep11_urb urb;
size_t req_pl_size;
int api, rc = -ENOMEM;
u8 *p;
@@ -1327,7 +1271,7 @@ static int _ep11_wrapkey(u16 card, u16 domain,
/* request cprb and payload */
req_pl_size = sizeof(struct wk_req_pl) + (iv ? 16 : 0)
+ ASN1TAGLEN(keysize) + 4;
- req = alloc_cprb(req_pl_size);
+ req = alloc_cprbmem(req_pl_size, xflags);
if (!req)
goto out;
if (!mech || mech == 0x80060001)
@@ -1357,22 +1301,19 @@ static int _ep11_wrapkey(u16 card, u16 domain,
*p++ = 0;
/* reply cprb and payload */
- rep = alloc_cprb(sizeof(struct wk_rep_pl));
+ rep = alloc_cprbmem(sizeof(struct wk_rep_pl), xflags);
if (!rep)
goto out;
rep_pl = (struct wk_rep_pl *)(((u8 *)rep) + sizeof(*rep));
/* urb and target */
- urb = kmalloc(sizeof(*urb), GFP_KERNEL);
- if (!urb)
- goto out;
target.ap_id = card;
target.dom_id = domain;
- prep_urb(urb, &target, 1,
+ prep_urb(&urb, &target, 1,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + sizeof(*rep_pl));
- rc = zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(&urb, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
__func__, (int)card, (int)domain, rc);
@@ -1405,18 +1346,18 @@ static int _ep11_wrapkey(u16 card, u16 domain,
*datasize = rep_pl->data_len;
out:
- kfree(req);
- kfree(rep);
- kfree(urb);
+ free_cprbmem(req, req_pl_size, true, xflags);
+ free_cprbmem(rep, sizeof(struct wk_rep_pl), true, xflags);
return rc;
}
int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
const u8 *clrkey, u8 *keybuf, u32 *keybufsize,
- u32 keytype)
+ u32 keytype, u32 xflags)
{
int rc;
- u8 encbuf[64], *kek = NULL;
+ void *mem;
+ u8 encbuf[64], *kek;
size_t clrkeylen, keklen, encbuflen = sizeof(encbuf);
if (keybitsize == 128 || keybitsize == 192 || keybitsize == 256) {
@@ -1427,18 +1368,24 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
return -EINVAL;
}
- /* allocate memory for the temp kek */
+ /*
+ * Allocate space for the temp kek.
+ * Also we only need up to MAXEP11AESKEYBLOBSIZE bytes for this
+ * we use the already existing cprb mempool to solve this
+ * short term memory requirement.
+ */
+ mem = (xflags & ZCRYPT_XFLAG_NOMEMALLOC) ?
+ mempool_alloc_preallocated(cprb_mempool) :
+ mempool_alloc(cprb_mempool, GFP_KERNEL);
+ if (!mem)
+ return -ENOMEM;
+ kek = (u8 *)mem;
keklen = MAXEP11AESKEYBLOBSIZE;
- kek = kmalloc(keklen, GFP_ATOMIC);
- if (!kek) {
- rc = -ENOMEM;
- goto out;
- }
/* Step 1: generate AES 256 bit random kek key */
rc = _ep11_genaeskey(card, domain, 256,
0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
- kek, &keklen);
+ kek, &keklen, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s generate kek key failed, rc=%d\n",
__func__, rc);
@@ -1447,7 +1394,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
/* Step 2: encrypt clear key value with the kek key */
rc = ep11_cryptsingle(card, domain, 0, 0, def_iv, kek, keklen,
- clrkey, clrkeylen, encbuf, &encbuflen);
+ clrkey, clrkeylen, encbuf, &encbuflen, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s encrypting key value with kek key failed, rc=%d\n",
__func__, rc);
@@ -1457,22 +1404,23 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
/* Step 3: import the encrypted key value as a new key */
rc = ep11_unwrapkey(card, domain, kek, keklen,
encbuf, encbuflen, 0, def_iv,
- keybitsize, 0, keybuf, keybufsize, keytype);
+ keybitsize, 0, keybuf, keybufsize, keytype, xflags);
if (rc) {
- ZCRYPT_DBF_ERR("%s importing key value as new key failed,, rc=%d\n",
+ ZCRYPT_DBF_ERR("%s importing key value as new key failed, rc=%d\n",
__func__, rc);
goto out;
}
out:
- kfree(kek);
+ mempool_free(mem, cprb_mempool);
return rc;
}
EXPORT_SYMBOL(ep11_clr2keyblob);
int ep11_kblob2protkey(u16 card, u16 dom,
const u8 *keyblob, u32 keybloblen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags)
{
struct ep11kblob_header *hdr;
struct ep11keyblob *key;
@@ -1498,15 +1446,29 @@ int ep11_kblob2protkey(u16 card, u16 dom,
}
/* !!! hdr is no longer a valid header !!! */
- /* alloc temp working buffer */
+ /* need a temp working buffer */
wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1));
- wkbuf = kmalloc(wkbuflen, GFP_ATOMIC);
- if (!wkbuf)
- return -ENOMEM;
+ if (wkbuflen > CPRB_MEMPOOL_ITEM_SIZE) {
+ /* this should never happen */
+ rc = -ENOMEM;
+ ZCRYPT_DBF_WARN("%s wkbuflen %d > cprb mempool item size %d, rc=%d\n",
+ __func__, (int)wkbuflen, CPRB_MEMPOOL_ITEM_SIZE, rc);
+ return rc;
+ }
+ /* use the cprb mempool to satisfy this short term mem allocation */
+ wkbuf = (xflags & ZCRYPT_XFLAG_NOMEMALLOC) ?
+ mempool_alloc_preallocated(cprb_mempool) :
+ mempool_alloc(cprb_mempool, GFP_ATOMIC);
+ if (!wkbuf) {
+ rc = -ENOMEM;
+ ZCRYPT_DBF_WARN("%s allocating tmp buffer via cprb mempool failed, rc=%d\n",
+ __func__, rc);
+ return rc;
+ }
/* ep11 secure key -> protected key + info */
rc = _ep11_wrapkey(card, dom, (u8 *)key, keylen,
- 0, def_iv, wkbuf, &wkbuflen);
+ 0, def_iv, wkbuf, &wkbuflen, xflags);
if (rc) {
ZCRYPT_DBF_ERR("%s rewrapping ep11 key to pkey failed, rc=%d\n",
__func__, rc);
@@ -1573,37 +1535,32 @@ int ep11_kblob2protkey(u16 card, u16 dom,
*protkeylen = wki->pkeysize;
out:
- kfree(wkbuf);
+ mempool_free(wkbuf, cprb_mempool);
return rc;
}
EXPORT_SYMBOL(ep11_kblob2protkey);
-int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
- int minhwtype, int minapi, const u8 *wkvp)
+int ep11_findcard2(u32 *apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
+ int minhwtype, int minapi, const u8 *wkvp, u32 xflags)
{
struct zcrypt_device_status_ext *device_status;
- u32 *_apqns = NULL, _nr_apqns = 0;
- int i, card, dom, rc = -ENOMEM;
struct ep11_domain_info edi;
struct ep11_card_info eci;
+ u32 _nr_apqns = 0;
+ int i, card, dom;
- /* fetch status of all crypto cards */
- device_status = kvcalloc(MAX_ZDEV_ENTRIES_EXT,
- sizeof(struct zcrypt_device_status_ext),
- GFP_KERNEL);
- if (!device_status)
- return -ENOMEM;
- zcrypt_device_status_mask_ext(device_status);
+ /* occupy the device status memory */
+ mutex_lock(&dev_status_mem_mutex);
+ memset(dev_status_mem, 0, ZCRYPT_DEV_STATUS_EXT_SIZE);
+ device_status = (struct zcrypt_device_status_ext *)dev_status_mem;
- /* allocate 1k space for up to 256 apqns */
- _apqns = kmalloc_array(256, sizeof(u32), GFP_KERNEL);
- if (!_apqns) {
- kvfree(device_status);
- return -ENOMEM;
- }
+ /* fetch crypto device status into this struct */
+ zcrypt_device_status_mask_ext(device_status,
+ ZCRYPT_DEV_STATUS_CARD_MAX,
+ ZCRYPT_DEV_STATUS_QUEUE_MAX);
/* walk through all the crypto apqnss */
- for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
+ for (i = 0; i < ZCRYPT_DEV_STATUS_ENTRIES; i++) {
card = AP_QID_CARD(device_status[i].qid);
dom = AP_QID_QUEUE(device_status[i].qid);
/* check online state */
@@ -1623,14 +1580,14 @@ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
continue;
/* check min api version if given */
if (minapi > 0) {
- if (ep11_get_card_info(card, &eci, 0))
+ if (ep11_get_card_info(card, &eci, xflags))
continue;
if (minapi > eci.API_ord_nr)
continue;
}
/* check wkvp if given */
if (wkvp) {
- if (ep11_get_domain_info(card, dom, &edi))
+ if (ep11_get_domain_info(card, dom, &edi, xflags))
continue;
if (edi.cur_wk_state != '1')
continue;
@@ -1638,27 +1595,40 @@ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
continue;
}
/* apqn passed all filtering criterons, add to the array */
- if (_nr_apqns < 256)
- _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16)dom);
+ if (_nr_apqns < *nr_apqns)
+ apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16)dom);
}
- /* nothing found ? */
- if (!_nr_apqns) {
- kfree(_apqns);
- rc = -ENODEV;
- } else {
- /* no re-allocation, simple return the _apqns array */
- *apqns = _apqns;
- *nr_apqns = _nr_apqns;
- rc = 0;
- }
+ *nr_apqns = _nr_apqns;
- kvfree(device_status);
- return rc;
+ mutex_unlock(&dev_status_mem_mutex);
+
+ return _nr_apqns ? 0 : -ENODEV;
}
EXPORT_SYMBOL(ep11_findcard2);
-void __exit zcrypt_ep11misc_exit(void)
+int __init zcrypt_ep11misc_init(void)
+{
+ /* Pre-allocate a small memory pool for ep11 cprbs. */
+ cprb_mempool = mempool_create_kmalloc_pool(2 * zcrypt_mempool_threshold,
+ CPRB_MEMPOOL_ITEM_SIZE);
+ if (!cprb_mempool)
+ return -ENOMEM;
+
+ /* Pre-allocate one crypto status card struct used in ep11_findcard2() */
+ dev_status_mem = kvmalloc(ZCRYPT_DEV_STATUS_EXT_SIZE, GFP_KERNEL);
+ if (!dev_status_mem) {
+ mempool_destroy(cprb_mempool);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void zcrypt_ep11misc_exit(void)
{
- card_cache_free();
+ mutex_lock(&dev_status_mem_mutex);
+ kvfree(dev_status_mem);
+ mutex_unlock(&dev_status_mem_mutex);
+ mempool_destroy(cprb_mempool);
}
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index 9f1bdffdec68..b5e6fd861815 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -104,25 +104,26 @@ struct ep11_domain_info {
/*
* Provide information about an EP11 card.
*/
-int ep11_get_card_info(u16 card, struct ep11_card_info *info, int verify);
+int ep11_get_card_info(u16 card, struct ep11_card_info *info, u32 xflags);
/*
* Provide information about a domain within an EP11 card.
*/
-int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info);
+int ep11_get_domain_info(u16 card, u16 domain,
+ struct ep11_domain_info *info, u32 xflags);
/*
* Generate (random) EP11 AES secure key.
*/
int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
- u8 *keybuf, u32 *keybufsize, u32 keybufver);
+ u8 *keybuf, u32 *keybufsize, u32 keybufver, u32 xflags);
/*
* Generate EP11 AES secure key with given clear key value.
*/
int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
const u8 *clrkey, u8 *keybuf, u32 *keybufsize,
- u32 keytype);
+ u32 keytype, u32 xflags);
/*
* Build a list of ep11 apqns meeting the following constrains:
@@ -136,22 +137,22 @@ int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
* key for this domain. When a wkvp is given there will always be a re-fetch
* of the domain info for the potential apqn - so this triggers an request
* reply to each apqn eligible.
- * The array of apqn entries is allocated with kmalloc and returned in *apqns;
- * the number of apqns stored into the list is returned in *nr_apqns. One apqn
- * entry is simple a 32 bit value with 16 bit cardnr and 16 bit domain nr and
- * may be casted to struct pkey_apqn. The return value is either 0 for success
- * or a negative errno value. If no apqn meeting the criteria is found,
- * -ENODEV is returned.
+ * The caller should set *nr_apqns to the nr of elements available in *apqns.
+ * On return *nr_apqns is then updated with the nr of apqns filled into *apqns.
+ * The return value is either 0 for success or a negative errno value.
+ * If no apqn meeting the criteria is found, -ENODEV is returned.
*/
-int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
- int minhwtype, int minapi, const u8 *wkvp);
+int ep11_findcard2(u32 *apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
+ int minhwtype, int minapi, const u8 *wkvp, u32 xflags);
/*
* Derive proteced key from EP11 key blob (AES and ECC keys).
*/
int ep11_kblob2protkey(u16 card, u16 dom, const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
+int zcrypt_ep11misc_init(void);
void zcrypt_ep11misc_exit(void);
#endif /* _ZCRYPT_EP11MISC_H_ */
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index adc65eddaa1e..fc0a2a053dc2 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -438,7 +438,7 @@ static void zcrypt_msgtype50_receive(struct ap_queue *aq,
msg->len = sizeof(error_reply);
}
out:
- complete((struct completion *)msg->private);
+ complete(&msg->response.work);
}
static atomic_t zcrypt_step = ATOMIC_INIT(0);
@@ -449,30 +449,30 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
* @zq: pointer to zcrypt_queue structure that identifies the
* CEXxA device to the request distributor
* @mex: pointer to the modexpo request buffer
+ * This function assumes that ap_msg has been initialized with
+ * ap_init_apmsg() and thus a valid buffer with the size of
+ * ap_msg->bufsize is available within ap_msg. Also the caller has
+ * to make sure ap_release_apmsg() is always called even on failure.
*/
static long zcrypt_msgtype50_modexpo(struct zcrypt_queue *zq,
struct ica_rsa_modexpo *mex,
struct ap_message *ap_msg)
{
- struct completion work;
int rc;
- ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE;
- ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
- if (!ap_msg->msg)
- return -ENOMEM;
+ if (ap_msg->bufsize < MSGTYPE50_CRB3_MAX_MSG_SIZE)
+ return -EMSGSIZE;
ap_msg->receive = zcrypt_msgtype50_receive;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = &work;
rc = ICAMEX_msg_to_type50MEX_msg(zq, ap_msg, mex);
if (rc)
goto out;
- init_completion(&work);
+ init_completion(&ap_msg->response.work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out;
- rc = wait_for_completion_interruptible(&work);
+ rc = wait_for_completion_interruptible(&ap_msg->response.work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
@@ -485,7 +485,6 @@ static long zcrypt_msgtype50_modexpo(struct zcrypt_queue *zq,
}
out:
- ap_msg->private = NULL;
if (rc)
pr_debug("send me cprb at dev=%02x.%04x rc=%d\n",
AP_QID_CARD(zq->queue->qid),
@@ -499,30 +498,30 @@ out:
* @zq: pointer to zcrypt_queue structure that identifies the
* CEXxA device to the request distributor
* @crt: pointer to the modexpoc_crt request buffer
+ * This function assumes that ap_msg has been initialized with
+ * ap_init_apmsg() and thus a valid buffer with the size of
+ * ap_msg->bufsize is available within ap_msg. Also the caller has
+ * to make sure ap_release_apmsg() is always called even on failure.
*/
static long zcrypt_msgtype50_modexpo_crt(struct zcrypt_queue *zq,
struct ica_rsa_modexpo_crt *crt,
struct ap_message *ap_msg)
{
- struct completion work;
int rc;
- ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE;
- ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
- if (!ap_msg->msg)
- return -ENOMEM;
+ if (ap_msg->bufsize < MSGTYPE50_CRB3_MAX_MSG_SIZE)
+ return -EMSGSIZE;
ap_msg->receive = zcrypt_msgtype50_receive;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = &work;
rc = ICACRT_msg_to_type50CRT_msg(zq, ap_msg, crt);
if (rc)
goto out;
- init_completion(&work);
+ init_completion(&ap_msg->response.work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out;
- rc = wait_for_completion_interruptible(&work);
+ rc = wait_for_completion_interruptible(&ap_msg->response.work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
@@ -535,7 +534,6 @@ static long zcrypt_msgtype50_modexpo_crt(struct zcrypt_queue *zq,
}
out:
- ap_msg->private = NULL;
if (rc)
pr_debug("send crt cprb at dev=%02x.%04x rc=%d\n",
AP_QID_CARD(zq->queue->qid),
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index b64c9d9fc613..9cefbb30960f 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -31,11 +31,6 @@
#define CEIL4(x) ((((x) + 3) / 4) * 4)
-struct response_type {
- struct completion work;
- int type;
-};
-
#define CEXXC_RESPONSE_TYPE_ICA 0
#define CEXXC_RESPONSE_TYPE_XCRB 1
#define CEXXC_RESPONSE_TYPE_EP11 2
@@ -856,7 +851,7 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq,
.type = TYPE82_RSP_CODE,
.reply_code = REP82_ERROR_MACHINE_FAILURE,
};
- struct response_type *resp_type = msg->private;
+ struct ap_response_type *resp_type = &msg->response;
struct type86x_reply *t86r;
int len;
@@ -920,7 +915,7 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq,
.type = TYPE82_RSP_CODE,
.reply_code = REP82_ERROR_MACHINE_FAILURE,
};
- struct response_type *resp_type = msg->private;
+ struct ap_response_type *resp_type = &msg->response;
struct type86_ep11_reply *t86r;
int len;
@@ -967,9 +962,7 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
struct ica_rsa_modexpo *mex,
struct ap_message *ap_msg)
{
- struct response_type resp_type = {
- .type = CEXXC_RESPONSE_TYPE_ICA,
- };
+ struct ap_response_type *resp_type = &ap_msg->response;
int rc;
ap_msg->msg = (void *)get_zeroed_page(GFP_KERNEL);
@@ -979,15 +972,15 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
ap_msg->receive = zcrypt_msgtype6_receive;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = &resp_type;
rc = icamex_msg_to_type6mex_msgx(zq, ap_msg, mex);
if (rc)
goto out_free;
- init_completion(&resp_type.work);
+ resp_type->type = CEXXC_RESPONSE_TYPE_ICA;
+ init_completion(&resp_type->work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out_free;
- rc = wait_for_completion_interruptible(&resp_type.work);
+ rc = wait_for_completion_interruptible(&resp_type->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
@@ -1001,7 +994,6 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
out_free:
free_page((unsigned long)ap_msg->msg);
- ap_msg->private = NULL;
ap_msg->msg = NULL;
return rc;
}
@@ -1017,9 +1009,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
struct ica_rsa_modexpo_crt *crt,
struct ap_message *ap_msg)
{
- struct response_type resp_type = {
- .type = CEXXC_RESPONSE_TYPE_ICA,
- };
+ struct ap_response_type *resp_type = &ap_msg->response;
int rc;
ap_msg->msg = (void *)get_zeroed_page(GFP_KERNEL);
@@ -1029,15 +1019,15 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
ap_msg->receive = zcrypt_msgtype6_receive;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = &resp_type;
rc = icacrt_msg_to_type6crt_msgx(zq, ap_msg, crt);
if (rc)
goto out_free;
- init_completion(&resp_type.work);
+ resp_type->type = CEXXC_RESPONSE_TYPE_ICA;
+ init_completion(&resp_type->work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out_free;
- rc = wait_for_completion_interruptible(&resp_type.work);
+ rc = wait_for_completion_interruptible(&resp_type->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
@@ -1051,7 +1041,6 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
out_free:
free_page((unsigned long)ap_msg->msg);
- ap_msg->private = NULL;
ap_msg->msg = NULL;
return rc;
}
@@ -1061,28 +1050,21 @@ out_free:
* Prepare a CCA AP msg: fetch the required data from userspace,
* prepare the AP msg, fill some info into the ap_message struct,
* extract some data from the CPRB and give back to the caller.
- * This function allocates memory and needs an ap_msg prepared
- * by the caller with ap_init_message(). Also the caller has to
- * make sure ap_release_message() is always called even on failure.
+ * This function assumes that ap_msg has been initialized with
+ * ap_init_apmsg() and thus a valid buffer with the size of
+ * ap_msg->bufsize is available within ap_msg. Also the caller has
+ * to make sure ap_release_apmsg() is always called even on failure.
*/
int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb,
struct ap_message *ap_msg,
unsigned int *func_code, unsigned short **dom)
{
- struct response_type resp_type = {
- .type = CEXXC_RESPONSE_TYPE_XCRB,
- };
+ struct ap_response_type *resp_type = &ap_msg->response;
- ap_msg->bufsize = atomic_read(&ap_max_msg_size);
- ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
- if (!ap_msg->msg)
- return -ENOMEM;
ap_msg->receive = zcrypt_msgtype6_receive;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
- if (!ap_msg->private)
- return -ENOMEM;
+ resp_type->type = CEXXC_RESPONSE_TYPE_XCRB;
return xcrb_msg_to_type6cprb_msgx(userspace, ap_msg, xcrb, func_code, dom);
}
@@ -1097,7 +1079,7 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
struct ica_xcRB *xcrb,
struct ap_message *ap_msg)
{
- struct response_type *rtype = ap_msg->private;
+ struct ap_response_type *resp_type = &ap_msg->response;
struct {
struct type6_hdr hdr;
struct CPRBX cprbx;
@@ -1128,11 +1110,11 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
msg->hdr.fromcardlen1 -= delta;
}
- init_completion(&rtype->work);
+ init_completion(&resp_type->work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out;
- rc = wait_for_completion_interruptible(&rtype->work);
+ rc = wait_for_completion_interruptible(&resp_type->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
@@ -1158,28 +1140,21 @@ out:
* Prepare an EP11 AP msg: fetch the required data from userspace,
* prepare the AP msg, fill some info into the ap_message struct,
* extract some data from the CPRB and give back to the caller.
- * This function allocates memory and needs an ap_msg prepared
- * by the caller with ap_init_message(). Also the caller has to
- * make sure ap_release_message() is always called even on failure.
+ * This function assumes that ap_msg has been initialized with
+ * ap_init_apmsg() and thus a valid buffer with the size of
+ * ap_msg->bufsize is available within ap_msg. Also the caller has
+ * to make sure ap_release_apmsg() is always called even on failure.
*/
int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb,
struct ap_message *ap_msg,
unsigned int *func_code, unsigned int *domain)
{
- struct response_type resp_type = {
- .type = CEXXC_RESPONSE_TYPE_EP11,
- };
+ struct ap_response_type *resp_type = &ap_msg->response;
- ap_msg->bufsize = atomic_read(&ap_max_msg_size);
- ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
- if (!ap_msg->msg)
- return -ENOMEM;
ap_msg->receive = zcrypt_msgtype6_receive_ep11;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
- if (!ap_msg->private)
- return -ENOMEM;
+ resp_type->type = CEXXC_RESPONSE_TYPE_EP11;
return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb,
func_code, domain);
}
@@ -1197,7 +1172,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
{
int rc;
unsigned int lfmt;
- struct response_type *rtype = ap_msg->private;
+ struct ap_response_type *resp_type = &ap_msg->response;
struct {
struct type6_hdr hdr;
struct ep11_cprb cprbx;
@@ -1251,11 +1226,11 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
msg->hdr.fromcardlen1 = zq->reply.bufsize -
sizeof(struct type86_hdr) - sizeof(struct type86_fmt2_ext);
- init_completion(&rtype->work);
+ init_completion(&resp_type->work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out;
- rc = wait_for_completion_interruptible(&rtype->work);
+ rc = wait_for_completion_interruptible(&resp_type->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
@@ -1276,23 +1251,25 @@ out:
return rc;
}
+/*
+ * Prepare a CEXXC get random request ap message.
+ * This function assumes that ap_msg has been initialized with
+ * ap_init_apmsg() and thus a valid buffer with the size of
+ * ap_max_msg_size is available within ap_msg. Also the caller has
+ * to make sure ap_release_apmsg() is always called even on failure.
+ */
int prep_rng_ap_msg(struct ap_message *ap_msg, int *func_code,
unsigned int *domain)
{
- struct response_type resp_type = {
- .type = CEXXC_RESPONSE_TYPE_XCRB,
- };
+ struct ap_response_type *resp_type = &ap_msg->response;
- ap_msg->bufsize = AP_DEFAULT_MAX_MSG_SIZE;
- ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
- if (!ap_msg->msg)
- return -ENOMEM;
+ if (ap_msg->bufsize < AP_DEFAULT_MAX_MSG_SIZE)
+ return -EMSGSIZE;
ap_msg->receive = zcrypt_msgtype6_receive;
ap_msg->psmid = (((unsigned long)current->pid) << 32) +
atomic_inc_return(&zcrypt_step);
- ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
- if (!ap_msg->private)
- return -ENOMEM;
+
+ resp_type->type = CEXXC_RESPONSE_TYPE_XCRB;
rng_type6cprb_msgx(ap_msg, ZCRYPT_RNG_BUFFER_SIZE, domain);
@@ -1319,16 +1296,16 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq,
short int verb_length;
short int key_length;
} __packed * msg = ap_msg->msg;
- struct response_type *rtype = ap_msg->private;
+ struct ap_response_type *resp_type = &ap_msg->response;
int rc;
msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
- init_completion(&rtype->work);
+ init_completion(&resp_type->work);
rc = ap_queue_message(zq->queue, ap_msg);
if (rc)
goto out;
- rc = wait_for_completion_interruptible(&rtype->work);
+ rc = wait_for_completion_interruptible(&resp_type->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
index 9e580ef69bda..aaa1eea6149b 100644
--- a/drivers/s390/net/ctcm_mpc.c
+++ b/drivers/s390/net/ctcm_mpc.c
@@ -179,7 +179,7 @@ void ctcmpc_dumpit(char *buf, int len)
ctcm_pr_debug(" %s (+%s) : %s [%s]\n",
addr, boff, bhex, basc);
dup = 0;
- strcpy(duphex, bhex);
+ strscpy(duphex, bhex);
} else
dup++;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 5a3c670aec27..5522310bab8d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -403,6 +403,7 @@ config SCSI_ACARD
config SCSI_AHA152X
tristate "Adaptec AHA152X/2825 support"
depends on ISA && SCSI
+ depends on !HIGHMEM
select SCSI_SPI_ATTRS
select CHECK_SIGNATURE
help
@@ -795,6 +796,7 @@ config SCSI_PPA
tristate "IOMEGA parallel port (ppa - older drives)"
depends on SCSI && PARPORT_PC
depends on HAS_IOPORT
+ depends on !HIGHMEM
help
This driver supports older versions of IOMEGA's parallel port ZIP
drive (a 100 MB removable media device).
@@ -822,6 +824,7 @@ config SCSI_PPA
config SCSI_IMM
tristate "IOMEGA parallel port (imm - newer drives)"
depends on SCSI && PARPORT_PC
+ depends on !HIGHMEM
help
This driver supports newer versions of IOMEGA's parallel port ZIP
drive (a 100 MB removable media device).
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index 4276f868cd91..e94c0a19c435 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -746,7 +746,6 @@ struct Scsi_Host *aha152x_probe_one(struct aha152x_setup *setup)
/* need to have host registered before triggering any interrupt */
list_add_tail(&HOSTDATA(shpnt)->host_list, &aha152x_host_list);
- shpnt->no_highmem = true;
shpnt->io_port = setup->io_port;
shpnt->n_io_port = IO_RANGE;
shpnt->irq = setup->irq;
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 1d4c7310f1a6..0821cf994b98 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -1224,7 +1224,6 @@ static int __imm_attach(struct parport *pb)
host = scsi_host_alloc(&imm_template, sizeof(imm_struct *));
if (!host)
goto out1;
- host->no_highmem = true;
host->io_port = pb->base;
host->n_io_port = ports;
host->dma_channel = -1;
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index d533a8aa72cc..b75f46c30759 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -3952,7 +3952,7 @@ megaraid_sysfs_get_ldmap(adapter_t *adapter)
timer_delete_sync(&timeout.timer);
- destroy_timer_on_stack(&timeout.timer);
+ timer_destroy_on_stack(&timeout.timer);
mutex_unlock(&raid_dev->sysfs_mtx);
diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c
index 1f2cd15e3361..fd7fa7640a5e 100644
--- a/drivers/scsi/megaraid/megaraid_mm.c
+++ b/drivers/scsi/megaraid/megaraid_mm.c
@@ -704,7 +704,7 @@ lld_ioctl(mraid_mmadp_t *adp, uioc_t *kioc)
wait_event(wait_q, (kioc->status != -ENODATA));
if (timeout.timer.function) {
timer_delete_sync(&timeout.timer);
- destroy_timer_on_stack(&timeout.timer);
+ timer_destroy_on_stack(&timeout.timer);
}
/*
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index a06329b47851..1ed3171f1797 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -1104,7 +1104,6 @@ static int __ppa_attach(struct parport *pb)
host = scsi_host_alloc(&ppa_template, sizeof(ppa_struct *));
if (!host)
goto out1;
- host->no_highmem = true;
host->io_port = pb->base;
host->n_io_port = ports;
host->dma_channel = -1;
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 2fa45556e1ea..0ddc95bafc71 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -601,7 +601,7 @@ static int sg_scsi_ioctl(struct request_queue *q, bool open_for_write,
}
if (bytes) {
- err = blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO);
+ err = blk_rq_map_kern(rq, buffer, bytes, GFP_NOIO);
if (err)
goto error;
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1b43013d72c0..144c72f0737a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -313,8 +313,7 @@ retry:
return PTR_ERR(req);
if (bufflen) {
- ret = blk_rq_map_kern(sdev->request_queue, req,
- buffer, bufflen, GFP_NOIO);
+ ret = blk_rq_map_kern(req, buffer, bufflen, GFP_NOIO);
if (ret)
goto out;
}
@@ -2004,9 +2003,6 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim)
lim->dma_alignment = max_t(unsigned int,
shost->dma_alignment, dma_get_cache_alignment() - 1);
- if (shost->no_highmem)
- lim->features |= BLK_FEAT_BOUNCE_HIGH;
-
/*
* Propagate the DMA formation properties to the dma-mapping layer as
* a courtesy service to the LLDDs. This needs to check that the buses
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7a447ff600d2..a8db66428f80 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
unsigned int nr_zones, size_t *buflen)
{
struct request_queue *q = sdkp->disk->queue;
+ unsigned int max_segments;
size_t bufsize;
void *buf;
@@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
* Furthermore, since the report zone command cannot be split, make
* sure that the allocated buffer can always be mapped by limiting the
* number of pages allocated to the HBA max segments limit.
+ * Since max segments can be larger than the max inline bio vectors,
+ * further limit the allocated buffer to BIO_MAX_INLINE_VECS.
*/
nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
- bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
+ max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q));
+ bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT);
while (bufsize >= SECTOR_SIZE) {
buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 35db061ae3ec..2e6b2412d2c9 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1819,6 +1819,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
return SCSI_MLQUEUE_DEVICE_BUSY;
}
+ payload->rangecount = 1;
payload->range.len = length;
payload->range.offset = offset_in_hvpg;
diff --git a/drivers/sh/intc/irqdomain.c b/drivers/sh/intc/irqdomain.c
index 3968f1c3c5c3..ed7a570ffdf2 100644
--- a/drivers/sh/intc/irqdomain.c
+++ b/drivers/sh/intc/irqdomain.c
@@ -59,10 +59,9 @@ void __init intc_irq_domain_init(struct intc_desc_int *d,
* tree penalty for linear cases with non-zero hwirq bases.
*/
if (irq_base == 0 && irq_end == (irq_base + hw->nr_vectors - 1))
- d->domain = irq_domain_add_linear(NULL, hw->nr_vectors,
- &intc_evt_ops, NULL);
+ d->domain = irq_domain_create_linear(NULL, hw->nr_vectors, &intc_evt_ops, NULL);
else
- d->domain = irq_domain_add_tree(NULL, &intc_evt_ops, NULL);
+ d->domain = irq_domain_create_tree(NULL, &intc_evt_ops, NULL);
BUG_ON(!d->domain);
}
diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c
index 6202dbcd20a8..7bbd3f940e4d 100644
--- a/drivers/soc/dove/pmu.c
+++ b/drivers/soc/dove/pmu.c
@@ -257,10 +257,9 @@ static void pmu_irq_handler(struct irq_desc *desc)
* So, let's structure the code so that the window is as small as
* possible.
*/
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
done &= readl_relaxed(base + PMC_IRQ_CAUSE);
writel_relaxed(done, base + PMC_IRQ_CAUSE);
- irq_gc_unlock(gc);
}
static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq)
@@ -274,8 +273,8 @@ static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq)
writel(0, pmu->pmc_base + PMC_IRQ_MASK);
writel(0, pmu->pmc_base + PMC_IRQ_CAUSE);
- domain = irq_domain_add_linear(pmu->of_node, NR_PMU_IRQS,
- &irq_generic_chip_ops, NULL);
+ domain = irq_domain_create_linear(of_fwnode_handle(pmu->of_node), NR_PMU_IRQS,
+ &irq_generic_chip_ops, NULL);
if (!domain) {
pr_err("%s: unable to add irq domain\n", name);
return -ENOMEM;
diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/soc/fsl/qe/qe_ic.c
index 77bf0e83ffcc..e4b6ff2cc76b 100644
--- a/drivers/soc/fsl/qe/qe_ic.c
+++ b/drivers/soc/fsl/qe/qe_ic.c
@@ -446,8 +446,8 @@ static int qe_ic_init(struct platform_device *pdev)
high_handler = NULL;
}
- qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS,
- &qe_ic_host_ops, qe_ic);
+ qe_ic->irqhost = irq_domain_create_linear(of_fwnode_handle(node), NR_QE_IC_INTS,
+ &qe_ic_host_ops, qe_ic);
if (qe_ic->irqhost == NULL) {
dev_err(dev, "failed to add irq domain\n");
return -ENODEV;
diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c
index a3e88ced328a..8c8878bc87f5 100644
--- a/drivers/soc/qcom/smp2p.c
+++ b/drivers/soc/qcom/smp2p.c
@@ -399,7 +399,7 @@ static int qcom_smp2p_inbound_entry(struct qcom_smp2p *smp2p,
struct smp2p_entry *entry,
struct device_node *node)
{
- entry->domain = irq_domain_add_linear(node, 32, &smp2p_irq_ops, entry);
+ entry->domain = irq_domain_create_linear(of_fwnode_handle(node), 32, &smp2p_irq_ops, entry);
if (!entry->domain) {
dev_err(smp2p->dev, "failed to add irq_domain\n");
return -ENOMEM;
diff --git a/drivers/soc/qcom/smsm.c b/drivers/soc/qcom/smsm.c
index e803ea342c97..021e9d1f61dc 100644
--- a/drivers/soc/qcom/smsm.c
+++ b/drivers/soc/qcom/smsm.c
@@ -456,7 +456,7 @@ static int smsm_inbound_entry(struct qcom_smsm *smsm,
return ret;
}
- entry->domain = irq_domain_add_linear(node, 32, &smsm_irq_ops, entry);
+ entry->domain = irq_domain_create_linear(of_fwnode_handle(node), 32, &smsm_irq_ops, entry);
if (!entry->domain) {
dev_err(smsm->dev, "failed to add irq_domain\n");
return -ENOMEM;
diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig
index 49648cf28bd2..4990b85d7df7 100644
--- a/drivers/soc/renesas/Kconfig
+++ b/drivers/soc/renesas/Kconfig
@@ -65,17 +65,20 @@ if ARM && ARCH_RENESAS
config ARCH_EMEV2
bool "ARM32 Platform support for Emma Mobile EV2"
+ default ARCH_RENESAS
select HAVE_ARM_SCU if SMP
select SYS_SUPPORTS_EM_STI
config ARCH_R8A7794
bool "ARM32 Platform support for R-Car E2"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_814220
select SYSC_R8A7794
config ARCH_R8A7779
bool "ARM32 Platform support for R-Car H1"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN1
select ARM_ERRATA_754322
select ARM_GLOBAL_TIMER
@@ -85,6 +88,7 @@ config ARCH_R8A7779
config ARCH_R8A7790
bool "ARM32 Platform support for R-Car H2"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select ARM_ERRATA_814220
@@ -93,11 +97,13 @@ config ARCH_R8A7790
config ARCH_R8A7778
bool "ARM32 Platform support for R-Car M1A"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN1
select ARM_ERRATA_754322
config ARCH_R8A7793
bool "ARM32 Platform support for R-Car M2-N"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select I2C
@@ -105,6 +111,7 @@ config ARCH_R8A7793
config ARCH_R8A7791
bool "ARM32 Platform support for R-Car M2-W"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select I2C
@@ -112,18 +119,21 @@ config ARCH_R8A7791
config ARCH_R8A7792
bool "ARM32 Platform support for R-Car V2H"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select SYSC_R8A7792
config ARCH_R8A7740
bool "ARM32 Platform support for R-Mobile A1"
+ default ARCH_RENESAS
select ARCH_RMOBILE
select ARM_ERRATA_754322
select RENESAS_INTC_IRQPIN
config ARCH_R8A73A4
bool "ARM32 Platform support for R-Mobile APE6"
+ default ARCH_RENESAS
select ARCH_RMOBILE
select ARM_ERRATA_798181 if SMP
select ARM_ERRATA_814220
@@ -132,6 +142,7 @@ config ARCH_R8A73A4
config ARCH_R7S72100
bool "ARM32 Platform support for RZ/A1H"
+ default ARCH_RENESAS
select ARM_ERRATA_754322
select PM
select PM_GENERIC_DOMAINS
@@ -141,6 +152,7 @@ config ARCH_R7S72100
config ARCH_R7S9210
bool "ARM32 Platform support for RZ/A2"
+ default ARCH_RENESAS
select PM
select PM_GENERIC_DOMAINS
select RENESAS_OSTM
@@ -148,18 +160,21 @@ config ARCH_R7S9210
config ARCH_R8A77470
bool "ARM32 Platform support for RZ/G1C"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_814220
select SYSC_R8A77470
config ARCH_R8A7745
bool "ARM32 Platform support for RZ/G1E"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_814220
select SYSC_R8A7745
config ARCH_R8A7742
bool "ARM32 Platform support for RZ/G1H"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select ARM_ERRATA_814220
@@ -167,23 +182,27 @@ config ARCH_R8A7742
config ARCH_R8A7743
bool "ARM32 Platform support for RZ/G1M"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select SYSC_R8A7743
config ARCH_R8A7744
bool "ARM32 Platform support for RZ/G1N"
+ default ARCH_RENESAS
select ARCH_RCAR_GEN2
select ARM_ERRATA_798181 if SMP
select SYSC_R8A7743
config ARCH_R9A06G032
bool "ARM32 Platform support for RZ/N1D"
+ default ARCH_RENESAS
select ARCH_RZN1
select ARM_ERRATA_814220
config ARCH_SH73A0
bool "ARM32 Platform support for SH-Mobile AG5"
+ default ARCH_RENESAS
select ARCH_RMOBILE
select ARM_ERRATA_754322
select ARM_GLOBAL_TIMER
@@ -197,6 +216,7 @@ if ARM64
config ARCH_R8A77995
bool "ARM64 Platform support for R-Car D3"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77995
help
@@ -205,6 +225,7 @@ config ARCH_R8A77995
config ARCH_R8A77990
bool "ARM64 Platform support for R-Car E3"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77990
help
@@ -213,6 +234,7 @@ config ARCH_R8A77990
config ARCH_R8A77951
bool "ARM64 Platform support for R-Car H3 ES2.0+"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A7795
help
@@ -222,6 +244,7 @@ config ARCH_R8A77951
config ARCH_R8A77965
bool "ARM64 Platform support for R-Car M3-N"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77965
help
@@ -230,6 +253,7 @@ config ARCH_R8A77965
config ARCH_R8A77960
bool "ARM64 Platform support for R-Car M3-W"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77960
help
@@ -237,6 +261,7 @@ config ARCH_R8A77960
config ARCH_R8A77961
bool "ARM64 Platform support for R-Car M3-W+"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77961
help
@@ -245,6 +270,7 @@ config ARCH_R8A77961
config ARCH_R8A779F0
bool "ARM64 Platform support for R-Car S4-8"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN4
select SYSC_R8A779F0
help
@@ -252,6 +278,7 @@ config ARCH_R8A779F0
config ARCH_R8A77980
bool "ARM64 Platform support for R-Car V3H"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77980
help
@@ -259,6 +286,7 @@ config ARCH_R8A77980
config ARCH_R8A77970
bool "ARM64 Platform support for R-Car V3M"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A77970
help
@@ -266,6 +294,7 @@ config ARCH_R8A77970
config ARCH_R8A779A0
bool "ARM64 Platform support for R-Car V3U"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN4
select SYSC_R8A779A0
help
@@ -273,6 +302,7 @@ config ARCH_R8A779A0
config ARCH_R8A779G0
bool "ARM64 Platform support for R-Car V4H"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN4
select SYSC_R8A779G0
help
@@ -280,6 +310,7 @@ config ARCH_R8A779G0
config ARCH_R8A779H0
bool "ARM64 Platform support for R-Car V4M"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN4
select SYSC_R8A779H0
help
@@ -287,6 +318,7 @@ config ARCH_R8A779H0
config ARCH_R8A774C0
bool "ARM64 Platform support for RZ/G2E"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A774C0
help
@@ -294,6 +326,7 @@ config ARCH_R8A774C0
config ARCH_R8A774E1
bool "ARM64 Platform support for RZ/G2H"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A774E1
help
@@ -301,6 +334,7 @@ config ARCH_R8A774E1
config ARCH_R8A774A1
bool "ARM64 Platform support for RZ/G2M"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A774A1
help
@@ -308,6 +342,7 @@ config ARCH_R8A774A1
config ARCH_R8A774B1
bool "ARM64 Platform support for RZ/G2N"
+ default y if ARCH_RENESAS
select ARCH_RCAR_GEN3
select SYSC_R8A774B1
help
@@ -315,24 +350,28 @@ config ARCH_R8A774B1
config ARCH_R9A07G043
bool "ARM64 Platform support for RZ/G2UL"
+ default y if ARCH_RENESAS
select ARCH_RZG2L
help
This enables support for the Renesas RZ/G2UL SoC variants.
config ARCH_R9A07G044
bool "ARM64 Platform support for RZ/G2L"
+ default y if ARCH_RENESAS
select ARCH_RZG2L
help
This enables support for the Renesas RZ/G2L SoC variants.
config ARCH_R9A07G054
bool "ARM64 Platform support for RZ/V2L"
+ default y if ARCH_RENESAS
select ARCH_RZG2L
help
This enables support for the Renesas RZ/V2L SoC variants.
config ARCH_R9A08G045
bool "ARM64 Platform support for RZ/G3S"
+ default y if ARCH_RENESAS
select ARCH_RZG2L
select SYSC_R9A08G045
help
@@ -340,6 +379,7 @@ config ARCH_R9A08G045
config ARCH_R9A09G011
bool "ARM64 Platform support for RZ/V2M"
+ default y if ARCH_RENESAS
select PM
select PM_GENERIC_DOMAINS
select PWC_RZV2M
@@ -348,12 +388,14 @@ config ARCH_R9A09G011
config ARCH_R9A09G047
bool "ARM64 Platform support for RZ/G3E"
+ default y if ARCH_RENESAS
select SYS_R9A09G047
help
This enables support for the Renesas RZ/G3E SoC variants.
config ARCH_R9A09G057
bool "ARM64 Platform support for RZ/V2H(P)"
+ default y if ARCH_RENESAS
select RENESAS_RZV2H_ICU
select SYS_R9A09G057
help
diff --git a/drivers/soc/samsung/exynos-usi.c b/drivers/soc/samsung/exynos-usi.c
index c5661ac19f7b..5f7bdf3bab05 100644
--- a/drivers/soc/samsung/exynos-usi.c
+++ b/drivers/soc/samsung/exynos-usi.c
@@ -233,7 +233,7 @@ static void exynos_usi_unconfigure(void *data)
/* Make sure that we've stopped providing the clock to USI IP */
val = readl(usi->regs + USI_OPTION);
val &= ~USI_OPTION_CLKREQ_ON;
- val |= ~USI_OPTION_CLKSTOP_ON;
+ val |= USI_OPTION_CLKSTOP_ON;
writel(val, usi->regs + USI_OPTION);
/* Set USI block state to reset */
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index 51b9d852bb6a..e0d67bfe955c 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -2500,8 +2500,9 @@ static int tegra_pmc_irq_init(struct tegra_pmc *pmc)
pmc->irq.irq_set_type = pmc->soc->irq_set_type;
pmc->irq.irq_set_wake = pmc->soc->irq_set_wake;
- pmc->domain = irq_domain_add_hierarchy(parent, 0, 96, pmc->dev->of_node,
- &tegra_pmc_irq_domain_ops, pmc);
+ pmc->domain = irq_domain_create_hierarchy(parent, 0, 96,
+ of_fwnode_handle(pmc->dev->of_node),
+ &tegra_pmc_irq_domain_ops, pmc);
if (!pmc->domain) {
dev_err(pmc->dev, "failed to allocate domain\n");
return -ENOMEM;
diff --git a/drivers/soc/ti/ti_sci_inta_msi.c b/drivers/soc/ti/ti_sci_inta_msi.c
index c36364522157..193266f5e3f9 100644
--- a/drivers/soc/ti/ti_sci_inta_msi.c
+++ b/drivers/soc/ti/ti_sci_inta_msi.c
@@ -103,19 +103,15 @@ int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev,
if (ret)
return ret;
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
nvec = ti_sci_inta_msi_alloc_descs(dev, res);
- if (nvec <= 0) {
- ret = nvec;
- goto unlock;
- }
+ if (nvec <= 0)
+ return nvec;
/* Use alloc ALL as it's unclear whether there are gaps in the indices */
ret = msi_domain_alloc_irqs_all_locked(dev, MSI_DEFAULT_DOMAIN, nvec);
if (ret)
dev_err(dev, "Failed to allocate IRQs %d\n", ret);
-unlock:
- msi_unlock_descs(dev);
return ret;
}
EXPORT_SYMBOL_GPL(ti_sci_inta_msi_domain_alloc_irqs);
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index 6f8a20014e76..39aecd34c641 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -122,6 +122,10 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
set_bit(SDW_GROUP13_DEV_NUM, bus->assigned);
set_bit(SDW_MASTER_DEV_NUM, bus->assigned);
+ ret = sdw_irq_create(bus, fwnode);
+ if (ret)
+ return ret;
+
/*
* SDW is an enumerable bus, but devices can be powered off. So,
* they won't be able to report as present.
@@ -138,6 +142,7 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
if (ret < 0) {
dev_err(bus->dev, "Finding slaves failed:%d\n", ret);
+ sdw_irq_delete(bus);
return ret;
}
@@ -156,10 +161,6 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
bus->params.curr_bank = SDW_BANK0;
bus->params.next_bank = SDW_BANK1;
- ret = sdw_irq_create(bus, fwnode);
- if (ret)
- return ret;
-
return 0;
}
EXPORT_SYMBOL(sdw_bus_master_add);
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index ed38f6d41f47..c51da3fc3604 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -1266,7 +1266,9 @@ config SPI_ZYNQMP_GQSPI
config SPI_AMD
tristate "AMD SPI controller"
- depends on SPI_MASTER || COMPILE_TEST
+ depends on PCI
+ depends on SPI_MASTER || X86 || COMPILE_TEST
+ depends on SPI_MEM
help
Enables SPI controller driver for AMD SoC.
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index c3a1a47b3bf4..4ea89f6fc531 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -162,7 +162,7 @@ obj-$(CONFIG_SPI_XLP) += spi-xlp.o
obj-$(CONFIG_SPI_XTENSA_XTFPGA) += spi-xtensa-xtfpga.o
obj-$(CONFIG_SPI_ZYNQ_QSPI) += spi-zynq-qspi.o
obj-$(CONFIG_SPI_ZYNQMP_GQSPI) += spi-zynqmp-gqspi.o
-obj-$(CONFIG_SPI_AMD) += spi-amd.o
+obj-$(CONFIG_SPI_AMD) += spi-amd.o spi-amd-pci.o
# SPI slave protocol handlers
obj-$(CONFIG_SPI_SLAVE_TIME) += spi-slave-time.o
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index 244ac0106862..2f6797324227 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -1287,9 +1287,9 @@ static int atmel_qspi_dma_init(struct spi_controller *ctrl)
aq->rx_chan = dma_request_chan(&aq->pdev->dev, "rx");
if (IS_ERR(aq->rx_chan)) {
- aq->rx_chan = NULL;
- return dev_err_probe(&aq->pdev->dev, PTR_ERR(aq->rx_chan),
- "RX DMA channel is not available\n");
+ ret = dev_err_probe(&aq->pdev->dev, PTR_ERR(aq->rx_chan),
+ "RX DMA channel is not available\n");
+ goto null_rx_chan;
}
aq->tx_chan = dma_request_chan(&aq->pdev->dev, "tx");
@@ -1310,8 +1310,9 @@ static int atmel_qspi_dma_init(struct spi_controller *ctrl)
release_rx_chan:
dma_release_channel(aq->rx_chan);
- aq->rx_chan = NULL;
aq->tx_chan = NULL;
+null_rx_chan:
+ aq->rx_chan = NULL;
return ret;
}
@@ -1436,22 +1437,17 @@ static int atmel_qspi_probe(struct platform_device *pdev)
pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
pm_runtime_use_autosuspend(&pdev->dev);
- pm_runtime_set_active(&pdev->dev);
- pm_runtime_enable(&pdev->dev);
- pm_runtime_get_noresume(&pdev->dev);
+ devm_pm_runtime_set_active_enabled(&pdev->dev);
+ devm_pm_runtime_get_noresume(&pdev->dev);
err = atmel_qspi_init(aq);
if (err)
goto dma_release;
err = spi_register_controller(ctrl);
- if (err) {
- pm_runtime_put_noidle(&pdev->dev);
- pm_runtime_disable(&pdev->dev);
- pm_runtime_set_suspended(&pdev->dev);
- pm_runtime_dont_use_autosuspend(&pdev->dev);
+ if (err)
goto dma_release;
- }
+
pm_runtime_mark_last_busy(&pdev->dev);
pm_runtime_put_autosuspend(&pdev->dev);
@@ -1530,10 +1526,6 @@ static void atmel_qspi_remove(struct platform_device *pdev)
*/
dev_warn(&pdev->dev, "Failed to resume device on remove\n");
}
-
- pm_runtime_disable(&pdev->dev);
- pm_runtime_dont_use_autosuspend(&pdev->dev);
- pm_runtime_put_noidle(&pdev->dev);
}
static int __maybe_unused atmel_qspi_suspend(struct device *dev)
diff --git a/drivers/spi/spi-amd-pci.c b/drivers/spi/spi-amd-pci.c
new file mode 100644
index 000000000000..e5faab414c17
--- /dev/null
+++ b/drivers/spi/spi-amd-pci.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD SPI controller driver
+ *
+ * Copyright (c) 2025, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Authors: Krishnamoorthi M <krishnamoorthi.m@amd.com>
+ * Akshata MukundShetty <akshata.mukundshetty@amd.com>
+ */
+
+#include <linux/init.h>
+#include <linux/spi/spi.h>
+#include <linux/pci.h>
+
+#include "spi-amd.h"
+
+#define AMD_PCI_DEVICE_ID_LPC_BRIDGE 0x1682
+#define AMD_PCI_LPC_SPI_BASE_ADDR_REG 0xA0
+#define AMD_SPI_BASE_ADDR_MASK ~0xFF
+#define AMD_HID2_PCI_BAR_OFFSET 0x00002000
+#define AMD_HID2_MEM_SIZE 0x200
+
+static struct pci_device_id pci_spi_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_PCI_DEVICE_ID_LPC_BRIDGE) },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, pci_spi_ids);
+
+static int amd_spi_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct spi_controller *host;
+ struct amd_spi *amd_spi;
+ u32 io_base_addr;
+
+ /* Allocate storage for host and driver private data */
+ host = devm_spi_alloc_host(dev, sizeof(struct amd_spi));
+ if (!host)
+ return dev_err_probe(dev, -ENOMEM, "Error allocating SPI host\n");
+
+ amd_spi = spi_controller_get_devdata(host);
+
+ pci_read_config_dword(pdev, AMD_PCI_LPC_SPI_BASE_ADDR_REG, &io_base_addr);
+ io_base_addr = (io_base_addr & AMD_SPI_BASE_ADDR_MASK) + AMD_HID2_PCI_BAR_OFFSET;
+ amd_spi->io_remap_addr = devm_ioremap(dev, io_base_addr, AMD_HID2_MEM_SIZE);
+
+ if (!amd_spi->io_remap_addr)
+ return dev_err_probe(dev, -ENOMEM,
+ "ioremap of SPI registers failed\n");
+
+ dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
+
+ amd_spi->version = AMD_HID2_SPI;
+ host->bus_num = 2;
+
+ return amd_spi_probe_common(dev, host);
+}
+
+static struct pci_driver amd_spi_pci_driver = {
+ .name = "amd_spi_pci",
+ .id_table = pci_spi_ids,
+ .probe = amd_spi_pci_probe,
+};
+
+module_pci_driver(amd_spi_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD HID2 SPI Controller Driver");
diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index 17fc0b17e756..02e7fe095a0b 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -17,6 +17,8 @@
#include <linux/spi/spi.h>
#include <linux/spi/spi-mem.h>
+#include "spi-amd.h"
+
#define AMD_SPI_CTRL0_REG 0x00
#define AMD_SPI_EXEC_CMD BIT(16)
#define AMD_SPI_FIFO_CLEAR BIT(20)
@@ -52,10 +54,13 @@
#define AMD_SPI_SPD7_MASK GENMASK(13, AMD_SPI_SPD7_SHIFT)
#define AMD_SPI_HID2_INPUT_RING_BUF0 0X100
+#define AMD_SPI_HID2_OUTPUT_BUF0 0x140
#define AMD_SPI_HID2_CNTRL 0x150
#define AMD_SPI_HID2_INT_STATUS 0x154
#define AMD_SPI_HID2_CMD_START 0x156
#define AMD_SPI_HID2_INT_MASK 0x158
+#define AMD_SPI_HID2_WRITE_CNTRL0 0x160
+#define AMD_SPI_HID2_WRITE_CNTRL1 0x164
#define AMD_SPI_HID2_READ_CNTRL0 0x170
#define AMD_SPI_HID2_READ_CNTRL1 0x174
#define AMD_SPI_HID2_READ_CNTRL2 0x180
@@ -81,17 +86,9 @@
#define AMD_SPI_OP_READ_1_1_4_4B 0x6c /* Read data bytes (Quad Output SPI) */
#define AMD_SPI_OP_READ_1_4_4_4B 0xec /* Read data bytes (Quad I/O SPI) */
-/**
- * enum amd_spi_versions - SPI controller versions
- * @AMD_SPI_V1: AMDI0061 hardware version
- * @AMD_SPI_V2: AMDI0062 hardware version
- * @AMD_HID2_SPI: AMDI0063 hardware version
- */
-enum amd_spi_versions {
- AMD_SPI_V1 = 1,
- AMD_SPI_V2,
- AMD_HID2_SPI,
-};
+/* SPINAND write command opcodes */
+#define AMD_SPI_OP_PP 0x02 /* Page program */
+#define AMD_SPI_OP_PP_RANDOM 0x84 /* Page program */
enum amd_spi_speed {
F_66_66MHz,
@@ -118,22 +115,6 @@ struct amd_spi_freq {
u32 spd7_val;
};
-/**
- * struct amd_spi - SPI driver instance
- * @io_remap_addr: Start address of the SPI controller registers
- * @phy_dma_buf: Physical address of DMA buffer
- * @dma_virt_addr: Virtual address of DMA buffer
- * @version: SPI controller hardware version
- * @speed_hz: Device frequency
- */
-struct amd_spi {
- void __iomem *io_remap_addr;
- dma_addr_t phy_dma_buf;
- void *dma_virt_addr;
- enum amd_spi_versions version;
- unsigned int speed_hz;
-};
-
static inline u8 amd_spi_readreg8(struct amd_spi *amd_spi, int idx)
{
return readb((u8 __iomem *)amd_spi->io_remap_addr + idx);
@@ -445,6 +426,17 @@ static inline bool amd_is_spi_read_cmd(const u16 op)
}
}
+static inline bool amd_is_spi_write_cmd(const u16 op)
+{
+ switch (op) {
+ case AMD_SPI_OP_PP:
+ case AMD_SPI_OP_PP_RANDOM:
+ return true;
+ default:
+ return false;
+ }
+}
+
static bool amd_spi_supports_op(struct spi_mem *mem,
const struct spi_mem_op *op)
{
@@ -455,7 +447,7 @@ static bool amd_spi_supports_op(struct spi_mem *mem,
return false;
/* AMD SPI controllers support quad mode only for read operations */
- if (amd_is_spi_read_cmd(op->cmd.opcode)) {
+ if (amd_is_spi_read_cmd(op->cmd.opcode) || amd_is_spi_write_cmd(op->cmd.opcode)) {
if (op->data.buswidth > 4)
return false;
@@ -464,7 +456,8 @@ static bool amd_spi_supports_op(struct spi_mem *mem,
* doesn't support 4-byte address commands.
*/
if (amd_spi->version == AMD_HID2_SPI) {
- if (amd_is_spi_read_cmd_4b(op->cmd.opcode) ||
+ if ((amd_is_spi_read_cmd_4b(op->cmd.opcode) ||
+ amd_is_spi_write_cmd(op->cmd.opcode)) &&
op->data.nbytes > AMD_SPI_HID2_DMA_SIZE)
return false;
} else if (op->data.nbytes > AMD_SPI_MAX_DATA) {
@@ -490,7 +483,8 @@ static int amd_spi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
* controller index mode supports maximum of 64 bytes in a single
* transaction.
*/
- if (amd_spi->version == AMD_HID2_SPI && amd_is_spi_read_cmd(op->cmd.opcode))
+ if (amd_spi->version == AMD_HID2_SPI && (amd_is_spi_read_cmd(op->cmd.opcode) ||
+ amd_is_spi_write_cmd(op->cmd.opcode)))
op->data.nbytes = clamp_val(op->data.nbytes, 0, AMD_SPI_HID2_DMA_SIZE);
else
op->data.nbytes = clamp_val(op->data.nbytes, 0, AMD_SPI_MAX_DATA);
@@ -514,32 +508,96 @@ static void amd_spi_set_addr(struct amd_spi *amd_spi,
}
}
+static void amd_spi_hiddma_write(struct amd_spi *amd_spi, const struct spi_mem_op *op)
+{
+ u16 hid_cmd_start, val;
+ u32 hid_regval;
+
+ /*
+ * Program the HID2 output Buffer0. 4k aligned buf_memory_addr[31:12],
+ * buf_size[2:0].
+ */
+ hid_regval = amd_spi->phy_dma_buf | BIT(0);
+ amd_spi_writereg32(amd_spi, AMD_SPI_HID2_OUTPUT_BUF0, hid_regval);
+
+ /* Program max write length in hid2_write_control1 register */
+ hid_regval = amd_spi_readreg32(amd_spi, AMD_SPI_HID2_WRITE_CNTRL1);
+ hid_regval = (hid_regval & ~GENMASK(15, 0)) | ((op->data.nbytes) + 3);
+ amd_spi_writereg32(amd_spi, AMD_SPI_HID2_WRITE_CNTRL1, hid_regval);
+
+ /* Set cmd start bit in hid2_cmd_start register to trigger HID basic write operation */
+ hid_cmd_start = amd_spi_readreg16(amd_spi, AMD_SPI_HID2_CMD_START);
+ amd_spi_writereg16(amd_spi, AMD_SPI_HID2_CMD_START, (hid_cmd_start | BIT(2)));
+
+ /* Check interrupt status of HIDDMA basic write operation in hid2_int_status register */
+ readw_poll_timeout(amd_spi->io_remap_addr + AMD_SPI_HID2_INT_STATUS, val,
+ (val & BIT(2)), AMD_SPI_IO_SLEEP_US, AMD_SPI_IO_TIMEOUT_US);
+
+ /* Clear the interrupts by writing to hid2_int_status register */
+ val = amd_spi_readreg16(amd_spi, AMD_SPI_HID2_INT_STATUS);
+ amd_spi_writereg16(amd_spi, AMD_SPI_HID2_INT_STATUS, val);
+}
+
static void amd_spi_mem_data_out(struct amd_spi *amd_spi,
const struct spi_mem_op *op)
{
int base_addr = AMD_SPI_FIFO_BASE + op->addr.nbytes;
u64 *buf_64 = (u64 *)op->data.buf.out;
+ u64 addr_val = op->addr.val;
u32 nbytes = op->data.nbytes;
u32 left_data = nbytes;
u8 *buf;
int i;
- amd_spi_set_opcode(amd_spi, op->cmd.opcode);
- amd_spi_set_addr(amd_spi, op);
+ /*
+ * Condition for using HID write mode. Only for writing complete page data, use HID write.
+ * Use index mode otherwise.
+ */
+ if (amd_spi->version == AMD_HID2_SPI && amd_is_spi_write_cmd(op->cmd.opcode)) {
+ u64 *dma_buf64 = (u64 *)(amd_spi->dma_virt_addr + op->addr.nbytes + op->cmd.nbytes);
+ u8 *dma_buf = (u8 *)amd_spi->dma_virt_addr;
- for (i = 0; left_data >= 8; i++, left_data -= 8)
- amd_spi_writereg64(amd_spi, base_addr + op->dummy.nbytes + (i * 8), *buf_64++);
+ /* Copy opcode and address to DMA buffer */
+ *dma_buf = op->cmd.opcode;
- buf = (u8 *)buf_64;
- for (i = 0; i < left_data; i++) {
- amd_spi_writereg8(amd_spi, base_addr + op->dummy.nbytes + nbytes + i - left_data,
- buf[i]);
- }
+ dma_buf = (u8 *)dma_buf64;
+ for (i = 0; i < op->addr.nbytes; i++) {
+ *--dma_buf = addr_val & GENMASK(7, 0);
+ addr_val >>= 8;
+ }
- amd_spi_set_tx_count(amd_spi, op->addr.nbytes + op->data.nbytes);
- amd_spi_set_rx_count(amd_spi, 0);
- amd_spi_clear_fifo_ptr(amd_spi);
- amd_spi_execute_opcode(amd_spi);
+ /* Copy data to DMA buffer */
+ while (left_data >= 8) {
+ *dma_buf64++ = *buf_64++;
+ left_data -= 8;
+ }
+
+ buf = (u8 *)buf_64;
+ dma_buf = (u8 *)dma_buf64;
+ while (left_data--)
+ *dma_buf++ = *buf++;
+
+ amd_spi_hiddma_write(amd_spi, op);
+ } else {
+ amd_spi_set_opcode(amd_spi, op->cmd.opcode);
+ amd_spi_set_addr(amd_spi, op);
+
+ for (i = 0; left_data >= 8; i++, left_data -= 8)
+ amd_spi_writereg64(amd_spi, base_addr + op->dummy.nbytes + (i * 8),
+ *buf_64++);
+
+ buf = (u8 *)buf_64;
+ for (i = 0; i < left_data; i++) {
+ amd_spi_writereg8(amd_spi,
+ base_addr + op->dummy.nbytes + nbytes + i - left_data,
+ buf[i]);
+ }
+
+ amd_spi_set_tx_count(amd_spi, op->addr.nbytes + op->data.nbytes);
+ amd_spi_set_rx_count(amd_spi, 0);
+ amd_spi_clear_fifo_ptr(amd_spi);
+ amd_spi_execute_opcode(amd_spi);
+ }
}
static void amd_spi_hiddma_read(struct amd_spi *amd_spi, const struct spi_mem_op *op)
@@ -616,15 +674,21 @@ static void amd_spi_mem_data_in(struct amd_spi *amd_spi,
* Use index mode otherwise.
*/
if (amd_spi->version == AMD_HID2_SPI && amd_is_spi_read_cmd(op->cmd.opcode)) {
+ u64 *dma_buf64 = (u64 *)amd_spi->dma_virt_addr;
+ u8 *dma_buf;
+
amd_spi_hiddma_read(amd_spi, op);
- for (i = 0; left_data >= 8; i++, left_data -= 8)
- *buf_64++ = readq((u8 __iomem *)amd_spi->dma_virt_addr + (i * 8));
+ /* Copy data from DMA buffer */
+ while (left_data >= 8) {
+ *buf_64++ = *dma_buf64++;
+ left_data -= 8;
+ }
buf = (u8 *)buf_64;
- for (i = 0; i < left_data; i++)
- buf[i] = readb((u8 __iomem *)amd_spi->dma_virt_addr +
- (nbytes - left_data + i));
+ dma_buf = (u8 *)dma_buf64;
+ while (left_data--)
+ *buf++ = *dma_buf++;
/* Reset HID RX memory logic */
data = amd_spi_readreg32(amd_spi, AMD_SPI_HID2_CNTRL);
@@ -728,51 +792,38 @@ static int amd_spi_setup_hiddma(struct amd_spi *amd_spi, struct device *dev)
{
u32 hid_regval;
- /* Allocate DMA buffer to use for HID basic read operation */
- amd_spi->dma_virt_addr = dma_alloc_coherent(dev, AMD_SPI_HID2_DMA_SIZE,
- &amd_spi->phy_dma_buf, GFP_KERNEL);
+ /* Allocate DMA buffer to use for HID basic read and write operations. For write
+ * operations, the DMA buffer should include the opcode, address bytes and dummy
+ * bytes(if any) in addition to the data bytes. Additionally, the hardware requires
+ * that the buffer address be 4K aligned. So, allocate DMA buffer of size
+ * 2 * AMD_SPI_HID2_DMA_SIZE.
+ */
+ amd_spi->dma_virt_addr = dmam_alloc_coherent(dev, AMD_SPI_HID2_DMA_SIZE * 2,
+ &amd_spi->phy_dma_buf, GFP_KERNEL);
if (!amd_spi->dma_virt_addr)
return -ENOMEM;
/*
* Enable interrupts and set mask bits in hid2_int_mask register to generate interrupt
- * properly for HIDDMA basic read operations.
+ * properly for HIDDMA basic read and write operations.
*/
hid_regval = amd_spi_readreg32(amd_spi, AMD_SPI_HID2_INT_MASK);
- hid_regval = (hid_regval & GENMASK(31, 8)) | BIT(19);
+ hid_regval = (hid_regval & GENMASK(31, 8)) | BIT(18) | BIT(19);
amd_spi_writereg32(amd_spi, AMD_SPI_HID2_INT_MASK, hid_regval);
- /* Configure buffer unit(4k) in hid2_control register */
+ /* Configure buffer unit(4k) and write threshold in hid2_control register */
hid_regval = amd_spi_readreg32(amd_spi, AMD_SPI_HID2_CNTRL);
- amd_spi_writereg32(amd_spi, AMD_SPI_HID2_CNTRL, hid_regval & ~BIT(3));
+ amd_spi_writereg32(amd_spi, AMD_SPI_HID2_CNTRL, (hid_regval | GENMASK(13, 12)) & ~BIT(3));
return 0;
}
-static int amd_spi_probe(struct platform_device *pdev)
+int amd_spi_probe_common(struct device *dev, struct spi_controller *host)
{
- struct device *dev = &pdev->dev;
- struct spi_controller *host;
- struct amd_spi *amd_spi;
+ struct amd_spi *amd_spi = spi_controller_get_devdata(host);
int err;
- /* Allocate storage for host and driver private data */
- host = devm_spi_alloc_host(dev, sizeof(struct amd_spi));
- if (!host)
- return dev_err_probe(dev, -ENOMEM, "Error allocating SPI host\n");
-
- amd_spi = spi_controller_get_devdata(host);
- amd_spi->io_remap_addr = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(amd_spi->io_remap_addr))
- return dev_err_probe(dev, PTR_ERR(amd_spi->io_remap_addr),
- "ioremap of SPI registers failed\n");
-
- dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
-
- amd_spi->version = (uintptr_t) device_get_match_data(dev);
-
/* Initialize the spi_controller fields */
- host->bus_num = (amd_spi->version == AMD_HID2_SPI) ? 2 : 0;
host->num_chipselect = 4;
host->mode_bits = SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD;
host->flags = SPI_CONTROLLER_HALF_DUPLEX;
@@ -795,6 +846,32 @@ static int amd_spi_probe(struct platform_device *pdev)
return err;
}
+EXPORT_SYMBOL_GPL(amd_spi_probe_common);
+
+static int amd_spi_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct spi_controller *host;
+ struct amd_spi *amd_spi;
+
+ /* Allocate storage for host and driver private data */
+ host = devm_spi_alloc_host(dev, sizeof(struct amd_spi));
+ if (!host)
+ return dev_err_probe(dev, -ENOMEM, "Error allocating SPI host\n");
+
+ amd_spi = spi_controller_get_devdata(host);
+ amd_spi->io_remap_addr = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(amd_spi->io_remap_addr))
+ return dev_err_probe(dev, PTR_ERR(amd_spi->io_remap_addr),
+ "ioremap of SPI registers failed\n");
+
+ dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
+
+ amd_spi->version = (uintptr_t)device_get_match_data(dev);
+ host->bus_num = 0;
+
+ return amd_spi_probe_common(dev, host);
+}
#ifdef CONFIG_ACPI
static const struct acpi_device_id spi_acpi_match[] = {
diff --git a/drivers/spi/spi-amd.h b/drivers/spi/spi-amd.h
new file mode 100644
index 000000000000..5f39ce7b5587
--- /dev/null
+++ b/drivers/spi/spi-amd.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * AMD SPI controller driver common stuff
+ *
+ * Copyright (c) 2025, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Krishnamoorthi M <krishnamoorthi.m@amd.com>
+ */
+
+#ifndef SPI_AMD_H
+#define SPI_AMD_H
+
+/**
+ * enum amd_spi_versions - SPI controller versions
+ * @AMD_SPI_V1: AMDI0061 hardware version
+ * @AMD_SPI_V2: AMDI0062 hardware version
+ * @AMD_HID2_SPI: AMDI0063 hardware version
+ */
+enum amd_spi_versions {
+ AMD_SPI_V1 = 1,
+ AMD_SPI_V2,
+ AMD_HID2_SPI,
+};
+
+/**
+ * struct amd_spi - SPI driver instance
+ * @io_remap_addr: Start address of the SPI controller registers
+ * @phy_dma_buf: Physical address of DMA buffer
+ * @dma_virt_addr: Virtual address of DMA buffer
+ * @version: SPI controller hardware version
+ * @speed_hz: Device frequency
+ */
+struct amd_spi {
+ void __iomem *io_remap_addr;
+ dma_addr_t phy_dma_buf;
+ void *dma_virt_addr;
+ enum amd_spi_versions version;
+ unsigned int speed_hz;
+};
+
+int amd_spi_probe_common(struct device *dev, struct spi_controller *host);
+
+#endif /* SPI_AMD_H */
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index da9840957778..8cc19934b48b 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -14,6 +14,7 @@
#include <linux/fpga/adi-axi-common.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/of.h>
#include <linux/module.h>
#include <linux/overflow.h>
@@ -140,6 +141,8 @@ struct spi_engine_offload {
struct spi_engine *spi_engine;
unsigned long flags;
unsigned int offload_num;
+ unsigned int spi_mode_config;
+ u8 bits_per_word;
};
struct spi_engine {
@@ -159,6 +162,7 @@ struct spi_engine {
unsigned int offload_sdo_mem_size;
struct spi_offload *offload;
u32 offload_caps;
+ bool offload_requires_sync;
};
static void spi_engine_program_add_cmd(struct spi_engine_program *p,
@@ -265,6 +269,8 @@ static int spi_engine_precompile_message(struct spi_message *msg)
{
unsigned int clk_div, max_hz = msg->spi->controller->max_speed_hz;
struct spi_transfer *xfer;
+ u8 min_bits_per_word = U8_MAX;
+ u8 max_bits_per_word = 0;
list_for_each_entry(xfer, &msg->transfers, transfer_list) {
/* If we have an offload transfer, we can't rx to buffer */
@@ -273,6 +279,24 @@ static int spi_engine_precompile_message(struct spi_message *msg)
clk_div = DIV_ROUND_UP(max_hz, xfer->speed_hz);
xfer->effective_speed_hz = max_hz / min(clk_div, 256U);
+
+ if (xfer->len) {
+ min_bits_per_word = min(min_bits_per_word, xfer->bits_per_word);
+ max_bits_per_word = max(max_bits_per_word, xfer->bits_per_word);
+ }
+ }
+
+ /*
+ * If all xfers in the message use the same bits_per_word, we can
+ * provide some optimization when using SPI offload.
+ */
+ if (msg->offload) {
+ struct spi_engine_offload *priv = msg->offload->priv;
+
+ if (min_bits_per_word == max_bits_per_word)
+ priv->bits_per_word = min_bits_per_word;
+ else
+ priv->bits_per_word = 0;
}
return 0;
@@ -283,6 +307,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
{
struct spi_device *spi = msg->spi;
struct spi_controller *host = spi->controller;
+ struct spi_engine_offload *priv;
struct spi_transfer *xfer;
int clk_div, new_clk_div, inst_ns;
bool keep_cs = false;
@@ -296,9 +321,24 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
clk_div = 1;
- spi_engine_program_add_cmd(p, dry,
- SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
- spi_engine_get_config(spi)));
+ /*
+ * As an optimization, SPI offload sets once this when the offload is
+ * enabled instead of repeating the instruction in each message.
+ */
+ if (msg->offload) {
+ priv = msg->offload->priv;
+ priv->spi_mode_config = spi_engine_get_config(spi);
+
+ /*
+ * If all xfers use the same bits_per_word, it can be optimized
+ * in the same way.
+ */
+ bits_per_word = priv->bits_per_word;
+ } else {
+ spi_engine_program_add_cmd(p, dry,
+ SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
+ spi_engine_get_config(spi)));
+ }
xfer = list_first_entry(&msg->transfers, struct spi_transfer, transfer_list);
spi_engine_gen_cs(p, dry, spi, !xfer->cs_off);
@@ -663,6 +703,8 @@ static void spi_engine_offload_unprepare(struct spi_offload *offload)
static int spi_engine_optimize_message(struct spi_message *msg)
{
+ struct spi_controller *host = msg->spi->controller;
+ struct spi_engine *spi_engine = spi_controller_get_devdata(host);
struct spi_engine_program p_dry, *p;
int ret;
@@ -679,8 +721,13 @@ static int spi_engine_optimize_message(struct spi_message *msg)
spi_engine_compile_message(msg, false, p);
- spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
- msg->offload ? 0 : AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
+ /*
+ * Non-offload needs SYNC for completion interrupt. Older versions of
+ * the IP core also need SYNC for offload to work properly.
+ */
+ if (!msg->offload || spi_engine->offload_requires_sync)
+ spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
+ msg->offload ? 0 : AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
msg->opt_state = p;
@@ -739,12 +786,16 @@ static int spi_engine_setup(struct spi_device *device)
{
struct spi_controller *host = device->controller;
struct spi_engine *spi_engine = spi_controller_get_devdata(host);
+ unsigned int reg;
if (device->mode & SPI_CS_HIGH)
spi_engine->cs_inv |= BIT(spi_get_chipselect(device, 0));
else
spi_engine->cs_inv &= ~BIT(spi_get_chipselect(device, 0));
+ writel_relaxed(SPI_ENGINE_CMD_SYNC(0),
+ spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
writel_relaxed(SPI_ENGINE_CMD_CS_INV(spi_engine->cs_inv),
spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
@@ -755,7 +806,11 @@ static int spi_engine_setup(struct spi_device *device)
writel_relaxed(SPI_ENGINE_CMD_ASSERT(0, 0xff),
spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
- return 0;
+ writel_relaxed(SPI_ENGINE_CMD_SYNC(1),
+ spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+ return readl_relaxed_poll_timeout(spi_engine->base + SPI_ENGINE_REG_SYNC_ID,
+ reg, reg == 1, 1, 1000);
}
static int spi_engine_transfer_one_message(struct spi_controller *host,
@@ -833,6 +888,27 @@ static int spi_engine_trigger_enable(struct spi_offload *offload)
struct spi_engine_offload *priv = offload->priv;
struct spi_engine *spi_engine = priv->spi_engine;
unsigned int reg;
+ int ret;
+
+ writel_relaxed(SPI_ENGINE_CMD_SYNC(0),
+ spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+ writel_relaxed(SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
+ priv->spi_mode_config),
+ spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+ if (priv->bits_per_word)
+ writel_relaxed(SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_XFER_BITS,
+ priv->bits_per_word),
+ spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+ writel_relaxed(SPI_ENGINE_CMD_SYNC(1),
+ spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+ ret = readl_relaxed_poll_timeout(spi_engine->base + SPI_ENGINE_REG_SYNC_ID,
+ reg, reg == 1, 1, 1000);
+ if (ret)
+ return ret;
reg = readl_relaxed(spi_engine->base +
SPI_ENGINE_REG_OFFLOAD_CTRL(priv->offload_num));
@@ -987,6 +1063,9 @@ static int spi_engine_probe(struct platform_device *pdev)
spi_engine->offload_sdo_mem_size = SPI_ENGINE_OFFLOAD_SDO_FIFO_SIZE;
}
+ /* IP v1.5 dropped the requirement for SYNC in offload messages. */
+ spi_engine->offload_requires_sync = ADI_AXI_PCORE_VER_MINOR(version) < 5;
+
writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_RESET);
writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING);
writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE);
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index c90462783b3f..fe0f122f07b0 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1949,7 +1949,7 @@ static int cqspi_probe(struct platform_device *pdev)
host->num_chipselect = cqspi->num_chipselect;
- if (ddata->quirks & CQSPI_SUPPORT_DEVICE_RESET)
+ if (ddata && (ddata->quirks & CQSPI_SUPPORT_DEVICE_RESET))
cqspi_device_reset(cqspi);
if (cqspi->use_direct_mode) {
diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
index 337aef12abcc..367ae7120bb3 100644
--- a/drivers/spi/spi-cavium-thunderx.c
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -34,7 +34,7 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
if (ret)
goto error;
- ret = pci_request_regions(pdev, DRV_NAME);
+ ret = pcim_request_all_regions(pdev, DRV_NAME);
if (ret)
goto error;
@@ -78,7 +78,6 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
return 0;
error:
- pci_release_regions(pdev);
spi_controller_put(host);
return ret;
}
@@ -92,7 +91,6 @@ static void thunderx_spi_remove(struct pci_dev *pdev)
if (!p)
return;
- pci_release_regions(pdev);
/* Put everything in a known state. */
writeq(0, p->register_base + OCTEON_SPI_CFG(p));
}
diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c
index ceefc253c549..b28a840b3b04 100644
--- a/drivers/spi/spi-cs42l43.c
+++ b/drivers/spi/spi-cs42l43.c
@@ -237,7 +237,9 @@ static int cs42l43_get_speaker_id_gpios(struct cs42l43_spi *priv, int *result)
int i, ret;
descs = gpiod_get_array_optional(priv->dev, "spk-id", GPIOD_IN);
- if (IS_ERR_OR_NULL(descs))
+ if (!descs)
+ return 0;
+ else if (IS_ERR(descs))
return PTR_ERR(descs);
spkid = 0;
diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
index 941ecc6f59f8..b3b883cb9541 100644
--- a/drivers/spi/spi-dw-core.c
+++ b/drivers/spi/spi-dw-core.c
@@ -423,7 +423,7 @@ static int dw_spi_transfer_one(struct spi_controller *host,
int ret;
dws->dma_mapped = 0;
- dws->n_bytes = roundup_pow_of_two(BITS_TO_BYTES(transfer->bits_per_word));
+ dws->n_bytes = spi_bpw_to_bytes(transfer->bits_per_word);
dws->tx = (void *)transfer->tx_buf;
dws->tx_len = transfer->len / dws->n_bytes;
dws->rx = transfer->rx_buf;
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 067c954cb6ea..863781ba6c16 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+
//
// Copyright 2013 Freescale Semiconductor, Inc.
-// Copyright 2020 NXP
+// Copyright 2020-2025 NXP
//
// Freescale DSPI driver
// This file contains a driver for the Freescale DSPI
@@ -62,6 +62,7 @@
#define SPI_SR_TFIWF BIT(18)
#define SPI_SR_RFDF BIT(17)
#define SPI_SR_CMDFFF BIT(16)
+#define SPI_SR_TXRXS BIT(30)
#define SPI_SR_CLEAR (SPI_SR_TCFQF | \
SPI_SR_TFUF | SPI_SR_TFFF | \
SPI_SR_CMDTCF | SPI_SR_SPEF | \
@@ -921,9 +922,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
struct spi_transfer *transfer;
bool cs = false;
int status = 0;
+ u32 val = 0;
+ bool cs_change = false;
message->actual_length = 0;
+ /* Put DSPI in running mode if halted. */
+ regmap_read(dspi->regmap, SPI_MCR, &val);
+ if (val & SPI_MCR_HALT) {
+ regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_HALT, 0);
+ while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 &&
+ !(val & SPI_SR_TXRXS))
+ ;
+ }
+
list_for_each_entry(transfer, &message->transfers, transfer_list) {
dspi->cur_transfer = transfer;
dspi->cur_msg = message;
@@ -953,6 +965,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
dspi->tx_cmd |= SPI_PUSHR_CMD_CONT;
}
+ cs_change = transfer->cs_change;
dspi->tx = transfer->tx_buf;
dspi->rx = transfer->rx_buf;
dspi->len = transfer->len;
@@ -962,6 +975,8 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF,
SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF);
+ regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
+
spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer,
dspi->progress, !dspi->irq);
@@ -988,6 +1003,15 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
dspi_deassert_cs(spi, &cs);
}
+ if (status || !cs_change) {
+ /* Put DSPI in stop mode */
+ regmap_update_bits(dspi->regmap, SPI_MCR,
+ SPI_MCR_HALT, SPI_MCR_HALT);
+ while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 &&
+ val & SPI_SR_TXRXS)
+ ;
+ }
+
message->status = status;
spi_finalize_current_message(ctlr);
@@ -1167,6 +1191,20 @@ static int dspi_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(dspi_pm, dspi_suspend, dspi_resume);
+static const struct regmap_range dspi_yes_ranges[] = {
+ regmap_reg_range(SPI_MCR, SPI_MCR),
+ regmap_reg_range(SPI_TCR, SPI_CTAR(3)),
+ regmap_reg_range(SPI_SR, SPI_TXFR3),
+ regmap_reg_range(SPI_RXFR0, SPI_RXFR3),
+ regmap_reg_range(SPI_CTARE(0), SPI_CTARE(3)),
+ regmap_reg_range(SPI_SREX, SPI_SREX),
+};
+
+static const struct regmap_access_table dspi_access_table = {
+ .yes_ranges = dspi_yes_ranges,
+ .n_yes_ranges = ARRAY_SIZE(dspi_yes_ranges),
+};
+
static const struct regmap_range dspi_volatile_ranges[] = {
regmap_reg_range(SPI_MCR, SPI_TCR),
regmap_reg_range(SPI_SR, SPI_SR),
@@ -1184,6 +1222,8 @@ static const struct regmap_config dspi_regmap_config = {
.reg_stride = 4,
.max_register = 0x88,
.volatile_table = &dspi_volatile_table,
+ .rd_table = &dspi_access_table,
+ .wr_table = &dspi_access_table,
};
static const struct regmap_range dspi_xspi_volatile_ranges[] = {
@@ -1205,6 +1245,8 @@ static const struct regmap_config dspi_xspi_regmap_config[] = {
.reg_stride = 4,
.max_register = 0x13c,
.volatile_table = &dspi_xspi_volatile_table,
+ .rd_table = &dspi_access_table,
+ .wr_table = &dspi_access_table,
},
{
.name = "pushr",
@@ -1227,6 +1269,8 @@ static int dspi_init(struct fsl_dspi *dspi)
if (!spi_controller_is_target(dspi->ctlr))
mcr |= SPI_MCR_HOST;
+ mcr |= SPI_MCR_HALT;
+
regmap_write(dspi->regmap, SPI_MCR, mcr);
regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
index b5ecffcaf795..c887abb028d7 100644
--- a/drivers/spi/spi-fsl-qspi.c
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -264,14 +264,14 @@ static const struct fsl_qspi_devtype_data ls2080a_data = {
struct fsl_qspi {
void __iomem *iobase;
void __iomem *ahb_addr;
- u32 memmap_phy;
- struct clk *clk, *clk_en;
- struct device *dev;
- struct completion c;
const struct fsl_qspi_devtype_data *devtype_data;
struct mutex lock;
+ struct completion c;
+ struct clk *clk, *clk_en;
struct pm_qos_request pm_qos_req;
+ struct device *dev;
int selected;
+ u32 memmap_phy;
};
static inline int needs_swap_endian(struct fsl_qspi *q)
@@ -844,13 +844,18 @@ static const struct spi_controller_mem_caps fsl_qspi_mem_caps = {
.per_op_freq = true,
};
-static void fsl_qspi_cleanup(void *data)
+static void fsl_qspi_disable(void *data)
{
struct fsl_qspi *q = data;
/* disable the hardware */
qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
+}
+
+static void fsl_qspi_cleanup(void *data)
+{
+ struct fsl_qspi *q = data;
fsl_qspi_clk_disable_unprep(q);
@@ -866,7 +871,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
struct fsl_qspi *q;
int ret;
- ctlr = spi_alloc_host(&pdev->dev, sizeof(*q));
+ ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*q));
if (!ctlr)
return -ENOMEM;
@@ -876,68 +881,60 @@ static int fsl_qspi_probe(struct platform_device *pdev)
q = spi_controller_get_devdata(ctlr);
q->dev = dev;
q->devtype_data = of_device_get_match_data(dev);
- if (!q->devtype_data) {
- ret = -ENODEV;
- goto err_put_ctrl;
- }
+ if (!q->devtype_data)
+ return -ENODEV;
platform_set_drvdata(pdev, q);
/* find the resources */
q->iobase = devm_platform_ioremap_resource_byname(pdev, "QuadSPI");
- if (IS_ERR(q->iobase)) {
- ret = PTR_ERR(q->iobase);
- goto err_put_ctrl;
- }
+ if (IS_ERR(q->iobase))
+ return PTR_ERR(q->iobase);
res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
"QuadSPI-memory");
- if (!res) {
- ret = -EINVAL;
- goto err_put_ctrl;
- }
+ if (!res)
+ return -EINVAL;
q->memmap_phy = res->start;
/* Since there are 4 cs, map size required is 4 times ahb_buf_size */
q->ahb_addr = devm_ioremap(dev, q->memmap_phy,
(q->devtype_data->ahb_buf_size * 4));
- if (!q->ahb_addr) {
- ret = -ENOMEM;
- goto err_put_ctrl;
- }
+ if (!q->ahb_addr)
+ return -ENOMEM;
/* find the clocks */
q->clk_en = devm_clk_get(dev, "qspi_en");
- if (IS_ERR(q->clk_en)) {
- ret = PTR_ERR(q->clk_en);
- goto err_put_ctrl;
- }
+ if (IS_ERR(q->clk_en))
+ return PTR_ERR(q->clk_en);
q->clk = devm_clk_get(dev, "qspi");
- if (IS_ERR(q->clk)) {
- ret = PTR_ERR(q->clk);
- goto err_put_ctrl;
- }
+ if (IS_ERR(q->clk))
+ return PTR_ERR(q->clk);
+
+ mutex_init(&q->lock);
ret = fsl_qspi_clk_prep_enable(q);
if (ret) {
dev_err(dev, "can not enable the clock\n");
- goto err_put_ctrl;
+ return ret;
}
+ ret = devm_add_action_or_reset(dev, fsl_qspi_cleanup, q);
+ if (ret)
+ return ret;
+
/* find the irq */
ret = platform_get_irq(pdev, 0);
if (ret < 0)
- goto err_disable_clk;
+ return ret;
ret = devm_request_irq(dev, ret,
fsl_qspi_irq_handler, 0, pdev->name, q);
if (ret) {
dev_err(dev, "failed to request irq: %d\n", ret);
- goto err_disable_clk;
+ return ret;
}
- mutex_init(&q->lock);
-
ctlr->bus_num = -1;
ctlr->num_chipselect = 4;
ctlr->mem_ops = &fsl_qspi_mem_ops;
@@ -947,23 +944,15 @@ static int fsl_qspi_probe(struct platform_device *pdev)
ctlr->dev.of_node = np;
- ret = devm_add_action_or_reset(dev, fsl_qspi_cleanup, q);
+ ret = devm_add_action_or_reset(dev, fsl_qspi_disable, q);
if (ret)
- goto err_put_ctrl;
+ return ret;
ret = devm_spi_register_controller(dev, ctlr);
if (ret)
- goto err_put_ctrl;
+ return ret;
return 0;
-
-err_disable_clk:
- fsl_qspi_clk_disable_unprep(q);
-
-err_put_ctrl:
- spi_controller_put(ctlr);
-
- return ret;
}
static int fsl_qspi_suspend(struct device *dev)
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 405deb6677c1..ea5f1b10b79e 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -46,7 +46,7 @@ struct spi_gpio {
static inline struct spi_gpio *__pure
spi_to_spi_gpio(const struct spi_device *spi)
{
- const struct spi_bitbang *bang;
+ struct spi_bitbang *bang;
struct spi_gpio *spi_gpio;
bang = spi_controller_get_devdata(spi->controller);
diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c
index 4d9ffec900bb..4b63cb98df9c 100644
--- a/drivers/spi/spi-intel-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -44,6 +44,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct intel_spi_boardinfo *info;
+ void __iomem *base;
int ret;
ret = pcim_enable_device(pdev);
@@ -56,7 +57,12 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
return -ENOMEM;
info->data = pdev;
- return intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
+
+ base = pcim_iomap_region(pdev, 0, KBUILD_MODNAME);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ return intel_spi_probe(&pdev->dev, base, info);
}
static const struct pci_device_id intel_spi_pci_ids[] = {
diff --git a/drivers/spi/spi-intel-platform.c b/drivers/spi/spi-intel-platform.c
index 0974cca83a5d..6cbed0b2e063 100644
--- a/drivers/spi/spi-intel-platform.c
+++ b/drivers/spi/spi-intel-platform.c
@@ -14,14 +14,17 @@
static int intel_spi_platform_probe(struct platform_device *pdev)
{
struct intel_spi_boardinfo *info;
- struct resource *mem;
+ void __iomem *base;
info = dev_get_platdata(&pdev->dev);
if (!info)
return -EINVAL;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- return intel_spi_probe(&pdev->dev, mem, info);
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ return intel_spi_probe(&pdev->dev, base, info);
}
static struct platform_driver intel_spi_platform_driver = {
diff --git a/drivers/spi/spi-intel.c b/drivers/spi/spi-intel.c
index b0dcdb6fb8fa..5d5a546c62ea 100644
--- a/drivers/spi/spi-intel.c
+++ b/drivers/spi/spi-intel.c
@@ -1467,13 +1467,13 @@ EXPORT_SYMBOL_GPL(intel_spi_groups);
/**
* intel_spi_probe() - Probe the Intel SPI flash controller
* @dev: Pointer to the parent device
- * @mem: MMIO resource
+ * @base: iomapped MMIO resource
* @info: Platform specific information
*
* Probes Intel SPI flash controller and creates the flash chip device.
* Returns %0 on success and negative errno in case of failure.
*/
-int intel_spi_probe(struct device *dev, struct resource *mem,
+int intel_spi_probe(struct device *dev, void __iomem *base,
const struct intel_spi_boardinfo *info)
{
struct spi_controller *host;
@@ -1488,10 +1488,7 @@ int intel_spi_probe(struct device *dev, struct resource *mem,
ispi = spi_controller_get_devdata(host);
- ispi->base = devm_ioremap_resource(dev, mem);
- if (IS_ERR(ispi->base))
- return PTR_ERR(ispi->base);
-
+ ispi->base = base;
ispi->dev = dev;
ispi->host = host;
ispi->info = info;
diff --git a/drivers/spi/spi-intel.h b/drivers/spi/spi-intel.h
index c5f35060dd63..53b292c6ea0c 100644
--- a/drivers/spi/spi-intel.h
+++ b/drivers/spi/spi-intel.h
@@ -11,11 +11,9 @@
#include <linux/platform_data/x86/spi-intel.h>
-struct resource;
-
extern const struct attribute_group *intel_spi_groups[];
-int intel_spi_probe(struct device *dev, struct resource *mem,
+int intel_spi_probe(struct device *dev, void __iomem *base,
const struct intel_spi_boardinfo *info);
#endif /* SPI_INTEL_H */
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index 31a878d9458d..7dd92deffe3f 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -420,7 +420,7 @@ MODULE_LICENSE("GPL");
static void spi_test_print_hex_dump(char *pre, const void *ptr, size_t len)
{
/* limit the hex_dump */
- if (len < 1024) {
+ if (len <= 1024) {
print_hex_dump(KERN_INFO, pre,
DUMP_PREFIX_OFFSET, 16, 1,
ptr, len, 0);
@@ -494,8 +494,8 @@ struct rx_ranges {
static int rx_ranges_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct rx_ranges *rx_a = list_entry(a, struct rx_ranges, list);
- struct rx_ranges *rx_b = list_entry(b, struct rx_ranges, list);
+ const struct rx_ranges *rx_a = list_entry(a, struct rx_ranges, list);
+ const struct rx_ranges *rx_b = list_entry(b, struct rx_ranges, list);
if (rx_a->start > rx_b->start)
return 1;
@@ -635,8 +635,8 @@ static int spi_test_check_loopback_result(struct spi_device *spi,
} else {
/* first byte received */
txb = ((u8 *)xfer->rx_buf)[0];
- /* first byte may be 0 or xff */
- if (!((txb == 0) || (txb == 0xff))) {
+ /* first byte may be 0 or 0xff */
+ if (txb != 0 && txb != 0xff) {
dev_err(&spi->dev,
"loopback strangeness - we expect 0x00 or 0xff, but not 0x%02x\n",
txb);
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index df74ad5060f8..6b9137307533 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -21,18 +21,26 @@
#include <linux/interrupt.h>
#include <linux/reset.h>
#include <linux/pinctrl/consumer.h>
+#include <linux/dma-mapping.h>
/*
- * The Meson SPICC controller could support DMA based transfers, but is not
- * implemented by the vendor code, and while having the registers documentation
- * it has never worked on the GXL Hardware.
- * The PIO mode is the only mode implemented, and due to badly designed HW :
- * - all transfers are cutted in 16 words burst because the FIFO hangs on
- * TX underflow, and there is no TX "Half-Empty" interrupt, so we go by
- * FIFO max size chunk only
- * - CS management is dumb, and goes UP between every burst, so is really a
- * "Data Valid" signal than a Chip Select, GPIO link should be used instead
- * to have a CS go down over the full transfer
+ * There are two modes for data transmission: PIO and DMA.
+ * When bits_per_word is 8, 16, 24, or 32, data is transferred using PIO mode.
+ * When bits_per_word is 64, DMA mode is used by default.
+ *
+ * DMA achieves a transfer with one or more SPI bursts, each SPI burst is made
+ * up of one or more DMA bursts. The DMA burst implementation mechanism is,
+ * For TX, when the number of words in TXFIFO is less than the preset
+ * reading threshold, SPICC starts a reading DMA burst, which reads the preset
+ * number of words from TX buffer, then writes them into TXFIFO.
+ * For RX, when the number of words in RXFIFO is greater than the preset
+ * writing threshold, SPICC starts a writing request burst, which reads the
+ * preset number of words from RXFIFO, then write them into RX buffer.
+ * DMA works if the transfer meets the following conditions,
+ * - 64 bits per word
+ * - The transfer length in word must be multiples of the dma_burst_len, and
+ * the dma_burst_len should be one of 8,7...2, otherwise, it will be split
+ * into several SPI bursts by this driver
*/
#define SPICC_MAX_BURST 128
@@ -128,6 +136,23 @@
#define SPICC_DWADDR 0x24 /* Write Address of DMA */
+#define SPICC_LD_CNTL0 0x28
+#define VSYNC_IRQ_SRC_SELECT BIT(0)
+#define DMA_EN_SET_BY_VSYNC BIT(2)
+#define XCH_EN_SET_BY_VSYNC BIT(3)
+#define DMA_READ_COUNTER_EN BIT(4)
+#define DMA_WRITE_COUNTER_EN BIT(5)
+#define DMA_RADDR_LOAD_BY_VSYNC BIT(6)
+#define DMA_WADDR_LOAD_BY_VSYNC BIT(7)
+#define DMA_ADDR_LOAD_FROM_LD_ADDR BIT(8)
+
+#define SPICC_LD_CNTL1 0x2c
+#define DMA_READ_COUNTER GENMASK(15, 0)
+#define DMA_WRITE_COUNTER GENMASK(31, 16)
+#define DMA_BURST_LEN_DEFAULT 8
+#define DMA_BURST_COUNT_MAX 0xffff
+#define SPI_BURST_LEN_MAX (DMA_BURST_LEN_DEFAULT * DMA_BURST_COUNT_MAX)
+
#define SPICC_ENH_CTL0 0x38 /* Enhanced Feature */
#define SPICC_ENH_CLK_CS_DELAY_MASK GENMASK(15, 0)
#define SPICC_ENH_DATARATE_MASK GENMASK(23, 16)
@@ -171,6 +196,9 @@ struct meson_spicc_device {
struct pinctrl *pinctrl;
struct pinctrl_state *pins_idle_high;
struct pinctrl_state *pins_idle_low;
+ dma_addr_t tx_dma;
+ dma_addr_t rx_dma;
+ bool using_dma;
};
#define pow2_clk_to_spicc(_div) container_of(_div, struct meson_spicc_device, pow2_div)
@@ -202,6 +230,148 @@ static void meson_spicc_oen_enable(struct meson_spicc_device *spicc)
writel_relaxed(conf, spicc->base + SPICC_ENH_CTL0);
}
+static int meson_spicc_dma_map(struct meson_spicc_device *spicc,
+ struct spi_transfer *t)
+{
+ struct device *dev = spicc->host->dev.parent;
+
+ if (!(t->tx_buf && t->rx_buf))
+ return -EINVAL;
+
+ t->tx_dma = dma_map_single(dev, (void *)t->tx_buf, t->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, t->tx_dma))
+ return -ENOMEM;
+
+ t->rx_dma = dma_map_single(dev, t->rx_buf, t->len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, t->rx_dma))
+ return -ENOMEM;
+
+ spicc->tx_dma = t->tx_dma;
+ spicc->rx_dma = t->rx_dma;
+
+ return 0;
+}
+
+static void meson_spicc_dma_unmap(struct meson_spicc_device *spicc,
+ struct spi_transfer *t)
+{
+ struct device *dev = spicc->host->dev.parent;
+
+ if (t->tx_dma)
+ dma_unmap_single(dev, t->tx_dma, t->len, DMA_TO_DEVICE);
+ if (t->rx_dma)
+ dma_unmap_single(dev, t->rx_dma, t->len, DMA_FROM_DEVICE);
+}
+
+/*
+ * According to the remain words length, calculate a suitable spi burst length
+ * and a dma burst length for current spi burst
+ */
+static u32 meson_spicc_calc_dma_len(struct meson_spicc_device *spicc,
+ u32 len, u32 *dma_burst_len)
+{
+ u32 i;
+
+ if (len <= spicc->data->fifo_size) {
+ *dma_burst_len = len;
+ return len;
+ }
+
+ *dma_burst_len = DMA_BURST_LEN_DEFAULT;
+
+ if (len == (SPI_BURST_LEN_MAX + 1))
+ return SPI_BURST_LEN_MAX - DMA_BURST_LEN_DEFAULT;
+
+ if (len >= SPI_BURST_LEN_MAX)
+ return SPI_BURST_LEN_MAX;
+
+ for (i = DMA_BURST_LEN_DEFAULT; i > 1; i--)
+ if ((len % i) == 0) {
+ *dma_burst_len = i;
+ return len;
+ }
+
+ i = len % DMA_BURST_LEN_DEFAULT;
+ len -= i;
+
+ if (i == 1)
+ len -= DMA_BURST_LEN_DEFAULT;
+
+ return len;
+}
+
+static void meson_spicc_setup_dma(struct meson_spicc_device *spicc)
+{
+ unsigned int len;
+ unsigned int dma_burst_len, dma_burst_count;
+ unsigned int count_en = 0;
+ unsigned int txfifo_thres = 0;
+ unsigned int read_req = 0;
+ unsigned int rxfifo_thres = 31;
+ unsigned int write_req = 0;
+ unsigned int ld_ctr1 = 0;
+
+ writel_relaxed(spicc->tx_dma, spicc->base + SPICC_DRADDR);
+ writel_relaxed(spicc->rx_dma, spicc->base + SPICC_DWADDR);
+
+ /* Set the max burst length to support a transmission with length of
+ * no more than 1024 bytes(128 words), which must use the CS management
+ * because of some strict timing requirements
+ */
+ writel_bits_relaxed(SPICC_BURSTLENGTH_MASK, SPICC_BURSTLENGTH_MASK,
+ spicc->base + SPICC_CONREG);
+
+ len = meson_spicc_calc_dma_len(spicc, spicc->xfer_remain,
+ &dma_burst_len);
+ spicc->xfer_remain -= len;
+ dma_burst_count = DIV_ROUND_UP(len, dma_burst_len);
+ dma_burst_len--;
+
+ if (spicc->tx_dma) {
+ spicc->tx_dma += len;
+ count_en |= DMA_READ_COUNTER_EN;
+ txfifo_thres = spicc->data->fifo_size - dma_burst_len;
+ read_req = dma_burst_len;
+ ld_ctr1 |= FIELD_PREP(DMA_READ_COUNTER, dma_burst_count);
+ }
+
+ if (spicc->rx_dma) {
+ spicc->rx_dma += len;
+ count_en |= DMA_WRITE_COUNTER_EN;
+ rxfifo_thres = dma_burst_len;
+ write_req = dma_burst_len;
+ ld_ctr1 |= FIELD_PREP(DMA_WRITE_COUNTER, dma_burst_count);
+ }
+
+ writel_relaxed(count_en, spicc->base + SPICC_LD_CNTL0);
+ writel_relaxed(ld_ctr1, spicc->base + SPICC_LD_CNTL1);
+ writel_relaxed(SPICC_DMA_ENABLE
+ | SPICC_DMA_URGENT
+ | FIELD_PREP(SPICC_TXFIFO_THRESHOLD_MASK, txfifo_thres)
+ | FIELD_PREP(SPICC_READ_BURST_MASK, read_req)
+ | FIELD_PREP(SPICC_RXFIFO_THRESHOLD_MASK, rxfifo_thres)
+ | FIELD_PREP(SPICC_WRITE_BURST_MASK, write_req),
+ spicc->base + SPICC_DMAREG);
+}
+
+static irqreturn_t meson_spicc_dma_irq(struct meson_spicc_device *spicc)
+{
+ if (readl_relaxed(spicc->base + SPICC_DMAREG) & SPICC_DMA_ENABLE)
+ return IRQ_HANDLED;
+
+ if (spicc->xfer_remain) {
+ meson_spicc_setup_dma(spicc);
+ } else {
+ writel_bits_relaxed(SPICC_SMC, 0, spicc->base + SPICC_CONREG);
+ writel_relaxed(0, spicc->base + SPICC_INTREG);
+ writel_relaxed(0, spicc->base + SPICC_DMAREG);
+ meson_spicc_dma_unmap(spicc, spicc->xfer);
+ complete(&spicc->done);
+ }
+
+ return IRQ_HANDLED;
+}
+
static inline bool meson_spicc_txfull(struct meson_spicc_device *spicc)
{
return !!FIELD_GET(SPICC_TF,
@@ -293,6 +463,9 @@ static irqreturn_t meson_spicc_irq(int irq, void *data)
writel_bits_relaxed(SPICC_TC, SPICC_TC, spicc->base + SPICC_STATREG);
+ if (spicc->using_dma)
+ return meson_spicc_dma_irq(spicc);
+
/* Empty RX FIFO */
meson_spicc_rx(spicc);
@@ -426,9 +599,6 @@ static int meson_spicc_transfer_one(struct spi_controller *host,
meson_spicc_reset_fifo(spicc);
- /* Setup burst */
- meson_spicc_setup_burst(spicc);
-
/* Setup wait for completion */
reinit_completion(&spicc->done);
@@ -442,11 +612,36 @@ static int meson_spicc_transfer_one(struct spi_controller *host,
/* Increase it twice and add 200 ms tolerance */
timeout += timeout + 200;
- /* Start burst */
- writel_bits_relaxed(SPICC_XCH, SPICC_XCH, spicc->base + SPICC_CONREG);
+ if (xfer->bits_per_word == 64) {
+ int ret;
+
+ /* dma_burst_len 1 can't trigger a dma burst */
+ if (xfer->len < 16)
+ return -EINVAL;
+
+ ret = meson_spicc_dma_map(spicc, xfer);
+ if (ret) {
+ meson_spicc_dma_unmap(spicc, xfer);
+ dev_err(host->dev.parent, "dma map failed\n");
+ return ret;
+ }
+
+ spicc->using_dma = true;
+ spicc->xfer_remain = DIV_ROUND_UP(xfer->len, spicc->bytes_per_word);
+ meson_spicc_setup_dma(spicc);
+ writel_relaxed(SPICC_TE_EN, spicc->base + SPICC_INTREG);
+ writel_bits_relaxed(SPICC_SMC, SPICC_SMC, spicc->base + SPICC_CONREG);
+ } else {
+ spicc->using_dma = false;
+ /* Setup burst */
+ meson_spicc_setup_burst(spicc);
- /* Enable interrupts */
- writel_relaxed(SPICC_TC_EN, spicc->base + SPICC_INTREG);
+ /* Start burst */
+ writel_bits_relaxed(SPICC_XCH, SPICC_XCH, spicc->base + SPICC_CONREG);
+
+ /* Enable interrupts */
+ writel_relaxed(SPICC_TC_EN, spicc->base + SPICC_INTREG);
+ }
if (!wait_for_completion_timeout(&spicc->done, msecs_to_jiffies(timeout)))
return -ETIMEDOUT;
@@ -545,6 +740,14 @@ static int meson_spicc_setup(struct spi_device *spi)
if (!spi->controller_state)
spi->controller_state = spi_controller_get_devdata(spi->controller);
+ /* DMA works at 64 bits, the rest works on PIO */
+ if (spi->bits_per_word != 8 &&
+ spi->bits_per_word != 16 &&
+ spi->bits_per_word != 24 &&
+ spi->bits_per_word != 32 &&
+ spi->bits_per_word != 64)
+ return -EINVAL;
+
return 0;
}
@@ -853,10 +1056,6 @@ static int meson_spicc_probe(struct platform_device *pdev)
host->num_chipselect = 4;
host->dev.of_node = pdev->dev.of_node;
host->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LOOP;
- host->bits_per_word_mask = SPI_BPW_MASK(32) |
- SPI_BPW_MASK(24) |
- SPI_BPW_MASK(16) |
- SPI_BPW_MASK(8);
host->flags = (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX);
host->min_speed_hz = spicc->data->min_speed_hz;
host->max_speed_hz = spicc->data->max_speed_hz;
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index bad6b30bab0e..e63c77e41823 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -48,6 +48,8 @@
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pm_runtime.h>
#include <linux/pm_qos.h>
#include <linux/regmap.h>
#include <linux/sizes.h>
@@ -57,6 +59,9 @@
#include <linux/spi/spi.h>
#include <linux/spi/spi-mem.h>
+/* runtime pm timeout */
+#define FSPI_RPM_TIMEOUT 50 /* 50ms */
+
/* Registers used by the driver */
#define FSPI_MCR0 0x00
#define FSPI_MCR0_AHB_TIMEOUT(x) ((x) << 24)
@@ -394,6 +399,8 @@ struct nxp_fspi {
struct mutex lock;
struct pm_qos_request pm_qos_req;
int selected;
+#define FSPI_NEED_INIT (1 << 0)
+ int flags;
};
static inline int needs_ip_only(struct nxp_fspi *f)
@@ -627,15 +634,15 @@ static int nxp_fspi_clk_prep_enable(struct nxp_fspi *f)
return 0;
}
-static int nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
+static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
{
if (is_acpi_node(dev_fwnode(f->dev)))
- return 0;
+ return;
clk_disable_unprepare(f->clk);
clk_disable_unprepare(f->clk_en);
- return 0;
+ return;
}
static void nxp_fspi_dll_calibration(struct nxp_fspi *f)
@@ -925,7 +932,13 @@ static int nxp_fspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->controller);
int err = 0;
- mutex_lock(&f->lock);
+ guard(mutex)(&f->lock);
+
+ err = pm_runtime_get_sync(f->dev);
+ if (err < 0) {
+ dev_err(f->dev, "Failed to enable clock %d\n", __LINE__);
+ return err;
+ }
/* Wait for controller being ready. */
err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
@@ -955,7 +968,8 @@ static int nxp_fspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
/* Invalidate the data in the AHB buffer. */
nxp_fspi_invalid(f);
- mutex_unlock(&f->lock);
+ pm_runtime_mark_last_busy(f->dev);
+ pm_runtime_put_autosuspend(f->dev);
return err;
}
@@ -1154,6 +1168,24 @@ static const struct spi_controller_mem_caps nxp_fspi_mem_caps = {
.per_op_freq = true,
};
+static void nxp_fspi_cleanup(void *data)
+{
+ struct nxp_fspi *f = data;
+
+ /* enable clock first since there is register access */
+ pm_runtime_get_sync(f->dev);
+
+ /* disable the hardware */
+ fspi_writel(f, FSPI_MCR0_MDIS, f->iobase + FSPI_MCR0);
+
+ pm_runtime_disable(f->dev);
+ pm_runtime_put_noidle(f->dev);
+ nxp_fspi_clk_disable_unprep(f);
+
+ if (f->ahb_addr)
+ iounmap(f->ahb_addr);
+}
+
static int nxp_fspi_probe(struct platform_device *pdev)
{
struct spi_controller *ctlr;
@@ -1161,10 +1193,10 @@ static int nxp_fspi_probe(struct platform_device *pdev)
struct device_node *np = dev->of_node;
struct resource *res;
struct nxp_fspi *f;
- int ret;
+ int ret, irq;
u32 reg;
- ctlr = spi_alloc_host(&pdev->dev, sizeof(*f));
+ ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*f));
if (!ctlr)
return -ENOMEM;
@@ -1174,10 +1206,8 @@ static int nxp_fspi_probe(struct platform_device *pdev)
f = spi_controller_get_devdata(ctlr);
f->dev = dev;
f->devtype_data = (struct nxp_fspi_devtype_data *)device_get_match_data(dev);
- if (!f->devtype_data) {
- ret = -ENODEV;
- goto err_put_ctrl;
- }
+ if (!f->devtype_data)
+ return -ENODEV;
platform_set_drvdata(pdev, f);
@@ -1186,11 +1216,8 @@ static int nxp_fspi_probe(struct platform_device *pdev)
f->iobase = devm_platform_ioremap_resource(pdev, 0);
else
f->iobase = devm_platform_ioremap_resource_byname(pdev, "fspi_base");
-
- if (IS_ERR(f->iobase)) {
- ret = PTR_ERR(f->iobase);
- goto err_put_ctrl;
- }
+ if (IS_ERR(f->iobase))
+ return PTR_ERR(f->iobase);
/* find the resources - controller memory mapped space */
if (is_acpi_node(dev_fwnode(f->dev)))
@@ -1198,11 +1225,8 @@ static int nxp_fspi_probe(struct platform_device *pdev)
else
res = platform_get_resource_byname(pdev,
IORESOURCE_MEM, "fspi_mmap");
-
- if (!res) {
- ret = -ENODEV;
- goto err_put_ctrl;
- }
+ if (!res)
+ return -ENODEV;
/* assign memory mapped starting address and mapped size. */
f->memmap_phy = res->start;
@@ -1211,100 +1235,109 @@ static int nxp_fspi_probe(struct platform_device *pdev)
/* find the clocks */
if (dev_of_node(&pdev->dev)) {
f->clk_en = devm_clk_get(dev, "fspi_en");
- if (IS_ERR(f->clk_en)) {
- ret = PTR_ERR(f->clk_en);
- goto err_put_ctrl;
- }
+ if (IS_ERR(f->clk_en))
+ return PTR_ERR(f->clk_en);
f->clk = devm_clk_get(dev, "fspi");
- if (IS_ERR(f->clk)) {
- ret = PTR_ERR(f->clk);
- goto err_put_ctrl;
- }
-
- ret = nxp_fspi_clk_prep_enable(f);
- if (ret) {
- dev_err(dev, "can not enable the clock\n");
- goto err_put_ctrl;
- }
+ if (IS_ERR(f->clk))
+ return PTR_ERR(f->clk);
}
+ /* find the irq */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return dev_err_probe(dev, irq, "Failed to get irq source");
+
+ pm_runtime_enable(dev);
+ pm_runtime_set_autosuspend_delay(dev, FSPI_RPM_TIMEOUT);
+ pm_runtime_use_autosuspend(dev);
+
+ /* enable clock */
+ ret = pm_runtime_get_sync(f->dev);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Failed to enable clock");
+
/* Clear potential interrupts */
reg = fspi_readl(f, f->iobase + FSPI_INTR);
if (reg)
fspi_writel(f, reg, f->iobase + FSPI_INTR);
- /* find the irq */
- ret = platform_get_irq(pdev, 0);
+ nxp_fspi_default_setup(f);
+
+ ret = pm_runtime_put_sync(dev);
if (ret < 0)
- goto err_disable_clk;
+ return dev_err_probe(dev, ret, "Failed to disable clock");
- ret = devm_request_irq(dev, ret,
+ ret = devm_request_irq(dev, irq,
nxp_fspi_irq_handler, 0, pdev->name, f);
- if (ret) {
- dev_err(dev, "failed to request irq: %d\n", ret);
- goto err_disable_clk;
- }
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to request irq\n");
- mutex_init(&f->lock);
+ devm_mutex_init(dev, &f->lock);
ctlr->bus_num = -1;
ctlr->num_chipselect = NXP_FSPI_MAX_CHIPSELECT;
ctlr->mem_ops = &nxp_fspi_mem_ops;
ctlr->mem_caps = &nxp_fspi_mem_caps;
-
- nxp_fspi_default_setup(f);
-
ctlr->dev.of_node = np;
- ret = devm_spi_register_controller(&pdev->dev, ctlr);
+ ret = devm_add_action_or_reset(dev, nxp_fspi_cleanup, f);
if (ret)
- goto err_destroy_mutex;
+ return dev_err_probe(dev, ret, "Failed to register nxp_fspi_cleanup\n");
- return 0;
+ return devm_spi_register_controller(&pdev->dev, ctlr);
+}
-err_destroy_mutex:
- mutex_destroy(&f->lock);
+static int nxp_fspi_runtime_suspend(struct device *dev)
+{
+ struct nxp_fspi *f = dev_get_drvdata(dev);
-err_disable_clk:
nxp_fspi_clk_disable_unprep(f);
-err_put_ctrl:
- spi_controller_put(ctlr);
-
- dev_err(dev, "NXP FSPI probe failed\n");
- return ret;
+ return 0;
}
-static void nxp_fspi_remove(struct platform_device *pdev)
+static int nxp_fspi_runtime_resume(struct device *dev)
{
- struct nxp_fspi *f = platform_get_drvdata(pdev);
-
- /* disable the hardware */
- fspi_writel(f, FSPI_MCR0_MDIS, f->iobase + FSPI_MCR0);
+ struct nxp_fspi *f = dev_get_drvdata(dev);
+ int ret;
- nxp_fspi_clk_disable_unprep(f);
+ ret = nxp_fspi_clk_prep_enable(f);
+ if (ret)
+ return ret;
- mutex_destroy(&f->lock);
+ if (f->flags & FSPI_NEED_INIT) {
+ nxp_fspi_default_setup(f);
+ ret = pinctrl_pm_select_default_state(dev);
+ if (ret)
+ dev_err(dev, "select flexspi default pinctrl failed!\n");
+ f->flags &= ~FSPI_NEED_INIT;
+ }
- if (f->ahb_addr)
- iounmap(f->ahb_addr);
+ return ret;
}
static int nxp_fspi_suspend(struct device *dev)
{
- return 0;
-}
-
-static int nxp_fspi_resume(struct device *dev)
-{
struct nxp_fspi *f = dev_get_drvdata(dev);
+ int ret;
- nxp_fspi_default_setup(f);
+ ret = pinctrl_pm_select_sleep_state(dev);
+ if (ret) {
+ dev_err(dev, "select flexspi sleep pinctrl failed!\n");
+ return ret;
+ }
- return 0;
+ f->flags |= FSPI_NEED_INIT;
+
+ return pm_runtime_force_suspend(dev);
}
+static const struct dev_pm_ops nxp_fspi_pm_ops = {
+ RUNTIME_PM_OPS(nxp_fspi_runtime_suspend, nxp_fspi_runtime_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(nxp_fspi_suspend, pm_runtime_force_resume)
+};
+
static const struct of_device_id nxp_fspi_dt_ids[] = {
{ .compatible = "nxp,lx2160a-fspi", .data = (void *)&lx2160a_data, },
{ .compatible = "nxp,imx8mm-fspi", .data = (void *)&imx8mm_data, },
@@ -1324,20 +1357,14 @@ static const struct acpi_device_id nxp_fspi_acpi_ids[] = {
MODULE_DEVICE_TABLE(acpi, nxp_fspi_acpi_ids);
#endif
-static const struct dev_pm_ops nxp_fspi_pm_ops = {
- .suspend = nxp_fspi_suspend,
- .resume = nxp_fspi_resume,
-};
-
static struct platform_driver nxp_fspi_driver = {
.driver = {
.name = "nxp-fspi",
.of_match_table = nxp_fspi_dt_ids,
.acpi_match_table = ACPI_PTR(nxp_fspi_acpi_ids),
- .pm = &nxp_fspi_pm_ops,
+ .pm = pm_ptr(&nxp_fspi_pm_ops),
},
.probe = nxp_fspi_probe,
- .remove = nxp_fspi_remove,
};
module_platform_driver(nxp_fspi_driver);
diff --git a/drivers/spi/spi-offload.c b/drivers/spi/spi-offload.c
index 6bad042fe437..e674097bf3be 100644
--- a/drivers/spi/spi-offload.c
+++ b/drivers/spi/spi-offload.c
@@ -183,9 +183,6 @@ static struct spi_offload_trigger
guard(mutex)(&trigger->lock);
- if (!trigger->ops)
- return ERR_PTR(-ENODEV);
-
if (trigger->ops->request) {
ret = trigger->ops->request(trigger, type, args->args, args->nargs);
if (ret)
@@ -434,7 +431,7 @@ int devm_spi_offload_trigger_register(struct device *dev,
{
struct spi_offload_trigger *trigger;
- if (!info->fwnode || !info->ops)
+ if (!info->fwnode || !info->ops || !info->ops->match)
return -EINVAL;
trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index fc98979eba48..330078b1d50f 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -741,21 +741,19 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
if (ret)
return -ENOMEM;
- ret = pci_request_regions(pdev, DRV_NAME);
+ ret = pcim_request_all_regions(pdev, DRV_NAME);
if (ret)
return -ENOMEM;
spi_bus->reg_base = pcim_iomap(pdev, 0, pci_resource_len(pdev, 0));
- if (!spi_bus->reg_base) {
- ret = -EINVAL;
- goto error;
- }
+ if (!spi_bus->reg_base)
+ return -EINVAL;
ret = pci_alloc_irq_vectors(pdev, hw_inst_cnt, hw_inst_cnt,
PCI_IRQ_ALL_TYPES);
if (ret < 0) {
dev_err(&pdev->dev, "Error allocating MSI vectors\n");
- goto error;
+ return ret;
}
init_completion(&spi_sub_ptr->spi_xfer_done);
@@ -773,13 +771,12 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
if (ret < 0) {
dev_err(&pdev->dev, "Unable to request irq : %d",
spi_sub_ptr->irq);
- ret = -ENODEV;
- goto error;
+ return -ENODEV;
}
ret = pci1xxxx_spi_dma_init(spi_bus, spi_sub_ptr->irq);
if (ret && ret != -EOPNOTSUPP)
- goto error;
+ return ret;
/* This register is only applicable for 1st instance */
regval = readl(spi_bus->reg_base + SPI_PCI_CTRL_REG_OFFSET(0));
@@ -808,8 +805,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
if (ret < 0) {
dev_err(&pdev->dev, "Unable to request irq : %d",
spi_sub_ptr->irq);
- ret = -ENODEV;
- goto error;
+ return -ENODEV;
}
}
@@ -828,15 +824,11 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
spi_controller_set_devdata(spi_host, spi_sub_ptr);
ret = devm_spi_register_controller(dev, spi_host);
if (ret)
- goto error;
+ return ret;
}
pci_set_drvdata(pdev, spi_bus);
return 0;
-
-error:
- pci_release_regions(pdev);
- return ret;
}
static void store_restore_config(struct pci1xxxx_spi *spi_ptr,
diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c
index 94948c8781e8..fd129650434f 100644
--- a/drivers/spi/spi-qpic-snand.c
+++ b/drivers/spi/spi-qpic-snand.c
@@ -116,7 +116,6 @@ struct qpic_spi_nand {
struct nand_ecc_engine ecc_eng;
u8 *data_buf;
u8 *oob_buf;
- u32 wlen;
__le32 addr1;
__le32 addr2;
__le32 cmd;
@@ -131,9 +130,9 @@ static void qcom_spi_set_read_loc_first(struct qcom_nand_controller *snandc,
int is_last_read_loc)
{
__le32 locreg_val;
- u32 val = (((cw_offset) << READ_LOCATION_OFFSET) |
- ((read_size) << READ_LOCATION_SIZE) | ((is_last_read_loc)
- << READ_LOCATION_LAST));
+ u32 val = FIELD_PREP(READ_LOCATION_OFFSET_MASK, cw_offset) |
+ FIELD_PREP(READ_LOCATION_SIZE_MASK, read_size) |
+ FIELD_PREP(READ_LOCATION_LAST_MASK, is_last_read_loc);
locreg_val = cpu_to_le32(val);
@@ -152,9 +151,9 @@ static void qcom_spi_set_read_loc_last(struct qcom_nand_controller *snandc,
int is_last_read_loc)
{
__le32 locreg_val;
- u32 val = (((cw_offset) << READ_LOCATION_OFFSET) |
- ((read_size) << READ_LOCATION_SIZE) | ((is_last_read_loc)
- << READ_LOCATION_LAST));
+ u32 val = FIELD_PREP(READ_LOCATION_OFFSET_MASK, cw_offset) |
+ FIELD_PREP(READ_LOCATION_SIZE_MASK, read_size) |
+ FIELD_PREP(READ_LOCATION_LAST_MASK, is_last_read_loc);
locreg_val = cpu_to_le32(val);
@@ -250,9 +249,11 @@ static const struct mtd_ooblayout_ops qcom_spi_ooblayout = {
static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand)
{
struct qcom_nand_controller *snandc = nand_to_qcom_snand(nand);
+ struct nand_ecc_props *reqs = &nand->ecc.requirements;
+ struct nand_ecc_props *user = &nand->ecc.user_conf;
struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
struct mtd_info *mtd = nanddev_to_mtd(nand);
- int cwperpage, bad_block_byte;
+ int cwperpage, bad_block_byte, ret;
struct qpic_ecc *ecc_cfg;
cwperpage = mtd->writesize / NANDC_STEP_SIZE;
@@ -261,11 +262,39 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand)
ecc_cfg = kzalloc(sizeof(*ecc_cfg), GFP_KERNEL);
if (!ecc_cfg)
return -ENOMEM;
- snandc->qspi->oob_buf = kzalloc(mtd->writesize + mtd->oobsize,
+
+ if (user->step_size && user->strength) {
+ ecc_cfg->step_size = user->step_size;
+ ecc_cfg->strength = user->strength;
+ } else if (reqs->step_size && reqs->strength) {
+ ecc_cfg->step_size = reqs->step_size;
+ ecc_cfg->strength = reqs->strength;
+ } else {
+ /* use defaults */
+ ecc_cfg->step_size = NANDC_STEP_SIZE;
+ ecc_cfg->strength = 4;
+ }
+
+ if (ecc_cfg->step_size != NANDC_STEP_SIZE) {
+ dev_err(snandc->dev,
+ "only %u bytes ECC step size is supported\n",
+ NANDC_STEP_SIZE);
+ ret = -EOPNOTSUPP;
+ goto err_free_ecc_cfg;
+ }
+
+ if (ecc_cfg->strength != 4) {
+ dev_err(snandc->dev,
+ "only 4 bits ECC strength is supported\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_ecc_cfg;
+ }
+
+ snandc->qspi->oob_buf = kmalloc(mtd->writesize + mtd->oobsize,
GFP_KERNEL);
if (!snandc->qspi->oob_buf) {
- kfree(ecc_cfg);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_free_ecc_cfg;
}
memset(snandc->qspi->oob_buf, 0xff, mtd->writesize + mtd->oobsize);
@@ -280,8 +309,6 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand)
ecc_cfg->bytes = ecc_cfg->ecc_bytes_hw + ecc_cfg->spare_bytes + ecc_cfg->bbm_size;
ecc_cfg->steps = 4;
- ecc_cfg->strength = 4;
- ecc_cfg->step_size = 512;
ecc_cfg->cw_data = 516;
ecc_cfg->cw_size = ecc_cfg->cw_data + ecc_cfg->bytes;
bad_block_byte = mtd->writesize - ecc_cfg->cw_size * (cwperpage - 1) + 1;
@@ -325,7 +352,7 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand)
FIELD_PREP(ECC_MODE_MASK, 0) |
FIELD_PREP(ECC_PARITY_SIZE_BYTES_BCH_MASK, ecc_cfg->ecc_bytes_hw);
- ecc_cfg->ecc_buf_cfg = 0x203 << NUM_STEPS;
+ ecc_cfg->ecc_buf_cfg = FIELD_PREP(NUM_STEPS_MASK, 0x203);
ecc_cfg->clrflashstatus = FS_READY_BSY_N;
ecc_cfg->clrreadstatus = 0xc0;
@@ -339,6 +366,10 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand)
ecc_cfg->strength, ecc_cfg->step_size);
return 0;
+
+err_free_ecc_cfg:
+ kfree(ecc_cfg);
+ return ret;
}
static void qcom_spi_ecc_cleanup_ctx_pipelined(struct nand_device *nand)
@@ -452,7 +483,8 @@ static int qcom_spi_block_erase(struct qcom_nand_controller *snandc)
snandc->regs->cmd = snandc->qspi->cmd;
snandc->regs->addr0 = snandc->qspi->addr1;
snandc->regs->addr1 = snandc->qspi->addr2;
- snandc->regs->cfg0 = cpu_to_le32(ecc_cfg->cfg0_raw & ~(7 << CW_PER_PAGE));
+ snandc->regs->cfg0 = cpu_to_le32((ecc_cfg->cfg0_raw & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, 0));
snandc->regs->cfg1 = cpu_to_le32(ecc_cfg->cfg1_raw);
snandc->regs->exec = cpu_to_le32(1);
@@ -493,6 +525,22 @@ static void qcom_spi_config_single_cw_page_read(struct qcom_nand_controller *sna
qcom_read_reg_dma(snandc, NAND_FLASH_STATUS, 1, 0);
}
+static int qcom_spi_check_raw_flash_errors(struct qcom_nand_controller *snandc, int cw_cnt)
+{
+ int i;
+
+ qcom_nandc_dev_to_mem(snandc, true);
+
+ for (i = 0; i < cw_cnt; i++) {
+ u32 flash = le32_to_cpu(snandc->reg_read_buf[i]);
+
+ if (flash & (FS_OP_ERR | FS_MPU_ERR))
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int qcom_spi_read_last_cw(struct qcom_nand_controller *snandc,
const struct spi_mem_op *op)
{
@@ -513,8 +561,8 @@ static int qcom_spi_read_last_cw(struct qcom_nand_controller *snandc,
snandc->regs->addr0 = (snandc->qspi->addr1 | cpu_to_le32(col));
snandc->regs->addr1 = snandc->qspi->addr2;
- cfg0 = (ecc_cfg->cfg0_raw & ~(7U << CW_PER_PAGE)) |
- 0 << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0_raw & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, 0);
cfg1 = ecc_cfg->cfg1_raw;
ecc_bch_cfg = ECC_CFG_ECC_DISABLE;
@@ -538,11 +586,9 @@ static int qcom_spi_read_last_cw(struct qcom_nand_controller *snandc,
return ret;
}
- qcom_nandc_dev_to_mem(snandc, true);
- u32 flash = le32_to_cpu(snandc->reg_read_buf[0]);
-
- if (flash & (FS_OP_ERR | FS_MPU_ERR))
- return -EIO;
+ ret = qcom_spi_check_raw_flash_errors(snandc, 1);
+ if (ret)
+ return ret;
bbpos = mtd->writesize - ecc_cfg->cw_size * (num_cw - 1);
@@ -556,7 +602,7 @@ static int qcom_spi_read_last_cw(struct qcom_nand_controller *snandc,
return ret;
}
-static int qcom_spi_check_error(struct qcom_nand_controller *snandc, u8 *data_buf, u8 *oob_buf)
+static int qcom_spi_check_error(struct qcom_nand_controller *snandc)
{
struct snandc_read_status *buf;
struct qpic_ecc *ecc_cfg = snandc->qspi->ecc;
@@ -573,15 +619,6 @@ static int qcom_spi_check_error(struct qcom_nand_controller *snandc, u8 *data_bu
for (i = 0; i < num_cw; i++, buf++) {
u32 flash, buffer, erased_cw;
- int data_len, oob_len;
-
- if (i == (num_cw - 1)) {
- data_len = NANDC_STEP_SIZE - ((num_cw - 1) << 2);
- oob_len = num_cw << 2;
- } else {
- data_len = ecc_cfg->cw_data;
- oob_len = 0;
- }
flash = le32_to_cpu(buf->snandc_flash);
buffer = le32_to_cpu(buf->snandc_buffer);
@@ -605,11 +642,6 @@ static int qcom_spi_check_error(struct qcom_nand_controller *snandc, u8 *data_bu
snandc->qspi->ecc_stats.corrected += stat;
max_bitflips = max(max_bitflips, stat);
}
-
- if (data_buf)
- data_buf += data_len;
- if (oob_buf)
- oob_buf += oob_len + ecc_cfg->bytes;
}
if (flash_op_err)
@@ -623,22 +655,6 @@ static int qcom_spi_check_error(struct qcom_nand_controller *snandc, u8 *data_bu
return 0;
}
-static int qcom_spi_check_raw_flash_errors(struct qcom_nand_controller *snandc, int cw_cnt)
-{
- int i;
-
- qcom_nandc_dev_to_mem(snandc, true);
-
- for (i = 0; i < cw_cnt; i++) {
- u32 flash = le32_to_cpu(snandc->reg_read_buf[i]);
-
- if (flash & (FS_OP_ERR | FS_MPU_ERR))
- return -EIO;
- }
-
- return 0;
-}
-
static int qcom_spi_read_cw_raw(struct qcom_nand_controller *snandc, u8 *data_buf,
u8 *oob_buf, int cw)
{
@@ -656,8 +672,8 @@ static int qcom_spi_read_cw_raw(struct qcom_nand_controller *snandc, u8 *data_bu
qcom_clear_bam_transaction(snandc);
raw_cw = num_cw - 1;
- cfg0 = (ecc_cfg->cfg0_raw & ~(7U << CW_PER_PAGE)) |
- 0 << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0_raw & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, 0);
cfg1 = ecc_cfg->cfg1_raw;
ecc_bch_cfg = ECC_CFG_ECC_DISABLE;
@@ -763,22 +779,19 @@ static int qcom_spi_read_page_ecc(struct qcom_nand_controller *snandc,
const struct spi_mem_op *op)
{
struct qpic_ecc *ecc_cfg = snandc->qspi->ecc;
- u8 *data_buf = NULL, *data_buf_start, *oob_buf = NULL, *oob_buf_start;
+ u8 *data_buf = NULL, *oob_buf = NULL;
int ret, i;
u32 cfg0, cfg1, ecc_bch_cfg, num_cw = snandc->qspi->num_cw;
data_buf = op->data.buf.in;
- data_buf_start = data_buf;
-
oob_buf = snandc->qspi->oob_buf;
- oob_buf_start = oob_buf;
snandc->buf_count = 0;
snandc->buf_start = 0;
qcom_clear_read_regs(snandc);
- cfg0 = (ecc_cfg->cfg0 & ~(7U << CW_PER_PAGE)) |
- (num_cw - 1) << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0 & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1);
cfg1 = ecc_cfg->cfg1;
ecc_bch_cfg = ecc_cfg->ecc_bch_cfg;
@@ -852,29 +865,26 @@ static int qcom_spi_read_page_ecc(struct qcom_nand_controller *snandc,
return ret;
}
- return qcom_spi_check_error(snandc, data_buf_start, oob_buf_start);
+ return qcom_spi_check_error(snandc);
}
static int qcom_spi_read_page_oob(struct qcom_nand_controller *snandc,
const struct spi_mem_op *op)
{
struct qpic_ecc *ecc_cfg = snandc->qspi->ecc;
- u8 *data_buf = NULL, *data_buf_start, *oob_buf = NULL, *oob_buf_start;
+ u8 *oob_buf = NULL;
int ret, i;
u32 cfg0, cfg1, ecc_bch_cfg, num_cw = snandc->qspi->num_cw;
oob_buf = op->data.buf.in;
- oob_buf_start = oob_buf;
-
- data_buf_start = data_buf;
snandc->buf_count = 0;
snandc->buf_start = 0;
qcom_clear_read_regs(snandc);
qcom_clear_bam_transaction(snandc);
- cfg0 = (ecc_cfg->cfg0 & ~(7U << CW_PER_PAGE)) |
- (num_cw - 1) << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0 & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1);
cfg1 = ecc_cfg->cfg1;
ecc_bch_cfg = ecc_cfg->ecc_bch_cfg;
@@ -934,7 +944,7 @@ static int qcom_spi_read_page_oob(struct qcom_nand_controller *snandc,
return ret;
}
- return qcom_spi_check_error(snandc, data_buf_start, oob_buf_start);
+ return qcom_spi_check_error(snandc);
}
static int qcom_spi_read_page(struct qcom_nand_controller *snandc,
@@ -984,8 +994,8 @@ static int qcom_spi_program_raw(struct qcom_nand_controller *snandc,
int num_cw = snandc->qspi->num_cw;
u32 cfg0, cfg1, ecc_bch_cfg;
- cfg0 = (ecc_cfg->cfg0_raw & ~(7U << CW_PER_PAGE)) |
- (num_cw - 1) << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0_raw & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1);
cfg1 = ecc_cfg->cfg1_raw;
ecc_bch_cfg = ECC_CFG_ECC_DISABLE;
@@ -1067,8 +1077,8 @@ static int qcom_spi_program_ecc(struct qcom_nand_controller *snandc,
int num_cw = snandc->qspi->num_cw;
u32 cfg0, cfg1, ecc_bch_cfg, ecc_buf_cfg;
- cfg0 = (ecc_cfg->cfg0 & ~(7U << CW_PER_PAGE)) |
- (num_cw - 1) << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0 & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1);
cfg1 = ecc_cfg->cfg1;
ecc_bch_cfg = ecc_cfg->ecc_bch_cfg;
ecc_buf_cfg = ecc_cfg->ecc_buf_cfg;
@@ -1144,8 +1154,8 @@ static int qcom_spi_program_oob(struct qcom_nand_controller *snandc,
int num_cw = snandc->qspi->num_cw;
u32 cfg0, cfg1, ecc_bch_cfg, ecc_buf_cfg;
- cfg0 = (ecc_cfg->cfg0 & ~(7U << CW_PER_PAGE)) |
- (num_cw - 1) << CW_PER_PAGE;
+ cfg0 = (ecc_cfg->cfg0 & ~CW_PER_PAGE_MASK) |
+ FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1);
cfg1 = ecc_cfg->cfg1;
ecc_bch_cfg = ecc_cfg->ecc_bch_cfg;
ecc_buf_cfg = ecc_cfg->ecc_buf_cfg;
@@ -1374,8 +1384,10 @@ static int qcom_spi_io_op(struct qcom_nand_controller *snandc, const struct spi_
}
ret = qcom_submit_descs(snandc);
- if (ret)
+ if (ret) {
dev_err(snandc->dev, "failure in submitting descriptor for:%d\n", opcode);
+ return ret;
+ }
if (copy) {
qcom_nandc_dev_to_mem(snandc, true);
@@ -1389,7 +1401,7 @@ static int qcom_spi_io_op(struct qcom_nand_controller *snandc, const struct spi_
memcpy(op->data.buf.in, &val, snandc->buf_count);
}
- return ret;
+ return 0;
}
static bool qcom_spi_is_page_op(const struct spi_mem_op *op)
diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c
index e0c66a24a3cb..627cffea5d5c 100644
--- a/drivers/spi/spi-rpc-if.c
+++ b/drivers/spi/spi-rpc-if.c
@@ -75,6 +75,19 @@ static bool rpcif_spi_mem_supports_op(struct spi_mem *mem,
return true;
}
+static ssize_t xspi_spi_mem_dirmap_write(struct spi_mem_dirmap_desc *desc,
+ u64 offs, size_t len, const void *buf)
+{
+ struct rpcif *rpc = spi_controller_get_devdata(desc->mem->spi->controller);
+
+ if (offs + desc->info.offset + len > U32_MAX)
+ return -EINVAL;
+
+ rpcif_spi_mem_prepare(desc->mem->spi, &desc->info.op_tmpl, &offs, &len);
+
+ return xspi_dirmap_write(rpc->dev, offs, len, buf);
+}
+
static ssize_t rpcif_spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc,
u64 offs, size_t len, void *buf)
{
@@ -103,7 +116,7 @@ static int rpcif_spi_mem_dirmap_create(struct spi_mem_dirmap_desc *desc)
if (!rpc->dirmap)
return -EOPNOTSUPP;
- if (desc->info.op_tmpl.data.dir != SPI_MEM_DATA_IN)
+ if (!rpc->xspi && desc->info.op_tmpl.data.dir != SPI_MEM_DATA_IN)
return -EOPNOTSUPP;
return 0;
@@ -125,6 +138,7 @@ static const struct spi_controller_mem_ops rpcif_spi_mem_ops = {
.exec_op = rpcif_spi_mem_exec_op,
.dirmap_create = rpcif_spi_mem_dirmap_create,
.dirmap_read = rpcif_spi_mem_dirmap_read,
+ .dirmap_write = xspi_spi_mem_dirmap_write,
};
static int rpcif_spi_probe(struct platform_device *pdev)
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 8a98c313548e..94a867967e02 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_graph.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/sh_dma.h>
@@ -62,135 +63,6 @@ struct sh_msiof_spi_priv {
#define MAX_SS 3 /* Maximum number of native chip selects */
-#define SITMDR1 0x00 /* Transmit Mode Register 1 */
-#define SITMDR2 0x04 /* Transmit Mode Register 2 */
-#define SITMDR3 0x08 /* Transmit Mode Register 3 */
-#define SIRMDR1 0x10 /* Receive Mode Register 1 */
-#define SIRMDR2 0x14 /* Receive Mode Register 2 */
-#define SIRMDR3 0x18 /* Receive Mode Register 3 */
-#define SITSCR 0x20 /* Transmit Clock Select Register */
-#define SIRSCR 0x22 /* Receive Clock Select Register (SH, A1, APE6) */
-#define SICTR 0x28 /* Control Register */
-#define SIFCTR 0x30 /* FIFO Control Register */
-#define SISTR 0x40 /* Status Register */
-#define SIIER 0x44 /* Interrupt Enable Register */
-#define SITDR1 0x48 /* Transmit Control Data Register 1 (SH, A1) */
-#define SITDR2 0x4c /* Transmit Control Data Register 2 (SH, A1) */
-#define SITFDR 0x50 /* Transmit FIFO Data Register */
-#define SIRDR1 0x58 /* Receive Control Data Register 1 (SH, A1) */
-#define SIRDR2 0x5c /* Receive Control Data Register 2 (SH, A1) */
-#define SIRFDR 0x60 /* Receive FIFO Data Register */
-
-/* SITMDR1 and SIRMDR1 */
-#define SIMDR1_TRMD BIT(31) /* Transfer Mode (1 = Master mode) */
-#define SIMDR1_SYNCMD_MASK GENMASK(29, 28) /* SYNC Mode */
-#define SIMDR1_SYNCMD_SPI (2 << 28) /* Level mode/SPI */
-#define SIMDR1_SYNCMD_LR (3 << 28) /* L/R mode */
-#define SIMDR1_SYNCAC_SHIFT 25 /* Sync Polarity (1 = Active-low) */
-#define SIMDR1_BITLSB_SHIFT 24 /* MSB/LSB First (1 = LSB first) */
-#define SIMDR1_DTDL_SHIFT 20 /* Data Pin Bit Delay for MSIOF_SYNC */
-#define SIMDR1_SYNCDL_SHIFT 16 /* Frame Sync Signal Timing Delay */
-#define SIMDR1_FLD_MASK GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */
-#define SIMDR1_FLD_SHIFT 2
-#define SIMDR1_XXSTP BIT(0) /* Transmission/Reception Stop on FIFO */
-/* SITMDR1 */
-#define SITMDR1_PCON BIT(30) /* Transfer Signal Connection */
-#define SITMDR1_SYNCCH_MASK GENMASK(27, 26) /* Sync Signal Channel Select */
-#define SITMDR1_SYNCCH_SHIFT 26 /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
-
-/* SITMDR2 and SIRMDR2 */
-#define SIMDR2_BITLEN1(i) (((i) - 1) << 24) /* Data Size (8-32 bits) */
-#define SIMDR2_WDLEN1(i) (((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */
-#define SIMDR2_GRPMASK1 BIT(0) /* Group Output Mask 1 (SH, A1) */
-
-/* SITSCR and SIRSCR */
-#define SISCR_BRPS_MASK GENMASK(12, 8) /* Prescaler Setting (1-32) */
-#define SISCR_BRPS(i) (((i) - 1) << 8)
-#define SISCR_BRDV_MASK GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */
-#define SISCR_BRDV_DIV_2 0
-#define SISCR_BRDV_DIV_4 1
-#define SISCR_BRDV_DIV_8 2
-#define SISCR_BRDV_DIV_16 3
-#define SISCR_BRDV_DIV_32 4
-#define SISCR_BRDV_DIV_1 7
-
-/* SICTR */
-#define SICTR_TSCKIZ_MASK GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */
-#define SICTR_TSCKIZ_SCK BIT(31) /* Disable SCK when TX disabled */
-#define SICTR_TSCKIZ_POL_SHIFT 30 /* Transmit Clock Polarity */
-#define SICTR_RSCKIZ_MASK GENMASK(29, 28) /* Receive Clock Polarity Select */
-#define SICTR_RSCKIZ_SCK BIT(29) /* Must match CTR_TSCKIZ_SCK */
-#define SICTR_RSCKIZ_POL_SHIFT 28 /* Receive Clock Polarity */
-#define SICTR_TEDG_SHIFT 27 /* Transmit Timing (1 = falling edge) */
-#define SICTR_REDG_SHIFT 26 /* Receive Timing (1 = falling edge) */
-#define SICTR_TXDIZ_MASK GENMASK(23, 22) /* Pin Output When TX is Disabled */
-#define SICTR_TXDIZ_LOW (0 << 22) /* 0 */
-#define SICTR_TXDIZ_HIGH (1 << 22) /* 1 */
-#define SICTR_TXDIZ_HIZ (2 << 22) /* High-impedance */
-#define SICTR_TSCKE BIT(15) /* Transmit Serial Clock Output Enable */
-#define SICTR_TFSE BIT(14) /* Transmit Frame Sync Signal Output Enable */
-#define SICTR_TXE BIT(9) /* Transmit Enable */
-#define SICTR_RXE BIT(8) /* Receive Enable */
-#define SICTR_TXRST BIT(1) /* Transmit Reset */
-#define SICTR_RXRST BIT(0) /* Receive Reset */
-
-/* SIFCTR */
-#define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */
-#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */
-#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */
-#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */
-#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */
-#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */
-#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */
-#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */
-#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */
-#define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */
-#define SIFCTR_TFUA_SHIFT 20
-#define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT)
-#define SIFCTR_RFWM_MASK GENMASK(15, 13) /* Receive FIFO Watermark */
-#define SIFCTR_RFWM_1 (0 << 13) /* Transfer Request when 1 valid stages */
-#define SIFCTR_RFWM_4 (1 << 13) /* Transfer Request when 4 valid stages */
-#define SIFCTR_RFWM_8 (2 << 13) /* Transfer Request when 8 valid stages */
-#define SIFCTR_RFWM_16 (3 << 13) /* Transfer Request when 16 valid stages */
-#define SIFCTR_RFWM_32 (4 << 13) /* Transfer Request when 32 valid stages */
-#define SIFCTR_RFWM_64 (5 << 13) /* Transfer Request when 64 valid stages */
-#define SIFCTR_RFWM_128 (6 << 13) /* Transfer Request when 128 valid stages */
-#define SIFCTR_RFWM_256 (7 << 13) /* Transfer Request when 256 valid stages */
-#define SIFCTR_RFUA_MASK GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */
-#define SIFCTR_RFUA_SHIFT 4
-#define SIFCTR_RFUA(i) ((i) << SIFCTR_RFUA_SHIFT)
-
-/* SISTR */
-#define SISTR_TFEMP BIT(29) /* Transmit FIFO Empty */
-#define SISTR_TDREQ BIT(28) /* Transmit Data Transfer Request */
-#define SISTR_TEOF BIT(23) /* Frame Transmission End */
-#define SISTR_TFSERR BIT(21) /* Transmit Frame Synchronization Error */
-#define SISTR_TFOVF BIT(20) /* Transmit FIFO Overflow */
-#define SISTR_TFUDF BIT(19) /* Transmit FIFO Underflow */
-#define SISTR_RFFUL BIT(13) /* Receive FIFO Full */
-#define SISTR_RDREQ BIT(12) /* Receive Data Transfer Request */
-#define SISTR_REOF BIT(7) /* Frame Reception End */
-#define SISTR_RFSERR BIT(5) /* Receive Frame Synchronization Error */
-#define SISTR_RFUDF BIT(4) /* Receive FIFO Underflow */
-#define SISTR_RFOVF BIT(3) /* Receive FIFO Overflow */
-
-/* SIIER */
-#define SIIER_TDMAE BIT(31) /* Transmit Data DMA Transfer Req. Enable */
-#define SIIER_TFEMPE BIT(29) /* Transmit FIFO Empty Enable */
-#define SIIER_TDREQE BIT(28) /* Transmit Data Transfer Request Enable */
-#define SIIER_TEOFE BIT(23) /* Frame Transmission End Enable */
-#define SIIER_TFSERRE BIT(21) /* Transmit Frame Sync Error Enable */
-#define SIIER_TFOVFE BIT(20) /* Transmit FIFO Overflow Enable */
-#define SIIER_TFUDFE BIT(19) /* Transmit FIFO Underflow Enable */
-#define SIIER_RDMAE BIT(15) /* Receive Data DMA Transfer Req. Enable */
-#define SIIER_RFFULE BIT(13) /* Receive FIFO Full Enable */
-#define SIIER_RDREQE BIT(12) /* Receive Data Transfer Request Enable */
-#define SIIER_REOFE BIT(7) /* Frame Reception End Enable */
-#define SIIER_RFSERRE BIT(5) /* Receive Frame Sync Error Enable */
-#define SIIER_RFUDFE BIT(4) /* Receive FIFO Underflow Enable */
-#define SIIER_RFOVFE BIT(3) /* Receive FIFO Overflow Enable */
-
-
static u32 sh_msiof_read(struct sh_msiof_spi_priv *p, int reg_offs)
{
switch (reg_offs) {
@@ -255,11 +127,6 @@ static void sh_msiof_spi_reset_regs(struct sh_msiof_spi_priv *p)
100);
}
-static const u32 sh_msiof_spi_div_array[] = {
- SISCR_BRDV_DIV_1, SISCR_BRDV_DIV_2, SISCR_BRDV_DIV_4,
- SISCR_BRDV_DIV_8, SISCR_BRDV_DIV_16, SISCR_BRDV_DIV_32,
-};
-
static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
struct spi_transfer *t)
{
@@ -298,7 +165,9 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
t->effective_speed_hz = parent_rate / (brps << div_pow);
- scr = sh_msiof_spi_div_array[div_pow] | SISCR_BRPS(brps);
+ /* div_pow == 0 maps to SISCR_BRDV_DIV_1 == all ones */
+ scr = FIELD_PREP(SISCR_BRDV, div_pow - 1) |
+ FIELD_PREP(SISCR_BRPS, brps - 1);
sh_msiof_write(p, SITSCR, scr);
if (!(p->ctlr->flags & SPI_CONTROLLER_MUST_TX))
sh_msiof_write(p, SIRSCR, scr);
@@ -340,18 +209,19 @@ static u32 sh_msiof_spi_get_dtdl_and_syncdl(struct sh_msiof_spi_priv *p)
return 0;
}
- val = sh_msiof_get_delay_bit(p->info->dtdl) << SIMDR1_DTDL_SHIFT;
- val |= sh_msiof_get_delay_bit(p->info->syncdl) << SIMDR1_SYNCDL_SHIFT;
+ val = FIELD_PREP(SIMDR1_DTDL, sh_msiof_get_delay_bit(p->info->dtdl)) |
+ FIELD_PREP(SIMDR1_SYNCDL,
+ sh_msiof_get_delay_bit(p->info->syncdl));
return val;
}
static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, u32 ss,
- u32 cpol, u32 cpha,
- u32 tx_hi_z, u32 lsb_first, u32 cs_high)
+ bool cpol, bool cpha, bool tx_hi_z,
+ bool lsb_first, bool cs_high)
{
+ bool edge;
u32 tmp;
- int edge;
/*
* CPOL CPHA TSCKIZ RSCKIZ TEDG REDG
@@ -360,16 +230,18 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, u32 ss,
* 1 0 11 11 0 0
* 1 1 11 11 1 1
*/
- tmp = SIMDR1_SYNCMD_SPI | 1 << SIMDR1_FLD_SHIFT | SIMDR1_XXSTP;
- tmp |= !cs_high << SIMDR1_SYNCAC_SHIFT;
- tmp |= lsb_first << SIMDR1_BITLSB_SHIFT;
+ tmp = FIELD_PREP(SIMDR1_SYNCMD, SIMDR1_SYNCMD_SPI) |
+ FIELD_PREP(SIMDR1_FLD, 1) | SIMDR1_XXSTP |
+ FIELD_PREP(SIMDR1_SYNCAC, !cs_high) |
+ FIELD_PREP(SIMDR1_BITLSB, lsb_first);
tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p);
if (spi_controller_is_target(p->ctlr)) {
sh_msiof_write(p, SITMDR1, tmp | SITMDR1_PCON);
} else {
sh_msiof_write(p, SITMDR1,
tmp | SIMDR1_TRMD | SITMDR1_PCON |
- (ss < MAX_SS ? ss : 0) << SITMDR1_SYNCCH_SHIFT);
+ FIELD_PREP(SITMDR1_SYNCCH,
+ ss < MAX_SS ? ss : 0));
}
if (p->ctlr->flags & SPI_CONTROLLER_MUST_TX) {
/* These bits are reserved if RX needs TX */
@@ -378,30 +250,42 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, u32 ss,
sh_msiof_write(p, SIRMDR1, tmp);
tmp = 0;
- tmp |= SICTR_TSCKIZ_SCK | cpol << SICTR_TSCKIZ_POL_SHIFT;
- tmp |= SICTR_RSCKIZ_SCK | cpol << SICTR_RSCKIZ_POL_SHIFT;
+ tmp |= SICTR_TSCKIZ_SCK | FIELD_PREP(SICTR_TSCKIZ_POL, cpol);
+ tmp |= SICTR_RSCKIZ_SCK | FIELD_PREP(SICTR_RSCKIZ_POL, cpol);
edge = cpol ^ !cpha;
- tmp |= edge << SICTR_TEDG_SHIFT;
- tmp |= edge << SICTR_REDG_SHIFT;
- tmp |= tx_hi_z ? SICTR_TXDIZ_HIZ : SICTR_TXDIZ_LOW;
+ tmp |= FIELD_PREP(SICTR_TEDG, edge);
+ tmp |= FIELD_PREP(SICTR_REDG, edge);
+ tmp |= FIELD_PREP(SICTR_TXDIZ,
+ tx_hi_z ? SICTR_TXDIZ_HIZ : SICTR_TXDIZ_LOW);
sh_msiof_write(p, SICTR, tmp);
}
static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p,
const void *tx_buf, void *rx_buf,
- u32 bits, u32 words)
+ u32 bits, u32 words1, u32 words2)
{
- u32 dr2 = SIMDR2_BITLEN1(bits) | SIMDR2_WDLEN1(words);
+ u32 dr2 = FIELD_PREP(SIMDR2_GRP, words2 ? 1 : 0) |
+ FIELD_PREP(SIMDR2_BITLEN1, bits - 1) |
+ FIELD_PREP(SIMDR2_WDLEN1, words1 - 1);
if (tx_buf || (p->ctlr->flags & SPI_CONTROLLER_MUST_TX))
sh_msiof_write(p, SITMDR2, dr2);
else
- sh_msiof_write(p, SITMDR2, dr2 | SIMDR2_GRPMASK1);
+ sh_msiof_write(p, SITMDR2, dr2 | SIMDR2_GRPMASK);
if (rx_buf)
sh_msiof_write(p, SIRMDR2, dr2);
+
+ if (words2) {
+ u32 dr3 = FIELD_PREP(SIMDR3_BITLEN2, bits - 1) |
+ FIELD_PREP(SIMDR3_WDLEN2, words2 - 1);
+
+ sh_msiof_write(p, SITMDR3, dr3);
+ if (rx_buf)
+ sh_msiof_write(p, SIRMDR3, dr3);
+ }
}
static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p)
@@ -411,140 +295,154 @@ static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p)
}
static void sh_msiof_spi_write_fifo_8(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf, unsigned int words,
+ unsigned int fs)
{
const u8 *buf_8 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, buf_8[k] << fs);
}
static void sh_msiof_spi_write_fifo_16(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf, unsigned int words,
+ unsigned int fs)
{
const u16 *buf_16 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, buf_16[k] << fs);
}
static void sh_msiof_spi_write_fifo_16u(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf, unsigned int words,
+ unsigned int fs)
{
const u16 *buf_16 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, get_unaligned(&buf_16[k]) << fs);
}
static void sh_msiof_spi_write_fifo_32(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf, unsigned int words,
+ unsigned int fs)
{
const u32 *buf_32 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, buf_32[k] << fs);
}
static void sh_msiof_spi_write_fifo_32u(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf, unsigned int words,
+ unsigned int fs)
{
const u32 *buf_32 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, get_unaligned(&buf_32[k]) << fs);
}
static void sh_msiof_spi_write_fifo_s32(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf, unsigned int words,
+ unsigned int fs)
{
const u32 *buf_32 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, swab32(buf_32[k] << fs));
}
static void sh_msiof_spi_write_fifo_s32u(struct sh_msiof_spi_priv *p,
- const void *tx_buf, int words, int fs)
+ const void *tx_buf,
+ unsigned int words, unsigned int fs)
{
const u32 *buf_32 = tx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
sh_msiof_write(p, SITFDR, swab32(get_unaligned(&buf_32[k]) << fs));
}
static void sh_msiof_spi_read_fifo_8(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u8 *buf_8 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
buf_8[k] = sh_msiof_read(p, SIRFDR) >> fs;
}
static void sh_msiof_spi_read_fifo_16(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u16 *buf_16 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
buf_16[k] = sh_msiof_read(p, SIRFDR) >> fs;
}
static void sh_msiof_spi_read_fifo_16u(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u16 *buf_16 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
put_unaligned(sh_msiof_read(p, SIRFDR) >> fs, &buf_16[k]);
}
static void sh_msiof_spi_read_fifo_32(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u32 *buf_32 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
buf_32[k] = sh_msiof_read(p, SIRFDR) >> fs;
}
static void sh_msiof_spi_read_fifo_32u(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u32 *buf_32 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
put_unaligned(sh_msiof_read(p, SIRFDR) >> fs, &buf_32[k]);
}
static void sh_msiof_spi_read_fifo_s32(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u32 *buf_32 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
buf_32[k] = swab32(sh_msiof_read(p, SIRFDR) >> fs);
}
static void sh_msiof_spi_read_fifo_s32u(struct sh_msiof_spi_priv *p,
- void *rx_buf, int words, int fs)
+ void *rx_buf, unsigned int words,
+ unsigned int fs)
{
u32 *buf_32 = rx_buf;
- int k;
+ unsigned int k;
for (k = 0; k < words; k++)
put_unaligned(swab32(sh_msiof_read(p, SIRFDR) >> fs), &buf_32[k]);
@@ -564,12 +462,12 @@ static int sh_msiof_spi_setup(struct spi_device *spi)
return 0;
/* Configure native chip select mode/polarity early */
- clr = SIMDR1_SYNCMD_MASK;
- set = SIMDR1_SYNCMD_SPI;
+ clr = SIMDR1_SYNCMD;
+ set = FIELD_PREP(SIMDR1_SYNCMD, SIMDR1_SYNCMD_SPI);
if (spi->mode & SPI_CS_HIGH)
- clr |= BIT(SIMDR1_SYNCAC_SHIFT);
+ clr |= SIMDR1_SYNCAC;
else
- set |= BIT(SIMDR1_SYNCAC_SHIFT);
+ set |= SIMDR1_SYNCAC;
pm_runtime_get_sync(&p->pdev->dev);
tmp = sh_msiof_read(p, SITMDR1) & ~clr;
sh_msiof_write(p, SITMDR1, tmp | set | SIMDR1_TRMD | SITMDR1_PCON);
@@ -586,7 +484,8 @@ static int sh_msiof_prepare_message(struct spi_controller *ctlr,
{
struct sh_msiof_spi_priv *p = spi_controller_get_devdata(ctlr);
const struct spi_device *spi = msg->spi;
- u32 ss, cs_high;
+ bool cs_high;
+ u32 ss;
/* Configure pins before asserting CS */
if (spi_get_csgpiod(spi, 0)) {
@@ -594,12 +493,11 @@ static int sh_msiof_prepare_message(struct spi_controller *ctlr,
cs_high = p->native_cs_high;
} else {
ss = spi_get_chipselect(spi, 0);
- cs_high = !!(spi->mode & SPI_CS_HIGH);
+ cs_high = spi->mode & SPI_CS_HIGH;
}
- sh_msiof_spi_set_pin_regs(p, ss, !!(spi->mode & SPI_CPOL),
- !!(spi->mode & SPI_CPHA),
- !!(spi->mode & SPI_3WIRE),
- !!(spi->mode & SPI_LSB_FIRST), cs_high);
+ sh_msiof_spi_set_pin_regs(p, ss, spi->mode & SPI_CPOL,
+ spi->mode & SPI_CPHA, spi->mode & SPI_3WIRE,
+ spi->mode & SPI_LSB_FIRST, cs_high);
return 0;
}
@@ -672,20 +570,22 @@ static int sh_msiof_wait_for_completion(struct sh_msiof_spi_priv *p,
static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
void (*tx_fifo)(struct sh_msiof_spi_priv *,
- const void *, int, int),
+ const void *, unsigned int,
+ unsigned int),
void (*rx_fifo)(struct sh_msiof_spi_priv *,
- void *, int, int),
+ void *, unsigned int,
+ unsigned int),
const void *tx_buf, void *rx_buf,
- int words, int bits)
+ unsigned int words, unsigned int bits)
{
- int fifo_shift;
+ unsigned int fifo_shift;
int ret;
/* limit maximum word transfer to rx/tx fifo size */
if (tx_buf)
- words = min_t(int, words, p->tx_fifo_size);
+ words = min(words, p->tx_fifo_size);
if (rx_buf)
- words = min_t(int, words, p->rx_fifo_size);
+ words = min(words, p->rx_fifo_size);
/* the fifo contents need shifting */
fifo_shift = 32 - bits;
@@ -694,7 +594,7 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
sh_msiof_write(p, SIFCTR, 0);
/* setup msiof transfer mode registers */
- sh_msiof_spi_set_mode_regs(p, tx_buf, rx_buf, bits, words);
+ sh_msiof_spi_set_mode_regs(p, tx_buf, rx_buf, bits, words, 0);
sh_msiof_write(p, SIIER, SIIER_TEOFE | SIIER_REOFE);
/* write tx fifo */
@@ -744,10 +644,12 @@ static void sh_msiof_dma_complete(void *arg)
}
static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx,
- void *rx, unsigned int len)
+ void *rx, unsigned int len,
+ unsigned int max_wdlen)
{
u32 ier_bits = 0;
struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL;
+ unsigned int words1, words2;
dma_cookie_t cookie;
int ret;
@@ -789,10 +691,14 @@ static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx,
}
/* 1 stage FIFO watermarks for DMA */
- sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1);
+ sh_msiof_write(p, SIFCTR,
+ FIELD_PREP(SIFCTR_TFWM, SIFCTR_TFWM_1) |
+ FIELD_PREP(SIFCTR_RFWM, SIFCTR_RFWM_1));
/* setup msiof transfer mode registers (32-bit words) */
- sh_msiof_spi_set_mode_regs(p, tx, rx, 32, len / 4);
+ words1 = min(len / 4, max_wdlen);
+ words2 = len / 4 - words1;
+ sh_msiof_spi_set_mode_regs(p, tx, rx, 32, words1, words2);
sh_msiof_write(p, SIIER, ier_bits);
@@ -911,9 +817,12 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr,
struct spi_transfer *t)
{
struct sh_msiof_spi_priv *p = spi_controller_get_devdata(ctlr);
+ unsigned int max_wdlen = FIELD_MAX(SIMDR2_WDLEN1) + 1;
void (*copy32)(u32 *, const u32 *, unsigned int);
- void (*tx_fifo)(struct sh_msiof_spi_priv *, const void *, int, int);
- void (*rx_fifo)(struct sh_msiof_spi_priv *, void *, int, int);
+ void (*tx_fifo)(struct sh_msiof_spi_priv *, const void *, unsigned int,
+ unsigned int);
+ void (*rx_fifo)(struct sh_msiof_spi_priv *, void *, unsigned int,
+ unsigned int);
const void *tx_buf = t->tx_buf;
void *rx_buf = t->rx_buf;
unsigned int len = t->len;
@@ -931,17 +840,17 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr,
if (!spi_controller_is_target(p->ctlr))
sh_msiof_spi_set_clk_regs(p, t);
+ if (tx_buf)
+ max_wdlen = min(max_wdlen, p->tx_fifo_size);
+ if (rx_buf)
+ max_wdlen = min(max_wdlen, p->rx_fifo_size);
+
while (ctlr->dma_tx && len > 15) {
/*
* DMA supports 32-bit words only, hence pack 8-bit and 16-bit
* words, with byte resp. word swapping.
*/
- unsigned int l = 0;
-
- if (tx_buf)
- l = min(round_down(len, 4), p->tx_fifo_size * 4);
- if (rx_buf)
- l = min(round_down(len, 4), p->rx_fifo_size * 4);
+ unsigned int l = min(round_down(len, 4), 2 * max_wdlen * 4);
if (bits <= 8) {
copy32 = copy_bswap32;
@@ -954,7 +863,7 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr,
if (tx_buf)
copy32(p->tx_dma_page, tx_buf, l / 4);
- ret = sh_msiof_dma_once(p, tx_buf, rx_buf, l);
+ ret = sh_msiof_dma_once(p, tx_buf, rx_buf, l, max_wdlen);
if (ret == -EAGAIN) {
dev_warn_once(&p->pdev->dev,
"DMA not available, falling back to PIO\n");
@@ -1061,7 +970,7 @@ static const struct sh_msiof_chipdata rcar_gen2_data = {
.bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
SPI_BPW_MASK(24) | SPI_BPW_MASK(32),
.tx_fifo_size = 64,
- .rx_fifo_size = 64,
+ .rx_fifo_size = 128,
.ctlr_flags = SPI_CONTROLLER_MUST_TX,
.min_div_pow = 0,
};
@@ -1070,7 +979,16 @@ static const struct sh_msiof_chipdata rcar_gen3_data = {
.bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
SPI_BPW_MASK(24) | SPI_BPW_MASK(32),
.tx_fifo_size = 64,
- .rx_fifo_size = 64,
+ .rx_fifo_size = 256,
+ .ctlr_flags = SPI_CONTROLLER_MUST_TX,
+ .min_div_pow = 1,
+};
+
+static const struct sh_msiof_chipdata rcar_gen4_data = {
+ .bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
+ SPI_BPW_MASK(24) | SPI_BPW_MASK(32),
+ .tx_fifo_size = 256,
+ .rx_fifo_size = 256,
.ctlr_flags = SPI_CONTROLLER_MUST_TX,
.min_div_pow = 1,
};
@@ -1079,7 +997,7 @@ static const struct sh_msiof_chipdata rcar_r8a7795_data = {
.bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
SPI_BPW_MASK(24) | SPI_BPW_MASK(32),
.tx_fifo_size = 64,
- .rx_fifo_size = 64,
+ .rx_fifo_size = 256,
.ctlr_flags = SPI_CONTROLLER_MUST_TX,
.min_div_pow = 1,
.flags = SH_MSIOF_FLAG_FIXED_DTDL_200,
@@ -1087,20 +1005,14 @@ static const struct sh_msiof_chipdata rcar_r8a7795_data = {
static const struct of_device_id sh_msiof_match[] __maybe_unused = {
{ .compatible = "renesas,sh-mobile-msiof", .data = &sh_data },
- { .compatible = "renesas,msiof-r8a7743", .data = &rcar_gen2_data },
- { .compatible = "renesas,msiof-r8a7745", .data = &rcar_gen2_data },
- { .compatible = "renesas,msiof-r8a7790", .data = &rcar_gen2_data },
- { .compatible = "renesas,msiof-r8a7791", .data = &rcar_gen2_data },
- { .compatible = "renesas,msiof-r8a7792", .data = &rcar_gen2_data },
- { .compatible = "renesas,msiof-r8a7793", .data = &rcar_gen2_data },
- { .compatible = "renesas,msiof-r8a7794", .data = &rcar_gen2_data },
{ .compatible = "renesas,rcar-gen2-msiof", .data = &rcar_gen2_data },
{ .compatible = "renesas,msiof-r8a7795", .data = &rcar_r8a7795_data },
- { .compatible = "renesas,msiof-r8a7796", .data = &rcar_gen3_data },
{ .compatible = "renesas,rcar-gen3-msiof", .data = &rcar_gen3_data },
- { .compatible = "renesas,rcar-gen4-msiof", .data = &rcar_gen3_data },
+ { .compatible = "renesas,msiof-r8a779a0", .data = &rcar_gen3_data },
+ { .compatible = "renesas,msiof-r8a779f0", .data = &rcar_gen3_data },
+ { .compatible = "renesas,rcar-gen4-msiof", .data = &rcar_gen4_data },
{ .compatible = "renesas,sh-msiof", .data = &sh_data }, /* Deprecated */
- {},
+ { /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, sh_msiof_match);
@@ -1276,20 +1188,26 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
const struct sh_msiof_chipdata *chipdata;
struct sh_msiof_spi_info *info;
struct sh_msiof_spi_priv *p;
+ struct device *dev = &pdev->dev;
unsigned long clksrc;
int i;
int ret;
- chipdata = of_device_get_match_data(&pdev->dev);
+ /* Check whether MSIOF is used as I2S mode or SPI mode by checking "port" node */
+ struct device_node *port __free(device_node) = of_graph_get_next_port(dev->of_node, NULL);
+ if (port) /* It was MSIOF-I2S */
+ return -ENODEV;
+
+ chipdata = of_device_get_match_data(dev);
if (chipdata) {
- info = sh_msiof_spi_parse_dt(&pdev->dev);
+ info = sh_msiof_spi_parse_dt(dev);
} else {
chipdata = (const void *)pdev->id_entry->driver_data;
- info = dev_get_platdata(&pdev->dev);
+ info = dev_get_platdata(dev);
}
if (!info) {
- dev_err(&pdev->dev, "failed to obtain device info\n");
+ dev_err(dev, "failed to obtain device info\n");
return -ENXIO;
}
@@ -1297,11 +1215,9 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
info->dtdl = 200;
if (info->mode == MSIOF_SPI_TARGET)
- ctlr = spi_alloc_target(&pdev->dev,
- sizeof(struct sh_msiof_spi_priv));
+ ctlr = spi_alloc_target(dev, sizeof(struct sh_msiof_spi_priv));
else
- ctlr = spi_alloc_host(&pdev->dev,
- sizeof(struct sh_msiof_spi_priv));
+ ctlr = spi_alloc_host(dev, sizeof(struct sh_msiof_spi_priv));
if (ctlr == NULL)
return -ENOMEM;
@@ -1315,9 +1231,9 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
init_completion(&p->done);
init_completion(&p->done_txdma);
- p->clk = devm_clk_get(&pdev->dev, NULL);
+ p->clk = devm_clk_get(dev, NULL);
if (IS_ERR(p->clk)) {
- dev_err(&pdev->dev, "cannot get clock\n");
+ dev_err(dev, "cannot get clock\n");
ret = PTR_ERR(p->clk);
goto err1;
}
@@ -1334,15 +1250,14 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
goto err1;
}
- ret = devm_request_irq(&pdev->dev, i, sh_msiof_spi_irq, 0,
- dev_name(&pdev->dev), p);
+ ret = devm_request_irq(dev, i, sh_msiof_spi_irq, 0, dev_name(dev), p);
if (ret) {
- dev_err(&pdev->dev, "unable to request irq\n");
+ dev_err(dev, "unable to request irq\n");
goto err1;
}
p->pdev = pdev;
- pm_runtime_enable(&pdev->dev);
+ pm_runtime_enable(dev);
/* Platform data may override FIFO sizes */
p->tx_fifo_size = chipdata->tx_fifo_size;
@@ -1361,7 +1276,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
ctlr->flags = chipdata->ctlr_flags;
ctlr->bus_num = pdev->id;
ctlr->num_chipselect = p->info->num_chipselect;
- ctlr->dev.of_node = pdev->dev.of_node;
+ ctlr->dev.of_node = dev->of_node;
ctlr->setup = sh_msiof_spi_setup;
ctlr->prepare_message = sh_msiof_prepare_message;
ctlr->target_abort = sh_msiof_target_abort;
@@ -1373,11 +1288,11 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
ret = sh_msiof_request_dma(p);
if (ret < 0)
- dev_warn(&pdev->dev, "DMA not available, using PIO\n");
+ dev_warn(dev, "DMA not available, using PIO\n");
- ret = devm_spi_register_controller(&pdev->dev, ctlr);
+ ret = devm_spi_register_controller(dev, ctlr);
if (ret < 0) {
- dev_err(&pdev->dev, "devm_spi_register_controller error.\n");
+ dev_err(dev, "devm_spi_register_controller error.\n");
goto err2;
}
@@ -1385,7 +1300,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
err2:
sh_msiof_release_dma(p);
- pm_runtime_disable(&pdev->dev);
+ pm_runtime_disable(dev);
err1:
spi_controller_put(ctlr);
return ret;
diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c
index 9ec9823409cc..7c1fa55fbc47 100644
--- a/drivers/spi/spi-stm32-ospi.c
+++ b/drivers/spi/spi-stm32-ospi.c
@@ -804,7 +804,7 @@ static int stm32_ospi_get_resources(struct platform_device *pdev)
return ret;
}
- ospi->rstc = devm_reset_control_array_get_optional_exclusive(dev);
+ ospi->rstc = devm_reset_control_array_get_exclusive(dev);
if (IS_ERR(ospi->rstc))
return dev_err_probe(dev, PTR_ERR(ospi->rstc),
"Can't get reset\n");
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index f89826d7dc49..aa92fd5a35a9 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -264,6 +264,9 @@ static int sun4i_spi_transfer_one(struct spi_controller *host,
else
reg |= SUN4I_CTL_DHB;
+ /* Now that the settings are correct, enable the interface */
+ reg |= SUN4I_CTL_ENABLE;
+
sun4i_spi_write(sspi, SUN4I_CTL_REG, reg);
/* Ensure that we have a parent clock fast enough */
@@ -404,7 +407,7 @@ static int sun4i_spi_runtime_resume(struct device *dev)
}
sun4i_spi_write(sspi, SUN4I_CTL_REG,
- SUN4I_CTL_ENABLE | SUN4I_CTL_MASTER | SUN4I_CTL_TP);
+ SUN4I_CTL_MASTER | SUN4I_CTL_TP);
return 0;
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index 2a8bb798e95b..795a8482c2c7 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi)
u32 inactive_cycles;
u8 cs_state;
- if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) ||
- (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) ||
- (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) {
+ if ((setup->value && setup->unit != SPI_DELAY_UNIT_SCK) ||
+ (hold->value && hold->unit != SPI_DELAY_UNIT_SCK) ||
+ (inactive->value && inactive->unit != SPI_DELAY_UNIT_SCK)) {
dev_err(&spi->dev,
"Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n",
SPI_DELAY_UNIT_SCK);
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 64e1b2f8a000..3581757a269b 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -22,6 +22,7 @@
#include <linux/spi/spi.h>
#include <linux/acpi.h>
#include <linux/property.h>
+#include <linux/sizes.h>
#define QSPI_COMMAND1 0x000
#define QSPI_BIT_LENGTH(x) (((x) & 0x1f) << 0)
@@ -110,6 +111,9 @@
#define QSPI_DMA_BLK 0x024
#define QSPI_DMA_BLK_SET(x) (((x) & 0xffff) << 0)
+#define QSPI_DMA_MEM_ADDRESS 0x028
+#define QSPI_DMA_HI_ADDRESS 0x02c
+
#define QSPI_TX_FIFO 0x108
#define QSPI_RX_FIFO 0x188
@@ -134,7 +138,7 @@
#define QSPI_COMMAND_VALUE_SET(X) (((x) & 0xFF) << 0)
#define QSPI_CMB_SEQ_CMD_CFG 0x1a0
-#define QSPI_COMMAND_X1_X2_X4(x) (((x) & 0x3) << 13)
+#define QSPI_COMMAND_X1_X2_X4(x) ((((x) >> 1) & 0x3) << 13)
#define QSPI_COMMAND_X1_X2_X4_MASK (0x03 << 13)
#define QSPI_COMMAND_SDR_DDR BIT(12)
#define QSPI_COMMAND_SIZE_SET(x) (((x) & 0xFF) << 0)
@@ -147,7 +151,7 @@
#define QSPI_ADDRESS_VALUE_SET(X) (((x) & 0xFFFF) << 0)
#define QSPI_CMB_SEQ_ADDR_CFG 0x1ac
-#define QSPI_ADDRESS_X1_X2_X4(x) (((x) & 0x3) << 13)
+#define QSPI_ADDRESS_X1_X2_X4(x) ((((x) >> 1) & 0x3) << 13)
#define QSPI_ADDRESS_X1_X2_X4_MASK (0x03 << 13)
#define QSPI_ADDRESS_SDR_DDR BIT(12)
#define QSPI_ADDRESS_SIZE_SET(x) (((x) & 0xFF) << 0)
@@ -156,15 +160,19 @@
#define DATA_DIR_RX BIT(1)
#define QSPI_DMA_TIMEOUT (msecs_to_jiffies(1000))
-#define DEFAULT_QSPI_DMA_BUF_LEN (64 * 1024)
-#define CMD_TRANSFER 0
-#define ADDR_TRANSFER 1
-#define DATA_TRANSFER 2
+#define DEFAULT_QSPI_DMA_BUF_LEN SZ_64K
+
+enum tegra_qspi_transfer_type {
+ CMD_TRANSFER = 0,
+ ADDR_TRANSFER = 1,
+ DUMMY_TRANSFER = 2,
+ DATA_TRANSFER = 3
+};
struct tegra_qspi_soc_data {
- bool has_dma;
bool cmb_xfer_capable;
bool supports_tpm;
+ bool has_ext_dma;
unsigned int cs_count;
};
@@ -600,13 +608,16 @@ static void tegra_qspi_dma_unmap_xfer(struct tegra_qspi *tqspi, struct spi_trans
len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
- dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
- dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
+ if (t->tx_buf)
+ dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
+ if (t->rx_buf)
+ dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
}
static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct spi_transfer *t)
{
struct dma_slave_config dma_sconfig = { 0 };
+ dma_addr_t rx_dma_phys, tx_dma_phys;
unsigned int len;
u8 dma_burst;
int ret = 0;
@@ -629,60 +640,86 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct
len = tqspi->curr_dma_words * 4;
/* set attention level based on length of transfer */
- val = 0;
- if (len & 0xf) {
- val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
- dma_burst = 1;
- } else if (((len) >> 4) & 0x1) {
- val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
- dma_burst = 4;
- } else {
- val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
- dma_burst = 8;
+ if (tqspi->soc_data->has_ext_dma) {
+ val = 0;
+ if (len & 0xf) {
+ val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
+ dma_burst = 1;
+ } else if (((len) >> 4) & 0x1) {
+ val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
+ dma_burst = 4;
+ } else {
+ val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
+ dma_burst = 8;
+ }
+
+ tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
}
- tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
tqspi->dma_control_reg = val;
dma_sconfig.device_fc = true;
+
if (tqspi->cur_direction & DATA_DIR_TX) {
- dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
- dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- dma_sconfig.dst_maxburst = dma_burst;
- ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
- if (ret < 0) {
- dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
- return ret;
- }
+ if (tqspi->tx_dma_chan) {
+ dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
+ dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_sconfig.dst_maxburst = dma_burst;
+ ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
+ if (ret < 0) {
+ dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
+ return ret;
+ }
- tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
- ret = tegra_qspi_start_tx_dma(tqspi, t, len);
- if (ret < 0) {
- dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
- return ret;
+ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
+ ret = tegra_qspi_start_tx_dma(tqspi, t, len);
+ if (ret < 0) {
+ dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
+ return ret;
+ }
+ } else {
+ if (tqspi->is_packed)
+ tx_dma_phys = t->tx_dma;
+ else
+ tx_dma_phys = tqspi->tx_dma_phys;
+ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
+ tegra_qspi_writel(tqspi, lower_32_bits(tx_dma_phys),
+ QSPI_DMA_MEM_ADDRESS);
+ tegra_qspi_writel(tqspi, (upper_32_bits(tx_dma_phys) & 0xff),
+ QSPI_DMA_HI_ADDRESS);
}
}
if (tqspi->cur_direction & DATA_DIR_RX) {
- dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
- dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- dma_sconfig.src_maxburst = dma_burst;
- ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
- if (ret < 0) {
- dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
- return ret;
- }
-
- dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
- tqspi->dma_buf_size,
- DMA_FROM_DEVICE);
+ if (tqspi->rx_dma_chan) {
+ dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
+ dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_sconfig.src_maxburst = dma_burst;
+ ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
+ if (ret < 0) {
+ dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
+ return ret;
+ }
- ret = tegra_qspi_start_rx_dma(tqspi, t, len);
- if (ret < 0) {
- dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
- if (tqspi->cur_direction & DATA_DIR_TX)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- return ret;
+ dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
+ tqspi->dma_buf_size, DMA_FROM_DEVICE);
+ ret = tegra_qspi_start_rx_dma(tqspi, t, len);
+ if (ret < 0) {
+ dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
+ if (tqspi->cur_direction & DATA_DIR_TX)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+ return ret;
+ }
+ } else {
+ if (tqspi->is_packed)
+ rx_dma_phys = t->rx_dma;
+ else
+ rx_dma_phys = tqspi->rx_dma_phys;
+
+ tegra_qspi_writel(tqspi, lower_32_bits(rx_dma_phys),
+ QSPI_DMA_MEM_ADDRESS);
+ tegra_qspi_writel(tqspi, (upper_32_bits(rx_dma_phys) & 0xff),
+ QSPI_DMA_HI_ADDRESS);
}
}
@@ -721,9 +758,6 @@ static int tegra_qspi_start_cpu_based_transfer(struct tegra_qspi *qspi, struct s
static void tegra_qspi_deinit_dma(struct tegra_qspi *tqspi)
{
- if (!tqspi->soc_data->has_dma)
- return;
-
if (tqspi->tx_dma_buf) {
dma_free_coherent(tqspi->dev, tqspi->dma_buf_size,
tqspi->tx_dma_buf, tqspi->tx_dma_phys);
@@ -754,16 +788,29 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi)
u32 *dma_buf;
int err;
- if (!tqspi->soc_data->has_dma)
- return 0;
+ if (tqspi->soc_data->has_ext_dma) {
+ dma_chan = dma_request_chan(tqspi->dev, "rx");
+ if (IS_ERR(dma_chan)) {
+ err = PTR_ERR(dma_chan);
+ goto err_out;
+ }
- dma_chan = dma_request_chan(tqspi->dev, "rx");
- if (IS_ERR(dma_chan)) {
- err = PTR_ERR(dma_chan);
- goto err_out;
- }
+ tqspi->rx_dma_chan = dma_chan;
- tqspi->rx_dma_chan = dma_chan;
+ dma_chan = dma_request_chan(tqspi->dev, "tx");
+ if (IS_ERR(dma_chan)) {
+ err = PTR_ERR(dma_chan);
+ goto err_out;
+ }
+
+ tqspi->tx_dma_chan = dma_chan;
+ } else {
+ if (!device_iommu_mapped(tqspi->dev)) {
+ dev_warn(tqspi->dev,
+ "IOMMU not enabled in device-tree, falling back to PIO mode\n");
+ return 0;
+ }
+ }
dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL);
if (!dma_buf) {
@@ -774,14 +821,6 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi)
tqspi->rx_dma_buf = dma_buf;
tqspi->rx_dma_phys = dma_phys;
- dma_chan = dma_request_chan(tqspi->dev, "tx");
- if (IS_ERR(dma_chan)) {
- err = PTR_ERR(dma_chan);
- goto err_out;
- }
-
- tqspi->tx_dma_chan = dma_chan;
-
dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL);
if (!dma_buf) {
err = -ENOMEM;
@@ -1036,10 +1075,6 @@ static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len)
{
u32 addr_config = 0;
- /* Extract Address configuration and value */
- is_ddr = 0; //Only SDR mode supported
- bus_width = 0; //X1 mode
-
if (is_ddr)
addr_config |= QSPI_ADDRESS_SDR_DDR;
else
@@ -1079,16 +1114,23 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
switch (transfer_phase) {
case CMD_TRANSFER:
/* X1 SDR mode */
- cmd_config = tegra_qspi_cmd_config(false, 0,
+ cmd_config = tegra_qspi_cmd_config(false, xfer->tx_nbits,
xfer->len);
cmd_value = *((const u8 *)(xfer->tx_buf));
break;
case ADDR_TRANSFER:
/* X1 SDR mode */
- addr_config = tegra_qspi_addr_config(false, 0,
+ addr_config = tegra_qspi_addr_config(false, xfer->tx_nbits,
xfer->len);
address_value = *((const u32 *)(xfer->tx_buf));
break;
+ case DUMMY_TRANSFER:
+ if (xfer->dummy_data) {
+ tqspi->dummy_cycles = xfer->len * 8 / xfer->tx_nbits;
+ break;
+ }
+ transfer_phase++;
+ fallthrough;
case DATA_TRANSFER:
/* Program Command, Address value in register */
tegra_qspi_writel(tqspi, cmd_value, QSPI_CMB_SEQ_CMD);
@@ -1120,15 +1162,14 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
if (WARN_ON_ONCE(ret == 0)) {
dev_err_ratelimited(tqspi->dev,
"QSPI Transfer failed with timeout\n");
- if (tqspi->is_curr_dma_xfer &&
- (tqspi->cur_direction & DATA_DIR_TX))
- dmaengine_terminate_all
- (tqspi->tx_dma_chan);
-
- if (tqspi->is_curr_dma_xfer &&
- (tqspi->cur_direction & DATA_DIR_RX))
- dmaengine_terminate_all
- (tqspi->rx_dma_chan);
+ if (tqspi->is_curr_dma_xfer) {
+ if ((tqspi->cur_direction & DATA_DIR_TX) &&
+ tqspi->tx_dma_chan)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+ if ((tqspi->cur_direction & DATA_DIR_RX) &&
+ tqspi->rx_dma_chan)
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+ }
/* Abort transfer by resetting pio/dma bit */
if (!tqspi->is_curr_dma_xfer) {
@@ -1163,26 +1204,22 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
ret = -EIO;
goto exit;
}
- if (!xfer->cs_change) {
- tegra_qspi_transfer_end(spi);
- spi_transfer_delay_exec(xfer);
- }
break;
default:
ret = -EINVAL;
goto exit;
}
msg->actual_length += xfer->len;
+ if (!xfer->cs_change && transfer_phase == DATA_TRANSFER) {
+ tegra_qspi_transfer_end(spi);
+ spi_transfer_delay_exec(xfer);
+ }
transfer_phase++;
}
ret = 0;
exit:
msg->status = ret;
- if (ret < 0) {
- tegra_qspi_transfer_end(spi);
- spi_transfer_delay_exec(xfer);
- }
return ret;
}
@@ -1247,10 +1284,12 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
QSPI_DMA_TIMEOUT);
if (WARN_ON(ret == 0)) {
dev_err(tqspi->dev, "transfer timeout\n");
- if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX))
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_RX))
- dmaengine_terminate_all(tqspi->rx_dma_chan);
+ if (tqspi->is_curr_dma_xfer) {
+ if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+ if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+ }
tegra_qspi_handle_error(tqspi);
ret = -EIO;
goto complete_xfer;
@@ -1300,7 +1339,9 @@ static bool tegra_qspi_validate_cmb_seq(struct tegra_qspi *tqspi,
list_for_each_entry(xfer, &msg->transfers, transfer_list) {
transfer_count++;
}
- if (!tqspi->soc_data->cmb_xfer_capable || transfer_count != 3)
+ if (!tqspi->soc_data->cmb_xfer_capable)
+ return false;
+ if (transfer_count > 4 || transfer_count < 3)
return false;
xfer = list_first_entry(&msg->transfers, typeof(*xfer),
transfer_list);
@@ -1310,7 +1351,14 @@ static bool tegra_qspi_validate_cmb_seq(struct tegra_qspi *tqspi,
if (xfer->len > 4 || xfer->len < 3)
return false;
xfer = list_next_entry(xfer, transfer_list);
- if (!tqspi->soc_data->has_dma && xfer->len > (QSPI_FIFO_DEPTH << 2))
+ if (transfer_count == 4) {
+ if (xfer->dummy_data != 1)
+ return false;
+ if ((xfer->len * 8 / xfer->tx_nbits) > QSPI_DUMMY_CYCLES_MAX)
+ return false;
+ xfer = list_next_entry(xfer, transfer_list);
+ }
+ if (!tqspi->soc_data->has_ext_dma && xfer->len > (QSPI_FIFO_DEPTH << 2))
return false;
return true;
@@ -1371,41 +1419,43 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi)
unsigned int total_fifo_words;
unsigned long flags;
long wait_status;
- int err = 0;
+ int num_errors = 0;
if (tqspi->cur_direction & DATA_DIR_TX) {
if (tqspi->tx_status) {
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- err += 1;
- } else {
+ if (tqspi->tx_dma_chan)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+ num_errors++;
+ } else if (tqspi->tx_dma_chan) {
wait_status = wait_for_completion_interruptible_timeout(
&tqspi->tx_dma_complete, QSPI_DMA_TIMEOUT);
if (wait_status <= 0) {
dmaengine_terminate_all(tqspi->tx_dma_chan);
dev_err(tqspi->dev, "failed TX DMA transfer\n");
- err += 1;
+ num_errors++;
}
}
}
if (tqspi->cur_direction & DATA_DIR_RX) {
if (tqspi->rx_status) {
- dmaengine_terminate_all(tqspi->rx_dma_chan);
- err += 2;
- } else {
+ if (tqspi->rx_dma_chan)
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+ num_errors++;
+ } else if (tqspi->rx_dma_chan) {
wait_status = wait_for_completion_interruptible_timeout(
&tqspi->rx_dma_complete, QSPI_DMA_TIMEOUT);
if (wait_status <= 0) {
dmaengine_terminate_all(tqspi->rx_dma_chan);
dev_err(tqspi->dev, "failed RX DMA transfer\n");
- err += 2;
+ num_errors++;
}
}
}
spin_lock_irqsave(&tqspi->lock, flags);
- if (err) {
+ if (num_errors) {
tegra_qspi_dma_unmap_xfer(tqspi, t);
tegra_qspi_handle_error(tqspi);
complete(&tqspi->xfer_completion);
@@ -1431,9 +1481,9 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi)
/* continue transfer in current message */
total_fifo_words = tegra_qspi_calculate_curr_xfer_param(tqspi, t);
if (total_fifo_words > QSPI_FIFO_DEPTH)
- err = tegra_qspi_start_dma_based_transfer(tqspi, t);
+ num_errors = tegra_qspi_start_dma_based_transfer(tqspi, t);
else
- err = tegra_qspi_start_cpu_based_transfer(tqspi, t);
+ num_errors = tegra_qspi_start_cpu_based_transfer(tqspi, t);
exit:
spin_unlock_irqrestore(&tqspi->lock, flags);
@@ -1461,28 +1511,28 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
}
static struct tegra_qspi_soc_data tegra210_qspi_soc_data = {
- .has_dma = true,
+ .has_ext_dma = true,
.cmb_xfer_capable = false,
.supports_tpm = false,
.cs_count = 1,
};
static struct tegra_qspi_soc_data tegra186_qspi_soc_data = {
- .has_dma = true,
+ .has_ext_dma = true,
.cmb_xfer_capable = true,
.supports_tpm = false,
.cs_count = 1,
};
static struct tegra_qspi_soc_data tegra234_qspi_soc_data = {
- .has_dma = false,
+ .has_ext_dma = false,
.cmb_xfer_capable = true,
.supports_tpm = true,
.cs_count = 1,
};
static struct tegra_qspi_soc_data tegra241_qspi_soc_data = {
- .has_dma = false,
+ .has_ext_dma = true,
.cmb_xfer_capable = true,
.supports_tpm = true,
.cs_count = 4,
diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c
index 3bd0149d8f4e..1a40c4866ce1 100644
--- a/drivers/spi/spi-xcomm.c
+++ b/drivers/spi/spi-xcomm.c
@@ -44,8 +44,8 @@ struct spi_xcomm {
u8 buf[63];
};
-static void spi_xcomm_gpio_set_value(struct gpio_chip *chip,
- unsigned int offset, int val)
+static int spi_xcomm_gpio_set_value(struct gpio_chip *chip,
+ unsigned int offset, int val)
{
struct spi_xcomm *spi_xcomm = gpiochip_get_data(chip);
unsigned char buf[2];
@@ -53,7 +53,7 @@ static void spi_xcomm_gpio_set_value(struct gpio_chip *chip,
buf[0] = SPI_XCOMM_CMD_GPIO_SET;
buf[1] = !!val;
- i2c_master_send(spi_xcomm->i2c, buf, 2);
+ return i2c_master_send(spi_xcomm->i2c, buf, 2);
}
static int spi_xcomm_gpio_get_direction(struct gpio_chip *chip,
@@ -70,7 +70,7 @@ static int spi_xcomm_gpio_add(struct spi_xcomm *spi_xcomm)
return 0;
spi_xcomm->gc.get_direction = spi_xcomm_gpio_get_direction;
- spi_xcomm->gc.set = spi_xcomm_gpio_set_value;
+ spi_xcomm->gc.set_rv = spi_xcomm_gpio_set_value;
spi_xcomm->gc.can_sleep = 1;
spi_xcomm->gc.base = -1;
spi_xcomm->gc.ngpio = 1;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 90e27729ef6b..1bc0fdbb1bd7 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1076,10 +1076,8 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
* Avoid calling into the driver (or doing delays) if the chip select
* isn't actually changing from the last time this was called.
*/
- if (!force && ((enable && spi->controller->last_cs_index_mask == spi->cs_index_mask &&
- spi_is_last_cs(spi)) ||
- (!enable && spi->controller->last_cs_index_mask == spi->cs_index_mask &&
- !spi_is_last_cs(spi))) &&
+ if (!force && (enable == spi_is_last_cs(spi)) &&
+ (spi->controller->last_cs_index_mask == spi->cs_index_mask) &&
(spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH)))
return;
@@ -1088,9 +1086,9 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
spi->controller->last_cs_index_mask = spi->cs_index_mask;
for (idx = 0; idx < SPI_CS_CNT_MAX; idx++)
spi->controller->last_cs[idx] = enable ? spi_get_chipselect(spi, 0) : SPI_INVALID_CS;
- spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
- if (spi->mode & SPI_CS_HIGH)
+ spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
+ if (spi->controller->last_cs_mode_high)
enable = !enable;
/*
@@ -3802,7 +3800,7 @@ int spi_split_transfers_maxwords(struct spi_controller *ctlr,
size_t maxsize;
int ret;
- maxsize = maxwords * roundup_pow_of_two(BITS_TO_BYTES(xfer->bits_per_word));
+ maxsize = maxwords * spi_bpw_to_bytes(xfer->bits_per_word);
if (xfer->len > maxsize) {
ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer,
maxsize);
@@ -4096,6 +4094,13 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
if (__spi_validate_bits_per_word(ctlr, xfer->bits_per_word))
return -EINVAL;
+ /* DDR mode is supported only if controller has dtr_caps=true.
+ * default considered as SDR mode for SPI and QSPI controller.
+ * Note: This is applicable only to QSPI controller.
+ */
+ if (xfer->dtr_mode && !ctlr->dtr_caps)
+ return -EINVAL;
+
/*
* SPI transfer length should be multiple of SPI word size
* where SPI word size should be power-of-two multiple.
diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c
index b297261818f2..432540e1bc9a 100644
--- a/drivers/staging/gpib/common/iblib.c
+++ b/drivers/staging/gpib/common/iblib.c
@@ -611,7 +611,7 @@ static void start_wait_timer(struct wait_info *winfo)
static void remove_wait_timer(struct wait_info *winfo)
{
timer_delete_sync(&winfo->timer);
- destroy_timer_on_stack(&winfo->timer);
+ timer_destroy_on_stack(&winfo->timer);
}
/*
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index d3f9686e26e7..a09c188b9ad1 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -257,7 +257,7 @@ config HISI_THERMAL
depends on ARCH_HISI || COMPILE_TEST
depends on HAS_IOMEM
depends on OF
- default y
+ default ARCH_HISI
help
Enable this to plug hisilicon's thermal sensor driver into the Linux
thermal framework. cpufreq is used as the cooling device to throttle
@@ -327,6 +327,15 @@ config QORIQ_THERMAL
cpufreq is used as the cooling device to throttle CPUs when the
passive trip is crossed.
+config AIROHA_THERMAL
+ tristate "Airoha thermal sensor driver"
+ depends on ARCH_AIROHA || COMPILE_TEST
+ depends on MFD_SYSCON
+ depends on OF
+ help
+ Enable this to plug the Airoha thermal sensor driver into the Linux
+ thermal framework.
+
config SPEAR_THERMAL
tristate "SPEAr thermal sensor driver"
depends on PLAT_SPEAR || COMPILE_TEST
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 9abf43a74f2b..d7718978db24 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_K3_THERMAL) += k3_bandgap.o k3_j72xx_bandgap.o
# platform thermal drivers
obj-y += broadcom/
obj-$(CONFIG_THERMAL_MMIO) += thermal_mmio.o
+obj-$(CONFIG_AIROHA_THERMAL) += airoha_thermal.o
obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o
obj-$(CONFIG_SUN8I_THERMAL) += sun8i_thermal.o
obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o
diff --git a/drivers/thermal/airoha_thermal.c b/drivers/thermal/airoha_thermal.c
new file mode 100644
index 000000000000..b9fd6bfc88e5
--- /dev/null
+++ b/drivers/thermal/airoha_thermal.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/module.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/thermal.h>
+
+/* SCU regs */
+#define EN7581_PLLRG_PROTECT 0x268
+#define EN7581_PWD_TADC 0x2ec
+#define EN7581_MUX_TADC GENMASK(3, 1)
+#define EN7581_DOUT_TADC 0x2f8
+#define EN7581_DOUT_TADC_MASK GENMASK(15, 0)
+
+/* PTP_THERMAL regs */
+#define EN7581_TEMPMONCTL0 0x800
+#define EN7581_SENSE3_EN BIT(3)
+#define EN7581_SENSE2_EN BIT(2)
+#define EN7581_SENSE1_EN BIT(1)
+#define EN7581_SENSE0_EN BIT(0)
+#define EN7581_TEMPMONCTL1 0x804
+/* period unit calculated in BUS clock * 256 scaling-up */
+#define EN7581_PERIOD_UNIT GENMASK(9, 0)
+#define EN7581_TEMPMONCTL2 0x808
+#define EN7581_FILT_INTERVAL GENMASK(25, 16)
+#define EN7581_SEN_INTERVAL GENMASK(9, 0)
+#define EN7581_TEMPMONINT 0x80C
+#define EN7581_STAGE3_INT_EN BIT(31)
+#define EN7581_STAGE2_INT_EN BIT(30)
+#define EN7581_STAGE1_INT_EN BIT(29)
+#define EN7581_FILTER_INT_EN_3 BIT(28)
+#define EN7581_IMMD_INT_EN3 BIT(27)
+#define EN7581_NOHOTINTEN3 BIT(26)
+#define EN7581_HOFSINTEN3 BIT(25)
+#define EN7581_LOFSINTEN3 BIT(24)
+#define EN7581_HINTEN3 BIT(23)
+#define EN7581_CINTEN3 BIT(22)
+#define EN7581_FILTER_INT_EN_2 BIT(21)
+#define EN7581_FILTER_INT_EN_1 BIT(20)
+#define EN7581_FILTER_INT_EN_0 BIT(19)
+#define EN7581_IMMD_INT_EN2 BIT(18)
+#define EN7581_IMMD_INT_EN1 BIT(17)
+#define EN7581_IMMD_INT_EN0 BIT(16)
+#define EN7581_TIME_OUT_INT_EN BIT(15)
+#define EN7581_NOHOTINTEN2 BIT(14)
+#define EN7581_HOFSINTEN2 BIT(13)
+#define EN7581_LOFSINTEN2 BIT(12)
+#define EN7581_HINTEN2 BIT(11)
+#define EN7581_CINTEN2 BIT(10)
+#define EN7581_NOHOTINTEN1 BIT(9)
+#define EN7581_HOFSINTEN1 BIT(8)
+#define EN7581_LOFSINTEN1 BIT(7)
+#define EN7581_HINTEN1 BIT(6)
+#define EN7581_CINTEN1 BIT(5)
+#define EN7581_NOHOTINTEN0 BIT(4)
+/* Similar to COLD and HOT also these seems to be swapped in documentation */
+#define EN7581_LOFSINTEN0 BIT(3) /* In documentation: BIT(2) */
+#define EN7581_HOFSINTEN0 BIT(2) /* In documentation: BIT(3) */
+/* It seems documentation have these swapped as the HW
+ * - Fire BIT(1) when lower than EN7581_COLD_THRE
+ * - Fire BIT(0) and BIT(5) when higher than EN7581_HOT2NORMAL_THRE or
+ * EN7581_HOT_THRE
+ */
+#define EN7581_CINTEN0 BIT(1) /* In documentation: BIT(0) */
+#define EN7581_HINTEN0 BIT(0) /* In documentation: BIT(1) */
+#define EN7581_TEMPMONINTSTS 0x810
+#define EN7581_STAGE3_INT_STAT BIT(31)
+#define EN7581_STAGE2_INT_STAT BIT(30)
+#define EN7581_STAGE1_INT_STAT BIT(29)
+#define EN7581_FILTER_INT_STAT_3 BIT(28)
+#define EN7581_IMMD_INT_STS3 BIT(27)
+#define EN7581_NOHOTINTSTS3 BIT(26)
+#define EN7581_HOFSINTSTS3 BIT(25)
+#define EN7581_LOFSINTSTS3 BIT(24)
+#define EN7581_HINTSTS3 BIT(23)
+#define EN7581_CINTSTS3 BIT(22)
+#define EN7581_FILTER_INT_STAT_2 BIT(21)
+#define EN7581_FILTER_INT_STAT_1 BIT(20)
+#define EN7581_FILTER_INT_STAT_0 BIT(19)
+#define EN7581_IMMD_INT_STS2 BIT(18)
+#define EN7581_IMMD_INT_STS1 BIT(17)
+#define EN7581_IMMD_INT_STS0 BIT(16)
+#define EN7581_TIME_OUT_INT_STAT BIT(15)
+#define EN7581_NOHOTINTSTS2 BIT(14)
+#define EN7581_HOFSINTSTS2 BIT(13)
+#define EN7581_LOFSINTSTS2 BIT(12)
+#define EN7581_HINTSTS2 BIT(11)
+#define EN7581_CINTSTS2 BIT(10)
+#define EN7581_NOHOTINTSTS1 BIT(9)
+#define EN7581_HOFSINTSTS1 BIT(8)
+#define EN7581_LOFSINTSTS1 BIT(7)
+#define EN7581_HINTSTS1 BIT(6)
+#define EN7581_CINTSTS1 BIT(5)
+#define EN7581_NOHOTINTSTS0 BIT(4)
+/* Similar to COLD and HOT also these seems to be swapped in documentation */
+#define EN7581_LOFSINTSTS0 BIT(3) /* In documentation: BIT(2) */
+#define EN7581_HOFSINTSTS0 BIT(2) /* In documentation: BIT(3) */
+/* It seems documentation have these swapped as the HW
+ * - Fire BIT(1) when lower than EN7581_COLD_THRE
+ * - Fire BIT(0) and BIT(5) when higher than EN7581_HOT2NORMAL_THRE or
+ * EN7581_HOT_THRE
+ *
+ * To clear things, we swap the define but we keep them documented here.
+ */
+#define EN7581_CINTSTS0 BIT(1) /* In documentation: BIT(0) */
+#define EN7581_HINTSTS0 BIT(0) /* In documentation: BIT(1)*/
+/* Monitor will take the bigger threshold between HOT2NORMAL and HOT
+ * and will fire both HOT2NORMAL and HOT interrupt when higher than the 2
+ *
+ * It has also been observed that not setting HOT2NORMAL makes the monitor
+ * treat COLD threshold as HOT2NORMAL.
+ */
+#define EN7581_TEMPH2NTHRE 0x824
+/* It seems HOT2NORMAL is actually NORMAL2HOT */
+#define EN7581_HOT2NORMAL_THRE GENMASK(11, 0)
+#define EN7581_TEMPHTHRE 0x828
+#define EN7581_HOT_THRE GENMASK(11, 0)
+/* Monitor will use this as HOT2NORMAL (fire interrupt when lower than...)*/
+#define EN7581_TEMPCTHRE 0x82c
+#define EN7581_COLD_THRE GENMASK(11, 0)
+/* Also LOW and HIGH offset register are swapped */
+#define EN7581_TEMPOFFSETL 0x830 /* In documentation: 0x834 */
+#define EN7581_LOW_OFFSET GENMASK(11, 0)
+#define EN7581_TEMPOFFSETH 0x834 /* In documentation: 0x830 */
+#define EN7581_HIGH_OFFSET GENMASK(11, 0)
+#define EN7581_TEMPMSRCTL0 0x838
+#define EN7581_MSRCTL3 GENMASK(11, 9)
+#define EN7581_MSRCTL2 GENMASK(8, 6)
+#define EN7581_MSRCTL1 GENMASK(5, 3)
+#define EN7581_MSRCTL0 GENMASK(2, 0)
+#define EN7581_TEMPADCVALIDADDR 0x878
+#define EN7581_ADC_VALID_ADDR GENMASK(31, 0)
+#define EN7581_TEMPADCVOLTADDR 0x87c
+#define EN7581_ADC_VOLT_ADDR GENMASK(31, 0)
+#define EN7581_TEMPRDCTRL 0x880
+/*
+ * NOTICE: AHB have this set to 0 by default. Means that
+ * the same addr is used for ADC volt and valid reading.
+ * In such case, VALID ADDR is used and volt addr is ignored.
+ */
+#define EN7581_RD_CTRL_DIFF BIT(0)
+#define EN7581_TEMPADCVALIDMASK 0x884
+#define EN7581_ADV_RD_VALID_POLARITY BIT(5)
+#define EN7581_ADV_RD_VALID_POS GENMASK(4, 0)
+#define EN7581_TEMPADCVOLTAGESHIFT 0x888
+#define EN7581_ADC_VOLTAGE_SHIFT GENMASK(4, 0)
+/*
+ * Same values for each CTL.
+ * Can operate in:
+ * - 1 sample
+ * - 2 sample and make average of them
+ * - 4,6,10,16 sample, drop max and min and make average of them
+ */
+#define EN7581_MSRCTL_1SAMPLE 0x0
+#define EN7581_MSRCTL_AVG2SAMPLE 0x1
+#define EN7581_MSRCTL_4SAMPLE_MAX_MIX_AVG2 0x2
+#define EN7581_MSRCTL_6SAMPLE_MAX_MIX_AVG4 0x3
+#define EN7581_MSRCTL_10SAMPLE_MAX_MIX_AVG8 0x4
+#define EN7581_MSRCTL_18SAMPLE_MAX_MIX_AVG16 0x5
+#define EN7581_TEMPAHBPOLL 0x840
+#define EN7581_ADC_POLL_INTVL GENMASK(31, 0)
+/* PTPSPARE0,2 reg are used to store efuse info for calibrated temp offset */
+#define EN7581_EFUSE_TEMP_OFFSET_REG 0xf20 /* PTPSPARE0 */
+#define EN7581_EFUSE_TEMP_OFFSET GENMASK(31, 16)
+#define EN7581_PTPSPARE1 0xf24 /* PTPSPARE1 */
+#define EN7581_EFUSE_TEMP_CPU_SENSOR_REG 0xf28 /* PTPSPARE2 */
+
+#define EN7581_SLOPE_X100_DIO_DEFAULT 5645
+#define EN7581_SLOPE_X100_DIO_AVS 5645
+
+#define EN7581_INIT_TEMP_CPK_X10 300
+#define EN7581_INIT_TEMP_FTK_X10 620
+#define EN7581_INIT_TEMP_NONK_X10 550
+
+#define EN7581_SCU_THERMAL_PROTECT_KEY 0x12
+#define EN7581_SCU_THERMAL_MUX_DIODE1 0x7
+
+/* Convert temp to raw value as read from ADC ((((temp / 100) - init) * slope) / 1000) + offset */
+#define TEMP_TO_RAW(priv, temp) ((((((temp) / 100) - (priv)->init_temp) * \
+ (priv)->default_slope) / 1000) + \
+ (priv)->default_offset)
+
+/* Convert raw to temp ((((temp - offset) * 1000) / slope + init) * 100) */
+#define RAW_TO_TEMP(priv, raw) (((((raw) - (priv)->default_offset) * 1000) / \
+ (priv)->default_slope + \
+ (priv)->init_temp) * 100)
+
+#define AIROHA_MAX_SAMPLES 6
+
+struct airoha_thermal_priv {
+ void __iomem *base;
+ struct regmap *chip_scu;
+ struct resource scu_adc_res;
+
+ struct thermal_zone_device *tz;
+ int init_temp;
+ int default_slope;
+ int default_offset;
+};
+
+static int airoha_get_thermal_ADC(struct airoha_thermal_priv *priv)
+{
+ u32 val;
+
+ regmap_read(priv->chip_scu, EN7581_DOUT_TADC, &val);
+ return FIELD_GET(EN7581_DOUT_TADC_MASK, val);
+}
+
+static void airoha_init_thermal_ADC_mode(struct airoha_thermal_priv *priv)
+{
+ u32 adc_mux, pllrg;
+
+ /* Save PLLRG current value */
+ regmap_read(priv->chip_scu, EN7581_PLLRG_PROTECT, &pllrg);
+
+ /* Give access to thermal regs */
+ regmap_write(priv->chip_scu, EN7581_PLLRG_PROTECT, EN7581_SCU_THERMAL_PROTECT_KEY);
+ adc_mux = FIELD_PREP(EN7581_MUX_TADC, EN7581_SCU_THERMAL_MUX_DIODE1);
+ regmap_write(priv->chip_scu, EN7581_PWD_TADC, adc_mux);
+
+ /* Restore PLLRG value on exit */
+ regmap_write(priv->chip_scu, EN7581_PLLRG_PROTECT, pllrg);
+}
+
+static int airoha_thermal_get_temp(struct thermal_zone_device *tz, int *temp)
+{
+ struct airoha_thermal_priv *priv = thermal_zone_device_priv(tz);
+ int min_value, max_value, avg_value, value;
+ int i;
+
+ avg_value = 0;
+ min_value = INT_MAX;
+ max_value = INT_MIN;
+
+ for (i = 0; i < AIROHA_MAX_SAMPLES; i++) {
+ value = airoha_get_thermal_ADC(priv);
+ min_value = min(value, min_value);
+ max_value = max(value, max_value);
+ avg_value += value;
+ }
+
+ /* Drop min and max and average for the remaining sample */
+ avg_value -= (min_value + max_value);
+ avg_value /= AIROHA_MAX_SAMPLES - 2;
+
+ *temp = RAW_TO_TEMP(priv, avg_value);
+ return 0;
+}
+
+static int airoha_thermal_set_trips(struct thermal_zone_device *tz, int low,
+ int high)
+{
+ struct airoha_thermal_priv *priv = thermal_zone_device_priv(tz);
+ bool enable_monitor = false;
+
+ if (high != INT_MAX) {
+ /* Validate high and clamp it a supported value */
+ high = clamp_t(int, high, RAW_TO_TEMP(priv, 0),
+ RAW_TO_TEMP(priv, FIELD_MAX(EN7581_DOUT_TADC_MASK)));
+
+ /* We offset the high temp of 1°C to trigger correct event */
+ writel(TEMP_TO_RAW(priv, high) >> 4,
+ priv->base + EN7581_TEMPOFFSETH);
+
+ enable_monitor = true;
+ }
+
+ if (low != -INT_MAX) {
+ /* Validate low and clamp it to a supported value */
+ low = clamp_t(int, high, RAW_TO_TEMP(priv, 0),
+ RAW_TO_TEMP(priv, FIELD_MAX(EN7581_DOUT_TADC_MASK)));
+
+ /* We offset the low temp of 1°C to trigger correct event */
+ writel(TEMP_TO_RAW(priv, low) >> 4,
+ priv->base + EN7581_TEMPOFFSETL);
+
+ enable_monitor = true;
+ }
+
+ /* Enable sensor 0 monitor after trip are set */
+ if (enable_monitor)
+ writel(EN7581_SENSE0_EN, priv->base + EN7581_TEMPMONCTL0);
+
+ return 0;
+}
+
+static const struct thermal_zone_device_ops thdev_ops = {
+ .get_temp = airoha_thermal_get_temp,
+ .set_trips = airoha_thermal_set_trips,
+};
+
+static irqreturn_t airoha_thermal_irq(int irq, void *data)
+{
+ struct airoha_thermal_priv *priv = data;
+ enum thermal_notify_event event;
+ bool update = false;
+ u32 status;
+
+ status = readl(priv->base + EN7581_TEMPMONINTSTS);
+ switch (status & (EN7581_HOFSINTSTS0 | EN7581_LOFSINTSTS0)) {
+ case EN7581_HOFSINTSTS0:
+ event = THERMAL_TRIP_VIOLATED;
+ update = true;
+ break;
+ case EN7581_LOFSINTSTS0:
+ event = THERMAL_EVENT_UNSPECIFIED;
+ update = true;
+ break;
+ default:
+ /* Should be impossible as we enable only these Interrupt */
+ break;
+ }
+
+ /* Reset Interrupt */
+ writel(status, priv->base + EN7581_TEMPMONINTSTS);
+
+ if (update)
+ thermal_zone_device_update(priv->tz, event);
+
+ return IRQ_HANDLED;
+}
+
+static void airoha_thermal_setup_adc_val(struct device *dev,
+ struct airoha_thermal_priv *priv)
+{
+ u32 efuse_calib_info, cpu_sensor;
+
+ /* Setup thermal sensor to ADC mode and setup the mux to DIODE1 */
+ airoha_init_thermal_ADC_mode(priv);
+ /* sleep 10 ms for ADC to enable */
+ usleep_range(10 * USEC_PER_MSEC, 11 * USEC_PER_MSEC);
+
+ efuse_calib_info = readl(priv->base + EN7581_EFUSE_TEMP_OFFSET_REG);
+ if (efuse_calib_info) {
+ priv->default_offset = FIELD_GET(EN7581_EFUSE_TEMP_OFFSET, efuse_calib_info);
+ /* Different slope are applied if the sensor is used for CPU or for package */
+ cpu_sensor = readl(priv->base + EN7581_EFUSE_TEMP_CPU_SENSOR_REG);
+ if (cpu_sensor) {
+ priv->default_slope = EN7581_SLOPE_X100_DIO_DEFAULT;
+ priv->init_temp = EN7581_INIT_TEMP_FTK_X10;
+ } else {
+ priv->default_slope = EN7581_SLOPE_X100_DIO_AVS;
+ priv->init_temp = EN7581_INIT_TEMP_CPK_X10;
+ }
+ } else {
+ priv->default_offset = airoha_get_thermal_ADC(priv);
+ priv->default_slope = EN7581_SLOPE_X100_DIO_DEFAULT;
+ priv->init_temp = EN7581_INIT_TEMP_NONK_X10;
+ dev_info(dev, "missing thermal calibration EFUSE, using non calibrated value\n");
+ }
+}
+
+static void airoha_thermal_setup_monitor(struct airoha_thermal_priv *priv)
+{
+ /* Set measure mode */
+ writel(FIELD_PREP(EN7581_MSRCTL0, EN7581_MSRCTL_6SAMPLE_MAX_MIX_AVG4),
+ priv->base + EN7581_TEMPMSRCTL0);
+
+ /*
+ * Configure ADC valid reading addr
+ * The AHB temp monitor system doesn't have direct access to the
+ * thermal sensor. It does instead work by providing various
+ * addresses to configure how to access and setup an ADC for the
+ * sensor. EN7581 supports only one sensor hence the
+ * implementation is greatly simplified but the AHB supports
+ * up to 4 different sensors from the same ADC that can be
+ * switched by tuning the ADC mux or writing address.
+ *
+ * We set valid instead of volt as we don't enable valid/volt
+ * split reading and AHB read valid addr in such case.
+ */
+ writel(priv->scu_adc_res.start + EN7581_DOUT_TADC,
+ priv->base + EN7581_TEMPADCVALIDADDR);
+
+ /*
+ * Configure valid bit on a fake value of bit 16. The ADC outputs
+ * max of 2 bytes for voltage.
+ */
+ writel(FIELD_PREP(EN7581_ADV_RD_VALID_POS, 16),
+ priv->base + EN7581_TEMPADCVALIDMASK);
+
+ /*
+ * AHB supports max 12 bytes for ADC voltage. Shift the read
+ * value 4 bit to the right. Precision lost by this is minimal
+ * in the order of half a °C and is acceptable in the context
+ * of triggering interrupt in critical condition.
+ */
+ writel(FIELD_PREP(EN7581_ADC_VOLTAGE_SHIFT, 4),
+ priv->base + EN7581_TEMPADCVOLTAGESHIFT);
+
+ /* BUS clock is 300MHz counting unit is 3 * 68.64 * 256 = 52.715us */
+ writel(FIELD_PREP(EN7581_PERIOD_UNIT, 3),
+ priv->base + EN7581_TEMPMONCTL1);
+
+ /*
+ * filt interval is 1 * 52.715us = 52.715us,
+ * sen interval is 379 * 52.715us = 19.97ms
+ */
+ writel(FIELD_PREP(EN7581_FILT_INTERVAL, 1) |
+ FIELD_PREP(EN7581_FILT_INTERVAL, 379),
+ priv->base + EN7581_TEMPMONCTL2);
+
+ /* AHB poll is set to 146 * 68.64 = 10.02us */
+ writel(FIELD_PREP(EN7581_ADC_POLL_INTVL, 146),
+ priv->base + EN7581_TEMPAHBPOLL);
+}
+
+static int airoha_thermal_probe(struct platform_device *pdev)
+{
+ struct airoha_thermal_priv *priv;
+ struct device_node *chip_scu_np;
+ struct device *dev = &pdev->dev;
+ int irq, ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ chip_scu_np = of_parse_phandle(dev->of_node, "airoha,chip-scu", 0);
+ if (!chip_scu_np)
+ return -EINVAL;
+
+ priv->chip_scu = syscon_node_to_regmap(chip_scu_np);
+ if (IS_ERR(priv->chip_scu))
+ return PTR_ERR(priv->chip_scu);
+
+ of_address_to_resource(chip_scu_np, 0, &priv->scu_adc_res);
+ of_node_put(chip_scu_np);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+ airoha_thermal_irq, IRQF_ONESHOT,
+ pdev->name, priv);
+ if (ret) {
+ dev_err(dev, "Can't get interrupt working.\n");
+ return ret;
+ }
+
+ airoha_thermal_setup_monitor(priv);
+ airoha_thermal_setup_adc_val(dev, priv);
+
+ /* register of thermal sensor and get info from DT */
+ priv->tz = devm_thermal_of_zone_register(dev, 0, priv, &thdev_ops);
+ if (IS_ERR(priv->tz)) {
+ dev_err(dev, "register thermal zone sensor failed\n");
+ return PTR_ERR(priv->tz);
+ }
+
+ platform_set_drvdata(pdev, priv);
+
+ /* Enable LOW and HIGH interrupt */
+ writel(EN7581_HOFSINTEN0 | EN7581_LOFSINTEN0,
+ priv->base + EN7581_TEMPMONINT);
+
+ return 0;
+}
+
+static const struct of_device_id airoha_thermal_match[] = {
+ { .compatible = "airoha,en7581-thermal" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, airoha_thermal_match);
+
+static struct platform_driver airoha_thermal_driver = {
+ .driver = {
+ .name = "airoha-thermal",
+ .of_match_table = airoha_thermal_match,
+ },
+ .probe = airoha_thermal_probe,
+};
+
+module_platform_driver(airoha_thermal_driver);
+
+MODULE_AUTHOR("Christian Marangi <ansuelsmth@gmail.com>");
+MODULE_DESCRIPTION("Airoha thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/amlogic_thermal.c b/drivers/thermal/amlogic_thermal.c
index 3c5f7dbddf2c..5448d772db12 100644
--- a/drivers/thermal/amlogic_thermal.c
+++ b/drivers/thermal/amlogic_thermal.c
@@ -7,10 +7,10 @@
*
* Register value to celsius temperature formulas:
* Read_Val m * U
- * U = ---------, Uptat = ---------
+ * U = ---------, uptat = ---------
* 2^16 1 + n * U
*
- * Temperature = A * ( Uptat + u_efuse / 2^16 )- B
+ * Temperature = A * ( uptat + u_efuse / 2^16 )- B
*
* A B m n : calibration parameters
* u_efuse : fused calibration value, it's a signed 16 bits value
@@ -112,7 +112,7 @@ static int amlogic_thermal_code_to_millicelsius(struct amlogic_thermal *pdata,
const struct amlogic_thermal_soc_calib_data *param =
pdata->data->calibration_parameters;
int temp;
- s64 factor, Uptat, uefuse;
+ s64 factor, uptat, uefuse;
uefuse = pdata->trim_info & TSENSOR_TRIM_SIGN_MASK ?
~(pdata->trim_info & TSENSOR_TRIM_TEMP_MASK) + 1 :
@@ -121,12 +121,12 @@ static int amlogic_thermal_code_to_millicelsius(struct amlogic_thermal *pdata,
factor = param->n * temp_code;
factor = div_s64(factor, 100);
- Uptat = temp_code * param->m;
- Uptat = div_s64(Uptat, 100);
- Uptat = Uptat * BIT(16);
- Uptat = div_s64(Uptat, BIT(16) + factor);
+ uptat = temp_code * param->m;
+ uptat = div_s64(uptat, 100);
+ uptat = uptat * BIT(16);
+ uptat = div_s64(uptat, BIT(16) + factor);
- temp = (Uptat + uefuse) * param->A;
+ temp = (uptat + uefuse) * param->A;
temp = div_s64(temp, BIT(16));
temp = (temp - param->B) * 100;
diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
index 7fbba2233c4c..685a5aee5e0d 100644
--- a/drivers/thermal/broadcom/bcm2835_thermal.c
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c
@@ -192,7 +192,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev)
rate = clk_get_rate(data->clk);
if ((rate < 1920000) || (rate > 5000000))
dev_warn(dev,
- "Clock %pCn running at %lu Hz is outside of the recommended range: 1.92 to 5MHz\n",
+ "Clock %pC running at %lu Hz is outside of the recommended range: 1.92 to 5MHz\n",
data->clk, rate);
/* register of thermal sensor and get info from DT */
diff --git a/drivers/thermal/intel/int340x_thermal/Makefile b/drivers/thermal/intel/int340x_thermal/Makefile
index fe3f43924525..184318d1792b 100644
--- a/drivers/thermal/intel/int340x_thermal/Makefile
+++ b/drivers/thermal/intel/int340x_thermal/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device_pci_legacy.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device_pci.o
obj-$(CONFIG_PROC_THERMAL_MMIO_RAPL) += processor_thermal_rapl.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_rfim.o
+obj-$(CONFIG_INT340X_THERMAL) += platform_temperature_control.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_mbox.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_wt_req.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_wt_hint.o
diff --git a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
new file mode 100644
index 000000000000..2d6504514893
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor thermal device platform temperature controls
+ * Copyright (c) 2025, Intel Corporation.
+ */
+
+/*
+ * Platform temperature controls hardware interface
+ *
+ * The hardware control interface is via MMIO offsets in the processor
+ * thermal device MMIO space. There are three instances of MMIO registers.
+ * All registers are 64 bit wide with RW access.
+ *
+ * Name: PLATFORM_TEMPERATURE_CONTROL
+ * Offsets: 0x5B20, 0x5B28, 0x5B30
+ *
+ * Bits Description
+ * 7:0 TARGET_TEMP : Target temperature limit to which the control
+ * mechanism is regulating. Units: 0.5C.
+ * 8:8 ENABLE: Read current enable status of the feature or enable
+ * feature.
+ * 11:9 GAIN: Sets the aggressiveness of control loop from 0 to 7
+ * 7 graceful, favors performance at the expense of temperature
+ * overshoots
+ * 0 aggressive, favors tight regulation over performance
+ * 12:12 TEMPERATURE_OVERRIDE_EN
+ * When set, hardware will use TEMPERATURE_OVERRIDE values instead
+ * of reading from corresponding sensor.
+ * 15:13 RESERVED
+ * 23:16 MIN_PERFORMANCE_LEVEL: Minimum Performance level below which the
+ * there will be no throttling. 0 - all levels of throttling allowed
+ * including survivability actions. 255 - no throttling allowed.
+ * 31:24 TEMPERATURE_OVERRIDE: Allows SW to override the input temperature.
+ * hardware will use this value instead of the sensor temperature.
+ * Units: 0.5C.
+ * 63:32 RESERVED
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "processor_thermal_device.h"
+
+struct mmio_reg {
+ int bits;
+ u16 mask;
+ u16 shift;
+ u16 units;
+};
+
+#define MAX_ATTR_GROUP_NAME_LEN 32
+#define PTC_MAX_ATTRS 3
+
+struct ptc_data {
+ u32 offset;
+ struct attribute_group ptc_attr_group;
+ struct attribute *ptc_attrs[PTC_MAX_ATTRS];
+ struct device_attribute temperature_target_attr;
+ struct device_attribute enable_attr;
+ char group_name[MAX_ATTR_GROUP_NAME_LEN];
+};
+
+static const struct mmio_reg ptc_mmio_regs[] = {
+ { 8, 0xFF, 0, 500}, /* temperature_target, units 0.5C*/
+ { 1, 0x01, 8, 0}, /* enable */
+ { 3, 0x7, 9, 0}, /* gain */
+ { 8, 0xFF, 16, 0}, /* min_performance_level */
+ { 1, 0x1, 12, 0}, /* temperature_override_enable */
+ { 8, 0xFF, 24, 500}, /* temperature_override, units 0.5C */
+};
+
+#define PTC_MAX_INSTANCES 3
+
+/* Unique offset for each PTC instance */
+static u32 ptc_offsets[PTC_MAX_INSTANCES] = {0x5B20, 0x5B28, 0x5B30};
+
+/* These will represent sysfs attribute names */
+static const char * const ptc_strings[] = {
+ "temperature_target",
+ "enable",
+ NULL
+};
+
+/* Lock to protect concurrent read/write and read-modify-write */
+static DEFINE_MUTEX(ptc_lock);
+
+static ssize_t ptc_mmio_show(struct ptc_data *data, struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct proc_thermal_device *proc_priv;
+ const struct mmio_reg *mmio_regs;
+ int ret, units;
+ u64 reg_val;
+
+ proc_priv = pci_get_drvdata(pdev);
+ mmio_regs = ptc_mmio_regs;
+ ret = match_string(ptc_strings, -1, attr->attr.name);
+ if (ret < 0)
+ return ret;
+
+ units = mmio_regs[ret].units;
+
+ guard(mutex)(&ptc_lock);
+
+ reg_val = readq((void __iomem *) (proc_priv->mmio_base + data->offset));
+ ret = (reg_val >> mmio_regs[ret].shift) & mmio_regs[ret].mask;
+ if (units)
+ ret *= units;
+
+ return sysfs_emit(buf, "%d\n", ret);
+}
+
+#define PTC_SHOW(suffix)\
+static ssize_t suffix##_show(struct device *dev,\
+ struct device_attribute *attr,\
+ char *buf)\
+{\
+ struct ptc_data *data = container_of(attr, struct ptc_data, suffix##_attr);\
+ return ptc_mmio_show(data, dev, attr, buf);\
+}
+
+static void ptc_mmio_write(struct pci_dev *pdev, u32 offset, int index, u32 value)
+{
+ struct proc_thermal_device *proc_priv;
+ u64 mask, reg_val;
+
+ proc_priv = pci_get_drvdata(pdev);
+
+ mask = GENMASK_ULL(ptc_mmio_regs[index].shift + ptc_mmio_regs[index].bits - 1,
+ ptc_mmio_regs[index].shift);
+
+ guard(mutex)(&ptc_lock);
+
+ reg_val = readq((void __iomem *) (proc_priv->mmio_base + offset));
+ reg_val &= ~mask;
+ reg_val |= (value << ptc_mmio_regs[index].shift);
+ writeq(reg_val, (void __iomem *) (proc_priv->mmio_base + offset));
+}
+
+static int ptc_store(struct ptc_data *data, struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ unsigned int input;
+ int ret;
+
+ ret = kstrtouint(buf, 10, &input);
+ if (ret)
+ return ret;
+
+ ret = match_string(ptc_strings, -1, attr->attr.name);
+ if (ret < 0)
+ return ret;
+
+ if (ptc_mmio_regs[ret].units)
+ input /= ptc_mmio_regs[ret].units;
+
+ if (input > ptc_mmio_regs[ret].mask)
+ return -EINVAL;
+
+ ptc_mmio_write(pdev, data->offset, ret, input);
+
+ return count;
+}
+
+#define PTC_STORE(suffix)\
+static ssize_t suffix##_store(struct device *dev,\
+ struct device_attribute *attr,\
+ const char *buf, size_t count)\
+{\
+ struct ptc_data *data = container_of(attr, struct ptc_data, suffix##_attr);\
+ return ptc_store(data, dev, attr, buf, count);\
+}
+
+PTC_SHOW(temperature_target);
+PTC_STORE(temperature_target);
+PTC_SHOW(enable);
+PTC_STORE(enable);
+
+#define ptc_init_attribute(_name)\
+ do {\
+ sysfs_attr_init(&data->_name##_attr.attr);\
+ data->_name##_attr.show = _name##_show;\
+ data->_name##_attr.store = _name##_store;\
+ data->_name##_attr.attr.name = #_name;\
+ data->_name##_attr.attr.mode = 0644;\
+ } while (0)
+
+static int ptc_create_groups(struct pci_dev *pdev, int instance, struct ptc_data *data)
+{
+ int ret, index = 0;
+
+ ptc_init_attribute(temperature_target);
+ ptc_init_attribute(enable);
+
+ data->ptc_attrs[index++] = &data->temperature_target_attr.attr;
+ data->ptc_attrs[index++] = &data->enable_attr.attr;
+ data->ptc_attrs[index] = NULL;
+
+ snprintf(data->group_name, MAX_ATTR_GROUP_NAME_LEN,
+ "ptc_%d_control", instance);
+ data->ptc_attr_group.name = data->group_name;
+ data->ptc_attr_group.attrs = data->ptc_attrs;
+
+ ret = sysfs_create_group(&pdev->dev.kobj, &data->ptc_attr_group);
+
+ return ret;
+}
+
+static struct ptc_data ptc_instance[PTC_MAX_INSTANCES];
+
+int proc_thermal_ptc_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_PTC) {
+ int i;
+
+ for (i = 0; i < PTC_MAX_INSTANCES; i++) {
+ ptc_instance[i].offset = ptc_offsets[i];
+ ptc_create_groups(pdev, i, &ptc_instance[i]);
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_ptc_add);
+
+void proc_thermal_ptc_remove(struct pci_dev *pdev)
+{
+ struct proc_thermal_device *proc_priv = pci_get_drvdata(pdev);
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_PTC) {
+ int i;
+
+ for (i = 0; i < PTC_MAX_INSTANCES; i++)
+ sysfs_remove_group(&pdev->dev.kobj, &ptc_instance[i].ptc_attr_group);
+ }
+}
+EXPORT_SYMBOL_GPL(proc_thermal_ptc_remove);
+
+MODULE_IMPORT_NS("INT340X_THERMAL");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Processor Thermal PTC Interface");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
index c868d8b7bd1c..29fcece48cad 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/thermal.h>
+#include <asm/msr.h>
#include "int340x_thermal_zone.h"
#include "processor_thermal_device.h"
#include "../intel_soc_dts_iosf.h"
@@ -153,7 +154,7 @@ static ssize_t tcc_offset_degree_celsius_store(struct device *dev,
u64 val;
int err;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, &val);
+ err = rdmsrq_safe(MSR_PLATFORM_INFO, &val);
if (err)
return err;
@@ -399,13 +400,21 @@ int proc_thermal_mmio_add(struct pci_dev *pdev,
}
}
+ if (feature_mask & PROC_THERMAL_FEATURE_PTC) {
+ ret = proc_thermal_ptc_add(pdev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to add PTC MMIO interface\n");
+ goto err_rem_rapl;
+ }
+ }
+
if (feature_mask & PROC_THERMAL_FEATURE_FIVR ||
feature_mask & PROC_THERMAL_FEATURE_DVFS ||
feature_mask & PROC_THERMAL_FEATURE_DLVR) {
ret = proc_thermal_rfim_add(pdev, proc_priv);
if (ret) {
dev_err(&pdev->dev, "failed to add RFIM interface\n");
- goto err_rem_rapl;
+ goto err_rem_ptc;
}
}
@@ -427,6 +436,8 @@ int proc_thermal_mmio_add(struct pci_dev *pdev,
err_rem_rfim:
proc_thermal_rfim_remove(pdev);
+err_rem_ptc:
+ proc_thermal_ptc_remove(pdev);
err_rem_rapl:
proc_thermal_rapl_remove();
@@ -439,6 +450,9 @@ void proc_thermal_mmio_remove(struct pci_dev *pdev, struct proc_thermal_device *
if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_RAPL)
proc_thermal_rapl_remove();
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_PTC)
+ proc_thermal_ptc_remove(pdev);
+
if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_FIVR ||
proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DVFS ||
proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR)
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
index ba2d89d3024c..9a6ca43b6fa2 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
@@ -67,6 +67,7 @@ struct rapl_mmio_regs {
#define PROC_THERMAL_FEATURE_WT_HINT 0x20
#define PROC_THERMAL_FEATURE_POWER_FLOOR 0x40
#define PROC_THERMAL_FEATURE_MSI_SUPPORT 0x80
+#define PROC_THERMAL_FEATURE_PTC 0x100
#if IS_ENABLED(CONFIG_PROC_THERMAL_MMIO_RAPL)
int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
@@ -123,4 +124,6 @@ int proc_thermal_mmio_add(struct pci_dev *pdev,
struct proc_thermal_device *proc_priv,
kernel_ulong_t feature_mask);
void proc_thermal_mmio_remove(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+int proc_thermal_ptc_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+void proc_thermal_ptc_remove(struct pci_dev *pdev);
#endif
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 2097aae39946..00160936070a 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -486,7 +486,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_REQ) },
{ PCI_DEVICE_DATA(INTEL, LNLM_THERMAL, PROC_THERMAL_FEATURE_MSI_SUPPORT |
PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS |
- PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) },
+ PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR |
+ PROC_THERMAL_FEATURE_PTC) },
{ PCI_DEVICE_DATA(INTEL, MTLP_THERMAL, PROC_THERMAL_FEATURE_RAPL |
PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_DLVR |
PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) },
@@ -497,7 +498,7 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, PTL_THERMAL, PROC_THERMAL_FEATURE_RAPL |
PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS |
PROC_THERMAL_FEATURE_MSI_SUPPORT | PROC_THERMAL_FEATURE_WT_HINT |
- PROC_THERMAL_FEATURE_POWER_FLOOR) },
+ PROC_THERMAL_FEATURE_POWER_FLOOR | PROC_THERMAL_FEATURE_PTC) },
{ },
};
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 5b18a46a10b0..bd2fca7dc017 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -284,7 +284,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
if (!raw_spin_trylock(&hfi_instance->event_lock))
return;
- rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
+ rdmsrq(MSR_IA32_PACKAGE_THERM_STATUS, msr);
hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
if (!hfi) {
raw_spin_unlock(&hfi_instance->event_lock);
@@ -356,9 +356,9 @@ static void hfi_enable(void)
{
u64 msr_val;
- rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ rdmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ wrmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
}
static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
@@ -368,7 +368,7 @@ static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
hw_table_pa = virt_to_phys(hfi_instance->hw_table);
msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+ wrmsrq(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
}
/* Caller must hold hfi_instance_lock. */
@@ -377,9 +377,9 @@ static void hfi_disable(void)
u64 msr_val;
int i;
- rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ rdmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ wrmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
/*
* Wait for hardware to acknowledge the disabling of HFI. Some
@@ -388,7 +388,7 @@ static void hfi_disable(void)
* memory.
*/
for (i = 0; i < 2000; i++) {
- rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ rdmsrq(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
break;
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 96a24df79686..9a4cec000910 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -340,7 +340,7 @@ static bool has_pkg_state_counter(void)
/* check if any one of the counter msrs exists */
while (info->msr_index) {
- if (!rdmsrl_safe(info->msr_index, &val))
+ if (!rdmsrq_safe(info->msr_index, &val))
return true;
info++;
}
@@ -356,7 +356,7 @@ static u64 pkg_state_counter(void)
while (info->msr_index) {
if (!info->skip) {
- if (!rdmsrl_safe(info->msr_index, &val))
+ if (!rdmsrq_safe(info->msr_index, &val))
count += val;
else
info->skip = true;
diff --git a/drivers/thermal/intel/intel_tcc_cooling.c b/drivers/thermal/intel/intel_tcc_cooling.c
index 9ff0ebdde0ef..f352ecafbedf 100644
--- a/drivers/thermal/intel/intel_tcc_cooling.c
+++ b/drivers/thermal/intel/intel_tcc_cooling.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/thermal.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#define TCC_PROGRAMMABLE BIT(30)
#define TCC_LOCKED BIT(31)
@@ -81,14 +82,14 @@ static int __init tcc_cooling_init(void)
if (!id)
return -ENODEV;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, &val);
+ err = rdmsrq_safe(MSR_PLATFORM_INFO, &val);
if (err)
return err;
if (!(val & TCC_PROGRAMMABLE))
return -ENODEV;
- err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+ err = rdmsrq_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
if (err)
return err;
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index e69868e868eb..debc94e2dc16 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -273,7 +273,7 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask)
}
msr_val &= ~bit_mask;
- wrmsrl(msr, msr_val);
+ wrmsrq(msr, msr_val);
}
EXPORT_SYMBOL_GPL(thermal_clear_package_intr_status);
@@ -287,7 +287,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp)
else
msr = MSR_IA32_PACKAGE_THERM_STATUS;
- rdmsrl(msr, msr_val);
+ rdmsrq(msr, msr_val);
if (msr_val & THERM_STATUS_PROCHOT_LOG)
*proc_hot = true;
else
@@ -643,7 +643,7 @@ static void notify_thresholds(__u64 msr_val)
void __weak notify_hwp_interrupt(void)
{
- wrmsrl_safe(MSR_HWP_STATUS, 0);
+ wrmsrq_safe(MSR_HWP_STATUS, 0);
}
/* Thermal transition interrupt handler */
@@ -654,7 +654,7 @@ void intel_thermal_interrupt(void)
if (static_cpu_has(X86_FEATURE_HWP))
notify_hwp_interrupt();
- rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+ rdmsrq(MSR_IA32_THERM_STATUS, msr_val);
/* Check for violation of core thermal thresholds*/
notify_thresholds(msr_val);
@@ -669,7 +669,7 @@ void intel_thermal_interrupt(void)
CORE_LEVEL);
if (this_cpu_has(X86_FEATURE_PTS)) {
- rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ rdmsrq(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
/* check violations of package thermal thresholds */
notify_package_thresholds(msr_val);
therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 496abf8e55e0..3fc679b6f11b 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -20,6 +20,7 @@
#include <linux/debugfs.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "thermal_interrupt.h"
@@ -329,6 +330,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
tj_max = intel_tcc_get_tjmax(cpu);
if (tj_max < 0)
return tj_max;
+ tj_max *= 1000;
zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
if (!zonedev)
diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c
index 088481d91e6e..985925147ac0 100644
--- a/drivers/thermal/mediatek/lvts_thermal.c
+++ b/drivers/thermal/mediatek/lvts_thermal.c
@@ -213,6 +213,13 @@ static const struct debugfs_reg32 lvts_regs[] = {
LVTS_DEBUG_FS_REGS(LVTS_CLKEN),
};
+static void lvts_debugfs_exit(void *data)
+{
+ struct lvts_domain *lvts_td = data;
+
+ debugfs_remove_recursive(lvts_td->dom_dentry);
+}
+
static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td)
{
struct debugfs_regset32 *regset;
@@ -245,12 +252,7 @@ static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td)
debugfs_create_regset32("registers", 0400, dentry, regset);
}
- return 0;
-}
-
-static void lvts_debugfs_exit(struct lvts_domain *lvts_td)
-{
- debugfs_remove_recursive(lvts_td->dom_dentry);
+ return devm_add_action_or_reset(dev, lvts_debugfs_exit, lvts_td);
}
#else
@@ -261,8 +263,6 @@ static inline int lvts_debugfs_init(struct device *dev,
return 0;
}
-static void lvts_debugfs_exit(struct lvts_domain *lvts_td) { }
-
#endif
static int lvts_raw_to_temp(u32 raw_temp, int temp_factor)
@@ -1374,8 +1374,6 @@ static void lvts_remove(struct platform_device *pdev)
for (i = 0; i < lvts_td->num_lvts_ctrl; i++)
lvts_ctrl_set_enable(&lvts_td->lvts_ctrl[i], false);
-
- lvts_debugfs_exit(lvts_td);
}
static const struct lvts_ctrl_data mt7988_lvts_ap_data_ctrl[] = {
diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
index d2d49264cf83..991d1573983d 100644
--- a/drivers/thermal/qcom/lmh.c
+++ b/drivers/thermal/qcom/lmh.c
@@ -209,7 +209,8 @@ static int lmh_probe(struct platform_device *pdev)
}
lmh_data->irq = platform_get_irq(pdev, 0);
- lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
+ lmh_data->domain = irq_domain_create_linear(of_fwnode_handle(np), 1, &lmh_irq_ops,
+ lmh_data);
if (!lmh_data->domain) {
dev_err(dev, "Error adding irq_domain\n");
return -EINVAL;
diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c
index 1a7874676f68..faa5d00788ca 100644
--- a/drivers/thermal/qcom/tsens-v1.c
+++ b/drivers/thermal/qcom/tsens-v1.c
@@ -79,6 +79,17 @@ static struct tsens_features tsens_v1_feat = {
.trip_max_temp = 120000,
};
+static struct tsens_features tsens_v1_no_rpm_feat = {
+ .ver_major = VER_1_X_NO_RPM,
+ .crit_int = 0,
+ .combo_int = 0,
+ .adc = 1,
+ .srot_split = 1,
+ .max_sensors = 11,
+ .trip_min_temp = -40000,
+ .trip_max_temp = 120000,
+};
+
static const struct reg_field tsens_v1_regfields[MAX_REGFIELDS] = {
/* ----- SROT ------ */
/* VERSION */
@@ -150,6 +161,43 @@ static int __init init_8956(struct tsens_priv *priv) {
return init_common(priv);
}
+static int __init init_tsens_v1_no_rpm(struct tsens_priv *priv)
+{
+ int i, ret;
+ u32 mask = 0;
+
+ ret = init_common(priv);
+ if (ret < 0) {
+ dev_err(priv->dev, "Init common failed %d\n", ret);
+ return ret;
+ }
+
+ ret = regmap_field_write(priv->rf[TSENS_SW_RST], 1);
+ if (ret) {
+ dev_err(priv->dev, "Reset failed\n");
+ return ret;
+ }
+
+ for (i = 0; i < priv->num_sensors; i++)
+ mask |= BIT(priv->sensor[i].hw_id);
+
+ ret = regmap_field_update_bits(priv->rf[SENSOR_EN], mask, mask);
+ if (ret) {
+ dev_err(priv->dev, "Sensor Enable failed\n");
+ return ret;
+ }
+
+ ret = regmap_field_write(priv->rf[TSENS_EN], 1);
+ if (ret) {
+ dev_err(priv->dev, "Enable failed\n");
+ return ret;
+ }
+
+ ret = regmap_field_write(priv->rf[TSENS_SW_RST], 0);
+
+ return ret;
+}
+
static const struct tsens_ops ops_generic_v1 = {
.init = init_common,
.calibrate = calibrate_v1,
@@ -194,3 +242,17 @@ struct tsens_plat_data data_8976 = {
.feat = &tsens_v1_feat,
.fields = tsens_v1_regfields,
};
+
+static const struct tsens_ops ops_ipq5018 = {
+ .init = init_tsens_v1_no_rpm,
+ .calibrate = tsens_calibrate_common,
+ .get_temp = get_temp_tsens_valid,
+};
+
+const struct tsens_plat_data data_ipq5018 = {
+ .num_sensors = 5,
+ .ops = &ops_ipq5018,
+ .hw_ids = (unsigned int []){0, 1, 2, 3, 4},
+ .feat = &tsens_v1_no_rpm_feat,
+ .fields = tsens_v1_regfields,
+};
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 1f5d4de017d9..a2422ebee816 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -447,7 +447,7 @@ static void tsens_set_interrupt(struct tsens_priv *priv, u32 hw_id,
dev_dbg(priv->dev, "[%u] %s: %s -> %s\n", hw_id, __func__,
irq_type ? ((irq_type == 1) ? "UP" : "CRITICAL") : "LOW",
enable ? "en" : "dis");
- if (tsens_version(priv) > VER_1_X)
+ if (tsens_version(priv) >= VER_2_X)
tsens_set_interrupt_v2(priv, hw_id, irq_type, enable);
else
tsens_set_interrupt_v1(priv, hw_id, irq_type, enable);
@@ -499,7 +499,7 @@ static int tsens_read_irq_state(struct tsens_priv *priv, u32 hw_id,
ret = regmap_field_read(priv->rf[LOW_INT_CLEAR_0 + hw_id], &d->low_irq_clear);
if (ret)
return ret;
- if (tsens_version(priv) > VER_1_X) {
+ if (tsens_version(priv) >= VER_2_X) {
ret = regmap_field_read(priv->rf[UP_INT_MASK_0 + hw_id], &d->up_irq_mask);
if (ret)
return ret;
@@ -543,7 +543,7 @@ static int tsens_read_irq_state(struct tsens_priv *priv, u32 hw_id,
static inline u32 masked_irq(u32 hw_id, u32 mask, enum tsens_ver ver)
{
- if (ver > VER_1_X)
+ if (ver >= VER_2_X)
return mask & (1 << hw_id);
/* v1, v0.1 don't have a irq mask register */
@@ -733,7 +733,7 @@ static int tsens_set_trips(struct thermal_zone_device *tz, int low, int high)
static int tsens_enable_irq(struct tsens_priv *priv)
{
int ret;
- int val = tsens_version(priv) > VER_1_X ? 7 : 1;
+ int val = tsens_version(priv) >= VER_2_X ? 7 : 1;
ret = regmap_field_write(priv->rf[INT_EN], val);
if (ret < 0)
@@ -975,10 +975,16 @@ int __init init_common(struct tsens_priv *priv)
ret = regmap_field_read(priv->rf[TSENS_EN], &enabled);
if (ret)
goto err_put_device;
- if (!enabled && (tsens_version(priv) != VER_2_X_NO_RPM)) {
- dev_err(dev, "%s: device not enabled\n", __func__);
- ret = -ENODEV;
- goto err_put_device;
+ if (!enabled) {
+ switch (tsens_version(priv)) {
+ case VER_1_X_NO_RPM:
+ case VER_2_X_NO_RPM:
+ break;
+ default:
+ dev_err(dev, "%s: device not enabled\n", __func__);
+ ret = -ENODEV;
+ goto err_put_device;
+ }
}
priv->rf[SENSOR_EN] = devm_regmap_field_alloc(dev, priv->srot_map,
@@ -1040,7 +1046,7 @@ int __init init_common(struct tsens_priv *priv)
}
}
- if (tsens_version(priv) > VER_1_X && ver_minor > 2) {
+ if (tsens_version(priv) >= VER_2_X && ver_minor > 2) {
/* Watchdog is present only on v2.3+ */
priv->feat->has_watchdog = 1;
for (i = WDOG_BARK_STATUS; i <= CC_MON_MASK; i++) {
@@ -1102,6 +1108,9 @@ static SIMPLE_DEV_PM_OPS(tsens_pm_ops, tsens_suspend, tsens_resume);
static const struct of_device_id tsens_table[] = {
{
+ .compatible = "qcom,ipq5018-tsens",
+ .data = &data_ipq5018,
+ }, {
.compatible = "qcom,ipq5332-tsens",
.data = &data_ipq5332,
}, {
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
index 336bc868fd7c..2a7afa4c899b 100644
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -34,6 +34,7 @@ enum tsens_ver {
VER_0 = 0,
VER_0_1,
VER_1_X,
+ VER_1_X_NO_RPM,
VER_2_X,
VER_2_X_NO_RPM,
};
@@ -651,6 +652,9 @@ extern struct tsens_plat_data data_8226, data_8909, data_8916, data_8939, data_8
/* TSENS v1 targets */
extern struct tsens_plat_data data_tsens_v1, data_8937, data_8976, data_8956;
+/* TSENS v1 with no RPM targets */
+extern const struct tsens_plat_data data_ipq5018;
+
/* TSENS v2 targets */
extern struct tsens_plat_data data_8996, data_ipq8074, data_tsens_v2;
extern const struct tsens_plat_data data_ipq5332, data_ipq5424;
diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c
index 2c5ddf0db40c..926f1052e6de 100644
--- a/drivers/thermal/tegra/soctherm.c
+++ b/drivers/thermal/tegra/soctherm.c
@@ -1234,7 +1234,7 @@ static int soctherm_oc_int_init(struct device_node *np, int num_irqs)
soc_irq_cdata.irq_chip.irq_set_type = soctherm_oc_irq_set_type;
soc_irq_cdata.irq_chip.irq_set_wake = NULL;
- soc_irq_cdata.domain = irq_domain_add_linear(np, num_irqs,
+ soc_irq_cdata.domain = irq_domain_create_linear(of_fwnode_handle(np), num_irqs,
&soctherm_oc_domain_ops,
&soc_irq_cdata);
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index c0761ccc1381..77f8ddb1f207 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -1849,25 +1849,38 @@ static void ufs_qcom_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
ufshcd_mcq_config_esi(hba, msg);
}
+struct ufs_qcom_irq {
+ unsigned int irq;
+ unsigned int idx;
+ struct ufs_hba *hba;
+};
+
static irqreturn_t ufs_qcom_mcq_esi_handler(int irq, void *data)
{
- struct msi_desc *desc = data;
- struct device *dev = msi_desc_to_dev(desc);
- struct ufs_hba *hba = dev_get_drvdata(dev);
- u32 id = desc->msi_index;
- struct ufs_hw_queue *hwq = &hba->uhq[id];
+ struct ufs_qcom_irq *qi = data;
+ struct ufs_hba *hba = qi->hba;
+ struct ufs_hw_queue *hwq = &hba->uhq[qi->idx];
- ufshcd_mcq_write_cqis(hba, 0x1, id);
+ ufshcd_mcq_write_cqis(hba, 0x1, qi->idx);
ufshcd_mcq_poll_cqe_lock(hba, hwq);
return IRQ_HANDLED;
}
+static void ufs_qcom_irq_free(struct ufs_qcom_irq *uqi)
+{
+ for (struct ufs_qcom_irq *q = uqi; q->irq; q++)
+ devm_free_irq(q->hba->dev, q->irq, q->hba);
+
+ platform_device_msi_free_irqs_all(uqi->hba->dev);
+ devm_kfree(uqi->hba->dev, uqi);
+}
+
+DEFINE_FREE(ufs_qcom_irq, struct ufs_qcom_irq *, if (_T) ufs_qcom_irq_free(_T))
+
static int ufs_qcom_config_esi(struct ufs_hba *hba)
{
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
- struct msi_desc *desc;
- struct msi_desc *failed_desc = NULL;
int nr_irqs, ret;
if (host->esi_enabled)
@@ -1878,6 +1891,14 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
* 2. Poll queues do not need ESI.
*/
nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL];
+
+ struct ufs_qcom_irq *qi __free(ufs_qcom_irq) =
+ devm_kcalloc(hba->dev, nr_irqs, sizeof(*qi), GFP_KERNEL);
+ if (!qi)
+ return -ENOMEM;
+ /* Preset so __free() has a pointer to hba in all error paths */
+ qi[0].hba = hba;
+
ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs,
ufs_qcom_write_msi_msg);
if (ret) {
@@ -1885,41 +1906,31 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
return ret;
}
- msi_lock_descs(hba->dev);
- msi_for_each_desc(desc, hba->dev, MSI_DESC_ALL) {
- ret = devm_request_irq(hba->dev, desc->irq,
- ufs_qcom_mcq_esi_handler,
- IRQF_SHARED, "qcom-mcq-esi", desc);
+ for (int idx = 0; idx < nr_irqs; idx++) {
+ qi[idx].irq = msi_get_virq(hba->dev, idx);
+ qi[idx].idx = idx;
+ qi[idx].hba = hba;
+
+ ret = devm_request_irq(hba->dev, qi[idx].irq, ufs_qcom_mcq_esi_handler,
+ IRQF_SHARED, "qcom-mcq-esi", qi + idx);
if (ret) {
dev_err(hba->dev, "%s: Fail to request IRQ for %d, err = %d\n",
- __func__, desc->irq, ret);
- failed_desc = desc;
- break;
+ __func__, qi[idx].irq, ret);
+ qi[idx].irq = 0;
+ return ret;
}
}
- msi_unlock_descs(hba->dev);
- if (ret) {
- /* Rewind */
- msi_lock_descs(hba->dev);
- msi_for_each_desc(desc, hba->dev, MSI_DESC_ALL) {
- if (desc == failed_desc)
- break;
- devm_free_irq(hba->dev, desc->irq, hba);
- }
- msi_unlock_descs(hba->dev);
- platform_device_msi_free_irqs_all(hba->dev);
- } else {
- if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 &&
- host->hw_ver.step == 0)
- ufshcd_rmwl(hba, ESI_VEC_MASK,
- FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1),
- REG_UFS_CFG3);
- ufshcd_mcq_enable_esi(hba);
- host->esi_enabled = true;
- }
+ retain_and_null_ptr(qi);
- return ret;
+ if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 &&
+ host->hw_ver.step == 0) {
+ ufshcd_rmwl(hba, ESI_VEC_MASK, FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1),
+ REG_UFS_CFG3);
+ }
+ ufshcd_mcq_enable_esi(hba);
+ host->esi_enabled = true;
+ return 0;
}
static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq)
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index c6b9ad12e8fe..b7f940486414 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -598,7 +598,7 @@ static int cxacru_start_wait_urb(struct urb *urb, struct completion *done,
mod_timer(&timer.timer, jiffies + msecs_to_jiffies(CMD_TIMEOUT));
wait_for_completion(done);
timer_delete_sync(&timer.timer);
- destroy_timer_on_stack(&timer.timer);
+ timer_destroy_on_stack(&timer.timer);
if (actual_length)
*actual_length = urb->actual_length;
diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c
index 12e866fb311d..0a800ba53816 100644
--- a/drivers/usb/gadget/function/f_midi2.c
+++ b/drivers/usb/gadget/function/f_midi2.c
@@ -475,7 +475,7 @@ static void reply_ump_stream_ep_info(struct f_midi2_ep *ep)
/* reply a UMP EP device info */
static void reply_ump_stream_ep_device(struct f_midi2_ep *ep)
{
- struct snd_ump_stream_msg_devince_info rep = {
+ struct snd_ump_stream_msg_device_info rep = {
.type = UMP_MSG_TYPE_STREAM,
.status = UMP_STREAM_MSG_STATUS_DEVICE_INFO,
.manufacture_id = ep->info.manufacturer,
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 853a5f082a70..7c2041f61cde 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -630,7 +630,7 @@ static int perform_sglist(
retval = -ETIMEDOUT;
else
retval = req->status;
- destroy_timer_on_stack(&timeout.timer);
+ timer_destroy_on_stack(&timeout.timer);
/* FIXME check resulting data pattern */
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index d36f3b6992bb..152ee3376550 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -1056,13 +1056,20 @@ int usb_stor_probe1(struct us_data **pus,
goto BadDevice;
/*
- * Some USB host controllers can't do DMA; they have to use PIO.
- * For such controllers we need to make sure the block layer sets
- * up bounce buffers in addressable memory.
+ * Some USB host controllers can't do DMA: They have to use PIO, or they
+ * have to use a small dedicated local memory area, or they have other
+ * restrictions on addressable memory.
+ *
+ * We can't support these controllers on highmem systems as we don't
+ * kmap or bounce buffer.
*/
- if (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) ||
- bus_to_hcd(us->pusb_dev->bus)->localmem_pool)
- host->no_highmem = true;
+ if (IS_ENABLED(CONFIG_HIGHMEM) &&
+ (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) ||
+ bus_to_hcd(us->pusb_dev->bus)->localmem_pool)) {
+ dev_warn(&intf->dev, "USB Mass Storage not supported on this host controller\n");
+ result = -EINVAL;
+ goto release;
+ }
/* Get the unusual_devs entries and the descriptors */
result = get_device_info(us, id, unusual_dev);
@@ -1081,6 +1088,7 @@ int usb_stor_probe1(struct us_data **pus,
BadDevice:
usb_stor_dbg(us, "storage_probe() failed\n");
+release:
release_everything(us);
return result;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index f01e4ef6619d..e9a9df1431af 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -1483,6 +1483,8 @@ static int ucsi_ccg_probe(struct i2c_client *client)
i2c_set_clientdata(client, uc);
+ device_disable_async_suspend(uc->dev);
+
pm_runtime_set_active(uc->dev);
pm_runtime_enable(uc->dev);
pm_runtime_use_autosuspend(uc->dev);
diff --git a/drivers/video/fbdev/geode/display_gx.c b/drivers/video/fbdev/geode/display_gx.c
index b5f25dffd274..099322cefce0 100644
--- a/drivers/video/fbdev/geode/display_gx.c
+++ b/drivers/video/fbdev/geode/display_gx.c
@@ -13,6 +13,7 @@
#include <asm/io.h>
#include <asm/div64.h>
#include <asm/delay.h>
+#include <asm/msr.h>
#include <linux/cs5535.h>
#include "gxfb.h"
diff --git a/drivers/video/fbdev/geode/gxfb_core.c b/drivers/video/fbdev/geode/gxfb_core.c
index af996634c1a9..8d69be7c9d31 100644
--- a/drivers/video/fbdev/geode/gxfb_core.c
+++ b/drivers/video/fbdev/geode/gxfb_core.c
@@ -29,6 +29,7 @@
#include <linux/pci.h>
#include <linux/cs5535.h>
+#include <asm/msr.h>
#include <asm/olpc.h>
#include "gxfb.h"
@@ -377,7 +378,7 @@ static int gxfb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Figure out if this is a TFT or CRT part */
- rdmsrl(MSR_GX_GLD_MSR_CONFIG, val);
+ rdmsrq(MSR_GX_GLD_MSR_CONFIG, val);
if ((val & MSR_GX_GLD_MSR_CONFIG_FP) == MSR_GX_GLD_MSR_CONFIG_FP)
par->enable_crt = 0;
diff --git a/drivers/video/fbdev/geode/lxfb_ops.c b/drivers/video/fbdev/geode/lxfb_ops.c
index 32baaf59fcf7..2e33da9849b0 100644
--- a/drivers/video/fbdev/geode/lxfb_ops.c
+++ b/drivers/video/fbdev/geode/lxfb_ops.c
@@ -11,6 +11,7 @@
#include <linux/delay.h>
#include <linux/cs5535.h>
+#include <asm/msr.h>
#include "lxfb.h"
/* TODO
@@ -358,7 +359,7 @@ void lx_set_mode(struct fb_info *info)
/* Set output mode */
- rdmsrl(MSR_LX_GLD_MSR_CONFIG, msrval);
+ rdmsrq(MSR_LX_GLD_MSR_CONFIG, msrval);
msrval &= ~MSR_LX_GLD_MSR_CONFIG_FMT;
if (par->output & OUTPUT_PANEL) {
@@ -371,7 +372,7 @@ void lx_set_mode(struct fb_info *info)
} else
msrval |= MSR_LX_GLD_MSR_CONFIG_FMT_CRT;
- wrmsrl(MSR_LX_GLD_MSR_CONFIG, msrval);
+ wrmsrq(MSR_LX_GLD_MSR_CONFIG, msrval);
/* Clear the various buffers */
/* FIXME: Adjust for panning here */
@@ -419,7 +420,7 @@ void lx_set_mode(struct fb_info *info)
/* Set default watermark values */
- rdmsrl(MSR_LX_SPARE_MSR, msrval);
+ rdmsrq(MSR_LX_SPARE_MSR, msrval);
msrval &= ~(MSR_LX_SPARE_MSR_DIS_CFIFO_HGO
| MSR_LX_SPARE_MSR_VFIFO_ARB_SEL
@@ -427,7 +428,7 @@ void lx_set_mode(struct fb_info *info)
| MSR_LX_SPARE_MSR_WM_LPEN_OVRD);
msrval |= MSR_LX_SPARE_MSR_DIS_VIFO_WM |
MSR_LX_SPARE_MSR_DIS_INIT_V_PRI;
- wrmsrl(MSR_LX_SPARE_MSR, msrval);
+ wrmsrq(MSR_LX_SPARE_MSR, msrval);
gcfg = DC_GENERAL_CFG_DFLE; /* Display fifo enable */
gcfg |= (0x6 << DC_GENERAL_CFG_DFHPSL_SHIFT) | /* default priority */
@@ -591,10 +592,10 @@ static void lx_save_regs(struct lxfb_par *par)
} while ((i & GP_BLT_STATUS_PB) || !(i & GP_BLT_STATUS_CE));
/* save MSRs */
- rdmsrl(MSR_LX_MSR_PADSEL, par->msr.padsel);
- rdmsrl(MSR_GLCP_DOTPLL, par->msr.dotpll);
- rdmsrl(MSR_LX_GLD_MSR_CONFIG, par->msr.dfglcfg);
- rdmsrl(MSR_LX_SPARE_MSR, par->msr.dcspare);
+ rdmsrq(MSR_LX_MSR_PADSEL, par->msr.padsel);
+ rdmsrq(MSR_GLCP_DOTPLL, par->msr.dotpll);
+ rdmsrq(MSR_LX_GLD_MSR_CONFIG, par->msr.dfglcfg);
+ rdmsrq(MSR_LX_SPARE_MSR, par->msr.dcspare);
write_dc(par, DC_UNLOCK, DC_UNLOCK_UNLOCK);
@@ -664,7 +665,7 @@ static void lx_restore_display_ctlr(struct lxfb_par *par)
uint32_t filt;
int i;
- wrmsrl(MSR_LX_SPARE_MSR, par->msr.dcspare);
+ wrmsrq(MSR_LX_SPARE_MSR, par->msr.dcspare);
for (i = 0; i < ARRAY_SIZE(par->dc); i++) {
switch (i) {
@@ -729,8 +730,8 @@ static void lx_restore_video_proc(struct lxfb_par *par)
{
int i;
- wrmsrl(MSR_LX_GLD_MSR_CONFIG, par->msr.dfglcfg);
- wrmsrl(MSR_LX_MSR_PADSEL, par->msr.padsel);
+ wrmsrq(MSR_LX_GLD_MSR_CONFIG, par->msr.dfglcfg);
+ wrmsrq(MSR_LX_MSR_PADSEL, par->msr.padsel);
for (i = 0; i < ARRAY_SIZE(par->vp); i++) {
switch (i) {
diff --git a/drivers/video/fbdev/geode/suspend_gx.c b/drivers/video/fbdev/geode/suspend_gx.c
index 8c49d4e98772..73307f42e343 100644
--- a/drivers/video/fbdev/geode/suspend_gx.c
+++ b/drivers/video/fbdev/geode/suspend_gx.c
@@ -21,8 +21,8 @@ static void gx_save_regs(struct gxfb_par *par)
} while (i & (GP_BLT_STATUS_BLT_PENDING | GP_BLT_STATUS_BLT_BUSY));
/* save MSRs */
- rdmsrl(MSR_GX_MSR_PADSEL, par->msr.padsel);
- rdmsrl(MSR_GLCP_DOTPLL, par->msr.dotpll);
+ rdmsrq(MSR_GX_MSR_PADSEL, par->msr.padsel);
+ rdmsrq(MSR_GLCP_DOTPLL, par->msr.dotpll);
write_dc(par, DC_UNLOCK, DC_UNLOCK_UNLOCK);
@@ -43,14 +43,14 @@ static void gx_set_dotpll(uint32_t dotpll_hi)
uint32_t dotpll_lo;
int i;
- rdmsrl(MSR_GLCP_DOTPLL, dotpll_lo);
+ rdmsrq(MSR_GLCP_DOTPLL, dotpll_lo);
dotpll_lo |= MSR_GLCP_DOTPLL_DOTRESET;
dotpll_lo &= ~MSR_GLCP_DOTPLL_BYPASS;
wrmsr(MSR_GLCP_DOTPLL, dotpll_lo, dotpll_hi);
/* wait for the PLL to lock */
for (i = 0; i < 200; i++) {
- rdmsrl(MSR_GLCP_DOTPLL, dotpll_lo);
+ rdmsrq(MSR_GLCP_DOTPLL, dotpll_lo);
if (dotpll_lo & MSR_GLCP_DOTPLL_LOCK)
break;
udelay(1);
@@ -133,7 +133,7 @@ static void gx_restore_video_proc(struct gxfb_par *par)
{
int i;
- wrmsrl(MSR_GX_MSR_PADSEL, par->msr.padsel);
+ wrmsrq(MSR_GX_MSR_PADSEL, par->msr.padsel);
for (i = 0; i < ARRAY_SIZE(par->vp); i++) {
switch (i) {
diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c
index 91dac2aa247c..5717c3356949 100644
--- a/drivers/video/fbdev/geode/video_gx.c
+++ b/drivers/video/fbdev/geode/video_gx.c
@@ -142,8 +142,8 @@ void gx_set_dclk_frequency(struct fb_info *info)
}
}
- rdmsrl(MSR_GLCP_SYS_RSTPLL, sys_rstpll);
- rdmsrl(MSR_GLCP_DOTPLL, dotpll);
+ rdmsrq(MSR_GLCP_SYS_RSTPLL, sys_rstpll);
+ rdmsrq(MSR_GLCP_DOTPLL, dotpll);
/* Program new M, N and P. */
dotpll &= 0x00000000ffffffffull;
@@ -151,7 +151,7 @@ void gx_set_dclk_frequency(struct fb_info *info)
dotpll |= MSR_GLCP_DOTPLL_DOTRESET;
dotpll &= ~MSR_GLCP_DOTPLL_BYPASS;
- wrmsrl(MSR_GLCP_DOTPLL, dotpll);
+ wrmsrq(MSR_GLCP_DOTPLL, dotpll);
/* Program dividers. */
sys_rstpll &= ~( MSR_GLCP_SYS_RSTPLL_DOTPREDIV2
@@ -159,15 +159,15 @@ void gx_set_dclk_frequency(struct fb_info *info)
| MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3 );
sys_rstpll |= pll_table[best_i].sys_rstpll_bits;
- wrmsrl(MSR_GLCP_SYS_RSTPLL, sys_rstpll);
+ wrmsrq(MSR_GLCP_SYS_RSTPLL, sys_rstpll);
/* Clear reset bit to start PLL. */
dotpll &= ~(MSR_GLCP_DOTPLL_DOTRESET);
- wrmsrl(MSR_GLCP_DOTPLL, dotpll);
+ wrmsrq(MSR_GLCP_DOTPLL, dotpll);
/* Wait for LOCK bit. */
do {
- rdmsrl(MSR_GLCP_DOTPLL, dotpll);
+ rdmsrq(MSR_GLCP_DOTPLL, dotpll);
} while (timeout-- && !(dotpll & MSR_GLCP_DOTPLL_LOCK));
}
@@ -180,10 +180,10 @@ gx_configure_tft(struct fb_info *info)
/* Set up the DF pad select MSR */
- rdmsrl(MSR_GX_MSR_PADSEL, val);
+ rdmsrq(MSR_GX_MSR_PADSEL, val);
val &= ~MSR_GX_MSR_PADSEL_MASK;
val |= MSR_GX_MSR_PADSEL_TFT;
- wrmsrl(MSR_GX_MSR_PADSEL, val);
+ wrmsrq(MSR_GX_MSR_PADSEL, val);
/* Turn off the panel */
diff --git a/drivers/w1/slaves/w1_ds2406.c b/drivers/w1/slaves/w1_ds2406.c
index 1cae9b243ff8..76026d615111 100644
--- a/drivers/w1/slaves/w1_ds2406.c
+++ b/drivers/w1/slaves/w1_ds2406.c
@@ -29,8 +29,6 @@ static ssize_t w1_f12_read_state(
{
u8 w1_buf[6] = {W1_F12_FUNC_READ_STATUS, 7, 0, 0, 0, 0};
struct w1_slave *sl = kobj_to_w1_slave(kobj);
- u16 crc = 0;
- int i;
ssize_t rtnval = 1;
if (off != 0)
@@ -47,9 +45,7 @@ static ssize_t w1_f12_read_state(
w1_write_block(sl->master, w1_buf, 3);
w1_read_block(sl->master, w1_buf+3, 3);
- for (i = 0; i < 6; i++)
- crc = crc16_byte(crc, w1_buf[i]);
- if (crc == 0xb001) /* good read? */
+ if (crc16(0, w1_buf, sizeof(w1_buf)) == 0xb001) /* good read? */
*buf = ((w1_buf[3]>>5)&3)|0x30;
else
rtnval = -EIO;
@@ -66,8 +62,6 @@ static ssize_t w1_f12_write_output(
{
struct w1_slave *sl = kobj_to_w1_slave(kobj);
u8 w1_buf[6] = {W1_F12_FUNC_WRITE_STATUS, 7, 0, 0, 0, 0};
- u16 crc = 0;
- int i;
ssize_t rtnval = 1;
if (count != 1 || off != 0)
@@ -83,9 +77,7 @@ static ssize_t w1_f12_write_output(
w1_buf[3] = (((*buf)&3)<<5)|0x1F;
w1_write_block(sl->master, w1_buf, 4);
w1_read_block(sl->master, w1_buf+4, 2);
- for (i = 0; i < 6; i++)
- crc = crc16_byte(crc, w1_buf[i]);
- if (crc == 0xb001) /* good read? */
+ if (crc16(0, w1_buf, sizeof(w1_buf)) == 0xb001) /* good read? */
w1_write_8(sl->master, 0xFF);
else
rtnval = -EIO;
diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c
index 76dffc89c641..887d5a6c155b 100644
--- a/drivers/watchdog/diag288_wdt.c
+++ b/drivers/watchdog/diag288_wdt.c
@@ -29,26 +29,13 @@
#include <linux/watchdog.h>
#include <asm/machine.h>
#include <asm/ebcdic.h>
+#include <asm/diag288.h>
#include <asm/diag.h>
#include <linux/io.h>
#define MAX_CMDLEN 240
#define DEFAULT_CMD "SYSTEM RESTART"
-#define MIN_INTERVAL 15 /* Minimal time supported by diag88 */
-#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */
-
-#define WDT_DEFAULT_TIMEOUT 30
-
-/* Function codes - init, change, cancel */
-#define WDT_FUNC_INIT 0
-#define WDT_FUNC_CHANGE 1
-#define WDT_FUNC_CANCEL 2
-#define WDT_FUNC_CONCEAL 0x80000000
-
-/* Action codes for LPAR watchdog */
-#define LPARWDT_RESTART 0
-
static char wdt_cmd[MAX_CMDLEN] = DEFAULT_CMD;
static bool conceal_on;
static bool nowayout_info = WATCHDOG_NOWAYOUT;
@@ -75,22 +62,8 @@ static char *cmd_buf;
static int diag288(unsigned int func, unsigned int timeout,
unsigned long action, unsigned int len)
{
- union register_pair r1 = { .even = func, .odd = timeout, };
- union register_pair r3 = { .even = action, .odd = len, };
- int err;
-
diag_stat_inc(DIAG_STAT_X288);
-
- err = -EINVAL;
- asm volatile(
- " diag %[r1],%[r3],0x288\n"
- "0: la %[err],0\n"
- "1:\n"
- EX_TABLE(0b, 1b)
- : [err] "+d" (err)
- : [r1] "d" (r1.pair), [r3] "d" (r3.pair)
- : "cc", "memory");
- return err;
+ return __diag288(func, timeout, action, len);
}
static int diag288_str(unsigned int func, unsigned int timeout, char *cmd)
@@ -189,8 +162,6 @@ static struct watchdog_device wdt_dev = {
static int __init diag288_init(void)
{
- int ret;
-
watchdog_set_nowayout(&wdt_dev, nowayout_info);
if (machine_is_vm()) {
@@ -199,24 +170,6 @@ static int __init diag288_init(void)
pr_err("The watchdog cannot be initialized\n");
return -ENOMEM;
}
-
- ret = diag288_str(WDT_FUNC_INIT, MIN_INTERVAL, "BEGIN");
- if (ret != 0) {
- pr_err("The watchdog cannot be initialized\n");
- kfree(cmd_buf);
- return -EINVAL;
- }
- } else {
- if (diag288(WDT_FUNC_INIT, WDT_DEFAULT_TIMEOUT,
- LPARWDT_RESTART, 0)) {
- pr_err("The watchdog cannot be initialized\n");
- return -EINVAL;
- }
- }
-
- if (diag288(WDT_FUNC_CANCEL, 0, 0, 0)) {
- pr_err("The watchdog cannot be deactivated\n");
- return -EINVAL;
}
return watchdog_register_device(&wdt_dev);
@@ -228,5 +181,5 @@ static void __exit diag288_exit(void)
kfree(cmd_buf);
}
-module_init(diag288_init);
+module_cpu_feature_match(S390_CPU_FEATURE_D288, diag288_init);
module_exit(diag288_exit);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 8c852807ba1c..2de37dcd7556 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -704,15 +704,18 @@ static int __init balloon_add_regions(void)
/*
* Extra regions are accounted for in the physmap, but need
- * decreasing from current_pages to balloon down the initial
- * allocation, because they are already accounted for in
- * total_pages.
+ * decreasing from current_pages and target_pages to balloon
+ * down the initial allocation, because they are already
+ * accounted for in total_pages.
*/
- if (extra_pfn_end - start_pfn >= balloon_stats.current_pages) {
+ pages = extra_pfn_end - start_pfn;
+ if (pages >= balloon_stats.current_pages ||
+ pages >= balloon_stats.target_pages) {
WARN(1, "Extra pages underflow current target");
return -ERANGE;
}
- balloon_stats.current_pages -= extra_pfn_end - start_pfn;
+ balloon_stats.current_pages -= pages;
+ balloon_stats.target_pages -= pages;
}
return 0;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ef56a2500ed6..da1a7d3d377c 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -426,4 +426,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
.max_mapping_size = swiotlb_max_mapping_size,
+ .map_resource = dma_direct_map_resource,
};
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 32619d146cbc..1286d96a29bc 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -164,4 +164,5 @@ const struct address_space_operations v9fs_addr_operations = {
.invalidate_folio = netfs_invalidate_folio,
.direct_IO = noop_direct_IO,
.writepages = netfs_writepages,
+ .migrate_folio = filemap_migrate_folio,
};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5b4847bd2fbb..44b6cdd36dc1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -335,6 +335,7 @@ source "fs/omfs/Kconfig"
source "fs/hpfs/Kconfig"
source "fs/qnx4/Kconfig"
source "fs/qnx6/Kconfig"
+source "fs/resctrl/Kconfig"
source "fs/romfs/Kconfig"
source "fs/pstore/Kconfig"
source "fs/ufs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 77fd7f7b5d02..79c08b914c47 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -128,3 +128,4 @@ obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_VBOXSF_FS) += vboxsf/
obj-$(CONFIG_ZONEFS_FS) += zonefs/
obj-$(CONFIG_BPF_LSM) += bpf_fs_kfuncs.o
+obj-$(CONFIG_RESCTRL_FS) += resctrl/
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9e7b1fe82c27..bfb69e066672 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -943,7 +943,7 @@ static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry)
}
strcpy(p, name);
- ret = lookup_one_len(buf, dentry->d_parent, len);
+ ret = lookup_noperm(&QSTR(buf), dentry->d_parent);
if (IS_ERR(ret) || d_is_positive(ret))
goto out_s;
dput(ret);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index a1e581946b93..0b80eb93fa40 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -113,16 +113,14 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
sdentry = NULL;
do {
- int slen;
-
dput(sdentry);
sillycounter++;
/* Create a silly name. Note that the ".__afs" prefix is
* understood by the salvager and must not be changed.
*/
- slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+ sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
/* N.B. Better to return EBUSY here ... it could be dangerous
* to delete the file while it's in use.
diff --git a/fs/aio.c b/fs/aio.c
index 7b976b564cfc..793b7b15ec4b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1511,6 +1511,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb, int rw_type)
{
int ret;
+ req->ki_write_stream = 0;
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 583ac81669c2..e51e7d88980a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,10 +24,51 @@
#include <linux/uaccess.h>
+#include "internal.h"
+
static struct vfsmount *anon_inode_mnt __ro_after_init;
static struct inode *anon_inode_inode __ro_after_init;
/*
+ * User space expects anonymous inodes to have no file type in st_mode.
+ *
+ * In particular, 'lsof' has this legacy logic:
+ *
+ * type = s->st_mode & S_IFMT;
+ * switch (type) {
+ * ...
+ * case 0:
+ * if (!strcmp(p, "anon_inode"))
+ * Lf->ntype = Ntype = N_ANON_INODE;
+ *
+ * to detect our old anon_inode logic.
+ *
+ * Rather than mess with our internal sane inode data, just fix it
+ * up here in getattr() by masking off the format bits.
+ */
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+ stat->mode &= ~S_IFMT;
+ return 0;
+}
+
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct inode_operations anon_inode_operations = {
+ .getattr = anon_inode_getattr,
+ .setattr = anon_inode_setattr,
+};
+
+/*
* anon_inodefs_dname() is called from d_path().
*/
static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -45,6 +86,8 @@ static int anon_inodefs_init_fs_context(struct fs_context *fc)
struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
if (!ctx)
return -ENOMEM;
+ fc->s_iflags |= SB_I_NOEXEC;
+ fc->s_iflags |= SB_I_NODEV;
ctx->dops = &anon_inodefs_dentry_operations;
return 0;
}
@@ -66,6 +109,7 @@ static struct inode *anon_inode_make_secure_inode(
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
+ inode->i_op = &anon_inode_operations;
error = security_inode_init_security_anon(inode, &QSTR(name),
context_inode);
if (error) {
@@ -313,6 +357,7 @@ static int __init anon_inode_init(void)
anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
if (IS_ERR(anon_inode_inode))
panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+ anon_inode_inode->i_op = &anon_inode_operations;
return 0;
}
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index c5a6aae12d2c..d8dd150cbd74 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -459,7 +459,8 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
"the parent autofs mount timeout which could "
"prevent shutdown\n");
- dentry = try_lookup_one_len(param->path, base, path_len);
+ dentry = try_lookup_noperm(&QSTR_LEN(param->path, path_len),
+ base);
if (IS_ERR_OR_NULL(dentry))
return dentry ? PTR_ERR(dentry) : -ENOENT;
ino = autofs_dentry_ino(dentry);
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index 07709b0d7688..8cb2b9d5da96 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -103,6 +103,14 @@ config BCACHEFS_PATH_TRACEPOINTS
Enable extra tracepoints for debugging btree_path operations; we don't
normally want these enabled because they happen at very high rates.
+config BCACHEFS_TRANS_KMALLOC_TRACE
+ bool "Trace bch2_trans_kmalloc() calls"
+ depends on BCACHEFS_FS
+
+config BCACHEFS_ASYNC_OBJECT_LISTS
+ bool "Keep async objects on fast_lists for debugfs visibility"
+ depends on BCACHEFS_FS && DEBUG_FS
+
config MEAN_AND_VARIANCE_UNIT_TEST
tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 9af65079374f..93c8ee5425c8 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -35,11 +35,13 @@ bcachefs-y := \
disk_accounting.o \
disk_groups.o \
ec.o \
+ enumerated_ref.o \
errcode.o \
error.o \
extents.o \
extent_update.o \
eytzinger.o \
+ fast_list.o \
fs.o \
fs-ioctl.o \
fs-io.o \
@@ -97,6 +99,8 @@ bcachefs-y := \
varint.o \
xattr.o
+bcachefs-$(CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS) += async_objs.o
+
obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o
# Silence "note: xyz changed in GCC X.X" messages
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 94ea9e49aec4..173e81c2bbcb 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -17,6 +17,7 @@
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@@ -308,7 +309,8 @@ int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
"data type inconsistency");
bkey_fsck_err_on(!a.io_time[READ] &&
- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
+ !(c->recovery.passes_to_run &
+ BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs)),
c, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time == 0");
break;
@@ -478,12 +480,27 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
struct btree_iter iter;
- struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
- int ret = PTR_ERR_OR_ZERO(a);
- if (ret)
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, pos,
+ BTREE_ITER_with_updates|
+ BTREE_ITER_cached|
+ BTREE_ITER_intent);
+ int ret = bkey_err(k);
+ if (unlikely(ret))
return ERR_PTR(ret);
- ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
+ if ((void *) k.v >= trans->mem &&
+ (void *) k.v < trans->mem + trans->mem_top) {
+ bch2_trans_iter_exit(trans, &iter);
+ return container_of(bkey_s_c_to_alloc_v4(k).v, struct bkey_i_alloc_v4, v);
+ }
+
+ struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k);
+ if (IS_ERR(a)) {
+ bch2_trans_iter_exit(trans, &iter);
+ return a;
+ }
+
+ ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_);
bch2_trans_iter_exit(trans, &iter);
return unlikely(ret) ? ERR_PTR(ret) : a;
}
@@ -913,15 +930,6 @@ int bch2_trigger_alloc(struct btree_trans *trans,
goto err;
}
- if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
- old_a->cached_sectors) {
- ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
- -((s64) old_a->cached_sectors),
- flags & BTREE_TRIGGER_gc);
- if (ret)
- goto err;
- }
-
ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
if (ret)
goto err;
@@ -1381,7 +1389,7 @@ static void check_discard_freespace_key_work(struct work_struct *work)
container_of(work, struct check_discard_freespace_key_async, work);
bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos));
- bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key);
+ enumerated_ref_put(&w->c->writes, BCH_WRITE_REF_check_discard_freespace_key);
kfree(w);
}
@@ -1458,7 +1466,7 @@ delete:
if (!w)
goto out;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) {
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_check_discard_freespace_key)) {
kfree(w);
goto out;
}
@@ -1467,6 +1475,8 @@ delete:
w->c = c;
w->pos = BBPOS(iter->btree_id, iter->pos);
queue_work(c->write_ref_wq, &w->work);
+
+ ret = 1; /* don't allocate from this bucket */
goto out;
}
}
@@ -1806,19 +1816,6 @@ struct discard_buckets_state {
u64 discarded;
};
-/*
- * This is needed because discard is both a filesystem option and a device
- * option, and mount options are supposed to apply to that mount and not be
- * persisted, i.e. if it's set as a mount option we can't propagate it to the
- * device.
- */
-static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
-{
- return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
- ? c->opts.discard
- : ca->mi.discard;
-}
-
static int bch2_discard_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *need_discard_iter,
@@ -1882,7 +1879,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
s->discarded++;
*discard_pos_done = iter.pos;
- if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
+ if (bch2_discard_opt_enabled(c, ca) && !c->opts.nochanges) {
/*
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
@@ -1952,26 +1949,26 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
- percpu_ref_put(&ca->io_ref[WRITE]);
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
void bch2_dev_do_discards(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_discards))
goto put_write_ref;
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
put_write_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
void bch2_do_discards(struct bch_fs *c)
@@ -2047,8 +2044,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
bch2_trans_put(trans);
- percpu_ref_put(&ca->io_ref[WRITE]);
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast);
}
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
@@ -2058,18 +2055,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (discard_in_flight_add(ca, bucket, false))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard_fast))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_discard_one_bucket_fast))
goto put_ref;
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
put_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast);
}
static int invalidate_one_bp(struct btree_trans *trans,
@@ -2180,8 +2177,11 @@ static int invalidate_one_bucket(struct btree_trans *trans,
BUG_ON(a->data_type != BCH_DATA_cached);
BUG_ON(a->dirty_sectors);
- if (!a->cached_sectors)
- bch_err(c, "invalidating empty bucket, confused");
+ if (!a->cached_sectors) {
+ bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset,
+ true, last_flushed);
+ goto out;
+ }
unsigned cached_sectors = a->cached_sectors;
u8 gen = a->gen;
@@ -2261,27 +2261,27 @@ restart_err:
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
- percpu_ref_put(&ca->io_ref[WRITE]);
bch2_bkey_buf_exit(&last_flushed, c);
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
}
void bch2_dev_do_invalidates(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_invalidate))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_do_invalidates))
goto put_ref;
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
put_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
}
void bch2_do_invalidates(struct bch_fs *c)
@@ -2392,14 +2392,16 @@ bkey_err:
int bch2_fs_freespace_init(struct bch_fs *c)
{
- int ret = 0;
- bool doing_init = false;
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image))
+ return 0;
+
/*
* We can crash during the device add path, so we need to check this on
* every mount:
*/
+ bool doing_init = false;
for_each_member_device(c, ca) {
if (ca->mi.freespace_initialized)
continue;
@@ -2409,7 +2411,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
doing_init = true;
}
- ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
+ int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
if (ret) {
bch2_dev_put(ca);
bch_err_fn(c, ret);
@@ -2439,8 +2441,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
* We clear the LRU and need_discard btrees first so that we don't race
* with bch2_do_invalidates() and bch2_do_discards()
*/
- ret = bch2_dev_remove_stripes(c, ca->dev_idx) ?:
- bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
+ ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
BTREE_TRIGGER_norun, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
BTREE_TRIGGER_norun, NULL) ?:
@@ -2503,15 +2504,15 @@ void bch2_recalc_capacity(struct bch_fs *c)
lockdep_assert_held(&c->state_lock);
- for_each_online_member(c, ca) {
- struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
-
- ra_pages += bdi->ra_pages;
- }
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL) {
+ struct block_device *bdev = READ_ONCE(ca->disk_sb.bdev);
+ if (bdev)
+ ra_pages += bdev->bd_disk->bdi->ra_pages;
- bch2_set_ra_pages(c, ra_pages);
+ if (ca->mi.state != BCH_MEMBER_STATE_rw)
+ continue;
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
u64 dev_reserve = 0;
/*
@@ -2548,6 +2549,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
bucket_size_max = max_t(unsigned, bucket_size_max,
ca->mi.bucket_size);
}
+ rcu_read_unlock();
+
+ bch2_set_ra_pages(c, ra_pages);
gc_reserve = c->opts.gc_reserve_bytes
? c->opts.gc_reserve_bytes >> 9
@@ -2570,27 +2574,41 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
u64 ret = U64_MAX;
- for_each_rw_member(c, ca)
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca)
ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
+ rcu_read_unlock();
return ret;
}
static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
struct open_bucket *ob;
- bool ret = false;
for (ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
- spin_lock(&ob->lock);
- if (ob->valid && !ob->on_partial_list &&
- ob->dev == ca->dev_idx)
- ret = true;
- spin_unlock(&ob->lock);
+ scoped_guard(spinlock, &ob->lock) {
+ if (ob->valid && !ob->on_partial_list &&
+ ob->dev == ca->dev_idx)
+ return true;
+ }
}
- return ret;
+ return false;
+}
+
+void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
+{
+ /* BCH_DATA_free == all rw devs */
+
+ for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
+ if (rw &&
+ (i == BCH_DATA_free ||
+ (ca->mi.data_allowed & BIT(i))))
+ set_bit(ca->dev_idx, c->rw_devs[i].d);
+ else
+ clear_bit(ca->dev_idx, c->rw_devs[i].d);
}
/* device goes ro: */
@@ -2599,9 +2617,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);
/* First, remove device from allocation groups: */
-
- for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
- clear_bit(ca->dev_idx, c->rw_devs[i].d);
+ bch2_dev_allocator_set_rw(c, ca, false);
c->rw_devs_change_count++;
@@ -2635,10 +2651,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
{
lockdep_assert_held(&c->state_lock);
- for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
- if (ca->mi.data_allowed & (1 << i))
- set_bit(ca->dev_idx, c->rw_devs[i].d);
-
+ bch2_dev_allocator_set_rw(c, ca, true);
c->rw_devs_change_count++;
}
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 34b3d6ac4fbb..4f94c6a661bf 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -350,6 +350,7 @@ int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *);
void bch2_recalc_capacity(struct bch_fs *);
u64 bch2_min_rw_member_capacity(struct bch_fs *);
+void bch2_dev_allocator_set_rw(struct bch_fs *, struct bch_dev *, bool);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 7ec022e9361a..1a52c12c51ae 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -154,7 +154,7 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
{
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs)
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_trans_mark_dev_sbs))
return false;
return bch2_is_superblock_bucket(ca, b);
@@ -180,11 +180,11 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
}
static inline bool may_alloc_bucket(struct bch_fs *c,
- struct bpos bucket,
- struct bucket_alloc_state *s)
+ struct alloc_request *req,
+ struct bpos bucket)
{
if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) {
- s->skipped_open++;
+ req->counters.skipped_open++;
return false;
}
@@ -193,36 +193,37 @@ static inline bool may_alloc_bucket(struct bch_fs *c,
bucket.inode, bucket.offset);
if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
if (journal_seq_ready > c->journal.flushing_seq)
- s->need_journal_commit++;
- s->skipped_need_journal_commit++;
+ req->counters.need_journal_commit++;
+ req->counters.skipped_need_journal_commit++;
return false;
}
if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) {
- s->skipped_nocow++;
+ req->counters.skipped_nocow++;
return false;
}
return true;
}
-static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
+ struct alloc_request *req,
u64 bucket, u8 gen,
- enum bch_watermark watermark,
- struct bucket_alloc_state *s,
struct closure *cl)
{
+ struct bch_dev *ca = req->ca;
+
if (unlikely(is_superblock_bucket(c, ca, bucket)))
return NULL;
if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
- s->skipped_nouse++;
+ req->counters.skipped_nouse++;
return NULL;
}
spin_lock(&c->freelist_lock);
- if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
+ if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);
@@ -234,7 +235,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
/* Recheck under lock: */
if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
spin_unlock(&c->freelist_lock);
- s->skipped_open++;
+ req->counters.skipped_open++;
return NULL;
}
@@ -258,16 +259,15 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
return ob;
}
-static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
- enum bch_watermark watermark,
- struct bucket_alloc_state *s,
+static struct open_bucket *try_alloc_bucket(struct btree_trans *trans,
+ struct alloc_request *req,
struct btree_iter *freespace_iter,
struct closure *cl)
{
struct bch_fs *c = trans->c;
u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);
- if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s))
+ if (!may_alloc_bucket(c, req, POS(req->ca->dev_idx, b)))
return NULL;
u8 gen;
@@ -277,7 +277,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
if (ret)
return NULL;
- return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl);
+ return __try_alloc_bucket(c, req, b, gen, cl);
}
/*
@@ -285,17 +285,16 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
*/
static noinline struct open_bucket *
bch2_bucket_alloc_early(struct btree_trans *trans,
- struct bch_dev *ca,
- enum bch_watermark watermark,
- struct bucket_alloc_state *s,
+ struct alloc_request *req,
struct closure *cl)
{
struct bch_fs *c = trans->c;
+ struct bch_dev *ca = req->ca;
struct btree_iter iter, citer;
struct bkey_s_c k, ck;
struct open_bucket *ob = NULL;
u64 first_bucket = ca->mi.first_bucket;
- u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap];
+ u64 *dev_alloc_cursor = &ca->alloc_cursor[req->btree_bitmap];
u64 alloc_start = max(first_bucket, *dev_alloc_cursor);
u64 alloc_cursor = alloc_start;
int ret;
@@ -317,10 +316,10 @@ again:
if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
break;
- if (s->btree_bitmap != BTREE_BITMAP_ANY &&
- s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
+ if (req->btree_bitmap != BTREE_BITMAP_ANY &&
+ req->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
- if (s->btree_bitmap == BTREE_BITMAP_YES &&
+ if (req->btree_bitmap == BTREE_BITMAP_YES &&
bucket_to_sector(ca, bucket) > 64ULL << ca->mi.btree_bitmap_shift)
break;
@@ -328,8 +327,8 @@ again:
round_up(bucket_to_sector(ca, bucket) + 1,
1ULL << ca->mi.btree_bitmap_shift));
bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, bucket));
- s->buckets_seen++;
- s->skipped_mi_btree_bitmap++;
+ req->counters.buckets_seen++;
+ req->counters.skipped_mi_btree_bitmap++;
continue;
}
@@ -348,11 +347,10 @@ again:
if (a->data_type != BCH_DATA_free)
goto next;
- s->buckets_seen++;
+ req->counters.buckets_seen++;
- ob = may_alloc_bucket(c, k.k->p, s)
- ? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen,
- watermark, s, cl)
+ ob = may_alloc_bucket(c, req, k.k->p)
+ ? __try_alloc_bucket(c, req, k.k->p.offset, a->gen, cl)
: NULL;
next:
bch2_set_btree_iter_dontneed(trans, &citer);
@@ -378,15 +376,14 @@ next:
}
static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
- struct bch_dev *ca,
- enum bch_watermark watermark,
- struct bucket_alloc_state *s,
- struct closure *cl)
+ struct alloc_request *req,
+ struct closure *cl)
{
+ struct bch_dev *ca = req->ca;
struct btree_iter iter;
struct bkey_s_c k;
struct open_bucket *ob = NULL;
- u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap];
+ u64 *dev_alloc_cursor = &ca->alloc_cursor[req->btree_bitmap];
u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor));
u64 alloc_cursor = alloc_start;
int ret;
@@ -402,13 +399,13 @@ again:
iter.k.size = iter.k.p.offset - iter.pos.offset;
while (iter.k.size) {
- s->buckets_seen++;
+ req->counters.buckets_seen++;
u64 bucket = iter.pos.offset & ~(~0ULL << 56);
- if (s->btree_bitmap != BTREE_BITMAP_ANY &&
- s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
+ if (req->btree_bitmap != BTREE_BITMAP_ANY &&
+ req->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
- if (s->btree_bitmap == BTREE_BITMAP_YES &&
+ if (req->btree_bitmap == BTREE_BITMAP_YES &&
bucket_to_sector(ca, bucket) > 64ULL << ca->mi.btree_bitmap_shift)
goto fail;
@@ -418,11 +415,11 @@ again:
alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56));
bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, alloc_cursor));
- s->skipped_mi_btree_bitmap++;
+ req->counters.skipped_mi_btree_bitmap++;
goto next;
}
- ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl);
+ ob = try_alloc_bucket(trans, req, &iter, cl);
if (ob) {
if (!IS_ERR(ob))
*dev_alloc_cursor = iter.pos.offset;
@@ -453,33 +450,30 @@ fail:
return ob;
}
-static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca,
- enum bch_watermark watermark,
- enum bch_data_type data_type,
+static noinline void trace_bucket_alloc2(struct bch_fs *c,
+ struct alloc_request *req,
struct closure *cl,
- struct bch_dev_usage *usage,
- struct bucket_alloc_state *s,
struct open_bucket *ob)
{
struct printbuf buf = PRINTBUF;
printbuf_tabstop_push(&buf, 24);
- prt_printf(&buf, "dev\t%s (%u)\n", ca->name, ca->dev_idx);
- prt_printf(&buf, "watermark\t%s\n", bch2_watermarks[watermark]);
- prt_printf(&buf, "data type\t%s\n", __bch2_data_types[data_type]);
+ prt_printf(&buf, "dev\t%s (%u)\n", req->ca->name, req->ca->dev_idx);
+ prt_printf(&buf, "watermark\t%s\n", bch2_watermarks[req->watermark]);
+ prt_printf(&buf, "data type\t%s\n", __bch2_data_types[req->data_type]);
prt_printf(&buf, "blocking\t%u\n", cl != NULL);
- prt_printf(&buf, "free\t%llu\n", usage->buckets[BCH_DATA_free]);
- prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(ca, *usage, watermark));
- prt_printf(&buf, "copygc_wait\t%lu/%lli\n",
+ prt_printf(&buf, "free\t%llu\n", req->usage.buckets[BCH_DATA_free]);
+ prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(req->ca, req->usage, req->watermark));
+ prt_printf(&buf, "copygc_wait\t%llu/%lli\n",
bch2_copygc_wait_amount(c),
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now));
- prt_printf(&buf, "seen\t%llu\n", s->buckets_seen);
- prt_printf(&buf, "open\t%llu\n", s->skipped_open);
- prt_printf(&buf, "need journal commit\t%llu\n", s->skipped_need_journal_commit);
- prt_printf(&buf, "nocow\t%llu\n", s->skipped_nocow);
- prt_printf(&buf, "nouse\t%llu\n", s->skipped_nouse);
- prt_printf(&buf, "mi_btree_bitmap\t%llu\n", s->skipped_mi_btree_bitmap);
+ prt_printf(&buf, "seen\t%llu\n", req->counters.buckets_seen);
+ prt_printf(&buf, "open\t%llu\n", req->counters.skipped_open);
+ prt_printf(&buf, "need journal commit\t%llu\n", req->counters.skipped_need_journal_commit);
+ prt_printf(&buf, "nocow\t%llu\n", req->counters.skipped_nocow);
+ prt_printf(&buf, "nouse\t%llu\n", req->counters.skipped_nouse);
+ prt_printf(&buf, "mi_btree_bitmap\t%llu\n", req->counters.skipped_mi_btree_bitmap);
if (!IS_ERR(ob)) {
prt_printf(&buf, "allocated\t%llu\n", ob->bucket);
@@ -495,47 +489,42 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca,
/**
* bch2_bucket_alloc_trans - allocate a single bucket from a specific device
* @trans: transaction object
- * @ca: device to allocate from
- * @watermark: how important is this allocation?
- * @data_type: BCH_DATA_journal, btree, user...
+ * @req: state for the entire allocation
* @cl: if not NULL, closure to be used to wait if buckets not available
* @nowait: if true, do not wait for buckets to become available
- * @usage: for secondarily also returning the current device usage
*
* Returns: an open_bucket on success, or an ERR_PTR() on failure.
*/
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
- struct bch_dev *ca,
- enum bch_watermark watermark,
- enum bch_data_type data_type,
- struct closure *cl,
- bool nowait,
- struct bch_dev_usage *usage)
+ struct alloc_request *req,
+ struct closure *cl,
+ bool nowait)
{
struct bch_fs *c = trans->c;
+ struct bch_dev *ca = req->ca;
struct open_bucket *ob = NULL;
bool freespace = READ_ONCE(ca->mi.freespace_initialized);
u64 avail;
- struct bucket_alloc_state s = {
- .btree_bitmap = data_type == BCH_DATA_btree,
- };
bool waiting = nowait;
+
+ req->btree_bitmap = req->data_type == BCH_DATA_btree;
+ memset(&req->counters, 0, sizeof(req->counters));
again:
- bch2_dev_usage_read_fast(ca, usage);
- avail = dev_buckets_free(ca, *usage, watermark);
+ bch2_dev_usage_read_fast(ca, &req->usage);
+ avail = dev_buckets_free(ca, req->usage, req->watermark);
- if (usage->buckets[BCH_DATA_need_discard] > avail)
+ if (req->usage.buckets[BCH_DATA_need_discard] > avail)
bch2_dev_do_discards(ca);
- if (usage->buckets[BCH_DATA_need_gc_gens] > avail)
+ if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail)
bch2_gc_gens_async(c);
- if (should_invalidate_buckets(ca, *usage))
+ if (should_invalidate_buckets(ca, req->usage))
bch2_dev_do_invalidates(ca);
if (!avail) {
- if (watermark > BCH_WATERMARK_normal &&
- c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+ if (req->watermark > BCH_WATERMARK_normal &&
+ c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations)
goto alloc;
if (cl && !waiting) {
@@ -554,18 +543,18 @@ again:
closure_wake_up(&c->freelist_wait);
alloc:
ob = likely(freespace)
- ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
- : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
+ ? bch2_bucket_alloc_freelist(trans, req, cl)
+ : bch2_bucket_alloc_early(trans, req, cl);
- if (s.need_journal_commit * 2 > avail)
+ if (req->counters.need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
- if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {
- s.btree_bitmap = BTREE_BITMAP_ANY;
+ if (!ob && req->btree_bitmap != BTREE_BITMAP_ANY) {
+ req->btree_bitmap = BTREE_BITMAP_ANY;
goto alloc;
}
- if (!ob && freespace && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
+ if (!ob && freespace && c->recovery.pass_done < BCH_RECOVERY_PASS_check_alloc_info) {
freespace = false;
goto alloc;
}
@@ -574,7 +563,7 @@ err:
ob = ERR_PTR(-BCH_ERR_no_buckets_found);
if (!IS_ERR(ob))
- ob->data_type = data_type;
+ ob->data_type = req->data_type;
if (!IS_ERR(ob))
count_event(c, bucket_alloc);
@@ -584,7 +573,7 @@ err:
if (!IS_ERR(ob)
? trace_bucket_alloc_enabled()
: trace_bucket_alloc_fail_enabled())
- trace_bucket_alloc2(c, ca, watermark, data_type, cl, usage, &s, ob);
+ trace_bucket_alloc2(c, req, cl, ob);
return ob;
}
@@ -594,12 +583,15 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
enum bch_data_type data_type,
struct closure *cl)
{
- struct bch_dev_usage usage;
struct open_bucket *ob;
+ struct alloc_request req = {
+ .watermark = watermark,
+ .data_type = data_type,
+ .ca = ca,
+ };
bch2_trans_do(c,
- PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
- data_type, cl, false, &usage)));
+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false)));
return ob;
}
@@ -693,24 +685,20 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
}
static int add_new_bucket(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- struct open_bucket *ob)
+ struct alloc_request *req,
+ struct open_bucket *ob)
{
unsigned durability = ob_dev(c, ob)->mi.durability;
- BUG_ON(*nr_effective >= nr_replicas);
+ BUG_ON(req->nr_effective >= req->nr_replicas);
- __clear_bit(ob->dev, devs_may_alloc->d);
- *nr_effective += durability;
- *have_cache |= !durability;
+ __clear_bit(ob->dev, req->devs_may_alloc.d);
+ req->nr_effective += durability;
+ req->have_cache |= !durability;
- ob_push(c, ptrs, ob);
+ ob_push(c, &req->ptrs, ob);
- if (*nr_effective >= nr_replicas)
+ if (req->nr_effective >= req->nr_replicas)
return 1;
if (ob->ec)
return 1;
@@ -718,39 +706,31 @@ static int add_new_bucket(struct bch_fs *c,
}
int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
- struct open_buckets *ptrs,
- struct dev_stripe_state *stripe,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- enum bch_write_flags flags,
- enum bch_data_type data_type,
- enum bch_watermark watermark,
- struct closure *cl)
+ struct alloc_request *req,
+ struct dev_stripe_state *stripe,
+ struct closure *cl)
{
struct bch_fs *c = trans->c;
int ret = -BCH_ERR_insufficient_devices;
- BUG_ON(*nr_effective >= nr_replicas);
+ BUG_ON(req->nr_effective >= req->nr_replicas);
- struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc);
+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc);
darray_for_each(devs_sorted, i) {
- struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i);
- if (!ca)
+ req->ca = bch2_dev_tryget_noerror(c, *i);
+ if (!req->ca)
continue;
- if (!ca->mi.durability && *have_cache) {
- bch2_dev_put(ca);
+ if (!req->ca->mi.durability && req->have_cache) {
+ bch2_dev_put(req->ca);
continue;
}
- struct bch_dev_usage usage;
- struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
- cl, flags & BCH_WRITE_alloc_nowait, &usage);
+ struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req, cl,
+ req->flags & BCH_WRITE_alloc_nowait);
if (!IS_ERR(ob))
- bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
- bch2_dev_put(ca);
+ bch2_dev_stripe_increment_inlined(req->ca, stripe, &req->usage);
+ bch2_dev_put(req->ca);
if (IS_ERR(ob)) {
ret = PTR_ERR(ob);
@@ -759,9 +739,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
- if (add_new_bucket(c, ptrs, devs_may_alloc,
- nr_replicas, nr_effective,
- have_cache, ob)) {
+ if (add_new_bucket(c, req, ob)) {
ret = 0;
break;
}
@@ -779,34 +757,27 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
*/
static int bucket_alloc_from_stripe(struct btree_trans *trans,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_mask *devs_may_alloc,
- u16 target,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- enum bch_watermark watermark,
- enum bch_write_flags flags,
- struct closure *cl)
+ struct alloc_request *req,
+ struct closure *cl)
{
struct bch_fs *c = trans->c;
int ret = 0;
- if (nr_replicas < 2)
+ if (req->nr_replicas < 2)
return 0;
- if (ec_open_bucket(c, ptrs))
+ if (ec_open_bucket(c, &req->ptrs))
return 0;
struct ec_stripe_head *h =
- bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
+ bch2_ec_stripe_head_get(trans, req, 0, cl);
if (IS_ERR(h))
return PTR_ERR(h);
if (!h)
return 0;
- struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
+ struct dev_alloc_list devs_sorted =
+ bch2_dev_alloc_list(c, &req->wp->stripe, &req->devs_may_alloc);
darray_for_each(devs_sorted, i)
for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
if (!h->s->blocks[ec_idx])
@@ -818,9 +789,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
ob->ec = h->s;
ec_stripe_new_get(h->s, STRIPE_REF_io);
- ret = add_new_bucket(c, ptrs, devs_may_alloc,
- nr_replicas, nr_effective,
- have_cache, ob);
+ ret = add_new_bucket(c, req, ob);
goto out;
}
}
@@ -832,65 +801,49 @@ out:
/* Sector allocator */
static bool want_bucket(struct bch_fs *c,
- struct write_point *wp,
- struct bch_devs_mask *devs_may_alloc,
- bool *have_cache, bool ec,
+ struct alloc_request *req,
struct open_bucket *ob)
{
struct bch_dev *ca = ob_dev(c, ob);
- if (!test_bit(ob->dev, devs_may_alloc->d))
+ if (!test_bit(ob->dev, req->devs_may_alloc.d))
return false;
- if (ob->data_type != wp->data_type)
+ if (ob->data_type != req->wp->data_type)
return false;
if (!ca->mi.durability &&
- (wp->data_type == BCH_DATA_btree || ec || *have_cache))
+ (req->wp->data_type == BCH_DATA_btree || req->ec || req->have_cache))
return false;
- if (ec != (ob->ec != NULL))
+ if (req->ec != (ob->ec != NULL))
return false;
return true;
}
static int bucket_alloc_set_writepoint(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- bool ec)
+ struct alloc_request *req)
{
- struct open_buckets ptrs_skip = { .nr = 0 };
struct open_bucket *ob;
unsigned i;
int ret = 0;
- open_bucket_for_each(c, &wp->ptrs, ob, i) {
- if (!ret && want_bucket(c, wp, devs_may_alloc,
- have_cache, ec, ob))
- ret = add_new_bucket(c, ptrs, devs_may_alloc,
- nr_replicas, nr_effective,
- have_cache, ob);
+ req->scratch_ptrs.nr = 0;
+
+ open_bucket_for_each(c, &req->wp->ptrs, ob, i) {
+ if (!ret && want_bucket(c, req, ob))
+ ret = add_new_bucket(c, req, ob);
else
- ob_push(c, &ptrs_skip, ob);
+ ob_push(c, &req->scratch_ptrs, ob);
}
- wp->ptrs = ptrs_skip;
+ req->wp->ptrs = req->scratch_ptrs;
return ret;
}
static int bucket_alloc_set_partial(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache, bool ec,
- enum bch_watermark watermark)
+ struct alloc_request *req)
{
int i, ret = 0;
@@ -905,13 +858,12 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
- if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
+ if (want_bucket(c, req, ob)) {
struct bch_dev *ca = ob_dev(c, ob);
- struct bch_dev_usage usage;
u64 avail;
- bch2_dev_usage_read_fast(ca, &usage);
- avail = dev_buckets_free(ca, usage, watermark) + ca->nr_partial_buckets;
+ bch2_dev_usage_read_fast(ca, &req->usage);
+ avail = dev_buckets_free(ca, req->usage, req->watermark) + ca->nr_partial_buckets;
if (!avail)
continue;
@@ -924,9 +876,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
rcu_read_unlock();
- ret = add_new_bucket(c, ptrs, devs_may_alloc,
- nr_replicas, nr_effective,
- have_cache, ob);
+ ret = add_new_bucket(c, req, ob);
if (ret)
break;
}
@@ -937,61 +887,41 @@ unlock:
}
static int __open_bucket_add_buckets(struct btree_trans *trans,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_list *devs_have,
- u16 target,
- bool erasure_code,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- enum bch_watermark watermark,
- enum bch_write_flags flags,
- struct closure *_cl)
+ struct alloc_request *req,
+ struct closure *_cl)
{
struct bch_fs *c = trans->c;
- struct bch_devs_mask devs;
struct open_bucket *ob;
struct closure *cl = NULL;
unsigned i;
int ret;
- devs = target_rw_devs(c, wp->data_type, target);
+ req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target);
/* Don't allocate from devices we already have pointers to: */
- darray_for_each(*devs_have, i)
- __clear_bit(*i, devs.d);
+ darray_for_each(*req->devs_have, i)
+ __clear_bit(*i, req->devs_may_alloc.d);
- open_bucket_for_each(c, ptrs, ob, i)
- __clear_bit(ob->dev, devs.d);
+ open_bucket_for_each(c, &req->ptrs, ob, i)
+ __clear_bit(ob->dev, req->devs_may_alloc.d);
- ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
- nr_replicas, nr_effective,
- have_cache, erasure_code);
+ ret = bucket_alloc_set_writepoint(c, req);
if (ret)
return ret;
- ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
- nr_replicas, nr_effective,
- have_cache, erasure_code, watermark);
+ ret = bucket_alloc_set_partial(c, req);
if (ret)
return ret;
- if (erasure_code) {
- ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
- target,
- nr_replicas, nr_effective,
- have_cache,
- watermark, flags, _cl);
+ if (req->ec) {
+ ret = bucket_alloc_from_stripe(trans, req, _cl);
} else {
retry_blocking:
/*
* Try nonblocking first, so that if one device is full we'll try from
* other devices:
*/
- ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
- nr_replicas, nr_effective, have_cache,
- flags, wp->data_type, watermark, cl);
+ ret = bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl);
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
@@ -1005,38 +935,27 @@ retry_blocking:
}
static int open_bucket_add_buckets(struct btree_trans *trans,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_list *devs_have,
- u16 target,
- unsigned erasure_code,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- enum bch_watermark watermark,
- enum bch_write_flags flags,
- struct closure *cl)
+ struct alloc_request *req,
+ struct closure *cl)
{
int ret;
- if (erasure_code && !ec_open_bucket(trans->c, ptrs)) {
- ret = __open_bucket_add_buckets(trans, ptrs, wp,
- devs_have, target, erasure_code,
- nr_replicas, nr_effective, have_cache,
- watermark, flags, cl);
+ if (req->ec && !ec_open_bucket(trans->c, &req->ptrs)) {
+ ret = __open_bucket_add_buckets(trans, req, cl);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret;
- if (*nr_effective >= nr_replicas)
+ if (req->nr_effective >= req->nr_replicas)
return 0;
}
- ret = __open_bucket_add_buckets(trans, ptrs, wp,
- devs_have, target, false,
- nr_replicas, nr_effective, have_cache,
- watermark, flags, cl);
+ bool ec = false;
+ swap(ec, req->ec);
+ ret = __open_bucket_add_buckets(trans, req, cl);
+ swap(ec, req->ec);
+
return ret < 0 ? ret : 0;
}
@@ -1289,26 +1208,26 @@ out:
static noinline void
deallocate_extra_replicas(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct open_buckets *ptrs_no_use,
- unsigned extra_replicas)
+ struct alloc_request *req)
{
- struct open_buckets ptrs2 = { 0 };
struct open_bucket *ob;
+ unsigned extra_replicas = req->nr_effective - req->nr_replicas;
unsigned i;
- open_bucket_for_each(c, ptrs, ob, i) {
+ req->scratch_ptrs.nr = 0;
+
+ open_bucket_for_each(c, &req->ptrs, ob, i) {
unsigned d = ob_dev(c, ob)->mi.durability;
if (d && d <= extra_replicas) {
extra_replicas -= d;
- ob_push(c, ptrs_no_use, ob);
+ ob_push(c, &req->wp->ptrs, ob);
} else {
- ob_push(c, &ptrs2, ob);
+ ob_push(c, &req->scratch_ptrs, ob);
}
}
- *ptrs = ptrs2;
+ req->ptrs = req->scratch_ptrs;
}
/*
@@ -1327,51 +1246,53 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
struct write_point **wp_ret)
{
struct bch_fs *c = trans->c;
- struct write_point *wp;
struct open_bucket *ob;
- struct open_buckets ptrs;
- unsigned nr_effective, write_points_nr;
- bool have_cache;
- int ret;
+ unsigned write_points_nr;
int i;
+ struct alloc_request *req = bch2_trans_kmalloc_nomemzero(trans, sizeof(*req));
+ int ret = PTR_ERR_OR_ZERO(req);
+ if (unlikely(ret))
+ return ret;
+
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
erasure_code = false;
+ req->nr_replicas = nr_replicas;
+ req->target = target;
+ req->ec = erasure_code;
+ req->watermark = watermark;
+ req->flags = flags;
+ req->devs_have = devs_have;
+
BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
- ptrs.nr = 0;
- nr_effective = 0;
- write_points_nr = c->write_points_nr;
- have_cache = false;
+ req->ptrs.nr = 0;
+ req->nr_effective = 0;
+ req->have_cache = false;
+ write_points_nr = c->write_points_nr;
+
+ *wp_ret = req->wp = writepoint_find(trans, write_point.v);
- *wp_ret = wp = writepoint_find(trans, write_point.v);
+ req->data_type = req->wp->data_type;
ret = bch2_trans_relock(trans);
if (ret)
goto err;
/* metadata may not allocate on cache devices: */
- if (wp->data_type != BCH_DATA_user)
- have_cache = true;
+ if (req->data_type != BCH_DATA_user)
+ req->have_cache = true;
if (target && !(flags & BCH_WRITE_only_specified_devs)) {
- ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
- target, erasure_code,
- nr_replicas, &nr_effective,
- &have_cache, watermark,
- flags, NULL);
+ ret = open_bucket_add_buckets(trans, req, NULL);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;
/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
- int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
- target, erasure_code,
- nr_replicas, &nr_effective,
- &have_cache, watermark,
- flags, cl);
+ int ret2 = open_bucket_add_buckets(trans, req, cl);
if (!ret2 ||
bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
@@ -1384,45 +1305,38 @@ retry:
* Only try to allocate cache (durability = 0 devices) from the
* specified target:
*/
- have_cache = true;
+ req->have_cache = true;
+ req->target = 0;
- ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
- 0, erasure_code,
- nr_replicas, &nr_effective,
- &have_cache, watermark,
- flags, cl);
+ ret = open_bucket_add_buckets(trans, req, cl);
} else {
- ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
- target, erasure_code,
- nr_replicas, &nr_effective,
- &have_cache, watermark,
- flags, cl);
+ ret = open_bucket_add_buckets(trans, req, cl);
}
alloc_done:
- BUG_ON(!ret && nr_effective < nr_replicas);
+ BUG_ON(!ret && req->nr_effective < req->nr_replicas);
- if (erasure_code && !ec_open_bucket(c, &ptrs))
+ if (erasure_code && !ec_open_bucket(c, &req->ptrs))
pr_debug("failed to get ec bucket: ret %u", ret);
if (ret == -BCH_ERR_insufficient_devices &&
- nr_effective >= nr_replicas_required)
+ req->nr_effective >= nr_replicas_required)
ret = 0;
if (ret)
goto err;
- if (nr_effective > nr_replicas)
- deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
+ if (req->nr_effective > req->nr_replicas)
+ deallocate_extra_replicas(c, req);
/* Free buckets we didn't use: */
- open_bucket_for_each(c, &wp->ptrs, ob, i)
+ open_bucket_for_each(c, &req->wp->ptrs, ob, i)
open_bucket_free_unused(c, ob);
- wp->ptrs = ptrs;
+ req->wp->ptrs = req->ptrs;
- wp->sectors_free = UINT_MAX;
+ req->wp->sectors_free = UINT_MAX;
- open_bucket_for_each(c, &wp->ptrs, ob, i) {
+ open_bucket_for_each(c, &req->wp->ptrs, ob, i) {
/*
* Ensure proper write alignment - either due to misaligned
* bucket sizes (from buggy bcachefs-tools), or writes that mix
@@ -1436,29 +1350,29 @@ alloc_done:
ob->sectors_free = max_t(int, 0, ob->sectors_free - align);
- wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
+ req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free);
}
- wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c));
+ req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c));
/* Did alignment use up space in an open_bucket? */
- if (unlikely(!wp->sectors_free)) {
- bch2_alloc_sectors_done(c, wp);
+ if (unlikely(!req->wp->sectors_free)) {
+ bch2_alloc_sectors_done(c, req->wp);
goto retry;
}
- BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
+ BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX);
return 0;
err:
- open_bucket_for_each(c, &wp->ptrs, ob, i)
- if (ptrs.nr < ARRAY_SIZE(ptrs.v))
- ob_push(c, &ptrs, ob);
+ open_bucket_for_each(c, &req->wp->ptrs, ob, i)
+ if (req->ptrs.nr < ARRAY_SIZE(req->ptrs.v))
+ ob_push(c, &req->ptrs, ob);
else
open_bucket_free_unused(c, ob);
- wp->ptrs = ptrs;
+ req->wp->ptrs = req->ptrs;
- mutex_unlock(&wp->lock);
+ mutex_unlock(&req->wp->lock);
if (bch2_err_matches(ret, BCH_ERR_freelist_empty) &&
try_decrease_writepoints(trans, write_points_nr))
@@ -1474,20 +1388,6 @@ err:
return ret;
}
-struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
-{
- struct bch_dev *ca = ob_dev(c, ob);
-
- return (struct bch_extent_ptr) {
- .type = 1 << BCH_EXTENT_ENTRY_ptr,
- .gen = ob->gen,
- .dev = ob->dev,
- .offset = bucket_to_sector(ca, ob->bucket) +
- ca->mi.bucket_size -
- ob->sectors_free,
- };
-}
-
void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
struct bkey_i *k, unsigned sectors,
bool cached)
@@ -1617,6 +1517,8 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
struct open_bucket *ob;
unsigned i;
+ mutex_lock(&wp->lock);
+
prt_printf(out, "%lu: ", wp->write_point);
prt_human_readable_u64(out, wp->sectors_allocated << 9);
@@ -1634,6 +1536,8 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
open_bucket_for_each(c, &wp->ptrs, ob, i)
bch2_open_bucket_to_text(out, c, ob);
printbuf_indent_sub(out, 2);
+
+ mutex_unlock(&wp->lock);
}
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
@@ -1731,7 +1635,12 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
- for_each_online_member(c, ca) {
+ bch2_printbuf_make_room(&buf, 4096);
+
+ rcu_read_lock();
+ buf.atomic++;
+
+ for_each_online_member_rcu(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
printbuf_indent_add(&buf, 2);
bch2_dev_alloc_debug_to_text(&buf, ca);
@@ -1739,6 +1648,9 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
prt_newline(&buf);
}
+ --buf.atomic;
+ rcu_read_unlock();
+
prt_printf(&buf, "Copygc debug:\n");
printbuf_indent_add(&buf, 2);
bch2_copygc_wait_to_text(&buf, c);
@@ -1750,7 +1662,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_journal_debug_to_text(&buf, &c->journal);
printbuf_indent_sub(&buf, 2);
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 4c1e33cf57c0..2e01c7b61ed1 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -3,8 +3,10 @@
#define _BCACHEFS_ALLOC_FOREGROUND_H
#include "bcachefs.h"
+#include "buckets.h"
#include "alloc_types.h"
#include "extents.h"
+#include "io_write_types.h"
#include "sb-members.h"
#include <linux/hash.h>
@@ -23,6 +25,52 @@ struct dev_alloc_list {
u8 data[BCH_SB_MEMBERS_MAX];
};
+struct alloc_request {
+ unsigned nr_replicas;
+ unsigned target;
+ bool ec;
+ enum bch_watermark watermark;
+ enum bch_write_flags flags;
+ enum bch_data_type data_type;
+ struct bch_devs_list *devs_have;
+ struct write_point *wp;
+
+ /* These fields are used primarily by open_bucket_add_buckets */
+ struct open_buckets ptrs;
+ unsigned nr_effective; /* sum of @ptrs durability */
+ bool have_cache; /* have we allocated from a 0 durability dev */
+ struct bch_devs_mask devs_may_alloc;
+
+ /* bch2_bucket_alloc_set_trans(): */
+ struct bch_dev_usage usage;
+
+ /* bch2_bucket_alloc_trans(): */
+ struct bch_dev *ca;
+
+ enum {
+ BTREE_BITMAP_NO,
+ BTREE_BITMAP_YES,
+ BTREE_BITMAP_ANY,
+ } btree_bitmap;
+
+ struct {
+ u64 buckets_seen;
+ u64 skipped_open;
+ u64 skipped_need_journal_commit;
+ u64 need_journal_commit;
+ u64 skipped_nocow;
+ u64 skipped_nouse;
+ u64 skipped_mi_btree_bitmap;
+ } counters;
+
+ unsigned scratch_nr_replicas;
+ unsigned scratch_nr_effective;
+ bool scratch_have_cache;
+ enum bch_data_type scratch_data_type;
+ struct open_buckets scratch_ptrs;
+ struct bch_devs_mask scratch_devs_may_alloc;
+};
+
struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
struct dev_stripe_state *,
struct bch_devs_mask *);
@@ -173,11 +221,8 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
}
enum bch_write_flags;
-int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
- struct dev_stripe_state *, struct bch_devs_mask *,
- unsigned, unsigned *, bool *, enum bch_write_flags,
- enum bch_data_type, enum bch_watermark,
- struct closure *);
+int bch2_bucket_alloc_set_trans(struct btree_trans *, struct alloc_request *,
+ struct dev_stripe_state *, struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
@@ -189,7 +234,19 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *,
struct closure *,
struct write_point **);
-struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *);
+static inline struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
+{
+ struct bch_dev *ca = ob_dev(c, ob);
+
+ return (struct bch_extent_ptr) {
+ .type = 1 << BCH_EXTENT_ENTRY_ptr,
+ .gen = ob->gen,
+ .dev = ob->dev,
+ .offset = bucket_to_sector(ca, ob->bucket) +
+ ca->mi.bucket_size -
+ ob->sectors_free,
+ };
+}
/*
* Append pointers to the space we just allocated to @k, and mark @sectors space
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index 8f79f46c2a78..e7becdf22cba 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -8,22 +8,6 @@
#include "clock_types.h"
#include "fifo.h"
-struct bucket_alloc_state {
- enum {
- BTREE_BITMAP_NO,
- BTREE_BITMAP_YES,
- BTREE_BITMAP_ANY,
- } btree_bitmap;
-
- u64 buckets_seen;
- u64 skipped_open;
- u64 skipped_need_journal_commit;
- u64 need_journal_commit;
- u64 skipped_nocow;
- u64 skipped_nouse;
- u64 skipped_mi_btree_bitmap;
-};
-
#define BCH_WATERMARKS() \
x(stripe) \
x(normal) \
diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c
new file mode 100644
index 000000000000..a7cd1f0f0964
--- /dev/null
+++ b/fs/bcachefs/async_objs.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Async obj debugging: keep asynchronous objects on (very fast) lists, make
+ * them visibile in debugfs:
+ */
+
+#include "bcachefs.h"
+#include "async_objs.h"
+#include "btree_io.h"
+#include "debug.h"
+#include "io_read.h"
+#include "io_write.h"
+
+#include <linux/debugfs.h>
+
+static void promote_obj_to_text(struct printbuf *out, void *obj)
+{
+ bch2_promote_op_to_text(out, obj);
+}
+
+static void rbio_obj_to_text(struct printbuf *out, void *obj)
+{
+ bch2_read_bio_to_text(out, obj);
+}
+
+static void write_op_obj_to_text(struct printbuf *out, void *obj)
+{
+ bch2_write_op_to_text(out, obj);
+}
+
+static void btree_read_bio_obj_to_text(struct printbuf *out, void *obj)
+{
+ struct btree_read_bio *rbio = obj;
+ bch2_btree_read_bio_to_text(out, rbio);
+}
+
+static void btree_write_bio_obj_to_text(struct printbuf *out, void *obj)
+{
+ struct btree_write_bio *wbio = obj;
+ bch2_bio_to_text(out, &wbio->wbio.bio);
+}
+
+static int bch2_async_obj_list_open(struct inode *inode, struct file *file)
+{
+ struct async_obj_list *list = inode->i_private;
+ struct dump_iter *i;
+
+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+ if (!i)
+ return -ENOMEM;
+
+ file->private_data = i;
+ i->from = POS_MIN;
+ i->iter = 0;
+ i->c = container_of(list, struct bch_fs, async_objs[list->idx]);
+ i->list = list;
+ i->buf = PRINTBUF;
+ return 0;
+}
+
+static ssize_t bch2_async_obj_list_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct async_obj_list *list = i->list;
+ ssize_t ret = 0;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ struct genradix_iter iter;
+ void *obj;
+ fast_list_for_each_from(&list->list, iter, obj, i->iter) {
+ ret = bch2_debugfs_flush_buf(i);
+ if (ret)
+ return ret;
+
+ if (!i->size)
+ break;
+
+ list->obj_to_text(&i->buf, obj);
+ }
+
+ if (i->buf.allocation_failure)
+ ret = -ENOMEM;
+ else
+ i->iter = iter.pos;
+
+ if (!ret)
+ ret = bch2_debugfs_flush_buf(i);
+
+ return ret ?: i->ret;
+}
+
+static const struct file_operations async_obj_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_async_obj_list_open,
+ .release = bch2_dump_release,
+ .read = bch2_async_obj_list_read,
+};
+
+void bch2_fs_async_obj_debugfs_init(struct bch_fs *c)
+{
+ c->async_obj_dir = debugfs_create_dir("async_objs", c->fs_debug_dir);
+
+#define x(n) debugfs_create_file(#n, 0400, c->async_obj_dir, \
+ &c->async_objs[BCH_ASYNC_OBJ_LIST_##n], &async_obj_ops);
+ BCH_ASYNC_OBJ_LISTS()
+#undef x
+}
+
+void bch2_fs_async_obj_exit(struct bch_fs *c)
+{
+ for (unsigned i = 0; i < ARRAY_SIZE(c->async_objs); i++)
+ fast_list_exit(&c->async_objs[i].list);
+}
+
+int bch2_fs_async_obj_init(struct bch_fs *c)
+{
+ for (unsigned i = 0; i < ARRAY_SIZE(c->async_objs); i++) {
+ if (fast_list_init(&c->async_objs[i].list))
+ return -BCH_ERR_ENOMEM_async_obj_init;
+ c->async_objs[i].idx = i;
+ }
+
+#define x(n) c->async_objs[BCH_ASYNC_OBJ_LIST_##n].obj_to_text = n##_obj_to_text;
+ BCH_ASYNC_OBJ_LISTS()
+#undef x
+
+ return 0;
+}
diff --git a/fs/bcachefs/async_objs.h b/fs/bcachefs/async_objs.h
new file mode 100644
index 000000000000..cd6489b8cf76
--- /dev/null
+++ b/fs/bcachefs/async_objs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ASYNC_OBJS_H
+#define _BCACHEFS_ASYNC_OBJS_H
+
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+static inline void __async_object_list_del(struct fast_list *head, unsigned idx)
+{
+ fast_list_remove(head, idx);
+}
+
+static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx)
+{
+ int ret = fast_list_add(head, obj);
+ *idx = ret > 0 ? ret : 0;
+ return ret < 0 ? ret : 0;
+}
+
+#define async_object_list_del(_c, _list, idx) \
+ __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx)
+
+#define async_object_list_add(_c, _list, obj, idx) \
+ __async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx)
+
+void bch2_fs_async_obj_debugfs_init(struct bch_fs *);
+void bch2_fs_async_obj_exit(struct bch_fs *);
+int bch2_fs_async_obj_init(struct bch_fs *);
+
+#else /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */
+
+#define async_object_list_del(_c, _n, idx) do {} while (0)
+
+static inline int __async_object_list_add(void)
+{
+ return 0;
+}
+#define async_object_list_add(_c, _n, obj, idx) __async_object_list_add()
+
+static inline void bch2_fs_async_obj_debugfs_init(struct bch_fs *c) {}
+static inline void bch2_fs_async_obj_exit(struct bch_fs *c) {}
+static inline int bch2_fs_async_obj_init(struct bch_fs *c) { return 0; }
+
+#endif /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */
+
+#endif /* _BCACHEFS_ASYNC_OBJS_H */
diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h
new file mode 100644
index 000000000000..8d713c0f5841
--- /dev/null
+++ b/fs/bcachefs/async_objs_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ASYNC_OBJS_TYPES_H
+#define _BCACHEFS_ASYNC_OBJS_TYPES_H
+
+#define BCH_ASYNC_OBJ_LISTS() \
+ x(promote) \
+ x(rbio) \
+ x(write_op) \
+ x(btree_read_bio) \
+ x(btree_write_bio)
+
+enum bch_async_obj_lists {
+#define x(n) BCH_ASYNC_OBJ_LIST_##n,
+ BCH_ASYNC_OBJ_LISTS()
+#undef x
+ BCH_ASYNC_OBJ_NR
+};
+
+struct async_obj_list {
+ struct fast_list list;
+ void (*obj_to_text)(struct printbuf *, void *);
+ unsigned idx;
+};
+
+#endif /* _BCACHEFS_ASYNC_OBJS_TYPES_H */
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index ff26bb515150..cde7dd115267 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -12,9 +12,20 @@
#include "disk_accounting.h"
#include "error.h"
#include "progress.h"
+#include "recovery_passes.h"
#include <linux/mm.h>
+static int bch2_bucket_bitmap_set(struct bch_dev *, struct bucket_bitmap *, u64);
+
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+ return (struct bbpos) {
+ .btree = bp.btree_id,
+ .pos = bp.pos,
+ };
+}
+
int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
struct bkey_validate_context from)
{
@@ -96,6 +107,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ bool will_check = c->recovery.passes_to_run &
+ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
int ret = 0;
if (insert) {
@@ -110,9 +123,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
-
- bch_err(c, "%s", buf.buf);
- } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
+ } else if (!will_check) {
prt_printf(&buf, "backpointer not found when deleting\n");
printbuf_indent_add(&buf, 2);
@@ -128,8 +139,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, orig_k);
}
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers &&
- __bch2_inconsistent_error(c, &buf))
+ if (!will_check && __bch2_inconsistent_error(c, &buf))
ret = -BCH_ERR_erofs_unfixed_errors;
bch_err(c, "%s", buf.buf);
@@ -174,7 +184,7 @@ err:
static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
{
- return (likely(!bch2_backpointers_no_use_write_buffer)
+ return (!static_branch_unlikely(&bch2_backpointers_no_use_write_buffer)
? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
: bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
@@ -184,7 +194,7 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
struct bkey_s_c visiting_k,
struct bkey_buf *last_flushed)
{
- return likely(!bch2_backpointers_no_use_write_buffer)
+ return !static_branch_unlikely(&bch2_backpointers_no_use_write_buffer)
? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed)
: 0;
}
@@ -192,7 +202,8 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
static int backpointer_target_not_found(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct bkey_s_c target_k,
- struct bkey_buf *last_flushed)
+ struct bkey_buf *last_flushed,
+ bool commit)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
@@ -228,18 +239,77 @@ static int backpointer_target_not_found(struct btree_trans *trans,
}
if (fsck_err(trans, backpointer_to_missing_ptr,
- "%s", buf.buf))
+ "%s", buf.buf)) {
ret = bch2_backpointer_del(trans, bp.k->p);
+ if (ret || !commit)
+ goto out;
+
+ /*
+ * Normally, on transaction commit from inside a transaction,
+ * we'll return -BCH_ERR_transaction_restart_nested, since a
+ * transaction commit invalidates pointers given out by peek().
+ *
+ * However, since we're updating a write buffer btree, if we
+ * return a transaction restart and loop we won't see that the
+ * backpointer has been deleted without an additional write
+ * buffer flush - and those are expensive.
+ *
+ * So we're relying on the caller immediately advancing to the
+ * next backpointer and starting a new transaction immediately
+ * after backpointer_get_key() returns NULL:
+ */
+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ }
+out:
fsck_err:
printbuf_exit(&buf);
return ret;
}
-struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
- struct bkey_s_c_backpointer bp,
- struct btree_iter *iter,
- unsigned iter_flags,
- struct bkey_buf *last_flushed)
+static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ struct btree_iter *iter,
+ struct bkey_buf *last_flushed,
+ bool commit)
+{
+ struct bch_fs *c = trans->c;
+
+ BUG_ON(!bp.v->level);
+
+ bch2_trans_node_iter_init(trans, iter,
+ bp.v->btree_id,
+ bp.v->pos,
+ 0,
+ bp.v->level - 1,
+ 0);
+ struct btree *b = bch2_btree_iter_peek_node(trans, iter);
+ if (IS_ERR_OR_NULL(b))
+ goto err;
+
+ BUG_ON(b->c.level != bp.v->level - 1);
+
+ if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
+ bkey_i_to_s_c(&b->key), bp))
+ return b;
+
+ if (btree_node_will_make_reachable(b)) {
+ b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
+ } else {
+ int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key),
+ last_flushed, commit);
+ b = ret ? ERR_PTR(ret) : NULL;
+ }
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return b;
+}
+
+static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ struct btree_iter *iter,
+ unsigned iter_flags,
+ struct bkey_buf *last_flushed,
+ bool commit)
{
struct bch_fs *c = trans->c;
@@ -277,10 +347,10 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
bch2_trans_iter_exit(trans, iter);
if (!bp.v->level) {
- int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
+ int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} else {
- struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
+ struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
return bkey_s_c_null;
if (IS_ERR_OR_NULL(b))
@@ -295,35 +365,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_buf *last_flushed)
{
- struct bch_fs *c = trans->c;
-
- BUG_ON(!bp.v->level);
-
- bch2_trans_node_iter_init(trans, iter,
- bp.v->btree_id,
- bp.v->pos,
- 0,
- bp.v->level - 1,
- 0);
- struct btree *b = bch2_btree_iter_peek_node(trans, iter);
- if (IS_ERR_OR_NULL(b))
- goto err;
-
- BUG_ON(b->c.level != bp.v->level - 1);
-
- if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
- bkey_i_to_s_c(&b->key), bp))
- return b;
+ return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true);
+}
- if (btree_node_will_make_reachable(b)) {
- b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
- } else {
- int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed);
- b = ret ? ERR_PTR(ret) : NULL;
- }
-err:
- bch2_trans_iter_exit(trans, iter);
- return b;
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ struct btree_iter *iter,
+ unsigned iter_flags,
+ struct bkey_buf *last_flushed)
+{
+ return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true);
}
static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
@@ -437,7 +488,8 @@ found:
bytes = p.crc.compressed_size << 9;
- struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ,
+ BCH_DEV_READ_REF_check_extent_checksums);
if (!ca)
return false;
@@ -474,7 +526,8 @@ err:
if (bio)
bio_put(bio);
kvfree(data_buf);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_check_extent_checksums);
printbuf_exit(&buf);
return ret;
}
@@ -521,7 +574,7 @@ check_existing_bp:
struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);
struct bkey_s_c other_extent =
- bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL);
+ __bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false);
ret = bkey_err(other_extent);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
ret = 0;
@@ -628,22 +681,33 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
- bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
- bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);
+ if (!ca) {
+ rcu_read_unlock();
+ continue;
+ }
- bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr));
+ if (p.ptr.cached && dev_ptr_stale_rcu(ca, &p.ptr)) {
+ rcu_read_unlock();
+ continue;
+ }
+
+ u64 b = PTR_BUCKET_NR(ca, &p.ptr);
+ if (!bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b)) {
+ rcu_read_unlock();
+ continue;
+ }
+
+ bool empty = bch2_bucket_bitmap_test(&ca->bucket_backpointer_empty, b);
rcu_read_unlock();
- if ((check || empty) && !stale) {
- struct bkey_i_backpointer bp;
- bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
+ struct bkey_i_backpointer bp;
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
- int ret = check
- ? check_bp_exists(trans, s, &bp, k)
- : bch2_bucket_backpointer_mod(trans, k, &bp, true);
- if (ret)
- return ret;
- }
+ int ret = !empty
+ ? check_bp_exists(trans, s, &bp, k)
+ : bch2_bucket_backpointer_mod(trans, k, &bp, true);
+ if (ret)
+ return ret;
}
return 0;
@@ -681,14 +745,6 @@ err:
return ret;
}
-static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
-{
- return (struct bbpos) {
- .btree = bp.btree_id,
- .pos = bp.pos,
- };
-}
-
static u64 mem_may_pin_bytes(struct bch_fs *c)
{
struct sysinfo i;
@@ -747,6 +803,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
return ret;
}
+static inline int bch2_fs_going_ro(struct bch_fs *c)
+{
+ return test_bit(BCH_FS_going_ro, &c->flags)
+ ? -EROFS
+ : 0;
+}
+
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct extents_to_bp_state *s)
{
@@ -774,6 +837,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
+ bch2_fs_going_ro(c) ?:
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}));
@@ -813,6 +877,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
+ bool *had_mismatch,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
@@ -820,6 +885,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
bool need_commit = false;
+ *had_mismatch = false;
+
if (a->data_type == BCH_DATA_sb ||
a->data_type == BCH_DATA_journal ||
a->data_type == BCH_DATA_parity)
@@ -890,12 +957,18 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
goto err;
}
- if (!sectors[ALLOC_dirty] &&
- !sectors[ALLOC_stripe] &&
- !sectors[ALLOC_cached])
- __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
- else
- __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
+ bool empty = (sectors[ALLOC_dirty] +
+ sectors[ALLOC_stripe] +
+ sectors[ALLOC_cached]) == 0;
+
+ ret = bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_mismatch,
+ alloc_k.k->p.offset) ?:
+ (empty
+ ? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty,
+ alloc_k.k->p.offset)
+ : 0);
+
+ *had_mismatch = true;
}
err:
bch2_dev_put(ca);
@@ -919,8 +992,14 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
goto next;
struct bpos bucket = bp_pos_to_bucket(ca, pos);
- bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches,
- ca->mi.nbuckets, bucket.offset);
+ u64 next = ca->mi.nbuckets;
+
+ unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets);
+ if (bitmap)
+ next = min_t(u64, next,
+ find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset));
+
+ bucket.offset = next;
if (bucket.offset == ca->mi.nbuckets)
goto next;
@@ -1029,28 +1108,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
int ret = 0;
- /*
- * Can't allow devices to come/go/resize while we have bucket bitmaps
- * allocated
- */
- down_read(&c->state_lock);
-
- for_each_member_device(c, ca) {
- BUG_ON(ca->bucket_backpointer_mismatches);
- ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
- sizeof(unsigned long),
- GFP_KERNEL);
- ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
- sizeof(unsigned long),
- GFP_KERNEL);
- if (!ca->bucket_backpointer_mismatches ||
- !ca->bucket_backpointer_empty) {
- bch2_dev_put(ca);
- ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
- goto err_free_bitmaps;
- }
- }
-
struct btree_trans *trans = bch2_trans_get(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
@@ -1059,23 +1116,24 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k, ({
- check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
+ bool had_mismatch;
+ bch2_fs_going_ro(c) ?:
+ check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
}));
if (ret)
goto err;
- u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0;
+ u64 nr_buckets = 0, nr_mismatches = 0;
for_each_member_device(c, ca) {
nr_buckets += ca->mi.nbuckets;
- nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
- nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets);
+ nr_mismatches += ca->bucket_backpointer_mismatch.nr;
}
- if (!nr_mismatches && !nr_empty)
+ if (!nr_mismatches)
goto err;
bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
- nr_mismatches + nr_empty, nr_buckets);
+ nr_mismatches, nr_buckets);
while (1) {
ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
@@ -1106,23 +1164,71 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
s.bp_start = bpos_successor(s.bp_end);
}
+
+ for_each_member_device(c, ca) {
+ bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
+ bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
+ }
err:
bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c);
bch2_btree_cache_unpin(c);
-err_free_bitmaps:
- for_each_member_device(c, ca) {
- kvfree(ca->bucket_backpointer_empty);
- ca->bucket_backpointer_empty = NULL;
- kvfree(ca->bucket_backpointer_mismatches);
- ca->bucket_backpointer_mismatches = NULL;
- }
- up_read(&c->state_lock);
bch_err_fn(c, ret);
return ret;
}
+static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
+ struct bpos bucket,
+ bool *had_mismatch,
+ struct bkey_buf *last_flushed)
+{
+ struct btree_iter alloc_iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter,
+ BTREE_ID_alloc, bucket,
+ BTREE_ITER_cached);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ return ret;
+}
+
+int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
+ struct bch_dev *ca, u64 bucket,
+ bool copygc,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ bool had_mismatch;
+ int ret = lockrestart_do(trans,
+ check_bucket_backpointer_pos_mismatch(trans, POS(ca->dev_idx, bucket),
+ &had_mismatch, last_flushed));
+ if (ret || !had_mismatch)
+ return ret;
+
+ u64 nr = ca->bucket_backpointer_mismatch.nr;
+ u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0;
+
+ struct printbuf buf = PRINTBUF;
+ __bch2_log_msg_start(ca->name, &buf);
+
+ prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
+ bucket, nr, ca->mi.nbuckets);
+
+ bch2_run_explicit_recovery_pass(c, &buf,
+ BCH_RECOVERY_PASS_check_extents_to_backpointers,
+ nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ return 0;
+}
+
+/* backpointers -> extents */
+
static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
@@ -1238,3 +1344,48 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
bch_err_fn(c, ret);
return ret;
}
+
+static int bch2_bucket_bitmap_set(struct bch_dev *ca, struct bucket_bitmap *b, u64 bit)
+{
+ scoped_guard(mutex, &b->lock) {
+ if (!b->buckets) {
+ b->buckets = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!b->buckets)
+ return -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+ }
+
+ b->nr += !__test_and_set_bit(bit, b->buckets);
+ }
+
+ return 0;
+}
+
+int bch2_bucket_bitmap_resize(struct bucket_bitmap *b, u64 old_size, u64 new_size)
+{
+ scoped_guard(mutex, &b->lock) {
+ if (!b->buckets)
+ return 0;
+
+ unsigned long *n = kvcalloc(BITS_TO_LONGS(new_size),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!n)
+ return -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+
+ memcpy(n, b->buckets,
+ BITS_TO_LONGS(min(old_size, new_size)) * sizeof(unsigned long));
+ kvfree(b->buckets);
+ b->buckets = n;
+ }
+
+ return 0;
+}
+
+void bch2_bucket_bitmap_free(struct bucket_bitmap *b)
+{
+ mutex_lock(&b->lock);
+ kvfree(b->buckets);
+ b->buckets = NULL;
+ b->nr = 0;
+ mutex_unlock(&b->lock);
+}
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index 16575dbc5736..6840561084ce 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -102,7 +102,7 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
struct bkey_i_backpointer *bp,
bool insert)
{
- if (unlikely(bch2_backpointers_no_use_write_buffer))
+ if (static_branch_unlikely(&bch2_backpointers_no_use_write_buffer))
return bch2_bucket_backpointer_mod_nowritebuffer(trans, orig_k, bp, insert);
if (!insert) {
@@ -182,8 +182,20 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
struct btree_iter *, struct bkey_buf *);
+int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64,
+ bool, struct bkey_buf *);
+
int bch2_check_btree_backpointers(struct bch_fs *);
int bch2_check_extents_to_backpointers(struct bch_fs *);
int bch2_check_backpointers_to_extents(struct bch_fs *);
+static inline bool bch2_bucket_bitmap_test(struct bucket_bitmap *b, u64 i)
+{
+ unsigned long *bitmap = READ_ONCE(b->buckets);
+ return bitmap && test_bit(i, bitmap);
+}
+
+int bch2_bucket_bitmap_resize(struct bucket_bitmap *, u64, u64);
+void bch2_bucket_bitmap_free(struct bucket_bitmap *);
+
#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 75f7408da173..7824da2af9d0 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -209,17 +209,18 @@
#include "btree_journal_iter_types.h"
#include "disk_accounting_types.h"
#include "errcode.h"
+#include "fast_list.h"
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
-#include "recovery_passes_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
+#include "snapshot_types.h"
#include "time_stats.h"
#include "util.h"
#ifdef CONFIG_BCACHEFS_DEBUG
-#define BCH_WRITE_REF_DEBUG
+#define ENUMERATED_REF_DEBUG
#endif
#ifndef dynamic_fault
@@ -269,7 +270,8 @@ do { \
#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
-void bch2_print_str(struct bch_fs *, const char *);
+void bch2_print_str(struct bch_fs *, const char *, const char *);
+void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *);
__printf(2, 3)
void bch2_print_opts(struct bch_opts *, const char *, ...);
@@ -293,6 +295,16 @@ do { \
bch2_print(_c, __VA_ARGS__); \
} while (0)
+#define bch2_print_str_ratelimited(_c, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ \
+ if (__ratelimit(&_rs)) \
+ bch2_print_str(_c, __VA_ARGS__); \
+} while (0)
+
#define bch_info(c, fmt, ...) \
bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_info_ratelimited(c, fmt, ...) \
@@ -390,17 +402,20 @@ do { \
"compare them") \
BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \
"Don't use the write buffer for backpointers, enabling "\
- "extra runtime checks")
-
-/* Parameters that should only be compiled in debug mode: */
-#define BCH_DEBUG_PARAMS_DEBUG() \
- BCH_DEBUG_PARAM(expensive_debug_checks, \
- "Enables various runtime debugging checks that " \
- "significantly affect performance") \
+ "extra runtime checks") \
+ BCH_DEBUG_PARAM(debug_check_btree_locking, \
+ "Enable additional asserts for btree locking") \
BCH_DEBUG_PARAM(debug_check_iterators, \
"Enables extra verification for btree iterators") \
+ BCH_DEBUG_PARAM(debug_check_bset_lookups, \
+ "Enables extra verification for bset lookups") \
BCH_DEBUG_PARAM(debug_check_btree_accounting, \
"Verify btree accounting for keys within a node") \
+ BCH_DEBUG_PARAM(debug_check_bkey_unpack, \
+ "Enables extra verification for bkey unpack")
+
+/* Parameters that should only be compiled in debug mode: */
+#define BCH_DEBUG_PARAMS_DEBUG() \
BCH_DEBUG_PARAM(journal_seq_verify, \
"Store the journal sequence number in the version " \
"number of every btree key, and verify that btree " \
@@ -427,22 +442,17 @@ do { \
#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
#endif
-#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
-BCH_DEBUG_PARAMS()
+#define BCH_DEBUG_PARAM(name, description) extern struct static_key_false bch2_##name;
+BCH_DEBUG_PARAMS_ALL()
#undef BCH_DEBUG_PARAM
-#ifndef CONFIG_BCACHEFS_DEBUG
-#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name;
-BCH_DEBUG_PARAMS_DEBUG()
-#undef BCH_DEBUG_PARAM
-#endif
-
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
x(btree_node_split) \
x(btree_node_compact) \
x(btree_node_merge) \
x(btree_node_sort) \
+ x(btree_node_get) \
x(btree_node_read) \
x(btree_node_read_done) \
x(btree_node_write) \
@@ -450,6 +460,10 @@ BCH_DEBUG_PARAMS_DEBUG()
x(btree_interior_update_total) \
x(btree_gc) \
x(data_write) \
+ x(data_write_to_submit) \
+ x(data_write_to_queue) \
+ x(data_write_to_btree_update) \
+ x(data_write_btree_update) \
x(data_read) \
x(data_promote) \
x(journal_flush_write) \
@@ -473,6 +487,7 @@ enum bch_time_stats {
};
#include "alloc_types.h"
+#include "async_objs_types.h"
#include "btree_gc_types.h"
#include "btree_types.h"
#include "btree_node_scan_types.h"
@@ -482,10 +497,12 @@ enum bch_time_stats {
#include "clock_types.h"
#include "disk_groups_types.h"
#include "ec_types.h"
+#include "enumerated_ref_types.h"
#include "journal_types.h"
#include "keylist_types.h"
#include "quota_types.h"
#include "rebalance_types.h"
+#include "recovery_passes_types.h"
#include "replicas_types.h"
#include "sb-members_types.h"
#include "subvolume_types.h"
@@ -514,6 +531,57 @@ struct discard_in_flight {
u64 bucket:63;
};
+#define BCH_DEV_READ_REFS() \
+ x(bch2_online_devs) \
+ x(trans_mark_dev_sbs) \
+ x(read_fua_test) \
+ x(sb_field_resize) \
+ x(write_super) \
+ x(journal_read) \
+ x(fs_journal_alloc) \
+ x(fs_resize_on_mount) \
+ x(btree_node_read) \
+ x(btree_node_read_all_replicas) \
+ x(btree_node_scrub) \
+ x(btree_node_write) \
+ x(btree_node_scan) \
+ x(btree_verify_replicas) \
+ x(btree_node_ondisk_to_text) \
+ x(io_read) \
+ x(check_extent_checksums) \
+ x(ec_block)
+
+enum bch_dev_read_ref {
+#define x(n) BCH_DEV_READ_REF_##n,
+ BCH_DEV_READ_REFS()
+#undef x
+ BCH_DEV_READ_REF_NR,
+};
+
+#define BCH_DEV_WRITE_REFS() \
+ x(journal_write) \
+ x(journal_do_discards) \
+ x(dev_do_discards) \
+ x(discard_one_bucket_fast) \
+ x(do_invalidates) \
+ x(nocow_flush) \
+ x(io_write) \
+ x(ec_block) \
+ x(ec_bucket_zero)
+
+enum bch_dev_write_ref {
+#define x(n) BCH_DEV_WRITE_REF_##n,
+ BCH_DEV_WRITE_REFS()
+#undef x
+ BCH_DEV_WRITE_REF_NR,
+};
+
+struct bucket_bitmap {
+ unsigned long *buckets;
+ u64 nr;
+ struct mutex lock;
+};
+
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -524,8 +592,7 @@ struct bch_dev {
struct percpu_ref ref;
#endif
struct completion ref_completion;
- struct percpu_ref io_ref[2];
- struct completion io_ref_completion[2];
+ struct enumerated_ref io_ref[2];
struct bch_fs *fs;
@@ -559,8 +626,8 @@ struct bch_dev {
u8 *oldest_gen;
unsigned long *buckets_nouse;
- unsigned long *bucket_backpointer_mismatches;
- unsigned long *bucket_backpointer_empty;
+ struct bucket_bitmap bucket_backpointer_mismatch;
+ struct bucket_bitmap bucket_backpointer_empty;
struct bch_dev_usage_full __percpu
*usage;
@@ -572,10 +639,6 @@ struct bch_dev {
unsigned nr_partial_buckets;
unsigned nr_btree_reserve;
- size_t inc_gen_needs_gc;
- size_t inc_gen_really_needs_gc;
- size_t buckets_waiting_on_journal;
-
struct work_struct invalidate_work;
struct work_struct discard_work;
struct mutex discard_buckets_in_flight_lock;
@@ -614,14 +677,15 @@ struct bch_dev {
x(accounting_replay_done) \
x(may_go_rw) \
x(rw) \
+ x(rw_init_done) \
x(was_rw) \
x(stopping) \
x(emergency_ro) \
x(going_ro) \
x(write_disable_complete) \
x(clean_shutdown) \
- x(recovery_running) \
- x(fsck_running) \
+ x(in_recovery) \
+ x(in_fsck) \
x(initial_gc_unfixed) \
x(need_delete_dead_snapshots) \
x(error) \
@@ -648,8 +712,10 @@ struct btree_transaction_stats {
struct bch2_time_stats lock_hold_times;
struct mutex lock;
unsigned nr_max_paths;
- unsigned journal_entries_size;
unsigned max_mem;
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_trans_kmalloc_trace trans_kmalloc_trace;
+#endif
char *max_paths_text;
};
@@ -670,9 +736,6 @@ struct btree_trans_buf {
struct btree_trans *trans;
};
-#define BCACHEFS_ROOT_SUBVOL_INUM \
- ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
-
#define BCH_WRITE_REFS() \
x(journal) \
x(trans) \
@@ -694,7 +757,8 @@ struct btree_trans_buf {
x(snapshot_delete_pagecache) \
x(sysfs) \
x(btree_write_buffer) \
- x(btree_node_scrub)
+ x(btree_node_scrub) \
+ x(async_recovery_passes)
enum bch_write_ref {
#define x(n) BCH_WRITE_REF_##n,
@@ -728,11 +792,7 @@ struct bch_fs {
struct rw_semaphore state_lock;
/* Counts outstanding writes, for clean transition to read-only */
-#ifdef BCH_WRITE_REF_DEBUG
- atomic_long_t writes[BCH_WRITE_REF_NR];
-#else
- struct percpu_ref writes;
-#endif
+ struct enumerated_ref writes;
/*
* Certain operations are only allowed in single threaded mode, during
* recovery, and we want to assert that this is the case:
@@ -776,6 +836,7 @@ struct bch_fs {
u8 nr_devices;
u8 clean;
+ bool multi_device; /* true if we've ever had more than one device */
u8 encryption_type;
@@ -785,6 +846,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
+ u64 recovery_passes_required;
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
} sb;
@@ -809,7 +871,7 @@ struct bch_fs {
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
- struct work_struct snapshot_delete_work;
+ struct snapshot_delete snapshot_delete;
struct work_struct snapshot_wait_for_pagecache_and_delete_work;
snapshot_id_list snapshots_unlinked;
struct mutex snapshots_unlinked_lock;
@@ -874,7 +936,7 @@ struct bch_fs {
struct btree_write_buffer btree_write_buffer;
struct workqueue_struct *btree_update_wq;
- struct workqueue_struct *btree_io_complete_wq;
+ struct workqueue_struct *btree_write_complete_wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
/*
@@ -885,6 +947,7 @@ struct bch_fs {
struct workqueue_struct *write_ref_wq;
/* ALLOCATION */
+ struct bch_devs_mask online_devs;
struct bch_devs_mask rw_devs[BCH_DATA_NR];
unsigned long rw_devs_change_count;
@@ -979,6 +1042,10 @@ struct bch_fs {
nocow_locks;
struct rhashtable promote_table;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
+#endif
+
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
size_t zstd_workspace_size;
@@ -1048,25 +1115,12 @@ struct bch_fs {
/* RECOVERY */
u64 journal_replay_seq_start;
u64 journal_replay_seq_end;
- /*
- * Two different uses:
- * "Has this fsck pass?" - i.e. should this type of error be an
- * emergency read-only
- * And, in certain situations fsck will rewind to an earlier pass: used
- * for signaling to the toplevel code which pass we want to run now.
- */
- enum bch_recovery_pass curr_recovery_pass;
- enum bch_recovery_pass next_recovery_pass;
- /* bitmask of recovery passes that we actually ran */
- u64 recovery_passes_complete;
- /* never rewinds version of curr_recovery_pass */
- enum bch_recovery_pass recovery_pass_done;
- spinlock_t recovery_pass_lock;
- struct semaphore online_fsck_mutex;
+ struct bch_fs_recovery recovery;
/* DEBUG JUNK */
struct dentry *fs_debug_dir;
struct dentry *btree_debug_dir;
+ struct dentry *async_obj_dir;
struct btree_debug btree_debug[BTREE_ID_NR];
struct btree *verify_data;
struct btree_node *verify_ondisk;
@@ -1108,54 +1162,6 @@ struct bch_fs {
extern struct wait_queue_head bch2_read_only_wait;
-static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- atomic_long_inc(&c->writes[ref]);
-#else
- percpu_ref_get(&c->writes);
-#endif
-}
-
-static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- return !test_bit(BCH_FS_going_ro, &c->flags) &&
- atomic_long_inc_not_zero(&c->writes[ref]);
-#else
- return percpu_ref_tryget(&c->writes);
-#endif
-}
-
-static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- return !test_bit(BCH_FS_going_ro, &c->flags) &&
- atomic_long_inc_not_zero(&c->writes[ref]);
-#else
- return percpu_ref_tryget_live(&c->writes);
-#endif
-}
-
-static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- long v = atomic_long_dec_return(&c->writes[ref]);
-
- BUG_ON(v < 0);
- if (v)
- return;
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
- if (atomic_long_read(&c->writes[i]))
- return;
-
- set_bit(BCH_FS_write_disable_complete, &c->flags);
- wake_up(&bch2_read_only_wait);
-#else
- percpu_ref_put(&c->writes);
-#endif
-}
-
static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
{
if (test_bit(BCH_FS_stopping, &c->flags))
@@ -1256,4 +1262,17 @@ static inline unsigned data_replicas_required(struct bch_fs *c)
#define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
+/*
+ * This is needed because discard is both a filesystem option and a device
+ * option, and mount options are supposed to apply to that mount and not be
+ * persisted, i.e. if it's set as a mount option we can't propagate it to the
+ * device.
+ */
+static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
+{
+ return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
+ ? c->opts.discard
+ : ca->mi.discard;
+}
+
#endif /* _BCACHEFS_H */
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index d6e4a496f02b..b4a04df5ea95 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -497,7 +497,8 @@ struct bch_sb_field {
x(members_v2, 11) \
x(errors, 12) \
x(ext, 13) \
- x(downgrade, 14)
+ x(downgrade, 14) \
+ x(recovery_passes, 15)
#include "alloc_background_format.h"
#include "dirent_format.h"
@@ -510,6 +511,7 @@ struct bch_sb_field {
#include "logged_ops_format.h"
#include "lru_format.h"
#include "quota_format.h"
+#include "recovery_passes_format.h"
#include "reflink_format.h"
#include "replicas_format.h"
#include "snapshot_format.h"
@@ -695,7 +697,10 @@ struct bch_sb_field_ext {
x(stripe_backpointers, BCH_VERSION(1, 22)) \
x(stripe_lru, BCH_VERSION(1, 23)) \
x(casefolding, BCH_VERSION(1, 24)) \
- x(extent_flags, BCH_VERSION(1, 25))
+ x(extent_flags, BCH_VERSION(1, 25)) \
+ x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \
+ x(fast_device_removal, BCH_VERSION(1, 27)) \
+ x(inode_has_case_insensitive, BCH_VERSION(1, 28))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -846,7 +851,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
-/* one free bit */
+LE64_BITMASK(BCH_SB_MULTI_DEVICE, struct bch_sb, flags[3], 63, 64);
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
@@ -867,7 +872,9 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
+LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22);
LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23);
+LE64_BITMASK(BCH_SB_REBALANCE_AC_ONLY, struct bch_sb, flags[6], 23, 24);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@@ -922,7 +929,9 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
x(alloc_v2, 17) \
x(extents_across_btree_nodes, 18) \
x(incompat_version_field, 19) \
- x(casefolding, 20)
+ x(casefolding, 20) \
+ x(no_alloc_info, 21) \
+ x(small_image, 22)
#define BCH_SB_FEATURES_ALWAYS \
(BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \
@@ -989,6 +998,19 @@ enum bch_error_actions {
BCH_ON_ERROR_NR
};
+#define BCH_DEGRADED_ACTIONS() \
+ x(ask, 0) \
+ x(yes, 1) \
+ x(very, 2) \
+ x(no, 3)
+
+enum bch_degraded_actions {
+#define x(t, n) BCH_DEGRADED_##t = n,
+ BCH_DEGRADED_ACTIONS()
+#undef x
+ BCH_DEGRADED_ACTIONS_NR
+};
+
#define BCH_STR_HASH_TYPES() \
x(crc32c, 0) \
x(crc64, 1) \
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index 995ba32e9b6e..ee823c640642 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -47,11 +47,9 @@ void bch2_bkey_packed_to_binary_text(struct printbuf *out,
}
}
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
- const struct bkey *unpacked,
- const struct bkey_format *format)
+static void __bch2_bkey_pack_verify(const struct bkey_packed *packed,
+ const struct bkey *unpacked,
+ const struct bkey_format *format)
{
struct bkey tmp;
@@ -95,11 +93,13 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
}
}
-#else
static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
- const struct bkey *unpacked,
- const struct bkey_format *format) {}
-#endif
+ const struct bkey *unpacked,
+ const struct bkey_format *format)
+{
+ if (static_branch_unlikely(&bch2_debug_check_bkey_unpack))
+ __bch2_bkey_pack_verify(packed, unpacked, format);
+}
struct pack_state {
const struct bkey_format *format;
@@ -398,7 +398,6 @@ static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v)
return ret;
}
-#ifdef CONFIG_BCACHEFS_DEBUG
static bool bkey_packed_successor(struct bkey_packed *out,
const struct btree *b,
struct bkey_packed k)
@@ -455,7 +454,6 @@ static bool bkey_format_has_too_big_fields(const struct bkey_format *f)
return false;
}
-#endif
/*
* Returns a packed key that compares <= in
@@ -472,9 +470,7 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
const struct bkey_format *f = &b->format;
struct pack_state state = pack_state_init(f, out);
u64 *w = out->_data;
-#ifdef CONFIG_BCACHEFS_DEBUG
struct bpos orig = in;
-#endif
bool exact = true;
unsigned i;
@@ -527,18 +523,18 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
out->format = KEY_FORMAT_LOCAL_BTREE;
out->type = KEY_TYPE_deleted;
-#ifdef CONFIG_BCACHEFS_DEBUG
- if (exact) {
- BUG_ON(bkey_cmp_left_packed(b, out, &orig));
- } else {
- struct bkey_packed successor;
+ if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) {
+ if (exact) {
+ BUG_ON(bkey_cmp_left_packed(b, out, &orig));
+ } else {
+ struct bkey_packed successor;
- BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
- BUG_ON(bkey_packed_successor(&successor, b, *out) &&
- bkey_cmp_left_packed(b, &successor, &orig) < 0 &&
- !bkey_format_has_too_big_fields(f));
+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
+ BUG_ON(bkey_packed_successor(&successor, b, *out) &&
+ bkey_cmp_left_packed(b, &successor, &orig) < 0 &&
+ !bkey_format_has_too_big_fields(f));
+ }
}
-#endif
return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER;
}
@@ -627,14 +623,13 @@ struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
}
}
-#ifdef CONFIG_BCACHEFS_DEBUG
- {
+ if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) {
struct printbuf buf = PRINTBUF;
BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf));
printbuf_exit(&buf);
}
-#endif
+
return ret;
}
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index 054e2d5e8448..3ccd521c190a 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -191,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return bpos_eq(l.k->p, r.k->p) &&
+ l.k->size == r.k->size &&
bkey_bytes(l.k) == bkey_bytes(r.k) &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
@@ -397,8 +398,7 @@ __bkey_unpack_key_format_checked(const struct btree *b,
compiled_unpack_fn unpack_fn = b->aux_data;
unpack_fn(dst, src);
- if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
- bch2_expensive_debug_checks) {
+ if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) {
struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 00d05ccfaf73..fcd8c82cba4f 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -356,7 +356,7 @@ bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
return ops->key_merge &&
bch2_bkey_maybe_mergable(l.k, r.k) &&
(u64) l.k->size + r.k->size <= KEY_SIZE_MAX &&
- !bch2_key_merging_disabled &&
+ !static_branch_unlikely(&bch2_key_merging_disabled) &&
ops->key_merge(c, l, r);
}
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 9a4a83d6fd2d..32841f762eb2 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -144,8 +144,6 @@ struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
return nr;
}
-#ifdef CONFIG_BCACHEFS_DEBUG
-
void __bch2_verify_btree_nr_keys(struct btree *b)
{
struct btree_nr_keys nr = bch2_btree_node_count_keys(b);
@@ -153,7 +151,7 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
-static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
+static void __bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
struct btree *b)
{
struct btree_node_iter iter = *_iter;
@@ -190,8 +188,8 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
}
}
-void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
- struct btree *b)
+void __bch2_btree_node_iter_verify(struct btree_node_iter *iter,
+ struct btree *b)
{
struct btree_node_iter_set *set, *s2;
struct bkey_packed *k, *p;
@@ -237,8 +235,8 @@ found:
}
}
-void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
- struct bkey_packed *insert, unsigned clobber_u64s)
+static void __bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
+ struct bkey_packed *insert, unsigned clobber_u64s)
{
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
@@ -285,12 +283,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
#endif
}
-#else
-
-static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
- struct btree *b) {}
+static inline void bch2_verify_insert_pos(struct btree *b,
+ struct bkey_packed *where,
+ struct bkey_packed *insert,
+ unsigned clobber_u64s)
+{
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
+ __bch2_verify_insert_pos(b, where, insert, clobber_u64s);
+}
-#endif
/* Auxiliary search trees */
@@ -361,9 +362,8 @@ static struct bkey_float *bkey_float(const struct btree *b,
return ro_aux_tree_base(b, t)->f + idx;
}
-static void bset_aux_tree_verify(struct btree *b)
+static void __bset_aux_tree_verify(struct btree *b)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
for_each_bset(b, t) {
if (t->aux_data_offset == U16_MAX)
continue;
@@ -375,7 +375,12 @@ static void bset_aux_tree_verify(struct btree *b)
BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
}
-#endif
+}
+
+static inline void bset_aux_tree_verify(struct btree *b)
+{
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
+ __bset_aux_tree_verify(b);
}
void bch2_btree_keys_init(struct btree *b)
@@ -495,15 +500,11 @@ static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t,
};
}
-static void bch2_bset_verify_rw_aux_tree(struct btree *b,
- struct bset_tree *t)
+static void __bch2_bset_verify_rw_aux_tree(struct btree *b, struct bset_tree *t)
{
struct bkey_packed *k = btree_bkey_first(b, t);
unsigned j = 0;
- if (!bch2_expensive_debug_checks)
- return;
-
BUG_ON(bset_has_ro_aux_tree(t));
if (!bset_has_rw_aux_tree(t))
@@ -530,6 +531,13 @@ start:
}
}
+static inline void bch2_bset_verify_rw_aux_tree(struct btree *b,
+ struct bset_tree *t)
+{
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
+ __bch2_bset_verify_rw_aux_tree(b, t);
+}
+
/* returns idx of first entry >= offset: */
static unsigned rw_aux_tree_bsearch(struct btree *b,
struct bset_tree *t,
@@ -869,7 +877,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
k = p;
}
- if (bch2_expensive_debug_checks) {
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) {
BUG_ON(ret >= orig_k);
for (i = ret
@@ -1195,7 +1203,7 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
bkey_iter_pos_cmp(b, m, search) < 0)
m = bkey_p_next(m);
- if (bch2_expensive_debug_checks) {
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) {
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
BUG_ON(prev &&
@@ -1435,9 +1443,9 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
struct btree *b)
{
- if (bch2_expensive_debug_checks) {
- bch2_btree_node_iter_verify(iter, b);
- bch2_btree_node_iter_next_check(iter, b);
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) {
+ __bch2_btree_node_iter_verify(iter, b);
+ __bch2_btree_node_iter_next_check(iter, b);
}
__bch2_btree_node_iter_advance(iter, b);
@@ -1453,8 +1461,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
struct btree_node_iter_set *set;
unsigned end = 0;
- if (bch2_expensive_debug_checks)
- bch2_btree_node_iter_verify(iter, b);
+ bch2_btree_node_iter_verify(iter, b);
for_each_bset(b, t) {
k = bch2_bkey_prev_all(b, t,
@@ -1489,8 +1496,7 @@ found:
iter->data[0].k = __btree_node_key_to_offset(b, prev);
iter->data[0].end = end;
- if (bch2_expensive_debug_checks)
- bch2_btree_node_iter_verify(iter, b);
+ bch2_btree_node_iter_verify(iter, b);
return prev;
}
diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h
index 6953d55b72cc..a15ecf9d006e 100644
--- a/fs/bcachefs/bset.h
+++ b/fs/bcachefs/bset.h
@@ -517,27 +517,19 @@ void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned);
void bch2_dump_btree_node(struct bch_fs *, struct btree *);
void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
-#ifdef CONFIG_BCACHEFS_DEBUG
-
void __bch2_verify_btree_nr_keys(struct btree *);
-void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
-void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
- struct bkey_packed *, unsigned);
-
-#else
+void __bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
-static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
- struct btree *b) {}
-static inline void bch2_verify_insert_pos(struct btree *b,
- struct bkey_packed *where,
- struct bkey_packed *insert,
- unsigned clobber_u64s) {}
-#endif
+ struct btree *b)
+{
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
+ __bch2_btree_node_iter_verify(iter, b);
+}
static inline void bch2_verify_btree_nr_keys(struct btree *b)
{
- if (bch2_debug_check_btree_accounting)
+ if (static_branch_unlikely(&bch2_debug_check_btree_accounting))
__bch2_verify_btree_nr_keys(b);
}
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 9b80201c7982..8557cbd3d818 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -17,12 +17,6 @@
#include <linux/sched/mm.h>
#include <linux/swap.h>
-#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
-do { \
- if (shrinker_counter) \
- bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \
-} while (0)
-
const char * const bch2_btree_node_flags[] = {
"typebit",
"typebit",
@@ -350,115 +344,118 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
}
-/*
- * this version is for btree nodes that have already been freed (we're not
- * reaping a real btree node)
- */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
+static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
+ bool flush, bool locked)
{
struct btree_cache *bc = &c->btree_cache;
- int ret = 0;
lockdep_assert_held(&bc->lock);
-wait_on_io:
- if (b->flags & ((1U << BTREE_NODE_dirty)|
- (1U << BTREE_NODE_read_in_flight)|
+
+ if (btree_node_noevict(b)) {
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_noevict]++;
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ }
+ if (btree_node_write_blocked(b)) {
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_blocked]++;
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ }
+ if (btree_node_will_make_reachable(b)) {
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_will_make_reachable]++;
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ }
+
+ if (btree_node_dirty(b)) {
+ if (!flush) {
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_dirty]++;
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ }
+
+ if (locked) {
+ /*
+ * Using the underscore version because we don't want to compact
+ * bsets after the write, since this node is about to be evicted
+ * - unless btree verify mode is enabled, since it runs out of
+ * the post write cleanup:
+ */
+ if (static_branch_unlikely(&bch2_verify_btree_ondisk))
+ bch2_btree_node_write(c, b, SIX_LOCK_intent,
+ BTREE_WRITE_cache_reclaim);
+ else
+ __bch2_btree_node_write(c, b,
+ BTREE_WRITE_cache_reclaim);
+ }
+ }
+
+ if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
(1U << BTREE_NODE_write_in_flight))) {
if (!flush) {
- if (btree_node_dirty(b))
- BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
- else if (btree_node_read_in_flight(b))
- BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+ if (btree_node_read_in_flight(b))
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_read_in_flight]++;
else if (btree_node_write_in_flight(b))
- BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_in_flight]++;
return -BCH_ERR_ENOMEM_btree_node_reclaim;
}
+ if (locked)
+ return -EINTR;
+
/* XXX: waiting on IO with btree cache lock held */
bch2_btree_node_wait_on_read(b);
bch2_btree_node_wait_on_write(b);
}
+ return 0;
+}
+
+/*
+ * this version is for btree nodes that have already been freed (we're not
+ * reaping a real btree node)
+ */
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ int ret = 0;
+
+ lockdep_assert_held(&bc->lock);
+retry_unlocked:
+ ret = __btree_node_reclaim_checks(c, b, flush, false);
+ if (ret)
+ return ret;
+
if (!six_trylock_intent(&b->c.lock)) {
- BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++;
return -BCH_ERR_ENOMEM_btree_node_reclaim;
}
if (!six_trylock_write(&b->c.lock)) {
- BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
- goto out_unlock_intent;
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_write]++;
+ six_unlock_intent(&b->c.lock);
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
}
/* recheck under lock */
- if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
- (1U << BTREE_NODE_write_in_flight))) {
- if (!flush) {
- if (btree_node_read_in_flight(b))
- BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
- else if (btree_node_write_in_flight(b))
- BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
- goto out_unlock;
- }
+ ret = __btree_node_reclaim_checks(c, b, flush, true);
+ if (ret) {
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
- goto wait_on_io;
- }
-
- if (btree_node_noevict(b)) {
- BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
- goto out_unlock;
- }
- if (btree_node_write_blocked(b)) {
- BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
- goto out_unlock;
- }
- if (btree_node_will_make_reachable(b)) {
- BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
- goto out_unlock;
+ if (ret == -EINTR)
+ goto retry_unlocked;
+ return ret;
}
- if (btree_node_dirty(b)) {
- if (!flush) {
- BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
- goto out_unlock;
- }
- /*
- * Using the underscore version because we don't want to compact
- * bsets after the write, since this node is about to be evicted
- * - unless btree verify mode is enabled, since it runs out of
- * the post write cleanup:
- */
- if (bch2_verify_btree_ondisk)
- bch2_btree_node_write(c, b, SIX_LOCK_intent,
- BTREE_WRITE_cache_reclaim);
- else
- __bch2_btree_node_write(c, b,
- BTREE_WRITE_cache_reclaim);
-
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
- goto wait_on_io;
- }
-out:
if (b->hash_val && !ret)
trace_and_count(c, btree_cache_reap, c, b);
- return ret;
-out_unlock:
- six_unlock_write(&b->c.lock);
-out_unlock_intent:
- six_unlock_intent(&b->c.lock);
- ret = -BCH_ERR_ENOMEM_btree_node_reclaim;
- goto out;
+ return 0;
}
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
{
- return __btree_node_reclaim(c, b, false, shrinker_counter);
+ return __btree_node_reclaim(c, b, false);
}
static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
- return __btree_node_reclaim(c, b, true, false);
+ return __btree_node_reclaim(c, b, true);
}
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -476,7 +473,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
unsigned long ret = SHRINK_STOP;
bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4;
- if (bch2_btree_shrinker_disabled)
+ if (static_branch_unlikely(&bch2_btree_shrinker_disabled))
return SHRINK_STOP;
mutex_lock(&bc->lock);
@@ -490,7 +487,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
* IO can always make forward progress:
*/
can_free = btree_cache_can_free(list);
- nr = min_t(unsigned long, nr, can_free);
+ if (nr > can_free) {
+ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_cache_reserve] += nr - can_free;
+ nr = can_free;
+ }
i = 0;
list_for_each_entry_safe(b, t, &bc->freeable, list) {
@@ -506,7 +506,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
if (touched >= nr)
goto out;
- if (!btree_node_reclaim(c, b, true)) {
+ if (!btree_node_reclaim(c, b)) {
btree_node_data_free(bc, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -522,7 +522,7 @@ restart:
clear_btree_node_accessed(b);
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
--touched;;
- } else if (!btree_node_reclaim(c, b, true)) {
+ } else if (!btree_node_reclaim(c, b)) {
__bch2_btree_node_hash_remove(bc, b);
__btree_node_data_free(bc, b);
@@ -569,7 +569,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
{
struct btree_cache_list *list = shrink->private_data;
- if (bch2_btree_shrinker_disabled)
+ if (static_branch_unlikely(&bch2_btree_shrinker_disabled))
return 0;
return btree_cache_can_free(list);
@@ -755,7 +755,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
list_for_each_entry_reverse(b, &bc->live[i].list, list)
- if (!btree_node_reclaim(c, b, false))
+ if (!btree_node_reclaim(c, b))
return b;
while (1) {
@@ -790,7 +790,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
* disk node. Check the freed list before allocating a new one:
*/
list_for_each_entry(b, freed, list)
- if (!btree_node_reclaim(c, b, false)) {
+ if (!btree_node_reclaim(c, b)) {
list_del_init(&b->list);
goto got_node;
}
@@ -817,7 +817,7 @@ got_node:
* the list. Check if there's any freed nodes there:
*/
list_for_each_entry(b2, &bc->freeable, list)
- if (!btree_node_reclaim(c, b2, false)) {
+ if (!btree_node_reclaim(c, b2)) {
swap(b->data, b2->data);
swap(b->aux_data, b2->aux_data);
@@ -852,7 +852,6 @@ out:
b->sib_u64s[1] = 0;
b->whiteout_u64s = 0;
bch2_btree_keys_init(b);
- set_btree_node_accessed(b);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
start_time);
@@ -978,7 +977,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
/* Unlock before doing IO: */
six_unlock_intent(&b->c.lock);
- bch2_trans_unlock_noassert(trans);
+ bch2_trans_unlock(trans);
bch2_btree_node_read(trans, b, sync);
@@ -1004,7 +1003,7 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
struct printbuf buf = PRINTBUF;
- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+ if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations)
return;
prt_printf(&buf,
@@ -1286,6 +1285,10 @@ lock_node:
six_unlock_read(&b->c.lock);
goto retry;
}
+
+ /* avoid atomic set bit if it's not needed: */
+ if (!btree_node_accessed(b))
+ set_btree_node_accessed(b);
}
/* XXX: waiting on IO with btree locks held: */
@@ -1301,10 +1304,6 @@ lock_node:
prefetch(p + L1_CACHE_BYTES * 2);
}
- /* avoid atomic set bit if it's not needed: */
- if (!btree_node_accessed(b))
- set_btree_node_accessed(b);
-
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
@@ -1493,9 +1492,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
prt_btree_cache_line(out, c, "live:", bc->live[0].nr);
prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr);
- prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable);
+ prt_btree_cache_line(out, c, "reserve:", bc->nr_reserve);
+ prt_btree_cache_line(out, c, "freed:", bc->nr_freeable);
prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty));
- prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
+ prt_printf(out, "cannibalize lock:\t%s\n", bc->alloc_lock ? "held" : "not held");
prt_newline(out);
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) {
@@ -1506,6 +1506,7 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
}
prt_newline(out);
+ prt_printf(out, "counters since mount:\n");
prt_printf(out, "freed:\t%zu\n", bc->nr_freed);
prt_printf(out, "not freed:\n");
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 37b69d89341f..91b6395421df 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -22,6 +22,7 @@
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@@ -370,20 +371,13 @@ again:
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
- if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
- trans, btree_node_read_error,
- "Topology repair: unreadable btree node at\n%s",
- buf.buf)) {
+ if (bch2_err_matches(ret, EIO)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
if (ret)
break;
-
- ret = bch2_btree_lost_data(c, b->c.btree_id);
- if (ret)
- break;
continue;
}
@@ -545,9 +539,6 @@ int bch2_check_topology(struct bch_fs *c)
bch2_btree_id_to_text(&buf, i);
if (r->error) {
- ret = bch2_btree_lost_data(c, i);
- if (ret)
- break;
reconstruct_root:
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
@@ -628,7 +619,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
deleted.p = k.k->p;
if (initial) {
- BUG_ON(bch2_journal_seq_verify &&
+ BUG_ON(static_branch_unlikely(&bch2_journal_seq_verify) &&
k.k->bversion.lo > atomic64_read(&c->journal.seq));
if (fsck_err_on(btree_id != BTREE_ID_accounting &&
@@ -1088,6 +1079,10 @@ out:
* allocator thread - issue wakeup in case they blocked on gc_lock:
*/
closure_wake_up(&c->freelist_wait);
+
+ if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
+ bch2_sb_members_clean_deleted(c);
+
bch_err_fn(c, ret);
return ret;
}
@@ -1256,26 +1251,21 @@ static void bch2_gc_gens_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
bch2_gc_gens(c);
- bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
void bch2_gc_gens_async(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) &&
+ if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_gc_gens) &&
!queue_work(c->write_ref_wq, &c->gc_gens_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
-void bch2_fs_btree_gc_exit(struct bch_fs *c)
-{
-}
-
-int bch2_fs_btree_gc_init(struct bch_fs *c)
+void bch2_fs_btree_gc_init_early(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
- return 0;
}
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 9693a90a48a2..ec77662369a2 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -83,7 +83,6 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
int bch2_gc_gens(struct bch_fs *);
void bch2_gc_gens_async(struct bch_fs *);
-void bch2_fs_btree_gc_exit(struct bch_fs *);
-int bch2_fs_btree_gc_init(struct bch_fs *);
+void bch2_fs_btree_gc_init_early(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_GC_H */
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 60782f3e5aec..34018296053a 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "async_objs.h"
#include "bkey_buf.h"
#include "bkey_methods.h"
#include "bkey_sort.h"
@@ -13,6 +14,7 @@
#include "buckets.h"
#include "checksum.h"
#include "debug.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
@@ -514,19 +516,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca,
+ bool print_pos,
struct btree *b, struct bset *i, struct bkey_packed *k,
- unsigned offset, int write)
+ unsigned offset, int rw)
{
- prt_printf(out, bch2_log_msg(c, "%s"),
- write == READ
- ? "error validating btree node "
- : "corrupt btree node before write ");
+ if (print_pos) {
+ prt_str(out, rw == READ
+ ? "error validating btree node "
+ : "corrupt btree node before write ");
+ prt_printf(out, "at btree ");
+ bch2_btree_pos_to_text(out, c, b);
+ prt_newline(out);
+ }
+
if (ca)
- prt_printf(out, "on %s ", ca->name);
- prt_printf(out, "at btree ");
- bch2_btree_pos_to_text(out, c, b);
+ prt_printf(out, "%s ", ca->name);
- prt_printf(out, "\nnode offset %u/%u",
+ prt_printf(out, "node offset %u/%u",
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
if (i)
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
@@ -537,75 +543,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_str(out, ": ");
}
-__printf(10, 11)
+__printf(11, 12)
static int __btree_err(int ret,
struct bch_fs *c,
struct bch_dev *ca,
struct btree *b,
struct bset *i,
struct bkey_packed *k,
- int write,
- bool have_retry,
+ int rw,
enum bch_sb_error_id err_type,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg,
const char *fmt, ...)
{
- bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
+ if (c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
+ return -BCH_ERR_fsck_fix;
+
+ bool have_retry = false;
+ int ret2;
+
+ if (ca) {
+ bch2_mark_btree_validate_failure(failed, ca->dev_idx);
+
+ struct extent_ptr_decoded pick;
+ have_retry = !bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ failed, &pick, -1);
+ }
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
ret = -BCH_ERR_btree_node_read_err_fixable;
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
ret = -BCH_ERR_btree_node_read_err_bad_node;
- if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
- bch2_sb_error_count(c, err_type);
+ bch2_sb_error_count(c, err_type);
+
+ bool print_deferred = err_msg &&
+ rw == READ &&
+ !(test_bit(BCH_FS_in_fsck, &c->flags) &&
+ c->opts.fix_errors == FSCK_FIX_ask);
struct printbuf out = PRINTBUF;
- if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
- printbuf_indent_add_nextline(&out, 2);
-#ifdef BCACHEFS_LOG_PREFIX
- prt_printf(&out, bch2_log_msg(c, ""));
-#endif
- }
+ bch2_log_msg_start(c, &out);
- btree_err_msg(&out, c, ca, b, i, k, b->written, write);
+ if (!print_deferred)
+ err_msg = &out;
+
+ btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw);
va_list args;
va_start(args, fmt);
- prt_vprintf(&out, fmt, args);
+ prt_vprintf(err_msg, fmt, args);
va_end(args);
- if (write == WRITE) {
+ if (print_deferred) {
+ prt_newline(err_msg);
+
+ switch (ret) {
+ case -BCH_ERR_btree_node_read_err_fixable:
+ ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type);
+ if (ret2 != -BCH_ERR_fsck_fix &&
+ ret2 != -BCH_ERR_fsck_ignore) {
+ ret = ret2;
+ goto fsck_err;
+ }
+
+ if (!have_retry)
+ ret = -BCH_ERR_fsck_fix;
+ goto out;
+ case -BCH_ERR_btree_node_read_err_bad_node:
+ prt_str(&out, ", ");
+ ret = __bch2_topology_error(c, &out);
+ break;
+ }
+
+ goto out;
+ }
+
+ if (rw == WRITE) {
prt_str(&out, ", ");
ret = __bch2_inconsistent_error(c, &out)
? -BCH_ERR_fsck_errors_not_fixed
: 0;
- silent = false;
+ goto print;
}
switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
- ret = !silent
- ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf)
- : -BCH_ERR_fsck_fix;
- if (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore)
+ ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf);
+ if (ret2 != -BCH_ERR_fsck_fix &&
+ ret2 != -BCH_ERR_fsck_ignore) {
+ ret = ret2;
goto fsck_err;
- ret = -BCH_ERR_fsck_fix;
+ }
+
+ if (!have_retry)
+ ret = -BCH_ERR_fsck_fix;
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
ret = __bch2_topology_error(c, &out);
- if (ret)
- silent = false;
- break;
- case -BCH_ERR_btree_node_read_err_incompatible:
- ret = -BCH_ERR_fsck_errors_not_fixed;
- silent = false;
break;
}
-
- if (!silent)
- bch2_print_string_as_lines(KERN_ERR, out.buf);
+print:
+ bch2_print_str(c, KERN_ERR, out.buf);
out:
fsck_err:
printbuf_exit(&out);
@@ -614,8 +655,9 @@ fsck_err:
#define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \
({ \
- int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \
+ int _ret = __btree_err(type, c, ca, b, i, k, write, \
BCH_FSCK_ERR_##_err_type, \
+ failed, err_msg, \
msg, ##__VA_ARGS__); \
\
if (_ret != -BCH_ERR_fsck_fix) { \
@@ -623,7 +665,7 @@ fsck_err:
goto fsck_err; \
} \
\
- *saw_error = true; \
+ true; \
})
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
@@ -681,8 +723,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
struct btree *b, struct bset *i,
- unsigned offset, unsigned sectors,
- int write, bool have_retry, bool *saw_error)
+ unsigned offset, unsigned sectors, int write,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
@@ -895,7 +938,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b,
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
struct bset *i, int write,
- bool have_retry, bool *saw_error)
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
@@ -1008,7 +1052,9 @@ fsck_err:
}
int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
- struct btree *b, bool have_retry, bool *saw_error)
+ struct btree *b,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
struct btree_node_entry *bne;
struct sort_iter *iter;
@@ -1018,11 +1064,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool used_mempool, blacklisted;
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
- unsigned u64s;
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
u64 max_journal_seq = 0;
struct printbuf buf = PRINTBUF;
- int ret = 0, retry_read = 0, write = READ;
+ int ret = 0, write = READ;
u64 start_time = local_clock();
b->version_ondisk = U16_MAX;
@@ -1156,15 +1201,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
b->version_ondisk = min(b->version_ondisk,
le16_to_cpu(i->version));
- ret = validate_bset(c, ca, b, i, b->written, sectors,
- READ, have_retry, saw_error);
+ ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg);
if (ret)
goto fsck_err;
if (!b->written)
btree_node_set_format(b, b->data->format);
- ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
+ ret = validate_bset_keys(c, b, i, READ, failed, err_msg);
if (ret)
goto fsck_err;
@@ -1225,23 +1269,20 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool);
sorted->keys.u64s = 0;
- set_btree_bset(b, b->set, &b->data->keys);
-
b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0,
btree_buf_bytes(b) -
sizeof(struct btree_node) -
b->nr.live_u64s * sizeof(u64));
- u64s = le16_to_cpu(sorted->keys.u64s);
+ b->data->keys.u64s = sorted->keys.u64s;
*sorted = *b->data;
- sorted->keys.u64s = cpu_to_le16(u64s);
swap(sorted, b->data);
set_btree_bset(b, b->set, &b->data->keys);
b->nsets = 1;
b->data->keys.journal_seq = cpu_to_le64(max_journal_seq);
- BUG_ON(b->nr.live_u64s != u64s);
+ BUG_ON(b->nr.live_u64s != le16_to_cpu(b->data->keys.u64s));
btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
@@ -1255,7 +1296,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
ret = btree_node_bkey_val_validate(c, b, u.s_c, READ);
if (ret == -BCH_ERR_fsck_delete_bkey ||
- (bch2_inject_invalid_keys &&
+ (static_branch_unlikely(&bch2_inject_invalid_keys) &&
!bversion_cmp(u.k->bversion, MAX_VERSION))) {
btree_keys_account_key_drop(&b->nr, 0, k);
@@ -1295,20 +1336,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (!ptr_written)
set_btree_node_need_rewrite(b);
-out:
+fsck_err:
mempool_free(iter, &c->fill_iter);
printbuf_exit(&buf);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
- return retry_read;
-fsck_err:
- if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
- ret == -BCH_ERR_btree_node_read_err_must_retry) {
- retry_read = 1;
- } else {
- set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
- }
- goto out;
+ return ret;
}
static void btree_node_read_work(struct work_struct *work)
@@ -1320,16 +1352,26 @@ static void btree_node_read_work(struct work_struct *work)
struct btree *b = rb->b;
struct bio *bio = &rb->bio;
struct bch_io_failures failed = { .nr = 0 };
+ int ret = 0;
+
struct printbuf buf = PRINTBUF;
- bool saw_error = false;
- bool retry = false;
- bool can_retry;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "btree node read error at btree ");
+ bch2_btree_pos_to_text(&buf, c, b);
+ prt_newline(&buf);
goto start;
while (1) {
- retry = true;
- bch_info(c, "retrying read");
- ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
+ ret = bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ &failed, &rb->pick, -1);
+ if (ret) {
+ set_btree_node_read_error(b);
+ break;
+ }
+
+ ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = ca != NULL;
rb->start_time = local_clock();
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
@@ -1346,59 +1388,59 @@ static void btree_node_read_work(struct work_struct *work)
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
rb->start_time, !bio->bi_status);
start:
- printbuf_reset(&buf);
- bch2_btree_pos_to_text(&buf, c, b);
-
- if (ca && bio->bi_status)
- bch_err_dev_ratelimited(ca,
- "btree read error %s for %s",
- bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = false;
- bch2_mark_io_failure(&failed, &rb->pick, false);
-
- can_retry = bch2_bkey_pick_read_device(c,
- bkey_i_to_s_c(&b->key),
- &failed, &rb->pick, -1) > 0;
-
- if (!bio->bi_status &&
- !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
- if (retry)
- bch_info(c, "retry success");
- break;
+ if (bio->bi_status) {
+ bch2_mark_io_failure(&failed, &rb->pick, false);
+ continue;
}
- saw_error = true;
+ ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
+ if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
+ ret == -BCH_ERR_btree_node_read_err_must_retry)
+ continue;
- if (!can_retry) {
+ if (ret)
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
- break;
- }
+
+ break;
}
- bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
- rb->start_time);
- bio_put(&rb->bio);
+ bch2_io_failures_to_text(&buf, c, &failed);
+
+ if (btree_node_read_error(b))
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+
+ /*
+ * only print retry success if we read from a replica with no errors
+ */
+ if (btree_node_read_error(b))
+ prt_printf(&buf, "ret %s", bch2_err_str(ret));
+ else if (failed.nr) {
+ if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
+ prt_printf(&buf, "retry success");
+ else
+ prt_printf(&buf, "repair success");
+ }
- if ((saw_error ||
+ if ((failed.nr ||
btree_node_need_rewrite(b)) &&
!btree_node_read_error(b) &&
- c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
- if (saw_error) {
- printbuf_reset(&buf);
- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_str(&buf, " ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s",
- __func__, buf.buf);
- }
-
+ c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
+ prt_printf(&buf, " (rewriting node)");
bch2_btree_node_rewrite_async(c, b);
}
+ prt_newline(&buf);
+
+ if (failed.nr)
+ bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
+ async_object_list_del(c, btree_read_bio, rb->list_idx);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+ rb->start_time);
+ bio_put(&rb->bio);
printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
smp_mb__after_atomic();
@@ -1419,6 +1461,11 @@ static void btree_node_read_endio(struct bio *bio)
queue_work(c->btree_read_complete_wq, &rb->work);
}
+void bch2_btree_read_bio_to_text(struct printbuf *out, struct btree_read_bio *rbio)
+{
+ bch2_bio_to_text(out, &rbio->bio);
+}
+
struct btree_node_read_all {
struct closure cl;
struct bch_fs *c;
@@ -1478,12 +1525,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
struct btree *b = ra->b;
struct printbuf buf = PRINTBUF;
bool dump_bset_maps = false;
- bool have_retry = false;
int ret = 0, best = -1, write = READ;
unsigned i, written = 0, written2 = 0;
__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
bool _saw_error = false, *saw_error = &_saw_error;
+ struct printbuf *err_msg = NULL;
+ struct bch_io_failures *failed = NULL;
for (i = 0; i < ra->nr; i++) {
struct btree_node *bn = ra->buf[i];
@@ -1576,14 +1624,19 @@ fsck_err:
if (best >= 0) {
memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
- ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
+ ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL);
} else {
ret = -1;
}
if (ret) {
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
+
+ struct printbuf buf = PRINTBUF;
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+ if (buf.pos)
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
} else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
@@ -1612,7 +1665,8 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
bch2_latency_acct(ca, rb->start_time, READ);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_node_read_all_replicas);
}
ra->err[rb->idx] = bio->bi_status;
@@ -1652,7 +1706,8 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_node_read_all_replicas);
struct btree_read_bio *rb =
container_of(ra->bio[i], struct btree_read_bio, bio);
rb->c = c;
@@ -1703,7 +1758,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
trace_and_count(c, btree_node_read, trans, b);
- if (bch2_verify_all_btree_replicas &&
+ if (static_branch_unlikely(&bch2_verify_all_btree_replicas) &&
!btree_node_read_all_replicas(c, b, sync))
return;
@@ -1711,26 +1766,34 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
NULL, &pick, -1);
if (ret <= 0) {
+ bool ratelimit = true;
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
- bch_err_ratelimited(c, "%s", buf.buf);
-
- if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
- bch2_fatal_error(c);
+ prt_newline(&buf);
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
+ bch2_fs_emergency_read_only2(c, &buf))
+ ratelimit = false;
+
+ static DEFINE_RATELIMIT_STATE(rs,
+ DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ if (!ratelimit || __ratelimit(&rs))
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
smp_mb__after_atomic();
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
- printbuf_exit(&buf);
return;
}
- ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
bio = bio_alloc_bioset(NULL,
buf_pages(b->data, btree_buf_bytes(b)),
@@ -1749,6 +1812,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bio->bi_end_io = btree_node_read_endio;
bch2_bio_map(bio, b->data, btree_buf_bytes(b));
+ async_object_list_add(c, btree_read_bio, rb, &rb->list_idx);
+
if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
bio_sectors(bio));
@@ -1922,7 +1987,7 @@ static void btree_node_scrub_work(struct work_struct *work)
bch_err(c, "error validating btree node during scrub on %s at btree %s",
scrub->ca->name, err.buf);
- ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0);
}
err:
bch2_trans_iter_exit(trans, &iter);
@@ -1933,9 +1998,9 @@ err:
printbuf_exit(&err);
bch2_bkey_buf_exit(&scrub->key, c);;
btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
- percpu_ref_put(&scrub->ca->io_ref[READ]);
+ enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
kfree(scrub);
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub);
}
static void btree_node_scrub_endio(struct bio *bio)
@@ -1954,7 +2019,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
struct bch_fs *c = trans->c;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_node_scrub))
return -BCH_ERR_erofs_no_writes;
struct extent_ptr_decoded pick;
@@ -1962,7 +2027,8 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
if (ret <= 0)
goto err;
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_node_scrub);
if (!ca) {
ret = -BCH_ERR_device_offline;
goto err;
@@ -2002,9 +2068,9 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
return 0;
err_free:
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
err:
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub);
return ret;
}
@@ -2121,6 +2187,7 @@ static void btree_node_write_work(struct work_struct *work)
goto err;
}
out:
+ async_object_list_del(c, btree_write_bio, wbio->list_idx);
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b, start_time);
return;
@@ -2172,7 +2239,8 @@ static void btree_node_write_endio(struct bio *bio)
* btree writes yet (due to device removal/ro):
*/
if (wbio->have_ioref)
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_node_write);
if (parent) {
bio_put(bio);
@@ -2184,14 +2252,12 @@ static void btree_node_write_endio(struct bio *bio)
smp_mb__after_atomic();
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
INIT_WORK(&wb->work, btree_node_write_work);
- queue_work(c->btree_io_complete_wq, &wb->work);
+ queue_work(c->btree_write_complete_wq, &wb->work);
}
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors)
{
- bool saw_error;
-
int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
(struct bkey_validate_context) {
.from = BKEY_VALIDATE_btree_node,
@@ -2204,8 +2270,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
return ret;
}
- ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
- validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
+ ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?:
+ validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL);
if (ret) {
bch2_inconsistent_error(c);
dump_stack();
@@ -2472,6 +2538,8 @@ do_write:
atomic64_inc(&c->btree_write_stats[type].nr);
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
+ async_object_list_add(c, btree_write_bio, wbio, &wbio->list_idx);
+
INIT_WORK(&wbio->work, btree_write_submit);
queue_work(c->btree_write_submit_wq, &wbio->work);
return;
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index dbf76d22c660..30a5180532c8 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -41,6 +41,9 @@ struct btree_read_bio {
u64 start_time;
unsigned have_ioref:1;
unsigned idx:7;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
struct extent_ptr_decoded pick;
struct work_struct work;
struct bio bio;
@@ -53,6 +56,9 @@ struct btree_write_bio {
unsigned data_bytes;
unsigned sector_offset;
u64 start_time;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
struct bch_write_bio wbio;
};
@@ -128,11 +134,15 @@ void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct btree_trans *, struct btree *);
int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
- struct btree *, bool, bool *);
+ struct btree *,
+ struct bch_io_failures *,
+ struct printbuf *);
void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);
+void bch2_btree_read_bio_to_text(struct printbuf *, struct btree_read_bio *);
+
int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, unsigned);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 59fa527ac685..b4bf4217a3fa 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -16,6 +16,7 @@
#include "journal_io.h"
#include "replicas.h"
#include "snapshot.h"
+#include "super.h"
#include "trace.h"
#include <linux/random.h>
@@ -114,11 +115,9 @@ static inline bool btree_path_pos_in_node(struct btree_path *path,
!btree_path_pos_after_node(path, b);
}
-/* Btree iterator: */
+/* Debug: */
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-static void bch2_btree_path_verify_cached(struct btree_trans *trans,
+static void __bch2_btree_path_verify_cached(struct btree_trans *trans,
struct btree_path *path)
{
struct bkey_cached *ck;
@@ -135,7 +134,7 @@ static void bch2_btree_path_verify_cached(struct btree_trans *trans,
btree_node_unlock(trans, path, 0);
}
-static void bch2_btree_path_verify_level(struct btree_trans *trans,
+static void __bch2_btree_path_verify_level(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
struct btree_path_level *l;
@@ -147,16 +146,13 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans,
struct printbuf buf3 = PRINTBUF;
const char *msg;
- if (!bch2_debug_check_iterators)
- return;
-
l = &path->l[level];
tmp = l->iter;
locked = btree_node_locked(path, level);
if (path->cached) {
if (!level)
- bch2_btree_path_verify_cached(trans, path);
+ __bch2_btree_path_verify_cached(trans, path);
return;
}
@@ -217,7 +213,7 @@ err:
msg, level, buf1.buf, buf2.buf, buf3.buf);
}
-static void bch2_btree_path_verify(struct btree_trans *trans,
+static void __bch2_btree_path_verify(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
@@ -229,22 +225,22 @@ static void bch2_btree_path_verify(struct btree_trans *trans,
break;
}
- bch2_btree_path_verify_level(trans, path, i);
+ __bch2_btree_path_verify_level(trans, path, i);
}
- bch2_btree_path_verify_locks(path);
+ bch2_btree_path_verify_locks(trans, path);
}
-void bch2_trans_verify_paths(struct btree_trans *trans)
+void __bch2_trans_verify_paths(struct btree_trans *trans)
{
struct btree_path *path;
unsigned iter;
trans_for_each_path(trans, path, iter)
- bch2_btree_path_verify(trans, path);
+ __bch2_btree_path_verify(trans, path);
}
-static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter)
+static void __bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter)
{
BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);
@@ -256,11 +252,11 @@ static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter
!btree_type_has_snapshot_field(iter->btree_id));
if (iter->update_path)
- bch2_btree_path_verify(trans, &trans->paths[iter->update_path]);
- bch2_btree_path_verify(trans, btree_iter_path(trans, iter));
+ __bch2_btree_path_verify(trans, &trans->paths[iter->update_path]);
+ __bch2_btree_path_verify(trans, btree_iter_path(trans, iter));
}
-static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
+static void __bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
BUG_ON((iter->flags & BTREE_ITER_filter_snapshots) &&
!iter->pos.snapshot);
@@ -274,16 +270,13 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
bkey_gt(iter->pos, iter->k.p)));
}
-static int bch2_btree_iter_verify_ret(struct btree_trans *trans,
- struct btree_iter *iter, struct bkey_s_c k)
+static int __bch2_btree_iter_verify_ret(struct btree_trans *trans,
+ struct btree_iter *iter, struct bkey_s_c k)
{
struct btree_iter copy;
struct bkey_s_c prev;
int ret = 0;
- if (!bch2_debug_check_iterators)
- return 0;
-
if (!(iter->flags & BTREE_ITER_filter_snapshots))
return 0;
@@ -324,7 +317,7 @@ out:
return ret;
}
-void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
+void __bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
struct bpos pos)
{
bch2_trans_verify_not_unlocked_or_in_restart(trans);
@@ -357,19 +350,40 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
panic("not locked: %s %s\n", bch2_btree_id_str(id), buf.buf);
}
-#else
-
static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
- struct btree_path *path, unsigned l) {}
+ struct btree_path *path, unsigned l)
+{
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
+ __bch2_btree_path_verify_level(trans, path, l);
+}
+
static inline void bch2_btree_path_verify(struct btree_trans *trans,
- struct btree_path *path) {}
+ struct btree_path *path)
+{
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
+ __bch2_btree_path_verify(trans, path);
+}
+
static inline void bch2_btree_iter_verify(struct btree_trans *trans,
- struct btree_iter *iter) {}
-static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
-static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_s_c k) { return 0; }
+ struct btree_iter *iter)
+{
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
+ __bch2_btree_iter_verify(trans, iter);
+}
-#endif
+static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
+{
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
+ __bch2_btree_iter_verify_entry_exit(iter);
+}
+
+static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ return static_branch_unlikely(&bch2_debug_check_iterators)
+ ? __bch2_btree_iter_verify_ret(trans, iter, k)
+ : 0;
+}
/* Btree path: fixups after btree updates */
@@ -523,7 +537,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans,
__bch2_btree_node_iter_fix(path, b, node_iter, t,
where, clobber_u64s, new_u64s);
- if (bch2_debug_check_iterators)
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
bch2_btree_node_iter_verify(node_iter, b);
}
@@ -977,7 +991,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
path->level = level;
bch2_btree_path_level_init(trans, path, b);
- bch2_btree_path_verify_locks(path);
+ bch2_btree_path_verify_locks(trans, path);
err:
bch2_bkey_buf_exit(&tmp, c);
return ret;
@@ -1089,7 +1103,7 @@ static void btree_path_set_level_down(struct btree_trans *trans,
if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
btree_node_unlock(trans, path, l);
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
bch2_btree_path_verify(trans, path);
}
@@ -1162,7 +1176,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
}
if (path->cached) {
- ret = bch2_btree_path_traverse_cached(trans, path, flags);
+ ret = bch2_btree_path_traverse_cached(trans, path_idx, flags);
goto out;
}
@@ -1287,7 +1301,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
if (unlikely(path->cached)) {
btree_node_unlock(trans, path, 0);
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
goto out;
}
@@ -1316,7 +1330,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
}
if (unlikely(level != path->level)) {
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
__bch2_btree_path_unlock(trans, path);
}
out:
@@ -1385,45 +1399,45 @@ static bool bch2_btree_path_can_relock(struct btree_trans *trans, struct btree_p
void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool intent)
{
- struct btree_path *path = trans->paths + path_idx, *dup;
+ struct btree_path *path = trans->paths + path_idx, *dup = NULL;
if (!__btree_path_put(trans, path, intent))
return;
+ if (!path->preserve && !path->should_be_locked)
+ goto free;
+
dup = path->preserve
? have_path_at_pos(trans, path)
: have_node_at_pos(trans, path);
-
- trace_btree_path_free(trans, path_idx, dup);
-
- if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
+ if (!dup)
return;
- if (path->should_be_locked && !trans->restarted) {
- if (!dup)
- return;
-
+ /*
+ * If we need this path locked, the duplicate also has te be locked
+ * before we free this one:
+ */
+ if (path->should_be_locked &&
+ !dup->should_be_locked &&
+ !trans->restarted) {
if (!(trans->locked
? bch2_btree_path_relock_norestart(trans, dup)
: bch2_btree_path_can_relock(trans, dup)))
return;
- }
- if (dup) {
- dup->preserve |= path->preserve;
- dup->should_be_locked |= path->should_be_locked;
+ dup->should_be_locked = true;
}
- __bch2_path_free(trans, path_idx);
-}
+ BUG_ON(path->should_be_locked &&
+ !trans->restarted &&
+ trans->locked &&
+ !btree_node_locked(dup, dup->level));
-static void bch2_path_put_nokeep(struct btree_trans *trans, btree_path_idx_t path,
- bool intent)
-{
- if (!__btree_path_put(trans, trans->paths + path, intent))
- return;
-
- __bch2_path_free(trans, path);
+ path->should_be_locked = false;
+ dup->preserve |= path->preserve;
+free:
+ trace_btree_path_free(trans, path_idx, dup);
+ __bch2_path_free(trans, path_idx);
}
void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
@@ -1485,7 +1499,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
prt_newline(buf);
}
- for (struct jset_entry *e = trans->journal_entries;
+ for (struct jset_entry *e = btree_trans_journal_entries_start(trans);
e != btree_trans_journal_entries_top(trans);
e = vstruct_next(e)) {
bch2_journal_entry_to_text(buf, trans->c, e);
@@ -1591,7 +1605,7 @@ void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
__bch2_trans_paths_to_text(&buf, trans, nosort);
bch2_trans_updates_to_text(&buf, trans);
- bch2_print_str(trans->c, buf.buf);
+ bch2_print_str(trans->c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
@@ -1735,6 +1749,10 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans,
btree_trans_sort_paths(trans);
+ if (intent)
+ locks_want = max(locks_want, level + 1);
+ locks_want = min(locks_want, BTREE_MAX_DEPTH);
+
trans_for_each_path_inorder(trans, path, iter) {
if (__btree_path_cmp(path,
btree_id,
@@ -1749,7 +1767,8 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans,
if (path_pos &&
trans->paths[path_pos].cached == cached &&
trans->paths[path_pos].btree_id == btree_id &&
- trans->paths[path_pos].level == level) {
+ trans->paths[path_pos].level == level &&
+ bch2_btree_path_upgrade_norestart(trans, trans->paths + path_pos, locks_want)) {
trace_btree_path_get(trans, trans->paths + path_pos, &pos);
__btree_path_get(trans, trans->paths + path_pos, intent);
@@ -1781,9 +1800,6 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans,
if (!(flags & BTREE_ITER_nopreserve))
path->preserve = true;
- if (path->intent_ref)
- locks_want = max(locks_want, level + 1);
-
/*
* If the path has locks_want greater than requested, we don't downgrade
* it here - on transaction restart because btree node split needs to
@@ -1792,10 +1808,6 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans,
* a successful transaction commit.
*/
- locks_want = min(locks_want, BTREE_MAX_DEPTH);
- if (locks_want > path->locks_want)
- bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
-
return path_idx;
}
@@ -1967,17 +1979,24 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_
/* got to end? */
if (!btree_path_node(path, path->level + 1)) {
+ path->should_be_locked = false;
btree_path_set_level_up(trans, path);
return NULL;
}
+ /*
+ * We don't correctly handle nodes with extra intent locks here:
+ * downgrade so we don't violate locking invariants
+ */
+ bch2_btree_path_downgrade(trans, path);
+
if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
+ trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
__bch2_btree_path_unlock(trans, path);
path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
- trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
goto err;
}
@@ -2338,8 +2357,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree
}
if (iter->update_path) {
- bch2_path_put_nokeep(trans, iter->update_path,
- iter->flags & BTREE_ITER_intent);
+ bch2_path_put(trans, iter->update_path, iter->flags & BTREE_ITER_intent);
iter->update_path = 0;
}
@@ -2368,8 +2386,8 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree
if (iter->update_path &&
!bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) {
- bch2_path_put_nokeep(trans, iter->update_path,
- iter->flags & BTREE_ITER_intent);
+ bch2_path_put(trans, iter->update_path,
+ iter->flags & BTREE_ITER_intent);
iter->update_path = 0;
}
@@ -2628,7 +2646,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
* the last possible snapshot overwrite, return
* it:
*/
- bch2_path_put_nokeep(trans, iter->path,
+ bch2_path_put(trans, iter->path,
iter->flags & BTREE_ITER_intent);
iter->path = saved_path;
saved_path = 0;
@@ -2658,8 +2676,8 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
* our previous saved candidate:
*/
if (saved_path) {
- bch2_path_put_nokeep(trans, saved_path,
- iter->flags & BTREE_ITER_intent);
+ bch2_path_put(trans, saved_path,
+ iter->flags & BTREE_ITER_intent);
saved_path = 0;
}
@@ -2702,7 +2720,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
iter->pos.snapshot = iter->snapshot;
out_no_locked:
if (saved_path)
- bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_intent);
+ bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_intent);
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(trans, iter);
@@ -2743,7 +2761,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
- goto out_no_locked;
+ goto out;
}
/* extents can't span inode numbers: */
@@ -2763,13 +2781,15 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
- goto out_no_locked;
+ goto out;
}
struct btree_path *path = btree_iter_path(trans, iter);
if (unlikely(!btree_path_node(path, path->level)))
return bkey_s_c_null;
+ btree_path_set_should_be_locked(trans, path);
+
if ((iter->flags & BTREE_ITER_cached) ||
!(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) {
k = bkey_s_c_null;
@@ -2790,12 +2810,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
if (!bkey_err(k))
iter->k = *k.k;
/* We're not returning a key from iter->path: */
- goto out_no_locked;
+ goto out;
}
- k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
+ k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k);
if (unlikely(!k.k))
- goto out_no_locked;
+ goto out;
if (unlikely(k.k->type == KEY_TYPE_whiteout &&
(iter->flags & BTREE_ITER_filter_snapshots) &&
@@ -2833,7 +2853,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
}
if (unlikely(bkey_err(k)))
- goto out_no_locked;
+ goto out;
next = k.k ? bkey_start_pos(k.k) : POS_MAX;
@@ -2855,8 +2875,6 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
}
}
out:
- btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
-out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
@@ -2923,7 +2941,7 @@ static void btree_trans_verify_sorted(struct btree_trans *trans)
struct btree_path *path, *prev = NULL;
struct trans_for_each_path_inorder_iter iter;
- if (!bch2_debug_check_iterators)
+ if (!static_branch_unlikely(&bch2_debug_check_iterators))
return;
trans_for_each_path_inorder(trans, path, iter) {
@@ -3025,7 +3043,7 @@ static inline void btree_path_list_add(struct btree_trans *trans,
void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
{
if (iter->update_path)
- bch2_path_put_nokeep(trans, iter->update_path,
+ bch2_path_put(trans, iter->update_path,
iter->flags & BTREE_ITER_intent);
if (iter->path)
bch2_path_put(trans, iter->path,
@@ -3089,7 +3107,19 @@ void bch2_trans_copy_iter(struct btree_trans *trans,
dst->key_cache_path = 0;
}
-void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+void bch2_trans_kmalloc_trace_to_text(struct printbuf *out,
+ darray_trans_kmalloc_trace *trace)
+{
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 60);
+
+ darray_for_each(*trace, i)
+ prt_printf(out, "%pS\t%zu\n", (void *) i->ip, i->bytes);
+}
+#endif
+
+void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long ip)
{
struct bch_fs *c = trans->c;
unsigned new_top = trans->mem_top + size;
@@ -3099,14 +3129,35 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
void *new_mem;
void *p;
- WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+ if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ struct printbuf buf = PRINTBUF;
+ bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+#endif
+ }
ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (ret)
return ERR_PTR(ret);
struct btree_transaction_stats *s = btree_trans_stats(trans);
- s->max_mem = max(s->max_mem, new_bytes);
+ if (new_bytes > s->max_mem) {
+ mutex_lock(&s->lock);
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
+ s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
+ trans->trans_kmalloc_trace.nr);
+
+ memcpy(s->trans_kmalloc_trace.data,
+ trans->trans_kmalloc_trace.data,
+ sizeof(s->trans_kmalloc_trace.data[0]) *
+ s->trans_kmalloc_trace.nr);
+#endif
+ s->max_mem = new_bytes;
+ mutex_unlock(&s->lock);
+ }
if (trans->used_mempool) {
if (trans->mem_bytes >= new_bytes)
@@ -3166,6 +3217,8 @@ out_new_mem:
BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
+ bch2_trans_kmalloc_trace(trans, size, ip);
+
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
@@ -3225,7 +3278,6 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->restart_count++;
trans->mem_top = 0;
- trans->journal_entries = NULL;
trans_for_each_path(trans, path, i) {
path->should_be_locked = false;
@@ -3279,6 +3331,10 @@ u32 bch2_trans_begin(struct btree_trans *trans)
}
#endif
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ trans->trans_kmalloc_trace.nr = 0;
+#endif
+
trans_set_locked(trans, false);
if (trans->restarted) {
@@ -3379,7 +3435,6 @@ got_trans:
}
trans->nr_paths_max = s->nr_max_paths;
- trans->journal_entries_size = s->journal_entries_size;
}
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
@@ -3391,28 +3446,44 @@ got_trans:
return trans;
}
-static void check_btree_paths_leaked(struct btree_trans *trans)
-{
#ifdef CONFIG_BCACHEFS_DEBUG
- struct bch_fs *c = trans->c;
+
+static bool btree_paths_leaked(struct btree_trans *trans)
+{
struct btree_path *path;
unsigned i;
trans_for_each_path(trans, path, i)
if (path->ref)
- goto leaked;
- return;
-leaked:
- bch_err(c, "btree paths leaked from %s!", trans->fn);
- trans_for_each_path(trans, path, i)
- if (path->ref)
- printk(KERN_ERR " btree %s %pS\n",
- bch2_btree_id_str(path->btree_id),
- (void *) path->ip_allocated);
- /* Be noisy about this: */
- bch2_fatal_error(c);
-#endif
+ return true;
+ return false;
+}
+
+static void check_btree_paths_leaked(struct btree_trans *trans)
+{
+ if (btree_paths_leaked(trans)) {
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+ unsigned i;
+
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn);
+ trans_for_each_path(trans, path, i)
+ if (path->ref)
+ prt_printf(&buf, "btree %s %pS\n",
+ bch2_btree_id_str(path->btree_id),
+ (void *) path->ip_allocated);
+
+ bch2_fs_emergency_read_only2(c, &buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
}
+#else
+static inline void check_btree_paths_leaked(struct btree_trans *trans) {}
+#endif
void bch2_trans_put(struct btree_trans *trans)
__releases(&c->btree_trans_barrier)
@@ -3448,6 +3519,9 @@ void bch2_trans_put(struct btree_trans *trans)
#ifdef CONFIG_BCACHEFS_DEBUG
darray_exit(&trans->last_restarted_trace);
#endif
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_exit(&trans->trans_kmalloc_trace);
+#endif
unsigned long *paths_allocated = trans->paths_allocated;
trans->paths_allocated = NULL;
@@ -3602,6 +3676,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
s++) {
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_exit(&s->trans_kmalloc_trace);
+#endif
kfree(s->max_paths_text);
bch2_time_stats_exit(&s->lock_hold_times);
}
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 9d2cccf5d21a..2cabb5f0f484 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -46,9 +46,11 @@ static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path
return --path->ref == 0;
}
-static inline void btree_path_set_dirty(struct btree_path *path,
+static inline void btree_path_set_dirty(struct btree_trans *trans,
+ struct btree_path *path,
enum btree_path_uptodate u)
{
+ BUG_ON(path->should_be_locked && trans->locked && !trans->restarted);
path->uptodate = max_t(unsigned, path->uptodate, u);
}
@@ -285,14 +287,23 @@ static inline int bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex
: __bch2_trans_mutex_lock(trans, lock);
}
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_trans_verify_paths(struct btree_trans *);
-void bch2_assert_pos_locked(struct btree_trans *, enum btree_id, struct bpos);
-#else
-static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
-static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
- struct bpos pos) {}
-#endif
+/* Debug: */
+
+void __bch2_trans_verify_paths(struct btree_trans *);
+void __bch2_assert_pos_locked(struct btree_trans *, enum btree_id, struct bpos);
+
+static inline void bch2_trans_verify_paths(struct btree_trans *trans)
+{
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
+ __bch2_trans_verify_paths(trans);
+}
+
+static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id btree,
+ struct bpos pos)
+{
+ if (static_branch_unlikely(&bch2_debug_check_iterators))
+ __bch2_assert_pos_locked(trans, btree, pos);
+}
void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
struct btree *, struct bkey_packed *);
@@ -543,43 +554,73 @@ void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btre
void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);
-void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
+ darray_trans_kmalloc_trace *);
+#endif
+
+void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);
-/**
- * bch2_trans_kmalloc - allocate memory for use by the current transaction
- *
- * Must be called after bch2_trans_begin, which on second and further calls
- * frees all memory allocated in this transaction
- */
-static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
+ unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_push(&trans->trans_kmalloc_trace,
+ ((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
+#endif
+}
+
+static __always_inline void *bch2_trans_kmalloc_nomemzero_ip(struct btree_trans *trans, size_t size,
+ unsigned long ip)
{
size = roundup(size, 8);
+ bch2_trans_kmalloc_trace(trans, size, ip);
+
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
void *p = trans->mem + trans->mem_top;
trans->mem_top += size;
- memset(p, 0, size);
return p;
} else {
- return __bch2_trans_kmalloc(trans, size);
+ return __bch2_trans_kmalloc(trans, size, ip);
}
}
-static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
+static __always_inline void *bch2_trans_kmalloc_ip(struct btree_trans *trans, size_t size,
+ unsigned long ip)
{
- size = round_up(size, 8);
+ size = roundup(size, 8);
+
+ bch2_trans_kmalloc_trace(trans, size, ip);
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
void *p = trans->mem + trans->mem_top;
trans->mem_top += size;
+ memset(p, 0, size);
return p;
} else {
- return __bch2_trans_kmalloc(trans, size);
+ return __bch2_trans_kmalloc(trans, size, ip);
}
}
+/**
+ * bch2_trans_kmalloc - allocate memory for use by the current transaction
+ *
+ * Must be called after bch2_trans_begin, which on second and further calls
+ * frees all memory allocated in this transaction
+ */
+static __always_inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+{
+ return bch2_trans_kmalloc_ip(trans, size, _THIS_IP_);
+}
+
+static __always_inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
+{
+ return bch2_trans_kmalloc_nomemzero_ip(trans, size, _THIS_IP_);
+}
+
static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
struct btree_iter *iter,
unsigned btree_id, struct bpos pos,
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 2b186584a291..9da950e7eb7d 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -101,8 +101,8 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu
kmem_cache_free(bch2_key_cache, ck);
}
-static void bkey_cached_free(struct btree_key_cache *bc,
- struct bkey_cached *ck)
+static inline void bkey_cached_free_noassert(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
{
kfree(ck->k);
ck->k = NULL;
@@ -116,6 +116,19 @@ static void bkey_cached_free(struct btree_key_cache *bc,
this_cpu_inc(*bc->nr_pending);
}
+static void bkey_cached_free(struct btree_trans *trans,
+ struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ /*
+ * we'll hit strange issues in the SRCU code if we aren't holding an
+ * SRCU read lock...
+ */
+ EBUG_ON(!trans->srcu_held);
+
+ bkey_cached_free_noassert(bc, ck);
+}
+
static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
{
gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE;
@@ -281,7 +294,7 @@ static int btree_key_cache_create(struct btree_trans *trans,
ck_path->uptodate = BTREE_ITER_UPTODATE;
return 0;
err:
- bkey_cached_free(bc, ck);
+ bkey_cached_free(trans, bc, ck);
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
return ret;
@@ -301,9 +314,11 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans
}
static noinline int btree_key_cache_fill(struct btree_trans *trans,
- struct btree_path *ck_path,
+ btree_path_idx_t ck_path_idx,
unsigned flags)
{
+ struct btree_path *ck_path = trans->paths + ck_path_idx;
+
if (flags & BTREE_ITER_cached_nofill) {
ck_path->l[0].b = NULL;
return 0;
@@ -325,6 +340,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
goto err;
/* Recheck after btree lookup, before allocating: */
+ ck_path = trans->paths + ck_path_idx;
ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0;
if (unlikely(ret))
goto out;
@@ -344,10 +360,11 @@ err:
}
static inline int btree_path_traverse_cached_fast(struct btree_trans *trans,
- struct btree_path *path)
+ btree_path_idx_t path_idx)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck;
+ struct btree_path *path = trans->paths + path_idx;
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck)
@@ -373,27 +390,32 @@ retry:
return 0;
}
-int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
+int bch2_btree_path_traverse_cached(struct btree_trans *trans,
+ btree_path_idx_t path_idx,
unsigned flags)
{
- EBUG_ON(path->level);
-
- path->l[1].b = NULL;
+ EBUG_ON(trans->paths[path_idx].level);
int ret;
do {
- ret = btree_path_traverse_cached_fast(trans, path);
+ ret = btree_path_traverse_cached_fast(trans, path_idx);
if (unlikely(ret == -ENOENT))
- ret = btree_key_cache_fill(trans, path, flags);
+ ret = btree_key_cache_fill(trans, path_idx, flags);
} while (ret == -EEXIST);
+ struct btree_path *path = trans->paths + path_idx;
+
if (unlikely(ret)) {
path->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
btree_node_unlock(trans, path, 0);
path->l[0].b = ERR_PTR(ret);
}
+ } else {
+ BUG_ON(path->uptodate);
+ BUG_ON(!path->nodes_locked);
}
+
return ret;
}
@@ -502,7 +524,7 @@ evict:
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
if (bkey_cached_evict(&c->btree_key_cache, ck)) {
- bkey_cached_free(&c->btree_key_cache, ck);
+ bkey_cached_free(trans, &c->btree_key_cache, ck);
} else {
six_unlock_write(&ck->c.lock);
six_unlock_intent(&ck->c.lock);
@@ -616,7 +638,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
}
bkey_cached_evict(bc, ck);
- bkey_cached_free(bc, ck);
+ bkey_cached_free(trans, bc, ck);
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
@@ -624,10 +646,17 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
unsigned i;
trans_for_each_path(trans, path2, i)
if (path2->l[0].b == (void *) ck) {
+ /*
+ * It's safe to clear should_be_locked here because
+ * we're evicting from the key cache, and we still have
+ * the underlying btree locked: filling into the key
+ * cache would require taking a write lock on the btree
+ * node
+ */
+ path2->should_be_locked = false;
__bch2_btree_path_unlock(trans, path2);
path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop);
- path2->should_be_locked = false;
- btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_dirty(trans, path2, BTREE_ITER_NEED_TRAVERSE);
}
bch2_trans_verify_locks(trans);
@@ -684,7 +713,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
} else if (!bkey_cached_lock_for_evict(ck)) {
bc->skipped_lock_fail++;
} else if (bkey_cached_evict(bc, ck)) {
- bkey_cached_free(bc, ck);
+ bkey_cached_free_noassert(bc, ck);
bc->freed++;
freed++;
} else {
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index 51d6289b8dee..82d8c72512a9 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -40,8 +40,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *,
struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
-int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
- unsigned);
+int bch2_btree_path_traverse_cached(struct btree_trans *, btree_path_idx_t, unsigned);
bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
struct btree_insert_entry *);
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 94eb2b73a843..2f2aed0c9916 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
#include "btree_locking.h"
#include "btree_types.h"
@@ -236,7 +237,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle,
prt_newline(&buf);
}
- bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
+ bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
BUG();
}
@@ -450,13 +451,13 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
/* relock */
-static inline bool btree_path_get_locks(struct btree_trans *trans,
- struct btree_path *path,
- bool upgrade,
- struct get_locks_fail *f)
+static int btree_path_get_locks(struct btree_trans *trans,
+ struct btree_path *path,
+ bool upgrade,
+ struct get_locks_fail *f,
+ int restart_err)
{
unsigned l = path->level;
- int fail_idx = -1;
do {
if (!btree_path_node(path, l))
@@ -464,39 +465,49 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
if (!(upgrade
? bch2_btree_node_upgrade(trans, path, l)
- : bch2_btree_node_relock(trans, path, l))) {
- fail_idx = l;
-
- if (f) {
- f->l = l;
- f->b = path->l[l].b;
- }
- }
+ : bch2_btree_node_relock(trans, path, l)))
+ goto err;
l++;
} while (l < path->locks_want);
+ if (path->uptodate == BTREE_ITER_NEED_RELOCK)
+ path->uptodate = BTREE_ITER_UPTODATE;
+
+ return path->uptodate < BTREE_ITER_NEED_RELOCK ? 0 : -1;
+err:
+ if (f) {
+ f->l = l;
+ f->b = path->l[l].b;
+ }
+
+ /*
+ * Do transaction restart before unlocking, so we don't pop
+ * should_be_locked asserts
+ */
+ if (restart_err) {
+ btree_trans_restart(trans, restart_err);
+ } else if (path->should_be_locked && !trans->restarted) {
+ if (upgrade)
+ path->locks_want = l;
+ return -1;
+ }
+
+ __bch2_btree_path_unlock(trans, path);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
+
/*
* When we fail to get a lock, we have to ensure that any child nodes
* can't be relocked so bch2_btree_path_traverse has to walk back up to
* the node that we failed to relock:
*/
- if (fail_idx >= 0) {
- __bch2_btree_path_unlock(trans, path);
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-
- do {
- path->l[fail_idx].b = upgrade
- ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
- : ERR_PTR(-BCH_ERR_no_btree_node_relock);
- --fail_idx;
- } while (fail_idx >= 0);
- }
-
- if (path->uptodate == BTREE_ITER_NEED_RELOCK)
- path->uptodate = BTREE_ITER_UPTODATE;
+ do {
+ path->l[l].b = upgrade
+ ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
+ : ERR_PTR(-BCH_ERR_no_btree_node_relock);
+ } while (l--);
- return path->uptodate < BTREE_ITER_NEED_RELOCK;
+ return -restart_err ?: -1;
}
bool __bch2_btree_node_relock(struct btree_trans *trans,
@@ -583,7 +594,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans,
l++) {
if (!bch2_btree_node_relock(trans, path, l)) {
__bch2_btree_path_unlock(trans, path);
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
}
@@ -595,9 +606,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans,
__flatten
bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
{
- struct get_locks_fail f;
-
- bool ret = btree_path_get_locks(trans, path, false, &f);
+ bool ret = !btree_path_get_locks(trans, path, false, NULL, 0);
bch2_trans_verify_locks(trans);
return ret;
}
@@ -613,27 +622,37 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
return 0;
}
-bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
- struct btree_path *path,
- unsigned new_locks_want,
- struct get_locks_fail *f)
+bool __bch2_btree_path_upgrade_norestart(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want)
{
- EBUG_ON(path->locks_want >= new_locks_want);
-
path->locks_want = new_locks_want;
- bool ret = btree_path_get_locks(trans, path, true, f);
- bch2_trans_verify_locks(trans);
+ /*
+ * If we need it locked, we can't touch it. Otherwise, we can return
+ * success - bch2_path_get() will use this path, and it'll just be
+ * retraversed:
+ */
+ bool ret = !btree_path_get_locks(trans, path, true, NULL, 0) ||
+ !path->should_be_locked;
+
+ bch2_btree_path_verify_locks(trans, path);
return ret;
}
-bool __bch2_btree_path_upgrade(struct btree_trans *trans,
- struct btree_path *path,
- unsigned new_locks_want,
- struct get_locks_fail *f)
+int __bch2_btree_path_upgrade(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want)
{
- bool ret = bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f);
- if (ret)
+ unsigned old_locks = path->nodes_locked;
+ unsigned old_locks_want = path->locks_want;
+
+ path->locks_want = max_t(unsigned, path->locks_want, new_locks_want);
+
+ struct get_locks_fail f = {};
+ int ret = btree_path_get_locks(trans, path, true, &f,
+ BCH_ERR_transaction_restart_upgrade);
+ if (!ret)
goto out;
/*
@@ -665,9 +684,30 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
linked->btree_id == path->btree_id &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
- btree_path_get_locks(trans, linked, true, NULL);
+ btree_path_get_locks(trans, linked, true, NULL, 0);
}
}
+
+ count_event(trans->c, trans_restart_upgrade);
+ if (trace_trans_restart_upgrade_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "%s %pS\n", trans->fn, (void *) _RET_IP_);
+ prt_printf(&buf, "btree %s pos\n", bch2_btree_id_str(path->btree_id));
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_printf(&buf, "locks want %u -> %u level %u\n",
+ old_locks_want, new_locks_want, f.l);
+ prt_printf(&buf, "nodes_locked %x -> %x\n",
+ old_locks, path->nodes_locked);
+ prt_printf(&buf, "node %s ", IS_ERR(f.b) ? bch2_err_str(PTR_ERR(f.b)) :
+ !f.b ? "(null)" : "(node)");
+ prt_printf(&buf, "path seq %u node seq %u\n",
+ IS_ERR_OR_NULL(f.b) ? 0 : f.b->c.lock.seq,
+ path->l[f.l].lock_seq);
+
+ trace_trans_restart_upgrade(trans->c, buf.buf);
+ printbuf_exit(&buf);
+ }
out:
bch2_trans_verify_locks(trans);
return ret;
@@ -699,7 +739,7 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
}
}
- bch2_btree_path_verify_locks(path);
+ bch2_btree_path_verify_locks(trans, path);
trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
}
@@ -728,7 +768,7 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans)
__bch2_btree_path_unlock(trans, path);
}
-static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path,
+static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path,
struct get_locks_fail *f, bool trace)
{
if (!trace)
@@ -738,7 +778,9 @@ static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, str
struct printbuf buf = PRINTBUF;
bch2_bpos_to_text(&buf, path->pos);
- prt_printf(&buf, " l=%u seq=%u node seq=", f->l, path->l[f->l].lock_seq);
+ prt_printf(&buf, " %s l=%u seq=%u node seq=",
+ bch2_btree_id_str(path->btree_id),
+ f->l, path->l[f->l].lock_seq);
if (IS_ERR_OR_NULL(f->b)) {
prt_str(&buf, bch2_err_str(PTR_ERR(f->b)));
} else {
@@ -760,7 +802,6 @@ static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, str
out:
__bch2_trans_unlock(trans);
bch2_trans_verify_locks(trans);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
}
static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
@@ -777,10 +818,14 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
trans_for_each_path(trans, path, i) {
struct get_locks_fail f;
+ int ret;
if (path->should_be_locked &&
- !btree_path_get_locks(trans, path, false, &f))
- return bch2_trans_relock_fail(trans, path, &f, trace);
+ (ret = btree_path_get_locks(trans, path, false, &f,
+ BCH_ERR_transaction_restart_relock))) {
+ bch2_trans_relock_fail(trans, path, &f, trace);
+ return ret;
+ }
}
trans_set_locked(trans, true);
@@ -799,18 +844,11 @@ int bch2_trans_relock_notrace(struct btree_trans *trans)
return __bch2_trans_relock(trans, false);
}
-void bch2_trans_unlock_noassert(struct btree_trans *trans)
+void bch2_trans_unlock(struct btree_trans *trans)
{
- __bch2_trans_unlock(trans);
-
trans_set_unlocked(trans);
-}
-void bch2_trans_unlock(struct btree_trans *trans)
-{
__bch2_trans_unlock(trans);
-
- trans_set_unlocked(trans);
}
void bch2_trans_unlock_long(struct btree_trans *trans)
@@ -842,32 +880,28 @@ int __bch2_trans_mutex_lock(struct btree_trans *trans,
/* Debug */
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void bch2_btree_path_verify_locks(struct btree_path *path)
+void __bch2_btree_path_verify_locks(struct btree_trans *trans, struct btree_path *path)
{
- /*
- * A path may be uptodate and yet have nothing locked if and only if
- * there is no node at path->level, which generally means we were
- * iterating over all nodes and got to the end of the btree
- */
- BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
- btree_path_node(path, path->level) &&
- !path->nodes_locked);
+ if (!path->nodes_locked && btree_path_node(path, path->level)) {
+ /*
+ * A path may be uptodate and yet have nothing locked if and only if
+ * there is no node at path->level, which generally means we were
+ * iterating over all nodes and got to the end of the btree
+ */
+ BUG_ON(path->uptodate == BTREE_ITER_UPTODATE);
+ BUG_ON(path->should_be_locked && trans->locked && !trans->restarted);
+ }
if (!path->nodes_locked)
return;
for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) {
int want = btree_lock_want(path, l);
- int have = btree_node_locked_type(path, l);
+ int have = btree_node_locked_type_nowrite(path, l);
BUG_ON(!is_btree_node(path, l) && have != BTREE_NODE_UNLOCKED);
- BUG_ON(is_btree_node(path, l) &&
- (want == BTREE_NODE_UNLOCKED ||
- have != BTREE_NODE_WRITE_LOCKED) &&
- want != have);
+ BUG_ON(is_btree_node(path, l) && want != have);
BUG_ON(btree_node_locked(path, l) &&
path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock));
@@ -885,7 +919,7 @@ static bool bch2_trans_locked(struct btree_trans *trans)
return false;
}
-void bch2_trans_verify_locks(struct btree_trans *trans)
+void __bch2_trans_verify_locks(struct btree_trans *trans)
{
if (!trans->locked) {
BUG_ON(bch2_trans_locked(trans));
@@ -896,7 +930,5 @@ void bch2_trans_verify_locks(struct btree_trans *trans)
unsigned i;
trans_for_each_path(trans, path, i)
- bch2_btree_path_verify_locks(path);
+ __bch2_btree_path_verify_locks(trans, path);
}
-
-#endif
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index b33ab7af8440..9adca77e2580 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -15,7 +15,6 @@
void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp);
-void bch2_trans_unlock_noassert(struct btree_trans *);
void bch2_trans_unlock_write(struct btree_trans *);
static inline bool is_btree_node(struct btree_path *path, unsigned l)
@@ -44,6 +43,15 @@ static inline int btree_node_locked_type(struct btree_path *path,
return BTREE_NODE_UNLOCKED + ((path->nodes_locked >> (level << 1)) & 3);
}
+static inline int btree_node_locked_type_nowrite(struct btree_path *path,
+ unsigned level)
+{
+ int have = btree_node_locked_type(path, level);
+ return have == BTREE_NODE_WRITE_LOCKED
+ ? BTREE_NODE_INTENT_LOCKED
+ : have;
+}
+
static inline bool btree_node_write_locked(struct btree_path *path, unsigned l)
{
return btree_node_locked_type(path, l) == BTREE_NODE_WRITE_LOCKED;
@@ -152,7 +160,7 @@ static inline int btree_path_highest_level_locked(struct btree_path *path)
static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
struct btree_path *path)
{
- btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_RELOCK);
while (path->nodes_locked)
btree_node_unlock(trans, path, btree_path_lowest_level_locked(path));
@@ -367,8 +375,8 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
EBUG_ON(btree_node_locked(path, level) &&
- !btree_node_write_locked(path, level) &&
- btree_node_locked_type(path, level) != __btree_lock_want(path, level));
+ btree_node_locked_type_nowrite(path, level) !=
+ __btree_lock_want(path, level));
return likely(btree_node_locked(path, level)) ||
(!IS_ERR_OR_NULL(path->l[level].b) &&
@@ -377,31 +385,29 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
/* upgrade */
-bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
- struct btree_path *, unsigned,
- struct get_locks_fail *);
+bool __bch2_btree_path_upgrade_norestart(struct btree_trans *, struct btree_path *, unsigned);
-bool __bch2_btree_path_upgrade(struct btree_trans *,
- struct btree_path *, unsigned,
- struct get_locks_fail *);
+static inline bool bch2_btree_path_upgrade_norestart(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want)
+{
+ return new_locks_want > path->locks_want
+ ? __bch2_btree_path_upgrade_norestart(trans, path, new_locks_want)
+ : true;
+}
+
+int __bch2_btree_path_upgrade(struct btree_trans *,
+ struct btree_path *, unsigned);
static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
struct btree_path *path,
unsigned new_locks_want)
{
- struct get_locks_fail f = {};
- unsigned old_locks_want = path->locks_want;
-
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
- if (path->locks_want < new_locks_want
- ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
- : path->nodes_locked)
- return 0;
-
- trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
- old_locks_want, new_locks_want, &f);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
+ return likely(path->locks_want >= new_locks_want && path->nodes_locked)
+ ? 0
+ : __bch2_btree_path_upgrade(trans, path, new_locks_want);
}
/* misc: */
@@ -427,7 +433,7 @@ static inline void btree_path_set_level_up(struct btree_trans *trans,
struct btree_path *path)
{
__btree_path_set_level_up(trans, path, path->level++);
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_dirty(trans, path, BTREE_ITER_NEED_TRAVERSE);
}
/* debug */
@@ -439,12 +445,20 @@ struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
int bch2_check_for_deadlock(struct btree_trans *, struct printbuf *);
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_btree_path_verify_locks(struct btree_path *);
-void bch2_trans_verify_locks(struct btree_trans *);
-#else
-static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
-static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
-#endif
+void __bch2_btree_path_verify_locks(struct btree_trans *, struct btree_path *);
+void __bch2_trans_verify_locks(struct btree_trans *);
+
+static inline void bch2_btree_path_verify_locks(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ if (static_branch_unlikely(&bch2_debug_check_btree_locking))
+ __bch2_btree_path_verify_locks(trans, path);
+}
+
+static inline void bch2_trans_verify_locks(struct btree_trans *trans)
+{
+ if (static_branch_unlikely(&bch2_debug_check_btree_locking))
+ __bch2_trans_verify_locks(trans);
+}
#endif /* _BCACHEFS_BTREE_LOCKING_H */
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 86acf037590c..5a97a6b8a757 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
bio_put(bio);
free_page((unsigned long) buf);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
closure_put(w->cl);
kfree(w);
return 0;
@@ -285,13 +285,13 @@ static int read_btree_nodes(struct find_btree_nodes *f)
closure_init_stack(&cl);
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) {
if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
continue;
struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
if (!w) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
ret = -ENOMEM;
goto err;
}
@@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f)
struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
ret = PTR_ERR_OR_ZERO(t);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
kfree(w);
bch_err_msg(c, ret, "starting kthread");
break;
}
closure_get(&cl);
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
wake_up_process(t);
}
err:
@@ -395,7 +395,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
printbuf_reset(&buf);
prt_printf(&buf, "%s: nodes found:\n", __func__);
found_btree_nodes_to_text(&buf, c, f->nodes);
- bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ bch2_print_str(c, KERN_INFO, buf.buf);
}
sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
@@ -424,7 +424,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
printbuf_reset(&buf);
prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
found_btree_nodes_to_text(&buf, c, f->nodes);
- bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ bch2_print_str(c, KERN_INFO, buf.buf);
}
swap(nodes_heap, f->nodes);
@@ -470,7 +470,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
printbuf_reset(&buf);
prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
found_btree_nodes_to_text(&buf, c, f->nodes);
- bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ bch2_print_str(c, KERN_INFO, buf.buf);
} else {
bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
}
@@ -541,7 +541,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
struct find_btree_nodes *f = &c->found_btree_nodes;
- int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
if (ret)
return ret;
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 7d7e52ddde02..1c03c965d836 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -11,6 +11,7 @@
#include "btree_write_buffer.h"
#include "buckets.h"
#include "disk_accounting.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
@@ -20,6 +21,7 @@
#include "snapshot.h"
#include <linux/prefetch.h>
+#include <linux/string_helpers.h>
static const char * const trans_commit_flags_strs[] = {
#define x(n, ...) #n,
@@ -366,7 +368,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
struct jset_entry_log *l =
container_of(entry, struct jset_entry_log, entry);
- strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
+ memcpy_and_pad(l->d, JSET_ENTRY_LOG_U64s * sizeof(u64),
+ trans->fn, strlen(trans->fn), 0);
}
static inline int btree_key_can_insert(struct btree_trans *trans,
@@ -644,10 +647,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
!(flags & BCH_TRANS_COMMIT_no_journal_res)) {
- if (bch2_journal_seq_verify)
+ if (static_branch_unlikely(&bch2_journal_seq_verify))
trans_for_each_update(trans, i)
i->k->k.bversion.lo = trans->journal_res.seq;
- else if (bch2_inject_invalid_keys)
+ else if (static_branch_unlikely(&bch2_inject_invalid_keys))
trans_for_each_update(trans, i)
i->k->k.bversion = MAX_VERSION;
}
@@ -660,18 +663,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
h = h->next;
}
- struct jset_entry *entry = trans->journal_entries;
+ struct bkey_i *accounting;
percpu_down_read(&c->mark_lock);
- for (entry = trans->journal_entries;
- entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
- entry = vstruct_next(entry))
- if (entry->type == BCH_JSET_ENTRY_write_buffer_keys &&
- entry->start->k.type == KEY_TYPE_accounting) {
- ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags);
- if (ret)
- goto revert_fs_usage;
- }
+ for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
+ accounting != btree_trans_subbuf_top(trans, &trans->accounting);
+ accounting = bkey_next(accounting)) {
+ ret = bch2_accounting_trans_commit_hook(trans,
+ bkey_i_to_accounting(accounting), flags);
+ if (ret)
+ goto revert_fs_usage;
+ }
percpu_up_read(&c->mark_lock);
/* XXX: we only want to run this if deltas are nonzero */
@@ -695,8 +697,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit;
- for (struct jset_entry *i = trans->journal_entries;
- i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ for (struct jset_entry *i = btree_trans_journal_entries_start(trans);
+ i != btree_trans_journal_entries_top(trans);
i = vstruct_next(i)) {
ret = bch2_journal_entry_validate(c, NULL, i,
bcachefs_metadata_version_current,
@@ -751,11 +753,18 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
}
memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
- trans->journal_entries,
- trans->journal_entries_u64s);
+ btree_trans_journal_entries_start(trans),
+ trans->journal_entries.u64s);
+
+ trans->journal_res.offset += trans->journal_entries.u64s;
+ trans->journal_res.u64s -= trans->journal_entries.u64s;
- trans->journal_res.offset += trans->journal_entries_u64s;
- trans->journal_res.u64s -= trans->journal_entries_u64s;
+ memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res,
+ BCH_JSET_ENTRY_write_buffer_keys,
+ BTREE_ID_accounting, 0,
+ trans->accounting.u64s)->_data,
+ btree_trans_subbuf_base(trans, &trans->accounting),
+ trans->accounting.u64s);
if (trans->journal_seq)
*trans->journal_seq = trans->journal_res.seq;
@@ -777,13 +786,10 @@ fatal_err:
bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
percpu_down_read(&c->mark_lock);
revert_fs_usage:
- for (struct jset_entry *entry2 = trans->journal_entries;
- entry2 != entry;
- entry2 = vstruct_next(entry2))
- if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys &&
- entry2->start->k.type == KEY_TYPE_accounting)
- bch2_accounting_trans_commit_revert(trans,
- bkey_i_to_accounting(entry2->start), flags);
+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
+ i != accounting;
+ i = bkey_next(i))
+ bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
percpu_up_read(&c->mark_lock);
return ret;
}
@@ -958,8 +964,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
return ret;
}
- for (struct jset_entry *i = trans->journal_entries;
- i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ for (struct jset_entry *i = btree_trans_journal_entries_start(trans);
+ i != btree_trans_journal_entries_top(trans);
i = vstruct_next(i))
if (i->type == BCH_JSET_ENTRY_btree_keys ||
i->type == BCH_JSET_ENTRY_write_buffer_keys) {
@@ -968,6 +974,14 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
return ret;
}
+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
+ i != btree_trans_subbuf_top(trans, &trans->accounting);
+ i = bkey_next(i)) {
+ int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
@@ -984,7 +998,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
goto out_reset;
if (!trans->nr_updates &&
- !trans->journal_entries_u64s)
+ !trans->journal_entries.u64s &&
+ !trans->accounting.u64s)
goto out_reset;
ret = bch2_trans_commit_run_triggers(trans);
@@ -992,7 +1007,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
goto out_reset;
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
- unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
+ unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) {
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))
ret = do_bch2_trans_commit_to_journal_replay(trans);
else
@@ -1002,7 +1017,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
- trans->journal_u64s = trans->journal_entries_u64s;
+ trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s);
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
@@ -1058,7 +1073,7 @@ retry:
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
- bch2_write_ref_put(c, BCH_WRITE_REF_trans);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans);
out_reset:
if (!ret)
bch2_trans_downgrade(trans);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 023c472dc9ee..9d641bf9d2a2 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -139,6 +139,7 @@ struct btree {
};
#define BCH_BTREE_CACHE_NOT_FREED_REASONS() \
+ x(cache_reserve) \
x(lock_intent) \
x(lock_write) \
x(dirty) \
@@ -257,9 +258,6 @@ struct btree_node_iter {
*
* BTREE_TRIGGER_insert - @new is entering the btree
* BTREE_TRIGGER_overwrite - @old is leaving the btree
- *
- * BTREE_TRIGGER_bucket_invalidate - signal from bucket invalidate path to alloc
- * trigger
*/
#define BTREE_TRIGGER_FLAGS() \
x(norun) \
@@ -269,8 +267,7 @@ struct btree_node_iter {
x(gc) \
x(insert) \
x(overwrite) \
- x(is_root) \
- x(bucket_invalidate)
+ x(is_root)
enum {
#define x(n) BTREE_ITER_FLAG_BIT_##n,
@@ -477,6 +474,18 @@ struct btree_trans_paths {
struct btree_path paths[];
};
+struct trans_kmalloc_trace {
+ unsigned long ip;
+ size_t bytes;
+};
+typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;
+
+struct btree_trans_subbuf {
+ u16 base;
+ u16 u64s;
+ u16 size;;
+};
+
struct btree_trans {
struct bch_fs *c;
@@ -488,6 +497,9 @@ struct btree_trans {
void *mem;
unsigned mem_top;
unsigned mem_bytes;
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_trans_kmalloc_trace trans_kmalloc_trace;
+#endif
btree_path_idx_t nr_sorted;
btree_path_idx_t nr_paths;
@@ -528,9 +540,8 @@ struct btree_trans {
int srcu_idx;
/* update path: */
- u16 journal_entries_u64s;
- u16 journal_entries_size;
- struct jset_entry *journal_entries;
+ struct btree_trans_subbuf journal_entries;
+ struct btree_trans_subbuf accounting;
struct btree_trans_commit_hook *hooks;
struct journal_entry_pin *journal_pin;
@@ -647,13 +658,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b)
static inline void *
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
{
- return (void *) ((u64 *) b->data + 1 + offset);
+ return (void *) ((u64 *) b->data + offset);
}
static inline u16
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
{
- u16 ret = (u64 *) p - 1 - (u64 *) b->data;
+ u16 ret = (u64 *) p - (u64 *) b->data;
EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
return ret;
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index 1e6b7836cc01..5dac09c98026 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -14,6 +14,8 @@
#include "snapshot.h"
#include "trace.h"
+#include <linux/string_helpers.h>
+
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
const struct btree_insert_entry *r)
{
@@ -509,8 +511,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
return 0;
}
-int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+ unsigned long ip)
{
kmsan_check_memory(k, bkey_bytes(&k->k));
@@ -546,7 +549,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
path_idx = iter->key_cache_path;
}
- return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_);
+ return bch2_trans_update_by_path(trans, path_idx, k, flags, ip);
}
int bch2_btree_insert_clone_trans(struct btree_trans *trans,
@@ -562,30 +565,29 @@ int bch2_btree_insert_clone_trans(struct btree_trans *trans,
return bch2_btree_insert_trans(trans, btree, n, 0);
}
-struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
+void *__bch2_trans_subbuf_alloc(struct btree_trans *trans,
+ struct btree_trans_subbuf *buf,
+ unsigned u64s)
{
- unsigned new_top = trans->journal_entries_u64s + u64s;
- unsigned old_size = trans->journal_entries_size;
-
- if (new_top > trans->journal_entries_size) {
- trans->journal_entries_size = roundup_pow_of_two(new_top);
+ unsigned new_top = buf->u64s + u64s;
+ unsigned old_size = buf->size;
- btree_trans_stats(trans)->journal_entries_size = trans->journal_entries_size;
- }
+ if (new_top > buf->size)
+ buf->size = roundup_pow_of_two(new_top);
- struct jset_entry *n =
- bch2_trans_kmalloc_nomemzero(trans,
- trans->journal_entries_size * sizeof(u64));
+ void *n = bch2_trans_kmalloc_nomemzero(trans, buf->size * sizeof(u64));
if (IS_ERR(n))
- return ERR_CAST(n);
+ return n;
- if (trans->journal_entries)
- memcpy(n, trans->journal_entries, old_size * sizeof(u64));
- trans->journal_entries = n;
+ if (buf->u64s)
+ memcpy(n,
+ btree_trans_subbuf_base(trans, buf),
+ old_size * sizeof(u64));
+ buf->base = (u64 *) n - (u64 *) trans->mem;
- struct jset_entry *e = btree_trans_journal_entries_top(trans);
- trans->journal_entries_u64s = new_top;
- return e;
+ void *p = btree_trans_subbuf_top(trans, buf);
+ buf->u64s = new_top;
+ return p;
}
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
@@ -826,26 +828,35 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
return bch2_trans_update_buffered(trans, btree, &k);
}
-int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
+static int __bch2_trans_log_str(struct btree_trans *trans, const char *str, unsigned len)
{
- unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64));
- prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos);
-
- int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
- if (ret)
- return ret;
+ unsigned u64s = DIV_ROUND_UP(len, sizeof(u64));
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
- ret = PTR_ERR_OR_ZERO(e);
+ int ret = PTR_ERR_OR_ZERO(e);
if (ret)
return ret;
struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
- memcpy(l->d, buf->buf, buf->pos);
+ memcpy_and_pad(l->d, u64s * sizeof(u64), str, len, 0);
return 0;
}
+int bch2_trans_log_str(struct btree_trans *trans, const char *str)
+{
+ return __bch2_trans_log_str(trans, str, strlen(str));
+}
+
+int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
+{
+ int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
+ if (ret)
+ return ret;
+
+ return __bch2_trans_log_str(trans, buf->buf, buf->pos);
+}
+
int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree,
unsigned level, struct bkey_i *k)
{
@@ -868,7 +879,6 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
prt_vprintf(&buf, fmt, args);
unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
- prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos);
int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
@@ -881,7 +891,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
- memcpy(l->d, buf.buf, buf.pos);
+ memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0);
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 568e56c91190..f907eaa8b185 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -102,26 +102,60 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *
int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
enum btree_id, struct bpos);
-int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, enum btree_iter_update_trigger_flags);
+int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, enum btree_iter_update_trigger_flags,
+ unsigned long);
-struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
+static inline int __must_check
+bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+{
+ return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_);
+}
+
+static inline void *btree_trans_subbuf_base(struct btree_trans *trans,
+ struct btree_trans_subbuf *buf)
+{
+ return (u64 *) trans->mem + buf->base;
+}
+
+static inline void *btree_trans_subbuf_top(struct btree_trans *trans,
+ struct btree_trans_subbuf *buf)
+{
+ return (u64 *) trans->mem + buf->base + buf->u64s;
+}
+
+void *__bch2_trans_subbuf_alloc(struct btree_trans *,
+ struct btree_trans_subbuf *,
+ unsigned);
+
+static inline void *
+bch2_trans_subbuf_alloc(struct btree_trans *trans,
+ struct btree_trans_subbuf *buf,
+ unsigned u64s)
+{
+ if (buf->u64s + u64s > buf->size)
+ return __bch2_trans_subbuf_alloc(trans, buf, u64s);
+
+ void *p = btree_trans_subbuf_top(trans, buf);
+ buf->u64s += u64s;
+ return p;
+}
+
+static inline struct jset_entry *btree_trans_journal_entries_start(struct btree_trans *trans)
+{
+ return btree_trans_subbuf_base(trans, &trans->journal_entries);
+}
static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans)
{
- return (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ return btree_trans_subbuf_top(trans, &trans->journal_entries);
}
static inline struct jset_entry *
bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
{
- if (!trans->journal_entries ||
- trans->journal_entries_u64s + u64s > trans->journal_entries_size)
- return __bch2_trans_jset_entry_alloc(trans, u64s);
-
- struct jset_entry *e = btree_trans_journal_entries_top(trans);
- trans->journal_entries_u64s += u64s;
- return e;
+ return bch2_trans_subbuf_alloc(trans, &trans->journal_entries, u64s);
}
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
@@ -135,6 +169,8 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr
{
kmsan_check_memory(k, bkey_bytes(&k->k));
+ EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
+
if (unlikely(!btree_type_uses_write_buffer(btree))) {
int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
dump_stack();
@@ -169,6 +205,7 @@ void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);
+int bch2_trans_log_str(struct btree_trans *, const char *);
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *);
@@ -217,12 +254,15 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
bch2_path_put(trans, i->path, true);
trans->nr_updates = 0;
- trans->journal_entries_u64s = 0;
+ trans->journal_entries.u64s = 0;
+ trans->journal_entries.size = 0;
+ trans->accounting.u64s = 0;
+ trans->accounting.size = 0;
trans->hooks = NULL;
trans->extra_disk_res = 0;
}
-static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
+static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
unsigned type, unsigned min_bytes)
{
unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
@@ -245,7 +285,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t
return mut;
}
-static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
+static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
{
return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
}
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 00307356d7c8..74e65714fecd 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -14,6 +14,7 @@
#include "btree_locking.h"
#include "buckets.h"
#include "clock.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
@@ -284,6 +285,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
bool interior_node,
+ unsigned target,
unsigned flags)
{
struct bch_fs *c = trans->c;
@@ -317,6 +319,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
mutex_unlock(&c->btree_reserve_cache_lock);
retry:
ret = bch2_alloc_sectors_start_trans(trans,
+ target ?:
c->opts.metadata_target ?:
c->opts.foreground_target,
0,
@@ -325,7 +328,9 @@ retry:
res->nr_replicas,
min(res->nr_replicas,
c->opts.metadata_replicas_required),
- watermark, 0, cl, &wp);
+ watermark,
+ target ? BCH_WRITE_only_specified_devs : 0,
+ cl, &wp);
if (unlikely(ret))
goto err;
@@ -505,6 +510,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
static int bch2_btree_reserve_get(struct btree_trans *trans,
struct btree_update *as,
unsigned nr_nodes[2],
+ unsigned target,
unsigned flags,
struct closure *cl)
{
@@ -527,7 +533,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
while (p->nr < nr_nodes[interior]) {
b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
- interior, flags);
+ interior, target, flags);
if (IS_ERR(b)) {
ret = PTR_ERR(b);
goto err;
@@ -1116,7 +1122,8 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level_start, bool split, unsigned flags)
+ unsigned level_start, bool split,
+ unsigned target, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
@@ -1226,7 +1233,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
- ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, NULL);
if (bch2_err_matches(ret, ENOSPC) ||
bch2_err_matches(ret, ENOMEM)) {
struct closure cl;
@@ -1245,7 +1252,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
closure_init_stack(&cl);
do {
- ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl);
bch2_trans_unlock(trans);
bch2_wait_on_allocator(c, &cl);
@@ -1806,10 +1813,10 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
__func__, b->c.level);
bch2_btree_update_to_text(&buf, as);
bch2_btree_path_to_text(&buf, trans, path_idx);
+ bch2_fs_emergency_read_only2(c, &buf);
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
- bch2_fs_emergency_read_only(c);
return -EIO;
}
@@ -1878,7 +1885,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
as = bch2_btree_update_start(trans, trans->paths + path,
trans->paths[path].level,
- true, flags);
+ true, 0, flags);
if (IS_ERR(as))
return PTR_ERR(as);
@@ -1948,7 +1955,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
return bch2_btree_split_leaf(trans, path, flags);
struct btree_update *as =
- bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
+ bch2_btree_update_start(trans, trans->paths + path, b->c.level,
+ true, 0, flags);
if (IS_ERR(as))
return PTR_ERR(as);
@@ -2077,7 +2085,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
parent = btree_node_parent(trans->paths + path, b);
as = bch2_btree_update_start(trans, trans->paths + path, level, false,
- BCH_TRANS_COMMIT_no_enospc|flags);
+ 0, BCH_TRANS_COMMIT_no_enospc|flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto err;
@@ -2184,6 +2192,7 @@ err:
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
+ unsigned target,
unsigned flags)
{
struct bch_fs *c = trans->c;
@@ -2196,7 +2205,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_path *path = btree_iter_path(trans, iter);
parent = btree_node_parent(path, b);
- as = bch2_btree_update_start(trans, path, b->c.level, false, flags);
+ as = bch2_btree_update_start(trans, path, b->c.level,
+ false, target, flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto out;
@@ -2261,7 +2271,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans,
bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k);
ret = found
- ? bch2_btree_node_rewrite(trans, &iter, b, flags)
+ ? bch2_btree_node_rewrite(trans, &iter, b, 0, flags)
: -ENOENT;
out:
bch2_trans_iter_exit(trans, &iter);
@@ -2270,7 +2280,9 @@ out:
int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
enum btree_id btree, unsigned level,
- struct bpos pos, unsigned flags)
+ struct bpos pos,
+ unsigned target,
+ unsigned flags)
{
BUG_ON(!level);
@@ -2282,7 +2294,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
if (ret)
goto err;
- ret = bch2_btree_node_rewrite(trans, &iter, b, flags);
+ ret = bch2_btree_node_rewrite(trans, &iter, b, target, flags);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -2296,7 +2308,7 @@ int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans,
if (ret)
return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
- ret = bch2_btree_node_rewrite(trans, &iter, b, flags);
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -2330,7 +2342,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
closure_wake_up(&c->btree_node_rewrites_wait);
bch2_bkey_buf_exit(&a->key, c);
- bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
@@ -2351,8 +2363,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
bool now = false, pending = false;
spin_lock(&c->btree_node_rewrites_lock);
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay &&
- bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) &&
+ enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
list_add(&a->list, &c->btree_node_rewrites);
now = true;
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
@@ -2391,7 +2403,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
if (!a)
break;
- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+ enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite);
queue_work(c->btree_node_rewrite_worker, &a->work);
}
}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index be71cd73b864..7fe793788a79 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -144,7 +144,7 @@ static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
EBUG_ON(!btree_node_locked(path, level));
- if (bch2_btree_node_merging_disabled)
+ if (static_branch_unlikely(&bch2_btree_node_merging_disabled))
return 0;
b = path->l[level].b;
@@ -168,10 +168,10 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
}
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
- struct btree *, unsigned);
+ struct btree *, unsigned, unsigned);
int bch2_btree_node_rewrite_pos(struct btree_trans *,
enum btree_id, unsigned,
- struct bpos, unsigned);
+ struct bpos, unsigned, unsigned);
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *,
struct btree *, unsigned);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 0941fb2c026d..efb0c64d0aac 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -7,6 +7,7 @@
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "disk_accounting.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@@ -181,6 +182,8 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
return wb_flush_one_slowpath(trans, iter, wb);
}
+ EBUG_ON(!bpos_eq(wb->k.k.p, path->pos));
+
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
(*fast)++;
return 0;
@@ -629,11 +632,11 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer))
return -BCH_ERR_erofs_no_writes;
int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
return ret;
}
@@ -692,7 +695,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
} while (!ret && bch2_btree_write_buffer_should_flush(c));
mutex_unlock(&wb->flushing.lock);
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
}
static void wb_accounting_sort(struct btree_write_buffer *wb)
@@ -821,9 +824,9 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_
bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
if (bch2_btree_write_buffer_should_flush(c) &&
- __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
+ __enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer) &&
!queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
if (dst->wb == &wb->flushing)
mutex_unlock(&wb->flushing.lock);
@@ -866,13 +869,18 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
darray_exit(&wb->inc.keys);
}
-int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
+void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
mutex_init(&wb->inc.lock);
mutex_init(&wb->flushing.lock);
INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
+}
+
+int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
/* Will be resized by journal as needed: */
unsigned initial_size = 1 << 16;
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index d535cea28bde..05f56fd1eed0 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -101,6 +101,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_t
int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
+void bch2_fs_btree_write_buffer_init_early(struct bch_fs *);
int bch2_fs_btree_write_buffer_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 31fbc2716d8b..09eb5a543ae4 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -156,10 +156,14 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
g->gen_valid = true;
g->gen = p.ptr.gen;
} else {
+ /* this pointer will be dropped */
*do_update = true;
+ goto out;
}
}
+ /* g->gen_valid == true */
+
if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0,
trans, ptr_gen_newer_than_bucket_gen,
"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
@@ -172,15 +176,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
if (!p.ptr.cached &&
(g->data_type != BCH_DATA_btree ||
data_type == BCH_DATA_btree)) {
- g->gen_valid = true;
- g->gen = p.ptr.gen;
- g->data_type = 0;
+ g->data_type = data_type;
g->stripe_sectors = 0;
g->dirty_sectors = 0;
g->cached_sectors = 0;
- } else {
- *do_update = true;
}
+
+ *do_update = true;
}
if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX,
@@ -217,9 +219,8 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
bch2_data_type_str(data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- if (data_type == BCH_DATA_btree) {
- g->gen_valid = true;
- g->gen = p.ptr.gen;
+ if (!p.ptr.cached &&
+ data_type == BCH_DATA_btree) {
g->data_type = data_type;
g->stripe_sectors = 0;
g->dirty_sectors = 0;
@@ -392,29 +393,24 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf
struct bkey_s_c k, bool insert, enum bch_sb_error_id id)
{
struct bch_fs *c = trans->c;
- bool repeat = false, print = true, suppress = false;
prt_printf(buf, "\nwhile marking ");
bch2_bkey_val_to_text(buf, c, k);
prt_newline(buf);
- __bch2_count_fsck_err(c, id, buf->buf, &repeat, &print, &suppress);
+ bool print = __bch2_count_fsck_err(c, id, buf);
- int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
+ int ret = bch2_run_explicit_recovery_pass(c, buf,
+ BCH_RECOVERY_PASS_check_allocations, 0);
if (insert) {
- print = true;
- suppress = false;
-
bch2_trans_updates_to_text(buf, trans);
__bch2_inconsistent_error(c, buf);
ret = -BCH_ERR_bucket_ref_update;
}
- if (suppress)
- prt_printf(buf, "Ratelimiting new instances of previous error\n");
- if (print)
- bch2_print_string_as_lines(KERN_ERR, buf->buf);
+ if (print || insert)
+ bch2_print_str(c, KERN_ERR, buf->buf);
return ret;
}
@@ -711,7 +707,7 @@ err:
(u64) p.ec.idx);
bch2_bkey_val_to_text(&buf, c, k);
__bch2_inconsistent_error(c, &buf);
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_trigger_stripe_pointer;
}
@@ -966,14 +962,23 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
return PTR_ERR(a);
if (a->v.data_type && type && a->v.data_type != type) {
- bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
- log_fsck_err(trans, bucket_metadata_type_mismatch,
- "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
- "while marking %s",
- iter.pos.inode, iter.pos.offset, a->v.gen,
- bch2_data_type_str(a->v.data_type),
- bch2_data_type_str(type),
- bch2_data_type_str(type));
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
+ "while marking %s\n",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ bch2_data_type_str(a->v.data_type),
+ bch2_data_type_str(type),
+ bch2_data_type_str(type));
+
+ bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
+
+ bch2_run_explicit_recovery_pass(c, &buf,
+ BCH_RECOVERY_PASS_check_allocations, 0);
+
+ if (print)
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
ret = -BCH_ERR_metadata_bucket_inconsistency;
goto err;
}
@@ -985,7 +990,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
}
err:
-fsck_err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -1143,10 +1147,10 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca,
int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
enum btree_iter_update_trigger_flags flags)
{
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_trans_mark_dev_sbs) {
int ret = bch2_trans_mark_dev_sb(c, ca, flags);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_trans_mark_dev_sbs);
return ret;
}
}
@@ -1321,6 +1325,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
sizeof(bucket_gens->b[0]) * copy);
}
+ ret = bch2_bucket_bitmap_resize(&ca->bucket_backpointer_mismatch,
+ ca->mi.nbuckets, nbuckets) ?:
+ bch2_bucket_bitmap_resize(&ca->bucket_backpointer_empty,
+ ca->mi.nbuckets, nbuckets);
+
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
bucket_gens = old_bucket_gens;
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 5891b3a1e61c..4066946b26bc 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -613,11 +613,13 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
if (!dev)
return -EINVAL;
- for_each_online_member(c, ca)
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
if (ca->dev == dev) {
- percpu_ref_put(&ca->io_ref[READ]);
+ rcu_read_unlock();
return ca->dev_idx;
}
+ rcu_read_unlock();
return -BCH_ERR_ENOENT_dev_idx_not_found;
}
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index d0a34a097b80..d3e2e4f776c6 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -91,7 +91,7 @@ static void bch2_checksum_update(struct bch2_checksum_state *state, const void *
}
}
-static void bch2_chacha20_init(u32 state[CHACHA_STATE_WORDS],
+static void bch2_chacha20_init(struct chacha_state *state,
const struct bch_key *key, struct nonce nonce)
{
u32 key_words[CHACHA_KEY_SIZE / sizeof(u32)];
@@ -106,14 +106,14 @@ static void bch2_chacha20_init(u32 state[CHACHA_STATE_WORDS],
memzero_explicit(key_words, sizeof(key_words));
}
-static void bch2_chacha20(const struct bch_key *key, struct nonce nonce,
- void *data, size_t len)
+void bch2_chacha20(const struct bch_key *key, struct nonce nonce,
+ void *data, size_t len)
{
- u32 state[CHACHA_STATE_WORDS];
+ struct chacha_state state;
- bch2_chacha20_init(state, key, nonce);
- chacha20_crypt(state, data, data, len);
- memzero_explicit(state, sizeof(state));
+ bch2_chacha20_init(&state, key, nonce);
+ chacha20_crypt(&state, data, data, len);
+ chacha_zeroize_state(&state);
}
static void bch2_poly1305_init(struct poly1305_desc_ctx *desc,
@@ -257,14 +257,14 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
{
struct bio_vec bv;
struct bvec_iter iter;
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
int ret = 0;
if (bch2_fs_inconsistent_on(!c->chacha20_key_set,
c, "attempting to encrypt without encryption key"))
return -BCH_ERR_no_encryption_key;
- bch2_chacha20_init(chacha_state, &c->chacha20_key, nonce);
+ bch2_chacha20_init(&chacha_state, &c->chacha20_key, nonce);
bio_for_each_segment(bv, bio, iter) {
void *p;
@@ -280,10 +280,10 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
}
p = bvec_kmap_local(&bv);
- chacha20_crypt(chacha_state, p, p, bv.bv_len);
+ chacha20_crypt(&chacha_state, p, p, bv.bv_len);
kunmap_local(p);
}
- memzero_explicit(chacha_state, sizeof(chacha_state));
+ chacha_zeroize_state(&chacha_state);
return ret;
}
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 1310782d3ae9..7bd9cf6104ca 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -69,6 +69,8 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
bch2_csum_to_text(out, type, expected);
}
+void bch2_chacha20(const struct bch_key *, struct nonce, void *, size_t);
+
int bch2_request_key(struct bch_sb *, struct bch_key *);
#ifndef __KERNEL__
int bch2_revoke_key(struct bch_sb *);
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index d6dd12d74d4f..f57f9f4774e6 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -122,7 +122,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
__set_current_state(TASK_RUNNING);
timer_delete_sync(&wait.cpu_timer);
- destroy_timer_on_stack(&wait.cpu_timer);
+ timer_destroy_on_stack(&wait.cpu_timer);
bch2_io_timer_del(clock, &wait.io_timer);
}
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index 28ed32449913..1bca61d17092 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -714,7 +714,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
ret = match_string(bch2_compression_opts, -1, type_str);
if (ret < 0 && err)
- prt_str(err, "invalid compression type");
+ prt_printf(err, "invalid compression type\n");
if (ret < 0)
goto err;
@@ -729,7 +729,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
if (!ret && level > 15)
ret = -EINVAL;
if (ret < 0 && err)
- prt_str(err, "invalid compression level");
+ prt_printf(err, "invalid compression level\n");
if (ret < 0)
goto err;
diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h
index c6151495985f..50ec3decfe8c 100644
--- a/fs/bcachefs/darray.h
+++ b/fs/bcachefs/darray.h
@@ -20,7 +20,18 @@ struct { \
#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
typedef DARRAY(char) darray_char;
-typedef DARRAY(char *) darray_str;
+typedef DARRAY(char *) darray_str;
+typedef DARRAY(const char *) darray_const_str;
+
+typedef DARRAY(u8) darray_u8;
+typedef DARRAY(u16) darray_u16;
+typedef DARRAY(u32) darray_u32;
+typedef DARRAY(u64) darray_u64;
+
+typedef DARRAY(s8) darray_s8;
+typedef DARRAY(s16) darray_s16;
+typedef DARRAY(s32) darray_s32;
+typedef DARRAY(s64) darray_s64;
int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t);
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index b211c97238ab..c34e5b88ba9d 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -100,9 +100,10 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
return true;
}
-static noinline void trace_io_move_finish2(struct data_update *u,
- struct bkey_i *new,
- struct bkey_i *insert)
+noinline_for_stack
+static void trace_io_move_finish2(struct data_update *u,
+ struct bkey_i *new,
+ struct bkey_i *insert)
{
struct bch_fs *c = u->op.c;
struct printbuf buf = PRINTBUF;
@@ -124,6 +125,7 @@ static noinline void trace_io_move_finish2(struct data_update *u,
printbuf_exit(&buf);
}
+noinline_for_stack
static void trace_io_move_fail2(struct data_update *m,
struct bkey_s_c new,
struct bkey_s_c wrote,
@@ -179,24 +181,84 @@ static void trace_io_move_fail2(struct data_update *m,
printbuf_exit(&buf);
}
+noinline_for_stack
+static void trace_data_update2(struct data_update *m,
+ struct bkey_s_c old, struct bkey_s_c k,
+ struct bkey_i *insert)
+{
+ struct bch_fs *c = m->op.c;
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ trace_data_update(c, buf.buf);
+ printbuf_exit(&buf);
+}
+
+noinline_for_stack
+static void trace_io_move_created_rebalance2(struct data_update *m,
+ struct bkey_s_c old, struct bkey_s_c k,
+ struct bkey_i *insert)
+{
+ struct bch_fs *c = m->op.c;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts);
+
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ trace_io_move_created_rebalance(c, buf.buf);
+ printbuf_exit(&buf);
+
+ this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]);
+}
+
+noinline_for_stack
+static int data_update_invalid_bkey(struct data_update *m,
+ struct bkey_s_c old, struct bkey_s_c k,
+ struct bkey_i *insert)
+{
+ struct bch_fs *c = m->op.c;
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_str(&buf, "about to insert invalid key in data update path");
+ prt_printf(&buf, "\nop.nonce: %u", m->op.nonce);
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ bch2_fs_emergency_read_only2(c, &buf);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+
+ return -BCH_ERR_invalid_bkey;
+}
+
static int __bch2_data_update_index_update(struct btree_trans *trans,
struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct btree_iter iter;
- struct data_update *m =
- container_of(op, struct data_update, op);
- struct keylist *keys = &op->insert_keys;
- struct bkey_buf _new, _insert;
- struct printbuf journal_msg = PRINTBUF;
+ struct data_update *m = container_of(op, struct data_update, op);
int ret = 0;
- bch2_bkey_buf_init(&_new);
- bch2_bkey_buf_init(&_insert);
- bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
-
bch2_trans_iter_init(trans, &iter, m->btree_id,
- bkey_start_pos(&bch2_keylist_front(keys)->k),
+ bkey_start_pos(&bch2_keylist_front(&op->insert_keys)->k),
BTREE_ITER_slots|BTREE_ITER_intent);
while (1) {
@@ -221,19 +283,30 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
if (ret)
goto err;
- new = bkey_i_to_extent(bch2_keylist_front(keys));
+ new = bkey_i_to_extent(bch2_keylist_front(&op->insert_keys));
if (!bch2_extents_match(k, old)) {
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i),
- NULL, "no match:");
+ NULL, "no match:");
goto nowork;
}
- bkey_reassemble(_insert.k, k);
- insert = _insert.k;
+ insert = bch2_trans_kmalloc(trans,
+ bkey_bytes(k.k) +
+ bkey_val_bytes(&new->k) +
+ sizeof(struct bch_extent_rebalance));
+ ret = PTR_ERR_OR_ZERO(insert);
+ if (ret)
+ goto err;
+
+ bkey_reassemble(insert, k);
- bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
- new = bkey_i_to_extent(_new.k);
+ new = bch2_trans_kmalloc(trans, bkey_bytes(&new->k));
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ goto err;
+
+ bkey_copy(&new->k_i, bch2_keylist_front(&op->insert_keys));
bch2_cut_front(iter.pos, &new->k_i);
bch2_cut_front(iter.pos, insert);
@@ -346,44 +419,12 @@ restart_drop_extra_replicas:
.btree = m->btree_id,
.flags = BCH_VALIDATE_commit,
});
- if (invalid) {
- struct printbuf buf = PRINTBUF;
-
- prt_str(&buf, "about to insert invalid key in data update path");
- prt_printf(&buf, "\nop.nonce: %u", m->op.nonce);
- prt_str(&buf, "\nold: ");
- bch2_bkey_val_to_text(&buf, c, old);
- prt_str(&buf, "\nk: ");
- bch2_bkey_val_to_text(&buf, c, k);
- prt_str(&buf, "\nnew: ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
-
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
- printbuf_exit(&buf);
-
- bch2_fatal_error(c);
- ret = -BCH_ERR_invalid_bkey;
+ if (unlikely(invalid)) {
+ ret = data_update_invalid_bkey(m, old, k, insert);
goto out;
}
- if (trace_data_update_enabled()) {
- struct printbuf buf = PRINTBUF;
-
- prt_str(&buf, "\nold: ");
- bch2_bkey_val_to_text(&buf, c, old);
- prt_str(&buf, "\nk: ");
- bch2_bkey_val_to_text(&buf, c, k);
- prt_str(&buf, "\nnew: ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
-
- trace_data_update(c, buf.buf);
- printbuf_exit(&buf);
- }
-
- printbuf_reset(&journal_msg);
- prt_str(&journal_msg, bch2_data_update_type_strs[m->type]);
-
- ret = bch2_trans_log_msg(trans, &journal_msg) ?:
+ ret = bch2_trans_log_str(trans, bch2_data_update_type_strs[m->type]) ?:
bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
@@ -391,28 +432,39 @@ restart_drop_extra_replicas:
k.k->p, insert->k.p) ?:
bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?:
bch2_trans_update(trans, &iter, insert,
- BTREE_UPDATE_internal_snapshot_node) ?:
- bch2_trans_commit(trans, &op->res,
+ BTREE_UPDATE_internal_snapshot_node);
+ if (ret)
+ goto err;
+
+ if (trace_data_update_enabled())
+ trace_data_update2(m, old, k, insert);
+
+ if (bch2_bkey_sectors_need_rebalance(c, bkey_i_to_s_c(insert)) * k.k->size >
+ bch2_bkey_sectors_need_rebalance(c, k) * insert->k.size)
+ trace_io_move_created_rebalance2(m, old, k, insert);
+
+ ret = bch2_trans_commit(trans, &op->res,
NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
m->data_opts.btree_insert_flags);
- if (!ret) {
- bch2_btree_iter_set_pos(trans, &iter, next_pos);
+ if (ret)
+ goto err;
- this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size);
- if (trace_io_move_finish_enabled())
- trace_io_move_finish2(m, &new->k_i, insert);
- }
+ bch2_btree_iter_set_pos(trans, &iter, next_pos);
+
+ this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size);
+ if (trace_io_move_finish_enabled())
+ trace_io_move_finish2(m, &new->k_i, insert);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
if (ret)
break;
next:
- while (bkey_ge(iter.pos, bch2_keylist_front(keys)->k.p)) {
- bch2_keylist_pop_front(keys);
- if (bch2_keylist_empty(keys))
+ while (bkey_ge(iter.pos, bch2_keylist_front(&op->insert_keys)->k.p)) {
+ bch2_keylist_pop_front(&op->insert_keys);
+ if (bch2_keylist_empty(&op->insert_keys))
goto out;
}
continue;
@@ -430,10 +482,7 @@ nowork:
goto next;
}
out:
- printbuf_exit(&journal_msg);
bch2_trans_iter_exit(trans, &iter);
- bch2_bkey_buf_exit(&_insert, c);
- bch2_bkey_buf_exit(&_new, c);
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
return ret;
}
@@ -587,6 +636,10 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
prt_str_indented(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
+ prt_newline(out);
+
+ prt_str_indented(out, "scrub:\t");
+ prt_u64(out, data_opts->scrub);
}
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
@@ -607,9 +660,17 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update
prt_newline(out);
printbuf_indent_add(out, 2);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
- prt_printf(out, "read_done:\t%u\n", m->read_done);
- bch2_write_op_to_text(out, &m->op);
- printbuf_indent_sub(out, 2);
+
+ if (!m->read_done) {
+ prt_printf(out, "read:\n");
+ printbuf_indent_add(out, 2);
+ bch2_read_bio_to_text(out, &m->rbio);
+ } else {
+ prt_printf(out, "write:\n");
+ printbuf_indent_add(out, 2);
+ bch2_write_op_to_text(out, &m->op);
+ }
+ printbuf_indent_sub(out, 4);
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,
@@ -707,7 +768,9 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
rcu_read_lock();
unsigned nr_replicas = 0, i;
for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) {
- struct bch_dev *ca = bch2_dev_rcu(c, i);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, i);
+ if (!ca)
+ continue;
struct bch_dev_usage usage;
bch2_dev_usage_read_fast(ca, &usage);
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index ed05125867da..5e14d13568de 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -50,6 +50,21 @@ struct data_update {
struct bio_vec *bvecs;
};
+struct promote_op {
+ struct rcu_head rcu;
+ u64 start_time;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
+
+ struct rhash_head hash;
+ struct bpos pos;
+
+ struct work_struct work;
+ struct data_update write;
+ struct bio_vec bi_inline_vecs[]; /* must be last */
+};
+
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *);
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 5a8bc7013512..4fa70634c90e 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_io.h"
@@ -16,6 +17,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
+#include "data_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
@@ -40,9 +42,10 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
struct btree_node *n_sorted = c->verify_data->data;
struct bset *sorted, *inmemory = &b->data->keys;
struct bio *bio;
- bool failed = false, saw_error = false;
+ bool failed = false;
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_verify_replicas);
if (!ca)
return false;
@@ -57,12 +60,13 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
submit_bio_wait(bio);
bio_put(bio);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_verify_replicas);
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
v->written = 0;
- if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
+ if (bch2_btree_node_read_done(c, ca, v, NULL, NULL))
return false;
n_sorted = c->verify_data->data;
@@ -196,7 +200,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
return;
}
- ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_node_ondisk_to_text);
if (!ca) {
prt_printf(out, "error getting device to read from: not online\n");
return;
@@ -297,28 +302,13 @@ out:
if (bio)
bio_put(bio);
kvfree(n_ondisk);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_node_ondisk_to_text);
}
#ifdef CONFIG_DEBUG_FS
-/* XXX: bch_fs refcounting */
-
-struct dump_iter {
- struct bch_fs *c;
- enum btree_id id;
- struct bpos from;
- struct bpos prev_node;
- u64 iter;
-
- struct printbuf buf;
-
- char __user *ubuf; /* destination user buffer */
- size_t size; /* size of requested read */
- ssize_t ret; /* bytes read so far */
-};
-
-static ssize_t flush_buf(struct dump_iter *i)
+ssize_t bch2_debugfs_flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
@@ -330,6 +320,11 @@ static ssize_t flush_buf(struct dump_iter *i)
i->buf.pos -= copied;
memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);
+ if (i->buf.last_newline >= copied)
+ i->buf.last_newline -= copied;
+ if (i->buf.last_field >= copied)
+ i->buf.last_field -= copied;
+
if (copied != bytes)
return -EFAULT;
}
@@ -356,7 +351,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file)
return 0;
}
-static int bch2_dump_release(struct inode *inode, struct file *file)
+int bch2_dump_release(struct inode *inode, struct file *file)
{
struct dump_iter *i = file->private_data;
@@ -374,7 +369,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- return flush_buf(i) ?:
+ return bch2_debugfs_flush_buf(i) ?:
bch2_trans_run(i->c,
for_each_btree_key(trans, iter, i->id, i->from,
BTREE_ITER_prefetch|
@@ -383,7 +378,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
prt_newline(&i->buf);
bch2_trans_unlock(trans);
i->from = bpos_successor(iter.pos);
- flush_buf(i);
+ bch2_debugfs_flush_buf(i);
}))) ?:
i->ret;
}
@@ -404,7 +399,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- ssize_t ret = flush_buf(i);
+ ssize_t ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
@@ -418,7 +413,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
? bpos_successor(b->key.k.p)
: b->key.k.p;
- drop_locks_do(trans, flush_buf(i));
+ drop_locks_do(trans, bch2_debugfs_flush_buf(i));
}))) ?: i->ret;
}
@@ -438,7 +433,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- return flush_buf(i) ?:
+ return bch2_debugfs_flush_buf(i) ?:
bch2_trans_run(i->c,
for_each_btree_key(trans, iter, i->id, i->from,
BTREE_ITER_prefetch|
@@ -456,7 +451,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
bch2_bfloat_to_text(&i->buf, l->b, _k);
bch2_trans_unlock(trans);
i->from = bpos_successor(iter.pos);
- flush_buf(i);
+ bch2_debugfs_flush_buf(i);
}))) ?:
i->ret;
}
@@ -517,7 +512,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct rhash_head *pos;
struct btree *b;
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
@@ -540,7 +535,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
ret = -ENOMEM;
if (!ret)
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@@ -614,7 +609,7 @@ restart:
closure_put(&trans->ref);
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
if (ret)
goto unlocked;
@@ -627,7 +622,7 @@ unlocked:
ret = -ENOMEM;
if (!ret)
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@@ -652,7 +647,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
i->ret = 0;
while (1) {
- err = flush_buf(i);
+ err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@@ -695,7 +690,7 @@ static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
i->iter++;
}
- err = flush_buf(i);
+ err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@@ -753,7 +748,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
while (1) {
struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
- err = flush_buf(i);
+ err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@@ -770,6 +765,12 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
mutex_lock(&s->lock);
prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ printbuf_indent_add(&i->buf, 2);
+ bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
+ printbuf_indent_sub(&i->buf, 2);
+#endif
+
prt_printf(&i->buf, "Transaction duration:\n");
printbuf_indent_add(&i->buf, 2);
@@ -868,7 +869,7 @@ static ssize_t bch2_simple_print(struct file *file, char __user *buf,
ret = -ENOMEM;
if (!ret)
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@@ -927,7 +928,11 @@ void bch2_fs_debug_init(struct bch_fs *c)
if (IS_ERR_OR_NULL(bch_debug))
return;
- snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
+ if (c->sb.multi_device)
+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
+ else
+ strscpy(name, c->name, sizeof(name));
+
c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
if (IS_ERR_OR_NULL(c->fs_debug_dir))
return;
@@ -953,6 +958,8 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("write_points", 0400, c->fs_debug_dir,
c->btree_debug, &write_points_ops);
+ bch2_fs_async_obj_debugfs_init(c);
+
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;
diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h
index 2c37143b5fd1..d88b1194b8ac 100644
--- a/fs/bcachefs/debug.h
+++ b/fs/bcachefs/debug.h
@@ -14,11 +14,29 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *,
static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
{
- if (bch2_verify_btree_ondisk)
+ if (static_branch_unlikely(&bch2_verify_btree_ondisk))
__bch2_btree_verify(c, b);
}
#ifdef CONFIG_DEBUG_FS
+struct dump_iter {
+ struct bch_fs *c;
+ struct async_obj_list *list;
+ enum btree_id id;
+ struct bpos from;
+ struct bpos prev_node;
+ u64 iter;
+
+ struct printbuf buf;
+
+ char __user *ubuf; /* destination user buffer */
+ size_t size; /* size of requested read */
+ ssize_t ret; /* bytes read so far */
+};
+
+ssize_t bch2_debugfs_flush_buf(struct dump_iter *);
+int bch2_dump_release(struct inode *, struct file *);
+
void bch2_fs_debug_exit(struct bch_fs *);
void bch2_fs_debug_init(struct bch_fs *);
#else
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 8a680e52c1ed..d198001838f3 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -212,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
struct qstr d_name = bch2_dirent_get_name(d);
- prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
+ prt_printf(out, "%.*s", d_name.len, d_name.name);
+
+ if (d.v->d_casefold) {
+ struct qstr d_name = bch2_dirent_get_lookup_name(d);
+ prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name);
+ }
+
+ prt_str(out, " ->");
if (d.v->d_type != DT_SUBVOL)
- prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
+ prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum));
else
- prt_printf(out, "%u -> %u",
+ prt_printf(out, " %u -> %u",
le32_to_cpu(d.v->d_parent_subvol),
le32_to_cpu(d.v->d_child_subvol));
@@ -288,6 +295,7 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
}
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
+ const struct bch_hash_info *hash_info,
subvol_inum dir,
u8 type,
const struct qstr *name,
@@ -295,10 +303,19 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
u64 dst)
{
struct bkey_i_dirent *dirent;
+ struct qstr _cf_name;
if (name->len > BCH_NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
+ if (hash_info->cf_encoding && !cf_name) {
+ int ret = bch2_casefold(trans, hash_info, name, &_cf_name);
+ if (ret)
+ return ERR_PTR(ret);
+
+ cf_name = &_cf_name;
+ }
+
dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
if (IS_ERR(dirent))
return dirent;
@@ -324,7 +341,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
+ dirent = dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@@ -333,8 +350,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
dirent->k.p.snapshot = snapshot;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
- dir_inum, snapshot, &dirent->k_i,
- flags|BTREE_UPDATE_internal_snapshot_node);
+ dir_inum, snapshot, &dirent->k_i, flags);
*dir_offset = dirent->k.p.offset;
return ret;
@@ -344,28 +360,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum,
u64 *dir_offset,
- u64 *i_size,
enum btree_iter_update_trigger_flags flags)
{
struct bkey_i_dirent *dirent;
int ret;
- if (hash_info->cf_encoding) {
- struct qstr cf_name;
- ret = bch2_casefold(trans, hash_info, name, &cf_name);
- if (ret)
- return ret;
- dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
- } else {
- dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
- }
-
+ dirent = dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
- *i_size += bkey_bytes(&dirent->k);
-
ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
dir, &dirent->k_i, flags);
*dir_offset = dirent->k.p.offset;
@@ -466,7 +470,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
*src_offset = dst_iter.pos.offset;
/* Create new dst key: */
- new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
+ new_dst = dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name,
dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_dst);
if (ret)
@@ -477,7 +481,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
/* Create new src key: */
if (mode == BCH_RENAME_EXCHANGE) {
- new_src = dirent_create_key(trans, src_dir, 0, src_name,
+ new_src = dirent_create_key(trans, src_hash, src_dir, 0, src_name,
src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_src);
if (ret)
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 9838a7ba7ed1..d3e7ae669575 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -65,7 +65,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
enum btree_iter_update_trigger_flags);
int bch2_dirent_create(struct btree_trans *, subvol_inum,
const struct bch_hash_info *, u8,
- const struct qstr *, u64, u64 *, u64 *,
+ const struct qstr *, u64, u64 *,
enum btree_iter_update_trigger_flags);
static inline unsigned vfs_d_type(unsigned type)
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index b007319b72e9..b3840ff7c407 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -68,23 +68,31 @@ static const char * const disk_accounting_type_strs[] = {
NULL
};
-static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_pos *pos,
- s64 *d, unsigned nr)
+static inline void __accounting_key_init(struct bkey_i *k, struct bpos pos,
+ s64 *d, unsigned nr)
{
struct bkey_i_accounting *acc = bkey_accounting_init(k);
- acc->k.p = disk_accounting_pos_to_bpos(pos);
+ acc->k.p = pos;
set_bkey_val_u64s(&acc->k, sizeof(struct bch_accounting) / sizeof(u64) + nr);
memcpy_u64s_small(acc->v.d, d, nr);
}
+static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_pos *pos,
+ s64 *d, unsigned nr)
+{
+ return __accounting_key_init(k, disk_accounting_pos_to_bpos(pos), d, nr);
+}
+
static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos);
int bch2_disk_accounting_mod(struct btree_trans *trans,
struct disk_accounting_pos *k,
s64 *d, unsigned nr, bool gc)
{
+ BUG_ON(nr > BCH_ACCOUNTING_MAX_COUNTERS);
+
/* Normalize: */
switch (k->type) {
case BCH_DISK_ACCOUNTING_replicas:
@@ -92,21 +100,49 @@ int bch2_disk_accounting_mod(struct btree_trans *trans,
break;
}
- BUG_ON(nr > BCH_ACCOUNTING_MAX_COUNTERS);
+ struct bpos pos = disk_accounting_pos_to_bpos(k);
+
+ if (likely(!gc)) {
+ struct bkey_i_accounting *a;
+#if 0
+ for (a = btree_trans_subbuf_base(trans, &trans->accounting);
+ a != btree_trans_subbuf_top(trans, &trans->accounting);
+ a = (void *) bkey_next(&a->k_i))
+ if (bpos_eq(a->k.p, pos)) {
+ BUG_ON(nr != bch2_accounting_counters(&a->k));
+ acc_u64s(a->v.d, d, nr);
+
+ if (bch2_accounting_key_is_zero(accounting_i_to_s_c(a))) {
+ unsigned offset = (u64 *) a -
+ (u64 *) btree_trans_subbuf_base(trans, &trans->accounting);
+
+ trans->accounting.u64s -= a->k.u64s;
+ memmove_u64s_down(a,
+ bkey_next(&a->k_i),
+ trans->accounting.u64s - offset);
+ }
+ return 0;
+ }
+#endif
+ unsigned u64s = sizeof(*a) / sizeof(u64) + nr;
+ a = bch2_trans_subbuf_alloc(trans, &trans->accounting, u64s);
+ int ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
- struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i;
+ __accounting_key_init(&a->k_i, pos, d, nr);
+ return 0;
+ } else {
+ struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i;
- accounting_key_init(&k_i.k, k, d, nr);
+ __accounting_key_init(&k_i.k, pos, d, nr);
- if (unlikely(gc)) {
int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
if (ret == -BCH_ERR_btree_insert_need_mark_replicas)
ret = drop_locks_do(trans,
bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?:
bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
return ret;
- } else {
- return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k);
}
}
@@ -287,7 +323,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
static int bch2_accounting_update_sb_one(struct bch_fs *c, struct bpos p)
{
- struct bch_replicas_padded r;
+ union bch_replicas_padded r;
return accounting_to_replicas(&r.e, p)
? bch2_mark_replicas(c, &r.e)
: 0;
@@ -299,14 +335,13 @@ static int bch2_accounting_update_sb_one(struct bch_fs *c, struct bpos p)
*/
int bch2_accounting_update_sb(struct btree_trans *trans)
{
- for (struct jset_entry *i = trans->journal_entries;
- i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
- i = vstruct_next(i))
- if (jset_entry_is_key(i) && i->start->k.type == KEY_TYPE_accounting) {
- int ret = bch2_accounting_update_sb_one(trans->c, i->start->k.p);
- if (ret)
- return ret;
- }
+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
+ i != btree_trans_subbuf_top(trans, &trans->accounting);
+ i = bkey_next(i)) {
+ int ret = bch2_accounting_update_sb_one(trans->c, i->k.p);
+ if (ret)
+ return ret;
+ }
return 0;
}
@@ -361,7 +396,7 @@ err:
int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
enum bch_accounting_mode mode)
{
- struct bch_replicas_padded r;
+ union bch_replicas_padded r;
if (mode != BCH_ACCOUNTING_read &&
accounting_to_replicas(&r.e, a.k->p) &&
@@ -376,6 +411,19 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
return ret;
}
+int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounting a,
+ enum bch_accounting_mode mode)
+{
+ union bch_replicas_padded r;
+
+ if (mode != BCH_ACCOUNTING_read &&
+ accounting_to_replicas(&r.e, a.k->p) &&
+ !bch2_replicas_marked_locked(c, &r.e))
+ return -BCH_ERR_btree_insert_need_mark_replicas;
+
+ return __bch2_accounting_mem_insert(c, a);
+}
+
static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e)
{
for (unsigned i = 0; i < e->nr_counters; i++)
@@ -425,10 +473,12 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
percpu_down_read(&c->mark_lock);
darray_for_each(acc->k, i) {
- struct {
+ union {
+ u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs,
+ BCH_BKEY_PTRS_MAX)];
struct bch_replicas_usage r;
- u8 pad[BCH_BKEY_PTRS_MAX];
} u;
+ u.r.r.nr_devs = BCH_BKEY_PTRS_MAX;
if (!accounting_to_replicas(&u.r.r, i->pos))
continue;
@@ -557,11 +607,11 @@ int bch2_gc_accounting_done(struct bch_fs *c)
prt_str(&buf, "accounting mismatch for ");
bch2_accounting_key_to_text(&buf, &acc_k);
- prt_str(&buf, ": got");
+ prt_str(&buf, ":\n got");
for (unsigned j = 0; j < nr; j++)
prt_printf(&buf, " %llu", dst_v[j]);
- prt_str(&buf, " should be");
+ prt_str(&buf, "\nshould be");
for (unsigned j = 0; j < nr; j++)
prt_printf(&buf, " %llu", src_v[j]);
@@ -583,7 +633,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
accounting_key_init(&k_i.k, &acc_k, src_v, nr);
bch2_accounting_mem_mod_locked(trans,
bkey_i_to_s_c_accounting(&k_i.k),
- BCH_ACCOUNTING_normal);
+ BCH_ACCOUNTING_normal, true);
preempt_disable();
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
@@ -612,23 +662,23 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
percpu_down_read(&c->mark_lock);
int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
- BCH_ACCOUNTING_read);
+ BCH_ACCOUNTING_read, false);
percpu_up_read(&c->mark_lock);
return ret;
}
static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
- struct disk_accounting_pos acc,
+ struct disk_accounting_pos *acc,
u64 *v, unsigned nr)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0, invalid_dev = -1;
- switch (acc.type) {
+ switch (acc->type) {
case BCH_DISK_ACCOUNTING_replicas: {
- struct bch_replicas_padded r;
- __accounting_to_replicas(&r.e, &acc);
+ union bch_replicas_padded r;
+ __accounting_to_replicas(&r.e, acc);
for (unsigned i = 0; i < r.e.nr_devs; i++)
if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
@@ -647,7 +697,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
trans, accounting_replicas_not_marked,
"accounting not marked in superblock replicas\n%s",
(printbuf_reset(&buf),
- bch2_accounting_key_to_text(&buf, &acc),
+ bch2_accounting_key_to_text(&buf, acc),
buf.buf))) {
/*
* We're not RW yet and still single threaded, dropping
@@ -663,8 +713,8 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
}
case BCH_DISK_ACCOUNTING_dev_data_type:
- if (!bch2_dev_exists(c, acc.dev_data_type.dev)) {
- invalid_dev = acc.dev_data_type.dev;
+ if (!bch2_dev_exists(c, acc->dev_data_type.dev)) {
+ invalid_dev = acc->dev_data_type.dev;
goto invalid_device;
}
break;
@@ -678,13 +728,13 @@ invalid_device:
"accounting entry points to invalid device %i\n%s",
invalid_dev,
(printbuf_reset(&buf),
- bch2_accounting_key_to_text(&buf, &acc),
+ bch2_accounting_key_to_text(&buf, acc),
buf.buf))) {
for (unsigned i = 0; i < nr; i++)
v[i] = -v[i];
ret = commit_do(trans, NULL, NULL, 0,
- bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?:
+ bch2_disk_accounting_mod(trans, acc, v, nr, false)) ?:
-BCH_ERR_remove_disk_accounting_entry;
} else {
ret = -BCH_ERR_remove_disk_accounting_entry;
@@ -735,7 +785,7 @@ int bch2_accounting_read(struct bch_fs *c)
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
break;
- if (!bch2_accounting_is_mem(acc_k)) {
+ if (!bch2_accounting_is_mem(&acc_k)) {
struct disk_accounting_pos next;
memset(&next, 0, sizeof(next));
next.type = acc_k.type + 1;
@@ -757,7 +807,7 @@ int bch2_accounting_read(struct bch_fs *c)
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, i->k->k.p);
- if (!bch2_accounting_is_mem(acc_k))
+ if (!bch2_accounting_is_mem(&acc_k))
continue;
struct bkey_s_c k = bkey_i_to_s_c(i->k);
@@ -813,7 +863,7 @@ int bch2_accounting_read(struct bch_fs *c)
*/
ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
? -BCH_ERR_remove_disk_accounting_entry
- : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters);
+ : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
if (ret == -BCH_ERR_remove_disk_accounting_entry) {
free_percpu(i->v[0]);
@@ -926,7 +976,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
break;
- if (!bch2_accounting_is_mem(acc_k)) {
+ if (!bch2_accounting_is_mem(&acc_k)) {
struct disk_accounting_pos next;
memset(&next, 0, sizeof(next));
next.type = acc_k.type + 1;
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index abb1f6206fe9..f6098e33ab30 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -136,12 +136,13 @@ enum bch_accounting_mode {
};
int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
+int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
void bch2_accounting_mem_gc(struct bch_fs *);
-static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc)
+static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc)
{
- return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR &&
- acc.type != BCH_DISK_ACCOUNTING_inum;
+ return acc->type < BCH_DISK_ACCOUNTING_TYPE_NR &&
+ acc->type != BCH_DISK_ACCOUNTING_inum;
}
/*
@@ -150,7 +151,8 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc)
*/
static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
struct bkey_s_c_accounting a,
- enum bch_accounting_mode mode)
+ enum bch_accounting_mode mode,
+ bool write_locked)
{
struct bch_fs *c = trans->c;
struct bch_accounting_mem *acc = &c->accounting;
@@ -161,7 +163,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
if (gc && !acc->gc_running)
return 0;
- if (!bch2_accounting_is_mem(acc_k))
+ if (!bch2_accounting_is_mem(&acc_k))
return 0;
if (mode == BCH_ACCOUNTING_normal) {
@@ -189,7 +191,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
- int ret = bch2_accounting_mem_insert(c, a, mode);
+ int ret = 0;
+ if (unlikely(write_locked))
+ ret = bch2_accounting_mem_insert_locked(c, a, mode);
+ else
+ ret = bch2_accounting_mem_insert(c, a, mode);
if (ret)
return ret;
}
@@ -206,7 +212,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
{
percpu_down_read(&trans->c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal);
+ int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false);
percpu_up_read(&trans->c->mark_lock);
return ret;
}
@@ -253,13 +259,13 @@ static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans,
struct bkey_i_accounting *a,
unsigned commit_flags)
{
- a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
- (u64 *) a - (u64 *) trans->journal_entries);
+ u64 *base = (u64 *) btree_trans_subbuf_base(trans, &trans->accounting);
+ a->k.bversion = journal_pos_to_bversion(&trans->journal_res, (u64 *) a - base);
EBUG_ON(bversion_zero(a->k.bversion));
return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))
- ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal)
+ ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal, false)
: 0;
}
@@ -271,7 +277,7 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans
struct bkey_s_accounting a = accounting_i_to_s(a_i);
bch2_accounting_neg(a);
- bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
+ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal, false);
bch2_accounting_neg(a);
}
}
diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
index 2ca3cbf12b71..c20ecf5e5381 100644
--- a/fs/bcachefs/disk_groups.c
+++ b/fs/bcachefs/disk_groups.c
@@ -86,35 +86,6 @@ err:
return ret;
}
-void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
-{
- out->atomic++;
- rcu_read_lock();
-
- struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
- if (!g)
- goto out;
-
- for (unsigned i = 0; i < g->nr; i++) {
- if (i)
- prt_printf(out, " ");
-
- if (g->entries[i].deleted) {
- prt_printf(out, "[deleted]");
- continue;
- }
-
- prt_printf(out, "[parent %d devs", g->entries[i].parent);
- for_each_member_device_rcu(c, ca, &g->entries[i].devs)
- prt_printf(out, " %s", ca->name);
- prt_printf(out, "]");
- }
-
-out:
- rcu_read_unlock();
- out->atomic--;
-}
-
static void bch2_sb_disk_groups_to_text(struct printbuf *out,
struct bch_sb *sb,
struct bch_sb_field *f)
@@ -241,20 +212,13 @@ bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
case TARGET_DEV:
return dev == t.dev;
case TARGET_GROUP: {
- struct bch_disk_groups_cpu *g;
- const struct bch_devs_mask *m;
- bool ret;
-
- rcu_read_lock();
- g = rcu_dereference(c->disk_groups);
- m = g && t.group < g->nr && !g->entries[t.group].deleted
+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+ const struct bch_devs_mask *m =
+ g && t.group < g->nr && !g->entries[t.group].deleted
? &g->entries[t.group].devs
: NULL;
- ret = m ? test_bit(dev, m->d) : false;
- rcu_read_unlock();
-
- return ret;
+ return m ? test_bit(dev, m->d) : false;
}
default:
BUG();
@@ -377,54 +341,81 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
return v;
}
-void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
+static void __bch2_disk_path_to_text(struct printbuf *out, struct bch_disk_groups_cpu *g,
+ unsigned v)
{
- struct bch_disk_groups_cpu *groups;
- struct bch_disk_group_cpu *g;
- unsigned nr = 0;
u16 path[32];
-
- out->atomic++;
- rcu_read_lock();
- groups = rcu_dereference(c->disk_groups);
- if (!groups)
- goto invalid;
+ unsigned nr = 0;
while (1) {
if (nr == ARRAY_SIZE(path))
goto invalid;
- if (v >= groups->nr)
+ if (v >= (g ? g->nr : 0))
goto invalid;
- g = groups->entries + v;
+ struct bch_disk_group_cpu *e = g->entries + v;
- if (g->deleted)
+ if (e->deleted)
goto invalid;
path[nr++] = v;
- if (!g->parent)
+ if (!e->parent)
break;
- v = g->parent - 1;
+ v = e->parent - 1;
}
while (nr) {
- v = path[--nr];
- g = groups->entries + v;
+ struct bch_disk_group_cpu *e = g->entries + path[--nr];
- prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
+ prt_printf(out, "%.*s", (int) sizeof(e->label), e->label);
if (nr)
prt_printf(out, ".");
}
-out:
- rcu_read_unlock();
- out->atomic--;
return;
invalid:
prt_printf(out, "invalid label %u", v);
- goto out;
+}
+
+void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ bch2_printbuf_make_room(out, 4096);
+
+ out->atomic++;
+ rcu_read_lock();
+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+
+ for (unsigned i = 0; i < (g ? g->nr : 0); i++) {
+ prt_printf(out, "%2u: ", i);
+
+ if (g->entries[i].deleted) {
+ prt_printf(out, "[deleted]");
+ goto next;
+ }
+
+ __bch2_disk_path_to_text(out, g, i);
+
+ prt_printf(out, " devs");
+
+ for_each_member_device_rcu(c, ca, &g->entries[i].devs)
+ prt_printf(out, " %s", ca->name);
+next:
+ prt_newline(out);
+ }
+
+ rcu_read_unlock();
+ out->atomic--;
+}
+
+void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
+{
+ out->atomic++;
+ rcu_read_lock();
+ __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v),
+ rcu_read_unlock();
+ --out->atomic;
}
void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
@@ -554,14 +545,12 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
? rcu_dereference(c->devs[t.dev])
: NULL;
- if (ca && percpu_ref_tryget(&ca->io_ref[READ])) {
+ if (ca && ca->disk_sb.bdev)
prt_printf(out, "/dev/%s", ca->name);
- percpu_ref_put(&ca->io_ref[READ]);
- } else if (ca) {
+ else if (ca)
prt_printf(out, "offline device %u", t.dev);
- } else {
+ else
prt_printf(out, "invalid device %u", t.dev);
- }
rcu_read_unlock();
out->atomic--;
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index fff58b78327c..c581426e3894 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -16,6 +16,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_write.h"
@@ -507,20 +508,14 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
- switch (k.k->type) {
- case KEY_TYPE_extent: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
-
- extent_for_each_entry(e, entry)
- if (extent_entry_type(entry) ==
- BCH_EXTENT_ENTRY_stripe_ptr &&
- entry->stripe_ptr.idx == idx)
- return true;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
- break;
- }
- }
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (extent_entry_type(entry) ==
+ BCH_EXTENT_ENTRY_stripe_ptr &&
+ entry->stripe_ptr.idx == idx)
+ return true;
return false;
}
@@ -706,6 +701,9 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
int rw = ec_bio->rw;
+ unsigned ref = rw == READ
+ ? BCH_DEV_READ_REF_ec_block
+ : BCH_DEV_WRITE_REF_ec_block;
bch2_account_io_completion(ca, bio_data_dir(bio),
ec_bio->submit_time, !bio->bi_status);
@@ -727,7 +725,7 @@ static void ec_block_endio(struct bio *bio)
}
bio_put(&ec_bio->bio);
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref);
closure_put(cl);
}
@@ -741,8 +739,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
? BCH_DATA_user
: BCH_DATA_parity;
int rw = op_is_write(opf);
+ unsigned ref = rw == READ
+ ? BCH_DEV_READ_REF_ec_block
+ : BCH_DEV_WRITE_REF_ec_block;
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref);
if (!ca) {
clear_bit(idx, buf->valid);
return;
@@ -788,14 +789,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl);
- percpu_ref_get(&ca->io_ref[rw]);
+ enumerated_ref_get(&ca->io_ref[rw], ref);
submit_bio(&ec_bio->bio);
offset += b;
}
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref);
}
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
@@ -1017,14 +1018,14 @@ static void ec_stripe_delete_work(struct work_struct *work)
BCH_TRANS_COMMIT_no_enospc, ({
ec_stripe_delete(trans, lru_k.k->p.offset);
})));
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
void bch2_do_stripe_deletes(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
+ if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) &&
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
/* stripe creation: */
@@ -1252,7 +1253,8 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
unsigned block,
struct open_bucket *ob)
{
- struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE,
+ BCH_DEV_WRITE_REF_ec_bucket_zero);
if (!ca) {
s->err = -BCH_ERR_erofs_no_writes;
return;
@@ -1268,7 +1270,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
ob->sectors_free,
GFP_KERNEL, 0);
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_ec_bucket_zero);
if (ret)
s->err = ret;
@@ -1418,15 +1420,15 @@ static void ec_stripe_create_work(struct work_struct *work)
while ((s = get_pending_stripe(c)))
ec_stripe_create(s);
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
void bch2_ec_do_stripe_creates(struct bch_fs *c)
{
- bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create);
if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
@@ -1716,23 +1718,32 @@ err:
}
static int new_stripe_alloc_buckets(struct btree_trans *trans,
+ struct alloc_request *req,
struct ec_stripe_head *h, struct ec_stripe_new *s,
- enum bch_watermark watermark, struct closure *cl)
+ struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct bch_devs_mask devs = h->devs;
struct open_bucket *ob;
- struct open_buckets buckets;
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
- bool have_cache = true;
int ret = 0;
+ req->scratch_data_type = req->data_type;
+ req->scratch_ptrs = req->ptrs;
+ req->scratch_nr_replicas = req->nr_replicas;
+ req->scratch_nr_effective = req->nr_effective;
+ req->scratch_have_cache = req->have_cache;
+ req->scratch_devs_may_alloc = req->devs_may_alloc;
+
+ req->devs_may_alloc = h->devs;
+ req->have_cache = true;
+
BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
BUG_ON(v->nr_redundant != s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
- bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
+ bitmap_and(req->devs_may_alloc.d, req->devs_may_alloc.d,
+ c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
/*
@@ -1742,7 +1753,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
* block when updating the stripe
*/
if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID)
- __clear_bit(v->ptrs[i].dev, devs.d);
+ __clear_bit(v->ptrs[i].dev, req->devs_may_alloc.d);
if (i < s->nr_data)
nr_have_data++;
@@ -1753,60 +1764,58 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
BUG_ON(nr_have_data > s->nr_data);
BUG_ON(nr_have_parity > s->nr_parity);
- buckets.nr = 0;
+ req->ptrs.nr = 0;
if (nr_have_parity < s->nr_parity) {
- ret = bch2_bucket_alloc_set_trans(trans, &buckets,
- &h->parity_stripe,
- &devs,
- s->nr_parity,
- &nr_have_parity,
- &have_cache, 0,
- BCH_DATA_parity,
- watermark,
- cl);
-
- open_bucket_for_each(c, &buckets, ob, i) {
+ req->nr_replicas = s->nr_parity;
+ req->nr_effective = nr_have_parity;
+ req->data_type = BCH_DATA_parity;
+
+ ret = bch2_bucket_alloc_set_trans(trans, req, &h->parity_stripe, cl);
+
+ open_bucket_for_each(c, &req->ptrs, ob, i) {
j = find_next_zero_bit(s->blocks_gotten,
s->nr_data + s->nr_parity,
s->nr_data);
BUG_ON(j >= s->nr_data + s->nr_parity);
- s->blocks[j] = buckets.v[i];
+ s->blocks[j] = req->ptrs.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
__set_bit(j, s->blocks_gotten);
}
if (ret)
- return ret;
+ goto err;
}
- buckets.nr = 0;
+ req->ptrs.nr = 0;
if (nr_have_data < s->nr_data) {
- ret = bch2_bucket_alloc_set_trans(trans, &buckets,
- &h->block_stripe,
- &devs,
- s->nr_data,
- &nr_have_data,
- &have_cache, 0,
- BCH_DATA_user,
- watermark,
- cl);
-
- open_bucket_for_each(c, &buckets, ob, i) {
+ req->nr_replicas = s->nr_data;
+ req->nr_effective = nr_have_data;
+ req->data_type = BCH_DATA_user;
+
+ ret = bch2_bucket_alloc_set_trans(trans, req, &h->block_stripe, cl);
+
+ open_bucket_for_each(c, &req->ptrs, ob, i) {
j = find_next_zero_bit(s->blocks_gotten,
s->nr_data, 0);
BUG_ON(j >= s->nr_data);
- s->blocks[j] = buckets.v[i];
+ s->blocks[j] = req->ptrs.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
__set_bit(j, s->blocks_gotten);
}
if (ret)
- return ret;
+ goto err;
}
-
- return 0;
+err:
+ req->data_type = req->scratch_data_type;
+ req->ptrs = req->scratch_ptrs;
+ req->nr_replicas = req->scratch_nr_replicas;
+ req->nr_effective = req->scratch_nr_effective;
+ req->have_cache = req->scratch_have_cache;
+ req->devs_may_alloc = req->scratch_devs_may_alloc;
+ return ret;
}
static int __get_existing_stripe(struct btree_trans *trans,
@@ -1987,17 +1996,15 @@ err:
}
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
- unsigned target,
+ struct alloc_request *req,
unsigned algo,
- unsigned redundancy,
- enum bch_watermark watermark,
struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct ec_stripe_head *h;
- bool waiting = false;
+ unsigned redundancy = req->nr_replicas - 1;
unsigned disk_label = 0;
- struct target t = target_decode(target);
+ struct target t = target_decode(req->target);
+ bool waiting = false;
int ret;
if (t.type == TARGET_GROUP) {
@@ -2008,7 +2015,9 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
disk_label = t.group + 1; /* 0 == no label */
}
- h = __bch2_ec_stripe_head_get(trans, disk_label, algo, redundancy, watermark);
+ struct ec_stripe_head *h =
+ __bch2_ec_stripe_head_get(trans, disk_label, algo,
+ redundancy, req->watermark);
if (IS_ERR_OR_NULL(h))
return h;
@@ -2032,8 +2041,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
goto alloc_existing;
/* First, try to allocate a full stripe: */
- ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?:
+ enum bch_watermark saved_watermark = BCH_WATERMARK_stripe;
+ swap(req->watermark, saved_watermark);
+ ret = new_stripe_alloc_buckets(trans, req, h, s, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h, s);
+ swap(req->watermark, saved_watermark);
+
if (!ret)
goto allocate_buf;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
@@ -2051,8 +2064,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
- if (watermark == BCH_WATERMARK_copygc) {
- ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?:
+ if (req->watermark == BCH_WATERMARK_copygc) {
+ ret = new_stripe_alloc_buckets(trans, req, h, s, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h, s);
if (ret)
goto err;
@@ -2071,7 +2084,7 @@ alloc_existing:
* Retry allocating buckets, with the watermark for this
* particular write:
*/
- ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl);
+ ret = new_stripe_alloc_buckets(trans, req, h, s, cl);
if (ret)
goto err;
@@ -2093,23 +2106,17 @@ err:
/* device removal */
-static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_s_c k_a)
+int bch2_invalidate_stripe_to_dev(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ unsigned dev_idx,
+ unsigned flags)
{
- struct bch_alloc_v4 a_convert;
- const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert);
-
- if (!a->stripe)
+ if (k.k->type != KEY_TYPE_stripe)
return 0;
- if (a->stripe_sectors) {
- bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data");
- return -BCH_ERR_invalidate_stripe_to_dev;
- }
-
- struct btree_iter iter;
struct bkey_i_stripe *s =
- bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
- BTREE_ITER_slots, stripe);
+ bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe);
int ret = PTR_ERR_OR_ZERO(s);
if (ret)
return ret;
@@ -2126,36 +2133,79 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
acc.replicas.data_type = BCH_DATA_user;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
if (ret)
- goto err;
+ return ret;
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i));
- bkey_for_each_ptr(ptrs, ptr)
- if (ptr->dev == k_a.k->p.inode)
+
+ /* XXX: how much redundancy do we still have? check degraded flags */
+
+ unsigned nr_good = 0;
+
+ rcu_read_lock();
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == dev_idx)
ptr->dev = BCH_SB_MEMBER_INVALID;
+ struct bch_dev *ca = bch2_dev_rcu(trans->c, ptr->dev);
+ nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed;
+ }
+ rcu_read_unlock();
+
+ if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED))
+ return -BCH_ERR_remove_would_lose_data;
+
+ unsigned nr_data = s->v.nr_blocks - s->v.nr_redundant;
+
+ if (nr_good < nr_data && !(flags & BCH_FORCE_IF_DATA_LOST))
+ return -BCH_ERR_remove_would_lose_data;
+
sectors = -sectors;
memset(&acc, 0, sizeof(acc));
acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = BCH_DATA_user;
- ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
+ return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
+}
+
+static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, struct bkey_s_c k_a,
+ unsigned flags)
+{
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert);
+
+ if (!a->stripe)
+ return 0;
+
+ if (a->stripe_sectors) {
+ bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data");
+ return -BCH_ERR_invalidate_stripe_to_dev;
+ }
+
+ struct btree_iter iter;
+ struct bkey_s_c_stripe s =
+ bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
+ BTREE_ITER_slots, stripe);
+ int ret = bkey_err(s);
if (ret)
- goto err;
-err:
+ return ret;
+
+ ret = bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
-int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx)
+int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
- return bch2_trans_run(c,
+ int ret = bch2_trans_run(c,
for_each_btree_key_max_commit(trans, iter,
BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX),
BTREE_ITER_intent, k,
NULL, NULL, 0, ({
- bch2_invalidate_stripe_to_dev(trans, k);
+ bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags);
})));
+ bch_err_fn(c, ret);
+ return ret;
}
/* startup/shutdown */
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 51893e1ee874..548048adf0d5 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -255,9 +255,10 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *, int);
int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
+
+struct alloc_request;
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
- unsigned, unsigned, unsigned,
- enum bch_watermark, struct closure *);
+ struct alloc_request *, unsigned, struct closure *);
void bch2_do_stripe_deletes(struct bch_fs *);
void bch2_ec_do_stripe_creates(struct bch_fs *);
@@ -287,7 +288,9 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
}
}
-int bch2_dev_remove_stripes(struct bch_fs *, unsigned);
+int bch2_invalidate_stripe_to_dev(struct btree_trans *, struct btree_iter *,
+ struct bkey_s_c, unsigned, unsigned);
+int bch2_dev_remove_stripes(struct bch_fs *, unsigned, unsigned);
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_fs_ec_stop(struct bch_fs *);
diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h
index 06144bfd9c19..809446c78951 100644
--- a/fs/bcachefs/ec_types.h
+++ b/fs/bcachefs/ec_types.h
@@ -4,9 +4,10 @@
#include "bcachefs_format.h"
-struct bch_replicas_padded {
+union bch_replicas_padded {
+ u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
+ devs, BCH_BKEY_PTRS_MAX)];
struct bch_replicas_entry_v1 e;
- u8 pad[BCH_BKEY_PTRS_MAX];
};
struct stripe {
@@ -28,7 +29,7 @@ struct gc_stripe {
u16 block_sectors[BCH_BKEY_PTRS_MAX];
struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];
- struct bch_replicas_padded r;
+ union bch_replicas_padded r;
};
#endif /* _BCACHEFS_EC_TYPES_H */
diff --git a/fs/bcachefs/enumerated_ref.c b/fs/bcachefs/enumerated_ref.c
new file mode 100644
index 000000000000..56ab430f209f
--- /dev/null
+++ b/fs/bcachefs/enumerated_ref.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "enumerated_ref.h"
+#include "util.h"
+
+#include <linux/completion.h>
+
+#ifdef ENUMERATED_REF_DEBUG
+void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ atomic_long_inc(&ref->refs[idx]);
+}
+
+bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ return atomic_long_inc_not_zero(&ref->refs[idx]);
+}
+
+bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ return !ref->dying &&
+ atomic_long_inc_not_zero(&ref->refs[idx]);
+}
+
+void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ long v = atomic_long_dec_return(&ref->refs[idx]);
+
+ BUG_ON(v < 0);
+ if (v)
+ return;
+
+ for (unsigned i = 0; i < ref->nr; i++)
+ if (atomic_long_read(&ref->refs[i]))
+ return;
+
+ if (ref->stop_fn)
+ ref->stop_fn(ref);
+ complete(&ref->stop_complete);
+}
+#endif
+
+#ifndef ENUMERATED_REF_DEBUG
+static void enumerated_ref_kill_cb(struct percpu_ref *percpu_ref)
+{
+ struct enumerated_ref *ref =
+ container_of(percpu_ref, struct enumerated_ref, ref);
+
+ if (ref->stop_fn)
+ ref->stop_fn(ref);
+ complete(&ref->stop_complete);
+}
+#endif
+
+void enumerated_ref_stop_async(struct enumerated_ref *ref)
+{
+ reinit_completion(&ref->stop_complete);
+
+#ifndef ENUMERATED_REF_DEBUG
+ percpu_ref_kill(&ref->ref);
+#else
+ ref->dying = true;
+ for (unsigned i = 0; i < ref->nr; i++)
+ enumerated_ref_put(ref, i);
+#endif
+}
+
+void enumerated_ref_stop(struct enumerated_ref *ref,
+ const char * const names[])
+{
+ enumerated_ref_stop_async(ref);
+ while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n");
+ prt_str(&buf, "Outstanding refs:\n");
+ enumerated_ref_to_text(&buf, ref, names);
+ printk(KERN_ERR "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+void enumerated_ref_start(struct enumerated_ref *ref)
+{
+#ifndef ENUMERATED_REF_DEBUG
+ percpu_ref_reinit(&ref->ref);
+#else
+ ref->dying = false;
+ for (unsigned i = 0; i < ref->nr; i++) {
+ BUG_ON(atomic_long_read(&ref->refs[i]));
+ atomic_long_inc(&ref->refs[i]);
+ }
+#endif
+}
+
+void enumerated_ref_exit(struct enumerated_ref *ref)
+{
+#ifndef ENUMERATED_REF_DEBUG
+ percpu_ref_exit(&ref->ref);
+#else
+ kfree(ref->refs);
+ ref->refs = NULL;
+ ref->nr = 0;
+#endif
+}
+
+int enumerated_ref_init(struct enumerated_ref *ref, unsigned nr,
+ void (*stop_fn)(struct enumerated_ref *))
+{
+ init_completion(&ref->stop_complete);
+ ref->stop_fn = stop_fn;
+
+#ifndef ENUMERATED_REF_DEBUG
+ return percpu_ref_init(&ref->ref, enumerated_ref_kill_cb,
+ PERCPU_REF_INIT_DEAD, GFP_KERNEL);
+#else
+ ref->refs = kzalloc(sizeof(ref->refs[0]) * nr, GFP_KERNEL);
+ if (!ref->refs)
+ return -ENOMEM;
+
+ ref->nr = nr;
+ return 0;
+#endif
+}
+
+void enumerated_ref_to_text(struct printbuf *out,
+ struct enumerated_ref *ref,
+ const char * const names[])
+{
+#ifdef ENUMERATED_REF_DEBUG
+ bch2_printbuf_tabstop_push(out, 32);
+
+ for (unsigned i = 0; i < ref->nr; i++)
+ prt_printf(out, "%s\t%li\n", names[i],
+ atomic_long_read(&ref->refs[i]));
+#else
+ prt_str(out, "(not in debug mode)\n");
+#endif
+}
diff --git a/fs/bcachefs/enumerated_ref.h b/fs/bcachefs/enumerated_ref.h
new file mode 100644
index 000000000000..ec01cf59ef80
--- /dev/null
+++ b/fs/bcachefs/enumerated_ref.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ENUMERATED_REF_H
+#define _BCACHEFS_ENUMERATED_REF_H
+
+#include "enumerated_ref_types.h"
+
+/*
+ * A refcount where the users are enumerated: in debug mode, we create sepate
+ * refcounts for each user, to make leaks and refcount errors easy to track
+ * down:
+ */
+
+#ifdef ENUMERATED_REF_DEBUG
+void enumerated_ref_get(struct enumerated_ref *, unsigned);
+bool __enumerated_ref_tryget(struct enumerated_ref *, unsigned);
+bool enumerated_ref_tryget(struct enumerated_ref *, unsigned);
+void enumerated_ref_put(struct enumerated_ref *, unsigned);
+#else
+
+static inline void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx)
+{
+ percpu_ref_get(&ref->ref);
+}
+
+static inline bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ return percpu_ref_tryget(&ref->ref);
+}
+
+static inline bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ return percpu_ref_tryget_live(&ref->ref);
+}
+
+static inline void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx)
+{
+ percpu_ref_put(&ref->ref);
+}
+#endif
+
+static inline bool enumerated_ref_is_zero(struct enumerated_ref *ref)
+{
+#ifndef ENUMERATED_REF_DEBUG
+ return percpu_ref_is_zero(&ref->ref);
+#else
+ for (unsigned i = 0; i < ref->nr; i++)
+ if (atomic_long_read(&ref->refs[i]))
+ return false;
+ return true;
+#endif
+}
+
+void enumerated_ref_stop_async(struct enumerated_ref *);
+void enumerated_ref_stop(struct enumerated_ref *, const char * const[]);
+void enumerated_ref_start(struct enumerated_ref *);
+
+void enumerated_ref_exit(struct enumerated_ref *);
+int enumerated_ref_init(struct enumerated_ref *, unsigned,
+ void (*stop_fn)(struct enumerated_ref *));
+
+struct printbuf;
+void enumerated_ref_to_text(struct printbuf *,
+ struct enumerated_ref *,
+ const char * const[]);
+
+#endif /* _BCACHEFS_ENUMERATED_REF_H */
diff --git a/fs/bcachefs/enumerated_ref_types.h b/fs/bcachefs/enumerated_ref_types.h
new file mode 100644
index 000000000000..0e6076f466d3
--- /dev/null
+++ b/fs/bcachefs/enumerated_ref_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ENUMERATED_REF_TYPES_H
+#define _BCACHEFS_ENUMERATED_REF_TYPES_H
+
+#include <linux/percpu-refcount.h>
+
+struct enumerated_ref {
+#ifdef ENUMERATED_REF_DEBUG
+ unsigned nr;
+ bool dying;
+ atomic_long_t *refs;
+#else
+ struct percpu_ref ref;
+#endif
+ void (*stop_fn)(struct enumerated_ref *);
+ struct completion stop_complete;
+};
+
+#endif /* _BCACHEFS_ENUMERATED_REF_TYPES_H */
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index d9ebffa5b3a2..62843e772b2c 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -53,6 +53,7 @@
x(ENOMEM, ENOMEM_dio_write_bioset_init) \
x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \
x(ENOMEM, ENOMEM_promote_table_init) \
+ x(ENOMEM, ENOMEM_async_obj_init) \
x(ENOMEM, ENOMEM_compression_bounce_read_init) \
x(ENOMEM, ENOMEM_compression_bounce_write_init) \
x(ENOMEM, ENOMEM_compression_workspace_init) \
@@ -174,6 +175,7 @@
x(0, backpointer_to_overwritten_btree_node) \
x(0, journal_reclaim_would_deadlock) \
x(EINVAL, fsck) \
+ x(BCH_ERR_fsck, fsck_ask) \
x(BCH_ERR_fsck, fsck_fix) \
x(BCH_ERR_fsck, fsck_delete_bkey) \
x(BCH_ERR_fsck, fsck_ignore) \
@@ -181,7 +183,6 @@
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
x(EINVAL, restart_recovery) \
- x(EINVAL, not_in_recovery) \
x(EINVAL, cannot_rewind_recovery) \
x(0, data_update_done) \
x(BCH_ERR_data_update_done, data_update_done_would_block) \
@@ -201,6 +202,7 @@
x(EINVAL, device_has_been_removed) \
x(EINVAL, device_splitbrain) \
x(EINVAL, device_already_online) \
+ x(EINVAL, filesystem_uuid_already_open) \
x(EINVAL, insufficient_devices_to_start) \
x(EINVAL, invalid) \
x(EINVAL, internal_fsck_err) \
@@ -211,6 +213,7 @@
x(EINVAL, inode_unpack_error) \
x(EINVAL, varint_decode_error) \
x(EINVAL, erasure_coding_found_btree_node) \
+ x(EINVAL, option_negative) \
x(EOPNOTSUPP, may_not_use_incompat_feature) \
x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \
@@ -219,6 +222,8 @@
x(EROFS, erofs_unfixed_errors) \
x(EROFS, erofs_norecovery) \
x(EROFS, erofs_nochanges) \
+ x(EROFS, erofs_no_alloc_info) \
+ x(EROFS, erofs_filesystem_full) \
x(EROFS, insufficient_devices) \
x(0, operation_blocked) \
x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 6b8695b1349c..c2cad28635bf 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -11,12 +11,12 @@
#define FSCK_ERR_RATELIMIT_NR 10
-void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
+void __bch2_log_msg_start(const char *fs_or_dev_name, struct printbuf *out)
{
printbuf_indent_add_nextline(out, 2);
#ifdef BCACHEFS_LOG_PREFIX
- prt_printf(out, bch2_log_msg(c, ""));
+ prt_printf(out, "bcachefs (%s): ", fs_or_dev_name);
#endif
}
@@ -29,12 +29,10 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
return false;
case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
- if (bch2_fs_emergency_read_only(c))
- prt_printf(out, "inconsistency detected - emergency read only at journal seq %llu\n",
- journal_cur_seq(&c->journal));
+ bch2_fs_emergency_read_only2(c, out);
return true;
case BCH_ON_ERROR_panic:
- bch2_print_string_as_lines_nonblocking(KERN_ERR, out->buf);
+ bch2_print_str(c, KERN_ERR, out->buf);
panic(bch2_fmt(c, "panic after error"));
return true;
default:
@@ -71,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra
if (trans)
bch2_trans_updates_to_text(&buf, trans);
bool ret = __bch2_inconsistent_error(c, &buf);
- bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
+ bch2_print_str_nonblocking(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
return ret;
@@ -100,11 +98,11 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
prt_printf(out, "btree topology error: ");
set_bit(BCH_FS_topology_error, &c->flags);
- if (!test_bit(BCH_FS_recovery_running, &c->flags)) {
+ if (!test_bit(BCH_FS_in_recovery, &c->flags)) {
__bch2_inconsistent_error(c, out);
return -BCH_ERR_btree_need_topology_repair;
} else {
- return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
+ return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
-BCH_ERR_btree_node_read_validate_error;
}
}
@@ -121,7 +119,7 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
va_end(args);
int ret = __bch2_topology_error(c, &buf);
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
return ret;
@@ -151,14 +149,17 @@ void bch2_io_error_work(struct work_struct *work)
bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
BCH_FORCE_IF_DEGRADED);
+ struct printbuf buf = PRINTBUF;
+ __bch2_log_msg_start(ca->name, &buf);
- bch_err(ca,
- "writes erroring for %u seconds, setting %s ro",
+ prt_printf(&buf, "writes erroring for %u seconds, setting %s ro",
c->opts.write_error_timeout,
dev ? "device" : "filesystem");
if (!dev)
- bch2_fs_emergency_read_only(c);
+ bch2_fs_emergency_read_only2(c, &buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
}
out:
up_write(&c->state_lock);
@@ -328,7 +329,7 @@ static int do_fsck_ask_yn(struct bch_fs *c,
if (bch2_fs_stdio_redirect(c))
bch2_print(c, "%s", question->buf);
else
- bch2_print_string_as_lines(KERN_ERR, question->buf);
+ bch2_print_str(c, KERN_ERR, question->buf);
int ask = bch2_fsck_ask_yn(c, trans);
@@ -376,15 +377,63 @@ static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c,
return s;
}
-void __bch2_count_fsck_err(struct bch_fs *c,
- enum bch_sb_error_id id, const char *msg,
- bool *repeat, bool *print, bool *suppress)
+bool __bch2_count_fsck_err(struct bch_fs *c,
+ enum bch_sb_error_id id, struct printbuf *msg)
{
bch2_sb_error_count(c, id);
mutex_lock(&c->fsck_error_msgs_lock);
- count_fsck_err_locked(c, id, msg, repeat, print, suppress);
+ bool print = true, repeat = false, suppress = false;
+
+ count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress);
mutex_unlock(&c->fsck_error_msgs_lock);
+
+ if (suppress)
+ prt_printf(msg, "Ratelimiting new instances of previous error\n");
+
+ return print && !repeat;
+}
+
+int bch2_fsck_err_opt(struct bch_fs *c,
+ enum bch_fsck_flags flags,
+ enum bch_sb_error_id err)
+{
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ flags |= fsck_flags_extra[err];
+
+ if (test_bit(BCH_FS_in_fsck, &c->flags)) {
+ if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
+ return -BCH_ERR_fsck_repair_unimplemented;
+
+ switch (c->opts.fix_errors) {
+ case FSCK_FIX_exit:
+ return -BCH_ERR_fsck_errors_not_fixed;
+ case FSCK_FIX_yes:
+ if (flags & FSCK_CAN_FIX)
+ return -BCH_ERR_fsck_fix;
+ fallthrough;
+ case FSCK_FIX_no:
+ if (flags & FSCK_CAN_IGNORE)
+ return -BCH_ERR_fsck_ignore;
+ return -BCH_ERR_fsck_errors_not_fixed;
+ case FSCK_FIX_ask:
+ if (flags & FSCK_AUTOFIX)
+ return -BCH_ERR_fsck_fix;
+ return -BCH_ERR_fsck_ask;
+ default:
+ BUG();
+ }
+ } else {
+ if ((flags & FSCK_AUTOFIX) &&
+ (c->opts.errors == BCH_ON_ERROR_continue ||
+ c->opts.errors == BCH_ON_ERROR_fix_safe))
+ return -BCH_ERR_fsck_fix;
+
+ if (c->opts.errors == BCH_ON_ERROR_continue &&
+ (flags & FSCK_CAN_IGNORE))
+ return -BCH_ERR_fsck_ignore;
+ return -BCH_ERR_fsck_errors_not_fixed;
+ }
}
int __bch2_fsck_err(struct bch_fs *c,
@@ -475,7 +524,7 @@ int __bch2_fsck_err(struct bch_fs *c,
}
goto print;
- } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
+ } else if (!test_bit(BCH_FS_in_fsck, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str_indented(out, ", shutting down\n"
@@ -534,7 +583,7 @@ int __bch2_fsck_err(struct bch_fs *c,
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;
- if (test_bit(BCH_FS_fsck_running, &c->flags) &&
+ if (test_bit(BCH_FS_in_fsck, &c->flags) &&
(ret != -BCH_ERR_fsck_fix &&
ret != -BCH_ERR_fsck_ignore)) {
exiting = true;
@@ -559,7 +608,7 @@ print:
if (bch2_fs_stdio_redirect(c))
bch2_print(c, "%s", out->buf);
else
- bch2_print_string_as_lines(KERN_ERR, out->buf);
+ bch2_print_str(c, KERN_ERR, out->buf);
}
if (s)
@@ -693,25 +742,9 @@ void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
struct bpos pos)
{
- struct bch_fs *c = trans->c;
- int ret = 0;
-
- if (!bch2_snapshot_is_leaf(c, pos.snapshot))
- prt_str(out, "(multiple snapshots) ");
-
- subvol_inum inum = {
- .subvol = bch2_snapshot_tree_oldest_subvol(c, pos.snapshot),
- .inum = pos.inode,
- };
-
- if (inum.subvol) {
- ret = bch2_inum_to_path(trans, inum, out);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- return ret;
- }
-
- if (!inum.subvol || ret)
- prt_printf(out, "inum %llu:%u", pos.inode, pos.snapshot);
+ int ret = bch2_inum_snapshot_to_path(trans, pos.inode, pos.snapshot, NULL, out);
+ if (ret)
+ return ret;
prt_printf(out, " offset %llu: ", pos.offset << 8);
return 0;
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 4a364fd44abe..5123d4c86770 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -18,7 +18,12 @@ struct work_struct;
/* Error messages: */
-void bch2_log_msg_start(struct bch_fs *, struct printbuf *);
+void __bch2_log_msg_start(const char *, struct printbuf *);
+
+static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
+{
+ __bch2_log_msg_start(c->name, out);
+}
/*
* Inconsistency errors: The on disk data is inconsistent. If these occur during
@@ -76,12 +81,14 @@ struct fsck_err_state {
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
-void __bch2_count_fsck_err(struct bch_fs *,
- enum bch_sb_error_id, const char *,
- bool *, bool *, bool *);
+bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbuf *);
#define bch2_count_fsck_err(_c, _err, ...) \
__bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__)
+int bch2_fsck_err_opt(struct bch_fs *,
+ enum bch_fsck_flags,
+ enum bch_sb_error_id);
+
__printf(5, 6) __cold
int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
enum bch_fsck_flags,
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index 6bb42985306e..b899ee75f5b9 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -37,16 +37,17 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
return lru + ret * 2;
}
+#define EXTENT_ITERS_MAX 64
+
static int count_iters_for_insert(struct btree_trans *trans,
struct bkey_s_c k,
unsigned offset,
struct bpos *end,
- unsigned *nr_iters,
- unsigned max_iters)
+ unsigned *nr_iters)
{
int ret = 0, ret2 = 0;
- if (*nr_iters >= max_iters) {
+ if (*nr_iters >= EXTENT_ITERS_MAX) {
*end = bpos_min(*end, k.k->p);
ret = 1;
}
@@ -56,7 +57,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
case KEY_TYPE_reflink_v:
*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
- if (*nr_iters >= max_iters) {
+ if (*nr_iters >= EXTENT_ITERS_MAX) {
*end = bpos_min(*end, k.k->p);
ret = 1;
}
@@ -81,7 +82,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
- if (*nr_iters >= max_iters) {
+ if (*nr_iters >= EXTENT_ITERS_MAX) {
struct bpos pos = bkey_start_pos(k.k);
pos.offset += min_t(u64, k.k->size,
r_k.k->p.offset - idx);
@@ -100,59 +101,31 @@ static int count_iters_for_insert(struct btree_trans *trans,
return ret2 ?: ret;
}
-#define EXTENT_ITERS_MAX (BTREE_ITER_INITIAL / 3)
-
int bch2_extent_atomic_end(struct btree_trans *trans,
struct btree_iter *iter,
- struct bkey_i *insert,
struct bpos *end)
{
- struct btree_iter copy;
- struct bkey_s_c k;
unsigned nr_iters = 0;
- int ret;
-
- ret = bch2_btree_iter_traverse(trans, iter);
- if (ret)
- return ret;
-
- *end = insert->k.p;
-
- /* extent_update_to_keys(): */
- nr_iters += 1;
-
- ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
- &nr_iters, EXTENT_ITERS_MAX / 2);
- if (ret < 0)
- return ret;
+ struct btree_iter copy;
bch2_trans_copy_iter(trans, &copy, iter);
- for_each_btree_key_max_continue_norestart(trans, copy, insert->k.p, 0, k, ret) {
- unsigned offset = 0;
+ int ret = bch2_btree_iter_traverse(trans, &copy);
+ if (ret)
+ goto err;
- if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k)))
- offset = bkey_start_offset(&insert->k) -
- bkey_start_offset(k.k);
+ struct bkey_s_c k;
+ for_each_btree_key_max_continue_norestart(trans, copy, *end, 0, k, ret) {
+ unsigned offset = 0;
- /* extent_handle_overwrites(): */
- switch (bch2_extent_overlap(&insert->k, k.k)) {
- case BCH_EXTENT_OVERLAP_ALL:
- case BCH_EXTENT_OVERLAP_FRONT:
- nr_iters += 1;
- break;
- case BCH_EXTENT_OVERLAP_BACK:
- case BCH_EXTENT_OVERLAP_MIDDLE:
- nr_iters += 2;
- break;
- }
+ if (bkey_gt(iter->pos, bkey_start_pos(k.k)))
+ offset = iter->pos.offset - bkey_start_offset(k.k);
- ret = count_iters_for_insert(trans, k, offset, end,
- &nr_iters, EXTENT_ITERS_MAX);
+ ret = count_iters_for_insert(trans, k, offset, end, &nr_iters);
if (ret)
break;
}
-
+err:
bch2_trans_iter_exit(trans, &copy);
return ret < 0 ? ret : 0;
}
@@ -161,10 +134,8 @@ int bch2_extent_trim_atomic(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *k)
{
- struct bpos end;
- int ret;
-
- ret = bch2_extent_atomic_end(trans, iter, k, &end);
+ struct bpos end = k->k.p;
+ int ret = bch2_extent_atomic_end(trans, iter, &end);
if (ret)
return ret;
diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h
index 6f5cf449361a..34467db53f45 100644
--- a/fs/bcachefs/extent_update.h
+++ b/fs/bcachefs/extent_update.h
@@ -5,7 +5,7 @@
#include "bcachefs.h"
int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, struct bpos *);
+ struct bpos *);
int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *,
struct bkey_i *);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index e597fb9c9823..1ac9897f189d 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -45,6 +45,49 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
struct bch_extent_crc_unpacked,
enum bch_extent_entry_type);
+void bch2_io_failures_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_io_failures *failed)
+{
+ static const char * const error_types[] = {
+ "io", "checksum", "ec reconstruct", NULL
+ };
+
+ for (struct bch_dev_io_failures *f = failed->devs;
+ f < failed->devs + failed->nr;
+ f++) {
+ unsigned errflags =
+ ((!!f->failed_io) << 0) |
+ ((!!f->failed_csum_nr) << 1) |
+ ((!!f->failed_ec) << 2);
+
+ if (!errflags)
+ continue;
+
+ bch2_printbuf_make_room(out, 1024);
+ rcu_read_lock();
+ out->atomic++;
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
+ if (ca)
+ prt_str(out, ca->name);
+ else
+ prt_printf(out, "(invalid device %u)", f->dev);
+ --out->atomic;
+ rcu_read_unlock();
+
+ prt_char(out, ' ');
+
+ if (is_power_of_2(errflags)) {
+ prt_bitflags(out, error_types, errflags);
+ prt_str(out, " error");
+ } else {
+ prt_str(out, "errors: ");
+ prt_bitflags(out, error_types, errflags);
+ }
+ prt_newline(out);
+ }
+}
+
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
unsigned dev)
{
@@ -79,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
f->failed_csum_nr++;
}
+void bch2_mark_btree_validate_failure(struct bch_io_failures *failed,
+ unsigned dev)
+{
+ struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev);
+
+ if (!f) {
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+ f = &failed->devs[failed->nr++];
+ memset(f, 0, sizeof(*f));
+ f->dev = dev;
+ }
+
+ f->failed_btree_validate = true;
+}
+
static inline u64 dev_latency(struct bch_dev *ca)
{
return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX;
@@ -105,7 +164,7 @@ static inline bool ptr_better(struct bch_fs *c,
if (unlikely(failed_delta))
return failed_delta < 0;
- if (unlikely(bch2_force_reconstruct_read))
+ if (static_branch_unlikely(&bch2_force_reconstruct_read))
return p1.do_ec_reconstruct > p2.do_ec_reconstruct;
if (unlikely(p1.do_ec_reconstruct || p2.do_ec_reconstruct))
@@ -136,12 +195,8 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (k.k->type == KEY_TYPE_error)
return -BCH_ERR_key_type_error;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-
- if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
- return -BCH_ERR_extent_poisoned;
-
rcu_read_lock();
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
u64 pick_latency;
@@ -162,7 +217,15 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (dev >= 0 && p.ptr.dev != dev)
continue;
- struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
+
+ if (unlikely(!ca && p.ptr.dev != BCH_SB_MEMBER_INVALID)) {
+ rcu_read_unlock();
+ int ret = bch2_dev_missing_bkey(c, k, p.ptr.dev);
+ if (ret)
+ return ret;
+ rcu_read_lock();
+ }
if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
continue;
@@ -175,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
have_io_errors |= f->failed_io;
+ have_io_errors |= f->failed_btree_validate;
have_io_errors |= f->failed_ec;
}
have_csum_errors |= !!f->failed_csum_nr;
@@ -182,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (p.has_ec && (f->failed_io || f->failed_csum_nr))
p.do_ec_reconstruct = true;
else if (f->failed_io ||
+ f->failed_btree_validate ||
f->failed_csum_nr > c->opts.checksum_err_retry_nr)
continue;
}
@@ -194,7 +259,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
p.do_ec_reconstruct = true;
}
- if (bch2_force_reconstruct_read && p.has_ec)
+ if (static_branch_unlikely(&bch2_force_reconstruct_read) && p.has_ec)
p.do_ec_reconstruct = true;
u64 p_latency = dev_latency(ca);
@@ -1071,33 +1136,50 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c,
struct bkey_s k,
struct bch_extent_ptr *ptr)
{
- struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ struct bkey_ptrs ptrs;
union bch_extent_entry *entry;
struct extent_ptr_decoded p;
+ bool have_cached_ptr;
+ unsigned drop_dev = ptr->dev;
rcu_read_lock();
- if (!want_cached_ptr(c, opts, ptr)) {
- bch2_bkey_drop_ptr_noerror(k, ptr);
- goto out;
- }
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(k);
+ have_cached_ptr = false;
- /*
- * Stripes can't contain cached data, for - reasons.
- *
- * Possibly something we can fix in the future?
- */
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (&entry->ptr == ptr) {
- if (p.has_ec)
- bch2_bkey_drop_ptr_noerror(k, ptr);
- else
- ptr->cached = true;
- goto out;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ /*
+ * Check if it's erasure coded - stripes can't contain cached
+ * data. Possibly something we can fix in the future?
+ */
+ if (&entry->ptr == ptr && p.has_ec)
+ goto drop;
+
+ if (p.ptr.cached) {
+ if (have_cached_ptr || !want_cached_ptr(c, opts, &p.ptr)) {
+ bch2_bkey_drop_ptr_noerror(k, &entry->ptr);
+ ptr = NULL;
+ goto restart_drop_ptrs;
+ }
+
+ have_cached_ptr = true;
}
+ }
+
+ if (!ptr)
+ bkey_for_each_ptr(ptrs, ptr2)
+ if (ptr2->dev == drop_dev)
+ ptr = ptr2;
- BUG();
-out:
+ if (have_cached_ptr || !want_cached_ptr(c, opts, ptr))
+ goto drop;
+
+ ptr->cached = true;
+ rcu_read_unlock();
+ return;
+drop:
rcu_read_unlock();
+ bch2_bkey_drop_ptr_noerror(k, ptr);
}
/*
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index e78a39e7e18f..b8590e51b76e 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -380,13 +380,6 @@ out: \
/* Iterate over pointers in KEY_TYPE_extent: */
-#define extent_for_each_entry_from(_e, _entry, _start) \
- __bkey_extent_entry_for_each_from(_start, \
- extent_entry_last(_e), _entry)
-
-#define extent_for_each_entry(_e, _entry) \
- extent_for_each_entry_from(_e, _entry, (_e).v->start)
-
#define extent_ptr_next(_e, _ptr) \
__bkey_ptr_next(_ptr, extent_entry_last(_e))
@@ -399,10 +392,13 @@ out: \
/* utility code common to all keys with pointers: */
+void bch2_io_failures_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_failures *);
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
unsigned);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *, bool);
+void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *, int);
diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h
index e51529dca4c2..b23ce4a373c0 100644
--- a/fs/bcachefs/extents_types.h
+++ b/fs/bcachefs/extents_types.h
@@ -34,6 +34,7 @@ struct bch_io_failures {
u8 dev;
unsigned failed_csum_nr:6,
failed_io:1,
+ failed_btree_validate:1,
failed_ec:1;
} devs[BCH_REPLICAS_MAX + 1];
};
diff --git a/fs/bcachefs/fast_list.c b/fs/bcachefs/fast_list.c
new file mode 100644
index 000000000000..2faec143eb31
--- /dev/null
+++ b/fs/bcachefs/fast_list.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Fast, unordered lists
+ *
+ * Supports add, remove, and iterate
+ *
+ * Underneath, they're a radix tree and an IDA, with a percpu buffer for slot
+ * allocation and freeing.
+ *
+ * This means that adding, removing, and iterating over items is lockless,
+ * except when refilling/emptying the percpu slot buffers.
+ */
+
+#include "fast_list.h"
+
+struct fast_list_pcpu {
+ u32 nr;
+ u32 entries[31];
+};
+
+static int fast_list_alloc_idx(struct fast_list *l, gfp_t gfp)
+{
+ int idx = ida_alloc_range(&l->slots_allocated, 1, INT_MAX, gfp);
+ if (unlikely(idx < 0))
+ return 0;
+
+ if (unlikely(!genradix_ptr_alloc_inlined(&l->items, idx, gfp))) {
+ ida_free(&l->slots_allocated, idx);
+ return 0;
+ }
+
+ return idx;
+}
+
+/**
+ * fast_list_get_idx - get a slot in a fast_list
+ * @l: list to get slot in
+ *
+ * This allocates a slot in the radix tree without storing to it, so that we can
+ * take the potential memory allocation failure early and do the list add later
+ * when we can't take an allocation failure.
+ *
+ * Returns: positive integer on success, -ENOMEM on failure
+ */
+int fast_list_get_idx(struct fast_list *l)
+{
+ unsigned long flags;
+ int idx;
+retry:
+ local_irq_save(flags);
+ struct fast_list_pcpu *lp = this_cpu_ptr(l->buffer);
+
+ if (unlikely(!lp->nr)) {
+ u32 entries[16], nr = 0;
+
+ local_irq_restore(flags);
+ while (nr < ARRAY_SIZE(entries) &&
+ (idx = fast_list_alloc_idx(l, GFP_KERNEL)))
+ entries[nr++] = idx;
+ local_irq_save(flags);
+
+ lp = this_cpu_ptr(l->buffer);
+
+ while (nr && lp->nr < ARRAY_SIZE(lp->entries))
+ lp->entries[lp->nr++] = entries[--nr];
+
+ if (unlikely(nr)) {
+ local_irq_restore(flags);
+ while (nr)
+ ida_free(&l->slots_allocated, entries[--nr]);
+ goto retry;
+ }
+
+ if (unlikely(!lp->nr)) {
+ local_irq_restore(flags);
+ return -ENOMEM;
+ }
+ }
+
+ idx = lp->entries[--lp->nr];
+ local_irq_restore(flags);
+
+ return idx;
+}
+
+/**
+ * fast_list_add - add an item to a fast_list
+ * @l: list
+ * @item: item to add
+ *
+ * Allocates a slot in the radix tree and stores to it and then returns the
+ * slot index, which must be passed to fast_list_remove().
+ *
+ * Returns: positive integer on success, -ENOMEM on failure
+ */
+int fast_list_add(struct fast_list *l, void *item)
+{
+ int idx = fast_list_get_idx(l);
+ if (idx < 0)
+ return idx;
+
+ *genradix_ptr_inlined(&l->items, idx) = item;
+ return idx;
+}
+
+/**
+ * fast_list_remove - remove an item from a fast_list
+ * @l: list
+ * @idx: item's slot index
+ *
+ * Zeroes out the slot in the radix tree and frees the slot for future
+ * fast_list_add() operations.
+ */
+void fast_list_remove(struct fast_list *l, unsigned idx)
+{
+ u32 entries[16], nr = 0;
+ unsigned long flags;
+
+ if (!idx)
+ return;
+
+ *genradix_ptr_inlined(&l->items, idx) = NULL;
+
+ local_irq_save(flags);
+ struct fast_list_pcpu *lp = this_cpu_ptr(l->buffer);
+
+ if (unlikely(lp->nr == ARRAY_SIZE(lp->entries)))
+ while (nr < ARRAY_SIZE(entries))
+ entries[nr++] = lp->entries[--lp->nr];
+
+ lp->entries[lp->nr++] = idx;
+ local_irq_restore(flags);
+
+ if (unlikely(nr))
+ while (nr)
+ ida_free(&l->slots_allocated, entries[--nr]);
+}
+
+void fast_list_exit(struct fast_list *l)
+{
+ /* XXX: warn if list isn't empty */
+ free_percpu(l->buffer);
+ ida_destroy(&l->slots_allocated);
+ genradix_free(&l->items);
+}
+
+int fast_list_init(struct fast_list *l)
+{
+ genradix_init(&l->items);
+ ida_init(&l->slots_allocated);
+ l->buffer = alloc_percpu(*l->buffer);
+ if (!l->buffer)
+ return -ENOMEM;
+ return 0;
+}
diff --git a/fs/bcachefs/fast_list.h b/fs/bcachefs/fast_list.h
new file mode 100644
index 000000000000..73c9bf591fd6
--- /dev/null
+++ b/fs/bcachefs/fast_list.h
@@ -0,0 +1,41 @@
+#ifndef _LINUX_FAST_LIST_H
+#define _LINUX_FAST_LIST_H
+
+#include <linux/generic-radix-tree.h>
+#include <linux/idr.h>
+#include <linux/percpu.h>
+
+struct fast_list_pcpu;
+
+struct fast_list {
+ GENRADIX(void *) items;
+ struct ida slots_allocated;;
+ struct fast_list_pcpu __percpu
+ *buffer;
+};
+
+static inline void *fast_list_iter_peek(struct genradix_iter *iter,
+ struct fast_list *list)
+{
+ void **p;
+ while ((p = genradix_iter_peek(iter, &list->items)) && !*p)
+ genradix_iter_advance(iter, &list->items);
+
+ return p ? *p : NULL;
+}
+
+#define fast_list_for_each_from(_list, _iter, _i, _start) \
+ for (_iter = genradix_iter_init(&(_list)->items, _start); \
+ (_i = fast_list_iter_peek(&(_iter), _list)) != NULL; \
+ genradix_iter_advance(&(_iter), &(_list)->items))
+
+#define fast_list_for_each(_list, _iter, _i) \
+ fast_list_for_each_from(_list, _iter, _i, 0)
+
+int fast_list_get_idx(struct fast_list *l);
+int fast_list_add(struct fast_list *l, void *item);
+void fast_list_remove(struct fast_list *l, unsigned idx);
+void fast_list_exit(struct fast_list *l);
+int fast_list_init(struct fast_list *l);
+
+#endif /* _LINUX_FAST_LIST_H */
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 535bc5fcbcc0..1f5154d9676b 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "enumerated_ref.h"
#include "fs.h"
#include "fs-io.h"
#include "fs-io-direct.h"
@@ -401,7 +402,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
- bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write);
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
@@ -606,7 +607,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_dio_write))
return -EROFS;
inode_lock(&inode->v);
@@ -675,7 +676,7 @@ err_put_bio:
bio_put(bio);
inode_dio_end(&inode->v);
err_put_write_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write);
goto out;
}
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index e072900e6a5b..fbae9c1de746 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -605,10 +605,14 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct address_space *mapping = file->f_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
- unsigned len;
- loff_t isize;
vm_fault_t ret;
+ loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c));
+ unsigned offset = file_offset - folio_pos(folio);
+ unsigned len = max(PAGE_SIZE, block_bytes(c));
+
+ BUG_ON(offset + len > folio_size(folio));
+
bch2_folio_reservation_init(c, inode, &res);
sb_start_pagefault(inode->v.i_sb);
@@ -623,24 +627,24 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
bch2_pagecache_add_get(inode);
folio_lock(folio);
- isize = i_size_read(&inode->v);
+ u64 isize = i_size_read(&inode->v);
- if (folio->mapping != mapping || folio_pos(folio) >= isize) {
+ if (folio->mapping != mapping || file_offset >= isize) {
folio_unlock(folio);
ret = VM_FAULT_NOPAGE;
goto out;
}
- len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
+ len = min_t(unsigned, len, isize - file_offset);
if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
- bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
+ bch2_folio_reservation_get(c, inode, folio, &res, offset, len)) {
folio_unlock(folio);
ret = VM_FAULT_SIGBUS;
goto out;
}
- bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
+ bch2_set_folio_dirty(c, inode, folio, &res, offset, len);
bch2_folio_reservation_put(c, inode, &res);
folio_wait_stable(folio);
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 9657144666b8..b1e9ee28fc0f 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -7,6 +7,7 @@
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
@@ -48,7 +49,8 @@ static void nocow_flush_endio(struct bio *_bio)
struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio);
closure_put(bio->cl);
- percpu_ref_put(&bio->ca->io_ref[WRITE]);
+ enumerated_ref_put(&bio->ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_nocow_flush);
bio_put(&bio->bio);
}
@@ -71,7 +73,8 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
rcu_read_lock();
ca = rcu_dereference(c->devs[dev]);
- if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE]))
+ if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_nocow_flush))
ca = NULL;
rcu_read_unlock();
@@ -151,10 +154,9 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
inode->ei_inode.bi_sectors);
- bool repeat = false, print = false, suppress = false;
- bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, buf.buf, &repeat, &print, &suppress);
+ bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf);
if (print)
- bch2_print_str(c, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
if (sectors < 0)
@@ -220,7 +222,7 @@ static int bch2_flush_inode(struct bch_fs *c,
if (c->opts.journal_flush_disabled)
return 0;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fsync))
return -EROFS;
u64 seq;
@@ -228,7 +230,7 @@ static int bch2_flush_inode(struct bch_fs *c,
bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?:
bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?:
bch2_inode_flush_nocow_writes(c, inode);
- bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync);
return ret;
}
@@ -526,11 +528,9 @@ int bchfs_truncate(struct mnt_idmap *idmap,
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
- bool repeat = false, print = false, suppress = false;
- bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, buf.buf,
- &repeat, &print, &suppress);
+ bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf);
if (print)
- bch2_print_str(c, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
@@ -821,7 +821,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
long ret;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fallocate))
return -EROFS;
inode_lock(&inode->v);
@@ -845,7 +845,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
err:
bch2_pagecache_block_put(inode);
inode_unlock(&inode->v);
- bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_fallocate);
return bch2_err_class(ret);
}
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index a82dfce9e4ad..05361a793206 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -172,7 +172,10 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
if (get_user(flags, arg))
return -EFAULT;
- bch_notice(c, "shutdown by ioctl type %u", flags);
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "shutdown by ioctl type %u", flags);
switch (flags) {
case FSOP_GOING_FLAGS_DEFAULT:
@@ -180,20 +183,23 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
if (ret)
break;
bch2_journal_flush(&c->journal);
- bch2_fs_emergency_read_only(c);
+ bch2_fs_emergency_read_only2(c, &buf);
bdev_thaw(c->vfs_sb->s_bdev);
break;
case FSOP_GOING_FLAGS_LOGFLUSH:
bch2_journal_flush(&c->journal);
fallthrough;
case FSOP_GOING_FLAGS_NOLOGFLUSH:
- bch2_fs_emergency_read_only(c);
+ bch2_fs_emergency_read_only2(c, &buf);
break;
default:
ret = -EINVAL;
- break;
+ goto noprint;
}
+ bch2_print_str(c, KERN_ERR, buf.buf);
+noprint:
+ printbuf_exit(&buf);
return ret;
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index b6801861c66f..ddfe89d84966 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -191,11 +191,6 @@ int bch2_fs_quota_transfer(struct bch_fs *c,
return ret;
}
-static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
-{
- return a.subvol == b.subvol && a.inum == b.inum;
-}
-
static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
{
const subvol_inum *inum = data;
@@ -352,9 +347,8 @@ repeat:
if (!trans) {
__wait_on_freeing_inode(c, inode, inum);
} else {
- bch2_trans_unlock(trans);
- __wait_on_freeing_inode(c, inode, inum);
- int ret = bch2_trans_relock(trans);
+ int ret = drop_locks_do(trans,
+ (__wait_on_freeing_inode(c, inode, inum), 0));
if (ret)
return ERR_PTR(ret);
}
@@ -1429,7 +1423,9 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans,
if (ret)
goto err;
- ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
+ u64 pagecache_end = k.k ? max(start, bkey_start_offset(k.k)) : end;
+
+ ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur);
if (ret)
goto err;
@@ -1662,33 +1658,9 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
return -EINVAL;
if (s->casefold != bch2_inode_casefold(c, bi)) {
-#ifdef CONFIG_UNICODE
- int ret = 0;
- /* Not supported on individual files. */
- if (!S_ISDIR(bi->bi_mode))
- return -EOPNOTSUPP;
-
- /*
- * Make sure the dir is empty, as otherwise we'd need to
- * rehash everything and update the dirent keys.
- */
- ret = bch2_empty_dir_trans(trans, inode_inum(inode));
- if (ret < 0)
- return ret;
-
- ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+ int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold);
if (ret)
return ret;
-
- bch2_check_set_feature(c, BCH_FEATURE_casefolding);
-
- bi->bi_casefold = s->casefold + 1;
- bi->bi_fields_set |= BIT(Inode_opt_casefold);
-
-#else
- printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
- return -EOPNOTSUPP;
-#endif
}
if (s->set_project) {
@@ -2350,12 +2322,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
struct bch_fs *c = root->d_sb->s_fs_info;
bool first = true;
- for_each_online_member(c, ca) {
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca) {
if (!first)
seq_putc(seq, ':');
first = false;
seq_puts(seq, ca->disk_sb.sb_name);
}
+ rcu_read_unlock();
return 0;
}
@@ -2462,7 +2436,7 @@ static int bch2_fs_get_tree(struct fs_context *fc)
struct inode *vinode;
struct bch2_opts_parse *opts_parse = fc->fs_private;
struct bch_opts opts = opts_parse->opts;
- darray_str devs;
+ darray_const_str devs;
darray_fs devs_to_fs = {};
int ret;
@@ -2486,7 +2460,7 @@ static int bch2_fs_get_tree(struct fs_context *fc)
if (!IS_ERR(sb))
goto got_sb;
- c = bch2_fs_open(devs.data, devs.nr, opts);
+ c = bch2_fs_open(&devs, &opts);
ret = PTR_ERR_OR_ZERO(c);
if (ret)
goto err;
@@ -2536,7 +2510,12 @@ got_sb:
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid));
- super_set_sysfs_name_uuid(sb);
+
+ if (c->sb.multi_device)
+ super_set_sysfs_name_uuid(sb);
+ else
+ strscpy(sb->s_sysfs_name, c->name, sizeof(sb->s_sysfs_name));
+
sb->s_shrink->seeks = 0;
c->vfs_sb = sb;
strscpy(sb->s_id, c->name, sizeof(sb->s_id));
@@ -2547,15 +2526,16 @@ got_sb:
sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
- for_each_online_member(c, ca) {
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca) {
struct block_device *bdev = ca->disk_sb.bdev;
/* XXX: create an anonymous device for multi device filesystems */
sb->s_bdev = bdev;
sb->s_dev = bdev->bd_dev;
- percpu_ref_put(&ca->io_ref[READ]);
break;
}
+ rcu_read_unlock();
c->dev = sb->s_dev;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 7b25cedd3e40..49f46df8340e 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -109,27 +109,6 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
return ret;
}
-static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot,
- struct bch_inode_unpacked *inode)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, inode_nr, snapshot), 0);
- ret = bkey_err(k);
- if (ret)
- goto err;
-
- ret = bkey_is_inode(k.k)
- ? bch2_inode_unpack(k, inode)
- : -BCH_ERR_ENOENT_inode;
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
static int lookup_dirent_in_snapshot(struct btree_trans *trans,
struct bch_hash_info hash_info,
subvol_inum dir, struct qstr *name,
@@ -231,7 +210,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
- ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode);
+ ret = bch2_inode_find_by_inum_snapshot(trans, root_inum.inum, snapshot, &root_inode, 0);
bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
root_inum.inum, subvolid);
if (ret)
@@ -257,7 +236,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
* The bch2_check_dirents pass has already run, dangling dirents
* shouldn't exist here:
*/
- ret = lookup_inode(trans, inum, snapshot, lostfound);
+ ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, lostfound, 0);
bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
return ret;
@@ -285,7 +264,7 @@ create_lostfound:
u64 cpu = raw_smp_processor_id();
bch2_inode_init_early(c, lostfound);
- bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
+ bch2_inode_init_late(c, lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
lostfound->bi_dir = root_inode.bi_inum;
lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot);
@@ -306,6 +285,7 @@ create_lostfound:
&lostfound_str,
lostfound->bi_inum,
&lostfound->bi_dir_offset,
+ BTREE_UPDATE_internal_snapshot_node|
STR_HASH_must_create) ?:
bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
BTREE_UPDATE_internal_snapshot_node);
@@ -431,6 +411,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
&name,
inode->bi_subvol ?: inode->bi_inum,
&inode->bi_dir_offset,
+ BTREE_UPDATE_internal_snapshot_node|
STR_HASH_must_create);
if (ret) {
bch_err_msg(c, ret, "error creating dirent");
@@ -564,7 +545,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub
u64 cpu = raw_smp_processor_id();
bch2_inode_init_early(c, &new_inode);
- bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
+ bch2_inode_init_late(c, &new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
new_inode.bi_subvol = subvolid;
@@ -654,7 +635,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32
struct bch_inode_unpacked new_inode;
bch2_inode_init_early(c, &new_inode);
- bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL);
+ bch2_inode_init_late(c, &new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL);
new_inode.bi_size = i_size;
new_inode.bi_inum = inum;
new_inode.bi_snapshot = snapshot;
@@ -785,12 +766,12 @@ static int ref_visible2(struct bch_fs *c,
#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
- (_i)->snapshot <= (_snapshot); _i++) \
- if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
+ (_i)->inode.bi_snapshot <= (_snapshot); _i++) \
+ if (key_visible_in_snapshot(_c, _s, _i->inode.bi_snapshot, _snapshot))
struct inode_walker_entry {
struct bch_inode_unpacked inode;
- u32 snapshot;
+ bool whiteout;
u64 count;
u64 i_size;
};
@@ -819,13 +800,20 @@ static struct inode_walker inode_walker_init(void)
static int add_inode(struct bch_fs *c, struct inode_walker *w,
struct bkey_s_c inode)
{
- struct bch_inode_unpacked u;
-
- return bch2_inode_unpack(inode, &u) ?:
- darray_push(&w->inodes, ((struct inode_walker_entry) {
- .inode = u,
- .snapshot = inode.k->p.snapshot,
+ int ret = darray_push(&w->inodes, ((struct inode_walker_entry) {
+ .whiteout = !bkey_is_inode(inode.k),
}));
+ if (ret)
+ return ret;
+
+ struct inode_walker_entry *n = &darray_last(w->inodes);
+ if (!n->whiteout) {
+ return bch2_inode_unpack(inode, &n->inode);
+ } else {
+ n->inode.bi_inum = inode.k->p.inode;
+ n->inode.bi_snapshot = inode.k->p.snapshot;
+ return 0;
+ }
}
static int get_inodes_all_snapshots(struct btree_trans *trans,
@@ -845,13 +833,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
w->recalculate_sums = false;
w->inodes.nr = 0;
- for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
+ for_each_btree_key_max_norestart(trans, iter,
+ BTREE_ID_inodes, POS(0, inum), SPOS(0, inum, U32_MAX),
+ BTREE_ITER_all_snapshots, k, ret) {
+ ret = add_inode(c, w, k);
+ if (ret)
break;
-
- if (bkey_is_inode(k.k))
- add_inode(c, w, k);
}
bch2_trans_iter_exit(trans, &iter);
@@ -863,48 +850,112 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
return 0;
}
+static int get_visible_inodes(struct btree_trans *trans,
+ struct inode_walker *w,
+ struct snapshots_seen *s,
+ u64 inum)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ w->inodes.nr = 0;
+ w->deletes.nr = 0;
+
+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (k.k->p.offset != inum)
+ break;
+
+ if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
+ continue;
+
+ if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot))
+ continue;
+
+ ret = bkey_is_inode(k.k)
+ ? add_inode(c, w, k)
+ : snapshot_list_add(c, &w->deletes, k.k->p.snapshot);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
+lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k)
{
- bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
+ struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
__darray_for_each(w->inodes, i)
- if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
+ if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot))
goto found;
return NULL;
found:
- BUG_ON(k.k->p.snapshot > i->snapshot);
+ BUG_ON(k.k->p.snapshot > i->inode.bi_snapshot);
- if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
- struct inode_walker_entry new = *i;
-
- new.snapshot = k.k->p.snapshot;
- new.count = 0;
- new.i_size = 0;
-
- struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, k);
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
- bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+ if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot,
+ trans, snapshot_key_missing_inode_snapshot,
+ "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
"unexpected because we should always update the inode when we update a key in that inode\n"
"%s",
- w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
- printbuf_exit(&buf);
+ w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
+ (bch2_bkey_val_to_text(&buf, c, k),
+ buf.buf))) {
+ struct bch_inode_unpacked new = i->inode;
+ struct bkey_i whiteout;
+
+ new.bi_snapshot = k.k->p.snapshot;
+
+ if (!i->whiteout) {
+ ret = __bch2_fsck_write_inode(trans, &new);
+ } else {
+ bkey_init(&whiteout.k);
+ whiteout.k.type = KEY_TYPE_whiteout;
+ whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot);
+ ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
+ &whiteout,
+ BTREE_UPDATE_internal_snapshot_node);
+ }
+
+ if (ret)
+ goto fsck_err;
+
+ ret = bch2_trans_commit(trans, NULL, NULL, 0);
+ if (ret)
+ goto fsck_err;
- while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
+ struct inode_walker_entry new_entry = *i;
+
+ new_entry.inode.bi_snapshot = k.k->p.snapshot;
+ new_entry.count = 0;
+ new_entry.i_size = 0;
+
+ while (i > w->inodes.data && i[-1].inode.bi_snapshot > k.k->p.snapshot)
--i;
size_t pos = i - w->inodes.data;
- int ret = darray_insert_item(&w->inodes, pos, new);
+ ret = darray_insert_item(&w->inodes, pos, new_entry);
if (ret)
- return ERR_PTR(ret);
+ goto fsck_err;
- i = w->inodes.data + pos;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto fsck_err;
}
+ printbuf_exit(&buf);
return i;
+fsck_err:
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
}
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
@@ -919,42 +970,7 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
w->last_pos = k.k->p;
- return lookup_inode_for_snapshot(trans->c, w, k);
-}
-
-static int get_visible_inodes(struct btree_trans *trans,
- struct inode_walker *w,
- struct snapshots_seen *s,
- u64 inum)
-{
- struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- w->inodes.nr = 0;
- w->deletes.nr = 0;
-
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
- break;
-
- if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
- continue;
-
- if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot))
- continue;
-
- ret = bkey_is_inode(k.k)
- ? add_inode(c, w, k)
- : snapshot_list_add(c, &w->deletes, k.k->p.snapshot);
- if (ret)
- break;
- }
- bch2_trans_iter_exit(trans, &iter);
-
- return ret;
+ return lookup_inode_for_snapshot(trans, w, k);
}
/*
@@ -1078,32 +1094,6 @@ fsck_err:
return ret;
}
-static int get_snapshot_root_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *root,
- u64 inum)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret = 0;
-
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
- SPOS(0, inum, U32_MAX),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
- break;
- if (bkey_is_inode(k.k))
- goto found_root;
- }
- if (ret)
- goto err;
- BUG();
-found_root:
- ret = bch2_inode_unpack(k, root);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
static int check_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -1134,20 +1124,23 @@ static int check_inode(struct btree_trans *trans,
goto err;
if (snapshot_root->bi_inum != u.bi_inum) {
- ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum);
+ ret = bch2_inode_find_snapshot_root(trans, u.bi_inum, snapshot_root);
if (ret)
goto err;
}
- if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed ||
- INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root),
- trans, inode_snapshot_mismatch,
- "inode hash info in different snapshots don't match")) {
- u.bi_hash_seed = snapshot_root->bi_hash_seed;
- SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root));
- do_update = true;
+ if (u.bi_hash_seed != snapshot_root->bi_hash_seed ||
+ INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root)) {
+ ret = bch2_repair_inode_hash_info(trans, snapshot_root);
+ BUG_ON(ret == -BCH_ERR_fsck_repair_unimplemented);
+ if (ret)
+ goto err;
}
+ ret = bch2_check_inode_has_case_insensitive(trans, &u, &s->ids, &do_update);
+ if (ret)
+ goto err;
+
if (u.bi_dir || u.bi_dir_offset) {
ret = check_inode_dirent_inode(trans, &u, &do_update);
if (ret)
@@ -1450,7 +1443,9 @@ static int check_key_has_inode(struct btree_trans *trans,
if (k.k->type == KEY_TYPE_whiteout)
goto out;
- if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ bool have_inode = i && !i->whiteout;
+
+ if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
@@ -1461,14 +1456,14 @@ static int check_key_has_inode(struct btree_trans *trans,
goto err;
}
- if (fsck_err_on(!i,
+ if (fsck_err_on(!have_inode,
trans, key_in_missing_inode,
"key in missing inode:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
goto delete;
- if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
+ if (fsck_err_on(have_inode && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
trans, key_in_wrong_inode_type,
"key for wrong inode mode %o:\n%s",
i->inode.bi_mode,
@@ -1496,21 +1491,21 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
if (i->inode.bi_sectors == i->count)
continue;
- count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
+ count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot);
if (w->recalculate_sums)
i->count = count2;
if (i->count != count2) {
bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
- w->last_pos.inode, i->snapshot, i->count, count2);
+ w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
i->count = count2;
}
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
trans, inode_i_sectors_wrong,
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
- w->last_pos.inode, i->snapshot,
+ w->last_pos.inode, i->inode.bi_snapshot,
i->inode.bi_sectors, i->count)) {
i->inode.bi_sectors = i->count;
ret = bch2_fsck_write_inode(trans, &i->inode);
@@ -1821,20 +1816,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
inode->inodes.data && i >= inode->inodes.data;
--i) {
- if (i->snapshot > k.k->p.snapshot ||
- !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+ if (i->inode.bi_snapshot > k.k->p.snapshot ||
+ !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
- i->inode.bi_inum, i->snapshot, i->inode.bi_size,
+ i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct btree_iter iter2;
bch2_trans_copy_iter(trans, &iter2, iter);
- bch2_btree_iter_set_snapshot(trans, &iter2, i->snapshot);
+ bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot);
ret = bch2_btree_iter_traverse(trans, &iter2) ?:
bch2_btree_delete_at(trans, &iter2,
BTREE_UPDATE_internal_snapshot_node);
@@ -1856,8 +1851,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
inode->inodes.data && i >= inode->inodes.data;
--i) {
- if (i->snapshot > k.k->p.snapshot ||
- !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+ if (i->whiteout ||
+ i->inode.bi_snapshot > k.k->p.snapshot ||
+ !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
i->count += k.k->size;
@@ -1939,13 +1935,13 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
if (i->inode.bi_nlink == i->count)
continue;
- count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
+ count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->inode.bi_snapshot);
if (count2 < 0)
return count2;
if (i->count != count2) {
bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
- w->last_pos.inode, i->snapshot, i->count, count2);
+ w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
@@ -1954,7 +1950,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
if (fsck_err_on(i->inode.bi_nlink != i->count,
trans, inode_dir_wrong_nlink,
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
- w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
+ w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
ret = bch2_fsck_write_inode(trans, &i->inode);
if (ret)
@@ -2066,7 +2062,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
0, subvolume);
ret = bkey_err(s.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
- return ret;
+ goto err;
if (ret) {
if (fsck_err(trans, dirent_to_missing_subvol,
@@ -2077,24 +2073,35 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
goto out;
}
- if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
- trans, subvol_fs_path_parent_wrong,
- "subvol with wrong fs_path_parent, should be be %u\n%s",
- parent_subvol,
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- struct bkey_i_subvolume *n =
- bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
- ret = PTR_ERR_OR_ZERO(n);
+ if (le32_to_cpu(s.v->fs_path_parent) != parent_subvol) {
+ printbuf_reset(&buf);
+
+ prt_printf(&buf, "subvol with wrong fs_path_parent, should be be %u\n",
+ parent_subvol);
+
+ ret = bch2_inum_to_path(trans, (subvol_inum) { s.k->p.offset,
+ le64_to_cpu(s.v->inode) }, &buf);
if (ret)
goto err;
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, c, s.s_c);
+
+ if (fsck_err(trans, subvol_fs_path_parent_wrong, "%s", buf.buf)) {
+ struct bkey_i_subvolume *n =
+ bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto err;
- n->v.fs_path_parent = cpu_to_le32(parent_subvol);
+ n->v.fs_path_parent = cpu_to_le32(parent_subvol);
+ }
}
u64 target_inum = le64_to_cpu(s.v->inode);
u32 target_snapshot = le32_to_cpu(s.v->snapshot);
- ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root);
+ ret = bch2_inode_find_by_inum_snapshot(trans, target_inum, target_snapshot,
+ &subvol_root, 0);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
@@ -2167,7 +2174,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
goto err;
- if (!i)
+ if (!i || i->whiteout)
goto out;
if (dir->first_this_inode)
@@ -2188,6 +2195,41 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+ /* check casefold */
+ if (fsck_err_on(d.v->d_casefold != !!hash_info->cf_encoding,
+ trans, dirent_casefold_mismatch,
+ "dirent casefold does not match dir casefold\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k),
+ buf.buf))) {
+ struct qstr name = bch2_dirent_get_name(d);
+ u32 subvol = d.v->d_type == DT_SUBVOL
+ ? le32_to_cpu(d.v->d_parent_subvol)
+ : 0;
+ u64 target = d.v->d_type == DT_SUBVOL
+ ? le32_to_cpu(d.v->d_child_subvol)
+ : le64_to_cpu(d.v->d_inum);
+ u64 dir_offset;
+
+ ret = bch2_hash_delete_at(trans,
+ bch2_dirent_hash_desc, hash_info, iter,
+ BTREE_UPDATE_internal_snapshot_node) ?:
+ bch2_dirent_create_snapshot(trans, subvol,
+ d.k->p.inode, d.k->p.snapshot,
+ hash_info,
+ d.v->d_type,
+ &name,
+ target,
+ &dir_offset,
+ BTREE_ITER_with_updates|
+ BTREE_UPDATE_internal_snapshot_node|
+ STR_HASH_must_create) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+
+ /* might need another check_dirents pass */
+ goto out;
+ }
+
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);
if (ret)
@@ -2307,7 +2349,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
return ret;
- if (!i)
+ if (!i || i->whiteout)
return 0;
if (inode->first_this_inode)
@@ -2376,7 +2418,8 @@ static int check_root_trans(struct btree_trans *trans)
goto err;
}
- ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode);
+ ret = bch2_inode_find_by_inum_snapshot(trans, BCACHEFS_ROOT_INO, snapshot,
+ &root_inode, 0);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
@@ -2408,8 +2451,6 @@ int bch2_check_root(struct bch_fs *c)
return ret;
}
-typedef DARRAY(u32) darray_u32;
-
static bool darray_u32_has(darray_u32 *d, u32 v)
{
darray_for_each(*d, i)
@@ -2446,7 +2487,14 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
u32 parent = le32_to_cpu(s.v->fs_path_parent);
if (darray_u32_has(&subvol_path, parent)) {
- if (fsck_err(c, subvol_loop, "subvolume loop"))
+ printbuf_reset(&buf);
+ prt_printf(&buf, "subvolume loop:\n");
+
+ darray_for_each_reverse(subvol_path, i)
+ prt_printf(&buf, "%u ", *i);
+ prt_printf(&buf, "%u", parent);
+
+ if (fsck_err(trans, subvol_loop, "%s", buf.buf))
ret = reattach_subvol(trans, s);
break;
}
@@ -2462,7 +2510,8 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
if (fsck_err_on(k.k->type != KEY_TYPE_subvolume,
trans, subvol_unreachable,
"unreachable subvolume %s",
- (bch2_bkey_val_to_text(&buf, c, s.s_c),
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, s.s_c),
buf.buf))) {
ret = reattach_subvol(trans, s);
break;
@@ -2618,14 +2667,13 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
redo_bi_depth = true;
if (path_is_dup(&path, inode.bi_inum, snapshot)) {
- /* XXX print path */
- bch_err(c, "directory structure loop");
-
- darray_for_each(path, i)
- pr_err("%llu:%u", i->inum, i->snapshot);
- pr_err("%llu:%u", inode.bi_inum, snapshot);
+ printbuf_reset(&buf);
+ prt_printf(&buf, "directory structure loop:\n");
+ darray_for_each_reverse(path, i)
+ prt_printf(&buf, "%llu:%u ", i->inum, i->snapshot);
+ prt_printf(&buf, "%llu:%u", inode.bi_inum, snapshot);
- if (fsck_err(trans, dir_loop, "directory structure loop")) {
+ if (fsck_err(trans, dir_loop, "%s", buf.buf)) {
ret = remove_backpointer(trans, &inode);
bch_err_msg(c, ret, "removing dirent");
if (ret)
@@ -3024,7 +3072,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
{
struct bch_ioctl_fsck_offline arg;
struct fsck_thread *thr = NULL;
- darray_str(devs) = {};
+ darray_const_str devs = {};
long ret = 0;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -3082,7 +3130,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
- thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+ thr->c = bch2_fs_open(&devs, &thr->opts);
if (!IS_ERR(thr->c) &&
thr->c->opts.errors == BCH_ON_ERROR_panic)
@@ -3119,19 +3167,18 @@ static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
c->opts.fix_errors = FSCK_FIX_ask;
c->opts.fsck = true;
- set_bit(BCH_FS_fsck_running, &c->flags);
+ set_bit(BCH_FS_in_fsck, &c->flags);
- c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
- int ret = bch2_run_online_recovery_passes(c);
+ int ret = bch2_run_online_recovery_passes(c, ~0ULL);
- clear_bit(BCH_FS_fsck_running, &c->flags);
+ clear_bit(BCH_FS_in_fsck, &c->flags);
bch_err_fn(c, ret);
c->stdio = NULL;
c->stdio_filter = NULL;
c->opts.fix_errors = old_fix_errors;
- up(&c->online_fsck_mutex);
+ up(&c->recovery.run_lock);
bch2_ro_ref_put(c);
return ret;
}
@@ -3155,7 +3202,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg)
if (!bch2_ro_ref_tryget(c))
return -EROFS;
- if (down_trylock(&c->online_fsck_mutex)) {
+ if (down_trylock(&c->recovery.run_lock)) {
bch2_ro_ref_put(c);
return -EAGAIN;
}
@@ -3187,7 +3234,7 @@ err:
bch_err_fn(c, ret);
if (thr)
bch2_fsck_thread_exit(&thr->thr);
- up(&c->online_fsck_mutex);
+ up(&c->recovery.run_lock);
bch2_ro_ref_put(c);
}
return ret;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index b51d98cf8a80..5cf70108ae2f 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -14,6 +14,7 @@
#include "extent_update.h"
#include "fs.h"
#include "inode.h"
+#include "namei.h"
#include "opts.h"
#include "str_hash.h"
#include "snapshot.h"
@@ -240,6 +241,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k,
u64 v[2];
unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_snapshot = inode.k->p.snapshot;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
@@ -284,13 +286,12 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
{
memset(unpacked, 0, sizeof(*unpacked));
- unpacked->bi_snapshot = k.k->p.snapshot;
-
switch (k.k->type) {
case KEY_TYPE_inode: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_snapshot = inode.k->p.snapshot;
unpacked->bi_journal_seq= 0;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
@@ -309,6 +310,7 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_snapshot = inode.k->p.snapshot;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
@@ -326,8 +328,6 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
int bch2_inode_unpack(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
- unpacked->bi_snapshot = k.k->p.snapshot;
-
return likely(k.k->type == KEY_TYPE_inode_v3)
? bch2_inode_unpack_v3(k, unpacked)
: bch2_inode_unpack_slowpath(k, unpacked);
@@ -367,6 +367,82 @@ err:
return ret;
}
+int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans,
+ u64 inode_nr, u32 snapshot,
+ struct bch_inode_unpacked *inode,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, inode_nr, snapshot), flags);
+ int ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ ret = bkey_is_inode(k.k)
+ ? bch2_inode_unpack(k, inode)
+ : -BCH_ERR_ENOENT_inode;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
+ if (!ret)
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
+ if (!ret)
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode));
+}
+
+int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum,
+ struct bch_inode_unpacked *root)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
+ SPOS(0, inum, U32_MAX),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (k.k->p.offset != inum)
+ break;
+ if (bkey_is_inode(k.k)) {
+ ret = bch2_inode_unpack(k, root);
+ goto out;
+ }
+ }
+ /* We're only called when we know we have an inode for @inum */
+ BUG_ON(!ret);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
int bch2_inode_write_flags(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
@@ -832,7 +908,8 @@ void bch2_inode_init_early(struct bch_fs *c,
get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed));
}
-void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
+void bch2_inode_init_late(struct bch_fs *c,
+ struct bch_inode_unpacked *inode_u, u64 now,
uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
struct bch_inode_unpacked *parent)
{
@@ -856,6 +933,12 @@ void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
BCH_INODE_OPTS()
#undef x
}
+
+ if (!S_ISDIR(mode))
+ inode_u->bi_casefold = 0;
+
+ if (bch2_inode_casefold(c, inode_u))
+ inode_u->bi_flags |= BCH_INODE_has_case_insensitive;
}
void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
@@ -863,7 +946,7 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
struct bch_inode_unpacked *parent)
{
bch2_inode_init_early(c, inode_u);
- bch2_inode_init_late(inode_u, bch2_current_time(c),
+ bch2_inode_init_late(c, inode_u, bch2_current_time(c),
uid, gid, mode, rdev, parent);
}
@@ -1099,38 +1182,6 @@ err2:
return ret;
}
-int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
- subvol_inum inum,
- struct bch_inode_unpacked *inode)
-{
- struct btree_iter iter;
- int ret;
-
- ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
- if (!ret)
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
- subvol_inum inum,
- struct bch_inode_unpacked *inode)
-{
- struct btree_iter iter;
- int ret;
-
- ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
- if (!ret)
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
- struct bch_inode_unpacked *inode)
-{
- return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode));
-}
-
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
{
if (bi->bi_flags & BCH_INODE_unlinked)
@@ -1204,6 +1255,41 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i
return 0;
}
+int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *bi, unsigned v)
+{
+ struct bch_fs *c = trans->c;
+
+#ifdef CONFIG_UNICODE
+ int ret = 0;
+ /* Not supported on individual files. */
+ if (!S_ISDIR(bi->bi_mode))
+ return -EOPNOTSUPP;
+
+ /*
+ * Make sure the dir is empty, as otherwise we'd need to
+ * rehash everything and update the dirent keys.
+ */
+ ret = bch2_empty_dir_trans(trans, inum);
+ if (ret < 0)
+ return ret;
+
+ ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+ if (ret)
+ return ret;
+
+ bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+
+ bi->bi_casefold = v + 1;
+ bi->bi_fields_set |= BIT(Inode_opt_casefold);
+
+ return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi);
+#else
+ bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
+ return -EOPNOTSUPP;
+#endif
+}
+
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
struct bch_fs *c = trans->c;
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index c74af15b14f2..77ad2d549541 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -134,10 +134,21 @@ static inline int bch2_inode_peek(struct btree_trans *trans,
subvol_inum inum, unsigned flags)
{
return __bch2_inode_peek(trans, iter, inode, inum, flags, true);
- int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
- return ret;
}
+int bch2_inode_find_by_inum_snapshot(struct btree_trans *, u64, u32,
+ struct bch_inode_unpacked *, unsigned);
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
+ subvol_inum,
+ struct bch_inode_unpacked *);
+int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *);
+int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
+ struct bch_inode_unpacked *);
+
+int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum,
+ struct bch_inode_unpacked *root);
+
int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *,
struct bch_inode_unpacked *, enum btree_iter_update_trigger_flags);
@@ -153,7 +164,7 @@ int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *);
void bch2_inode_init_early(struct bch_fs *,
struct bch_inode_unpacked *);
-void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
+void bch2_inode_init_late(struct bch_fs *, struct bch_inode_unpacked *, u64,
uid_t, gid_t, umode_t, dev_t,
struct bch_inode_unpacked *);
void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
@@ -165,14 +176,6 @@ int bch2_inode_create(struct btree_trans *, struct btree_iter *,
int bch2_inode_rm(struct bch_fs *, subvol_inum);
-int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
- subvol_inum,
- struct bch_inode_unpacked *);
-int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
- struct bch_inode_unpacked *);
-int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
- struct bch_inode_unpacked *);
-
#define inode_opt_get(_c, _inode, _name) \
((_inode)->bi_##_name ? (_inode)->bi_##_name - 1 : (_c)->opts._name)
@@ -245,7 +248,7 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
- /* inode apts are stored with a +1 bias: 0 means "unset, use fs opt" */
+ /* inode opts are stored with a +1 bias: 0 means "unset, use fs opt" */
return bi->bi_casefold
? bi->bi_casefold - 1
: c->opts.casefold;
@@ -292,7 +295,9 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
struct bch_inode_unpacked *);
-int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
+int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *);
+int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *, unsigned);
#include "rebalance.h"
@@ -304,6 +309,14 @@ bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode
return io_opts_to_rebalance_opts(c, &io_opts);
}
+#define BCACHEFS_ROOT_SUBVOL_INUM \
+ ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
+
+static inline bool subvol_inum_eq(subvol_inum a, subvol_inum b)
+{
+ return a.subvol == b.subvol && a.inum == b.inum;
+}
+
int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
int bch2_delete_dead_inodes(struct bch_fs *);
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
index 87e193e8ed25..1f00938b1bdc 100644
--- a/fs/bcachefs/inode_format.h
+++ b/fs/bcachefs/inode_format.h
@@ -129,6 +129,10 @@ enum inode_opt_id {
Inode_opt_nr,
};
+/*
+ * BCH_INODE_has_case_insensitive is set if any descendent is case insensitive -
+ * for overlayfs
+ */
#define BCH_INODE_FLAGS() \
x(sync, 0) \
x(immutable, 1) \
@@ -139,7 +143,8 @@ enum inode_opt_id {
x(i_sectors_dirty, 6) \
x(unlinked, 7) \
x(backptr_untrusted, 8) \
- x(has_child_snapshot, 9)
+ x(has_child_snapshot, 9) \
+ x(has_case_insensitive, 10)
/* bits 20+ reserved for packed fields below: */
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index def4a26a3b45..cc708d46557e 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -9,6 +9,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
@@ -17,6 +18,7 @@
#include "data_update.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_misc.h"
@@ -25,6 +27,7 @@
#include "subvolume.h"
#include "trace.h"
+#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
@@ -34,6 +37,12 @@ module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644);
MODULE_PARM_DESC(read_corrupt_ratio, "");
#endif
+static bool bch2_poison_extents_on_checksum_error;
+module_param_named(poison_extents_on_checksum_error,
+ bch2_poison_extents_on_checksum_error, bool, 0644);
+MODULE_PARM_DESC(poison_extents_on_checksum_error,
+ "Extents with checksum errors are marked as poisoned - unsafe without read fua support");
+
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static bool bch2_target_congested(struct bch_fs *c, u16 target)
@@ -80,18 +89,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
/* Cache promotion on read */
-struct promote_op {
- struct rcu_head rcu;
- u64 start_time;
-
- struct rhash_head hash;
- struct bpos pos;
-
- struct work_struct work;
- struct data_update write;
- struct bio_vec bi_inline_vecs[]; /* must be last */
-};
-
static const struct rhashtable_params bch_promote_params = {
.head_offset = offsetof(struct promote_op, hash),
.key_offset = offsetof(struct promote_op, pos),
@@ -169,9 +166,11 @@ static noinline void promote_free(struct bch_read_bio *rbio)
bch_promote_params);
BUG_ON(ret);
+ async_object_list_del(c, promote, op->list_idx);
+
bch2_data_update_exit(&op->write);
- bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote);
kfree_rcu(op, rcu);
}
@@ -236,7 +235,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
return NULL;
}
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_promote))
return ERR_PTR(-BCH_ERR_nopromote_no_writes);
struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
@@ -254,6 +253,10 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
goto err;
}
+ ret = async_object_list_add(c, promote, op, &op->list_idx);
+ if (ret < 0)
+ goto err_remove_hash;
+
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
writepoint_hashed((unsigned long) current),
&orig->opts,
@@ -265,7 +268,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
* -BCH_ERR_ENOSPC_disk_reservation:
*/
if (ret)
- goto err_remove_hash;
+ goto err_remove_list;
rbio_init_fragment(&op->write.rbio.bio, orig);
op->write.rbio.bounce = true;
@@ -273,6 +276,8 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
op->write.op.end_io = promote_done;
return &op->write.rbio;
+err_remove_list:
+ async_object_list_del(c, promote, op->list_idx);
err_remove_hash:
BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params));
@@ -281,7 +286,7 @@ err:
/* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu);
err_put:
- bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote);
return ERR_PTR(ret);
}
@@ -296,6 +301,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
bool *read_full,
struct bch_io_failures *failed)
{
+ /*
+ * We're in the retry path, but we don't know what to repair yet, and we
+ * don't want to do a promote here:
+ */
+ if (failed && !failed->nr)
+ return NULL;
+
struct bch_fs *c = trans->c;
/*
* if failed != NULL we're not actually doing a promote, we're
@@ -338,6 +350,18 @@ nopromote:
return NULL;
}
+void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op)
+{
+ if (!op->write.read_done) {
+ prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
+ printbuf_indent_add(out, 2);
+ bch2_read_bio_to_text(out, op->write.rbio.parent);
+ printbuf_indent_sub(out, 2);
+ }
+
+ bch2_data_update_to_text(out, &op->write);
+}
+
/* Read */
static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
@@ -394,7 +418,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
if (rbio->have_ioref) {
struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
}
if (rbio->split) {
@@ -406,6 +430,8 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
else
promote_free(rbio);
} else {
+ async_object_list_del(rbio->c, rbio, rbio->list_idx);
+
if (rbio->bounce)
bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
@@ -430,6 +456,74 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
bio_endio(&rbio->bio);
}
+static void get_rbio_extent(struct btree_trans *trans,
+ struct bch_read_bio *rbio,
+ struct bkey_buf *sk)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = lockrestart_do(trans,
+ bkey_err(k = bch2_bkey_get_iter(trans, &iter,
+ rbio->data_btree, rbio->data_pos, 0)));
+ if (ret)
+ return;
+
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr)
+ if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) {
+ bch2_bkey_buf_reassemble(sk, trans->c, k);
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+}
+
+static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
+ enum btree_id btree, struct bkey_s_c read_k)
+{
+ if (!bch2_poison_extents_on_checksum_error)
+ return 0;
+
+ struct bch_fs *c = trans->c;
+
+ struct data_update *u = rbio_data_update(rbio);
+ if (u)
+ read_k = bkey_i_to_s_c(u->k.k);
+
+ u64 flags = bch2_bkey_extent_flags(read_k);
+ if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
+ return 0;
+
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k),
+ BTREE_ITER_intent);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (!bkey_and_val_eq(k, read_k))
+ goto out;
+
+ struct bkey_i *new = bch2_trans_kmalloc(trans,
+ bkey_bytes(k.k) + sizeof(struct bch_extent_flags));
+ ret = PTR_ERR_OR_ZERO(new) ?:
+ (bkey_reassemble(new, k), 0) ?:
+ bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?:
+ bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+
+ /*
+ * Propagate key change back to data update path, in particular so it
+ * knows the extent has been poisoned and it's safe to change the
+ * checksum
+ */
+ if (u && !ret)
+ bch2_bkey_buf_copy(&u->k, c, new);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
struct bch_read_bio *rbio,
struct bvec_iter bvec_iter,
@@ -463,7 +557,8 @@ retry:
err:
bch2_trans_iter_exit(trans, &iter);
- if (bch2_err_matches(ret, BCH_ERR_data_read_retry))
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_data_read_retry))
goto retry;
if (ret) {
@@ -487,15 +582,21 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
- int orig_error = rbio->ret;
struct btree_trans *trans = bch2_trans_get(c);
+ struct bkey_buf sk;
+ bch2_bkey_buf_init(&sk);
+ bkey_init(&sk.k->k);
+
trace_io_read_retry(&rbio->bio);
this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
bvec_iter_sectors(rbio->bvec_iter));
- if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
+ get_rbio_extent(trans, rbio, &sk);
+
+ if (!bkey_deleted(&sk.k->k) &&
+ bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
bch2_mark_io_failure(&failed, &rbio->pick,
rbio->ret == -BCH_ERR_data_read_retry_csum_err);
@@ -516,15 +617,16 @@ static void bch2_rbio_retry(struct work_struct *work)
int ret = rbio->data_update
? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
- : __bch2_read(trans, rbio, iter, inum, &failed, flags);
+ : __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags);
if (ret) {
rbio->ret = ret;
rbio->bio.bi_status = BLK_STS_IOERR;
- } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace &&
- orig_error != -BCH_ERR_data_read_ptr_stale_race &&
- !failed.nr) {
+ }
+
+ if (failed.nr || ret) {
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf,
@@ -532,13 +634,27 @@ static void bch2_rbio_retry(struct work_struct *work)
read_pos.offset << 9));
if (rbio->data_update)
prt_str(&buf, "(internal move) ");
- prt_str(&buf, "successful retry");
- bch_err_ratelimited(c, "%s", buf.buf);
+ prt_str(&buf, "data read error, ");
+ if (!ret)
+ prt_str(&buf, "successful retry");
+ else
+ prt_str(&buf, bch2_err_str(ret));
+ prt_newline(&buf);
+
+ if (!bkey_deleted(&sk.k->k)) {
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
+ prt_newline(&buf);
+ }
+
+ bch2_io_failures_to_text(&buf, c, &failed);
+
+ bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
bch2_rbio_done(rbio);
+ bch2_bkey_buf_exit(&sk, c);
bch2_trans_put(trans);
}
@@ -568,27 +684,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
}
}
-static void bch2_read_io_err(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bio *bio = &rbio->bio;
- struct bch_fs *c = rbio->c;
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- struct printbuf buf = PRINTBUF;
-
- bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
- prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));
-
- if (ca)
- bch_err_ratelimited(ca, "%s", buf.buf);
- else
- bch_err_ratelimited(c, "%s", buf.buf);
-
- printbuf_exit(&buf);
- bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
-}
-
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
struct bch_read_bio *rbio)
{
@@ -652,31 +747,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
__bch2_rbio_narrow_crcs(trans, rbio));
}
-static void bch2_read_csum_err(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bch_fs *c = rbio->c;
- struct bio *src = &rbio->bio;
- struct bch_extent_crc_unpacked crc = rbio->pick.crc;
- struct nonce nonce = extent_nonce(rbio->version, crc);
- struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
- struct printbuf buf = PRINTBUF;
-
- bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
- prt_str(&buf, "data ");
- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
-
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- if (ca)
- bch_err_ratelimited(ca, "%s", buf.buf);
- else
- bch_err_ratelimited(c, "%s", buf.buf);
-
- bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
-}
-
static void bch2_read_decompress_err(struct work_struct *work)
{
struct bch_read_bio *rbio =
@@ -837,7 +907,7 @@ out:
memalloc_nofs_restore(nofs_flags);
return;
csum_err:
- bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
goto out;
decompression_err:
bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
@@ -863,7 +933,7 @@ static void bch2_read_endio(struct bio *bio)
rbio->bio.bi_end_io = rbio->end_io;
if (unlikely(bio->bi_status)) {
- bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
return;
}
@@ -963,6 +1033,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
bvec_iter_sectors(iter));
goto out_read_done;
}
+
+ if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
+ !orig->data_update)
+ return -BCH_ERR_extent_poisoned;
retry_pick:
ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
@@ -971,6 +1045,16 @@ retry_pick:
goto hole;
if (unlikely(ret < 0)) {
+ if (ret == -BCH_ERR_data_read_csum_err) {
+ int ret2 = maybe_poison_extent(trans, orig, data_btree, k);
+ if (ret2) {
+ ret = ret2;
+ goto err;
+ }
+
+ trace_and_count(c, io_read_fail_and_poison, &orig->bio);
+ }
+
struct printbuf buf = PRINTBUF;
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "%s\n ", bch2_err_str(ret));
@@ -994,7 +1078,8 @@ retry_pick:
goto err;
}
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_io_read);
/*
* Stale dirty pointers are treated as IO errors, but @failed isn't
@@ -1008,7 +1093,7 @@ retry_pick:
unlikely(dev_ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
bch2_mark_io_failure(failed, &pick, false);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
goto retry_pick;
}
@@ -1041,7 +1126,8 @@ retry_pick:
*/
if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
if (ca)
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_io_read);
rbio->ret = -BCH_ERR_data_read_buffer_too_small;
goto out_read_done;
}
@@ -1138,6 +1224,8 @@ retry_pick:
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
+ async_object_list_add(c, rbio, rbio, &rbio->list_idx);
+
if (rbio->bounce)
trace_and_count(c, io_read_bounce, &rbio->bio);
@@ -1171,14 +1259,6 @@ retry_pick:
if (likely(!rbio->pick.do_ec_reconstruct)) {
if (unlikely(!rbio->have_ioref)) {
- struct printbuf buf = PRINTBUF;
- bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
- prt_printf(&buf, "no device to read from:\n ");
- bch2_bkey_val_to_text(&buf, c, k);
-
- bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
-
bch2_rbio_error(rbio,
-BCH_ERR_data_read_retry_device_offline,
BLK_STS_IOERR);
@@ -1265,12 +1345,15 @@ out_read_done:
int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, subvol_inum inum,
- struct bch_io_failures *failed, unsigned flags)
+ struct bch_io_failures *failed,
+ struct bkey_buf *prev_read,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_buf sk;
struct bkey_s_c k;
+ enum btree_id data_btree;
int ret;
EBUG_ON(rbio->data_update);
@@ -1281,7 +1364,7 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
BTREE_ITER_slots);
while (1) {
- enum btree_id data_btree = BTREE_ID_extents;
+ data_btree = BTREE_ID_extents;
bch2_trans_begin(trans);
@@ -1313,6 +1396,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
k = bkey_i_to_s_c(sk.k);
+ if (unlikely(flags & BCH_READ_in_retry)) {
+ if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k)))
+ failed->nr = 0;
+ bch2_bkey_buf_copy(prev_read, c, sk.k);
+ }
+
/*
* With indirect extents, the amount of data to read is the min
* of the original extent and the indirect extent:
@@ -1347,8 +1436,6 @@ err:
break;
}
- bch2_trans_iter_exit(trans, &iter);
-
if (unlikely(ret)) {
if (ret != -BCH_ERR_extent_poisoned) {
struct printbuf buf = PRINTBUF;
@@ -1367,20 +1454,64 @@ err:
bch2_rbio_done(rbio);
}
+ bch2_trans_iter_exit(trans, &iter);
bch2_bkey_buf_exit(&sk, c);
return ret;
}
+static const char * const bch2_read_bio_flags[] = {
+#define x(n) #n,
+ BCH_READ_FLAGS()
+#undef x
+ NULL
+};
+
+void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
+{
+ u64 now = local_clock();
+ prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0);
+ prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0);
+
+ if (!rbio->split)
+ prt_printf(out, "end_io:\t%ps\n", rbio->end_io);
+ else
+ prt_printf(out, "parent:\t%px\n", rbio->parent);
+
+ prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io);
+
+ prt_printf(out, "promote:\t%u\n", rbio->promote);
+ prt_printf(out, "bounce:\t%u\n", rbio->bounce);
+ prt_printf(out, "split:\t%u\n", rbio->split);
+ prt_printf(out, "have_ioref:\t%u\n", rbio->have_ioref);
+ prt_printf(out, "narrow_crcs:\t%u\n", rbio->narrow_crcs);
+ prt_printf(out, "context:\t%u\n", rbio->context);
+ prt_printf(out, "ret:\t%s\n", bch2_err_str(rbio->ret));
+
+ prt_printf(out, "flags:\t");
+ bch2_prt_bitflags(out, bch2_read_bio_flags, rbio->flags);
+ prt_newline(out);
+
+ bch2_bio_to_text(out, &rbio->bio);
+}
+
void bch2_fs_io_read_exit(struct bch_fs *c)
{
if (c->promote_table.tbl)
rhashtable_destroy(&c->promote_table);
bioset_exit(&c->bio_read_split);
bioset_exit(&c->bio_read);
+ mempool_exit(&c->bio_bounce_pages);
}
int bch2_fs_io_read_init(struct bch_fs *c)
{
+ if (mempool_init_page_pool(&c->bio_bounce_pages,
+ max_t(unsigned,
+ c->opts.btree_node_size,
+ c->opts.encoded_extent_max) /
+ PAGE_SIZE, 0))
+ return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
+
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS))
return -BCH_ERR_ENOMEM_bio_read_init;
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index c78025d863e0..c08b9c047b3e 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -4,6 +4,7 @@
#include "bkey_buf.h"
#include "btree_iter.h"
+#include "extents_types.h"
#include "reflink.h"
struct bch_read_bio {
@@ -48,6 +49,9 @@ struct bch_read_bio {
u16 _state;
};
s16 ret;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
struct extent_ptr_decoded pick;
@@ -144,7 +148,8 @@ static inline void bch2_read_extent(struct btree_trans *trans,
}
int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
- subvol_inum, struct bch_io_failures *, unsigned flags);
+ subvol_inum,
+ struct bch_io_failures *, struct bkey_buf *, unsigned flags);
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
subvol_inum inum)
@@ -154,7 +159,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
rbio->subvol = inum.subvol;
bch2_trans_run(c,
- __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL,
+ __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
BCH_READ_retry_if_stale|
BCH_READ_may_promote|
BCH_READ_user_mapped));
@@ -172,6 +177,9 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
rbio->split = true;
rbio->parent = orig;
rbio->opts = orig->opts;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ rbio->list_idx = 0;
+#endif
return rbio;
}
@@ -189,9 +197,16 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio,
rbio->ret = 0;
rbio->opts = opts;
rbio->bio.bi_end_io = end_io;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ rbio->list_idx = 0;
+#endif
return rbio;
}
+struct promote_op;
+void bch2_promote_op_to_text(struct printbuf *, struct promote_op *);
+void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *);
+
void bch2_fs_io_read_exit(struct bch_fs *);
int bch2_fs_io_read_init(struct bch_fs *);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index c1237da079ed..52a60982a66b 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -6,6 +6,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
#include "bkey_buf.h"
#include "bset.h"
#include "btree_update.h"
@@ -15,6 +16,7 @@
#include "compress.h"
#include "debug.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extent_update.h"
#include "inode.h"
@@ -263,11 +265,9 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0",
extent_iter->pos.inode, bi_sectors, i_sectors_delta);
- bool repeat = false, print = false, suppress = false;
- bch2_count_fsck_err(c, inode_i_sectors_underflow, buf.buf,
- &repeat, &print, &suppress);
+ bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf);
if (print)
- bch2_print_str(c, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
if (i_sectors_delta < 0)
@@ -280,6 +280,12 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
inode_update_flags = 0;
}
+ /*
+ * extents, dirents and xattrs updates require that an inode update also
+ * happens - to ensure that if a key exists in one of those btrees with
+ * a given snapshot ID an inode is also present - so we may have to skip
+ * the nojournal optimization:
+ */
if (inode->k.p.snapshot != iter.snapshot) {
inode->k.p.snapshot = iter.snapshot;
inode_update_flags = 0;
@@ -397,8 +403,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
bkey_start_pos(&sk.k->k),
BTREE_ITER_slots|BTREE_ITER_intent);
- ret = bch2_bkey_set_needs_rebalance(c, &op->opts, sk.k) ?:
- bch2_extent_update(trans, inum, &iter, sk.k,
+ ret = bch2_extent_update(trans, inum, &iter, sk.k,
&op->res,
op->new_i_size, &op->i_sectors_delta,
op->flags & BCH_WRITE_check_enospc);
@@ -462,9 +467,17 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
struct bch_write_bio *n;
+ unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE;
+ unsigned ref_idx = type == BCH_DATA_btree
+ ? BCH_DEV_READ_REF_btree_node_write
+ : BCH_DEV_WRITE_REF_io_write;
BUG_ON(c->opts.nochanges);
+ const struct bch_extent_ptr *last = NULL;
+ bkey_for_each_ptr(ptrs, ptr)
+ last = ptr;
+
bkey_for_each_ptr(ptrs, ptr) {
/*
* XXX: btree writes should be using io_ref[WRITE], but we
@@ -473,9 +486,9 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
*/
struct bch_dev *ca = nocow
? bch2_dev_have_ref(c, ptr->dev)
- : bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? READ : WRITE);
+ : bch2_dev_get_ioref(c, ptr->dev, ref_rw, ref_idx);
- if (to_entry(ptr + 1) < ptrs.end) {
+ if (ptr != last) {
n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOFS, &c->replica_set));
n->bio.bi_end_io = wbio->bio.bi_end_io;
@@ -533,11 +546,12 @@ static void bch2_write_done(struct closure *cl)
bch2_disk_reservation_put(c, &op->res);
if (!(op->flags & BCH_WRITE_move))
- bch2_write_ref_put(c, BCH_WRITE_REF_write);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys);
EBUG_ON(cl->parent);
closure_debug_destroy(cl);
+ async_object_list_del(c, write_op, op->list_idx);
if (op->end_io)
op->end_io(op);
}
@@ -748,7 +762,8 @@ static void bch2_write_endio(struct bio *bio)
}
if (wbio->have_ioref)
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_io_write);
if (wbio->bounce)
bch2_bio_free_pages_pool(c, bio);
@@ -784,6 +799,9 @@ static void init_append_extent(struct bch_write_op *op,
bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
op->flags & BCH_WRITE_cached);
+ if (!(op->flags & BCH_WRITE_move))
+ bch2_bkey_set_needs_rebalance(op->c, &op->opts, &e->k_i);
+
bch2_keylist_push(&op->insert_keys);
}
@@ -1345,7 +1363,8 @@ retry:
/* Get iorefs before dropping btree locks: */
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
+ BCH_DEV_WRITE_REF_io_write);
if (unlikely(!ca))
goto err_get_ioref;
@@ -1447,7 +1466,8 @@ err:
return;
err_get_ioref:
darray_for_each(buckets, i)
- percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]);
+ enumerated_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_io_write);
/* Fall back to COW path: */
goto out;
@@ -1661,6 +1681,8 @@ CLOSURE_CALLBACK(bch2_write)
BUG_ON(!op->write_point.v);
BUG_ON(bkey_eq(op->pos, POS_MAX));
+ async_object_list_add(c, write_op, op, &op->list_idx);
+
if (op->flags & BCH_WRITE_only_specified_devs)
op->flags |= BCH_WRITE_alloc_nowait;
@@ -1681,7 +1703,7 @@ CLOSURE_CALLBACK(bch2_write)
}
if (!(op->flags & BCH_WRITE_move) &&
- !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
+ !enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
}
@@ -1705,6 +1727,7 @@ err:
bch2_disk_reservation_put(c, &op->res);
closure_debug_destroy(&op->cl);
+ async_object_list_del(c, write_op, op->list_idx);
if (op->end_io)
op->end_io(op);
}
@@ -1738,13 +1761,13 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_printf(out, "nr_replicas_required:\t%u\n", op->nr_replicas_required);
prt_printf(out, "ref:\t%u\n", closure_nr_remaining(&op->cl));
+ prt_printf(out, "ret\t%s\n", bch2_err_str(op->error));
printbuf_indent_sub(out, 2);
}
void bch2_fs_io_write_exit(struct bch_fs *c)
{
- mempool_exit(&c->bio_bounce_pages);
bioset_exit(&c->replica_set);
bioset_exit(&c->bio_write);
}
@@ -1755,12 +1778,5 @@ int bch2_fs_io_write_init(struct bch_fs *c)
bioset_init(&c->replica_set, 4, offsetof(struct bch_write_bio, bio), 0))
return -BCH_ERR_ENOMEM_bio_write_init;
- if (mempool_init_page_pool(&c->bio_bounce_pages,
- max_t(unsigned,
- c->opts.btree_node_size,
- c->opts.encoded_extent_max) /
- PAGE_SIZE, 0))
- return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
-
return 0;
}
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index b8ab19a1e1da..2c0a8f35ee1f 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -17,34 +17,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
__printf(3, 4)
void bch2_write_op_error(struct bch_write_op *op, u64, const char *, ...);
-#define BCH_WRITE_FLAGS() \
- x(alloc_nowait) \
- x(cached) \
- x(data_encoded) \
- x(pages_stable) \
- x(pages_owned) \
- x(only_specified_devs) \
- x(wrote_data_inline) \
- x(check_enospc) \
- x(sync) \
- x(move) \
- x(in_worker) \
- x(submitted) \
- x(io_error) \
- x(convert_unwritten)
-
-enum __bch_write_flags {
-#define x(f) __BCH_WRITE_##f,
- BCH_WRITE_FLAGS()
-#undef x
-};
-
-enum bch_write_flags {
-#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
- BCH_WRITE_FLAGS()
-#undef x
-};
-
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
return op->watermark == BCH_WATERMARK_copygc
diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h
index 3ef6df9145ef..5da4eb8bb6f6 100644
--- a/fs/bcachefs/io_write_types.h
+++ b/fs/bcachefs/io_write_types.h
@@ -13,6 +13,34 @@
#include <linux/llist.h>
#include <linux/workqueue.h>
+#define BCH_WRITE_FLAGS() \
+ x(alloc_nowait) \
+ x(cached) \
+ x(data_encoded) \
+ x(pages_stable) \
+ x(pages_owned) \
+ x(only_specified_devs) \
+ x(wrote_data_inline) \
+ x(check_enospc) \
+ x(sync) \
+ x(move) \
+ x(in_worker) \
+ x(submitted) \
+ x(io_error) \
+ x(convert_unwritten)
+
+enum __bch_write_flags {
+#define x(f) __BCH_WRITE_##f,
+ BCH_WRITE_FLAGS()
+#undef x
+};
+
+enum bch_write_flags {
+#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
+ BCH_WRITE_FLAGS()
+#undef x
+};
+
struct bch_write_bio {
struct_group(wbio,
struct bch_fs *c;
@@ -43,6 +71,10 @@ struct bch_write_op {
void (*end_io)(struct bch_write_op *);
u64 start_time;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
+
unsigned written; /* sectors */
u16 flags;
s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index bb45d3634194..09b70fd140a1 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -12,6 +12,7 @@
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
@@ -173,7 +174,7 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
spin_unlock(&j->lock);
prt_printf(&buf, bch2_fmt(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)"),
bch2_err_str(error));
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_reset(&buf);
bch2_journal_pins_to_text(&buf, j);
@@ -331,16 +332,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
__bch2_journal_buf_put(j, le64_to_cpu(buf->data->seq));
}
-void bch2_journal_halt(struct journal *j)
-{
- spin_lock(&j->lock);
- __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true);
- if (!j->err_seq)
- j->err_seq = journal_cur_seq(j);
- journal_wake(j);
- spin_unlock(&j->lock);
-}
-
void bch2_journal_halt_locked(struct journal *j)
{
lockdep_assert_held(&j->lock);
@@ -351,6 +342,13 @@ void bch2_journal_halt_locked(struct journal *j)
journal_wake(j);
}
+void bch2_journal_halt(struct journal *j)
+{
+ spin_lock(&j->lock);
+ bch2_journal_halt_locked(j);
+ spin_unlock(&j->lock);
+}
+
static bool journal_entry_want_write(struct journal *j)
{
bool ret = !journal_entry_is_open(j) ||
@@ -417,7 +415,7 @@ static int journal_entry_open(struct journal *j)
if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
return -BCH_ERR_journal_max_open;
- if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
+ if (unlikely(journal_cur_seq(j) >= JOURNAL_SEQ_MAX)) {
bch_err(c, "cannot start: journal seq overflow");
if (bch2_fs_emergency_read_only_locked(c))
bch_err(c, "fatal error - emergency read only");
@@ -461,6 +459,14 @@ static int journal_entry_open(struct journal *j)
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
+ if (unlikely(bch2_journal_seq_is_blacklisted(c, journal_cur_seq(j), false))) {
+ bch_err(c, "attempting to open blacklisted journal seq %llu",
+ journal_cur_seq(j));
+ if (bch2_fs_emergency_read_only_locked(c))
+ bch_err(c, "fatal error - emergency read only");
+ return -BCH_ERR_journal_shutdown;
+ }
+
BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
@@ -702,8 +708,10 @@ static unsigned max_dev_latency(struct bch_fs *c)
{
u64 nsecs = 0;
- for_each_rw_member(c, ca)
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca)
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
+ rcu_read_unlock();
return nsecs_to_jiffies(nsecs);
}
@@ -746,7 +754,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
struct printbuf buf = PRINTBUF;
bch2_journal_debug_to_text(&buf, j);
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret));
printbuf_exit(&buf);
@@ -990,11 +998,11 @@ int bch2_journal_meta(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_journal))
return -BCH_ERR_erofs_no_writes;
int ret = __bch2_journal_meta(j);
- bch2_write_ref_put(c, BCH_WRITE_REF_journal);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_journal);
return ret;
}
@@ -1298,6 +1306,16 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
+ struct bch_fs *c = ca->fs;
+
+ if (!(ca->mi.data_allowed & BIT(BCH_DATA_journal)))
+ return 0;
+
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_err(c, "cannot allocate journal, filesystem is an unresized image file");
+ return -BCH_ERR_erofs_filesystem_full;
+ }
+
unsigned nr;
int ret;
@@ -1318,7 +1336,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs);
+ ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs);
err:
bch_err_fn(ca, ret);
return ret;
@@ -1326,13 +1344,14 @@ err:
int bch2_fs_journal_alloc(struct bch_fs *c)
{
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_journal_alloc) {
if (ca->journal.nr)
continue;
int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_fs_journal_alloc);
return ret;
}
}
@@ -1404,6 +1423,13 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
bool had_entries = false;
u64 last_seq = cur_seq, nr, seq;
+ /*
+ *
+ * XXX pick most recent non blacklisted sequence number
+ */
+
+ cur_seq = max(cur_seq, bch2_journal_last_blacklisted_seq(c));
+
if (cur_seq >= JOURNAL_SEQ_MAX) {
bch_err(c, "cannot start: journal seq overflow");
return -EINVAL;
@@ -1429,13 +1455,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
*/
nr += nr / 4;
- if (nr + 1 > j->pin.size) {
- free_fifo(&j->pin);
- init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
- if (!j->pin.data) {
- bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
- return -BCH_ERR_ENOMEM_journal_pin_fifo;
- }
+ nr = max(nr, JOURNAL_PIN);
+ init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
+ if (!j->pin.data) {
+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+ return -BCH_ERR_ENOMEM_journal_pin_fifo;
}
j->replay_journal_seq = last_seq;
@@ -1590,7 +1614,7 @@ void bch2_fs_journal_exit(struct journal *j)
free_fifo(&j->pin);
}
-int bch2_fs_journal_init(struct journal *j)
+void bch2_fs_journal_init_early(struct journal *j)
{
static struct lock_class_key res_key;
@@ -1609,10 +1633,10 @@ int bch2_fs_journal_init(struct journal *j)
atomic64_set(&j->reservations.counter,
((union journal_res_state)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
+}
- if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
- return -BCH_ERR_ENOMEM_journal_pin_fifo;
-
+int bch2_fs_journal_init(struct journal *j)
+{
j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
if (!j->free_buf)
@@ -1621,8 +1645,6 @@ int bch2_fs_journal_init(struct journal *j)
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
j->buf[i].idx = i;
- j->pin.front = j->pin.back = 1;
-
j->wq = alloc_workqueue("bcachefs_journal",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512);
if (!j->wq)
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 641e20c05a14..8ff00a0ec778 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -426,8 +426,8 @@ int bch2_journal_flush(struct journal *);
bool bch2_journal_noflush_seq(struct journal *, u64, u64);
int bch2_journal_meta(struct journal *);
-void bch2_journal_halt(struct journal *);
void bch2_journal_halt_locked(struct journal *);
+void bch2_journal_halt(struct journal *);
static inline int bch2_journal_error(struct journal *j)
{
@@ -458,6 +458,7 @@ void bch2_journal_set_replay_done(struct journal *);
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
void bch2_fs_journal_exit(struct journal *);
+void bch2_fs_journal_init_early(struct journal *);
int bch2_fs_journal_init(struct journal *);
#endif /* _BCACHEFS_JOURNAL_H */
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index ded18a94ed02..63bb207208b2 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1219,7 +1219,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
out:
bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
kvfree(buf.data);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_journal_read);
closure_return(cl);
return;
err:
@@ -1254,7 +1254,8 @@ int bch2_journal_read(struct bch_fs *c,
if ((ca->mi.state == BCH_MEMBER_STATE_rw ||
ca->mi.state == BCH_MEMBER_STATE_ro) &&
- percpu_ref_tryget(&ca->io_ref[READ]))
+ enumerated_ref_tryget(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_journal_read))
closure_call(&ca->journal.read,
bch2_journal_read_device,
system_unbound_wq,
@@ -1405,7 +1406,7 @@ int bch2_journal_read(struct bch_fs *c,
}
genradix_for_each(&c->journal_entries, radix_iter, _i) {
- struct bch_replicas_padded replicas = {
+ union bch_replicas_padded replicas = {
.e.data_type = BCH_DATA_journal,
.e.nr_devs = 0,
.e.nr_required = 1,
@@ -1466,6 +1467,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ rcu_read_lock();
darray_for_each(*devs, i) {
struct bch_dev *ca = rcu_dereference(c->devs[*i]);
if (!ca)
@@ -1487,6 +1489,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
}
}
+ rcu_read_unlock();
}
static void __journal_write_alloc(struct journal *j,
@@ -1499,7 +1502,8 @@ static void __journal_write_alloc(struct journal *j,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
darray_for_each(*devs, i) {
- struct bch_dev *ca = rcu_dereference(c->devs[*i]);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, *i, WRITE,
+ BCH_DEV_WRITE_REF_journal_write);
if (!ca)
continue;
@@ -1513,8 +1517,10 @@ static void __journal_write_alloc(struct journal *j,
ca->mi.state != BCH_MEMBER_STATE_rw ||
!ja->nr ||
bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
- sectors > ja->sectors_free)
+ sectors > ja->sectors_free) {
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
continue;
+ }
bch2_dev_stripe_increment(ca, &j->wp.stripe);
@@ -1537,15 +1543,8 @@ static void __journal_write_alloc(struct journal *j,
}
}
-/**
- * journal_write_alloc - decide where to write next journal entry
- *
- * @j: journal object
- * @w: journal buf (entry to be written)
- *
- * Returns: 0 on success, or -BCH_ERR_insufficient_devices on failure
- */
-static int journal_write_alloc(struct journal *j, struct journal_buf *w)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+ unsigned *replicas)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_devs_mask devs;
@@ -1553,29 +1552,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
unsigned target = c->opts.metadata_target ?:
c->opts.foreground_target;
- unsigned replicas = 0, replicas_want =
- READ_ONCE(c->opts.metadata_replicas);
+ unsigned replicas_want = READ_ONCE(c->opts.metadata_replicas);
unsigned replicas_need = min_t(unsigned, replicas_want,
READ_ONCE(c->opts.metadata_replicas_required));
bool advance_done = false;
- rcu_read_lock();
-
- /* We might run more than once if we have to stop and do discards: */
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key));
- bkey_for_each_ptr(ptrs, p) {
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev);
- if (ca)
- replicas += ca->mi.durability;
- }
-
retry_target:
devs = target_rw_devs(c, BCH_DATA_journal, target);
devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
retry_alloc:
- __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want);
+ __journal_write_alloc(j, w, &devs_sorted, sectors, replicas, replicas_want);
- if (likely(replicas >= replicas_want))
+ if (likely(*replicas >= replicas_want))
goto done;
if (!advance_done) {
@@ -1584,18 +1572,16 @@ retry_alloc:
goto retry_alloc;
}
- if (replicas < replicas_want && target) {
+ if (*replicas < replicas_want && target) {
/* Retry from all devices: */
target = 0;
advance_done = false;
goto retry_target;
}
done:
- rcu_read_unlock();
-
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
- return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
+ return *replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
}
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
@@ -1633,7 +1619,7 @@ static CLOSURE_CALLBACK(journal_write_done)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct bch_replicas_padded replicas;
+ union bch_replicas_padded replicas;
u64 seq = le64_to_cpu(w->data->seq);
int err = 0;
@@ -1642,8 +1628,6 @@ static CLOSURE_CALLBACK(journal_write_done)
: j->noflush_write_time, j->write_start_time);
if (!w->devs_written.nr) {
- if (!bch2_journal_error(j))
- bch_err(c, "unable to write journal to sufficient devices");
err = -BCH_ERR_journal_write_err;
} else {
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
@@ -1651,8 +1635,20 @@ static CLOSURE_CALLBACK(journal_write_done)
err = bch2_mark_replicas(c, &replicas.e);
}
- if (err)
- bch2_fatal_error(c);
+ if (err && !bch2_journal_error(j)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ if (err == -BCH_ERR_journal_write_err)
+ prt_printf(&buf, "unable to write journal to sufficient devices");
+ else
+ prt_printf(&buf, "journal write error marking replicas: %s", bch2_err_str(err));
+
+ bch2_fs_emergency_read_only2(c, &buf);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
closure_debug_destroy(cl);
@@ -1770,7 +1766,7 @@ static void journal_write_endio(struct bio *bio)
}
closure_put(&w->io);
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
}
static CLOSURE_CALLBACK(journal_write_submit)
@@ -1781,12 +1777,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
- if (!ca) {
- /* XXX: fix this */
- bch_err(c, "missing device %u for journal write", ptr->dev);
- continue;
- }
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
sectors);
@@ -1844,8 +1835,9 @@ static CLOSURE_CALLBACK(journal_write_preflush)
}
if (w->separate_flush) {
- for_each_rw_member(c, ca) {
- percpu_ref_get(&ca->io_ref[WRITE]);
+ for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_write) {
+ enumerated_ref_get(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_journal_write);
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio[w->idx]->bio;
@@ -1872,9 +1864,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
struct jset_entry *start, *end;
struct jset *jset = w->data;
struct journal_keys_to_wb wb = { NULL };
- unsigned sectors, bytes, u64s;
+ unsigned u64s;
unsigned long btree_roots_have = 0;
- bool validate_before_checksum = false;
u64 seq = le64_to_cpu(jset->seq);
int ret;
@@ -1957,8 +1948,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
le32_add_cpu(&jset->u64s, u64s);
- sectors = vstruct_sectors(jset, c->block_bits);
- bytes = vstruct_bytes(jset);
+ unsigned sectors = vstruct_sectors(jset, c->block_bits);
if (sectors > w->sectors) {
bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
@@ -1967,6 +1957,17 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
return -EINVAL;
}
+ return 0;
+}
+
+static int bch2_journal_write_checksum(struct journal *j, struct journal_buf *w)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct jset *jset = w->data;
+ u64 seq = le64_to_cpu(jset->seq);
+ bool validate_before_checksum = false;
+ int ret = 0;
+
jset->magic = cpu_to_le64(jset_magic(c));
jset->version = cpu_to_le32(c->sb.version);
@@ -1989,7 +1990,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
jset->encrypted_start,
vstruct_end(jset) - (void *) jset->encrypted_start);
- if (bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)))
+ if (bch2_fs_fatal_err_on(ret, c, "encrypting journal entry: %s", bch2_err_str(ret)))
return ret;
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
@@ -1999,6 +2000,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
(ret = jset_validate(c, NULL, jset, 0, WRITE)))
return ret;
+ unsigned sectors = vstruct_sectors(jset, c->block_bits);
+ unsigned bytes = vstruct_bytes(jset);
memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
return 0;
}
@@ -2054,13 +2057,10 @@ CLOSURE_CALLBACK(bch2_journal_write)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct bch_replicas_padded replicas;
- unsigned nr_rw_members = 0;
+ union bch_replicas_padded replicas;
+ unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]);
int ret;
- for_each_rw_member(c, ca)
- nr_rw_members++;
-
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
BUG_ON(!w->write_started);
BUG_ON(w->write_allocated);
@@ -2074,7 +2074,8 @@ CLOSURE_CALLBACK(bch2_journal_write)
ret = bch2_journal_write_pick_flush(j, w);
spin_unlock(&j->lock);
- if (ret)
+
+ if (unlikely(ret))
goto err;
mutex_lock(&j->buf_lock);
@@ -2082,43 +2083,34 @@ CLOSURE_CALLBACK(bch2_journal_write)
ret = bch2_journal_write_prep(j, w);
mutex_unlock(&j->buf_lock);
- if (ret)
- goto err;
- j->entry_bytes_written += vstruct_bytes(w->data);
+ if (unlikely(ret))
+ goto err;
+ unsigned replicas_allocated = 0;
while (1) {
- spin_lock(&j->lock);
- ret = journal_write_alloc(j, w);
+ ret = journal_write_alloc(j, w, &replicas_allocated);
if (!ret || !j->can_discard)
break;
- spin_unlock(&j->lock);
bch2_journal_do_discards(j);
}
- if (ret && !bch2_journal_error(j)) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ if (unlikely(ret))
+ goto err_allocate_write;
- __bch2_journal_debug_to_text(&buf, j);
- spin_unlock(&j->lock);
- prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
- le64_to_cpu(w->data->seq),
- vstruct_sectors(w->data, c->block_bits),
- bch2_err_str(ret));
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
- printbuf_exit(&buf);
- }
- if (ret)
+ ret = bch2_journal_write_checksum(j, w);
+ if (unlikely(ret))
goto err;
+ spin_lock(&j->lock);
/*
* write is allocated, no longer need to account for it in
* bch2_journal_space_available():
*/
w->sectors = 0;
w->write_allocated = true;
+ j->entry_bytes_written += vstruct_bytes(w->data);
/*
* journal entry has been compacted and allocated, recalculate space
@@ -2130,9 +2122,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
- if (c->opts.nochanges)
- goto no_io;
-
/*
* Mark journal replicas before we submit the write to guarantee
* recovery will find the journal entries after a crash.
@@ -2143,15 +2132,33 @@ CLOSURE_CALLBACK(bch2_journal_write)
if (ret)
goto err;
+ if (c->opts.nochanges)
+ goto no_io;
+
if (!JSET_NO_FLUSH(w->data))
continue_at(cl, journal_write_preflush, j->wq);
else
continue_at(cl, journal_write_submit, j->wq);
return;
-no_io:
- continue_at(cl, journal_write_done, j->wq);
- return;
+err_allocate_write:
+ if (!bch2_journal_error(j)) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_journal_debug_to_text(&buf, j);
+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
+ le64_to_cpu(w->data->seq),
+ vstruct_sectors(w->data, c->block_bits),
+ bch2_err_str(ret));
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
err:
bch2_fatal_error(c);
+no_io:
+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
+ }
+
continue_at(cl, journal_write_done, j->wq);
}
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 976464d8a695..70f36f6bc482 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -17,6 +17,8 @@
#include <linux/kthread.h>
#include <linux/sched/mm.h>
+static bool __should_discard_bucket(struct journal *, struct journal_device *);
+
/* Free space calculations: */
static unsigned journal_space_from(struct journal_device *ja,
@@ -203,8 +205,7 @@ void bch2_journal_space_available(struct journal *j)
ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
- if (ja->discard_idx != ja->dirty_idx_ondisk)
- can_discard = true;
+ can_discard |= __should_discard_bucket(j, ja);
max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size);
nr_online++;
@@ -214,18 +215,20 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard;
if (nr_online < metadata_replicas_required(c)) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
- prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
- "rw journal devs:", nr_online, metadata_replicas_required(c));
-
- rcu_read_lock();
- for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
- prt_printf(&buf, " %s", ca->name);
- rcu_read_unlock();
-
- bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
+ if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) {
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+ prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
+ "rw journal devs:", nr_online, metadata_replicas_required(c));
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
+ prt_printf(&buf, " %s", ca->name);
+ rcu_read_unlock();
+
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
ret = -BCH_ERR_insufficient_journal_devices;
goto out;
}
@@ -264,13 +267,19 @@ out:
/* Discards - last part of journal reclaim: */
-static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
+static bool __should_discard_bucket(struct journal *j, struct journal_device *ja)
{
- spin_lock(&j->lock);
unsigned min_free = max(4, ja->nr / 8);
- bool ret = bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < min_free &&
+ return bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) <
+ min_free &&
ja->discard_idx != ja->dirty_idx_ondisk;
+}
+
+static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
+{
+ spin_lock(&j->lock);
+ bool ret = __should_discard_bucket(j, ja);
spin_unlock(&j->lock);
return ret;
@@ -286,12 +295,12 @@ void bch2_journal_do_discards(struct journal *j)
mutex_lock(&j->discard_lock);
- for_each_rw_member(c, ca) {
+ for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) {
struct journal_device *ja = &ca->journal;
while (should_discard_bucket(j, ja)) {
if (!c->opts.nochanges &&
- ca->mi.discard &&
+ bch2_discard_opt_enabled(c, ca) &&
bdev_max_discard_sectors(ca->disk_sb.bdev))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
@@ -618,7 +627,8 @@ static u64 journal_seq_to_flush(struct journal *j)
spin_lock(&j->lock);
- for_each_rw_member(c, ca) {
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca) {
struct journal_device *ja = &ca->journal;
unsigned nr_buckets, bucket_to_flush;
@@ -628,12 +638,11 @@ static u64 journal_seq_to_flush(struct journal *j)
/* Try to keep the journal at most half full: */
nr_buckets = ja->nr / 2;
- nr_buckets = min(nr_buckets, ja->nr);
-
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
seq_to_flush = max(seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
}
+ rcu_read_unlock();
/* Also flush if the pin fifo is more than half full */
seq_to_flush = max_t(s64, seq_to_flush,
@@ -692,6 +701,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
if (ret)
break;
+ /* XXX shove journal discards off to another thread */
bch2_journal_do_discards(j);
seq_to_flush = journal_seq_to_flush(j);
@@ -954,7 +964,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
seq = 0;
spin_lock(&j->lock);
while (!ret) {
- struct bch_replicas_padded replicas;
+ union bch_replicas_padded replicas;
seq = max(seq, journal_last_seq(j));
if (seq >= j->pin.back)
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index e463d2d95359..c5a7d800a0f5 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -130,6 +130,16 @@ bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
return true;
}
+u64 bch2_journal_last_blacklisted_seq(struct bch_fs *c)
+{
+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+
+ if (!t || !t->nr)
+ return 0;
+
+ return t->entries[eytzinger0_last(t->nr)].end - 1;
+}
+
int bch2_blacklist_table_initialize(struct bch_fs *c)
{
struct bch_sb_field_journal_seq_blacklist *bl =
diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h
index d47636f96fdc..f06942ccfcdd 100644
--- a/fs/bcachefs/journal_seq_blacklist.h
+++ b/fs/bcachefs/journal_seq_blacklist.h
@@ -12,6 +12,7 @@ blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
}
bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+u64 bch2_journal_last_blacklisted_seq(struct bch_fs *);
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
int bch2_blacklist_table_initialize(struct bch_fs *);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 8e0eba776b9d..51104bbb99da 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -151,8 +151,6 @@ enum journal_flags {
#undef x
};
-typedef DARRAY(u64) darray_u64;
-
struct journal_bio {
struct bch_dev *ca;
unsigned buf_idx;
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 90dcf80bd64a..bb7a92270c09 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -4,10 +4,13 @@
*/
#include "bcachefs.h"
+#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
#include "buckets.h"
+#include "ec.h"
#include "errcode.h"
#include "extents.h"
#include "io_write.h"
@@ -20,7 +23,7 @@
#include "super-io.h"
static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
- unsigned dev_idx, int flags, bool metadata)
+ unsigned dev_idx, unsigned flags, bool metadata)
{
unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
@@ -37,11 +40,28 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
return 0;
}
+static int drop_btree_ptrs(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree *b, unsigned dev_idx, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf k;
+
+ bch2_bkey_buf_init(&k);
+ bch2_bkey_buf_copy(&k, c, &b->key);
+
+ int ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true) ?:
+ bch2_btree_node_update_key(trans, iter, b, k.k, 0, false);
+
+ bch_err_fn(c, ret);
+ bch2_bkey_buf_exit(&k, c);
+ return ret;
+}
+
static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
unsigned dev_idx,
- int flags)
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_i *n;
@@ -77,9 +97,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
return 0;
}
+static int bch2_dev_btree_drop_key(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ unsigned dev_idx,
+ struct bkey_buf *last_flushed,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct btree *b = bch2_backpointer_get_node(trans, bp, &iter, last_flushed);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ return ret == -BCH_ERR_backpointer_to_overwritten_btree_node ? 0 : ret;
+
+ ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
static int bch2_dev_usrdata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
- unsigned dev_idx, int flags)
+ unsigned dev_idx, unsigned flags)
{
struct btree_trans *trans = bch2_trans_get(c);
enum btree_id id;
@@ -106,7 +144,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
static int bch2_dev_metadata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
- unsigned dev_idx, int flags)
+ unsigned dev_idx, unsigned flags)
{
struct btree_trans *trans;
struct btree_iter iter;
@@ -137,20 +175,12 @@ retry:
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
goto next;
- bch2_bkey_buf_copy(&k, c, &b->key);
-
- ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
- dev_idx, flags, true);
- if (ret)
- break;
-
- ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
+ ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
- bch_err_msg(c, ret, "updating btree node key");
if (ret)
break;
next:
@@ -176,7 +206,66 @@ err:
return ret;
}
-int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx,
+ struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent,
+ last_flushed);
+ int ret = bkey_err(k);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ return 0;
+ if (ret)
+ return ret;
+
+ if (!k.k || !bch2_bkey_has_device_c(k, dev_idx))
+ goto out;
+
+ /*
+ * XXX: pass flags arg to invalidate_stripe_to_dev and handle it
+ * properly
+ */
+
+ if (bkey_is_btree_ptr(k.k))
+ ret = bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags);
+ else if (k.k->type == KEY_TYPE_stripe)
+ ret = bch2_invalidate_stripe_to_dev(trans, &iter, k, dev_idx, flags);
+ else
+ ret = bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
+ POS(dev_idx, 0),
+ POS(dev_idx, U64_MAX), 0, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ if (k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
+ &last_flushed, flags);
+
+ }));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ bch2_trans_put(trans);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
struct progress_indicator_state progress;
bch2_progress_init(&progress, c,
diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h
index 027efaa0d575..30018140711b 100644
--- a/fs/bcachefs/migrate.h
+++ b/fs/bcachefs/migrate.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MIGRATE_H
#define _BCACHEFS_MIGRATE_H
-int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *, unsigned, unsigned);
+int bch2_dev_data_drop(struct bch_fs *, unsigned, unsigned);
#endif /* _BCACHEFS_MIGRATE_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index dfdbb9259985..79f4722621d5 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -67,7 +67,7 @@ static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
struct moving_io {
struct list_head read_list;
struct list_head io_list;
- struct move_bucket_in_flight *b;
+ struct move_bucket *b;
struct closure cl;
bool read_completed;
@@ -109,7 +109,6 @@ static void move_write_done(struct bch_write_op *op)
struct printbuf buf = PRINTBUF;
bch2_write_op_to_text(&buf, op);
- prt_printf(&buf, "ret\t%s\n", bch2_err_str(op->error));
trace_io_move_write_fail(c, buf.buf);
printbuf_exit(&buf);
}
@@ -126,26 +125,40 @@ static void move_write_done(struct bch_write_op *op)
static void move_write(struct moving_io *io)
{
+ struct bch_fs *c = io->write.op.c;
struct moving_context *ctxt = io->write.ctxt;
+ struct bch_read_bio *rbio = &io->write.rbio;
if (ctxt->stats) {
- if (io->write.rbio.bio.bi_status)
+ if (rbio->bio.bi_status)
atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
&ctxt->stats->sectors_error_uncorrected);
- else if (io->write.rbio.saw_error)
+ else if (rbio->saw_error)
atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
&ctxt->stats->sectors_error_corrected);
}
- if (unlikely(io->write.rbio.ret ||
- io->write.rbio.bio.bi_status ||
- io->write.data_opts.scrub)) {
+ /*
+ * If the extent has been bitrotted, we're going to have to give it a
+ * new checksum in order to move it - but the poison bit will ensure
+ * that userspace still gets the appropriate error.
+ */
+ if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err &&
+ (bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
+ struct bch_extent_crc_unpacked crc = rbio->pick.crc;
+ struct nonce nonce = extent_nonce(rbio->version, crc);
+
+ rbio->pick.crc.csum = bch2_checksum_bio(c, rbio->pick.crc.csum_type,
+ nonce, &rbio->bio);
+ rbio->ret = 0;
+ }
+
+ if (unlikely(rbio->ret || io->write.data_opts.scrub)) {
move_free(io);
return;
}
if (trace_io_move_write_enabled()) {
- struct bch_fs *c = io->write.op.c;
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
@@ -275,7 +288,7 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
}
int bch2_move_extent(struct moving_context *ctxt,
- struct move_bucket_in_flight *bucket_in_flight,
+ struct move_bucket *bucket_in_flight,
struct btree_iter *iter,
struct bkey_s_c k,
struct bch_io_opts io_opts,
@@ -398,7 +411,7 @@ err:
return ret;
}
-static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct per_snapshot_io_opts *io_opts,
struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
struct btree_iter *extent_iter,
@@ -409,6 +422,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct bch_io_opts *opts_ret = &io_opts->fs_io_opts;
int ret = 0;
+ if (extent_iter->min_depth)
+ return opts_ret;
+
if (extent_k.k->type == KEY_TYPE_reflink_v)
goto out;
@@ -559,11 +575,11 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *
return k;
}
-static int bch2_move_data_btree(struct moving_context *ctxt,
- struct bpos start,
- struct bpos end,
- move_pred_fn pred, void *arg,
- enum btree_id btree_id)
+int bch2_move_data_btree(struct moving_context *ctxt,
+ struct bpos start,
+ struct bpos end,
+ move_pred_fn pred, void *arg,
+ enum btree_id btree_id, unsigned level)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
@@ -589,11 +605,56 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
ctxt->stats->pos = BBPOS(btree_id, start);
}
+retry_root:
bch2_trans_begin(trans);
- bch2_trans_iter_init(trans, &iter, btree_id, start,
- BTREE_ITER_prefetch|
- BTREE_ITER_not_extents|
- BTREE_ITER_all_snapshots);
+
+ if (level == bch2_btree_id_root(c, btree_id)->level + 1) {
+ bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level - 1,
+ BTREE_ITER_prefetch|
+ BTREE_ITER_not_extents|
+ BTREE_ITER_all_snapshots);
+ struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto root_err;
+
+ if (b != btree_node_root(c, b)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto retry_root;
+ }
+
+ k = bkey_i_to_s_c(&b->key);
+
+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
+ iter.pos, &iter, k);
+ ret = PTR_ERR_OR_ZERO(io_opts);
+ if (ret)
+ goto root_err;
+
+ memset(&data_opts, 0, sizeof(data_opts));
+ if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts))
+ goto out;
+
+
+ if (!data_opts.scrub)
+ ret = bch2_btree_node_rewrite_pos(trans, btree_id, level,
+ k.k->p, data_opts.target, 0);
+ else
+ ret = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev);
+
+root_err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto retry_root;
+ }
+
+ goto out;
+ }
+
+ bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level,
+ BTREE_ITER_prefetch|
+ BTREE_ITER_not_extents|
+ BTREE_ITER_all_snapshots);
if (ctxt->rate)
bch2_ratelimit_reset(ctxt->rate);
@@ -613,7 +674,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
if (ret)
break;
- if (bkey_ge(bkey_start_pos(k.k), end))
+ if (bkey_gt(bkey_start_pos(k.k), end))
break;
if (ctxt->stats)
@@ -653,7 +714,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
continue;
memset(&data_opts, 0, sizeof(data_opts));
- if (!pred(c, arg, k, io_opts, &data_opts))
+ if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts))
goto next;
/*
@@ -663,7 +724,14 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
+ if (!level)
+ ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
+ else if (!data_opts.scrub)
+ ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level,
+ k.k->p, data_opts.target, 0);
+ else
+ ret2 = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev);
+
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;
@@ -681,9 +749,10 @@ next:
if (ctxt->stats)
atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
- bch2_btree_iter_advance(trans, &iter);
+ if (!bch2_btree_iter_advance(trans, &iter))
+ break;
}
-
+out:
bch2_trans_iter_exit(trans, &reflink_iter);
bch2_trans_iter_exit(trans, &iter);
bch2_bkey_buf_exit(&sk, c);
@@ -713,7 +782,7 @@ int __bch2_move_data(struct moving_context *ctxt,
ret = bch2_move_data_btree(ctxt,
id == start.btree ? start.pos : POS_MIN,
id == end.btree ? end.pos : POS_MAX,
- pred, arg, id);
+ pred, arg, id, 0);
if (ret)
break;
}
@@ -740,11 +809,12 @@ int bch2_move_data(struct bch_fs *c,
}
static int __bch2_move_data_phys(struct moving_context *ctxt,
- struct move_bucket_in_flight *bucket_in_flight,
+ struct move_bucket *bucket_in_flight,
unsigned dev,
u64 bucket_start,
u64 bucket_end,
unsigned data_types,
+ bool copygc,
move_pred_fn pred, void *arg)
{
struct btree_trans *trans = ctxt->trans;
@@ -755,6 +825,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
struct bkey_buf sk;
struct bkey_s_c k;
struct bkey_buf last_flushed;
+ u64 check_mismatch_done = bucket_start;
int ret = 0;
struct bch_dev *ca = bch2_dev_tryget(c, dev);
@@ -765,8 +836,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
- bch2_dev_put(ca);
- ca = NULL;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
@@ -779,10 +848,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0);
- bch_err_msg(c, ret, "looking up alloc key");
- if (ret)
- goto err;
-
ret = bch2_btree_write_buffer_tryflush(trans);
if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "flushing btree write buffer");
@@ -805,6 +870,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
if (!k.k || bkey_gt(k.k->p, bp_end))
break;
+ if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
+ while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
+ bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
+ copygc, &last_flushed);
+ }
+ continue;
+ }
+
if (k.k->type != KEY_TYPE_backpointer)
goto next;
@@ -837,7 +910,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
}
struct data_update_opts data_opts = {};
- if (!pred(c, arg, k, &io_opts, &data_opts)) {
+ if (!pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts)) {
bch2_trans_iter_exit(trans, &iter);
goto next;
}
@@ -858,7 +931,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
if (!bp.v->level)
ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
else if (!data_opts.scrub)
- ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
+ ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level,
+ k.k->p, data_opts.target, 0);
else
ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);
@@ -879,33 +953,41 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
next:
bch2_btree_iter_advance(trans, &bp_iter);
}
+
+ while (check_mismatch_done < bucket_end)
+ bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
+ copygc, &last_flushed);
err:
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&sk, c);
bch2_bkey_buf_exit(&last_flushed, c);
+ bch2_dev_put(ca);
return ret;
}
-static int bch2_move_data_phys(struct bch_fs *c,
- unsigned dev,
- u64 start,
- u64 end,
- unsigned data_types,
- struct bch_ratelimit *rate,
- struct bch_move_stats *stats,
- struct write_point_specifier wp,
- bool wait_on_copygc,
- move_pred_fn pred, void *arg)
+int bch2_move_data_phys(struct bch_fs *c,
+ unsigned dev,
+ u64 start,
+ u64 end,
+ unsigned data_types,
+ struct bch_ratelimit *rate,
+ struct bch_move_stats *stats,
+ struct write_point_specifier wp,
+ bool wait_on_copygc,
+ move_pred_fn pred, void *arg)
{
struct moving_context ctxt;
bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
- ctxt.stats->phys = true;
- ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
+ if (ctxt.stats) {
+ ctxt.stats->phys = true;
+ ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
+ }
- int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
+ int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end,
+ data_types, false, pred, arg);
bch2_moving_ctxt_exit(&ctxt);
return ret;
@@ -917,7 +999,8 @@ struct evacuate_bucket_arg {
struct data_update_opts data_opts;
};
-static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k,
+static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg,
+ enum btree_id btree, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
@@ -938,9 +1021,9 @@ static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k
}
int bch2_evacuate_bucket(struct moving_context *ctxt,
- struct move_bucket_in_flight *bucket_in_flight,
- struct bpos bucket, int gen,
- struct data_update_opts data_opts)
+ struct move_bucket *bucket_in_flight,
+ struct bpos bucket, int gen,
+ struct data_update_opts data_opts)
{
struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };
@@ -949,6 +1032,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
bucket.offset,
bucket.offset + 1,
~0,
+ true,
evacuate_bucket_pred, &arg);
}
@@ -1006,7 +1090,7 @@ retry:
if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
- ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0) ?: ret;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
@@ -1031,7 +1115,7 @@ next:
}
static bool rereplicate_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
+ enum btree_id btree, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
@@ -1063,7 +1147,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg,
}
static bool migrate_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
+ enum btree_id btree, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
@@ -1090,7 +1174,7 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
- return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+ return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
/*
@@ -1146,7 +1230,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
}
static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
+ enum btree_id btree, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
@@ -1179,11 +1263,12 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
- return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+ return drop_extra_replicas_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key),
+ io_opts, data_opts);
}
static bool scrub_pred(struct bch_fs *c, void *_arg,
- struct bkey_s_c k,
+ enum btree_id btree, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 51e0505a8156..86b80499ac55 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -72,7 +72,7 @@ do { \
break; \
} while (1)
-typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
+typedef bool (*move_pred_fn)(struct bch_fs *, void *, enum btree_id, struct bkey_s_c,
struct bch_io_opts *, struct data_update_opts *);
extern const char * const bch2_data_ops_strs[];
@@ -116,12 +116,18 @@ int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *,
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
int bch2_move_extent(struct moving_context *,
- struct move_bucket_in_flight *,
+ struct move_bucket *,
struct btree_iter *,
struct bkey_s_c,
struct bch_io_opts,
struct data_update_opts);
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
+ struct per_snapshot_io_opts *, struct bpos,
+ struct btree_iter *, struct bkey_s_c);
+
+int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos,
+ move_pred_fn, void *, enum btree_id, unsigned);
int __bch2_move_data(struct moving_context *,
struct bbpos,
struct bbpos,
@@ -135,8 +141,13 @@ int bch2_move_data(struct bch_fs *,
bool,
move_pred_fn, void *);
+int bch2_move_data_phys(struct bch_fs *, unsigned, u64, u64, unsigned,
+ struct bch_ratelimit *, struct bch_move_stats *,
+ struct write_point_specifier, bool,
+ move_pred_fn, void *);
+
int bch2_evacuate_bucket(struct moving_context *,
- struct move_bucket_in_flight *,
+ struct move_bucket *,
struct bpos, int,
struct data_update_opts);
int bch2_data_job(struct bch_fs *,
diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h
index 807f779f6f76..c5c62cd600de 100644
--- a/fs/bcachefs/move_types.h
+++ b/fs/bcachefs/move_types.h
@@ -36,14 +36,10 @@ struct move_bucket_key {
};
struct move_bucket {
+ struct move_bucket *next;
+ struct rhash_head hash;
struct move_bucket_key k;
unsigned sectors;
-};
-
-struct move_bucket_in_flight {
- struct move_bucket_in_flight *next;
- struct rhash_head hash;
- struct move_bucket bucket;
atomic_t count;
};
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 96873372b516..e7a2a13554d7 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "backpointers.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
@@ -27,47 +28,32 @@
#include <linux/wait.h>
struct buckets_in_flight {
- struct rhashtable table;
- struct move_bucket_in_flight *first;
- struct move_bucket_in_flight *last;
- size_t nr;
- size_t sectors;
+ struct rhashtable table;
+ struct move_bucket *first;
+ struct move_bucket *last;
+ size_t nr;
+ size_t sectors;
+
+ DARRAY(struct move_bucket *) to_evacuate;
};
static const struct rhashtable_params bch_move_bucket_params = {
- .head_offset = offsetof(struct move_bucket_in_flight, hash),
- .key_offset = offsetof(struct move_bucket_in_flight, bucket.k),
+ .head_offset = offsetof(struct move_bucket, hash),
+ .key_offset = offsetof(struct move_bucket, k),
.key_len = sizeof(struct move_bucket_key),
.automatic_shrinking = true,
};
-static struct move_bucket_in_flight *
-move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b)
+static void move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket *b)
{
- struct move_bucket_in_flight *new = kzalloc(sizeof(*new), GFP_KERNEL);
- int ret;
-
- if (!new)
- return ERR_PTR(-ENOMEM);
-
- new->bucket = b;
-
- ret = rhashtable_lookup_insert_fast(&list->table, &new->hash,
- bch_move_bucket_params);
- if (ret) {
- kfree(new);
- return ERR_PTR(ret);
- }
-
if (!list->first)
- list->first = new;
+ list->first = b;
else
- list->last->next = new;
+ list->last->next = b;
- list->last = new;
+ list->last = b;
list->nr++;
- list->sectors += b.sectors;
- return new;
+ list->sectors += b->sectors;
}
static int bch2_bucket_is_movable(struct btree_trans *trans,
@@ -89,9 +75,12 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (!ca)
goto out;
+ if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset))
+ goto out;
+
if (ca->mi.state != BCH_MEMBER_STATE_rw ||
!bch2_dev_is_online(ca))
- goto out_put;
+ goto out;
struct bch_alloc_v4 _a;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
@@ -100,19 +89,26 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
ret = lru_idx && lru_idx <= time;
-out_put:
- bch2_dev_put(ca);
out:
+ bch2_dev_put(ca);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
+static void move_bucket_free(struct buckets_in_flight *list,
+ struct move_bucket *b)
+{
+ int ret = rhashtable_remove_fast(&list->table, &b->hash,
+ bch_move_bucket_params);
+ BUG_ON(ret);
+ kfree(b);
+}
+
static void move_buckets_wait(struct moving_context *ctxt,
struct buckets_in_flight *list,
bool flush)
{
- struct move_bucket_in_flight *i;
- int ret;
+ struct move_bucket *i;
while ((i = list->first)) {
if (flush)
@@ -126,12 +122,9 @@ static void move_buckets_wait(struct moving_context *ctxt,
list->last = NULL;
list->nr--;
- list->sectors -= i->bucket.sectors;
+ list->sectors -= i->sectors;
- ret = rhashtable_remove_fast(&list->table, &i->hash,
- bch_move_bucket_params);
- BUG_ON(ret);
- kfree(i);
+ move_bucket_free(list, i);
}
bch2_trans_unlock_long(ctxt->trans);
@@ -143,11 +136,8 @@ static bool bucket_in_flight(struct buckets_in_flight *list,
return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params);
}
-typedef DARRAY(struct move_bucket) move_buckets;
-
static int bch2_copygc_get_buckets(struct moving_context *ctxt,
- struct buckets_in_flight *buckets_in_flight,
- move_buckets *buckets)
+ struct buckets_in_flight *buckets_in_flight)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
@@ -164,8 +154,6 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
return ret;
- bch2_trans_begin(trans);
-
ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, 0, 0),
lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, U64_MAX, LRU_TIME_MAX),
@@ -184,20 +172,34 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
else if (bucket_in_flight(buckets_in_flight, b.k))
in_flight++;
else {
- ret2 = darray_push(buckets, b);
+ struct move_bucket *b_i = kmalloc(sizeof(*b_i), GFP_KERNEL);
+ ret2 = b_i ? 0 : -ENOMEM;
if (ret2)
goto err;
+
+ *b_i = b;
+
+ ret2 = darray_push(&buckets_in_flight->to_evacuate, b_i);
+ if (ret2) {
+ kfree(b_i);
+ goto err;
+ }
+
+ ret2 = rhashtable_lookup_insert_fast(&buckets_in_flight->table, &b_i->hash,
+ bch_move_bucket_params);
+ BUG_ON(ret2);
+
sectors += b.sectors;
}
- ret2 = buckets->nr >= nr_to_get;
+ ret2 = buckets_in_flight->to_evacuate.nr >= nr_to_get;
err:
ret2;
}));
pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
buckets_in_flight->nr, buckets_in_flight->sectors,
- saw, in_flight, not_movable, buckets->nr, sectors, nr_to_get, ret);
+ saw, in_flight, not_movable, buckets_in_flight->to_evacuate.nr, sectors, nr_to_get, ret);
return ret < 0 ? ret : 0;
}
@@ -212,40 +214,30 @@ static int bch2_copygc(struct moving_context *ctxt,
struct data_update_opts data_opts = {
.btree_insert_flags = BCH_WATERMARK_copygc,
};
- move_buckets buckets = { 0 };
- struct move_bucket_in_flight *f;
u64 sectors_seen = atomic64_read(&ctxt->stats->sectors_seen);
u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;
- ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets);
+ ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight);
if (ret)
goto err;
- darray_for_each(buckets, i) {
+ darray_for_each(buckets_in_flight->to_evacuate, i) {
if (kthread_should_stop() || freezing(current))
break;
- f = move_bucket_in_flight_add(buckets_in_flight, *i);
- ret = PTR_ERR_OR_ZERO(f);
- if (ret == -EEXIST) { /* rare race: copygc_get_buckets returned same bucket more than once */
- ret = 0;
- continue;
- }
- if (ret == -ENOMEM) { /* flush IO, continue later */
- ret = 0;
- break;
- }
+ struct move_bucket *b = *i;
+ *i = NULL;
+
+ move_bucket_in_flight_add(buckets_in_flight, b);
- ret = bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket,
- f->bucket.k.gen, data_opts);
+ ret = bch2_evacuate_bucket(ctxt, b, b->k.bucket, b->k.gen, data_opts);
if (ret)
goto err;
*did_work = true;
}
err:
-
/* no entries in LRU btree found, or got to end: */
if (bch2_err_matches(ret, ENOENT))
ret = 0;
@@ -255,12 +247,34 @@ err:
sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen;
sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved;
- trace_and_count(c, copygc, c, buckets.nr, sectors_seen, sectors_moved);
+ trace_and_count(c, copygc, c, buckets_in_flight->to_evacuate.nr, sectors_seen, sectors_moved);
- darray_exit(&buckets);
+ darray_for_each(buckets_in_flight->to_evacuate, i)
+ if (*i)
+ move_bucket_free(buckets_in_flight, *i);
+ darray_exit(&buckets_in_flight->to_evacuate);
return ret;
}
+static u64 bch2_copygc_dev_wait_amount(struct bch_dev *ca)
+{
+ struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca);
+ struct bch_dev_usage usage;
+
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
+ usage.buckets[i] = usage_full.d[i].buckets;
+
+ s64 fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) *
+ ca->mi.bucket_size) >> 1);
+ s64 fragmented = 0;
+
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
+ if (data_type_movable(i))
+ fragmented += usage_full.d[i].fragmented;
+
+ return max(0LL, fragmented_allowed - fragmented);
+}
+
/*
* Copygc runs when the amount of fragmented data is above some arbitrary
* threshold:
@@ -275,27 +289,14 @@ err:
* often and continually reduce the amount of fragmented space as the device
* fills up. So, we increase the threshold by half the current free space.
*/
-unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
+u64 bch2_copygc_wait_amount(struct bch_fs *c)
{
- s64 wait = S64_MAX, fragmented_allowed, fragmented;
-
- for_each_rw_member(c, ca) {
- struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca);
- struct bch_dev_usage usage;
-
- for (unsigned i = 0; i < BCH_DATA_NR; i++)
- usage.buckets[i] = usage_full.d[i].buckets;
-
- fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) *
- ca->mi.bucket_size) >> 1);
- fragmented = 0;
+ u64 wait = U64_MAX;
- for (unsigned i = 0; i < BCH_DATA_NR; i++)
- if (data_type_movable(i))
- fragmented += usage_full.d[i].fragmented;
-
- wait = min(wait, max(0LL, fragmented_allowed - fragmented));
- }
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca)
+ wait = min(wait, bch2_copygc_dev_wait_amount(ca));
+ rcu_read_unlock();
return wait;
}
@@ -318,14 +319,22 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
c->copygc_wait_at) << 9);
prt_newline(out);
- prt_printf(out, "Currently calculated wait:\t");
- prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
- prt_newline(out);
+ bch2_printbuf_make_room(out, 4096);
rcu_read_lock();
+ out->atomic++;
+
+ prt_printf(out, "Currently calculated wait:\n");
+ for_each_rw_member_rcu(c, ca) {
+ prt_printf(out, " %s:\t", ca->name);
+ prt_human_readable_u64(out, bch2_copygc_dev_wait_amount(ca));
+ prt_newline(out);
+ }
+
struct task_struct *t = rcu_dereference(c->copygc_thread);
if (t)
get_task_struct(t);
+ --out->atomic;
rcu_read_unlock();
if (t) {
@@ -340,19 +349,13 @@ static int bch2_copygc_thread(void *arg)
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
- struct buckets_in_flight *buckets;
+ struct buckets_in_flight buckets = {};
u64 last, wait;
- int ret = 0;
- buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL);
- if (!buckets)
- return -ENOMEM;
- ret = rhashtable_init(&buckets->table, &bch_move_bucket_params);
+ int ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
bch_err_msg(c, ret, "allocating copygc buckets in flight");
- if (ret) {
- kfree(buckets);
+ if (ret)
return ret;
- }
set_freezable();
@@ -360,7 +363,7 @@ static int bch2_copygc_thread(void *arg)
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*/
- kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+ kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots ||
kthread_should_stop());
bch2_move_stats_init(&move_stats, "copygc");
@@ -375,13 +378,13 @@ static int bch2_copygc_thread(void *arg)
cond_resched();
if (!c->opts.copygc_enabled) {
- move_buckets_wait(&ctxt, buckets, true);
+ move_buckets_wait(&ctxt, &buckets, true);
kthread_wait_freezable(c->opts.copygc_enabled ||
kthread_should_stop());
}
if (unlikely(freezing(current))) {
- move_buckets_wait(&ctxt, buckets, true);
+ move_buckets_wait(&ctxt, &buckets, true);
__refrigerator(false);
continue;
}
@@ -392,7 +395,7 @@ static int bch2_copygc_thread(void *arg)
if (wait > clock->max_slop) {
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
- move_buckets_wait(&ctxt, buckets, true);
+ move_buckets_wait(&ctxt, &buckets, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);
@@ -402,7 +405,7 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
c->copygc_running = true;
- ret = bch2_copygc(&ctxt, buckets, &did_work);
+ ret = bch2_copygc(&ctxt, &buckets, &did_work);
c->copygc_running = false;
wake_up(&c->copygc_running_wq);
@@ -413,16 +416,14 @@ static int bch2_copygc_thread(void *arg)
if (min_member_capacity == U64_MAX)
min_member_capacity = 128 * 2048;
- move_buckets_wait(&ctxt, buckets, true);
+ move_buckets_wait(&ctxt, &buckets, true);
bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
MAX_SCHEDULE_TIMEOUT);
}
}
- move_buckets_wait(&ctxt, buckets, true);
-
- rhashtable_destroy(&buckets->table);
- kfree(buckets);
+ move_buckets_wait(&ctxt, &buckets, true);
+ rhashtable_destroy(&buckets.table);
bch2_moving_ctxt_exit(&ctxt);
bch2_move_stats_exit(&move_stats, c);
diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h
index d1885cf67a45..b9683d22bab0 100644
--- a/fs/bcachefs/movinggc.h
+++ b/fs/bcachefs/movinggc.h
@@ -2,7 +2,7 @@
#ifndef _BCACHEFS_MOVINGGC_H
#define _BCACHEFS_MOVINGGC_H
-unsigned long bch2_copygc_wait_amount(struct bch_fs *);
+u64 bch2_copygc_wait_amount(struct bch_fs *);
void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
static inline void bch2_copygc_wakeup(struct bch_fs *c)
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index 52c58c6d53d2..a84b69d6caef 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -11,6 +11,14 @@
#include <linux/posix_acl.h>
+static inline subvol_inum parent_inum(subvol_inum inum, struct bch_inode_unpacked *inode)
+{
+ return (subvol_inum) {
+ .subvol = inode->bi_parent_subvol ?: inum.subvol,
+ .inum = inode->bi_dir,
+ };
+}
+
static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
{
return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
@@ -49,7 +57,7 @@ int bch2_create_trans(struct btree_trans *trans,
if (!(flags & BCH_CREATE_SNAPSHOT)) {
/* Normal create path - allocate a new inode: */
- bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
+ bch2_inode_init_late(c, new_inode, now, uid, gid, mode, rdev, dir_u);
if (flags & BCH_CREATE_TMPFILE)
new_inode->bi_flags |= BCH_INODE_unlinked;
@@ -158,7 +166,6 @@ int bch2_create_trans(struct btree_trans *trans,
name,
dir_target,
&dir_offset,
- &dir_u->bi_size,
STR_HASH_must_create|BTREE_ITER_with_updates) ?:
bch2_inode_write(trans, &dir_iter, dir_u);
if (ret)
@@ -225,7 +232,6 @@ int bch2_link_trans(struct btree_trans *trans,
mode_to_type(inode_u->bi_mode),
name, inum.inum,
&dir_offset,
- &dir_u->bi_size,
STR_HASH_must_create);
if (ret)
goto err;
@@ -406,8 +412,7 @@ int bch2_rename_trans(struct btree_trans *trans,
src_hash = bch2_hash_info_init(c, src_dir_u);
- if (dst_dir.inum != src_dir.inum ||
- dst_dir.subvol != src_dir.subvol) {
+ if (!subvol_inum_eq(dst_dir, src_dir)) {
ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
BTREE_ITER_intent);
if (ret)
@@ -499,32 +504,41 @@ int bch2_rename_trans(struct btree_trans *trans,
}
}
- if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
- S_ISDIR(src_inode_u->bi_mode)) {
- ret = -EXDEV;
- goto err;
- }
+ if (!subvol_inum_eq(dst_dir, src_dir)) {
+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
+ S_ISDIR(src_inode_u->bi_mode)) {
+ ret = -EXDEV;
+ goto err;
+ }
- if (mode == BCH_RENAME_EXCHANGE &&
- bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
- S_ISDIR(dst_inode_u->bi_mode)) {
- ret = -EXDEV;
- goto err;
- }
+ if (mode == BCH_RENAME_EXCHANGE &&
+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
+ S_ISDIR(dst_inode_u->bi_mode)) {
+ ret = -EXDEV;
+ goto err;
+ }
- if (is_subdir_for_nlink(src_inode_u)) {
- src_dir_u->bi_nlink--;
- dst_dir_u->bi_nlink++;
- }
+ ret = bch2_maybe_propagate_has_case_insensitive(trans, src_inum, src_inode_u) ?:
+ (mode == BCH_RENAME_EXCHANGE
+ ? bch2_maybe_propagate_has_case_insensitive(trans, dst_inum, dst_inode_u)
+ : 0);
+ if (ret)
+ goto err;
- if (S_ISDIR(src_inode_u->bi_mode) &&
- !src_inode_u->bi_subvol)
- src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
+ if (is_subdir_for_nlink(src_inode_u)) {
+ src_dir_u->bi_nlink--;
+ dst_dir_u->bi_nlink++;
+ }
- if (mode == BCH_RENAME_EXCHANGE &&
- S_ISDIR(dst_inode_u->bi_mode) &&
- !dst_inode_u->bi_subvol)
- dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
+ if (S_ISDIR(src_inode_u->bi_mode) &&
+ !src_inode_u->bi_subvol)
+ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ S_ISDIR(dst_inode_u->bi_mode) &&
+ !dst_inode_u->bi_subvol)
+ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
+ }
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
dst_dir_u->bi_nlink--;
@@ -595,31 +609,39 @@ static inline void reverse_bytes(void *b, size_t n)
}
}
-/* XXX: we don't yet attempt to print paths when we don't know the subvol */
-int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printbuf *path)
+static int __bch2_inum_to_path(struct btree_trans *trans,
+ u32 subvol, u64 inum, u32 snapshot,
+ struct printbuf *path)
{
unsigned orig_pos = path->pos;
int ret = 0;
- while (!(inum.subvol == BCACHEFS_ROOT_SUBVOL &&
- inum.inum == BCACHEFS_ROOT_INO)) {
+ while (true) {
+ if (!snapshot) {
+ ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
+ if (ret)
+ goto disconnected;
+ }
+
struct bch_inode_unpacked inode;
- ret = bch2_inode_find_by_inum_trans(trans, inum, &inode);
+ ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
if (ret)
goto disconnected;
+ if (inode.bi_subvol == BCACHEFS_ROOT_SUBVOL &&
+ inode.bi_inum == BCACHEFS_ROOT_INO)
+ break;
+
if (!inode.bi_dir && !inode.bi_dir_offset) {
ret = -BCH_ERR_ENOENT_inode_no_backpointer;
goto disconnected;
}
- inum.subvol = inode.bi_parent_subvol ?: inum.subvol;
- inum.inum = inode.bi_dir;
-
- u32 snapshot;
- ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
- if (ret)
- goto disconnected;
+ inum = inode.bi_dir;
+ if (inode.bi_parent_subvol) {
+ subvol = inode.bi_parent_subvol;
+ snapshot = 0;
+ }
struct btree_iter d_iter;
struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter,
@@ -656,6 +678,20 @@ disconnected:
goto out;
}
+int bch2_inum_to_path(struct btree_trans *trans,
+ subvol_inum inum,
+ struct printbuf *path)
+{
+ return __bch2_inum_to_path(trans, inum.subvol, inum.inum, 0, path);
+}
+
+int bch2_inum_snapshot_to_path(struct btree_trans *trans, u64 inum, u32 snapshot,
+ snapshot_id_list *snapshot_overwrites,
+ struct printbuf *path)
+{
+ return __bch2_inum_to_path(trans, 0, inum, snapshot, path);
+}
+
/* fsck */
static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
@@ -831,3 +867,149 @@ fsck_err:
bch_err_fn(c, ret);
return ret;
}
+
+/*
+ * BCH_INODE_has_case_insensitive:
+ * We have to track whether directories have any descendent directory that is
+ * casefolded - for overlayfs:
+ */
+
+static int bch2_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum)
+{
+ struct btree_iter iter = {};
+ int ret = 0;
+
+ while (true) {
+ struct bch_inode_unpacked inode;
+ ret = bch2_inode_peek(trans, &iter, &inode, inum,
+ BTREE_ITER_intent|BTREE_ITER_with_updates);
+ if (ret)
+ break;
+
+ if (inode.bi_flags & BCH_INODE_has_case_insensitive)
+ break;
+
+ inode.bi_flags |= BCH_INODE_has_case_insensitive;
+ ret = bch2_inode_write(trans, &iter, &inode);
+ if (ret)
+ break;
+
+ bch2_trans_iter_exit(trans, &iter);
+ if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM))
+ break;
+
+ inum = parent_inum(inum, &inode);
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ if (!bch2_inode_casefold(trans->c, inode))
+ return 0;
+
+ inode->bi_flags |= BCH_INODE_has_case_insensitive;
+
+ return bch2_propagate_has_case_insensitive(trans, parent_inum(inum, inode));
+}
+
+int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ snapshot_id_list *snapshot_overwrites,
+ bool *do_update)
+{
+ struct printbuf buf = PRINTBUF;
+ bool repairing_parents = false;
+ int ret = 0;
+
+ if (!S_ISDIR(inode->bi_mode)) {
+ /*
+ * Old versions set bi_casefold for non dirs, but that's
+ * unnecessary and wasteful
+ */
+ if (inode->bi_casefold) {
+ inode->bi_casefold = 0;
+ *do_update = true;
+ }
+ return 0;
+ }
+
+ if (trans->c->sb.version < bcachefs_metadata_version_inode_has_case_insensitive)
+ return 0;
+
+ if (bch2_inode_casefold(trans->c, inode) &&
+ !(inode->bi_flags & BCH_INODE_has_case_insensitive)) {
+ prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
+ inode->bi_inum, inode->bi_snapshot);
+
+ ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
+ snapshot_overwrites, &buf);
+ if (ret)
+ goto err;
+
+ if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
+ inode->bi_flags |= BCH_INODE_has_case_insensitive;
+ *do_update = true;
+ }
+ }
+
+ if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
+ goto out;
+
+ struct bch_inode_unpacked dir = *inode;
+ u32 snapshot = dir.bi_snapshot;
+
+ while (!(dir.bi_inum == BCACHEFS_ROOT_INO &&
+ dir.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+ if (dir.bi_parent_subvol) {
+ ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ snapshot_overwrites = NULL;
+ }
+
+ ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
+ if (ret)
+ goto err;
+
+ if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
+ prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");
+
+ ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
+ snapshot_overwrites, &buf);
+ if (ret)
+ goto err;
+
+ if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
+ dir.bi_flags |= BCH_INODE_has_case_insensitive;
+ ret = __bch2_fsck_write_inode(trans, &dir);
+ if (ret)
+ goto err;
+ }
+ }
+
+ /*
+ * We only need to check the first parent, unless we find an
+ * inconsistency
+ */
+ if (!repairing_parents)
+ break;
+ }
+out:
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ if (ret)
+ return ret;
+
+ if (repairing_parents) {
+ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ -BCH_ERR_transaction_restart_nested;
+ }
+
+ return 0;
+}
diff --git a/fs/bcachefs/namei.h b/fs/bcachefs/namei.h
index 2e6f6364767f..ae6ebc2d0785 100644
--- a/fs/bcachefs/namei.h
+++ b/fs/bcachefs/namei.h
@@ -43,6 +43,8 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *,
struct bch_inode_unpacked *);
int bch2_inum_to_path(struct btree_trans *, subvol_inum, struct printbuf *);
+int bch2_inum_snapshot_to_path(struct btree_trans *, u64, u32,
+ snapshot_id_list *, struct printbuf *);
int __bch2_check_dirent_target(struct btree_trans *,
struct btree_iter *,
@@ -69,4 +71,9 @@ static inline int bch2_check_dirent_target(struct btree_trans *trans,
return __bch2_check_dirent_target(trans, dirent_iter, d, target, in_fsck);
}
+int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *);
+int bch2_check_inode_has_case_insensitive(struct btree_trans *, struct bch_inode_unpacked *,
+ snapshot_id_list *, bool *);
+
#endif /* _BCACHEFS_NAMEI_H */
diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c
index 3c21981a4a1c..962218fa68ec 100644
--- a/fs/bcachefs/nocow_locking.c
+++ b/fs/bcachefs/nocow_locking.c
@@ -133,12 +133,10 @@ void bch2_fs_nocow_locking_exit(struct bch_fs *c)
BUG_ON(atomic_read(&l->l[j]));
}
-int bch2_fs_nocow_locking_init(struct bch_fs *c)
+void bch2_fs_nocow_locking_init_early(struct bch_fs *c)
{
struct bucket_nocow_lock_table *t = &c->nocow_locks;
for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++)
spin_lock_init(&l->lock);
-
- return 0;
}
diff --git a/fs/bcachefs/nocow_locking.h b/fs/bcachefs/nocow_locking.h
index f9d6a426a960..48b8a003c0d2 100644
--- a/fs/bcachefs/nocow_locking.h
+++ b/fs/bcachefs/nocow_locking.h
@@ -45,6 +45,6 @@ static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
void bch2_fs_nocow_locking_exit(struct bch_fs *);
-int bch2_fs_nocow_locking_init(struct bch_fs *);
+void bch2_fs_nocow_locking_init_early(struct bch_fs *);
#endif /* _BCACHEFS_NOCOW_LOCKING_H */
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index af3258814822..b1cf88905b81 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -7,7 +7,9 @@
#include "compress.h"
#include "disk_groups.h"
#include "error.h"
+#include "movinggc.h"
#include "opts.h"
+#include "rebalance.h"
#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
@@ -19,6 +21,11 @@ const char * const bch2_error_actions[] = {
NULL
};
+const char * const bch2_degraded_actions[] = {
+ BCH_DEGRADED_ACTIONS()
+ NULL
+};
+
const char * const bch2_fsck_fix_opts[] = {
BCH_FIX_ERRORS_OPTS()
NULL
@@ -273,20 +280,20 @@ int bch2_opt_lookup(const char *name)
return -1;
}
-struct synonym {
+struct opt_synonym {
const char *s1, *s2;
};
-static const struct synonym bch_opt_synonyms[] = {
+static const struct opt_synonym bch2_opt_synonyms[] = {
{ "quota", "usrquota" },
};
static int bch2_mount_opt_lookup(const char *name)
{
- const struct synonym *i;
+ const struct opt_synonym *i;
- for (i = bch_opt_synonyms;
- i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms);
+ for (i = bch2_opt_synonyms;
+ i < bch2_opt_synonyms + ARRAY_SIZE(bch2_opt_synonyms);
i++)
if (!strcmp(name, i->s1))
name = i->s2;
@@ -294,6 +301,30 @@ static int bch2_mount_opt_lookup(const char *name)
return bch2_opt_lookup(name);
}
+struct opt_val_synonym {
+ const char *opt, *v1, *v2;
+};
+
+static const struct opt_val_synonym bch2_opt_val_synonyms[] = {
+ { "degraded", "true", "yes" },
+ { "degraded", "false", "no" },
+ { "degraded", "1", "yes" },
+ { "degraded", "0", "no" },
+};
+
+static const char *bch2_opt_val_synonym_lookup(const char *opt, const char *val)
+{
+ const struct opt_val_synonym *i;
+
+ for (i = bch2_opt_val_synonyms;
+ i < bch2_opt_val_synonyms + ARRAY_SIZE(bch2_opt_val_synonyms);
+ i++)
+ if (!strcmp(opt, i->opt) && !strcmp(val, i->v1))
+ return i->v2;
+
+ return val;
+}
+
int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
{
if (v < opt->min) {
@@ -337,21 +368,22 @@ int bch2_opt_parse(struct bch_fs *c,
{
ssize_t ret;
+ if (err)
+ printbuf_indent_add_nextline(err, 2);
+
switch (opt->type) {
case BCH_OPT_BOOL:
- if (val) {
- ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool);
- if (ret != -BCH_ERR_option_not_bool) {
- *res = ret;
- } else {
- if (err)
- prt_printf(err, "%s: must be bool", opt->attr.name);
- return ret;
- }
+ if (!val)
+ val = "1";
+
+ ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool);
+ if (ret != -BCH_ERR_option_not_bool) {
+ *res = ret;
} else {
- *res = 1;
+ if (err)
+ prt_printf(err, "%s: must be bool", opt->attr.name);
+ return ret;
}
-
break;
case BCH_OPT_UINT:
if (!val) {
@@ -360,9 +392,15 @@ int bch2_opt_parse(struct bch_fs *c,
return -EINVAL;
}
- ret = opt->flags & OPT_HUMAN_READABLE
- ? bch2_strtou64_h(val, res)
- : kstrtou64(val, 10, res);
+ if (*val != '-') {
+ ret = opt->flags & OPT_HUMAN_READABLE
+ ? bch2_strtou64_h(val, res)
+ : kstrtou64(val, 10, res);
+ } else {
+ prt_printf(err, "%s: must be a non-negative number", opt->attr.name);
+ return -BCH_ERR_option_negative;
+ }
+
if (ret < 0) {
if (err)
prt_printf(err, "%s: must be a number",
@@ -480,7 +518,7 @@ void bch2_opts_to_text(struct printbuf *out,
}
}
-int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
+int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id id, u64 v)
{
int ret = 0;
@@ -498,15 +536,17 @@ int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
if (v)
bch2_check_set_feature(c, BCH_FEATURE_ec);
break;
+ default:
+ break;
}
return ret;
}
-int bch2_opts_check_may_set(struct bch_fs *c)
+int bch2_opts_hooks_pre_set(struct bch_fs *c)
{
for (unsigned i = 0; i < bch2_opts_nr; i++) {
- int ret = bch2_opt_check_may_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i));
+ int ret = bch2_opt_hook_pre_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i));
if (ret)
return ret;
}
@@ -514,6 +554,61 @@ int bch2_opts_check_may_set(struct bch_fs *c)
return 0;
}
+void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
+ struct bch_opts *new_opts, enum bch_opt_id id)
+{
+ switch (id) {
+ case Opt_foreground_target:
+ if (new_opts->foreground_target &&
+ !new_opts->background_target)
+ bch2_set_rebalance_needs_scan(c, inum);
+ break;
+ case Opt_compression:
+ if (new_opts->compression &&
+ !new_opts->background_compression)
+ bch2_set_rebalance_needs_scan(c, inum);
+ break;
+ case Opt_background_target:
+ if (new_opts->background_target)
+ bch2_set_rebalance_needs_scan(c, inum);
+ break;
+ case Opt_background_compression:
+ if (new_opts->background_compression)
+ bch2_set_rebalance_needs_scan(c, inum);
+ break;
+ case Opt_rebalance_enabled:
+ bch2_rebalance_wakeup(c);
+ break;
+ case Opt_copygc_enabled:
+ bch2_copygc_wakeup(c);
+ break;
+ case Opt_discard:
+ if (!ca) {
+ mutex_lock(&c->sb_lock);
+ for_each_member_device(c, ca) {
+ struct bch_member *m =
+ bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_DISCARD(m, c->opts.discard);
+ }
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+ break;
+ case Opt_version_upgrade:
+ /*
+ * XXX: in the future we'll likely want to do compatible
+ * upgrades at runtime as well, but right now there's nothing
+ * that does that:
+ */
+ if (new_opts->version_upgrade == BCH_VERSION_UPGRADE_incompatible)
+ bch2_sb_upgrade_incompat(c);
+ break;
+ default:
+ break;
+ }
+}
+
int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
struct printbuf *parse_later,
const char *name, const char *val)
@@ -536,6 +631,12 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
if (id < 0)
return 0;
+ /* must have a value for synonym lookup - but OPT_FN is weird */
+ if (!val && bch2_opt_table[id].type != BCH_OPT_FN)
+ val = "1";
+
+ val = bch2_opt_val_synonym_lookup(name, val);
+
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;
@@ -667,9 +768,11 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
return 0;
}
-void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
+bool __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
const struct bch_option *opt, u64 v)
{
+ bool changed = false;
+
if (opt->flags & OPT_SB_FIELD_SECTORS)
v >>= 9;
@@ -679,26 +782,35 @@ void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
if (opt->flags & OPT_SB_FIELD_ONE_BIAS)
v++;
- if ((opt->flags & OPT_FS) && opt->set_sb && dev_idx < 0)
+ if ((opt->flags & OPT_FS) && opt->set_sb && dev_idx < 0) {
+ changed = v != opt->get_sb(sb);
+
opt->set_sb(sb, v);
+ }
if ((opt->flags & OPT_DEVICE) && opt->set_member && dev_idx >= 0) {
if (WARN(!bch2_member_exists(sb, dev_idx),
"tried to set device option %s on nonexistent device %i",
opt->attr.name, dev_idx))
- return;
+ return false;
- opt->set_member(bch2_members_v2_get_mut(sb, dev_idx), v);
+ struct bch_member *m = bch2_members_v2_get_mut(sb, dev_idx);
+ changed = v != opt->get_member(m);
+ opt->set_member(m, v);
}
+
+ return changed;
}
-void bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca,
+bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca,
const struct bch_option *opt, u64 v)
{
mutex_lock(&c->sb_lock);
- __bch2_opt_set_sb(c->disk_sb.sb, ca ? ca->dev_idx : -1, opt, v);
- bch2_write_super(c);
+ bool changed = __bch2_opt_set_sb(c->disk_sb.sb, ca ? ca->dev_idx : -1, opt, v);
+ if (changed)
+ bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+ return changed;
}
/* io opts: */
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index dfb14810124c..2a02606254b3 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -11,6 +11,7 @@
struct bch_fs;
extern const char * const bch2_error_actions[];
+extern const char * const bch2_degraded_actions[];
extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
@@ -307,14 +308,9 @@ enum fsck_err_opts {
NULL, "Enable project quotas") \
x(degraded, u8, \
OPT_FS|OPT_MOUNT, \
- OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ OPT_STR(bch2_degraded_actions), \
+ BCH_SB_DEGRADED_ACTION, BCH_DEGRADED_ask, \
NULL, "Allow mounting in degraded mode") \
- x(very_degraded, u8, \
- OPT_FS|OPT_MOUNT, \
- OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
- NULL, "Allow mounting in when data will be missing") \
x(no_splitbrain_check, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
@@ -454,7 +450,7 @@ enum fsck_err_opts {
BCH2_NO_SB_OPT, false, \
NULL, "Reconstruct alloc btree") \
x(version_upgrade, u8, \
- OPT_FS|OPT_MOUNT, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_version_upgrade_opts), \
BCH_SB_VERSION_UPGRADE, BCH_VERSION_UPGRADE_compatible, \
NULL, "Set superblock to latest version,\n" \
@@ -494,6 +490,17 @@ enum fsck_err_opts {
BCH2_NO_SB_OPT, true, \
NULL, "Enable rebalance: disable for debugging, or to\n"\
"quiet the system when doing performance testing\n")\
+ x(rebalance_on_ac_only, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_REBALANCE_AC_ONLY, false, \
+ NULL, "Enable rebalance while on mains power only\n") \
+ x(auto_snapshot_deletion, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Enable automatic snapshot deletion: disable for debugging, or to\n"\
+ "quiet the system when doing performance testing\n")\
x(no_data_io, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
@@ -522,7 +529,7 @@ enum fsck_err_opts {
BCH_MEMBER_DATA_ALLOWED, BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\
"types", "Allowed data types for this device: journal, btree, and/or user")\
x(discard, u8, \
- OPT_MOUNT|OPT_DEVICE|OPT_RUNTIME, \
+ OPT_MOUNT|OPT_FS|OPT_DEVICE|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_MEMBER_DISCARD, true, \
NULL, "Enable discard/TRIM support") \
@@ -530,7 +537,7 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
- NULL, "BTREE_ITER_prefetch casuse btree nodes to be\n"\
+ NULL, "BTREE_ITER_prefetch causes btree nodes to be\n"\
" prefetched sequentially")
struct bch_opts {
@@ -616,10 +623,10 @@ void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id, int);
int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *);
-void __bch2_opt_set_sb(struct bch_sb *, int, const struct bch_option *, u64);
+bool __bch2_opt_set_sb(struct bch_sb *, int, const struct bch_option *, u64);
struct bch_dev;
-void bch2_opt_set_sb(struct bch_fs *, struct bch_dev *, const struct bch_option *, u64);
+bool bch2_opt_set_sb(struct bch_fs *, struct bch_dev *, const struct bch_option *, u64);
int bch2_opt_lookup(const char *);
int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *);
@@ -636,8 +643,11 @@ void bch2_opts_to_text(struct printbuf *,
struct bch_fs *, struct bch_sb *,
unsigned, unsigned, unsigned);
-int bch2_opt_check_may_set(struct bch_fs *, struct bch_dev *, int, u64);
-int bch2_opts_check_may_set(struct bch_fs *);
+int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, enum bch_opt_id, u64);
+int bch2_opts_hooks_pre_set(struct bch_fs *);
+void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64,
+ struct bch_opts *, enum bch_opt_id);
+
int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *,
struct printbuf *, const char *, const char *);
int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *,
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 4ccdfc1f34aa..de1ec9e0caa0 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -80,11 +80,13 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
unsigned ptr_bit = 1;
unsigned rewrite_ptrs = 0;
+ rcu_read_lock();
bkey_for_each_ptr(ptrs, ptr) {
if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
rewrite_ptrs |= ptr_bit;
ptr_bit <<= 1;
}
+ rcu_read_unlock();
return rewrite_ptrs;
}
@@ -95,6 +97,9 @@ static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
+ return 0;
+
return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
bch2_bkey_ptrs_need_move(c, opts, ptrs);
}
@@ -107,6 +112,9 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
if (!opts)
return 0;
+ if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
+ return 0;
+
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
u64 sectors = 0;
@@ -126,10 +134,14 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
}
}
incompressible:
- if (opts->background_target)
+ if (opts->background_target) {
+ rcu_read_lock();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
+ if (!p.ptr.cached &&
+ !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
sectors += p.crc.compressed_size;
+ rcu_read_unlock();
+ }
return sectors;
}
@@ -309,7 +321,7 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
- if (!bch2_bkey_rebalance_opts(k))
+ if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k))
return 0;
struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0);
@@ -447,22 +459,11 @@ out:
return ret;
}
-static bool rebalance_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_update_opts *data_opts)
-{
- data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
- data_opts->target = io_opts->background_target;
- data_opts->write_flags |= BCH_WRITE_only_specified_devs;
- return data_opts->rewrite_ptrs != 0;
-}
-
static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
{
struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
struct bch_fs_rebalance *r = &trans->c->rebalance;
- int ret;
bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
ctxt->stats = &r->scan_stats;
@@ -477,11 +478,34 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
r->state = BCH_REBALANCE_scanning;
- ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?:
- commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+ struct per_snapshot_io_opts snapshot_io_opts;
+ per_snapshot_io_opts_init(&snapshot_io_opts, c);
+
+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
+ r->scan_start.pos, r->scan_end.pos,
+ BTREE_ITER_all_snapshots|
+ BTREE_ITER_not_extents|
+ BTREE_ITER_prefetch, k, ({
+ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+ struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
+ &snapshot_io_opts, iter.pos, &iter, k);
+ PTR_ERR_OR_ZERO(io_opts);
+ })) ?:
+ commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+
+ per_snapshot_io_opts_exit(&snapshot_io_opts);
bch2_move_stats_exit(&r->scan_stats, trans->c);
+
+ /*
+ * Ensure that the rebalance_work entries we created are seen by the
+ * next iteration of do_rebalance(), so we don't end up stuck in
+ * rebalance_wait():
+ */
+ atomic64_inc(&r->scan_stats.sectors_seen);
+ bch2_btree_write_buffer_flush_sync(trans);
+
return ret;
}
@@ -506,6 +530,13 @@ static void rebalance_wait(struct bch_fs *c)
bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
}
+static bool bch2_rebalance_enabled(struct bch_fs *c)
+{
+ return c->opts.rebalance_enabled &&
+ !(c->opts.rebalance_on_ac_only &&
+ c->rebalance.on_battery);
+}
+
static int do_rebalance(struct moving_context *ctxt)
{
struct btree_trans *trans = ctxt->trans;
@@ -525,9 +556,9 @@ static int do_rebalance(struct moving_context *ctxt)
BTREE_ITER_all_snapshots);
while (!bch2_move_ratelimit(ctxt)) {
- if (!c->opts.rebalance_enabled) {
+ if (!bch2_rebalance_enabled(c)) {
bch2_moving_ctxt_flush_all(ctxt);
- kthread_wait_freezable(c->opts.rebalance_enabled ||
+ kthread_wait_freezable(bch2_rebalance_enabled(c) ||
kthread_should_stop());
}
@@ -585,7 +616,7 @@ static int bch2_rebalance_thread(void *arg)
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*/
- kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+ kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots ||
kthread_should_stop());
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
@@ -702,7 +733,156 @@ int bch2_rebalance_start(struct bch_fs *c)
return 0;
}
-void bch2_fs_rebalance_init(struct bch_fs *c)
+#ifdef CONFIG_POWER_SUPPLY
+#include <linux/power_supply.h>
+
+static int bch2_rebalance_power_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
{
- bch2_pd_controller_init(&c->rebalance.pd);
+ struct bch_fs *c = container_of(nb, struct bch_fs, rebalance.power_notifier);
+
+ c->rebalance.on_battery = !power_supply_is_system_supplied();
+ bch2_rebalance_wakeup(c);
+ return NOTIFY_OK;
+}
+#endif
+
+void bch2_fs_rebalance_exit(struct bch_fs *c)
+{
+#ifdef CONFIG_POWER_SUPPLY
+ power_supply_unreg_notifier(&c->rebalance.power_notifier);
+#endif
+}
+
+int bch2_fs_rebalance_init(struct bch_fs *c)
+{
+ struct bch_fs_rebalance *r = &c->rebalance;
+
+ bch2_pd_controller_init(&r->pd);
+
+#ifdef CONFIG_POWER_SUPPLY
+ r->power_notifier.notifier_call = bch2_rebalance_power_notifier;
+ int ret = power_supply_reg_notifier(&r->power_notifier);
+ if (ret)
+ return ret;
+
+ r->on_battery = !power_supply_is_system_supplied();
+#endif
+ return 0;
+}
+
+static int check_rebalance_work_one(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ struct btree_iter *rebalance_iter,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c extent_k, rebalance_k;
+ struct printbuf buf = PRINTBUF;
+
+ int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?:
+ bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter));
+ if (ret)
+ return ret;
+
+ if (!extent_k.k &&
+ extent_iter->btree_id == BTREE_ID_reflink &&
+ (!rebalance_k.k ||
+ rebalance_k.k->p.inode >= BCACHEFS_ROOT_INO)) {
+ bch2_trans_iter_exit(trans, extent_iter);
+ bch2_trans_iter_init(trans, extent_iter,
+ BTREE_ID_extents, POS_MIN,
+ BTREE_ITER_prefetch|
+ BTREE_ITER_all_snapshots);
+ return -BCH_ERR_transaction_restart_nested;
+ }
+
+ if (!extent_k.k && !rebalance_k.k)
+ return 1;
+
+ int cmp = bpos_cmp(extent_k.k ? extent_k.k->p : SPOS_MAX,
+ rebalance_k.k ? rebalance_k.k->p : SPOS_MAX);
+
+ struct bkey deleted;
+ bkey_init(&deleted);
+
+ if (cmp < 0) {
+ deleted.p = extent_k.k->p;
+ rebalance_k.k = &deleted;
+ } else if (cmp > 0) {
+ deleted.p = rebalance_k.k->p;
+ extent_k.k = &deleted;
+ }
+
+ bool should_have_rebalance =
+ bch2_bkey_sectors_need_rebalance(c, extent_k) != 0;
+ bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set;
+
+ if (should_have_rebalance != have_rebalance) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed);
+ if (ret)
+ return ret;
+
+ bch2_bkey_val_to_text(&buf, c, extent_k);
+ }
+
+ if (fsck_err_on(!should_have_rebalance && have_rebalance,
+ trans, rebalance_work_incorrectly_set,
+ "rebalance work incorrectly set\n%s", buf.buf)) {
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+ extent_k.k->p, false);
+ if (ret)
+ goto err;
+ }
+
+ if (fsck_err_on(should_have_rebalance && !have_rebalance,
+ trans, rebalance_work_incorrectly_unset,
+ "rebalance work incorrectly unset\n%s", buf.buf)) {
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+ extent_k.k->p, true);
+ if (ret)
+ goto err;
+ }
+
+ if (cmp <= 0)
+ bch2_btree_iter_advance(trans, extent_iter);
+ if (cmp >= 0)
+ bch2_btree_iter_advance(trans, rebalance_iter);
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_rebalance_work(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter rebalance_iter, extent_iter;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &extent_iter,
+ BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_prefetch);
+ bch2_trans_iter_init(trans, &rebalance_iter,
+ BTREE_ID_rebalance_work, POS_MIN,
+ BTREE_ITER_prefetch);
+
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ while (!ret) {
+ bch2_trans_begin(trans);
+
+ ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+ }
+
+ bch2_bkey_buf_exit(&last_flushed, c);
+ bch2_trans_iter_exit(trans, &extent_iter);
+ bch2_trans_iter_exit(trans, &rebalance_iter);
+ bch2_trans_put(trans);
+ return ret < 0 ? ret : 0;
}
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index e5e8eb4a2dd1..5d9214fe1a22 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -52,6 +52,10 @@ void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);
void bch2_rebalance_stop(struct bch_fs *);
int bch2_rebalance_start(struct bch_fs *);
-void bch2_fs_rebalance_init(struct bch_fs *);
+
+void bch2_fs_rebalance_exit(struct bch_fs *);
+int bch2_fs_rebalance_init(struct bch_fs *);
+
+int bch2_check_rebalance_work(struct bch_fs *);
#endif /* _BCACHEFS_REBALANCE_H */
diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h
index fe5098c17dfc..33d77286f1d5 100644
--- a/fs/bcachefs/rebalance_types.h
+++ b/fs/bcachefs/rebalance_types.h
@@ -30,6 +30,11 @@ struct bch_fs_rebalance {
struct bbpos scan_start;
struct bbpos scan_end;
struct bch_move_stats scan_stats;
+
+ bool on_battery;
+#ifdef CONFIG_POWER_SUPPLY
+ struct notifier_block power_notifier;
+#endif
};
#endif /* _BCACHEFS_REBALANCE_TYPES_H */
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index d6c4ef819d40..4fca57575565 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -33,8 +33,9 @@
#include <linux/sort.h>
#include <linux/stat.h>
-
-int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_lost_data(struct bch_fs *c,
+ struct printbuf *msg,
+ enum btree_id btree)
{
u64 b = BIT_ULL(btree);
int ret = 0;
@@ -43,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!(c->sb.btrees_lost_data & b)) {
- struct printbuf buf = PRINTBUF;
- bch2_btree_id_to_text(&buf, btree);
- bch_err(c, "flagging btree %s lost data", buf.buf);
- printbuf_exit(&buf);
+ prt_printf(msg, "flagging btree ");
+ bch2_btree_id_to_text(msg, btree);
+ prt_printf(msg, " lost data\n");
+
ext->btrees_lost_data |= cpu_to_le64(b);
}
/* Once we have runtime self healing for topology errors we won't need this: */
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
/* Btree node accounting will be off: */
__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret;
#ifdef CONFIG_BCACHEFS_DEBUG
/*
* These are much more minor, and don't need to be corrected right away,
* but in debug mode we want the next fsck run to be clean:
*/
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0) ?: ret;
#endif
switch (btree) {
case BTREE_ID_alloc:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
__set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
__set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
@@ -78,26 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
goto out;
case BTREE_ID_backpointers:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0) ?: ret;
goto out;
case BTREE_ID_need_discard:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
goto out;
case BTREE_ID_freespace:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
goto out;
case BTREE_ID_bucket_gens:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
goto out;
case BTREE_ID_lru:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
goto out;
case BTREE_ID_accounting:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret;
+ goto out;
+ case BTREE_ID_snapshots:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
goto out;
default:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
goto out;
}
out:
@@ -114,11 +119,8 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree)
}
/* for -o reconstruct_alloc: */
-static void bch2_reconstruct_alloc(struct bch_fs *c)
+void bch2_reconstruct_alloc(struct bch_fs *c)
{
- bch2_journal_log_msg(c, "dropping alloc info");
- bch_info(c, "dropping and reconstructing all alloc info");
-
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
@@ -160,6 +162,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info));
+
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@@ -282,7 +286,12 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
goto out;
if (k->k->k.type == KEY_TYPE_accounting) {
- ret = bch2_trans_update_buffered(trans, BTREE_ID_accounting, k->k);
+ struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto out;
+
+ bkey_copy(n, k->k);
goto out;
}
@@ -430,7 +439,7 @@ int bch2_journal_replay(struct bch_fs *c)
trans = NULL;
if (!c->opts.retain_recovery_info &&
- c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
+ c->recovery.pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
@@ -585,9 +594,6 @@ static int read_btree_roots(struct bch_fs *c)
buf.buf, bch2_err_str(ret))) {
if (btree_id_is_alloc(i))
r->error = 0;
-
- ret = bch2_btree_lost_data(c, i);
- BUG_ON(ret);
}
}
@@ -667,7 +673,7 @@ static bool check_version_upgrade(struct bch_fs *c)
bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
- bch_info(c, "%s", buf.buf);
+ bch_notice(c, "%s", buf.buf);
printbuf_exit(&buf);
ret = true;
@@ -683,7 +689,7 @@ static bool check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
prt_newline(&buf);
- bch_info(c, "%s", buf.buf);
+ bch_notice(c, "%s", buf.buf);
printbuf_exit(&buf);
ret = true;
@@ -790,11 +796,11 @@ int bch2_fs_recovery(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- if (c->opts.fsck)
- set_bit(BCH_FS_fsck_running, &c->flags);
if (c->sb.clean)
set_bit(BCH_FS_clean_recovery, &c->flags);
- set_bit(BCH_FS_recovery_running, &c->flags);
+ if (c->opts.fsck)
+ set_bit(BCH_FS_in_fsck, &c->flags);
+ set_bit(BCH_FS_in_recovery, &c->flags);
ret = bch2_blacklist_table_initialize(c);
if (ret) {
@@ -889,8 +895,37 @@ use_clean:
if (ret)
goto err;
- if (c->opts.reconstruct_alloc)
+ ret = bch2_fs_resize_on_mount(c);
+ if (ret) {
+ up_write(&c->state_lock);
+ goto err;
+ }
+
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_info(c, "filesystem is an unresized image file, mounting ro");
+ c->opts.read_only = true;
+ }
+
+ if (!c->opts.read_only &&
+ (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) {
+ bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
+
bch2_reconstruct_alloc(c);
+ } else if (c->opts.reconstruct_alloc) {
+ bch2_journal_log_msg(c, "dropping alloc info");
+ bch_info(c, "dropping and reconstructing all alloc info");
+
+ bch2_reconstruct_alloc(c);
+ }
+
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
+ /* We can't go RW to fix errors without alloc info */
+ if (c->opts.fix_errors == FSCK_FIX_yes ||
+ c->opts.fix_errors == FSCK_FIX_ask)
+ c->opts.fix_errors = FSCK_FIX_no;
+ if (c->opts.errors == BCH_ON_ERROR_fix_safe)
+ c->opts.errors = BCH_ON_ERROR_continue;
+ }
/*
* After an unclean shutdown, skip then next few journal sequence
@@ -933,8 +968,10 @@ use_clean:
set_bit(BCH_FS_btree_running, &c->flags);
ret = bch2_sb_set_upgrade_extra(c);
+ if (ret)
+ goto err;
- ret = bch2_run_recovery_passes(c);
+ ret = bch2_run_recovery_passes(c, 0);
if (ret)
goto err;
@@ -945,8 +982,7 @@ use_clean:
* multithreaded use:
*/
set_bit(BCH_FS_may_go_rw, &c->flags);
- clear_bit(BCH_FS_fsck_running, &c->flags);
- clear_bit(BCH_FS_recovery_running, &c->flags);
+ clear_bit(BCH_FS_in_fsck, &c->flags);
/* in case we don't run journal replay, i.e. norecovery mode */
set_bit(BCH_FS_accounting_replay_done, &c->flags);
@@ -969,9 +1005,8 @@ use_clean:
bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
clear_bit(BCH_FS_errors_fixed, &c->flags);
- c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
-
- ret = bch2_run_recovery_passes(c);
+ ret = bch2_run_recovery_passes(c,
+ BCH_RECOVERY_PASS_check_alloc_info);
if (ret)
goto err;
@@ -1015,7 +1050,7 @@ use_clean:
if (c->opts.fsck &&
!test_bit(BCH_FS_error, &c->flags) &&
- c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+ c->recovery.pass_done == BCH_RECOVERY_PASS_NR - 1 &&
ext->btrees_lost_data) {
ext->btrees_lost_data = 0;
write_sb = true;
@@ -1076,8 +1111,17 @@ out:
return ret;
err:
fsck_err:
- bch2_fs_emergency_read_only(c);
- goto out;
+ {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "error in recovery: %s", bch2_err_str(ret));
+ bch2_fs_emergency_read_only2(c, &buf);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
+ return ret;
}
int bch2_fs_initialize(struct bch_fs *c)
@@ -1193,7 +1237,7 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
+ c->recovery.pass_done = BCH_RECOVERY_PASS_NR - 1;
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
@@ -1216,7 +1260,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
+ c->recovery.curr_pass = BCH_RECOVERY_PASS_NR;
return 0;
err:
bch_err_fn(c, ret);
diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
index b0d55754b21b..c023f52fc2d6 100644
--- a/fs/bcachefs/recovery.h
+++ b/fs/bcachefs/recovery.h
@@ -2,7 +2,8 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-int bch2_btree_lost_data(struct bch_fs *, enum btree_id);
+int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id);
+void bch2_reconstruct_alloc(struct bch_fs *);
int bch2_journal_replay(struct bch_fs *);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 22f72bb5b853..dabb29b08ad0 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -28,6 +28,145 @@ const char * const bch2_recovery_passes[] = {
NULL
};
+static const u8 passes_to_stable_map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static const u8 passes_from_stable_map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
+{
+ return passes_to_stable_map[pass];
+}
+
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(passes_to_stable_map[i]);
+ return ret;
+}
+
+static enum bch_recovery_pass bch2_recovery_pass_from_stable(enum bch_recovery_pass_stable pass)
+{
+ return pass < ARRAY_SIZE(passes_from_stable_map)
+ ? passes_from_stable_map[pass]
+ : 0;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(passes_from_stable_map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(passes_from_stable_map[i]);
+ return ret;
+}
+
+static int bch2_sb_recovery_passes_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ enum bch_validate_flags flags, struct printbuf *err)
+{
+ return 0;
+}
+
+static void bch2_sb_recovery_passes_to_text(struct printbuf *out,
+ struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_recovery_passes *r =
+ field_to_type(f, recovery_passes);
+ unsigned nr = recovery_passes_nr_entries(r);
+
+ if (out->nr_tabstops < 1)
+ printbuf_tabstop_push(out, 32);
+ if (out->nr_tabstops < 2)
+ printbuf_tabstop_push(out, 16);
+
+ prt_printf(out, "Pass\tLast run\tLast runtime\n");
+
+ for (struct recovery_pass_entry *i = r->start; i < r->start + nr; i++) {
+ if (!i->last_run)
+ continue;
+
+ unsigned idx = i - r->start;
+
+ prt_printf(out, "%s\t", bch2_recovery_passes[bch2_recovery_pass_from_stable(idx)]);
+
+ bch2_prt_datetime(out, le64_to_cpu(i->last_run));
+ prt_tab(out);
+
+ bch2_pr_time_units(out, le32_to_cpu(i->last_runtime) * NSEC_PER_SEC);
+ prt_newline(out);
+ }
+}
+
+static void bch2_sb_recovery_pass_complete(struct bch_fs *c,
+ enum bch_recovery_pass pass,
+ s64 start_time)
+{
+ enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
+ s64 end_time = ktime_get_real_seconds();
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __clear_bit_le64(stable, ext->recovery_passes_required);
+
+ struct bch_sb_field_recovery_passes *r =
+ bch2_sb_field_get(c->disk_sb.sb, recovery_passes);
+
+ if (stable >= recovery_passes_nr_entries(r)) {
+ unsigned u64s = struct_size(r, start, stable + 1) / sizeof(u64);
+
+ r = bch2_sb_field_resize(&c->disk_sb, recovery_passes, u64s);
+ if (!r) {
+ bch_err(c, "error creating recovery_passes sb section");
+ goto out;
+ }
+ }
+
+ r->start[stable].last_run = cpu_to_le64(end_time);
+ r->start[stable].last_runtime = cpu_to_le32(max(0, end_time - start_time));
+out:
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
+static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
+ bool ret = false;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_recovery_passes *r =
+ bch2_sb_field_get(c->disk_sb.sb, recovery_passes);
+
+ if (stable < recovery_passes_nr_entries(r)) {
+ struct recovery_pass_entry *i = r->start + stable;
+
+ /*
+ * Ratelimit if the last runtime was more than 1% of the time
+ * since we last ran
+ */
+ ret = (u64) le32_to_cpu(i->last_runtime) * 100 >
+ ktime_get_real_seconds() - le64_to_cpu(i->last_run);
+ }
+
+ return ret;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes = {
+ .validate = bch2_sb_recovery_passes_validate,
+ .to_text = bch2_sb_recovery_passes_to_text
+};
+
/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
@@ -47,11 +186,36 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
set_bit(BCH_FS_may_go_rw, &c->flags);
- if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
+ if (keys->nr ||
+ !c->opts.read_only ||
+ !c->sb.clean ||
+ c->opts.recovery_passes ||
+ (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))) {
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
+ bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
+ bch2_reconstruct_alloc(c);
+ }
+
return bch2_fs_read_write_early(c);
+ }
return 0;
}
+/*
+ * Make sure root inode is readable while we're still in recovery and can rewind
+ * for repair:
+ */
+static int bch2_lookup_root_inode(struct bch_fs *c)
+{
+ subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM;
+ struct bch_inode_unpacked inode_u;
+ struct bch_subvolume subvol;
+
+ return bch2_trans_do(c,
+ bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
+ bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
+}
+
struct recovery_pass_fn {
int (*fn)(struct bch_fs *);
unsigned when;
@@ -63,252 +227,351 @@ static struct recovery_pass_fn recovery_pass_fns[] = {
#undef x
};
-static const u8 passes_to_stable_map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
- BCH_RECOVERY_PASSES()
-#undef x
-};
+static u64 bch2_recovery_passes_match(unsigned flags)
+{
+ u64 ret = 0;
-static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+ if (recovery_pass_fns[i].when & flags)
+ ret |= BIT_ULL(i);
+ return ret;
+}
+
+u64 bch2_fsck_recovery_passes(void)
{
- return passes_to_stable_map[pass];
+ return bch2_recovery_passes_match(PASS_FSCK);
}
-u64 bch2_recovery_passes_to_stable(u64 v)
+static void bch2_run_async_recovery_passes(struct bch_fs *c)
{
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(passes_to_stable_map[i]);
- return ret;
+ if (!down_trylock(&c->recovery.run_lock))
+ return;
+
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_async_recovery_passes))
+ goto unlock;
+
+ if (queue_work(system_long_wq, &c->recovery.work))
+ return;
+
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes);
+unlock:
+ up(&c->recovery.run_lock);
}
-u64 bch2_recovery_passes_from_stable(u64 v)
+static bool recovery_pass_needs_set(struct bch_fs *c,
+ enum bch_recovery_pass pass,
+ enum bch_run_recovery_pass_flags *flags)
{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
+ struct bch_fs_recovery *r = &c->recovery;
+ bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
+ bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
+ if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
+ !bch2_recovery_pass_want_ratelimit(c, pass))
+ *flags &= ~RUN_RECOVERY_PASS_ratelimit;
+
+ /*
+ * If RUN_RECOVERY_PASS_nopersistent is set, we don't want to do
+ * anything if the pass has already run: these mean we need a prior pass
+ * to run before we continue to repair, we don't expect that pass to fix
+ * the damage we encountered.
+ *
+ * Otherwise, we run run_explicit_recovery_pass when we find damage, so
+ * it should run again even if it's already run:
+ */
+
+ if (persistent
+ ? !(c->sb.recovery_passes_required & BIT_ULL(pass))
+ : !((r->passes_to_run|r->passes_complete) & BIT_ULL(pass)))
+ return true;
+
+ if (!(*flags & RUN_RECOVERY_PASS_ratelimit) &&
+ (r->passes_ratelimiting & BIT_ULL(pass)))
+ return true;
+
+ return false;
}
/*
* For when we need to rewind recovery passes and run a pass we skipped:
*/
-static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
- enum bch_recovery_pass pass)
+int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ struct printbuf *out,
+ enum bch_recovery_pass pass,
+ enum bch_run_recovery_pass_flags flags)
{
- if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
- return -BCH_ERR_not_in_recovery;
+ struct bch_fs_recovery *r = &c->recovery;
+ int ret = 0;
- if (c->recovery_passes_complete & BIT_ULL(pass))
- return 0;
+ lockdep_assert_held(&c->sb_lock);
- bool print = !(c->opts.recovery_passes & BIT_ULL(pass));
+ bch2_printbuf_make_room(out, 1024);
+ out->atomic++;
- if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
- c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
- if (print)
- bch_info(c, "need recovery pass %s (%u), but already rw",
- bch2_recovery_passes[pass], pass);
- return -BCH_ERR_cannot_rewind_recovery;
- }
+ unsigned long lockflags;
+ spin_lock_irqsave(&r->lock, lockflags);
- if (print)
- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
- bch2_recovery_passes[pass], pass,
- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
+ if (!recovery_pass_needs_set(c, pass, &flags))
+ goto out;
- c->opts.recovery_passes |= BIT_ULL(pass);
+ bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
+ bool rewind = in_recovery && r->curr_pass > pass;
+ bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;
- if (c->curr_recovery_pass > pass) {
- c->next_recovery_pass = pass;
- c->recovery_passes_complete &= (1ULL << pass) >> 1;
- return -BCH_ERR_restart_recovery;
- } else {
- return 0;
+ if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) {
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
}
-}
-
-int bch2_run_explicit_recovery_pass(struct bch_fs *c,
- enum bch_recovery_pass pass)
-{
- unsigned long flags;
- spin_lock_irqsave(&c->recovery_pass_lock, flags);
- int ret = __bch2_run_explicit_recovery_pass(c, pass);
- spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
- return ret;
-}
-int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
- enum bch_recovery_pass pass)
-{
- lockdep_assert_held(&c->sb_lock);
+ if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
+ (!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) {
+ prt_printf(out, "need recovery pass %s (%u), but already rw\n",
+ bch2_recovery_passes[pass], pass);
+ ret = -BCH_ERR_cannot_rewind_recovery;
+ goto out;
+ }
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
+ if (ratelimit)
+ r->passes_ratelimiting |= BIT_ULL(pass);
+ else
+ r->passes_ratelimiting &= ~BIT_ULL(pass);
- return bch2_run_explicit_recovery_pass(c, pass);
-}
+ if (in_recovery && !ratelimit) {
+ prt_printf(out, "running recovery pass %s (%u), currently at %s (%u)%s\n",
+ bch2_recovery_passes[pass], pass,
+ bch2_recovery_passes[r->curr_pass], r->curr_pass,
+ rewind ? " - rewinding" : "");
-int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
- enum bch_recovery_pass pass)
-{
- enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+ r->passes_to_run |= BIT_ULL(pass);
- mutex_lock(&c->sb_lock);
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ if (rewind) {
+ r->next_pass = pass;
+ r->passes_complete &= (1ULL << pass) >> 1;
+ ret = -BCH_ERR_restart_recovery;
+ }
+ } else {
+ prt_printf(out, "scheduling recovery pass %s (%u)%s\n",
+ bch2_recovery_passes[pass], pass,
+ ratelimit ? " - ratelimiting" : "");
- if (!test_bit_le64(s, ext->recovery_passes_required)) {
- __set_bit_le64(s, ext->recovery_passes_required);
- bch2_write_super(c);
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+ if (p->when & PASS_ONLINE)
+ bch2_run_async_recovery_passes(c);
}
- mutex_unlock(&c->sb_lock);
-
- return bch2_run_explicit_recovery_pass(c, pass);
+out:
+ spin_unlock_irqrestore(&r->lock, lockflags);
+ --out->atomic;
+ return ret;
}
-static void bch2_clear_recovery_pass_required(struct bch_fs *c,
- enum bch_recovery_pass pass)
+int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ struct printbuf *out,
+ enum bch_recovery_pass pass,
+ enum bch_run_recovery_pass_flags flags)
{
- enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+ int ret = 0;
- mutex_lock(&c->sb_lock);
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ scoped_guard(mutex, &c->sb_lock) {
+ if (!recovery_pass_needs_set(c, pass, &flags))
+ return 0;
- if (test_bit_le64(s, ext->recovery_passes_required)) {
- __clear_bit_le64(s, ext->recovery_passes_required);
+ ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
bch2_write_super(c);
}
- mutex_unlock(&c->sb_lock);
-}
-u64 bch2_fsck_recovery_passes(void)
-{
- u64 ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
- if (recovery_pass_fns[i].when & PASS_FSCK)
- ret |= BIT_ULL(i);
return ret;
}
-static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
+ enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent;
- if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
- return false;
- if (c->opts.recovery_passes & BIT_ULL(pass))
- return true;
- if ((p->when & PASS_FSCK) && c->opts.fsck)
- return true;
- if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
- return true;
- if (p->when & PASS_ALWAYS)
- return true;
- return false;
+ if (!recovery_pass_needs_set(c, pass, &flags))
+ return 0;
+
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ mutex_lock(&c->sb_lock);
+ int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass,
+ RUN_RECOVERY_PASS_nopersistent);
+ mutex_unlock(&c->sb_lock);
+
+ bch2_print_str(c, KERN_NOTICE, buf.buf);
+ printbuf_exit(&buf);
+ return ret;
}
static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
+ struct bch_fs_recovery *r = &c->recovery;
struct recovery_pass_fn *p = recovery_pass_fns + pass;
- int ret;
if (!(p->when & PASS_SILENT))
bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
bch2_recovery_passes[pass]);
- ret = p->fn(c);
- if (ret)
+
+ s64 start_time = ktime_get_real_seconds();
+ int ret = p->fn(c);
+
+ r->passes_to_run &= ~BIT_ULL(pass);
+
+ if (ret) {
+ r->passes_failing |= BIT_ULL(pass);
return ret;
+ }
+
+ r->passes_failing = 0;
+
+ if (!test_bit(BCH_FS_error, &c->flags))
+ bch2_sb_recovery_pass_complete(c, pass, start_time);
+
if (!(p->when & PASS_SILENT))
bch2_print(c, KERN_CONT " done\n");
return 0;
}
-int bch2_run_online_recovery_passes(struct bch_fs *c)
+static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run,
+ bool online)
{
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
- struct recovery_pass_fn *p = recovery_pass_fns + i;
-
- if (!(p->when & PASS_ONLINE))
- continue;
+ struct bch_fs_recovery *r = &c->recovery;
+ int ret = 0;
- int ret = bch2_run_recovery_pass(c, i);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
- i = c->curr_recovery_pass;
- continue;
- }
- if (ret)
- return ret;
- }
+ spin_lock_irq(&r->lock);
- return 0;
-}
+ if (online)
+ orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE);
-int bch2_run_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))
+ orig_passes_to_run &= ~bch2_recovery_passes_match(PASS_ALLOC);
/*
- * We can't allow set_may_go_rw to be excluded; that would cause us to
- * use the journal replay keys for updates where it's not expected.
+ * A failed recovery pass will be retried after another pass succeeds -
+ * but not this iteration.
+ *
+ * This is because some passes depend on repair done by other passes: we
+ * may want to retry, but we don't want to loop on failing passes.
*/
- c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
- spin_lock_irq(&c->recovery_pass_lock);
+ orig_passes_to_run &= ~r->passes_failing;
- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
- unsigned prev_done = c->recovery_pass_done;
- unsigned pass = c->curr_recovery_pass;
+ r->passes_to_run = orig_passes_to_run;
- c->next_recovery_pass = pass + 1;
+ while (r->passes_to_run) {
+ unsigned prev_done = r->pass_done;
+ unsigned pass = __ffs64(r->passes_to_run);
+ r->curr_pass = pass;
+ r->next_pass = r->curr_pass + 1;
+ r->passes_to_run &= ~BIT_ULL(pass);
- if (c->opts.recovery_pass_last &&
- c->curr_recovery_pass > c->opts.recovery_pass_last)
- break;
+ spin_unlock_irq(&r->lock);
- if (should_run_recovery_pass(c, pass)) {
- spin_unlock_irq(&c->recovery_pass_lock);
- ret = bch2_run_recovery_pass(c, pass) ?:
- bch2_journal_flush(&c->journal);
-
- if (!ret && !test_bit(BCH_FS_error, &c->flags))
- bch2_clear_recovery_pass_required(c, pass);
- spin_lock_irq(&c->recovery_pass_lock);
-
- if (c->next_recovery_pass < c->curr_recovery_pass) {
- /*
- * bch2_run_explicit_recovery_pass() was called: we
- * can't always catch -BCH_ERR_restart_recovery because
- * it may have been called from another thread (btree
- * node read completion)
- */
- ret = 0;
- c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
- } else {
- c->recovery_passes_complete |= BIT_ULL(pass);
- c->recovery_pass_done = max(c->recovery_pass_done, pass);
- }
+ int ret2 = bch2_run_recovery_pass(c, pass) ?:
+ bch2_journal_flush(&c->journal);
+
+ spin_lock_irq(&r->lock);
+
+ if (r->next_pass < r->curr_pass) {
+ /* Rewind: */
+ r->passes_to_run |= orig_passes_to_run & (~0ULL << r->next_pass);
+ } else if (!ret2) {
+ r->pass_done = max(r->pass_done, pass);
+ r->passes_complete |= BIT_ULL(pass);
+ } else {
+ ret = ret2;
}
- c->curr_recovery_pass = c->next_recovery_pass;
+ if (ret && !online)
+ break;
if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
- c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
+ r->pass_done > BCH_RECOVERY_PASS_check_snapshots) {
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
}
}
- spin_unlock_irq(&c->recovery_pass_lock);
+ clear_bit(BCH_FS_in_recovery, &c->flags);
+ spin_unlock_irq(&r->lock);
return ret;
}
+
+static void bch2_async_recovery_passes_work(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs, recovery.work);
+ struct bch_fs_recovery *r = &c->recovery;
+
+ __bch2_run_recovery_passes(c,
+ c->sb.recovery_passes_required & ~r->passes_ratelimiting,
+ true);
+
+ up(&r->run_lock);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes);
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c, u64 passes)
+{
+ return __bch2_run_recovery_passes(c, c->sb.recovery_passes_required|passes, true);
+}
+
+int bch2_run_recovery_passes(struct bch_fs *c, enum bch_recovery_pass from)
+{
+ u64 passes =
+ bch2_recovery_passes_match(PASS_ALWAYS) |
+ (!c->sb.clean ? bch2_recovery_passes_match(PASS_UNCLEAN) : 0) |
+ (c->opts.fsck ? bch2_recovery_passes_match(PASS_FSCK) : 0) |
+ c->opts.recovery_passes |
+ c->sb.recovery_passes_required;
+
+ if (c->opts.recovery_pass_last)
+ passes &= BIT_ULL(c->opts.recovery_pass_last + 1) - 1;
+
+ /*
+ * We can't allow set_may_go_rw to be excluded; that would cause us to
+ * use the journal replay keys for updates where it's not expected.
+ */
+ c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
+ passes &= ~c->opts.recovery_passes_exclude;
+
+ passes &= ~(BIT_ULL(from) - 1);
+
+ down(&c->recovery.run_lock);
+ int ret = __bch2_run_recovery_passes(c, passes, false);
+ up(&c->recovery.run_lock);
+
+ return ret;
+}
+
+static void prt_passes(struct printbuf *out, const char *msg, u64 passes)
+{
+ prt_printf(out, "%s:\t", msg);
+ prt_bitflags(out, bch2_recovery_passes, passes);
+ prt_newline(out);
+}
+
+void bch2_recovery_pass_status_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct bch_fs_recovery *r = &c->recovery;
+
+ printbuf_tabstop_push(out, 32);
+ prt_passes(out, "Scheduled passes", c->sb.recovery_passes_required);
+ prt_passes(out, "Scheduled online passes", c->sb.recovery_passes_required &
+ bch2_recovery_passes_match(PASS_ONLINE));
+ prt_passes(out, "Complete passes", r->passes_complete);
+ prt_passes(out, "Failing passes", r->passes_failing);
+
+ if (r->curr_pass) {
+ prt_printf(out, "Current pass:\t%s\n", bch2_recovery_passes[r->curr_pass]);
+ prt_passes(out, "Current passes", r->passes_to_run);
+ }
+}
+
+void bch2_fs_recovery_passes_init(struct bch_fs *c)
+{
+ spin_lock_init(&c->recovery.lock);
+ sema_init(&c->recovery.run_lock, 1);
+
+ INIT_WORK(&c->recovery.work, bch2_async_recovery_passes_work);
+}
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
index 7d7339c8fa29..dc0d2014ff9b 100644
--- a/fs/bcachefs/recovery_passes.h
+++ b/fs/bcachefs/recovery_passes.h
@@ -3,16 +3,32 @@
extern const char * const bch2_recovery_passes[];
+extern const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes;
+
u64 bch2_recovery_passes_to_stable(u64 v);
u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void);
-int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+enum bch_run_recovery_pass_flags {
+ RUN_RECOVERY_PASS_nopersistent = BIT(0),
+ RUN_RECOVERY_PASS_ratelimit = BIT(1),
+};
+
+int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
+
+int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass,
+ enum bch_run_recovery_pass_flags);
+int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass,
+ enum bch_run_recovery_pass_flags);
+
+int bch2_run_online_recovery_passes(struct bch_fs *, u64);
+int bch2_run_recovery_passes(struct bch_fs *, enum bch_recovery_pass);
+
+void bch2_recovery_pass_status_to_text(struct printbuf *, struct bch_fs *);
-int bch2_run_online_recovery_passes(struct bch_fs *);
-int bch2_run_recovery_passes(struct bch_fs *);
+void bch2_fs_recovery_passes_init(struct bch_fs *);
#endif /* _BCACHEFS_RECOVERY_PASSES_H */
diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h
new file mode 100644
index 000000000000..c434eafbca19
--- /dev/null
+++ b/fs/bcachefs/recovery_passes_format.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_RECOVERY_PASSES_FORMAT_H
+#define _BCACHEFS_RECOVERY_PASSES_FORMAT_H
+
+#define PASS_SILENT BIT(0)
+#define PASS_FSCK BIT(1)
+#define PASS_UNCLEAN BIT(2)
+#define PASS_ALWAYS BIT(3)
+#define PASS_ONLINE BIT(4)
+#define PASS_ALLOC BIT(5)
+#define PASS_FSCK_ALLOC (PASS_FSCK|PASS_ALLOC)
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define PASS_FSCK_DEBUG BIT(1)
+#else
+#define PASS_FSCK_DEBUG 0
+#endif
+
+/*
+ * Passes may be reordered, but the second field is a persistent identifier and
+ * must never change:
+ */
+#define BCH_RECOVERY_PASSES() \
+ x(recovery_pass_empty, 41, PASS_SILENT) \
+ x(scan_for_btree_nodes, 37, 0) \
+ x(check_topology, 4, 0) \
+ x(accounting_read, 39, PASS_ALWAYS) \
+ x(alloc_read, 0, PASS_ALWAYS) \
+ x(stripes_read, 1, 0) \
+ x(initialize_subvolumes, 2, 0) \
+ x(snapshots_read, 3, PASS_ALWAYS) \
+ x(check_allocations, 5, PASS_FSCK_ALLOC) \
+ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \
+ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \
+ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
+ x(journal_replay, 9, PASS_ALWAYS) \
+ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \
+ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK_ALLOC) \
+ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \
+ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \
+ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK_ALLOC) \
+ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK_ALLOC) \
+ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
+ x(bucket_gens_init, 17, 0) \
+ x(reconstruct_snapshots, 38, 0) \
+ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
+ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
+ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
+ x(fs_upgrade_for_subvolumes, 22, 0) \
+ x(check_inodes, 24, PASS_FSCK) \
+ x(check_extents, 25, PASS_FSCK) \
+ x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \
+ x(check_dirents, 27, PASS_FSCK) \
+ x(check_xattrs, 28, PASS_FSCK) \
+ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \
+ x(check_unreachable_inodes, 40, PASS_FSCK) \
+ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
+ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
+ x(check_nlinks, 31, PASS_FSCK) \
+ x(check_rebalance_work, 43, PASS_ONLINE|PASS_FSCK) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
+ x(delete_dead_inodes, 32, PASS_ALWAYS) \
+ x(fix_reflink_p, 33, 0) \
+ x(set_fs_needs_rebalance, 34, 0) \
+ x(lookup_root_inode, 42, PASS_ALWAYS|PASS_SILENT)
+
+/* We normally enumerate recovery passes in the order we run them: */
+enum bch_recovery_pass {
+#define x(n, id, when) BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ BCH_RECOVERY_PASS_NR
+};
+
+/* But we also need stable identifiers that can be used in the superblock */
+enum bch_recovery_pass_stable {
+#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+struct recovery_pass_entry {
+ __le64 last_run;
+ __le32 last_runtime;
+ __le32 flags;
+};
+
+struct bch_sb_field_recovery_passes {
+ struct bch_sb_field field;
+ struct recovery_pass_entry start[];
+};
+
+static inline unsigned
+recovery_passes_nr_entries(struct bch_sb_field_recovery_passes *r)
+{
+ return r
+ ? ((vstruct_end(&r->field) - (void *) &r->start[0]) /
+ sizeof(struct recovery_pass_entry))
+ : 0;
+}
+
+#endif /* _BCACHEFS_RECOVERY_PASSES_FORMAT_H */
diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h
index e89b9c783285..aa9526938cc3 100644
--- a/fs/bcachefs/recovery_passes_types.h
+++ b/fs/bcachefs/recovery_passes_types.h
@@ -2,79 +2,26 @@
#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H
#define _BCACHEFS_RECOVERY_PASSES_TYPES_H
-#define PASS_SILENT BIT(0)
-#define PASS_FSCK BIT(1)
-#define PASS_UNCLEAN BIT(2)
-#define PASS_ALWAYS BIT(3)
-#define PASS_ONLINE BIT(4)
-
-#ifdef CONFIG_BCACHEFS_DEBUG
-#define PASS_FSCK_DEBUG BIT(1)
-#else
-#define PASS_FSCK_DEBUG 0
-#endif
-
-/*
- * Passes may be reordered, but the second field is a persistent identifier and
- * must never change:
- */
-#define BCH_RECOVERY_PASSES() \
- x(recovery_pass_empty, 41, PASS_SILENT) \
- x(scan_for_btree_nodes, 37, 0) \
- x(check_topology, 4, 0) \
- x(accounting_read, 39, PASS_ALWAYS) \
- x(alloc_read, 0, PASS_ALWAYS) \
- x(stripes_read, 1, 0) \
- x(initialize_subvolumes, 2, 0) \
- x(snapshots_read, 3, PASS_ALWAYS) \
- x(check_allocations, 5, PASS_FSCK) \
- x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
- x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
- x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
- x(journal_replay, 9, PASS_ALWAYS) \
- x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \
- x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \
- x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \
- x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \
- x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \
- x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
- x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
- x(bucket_gens_init, 17, 0) \
- x(reconstruct_snapshots, 38, 0) \
- x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
- x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
- x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
- x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
- x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
- x(fs_upgrade_for_subvolumes, 22, 0) \
- x(check_inodes, 24, PASS_FSCK) \
- x(check_extents, 25, PASS_FSCK) \
- x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \
- x(check_dirents, 27, PASS_FSCK) \
- x(check_xattrs, 28, PASS_FSCK) \
- x(check_root, 29, PASS_ONLINE|PASS_FSCK) \
- x(check_unreachable_inodes, 40, PASS_FSCK) \
- x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
- x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
- x(check_nlinks, 31, PASS_FSCK) \
- x(resume_logged_ops, 23, PASS_ALWAYS) \
- x(delete_dead_inodes, 32, PASS_ALWAYS) \
- x(fix_reflink_p, 33, 0) \
- x(set_fs_needs_rebalance, 34, 0)
-
-/* We normally enumerate recovery passes in the order we run them: */
-enum bch_recovery_pass {
-#define x(n, id, when) BCH_RECOVERY_PASS_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- BCH_RECOVERY_PASS_NR
-};
-
-/* But we also need stable identifiers that can be used in the superblock */
-enum bch_recovery_pass_stable {
-#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id,
- BCH_RECOVERY_PASSES()
-#undef x
+struct bch_fs_recovery {
+ /*
+ * Two different uses:
+ * "Has this fsck pass?" - i.e. should this type of error be an
+ * emergency read-only
+ * And, in certain situations fsck will rewind to an earlier pass: used
+ * for signaling to the toplevel code which pass we want to run now.
+ */
+ enum bch_recovery_pass curr_pass;
+ enum bch_recovery_pass next_pass;
+ /* never rewinds version of curr_pass */
+ enum bch_recovery_pass pass_done;
+ u64 passes_to_run;
+ /* bitmask of recovery passes that we actually ran */
+ u64 passes_complete;
+ u64 passes_failing;
+ u64 passes_ratelimiting;
+ spinlock_t lock;
+ struct semaphore run_lock;
+ struct work_struct work;
};
#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 710178e3da4c..3a13dbcab6ba 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -3,6 +3,7 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "inode.h"
@@ -610,7 +611,7 @@ s64 bch2_remap_range(struct bch_fs *c,
!bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts);
int ret = 0, ret2 = 0;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_reflink))
return -BCH_ERR_erofs_no_writes;
bch2_check_set_feature(c, BCH_FEATURE_reflink);
@@ -761,7 +762,7 @@ err:
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
- bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_reflink);
return dst_done ?: ret ?: ret2;
}
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index fa27ec59a647..7c0c9c842b4e 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -16,6 +16,7 @@ enum counters_flags {
x(io_read_split, 33, TYPE_COUNTER) \
x(io_read_reuse_race, 34, TYPE_COUNTER) \
x(io_read_retry, 32, TYPE_COUNTER) \
+ x(io_read_fail_and_poison, 82, TYPE_COUNTER) \
x(io_write, 1, TYPE_SECTORS) \
x(io_move, 2, TYPE_SECTORS) \
x(io_move_read, 35, TYPE_SECTORS) \
@@ -24,6 +25,7 @@ enum counters_flags {
x(io_move_fail, 38, TYPE_COUNTER) \
x(io_move_write_fail, 82, TYPE_COUNTER) \
x(io_move_start_fail, 39, TYPE_COUNTER) \
+ x(io_move_created_rebalance, 83, TYPE_COUNTER) \
x(bucket_invalidate, 3, TYPE_COUNTER) \
x(bucket_discard, 4, TYPE_COUNTER) \
x(bucket_discard_fast, 79, TYPE_COUNTER) \
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index badd0e17ada5..861fce1630f0 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -100,7 +100,11 @@
BCH_FSCK_ERR_ptr_to_missing_backpointer) \
x(stripe_backpointers, \
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
- BCH_FSCK_ERR_ptr_to_missing_backpointer)
+ BCH_FSCK_ERR_ptr_to_missing_backpointer) \
+ x(inode_has_case_insensitive, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
+ BCH_FSCK_ERR_inode_has_case_insensitive_not_set, \
+ BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@@ -374,6 +378,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
continue;
+ if (src->version < c->sb.version_incompat)
+ continue;
+
struct bch_sb_field_downgrade_entry *dst;
unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 3b69a924086f..0bfb151da9cf 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -209,6 +209,7 @@ enum bch_fsck_flags {
x(subvol_to_missing_root, 188, 0) \
x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \
x(bkey_in_missing_snapshot, 190, 0) \
+ x(bkey_in_deleted_snapshot, 315, FSCK_AUTOFIX) \
x(inode_pos_inode_nonzero, 191, 0) \
x(inode_pos_blockdev_range, 192, 0) \
x(inode_alloc_cursor_inode_bad, 301, 0) \
@@ -216,6 +217,7 @@ enum bch_fsck_flags {
x(inode_str_hash_invalid, 194, 0) \
x(inode_v3_fields_start_bad, 195, 0) \
x(inode_snapshot_mismatch, 196, 0) \
+ x(snapshot_key_missing_inode_snapshot, 314, 0) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_unlinked_and_not_open, 281, 0) \
@@ -237,6 +239,8 @@ enum bch_fsck_flags {
x(inode_unreachable, 210, FSCK_AUTOFIX) \
x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \
+ x(inode_has_case_insensitive_not_set, 316, FSCK_AUTOFIX) \
+ x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \
x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
@@ -262,6 +266,7 @@ enum bch_fsck_flags {
x(dirent_to_overwritten_inode, 302, 0) \
x(dirent_to_missing_subvol, 230, 0) \
x(dirent_to_itself, 231, 0) \
+ x(dirent_casefold_mismatch, 318, FSCK_AUTOFIX) \
x(quota_type_invalid, 232, 0) \
x(xattr_val_size_too_small, 233, 0) \
x(xattr_val_size_too_big, 234, 0) \
@@ -301,6 +306,7 @@ enum bch_fsck_flags {
x(btree_ptr_v2_written_0, 268, 0) \
x(subvol_snapshot_bad, 269, 0) \
x(subvol_inode_bad, 270, 0) \
+ x(subvol_missing, 308, FSCK_AUTOFIX) \
x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
@@ -322,7 +328,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 314, 0)
+ x(MAX, 319, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 72779912939b..3398906660a5 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -5,11 +5,31 @@
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "sb-members.h"
#include "super-io.h"
-void bch2_dev_missing(struct bch_fs *c, unsigned dev)
+int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
+{
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "pointer to nonexistent device %u in key\n", dev);
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf);
+
+ int ret = bch2_run_explicit_recovery_pass(c, &buf,
+ BCH_RECOVERY_PASS_check_allocations, 0);
+
+ if (print)
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
{
if (dev != BCH_SB_MEMBER_INVALID)
bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
@@ -119,6 +139,11 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
struct bch_sb_field_members_v1 *mi1;
struct bch_sb_field_members_v2 *mi2;
+ if (BCH_SB_VERSION_INCOMPAT(disk_sb->sb) > bcachefs_metadata_version_extent_flags) {
+ bch2_sb_field_resize(disk_sb, members_v1, 0);
+ return 0;
+ }
+
mi1 = bch2_sb_field_resize(disk_sb, members_v1,
DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
disk_sb->sb->nr_devices, sizeof(u64)));
@@ -170,6 +195,12 @@ static int validate_member(struct printbuf *err,
return -BCH_ERR_invalid_sb_members;
}
+ if (BCH_MEMBER_FREESPACE_INITIALIZED(&m) &&
+ sb->features[0] & cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info))) {
+ prt_printf(err, "device %u: freespace initialized but fs has no alloc info", i);
+ return -BCH_ERR_invalid_sb_members;
+ }
+
return 0;
}
@@ -191,17 +222,11 @@ static void member_to_text(struct printbuf *out,
printbuf_indent_add(out, 2);
prt_printf(out, "Label:\t");
- if (BCH_MEMBER_GROUP(&m)) {
- unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
-
- if (idx < disk_groups_nr(gi))
- prt_printf(out, "%s (%u)",
- gi->entries[idx].label, idx);
- else
- prt_printf(out, "(bad disk labels section)");
- } else {
+ if (BCH_MEMBER_GROUP(&m))
+ bch2_disk_path_to_text_sb(out, sb,
+ BCH_MEMBER_GROUP(&m) - 1);
+ else
prt_printf(out, "(none)");
- }
prt_newline(out);
prt_printf(out, "UUID:\t");
@@ -268,6 +293,7 @@ static void member_to_text(struct printbuf *out,
prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m));
prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
+ prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(&m));
printbuf_indent_sub(out, 2);
}
@@ -493,6 +519,7 @@ int bch2_sb_member_alloc(struct bch_fs *c)
unsigned u64s;
int best = -1;
u64 best_last_mount = 0;
+ unsigned nr_deleted = 0;
if (dev_idx < BCH_SB_MEMBERS_MAX)
goto have_slot;
@@ -503,7 +530,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
continue;
struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
- if (bch2_member_alive(&m))
+
+ nr_deleted += uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
+
+ if (!bch2_is_zero(&m.uuid, sizeof(m.uuid)))
continue;
u64 last_mount = le64_to_cpu(m.last_mount);
@@ -517,6 +547,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
goto have_slot;
}
+ if (nr_deleted)
+ bch_err(c, "unable to allocate new member, but have %u deleted: run fsck",
+ nr_deleted);
+
return -BCH_ERR_ENOSPC_sb_members;
have_slot:
nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
@@ -532,3 +566,22 @@ have_slot:
c->disk_sb.sb->nr_devices = nr_devices;
return dev_idx;
}
+
+void bch2_sb_members_clean_deleted(struct bch_fs *c)
+{
+ mutex_lock(&c->sb_lock);
+ bool write_sb = false;
+
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, i);
+
+ if (uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID)) {
+ memset(&m->uuid, 0, sizeof(m->uuid));
+ write_sb = true;
+ }
+ }
+
+ if (write_sb)
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 42786657522c..6bd9b86aee5b 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -4,6 +4,7 @@
#include "darray.h"
#include "bkey_types.h"
+#include "enumerated_ref.h"
extern char * const bch2_member_error_strs[];
@@ -20,7 +21,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
static inline bool bch2_dev_is_online(struct bch_dev *ca)
{
- return !percpu_ref_is_zero(&ca->io_ref[READ]);
+ return !enumerated_ref_is_zero(&ca->io_ref[READ]);
}
static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
@@ -104,6 +105,12 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *
for (struct bch_dev *_ca = NULL; \
(_ca = __bch2_next_dev((_c), _ca, (_mask)));)
+#define for_each_online_member_rcu(_c, _ca) \
+ for_each_member_device_rcu(_c, _ca, &(_c)->online_devs)
+
+#define for_each_rw_member_rcu(_c, _ca) \
+ for_each_member_device_rcu(_c, _ca, &(_c)->rw_devs[BCH_DATA_free])
+
static inline void bch2_dev_get(struct bch_dev *ca)
{
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -157,33 +164,33 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev
static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
struct bch_dev *ca,
unsigned state_mask,
- int rw)
+ int rw, unsigned ref_idx)
{
rcu_read_lock();
if (ca)
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref_idx);
while ((ca = __bch2_next_dev(c, ca, NULL)) &&
(!((1 << ca->mi.state) & state_mask) ||
- !percpu_ref_tryget(&ca->io_ref[rw])))
+ !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx)))
;
rcu_read_unlock();
return ca;
}
-#define __for_each_online_member(_c, _ca, state_mask, rw) \
+#define __for_each_online_member(_c, _ca, state_mask, rw, ref_idx) \
for (struct bch_dev *_ca = NULL; \
- (_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw));)
+ (_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw, ref_idx));)
-#define for_each_online_member(c, ca) \
- __for_each_online_member(c, ca, ~0, READ)
+#define for_each_online_member(c, ca, ref_idx) \
+ __for_each_online_member(c, ca, ~0, READ, ref_idx)
-#define for_each_rw_member(c, ca) \
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE)
+#define for_each_rw_member(c, ca, ref_idx) \
+ __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE, ref_idx)
-#define for_each_readable_member(c, ca) \
- __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ)
+#define for_each_readable_member(c, ca, ref_idx) \
+ __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ, ref_idx)
static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev)
{
@@ -218,13 +225,15 @@ static inline struct bch_dev *bch2_dev_rcu_noerror(struct bch_fs *c, unsigned de
: NULL;
}
-void bch2_dev_missing(struct bch_fs *, unsigned);
+int bch2_dev_missing_bkey(struct bch_fs *, struct bkey_s_c, unsigned);
+
+void bch2_dev_missing_atomic(struct bch_fs *, unsigned);
static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *c, unsigned dev)
{
struct bch_dev *ca = bch2_dev_rcu_noerror(c, dev);
if (unlikely(!ca))
- bch2_dev_missing(c, dev);
+ bch2_dev_missing_atomic(c, dev);
return ca;
}
@@ -242,7 +251,7 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev)
{
struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev);
if (unlikely(!ca))
- bch2_dev_missing(c, dev);
+ bch2_dev_missing_atomic(c, dev);
return ca;
}
@@ -285,13 +294,14 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev
return bch2_dev_tryget(c, dev_idx);
}
-static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw)
+static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
+ int rw, unsigned ref_idx)
{
might_sleep();
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu(c, dev);
- if (ca && !percpu_ref_tryget(&ca->io_ref[rw]))
+ if (ca && !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))
ca = NULL;
rcu_read_unlock();
@@ -301,27 +311,17 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
return ca;
if (ca)
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref_idx);
return NULL;
}
-/* XXX kill, move to struct bch_fs */
-static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
-{
- struct bch_devs_mask devs;
-
- memset(&devs, 0, sizeof(devs));
- for_each_online_member(c, ca)
- __set_bit(ca->dev_idx, devs.d);
- return devs;
-}
-
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
static inline bool bch2_member_alive(struct bch_member *m)
{
- return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
+ return !bch2_is_zero(&m->uuid, sizeof(m->uuid)) &&
+ !uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID);
}
static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev)
@@ -351,6 +351,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
? BCH_MEMBER_DURABILITY(mi) - 1
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
+ .resize_on_mount = BCH_MEMBER_RESIZE_ON_MOUNT(mi),
.valid = bch2_member_alive(mi),
.btree_bitmap_shift = mi->btree_bitmap_shift,
.btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
@@ -381,5 +382,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
int bch2_sb_member_alloc(struct bch_fs *);
+void bch2_sb_members_clean_deleted(struct bch_fs *);
#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
index 3affec823b3f..fb72ad730518 100644
--- a/fs/bcachefs/sb-members_format.h
+++ b/fs/bcachefs/sb-members_format.h
@@ -13,6 +13,10 @@
*/
#define BCH_SB_MEMBER_INVALID 255
+#define BCH_SB_MEMBER_DELETED_UUID \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, \
+ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+
#define BCH_MIN_NR_NBUCKETS (1 << 6)
#define BCH_IOPS_MEASUREMENTS() \
@@ -88,6 +92,8 @@ LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
struct bch_member, flags, 30, 31)
+LE64_BITMASK(BCH_MEMBER_RESIZE_ON_MOUNT,
+ struct bch_member, flags, 31, 32)
#if 0
LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
diff --git a/fs/bcachefs/sb-members_types.h b/fs/bcachefs/sb-members_types.h
index c0eda888fe39..d6443e186872 100644
--- a/fs/bcachefs/sb-members_types.h
+++ b/fs/bcachefs/sb-members_types.h
@@ -13,6 +13,7 @@ struct bch_member_cpu {
u8 data_allowed;
u8 durability;
u8 freespace_initialized;
+ u8 resize_on_mount;
u8 valid;
u8 btree_bitmap_shift;
u64 btree_allocated_bitmap;
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index fec569c7deb1..00d62d1190ef 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -141,7 +143,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
@@ -209,9 +211,14 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
{
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
- prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
- BCH_SNAPSHOT_SUBVOL(s.v),
- BCH_SNAPSHOT_DELETED(s.v),
+ if (BCH_SNAPSHOT_SUBVOL(s.v))
+ prt_str(out, "subvol ");
+ if (BCH_SNAPSHOT_WILL_DELETE(s.v))
+ prt_str(out, "will_delete ");
+ if (BCH_SNAPSHOT_DELETED(s.v))
+ prt_str(out, "deleted ");
+
+ prt_printf(out, "parent %10u children %10u %10u subvol %u tree %u",
le32_to_cpu(s.v->parent),
le32_to_cpu(s.v->children[0]),
le32_to_cpu(s.v->children[1]),
@@ -281,6 +288,16 @@ fsck_err:
return ret;
}
+static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
+{
+ mutex_lock(&c->snapshot_table_lock);
+ int ret = snapshot_t_mut(c, id)
+ ? 0
+ : -BCH_ERR_ENOMEM_mark_snapshot;
+ mutex_unlock(&c->snapshot_table_lock);
+ return ret;
+}
+
static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
@@ -302,7 +319,9 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
- t->live = true;
+ t->state = !BCH_SNAPSHOT_DELETED(s.v)
+ ? SNAPSHOT_ID_live
+ : SNAPSHOT_ID_deleted;
t->parent = le32_to_cpu(s.v->parent);
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
@@ -327,9 +346,9 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
parent - id - 1 < IS_ANCESTOR_BITMAP)
__set_bit(parent - id - 1, t->is_ancestor);
- if (BCH_SNAPSHOT_DELETED(s.v)) {
+ if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
+ if (c->recovery.pass_done > BCH_RECOVERY_PASS_delete_dead_snapshots)
bch2_delete_dead_snapshots_async(c);
}
} else {
@@ -390,22 +409,31 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
return 0;
}
-u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
+u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
+ snapshot_id_list *skip)
{
- u32 id = snapshot_root;
- u32 subvol = 0, s;
-
+ u32 id, subvol = 0, s;
+retry:
+ id = snapshot_root;
rcu_read_lock();
while (id && bch2_snapshot_exists(c, id)) {
- s = snapshot_t(c, id)->subvol;
-
- if (s && (!subvol || s < subvol))
- subvol = s;
+ if (!(skip && snapshot_list_has_id(skip, id))) {
+ s = snapshot_t(c, id)->subvol;
+ if (s && (!subvol || s < subvol))
+ subvol = s;
+ }
id = bch2_snapshot_tree_next(c, id);
+ if (id == snapshot_root)
+ break;
}
rcu_read_unlock();
+ if (!subvol && skip) {
+ skip = NULL;
+ goto retry;
+ }
+
return subvol;
}
@@ -437,7 +465,7 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
if (!ret && !found) {
struct bkey_i_subvolume *u;
- *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
+ *subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL);
u = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, *subvol_id),
@@ -654,7 +682,7 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u) ?:
bch2_snapshot_tree_create(trans, root_id,
- bch2_snapshot_tree_oldest_subvol(c, root_id),
+ bch2_snapshot_oldest_subvol(c, root_id, NULL),
&tree_id);
if (ret)
goto err;
@@ -699,6 +727,9 @@ static int check_snapshot(struct btree_trans *trans,
memset(&s, 0, sizeof(s));
memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
+ if (BCH_SNAPSHOT_DELETED(&s))
+ return 0;
+
id = le32_to_cpu(s.parent);
if (id) {
ret = bch2_snapshot_lookup(trans, id, &v);
@@ -736,7 +767,7 @@ static int check_snapshot(struct btree_trans *trans,
}
bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
- !BCH_SNAPSHOT_DELETED(&s);
+ !BCH_SNAPSHOT_WILL_DELETE(&s);
if (should_have_subvol) {
id = le32_to_cpu(s.subvol);
@@ -887,9 +918,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
}
bch2_trans_iter_exit(trans, &iter);
- return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
- bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
- bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0);
+ return bch2_snapshot_table_make_room(c, id) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0);
}
/* Figure out which snapshot nodes belong in the same tree: */
@@ -987,7 +1017,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
snapshot_id_list_to_text(&buf, t);
darray_for_each(*t, id) {
- if (fsck_err_on(!bch2_snapshot_exists(c, *id),
+ if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty,
trans, snapshot_node_missing,
"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
if (t->nr > 1) {
@@ -1012,22 +1042,38 @@ err:
return ret;
}
-int bch2_check_key_has_snapshot(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k)
+int __bch2_check_key_has_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0;
+ enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);
+
+ /* Snapshot was definitively deleted, this error is marked autofix */
+ if (fsck_err_on(state == SNAPSHOT_ID_deleted,
+ trans, bkey_in_deleted_snapshot,
+ "key in deleted snapshot %s, delete?",
+ (bch2_btree_id_to_text(&buf, iter->btree_id),
+ prt_char(&buf, ' '),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node) ?: 1;
- if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot),
+ /*
+ * Snapshot missing: we should have caught this with btree_lost_data and
+ * kicked off reconstruct_snapshots, so if we end up here we have no
+ * idea what happened:
+ */
+ if (fsck_err_on(state == SNAPSHOT_ID_empty,
trans, bkey_in_missing_snapshot,
"key in missing snapshot %s, delete?",
(bch2_btree_id_to_text(&buf, iter->btree_id),
prt_char(&buf, ' '),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_internal_snapshot_node) ?: 1;
+ BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
printbuf_exit(&buf);
return ret;
@@ -1051,10 +1097,10 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
}
/* already deleted? */
- if (BCH_SNAPSHOT_DELETED(&s->v))
+ if (BCH_SNAPSHOT_WILL_DELETE(&s->v))
goto err;
- SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+ SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true);
SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
s->v.subvol = 0;
err:
@@ -1074,24 +1120,25 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
struct btree_iter iter, p_iter = {};
struct btree_iter c_iter = {};
struct btree_iter tree_iter = {};
- struct bkey_s_c_snapshot s;
u32 parent_id, child_id;
unsigned i;
int ret = 0;
- s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
- BTREE_ITER_intent, snapshot);
- ret = bkey_err(s);
+ struct bkey_i_snapshot *s =
+ bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
+ BTREE_ITER_intent, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
"missing snapshot %u", id);
if (ret)
goto err;
- BUG_ON(s.v->children[1]);
+ BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
+ BUG_ON(s->v.children[1]);
- parent_id = le32_to_cpu(s.v->parent);
- child_id = le32_to_cpu(s.v->children[0]);
+ parent_id = le32_to_cpu(s->v.parent);
+ child_id = le32_to_cpu(s->v.children[0]);
if (parent_id) {
struct bkey_i_snapshot *parent;
@@ -1149,24 +1196,38 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
*/
struct bkey_i_snapshot_tree *s_t;
- BUG_ON(s.v->children[1]);
+ BUG_ON(s->v.children[1]);
s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
- BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
+ BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(s_t);
if (ret)
goto err;
- if (s.v->children[0]) {
- s_t->v.root_snapshot = s.v->children[0];
+ if (s->v.children[0]) {
+ s_t->v.root_snapshot = s->v.children[0];
} else {
s_t->k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&s_t->k, 0);
}
}
- ret = bch2_btree_delete_at(trans, &iter, 0);
+ if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) {
+ SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+ s->v.parent = 0;
+ s->v.children[0] = 0;
+ s->v.children[1] = 0;
+ s->v.subvol = 0;
+ s->v.tree = 0;
+ s->v.depth = 0;
+ s->v.skip[0] = 0;
+ s->v.skip[1] = 0;
+ s->v.skip[2] = 0;
+ } else {
+ s->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&s->k, 0);
+ }
err:
bch2_trans_iter_exit(trans, &tree_iter);
bch2_trans_iter_exit(trans, &p_iter);
@@ -1336,12 +1397,6 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it
*/
-struct snapshot_interior_delete {
- u32 id;
- u32 live_child;
-};
-typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
-
static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
darray_for_each(*l, i)
@@ -1375,28 +1430,34 @@ static unsigned __live_child(struct snapshot_table *t, u32 id,
return 0;
}
-static unsigned live_child(struct bch_fs *c, u32 id,
- snapshot_id_list *delete_leaves,
- interior_delete_list *delete_interior)
+static unsigned live_child(struct bch_fs *c, u32 id)
{
+ struct snapshot_delete *d = &c->snapshot_delete;
+
rcu_read_lock();
u32 ret = __live_child(rcu_dereference(c->snapshots), id,
- delete_leaves, delete_interior);
+ &d->delete_leaves, &d->delete_interior);
rcu_read_unlock();
return ret;
}
+static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
+{
+ return snapshot_list_has_id(&d->delete_leaves, id) ||
+ interior_delete_has_id(&d->delete_interior, id) != 0;
+}
+
static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
- struct bkey_s_c k,
- snapshot_id_list *delete_leaves,
- interior_delete_list *delete_interior)
+ struct bkey_s_c k)
{
- if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot))
+ struct snapshot_delete *d = &trans->c->snapshot_delete;
+
+ if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
- u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot);
+ u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot);
if (live_child) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
int ret = PTR_ERR_OR_ZERO(new);
@@ -1427,49 +1488,208 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
return 0;
}
+static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ u64 inum = iter->btree_id != BTREE_ID_inodes
+ ? iter->pos.inode
+ : iter->pos.offset;
+
+ if (*prev_inum == inum)
+ return false;
+
+ *prev_inum = inum;
+
+ bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
+ bch2_snapshot_tree(c, iter->pos.snapshot));
+ if (unlikely(ret)) {
+ struct bpos pos = iter->pos;
+ pos.snapshot = 0;
+ if (iter->btree_id != BTREE_ID_inodes)
+ pos.offset = U64_MAX;
+ bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos));
+ }
+
+ return ret;
+}
+
+static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
+ struct disk_reservation res = { 0 };
+ u64 prev_inum = 0;
+
+ d->pos.pos = POS_MIN;
+
+ if (!btree_type_has_snapshots(d->pos.btree))
+ continue;
+
+ int ret = for_each_btree_key_commit(trans, iter,
+ d->pos.btree, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+
+ bch2_disk_reservation_put(c, &res);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree,
+ struct bpos start, struct bpos end)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct disk_reservation res = { 0 };
+
+ d->pos.btree = btree;
+ d->pos.pos = POS_MIN;
+
+ int ret = for_each_btree_key_max_commit(trans, iter,
+ btree, start, end,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.pos = iter.pos;
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+
+ bch2_disk_reservation_put(c, &res);
+ return ret;
+}
+
+static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct disk_reservation res = { 0 };
+ u64 prev_inum = 0;
+ int ret = 0;
+
+ struct btree_iter iter;
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
+
+ while (1) {
+ struct bkey_s_c k;
+ ret = lockrestart_do(trans,
+ bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
+ if (ret)
+ break;
+
+ if (!k.k)
+ break;
+
+ d->pos.btree = iter.btree_id;
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ if (snapshot_id_dying(d, k.k->p.snapshot)) {
+ struct bpos start = POS(k.k->p.offset, 0);
+ struct bpos end = POS(k.k->p.offset, U64_MAX);
+
+ ret = delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?:
+ delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?:
+ delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end);
+ if (ret)
+ break;
+
+ bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1));
+ } else {
+ bch2_btree_iter_advance(trans, &iter);
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ goto err;
+
+ prev_inum = 0;
+ ret = for_each_btree_key_commit(trans, iter,
+ BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.btree = iter.btree_id;
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+err:
+ bch2_disk_reservation_put(c, &res);
+ return ret;
+}
+
/*
* For a given snapshot, if it doesn't have a subvolume that points to it, and
* it doesn't have child snapshot nodes - it's now redundant and we can mark it
* as deleted.
*/
-static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k,
- snapshot_id_list *delete_leaves,
- interior_delete_list *delete_interior)
+static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
{
if (k.k->type != KEY_TYPE_snapshot)
return 0;
struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;
+ int ret = 0;
if (BCH_SNAPSHOT_SUBVOL(s.v))
return 0;
+ if (BCH_SNAPSHOT_DELETED(s.v))
+ return 0;
+
+ mutex_lock(&d->progress_lock);
for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
live_children += child &&
- !snapshot_list_has_id(delete_leaves, child);
+ !snapshot_list_has_id(&d->delete_leaves, child);
}
+ u32 tree = bch2_snapshot_tree(c, s.k->p.offset);
+
if (live_children == 0) {
- return snapshot_list_add(c, delete_leaves, s.k->p.offset);
+ ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+ snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
} else if (live_children == 1) {
- struct snapshot_interior_delete d = {
+ struct snapshot_interior_delete n = {
.id = s.k->p.offset,
- .live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior),
+ .live_child = live_child(c, s.k->p.offset),
};
- if (!d.live_child) {
- bch_err(c, "error finding live child of snapshot %u", d.id);
- return -EINVAL;
+ if (!n.live_child) {
+ bch_err(c, "error finding live child of snapshot %u", n.id);
+ ret = -EINVAL;
+ } else {
+ ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+ darray_push(&d->delete_interior, n);
}
-
- return darray_push(delete_interior, d);
- } else {
- return 0;
}
+ mutex_unlock(&d->progress_lock);
+
+ return ret;
}
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
@@ -1498,6 +1718,9 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
struct bkey_i_snapshot *s;
int ret;
+ if (!bch2_snapshot_exists(c, k.k->p.offset))
+ return 0;
+
if (k.k->type != KEY_TYPE_snapshot)
return 0;
@@ -1545,39 +1768,56 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return bch2_trans_update(trans, iter, &s->k_i, 0);
}
-int bch2_delete_dead_snapshots(struct bch_fs *c)
+static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
{
- if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
+ prt_printf(out, "deleting from trees");
+ darray_for_each(d->deleting_from_trees, i)
+ prt_printf(out, " %u", *i);
+
+ prt_printf(out, "deleting leaves");
+ darray_for_each(d->delete_leaves, i)
+ prt_printf(out, " %u", *i);
+ prt_newline(out);
+
+ prt_printf(out, "interior");
+ darray_for_each(d->delete_interior, i)
+ prt_printf(out, " %u->%u", i->id, i->live_child);
+ prt_newline(out);
+}
+
+int __bch2_delete_dead_snapshots(struct bch_fs *c)
+{
+ struct snapshot_delete *d = &c->snapshot_delete;
+ int ret = 0;
+
+ if (!mutex_trylock(&d->lock))
return 0;
+ if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
+ goto out_unlock;
+
struct btree_trans *trans = bch2_trans_get(c);
- snapshot_id_list delete_leaves = {};
- interior_delete_list delete_interior = {};
- int ret = 0;
/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
+ d->running = true;
+ d->pos = BBPOS_MIN;
+
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
- check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior));
+ check_should_delete_snapshot(trans, k));
if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err;
- if (!delete_leaves.nr && !delete_interior.nr)
+ if (!d->delete_leaves.nr && !d->delete_interior.nr)
goto err;
{
struct printbuf buf = PRINTBUF;
- prt_printf(&buf, "deleting leaves");
- darray_for_each(delete_leaves, i)
- prt_printf(&buf, " %u", *i);
-
- prt_printf(&buf, " interior");
- darray_for_each(delete_interior, i)
- prt_printf(&buf, " %u->%u", i->id, i->live_child);
+ bch2_snapshot_delete_nodes_to_text(&buf, d);
ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
printbuf_exit(&buf);
@@ -1585,29 +1825,15 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
- for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
- struct disk_reservation res = { 0 };
-
- if (!btree_type_has_snapshots(btree))
- continue;
-
- ret = for_each_btree_key_commit(trans, iter,
- btree, POS_MIN,
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- &res, NULL, BCH_TRANS_COMMIT_no_enospc,
- delete_dead_snapshots_process_key(trans, &iter, k,
- &delete_leaves,
- &delete_interior));
-
- bch2_disk_reservation_put(c, &res);
-
- if (!bch2_err_matches(ret, EROFS))
- bch_err_msg(c, ret, "deleting keys from dying snapshots");
- if (ret)
- goto err;
- }
+ ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
+ ? delete_dead_snapshot_keys_v2(trans)
+ : delete_dead_snapshot_keys_v1(trans);
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting keys from dying snapshots");
+ if (ret)
+ goto err;
- darray_for_each(delete_leaves, i) {
+ darray_for_each(d->delete_leaves, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
if (!bch2_err_matches(ret, EROFS))
@@ -1624,11 +1850,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior));
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
if (ret)
goto err;
- darray_for_each(delete_interior, i) {
+ darray_for_each(d->delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, i->id));
if (!bch2_err_matches(ret, EROFS))
@@ -1637,33 +1863,66 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
err:
- darray_exit(&delete_interior);
- darray_exit(&delete_leaves);
+ mutex_lock(&d->progress_lock);
+ darray_exit(&d->deleting_from_trees);
+ darray_exit(&d->delete_interior);
+ darray_exit(&d->delete_leaves);
+ d->running = false;
+ mutex_unlock(&d->progress_lock);
bch2_trans_put(trans);
+out_unlock:
+ mutex_unlock(&d->lock);
if (!bch2_err_matches(ret, EROFS))
bch_err_fn(c, ret);
return ret;
}
+int bch2_delete_dead_snapshots(struct bch_fs *c)
+{
+ if (!c->opts.auto_snapshot_deletion)
+ return 0;
+
+ return __bch2_delete_dead_snapshots(c);
+}
+
void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+ struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
bch2_delete_dead_snapshots(c);
- bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots))
+ if (!c->opts.auto_snapshot_deletion)
+ return;
+
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots))
return;
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
- if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+ if (!queue_work(c->write_ref_wq, &c->snapshot_delete.work))
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
+}
+
+void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ if (!d->running) {
+ prt_str(out, "(not running)");
+ return;
+ }
+
+ mutex_lock(&d->progress_lock);
+ bch2_snapshot_delete_nodes_to_text(out, d);
+
+ bch2_bbpos_to_text(out, d->pos);
+ mutex_unlock(&d->progress_lock);
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
@@ -1704,7 +1963,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
return 0;
struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
- if (BCH_SNAPSHOT_DELETED(snap.v) ||
+ if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
interior_snapshot_needs_delete(snap))
set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
@@ -1733,10 +1992,6 @@ int bch2_snapshots_read(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
- if (bch2_err_matches(ret, EIO) ||
- (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
- ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
-
return ret;
}
@@ -1744,3 +1999,11 @@ void bch2_fs_snapshots_exit(struct bch_fs *c)
{
kvfree(rcu_dereference_protected(c->snapshots, true));
}
+
+void bch2_fs_snapshots_init_early(struct bch_fs *c)
+{
+ INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
+ mutex_init(&c->snapshot_delete.lock);
+ mutex_init(&c->snapshot_delete.progress_lock);
+ mutex_init(&c->snapshots_unlinked_lock);
+}
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 81180181d7c9..382a171f5413 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -105,7 +105,7 @@ static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
return id;
}
-u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *, u32);
+u32 bch2_snapshot_oldest_subvol(struct bch_fs *, u32, snapshot_id_list *);
u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
@@ -120,21 +120,26 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
return id;
}
-static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id)
+static inline enum snapshot_id_state __bch2_snapshot_id_state(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s = snapshot_t(c, id);
- return s ? s->live : 0;
+ return s ? s->state : SNAPSHOT_ID_empty;
}
-static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
+static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id)
{
rcu_read_lock();
- bool ret = __bch2_snapshot_exists(c, id);
+ enum snapshot_id_state ret = __bch2_snapshot_id_state(c, id);
rcu_read_unlock();
return ret;
}
+static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
+{
+ return bch2_snapshot_id_state(c, id) == SNAPSHOT_ID_live;
+}
+
static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
rcu_read_lock();
@@ -241,10 +246,19 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
int bch2_reconstruct_snapshots(struct bch_fs *);
-int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
+
+int __bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
+
+static inline int bch2_check_key_has_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ return likely(bch2_snapshot_exists(trans->c, k.k->p.snapshot))
+ ? 0
+ : __bch2_check_key_has_snapshot(trans, iter, k);
+}
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
-void bch2_delete_dead_snapshots_work(struct work_struct *);
int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos);
@@ -259,7 +273,14 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return __bch2_key_has_snapshot_overwrites(trans, id, pos);
}
+int __bch2_delete_dead_snapshots(struct bch_fs *);
+int bch2_delete_dead_snapshots(struct bch_fs *);
+void bch2_delete_dead_snapshots_work(struct work_struct *);
+void bch2_delete_dead_snapshots_async(struct bch_fs *);
+void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *);
+
int bch2_snapshots_read(struct bch_fs *);
void bch2_fs_snapshots_exit(struct bch_fs *);
+void bch2_fs_snapshots_init_early(struct bch_fs *);
#endif /* _BCACHEFS_SNAPSHOT_H */
diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h
index aabcd3a74cd9..9bccae1f3590 100644
--- a/fs/bcachefs/snapshot_format.h
+++ b/fs/bcachefs/snapshot_format.h
@@ -15,10 +15,10 @@ struct bch_snapshot {
bch_le128 btime;
};
-LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
-
+LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1)
/* True if a subvolume points to this snapshot node: */
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
+LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3)
/*
* Snapshot trees:
diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h
new file mode 100644
index 000000000000..0ab698f13e5c
--- /dev/null
+++ b/fs/bcachefs/snapshot_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SNAPSHOT_TYPES_H
+#define _BCACHEFS_SNAPSHOT_TYPES_H
+
+#include "bbpos_types.h"
+#include "darray.h"
+#include "subvolume_types.h"
+
+typedef DARRAY(u32) snapshot_id_list;
+
+#define IS_ANCESTOR_BITMAP 128
+
+struct snapshot_t {
+ enum snapshot_id_state {
+ SNAPSHOT_ID_empty,
+ SNAPSHOT_ID_live,
+ SNAPSHOT_ID_deleted,
+ } state;
+ u32 parent;
+ u32 skip[3];
+ u32 depth;
+ u32 children[2];
+ u32 subvol; /* Nonzero only if a subvolume points to this node: */
+ u32 tree;
+ unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
+};
+
+struct snapshot_table {
+ struct rcu_head rcu;
+ size_t nr;
+#ifndef RUST_BINDGEN
+ DECLARE_FLEX_ARRAY(struct snapshot_t, s);
+#else
+ struct snapshot_t s[0];
+#endif
+};
+
+struct snapshot_interior_delete {
+ u32 id;
+ u32 live_child;
+};
+typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
+
+struct snapshot_delete {
+ struct mutex lock;
+ struct work_struct work;
+
+ struct mutex progress_lock;
+ snapshot_id_list deleting_from_trees;
+ snapshot_id_list delete_leaves;
+ interior_delete_list delete_interior;
+
+ bool running;
+ struct bbpos pos;
+};
+
+#endif /* _BCACHEFS_SNAPSHOT_TYPES_H */
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index a90bf7b8a2b4..0cbf5508a32c 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -101,17 +101,25 @@ static noinline int hash_pick_winner(struct btree_trans *trans,
}
}
-static int repair_inode_hash_info(struct btree_trans *trans,
- struct bch_inode_unpacked *snapshot_root)
+/*
+ * str_hash lookups across snapshots break in wild ways if hash_info in
+ * different snapshot versions doesn't match - so if we find one mismatch, check
+ * them all
+ */
+int bch2_repair_inode_hash_info(struct btree_trans *trans,
+ struct bch_inode_unpacked *snapshot_root)
{
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ bool need_commit = false;
int ret = 0;
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
- SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != snapshot_root->bi_inum)
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes,
+ POS(0, snapshot_root->bi_inum),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (bpos_ge(k.k->p, SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot)))
break;
if (!bkey_is_inode(k.k))
continue;
@@ -121,19 +129,72 @@ static int repair_inode_hash_info(struct btree_trans *trans,
if (ret)
break;
- if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed ||
- INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root),
- trans, inode_snapshot_mismatch,
- "inode hash info in different snapshots don't match")) {
+ if (inode.bi_hash_seed == snapshot_root->bi_hash_seed &&
+ INODE_STR_HASH(&inode) == INODE_STR_HASH(snapshot_root)) {
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bch_hash_info hash1 = bch2_hash_info_init(c, snapshot_root);
+ struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
+
+ BUG_ON(hash1.type != hash2.type ||
+ memcmp(&hash1.siphash_key,
+ &hash2.siphash_key,
+ sizeof(hash1.siphash_key)));
+#endif
+ continue;
+ }
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "inode %llu hash info in snapshots %u %u don't match\n",
+ snapshot_root->bi_inum,
+ inode.bi_snapshot,
+ snapshot_root->bi_snapshot);
+
+ bch2_prt_str_hash_type(&buf, INODE_STR_HASH(&inode));
+ prt_printf(&buf, " %llx\n", inode.bi_hash_seed);
+
+ bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
+ prt_printf(&buf, " %llx", snapshot_root->bi_hash_seed);
+
+ if (fsck_err(trans, inode_snapshot_mismatch, "%s", buf.buf)) {
inode.bi_hash_seed = snapshot_root->bi_hash_seed;
SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root));
- ret = __bch2_fsck_write_inode(trans, &inode) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
- break;
+
+ ret = __bch2_fsck_write_inode(trans, &inode);
+ if (ret)
+ break;
+ need_commit = true;
}
}
+
+ if (ret)
+ goto err;
+
+ if (!need_commit) {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n",
+ snapshot_root->bi_inum);
+
+ prt_printf(&buf, "root snapshot %u ", snapshot_root->bi_snapshot);
+ bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
+ prt_printf(&buf, " %llx\n", snapshot_root->bi_hash_seed);
+#if 0
+ prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot);
+ bch2_prt_str_hash_type(&buf, hash_info->type);
+ prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1);
+#endif
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
+ }
+
+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ -BCH_ERR_transaction_restart_nested;
+err:
fsck_err:
+ printbuf_exit(&buf);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -145,46 +206,18 @@ fsck_err:
static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
struct bch_hash_info *hash_info)
{
- struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret = 0;
-
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
- break;
- if (bkey_is_inode(k.k))
- goto found;
- }
- bch_err(c, "%s(): inum %llu not found", __func__, inum);
- ret = -BCH_ERR_fsck_repair_unimplemented;
- goto err;
-found:;
- struct bch_inode_unpacked inode;
- ret = bch2_inode_unpack(k, &inode);
+ struct bch_inode_unpacked snapshot_root;
+ int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root);
if (ret)
- goto err;
+ return ret;
+
+ struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root);
+ if (hash_info->type != hash_root.type ||
+ memcmp(&hash_info->siphash_key,
+ &hash_root.siphash_key,
+ sizeof(hash_root.siphash_key)))
+ ret = bch2_repair_inode_hash_info(trans, &snapshot_root);
- struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
- if (hash_info->type != hash2.type ||
- memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) {
- ret = repair_inode_hash_info(trans, &inode);
- if (!ret) {
- bch_err(c, "inode hash info mismatch with root, but mismatch not found\n"
- "%u %llx %llx\n"
- "%u %llx %llx",
- hash_info->type,
- hash_info->siphash_key.k0,
- hash_info->siphash_key.k1,
- hash2.type,
- hash2.siphash_key.k0,
- hash2.siphash_key.k1);
- ret = -BCH_ERR_fsck_repair_unimplemented;
- }
- }
-err:
- bch2_trans_iter_exit(trans, &iter);
return ret;
}
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 0c1a00539bd1..6762b3627e1b 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -32,6 +32,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
}
struct bch_hash_info {
+ u32 inum_snapshot;
u8 type;
struct unicode_map *cf_encoding;
/*
@@ -45,11 +46,12 @@ static inline struct bch_hash_info
bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
struct bch_hash_info info = {
- .type = INODE_STR_HASH(bi),
+ .inum_snapshot = bi->bi_snapshot,
+ .type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
- .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
+ .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
- .siphash_key = { .k0 = bi->bi_hash_seed }
+ .siphash_key = { .k0 = bi->bi_hash_seed }
};
if (unlikely(info.type == BCH_STR_HASH_siphash_old)) {
@@ -392,6 +394,8 @@ int bch2_hash_delete(struct btree_trans *trans,
return ret;
}
+int bch2_repair_inode_hash_info(struct btree_trans *, struct bch_inode_unpacked *);
+
struct snapshots_seen;
int __bch2_str_hash_check_key(struct btree_trans *,
struct snapshots_seen *,
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index d0209f7658bb..35c9f86a73c1 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -14,6 +15,22 @@
static int bch2_subvolume_delete(struct btree_trans *, u32);
+static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
+{
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "missing subvolume %u", subvolid);
+ bool print = bch2_count_fsck_err(c, subvol_missing, &buf);
+
+ int ret = bch2_run_explicit_recovery_pass(c, &buf,
+ BCH_RECOVERY_PASS_check_inodes, 0);
+ if (print)
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static struct bpos subvolume_children_pos(struct bkey_s_c k)
{
if (k.k->type != KEY_TYPE_subvolume)
@@ -45,7 +62,7 @@ static int check_subvol(struct btree_trans *trans,
ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
if (bch2_err_matches(ret, ENOENT))
- return bch2_run_explicit_recovery_pass(c,
+ return bch2_run_print_explicit_recovery_pass(c,
BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
if (ret)
return ret;
@@ -292,9 +309,8 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
BTREE_ITER_cached|
BTREE_ITER_with_updates, subvolume, s);
- bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
- inconsistent_if_not_found,
- trans->c, "missing subvolume %u", subvol);
+ if (bch2_err_matches(ret, ENOENT) && inconsistent_if_not_found)
+ ret = bch2_subvolume_missing(trans->c, subvol) ?: ret;
return ret;
}
@@ -344,8 +360,8 @@ int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
subvolume);
ret = bkey_err(subvol);
- bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c,
- "missing subvolume %u", subvolid);
+ if (bch2_err_matches(ret, ENOENT))
+ ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret;
if (likely(!ret))
*snapid = le32_to_cpu(subvol.v->snapshot);
@@ -418,8 +434,8 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
BTREE_ITER_cached|BTREE_ITER_intent,
subvolume);
int ret = bkey_err(subvol);
- bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
- "missing subvolume %u", subvolid);
+ if (bch2_err_matches(ret, ENOENT))
+ ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret;
if (ret)
goto err;
@@ -479,13 +495,11 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
{
struct bch_fs *c = container_of(work, struct bch_fs,
snapshot_wait_for_pagecache_and_delete_work);
- snapshot_id_list s;
- u32 *id;
int ret = 0;
while (!ret) {
mutex_lock(&c->snapshots_unlinked_lock);
- s = c->snapshots_unlinked;
+ snapshot_id_list s = c->snapshots_unlinked;
darray_init(&c->snapshots_unlinked);
mutex_unlock(&c->snapshots_unlinked_lock);
@@ -494,7 +508,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
bch2_evict_subvolume_inodes(c, &s);
- for (id = s.data; id < s.data + s.nr; id++) {
+ darray_for_each(s, id) {
ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
bch_err_msg(c, ret, "deleting subvolume %u", *id);
if (ret)
@@ -504,7 +518,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
darray_exit(&s);
}
- bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
}
struct subvolume_unlink_hook {
@@ -527,11 +541,11 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans
if (ret)
return ret;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache))
return -EROFS;
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
return 0;
}
@@ -555,11 +569,10 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
BTREE_ID_subvolumes, POS(0, subvolid),
BTREE_ITER_cached, subvolume);
ret = PTR_ERR_OR_ZERO(n);
- if (unlikely(ret)) {
- bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
- "missing subvolume %u", subvolid);
+ if (bch2_err_matches(ret, ENOENT))
+ ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret;
+ if (unlikely(ret))
return ret;
- }
SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
n->v.fs_path_parent = 0;
@@ -598,11 +611,10 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
BTREE_ID_subvolumes, POS(0, src_subvolid),
BTREE_ITER_cached, subvolume);
ret = PTR_ERR_OR_ZERO(src_subvol);
- if (unlikely(ret)) {
- bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
- "subvolume %u not found", src_subvolid);
+ if (bch2_err_matches(ret, ENOENT))
+ ret = bch2_subvolume_missing(trans->c, src_subvolid) ?: ret;
+ if (unlikely(ret))
goto err;
- }
parent = le32_to_cpu(src_subvol->v.snapshot);
}
@@ -716,11 +728,8 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
return ret;
}
-int bch2_fs_subvolumes_init(struct bch_fs *c)
+void bch2_fs_subvolumes_init_early(struct bch_fs *c)
{
- INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
bch2_subvolume_wait_for_pagecache_and_delete);
- mutex_init(&c->snapshots_unlinked_lock);
- return 0;
}
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index f640c1e3d639..075f55e25c70 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -77,15 +77,12 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btr
_end, _subvolid, _flags, _k, _do); \
})
-int bch2_delete_dead_snapshots(struct bch_fs *);
-void bch2_delete_dead_snapshots_async(struct bch_fs *);
-
int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
int bch2_initialize_subvolumes(struct bch_fs *);
int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);
-int bch2_fs_subvolumes_init(struct bch_fs *);
+void bch2_fs_subvolumes_init_early(struct bch_fs *);
#endif /* _BCACHEFS_SUBVOLUME_H */
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index 1549d6daf7af..9d634b906dcd 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -2,33 +2,6 @@
#ifndef _BCACHEFS_SUBVOLUME_TYPES_H
#define _BCACHEFS_SUBVOLUME_TYPES_H
-#include "darray.h"
-
-typedef DARRAY(u32) snapshot_id_list;
-
-#define IS_ANCESTOR_BITMAP 128
-
-struct snapshot_t {
- bool live;
- u32 parent;
- u32 skip[3];
- u32 depth;
- u32 children[2];
- u32 subvol; /* Nonzero only if a subvolume points to this node: */
- u32 tree;
- unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
-};
-
-struct snapshot_table {
- struct rcu_head rcu;
- size_t nr;
-#ifndef RUST_BINDGEN
- DECLARE_FLEX_ARRAY(struct snapshot_t, s);
-#else
- struct snapshot_t s[0];
-#endif
-};
-
typedef struct {
/* we can't have padding in this struct: */
u64 subvol;
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index cb5d960aed92..6687b9235d3c 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -87,7 +87,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
struct printbuf buf = PRINTBUF;
prt_str(&buf, "requested incompat feature ");
bch2_version_to_text(&buf, version);
- prt_str(&buf, " currently not enabled");
+ prt_str(&buf, " currently not enabled, allowed up to ");
+ bch2_version_to_text(&buf, version);
prt_printf(&buf, "\n set version_upgrade=incompat to enable");
bch_notice(c, "%s", buf.buf);
@@ -260,11 +261,11 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
/* XXX: we're not checking that offline device have enough space */
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_field_resize) {
struct bch_sb_handle *dev_sb = &ca->disk_sb;
if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_sb_field_resize);
return NULL;
}
}
@@ -384,7 +385,6 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out)
int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
enum bch_validate_flags flags, struct printbuf *out)
{
- struct bch_sb_field_members_v1 *mi;
enum bch_opt_id opt_id;
int ret;
@@ -468,6 +468,9 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb));
}
+ if (sb->nr_devices > 1)
+ SET_BCH_SB_MULTI_DEVICE(sb, true);
+
if (!flags) {
/*
* Been seeing a bug where these are getting inexplicably
@@ -536,14 +539,17 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
}
}
+ struct bch_sb_field *mi =
+ bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v2) ?:
+ bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v1);
+
/* members must be validated first: */
- mi = bch2_sb_field_get(sb, members_v1);
if (!mi) {
prt_printf(out, "Invalid superblock: member info area missing");
return -BCH_ERR_invalid_sb_members_missing;
}
- ret = bch2_sb_field_validate(sb, &mi->field, flags, out);
+ ret = bch2_sb_field_validate(sb, mi, flags, out);
if (ret)
return ret;
@@ -612,11 +618,15 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.features = le64_to_cpu(src->features[0]);
c->sb.compat = le64_to_cpu(src->compat[0]);
+ c->sb.multi_device = BCH_SB_MULTI_DEVICE(src);
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
if (ext) {
+ c->sb.recovery_passes_required =
+ bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
@@ -961,7 +971,7 @@ static void write_super_endio(struct bio *bio)
}
closure_put(&ca->fs->sb_write);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
@@ -979,7 +989,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
closure_bio_submit(bio, &c->sb_write);
}
@@ -1005,7 +1015,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
bio_sectors(bio));
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
closure_bio_submit(bio, &c->sb_write);
}
@@ -1022,7 +1032,7 @@ int bch2_write_super(struct bch_fs *c)
trace_and_count(c, write_super, c, _RET_IP_);
- if (c->opts.very_degraded)
+ if (c->opts.degraded == BCH_DEGRADED_very)
degraded_flags |= BCH_FORCE_IF_LOST;
lockdep_assert_held(&c->sb_lock);
@@ -1037,13 +1047,13 @@ int bch2_write_super(struct bch_fs *c)
* For now, we expect to be able to call write_super() when we're not
* yet RW:
*/
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_write_super) {
ret = darray_push(&online_devices, ca);
if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
goto out;
}
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}
/* Make sure we're using the new magic numbers: */
@@ -1210,7 +1220,7 @@ out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
darray_for_each(online_devices, ca)
- percpu_ref_put(&(*ca)->io_ref[READ]);
+ enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super);
darray_exit(&online_devices);
printbuf_exit(&err);
return ret;
@@ -1270,6 +1280,31 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
}
}
+void bch2_sb_upgrade_incompat(struct bch_fs *c)
+{
+ mutex_lock(&c->sb_lock);
+ if (c->sb.version == c->sb.version_incompat_allowed)
+ goto unlock;
+
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Now allowing incompatible features up to ");
+ bch2_version_to_text(&buf, c->sb.version);
+ prt_str(&buf, ", previously allowed up to ");
+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
+ prt_newline(&buf);
+
+ bch_notice(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
+ max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version));
+ bch2_write_super(c);
+unlock:
+ mutex_unlock(&c->sb_lock);
+}
+
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
enum bch_validate_flags flags, struct printbuf *err)
{
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index 78f708a6fbcd..a3b7a90f2533 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -107,6 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
bool bch2_check_version_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned, bool);
+void bch2_sb_upgrade_incompat(struct bch_fs *);
void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
struct bch_sb_field *);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 84a37d971ffd..11579b74c640 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -10,6 +10,8 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
+#include "backpointers.h"
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_gc.h"
@@ -28,6 +30,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -48,6 +51,7 @@
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-counters.h"
@@ -75,14 +79,32 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
MODULE_DESCRIPTION("bcachefs filesystem");
-const char * const bch2_fs_flag_strs[] = {
+typedef DARRAY(struct bch_sb_handle) bch_sb_handles;
+
#define x(n) #n,
+const char * const bch2_fs_flag_strs[] = {
BCH_FS_FLAGS()
-#undef x
NULL
};
-void bch2_print_str(struct bch_fs *c, const char *str)
+const char * const bch2_write_refs[] = {
+ BCH_WRITE_REFS()
+ NULL
+};
+
+const char * const bch2_dev_read_refs[] = {
+ BCH_DEV_READ_REFS()
+ NULL
+};
+
+const char * const bch2_dev_write_refs[] = {
+ BCH_DEV_WRITE_REFS()
+ NULL
+};
+#undef x
+
+static void __bch2_print_str(struct bch_fs *c, const char *prefix,
+ const char *str, bool nonblocking)
{
#ifdef __KERNEL__
struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
@@ -92,7 +114,17 @@ void bch2_print_str(struct bch_fs *c, const char *str)
return;
}
#endif
- bch2_print_string_as_lines(KERN_ERR, str);
+ bch2_print_string_as_lines(KERN_ERR, str, nonblocking);
+}
+
+void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
+{
+ __bch2_print_str(c, prefix, str, false);
+}
+
+void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str)
+{
+ __bch2_print_str(c, prefix, str, true);
}
__printf(2, 0)
@@ -183,6 +215,7 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned);
static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
+static int bch2_fs_init_rw(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -297,15 +330,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
}
}
-#ifndef BCH_WRITE_REF_DEBUG
-static void bch2_writes_disabled(struct percpu_ref *writes)
+static void bch2_writes_disabled(struct enumerated_ref *writes)
{
struct bch_fs *c = container_of(writes, struct bch_fs, writes);
set_bit(BCH_FS_write_disable_complete, &c->flags);
wake_up(&bch2_read_only_wait);
}
-#endif
void bch2_fs_read_only(struct bch_fs *c)
{
@@ -323,12 +354,7 @@ void bch2_fs_read_only(struct bch_fs *c)
* writes will return -EROFS:
*/
set_bit(BCH_FS_going_ro, &c->flags);
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_kill(&c->writes);
-#else
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
- bch2_write_ref_put(c, i);
-#endif
+ enumerated_ref_stop_async(&c->writes);
/*
* If we're not doing an emergency shutdown, we want to wait on
@@ -366,7 +392,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
- c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
+ c->recovery.pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_long_read(&c->btree_cache.nr_dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@@ -412,6 +438,30 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
return ret;
}
+static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *out,
+ bool locked)
+{
+ bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
+
+ if (!locked)
+ bch2_journal_halt(&c->journal);
+ else
+ bch2_journal_halt_locked(&c->journal);
+ bch2_fs_read_only_async(c);
+ wake_up(&bch2_read_only_wait);
+
+ if (ret)
+ prt_printf(out, "emergency read only at seq %llu\n",
+ journal_cur_seq(&c->journal));
+
+ return ret;
+}
+
+bool bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *out)
+{
+ return __bch2_fs_emergency_read_only2(c, out, false);
+}
+
bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
{
bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
@@ -429,26 +479,42 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
+ if (WARN_ON(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))
+ return -BCH_ERR_erofs_no_alloc_info;
+
if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) {
bch_err(c, "cannot go rw, unfixed btree errors");
return -BCH_ERR_erofs_unfixed_errors;
}
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_err(c, "cannot go rw, filesystem is an unresized image file");
+ return -BCH_ERR_erofs_filesystem_full;
+ }
+
if (test_bit(BCH_FS_rw, &c->flags))
return 0;
bch_info(c, "going read-write");
+ ret = bch2_fs_init_rw(c);
+ if (ret)
+ goto err;
+
ret = bch2_sb_members_v2_init(c);
if (ret)
goto err;
clear_bit(BCH_FS_clean_shutdown, &c->flags);
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
- bch2_dev_allocator_add(c, ca);
- percpu_ref_reinit(&ca->io_ref[WRITE]);
- }
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
+ if (ca->mi.state == BCH_MEMBER_STATE_rw) {
+ bch2_dev_allocator_add(c, ca);
+ enumerated_ref_start(&ca->io_ref[WRITE]);
+ }
+ rcu_read_unlock();
+
bch2_recalc_capacity(c);
/*
@@ -474,14 +540,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_rw, &c->flags);
set_bit(BCH_FS_was_rw, &c->flags);
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_reinit(&c->writes);
-#else
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
- BUG_ON(atomic_long_read(&c->writes[i]));
- atomic_long_inc(&c->writes[i]);
- }
-#endif
+ enumerated_ref_start(&c->writes);
ret = bch2_copygc_start(c);
if (ret) {
@@ -517,6 +576,9 @@ int bch2_fs_read_write(struct bch_fs *c)
if (c->opts.nochanges)
return -BCH_ERR_erofs_nochanges;
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))
+ return -BCH_ERR_erofs_no_alloc_info;
+
return __bch2_fs_read_write(c, false);
}
@@ -543,35 +605,37 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
bch2_free_fsck_errs(c);
- bch2_fs_accounting_exit(c);
- bch2_fs_sb_errors_exit(c);
- bch2_fs_counters_exit(c);
+ bch2_fs_vfs_exit(c);
bch2_fs_snapshots_exit(c);
+ bch2_fs_sb_errors_exit(c);
+ bch2_fs_replicas_exit(c);
+ bch2_fs_rebalance_exit(c);
bch2_fs_quota_exit(c);
+ bch2_fs_nocow_locking_exit(c);
+ bch2_fs_journal_exit(&c->journal);
bch2_fs_fs_io_direct_exit(c);
bch2_fs_fs_io_buffered_exit(c);
bch2_fs_fsio_exit(c);
- bch2_fs_vfs_exit(c);
- bch2_fs_ec_exit(c);
- bch2_fs_encryption_exit(c);
- bch2_fs_nocow_locking_exit(c);
bch2_fs_io_write_exit(c);
bch2_fs_io_read_exit(c);
+ bch2_fs_encryption_exit(c);
+ bch2_fs_ec_exit(c);
+ bch2_fs_counters_exit(c);
+ bch2_fs_compress_exit(c);
+ bch2_io_clock_exit(&c->io_clock[WRITE]);
+ bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_buckets_waiting_for_journal_exit(c);
- bch2_fs_btree_interior_update_exit(c);
+ bch2_fs_btree_write_buffer_exit(c);
bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
- bch2_fs_btree_cache_exit(c);
bch2_fs_btree_iter_exit(c);
- bch2_fs_replicas_exit(c);
- bch2_fs_journal_exit(&c->journal);
- bch2_io_clock_exit(&c->io_clock[WRITE]);
- bch2_io_clock_exit(&c->io_clock[READ]);
- bch2_fs_compress_exit(c);
- bch2_fs_btree_gc_exit(c);
+ bch2_fs_btree_interior_update_exit(c);
+ bch2_fs_btree_cache_exit(c);
+ bch2_fs_accounting_exit(c);
+ bch2_fs_async_obj_exit(c);
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
+
BUG_ON(atomic_read(&c->journal_keys.ref));
- bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
if (c->online_reserved) {
u64 v = percpu_u64_get(c->online_reserved);
@@ -587,9 +651,7 @@ static void __bch2_fs_free(struct bch_fs *c)
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->fill_iter);
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_exit(&c->writes);
-#endif
+ enumerated_ref_exit(&c->writes);
kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table);
@@ -601,8 +663,8 @@ static void __bch2_fs_free(struct bch_fs *c)
destroy_workqueue(c->btree_read_complete_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
- if (c->btree_io_complete_wq)
- destroy_workqueue(c->btree_io_complete_wq);
+ if (c->btree_write_complete_wq)
+ destroy_workqueue(c->btree_write_complete_wq);
if (c->btree_update_wq)
destroy_workqueue(c->btree_update_wq);
@@ -628,6 +690,12 @@ void __bch2_fs_stop(struct bch_fs *c)
bch2_fs_read_only(c);
up_write(&c->state_lock);
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
+ if (ca)
+ bch2_dev_io_ref_stop(ca, READ);
+ }
+
for_each_member_device(c, ca)
bch2_dev_unlink(ca);
@@ -656,8 +724,6 @@ void __bch2_fs_stop(struct bch_fs *c)
void bch2_fs_free(struct bch_fs *c)
{
- unsigned i;
-
mutex_lock(&bch_fs_list_lock);
list_del(&c->list);
mutex_unlock(&bch_fs_list_lock);
@@ -665,7 +731,7 @@ void bch2_fs_free(struct bch_fs *c)
closure_sync(&c->cl);
closure_debug_destroy(&c->cl);
- for (i = 0; i < c->sb.nr_devices; i++) {
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
if (ca) {
@@ -693,9 +759,10 @@ static int bch2_fs_online(struct bch_fs *c)
lockdep_assert_held(&bch_fs_list_lock);
- if (__bch2_uuid_to_fs(c->sb.uuid)) {
+ if (c->sb.multi_device &&
+ __bch2_uuid_to_fs(c->sb.uuid)) {
bch_err(c, "filesystem UUID already open");
- return -EINVAL;
+ return -BCH_ERR_filesystem_uuid_already_open;
}
ret = bch2_fs_chardev_init(c);
@@ -706,7 +773,9 @@ static int bch2_fs_online(struct bch_fs *c)
bch2_fs_debug_init(c);
- ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?:
+ ret = (c->sb.multi_device
+ ? kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b)
+ : kobject_add(&c->kobj, NULL, "%s", c->name)) ?:
kobject_add(&c->internal, &c->kobj, "internal") ?:
kobject_add(&c->opts_dir, &c->kobj, "options") ?:
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
@@ -737,7 +806,37 @@ err:
return ret;
}
-static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
+static int bch2_fs_init_rw(struct bch_fs *c)
+{
+ if (test_bit(BCH_FS_rw_init_done, &c->flags))
+ return 0;
+
+ if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
+ WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
+ !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
+ WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+ !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
+ WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+ !(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
+ WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+ !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
+ WQ_FREEZABLE, 0)))
+ return -BCH_ERR_ENOMEM_fs_other_alloc;
+
+ int ret = bch2_fs_btree_interior_update_init(c) ?:
+ bch2_fs_btree_write_buffer_init(c) ?:
+ bch2_fs_fs_io_buffered_init(c) ?:
+ bch2_fs_io_write_init(c) ?:
+ bch2_fs_journal_init(&c->journal);
+ if (ret)
+ return ret;
+
+ set_bit(BCH_FS_rw_init_done, &c->flags);
+ return 0;
+}
+
+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
+ bch_sb_handles *sbs)
{
struct bch_fs *c;
struct printbuf name = PRINTBUF;
@@ -750,7 +849,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto out;
}
- c->stdio = (void *)(unsigned long) opts.stdio;
+ c->stdio = (void *)(unsigned long) opts->stdio;
__module_get(THIS_MODULE);
@@ -774,24 +873,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
refcount_set(&c->ro_ref, 1);
init_waitqueue_head(&c->ro_ref_wait);
- spin_lock_init(&c->recovery_pass_lock);
- sema_init(&c->online_fsck_mutex, 1);
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
- bch2_fs_copygc_init(c);
- bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
- bch2_fs_btree_iter_init_early(c);
- bch2_fs_btree_interior_update_init_early(c);
- bch2_fs_journal_keys_init(c);
bch2_fs_allocator_background_init(c);
bch2_fs_allocator_foreground_init(c);
- bch2_fs_rebalance_init(c);
- bch2_fs_quota_init(c);
+ bch2_fs_btree_cache_init_early(&c->btree_cache);
+ bch2_fs_btree_gc_init_early(c);
+ bch2_fs_btree_interior_update_init_early(c);
+ bch2_fs_btree_iter_init_early(c);
+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
+ bch2_fs_btree_write_buffer_init_early(c);
+ bch2_fs_copygc_init(c);
bch2_fs_ec_init_early(c);
+ bch2_fs_journal_init_early(&c->journal);
+ bch2_fs_journal_keys_init(c);
bch2_fs_move_init(c);
+ bch2_fs_nocow_locking_init_early(c);
+ bch2_fs_quota_init(c);
+ bch2_fs_recovery_passes_init(c);
bch2_fs_sb_errors_init_early(c);
+ bch2_fs_snapshots_init_early(c);
+ bch2_fs_subvolumes_init_early(c);
INIT_LIST_HEAD(&c->list);
@@ -817,8 +921,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
- bch2_fs_btree_cache_init_early(&c->btree_cache);
-
mutex_init(&c->sectors_available_lock);
ret = percpu_init_rwsem(&c->mark_lock);
@@ -832,14 +934,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
- pr_uuid(&name, c->sb.user_uuid.b);
- ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
- if (ret)
- goto err;
-
- strscpy(c->name, name.buf, sizeof(c->name));
- printbuf_exit(&name);
-
/* Compat: */
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
@@ -854,7 +948,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
- bch2_opts_apply(&c->opts, opts);
+ bch2_opts_apply(&c->opts, *opts);
+
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+ c->opts.block_size > PAGE_SIZE) {
+ bch_err(c, "cannot mount bs > ps filesystem without CONFIG_TRANSPARENT_HUGEPAGE");
+ ret = -EINVAL;
+ goto err;
+ }
c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
if (c->opts.inodes_use_key_cache)
@@ -870,26 +971,26 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto err;
}
+ if (c->sb.multi_device)
+ pr_uuid(&name, c->sb.user_uuid.b);
+ else
+ prt_bdevname(&name, sbs->data[0].bdev);
+
+ ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
+ if (ret)
+ goto err;
+
+ strscpy(c->name, name.buf, sizeof(c->name));
+ printbuf_exit(&name);
+
iter_size = sizeof(struct sort_iter) +
(btree_blocks(c) + 1) * 2 *
sizeof(struct sort_iter_set);
- if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
- !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
- !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
- !(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
+ if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
- !(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
- !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
- WQ_FREEZABLE, 0)) ||
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_init(&c->writes, bch2_writes_disabled,
- PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
-#endif
+ enumerated_ref_init(&c->writes, BCH_WRITE_REF_NR,
+ bch2_writes_disabled) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_bio, 1,
max(offsetof(struct btree_read_bio, bio),
@@ -905,29 +1006,24 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto err;
}
- ret = bch2_fs_counters_init(c) ?:
- bch2_fs_sb_errors_init(c) ?:
- bch2_io_clock_init(&c->io_clock[READ]) ?:
- bch2_io_clock_init(&c->io_clock[WRITE]) ?:
- bch2_fs_journal_init(&c->journal) ?:
- bch2_fs_btree_iter_init(c) ?:
+ ret =
+ bch2_fs_async_obj_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
+ bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
- bch2_fs_btree_interior_update_init(c) ?:
- bch2_fs_btree_gc_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
- bch2_fs_btree_write_buffer_init(c) ?:
- bch2_fs_subvolumes_init(c) ?:
- bch2_fs_io_read_init(c) ?:
- bch2_fs_io_write_init(c) ?:
- bch2_fs_nocow_locking_init(c) ?:
- bch2_fs_encryption_init(c) ?:
+ bch2_io_clock_init(&c->io_clock[READ]) ?:
+ bch2_io_clock_init(&c->io_clock[WRITE]) ?:
bch2_fs_compress_init(c) ?:
+ bch2_fs_counters_init(c) ?:
bch2_fs_ec_init(c) ?:
- bch2_fs_vfs_init(c) ?:
+ bch2_fs_encryption_init(c) ?:
bch2_fs_fsio_init(c) ?:
- bch2_fs_fs_io_buffered_init(c) ?:
- bch2_fs_fs_io_direct_init(c);
+ bch2_fs_fs_io_direct_init(c) ?:
+ bch2_fs_io_read_init(c) ?:
+ bch2_fs_rebalance_init(c) ?:
+ bch2_fs_sb_errors_init(c) ?:
+ bch2_fs_vfs_init(c);
if (ret)
goto err;
@@ -1013,6 +1109,11 @@ static void print_mount_opts(struct bch_fs *c)
bch2_version_to_text(&p, c->sb.version_incompat_allowed);
}
+ if (c->opts.verbose) {
+ prt_printf(&p, "\n features: ");
+ prt_bitflags(&p, bch2_sb_features, c->sb.features);
+ }
+
bch_info(c, "%s", p.buf);
printbuf_exit(&p);
}
@@ -1020,19 +1121,18 @@ static void print_mount_opts(struct bch_fs *c)
static bool bch2_fs_may_start(struct bch_fs *c)
{
struct bch_dev *ca;
- unsigned i, flags = 0;
+ unsigned flags = 0;
- if (c->opts.very_degraded)
+ switch (c->opts.degraded) {
+ case BCH_DEGRADED_very:
flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
-
- if (c->opts.degraded)
+ break;
+ case BCH_DEGRADED_yes:
flags |= BCH_FORCE_IF_DEGRADED;
-
- if (!c->opts.degraded &&
- !c->opts.very_degraded) {
+ break;
+ default:
mutex_lock(&c->sb_lock);
-
- for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+ for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
@@ -1046,9 +1146,10 @@ static bool bch2_fs_may_start(struct bch_fs *c)
}
}
mutex_unlock(&c->sb_lock);
+ break;
}
- return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
+ return bch2_have_enough_devs(c, c->online_devs, flags, true);
}
int bch2_fs_start(struct bch_fs *c)
@@ -1081,13 +1182,22 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
}
- for_each_online_member(c, ca)
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
+ cpu_to_le64(now);
+ rcu_read_unlock();
+ /*
+ * Dno't write superblock yet: recovery might have to downgrade
+ */
mutex_unlock(&c->sb_lock);
- for_each_rw_member(c, ca)
- bch2_dev_allocator_add(c, ca);
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ bch2_dev_allocator_add(c, ca);
+ rcu_read_unlock();
bch2_recalc_capacity(c);
up_write(&c->state_lock);
@@ -1100,7 +1210,7 @@ int bch2_fs_start(struct bch_fs *c)
if (ret)
goto err;
- ret = bch2_opts_check_may_set(c);
+ ret = bch2_opts_hooks_pre_set(c);
if (ret)
goto err;
@@ -1234,11 +1344,14 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
{
- if (!percpu_ref_is_zero(&ca->io_ref[rw])) {
- reinit_completion(&ca->io_ref_completion[rw]);
- percpu_ref_kill(&ca->io_ref[rw]);
- wait_for_completion(&ca->io_ref_completion[rw]);
- }
+ if (rw == READ)
+ clear_bit(ca->dev_idx, ca->fs->online_devs.d);
+
+ if (!enumerated_ref_is_zero(&ca->io_ref[rw]))
+ enumerated_ref_stop(&ca->io_ref[rw],
+ rw == READ
+ ? bch2_dev_read_refs
+ : bch2_dev_write_refs);
}
static void bch2_dev_release(struct kobject *kobj)
@@ -1250,8 +1363,8 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
- WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
- WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
+ WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE]));
+ WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
cancel_work_sync(&ca->io_error_work);
@@ -1260,6 +1373,9 @@ static void bch2_dev_free(struct bch_dev *ca)
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
+ bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
+ bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
+
bch2_free_super(&ca->disk_sb);
bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
@@ -1271,8 +1387,8 @@ static void bch2_dev_free(struct bch_dev *ca)
bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]);
bch2_time_stats_quantiles_exit(&ca->io_latency[READ]);
- percpu_ref_exit(&ca->io_ref[WRITE]);
- percpu_ref_exit(&ca->io_ref[READ]);
+ enumerated_ref_exit(&ca->io_ref[WRITE]);
+ enumerated_ref_exit(&ca->io_ref[READ]);
#ifndef CONFIG_BCACHEFS_DEBUG
percpu_ref_exit(&ca->ref);
#endif
@@ -1284,7 +1400,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);
- if (percpu_ref_is_zero(&ca->io_ref[READ]))
+ if (enumerated_ref_is_zero(&ca->io_ref[READ]))
return;
__bch2_dev_read_only(c, ca);
@@ -1306,20 +1422,6 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref)
}
#endif
-static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref)
-{
- struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[READ]);
-
- complete(&ca->io_ref_completion[READ]);
-}
-
-static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref)
-{
- struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[WRITE]);
-
- complete(&ca->io_ref_completion[WRITE]);
-}
-
static void bch2_dev_unlink(struct bch_dev *ca)
{
struct kobject *b;
@@ -1381,8 +1483,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
kobject_init(&ca->kobj, &bch2_dev_ktype);
init_completion(&ca->ref_completion);
- init_completion(&ca->io_ref_completion[READ]);
- init_completion(&ca->io_ref_completion[WRITE]);
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
@@ -1406,12 +1506,13 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
atomic_long_set(&ca->ref, 1);
#endif
+ mutex_init(&ca->bucket_backpointer_mismatch.lock);
+ mutex_init(&ca->bucket_backpointer_empty.lock);
+
bch2_dev_allocator_background_init(ca);
- if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete,
- PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
- percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete,
- PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+ if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) ||
+ enumerated_ref_init(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_NR, NULL) ||
!(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) ||
!(ca->io_done = alloc_percpu(*ca->io_done)))
@@ -1428,7 +1529,9 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
{
ca->dev_idx = dev_idx;
__set_bit(ca->dev_idx, ca->self.d);
- scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
+
+ if (!ca->name[0])
+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
ca->fs = c;
rcu_assign_pointer(c->devs[ca->dev_idx], ca);
@@ -1473,13 +1576,18 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
return -BCH_ERR_device_size_too_small;
}
- BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
- BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
+ BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
+ BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE]));
ret = bch2_dev_journal_init(ca, sb->sb);
if (ret)
return ret;
+ struct printbuf name = PRINTBUF;
+ prt_bdevname(&name, sb->bdev);
+ strscpy(ca->name, name.buf, sizeof(ca->name));
+ printbuf_exit(&name);
+
/* Commit: */
ca->disk_sb = *sb;
memset(sb, 0, sizeof(*sb));
@@ -1493,7 +1601,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
ca->dev = ca->disk_sb.bdev->bd_dev;
- percpu_ref_reinit(&ca->io_ref[READ]);
+ enumerated_ref_start(&ca->io_ref[READ]);
return 0;
}
@@ -1517,16 +1625,9 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
if (ret)
return ret;
- bch2_dev_sysfs_online(c, ca);
-
- struct printbuf name = PRINTBUF;
- prt_bdevname(&name, ca->disk_sb.bdev);
-
- if (c->sb.nr_devices == 1)
- strscpy(c->name, name.buf, sizeof(c->name));
- strscpy(ca->name, name.buf, sizeof(ca->name));
+ set_bit(ca->dev_idx, c->online_devs.d);
- printbuf_exit(&name);
+ bch2_dev_sysfs_online(c, ca);
bch2_rebalance_wakeup(c);
return 0;
@@ -1578,7 +1679,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
return true;
/* do we have enough devices to read from? */
- new_online_devs = bch2_online_devs(c);
+ new_online_devs = c->online_devs;
__clear_bit(ca->dev_idx, new_online_devs.d);
return bch2_have_enough_devs(c, new_online_devs, flags, false);
@@ -1608,8 +1709,8 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
- if (percpu_ref_is_zero(&ca->io_ref[WRITE]))
- percpu_ref_reinit(&ca->io_ref[WRITE]);
+ if (enumerated_ref_is_zero(&ca->io_ref[WRITE]))
+ enumerated_ref_start(&ca->io_ref[WRITE]);
bch2_dev_do_discards(ca);
}
@@ -1663,6 +1764,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
struct bch_member *m;
unsigned dev_idx = ca->dev_idx, data;
+ bool fast_device_removal = !bch2_request_incompat_feature(c,
+ bcachefs_metadata_version_fast_device_removal);
int ret;
down_write(&c->state_lock);
@@ -1681,11 +1784,25 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
__bch2_dev_read_only(c, ca);
- ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
- bch_err_msg(ca, ret, "bch2_dev_data_drop()");
+ ret = fast_device_removal
+ ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags)
+ : (bch2_dev_data_drop(c, ca->dev_idx, flags) ?:
+ bch2_dev_remove_stripes(c, ca->dev_idx, flags));
if (ret)
goto err;
+ /* Check if device still has data before blowing away alloc info */
+ struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
+ if (!data_type_is_empty(i) &&
+ !data_type_is_hidden(i) &&
+ usage.buckets[i]) {
+ bch_err(ca, "Remove failed: still has data (%s, %llu buckets)",
+ __bch2_data_types[i], usage.buckets[i]);
+ ret = -EBUSY;
+ goto err;
+ }
+
ret = bch2_dev_remove_alloc(c, ca);
bch_err_msg(ca, ret, "bch2_dev_remove_alloc()");
if (ret)
@@ -1749,7 +1866,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
*/
mutex_lock(&c->sb_lock);
m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
- memset(&m->uuid, 0, sizeof(m->uuid));
+
+ if (fast_device_removal)
+ m->uuid = BCH_SB_MEMBER_DELETED_UUID;
+ else
+ memset(&m->uuid, 0, sizeof(m->uuid));
bch2_write_super(c);
@@ -1759,7 +1880,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
err:
if (test_bit(BCH_FS_rw, &c->flags) &&
ca->mi.state == BCH_MEMBER_STATE_rw &&
- !percpu_ref_is_zero(&ca->io_ref[READ]))
+ !enumerated_ref_is_zero(&ca->io_ref[READ]))
__bch2_dev_read_write(c, ca);
up_write(&c->state_lock);
return ret;
@@ -1769,11 +1890,11 @@ err:
int bch2_dev_add(struct bch_fs *c, const char *path)
{
struct bch_opts opts = bch2_opts_empty();
- struct bch_sb_handle sb;
+ struct bch_sb_handle sb = {};
struct bch_dev *ca = NULL;
struct printbuf errbuf = PRINTBUF;
struct printbuf label = PRINTBUF;
- int ret;
+ int ret = 0;
ret = bch2_read_super(path, &opts, &sb);
bch_err_msg(c, ret, "reading super");
@@ -1790,6 +1911,20 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
}
}
+ if (list_empty(&c->list)) {
+ mutex_lock(&bch_fs_list_lock);
+ if (__bch2_uuid_to_fs(c->sb.uuid))
+ ret = -BCH_ERR_filesystem_uuid_already_open;
+ else
+ list_add(&c->list, &bch_fs_list);
+ mutex_unlock(&bch_fs_list_lock);
+
+ if (ret) {
+ bch_err(c, "filesystem UUID already open");
+ goto err;
+ }
+ }
+
ret = bch2_dev_may_add(sb.sb, c);
if (ret)
goto err;
@@ -1806,6 +1941,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
+ SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true);
ret = bch2_sb_from_fs(c, ca);
bch_err_msg(c, ret, "setting up new superblock");
@@ -1821,6 +1957,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_unlock;
}
unsigned dev_idx = ret;
+ ret = 0;
/* success: */
@@ -1840,27 +1977,29 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- ret = bch2_dev_usage_init(ca, false);
- if (ret)
- goto err_late;
+ if (test_bit(BCH_FS_started, &c->flags)) {
+ ret = bch2_dev_usage_init(ca, false);
+ if (ret)
+ goto err_late;
- ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
- bch_err_msg(ca, ret, "marking new superblock");
- if (ret)
- goto err_late;
+ ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
+ bch_err_msg(ca, ret, "marking new superblock");
+ if (ret)
+ goto err_late;
- ret = bch2_fs_freespace_init(c);
- bch_err_msg(ca, ret, "initializing free space");
- if (ret)
- goto err_late;
+ ret = bch2_fs_freespace_init(c);
+ bch_err_msg(ca, ret, "initializing free space");
+ if (ret)
+ goto err_late;
- if (ca->mi.state == BCH_MEMBER_STATE_rw)
- __bch2_dev_read_write(c, ca);
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_write(c, ca);
- ret = bch2_dev_journal_alloc(ca, false);
- bch_err_msg(c, ret, "allocating journal");
- if (ret)
- goto err_late;
+ ret = bch2_dev_journal_alloc(ca, false);
+ bch_err_msg(c, ret, "allocating journal");
+ if (ret)
+ goto err_late;
+ }
up_write(&c->state_lock);
out:
@@ -1971,6 +2110,18 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
return 0;
}
+static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new_nbuckets)
+{
+ struct bch_fs *c = ca->fs;
+ u64 v[3] = { new_nbuckets - old_nbuckets, 0, 0 };
+
+ return bch2_trans_commit_do(ca->fs, NULL, NULL, 0,
+ bch2_disk_accounting_mod2(trans, false, v, dev_data_type,
+ .dev = ca->dev_idx,
+ .data_type = BCH_DATA_free)) ?:
+ bch2_dev_freespace_init(c, ca, old_nbuckets, new_nbuckets);
+}
+
int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
struct bch_member *m;
@@ -2018,13 +2169,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
mutex_unlock(&c->sb_lock);
if (ca->mi.freespace_initialized) {
- u64 v[3] = { nbuckets - old_nbuckets, 0, 0 };
-
- ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0,
- bch2_disk_accounting_mod2(trans, false, v, dev_data_type,
- .dev = ca->dev_idx,
- .data_type = BCH_DATA_free)) ?:
- bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
+ ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets);
if (ret)
goto err;
}
@@ -2035,6 +2180,49 @@ err:
return ret;
}
+int bch2_fs_resize_on_mount(struct bch_fs *c)
+{
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
+ u64 old_nbuckets = ca->mi.nbuckets;
+ u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
+ ca->mi.bucket_size);
+
+ if (ca->mi.resize_on_mount &&
+ new_nbuckets > ca->mi.nbuckets) {
+ bch_info(ca, "resizing to size %llu", new_nbuckets * ca->mi.bucket_size);
+ int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets);
+ bch_err_fn(ca, ret);
+ if (ret) {
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_fs_resize_on_mount);
+ up_write(&c->state_lock);
+ return ret;
+ }
+
+ mutex_lock(&c->sb_lock);
+ struct bch_member *m =
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ m->nbuckets = cpu_to_le64(new_nbuckets);
+ SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false);
+
+ c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image));
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ if (ca->mi.freespace_initialized) {
+ ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets);
+ if (ret) {
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_fs_resize_on_mount);
+ up_write(&c->state_lock);
+ return ret;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
/* return with ref on ca->ref: */
struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
{
@@ -2095,20 +2283,32 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
if (!ca)
goto unlock;
- if (bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, BCH_FORCE_IF_DEGRADED)) {
+ bool dev = bch2_dev_state_allowed(c, ca,
+ BCH_MEMBER_STATE_failed,
+ BCH_FORCE_IF_DEGRADED);
+
+ if (!dev && sb) {
+ if (!surprise)
+ sync_filesystem(sb);
+ shrink_dcache_sb(sb);
+ evict_inodes(sb);
+ }
+
+ struct printbuf buf = PRINTBUF;
+ __bch2_log_msg_start(ca->name, &buf);
+
+ prt_printf(&buf, "offline from block layer");
+
+ if (dev) {
__bch2_dev_offline(c, ca);
} else {
- if (sb) {
- if (!surprise)
- sync_filesystem(sb);
- shrink_dcache_sb(sb);
- evict_inodes(sb);
- }
-
bch2_journal_flush(&c->journal);
- bch2_fs_emergency_read_only(c);
+ bch2_fs_emergency_read_only2(c, &buf);
}
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+
bch2_dev_put(ca);
unlock:
if (sb)
@@ -2151,10 +2351,10 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r)
cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time));
}
-struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
- struct bch_opts opts)
+struct bch_fs *bch2_fs_open(darray_const_str *devices,
+ struct bch_opts *opts)
{
- DARRAY(struct bch_sb_handle) sbs = { 0 };
+ bch_sb_handles sbs = {};
struct bch_fs *c = NULL;
struct bch_sb_handle *best = NULL;
struct printbuf errbuf = PRINTBUF;
@@ -2163,26 +2363,26 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
if (!try_module_get(THIS_MODULE))
return ERR_PTR(-ENODEV);
- if (!nr_devices) {
+ if (!devices->nr) {
ret = -EINVAL;
goto err;
}
- ret = darray_make_room(&sbs, nr_devices);
+ ret = darray_make_room(&sbs, devices->nr);
if (ret)
goto err;
- for (unsigned i = 0; i < nr_devices; i++) {
+ darray_for_each(*devices, i) {
struct bch_sb_handle sb = { NULL };
- ret = bch2_read_super(devices[i], &opts, &sb);
+ ret = bch2_read_super(*i, opts, &sb);
if (ret)
goto err;
BUG_ON(darray_push(&sbs, sb));
}
- if (opts.nochanges && !opts.read_only) {
+ if (opts->nochanges && !opts->read_only) {
ret = -BCH_ERR_erofs_nochanges;
goto err_print;
}
@@ -2192,7 +2392,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
best = sb;
darray_for_each_reverse(sbs, sb) {
- ret = bch2_dev_in_fs(best, sb, &opts);
+ ret = bch2_dev_in_fs(best, sb, opts);
if (ret == -BCH_ERR_device_has_been_removed ||
ret == -BCH_ERR_device_splitbrain) {
@@ -2207,7 +2407,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
goto err_print;
}
- c = bch2_fs_alloc(best->sb, opts);
+ c = bch2_fs_alloc(best->sb, opts, &sbs);
ret = PTR_ERR_OR_ZERO(c);
if (ret)
goto err;
@@ -2236,7 +2436,7 @@ out:
return c;
err_print:
pr_err("bch_fs_open err opening %s: %s",
- devices[0], bch2_err_str(ret));
+ devices->data[0], bch2_err_str(ret));
err:
if (!IS_ERR_OR_NULL(c))
bch2_fs_stop(c);
@@ -2273,9 +2473,45 @@ err:
return -ENOMEM;
}
-#define BCH_DEBUG_PARAM(name, description) \
- bool bch2_##name; \
- module_param_named(name, bch2_##name, bool, 0644); \
+#define BCH_DEBUG_PARAM(name, description) DEFINE_STATIC_KEY_FALSE(bch2_##name);
+BCH_DEBUG_PARAMS_ALL()
+#undef BCH_DEBUG_PARAM
+
+static int bch2_param_set_static_key_t(const char *val, const struct kernel_param *kp)
+{
+ /* Match bool exactly, by re-using it. */
+ struct static_key *key = kp->arg;
+ struct kernel_param boolkp = *kp;
+ bool v;
+ int ret;
+
+ boolkp.arg = &v;
+
+ ret = param_set_bool(val, &boolkp);
+ if (ret)
+ return ret;
+ if (v)
+ static_key_enable(key);
+ else
+ static_key_disable(key);
+ return 0;
+}
+
+static int bch2_param_get_static_key_t(char *buffer, const struct kernel_param *kp)
+{
+ struct static_key *key = kp->arg;
+ return sprintf(buffer, "%c\n", static_key_enabled(key) ? 'N' : 'Y');
+}
+
+static const struct kernel_param_ops bch2_param_ops_static_key_t = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = bch2_param_set_static_key_t,
+ .get = bch2_param_get_static_key_t,
+};
+
+#define BCH_DEBUG_PARAM(name, description) \
+ module_param_cb(name, &bch2_param_ops_static_key_t, &bch2_##name.key, 0644);\
+ __MODULE_PARM_TYPE(name, "static_key_t"); \
MODULE_PARM_DESC(name, description);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index 23533bce5709..dc52f06cb2b9 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -9,6 +9,9 @@
#include <linux/math64.h>
extern const char * const bch2_fs_flag_strs[];
+extern const char * const bch2_write_refs[];
+extern const char * const bch2_dev_read_refs[];
+extern const char * const bch2_dev_write_refs[];
struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(__uuid_t);
@@ -29,18 +32,22 @@ int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
+bool bch2_fs_emergency_read_only2(struct bch_fs *, struct printbuf *);
+
bool bch2_fs_emergency_read_only_locked(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);
int bch2_fs_read_write(struct bch_fs *);
int bch2_fs_read_write_early(struct bch_fs *);
+int bch2_fs_resize_on_mount(struct bch_fs *);
+
void __bch2_fs_stop(struct bch_fs *);
void bch2_fs_free(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);
int bch2_fs_start(struct bch_fs *);
-struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
+struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *);
extern const struct blk_holder_ops bch2_sb_handle_bdev_ops;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 82ee333ddd21..1a55196d69f1 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -25,6 +25,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "inode.h"
#include "journal.h"
#include "journal_reclaim.h"
@@ -34,6 +35,7 @@
#include "nocow_locking.h"
#include "opts.h"
#include "rebalance.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "super-io.h"
#include "tests.h"
@@ -145,8 +147,10 @@ write_attribute(trigger_journal_flush);
write_attribute(trigger_journal_writes);
write_attribute(trigger_btree_cache_shrink);
write_attribute(trigger_btree_key_cache_shrink);
-write_attribute(trigger_freelist_wakeup);
write_attribute(trigger_btree_updates);
+write_attribute(trigger_freelist_wakeup);
+write_attribute(trigger_recalc_capacity);
+write_attribute(trigger_delete_dead_snapshots);
read_attribute(gc_gens_pos);
read_attribute(uuid);
@@ -176,25 +180,9 @@ read_attribute(open_buckets);
read_attribute(open_buckets_partial);
read_attribute(nocow_lock_table);
-#ifdef BCH_WRITE_REF_DEBUG
+read_attribute(read_refs);
read_attribute(write_refs);
-static const char * const bch2_write_refs[] = {
-#define x(n) #n,
- BCH_WRITE_REFS()
-#undef x
- NULL
-};
-
-static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
-{
- bch2_printbuf_tabstop_push(out, 24);
-
- for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++)
- prt_printf(out, "%s\t%li\n", bch2_write_refs[i], atomic_long_read(&c->writes[i]));
-}
-#endif
-
read_attribute(internal_uuid);
read_attribute(disk_groups);
@@ -212,6 +200,8 @@ read_attribute(copy_gc_wait);
sysfs_pd_controller_attribute(rebalance);
read_attribute(rebalance_status);
+read_attribute(snapshot_delete_status);
+read_attribute(recovery_status);
read_attribute(new_stripes);
@@ -334,6 +324,12 @@ SHOW(bch2_fs)
if (attr == &sysfs_rebalance_status)
bch2_rebalance_status_to_text(out, c);
+ if (attr == &sysfs_snapshot_delete_status)
+ bch2_snapshot_delete_status_to_text(out, c);
+
+ if (attr == &sysfs_recovery_status)
+ bch2_recovery_pass_status_to_text(out, c);
+
/* Debugging: */
if (attr == &sysfs_journal_debug)
@@ -369,10 +365,8 @@ SHOW(bch2_fs)
if (attr == &sysfs_moving_ctxts)
bch2_fs_moving_ctxts_to_text(out, c);
-#ifdef BCH_WRITE_REF_DEBUG
if (attr == &sysfs_write_refs)
- bch2_write_refs_to_text(out, c);
-#endif
+ enumerated_ref_to_text(out, &c->writes, bch2_write_refs);
if (attr == &sysfs_nocow_lock_table)
bch2_nocow_locks_to_text(out, &c->nocow_locks);
@@ -405,7 +399,7 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_btree_updates)
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_trigger_btree_cache_shrink) {
@@ -445,6 +439,15 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_freelist_wakeup)
closure_wake_up(&c->freelist_wait);
+ if (attr == &sysfs_trigger_recalc_capacity) {
+ down_read(&c->state_lock);
+ bch2_recalc_capacity(c);
+ up_read(&c->state_lock);
+ }
+
+ if (attr == &sysfs_trigger_delete_dead_snapshots)
+ __bch2_delete_dead_snapshots(c);
+
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -465,7 +468,7 @@ STORE(bch2_fs)
size = ret;
}
#endif
- bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
@@ -476,6 +479,8 @@ struct attribute *bch2_fs_files[] = {
&sysfs_btree_write_stats,
&sysfs_rebalance_status,
+ &sysfs_snapshot_delete_status,
+ &sysfs_recovery_status,
&sysfs_compression_stats,
@@ -558,9 +563,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_new_stripes,
&sysfs_open_buckets,
&sysfs_open_buckets_partial,
-#ifdef BCH_WRITE_REF_DEBUG
&sysfs_write_refs,
-#endif
&sysfs_nocow_lock_table,
&sysfs_io_timers_read,
&sysfs_io_timers_write,
@@ -572,8 +575,10 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_journal_writes,
&sysfs_trigger_btree_cache_shrink,
&sysfs_trigger_btree_key_cache_shrink,
- &sysfs_trigger_freelist_wakeup,
&sysfs_trigger_btree_updates,
+ &sysfs_trigger_freelist_wakeup,
+ &sysfs_trigger_recalc_capacity,
+ &sysfs_trigger_delete_dead_snapshots,
&sysfs_gc_gens_pos,
@@ -626,7 +631,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
* We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO:
*/
- if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
+ if (unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs)))
return -EROFS;
char *tmp = kstrdup(buf, GFP_KERNEL);
@@ -637,40 +642,34 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
u64 v;
ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?:
- bch2_opt_check_may_set(c, ca, id, v);
+ bch2_opt_hook_pre_set(c, ca, id, v);
kfree(tmp);
if (ret < 0)
goto err;
- bch2_opt_set_sb(c, ca, opt, v);
- bch2_opt_set_by_id(&c->opts, id, v);
-
- if (v &&
- (id == Opt_background_target ||
- (id == Opt_foreground_target && !c->opts.background_target) ||
- id == Opt_background_compression ||
- (id == Opt_compression && !c->opts.background_compression)))
- bch2_set_rebalance_needs_scan(c, 0);
+ bool is_sb = opt->get_sb || opt->get_member;
+ bool changed = false;
- if (v && id == Opt_rebalance_enabled)
- bch2_rebalance_wakeup(c);
-
- if (v && id == Opt_copygc_enabled)
- bch2_copygc_wakeup(c);
+ if (is_sb) {
+ changed = bch2_opt_set_sb(c, ca, opt, v);
+ } else if (!ca) {
+ changed = bch2_opt_get_by_id(&c->opts, id) != v;
+ } else {
+ /* device options that aren't superblock options aren't
+ * supported */
+ BUG();
+ }
- if (id == Opt_discard && !ca) {
- mutex_lock(&c->sb_lock);
- for_each_member_device(c, ca)
- opt->set_member(bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx), v);
+ if (!ca)
+ bch2_opt_set_by_id(&c->opts, id, v);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
- }
+ if (changed)
+ bch2_opt_hook_post_set(c, ca, 0, &c->opts, id);
ret = size;
err:
- bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs);
return ret;
}
@@ -821,6 +820,12 @@ SHOW(bch2_dev)
if (opt_id >= 0)
return sysfs_opt_show(c, ca, opt_id, out);
+ if (attr == &sysfs_read_refs)
+ enumerated_ref_to_text(out, &ca->io_ref[READ], bch2_dev_read_refs);
+
+ if (attr == &sysfs_write_refs)
+ enumerated_ref_to_text(out, &ca->io_ref[WRITE], bch2_dev_write_refs);
+
return 0;
}
@@ -876,6 +881,9 @@ struct attribute *bch2_dev_files[] = {
/* debug: */
&sysfs_alloc_debug,
&sysfs_open_buckets,
+
+ &sysfs_read_refs,
+ &sysfs_write_refs,
NULL
};
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 519d00d62ae7..8cb5b40704fd 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -339,6 +339,11 @@ DEFINE_EVENT(bio, io_read_reuse_race,
TP_ARGS(bio)
);
+DEFINE_EVENT(bio, io_read_fail_and_poison,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
/* ec.c */
TRACE_EVENT(stripe_create,
@@ -1122,51 +1127,9 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
TP_ARGS(trans, caller_ip, path)
);
-TRACE_EVENT(trans_restart_upgrade,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path,
- unsigned old_locks_want,
- unsigned new_locks_want,
- struct get_locks_fail *f),
- TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f),
-
- TP_STRUCT__entry(
- __array(char, trans_fn, 32 )
- __field(unsigned long, caller_ip )
- __field(u8, btree_id )
- __field(u8, old_locks_want )
- __field(u8, new_locks_want )
- __field(u8, level )
- __field(u32, path_seq )
- __field(u32, node_seq )
- TRACE_BPOS_entries(pos)
- ),
-
- TP_fast_assign(
- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
- __entry->caller_ip = caller_ip;
- __entry->btree_id = path->btree_id;
- __entry->old_locks_want = old_locks_want;
- __entry->new_locks_want = new_locks_want;
- __entry->level = f->l;
- __entry->path_seq = path->l[f->l].lock_seq;
- __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq;
- TRACE_BPOS_assign(pos, path->pos)
- ),
-
- TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u",
- __entry->trans_fn,
- (void *) __entry->caller_ip,
- bch2_btree_id_str(__entry->btree_id),
- __entry->pos_inode,
- __entry->pos_offset,
- __entry->pos_snapshot,
- __entry->old_locks_want,
- __entry->new_locks_want,
- __entry->level,
- __entry->path_seq,
- __entry->node_seq)
+DEFINE_EVENT(fs_str, trans_restart_upgrade,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
DEFINE_EVENT(trans_str, trans_restart_relock,
@@ -1468,6 +1431,11 @@ DEFINE_EVENT(fs_str, data_update,
TP_ARGS(c, str)
);
+DEFINE_EVENT(fs_str, io_move_created_rebalance,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
TRACE_EVENT(error_downcast,
TP_PROTO(int bch_err, int std_err, unsigned long ip),
TP_ARGS(bch_err, std_err, ip),
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 87af551692f4..dc3817f545fa 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -252,8 +252,18 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}
-static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
- bool nonblocking)
+static bool string_is_spaces(const char *str)
+{
+ while (*str) {
+ if (*str != ' ')
+ return false;
+ str++;
+ }
+ return true;
+}
+
+void bch2_print_string_as_lines(const char *prefix, const char *lines,
+ bool nonblocking)
{
bool locked = false;
const char *p;
@@ -272,6 +282,9 @@ static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
while (*lines) {
p = strchrnul(lines, '\n');
+ if (!*p && string_is_spaces(lines))
+ break;
+
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
if (!*p)
break;
@@ -281,16 +294,6 @@ static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
console_unlock();
}
-void bch2_print_string_as_lines(const char *prefix, const char *lines)
-{
- return __bch2_print_string_as_lines(prefix, lines, false);
-}
-
-void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines)
-{
- return __bch2_print_string_as_lines(prefix, lines, true);
-}
-
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr,
gfp_t gfp)
{
@@ -725,6 +728,16 @@ void bch2_corrupt_bio(struct bio *bio)
}
#endif
+void bch2_bio_to_text(struct printbuf *out, struct bio *bio)
+{
+ prt_printf(out, "bi_remaining:\t%u\n",
+ atomic_read(&bio->__bi_remaining));
+ prt_printf(out, "bi_end_io:\t%ps\n",
+ bio->bi_end_io);
+ prt_printf(out, "bi_status:\t%u\n",
+ bio->bi_status);
+}
+
#if 0
void eytzinger1_test(void)
{
@@ -1003,14 +1016,14 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
return ret;
}
-void bch2_darray_str_exit(darray_str *d)
+void bch2_darray_str_exit(darray_const_str *d)
{
darray_for_each(*d, i)
kfree(*i);
darray_exit(d);
}
-int bch2_split_devs(const char *_dev_name, darray_str *ret)
+int bch2_split_devs(const char *_dev_name, darray_const_str *ret)
{
darray_init(ret);
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 3e52c7f8ddd2..25cf61ebd40c 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -14,6 +14,7 @@
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
+#include <linux/random.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -55,15 +56,16 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}
-static inline void *bch2_kvmalloc(size_t n, gfp_t flags)
+static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
{
void *p = unlikely(n >= INT_MAX)
- ? vmalloc(n)
- : kvmalloc(n, flags & ~__GFP_ZERO);
+ ? vmalloc_noprof(n)
+ : kvmalloc_noprof(n, flags & ~__GFP_ZERO);
if (p && (flags & __GFP_ZERO))
memset(p, 0, n);
return p;
}
+#define bch2_kvmalloc(...) alloc_hooks(bch2_kvmalloc_noprof(__VA_ARGS__))
#define init_heap(heap, _size, gfp) \
({ \
@@ -211,8 +213,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]);
void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2(struct printbuf *, u64);
-void bch2_print_string_as_lines(const char *prefix, const char *lines);
-void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines);
+void bch2_print_string_as_lines(const char *, const char *, bool);
typedef DARRAY(unsigned long) bch_stacktrace;
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
@@ -419,6 +420,8 @@ static inline void bch2_maybe_corrupt_bio(struct bio *bio, unsigned ratio)
#define bch2_maybe_corrupt_bio(...) do {} while (0)
#endif
+void bch2_bio_to_text(struct printbuf *, struct bio *);
+
static inline void memcpy_u64s_small(void *dst, const void *src,
unsigned u64s)
{
@@ -688,8 +691,8 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r)
return l.len == r.len && !memcmp(l.name, r.name, l.len);
}
-void bch2_darray_str_exit(darray_str *);
-int bch2_split_devs(const char *, darray_str *);
+void bch2_darray_str_exit(darray_const_str *);
+int bch2_split_devs(const char *, darray_const_str *);
#ifdef __KERNEL__
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 651da52b2cbc..627f153798c6 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -38,7 +38,7 @@ static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
return bch2_xattr_hash(info,
- &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
+ &X_SEARCH(x.v->x_type, x.v->x_name_and_value, x.v->x_name_len));
}
static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
@@ -48,7 +48,7 @@ static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
return l.v->x_type != r->type ||
l.v->x_name_len != r->name.len ||
- memcmp(l.v->x_name, r->name.name, r->name.len);
+ memcmp(l.v->x_name_and_value, r->name.name, r->name.len);
}
static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
@@ -58,7 +58,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
return l.v->x_type != r.v->x_type ||
l.v->x_name_len != r.v->x_name_len ||
- memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
+ memcmp(l.v->x_name_and_value, r.v->x_name_and_value, r.v->x_name_len);
}
const struct bch_hash_desc bch2_xattr_hash_desc = {
@@ -96,7 +96,7 @@ int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k,
c, xattr_invalid_type,
"invalid type (%u)", xattr.v->x_type);
- bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len),
+ bkey_fsck_err_on(memchr(xattr.v->x_name_and_value, '\0', xattr.v->x_name_len),
c, xattr_name_invalid_chars,
"xattr name has invalid characters");
fsck_err:
@@ -120,13 +120,13 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
unsigned name_len = xattr.v->x_name_len;
unsigned val_len = le16_to_cpu(xattr.v->x_val_len);
unsigned max_name_val_bytes = bkey_val_bytes(xattr.k) -
- offsetof(struct bch_xattr, x_name);
+ offsetof(struct bch_xattr, x_name_and_value);
val_len = min_t(int, val_len, max_name_val_bytes - name_len);
name_len = min(name_len, max_name_val_bytes);
prt_printf(out, "%.*s:%.*s",
- name_len, xattr.v->x_name,
+ name_len, xattr.v->x_name_and_value,
val_len, (char *) xattr_val(xattr.v));
if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS ||
@@ -176,6 +176,11 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
if (ret)
return ret;
+ /*
+ * Besides the ctime update, extents, dirents and xattrs updates require
+ * that an inode update also happens - to ensure that if a key exists in
+ * one of those btrees with a given snapshot ID an inode is also present
+ */
inode_u->bi_ctime = bch2_current_time(c);
ret = bch2_inode_write(trans, &inode_iter, inode_u);
@@ -202,7 +207,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
xattr->v.x_type = type;
xattr->v.x_name_len = namelen;
xattr->v.x_val_len = cpu_to_le16(size);
- memcpy(xattr->v.x_name, name, namelen);
+ memcpy(xattr->v.x_name_and_value, name, namelen);
memcpy(xattr_val(&xattr->v), value, size);
ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
@@ -270,7 +275,7 @@ static int bch2_xattr_emit(struct dentry *dentry,
if (!prefix)
return 0;
- return __bch2_xattr_emit(prefix, xattr->x_name, xattr->x_name_len, buf);
+ return __bch2_xattr_emit(prefix, xattr->x_name_and_value, xattr->x_name_len, buf);
}
static int bch2_xattr_list_bcachefs(struct bch_fs *c,
@@ -473,6 +478,12 @@ static int inode_opt_set_fn(struct btree_trans *trans,
{
struct inode_opt_set *s = p;
+ if (s->id == Inode_opt_casefold) {
+ int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->v);
+ if (ret)
+ return ret;
+ }
+
if (s->defined)
bi->bi_fields_set |= 1U << s->id;
else
@@ -523,7 +534,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
if (ret < 0)
goto err_class_exit;
- ret = bch2_opt_check_may_set(c, NULL, opt_id, v);
+ ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v);
if (ret < 0)
goto err_class_exit;
diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h
index 132fbbd15a66..1139bf345f70 100644
--- a/fs/bcachefs/xattr.h
+++ b/fs/bcachefs/xattr.h
@@ -18,12 +18,12 @@ void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
{
- return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name_and_value) +
name_len + val_len, sizeof(u64));
}
#define xattr_val(_xattr) \
- ((void *) (_xattr)->x_name + (_xattr)->x_name_len)
+ ((void *) (_xattr)->x_name_and_value + (_xattr)->x_name_len)
struct xattr_search_key {
u8 type;
diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h
index 67426e33d04e..4121b78d9a92 100644
--- a/fs/bcachefs/xattr_format.h
+++ b/fs/bcachefs/xattr_format.h
@@ -16,10 +16,10 @@ struct bch_xattr {
/*
* x_name contains the name and value counted by
* x_name_len + x_val_len. The introduction of
- * __counted_by(x_name_len) caused a false positive
+ * __counted_by(x_name_len) previously caused a false positive
* detection of an out of bounds write.
*/
- __u8 x_name[];
+ __u8 x_name_and_value[];
} __packed __aligned(8);
#endif /* _BCACHEFS_XATTR_FORMAT_H */
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index db81570c9637..1d41ce477df5 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
+#include <linux/fs_context.h>
#include "bfs.h"
MODULE_AUTHOR("Tigran Aivazian <aivazian.tigran@gmail.com>");
@@ -305,7 +306,7 @@ void bfs_dump_imap(const char *prefix, struct super_block *s)
#endif
}
-static int bfs_fill_super(struct super_block *s, void *data, int silent)
+static int bfs_fill_super(struct super_block *s, struct fs_context *fc)
{
struct buffer_head *bh, *sbh;
struct bfs_super_block *bfs_sb;
@@ -314,6 +315,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct bfs_sb_info *info;
int ret = -EINVAL;
unsigned long i_sblock, i_eblock, i_eoff, s_size;
+ int silent = fc->sb_flags & SB_SILENT;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
@@ -446,18 +448,28 @@ out:
return ret;
}
-static struct dentry *bfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int bfs_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
+ return get_tree_bdev(fc, bfs_fill_super);
+}
+
+static const struct fs_context_operations bfs_context_ops = {
+ .get_tree = bfs_get_tree,
+};
+
+static int bfs_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &bfs_context_ops;
+
+ return 0;
}
static struct file_system_type bfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "bfs",
- .mount = bfs_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "bfs",
+ .init_fs_context = bfs_init_fs_context,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("bfs");
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 584fa89bc877..a43363d593e5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -68,12 +68,6 @@
static int load_elf_binary(struct linux_binprm *bprm);
-#ifdef CONFIG_USELIB
-static int load_elf_library(struct file *);
-#else
-#define load_elf_library NULL
-#endif
-
/*
* If we don't support core dumping, then supply a NULL so we
* don't even try.
@@ -101,7 +95,6 @@ static int elf_core_dump(struct coredump_params *cprm);
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
- .load_shlib = load_elf_library,
#ifdef CONFIG_COREDUMP
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
@@ -830,6 +823,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
struct elf_phdr *elf_property_phdata = NULL;
unsigned long elf_brk;
+ bool brk_moved = false;
int retval, i;
unsigned long elf_entry;
unsigned long e_entry;
@@ -1097,15 +1091,19 @@ out_free_interp:
/* Calculate any requested alignment. */
alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
- /*
- * There are effectively two types of ET_DYN
- * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP)
- * and loaders (ET_DYN without PT_INTERP, since they
- * _are_ the ELF interpreter). The loaders must
- * be loaded away from programs since the program
- * may otherwise collide with the loader (especially
- * for ET_EXEC which does not have a randomized
- * position). For example to handle invocations of
+ /**
+ * DOC: PIE handling
+ *
+ * There are effectively two types of ET_DYN ELF
+ * binaries: programs (i.e. PIE: ET_DYN with
+ * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
+ * without PT_INTERP, usually the ELF interpreter
+ * itself). Loaders must be loaded away from programs
+ * since the program may otherwise collide with the
+ * loader (especially for ET_EXEC which does not have
+ * a randomized position).
+ *
+ * For example, to handle invocations of
* "./ld.so someprog" to test out a new version of
* the loader, the subsequent program that the
* loader loads must avoid the loader itself, so
@@ -1118,6 +1116,9 @@ out_free_interp:
* ELF_ET_DYN_BASE and loaders are loaded into the
* independently randomized mmap region (0 load_bias
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
+ *
+ * See below for "brk" handling details, which is
+ * also affected by program vs loader and ASLR.
*/
if (interpreter) {
/* On ET_DYN with PT_INTERP, we do the ASLR. */
@@ -1234,8 +1235,6 @@ out_free_interp:
start_data += load_bias;
end_data += load_bias;
- current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);
-
if (interpreter) {
elf_entry = load_elf_interp(interp_elf_ex,
interpreter,
@@ -1291,27 +1290,44 @@ out_free_interp:
mm->end_data = end_data;
mm->start_stack = bprm->p;
- if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) {
+ /**
+ * DOC: "brk" handling
+ *
+ * For architectures with ELF randomization, when executing a
+ * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
+ * move the brk area out of the mmap region and into the unused
+ * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
+ * early with the stack growing down or other regions being put
+ * into the mmap region by the kernel (e.g. vdso).
+ *
+ * In the CONFIG_COMPAT_BRK case, though, everything is turned
+ * off because we're not allowed to move the brk at all.
+ */
+ if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
+ IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+ elf_ex->e_type == ET_DYN && !interpreter) {
+ elf_brk = ELF_ET_DYN_BASE;
+ /* This counts as moving the brk, so let brk(2) know. */
+ brk_moved = true;
+ }
+ mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
+
+ if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
/*
- * For architectures with ELF randomization, when executing
- * a loader directly (i.e. no interpreter listed in ELF
- * headers), move the brk area out of the mmap region
- * (since it grows up, and may collide early with the stack
- * growing down), and into the unused ELF_ET_DYN_BASE region.
+ * If we didn't move the brk to ELF_ET_DYN_BASE (above),
+ * leave a gap between .bss and brk.
*/
- if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
- elf_ex->e_type == ET_DYN && !interpreter) {
- mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
- } else {
- /* Otherwise leave a gap between .bss and brk. */
+ if (!brk_moved)
mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
- }
mm->brk = mm->start_brk = arch_randomize_brk(mm);
+ brk_moved = true;
+ }
+
#ifdef compat_brk_randomized
+ if (brk_moved)
current->brk_randomized = 1;
#endif
- }
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
@@ -1361,75 +1377,6 @@ out_free_ph:
goto out;
}
-#ifdef CONFIG_USELIB
-/* This is really simpleminded and specialized - we are loading an
- a.out library that is given an ELF header. */
-static int load_elf_library(struct file *file)
-{
- struct elf_phdr *elf_phdata;
- struct elf_phdr *eppnt;
- int retval, error, i, j;
- struct elfhdr elf_ex;
-
- error = -ENOEXEC;
- retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
- if (retval < 0)
- goto out;
-
- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
- goto out;
-
- /* First of all, some simple consistency checks */
- if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(&elf_ex) || !file->f_op->mmap)
- goto out;
- if (elf_check_fdpic(&elf_ex))
- goto out;
-
- /* Now read in all of the header information */
-
- j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
- /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
-
- error = -ENOMEM;
- elf_phdata = kmalloc(j, GFP_KERNEL);
- if (!elf_phdata)
- goto out;
-
- eppnt = elf_phdata;
- error = -ENOEXEC;
- retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
- if (retval < 0)
- goto out_free_ph;
-
- for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
- if ((eppnt + i)->p_type == PT_LOAD)
- j++;
- if (j != 1)
- goto out_free_ph;
-
- while (eppnt->p_type != PT_LOAD)
- eppnt++;
-
- /* Now use mmap to map the library into memory. */
- error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
- eppnt,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED_NOREPLACE | MAP_PRIVATE,
- 0);
-
- if (error != ELF_PAGESTART(eppnt->p_vaddr))
- goto out_free_ph;
-
- error = 0;
-
-out_free_ph:
- kfree(elf_phdata);
-out:
- return error;
-}
-#endif /* #ifdef CONFIG_USELIB */
-
#ifdef CONFIG_ELF_CORE
/*
* ELF core dumper
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 5a7ebd160724..432fbf4fc334 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -842,7 +842,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
}
inode_lock(d_inode(root));
- dentry = lookup_one_len(e->name, root, strlen(e->name));
+ dentry = lookup_noperm(&QSTR(e->name), root);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 73a2dfb854c5..c352f3ae0385 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -52,10 +52,10 @@ config BTRFS_FS_RUN_SANITY_TESTS
bool "Btrfs will run sanity tests upon loading"
depends on BTRFS_FS
help
- This will run some basic sanity tests on the free space cache
- code to make sure it is acting as it should. These are mostly
- regression tests and are only really interesting to btrfs
- developers.
+ This will run sanity tests for core functionality like free space,
+ extent maps, extent io, extent buffers, inodes, qgroups and others,
+ at module load time. These are mostly regression tests and are only
+ interesting to developers.
If unsure, say N.
@@ -63,9 +63,12 @@ config BTRFS_DEBUG
bool "Btrfs debugging support"
depends on BTRFS_FS
help
- Enable run-time debugging support for the btrfs filesystem. This may
- enable additional and expensive checks with negative impact on
- performance, or export extra information via sysfs.
+ Enable run-time debugging support for the btrfs filesystem.
+
+ Additional potentially expensive checks, debugging functionality or
+ sysfs exported information is enabled, like leak checks of internal
+ objects, optional forced space fragmentation and /sys/fs/btrfs/debug .
+ This has negative impact on performance.
If unsure, say N.
@@ -73,8 +76,10 @@ config BTRFS_ASSERT
bool "Btrfs assert support"
depends on BTRFS_FS
help
- Enable run-time assertion checking. This will result in panics if
- any of the assertions trip. This is meant for btrfs developers only.
+ Enable run-time assertion checking. Additional safety checks are
+ done, simple enough not to affect performance but verify invariants
+ and assumptions of code to run properly. This may result in panics,
+ and is meant for developers but can be enabled in general.
If unsure, say N.
@@ -89,7 +94,14 @@ config BTRFS_EXPERIMENTAL
Current list:
- - extent map shrinker - performance problems with too frequent shrinks
+ - COW fixup worker warning - last warning before removing the
+ functionality catching out-of-band page
+ dirtying, not necessary since 5.8
+
+ - RAID mirror read policy - additional read policies for balancing
+ reading from redundant block group
+ profiles (currently: pid, round-robin,
+ fixed devid)
- send stream protocol v3 - fs-verity support
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index f3bffe08b290..6c6f3bb58f4e 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -219,8 +219,7 @@ static void run_ordered_work(struct btrfs_workqueue *wq,
spin_lock_irqsave(lock, flags);
if (list_empty(list))
break;
- work = list_entry(list->next, struct btrfs_work,
- ordered_list);
+ work = list_first_entry(list, struct btrfs_work, ordered_list);
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
/*
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 5936cff80ff3..ed497f5f8d1b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2877,7 +2877,7 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
goto release;
}
if (path->slots[0] == 0) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
ret = -EUCLEAN;
goto release;
}
@@ -3134,8 +3134,8 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
return;
while (!list_empty(&node->upper)) {
- edge = list_entry(node->upper.next, struct btrfs_backref_edge,
- list[LOWER]);
+ edge = list_first_entry(&node->upper, struct btrfs_backref_edge,
+ list[LOWER]);
list_del(&edge->list[LOWER]);
list_del(&edge->list[UPPER]);
btrfs_backref_free_edge(cache, edge);
@@ -3473,8 +3473,8 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
* type BTRFS_TREE_BLOCK_REF_KEY
*/
ASSERT(list_is_singular(&cur->upper));
- edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
- list[LOWER]);
+ edge = list_first_entry(&cur->upper, struct btrfs_backref_edge,
+ list[LOWER]);
ASSERT(list_empty(&edge->list[UPPER]));
exist = edge->node[UPPER];
/*
@@ -3617,7 +3617,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
/* Sanity check, we shouldn't have any unchecked nodes */
if (!upper->checked) {
- ASSERT(0);
+ DEBUG_WARN("we should not have any unchecked nodes");
return -EUCLEAN;
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 74e614031274..953637115956 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -423,8 +423,8 @@ struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache);
-#define LINK_LOWER (1 << 0)
-#define LINK_UPPER (1 << 1)
+#define LINK_LOWER (1U << 0)
+#define LINK_UPPER (1U << 1)
void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
struct btrfs_backref_node *lower,
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 8c2eee1f1878..f7d8958b7327 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -192,7 +192,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
repair_bbio->file_offset, fs_info->sectorsize,
repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
- page_folio(bv->bv_page), bv->bv_offset, mirror);
+ bvec_phys(bv), mirror);
} while (mirror != fbio->bbio->mirror_num);
done:
@@ -512,7 +512,7 @@ static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
}
}
-static blk_status_t btrfs_bio_csum(struct btrfs_bio *bbio)
+static int btrfs_bio_csum(struct btrfs_bio *bbio)
{
if (bbio->bio.bi_opf & REQ_META)
return btree_csum_one_bio(bbio);
@@ -543,11 +543,11 @@ static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async =
container_of(work, struct async_submit_bio, work);
- blk_status_t ret;
+ int ret;
ret = btrfs_bio_csum(async->bbio);
if (ret)
- async->bbio->bio.bi_status = ret;
+ async->bbio->bio.bi_status = errno_to_blk_status(ret);
}
/*
@@ -674,8 +674,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
bool use_append = btrfs_use_zone_append(bbio);
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap;
- blk_status_t ret;
- int error;
+ blk_status_t status;
+ int ret;
if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
smap.rst_search_commit_root = true;
@@ -683,10 +683,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
smap.rst_search_commit_root = false;
btrfs_bio_counter_inc_blocked(fs_info);
- error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
- &bioc, &smap, &mirror_num);
- if (error) {
- ret = errno_to_blk_status(error);
+ ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
+ &bioc, &smap, &mirror_num);
+ if (ret) {
+ status = errno_to_blk_status(ret);
btrfs_bio_counter_dec(fs_info);
goto end_bbio;
}
@@ -700,7 +700,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
split = btrfs_split_bio(fs_info, bbio, map_length);
if (IS_ERR(split)) {
- ret = errno_to_blk_status(PTR_ERR(split));
+ status = errno_to_blk_status(PTR_ERR(split));
btrfs_bio_counter_dec(fs_info);
goto end_bbio;
}
@@ -715,7 +715,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
- if (ret)
+ status = errno_to_blk_status(ret);
+ if (status)
goto fail;
}
@@ -748,13 +749,15 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
goto done;
ret = btrfs_bio_csum(bbio);
- if (ret)
+ status = errno_to_blk_status(ret);
+ if (status)
goto fail;
} else if (use_append ||
(btrfs_is_zoned(fs_info) && inode &&
inode->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_alloc_dummy_sum(bbio);
- if (ret)
+ status = errno_to_blk_status(ret);
+ if (status)
goto fail;
}
}
@@ -775,10 +778,10 @@ fail:
ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
ASSERT(remaining);
- btrfs_bio_end_io(remaining, ret);
+ btrfs_bio_end_io(remaining, status);
}
end_bbio:
- btrfs_bio_end_io(bbio, ret);
+ btrfs_bio_end_io(bbio, status);
/* Do not submit another chunk */
return true;
}
@@ -803,8 +806,7 @@ void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
* freeing the bio.
*/
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
- u64 length, u64 logical, struct folio *folio,
- unsigned int folio_offset, int mirror_num)
+ u64 length, u64 logical, phys_addr_t paddr, int mirror_num)
{
struct btrfs_io_stripe smap = { 0 };
struct bio_vec bvec;
@@ -835,8 +837,7 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
- ret = bio_add_folio(&bio, folio, length, folio_offset);
- ASSERT(ret);
+ __bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr));
ret = submit_bio_wait(&bio);
if (ret) {
/* try to remap that extent elsewhere? */
@@ -900,22 +901,18 @@ int __init btrfs_bioset_init(void)
return -ENOMEM;
if (bioset_init(&btrfs_clone_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_bio, bio), 0))
- goto out_free_bioset;
+ goto out;
if (bioset_init(&btrfs_repair_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_bio, bio),
BIOSET_NEED_BVECS))
- goto out_free_clone_bioset;
+ goto out;
if (mempool_init_kmalloc_pool(&btrfs_failed_bio_pool, BIO_POOL_SIZE,
sizeof(struct btrfs_failed_bio)))
- goto out_free_repair_bioset;
+ goto out;
return 0;
-out_free_repair_bioset:
- bioset_exit(&btrfs_repair_bioset);
-out_free_clone_bioset:
- bioset_exit(&btrfs_clone_bioset);
-out_free_bioset:
- bioset_exit(&btrfs_bioset);
+out:
+ btrfs_bioset_exit();
return -ENOMEM;
}
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index e2fe16074ad6..dc2eb43b7097 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -110,7 +110,6 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
- u64 length, u64 logical, struct folio *folio,
- unsigned int folio_offset, int mirror_num);
+ u64 length, u64 logical, phys_addr_t paddr, int mirror_num);
#endif
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index a8129f1ce78c..5b0cb04b2b93 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -525,10 +525,9 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
*total_added_ret = 0;
while (start < end) {
- if (!find_first_extent_bit(&info->excluded_extents, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY | EXTENT_UPTODATE,
- NULL))
+ if (!btrfs_find_first_extent_bit(&info->excluded_extents, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY, NULL))
break;
if (extent_start <= start) {
@@ -701,7 +700,7 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
struct btrfs_block_group *block_group = caching_ctl->block_group;
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *extent_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key key;
u64 total_found = 0;
@@ -828,14 +827,13 @@ next:
block_group->start + block_group->length,
NULL);
out:
- btrfs_free_path(path);
return ret;
}
static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
{
- clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
- bg->start + bg->length - 1, EXTENT_UPTODATE);
+ btrfs_clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
+ bg->start + bg->length - 1, EXTENT_DIRTY);
}
static noinline void caching_thread(struct btrfs_work *work)
@@ -1420,9 +1418,8 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
int ret;
spin_lock(&fs_info->trans_lock);
- if (trans->transaction->list.prev != &fs_info->trans_list) {
- prev_trans = list_last_entry(&trans->transaction->list,
- struct btrfs_transaction, list);
+ if (!list_is_first(&trans->transaction->list, &fs_info->trans_list)) {
+ prev_trans = list_prev_entry(trans->transaction, list);
refcount_inc(&prev_trans->use_count);
}
spin_unlock(&fs_info->trans_lock);
@@ -1439,14 +1436,14 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans) {
- ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
- EXTENT_DIRTY);
+ ret = btrfs_clear_extent_bits(&prev_trans->pinned_extents, start, end,
+ EXTENT_DIRTY);
if (ret)
goto out;
}
- ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
- EXTENT_DIRTY);
+ ret = btrfs_clear_extent_bits(&trans->transaction->pinned_extents, start, end,
+ EXTENT_DIRTY);
out:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans)
@@ -2218,9 +2215,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
cache->bytes_super += stripe_len;
- ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
- cache->start + stripe_len - 1,
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_set_extent_bit(&fs_info->excluded_extents, cache->start,
+ cache->start + stripe_len - 1,
+ EXTENT_DIRTY, NULL);
if (ret)
return ret;
}
@@ -2246,9 +2243,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
cache->start + cache->length - logical[nr]);
cache->bytes_super += len;
- ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
- logical[nr] + len - 1,
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_set_extent_bit(&fs_info->excluded_extents,
+ logical[nr], logical[nr] + len - 1,
+ EXTENT_DIRTY, NULL);
if (ret) {
kfree(logical);
return ret;
@@ -2373,6 +2370,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->commit_used = cache->used;
cache->flags = btrfs_stack_block_group_flags(bgi);
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
+ cache->space_info = btrfs_find_space_info(info, cache->flags);
set_free_space_tree_thresholds(cache);
@@ -2451,6 +2449,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
btrfs_remove_free_space_cache(cache);
goto error;
}
+
trace_btrfs_add_block_group(info, cache, 0);
btrfs_add_bg_to_space_info(info, cache);
@@ -2495,6 +2494,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
bg->cached = BTRFS_CACHE_FINISHED;
bg->used = map->chunk_len;
bg->flags = map->type;
+ bg->space_info = btrfs_find_space_info(fs_info, bg->flags);
ret = btrfs_add_block_group_cache(bg);
/*
* We may have some valid block group cache added already, in
@@ -2868,8 +2868,8 @@ static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 off
}
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
- u64 type,
- u64 chunk_offset, u64 size)
+ struct btrfs_space_info *space_info,
+ u64 type, u64 chunk_offset, u64 size)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *cache;
@@ -2923,7 +2923,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
* assigned to our block group. We want our bg to be added to the rbtree
* with its ->space_info set.
*/
- cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
+ cache->space_info = space_info;
ASSERT(cache->space_info);
ret = btrfs_add_block_group_cache(cache);
@@ -2968,6 +2968,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
bool do_chunk_alloc)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_space_info *space_info = cache->space_info;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_block_group_root(fs_info);
u64 alloc_flags;
@@ -3020,7 +3021,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
*/
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
- ret = btrfs_chunk_alloc(trans, alloc_flags,
+ ret = btrfs_chunk_alloc(trans, space_info, alloc_flags,
CHUNK_ALLOC_FORCE);
/*
* ENOSPC is allowed here, we may have enough space
@@ -3048,15 +3049,15 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
(cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
goto unlock_out;
- alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
- ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
+ ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
/*
* We have allocated a new chunk. We also need to activate that chunk to
* grant metadata tickets for zoned filesystem.
*/
- ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
+ ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
if (ret < 0)
goto out;
@@ -3738,8 +3739,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
- set_extent_bit(&trans->transaction->pinned_extents, bytenr,
- bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
+ btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr,
+ bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
}
spin_lock(&trans->transaction->dirty_bgs_lock);
@@ -3828,17 +3829,17 @@ out:
/*
* Update the block_group and space info counters.
*
- * @cache: The cache we are manipulating
- * @num_bytes: The number of bytes in question
- * @delalloc: The blocks are allocated for the delalloc write
+ * @cache: The cache we are manipulating.
+ * @num_bytes: The number of bytes in question.
+ * @is_delalloc: Whether the blocks are allocated for a delalloc write.
*
* This is called by somebody who is freeing space that was never actually used
* on disk. For example if you reserve some space for a new leaf in transaction
* A and before transaction A commits you free that leaf, you call this with
* reserve set to 0 in order to clear the reservation.
*/
-void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
- u64 num_bytes, int delalloc)
+void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
+ bool is_delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
@@ -3852,7 +3853,7 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;
- if (delalloc)
+ if (is_delalloc)
cache->delalloc_bytes -= num_bytes;
spin_unlock(&cache->lock);
@@ -3871,14 +3872,14 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
}
}
-static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *sinfo, int force)
+static bool should_alloc_chunk(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo, int force)
{
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
- return 1;
+ return true;
/*
* in limited mode, we want to have some free space up to
@@ -3889,22 +3890,31 @@ static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1));
if (sinfo->total_bytes - bytes_used < thresh)
- return 1;
+ return true;
}
if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80))
- return 0;
- return 1;
+ return false;
+ return true;
}
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
+ struct btrfs_space_info *space_info;
- return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ space_info = btrfs_find_space_info(trans->fs_info, type);
+ if (!space_info) {
+ DEBUG_WARN();
+ return -EINVAL;
+ }
+
+ return btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
}
-static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_space_info *space_info,
+ u64 flags)
{
struct btrfs_block_group *bg;
int ret;
@@ -3917,7 +3927,7 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
*/
check_system_chunk(trans, flags);
- bg = btrfs_create_chunk(trans, flags);
+ bg = btrfs_create_chunk(trans, space_info, flags);
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
goto out;
@@ -3965,8 +3975,16 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
if (ret == -ENOSPC) {
const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
struct btrfs_block_group *sys_bg;
+ struct btrfs_space_info *sys_space_info;
+
+ sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags);
+ if (!sys_space_info) {
+ ret = -EINVAL;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
- sys_bg = btrfs_create_chunk(trans, sys_flags);
+ sys_bg = btrfs_create_chunk(trans, sys_space_info, sys_flags);
if (IS_ERR(sys_bg)) {
ret = PTR_ERR(sys_bg);
btrfs_abort_transaction(trans, ret);
@@ -4097,6 +4115,8 @@ out:
*
* This function, btrfs_chunk_alloc(), belongs to phase 1.
*
+ * @space_info: specify which space_info the new chunk should belong to.
+ *
* If @force is CHUNK_ALLOC_FORCE:
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
@@ -4105,11 +4125,11 @@ out:
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_space_info *space_info, u64 flags,
enum btrfs_chunk_alloc_enum force)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_space_info *space_info;
struct btrfs_block_group *ret_bg;
bool wait_for_alloc = false;
bool should_alloc = false;
@@ -4148,9 +4168,6 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return -ENOSPC;
- space_info = btrfs_find_space_info(fs_info, flags);
- ASSERT(space_info);
-
do {
spin_lock(&space_info->lock);
if (force < space_info->force_alloc)
@@ -4211,7 +4228,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
force_metadata_allocation(fs_info);
}
- ret_bg = do_chunk_alloc(trans, flags);
+ ret_bg = do_chunk_alloc(trans, space_info, flags);
trans->allocating_chunk = false;
if (IS_ERR(ret_bg)) {
@@ -4287,6 +4304,10 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
if (left < bytes) {
u64 flags = btrfs_system_alloc_profile(fs_info);
struct btrfs_block_group *bg;
+ struct btrfs_space_info *space_info;
+
+ space_info = btrfs_find_space_info(fs_info, flags);
+ ASSERT(space_info);
/*
* Ignore failure to create system chunk. We might end up not
@@ -4294,7 +4315,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*/
- bg = btrfs_create_chunk(trans, flags);
+ bg = btrfs_create_chunk(trans, space_info, flags);
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
} else {
@@ -4402,6 +4423,43 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
}
}
+static void check_removing_space_info(struct btrfs_space_info *space_info)
+{
+ struct btrfs_fs_info *info = space_info->fs_info;
+
+ if (space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY) {
+ /* This is a top space_info, proceed with its children first. */
+ for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
+ if (space_info->sub_group[i]) {
+ check_removing_space_info(space_info->sub_group[i]);
+ kfree(space_info->sub_group[i]);
+ space_info->sub_group[i] = NULL;
+ }
+ }
+ }
+
+ /*
+ * Do not hide this behind enospc_debug, this is actually important and
+ * indicates a real bug if this happens.
+ */
+ if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0))
+ btrfs_dump_space_info(info, space_info, 0, 0);
+
+ /*
+ * If there was a failure to cleanup a log tree, very likely due to an
+ * IO failure on a writeback attempt of one or more of its extent
+ * buffers, we could not do proper (and cheap) unaccounting of their
+ * reserved space, so don't warn on bytes_reserved > 0 in that case.
+ */
+ if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+ !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+ if (WARN_ON(space_info->bytes_reserved > 0))
+ btrfs_dump_space_info(info, space_info, 0, 0);
+ }
+
+ WARN_ON(space_info->reclaim_size > 0);
+}
+
/*
* Must be called only after stopping all workers, since we could have block
* group caching kthreads running, and therefore they could race with us if we
@@ -4427,8 +4485,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
write_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
- caching_ctl = list_entry(info->caching_block_groups.next,
- struct btrfs_caching_control, list);
+ caching_ctl = list_first_entry(&info->caching_block_groups,
+ struct btrfs_caching_control, list);
list_del(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
}
@@ -4499,32 +4557,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
btrfs_release_global_block_rsv(info);
while (!list_empty(&info->space_info)) {
- space_info = list_entry(info->space_info.next,
- struct btrfs_space_info,
- list);
-
- /*
- * Do not hide this behind enospc_debug, this is actually
- * important and indicates a real bug if this happens.
- */
- if (WARN_ON(space_info->bytes_pinned > 0 ||
- space_info->bytes_may_use > 0))
- btrfs_dump_space_info(info, space_info, 0, 0);
-
- /*
- * If there was a failure to cleanup a log tree, very likely due
- * to an IO failure on a writeback attempt of one or more of its
- * extent buffers, we could not do proper (and cheap) unaccounting
- * of their reserved space, so don't warn on bytes_reserved > 0 in
- * that case.
- */
- if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
- !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
- if (WARN_ON(space_info->bytes_reserved > 0))
- btrfs_dump_space_info(info, space_info, 0, 0);
- }
+ space_info = list_first_entry(&info->space_info,
+ struct btrfs_space_info, list);
- WARN_ON(space_info->reclaim_size > 0);
+ check_removing_space_info(space_info);
list_del(&space_info->list);
btrfs_sysfs_remove_space_info(space_info);
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 36937eeab9b8..9de356bcb411 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -326,8 +326,8 @@ void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
- u64 type,
- u64 chunk_offset, u64 size);
+ struct btrfs_space_info *space_info,
+ u64 type, u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
bool do_chunk_alloc);
@@ -340,9 +340,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
u64 ram_bytes, u64 num_bytes, int delalloc,
bool force_wrong_size_class);
-void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
- u64 num_bytes, int delalloc);
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
+ bool is_delalloc);
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_space_info *space_info, u64 flags,
enum btrfs_chunk_alloc_enum force);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 3f3608299c0b..5ad6de738aee 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -418,6 +418,9 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
case BTRFS_CHUNK_TREE_OBJECTID:
root->block_rsv = &fs_info->chunk_block_rsv;
break;
+ case BTRFS_TREE_LOG_OBJECTID:
+ root->block_rsv = &fs_info->treelog_rsv;
+ break;
default:
root->block_rsv = NULL;
break;
@@ -438,6 +441,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->delayed_block_rsv.space_info = space_info;
fs_info->delayed_refs_rsv.space_info = space_info;
+ /* The treelog_rsv uses a dedicated space_info on the zoned mode. */
+ if (!btrfs_is_zoned(fs_info)) {
+ fs_info->treelog_rsv.space_info = space_info;
+ } else {
+ ASSERT(space_info->sub_group[0]->subgroup_id == BTRFS_SUB_GROUP_TREELOG);
+ fs_info->treelog_rsv.space_info = space_info->sub_group[0];
+ }
+
btrfs_update_global_block_rsv(fs_info);
}
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
index d12b1fac5c74..79ae9d05cd91 100644
--- a/fs/btrfs/block-rsv.h
+++ b/fs/btrfs/block-rsv.h
@@ -24,6 +24,7 @@ enum btrfs_rsv_type {
BTRFS_BLOCK_RSV_CHUNK,
BTRFS_BLOCK_RSV_DELOPS,
BTRFS_BLOCK_RSV_DELREFS,
+ BTRFS_BLOCK_RSV_TREELOG,
BTRFS_BLOCK_RSV_EMPTY,
BTRFS_BLOCK_RSV_TEMP,
};
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4e2952cf5766..a79fa0726f1d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -529,8 +529,8 @@ static inline void btrfs_update_inode_mapping_flags(struct btrfs_inode *inode)
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
-int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
- u32 pgoff, u8 *csum, const u8 * const csum_expected);
+int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
+ const u8 * const csum_expected);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
@@ -547,8 +547,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const struct fscrypt_str *name, int add_backref, u64 index);
int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry);
-int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
- int front);
+int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 end);
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7f11ef559be6..48d07939fee4 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -285,12 +285,12 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct folio_batch fbatch;
- const int error = blk_status_to_errno(cb->bbio.bio.bi_status);
int i;
int ret;
- if (error)
- mapping_set_error(inode->i_mapping, error);
+ ret = blk_status_to_errno(cb->bbio.bio.bi_status);
+ if (ret)
+ mapping_set_error(inode->i_mapping, ret);
folio_batch_init(&fbatch);
while (index <= end_index) {
@@ -499,9 +499,9 @@ static noinline int add_ra_bio_pages(struct inode *inode,
}
page_end = (pg_index << PAGE_SHIFT) + folio_size(folio) - 1;
- lock_extent(tree, cur, page_end, NULL);
+ btrfs_lock_extent(tree, cur, page_end, NULL);
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
+ em = btrfs_lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
read_unlock(&em_tree->lock);
/*
@@ -510,20 +510,20 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* to this compressed extent on disk.
*/
if (!em || cur < em->start ||
- (cur + fs_info->sectorsize > extent_map_end(em)) ||
- (extent_map_block_start(em) >> SECTOR_SHIFT) !=
+ (cur + fs_info->sectorsize > btrfs_extent_map_end(em)) ||
+ (btrfs_extent_map_block_start(em) >> SECTOR_SHIFT) !=
orig_bio->bi_iter.bi_sector) {
- free_extent_map(em);
- unlock_extent(tree, cur, page_end, NULL);
+ btrfs_free_extent_map(em);
+ btrfs_unlock_extent(tree, cur, page_end, NULL);
folio_unlock(folio);
folio_put(folio);
break;
}
add_size = min(em->start + em->len, page_end + 1) - cur;
- free_extent_map(em);
- unlock_extent(tree, cur, page_end, NULL);
+ btrfs_free_extent_map(em);
+ btrfs_unlock_extent(tree, cur, page_end, NULL);
- if (folio->index == end_index) {
+ if (folio_contains(folio, end_index)) {
size_t zero_offset = offset_in_folio(folio, isize);
if (zero_offset) {
@@ -576,19 +576,19 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
struct extent_map *em;
unsigned long pflags;
int memstall = 0;
- blk_status_t ret;
- int ret2;
+ blk_status_t status;
+ int ret;
/* we need the actual starting offset of this extent in the file */
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
+ em = btrfs_lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em) {
- ret = BLK_STS_IOERR;
+ status = BLK_STS_IOERR;
goto out;
}
- ASSERT(extent_map_is_compressed(em));
+ ASSERT(btrfs_extent_map_is_compressed(em));
compressed_len = em->disk_num_bytes;
cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
@@ -600,21 +600,21 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
cb->len = bbio->bio.bi_iter.bi_size;
cb->compressed_len = compressed_len;
- cb->compress_type = extent_map_compression(em);
+ cb->compress_type = btrfs_extent_map_compression(em);
cb->orig_bbio = bbio;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS);
if (!cb->compressed_folios) {
- ret = BLK_STS_RESOURCE;
+ status = BLK_STS_RESOURCE;
goto out_free_bio;
}
- ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios);
- if (ret2) {
- ret = BLK_STS_RESOURCE;
+ ret = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios);
+ if (ret) {
+ status = BLK_STS_RESOURCE;
goto out_free_compressed_pages;
}
@@ -637,7 +637,7 @@ out_free_compressed_pages:
out_free_bio:
bio_put(&cb->bbio.bio);
out:
- btrfs_bio_end_io(bbio, ret);
+ btrfs_bio_end_io(bbio, status);
}
/*
@@ -1138,6 +1138,22 @@ void __cold btrfs_exit_compress(void)
}
/*
+ * The bvec is a single page bvec from a bio that contains folios from a filemap.
+ *
+ * Since the folio may be a large one, and if the bv_page is not a head page of
+ * a large folio, then page->index is unreliable.
+ *
+ * Thus we need this helper to grab the proper file offset.
+ */
+static u64 file_offset_from_bvec(const struct bio_vec *bvec)
+{
+ const struct page *page = bvec->bv_page;
+ const struct folio *folio = page_folio(page);
+
+ return (page_pgoff(folio, page) << PAGE_SHIFT) + bvec->bv_offset;
+}
+
+/*
* Copy decompressed data from working buffer to pages.
*
* @buf: The decompressed data buffer
@@ -1182,13 +1198,14 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
u32 copy_start;
/* Offset inside the full decompressed extent */
u32 bvec_offset;
+ void *kaddr;
bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);
/*
* cb->start may underflow, but subtracting that value can still
* give us correct offset inside the full decompressed extent.
*/
- bvec_offset = page_offset(bvec.bv_page) + bvec.bv_offset - cb->start;
+ bvec_offset = file_offset_from_bvec(&bvec) - cb->start;
/* Haven't reached the bvec range, exit */
if (decompressed + buf_len <= bvec_offset)
@@ -1204,10 +1221,12 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
* @buf + @buf_len.
*/
ASSERT(copy_start - decompressed < buf_len);
- memcpy_to_page(bvec.bv_page, bvec.bv_offset,
- buf + copy_start - decompressed, copy_len);
- cur_offset += copy_len;
+ kaddr = bvec_kmap_local(&bvec);
+ memcpy(kaddr, buf + copy_start - decompressed, copy_len);
+ kunmap_local(kaddr);
+
+ cur_offset += copy_len;
bio_advance(orig_bio, copy_len);
/* Finished the bio */
if (!orig_bio->bi_iter.bi_size)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index df198623cc08..d34c4341eaf4 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -11,7 +11,9 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
+#include <linux/pagemap.h>
#include "bio.h"
+#include "messages.h"
struct address_space;
struct page;
@@ -73,11 +75,14 @@ struct compressed_bio {
};
/* @range_end must be exclusive. */
-static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur)
+static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur)
{
- u64 page_end = round_down(cur, PAGE_SIZE) + PAGE_SIZE;
+ const u64 folio_end = folio_pos(folio) + folio_size(folio);
- return min(range_end, page_end) - cur;
+ /* @cur must be inside the folio. */
+ ASSERT(folio_pos(folio) <= cur);
+ ASSERT(cur < folio_end);
+ return min(range_end, folio_end) - cur;
}
int __init btrfs_init_compress(void);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 075a06db43a1..71fa42ca04fe 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -61,7 +61,6 @@ struct btrfs_path {
/* if there is real range locking, this locks field will change */
u8 locks[BTRFS_MAX_LEVEL];
u8 reada;
- /* keep some upper locks as we walk down */
u8 lowest_level;
/*
@@ -69,6 +68,7 @@ struct btrfs_path {
* and to force calls to keep space in the nodes
*/
unsigned int search_for_split:1;
+ /* Keep some upper locks as we walk down. */
unsigned int keep_locks:1;
unsigned int skip_locking:1;
unsigned int search_commit_root:1;
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index d4310d93f532..1831618579cb 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -105,15 +105,15 @@ static int btrfs_insert_inode_defrag(struct btrfs_inode *inode,
return 0;
}
-static inline int need_auto_defrag(struct btrfs_fs_info *fs_info)
+static inline bool need_auto_defrag(struct btrfs_fs_info *fs_info)
{
if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
- return 0;
+ return false;
if (btrfs_fs_closing(fs_info))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
@@ -191,10 +191,7 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
if (parent && compare_inode_defrag(&tmp, entry) > 0) {
parent = rb_next(parent);
- if (parent)
- entry = rb_entry(parent, struct inode_defrag, rb_node);
- else
- entry = NULL;
+ entry = rb_entry_safe(parent, struct inode_defrag, rb_node);
}
out:
if (entry)
@@ -624,7 +621,7 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
u64 ino = btrfs_ino(inode);
int ret;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
ret = -ENOMEM;
goto err;
@@ -734,12 +731,12 @@ next:
not_found:
btrfs_release_path(&path);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return NULL;
err:
btrfs_release_path(&path);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return ERR_PTR(ret);
}
@@ -756,7 +753,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
* full extent lock.
*/
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, sectorsize);
+ em = btrfs_lookup_extent_mapping(em_tree, start, sectorsize);
read_unlock(&em_tree->lock);
/*
@@ -769,7 +766,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
* file extent items in the inode's subvolume tree).
*/
if (em && (em->flags & EXTENT_FLAG_MERGED)) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = NULL;
}
@@ -779,10 +776,10 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
/* Get the big lock and read metadata off disk. */
if (!locked)
- lock_extent(io_tree, start, end, &cached);
+ btrfs_lock_extent(io_tree, start, end, &cached);
em = defrag_get_extent(BTRFS_I(inode), start, newer_than);
if (!locked)
- unlock_extent(io_tree, start, end, &cached);
+ btrfs_unlock_extent(io_tree, start, end, &cached);
if (IS_ERR(em))
return NULL;
@@ -794,7 +791,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
const struct extent_map *em)
{
- if (extent_map_is_compressed(em))
+ if (btrfs_extent_map_is_compressed(em))
return BTRFS_MAX_COMPRESSED;
return fs_info->max_extent_size;
}
@@ -837,7 +834,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
ret = true;
out:
- free_extent_map(next);
+ btrfs_free_extent_map(next);
return ret;
}
@@ -857,13 +854,14 @@ static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t
{
struct address_space *mapping = inode->vfs_inode.i_mapping;
gfp_t mask = btrfs_alloc_write_mask(mapping);
- u64 page_start = (u64)index << PAGE_SHIFT;
- u64 page_end = page_start + PAGE_SIZE - 1;
+ u64 folio_start;
+ u64 folio_end;
struct extent_state *cached_state = NULL;
struct folio *folio;
int ret;
again:
+ /* TODO: Add order fgp order flags when large folios are fully enabled. */
folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
if (IS_ERR(folio))
@@ -871,13 +869,16 @@ again:
/*
* Since we can defragment files opened read-only, we can encounter
- * transparent huge pages here (see CONFIG_READ_ONLY_THP_FOR_FS). We
- * can't do I/O using huge pages yet, so return an error for now.
+ * transparent huge pages here (see CONFIG_READ_ONLY_THP_FOR_FS).
+ *
+ * The IO for such large folios is not fully tested, thus return
+ * an error to reject such folios unless it's an experimental build.
+ *
* Filesystem transparent huge pages are typically only used for
* executables that explicitly enable them, so this isn't very
* restrictive.
*/
- if (folio_test_large(folio)) {
+ if (!IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL) && folio_test_large(folio)) {
folio_unlock(folio);
folio_put(folio);
return ERR_PTR(-ETXTBSY);
@@ -890,14 +891,15 @@ again:
return ERR_PTR(ret);
}
+ folio_start = folio_pos(folio);
+ folio_end = folio_pos(folio) + folio_size(folio) - 1;
/* Wait for any existing ordered extent in the range */
while (1) {
struct btrfs_ordered_extent *ordered;
- lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
- ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
- unlock_extent(&inode->io_tree, page_start, page_end,
- &cached_state);
+ btrfs_lock_extent(&inode->io_tree, folio_start, folio_end, &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, folio_start, folio_size(folio));
+ btrfs_unlock_extent(&inode->io_tree, folio_start, folio_end, &cached_state);
if (!ordered)
break;
@@ -1027,8 +1029,8 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
* very likely resulting in a larger extent after writeback is
* triggered (except in a case of free space fragmentation).
*/
- if (test_range_bit_exists(&inode->io_tree, cur, cur + range_len - 1,
- EXTENT_DELALLOC))
+ if (btrfs_test_range_bit_exists(&inode->io_tree, cur, cur + range_len - 1,
+ EXTENT_DELALLOC))
goto next;
/*
@@ -1066,8 +1068,8 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
/* Empty target list, no way to merge with last entry */
if (list_empty(target_list))
goto next;
- last = list_entry(target_list->prev,
- struct defrag_target_range, list);
+ last = list_last_entry(target_list,
+ struct defrag_target_range, list);
/* Not mergeable with last entry */
if (last->start + last->len != cur)
goto next;
@@ -1077,7 +1079,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
add:
last_is_target = true;
- range_len = min(extent_map_end(em), start + len) - cur;
+ range_len = min(btrfs_extent_map_end(em), start + len) - cur;
/*
* This one is a good target, check if it can be merged into
* last range of the target list.
@@ -1085,8 +1087,8 @@ add:
if (!list_empty(target_list)) {
struct defrag_target_range *last;
- last = list_entry(target_list->prev,
- struct defrag_target_range, list);
+ last = list_last_entry(target_list,
+ struct defrag_target_range, list);
ASSERT(last->start + last->len <= cur);
if (last->start + last->len == cur) {
/* Mergeable, enlarge the last entry */
@@ -1099,7 +1101,7 @@ add:
/* Allocate new defrag_target_range */
new = kmalloc(sizeof(*new), GFP_NOFS);
if (!new) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ret = -ENOMEM;
break;
}
@@ -1108,8 +1110,8 @@ add:
list_add_tail(&new->list, target_list);
next:
- cur = extent_map_end(em);
- free_extent_map(em);
+ cur = btrfs_extent_map_end(em);
+ btrfs_free_extent_map(em);
}
if (ret < 0) {
struct defrag_target_range *entry;
@@ -1162,27 +1164,31 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
struct extent_changeset *data_reserved = NULL;
const u64 start = target->start;
const u64 len = target->len;
- unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
- unsigned long start_index = start >> PAGE_SHIFT;
- unsigned long first_index = folios[0]->index;
int ret = 0;
- int i;
-
- ASSERT(last_index - first_index + 1 <= nr_pages);
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, len);
if (ret < 0)
return ret;
- clear_extent_bit(&inode->io_tree, start, start + len - 1,
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, cached_state);
- set_extent_bit(&inode->io_tree, start, start + len - 1,
- EXTENT_DELALLOC | EXTENT_DEFRAG, cached_state);
-
- /* Update the page status */
- for (i = start_index - first_index; i <= last_index - first_index; i++) {
- folio_clear_checked(folios[i]);
- btrfs_folio_clamp_set_dirty(fs_info, folios[i], start, len);
+ btrfs_clear_extent_bit(&inode->io_tree, start, start + len - 1,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, cached_state);
+ btrfs_set_extent_bit(&inode->io_tree, start, start + len - 1,
+ EXTENT_DELALLOC | EXTENT_DEFRAG, cached_state);
+
+ /*
+ * Update the page status.
+ * Due to possible large folios, we have to check all folios one by one.
+ */
+ for (int i = 0; i < nr_pages && folios[i]; i++) {
+ struct folio *folio = folios[i];
+
+ if (!folio)
+ break;
+ if (start >= folio_pos(folio) + folio_size(folio) ||
+ start + len <= folio_pos(folio))
+ continue;
+ btrfs_folio_clamp_clear_checked(fs_info, folio, start, len);
+ btrfs_folio_clamp_set_dirty(fs_info, folio, start, len);
}
btrfs_delalloc_release_extents(inode, len);
extent_changeset_free(data_reserved);
@@ -1200,11 +1206,10 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
LIST_HEAD(target_list);
struct folio **folios;
const u32 sectorsize = inode->root->fs_info->sectorsize;
- u64 last_index = (start + len - 1) >> PAGE_SHIFT;
- u64 start_index = start >> PAGE_SHIFT;
- unsigned int nr_pages = last_index - start_index + 1;
+ u64 cur = start;
+ const unsigned int nr_pages = ((start + len - 1) >> PAGE_SHIFT) -
+ (start >> PAGE_SHIFT) + 1;
int ret = 0;
- int i;
ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
@@ -1214,21 +1219,25 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
return -ENOMEM;
/* Prepare all pages */
- for (i = 0; i < nr_pages; i++) {
- folios[i] = defrag_prepare_one_folio(inode, start_index + i);
+ for (int i = 0; cur < start + len && i < nr_pages; i++) {
+ folios[i] = defrag_prepare_one_folio(inode, cur >> PAGE_SHIFT);
if (IS_ERR(folios[i])) {
ret = PTR_ERR(folios[i]);
- nr_pages = i;
+ folios[i] = NULL;
goto free_folios;
}
+ cur = folio_pos(folios[i]) + folio_size(folios[i]);
}
- for (i = 0; i < nr_pages; i++)
+ for (int i = 0; i < nr_pages; i++) {
+ if (!folios[i])
+ break;
folio_wait_writeback(folios[i]);
+ }
+ /* We should get at least one folio. */
+ ASSERT(folios[0]);
/* Lock the pages range */
- lock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
- (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
- &cached_state);
+ btrfs_lock_extent(&inode->io_tree, folio_pos(folios[0]), cur - 1, &cached_state);
/*
* Now we have a consistent view about the extent map, re-check
* which range really needs to be defragged.
@@ -1254,11 +1263,11 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
kfree(entry);
}
unlock_extent:
- unlock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
- (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
- &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, folio_pos(folios[0]), cur - 1, &cached_state);
free_folios:
- for (i = 0; i < nr_pages; i++) {
+ for (int i = 0; i < nr_pages; i++) {
+ if (!folios[i])
+ break;
folio_unlock(folios[i]);
folio_put(folios[i]);
}
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 88e900e5a43d..288e1776c02d 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -111,6 +111,18 @@
* making error handling and cleanup easier.
*/
+static inline struct btrfs_space_info *data_sinfo_for_inode(const struct btrfs_inode *inode)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+ if (btrfs_is_zoned(fs_info) && btrfs_is_data_reloc_root(inode->root)) {
+ ASSERT(fs_info->data_sinfo->sub_group[0]->subgroup_id ==
+ BTRFS_SUB_GROUP_DATA_RELOC);
+ return fs_info->data_sinfo->sub_group[0];
+ }
+ return fs_info->data_sinfo;
+}
+
int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes)
{
struct btrfs_root *root = inode->root;
@@ -123,7 +135,7 @@ int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes)
if (btrfs_is_free_space_inode(inode))
flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE;
- return btrfs_reserve_data_bytes(fs_info, bytes, flush);
+ return btrfs_reserve_data_bytes(data_sinfo_for_inode(inode), bytes, flush);
}
int btrfs_check_data_free_space(struct btrfs_inode *inode,
@@ -144,14 +156,14 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
else if (btrfs_is_free_space_inode(inode))
flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE;
- ret = btrfs_reserve_data_bytes(fs_info, len, flush);
+ ret = btrfs_reserve_data_bytes(data_sinfo_for_inode(inode), len, flush);
if (ret < 0)
return ret;
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
if (ret < 0) {
- btrfs_free_reserved_data_space_noquota(fs_info, len);
+ btrfs_free_reserved_data_space_noquota(inode, len);
extent_changeset_free(*reserved);
*reserved = NULL;
} else {
@@ -168,15 +180,13 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
* which we can't sleep and is sure it won't affect qgroup reserved space.
* Like clear_bit_hook().
*/
-void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
- u64 len)
+void btrfs_free_reserved_data_space_noquota(struct btrfs_inode *inode, u64 len)
{
- struct btrfs_space_info *data_sinfo;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
ASSERT(IS_ALIGNED(len, fs_info->sectorsize));
- data_sinfo = fs_info->data_sinfo;
- btrfs_space_info_free_bytes_may_use(data_sinfo, len);
+ btrfs_space_info_free_bytes_may_use(data_sinfo_for_inode(inode), len);
}
/*
@@ -196,7 +206,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
- btrfs_free_reserved_data_space_noquota(fs_info, len);
+ btrfs_free_reserved_data_space_noquota(inode, len);
btrfs_qgroup_free_data(inode, reserved, start, len, NULL);
}
@@ -439,6 +449,29 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
btrfs_inode_rsv_release(inode, true);
}
+/* Shrink a previously reserved extent to a new length. */
+void btrfs_delalloc_shrink_extents(struct btrfs_inode *inode, u64 reserved_len, u64 new_len)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u32 reserved_num_extents = count_max_extents(fs_info, reserved_len);
+ const u32 new_num_extents = count_max_extents(fs_info, new_len);
+ const int diff_num_extents = new_num_extents - reserved_num_extents;
+
+ ASSERT(new_len <= reserved_len);
+ if (new_num_extents == reserved_num_extents)
+ return;
+
+ spin_lock(&inode->lock);
+ btrfs_mod_outstanding_extents(inode, diff_num_extents);
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+
+ if (btrfs_is_testing(fs_info))
+ return;
+
+ btrfs_inode_rsv_release(inode, true);
+}
+
/*
* Reserve data and metadata space for delalloc
*
diff --git a/fs/btrfs/delalloc-space.h b/fs/btrfs/delalloc-space.h
index 3f32953c0a80..6119c0d3f883 100644
--- a/fs/btrfs/delalloc-space.h
+++ b/fs/btrfs/delalloc-space.h
@@ -18,8 +18,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
struct extent_changeset *reserved,
u64 start, u64 len, bool qgroup_free);
-void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
- u64 len);
+void btrfs_free_reserved_data_space_noquota(struct btrfs_inode *inode, u64 len);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free);
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
@@ -27,5 +26,6 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
u64 disk_num_bytes, bool noflush);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_shrink_extents(struct btrfs_inode *inode, u64 reserved_len, u64 new_len);
#endif /* BTRFS_DELALLOC_SPACE_H */
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 3f1551d8a5c6..c7cc24a5dd5e 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -119,7 +119,12 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
return NULL;
}
-/* Will return either the node or PTR_ERR(-ENOMEM) */
+/*
+ * Look up an existing delayed node associated with @btrfs_inode or create a new
+ * one and insert it to the delayed nodes of the root.
+ *
+ * Return the delayed node, or error pointer on failure.
+ */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
struct btrfs_inode *btrfs_inode)
{
@@ -211,17 +216,13 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
static struct btrfs_delayed_node *btrfs_first_delayed_node(
struct btrfs_delayed_root *delayed_root)
{
- struct list_head *p;
- struct btrfs_delayed_node *node = NULL;
+ struct btrfs_delayed_node *node;
spin_lock(&delayed_root->lock);
- if (list_empty(&delayed_root->node_list))
- goto out;
-
- p = delayed_root->node_list.next;
- node = list_entry(p, struct btrfs_delayed_node, n_list);
- refcount_inc(&node->refs);
-out:
+ node = list_first_entry_or_null(&delayed_root->node_list,
+ struct btrfs_delayed_node, n_list);
+ if (node)
+ refcount_inc(&node->refs);
spin_unlock(&delayed_root->lock);
return node;
@@ -293,18 +294,15 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
struct btrfs_delayed_root *delayed_root)
{
- struct list_head *p;
- struct btrfs_delayed_node *node = NULL;
+ struct btrfs_delayed_node *node;
spin_lock(&delayed_root->lock);
- if (list_empty(&delayed_root->prepare_list))
- goto out;
-
- p = delayed_root->prepare_list.next;
- list_del_init(p);
- node = list_entry(p, struct btrfs_delayed_node, p_list);
- refcount_inc(&node->refs);
-out:
+ node = list_first_entry_or_null(&delayed_root->prepare_list,
+ struct btrfs_delayed_node, p_list);
+ if (node) {
+ list_del_init(&node->p_list);
+ refcount_inc(&node->refs);
+ }
spin_unlock(&delayed_root->lock);
return node;
@@ -454,40 +452,25 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
struct btrfs_delayed_node *delayed_node)
{
- struct rb_node *p;
- struct btrfs_delayed_item *item = NULL;
+ struct rb_node *p = rb_first_cached(&delayed_node->ins_root);
- p = rb_first_cached(&delayed_node->ins_root);
- if (p)
- item = rb_entry(p, struct btrfs_delayed_item, rb_node);
-
- return item;
+ return rb_entry_safe(p, struct btrfs_delayed_item, rb_node);
}
static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
struct btrfs_delayed_node *delayed_node)
{
- struct rb_node *p;
- struct btrfs_delayed_item *item = NULL;
-
- p = rb_first_cached(&delayed_node->del_root);
- if (p)
- item = rb_entry(p, struct btrfs_delayed_item, rb_node);
+ struct rb_node *p = rb_first_cached(&delayed_node->del_root);
- return item;
+ return rb_entry_safe(p, struct btrfs_delayed_item, rb_node);
}
static struct btrfs_delayed_item *__btrfs_next_delayed_item(
struct btrfs_delayed_item *item)
{
- struct rb_node *p;
- struct btrfs_delayed_item *next = NULL;
-
- p = rb_next(&item->rb_node);
- if (p)
- next = rb_entry(p, struct btrfs_delayed_item, rb_node);
+ struct rb_node *p = rb_next(&item->rb_node);
- return next;
+ return rb_entry_safe(p, struct btrfs_delayed_item, rb_node);
}
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -1397,17 +1380,17 @@ void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
}
-static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
+static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
{
int val = atomic_read(&delayed_root->items_seq);
if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
- return 1;
+ return true;
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
- return 1;
+ return true;
- return 0;
+ return false;
}
void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 98c5b61dabe8..739c9e29aaa3 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -331,12 +331,9 @@ static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
struct btrfs_delayed_ref_node *ins)
{
struct rb_node *node = &ins->ref_node;
- struct rb_node *exist;
+ struct rb_node *exist = rb_find_add_cached(node, root, cmp_refs_node);
- exist = rb_find_add_cached(node, root, cmp_refs_node);
- if (exist)
- return rb_entry(exist, struct btrfs_delayed_ref_node, ref_node);
- return NULL;
+ return rb_entry_safe(exist, struct btrfs_delayed_ref_node, ref_node);
}
static struct btrfs_delayed_ref_head *find_first_ref_head(
@@ -1339,7 +1336,7 @@ int __init btrfs_delayed_ref_init(void)
{
btrfs_delayed_ref_head_cachep = KMEM_CACHE(btrfs_delayed_ref_head, 0);
if (!btrfs_delayed_ref_head_cachep)
- goto fail;
+ return -ENOMEM;
btrfs_delayed_ref_node_cachep = KMEM_CACHE(btrfs_delayed_ref_node, 0);
if (!btrfs_delayed_ref_node_cachep)
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f5ae880308d3..78cc23837610 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -262,7 +262,6 @@ enum btrfs_ref_type {
BTRFS_REF_NOT_SET,
BTRFS_REF_DATA,
BTRFS_REF_METADATA,
- BTRFS_REF_LAST,
} __packed;
struct btrfs_ref {
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 53d7d85cb4be..2decb9fff445 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -637,7 +637,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
- ASSERT(0);
+ DEBUG_WARN("unexpected STARTED ot SUSPENDED dev-replace state");
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
up_write(&dev_replace->rwsem);
goto leave;
@@ -794,17 +794,17 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
- while (find_first_extent_bit(&srcdev->alloc_state, start,
- &found_start, &found_end,
- CHUNK_ALLOCATED, &cached_state)) {
- ret = set_extent_bit(&tgtdev->alloc_state, found_start,
- found_end, CHUNK_ALLOCATED, NULL);
+ while (btrfs_find_first_extent_bit(&srcdev->alloc_state, start,
+ &found_start, &found_end,
+ CHUNK_ALLOCATED, &cached_state)) {
+ ret = btrfs_set_extent_bit(&tgtdev->alloc_state, found_start,
+ found_end, CHUNK_ALLOCATED, NULL);
if (ret)
break;
start = found_end + 1;
}
- free_extent_state(cached_state);
+ btrfs_free_extent_state(cached_state);
return ret;
}
@@ -1265,16 +1265,16 @@ static int btrfs_dev_replace_kthread(void *data)
return 0;
}
-int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
+bool __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
{
if (!dev_replace->is_valid)
- return 0;
+ return false;
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
- return 0;
+ return false;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
/*
@@ -1289,7 +1289,7 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
*/
break;
}
- return 1;
+ return true;
}
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 23e480efe5e6..b35cecf388f2 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -25,7 +25,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
-int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
+bool __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
struct btrfs_block_group *cache,
u64 physical);
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index a374ce7a1813..fe9a4bd7e6e6 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -42,21 +42,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
/* Direct lock must be taken before the extent lock. */
if (nowait) {
- if (!try_lock_dio_extent(io_tree, lockstart, lockend, cached_state))
+ if (!btrfs_try_lock_dio_extent(io_tree, lockstart, lockend, cached_state))
return -EAGAIN;
} else {
- lock_dio_extent(io_tree, lockstart, lockend, cached_state);
+ btrfs_lock_dio_extent(io_tree, lockstart, lockend, cached_state);
}
while (1) {
if (nowait) {
- if (!try_lock_extent(io_tree, lockstart, lockend,
- cached_state)) {
+ if (!btrfs_try_lock_extent(io_tree, lockstart, lockend,
+ cached_state)) {
ret = -EAGAIN;
break;
}
} else {
- lock_extent(io_tree, lockstart, lockend, cached_state);
+ btrfs_lock_extent(io_tree, lockstart, lockend, cached_state);
}
/*
* We're concerned with the entire range that we're going to be
@@ -78,7 +78,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
lockstart, lockend)))
break;
- unlock_extent(io_tree, lockstart, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, lockstart, lockend, cached_state);
if (ordered) {
if (nowait) {
@@ -131,7 +131,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
}
if (ret)
- unlock_dio_extent(io_tree, lockstart, lockend, cached_state);
+ btrfs_unlock_dio_extent(io_tree, lockstart, lockend, cached_state);
return ret;
}
@@ -151,11 +151,11 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
}
ordered = btrfs_alloc_ordered_extent(inode, start, file_extent,
- (1 << type) |
- (1 << BTRFS_ORDERED_DIRECT));
+ (1U << type) |
+ (1U << BTRFS_ORDERED_DIRECT));
if (IS_ERR(ordered)) {
if (em) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
btrfs_drop_extent_map_range(inode, start,
start + file_extent->num_bytes - 1, false);
}
@@ -204,8 +204,7 @@ again:
BTRFS_ORDERED_REGULAR);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(em))
- btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
- 1);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, true);
return em;
}
@@ -246,7 +245,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
else
type = BTRFS_ORDERED_NOCOW;
len = min(len, em->len - (start - em->start));
- block_start = extent_map_block_start(em) + (start - em->start);
+ block_start = btrfs_extent_map_block_start(em) + (start - em->start);
if (can_nocow_extent(BTRFS_I(inode), start, &len, &file_extent,
false) == 1) {
@@ -265,7 +264,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
nowait);
if (ret < 0) {
/* Our caller expects us to free the input extent map. */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
*map = NULL;
btrfs_dec_nocow_writers(bg);
if (nowait && (ret == -ENOSPC || ret == -EDQUOT))
@@ -278,7 +277,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
&file_extent, type);
btrfs_dec_nocow_writers(bg);
if (type == BTRFS_ORDERED_PREALLOC) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
*map = em2;
em = em2;
}
@@ -291,7 +290,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
dio_data->nocow_done = true;
} else {
/* Our caller expects us to free the input extent map. */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
*map = NULL;
if (nowait) {
@@ -440,8 +439,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
start, data_alloc_len, false);
if (!ret)
dio_data->data_space_reserved = true;
- else if (ret && !(BTRFS_I(inode)->flags &
- (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
+ else if (!(BTRFS_I(inode)->flags &
+ (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
goto err;
}
@@ -474,8 +473,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
* to buffered IO. Don't blame me, this is the price we pay for using
* the generic code.
*/
- if (extent_map_is_compressed(em) || em->disk_bytenr == EXTENT_MAP_INLINE) {
- free_extent_map(em);
+ if (btrfs_extent_map_is_compressed(em) || em->disk_bytenr == EXTENT_MAP_INLINE) {
+ btrfs_free_extent_map(em);
/*
* If we are in a NOWAIT context, return -EAGAIN in order to
* fallback to buffered IO. This is not only because we can
@@ -516,7 +515,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
* after we have submitted bios for all the extents in the range.
*/
if ((flags & IOMAP_NOWAIT) && len < length) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ret = -EAGAIN;
goto unlock_err;
}
@@ -558,13 +557,13 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_HOLE;
} else {
- iomap->addr = extent_map_block_start(em) + (start - em->start);
+ iomap->addr = btrfs_extent_map_block_start(em) + (start - em->start);
iomap->type = IOMAP_MAPPED;
}
iomap->offset = start;
iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
iomap->length = len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/*
* Reads will hold the EXTENT_DIO_LOCKED bit until the io is completed,
@@ -575,13 +574,13 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (write)
unlock_bits |= EXTENT_DIO_LOCKED;
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- unlock_bits, &cached_state);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ unlock_bits, &cached_state);
/* We didn't use everything, unlock the dio extent for the remainder. */
if (!write && (start + len) < lockend)
- unlock_dio_extent(&BTRFS_I(inode)->io_tree, start + len,
- lockend, NULL);
+ btrfs_unlock_dio_extent(&BTRFS_I(inode)->io_tree, start + len,
+ lockend, NULL);
return 0;
@@ -591,8 +590,8 @@ unlock_err:
* to update this, be explicit that we expect EXTENT_LOCKED and
* EXTENT_DIO_LOCKED to be set here, and so that's what we're clearing.
*/
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- EXTENT_LOCKED | EXTENT_DIO_LOCKED, &cached_state);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ EXTENT_LOCKED | EXTENT_DIO_LOCKED, &cached_state);
err:
if (dio_data->data_space_reserved) {
btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -615,8 +614,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if (!write && (iomap->type == IOMAP_HOLE)) {
/* If reading from a hole, unlock and return */
- unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos,
- pos + length - 1, NULL);
+ btrfs_unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos,
+ pos + length - 1, NULL);
return 0;
}
@@ -627,8 +626,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
btrfs_finish_ordered_extent(dio_data->ordered, NULL,
pos, length, false);
else
- unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos,
- pos + length - 1, NULL);
+ btrfs_unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos,
+ pos + length - 1, NULL);
ret = -ENOTBLK;
}
if (write) {
@@ -660,8 +659,8 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio)
dip->file_offset, dip->bytes,
!bio->bi_status);
} else {
- unlock_dio_extent(&inode->io_tree, dip->file_offset,
- dip->file_offset + dip->bytes - 1, NULL);
+ btrfs_unlock_dio_extent(&inode->io_tree, dip->file_offset,
+ dip->file_offset + dip->bytes - 1, NULL);
}
bbio->bio.bi_private = bbio->private;
@@ -692,9 +691,9 @@ static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
* a pre-existing one.
*/
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
- ret = split_extent_map(bbio->inode, bbio->file_offset,
- ordered->num_bytes, len,
- ordered->disk_bytenr);
+ ret = btrfs_split_extent_map(bbio->inode, bbio->file_offset,
+ ordered->num_bytes, len,
+ ordered->disk_bytenr);
if (ret)
return ret;
}
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index d6eef4bd9e9d..89fe85778115 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -94,8 +94,6 @@ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
lockdep_assert_held(&discard_ctl->lock);
- if (!btrfs_run_discard_work(discard_ctl))
- return;
if (list_empty(&block_group->discard_list) ||
block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
@@ -118,6 +116,9 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
if (!btrfs_is_block_group_data_only(block_group))
return;
+ if (!btrfs_run_discard_work(discard_ctl))
+ return;
+
spin_lock(&discard_ctl->lock);
__add_to_discard_list(discard_ctl, block_group);
spin_unlock(&discard_ctl->lock);
@@ -244,6 +245,20 @@ again:
block_group->used != 0) {
if (btrfs_is_block_group_data_only(block_group)) {
__add_to_discard_list(discard_ctl, block_group);
+ /*
+ * The block group must have been moved to other
+ * discard list even if discard was disabled in
+ * the meantime or a transaction abort happened,
+ * otherwise we can end up in an infinite loop,
+ * always jumping into the 'again' label and
+ * keep getting this block group over and over
+ * in case there are no other block groups in
+ * the discard lists.
+ */
+ ASSERT(block_group->discard_index !=
+ BTRFS_DISCARD_INDEX_UNUSED,
+ "discard_index=%d",
+ block_group->discard_index);
} else {
list_del_init(&block_group->discard_list);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index aa58e0663a5d..1beb9458f622 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -193,10 +193,11 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
u64 end = min_t(u64, eb->start + eb->len,
folio_pos(folio) + eb->folio_size);
u32 len = end - start;
+ phys_addr_t paddr = PFN_PHYS(folio_pfn(folio)) +
+ offset_in_folio(folio, start);
- ret = btrfs_repair_io_failure(fs_info, 0, start, len,
- start, folio, offset_in_folio(folio, start),
- mirror_num);
+ ret = btrfs_repair_io_failure(fs_info, 0, start, len, start,
+ paddr, mirror_num);
if (ret)
break;
}
@@ -224,7 +225,6 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
ASSERT(check);
while (1) {
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(eb, mirror_num, check);
if (!ret)
break;
@@ -256,7 +256,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
/*
* Checksum a dirty tree block before IO.
*/
-blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
+int btree_csum_one_bio(struct btrfs_bio *bbio)
{
struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
@@ -267,9 +267,9 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
/* Btree blocks are always contiguous on disk. */
if (WARN_ON_ONCE(bbio->file_offset != eb->start))
- return BLK_STS_IOERR;
+ return -EIO;
if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
- return BLK_STS_IOERR;
+ return -EIO;
/*
* If an extent_buffer is marked as EXTENT_BUFFER_ZONED_ZEROOUT, don't
@@ -278,13 +278,13 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
*/
if (test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)) {
memzero_extent_buffer(eb, 0, eb->len);
- return BLK_STS_OK;
+ return 0;
}
if (WARN_ON_ONCE(found_start != eb->start))
- return BLK_STS_IOERR;
+ return -EIO;
if (WARN_ON(!btrfs_meta_folio_test_uptodate(eb->folios[0], eb)))
- return BLK_STS_IOERR;
+ return -EIO;
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
offsetof(struct btrfs_header, fsid),
@@ -312,7 +312,7 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
- return BLK_STS_OK;
+ return 0;
error:
btrfs_print_tree(eb, 0);
@@ -326,7 +326,7 @@ error:
*/
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
- return errno_to_blk_status(ret);
+ return ret;
}
static bool check_tree_block_fsid(struct extent_buffer *eb)
@@ -452,15 +452,9 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
goto out;
}
- /*
- * If this is a leaf block and it is corrupt, set the corrupt bit so
- * that we don't try and read the other copies of this block, just
- * return -EIO.
- */
- if (found_level == 0 && btrfs_check_leaf(eb)) {
- set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+ /* If this is a leaf block and it is corrupt, just return -EIO. */
+ if (found_level == 0 && btrfs_check_leaf(eb))
ret = -EIO;
- }
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
@@ -641,11 +635,16 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
}
-static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
- u64 objectid)
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, gfp_t flags)
{
+ struct btrfs_root *root;
bool dummy = btrfs_is_testing(fs_info);
+ root = kzalloc(sizeof(*root), flags);
+ if (!root)
+ return NULL;
+
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -698,10 +697,10 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
btrfs_set_root_last_log_commit(root, 0);
root->anon_dev = 0;
if (!dummy) {
- extent_io_tree_init(fs_info, &root->dirty_log_pages,
- IO_TREE_ROOT_DIRTY_LOG_PAGES);
- extent_io_tree_init(fs_info, &root->log_csum_range,
- IO_TREE_LOG_CSUM_RANGE);
+ btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages,
+ IO_TREE_ROOT_DIRTY_LOG_PAGES);
+ btrfs_extent_io_tree_init(fs_info, &root->log_csum_range,
+ IO_TREE_LOG_CSUM_RANGE);
}
spin_lock_init(&root->root_item_lock);
@@ -712,14 +711,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
-}
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
- u64 objectid, gfp_t flags)
-{
- struct btrfs_root *root = kzalloc(sizeof(*root), flags);
- if (root)
- __setup_root(root, fs_info, objectid);
return root;
}
@@ -1863,8 +1855,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
int i;
while (!list_empty(&fs_info->dead_roots)) {
- gang[0] = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
+ gang[0] = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
@@ -1927,9 +1919,9 @@ static int btrfs_init_btree_inode(struct super_block *sb)
inode->i_mapping->a_ops = &btree_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
- IO_TREE_BTREE_INODE_IO);
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
+ btrfs_extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
+ IO_TREE_BTREE_INODE_IO);
+ btrfs_extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
@@ -2002,7 +1994,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
ordered_flags);
fs_info->discard_ctl.discard_workers =
- alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
+ alloc_ordered_workqueue("btrfs-discard", WQ_FREEZABLE);
if (!(fs_info->workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
@@ -2769,10 +2761,21 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
return ret;
}
+/*
+ * Lockdep gets confused between our buffer_tree which requires IRQ locking because
+ * we modify marks in the IRQ context, and our delayed inode xarray which doesn't
+ * have these requirements. Use a class key so lockdep doesn't get them mixed up.
+ */
+static struct lock_class_key buffer_xa_class;
+
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
- INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+
+ /* Use the same flags as mapping->i_pages. */
+ xa_init_flags(&fs_info->buffer_tree, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
+ lockdep_set_class(&fs_info->buffer_tree.xa_lock, &buffer_xa_class);
+
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2784,7 +2787,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
- spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
@@ -2829,6 +2831,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
BTRFS_BLOCK_RSV_GLOBAL);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
+ btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG);
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
BTRFS_BLOCK_RSV_DELOPS);
@@ -2862,8 +2865,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
rwlock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
- extent_io_tree_init(fs_info, &fs_info->excluded_extents,
- IO_TREE_FS_EXCLUDED_EXTENTS);
+ btrfs_extent_io_tree_init(fs_info, &fs_info->excluded_extents,
+ IO_TREE_FS_EXCLUDED_EXTENTS);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
@@ -3315,7 +3318,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
/*
* Read super block and check the signature bytes only
*/
- disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
+ disk_super = btrfs_read_disk_super(fs_devices->latest_dev->bdev, 0, false);
if (IS_ERR(disk_super)) {
ret = PTR_ERR(disk_super);
goto fail_alloc;
@@ -3710,85 +3713,6 @@ static void btrfs_end_super_write(struct bio *bio)
bio_put(bio);
}
-struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
- int copy_num, bool drop_cache)
-{
- struct btrfs_super_block *super;
- struct page *page;
- u64 bytenr, bytenr_orig;
- struct address_space *mapping = bdev->bd_mapping;
- int ret;
-
- bytenr_orig = btrfs_sb_offset(copy_num);
- ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
- if (ret == -ENOENT)
- return ERR_PTR(-EINVAL);
- else if (ret)
- return ERR_PTR(ret);
-
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
- return ERR_PTR(-EINVAL);
-
- if (drop_cache) {
- /* This should only be called with the primary sb. */
- ASSERT(copy_num == 0);
-
- /*
- * Drop the page of the primary superblock, so later read will
- * always read from the device.
- */
- invalidate_inode_pages2_range(mapping,
- bytenr >> PAGE_SHIFT,
- (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
- }
-
- page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
- if (IS_ERR(page))
- return ERR_CAST(page);
-
- super = page_address(page);
- if (btrfs_super_magic(super) != BTRFS_MAGIC) {
- btrfs_release_disk_super(super);
- return ERR_PTR(-ENODATA);
- }
-
- if (btrfs_super_bytenr(super) != bytenr_orig) {
- btrfs_release_disk_super(super);
- return ERR_PTR(-EINVAL);
- }
-
- return super;
-}
-
-
-struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
-{
- struct btrfs_super_block *super, *latest = NULL;
- int i;
- u64 transid = 0;
-
- /* we would like to check all the supers, but that would make
- * a btrfs mount succeed after a mkfs from a different FS.
- * So, we need to add a special mount option to scan for
- * later supers, using BTRFS_SUPER_MIRROR_MAX instead
- */
- for (i = 0; i < 1; i++) {
- super = btrfs_read_dev_one_super(bdev, i, false);
- if (IS_ERR(super))
- continue;
-
- if (!latest || btrfs_super_generation(super) > transid) {
- if (latest)
- btrfs_release_disk_super(super);
-
- latest = super;
- transid = btrfs_super_generation(super);
- }
- }
-
- return super;
-}
-
/*
* Write superblock @sb to the @device. Do not wait for completion, all the
* folios we use for writing are locked.
@@ -3828,8 +3752,8 @@ static int write_dev_supers(struct btrfs_device *device,
continue;
} else if (ret < 0) {
btrfs_err(device->fs_info,
- "couldn't get super block location for mirror %d",
- i);
+ "couldn't get super block location for mirror %d error %d",
+ i, ret);
atomic_inc(&device->sb_write_errors);
continue;
}
@@ -3848,8 +3772,8 @@ static int write_dev_supers(struct btrfs_device *device,
GFP_NOFS);
if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
- "couldn't get super block page for bytenr %llu",
- bytenr);
+ "couldn't get super block page for bytenr %llu error %ld",
+ bytenr, PTR_ERR(folio));
atomic_inc(&device->sb_write_errors);
continue;
}
@@ -4244,8 +4168,9 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
u64 found_end;
found = true;
- while (find_first_extent_bit(&trans->dirty_pages, cur,
- &found_start, &found_end, EXTENT_DIRTY, &cached)) {
+ while (btrfs_find_first_extent_bit(&trans->dirty_pages, cur,
+ &found_start, &found_end,
+ EXTENT_DIRTY, &cached)) {
dirty_bytes += found_end + 1 - found_start;
cur = found_end + 1;
}
@@ -4441,7 +4366,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
if (btrfs_check_quota_leak(fs_info)) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN("qgroup reserved space leaked");
btrfs_err(fs_info, "qgroup reserved space leaked");
}
@@ -4698,9 +4623,9 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- while (find_first_extent_bit(dirty_pages, start, &start, &end,
- mark, NULL)) {
- clear_extent_bits(dirty_pages, start, end, mark);
+ while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark, NULL)) {
+ btrfs_clear_extent_bits(dirty_pages, start, end, mark);
while (start <= end) {
eb = find_extent_buffer(fs_info, start);
start += fs_info->nodesize;
@@ -4733,14 +4658,14 @@ static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
* the same extent range.
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
- if (!find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state)) {
+ if (!btrfs_find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state)) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
- clear_extent_dirty(unpin, start, end, &cached_state);
- free_extent_state(cached_state);
+ btrfs_clear_extent_dirty(unpin, start, end, &cached_state);
+ btrfs_free_extent_state(cached_state);
btrfs_error_unpin_extent_range(fs_info, start, end);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 587842991b24..864a55a96226 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -58,9 +58,6 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *sb, int mirror_num);
int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
-struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
-struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
- int copy_num, bool drop_cache);
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
const struct btrfs_key *key);
@@ -114,7 +111,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int btrfs_read_extent_buffer(struct extent_buffer *buf,
const struct btrfs_tree_parent_check *check);
-blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio);
+int btree_csum_one_bio(struct btrfs_bio *bbio);
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 13de6af279e5..b1b96eb5f64e 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -42,7 +42,7 @@ static inline void btrfs_extent_state_leak_debug_check(void)
struct extent_state *state;
while (!list_empty(&states)) {
- state = list_entry(states.next, struct extent_state, leak_list);
+ state = list_first_entry(&states, struct extent_state, leak_list);
pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
state->start, state->end, state->state,
extent_state_in_tree(state),
@@ -59,13 +59,12 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
struct extent_io_tree *tree,
u64 start, u64 end)
{
- const struct btrfs_inode *inode;
+ const struct btrfs_inode *inode = tree->inode;
u64 isize;
if (tree->owner != IO_TREE_INODE_IO)
return;
- inode = extent_io_tree_to_inode_const(tree);
isize = i_size_read(&inode->vfs_inode);
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
btrfs_debug_rl(inode->root->fs_info,
@@ -80,25 +79,8 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
-
-/*
- * The only tree allowed to set the inode is IO_TREE_INODE_IO.
- */
-static bool is_inode_io_tree(const struct extent_io_tree *tree)
-{
- return tree->owner == IO_TREE_INODE_IO;
-}
-
-/* Return the inode if it's valid for the given tree, otherwise NULL. */
-struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree)
-{
- if (tree->owner == IO_TREE_INODE_IO)
- return tree->inode;
- return NULL;
-}
-
/* Read-only access to the inode. */
-const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree)
+const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree)
{
if (tree->owner == IO_TREE_INODE_IO)
return tree->inode;
@@ -106,15 +88,15 @@ const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_t
}
/* For read-only access to fs_info. */
-const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
+const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
{
if (tree->owner == IO_TREE_INODE_IO)
return tree->inode->root->fs_info;
return tree->fs_info;
}
-void extent_io_tree_init(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *tree, unsigned int owner)
+void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *tree, unsigned int owner)
{
tree->state = RB_ROOT;
spin_lock_init(&tree->lock);
@@ -129,7 +111,7 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
* aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never
* set on any extent state when calling this function).
*/
-void extent_io_tree_release(struct extent_io_tree *tree)
+void btrfs_extent_io_tree_release(struct extent_io_tree *tree)
{
struct rb_root root;
struct extent_state *state;
@@ -148,7 +130,7 @@ void extent_io_tree_release(struct extent_io_tree *tree)
* (see wait_extent_bit()).
*/
ASSERT(!waitqueue_active(&state->wq));
- free_extent_state(state);
+ btrfs_free_extent_state(state);
cond_resched_lock(&tree->lock);
}
/*
@@ -176,7 +158,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
btrfs_leak_debug_add_state(state);
refcount_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
- trace_alloc_extent_state(state, mask, _RET_IP_);
+ trace_btrfs_alloc_extent_state(state, mask, _RET_IP_);
return state;
}
@@ -188,14 +170,14 @@ static struct extent_state *alloc_extent_state_atomic(struct extent_state *preal
return prealloc;
}
-void free_extent_state(struct extent_state *state)
+void btrfs_free_extent_state(struct extent_state *state)
{
if (!state)
return;
if (refcount_dec_and_test(&state->refs)) {
WARN_ON(extent_state_in_tree(state));
btrfs_leak_debug_del_state(state);
- trace_free_extent_state(state, _RET_IP_);
+ trace_btrfs_free_extent_state(state, _RET_IP_);
kmem_cache_free(extent_state_cache, state);
}
}
@@ -222,38 +204,34 @@ static inline struct extent_state *next_state(struct extent_state *state)
{
struct rb_node *next = rb_next(&state->rb_node);
- if (next)
- return rb_entry(next, struct extent_state, rb_node);
- else
- return NULL;
+ return rb_entry_safe(next, struct extent_state, rb_node);
}
static inline struct extent_state *prev_state(struct extent_state *state)
{
struct rb_node *next = rb_prev(&state->rb_node);
- if (next)
- return rb_entry(next, struct extent_state, rb_node);
- else
- return NULL;
+ return rb_entry_safe(next, struct extent_state, rb_node);
}
/*
- * Search @tree for an entry that contains @offset. Such entry would have
- * entry->start <= offset && entry->end >= offset.
+ * Search @tree for an entry that contains @offset or if none exists for the
+ * first entry that starts and ends after that offset.
*
* @tree: the tree to search
- * @offset: offset that should fall within an entry in @tree
+ * @offset: search offset
* @node_ret: pointer where new node should be anchored (used when inserting an
* entry in the tree)
* @parent_ret: points to entry which would have been the parent of the entry,
* containing @offset
*
- * Return a pointer to the entry that contains @offset byte address and don't change
- * @node_ret and @parent_ret.
+ * Return a pointer to the entry that contains @offset byte address.
+ *
+ * If no such entry exists, return the first entry that starts and ends after
+ * @offset if one exists, otherwise NULL.
*
- * If no such entry exists, return pointer to entry that ends before @offset
- * and fill parameters @node_ret and @parent_ret, ie. does not return NULL.
+ * If the returned entry starts at @offset, then @node_ret and @parent_ret
+ * aren't changed.
*/
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
u64 offset,
@@ -282,7 +260,11 @@ static inline struct extent_state *tree_search_for_insert(struct extent_io_tree
if (parent_ret)
*parent_ret = prev;
- /* Search neighbors until we find the first one past the end */
+ /*
+ * Return either the current entry if it contains offset (it ends after
+ * or at offset) or the first entry that starts and ends after offset if
+ * one exists, or NULL.
+ */
while (entry && offset > entry->end)
entry = next_state(entry);
@@ -351,7 +333,7 @@ static void __cold extent_io_tree_panic(const struct extent_io_tree *tree,
const char *opname,
int err)
{
- btrfs_panic(extent_io_tree_to_fs_info(tree), err,
+ btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err,
"extent io tree error on %s state start %llu end %llu",
opname, state->start, state->end);
}
@@ -362,13 +344,12 @@ static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *s
prev = prev_state(state);
if (prev && prev->end == state->start - 1 && prev->state == state->state) {
- if (is_inode_io_tree(tree))
- btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree),
- state, prev);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_merge_delalloc_extent(tree->inode, state, prev);
state->start = prev->start;
rb_erase(&prev->rb_node, &tree->state);
RB_CLEAR_NODE(&prev->rb_node);
- free_extent_state(prev);
+ btrfs_free_extent_state(prev);
}
}
@@ -378,13 +359,12 @@ static void merge_next_state(struct extent_io_tree *tree, struct extent_state *s
next = next_state(state);
if (next && next->start == state->end + 1 && next->state == state->state) {
- if (is_inode_io_tree(tree))
- btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree),
- state, next);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_merge_delalloc_extent(tree->inode, state, next);
state->end = next->end;
rb_erase(&next->rb_node, &tree->state);
RB_CLEAR_NODE(&next->rb_node);
- free_extent_state(next);
+ btrfs_free_extent_state(next);
}
}
@@ -413,8 +393,8 @@ static void set_state_bits(struct extent_io_tree *tree,
u32 bits_to_set = bits & ~EXTENT_CTLBITS;
int ret;
- if (is_inode_io_tree(tree))
- btrfs_set_delalloc_extent(extent_io_tree_to_inode(tree), state, bits);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_set_delalloc_extent(tree->inode, state, bits);
ret = add_extent_changeset(state, bits_to_set, changeset, 1);
BUG_ON(ret < 0);
@@ -459,10 +439,9 @@ static struct extent_state *insert_state(struct extent_io_tree *tree,
if (state->end < entry->start) {
if (try_merge && end == entry->start &&
state->state == entry->state) {
- if (is_inode_io_tree(tree))
- btrfs_merge_delalloc_extent(
- extent_io_tree_to_inode(tree),
- state, entry);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_merge_delalloc_extent(tree->inode,
+ state, entry);
entry->start = state->start;
merge_prev_state(tree, entry);
state->state = 0;
@@ -472,10 +451,9 @@ static struct extent_state *insert_state(struct extent_io_tree *tree,
} else if (state->end > entry->end) {
if (try_merge && entry->end == start &&
state->state == entry->state) {
- if (is_inode_io_tree(tree))
- btrfs_merge_delalloc_extent(
- extent_io_tree_to_inode(tree),
- state, entry);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_merge_delalloc_extent(tree->inode,
+ state, entry);
entry->end = state->end;
merge_next_state(tree, entry);
state->state = 0;
@@ -527,9 +505,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
struct rb_node *parent = NULL;
struct rb_node **node;
- if (is_inode_io_tree(tree))
- btrfs_split_delalloc_extent(extent_io_tree_to_inode(tree), orig,
- split);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_split_delalloc_extent(tree->inode, orig, split);
prealloc->start = orig->start;
prealloc->end = split - 1;
@@ -549,7 +526,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
} else if (prealloc->end > entry->end) {
node = &(*node)->rb_right;
} else {
- free_extent_state(prealloc);
+ btrfs_free_extent_state(prealloc);
return -EEXIST;
}
}
@@ -561,6 +538,18 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
}
/*
+ * Use this during tree iteration to avoid doing next node searches when it's
+ * not needed (the current record ends at or after the target range's end).
+ */
+static inline struct extent_state *next_search_state(struct extent_state *state, u64 end)
+{
+ if (state->end < end)
+ return next_state(state);
+
+ return NULL;
+}
+
+/*
* Utility function to clear some bits in an extent state struct. It will
* optionally wake up anyone waiting on this state (wake == 1).
*
@@ -569,16 +558,15 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
*/
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state,
- u32 bits, int wake,
+ u32 bits, int wake, u64 end,
struct extent_changeset *changeset)
{
struct extent_state *next;
u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
int ret;
- if (is_inode_io_tree(tree))
- btrfs_clear_delalloc_extent(extent_io_tree_to_inode(tree), state,
- bits);
+ if (tree->owner == IO_TREE_INODE_IO)
+ btrfs_clear_delalloc_extent(tree->inode, state, bits);
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
BUG_ON(ret < 0);
@@ -586,17 +574,17 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
if (wake)
wake_up(&state->wq);
if (state->state == 0) {
- next = next_state(state);
+ next = next_search_state(state, end);
if (extent_state_in_tree(state)) {
rb_erase(&state->rb_node, &tree->state);
RB_CLEAR_NODE(&state->rb_node);
- free_extent_state(state);
+ btrfs_free_extent_state(state);
} else {
WARN_ON(1);
}
} else {
merge_state(tree, state);
- next = next_state(state);
+ next = next_search_state(state, end);
}
return next;
}
@@ -620,18 +608,18 @@ static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
*
* This takes the tree lock, and returns 0 on success and < 0 on error.
*/
-int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached_state,
- struct extent_changeset *changeset)
+int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached_state,
+ struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *cached;
struct extent_state *prealloc = NULL;
u64 last_end;
- int err;
- int clear = 0;
- int wake;
- int delete = (bits & EXTENT_CLEAR_ALL_BITS);
+ int ret = 0;
+ bool clear;
+ bool wake;
+ const bool delete = (bits & EXTENT_CLEAR_ALL_BITS);
gfp_t mask;
set_gfp_mask_from_bits(&bits, &mask);
@@ -644,9 +632,8 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
- wake = ((bits & EXTENT_LOCK_BITS) ? 1 : 0);
- if (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
- clear = 1;
+ wake = (bits & EXTENT_LOCK_BITS);
+ clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
again:
if (!prealloc) {
/*
@@ -676,7 +663,7 @@ again:
goto hit_next;
}
if (clear)
- free_extent_state(cached);
+ btrfs_free_extent_state(cached);
}
/* This search will find the extents that end after our range starts. */
@@ -691,7 +678,7 @@ hit_next:
/* The state doesn't have the wanted bits, go ahead. */
if (!(state->state & bits)) {
- state = next_state(state);
+ state = next_search_state(state, end);
goto next;
}
@@ -714,18 +701,24 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
goto search_again;
- err = split_state(tree, state, prealloc, start);
- if (err)
- extent_io_tree_panic(tree, state, "split", err);
-
+ ret = split_state(tree, state, prealloc, start);
prealloc = NULL;
- if (err)
+ if (ret) {
+ extent_io_tree_panic(tree, state, "split", ret);
goto out;
+ }
if (state->end <= end) {
- state = clear_state_bit(tree, state, bits, wake, changeset);
+ state = clear_state_bit(tree, state, bits, wake, end,
+ changeset);
goto next;
}
- goto search_again;
+ if (need_resched())
+ goto search_again;
+ /*
+ * Fallthrough and try atomic extent state allocation if needed.
+ * If it fails we'll jump to 'search_again' retry the allocation
+ * in non-atomic mode and start the search again.
+ */
}
/*
* | ---- desired range ---- |
@@ -736,30 +729,31 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
goto search_again;
- err = split_state(tree, state, prealloc, end + 1);
- if (err)
- extent_io_tree_panic(tree, state, "split", err);
+ ret = split_state(tree, state, prealloc, end + 1);
+ if (ret) {
+ extent_io_tree_panic(tree, state, "split", ret);
+ prealloc = NULL;
+ goto out;
+ }
if (wake)
wake_up(&state->wq);
- clear_state_bit(tree, prealloc, bits, wake, changeset);
+ clear_state_bit(tree, prealloc, bits, wake, end, changeset);
prealloc = NULL;
goto out;
}
- state = clear_state_bit(tree, state, bits, wake, changeset);
+ state = clear_state_bit(tree, state, bits, wake, end, changeset);
next:
- if (last_end == (u64)-1)
+ if (last_end >= end)
goto out;
start = last_end + 1;
- if (start <= end && state && !need_resched())
+ if (state && !need_resched())
goto hit_next;
search_again:
- if (start > end)
- goto out;
spin_unlock(&tree->lock);
if (gfpflags_allow_blocking(mask))
cond_resched();
@@ -767,10 +761,9 @@ search_again:
out:
spin_unlock(&tree->lock);
- if (prealloc)
- free_extent_state(prealloc);
+ btrfs_free_extent_state(prealloc);
- return 0;
+ return ret;
}
@@ -820,7 +813,7 @@ process_node:
schedule();
spin_lock(&tree->lock);
finish_wait(&state->wq, &wait);
- free_extent_state(state);
+ btrfs_free_extent_state(state);
goto again;
}
start = state->end + 1;
@@ -838,7 +831,7 @@ out:
if (cached_state && *cached_state) {
state = *cached_state;
*cached_state = NULL;
- free_extent_state(state);
+ btrfs_free_extent_state(state);
}
spin_unlock(&tree->lock);
}
@@ -877,7 +870,7 @@ static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *t
*/
state = tree_search(tree, start);
while (state) {
- if (state->end >= start && (state->state & bits))
+ if (state->state & bits)
return state;
state = next_state(state);
}
@@ -892,9 +885,9 @@ static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *t
* Return true if we find something, and update @start_ret and @end_ret.
* Return false if we found nothing.
*/
-bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits,
- struct extent_state **cached_state)
+bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits,
+ struct extent_state **cached_state)
{
struct extent_state *state;
bool ret = false;
@@ -914,13 +907,13 @@ bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
* again. If we haven't found any, clear as well since
* it's now useless.
*/
- free_extent_state(*cached_state);
+ btrfs_free_extent_state(*cached_state);
*cached_state = NULL;
if (state)
goto got_it;
goto out;
}
- free_extent_state(*cached_state);
+ btrfs_free_extent_state(*cached_state);
*cached_state = NULL;
}
@@ -952,14 +945,17 @@ out:
* contiguous area for given bits. We will search to the first bit we find, and
* then walk down the tree until we find a non-contiguous area. The area
* returned will be the full contiguous area with the bits set.
+ *
+ * Returns true if we found a range with the given bits set, in which case
+ * @start_ret and @end_ret are updated, or false if no range was found.
*/
-int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits)
+bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits)
{
struct extent_state *state;
- int ret = 1;
+ bool ret = false;
- ASSERT(!btrfs_fs_incompat(extent_io_tree_to_fs_info(tree), NO_HOLES));
+ ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES));
spin_lock(&tree->lock);
state = find_first_extent_bit_state(tree, start, bits);
@@ -971,7 +967,7 @@ int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
break;
*end_ret = state->end;
}
- ret = 0;
+ ret = true;
}
spin_unlock(&tree->lock);
return ret;
@@ -1046,11 +1042,11 @@ out:
*
* [start, end] is inclusive This takes the tree lock.
*/
-static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, u64 *failed_start,
- struct extent_state **failed_state,
- struct extent_state **cached_state,
- struct extent_changeset *changeset)
+static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, u64 *failed_start,
+ struct extent_state **failed_state,
+ struct extent_state **cached_state,
+ struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@@ -1129,12 +1125,11 @@ hit_next:
set_state_bits(tree, state, bits, changeset);
cache_state(state, cached_state);
merge_state(tree, state);
- if (last_end == (u64)-1)
+ if (last_end >= end)
goto out;
start = last_end + 1;
state = next_state(state);
- if (start < end && state && state->start == start &&
- !need_resched())
+ if (state && state->start == start && !need_resched())
goto hit_next;
goto search_again;
}
@@ -1186,12 +1181,11 @@ hit_next:
set_state_bits(tree, state, bits, changeset);
cache_state(state, cached_state);
merge_state(tree, state);
- if (last_end == (u64)-1)
+ if (last_end >= end)
goto out;
start = last_end + 1;
state = next_state(state);
- if (start < end && state && state->start == start &&
- !need_resched())
+ if (state && state->start == start && !need_resched())
goto hit_next;
}
goto search_again;
@@ -1204,14 +1198,8 @@ hit_next:
* extent we found.
*/
if (state->start > start) {
- u64 this_end;
struct extent_state *inserted_state;
- if (end < last_start)
- this_end = end;
- else
- this_end = last_start - 1;
-
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
goto search_again;
@@ -1221,17 +1209,38 @@ hit_next:
* extent.
*/
prealloc->start = start;
- prealloc->end = this_end;
+ if (end < last_start)
+ prealloc->end = end;
+ else
+ prealloc->end = last_start - 1;
+
inserted_state = insert_state(tree, prealloc, bits, changeset);
if (IS_ERR(inserted_state)) {
ret = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", ret);
+ goto out;
}
cache_state(inserted_state, cached_state);
if (inserted_state == prealloc)
prealloc = NULL;
- start = this_end + 1;
+ start = inserted_state->end + 1;
+
+ /* Beyond target range, stop. */
+ if (start > end)
+ goto out;
+
+ if (need_resched())
+ goto search_again;
+
+ state = next_search_state(inserted_state, end);
+ /*
+ * If there's a next state, whether contiguous or not, we don't
+ * need to unlock and start search agian. If it's not contiguous
+ * we will end up here and try to allocate a prealloc state and insert.
+ */
+ if (state)
+ goto hit_next;
goto search_again;
}
/*
@@ -1252,8 +1261,11 @@ hit_next:
if (!prealloc)
goto search_again;
ret = split_state(tree, state, prealloc, end + 1);
- if (ret)
+ if (ret) {
extent_io_tree_panic(tree, state, "split", ret);
+ prealloc = NULL;
+ goto out;
+ }
set_state_bits(tree, prealloc, bits, changeset);
cache_state(prealloc, cached_state);
@@ -1272,18 +1284,16 @@ search_again:
out:
spin_unlock(&tree->lock);
- if (prealloc)
- free_extent_state(prealloc);
+ btrfs_free_extent_state(prealloc);
return ret;
}
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached_state)
+int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached_state)
{
- return __set_extent_bit(tree, start, end, bits, NULL, NULL,
- cached_state, NULL);
+ return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL);
}
/*
@@ -1304,9 +1314,9 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
*
* All allocations are done with GFP_NOFS.
*/
-int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, u32 clear_bits,
- struct extent_state **cached_state)
+int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, u32 clear_bits,
+ struct extent_state **cached_state)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@@ -1374,12 +1384,11 @@ hit_next:
if (state->start == start && state->end <= end) {
set_state_bits(tree, state, bits, NULL);
cache_state(state, cached_state);
- state = clear_state_bit(tree, state, clear_bits, 0, NULL);
- if (last_end == (u64)-1)
+ state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
+ if (last_end >= end)
goto out;
start = last_end + 1;
- if (start < end && state && state->start == start &&
- !need_resched())
+ if (state && state->start == start && !need_resched())
goto hit_next;
goto search_again;
}
@@ -1406,20 +1415,19 @@ hit_next:
goto out;
}
ret = split_state(tree, state, prealloc, start);
- if (ret)
- extent_io_tree_panic(tree, state, "split", ret);
prealloc = NULL;
- if (ret)
+ if (ret) {
+ extent_io_tree_panic(tree, state, "split", ret);
goto out;
+ }
if (state->end <= end) {
set_state_bits(tree, state, bits, NULL);
cache_state(state, cached_state);
- state = clear_state_bit(tree, state, clear_bits, 0, NULL);
- if (last_end == (u64)-1)
+ state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
+ if (last_end >= end)
goto out;
start = last_end + 1;
- if (start < end && state && state->start == start &&
- !need_resched())
+ if (state && state->start == start && !need_resched())
goto hit_next;
}
goto search_again;
@@ -1432,14 +1440,8 @@ hit_next:
* extent we found.
*/
if (state->start > start) {
- u64 this_end;
struct extent_state *inserted_state;
- if (end < last_start)
- this_end = end;
- else
- this_end = last_start - 1;
-
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) {
ret = -ENOMEM;
@@ -1451,16 +1453,37 @@ hit_next:
* extent.
*/
prealloc->start = start;
- prealloc->end = this_end;
+ if (end < last_start)
+ prealloc->end = end;
+ else
+ prealloc->end = last_start - 1;
+
inserted_state = insert_state(tree, prealloc, bits, NULL);
if (IS_ERR(inserted_state)) {
ret = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", ret);
+ goto out;
}
cache_state(inserted_state, cached_state);
if (inserted_state == prealloc)
prealloc = NULL;
- start = this_end + 1;
+ start = inserted_state->end + 1;
+
+ /* Beyond target range, stop. */
+ if (start > end)
+ goto out;
+
+ if (need_resched())
+ goto search_again;
+
+ state = next_search_state(inserted_state, end);
+ /*
+ * If there's a next state, whether contiguous or not, we don't
+ * need to unlock and start search again. If it's not contiguous
+ * we will end up here and try to allocate a prealloc state and insert.
+ */
+ if (state)
+ goto hit_next;
goto search_again;
}
/*
@@ -1477,12 +1500,15 @@ hit_next:
}
ret = split_state(tree, state, prealloc, end + 1);
- if (ret)
+ if (ret) {
extent_io_tree_panic(tree, state, "split", ret);
+ prealloc = NULL;
+ goto out;
+ }
set_state_bits(tree, prealloc, bits, NULL);
cache_state(prealloc, cached_state);
- clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
+ clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL);
prealloc = NULL;
goto out;
}
@@ -1497,8 +1523,7 @@ search_again:
out:
spin_unlock(&tree->lock);
- if (prealloc)
- free_extent_state(prealloc);
+ btrfs_free_extent_state(prealloc);
return ret;
}
@@ -1518,8 +1543,8 @@ out:
* spans (last_range_end, end of device]. In this case it's up to the caller to
* trim @end_ret to the appropriate size.
*/
-void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits)
+void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits)
{
struct extent_state *state;
struct extent_state *prev = NULL, *next = NULL;
@@ -1636,10 +1661,10 @@ out:
* all given bits set. If the returned number of bytes is greater than zero
* then @start is updated with the offset of the first byte with the bits set.
*/
-u64 count_range_bits(struct extent_io_tree *tree,
- u64 *start, u64 search_end, u64 max_bytes,
- u32 bits, int contig,
- struct extent_state **cached_state)
+u64 btrfs_count_range_bits(struct extent_io_tree *tree,
+ u64 *start, u64 search_end, u64 max_bytes,
+ u32 bits, int contig,
+ struct extent_state **cached_state)
{
struct extent_state *state = NULL;
struct extent_state *cached;
@@ -1710,7 +1735,7 @@ search:
}
if (cached_state) {
- free_extent_state(*cached_state);
+ btrfs_free_extent_state(*cached_state);
*cached_state = state;
if (state)
refcount_inc(&state->refs);
@@ -1724,16 +1749,16 @@ search:
/*
* Check if the single @bit exists in the given range.
*/
-bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
+bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
- struct extent_state *state = NULL;
+ struct extent_state *state;
bool bitset = false;
ASSERT(is_power_of_2(bit));
spin_lock(&tree->lock);
state = tree_search(tree, start);
- while (state && start <= end) {
+ while (state) {
if (state->start > end)
break;
@@ -1742,9 +1767,7 @@ bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32
break;
}
- /* If state->end is (u64)-1, start will overflow to 0 */
- start = state->end + 1;
- if (start > end || start == 0)
+ if (state->end >= end)
break;
state = next_state(state);
}
@@ -1752,16 +1775,51 @@ bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32
return bitset;
}
+void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits,
+ struct extent_state **cached_state)
+{
+ struct extent_state *state;
+
+ /*
+ * The cached state is currently mandatory and not used to start the
+ * search, only to cache the first state record found in the range.
+ */
+ ASSERT(cached_state != NULL);
+ ASSERT(*cached_state == NULL);
+
+ *bits = 0;
+
+ spin_lock(&tree->lock);
+ state = tree_search(tree, start);
+ if (state && state->start < end) {
+ *cached_state = state;
+ refcount_inc(&state->refs);
+ }
+ while (state) {
+ if (state->start > end)
+ break;
+
+ *bits |= state->state;
+
+ if (state->end >= end)
+ break;
+
+ state = next_state(state);
+ }
+ spin_unlock(&tree->lock);
+}
+
/*
* Check if the whole range [@start,@end) contains the single @bit set.
*/
-bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
- struct extent_state *cached)
+bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
+ struct extent_state *cached)
{
- struct extent_state *state = NULL;
+ struct extent_state *state;
bool bitset = true;
ASSERT(is_power_of_2(bit));
+ ASSERT(start < end);
spin_lock(&tree->lock);
if (cached && extent_state_in_tree(cached) && cached->start <= start &&
@@ -1769,30 +1827,22 @@ bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
state = cached;
else
state = tree_search(tree, start);
- while (state && start <= end) {
+ while (state) {
if (state->start > start) {
bitset = false;
break;
}
- if (state->start > end)
- break;
-
if ((state->state & bit) == 0) {
bitset = false;
break;
}
- if (state->end == (u64)-1)
+ if (state->end >= end)
break;
- /*
- * Last entry (if state->end is (u64)-1 and overflow happens),
- * or next entry starts after the range.
- */
+ /* Next state must start where this one ends. */
start = state->end + 1;
- if (start > end || start == 0)
- break;
state = next_state(state);
}
@@ -1804,8 +1854,8 @@ bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
}
/* Wrappers around set/clear extent bit */
-int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_changeset *changeset)
+int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_changeset *changeset)
{
/*
* We don't support EXTENT_LOCK_BITS yet, as current changeset will
@@ -1814,11 +1864,11 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCK_BITS));
- return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
+ return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}
-int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_changeset *changeset)
+int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_changeset *changeset)
{
/*
* Don't support EXTENT_LOCK_BITS case, same reason as
@@ -1826,20 +1876,21 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCK_BITS));
- return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
+ return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset);
}
-bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
- struct extent_state **cached)
+bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached)
{
int err;
u64 failed_start;
- err = __set_extent_bit(tree, start, end, bits, &failed_start,
- NULL, cached, NULL);
+ err = set_extent_bit(tree, start, end, bits, &failed_start, NULL,
+ cached, NULL);
if (err == -EEXIST) {
if (failed_start > start)
- clear_extent_bit(tree, start, failed_start - 1, bits, cached);
+ btrfs_clear_extent_bit(tree, start, failed_start - 1,
+ bits, cached);
return 0;
}
return 1;
@@ -1849,35 +1900,54 @@ bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits
* Either insert or lock state struct between start and end use mask to tell
* us if waiting is desired.
*/
-int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
- struct extent_state **cached_state)
+int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
+ struct extent_state **cached_state)
{
struct extent_state *failed_state = NULL;
int err;
u64 failed_start;
- err = __set_extent_bit(tree, start, end, bits, &failed_start,
- &failed_state, cached_state, NULL);
+ err = set_extent_bit(tree, start, end, bits, &failed_start,
+ &failed_state, cached_state, NULL);
while (err == -EEXIST) {
if (failed_start != start)
- clear_extent_bit(tree, start, failed_start - 1,
- bits, cached_state);
+ btrfs_clear_extent_bit(tree, start, failed_start - 1,
+ bits, cached_state);
wait_extent_bit(tree, failed_start, end, bits, &failed_state);
- err = __set_extent_bit(tree, start, end, bits,
- &failed_start, &failed_state,
- cached_state, NULL);
+ err = set_extent_bit(tree, start, end, bits, &failed_start,
+ &failed_state, cached_state, NULL);
}
return err;
}
-void __cold extent_state_free_cachep(void)
+/*
+ * Get the extent state that follows the given extent state.
+ * This is meant to be used in a context where we know no other tasks can
+ * concurrently modify the tree.
+ */
+struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+{
+ struct extent_state *next;
+
+ spin_lock(&tree->lock);
+ ASSERT(extent_state_in_tree(state));
+ next = next_state(state);
+ if (next)
+ refcount_inc(&next->refs);
+ spin_unlock(&tree->lock);
+
+ return next;
+}
+
+void __cold btrfs_extent_state_free_cachep(void)
{
btrfs_extent_state_leak_debug_check();
kmem_cache_destroy(extent_state_cache);
}
-int __init extent_state_init_cachep(void)
+int __init btrfs_extent_state_init_cachep(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
sizeof(struct extent_state), 0, 0,
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 6ffef1cd37c1..0a18ca9c59c3 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -17,7 +17,6 @@ struct btrfs_inode;
/* Bits for the extent state */
enum {
ENUM_BIT(EXTENT_DIRTY),
- ENUM_BIT(EXTENT_UPTODATE),
ENUM_BIT(EXTENT_LOCKED),
ENUM_BIT(EXTENT_DIO_LOCKED),
ENUM_BIT(EXTENT_NEW),
@@ -39,6 +38,11 @@ enum {
*/
ENUM_BIT(EXTENT_DELALLOC_NEW),
/*
+ * Mark that a range is being locked for finishing an ordered extent.
+ * Used together with EXTENT_LOCKED.
+ */
+ ENUM_BIT(EXTENT_FINISHING_ORDERED),
+ /*
* When an ordered extent successfully completes for a region marked as
* a new delalloc range, use this flag when clearing a new delalloc
* range to indicate that the VFS' inode number of bytes should be
@@ -130,117 +134,116 @@ struct extent_state {
#endif
};
-struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree);
-const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree);
-const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree);
+const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree);
+const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree);
-void extent_io_tree_init(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *tree, unsigned int owner);
-void extent_io_tree_release(struct extent_io_tree *tree);
-int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
- struct extent_state **cached);
-bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
- struct extent_state **cached);
+void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *tree, unsigned int owner);
+void btrfs_extent_io_tree_release(struct extent_io_tree *tree);
+int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
+ struct extent_state **cached);
+bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached);
-static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached)
+static inline int btrfs_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached)
{
- return __lock_extent(tree, start, end, EXTENT_LOCKED, cached);
+ return btrfs_lock_extent_bits(tree, start, end, EXTENT_LOCKED, cached);
}
-static inline bool try_lock_extent(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
+static inline bool btrfs_try_lock_extent(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
{
- return __try_lock_extent(tree, start, end, EXTENT_LOCKED, cached);
+ return btrfs_try_lock_extent_bits(tree, start, end, EXTENT_LOCKED, cached);
}
-int __init extent_state_init_cachep(void);
-void __cold extent_state_free_cachep(void);
-
-u64 count_range_bits(struct extent_io_tree *tree,
- u64 *start, u64 search_end,
- u64 max_bytes, u32 bits, int contig,
- struct extent_state **cached_state);
-
-void free_extent_state(struct extent_state *state);
-bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
- struct extent_state *cached_state);
-bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit);
-int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_changeset *changeset);
-int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached,
- struct extent_changeset *changeset);
-
-static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 end, u32 bits,
- struct extent_state **cached)
-{
- return __clear_extent_bit(tree, start, end, bits, cached, NULL);
-}
+int __init btrfs_extent_state_init_cachep(void);
+void __cold btrfs_extent_state_free_cachep(void);
+
+u64 btrfs_count_range_bits(struct extent_io_tree *tree,
+ u64 *start, u64 search_end,
+ u64 max_bytes, u32 bits, int contig,
+ struct extent_state **cached_state);
-static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached)
+void btrfs_free_extent_state(struct extent_state *state);
+bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
+ struct extent_state *cached_state);
+bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit);
+void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits,
+ struct extent_state **cached_state);
+int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_changeset *changeset);
+int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached,
+ struct extent_changeset *changeset);
+
+static inline int btrfs_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 end, u32 bits,
+ struct extent_state **cached)
{
- return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached, NULL);
+ return btrfs_clear_extent_bit_changeset(tree, start, end, bits, cached, NULL);
}
-static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, u32 bits)
+static inline int btrfs_unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached)
{
- return clear_extent_bit(tree, start, end, bits, NULL);
+ return btrfs_clear_extent_bit_changeset(tree, start, end, EXTENT_LOCKED,
+ cached, NULL);
}
-int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_changeset *changeset);
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached_state);
-
-static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state)
+static inline int btrfs_clear_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, u32 bits)
{
- return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
- cached_state, NULL);
+ return btrfs_clear_extent_bit(tree, start, end, bits, NULL);
}
-static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
+int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_changeset *changeset);
+int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached_state);
+
+static inline int btrfs_clear_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
{
- return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, cached);
+ return btrfs_clear_extent_bit(tree, start, end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, cached);
}
-int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, u32 clear_bits,
- struct extent_state **cached_state);
-
-bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits,
- struct extent_state **cached_state);
-void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits);
-int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits);
+int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, u32 clear_bits,
+ struct extent_state **cached_state);
+
+bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits,
+ struct extent_state **cached_state);
+void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits);
+bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
u64 *end, u64 max_bytes,
struct extent_state **cached_state);
-static inline int lock_dio_extent(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
+static inline int btrfs_lock_dio_extent(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
{
- return __lock_extent(tree, start, end, EXTENT_DIO_LOCKED, cached);
+ return btrfs_lock_extent_bits(tree, start, end, EXTENT_DIO_LOCKED, cached);
}
-static inline bool try_lock_dio_extent(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
+static inline bool btrfs_try_lock_dio_extent(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
{
- return __try_lock_extent(tree, start, end, EXTENT_DIO_LOCKED, cached);
+ return btrfs_try_lock_extent_bits(tree, start, end, EXTENT_DIO_LOCKED, cached);
}
-static inline int unlock_dio_extent(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
+static inline int btrfs_unlock_dio_extent(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
{
- return __clear_extent_bit(tree, start, end, EXTENT_DIO_LOCKED, cached, NULL);
+ return btrfs_clear_extent_bit_changeset(tree, start, end, EXTENT_DIO_LOCKED,
+ cached, NULL);
}
+struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree,
+ struct extent_state *state);
+
#endif /* BTRFS_EXTENT_IO_TREE_H */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 957230abd827..cb6128778a83 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -409,15 +409,15 @@ static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
btrfs_extent_data_ref_offset(leaf, ref));
}
-static int match_extent_data_ref(struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref,
- u64 root_objectid, u64 owner, u64 offset)
+static bool match_extent_data_ref(struct extent_buffer *leaf,
+ struct btrfs_extent_data_ref *ref,
+ u64 root_objectid, u64 owner, u64 offset)
{
if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
btrfs_extent_data_ref_offset(leaf, ref) != offset)
- return 0;
- return 1;
+ return false;
+ return true;
}
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
@@ -2006,7 +2006,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
if (min_bytes == 0) {
- max_count = delayed_refs->num_heads_ready;
+ /*
+ * We may be subject to a harmless race if some task is
+ * concurrently adding or removing a delayed ref, so silence
+ * KCSAN and similar tools.
+ */
+ max_count = data_race(delayed_refs->num_heads_ready);
min_bytes = U64_MAX;
}
@@ -2598,8 +2603,8 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
- set_extent_bit(&trans->transaction->pinned_extents, bytenr,
- bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
+ btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr,
+ bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
return 0;
}
@@ -2818,34 +2823,63 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *block_group, *tmp;
struct list_head *deleted_bgs;
- struct extent_io_tree *unpin;
+ struct extent_io_tree *unpin = &trans->transaction->pinned_extents;
+ struct extent_state *cached_state = NULL;
u64 start;
u64 end;
+ int unpin_error = 0;
int ret;
- unpin = &trans->transaction->pinned_extents;
+ mutex_lock(&fs_info->unused_bg_unpin_mutex);
+ btrfs_find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, &cached_state);
- while (!TRANS_ABORTED(trans)) {
- struct extent_state *cached_state = NULL;
-
- mutex_lock(&fs_info->unused_bg_unpin_mutex);
- if (!find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state)) {
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
- break;
- }
+ while (!TRANS_ABORTED(trans) && cached_state) {
+ struct extent_state *next_state;
if (btrfs_test_opt(fs_info, DISCARD_SYNC))
ret = btrfs_discard_extent(fs_info, start,
end + 1 - start, NULL);
- clear_extent_dirty(unpin, start, end, &cached_state);
+ next_state = btrfs_next_extent_state(unpin, cached_state);
+ btrfs_clear_extent_dirty(unpin, start, end, &cached_state);
ret = unpin_extent_range(fs_info, start, end, true);
- BUG_ON(ret);
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
- free_extent_state(cached_state);
- cond_resched();
+ /*
+ * If we get an error unpinning an extent range, store the first
+ * error to return later after trying to unpin all ranges and do
+ * the sync discards. Our caller will abort the transaction
+ * (which already wrote new superblocks) and on the next mount
+ * the space will be available as it was pinned by in-memory
+ * only structures in this phase.
+ */
+ if (ret) {
+ btrfs_err_rl(fs_info,
+"failed to unpin extent range [%llu, %llu] when committing transaction %llu: %s (%d)",
+ start, end, trans->transid,
+ btrfs_decode_error(ret), ret);
+ if (!unpin_error)
+ unpin_error = ret;
+ }
+
+ btrfs_free_extent_state(cached_state);
+
+ if (need_resched()) {
+ btrfs_free_extent_state(next_state);
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
+ cond_resched();
+ cached_state = NULL;
+ mutex_lock(&fs_info->unused_bg_unpin_mutex);
+ btrfs_find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state);
+ } else {
+ cached_state = next_state;
+ if (cached_state) {
+ start = cached_state->start;
+ end = cached_state->end;
+ }
+ }
}
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
+ btrfs_free_extent_state(cached_state);
if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
btrfs_discard_calc_delay(&fs_info->discard_ctl);
@@ -2859,14 +2893,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
*/
deleted_bgs = &trans->transaction->deleted_bgs;
list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
- u64 trimmed = 0;
-
ret = -EROFS;
if (!TRANS_ABORTED(trans))
- ret = btrfs_discard_extent(fs_info,
- block_group->start,
- block_group->length,
- &trimmed);
+ ret = btrfs_discard_extent(fs_info, block_group->start,
+ block_group->length, NULL);
/*
* Not strictly necessary to lock, as the block_group should be
@@ -2888,7 +2918,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
}
}
- return 0;
+ return unpin_error;
}
/*
@@ -3483,17 +3513,11 @@ int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(bg, buf->start, buf->len);
- btrfs_free_reserved_bytes(bg, buf->len, 0);
+ btrfs_free_reserved_bytes(bg, buf->len, false);
btrfs_put_block_group(bg);
trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
out:
-
- /*
- * Deleting the buffer, clear the corrupt flag since it doesn't
- * matter anymore.
- */
- clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
return 0;
}
@@ -4111,6 +4135,7 @@ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
struct btrfs_key *ins,
struct find_free_extent_ctl *ffe_ctl,
+ struct btrfs_space_info *space_info,
bool full_search)
{
struct btrfs_root *root = fs_info->chunk_root;
@@ -4165,7 +4190,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return ret;
}
- ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
+ ret = btrfs_chunk_alloc(trans, space_info, ffe_ctl->flags,
CHUNK_ALLOC_FORCE_FOR_EXTENT);
/* Do not bail out on ENOSPC since we can do more. */
@@ -4382,11 +4407,22 @@ static noinline int find_free_extent(struct btrfs_root *root,
ins->objectid = 0;
ins->offset = 0;
- trace_find_free_extent(root, ffe_ctl);
+ trace_btrfs_find_free_extent(root, ffe_ctl);
space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
+ if (btrfs_is_zoned(fs_info) && space_info) {
+ /* Use dedicated sub-space_info for dedicated block group users. */
+ if (ffe_ctl->for_data_reloc) {
+ space_info = space_info->sub_group[0];
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ } else if (ffe_ctl->for_treelog) {
+ space_info = space_info->sub_group[0];
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_TREELOG);
+ }
+ }
if (!space_info) {
- btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
+ btrfs_err(fs_info, "no space info for %llu, tree-log %d, relocation %d",
+ ffe_ctl->flags, ffe_ctl->for_treelog, ffe_ctl->for_data_reloc);
return -ENOSPC;
}
@@ -4408,6 +4444,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
* picked out then we don't care that the block group is cached.
*/
if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
+ block_group->space_info == space_info &&
block_group->cached != BTRFS_CACHE_NO) {
down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
@@ -4433,7 +4470,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
}
}
search:
- trace_find_free_extent_search_loop(root, ffe_ctl);
+ trace_btrfs_find_free_extent_search_loop(root, ffe_ctl);
ffe_ctl->have_caching_bg = false;
if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
ffe_ctl->index == 0)
@@ -4485,7 +4522,7 @@ search:
}
have_block_group:
- trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
+ trace_btrfs_find_free_extent_have_block_group(root, ffe_ctl, block_group);
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
@@ -4578,7 +4615,8 @@ loop:
}
up_read(&space_info->groups_sem);
- ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
+ ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, space_info,
+ full_search);
if (ret > 0)
goto search;
@@ -4700,8 +4738,8 @@ again:
return ret;
}
-int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
- u64 start, u64 len, int delalloc)
+int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len,
+ bool is_delalloc)
{
struct btrfs_block_group *cache;
@@ -4713,7 +4751,7 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
}
btrfs_add_free_space(cache, start, len);
- btrfs_free_reserved_bytes(cache, len, delalloc);
+ btrfs_free_reserved_bytes(cache, len, is_delalloc);
trace_btrfs_reserved_extent_free(fs_info, start, len);
btrfs_put_block_group(cache);
@@ -5071,17 +5109,17 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* EXTENT bit to differentiate dirty pages.
*/
if (buf->log_index == 0)
- set_extent_bit(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1,
- EXTENT_DIRTY, NULL);
+ btrfs_set_extent_bit(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1,
+ EXTENT_DIRTY, NULL);
else
- set_extent_bit(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1,
- EXTENT_NEW, NULL);
+ btrfs_set_extent_bit(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1,
+ EXTENT_NEW, NULL);
} else {
buf->log_index = -1;
- set_extent_bit(&trans->transaction->dirty_pages, buf->start,
- buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
+ btrfs_set_extent_bit(&trans->transaction->dirty_pages, buf->start,
+ buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
}
/* this returns a buffer locked for blocking */
return buf;
@@ -5187,7 +5225,7 @@ out_free_buf:
btrfs_tree_unlock(buf);
free_extent_buffer(buf);
out_free_reserved:
- btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, false);
out_unuse:
btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
@@ -6397,13 +6435,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
if (ret)
break;
- find_first_clear_extent_bit(&device->alloc_state, start,
- &start, &end,
- CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ btrfs_find_first_clear_extent_bit(&device->alloc_state, start,
+ &start, &end,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED);
/* Check if there are any CHUNK_* bits left */
if (start > device->total_bytes) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
btrfs_warn_in_rcu(fs_info,
"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
start, end - start + 1,
@@ -6436,8 +6474,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
ret = btrfs_issue_discard(device->bdev, start, len,
&bytes);
if (!ret)
- set_extent_bit(&device->alloc_state, start,
- start + bytes - 1, CHUNK_TRIMMED, NULL);
+ btrfs_set_extent_bit(&device->alloc_state, start,
+ start + bytes - 1, CHUNK_TRIMMED, NULL);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 0ed682d9ed7b..72914074c304 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -149,8 +149,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
u64 btrfs_get_extent_owner_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf, int slot);
-int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
- u64 start, u64 len, int delalloc);
+int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len,
+ bool is_delalloc);
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans,
const struct extent_buffer *eb);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 13bdd60da3c7..e43f6280f954 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -96,6 +96,8 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
*/
struct btrfs_bio_ctrl {
struct btrfs_bio *bbio;
+ /* Last byte contained in bbio + 1 . */
+ loff_t next_file_offset;
enum btrfs_compression_type compress_type;
u32 len_to_oe_boundary;
blk_opf_t opf;
@@ -221,22 +223,17 @@ static void __process_folios_contig(struct address_space *mapping,
}
static noinline void unlock_delalloc_folio(const struct inode *inode,
- const struct folio *locked_folio,
+ struct folio *locked_folio,
u64 start, u64 end)
{
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
-
ASSERT(locked_folio);
- if (index == locked_folio->index && end_index == index)
- return;
__process_folios_contig(inode->i_mapping, locked_folio, start, end,
PAGE_UNLOCK);
}
static noinline int lock_delalloc_folios(struct inode *inode,
- const struct folio *locked_folio,
+ struct folio *locked_folio,
u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
@@ -246,9 +243,6 @@ static noinline int lock_delalloc_folios(struct inode *inode,
u64 processed_end = start;
struct folio_batch fbatch;
- if (index == locked_folio->index && index == end_index)
- return 0;
-
folio_batch_init(&fbatch);
while (index <= end_index) {
unsigned int found_folios, i;
@@ -340,7 +334,7 @@ again:
/* @delalloc_end can be -1, never go beyond @orig_end */
*end = min(delalloc_end, orig_end);
- free_extent_state(cached_state);
+ btrfs_free_extent_state(cached_state);
return false;
}
@@ -366,7 +360,7 @@ again:
/* some of the folios are gone, lets avoid looping by
* shortening the size of the delalloc range we're searching
*/
- free_extent_state(cached_state);
+ btrfs_free_extent_state(cached_state);
cached_state = NULL;
if (!loops) {
max_bytes = PAGE_SIZE;
@@ -379,13 +373,13 @@ again:
}
/* step three, lock the state bits for the whole range */
- lock_extent(tree, delalloc_start, delalloc_end, &cached_state);
+ btrfs_lock_extent(tree, delalloc_start, delalloc_end, &cached_state);
/* then test to make sure it is all still delalloc */
- ret = test_range_bit(tree, delalloc_start, delalloc_end,
- EXTENT_DELALLOC, cached_state);
+ ret = btrfs_test_range_bit(tree, delalloc_start, delalloc_end,
+ EXTENT_DELALLOC, cached_state);
- unlock_extent(tree, delalloc_start, delalloc_end, &cached_state);
+ btrfs_unlock_extent(tree, delalloc_start, delalloc_end, &cached_state);
if (!ret) {
unlock_delalloc_folio(inode, locked_folio, delalloc_start,
delalloc_end);
@@ -403,7 +397,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct extent_state **cached,
u32 clear_bits, unsigned long page_ops)
{
- clear_extent_bit(&inode->io_tree, start, end, clear_bits, cached);
+ btrfs_clear_extent_bit(&inode->io_tree, start, end, clear_bits, cached);
__process_folios_contig(inode->vfs_inode.i_mapping, locked_folio, start,
end, page_ops);
@@ -462,9 +456,6 @@ static void end_bbio_data_write(struct btrfs_bio *bbio)
u64 start = folio_pos(folio) + fi.offset;
u32 len = fi.length;
- /* Only order 0 (single page) folios are allowed for data. */
- ASSERT(folio_order(folio) == 0);
-
/* Our read/write should always be sector aligned. */
if (!IS_ALIGNED(fi.offset, sectorsize))
btrfs_err(fs_info,
@@ -512,43 +503,22 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
struct btrfs_fs_info *fs_info = bbio->fs_info;
struct bio *bio = &bbio->bio;
struct folio_iter fi;
- const u32 sectorsize = fs_info->sectorsize;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_folio_all(fi, &bbio->bio) {
bool uptodate = !bio->bi_status;
struct folio *folio = fi.folio;
struct inode *inode = folio->mapping->host;
- u64 start;
- u64 end;
- u32 len;
+ u64 start = folio_pos(folio) + fi.offset;
btrfs_debug(fs_info,
"%s: bi_sector=%llu, err=%d, mirror=%u",
__func__, bio->bi_iter.bi_sector, bio->bi_status,
bbio->mirror_num);
- /*
- * We always issue full-sector reads, but if some block in a
- * folio fails to read, blk_update_request() will advance
- * bv_offset and adjust bv_len to compensate. Print a warning
- * for unaligned offsets, and an error if they don't add up to
- * a full sector.
- */
- if (!IS_ALIGNED(fi.offset, sectorsize))
- btrfs_err(fs_info,
- "partial page read in btrfs with offset %zu and length %zu",
- fi.offset, fi.length);
- else if (!IS_ALIGNED(fi.offset + fi.length, sectorsize))
- btrfs_info(fs_info,
- "incomplete page read with offset %zu and length %zu",
- fi.offset, fi.length);
-
- start = folio_pos(folio) + fi.offset;
- end = start + fi.length - 1;
- len = fi.length;
if (likely(uptodate)) {
+ u64 end = start + fi.length - 1;
loff_t i_size = i_size_read(inode);
/*
@@ -573,7 +543,7 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
}
/* Update page status and unlock. */
- end_folio_read(folio, uptodate, start, len);
+ end_folio_read(folio, uptodate, start, fi.length);
}
bio_put(bio);
}
@@ -664,13 +634,10 @@ static int alloc_eb_folio_array(struct extent_buffer *eb, bool nofail)
}
static bool btrfs_bio_is_contig(struct btrfs_bio_ctrl *bio_ctrl,
- struct folio *folio, u64 disk_bytenr,
- unsigned int pg_offset)
+ u64 disk_bytenr, loff_t file_offset)
{
struct bio *bio = &bio_ctrl->bbio->bio;
- struct bio_vec *bvec = bio_last_bvec_all(bio);
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
- struct folio *bv_folio = page_folio(bvec->bv_page);
if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) {
/*
@@ -681,19 +648,11 @@ static bool btrfs_bio_is_contig(struct btrfs_bio_ctrl *bio_ctrl,
}
/*
- * The contig check requires the following conditions to be met:
- *
- * 1) The folios are belonging to the same inode
- * This is implied by the call chain.
- *
- * 2) The range has adjacent logical bytenr
- *
- * 3) The range has adjacent file offset
- * This is required for the usage of btrfs_bio->file_offset.
+ * To merge into a bio both the disk sector and the logical offset in
+ * the file need to be contiguous.
*/
- return bio_end_sector(bio) == sector &&
- folio_pos(bv_folio) + bvec->bv_offset + bvec->bv_len ==
- folio_pos(folio) + pg_offset;
+ return bio_ctrl->next_file_offset == file_offset &&
+ bio_end_sector(bio) == sector;
}
static void alloc_new_bio(struct btrfs_inode *inode,
@@ -711,6 +670,7 @@ static void alloc_new_bio(struct btrfs_inode *inode,
bbio->file_offset = file_offset;
bio_ctrl->bbio = bbio;
bio_ctrl->len_to_oe_boundary = U32_MAX;
+ bio_ctrl->next_file_offset = file_offset;
/* Limit data write bios to the ordered boundary. */
if (bio_ctrl->wbc) {
@@ -752,22 +712,21 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
size_t size, unsigned long pg_offset)
{
struct btrfs_inode *inode = folio_to_inode(folio);
+ loff_t file_offset = folio_pos(folio) + pg_offset;
ASSERT(pg_offset + size <= folio_size(folio));
ASSERT(bio_ctrl->end_io_func);
if (bio_ctrl->bbio &&
- !btrfs_bio_is_contig(bio_ctrl, folio, disk_bytenr, pg_offset))
+ !btrfs_bio_is_contig(bio_ctrl, disk_bytenr, file_offset))
submit_one_bio(bio_ctrl);
do {
u32 len = size;
/* Allocate new bio if needed */
- if (!bio_ctrl->bbio) {
- alloc_new_bio(inode, bio_ctrl, disk_bytenr,
- folio_pos(folio) + pg_offset);
- }
+ if (!bio_ctrl->bbio)
+ alloc_new_bio(inode, bio_ctrl, disk_bytenr, file_offset);
/* Cap to the current ordered extent boundary if there is one. */
if (len > bio_ctrl->len_to_oe_boundary) {
@@ -781,14 +740,15 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
submit_one_bio(bio_ctrl);
continue;
}
+ bio_ctrl->next_file_offset += len;
if (bio_ctrl->wbc)
- wbc_account_cgroup_owner(bio_ctrl->wbc, folio,
- len);
+ wbc_account_cgroup_owner(bio_ctrl->wbc, folio, len);
size -= len;
pg_offset += len;
disk_bytenr += len;
+ file_offset += len;
/*
* len_to_oe_boundary defaults to U32_MAX, which isn't folio or
@@ -903,13 +863,13 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
if (*em_cached) {
em = *em_cached;
- if (extent_map_in_tree(em) && start >= em->start &&
- start < extent_map_end(em)) {
+ if (btrfs_extent_map_in_tree(em) && start >= em->start &&
+ start < btrfs_extent_map_end(em)) {
refcount_inc(&em->refs);
return em;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
*em_cached = NULL;
}
@@ -980,20 +940,20 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
return PTR_ERR(em);
}
extent_offset = cur - em->start;
- BUG_ON(extent_map_end(em) <= cur);
+ BUG_ON(btrfs_extent_map_end(em) <= cur);
BUG_ON(end < cur);
- compress_type = extent_map_compression(em);
+ compress_type = btrfs_extent_map_compression(em);
if (compress_type != BTRFS_COMPRESS_NONE)
disk_bytenr = em->disk_bytenr;
else
- disk_bytenr = extent_map_block_start(em) + extent_offset;
+ disk_bytenr = btrfs_extent_map_block_start(em) + extent_offset;
if (em->flags & EXTENT_FLAG_PREALLOC)
block_start = EXTENT_MAP_HOLE;
else
- block_start = extent_map_block_start(em);
+ block_start = btrfs_extent_map_block_start(em);
/*
* If we have a file range that points to a compressed extent
@@ -1037,7 +997,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
if (prev_em_start)
*prev_em_start = em->start;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = NULL;
/* we've found a hole, just zero and go on */
@@ -1212,7 +1172,7 @@ static void lock_extents_for_read(struct btrfs_inode *inode, u64 start, u64 end,
ASSERT(IS_ALIGNED(end + 1, PAGE_SIZE));
again:
- lock_extent(&inode->io_tree, start, end, cached_state);
+ btrfs_lock_extent(&inode->io_tree, start, end, cached_state);
cur_pos = start;
while (cur_pos < end) {
struct btrfs_ordered_extent *ordered;
@@ -1235,7 +1195,7 @@ again:
}
/* Now wait for the OE to finish. */
- unlock_extent(&inode->io_tree, start, end, cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start, end, cached_state);
btrfs_start_ordered_extent_nowriteback(ordered, start, end + 1 - start);
btrfs_put_ordered_extent(ordered);
/* We have unlocked the whole range, restart from the beginning. */
@@ -1255,9 +1215,9 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
lock_extents_for_read(inode, start, end, &cached_state);
ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
- unlock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
- free_extent_map(em_cached);
+ btrfs_free_extent_map(em_cached);
/*
* If btrfs_do_readpage() failed we will want to submit the assembled
@@ -1443,8 +1403,8 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
* We've hit an error during previous delalloc range,
* have to cleanup the remaining locked ranges.
*/
- unlock_extent(&inode->io_tree, found_start,
- found_start + found_len - 1, NULL);
+ btrfs_unlock_extent(&inode->io_tree, found_start,
+ found_start + found_len - 1, NULL);
unlock_delalloc_folio(&inode->vfs_inode, folio,
found_start,
found_start + found_len - 1);
@@ -1550,19 +1510,19 @@ static int submit_one_sector(struct btrfs_inode *inode,
return PTR_ERR(em);
extent_offset = filepos - em->start;
- em_end = extent_map_end(em);
+ em_end = btrfs_extent_map_end(em);
ASSERT(filepos <= em_end);
ASSERT(IS_ALIGNED(em->start, sectorsize));
ASSERT(IS_ALIGNED(em->len, sectorsize));
- block_start = extent_map_block_start(em);
- disk_bytenr = extent_map_block_start(em) + extent_offset;
+ block_start = btrfs_extent_map_block_start(em);
+ disk_bytenr = btrfs_extent_map_block_start(em) + extent_offset;
- ASSERT(!extent_map_is_compressed(em));
+ ASSERT(!btrfs_extent_map_is_compressed(em));
ASSERT(block_start != EXTENT_MAP_HOLE);
ASSERT(block_start != EXTENT_MAP_INLINE);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = NULL;
/*
@@ -1718,7 +1678,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
return 0;
}
- if (folio->index == end_index)
+ if (folio_contains(folio, end_index))
folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);
/*
@@ -1814,8 +1774,18 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e
*/
spin_lock(&eb->refs_lock);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+ XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
+ unsigned long flags;
+
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
+
+ xas_lock_irqsave(&xas, flags);
+ xas_load(&xas);
+ xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
+ xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
+ xas_unlock_irqrestore(&xas, flags);
+
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
-eb->len,
@@ -1901,24 +1871,151 @@ static void set_btree_ioerr(struct extent_buffer *eb)
}
}
+static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark)
+{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
+ unsigned long flags;
+
+ xas_lock_irqsave(&xas, flags);
+ xas_load(&xas);
+ xas_set_mark(&xas, mark);
+ xas_unlock_irqrestore(&xas, flags);
+}
+
+static void buffer_tree_clear_mark(const struct extent_buffer *eb, xa_mark_t mark)
+{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
+ unsigned long flags;
+
+ xas_lock_irqsave(&xas, flags);
+ xas_load(&xas);
+ xas_clear_mark(&xas, mark);
+ xas_unlock_irqrestore(&xas, flags);
+}
+
+static void buffer_tree_tag_for_writeback(struct btrfs_fs_info *fs_info,
+ unsigned long start, unsigned long end)
+{
+ XA_STATE(xas, &fs_info->buffer_tree, start);
+ unsigned int tagged = 0;
+ void *eb;
+
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, eb, end, PAGECACHE_TAG_DIRTY) {
+ xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ if (++tagged % XA_CHECK_SCHED)
+ continue;
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+ cond_resched();
+ xas_lock_irq(&xas);
+ }
+ xas_unlock_irq(&xas);
+}
+
+struct eb_batch {
+ unsigned int nr;
+ unsigned int cur;
+ struct extent_buffer *ebs[PAGEVEC_SIZE];
+};
+
+static inline bool eb_batch_add(struct eb_batch *batch, struct extent_buffer *eb)
+{
+ batch->ebs[batch->nr++] = eb;
+ return (batch->nr < PAGEVEC_SIZE);
+}
+
+static inline void eb_batch_init(struct eb_batch *batch)
+{
+ batch->nr = 0;
+ batch->cur = 0;
+}
+
+static inline struct extent_buffer *eb_batch_next(struct eb_batch *batch)
+{
+ if (batch->cur >= batch->nr)
+ return NULL;
+ return batch->ebs[batch->cur++];
+}
+
+static inline void eb_batch_release(struct eb_batch *batch)
+{
+ for (unsigned int i = 0; i < batch->nr; i++)
+ free_extent_buffer(batch->ebs[i]);
+ eb_batch_init(batch);
+}
+
+static inline struct extent_buffer *find_get_eb(struct xa_state *xas, unsigned long max,
+ xa_mark_t mark)
+{
+ struct extent_buffer *eb;
+
+retry:
+ eb = xas_find_marked(xas, max, mark);
+
+ if (xas_retry(xas, eb))
+ goto retry;
+
+ if (!eb)
+ return NULL;
+
+ if (!atomic_inc_not_zero(&eb->refs)) {
+ xas_reset(xas);
+ goto retry;
+ }
+
+ if (unlikely(eb != xas_reload(xas))) {
+ free_extent_buffer(eb);
+ xas_reset(xas);
+ goto retry;
+ }
+
+ return eb;
+}
+
+static unsigned int buffer_tree_get_ebs_tag(struct btrfs_fs_info *fs_info,
+ unsigned long *start,
+ unsigned long end, xa_mark_t tag,
+ struct eb_batch *batch)
+{
+ XA_STATE(xas, &fs_info->buffer_tree, *start);
+ struct extent_buffer *eb;
+
+ rcu_read_lock();
+ while ((eb = find_get_eb(&xas, end, tag)) != NULL) {
+ if (!eb_batch_add(batch, eb)) {
+ *start = ((eb->start + eb->len) >> fs_info->sectorsize_bits);
+ goto out;
+ }
+ }
+ if (end == ULONG_MAX)
+ *start = ULONG_MAX;
+ else
+ *start = end + 1;
+out:
+ rcu_read_unlock();
+
+ return batch->nr;
+}
+
/*
* The endio specific version which won't touch any unsafe spinlock in endio
* context.
*/
static struct extent_buffer *find_extent_buffer_nolock(
- const struct btrfs_fs_info *fs_info, u64 start)
+ struct btrfs_fs_info *fs_info, u64 start)
{
struct extent_buffer *eb;
+ unsigned long index = (start >> fs_info->sectorsize_bits);
rcu_read_lock();
- eb = radix_tree_lookup(&fs_info->buffer_radix,
- start >> fs_info->sectorsize_bits);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- return eb;
- }
+ eb = xa_load(&fs_info->buffer_tree, index);
+ if (eb && !atomic_inc_not_zero(&eb->refs))
+ eb = NULL;
rcu_read_unlock();
- return NULL;
+ return eb;
}
static void end_bbio_meta_write(struct btrfs_bio *bbio)
@@ -1933,6 +2030,7 @@ static void end_bbio_meta_write(struct btrfs_bio *bbio)
btrfs_meta_folio_clear_writeback(fi.folio, eb);
}
+ buffer_tree_clear_mark(eb, PAGECACHE_TAG_WRITEBACK);
clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -2004,163 +2102,36 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
}
/*
- * Submit one subpage btree page.
- *
- * The main difference to submit_eb_page() is:
- * - Page locking
- * For subpage, we don't rely on page locking at all.
- *
- * - Flush write bio
- * We only flush bio if we may be unable to fit current extent buffers into
- * current bio.
+ * Wait for all eb writeback in the given range to finish.
*
- * Return >=0 for the number of submitted extent buffers.
- * Return <0 for fatal error.
+ * @fs_info: The fs_info for this file system.
+ * @start: The offset of the range to start waiting on writeback.
+ * @end: The end of the range, inclusive. This is meant to be used in
+ * conjuction with wait_marked_extents, so this will usually be
+ * the_next_eb->start - 1.
*/
-static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
+void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
+ u64 end)
{
- struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
- int submitted = 0;
- u64 folio_start = folio_pos(folio);
- int bit_start = 0;
- int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
- const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
+ struct eb_batch batch;
+ unsigned long start_index = (start >> fs_info->sectorsize_bits);
+ unsigned long end_index = (end >> fs_info->sectorsize_bits);
- /* Lock and write each dirty extent buffers in the range */
- while (bit_start < blocks_per_folio) {
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ eb_batch_init(&batch);
+ while (start_index <= end_index) {
struct extent_buffer *eb;
- unsigned long flags;
- u64 start;
+ unsigned int nr_ebs;
- /*
- * Take private lock to ensure the subpage won't be detached
- * in the meantime.
- */
- spin_lock(&folio->mapping->i_private_lock);
- if (!folio_test_private(folio)) {
- spin_unlock(&folio->mapping->i_private_lock);
+ nr_ebs = buffer_tree_get_ebs_tag(fs_info, &start_index, end_index,
+ PAGECACHE_TAG_WRITEBACK, &batch);
+ if (!nr_ebs)
break;
- }
- spin_lock_irqsave(&subpage->lock, flags);
- if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * blocks_per_folio,
- subpage->bitmaps)) {
- spin_unlock_irqrestore(&subpage->lock, flags);
- spin_unlock(&folio->mapping->i_private_lock);
- bit_start += sectors_per_node;
- continue;
- }
-
- start = folio_start + bit_start * fs_info->sectorsize;
- bit_start += sectors_per_node;
-
- /*
- * Here we just want to grab the eb without touching extra
- * spin locks, so call find_extent_buffer_nolock().
- */
- eb = find_extent_buffer_nolock(fs_info, start);
- spin_unlock_irqrestore(&subpage->lock, flags);
- spin_unlock(&folio->mapping->i_private_lock);
-
- /*
- * The eb has already reached 0 refs thus find_extent_buffer()
- * doesn't return it. We don't need to write back such eb
- * anyway.
- */
- if (!eb)
- continue;
-
- if (lock_extent_buffer_for_io(eb, wbc)) {
- write_one_eb(eb, wbc);
- submitted++;
- }
- free_extent_buffer(eb);
- }
- return submitted;
-}
-
-/*
- * Submit all page(s) of one extent buffer.
- *
- * @page: the page of one extent buffer
- * @eb_context: to determine if we need to submit this page, if current page
- * belongs to this eb, we don't need to submit
- *
- * The caller should pass each page in their bytenr order, and here we use
- * @eb_context to determine if we have submitted pages of one extent buffer.
- *
- * If we have, we just skip until we hit a new page that doesn't belong to
- * current @eb_context.
- *
- * If not, we submit all the page(s) of the extent buffer.
- *
- * Return >0 if we have submitted the extent buffer successfully.
- * Return 0 if we don't need to submit the page, as it's already submitted by
- * previous call.
- * Return <0 for fatal error.
- */
-static int submit_eb_page(struct folio *folio, struct btrfs_eb_write_context *ctx)
-{
- struct writeback_control *wbc = ctx->wbc;
- struct address_space *mapping = folio->mapping;
- struct extent_buffer *eb;
- int ret;
-
- if (!folio_test_private(folio))
- return 0;
-
- if (btrfs_meta_is_subpage(folio_to_fs_info(folio)))
- return submit_eb_subpage(folio, wbc);
-
- spin_lock(&mapping->i_private_lock);
- if (!folio_test_private(folio)) {
- spin_unlock(&mapping->i_private_lock);
- return 0;
- }
-
- eb = folio_get_private(folio);
-
- /*
- * Shouldn't happen and normally this would be a BUG_ON but no point
- * crashing the machine for something we can survive anyway.
- */
- if (WARN_ON(!eb)) {
- spin_unlock(&mapping->i_private_lock);
- return 0;
- }
-
- if (eb == ctx->eb) {
- spin_unlock(&mapping->i_private_lock);
- return 0;
- }
- ret = atomic_inc_not_zero(&eb->refs);
- spin_unlock(&mapping->i_private_lock);
- if (!ret)
- return 0;
-
- ctx->eb = eb;
-
- ret = btrfs_check_meta_write_pointer(eb->fs_info, ctx);
- if (ret) {
- if (ret == -EBUSY)
- ret = 0;
- free_extent_buffer(eb);
- return ret;
- }
- if (!lock_extent_buffer_for_io(eb, wbc)) {
- free_extent_buffer(eb);
- return 0;
- }
- /* Implies write in zoned mode. */
- if (ctx->zoned_bg) {
- /* Mark the last eb in the block group. */
- btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
- ctx->zoned_bg->meta_write_pointer += eb->len;
+ while ((eb = eb_batch_next(&batch)) != NULL)
+ wait_on_extent_buffer_writeback(eb);
+ eb_batch_release(&batch);
+ cond_resched();
}
- write_one_eb(eb, wbc);
- free_extent_buffer(eb);
- return 1;
}
int btree_write_cache_pages(struct address_space *mapping,
@@ -2171,25 +2142,27 @@ int btree_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
- struct folio_batch fbatch;
- unsigned int nr_folios;
- pgoff_t index;
- pgoff_t end; /* Inclusive */
+ struct eb_batch batch;
+ unsigned int nr_ebs;
+ unsigned long index;
+ unsigned long end;
int scanned = 0;
xa_mark_t tag;
- folio_batch_init(&fbatch);
+ eb_batch_init(&batch);
if (wbc->range_cyclic) {
- index = mapping->writeback_index; /* Start from prev offset */
+ index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->sectorsize_bits);
end = -1;
+
/*
* Start from the beginning does not need to cycle over the
* range, mark it as scanned.
*/
scanned = (index == 0);
} else {
- index = wbc->range_start >> PAGE_SHIFT;
- end = wbc->range_end >> PAGE_SHIFT;
+ index = (wbc->range_start >> fs_info->sectorsize_bits);
+ end = (wbc->range_end >> fs_info->sectorsize_bits);
+
scanned = 1;
}
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -2199,31 +2172,40 @@ int btree_write_cache_pages(struct address_space *mapping,
btrfs_zoned_meta_io_lock(fs_info);
retry:
if (wbc->sync_mode == WB_SYNC_ALL)
- tag_pages_for_writeback(mapping, index, end);
+ buffer_tree_tag_for_writeback(fs_info, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_folios = filemap_get_folios_tag(mapping, &index, end,
- tag, &fbatch))) {
- unsigned i;
+ (nr_ebs = buffer_tree_get_ebs_tag(fs_info, &index, end, tag, &batch))) {
+ struct extent_buffer *eb;
- for (i = 0; i < nr_folios; i++) {
- struct folio *folio = fbatch.folios[i];
+ while ((eb = eb_batch_next(&batch)) != NULL) {
+ ctx.eb = eb;
+
+ ret = btrfs_check_meta_write_pointer(eb->fs_info, &ctx);
+ if (ret) {
+ if (ret == -EBUSY)
+ ret = 0;
- ret = submit_eb_page(folio, &ctx);
- if (ret == 0)
+ if (ret) {
+ done = 1;
+ break;
+ }
+ free_extent_buffer(eb);
continue;
- if (ret < 0) {
- done = 1;
- break;
}
- /*
- * the filesystem may choose to bump up nr_to_write.
- * We have to make sure to honor the new nr_to_write
- * at any time
- */
- nr_to_write_done = wbc->nr_to_write <= 0;
+ if (!lock_extent_buffer_for_io(eb, wbc))
+ continue;
+
+ /* Implies write in zoned mode. */
+ if (ctx.zoned_bg) {
+ /* Mark the last eb in the block group. */
+ btrfs_schedule_zone_finish_bg(ctx.zoned_bg, eb);
+ ctx.zoned_bg->meta_write_pointer += eb->len;
+ }
+ write_one_eb(eb, wbc);
}
- folio_batch_release(&fbatch);
+ nr_to_write_done = (wbc->nr_to_write <= 0);
+ eb_batch_release(&batch);
cond_resched();
}
if (!scanned && !done) {
@@ -2574,10 +2556,10 @@ void btrfs_readahead(struct readahead_control *rac)
while ((folio = readahead_folio(rac)) != NULL)
btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
- unlock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
if (em_cached)
- free_extent_map(em_cached);
+ btrfs_free_extent_map(em_cached);
submit_one_bio(&bio_ctrl);
}
@@ -2601,7 +2583,7 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
if (start > end)
return 0;
- lock_extent(tree, start, end, &cached_state);
+ btrfs_lock_extent(tree, start, end, &cached_state);
folio_wait_writeback(folio);
/*
@@ -2609,46 +2591,54 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
* so here we only need to unlock the extent range to free any
* existing extent state.
*/
- unlock_extent(tree, start, end, &cached_state);
+ btrfs_unlock_extent(tree, start, end, &cached_state);
return 0;
}
/*
- * a helper for release_folio, this tests for areas of the page that
- * are locked or under IO and drops the related state bits if it is safe
- * to drop the page.
+ * A helper for struct address_space_operations::release_folio, this tests for
+ * areas of the folio that are locked or under IO and drops the related state
+ * bits if it is safe to drop the folio.
*/
static bool try_release_extent_state(struct extent_io_tree *tree,
struct folio *folio)
{
+ struct extent_state *cached_state = NULL;
u64 start = folio_pos(folio);
u64 end = start + folio_size(folio) - 1;
- bool ret;
+ u32 range_bits;
+ u32 clear_bits;
+ bool ret = false;
+ int ret2;
- if (test_range_bit_exists(tree, start, end, EXTENT_LOCKED)) {
- ret = false;
- } else {
- u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM |
- EXTENT_DELALLOC_NEW | EXTENT_CTLBITS |
- EXTENT_QGROUP_RESERVED);
- int ret2;
+ btrfs_get_range_bits(tree, start, end, &range_bits, &cached_state);
- /*
- * At this point we can safely clear everything except the
- * locked bit, the nodatasum bit and the delalloc new bit.
- * The delalloc new bit will be cleared by ordered extent
- * completion.
- */
- ret2 = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
+ /*
+ * We can release the folio if it's locked only for ordered extent
+ * completion, since that doesn't require using the folio.
+ */
+ if ((range_bits & EXTENT_LOCKED) &&
+ !(range_bits & EXTENT_FINISHING_ORDERED))
+ goto out;
+
+ clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM | EXTENT_DELALLOC_NEW |
+ EXTENT_CTLBITS | EXTENT_QGROUP_RESERVED |
+ EXTENT_FINISHING_ORDERED);
+ /*
+ * At this point we can safely clear everything except the locked,
+ * nodatasum, delalloc new and finishing ordered bits. The delalloc new
+ * bit will be cleared by ordered extent completion.
+ */
+ ret2 = btrfs_clear_extent_bit(tree, start, end, clear_bits, &cached_state);
+ /*
+ * If clear_extent_bit failed for enomem reasons, we can't allow the
+ * release to continue.
+ */
+ if (ret2 == 0)
+ ret = true;
+out:
+ btrfs_free_extent_state(cached_state);
- /* if clear_extent_bit failed for enomem reasons,
- * we can't allow the release to continue.
- */
- if (ret2 < 0)
- ret = false;
- else
- ret = true;
- }
return ret;
}
@@ -2671,18 +2661,19 @@ bool try_release_extent_mapping(struct folio *folio, gfp_t mask)
struct extent_map *em;
write_lock(&extent_tree->lock);
- em = lookup_extent_mapping(extent_tree, start, len);
+ em = btrfs_lookup_extent_mapping(extent_tree, start, len);
if (!em) {
write_unlock(&extent_tree->lock);
break;
}
if ((em->flags & EXTENT_FLAG_PINNED) || em->start != start) {
write_unlock(&extent_tree->lock);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
break;
}
- if (test_range_bit_exists(io_tree, em->start,
- extent_map_end(em) - 1, EXTENT_LOCKED))
+ if (btrfs_test_range_bit_exists(io_tree, em->start,
+ btrfs_extent_map_end(em) - 1,
+ EXTENT_LOCKED))
goto next;
/*
* If it's not in the list of modified extents, used by a fast
@@ -2709,15 +2700,15 @@ remove_em:
* fsync performance for workloads with a data size that exceeds
* or is close to the system's memory).
*/
- remove_extent_mapping(inode, em);
+ btrfs_remove_extent_mapping(inode, em);
/* Once for the inode's extent map tree. */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
next:
- start = extent_map_end(em);
+ start = btrfs_extent_map_end(em);
write_unlock(&extent_tree->lock);
/* Once for us, for the lookup_extent_mapping() reference. */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
if (need_resched()) {
/*
@@ -2756,6 +2747,7 @@ static bool folio_range_has_eb(struct folio *folio)
static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct folio *folio)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct address_space *mapping = folio->mapping;
const bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
/*
@@ -2763,21 +2755,20 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
* be done under the i_private_lock.
*/
if (mapped)
- spin_lock(&folio->mapping->i_private_lock);
+ spin_lock(&mapping->i_private_lock);
if (!folio_test_private(folio)) {
if (mapped)
- spin_unlock(&folio->mapping->i_private_lock);
+ spin_unlock(&mapping->i_private_lock);
return;
}
if (!btrfs_meta_is_subpage(fs_info)) {
/*
- * We do this since we'll remove the pages after we've
- * removed the eb from the radix tree, so we could race
- * and have this page now attached to the new eb. So
- * only clear folio if it's still connected to
- * this eb.
+ * We do this since we'll remove the pages after we've removed
+ * the eb from the xarray, so we could race and have this page
+ * now attached to the new eb. So only clear folio if it's
+ * still connected to this eb.
*/
if (folio_test_private(folio) && folio_get_private(folio) == eb) {
BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
@@ -2787,7 +2778,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
folio_detach_private(folio);
}
if (mapped)
- spin_unlock(&folio->mapping->i_private_lock);
+ spin_unlock(&mapping->i_private_lock);
return;
}
@@ -2810,7 +2801,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
if (!folio_range_has_eb(folio))
btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
- spin_unlock(&folio->mapping->i_private_lock);
+ spin_unlock(&mapping->i_private_lock);
}
/* Release all folios attached to the extent buffer */
@@ -2825,9 +2816,6 @@ static void btrfs_release_extent_buffer_folios(const struct extent_buffer *eb)
continue;
detach_extent_buffer_folio(eb, folio);
-
- /* One for when we allocated the folio. */
- folio_put(folio);
}
}
@@ -2862,9 +2850,28 @@ static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info
return eb;
}
+/*
+ * For use in eb allocation error cleanup paths, as btrfs_release_extent_buffer()
+ * does not call folio_put(), and we need to set the folios to NULL so that
+ * btrfs_release_extent_buffer() will not detach them a second time.
+ */
+static void cleanup_extent_buffer_folios(struct extent_buffer *eb)
+{
+ const int num_folios = num_extent_folios(eb);
+
+ /* We canont use num_extent_folios() as loop bound as eb->folios changes. */
+ for (int i = 0; i < num_folios; i++) {
+ ASSERT(eb->folios[i]);
+ detach_extent_buffer_folio(eb, eb->folios[i]);
+ folio_put(eb->folios[i]);
+ eb->folios[i] = NULL;
+ }
+}
+
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
{
struct extent_buffer *new;
+ int num_folios;
int ret;
new = __alloc_extent_buffer(src->fs_info, src->start);
@@ -2879,25 +2886,34 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
ret = alloc_eb_folio_array(new, false);
- if (ret) {
- btrfs_release_extent_buffer(new);
- return NULL;
- }
+ if (ret)
+ goto release_eb;
- for (int i = 0; i < num_extent_folios(src); i++) {
+ ASSERT(num_extent_folios(src) == num_extent_folios(new),
+ "%d != %d", num_extent_folios(src), num_extent_folios(new));
+ /* Explicitly use the cached num_extent value from now on. */
+ num_folios = num_extent_folios(src);
+ for (int i = 0; i < num_folios; i++) {
struct folio *folio = new->folios[i];
ret = attach_extent_buffer_folio(new, folio, NULL);
- if (ret < 0) {
- btrfs_release_extent_buffer(new);
- return NULL;
- }
+ if (ret < 0)
+ goto cleanup_folios;
WARN_ON(folio_test_dirty(folio));
}
+ for (int i = 0; i < num_folios; i++)
+ folio_put(new->folios[i]);
+
copy_extent_buffer_full(new, src);
set_extent_buffer_uptodate(new);
return new;
+
+cleanup_folios:
+ cleanup_extent_buffer_folios(new);
+release_eb:
+ btrfs_release_extent_buffer(new);
+ return NULL;
}
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
@@ -2912,13 +2928,15 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
ret = alloc_eb_folio_array(eb, false);
if (ret)
- goto out;
+ goto release_eb;
for (int i = 0; i < num_extent_folios(eb); i++) {
ret = attach_extent_buffer_folio(eb, eb->folios[i], NULL);
if (ret < 0)
- goto out_detach;
+ goto cleanup_folios;
}
+ for (int i = 0; i < num_extent_folios(eb); i++)
+ folio_put(eb->folios[i]);
set_extent_buffer_uptodate(eb);
btrfs_set_header_nritems(eb, 0);
@@ -2926,15 +2944,10 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
-out_detach:
- for (int i = 0; i < num_extent_folios(eb); i++) {
- if (eb->folios[i]) {
- detach_extent_buffer_folio(eb, eb->folios[i]);
- folio_put(eb->folios[i]);
- }
- }
-out:
- kmem_cache_free(extent_buffer_cache, eb);
+cleanup_folios:
+ cleanup_extent_buffer_folios(eb);
+release_eb:
+ btrfs_release_extent_buffer(eb);
return NULL;
}
@@ -2942,9 +2955,9 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
/*
- * The TREE_REF bit is first set when the extent_buffer is added
- * to the radix tree. It is also reset, if unset, when a new reference
- * is created by find_extent_buffer.
+ * The TREE_REF bit is first set when the extent_buffer is added to the
+ * xarray. It is also reset, if unset, when a new reference is created
+ * by find_extent_buffer.
*
* It is only cleared in two cases: freeing the last non-tree
* reference to the extent_buffer when its STALE bit is set or
@@ -2956,13 +2969,12 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* conditions between the calls to check_buffer_tree_ref in those
* codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * The actual lifetime of the extent_buffer in the radix tree is
- * adequately protected by the refcount, but the TREE_REF bit and
- * its corresponding reference are not. To protect against this
- * class of races, we call check_buffer_tree_ref from the codepaths
- * which trigger io. Note that once io is initiated, TREE_REF can no
- * longer be cleared, so that is the moment at which any such race is
- * best fixed.
+ * The actual lifetime of the extent_buffer in the xarray is adequately
+ * protected by the refcount, but the TREE_REF bit and its corresponding
+ * reference are not. To protect against this class of races, we call
+ * check_buffer_tree_ref() from the code paths which trigger io. Note that
+ * once io is initiated, TREE_REF can no longer be cleared, so that is
+ * the moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -3026,30 +3038,29 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
again:
- ret = radix_tree_preload(GFP_NOFS);
- if (ret) {
- exists = ERR_PTR(ret);
- goto free_eb;
+ xa_lock_irq(&fs_info->buffer_tree);
+ exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->sectorsize_bits,
+ NULL, eb, GFP_NOFS);
+ if (xa_is_err(exists)) {
+ ret = xa_err(exists);
+ xa_unlock_irq(&fs_info->buffer_tree);
+ btrfs_release_extent_buffer(eb);
+ return ERR_PTR(ret);
}
- spin_lock(&fs_info->buffer_lock);
- ret = radix_tree_insert(&fs_info->buffer_radix,
- start >> fs_info->sectorsize_bits, eb);
- spin_unlock(&fs_info->buffer_lock);
- radix_tree_preload_end();
- if (ret == -EEXIST) {
- exists = find_extent_buffer(fs_info, start);
- if (exists)
- goto free_eb;
- else
+ if (exists) {
+ if (!atomic_inc_not_zero(&exists->refs)) {
+ /* The extent buffer is being freed, retry. */
+ xa_unlock_irq(&fs_info->buffer_tree);
goto again;
+ }
+ xa_unlock_irq(&fs_info->buffer_tree);
+ btrfs_release_extent_buffer(eb);
+ return exists;
}
+ xa_unlock_irq(&fs_info->buffer_tree);
check_buffer_tree_ref(eb);
- set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
return eb;
-free_eb:
- btrfs_release_extent_buffer(eb);
- return exists;
#else
/* Stub to avoid linker error when compiled with optimizations turned off. */
return NULL;
@@ -3064,9 +3075,9 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
lockdep_assert_held(&folio->mapping->i_private_lock);
/*
- * For subpage case, we completely rely on radix tree to ensure we
- * don't try to insert two ebs for the same bytenr. So here we always
- * return NULL and just continue.
+ * For subpage case, we completely rely on xarray to ensure we don't try
+ * to insert two ebs for the same bytenr. So here we always return NULL
+ * and just continue.
*/
if (btrfs_meta_is_subpage(fs_info))
return NULL;
@@ -3100,10 +3111,9 @@ static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
return true;
}
- if (fs_info->nodesize < PAGE_SIZE &&
- offset_in_page(start) + fs_info->nodesize > PAGE_SIZE) {
+ if (fs_info->nodesize < PAGE_SIZE && !IS_ALIGNED(start, fs_info->nodesize)) {
btrfs_err(fs_info,
- "tree block crosses page boundary, start %llu nodesize %u",
+ "tree block is not nodesize aligned, start %llu nodesize %u",
start, fs_info->nodesize);
return true;
}
@@ -3139,7 +3149,7 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
struct btrfs_fs_info *fs_info = eb->fs_info;
struct address_space *mapping = fs_info->btree_inode->i_mapping;
const unsigned long index = eb->start >> PAGE_SHIFT;
- struct folio *existing_folio = NULL;
+ struct folio *existing_folio;
int ret;
ASSERT(found_eb_ret);
@@ -3148,6 +3158,7 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
ASSERT(eb->folios[i]);
retry:
+ existing_folio = NULL;
ret = filemap_add_folio(mapping, eb->folios[i], index + i,
GFP_NOFS | __GFP_NOFAIL);
if (!ret)
@@ -3155,10 +3166,8 @@ retry:
existing_folio = filemap_lock_folio(mapping, index + i);
/* The page cache only exists for a very short time, just retry. */
- if (IS_ERR(existing_folio)) {
- existing_folio = NULL;
+ if (IS_ERR(existing_folio))
goto retry;
- }
/* For now, we should only have single-page folios for btree inode. */
ASSERT(folio_nr_pages(existing_folio) == 1);
@@ -3199,7 +3208,7 @@ finish:
/*
* To inform we have an extra eb under allocation, so that
* detach_extent_buffer_page() won't release the folio private when the
- * eb hasn't been inserted into radix tree yet.
+ * eb hasn't been inserted into the xarray yet.
*
* The ref will be decreased when the eb releases the page, in
* detach_extent_buffer_page(). Thus needs no special handling in the
@@ -3306,7 +3315,7 @@ reallocate:
* using 0-order folios.
*/
if (unlikely(ret == -EAGAIN)) {
- ASSERT(0);
+ DEBUG_WARN("folio order mismatch between new eb and filemap");
goto reallocate;
}
attached++;
@@ -3333,10 +3342,9 @@ reallocate:
/*
* We can't unlock the pages just yet since the extent buffer
- * hasn't been properly inserted in the radix tree, this
- * opens a race with btree_release_folio which can free a page
- * while we are still filling in all pages for the buffer and
- * we could crash.
+ * hasn't been properly inserted into the xarray, this opens a
+ * race with btree_release_folio() which can free a page while we
+ * are still filling in all pages for the buffer and we could crash.
*/
}
if (uptodate)
@@ -3345,34 +3353,42 @@ reallocate:
if (page_contig)
eb->addr = folio_address(eb->folios[0]) + offset_in_page(eb->start);
again:
- ret = radix_tree_preload(GFP_NOFS);
- if (ret)
+ xa_lock_irq(&fs_info->buffer_tree);
+ existing_eb = __xa_cmpxchg(&fs_info->buffer_tree,
+ start >> fs_info->sectorsize_bits, NULL, eb,
+ GFP_NOFS);
+ if (xa_is_err(existing_eb)) {
+ ret = xa_err(existing_eb);
+ xa_unlock_irq(&fs_info->buffer_tree);
goto out;
-
- spin_lock(&fs_info->buffer_lock);
- ret = radix_tree_insert(&fs_info->buffer_radix,
- start >> fs_info->sectorsize_bits, eb);
- spin_unlock(&fs_info->buffer_lock);
- radix_tree_preload_end();
- if (ret == -EEXIST) {
- ret = 0;
- existing_eb = find_extent_buffer(fs_info, start);
- if (existing_eb)
- goto out;
- else
+ }
+ if (existing_eb) {
+ if (!atomic_inc_not_zero(&existing_eb->refs)) {
+ xa_unlock_irq(&fs_info->buffer_tree);
goto again;
+ }
+ xa_unlock_irq(&fs_info->buffer_tree);
+ goto out;
}
+ xa_unlock_irq(&fs_info->buffer_tree);
+
/* add one reference for the tree */
check_buffer_tree_ref(eb);
- set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
/*
* Now it's safe to unlock the pages because any calls to
* btree_release_folio will correctly detect that a page belongs to a
* live buffer and won't free them prematurely.
*/
- for (int i = 0; i < num_extent_folios(eb); i++)
+ for (int i = 0; i < num_extent_folios(eb); i++) {
folio_unlock(eb->folios[i]);
+ /*
+ * A folio that has been added to an address_space mapping
+ * should not continue holding the refcount from its original
+ * allocation indefinitely.
+ */
+ folio_put(eb->folios[i]);
+ }
return eb;
out:
@@ -3386,26 +3402,22 @@ out:
* want that to grab this eb, as we're getting ready to free it. So we
* have to detach it first and then unlock it.
*
- * We have to drop our reference and NULL it out here because in the
- * subpage case detaching does a btrfs_folio_dec_eb_refs() for our eb.
- * Below when we call btrfs_release_extent_buffer() we will call
- * detach_extent_buffer_folio() on our remaining pages in the !subpage
- * case. If we left eb->folios[i] populated in the subpage case we'd
- * double put our reference and be super sad.
+ * Note: the bounds is num_extent_pages() as we need to go through all slots.
*/
- for (int i = 0; i < attached; i++) {
- ASSERT(eb->folios[i]);
- detach_extent_buffer_folio(eb, eb->folios[i]);
- folio_unlock(eb->folios[i]);
- folio_put(eb->folios[i]);
+ for (int i = 0; i < num_extent_pages(eb); i++) {
+ struct folio *folio = eb->folios[i];
+
+ if (i < attached) {
+ ASSERT(folio);
+ detach_extent_buffer_folio(eb, folio);
+ folio_unlock(folio);
+ } else if (!folio) {
+ continue;
+ }
+
+ folio_put(folio);
eb->folios[i] = NULL;
}
- /*
- * Now all pages of that extent buffer is unmapped, set UNMAPPED flag,
- * so it can be cleaned up without utilizing folio->mapping.
- */
- set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
-
btrfs_release_extent_buffer(eb);
if (ret < 0)
return ERR_PTR(ret);
@@ -3428,18 +3440,27 @@ static int release_extent_buffer(struct extent_buffer *eb)
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
- if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
- struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
- spin_unlock(&eb->refs_lock);
+ spin_unlock(&eb->refs_lock);
- spin_lock(&fs_info->buffer_lock);
- radix_tree_delete(&fs_info->buffer_radix,
- eb->start >> fs_info->sectorsize_bits);
- spin_unlock(&fs_info->buffer_lock);
- } else {
- spin_unlock(&eb->refs_lock);
- }
+ /*
+ * We're erasing, theoretically there will be no allocations, so
+ * just use GFP_ATOMIC.
+ *
+ * We use cmpxchg instead of erase because we do not know if
+ * this eb is actually in the tree or not, we could be cleaning
+ * up an eb that we allocated but never inserted into the tree.
+ * Thus use cmpxchg to remove it from the tree if it is there,
+ * or leave the other entry if this isn't in the tree.
+ *
+ * The documentation says that putting a NULL value is the same
+ * as erase as long as XA_FLAGS_ALLOC is not set, which it isn't
+ * in this case.
+ */
+ xa_cmpxchg_irq(&fs_info->buffer_tree,
+ eb->start >> fs_info->sectorsize_bits, eb, NULL,
+ GFP_ATOMIC);
btrfs_leak_debug_del_eb(eb);
/* Should be safe to release folios at this point. */
@@ -3540,6 +3561,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
if (!test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags))
return;
+ buffer_tree_clear_mark(eb, PAGECACHE_TAG_DIRTY);
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, -eb->len,
fs_info->dirty_metadata_batch);
@@ -3588,6 +3610,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
folio_lock(eb->folios[0]);
for (int i = 0; i < num_extent_folios(eb); i++)
btrfs_meta_folio_set_dirty(eb->folios[i], eb);
+ buffer_tree_set_mark(eb, PAGECACHE_TAG_DIRTY);
if (subpage)
folio_unlock(eb->folios[0]);
percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes,
@@ -3647,12 +3670,10 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
btrfs_validate_extent_buffer(eb, &bbio->parent_check) < 0)
uptodate = false;
- if (uptodate) {
+ if (uptodate)
set_extent_buffer_uptodate(eb);
- } else {
+ else
clear_extent_buffer_uptodate(eb);
- set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
- }
clear_extent_buffer_reading(eb);
free_extent_buffer(eb);
@@ -3691,7 +3712,6 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
return 0;
}
- clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
check_buffer_tree_ref(eb);
atomic_inc(&eb->refs);
@@ -3737,7 +3757,7 @@ static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
btrfs_warn(eb->fs_info,
"access to eb bytenr %llu len %u out of range start %lu len %lu",
eb->start, eb->len, start, len);
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
return true;
}
@@ -4273,71 +4293,17 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
}
}
-#define GANG_LOOKUP_SIZE 16
-static struct extent_buffer *get_next_extent_buffer(
- const struct btrfs_fs_info *fs_info, struct folio *folio, u64 bytenr)
-{
- struct extent_buffer *gang[GANG_LOOKUP_SIZE];
- struct extent_buffer *found = NULL;
- u64 folio_start = folio_pos(folio);
- u64 cur = folio_start;
-
- ASSERT(in_range(bytenr, folio_start, PAGE_SIZE));
- lockdep_assert_held(&fs_info->buffer_lock);
-
- while (cur < folio_start + PAGE_SIZE) {
- int ret;
- int i;
-
- ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
- (void **)gang, cur >> fs_info->sectorsize_bits,
- min_t(unsigned int, GANG_LOOKUP_SIZE,
- PAGE_SIZE / fs_info->nodesize));
- if (ret == 0)
- goto out;
- for (i = 0; i < ret; i++) {
- /* Already beyond page end */
- if (gang[i]->start >= folio_start + PAGE_SIZE)
- goto out;
- /* Found one */
- if (gang[i]->start >= bytenr) {
- found = gang[i];
- goto out;
- }
- }
- cur = gang[ret - 1]->start + gang[ret - 1]->len;
- }
-out:
- return found;
-}
-
static int try_release_subpage_extent_buffer(struct folio *folio)
{
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
- u64 cur = folio_pos(folio);
- const u64 end = cur + PAGE_SIZE;
+ struct extent_buffer *eb;
+ unsigned long start = (folio_pos(folio) >> fs_info->sectorsize_bits);
+ unsigned long index = start;
+ unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1;
int ret;
- while (cur < end) {
- struct extent_buffer *eb = NULL;
-
- /*
- * Unlike try_release_extent_buffer() which uses folio private
- * to grab buffer, for subpage case we rely on radix tree, thus
- * we need to ensure radix tree consistency.
- *
- * We also want an atomic snapshot of the radix tree, thus go
- * with spinlock rather than RCU.
- */
- spin_lock(&fs_info->buffer_lock);
- eb = get_next_extent_buffer(fs_info, folio, cur);
- if (!eb) {
- /* No more eb in the page range after or at cur */
- spin_unlock(&fs_info->buffer_lock);
- break;
- }
- cur = eb->start + eb->len;
-
+ xa_lock_irq(&fs_info->buffer_tree);
+ xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) {
/*
* The same as try_release_extent_buffer(), to ensure the eb
* won't disappear out from under us.
@@ -4345,10 +4311,9 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
spin_unlock(&eb->refs_lock);
- spin_unlock(&fs_info->buffer_lock);
- break;
+ continue;
}
- spin_unlock(&fs_info->buffer_lock);
+ xa_unlock_irq(&fs_info->buffer_tree);
/*
* If tree ref isn't set then we know the ref on this eb is a
@@ -4366,7 +4331,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* release_extent_buffer() will release the refs_lock.
*/
release_extent_buffer(eb);
+ xa_lock_irq(&fs_info->buffer_tree);
}
+ xa_unlock_irq(&fs_info->buffer_tree);
+
/*
* Finally to check if we have cleared folio private, as if we have
* released all ebs in the page, the folio private should be cleared now.
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f5b28b5c4908..e36e8d6a00bc 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -38,16 +38,10 @@ struct btrfs_tree_parent_check;
enum {
EXTENT_BUFFER_UPTODATE,
EXTENT_BUFFER_DIRTY,
- EXTENT_BUFFER_CORRUPT,
- /* this got triggered by readahead */
- EXTENT_BUFFER_READAHEAD,
EXTENT_BUFFER_TREE_REF,
EXTENT_BUFFER_STALE,
EXTENT_BUFFER_WRITEBACK,
- /* read IO error */
- EXTENT_BUFFER_READ_ERR,
EXTENT_BUFFER_UNMAPPED,
- EXTENT_BUFFER_IN_TREE,
/* write IO error */
EXTENT_BUFFER_WRITE_ERR,
/* Indicate the extent buffer is written zeroed out (for zoned) */
@@ -79,7 +73,7 @@ enum {
* single word in a bitmap may straddle two pages in the extent buffer.
*/
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BYTE_MASK ((1U << BITS_PER_BYTE) - 1)
#define BITMAP_FIRST_BYTE_MASK(start) \
((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
#define BITMAP_LAST_BYTE_MASK(nbits) \
@@ -246,6 +240,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
+void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
void btrfs_readahead(struct readahead_control *rac);
int set_folio_extent_mapped(struct folio *folio);
void clear_folio_extent_mapped(struct folio *folio);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 7f46abbd6311..02bfdb976e40 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -13,7 +13,7 @@
static struct kmem_cache *extent_map_cache;
-int __init extent_map_init(void)
+int __init btrfs_extent_map_init(void)
{
extent_map_cache = kmem_cache_create("btrfs_extent_map",
sizeof(struct extent_map), 0, 0, NULL);
@@ -22,7 +22,7 @@ int __init extent_map_init(void)
return 0;
}
-void __cold extent_map_exit(void)
+void __cold btrfs_extent_map_exit(void)
{
kmem_cache_destroy(extent_map_cache);
}
@@ -31,7 +31,7 @@ void __cold extent_map_exit(void)
* Initialize the extent tree @tree. Should be called for each new inode or
* other user of the extent_map interface.
*/
-void extent_map_tree_init(struct extent_map_tree *tree)
+void btrfs_extent_map_tree_init(struct extent_map_tree *tree)
{
tree->root = RB_ROOT;
INIT_LIST_HEAD(&tree->modified_extents);
@@ -42,7 +42,7 @@ void extent_map_tree_init(struct extent_map_tree *tree)
* Allocate a new extent_map structure. The new structure is returned with a
* reference count of one and needs to be freed using free_extent_map()
*/
-struct extent_map *alloc_extent_map(void)
+struct extent_map *btrfs_alloc_extent_map(void)
{
struct extent_map *em;
em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
@@ -58,12 +58,12 @@ struct extent_map *alloc_extent_map(void)
* Drop the reference out on @em by one and free the structure if the reference
* count hits zero.
*/
-void free_extent_map(struct extent_map *em)
+void btrfs_free_extent_map(struct extent_map *em)
{
if (!em)
return;
if (refcount_dec_and_test(&em->refs)) {
- WARN_ON(extent_map_in_tree(em));
+ WARN_ON(btrfs_extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
kmem_cache_free(extent_map_cache, em);
}
@@ -102,19 +102,19 @@ static int tree_insert(struct rb_root *root, struct extent_map *em)
if (em->start < entry->start)
p = &(*p)->rb_left;
- else if (em->start >= extent_map_end(entry))
+ else if (em->start >= btrfs_extent_map_end(entry))
p = &(*p)->rb_right;
else
return -EEXIST;
}
orig_parent = parent;
- while (parent && em->start >= extent_map_end(entry)) {
+ while (parent && em->start >= btrfs_extent_map_end(entry)) {
parent = rb_next(parent);
entry = rb_entry(parent, struct extent_map, rb_node);
}
if (parent)
- if (end > entry->start && em->start < extent_map_end(entry))
+ if (end > entry->start && em->start < btrfs_extent_map_end(entry))
return -EEXIST;
parent = orig_parent;
@@ -124,7 +124,7 @@ static int tree_insert(struct rb_root *root, struct extent_map *em)
entry = rb_entry(parent, struct extent_map, rb_node);
}
if (parent)
- if (end > entry->start && em->start < extent_map_end(entry))
+ if (end > entry->start && em->start < btrfs_extent_map_end(entry))
return -EEXIST;
rb_link_node(&em->rb_node, orig_parent, p);
@@ -136,8 +136,8 @@ static int tree_insert(struct rb_root *root, struct extent_map *em)
* Search through the tree for an extent_map with a given offset. If it can't
* be found, try to find some neighboring extents
*/
-static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
- struct rb_node **prev_or_next_ret)
+static struct rb_node *tree_search(struct rb_root *root, u64 offset,
+ struct rb_node **prev_or_next_ret)
{
struct rb_node *n = root->rb_node;
struct rb_node *prev = NULL;
@@ -154,14 +154,14 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
if (offset < entry->start)
n = n->rb_left;
- else if (offset >= extent_map_end(entry))
+ else if (offset >= btrfs_extent_map_end(entry))
n = n->rb_right;
else
return n;
}
orig_prev = prev;
- while (prev && offset >= extent_map_end(prev_entry)) {
+ while (prev && offset >= btrfs_extent_map_end(prev_entry)) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
@@ -188,14 +188,14 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
static inline u64 extent_map_block_len(const struct extent_map *em)
{
- if (extent_map_is_compressed(em))
+ if (btrfs_extent_map_is_compressed(em))
return em->disk_num_bytes;
return em->len;
}
static inline u64 extent_map_block_end(const struct extent_map *em)
{
- const u64 block_start = extent_map_block_start(em);
+ const u64 block_start = btrfs_extent_map_block_start(em);
const u64 block_end = block_start + extent_map_block_len(em);
if (block_end < block_start)
@@ -210,7 +210,7 @@ static bool can_merge_extent_map(const struct extent_map *em)
return false;
/* Don't merge compressed extents, we need to know their actual size. */
- if (extent_map_is_compressed(em))
+ if (btrfs_extent_map_is_compressed(em))
return false;
if (em->flags & EXTENT_FLAG_LOGGING)
@@ -230,7 +230,7 @@ static bool can_merge_extent_map(const struct extent_map *em)
/* Check to see if two extent_map structs are adjacent and safe to merge. */
static bool mergeable_maps(const struct extent_map *prev, const struct extent_map *next)
{
- if (extent_map_end(prev) != next->start)
+ if (btrfs_extent_map_end(prev) != next->start)
return false;
/*
@@ -242,7 +242,7 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
return false;
if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
- return extent_map_block_start(next) == extent_map_block_end(prev);
+ return btrfs_extent_map_block_start(next) == extent_map_block_end(prev);
/* HOLES and INLINE extents. */
return next->disk_bytenr == prev->disk_bytenr;
@@ -270,8 +270,8 @@ static void merge_ondisk_extents(const struct extent_map *prev, const struct ext
u64 new_offset;
/* @prev and @next should not be compressed. */
- ASSERT(!extent_map_is_compressed(prev));
- ASSERT(!extent_map_is_compressed(next));
+ ASSERT(!btrfs_extent_map_is_compressed(prev));
+ ASSERT(!btrfs_extent_map_is_compressed(next));
/*
* There are two different cases where @prev and @next can be merged.
@@ -327,9 +327,9 @@ static void validate_extent_map(struct btrfs_fs_info *fs_info, struct extent_map
if (em->offset + em->len > em->ram_bytes)
dump_extent_map(fs_info, "ram_bytes too small", em);
if (em->offset + em->len > em->disk_num_bytes &&
- !extent_map_is_compressed(em))
+ !btrfs_extent_map_is_compressed(em))
dump_extent_map(fs_info, "disk_num_bytes too small", em);
- if (!extent_map_is_compressed(em) &&
+ if (!btrfs_extent_map_is_compressed(em) &&
em->ram_bytes != em->disk_num_bytes)
dump_extent_map(fs_info,
"ram_bytes mismatch with disk_num_bytes for non-compressed em",
@@ -361,8 +361,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
+ merge = rb_entry_safe(rb, struct extent_map, rb_node);
+
if (rb && can_merge_extent_map(merge) && mergeable_maps(merge, em)) {
em->start = merge->start;
em->len += merge->len;
@@ -374,13 +374,13 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
validate_extent_map(fs_info, em);
remove_em(inode, merge);
- free_extent_map(merge);
+ btrfs_free_extent_map(merge);
}
}
rb = rb_next(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
+ merge = rb_entry_safe(rb, struct extent_map, rb_node);
+
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
em->len += merge->len;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
@@ -389,7 +389,7 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
em->generation = max(em->generation, merge->generation);
em->flags |= EXTENT_FLAG_MERGED;
remove_em(inode, merge);
- free_extent_map(merge);
+ btrfs_free_extent_map(merge);
}
}
@@ -409,7 +409,7 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
* -ENOENT when the extent is not found in the tree
* -EUCLEAN if the found extent does not match the expected start
*/
-int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
+int btrfs_unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *tree = &inode->extent_tree;
@@ -417,7 +417,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
struct extent_map *em;
write_lock(&tree->lock);
- em = lookup_extent_mapping(tree, start, len);
+ em = btrfs_lookup_extent_mapping(tree, start, len);
if (WARN_ON(!em)) {
btrfs_warn(fs_info,
@@ -444,17 +444,17 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
out:
write_unlock(&tree->lock);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return ret;
}
-void clear_em_logging(struct btrfs_inode *inode, struct extent_map *em)
+void btrfs_clear_em_logging(struct btrfs_inode *inode, struct extent_map *em)
{
lockdep_assert_held_write(&inode->extent_tree.lock);
em->flags &= ~EXTENT_FLAG_LOGGING;
- if (extent_map_in_tree(em))
+ if (btrfs_extent_map_in_tree(em))
try_merge_map(inode, em);
}
@@ -508,16 +508,15 @@ static int add_extent_mapping(struct btrfs_inode *inode,
return 0;
}
-static struct extent_map *
-__lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len, int strict)
+static struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len, int strict)
{
struct extent_map *em;
struct rb_node *rb_node;
struct rb_node *prev_or_next = NULL;
u64 end = range_end(start, len);
- rb_node = __tree_search(&tree->root, start, &prev_or_next);
+ rb_node = tree_search(&tree->root, start, &prev_or_next);
if (!rb_node) {
if (prev_or_next)
rb_node = prev_or_next;
@@ -527,7 +526,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
em = rb_entry(rb_node, struct extent_map, rb_node);
- if (strict && !(end > em->start && start < extent_map_end(em)))
+ if (strict && !(end > em->start && start < btrfs_extent_map_end(em)))
return NULL;
refcount_inc(&em->refs);
@@ -546,10 +545,10 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
* intersect, so check the object returned carefully to make sure that no
* additional lookups are needed.
*/
-struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len)
+struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len)
{
- return __lookup_extent_mapping(tree, start, len, 1);
+ return lookup_extent_mapping(tree, start, len, 1);
}
/*
@@ -564,10 +563,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
*
* If one can't be found, any nearby extent may be returned
*/
-struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len)
+struct extent_map *btrfs_search_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len)
{
- return __lookup_extent_mapping(tree, start, len, 0);
+ return lookup_extent_mapping(tree, start, len, 0);
}
/*
@@ -579,7 +578,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
* Remove @em from the extent tree of @inode. No reference counts are dropped,
* and no checks are done to see if the range is in use.
*/
-void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
+void btrfs_remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
{
struct extent_map_tree *tree = &inode->extent_tree;
@@ -605,7 +604,7 @@ static void replace_extent_mapping(struct btrfs_inode *inode,
validate_extent_map(fs_info, new);
WARN_ON(cur->flags & EXTENT_FLAG_PINNED);
- ASSERT(extent_map_in_tree(cur));
+ ASSERT(btrfs_extent_map_in_tree(cur));
if (!(cur->flags & EXTENT_FLAG_LOGGING))
list_del_init(&cur->list);
rb_replace_node(&cur->rb_node, &new->rb_node, &tree->root);
@@ -651,7 +650,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
u64 end;
u64 start_diff;
- if (map_start < em->start || map_start >= extent_map_end(em))
+ if (map_start < em->start || map_start >= btrfs_extent_map_end(em))
return -EINVAL;
if (existing->start > map_start) {
@@ -662,10 +661,10 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
next = next_extent_map(prev);
}
- start = prev ? extent_map_end(prev) : em->start;
+ start = prev ? btrfs_extent_map_end(prev) : em->start;
start = max_t(u64, start, em->start);
- end = next ? next->start : extent_map_end(em);
- end = min_t(u64, end, extent_map_end(em));
+ end = next ? next->start : btrfs_extent_map_end(em);
+ end = min_t(u64, end, btrfs_extent_map_end(em));
start_diff = start - em->start;
em->start = start;
em->len = end - start;
@@ -716,7 +715,7 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
if (ret == -EEXIST) {
struct extent_map *existing;
- existing = search_extent_mapping(&inode->extent_tree, start, len);
+ existing = btrfs_search_extent_mapping(&inode->extent_tree, start, len);
trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
@@ -725,8 +724,8 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
* extent causing the -EEXIST.
*/
if (start >= existing->start &&
- start < extent_map_end(existing)) {
- free_extent_map(em);
+ start < btrfs_extent_map_end(existing)) {
+ btrfs_free_extent_map(em);
*em_in = existing;
ret = 0;
} else {
@@ -739,14 +738,14 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
*/
ret = merge_extent_mapping(inode, existing, em, start);
if (WARN_ON(ret)) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
*em_in = NULL;
btrfs_warn(fs_info,
"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
- existing->start, extent_map_end(existing),
+ existing->start, btrfs_extent_map_end(existing),
orig_start, orig_start + orig_len, start);
}
- free_extent_map(existing);
+ btrfs_free_extent_map(existing);
}
}
@@ -772,8 +771,8 @@ static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
em = rb_entry(node, struct extent_map, rb_node);
em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
- remove_extent_mapping(inode, em);
- free_extent_map(em);
+ btrfs_remove_extent_mapping(inode, em);
+ btrfs_free_extent_map(em);
if (cond_resched_rwlock_write(&tree->lock))
node = rb_first(&tree->root);
@@ -826,15 +825,15 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
* range ends after our range (and they might be the same extent map),
* because we need to split those two extent maps at the boundaries.
*/
- split = alloc_extent_map();
- split2 = alloc_extent_map();
+ split = btrfs_alloc_extent_map();
+ split2 = btrfs_alloc_extent_map();
write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
+ em = btrfs_lookup_extent_mapping(em_tree, start, len);
while (em) {
/* extent_map_end() returns exclusive value (last byte + 1). */
- const u64 em_end = extent_map_end(em);
+ const u64 em_end = btrfs_extent_map_end(em);
struct extent_map *next_em = NULL;
u64 gen;
unsigned long flags;
@@ -898,7 +897,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->generation = gen;
split->flags = flags;
replace_extent_mapping(inode, em, split, modified);
- free_extent_map(split);
+ btrfs_free_extent_map(split);
split = split2;
split2 = NULL;
}
@@ -925,7 +924,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->ram_bytes = split->len;
}
- if (extent_map_in_tree(em)) {
+ if (btrfs_extent_map_in_tree(em)) {
replace_extent_mapping(inode, em, split, modified);
} else {
int ret;
@@ -936,11 +935,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
if (WARN_ON(ret != 0) && modified)
btrfs_set_inode_full_sync(inode);
}
- free_extent_map(split);
+ btrfs_free_extent_map(split);
split = NULL;
}
remove_em:
- if (extent_map_in_tree(em)) {
+ if (btrfs_extent_map_in_tree(em)) {
/*
* If the extent map is still in the tree it means that
* either of the following is true:
@@ -965,25 +964,25 @@ remove_em:
ASSERT(!split);
btrfs_set_inode_full_sync(inode);
}
- remove_extent_mapping(inode, em);
+ btrfs_remove_extent_mapping(inode, em);
}
/*
* Once for the tree reference (we replaced or removed the
* extent map from the tree).
*/
- free_extent_map(em);
+ btrfs_free_extent_map(em);
next:
/* Once for us (for our lookup reference). */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = next_em;
}
write_unlock(&em_tree->lock);
- free_extent_map(split);
- free_extent_map(split2);
+ btrfs_free_extent_map(split);
+ btrfs_free_extent_map(split2);
}
/*
@@ -1007,7 +1006,7 @@ int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
struct extent_map_tree *tree = &inode->extent_tree;
int ret;
- ASSERT(!extent_map_in_tree(new_em));
+ ASSERT(!btrfs_extent_map_in_tree(new_em));
/*
* The caller has locked an appropriate file range in the inode's io
@@ -1033,8 +1032,8 @@ int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
*
* This function is used when an ordered_extent needs to be split.
*/
-int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
- u64 new_logical)
+int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
+ u64 new_logical)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
@@ -1046,25 +1045,25 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
ASSERT(pre != 0);
ASSERT(pre < len);
- split_pre = alloc_extent_map();
+ split_pre = btrfs_alloc_extent_map();
if (!split_pre)
return -ENOMEM;
- split_mid = alloc_extent_map();
+ split_mid = btrfs_alloc_extent_map();
if (!split_mid) {
ret = -ENOMEM;
goto out_free_pre;
}
- lock_extent(&inode->io_tree, start, start + len - 1, NULL);
+ btrfs_lock_extent(&inode->io_tree, start, start + len - 1, NULL);
write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
+ em = btrfs_lookup_extent_mapping(em_tree, start, len);
if (!em) {
ret = -EIO;
goto out_unlock;
}
ASSERT(em->len == len);
- ASSERT(!extent_map_is_compressed(em));
+ ASSERT(!btrfs_extent_map_is_compressed(em));
ASSERT(em->disk_bytenr < EXTENT_MAP_LAST_BYTE);
ASSERT(em->flags & EXTENT_FLAG_PINNED);
ASSERT(!(em->flags & EXTENT_FLAG_LOGGING));
@@ -1093,7 +1092,7 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* Insert the middle extent_map. */
split_mid->start = em->start + pre;
split_mid->len = em->len - pre;
- split_mid->disk_bytenr = extent_map_block_start(em) + pre;
+ split_mid->disk_bytenr = btrfs_extent_map_block_start(em) + pre;
split_mid->disk_num_bytes = split_mid->len;
split_mid->offset = 0;
split_mid->ram_bytes = split_mid->len;
@@ -1102,16 +1101,16 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
add_extent_mapping(inode, split_mid, 1);
/* Once for us */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Once for the tree */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
out_unlock:
write_unlock(&em_tree->lock);
- unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
- free_extent_map(split_mid);
+ btrfs_unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
+ btrfs_free_extent_map(split_mid);
out_free_pre:
- free_extent_map(split_pre);
+ btrfs_free_extent_map(split_pre);
return ret;
}
@@ -1168,10 +1167,10 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
if (!list_empty(&em->list) && em->generation >= cur_fs_gen)
btrfs_set_inode_full_sync(inode);
- remove_extent_mapping(inode, em);
+ btrfs_remove_extent_mapping(inode, em);
trace_btrfs_extent_map_shrinker_remove_em(inode, em);
/* Drop the reference for the tree. */
- free_extent_map(em);
+ btrfs_free_extent_map(em);
nr_dropped++;
next:
if (ctx->scanned >= ctx->nr_to_scan)
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index cd123b266b64..d4b81ee4d97b 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -108,8 +108,8 @@ struct extent_map_tree {
struct btrfs_inode;
-static inline void extent_map_set_compression(struct extent_map *em,
- enum btrfs_compression_type type)
+static inline void btrfs_extent_map_set_compression(struct extent_map *em,
+ enum btrfs_compression_type type)
{
if (type == BTRFS_COMPRESS_ZLIB)
em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
@@ -119,7 +119,8 @@ static inline void extent_map_set_compression(struct extent_map *em,
em->flags |= EXTENT_FLAG_COMPRESS_ZSTD;
}
-static inline enum btrfs_compression_type extent_map_compression(const struct extent_map *em)
+static inline enum btrfs_compression_type btrfs_extent_map_compression(
+ const struct extent_map *em)
{
if (em->flags & EXTENT_FLAG_COMPRESS_ZLIB)
return BTRFS_COMPRESS_ZLIB;
@@ -137,50 +138,50 @@ static inline enum btrfs_compression_type extent_map_compression(const struct ex
* More efficient way to determine if extent is compressed, instead of using
* 'extent_map_compression() != BTRFS_COMPRESS_NONE'.
*/
-static inline bool extent_map_is_compressed(const struct extent_map *em)
+static inline bool btrfs_extent_map_is_compressed(const struct extent_map *em)
{
return (em->flags & (EXTENT_FLAG_COMPRESS_ZLIB |
EXTENT_FLAG_COMPRESS_LZO |
EXTENT_FLAG_COMPRESS_ZSTD)) != 0;
}
-static inline int extent_map_in_tree(const struct extent_map *em)
+static inline int btrfs_extent_map_in_tree(const struct extent_map *em)
{
return !RB_EMPTY_NODE(&em->rb_node);
}
-static inline u64 extent_map_block_start(const struct extent_map *em)
+static inline u64 btrfs_extent_map_block_start(const struct extent_map *em)
{
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
- if (extent_map_is_compressed(em))
+ if (btrfs_extent_map_is_compressed(em))
return em->disk_bytenr;
return em->disk_bytenr + em->offset;
}
return em->disk_bytenr;
}
-static inline u64 extent_map_end(const struct extent_map *em)
+static inline u64 btrfs_extent_map_end(const struct extent_map *em)
{
if (em->start + em->len < em->start)
return (u64)-1;
return em->start + em->len;
}
-void extent_map_tree_init(struct extent_map_tree *tree);
-struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len);
-void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em);
-int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
- u64 new_logical);
-
-struct extent_map *alloc_extent_map(void);
-void free_extent_map(struct extent_map *em);
-int __init extent_map_init(void);
-void __cold extent_map_exit(void);
-int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen);
-void clear_em_logging(struct btrfs_inode *inode, struct extent_map *em);
-struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len);
+void btrfs_extent_map_tree_init(struct extent_map_tree *tree);
+struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len);
+void btrfs_remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em);
+int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
+ u64 new_logical);
+
+struct extent_map *btrfs_alloc_extent_map(void);
+void btrfs_free_extent_map(struct extent_map *em);
+int __init btrfs_extent_map_init(void);
+void __cold btrfs_extent_map_exit(void);
+int btrfs_unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen);
+void btrfs_clear_em_logging(struct btrfs_inode *inode, struct extent_map *em);
+struct extent_map *btrfs_search_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len);
int btrfs_add_extent_mapping(struct btrfs_inode *inode,
struct extent_map **em_in, u64 start, u64 len);
void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
diff --git a/fs/btrfs/fiemap.c b/fs/btrfs/fiemap.c
index b80c07ad8c5e..43bf0979fd53 100644
--- a/fs/btrfs/fiemap.c
+++ b/fs/btrfs/fiemap.c
@@ -634,7 +634,7 @@ static int extent_fiemap(struct btrfs_inode *inode,
const u64 ino = btrfs_ino(inode);
struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct fiemap_cache cache = { 0 };
struct btrfs_backref_share_check_ctx *backref_ctx;
u64 last_extent_end = 0;
@@ -661,7 +661,7 @@ restart:
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
- lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
@@ -841,7 +841,7 @@ check_eof_delalloc:
}
out_unlock:
- unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
btrfs_release_path(path);
@@ -871,10 +871,9 @@ out_unlock:
ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
- free_extent_state(delalloc_cached_state);
+ btrfs_free_extent_state(delalloc_cached_state);
kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 344b4db487a0..54d523d4f421 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -46,7 +46,7 @@
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
{
u64 start, end, i_size;
- int ret;
+ bool found;
spin_lock(&inode->lock);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
@@ -55,9 +55,9 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
goto out_unlock;
}
- ret = find_contiguous_extent_bit(inode->file_extent_tree, 0, &start,
- &end, EXTENT_DIRTY);
- if (!ret && start == 0)
+ found = btrfs_find_contiguous_extent_bit(inode->file_extent_tree, 0, &start,
+ &end, EXTENT_DIRTY);
+ if (found && start == 0)
i_size = min(i_size, end + 1);
else
i_size = 0;
@@ -91,8 +91,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
- return set_extent_bit(inode->file_extent_tree, start, start + len - 1,
- EXTENT_DIRTY, NULL);
+ return btrfs_set_extent_bit(inode->file_extent_tree, start, start + len - 1,
+ EXTENT_DIRTY, NULL);
}
/*
@@ -121,8 +121,8 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
len == (u64)-1);
- return clear_extent_bit(inode->file_extent_tree, start,
- start + len - 1, EXTENT_DIRTY, NULL);
+ return btrfs_clear_extent_bit(inode->file_extent_tree, start,
+ start + len - 1, EXTENT_DIRTY, NULL);
}
static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes)
@@ -336,7 +336,7 @@ out:
*
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
-blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
+int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -347,12 +347,12 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
u32 orig_len = bio->bi_iter.bi_size;
u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
- blk_status_t ret = BLK_STS_OK;
+ int ret = 0;
u32 bio_offset = 0;
if ((inode->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state))
- return BLK_STS_OK;
+ return 0;
/*
* This function is only called for read bio.
@@ -369,12 +369,12 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
ASSERT(bio_op(bio) == REQ_OP_READ);
path = btrfs_alloc_path();
if (!path)
- return BLK_STS_RESOURCE;
+ return -ENOMEM;
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
if (!bbio->csum)
- return BLK_STS_RESOURCE;
+ return -ENOMEM;
} else {
bbio->csum = bbio->csum_inline;
}
@@ -406,7 +406,7 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
count = search_csum_tree(fs_info, path, cur_disk_bytenr,
orig_len - bio_offset, csum_dst);
if (count < 0) {
- ret = errno_to_blk_status(count);
+ ret = count;
if (bbio->csum != bbio->csum_inline)
kfree(bbio->csum);
bbio->csum = NULL;
@@ -430,9 +430,9 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
if (btrfs_root_id(inode->root) == BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset = bbio->file_offset + bio_offset;
- set_extent_bit(&inode->io_tree, file_offset,
- file_offset + sectorsize - 1,
- EXTENT_NODATASUM, NULL);
+ btrfs_set_extent_bit(&inode->io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM, NULL);
} else {
btrfs_warn_rl(fs_info,
"csum hole found for disk bytenr range [%llu, %llu)",
@@ -735,7 +735,7 @@ fail:
/*
* Calculate checksums of the data contained inside a bio.
*/
-blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
+int btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
@@ -757,7 +757,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
memalloc_nofs_restore(nofs_flag);
if (!sums)
- return BLK_STS_RESOURCE;
+ return -ENOMEM;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
@@ -794,11 +794,11 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
* record the updated logical address on Zone Append completion.
* Allocate just the structure with an empty sums array here for that case.
*/
-blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
+int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
{
bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
if (!bbio->sums)
- return BLK_STS_RESOURCE;
+ return -ENOMEM;
bbio->sums->len = bbio->bio.bi_iter.bi_size;
bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
@@ -1048,7 +1048,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key file_key;
struct btrfs_key found_key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_csum_item *item;
struct btrfs_csum_item *item_end;
struct extent_buffer *leaf = NULL;
@@ -1259,7 +1259,6 @@ found:
goto again;
}
out:
- btrfs_free_path(path);
return ret;
}
@@ -1297,7 +1296,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
em->offset = btrfs_file_extent_offset(leaf, fi);
if (compress_type != BTRFS_COMPRESS_NONE) {
- extent_map_set_compression(em, compress_type);
+ btrfs_extent_map_set_compression(em, compress_type);
} else {
/*
* Older kernels can create regular non-hole data
@@ -1317,7 +1316,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->start = 0;
em->len = fs_info->sectorsize;
em->offset = 0;
- extent_map_set_compression(em, compress_type);
+ btrfs_extent_map_set_compression(em, compress_type);
} else {
btrfs_err(fs_info,
"unknown file extent item type %d, inode %llu, offset %llu, "
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 6181a70ec3ef..63216c43676d 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -53,7 +53,7 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
-blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio);
+int btrfs_lookup_bio_sums(struct btrfs_bio *bbio);
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, u64 pos,
u64 num_bytes);
@@ -64,8 +64,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
-blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
+int btrfs_csum_one_bio(struct btrfs_bio *bbio);
+int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 71b8a825c447..8ce6f45f45e0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -98,9 +98,9 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos
* The pages may have already been dirty, clear out old accounting so
* we can set things up properly
*/
- clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- cached);
+ btrfs_clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ cached);
ret = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
extra_bits, cached);
@@ -508,20 +508,19 @@ out:
return ret;
}
-static int extent_mergeable(struct extent_buffer *leaf, int slot,
- u64 objectid, u64 bytenr, u64 orig_offset,
- u64 *start, u64 *end)
+static bool extent_mergeable(struct extent_buffer *leaf, int slot, u64 objectid,
+ u64 bytenr, u64 orig_offset, u64 *start, u64 *end)
{
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
u64 extent_end;
if (slot < 0 || slot >= btrfs_header_nritems(leaf))
- return 0;
+ return false;
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
- return 0;
+ return false;
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
@@ -530,15 +529,15 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
- return 0;
+ return false;
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
if ((*start && *start != key.offset) || (*end && *end != extent_end))
- return 0;
+ return false;
*start = key.offset;
*end = extent_end;
- return 1;
+ return true;
}
/*
@@ -553,7 +552,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_file_extent_item *fi;
struct btrfs_ref ref = { 0 };
struct btrfs_key key;
@@ -791,7 +790,6 @@ again:
}
}
out:
- btrfs_free_path(path);
return ret;
}
@@ -800,7 +798,7 @@ out:
* On success return a locked folio and 0
*/
static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 pos,
- u64 len, bool force_uptodate)
+ u64 len)
{
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
@@ -810,8 +808,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
if (folio_test_uptodate(folio))
return 0;
- if (!force_uptodate &&
- IS_ALIGNED(clamp_start, blocksize) &&
+ if (IS_ALIGNED(clamp_start, blocksize) &&
IS_ALIGNED(clamp_end, blocksize))
return 0;
@@ -858,32 +855,27 @@ static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
*/
static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ret,
loff_t pos, size_t write_bytes,
- bool force_uptodate, bool nowait)
+ bool nowait)
{
unsigned long index = pos >> PAGE_SHIFT;
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
- fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
+ fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN) |
+ fgf_set_order(write_bytes);
struct folio *folio;
int ret = 0;
again:
folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags, mask);
- if (IS_ERR(folio)) {
- if (nowait)
- ret = -EAGAIN;
- else
- ret = PTR_ERR(folio);
- return ret;
- }
- /* Only support page sized folio yet. */
- ASSERT(folio_order(folio) == 0);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+
ret = set_folio_extent_mapped(folio);
if (ret < 0) {
folio_unlock(folio);
folio_put(folio);
return ret;
}
- ret = prepare_uptodate_folio(inode, folio, pos, write_bytes, force_uptodate);
+ ret = prepare_uptodate_folio(inode, folio, pos, write_bytes);
if (ret) {
/* The folio is already unlocked. */
folio_put(folio);
@@ -924,14 +916,15 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
struct btrfs_ordered_extent *ordered;
if (nowait) {
- if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
- cached_state)) {
+ if (!btrfs_try_lock_extent(&inode->io_tree, start_pos,
+ last_pos, cached_state)) {
folio_unlock(folio);
folio_put(folio);
return -EAGAIN;
}
} else {
- lock_extent(&inode->io_tree, start_pos, last_pos, cached_state);
+ btrfs_lock_extent(&inode->io_tree, start_pos, last_pos,
+ cached_state);
}
ordered = btrfs_lookup_ordered_range(inode, start_pos,
@@ -939,8 +932,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
if (ordered &&
ordered->file_offset + ordered->num_bytes > start_pos &&
ordered->file_offset <= last_pos) {
- unlock_extent(&inode->io_tree, start_pos, last_pos,
- cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start_pos, last_pos,
+ cached_state);
folio_unlock(folio);
folio_put(folio);
btrfs_start_ordered_extent(ordered);
@@ -1020,7 +1013,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
else
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
return ret;
}
@@ -1077,241 +1070,306 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
return 0;
}
-ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
+static void release_space(struct btrfs_inode *inode, struct extent_changeset *data_reserved,
+ u64 start, u64 len, bool only_release_metadata)
{
- struct file *file = iocb->ki_filp;
- loff_t pos;
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct extent_changeset *data_reserved = NULL;
- u64 release_bytes = 0;
- u64 lockstart;
- u64 lockend;
- size_t num_written = 0;
- ssize_t ret;
- loff_t old_isize;
- unsigned int ilock_flags = 0;
- const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
- unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0);
- bool only_release_metadata = false;
-
- if (nowait)
- ilock_flags |= BTRFS_ILOCK_TRY;
+ if (len == 0)
+ return;
- ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
- if (ret < 0)
- return ret;
+ if (only_release_metadata) {
+ btrfs_check_nocow_unlock(inode);
+ btrfs_delalloc_release_metadata(inode, len, true);
+ } else {
+ const struct btrfs_fs_info *fs_info = inode->root->fs_info;
- /*
- * We can only trust the isize with inode lock held, or it can race with
- * other buffered writes and cause incorrect call of
- * pagecache_isize_extended() to overwrite existing data.
- */
- old_isize = i_size_read(inode);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ round_down(start, fs_info->sectorsize),
+ len, true);
+ }
+}
- ret = generic_write_checks(iocb, i);
- if (ret <= 0)
- goto out;
+/*
+ * Reserve data and metadata space for this buffered write range.
+ *
+ * Return >0 for the number of bytes reserved, which is always block aligned.
+ * Return <0 for error.
+ */
+static ssize_t reserve_space(struct btrfs_inode *inode,
+ struct extent_changeset **data_reserved,
+ u64 start, size_t *len, bool nowait,
+ bool *only_release_metadata)
+{
+ const struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const unsigned int block_offset = (start & (fs_info->sectorsize - 1));
+ size_t reserve_bytes;
+ int ret;
- ret = btrfs_write_check(iocb, ret);
- if (ret < 0)
- goto out;
+ ret = btrfs_check_data_free_space(inode, data_reserved, start, *len, nowait);
+ if (ret < 0) {
+ int can_nocow;
- pos = iocb->ki_pos;
- while (iov_iter_count(i) > 0) {
- struct extent_state *cached_state = NULL;
- size_t offset = offset_in_page(pos);
- size_t sector_offset;
- size_t write_bytes = min(iov_iter_count(i), PAGE_SIZE - offset);
- size_t reserve_bytes;
- size_t copied;
- size_t dirty_sectors;
- size_t num_sectors;
- struct folio *folio = NULL;
- int extents_locked;
- bool force_page_uptodate = false;
+ if (nowait && (ret == -ENOSPC || ret == -EAGAIN))
+ return -EAGAIN;
/*
- * Fault pages before locking them in prepare_one_folio()
- * to avoid recursive lock
+ * If we don't have to COW at the offset, reserve metadata only.
+ * write_bytes may get smaller than requested here.
*/
- if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
- ret = -EFAULT;
- break;
- }
+ can_nocow = btrfs_check_nocow_lock(inode, start, len, nowait);
+ if (can_nocow < 0)
+ ret = can_nocow;
+ if (can_nocow > 0)
+ ret = 0;
+ if (ret)
+ return ret;
+ *only_release_metadata = true;
+ }
- only_release_metadata = false;
- sector_offset = pos & (fs_info->sectorsize - 1);
+ reserve_bytes = round_up(*len + block_offset, fs_info->sectorsize);
+ WARN_ON(reserve_bytes == 0);
+ ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes,
+ reserve_bytes, nowait);
+ if (ret) {
+ if (!*only_release_metadata)
+ btrfs_free_reserved_data_space(inode, *data_reserved,
+ start, *len);
+ else
+ btrfs_check_nocow_unlock(inode);
- extent_changeset_release(data_reserved);
- ret = btrfs_check_data_free_space(BTRFS_I(inode),
- &data_reserved, pos,
- write_bytes, nowait);
- if (ret < 0) {
- int can_nocow;
+ if (nowait && ret == -ENOSPC)
+ ret = -EAGAIN;
+ return ret;
+ }
+ return reserve_bytes;
+}
- if (nowait && (ret == -ENOSPC || ret == -EAGAIN)) {
- ret = -EAGAIN;
- break;
- }
+/* Shrink the reserved data and metadata space from @reserved_len to @new_len. */
+static void shrink_reserved_space(struct btrfs_inode *inode,
+ struct extent_changeset *data_reserved,
+ u64 reserved_start, u64 reserved_len,
+ u64 new_len, bool only_release_metadata)
+{
+ const u64 diff = reserved_len - new_len;
- /*
- * If we don't have to COW at the offset, reserve
- * metadata only. write_bytes may get smaller than
- * requested here.
- */
- can_nocow = btrfs_check_nocow_lock(BTRFS_I(inode), pos,
- &write_bytes, nowait);
- if (can_nocow < 0)
- ret = can_nocow;
- if (can_nocow > 0)
- ret = 0;
- if (ret)
- break;
- only_release_metadata = true;
- }
+ ASSERT(new_len <= reserved_len);
+ btrfs_delalloc_shrink_extents(inode, reserved_len, new_len);
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(inode, diff, true);
+ else
+ btrfs_delalloc_release_space(inode, data_reserved,
+ reserved_start + new_len, diff, true);
+}
- reserve_bytes = round_up(write_bytes + sector_offset,
- fs_info->sectorsize);
- WARN_ON(reserve_bytes == 0);
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
- reserve_bytes,
- reserve_bytes, nowait);
- if (ret) {
- if (!only_release_metadata)
- btrfs_free_reserved_data_space(BTRFS_I(inode),
- data_reserved, pos,
- write_bytes);
- else
- btrfs_check_nocow_unlock(BTRFS_I(inode));
+/* Calculate the maximum amount of bytes we can write into one folio. */
+static size_t calc_write_bytes(const struct btrfs_inode *inode,
+ const struct iov_iter *iter, u64 start)
+{
+ const size_t max_folio_size = mapping_max_folio_size(inode->vfs_inode.i_mapping);
- if (nowait && ret == -ENOSPC)
- ret = -EAGAIN;
- break;
- }
+ return min(max_folio_size - (start & (max_folio_size - 1)),
+ iov_iter_count(iter));
+}
+
+/*
+ * Do the heavy-lifting work to copy one range into one folio of the page cache.
+ *
+ * Return > 0 in case we copied all bytes or just some of them.
+ * Return 0 if no bytes were copied, in which case the caller should retry.
+ * Return <0 on error.
+ */
+static int copy_one_range(struct btrfs_inode *inode, struct iov_iter *iter,
+ struct extent_changeset **data_reserved, u64 start,
+ bool nowait)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_state *cached_state = NULL;
+ size_t write_bytes = calc_write_bytes(inode, iter, start);
+ size_t copied;
+ const u64 reserved_start = round_down(start, fs_info->sectorsize);
+ u64 reserved_len;
+ struct folio *folio = NULL;
+ int extents_locked;
+ u64 lockstart;
+ u64 lockend;
+ bool only_release_metadata = false;
+ const unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0);
+ int ret;
+
+ /*
+ * Fault all pages before locking them in prepare_one_folio() to avoid
+ * recursive lock.
+ */
+ if (unlikely(fault_in_iov_iter_readable(iter, write_bytes)))
+ return -EFAULT;
+ extent_changeset_release(*data_reserved);
+ ret = reserve_space(inode, data_reserved, start, &write_bytes, nowait,
+ &only_release_metadata);
+ if (ret < 0)
+ return ret;
+ reserved_len = ret;
+ /* Write range must be inside the reserved range. */
+ ASSERT(reserved_start <= start);
+ ASSERT(start + write_bytes <= reserved_start + reserved_len);
- release_bytes = reserve_bytes;
again:
- ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags);
- if (ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
- break;
- }
+ ret = balance_dirty_pages_ratelimited_flags(inode->vfs_inode.i_mapping,
+ bdp_flags);
+ if (ret) {
+ btrfs_delalloc_release_extents(inode, reserved_len);
+ release_space(inode, *data_reserved, reserved_start, reserved_len,
+ only_release_metadata);
+ return ret;
+ }
- ret = prepare_one_folio(inode, &folio, pos, write_bytes,
- force_page_uptodate, false);
- if (ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes);
- break;
- }
+ ret = prepare_one_folio(&inode->vfs_inode, &folio, start, write_bytes, false);
+ if (ret) {
+ btrfs_delalloc_release_extents(inode, reserved_len);
+ release_space(inode, *data_reserved, reserved_start, reserved_len,
+ only_release_metadata);
+ return ret;
+ }
+
+ /*
+ * The reserved range goes beyond the current folio, shrink the reserved
+ * space to the folio boundary.
+ */
+ if (reserved_start + reserved_len > folio_pos(folio) + folio_size(folio)) {
+ const u64 last_block = folio_pos(folio) + folio_size(folio);
+
+ shrink_reserved_space(inode, *data_reserved, reserved_start,
+ reserved_len, last_block - reserved_start,
+ only_release_metadata);
+ write_bytes = last_block - start;
+ reserved_len = last_block - reserved_start;
+ }
+
+ extents_locked = lock_and_cleanup_extent_if_need(inode, folio, start,
+ write_bytes, &lockstart,
+ &lockend, nowait,
+ &cached_state);
+ if (extents_locked < 0) {
+ if (!nowait && extents_locked == -EAGAIN)
+ goto again;
- extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
- folio, pos, write_bytes, &lockstart,
- &lockend, nowait, &cached_state);
- if (extents_locked < 0) {
- if (!nowait && extents_locked == -EAGAIN)
- goto again;
+ btrfs_delalloc_release_extents(inode, reserved_len);
+ release_space(inode, *data_reserved, reserved_start, reserved_len,
+ only_release_metadata);
+ ret = extents_locked;
+ return ret;
+ }
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes);
- ret = extents_locked;
- break;
- }
+ copied = copy_folio_from_iter_atomic(folio, offset_in_folio(folio, start),
+ write_bytes, iter);
+ flush_dcache_folio(folio);
- copied = copy_folio_from_iter_atomic(folio,
- offset_in_folio(folio, pos), write_bytes, i);
- flush_dcache_folio(folio);
+ if (unlikely(copied < write_bytes)) {
+ u64 last_block;
/*
- * If we get a partial write, we can end up with partially
- * uptodate page. Although if sector size < page size we can
- * handle it, but if it's not sector aligned it can cause
- * a lot of complexity, so make sure they don't happen by
- * forcing retry this copy.
+ * The original write range doesn't need an uptodate folio as
+ * the range is block aligned. But now a short copy happened.
+ * We cannot handle it without an uptodate folio.
+ *
+ * So just revert the range and we will retry.
*/
- if (unlikely(copied < write_bytes)) {
- if (!folio_test_uptodate(folio)) {
- iov_iter_revert(i, copied);
- copied = 0;
- }
+ if (!folio_test_uptodate(folio)) {
+ iov_iter_revert(iter, copied);
+ copied = 0;
}
- num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
- dirty_sectors = round_up(copied + sector_offset,
- fs_info->sectorsize);
- dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);
-
+ /* No copied bytes, unlock, release reserved space and exit. */
if (copied == 0) {
- force_page_uptodate = true;
- dirty_sectors = 0;
- } else {
- force_page_uptodate = false;
+ if (extents_locked)
+ btrfs_unlock_extent(&inode->io_tree, lockstart, lockend,
+ &cached_state);
+ else
+ btrfs_free_extent_state(cached_state);
+ btrfs_delalloc_release_extents(inode, reserved_len);
+ release_space(inode, *data_reserved, reserved_start, reserved_len,
+ only_release_metadata);
+ btrfs_drop_folio(fs_info, folio, start, copied);
+ return 0;
}
- if (num_sectors > dirty_sectors) {
- /* release everything except the sectors we dirtied */
- release_bytes -= dirty_sectors << fs_info->sectorsize_bits;
- if (only_release_metadata) {
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- release_bytes, true);
- } else {
- u64 release_start = round_up(pos + copied,
- fs_info->sectorsize);
- btrfs_delalloc_release_space(BTRFS_I(inode),
- data_reserved, release_start,
- release_bytes, true);
- }
- }
+ /* Release the reserved space beyond the last block. */
+ last_block = round_up(start + copied, fs_info->sectorsize);
+
+ shrink_reserved_space(inode, *data_reserved, reserved_start,
+ reserved_len, last_block - reserved_start,
+ only_release_metadata);
+ reserved_len = last_block - reserved_start;
+ }
- release_bytes = round_up(copied + sector_offset,
- fs_info->sectorsize);
+ ret = btrfs_dirty_folio(inode, folio, start, copied, &cached_state,
+ only_release_metadata);
+ /*
+ * If we have not locked the extent range, because the range's start
+ * offset is >= i_size, we might still have a non-NULL cached extent
+ * state, acquired while marking the extent range as delalloc through
+ * btrfs_dirty_page(). Therefore free any possible cached extent state
+ * to avoid a memory leak.
+ */
+ if (extents_locked)
+ btrfs_unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ else
+ btrfs_free_extent_state(cached_state);
- ret = btrfs_dirty_folio(BTRFS_I(inode), folio, pos, copied,
- &cached_state, only_release_metadata);
+ btrfs_delalloc_release_extents(inode, reserved_len);
+ if (ret) {
+ btrfs_drop_folio(fs_info, folio, start, copied);
+ release_space(inode, *data_reserved, reserved_start, reserved_len,
+ only_release_metadata);
+ return ret;
+ }
+ if (only_release_metadata)
+ btrfs_check_nocow_unlock(inode);
- /*
- * If we have not locked the extent range, because the range's
- * start offset is >= i_size, we might still have a non-NULL
- * cached extent state, acquired while marking the extent range
- * as delalloc through btrfs_dirty_page(). Therefore free any
- * possible cached extent state to avoid a memory leak.
- */
- if (extents_locked)
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, &cached_state);
- else
- free_extent_state(cached_state);
+ btrfs_drop_folio(fs_info, folio, start, copied);
+ return copied;
+}
- btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
- if (ret) {
- btrfs_drop_folio(fs_info, folio, pos, copied);
- break;
- }
+ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ loff_t pos;
+ struct inode *inode = file_inode(file);
+ struct extent_changeset *data_reserved = NULL;
+ size_t num_written = 0;
+ ssize_t ret;
+ loff_t old_isize;
+ unsigned int ilock_flags = 0;
+ const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
- release_bytes = 0;
- if (only_release_metadata)
- btrfs_check_nocow_unlock(BTRFS_I(inode));
+ if (nowait)
+ ilock_flags |= BTRFS_ILOCK_TRY;
- btrfs_drop_folio(fs_info, folio, pos, copied);
+ ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
+ if (ret < 0)
+ return ret;
- cond_resched();
+ /*
+ * We can only trust the isize with inode lock held, or it can race with
+ * other buffered writes and cause incorrect call of
+ * pagecache_isize_extended() to overwrite existing data.
+ */
+ old_isize = i_size_read(inode);
- pos += copied;
- num_written += copied;
- }
+ ret = generic_write_checks(iocb, iter);
+ if (ret <= 0)
+ goto out;
- if (release_bytes) {
- if (only_release_metadata) {
- btrfs_check_nocow_unlock(BTRFS_I(inode));
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- release_bytes, true);
- } else {
- btrfs_delalloc_release_space(BTRFS_I(inode),
- data_reserved,
- round_down(pos, fs_info->sectorsize),
- release_bytes, true);
- }
+ ret = btrfs_write_check(iocb, ret);
+ if (ret < 0)
+ goto out;
+
+ pos = iocb->ki_pos;
+ while (iov_iter_count(iter) > 0) {
+ ret = copy_one_range(BTRFS_I(inode), iter, &data_reserved, pos, nowait);
+ if (ret < 0)
+ break;
+ pos += ret;
+ num_written += ret;
+ cond_resched();
}
extent_changeset_free(data_reserved);
@@ -1406,7 +1464,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
if (private) {
kfree(private->filldir_buf);
- free_extent_state(private->llseek_cached_state);
+ btrfs_free_extent_state(private->llseek_cached_state);
kfree(private);
filp->private_data = NULL;
}
@@ -1783,16 +1841,12 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
unsigned long zero_start;
loff_t size;
size_t fsize = folio_size(folio);
- vm_fault_t ret;
- int ret2;
- int reserved = 0;
+ int ret;
u64 reserved_space;
u64 page_start;
u64 page_end;
u64 end;
- ASSERT(folio_order(folio) == 0);
-
reserved_space = fsize;
sb_start_pagefault(inode->i_sb);
@@ -1808,21 +1862,14 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
* end up waiting indefinitely to get a lock on the page currently
* being processed by btrfs_page_mkwrite() function.
*/
- ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
- page_start, reserved_space);
- if (!ret2) {
- ret2 = file_update_time(vmf->vma->vm_file);
- reserved = 1;
- }
- if (ret2) {
- ret = vmf_error(ret2);
- if (reserved)
- goto out;
+ ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
+ page_start, reserved_space);
+ if (ret < 0)
goto out_noreserve;
- }
- /* Make the VM retry the fault. */
- ret = VM_FAULT_NOPAGE;
+ ret = file_update_time(vmf->vma->vm_file);
+ if (ret < 0)
+ goto out;
again:
down_read(&BTRFS_I(inode)->i_mmap_lock);
folio_lock(folio);
@@ -1835,11 +1882,10 @@ again:
}
folio_wait_writeback(folio);
- lock_extent(io_tree, page_start, page_end, &cached_state);
- ret2 = set_folio_extent_mapped(folio);
- if (ret2 < 0) {
- ret = vmf_error(ret2);
- unlock_extent(io_tree, page_start, page_end, &cached_state);
+ btrfs_lock_extent(io_tree, page_start, page_end, &cached_state);
+ ret = set_folio_extent_mapped(folio);
+ if (ret < 0) {
+ btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
goto out_unlock;
}
@@ -1849,7 +1895,7 @@ again:
*/
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, fsize);
if (ordered) {
- unlock_extent(io_tree, page_start, page_end, &cached_state);
+ btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
folio_unlock(folio);
up_read(&BTRFS_I(inode)->i_mmap_lock);
btrfs_start_ordered_extent(ordered);
@@ -1857,12 +1903,12 @@ again:
goto again;
}
- if (folio->index == ((size - 1) >> PAGE_SHIFT)) {
+ if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) {
reserved_space = round_up(size - page_start, fs_info->sectorsize);
if (reserved_space < fsize) {
end = page_start + reserved_space - 1;
btrfs_delalloc_release_space(BTRFS_I(inode),
- data_reserved, page_start,
+ data_reserved, end + 1,
fsize - reserved_space, true);
}
}
@@ -1874,15 +1920,14 @@ again:
* clear any delalloc bits within this page range since we have to
* reserve data&meta space before lock_page() (see above comments).
*/
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, &cached_state);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, &cached_state);
- ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
+ ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
&cached_state);
- if (ret2) {
- unlock_extent(io_tree, page_start, page_end, &cached_state);
- ret = VM_FAULT_SIGBUS;
+ if (ret < 0) {
+ btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
goto out_unlock;
}
@@ -1901,7 +1946,7 @@ again:
btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
- unlock_extent(io_tree, page_start, page_end, &cached_state);
+ btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
up_read(&BTRFS_I(inode)->i_mmap_lock);
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
@@ -1915,11 +1960,16 @@ out_unlock:
out:
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
- reserved_space, (ret != 0));
+ reserved_space, true);
+ extent_changeset_free(data_reserved);
out_noreserve:
sb_end_pagefault(inode->i_sb);
- extent_changeset_free(data_reserved);
- return ret;
+
+ if (ret < 0)
+ return vmf_error(ret);
+
+ /* Make the VM retry the fault. */
+ return VM_FAULT_NOPAGE;
}
static const struct vm_operations_struct btrfs_file_vm_ops = {
@@ -1941,33 +1991,33 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
-static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
- int slot, u64 start, u64 end)
+static bool hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
+ int slot, u64 start, u64 end)
{
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
if (slot < 0 || slot >= btrfs_header_nritems(leaf))
- return 0;
+ return false;
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != btrfs_ino(inode) ||
key.type != BTRFS_EXTENT_DATA_KEY)
- return 0;
+ return false;
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
- return 0;
+ return false;
if (btrfs_file_extent_disk_bytenr(leaf, fi))
- return 0;
+ return false;
if (key.offset == end)
- return 1;
+ return true;
if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
- return 1;
- return 0;
+ return true;
+ return false;
}
static int fill_holes(struct btrfs_trans_handle *trans,
@@ -2041,7 +2091,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
out:
btrfs_release_path(path);
- hole_em = alloc_extent_map();
+ hole_em = btrfs_alloc_extent_map();
if (!hole_em) {
btrfs_drop_extent_map_range(inode, offset, end - 1, false);
btrfs_set_inode_full_sync(inode);
@@ -2055,7 +2105,7 @@ out:
hole_em->generation = trans->transid;
ret = btrfs_replace_extent_map_range(inode, hole_em, true);
- free_extent_map(hole_em);
+ btrfs_free_extent_map(hole_em);
if (ret)
btrfs_set_inode_full_sync(inode);
}
@@ -2088,15 +2138,33 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
0 : *start + *len - em->start - em->len;
*start = em->start + em->len;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return ret;
}
-static void btrfs_punch_hole_lock_range(struct inode *inode,
- const u64 lockstart,
- const u64 lockend,
- struct extent_state **cached_state)
+/*
+ * Check if there is no folio in the range.
+ *
+ * We cannot utilize filemap_range_has_page() in a filemap with large folios
+ * as we can hit the following false positive:
+ *
+ * start end
+ * | |
+ * |//|//|//|//| | | | | | | | |//|//|
+ * \ / \ /
+ * Folio A Folio B
+ *
+ * That large folio A and B cover the start and end indexes.
+ * In that case filemap_range_has_page() will always return true, but the above
+ * case is fine for btrfs_punch_hole_lock_range() usage.
+ *
+ * So here we only ensure that no other folios is in the range, excluding the
+ * head/tail large folio.
+ */
+static bool check_range_has_page(struct inode *inode, u64 start, u64 end)
{
+ struct folio_batch fbatch;
+ bool ret = false;
/*
* For subpage case, if the range is not at page boundary, we could
* have pages at the leading/tailing part of the range.
@@ -2107,17 +2175,45 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
*
* And do not decrease page_lockend right now, as it can be 0.
*/
- const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
- const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE);
+ const u64 page_lockstart = round_up(start, PAGE_SIZE);
+ const u64 page_lockend = round_down(end + 1, PAGE_SIZE);
+ const pgoff_t start_index = page_lockstart >> PAGE_SHIFT;
+ const pgoff_t end_index = (page_lockend - 1) >> PAGE_SHIFT;
+ pgoff_t tmp = start_index;
+ int found_folios;
+
+ /* The same page or adjacent pages. */
+ if (page_lockend <= page_lockstart)
+ return false;
+ folio_batch_init(&fbatch);
+ found_folios = filemap_get_folios(inode->i_mapping, &tmp, end_index, &fbatch);
+ for (int i = 0; i < found_folios; i++) {
+ struct folio *folio = fbatch.folios[i];
+
+ /* A large folio begins before the start. Not a target. */
+ if (folio->index < start_index)
+ continue;
+ /* A large folio extends beyond the end. Not a target. */
+ if (folio->index + folio_nr_pages(folio) > end_index)
+ continue;
+ /* A folio doesn't cover the head/tail index. Found a target. */
+ ret = true;
+ break;
+ }
+ folio_batch_release(&fbatch);
+ return ret;
+}
+
+static void btrfs_punch_hole_lock_range(struct inode *inode,
+ const u64 lockstart, const u64 lockend,
+ struct extent_state **cached_state)
+{
while (1) {
truncate_pagecache_range(inode, lockstart, lockend);
- lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- cached_state);
- /* The same page or adjacent pages. */
- if (page_lockend <= page_lockstart)
- break;
+ btrfs_lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ cached_state);
/*
* We can't have ordered extents in the range, nor dirty/writeback
* pages, because we have locked the inode's VFS lock in exclusive
@@ -2128,12 +2224,11 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* locking the range check if we have pages in the range, and if
* we do, unlock the range and retry.
*/
- if (!filemap_range_has_page(inode->i_mapping, page_lockstart,
- page_lockend - 1))
+ if (!check_range_has_page(inode, lockstart, lockend))
break;
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ cached_state);
}
btrfs_assert_inode_range_clean(BTRFS_I(inode), lockstart, lockend);
@@ -2506,7 +2601,8 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
u64 lockend;
u64 tail_start;
u64 tail_len;
- u64 orig_start = offset;
+ const u64 orig_start = offset;
+ const u64 orig_end = offset + len - 1;
int ret = 0;
bool same_block;
u64 ino_size;
@@ -2538,18 +2634,14 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
== (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
/*
- * We needn't truncate any block which is beyond the end of the file
- * because we are sure there is no data there.
- */
- /*
* Only do this if we are in the same block and we aren't doing the
* entire block.
*/
if (same_block && len < fs_info->sectorsize) {
if (offset < ino_size) {
truncated_block = true;
- ret = btrfs_truncate_block(BTRFS_I(inode), offset, len,
- 0);
+ ret = btrfs_truncate_block(BTRFS_I(inode), offset + len - 1,
+ orig_start, orig_end);
} else {
ret = 0;
}
@@ -2559,7 +2651,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
/* zero back part of the first block */
if (offset < ino_size) {
truncated_block = true;
- ret = btrfs_truncate_block(BTRFS_I(inode), offset, 0, 0);
+ ret = btrfs_truncate_block(BTRFS_I(inode), offset, orig_start, orig_end);
if (ret) {
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
return ret;
@@ -2596,8 +2688,8 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
if (tail_start + tail_len < ino_size) {
truncated_block = true;
ret = btrfs_truncate_block(BTRFS_I(inode),
- tail_start + tail_len,
- 0, 1);
+ tail_start + tail_len - 1,
+ orig_start, orig_end);
if (ret)
goto out_only_mutex;
}
@@ -2631,8 +2723,8 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
out:
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state);
out_only_mutex:
if (!updated_inode && truncated_block && !ret) {
/*
@@ -2750,7 +2842,7 @@ static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
else
ret = RANGE_BOUNDARY_WRITTEN_EXTENT;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return ret;
}
@@ -2765,6 +2857,8 @@ static int btrfs_zero_range(struct inode *inode,
int ret;
u64 alloc_hint = 0;
const u64 sectorsize = fs_info->sectorsize;
+ const u64 orig_start = offset;
+ const u64 orig_end = offset + len - 1;
u64 alloc_start = round_down(offset, sectorsize);
u64 alloc_end = round_up(offset + len, sectorsize);
u64 bytes_to_reserve = 0;
@@ -2794,7 +2888,7 @@ static int btrfs_zero_range(struct inode *inode,
* do nothing except updating the inode's i_size if
* needed.
*/
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
goto out;
@@ -2807,9 +2901,9 @@ static int btrfs_zero_range(struct inode *inode,
ASSERT(IS_ALIGNED(alloc_start, sectorsize));
len = offset + len - alloc_start;
offset = alloc_start;
- alloc_hint = extent_map_block_start(em) + em->len;
+ alloc_hint = btrfs_extent_map_block_start(em) + em->len;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
@@ -2820,22 +2914,22 @@ static int btrfs_zero_range(struct inode *inode,
}
if (em->flags & EXTENT_FLAG_PREALLOC) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
goto out;
}
if (len < sectorsize && em->disk_bytenr != EXTENT_MAP_HOLE) {
- free_extent_map(em);
- ret = btrfs_truncate_block(BTRFS_I(inode), offset, len,
- 0);
+ btrfs_free_extent_map(em);
+ ret = btrfs_truncate_block(BTRFS_I(inode), offset + len - 1,
+ orig_start, orig_end);
if (!ret)
ret = btrfs_fallocate_update_isize(inode,
offset + len,
mode);
return ret;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
alloc_start = round_down(offset, sectorsize);
alloc_end = alloc_start + sectorsize;
goto reserve_space;
@@ -2859,7 +2953,8 @@ static int btrfs_zero_range(struct inode *inode,
alloc_start = round_down(offset, sectorsize);
ret = 0;
} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
- ret = btrfs_truncate_block(BTRFS_I(inode), offset, 0, 0);
+ ret = btrfs_truncate_block(BTRFS_I(inode), offset,
+ orig_start, orig_end);
if (ret)
goto out;
} else {
@@ -2876,8 +2971,8 @@ static int btrfs_zero_range(struct inode *inode,
alloc_end = round_up(offset + len, sectorsize);
ret = 0;
} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
- ret = btrfs_truncate_block(BTRFS_I(inode), offset + len,
- 0, 1);
+ ret = btrfs_truncate_block(BTRFS_I(inode), offset + len - 1,
+ orig_start, orig_end);
if (ret)
goto out;
} else {
@@ -2902,16 +2997,16 @@ reserve_space:
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
alloc_start, bytes_to_reserve);
if (ret) {
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state);
goto out;
}
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
fs_info->sectorsize,
offset + len, &alloc_hint);
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state);
/* btrfs_prealloc_file_range releases reserved space on error */
if (ret) {
space_reserved = false;
@@ -2997,7 +3092,8 @@ static long btrfs_fallocate(struct file *file, int mode,
* need to zero out the end of the block if i_size lands in the
* middle of a block.
*/
- ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0);
+ ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size,
+ inode->i_size, (u64)-1);
if (ret)
goto out;
}
@@ -3022,8 +3118,8 @@ static long btrfs_fallocate(struct file *file, int mode,
}
locked_end = alloc_end - 1;
- lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state);
+ btrfs_lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ &cached_state);
btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);
@@ -3035,8 +3131,8 @@ static long btrfs_fallocate(struct file *file, int mode,
ret = PTR_ERR(em);
break;
}
- last_byte = min(extent_map_end(em), alloc_end);
- actual_end = min_t(u64, extent_map_end(em), offset + len);
+ last_byte = min(btrfs_extent_map_end(em), alloc_end);
+ actual_end = min_t(u64, btrfs_extent_map_end(em), offset + len);
last_byte = ALIGN(last_byte, blocksize);
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
@@ -3045,19 +3141,19 @@ static long btrfs_fallocate(struct file *file, int mode,
ret = add_falloc_range(&reserve_list, cur_offset, range_len);
if (ret < 0) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
break;
}
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
&data_reserved, cur_offset, range_len);
if (ret < 0) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
break;
}
qgroup_reserved += range_len;
data_space_needed += range_len;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
cur_offset = last_byte;
}
@@ -3111,8 +3207,8 @@ static long btrfs_fallocate(struct file *file, int mode,
*/
ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
out_unlock:
- unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ &cached_state);
out:
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
extent_changeset_free(data_reserved);
@@ -3146,10 +3242,10 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end
if (inode->delalloc_bytes > 0) {
spin_unlock(&inode->lock);
*delalloc_start_ret = start;
- delalloc_len = count_range_bits(&inode->io_tree,
- delalloc_start_ret, end,
- len, EXTENT_DELALLOC, 1,
- cached_state);
+ delalloc_len = btrfs_count_range_bits(&inode->io_tree,
+ delalloc_start_ret, end,
+ len, EXTENT_DELALLOC, 1,
+ cached_state);
} else {
spin_unlock(&inode->lock);
}
@@ -3458,7 +3554,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
last_extent_end = lockstart;
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
@@ -3604,7 +3700,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
}
out:
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
btrfs_free_path(path);
if (ret < 0)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 05e173311c1a..4b34ea1f01c2 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -308,8 +308,9 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
bool locked = false;
if (block_group) {
- struct btrfs_path *path = btrfs_alloc_path();
+ BTRFS_PATH_AUTO_FREE(path);
+ path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto fail;
@@ -330,13 +331,12 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_CLEAR;
spin_unlock(&block_group->lock);
- btrfs_free_path(path);
}
btrfs_i_size_write(inode, 0);
truncate_pagecache(vfs_inode, 0);
- lock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
btrfs_drop_extent_map_range(inode, 0, (u64)-1, false);
/*
@@ -348,7 +348,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
inode_sub_bytes(&inode->vfs_inode, control.sub_bytes);
btrfs_inode_safe_disk_i_size_write(inode, control.last_size);
- unlock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
if (ret)
goto fail;
@@ -457,7 +457,7 @@ static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
mask);
if (IS_ERR(folio)) {
io_ctl_drop_pages(io_ctl);
- return -ENOMEM;
+ return PTR_ERR(folio);
}
ret = set_folio_extent_mapped(folio);
@@ -1080,9 +1080,8 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list)) {
- cluster = list_entry(block_group->cluster_list.next,
- struct btrfs_free_cluster,
- block_group_list);
+ cluster = list_first_entry(&block_group->cluster_list,
+ struct btrfs_free_cluster, block_group_list);
}
if (!node && cluster) {
@@ -1160,8 +1159,8 @@ update_cache_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
- EXTENT_DELALLOC, NULL);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
+ EXTENT_DELALLOC, NULL);
goto fail;
}
leaf = path->nodes[0];
@@ -1172,9 +1171,9 @@ update_cache_item(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
found_key.offset != offset) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
- inode->i_size - 1, EXTENT_DELALLOC,
- NULL);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
+ inode->i_size - 1, EXTENT_DELALLOC,
+ NULL);
btrfs_release_path(path);
goto fail;
}
@@ -1219,9 +1218,9 @@ static noinline_for_stack int write_pinned_extent_entries(
start = block_group->start;
while (start < block_group->start + block_group->length) {
- if (!find_first_extent_bit(unpin, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY, NULL))
+ if (!btrfs_find_first_extent_bit(unpin, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY, NULL))
return 0;
/* This pinned extent is out of our range */
@@ -1267,8 +1266,8 @@ static int flush_dirty_cache(struct inode *inode)
ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
if (ret)
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
- EXTENT_DELALLOC, NULL);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
+ EXTENT_DELALLOC, NULL);
return ret;
}
@@ -1288,8 +1287,8 @@ cleanup_write_cache_enospc(struct inode *inode,
struct extent_state **cached_state)
{
io_ctl_drop_pages(io_ctl);
- unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ cached_state);
}
static int __btrfs_wait_cache_io(struct btrfs_root *root,
@@ -1414,8 +1413,8 @@ static int __btrfs_write_out_cache(struct inode *inode,
if (ret)
goto out_unlock;
- lock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- &cached_state);
+ btrfs_lock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ &cached_state);
io_ctl_set_generation(io_ctl, trans->transid);
@@ -1475,8 +1474,8 @@ static int __btrfs_write_out_cache(struct inode *inode,
io_ctl_drop_pages(io_ctl);
io_ctl_free(io_ctl);
- unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ &cached_state);
/*
* at this point the pages are under IO and we're happy,
@@ -2342,9 +2341,8 @@ again:
struct rb_node *node;
struct btrfs_free_space *entry;
- cluster = list_entry(block_group->cluster_list.next,
- struct btrfs_free_cluster,
- block_group_list);
+ cluster = list_first_entry(&block_group->cluster_list,
+ struct btrfs_free_cluster, block_group_list);
spin_lock(&cluster->lock);
node = rb_first(&cluster->root);
if (!node) {
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 39c6b96a4c25..0c573d46639a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -117,7 +117,7 @@ struct btrfs_free_space_info *search_free_space_info(
if (ret != 0) {
btrfs_warn(fs_info, "missing free space info for %llu",
block_group->start);
- ASSERT(0);
+ DEBUG_WARN();
return ERR_PTR(-ENOENT);
}
@@ -141,12 +141,12 @@ static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
return ret;
if (ret == 0) {
- ASSERT(0);
+ DEBUG_WARN();
return -EIO;
}
if (p->slots[0] == 0) {
- ASSERT(0);
+ DEBUG_WARN("no previous slot found");
return -EIO;
}
p->slots[0]--;
@@ -223,6 +223,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -235,8 +236,10 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
leaf = path->nodes[0];
nr = 0;
@@ -271,14 +274,17 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
btrfs_release_path(path);
}
info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
@@ -293,8 +299,8 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
expected_extent_count);
- ASSERT(0);
ret = -EIO;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -315,8 +321,10 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key,
data_size);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
leaf = path->nodes[0];
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -331,8 +339,6 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = 0;
out:
kvfree(bitmap);
- if (ret)
- btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -358,6 +364,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -370,8 +377,10 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
leaf = path->nodes[0];
nr = 0;
@@ -412,14 +421,17 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
btrfs_release_path(path);
}
info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
@@ -441,8 +453,10 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
btrfs_release_path(path);
extent_count++;
@@ -455,16 +469,14 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
expected_extent_count);
- ASSERT(0);
ret = -EIO;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = 0;
out:
kvfree(bitmap);
- if (ret)
- btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -838,13 +850,15 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
- ASSERT(0);
+ DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -852,12 +866,12 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
ret = __remove_from_free_space_tree(trans, block_group, path, start,
size);
mutex_unlock(&block_group->free_space_lock);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
btrfs_put_block_group(block_group);
out:
btrfs_free_path(path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1031,25 +1045,27 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
- ASSERT(0);
+ DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
mutex_lock(&block_group->free_space_lock);
ret = __add_to_free_space_tree(trans, block_group, path, start, size);
mutex_unlock(&block_group->free_space_lock);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
btrfs_put_block_group(block_group);
out:
btrfs_free_path(path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1555,7 +1571,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
expected_extent_count);
- ASSERT(0);
+ DEBUG_WARN();
ret = -EIO;
goto out;
}
@@ -1619,7 +1635,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
expected_extent_count);
- ASSERT(0);
+ DEBUG_WARN();
ret = -EIO;
goto out;
}
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index bcca43046064..4394de12a767 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -300,6 +300,7 @@ enum {
#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
+#define BTRFS_WARNING_COMMIT_INTERVAL (300)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
struct btrfs_dev_replace {
@@ -471,6 +472,8 @@ struct btrfs_fs_info {
struct btrfs_block_rsv delayed_block_rsv;
/* Block reservation for delayed refs */
struct btrfs_block_rsv delayed_refs_rsv;
+ /* Block reservation for treelog tree */
+ struct btrfs_block_rsv treelog_rsv;
struct btrfs_block_rsv empty_block_rsv;
@@ -776,10 +779,8 @@ struct btrfs_fs_info {
struct btrfs_delayed_root *delayed_root;
- /* Extent buffer radix tree */
- spinlock_t buffer_lock;
/* Entries are eb->start / sectorsize */
- struct radix_tree_root buffer_radix;
+ struct xarray buffer_tree;
/* Next backup root to be overwritten */
int backup_root_index;
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 3530de0618c8..a61c3540d67b 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -109,7 +109,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
u64 inode_objectid, u64 ref_objectid,
u64 *index)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_inode_extref *extref;
struct extent_buffer *leaf;
@@ -129,9 +129,9 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
- ret = -ENOENT;
+ return -ENOENT;
if (ret < 0)
- goto out;
+ return ret;
/*
* Sanity check - did we find the right item for this name?
@@ -142,8 +142,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
ref_objectid, name);
if (!extref) {
btrfs_abort_transaction(trans, -ENOENT);
- ret = -ENOENT;
- goto out;
+ return -ENOENT;
}
leaf = path->nodes[0];
@@ -152,12 +151,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
*index = btrfs_inode_extref_index(leaf, extref);
if (del_len == item_size) {
- /*
- * Common case only one ref in the item, remove the
- * whole item.
- */
- ret = btrfs_del_item(trans, root, path);
- goto out;
+ /* Common case only one ref in the item, remove the whole item. */
+ return btrfs_del_item(trans, root, path);
}
ptr = (unsigned long)extref;
@@ -168,9 +163,6 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
btrfs_truncate_item(trans, path, item_size - del_len, 1);
-out:
- btrfs_free_path(path);
-
return ret;
}
@@ -260,7 +252,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
int ret;
int ins_len = name->len + sizeof(*extref);
unsigned long ptr;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -279,13 +271,13 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
path->slots[0],
ref_objectid,
name))
- goto out;
+ return ret;
btrfs_extend_item(trans, path, ins_len);
ret = 0;
}
if (ret < 0)
- goto out;
+ return ret;
leaf = path->nodes[0];
ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
@@ -298,9 +290,8 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
ptr = (unsigned long)&extref->name;
write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
-out:
- btrfs_free_path(path);
- return ret;
+
+ return 0;
}
/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bdafe4d4c4a5..c0c778243bf1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -686,12 +686,12 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode,
if (!can_cow_file_range_inline(inode, offset, size, compressed_size))
return 1;
- lock_extent(&inode->io_tree, offset, end, &cached);
+ btrfs_lock_extent(&inode->io_tree, offset, end, &cached);
ret = __cow_file_range_inline(inode, size, compressed_size,
compress_type, compressed_folio,
update_i_size);
if (ret > 0) {
- unlock_extent(&inode->io_tree, offset, end, &cached);
+ btrfs_unlock_extent(&inode->io_tree, offset, end, &cached);
return ret;
}
@@ -777,26 +777,9 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
if (!btrfs_inode_can_compress(inode)) {
- WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
- KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
- btrfs_ino(inode));
+ DEBUG_WARN("BTRFS: unexpected compression for ino %llu", btrfs_ino(inode));
return 0;
}
- /*
- * Only enable sector perfect compression for experimental builds.
- *
- * This is a big feature change for subpage cases, and can hit
- * different corner cases, so only limit this feature for
- * experimental build for now.
- *
- * ETA for moving this out of experimental builds is 6.15.
- */
- if (fs_info->sectorsize < PAGE_SIZE &&
- !IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL)) {
- if (!PAGE_ALIGNED(start) ||
- !PAGE_ALIGNED(end + 1))
- return 0;
- }
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
@@ -1109,6 +1092,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
struct extent_state *cached = NULL;
struct extent_map *em;
int ret = 0;
+ bool free_pages = false;
u64 start = async_extent->start;
u64 end = async_extent->start + async_extent->ram_size - 1;
@@ -1129,7 +1113,10 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
}
if (async_extent->compress_type == BTRFS_COMPRESS_NONE) {
+ ASSERT(!async_extent->folios);
+ ASSERT(async_extent->nr_folios == 0);
submit_uncompressed_range(inode, async_extent, locked_folio);
+ free_pages = true;
goto done;
}
@@ -1145,10 +1132,11 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
* fall back to uncompressed.
*/
submit_uncompressed_range(inode, async_extent, locked_folio);
+ free_pages = true;
goto done;
}
- lock_extent(io_tree, start, end, &cached);
+ btrfs_lock_extent(io_tree, start, end, &cached);
/* Here we're doing allocation and writeback of the compressed pages */
file_extent.disk_bytenr = ins.objectid;
@@ -1163,10 +1151,10 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
ret = PTR_ERR(em);
goto out_free_reserve;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ordered = btrfs_alloc_ordered_extent(inode, start, &file_extent,
- 1 << BTRFS_ORDERED_COMPRESSED);
+ 1U << BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
ret = PTR_ERR(ordered);
@@ -1186,12 +1174,14 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
done:
if (async_chunk->blkcg_css)
kthread_associate_blkcg(NULL);
+ if (free_pages)
+ free_async_extent_pages(async_extent);
kfree(async_extent);
return;
out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
- btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, true);
mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, &cached,
@@ -1218,7 +1208,7 @@ u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
u64 alloc_hint = 0;
read_lock(&em_tree->lock);
- em = search_extent_mapping(em_tree, start, num_bytes);
+ em = btrfs_search_extent_mapping(em_tree, start, num_bytes);
if (em) {
/*
* if block start isn't an actual block number then find the
@@ -1226,15 +1216,15 @@ u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
* block is also bogus then just don't worry about it.
*/
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
- free_extent_map(em);
- em = search_extent_mapping(em_tree, 0, 0);
+ btrfs_free_extent_map(em);
+ em = btrfs_search_extent_mapping(em_tree, 0, 0);
if (em && em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
- alloc_hint = extent_map_block_start(em);
+ alloc_hint = btrfs_extent_map_block_start(em);
if (em)
- free_extent_map(em);
+ btrfs_free_extent_map(em);
} else {
- alloc_hint = extent_map_block_start(em);
- free_extent_map(em);
+ alloc_hint = btrfs_extent_map_block_start(em);
+ btrfs_free_extent_map(em);
}
}
read_unlock(&em_tree->lock);
@@ -1397,24 +1387,24 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* Locked range will be released either during error clean up or
* after the whole range is finished.
*/
- lock_extent(&inode->io_tree, start, start + cur_alloc_size - 1,
- &cached);
+ btrfs_lock_extent(&inode->io_tree, start, start + cur_alloc_size - 1,
+ &cached);
em = btrfs_create_io_em(inode, start, &file_extent,
BTRFS_ORDERED_REGULAR);
if (IS_ERR(em)) {
- unlock_extent(&inode->io_tree, start,
- start + cur_alloc_size - 1, &cached);
+ btrfs_unlock_extent(&inode->io_tree, start,
+ start + cur_alloc_size - 1, &cached);
ret = PTR_ERR(em);
goto out_reserve;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ordered = btrfs_alloc_ordered_extent(inode, start, &file_extent,
- 1 << BTRFS_ORDERED_REGULAR);
+ 1U << BTRFS_ORDERED_REGULAR);
if (IS_ERR(ordered)) {
- unlock_extent(&inode->io_tree, start,
- start + cur_alloc_size - 1, &cached);
+ btrfs_unlock_extent(&inode->io_tree, start,
+ start + cur_alloc_size - 1, &cached);
ret = PTR_ERR(ordered);
goto out_drop_extent_cache;
}
@@ -1469,7 +1459,7 @@ out_drop_extent_cache:
btrfs_drop_extent_map_range(inode, start, start + cur_alloc_size - 1, false);
out_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
- btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, true);
out_unlock:
/*
* Now, we have three regions to clean up:
@@ -1578,8 +1568,8 @@ static noinline void submit_compressed_extents(struct btrfs_work *work, bool do_
PAGE_SHIFT;
while (!list_empty(&async_chunk->extents)) {
- async_extent = list_entry(async_chunk->extents.next,
- struct async_extent, list);
+ async_extent = list_first_entry(&async_chunk->extents,
+ struct async_extent, list);
list_del(&async_extent->list);
submit_one_async_extent(async_chunk, async_extent, &alloc_hint);
}
@@ -1749,9 +1739,9 @@ static int fallback_to_cow(struct btrfs_inode *inode,
* group that contains that extent to RO mode and therefore force COW
* when starting writeback.
*/
- lock_extent(io_tree, start, end, &cached_state);
- count = count_range_bits(io_tree, &range_start, end, range_bytes,
- EXTENT_NORESERVE, 0, NULL);
+ btrfs_lock_extent(io_tree, start, end, &cached_state);
+ count = btrfs_count_range_bits(io_tree, &range_start, end, range_bytes,
+ EXTENT_NORESERVE, 0, NULL);
if (count > 0 || is_space_ino || is_reloc_ino) {
u64 bytes = count;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -1765,10 +1755,9 @@ static int fallback_to_cow(struct btrfs_inode *inode,
spin_unlock(&sinfo->lock);
if (count > 0)
- clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
- NULL);
+ btrfs_clear_extent_bits(io_tree, start, end, EXTENT_NORESERVE);
}
- unlock_extent(io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(io_tree, start, end, &cached_state);
/*
* Don't try to create inline extents, as a mix of inline extent that
@@ -1976,7 +1965,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
u64 end = file_pos + len - 1;
int ret = 0;
- lock_extent(&inode->io_tree, file_pos, end, cached);
+ btrfs_lock_extent(&inode->io_tree, file_pos, end, cached);
if (is_prealloc) {
struct extent_map *em;
@@ -1984,20 +1973,20 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
em = btrfs_create_io_em(inode, file_pos, &nocow_args->file_extent,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- unlock_extent(&inode->io_tree, file_pos, end, cached);
+ btrfs_unlock_extent(&inode->io_tree, file_pos, end, cached);
return PTR_ERR(em);
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
}
ordered = btrfs_alloc_ordered_extent(inode, file_pos, &nocow_args->file_extent,
is_prealloc
- ? (1 << BTRFS_ORDERED_PREALLOC)
- : (1 << BTRFS_ORDERED_NOCOW));
+ ? (1U << BTRFS_ORDERED_PREALLOC)
+ : (1U << BTRFS_ORDERED_NOCOW));
if (IS_ERR(ordered)) {
if (is_prealloc)
btrfs_drop_extent_map_range(inode, file_pos, end, false);
- unlock_extent(&inode->io_tree, file_pos, end, cached);
+ btrfs_unlock_extent(&inode->io_tree, file_pos, end, cached);
return PTR_ERR(ordered);
}
@@ -2296,7 +2285,7 @@ error:
if (cur_offset < end) {
struct extent_state *cached = NULL;
- lock_extent(&inode->io_tree, cur_offset, end, &cached);
+ btrfs_lock_extent(&inode->io_tree, cur_offset, end, &cached);
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_folio, &cached,
EXTENT_LOCKED | EXTENT_DELALLOC |
@@ -2318,7 +2307,7 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
{
if (inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) {
if (inode->defrag_bytes &&
- test_range_bit_exists(&inode->io_tree, start, end, EXTENT_DEFRAG))
+ btrfs_test_range_bit_exists(&inode->io_tree, start, end, EXTENT_DEFRAG))
return false;
return true;
}
@@ -2607,7 +2596,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
!btrfs_is_free_space_inode(inode) &&
!(state->state & EXTENT_NORESERVE) &&
(bits & EXTENT_CLEAR_DATA_RESV))
- btrfs_free_reserved_data_space_noquota(fs_info, len);
+ btrfs_free_reserved_data_space_noquota(inode, len);
percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
fs_info->delalloc_batch);
@@ -2691,12 +2680,12 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
if (em_len > search_len)
em_len = search_len;
- ret = set_extent_bit(&inode->io_tree, search_start,
- search_start + em_len - 1,
- EXTENT_DELALLOC_NEW, cached_state);
+ ret = btrfs_set_extent_bit(&inode->io_tree, search_start,
+ search_start + em_len - 1,
+ EXTENT_DELALLOC_NEW, cached_state);
next:
- search_start = extent_map_end(em);
- free_extent_map(em);
+ search_start = btrfs_extent_map_end(em);
+ btrfs_free_extent_map(em);
if (ret)
return ret;
}
@@ -2726,8 +2715,8 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
return ret;
}
- return set_extent_bit(&inode->io_tree, start, end,
- EXTENT_DELALLOC | extra_bits, cached_state);
+ return btrfs_set_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC | extra_bits, cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
@@ -2802,7 +2791,7 @@ again:
if (ret)
goto out_page;
- lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
/* already ordered? We're done */
if (folio_test_ordered(folio))
@@ -2810,8 +2799,8 @@ again:
ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
if (ordered) {
- unlock_extent(&inode->io_tree, page_start, page_end,
- &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, page_start, page_end,
+ &cached_state);
folio_unlock(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
@@ -2837,7 +2826,7 @@ out_reserved:
if (free_delalloc_space)
btrfs_delalloc_release_space(inode, data_reserved, page_start,
PAGE_SIZE, true);
- unlock_extent(&inode->io_tree, page_start, page_end, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, page_start, page_end, &cached_state);
out_page:
if (ret) {
/*
@@ -2889,7 +2878,7 @@ int btrfs_writepage_cow_fixup(struct folio *folio)
* We should not hit such out-of-band dirty folios anymore.
*/
if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL)) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
btrfs_err_rl(fs_info,
"root %lld ino %llu folio %llu is marked dirty without notifying the fs",
BTRFS_I(inode)->root->root_key.objectid,
@@ -2938,7 +2927,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
const u64 sectorsize = root->fs_info->sectorsize;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key ins;
u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
@@ -3020,8 +3009,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
file_pos - offset,
qgroup_reserved, &ins);
out:
- btrfs_free_path(path);
-
return ret;
}
@@ -3137,8 +3124,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
* depending on their current state).
*/
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
- clear_bits |= EXTENT_LOCKED;
- lock_extent(io_tree, start, end, &cached_state);
+ clear_bits |= EXTENT_LOCKED | EXTENT_FINISHING_ORDERED;
+ btrfs_lock_extent_bits(io_tree, start, end,
+ EXTENT_LOCKED | EXTENT_FINISHING_ORDERED,
+ &cached_state);
}
if (freespace_inode)
@@ -3202,8 +3191,8 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- ret = unpin_extent_cache(inode, ordered_extent->file_offset,
- ordered_extent->num_bytes, trans->transid);
+ ret = btrfs_unpin_extent_cache(inode, ordered_extent->file_offset,
+ ordered_extent->num_bytes, trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3222,9 +3211,9 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
*/
if ((clear_bits & EXTENT_DELALLOC_NEW) &&
!test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
- clear_extent_bit(&inode->io_tree, start, end,
- EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
- &cached_state);
+ btrfs_clear_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
+ &cached_state);
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, inode);
@@ -3233,15 +3222,13 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
out:
- clear_extent_bit(&inode->io_tree, start, end, clear_bits,
- &cached_state);
+ btrfs_clear_extent_bit(&inode->io_tree, start, end, clear_bits,
+ &cached_state);
if (trans)
btrfs_end_transaction(trans);
if (ret || truncated) {
- u64 unwritten_start = start;
-
/*
* If we failed to finish this ordered extent for any reason we
* need to make sure BTRFS_ORDERED_IOERR is set on the ordered
@@ -3253,10 +3240,6 @@ out:
if (ret)
btrfs_mark_ordered_extent_error(ordered_extent);
- if (truncated)
- unwritten_start += logical_len;
- clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
-
/*
* Drop extent maps for the part of the extent we didn't write.
*
@@ -3271,9 +3254,15 @@ out:
* we don't mess with the extent map tree in the NOCOW case, but
* for now simply skip this if we are the free space inode.
*/
- if (!btrfs_is_free_space_inode(inode))
+ if (!btrfs_is_free_space_inode(inode)) {
+ u64 unwritten_start = start;
+
+ if (truncated)
+ unwritten_start += logical_len;
+
btrfs_drop_extent_map_range(inode, unwritten_start,
end, false);
+ }
/*
* If the ordered extent had an IOERR or something else went
@@ -3300,7 +3289,7 @@ out:
NULL);
btrfs_free_reserved_extent(fs_info,
ordered_extent->disk_bytenr,
- ordered_extent->disk_num_bytes, 1);
+ ordered_extent->disk_num_bytes, true);
/*
* Actually free the qgroup rsv which was released when
* the ordered extent was created.
@@ -3337,20 +3326,16 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
/*
* Verify the checksum for a single sector without any extra action that depend
* on the type of I/O.
+ *
+ * @kaddr must be a properly kmapped address.
*/
-int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
- u32 pgoff, u8 *csum, const u8 * const csum_expected)
+int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
+ const u8 * const csum_expected)
{
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
- char *kaddr;
-
- ASSERT(pgoff + fs_info->sectorsize <= PAGE_SIZE);
shash->tfm = fs_info->csum_shash;
-
- kaddr = kmap_local_page(page) + pgoff;
crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
- kunmap_local(kaddr);
if (memcmp(csum, csum_expected, fs_info->csum_size))
return -EIO;
@@ -3379,6 +3364,7 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u64 end = file_offset + bv->bv_len - 1;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
+ void *kaddr;
ASSERT(bv->bv_len == fs_info->sectorsize);
@@ -3386,19 +3372,22 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
return true;
if (btrfs_is_data_reloc_root(inode->root) &&
- test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM,
- NULL)) {
+ btrfs_test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM,
+ NULL)) {
/* Skip the range without csum for data reloc inode */
- clear_extent_bits(&inode->io_tree, file_offset, end,
- EXTENT_NODATASUM);
+ btrfs_clear_extent_bits(&inode->io_tree, file_offset, end,
+ EXTENT_NODATASUM);
return true;
}
csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) *
fs_info->csum_size;
- if (btrfs_check_sector_csum(fs_info, bv->bv_page, bv->bv_offset, csum,
- csum_expected))
+ kaddr = bvec_kmap_local(bv);
+ if (btrfs_check_sector_csum(fs_info, kaddr, csum, csum_expected)) {
+ kunmap_local(kaddr);
goto zeroit;
+ }
+ kunmap_local(kaddr);
return true;
zeroit:
@@ -3545,7 +3534,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
@@ -3735,19 +3724,22 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
out:
if (ret)
btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
- btrfs_free_path(path);
return ret;
}
/*
- * very simple check to peek ahead in the leaf looking for xattrs. If we
- * don't find any xattrs, we know there can't be any acls.
+ * Look ahead in the leaf for xattrs. If we don't find any then we know there
+ * can't be any ACLs.
+ *
+ * @leaf: the eb leaf where to search
+ * @slot: the slot the inode is in
+ * @objectid: the objectid of the inode
*
- * slot is the slot the inode is in, objectid is the objectid of the inode
+ * Return true if there is xattr/ACL, false otherwise.
*/
-static noinline int acls_after_inode_item(struct extent_buffer *leaf,
- int slot, u64 objectid,
- int *first_xattr_slot)
+static noinline bool acls_after_inode_item(struct extent_buffer *leaf,
+ int slot, u64 objectid,
+ int *first_xattr_slot)
{
u32 nritems = btrfs_header_nritems(leaf);
struct btrfs_key found_key;
@@ -3767,45 +3759,50 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
while (slot < nritems) {
btrfs_item_key_to_cpu(leaf, &found_key, slot);
- /* we found a different objectid, there must not be acls */
+ /* We found a different objectid, there must be no ACLs. */
if (found_key.objectid != objectid)
- return 0;
+ return false;
- /* we found an xattr, assume we've got an acl */
+ /* We found an xattr, assume we've got an ACL. */
if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
if (*first_xattr_slot == -1)
*first_xattr_slot = slot;
if (found_key.offset == xattr_access ||
found_key.offset == xattr_default)
- return 1;
+ return true;
}
/*
- * we found a key greater than an xattr key, there can't
- * be any acls later on
+ * We found a key greater than an xattr key, there can't be any
+ * ACLs later on.
*/
if (found_key.type > BTRFS_XATTR_ITEM_KEY)
- return 0;
+ return false;
slot++;
scanned++;
/*
- * it goes inode, inode backrefs, xattrs, extents,
- * so if there are a ton of hard links to an inode there can
- * be a lot of backrefs. Don't waste time searching too hard,
- * this is just an optimization
+ * The item order goes like:
+ * - inode
+ * - inode backrefs
+ * - xattrs
+ * - extents,
+ *
+ * so if there are lots of hard links to an inode there can be
+ * a lot of backrefs. Don't waste time searching too hard,
+ * this is just an optimization.
*/
if (scanned >= 8)
break;
}
- /* we hit the end of the leaf before we found an xattr or
- * something larger than an xattr. We have to assume the inode
- * has acls
+ /*
+ * We hit the end of the leaf before we found an xattr or something
+ * larger than an xattr. We have to assume the inode has ACLs.
*/
if (*first_xattr_slot == -1)
*first_xattr_slot = slot;
- return 1;
+ return true;
}
static int btrfs_init_file_extent_tree(struct btrfs_inode *inode)
@@ -3825,7 +3822,8 @@ static int btrfs_init_file_extent_tree(struct btrfs_inode *inode)
if (!inode->file_extent_tree)
return -ENOMEM;
- extent_io_tree_init(fs_info, inode->file_extent_tree, IO_TREE_INODE_FILE_EXTENT);
+ btrfs_extent_io_tree_init(fs_info, inode->file_extent_tree,
+ IO_TREE_INODE_FILE_EXTENT);
/* Lockdep class is set only for the file extent tree. */
lockdep_set_class(&inode->file_extent_tree->lock, &file_extent_tree_class);
@@ -4128,7 +4126,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
struct btrfs_inode_item *inode_item;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;
@@ -4142,7 +4140,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
if (ret) {
if (ret > 0)
ret = -ENOENT;
- goto failed;
+ return ret;
}
leaf = path->nodes[0];
@@ -4151,10 +4149,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
btrfs_set_inode_last_trans(trans, inode);
- ret = 0;
-failed:
- btrfs_free_path(path);
- return ret;
+ return 0;
}
/*
@@ -4488,7 +4483,7 @@ out:
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *di;
struct btrfs_key key;
struct fscrypt_str name = FSTR_INIT("default", 7);
@@ -4510,7 +4505,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
btrfs_err(fs_info,
"deleting default subvolume %llu is not allowed",
key.objectid);
- goto out;
+ return ret;
}
btrfs_release_path(path);
}
@@ -4521,14 +4516,13 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret == 0) {
/*
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of valid range.
*/
- ret = -EUCLEAN;
- goto out;
+ return -EUCLEAN;
}
ret = 0;
@@ -4538,8 +4532,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
if (key.objectid == btrfs_root_id(root) && key.type == BTRFS_ROOT_REF_KEY)
ret = -ENOTEMPTY;
}
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -4782,20 +4775,80 @@ out_notrans:
return ret;
}
+static bool is_inside_block(u64 bytenr, u64 blockstart, u32 blocksize)
+{
+ ASSERT(IS_ALIGNED(blockstart, blocksize), "blockstart=%llu blocksize=%u",
+ blockstart, blocksize);
+
+ if (blockstart <= bytenr && bytenr <= blockstart + blocksize - 1)
+ return true;
+ return false;
+}
+
+static int truncate_block_zero_beyond_eof(struct btrfs_inode *inode, u64 start)
+{
+ const pgoff_t index = (start >> PAGE_SHIFT);
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ struct folio *folio;
+ u64 zero_start;
+ u64 zero_end;
+ int ret = 0;
+
+again:
+ folio = filemap_lock_folio(mapping, index);
+ /* No folio present. */
+ if (IS_ERR(folio))
+ return 0;
+
+ if (!folio_test_uptodate(folio)) {
+ ret = btrfs_read_folio(NULL, folio);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
+ goto again;
+ }
+ if (!folio_test_uptodate(folio)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+ }
+ folio_wait_writeback(folio);
+
+ /*
+ * We do not need to lock extents nor wait for OE, as it's already
+ * beyond EOF.
+ */
+
+ zero_start = max_t(u64, folio_pos(folio), start);
+ zero_end = folio_pos(folio) + folio_size(folio) - 1;
+ folio_zero_range(folio, zero_start - folio_pos(folio),
+ zero_end - zero_start + 1);
+
+out_unlock:
+ folio_unlock(folio);
+ folio_put(folio);
+ return ret;
+}
+
/*
- * Read, zero a chunk and write a block.
+ * Handle the truncation of a fs block.
*
- * @inode - inode that we're zeroing
- * @from - the offset to start zeroing
- * @len - the length to zero, 0 to zero the entire range respective to the
- * offset
- * @front - zero up to the offset instead of from the offset on
+ * @inode - inode that we're zeroing
+ * @offset - the file offset of the block to truncate
+ * The value must be inside [@start, @end], and the function will do
+ * extra checks if the block that covers @offset needs to be zeroed.
+ * @start - the start file offset of the range we want to zero
+ * @end - the end (inclusive) file offset of the range we want to zero.
*
- * This will find the block for the "from" offset and cow the block and zero the
- * part we want to zero. This is used with truncate and hole punching.
+ * If the range is not block aligned, read out the folio that covers @offset,
+ * and if needed zero blocks that are inside the folio and covered by [@start, @end).
+ * If @start or @end + 1 lands inside a block, that block will be marked dirty
+ * for writeback.
+ *
+ * This is utilized by hole punch, zero range, file expansion.
*/
-int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
- int front)
+int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct address_space *mapping = inode->vfs_inode.i_mapping;
@@ -4805,20 +4858,56 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
struct extent_changeset *data_reserved = NULL;
bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize;
- pgoff_t index = from >> PAGE_SHIFT;
- unsigned offset = from & (blocksize - 1);
+ pgoff_t index = (offset >> PAGE_SHIFT);
struct folio *folio;
gfp_t mask = btrfs_alloc_write_mask(mapping);
size_t write_bytes = blocksize;
int ret = 0;
+ const bool in_head_block = is_inside_block(offset, round_down(start, blocksize),
+ blocksize);
+ const bool in_tail_block = is_inside_block(offset, round_down(end, blocksize),
+ blocksize);
+ bool need_truncate_head = false;
+ bool need_truncate_tail = false;
+ u64 zero_start;
+ u64 zero_end;
u64 block_start;
u64 block_end;
- if (IS_ALIGNED(offset, blocksize) &&
- (!len || IS_ALIGNED(len, blocksize)))
+ /* @offset should be inside the range. */
+ ASSERT(start <= offset && offset <= end, "offset=%llu start=%llu end=%llu",
+ offset, start, end);
+
+ /* The range is aligned at both ends. */
+ if (IS_ALIGNED(start, blocksize) && IS_ALIGNED(end + 1, blocksize)) {
+ /*
+ * For block size < page size case, we may have polluted blocks
+ * beyond EOF. So we also need to zero them out.
+ */
+ if (end == (u64)-1 && blocksize < PAGE_SIZE)
+ ret = truncate_block_zero_beyond_eof(inode, start);
+ goto out;
+ }
+
+ /*
+ * @offset may not be inside the head nor tail block. In that case we
+ * don't need to do anything.
+ */
+ if (!in_head_block && !in_tail_block)
+ goto out;
+
+ /*
+ * Skip the truncatioin if the range in the target block is already aligned.
+ * The seemingly complex check will also handle the same block case.
+ */
+ if (in_head_block && !IS_ALIGNED(start, blocksize))
+ need_truncate_head = true;
+ if (in_tail_block && !IS_ALIGNED(end + 1, blocksize))
+ need_truncate_tail = true;
+ if (!need_truncate_head && !need_truncate_tail)
goto out;
- block_start = round_down(from, blocksize);
+ block_start = round_down(offset, blocksize);
block_end = block_start + blocksize - 1;
ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
@@ -4842,10 +4931,13 @@ again:
folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
if (IS_ERR(folio)) {
- btrfs_delalloc_release_space(inode, data_reserved, block_start,
- blocksize, true);
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(inode, blocksize, true);
+ else
+ btrfs_delalloc_release_space(inode, data_reserved,
+ block_start, blocksize, true);
btrfs_delalloc_release_extents(inode, blocksize);
- ret = -ENOMEM;
+ ret = PTR_ERR(folio);
goto out;
}
@@ -4875,11 +4967,11 @@ again:
folio_wait_writeback(folio);
- lock_extent(io_tree, block_start, block_end, &cached_state);
+ btrfs_lock_extent(io_tree, block_start, block_end, &cached_state);
ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
- unlock_extent(io_tree, block_start, block_end, &cached_state);
+ btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state);
folio_unlock(folio);
folio_put(folio);
btrfs_start_ordered_extent(ordered);
@@ -4887,37 +4979,46 @@ again:
goto again;
}
- clear_extent_bit(&inode->io_tree, block_start, block_end,
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- &cached_state);
+ btrfs_clear_extent_bit(&inode->io_tree, block_start, block_end,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ &cached_state);
ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
&cached_state);
if (ret) {
- unlock_extent(io_tree, block_start, block_end, &cached_state);
+ btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state);
goto out_unlock;
}
- if (offset != blocksize) {
- if (!len)
- len = blocksize - offset;
- if (front)
- folio_zero_range(folio, block_start - folio_pos(folio),
- offset);
- else
- folio_zero_range(folio,
- (block_start - folio_pos(folio)) + offset,
- len);
+ if (end == (u64)-1) {
+ /*
+ * We're truncating beyond EOF, the remaining blocks normally are
+ * already holes thus no need to zero again, but it's possible for
+ * fs block size < page size cases to have memory mapped writes
+ * to pollute ranges beyond EOF.
+ *
+ * In that case although such polluted blocks beyond EOF will
+ * not reach disk, it still affects our page caches.
+ */
+ zero_start = max_t(u64, folio_pos(folio), start);
+ zero_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1,
+ end);
+ } else {
+ zero_start = max_t(u64, block_start, start);
+ zero_end = min_t(u64, block_end, end);
}
+ folio_zero_range(folio, zero_start - folio_pos(folio),
+ zero_end - zero_start + 1);
+
btrfs_folio_clear_checked(fs_info, folio, block_start,
block_end + 1 - block_start);
btrfs_folio_set_dirty(fs_info, folio, block_start,
block_end + 1 - block_start);
- unlock_extent(io_tree, block_start, block_end, &cached_state);
+ btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state);
if (only_release_metadata)
- set_extent_bit(&inode->io_tree, block_start, block_end,
- EXTENT_NORESERVE, NULL);
+ btrfs_set_extent_bit(&inode->io_tree, block_start, block_end,
+ EXTENT_NORESERVE, NULL);
out_unlock:
if (ret) {
@@ -5010,7 +5111,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
* rest of the block before we expand the i_size, otherwise we could
* expose stale data.
*/
- ret = btrfs_truncate_block(inode, oldsize, 0, 0);
+ ret = btrfs_truncate_block(inode, oldsize, oldsize, -1);
if (ret)
return ret;
@@ -5027,7 +5128,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
em = NULL;
break;
}
- last_byte = min(extent_map_end(em), block_end);
+ last_byte = min(btrfs_extent_map_end(em), block_end);
last_byte = ALIGN(last_byte, fs_info->sectorsize);
hole_size = last_byte - cur_offset;
@@ -5043,7 +5144,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
if (ret)
break;
- hole_em = alloc_extent_map();
+ hole_em = btrfs_alloc_extent_map();
if (!hole_em) {
btrfs_drop_extent_map_range(inode, cur_offset,
cur_offset + hole_size - 1,
@@ -5060,7 +5161,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
hole_em->generation = btrfs_get_fs_generation(fs_info);
ret = btrfs_replace_extent_map_range(inode, hole_em, true);
- free_extent_map(hole_em);
+ btrfs_free_extent_map(hole_em);
} else {
ret = btrfs_inode_set_file_extent_range(inode,
cur_offset, hole_size);
@@ -5068,14 +5169,14 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
break;
}
next:
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = NULL;
cur_offset = last_byte;
if (cur_offset >= block_end)
break;
}
- free_extent_map(em);
- unlock_extent(io_tree, hole_start, block_end - 1, &cached_state);
+ btrfs_free_extent_map(em);
+ btrfs_unlock_extent(io_tree, hole_start, block_end - 1, &cached_state);
return ret;
}
@@ -5259,7 +5360,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
state_flags = state->state;
spin_unlock(&io_tree->lock);
- lock_extent(io_tree, start, end, &cached_state);
+ btrfs_lock_extent(io_tree, start, end, &cached_state);
/*
* If still has DELALLOC flag, the extent didn't reach disk,
@@ -5273,9 +5374,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
end - start + 1, NULL);
- clear_extent_bit(io_tree, start, end,
- EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING,
- &cached_state);
+ btrfs_clear_extent_bit(io_tree, start, end,
+ EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING,
+ &cached_state);
cond_resched();
spin_lock(&io_tree->lock);
@@ -5461,7 +5562,7 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
struct btrfs_key *location, u8 *type)
{
struct btrfs_dir_item *di;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = dir->root;
int ret = 0;
struct fscrypt_name fname;
@@ -5472,7 +5573,7 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname);
if (ret < 0)
- goto out;
+ return ret;
/*
* fscrypt_setup_filename() should never return a positive value, but
* gcc on sparc/parisc thinks it can, so assert that doesn't happen.
@@ -5501,7 +5602,6 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
*type = btrfs_dir_ftype(path->nodes[0], di);
out:
fscrypt_free_filename(&fname);
- btrfs_free_path(path);
return ret;
}
@@ -5516,7 +5616,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
struct btrfs_key *location,
struct btrfs_root **sub_root)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *new_root;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
@@ -5572,7 +5672,6 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
location->offset = 0;
err = 0;
out:
- btrfs_free_path(path);
fscrypt_free_filename(&fname);
return err;
}
@@ -5851,7 +5950,7 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
{
struct btrfs_root *root = inode->root;
struct btrfs_key key, found_key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
int ret;
@@ -5865,15 +5964,14 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
/* FIXME: we should be able to handle this */
if (ret == 0)
- goto out;
- ret = 0;
+ return ret;
if (path->slots[0] == 0) {
inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
+ return 0;
}
path->slots[0]--;
@@ -5884,13 +5982,12 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
if (found_key.objectid != btrfs_ino(inode) ||
found_key.type != BTRFS_DIR_INDEX_KEY) {
inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
+ return 0;
}
inode->index_cnt = found_key.offset + 1;
-out:
- btrfs_free_path(path);
- return ret;
+
+ return 0;
}
static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
@@ -5993,7 +6090,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key found_key;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
void *addr;
LIST_HEAD(ins_list);
LIST_HEAD(del_list);
@@ -6106,7 +6203,6 @@ nopos:
err:
if (put)
btrfs_readdir_put_delayed_items(BTRFS_I(inode), &ins_list, &del_list);
- btrfs_free_path(path);
return ret;
}
@@ -6896,18 +6992,18 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct extent_map_tree *em_tree = &inode->extent_tree;
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
+ em = btrfs_lookup_extent_mapping(em_tree, start, len);
read_unlock(&em_tree->lock);
if (em) {
if (em->start > start || em->start + em->len <= start)
- free_extent_map(em);
+ btrfs_free_extent_map(em);
else if (em->disk_bytenr == EXTENT_MAP_INLINE && folio)
- free_extent_map(em);
+ btrfs_free_extent_map(em);
else
goto out;
}
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
ret = -ENOMEM;
goto out;
@@ -7044,7 +7140,7 @@ not_found:
insert:
ret = 0;
btrfs_release_path(path);
- if (em->start > start || extent_map_end(em) <= start) {
+ if (em->start > start || btrfs_extent_map_end(em) <= start) {
btrfs_err(fs_info,
"bad extent! em: [%llu %llu] passed [%llu %llu]",
em->start, em->len, start, len);
@@ -7061,7 +7157,7 @@ out:
trace_btrfs_get_extent(root, inode, em);
if (ret) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return ERR_PTR(ret);
}
return em;
@@ -7105,7 +7201,7 @@ noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct can_nocow_file_extent_args nocow_args = { 0 };
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
struct extent_buffer *leaf;
struct extent_io_tree *io_tree = &inode->io_tree;
@@ -7121,13 +7217,12 @@ noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
offset, 0);
if (ret < 0)
- goto out;
+ return ret;
if (ret == 1) {
if (path->slots[0] == 0) {
- /* can't find the item, must cow */
- ret = 0;
- goto out;
+ /* Can't find the item, must COW. */
+ return 0;
}
path->slots[0]--;
}
@@ -7136,17 +7231,17 @@ noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != btrfs_ino(inode) ||
key.type != BTRFS_EXTENT_DATA_KEY) {
- /* not our file or wrong item type, must cow */
- goto out;
+ /* Not our file or wrong item type, must COW. */
+ return 0;
}
if (key.offset > offset) {
- /* Wrong offset, must cow */
- goto out;
+ /* Wrong offset, must COW. */
+ return 0;
}
if (btrfs_file_extent_end(path) <= offset)
- goto out;
+ return 0;
fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
found_type = btrfs_file_extent_type(leaf, fi);
@@ -7161,15 +7256,13 @@ noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
if (ret != 1) {
/* Treat errors as not being able to NOCOW. */
- ret = 0;
- goto out;
+ return 0;
}
- ret = 0;
if (btrfs_extent_readonly(fs_info,
nocow_args.file_extent.disk_bytenr +
nocow_args.file_extent.offset))
- goto out;
+ return 0;
if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -7177,21 +7270,18 @@ noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
range_end = round_up(offset + nocow_args.file_extent.num_bytes,
root->fs_info->sectorsize) - 1;
- ret = test_range_bit_exists(io_tree, offset, range_end, EXTENT_DELALLOC);
- if (ret) {
- ret = -EAGAIN;
- goto out;
- }
+ ret = btrfs_test_range_bit_exists(io_tree, offset, range_end,
+ EXTENT_DELALLOC);
+ if (ret)
+ return -EAGAIN;
}
if (file_extent)
memcpy(file_extent, &nocow_args.file_extent, sizeof(*file_extent));
*len = nocow_args.file_extent.num_bytes;
- ret = 1;
-out:
- btrfs_free_path(path);
- return ret;
+
+ return 1;
}
/* The callers of this must take lock_extent() */
@@ -7239,7 +7329,7 @@ struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
break;
}
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em)
return ERR_PTR(-ENOMEM);
@@ -7252,15 +7342,15 @@ struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
em->offset = file_extent->offset;
em->flags |= EXTENT_FLAG_PINNED;
if (type == BTRFS_ORDERED_COMPRESSED)
- extent_map_set_compression(em, file_extent->compression);
+ btrfs_extent_map_set_compression(em, file_extent->compression);
ret = btrfs_replace_extent_map_range(inode, em, true);
if (ret) {
- free_extent_map(em);
+ btrfs_free_extent_map(em);
return ERR_PTR(ret);
}
- /* em got 2 refs now, callers needs to do free_extent_map once. */
+ /* em got 2 refs now, callers needs to do btrfs_free_extent_map once. */
return em;
}
@@ -7387,7 +7477,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
}
if (!inode_evicting)
- lock_extent(tree, page_start, page_end, &cached_state);
+ btrfs_lock_extent(tree, page_start, page_end, &cached_state);
cur = page_start;
while (cur < page_end) {
@@ -7443,10 +7533,10 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
* btrfs_finish_ordered_io().
*/
if (!inode_evicting)
- clear_extent_bit(tree, cur, range_end,
- EXTENT_DELALLOC |
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, &cached_state);
+ btrfs_clear_extent_bit(tree, cur, range_end,
+ EXTENT_DELALLOC |
+ EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, &cached_state);
spin_lock_irq(&inode->ordered_tree_lock);
set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
@@ -7488,12 +7578,11 @@ next:
* Since the IO will never happen for this page.
*/
btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL);
- if (!inode_evicting) {
- clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
- EXTENT_DELALLOC | EXTENT_UPTODATE |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG |
- extra_flags, &cached_state);
- }
+ if (!inode_evicting)
+ btrfs_clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG | extra_flags,
+ &cached_state);
cur = range_end + 1;
}
/*
@@ -7597,7 +7686,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
control.new_size = new_size;
- lock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state);
/*
* We want to drop from the next block forward in case this new
* size is not block aligned since we will be keeping the last
@@ -7612,7 +7701,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
inode_sub_bytes(&inode->vfs_inode, control.sub_bytes);
btrfs_inode_safe_disk_i_size_write(inode, control.last_size);
- unlock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state);
trans->block_rsv = &fs_info->trans_block_rsv;
if (ret != -ENOSPC && ret != -EAGAIN)
@@ -7656,7 +7745,8 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
- ret = btrfs_truncate_block(inode, inode->vfs_inode.i_size, 0, 0);
+ ret = btrfs_truncate_block(inode, inode->vfs_inode.i_size,
+ inode->vfs_inode.i_size, (u64)-1);
if (ret)
goto out;
trans = btrfs_start_transaction(root, 1);
@@ -7768,10 +7858,10 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->i_otime_nsec = 0;
inode = &ei->vfs_inode;
- extent_map_tree_init(&ei->extent_tree);
+ btrfs_extent_map_tree_init(&ei->extent_tree);
/* This io tree sets the valid inode. */
- extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO);
+ btrfs_extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO);
ei->io_tree.inode = ei;
ei->file_extent_tree = NULL;
@@ -8548,7 +8638,7 @@ static int start_delalloc_inodes(struct btrfs_root *root,
struct btrfs_inode *inode;
struct inode *tmp_inode;
- inode = list_entry(splice.next, struct btrfs_inode, delalloc_inodes);
+ inode = list_first_entry(&splice, struct btrfs_inode, delalloc_inodes);
list_move_tail(&inode->delalloc_inodes, &root->delalloc_inodes);
@@ -8912,11 +9002,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_free_reserved_extent(fs_info, ins.objectid,
- ins.offset, 0);
+ ins.offset, false);
break;
}
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
btrfs_drop_extent_map_range(BTRFS_I(inode), cur_offset,
cur_offset + ins.offset - 1, false);
@@ -8934,7 +9024,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->generation = trans->transid;
ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, true);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
next:
num_bytes -= ins.offset;
cur_offset += ins.offset;
@@ -9106,7 +9196,7 @@ static ssize_t btrfs_encoded_read_inline(
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *io_tree = &inode->io_tree;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_file_extent_item *item;
u64 ram_bytes;
@@ -9116,10 +9206,8 @@ static ssize_t btrfs_encoded_read_inline(
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!path)
+ return -ENOMEM;
path->nowait = nowait;
@@ -9128,9 +9216,9 @@ static ssize_t btrfs_encoded_read_inline(
if (ret) {
if (ret > 0) {
/* The extent item disappeared? */
- ret = -EIO;
+ return -EIO;
}
- goto out;
+ return ret;
}
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
@@ -9143,17 +9231,16 @@ static ssize_t btrfs_encoded_read_inline(
ret = btrfs_encoded_io_compression_from_extent(fs_info,
btrfs_file_extent_compression(leaf, item));
if (ret < 0)
- goto out;
+ return ret;
encoded->compression = ret;
if (encoded->compression) {
size_t inline_size;
inline_size = btrfs_file_extent_inline_item_len(leaf,
path->slots[0]);
- if (inline_size > count) {
- ret = -ENOBUFS;
- goto out;
- }
+ if (inline_size > count)
+ return -ENOBUFS;
+
count = inline_size;
encoded->unencoded_len = ram_bytes;
encoded->unencoded_offset = iocb->ki_pos - extent_start;
@@ -9165,13 +9252,12 @@ static ssize_t btrfs_encoded_read_inline(
}
tmp = kmalloc(count, GFP_NOFS);
- if (!tmp) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!tmp)
+ return -ENOMEM;
+
read_extent_buffer(leaf, tmp, ptr, count);
btrfs_release_path(path);
- unlock_extent(io_tree, start, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
*unlocked = true;
@@ -9179,8 +9265,7 @@ static ssize_t btrfs_encoded_read_inline(
if (ret != count)
ret = -EFAULT;
kfree(tmp);
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -9320,7 +9405,7 @@ ssize_t btrfs_encoded_read_regular(struct kiocb *iocb, struct iov_iter *iter,
if (ret)
goto out;
- unlock_extent(io_tree, start, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
*unlocked = true;
@@ -9397,7 +9482,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
goto out_unlock_inode;
}
- if (!try_lock_extent(io_tree, start, lockend, cached_state)) {
+ if (!btrfs_try_lock_extent(io_tree, start, lockend, cached_state)) {
ret = -EAGAIN;
goto out_unlock_inode;
}
@@ -9406,7 +9491,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
lockend - start + 1);
if (ordered) {
btrfs_put_ordered_extent(ordered);
- unlock_extent(io_tree, start, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, cached_state);
ret = -EAGAIN;
goto out_unlock_inode;
}
@@ -9419,13 +9504,13 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
if (ret)
goto out_unlock_inode;
- lock_extent(io_tree, start, lockend, cached_state);
+ btrfs_lock_extent(io_tree, start, lockend, cached_state);
ordered = btrfs_lookup_ordered_range(inode, start,
lockend - start + 1);
if (!ordered)
break;
btrfs_put_ordered_extent(ordered);
- unlock_extent(io_tree, start, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, cached_state);
cond_resched();
}
}
@@ -9443,7 +9528,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
* For inline extents we get everything we need out of the
* extent item.
*/
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = NULL;
ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
cached_state, extent_start,
@@ -9455,7 +9540,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
* We only want to return up to EOF even if the extent extends beyond
* that.
*/
- encoded->len = min_t(u64, extent_map_end(em),
+ encoded->len = min_t(u64, btrfs_extent_map_end(em),
inode->vfs_inode.i_size) - iocb->ki_pos;
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
(em->flags & EXTENT_FLAG_PREALLOC)) {
@@ -9463,7 +9548,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
count = min_t(u64, count, encoded->len);
encoded->len = count;
encoded->unencoded_len = count;
- } else if (extent_map_is_compressed(em)) {
+ } else if (btrfs_extent_map_is_compressed(em)) {
*disk_bytenr = em->disk_bytenr;
/*
* Bail if the buffer isn't large enough to return the whole
@@ -9478,12 +9563,12 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
encoded->unencoded_len = em->ram_bytes;
encoded->unencoded_offset = iocb->ki_pos - (em->start - em->offset);
ret = btrfs_encoded_io_compression_from_extent(fs_info,
- extent_map_compression(em));
+ btrfs_extent_map_compression(em));
if (ret < 0)
goto out_em;
encoded->compression = ret;
} else {
- *disk_bytenr = extent_map_block_start(em) + (start - em->start);
+ *disk_bytenr = btrfs_extent_map_block_start(em) + (start - em->start);
if (encoded->len > count)
encoded->len = count;
/*
@@ -9496,11 +9581,11 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
encoded->unencoded_len = count;
*disk_io_size = ALIGN(*disk_io_size, fs_info->sectorsize);
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = NULL;
if (*disk_bytenr == EXTENT_MAP_HOLE) {
- unlock_extent(io_tree, start, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
unlocked = true;
ret = iov_iter_zero(count, iter);
@@ -9512,11 +9597,11 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
}
out_em:
- free_extent_map(em);
+ btrfs_free_extent_map(em);
out_unlock_extent:
/* Leave inode and extent locked if we need to do a read. */
if (!unlocked && ret != -EIOCBQUEUED)
- unlock_extent(io_tree, start, lockend, cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, cached_state);
out_unlock_inode:
if (!unlocked && ret != -EIOCBQUEUED)
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
@@ -9663,14 +9748,14 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
end >> PAGE_SHIFT);
if (ret)
goto out_folios;
- lock_extent(io_tree, start, end, &cached_state);
+ btrfs_lock_extent(io_tree, start, end, &cached_state);
ordered = btrfs_lookup_ordered_range(inode, start, num_bytes);
if (!ordered &&
!filemap_range_has_page(inode->vfs_inode.i_mapping, start, end))
break;
if (ordered)
btrfs_put_ordered_extent(ordered);
- unlock_extent(io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(io_tree, start, end, &cached_state);
cond_resched();
}
@@ -9720,11 +9805,11 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
ret = PTR_ERR(em);
goto out_free_reserved;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ordered = btrfs_alloc_ordered_extent(inode, start, &file_extent,
- (1 << BTRFS_ORDERED_ENCODED) |
- (1 << BTRFS_ORDERED_COMPRESSED));
+ (1U << BTRFS_ORDERED_ENCODED) |
+ (1U << BTRFS_ORDERED_COMPRESSED));
if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
ret = PTR_ERR(ordered);
@@ -9735,7 +9820,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (start + encoded->len > inode->vfs_inode.i_size)
i_size_write(&inode->vfs_inode, start + encoded->len);
- unlock_extent(io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(io_tree, start, end, &cached_state);
btrfs_delalloc_release_extents(inode, num_bytes);
@@ -9745,7 +9830,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
out_free_reserved:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
- btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, true);
out_delalloc_release:
btrfs_delalloc_release_extents(inode, num_bytes);
btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
@@ -9758,9 +9843,9 @@ out_free_data_space:
* bytes_may_use.
*/
if (!extent_reserved)
- btrfs_free_reserved_data_space_noquota(fs_info, disk_num_bytes);
+ btrfs_free_reserved_data_space_noquota(inode, disk_num_bytes);
out_unlock:
- unlock_extent(io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(io_tree, start, end, &cached_state);
out_folios:
for (i = 0; i < nr_folios; i++) {
if (folios[i])
@@ -10025,7 +10110,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
- lock_extent(io_tree, 0, isize - 1, &cached_state);
+ btrfs_lock_extent(io_tree, 0, isize - 1, &cached_state);
while (prev_extent_end < isize) {
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -10203,7 +10288,7 @@ out:
if (!IS_ERR_OR_NULL(map))
btrfs_free_chunk_map(map);
- unlock_extent(io_tree, 0, isize - 1, &cached_state);
+ btrfs_unlock_extent(io_tree, 0, isize - 1, &cached_state);
if (ret)
btrfs_swap_deactivate(file);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 63aeacc54945..913acef3f0a9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -909,7 +909,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (error == -EINTR)
return error;
- dentry = lookup_one(idmap, name, parent->dentry, namelen);
+ dentry = lookup_one(idmap, &QSTR_LEN(name, namelen), parent->dentry);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
@@ -1446,8 +1446,8 @@ out:
return ret;
}
-static noinline int key_in_sk(const struct btrfs_key *key,
- const struct btrfs_ioctl_search_key *sk)
+static noinline bool key_in_sk(const struct btrfs_key *key,
+ const struct btrfs_ioctl_search_key *sk)
{
struct btrfs_key test;
int ret;
@@ -1458,7 +1458,7 @@ static noinline int key_in_sk(const struct btrfs_key *key,
ret = btrfs_comp_cpu_keys(key, &test);
if (ret < 0)
- return 0;
+ return false;
test.objectid = sk->max_objectid;
test.type = sk->max_type;
@@ -1466,8 +1466,8 @@ static noinline int key_in_sk(const struct btrfs_key *key,
ret = btrfs_comp_cpu_keys(key, &test);
if (ret > 0)
- return 0;
- return 1;
+ return false;
+ return true;
}
static noinline int copy_to_sk(struct btrfs_path *path,
@@ -2288,7 +2288,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
struct mnt_idmap *idmap = file_mnt_idmap(file);
char *subvol_name, *subvol_name_ptr = NULL;
- int subvol_namelen;
int ret = 0;
bool destroy_parent = false;
@@ -2411,10 +2410,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out;
}
- subvol_namelen = strlen(subvol_name);
-
if (strchr(subvol_name, '/') ||
- strncmp(subvol_name, "..", subvol_namelen) == 0) {
+ strcmp(subvol_name, "..") == 0) {
ret = -EINVAL;
goto free_subvol_name;
}
@@ -2427,7 +2424,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
if (ret == -EINTR)
goto free_subvol_name;
- dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen);
+ dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out_unlock_dir;
@@ -4510,7 +4507,7 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
args.compression, &unlocked);
if (!unlocked) {
- unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
}
}
@@ -4699,7 +4696,7 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
ret = priv->count;
out:
- unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
+ btrfs_unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
io_uring_cmd_done(cmd, ret, 0, issue_flags);
@@ -4788,7 +4785,7 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
return -EIOCBQUEUED;
out_fail:
- unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
kfree(priv);
return ret;
@@ -4913,7 +4910,7 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
(const char *)&data->args + copy_end_kernel,
sizeof(data->args) - copy_end_kernel)) {
if (ret == -EIOCBQUEUED) {
- unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
}
ret = -EFAULT;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 81e62b652e21..a3e6d9616e60 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -149,15 +149,15 @@ void btrfs_tree_read_lock_nested(struct extent_buffer *eb, enum btrfs_lock_nesti
/*
* Try-lock for read.
*
- * Return 1 if the rwlock has been taken, 0 otherwise
+ * Return true if the rwlock has been taken, false otherwise
*/
-int btrfs_try_tree_read_lock(struct extent_buffer *eb)
+bool btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
if (down_read_trylock(&eb->lock)) {
trace_btrfs_try_tree_read_lock(eb);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index c69e57ff804b..af29df98ac14 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -189,7 +189,7 @@ static inline void btrfs_tree_read_lock(struct extent_buffer *eb)
}
void btrfs_tree_read_unlock(struct extent_buffer *eb);
-int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+bool btrfs_try_tree_read_lock(struct extent_buffer *eb);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a45bc11f8665..d403641889ca 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -252,9 +252,8 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
/* Compress at most one sector of data each time */
in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off);
ASSERT(in_len);
- data_in = kmap_local_folio(folio_in, 0);
- ret = lzo1x_1_compress(data_in +
- offset_in_page(cur_in), in_len,
+ data_in = kmap_local_folio(folio_in, offset_in_folio(folio_in, cur_in));
+ ret = lzo1x_1_compress(data_in, in_len,
workspace->cbuf, &out_len,
workspace->mem);
kunmap_local(data_in);
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index 08a9272399d2..6abf81bb00c2 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -4,6 +4,7 @@
#define BTRFS_MESSAGES_H
#include <linux/types.h>
+#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>
@@ -170,15 +171,83 @@ do { \
#ifdef CONFIG_BTRFS_ASSERT
-#define btrfs_assertfail(expr, file, line) ({ \
- pr_err("assertion failed: %s, in %s:%d\n", (expr), (file), (line)); \
- BUG(); \
-})
+__printf(1, 2)
+static inline void verify_assert_printk_format(const char *fmt, ...) {
+ /* Stub to verify the assertion format string. */
+}
+
+/* Take the first token if any. */
+#define __FIRST_ARG(_, ...) _
+/*
+ * Skip the first token and return the rest, if it's empty the comma is dropped.
+ * As ##__VA_ARGS__ cannot be at the beginning of the macro the __VA_OPT__ is needed
+ * and supported since GCC 8 and Clang 12.
+ */
+#define __REST_ARGS(_, ... ) __VA_OPT__(,) __VA_ARGS__
+
+#if defined(CONFIG_CC_IS_CLANG) || GCC_VERSION >= 80000
+/*
+ * Assertion with optional printk() format.
+ *
+ * Accepted syntax:
+ * ASSERT(condition);
+ * ASSERT(condition, "string");
+ * ASSERT(condition, "variable=%d", variable);
+ *
+ * How it works:
+ * - if there's no format string, ""[0] evaluates at compile time to 0 and the
+ * true branch is executed
+ * - any non-empty format string with the "" prefix evaluates to != 0 at
+ * compile time and the false branch is executed
+ * - stringified condition is printed as %s so we don't accidentally mix format
+ * strings (the % operator)
+ * - there can be only one printk() call, so the format strings and arguments are
+ * spliced together:
+ * DEFAULT_FMT [USER_FMT], DEFAULT_ARGS [, USER_ARGS]
+ * - comma between DEFAULT_ARGS and USER_ARGS is handled by preprocessor
+ * (requires __VA_OPT__ support)
+ * - otherwise we could use __VA_OPT(,) __VA_ARGS__ for the 2nd+ argument of args,
+ */
+#define ASSERT(cond, args...) \
+do { \
+ verify_assert_printk_format("check the format string" args); \
+ if (!likely(cond)) { \
+ if (("" __FIRST_ARG(args) [0]) == 0) { \
+ pr_err("assertion failed: %s :: %ld, in %s:%d\n", \
+ #cond, (long)(cond), __FILE__, __LINE__); \
+ } else { \
+ pr_err("assertion failed: %s :: %ld, in %s:%d (" __FIRST_ARG(args) ")\n", \
+ #cond, (long)(cond), __FILE__, __LINE__ __REST_ARGS(args)); \
+ } \
+ BUG(); \
+ } \
+} while(0)
+
+#else
+
+/* For GCC < 8.x only the simple output. */
+
+#define ASSERT(cond, args...) \
+do { \
+ verify_assert_printk_format("check the format string" args); \
+ if (!likely(cond)) { \
+ pr_err("assertion failed: %s :: %ld, in %s:%d\n", \
+ #cond, (long)(cond), __FILE__, __LINE__); \
+ BUG(); \
+ } \
+} while(0)
+
+#endif
+
+#else
+#define ASSERT(cond, args...) (void)(cond)
+#endif
-#define ASSERT(expr) \
- (likely(expr) ? (void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__))
+#ifdef CONFIG_BTRFS_DEBUG
+/* Verbose warning only under debug build. */
+#define DEBUG_WARN(args...) WARN(1, KERN_ERR args)
#else
-#define ASSERT(expr) (void)(expr)
+#define DEBUG_WARN(...) do {} while(0)
#endif
__printf(5, 6)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 03c945711003..9212ce110cde 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -153,25 +153,30 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
struct btrfs_ordered_extent *entry;
int ret;
u64 qgroup_rsv = 0;
+ const bool is_nocow = (flags &
+ ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC)));
- if (flags &
- ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
- /* For nocow write, we can release the qgroup rsv right now */
+ /*
+ * For a NOCOW write we can free the qgroup reserve right now. For a COW
+ * one we transfer the reserved space from the inode's iotree into the
+ * ordered extent by calling btrfs_qgroup_release_data() and tracking
+ * the qgroup reserved amount in the ordered extent, so that later after
+ * completing the ordered extent, when running the data delayed ref it
+ * creates, we free the reserved data with btrfs_qgroup_free_refroot().
+ */
+ if (is_nocow)
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
- if (ret < 0)
- return ERR_PTR(ret);
- } else {
- /*
- * The ordered extent has reserved qgroup space, release now
- * and pass the reserved number for qgroup_record to free.
- */
+ else
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv);
- if (ret < 0)
- return ERR_PTR(ret);
- }
+
+ if (ret < 0)
+ return ERR_PTR(ret);
+
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
- if (!entry)
- return ERR_PTR(-ENOMEM);
+ if (!entry) {
+ entry = ERR_PTR(-ENOMEM);
+ goto out;
+ }
entry->file_offset = file_offset;
entry->num_bytes = num_bytes;
@@ -180,7 +185,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
entry->disk_num_bytes = disk_num_bytes;
entry->offset = offset;
entry->bytes_left = num_bytes;
- entry->inode = BTRFS_I(igrab(&inode->vfs_inode));
+ if (WARN_ON_ONCE(!igrab(&inode->vfs_inode))) {
+ kmem_cache_free(btrfs_ordered_extent_cache, entry);
+ entry = ERR_PTR(-ESTALE);
+ goto out;
+ }
+ entry->inode = inode;
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = qgroup_rsv;
@@ -203,6 +213,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
btrfs_mod_outstanding_extents(inode, 1);
spin_unlock(&inode->lock);
+out:
+ if (IS_ERR(entry) && !is_nocow)
+ btrfs_qgroup_free_refroot(inode->root->fs_info,
+ btrfs_root_id(inode->root),
+ qgroup_rsv, BTRFS_QGROUP_RSV_DATA);
+
return entry;
}
@@ -253,7 +269,7 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
* @disk_bytenr: Offset of extent on disk.
* @disk_num_bytes: Size of extent on disk.
* @offset: Offset into unencoded data where file data starts.
- * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @flags: Flags specifying type of extent (1U << BTRFS_ORDERED_*).
* @compress_type: Compression algorithm used for data.
*
* Most of these parameters correspond to &struct btrfs_file_extent_item. The
@@ -607,23 +623,18 @@ out:
*/
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
- struct list_head *cur;
- struct btrfs_ordered_sum *sum;
-
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (refcount_dec_and_test(&entry->refs)) {
+ struct btrfs_ordered_sum *sum;
+ struct btrfs_ordered_sum *tmp;
+
ASSERT(list_empty(&entry->root_extent_list));
ASSERT(list_empty(&entry->log_list));
ASSERT(RB_EMPTY_NODE(&entry->rb_node));
- if (entry->inode)
- btrfs_add_delayed_iput(entry->inode);
- while (!list_empty(&entry->list)) {
- cur = entry->list.next;
- sum = list_entry(cur, struct btrfs_ordered_sum, list);
- list_del(&sum->list);
+ btrfs_add_delayed_iput(entry->inode);
+ list_for_each_entry_safe(sum, tmp, &entry->list, list)
kvfree(sum);
- }
kmem_cache_free(btrfs_ordered_extent_cache, entry);
}
}
@@ -1173,7 +1184,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
cachedp = cached_state;
while (1) {
- lock_extent(&inode->io_tree, start, end, cachedp);
+ btrfs_lock_extent(&inode->io_tree, start, end, cachedp);
ordered = btrfs_lookup_ordered_range(inode, start,
end - start + 1);
if (!ordered) {
@@ -1186,7 +1197,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
refcount_dec(&cache->refs);
break;
}
- unlock_extent(&inode->io_tree, start, end, cachedp);
+ btrfs_unlock_extent(&inode->io_tree, start, end, cachedp);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
@@ -1204,7 +1215,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
{
struct btrfs_ordered_extent *ordered;
- if (!try_lock_extent(&inode->io_tree, start, end, cached_state))
+ if (!btrfs_try_lock_extent(&inode->io_tree, start, end, cached_state))
return false;
ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
@@ -1212,7 +1223,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
return true;
btrfs_put_ordered_extent(ordered);
- unlock_extent(&inode->io_tree, start, end, cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start, end, cached_state);
return false;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d6fa36674270..b3176edbde82 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -83,7 +83,7 @@ static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup, u64 num_bytes,
enum btrfs_qgroup_rsv_type type)
{
- trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
+ trace_btrfs_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
qgroup->rsv.values[type] += num_bytes;
}
@@ -91,7 +91,7 @@ static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup, u64 num_bytes,
enum btrfs_qgroup_rsv_type type)
{
- trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
+ trace_btrfs_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
if (qgroup->rsv.values[type] >= num_bytes) {
qgroup->rsv.values[type] -= num_bytes;
return;
@@ -1823,7 +1823,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
if (qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] ||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] ||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
btrfs_warn_rl(fs_info,
"to be deleted qgroup %u/%llu has non-zero numbers, data %llu meta prealloc %llu meta pertrans %llu",
btrfs_qgroup_level(qgroup->qgroupid),
@@ -1843,7 +1843,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) {
if (qgroup->rfer || qgroup->excl ||
qgroup->rfer_cmpr || qgroup->excl_cmpr) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
btrfs_warn_rl(fs_info,
"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
btrfs_qgroup_level(qgroup->qgroupid),
@@ -2837,8 +2837,8 @@ static void qgroup_update_counters(struct btrfs_fs_info *fs_info,
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
- trace_qgroup_update_counters(fs_info, qg, cur_old_count,
- cur_new_count);
+ trace_btrfs_qgroup_update_counters(fs_info, qg, cur_old_count,
+ cur_new_count);
/* Rfer update part */
if (cur_old_count == 0 && cur_new_count > 0) {
@@ -3100,8 +3100,7 @@ cleanup:
kfree(record);
}
- trace_qgroup_num_dirty_extents(fs_info, trans->transid,
- num_dirty_extents);
+ trace_btrfs_qgroup_num_dirty_extents(fs_info, trans->transid, num_dirty_extents);
return ret;
}
@@ -4129,8 +4128,8 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
* Now the entry is in [start, start + len), revert the
* EXTENT_QGROUP_RESERVED bit.
*/
- clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
- entry_end, EXTENT_QGROUP_RESERVED);
+ clear_ret = btrfs_clear_extent_bits(&inode->io_tree, entry_start,
+ entry_end, EXTENT_QGROUP_RESERVED);
if (!ret && clear_ret < 0)
ret = clear_ret;
@@ -4232,8 +4231,9 @@ static int qgroup_reserve_data(struct btrfs_inode *inode,
reserved = *reserved_ret;
/* Record already reserved space */
orig_reserved = reserved->bytes_changed;
- ret = set_record_extent_bits(&inode->io_tree, start,
- start + len -1, EXTENT_QGROUP_RESERVED, reserved);
+ ret = btrfs_set_record_extent_bits(&inode->io_tree, start,
+ start + len - 1, EXTENT_QGROUP_RESERVED,
+ reserved);
/* Newly reserved space */
to_reserve = reserved->bytes_changed - orig_reserved;
@@ -4326,9 +4326,10 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
* EXTENT_QGROUP_RESERVED, we won't double free.
* So not need to rush.
*/
- ret = clear_record_extent_bits(&inode->io_tree, free_start,
- free_start + free_len - 1,
- EXTENT_QGROUP_RESERVED, &changeset);
+ ret = btrfs_clear_record_extent_bits(&inode->io_tree, free_start,
+ free_start + free_len - 1,
+ EXTENT_QGROUP_RESERVED,
+ &changeset);
if (ret < 0)
goto out;
freed += changeset.bytes_changed;
@@ -4352,9 +4353,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
int ret;
if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
- return clear_record_extent_bits(&inode->io_tree, start,
- start + len - 1,
- EXTENT_QGROUP_RESERVED, NULL);
+ return btrfs_clear_record_extent_bits(&inode->io_tree, start,
+ start + len - 1,
+ EXTENT_QGROUP_RESERVED, NULL);
}
/* In release case, we shouldn't have @reserved */
@@ -4362,8 +4363,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
if (free && reserved)
return qgroup_free_reserved_data(inode, reserved, start, len, released);
extent_changeset_init(&changeset);
- ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
- EXTENT_QGROUP_RESERVED, &changeset);
+ ret = btrfs_clear_record_extent_bits(&inode->io_tree, start, start + len - 1,
+ EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
goto out;
@@ -4472,7 +4473,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
+ trace_btrfs_qgroup_meta_reserve(root, (s64)num_bytes, type);
ret = qgroup_reserve(root, num_bytes, enforce, type);
if (ret < 0)
return ret;
@@ -4517,7 +4518,7 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
return;
/* TODO: Update trace point to handle such free */
- trace_qgroup_meta_free_all_pertrans(root);
+ trace_btrfs_qgroup_meta_free_all_pertrans(root);
/* Special value -1 means to free all reserved space */
btrfs_qgroup_free_refroot(fs_info, btrfs_root_id(root), (u64)-1,
BTRFS_QGROUP_RSV_META_PERTRANS);
@@ -4539,7 +4540,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
*/
num_bytes = sub_root_meta_rsv(root, num_bytes, type);
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
+ trace_btrfs_qgroup_meta_reserve(root, -(s64)num_bytes, type);
btrfs_qgroup_free_refroot(fs_info, btrfs_root_id(root), num_bytes, type);
}
@@ -4593,7 +4594,7 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
/* Same as btrfs_qgroup_free_meta_prealloc() */
num_bytes = sub_root_meta_rsv(root, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC);
- trace_qgroup_meta_convert(root, num_bytes);
+ trace_btrfs_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, btrfs_root_id(root), num_bytes);
if (!sb_rdonly(fs_info->sb))
add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
@@ -4611,8 +4612,8 @@ void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
int ret;
extent_changeset_init(&changeset);
- ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
- EXTENT_QGROUP_RESERVED, &changeset);
+ ret = btrfs_clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
+ EXTENT_QGROUP_RESERVED, &changeset);
WARN_ON(ret < 0);
if (WARN_ON(changeset.bytes_changed)) {
@@ -4766,7 +4767,7 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
* Marking qgroup inconsistent should be enough
* for end users.
*/
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN("duplicated but mismatched entry found");
ret = -EEXIST;
}
kfree(block);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index cdd373c27784..3ff2bedfb3a4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -134,14 +134,17 @@ struct btrfs_stripe_hash_table {
};
/*
- * A bvec like structure to present a sector inside a page.
- *
- * Unlike bvec we don't need bvlen, as it's fixed to sectorsize.
+ * A structure to present a sector inside a page, the length is fixed to
+ * sectorsize;
*/
struct sector_ptr {
- struct page *page;
- unsigned int pgoff:24;
- unsigned int uptodate:8;
+ /*
+ * Blocks from the bio list can still be highmem.
+ * So here we use physical address to present a page and the offset inside it.
+ */
+ phys_addr_t paddr;
+ bool has_paddr;
+ bool uptodate;
};
static void rmw_rbio_work(struct work_struct *work);
@@ -200,8 +203,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
struct btrfs_stripe_hash_table *x;
struct btrfs_stripe_hash *cur;
struct btrfs_stripe_hash *h;
- int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
- int i;
+ unsigned int num_entries = 1U << BTRFS_STRIPE_HASH_TABLE_BITS;
if (info->stripe_hash_table)
return 0;
@@ -222,7 +224,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
h = table->table;
- for (i = 0; i < num_entries; i++) {
+ for (unsigned int i = 0; i < num_entries; i++) {
cur = h + i;
INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock);
@@ -233,6 +235,14 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
return 0;
}
+static void memcpy_sectors(const struct sector_ptr *dst,
+ const struct sector_ptr *src, u32 blocksize)
+{
+ memcpy_page(phys_to_page(dst->paddr), offset_in_page(dst->paddr),
+ phys_to_page(src->paddr), offset_in_page(src->paddr),
+ blocksize);
+}
+
/*
* caching an rbio means to copy anything from the
* bio_sectors array into the stripe_pages array. We
@@ -253,7 +263,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_sectors; i++) {
/* Some range not covered by bio (partial write), skip it */
- if (!rbio->bio_sectors[i].page) {
+ if (!rbio->bio_sectors[i].has_paddr) {
/*
* Even if the sector is not covered by bio, if it is
* a data sector it should still be uptodate as it is
@@ -264,12 +274,8 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
continue;
}
- ASSERT(rbio->stripe_sectors[i].page);
- memcpy_page(rbio->stripe_sectors[i].page,
- rbio->stripe_sectors[i].pgoff,
- rbio->bio_sectors[i].page,
- rbio->bio_sectors[i].pgoff,
- rbio->bioc->fs_info->sectorsize);
+ memcpy_sectors(&rbio->stripe_sectors[i], &rbio->bio_sectors[i],
+ rbio->bioc->fs_info->sectorsize);
rbio->stripe_sectors[i].uptodate = 1;
}
set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -326,8 +332,13 @@ static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
int page_index = offset >> PAGE_SHIFT;
ASSERT(page_index < rbio->nr_pages);
- rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
- rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
+ if (!rbio->stripe_pages[page_index])
+ continue;
+
+ rbio->stripe_sectors[i].has_paddr = true;
+ rbio->stripe_sectors[i].paddr =
+ page_to_phys(rbio->stripe_pages[page_index]) +
+ offset_in_page(offset);
}
}
@@ -507,9 +518,8 @@ static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
spin_lock(&table->cache_lock);
while (!list_empty(&table->stripe_cache)) {
- rbio = list_entry(table->stripe_cache.next,
- struct btrfs_raid_bio,
- stripe_cache);
+ rbio = list_first_entry(&table->stripe_cache,
+ struct btrfs_raid_bio, stripe_cache);
__remove_rbio_from_cache(rbio);
}
spin_unlock(&table->cache_lock);
@@ -567,9 +577,9 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
if (table->cache_size > RBIO_CACHE_SIZE) {
struct btrfs_raid_bio *found;
- found = list_entry(table->stripe_cache.prev,
- struct btrfs_raid_bio,
- stripe_cache);
+ found = list_last_entry(&table->stripe_cache,
+ struct btrfs_raid_bio,
+ stripe_cache);
if (found != rbio)
__remove_rbio_from_cache(found);
@@ -882,14 +892,14 @@ done_nolock:
remove_rbio_from_cache(rbio);
}
-static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
+static void rbio_endio_bio_list(struct bio *cur, blk_status_t status)
{
struct bio *next;
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- cur->bi_status = err;
+ cur->bi_status = status;
bio_endio(cur);
cur = next;
}
@@ -899,7 +909,7 @@ static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t status)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *extra;
@@ -928,9 +938,9 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
extra = bio_list_get(&rbio->bio_list);
free_raid_bio(rbio);
- rbio_endio_bio_list(cur, err);
+ rbio_endio_bio_list(cur, status);
if (extra)
- rbio_endio_bio_list(extra, err);
+ rbio_endio_bio_list(extra, status);
}
/*
@@ -962,9 +972,9 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
spin_lock(&rbio->bio_list_lock);
sector = &rbio->bio_sectors[index];
- if (sector->page || bio_list_only) {
+ if (sector->has_paddr || bio_list_only) {
/* Don't return sector without a valid page pointer */
- if (!sector->page)
+ if (!sector->has_paddr)
sector = NULL;
spin_unlock(&rbio->bio_list_lock);
return sector;
@@ -1142,7 +1152,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
rbio, sector_nr);
- ASSERT(sector->page);
+ ASSERT(sector->has_paddr);
stripe = &rbio->bioc->stripes[stripe_nr];
disk_start = stripe->physical + sector_nr * sectorsize;
@@ -1173,8 +1183,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
*/
if (last_end == disk_start && !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
- ret = bio_add_page(last, sector->page, sectorsize,
- sector->pgoff);
+ ret = bio_add_page(last, phys_to_page(sector->paddr),
+ sectorsize, offset_in_page(sector->paddr));
if (ret == sectorsize)
return 0;
}
@@ -1187,7 +1197,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
bio->bi_private = rbio;
- __bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
+ __bio_add_page(bio, phys_to_page(sector->paddr), sectorsize,
+ offset_in_page(sector->paddr));
bio_list_add(bio_list, bio);
return 0;
}
@@ -1195,23 +1206,20 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- struct bio_vec bvec;
- struct bvec_iter iter;
+ const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits;
+ struct bvec_iter iter = bio->bi_iter;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->full_stripe_logical;
- bio_for_each_segment(bvec, bio, iter) {
- u32 bvec_offset;
-
- for (bvec_offset = 0; bvec_offset < bvec.bv_len;
- bvec_offset += sectorsize, offset += sectorsize) {
- int index = offset / sectorsize;
- struct sector_ptr *sector = &rbio->bio_sectors[index];
+ while (iter.bi_size) {
+ unsigned int index = (offset >> sectorsize_bits);
+ struct sector_ptr *sector = &rbio->bio_sectors[index];
+ struct bio_vec bv = bio_iter_iovec(bio, iter);
- sector->page = bvec.bv_page;
- sector->pgoff = bvec.bv_offset + bvec_offset;
- ASSERT(sector->pgoff < PAGE_SIZE);
- }
+ sector->has_paddr = true;
+ sector->paddr = bvec_phys(&bv);
+ bio_advance_iter_single(bio, &iter, sectorsize);
+ offset += sectorsize;
}
}
@@ -1289,6 +1297,15 @@ static void assert_rbio(struct btrfs_raid_bio *rbio)
ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
}
+static inline void *kmap_local_sector(const struct sector_ptr *sector)
+{
+ /* The sector pointer must have a page mapped to it. */
+ ASSERT(sector->has_paddr);
+
+ return kmap_local_page(phys_to_page(sector->paddr)) +
+ offset_in_page(sector->paddr);
+}
+
/* Generate PQ for one vertical stripe. */
static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
{
@@ -1301,14 +1318,13 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
/* First collect one sector from each data stripe */
for (stripe = 0; stripe < rbio->nr_data; stripe++) {
sector = sector_in_rbio(rbio, stripe, sectornr, 0);
- pointers[stripe] = kmap_local_page(sector->page) +
- sector->pgoff;
+ pointers[stripe] = kmap_local_sector(sector);
}
/* Then add the parity stripe */
sector = rbio_pstripe_sector(rbio, sectornr);
sector->uptodate = 1;
- pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
+ pointers[stripe++] = kmap_local_sector(sector);
if (has_qstripe) {
/*
@@ -1317,8 +1333,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
*/
sector = rbio_qstripe_sector(rbio, sectornr);
sector->uptodate = 1;
- pointers[stripe++] = kmap_local_page(sector->page) +
- sector->pgoff;
+ pointers[stripe++] = kmap_local_sector(sector);
assert_rbio(rbio);
raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
@@ -1477,15 +1492,14 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
* stripe_pages[], thus we need to locate the sector.
*/
static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
- struct page *page,
- unsigned int pgoff)
+ phys_addr_t paddr)
{
int i;
for (i = 0; i < rbio->nr_sectors; i++) {
struct sector_ptr *sector = &rbio->stripe_sectors[i];
- if (sector->page == page && sector->pgoff == pgoff)
+ if (sector->has_paddr && sector->paddr == paddr)
return sector;
}
return NULL;
@@ -1505,11 +1519,10 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct sector_ptr *sector;
- int pgoff;
+ phys_addr_t paddr = bvec_phys(bvec);
- for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
- pgoff += sectorsize) {
- sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
+ for (u32 off = 0; off < bvec->bv_len; off += sectorsize) {
+ sector = find_stripe_sector(rbio, paddr + off);
ASSERT(sector);
if (sector)
sector->uptodate = 1;
@@ -1519,17 +1532,14 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
{
- struct bio_vec *bv = bio_first_bvec_all(bio);
+ phys_addr_t bvec_paddr = bvec_phys(bio_first_bvec_all(bio));
int i;
for (i = 0; i < rbio->nr_sectors; i++) {
- struct sector_ptr *sector;
-
- sector = &rbio->stripe_sectors[i];
- if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
+ if (rbio->stripe_sectors[i].paddr == bvec_paddr)
break;
- sector = &rbio->bio_sectors[i];
- if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
+ if (rbio->bio_sectors[i].has_paddr &&
+ rbio->bio_sectors[i].paddr == bvec_paddr)
break;
}
ASSERT(i < rbio->nr_sectors);
@@ -1575,11 +1585,11 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
return;
bio_for_each_segment_all(bvec, bio, iter_all) {
- int bv_offset;
+ void *kaddr;
- for (bv_offset = bvec->bv_offset;
- bv_offset < bvec->bv_offset + bvec->bv_len;
- bv_offset += fs_info->sectorsize, total_sector_nr++) {
+ kaddr = bvec_kmap_local(bvec);
+ for (u32 off = 0; off < bvec->bv_len;
+ off += fs_info->sectorsize, total_sector_nr++) {
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *expected_csum = rbio->csum_buf +
total_sector_nr * fs_info->csum_size;
@@ -1589,11 +1599,12 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
if (!test_bit(total_sector_nr, rbio->csum_bitmap))
continue;
- ret = btrfs_check_sector_csum(fs_info, bvec->bv_page,
- bv_offset, csum_buf, expected_csum);
+ ret = btrfs_check_sector_csum(fs_info, kaddr + off,
+ csum_buf, expected_csum);
if (ret < 0)
set_bit(total_sector_nr, rbio->error_bitmap);
}
+ kunmap_local(kaddr);
}
}
@@ -1689,8 +1700,8 @@ static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
list_sort(NULL, &plug->rbio_list, plug_cmp);
while (!list_empty(&plug->rbio_list)) {
- cur = list_entry(plug->rbio_list.next,
- struct btrfs_raid_bio, plug_list);
+ cur = list_first_entry(&plug->rbio_list,
+ struct btrfs_raid_bio, plug_list);
list_del_init(&cur->plug_list);
if (rbio_is_full(cur)) {
@@ -1791,6 +1802,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
struct sector_ptr *sector;
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *csum_expected;
+ void *kaddr;
int ret;
if (!rbio->csum_bitmap || !rbio->csum_buf)
@@ -1809,13 +1821,12 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
}
- ASSERT(sector->page);
-
csum_expected = rbio->csum_buf +
(stripe_nr * rbio->stripe_nsectors + sector_nr) *
fs_info->csum_size;
- ret = btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff,
- csum_buf, csum_expected);
+ kaddr = kmap_local_sector(sector);
+ ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
+ kunmap_local(kaddr);
return ret;
}
@@ -1872,9 +1883,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
} else {
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
}
- ASSERT(sector->page);
- pointers[stripe_nr] = kmap_local_page(sector->page) +
- sector->pgoff;
+ pointers[stripe_nr] = kmap_local_sector(sector);
unmap_array[stripe_nr] = pointers[stripe_nr];
}
@@ -2282,9 +2291,8 @@ static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
static void raid_wait_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- blk_status_t err = bio->bi_status;
- if (err)
+ if (bio->bi_status)
rbio_update_error_bitmap(rbio, bio);
bio_put(bio);
if (atomic_dec_and_test(&rbio->stripes_pending))
@@ -2326,7 +2334,7 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
* thus this rbio can not be cached one, as cached one must
* have all its data sectors present and uptodate.
*/
- if (!sector->page || !sector->uptodate)
+ if (!sector->has_paddr || !sector->uptodate)
return true;
}
return false;
@@ -2516,6 +2524,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
int stripe;
int sectornr;
bool has_qstripe;
+ struct page *page;
struct sector_ptr p_sector = { 0 };
struct sector_ptr q_sector = { 0 };
struct bio_list bio_list;
@@ -2547,29 +2556,33 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
*/
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- p_sector.page = alloc_page(GFP_NOFS);
- if (!p_sector.page)
+ page = alloc_page(GFP_NOFS);
+ if (!page)
return -ENOMEM;
- p_sector.pgoff = 0;
+ p_sector.has_paddr = true;
+ p_sector.paddr = page_to_phys(page);
p_sector.uptodate = 1;
+ page = NULL;
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
- q_sector.page = alloc_page(GFP_NOFS);
- if (!q_sector.page) {
- __free_page(p_sector.page);
- p_sector.page = NULL;
+ page = alloc_page(GFP_NOFS);
+ if (!page) {
+ __free_page(phys_to_page(p_sector.paddr));
+ p_sector.has_paddr = false;
return -ENOMEM;
}
- q_sector.pgoff = 0;
+ q_sector.has_paddr = true;
+ q_sector.paddr = page_to_phys(page);
q_sector.uptodate = 1;
- pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
+ page = NULL;
+ pointers[rbio->real_stripes - 1] = kmap_local_sector(&q_sector);
}
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
/* Map the parity stripe just once */
- pointers[nr_data] = kmap_local_page(p_sector.page);
+ pointers[nr_data] = kmap_local_sector(&p_sector);
for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
struct sector_ptr *sector;
@@ -2578,8 +2591,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
/* first collect one page from each data stripe */
for (stripe = 0; stripe < nr_data; stripe++) {
sector = sector_in_rbio(rbio, stripe, sectornr, 0);
- pointers[stripe] = kmap_local_page(sector->page) +
- sector->pgoff;
+ pointers[stripe] = kmap_local_sector(sector);
}
if (has_qstripe) {
@@ -2595,7 +2607,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
/* Check scrubbing parity and repair it */
sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- parity = kmap_local_page(sector->page) + sector->pgoff;
+ parity = kmap_local_sector(sector);
if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
memcpy(parity, pointers[rbio->scrubp], sectorsize);
else
@@ -2608,12 +2620,11 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
}
kunmap_local(pointers[nr_data]);
- __free_page(p_sector.page);
- p_sector.page = NULL;
- if (q_sector.page) {
- kunmap_local(pointers[rbio->real_stripes - 1]);
- __free_page(q_sector.page);
- q_sector.page = NULL;
+ __free_page(phys_to_page(p_sector.paddr));
+ p_sector.has_paddr = false;
+ if (q_sector.has_paddr) {
+ __free_page(phys_to_page(q_sector.paddr));
+ q_sector.has_paddr = false;
}
/*
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 15c296cb4dac..62161beca559 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -87,7 +87,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
btrfs_alloc_write_mask(mapping));
if (IS_ERR(folio)) {
- ret = -ENOMEM;
+ ret = PTR_ERR(folio);
goto out_unlock;
}
@@ -95,9 +95,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
if (ret < 0)
goto out_unlock;
- clear_extent_bit(&inode->io_tree, file_offset, range_end,
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- NULL);
+ btrfs_clear_extent_bits(&inode->io_tree, file_offset, range_end,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG);
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
if (ret)
goto out_unlock;
@@ -646,10 +645,10 @@ static int btrfs_extent_same_range(struct btrfs_inode *src, u64 loff, u64 len,
* because we have already locked the inode's i_mmap_lock in exclusive
* mode.
*/
- lock_extent(&dst->io_tree, dst_loff, end, &cached_state);
+ btrfs_lock_extent(&dst->io_tree, dst_loff, end, &cached_state);
ret = btrfs_clone(&src->vfs_inode, &dst->vfs_inode, loff, len,
ALIGN(len, bs), dst_loff, 1);
- unlock_extent(&dst->io_tree, dst_loff, end, &cached_state);
+ btrfs_unlock_extent(&dst->io_tree, dst_loff, end, &cached_state);
btrfs_btree_balance_dirty(fs_info);
@@ -749,9 +748,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
* mode.
*/
end = destoff + len - 1;
- lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
+ btrfs_lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
- unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
/*
* We may have copied an inline extent into a page of the destination
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index e17bcb034595..02086191630d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -178,8 +178,9 @@ static void mark_block_processed(struct reloc_control *rc,
in_range(node->bytenr, rc->block_group->start,
rc->block_group->length)) {
blocksize = rc->extent_root->fs_info->nodesize;
- set_extent_bit(&rc->processed_blocks, node->bytenr,
- node->bytenr + blocksize - 1, EXTENT_DIRTY, NULL);
+ btrfs_set_extent_bit(&rc->processed_blocks, node->bytenr,
+ node->bytenr + blocksize - 1, EXTENT_DIRTY,
+ NULL);
}
node->processed = 1;
}
@@ -195,8 +196,8 @@ static struct btrfs_backref_node *walk_up_backref(
int idx = *index;
while (!list_empty(&node->upper)) {
- edge = list_entry(node->upper.next,
- struct btrfs_backref_edge, list[LOWER]);
+ edge = list_first_entry(&node->upper, struct btrfs_backref_edge,
+ list[LOWER]);
edges[idx++] = edge;
node = edge->node[UPPER];
}
@@ -222,8 +223,8 @@ static struct btrfs_backref_node *walk_down_backref(
idx--;
continue;
}
- edge = list_entry(edge->list[LOWER].next,
- struct btrfs_backref_edge, list[LOWER]);
+ edge = list_first_entry(&edge->list[LOWER], struct btrfs_backref_edge,
+ list[LOWER]);
edges[idx - 1] = edge;
*index = idx;
return edge->node[UPPER];
@@ -347,8 +348,8 @@ static bool handle_useless_nodes(struct reloc_control *rc,
struct btrfs_backref_edge *edge;
struct btrfs_backref_node *lower;
- edge = list_entry(cur->lower.next,
- struct btrfs_backref_edge, list[UPPER]);
+ edge = list_first_entry(&cur->lower, struct btrfs_backref_edge,
+ list[UPPER]);
list_del(&edge->list[UPPER]);
list_del(&edge->list[LOWER]);
lower = edge->node[LOWER];
@@ -910,16 +911,16 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
/* Take mmap lock to serialize with reflinks. */
if (!down_read_trylock(&inode->i_mmap_lock))
continue;
- ret = try_lock_extent(&inode->io_tree, key.offset,
- end, &cached_state);
+ ret = btrfs_try_lock_extent(&inode->io_tree, key.offset,
+ end, &cached_state);
if (!ret) {
up_read(&inode->i_mmap_lock);
continue;
}
btrfs_drop_extent_map_range(inode, key.offset, end, true);
- unlock_extent(&inode->io_tree, key.offset, end,
- &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, key.offset, end,
+ &cached_state);
up_read(&inode->i_mmap_lock);
}
}
@@ -1378,9 +1379,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
}
/* the lock_extent waits for read_folio to complete */
- lock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, start, end, &cached_state);
btrfs_drop_extent_map_range(inode, start, end, true);
- unlock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
}
return 0;
}
@@ -1697,8 +1698,8 @@ again:
rc->merge_reloc_tree = true;
while (!list_empty(&rc->reloc_roots)) {
- reloc_root = list_entry(rc->reloc_roots.next,
- struct btrfs_root, root_list);
+ reloc_root = list_first_entry(&rc->reloc_roots,
+ struct btrfs_root, root_list);
list_del_init(&reloc_root->root_list);
root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
@@ -1813,8 +1814,7 @@ again:
while (!list_empty(&reloc_roots)) {
found = 1;
- reloc_root = list_entry(reloc_roots.next,
- struct btrfs_root, root_list);
+ reloc_root = list_first_entry(&reloc_roots, struct btrfs_root, root_list);
root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
false);
@@ -1930,11 +1930,11 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
* reloc root without a corresponding root this could return ENOENT.
*/
if (IS_ERR(root)) {
- ASSERT(0);
+ DEBUG_WARN("error %ld reading root for reloc root", PTR_ERR(root));
return PTR_ERR(root);
}
if (root->reloc_root != reloc_root) {
- ASSERT(0);
+ DEBUG_WARN("unexpected reloc root found");
btrfs_err(fs_info,
"root %llu has two reloc roots associated with it",
reloc_root->root_key.offset);
@@ -2109,8 +2109,8 @@ static noinline_for_stack u64 calcu_metadata_size(struct reloc_control *rc,
if (list_empty(&next->upper))
break;
- edge = list_entry(next->upper.next,
- struct btrfs_backref_edge, list[LOWER]);
+ edge = list_first_entry(&next->upper, struct btrfs_backref_edge,
+ list[LOWER]);
edges[index++] = edge;
next = edge->node[UPPER];
}
@@ -2356,8 +2356,8 @@ static int finish_pending_nodes(struct btrfs_trans_handle *trans,
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
while (!list_empty(&cache->pending[level])) {
- node = list_entry(cache->pending[level].next,
- struct btrfs_backref_node, list);
+ node = list_first_entry(&cache->pending[level],
+ struct btrfs_backref_node, list);
list_move_tail(&node->list, &list);
BUG_ON(!node->pending);
@@ -2395,8 +2395,8 @@ static void update_processed_blocks(struct reloc_control *rc,
if (list_empty(&next->upper))
break;
- edge = list_entry(next->upper.next,
- struct btrfs_backref_edge, list[LOWER]);
+ edge = list_first_entry(&next->upper, struct btrfs_backref_edge,
+ list[LOWER]);
edges[index++] = edge;
next = edge->node[UPPER];
}
@@ -2408,8 +2408,8 @@ static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
{
u32 blocksize = rc->extent_root->fs_info->nodesize;
- if (test_range_bit(&rc->processed_blocks, bytenr,
- bytenr + blocksize - 1, EXTENT_DIRTY, NULL))
+ if (btrfs_test_range_bit(&rc->processed_blocks, bytenr,
+ bytenr + blocksize - 1, EXTENT_DIRTY, NULL))
return 1;
return 0;
}
@@ -2706,9 +2706,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control
if (ret < 0)
return ret;
- clear_extent_bits(&inode->io_tree, i_size,
- round_up(i_size, PAGE_SIZE) - 1,
- EXTENT_UPTODATE);
folio = filemap_lock_folio(mapping, i_size >> PAGE_SHIFT);
/*
* If page is freed we don't need to do anything then, as we
@@ -2738,21 +2735,21 @@ static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control
else
end = cluster->end - offset;
- lock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, start, end, &cached_state);
num_bytes = end + 1 - start;
ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
cur_offset = end + 1;
- unlock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
if (ret)
break;
}
btrfs_inode_unlock(inode, 0);
if (cur_offset < prealloc_end)
- btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
- prealloc_end + 1 - cur_offset);
+ btrfs_free_reserved_data_space_noquota(inode,
+ prealloc_end + 1 - cur_offset);
return ret;
}
@@ -2766,7 +2763,7 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct reloc_contr
u64 end = rc->cluster.end - offset;
int ret = 0;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em)
return -ENOMEM;
@@ -2777,10 +2774,10 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct reloc_contr
em->ram_bytes = em->len;
em->flags |= EXTENT_FLAG_PINNED;
- lock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, start, end, &cached_state);
ret = btrfs_replace_extent_map_range(inode, em, false);
- unlock_extent(&inode->io_tree, start, end, &cached_state);
- free_extent_map(em);
+ btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
+ btrfs_free_extent_map(em);
return ret;
}
@@ -2902,15 +2899,15 @@ again:
goto release_folio;
/* Mark the range delalloc and dirty for later writeback */
- lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
- &cached_state);
+ btrfs_lock_extent(&BTRFS_I(inode)->io_tree, clamped_start,
+ clamped_end, &cached_state);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
clamped_end, 0, &cached_state);
if (ret) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree,
- clamped_start, clamped_end,
- EXTENT_LOCKED | EXTENT_BOUNDARY,
- &cached_state);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ clamped_start, clamped_end,
+ EXTENT_LOCKED | EXTENT_BOUNDARY,
+ &cached_state);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
clamped_len, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
@@ -2932,12 +2929,12 @@ again:
u64 boundary_end = boundary_start +
fs_info->sectorsize - 1;
- set_extent_bit(&BTRFS_I(inode)->io_tree,
- boundary_start, boundary_end,
- EXTENT_BOUNDARY, NULL);
+ btrfs_set_extent_bit(&BTRFS_I(inode)->io_tree,
+ boundary_start, boundary_end,
+ EXTENT_BOUNDARY, NULL);
}
- unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
- &cached_state);
+ btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
+ &cached_state);
btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
cur += clamped_len;
@@ -3435,9 +3432,9 @@ next:
goto next;
}
- block_found = find_first_extent_bit(&rc->processed_blocks,
- key.objectid, &start, &end,
- EXTENT_DIRTY, NULL);
+ block_found = btrfs_find_first_extent_bit(&rc->processed_blocks,
+ key.objectid, &start, &end,
+ EXTENT_DIRTY, NULL);
if (block_found && start <= key.objectid) {
btrfs_release_path(path);
@@ -3646,7 +3643,7 @@ restart:
}
btrfs_release_path(path);
- clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
+ btrfs_clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
if (trans) {
btrfs_end_transaction_throttle(trans);
@@ -3862,7 +3859,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
btrfs_backref_init_cache(fs_info, &rc->backref_cache, true);
rc->reloc_root_tree.rb_root = RB_ROOT;
spin_lock_init(&rc->reloc_root_tree.lock);
- extent_io_tree_init(fs_info, &rc->processed_blocks, IO_TREE_RELOC_BLOCKS);
+ btrfs_extent_io_tree_init(fs_info, &rc->processed_blocks, IO_TREE_RELOC_BLOCKS);
return rc;
}
@@ -4185,8 +4182,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
rc->merge_reloc_tree = true;
while (!list_empty(&reloc_roots)) {
- reloc_root = list_entry(reloc_roots.next,
- struct btrfs_root, root_list);
+ reloc_root = list_first_entry(&reloc_roots, struct btrfs_root, root_list);
list_del(&reloc_root->root_list);
if (btrfs_root_refs(&reloc_root->root_item) == 0) {
@@ -4279,7 +4275,7 @@ int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
while (!list_empty(&list)) {
struct btrfs_ordered_sum *sums =
- list_entry(list.next, struct btrfs_ordered_sum, list);
+ list_first_entry(&list, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c3b2e29e3e01..ce36fafc771e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -66,8 +66,6 @@ struct scrub_ctx;
/* Represent one sector and its needed info to verify the content. */
struct scrub_sector_verification {
- bool is_metadata;
-
union {
/*
* Csum pointer for data csum verification. Should point to a
@@ -100,6 +98,38 @@ enum scrub_stripe_flags {
SCRUB_STRIPE_FLAG_NO_REPORT,
};
+/*
+ * We have multiple bitmaps for one scrub_stripe.
+ * However each bitmap has at most (BTRFS_STRIPE_LEN / blocksize) bits,
+ * which is normally 16, and much smaller than BITS_PER_LONG (32 or 64).
+ *
+ * So to reduce memory usage for each scrub_stripe, we pack those bitmaps
+ * into a larger one.
+ *
+ * These enum records where the sub-bitmap are inside the larger one.
+ * Each subbitmap starts at scrub_bitmap_nr_##name * nr_sectors bit.
+ */
+enum {
+ /* Which blocks are covered by extent items. */
+ scrub_bitmap_nr_has_extent = 0,
+
+ /* Which blocks are meteadata. */
+ scrub_bitmap_nr_is_metadata,
+
+ /*
+ * Which blocks have errors, including IO, csum, and metadata
+ * errors.
+ * This sub-bitmap is the OR results of the next few error related
+ * sub-bitmaps.
+ */
+ scrub_bitmap_nr_error,
+ scrub_bitmap_nr_io_error,
+ scrub_bitmap_nr_csum_error,
+ scrub_bitmap_nr_meta_error,
+ scrub_bitmap_nr_meta_gen_error,
+ scrub_bitmap_nr_last,
+};
+
#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
/*
@@ -138,36 +168,15 @@ struct scrub_stripe {
*/
unsigned long state;
- /* Indicate which sectors are covered by extent items. */
- unsigned long extent_sector_bitmap;
-
- /*
- * The errors hit during the initial read of the stripe.
- *
- * Would be utilized for error reporting and repair.
- *
- * The remaining init_nr_* records the number of errors hit, only used
- * by error reporting.
- */
- unsigned long init_error_bitmap;
- unsigned int init_nr_io_errors;
- unsigned int init_nr_csum_errors;
- unsigned int init_nr_meta_errors;
+ /* The large bitmap contains all the sub-bitmaps. */
+ unsigned long bitmaps[BITS_TO_LONGS(scrub_bitmap_nr_last *
+ (BTRFS_STRIPE_LEN / BTRFS_MIN_BLOCKSIZE))];
/*
- * The following error bitmaps are all for the current status.
- * Every time we submit a new read, these bitmaps may be updated.
- *
- * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
- *
- * IO and csum errors can happen for both metadata and data.
+ * For writeback (repair or replace) error reporting.
+ * This one is protected by a spinlock, thus can not be packed into
+ * the larger bitmap.
*/
- unsigned long error_bitmap;
- unsigned long io_error_bitmap;
- unsigned long csum_error_bitmap;
- unsigned long meta_error_bitmap;
-
- /* For writeback (repair or replace) error reporting. */
unsigned long write_error_bitmap;
/* Writeback can be concurrent, thus we need to protect the bitmap. */
@@ -219,6 +228,90 @@ struct scrub_ctx {
refcount_t refs;
};
+#define scrub_calc_start_bit(stripe, name, block_nr) \
+({ \
+ unsigned int __start_bit; \
+ \
+ ASSERT(block_nr < stripe->nr_sectors, \
+ "nr_sectors=%u block_nr=%u", stripe->nr_sectors, block_nr); \
+ __start_bit = scrub_bitmap_nr_##name * stripe->nr_sectors + block_nr; \
+ __start_bit; \
+})
+
+#define IMPLEMENT_SCRUB_BITMAP_OPS(name) \
+static inline void scrub_bitmap_set_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr, \
+ unsigned int nr_blocks) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, \
+ name, block_nr); \
+ \
+ bitmap_set(stripe->bitmaps, start_bit, nr_blocks); \
+} \
+static inline void scrub_bitmap_clear_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr, \
+ unsigned int nr_blocks) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ bitmap_clear(stripe->bitmaps, start_bit, nr_blocks); \
+} \
+static inline bool scrub_bitmap_test_bit_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ return test_bit(start_bit, stripe->bitmaps); \
+} \
+static inline void scrub_bitmap_set_bit_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ set_bit(start_bit, stripe->bitmaps); \
+} \
+static inline void scrub_bitmap_clear_bit_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ clear_bit(start_bit, stripe->bitmaps); \
+} \
+static inline unsigned long scrub_bitmap_read_##name(struct scrub_stripe *stripe) \
+{ \
+ const unsigned int nr_blocks = stripe->nr_sectors; \
+ \
+ ASSERT(nr_blocks > 0 && nr_blocks <= BITS_PER_LONG, \
+ "nr_blocks=%u BITS_PER_LONG=%u", \
+ nr_blocks, BITS_PER_LONG); \
+ \
+ return bitmap_read(stripe->bitmaps, nr_blocks * scrub_bitmap_nr_##name, \
+ stripe->nr_sectors); \
+} \
+static inline bool scrub_bitmap_empty_##name(struct scrub_stripe *stripe) \
+{ \
+ unsigned long bitmap = scrub_bitmap_read_##name(stripe); \
+ \
+ return bitmap_empty(&bitmap, stripe->nr_sectors); \
+} \
+static inline unsigned int scrub_bitmap_weight_##name(struct scrub_stripe *stripe) \
+{ \
+ unsigned long bitmap = scrub_bitmap_read_##name(stripe); \
+ \
+ return bitmap_weight(&bitmap, stripe->nr_sectors); \
+}
+IMPLEMENT_SCRUB_BITMAP_OPS(has_extent);
+IMPLEMENT_SCRUB_BITMAP_OPS(is_metadata);
+IMPLEMENT_SCRUB_BITMAP_OPS(error);
+IMPLEMENT_SCRUB_BITMAP_OPS(io_error);
+IMPLEMENT_SCRUB_BITMAP_OPS(csum_error);
+IMPLEMENT_SCRUB_BITMAP_OPS(meta_error);
+IMPLEMENT_SCRUB_BITMAP_OPS(meta_gen_error);
+
struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
@@ -228,6 +321,19 @@ struct scrub_warning {
struct btrfs_device *dev;
};
+struct scrub_error_records {
+ /*
+ * Bitmap recording which blocks hit errors (IO/csum/...) during the
+ * initial read.
+ */
+ unsigned long init_error_bitmap;
+
+ unsigned int nr_io_errors;
+ unsigned int nr_csum_errors;
+ unsigned int nr_meta_errors;
+ unsigned int nr_meta_gen_errors;
+};
+
static void release_scrub_stripe(struct scrub_stripe *stripe)
{
if (!stripe)
@@ -579,20 +685,15 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
return ret;
}
-static struct page *scrub_stripe_get_page(struct scrub_stripe *stripe, int sector_nr)
+static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
{
- struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
- int page_index = (sector_nr << fs_info->sectorsize_bits) >> PAGE_SHIFT;
+ u32 offset = (sector_nr << stripe->bg->fs_info->sectorsize_bits);
+ const struct page *page = stripe->pages[offset >> PAGE_SHIFT];
- return stripe->pages[page_index];
-}
-
-static unsigned int scrub_stripe_get_page_offset(struct scrub_stripe *stripe,
- int sector_nr)
-{
- struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
-
- return offset_in_page(sector_nr << fs_info->sectorsize_bits);
+ /* stripe->pages[] is allocated by us and no highmem is allowed. */
+ ASSERT(page);
+ ASSERT(!PageHighMem(page));
+ return page_address(page) + offset_in_page(offset);
}
static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr)
@@ -600,24 +701,22 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits);
- const struct page *first_page = scrub_stripe_get_page(stripe, sector_nr);
- const unsigned int first_off = scrub_stripe_get_page_offset(stripe, sector_nr);
+ void *first_kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
+ struct btrfs_header *header = first_kaddr;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 on_disk_csum[BTRFS_CSUM_SIZE];
u8 calculated_csum[BTRFS_CSUM_SIZE];
- struct btrfs_header *header;
/*
* Here we don't have a good way to attach the pages (and subpages)
* to a dummy extent buffer, thus we have to directly grab the members
* from pages.
*/
- header = (struct btrfs_header *)(page_address(first_page) + first_off);
memcpy(on_disk_csum, header->csum, fs_info->csum_size);
if (logical != btrfs_stack_header_bytenr(header)) {
- bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad bytenr, has %llu want %llu",
logical, stripe->mirror_num,
@@ -626,8 +725,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) != 0) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
@@ -636,8 +735,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE) != 0) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
logical, stripe->mirror_num,
@@ -648,21 +747,18 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
/* Now check tree block csum. */
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
- crypto_shash_update(shash, page_address(first_page) + first_off +
- BTRFS_CSUM_SIZE, fs_info->sectorsize - BTRFS_CSUM_SIZE);
+ crypto_shash_update(shash, first_kaddr + BTRFS_CSUM_SIZE,
+ fs_info->sectorsize - BTRFS_CSUM_SIZE);
for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) {
- struct page *page = scrub_stripe_get_page(stripe, i);
- unsigned int page_off = scrub_stripe_get_page_offset(stripe, i);
-
- crypto_shash_update(shash, page_address(page) + page_off,
+ crypto_shash_update(shash, scrub_stripe_get_kaddr(stripe, i),
fs_info->sectorsize);
}
crypto_shash_final(shash, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
logical, stripe->mirror_num,
@@ -672,8 +768,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
if (stripe->sectors[sector_nr].generation !=
btrfs_stack_header_generation(header)) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad generation, has %llu want %llu",
logical, stripe->mirror_num,
@@ -681,9 +777,10 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
stripe->sectors[sector_nr].generation);
return;
}
- bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
- bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_csum_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_meta_gen_error(stripe, sector_nr, sectors_per_tree);
}
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
@@ -691,23 +788,22 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct scrub_sector_verification *sector = &stripe->sectors[sector_nr];
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
- struct page *page = scrub_stripe_get_page(stripe, sector_nr);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr);
+ void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
u8 csum_buf[BTRFS_CSUM_SIZE];
int ret;
ASSERT(sector_nr >= 0 && sector_nr < stripe->nr_sectors);
/* Sector not utilized, skip it. */
- if (!test_bit(sector_nr, &stripe->extent_sector_bitmap))
+ if (!scrub_bitmap_test_bit_has_extent(stripe, sector_nr))
return;
/* IO error, no need to check. */
- if (test_bit(sector_nr, &stripe->io_error_bitmap))
+ if (scrub_bitmap_test_bit_io_error(stripe, sector_nr))
return;
/* Metadata, verify the full tree block. */
- if (sector->is_metadata) {
+ if (scrub_bitmap_test_bit_is_metadata(stripe, sector_nr)) {
/*
* Check if the tree block crosses the stripe boundary. If
* crossed the boundary, we cannot verify it but only give a
@@ -733,17 +829,17 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
* cases without csum, we have no other choice but to trust it.
*/
if (!sector->csum) {
- clear_bit(sector_nr, &stripe->error_bitmap);
+ scrub_bitmap_clear_bit_error(stripe, sector_nr);
return;
}
- ret = btrfs_check_sector_csum(fs_info, page, pgoff, csum_buf, sector->csum);
+ ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, sector->csum);
if (ret < 0) {
- set_bit(sector_nr, &stripe->csum_error_bitmap);
- set_bit(sector_nr, &stripe->error_bitmap);
+ scrub_bitmap_set_bit_csum_error(stripe, sector_nr);
+ scrub_bitmap_set_bit_error(stripe, sector_nr);
} else {
- clear_bit(sector_nr, &stripe->csum_error_bitmap);
- clear_bit(sector_nr, &stripe->error_bitmap);
+ scrub_bitmap_clear_bit_csum_error(stripe, sector_nr);
+ scrub_bitmap_clear_bit_error(stripe, sector_nr);
}
}
@@ -756,7 +852,7 @@ static void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long b
for_each_set_bit(sector_nr, &bitmap, stripe->nr_sectors) {
scrub_verify_one_sector(stripe, sector_nr);
- if (stripe->sectors[sector_nr].is_metadata)
+ if (scrub_bitmap_test_bit_is_metadata(stripe, sector_nr))
sector_nr += sectors_per_tree - 1;
}
}
@@ -766,8 +862,7 @@ static int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first
int i;
for (i = 0; i < stripe->nr_sectors; i++) {
- if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page &&
- scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset)
+ if (scrub_stripe_get_kaddr(stripe, i) == bvec_virt(first_bvec))
break;
}
ASSERT(i < stripe->nr_sectors);
@@ -795,13 +890,13 @@ static void scrub_repair_read_endio(struct btrfs_bio *bbio)
bio_size += bvec->bv_len;
if (bbio->bio.bi_status) {
- bitmap_set(&stripe->io_error_bitmap, sector_nr,
- bio_size >> fs_info->sectorsize_bits);
- bitmap_set(&stripe->error_bitmap, sector_nr,
- bio_size >> fs_info->sectorsize_bits);
+ scrub_bitmap_set_io_error(stripe, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
+ scrub_bitmap_set_error(stripe, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
} else {
- bitmap_clear(&stripe->io_error_bitmap, sector_nr,
- bio_size >> fs_info->sectorsize_bits);
+ scrub_bitmap_clear_io_error(stripe, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
}
bio_put(&bbio->bio);
if (atomic_dec_and_test(&stripe->pending_io))
@@ -814,27 +909,39 @@ static int calc_next_mirror(int mirror, int num_copies)
return (mirror + 1 > num_copies) ? 1 : mirror + 1;
}
+static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *stripe,
+ int sector_nr)
+{
+ void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
+ int ret;
+
+ ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), bbio->fs_info->sectorsize,
+ offset_in_page(kaddr));
+ /*
+ * Caller should ensure the bbio has enough size.
+ * And we cannot use __bio_add_page(), which doesn't do any merge.
+ *
+ * Meanwhile for scrub_submit_initial_read() we fully rely on the merge
+ * to create the minimal amount of bio vectors, for fs block size < page
+ * size cases.
+ */
+ ASSERT(ret == bbio->fs_info->sectorsize);
+}
+
static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
int mirror, int blocksize, bool wait)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- const unsigned long old_error_bitmap = stripe->error_bitmap;
+ const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
int i;
ASSERT(stripe->mirror_num >= 1);
ASSERT(atomic_read(&stripe->pending_io) == 0);
for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) {
- struct page *page;
- int pgoff;
- int ret;
-
- page = scrub_stripe_get_page(stripe, i);
- pgoff = scrub_stripe_get_page_offset(stripe, i);
-
/* The current sector cannot be merged, submit the bio. */
- if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) ||
+ if (bbio && ((i > 0 && !test_bit(i - 1, &old_error_bitmap)) ||
bbio->bio.bi_iter.bi_size >= blocksize)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
@@ -851,8 +958,7 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
(i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
}
- ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
- ASSERT(ret == fs_info->sectorsize);
+ scrub_bio_add_sector(bbio, stripe, i);
}
if (bbio) {
ASSERT(bbio->bio.bi_iter.bi_size);
@@ -864,12 +970,15 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
}
static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
- struct scrub_stripe *stripe)
+ struct scrub_stripe *stripe,
+ const struct scrub_error_records *errors)
{
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_device *dev = NULL;
+ const unsigned long extent_bitmap = scrub_bitmap_read_has_extent(stripe);
+ const unsigned long error_bitmap = scrub_bitmap_read_error(stripe);
u64 physical = 0;
int nr_data_sectors = 0;
int nr_meta_sectors = 0;
@@ -886,7 +995,7 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
* Although our scrub_stripe infrastructure is mostly based on btrfs_submit_bio()
* thus no need for dev/physical, error reporting still needs dev and physical.
*/
- if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) {
+ if (!bitmap_empty(&errors->init_error_bitmap, stripe->nr_sectors)) {
u64 mapped_len = fs_info->sectorsize;
struct btrfs_io_context *bioc = NULL;
int stripe_index = stripe->mirror_num - 1;
@@ -909,10 +1018,10 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
}
skip:
- for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
+ for_each_set_bit(sector_nr, &extent_bitmap, stripe->nr_sectors) {
bool repaired = false;
- if (stripe->sectors[sector_nr].is_metadata) {
+ if (scrub_bitmap_test_bit_is_metadata(stripe, sector_nr)) {
nr_meta_sectors++;
} else {
nr_data_sectors++;
@@ -920,14 +1029,14 @@ skip:
nr_nodatacsum_sectors++;
}
- if (test_bit(sector_nr, &stripe->init_error_bitmap) &&
- !test_bit(sector_nr, &stripe->error_bitmap)) {
+ if (test_bit(sector_nr, &errors->init_error_bitmap) &&
+ !test_bit(sector_nr, &error_bitmap)) {
nr_repaired_sectors++;
repaired = true;
}
/* Good sector from the beginning, nothing need to be done. */
- if (!test_bit(sector_nr, &stripe->init_error_bitmap))
+ if (!test_bit(sector_nr, &errors->init_error_bitmap))
continue;
/*
@@ -960,31 +1069,46 @@ skip:
stripe->logical, stripe->mirror_num);
}
- if (test_bit(sector_nr, &stripe->io_error_bitmap))
+ if (scrub_bitmap_test_bit_io_error(stripe, sector_nr))
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("i/o error", dev, false,
stripe->logical, physical);
- if (test_bit(sector_nr, &stripe->csum_error_bitmap))
+ if (scrub_bitmap_test_bit_csum_error(stripe, sector_nr))
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("checksum error", dev, false,
stripe->logical, physical);
- if (test_bit(sector_nr, &stripe->meta_error_bitmap))
+ if (scrub_bitmap_test_bit_meta_error(stripe, sector_nr))
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("header error", dev, false,
stripe->logical, physical);
+ if (scrub_bitmap_test_bit_meta_gen_error(stripe, sector_nr))
+ if (__ratelimit(&rs) && dev)
+ scrub_print_common_warning("generation error", dev, false,
+ stripe->logical, physical);
}
+ /* Update the device stats. */
+ for (int i = 0; i < errors->nr_io_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
+ for (int i = 0; i < errors->nr_csum_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ /* Generation mismatch error is based on each metadata, not each block. */
+ for (int i = 0; i < errors->nr_meta_gen_errors;
+ i += (fs_info->nodesize >> fs_info->sectorsize_bits))
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
+
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
sctx->stat.no_csum += nr_nodatacsum_sectors;
- sctx->stat.read_errors += stripe->init_nr_io_errors;
- sctx->stat.csum_errors += stripe->init_nr_csum_errors;
- sctx->stat.verify_errors += stripe->init_nr_meta_errors;
+ sctx->stat.read_errors += errors->nr_io_errors;
+ sctx->stat.csum_errors += errors->nr_csum_errors;
+ sctx->stat.verify_errors += errors->nr_meta_errors +
+ errors->nr_meta_gen_errors;
sctx->stat.uncorrectable_errors +=
- bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
+ bitmap_weight(&error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
spin_unlock(&sctx->stat_lock);
}
@@ -1010,26 +1134,26 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
struct scrub_ctx *sctx = stripe->sctx;
struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct scrub_error_records errors = { 0 };
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
unsigned long repaired;
+ unsigned long error;
int mirror;
int i;
ASSERT(stripe->mirror_num > 0);
wait_scrub_stripe_io(stripe);
- scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
+ scrub_verify_one_stripe(stripe, scrub_bitmap_read_has_extent(stripe));
/* Save the initial failed bitmap for later repair and report usage. */
- stripe->init_error_bitmap = stripe->error_bitmap;
- stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap,
- stripe->nr_sectors);
- stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap,
- stripe->nr_sectors);
- stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
- stripe->nr_sectors);
-
- if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
+ errors.init_error_bitmap = scrub_bitmap_read_error(stripe);
+ errors.nr_io_errors = scrub_bitmap_weight_io_error(stripe);
+ errors.nr_csum_errors = scrub_bitmap_weight_csum_error(stripe);
+ errors.nr_meta_errors = scrub_bitmap_weight_meta_error(stripe);
+ errors.nr_meta_gen_errors = scrub_bitmap_weight_meta_gen_error(stripe);
+
+ if (bitmap_empty(&errors.init_error_bitmap, stripe->nr_sectors))
goto out;
/*
@@ -1041,13 +1165,13 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
for (mirror = calc_next_mirror(stripe->mirror_num, num_copies);
mirror != stripe->mirror_num;
mirror = calc_next_mirror(mirror, num_copies)) {
- const unsigned long old_error_bitmap = stripe->error_bitmap;
+ const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
scrub_stripe_submit_repair_read(stripe, mirror,
BTRFS_STRIPE_LEN, false);
wait_scrub_stripe_io(stripe);
scrub_verify_one_stripe(stripe, old_error_bitmap);
- if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ if (scrub_bitmap_empty_error(stripe))
goto out;
}
@@ -1065,21 +1189,22 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
for (i = 0, mirror = stripe->mirror_num;
i < num_copies;
i++, mirror = calc_next_mirror(mirror, num_copies)) {
- const unsigned long old_error_bitmap = stripe->error_bitmap;
+ const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
scrub_stripe_submit_repair_read(stripe, mirror,
fs_info->sectorsize, true);
wait_scrub_stripe_io(stripe);
scrub_verify_one_stripe(stripe, old_error_bitmap);
- if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ if (scrub_bitmap_empty_error(stripe))
goto out;
}
out:
+ error = scrub_bitmap_read_error(stripe);
/*
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
- bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap,
+ bitmap_andnot(&repaired, &errors.init_error_bitmap, &error,
stripe->nr_sectors);
if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) {
if (btrfs_is_zoned(fs_info)) {
@@ -1090,7 +1215,7 @@ out:
}
}
- scrub_stripe_report_errors(sctx, stripe);
+ scrub_stripe_report_errors(sctx, stripe, &errors);
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
wake_up(&stripe->repair_wait);
}
@@ -1110,10 +1235,10 @@ static void scrub_read_endio(struct btrfs_bio *bbio)
num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
if (bbio->bio.bi_status) {
- bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors);
- bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors);
+ scrub_bitmap_set_io_error(stripe, sector_nr, num_sectors);
+ scrub_bitmap_set_error(stripe, sector_nr, num_sectors);
} else {
- bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors);
+ scrub_bitmap_clear_io_error(stripe, sector_nr, num_sectors);
}
bio_put(&bbio->bio);
if (atomic_dec_and_test(&stripe->pending_io)) {
@@ -1142,6 +1267,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
bitmap_set(&stripe->write_error_bitmap, sector_nr,
bio_size >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
+ for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
}
bio_put(&bbio->bio);
@@ -1199,12 +1327,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
int sector_nr;
for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
- struct page *page = scrub_stripe_get_page(stripe, sector_nr);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr);
- int ret;
-
/* We should only writeback sectors covered by an extent. */
- ASSERT(test_bit(sector_nr, &stripe->extent_sector_bitmap));
+ ASSERT(scrub_bitmap_test_bit_has_extent(stripe, sector_nr));
/* Cannot merge with previous sector, submit the current one. */
if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
@@ -1218,8 +1342,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
(sector_nr << fs_info->sectorsize_bits)) >>
SECTOR_SHIFT;
}
- ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
- ASSERT(ret == fs_info->sectorsize);
+ scrub_bio_add_sector(bbio, stripe, sector_nr);
}
if (bbio)
scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
@@ -1493,9 +1616,9 @@ static void fill_one_extent_info(struct btrfs_fs_info *fs_info,
struct scrub_sector_verification *sector =
&stripe->sectors[nr_sector];
- set_bit(nr_sector, &stripe->extent_sector_bitmap);
+ scrub_bitmap_set_bit_has_extent(stripe, nr_sector);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- sector->is_metadata = true;
+ scrub_bitmap_set_bit_is_metadata(stripe, nr_sector);
sector->generation = extent_gen;
}
}
@@ -1503,15 +1626,8 @@ static void fill_one_extent_info(struct btrfs_fs_info *fs_info,
static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
{
- stripe->extent_sector_bitmap = 0;
- stripe->init_error_bitmap = 0;
- stripe->init_nr_io_errors = 0;
- stripe->init_nr_csum_errors = 0;
- stripe->init_nr_meta_errors = 0;
- stripe->error_bitmap = 0;
- stripe->io_error_bitmap = 0;
- stripe->csum_error_bitmap = 0;
- stripe->meta_error_bitmap = 0;
+ ASSERT(stripe->nr_sectors);
+ bitmap_zero(stripe->bitmaps, scrub_bitmap_nr_last * stripe->nr_sectors);
}
/*
@@ -1646,7 +1762,6 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe)
stripe->state = 0;
for (int i = 0; i < stripe->nr_sectors; i++) {
- stripe->sectors[i].is_metadata = false;
stripe->sectors[i].csum = NULL;
stripe->sectors[i].generation = 0;
}
@@ -1665,24 +1780,21 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
+ const unsigned long has_extent = scrub_bitmap_read_has_extent(stripe);
u64 stripe_len = BTRFS_STRIPE_LEN;
int mirror = stripe->mirror_num;
int i;
atomic_inc(&stripe->pending_io);
- for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
- struct page *page = scrub_stripe_get_page(stripe, i);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i);
-
+ for_each_set_bit(i, &has_extent, stripe->nr_sectors) {
/* We're beyond the chunk boundary, no need to read anymore. */
if (i >= nr_sectors)
break;
/* The current sector cannot be merged, submit the bio. */
if (bbio &&
- ((i > 0 &&
- !test_bit(i - 1, &stripe->extent_sector_bitmap)) ||
+ ((i > 0 && !test_bit(i - 1, &has_extent)) ||
bbio->bio.bi_iter.bi_size >= stripe_len)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
@@ -1716,8 +1828,8 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
* the extent tree, then it's a preallocated
* extent and not an error.
*/
- set_bit(i, &stripe->io_error_bitmap);
- set_bit(i, &stripe->error_bitmap);
+ scrub_bitmap_set_bit_io_error(stripe, i);
+ scrub_bitmap_set_bit_error(stripe, i);
}
continue;
}
@@ -1727,7 +1839,7 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
}
- __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
+ scrub_bio_add_sector(bbio, stripe, i);
}
if (bbio) {
@@ -1765,15 +1877,8 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
/* Read the whole range inside the chunk boundary. */
- for (unsigned int cur = 0; cur < nr_sectors; cur++) {
- struct page *page = scrub_stripe_get_page(stripe, cur);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur);
- int ret;
-
- ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
- /* We should have allocated enough bio vectors. */
- ASSERT(ret == fs_info->sectorsize);
- }
+ for (unsigned int cur = 0; cur < nr_sectors; cur++)
+ scrub_bio_add_sector(bbio, stripe, cur);
atomic_inc(&stripe->pending_io);
/*
@@ -1794,10 +1899,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
{
+ const unsigned long error = scrub_bitmap_read_error(stripe);
int i;
- for_each_set_bit(i, &stripe->error_bitmap, stripe->nr_sectors) {
- if (stripe->sectors[i].is_metadata) {
+ for_each_set_bit(i, &error, stripe->nr_sectors) {
+ if (scrub_bitmap_test_bit_is_metadata(stripe, i)) {
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
btrfs_err(fs_info,
@@ -1872,13 +1978,16 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
}
for (int i = 0; i < nr_stripes; i++) {
unsigned long good;
+ unsigned long has_extent;
+ unsigned long error;
stripe = &sctx->stripes[i];
ASSERT(stripe->dev == fs_info->dev_replace.srcdev);
- bitmap_andnot(&good, &stripe->extent_sector_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
+ has_extent = scrub_bitmap_read_has_extent(stripe);
+ error = scrub_bitmap_read_error(stripe);
+ bitmap_andnot(&good, &has_extent, &error, stripe->nr_sectors);
scrub_write_sectors(sctx, stripe, good, true);
}
}
@@ -2012,7 +2121,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
/* Check if all data stripes are empty. */
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
- if (!bitmap_empty(&stripe->extent_sector_bitmap, stripe->nr_sectors)) {
+ if (!scrub_bitmap_empty_has_extent(stripe)) {
all_empty = false;
break;
}
@@ -2044,15 +2153,18 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
*/
for (int i = 0; i < data_stripes; i++) {
unsigned long error;
+ unsigned long has_extent;
stripe = &sctx->raid56_data_stripes[i];
+ error = scrub_bitmap_read_error(stripe);
+ has_extent = scrub_bitmap_read_has_extent(stripe);
+
/*
* We should only check the errors where there is an extent.
* As we may hit an empty data stripe while it's missing.
*/
- bitmap_and(&error, &stripe->error_bitmap,
- &stripe->extent_sector_bitmap, stripe->nr_sectors);
+ bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
if (!bitmap_empty(&error, stripe->nr_sectors)) {
btrfs_err(fs_info,
"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
@@ -2061,8 +2173,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
ret = -EIO;
goto out;
}
- bitmap_or(&extent_bitmap, &extent_bitmap,
- &stripe->extent_sector_bitmap, stripe->nr_sectors);
+ bitmap_or(&extent_bitmap, &extent_bitmap, &has_extent,
+ stripe->nr_sectors);
}
/* Now we can check and regenerate the P/Q stripe. */
@@ -2770,17 +2882,11 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
struct page *page, u64 physical, u64 generation)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct bio_vec bvec;
- struct bio bio;
struct btrfs_super_block *sb = page_address(page);
int ret;
- bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_READ);
- bio.bi_iter.bi_sector = physical >> SECTOR_SHIFT;
- __bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0);
- ret = submit_bio_wait(&bio);
- bio_uninit(&bio);
-
+ ret = bdev_rw_virt(dev->bdev, physical >> SECTOR_SHIFT, sb,
+ BTRFS_SUPER_INFO_SIZE, REQ_OP_READ);
if (ret < 0)
return ret;
ret = btrfs_check_super_csum(fs_info, sb);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 0c8c58c4f29b..2891ec4056c6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -383,11 +383,11 @@ static void inconsistent_snapshot_error(struct send_ctx *sctx,
result_string = "updated";
break;
case BTRFS_COMPARE_TREE_SAME:
- ASSERT(0);
+ DEBUG_WARN("no change between trees");
result_string = "unchanged";
break;
default:
- ASSERT(0);
+ DEBUG_WARN("unexpected comparison result %d", result);
result_string = "unexpected";
}
@@ -816,11 +816,8 @@ static int send_cmd(struct send_ctx *sctx)
static int send_rename(struct send_ctx *sctx,
struct fs_path *from, struct fs_path *to)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret;
- btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
-
ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
if (ret < 0)
return ret;
@@ -840,11 +837,8 @@ tlv_put_failure:
static int send_link(struct send_ctx *sctx,
struct fs_path *path, struct fs_path *lnk)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret;
- btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
-
ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
if (ret < 0)
return ret;
@@ -863,11 +857,8 @@ tlv_put_failure:
*/
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret;
- btrfs_debug(fs_info, "send_unlink %s", path->start);
-
ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
if (ret < 0)
return ret;
@@ -885,11 +876,8 @@ tlv_put_failure:
*/
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret;
- btrfs_debug(fs_info, "send_rmdir %s", path->start);
-
ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
if (ret < 0)
return ret;
@@ -1573,7 +1561,6 @@ static int find_extent_clone(struct send_ctx *sctx,
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret;
int extent_type;
- u64 logical;
u64 disk_byte;
u64 num_bytes;
struct btrfs_file_extent_item *fi;
@@ -1604,7 +1591,6 @@ static int find_extent_clone(struct send_ctx *sctx,
compressed = btrfs_file_extent_compression(eb, fi);
num_bytes = btrfs_file_extent_num_bytes(eb, fi);
- logical = disk_byte + btrfs_file_extent_offset(eb, fi);
/*
* Setup the clone roots.
@@ -1686,14 +1672,8 @@ static int find_extent_clone(struct send_ctx *sctx,
}
up_read(&fs_info->commit_root_sem);
- btrfs_debug(fs_info,
- "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
- data_offset, ino, num_bytes, logical);
-
- if (!backref_ctx.found) {
- btrfs_debug(fs_info, "no clones found");
+ if (!backref_ctx.found)
return -ENOENT;
- }
cur_clone_root = NULL;
for (i = 0; i < sctx->clone_roots_cnt; i++) {
@@ -2631,12 +2611,9 @@ static void free_path_for_command(const struct send_ctx *sctx, struct fs_path *p
static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
- btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
-
p = get_path_for_command(sctx, ino, gen);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -2658,12 +2635,9 @@ out:
static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
- btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
-
p = get_path_for_command(sctx, ino, gen);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -2685,15 +2659,12 @@ out:
static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
if (sctx->proto < 2)
return 0;
- btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr);
-
p = get_path_for_command(sctx, ino, gen);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -2715,13 +2686,9 @@ out:
static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
- btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
- ino, uid, gid);
-
p = get_path_for_command(sctx, ino, gen);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -2744,7 +2711,6 @@ out:
static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p = NULL;
struct btrfs_inode_item *ii;
@@ -2753,8 +2719,6 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
struct btrfs_key key;
int slot;
- btrfs_debug(fs_info, "send_utimes %llu", ino);
-
p = get_path_for_command(sctx, ino, gen);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -2861,7 +2825,6 @@ static int trim_dir_utimes_cache(struct send_ctx *sctx)
*/
static int send_create_inode(struct send_ctx *sctx, u64 ino)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
int cmd;
@@ -2870,8 +2833,6 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
u64 mode;
u64 rdev;
- btrfs_debug(fs_info, "send_create_inode %llu", ino);
-
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -3098,7 +3059,7 @@ static void __free_recorded_refs(struct list_head *head)
struct recorded_ref *cur;
while (!list_empty(head)) {
- cur = list_entry(head->next, struct recorded_ref, list);
+ cur = list_first_entry(head, struct recorded_ref, list);
recorded_ref_free(cur);
}
}
@@ -4224,8 +4185,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
bool orphanized_dir = false;
bool orphanized_ancestor = false;
- btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
-
/*
* This should never happen as the root dir always has the same ref
* which is always '..'
@@ -4560,8 +4519,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
/*
* We have a moved dir. Add the old parent to check_dirs
*/
- cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
- list);
+ cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list);
ret = dup_ref(cur, &check_dirs);
if (ret < 0)
goto out;
@@ -5263,10 +5221,9 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
{
struct btrfs_root *root = sctx->send_root;
struct btrfs_fs_info *fs_info = root->fs_info;
- struct folio *folio;
- pgoff_t index = offset >> PAGE_SHIFT;
- pgoff_t last_index;
- unsigned pg_offset = offset_in_page(offset);
+ u64 cur = offset;
+ const u64 end = offset + len;
+ const pgoff_t last_index = ((end - 1) >> PAGE_SHIFT);
struct address_space *mapping = sctx->cur_inode->i_mapping;
int ret;
@@ -5274,13 +5231,12 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
if (ret)
return ret;
- last_index = (offset + len - 1) >> PAGE_SHIFT;
+ while (cur < end) {
+ pgoff_t index = (cur >> PAGE_SHIFT);
+ unsigned int cur_len;
+ unsigned int pg_offset;
+ struct folio *folio;
- while (index <= last_index) {
- unsigned cur_len = min_t(unsigned, len,
- PAGE_SIZE - pg_offset);
-
-again:
folio = filemap_lock_folio(mapping, index);
if (IS_ERR(folio)) {
page_cache_sync_readahead(mapping,
@@ -5293,8 +5249,8 @@ again:
break;
}
}
-
- WARN_ON(folio_order(folio));
+ pg_offset = offset_in_folio(folio, cur);
+ cur_len = min_t(unsigned int, end - cur, folio_size(folio) - pg_offset);
if (folio_test_readahead(folio))
page_cache_async_readahead(mapping, &sctx->ra, NULL, folio,
@@ -5316,7 +5272,7 @@ again:
if (folio->mapping != mapping) {
folio_unlock(folio);
folio_put(folio);
- goto again;
+ continue;
}
}
@@ -5324,9 +5280,7 @@ again:
pg_offset, cur_len);
folio_unlock(folio);
folio_put(folio);
- index++;
- pg_offset = 0;
- len -= cur_len;
+ cur += cur_len;
sctx->send_size += cur_len;
}
@@ -5339,12 +5293,9 @@ again:
*/
static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
{
- struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
- btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
-
p = get_cur_inode_path(sctx);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -5377,11 +5328,6 @@ static int send_clone(struct send_ctx *sctx,
struct fs_path *cur_inode_path;
u64 gen;
- btrfs_debug(sctx->send_root->fs_info,
- "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
- offset, len, btrfs_root_id(clone_root->root),
- clone_root->ino, clone_root->offset);
-
cur_inode_path = get_cur_inode_path(sctx);
if (IS_ERR(cur_inode_path))
return PTR_ERR(cur_inode_path);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ff089e3e4103..d9087aa81b21 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -50,11 +50,11 @@
* num_bytes we want to reserve.
*
* ->reserve
- * space_info->bytes_may_reserve += num_bytes
+ * space_info->bytes_may_use += num_bytes
*
* ->extent allocation
* Call btrfs_add_reserved_bytes() which does
- * space_info->bytes_may_reserve -= num_bytes
+ * space_info->bytes_may_use -= num_bytes
* space_info->bytes_reserved += extent_bytes
*
* ->insert reference
@@ -234,19 +234,11 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
WRITE_ONCE(space_info->chunk_size, chunk_size);
}
-static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+static void init_space_info(struct btrfs_fs_info *info,
+ struct btrfs_space_info *space_info, u64 flags)
{
-
- struct btrfs_space_info *space_info;
- int i;
- int ret;
-
- space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
- if (!space_info)
- return -ENOMEM;
-
space_info->fs_info = info;
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&space_info->block_groups[i]);
init_rwsem(&space_info->groups_sem);
spin_lock_init(&space_info->lock);
@@ -257,9 +249,64 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
INIT_LIST_HEAD(&space_info->priority_tickets);
space_info->clamp = 1;
btrfs_update_space_info_chunk_size(space_info, calc_chunk_size(info, flags));
+ space_info->subgroup_id = BTRFS_SUB_GROUP_PRIMARY;
if (btrfs_is_zoned(info))
space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
+}
+
+static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
+ enum btrfs_space_info_sub_group id, int index)
+{
+ struct btrfs_fs_info *fs_info = parent->fs_info;
+ struct btrfs_space_info *sub_group;
+ int ret;
+
+ ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
+ ASSERT(id != BTRFS_SUB_GROUP_PRIMARY);
+
+ sub_group = kzalloc(sizeof(*sub_group), GFP_NOFS);
+ if (!sub_group)
+ return -ENOMEM;
+
+ init_space_info(fs_info, sub_group, flags);
+ parent->sub_group[index] = sub_group;
+ sub_group->parent = parent;
+ sub_group->subgroup_id = id;
+
+ ret = btrfs_sysfs_add_space_info_type(fs_info, sub_group);
+ if (ret) {
+ kfree(sub_group);
+ parent->sub_group[index] = NULL;
+ }
+ return ret;
+}
+
+static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+{
+
+ struct btrfs_space_info *space_info;
+ int ret = 0;
+
+ space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
+ if (!space_info)
+ return -ENOMEM;
+
+ init_space_info(info, space_info, flags);
+
+ if (btrfs_is_zoned(info)) {
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ ret = create_space_info_sub_group(space_info, flags,
+ BTRFS_SUB_GROUP_DATA_RELOC,
+ 0);
+ else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+ ret = create_space_info_sub_group(space_info, flags,
+ BTRFS_SUB_GROUP_TREELOG,
+ 0);
+
+ if (ret)
+ return ret;
+ }
ret = btrfs_sysfs_add_space_info_type(info, space_info);
if (ret)
@@ -312,31 +359,29 @@ out:
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
struct btrfs_block_group *block_group)
{
- struct btrfs_space_info *found;
+ struct btrfs_space_info *space_info = block_group->space_info;
int factor, index;
factor = btrfs_bg_type_to_factor(block_group->flags);
- found = btrfs_find_space_info(info, block_group->flags);
- ASSERT(found);
- spin_lock(&found->lock);
- found->total_bytes += block_group->length;
- found->disk_total += block_group->length * factor;
- found->bytes_used += block_group->used;
- found->disk_used += block_group->used * factor;
- found->bytes_readonly += block_group->bytes_super;
- btrfs_space_info_update_bytes_zone_unusable(found, block_group->zone_unusable);
+ spin_lock(&space_info->lock);
+ space_info->total_bytes += block_group->length;
+ space_info->disk_total += block_group->length * factor;
+ space_info->bytes_used += block_group->used;
+ space_info->disk_used += block_group->used * factor;
+ space_info->bytes_readonly += block_group->bytes_super;
+ btrfs_space_info_update_bytes_zone_unusable(space_info, block_group->zone_unusable);
if (block_group->length > 0)
- found->full = 0;
- btrfs_try_granting_tickets(info, found);
- spin_unlock(&found->lock);
+ space_info->full = 0;
+ btrfs_try_granting_tickets(info, space_info);
+ spin_unlock(&space_info->lock);
- block_group->space_info = found;
+ block_group->space_info = space_info;
index = btrfs_bg_flags_to_raid_index(block_group->flags);
- down_write(&found->groups_sem);
- list_add_tail(&block_group->list, &found->block_groups[index]);
- up_write(&found->groups_sem);
+ down_write(&space_info->groups_sem);
+ list_add_tail(&block_group->list, &space_info->block_groups[index]);
+ up_write(&space_info->groups_sem);
}
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
@@ -556,8 +601,9 @@ static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
lockdep_assert_held(&info->lock);
/* The free space could be negative in case of overcommit */
- btrfs_info(fs_info, "space_info %s has %lld free, is %sfull",
- flag_str,
+ btrfs_info(fs_info,
+ "space_info %s (sub-group id %d) has %lld free, is %sfull",
+ flag_str, info->subgroup_id,
(s64)(info->total_bytes - btrfs_space_info_used(info, true)),
info->full ? "" : "not ");
btrfs_info(fs_info,
@@ -812,7 +858,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
ret = PTR_ERR(trans);
break;
}
- ret = btrfs_chunk_alloc(trans,
+ ret = btrfs_chunk_alloc(trans, space_info,
btrfs_get_alloc_profile(fs_info, space_info->flags),
(state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
CHUNK_ALLOC_FORCE);
@@ -1083,23 +1129,15 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
return (tickets_id != space_info->tickets_id);
}
-/*
- * This is for normal flushers, we can wait all goddamned day if we want to. We
- * will loop and continuously try to flush as long as we are making progress.
- * We count progress as clearing off tickets each time we have to loop.
- */
-static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+static void do_async_reclaim_metadata_space(struct btrfs_space_info *space_info)
{
- struct btrfs_fs_info *fs_info;
- struct btrfs_space_info *space_info;
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 to_reclaim;
enum btrfs_flush_state flush_state;
int commit_cycles = 0;
u64 last_tickets_id;
enum btrfs_flush_state final_state;
- fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
- space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
if (btrfs_is_zoned(fs_info))
final_state = RESET_ZONES;
else
@@ -1174,6 +1212,25 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
}
/*
+ * This is for normal flushers, it can wait as much time as needed. We will
+ * loop and continuously try to flush as long as we are making progress. We
+ * count progress as clearing off tickets each time we have to loop.
+ */
+static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_space_info *space_info;
+
+ fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+ do_async_reclaim_metadata_space(space_info);
+ for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
+ if (space_info->sub_group[i])
+ do_async_reclaim_metadata_space(space_info->sub_group[i]);
+ }
+}
+
+/*
* This handles pre-flushing of metadata space before we get to the point that
* we need to start blocking threads on tickets. The logic here is different
* from the other flush paths because it doesn't rely on tickets to tell us how
@@ -1318,16 +1375,12 @@ static const enum btrfs_flush_state data_flush_states[] = {
ALLOC_CHUNK_FORCE,
};
-static void btrfs_async_reclaim_data_space(struct work_struct *work)
+static void do_async_reclaim_data_space(struct btrfs_space_info *space_info)
{
- struct btrfs_fs_info *fs_info;
- struct btrfs_space_info *space_info;
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 last_tickets_id;
enum btrfs_flush_state flush_state = 0;
- fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work);
- space_info = fs_info->data_sinfo;
-
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
@@ -1395,6 +1448,19 @@ aborted_fs:
spin_unlock(&space_info->lock);
}
+static void btrfs_async_reclaim_data_space(struct work_struct *work)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_space_info *space_info;
+
+ fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work);
+ space_info = fs_info->data_sinfo;
+ do_async_reclaim_data_space(space_info);
+ for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++)
+ if (space_info->sub_group[i])
+ do_async_reclaim_data_space(space_info->sub_group[i]);
+}
+
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
{
INIT_WORK(&fs_info->async_reclaim_work, btrfs_async_reclaim_metadata_space);
@@ -1836,10 +1902,10 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* This will reserve bytes from the data space info. If there is not enough
* space then we will attempt to flush space as specified by flush.
*/
-int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
+int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
{
- struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
int ret;
ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
@@ -1847,12 +1913,12 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
flush == BTRFS_RESERVE_NO_FLUSH);
ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
- ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
+ ret = __reserve_bytes(fs_info, space_info, bytes, flush);
if (ret == -ENOSPC) {
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
- data_sinfo->flags, bytes, 1);
+ space_info->flags, bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, data_sinfo, bytes, 0);
+ btrfs_dump_space_info(fs_info, space_info, bytes, 0);
}
return ret;
}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index a96efdb5e681..92b7f5e2b850 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -98,8 +98,18 @@ enum btrfs_flush_state {
RESET_ZONES = 12,
};
+enum btrfs_space_info_sub_group {
+ BTRFS_SUB_GROUP_PRIMARY,
+ BTRFS_SUB_GROUP_DATA_RELOC,
+ BTRFS_SUB_GROUP_TREELOG,
+};
+
+#define BTRFS_SPACE_INFO_SUB_GROUP_MAX 1
struct btrfs_space_info {
struct btrfs_fs_info *fs_info;
+ struct btrfs_space_info *parent;
+ struct btrfs_space_info *sub_group[BTRFS_SPACE_INFO_SUB_GROUP_MAX];
+ int subgroup_id;
spinlock_t lock;
u64 total_bytes; /* total bytes in the space,
@@ -288,7 +298,7 @@ static inline void btrfs_space_info_free_bytes_may_use(
btrfs_try_granting_tickets(space_info->fs_info, space_info);
spin_unlock(&space_info->lock);
}
-int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
+int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush);
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index c0a0b8b063d0..d4f019233493 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -69,7 +69,8 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
struct btrfs_subpage *subpage;
/* For metadata we don't support large folio yet. */
- ASSERT(!folio_test_large(folio));
+ if (type == BTRFS_SUBPAGE_METADATA)
+ ASSERT(!folio_test_large(folio));
/*
* We have cases like a dummy extent buffer page, which is not mapped
@@ -181,9 +182,6 @@ void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *
static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- /* For subpage support, the folio must be single page. */
- ASSERT(folio_order(folio) == 0);
-
/* Basic checks */
ASSERT(folio_test_private(folio) && folio_get_private(folio));
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 7121d8c7a318..a0c65adce1ab 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -125,7 +125,6 @@ enum {
/* Rescue options */
Opt_rescue,
Opt_usebackuproot,
- Opt_nologreplay,
/* Debugging options */
Opt_enospc_debug,
@@ -246,8 +245,6 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = {
/* Rescue options. */
fsparam_enum("rescue", Opt_rescue, btrfs_parameter_rescue),
- /* Deprecated, with alias rescue=nologreplay */
- __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL),
/* Deprecated, with alias rescue=usebackuproot */
__fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL),
/* For compatibility only, alias for "rescue=nologreplay". */
@@ -449,11 +446,6 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
else
btrfs_clear_opt(ctx->mount_opt, NOTREELOG);
break;
- case Opt_nologreplay:
- btrfs_warn(NULL,
- "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
- btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
- break;
case Opt_norecovery:
btrfs_info(NULL,
"'norecovery' is for compatibility only, recommended to use 'rescue=nologreplay'");
@@ -569,6 +561,10 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_commit_interval:
ctx->commit_interval = result.uint_32;
+ if (ctx->commit_interval > BTRFS_WARNING_COMMIT_INTERVAL) {
+ btrfs_warn(NULL, "excessive commit interval %u, use with care",
+ ctx->commit_interval);
+ }
if (ctx->commit_interval == 0)
ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
break;
@@ -1148,11 +1144,11 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
/*
* subvolumes are identified by ino 256
*/
-static inline int is_subvolume_inode(struct inode *inode)
+static inline bool is_subvolume_inode(struct inode *inode)
{
if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
- return 1;
- return 0;
+ return true;
+ return false;
}
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
@@ -2292,7 +2288,7 @@ static int check_dev_super(struct btrfs_device *dev)
return 0;
/* Only need to check the primary super block. */
- sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
+ sb = btrfs_read_disk_super(dev->bdev, 0, true);
if (IS_ERR(sb))
return PTR_ERR(sb);
@@ -2525,8 +2521,8 @@ static const struct init_sequence mod_init_seq[] = {
.init_func = btrfs_free_space_init,
.exit_func = btrfs_free_space_exit,
}, {
- .init_func = extent_state_init_cachep,
- .exit_func = extent_state_free_cachep,
+ .init_func = btrfs_extent_state_init_cachep,
+ .exit_func = btrfs_extent_state_free_cachep,
}, {
.init_func = extent_buffer_init_cachep,
.exit_func = extent_buffer_free_cachep,
@@ -2534,8 +2530,8 @@ static const struct init_sequence mod_init_seq[] = {
.init_func = btrfs_bioset_init,
.exit_func = btrfs_bioset_exit,
}, {
- .init_func = extent_map_init,
- .exit_func = extent_map_exit,
+ .init_func = btrfs_extent_map_init,
+ .exit_func = btrfs_extent_map_exit,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
}, {
.init_func = btrfs_read_policy_init,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index b9af74498b0c..5d93d9dd2c12 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1930,16 +1930,35 @@ void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info)
kobject_put(&space_info->kobj);
}
-static const char *alloc_name(u64 flags)
+static const char *alloc_name(struct btrfs_space_info *space_info)
{
+ u64 flags = space_info->flags;
+
switch (flags) {
case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA:
return "mixed";
case BTRFS_BLOCK_GROUP_METADATA:
- return "metadata";
+ switch (space_info->subgroup_id) {
+ case BTRFS_SUB_GROUP_PRIMARY:
+ return "metadata";
+ case BTRFS_SUB_GROUP_TREELOG:
+ return "metadata-treelog";
+ default:
+ WARN_ON_ONCE(1);
+ return "metadata (unknown sub-group)";
+ }
case BTRFS_BLOCK_GROUP_DATA:
- return "data";
+ switch (space_info->subgroup_id) {
+ case BTRFS_SUB_GROUP_PRIMARY:
+ return "data";
+ case BTRFS_SUB_GROUP_DATA_RELOC:
+ return "data-reloc";
+ default:
+ WARN_ON_ONCE(1);
+ return "data (unknown sub-group)";
+ }
case BTRFS_BLOCK_GROUP_SYSTEM:
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
return "system";
default:
WARN_ON(1);
@@ -1958,7 +1977,7 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
fs_info->space_info_kobj, "%s",
- alloc_name(space_info->flags));
+ alloc_name(space_info));
if (ret) {
kobject_put(&space_info->kobj);
return ret;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 5eff8d7d2360..b576897d71cc 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -102,7 +102,7 @@ struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info)
if (!dev)
return ERR_PTR(-ENOMEM);
- extent_io_tree_init(fs_info, &dev->alloc_state, 0);
+ btrfs_extent_io_tree_init(fs_info, &dev->alloc_state, 0);
INIT_LIST_HEAD(&dev->dev_list);
list_add(&dev->dev_list, &fs_info->fs_devices->devices);
@@ -111,7 +111,7 @@ struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info)
static void btrfs_free_dummy_device(struct btrfs_device *dev)
{
- extent_io_tree_release(&dev->alloc_state);
+ btrfs_extent_io_tree_release(&dev->alloc_state);
kfree(dev);
}
@@ -157,9 +157,9 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
- struct radix_tree_iter iter;
- void **slot;
struct btrfs_device *dev, *tmp;
+ struct extent_buffer *eb;
+ unsigned long index;
if (!fs_info)
return;
@@ -169,25 +169,13 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
test_mnt->mnt_sb->s_fs_info = NULL;
- spin_lock(&fs_info->buffer_lock);
- radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
- struct extent_buffer *eb;
-
- eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
- if (!eb)
- continue;
- /* Shouldn't happen but that kind of thinking creates CVE's */
- if (radix_tree_exception(eb)) {
- if (radix_tree_deref_retry(eb))
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- spin_unlock(&fs_info->buffer_lock);
- free_extent_buffer_stale(eb);
- spin_lock(&fs_info->buffer_lock);
+ xa_lock_irq(&fs_info->buffer_tree);
+ xa_for_each(&fs_info->buffer_tree, index, eb) {
+ xa_unlock_irq(&fs_info->buffer_tree);
+ free_extent_buffer(eb);
+ xa_lock_irq(&fs_info->buffer_tree);
}
- spin_unlock(&fs_info->buffer_lock);
+ xa_unlock_irq(&fs_info->buffer_tree);
btrfs_mapping_tree_free(fs_info);
list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 74aca7180a5a..00da54f0164c 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -14,9 +14,9 @@
#include "../disk-io.h"
#include "../btrfs_inode.h"
-#define PROCESS_UNLOCK (1 << 0)
-#define PROCESS_RELEASE (1 << 1)
-#define PROCESS_TEST_LOCKED (1 << 2)
+#define PROCESS_UNLOCK (1U << 0)
+#define PROCESS_RELEASE (1U << 1)
+#define PROCESS_TEST_LOCKED (1U << 2)
static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
unsigned long flags)
@@ -74,7 +74,6 @@ static void extent_flag_to_str(const struct extent_state *state, char *dest)
dest[0] = 0;
PRINT_ONE_FLAG(state, dest, cur, DIRTY);
- PRINT_ONE_FLAG(state, dest, cur, UPTODATE);
PRINT_ONE_FLAG(state, dest, cur, LOCKED);
PRINT_ONE_FLAG(state, dest, cur, NEW);
PRINT_ONE_FLAG(state, dest, cur, DELALLOC);
@@ -150,7 +149,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
* Passing NULL as we don't have fs_info but tracepoints are not used
* at this point
*/
- extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST);
+ btrfs_extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST);
/*
* First go through and create and mark all of our pages dirty, we pin
@@ -177,7 +176,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_bit(tmp, 0, sectorsize - 1, EXTENT_DELALLOC, NULL);
+ btrfs_set_extent_bit(tmp, 0, sectorsize - 1, EXTENT_DELALLOC, NULL);
start = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
@@ -191,7 +190,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
sectorsize - 1, start, end);
goto out_bits;
}
- unlock_extent(tmp, start, end, NULL);
+ btrfs_unlock_extent(tmp, start, end, NULL);
unlock_page(locked_page);
put_page(locked_page);
@@ -208,7 +207,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
test_err("couldn't find the locked page");
goto out_bits;
}
- set_extent_bit(tmp, sectorsize, max_bytes - 1, EXTENT_DELALLOC, NULL);
+ btrfs_set_extent_bit(tmp, sectorsize, max_bytes - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
@@ -227,7 +226,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
test_err("there were unlocked pages in the range");
goto out_bits;
}
- unlock_extent(tmp, start, end, NULL);
+ btrfs_unlock_extent(tmp, start, end, NULL);
/* locked_page was unlocked above */
put_page(locked_page);
@@ -263,7 +262,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
*
* We are re-using our test_start from above since it works out well.
*/
- set_extent_bit(tmp, max_bytes, total_dirty - 1, EXTENT_DELALLOC, NULL);
+ btrfs_set_extent_bit(tmp, max_bytes, total_dirty - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
@@ -282,7 +281,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
test_err("pages in range were not all locked");
goto out_bits;
}
- unlock_extent(tmp, start, end, NULL);
+ btrfs_unlock_extent(tmp, start, end, NULL);
/*
* Now to test where we run into a page that is no longer dirty in the
@@ -327,7 +326,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
out_bits:
if (ret)
dump_extent_io_tree(tmp);
- clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1);
+ btrfs_clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1);
out:
if (locked_page)
put_page(locked_page);
@@ -565,10 +564,10 @@ static int test_find_first_clear_extent_bit(void)
test_msg("running find_first_clear_extent_bit test");
- extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST);
+ btrfs_extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST);
/* Test correct handling of empty tree */
- find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
+ btrfs_find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
if (start != 0 || end != -1) {
test_err(
"error getting a range from completely empty tree: start %llu end %llu",
@@ -579,11 +578,11 @@ static int test_find_first_clear_extent_bit(void)
* Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
* 4M-32M
*/
- set_extent_bit(&tree, SZ_1M, SZ_4M - 1,
- CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
+ btrfs_set_extent_bit(&tree, SZ_1M, SZ_4M - 1,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
- find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
- CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ btrfs_find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED);
if (start != 0 || end != SZ_1M - 1) {
test_err("error finding beginning range: start %llu end %llu",
@@ -592,14 +591,14 @@ static int test_find_first_clear_extent_bit(void)
}
/* Now add 32M-64M so that we have a hole between 4M-32M */
- set_extent_bit(&tree, SZ_32M, SZ_64M - 1,
- CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
+ btrfs_set_extent_bit(&tree, SZ_32M, SZ_64M - 1,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
/*
* Request first hole starting at 12M, we should get 4M-32M
*/
- find_first_clear_extent_bit(&tree, 12 * SZ_1M, &start, &end,
- CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ btrfs_find_first_clear_extent_bit(&tree, 12 * SZ_1M, &start, &end,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED);
if (start != SZ_4M || end != SZ_32M - 1) {
test_err("error finding trimmed range: start %llu end %llu",
@@ -611,8 +610,8 @@ static int test_find_first_clear_extent_bit(void)
* Search in the middle of allocated range, should get the next one
* available, which happens to be unallocated -> 4M-32M
*/
- find_first_clear_extent_bit(&tree, SZ_2M, &start, &end,
- CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ btrfs_find_first_clear_extent_bit(&tree, SZ_2M, &start, &end,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED);
if (start != SZ_4M || end != SZ_32M - 1) {
test_err("error finding next unalloc range: start %llu end %llu",
@@ -624,9 +623,9 @@ static int test_find_first_clear_extent_bit(void)
* Set 64M-72M with CHUNK_ALLOC flag, then search for CHUNK_TRIMMED flag
* being unset in this range, we should get the entry in range 64M-72M
*/
- set_extent_bit(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED, NULL);
- find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
- CHUNK_TRIMMED);
+ btrfs_set_extent_bit(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED, NULL);
+ btrfs_find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
+ CHUNK_TRIMMED);
if (start != SZ_64M || end != SZ_64M + SZ_8M - 1) {
test_err("error finding exact range: start %llu end %llu",
@@ -634,8 +633,8 @@ static int test_find_first_clear_extent_bit(void)
goto out;
}
- find_first_clear_extent_bit(&tree, SZ_64M - SZ_8M, &start, &end,
- CHUNK_TRIMMED);
+ btrfs_find_first_clear_extent_bit(&tree, SZ_64M - SZ_8M, &start, &end,
+ CHUNK_TRIMMED);
/*
* Search in the middle of set range whose immediate neighbour doesn't
@@ -651,7 +650,7 @@ static int test_find_first_clear_extent_bit(void)
* Search beyond any known range, shall return after last known range
* and end should be -1
*/
- find_first_clear_extent_bit(&tree, -1, &start, &end, CHUNK_TRIMMED);
+ btrfs_find_first_clear_extent_bit(&tree, -1, &start, &end, CHUNK_TRIMMED);
if (start != SZ_64M + SZ_8M || end != -1) {
test_err(
"error handling beyond end of range search: start %llu end %llu",
@@ -663,7 +662,7 @@ static int test_find_first_clear_extent_bit(void)
out:
if (ret)
dump_extent_io_tree(&tree);
- clear_extent_bits(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ btrfs_clear_extent_bits(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED);
return ret;
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 609bb6c9c087..3a86534c116f 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -22,7 +22,7 @@ static int free_extent_map_tree(struct btrfs_inode *inode)
while (!RB_EMPTY_ROOT(&em_tree->root)) {
node = rb_first(&em_tree->root);
em = rb_entry(node, struct extent_map, rb_node);
- remove_extent_mapping(inode, em);
+ btrfs_remove_extent_mapping(inode, em);
#ifdef CONFIG_BTRFS_DEBUG
if (refcount_read(&em->refs) != 1) {
@@ -36,7 +36,7 @@ static int free_extent_map_tree(struct btrfs_inode *inode)
refcount_set(&em->refs, 1);
}
#endif
- free_extent_map(em);
+ btrfs_free_extent_map(em);
}
write_unlock(&em_tree->lock);
@@ -68,7 +68,7 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
int ret;
int ret2;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -87,10 +87,10 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_err("cannot add extent range [0, 16K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Add [16K, 20K) following [0, 16K) */
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -109,9 +109,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_err("cannot add extent range [16K, 20K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -137,7 +137,7 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
ret = -ENOENT;
goto out;
}
- if (em->start != 0 || extent_map_end(em) != SZ_16K ||
+ if (em->start != 0 || btrfs_extent_map_end(em) != SZ_16K ||
em->disk_bytenr != 0 || em->disk_num_bytes != SZ_16K) {
test_err(
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu",
@@ -145,7 +145,7 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
em->disk_bytenr, em->disk_num_bytes);
ret = -EINVAL;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
@@ -167,7 +167,7 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
int ret;
int ret2;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -186,10 +186,10 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_err("cannot add extent range [0, 1K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Add [4K, 8K) following [0, 1K) */
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -208,9 +208,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_err("cannot add extent range [4K, 8K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -235,14 +235,14 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
ret = -ENOENT;
goto out;
}
- if (em->start != 0 || extent_map_end(em) != SZ_1K ||
+ if (em->start != 0 || btrfs_extent_map_end(em) != SZ_1K ||
em->disk_bytenr != EXTENT_MAP_INLINE) {
test_err(
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu disk_bytenr %llu",
ret, em->start, em->len, em->disk_bytenr);
ret = -EINVAL;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
@@ -260,7 +260,7 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
int ret;
int ret2;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -279,9 +279,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
test_err("cannot add extent range [4K, 8K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -312,15 +312,15 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
* Since bytes within em are contiguous, em->block_start is identical to
* em->start.
*/
- if (start < em->start || start + len > extent_map_end(em) ||
- em->start != extent_map_block_start(em)) {
+ if (start < em->start || start + len > btrfs_extent_map_end(em) ||
+ em->start != btrfs_extent_map_block_start(em)) {
test_err(
"case3 [%llu %llu): ret %d em (start %llu len %llu disk_bytenr %llu block_len %llu)",
start, start + len, ret, em->start, em->len,
em->disk_bytenr, em->disk_num_bytes);
ret = -EINVAL;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
@@ -369,7 +369,7 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
int ret;
int ret2;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -388,9 +388,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
test_err("cannot add extent range [0, 8K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -410,9 +410,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
test_err("cannot add extent range [8K, 32K)");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -438,14 +438,14 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
ret = -ENOENT;
goto out;
}
- if (start < em->start || start + len > extent_map_end(em)) {
+ if (start < em->start || start + len > btrfs_extent_map_end(em)) {
test_err(
"case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu)",
start, start + len, ret, em->start, em->len,
em->disk_bytenr, em->disk_num_bytes);
ret = -EINVAL;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
@@ -498,7 +498,7 @@ static int add_compressed_extent(struct btrfs_inode *inode,
struct extent_map *em;
int ret;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -513,7 +513,7 @@ static int add_compressed_extent(struct btrfs_inode *inode,
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
if (ret < 0) {
test_err("cannot add extent map [%llu, %llu)", start, start + len);
return ret;
@@ -719,7 +719,7 @@ static int test_case_6(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
if (ret)
goto out;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -751,7 +751,7 @@ static int test_case_6(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
}
ret = 0;
out:
- free_extent_map(em);
+ btrfs_free_extent_map(em);
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
@@ -773,7 +773,7 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_msg("Running btrfs_drop_extent_cache with pinned");
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -793,9 +793,9 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_err("couldn't add extent map");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -815,7 +815,7 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
test_err("couldn't add extent map");
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/*
* Drop [0, 36K) This should skip the [0, 4K) extent and then split the
@@ -826,7 +826,7 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* Make sure our extent maps look sane. */
ret = -EINVAL;
- em = lookup_extent_mapping(em_tree, 0, SZ_16K);
+ em = btrfs_lookup_extent_mapping(em_tree, 0, SZ_16K);
if (!em) {
test_err("didn't find an em at 0 as expected");
goto out;
@@ -842,10 +842,10 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
+ em = btrfs_lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
read_unlock(&em_tree->lock);
if (em) {
test_err("found an em when we weren't expecting one");
@@ -853,7 +853,7 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
}
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
+ em = btrfs_lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
read_unlock(&em_tree->lock);
if (!em) {
test_err("didn't find an em at 32K as expected");
@@ -870,16 +870,16 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
goto out;
}
- if (extent_map_block_start(em) != SZ_32K + SZ_4K) {
+ if (btrfs_extent_map_block_start(em) != SZ_32K + SZ_4K) {
test_err("em->block_start is %llu, expected 36K",
- extent_map_block_start(em));
+ btrfs_extent_map_block_start(em));
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
+ em = btrfs_lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
read_unlock(&em_tree->lock);
if (em) {
test_err("found an unexpected em above 48K");
@@ -888,9 +888,9 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
ret = 0;
out:
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Unpin our extent to prevent warning when removing it below. */
- ret2 = unpin_extent_cache(inode, 0, SZ_16K, 0);
+ ret2 = btrfs_unpin_extent_cache(inode, 0, SZ_16K, 0);
if (ret == 0)
ret = ret2;
ret2 = free_extent_map_tree(inode);
@@ -913,7 +913,7 @@ static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
int ret;
int ret2;
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
@@ -928,13 +928,13 @@ static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
if (ret < 0) {
test_err("couldn't add extent map for range [120K, 128K)");
goto out;
}
- em = alloc_extent_map();
+ em = btrfs_alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
@@ -967,7 +967,7 @@ static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K);
write_unlock(&em_tree->lock);
- free_extent_map(em);
+ btrfs_free_extent_map(em);
if (ret < 0) {
test_err("couldn't add extent map for range [108K, 144K)");
goto out;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 3ea3bc2225fe..a29d2c02c2c8 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -268,7 +268,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("expected a hole, got %llu", em->disk_bytenr);
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);
/*
@@ -314,7 +314,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
* this?
*/
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -336,7 +336,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Regular extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
@@ -363,7 +363,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* The next 3 are split extents */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
@@ -389,10 +389,10 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("wrong offset, want 0, have %llu", em->offset);
goto out;
}
- disk_bytenr = extent_map_block_start(em);
+ disk_bytenr = btrfs_extent_map_block_start(em);
orig_start = em->start;
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -414,7 +414,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -441,13 +441,13 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
disk_bytenr += (em->start - orig_start);
- if (extent_map_block_start(em) != disk_bytenr) {
+ if (btrfs_extent_map_block_start(em) != disk_bytenr) {
test_err("wrong block start, want %llu, have %llu",
- disk_bytenr, extent_map_block_start(em));
+ disk_bytenr, btrfs_extent_map_block_start(em));
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Prealloc extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
@@ -475,7 +475,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* The next 3 are a half written prealloc extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
@@ -502,10 +502,10 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("wrong offset, want 0, have %llu", em->offset);
goto out;
}
- disk_bytenr = extent_map_block_start(em);
+ disk_bytenr = btrfs_extent_map_block_start(em);
orig_start = em->start;
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -531,13 +531,13 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em->start - orig_start, em->offset);
goto out;
}
- if (extent_map_block_start(em) != disk_bytenr + em->offset) {
+ if (btrfs_extent_map_block_start(em) != disk_bytenr + em->offset) {
test_err("unexpected block start, wanted %llu, have %llu",
- disk_bytenr + em->offset, extent_map_block_start(em));
+ disk_bytenr + em->offset, btrfs_extent_map_block_start(em));
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -564,13 +564,13 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em->start, em->offset, orig_start);
goto out;
}
- if (extent_map_block_start(em) != disk_bytenr + em->offset) {
+ if (btrfs_extent_map_block_start(em) != disk_bytenr + em->offset) {
test_err("unexpected block start, wanted %llu, have %llu",
- disk_bytenr + em->offset, extent_map_block_start(em));
+ disk_bytenr + em->offset, btrfs_extent_map_block_start(em));
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Now for the compressed extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
@@ -597,13 +597,13 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("wrong offset, want 0, have %llu", em->offset);
goto out;
}
- if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
+ if (btrfs_extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
test_err("unexpected compress type, wanted %d, got %d",
- BTRFS_COMPRESS_ZLIB, extent_map_compression(em));
+ BTRFS_COMPRESS_ZLIB, btrfs_extent_map_compression(em));
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* Split compressed extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
@@ -630,15 +630,15 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("wrong offset, want 0, have %llu", em->offset);
goto out;
}
- if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
+ if (btrfs_extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
test_err("unexpected compress type, wanted %d, got %d",
- BTRFS_COMPRESS_ZLIB, extent_map_compression(em));
+ BTRFS_COMPRESS_ZLIB, btrfs_extent_map_compression(em));
goto out;
}
- disk_bytenr = extent_map_block_start(em);
+ disk_bytenr = btrfs_extent_map_block_start(em);
orig_start = em->start;
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -664,16 +664,16 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
}
- if (extent_map_block_start(em) != disk_bytenr) {
+ if (btrfs_extent_map_block_start(em) != disk_bytenr) {
test_err("block start does not match, want %llu got %llu",
- disk_bytenr, extent_map_block_start(em));
+ disk_bytenr, btrfs_extent_map_block_start(em));
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
@@ -692,13 +692,13 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em->start, em->offset, orig_start);
goto out;
}
- if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
+ if (btrfs_extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
test_err("unexpected compress type, wanted %d, got %d",
- BTRFS_COMPRESS_ZLIB, extent_map_compression(em));
+ BTRFS_COMPRESS_ZLIB, btrfs_extent_map_compression(em));
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
/* A hole between regular extents but no hole extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset + 6, sectorsize);
@@ -725,7 +725,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, SZ_4M);
if (IS_ERR(em)) {
@@ -757,7 +757,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
offset = em->start + em->len;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
@@ -785,7 +785,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
ret = 0;
out:
if (!IS_ERR(em))
- free_extent_map(em);
+ btrfs_free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
@@ -858,15 +858,16 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
em->flags);
goto out;
}
- free_extent_map(em);
+ btrfs_free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, sectorsize, 2 * sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
}
- if (extent_map_block_start(em) != sectorsize) {
- test_err("expected a real extent, got %llu", extent_map_block_start(em));
+ if (btrfs_extent_map_block_start(em) != sectorsize) {
+ test_err("expected a real extent, got %llu",
+ btrfs_extent_map_block_start(em));
goto out;
}
if (em->start != sectorsize || em->len != sectorsize) {
@@ -883,7 +884,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
ret = 0;
out:
if (!IS_ERR(em))
- free_extent_map(em);
+ btrfs_free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
@@ -949,11 +950,10 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */
- ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
- BTRFS_MAX_EXTENT_SIZE >> 1,
- (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE >> 1,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1017,11 +1017,10 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
- ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
- BTRFS_MAX_EXTENT_SIZE + sectorsize,
- BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1052,9 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* Empty */
- ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1068,9 +1066,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = 0;
out:
if (ret)
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_UPTODATE, NULL);
+ btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f26a394a9ec5..b96195d6480f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -197,7 +197,7 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
list_del_init(&root->dirty_list);
free_extent_buffer(root->commit_root);
root->commit_root = btrfs_root_node(root);
- extent_io_tree_release(&root->dirty_log_pages);
+ btrfs_extent_io_tree_release(&root->dirty_log_pages);
btrfs_qgroup_clean_swapped_blocks(root);
}
@@ -383,10 +383,10 @@ loop:
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
- extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
- IO_TREE_TRANS_DIRTY_PAGES);
- extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
- IO_TREE_FS_PINNED_EXTENTS);
+ btrfs_extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
+ IO_TREE_TRANS_DIRTY_PAGES);
+ btrfs_extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
+ IO_TREE_FS_PINNED_EXTENTS);
btrfs_set_fs_generation(fs_info, fs_info->generation + 1);
cur_trans->transid = fs_info->generation;
fs_info->running_transaction = cur_trans;
@@ -538,15 +538,15 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
}
}
-static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
+static bool may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
{
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
- return 0;
+ return false;
if (type == TRANS_START)
- return 1;
+ return true;
- return 0;
+ return false;
}
static inline bool need_reserve_reloc_root(struct btrfs_root *root)
@@ -761,9 +761,10 @@ got_it:
* value here.
*/
if (do_chunk_alloc && num_bytes) {
- u64 flags = h->block_rsv->space_info->flags;
+ struct btrfs_space_info *space_info = h->block_rsv->space_info;
+ u64 flags = space_info->flags;
- btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
+ btrfs_chunk_alloc(h, space_info, btrfs_get_alloc_profile(fs_info, flags),
CHUNK_ALLOC_NO_FORCE);
}
@@ -1128,13 +1129,13 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- while (find_first_extent_bit(dirty_pages, start, &start, &end,
- mark, &cached_state)) {
+ while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark, &cached_state)) {
bool wait_writeback = false;
- ret = convert_extent_bit(dirty_pages, start, end,
- EXTENT_NEED_WAIT,
- mark, &cached_state);
+ ret = btrfs_convert_extent_bit(dirty_pages, start, end,
+ EXTENT_NEED_WAIT,
+ mark, &cached_state);
/*
* convert_extent_bit can return -ENOMEM, which is most of the
* time a temporary error. So when it happens, ignore the error
@@ -1155,8 +1156,8 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
if (!ret)
ret = filemap_fdatawrite_range(mapping, start, end);
if (!ret && wait_writeback)
- ret = filemap_fdatawait_range(mapping, start, end);
- free_extent_state(cached_state);
+ btrfs_btree_wait_writeback_range(fs_info, start, end);
+ btrfs_free_extent_state(cached_state);
if (ret)
break;
cached_state = NULL;
@@ -1175,14 +1176,13 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages)
{
- struct address_space *mapping = fs_info->btree_inode->i_mapping;
struct extent_state *cached_state = NULL;
u64 start = 0;
u64 end;
int ret = 0;
- while (find_first_extent_bit(dirty_pages, start, &start, &end,
- EXTENT_NEED_WAIT, &cached_state)) {
+ while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end,
+ EXTENT_NEED_WAIT, &cached_state)) {
/*
* Ignore -ENOMEM errors returned by clear_extent_bit().
* When committing the transaction, we'll remove any entries
@@ -1191,13 +1191,13 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
* concurrently - we do it only at transaction commit time when
* it's safe to do it (through extent_io_tree_release()).
*/
- ret = clear_extent_bit(dirty_pages, start, end,
- EXTENT_NEED_WAIT, &cached_state);
+ ret = btrfs_clear_extent_bit(dirty_pages, start, end,
+ EXTENT_NEED_WAIT, &cached_state);
if (ret == -ENOMEM)
ret = 0;
if (!ret)
- ret = filemap_fdatawait_range(mapping, start, end);
- free_extent_state(cached_state);
+ btrfs_btree_wait_writeback_range(fs_info, start, end);
+ btrfs_free_extent_state(cached_state);
if (ret)
break;
cached_state = NULL;
@@ -1265,7 +1265,7 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
blk_finish_plug(&plug);
ret2 = btrfs_wait_extents(fs_info, dirty_pages);
- extent_io_tree_release(&trans->transaction->dirty_pages);
+ btrfs_extent_io_tree_release(&trans->transaction->dirty_pages);
if (ret)
return ret;
@@ -1327,7 +1327,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
struct btrfs_fs_info *fs_info = trans->fs_info;
struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
struct list_head *io_bgs = &trans->transaction->io_bgs;
- struct list_head *next;
struct extent_buffer *eb;
int ret;
@@ -1363,13 +1362,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
again:
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
struct btrfs_root *root;
- next = fs_info->dirty_cowonly_roots.next;
- list_del_init(next);
- root = list_entry(next, struct btrfs_root, dirty_list);
+
+ root = list_first_entry(&fs_info->dirty_cowonly_roots,
+ struct btrfs_root, dirty_list);
clear_bit(BTRFS_ROOT_DIRTY, &root->state);
+ list_move_tail(&root->dirty_list,
+ &trans->transaction->switch_commits);
- list_add_tail(&root->dirty_list,
- &trans->transaction->switch_commits);
ret = update_cowonly_root(trans, root);
if (ret)
return ret;
@@ -2271,14 +2270,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
wake_up(&fs_info->transaction_blocked_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
- if (cur_trans->list.prev != &fs_info->trans_list) {
+ if (!list_is_first(&cur_trans->list, &fs_info->trans_list)) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
if (trans->in_fsync)
want_state = TRANS_STATE_SUPER_COMMITTED;
- prev_trans = list_entry(cur_trans->list.prev,
- struct btrfs_transaction, list);
+ prev_trans = list_prev_entry(cur_trans, list);
if (prev_trans->state < want_state) {
refcount_inc(&prev_trans->use_count);
spin_unlock(&fs_info->trans_lock);
@@ -2555,7 +2553,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
wake_up(&cur_trans->commit_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
- btrfs_finish_extent_commit(trans);
+ ret = btrfs_finish_extent_commit(trans);
+ if (ret)
+ goto scrub_continue;
if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
btrfs_clear_space_info_full(fs_info);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 2b66a6130269..8f4703b488b7 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1571,7 +1571,7 @@ static int check_extent_item(struct extent_buffer *leaf,
inline_type);
return -EUCLEAN;
}
- if (inline_type < last_type) {
+ if (unlikely(inline_type < last_type)) {
extent_err(leaf, slot,
"inline ref out-of-order: has type %u, prev type %u",
inline_type, last_type);
@@ -1580,7 +1580,7 @@ static int check_extent_item(struct extent_buffer *leaf,
/* Type changed, allow the sequence starts from U64_MAX again. */
if (inline_type > last_type)
last_seq = U64_MAX;
- if (seq > last_seq) {
+ if (unlikely(seq > last_seq)) {
extent_err(leaf, slot,
"inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx",
inline_type, inline_offset, seq,
@@ -1929,7 +1929,7 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
break;
}
- if (ret)
+ if (unlikely(ret))
return BTRFS_TREE_BLOCK_INVALID_ITEM;
return BTRFS_TREE_BLOCK_CLEAN;
}
@@ -2229,9 +2229,8 @@ int btrfs_verify_level_key(struct extent_buffer *eb,
int ret;
found_level = btrfs_header_level(eb);
- if (found_level != check->level) {
- WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
- KERN_ERR "BTRFS: tree level check failed\n");
+ if (unlikely(found_level != check->level)) {
+ DEBUG_WARN();
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, check->level, found_level);
@@ -2251,11 +2250,11 @@ int btrfs_verify_level_key(struct extent_buffer *eb,
return 0;
/* We have @first_key, so this @eb must have at least one item */
- if (btrfs_header_nritems(eb) == 0) {
+ if (unlikely(btrfs_header_nritems(eb) == 0)) {
btrfs_err(fs_info,
"invalid tree nritems, bytenr=%llu nritems=0 expect >0",
eb->start);
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN();
return -EUCLEAN;
}
@@ -2263,11 +2262,10 @@ int btrfs_verify_level_key(struct extent_buffer *eb,
btrfs_node_key_to_cpu(eb, &found_key, 0);
else
btrfs_item_key_to_cpu(eb, &found_key, 0);
- ret = btrfs_comp_cpu_keys(&check->first_key, &found_key);
- if (ret) {
- WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
- KERN_ERR "BTRFS: tree first key check failed\n");
+ ret = btrfs_comp_cpu_keys(&check->first_key, &found_key);
+ if (unlikely(ret)) {
+ DEBUG_WARN();
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
eb->start, check->transid, check->first_key.objectid,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 90dc094cfa5e..97e933113b82 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -860,9 +860,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sums;
struct btrfs_root *csum_root;
- sums = list_entry(ordered_sums.next,
- struct btrfs_ordered_sum,
- list);
+ sums = list_first_entry(&ordered_sums,
+ struct btrfs_ordered_sum,
+ list);
csum_root = btrfs_csum_root(fs_info,
sums->logical);
if (!ret)
@@ -3251,8 +3251,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
}
}
- extent_io_tree_release(&log->dirty_log_pages);
- extent_io_tree_release(&log->log_csum_range);
+ btrfs_extent_io_tree_release(&log->dirty_log_pages);
+ btrfs_extent_io_tree_release(&log->log_csum_range);
btrfs_put_root(log);
}
@@ -4300,8 +4300,8 @@ static int log_csums(struct btrfs_trans_handle *trans,
* file which happens to refer to the same extent as well. Such races
* can leave checksum items in the log with overlapping ranges.
*/
- ret = lock_extent(&log_root->log_csum_range, sums->logical, lock_end,
- &cached_state);
+ ret = btrfs_lock_extent(&log_root->log_csum_range, sums->logical, lock_end,
+ &cached_state);
if (ret)
return ret;
/*
@@ -4317,8 +4317,8 @@ static int log_csums(struct btrfs_trans_handle *trans,
if (!ret)
ret = btrfs_csum_file_blocks(trans, log_root, sums);
- unlock_extent(&log_root->log_csum_range, sums->logical, lock_end,
- &cached_state);
+ btrfs_unlock_extent(&log_root->log_csum_range, sums->logical, lock_end,
+ &cached_state);
return ret;
}
@@ -4648,7 +4648,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
return 0;
/* If we're compressed we have to save the entire range of csums. */
- if (extent_map_is_compressed(em)) {
+ if (btrfs_extent_map_is_compressed(em)) {
csum_offset = 0;
csum_len = em->disk_num_bytes;
} else {
@@ -4657,7 +4657,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
}
/* block start is already adjusted for the file extent offset. */
- block_start = extent_map_block_start(em);
+ block_start = btrfs_extent_map_block_start(em);
csum_root = btrfs_csum_root(trans->fs_info, block_start);
ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset,
block_start + csum_offset + csum_len - 1,
@@ -4667,9 +4667,9 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
ret = 0;
while (!list_empty(&ordered_sums)) {
- struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
- struct btrfs_ordered_sum,
- list);
+ struct btrfs_ordered_sum *sums = list_first_entry(&ordered_sums,
+ struct btrfs_ordered_sum,
+ list);
if (!ret)
ret = log_csums(trans, inode, log_root, sums);
list_del(&sums->list);
@@ -4692,7 +4692,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_key key;
enum btrfs_compression_type compress_type;
u64 extent_offset = em->offset;
- u64 block_start = extent_map_block_start(em);
+ u64 block_start = btrfs_extent_map_block_start(em);
u64 block_len;
int ret;
@@ -4703,7 +4703,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
block_len = em->disk_num_bytes;
- compress_type = extent_map_compression(em);
+ compress_type = btrfs_extent_map_compression(em);
if (compress_type != BTRFS_COMPRESS_NONE) {
btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
@@ -4947,7 +4947,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
list_sort(NULL, &extents, extent_cmp);
process:
while (!list_empty(&extents)) {
- em = list_entry(extents.next, struct extent_map, list);
+ em = list_first_entry(&extents, struct extent_map, list);
list_del_init(&em->list);
@@ -4956,8 +4956,8 @@ process:
* private list.
*/
if (ret) {
- clear_em_logging(inode, em);
- free_extent_map(em);
+ btrfs_clear_em_logging(inode, em);
+ btrfs_free_extent_map(em);
continue;
}
@@ -4965,8 +4965,8 @@ process:
ret = log_one_extent(trans, inode, em, path, ctx);
write_lock(&tree->lock);
- clear_em_logging(inode, em);
- free_extent_map(em);
+ btrfs_clear_em_logging(inode, em);
+ btrfs_free_extent_map(em);
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
@@ -6583,6 +6583,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
btrfs_log_get_delayed_items(inode, &delayed_ins_list,
&delayed_del_list);
+ /*
+ * If we are fsyncing a file with 0 hard links, then commit the delayed
+ * inode because the last inode ref (or extref) item may still be in the
+ * subvolume tree and if we log it the file will still exist after a log
+ * replay. So commit the delayed inode to delete that last ref and we
+ * skip logging it.
+ */
+ if (inode->vfs_inode.i_nlink == 0) {
+ ret = btrfs_commit_inode_delayed_inode(inode);
+ if (ret)
+ goto out_unlock;
+ }
+
ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
path, dst_path, logged_isize,
inode_only, ctx,
@@ -7051,14 +7064,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (btrfs_root_generation(&root->root_item) == trans->transid)
return BTRFS_LOG_FORCE_COMMIT;
- /*
- * Skip already logged inodes or inodes corresponding to tmpfiles
- * (since logging them is pointless, a link count of 0 means they
- * will never be accessible).
- */
- if ((btrfs_inode_in_log(inode, trans->transid) &&
- list_empty(&ctx->ordered_extents)) ||
- inode->vfs_inode.i_nlink == 0)
+ /* Skip already logged inodes and without new extents. */
+ if (btrfs_inode_in_log(inode, trans->transid) &&
+ list_empty(&ctx->ordered_extents))
return BTRFS_NO_LOG_SYNC;
ret = start_log_trans(trans, root, ctx);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e6b6fed7429..89835071cfea 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -404,7 +404,7 @@ static void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
- extent_io_tree_release(&device->alloc_state);
+ btrfs_extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
@@ -415,8 +415,8 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
WARN_ON(fs_devices->opened);
while (!list_empty(&fs_devices->devices)) {
- device = list_entry(fs_devices->devices.next,
- struct btrfs_device, dev_list);
+ device = list_first_entry(&fs_devices->devices,
+ struct btrfs_device, dev_list);
list_del(&device->dev_list);
btrfs_free_device(device);
}
@@ -428,8 +428,8 @@ void __exit btrfs_cleanup_fs_uuids(void)
struct btrfs_fs_devices *fs_devices;
while (!list_empty(&fs_uuids)) {
- fs_devices = list_entry(fs_uuids.next,
- struct btrfs_fs_devices, fs_list);
+ fs_devices = list_first_entry(&fs_uuids, struct btrfs_fs_devices,
+ fs_list);
list_del(&fs_devices->fs_list);
free_fs_devices(fs_devices);
}
@@ -493,7 +493,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
}
}
invalidate_bdev(bdev);
- *disk_super = btrfs_read_dev_super(bdev);
+ *disk_super = btrfs_read_disk_super(bdev, 0, false);
if (IS_ERR(*disk_super)) {
ret = PTR_ERR(*disk_super);
fput(*bdev_file);
@@ -1149,7 +1149,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
device->fs_info = NULL;
atomic_set(&device->dev_stats_ccnt, 0);
- extent_io_tree_release(&device->alloc_state);
+ btrfs_extent_io_tree_release(&device->alloc_state);
/*
* Reset the flush error record. We might have a transient flush error
@@ -1325,48 +1325,58 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
put_page(page);
}
-static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
- u64 bytenr, u64 bytenr_orig)
+struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
+ int copy_num, bool drop_cache)
{
- struct btrfs_super_block *disk_super;
+ struct btrfs_super_block *super;
struct page *page;
- void *p;
- pgoff_t index;
+ u64 bytenr, bytenr_orig;
+ struct address_space *mapping = bdev->bd_mapping;
+ int ret;
- /* make sure our super fits in the device */
- if (bytenr + PAGE_SIZE >= bdev_nr_bytes(bdev))
- return ERR_PTR(-EINVAL);
+ bytenr_orig = btrfs_sb_offset(copy_num);
+ ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
+ return ERR_PTR(ret);
+ }
- /* make sure our super fits in the page */
- if (sizeof(*disk_super) > PAGE_SIZE)
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
return ERR_PTR(-EINVAL);
- /* make sure our super doesn't straddle pages on disk */
- index = bytenr >> PAGE_SHIFT;
- if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
- return ERR_PTR(-EINVAL);
+ if (drop_cache) {
+ /* This should only be called with the primary sb. */
+ ASSERT(copy_num == 0);
- /* pull in the page with our super */
- page = read_cache_page_gfp(bdev->bd_mapping, index, GFP_KERNEL);
+ /*
+ * Drop the page of the primary superblock, so later read will
+ * always read from the device.
+ */
+ invalidate_inode_pages2_range(mapping, bytenr >> PAGE_SHIFT,
+ (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
+ }
+ page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
if (IS_ERR(page))
return ERR_CAST(page);
- p = page_address(page);
-
- /* align our pointer to the offset of the super block */
- disk_super = p + offset_in_page(bytenr);
-
- if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
- btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- btrfs_release_disk_super(p);
+ super = page_address(page);
+ if (btrfs_super_magic(super) != BTRFS_MAGIC ||
+ btrfs_super_bytenr(super) != bytenr_orig) {
+ btrfs_release_disk_super(super);
return ERR_PTR(-EINVAL);
}
- if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
- disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
+ /*
+ * Make sure the last byte of label is properly NUL termiated. We use
+ * '%s' to print the label, if not properly NUL termiated we can access
+ * beyond the label.
+ */
+ if (super->label[0] && super->label[BTRFS_LABEL_SIZE - 1])
+ super->label[BTRFS_LABEL_SIZE - 1] = 0;
- return disk_super;
+ return super;
}
int btrfs_forget_devices(dev_t devt)
@@ -1437,9 +1447,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
bool new_device_added = false;
struct btrfs_device *device = NULL;
struct file *bdev_file;
- u64 bytenr;
dev_t devt;
- int ret;
lockdep_assert_held(&uuid_mutex);
@@ -1457,20 +1465,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
if (IS_ERR(bdev_file))
return ERR_CAST(bdev_file);
- /*
- * We would like to check all the super blocks, but doing so would
- * allow a mount to succeed after a mkfs from a different filesystem.
- * Currently, recovery from a bad primary btrfs superblock is done
- * using the userspace command 'btrfs check --super'.
- */
- ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr);
- if (ret) {
- device = ERR_PTR(ret);
- goto error_bdev_put;
- }
-
- disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr,
- btrfs_sb_offset(0));
+ disk_super = btrfs_read_disk_super(file_bdev(bdev_file), 0, false);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
goto error_bdev_put;
@@ -1511,9 +1506,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
lockdep_assert_held(&device->fs_info->chunk_mutex);
- if (find_first_extent_bit(&device->alloc_state, *start,
- &physical_start, &physical_end,
- CHUNK_ALLOCATED, NULL)) {
+ if (btrfs_find_first_extent_bit(&device->alloc_state, *start,
+ &physical_start, &physical_end,
+ CHUNK_ALLOCATED, NULL)) {
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
@@ -1528,6 +1523,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
static u64 dev_extent_search_start(struct btrfs_device *device)
{
switch (device->fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
return BTRFS_DEVICE_RANGE_RESERVED;
case BTRFS_CHUNK_ALLOC_ZONED:
@@ -1537,8 +1535,6 @@ static u64 dev_extent_search_start(struct btrfs_device *device)
* for superblock logging.
*/
return 0;
- default:
- BUG();
}
}
@@ -1551,7 +1547,8 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
int ret;
bool changed = false;
- ASSERT(IS_ALIGNED(*hole_start, zone_size));
+ ASSERT(IS_ALIGNED(*hole_start, zone_size),
+ "hole_start=%llu zone_size=%llu", *hole_start, zone_size);
while (*hole_size > 0) {
pos = btrfs_find_allocatable_zones(device, *hole_start,
@@ -1617,6 +1614,9 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
}
switch (device->fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
/* No extra check */
break;
@@ -1631,8 +1631,6 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
continue;
}
break;
- default:
- BUG();
}
break;
@@ -1802,7 +1800,9 @@ next:
else
ret = 0;
- ASSERT(max_hole_start + max_hole_size <= search_end);
+ ASSERT(max_hole_start + max_hole_size <= search_end,
+ "max_hole_start=%llu max_hole_size=%llu search_end=%llu",
+ max_hole_start, max_hole_size, search_end);
out:
btrfs_free_path(path);
*start = max_hole_start;
@@ -2115,7 +2115,7 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
down_read(&fs_info->dev_replace.rwsem);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
- ASSERT(num_devices > 1);
+ ASSERT(num_devices > 1, "num_devices=%llu", num_devices);
num_devices--;
}
up_read(&fs_info->dev_replace.rwsem);
@@ -2131,7 +2131,7 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
const u64 bytenr = btrfs_sb_offset(copy_num);
int ret;
- disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr);
+ disk_super = btrfs_read_disk_super(bdev, copy_num, false);
if (IS_ERR(disk_super))
return;
@@ -2319,7 +2319,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
*/
if (cur_devices->num_devices == 0) {
list_del_init(&cur_devices->seed_list);
- ASSERT(cur_devices->opened == 1);
+ ASSERT(cur_devices->opened == 1, "opened=%d", cur_devices->opened);
cur_devices->opened--;
free_fs_devices(cur_devices);
}
@@ -3249,7 +3249,8 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
* user having built with ASSERT enabled, so if ASSERT doesn't
* do anything we still error out.
*/
- ASSERT(0);
+ DEBUG_WARN("errr %ld reading chunk map at offset %llu",
+ PTR_ERR(map), chunk_offset);
return PTR_ERR(map);
}
@@ -3330,8 +3331,16 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
if (ret == -ENOSPC) {
const u64 sys_flags = btrfs_system_alloc_profile(fs_info);
struct btrfs_block_group *sys_bg;
+ struct btrfs_space_info *space_info;
+
+ space_info = btrfs_find_space_info(fs_info, sys_flags);
+ if (!space_info) {
+ ret = -EINVAL;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
- sys_bg = btrfs_create_chunk(trans, sys_flags);
+ sys_bg = btrfs_create_chunk(trans, space_info, sys_flags);
if (IS_ERR(sys_bg)) {
ret = PTR_ERR(sys_bg);
btrfs_abort_transaction(trans, ret);
@@ -3791,26 +3800,25 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info)
* Balance filters. Return 1 if chunk should be filtered out
* (should not be balanced).
*/
-static int chunk_profiles_filter(u64 chunk_type,
- struct btrfs_balance_args *bargs)
+static bool chunk_profiles_filter(u64 chunk_type, struct btrfs_balance_args *bargs)
{
chunk_type = chunk_to_extended(chunk_type) &
BTRFS_EXTENDED_PROFILE_MASK;
if (bargs->profiles & chunk_type)
- return 0;
+ return false;
- return 1;
+ return true;
}
-static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
- struct btrfs_balance_args *bargs)
+static bool chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_block_group *cache;
u64 chunk_used;
u64 user_thresh_min;
u64 user_thresh_max;
- int ret = 1;
+ bool ret = true;
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = cache->used;
@@ -3828,18 +3836,18 @@ static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_off
user_thresh_max = mult_perc(cache->length, bargs->usage_max);
if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
- ret = 0;
+ ret = false;
btrfs_put_block_group(cache);
return ret;
}
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
- u64 chunk_offset, struct btrfs_balance_args *bargs)
+static bool chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_block_group *cache;
u64 chunk_used, user_thresh;
- int ret = 1;
+ bool ret = true;
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = cache->used;
@@ -3852,15 +3860,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
user_thresh = mult_perc(cache->length, bargs->usage);
if (chunk_used < user_thresh)
- ret = 0;
+ ret = false;
btrfs_put_block_group(cache);
return ret;
}
-static int chunk_devid_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
+static bool chunk_devid_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_stripe *stripe;
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
@@ -3869,10 +3876,10 @@ static int chunk_devid_filter(struct extent_buffer *leaf,
for (i = 0; i < num_stripes; i++) {
stripe = btrfs_stripe_nr(chunk, i);
if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
- return 0;
+ return false;
}
- return 1;
+ return true;
}
static u64 calc_data_stripes(u64 type, int num_stripes)
@@ -3885,9 +3892,8 @@ static u64 calc_data_stripes(u64 type, int num_stripes)
}
/* [pstart, pend) */
-static int chunk_drange_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
+static bool chunk_drange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_stripe *stripe;
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
@@ -3898,7 +3904,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
int i;
if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
- return 0;
+ return false;
type = btrfs_chunk_type(leaf, chunk);
factor = calc_data_stripes(type, num_stripes);
@@ -3914,56 +3920,53 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
if (stripe_offset < bargs->pend &&
stripe_offset + stripe_length > bargs->pstart)
- return 0;
+ return false;
}
- return 1;
+ return true;
}
/* [vstart, vend) */
-static int chunk_vrange_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- u64 chunk_offset,
- struct btrfs_balance_args *bargs)
+static bool chunk_vrange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ u64 chunk_offset, struct btrfs_balance_args *bargs)
{
if (chunk_offset < bargs->vend &&
chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
/* at least part of the chunk is inside this vrange */
- return 0;
+ return false;
- return 1;
+ return true;
}
-static int chunk_stripes_range_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
+static bool chunk_stripes_range_filter(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
{
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
if (bargs->stripes_min <= num_stripes
&& num_stripes <= bargs->stripes_max)
- return 0;
+ return false;
- return 1;
+ return true;
}
-static int chunk_soft_convert_filter(u64 chunk_type,
- struct btrfs_balance_args *bargs)
+static bool chunk_soft_convert_filter(u64 chunk_type, struct btrfs_balance_args *bargs)
{
if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
- return 0;
+ return false;
chunk_type = chunk_to_extended(chunk_type) &
BTRFS_EXTENDED_PROFILE_MASK;
if (bargs->target == chunk_type)
- return 1;
+ return true;
- return 0;
+ return false;
}
-static int should_balance_chunk(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk, u64 chunk_offset)
+static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ u64 chunk_offset)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -3973,7 +3976,7 @@ static int should_balance_chunk(struct extent_buffer *leaf,
/* type filter */
if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
(bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
- return 0;
+ return false;
}
if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
@@ -3986,46 +3989,46 @@ static int should_balance_chunk(struct extent_buffer *leaf,
/* profiles filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
chunk_profiles_filter(chunk_type, bargs)) {
- return 0;
+ return false;
}
/* usage filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
chunk_usage_filter(fs_info, chunk_offset, bargs)) {
- return 0;
+ return false;
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
- return 0;
+ return false;
}
/* devid filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
chunk_devid_filter(leaf, chunk, bargs)) {
- return 0;
+ return false;
}
/* drange filter, makes sense only with devid filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
chunk_drange_filter(leaf, chunk, bargs)) {
- return 0;
+ return false;
}
/* vrange filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
- return 0;
+ return false;
}
/* stripes filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
chunk_stripes_range_filter(leaf, chunk, bargs)) {
- return 0;
+ return false;
}
/* soft profile changing mode */
if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
chunk_soft_convert_filter(chunk_type, bargs)) {
- return 0;
+ return false;
}
/*
@@ -4033,7 +4036,7 @@ static int should_balance_chunk(struct extent_buffer *leaf,
*/
if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
if (bargs->limit == 0)
- return 0;
+ return false;
else
bargs->limit--;
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
@@ -4043,12 +4046,12 @@ static int should_balance_chunk(struct extent_buffer *leaf,
* about the count of all chunks that satisfy the filters.
*/
if (bargs->limit_max == 0)
- return 0;
+ return false;
else
bargs->limit_max--;
}
- return 1;
+ return true;
}
static int __btrfs_balance(struct btrfs_fs_info *fs_info)
@@ -4663,7 +4666,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
}
spin_lock(&fs_info->super_lock);
- ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
+ ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED,
+ "exclusive_operation=%d", fs_info->exclusive_operation);
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
spin_unlock(&fs_info->super_lock);
/*
@@ -4999,8 +5003,8 @@ again:
mutex_lock(&fs_info->chunk_mutex);
/* Clear all state bits beyond the shrunk device size */
- clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
- CHUNK_STATE_MASK);
+ btrfs_clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
+ CHUNK_STATE_MASK);
btrfs_device_set_disk_total_bytes(device, new_size);
if (list_empty(&device->post_commit_list))
@@ -5127,6 +5131,8 @@ struct alloc_chunk_ctl {
u64 stripe_size;
u64 chunk_size;
int ndevs;
+ /* Space_info the block group is going to belong. */
+ struct btrfs_space_info *space_info;
};
static void init_alloc_chunk_ctl_policy_regular(
@@ -5200,14 +5206,15 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
ctl->ndevs = 0;
switch (fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
break;
case BTRFS_CHUNK_ALLOC_ZONED:
init_alloc_chunk_ctl_policy_zoned(fs_devices, ctl);
break;
- default:
- BUG();
}
}
@@ -5346,7 +5353,9 @@ static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
* It should hold because:
* dev_extent_min == dev_extent_want == zone_size * dev_stripes
*/
- ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min);
+ ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min,
+ "ndevs=%d max_avail=%llu dev_extent_min=%llu", ctl->ndevs,
+ devices_info[ctl->ndevs - 1].max_avail, ctl->dev_extent_min);
ctl->stripe_size = zone_size;
ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
@@ -5359,7 +5368,9 @@ static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
ctl->dev_stripes);
ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
- ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size);
+ ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size,
+ "stripe_size=%llu data_stripes=%d max_chunk_size=%llu",
+ ctl->stripe_size, data_stripes, ctl->max_chunk_size);
}
ctl->chunk_size = ctl->stripe_size * data_stripes;
@@ -5392,12 +5403,13 @@ static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
switch (fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
return decide_stripe_size_regular(ctl, devices_info);
case BTRFS_CHUNK_ALLOC_ZONED:
return decide_stripe_size_zoned(ctl, devices_info);
- default:
- BUG();
}
}
@@ -5407,9 +5419,9 @@ static void chunk_map_device_set_bits(struct btrfs_chunk_map *map, unsigned int
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
- set_extent_bit(&device->alloc_state, stripe->physical,
- stripe->physical + map->stripe_size - 1,
- bits | EXTENT_NOWAIT, NULL);
+ btrfs_set_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + map->stripe_size - 1,
+ bits | EXTENT_NOWAIT, NULL);
}
}
@@ -5419,10 +5431,9 @@ static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned in
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
- __clear_extent_bit(&device->alloc_state, stripe->physical,
- stripe->physical + map->stripe_size - 1,
- bits | EXTENT_NOWAIT,
- NULL, NULL);
+ btrfs_clear_extent_bits(&device->alloc_state, stripe->physical,
+ stripe->physical + map->stripe_size - 1,
+ bits | EXTENT_NOWAIT);
}
}
@@ -5529,7 +5540,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
- block_group = btrfs_make_block_group(trans, type, start, ctl->chunk_size);
+ block_group = btrfs_make_block_group(trans, ctl->space_info, type, start,
+ ctl->chunk_size);
if (IS_ERR(block_group)) {
btrfs_remove_chunk_map(info, map);
return block_group;
@@ -5555,7 +5567,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
}
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
- u64 type)
+ struct btrfs_space_info *space_info,
+ u64 type)
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -5567,7 +5580,7 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
lockdep_assert_held(&info->chunk_mutex);
if (!alloc_profile_is_valid(type, 0)) {
- ASSERT(0);
+ DEBUG_WARN("invalid alloc profile for type %llu", type);
return ERR_PTR(-EINVAL);
}
@@ -5579,12 +5592,13 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(info, "invalid chunk type 0x%llx requested", type);
- ASSERT(0);
+ DEBUG_WARN();
return ERR_PTR(-EINVAL);
}
ctl.start = find_next_chunk(info);
ctl.type = type;
+ ctl.space_info = space_info;
init_alloc_chunk_ctl(fs_devices, &ctl);
devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
@@ -5728,7 +5742,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
struct btrfs_fs_info *fs_info = trans->fs_info;
u64 alloc_profile;
struct btrfs_block_group *meta_bg;
+ struct btrfs_space_info *meta_space_info;
struct btrfs_block_group *sys_bg;
+ struct btrfs_space_info *sys_space_info;
/*
* When adding a new device for sprouting, the seed device is read-only
@@ -5752,12 +5768,22 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
*/
alloc_profile = btrfs_metadata_alloc_profile(fs_info);
- meta_bg = btrfs_create_chunk(trans, alloc_profile);
+ meta_space_info = btrfs_find_space_info(fs_info, alloc_profile);
+ if (!meta_space_info) {
+ DEBUG_WARN();
+ return -EINVAL;
+ }
+ meta_bg = btrfs_create_chunk(trans, meta_space_info, alloc_profile);
if (IS_ERR(meta_bg))
return PTR_ERR(meta_bg);
alloc_profile = btrfs_system_alloc_profile(fs_info);
- sys_bg = btrfs_create_chunk(trans, alloc_profile);
+ sys_space_info = btrfs_find_space_info(fs_info, alloc_profile);
+ if (!sys_space_info) {
+ DEBUG_WARN();
+ return -EINVAL;
+ }
+ sys_bg = btrfs_create_chunk(trans, sys_space_info, alloc_profile);
if (IS_ERR(sys_bg))
return PTR_ERR(sys_bg);
@@ -5957,7 +5983,7 @@ static int btrfs_read_rr(const struct btrfs_chunk_map *map, int first, int num_s
static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_chunk_map *map, int first,
- int dev_replace_is_ongoing)
+ bool dev_replace_is_ongoing)
{
const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy);
int i;
@@ -5966,8 +5992,8 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
int tolerance;
struct btrfs_device *srcdev;
- ASSERT((map->type &
- (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
+ ASSERT((map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)),
+ "type=%llu", map->type);
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = map->sub_stripes;
@@ -6268,7 +6294,7 @@ static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc,
}
/* We can only have at most 2 extra nr_stripes (for DUP). */
- ASSERT(nr_extra_stripes <= 2);
+ ASSERT(nr_extra_stripes <= 2, "nr_extra_stripes=%d", nr_extra_stripes);
/*
* For GET_READ_MIRRORS, we can only return at most 1 extra stripe for
* replace.
@@ -6279,7 +6305,8 @@ static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc,
struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1];
/* Only DUP can have two extra stripes. */
- ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP);
+ ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP,
+ "map_type=%llu", bioc->map_type);
/*
* Swap the last stripe stripes and reduce @nr_extra_stripes.
@@ -6306,7 +6333,8 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset,
*/
io_geom->stripe_offset = offset & BTRFS_STRIPE_LEN_MASK;
io_geom->stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
- ASSERT(io_geom->stripe_offset < U32_MAX);
+ ASSERT(io_geom->stripe_offset < U32_MAX,
+ "stripe_offset=%llu", io_geom->stripe_offset);
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len =
@@ -6324,8 +6352,12 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset,
io_geom->raid56_full_stripe_start = btrfs_stripe_nr_to_offset(
rounddown(io_geom->stripe_nr, nr_data_stripes(map)));
- ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset);
- ASSERT(io_geom->raid56_full_stripe_start <= offset);
+ ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset,
+ "raid56_full_stripe_start=%llu full_stripe_len=%lu offset=%llu",
+ io_geom->raid56_full_stripe_start, full_stripe_len, offset);
+ ASSERT(io_geom->raid56_full_stripe_start <= offset,
+ "raid56_full_stripe_start=%llu offset=%llu",
+ io_geom->raid56_full_stripe_start, offset);
/*
* For writes to RAID56, allow to write a full stripe set, but
* no straddling of stripe sets.
@@ -6491,7 +6523,7 @@ static void map_blocks_raid56_read(struct btrfs_chunk_map *map,
{
int data_stripes = nr_data_stripes(map);
- ASSERT(io_geom->mirror_num <= 1);
+ ASSERT(io_geom->mirror_num <= 1, "mirror_num=%d", io_geom->mirror_num);
/* Just grab the data stripe directly. */
io_geom->stripe_index = io_geom->stripe_nr % data_stripes;
io_geom->stripe_nr /= data_stripes;
@@ -6559,7 +6591,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int num_copies;
struct btrfs_io_context *bioc = NULL;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- int dev_replace_is_ongoing = 0;
+ bool dev_replace_is_ongoing = false;
u16 num_alloc_stripes;
u64 max_len;
@@ -6864,7 +6896,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
atomic_set(&dev->dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
- extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE);
+ btrfs_extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE);
if (devid)
tmp = *devid;
@@ -7836,7 +7868,7 @@ void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
struct btrfs_device *curr, *next;
- ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
+ ASSERT(trans->state == TRANS_STATE_COMMIT_DOING, "state=%d" , trans->state);
if (list_empty(&trans->dev_update_list))
return;
@@ -8205,7 +8237,7 @@ static void map_raid56_repair_block(struct btrfs_io_context *bioc,
logical < stripe_start + BTRFS_STRIPE_LEN)
break;
}
- ASSERT(i < data_stripes);
+ ASSERT(i < data_stripes, "i=%d data_stripes=%d", i, data_stripes);
smap->dev = bioc->stripes[i].dev;
smap->physical = bioc->stripes[i].physical +
((logical - bioc->full_stripe_logical) &
@@ -8234,7 +8266,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
int mirror_ret = mirror_num;
int ret;
- ASSERT(mirror_num > 0);
+ ASSERT(mirror_num > 0, "mirror_num=%d", mirror_num);
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
&bioc, smap, &mirror_ret);
@@ -8242,7 +8274,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
return ret;
/* The map range should not cross stripe boundary. */
- ASSERT(map_length >= length);
+ ASSERT(map_length >= length, "map_length=%llu length=%u", map_length, length);
/* Already mapped to single stripe. */
if (!bioc)
@@ -8254,7 +8286,8 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
goto out;
}
- ASSERT(mirror_num <= bioc->num_stripes);
+ ASSERT(mirror_num <= bioc->num_stripes,
+ "mirror_num=%d num_stripes=%d", mirror_num, bioc->num_stripes);
smap->dev = bioc->stripes[mirror_num - 1].dev;
smap->physical = bioc->stripes[mirror_num - 1].physical;
out:
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index e247d551da67..137cc232f58e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -473,7 +473,6 @@ struct btrfs_io_stripe {
struct btrfs_device *dev;
/* Block mapping. */
u64 physical;
- u64 length;
bool rst_search_commit_root;
/* For the endio handler. */
struct btrfs_io_context *bioc;
@@ -715,7 +714,8 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
- u64 type);
+ struct btrfs_space_info *space_info,
+ u64 type);
void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
blk_mode_t flags, void *holder);
@@ -786,6 +786,8 @@ struct btrfs_chunk_map *btrfs_find_chunk_map_nolock(struct btrfs_fs_info *fs_inf
struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
u64 logical, u64 length);
void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
+struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
+ int copy_num, bool drop_cache);
void btrfs_release_disk_super(struct btrfs_super_block *super);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
@@ -847,6 +849,11 @@ static inline const char *btrfs_dev_name(const struct btrfs_device *device)
return rcu_str_deref(device->name);
}
+static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocation_policy pol)
+{
+ WARN_ONCE(1, "unknown allocation policy %d, fallback to regular", pol);
+}
+
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 545f413d81fc..5292cd341f70 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -120,8 +120,6 @@ static int copy_data_into_buffer(struct address_space *mapping,
ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
if (ret < 0)
return ret;
- /* No large folio support yet. */
- ASSERT(!folio_test_large(folio));
offset = offset_in_folio(folio, cur);
copy_length = min(folio_size(folio) - offset,
@@ -205,7 +203,6 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
workspace->strm.next_in = workspace->buf;
workspace->strm.avail_in = copy_length;
} else {
- unsigned int pg_off;
unsigned int cur_len;
if (data_in) {
@@ -217,9 +214,9 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
start, &in_folio);
if (ret < 0)
goto out;
- pg_off = offset_in_page(start);
- cur_len = btrfs_calc_input_length(orig_end, start);
- data_in = kmap_local_folio(in_folio, pg_off);
+ cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
+ data_in = kmap_local_folio(in_folio,
+ offset_in_folio(in_folio, start));
start += cur_len;
workspace->strm.next_in = data_in;
workspace->strm.avail_in = cur_len;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 4a3e02b49f29..b5b0156d5b95 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -989,7 +989,7 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
}
/* All the zones are FULL. Should not reach here. */
- ASSERT(0);
+ DEBUG_WARN("unexpected state, all zones full");
return -EIO;
}
@@ -1797,12 +1797,12 @@ static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
ordered->disk_bytenr = logical;
write_lock(&em_tree->lock);
- em = search_extent_mapping(em_tree, ordered->file_offset,
- ordered->num_bytes);
+ em = btrfs_search_extent_mapping(em_tree, ordered->file_offset,
+ ordered->num_bytes);
/* The em should be a new COW extent, thus it should not have an offset. */
ASSERT(em->offset == 0);
em->disk_bytenr = logical;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
write_unlock(&em_tree->lock);
}
@@ -1812,8 +1812,8 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
struct btrfs_ordered_extent *new;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
- split_extent_map(ordered->inode, ordered->file_offset,
- ordered->num_bytes, len, logical))
+ btrfs_split_extent_map(ordered->inode, ordered->file_offset,
+ ordered->num_bytes, len, logical))
return false;
new = btrfs_split_ordered_extent(ordered, len);
@@ -2171,27 +2171,15 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
const u64 end = block_group->start + block_group->length;
- struct radix_tree_iter iter;
struct extent_buffer *eb;
- void __rcu **slot;
+ unsigned long index, start = (block_group->start >> fs_info->sectorsize_bits);
rcu_read_lock();
- radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
- block_group->start >> fs_info->sectorsize_bits) {
- eb = radix_tree_deref_slot(slot);
- if (!eb)
- continue;
- if (radix_tree_deref_retry(eb)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
-
+ xa_for_each_start(&fs_info->buffer_tree, index, eb, start) {
if (eb->start < block_group->start)
continue;
if (eb->start >= end)
break;
-
- slot = radix_tree_iter_resume(slot, &iter);
rcu_read_unlock();
wait_on_extent_buffer_writeback(eb);
rcu_read_lock();
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 3541efa765c7..4a796a049b5a 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -24,7 +24,7 @@
#include "super.h"
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
-#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
+#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MIN_LEVEL -15
#define ZSTD_BTRFS_MAX_LEVEL 15
@@ -426,8 +426,8 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
if (ret < 0)
goto out;
- cur_len = btrfs_calc_input_length(orig_end, start);
- workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_page(start));
+ cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
+ workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start));
workspace->in_buf.pos = 0;
workspace->in_buf.size = cur_len;
@@ -511,9 +511,9 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
if (ret < 0)
goto out;
- cur_len = btrfs_calc_input_length(orig_end, start);
+ cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
workspace->in_buf.src = kmap_local_folio(in_folio,
- offset_in_page(start));
+ offset_in_folio(in_folio, start));
workspace->in_buf.pos = 0;
workspace->in_buf.size = cur_len;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 7be23ff20b27..8cf4a1dc481e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -297,7 +297,6 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
still_busy:
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- return;
}
struct postprocess_bh_ctx {
@@ -422,7 +421,6 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
still_busy:
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- return;
}
/*
@@ -1122,6 +1120,8 @@ static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
+ bool blocking = gfpflags_allow_blocking(gfp);
+
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) {
@@ -1137,12 +1137,15 @@ __getblk_slow(struct block_device *bdev, sector_t block,
for (;;) {
struct buffer_head *bh;
- bh = __find_get_block(bdev, block, size);
- if (bh)
- return bh;
-
if (!grow_buffers(bdev, block, size, gfp))
return NULL;
+
+ if (blocking)
+ bh = __find_get_block_nonatomic(bdev, block, size);
+ else
+ bh = __find_get_block(bdev, block, size);
+ if (bh)
+ return bh;
}
}
@@ -1220,10 +1223,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
/* FIXME: do we need to set this in both places? */
if (bh->b_folio && bh->b_folio->mapping)
mapping_set_error(bh->b_folio->mapping, -EIO);
- if (bh->b_assoc_map) {
+ if (bh->b_assoc_map)
mapping_set_error(bh->b_assoc_map, -EIO);
- errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
- }
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
@@ -1613,8 +1614,8 @@ static void discard_buffer(struct buffer_head * bh)
bh->b_bdev = NULL;
b_state = READ_ONCE(bh->b_state);
do {
- } while (!try_cmpxchg(&bh->b_state, &b_state,
- b_state & ~BUFFER_FLAGS_DISCARD));
+ } while (!try_cmpxchg_relaxed(&bh->b_state, &b_state,
+ b_state & ~BUFFER_FLAGS_DISCARD));
unlock_buffer(bh);
}
@@ -1679,7 +1680,6 @@ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length)
filemap_release_folio(folio, 0);
out:
folio_clear_mappedtodisk(folio);
- return;
}
EXPORT_SYMBOL(block_invalidate_folio);
@@ -2730,7 +2730,7 @@ unlock:
EXPORT_SYMBOL(block_truncate_page);
/*
- * The generic ->writepage function for buffer-backed address_spaces
+ * The generic write folio function for buffer-backed address_spaces
*/
int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
void *get_block)
@@ -2750,7 +2750,7 @@ int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
/*
* The folio straddles i_size. It must be zeroed out on each and every
- * writepage invocation because it may be mmapped. "A file is mapped
+ * writeback invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 38c236e38cef..b62cd3e9a18e 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -71,7 +71,6 @@ struct cachefiles_object {
int debug_id;
spinlock_t lock;
refcount_t ref;
- u8 d_name_len; /* Length of filename */
enum cachefiles_content content_info:8; /* Info about content presence */
unsigned long flags;
#define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
index b48525680e73..aae86af48ed5 100644
--- a/fs/cachefiles/key.c
+++ b/fs/cachefiles/key.c
@@ -132,7 +132,6 @@ bool cachefiles_cook_key(struct cachefiles_object *object)
success:
name[len] = 0;
object->d_name = name;
- object->d_name_len = len;
_leave(" = %s", object->d_name);
return true;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 14d0cc894000..aecfc5c37b49 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -98,7 +98,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
retry:
ret = cachefiles_inject_read_error();
if (ret == 0)
- subdir = lookup_one_len(dirname, dir, strlen(dirname));
+ subdir = lookup_one(&nop_mnt_idmap, &QSTR(dirname), dir);
else
subdir = ERR_PTR(ret);
trace_cachefiles_lookup(NULL, dir, subdir);
@@ -338,7 +338,7 @@ try_again:
return -EIO;
}
- grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
+ grave = lookup_one(&nop_mnt_idmap, &QSTR(nbuffer), cache->graveyard);
if (IS_ERR(grave)) {
unlock_rename(cache->graveyard, dir);
trace_cachefiles_vfs_error(object, d_inode(cache->graveyard),
@@ -630,8 +630,8 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
/* Look up path "cache/vol/fanout/file". */
ret = cachefiles_inject_read_error();
if (ret == 0)
- dentry = lookup_positive_unlocked(object->d_name, fan,
- object->d_name_len);
+ dentry = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR(object->d_name), fan);
else
dentry = ERR_PTR(ret);
trace_cachefiles_lookup(object, fan, dentry);
@@ -683,7 +683,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
ret = cachefiles_inject_read_error();
if (ret == 0)
- dentry = lookup_one_len(object->d_name, fan, object->d_name_len);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
else
dentry = ERR_PTR(ret);
if (IS_ERR(dentry)) {
@@ -702,7 +702,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
dput(dentry);
ret = cachefiles_inject_read_error();
if (ret == 0)
- dentry = lookup_one_len(object->d_name, fan, object->d_name_len);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
else
dentry = ERR_PTR(ret);
if (IS_ERR(dentry)) {
@@ -751,7 +751,7 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- victim = lookup_one_len(filename, dir, strlen(filename));
+ victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
if (IS_ERR(victim))
goto lookup_error;
if (d_is_negative(victim))
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5568cb74b322..ebf32822e29b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -619,7 +619,7 @@ static int populate_attrs(struct config_item *item)
break;
}
}
- if (t->ct_bin_attrs) {
+ if (!error && t->ct_bin_attrs) {
for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) {
if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i))
continue;
@@ -970,7 +970,7 @@ static void configfs_dump_one(struct configfs_dirent *sd, int level)
{
pr_info("%*s\"%s\":\n", level, " ", configfs_get_name(sd));
-#define type_print(_type) if (sd->s_type & _type) pr_info("%*s %s\n", level, " ", #_type);
+#define type_print(_type) if (sd->s_type & _type) pr_info("%*s %s\n", level, " ", #_type)
type_print(CONFIGFS_ROOT);
type_print(CONFIGFS_DIR);
type_print(CONFIGFS_ITEM_ATTR);
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 254170a82aa3..c378b5cbf87d 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -66,7 +66,7 @@ int config_item_set_name(struct config_item *item, const char *fmt, ...)
name = kvasprintf(GFP_KERNEL, fmt, args);
va_end(args);
if (!name)
- return -EFAULT;
+ return -ENOMEM;
}
/* Free the old name, if necessary. */
diff --git a/fs/coredump.c b/fs/coredump.c
index c33c177a701b..f217ebf2b3b6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -43,6 +43,14 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/pidfs.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <net/af_unix.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <uapi/linux/pidfd.h>
+#include <uapi/linux/un.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -60,6 +68,12 @@ static void free_vma_snapshot(struct coredump_params *cprm);
#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
/* Define a reasonable max cap */
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
+/*
+ * File descriptor number for the pidfd for the thread-group leader of
+ * the coredumping task installed into the usermode helper's file
+ * descriptor table.
+ */
+#define COREDUMP_PIDFD_NUMBER 3
static int core_uses_pid;
static unsigned int core_pipe_limit;
@@ -68,9 +82,16 @@ static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
+enum coredump_type_t {
+ COREDUMP_FILE = 1,
+ COREDUMP_PIPE = 2,
+ COREDUMP_SOCK = 3,
+};
+
struct core_name {
char *corename;
int used, size;
+ enum coredump_type_t core_type;
};
static int expand_corename(struct core_name *cn, int size)
@@ -210,18 +231,24 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
{
const struct cred *cred = current_cred();
const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
bool was_space = false;
int pid_in_pattern = 0;
int err = 0;
cn->used = 0;
cn->corename = NULL;
+ if (*pat_ptr == '|')
+ cn->core_type = COREDUMP_PIPE;
+ else if (*pat_ptr == '@')
+ cn->core_type = COREDUMP_SOCK;
+ else
+ cn->core_type = COREDUMP_FILE;
if (expand_corename(cn, core_name_size))
return -ENOMEM;
cn->corename[0] = '\0';
- if (ispipe) {
+ switch (cn->core_type) {
+ case COREDUMP_PIPE: {
int argvs = sizeof(core_pattern) / 2;
(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
if (!(*argv))
@@ -230,6 +257,45 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
++pat_ptr;
if (!(*pat_ptr))
return -ENOMEM;
+ break;
+ }
+ case COREDUMP_SOCK: {
+ /* skip the @ */
+ pat_ptr++;
+ if (!(*pat_ptr))
+ return -ENOMEM;
+
+ err = cn_printf(cn, "%s", pat_ptr);
+ if (err)
+ return err;
+
+ /* Require absolute paths. */
+ if (cn->corename[0] != '/')
+ return -EINVAL;
+
+ /*
+ * Ensure we can uses spaces to indicate additional
+ * parameters in the future.
+ */
+ if (strchr(cn->corename, ' ')) {
+ coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename);
+ return -EINVAL;
+ }
+
+ /*
+ * Currently no need to parse any other options.
+ * Relevant information can be retrieved from the peer
+ * pidfd retrievable via SO_PEERPIDFD by the receiver or
+ * via /proc/<pid>, using the SO_PEERPIDFD to guard
+ * against pid recycling when opening /proc/<pid>.
+ */
+ return 0;
+ }
+ case COREDUMP_FILE:
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ return -EINVAL;
}
/* Repeat as long as we have more pattern to process and more output
@@ -239,7 +305,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
* Split on spaces before doing template expansion so that
* %e and %E don't get split if they have spaces in them
*/
- if (ispipe) {
+ if (cn->core_type == COREDUMP_PIPE) {
if (isspace(*pat_ptr)) {
if (cn->used != 0)
was_space = true;
@@ -339,6 +405,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
case 'C':
err = cn_printf(cn, "%d", cprm->cpu);
break;
+ /* pidfd number */
+ case 'F': {
+ /*
+ * Installing a pidfd only makes sense if
+ * we actually spawn a usermode helper.
+ */
+ if (cn->core_type != COREDUMP_PIPE)
+ break;
+
+ /*
+ * Note that we'll install a pidfd for the
+ * thread-group leader. We know that task
+ * linkage hasn't been removed yet and even if
+ * this @current isn't the actual thread-group
+ * leader we know that the thread-group leader
+ * cannot be reaped until @current has exited.
+ */
+ cprm->pid = task_tgid(current);
+ err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
+ break;
+ }
default:
break;
}
@@ -355,12 +442,10 @@ out:
* If core_pattern does not include a %p (as is the default)
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
- return ispipe;
+ if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
+ return cn_printf(cn, ".%d", task_tgid_vnr(current));
+
+ return 0;
}
static int zap_process(struct signal_struct *signal, int exit_code)
@@ -493,7 +578,7 @@ static void wait_for_dump_helpers(struct file *file)
}
/*
- * umh_pipe_setup
+ * umh_coredump_setup
* helper function to customize the process used
* to collect the core in userspace. Specifically
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -503,11 +588,34 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
{
struct file *files[2];
struct coredump_params *cp = (struct coredump_params *)info->data;
- int err = create_pipe_files(files, 0);
+ int err;
+
+ if (cp->pid) {
+ struct file *pidfs_file __free(fput) = NULL;
+
+ pidfs_file = pidfs_alloc_file(cp->pid, 0);
+ if (IS_ERR(pidfs_file))
+ return PTR_ERR(pidfs_file);
+
+ pidfs_coredump(cp);
+
+ /*
+ * Usermode helpers are childen of either
+ * system_unbound_wq or of kthreadd. So we know that
+ * we're starting off with a clean file descriptor
+ * table. So we should always be able to use
+ * COREDUMP_PIDFD_NUMBER as our file descriptor value.
+ */
+ err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
+ if (err < 0)
+ return err;
+ }
+
+ err = create_pipe_files(files, 0);
if (err)
return err;
@@ -515,10 +623,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
err = replace_fd(0, files[0], 0);
fput(files[0]);
+ if (err < 0)
+ return err;
+
/* and disallow core files too */
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
- return err;
+ return 0;
}
void do_coredump(const kernel_siginfo_t *siginfo)
@@ -530,7 +641,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
- int ispipe;
size_t *argv = NULL;
int argc = 0;
/* require nonrelative corefile path and be extra careful */
@@ -579,70 +689,14 @@ void do_coredump(const kernel_siginfo_t *siginfo)
old_cred = override_creds(cred);
- ispipe = format_corename(&cn, &cprm, &argv, &argc);
-
- if (ispipe) {
- int argi;
- int dump_count;
- char **helper_argv;
- struct subprocess_info *sub_info;
-
- if (ispipe < 0) {
- coredump_report_failure("format_corename failed, aborting core");
- goto fail_unlock;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a special value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- coredump_report_failure("over core_pipe_limit, skipping core dump");
- goto fail_dropcount;
- }
-
- helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
- GFP_KERNEL);
- if (!helper_argv) {
- coredump_report_failure("%s failed to allocate memory", __func__);
- goto fail_dropcount;
- }
- for (argi = 0; argi < argc; argi++)
- helper_argv[argi] = cn.corename + argv[argi];
- helper_argv[argi] = NULL;
-
- retval = -ENOMEM;
- sub_info = call_usermodehelper_setup(helper_argv[0],
- helper_argv, NULL, GFP_KERNEL,
- umh_pipe_setup, NULL, &cprm);
- if (sub_info)
- retval = call_usermodehelper_exec(sub_info,
- UMH_WAIT_EXEC);
+ retval = format_corename(&cn, &cprm, &argv, &argc);
+ if (retval < 0) {
+ coredump_report_failure("format_corename failed, aborting core");
+ goto fail_unlock;
+ }
- kfree(helper_argv);
- if (retval) {
- coredump_report_failure("|%s pipe failed", cn.corename);
- goto close_fail;
- }
- } else {
+ switch (cn.core_type) {
+ case COREDUMP_FILE: {
struct mnt_idmap *idmap;
struct inode *inode;
int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
@@ -736,6 +790,143 @@ void do_coredump(const kernel_siginfo_t *siginfo)
if (do_truncate(idmap, cprm.file->f_path.dentry,
0, 0, cprm.file))
goto close_fail;
+ break;
+ }
+ case COREDUMP_PIPE: {
+ int argi;
+ int dump_count;
+ char **helper_argv;
+ struct subprocess_info *sub_info;
+
+ if (cprm.limit == 1) {
+ /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a special value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things.
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
+ goto fail_unlock;
+ }
+ cprm.limit = RLIM_INFINITY;
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ coredump_report_failure("over core_pipe_limit, skipping core dump");
+ goto fail_dropcount;
+ }
+
+ helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
+ GFP_KERNEL);
+ if (!helper_argv) {
+ coredump_report_failure("%s failed to allocate memory", __func__);
+ goto fail_dropcount;
+ }
+ for (argi = 0; argi < argc; argi++)
+ helper_argv[argi] = cn.corename + argv[argi];
+ helper_argv[argi] = NULL;
+
+ retval = -ENOMEM;
+ sub_info = call_usermodehelper_setup(helper_argv[0],
+ helper_argv, NULL, GFP_KERNEL,
+ umh_coredump_setup, NULL, &cprm);
+ if (sub_info)
+ retval = call_usermodehelper_exec(sub_info,
+ UMH_WAIT_EXEC);
+
+ kfree(helper_argv);
+ if (retval) {
+ coredump_report_failure("|%s pipe failed", cn.corename);
+ goto close_fail;
+ }
+ break;
+ }
+ case COREDUMP_SOCK: {
+#ifdef CONFIG_UNIX
+ struct file *file __free(fput) = NULL;
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ ssize_t addr_len;
+ struct socket *socket;
+
+ addr_len = strscpy(addr.sun_path, cn.corename);
+ if (addr_len < 0)
+ goto close_fail;
+ addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
+
+ /*
+ * It is possible that the userspace process which is
+ * supposed to handle the coredump and is listening on
+ * the AF_UNIX socket coredumps. Userspace should just
+ * mark itself non dumpable.
+ */
+
+ retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
+ if (retval < 0)
+ goto close_fail;
+
+ file = sock_alloc_file(socket, 0, NULL);
+ if (IS_ERR(file))
+ goto close_fail;
+
+ /*
+ * Set the thread-group leader pid which is used for the
+ * peer credentials during connect() below. Then
+ * immediately register it in pidfs...
+ */
+ cprm.pid = task_tgid(current);
+ retval = pidfs_register_pid(cprm.pid);
+ if (retval)
+ goto close_fail;
+
+ /*
+ * ... and set the coredump information so userspace
+ * has it available after connect()...
+ */
+ pidfs_coredump(&cprm);
+
+ retval = kernel_connect(socket, (struct sockaddr *)(&addr),
+ addr_len, O_NONBLOCK | SOCK_COREDUMP);
+
+ /*
+ * ... Make sure to only put our reference after connect() took
+ * its own reference keeping the pidfs entry alive ...
+ */
+ pidfs_put_pid(cprm.pid);
+
+ if (retval) {
+ if (retval == -EAGAIN)
+ coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
+ else
+ coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
+ goto close_fail;
+ }
+
+ /* ... and validate that @sk_peer_pid matches @cprm.pid. */
+ if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
+ goto close_fail;
+
+ cprm.limit = RLIM_INFINITY;
+ cprm.file = no_free_ptr(file);
+#else
+ coredump_report_failure("Core dump socket support %s disabled", cn.corename);
+ goto close_fail;
+#endif
+ break;
+ }
+ default:
+ WARN_ON_ONCE(true);
+ goto close_fail;
}
/* get us an unshared descriptor table; almost always a no-op */
@@ -770,13 +961,49 @@ void do_coredump(const kernel_siginfo_t *siginfo)
file_end_write(cprm.file);
free_vma_snapshot(&cprm);
}
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
+
+#ifdef CONFIG_UNIX
+ /* Let userspace know we're done processing the coredump. */
+ if (sock_from_file(cprm.file))
+ kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR);
+#endif
+
+ /*
+ * When core_pipe_limit is set we wait for the coredump server
+ * or usermodehelper to finish before exiting so it can e.g.,
+ * inspect /proc/<pid>.
+ */
+ if (core_pipe_limit) {
+ switch (cn.core_type) {
+ case COREDUMP_PIPE:
+ wait_for_dump_helpers(cprm.file);
+ break;
+#ifdef CONFIG_UNIX
+ case COREDUMP_SOCK: {
+ ssize_t n;
+
+ /*
+ * We use a simple read to wait for the coredump
+ * processing to finish. Either the socket is
+ * closed or we get sent unexpected data. In
+ * both cases, we're done.
+ */
+ n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL);
+ if (n != 0)
+ coredump_report_failure("Unexpected data on coredump socket");
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+ }
+
close_fail:
if (cprm.file)
filp_close(cprm.file, NULL);
fail_dropcount:
- if (ispipe)
+ if (cn.core_type == COREDUMP_PIPE)
atomic_dec(&core_dump_count);
fail_unlock:
kfree(argv);
@@ -799,10 +1026,9 @@ static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
struct file *file = cprm->file;
loff_t pos = file->f_pos;
ssize_t n;
+
if (cprm->written + nr > cprm->limit)
return 0;
-
-
if (dump_interrupted())
return 0;
n = __kernel_write(file, addr, nr, &pos);
@@ -819,20 +1045,21 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
{
static char zeroes[PAGE_SIZE];
struct file *file = cprm->file;
+
if (file->f_mode & FMODE_LSEEK) {
- if (dump_interrupted() ||
- vfs_llseek(file, nr, SEEK_CUR) < 0)
+ if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0)
return 0;
cprm->pos += nr;
return 1;
- } else {
- while (nr > PAGE_SIZE) {
- if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
- return 0;
- nr -= PAGE_SIZE;
- }
- return __dump_emit(cprm, zeroes, nr);
}
+
+ while (nr > PAGE_SIZE) {
+ if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
+ return 0;
+ nr -= PAGE_SIZE;
+ }
+
+ return __dump_emit(cprm, zeroes, nr);
}
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
@@ -1001,7 +1228,7 @@ EXPORT_SYMBOL(dump_align);
void validate_coredump_safety(void)
{
if (suid_dumpable == SUID_DUMP_ROOT &&
- core_pattern[0] != '/' && core_pattern[0] != '|') {
+ core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') {
coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
"pipe handler or fully qualified core dump path required. "
@@ -1009,18 +1236,55 @@ void validate_coredump_safety(void)
}
}
+static inline bool check_coredump_socket(void)
+{
+ if (core_pattern[0] != '@')
+ return true;
+
+ /*
+ * Coredump socket must be located in the initial mount
+ * namespace. Don't give the impression that anything else is
+ * supported right now.
+ */
+ if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
+ return false;
+
+ /* Must be an absolute path. */
+ if (*(core_pattern + 1) != '/')
+ return false;
+
+ return true;
+}
+
static int proc_dostring_coredump(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int error = proc_dostring(table, write, buffer, lenp, ppos);
+ int error;
+ ssize_t retval;
+ char old_core_pattern[CORENAME_MAX_SIZE];
+
+ retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
+
+ error = proc_dostring(table, write, buffer, lenp, ppos);
+ if (error)
+ return error;
+ if (!check_coredump_socket()) {
+ strscpy(core_pattern, old_core_pattern, retval + 1);
+ return -EINVAL;
+ }
- if (!error)
- validate_coredump_safety();
+ validate_coredump_safety();
return error;
}
static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;
+static char core_modes[] = {
+ "file\npipe"
+#ifdef CONFIG_UNIX
+ "\nsocket"
+#endif
+};
static const struct ctl_table coredump_sysctls[] = {
{
@@ -1064,6 +1328,13 @@ static const struct ctl_table coredump_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "core_modes",
+ .data = core_modes,
+ .maxlen = sizeof(core_modes) - 1,
+ .mode = 0444,
+ .proc_handler = proc_dostring,
+ },
};
static int __init init_fs_coredump_sysctls(void)
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 8371e4e1f596..c1d92074b65c 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -12,6 +12,7 @@
#define _FSCRYPT_PRIVATE_H
#include <linux/fscrypt.h>
+#include <linux/minmax.h>
#include <linux/siphash.h>
#include <crypto/hash.h>
#include <linux/blk-crypto.h>
@@ -27,6 +28,23 @@
*/
#define FSCRYPT_MIN_KEY_SIZE 16
+/* Maximum size of a raw fscrypt master key */
+#define FSCRYPT_MAX_RAW_KEY_SIZE 64
+
+/* Maximum size of a hardware-wrapped fscrypt master key */
+#define FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE
+
+/* Maximum size of an fscrypt master key across both key types */
+#define FSCRYPT_MAX_ANY_KEY_SIZE \
+ MAX(FSCRYPT_MAX_RAW_KEY_SIZE, FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE)
+
+/*
+ * FSCRYPT_MAX_KEY_SIZE is defined in the UAPI header, but the addition of
+ * hardware-wrapped keys has made it misleading as it's only for raw keys.
+ * Don't use it in kernel code; use one of the above constants instead.
+ */
+#undef FSCRYPT_MAX_KEY_SIZE
+
#define FSCRYPT_CONTEXT_V1 1
#define FSCRYPT_CONTEXT_V2 2
@@ -360,13 +378,15 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
* outputs are unique and cryptographically isolated, i.e. knowledge of one
* output doesn't reveal another.
*/
-#define HKDF_CONTEXT_KEY_IDENTIFIER 1 /* info=<empty> */
+#define HKDF_CONTEXT_KEY_IDENTIFIER_FOR_RAW_KEY 1 /* info=<empty> */
#define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 /* info=file_nonce */
#define HKDF_CONTEXT_DIRECT_KEY 3 /* info=mode_num */
#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 /* info=mode_num||fs_uuid */
#define HKDF_CONTEXT_DIRHASH_KEY 5 /* info=file_nonce */
#define HKDF_CONTEXT_IV_INO_LBLK_32_KEY 6 /* info=mode_num||fs_uuid */
#define HKDF_CONTEXT_INODE_HASH_KEY 7 /* info=<empty> */
+#define HKDF_CONTEXT_KEY_IDENTIFIER_FOR_HW_WRAPPED_KEY \
+ 8 /* info=<empty> */
int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context,
const u8 *info, unsigned int infolen,
@@ -376,7 +396,8 @@ void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf);
/* inline_crypt.c */
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
-int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci);
+int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci,
+ bool is_hw_wrapped_key);
static inline bool
fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci)
@@ -385,12 +406,17 @@ fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci)
}
int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
- const u8 *raw_key,
+ const u8 *key_bytes, size_t key_size,
+ bool is_hw_wrapped,
const struct fscrypt_inode_info *ci);
void fscrypt_destroy_inline_crypt_key(struct super_block *sb,
struct fscrypt_prepared_key *prep_key);
+int fscrypt_derive_sw_secret(struct super_block *sb,
+ const u8 *wrapped_key, size_t wrapped_key_size,
+ u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]);
+
/*
* Check whether the crypto transform or blk-crypto key has been allocated in
* @prep_key, depending on which encryption implementation the file will use.
@@ -414,7 +440,8 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key,
#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
-static inline int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci)
+static inline int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci,
+ bool is_hw_wrapped_key)
{
return 0;
}
@@ -427,7 +454,8 @@ fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci)
static inline int
fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
- const u8 *raw_key,
+ const u8 *key_bytes, size_t key_size,
+ bool is_hw_wrapped,
const struct fscrypt_inode_info *ci)
{
WARN_ON_ONCE(1);
@@ -440,6 +468,15 @@ fscrypt_destroy_inline_crypt_key(struct super_block *sb,
{
}
+static inline int
+fscrypt_derive_sw_secret(struct super_block *sb,
+ const u8 *wrapped_key, size_t wrapped_key_size,
+ u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
+{
+ fscrypt_warn(NULL, "kernel doesn't support hardware-wrapped keys");
+ return -EOPNOTSUPP;
+}
+
static inline bool
fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key,
const struct fscrypt_inode_info *ci)
@@ -456,20 +493,38 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key,
struct fscrypt_master_key_secret {
/*
- * For v2 policy keys: HKDF context keyed by this master key.
- * For v1 policy keys: not set (hkdf.hmac_tfm == NULL).
+ * The KDF with which subkeys of this key can be derived.
+ *
+ * For v1 policy keys, this isn't applicable and won't be set.
+ * Otherwise, this KDF will be keyed by this master key if
+ * ->is_hw_wrapped=false, or by the "software secret" that hardware
+ * derived from this master key if ->is_hw_wrapped=true.
*/
struct fscrypt_hkdf hkdf;
/*
- * Size of the raw key in bytes. This remains set even if ->raw was
+ * True if this key is a hardware-wrapped key; false if this key is a
+ * raw key (i.e. a "software key"). For v1 policy keys this will always
+ * be false, as v1 policy support is a legacy feature which doesn't
+ * support newer functionality such as hardware-wrapped keys.
+ */
+ bool is_hw_wrapped;
+
+ /*
+ * Size of the key in bytes. This remains set even if ->bytes was
* zeroized due to no longer being needed. I.e. we still remember the
* size of the key even if we don't need to remember the key itself.
*/
u32 size;
- /* For v1 policy keys: the raw key. Wiped for v2 policy keys. */
- u8 raw[FSCRYPT_MAX_KEY_SIZE];
+ /*
+ * The bytes of the key, when still needed. This can be either a raw
+ * key or a hardware-wrapped key, as indicated by ->is_hw_wrapped. In
+ * the case of a raw, v2 policy key, there is no need to remember the
+ * actual key separately from ->hkdf so this field will be zeroized as
+ * soon as ->hkdf is initialized.
+ */
+ u8 bytes[FSCRYPT_MAX_ANY_KEY_SIZE];
} __randomize_layout;
diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c
index 855a0f4b7318..0f3028adc9c7 100644
--- a/fs/crypto/hkdf.c
+++ b/fs/crypto/hkdf.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * This is used to derive keys from the fscrypt master keys.
+ * This is used to derive keys from the fscrypt master keys (or from the
+ * "software secrets" which hardware derives from the fscrypt master keys, in
+ * the case that the fscrypt master keys are hardware-wrapped keys).
*
* Copyright 2019 Google LLC
*/
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index 7fa53d30aec3..1d008c440cb6 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -89,7 +89,8 @@ static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode,
}
/* Enable inline encryption for this file if supported. */
-int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci)
+int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci,
+ bool is_hw_wrapped_key)
{
const struct inode *inode = ci->ci_inode;
struct super_block *sb = inode->i_sb;
@@ -130,7 +131,8 @@ int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci)
crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode;
crypto_cfg.data_unit_size = 1U << ci->ci_data_unit_bits;
crypto_cfg.dun_bytes = fscrypt_get_dun_bytes(ci);
- crypto_cfg.key_type = BLK_CRYPTO_KEY_TYPE_RAW;
+ crypto_cfg.key_type = is_hw_wrapped_key ?
+ BLK_CRYPTO_KEY_TYPE_HW_WRAPPED : BLK_CRYPTO_KEY_TYPE_RAW;
devs = fscrypt_get_devices(sb, &num_devs);
if (IS_ERR(devs))
@@ -151,12 +153,15 @@ out_free_devs:
}
int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
- const u8 *raw_key,
+ const u8 *key_bytes, size_t key_size,
+ bool is_hw_wrapped,
const struct fscrypt_inode_info *ci)
{
const struct inode *inode = ci->ci_inode;
struct super_block *sb = inode->i_sb;
enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode;
+ enum blk_crypto_key_type key_type = is_hw_wrapped ?
+ BLK_CRYPTO_KEY_TYPE_HW_WRAPPED : BLK_CRYPTO_KEY_TYPE_RAW;
struct blk_crypto_key *blk_key;
struct block_device **devs;
unsigned int num_devs;
@@ -167,9 +172,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
if (!blk_key)
return -ENOMEM;
- err = blk_crypto_init_key(blk_key, raw_key, ci->ci_mode->keysize,
- BLK_CRYPTO_KEY_TYPE_RAW, crypto_mode,
- fscrypt_get_dun_bytes(ci),
+ err = blk_crypto_init_key(blk_key, key_bytes, key_size, key_type,
+ crypto_mode, fscrypt_get_dun_bytes(ci),
1U << ci->ci_data_unit_bits);
if (err) {
fscrypt_err(inode, "error %d initializing blk-crypto key", err);
@@ -228,6 +232,34 @@ void fscrypt_destroy_inline_crypt_key(struct super_block *sb,
kfree_sensitive(blk_key);
}
+/*
+ * Ask the inline encryption hardware to derive the software secret from a
+ * hardware-wrapped key. Returns -EOPNOTSUPP if hardware-wrapped keys aren't
+ * supported on this filesystem or hardware.
+ */
+int fscrypt_derive_sw_secret(struct super_block *sb,
+ const u8 *wrapped_key, size_t wrapped_key_size,
+ u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
+{
+ int err;
+
+ /* The filesystem must be mounted with -o inlinecrypt. */
+ if (!(sb->s_flags & SB_INLINECRYPT)) {
+ fscrypt_warn(NULL,
+ "%s: filesystem not mounted with inlinecrypt\n",
+ sb->s_id);
+ return -EOPNOTSUPP;
+ }
+
+ err = blk_crypto_derive_sw_secret(sb->s_bdev, wrapped_key,
+ wrapped_key_size, sw_secret);
+ if (err == -EOPNOTSUPP)
+ fscrypt_warn(NULL,
+ "%s: block device doesn't support hardware-wrapped keys\n",
+ sb->s_id);
+ return err;
+}
+
bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
{
return inode->i_crypt_info->ci_inlinecrypt;
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 787e9c8938ba..ace369f13068 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -149,11 +149,11 @@ static int fscrypt_user_key_instantiate(struct key *key,
struct key_preparsed_payload *prep)
{
/*
- * We just charge FSCRYPT_MAX_KEY_SIZE bytes to the user's key quota for
- * each key, regardless of the exact key size. The amount of memory
+ * We just charge FSCRYPT_MAX_RAW_KEY_SIZE bytes to the user's key quota
+ * for each key, regardless of the exact key size. The amount of memory
* actually used is greater than the size of the raw key anyway.
*/
- return key_payload_reserve(key, FSCRYPT_MAX_KEY_SIZE);
+ return key_payload_reserve(key, FSCRYPT_MAX_RAW_KEY_SIZE);
}
static void fscrypt_user_key_describe(const struct key *key, struct seq_file *m)
@@ -558,20 +558,45 @@ static int add_master_key(struct super_block *sb,
int err;
if (key_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
- err = fscrypt_init_hkdf(&secret->hkdf, secret->raw,
- secret->size);
- if (err)
- return err;
+ u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE];
+ u8 *kdf_key = secret->bytes;
+ unsigned int kdf_key_size = secret->size;
+ u8 keyid_kdf_ctx = HKDF_CONTEXT_KEY_IDENTIFIER_FOR_RAW_KEY;
/*
- * Now that the HKDF context is initialized, the raw key is no
- * longer needed.
+ * For raw keys, the fscrypt master key is used directly as the
+ * fscrypt KDF key. For hardware-wrapped keys, we have to pass
+ * the master key to the hardware to derive the KDF key, which
+ * is then only used to derive non-file-contents subkeys.
+ */
+ if (secret->is_hw_wrapped) {
+ err = fscrypt_derive_sw_secret(sb, secret->bytes,
+ secret->size, sw_secret);
+ if (err)
+ return err;
+ kdf_key = sw_secret;
+ kdf_key_size = sizeof(sw_secret);
+ /*
+ * To avoid weird behavior if someone manages to
+ * determine sw_secret and add it as a raw key, ensure
+ * that hardware-wrapped keys and raw keys will have
+ * different key identifiers by deriving their key
+ * identifiers using different KDF contexts.
+ */
+ keyid_kdf_ctx =
+ HKDF_CONTEXT_KEY_IDENTIFIER_FOR_HW_WRAPPED_KEY;
+ }
+ err = fscrypt_init_hkdf(&secret->hkdf, kdf_key, kdf_key_size);
+ /*
+ * Now that the KDF context is initialized, the raw KDF key is
+ * no longer needed.
*/
- memzero_explicit(secret->raw, secret->size);
+ memzero_explicit(kdf_key, kdf_key_size);
+ if (err)
+ return err;
/* Calculate the key identifier */
- err = fscrypt_hkdf_expand(&secret->hkdf,
- HKDF_CONTEXT_KEY_IDENTIFIER, NULL, 0,
+ err = fscrypt_hkdf_expand(&secret->hkdf, keyid_kdf_ctx, NULL, 0,
key_spec->u.identifier,
FSCRYPT_KEY_IDENTIFIER_SIZE);
if (err)
@@ -580,19 +605,36 @@ static int add_master_key(struct super_block *sb,
return do_add_master_key(sb, secret, key_spec);
}
+/*
+ * Validate the size of an fscrypt master key being added. Note that this is
+ * just an initial check, as we don't know which ciphers will be used yet.
+ * There is a stricter size check later when the key is actually used by a file.
+ */
+static inline bool fscrypt_valid_key_size(size_t size, u32 add_key_flags)
+{
+ u32 max_size = (add_key_flags & FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) ?
+ FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE :
+ FSCRYPT_MAX_RAW_KEY_SIZE;
+
+ return size >= FSCRYPT_MIN_KEY_SIZE && size <= max_size;
+}
+
static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep)
{
const struct fscrypt_provisioning_key_payload *payload = prep->data;
- if (prep->datalen < sizeof(*payload) + FSCRYPT_MIN_KEY_SIZE ||
- prep->datalen > sizeof(*payload) + FSCRYPT_MAX_KEY_SIZE)
+ if (prep->datalen < sizeof(*payload))
+ return -EINVAL;
+
+ if (!fscrypt_valid_key_size(prep->datalen - sizeof(*payload),
+ payload->flags))
return -EINVAL;
if (payload->type != FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR &&
payload->type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER)
return -EINVAL;
- if (payload->__reserved)
+ if (payload->flags & ~FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED)
return -EINVAL;
prep->payload.data[0] = kmemdup(payload, prep->datalen, GFP_KERNEL);
@@ -636,21 +678,21 @@ static struct key_type key_type_fscrypt_provisioning = {
};
/*
- * Retrieve the raw key from the Linux keyring key specified by 'key_id', and
- * store it into 'secret'.
+ * Retrieve the key from the Linux keyring key specified by 'key_id', and store
+ * it into 'secret'.
*
- * The key must be of type "fscrypt-provisioning" and must have the field
- * fscrypt_provisioning_key_payload::type set to 'type', indicating that it's
- * only usable with fscrypt with the particular KDF version identified by
- * 'type'. We don't use the "logon" key type because there's no way to
- * completely restrict the use of such keys; they can be used by any kernel API
- * that accepts "logon" keys and doesn't require a specific service prefix.
+ * The key must be of type "fscrypt-provisioning" and must have the 'type' and
+ * 'flags' field of the payload set to the given values, indicating that the key
+ * is intended for use for the specified purpose. We don't use the "logon" key
+ * type because there's no way to completely restrict the use of such keys; they
+ * can be used by any kernel API that accepts "logon" keys and doesn't require a
+ * specific service prefix.
*
* The ability to specify the key via Linux keyring key is intended for cases
* where userspace needs to re-add keys after the filesystem is unmounted and
- * re-mounted. Most users should just provide the raw key directly instead.
+ * re-mounted. Most users should just provide the key directly instead.
*/
-static int get_keyring_key(u32 key_id, u32 type,
+static int get_keyring_key(u32 key_id, u32 type, u32 flags,
struct fscrypt_master_key_secret *secret)
{
key_ref_t ref;
@@ -667,12 +709,16 @@ static int get_keyring_key(u32 key_id, u32 type,
goto bad_key;
payload = key->payload.data[0];
- /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. */
- if (payload->type != type)
+ /*
+ * Don't allow fscrypt v1 keys to be used as v2 keys and vice versa.
+ * Similarly, don't allow hardware-wrapped keys to be used as
+ * non-hardware-wrapped keys and vice versa.
+ */
+ if (payload->type != type || payload->flags != flags)
goto bad_key;
secret->size = key->datalen - sizeof(*payload);
- memcpy(secret->raw, payload->raw, secret->size);
+ memcpy(secret->bytes, payload->raw, secret->size);
err = 0;
goto out_put;
@@ -734,19 +780,28 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
return -EACCES;
memset(&secret, 0, sizeof(secret));
+
+ if (arg.flags) {
+ if (arg.flags & ~FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED)
+ return -EINVAL;
+ if (arg.key_spec.type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER)
+ return -EINVAL;
+ secret.is_hw_wrapped = true;
+ }
+
if (arg.key_id) {
if (arg.raw_size != 0)
return -EINVAL;
- err = get_keyring_key(arg.key_id, arg.key_spec.type, &secret);
+ err = get_keyring_key(arg.key_id, arg.key_spec.type, arg.flags,
+ &secret);
if (err)
goto out_wipe_secret;
} else {
- if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE ||
- arg.raw_size > FSCRYPT_MAX_KEY_SIZE)
+ if (!fscrypt_valid_key_size(arg.raw_size, arg.flags))
return -EINVAL;
secret.size = arg.raw_size;
err = -EFAULT;
- if (copy_from_user(secret.raw, uarg->raw, secret.size))
+ if (copy_from_user(secret.bytes, uarg->raw, secret.size))
goto out_wipe_secret;
}
@@ -770,13 +825,13 @@ EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key);
static void
fscrypt_get_test_dummy_secret(struct fscrypt_master_key_secret *secret)
{
- static u8 test_key[FSCRYPT_MAX_KEY_SIZE];
+ static u8 test_key[FSCRYPT_MAX_RAW_KEY_SIZE];
- get_random_once(test_key, FSCRYPT_MAX_KEY_SIZE);
+ get_random_once(test_key, sizeof(test_key));
memset(secret, 0, sizeof(*secret));
- secret->size = FSCRYPT_MAX_KEY_SIZE;
- memcpy(secret->raw, test_key, FSCRYPT_MAX_KEY_SIZE);
+ secret->size = sizeof(test_key);
+ memcpy(secret->bytes, test_key, sizeof(test_key));
}
int fscrypt_get_test_dummy_key_identifier(
@@ -787,10 +842,11 @@ int fscrypt_get_test_dummy_key_identifier(
fscrypt_get_test_dummy_secret(&secret);
- err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size);
+ err = fscrypt_init_hkdf(&secret.hkdf, secret.bytes, secret.size);
if (err)
goto out;
- err = fscrypt_hkdf_expand(&secret.hkdf, HKDF_CONTEXT_KEY_IDENTIFIER,
+ err = fscrypt_hkdf_expand(&secret.hkdf,
+ HKDF_CONTEXT_KEY_IDENTIFIER_FOR_RAW_KEY,
NULL, 0, key_identifier,
FSCRYPT_KEY_IDENTIFIER_SIZE);
out:
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index b4fe01ea4bd4..0d71843af946 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -153,7 +153,9 @@ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
struct crypto_skcipher *tfm;
if (fscrypt_using_inline_encryption(ci))
- return fscrypt_prepare_inline_crypt_key(prep_key, raw_key, ci);
+ return fscrypt_prepare_inline_crypt_key(prep_key, raw_key,
+ ci->ci_mode->keysize,
+ false, ci);
tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode);
if (IS_ERR(tfm))
@@ -195,14 +197,29 @@ static int setup_per_mode_enc_key(struct fscrypt_inode_info *ci,
struct fscrypt_mode *mode = ci->ci_mode;
const u8 mode_num = mode - fscrypt_modes;
struct fscrypt_prepared_key *prep_key;
- u8 mode_key[FSCRYPT_MAX_KEY_SIZE];
+ u8 mode_key[FSCRYPT_MAX_RAW_KEY_SIZE];
u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)];
unsigned int hkdf_infolen = 0;
+ bool use_hw_wrapped_key = false;
int err;
if (WARN_ON_ONCE(mode_num > FSCRYPT_MODE_MAX))
return -EINVAL;
+ if (mk->mk_secret.is_hw_wrapped && S_ISREG(inode->i_mode)) {
+ /* Using a hardware-wrapped key for file contents encryption */
+ if (!fscrypt_using_inline_encryption(ci)) {
+ if (sb->s_flags & SB_INLINECRYPT)
+ fscrypt_warn(ci->ci_inode,
+ "Hardware-wrapped key required, but no suitable inline encryption capabilities are available");
+ else
+ fscrypt_warn(ci->ci_inode,
+ "Hardware-wrapped keys require inline encryption (-o inlinecrypt)");
+ return -EINVAL;
+ }
+ use_hw_wrapped_key = true;
+ }
+
prep_key = &keys[mode_num];
if (fscrypt_is_key_prepared(prep_key, ci)) {
ci->ci_enc_key = *prep_key;
@@ -214,6 +231,16 @@ static int setup_per_mode_enc_key(struct fscrypt_inode_info *ci,
if (fscrypt_is_key_prepared(prep_key, ci))
goto done_unlock;
+ if (use_hw_wrapped_key) {
+ err = fscrypt_prepare_inline_crypt_key(prep_key,
+ mk->mk_secret.bytes,
+ mk->mk_secret.size, true,
+ ci);
+ if (err)
+ goto out_unlock;
+ goto done_unlock;
+ }
+
BUILD_BUG_ON(sizeof(mode_num) != 1);
BUILD_BUG_ON(sizeof(sb->s_uuid) != 16);
BUILD_BUG_ON(sizeof(hkdf_info) != 17);
@@ -336,6 +363,14 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_inode_info *ci,
{
int err;
+ if (mk->mk_secret.is_hw_wrapped &&
+ !(ci->ci_policy.v2.flags & (FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 |
+ FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32))) {
+ fscrypt_warn(ci->ci_inode,
+ "Hardware-wrapped keys are only supported with IV_INO_LBLK policies");
+ return -EINVAL;
+ }
+
if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
/*
* DIRECT_KEY: instead of deriving per-file encryption keys, the
@@ -362,7 +397,7 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_inode_info *ci,
FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) {
err = fscrypt_setup_iv_ino_lblk_32_key(ci, mk);
} else {
- u8 derived_key[FSCRYPT_MAX_KEY_SIZE];
+ u8 derived_key[FSCRYPT_MAX_RAW_KEY_SIZE];
err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
HKDF_CONTEXT_PER_FILE_ENC_KEY,
@@ -445,10 +480,6 @@ static int setup_file_encryption_key(struct fscrypt_inode_info *ci,
struct fscrypt_master_key *mk;
int err;
- err = fscrypt_select_encryption_impl(ci);
- if (err)
- return err;
-
err = fscrypt_policy_to_key_spec(&ci->ci_policy, &mk_spec);
if (err)
return err;
@@ -476,6 +507,10 @@ static int setup_file_encryption_key(struct fscrypt_inode_info *ci,
if (ci->ci_policy.version != FSCRYPT_POLICY_V1)
return -ENOKEY;
+ err = fscrypt_select_encryption_impl(ci, false);
+ if (err)
+ return err;
+
/*
* As a legacy fallback for v1 policies, search for the key in
* the current task's subscribed keyrings too. Don't move this
@@ -497,9 +532,21 @@ static int setup_file_encryption_key(struct fscrypt_inode_info *ci,
goto out_release_key;
}
+ err = fscrypt_select_encryption_impl(ci, mk->mk_secret.is_hw_wrapped);
+ if (err)
+ goto out_release_key;
+
switch (ci->ci_policy.version) {
case FSCRYPT_POLICY_V1:
- err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw);
+ if (WARN_ON_ONCE(mk->mk_secret.is_hw_wrapped)) {
+ /*
+ * This should never happen, as adding a v1 policy key
+ * that is hardware-wrapped isn't allowed.
+ */
+ err = -EINVAL;
+ goto out_release_key;
+ }
+ err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.bytes);
break;
case FSCRYPT_POLICY_V2:
err = fscrypt_setup_v2_file_key(ci, mk, need_dirhash_key);
diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c
index cf3b58ec32cc..b70521c55132 100644
--- a/fs/crypto/keysetup_v1.c
+++ b/fs/crypto/keysetup_v1.c
@@ -118,7 +118,7 @@ find_and_lock_process_key(const char *prefix,
payload = (const struct fscrypt_key *)ukp->data;
if (ukp->datalen != sizeof(struct fscrypt_key) ||
- payload->size < 1 || payload->size > FSCRYPT_MAX_KEY_SIZE) {
+ payload->size < 1 || payload->size > sizeof(payload->raw)) {
fscrypt_warn(NULL,
"key with description '%s' has invalid payload",
key->description);
@@ -149,7 +149,7 @@ struct fscrypt_direct_key {
const struct fscrypt_mode *dk_mode;
struct fscrypt_prepared_key dk_key;
u8 dk_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
- u8 dk_raw[FSCRYPT_MAX_KEY_SIZE];
+ u8 dk_raw[FSCRYPT_MAX_RAW_KEY_SIZE];
};
static void free_direct_key(struct fscrypt_direct_key *dk)
diff --git a/fs/dcache.c b/fs/dcache.c
index bd5aa136153a..03d58b2d4fa3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -74,10 +74,11 @@
* arbitrary, since it's serialized on rename_lock
*/
static int sysctl_vfs_cache_pressure __read_mostly = 100;
+static int sysctl_vfs_cache_pressure_denom __read_mostly = 100;
unsigned long vfs_pressure_ratio(unsigned long val)
{
- return mult_frac(val, sysctl_vfs_cache_pressure, 100);
+ return mult_frac(val, sysctl_vfs_cache_pressure, sysctl_vfs_cache_pressure_denom);
}
EXPORT_SYMBOL_GPL(vfs_pressure_ratio);
@@ -225,6 +226,14 @@ static const struct ctl_table vm_dcache_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
+ {
+ .procname = "vfs_cache_pressure_denom",
+ .data = &sysctl_vfs_cache_pressure_denom,
+ .maxlen = sizeof(sysctl_vfs_cache_pressure_denom),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE_HUNDRED,
+ },
};
static int __init init_fs_dcache_sysctls(void)
@@ -2412,7 +2421,6 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
}
return d_lookup(dir, name);
}
-EXPORT_SYMBOL(d_hash_and_lookup);
/*
* When a file is deleted, we have two options:
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 75715d8877ee..30c4944e1862 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -346,7 +346,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;
- dentry = lookup_positive_unlocked(name, parent, strlen(name));
+ dentry = lookup_noperm_positive_unlocked(&QSTR(name), parent);
if (IS_ERR(dentry))
return NULL;
return dentry;
@@ -388,7 +388,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (unlikely(IS_DEADDIR(d_inode(parent))))
dentry = ERR_PTR(-ENOENT);
else
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
if (d_is_dir(dentry))
pr_err("Directory '%s' with parent '%s' already present!\n",
@@ -872,7 +872,7 @@ int __printf(2, 3) debugfs_change_name(struct dentry *dentry, const char *fmt, .
}
if (strcmp(old_name.name.name, new_name) == 0)
goto out;
- target = lookup_one_len(new_name, parent, strlen(new_name));
+ target = lookup_noperm(&QSTR(new_name), parent);
if (IS_ERR(target)) {
error = PTR_ERR(target);
goto out;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 51a5c54eb740..493d7f194956 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -394,8 +394,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
char *encrypted_and_encoded_name = NULL;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct dentry *lower_dir_dentry, *lower_dentry;
- const char *name = ecryptfs_dentry->d_name.name;
- size_t len = ecryptfs_dentry->d_name.len;
+ struct qstr qname = QSTR_INIT(ecryptfs_dentry->d_name.name,
+ ecryptfs_dentry->d_name.len);
struct dentry *res;
int rc = 0;
@@ -404,23 +404,25 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
mount_crypt_stat = &ecryptfs_superblock_to_private(
ecryptfs_dentry->d_sb)->mount_crypt_stat;
if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
+ size_t len = qname.len;
rc = ecryptfs_encrypt_and_encode_filename(
&encrypted_and_encoded_name, &len,
- mount_crypt_stat, name, len);
+ mount_crypt_stat, qname.name, len);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt and encode "
"filename; rc = [%d]\n", __func__, rc);
return ERR_PTR(rc);
}
- name = encrypted_and_encoded_name;
+ qname.name = encrypted_and_encoded_name;
+ qname.len = len;
}
- lower_dentry = lookup_one_len_unlocked(name, lower_dir_dentry, len);
+ lower_dentry = lookup_noperm_unlocked(&qname, lower_dir_dentry);
if (IS_ERR(lower_dentry)) {
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_noperm() returned "
"[%ld] on lower_dentry = [%s]\n", __func__,
PTR_ERR(lower_dentry),
- name);
+ qname.name);
res = ERR_CAST(lower_dentry);
} else {
res = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry);
diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
index ac6a1dd0a6a5..f913b6824289 100644
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -17,7 +17,6 @@ struct efivarfs_fs_info {
struct efivarfs_mount_opts mount_opts;
struct super_block *sb;
struct notifier_block nb;
- struct notifier_block pm_nb;
};
struct efi_variable {
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 0486e9b68bc6..c900d98bf494 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -18,8 +18,10 @@
#include <linux/statfs.h>
#include <linux/notifier.h>
#include <linux/printk.h>
+#include <linux/namei.h>
#include "internal.h"
+#include "../internal.h"
static int efivarfs_ops_notifier(struct notifier_block *nb, unsigned long event,
void *data)
@@ -119,12 +121,18 @@ static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+
+static int efivarfs_freeze_fs(struct super_block *sb);
+static int efivarfs_unfreeze_fs(struct super_block *sb);
+
static const struct super_operations efivarfs_ops = {
.statfs = efivarfs_statfs,
.drop_inode = generic_delete_inode,
.alloc_inode = efivarfs_alloc_inode,
.free_inode = efivarfs_free_inode,
.show_options = efivarfs_show_options,
+ .freeze_fs = efivarfs_freeze_fs,
+ .unfreeze_fs = efivarfs_unfreeze_fs,
};
/*
@@ -204,7 +212,6 @@ bool efivarfs_variable_is_present(efi_char16_t *variable_name,
char *name = efivar_get_utf8name(variable_name, vendor);
struct super_block *sb = data;
struct dentry *dentry;
- struct qstr qstr;
if (!name)
/*
@@ -217,9 +224,7 @@ bool efivarfs_variable_is_present(efi_char16_t *variable_name,
*/
return true;
- qstr.name = name;
- qstr.len = strlen(name);
- dentry = d_hash_and_lookup(sb->s_root, &qstr);
+ dentry = try_lookup_noperm(&QSTR(name), sb->s_root);
kfree(name);
if (!IS_ERR_OR_NULL(dentry))
dput(dentry);
@@ -367,8 +372,6 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- register_pm_notifier(&sfi->pm_nb);
-
return efivar_init(efivarfs_callback, sb, true);
}
@@ -393,55 +396,12 @@ static const struct fs_context_operations efivarfs_context_ops = {
.reconfigure = efivarfs_reconfigure,
};
-struct efivarfs_ctx {
- struct dir_context ctx;
- struct super_block *sb;
- struct dentry *dentry;
-};
-
-static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
- loff_t offset, u64 ino, unsigned mode)
-{
- unsigned long size;
- struct efivarfs_ctx *ectx = container_of(ctx, struct efivarfs_ctx, ctx);
- struct qstr qstr = { .name = name, .len = len };
- struct dentry *dentry = d_hash_and_lookup(ectx->sb->s_root, &qstr);
- struct inode *inode;
- struct efivar_entry *entry;
- int err;
-
- if (IS_ERR_OR_NULL(dentry))
- return true;
-
- inode = d_inode(dentry);
- entry = efivar_entry(inode);
-
- err = efivar_entry_size(entry, &size);
- size += sizeof(__u32); /* attributes */
- if (err)
- size = 0;
-
- inode_lock_nested(inode, I_MUTEX_CHILD);
- i_size_write(inode, size);
- inode_unlock(inode);
-
- if (!size) {
- ectx->dentry = dentry;
- return false;
- }
-
- dput(dentry);
-
- return true;
-}
-
static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
unsigned long name_size, void *data)
{
char *name;
struct super_block *sb = data;
struct dentry *dentry;
- struct qstr qstr;
int err;
if (guid_equal(&vendor, &LINUX_EFI_RANDOM_SEED_TABLE_GUID))
@@ -451,9 +411,7 @@ static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
if (!name)
return -ENOMEM;
- qstr.name = name;
- qstr.len = strlen(name);
- dentry = d_hash_and_lookup(sb->s_root, &qstr);
+ dentry = try_lookup_noperm(&QSTR(name), sb->s_root);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out;
@@ -474,111 +432,59 @@ static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
return err;
}
-static void efivarfs_deactivate_super_work(struct work_struct *work)
-{
- struct super_block *s = container_of(work, struct super_block,
- destroy_work);
- /*
- * note: here s->destroy_work is free for reuse (which
- * will happen in deactivate_super)
- */
- deactivate_super(s);
-}
-
static struct file_system_type efivarfs_type;
-static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
- void *ptr)
+static int efivarfs_freeze_fs(struct super_block *sb)
{
- struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info,
- pm_nb);
- struct path path;
- struct efivarfs_ctx ectx = {
- .ctx = {
- .actor = efivarfs_actor,
- },
- .sb = sfi->sb,
- };
- struct file *file;
- struct super_block *s = sfi->sb;
- static bool rescan_done = true;
-
- if (action == PM_HIBERNATION_PREPARE) {
- rescan_done = false;
- return NOTIFY_OK;
- } else if (action != PM_POST_HIBERNATION) {
- return NOTIFY_DONE;
- }
-
- if (rescan_done)
- return NOTIFY_DONE;
-
- /* ensure single superblock is alive and pin it */
- if (!atomic_inc_not_zero(&s->s_active))
- return NOTIFY_DONE;
-
- pr_info("efivarfs: resyncing variable state\n");
-
- path.dentry = sfi->sb->s_root;
-
- /*
- * do not add SB_KERNMOUNT which a single superblock could
- * expose to userspace and which also causes MNT_INTERNAL, see
- * below
- */
- path.mnt = vfs_kern_mount(&efivarfs_type, 0,
- efivarfs_type.name, NULL);
- if (IS_ERR(path.mnt)) {
- pr_err("efivarfs: internal mount failed\n");
- /*
- * We may be the last pinner of the superblock but
- * calling efivarfs_kill_sb from within the notifier
- * here would deadlock trying to unregister it
- */
- INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work);
- schedule_work(&s->destroy_work);
- return PTR_ERR(path.mnt);
- }
-
- /* path.mnt now has pin on superblock, so this must be above one */
- atomic_dec(&s->s_active);
-
- file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME,
- current_cred());
- /*
- * safe even if last put because no MNT_INTERNAL means this
- * will do delayed deactivate_super and not deadlock
- */
- mntput(path.mnt);
- if (IS_ERR(file))
- return NOTIFY_DONE;
+ /* Nothing for us to do. */
+ return 0;
+}
- rescan_done = true;
+static int efivarfs_unfreeze_fs(struct super_block *sb)
+{
+ struct dentry *child = NULL;
/*
- * First loop over the directory and verify each entry exists,
- * removing it if it doesn't
+ * Unconditionally resync the variable state on a thaw request.
+ * Given the size of efivarfs it really doesn't matter to simply
+ * iterate through all of the entries and resync. Freeze/thaw
+ * requests are rare enough for that to not matter and the
+ * number of entries is pretty low too. So we really don't care.
*/
- file->f_pos = 2; /* skip . and .. */
- do {
- ectx.dentry = NULL;
- iterate_dir(file, &ectx.ctx);
- if (ectx.dentry) {
- pr_info("efivarfs: removing variable %pd\n",
- ectx.dentry);
- simple_recursive_removal(ectx.dentry, NULL);
- dput(ectx.dentry);
+ pr_info("efivarfs: resyncing variable state\n");
+ for (;;) {
+ int err;
+ unsigned long size = 0;
+ struct inode *inode;
+ struct efivar_entry *entry;
+
+ child = find_next_child(sb->s_root, child);
+ if (!child)
+ break;
+
+ inode = d_inode(child);
+ entry = efivar_entry(inode);
+
+ err = efivar_entry_size(entry, &size);
+ if (err)
+ size = 0;
+ else
+ size += sizeof(__u32);
+
+ inode_lock(inode);
+ i_size_write(inode, size);
+ inode_unlock(inode);
+
+ /* The variable doesn't exist anymore, delete it. */
+ if (!size) {
+ pr_info("efivarfs: removing variable %pd\n", child);
+ simple_recursive_removal(child, NULL);
}
- } while (ectx.dentry);
- fput(file);
-
- /*
- * then loop over variables, creating them if there's no matching
- * dentry
- */
- efivar_init(efivarfs_check_missing, sfi->sb, false);
+ }
- return NOTIFY_OK;
+ efivar_init(efivarfs_check_missing, sb, false);
+ pr_info("efivarfs: finished resyncing variable state\n");
+ return 0;
}
static int efivarfs_init_fs_context(struct fs_context *fc)
@@ -598,9 +504,6 @@ static int efivarfs_init_fs_context(struct fs_context *fc)
fc->s_fs_info = sfi;
fc->ops = &efivarfs_context_ops;
- sfi->pm_nb.notifier_call = efivarfs_pm_notify;
- sfi->pm_nb.priority = 0;
-
return 0;
}
@@ -610,7 +513,6 @@ static void efivarfs_kill_sb(struct super_block *sb)
blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
kill_litter_super(sb);
- unregister_pm_notifier(&sfi->pm_nb);
kfree(sfi);
}
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 8f68ec49ad89..6beeb7063871 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -144,6 +144,20 @@ config EROFS_FS_ZIP_ZSTD
If unsure, say N.
+config EROFS_FS_ZIP_ACCEL
+ bool "EROFS hardware decompression support"
+ depends on EROFS_FS_ZIP
+ help
+ Saying Y here includes hardware accelerator support for reading
+ EROFS file systems containing compressed data. It gives better
+ decompression speed than the software-implemented decompression, and
+ it costs lower CPU overhead.
+
+ Hardware accelerator support is an experimental feature for now and
+ file systems are still readable without selecting this option.
+
+ If unsure, say N.
+
config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support (deprecated)"
depends on EROFS_FS
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 4331d53c7109..549abc424763 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -7,5 +7,6 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
+erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 2704d7a592a5..510e922c5193 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -76,4 +76,14 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
int __init z_erofs_init_decompressor(void);
void z_erofs_exit_decompressor(void);
+int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
+ struct page **pgpl);
+int z_erofs_crypto_enable_engine(const char *name, int len);
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+void z_erofs_crypto_disable_all_engines(void);
+int z_erofs_crypto_show_engines(char *buf, int size, char sep);
+#else
+static inline void z_erofs_crypto_disable_all_engines(void) {}
+static inline int z_erofs_crypto_show_engines(char *buf, int size, char sep) { return 0; }
+#endif
#endif
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 2409d2ab0c28..6a329c329f43 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -27,7 +27,7 @@ void erofs_put_metabuf(struct erofs_buf *buf)
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
{
- pgoff_t index = offset >> PAGE_SHIFT;
+ pgoff_t index = (buf->off + offset) >> PAGE_SHIFT;
struct folio *folio = NULL;
if (buf->page) {
@@ -54,6 +54,7 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
struct erofs_sb_info *sbi = EROFS_SB(sb);
buf->file = NULL;
+ buf->off = sbi->dif0.fsoff;
if (erofs_is_fileio_mode(sbi)) {
buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
buf->mapping = buf->file->f_mapping;
@@ -299,7 +300,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->private = buf.base;
} else {
iomap->type = IOMAP_MAPPED;
- iomap->addr = mdev.m_pa;
+ iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
if (flags & IOMAP_DAX)
iomap->addr += mdev.m_dif->dax_part_off;
}
diff --git a/fs/erofs/decompressor_crypto.c b/fs/erofs/decompressor_crypto.c
new file mode 100644
index 000000000000..97b77ab64432
--- /dev/null
+++ b/fs/erofs/decompressor_crypto.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/scatterlist.h>
+#include <crypto/acompress.h>
+#include "compress.h"
+
+static int __z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
+ struct crypto_acomp *tfm)
+{
+ struct sg_table st_src, st_dst;
+ struct acomp_req *req;
+ struct crypto_wait wait;
+ u8 *headpage;
+ int ret;
+
+ headpage = kmap_local_page(*rq->in);
+ ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
+ min_t(unsigned int, rq->inputsize,
+ rq->sb->s_blocksize - rq->pageofs_in));
+ kunmap_local(headpage);
+ if (ret)
+ return ret;
+
+ req = acomp_request_alloc(tfm);
+ if (!req)
+ return -ENOMEM;
+
+ ret = sg_alloc_table_from_pages_segment(&st_src, rq->in, rq->inpages,
+ rq->pageofs_in, rq->inputsize, UINT_MAX, GFP_KERNEL);
+ if (ret < 0)
+ goto failed_src_alloc;
+
+ ret = sg_alloc_table_from_pages_segment(&st_dst, rq->out, rq->outpages,
+ rq->pageofs_out, rq->outputsize, UINT_MAX, GFP_KERNEL);
+ if (ret < 0)
+ goto failed_dst_alloc;
+
+ acomp_request_set_params(req, st_src.sgl,
+ st_dst.sgl, rq->inputsize, rq->outputsize);
+
+ crypto_init_wait(&wait);
+ acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+
+ ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
+ if (ret) {
+ erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+ ret, rq->inputsize, rq->pageofs_in, rq->outputsize);
+ ret = -EIO;
+ }
+
+ sg_free_table(&st_dst);
+failed_dst_alloc:
+ sg_free_table(&st_src);
+failed_src_alloc:
+ acomp_request_free(req);
+ return ret;
+}
+
+struct z_erofs_crypto_engine {
+ char *crypto_name;
+ struct crypto_acomp *tfm;
+};
+
+struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = {
+ [Z_EROFS_COMPRESSION_LZ4] = (struct z_erofs_crypto_engine[]) {
+ {},
+ },
+ [Z_EROFS_COMPRESSION_LZMA] = (struct z_erofs_crypto_engine[]) {
+ {},
+ },
+ [Z_EROFS_COMPRESSION_DEFLATE] = (struct z_erofs_crypto_engine[]) {
+ { .crypto_name = "qat_deflate", },
+ {},
+ },
+ [Z_EROFS_COMPRESSION_ZSTD] = (struct z_erofs_crypto_engine[]) {
+ {},
+ },
+};
+static DECLARE_RWSEM(z_erofs_crypto_rwsem);
+
+static struct crypto_acomp *z_erofs_crypto_get_engine(int alg)
+{
+ struct z_erofs_crypto_engine *e;
+
+ for (e = z_erofs_crypto[alg]; e->crypto_name; ++e)
+ if (e->tfm)
+ return e->tfm;
+ return NULL;
+}
+
+int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
+ struct page **pgpl)
+{
+ struct crypto_acomp *tfm;
+ int i, err;
+
+ down_read(&z_erofs_crypto_rwsem);
+ tfm = z_erofs_crypto_get_engine(rq->alg);
+ if (!tfm) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ for (i = 0; i < rq->outpages; i++) {
+ struct page *const page = rq->out[i];
+ struct page *victim;
+
+ if (!page) {
+ victim = __erofs_allocpage(pgpl, rq->gfp, true);
+ if (!victim) {
+ err = -ENOMEM;
+ goto out;
+ }
+ set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
+ rq->out[i] = victim;
+ }
+ }
+ err = __z_erofs_crypto_decompress(rq, tfm);
+out:
+ up_read(&z_erofs_crypto_rwsem);
+ return err;
+}
+
+int z_erofs_crypto_enable_engine(const char *name, int len)
+{
+ struct z_erofs_crypto_engine *e;
+ struct crypto_acomp *tfm;
+ int alg;
+
+ down_write(&z_erofs_crypto_rwsem);
+ for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
+ for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
+ if (!strncmp(name, e->crypto_name, len)) {
+ if (e->tfm)
+ break;
+ tfm = crypto_alloc_acomp(e->crypto_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ up_write(&z_erofs_crypto_rwsem);
+ return -EOPNOTSUPP;
+ }
+ e->tfm = tfm;
+ break;
+ }
+ }
+ }
+ up_write(&z_erofs_crypto_rwsem);
+ return 0;
+}
+
+void z_erofs_crypto_disable_all_engines(void)
+{
+ struct z_erofs_crypto_engine *e;
+ int alg;
+
+ down_write(&z_erofs_crypto_rwsem);
+ for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
+ for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
+ if (!e->tfm)
+ continue;
+ crypto_free_acomp(e->tfm);
+ e->tfm = NULL;
+ }
+ }
+ up_write(&z_erofs_crypto_rwsem);
+}
+
+int z_erofs_crypto_show_engines(char *buf, int size, char sep)
+{
+ struct z_erofs_crypto_engine *e;
+ int alg, len = 0;
+
+ for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
+ for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
+ if (!e->tfm)
+ continue;
+ len += scnprintf(buf + len, size - len, "%s%c",
+ e->crypto_name, sep);
+ }
+ }
+ return len;
+}
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index c6908a487054..6909b2d529c7 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -97,8 +97,8 @@ failed:
return -ENOMEM;
}
-static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
- struct page **pgpl)
+static int __z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+ struct page **pgpl)
{
struct super_block *sb = rq->sb;
struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 };
@@ -178,6 +178,22 @@ failed_zinit:
return err;
}
+static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+ struct page **pgpl)
+{
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+ int err;
+
+ if (!rq->partial_decoding) {
+ err = z_erofs_crypto_decompress(rq, pgpl);
+ if (err != -EOPNOTSUPP)
+ return err;
+
+ }
+#endif
+ return __z_erofs_deflate_decompress(rq, pgpl);
+}
+
const struct z_erofs_decompressor z_erofs_deflate_decomp = {
.config = z_erofs_load_deflate_config,
.decompress = z_erofs_deflate_decompress,
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 60c7cc4c105c..7d81f504bff0 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -147,7 +147,8 @@ io_retry:
if (err)
break;
io->rq = erofs_fileio_rq_alloc(&io->dev);
- io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9;
+ io->rq->bio.bi_iter.bi_sector =
+ (io->dev.m_dif->fsoff + io->dev.m_pa) >> 9;
attached = 0;
}
if (!bio_add_folio(&io->rq->bio, folio, len, cur))
@@ -180,7 +181,7 @@ static void erofs_fileio_readahead(struct readahead_control *rac)
struct folio *folio;
int err;
- trace_erofs_readpages(inode, readahead_index(rac),
+ trace_erofs_readahead(inode, readahead_index(rac),
readahead_count(rac), true);
while ((folio = readahead_folio(rac))) {
err = erofs_fileio_scan_folio(&io, folio);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 4ac188d5d894..a32c03a80c70 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -44,7 +44,7 @@ struct erofs_device_info {
struct erofs_fscache *fscache;
struct file *file;
struct dax_device *dax_dev;
- u64 dax_part_off;
+ u64 fsoff, dax_part_off;
erofs_blk_t blocks;
erofs_blk_t uniaddr;
@@ -199,6 +199,7 @@ enum {
struct erofs_buf {
struct address_space *mapping;
struct file *file;
+ u64 off;
struct page *page;
void *base;
};
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index da6ee7c39290..e1e9f06e8342 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -165,8 +165,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) :
bdev_file_open_by_path(dif->path,
BLK_OPEN_READ, sb->s_type, NULL);
- if (IS_ERR(file))
+ if (IS_ERR(file)) {
+ if (file == ERR_PTR(-ENOTBLK))
+ return -EINVAL;
return PTR_ERR(file);
+ }
if (!erofs_is_fileio_mode(sbi)) {
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
@@ -356,7 +359,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
enum {
Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
- Opt_device, Opt_fsid, Opt_domain_id, Opt_directio,
+ Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
};
static const struct constant_table erofs_param_cache_strategy[] = {
@@ -383,6 +386,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
fsparam_string("fsid", Opt_fsid),
fsparam_string("domain_id", Opt_domain_id),
fsparam_flag_no("directio", Opt_directio),
+ fsparam_u64("fsoffset", Opt_fsoffset),
{}
};
@@ -506,28 +510,59 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
#endif
break;
+ case Opt_fsoffset:
+ sbi->dif0.fsoff = result.uint_64;
+ break;
}
return 0;
}
-static struct inode *erofs_nfs_get_inode(struct super_block *sb,
- u64 ino, u32 generation)
+static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+ struct inode *parent)
{
- return erofs_iget(sb, ino);
+ erofs_nid_t nid = EROFS_I(inode)->nid;
+ int len = parent ? 6 : 3;
+
+ if (*max_len < len) {
+ *max_len = len;
+ return FILEID_INVALID;
+ }
+
+ fh[0] = (u32)(nid >> 32);
+ fh[1] = (u32)(nid & 0xffffffff);
+ fh[2] = inode->i_generation;
+
+ if (parent) {
+ nid = EROFS_I(parent)->nid;
+
+ fh[3] = (u32)(nid >> 32);
+ fh[4] = (u32)(nid & 0xffffffff);
+ fh[5] = parent->i_generation;
+ }
+
+ *max_len = len;
+ return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}
static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
- return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
- erofs_nfs_get_inode);
+ if ((fh_type != FILEID_INO64_GEN &&
+ fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
+ return NULL;
+
+ return d_obtain_alias(erofs_iget(sb,
+ ((u64)fid->raw[0] << 32) | fid->raw[1]));
}
static struct dentry *erofs_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
- return generic_fh_to_parent(sb, fid, fh_len, fh_type,
- erofs_nfs_get_inode);
+ if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
+ return NULL;
+
+ return d_obtain_alias(erofs_iget(sb,
+ ((u64)fid->raw[3] << 32) | fid->raw[4]));
}
static struct dentry *erofs_get_parent(struct dentry *child)
@@ -543,7 +578,7 @@ static struct dentry *erofs_get_parent(struct dentry *child)
}
static const struct export_operations erofs_export_ops = {
- .encode_fh = generic_encode_ino32_fh,
+ .encode_fh = erofs_encode_fh,
.fh_to_dentry = erofs_fh_to_dentry,
.fh_to_parent = erofs_fh_to_parent,
.get_parent = erofs_get_parent,
@@ -618,6 +653,14 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
}
}
+ if (sbi->dif0.fsoff) {
+ if (sbi->dif0.fsoff & (sb->s_blocksize - 1))
+ return invalfc(fc, "fsoffset %llu is not aligned to block size %lu",
+ sbi->dif0.fsoff, sb->s_blocksize);
+ if (erofs_is_fscache_mode(sb))
+ return invalfc(fc, "cannot use fsoffset in fscache mode");
+ }
+
if (test_opt(&sbi->opt, DAX_ALWAYS)) {
if (!sbi->dif0.dax_dev) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
@@ -947,6 +990,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
if (sbi->domain_id)
seq_printf(seq, ",domain_id=%s", sbi->domain_id);
#endif
+ if (sbi->dif0.fsoff)
+ seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
return 0;
}
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index dad4e6c6c155..eed8797a193f 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -7,12 +7,14 @@
#include <linux/kobject.h>
#include "internal.h"
+#include "compress.h"
enum {
attr_feature,
attr_drop_caches,
attr_pointer_ui,
attr_pointer_bool,
+ attr_accel,
};
enum {
@@ -60,14 +62,25 @@ static struct erofs_attr erofs_attr_##_name = { \
EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
EROFS_ATTR_FUNC(drop_caches, 0200);
#endif
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+EROFS_ATTR_FUNC(accel, 0644);
+#endif
-static struct attribute *erofs_attrs[] = {
+static struct attribute *erofs_sb_attrs[] = {
#ifdef CONFIG_EROFS_FS_ZIP
ATTR_LIST(sync_decompress),
ATTR_LIST(drop_caches),
#endif
NULL,
};
+ATTRIBUTE_GROUPS(erofs_sb);
+
+static struct attribute *erofs_attrs[] = {
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+ ATTR_LIST(accel),
+#endif
+ NULL,
+};
ATTRIBUTE_GROUPS(erofs);
/* Features this copy of erofs supports */
@@ -128,12 +141,14 @@ static ssize_t erofs_attr_show(struct kobject *kobj,
if (!ptr)
return 0;
return sysfs_emit(buf, "%d\n", *(bool *)ptr);
+ case attr_accel:
+ return z_erofs_crypto_show_engines(buf, PAGE_SIZE, '\n');
}
return 0;
}
static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
+ const char *buf, size_t len)
{
struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
s_kobj);
@@ -182,6 +197,19 @@ static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
invalidate_mapping_pages(MNGD_MAPPING(sbi), 0, -1);
return len;
#endif
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+ case attr_accel:
+ buf = skip_spaces(buf);
+ z_erofs_crypto_disable_all_engines();
+ while (*buf) {
+ t = strcspn(buf, "\n");
+ ret = z_erofs_crypto_enable_engine(buf, t);
+ if (ret < 0)
+ return ret;
+ buf += buf[t] != '\0' ? t + 1 : t;
+ }
+ return len;
+#endif
}
return 0;
}
@@ -199,12 +227,13 @@ static const struct sysfs_ops erofs_attr_ops = {
};
static const struct kobj_type erofs_sb_ktype = {
- .default_groups = erofs_groups,
+ .default_groups = erofs_sb_groups,
.sysfs_ops = &erofs_attr_ops,
.release = erofs_sb_release,
};
static const struct kobj_type erofs_ktype = {
+ .default_groups = erofs_groups,
.sysfs_ops = &erofs_attr_ops,
};
@@ -248,6 +277,12 @@ void erofs_unregister_sysfs(struct super_block *sb)
}
}
+void erofs_exit_sysfs(void)
+{
+ kobject_put(&erofs_feat);
+ kset_unregister(&erofs_root);
+}
+
int __init erofs_init_sysfs(void)
{
int ret;
@@ -255,24 +290,12 @@ int __init erofs_init_sysfs(void)
kobject_set_name(&erofs_root.kobj, "erofs");
erofs_root.kobj.parent = fs_kobj;
ret = kset_register(&erofs_root);
- if (ret)
- goto root_err;
-
- ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype,
- NULL, "features");
- if (ret)
- goto feat_err;
- return ret;
-
-feat_err:
- kobject_put(&erofs_feat);
- kset_unregister(&erofs_root);
-root_err:
+ if (!ret) {
+ ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype,
+ NULL, "features");
+ if (!ret)
+ return 0;
+ erofs_exit_sysfs();
+ }
return ret;
}
-
-void erofs_exit_sysfs(void)
-{
- kobject_put(&erofs_feat);
- kset_unregister(&erofs_root);
-}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index b8e6b76c23d5..fe8071844724 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -288,6 +288,7 @@ static struct workqueue_struct *z_erofs_workqueue __read_mostly;
#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
static struct kthread_worker __rcu **z_erofs_pcpu_workers;
+static atomic_t erofs_percpu_workers_initialized = ATOMIC_INIT(0);
static void erofs_destroy_percpu_workers(void)
{
@@ -333,12 +334,8 @@ static int erofs_init_percpu_workers(void)
}
return 0;
}
-#else
-static inline void erofs_destroy_percpu_workers(void) {}
-static inline int erofs_init_percpu_workers(void) { return 0; }
-#endif
-#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_EROFS_FS_PCPU_KTHREAD)
+#ifdef CONFIG_HOTPLUG_CPU
static DEFINE_SPINLOCK(z_erofs_pcpu_worker_lock);
static enum cpuhp_state erofs_cpuhp_state;
@@ -395,17 +392,56 @@ static void erofs_cpu_hotplug_destroy(void)
if (erofs_cpuhp_state)
cpuhp_remove_state_nocalls(erofs_cpuhp_state);
}
-#else /* !CONFIG_HOTPLUG_CPU || !CONFIG_EROFS_FS_PCPU_KTHREAD */
+#else /* !CONFIG_HOTPLUG_CPU */
static inline int erofs_cpu_hotplug_init(void) { return 0; }
static inline void erofs_cpu_hotplug_destroy(void) {}
-#endif
+#endif/* CONFIG_HOTPLUG_CPU */
+static int z_erofs_init_pcpu_workers(struct super_block *sb)
+{
+ int err;
-void z_erofs_exit_subsystem(void)
+ if (atomic_xchg(&erofs_percpu_workers_initialized, 1))
+ return 0;
+
+ err = erofs_init_percpu_workers();
+ if (err) {
+ erofs_err(sb, "per-cpu workers: failed to allocate.");
+ goto err_init_percpu_workers;
+ }
+
+ err = erofs_cpu_hotplug_init();
+ if (err < 0) {
+ erofs_err(sb, "per-cpu workers: failed CPU hotplug init.");
+ goto err_cpuhp_init;
+ }
+ erofs_info(sb, "initialized per-cpu workers successfully.");
+ return err;
+
+err_cpuhp_init:
+ erofs_destroy_percpu_workers();
+err_init_percpu_workers:
+ atomic_set(&erofs_percpu_workers_initialized, 0);
+ return err;
+}
+
+static void z_erofs_destroy_pcpu_workers(void)
{
+ if (!atomic_xchg(&erofs_percpu_workers_initialized, 0))
+ return;
erofs_cpu_hotplug_destroy();
erofs_destroy_percpu_workers();
+}
+#else /* !CONFIG_EROFS_FS_PCPU_KTHREAD */
+static inline int z_erofs_init_pcpu_workers(struct super_block *sb) { return 0; }
+static inline void z_erofs_destroy_pcpu_workers(void) {}
+#endif/* CONFIG_EROFS_FS_PCPU_KTHREAD */
+
+void z_erofs_exit_subsystem(void)
+{
+ z_erofs_destroy_pcpu_workers();
destroy_workqueue(z_erofs_workqueue);
z_erofs_destroy_pcluster_pool();
+ z_erofs_crypto_disable_all_engines();
z_erofs_exit_decompressor();
}
@@ -427,19 +463,8 @@ int __init z_erofs_init_subsystem(void)
goto err_workqueue_init;
}
- err = erofs_init_percpu_workers();
- if (err)
- goto err_pcpu_worker;
-
- err = erofs_cpu_hotplug_init();
- if (err < 0)
- goto err_cpuhp_init;
return err;
-err_cpuhp_init:
- erofs_destroy_percpu_workers();
-err_pcpu_worker:
- destroy_workqueue(z_erofs_workqueue);
err_workqueue_init:
z_erofs_destroy_pcluster_pool();
err_pcluster_pool:
@@ -641,8 +666,14 @@ static const struct address_space_operations z_erofs_cache_aops = {
int z_erofs_init_super(struct super_block *sb)
{
- struct inode *const inode = new_inode(sb);
+ struct inode *inode;
+ int err;
+ err = z_erofs_init_pcpu_workers(sb);
+ if (err)
+ return err;
+
+ inode = new_inode(sb);
if (!inode)
return -ENOMEM;
set_nlink(inode, 1);
@@ -1707,7 +1738,8 @@ drain_io:
bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
REQ_OP_READ, GFP_NOIO);
bio->bi_end_io = z_erofs_endio;
- bio->bi_iter.bi_sector = cur >> 9;
+ bio->bi_iter.bi_sector =
+ (mdev.m_dif->fsoff + cur) >> 9;
bio->bi_private = q[JQ_SUBMIT];
if (readahead)
bio->bi_opf |= REQ_RAHEAD;
@@ -1855,13 +1887,12 @@ static void z_erofs_readahead(struct readahead_control *rac)
{
struct inode *const inode = rac->mapping->host;
Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac));
- struct folio *head = NULL, *folio;
unsigned int nrpages = readahead_count(rac);
+ struct folio *head = NULL, *folio;
int err;
+ trace_erofs_readahead(inode, readahead_index(rac), nrpages, false);
z_erofs_pcluster_readmore(&f, rac, true);
- nrpages = readahead_count(rac);
- trace_erofs_readpages(inode, readahead_index(rac), nrpages, false);
while ((folio = readahead_folio(rac))) {
folio->private = head;
head = folio;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4bc264b854c4..d4dbffdedd08 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2111,9 +2111,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
write_unlock_irq(&ep->lock);
- if (!eavail && ep_schedule_timeout(to))
- timed_out = !schedule_hrtimeout_range(to, slack,
- HRTIMER_MODE_ABS);
+ if (!eavail)
+ timed_out = !ep_schedule_timeout(to) ||
+ !schedule_hrtimeout_range(to, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
diff --git a/fs/exec.c b/fs/exec.c
index 8e4ea5f1e64c..cfbb2b9ee3c9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,66 +115,6 @@ bool path_noexec(const struct path *path)
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
-#ifdef CONFIG_USELIB
-/*
- * Note that a shared library must be both readable and executable due to
- * security reasons.
- *
- * Also note that we take the address to load from the file itself.
- */
-SYSCALL_DEFINE1(uselib, const char __user *, library)
-{
- struct linux_binfmt *fmt;
- struct file *file;
- struct filename *tmp = getname(library);
- int error = PTR_ERR(tmp);
- static const struct open_flags uselib_flags = {
- .open_flag = O_LARGEFILE | O_RDONLY,
- .acc_mode = MAY_READ | MAY_EXEC,
- .intent = LOOKUP_OPEN,
- .lookup_flags = LOOKUP_FOLLOW,
- };
-
- if (IS_ERR(tmp))
- goto out;
-
- file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
- putname(tmp);
- error = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
-
- /*
- * Check do_open_execat() for an explanation.
- */
- error = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
- goto exit;
-
- error = -ENOEXEC;
-
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- if (!fmt->load_shlib)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
- }
- read_unlock(&binfmt_lock);
-exit:
- fput(file);
-out:
- return error;
-}
-#endif /* #ifdef CONFIG_USELIB */
-
#ifdef CONFIG_MMU
/*
* The nascent bprm->mm is not visible until exec_mmap() but it can
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 128dd092916b..cdefea17986a 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -143,7 +143,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
if (err)
goto out_err;
dprintk("%s: found name: %s\n", __func__, nbuf);
- tmp = lookup_one_unlocked(mnt_idmap(mnt), nbuf, parent, strlen(nbuf));
+ tmp = lookup_one_unlocked(mnt_idmap(mnt), &QSTR(nbuf), parent);
if (IS_ERR(tmp)) {
dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
err = PTR_ERR(tmp);
@@ -284,6 +284,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
};
struct getdents_callback buffer = {
.ctx.actor = filldir_one,
+ .ctx.count = INT_MAX,
.name = name,
};
@@ -549,8 +550,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
}
inode_lock(target_dir->d_inode);
- nresult = lookup_one(mnt_idmap(mnt), nbuf,
- target_dir, strlen(nbuf));
+ nresult = lookup_one(mnt_idmap(mnt), &QSTR(nbuf), target_dir);
if (!IS_ERR(nresult)) {
if (unlikely(nresult->d_inode != result->d_inode)) {
dput(nresult);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 94c7d2d828a6..cdf01e60fa6d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5692,7 +5692,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
awu_max = sbi->s_awu_max;
}
- generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
+ generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
}
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2b8f9239bede..dd0ba0532e01 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -2271,12 +2271,12 @@ out_drop_write:
if (err)
return err;
- err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ err = freeze_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
- err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ err = thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL);
if (err)
return err;
return -EROFS;
@@ -2333,6 +2333,6 @@ recover_out:
out_err:
f2fs_up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->gc_lock);
- thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL);
return err;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index c04ed94cdc4b..138114d64307 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(get_max_files);
static int proc_nr_files(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
- files_stat.nr_files = get_nr_files();
+ files_stat.nr_files = percpu_counter_sum_positive(&nr_files);
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 58b9067b2391..95e5256821a5 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -156,15 +156,19 @@ static int fs_index(const char __user * __name)
static int fs_name(unsigned int index, char __user * buf)
{
struct file_system_type * tmp;
- int len, res;
+ int len, res = -EINVAL;
read_lock(&file_systems_lock);
- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
- if (index <= 0 && try_module_get(tmp->owner))
+ for (tmp = file_systems; tmp; tmp = tmp->next, index--) {
+ if (index == 0) {
+ if (try_module_get(tmp->owner))
+ res = 0;
break;
+ }
+ }
read_unlock(&file_systems_lock);
- if (!tmp)
- return -EINVAL;
+ if (res)
+ return res;
/* OK, we got the reference, so we can safely block */
len = strlen(tmp->name) + 1;
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 582d33e81117..666e61753aed 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -222,7 +222,7 @@ int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
char *value = strchr(key, '=');
if (value) {
- if (value == key)
+ if (unlikely(value == key))
continue;
*value++ = 0;
v_len = strlen(value);
@@ -449,6 +449,10 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "",
prefix ? ": " : "", &vaf);
break;
+ case 'i':
+ printk(KERN_INFO "%s%s%pV\n", prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
+ break;
default:
printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "",
prefix ? ": " : "", &vaf);
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index e635a81e17d9..c092a9f79e32 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -380,58 +380,9 @@ EXPORT_SYMBOL(fs_param_is_path);
#ifdef CONFIG_VALIDATE_FS_PARSER
/**
- * validate_constant_table - Validate a constant table
- * @tbl: The constant table to validate.
- * @tbl_size: The size of the table.
- * @low: The lowest permissible value.
- * @high: The highest permissible value.
- * @special: One special permissible value outside of the range.
- */
-bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
- int low, int high, int special)
-{
- size_t i;
- bool good = true;
-
- if (tbl_size == 0) {
- pr_warn("VALIDATE C-TBL: Empty\n");
- return true;
- }
-
- for (i = 0; i < tbl_size; i++) {
- if (!tbl[i].name) {
- pr_err("VALIDATE C-TBL[%zu]: Null\n", i);
- good = false;
- } else if (i > 0 && tbl[i - 1].name) {
- int c = strcmp(tbl[i-1].name, tbl[i].name);
-
- if (c == 0) {
- pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n",
- i, tbl[i].name);
- good = false;
- }
- if (c > 0) {
- pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n",
- i, tbl[i-1].name, tbl[i].name);
- good = false;
- }
- }
-
- if (tbl[i].value != special &&
- (tbl[i].value < low || tbl[i].value > high)) {
- pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n",
- i, tbl[i].name, tbl[i].value, low, high);
- good = false;
- }
- }
-
- return good;
-}
-
-/**
- * fs_validate_description - Validate a parameter description
- * @name: The parameter name to search for.
- * @desc: The parameter description to validate.
+ * fs_validate_description - Validate a parameter specification array
+ * @name: Owner name of the parameter specification array
+ * @desc: The parameter specification array to validate.
*/
bool fs_validate_description(const char *name,
const struct fs_parameter_spec *desc)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83ac192e7fdd..33b82529cb6e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1676,7 +1676,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
goto out_err;
}
- set_delayed_call(callback, page_put_link, &folio->page);
+ set_delayed_call(callback, page_put_link, folio);
return folio_address(folio);
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 17ce9636a2b1..edcd6f18a8a8 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -120,7 +120,7 @@ static bool fuse_emit(struct file *file, struct dir_context *ctx,
fuse_add_dirent_to_cache(file, dirent, ctx->pos);
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
- dirent->type);
+ dirent->type | FILLDIR_FLAG_NOINTR);
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
@@ -419,7 +419,7 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
if (ff->readdir.pos == ctx->pos) {
res = FOUND_SOME;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type))
+ dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
return FOUND_ALL;
ctx->pos = dirent->off;
}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 68fc8af14700..14f204cd5a82 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -37,27 +37,6 @@
#include "aops.h"
-void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
- size_t from, size_t len)
-{
- struct buffer_head *head = folio_buffers(folio);
- unsigned int bsize = head->b_size;
- struct buffer_head *bh;
- size_t to = from + len;
- size_t start, end;
-
- for (bh = head, start = 0; bh != head || !start;
- bh = bh->b_this_page, start = end) {
- end = start + bsize;
- if (end <= from)
- continue;
- if (start >= to)
- break;
- set_buffer_uptodate(bh);
- gfs2_trans_add_data(ip->i_gl, bh);
- }
-}
-
/**
* gfs2_get_block_noalloc - Fills in a buffer head with details about a block
* @inode: The inode
@@ -133,12 +112,43 @@ static int __gfs2_jdata_write_folio(struct folio *folio,
inode->i_sb->s_blocksize,
BIT(BH_Dirty)|BIT(BH_Uptodate));
}
- gfs2_trans_add_databufs(ip, folio, 0, folio_size(folio));
+ gfs2_trans_add_databufs(ip->i_gl, folio, 0, folio_size(folio));
}
return gfs2_write_jdata_folio(folio, wbc);
}
/**
+ * gfs2_jdata_writeback - Write jdata folios to the log
+ * @mapping: The mapping to write
+ * @wbc: The writeback control
+ *
+ * Returns: errno
+ */
+int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
+ struct folio *folio = NULL;
+ int error;
+
+ BUG_ON(current->journal_info);
+ if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE))
+ return 0;
+
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+ if (folio_test_checked(folio)) {
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
+ continue;
+ }
+ error = __gfs2_jdata_write_folio(folio, wbc);
+ }
+
+ return error;
+}
+
+/**
* gfs2_writepages - Write a bunch of dirty pages back to disk
* @mapping: The mapping to write
* @wbc: Write-back control
@@ -228,24 +238,16 @@ continue_unlock:
ret = __gfs2_jdata_write_folio(folio, wbc);
if (unlikely(ret)) {
- if (ret == AOP_WRITEPAGE_ACTIVATE) {
- folio_unlock(folio);
- ret = 0;
- } else {
-
- /*
- * done_index is set past this page,
- * so media errors will not choke
- * background writeout for the entire
- * file. This has consequences for
- * range_cyclic semantics (ie. it may
- * not be suitable for data integrity
- * writeout).
- */
- *done_index = folio_next_index(folio);
- ret = 1;
- break;
- }
+ /*
+ * done_index is set past this page, so media errors
+ * will not choke background writeout for the entire
+ * file. This has consequences for range_cyclic
+ * semantics (ie. it may not be suitable for data
+ * integrity writeout).
+ */
+ *done_index = folio_next_index(folio);
+ ret = 1;
+ break;
}
/*
@@ -540,7 +542,7 @@ out:
gfs2_trans_end(sdp);
}
-static bool jdata_dirty_folio(struct address_space *mapping,
+static bool gfs2_jdata_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
if (current->journal_info)
@@ -722,7 +724,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
.writepages = gfs2_jdata_writepages,
.read_folio = gfs2_read_folio,
.readahead = gfs2_readahead,
- .dirty_folio = jdata_dirty_folio,
+ .dirty_folio = gfs2_jdata_dirty_folio,
.bmap = gfs2_bmap,
.migrate_folio = buffer_migrate_folio,
.invalidate_folio = gfs2_invalidate_folio,
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index a10c4334d248..bf002522a782 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -9,7 +9,6 @@
#include "incore.h"
void adjust_fs_space(struct inode *inode);
-void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
- size_t from, size_t len);
+int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc);
#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 366516b98b3f..7703d0471139 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -988,7 +988,8 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
struct gfs2_sbd *sdp = GFS2_SB(inode);
if (!gfs2_is_stuffed(ip))
- gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
+ gfs2_trans_add_databufs(ip->i_gl, folio,
+ offset_in_folio(folio, pos),
copied);
folio_unlock(folio);
@@ -1296,10 +1297,12 @@ int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
* uses iomap write to perform its actions, which begin their own transactions
* (iomap_begin, get_folio, etc.)
*/
-static int gfs2_block_zero_range(struct inode *inode, loff_t from,
- unsigned int length)
+static int gfs2_block_zero_range(struct inode *inode, loff_t from, loff_t length)
{
BUG_ON(current->journal_info);
+ if (from >= inode->i_size)
+ return 0;
+ length = min(length, inode->i_size - from);
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
NULL);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d7220a6fe8f5..ba25b884169e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1166,7 +1166,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops, int create,
struct gfs2_glock **glp)
{
- struct super_block *s = sdp->sd_vfs;
struct lm_lockname name = { .ln_number = number,
.ln_type = glops->go_type,
.ln_sbd = sdp };
@@ -1229,7 +1228,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping = gfs2_glock2aspace(gl);
if (mapping) {
mapping->a_ops = &gfs2_meta_aops;
- mapping->host = s->s_bdev->bd_mapping->host;
+ mapping->host = sdp->sd_inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->i_private_data = NULL;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index eb4714f299ef..cebd66b22694 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -168,7 +168,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
static int gfs2_rgrp_metasync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *metamapping = &sdp->sd_aspace;
+ struct address_space *metamapping = gfs2_aspace(sdp);
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
@@ -225,7 +225,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *mapping = &sdp->sd_aspace;
+ struct address_space *mapping = gfs2_aspace(sdp);
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t start, end;
@@ -601,14 +601,13 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl)
if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
- error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
+ error = gfs2_find_jhead(sdp->sd_jdesc, &head);
if (gfs2_assert_withdraw_delayed(sdp, !error))
return error;
if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags &
GFS2_LOG_HEAD_UNMOUNT))
return -EIO;
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
+ gfs2_log_pointers_init(sdp, &head);
}
return 0;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 74abbd4970f8..0a41c4e76b32 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -795,7 +795,7 @@ struct gfs2_sbd {
/* Log stuff */
- struct address_space sd_aspace;
+ struct inode *sd_inode;
spinlock_t sd_log_lock;
@@ -851,6 +851,13 @@ struct gfs2_sbd {
unsigned long sd_glock_dqs_held;
};
+#define GFS2_BAD_INO 1
+
+static inline struct address_space *gfs2_aspace(struct gfs2_sbd *sdp)
+{
+ return sdp->sd_inode->i_mapping;
+}
+
static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
{
gl->gl_stats.stats[which]++;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 198a8cbaf5e5..187d789a8f1e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -439,6 +439,74 @@ out:
return error;
}
+static void gfs2_final_release_pages(struct gfs2_inode *ip)
+{
+ struct inode *inode = &ip->i_inode;
+ struct gfs2_glock *gl = ip->i_gl;
+
+ if (unlikely(!gl)) {
+ /* This can only happen during incomplete inode creation. */
+ BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ return;
+ }
+
+ truncate_inode_pages(gfs2_glock2aspace(gl), 0);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (atomic_read(&gl->gl_revokes) == 0) {
+ clear_bit(GLF_LFLUSH, &gl->gl_flags);
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ }
+}
+
+int gfs2_dinode_dealloc(struct gfs2_inode *ip)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder gh;
+ int error;
+
+ if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+
+ gfs2_rindex_update(sdp);
+
+ error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
+ if (error)
+ return error;
+
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
+ if (!rgd) {
+ gfs2_consist_inode(ip);
+ error = -EIO;
+ goto out_qs;
+ }
+
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
+ if (error)
+ goto out_qs;
+
+ error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
+ sdp->sd_jdesc->jd_blocks);
+ if (error)
+ goto out_rg_gunlock;
+
+ gfs2_free_di(rgd, ip);
+
+ gfs2_final_release_pages(ip);
+
+ gfs2_trans_end(sdp);
+
+out_rg_gunlock:
+ gfs2_glock_dq_uninit(&gh);
+out_qs:
+ gfs2_quota_unhold(ip);
+ return error;
+}
+
static void gfs2_init_dir(struct buffer_head *dibh,
const struct gfs2_inode *parent)
{
@@ -629,10 +697,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
- int error;
+ int error, dealloc_error;
u32 aflags = 0;
unsigned blocks = 1;
struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
+ bool xattr_initialized = false;
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
@@ -659,7 +728,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
if (S_ISDIR(inode->i_mode)) {
iput(inode);
- inode = ERR_PTR(-EISDIR);
+ inode = NULL;
+ error = -EISDIR;
goto fail_gunlock;
}
d_instantiate(dentry, inode);
@@ -744,11 +814,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (error)
- goto fail_free_inode;
+ goto fail_dealloc_inode;
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
- goto fail_free_inode;
+ goto fail_dealloc_inode;
gfs2_cancel_delete_work(io_gl);
io_gl->gl_no_formal_ino = ip->i_no_formal_ino;
@@ -767,13 +837,16 @@ retry:
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (error)
goto fail_gunlock3;
+ clear_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
goto fail_gunlock3;
- if (blocks > 1)
+ if (blocks > 1) {
gfs2_init_xattr(ip);
+ xattr_initialized = true;
+ }
init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);
@@ -828,6 +901,18 @@ fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_gunlock2:
gfs2_glock_put(io_gl);
+fail_dealloc_inode:
+ set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
+ dealloc_error = 0;
+ if (ip->i_eattr)
+ dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized);
+ clear_nlink(inode);
+ mark_inode_dirty(inode);
+ if (!dealloc_error)
+ dealloc_error = gfs2_dinode_dealloc(ip);
+ if (dealloc_error)
+ fs_warn(sdp, "%s: %d\n", __func__, dealloc_error);
+ ip->i_no_addr = 0;
fail_free_inode:
if (ip->i_gl) {
gfs2_glock_put(ip->i_gl);
@@ -842,10 +927,6 @@ fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(&d_gh);
if (!IS_ERR_OR_NULL(inode)) {
- set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
- clear_nlink(inode);
- if (ip->i_no_addr)
- mark_inode_dirty(inode);
if (inode->i_state & I_NEW)
iget_failed(inode);
else
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 9e5e1622d50a..eafe123617e6 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -92,6 +92,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 no_formal_ino,
unsigned int blktype);
+int gfs2_dinode_dealloc(struct gfs2_inode *ip);
struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 58aeeae7ed8c..7cb9d216d8bb 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -328,6 +328,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ uint32_t flags = 0;
int error;
BUG_ON(!__lockref_is_dead(&gl->gl_lockref));
@@ -352,7 +353,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
* When the lockspace is released, all remaining glocks will be
* unlocked automatically. This is more efficient than unlocking them
* individually, but when the lock is held in DLM_LOCK_EX or
- * DLM_LOCK_PW mode, the lock value block (LVB) will be lost.
+ * DLM_LOCK_PW mode, the lock value block (LVB) would be lost.
*/
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
@@ -361,8 +362,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
return;
}
+ if (gl->gl_lksb.sb_lvbptr)
+ flags |= DLM_LKF_VALBLK;
+
again:
- error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
+ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags,
NULL, gl);
if (error == -EBUSY) {
msleep(20);
@@ -996,14 +1000,15 @@ locks_done:
if (sdp->sd_args.ar_spectator) {
fs_info(sdp, "Recovery is required. Waiting for a "
"non-spectator to mount.\n");
+ spin_unlock(&ls->ls_recover_spin);
msleep_interruptible(1000);
} else {
fs_info(sdp, "control_mount wait1 block %u start %u "
"mount %u lvb %u flags %lx\n", block_gen,
start_gen, mount_gen, lvb_gen,
ls->ls_recover_flags);
+ spin_unlock(&ls->ls_recover_spin);
}
- spin_unlock(&ls->ls_recover_spin);
goto restart;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f9c5089783d2..115c4ac457e9 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -31,6 +31,7 @@
#include "dir.h"
#include "trace_gfs2.h"
#include "trans.h"
+#include "aops.h"
static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
@@ -131,7 +132,11 @@ __acquires(&sdp->sd_ail_lock)
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
- ret = mapping->a_ops->writepages(mapping, wbc);
+ BUG_ON(GFS2_SB(mapping->host) != sdp);
+ if (gfs2_is_jdata(GFS2_I(mapping->host)))
+ ret = gfs2_jdata_writeback(mapping, wbc);
+ else
+ ret = mapping->a_ops->writepages(mapping, wbc);
if (need_resched()) {
blk_finish_plug(plug);
cond_resched();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index c27b05099c1e..fc30ebdad83a 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -44,17 +44,6 @@ __releases(&sdp->sd_log_lock)
spin_unlock(&sdp->sd_log_lock);
}
-static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
- unsigned int value)
-{
- if (++value == sdp->sd_jdesc->jd_blocks) {
- value = 0;
- }
- sdp->sd_log_tail = value;
- sdp->sd_log_flush_tail = value;
- sdp->sd_log_head = value;
-}
-
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 0fd3b5ec7d8c..9c8c305a75c4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -204,9 +204,11 @@ static void gfs2_end_log_write(struct bio *bio)
struct bvec_iter_all iter_all;
if (bio->bi_status) {
- if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
+ int err = blk_status_to_errno(bio->bi_status);
+
+ if (!cmpxchg(&sdp->sd_log_error, 0, err))
fs_err(sdp, "Error %d writing to journal, jid=%u\n",
- bio->bi_status, sdp->sd_jdesc->jd_jid);
+ err, sdp->sd_jdesc->jd_jid);
gfs2_withdraw_delayed(sdp);
/* prevent more writes to the journal */
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
@@ -449,7 +451,7 @@ static bool gfs2_jhead_folio_search(struct gfs2_jdesc *jd,
* Find the folio with 'index' in the journal's mapping. Search the folio for
* the journal head if requested (cleanup == false). Release refs on the
* folio so the page cache can reclaim it. We grabbed a
- * reference on this folio twice, first when we did a grab_cache_page()
+ * reference on this folio twice, first when we did a filemap_grab_folio()
* to obtain the folio to add it to the bio and second when we do a
* filemap_get_folio() here to get the folio to wait on while I/O on it is being
* completed.
@@ -474,7 +476,7 @@ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
if (!*done)
*done = gfs2_jhead_folio_search(jd, head, folio);
- /* filemap_get_folio() and the earlier grab_cache_page() */
+ /* filemap_get_folio() and the earlier filemap_grab_folio() */
folio_put_refs(folio, 2);
}
@@ -494,15 +496,13 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
* gfs2_find_jhead - find the head of a log
* @jd: The journal descriptor
* @head: The log descriptor for the head of the log is returned here
- * @keep_cache: If set inode pages will not be truncated
*
* Do a search of a journal by reading it in large chunks using bios and find
* the valid log entry with the highest sequence number. (i.e. the log head)
*
* Returns: 0 on success, errno otherwise
*/
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
- bool keep_cache)
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct address_space *mapping = jd->jd_inode->i_mapping;
@@ -591,8 +591,7 @@ out:
if (!ret)
ret = filemap_check_wb_err(mapping, since);
- if (!keep_cache)
- truncate_inode_pages(mapping, 0);
+ truncate_inode_pages(mapping, 0);
return ret;
}
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 07890c7b145d..be740bf33666 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -20,7 +20,7 @@ void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf);
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
int gfs2_find_jhead(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head, bool keep_cache);
+ struct gfs2_log_header_host *head);
void gfs2_drain_revokes(struct gfs2_sbd *sdp);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 198cc7056637..9dc8885c95d0 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -132,7 +132,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
unsigned int bufnum;
if (mapping == NULL)
- mapping = &sdp->sd_aspace;
+ mapping = gfs2_aspace(sdp);
shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 831d988c2ceb..b7c8a6684d02 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -44,9 +44,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping)
struct gfs2_glock_aspace *gla =
container_of(mapping, struct gfs2_glock_aspace, mapping);
return gla->glock.gl_name.ln_sbd;
- } else if (mapping->a_ops == &gfs2_rgrp_aops)
- return container_of(mapping, struct gfs2_sbd, sd_aspace);
- else
+ } else
return inode->i_sb->s_fs_info;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e83d293c3614..653f0ff4b057 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -64,15 +64,13 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
void free_sbd(struct gfs2_sbd *sdp)
{
- if (sdp->sd_lkstats)
- free_percpu(sdp->sd_lkstats);
+ free_percpu(sdp->sd_lkstats);
kfree(sdp);
}
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
- struct address_space *mapping;
sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
if (!sdp)
@@ -109,16 +107,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
INIT_LIST_HEAD(&sdp->sd_sc_inodes_list);
- mapping = &sdp->sd_aspace;
-
- address_space_init_once(mapping);
- mapping->a_ops = &gfs2_rgrp_aops;
- mapping->host = sb->s_bdev->bd_mapping->host;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->i_private_data = NULL;
- mapping->writeback_index = 0;
-
spin_lock_init(&sdp->sd_log_lock);
atomic_set(&sdp->sd_log_pinned, 0);
INIT_LIST_HEAD(&sdp->sd_log_revokes);
@@ -226,28 +214,22 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const struct gfs2_sb *str)
static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
{
- struct super_block *sb = sdp->sd_vfs;
- struct page *page;
- struct bio_vec bvec;
- struct bio bio;
+ struct gfs2_sb *sb;
int err;
- page = alloc_page(GFP_KERNEL);
- if (unlikely(!page))
+ sb = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (unlikely(!sb))
return -ENOMEM;
-
- bio_init(&bio, sb->s_bdev, &bvec, 1, REQ_OP_READ | REQ_META);
- bio.bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
- __bio_add_page(&bio, page, PAGE_SIZE, 0);
-
- err = submit_bio_wait(&bio);
+ err = bdev_rw_virt(sdp->sd_vfs->s_bdev,
+ sector * (sdp->sd_vfs->s_blocksize >> 9), sb, PAGE_SIZE,
+ REQ_OP_READ | REQ_META);
if (err) {
pr_warn("error %d reading superblock\n", err);
- __free_page(page);
+ kfree(sb);
return err;
}
- gfs2_sb_in(sdp, page_address(page));
- __free_page(page);
+ gfs2_sb_in(sdp, sb);
+ kfree(sb);
return gfs2_check_sb(sdp, silent);
}
@@ -500,7 +482,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
goto out;
}
- sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
+ ret = -EINVAL;
+ if (!sb_set_blocksize(sb, sdp->sd_sb.sb_bsize))
+ goto out;
/* Get the root inode */
no_addr = sdp->sd_sb.sb_root_dir.no_addr;
@@ -1135,6 +1119,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
+ struct address_space *mapping;
int error;
sdp = init_sbd(sb);
@@ -1156,6 +1141,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOSEC;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
+
sb->s_d_op = &gfs2_dops;
sb->s_export_op = &gfs2_export_ops;
sb->s_qcop = &gfs2_quotactl_ops;
@@ -1167,6 +1153,9 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
/* Set up the buffer cache and fill in some fake block size values
to allow us to read-in the on-disk superblock. */
sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, 512);
+ error = -EINVAL;
+ if (!sdp->sd_sb.sb_bsize)
+ goto fail_free;
sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9;
sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
@@ -1181,9 +1170,21 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sdp->sd_tune.gt_statfs_quantum = 30;
}
+ /* Set up an address space for metadata writes */
+ sdp->sd_inode = new_inode(sb);
+ error = -ENOMEM;
+ if (!sdp->sd_inode)
+ goto fail_free;
+ sdp->sd_inode->i_ino = GFS2_BAD_INO;
+ sdp->sd_inode->i_size = OFFSET_MAX;
+
+ mapping = gfs2_aspace(sdp);
+ mapping->a_ops = &gfs2_rgrp_aops;
+ mapping_set_gfp_mask(mapping, GFP_NOFS);
+
error = init_names(sdp, silent);
if (error)
- goto fail_free;
+ goto fail_iput;
snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
@@ -1192,7 +1193,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0,
sdp->sd_fsname);
if (!sdp->sd_glock_wq)
- goto fail_free;
+ goto fail_iput;
sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
@@ -1309,6 +1310,8 @@ fail_delete_wq:
fail_glock_wq:
if (sdp->sd_glock_wq)
destroy_workqueue(sdp->sd_glock_wq);
+fail_iput:
+ iput(sdp->sd_inode);
fail_free:
free_sbd(sdp);
sb->s_fs_info = NULL;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index f4fe7039f725..24250478b085 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -118,6 +118,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
unsigned int blkno, struct gfs2_log_header_host *head)
{
+ const u32 zero = 0;
u32 hash, crc;
if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
@@ -126,7 +127,7 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
return 1;
hash = crc32(~0, lh, LH_V1_SIZE - 4);
- hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
+ hash = ~crc32(hash, &zero, 4); /* assume lh_hash is zero */
if (be32_to_cpu(lh->lh_hash) != hash)
return 1;
@@ -263,16 +264,12 @@ static void clean_journal(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- u32 lblock = head->lh_blkno;
- gfs2_replay_incr_blk(jd, &lblock);
- gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
+ gfs2_replay_incr_blk(jd, &head->lh_blkno);
+ head->lh_sequence++;
+ gfs2_write_log_header(sdp, jd, head->lh_sequence, 0, head->lh_blkno,
GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
- if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
- sdp->sd_log_flush_head = lblock;
- gfs2_log_incr_head(sdp);
- }
}
@@ -457,7 +454,7 @@ void gfs2_recover_func(struct work_struct *work)
if (error)
goto fail_gunlock_ji;
- error = gfs2_find_jhead(jd, &head, true);
+ error = gfs2_find_jhead(jd, &head);
if (error)
goto fail_gunlock_ji;
t_jhd = ktime_get();
@@ -533,6 +530,9 @@ void gfs2_recover_func(struct work_struct *work)
ktime_ms_delta(t_rep, t_tlck));
}
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
+ gfs2_log_pointers_init(sdp, &head);
+
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jlocked) {
@@ -580,3 +580,13 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
return wait ? jd->jd_recover_error : 0;
}
+void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
+ struct gfs2_log_header_host *head)
+{
+ sdp->sd_log_sequence = head->lh_sequence + 1;
+ gfs2_replay_incr_blk(sdp->sd_jdesc, &head->lh_blkno);
+ sdp->sd_log_tail = head->lh_blkno;
+ sdp->sd_log_flush_head = head->lh_blkno;
+ sdp->sd_log_flush_tail = head->lh_blkno;
+ sdp->sd_log_head = head->lh_blkno;
+}
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 6a0fd42e1120..5a5ba72ecd75 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -29,6 +29,8 @@ void gfs2_recover_func(struct work_struct *work);
int __get_log_header(struct gfs2_sbd *sdp,
const struct gfs2_log_header *lh, unsigned int blkno,
struct gfs2_log_header_host *head);
+void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
+ struct gfs2_log_header_host *head);
#endif /* __RECOVERY_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 44e5658b896c..7c518c4ff638 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -134,28 +134,18 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
- struct gfs2_log_header_host head;
int error;
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
if (gfs2_withdrawing_or_withdrawn(sdp))
return -EIO;
- error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (error) {
- gfs2_consist(sdp);
- return error;
- }
-
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
- gfs2_consist(sdp);
+ if (sdp->sd_log_sequence == 0) {
+ fs_err(sdp, "unknown status of our own journal jid %d",
+ sdp->sd_lockstruct.ls_jid);
return -EIO;
}
- /* Initialize some head of the log stuff */
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
-
error = gfs2_quota_init(sdp);
if (!error && gfs2_withdrawing_or_withdrawn(sdp))
error = -EIO;
@@ -370,7 +360,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
error = gfs2_jdesc_check(jd);
if (error)
break;
- error = gfs2_find_jhead(jd, &lh, false);
+ error = gfs2_find_jhead(jd, &lh);
if (error)
break;
if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
@@ -648,7 +638,7 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
- truncate_inode_pages_final(&sdp->sd_aspace);
+ iput(sdp->sd_inode);
gfs2_delete_debugfs_file(sdp);
gfs2_sys_fs_del(sdp);
@@ -674,7 +664,7 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
return sdp->sd_log_error;
}
-static int gfs2_do_thaw(struct gfs2_sbd *sdp)
+static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who, const void *freeze_owner)
{
struct super_block *sb = sdp->sd_vfs;
int error;
@@ -682,7 +672,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp)
error = gfs2_freeze_lock_shared(sdp);
if (error)
goto fail;
- error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = thaw_super(sb, who, freeze_owner);
if (!error)
return 0;
@@ -703,14 +693,14 @@ void gfs2_freeze_func(struct work_struct *work)
if (test_bit(SDF_FROZEN, &sdp->sd_flags))
goto freeze_failed;
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
if (error)
goto freeze_failed;
gfs2_freeze_unlock(sdp);
set_bit(SDF_FROZEN, &sdp->sd_flags);
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE, NULL);
if (error)
goto out;
@@ -728,10 +718,13 @@ out:
/**
* gfs2_freeze_super - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
+ * @freeze_owner: owner of the freeze
*
*/
-static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
+static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
@@ -744,7 +737,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
}
for (;;) {
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sb, who, freeze_owner);
if (error) {
fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
error);
@@ -758,7 +751,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
break;
}
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, who, freeze_owner);
if (error)
goto out;
@@ -796,10 +789,13 @@ static int gfs2_freeze_fs(struct super_block *sb)
/**
* gfs2_thaw_super - reallow writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
+ * @freeze_owner: owner of the freeze
*
*/
-static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
+static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
@@ -814,7 +810,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
atomic_inc(&sb->s_active);
gfs2_freeze_unlock(sdp);
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, who, freeze_owner);
if (!error) {
clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
@@ -1173,74 +1169,6 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
return 0;
}
-static void gfs2_final_release_pages(struct gfs2_inode *ip)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_glock *gl = ip->i_gl;
-
- if (unlikely(!gl)) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
- return;
- }
-
- truncate_inode_pages(gfs2_glock2aspace(gl), 0);
- truncate_inode_pages(&inode->i_data, 0);
-
- if (atomic_read(&gl->gl_revokes) == 0) {
- clear_bit(GLF_LFLUSH, &gl->gl_flags);
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- }
-}
-
-static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder gh;
- int error;
-
- if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
- gfs2_consist_inode(ip);
- return -EIO;
- }
-
- gfs2_rindex_update(sdp);
-
- error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
- if (error)
- return error;
-
- rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
- if (!rgd) {
- gfs2_consist_inode(ip);
- error = -EIO;
- goto out_qs;
- }
-
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
- LM_FLAG_NODE_SCOPE, &gh);
- if (error)
- goto out_qs;
-
- error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
- sdp->sd_jdesc->jd_blocks);
- if (error)
- goto out_rg_gunlock;
-
- gfs2_free_di(rgd, ip);
-
- gfs2_final_release_pages(ip);
-
- gfs2_trans_end(sdp);
-
-out_rg_gunlock:
- gfs2_glock_dq_uninit(&gh);
-out_qs:
- gfs2_quota_unhold(ip);
- return error;
-}
-
/**
* gfs2_glock_put_eventually
* @gl: The glock to put
@@ -1326,9 +1254,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode,
struct gfs2_sbd *sdp = sb->s_fs_info;
int ret;
- if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
- goto should_delete;
-
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags))
return EVICT_SHOULD_DEFER_DELETE;
@@ -1358,7 +1283,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode,
if (inode->i_nlink)
return EVICT_SHOULD_SKIP_DELETE;
-should_delete:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
return gfs2_upgrade_iopen_glock(inode);
@@ -1382,7 +1306,7 @@ static int evict_unlinked_inode(struct inode *inode)
}
if (ip->i_eattr) {
- ret = gfs2_ea_dealloc(ip);
+ ret = gfs2_ea_dealloc(ip, true);
if (ret)
goto out;
}
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index ecc699f8d9fc..748125653d6c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -174,10 +174,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
switch (n) {
case 0:
- error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
+ error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL);
break;
case 1:
- error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL);
break;
default:
return -EINVAL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f8ae2c666fd6..075f7e9abe47 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -226,6 +226,27 @@ out:
unlock_buffer(bh);
}
+void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio,
+ size_t from, size_t len)
+{
+ struct buffer_head *head = folio_buffers(folio);
+ unsigned int bsize = head->b_size;
+ struct buffer_head *bh;
+ size_t to = from + len;
+ size_t start, end;
+
+ for (bh = head, start = 0; bh != head || !start;
+ bh = bh->b_this_page, start = end) {
+ end = start + bsize;
+ if (end <= from)
+ continue;
+ if (start >= to)
+ break;
+ set_buffer_uptodate(bh);
+ gfs2_trans_add_data(gl, bh);
+ }
+}
+
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index f8ce5302280d..790c55f59e61 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -42,6 +42,8 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
void gfs2_trans_end(struct gfs2_sbd *sdp);
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
+void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio,
+ size_t from, size_t len);
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 13be8d1d228b..d5a1e63fa257 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -73,7 +73,7 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
"mount.\n");
goto out_unlock;
}
- error = gfs2_find_jhead(jd, &head, false);
+ error = gfs2_find_jhead(jd, &head);
if (error) {
if (verbose)
fs_err(sdp, "Error parsing journal for spectator "
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 17ae5070a90e..df9c93de94c7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1383,7 +1383,7 @@ out:
return error;
}
-static int ea_dealloc_block(struct gfs2_inode *ip)
+static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1416,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
ip->i_eattr = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
- if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+ if (initialized) {
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -1435,11 +1435,12 @@ out_gunlock:
/**
* gfs2_ea_dealloc - deallocate the extended attribute fork
* @ip: the inode
+ * @initialized: xattrs have been initialized
*
* Returns: errno
*/
-int gfs2_ea_dealloc(struct gfs2_inode *ip)
+int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized)
{
int error;
@@ -1451,7 +1452,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
if (error)
return error;
- if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+ if (initialized) {
error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
if (error)
goto out_quota;
@@ -1463,7 +1464,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
}
}
- error = ea_dealloc_block(ip);
+ error = ea_dealloc_block(ip, initialized);
out_quota:
gfs2_quota_unhold(ip);
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index eb12eb7e37c1..3c9788e0e137 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -54,7 +54,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
const void *value, size_t size,
int flags, int type);
ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int gfs2_ea_dealloc(struct gfs2_inode *ip);
+int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized);
/* Exported to acl.c */
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 74801911bc1c..30cf4fe78b3d 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -48,47 +48,19 @@ struct hfsplus_wd {
int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
void *buf, void **data, blk_opf_t opf)
{
- const enum req_op op = opf & REQ_OP_MASK;
- struct bio *bio;
- int ret = 0;
- u64 io_size;
- loff_t start;
- int offset;
+ u64 io_size = hfsplus_min_io_size(sb);
+ loff_t start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
+ int offset = start & (io_size - 1);
+
+ if ((opf & REQ_OP_MASK) != REQ_OP_WRITE && data)
+ *data = (u8 *)buf + offset;
/*
- * Align sector to hardware sector size and find offset. We
- * assume that io_size is a power of two, which _should_
- * be true.
+ * Align sector to hardware sector size and find offset. We assume that
+ * io_size is a power of two, which _should_ be true.
*/
- io_size = hfsplus_min_io_size(sb);
- start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
- offset = start & (io_size - 1);
sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
-
- bio = bio_alloc(sb->s_bdev, 1, opf, GFP_NOIO);
- bio->bi_iter.bi_sector = sector;
-
- if (op != REQ_OP_WRITE && data)
- *data = (u8 *)buf + offset;
-
- while (io_size > 0) {
- unsigned int page_offset = offset_in_page(buf);
- unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
- io_size);
-
- ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
- if (ret != len) {
- ret = -EIO;
- goto out;
- }
- io_size -= len;
- buf = (u8 *)buf + len;
- }
-
- ret = submit_bio_wait(bio);
-out:
- bio_put(bio);
- return ret < 0 ? ret : 0;
+ return bdev_rw_virt(sb->s_bdev, sector, buf, io_size, opf);
}
static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
diff --git a/fs/internal.h b/fs/internal.h
index b9b3e29a73fd..393f6c5c24f6 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -66,6 +66,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
int vfs_tmpfile(struct mnt_idmap *idmap,
const struct path *parentpath,
struct file *file, umode_t mode);
+struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
/*
* namespace.c
@@ -343,3 +344,9 @@ static inline bool path_mounted(const struct path *path)
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+struct dentry *find_next_child(struct dentry *parent, struct dentry *prev);
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags);
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index c91fd2b46a77..69107a245b4c 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -396,8 +396,8 @@ static int ioctl_fsfreeze(struct file *filp)
/* Freeze */
if (sb->s_op->freeze_super)
- return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
- return freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ return freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}
static int ioctl_fsthaw(struct file *filp)
@@ -409,8 +409,8 @@ static int ioctl_fsthaw(struct file *filp)
/* Thaw */
if (sb->s_op->thaw_super)
- return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
- return thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ return thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}
static int ioctl_file_dedupe_range(struct file *file,
@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_fioasync(fd, filp, argp);
case FIOQSIZE:
- if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
+ if (S_ISDIR(inode->i_mode) ||
+ (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) ||
S_ISLNK(inode->i_mode)) {
loff_t res = inode_get_bytes(inode);
return copy_to_user(argp, &res, sizeof(res)) ?
@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_file_dedupe_range(filp, argp);
case FIONREAD:
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode))
return vfs_ioctl(filp, cmd, arg);
return put_user(i_size_read(inode) - filp->f_pos,
@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_get_fs_sysfs_path(filp, argp);
default:
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode))
return file_ioctl(filp, cmd, argp);
break;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 5b08bd417b28..233abf598f65 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -679,11 +679,12 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
return submit_bio_wait(&bio);
}
-static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
- size_t len, struct folio *folio)
+static int __iomap_write_begin(const struct iomap_iter *iter, size_t len,
+ struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct iomap_folio_state *ifs;
+ loff_t pos = iter->pos;
loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size);
@@ -741,10 +742,13 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
return 0;
}
-static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
- size_t len)
+static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
+ loff_t pos = iter->pos;
+
+ if (!mapping_large_folio_support(iter->inode->i_mapping))
+ len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
if (folio_ops && folio_ops->get_folio)
return folio_ops->get_folio(iter, pos, len);
@@ -752,10 +756,11 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
return iomap_get_folio(iter, pos, len);
}
-static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
+static void __iomap_put_folio(struct iomap_iter *iter, size_t ret,
struct folio *folio)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
+ loff_t pos = iter->pos;
if (folio_ops && folio_ops->put_folio) {
folio_ops->put_folio(iter->inode, pos, ret, folio);
@@ -765,6 +770,22 @@ static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
}
}
+/* trim pos and bytes to within a given folio */
+static loff_t iomap_trim_folio_range(struct iomap_iter *iter,
+ struct folio *folio, size_t *offset, u64 *bytes)
+{
+ loff_t pos = iter->pos;
+ size_t fsize = folio_size(folio);
+
+ WARN_ON_ONCE(pos < folio_pos(folio));
+ WARN_ON_ONCE(pos >= folio_pos(folio) + fsize);
+
+ *offset = offset_in_folio(folio, pos);
+ *bytes = min(*bytes, fsize - *offset);
+
+ return pos;
+}
+
static int iomap_write_begin_inline(const struct iomap_iter *iter,
struct folio *folio)
{
@@ -774,14 +795,22 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
return iomap_read_inline_data(iter, folio);
}
-static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
- size_t len, struct folio **foliop)
+/*
+ * Grab and prepare a folio for write based on iter state. Returns the folio,
+ * offset, and length. Callers can optionally pass a max length *plen,
+ * otherwise init to zero.
+ */
+static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
+ size_t *poffset, u64 *plen)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t pos = iter->pos;
+ u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
struct folio *folio;
int status = 0;
+ len = min_not_zero(len, *plen);
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);
@@ -789,10 +818,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
if (fatal_signal_pending(current))
return -EINTR;
- if (!mapping_large_folio_support(iter->inode->i_mapping))
- len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
-
- folio = __iomap_get_folio(iter, pos, len);
+ folio = __iomap_get_folio(iter, len);
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -816,24 +842,24 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
}
}
- if (pos + len > folio_pos(folio) + folio_size(folio))
- len = folio_pos(folio) + folio_size(folio) - pos;
+ pos = iomap_trim_folio_range(iter, folio, poffset, &len);
if (srcmap->type == IOMAP_INLINE)
status = iomap_write_begin_inline(iter, folio);
else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
else
- status = __iomap_write_begin(iter, pos, len, folio);
+ status = __iomap_write_begin(iter, len, folio);
if (unlikely(status))
goto out_unlock;
*foliop = folio;
+ *plen = len;
return 0;
out_unlock:
- __iomap_put_folio(iter, pos, 0, folio);
+ __iomap_put_folio(iter, 0, folio);
return status;
}
@@ -883,10 +909,11 @@ static void iomap_write_end_inline(const struct iomap_iter *iter,
* Returns true if all copied bytes have been written to the pagecache,
* otherwise return false.
*/
-static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
- size_t copied, struct folio *folio)
+static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
+ struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t pos = iter->pos;
if (srcmap->type == IOMAP_INLINE) {
iomap_write_end_inline(iter, folio, pos, copied);
@@ -917,14 +944,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
struct folio *folio;
loff_t old_size;
size_t offset; /* Offset into folio */
- size_t bytes; /* Bytes to write to folio */
+ u64 bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
u64 written; /* Bytes have been written */
- loff_t pos = iter->pos;
+ loff_t pos;
bytes = iov_iter_count(i);
retry:
- offset = pos & (chunk - 1);
+ offset = iter->pos & (chunk - 1);
bytes = min(chunk - offset, bytes);
status = balance_dirty_pages_ratelimited_flags(mapping,
bdp_flags);
@@ -949,23 +976,21 @@ retry:
break;
}
- status = iomap_write_begin(iter, pos, bytes, &folio);
+ status = iomap_write_begin(iter, &folio, &offset, &bytes);
if (unlikely(status)) {
- iomap_write_failed(iter->inode, pos, bytes);
+ iomap_write_failed(iter->inode, iter->pos, bytes);
break;
}
if (iter->iomap.flags & IOMAP_F_STALE)
break;
- offset = offset_in_folio(folio, pos);
- if (bytes > folio_size(folio) - offset)
- bytes = folio_size(folio) - offset;
+ pos = iter->pos;
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
- written = iomap_write_end(iter, pos, bytes, copied, folio) ?
+ written = iomap_write_end(iter, bytes, copied, folio) ?
copied : 0;
/*
@@ -980,7 +1005,7 @@ retry:
i_size_write(iter->inode, pos + written);
iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
}
- __iomap_put_folio(iter, pos, written, folio);
+ __iomap_put_folio(iter, written, folio);
if (old_size < pos)
pagecache_isize_extended(iter->inode, old_size, pos);
@@ -1276,22 +1301,17 @@ static int iomap_unshare_iter(struct iomap_iter *iter)
do {
struct folio *folio;
size_t offset;
- loff_t pos = iter->pos;
bool ret;
bytes = min_t(u64, SIZE_MAX, bytes);
- status = iomap_write_begin(iter, pos, bytes, &folio);
+ status = iomap_write_begin(iter, &folio, &offset, &bytes);
if (unlikely(status))
return status;
if (iomap->flags & IOMAP_F_STALE)
break;
- offset = offset_in_folio(folio, pos);
- if (bytes > folio_size(folio) - offset)
- bytes = folio_size(folio) - offset;
-
- ret = iomap_write_end(iter, pos, bytes, bytes, folio);
- __iomap_put_folio(iter, pos, bytes, folio);
+ ret = iomap_write_end(iter, bytes, bytes, folio);
+ __iomap_put_folio(iter, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@@ -1351,11 +1371,10 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
do {
struct folio *folio;
size_t offset;
- loff_t pos = iter->pos;
bool ret;
bytes = min_t(u64, SIZE_MAX, bytes);
- status = iomap_write_begin(iter, pos, bytes, &folio);
+ status = iomap_write_begin(iter, &folio, &offset, &bytes);
if (status)
return status;
if (iter->iomap.flags & IOMAP_F_STALE)
@@ -1363,15 +1382,12 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
/* warn about zeroing folios beyond eof that won't write back */
WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
- offset = offset_in_folio(folio, pos);
- if (bytes > folio_size(folio) - offset)
- bytes = folio_size(folio) - offset;
folio_zero_range(folio, offset, bytes);
folio_mark_accessed(folio);
- ret = iomap_write_end(iter, pos, bytes, bytes, folio);
- __iomap_put_folio(iter, pos, bytes, folio);
+ ret = iomap_write_end(iter, bytes, bytes, folio);
+ __iomap_put_folio(iter, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 9eab2c8ac3c5..455cc6f90be0 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -99,7 +99,11 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_FAULT, "FAULT" }, \
{ IOMAP_DIRECT, "DIRECT" }, \
{ IOMAP_NOWAIT, "NOWAIT" }, \
- { IOMAP_ATOMIC, "ATOMIC" }
+ { IOMAP_OVERWRITE_ONLY, "OVERWRITE_ONLY" }, \
+ { IOMAP_UNSHARE, "UNSHARE" }, \
+ { IOMAP_DAX, "DAX" }, \
+ { IOMAP_ATOMIC, "ATOMIC" }, \
+ { IOMAP_DONTCACHE, "DONTCACHE" }
#define IOMAP_F_FLAGS_STRINGS \
{ IOMAP_F_NEW, "NEW" }, \
@@ -107,7 +111,14 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_F_SHARED, "SHARED" }, \
{ IOMAP_F_MERGED, "MERGED" }, \
{ IOMAP_F_BUFFER_HEAD, "BH" }, \
- { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }
+ { IOMAP_F_XATTR, "XATTR" }, \
+ { IOMAP_F_BOUNDARY, "BOUNDARY" }, \
+ { IOMAP_F_ANON_WRITE, "ANON_WRITE" }, \
+ { IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \
+ { IOMAP_F_PRIVATE, "PRIVATE" }, \
+ { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \
+ { IOMAP_F_STALE, "STALE" }
+
#define IOMAP_DIO_STRINGS \
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
@@ -138,7 +149,7 @@ DECLARE_EVENT_CLASS(iomap_class,
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx "
- "length 0x%llx type %s flags %s",
+ "length 0x%llx type %s (0x%x) flags %s (0x%x)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
@@ -146,7 +157,9 @@ DECLARE_EVENT_CLASS(iomap_class,
__entry->offset,
__entry->length,
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
- __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
+ __entry->type,
+ __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
+ __entry->flags)
)
#define DEFINE_IOMAP_EVENT(name) \
@@ -185,7 +198,7 @@ TRACE_EVENT(iomap_writepage_map,
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
- "addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
+ "addr 0x%llx offset 0x%llx length 0x%llx type %s (0x%x) flags %s (0x%x)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
@@ -195,7 +208,9 @@ TRACE_EVENT(iomap_writepage_map,
__entry->offset,
__entry->length,
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
- __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
+ __entry->type,
+ __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
+ __entry->flags)
);
TRACE_EVENT(iomap_iter,
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 5124e196c2bf..c1719b5778a1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,6 +62,21 @@ const struct super_operations kernfs_sops = {
.show_options = kernfs_sop_show_options,
.show_path = kernfs_sop_show_path,
+
+ /*
+ * sysfs is built on top of kernfs and sysfs provides the power
+ * management infrastructure to support suspend/hibernate by
+ * writing to various files in /sys/power/. As filesystems may
+ * be automatically frozen during suspend/hibernate implementing
+ * freeze/thaw support for kernfs generically will cause
+ * deadlocks as the suspending/hibernation initiating task will
+ * hold a VFS lock that it will then wait upon to be released.
+ * If freeze/thaw for kernfs is needed talk to the VFS.
+ */
+ .freeze_fs = NULL,
+ .unfreeze_fs = NULL,
+ .freeze_super = NULL,
+ .thaw_super = NULL,
};
static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
@@ -255,7 +270,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
dput(dentry);
return ERR_PTR(-ENOMEM);
}
- dtmp = lookup_positive_unlocked(name, dentry, strlen(name));
+ dtmp = lookup_noperm_positive_unlocked(&QSTR(name), dentry);
dput(dentry);
kfree(name);
if (IS_ERR(dtmp))
diff --git a/fs/libfs.c b/fs/libfs.c
index 6393d7c49ee6..9ea0ecc325a8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -583,7 +583,7 @@ const struct file_operations simple_offset_dir_operations = {
.fsync = noop_fsync,
};
-static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
+struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
struct dentry *child = NULL, *d;
@@ -603,6 +603,7 @@ static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev
dput(prev);
return child;
}
+EXPORT_SYMBOL(find_next_child);
void simple_recursive_removal(struct dentry *dentry,
void (*callback)(struct dentry *))
@@ -1647,10 +1648,16 @@ struct inode *alloc_anon_inode(struct super_block *s)
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
+ /*
+ * Historically anonymous inodes didn't have a type at all and
+ * userspace has come to rely on this. Internally they're just
+ * regular files but S_IFREG is masked off when reporting
+ * information to userspace.
+ */
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
simple_inode_init_ts(inode);
return inode;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index ad7844de87c3..c5fd821fd30e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -445,10 +445,9 @@ static void clean_buffers(struct folio *folio, unsigned first_unmapped)
try_to_free_buffers(folio);
}
-static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
- void *data)
+static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio,
+ struct mpage_data *mpd)
{
- struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
@@ -656,14 +655,16 @@ mpage_writepages(struct address_space *mapping,
struct mpage_data mpd = {
.get_block = get_block,
};
+ struct folio *folio = NULL;
struct blk_plug plug;
- int ret;
+ int error;
blk_start_plug(&plug);
- ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = mpage_write_folio(wbc, folio, &mpd);
if (mpd.bio)
mpage_bio_submit_write(mpd.bio);
blk_finish_plug(&plug);
- return ret;
+ return error;
}
EXPORT_SYMBOL(mpage_writepages);
diff --git a/fs/namei.c b/fs/namei.c
index 84a0e0b0111c..4bb889fc980b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -571,14 +571,14 @@ int inode_permission(struct mnt_idmap *idmap,
int retval;
retval = sb_permission(inode->i_sb, inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
if (unlikely(mask & MAY_WRITE)) {
/*
* Nobody gets write access to an immutable file.
*/
- if (IS_IMMUTABLE(inode))
+ if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
/*
@@ -586,16 +586,16 @@ int inode_permission(struct mnt_idmap *idmap,
* written back improperly if their true value is unknown
* to the vfs.
*/
- if (HAS_UNMAPPED_ID(idmap, inode))
+ if (unlikely(HAS_UNMAPPED_ID(idmap, inode)))
return -EACCES;
}
retval = do_inode_permission(idmap, inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
retval = devcgroup_inode_permission(inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
return security_inode_permission(inode, mask);
@@ -1915,13 +1915,13 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
return ERR_PTR(-ELOOP);
- if (!(nd->flags & LOOKUP_RCU)) {
+ if (unlikely(atime_needs_update(&last->link, inode))) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (!try_to_unlazy(nd))
+ return ERR_PTR(-ECHILD);
+ }
touch_atime(&last->link);
cond_resched();
- } else if (atime_needs_update(&last->link, inode)) {
- if (!try_to_unlazy(nd))
- return ERR_PTR(-ECHILD);
- touch_atime(&last->link);
}
error = security_inode_follow_link(link->dentry, inode,
@@ -2434,9 +2434,12 @@ static int link_path_walk(const char *name, struct nameidata *nd)
nd->flags |= LOOKUP_PARENT;
if (IS_ERR(name))
return PTR_ERR(name);
- while (*name=='/')
- name++;
- if (!*name) {
+ if (*name == '/') {
+ do {
+ name++;
+ } while (unlikely(*name == '/'));
+ }
+ if (unlikely(!*name)) {
nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
return 0;
}
@@ -2449,7 +2452,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
idmap = mnt_idmap(nd->path.mnt);
err = may_lookup(idmap, nd);
- if (err)
+ if (unlikely(err))
return err;
nd->last.name = name;
@@ -2869,13 +2872,12 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-static int lookup_one_common(struct mnt_idmap *idmap,
- const char *name, struct dentry *base, int len,
- struct qstr *this)
+static int lookup_noperm_common(struct qstr *qname, struct dentry *base)
{
- this->name = name;
- this->len = len;
- this->hash = full_name_hash(base, name, len);
+ const char *name = qname->name;
+ u32 len = qname->len;
+
+ qname->hash = full_name_hash(base, name, len);
if (!len)
return -EACCES;
@@ -2892,139 +2894,135 @@ static int lookup_one_common(struct mnt_idmap *idmap,
* to use its own hash..
*/
if (base->d_flags & DCACHE_OP_HASH) {
- int err = base->d_op->d_hash(base, this);
+ int err = base->d_op->d_hash(base, qname);
if (err < 0)
return err;
}
+ return 0;
+}
+static int lookup_one_common(struct mnt_idmap *idmap,
+ struct qstr *qname, struct dentry *base)
+{
+ int err;
+ err = lookup_noperm_common(qname, base);
+ if (err < 0)
+ return err;
return inode_permission(idmap, base->d_inode, MAY_EXEC);
}
/**
- * try_lookup_one_len - filesystem helper to lookup single pathname component
- * @name: pathname component to lookup
+ * try_lookup_noperm - filesystem helper to lookup single pathname component
+ * @name: qstr storing pathname component to lookup
* @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
*
* Look up a dentry by name in the dcache, returning NULL if it does not
* currently exist. The function does not try to create a dentry.
*
* Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
+ * not be called by generic code. It does no permission checking.
*
* No locks need be held - only a counted reference to @base is needed.
*
*/
-struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
+struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base)
{
- struct qstr this;
int err;
- err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
+ err = lookup_noperm_common(name, base);
if (err)
return ERR_PTR(err);
- return lookup_dcache(&this, base, 0);
+ return lookup_dcache(name, base, 0);
}
-EXPORT_SYMBOL(try_lookup_one_len);
+EXPORT_SYMBOL(try_lookup_noperm);
/**
- * lookup_one_len - filesystem helper to lookup single pathname component
- * @name: pathname component to lookup
+ * lookup_noperm - filesystem helper to lookup single pathname component
+ * @name: qstr storing pathname component to lookup
* @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
*
* Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
+ * not be called by generic code. It does no permission checking.
*
* The caller must hold base->i_mutex.
*/
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+struct dentry *lookup_noperm(struct qstr *name, struct dentry *base)
{
struct dentry *dentry;
- struct qstr this;
int err;
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
+ err = lookup_noperm_common(name, base);
if (err)
return ERR_PTR(err);
- dentry = lookup_dcache(&this, base, 0);
- return dentry ? dentry : __lookup_slow(&this, base, 0);
+ dentry = lookup_dcache(name, base, 0);
+ return dentry ? dentry : __lookup_slow(name, base, 0);
}
-EXPORT_SYMBOL(lookup_one_len);
+EXPORT_SYMBOL(lookup_noperm);
/**
- * lookup_one - filesystem helper to lookup single pathname component
+ * lookup_one - lookup single pathname component
* @idmap: idmap of the mount the lookup is performed from
- * @name: pathname component to lookup
+ * @name: qstr holding pathname component to lookup
* @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
*
- * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
+ * This can be used for in-kernel filesystem clients such as file servers.
*
* The caller must hold base->i_mutex.
*/
-struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name,
- struct dentry *base, int len)
+struct dentry *lookup_one(struct mnt_idmap *idmap, struct qstr *name,
+ struct dentry *base)
{
struct dentry *dentry;
- struct qstr this;
int err;
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_common(idmap, name, base, len, &this);
+ err = lookup_one_common(idmap, name, base);
if (err)
return ERR_PTR(err);
- dentry = lookup_dcache(&this, base, 0);
- return dentry ? dentry : __lookup_slow(&this, base, 0);
+ dentry = lookup_dcache(name, base, 0);
+ return dentry ? dentry : __lookup_slow(name, base, 0);
}
EXPORT_SYMBOL(lookup_one);
/**
- * lookup_one_unlocked - filesystem helper to lookup single pathname component
+ * lookup_one_unlocked - lookup single pathname component
* @idmap: idmap of the mount the lookup is performed from
- * @name: pathname component to lookup
+ * @name: qstr olding pathname component to lookup
* @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
*
- * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
+ * This can be used for in-kernel filesystem clients such as file servers.
*
- * Unlike lookup_one_len, it should be called without the parent
- * i_mutex held, and will take the i_mutex itself if necessary.
+ * Unlike lookup_one, it should be called without the parent
+ * i_rwsem held, and will take the i_rwsem itself if necessary.
*/
-struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
- const char *name, struct dentry *base,
- int len)
+struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct qstr *name,
+ struct dentry *base)
{
- struct qstr this;
int err;
struct dentry *ret;
- err = lookup_one_common(idmap, name, base, len, &this);
+ err = lookup_one_common(idmap, name, base);
if (err)
return ERR_PTR(err);
- ret = lookup_dcache(&this, base, 0);
+ ret = lookup_dcache(name, base, 0);
if (!ret)
- ret = lookup_slow(&this, base, 0);
+ ret = lookup_slow(name, base, 0);
return ret;
}
EXPORT_SYMBOL(lookup_one_unlocked);
/**
- * lookup_one_positive_unlocked - filesystem helper to lookup single
- * pathname component
+ * lookup_one_positive_unlocked - lookup single pathname component
* @idmap: idmap of the mount the lookup is performed from
- * @name: pathname component to lookup
+ * @name: qstr holding pathname component to lookup
* @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
*
* This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns
* known positive or ERR_PTR(). This is what most of the users want.
@@ -3033,16 +3031,15 @@ EXPORT_SYMBOL(lookup_one_unlocked);
* time, so callers of lookup_one_unlocked() need to be very careful; pinned
* positives have >d_inode stable, so this one avoids such problems.
*
- * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
+ * This can be used for in-kernel filesystem clients such as file servers.
*
- * The helper should be called without i_mutex held.
+ * The helper should be called without i_rwsem held.
*/
struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
- const char *name,
- struct dentry *base, int len)
+ struct qstr *name,
+ struct dentry *base)
{
- struct dentry *ret = lookup_one_unlocked(idmap, name, base, len);
+ struct dentry *ret = lookup_one_unlocked(idmap, name, base);
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
dput(ret);
@@ -3053,38 +3050,48 @@ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
EXPORT_SYMBOL(lookup_one_positive_unlocked);
/**
- * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * lookup_noperm_unlocked - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
* @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
*
* Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
+ * not be called by generic code. It does no permission checking.
*
- * Unlike lookup_one_len, it should be called without the parent
- * i_mutex held, and will take the i_mutex itself if necessary.
+ * Unlike lookup_noperm, it should be called without the parent
+ * i_rwsem held, and will take the i_rwsem itself if necessary.
*/
-struct dentry *lookup_one_len_unlocked(const char *name,
- struct dentry *base, int len)
+struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base)
{
- return lookup_one_unlocked(&nop_mnt_idmap, name, base, len);
+ struct dentry *ret;
+
+ ret = try_lookup_noperm(name, base);
+ if (!ret)
+ ret = lookup_slow(name, base, 0);
+ return ret;
}
-EXPORT_SYMBOL(lookup_one_len_unlocked);
+EXPORT_SYMBOL(lookup_noperm_unlocked);
/*
- * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
+ * Like lookup_noperm_unlocked(), except that it yields ERR_PTR(-ENOENT)
* on negatives. Returns known positive or ERR_PTR(); that's what
* most of the users want. Note that pinned negative with unlocked parent
- * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
+ * _can_ become positive at any time, so callers of lookup_noperm_unlocked()
* need to be very careful; pinned positives have ->d_inode stable, so
* this one avoids such problems.
*/
-struct dentry *lookup_positive_unlocked(const char *name,
- struct dentry *base, int len)
+struct dentry *lookup_noperm_positive_unlocked(struct qstr *name,
+ struct dentry *base)
{
- return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len);
+ struct dentry *ret;
+
+ ret = lookup_noperm_unlocked(name, base);
+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
}
-EXPORT_SYMBOL(lookup_positive_unlocked);
+EXPORT_SYMBOL(lookup_noperm_positive_unlocked);
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
@@ -5403,25 +5410,25 @@ EXPORT_SYMBOL(vfs_get_link);
static char *__page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
- struct page *page;
+ struct folio *folio;
struct address_space *mapping = inode->i_mapping;
if (!dentry) {
- page = find_get_page(mapping, 0);
- if (!page)
+ folio = filemap_get_folio(mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
- return (char*)page;
+ folio = read_mapping_folio(mapping, 0, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(callback, page_put_link, page);
+ set_delayed_call(callback, page_put_link, folio);
BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
- return page_address(page);
+ return folio_address(folio);
}
const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
@@ -5431,6 +5438,17 @@ const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
}
EXPORT_SYMBOL_GPL(page_get_link_raw);
+/**
+ * page_get_link() - An implementation of the get_link inode_operation.
+ * @dentry: The directory entry which is the symlink.
+ * @inode: The inode for the symlink.
+ * @callback: Used to drop the reference to the symlink.
+ *
+ * Filesystems which store their symlinks in the page cache should use
+ * this to implement the get_link() member of their inode_operations.
+ *
+ * Return: A pointer to the NUL-terminated symlink.
+ */
const char *page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
@@ -5440,12 +5458,25 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode,
nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
-
EXPORT_SYMBOL(page_get_link);
+/**
+ * page_put_link() - Drop the reference to the symlink.
+ * @arg: The folio which contains the symlink.
+ *
+ * This is used internally by page_get_link(). It is exported for use
+ * by filesystems which need to implement a variant of page_get_link()
+ * themselves. Despite the apparent symmetry, filesystems which use
+ * page_get_link() do not need to call page_put_link().
+ *
+ * The argument, while it has a void pointer type, must be a pointer to
+ * the folio which was retrieved from the page cache. The delayed_call
+ * infrastructure is used to drop the reference count once the caller
+ * is done with the symlink.
+ */
void page_put_link(void *arg)
{
- put_page(arg);
+ folio_put(arg);
}
EXPORT_SYMBOL(page_put_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1b466c54a357..552ad7f4d18b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -355,12 +355,13 @@ static struct mount *alloc_vfsmnt(const char *name)
if (err)
goto out_free_cache;
- if (name) {
+ if (name)
mnt->mnt_devname = kstrdup_const(name,
GFP_KERNEL_ACCOUNT);
- if (!mnt->mnt_devname)
- goto out_free_id;
- }
+ else
+ mnt->mnt_devname = "none";
+ if (!mnt->mnt_devname)
+ goto out_free_id;
#ifdef CONFIG_SMP
mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
@@ -1264,7 +1265,7 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
if (!fc->root)
return ERR_PTR(-EINVAL);
- mnt = alloc_vfsmnt(fc->source ?: "none");
+ mnt = alloc_vfsmnt(fc->source);
if (!mnt)
return ERR_PTR(-ENOMEM);
@@ -5491,7 +5492,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
seq->buf[seq->count] = '\0';
seq->count = start;
seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL));
- } else if (r->mnt_devname) {
+ } else {
seq_puts(seq, r->mnt_devname);
}
return 0;
@@ -5804,7 +5805,9 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
STATMOUNT_SB_SOURCE | \
STATMOUNT_OPT_ARRAY | \
STATMOUNT_OPT_SEC_ARRAY | \
- STATMOUNT_SUPPORTED_MASK)
+ STATMOUNT_SUPPORTED_MASK | \
+ STATMOUNT_MNT_UIDMAP | \
+ STATMOUNT_MNT_GIDMAP)
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
@@ -5839,13 +5842,29 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
return err;
s->root = root;
- s->idmap = mnt_idmap(s->mnt);
- if (s->mask & STATMOUNT_SB_BASIC)
- statmount_sb_basic(s);
+ /*
+ * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap
+ * can change concurrently as we only hold the read-side of the
+ * namespace semaphore and mount properties may change with only
+ * the mount lock held.
+ *
+ * We could sample the mount lock sequence counter to detect
+ * those changes and retry. But it's not worth it. Worst that
+ * happens is that the mnt->mnt_idmap pointer is already changed
+ * while mnt->mnt_flags isn't or vica versa. So what.
+ *
+ * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved
+ * via READ_ONCE()/WRITE_ONCE() and guard against theoretical
+ * torn read/write. That's all we care about right now.
+ */
+ s->idmap = mnt_idmap(s->mnt);
if (s->mask & STATMOUNT_MNT_BASIC)
statmount_mnt_basic(s);
+ if (s->mask & STATMOUNT_SB_BASIC)
+ statmount_sb_basic(s);
+
if (s->mask & STATMOUNT_PROPAGATE_FROM)
statmount_propagate_from(s);
@@ -6157,6 +6176,10 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
+ /*
+ * We only need to guard against mount topology changes as
+ * listmount() doesn't care about any mount properties.
+ */
scoped_guard(rwsem_read, &namespace_sem)
ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 02c916a55020..6d63b958c4bb 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
server->namelen = NFS2_MAXNAMLEN;
}
+ /* Linux 'subtree_check' borkenness mandates this setting */
+ server->fh_expire_type = NFS_FH_VOL_RENAME;
if (!(fattr->valid & NFS_ATTR_FATTR)) {
error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
@@ -1200,6 +1202,10 @@ void nfs_clients_init(struct net *net)
#if IS_ENABLED(CONFIG_NFS_V4)
idr_init(&nn->cb_ident_idr);
#endif
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ INIT_LIST_HEAD(&nn->nfs4_data_server_cache);
+ spin_lock_init(&nn->nfs4_data_server_lock);
+#endif
spin_lock_init(&nn->nfs_client_lock);
nn->boot_time = ktime_get_real();
memset(&nn->rpcstats, 0, sizeof(nn->rpcstats));
@@ -1216,6 +1222,9 @@ void nfs_clients_exit(struct net *net)
nfs_cleanup_cb_ident_idr(net);
WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ WARN_ON_ONCE(!list_empty(&nn->nfs4_data_server_cache));
+#endif
}
#ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bd23fc736b39..d0e0b435a843 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
unblock_revalidate(new_dentry);
}
+static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
+ struct dentry *new_dentry)
+{
+ struct nfs_server *server = NFS_SB(old_dentry->d_sb);
+
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ return false;
+ if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
+ return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
+ return true;
+}
+
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
- if (S_ISREG(old_inode->i_mode))
+ if (S_ISREG(old_inode->i_mode) &&
+ nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f32f8d7c9122..48d89716193a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -757,7 +757,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct inode *inode = dreq->inode;
int flags = NFS_ODIRECT_DONE;
@@ -786,6 +785,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&inode->i_lock);
while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req;
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 4fa304fa5bc4..29d9234d5c08 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -76,6 +76,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct page *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
+ struct net *net = server->nfs_client->cl_net;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -159,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -170,7 +170,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_deviceid;
}
- dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 61ad269c825f..e6909cafab68 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1329,7 +1329,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_READ,
task->tk_status);
- trace_ff_layout_read_error(hdr);
+ trace_ff_layout_read_error(hdr, task->tk_status);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
@@ -1502,7 +1502,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_WRITE,
task->tk_status);
- trace_ff_layout_write_error(hdr);
+ trace_ff_layout_write_error(hdr, task->tk_status);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
@@ -1551,7 +1551,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
data->args.offset, data->args.count,
&data->res.op_status, OP_COMMIT,
task->tk_status);
- trace_ff_layout_commit_error(data);
+ trace_ff_layout_commit_error(data, task->tk_status);
}
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e58bedfb1dcc..4a304cf17c4b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct nfs4_pnfs_ds_addr *da;
struct nfs4_ff_layout_ds *new_ds = NULL;
struct nfs4_ff_ds_version *ds_versions = NULL;
+ struct net *net = server->nfs_client->cl_net;
u32 mp_count;
u32 version_count;
__be32 *p;
@@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
for (i = 0; i < mp_count; i++) {
/* multipath ds */
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
new_ds->ds_versions = ds_versions;
new_ds->ds_versions_cnt = version_count;
- new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!new_ds->ds)
goto out_err_drain_dsaddrs;
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 5c21caeae075..4ec952f9f47d 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -278,6 +278,7 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
new = __nfs_local_open_fh(clp, cred, fh, nfl, mode);
if (IS_ERR(new))
return NULL;
+ rcu_read_lock();
/* try to swap in the pointer */
spin_lock(&clp->cl_uuid.lock);
nf = rcu_dereference_protected(*pnf, 1);
@@ -287,7 +288,6 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
rcu_assign_pointer(*pnf, nf);
}
spin_unlock(&clp->cl_uuid.lock);
- rcu_read_lock();
}
nf = nfs_local_file_get(nf);
rcu_read_unlock();
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index a68b21603ea9..6ba3ea39e928 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -31,7 +31,11 @@ struct nfs_net {
unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
int cb_users[NFS4_MAX_MINOR_VERSION + 1];
-#endif
+#endif /* CONFIG_NFS_V4 */
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ struct list_head nfs4_data_server_cache;
+ spinlock_t nfs4_data_server_lock;
+#endif /* CONFIG_NFS_V4_1 */
struct nfs_netns_client *nfs_client;
spinlock_t nfs_client_lock;
ktime_t boot_time;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 18d8f6529f61..a126eb31f62f 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -104,7 +104,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu)
switch (status) {
case 0:
- status = nfs_refresh_inode(inode, res.fattr);
+ nfs_refresh_inode(inode, res.fattr);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 970f28dbf253..b1d2122bd5a7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -671,6 +671,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
struct nfs_client *clp = server->nfs_client;
int ret;
+ if ((task->tk_rpc_status == -ENETDOWN ||
+ task->tk_rpc_status == -ENETUNREACH) &&
+ task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
+ exception->delay = 0;
+ exception->recovering = 0;
+ exception->retry = 0;
+ return -EIO;
+ }
+
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
int ret2 = nfs4_exception_should_retrans(server, exception);
@@ -7074,10 +7083,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
struct nfs4_unlockdata *p;
struct nfs4_state *state = lsp->ls_state;
struct inode *inode = state->inode;
+ struct nfs_lock_context *l_ctx;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return NULL;
+ l_ctx = nfs_get_lock_context(ctx);
+ if (!IS_ERR(l_ctx)) {
+ p->l_ctx = l_ctx;
+ } else {
+ kfree(p);
+ return NULL;
+ }
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
p->arg.seqid = seqid;
@@ -7085,7 +7102,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->lsp = lsp;
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
- p->l_ctx = nfs_get_lock_context(ctx);
locks_init_lock(&p->fl);
locks_copy_lock(&p->fl, fl);
p->server = NFS_SERVER(inode);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index bc67fe6801b1..deab4c0e21a0 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2051,13 +2051,15 @@ TRACE_EVENT(fl_getdevinfo,
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
- const struct nfs_pgio_header *hdr
+ const struct nfs_pgio_header *hdr,
+ int error
),
- TP_ARGS(hdr),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(unsigned long, error)
+ __field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@@ -2073,7 +2075,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_fast_assign(
const struct inode *inode = hdr->inode;
- __entry->error = hdr->res.op_status;
+ __entry->error = -error;
+ __entry->nfs_error = hdr->res.op_status;
__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
@@ -2088,7 +2091,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+ "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s "
+ "nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -2096,28 +2100,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
__entry->fhandle,
__entry->offset, __entry->count,
__entry->stateid_seq, __entry->stateid_hash,
- __get_str(dstaddr)
+ __get_str(dstaddr), __entry->nfs_error,
+ show_nfs4_status(__entry->nfs_error)
)
);
#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
TP_PROTO( \
- const struct nfs_pgio_header *hdr \
+ const struct nfs_pgio_header *hdr, \
+ int error \
), \
- TP_ARGS(hdr))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
TRACE_EVENT(ff_layout_commit_error,
TP_PROTO(
- const struct nfs_commit_data *data
+ const struct nfs_commit_data *data,
+ int error
),
- TP_ARGS(data),
+ TP_ARGS(data, error),
TP_STRUCT__entry(
__field(unsigned long, error)
+ __field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@@ -2131,7 +2139,8 @@ TRACE_EVENT(ff_layout_commit_error,
TP_fast_assign(
const struct inode *inode = data->inode;
- __entry->error = data->res.op_status;
+ __entry->error = -error;
+ __entry->nfs_error = data->res.op_status;
__entry->fhandle = nfs_fhandle_hash(data->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
@@ -2142,14 +2151,15 @@ TRACE_EVENT(ff_layout_commit_error,
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%llu count=%u dstaddr=%s",
+ "offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->offset, __entry->count,
- __get_str(dstaddr)
+ __get_str(dstaddr), __entry->nfs_error,
+ show_nfs4_status(__entry->nfs_error)
)
);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5f582713bf05..3adb7d0dbec7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -745,6 +745,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return remaining;
}
+static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo)
+{
+ struct pnfs_layout_segment *lseg;
+
+ list_for_each_entry(lseg, &lo->plh_return_segs, pls_list)
+ pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+}
+
static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
struct list_head *free_me,
@@ -1246,21 +1254,15 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
static void
pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
- const struct pnfs_layout_range *range)
+ const struct pnfs_layout_range *range,
+ struct list_head *freeme)
{
- const struct pnfs_layout_segment *lseg;
- u32 seq = be32_to_cpu(arg_stateid->seqid);
-
if (pnfs_layout_is_valid(lo) &&
- nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) {
- list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) {
- if (pnfs_seqid_is_newer(lseg->pls_seq, seq) ||
- !pnfs_should_free_range(&lseg->pls_range, range))
- continue;
- pnfs_set_plh_return_info(lo, range->iomode, seq);
- break;
- }
- }
+ nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ pnfs_reset_return_info(lo);
+ else
+ pnfs_mark_layout_stateid_invalid(lo, freeme);
+ pnfs_clear_layoutreturn_waitbit(lo);
}
void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
@@ -1268,11 +1270,12 @@ void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
const struct pnfs_layout_range *range)
{
struct inode *inode = lo->plh_inode;
+ LIST_HEAD(freeme);
spin_lock(&inode->i_lock);
- pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range);
- pnfs_clear_layoutreturn_waitbit(lo);
+ pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range, &freeme);
spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
}
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
@@ -1292,6 +1295,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
pnfs_free_returned_lsegs(lo, &freeme, range, seq);
pnfs_set_layout_stateid(lo, stateid, NULL, true);
+ pnfs_reset_return_info(lo);
} else
pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
@@ -1661,6 +1665,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
/* Was there an RPC level error? If not, retry */
if (task->tk_rpc_status == 0)
break;
+ /*
+ * Is there a fatal network level error?
+ * If so release the layout, but flag the error.
+ */
+ if ((task->tk_rpc_status == -ENETDOWN ||
+ task->tk_rpc_status == -ENETUNREACH) &&
+ task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
+ *ret = 0;
+ (*respp)->lrs_present = 0;
+ retval = -EIO;
+ break;
+ }
/* If the call was not sent, let caller handle it */
if (!RPC_WAS_SENT(task))
return 0;
@@ -1695,6 +1711,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
struct inode *inode = args->inode;
const nfs4_stateid *res_stateid = NULL;
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
+ LIST_HEAD(freeme);
switch (ret) {
case -NFS4ERR_BADSESSION:
@@ -1703,9 +1720,9 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
- &args->range);
- pnfs_clear_layoutreturn_waitbit(lo);
+ &args->range, &freeme);
spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
break;
case 0:
if (res->lrs_present)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 30d2613e912b..91ff877185c8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -60,6 +60,7 @@ struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
+ const struct net *ds_net;
struct nfs_client *ds_clp;
refcount_t ds_count;
unsigned long ds_state;
@@ -415,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode,
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max);
void pnfs_generic_write_commit_done(struct rpc_task *task, void *data);
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
-struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
+struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net,
+ struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index dbef837e871a..91ef486f40b9 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -16,6 +16,7 @@
#include "nfs4session.h"
#include "internal.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
@@ -504,14 +505,14 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
/*
* Data server cache
*
- * Data servers can be mapped to different device ids.
- * nfs4_pnfs_ds reference counting
+ * Data servers can be mapped to different device ids, but should
+ * never be shared between net namespaces.
+ *
+ * nfs4_pnfs_ds reference counting:
* - set to 1 on allocation
* - incremented when a device id maps a data server already in the cache.
* - decremented when deviceid is removed from the cache.
*/
-static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
-static LIST_HEAD(nfs4_data_server_cache);
/* Debug routines */
static void
@@ -604,11 +605,11 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
* Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(const struct list_head *dsaddrs)
+_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+ list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node)
if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
return ds;
return NULL;
@@ -653,10 +654,11 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
{
- if (refcount_dec_and_lock(&ds->ds_count,
- &nfs4_ds_cache_lock)) {
+ struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id);
+
+ if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) {
list_del_init(&ds->ds_node);
- spin_unlock(&nfs4_ds_cache_lock);
+ spin_unlock(&nn->nfs4_data_server_lock);
destroy_ds(ds);
}
}
@@ -716,8 +718,9 @@ out_err:
* uncached and return cached struct nfs4_pnfs_ds.
*/
struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
@@ -733,16 +736,17 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
/* this is only used for debugging, so it's ok if its NULL */
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
- spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(dsaddrs);
+ spin_lock(&nn->nfs4_data_server_lock);
+ tmp_ds = _data_server_lookup_locked(nn, dsaddrs);
if (tmp_ds == NULL) {
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
+ ds->ds_net = net;
ds->ds_clp = NULL;
- list_add(&ds->ds_node, &nfs4_data_server_cache);
+ list_add(&ds->ds_node, &nn->nfs4_data_server_cache);
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
@@ -754,7 +758,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
refcount_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
- spin_unlock(&nfs4_ds_cache_lock);
+ spin_unlock(&nn->nfs4_data_server_lock);
out:
return ds;
}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 1c62a5a9f51d..58146e935402 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -40,31 +40,31 @@ static const char *nfs_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
void *err;
if (!dentry) {
err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
if (err)
return err;
- page = find_get_page(inode->i_mapping, 0);
- if (!page)
+ folio = filemap_get_folio(inode->i_mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
- page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+ folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler,
NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(done, page_put_link, page);
- return page_address(page);
+ set_delayed_call(done, page_put_link, folio);
+ return folio_address(folio);
}
/*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index bf77399696a7..b55467911648 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -464,18 +464,17 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
sdentry = NULL;
do {
- int slen;
dput(sdentry);
sillycounter++;
- slen = scnprintf(silly, sizeof(silly),
- SILLYNAME_PREFIX "%0*llx%0*x",
- SILLYNAME_FILEID_LEN, fileid,
- SILLYNAME_COUNTER_LEN, sillycounter);
+ scnprintf(silly, sizeof(silly),
+ SILLYNAME_PREFIX "%0*llx%0*x",
+ SILLYNAME_FILEID_LEN, fileid,
+ SILLYNAME_COUNTER_LEN, sillycounter);
dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
dentry, silly);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
/*
* N.B. Better to return EBUSY here ... it could be
* dangerous to delete the file while it's in use.
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 372bdcf5e07a..ac1731eb34ab 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -284,7 +284,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock_nested(inode, I_MUTEX_PARENT);
- child = lookup_one_len(argp->name, parent, argp->len);
+ child = lookup_one(&nop_mnt_idmap,
+ &QSTR_LEN(argp->name, argp->len),
+ parent);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index a7a07470c1f8..ef4971d71ac4 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1001,7 +1001,9 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_positive_unlocked(name, dparent, namlen);
+ dchild = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ dparent);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b397246dae7b..fd560dcf6059 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -266,7 +266,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock_nested(inode, I_MUTEX_PARENT);
- child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+ child = lookup_one(&nop_mnt_idmap,
+ &QSTR_LEN(open->op_fname, open->op_fnamelen),
+ parent);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c1d9bd07285f..acde3edab733 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -218,7 +218,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
/* lock the parent */
inode_lock(d_inode(dir));
- dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
@@ -316,7 +316,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
struct dentry *dentry;
- dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
+ dentry = lookup_one(&nop_mnt_idmap,
+ &QSTR(entry->name), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
break;
@@ -339,16 +340,16 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
static int
-nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
+nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
{
struct dentry *dir, *dentry;
int status;
- dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+ dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
dir = nn->rec_file->f_path.dentry;
inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- dentry = lookup_one_len(name, dir, namlen);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
@@ -408,7 +409,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (status < 0)
goto out_drop_write;
- status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
+ status = nfsd4_unlink_clid_dir(dname, nn);
nfs4_reset_creds(original_cred);
if (status == 0) {
vfs_fsync(nn->rec_file, 0);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e67420729ecd..fe876395985a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3812,7 +3812,9 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ cd->rd_fhp->fh_dentry);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6dda081eb24c..6370ac0a85fd 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -312,7 +312,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
}
inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+ dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len),
+ dirfhp->fh_dentry);
if (IS_ERR(dchild)) {
resp->status = nfserrno(PTR_ERR(dchild));
goto out_unlock;
@@ -331,7 +332,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
resp->status = nfserr_acces;
if (!newfhp->fh_dentry) {
- printk(KERN_WARNING
+ printk(KERN_WARNING
"nfsd_proc_create: file handle not verified\n");
goto out_unlock;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9abdc4b75813..160a839af405 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -264,7 +264,8 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
} else {
- dentry = lookup_one_len_unlocked(name, dparent, len);
+ dentry = lookup_one_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, len), dparent);
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
@@ -922,7 +923,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* directories, but we never have and it doesn't seem to have
* caused anyone a problem. If we were to change this, note
* also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
+ * lookup_one_positive_unlocked() that doesn't check permissions.
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
@@ -1554,7 +1555,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
return nfserrno(host_err);
inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one_len(fname, dentry, flen);
+ dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild)) {
err = nfserrno(host_err);
@@ -1659,7 +1660,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dnew = lookup_one_len(fname, dentry, flen);
+ dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
if (IS_ERR(dnew)) {
err = nfserrno(PTR_ERR(dnew));
inode_unlock(dentry->d_inode);
@@ -1734,7 +1735,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
dirp = d_inode(ddir);
inode_lock_nested(dirp, I_MUTEX_PARENT);
- dnew = lookup_one_len(name, ddir, len);
+ dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir);
if (IS_ERR(dnew)) {
host_err = PTR_ERR(dnew);
goto out_unlock;
@@ -1867,7 +1868,7 @@ retry:
if (err != nfs_ok)
goto out_unlock;
- odentry = lookup_one_len(fname, fdentry, flen);
+ odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry);
host_err = PTR_ERR(odentry);
if (IS_ERR(odentry))
goto out_nfserr;
@@ -1880,7 +1881,7 @@ retry:
goto out_dput_old;
type = d_inode(odentry)->i_mode & S_IFMT;
- ndentry = lookup_one_len(tname, tdentry, tlen);
+ ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry);
host_err = PTR_ERR(ndentry);
if (IS_ERR(ndentry))
goto out_dput_old;
@@ -1998,7 +1999,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dirp = d_inode(dentry);
inode_lock_nested(dirp, I_MUTEX_PARENT);
- rdentry = lookup_one_len(fname, dentry, flen);
+ rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
goto out_unlock;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d6cd81163030..135c49c5d848 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -9,12 +9,13 @@
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/cred.h>
-#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/vmalloc.h>
#include <linux/writeback.h>
#include <linux/seq_file.h>
#include <linux/crc-itu-t.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "omfs.h"
MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
@@ -384,79 +385,83 @@ nomem:
return -ENOMEM;
}
+struct omfs_mount_options {
+ kuid_t s_uid;
+ kgid_t s_gid;
+ int s_dmask;
+ int s_fmask;
+};
+
enum {
- Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
+ Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask,
};
-static const match_table_t tokens = {
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_umask, "umask=%o"},
- {Opt_dmask, "dmask=%o"},
- {Opt_fmask, "fmask=%o"},
- {Opt_err, NULL},
+static const struct fs_parameter_spec omfs_param_spec[] = {
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_u32oct ("umask", Opt_umask),
+ fsparam_u32oct ("dmask", Opt_dmask),
+ fsparam_u32oct ("fmask", Opt_fmask),
+ {}
};
-static int parse_options(char *options, struct omfs_sb_info *sbi)
+static int
+omfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 0;
- sbi->s_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(sbi->s_uid))
- return 0;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 0;
- sbi->s_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(sbi->s_gid))
- return 0;
- break;
- case Opt_umask:
- if (match_octal(&args[0], &option))
- return 0;
- sbi->s_fmask = sbi->s_dmask = option;
- break;
- case Opt_dmask:
- if (match_octal(&args[0], &option))
- return 0;
- sbi->s_dmask = option;
- break;
- case Opt_fmask:
- if (match_octal(&args[0], &option))
- return 0;
- sbi->s_fmask = option;
- break;
- default:
- return 0;
- }
+ struct omfs_mount_options *opts = fc->fs_private;
+ int token;
+ struct fs_parse_result result;
+
+ /* All options are ignored on remount */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+ return 0;
+
+ token = fs_parse(fc, omfs_param_spec, param, &result);
+ if (token < 0)
+ return token;
+
+ switch (token) {
+ case Opt_uid:
+ opts->s_uid = result.uid;
+ break;
+ case Opt_gid:
+ opts->s_gid = result.gid;
+ break;
+ case Opt_umask:
+ opts->s_fmask = opts->s_dmask = result.uint_32;
+ break;
+ case Opt_dmask:
+ opts->s_dmask = result.uint_32;
+ break;
+ case Opt_fmask:
+ opts->s_fmask = result.uint_32;
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
+
+ return 0;
}
-static int omfs_fill_super(struct super_block *sb, void *data, int silent)
+static void
+omfs_set_options(struct omfs_sb_info *sbi, struct omfs_mount_options *opts)
+{
+ sbi->s_uid = opts->s_uid;
+ sbi->s_gid = opts->s_gid;
+ sbi->s_dmask = opts->s_dmask;
+ sbi->s_fmask = opts->s_fmask;
+}
+
+static int omfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct buffer_head *bh, *bh2;
struct omfs_super_block *omfs_sb;
struct omfs_root_block *omfs_rb;
struct omfs_sb_info *sbi;
struct inode *root;
+ struct omfs_mount_options *parsed_opts = fc->fs_private;
int ret = -EINVAL;
+ int silent = fc->sb_flags & SB_SILENT;
sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL);
if (!sbi)
@@ -464,12 +469,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbi;
- sbi->s_uid = current_uid();
- sbi->s_gid = current_gid();
- sbi->s_dmask = sbi->s_fmask = current_umask();
-
- if (!parse_options((char *) data, sbi))
- goto end;
+ omfs_set_options(sbi, parsed_opts);
sb->s_maxbytes = 0xffffffff;
@@ -594,18 +594,50 @@ end:
return ret;
}
-static struct dentry *omfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int omfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, omfs_fill_super);
+}
+
+static void omfs_free_fc(struct fs_context *fc);
+
+static const struct fs_context_operations omfs_context_ops = {
+ .parse_param = omfs_parse_param,
+ .get_tree = omfs_get_tree,
+ .free = omfs_free_fc,
+};
+
+static int omfs_init_fs_context(struct fs_context *fc)
+{
+ struct omfs_mount_options *opts;
+
+ opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ /* Set mount options defaults */
+ opts->s_uid = current_uid();
+ opts->s_gid = current_gid();
+ opts->s_dmask = opts->s_fmask = current_umask();
+
+ fc->fs_private = opts;
+ fc->ops = &omfs_context_ops;
+
+ return 0;
+}
+
+static void omfs_free_fc(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
+ kfree(fc->fs_private);
}
static struct file_system_type omfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "omfs",
- .mount = omfs_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "omfs",
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = omfs_init_fs_context,
+ .parameters = omfs_param_spec,
};
MODULE_ALIAS_FS("omfs");
diff --git a/fs/open.c b/fs/open.c
index a9063cca9911..7828234a7caa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -60,7 +60,10 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- inode_lock(dentry->d_inode);
+ ret = inode_lock_killable(dentry->d_inode);
+ if (ret)
+ return ret;
+
/* Note any delegations or leases have already been broken: */
ret = notify_change(idmap, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode);
@@ -635,7 +638,9 @@ int chmod_common(const struct path *path, umode_t mode)
if (error)
return error;
retry_deleg:
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ goto out_mnt_unlock;
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
@@ -650,6 +655,7 @@ out_unlock:
if (!error)
goto retry_deleg;
}
+out_mnt_unlock:
mnt_drop_write(path->mnt);
return error;
}
@@ -769,7 +775,9 @@ retry_deleg:
return -EINVAL;
if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
return -EINVAL;
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
setattr_should_drop_sgid(idmap, inode);
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 5ac743c6bc2e..08a6f372a352 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -32,12 +32,13 @@ static int orangefs_writepage_locked(struct folio *folio,
len = i_size_read(inode);
if (folio->private) {
wr = folio->private;
- WARN_ON(wr->pos >= len);
off = wr->pos;
- if (off + wr->len > len)
+ if ((off + wr->len > len) && (off <= len))
wlen = len - off;
else
wlen = wr->len;
+ if (wlen == 0)
+ wlen = wr->len;
} else {
WARN_ON(1);
off = folio_pos(folio);
@@ -46,8 +47,6 @@ static int orangefs_writepage_locked(struct folio *folio,
if (wlen > len - off)
wlen = len - off;
}
- /* Should've been handled in orangefs_invalidate_folio. */
- WARN_ON(off == len || off + wlen > len);
WARN_ON(wlen == 0);
bvec_set_folio(&bv, folio, wlen, offset_in_folio(folio, off));
@@ -320,6 +319,8 @@ static int orangefs_write_begin(struct file *file,
wr->len += len;
goto okay;
} else {
+ wr->pos = pos;
+ wr->len = len;
ret = orangefs_launder_folio(folio);
if (ret)
return ret;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 444aeeccb6da..83f80fdb1567 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -385,11 +385,9 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
*/
take_dentry_name_snapshot(&name, real);
/*
- * No idmap handling here: it's an internal lookup. Could skip
- * permission checking altogether, but for now just use non-idmap
- * transformed ids.
+ * No idmap handling here: it's an internal lookup.
*/
- this = lookup_one_len(name.name.name, connected, name.name.len);
+ this = lookup_noperm(&name.name, connected);
release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index be5c65d6f848..bf722daf19a9 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -205,8 +205,8 @@ static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
struct dentry *base, int len,
bool drop_negative)
{
- struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name,
- base, len);
+ struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt),
+ &QSTR_LEN(name, len), base);
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
if (drop_negative && ret->d_lockref.count == 1) {
@@ -757,7 +757,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
if (err)
return ERR_PTR(err);
- index = lookup_positive_unlocked(name.name, ofs->workdir, name.len);
+ index = lookup_noperm_positive_unlocked(&name, ofs->workdir);
kfree(name.name);
if (IS_ERR(index)) {
if (PTR_ERR(index) == -ENOENT)
@@ -789,8 +789,8 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
if (err)
return ERR_PTR(err);
- index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name,
- ofs->workdir, name.len);
+ index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), &name,
+ ofs->workdir);
if (IS_ERR(index)) {
err = PTR_ERR(index);
if (err == -ENOENT) {
@@ -1371,7 +1371,7 @@ out:
bool ovl_lower_positive(struct dentry *dentry)
{
struct ovl_entry *poe = OVL_E(dentry->d_parent);
- const struct qstr *name = &dentry->d_name;
+ struct qstr *name = &dentry->d_name;
const struct cred *old_cred;
unsigned int i;
bool positive = false;
@@ -1396,7 +1396,7 @@ bool ovl_lower_positive(struct dentry *dentry)
this = lookup_one_positive_unlocked(
mnt_idmap(parentpath->layer->mnt),
- name->name, parentpath->dentry, name->len);
+ name, parentpath->dentry);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
case -ENOENT:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index aef942a758ce..8baaba0a3fe5 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -402,7 +402,7 @@ static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
const char *name,
struct dentry *base, int len)
{
- return lookup_one(ovl_upper_mnt_idmap(ofs), name, base, len);
+ return lookup_one(ovl_upper_mnt_idmap(ofs), &QSTR_LEN(name, len), base);
}
static inline bool ovl_open_flags_need_copy_up(int flags)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 881ec5592da5..44e208da417c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -271,7 +271,6 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
int err;
- struct ovl_cache_entry *p;
struct dentry *dentry, *dir = path->dentry;
const struct cred *old_cred;
@@ -280,9 +279,11 @@ static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data
err = down_write_killable(&dir->d_inode->i_rwsem);
if (!err) {
while (rdd->first_maybe_whiteout) {
- p = rdd->first_maybe_whiteout;
+ struct ovl_cache_entry *p =
+ rdd->first_maybe_whiteout;
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
- dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
+ dentry = lookup_one(mnt_idmap(path->mnt),
+ &QSTR_LEN(p->name, p->len), dir);
if (!IS_ERR(dentry)) {
p->is_whiteout = ovl_is_whiteout(dentry);
dput(dentry);
@@ -351,6 +352,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
struct path realpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
+ .ctx.count = INT_MAX,
.dentry = dentry,
.list = list,
.root = root,
@@ -492,7 +494,7 @@ static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p,
}
}
/* This checks also for xwhiteouts */
- this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
+ this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
/* Mark a stale entry */
p->is_whiteout = true;
@@ -571,6 +573,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
struct ovl_cache_entry *p, *n;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = list,
.root = root,
};
@@ -672,6 +675,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
struct ovl_readdir_translate *rdt =
container_of(ctx, struct ovl_readdir_translate, ctx);
struct dir_context *orig_ctx = rdt->orig_ctx;
+ bool res;
if (rdt->parent_ino && strcmp(name, "..") == 0) {
ino = rdt->parent_ino;
@@ -686,7 +690,10 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
name, namelen, rdt->xinowarn);
}
- return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ ctx->count = orig_ctx->count;
+
+ return res;
}
static bool ovl_is_impure_dir(struct file *file)
@@ -713,6 +720,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
struct ovl_readdir_translate rdt = {
.ctx.actor = ovl_fill_real,
+ .ctx.count = ctx->count,
.orig_ctx = ctx,
.xinobits = ovl_xino_bits(ofs),
.xinowarn = ovl_xino_warn(ofs),
@@ -1073,6 +1081,7 @@ int ovl_check_d_type_supported(const struct path *realpath)
int err;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_check_d_type,
+ .ctx.count = INT_MAX,
.d_type_supported = false,
};
@@ -1094,6 +1103,7 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
};
bool incompat = false;
@@ -1178,6 +1188,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
};
diff --git a/fs/pidfs.c b/fs/pidfs.c
index d64a4cbeb0da..c1f0a067be40 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -20,6 +20,7 @@
#include <linux/time_namespace.h>
#include <linux/utsname.h>
#include <net/net_namespace.h>
+#include <linux/coredump.h>
#include "internal.h"
#include "mount.h"
@@ -33,6 +34,7 @@ static struct kmem_cache *pidfs_cachep __ro_after_init;
struct pidfs_exit_info {
__u64 cgroupid;
__s32 exit_code;
+ __u32 coredump_mask;
};
struct pidfs_inode {
@@ -240,6 +242,22 @@ static inline bool pid_in_current_pidns(const struct pid *pid)
return false;
}
+static __u32 pidfs_coredump_mask(unsigned long mm_flags)
+{
+ switch (__get_dumpable(mm_flags)) {
+ case SUID_DUMP_USER:
+ return PIDFD_COREDUMP_USER;
+ case SUID_DUMP_ROOT:
+ return PIDFD_COREDUMP_ROOT;
+ case SUID_DUMP_DISABLE:
+ return PIDFD_COREDUMP_SKIP;
+ default:
+ WARN_ON_ONCE(true);
+ }
+
+ return 0;
+}
+
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
@@ -280,6 +298,11 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
}
}
+ if (mask & PIDFD_INFO_COREDUMP) {
+ kinfo.mask |= PIDFD_INFO_COREDUMP;
+ kinfo.coredump_mask = READ_ONCE(pidfs_i(inode)->__pei.coredump_mask);
+ }
+
task = get_pid_task(pid, PIDTYPE_PID);
if (!task) {
/*
@@ -296,6 +319,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if (!c)
return -ESRCH;
+ if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) {
+ task_lock(task);
+ if (task->mm)
+ kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags);
+ task_unlock(task);
+ }
+
/* Unconditionally return identifiers and credentials, the rest only on request */
user_ns = current_user_ns();
@@ -559,6 +589,31 @@ void pidfs_exit(struct task_struct *tsk)
}
}
+#ifdef CONFIG_COREDUMP
+void pidfs_coredump(const struct coredump_params *cprm)
+{
+ struct pid *pid = cprm->pid;
+ struct pidfs_exit_info *exit_info;
+ struct dentry *dentry;
+ struct inode *inode;
+ __u32 coredump_mask = 0;
+
+ dentry = pid->stashed;
+ if (WARN_ON_ONCE(!dentry))
+ return;
+
+ inode = d_inode(dentry);
+ exit_info = &pidfs_i(inode)->__pei;
+ /* Note how we were coredumped. */
+ coredump_mask = pidfs_coredump_mask(cprm->mm_flags);
+ /* Note that we actually did coredump. */
+ coredump_mask |= PIDFD_COREDUMPED;
+ /* If coredumping is set to skip we should never end up here. */
+ VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP);
+ smp_store_release(&exit_info->coredump_mask, coredump_mask);
+}
+#endif
+
static struct vfsmount *pidfs_mnt __ro_after_init;
/*
@@ -569,36 +624,14 @@ static struct vfsmount *pidfs_mnt __ro_after_init;
static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
- return -EOPNOTSUPP;
+ return anon_inode_setattr(idmap, dentry, attr);
}
-
-/*
- * User space expects pidfs inodes to have no file type in st_mode.
- *
- * In particular, 'lsof' has this legacy logic:
- *
- * type = s->st_mode & S_IFMT;
- * switch (type) {
- * ...
- * case 0:
- * if (!strcmp(p, "anon_inode"))
- * Lf->ntype = Ntype = N_ANON_INODE;
- *
- * to detect our old anon_inode logic.
- *
- * Rather than mess with our internal sane inode data, just fix it
- * up here in getattr() by masking off the format bits.
- */
static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
- struct inode *inode = d_inode(path->dentry);
-
- generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
- stat->mode &= ~S_IFMT;
- return 0;
+ return anon_inode_getattr(idmap, path, stat, request_mask, query_flags);
}
static const struct inode_operations pidfs_inode_operations = {
@@ -768,7 +801,7 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
{
enum pid_type type;
- if (flags & PIDFD_CLONE)
+ if (flags & PIDFD_STALE)
return true;
/*
@@ -777,10 +810,14 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
* pidfd has been allocated perform another check that the pid
* is still alive. If it is exit information is available even
* if the task gets reaped before the pidfd is returned to
- * userspace. The only exception is PIDFD_CLONE where no task
- * linkage has been established for @pid yet and the kernel is
- * in the middle of process creation so there's nothing for
- * pidfs to miss.
+ * userspace. The only exception are indicated by PIDFD_STALE:
+ *
+ * (1) The kernel is in the middle of task creation and thus no
+ * task linkage has been established yet.
+ * (2) The caller knows @pid has been registered in pidfs at a
+ * time when the task was still alive.
+ *
+ * In both cases exit information will have been reported.
*/
if (flags & PIDFD_THREAD)
type = PIDTYPE_PID;
@@ -826,7 +863,7 @@ static int pidfs_init_inode(struct inode *inode, void *data)
const struct pid *pid = data;
inode->i_private = data;
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
inode->i_mode |= S_IRWXU;
inode->i_op = &pidfs_inode_operations;
inode->i_fop = &pidfs_file_operations;
@@ -874,11 +911,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
int ret;
/*
- * Ensure that PIDFD_CLONE can be passed as a flag without
+ * Ensure that PIDFD_STALE can be passed as a flag without
* overloading other uapi pidfd flags.
*/
- BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD);
- BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK);
+ BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
+ BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
if (ret < 0)
@@ -887,7 +924,8 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
if (!pidfs_pid_valid(pid, &path, flags))
return ERR_PTR(-ESRCH);
- flags &= ~PIDFD_CLONE;
+ flags &= ~PIDFD_STALE;
+ flags |= O_RDWR;
pidfd_file = dentry_open(&path, flags, current_cred());
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
if (!IS_ERR(pidfd_file))
@@ -896,6 +934,65 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
return pidfd_file;
}
+/**
+ * pidfs_register_pid - register a struct pid in pidfs
+ * @pid: pid to pin
+ *
+ * Register a struct pid in pidfs. Needs to be paired with
+ * pidfs_put_pid() to not risk leaking the pidfs dentry and inode.
+ *
+ * Return: On success zero, on error a negative error code is returned.
+ */
+int pidfs_register_pid(struct pid *pid)
+{
+ struct path path __free(path_put) = {};
+ int ret;
+
+ might_sleep();
+
+ if (!pid)
+ return 0;
+
+ ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
+ if (unlikely(ret))
+ return ret;
+ /* Keep the dentry and only put the reference to the mount. */
+ path.dentry = NULL;
+ return 0;
+}
+
+/**
+ * pidfs_get_pid - pin a struct pid through pidfs
+ * @pid: pid to pin
+ *
+ * Similar to pidfs_register_pid() but only valid if the caller knows
+ * there's a reference to the @pid through a dentry already that can't
+ * go away.
+ */
+void pidfs_get_pid(struct pid *pid)
+{
+ if (!pid)
+ return;
+ WARN_ON_ONCE(!stashed_dentry_get(&pid->stashed));
+}
+
+/**
+ * pidfs_put_pid - drop a pidfs reference
+ * @pid: pid to drop
+ *
+ * Drop a reference to @pid via pidfs. This is only safe if the
+ * reference has been taken via pidfs_get_pid().
+ */
+void pidfs_put_pid(struct pid *pid)
+{
+ might_sleep();
+
+ if (!pid)
+ return;
+ VFS_WARN_ON_ONCE(!pid->stashed);
+ dput(pid->stashed);
+}
+
static void pidfs_inode_init_once(void *data)
{
struct pidfs_inode *pi = data;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b0d4e1908b22..fe33a5843fbd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2121,7 +2121,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
unsigned type = DT_UNKNOWN;
ino_t ino = 1;
- child = d_hash_and_lookup(dir, &qname);
+ child = try_lookup_noperm(&qname, dir);
if (!child) {
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 83be312159c9..bc2bc60c36cc 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -120,8 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);
- show_val_kb(m, "Bounce: ",
- global_zone_page_state(NR_BOUNCE));
+ show_val_kb(m, "Bounce: ", 0);
show_val_kb(m, "WritebackTmp: ",
global_node_page_state(NR_WRITEBACK_TEMP));
show_val_kb(m, "CommitLimit: ", vm_commit_limit());
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index e133b507ddf3..5c555db68aa2 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -111,7 +111,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
if (err)
goto out;
} else {
- mangle(m, r->mnt_devname ? r->mnt_devname : "none");
+ mangle(m, r->mnt_devname);
}
seq_putc(m, ' ');
/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
@@ -177,7 +177,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
if (err)
goto out;
} else {
- mangle(m, r->mnt_devname ? r->mnt_devname : "none");
+ mangle(m, r->mnt_devname);
}
seq_puts(m, sb_rdonly(sb) ? " ro" : " rw");
err = show_sb_opts(m, sb);
@@ -199,17 +199,13 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
int err;
/* device */
+ seq_puts(m, "device ");
if (sb->s_op->show_devname) {
- seq_puts(m, "device ");
err = sb->s_op->show_devname(m, mnt_path.dentry);
if (err)
goto out;
} else {
- if (r->mnt_devname) {
- seq_puts(m, "device ");
- mangle(m, r->mnt_devname);
- } else
- seq_puts(m, "no device");
+ mangle(m, r->mnt_devname);
}
/* mount point */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 825c5c2e0962..df4a9b348769 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2560,7 +2560,7 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;
- dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name));
+ dentry = lookup_noperm_positive_unlocked(&QSTR(qf_name), sb->s_root);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
diff --git a/fs/read_write.c b/fs/read_write.c
index bb0ed26a0b3a..0ef70e128c4a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -332,7 +332,9 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
loff_t retval;
- inode_lock(inode);
+ retval = inode_lock_killable(inode);
+ if (retval)
+ return retval;
switch (whence) {
case SEEK_END:
offset += i_size_read(inode);
diff --git a/fs/readdir.c b/fs/readdir.c
index 0038efda417b..7764b8638978 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct readdir_callback buf = {
.ctx.actor = fillonedir,
+ .ctx.count = 1, /* Hint to fs: just one entry. */
.dirent = dirent
};
@@ -252,7 +253,6 @@ struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -266,12 +266,16 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -279,7 +283,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
return false;
}
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
@@ -296,7 +300,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
user_write_access_end();
@@ -311,7 +315,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct getdents_callback buf = {
.ctx.actor = filldir,
- .count = count,
+ .ctx.count = count,
.current_dir = dirent
};
int error;
@@ -329,7 +333,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
@@ -338,7 +342,6 @@ struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -351,15 +354,19 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
@@ -376,7 +383,7 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
@@ -392,7 +399,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
- .count = count,
+ .ctx.count = count,
.current_dir = dirent
};
int error;
@@ -411,7 +418,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
if (put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
@@ -475,6 +482,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct compat_readdir_callback buf = {
.ctx.actor = compat_fillonedir,
+ .ctx.count = 1, /* Hint to fs: just one entry. */
.dirent = dirent
};
@@ -499,7 +507,6 @@ struct compat_getdents_callback {
struct dir_context ctx;
struct compat_linux_dirent __user *current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -513,12 +520,16 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
namlen + 2, sizeof(compat_long_t));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -526,7 +537,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
return false;
}
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
@@ -542,7 +553,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
user_write_access_end();
@@ -557,8 +568,8 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct compat_getdents_callback buf = {
.ctx.actor = compat_filldir,
+ .ctx.count = count,
.current_dir = dirent,
- .count = count
};
int error;
@@ -575,7 +586,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
diff --git a/fs/resctrl/Kconfig b/fs/resctrl/Kconfig
new file mode 100644
index 000000000000..21671301bd8a
--- /dev/null
+++ b/fs/resctrl/Kconfig
@@ -0,0 +1,39 @@
+config RESCTRL_FS
+ bool "CPU Resource Control Filesystem (resctrl)"
+ depends on ARCH_HAS_CPU_RESCTRL
+ select KERNFS
+ select PROC_CPU_RESCTRL if PROC_FS
+ help
+ Some architectures provide hardware facilities to group tasks and
+ monitor and control their usage of memory system resources such as
+ caches and memory bandwidth. Examples of such facilities include
+ Intel's Resource Director Technology (Intel(R) RDT) and AMD's
+ Platform Quality of Service (AMD QoS).
+
+ If your system has the necessary support and you want to be able to
+ assign tasks to groups and manipulate the associated resource
+ monitors and controls from userspace, say Y here to get a mountable
+ 'resctrl' filesystem that lets you do just that.
+
+ If nothing mounts or prods the 'resctrl' filesystem, resource
+ controls and monitors are left in a quiescent, permissive state.
+
+ On architectures where this can be disabled independently, it is
+ safe to say N.
+
+ See <file:Documentation/filesystems/resctrl.rst> for more information.
+
+config RESCTRL_FS_PSEUDO_LOCK
+ bool
+ depends on RESCTRL_FS
+ help
+ Software mechanism to pin data in a cache portion using
+ micro-architecture specific knowledge.
+
+config RESCTRL_RMID_DEPENDS_ON_CLOSID
+ bool
+ depends on RESCTRL_FS
+ help
+ Enabled by the architecture when the RMID values depend on the CLOSID.
+ This causes the CLOSID allocator to search for CLOSID with clean
+ RMID.
diff --git a/fs/resctrl/Makefile b/fs/resctrl/Makefile
new file mode 100644
index 000000000000..e67f34d2236a
--- /dev/null
+++ b/fs/resctrl/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_RESCTRL_FS) += rdtgroup.o ctrlmondata.o monitor.o
+obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
+
+# To allow define_trace.h's recursive include:
+CFLAGS_monitor.o = -I$(src)
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
new file mode 100644
index 000000000000..6ed2dfd4dbbd
--- /dev/null
+++ b/fs/resctrl/ctrlmondata.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Tony Luck <tony.luck@intel.com>
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/kernfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/tick.h>
+
+#include "internal.h"
+
+struct rdt_parse_data {
+ struct rdtgroup *rdtgrp;
+ char *buf;
+};
+
+typedef int (ctrlval_parser_t)(struct rdt_parse_data *data,
+ struct resctrl_schema *s,
+ struct rdt_ctrl_domain *d);
+
+/*
+ * Check whether MBA bandwidth percentage value is correct. The value is
+ * checked against the minimum and max bandwidth values specified by the
+ * hardware. The allocated bandwidth percentage is rounded to the next
+ * control step available on the hardware.
+ */
+static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
+{
+ int ret;
+ u32 bw;
+
+ /*
+ * Only linear delay values is supported for current Intel SKUs.
+ */
+ if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
+ rdt_last_cmd_puts("No support for non-linear MB domains\n");
+ return false;
+ }
+
+ ret = kstrtou32(buf, 10, &bw);
+ if (ret) {
+ rdt_last_cmd_printf("Invalid MB value %s\n", buf);
+ return false;
+ }
+
+ /* Nothing else to do if software controller is enabled. */
+ if (is_mba_sc(r)) {
+ *data = bw;
+ return true;
+ }
+
+ if (bw < r->membw.min_bw || bw > r->membw.max_bw) {
+ rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
+ bw, r->membw.min_bw, r->membw.max_bw);
+ return false;
+ }
+
+ *data = roundup(bw, (unsigned long)r->membw.bw_gran);
+ return true;
+}
+
+static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
+ struct rdt_ctrl_domain *d)
+{
+ struct resctrl_staged_config *cfg;
+ u32 closid = data->rdtgrp->closid;
+ struct rdt_resource *r = s->res;
+ u32 bw_val;
+
+ cfg = &d->staged_config[s->conf_type];
+ if (cfg->have_new_ctrl) {
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
+ return -EINVAL;
+ }
+
+ if (!bw_validate(data->buf, &bw_val, r))
+ return -EINVAL;
+
+ if (is_mba_sc(r)) {
+ d->mbps_val[closid] = bw_val;
+ return 0;
+ }
+
+ cfg->new_ctrl = bw_val;
+ cfg->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * Check whether a cache bit mask is valid.
+ * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
+ * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
+ * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
+ *
+ * Haswell does not support a non-contiguous 1s value and additionally
+ * requires at least two bits set.
+ * AMD allows non-contiguous bitmasks.
+ */
+static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
+{
+ u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1;
+ unsigned int cbm_len = r->cache.cbm_len;
+ unsigned long first_bit, zero_bit, val;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &val);
+ if (ret) {
+ rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
+ return false;
+ }
+
+ if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) {
+ rdt_last_cmd_puts("Mask out of range\n");
+ return false;
+ }
+
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
+
+ /* Are non-contiguous bitmasks allowed? */
+ if (!r->cache.arch_has_sparse_bitmasks &&
+ (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
+ rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
+ return false;
+ }
+
+ if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
+ rdt_last_cmd_printf("Need at least %d bits in the mask\n",
+ r->cache.min_cbm_bits);
+ return false;
+ }
+
+ *data = val;
+ return true;
+}
+
+/*
+ * Read one cache bit mask (hex). Check that it is valid for the current
+ * resource type.
+ */
+static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
+ struct rdt_ctrl_domain *d)
+{
+ struct rdtgroup *rdtgrp = data->rdtgrp;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
+ u32 cbm_val;
+
+ cfg = &d->staged_config[s->conf_type];
+ if (cfg->have_new_ctrl) {
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
+ return -EINVAL;
+ }
+
+ /*
+ * Cannot set up more than one pseudo-locked region in a cache
+ * hierarchy.
+ */
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+ rdtgroup_pseudo_locked_in_hierarchy(d)) {
+ rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
+ return -EINVAL;
+ }
+
+ if (!cbm_validate(data->buf, &cbm_val, r))
+ return -EINVAL;
+
+ if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
+ rdtgrp->mode == RDT_MODE_SHAREABLE) &&
+ rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
+ rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
+ return -EINVAL;
+ }
+
+ /*
+ * The CBM may not overlap with the CBM of another closid if
+ * either is exclusive.
+ */
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
+ rdt_last_cmd_puts("Overlaps with exclusive group\n");
+ return -EINVAL;
+ }
+
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
+ if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ rdt_last_cmd_puts("Overlaps with other group\n");
+ return -EINVAL;
+ }
+ }
+
+ cfg->new_ctrl = cbm_val;
+ cfg->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * For each domain in this resource we expect to find a series of:
+ * id=mask
+ * separated by ";". The "id" is in decimal, and must match one of
+ * the "id"s for this resource.
+ */
+static int parse_line(char *line, struct resctrl_schema *s,
+ struct rdtgroup *rdtgrp)
+{
+ enum resctrl_conf_type t = s->conf_type;
+ ctrlval_parser_t *parse_ctrlval = NULL;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
+ struct rdt_parse_data data;
+ struct rdt_ctrl_domain *d;
+ char *dom = NULL, *id;
+ unsigned long dom_id;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ switch (r->schema_fmt) {
+ case RESCTRL_SCHEMA_BITMAP:
+ parse_ctrlval = &parse_cbm;
+ break;
+ case RESCTRL_SCHEMA_RANGE:
+ parse_ctrlval = &parse_bw;
+ break;
+ }
+
+ if (WARN_ON_ONCE(!parse_ctrlval))
+ return -EINVAL;
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+ (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
+ rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
+ return -EINVAL;
+ }
+
+next:
+ if (!line || line[0] == '\0')
+ return 0;
+ dom = strsep(&line, ";");
+ id = strsep(&dom, "=");
+ if (!dom || kstrtoul(id, 10, &dom_id)) {
+ rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
+ return -EINVAL;
+ }
+ dom = strim(dom);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
+ data.buf = dom;
+ data.rdtgrp = rdtgrp;
+ if (parse_ctrlval(&data, s, d))
+ return -EINVAL;
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ cfg = &d->staged_config[t];
+ /*
+ * In pseudo-locking setup mode and just
+ * parsed a valid CBM that should be
+ * pseudo-locked. Only one locked region per
+ * resource group and domain so just do
+ * the required initialization for single
+ * region and return.
+ */
+ rdtgrp->plr->s = s;
+ rdtgrp->plr->d = d;
+ rdtgrp->plr->cbm = cfg->new_ctrl;
+ d->plr = rdtgrp->plr;
+ return 0;
+ }
+ goto next;
+ }
+ }
+ return -EINVAL;
+}
+
+static int rdtgroup_parse_resource(char *resname, char *tok,
+ struct rdtgroup *rdtgrp)
+{
+ struct resctrl_schema *s;
+
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
+ return parse_line(tok, s, rdtgrp);
+ }
+ rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
+ return -EINVAL;
+}
+
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct resctrl_schema *s;
+ struct rdtgroup *rdtgrp;
+ struct rdt_resource *r;
+ char *tok, *resname;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+ buf[nbytes - 1] = '\0';
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+ rdt_last_cmd_clear();
+
+ /*
+ * No changes to pseudo-locked region allowed. It has to be removed
+ * and re-created instead.
+ */
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Resource group is pseudo-locked\n");
+ goto out;
+ }
+
+ rdt_staged_configs_clear();
+
+ while ((tok = strsep(&buf, "\n")) != NULL) {
+ resname = strim(strsep(&tok, ":"));
+ if (!tok) {
+ rdt_last_cmd_puts("Missing ':'\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tok[0] == '\0') {
+ rdt_last_cmd_printf("Missing '%s' value\n", resname);
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
+ if (ret)
+ goto out;
+ }
+
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+
+ /*
+ * Writes to mba_sc resources update the software controller,
+ * not the control MSR.
+ */
+ if (is_mba_sc(r))
+ continue;
+
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
+ if (ret)
+ goto out;
+ }
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ /*
+ * If pseudo-locking fails we keep the resource group in
+ * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
+ * active and updated for just the domain the pseudo-locked
+ * region was requested for.
+ */
+ ret = rdtgroup_pseudo_lock_create(rdtgrp);
+ }
+
+out:
+ rdt_staged_configs_clear();
+ rdtgroup_kn_unlock(of->kn);
+ return ret ?: nbytes;
+}
+
+static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
+{
+ struct rdt_resource *r = schema->res;
+ struct rdt_ctrl_domain *dom;
+ bool sep = false;
+ u32 ctrl_val;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ seq_printf(s, "%*s:", max_name_width, schema->name);
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
+ if (sep)
+ seq_puts(s, ";");
+
+ if (is_mba_sc(r))
+ ctrl_val = dom->mbps_val[closid];
+ else
+ ctrl_val = resctrl_arch_get_config(r, dom, closid,
+ schema->conf_type);
+
+ seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val);
+ sep = true;
+ }
+ seq_puts(s, "\n");
+}
+
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct resctrl_schema *schema;
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+ u32 closid;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ seq_printf(s, "%s:uninitialized\n", schema->name);
+ }
+ } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%s:%d=%x\n",
+ rdtgrp->plr->s->res->name,
+ rdtgrp->plr->d->hdr.id,
+ rdtgrp->plr->cbm);
+ }
+ } else {
+ closid = rdtgrp->closid;
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ if (closid < schema->num_closid)
+ show_doms(s, schema, closid);
+ }
+ }
+ } else {
+ ret = -ENOENT;
+ }
+ rdtgroup_kn_unlock(of->kn);
+ return ret;
+}
+
+static int smp_mon_event_count(void *arg)
+{
+ mon_event_count(arg);
+
+ return 0;
+}
+
+ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+ buf[nbytes - 1] = '\0';
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+ rdt_last_cmd_clear();
+
+ if (!strcmp(buf, "mbm_local_bytes")) {
+ if (resctrl_arch_is_mbm_local_enabled())
+ rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+ else
+ ret = -EINVAL;
+ } else if (!strcmp(buf, "mbm_total_bytes")) {
+ if (resctrl_arch_is_mbm_total_enabled())
+ rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+ else
+ ret = -EINVAL;
+ } else {
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);
+
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
+}
+
+int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+ if (rdtgrp) {
+ switch (rdtgrp->mba_mbps_event) {
+ case QOS_L3_MBM_LOCAL_EVENT_ID:
+ seq_puts(s, "mbm_local_bytes\n");
+ break;
+ case QOS_L3_MBM_TOTAL_EVENT_ID:
+ seq_puts(s, "mbm_total_bytes\n");
+ break;
+ default:
+ pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
+ ret = -EINVAL;
+ break;
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
+ struct list_head **pos)
+{
+ struct rdt_domain_hdr *d;
+ struct list_head *l;
+
+ list_for_each(l, h) {
+ d = list_entry(l, struct rdt_domain_hdr, list);
+ /* When id is found, return its domain. */
+ if (id == d->id)
+ return d;
+ /* Stop searching when finding id's position in sorted list. */
+ if (id < d->id)
+ break;
+ }
+
+ if (pos)
+ *pos = l;
+
+ return NULL;
+}
+
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first)
+{
+ int cpu;
+
+ /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ /*
+ * Setup the parameters to pass to mon_event_count() to read the data.
+ */
+ rr->rgrp = rdtgrp;
+ rr->evtid = evtid;
+ rr->r = r;
+ rr->d = d;
+ rr->first = first;
+ rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
+ if (IS_ERR(rr->arch_mon_ctx)) {
+ rr->err = -EINVAL;
+ return;
+ }
+
+ cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
+
+ /*
+ * cpumask_any_housekeeping() prefers housekeeping CPUs, but
+ * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
+ * MPAM's resctrl_arch_rmid_read() is unable to read the
+ * counters on some platforms if its called in IRQ context.
+ */
+ if (tick_nohz_full_cpu(cpu))
+ smp_call_function_any(cpumask, mon_event_count, rr, 1);
+ else
+ smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
+
+ resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
+}
+
+int rdtgroup_mondata_show(struct seq_file *m, void *arg)
+{
+ struct kernfs_open_file *of = m->private;
+ enum resctrl_res_level resid;
+ enum resctrl_event_id evtid;
+ struct rdt_domain_hdr *hdr;
+ struct rmid_read rr = {0};
+ struct rdt_mon_domain *d;
+ struct rdtgroup *rdtgrp;
+ struct rdt_resource *r;
+ struct mon_data *md;
+ int domid, ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ md = of->kn->priv;
+ if (WARN_ON_ONCE(!md)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ resid = md->rid;
+ domid = md->domid;
+ evtid = md->evtid;
+ r = resctrl_arch_get_resource(resid);
+
+ if (md->sum) {
+ /*
+ * This file requires summing across all domains that share
+ * the L3 cache id that was provided in the "domid" field of the
+ * struct mon_data. Search all domains in the resource for
+ * one that matches this cache id.
+ */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->ci->id == domid) {
+ rr.ci = d->ci;
+ mon_event_read(&rr, r, NULL, rdtgrp,
+ &d->ci->shared_cpu_map, evtid, false);
+ goto checkresult;
+ }
+ }
+ ret = -ENOENT;
+ goto out;
+ } else {
+ /*
+ * This file provides data from a single domain. Search
+ * the resource to find the domain with "domid".
+ */
+ hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
+ if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
+ }
+
+checkresult:
+
+ if (rr.err == -EIO)
+ seq_puts(m, "Error\n");
+ else if (rr.err == -EINVAL)
+ seq_puts(m, "Unavailable\n");
+ else
+ seq_printf(m, "%llu\n", rr.val);
+
+out:
+ rdtgroup_kn_unlock(of->kn);
+ return ret;
+}
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
new file mode 100644
index 000000000000..9a8cf6f11151
--- /dev/null
+++ b/fs/resctrl/internal.h
@@ -0,0 +1,426 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FS_RESCTRL_INTERNAL_H
+#define _FS_RESCTRL_INTERNAL_H
+
+#include <linux/resctrl.h>
+#include <linux/kernfs.h>
+#include <linux/fs_context.h>
+#include <linux/tick.h>
+
+#define CQM_LIMBOCHECK_INTERVAL 1000
+
+/**
+ * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
+ * aren't marked nohz_full
+ * @mask: The mask to pick a CPU from.
+ * @exclude_cpu:The CPU to avoid picking.
+ *
+ * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred. Pass
+ * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
+ *
+ * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
+ */
+static inline unsigned int
+cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
+{
+ unsigned int cpu;
+
+ /* Try to find a CPU that isn't nohz_full to use in preference */
+ if (tick_nohz_full_enabled()) {
+ cpu = cpumask_any_andnot_but(mask, tick_nohz_full_mask, exclude_cpu);
+ if (cpu < nr_cpu_ids)
+ return cpu;
+ }
+
+ return cpumask_any_but(mask, exclude_cpu);
+}
+
+struct rdt_fs_context {
+ struct kernfs_fs_context kfc;
+ bool enable_cdpl2;
+ bool enable_cdpl3;
+ bool enable_mba_mbps;
+ bool enable_debug;
+};
+
+static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
+{
+ struct kernfs_fs_context *kfc = fc->fs_private;
+
+ return container_of(kfc, struct rdt_fs_context, kfc);
+}
+
+/**
+ * struct mon_evt - Entry in the event list of a resource
+ * @evtid: event id
+ * @name: name of the event
+ * @configurable: true if the event is configurable
+ * @list: entry in &rdt_resource->evt_list
+ */
+struct mon_evt {
+ enum resctrl_event_id evtid;
+ char *name;
+ bool configurable;
+ struct list_head list;
+};
+
+/**
+ * struct mon_data - Monitoring details for each event file.
+ * @list: Member of the global @mon_data_kn_priv_list list.
+ * @rid: Resource id associated with the event file.
+ * @evtid: Event id associated with the event file.
+ * @sum: Set when event must be summed across multiple
+ * domains.
+ * @domid: When @sum is zero this is the domain to which
+ * the event file belongs. When @sum is one this
+ * is the id of the L3 cache that all domains to be
+ * summed share.
+ *
+ * Pointed to by the kernfs kn->priv field of monitoring event files.
+ * Readers and writers must hold rdtgroup_mutex.
+ */
+struct mon_data {
+ struct list_head list;
+ enum resctrl_res_level rid;
+ enum resctrl_event_id evtid;
+ int domid;
+ bool sum;
+};
+
+/**
+ * struct rmid_read - Data passed across smp_call*() to read event count.
+ * @rgrp: Resource group for which the counter is being read. If it is a parent
+ * resource group then its event count is summed with the count from all
+ * its child resource groups.
+ * @r: Resource describing the properties of the event being read.
+ * @d: Domain that the counter should be read from. If NULL then sum all
+ * domains in @r sharing L3 @ci.id
+ * @evtid: Which monitor event to read.
+ * @first: Initialize MBM counter when true.
+ * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @err: Error encountered when reading counter.
+ * @val: Returned value of event counter. If @rgrp is a parent resource group,
+ * @val includes the sum of event counts from its child resource groups.
+ * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id,
+ * (summed across child resource groups if @rgrp is a parent resource group).
+ * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
+ */
+struct rmid_read {
+ struct rdtgroup *rgrp;
+ struct rdt_resource *r;
+ struct rdt_mon_domain *d;
+ enum resctrl_event_id evtid;
+ bool first;
+ struct cacheinfo *ci;
+ int err;
+ u64 val;
+ void *arch_mon_ctx;
+};
+
+extern struct list_head resctrl_schema_all;
+
+extern bool resctrl_mounted;
+
+enum rdt_group_type {
+ RDTCTRL_GROUP = 0,
+ RDTMON_GROUP,
+ RDT_NUM_GROUP,
+};
+
+/**
+ * enum rdtgrp_mode - Mode of a RDT resource group
+ * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
+ * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
+ * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
+ * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
+ * allowed AND the allocations are Cache Pseudo-Locked
+ * @RDT_NUM_MODES: Total number of modes
+ *
+ * The mode of a resource group enables control over the allowed overlap
+ * between allocations associated with different resource groups (classes
+ * of service). User is able to modify the mode of a resource group by
+ * writing to the "mode" resctrl file associated with the resource group.
+ *
+ * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
+ * writing the appropriate text to the "mode" file. A resource group enters
+ * "pseudo-locked" mode after the schemata is written while the resource
+ * group is in "pseudo-locksetup" mode.
+ */
+enum rdtgrp_mode {
+ RDT_MODE_SHAREABLE = 0,
+ RDT_MODE_EXCLUSIVE,
+ RDT_MODE_PSEUDO_LOCKSETUP,
+ RDT_MODE_PSEUDO_LOCKED,
+
+ /* Must be last */
+ RDT_NUM_MODES,
+};
+
+/**
+ * struct mongroup - store mon group's data in resctrl fs.
+ * @mon_data_kn: kernfs node for the mon_data directory
+ * @parent: parent rdtgrp
+ * @crdtgrp_list: child rdtgroup node list
+ * @rmid: rmid for this rdtgroup
+ */
+struct mongroup {
+ struct kernfs_node *mon_data_kn;
+ struct rdtgroup *parent;
+ struct list_head crdtgrp_list;
+ u32 rmid;
+};
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn: kernfs node
+ * @rdtgroup_list: linked list for all rdtgroups
+ * @closid: closid for this rdtgroup
+ * @cpu_mask: CPUs assigned to this rdtgroup
+ * @flags: status bits
+ * @waitcount: how many cpus expect to find this
+ * group when they acquire rdtgroup_mutex
+ * @type: indicates type of this rdtgroup - either
+ * monitor only or ctrl_mon group
+ * @mon: mongroup related data
+ * @mode: mode of resource group
+ * @mba_mbps_event: input monitoring event id when mba_sc is enabled
+ * @plr: pseudo-locked region
+ */
+struct rdtgroup {
+ struct kernfs_node *kn;
+ struct list_head rdtgroup_list;
+ u32 closid;
+ struct cpumask cpu_mask;
+ int flags;
+ atomic_t waitcount;
+ enum rdt_group_type type;
+ struct mongroup mon;
+ enum rdtgrp_mode mode;
+ enum resctrl_event_id mba_mbps_event;
+ struct pseudo_lock_region *plr;
+};
+
+/* rdtgroup.flags */
+#define RDT_DELETED 1
+
+/* rftype.flags */
+#define RFTYPE_FLAGS_CPUS_LIST 1
+
+/*
+ * Define the file type flags for base and info directories.
+ */
+#define RFTYPE_INFO BIT(0)
+
+#define RFTYPE_BASE BIT(1)
+
+#define RFTYPE_CTRL BIT(4)
+
+#define RFTYPE_MON BIT(5)
+
+#define RFTYPE_TOP BIT(6)
+
+#define RFTYPE_RES_CACHE BIT(8)
+
+#define RFTYPE_RES_MB BIT(9)
+
+#define RFTYPE_DEBUG BIT(10)
+
+#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
+
+#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
+
+#define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP)
+
+#define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
+
+#define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON)
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+extern int max_name_width;
+
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name: File name
+ * @mode: Access mode
+ * @kf_ops: File operations
+ * @flags: File specific RFTYPE_FLAGS_* flags
+ * @fflags: File specific RFTYPE_* flags
+ * @seq_show: Show content of the file
+ * @write: Write to the file
+ */
+struct rftype {
+ char *name;
+ umode_t mode;
+ const struct kernfs_ops *kf_ops;
+ unsigned long flags;
+ unsigned long fflags;
+
+ int (*seq_show)(struct kernfs_open_file *of,
+ struct seq_file *sf, void *v);
+ /*
+ * write() is the generic write callback which maps directly to
+ * kernfs write operation and overrides all other operations.
+ * Maximum write size is determined by ->max_write_len.
+ */
+ ssize_t (*write)(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+};
+
+/**
+ * struct mbm_state - status for each MBM counter in each domain
+ * @prev_bw_bytes: Previous bytes value read for bandwidth calculation
+ * @prev_bw: The most recent bandwidth in MBps
+ */
+struct mbm_state {
+ u64 prev_bw_bytes;
+ u32 prev_bw;
+};
+
+extern struct mutex rdtgroup_mutex;
+
+static inline const char *rdt_kn_name(const struct kernfs_node *kn)
+{
+ return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex));
+}
+
+extern struct rdtgroup rdtgroup_default;
+
+extern struct dentry *debugfs_resctrl;
+
+extern enum resctrl_event_id mba_mbps_default_event;
+
+void rdt_last_cmd_clear(void);
+
+void rdt_last_cmd_puts(const char *s);
+
+__printf(1, 2)
+void rdt_last_cmd_printf(const char *fmt, ...);
+
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
+
+int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
+
+int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
+ umode_t mask);
+
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+
+ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+
+int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
+ unsigned long cbm, int closid, bool exclusive);
+
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
+ unsigned long cbm);
+
+enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
+
+int rdtgroup_tasks_assigned(struct rdtgroup *r);
+
+int closids_supported(void);
+
+void closid_free(int closid);
+
+int alloc_rmid(u32 closid);
+
+void free_rmid(u32 closid, u32 rmid);
+
+void resctrl_mon_resource_exit(void);
+
+void mon_event_count(void *info);
+
+int rdtgroup_mondata_show(struct seq_file *m, void *arg);
+
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first);
+
+int resctrl_mon_resource_init(void);
+
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
+ unsigned long delay_ms,
+ int exclude_cpu);
+
+void mbm_handle_overflow(struct work_struct *work);
+
+bool is_mba_sc(struct rdt_resource *r);
+
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
+ int exclude_cpu);
+
+void cqm_handle_limbo(struct work_struct *work);
+
+bool has_busy_rmid(struct rdt_mon_domain *d);
+
+void __check_limbo(struct rdt_mon_domain *d, bool force_free);
+
+void resctrl_file_fflags_init(const char *config, unsigned long fflags);
+
+void rdt_staged_configs_clear(void);
+
+bool closid_allocated(unsigned int closid);
+
+int resctrl_find_cleanest_closid(void);
+
+#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
+int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
+
+int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
+
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
+
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
+
+int rdt_pseudo_lock_init(void);
+
+void rdt_pseudo_lock_release(void);
+
+int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
+
+void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
+
+#else
+static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
+{
+ return false;
+}
+
+static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
+{
+ return false;
+}
+
+static inline int rdt_pseudo_lock_init(void) { return 0; }
+static inline void rdt_pseudo_lock_release(void) { }
+static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { }
+#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */
+
+#endif /* _FS_RESCTRL_INTERNAL_H */
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
new file mode 100644
index 000000000000..bde2801289d3
--- /dev/null
+++ b/fs/resctrl/monitor.c
@@ -0,0 +1,929 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Resource Director Technology(RDT)
+ * - Monitoring code
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * Author:
+ * Vikas Shivappa <vikas.shivappa@intel.com>
+ *
+ * This replaces the cqm.c based on perf but we reuse a lot of
+ * code and datastructures originally from Peter Zijlstra and Matt Fleming.
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) "resctrl: " fmt
+
+#include <linux/cpu.h>
+#include <linux/resctrl.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "internal.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "monitor_trace.h"
+
+/**
+ * struct rmid_entry - dirty tracking for all RMID.
+ * @closid: The CLOSID for this entry.
+ * @rmid: The RMID for this entry.
+ * @busy: The number of domains with cached data using this RMID.
+ * @list: Member of the rmid_free_lru list when busy == 0.
+ *
+ * Depending on the architecture the correct monitor is accessed using
+ * both @closid and @rmid, or @rmid only.
+ *
+ * Take the rdtgroup_mutex when accessing.
+ */
+struct rmid_entry {
+ u32 closid;
+ u32 rmid;
+ int busy;
+ struct list_head list;
+};
+
+/*
+ * @rmid_free_lru - A least recently used list of free RMIDs
+ * These RMIDs are guaranteed to have an occupancy less than the
+ * threshold occupancy
+ */
+static LIST_HEAD(rmid_free_lru);
+
+/*
+ * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
+ * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
+ * Indexed by CLOSID. Protected by rdtgroup_mutex.
+ */
+static u32 *closid_num_dirty_rmid;
+
+/*
+ * @rmid_limbo_count - count of currently unused but (potentially)
+ * dirty RMIDs.
+ * This counts RMIDs that no one is currently using but that
+ * may have a occupancy value > resctrl_rmid_realloc_threshold. User can
+ * change the threshold occupancy value.
+ */
+static unsigned int rmid_limbo_count;
+
+/*
+ * @rmid_entry - The entry in the limbo and free lists.
+ */
+static struct rmid_entry *rmid_ptrs;
+
+/*
+ * This is the threshold cache occupancy in bytes at which we will consider an
+ * RMID available for re-allocation.
+ */
+unsigned int resctrl_rmid_realloc_threshold;
+
+/*
+ * This is the maximum value for the reallocation threshold, in bytes.
+ */
+unsigned int resctrl_rmid_realloc_limit;
+
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are independent numbers, there is only one source of traffic
+ * with an RMID value of '1'.
+ * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
+ * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
+ * value is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
+{
+ struct rmid_entry *entry;
+ u32 closid, rmid;
+
+ entry = &rmid_ptrs[idx];
+ resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
+
+ WARN_ON_ONCE(entry->closid != closid);
+ WARN_ON_ONCE(entry->rmid != rmid);
+
+ return entry;
+}
+
+static void limbo_release_entry(struct rmid_entry *entry)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ rmid_limbo_count--;
+ list_add_tail(&entry->list, &rmid_free_lru);
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+ closid_num_dirty_rmid[entry->closid]--;
+}
+
+/*
+ * Check the RMIDs that are marked as busy for this domain. If the
+ * reported LLC occupancy is below the threshold clear the busy bit and
+ * decrement the count. If the busy count gets to zero on an RMID, we
+ * free the RMID
+ */
+void __check_limbo(struct rdt_mon_domain *d, bool force_free)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+ struct rmid_entry *entry;
+ u32 idx, cur_idx = 1;
+ void *arch_mon_ctx;
+ bool rmid_dirty;
+ u64 val = 0;
+
+ arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+ if (IS_ERR(arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(arch_mon_ctx));
+ return;
+ }
+
+ /*
+ * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
+ * are marked as busy for occupancy < threshold. If the occupancy
+ * is less than the threshold decrement the busy counter of the
+ * RMID and move it to the free list when the counter reaches 0.
+ */
+ for (;;) {
+ idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+ if (idx >= idx_limit)
+ break;
+
+ entry = __rmid_entry(idx);
+ if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+ QOS_L3_OCCUP_EVENT_ID, &val,
+ arch_mon_ctx)) {
+ rmid_dirty = true;
+ } else {
+ rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
+
+ /*
+ * x86's CLOSID and RMID are independent numbers, so the entry's
+ * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
+ * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
+ * used to select the configuration. It is thus necessary to track both
+ * CLOSID and RMID because there may be dependencies between them
+ * on some architectures.
+ */
+ trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
+ }
+
+ if (force_free || !rmid_dirty) {
+ clear_bit(idx, d->rmid_busy_llc);
+ if (!--entry->busy)
+ limbo_release_entry(entry);
+ }
+ cur_idx = idx + 1;
+ }
+
+ resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
+}
+
+bool has_busy_rmid(struct rdt_mon_domain *d)
+{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+ return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
+}
+
+static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
+{
+ struct rmid_entry *itr;
+ u32 itr_idx, cmp_idx;
+
+ if (list_empty(&rmid_free_lru))
+ return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
+
+ list_for_each_entry(itr, &rmid_free_lru, list) {
+ /*
+ * Get the index of this free RMID, and the index it would need
+ * to be if it were used with this CLOSID.
+ * If the CLOSID is irrelevant on this architecture, the two
+ * index values are always the same on every entry and thus the
+ * very first entry will be returned.
+ */
+ itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
+ cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
+
+ if (itr_idx == cmp_idx)
+ return itr;
+ }
+
+ return ERR_PTR(-ENOSPC);
+}
+
+/**
+ * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
+ * RMID are clean, or the CLOSID that has
+ * the most clean RMID.
+ *
+ * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
+ * may not be able to allocate clean RMID. To avoid this the allocator will
+ * choose the CLOSID with the most clean RMID.
+ *
+ * When the CLOSID and RMID are independent numbers, the first free CLOSID will
+ * be returned.
+ */
+int resctrl_find_cleanest_closid(void)
+{
+ u32 cleanest_closid = ~0;
+ int i = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+ return -EIO;
+
+ for (i = 0; i < closids_supported(); i++) {
+ int num_dirty;
+
+ if (closid_allocated(i))
+ continue;
+
+ num_dirty = closid_num_dirty_rmid[i];
+ if (num_dirty == 0)
+ return i;
+
+ if (cleanest_closid == ~0)
+ cleanest_closid = i;
+
+ if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
+ cleanest_closid = i;
+ }
+
+ if (cleanest_closid == ~0)
+ return -ENOSPC;
+
+ return cleanest_closid;
+}
+
+/*
+ * For MPAM the RMID value is not unique, and has to be considered with
+ * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
+ * allows all domains to be managed by a single free list.
+ * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
+ */
+int alloc_rmid(u32 closid)
+{
+ struct rmid_entry *entry;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ entry = resctrl_find_free_rmid(closid);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+ list_del(&entry->list);
+ return entry->rmid;
+}
+
+static void add_rmid_to_limbo(struct rmid_entry *entry)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_mon_domain *d;
+ u32 idx;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
+
+ entry->busy = 0;
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ /*
+ * For the first limbo RMID in the domain,
+ * setup up the limbo worker.
+ */
+ if (!has_busy_rmid(d))
+ cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
+ set_bit(idx, d->rmid_busy_llc);
+ entry->busy++;
+ }
+
+ rmid_limbo_count++;
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+ closid_num_dirty_rmid[entry->closid]++;
+}
+
+void free_rmid(u32 closid, u32 rmid)
+{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct rmid_entry *entry;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ /*
+ * Do not allow the default rmid to be free'd. Comparing by index
+ * allows architectures that ignore the closid parameter to avoid an
+ * unnecessary check.
+ */
+ if (!resctrl_arch_mon_capable() ||
+ idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ RESCTRL_RESERVED_RMID))
+ return;
+
+ entry = __rmid_entry(idx);
+
+ if (resctrl_arch_is_llc_occupancy_enabled())
+ add_rmid_to_limbo(entry);
+ else
+ list_add_tail(&entry->list, &rmid_free_lru);
+}
+
+static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
+ u32 rmid, enum resctrl_event_id evtid)
+{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+
+ switch (evtid) {
+ case QOS_L3_MBM_TOTAL_EVENT_ID:
+ return &d->mbm_total[idx];
+ case QOS_L3_MBM_LOCAL_EVENT_ID:
+ return &d->mbm_local[idx];
+ default:
+ return NULL;
+ }
+}
+
+static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
+{
+ int cpu = smp_processor_id();
+ struct rdt_mon_domain *d;
+ struct mbm_state *m;
+ int err, ret;
+ u64 tval = 0;
+
+ if (rr->first) {
+ resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
+ m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
+ if (m)
+ memset(m, 0, sizeof(struct mbm_state));
+ return 0;
+ }
+
+ if (rr->d) {
+ /* Reading a single domain, must be on a CPU in that domain. */
+ if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
+ return -EINVAL;
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (rr->err)
+ return rr->err;
+
+ rr->val += tval;
+
+ return 0;
+ }
+
+ /* Summing domains that share a cache, must be on a CPU for that cache. */
+ if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ return -EINVAL;
+
+ /*
+ * Legacy files must report the sum of an event across all
+ * domains that share the same L3 cache instance.
+ * Report success if a read from any domain succeeds, -EINVAL
+ * (translated to "Unavailable" for user space) if reading from
+ * all domains fail for any reason.
+ */
+ ret = -EINVAL;
+ list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
+ if (d->ci->id != rr->ci->id)
+ continue;
+ err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (!err) {
+ rr->val += tval;
+ ret = 0;
+ }
+ }
+
+ if (ret)
+ rr->err = ret;
+
+ return ret;
+}
+
+/*
+ * mbm_bw_count() - Update bw count from values previously read by
+ * __mon_event_count().
+ * @closid: The closid used to identify the cached mbm_state.
+ * @rmid: The rmid used to identify the cached mbm_state.
+ * @rr: The struct rmid_read populated by __mon_event_count().
+ *
+ * Supporting function to calculate the memory bandwidth
+ * and delta bandwidth in MBps. The chunks value previously read by
+ * __mon_event_count() is compared with the chunks value from the previous
+ * invocation. This must be called once per second to maintain values in MBps.
+ */
+static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
+{
+ u64 cur_bw, bytes, cur_bytes;
+ struct mbm_state *m;
+
+ m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
+ if (WARN_ON_ONCE(!m))
+ return;
+
+ cur_bytes = rr->val;
+ bytes = cur_bytes - m->prev_bw_bytes;
+ m->prev_bw_bytes = cur_bytes;
+
+ cur_bw = bytes / SZ_1M;
+
+ m->prev_bw = cur_bw;
+}
+
+/*
+ * This is scheduled by mon_event_read() to read the CQM/MBM counters
+ * on a domain.
+ */
+void mon_event_count(void *info)
+{
+ struct rdtgroup *rdtgrp, *entry;
+ struct rmid_read *rr = info;
+ struct list_head *head;
+ int ret;
+
+ rdtgrp = rr->rgrp;
+
+ ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
+
+ /*
+ * For Ctrl groups read data from child monitor groups and
+ * add them together. Count events which are read successfully.
+ * Discard the rmid_read's reporting errors.
+ */
+ head = &rdtgrp->mon.crdtgrp_list;
+
+ if (rdtgrp->type == RDTCTRL_GROUP) {
+ list_for_each_entry(entry, head, mon.crdtgrp_list) {
+ if (__mon_event_count(entry->closid, entry->mon.rmid,
+ rr) == 0)
+ ret = 0;
+ }
+ }
+
+ /*
+ * __mon_event_count() calls for newly created monitor groups may
+ * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
+ * Discard error if any of the monitor event reads succeeded.
+ */
+ if (ret == 0)
+ rr->err = 0;
+}
+
+static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
+ struct rdt_resource *r)
+{
+ struct rdt_ctrl_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
+/*
+ * Feedback loop for MBA software controller (mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters and
+ * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
+ * that:
+ *
+ * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
+ *
+ * This uses the MBM counters to measure the bandwidth and MBA throttle
+ * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
+ * fact that resctrl rdtgroups have both monitoring and control.
+ *
+ * The frequency of the checks is 1s and we just tag along the MBM overflow
+ * timer. Having 1s interval makes the calculation of bandwidth simpler.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum, there may
+ * be a need to increase the bandwidth to avoid unnecessarily restricting
+ * the L2 <-> L3 traffic.
+ *
+ * Since MBA controls the L2 external bandwidth where as MBM measures the
+ * L3 external bandwidth the following sequence could lead to such a
+ * situation.
+ *
+ * Consider an rdtgroup which had high L3 <-> memory traffic in initial
+ * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
+ * after some time rdtgroup has mostly L2 <-> L3 traffic.
+ *
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
+ * throttle MSRs already have low percentage values. To avoid
+ * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
+{
+ u32 closid, rmid, cur_msr_val, new_msr_val;
+ struct mbm_state *pmbm_data, *cmbm_data;
+ struct rdt_ctrl_domain *dom_mba;
+ enum resctrl_event_id evt_id;
+ struct rdt_resource *r_mba;
+ struct list_head *head;
+ struct rdtgroup *entry;
+ u32 cur_bw, user_bw;
+
+ r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+ evt_id = rgrp->mba_mbps_event;
+
+ closid = rgrp->closid;
+ rmid = rgrp->mon.rmid;
+ pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
+ if (WARN_ON_ONCE(!pmbm_data))
+ return;
+
+ dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
+ if (!dom_mba) {
+ pr_warn_once("Failure to get domain for MBA update\n");
+ return;
+ }
+
+ cur_bw = pmbm_data->prev_bw;
+ user_bw = dom_mba->mbps_val[closid];
+
+ /* MBA resource doesn't support CDP */
+ cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
+
+ /*
+ * For Ctrl groups read data from child monitor groups.
+ */
+ head = &rgrp->mon.crdtgrp_list;
+ list_for_each_entry(entry, head, mon.crdtgrp_list) {
+ cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
+ if (WARN_ON_ONCE(!cmbm_data))
+ return;
+ cur_bw += cmbm_data->prev_bw;
+ }
+
+ /*
+ * Scale up/down the bandwidth linearly for the ctrl group. The
+ * bandwidth step is the bandwidth granularity specified by the
+ * hardware.
+ * Always increase throttling if current bandwidth is above the
+ * target set by user.
+ * But avoid thrashing up and down on every poll by checking
+ * whether a decrease in throttling is likely to push the group
+ * back over target. E.g. if currently throttling to 30% of bandwidth
+ * on a system with 10% granularity steps, check whether moving to
+ * 40% would go past the limit by multiplying current bandwidth by
+ * "(30 + 10) / 30".
+ */
+ if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
+ new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
+ } else if (cur_msr_val < MAX_MBA_BW &&
+ (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
+ new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
+ } else {
+ return;
+ }
+
+ resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
+}
+
+static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, enum resctrl_event_id evtid)
+{
+ struct rmid_read rr = {0};
+
+ rr.r = r;
+ rr.d = d;
+ rr.evtid = evtid;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (IS_ERR(rr.arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(rr.arch_mon_ctx));
+ return;
+ }
+
+ __mon_event_count(closid, rmid, &rr);
+
+ /*
+ * If the software controller is enabled, compute the
+ * bandwidth for this event id.
+ */
+ if (is_mba_sc(NULL))
+ mbm_bw_count(closid, rmid, &rr);
+
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
+}
+
+static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid)
+{
+ /*
+ * This is protected from concurrent reads from user as both
+ * the user and overflow handler hold the global mutex.
+ */
+ if (resctrl_arch_is_mbm_total_enabled())
+ mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
+
+ if (resctrl_arch_is_mbm_local_enabled())
+ mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+/*
+ * Handler to scan the limbo list and move the RMIDs
+ * to free list whose occupancy < threshold_occupancy.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+ unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+ struct rdt_mon_domain *d;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);
+
+ __check_limbo(d, false);
+
+ if (has_busy_rmid(d)) {
+ d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
+ RESCTRL_PICK_ANY_CPU);
+ schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
+ delay);
+ }
+
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+}
+
+/**
+ * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
+ * domain.
+ * @dom: The domain the limbo handler should run for.
+ * @delay_ms: How far in the future the handler should run.
+ * @exclude_cpu: Which CPU the handler should not run on,
+ * RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
+ int exclude_cpu)
+{
+ unsigned long delay = msecs_to_jiffies(delay_ms);
+ int cpu;
+
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
+ dom->cqm_work_cpu = cpu;
+
+ if (cpu < nr_cpu_ids)
+ schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+}
+
+void mbm_handle_overflow(struct work_struct *work)
+{
+ unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
+ struct rdtgroup *prgrp, *crgrp;
+ struct rdt_mon_domain *d;
+ struct list_head *head;
+ struct rdt_resource *r;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ /*
+ * If the filesystem has been unmounted this work no longer needs to
+ * run.
+ */
+ if (!resctrl_mounted || !resctrl_arch_mon_capable())
+ goto out_unlock;
+
+ r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ d = container_of(work, struct rdt_mon_domain, mbm_over.work);
+
+ list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+ mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
+
+ head = &prgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list)
+ mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
+
+ if (is_mba_sc(NULL))
+ update_mba_bw(prgrp, d);
+ }
+
+ /*
+ * Re-check for housekeeping CPUs. This allows the overflow handler to
+ * move off a nohz_full CPU quickly.
+ */
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
+ RESCTRL_PICK_ANY_CPU);
+ schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+}
+
+/**
+ * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
+ * domain.
+ * @dom: The domain the overflow handler should run for.
+ * @delay_ms: How far in the future the handler should run.
+ * @exclude_cpu: Which CPU the handler should not run on,
+ * RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
+ int exclude_cpu)
+{
+ unsigned long delay = msecs_to_jiffies(delay_ms);
+ int cpu;
+
+ /*
+ * When a domain comes online there is no guarantee the filesystem is
+ * mounted. If not, there is no need to catch counter overflow.
+ */
+ if (!resctrl_mounted || !resctrl_arch_mon_capable())
+ return;
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
+ dom->mbm_work_cpu = cpu;
+
+ if (cpu < nr_cpu_ids)
+ schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+}
+
+static int dom_data_init(struct rdt_resource *r)
+{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+ u32 num_closid = resctrl_arch_get_num_closid(r);
+ struct rmid_entry *entry = NULL;
+ int err = 0, i;
+ u32 idx;
+
+ mutex_lock(&rdtgroup_mutex);
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ u32 *tmp;
+
+ /*
+ * If the architecture hasn't provided a sanitised value here,
+ * this may result in larger arrays than necessary. Resctrl will
+ * use a smaller system wide value based on the resources in
+ * use.
+ */
+ tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ closid_num_dirty_rmid = tmp;
+ }
+
+ rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
+ if (!rmid_ptrs) {
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ kfree(closid_num_dirty_rmid);
+ closid_num_dirty_rmid = NULL;
+ }
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ for (i = 0; i < idx_limit; i++) {
+ entry = &rmid_ptrs[i];
+ INIT_LIST_HEAD(&entry->list);
+
+ resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
+ list_add_tail(&entry->list, &rmid_free_lru);
+ }
+
+ /*
+ * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
+ * are always allocated. These are used for the rdtgroup_default
+ * control group, which will be setup later in resctrl_init().
+ */
+ idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ RESCTRL_RESERVED_RMID);
+ entry = __rmid_entry(idx);
+ list_del(&entry->list);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+static void dom_data_exit(struct rdt_resource *r)
+{
+ mutex_lock(&rdtgroup_mutex);
+
+ if (!r->mon_capable)
+ goto out_unlock;
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ kfree(closid_num_dirty_rmid);
+ closid_num_dirty_rmid = NULL;
+ }
+
+ kfree(rmid_ptrs);
+ rmid_ptrs = NULL;
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+static struct mon_evt llc_occupancy_event = {
+ .name = "llc_occupancy",
+ .evtid = QOS_L3_OCCUP_EVENT_ID,
+};
+
+static struct mon_evt mbm_total_event = {
+ .name = "mbm_total_bytes",
+ .evtid = QOS_L3_MBM_TOTAL_EVENT_ID,
+};
+
+static struct mon_evt mbm_local_event = {
+ .name = "mbm_local_bytes",
+ .evtid = QOS_L3_MBM_LOCAL_EVENT_ID,
+};
+
+/*
+ * Initialize the event list for the resource.
+ *
+ * Note that MBM events are also part of RDT_RESOURCE_L3 resource
+ * because as per the SDM the total and local memory bandwidth
+ * are enumerated as part of L3 monitoring.
+ */
+static void l3_mon_evt_init(struct rdt_resource *r)
+{
+ INIT_LIST_HEAD(&r->evt_list);
+
+ if (resctrl_arch_is_llc_occupancy_enabled())
+ list_add_tail(&llc_occupancy_event.list, &r->evt_list);
+ if (resctrl_arch_is_mbm_total_enabled())
+ list_add_tail(&mbm_total_event.list, &r->evt_list);
+ if (resctrl_arch_is_mbm_local_enabled())
+ list_add_tail(&mbm_local_event.list, &r->evt_list);
+}
+
+/**
+ * resctrl_mon_resource_init() - Initialise global monitoring structures.
+ *
+ * Allocate and initialise global monitor resources that do not belong to a
+ * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists.
+ * Called once during boot after the struct rdt_resource's have been configured
+ * but before the filesystem is mounted.
+ * Resctrl's cpuhp callbacks may be called before this point to bring a domain
+ * online.
+ *
+ * Returns 0 for success, or -ENOMEM.
+ */
+int resctrl_mon_resource_init(void)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ int ret;
+
+ if (!r->mon_capable)
+ return 0;
+
+ ret = dom_data_init(r);
+ if (ret)
+ return ret;
+
+ l3_mon_evt_init(r);
+
+ if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
+ mbm_total_event.configurable = true;
+ resctrl_file_fflags_init("mbm_total_bytes_config",
+ RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
+ }
+ if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
+ mbm_local_event.configurable = true;
+ resctrl_file_fflags_init("mbm_local_bytes_config",
+ RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
+ }
+
+ if (resctrl_arch_is_mbm_local_enabled())
+ mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+ else if (resctrl_arch_is_mbm_total_enabled())
+ mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+
+ return 0;
+}
+
+void resctrl_mon_resource_exit(void)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+
+ dom_data_exit(r);
+}
diff --git a/fs/resctrl/monitor_trace.h b/fs/resctrl/monitor_trace.h
new file mode 100644
index 000000000000..fdf49f22576a
--- /dev/null
+++ b/fs/resctrl/monitor_trace.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM resctrl
+
+#if !defined(_FS_RESCTRL_MONITOR_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _FS_RESCTRL_MONITOR_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mon_llc_occupancy_limbo,
+ TP_PROTO(u32 ctrl_hw_id, u32 mon_hw_id, int domain_id, u64 llc_occupancy_bytes),
+ TP_ARGS(ctrl_hw_id, mon_hw_id, domain_id, llc_occupancy_bytes),
+ TP_STRUCT__entry(__field(u32, ctrl_hw_id)
+ __field(u32, mon_hw_id)
+ __field(int, domain_id)
+ __field(u64, llc_occupancy_bytes)),
+ TP_fast_assign(__entry->ctrl_hw_id = ctrl_hw_id;
+ __entry->mon_hw_id = mon_hw_id;
+ __entry->domain_id = domain_id;
+ __entry->llc_occupancy_bytes = llc_occupancy_bytes;),
+ TP_printk("ctrl_hw_id=%u mon_hw_id=%u domain_id=%d llc_occupancy_bytes=%llu",
+ __entry->ctrl_hw_id, __entry->mon_hw_id, __entry->domain_id,
+ __entry->llc_occupancy_bytes)
+ );
+
+#endif /* _FS_RESCTRL_MONITOR_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#define TRACE_INCLUDE_FILE monitor_trace
+
+#include <trace/define_trace.h>
diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c
new file mode 100644
index 000000000000..ccc2f9213b4b
--- /dev/null
+++ b/fs/resctrl/pseudo_lock.c
@@ -0,0 +1,1105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Resource Director Technology (RDT)
+ *
+ * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Author: Reinette Chatre <reinette.chatre@intel.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/kthread.h>
+#include <linux/mman.h>
+#include <linux/pm_qos.h>
+#include <linux/resctrl.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "internal.h"
+
+/*
+ * Major number assigned to and shared by all devices exposing
+ * pseudo-locked regions.
+ */
+static unsigned int pseudo_lock_major;
+
+static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
+
+static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
+{
+ const struct rdtgroup *rdtgrp;
+
+ rdtgrp = dev_get_drvdata(dev);
+ if (mode)
+ *mode = 0600;
+ guard(mutex)(&rdtgroup_mutex);
+ return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn));
+}
+
+static const struct class pseudo_lock_class = {
+ .name = "pseudo_lock",
+ .devnode = pseudo_lock_devnode,
+};
+
+/**
+ * pseudo_lock_minor_get - Obtain available minor number
+ * @minor: Pointer to where new minor number will be stored
+ *
+ * A bitmask is used to track available minor numbers. Here the next free
+ * minor number is marked as unavailable and returned.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int pseudo_lock_minor_get(unsigned int *minor)
+{
+ unsigned long first_bit;
+
+ first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
+
+ if (first_bit == MINORBITS)
+ return -ENOSPC;
+
+ __clear_bit(first_bit, &pseudo_lock_minor_avail);
+ *minor = first_bit;
+
+ return 0;
+}
+
+/**
+ * pseudo_lock_minor_release - Return minor number to available
+ * @minor: The minor number made available
+ */
+static void pseudo_lock_minor_release(unsigned int minor)
+{
+ __set_bit(minor, &pseudo_lock_minor_avail);
+}
+
+/**
+ * region_find_by_minor - Locate a pseudo-lock region by inode minor number
+ * @minor: The minor number of the device representing pseudo-locked region
+ *
+ * When the character device is accessed we need to determine which
+ * pseudo-locked region it belongs to. This is done by matching the minor
+ * number of the device to the pseudo-locked region it belongs.
+ *
+ * Minor numbers are assigned at the time a pseudo-locked region is associated
+ * with a cache instance.
+ *
+ * Return: On success return pointer to resource group owning the pseudo-locked
+ * region, NULL on failure.
+ */
+static struct rdtgroup *region_find_by_minor(unsigned int minor)
+{
+ struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;
+
+ list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+ if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
+ rdtgrp_match = rdtgrp;
+ break;
+ }
+ }
+ return rdtgrp_match;
+}
+
+/**
+ * struct pseudo_lock_pm_req - A power management QoS request list entry
+ * @list: Entry within the @pm_reqs list for a pseudo-locked region
+ * @req: PM QoS request
+ */
+struct pseudo_lock_pm_req {
+ struct list_head list;
+ struct dev_pm_qos_request req;
+};
+
+static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
+{
+ struct pseudo_lock_pm_req *pm_req, *next;
+
+ list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
+ dev_pm_qos_remove_request(&pm_req->req);
+ list_del(&pm_req->list);
+ kfree(pm_req);
+ }
+}
+
+/**
+ * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ * @plr: Pseudo-locked region
+ *
+ * To prevent the cache from being affected by power management entering
+ * C6 has to be avoided. This is accomplished by requesting a latency
+ * requirement lower than lowest C6 exit latency of all supported
+ * platforms as found in the cpuidle state tables in the intel_idle driver.
+ * At this time it is possible to do so with a single latency requirement
+ * for all supported platforms.
+ *
+ * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
+ * the ACPI latencies need to be considered while keeping in mind that C2
+ * may be set to map to deeper sleep states. In this case the latency
+ * requirement needs to prevent entering C2 also.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
+{
+ struct pseudo_lock_pm_req *pm_req;
+ int cpu;
+ int ret;
+
+ for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
+ pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
+ if (!pm_req) {
+ rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+ &pm_req->req,
+ DEV_PM_QOS_RESUME_LATENCY,
+ 30);
+ if (ret < 0) {
+ rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
+ cpu);
+ kfree(pm_req);
+ ret = -1;
+ goto out_err;
+ }
+ list_add(&pm_req->list, &plr->pm_reqs);
+ }
+
+ return 0;
+
+out_err:
+ pseudo_lock_cstates_relax(plr);
+ return ret;
+}
+
+/**
+ * pseudo_lock_region_clear - Reset pseudo-lock region data
+ * @plr: pseudo-lock region
+ *
+ * All content of the pseudo-locked region is reset - any memory allocated
+ * freed.
+ *
+ * Return: void
+ */
+static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
+{
+ plr->size = 0;
+ plr->line_size = 0;
+ kfree(plr->kmem);
+ plr->kmem = NULL;
+ plr->s = NULL;
+ if (plr->d)
+ plr->d->plr = NULL;
+ plr->d = NULL;
+ plr->cbm = 0;
+ plr->debugfs_dir = NULL;
+}
+
+/**
+ * pseudo_lock_region_init - Initialize pseudo-lock region information
+ * @plr: pseudo-lock region
+ *
+ * Called after user provided a schemata to be pseudo-locked. From the
+ * schemata the &struct pseudo_lock_region is on entry already initialized
+ * with the resource, domain, and capacity bitmask. Here the information
+ * required for pseudo-locking is deduced from this data and &struct
+ * pseudo_lock_region initialized further. This information includes:
+ * - size in bytes of the region to be pseudo-locked
+ * - cache line size to know the stride with which data needs to be accessed
+ * to be pseudo-locked
+ * - a cpu associated with the cache instance on which the pseudo-locking
+ * flow can be executed
+ *
+ * Return: 0 on success, <0 on failure. Descriptive error will be written
+ * to last_cmd_status buffer.
+ */
+static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
+{
+ enum resctrl_scope scope = plr->s->res->ctrl_scope;
+ struct cacheinfo *ci;
+ int ret;
+
+ if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
+ return -ENODEV;
+
+ /* Pick the first cpu we find that is associated with the cache. */
+ plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);
+
+ if (!cpu_online(plr->cpu)) {
+ rdt_last_cmd_printf("CPU %u associated with cache not online\n",
+ plr->cpu);
+ ret = -ENODEV;
+ goto out_region;
+ }
+
+ ci = get_cpu_cacheinfo_level(plr->cpu, scope);
+ if (ci) {
+ plr->line_size = ci->coherency_line_size;
+ plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
+ return 0;
+ }
+
+ ret = -1;
+ rdt_last_cmd_puts("Unable to determine cache line size\n");
+out_region:
+ pseudo_lock_region_clear(plr);
+ return ret;
+}
+
+/**
+ * pseudo_lock_init - Initialize a pseudo-lock region
+ * @rdtgrp: resource group to which new pseudo-locked region will belong
+ *
+ * A pseudo-locked region is associated with a resource group. When this
+ * association is created the pseudo-locked region is initialized. The
+ * details of the pseudo-locked region are not known at this time so only
+ * allocation is done and association established.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static int pseudo_lock_init(struct rdtgroup *rdtgrp)
+{
+ struct pseudo_lock_region *plr;
+
+ plr = kzalloc(sizeof(*plr), GFP_KERNEL);
+ if (!plr)
+ return -ENOMEM;
+
+ init_waitqueue_head(&plr->lock_thread_wq);
+ INIT_LIST_HEAD(&plr->pm_reqs);
+ rdtgrp->plr = plr;
+ return 0;
+}
+
+/**
+ * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
+ * @plr: pseudo-lock region
+ *
+ * Initialize the details required to set up the pseudo-locked region and
+ * allocate the contiguous memory that will be pseudo-locked to the cache.
+ *
+ * Return: 0 on success, <0 on failure. Descriptive error will be written
+ * to last_cmd_status buffer.
+ */
+static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
+{
+ int ret;
+
+ ret = pseudo_lock_region_init(plr);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * We do not yet support contiguous regions larger than
+ * KMALLOC_MAX_SIZE.
+ */
+ if (plr->size > KMALLOC_MAX_SIZE) {
+ rdt_last_cmd_puts("Requested region exceeds maximum size\n");
+ ret = -E2BIG;
+ goto out_region;
+ }
+
+ plr->kmem = kzalloc(plr->size, GFP_KERNEL);
+ if (!plr->kmem) {
+ rdt_last_cmd_puts("Unable to allocate memory\n");
+ ret = -ENOMEM;
+ goto out_region;
+ }
+
+ ret = 0;
+ goto out;
+out_region:
+ pseudo_lock_region_clear(plr);
+out:
+ return ret;
+}
+
+/**
+ * pseudo_lock_free - Free a pseudo-locked region
+ * @rdtgrp: resource group to which pseudo-locked region belonged
+ *
+ * The pseudo-locked region's resources have already been released, or not
+ * yet created at this point. Now it can be freed and disassociated from the
+ * resource group.
+ *
+ * Return: void
+ */
+static void pseudo_lock_free(struct rdtgroup *rdtgrp)
+{
+ pseudo_lock_region_clear(rdtgrp->plr);
+ kfree(rdtgrp->plr);
+ rdtgrp->plr = NULL;
+}
+
+/**
+ * rdtgroup_monitor_in_progress - Test if monitoring in progress
+ * @rdtgrp: resource group being queried
+ *
+ * Return: 1 if monitor groups have been created for this resource
+ * group, 0 otherwise.
+ */
+static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
+{
+ return !list_empty(&rdtgrp->mon.crdtgrp_list);
+}
+
+/**
+ * rdtgroup_locksetup_user_restrict - Restrict user access to group
+ * @rdtgrp: resource group needing access restricted
+ *
+ * A resource group used for cache pseudo-locking cannot have cpus or tasks
+ * assigned to it. This is communicated to the user by restricting access
+ * to all the files that can be used to make such changes.
+ *
+ * Permissions restored with rdtgroup_locksetup_user_restore()
+ *
+ * Return: 0 on success, <0 on failure. If a failure occurs during the
+ * restriction of access an attempt will be made to restore permissions but
+ * the state of the mode of these files will be uncertain when a failure
+ * occurs.
+ */
+static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
+ if (ret)
+ return ret;
+
+ ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
+ if (ret)
+ goto err_tasks;
+
+ ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
+ if (ret)
+ goto err_cpus;
+
+ if (resctrl_arch_mon_capable()) {
+ ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
+ if (ret)
+ goto err_cpus_list;
+ }
+
+ ret = 0;
+ goto out;
+
+err_cpus_list:
+ rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
+err_cpus:
+ rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
+err_tasks:
+ rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
+out:
+ return ret;
+}
+
+/**
+ * rdtgroup_locksetup_user_restore - Restore user access to group
+ * @rdtgrp: resource group needing access restored
+ *
+ * Restore all file access previously removed using
+ * rdtgroup_locksetup_user_restrict()
+ *
+ * Return: 0 on success, <0 on failure. If a failure occurs during the
+ * restoration of access an attempt will be made to restrict permissions
+ * again but the state of the mode of these files will be uncertain when
+ * a failure occurs.
+ */
+static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
+ if (ret)
+ return ret;
+
+ ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
+ if (ret)
+ goto err_tasks;
+
+ ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
+ if (ret)
+ goto err_cpus;
+
+ if (resctrl_arch_mon_capable()) {
+ ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
+ if (ret)
+ goto err_cpus_list;
+ }
+
+ ret = 0;
+ goto out;
+
+err_cpus_list:
+ rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
+err_cpus:
+ rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
+err_tasks:
+ rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
+out:
+ return ret;
+}
+
+/**
+ * rdtgroup_locksetup_enter - Resource group enters locksetup mode
+ * @rdtgrp: resource group requested to enter locksetup mode
+ *
+ * A resource group enters locksetup mode to reflect that it would be used
+ * to represent a pseudo-locked region and is in the process of being set
+ * up to do so. A resource group used for a pseudo-locked region would
+ * lose the closid associated with it so we cannot allow it to have any
+ * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
+ * future. Monitoring of a pseudo-locked region is not allowed either.
+ *
+ * The above and more restrictions on a pseudo-locked region are checked
+ * for and enforced before the resource group enters the locksetup mode.
+ *
+ * Returns: 0 if the resource group successfully entered locksetup mode, <0
+ * on failure. On failure the last_cmd_status buffer is updated with text to
+ * communicate details of failure to the user.
+ */
+int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ /*
+ * The default resource group can neither be removed nor lose the
+ * default closid associated with it.
+ */
+ if (rdtgrp == &rdtgroup_default) {
+ rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Cache Pseudo-locking not supported when CDP is enabled.
+ *
+ * Some things to consider if you would like to enable this
+ * support (using L3 CDP as example):
+ * - When CDP is enabled two separate resources are exposed,
+ * L3DATA and L3CODE, but they are actually on the same cache.
+ * The implication for pseudo-locking is that if a
+ * pseudo-locked region is created on a domain of one
+ * resource (eg. L3CODE), then a pseudo-locked region cannot
+ * be created on that same domain of the other resource
+ * (eg. L3DATA). This is because the creation of a
+ * pseudo-locked region involves a call to wbinvd that will
+ * affect all cache allocations on particular domain.
+ * - Considering the previous, it may be possible to only
+ * expose one of the CDP resources to pseudo-locking and
+ * hide the other. For example, we could consider to only
+ * expose L3DATA and since the L3 cache is unified it is
+ * still possible to place instructions there are execute it.
+ * - If only one region is exposed to pseudo-locking we should
+ * still keep in mind that availability of a portion of cache
+ * for pseudo-locking should take into account both resources.
+ * Similarly, if a pseudo-locked region is created in one
+ * resource, the portion of cache used by it should be made
+ * unavailable to all future allocations from both resources.
+ */
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
+ resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
+ rdt_last_cmd_puts("CDP enabled\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Not knowing the bits to disable prefetching implies that this
+ * platform does not support Cache Pseudo-Locking.
+ */
+ if (resctrl_arch_get_prefetch_disable_bits() == 0) {
+ rdt_last_cmd_puts("Pseudo-locking not supported\n");
+ return -EINVAL;
+ }
+
+ if (rdtgroup_monitor_in_progress(rdtgrp)) {
+ rdt_last_cmd_puts("Monitoring in progress\n");
+ return -EINVAL;
+ }
+
+ if (rdtgroup_tasks_assigned(rdtgrp)) {
+ rdt_last_cmd_puts("Tasks assigned to resource group\n");
+ return -EINVAL;
+ }
+
+ if (!cpumask_empty(&rdtgrp->cpu_mask)) {
+ rdt_last_cmd_puts("CPUs assigned to resource group\n");
+ return -EINVAL;
+ }
+
+ if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
+ rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
+ return -EIO;
+ }
+
+ ret = pseudo_lock_init(rdtgrp);
+ if (ret) {
+ rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
+ goto out_release;
+ }
+
+ /*
+ * If this system is capable of monitoring a rmid would have been
+ * allocated when the control group was created. This is not needed
+ * anymore when this group would be used for pseudo-locking. This
+ * is safe to call on platforms not capable of monitoring.
+ */
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+
+ ret = 0;
+ goto out;
+
+out_release:
+ rdtgroup_locksetup_user_restore(rdtgrp);
+out:
+ return ret;
+}
+
+/**
+ * rdtgroup_locksetup_exit - resource group exist locksetup mode
+ * @rdtgrp: resource group
+ *
+ * When a resource group exits locksetup mode the earlier restrictions are
+ * lifted.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ if (resctrl_arch_mon_capable()) {
+ ret = alloc_rmid(rdtgrp->closid);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Out of RMIDs\n");
+ return ret;
+ }
+ rdtgrp->mon.rmid = ret;
+ }
+
+ ret = rdtgroup_locksetup_user_restore(rdtgrp);
+ if (ret) {
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+ return ret;
+ }
+
+ pseudo_lock_free(rdtgrp);
+ return 0;
+}
+
+/**
+ * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
+ * @d: RDT domain
+ * @cbm: CBM to test
+ *
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Determine if @cbm overlaps with any existing
+ * pseudo-locked region on @d.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
+ * otherwise.
+ */
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
+{
+ unsigned int cbm_len;
+ unsigned long cbm_b;
+
+ if (d->plr) {
+ cbm_len = d->plr->s->res->cache.cbm_len;
+ cbm_b = d->plr->cbm;
+ if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
+ * @d: RDT domain under test
+ *
+ * The setup of a pseudo-locked region affects all cache instances within
+ * the hierarchy of the region. It is thus essential to know if any
+ * pseudo-locked regions exist within a cache hierarchy to prevent any
+ * attempts to create new pseudo-locked regions in the same hierarchy.
+ *
+ * Return: true if a pseudo-locked region exists in the hierarchy of @d or
+ * if it is not possible to test due to memory allocation issue,
+ * false otherwise.
+ */
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
+{
+ struct rdt_ctrl_domain *d_i;
+ cpumask_var_t cpu_with_psl;
+ struct rdt_resource *r;
+ bool ret = false;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
+ return true;
+
+ /*
+ * First determine which cpus have pseudo-locked regions
+ * associated with them.
+ */
+ for_each_alloc_capable_rdt_resource(r) {
+ list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
+ if (d_i->plr)
+ cpumask_or(cpu_with_psl, cpu_with_psl,
+ &d_i->hdr.cpu_mask);
+ }
+ }
+
+ /*
+ * Next test if new pseudo-locked region would intersect with
+ * existing region.
+ */
+ if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
+ ret = true;
+
+ free_cpumask_var(cpu_with_psl);
+ return ret;
+}
+
+/**
+ * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
+ * @rdtgrp: Resource group to which the pseudo-locked region belongs.
+ * @sel: Selector of which measurement to perform on a pseudo-locked region.
+ *
+ * The measurement of latency to access a pseudo-locked region should be
+ * done from a cpu that is associated with that pseudo-locked region.
+ * Determine which cpu is associated with this region and start a thread on
+ * that cpu to perform the measurement, wait for that thread to complete.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
+{
+ struct pseudo_lock_region *plr = rdtgrp->plr;
+ struct task_struct *thread;
+ unsigned int cpu;
+ int ret = -1;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ if (rdtgrp->flags & RDT_DELETED) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!plr->d) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ plr->thread_done = 0;
+ cpu = cpumask_first(&plr->d->hdr.cpu_mask);
+ if (!cpu_online(cpu)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ plr->cpu = cpu;
+
+ if (sel == 1)
+ thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn,
+ plr, cpu, "pseudo_lock_measure/%u");
+ else if (sel == 2)
+ thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency,
+ plr, cpu, "pseudo_lock_measure/%u");
+ else if (sel == 3)
+ thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency,
+ plr, cpu, "pseudo_lock_measure/%u");
+ else
+ goto out;
+
+ if (IS_ERR(thread)) {
+ ret = PTR_ERR(thread);
+ goto out;
+ }
+
+ ret = wait_event_interruptible(plr->lock_thread_wq,
+ plr->thread_done == 1);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+
+out:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+ return ret;
+}
+
+static ssize_t pseudo_lock_measure_trigger(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct rdtgroup *rdtgrp = file->private_data;
+ size_t buf_size;
+ char buf[32];
+ int ret;
+ int sel;
+
+ buf_size = min(count, (sizeof(buf) - 1));
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+ ret = kstrtoint(buf, 10, &sel);
+ if (ret == 0) {
+ if (sel != 1 && sel != 2 && sel != 3)
+ return -EINVAL;
+ ret = debugfs_file_get(file->f_path.dentry);
+ if (ret)
+ return ret;
+ ret = pseudo_lock_measure_cycles(rdtgrp, sel);
+ if (ret == 0)
+ ret = count;
+ debugfs_file_put(file->f_path.dentry);
+ }
+
+ return ret;
+}
+
+static const struct file_operations pseudo_measure_fops = {
+ .write = pseudo_lock_measure_trigger,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+/**
+ * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
+ * @rdtgrp: resource group to which pseudo-lock region belongs
+ *
+ * Called when a resource group in the pseudo-locksetup mode receives a
+ * valid schemata that should be pseudo-locked. Since the resource group is
+ * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
+ * allocated and initialized with the essential information. If a failure
+ * occurs the resource group remains in the pseudo-locksetup mode with the
+ * &struct pseudo_lock_region associated with it, but cleared from all
+ * information and ready for the user to re-attempt pseudo-locking by
+ * writing the schemata again.
+ *
+ * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
+ * on failure. Descriptive error will be written to last_cmd_status buffer.
+ */
+int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
+{
+ struct pseudo_lock_region *plr = rdtgrp->plr;
+ struct task_struct *thread;
+ unsigned int new_minor;
+ struct device *dev;
+ char *kn_name __free(kfree) = NULL;
+ int ret;
+
+ ret = pseudo_lock_region_alloc(plr);
+ if (ret < 0)
+ return ret;
+
+ ret = pseudo_lock_cstates_constrain(plr);
+ if (ret < 0) {
+ ret = -EINVAL;
+ goto out_region;
+ }
+ kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL);
+ if (!kn_name) {
+ ret = -ENOMEM;
+ goto out_cstates;
+ }
+
+ plr->thread_done = 0;
+
+ thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr,
+ plr->cpu, "pseudo_lock/%u");
+ if (IS_ERR(thread)) {
+ ret = PTR_ERR(thread);
+ rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
+ goto out_cstates;
+ }
+
+ ret = wait_event_interruptible(plr->lock_thread_wq,
+ plr->thread_done == 1);
+ if (ret < 0) {
+ /*
+ * If the thread does not get on the CPU for whatever
+ * reason and the process which sets up the region is
+ * interrupted then this will leave the thread in runnable
+ * state and once it gets on the CPU it will dereference
+ * the cleared, but not freed, plr struct resulting in an
+ * empty pseudo-locking loop.
+ */
+ rdt_last_cmd_puts("Locking thread interrupted\n");
+ goto out_cstates;
+ }
+
+ ret = pseudo_lock_minor_get(&new_minor);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Unable to obtain a new minor number\n");
+ goto out_cstates;
+ }
+
+ /*
+ * Unlock access but do not release the reference. The
+ * pseudo-locked region will still be here on return.
+ *
+ * The mutex has to be released temporarily to avoid a potential
+ * deadlock with the mm->mmap_lock which is obtained in the
+ * device_create() and debugfs_create_dir() callpath below as well as
+ * before the mmap() callback is called.
+ */
+ mutex_unlock(&rdtgroup_mutex);
+
+ if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
+ plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl);
+ if (!IS_ERR_OR_NULL(plr->debugfs_dir))
+ debugfs_create_file("pseudo_lock_measure", 0200,
+ plr->debugfs_dir, rdtgrp,
+ &pseudo_measure_fops);
+ }
+
+ dev = device_create(&pseudo_lock_class, NULL,
+ MKDEV(pseudo_lock_major, new_minor),
+ rdtgrp, "%s", kn_name);
+
+ mutex_lock(&rdtgroup_mutex);
+
+ if (IS_ERR(dev)) {
+ ret = PTR_ERR(dev);
+ rdt_last_cmd_printf("Failed to create character device: %d\n",
+ ret);
+ goto out_debugfs;
+ }
+
+ /* We released the mutex - check if group was removed while we did so */
+ if (rdtgrp->flags & RDT_DELETED) {
+ ret = -ENODEV;
+ goto out_device;
+ }
+
+ plr->minor = new_minor;
+
+ rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
+ closid_free(rdtgrp->closid);
+ rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
+ rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);
+
+ ret = 0;
+ goto out;
+
+out_device:
+ device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
+out_debugfs:
+ debugfs_remove_recursive(plr->debugfs_dir);
+ pseudo_lock_minor_release(new_minor);
+out_cstates:
+ pseudo_lock_cstates_relax(plr);
+out_region:
+ pseudo_lock_region_clear(plr);
+out:
+ return ret;
+}
+
+/**
+ * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
+ * @rdtgrp: resource group to which the pseudo-locked region belongs
+ *
+ * The removal of a pseudo-locked region can be initiated when the resource
+ * group is removed from user space via a "rmdir" from userspace or the
+ * unmount of the resctrl filesystem. On removal the resource group does
+ * not go back to pseudo-locksetup mode before it is removed, instead it is
+ * removed directly. There is thus asymmetry with the creation where the
+ * &struct pseudo_lock_region is removed here while it was not created in
+ * rdtgroup_pseudo_lock_create().
+ *
+ * Return: void
+ */
+void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
+{
+ struct pseudo_lock_region *plr = rdtgrp->plr;
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ /*
+ * Default group cannot be a pseudo-locked region so we can
+ * free closid here.
+ */
+ closid_free(rdtgrp->closid);
+ goto free;
+ }
+
+ pseudo_lock_cstates_relax(plr);
+ debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
+ device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
+ pseudo_lock_minor_release(plr->minor);
+
+free:
+ pseudo_lock_free(rdtgrp);
+}
+
+static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
+{
+ struct rdtgroup *rdtgrp;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ rdtgrp = region_find_by_minor(iminor(inode));
+ if (!rdtgrp) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+
+ filp->private_data = rdtgrp;
+ atomic_inc(&rdtgrp->waitcount);
+ /* Perform a non-seekable open - llseek is not supported */
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+ mutex_unlock(&rdtgroup_mutex);
+
+ return 0;
+}
+
+static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
+{
+ struct rdtgroup *rdtgrp;
+
+ mutex_lock(&rdtgroup_mutex);
+ rdtgrp = filp->private_data;
+ WARN_ON(!rdtgrp);
+ if (!rdtgrp) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+ filp->private_data = NULL;
+ atomic_dec(&rdtgrp->waitcount);
+ mutex_unlock(&rdtgroup_mutex);
+ return 0;
+}
+
+static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
+{
+ /* Not supported */
+ return -EINVAL;
+}
+
+static const struct vm_operations_struct pseudo_mmap_ops = {
+ .mremap = pseudo_lock_dev_mremap,
+};
+
+static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ unsigned long vsize = vma->vm_end - vma->vm_start;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ struct pseudo_lock_region *plr;
+ struct rdtgroup *rdtgrp;
+ unsigned long physical;
+ unsigned long psize;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ rdtgrp = filp->private_data;
+ WARN_ON(!rdtgrp);
+ if (!rdtgrp) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+
+ plr = rdtgrp->plr;
+
+ if (!plr->d) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+
+ /*
+ * Task is required to run with affinity to the cpus associated
+ * with the pseudo-locked region. If this is not the case the task
+ * may be scheduled elsewhere and invalidate entries in the
+ * pseudo-locked region.
+ */
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -EINVAL;
+ }
+
+ physical = __pa(plr->kmem) >> PAGE_SHIFT;
+ psize = plr->size - off;
+
+ if (off > plr->size) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENOSPC;
+ }
+
+ /*
+ * Ensure changes are carried directly to the memory being mapped,
+ * do not allow copy-on-write mapping.
+ */
+ if (!(vma->vm_flags & VM_SHARED)) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -EINVAL;
+ }
+
+ if (vsize > psize) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENOSPC;
+ }
+
+ memset(plr->kmem + off, 0, vsize);
+
+ if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
+ vsize, vma->vm_page_prot)) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -EAGAIN;
+ }
+ vma->vm_ops = &pseudo_mmap_ops;
+ mutex_unlock(&rdtgroup_mutex);
+ return 0;
+}
+
+static const struct file_operations pseudo_lock_dev_fops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = NULL,
+ .open = pseudo_lock_dev_open,
+ .release = pseudo_lock_dev_release,
+ .mmap = pseudo_lock_dev_mmap,
+};
+
+int rdt_pseudo_lock_init(void)
+{
+ int ret;
+
+ ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
+ if (ret < 0)
+ return ret;
+
+ pseudo_lock_major = ret;
+
+ ret = class_register(&pseudo_lock_class);
+ if (ret) {
+ unregister_chrdev(pseudo_lock_major, "pseudo_lock");
+ return ret;
+ }
+
+ return 0;
+}
+
+void rdt_pseudo_lock_release(void)
+{
+ class_unregister(&pseudo_lock_class);
+ unregister_chrdev(pseudo_lock_major, "pseudo_lock");
+ pseudo_lock_major = 0;
+}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
new file mode 100644
index 000000000000..cc37f58b47dd
--- /dev/null
+++ b/fs/resctrl/rdtgroup.c
@@ -0,0 +1,4353 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * User interface for Resource Allocation in Resource Director Technology(RDT)
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Author: Fenghua Yu <fenghua.yu@intel.com>
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/fs_parser.h>
+#include <linux/sysfs.h>
+#include <linux/kernfs.h>
+#include <linux/resctrl.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+
+#include <uapi/linux/magic.h>
+
+#include "internal.h"
+
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
+static struct kernfs_root *rdt_root;
+
+struct rdtgroup rdtgroup_default;
+
+LIST_HEAD(rdt_all_groups);
+
+/* list of entries for the schemata file */
+LIST_HEAD(resctrl_schema_all);
+
+/*
+ * List of struct mon_data containing private data of event files for use by
+ * rdtgroup_mondata_show(). Protected by rdtgroup_mutex.
+ */
+static LIST_HEAD(mon_data_kn_priv_list);
+
+/* The filesystem can only be mounted once. */
+bool resctrl_mounted;
+
+/* Kernel fs node for "info" directory under root */
+static struct kernfs_node *kn_info;
+
+/* Kernel fs node for "mon_groups" directory under root */
+static struct kernfs_node *kn_mongrp;
+
+/* Kernel fs node for "mon_data" directory under root */
+static struct kernfs_node *kn_mondata;
+
+/*
+ * Used to store the max resource name width to display the schemata names in
+ * a tabular format.
+ */
+int max_name_width;
+
+static struct seq_buf last_cmd_status;
+
+static char last_cmd_status_buf[512];
+
+static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
+
+static void rdtgroup_destroy_root(void);
+
+struct dentry *debugfs_resctrl;
+
+/*
+ * Memory bandwidth monitoring event to use for the default CTRL_MON group
+ * and each new CTRL_MON group created by the user. Only relevant when
+ * the filesystem is mounted with the "mba_MBps" option so it does not
+ * matter that it remains uninitialized on systems that do not support
+ * the "mba_MBps" option.
+ */
+enum resctrl_event_id mba_mbps_default_event;
+
+static bool resctrl_debug;
+
+void rdt_last_cmd_clear(void)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_clear(&last_cmd_status);
+}
+
+void rdt_last_cmd_puts(const char *s)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_puts(&last_cmd_status, s);
+}
+
+void rdt_last_cmd_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_vprintf(&last_cmd_status, fmt, ap);
+ va_end(ap);
+}
+
+void rdt_staged_configs_clear(void)
+{
+ struct rdt_ctrl_domain *dom;
+ struct rdt_resource *r;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ for_each_alloc_capable_rdt_resource(r) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
+ memset(dom->staged_config, 0, sizeof(dom->staged_config));
+ }
+}
+
+static bool resctrl_is_mbm_enabled(void)
+{
+ return (resctrl_arch_is_mbm_total_enabled() ||
+ resctrl_arch_is_mbm_local_enabled());
+}
+
+static bool resctrl_is_mbm_event(int e)
+{
+ return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+ e <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+/*
+ * Trivial allocator for CLOSIDs. Use BITMAP APIs to manipulate a bitmap
+ * of free CLOSIDs.
+ *
+ * Using a global CLOSID across all resources has some advantages and
+ * some drawbacks:
+ * + We can simply set current's closid to assign a task to a resource
+ * group.
+ * + Context switch code can avoid extra memory references deciding which
+ * CLOSID to load into the PQR_ASSOC MSR
+ * - We give up some options in configuring resource groups across multi-socket
+ * systems.
+ * - Our choices on how to configure each resource become progressively more
+ * limited as the number of resources grows.
+ */
+static unsigned long *closid_free_map;
+
+static int closid_free_map_len;
+
+int closids_supported(void)
+{
+ return closid_free_map_len;
+}
+
+static int closid_init(void)
+{
+ struct resctrl_schema *s;
+ u32 rdt_min_closid = ~0;
+
+ /* Monitor only platforms still call closid_init() */
+ if (list_empty(&resctrl_schema_all))
+ return 0;
+
+ /* Compute rdt_min_closid across all resources */
+ list_for_each_entry(s, &resctrl_schema_all, list)
+ rdt_min_closid = min(rdt_min_closid, s->num_closid);
+
+ closid_free_map = bitmap_alloc(rdt_min_closid, GFP_KERNEL);
+ if (!closid_free_map)
+ return -ENOMEM;
+ bitmap_fill(closid_free_map, rdt_min_closid);
+
+ /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
+ __clear_bit(RESCTRL_RESERVED_CLOSID, closid_free_map);
+ closid_free_map_len = rdt_min_closid;
+
+ return 0;
+}
+
+static void closid_exit(void)
+{
+ bitmap_free(closid_free_map);
+ closid_free_map = NULL;
+}
+
+static int closid_alloc(void)
+{
+ int cleanest_closid;
+ u32 closid;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
+ resctrl_arch_is_llc_occupancy_enabled()) {
+ cleanest_closid = resctrl_find_cleanest_closid();
+ if (cleanest_closid < 0)
+ return cleanest_closid;
+ closid = cleanest_closid;
+ } else {
+ closid = find_first_bit(closid_free_map, closid_free_map_len);
+ if (closid == closid_free_map_len)
+ return -ENOSPC;
+ }
+ __clear_bit(closid, closid_free_map);
+
+ return closid;
+}
+
+void closid_free(int closid)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ __set_bit(closid, closid_free_map);
+}
+
+/**
+ * closid_allocated - test if provided closid is in use
+ * @closid: closid to be tested
+ *
+ * Return: true if @closid is currently associated with a resource group,
+ * false if @closid is free
+ */
+bool closid_allocated(unsigned int closid)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ return !test_bit(closid, closid_free_map);
+}
+
+/**
+ * rdtgroup_mode_by_closid - Return mode of resource group with closid
+ * @closid: closid if the resource group
+ *
+ * Each resource group is associated with a @closid. Here the mode
+ * of a resource group can be queried by searching for it using its closid.
+ *
+ * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
+ */
+enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
+{
+ struct rdtgroup *rdtgrp;
+
+ list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+ if (rdtgrp->closid == closid)
+ return rdtgrp->mode;
+ }
+
+ return RDT_NUM_MODES;
+}
+
+static const char * const rdt_mode_str[] = {
+ [RDT_MODE_SHAREABLE] = "shareable",
+ [RDT_MODE_EXCLUSIVE] = "exclusive",
+ [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
+ [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
+};
+
+/**
+ * rdtgroup_mode_str - Return the string representation of mode
+ * @mode: the resource group mode as &enum rdtgroup_mode
+ *
+ * Return: string representation of valid mode, "unknown" otherwise
+ */
+static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
+{
+ if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
+ return "unknown";
+
+ return rdt_mode_str[mode];
+}
+
+/* set uid and gid of rdtgroup dirs and files to that of the creator */
+static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
+{
+ struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
+ .ia_uid = current_fsuid(),
+ .ia_gid = current_fsgid(), };
+
+ if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
+ gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
+ return 0;
+
+ return kernfs_setattr(kn, &iattr);
+}
+
+static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
+{
+ struct kernfs_node *kn;
+ int ret;
+
+ kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+ 0, rft->kf_ops, rft, NULL, NULL);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret) {
+ kernfs_remove(kn);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
+{
+ struct kernfs_open_file *of = m->private;
+ struct rftype *rft = of->kn->priv;
+
+ if (rft->seq_show)
+ return rft->seq_show(of, m, arg);
+ return 0;
+}
+
+static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rftype *rft = of->kn->priv;
+
+ if (rft->write)
+ return rft->write(of, buf, nbytes, off);
+
+ return -EINVAL;
+}
+
+static const struct kernfs_ops rdtgroup_kf_single_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .write = rdtgroup_file_write,
+ .seq_show = rdtgroup_seqfile_show,
+};
+
+static const struct kernfs_ops kf_mondata_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .seq_show = rdtgroup_mondata_show,
+};
+
+static bool is_cpu_list(struct kernfs_open_file *of)
+{
+ struct rftype *rft = of->kn->priv;
+
+ return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
+}
+
+static int rdtgroup_cpus_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ struct cpumask *mask;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+ if (rdtgrp) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ mask = &rdtgrp->plr->d->hdr.cpu_mask;
+ seq_printf(s, is_cpu_list(of) ?
+ "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(mask));
+ }
+ } else {
+ seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(&rdtgrp->cpu_mask));
+ }
+ } else {
+ ret = -ENOENT;
+ }
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+/*
+ * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
+ *
+ * Per task closids/rmids must have been set up before calling this function.
+ * @r may be NULL.
+ */
+static void
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
+{
+ struct resctrl_cpu_defaults defaults, *p = NULL;
+
+ if (r) {
+ defaults.closid = r->closid;
+ defaults.rmid = r->mon.rmid;
+ p = &defaults;
+ }
+
+ on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
+}
+
+static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask)
+{
+ struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
+ struct list_head *head;
+
+ /* Check whether cpus belong to parent ctrl group */
+ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
+ if (!cpumask_empty(tmpmask)) {
+ rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
+ return -EINVAL;
+ }
+
+ /* Check whether cpus are dropped from this group */
+ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+ if (!cpumask_empty(tmpmask)) {
+ /* Give any dropped cpus to parent rdtgroup */
+ cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
+ update_closid_rmid(tmpmask, prgrp);
+ }
+
+ /*
+ * If we added cpus, remove them from previous group that owned them
+ * and update per-cpu rmid
+ */
+ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+ if (!cpumask_empty(tmpmask)) {
+ head = &prgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+ if (crgrp == rdtgrp)
+ continue;
+ cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
+ tmpmask);
+ }
+ update_closid_rmid(tmpmask, rdtgrp);
+ }
+
+ /* Done pushing/pulling - update this group with new mask */
+ cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+ return 0;
+}
+
+static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
+{
+ struct rdtgroup *crgrp;
+
+ cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
+ /* update the child mon group masks as well*/
+ list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
+ cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
+}
+
+static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
+{
+ struct rdtgroup *r, *crgrp;
+ struct list_head *head;
+
+ /* Check whether cpus are dropped from this group */
+ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+ if (!cpumask_empty(tmpmask)) {
+ /* Can't drop from default group */
+ if (rdtgrp == &rdtgroup_default) {
+ rdt_last_cmd_puts("Can't drop CPUs from default group\n");
+ return -EINVAL;
+ }
+
+ /* Give any dropped cpus to rdtgroup_default */
+ cpumask_or(&rdtgroup_default.cpu_mask,
+ &rdtgroup_default.cpu_mask, tmpmask);
+ update_closid_rmid(tmpmask, &rdtgroup_default);
+ }
+
+ /*
+ * If we added cpus, remove them from previous group and
+ * the prev group's child groups that owned them
+ * and update per-cpu closid/rmid.
+ */
+ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+ if (!cpumask_empty(tmpmask)) {
+ list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
+ if (r == rdtgrp)
+ continue;
+ cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
+ if (!cpumask_empty(tmpmask1))
+ cpumask_rdtgrp_clear(r, tmpmask1);
+ }
+ update_closid_rmid(tmpmask, rdtgrp);
+ }
+
+ /* Done pushing/pulling - update this group with new mask */
+ cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+ /*
+ * Clear child mon group masks since there is a new parent mask
+ * now and update the rmid for the cpus the child lost.
+ */
+ head = &rdtgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+ cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
+ update_closid_rmid(tmpmask, rdtgrp);
+ cpumask_clear(&crgrp->cpu_mask);
+ }
+
+ return 0;
+}
+
+static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ cpumask_var_t tmpmask, newmask, tmpmask1;
+ struct rdtgroup *rdtgrp;
+ int ret;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+ if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ return -ENOMEM;
+ }
+ if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ return -ENOMEM;
+ }
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Pseudo-locking in progress\n");
+ goto unlock;
+ }
+
+ if (is_cpu_list(of))
+ ret = cpulist_parse(buf, newmask);
+ else
+ ret = cpumask_parse(buf, newmask);
+
+ if (ret) {
+ rdt_last_cmd_puts("Bad CPU list/mask\n");
+ goto unlock;
+ }
+
+ /* check that user didn't specify any offline cpus */
+ cpumask_andnot(tmpmask, newmask, cpu_online_mask);
+ if (!cpumask_empty(tmpmask)) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Can only assign online CPUs\n");
+ goto unlock;
+ }
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
+ else if (rdtgrp->type == RDTMON_GROUP)
+ ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
+ else
+ ret = -EINVAL;
+
+unlock:
+ rdtgroup_kn_unlock(of->kn);
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ free_cpumask_var(tmpmask1);
+
+ return ret ?: nbytes;
+}
+
+/**
+ * rdtgroup_remove - the helper to remove resource group safely
+ * @rdtgrp: resource group to remove
+ *
+ * On resource group creation via a mkdir, an extra kernfs_node reference is
+ * taken to ensure that the rdtgroup structure remains accessible for the
+ * rdtgroup_kn_unlock() calls where it is removed.
+ *
+ * Drop the extra reference here, then free the rdtgroup structure.
+ *
+ * Return: void
+ */
+static void rdtgroup_remove(struct rdtgroup *rdtgrp)
+{
+ kernfs_put(rdtgrp->kn);
+ kfree(rdtgrp);
+}
+
+static void _update_task_closid_rmid(void *task)
+{
+ /*
+ * If the task is still current on this CPU, update PQR_ASSOC MSR.
+ * Otherwise, the MSR is updated when the task is scheduled in.
+ */
+ if (task == current)
+ resctrl_arch_sched_in(task);
+}
+
+static void update_task_closid_rmid(struct task_struct *t)
+{
+ if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
+ smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
+ else
+ _update_task_closid_rmid(t);
+}
+
+static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
+{
+ u32 closid, rmid = rdtgrp->mon.rmid;
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ closid = rdtgrp->closid;
+ else if (rdtgrp->type == RDTMON_GROUP)
+ closid = rdtgrp->mon.parent->closid;
+ else
+ return false;
+
+ return resctrl_arch_match_closid(tsk, closid) &&
+ resctrl_arch_match_rmid(tsk, closid, rmid);
+}
+
+static int __rdtgroup_move_task(struct task_struct *tsk,
+ struct rdtgroup *rdtgrp)
+{
+ /* If the task is already in rdtgrp, no need to move the task. */
+ if (task_in_rdtgroup(tsk, rdtgrp))
+ return 0;
+
+ /*
+ * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+ * updated by them.
+ *
+ * For ctrl_mon groups, move both closid and rmid.
+ * For monitor groups, can move the tasks only from
+ * their parent CTRL group.
+ */
+ if (rdtgrp->type == RDTMON_GROUP &&
+ !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
+ return -EINVAL;
+ }
+
+ if (rdtgrp->type == RDTMON_GROUP)
+ resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
+ rdtgrp->mon.rmid);
+ else
+ resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
+ rdtgrp->mon.rmid);
+
+ /*
+ * Ensure the task's closid and rmid are written before determining if
+ * the task is current that will decide if it will be interrupted.
+ * This pairs with the full barrier between the rq->curr update and
+ * resctrl_arch_sched_in() during context switch.
+ */
+ smp_mb();
+
+ /*
+ * By now, the task's closid and rmid are set. If the task is current
+ * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
+ * group go into effect. If the task is not current, the MSR will be
+ * updated when the task is scheduled in.
+ */
+ update_task_closid_rmid(tsk);
+
+ return 0;
+}
+
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+ return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
+ resctrl_arch_match_closid(t, r->closid));
+}
+
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+ return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
+ resctrl_arch_match_rmid(t, r->mon.parent->closid,
+ r->mon.rmid));
+}
+
+/**
+ * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
+ * @r: Resource group
+ *
+ * Return: 1 if tasks have been assigned to @r, 0 otherwise
+ */
+int rdtgroup_tasks_assigned(struct rdtgroup *r)
+{
+ struct task_struct *p, *t;
+ int ret = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ rcu_read_lock();
+ for_each_process_thread(p, t) {
+ if (is_closid_match(t, r) || is_rmid_match(t, r)) {
+ ret = 1;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int rdtgroup_task_write_permission(struct task_struct *task,
+ struct kernfs_open_file *of)
+{
+ const struct cred *tcred = get_task_cred(task);
+ const struct cred *cred = current_cred();
+ int ret = 0;
+
+ /*
+ * Even if we're attaching all tasks in the thread group, we only
+ * need to check permissions on one of them.
+ */
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid)) {
+ rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
+ ret = -EPERM;
+ }
+
+ put_cred(tcred);
+ return ret;
+}
+
+static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
+ struct kernfs_open_file *of)
+{
+ struct task_struct *tsk;
+ int ret;
+
+ rcu_read_lock();
+ if (pid) {
+ tsk = find_task_by_vpid(pid);
+ if (!tsk) {
+ rcu_read_unlock();
+ rdt_last_cmd_printf("No task %d\n", pid);
+ return -ESRCH;
+ }
+ } else {
+ tsk = current;
+ }
+
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ ret = rdtgroup_task_write_permission(tsk, of);
+ if (!ret)
+ ret = __rdtgroup_move_task(tsk, rdtgrp);
+
+ put_task_struct(tsk);
+ return ret;
+}
+
+static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ char *pid_str;
+ int ret = 0;
+ pid_t pid;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+ rdt_last_cmd_clear();
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Pseudo-locking in progress\n");
+ goto unlock;
+ }
+
+ while (buf && buf[0] != '\0' && buf[0] != '\n') {
+ pid_str = strim(strsep(&buf, ","));
+
+ if (kstrtoint(pid_str, 0, &pid)) {
+ rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (pid < 0) {
+ rdt_last_cmd_printf("Invalid pid %d\n", pid);
+ ret = -EINVAL;
+ break;
+ }
+
+ ret = rdtgroup_move_task(pid, rdtgrp, of);
+ if (ret) {
+ rdt_last_cmd_printf("Error while processing task %d\n", pid);
+ break;
+ }
+ }
+
+unlock:
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
+}
+
+static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
+{
+ struct task_struct *p, *t;
+ pid_t pid;
+
+ rcu_read_lock();
+ for_each_process_thread(p, t) {
+ if (is_closid_match(t, r) || is_rmid_match(t, r)) {
+ pid = task_pid_vnr(t);
+ if (pid)
+ seq_printf(s, "%d\n", pid);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int rdtgroup_tasks_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp)
+ show_rdt_tasks(rdtgrp, s);
+ else
+ ret = -ENOENT;
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+static int rdtgroup_closid_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp)
+ seq_printf(s, "%u\n", rdtgrp->closid);
+ else
+ ret = -ENOENT;
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+static int rdtgroup_rmid_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp)
+ seq_printf(s, "%u\n", rdtgrp->mon.rmid);
+ else
+ ret = -ENOENT;
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+#ifdef CONFIG_PROC_CPU_RESCTRL
+/*
+ * A task can only be part of one resctrl control group and of one monitor
+ * group which is associated to that control group.
+ *
+ * 1) res:
+ * mon:
+ *
+ * resctrl is not available.
+ *
+ * 2) res:/
+ * mon:
+ *
+ * Task is part of the root resctrl control group, and it is not associated
+ * to any monitor group.
+ *
+ * 3) res:/
+ * mon:mon0
+ *
+ * Task is part of the root resctrl control group and monitor group mon0.
+ *
+ * 4) res:group0
+ * mon:
+ *
+ * Task is part of resctrl control group group0, and it is not associated
+ * to any monitor group.
+ *
+ * 5) res:group0
+ * mon:mon1
+ *
+ * Task is part of resctrl control group group0 and monitor group mon1.
+ */
+int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk)
+{
+ struct rdtgroup *rdtg;
+ int ret = 0;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ /* Return empty if resctrl has not been mounted. */
+ if (!resctrl_mounted) {
+ seq_puts(s, "res:\nmon:\n");
+ goto unlock;
+ }
+
+ list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
+ struct rdtgroup *crg;
+
+ /*
+ * Task information is only relevant for shareable
+ * and exclusive groups.
+ */
+ if (rdtg->mode != RDT_MODE_SHAREABLE &&
+ rdtg->mode != RDT_MODE_EXCLUSIVE)
+ continue;
+
+ if (!resctrl_arch_match_closid(tsk, rdtg->closid))
+ continue;
+
+ seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
+ rdt_kn_name(rdtg->kn));
+ seq_puts(s, "mon:");
+ list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
+ mon.crdtgrp_list) {
+ if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
+ crg->mon.rmid))
+ continue;
+ seq_printf(s, "%s", rdt_kn_name(crg->kn));
+ break;
+ }
+ seq_putc(s, '\n');
+ goto unlock;
+ }
+ /*
+ * The above search should succeed. Otherwise return
+ * with an error.
+ */
+ ret = -ENOENT;
+unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return ret;
+}
+#endif
+
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ int len;
+
+ mutex_lock(&rdtgroup_mutex);
+ len = seq_buf_used(&last_cmd_status);
+ if (len)
+ seq_printf(seq, "%.*s", len, last_cmd_status_buf);
+ else
+ seq_puts(seq, "ok\n");
+ mutex_unlock(&rdtgroup_mutex);
+ return 0;
+}
+
+static void *rdt_kn_parent_priv(struct kernfs_node *kn)
+{
+ /*
+ * The parent pointer is only valid within RCU section since it can be
+ * replaced.
+ */
+ guard(rcu)();
+ return rcu_dereference(kn->__parent)->priv;
+}
+
+static int rdt_num_closids_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+
+ seq_printf(seq, "%u\n", s->num_closid);
+ return 0;
+}
+
+static int rdt_default_ctrl_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
+ return 0;
+}
+
+static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
+ return 0;
+}
+
+static int rdt_shareable_bits_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%x\n", r->cache.shareable_bits);
+ return 0;
+}
+
+/*
+ * rdt_bit_usage_show - Display current usage of resources
+ *
+ * A domain is a shared resource that can now be allocated differently. Here
+ * we display the current regions of the domain as an annotated bitmask.
+ * For each domain of this resource its allocation bitmask
+ * is annotated as below to indicate the current usage of the corresponding bit:
+ * 0 - currently unused
+ * X - currently available for sharing and used by software and hardware
+ * H - currently used by hardware only but available for software use
+ * S - currently used and shareable by software only
+ * E - currently used exclusively by one resource group
+ * P - currently pseudo-locked by one resource group
+ */
+static int rdt_bit_usage_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
+ struct rdt_resource *r = s->res;
+ struct rdt_ctrl_domain *dom;
+ int i, hwb, swb, excl, psl;
+ enum rdtgrp_mode mode;
+ bool sep = false;
+ u32 ctrl_val;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+ hw_shareable = r->cache.shareable_bits;
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
+ if (sep)
+ seq_putc(seq, ';');
+ sw_shareable = 0;
+ exclusive = 0;
+ seq_printf(seq, "%d=", dom->hdr.id);
+ for (i = 0; i < closids_supported(); i++) {
+ if (!closid_allocated(i))
+ continue;
+ ctrl_val = resctrl_arch_get_config(r, dom, i,
+ s->conf_type);
+ mode = rdtgroup_mode_by_closid(i);
+ switch (mode) {
+ case RDT_MODE_SHAREABLE:
+ sw_shareable |= ctrl_val;
+ break;
+ case RDT_MODE_EXCLUSIVE:
+ exclusive |= ctrl_val;
+ break;
+ case RDT_MODE_PSEUDO_LOCKSETUP:
+ /*
+ * RDT_MODE_PSEUDO_LOCKSETUP is possible
+ * here but not included since the CBM
+ * associated with this CLOSID in this mode
+ * is not initialized and no task or cpu can be
+ * assigned this CLOSID.
+ */
+ break;
+ case RDT_MODE_PSEUDO_LOCKED:
+ case RDT_NUM_MODES:
+ WARN(1,
+ "invalid mode for closid %d\n", i);
+ break;
+ }
+ }
+ for (i = r->cache.cbm_len - 1; i >= 0; i--) {
+ pseudo_locked = dom->plr ? dom->plr->cbm : 0;
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
+ if (hwb && swb)
+ seq_putc(seq, 'X');
+ else if (hwb && !swb)
+ seq_putc(seq, 'H');
+ else if (!hwb && swb)
+ seq_putc(seq, 'S');
+ else if (excl)
+ seq_putc(seq, 'E');
+ else if (psl)
+ seq_putc(seq, 'P');
+ else /* Unused bits remain */
+ seq_putc(seq, '0');
+ }
+ sep = true;
+ }
+ seq_putc(seq, '\n');
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+ return 0;
+}
+
+static int rdt_min_bw_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%u\n", r->membw.min_bw);
+ return 0;
+}
+
+static int rdt_num_rmids_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+
+ seq_printf(seq, "%d\n", r->num_rmid);
+
+ return 0;
+}
+
+static int rdt_mon_features_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ struct mon_evt *mevt;
+
+ list_for_each_entry(mevt, &r->evt_list, list) {
+ seq_printf(seq, "%s\n", mevt->name);
+ if (mevt->configurable)
+ seq_printf(seq, "%s_config\n", mevt->name);
+ }
+
+ return 0;
+}
+
+static int rdt_bw_gran_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%u\n", r->membw.bw_gran);
+ return 0;
+}
+
+static int rdt_delay_linear_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%u\n", r->membw.delay_linear);
+ return 0;
+}
+
+static int max_threshold_occ_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
+
+ return 0;
+}
+
+static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ switch (r->membw.throttle_mode) {
+ case THREAD_THROTTLE_PER_THREAD:
+ seq_puts(seq, "per-thread\n");
+ return 0;
+ case THREAD_THROTTLE_MAX:
+ seq_puts(seq, "max\n");
+ return 0;
+ case THREAD_THROTTLE_UNDEFINED:
+ seq_puts(seq, "undefined\n");
+ return 0;
+ }
+
+ WARN_ON_ONCE(1);
+
+ return 0;
+}
+
+static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ unsigned int bytes;
+ int ret;
+
+ ret = kstrtouint(buf, 0, &bytes);
+ if (ret)
+ return ret;
+
+ if (bytes > resctrl_rmid_realloc_limit)
+ return -EINVAL;
+
+ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
+
+ return nbytes;
+}
+
+/*
+ * rdtgroup_mode_show - Display mode of this resource group
+ */
+static int rdtgroup_mode_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+
+ seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
+
+ rdtgroup_kn_unlock(of->kn);
+ return 0;
+}
+
+static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
+{
+ switch (my_type) {
+ case CDP_CODE:
+ return CDP_DATA;
+ case CDP_DATA:
+ return CDP_CODE;
+ default:
+ case CDP_NONE:
+ return CDP_NONE;
+ }
+}
+
+static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
+
+ return 0;
+}
+
+/**
+ * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
+ * @r: Resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @type: CDP type of @r.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Checks if provided @cbm intended to be used for @closid on domain
+ * @d overlaps with any other closids or other hardware usage associated
+ * with this domain. If @exclusive is true then only overlaps with
+ * resource groups in exclusive mode will be considered. If @exclusive
+ * is false then overlaps with any resource group or hardware entities
+ * will be considered.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: false if CBM does not overlap, true if it does.
+ */
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
+ unsigned long cbm, int closid,
+ enum resctrl_conf_type type, bool exclusive)
+{
+ enum rdtgrp_mode mode;
+ unsigned long ctrl_b;
+ int i;
+
+ /* Check for any overlap with regions used by hardware directly */
+ if (!exclusive) {
+ ctrl_b = r->cache.shareable_bits;
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
+ return true;
+ }
+
+ /* Check for overlap with other resource groups */
+ for (i = 0; i < closids_supported(); i++) {
+ ctrl_b = resctrl_arch_get_config(r, d, i, type);
+ mode = rdtgroup_mode_by_closid(i);
+ if (closid_allocated(i) && i != closid &&
+ mode != RDT_MODE_PSEUDO_LOCKSETUP) {
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
+ if (exclusive) {
+ if (mode == RDT_MODE_EXCLUSIVE)
+ return true;
+ continue;
+ }
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/**
+ * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
+ * @s: Schema for the resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Resources that can be allocated using a CBM can use the CBM to control
+ * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
+ * for overlap. Overlap test is not limited to the specific resource for
+ * which the CBM is intended though - when dealing with CDP resources that
+ * share the underlying hardware the overlap check should be performed on
+ * the CDP resource sharing the hardware also.
+ *
+ * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
+ * overlap test.
+ *
+ * Return: true if CBM overlap detected, false if there is no overlap
+ */
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
+{
+ enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+ struct rdt_resource *r = s->res;
+
+ if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
+ exclusive))
+ return true;
+
+ if (!resctrl_arch_get_cdp_enabled(r->rid))
+ return false;
+ return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
+}
+
+/**
+ * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
+ * @rdtgrp: Resource group identified through its closid.
+ *
+ * An exclusive resource group implies that there should be no sharing of
+ * its allocated resources. At the time this group is considered to be
+ * exclusive this test can determine if its current schemata supports this
+ * setting by testing for overlap with all other resource groups.
+ *
+ * Return: true if resource group can be exclusive, false if there is overlap
+ * with allocations of other resource groups and thus this resource group
+ * cannot be exclusive.
+ */
+static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
+{
+ int closid = rdtgrp->closid;
+ struct rdt_ctrl_domain *d;
+ struct resctrl_schema *s;
+ struct rdt_resource *r;
+ bool has_cache = false;
+ u32 ctrl;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+ if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
+ continue;
+ has_cache = true;
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ ctrl = resctrl_arch_get_config(r, d, closid,
+ s->conf_type);
+ if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
+ rdt_last_cmd_puts("Schemata overlaps\n");
+ return false;
+ }
+ }
+ }
+
+ if (!has_cache) {
+ rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * rdtgroup_mode_write - Modify the resource group's mode
+ */
+static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ enum rdtgrp_mode mode;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+ buf[nbytes - 1] = '\0';
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+
+ rdt_last_cmd_clear();
+
+ mode = rdtgrp->mode;
+
+ if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
+ (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
+ (!strcmp(buf, "pseudo-locksetup") &&
+ mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
+ (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
+ goto out;
+
+ if (mode == RDT_MODE_PSEUDO_LOCKED) {
+ rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!strcmp(buf, "shareable")) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ ret = rdtgroup_locksetup_exit(rdtgrp);
+ if (ret)
+ goto out;
+ }
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+ } else if (!strcmp(buf, "exclusive")) {
+ if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ ret = rdtgroup_locksetup_exit(rdtgrp);
+ if (ret)
+ goto out;
+ }
+ rdtgrp->mode = RDT_MODE_EXCLUSIVE;
+ } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
+ !strcmp(buf, "pseudo-locksetup")) {
+ ret = rdtgroup_locksetup_enter(rdtgrp);
+ if (ret)
+ goto out;
+ rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
+ } else {
+ rdt_last_cmd_puts("Unknown or unsupported mode\n");
+ ret = -EINVAL;
+ }
+
+out:
+ rdtgroup_kn_unlock(of->kn);
+ return ret ?: nbytes;
+}
+
+/**
+ * rdtgroup_cbm_to_size - Translate CBM to size in bytes
+ * @r: RDT resource to which @d belongs.
+ * @d: RDT domain instance.
+ * @cbm: bitmask for which the size should be computed.
+ *
+ * The bitmask provided associated with the RDT domain instance @d will be
+ * translated into how many bytes it represents. The size in bytes is
+ * computed by first dividing the total cache size by the CBM length to
+ * determine how many bytes each bit in the bitmask represents. The result
+ * is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used to make the
+ * bitmap functions work correctly.
+ */
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
+ struct rdt_ctrl_domain *d, unsigned long cbm)
+{
+ unsigned int size = 0;
+ struct cacheinfo *ci;
+ int num_b;
+
+ if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
+ return size;
+
+ num_b = bitmap_weight(&cbm, r->cache.cbm_len);
+ ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
+ if (ci)
+ size = ci->size / r->cache.cbm_len * num_b;
+
+ return size;
+}
+
+bool is_mba_sc(struct rdt_resource *r)
+{
+ if (!r)
+ r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+
+ /*
+ * The software controller support is only applicable to MBA resource.
+ * Make sure to check for resource type.
+ */
+ if (r->rid != RDT_RESOURCE_MBA)
+ return false;
+
+ return r->membw.mba_sc;
+}
+
+/*
+ * rdtgroup_size_show - Display size in bytes of allocated regions
+ *
+ * The "size" file mirrors the layout of the "schemata" file, printing the
+ * size in bytes of each region instead of the capacity bitmask.
+ */
+static int rdtgroup_size_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct resctrl_schema *schema;
+ enum resctrl_conf_type type;
+ struct rdt_ctrl_domain *d;
+ struct rdtgroup *rdtgrp;
+ struct rdt_resource *r;
+ unsigned int size;
+ int ret = 0;
+ u32 closid;
+ bool sep;
+ u32 ctrl;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%*s:", max_name_width,
+ rdtgrp->plr->s->name);
+ size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
+ rdtgrp->plr->d,
+ rdtgrp->plr->cbm);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
+ }
+ goto out;
+ }
+
+ closid = rdtgrp->closid;
+
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ r = schema->res;
+ type = schema->conf_type;
+ sep = false;
+ seq_printf(s, "%*s:", max_name_width, schema->name);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ if (sep)
+ seq_putc(s, ';');
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ size = 0;
+ } else {
+ if (is_mba_sc(r))
+ ctrl = d->mbps_val[closid];
+ else
+ ctrl = resctrl_arch_get_config(r, d,
+ closid,
+ type);
+ if (r->rid == RDT_RESOURCE_MBA ||
+ r->rid == RDT_RESOURCE_SMBA)
+ size = ctrl;
+ else
+ size = rdtgroup_cbm_to_size(r, d, ctrl);
+ }
+ seq_printf(s, "%d=%u", d->hdr.id, size);
+ sep = true;
+ }
+ seq_putc(s, '\n');
+ }
+
+out:
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
+{
+ smp_call_function_any(&mon_info->d->hdr.cpu_mask,
+ resctrl_arch_mon_event_config_read, mon_info, 1);
+}
+
+static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
+{
+ struct resctrl_mon_config_info mon_info;
+ struct rdt_mon_domain *dom;
+ bool sep = false;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
+ if (sep)
+ seq_puts(s, ";");
+
+ memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
+ mon_info.r = r;
+ mon_info.d = dom;
+ mon_info.evtid = evtid;
+ mondata_config_read(&mon_info);
+
+ seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
+ sep = true;
+ }
+ seq_puts(s, "\n");
+
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return 0;
+}
+
+static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+
+ mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
+
+ return 0;
+}
+
+static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+
+ mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
+
+ return 0;
+}
+
+static void mbm_config_write_domain(struct rdt_resource *r,
+ struct rdt_mon_domain *d, u32 evtid, u32 val)
+{
+ struct resctrl_mon_config_info mon_info = {0};
+
+ /*
+ * Read the current config value first. If both are the same then
+ * no need to write it again.
+ */
+ mon_info.r = r;
+ mon_info.d = d;
+ mon_info.evtid = evtid;
+ mondata_config_read(&mon_info);
+ if (mon_info.mon_config == val)
+ return;
+
+ mon_info.mon_config = val;
+
+ /*
+ * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
+ * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE
+ * are scoped at the domain level. Writing any of these MSRs
+ * on one CPU is observed by all the CPUs in the domain.
+ */
+ smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
+ &mon_info, 1);
+
+ /*
+ * When an Event Configuration is changed, the bandwidth counters
+ * for all RMIDs and Events will be cleared by the hardware. The
+ * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
+ * every RMID on the next read to any event for every RMID.
+ * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
+ * cleared while it is tracked by the hardware. Clear the
+ * mbm_local and mbm_total counts for all the RMIDs.
+ */
+ resctrl_arch_reset_rmid_all(r, d);
+}
+
+static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
+{
+ char *dom_str = NULL, *id_str;
+ unsigned long dom_id, val;
+ struct rdt_mon_domain *d;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+next:
+ if (!tok || tok[0] == '\0')
+ return 0;
+
+ /* Start processing the strings for each domain */
+ dom_str = strim(strsep(&tok, ";"));
+ id_str = strsep(&dom_str, "=");
+
+ if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
+ rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
+ return -EINVAL;
+ }
+
+ if (!dom_str || kstrtoul(dom_str, 16, &val)) {
+ rdt_last_cmd_puts("Non-numeric event configuration value\n");
+ return -EINVAL;
+ }
+
+ /* Value from user cannot be more than the supported set of events */
+ if ((val & r->mbm_cfg_mask) != val) {
+ rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
+ r->mbm_cfg_mask);
+ return -EINVAL;
+ }
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
+ mbm_config_write_domain(r, d, evtid, val);
+ goto next;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes,
+ loff_t off)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ int ret;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ buf[nbytes - 1] = '\0';
+
+ ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
+
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret ?: nbytes;
+}
+
+static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes,
+ loff_t off)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ int ret;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ buf[nbytes - 1] = '\0';
+
+ ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
+
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret ?: nbytes;
+}
+
+/* rdtgroup information files for one cache resource. */
+static struct rftype res_common_files[] = {
+ {
+ .name = "last_cmd_status",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_last_cmd_status_show,
+ .fflags = RFTYPE_TOP_INFO,
+ },
+ {
+ .name = "num_closids",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_num_closids_show,
+ .fflags = RFTYPE_CTRL_INFO,
+ },
+ {
+ .name = "mon_features",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_mon_features_show,
+ .fflags = RFTYPE_MON_INFO,
+ },
+ {
+ .name = "num_rmids",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_num_rmids_show,
+ .fflags = RFTYPE_MON_INFO,
+ },
+ {
+ .name = "cbm_mask",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_default_ctrl_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "min_cbm_bits",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_min_cbm_bits_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "shareable_bits",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_shareable_bits_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "bit_usage",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_bit_usage_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "min_bandwidth",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_min_bw_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
+ },
+ {
+ .name = "bandwidth_gran",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_bw_gran_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
+ },
+ {
+ .name = "delay_linear",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_delay_linear_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
+ },
+ /*
+ * Platform specific which (if any) capabilities are provided by
+ * thread_throttle_mode. Defer "fflags" initialization to platform
+ * discovery.
+ */
+ {
+ .name = "thread_throttle_mode",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_thread_throttle_mode_show,
+ },
+ {
+ .name = "max_threshold_occupancy",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = max_threshold_occ_write,
+ .seq_show = max_threshold_occ_show,
+ .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "mbm_total_bytes_config",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = mbm_total_bytes_config_show,
+ .write = mbm_total_bytes_config_write,
+ },
+ {
+ .name = "mbm_local_bytes_config",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = mbm_local_bytes_config_show,
+ .write = mbm_local_bytes_config_write,
+ },
+ {
+ .name = "cpus",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_cpus_write,
+ .seq_show = rdtgroup_cpus_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "cpus_list",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_cpus_write,
+ .seq_show = rdtgroup_cpus_show,
+ .flags = RFTYPE_FLAGS_CPUS_LIST,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "tasks",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_tasks_write,
+ .seq_show = rdtgroup_tasks_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "mon_hw_id",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_rmid_show,
+ .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG,
+ },
+ {
+ .name = "schemata",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_schemata_write,
+ .seq_show = rdtgroup_schemata_show,
+ .fflags = RFTYPE_CTRL_BASE,
+ },
+ {
+ .name = "mba_MBps_event",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_mba_mbps_event_write,
+ .seq_show = rdtgroup_mba_mbps_event_show,
+ },
+ {
+ .name = "mode",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_mode_write,
+ .seq_show = rdtgroup_mode_show,
+ .fflags = RFTYPE_CTRL_BASE,
+ },
+ {
+ .name = "size",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_size_show,
+ .fflags = RFTYPE_CTRL_BASE,
+ },
+ {
+ .name = "sparse_masks",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_has_sparse_bitmasks_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "ctrl_hw_id",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_closid_show,
+ .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
+ },
+};
+
+static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
+{
+ struct rftype *rfts, *rft;
+ int ret, len;
+
+ rfts = res_common_files;
+ len = ARRAY_SIZE(res_common_files);
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (resctrl_debug)
+ fflags |= RFTYPE_DEBUG;
+
+ for (rft = rfts; rft < rfts + len; rft++) {
+ if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
+ ret = rdtgroup_add_file(kn, rft);
+ if (ret)
+ goto error;
+ }
+ }
+
+ return 0;
+error:
+ pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
+ while (--rft >= rfts) {
+ if ((fflags & rft->fflags) == rft->fflags)
+ kernfs_remove_by_name(kn, rft->name);
+ }
+ return ret;
+}
+
+static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
+{
+ struct rftype *rfts, *rft;
+ int len;
+
+ rfts = res_common_files;
+ len = ARRAY_SIZE(res_common_files);
+
+ for (rft = rfts; rft < rfts + len; rft++) {
+ if (!strcmp(rft->name, name))
+ return rft;
+ }
+
+ return NULL;
+}
+
+static void thread_throttle_mode_init(void)
+{
+ enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
+ struct rdt_resource *r_mba, *r_smba;
+
+ r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+ if (r_mba->alloc_capable &&
+ r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
+ throttle_mode = r_mba->membw.throttle_mode;
+
+ r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
+ if (r_smba->alloc_capable &&
+ r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
+ throttle_mode = r_smba->membw.throttle_mode;
+
+ if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
+ return;
+
+ resctrl_file_fflags_init("thread_throttle_mode",
+ RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
+}
+
+void resctrl_file_fflags_init(const char *config, unsigned long fflags)
+{
+ struct rftype *rft;
+
+ rft = rdtgroup_get_rftype_by_name(config);
+ if (rft)
+ rft->fflags = fflags;
+}
+
+/**
+ * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
+ * @r: The resource group with which the file is associated.
+ * @name: Name of the file
+ *
+ * The permissions of named resctrl file, directory, or link are modified
+ * to not allow read, write, or execute by any user.
+ *
+ * WARNING: This function is intended to communicate to the user that the
+ * resctrl file has been locked down - that it is not relevant to the
+ * particular state the system finds itself in. It should not be relied
+ * on to protect from user access because after the file's permissions
+ * are restricted the user can still change the permissions using chmod
+ * from the command line.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
+{
+ struct iattr iattr = {.ia_valid = ATTR_MODE,};
+ struct kernfs_node *kn;
+ int ret = 0;
+
+ kn = kernfs_find_and_get_ns(r->kn, name, NULL);
+ if (!kn)
+ return -ENOENT;
+
+ switch (kernfs_type(kn)) {
+ case KERNFS_DIR:
+ iattr.ia_mode = S_IFDIR;
+ break;
+ case KERNFS_FILE:
+ iattr.ia_mode = S_IFREG;
+ break;
+ case KERNFS_LINK:
+ iattr.ia_mode = S_IFLNK;
+ break;
+ }
+
+ ret = kernfs_setattr(kn, &iattr);
+ kernfs_put(kn);
+ return ret;
+}
+
+/**
+ * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
+ * @r: The resource group with which the file is associated.
+ * @name: Name of the file
+ * @mask: Mask of permissions that should be restored
+ *
+ * Restore the permissions of the named file. If @name is a directory the
+ * permissions of its parent will be used.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
+ umode_t mask)
+{
+ struct iattr iattr = {.ia_valid = ATTR_MODE,};
+ struct kernfs_node *kn, *parent;
+ struct rftype *rfts, *rft;
+ int ret, len;
+
+ rfts = res_common_files;
+ len = ARRAY_SIZE(res_common_files);
+
+ for (rft = rfts; rft < rfts + len; rft++) {
+ if (!strcmp(rft->name, name))
+ iattr.ia_mode = rft->mode & mask;
+ }
+
+ kn = kernfs_find_and_get_ns(r->kn, name, NULL);
+ if (!kn)
+ return -ENOENT;
+
+ switch (kernfs_type(kn)) {
+ case KERNFS_DIR:
+ parent = kernfs_get_parent(kn);
+ if (parent) {
+ iattr.ia_mode |= parent->mode;
+ kernfs_put(parent);
+ }
+ iattr.ia_mode |= S_IFDIR;
+ break;
+ case KERNFS_FILE:
+ iattr.ia_mode |= S_IFREG;
+ break;
+ case KERNFS_LINK:
+ iattr.ia_mode |= S_IFLNK;
+ break;
+ }
+
+ ret = kernfs_setattr(kn, &iattr);
+ kernfs_put(kn);
+ return ret;
+}
+
+static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
+ unsigned long fflags)
+{
+ struct kernfs_node *kn_subdir;
+ int ret;
+
+ kn_subdir = kernfs_create_dir(kn_info, name,
+ kn_info->mode, priv);
+ if (IS_ERR(kn_subdir))
+ return PTR_ERR(kn_subdir);
+
+ ret = rdtgroup_kn_set_ugid(kn_subdir);
+ if (ret)
+ return ret;
+
+ ret = rdtgroup_add_files(kn_subdir, fflags);
+ if (!ret)
+ kernfs_activate(kn_subdir);
+
+ return ret;
+}
+
+static unsigned long fflags_from_resource(struct rdt_resource *r)
+{
+ switch (r->rid) {
+ case RDT_RESOURCE_L3:
+ case RDT_RESOURCE_L2:
+ return RFTYPE_RES_CACHE;
+ case RDT_RESOURCE_MBA:
+ case RDT_RESOURCE_SMBA:
+ return RFTYPE_RES_MB;
+ }
+
+ return WARN_ON_ONCE(1);
+}
+
+static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
+{
+ struct resctrl_schema *s;
+ struct rdt_resource *r;
+ unsigned long fflags;
+ char name[32];
+ int ret;
+
+ /* create the directory */
+ kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
+ if (IS_ERR(kn_info))
+ return PTR_ERR(kn_info);
+
+ ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
+ if (ret)
+ goto out_destroy;
+
+ /* loop over enabled controls, these are all alloc_capable */
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+ fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
+ ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
+ if (ret)
+ goto out_destroy;
+ }
+
+ for_each_mon_capable_rdt_resource(r) {
+ fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
+ sprintf(name, "%s_MON", r->name);
+ ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
+ if (ret)
+ goto out_destroy;
+ }
+
+ ret = rdtgroup_kn_set_ugid(kn_info);
+ if (ret)
+ goto out_destroy;
+
+ kernfs_activate(kn_info);
+
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn_info);
+ return ret;
+}
+
+static int
+mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
+ char *name, struct kernfs_node **dest_kn)
+{
+ struct kernfs_node *kn;
+ int ret;
+
+ /* create the directory */
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ if (dest_kn)
+ *dest_kn = kn;
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+
+ kernfs_activate(kn);
+
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
+
+static inline bool is_mba_linear(void)
+{
+ return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
+}
+
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
+{
+ u32 num_closid = resctrl_arch_get_num_closid(r);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
+ int i;
+
+ d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!d->mbps_val)
+ return -ENOMEM;
+
+ for (i = 0; i < num_closid; i++)
+ d->mbps_val[i] = MBA_MAX_MBPS;
+
+ return 0;
+}
+
+static void mba_sc_domain_destroy(struct rdt_resource *r,
+ struct rdt_ctrl_domain *d)
+{
+ kfree(d->mbps_val);
+ d->mbps_val = NULL;
+}
+
+/*
+ * MBA software controller is supported only if
+ * MBM is supported and MBA is in linear scale,
+ * and the MBM monitor scope is the same as MBA
+ * control scope.
+ */
+static bool supports_mba_mbps(void)
+{
+ struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+
+ return (resctrl_is_mbm_enabled() &&
+ r->alloc_capable && is_mba_linear() &&
+ r->ctrl_scope == rmbm->mon_scope);
+}
+
+/*
+ * Enable or disable the MBA software controller
+ * which helps user specify bandwidth in MBps.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+ u32 num_closid = resctrl_arch_get_num_closid(r);
+ struct rdt_ctrl_domain *d;
+ unsigned long fflags;
+ int i;
+
+ if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
+ return -EINVAL;
+
+ r->membw.mba_sc = mba_sc;
+
+ rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
+
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ for (i = 0; i < num_closid; i++)
+ d->mbps_val[i] = MBA_MAX_MBPS;
+ }
+
+ fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
+ resctrl_file_fflags_init("mba_MBps_event", fflags);
+
+ return 0;
+}
+
+/*
+ * We don't allow rdtgroup directories to be created anywhere
+ * except the root directory. Thus when looking for the rdtgroup
+ * structure for a kernfs node we are either looking at a directory,
+ * in which case the rdtgroup structure is pointed at by the "priv"
+ * field, otherwise we have a file, and need only look to the parent
+ * to find the rdtgroup.
+ */
+static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
+{
+ if (kernfs_type(kn) == KERNFS_DIR) {
+ /*
+ * All the resource directories use "kn->priv"
+ * to point to the "struct rdtgroup" for the
+ * resource. "info" and its subdirectories don't
+ * have rdtgroup structures, so return NULL here.
+ */
+ if (kn == kn_info ||
+ rcu_access_pointer(kn->__parent) == kn_info)
+ return NULL;
+ else
+ return kn->priv;
+ } else {
+ return rdt_kn_parent_priv(kn);
+ }
+}
+
+static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+ atomic_inc(&rdtgrp->waitcount);
+ kernfs_break_active_protection(kn);
+}
+
+static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+ if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+ (rdtgrp->flags & RDT_DELETED)) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+ rdtgroup_pseudo_lock_remove(rdtgrp);
+ kernfs_unbreak_active_protection(kn);
+ rdtgroup_remove(rdtgrp);
+ } else {
+ kernfs_unbreak_active_protection(kn);
+ }
+}
+
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
+{
+ struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
+
+ if (!rdtgrp)
+ return NULL;
+
+ rdtgroup_kn_get(rdtgrp, kn);
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ /* Was this group deleted while we waited? */
+ if (rdtgrp->flags & RDT_DELETED)
+ return NULL;
+
+ return rdtgrp;
+}
+
+void rdtgroup_kn_unlock(struct kernfs_node *kn)
+{
+ struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
+
+ if (!rdtgrp)
+ return;
+
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ rdtgroup_kn_put(rdtgrp, kn);
+}
+
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+ struct rdtgroup *prgrp,
+ struct kernfs_node **mon_data_kn);
+
+static void rdt_disable_ctx(void)
+{
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
+ set_mba_sc(false);
+
+ resctrl_debug = false;
+}
+
+static int rdt_enable_ctx(struct rdt_fs_context *ctx)
+{
+ int ret = 0;
+
+ if (ctx->enable_cdpl2) {
+ ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
+ if (ret)
+ goto out_done;
+ }
+
+ if (ctx->enable_cdpl3) {
+ ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
+ if (ret)
+ goto out_cdpl2;
+ }
+
+ if (ctx->enable_mba_mbps) {
+ ret = set_mba_sc(true);
+ if (ret)
+ goto out_cdpl3;
+ }
+
+ if (ctx->enable_debug)
+ resctrl_debug = true;
+
+ return 0;
+
+out_cdpl3:
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+out_cdpl2:
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
+out_done:
+ return ret;
+}
+
+static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
+{
+ struct resctrl_schema *s;
+ const char *suffix = "";
+ int ret, cl;
+
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ s->res = r;
+ s->num_closid = resctrl_arch_get_num_closid(r);
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ s->num_closid /= 2;
+
+ s->conf_type = type;
+ switch (type) {
+ case CDP_CODE:
+ suffix = "CODE";
+ break;
+ case CDP_DATA:
+ suffix = "DATA";
+ break;
+ case CDP_NONE:
+ suffix = "";
+ break;
+ }
+
+ ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
+ if (ret >= sizeof(s->name)) {
+ kfree(s);
+ return -EINVAL;
+ }
+
+ cl = strlen(s->name);
+
+ /*
+ * If CDP is supported by this resource, but not enabled,
+ * include the suffix. This ensures the tabular format of the
+ * schemata file does not change between mounts of the filesystem.
+ */
+ if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
+ cl += 4;
+
+ if (cl > max_name_width)
+ max_name_width = cl;
+
+ switch (r->schema_fmt) {
+ case RESCTRL_SCHEMA_BITMAP:
+ s->fmt_str = "%d=%x";
+ break;
+ case RESCTRL_SCHEMA_RANGE:
+ s->fmt_str = "%d=%u";
+ break;
+ }
+
+ if (WARN_ON_ONCE(!s->fmt_str)) {
+ kfree(s);
+ return -EINVAL;
+ }
+
+ INIT_LIST_HEAD(&s->list);
+ list_add(&s->list, &resctrl_schema_all);
+
+ return 0;
+}
+
+static int schemata_list_create(void)
+{
+ struct rdt_resource *r;
+ int ret = 0;
+
+ for_each_alloc_capable_rdt_resource(r) {
+ if (resctrl_arch_get_cdp_enabled(r->rid)) {
+ ret = schemata_list_add(r, CDP_CODE);
+ if (ret)
+ break;
+
+ ret = schemata_list_add(r, CDP_DATA);
+ } else {
+ ret = schemata_list_add(r, CDP_NONE);
+ }
+
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static void schemata_list_destroy(void)
+{
+ struct resctrl_schema *s, *tmp;
+
+ list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
+ list_del(&s->list);
+ kfree(s);
+ }
+}
+
+static int rdt_get_tree(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+ unsigned long flags = RFTYPE_CTRL_BASE;
+ struct rdt_mon_domain *dom;
+ struct rdt_resource *r;
+ int ret;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+ /*
+ * resctrl file system can only be mounted once.
+ */
+ if (resctrl_mounted) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ret = rdtgroup_setup_root(ctx);
+ if (ret)
+ goto out;
+
+ ret = rdt_enable_ctx(ctx);
+ if (ret)
+ goto out_root;
+
+ ret = schemata_list_create();
+ if (ret) {
+ schemata_list_destroy();
+ goto out_ctx;
+ }
+
+ ret = closid_init();
+ if (ret)
+ goto out_schemata_free;
+
+ if (resctrl_arch_mon_capable())
+ flags |= RFTYPE_MON;
+
+ ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
+ if (ret)
+ goto out_closid_exit;
+
+ kernfs_activate(rdtgroup_default.kn);
+
+ ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
+ if (ret < 0)
+ goto out_closid_exit;
+
+ if (resctrl_arch_mon_capable()) {
+ ret = mongroup_create_dir(rdtgroup_default.kn,
+ &rdtgroup_default, "mon_groups",
+ &kn_mongrp);
+ if (ret < 0)
+ goto out_info;
+
+ ret = mkdir_mondata_all(rdtgroup_default.kn,
+ &rdtgroup_default, &kn_mondata);
+ if (ret < 0)
+ goto out_mongrp;
+ rdtgroup_default.mon.mon_data_kn = kn_mondata;
+ }
+
+ ret = rdt_pseudo_lock_init();
+ if (ret)
+ goto out_mondata;
+
+ ret = kernfs_get_tree(fc);
+ if (ret < 0)
+ goto out_psl;
+
+ if (resctrl_arch_alloc_capable())
+ resctrl_arch_enable_alloc();
+ if (resctrl_arch_mon_capable())
+ resctrl_arch_enable_mon();
+
+ if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
+ resctrl_mounted = true;
+
+ if (resctrl_is_mbm_enabled()) {
+ r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ list_for_each_entry(dom, &r->mon_domains, hdr.list)
+ mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
+ }
+
+ goto out;
+
+out_psl:
+ rdt_pseudo_lock_release();
+out_mondata:
+ if (resctrl_arch_mon_capable())
+ kernfs_remove(kn_mondata);
+out_mongrp:
+ if (resctrl_arch_mon_capable())
+ kernfs_remove(kn_mongrp);
+out_info:
+ kernfs_remove(kn_info);
+out_closid_exit:
+ closid_exit();
+out_schemata_free:
+ schemata_list_destroy();
+out_ctx:
+ rdt_disable_ctx();
+out_root:
+ rdtgroup_destroy_root();
+out:
+ rdt_last_cmd_clear();
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+ return ret;
+}
+
+enum rdt_param {
+ Opt_cdp,
+ Opt_cdpl2,
+ Opt_mba_mbps,
+ Opt_debug,
+ nr__rdt_params
+};
+
+static const struct fs_parameter_spec rdt_fs_parameters[] = {
+ fsparam_flag("cdp", Opt_cdp),
+ fsparam_flag("cdpl2", Opt_cdpl2),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
+ fsparam_flag("debug", Opt_debug),
+ {}
+};
+
+static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+ struct fs_parse_result result;
+ const char *msg;
+ int opt;
+
+ opt = fs_parse(fc, rdt_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_cdp:
+ ctx->enable_cdpl3 = true;
+ return 0;
+ case Opt_cdpl2:
+ ctx->enable_cdpl2 = true;
+ return 0;
+ case Opt_mba_mbps:
+ msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
+ if (!supports_mba_mbps())
+ return invalfc(fc, msg);
+ ctx->enable_mba_mbps = true;
+ return 0;
+ case Opt_debug:
+ ctx->enable_debug = true;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void rdt_fs_context_free(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+
+ kernfs_free_fs_context(fc);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations rdt_fs_context_ops = {
+ .free = rdt_fs_context_free,
+ .parse_param = rdt_parse_param,
+ .get_tree = rdt_get_tree,
+};
+
+static int rdt_init_fs_context(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
+ fc->fs_private = &ctx->kfc;
+ fc->ops = &rdt_fs_context_ops;
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(&init_user_ns);
+ fc->global = true;
+ return 0;
+}
+
+/*
+ * Move tasks from one to the other group. If @from is NULL, then all tasks
+ * in the systems are moved unconditionally (used for teardown).
+ *
+ * If @mask is not NULL the cpus on which moved tasks are running are set
+ * in that mask so the update smp function call is restricted to affected
+ * cpus.
+ */
+static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
+ struct cpumask *mask)
+{
+ struct task_struct *p, *t;
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(p, t) {
+ if (!from || is_closid_match(t, from) ||
+ is_rmid_match(t, from)) {
+ resctrl_arch_set_closid_rmid(t, to->closid,
+ to->mon.rmid);
+
+ /*
+ * Order the closid/rmid stores above before the loads
+ * in task_curr(). This pairs with the full barrier
+ * between the rq->curr update and
+ * resctrl_arch_sched_in() during context switch.
+ */
+ smp_mb();
+
+ /*
+ * If the task is on a CPU, set the CPU in the mask.
+ * The detection is inaccurate as tasks might move or
+ * schedule before the smp function call takes place.
+ * In such a case the function call is pointless, but
+ * there is no other side effect.
+ */
+ if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
+ cpumask_set_cpu(task_cpu(t), mask);
+ }
+ }
+ read_unlock(&tasklist_lock);
+}
+
+static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
+{
+ struct rdtgroup *sentry, *stmp;
+ struct list_head *head;
+
+ head = &rdtgrp->mon.crdtgrp_list;
+ list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
+ free_rmid(sentry->closid, sentry->mon.rmid);
+ list_del(&sentry->mon.crdtgrp_list);
+
+ if (atomic_read(&sentry->waitcount) != 0)
+ sentry->flags = RDT_DELETED;
+ else
+ rdtgroup_remove(sentry);
+ }
+}
+
+/*
+ * Forcibly remove all of subdirectories under root.
+ */
+static void rmdir_all_sub(void)
+{
+ struct rdtgroup *rdtgrp, *tmp;
+
+ /* Move all tasks to the default resource group */
+ rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
+
+ list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
+ /* Free any child rmids */
+ free_all_child_rdtgrp(rdtgrp);
+
+ /* Remove each rdtgroup other than root */
+ if (rdtgrp == &rdtgroup_default)
+ continue;
+
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+ rdtgroup_pseudo_lock_remove(rdtgrp);
+
+ /*
+ * Give any CPUs back to the default group. We cannot copy
+ * cpu_online_mask because a CPU might have executed the
+ * offline callback already, but is still marked online.
+ */
+ cpumask_or(&rdtgroup_default.cpu_mask,
+ &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
+
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+
+ kernfs_remove(rdtgrp->kn);
+ list_del(&rdtgrp->rdtgroup_list);
+
+ if (atomic_read(&rdtgrp->waitcount) != 0)
+ rdtgrp->flags = RDT_DELETED;
+ else
+ rdtgroup_remove(rdtgrp);
+ }
+ /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
+ update_closid_rmid(cpu_online_mask, &rdtgroup_default);
+
+ kernfs_remove(kn_info);
+ kernfs_remove(kn_mongrp);
+ kernfs_remove(kn_mondata);
+}
+
+/**
+ * mon_get_kn_priv() - Get the mon_data priv data for this event.
+ *
+ * The same values are used across the mon_data directories of all control and
+ * monitor groups for the same event in the same domain. Keep a list of
+ * allocated structures and re-use an existing one with the same values for
+ * @rid, @domid, etc.
+ *
+ * @rid: The resource id for the event file being created.
+ * @domid: The domain id for the event file being created.
+ * @mevt: The type of event file being created.
+ * @do_sum: Whether SNC summing monitors are being created.
+ */
+static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
+ struct mon_evt *mevt,
+ bool do_sum)
+{
+ struct mon_data *priv;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ list_for_each_entry(priv, &mon_data_kn_priv_list, list) {
+ if (priv->rid == rid && priv->domid == domid &&
+ priv->sum == do_sum && priv->evtid == mevt->evtid)
+ return priv;
+ }
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ priv->rid = rid;
+ priv->domid = domid;
+ priv->sum = do_sum;
+ priv->evtid = mevt->evtid;
+ list_add_tail(&priv->list, &mon_data_kn_priv_list);
+
+ return priv;
+}
+
+/**
+ * mon_put_kn_priv() - Free all allocated mon_data structures.
+ *
+ * Called when resctrl file system is unmounted.
+ */
+static void mon_put_kn_priv(void)
+{
+ struct mon_data *priv, *tmp;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ list_for_each_entry_safe(priv, tmp, &mon_data_kn_priv_list, list) {
+ list_del(&priv->list);
+ kfree(priv);
+ }
+}
+
+static void resctrl_fs_teardown(void)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ /* Cleared by rdtgroup_destroy_root() */
+ if (!rdtgroup_default.kn)
+ return;
+
+ rmdir_all_sub();
+ mon_put_kn_priv();
+ rdt_pseudo_lock_release();
+ rdtgroup_default.mode = RDT_MODE_SHAREABLE;
+ closid_exit();
+ schemata_list_destroy();
+ rdtgroup_destroy_root();
+}
+
+static void rdt_kill_sb(struct super_block *sb)
+{
+ struct rdt_resource *r;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_disable_ctx();
+
+ /* Put everything back to default values. */
+ for_each_alloc_capable_rdt_resource(r)
+ resctrl_arch_reset_all_ctrls(r);
+
+ resctrl_fs_teardown();
+ if (resctrl_arch_alloc_capable())
+ resctrl_arch_disable_alloc();
+ if (resctrl_arch_mon_capable())
+ resctrl_arch_disable_mon();
+ resctrl_mounted = false;
+ kernfs_kill_sb(sb);
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+}
+
+static struct file_system_type rdt_fs_type = {
+ .name = "resctrl",
+ .init_fs_context = rdt_init_fs_context,
+ .parameters = rdt_fs_parameters,
+ .kill_sb = rdt_kill_sb,
+};
+
+static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
+ void *priv)
+{
+ struct kernfs_node *kn;
+ int ret = 0;
+
+ kn = __kernfs_create_file(parent_kn, name, 0444,
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
+ &kf_mondata_ops, priv, NULL, NULL);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret) {
+ kernfs_remove(kn);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_find_and_get(pkn, name);
+ if (!kn)
+ return;
+ kernfs_put(kn);
+
+ if (kn->dir.subdirs <= 1)
+ kernfs_remove(kn);
+ else
+ kernfs_remove_by_name(kn, subname);
+}
+
+/*
+ * Remove all subdirectories of mon_data of ctrl_mon groups
+ * and monitor groups for the given domain.
+ * Remove files and directories containing "sum" of domain data
+ * when last domain being summed is removed.
+ */
+static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+ struct rdt_mon_domain *d)
+{
+ struct rdtgroup *prgrp, *crgrp;
+ char subname[32];
+ bool snc_mode;
+ char name[32];
+
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ if (snc_mode)
+ sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
+
+ list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+ mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
+
+ list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
+ mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
+ }
+}
+
+static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp,
+ bool do_sum)
+{
+ struct rmid_read rr = {0};
+ struct mon_data *priv;
+ struct mon_evt *mevt;
+ int ret, domid;
+
+ if (WARN_ON(list_empty(&r->evt_list)))
+ return -EPERM;
+
+ list_for_each_entry(mevt, &r->evt_list, list) {
+ domid = do_sum ? d->ci->id : d->hdr.id;
+ priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
+ if (WARN_ON_ONCE(!priv))
+ return -EINVAL;
+
+ ret = mon_addfile(kn, mevt->name, priv);
+ if (ret)
+ return ret;
+
+ if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
+ mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
+ }
+
+ return 0;
+}
+
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+ struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+ struct kernfs_node *kn, *ckn;
+ char name[32];
+ bool snc_mode;
+ int ret = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ kn = kernfs_find_and_get(parent_kn, name);
+ if (kn) {
+ /*
+ * rdtgroup_mutex will prevent this directory from being
+ * removed. No need to keep this hold.
+ */
+ kernfs_put(kn);
+ } else {
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+ ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
+ if (ret)
+ goto out_destroy;
+ }
+
+ if (snc_mode) {
+ sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
+ ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(ckn)) {
+ ret = -EINVAL;
+ goto out_destroy;
+ }
+
+ ret = rdtgroup_kn_set_ugid(ckn);
+ if (ret)
+ goto out_destroy;
+
+ ret = mon_add_all_files(ckn, d, r, prgrp, false);
+ if (ret)
+ goto out_destroy;
+ }
+
+ kernfs_activate(kn);
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
+
+/*
+ * Add all subdirectories of mon_data for "ctrl_mon" groups
+ * and "monitor" groups with given domain id.
+ */
+static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+ struct rdt_mon_domain *d)
+{
+ struct kernfs_node *parent_kn;
+ struct rdtgroup *prgrp, *crgrp;
+ struct list_head *head;
+
+ list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+ parent_kn = prgrp->mon.mon_data_kn;
+ mkdir_mondata_subdir(parent_kn, d, r, prgrp);
+
+ head = &prgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+ parent_kn = crgrp->mon.mon_data_kn;
+ mkdir_mondata_subdir(parent_kn, d, r, crgrp);
+ }
+ }
+}
+
+static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
+ struct rdt_resource *r,
+ struct rdtgroup *prgrp)
+{
+ struct rdt_mon_domain *dom;
+ int ret;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
+ ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * This creates a directory mon_data which contains the monitored data.
+ *
+ * mon_data has one directory for each domain which are named
+ * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
+ * with L3 domain looks as below:
+ * ./mon_data:
+ * mon_L3_00
+ * mon_L3_01
+ * mon_L3_02
+ * ...
+ *
+ * Each domain directory has one file per event:
+ * ./mon_L3_00/:
+ * llc_occupancy
+ *
+ */
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+ struct rdtgroup *prgrp,
+ struct kernfs_node **dest_kn)
+{
+ struct rdt_resource *r;
+ struct kernfs_node *kn;
+ int ret;
+
+ /*
+ * Create the mon_data directory first.
+ */
+ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
+ if (ret)
+ return ret;
+
+ if (dest_kn)
+ *dest_kn = kn;
+
+ /*
+ * Create the subdirectories for each domain. Note that all events
+ * in a domain like L3 are grouped into a resource whose domain is L3
+ */
+ for_each_mon_capable_rdt_resource(r) {
+ ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
+ if (ret)
+ goto out_destroy;
+ }
+
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
+
+/**
+ * cbm_ensure_valid - Enforce validity on provided CBM
+ * @_val: Candidate CBM
+ * @r: RDT resource to which the CBM belongs
+ *
+ * The provided CBM represents all cache portions available for use. This
+ * may be represented by a bitmap that does not consist of contiguous ones
+ * and thus be an invalid CBM.
+ * Here the provided CBM is forced to be a valid CBM by only considering
+ * the first set of contiguous bits as valid and clearing all bits.
+ * The intention here is to provide a valid default CBM with which a new
+ * resource group is initialized. The user can follow this with a
+ * modification to the CBM if the default does not satisfy the
+ * requirements.
+ */
+static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
+{
+ unsigned int cbm_len = r->cache.cbm_len;
+ unsigned long first_bit, zero_bit;
+ unsigned long val = _val;
+
+ if (!val)
+ return 0;
+
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
+
+ /* Clear any remaining bits to ensure contiguous region */
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ return (u32)val;
+}
+
+/*
+ * Initialize cache resources per RDT domain
+ *
+ * Set the RDT domain up to start off with all usable allocations. That is,
+ * all shareable and unused bits. All-zero CBM is invalid.
+ */
+static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
+ u32 closid)
+{
+ enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+ enum resctrl_conf_type t = s->conf_type;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
+ u32 used_b = 0, unused_b = 0;
+ unsigned long tmp_cbm;
+ enum rdtgrp_mode mode;
+ u32 peer_ctl, ctrl_val;
+ int i;
+
+ cfg = &d->staged_config[t];
+ cfg->have_new_ctrl = false;
+ cfg->new_ctrl = r->cache.shareable_bits;
+ used_b = r->cache.shareable_bits;
+ for (i = 0; i < closids_supported(); i++) {
+ if (closid_allocated(i) && i != closid) {
+ mode = rdtgroup_mode_by_closid(i);
+ if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
+ /*
+ * ctrl values for locksetup aren't relevant
+ * until the schemata is written, and the mode
+ * becomes RDT_MODE_PSEUDO_LOCKED.
+ */
+ continue;
+ /*
+ * If CDP is active include peer domain's
+ * usage to ensure there is no overlap
+ * with an exclusive group.
+ */
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ peer_ctl = resctrl_arch_get_config(r, d, i,
+ peer_type);
+ else
+ peer_ctl = 0;
+ ctrl_val = resctrl_arch_get_config(r, d, i,
+ s->conf_type);
+ used_b |= ctrl_val | peer_ctl;
+ if (mode == RDT_MODE_SHAREABLE)
+ cfg->new_ctrl |= ctrl_val | peer_ctl;
+ }
+ }
+ if (d->plr && d->plr->cbm > 0)
+ used_b |= d->plr->cbm;
+ unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
+ unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
+ cfg->new_ctrl |= unused_b;
+ /*
+ * Force the initial CBM to be valid, user can
+ * modify the CBM based on system availability.
+ */
+ cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
+ /*
+ * Assign the u32 CBM to an unsigned long to ensure that
+ * bitmap_weight() does not access out-of-bound memory.
+ */
+ tmp_cbm = cfg->new_ctrl;
+ if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
+ return -ENOSPC;
+ }
+ cfg->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * Initialize cache resources with default values.
+ *
+ * A new RDT group is being created on an allocation capable (CAT)
+ * supporting system. Set this group up to start off with all usable
+ * allocations.
+ *
+ * If there are no more shareable bits available on any domain then
+ * the entire allocation will fail.
+ */
+static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
+{
+ struct rdt_ctrl_domain *d;
+ int ret;
+
+ list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
+ ret = __init_one_rdt_domain(d, s, closid);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Initialize MBA resource with default values. */
+static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
+{
+ struct resctrl_staged_config *cfg;
+ struct rdt_ctrl_domain *d;
+
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ if (is_mba_sc(r)) {
+ d->mbps_val[closid] = MBA_MAX_MBPS;
+ continue;
+ }
+
+ cfg = &d->staged_config[CDP_NONE];
+ cfg->new_ctrl = resctrl_get_default_ctrl(r);
+ cfg->have_new_ctrl = true;
+ }
+}
+
+/* Initialize the RDT group's allocations. */
+static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+{
+ struct resctrl_schema *s;
+ struct rdt_resource *r;
+ int ret = 0;
+
+ rdt_staged_configs_clear();
+
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+ if (r->rid == RDT_RESOURCE_MBA ||
+ r->rid == RDT_RESOURCE_SMBA) {
+ rdtgroup_init_mba(r, rdtgrp->closid);
+ if (is_mba_sc(r))
+ continue;
+ } else {
+ ret = rdtgroup_init_cat(s, rdtgrp->closid);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Failed to initialize allocations\n");
+ goto out;
+ }
+ }
+
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
+out:
+ rdt_staged_configs_clear();
+ return ret;
+}
+
+static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ if (!resctrl_arch_mon_capable())
+ return 0;
+
+ ret = alloc_rmid(rdtgrp->closid);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Out of RMIDs\n");
+ return ret;
+ }
+ rdtgrp->mon.rmid = ret;
+
+ ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
+{
+ if (resctrl_arch_mon_capable())
+ free_rmid(rgrp->closid, rgrp->mon.rmid);
+}
+
+/*
+ * We allow creating mon groups only with in a directory called "mon_groups"
+ * which is present in every ctrl_mon group. Check if this is a valid
+ * "mon_groups" directory.
+ *
+ * 1. The directory should be named "mon_groups".
+ * 2. The mon group itself should "not" be named "mon_groups".
+ * This makes sure "mon_groups" directory always has a ctrl_mon group
+ * as parent.
+ */
+static bool is_mon_groups(struct kernfs_node *kn, const char *name)
+{
+ return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
+ strcmp(name, "mon_groups"));
+}
+
+static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+ const char *name, umode_t mode,
+ enum rdt_group_type rtype, struct rdtgroup **r)
+{
+ struct rdtgroup *prdtgrp, *rdtgrp;
+ unsigned long files = 0;
+ struct kernfs_node *kn;
+ int ret;
+
+ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
+ if (!prdtgrp) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ /*
+ * Check that the parent directory for a monitor group is a "mon_groups"
+ * directory.
+ */
+ if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (rtype == RDTMON_GROUP &&
+ (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+ prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Pseudo-locking in progress\n");
+ goto out_unlock;
+ }
+
+ /* allocate the rdtgroup. */
+ rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
+ if (!rdtgrp) {
+ ret = -ENOSPC;
+ rdt_last_cmd_puts("Kernel out of memory\n");
+ goto out_unlock;
+ }
+ *r = rdtgrp;
+ rdtgrp->mon.parent = prdtgrp;
+ rdtgrp->type = rtype;
+ INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
+
+ /* kernfs creates the directory for rdtgrp */
+ kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
+ if (IS_ERR(kn)) {
+ ret = PTR_ERR(kn);
+ rdt_last_cmd_puts("kernfs create error\n");
+ goto out_free_rgrp;
+ }
+ rdtgrp->kn = kn;
+
+ /*
+ * kernfs_remove() will drop the reference count on "kn" which
+ * will free it. But we still need it to stick around for the
+ * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
+ * which will be dropped by kernfs_put() in rdtgroup_remove().
+ */
+ kernfs_get(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs perm error\n");
+ goto out_destroy;
+ }
+
+ if (rtype == RDTCTRL_GROUP) {
+ files = RFTYPE_BASE | RFTYPE_CTRL;
+ if (resctrl_arch_mon_capable())
+ files |= RFTYPE_MON;
+ } else {
+ files = RFTYPE_BASE | RFTYPE_MON;
+ }
+
+ ret = rdtgroup_add_files(kn, files);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs fill error\n");
+ goto out_destroy;
+ }
+
+ /*
+ * The caller unlocks the parent_kn upon success.
+ */
+ return 0;
+
+out_destroy:
+ kernfs_put(rdtgrp->kn);
+ kernfs_remove(rdtgrp->kn);
+out_free_rgrp:
+ kfree(rdtgrp);
+out_unlock:
+ rdtgroup_kn_unlock(parent_kn);
+ return ret;
+}
+
+static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
+{
+ kernfs_remove(rgrp->kn);
+ rdtgroup_remove(rgrp);
+}
+
+/*
+ * Create a monitor group under "mon_groups" directory of a control
+ * and monitor group(ctrl_mon). This is a resource group
+ * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
+ */
+static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
+ const char *name, umode_t mode)
+{
+ struct rdtgroup *rdtgrp, *prgrp;
+ int ret;
+
+ ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
+ if (ret)
+ return ret;
+
+ prgrp = rdtgrp->mon.parent;
+ rdtgrp->closid = prgrp->closid;
+
+ ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+ if (ret) {
+ mkdir_rdt_prepare_clean(rdtgrp);
+ goto out_unlock;
+ }
+
+ kernfs_activate(rdtgrp->kn);
+
+ /*
+ * Add the rdtgrp to the list of rdtgrps the parent
+ * ctrl_mon group has to track.
+ */
+ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
+
+out_unlock:
+ rdtgroup_kn_unlock(parent_kn);
+ return ret;
+}
+
+/*
+ * These are rdtgroups created under the root directory. Can be used
+ * to allocate and monitor resources.
+ */
+static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
+ const char *name, umode_t mode)
+{
+ struct rdtgroup *rdtgrp;
+ struct kernfs_node *kn;
+ u32 closid;
+ int ret;
+
+ ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
+ if (ret)
+ return ret;
+
+ kn = rdtgrp->kn;
+ ret = closid_alloc();
+ if (ret < 0) {
+ rdt_last_cmd_puts("Out of CLOSIDs\n");
+ goto out_common_fail;
+ }
+ closid = ret;
+ ret = 0;
+
+ rdtgrp->closid = closid;
+
+ ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+ if (ret)
+ goto out_closid_free;
+
+ kernfs_activate(rdtgrp->kn);
+
+ ret = rdtgroup_init_alloc(rdtgrp);
+ if (ret < 0)
+ goto out_rmid_free;
+
+ list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+
+ if (resctrl_arch_mon_capable()) {
+ /*
+ * Create an empty mon_groups directory to hold the subset
+ * of tasks and cpus to monitor.
+ */
+ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
+ goto out_del_list;
+ }
+ if (is_mba_sc(NULL))
+ rdtgrp->mba_mbps_event = mba_mbps_default_event;
+ }
+
+ goto out_unlock;
+
+out_del_list:
+ list_del(&rdtgrp->rdtgroup_list);
+out_rmid_free:
+ mkdir_rdt_prepare_rmid_free(rdtgrp);
+out_closid_free:
+ closid_free(closid);
+out_common_fail:
+ mkdir_rdt_prepare_clean(rdtgrp);
+out_unlock:
+ rdtgroup_kn_unlock(parent_kn);
+ return ret;
+}
+
+static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+ umode_t mode)
+{
+ /* Do not accept '\n' to avoid unparsable situation. */
+ if (strchr(name, '\n'))
+ return -EINVAL;
+
+ /*
+ * If the parent directory is the root directory and RDT
+ * allocation is supported, add a control and monitoring
+ * subdirectory
+ */
+ if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
+ return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
+
+ /* Else, attempt to add a monitoring subdirectory. */
+ if (resctrl_arch_mon_capable())
+ return rdtgroup_mkdir_mon(parent_kn, name, mode);
+
+ return -EPERM;
+}
+
+static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
+{
+ struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+ u32 closid, rmid;
+ int cpu;
+
+ /* Give any tasks back to the parent group */
+ rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
+
+ /*
+ * Update per cpu closid/rmid of the moved CPUs first.
+ * Note: the closid will not change, but the arch code still needs it.
+ */
+ closid = prdtgrp->closid;
+ rmid = prdtgrp->mon.rmid;
+ for_each_cpu(cpu, &rdtgrp->cpu_mask)
+ resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
+
+ /*
+ * Update the MSR on moved CPUs and CPUs which have moved
+ * task running on them.
+ */
+ cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
+ update_closid_rmid(tmpmask, NULL);
+
+ rdtgrp->flags = RDT_DELETED;
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+
+ /*
+ * Remove the rdtgrp from the parent ctrl_mon group's list
+ */
+ WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+ list_del(&rdtgrp->mon.crdtgrp_list);
+
+ kernfs_remove(rdtgrp->kn);
+
+ return 0;
+}
+
+static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
+{
+ rdtgrp->flags = RDT_DELETED;
+ list_del(&rdtgrp->rdtgroup_list);
+
+ kernfs_remove(rdtgrp->kn);
+ return 0;
+}
+
+static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
+{
+ u32 closid, rmid;
+ int cpu;
+
+ /* Give any tasks back to the default group */
+ rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
+
+ /* Give any CPUs back to the default group */
+ cpumask_or(&rdtgroup_default.cpu_mask,
+ &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
+
+ /* Update per cpu closid and rmid of the moved CPUs first */
+ closid = rdtgroup_default.closid;
+ rmid = rdtgroup_default.mon.rmid;
+ for_each_cpu(cpu, &rdtgrp->cpu_mask)
+ resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
+
+ /*
+ * Update the MSR on moved CPUs and CPUs which have moved
+ * task running on them.
+ */
+ cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
+ update_closid_rmid(tmpmask, NULL);
+
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+ closid_free(rdtgrp->closid);
+
+ rdtgroup_ctrl_remove(rdtgrp);
+
+ /*
+ * Free all the child monitor group rmids.
+ */
+ free_all_child_rdtgrp(rdtgrp);
+
+ return 0;
+}
+
+static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn)
+{
+ /*
+ * Valid within the RCU section it was obtained or while rdtgroup_mutex
+ * is held.
+ */
+ return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
+}
+
+static int rdtgroup_rmdir(struct kernfs_node *kn)
+{
+ struct kernfs_node *parent_kn;
+ struct rdtgroup *rdtgrp;
+ cpumask_var_t tmpmask;
+ int ret = 0;
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ rdtgrp = rdtgroup_kn_lock_live(kn);
+ if (!rdtgrp) {
+ ret = -EPERM;
+ goto out;
+ }
+ parent_kn = rdt_kn_parent(kn);
+
+ /*
+ * If the rdtgroup is a ctrl_mon group and parent directory
+ * is the root directory, remove the ctrl_mon group.
+ *
+ * If the rdtgroup is a mon group and parent directory
+ * is a valid "mon_groups" directory, remove the mon group.
+ */
+ if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
+ rdtgrp != &rdtgroup_default) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ ret = rdtgroup_ctrl_remove(rdtgrp);
+ } else {
+ ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
+ }
+ } else if (rdtgrp->type == RDTMON_GROUP &&
+ is_mon_groups(parent_kn, rdt_kn_name(kn))) {
+ ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
+ } else {
+ ret = -EPERM;
+ }
+
+out:
+ rdtgroup_kn_unlock(kn);
+ free_cpumask_var(tmpmask);
+ return ret;
+}
+
+/**
+ * mongrp_reparent() - replace parent CTRL_MON group of a MON group
+ * @rdtgrp: the MON group whose parent should be replaced
+ * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp
+ * @cpus: cpumask provided by the caller for use during this call
+ *
+ * Replaces the parent CTRL_MON group for a MON group, resulting in all member
+ * tasks' CLOSID immediately changing to that of the new parent group.
+ * Monitoring data for the group is unaffected by this operation.
+ */
+static void mongrp_reparent(struct rdtgroup *rdtgrp,
+ struct rdtgroup *new_prdtgrp,
+ cpumask_var_t cpus)
+{
+ struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+
+ WARN_ON(rdtgrp->type != RDTMON_GROUP);
+ WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
+
+ /* Nothing to do when simply renaming a MON group. */
+ if (prdtgrp == new_prdtgrp)
+ return;
+
+ WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+ list_move_tail(&rdtgrp->mon.crdtgrp_list,
+ &new_prdtgrp->mon.crdtgrp_list);
+
+ rdtgrp->mon.parent = new_prdtgrp;
+ rdtgrp->closid = new_prdtgrp->closid;
+
+ /* Propagate updated closid to all tasks in this group. */
+ rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
+
+ update_closid_rmid(cpus, NULL);
+}
+
+static int rdtgroup_rename(struct kernfs_node *kn,
+ struct kernfs_node *new_parent, const char *new_name)
+{
+ struct kernfs_node *kn_parent;
+ struct rdtgroup *new_prdtgrp;
+ struct rdtgroup *rdtgrp;
+ cpumask_var_t tmpmask;
+ int ret;
+
+ rdtgrp = kernfs_to_rdtgroup(kn);
+ new_prdtgrp = kernfs_to_rdtgroup(new_parent);
+ if (!rdtgrp || !new_prdtgrp)
+ return -ENOENT;
+
+ /* Release both kernfs active_refs before obtaining rdtgroup mutex. */
+ rdtgroup_kn_get(rdtgrp, kn);
+ rdtgroup_kn_get(new_prdtgrp, new_parent);
+
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ /*
+ * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
+ * either kernfs_node is a file.
+ */
+ if (kernfs_type(kn) != KERNFS_DIR ||
+ kernfs_type(new_parent) != KERNFS_DIR) {
+ rdt_last_cmd_puts("Source and destination must be directories");
+ ret = -EPERM;
+ goto out;
+ }
+
+ if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ kn_parent = rdt_kn_parent(kn);
+ if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
+ !is_mon_groups(kn_parent, rdt_kn_name(kn))) {
+ rdt_last_cmd_puts("Source must be a MON group\n");
+ ret = -EPERM;
+ goto out;
+ }
+
+ if (!is_mon_groups(new_parent, new_name)) {
+ rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
+ ret = -EPERM;
+ goto out;
+ }
+
+ /*
+ * If the MON group is monitoring CPUs, the CPUs must be assigned to the
+ * current parent CTRL_MON group and therefore cannot be assigned to
+ * the new parent, making the move illegal.
+ */
+ if (!cpumask_empty(&rdtgrp->cpu_mask) &&
+ rdtgrp->mon.parent != new_prdtgrp) {
+ rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
+ ret = -EPERM;
+ goto out;
+ }
+
+ /*
+ * Allocate the cpumask for use in mongrp_reparent() to avoid the
+ * possibility of failing to allocate it after kernfs_rename() has
+ * succeeded.
+ */
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Perform all input validation and allocations needed to ensure
+ * mongrp_reparent() will succeed before calling kernfs_rename(),
+ * otherwise it would be necessary to revert this call if
+ * mongrp_reparent() failed.
+ */
+ ret = kernfs_rename(kn, new_parent, new_name);
+ if (!ret)
+ mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
+
+ free_cpumask_var(tmpmask);
+
+out:
+ mutex_unlock(&rdtgroup_mutex);
+ rdtgroup_kn_put(rdtgrp, kn);
+ rdtgroup_kn_put(new_prdtgrp, new_parent);
+ return ret;
+}
+
+static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
+{
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
+ seq_puts(seq, ",cdp");
+
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
+ seq_puts(seq, ",cdpl2");
+
+ if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
+ seq_puts(seq, ",mba_MBps");
+
+ if (resctrl_debug)
+ seq_puts(seq, ",debug");
+
+ return 0;
+}
+
+static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
+ .mkdir = rdtgroup_mkdir,
+ .rmdir = rdtgroup_rmdir,
+ .rename = rdtgroup_rename,
+ .show_options = rdtgroup_show_options,
+};
+
+static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
+{
+ rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
+ KERNFS_ROOT_CREATE_DEACTIVATED |
+ KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
+ &rdtgroup_default);
+ if (IS_ERR(rdt_root))
+ return PTR_ERR(rdt_root);
+
+ ctx->kfc.root = rdt_root;
+ rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
+
+ return 0;
+}
+
+static void rdtgroup_destroy_root(void)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ kernfs_destroy_root(rdt_root);
+ rdtgroup_default.kn = NULL;
+}
+
+static void rdtgroup_setup_default(void)
+{
+ mutex_lock(&rdtgroup_mutex);
+
+ rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
+ rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
+ rdtgroup_default.type = RDTCTRL_GROUP;
+ INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
+
+ list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
+
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+static void domain_destroy_mon_state(struct rdt_mon_domain *d)
+{
+ bitmap_free(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ kfree(d->mbm_local);
+}
+
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
+{
+ mutex_lock(&rdtgroup_mutex);
+
+ if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
+ mba_sc_domain_destroy(r, d);
+
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ mutex_lock(&rdtgroup_mutex);
+
+ /*
+ * If resctrl is mounted, remove all the
+ * per domain monitor data directories.
+ */
+ if (resctrl_mounted && resctrl_arch_mon_capable())
+ rmdir_mondata_subdir_allrdtgrp(r, d);
+
+ if (resctrl_is_mbm_enabled())
+ cancel_delayed_work(&d->mbm_over);
+ if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
+ /*
+ * When a package is going down, forcefully
+ * decrement rmid->ebusy. There is no way to know
+ * that the L3 was flushed and hence may lead to
+ * incorrect counts in rare scenarios, but leaving
+ * the RMID as busy creates RMID leaks if the
+ * package never comes back.
+ */
+ __check_limbo(d, true);
+ cancel_delayed_work(&d->cqm_limbo);
+ }
+
+ domain_destroy_mon_state(d);
+
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+/**
+ * domain_setup_mon_state() - Initialise domain monitoring structures.
+ * @r: The resource for the newly online domain.
+ * @d: The newly online domain.
+ *
+ * Allocate monitor resources that belong to this domain.
+ * Called when the first CPU of a domain comes online, regardless of whether
+ * the filesystem is mounted.
+ * During boot this may be called before global allocations have been made by
+ * resctrl_mon_resource_init().
+ *
+ * Returns 0 for success, or -ENOMEM.
+ */
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+ size_t tsize;
+
+ if (resctrl_arch_is_llc_occupancy_enabled()) {
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
+ if (!d->rmid_busy_llc)
+ return -ENOMEM;
+ }
+ if (resctrl_arch_is_mbm_total_enabled()) {
+ tsize = sizeof(*d->mbm_total);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
+ if (!d->mbm_total) {
+ bitmap_free(d->rmid_busy_llc);
+ return -ENOMEM;
+ }
+ }
+ if (resctrl_arch_is_mbm_local_enabled()) {
+ tsize = sizeof(*d->mbm_local);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
+ if (!d->mbm_local) {
+ bitmap_free(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
+{
+ int err = 0;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
+ /* RDT_RESOURCE_MBA is never mon_capable */
+ err = mba_sc_domain_allocate(r, d);
+ }
+
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ int err;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ err = domain_setup_mon_state(r, d);
+ if (err)
+ goto out_unlock;
+
+ if (resctrl_is_mbm_enabled()) {
+ INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
+ mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
+ }
+
+ if (resctrl_arch_is_llc_occupancy_enabled())
+ INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
+
+ /*
+ * If the filesystem is not mounted then only the default resource group
+ * exists. Creation of its directories is deferred until mount time
+ * by rdt_get_tree() calling mkdir_mondata_all().
+ * If resctrl is mounted, add per domain monitor data directories.
+ */
+ if (resctrl_mounted && resctrl_arch_mon_capable())
+ mkdir_mondata_subdir_allrdtgrp(r, d);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+void resctrl_online_cpu(unsigned int cpu)
+{
+ mutex_lock(&rdtgroup_mutex);
+ /* The CPU is set in default rdtgroup after online. */
+ cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+ struct rdtgroup *cr;
+
+ list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
+ break;
+ }
+}
+
+static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu,
+ struct rdt_resource *r)
+{
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
+void resctrl_offline_cpu(unsigned int cpu)
+{
+ struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_mon_domain *d;
+ struct rdtgroup *rdtgrp;
+
+ mutex_lock(&rdtgroup_mutex);
+ list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+ clear_childcpus(rdtgrp, cpu);
+ break;
+ }
+ }
+
+ if (!l3->mon_capable)
+ goto out_unlock;
+
+ d = get_mon_domain_from_cpu(cpu, l3);
+ if (d) {
+ if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0, cpu);
+ }
+ if (resctrl_arch_is_llc_occupancy_enabled() &&
+ cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0, cpu);
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+/*
+ * resctrl_init - resctrl filesystem initialization
+ *
+ * Setup resctrl file system including set up root, create mount point,
+ * register resctrl filesystem, and initialize files under root directory.
+ *
+ * Return: 0 on success or -errno
+ */
+int resctrl_init(void)
+{
+ int ret = 0;
+
+ seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+ sizeof(last_cmd_status_buf));
+
+ rdtgroup_setup_default();
+
+ thread_throttle_mode_init();
+
+ ret = resctrl_mon_resource_init();
+ if (ret)
+ return ret;
+
+ ret = sysfs_create_mount_point(fs_kobj, "resctrl");
+ if (ret) {
+ resctrl_mon_resource_exit();
+ return ret;
+ }
+
+ ret = register_filesystem(&rdt_fs_type);
+ if (ret)
+ goto cleanup_mountpoint;
+
+ /*
+ * Adding the resctrl debugfs directory here may not be ideal since
+ * it would let the resctrl debugfs directory appear on the debugfs
+ * filesystem before the resctrl filesystem is mounted.
+ * It may also be ok since that would enable debugging of RDT before
+ * resctrl is mounted.
+ * The reason why the debugfs directory is created here and not in
+ * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
+ * during the debugfs directory creation also &sb->s_type->i_mutex_key
+ * (the lockdep class of inode->i_rwsem). Other filesystem
+ * interactions (eg. SyS_getdents) have the lock ordering:
+ * &sb->s_type->i_mutex_key --> &mm->mmap_lock
+ * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
+ * is taken, thus creating dependency:
+ * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
+ * issues considering the other two lock dependencies.
+ * By creating the debugfs directory here we avoid a dependency
+ * that may cause deadlock (even though file operations cannot
+ * occur until the filesystem is mounted, but I do not know how to
+ * tell lockdep that).
+ */
+ debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
+
+ return 0;
+
+cleanup_mountpoint:
+ sysfs_remove_mount_point(fs_kobj, "resctrl");
+ resctrl_mon_resource_exit();
+
+ return ret;
+}
+
+static bool resctrl_online_domains_exist(void)
+{
+ struct rdt_resource *r;
+
+ /*
+ * Only walk capable resources to allow resctrl_arch_get_resource()
+ * to return dummy 'not capable' resources.
+ */
+ for_each_alloc_capable_rdt_resource(r) {
+ if (!list_empty(&r->ctrl_domains))
+ return true;
+ }
+
+ for_each_mon_capable_rdt_resource(r) {
+ if (!list_empty(&r->mon_domains))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * resctrl_exit() - Remove the resctrl filesystem and free resources.
+ *
+ * Called by the architecture code in response to a fatal error.
+ * Removes resctrl files and structures from kernfs to prevent further
+ * configuration.
+ *
+ * When called by the architecture code, all CPUs and resctrl domains must be
+ * offline. This ensures the limbo and overflow handlers are not scheduled to
+ * run, meaning the data structures they access can be freed by
+ * resctrl_mon_resource_exit().
+ *
+ * After resctrl_exit() returns, the architecture code should return an
+ * error from all resctrl_arch_ functions that can do this.
+ * resctrl_arch_get_resource() must continue to return struct rdt_resources
+ * with the correct rid field to ensure the filesystem can be unmounted.
+ */
+void resctrl_exit(void)
+{
+ cpus_read_lock();
+ WARN_ON_ONCE(resctrl_online_domains_exist());
+
+ mutex_lock(&rdtgroup_mutex);
+ resctrl_fs_teardown();
+ mutex_unlock(&rdtgroup_mutex);
+
+ cpus_read_unlock();
+
+ debugfs_remove_recursive(debugfs_resctrl);
+ debugfs_resctrl = NULL;
+ unregister_filesystem(&rdt_fs_type);
+
+ /*
+ * Do not remove the sysfs mount point added by resctrl_init() so that
+ * it can be used to umount resctrl.
+ */
+
+ resctrl_mon_resource_exit();
+}
diff --git a/fs/select.c b/fs/select.c
index 7da531b1cf6b..9fb650d03d52 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -630,7 +630,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
ret = -EINVAL;
- if (n < 0)
+ if (unlikely(n < 0))
goto out_nofds;
/* max_fds can increase, so grab it once to avoid race */
@@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
int fd = pollfd->fd;
__poll_t mask, filter;
- if (fd < 0)
+ if (unlikely(fd < 0))
return 0;
CLASS(fd, f)(fd);
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 240d82c6f908..89d2dbbb742c 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -102,7 +102,8 @@ path_to_dentry(struct cifs_sb_info *cifs_sb, const char *path)
while (*s && *s != sep)
s++;
- child = lookup_positive_unlocked(p, dentry, s - p);
+ child = lookup_noperm_positive_unlocked(&QSTR_LEN(p, s - p),
+ dentry);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
@@ -201,7 +202,7 @@ replay_again:
spin_unlock(&cfids->cfid_list_lock);
/*
- * Skip any prefix paths in @path as lookup_positive_unlocked() ends up
+ * Skip any prefix paths in @path as lookup_noperm_positive_unlocked() ends up
* calling ->lookup() which already adds those through
* build_path_from_dentry(). Also, do it earlier as we might reconnect
* below when trying to send compounded request and then potentially
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index a08c42363ffc..fb04e263611c 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -929,7 +929,8 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb)
while (*s && *s != sep)
s++;
- child = lookup_positive_unlocked(p, dentry, s - p);
+ child = lookup_noperm_positive_unlocked(&QSTR_LEN(p, s - p),
+ dentry);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 851b74f557c1..950aa4f912f5 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -160,8 +160,10 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq)
server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
rdata->server = server;
- cifs_negotiate_rsize(server, cifs_sb->ctx,
- tlink_tcon(req->cfile->tlink));
+ if (cifs_sb->ctx->rsize == 0) {
+ cifs_negotiate_rsize(server, cifs_sb->ctx,
+ tlink_tcon(req->cfile->tlink));
+ }
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
&size, &rdata->credits);
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 50f96259d9ad..f9f11cbf89be 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -9,6 +9,7 @@
*
*/
#include <linux/fs.h>
+#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/stat.h>
@@ -78,7 +79,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
- dentry = d_hash_and_lookup(parent, name);
+ dentry = try_lookup_noperm(name, parent);
if (!dentry) {
/*
* If we know that the inode will need to be revalidated
@@ -733,7 +734,10 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
else
cifs_buf_release(cfile->srch_inf.
ntwrk_buf_start);
+ /* Reset all pointers to the network buffer to prevent stale references */
cfile->srch_inf.ntwrk_buf_start = NULL;
+ cfile->srch_inf.srch_entries_start = NULL;
+ cfile->srch_inf.last_entry = NULL;
}
rc = initiate_cifs_search(xid, file, full_path);
if (rc) {
@@ -756,11 +760,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
rc = server->ops->query_dir_next(xid, tcon, &cfile->fid,
search_flags,
&cfile->srch_inf);
+ if (rc)
+ return -ENOENT;
/* FindFirst/Next set last_entry to NULL on malformed reply */
if (cfile->srch_inf.last_entry)
cifs_save_resume_key(cfile->srch_inf.last_entry, cfile);
- if (rc)
- return -ENOENT;
}
if (index_to_find < cfile->srch_inf.index_of_last_entry) {
/* we found the buffer that contains the entry */
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 0b35816d551f..4e28632b5fd6 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -2968,7 +2968,7 @@ replay_again:
/* Eventually save off posix specific response info and timestamps */
err_free_rsp_buf:
- free_rsp_buf(resp_buftype, rsp);
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
kfree(pc_buf);
err_free_req:
cifs_small_buf_release(req);
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 03f606afad93..d7a8a580d013 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -146,12 +146,9 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
{
struct oplock_info *opinfo;
- if (list_empty(&ci->m_op_list))
- return NULL;
-
down_read(&ci->m_lock);
- opinfo = list_first_entry(&ci->m_op_list, struct oplock_info,
- op_entry);
+ opinfo = list_first_entry_or_null(&ci->m_op_list, struct oplock_info,
+ op_entry);
if (opinfo) {
if (opinfo->conn == NULL ||
!atomic_inc_not_zero(&opinfo->refcount))
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index f2a2be8467c6..8d414239b3fe 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -4120,9 +4120,10 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
return -EINVAL;
lock_dir(priv->dir_fp);
- dent = lookup_one(idmap, priv->d_info->name,
- priv->dir_fp->filp->f_path.dentry,
- priv->d_info->name_len);
+ dent = lookup_one(idmap,
+ &QSTR_LEN(priv->d_info->name,
+ priv->d_info->name_len),
+ priv->dir_fp->filp->f_path.dentry);
unlock_dir(priv->dir_fp);
if (IS_ERR(dent)) {
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 482eba0f4dc1..baf0d3031a44 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -409,10 +409,15 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
*pos, count);
+ if (*pos >= XATTR_SIZE_MAX) {
+ pr_err("stream write position %lld is out of bounds\n", *pos);
+ return -EINVAL;
+ }
+
size = *pos + count;
if (size > XATTR_SIZE_MAX) {
size = XATTR_SIZE_MAX;
- count = (*pos + count) - XATTR_SIZE_MAX;
+ count = XATTR_SIZE_MAX - *pos;
}
v_len = ksmbd_vfs_getcasexattr(idmap,
@@ -426,13 +431,6 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
goto out;
}
- if (v_len <= *pos) {
- pr_err("stream write position %lld is out of bounds (stream length: %zd)\n",
- *pos, v_len);
- err = -EINVAL;
- goto out;
- }
-
if (v_len < size) {
wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP);
if (!wbuf) {
@@ -684,7 +682,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
struct ksmbd_file *parent_fp;
int new_type;
int err, lookup_flags = LOOKUP_NO_SYMLINKS;
- int target_lookup_flags = LOOKUP_RENAME_TARGET;
+ int target_lookup_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
if (ksmbd_override_fsids(work))
return -ENOMEM;
diff --git a/fs/stat.c b/fs/stat.c
index 3d9222807214..f95c1dc3eaa4 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -136,13 +136,15 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
* @stat: Where to fill in the attribute flags
* @unit_min: Minimum supported atomic write length in bytes
* @unit_max: Maximum supported atomic write length in bytes
+ * @unit_max_opt: Optimised maximum supported atomic write length in bytes
*
* Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
* atomic write unit_min and unit_max values.
*/
void generic_fill_statx_atomic_writes(struct kstat *stat,
unsigned int unit_min,
- unsigned int unit_max)
+ unsigned int unit_max,
+ unsigned int unit_max_opt)
{
/* Confirm that the request type is known */
stat->result_mask |= STATX_WRITE_ATOMIC;
@@ -153,6 +155,7 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
if (unit_min) {
stat->atomic_write_unit_min = unit_min;
stat->atomic_write_unit_max = unit_max;
+ stat->atomic_write_unit_max_opt = unit_max_opt;
/* Initially only allow 1x segment */
stat->atomic_write_segments_max = 1;
@@ -254,7 +257,7 @@ int vfs_getattr(const struct path *path, struct kstat *stat,
int retval;
retval = security_inode_getattr(path);
- if (retval)
+ if (unlikely(retval))
return retval;
return vfs_getattr_nosec(path, stat, request_mask, query_flags);
}
@@ -425,7 +428,7 @@ SYSCALL_DEFINE2(stat, const char __user *, filename,
int error;
error = vfs_stat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_old_stat(&stat, statbuf);
@@ -438,7 +441,7 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename,
int error;
error = vfs_lstat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_old_stat(&stat, statbuf);
@@ -447,12 +450,13 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename,
SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_fstat(fd, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_fstat(fd, &stat);
+ if (unlikely(error))
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
#endif /* __ARCH_WANT_OLD_STAT */
@@ -506,10 +510,12 @@ SYSCALL_DEFINE2(newstat, const char __user *, filename,
struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat(filename, &stat);
+ int error;
- if (error)
+ error = vfs_stat(filename, &stat);
+ if (unlikely(error))
return error;
+
return cp_new_stat(&stat, statbuf);
}
@@ -520,7 +526,7 @@ SYSCALL_DEFINE2(newlstat, const char __user *, filename,
int error;
error = vfs_lstat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_new_stat(&stat, statbuf);
@@ -534,8 +540,9 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
int error;
error = vfs_fstatat(dfd, filename, &stat, flag);
- if (error)
+ if (unlikely(error))
return error;
+
return cp_new_stat(&stat, statbuf);
}
#endif
@@ -543,12 +550,13 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_fstat(fd, &stat);
+ int error;
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ error = vfs_fstat(fd, &stat);
+ if (unlikely(error))
+ return error;
- return error;
+ return cp_new_stat(&stat, statbuf);
}
#endif
@@ -736,6 +744,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
+ tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
diff --git a/fs/super.c b/fs/super.c
index 97a17f9d9023..bcc4e87123c8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -39,7 +39,8 @@
#include <uapi/linux/mount.h>
#include "internal.h"
-static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner);
static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);
@@ -201,7 +202,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
- total_objects = dentries + inodes + fs_objects + 1;
+ total_objects = dentries + inodes + fs_objects;
if (!total_objects)
total_objects = 1;
@@ -887,52 +888,48 @@ void drop_super_exclusive(struct super_block *sb)
}
EXPORT_SYMBOL(drop_super_exclusive);
-static void __iterate_supers(void (*f)(struct super_block *))
-{
- struct super_block *sb, *p = NULL;
-
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (super_flags(sb, SB_DYING))
- continue;
- sb->s_count++;
- spin_unlock(&sb_lock);
+enum super_iter_flags_t {
+ SUPER_ITER_EXCL = (1U << 0),
+ SUPER_ITER_UNLOCKED = (1U << 1),
+ SUPER_ITER_REVERSE = (1U << 2),
+};
- f(sb);
+static inline struct super_block *first_super(enum super_iter_flags_t flags)
+{
+ if (flags & SUPER_ITER_REVERSE)
+ return list_last_entry(&super_blocks, struct super_block, s_list);
+ return list_first_entry(&super_blocks, struct super_block, s_list);
+}
- spin_lock(&sb_lock);
- if (p)
- __put_super(p);
- p = sb;
- }
- if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
+static inline struct super_block *next_super(struct super_block *sb,
+ enum super_iter_flags_t flags)
+{
+ if (flags & SUPER_ITER_REVERSE)
+ return list_prev_entry(sb, s_list);
+ return list_next_entry(sb, s_list);
}
-/**
- * iterate_supers - call function for all active superblocks
- * @f: function to call
- * @arg: argument to pass to it
- *
- * Scans the superblock list and calls given function, passing it
- * locked superblock and given argument.
- */
-void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
+
+static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
+ enum super_iter_flags_t flags)
{
struct super_block *sb, *p = NULL;
+ bool excl = flags & SUPER_ITER_EXCL;
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- bool locked;
+ guard(spinlock)(&sb_lock);
+ for (sb = first_super(flags);
+ !list_entry_is_head(sb, &super_blocks, s_list);
+ sb = next_super(sb, flags)) {
+ if (super_flags(sb, SB_DYING))
+ continue;
sb->s_count++;
spin_unlock(&sb_lock);
- locked = super_lock_shared(sb);
- if (locked) {
- if (sb->s_root)
- f(sb, arg);
- super_unlock_shared(sb);
+ if (flags & SUPER_ITER_UNLOCKED) {
+ f(sb, arg);
+ } else if (super_lock(sb, excl)) {
+ f(sb, arg);
+ super_unlock(sb, excl);
}
spin_lock(&sb_lock);
@@ -942,7 +939,11 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
}
if (p)
__put_super(p);
- spin_unlock(&sb_lock);
+}
+
+void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
+{
+ __iterate_supers(f, arg, 0);
}
/**
@@ -963,15 +964,15 @@ void iterate_supers_type(struct file_system_type *type,
hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
bool locked;
+ if (super_flags(sb, SB_DYING))
+ continue;
+
sb->s_count++;
spin_unlock(&sb_lock);
locked = super_lock_shared(sb);
- if (locked) {
- if (sb->s_root)
- f(sb, arg);
- super_unlock_shared(sb);
- }
+ if (locked)
+ f(sb, arg);
spin_lock(&sb_lock);
if (p)
@@ -991,23 +992,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
- if (sb->s_dev == dev) {
- bool locked;
-
- sb->s_count++;
- spin_unlock(&sb_lock);
- /* still alive? */
- locked = super_lock(sb, excl);
- if (locked) {
- if (sb->s_root)
- return sb;
- super_unlock(sb, excl);
- }
- /* nope, got unmounted */
- spin_lock(&sb_lock);
- __put_super(sb);
- break;
- }
+ bool locked;
+
+ if (sb->s_dev != dev)
+ continue;
+
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ locked = super_lock(sb, excl);
+ if (locked)
+ return sb;
+
+ spin_lock(&sb_lock);
+ __put_super(sb);
+ break;
}
spin_unlock(&sb_lock);
return NULL;
@@ -1111,11 +1110,9 @@ cancel_readonly:
return retval;
}
-static void do_emergency_remount_callback(struct super_block *sb)
+static void do_emergency_remount_callback(struct super_block *sb, void *unused)
{
- bool locked = super_lock_excl(sb);
-
- if (locked && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
+ if (sb->s_bdev && !sb_rdonly(sb)) {
struct fs_context *fc;
fc = fs_context_for_reconfigure(sb->s_root,
@@ -1126,13 +1123,12 @@ static void do_emergency_remount_callback(struct super_block *sb)
put_fs_context(fc);
}
}
- if (locked)
- super_unlock_excl(sb);
}
static void do_emergency_remount(struct work_struct *work)
{
- __iterate_supers(do_emergency_remount_callback);
+ __iterate_supers(do_emergency_remount_callback, NULL,
+ SUPER_ITER_EXCL | SUPER_ITER_REVERSE);
kfree(work);
printk("Emergency Remount complete\n");
}
@@ -1148,24 +1144,18 @@ void emergency_remount(void)
}
}
-static void do_thaw_all_callback(struct super_block *sb)
+static void do_thaw_all_callback(struct super_block *sb, void *unused)
{
- bool locked = super_lock_excl(sb);
-
- if (locked && sb->s_root) {
- if (IS_ENABLED(CONFIG_BLOCK))
- while (sb->s_bdev && !bdev_thaw(sb->s_bdev))
- pr_warn("Emergency Thaw on %pg\n", sb->s_bdev);
- thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE);
- return;
- }
- if (locked)
- super_unlock_excl(sb);
+ if (IS_ENABLED(CONFIG_BLOCK))
+ while (sb->s_bdev && !bdev_thaw(sb->s_bdev))
+ pr_warn("Emergency Thaw on %pg\n", sb->s_bdev);
+ thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ return;
}
static void do_thaw_all(struct work_struct *work)
{
- __iterate_supers(do_thaw_all_callback);
+ __iterate_supers(do_thaw_all_callback, NULL, SUPER_ITER_EXCL);
kfree(work);
printk(KERN_WARNING "Emergency Thaw complete\n");
}
@@ -1186,6 +1176,66 @@ void emergency_thaw_all(void)
}
}
+static inline bool get_active_super(struct super_block *sb)
+{
+ bool active = false;
+
+ if (super_lock_excl(sb)) {
+ active = atomic_inc_not_zero(&sb->s_active);
+ super_unlock_excl(sb);
+ }
+ return active;
+}
+
+static const char *filesystems_freeze_ptr = "filesystems_freeze";
+
+static void filesystems_freeze_callback(struct super_block *sb, void *unused)
+{
+ if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super)
+ return;
+
+ if (!get_active_super(sb))
+ return;
+
+ if (sb->s_op->freeze_super)
+ sb->s_op->freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+ else
+ freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+
+ deactivate_super(sb);
+}
+
+void filesystems_freeze(void)
+{
+ __iterate_supers(filesystems_freeze_callback, NULL,
+ SUPER_ITER_UNLOCKED | SUPER_ITER_REVERSE);
+}
+
+static void filesystems_thaw_callback(struct super_block *sb, void *unused)
+{
+ if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super)
+ return;
+
+ if (!get_active_super(sb))
+ return;
+
+ if (sb->s_op->thaw_super)
+ sb->s_op->thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+ else
+ thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+
+ deactivate_super(sb);
+}
+
+void filesystems_thaw(void)
+{
+ __iterate_supers(filesystems_thaw_callback, NULL, SUPER_ITER_UNLOCKED);
+}
+
static DEFINE_IDA(unnamed_dev_ida);
/**
@@ -1479,10 +1529,10 @@ static int fs_bdev_freeze(struct block_device *bdev)
if (sb->s_op->freeze_super)
error = sb->s_op->freeze_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
else
error = freeze_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
if (!error)
error = sync_blockdev(bdev);
deactivate_super(sb);
@@ -1528,10 +1578,10 @@ static int fs_bdev_thaw(struct block_device *bdev)
if (sb->s_op->thaw_super)
error = sb->s_op->thaw_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
else
error = thaw_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
deactivate_super(sb);
return error;
}
@@ -1903,7 +1953,7 @@ static int wait_for_partially_frozen(struct super_block *sb)
}
#define FREEZE_HOLDERS (FREEZE_HOLDER_KERNEL | FREEZE_HOLDER_USERSPACE)
-#define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST)
+#define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST | FREEZE_EXCL)
static inline int freeze_inc(struct super_block *sb, enum freeze_holder who)
{
@@ -1929,11 +1979,34 @@ static inline int freeze_dec(struct super_block *sb, enum freeze_holder who)
return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount;
}
-static inline bool may_freeze(struct super_block *sb, enum freeze_holder who)
+static inline bool may_freeze(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
+ lockdep_assert_held(&sb->s_umount);
+
WARN_ON_ONCE((who & ~FREEZE_FLAGS));
WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1);
+ if (who & FREEZE_EXCL) {
+ if (WARN_ON_ONCE(!(who & FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(who & ~(FREEZE_EXCL | FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(!freeze_owner))
+ return false;
+ /* This freeze already has a specific owner. */
+ if (sb->s_writers.freeze_owner)
+ return false;
+ /*
+ * This is already frozen multiple times so we're just
+ * going to take a reference count and mark the freeze as
+ * being owned by the caller.
+ */
+ if (sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount)
+ sb->s_writers.freeze_owner = freeze_owner;
+ return true;
+ }
+
if (who & FREEZE_HOLDER_KERNEL)
return (who & FREEZE_MAY_NEST) ||
sb->s_writers.freeze_kcount == 0;
@@ -1943,10 +2016,61 @@ static inline bool may_freeze(struct super_block *sb, enum freeze_holder who)
return false;
}
+static inline bool may_unfreeze(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
+{
+ lockdep_assert_held(&sb->s_umount);
+
+ WARN_ON_ONCE((who & ~FREEZE_FLAGS));
+ WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1);
+
+ if (who & FREEZE_EXCL) {
+ if (WARN_ON_ONCE(!(who & FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(who & ~(FREEZE_EXCL | FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(!freeze_owner))
+ return false;
+ if (WARN_ON_ONCE(sb->s_writers.freeze_kcount == 0))
+ return false;
+ /* This isn't exclusively frozen. */
+ if (!sb->s_writers.freeze_owner)
+ return false;
+ /* This isn't exclusively frozen by us. */
+ if (sb->s_writers.freeze_owner != freeze_owner)
+ return false;
+ /*
+ * This is still frozen multiple times so we're just
+ * going to drop our reference count and undo our
+ * exclusive freeze.
+ */
+ if ((sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount) > 1)
+ sb->s_writers.freeze_owner = NULL;
+ return true;
+ }
+
+ if (who & FREEZE_HOLDER_KERNEL) {
+ /*
+ * Someone's trying to steal the reference belonging to
+ * @sb->s_writers.freeze_owner.
+ */
+ if (sb->s_writers.freeze_kcount == 1 &&
+ sb->s_writers.freeze_owner)
+ return false;
+ return sb->s_writers.freeze_kcount > 0;
+ }
+
+ if (who & FREEZE_HOLDER_USERSPACE)
+ return sb->s_writers.freeze_ucount > 0;
+
+ return false;
+}
+
/**
* freeze_super - lock the filesystem and force it into a consistent state
* @sb: the super to lock
* @who: context that wants to freeze
+ * @freeze_owner: owner of the freeze
*
* Syncs the super to make sure the filesystem is consistent and calls the fs's
* freeze_fs. Subsequent calls to this without first thawing the fs may return
@@ -1998,7 +2122,7 @@ static inline bool may_freeze(struct super_block *sb, enum freeze_holder who)
* Return: If the freeze was successful zero is returned. If the freeze
* failed a negative error code is returned.
*/
-int freeze_super(struct super_block *sb, enum freeze_holder who)
+int freeze_super(struct super_block *sb, enum freeze_holder who, const void *freeze_owner)
{
int ret;
@@ -2010,7 +2134,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
retry:
if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
- if (may_freeze(sb, who))
+ if (may_freeze(sb, who, freeze_owner))
ret = !!WARN_ON_ONCE(freeze_inc(sb, who) == 1);
else
ret = -EBUSY;
@@ -2032,6 +2156,7 @@ retry:
if (sb_rdonly(sb)) {
/* Nothing to do really... */
WARN_ON_ONCE(freeze_inc(sb, who) > 1);
+ sb->s_writers.freeze_owner = freeze_owner;
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
wake_up_var(&sb->s_writers.frozen);
super_unlock_excl(sb);
@@ -2079,6 +2204,7 @@ retry:
* when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
*/
WARN_ON_ONCE(freeze_inc(sb, who) > 1);
+ sb->s_writers.freeze_owner = freeze_owner;
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
wake_up_var(&sb->s_writers.frozen);
lockdep_sb_freeze_release(sb);
@@ -2093,13 +2219,17 @@ EXPORT_SYMBOL(freeze_super);
* removes that state without releasing the other state or unlocking the
* filesystem.
*/
-static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
int error = -EINVAL;
if (sb->s_writers.frozen != SB_FREEZE_COMPLETE)
goto out_unlock;
+ if (!may_unfreeze(sb, who, freeze_owner))
+ goto out_unlock;
+
/*
* All freezers share a single active reference.
* So just unlock in case there are any left.
@@ -2109,6 +2239,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
if (sb_rdonly(sb)) {
sb->s_writers.frozen = SB_UNFROZEN;
+ sb->s_writers.freeze_owner = NULL;
wake_up_var(&sb->s_writers.frozen);
goto out_deactivate;
}
@@ -2126,6 +2257,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
}
sb->s_writers.frozen = SB_UNFROZEN;
+ sb->s_writers.freeze_owner = NULL;
wake_up_var(&sb->s_writers.frozen);
sb_freeze_unlock(sb, SB_FREEZE_FS);
out_deactivate:
@@ -2141,6 +2273,7 @@ out_unlock:
* thaw_super -- unlock filesystem
* @sb: the super to thaw
* @who: context that wants to freeze
+ * @freeze_owner: owner of the freeze
*
* Unlocks the filesystem and marks it writeable again after freeze_super()
* if there are no remaining freezes on the filesystem.
@@ -2154,13 +2287,14 @@ out_unlock:
* have been frozen through the block layer via multiple block devices.
* The filesystem remains frozen until all block devices are unfrozen.
*/
-int thaw_super(struct super_block *sb, enum freeze_holder who)
+int thaw_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
if (!super_lock_excl(sb)) {
WARN_ON_ONCE("Dying superblock while thawing!");
return -EINVAL;
}
- return thaw_super_locked(sb, who);
+ return thaw_super_locked(sb, who, freeze_owner);
}
EXPORT_SYMBOL(thaw_super);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index cb1af30b49f5..a3fd3cc591bd 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -555,7 +555,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent)
if (unlikely(IS_DEADDIR(d_inode(parent))))
dentry = ERR_PTR(-ENOENT);
else
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (!IS_ERR(dentry) && d_inode(dentry)) {
dput(dentry);
dentry = ERR_PTR(-EEXIST);
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index ea6f06adcd43..059a02691edd 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -19,6 +19,11 @@
#include <linux/highmem.h>
#include "ubifs.h"
+union ubifs_in_ptr {
+ const void *buf;
+ struct folio *folio;
+};
+
/* Fake description object for the "none" compressor */
static struct ubifs_compressor none_compr = {
.compr_type = UBIFS_COMPR_NONE,
@@ -68,28 +73,61 @@ static struct ubifs_compressor zstd_compr = {
/* All UBIFS compressors */
struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
-static int ubifs_compress_req(const struct ubifs_info *c,
- struct acomp_req *req,
- void *out_buf, int *out_len,
- const char *compr_name)
+static void ubifs_compress_common(int *compr_type, union ubifs_in_ptr in_ptr,
+ size_t in_offset, int in_len, bool in_folio,
+ void *out_buf, int *out_len)
{
- struct crypto_wait wait;
- int in_len = req->slen;
+ struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
int dlen = *out_len;
int err;
+ if (*compr_type == UBIFS_COMPR_NONE)
+ goto no_compr;
+
+ /* If the input data is small, do not even try to compress it */
+ if (in_len < UBIFS_MIN_COMPR_LEN)
+ goto no_compr;
+
dlen = min(dlen, in_len - UBIFS_MIN_COMPRESS_DIFF);
- crypto_init_wait(&wait);
- acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &wait);
- acomp_request_set_dst_dma(req, out_buf, dlen);
- err = crypto_acomp_compress(req);
- err = crypto_wait_req(err, &wait);
- *out_len = req->dlen;
- acomp_request_free(req);
+ do {
+ ACOMP_REQUEST_ON_STACK(req, compr->cc);
+ DECLARE_CRYPTO_WAIT(wait);
+
+ acomp_request_set_callback(req, 0, NULL, NULL);
+ if (in_folio)
+ acomp_request_set_src_folio(req, in_ptr.folio,
+ in_offset, in_len);
+ else
+ acomp_request_set_src_dma(req, in_ptr.buf, in_len);
+ acomp_request_set_dst_dma(req, out_buf, dlen);
+ err = crypto_acomp_compress(req);
+ dlen = req->dlen;
+ if (err != -EAGAIN)
+ break;
+
+ req = ACOMP_REQUEST_CLONE(req, GFP_NOFS | __GFP_NOWARN);
+ acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+ err = crypto_acomp_compress(req);
+ err = crypto_wait_req(err, &wait);
+ dlen = req->dlen;
+ acomp_request_free(req);
+ } while (0);
+
+ *out_len = dlen;
+ if (err)
+ goto no_compr;
- return err;
+ return;
+
+no_compr:
+ if (in_folio)
+ memcpy_from_folio(out_buf, in_ptr.folio, in_offset, in_len);
+ else
+ memcpy(out_buf, in_ptr.buf, in_len);
+ *out_len = in_len;
+ *compr_type = UBIFS_COMPR_NONE;
}
/**
@@ -114,32 +152,10 @@ static int ubifs_compress_req(const struct ubifs_info *c,
void ubifs_compress(const struct ubifs_info *c, const void *in_buf,
int in_len, void *out_buf, int *out_len, int *compr_type)
{
- int err;
- struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
-
- if (*compr_type == UBIFS_COMPR_NONE)
- goto no_compr;
+ union ubifs_in_ptr in_ptr = { .buf = in_buf };
- /* If the input data is small, do not even try to compress it */
- if (in_len < UBIFS_MIN_COMPR_LEN)
- goto no_compr;
-
- {
- ACOMP_REQUEST_ALLOC(req, compr->cc, GFP_NOFS | __GFP_NOWARN);
-
- acomp_request_set_src_dma(req, in_buf, in_len);
- err = ubifs_compress_req(c, req, out_buf, out_len, compr->name);
- }
-
- if (err)
- goto no_compr;
-
- return;
-
-no_compr:
- memcpy(out_buf, in_buf, in_len);
- *out_len = in_len;
- *compr_type = UBIFS_COMPR_NONE;
+ ubifs_compress_common(compr_type, in_ptr, 0, in_len, false,
+ out_buf, out_len);
}
/**
@@ -166,55 +182,71 @@ void ubifs_compress_folio(const struct ubifs_info *c, struct folio *in_folio,
size_t in_offset, int in_len, void *out_buf,
int *out_len, int *compr_type)
{
- int err;
- struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
+ union ubifs_in_ptr in_ptr = { .folio = in_folio };
- if (*compr_type == UBIFS_COMPR_NONE)
- goto no_compr;
-
- /* If the input data is small, do not even try to compress it */
- if (in_len < UBIFS_MIN_COMPR_LEN)
- goto no_compr;
+ ubifs_compress_common(compr_type, in_ptr, in_offset, in_len, true,
+ out_buf, out_len);
+}
- {
- ACOMP_REQUEST_ALLOC(req, compr->cc, GFP_NOFS | __GFP_NOWARN);
+static int ubifs_decompress_common(const struct ubifs_info *c,
+ const void *in_buf, int in_len,
+ void *out_ptr, size_t out_offset,
+ int *out_len, bool out_folio,
+ int compr_type)
+{
+ struct ubifs_compressor *compr;
+ int dlen = *out_len;
+ int err;
- acomp_request_set_src_folio(req, in_folio, in_offset, in_len);
- err = ubifs_compress_req(c, req, out_buf, out_len, compr->name);
+ if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
+ ubifs_err(c, "invalid compression type %d", compr_type);
+ return -EINVAL;
}
- if (err)
- goto no_compr;
-
- return;
+ compr = ubifs_compressors[compr_type];
-no_compr:
- memcpy_from_folio(out_buf, in_folio, in_offset, in_len);
- *out_len = in_len;
- *compr_type = UBIFS_COMPR_NONE;
-}
+ if (unlikely(!compr->capi_name)) {
+ ubifs_err(c, "%s compression is not compiled in", compr->name);
+ return -EINVAL;
+ }
-static int ubifs_decompress_req(const struct ubifs_info *c,
- struct acomp_req *req,
- const void *in_buf, int in_len, int *out_len,
- const char *compr_name)
-{
- struct crypto_wait wait;
- int err;
+ if (compr_type == UBIFS_COMPR_NONE) {
+ if (out_folio)
+ memcpy_to_folio(out_ptr, out_offset, in_buf, in_len);
+ else
+ memcpy(out_ptr, in_buf, in_len);
+ *out_len = in_len;
+ return 0;
+ }
- crypto_init_wait(&wait);
- acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &wait);
- acomp_request_set_src_dma(req, in_buf, in_len);
- err = crypto_acomp_decompress(req);
- err = crypto_wait_req(err, &wait);
- *out_len = req->dlen;
+ do {
+ ACOMP_REQUEST_ON_STACK(req, compr->cc);
+ DECLARE_CRYPTO_WAIT(wait);
+ acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+ acomp_request_set_src_dma(req, in_buf, in_len);
+ if (out_folio)
+ acomp_request_set_dst_folio(req, out_ptr, out_offset,
+ dlen);
+ else
+ acomp_request_set_dst_dma(req, out_ptr, dlen);
+ err = crypto_acomp_decompress(req);
+ dlen = req->dlen;
+ if (err != -EAGAIN)
+ break;
+
+ req = ACOMP_REQUEST_CLONE(req, GFP_NOFS | __GFP_NOWARN);
+ err = crypto_acomp_decompress(req);
+ err = crypto_wait_req(err, &wait);
+ dlen = req->dlen;
+ acomp_request_free(req);
+ } while (0);
+
+ *out_len = dlen;
if (err)
ubifs_err(c, "cannot decompress %d bytes, compressor %s, error %d",
- in_len, compr_name, err);
-
- acomp_request_free(req);
+ in_len, compr->name, err);
return err;
}
@@ -235,33 +267,8 @@ static int ubifs_decompress_req(const struct ubifs_info *c,
int ubifs_decompress(const struct ubifs_info *c, const void *in_buf,
int in_len, void *out_buf, int *out_len, int compr_type)
{
- struct ubifs_compressor *compr;
-
- if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
- ubifs_err(c, "invalid compression type %d", compr_type);
- return -EINVAL;
- }
-
- compr = ubifs_compressors[compr_type];
-
- if (unlikely(!compr->capi_name)) {
- ubifs_err(c, "%s compression is not compiled in", compr->name);
- return -EINVAL;
- }
-
- if (compr_type == UBIFS_COMPR_NONE) {
- memcpy(out_buf, in_buf, in_len);
- *out_len = in_len;
- return 0;
- }
-
- {
- ACOMP_REQUEST_ALLOC(req, compr->cc, GFP_NOFS | __GFP_NOWARN);
-
- acomp_request_set_dst_dma(req, out_buf, *out_len);
- return ubifs_decompress_req(c, req, in_buf, in_len, out_len,
- compr->name);
- }
+ return ubifs_decompress_common(c, in_buf, in_len, out_buf, 0, out_len,
+ false, compr_type);
}
/**
@@ -283,34 +290,8 @@ int ubifs_decompress_folio(const struct ubifs_info *c, const void *in_buf,
int in_len, struct folio *out_folio,
size_t out_offset, int *out_len, int compr_type)
{
- struct ubifs_compressor *compr;
-
- if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
- ubifs_err(c, "invalid compression type %d", compr_type);
- return -EINVAL;
- }
-
- compr = ubifs_compressors[compr_type];
-
- if (unlikely(!compr->capi_name)) {
- ubifs_err(c, "%s compression is not compiled in", compr->name);
- return -EINVAL;
- }
-
- if (compr_type == UBIFS_COMPR_NONE) {
- memcpy_to_folio(out_folio, out_offset, in_buf, in_len);
- *out_len = in_len;
- return 0;
- }
-
- {
- ACOMP_REQUEST_ALLOC(req, compr->cc, GFP_NOFS | __GFP_NOWARN);
-
- acomp_request_set_dst_folio(req, out_folio, out_offset,
- *out_len);
- return ubifs_decompress_req(c, req, in_buf, in_len, out_len,
- compr->name);
- }
+ return ubifs_decompress_common(c, in_buf, in_len, out_folio,
+ out_offset, out_len, true, compr_type);
}
/**
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 4f33a4a48886..b4071c9cf8c9 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -115,7 +115,7 @@ void udf_truncate_tail_extent(struct inode *inode)
}
/* This inode entry is in-memory only and thus we don't have to mark
* the inode dirty */
- if (ret == 0)
+ if (ret >= 0)
iinfo->i_lenExtents = inode->i_size;
brelse(epos.bh);
}
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index b780deb81b02..b492794f8e9a 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -262,40 +262,42 @@ static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
return sf_handle;
}
-static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
+static int vboxsf_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = mapping->host;
+ struct folio *folio = NULL;
struct vboxsf_inode *sf_i = VBOXSF_I(inode);
struct vboxsf_handle *sf_handle;
- loff_t off = page_offset(page);
loff_t size = i_size_read(inode);
- u32 nwrite = PAGE_SIZE;
- u8 *buf;
- int err;
-
- if (off + PAGE_SIZE > size)
- nwrite = size & ~PAGE_MASK;
+ int error;
sf_handle = vboxsf_get_write_handle(sf_i);
if (!sf_handle)
return -EBADF;
- buf = kmap(page);
- err = vboxsf_write(sf_handle->root, sf_handle->handle,
- off, &nwrite, buf);
- kunmap(page);
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+ loff_t off = folio_pos(folio);
+ u32 nwrite = folio_size(folio);
+ u8 *buf;
- kref_put(&sf_handle->refcount, vboxsf_handle_release);
+ if (nwrite > size - off)
+ nwrite = size - off;
- if (err == 0) {
- /* mtime changed */
- sf_i->force_restat = 1;
- } else {
- ClearPageUptodate(page);
+ buf = kmap_local_folio(folio, 0);
+ error = vboxsf_write(sf_handle->root, sf_handle->handle,
+ off, &nwrite, buf);
+ kunmap_local(buf);
+
+ folio_unlock(folio);
}
- unlock_page(page);
- return err;
+ kref_put(&sf_handle->refcount, vboxsf_handle_release);
+
+ /* mtime changed */
+ if (error == 0)
+ sf_i->force_restat = 1;
+ return error;
}
static int vboxsf_write_end(struct file *file, struct address_space *mapping,
@@ -347,10 +349,11 @@ out:
*/
const struct address_space_operations vboxsf_reg_aops = {
.read_folio = vboxsf_read_folio,
- .writepage = vboxsf_writepage,
+ .writepages = vboxsf_writepages,
.dirty_folio = filemap_dirty_folio,
.write_begin = simple_write_begin,
.write_end = vboxsf_write_end,
+ .migrate_folio = filemap_migrate_folio,
};
static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
diff --git a/fs/xattr.c b/fs/xattr.c
index fabb2a04501e..8ec5b0204bfd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1428,6 +1428,15 @@ static bool xattr_is_trusted(const char *name)
return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
+static bool xattr_is_maclabel(const char *name)
+{
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+
+ return !strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN) &&
+ security_ismaclabel(suffix);
+}
+
/**
* simple_xattr_list - list all xattr objects
* @inode: inode from which to get the xattrs
@@ -1460,6 +1469,17 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
return err;
+ err = security_inode_listsecurity(inode, buffer, remaining_size);
+ if (err < 0)
+ return err;
+
+ if (buffer) {
+ if (remaining_size < err)
+ return -ERANGE;
+ buffer += err;
+ }
+ remaining_size -= err;
+
read_lock(&xattrs->lock);
for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
xattr = rb_entry(rbp, struct simple_xattr, rb_node);
@@ -1468,6 +1488,10 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (!trusted && xattr_is_trusted(xattr->name))
continue;
+ /* skip MAC labels; these are provided by LSM above */
+ if (xattr_is_maclabel(xattr->name))
+ continue;
+
err = xattr_list_one(&buffer, &remaining_size, xattr->name);
if (err)
break;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 63255820b58a..d954f9b8071f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3312,6 +3312,11 @@ xfs_bmap_compute_alignments(
align = xfs_get_cowextsz_hint(ap->ip);
else if (ap->datatype & XFS_ALLOC_USERDATA)
align = xfs_get_extsz_hint(ap->ip);
+
+ /* Try to align start block to any minimum allocation alignment */
+ if (align > 1 && (ap->flags & XFS_BMAPI_EXTSZALIGN))
+ args->alignment = align;
+
if (align) {
if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
ap->eof, 0, ap->conv, &ap->offset,
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b4d9c6e0f3f9..d5f2729305fa 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -87,6 +87,9 @@ struct xfs_bmalloca {
/* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
#define XFS_BMAPI_NORMAP (1u << 10)
+/* Try to align allocations to the extent size hint */
+#define XFS_BMAPI_EXTSZALIGN (1u << 11)
+
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -98,7 +101,8 @@ struct xfs_bmalloca {
{ XFS_BMAPI_REMAP, "REMAP" }, \
{ XFS_BMAPI_COWFORK, "COWFORK" }, \
{ XFS_BMAPI_NODISCARD, "NODISCARD" }, \
- { XFS_BMAPI_NORMAP, "NORMAP" }
+ { XFS_BMAPI_NORMAP, "NORMAP" },\
+ { XFS_BMAPI_EXTSZALIGN, "EXTSZALIGN" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index d3bd6a86c8fe..34bba96d30ca 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -91,6 +91,7 @@ xfs_log_calc_trans_resv_for_minlogblocks(
*/
if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
xfs_trans_resv_calc(mp, resv);
+ resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
return;
}
@@ -107,6 +108,9 @@ xfs_log_calc_trans_resv_for_minlogblocks(
xfs_trans_resv_calc(mp, resv);
+ /* Copy the dynamic transaction reservation types from the running fs */
+ resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
+
if (xfs_has_reflink(mp)) {
/*
* In the early days of reflink, typical log operation counts
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 13d00c7166e1..86a111d0f2fc 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -22,6 +22,12 @@
#include "xfs_rtbitmap.h"
#include "xfs_attr_item.h"
#include "xfs_log.h"
+#include "xfs_defer.h"
+#include "xfs_bmap_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_rmap_item.h"
+#include "xfs_refcount_item.h"
+#include "xfs_trace.h"
#define _ALLOC true
#define _FREE false
@@ -264,6 +270,42 @@ xfs_rtalloc_block_count(
*/
/*
+ * Finishing a data device refcount updates (t1):
+ * the agfs of the ags containing the blocks: nr_ops * sector size
+ * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_cui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ if (!xfs_has_reflink(mp))
+ return 0;
+
+ return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Realtime refcount updates (t2);
+ * the rt refcount inode
+ * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_cui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ if (!xfs_has_rtreflink(mp))
+ return 0;
+
+ return xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
* Compute the log reservation required to handle the refcount update
* transaction. Refcount updates are always done via deferred log items.
*
@@ -280,19 +322,10 @@ xfs_calc_refcountbt_reservation(
struct xfs_mount *mp,
unsigned int nr_ops)
{
- unsigned int blksz = XFS_FSB_TO_B(mp, 1);
- unsigned int t1, t2 = 0;
+ unsigned int t1, t2;
- if (!xfs_has_reflink(mp))
- return 0;
-
- t1 = xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
-
- if (xfs_has_realtime(mp))
- t2 = xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
- blksz);
+ t1 = xfs_calc_finish_cui_reservation(mp, nr_ops);
+ t2 = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);
return max(t1, t2);
}
@@ -380,6 +413,96 @@ xfs_calc_write_reservation_minlogsize(
}
/*
+ * Finishing an EFI can free the blocks and bmap blocks (t2):
+ * the agf for each of the ags: nr * sector size
+ * the agfl for each of the ags: nr * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming nr extents:
+ * nr exts * 2 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_efi_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap:
+ * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_efi_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_realtime(mp))
+ return 0;
+
+ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
+ mp->m_sb.sb_blocksize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
+ * on each end of the record, and that can cause the AGFL to be refilled or
+ * emptied out.
+ */
+inline unsigned int
+xfs_calc_finish_rui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_rmapbt(mp))
+ return 0;
+ return xfs_calc_finish_efi_reservation(mp, nr);
+}
+
+/*
+ * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
+ * on each end of the record, and that can cause the AGFL to be refilled or
+ * emptied out.
+ */
+inline unsigned int
+xfs_calc_finish_rt_rui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_rtrmapbt(mp))
+ return 0;
+ return xfs_calc_finish_rt_efi_reservation(mp, nr);
+}
+
+/*
+ * In finishing a BUI, we can modify:
+ * the inode being truncated: inode size
+ * dquots
+ * the inode's bmap btree: (max depth + 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_bui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
+ mp->m_sb.sb_blocksize);
+}
+
+/*
* In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
@@ -411,16 +534,8 @@ xfs_calc_itruncate_reservation(
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
- t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
-
- if (xfs_has_realtime(mp)) {
- t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
- } else {
- t3 = 0;
- }
+ t2 = xfs_calc_finish_efi_reservation(mp, 4);
+ t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);
/*
* In the early days of reflink, we included enough reservation to log
@@ -501,9 +616,7 @@ xfs_calc_rename_reservation(
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1));
- t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
- XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 3);
if (xfs_has_parent(mp)) {
unsigned int rename_overhead, exchange_overhead;
@@ -611,9 +724,7 @@ xfs_calc_link_reservation(
overhead += xfs_calc_iunlink_remove_reservation(mp);
t1 = xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
- t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 1);
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrsetm.tr_logres;
@@ -676,9 +787,7 @@ xfs_calc_remove_reservation(
t1 = xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
- t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
- XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 2);
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrrm.tr_logres;
@@ -1181,6 +1290,15 @@ xfs_calc_namespace_reservations(
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
}
+STATIC void
+xfs_calc_default_atomic_ioend_reservation(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ /* Pick a default that will scale reasonably for the log size. */
+ resp->tr_atomic_ioend = resp->tr_itruncate;
+}
+
void
xfs_trans_resv_calc(
struct xfs_mount *mp,
@@ -1275,4 +1393,167 @@ xfs_trans_resv_calc(
resp->tr_itruncate.tr_logcount += logcount_adj;
resp->tr_write.tr_logcount += logcount_adj;
resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
+
+ /*
+ * Now that we've finished computing the static reservations, we can
+ * compute the dynamic reservation for atomic writes.
+ */
+ xfs_calc_default_atomic_ioend_reservation(mp, resp);
+}
+
+/*
+ * Return the per-extent and fixed transaction reservation sizes needed to
+ * complete an atomic write.
+ */
+STATIC unsigned int
+xfs_calc_atomic_write_ioend_geometry(
+ struct xfs_mount *mp,
+ unsigned int *step_size)
+{
+ const unsigned int efi = xfs_efi_log_space(1);
+ const unsigned int efd = xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1);
+ const unsigned int rud = xfs_rud_log_space();
+ const unsigned int cui = xfs_cui_log_space(1);
+ const unsigned int cud = xfs_cud_log_space();
+ const unsigned int bui = xfs_bui_log_space(1);
+ const unsigned int bud = xfs_bud_log_space();
+
+ /*
+ * Maximum overhead to complete an atomic write ioend in software:
+ * remove data fork extent + remove cow fork extent + map extent into
+ * data fork.
+ *
+ * tx0: Creates a BUI and a CUI and that's all it needs.
+ *
+ * tx1: Roll to finish the BUI. Need space for the BUD, an RUI, and
+ * enough space to relog the CUI (== CUI + CUD).
+ *
+ * tx2: Roll again to finish the RUI. Need space for the RUD and space
+ * to relog the CUI.
+ *
+ * tx3: Roll again, need space for the CUD and possibly a new EFI.
+ *
+ * tx4: Roll again, need space for an EFD.
+ *
+ * If the extent referenced by the pair of BUI/CUI items is not the one
+ * being currently processed, then we need to reserve space to relog
+ * both items.
+ */
+ const unsigned int tx0 = bui + cui;
+ const unsigned int tx1 = bud + rui + cui + cud;
+ const unsigned int tx2 = rud + cui + cud;
+ const unsigned int tx3 = cud + efi;
+ const unsigned int tx4 = efd;
+ const unsigned int relog = bui + bud + cui + cud;
+
+ const unsigned int per_intent = max(max3(tx0, tx1, tx2),
+ max3(tx3, tx4, relog));
+
+ /* Overhead to finish one step of each intent item type */
+ const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
+ const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
+ const unsigned int f4 = xfs_calc_finish_bui_reservation(mp, 1);
+
+ /* We only finish one item per transaction in a chain */
+ *step_size = max(f4, max3(f1, f2, f3));
+
+ return per_intent;
+}
+
+/*
+ * Compute the maximum size (in fsblocks) of atomic writes that we can complete
+ * given the existing log reservations.
+ */
+xfs_extlen_t
+xfs_calc_max_atomic_write_fsblocks(
+ struct xfs_mount *mp)
+{
+ const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
+ unsigned int per_intent = 0;
+ unsigned int step_size = 0;
+ unsigned int ret = 0;
+
+ if (resv->tr_logres > 0) {
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
+ &step_size);
+
+ if (resv->tr_logres >= step_size)
+ ret = (resv->tr_logres - step_size) / per_intent;
+ }
+
+ trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
+ resv->tr_logres, ret);
+
+ return ret;
+}
+
+/*
+ * Compute the log blocks and transaction reservation needed to complete an
+ * atomic write of a given number of blocks. Worst case, each block requires
+ * separate handling. A return value of 0 means something went wrong.
+ */
+xfs_extlen_t
+xfs_calc_atomic_write_log_geometry(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount,
+ unsigned int *new_logres)
+{
+ struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
+ uint old_logres = curr_res->tr_logres;
+ unsigned int per_intent, step_size;
+ unsigned int logres;
+ xfs_extlen_t min_logblocks;
+
+ ASSERT(blockcount > 0);
+
+ xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
+
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
+
+ /* Check for overflows */
+ if (check_mul_overflow(blockcount, per_intent, &logres) ||
+ check_add_overflow(logres, step_size, &logres))
+ return 0;
+
+ curr_res->tr_logres = logres;
+ min_logblocks = xfs_log_calc_minimum_size(mp);
+ curr_res->tr_logres = old_logres;
+
+ trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
+ blockcount, min_logblocks, logres);
+
+ *new_logres = logres;
+ return min_logblocks;
+}
+
+/*
+ * Compute the transaction reservation needed to complete an out of place
+ * atomic write of a given number of blocks.
+ */
+int
+xfs_calc_atomic_write_reservation(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount)
+{
+ unsigned int new_logres;
+ xfs_extlen_t min_logblocks;
+
+ /*
+ * If the caller doesn't ask for a specific atomic write size, then
+ * use the defaults.
+ */
+ if (blockcount == 0) {
+ xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
+ return 0;
+ }
+
+ min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
+ &new_logres);
+ if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
+ return -EINVAL;
+
+ M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
+ return 0;
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 0554b9d775d2..336279e0fc61 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -48,6 +48,7 @@ struct xfs_trans_resv {
struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */
struct xfs_trans_res tr_sb; /* modify superblock */
struct xfs_trans_res tr_fsyncts; /* update timestamps on fsync */
+ struct xfs_trans_res tr_atomic_ioend; /* untorn write completion */
};
/* shorthand way of accessing reservation structure */
@@ -98,8 +99,32 @@ struct xfs_trans_resv {
void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
uint xfs_allocfree_block_count(struct xfs_mount *mp, uint num_ops);
+unsigned int xfs_calc_finish_bui_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+
+unsigned int xfs_calc_finish_efi_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+unsigned int xfs_calc_finish_rt_efi_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+
+unsigned int xfs_calc_finish_rui_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+unsigned int xfs_calc_finish_rt_rui_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+
+unsigned int xfs_calc_finish_cui_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+unsigned int xfs_calc_finish_rt_cui_reservation(struct xfs_mount *mp,
+ unsigned int nr_ops);
+
unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp);
unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
+xfs_extlen_t xfs_calc_max_atomic_write_fsblocks(struct xfs_mount *mp);
+xfs_extlen_t xfs_calc_atomic_write_log_geometry(struct xfs_mount *mp,
+ xfs_extlen_t blockcount, unsigned int *new_logres);
+int xfs_calc_atomic_write_reservation(struct xfs_mount *mp,
+ xfs_extlen_t blockcount);
+
#endif /* __XFS_TRANS_RESV_H__ */
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index e629663e460a..9b598c5790ad 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -123,7 +123,7 @@ xchk_fsfreeze(
{
int error;
- error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL);
trace_xchk_fsfreeze(sc, error);
return error;
}
@@ -135,7 +135,7 @@ xchk_fsthaw(
int error;
/* This should always succeed, we have a kernel freeze */
- error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL);
trace_xchk_fsthaw(sc, error);
return error;
}
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 3537f3cca6d5..9c12cb844231 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -153,8 +153,7 @@ xrep_orphanage_create(
/* Try to find the orphanage directory. */
inode_lock_nested(root_inode, I_MUTEX_PARENT);
- orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry,
- strlen(ORPHANAGE));
+ orphanage_dentry = lookup_noperm(&QSTR(ORPHANAGE), root_dentry);
if (IS_ERR(orphanage_dentry)) {
error = PTR_ERR(orphanage_dentry);
goto out_unlock_root;
@@ -445,7 +444,7 @@ xrep_adoption_check_dcache(
if (!d_orphanage)
return 0;
- d_child = d_hash_and_lookup(d_orphanage, &qname);
+ d_child = try_lookup_noperm(&qname, d_orphanage);
if (d_child) {
trace_xrep_adoption_check_child(sc->mp, d_child);
@@ -482,7 +481,7 @@ xrep_adoption_zap_dcache(
if (!d_orphanage)
return;
- d_child = d_hash_and_lookup(d_orphanage, &qname);
+ d_child = try_lookup_noperm(&qname, d_orphanage);
while (d_child != NULL) {
trace_xrep_adoption_invalidate_child(sc->mp, d_child);
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9908850bf76f..76e24032e99a 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -680,8 +680,6 @@ xfs_scrub_metadata(
if (error)
goto out;
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB);
-
sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
if (!sc) {
error = -ENOMEM;
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index fe21c76f75b8..2a736d10eafb 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -18,42 +18,36 @@ xfs_rw_bdev(
enum req_op op)
{
- unsigned int is_vmalloc = is_vmalloc_addr(data);
- unsigned int left = count;
+ unsigned int done = 0, added;
int error;
struct bio *bio;
- if (is_vmalloc && op == REQ_OP_WRITE)
- flush_kernel_vmap_range(data, count);
+ op |= REQ_META | REQ_SYNC;
+ if (!is_vmalloc_addr(data))
+ return bdev_rw_virt(bdev, sector, data, count, op);
- bio = bio_alloc(bdev, bio_max_vecs(left), op | REQ_META | REQ_SYNC,
- GFP_KERNEL);
+ bio = bio_alloc(bdev, bio_max_vecs(count), op, GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
do {
- struct page *page = kmem_to_page(data);
- unsigned int off = offset_in_page(data);
- unsigned int len = min_t(unsigned, left, PAGE_SIZE - off);
-
- while (bio_add_page(bio, page, len, off) != len) {
+ added = bio_add_vmalloc_chunk(bio, data + done, count - done);
+ if (!added) {
struct bio *prev = bio;
- bio = bio_alloc(prev->bi_bdev, bio_max_vecs(left),
+ bio = bio_alloc(prev->bi_bdev,
+ bio_max_vecs(count - done),
prev->bi_opf, GFP_KERNEL);
bio->bi_iter.bi_sector = bio_end_sector(prev);
bio_chain(prev, bio);
-
submit_bio(prev);
}
-
- data += len;
- left -= len;
- } while (left > 0);
+ done += added;
+ } while (done < count);
error = submit_bio_wait(bio);
bio_put(bio);
- if (is_vmalloc && op == REQ_OP_READ)
+ if (op == REQ_OP_READ)
invalidate_kernel_vmap_range(data, count);
return error;
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 3d52e9d7ad57..646c515ee355 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -77,6 +77,11 @@ xfs_bui_item_size(
*nbytes += xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents);
}
+unsigned int xfs_bui_log_space(unsigned int nr)
+{
+ return xlog_item_space(1, xfs_bui_log_format_sizeof(nr));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given bui log item. We use only 1 iovec, and we point that
@@ -168,6 +173,11 @@ xfs_bud_item_size(
*nbytes += sizeof(struct xfs_bud_log_format);
}
+unsigned int xfs_bud_log_space(void)
+{
+ return xlog_item_space(1, sizeof(struct xfs_bud_log_format));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given bud log item. We use only 1 iovec, and we point that
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index 6fee6a508343..b42fee06899d 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -72,4 +72,7 @@ struct xfs_bmap_intent;
void xfs_bmap_defer_add(struct xfs_trans *tp, struct xfs_bmap_intent *bi);
+unsigned int xfs_bui_log_space(unsigned int nr);
+unsigned int xfs_bud_log_space(void);
+
#endif /* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1a2b3f06fa71..8af83bd161f9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1333,45 +1333,18 @@ static void
xfs_buf_submit_bio(
struct xfs_buf *bp)
{
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int nr_vecs = bio_add_max_vecs(bp->b_addr, len);
unsigned int map = 0;
struct blk_plug plug;
struct bio *bio;
- if (is_vmalloc_addr(bp->b_addr)) {
- unsigned int size = BBTOB(bp->b_length);
- unsigned int alloc_size = roundup(size, PAGE_SIZE);
- void *data = bp->b_addr;
-
- bio = bio_alloc(bp->b_target->bt_bdev, alloc_size >> PAGE_SHIFT,
- xfs_buf_bio_op(bp), GFP_NOIO);
-
- do {
- unsigned int len = min(size, PAGE_SIZE);
-
- ASSERT(offset_in_page(data) == 0);
- __bio_add_page(bio, vmalloc_to_page(data), len, 0);
- data += len;
- size -= len;
- } while (size);
-
- flush_kernel_vmap_range(bp->b_addr, alloc_size);
- } else {
- /*
- * Single folio or slab allocation. Must be contiguous and thus
- * only a single bvec is needed.
- *
- * This uses the page based bio add helper for now as that is
- * the lowest common denominator between folios and slab
- * allocations. To be replaced with a better block layer
- * helper soon (hopefully).
- */
- bio = bio_alloc(bp->b_target->bt_bdev, 1, xfs_buf_bio_op(bp),
- GFP_NOIO);
- __bio_add_page(bio, virt_to_page(bp->b_addr),
- BBTOB(bp->b_length),
- offset_in_page(bp->b_addr));
- }
-
+ bio = bio_alloc(bp->b_target->bt_bdev, nr_vecs, xfs_buf_bio_op(bp),
+ GFP_NOIO);
+ if (is_vmalloc_addr(bp->b_addr))
+ bio_add_vmalloc(bio, bp->b_addr, len);
+ else
+ bio_add_virt_nofail(bio, bp->b_addr, len);
bio->bi_private = bp;
bio->bi_end_io = xfs_buf_bio_end_io;
@@ -1714,23 +1687,65 @@ xfs_free_buftarg(
kfree(btp);
}
+/*
+ * Configure this buffer target for hardware-assisted atomic writes if the
+ * underlying block device supports is congruent with the filesystem geometry.
+ */
+static inline void
+xfs_configure_buftarg_atomic_writes(
+ struct xfs_buftarg *btp)
+{
+ struct xfs_mount *mp = btp->bt_mount;
+ unsigned int min_bytes, max_bytes;
+
+ min_bytes = bdev_atomic_write_unit_min_bytes(btp->bt_bdev);
+ max_bytes = bdev_atomic_write_unit_max_bytes(btp->bt_bdev);
+
+ /*
+ * Ignore atomic write geometry that is nonsense or doesn't even cover
+ * a single fsblock.
+ */
+ if (min_bytes > max_bytes ||
+ min_bytes > mp->m_sb.sb_blocksize ||
+ max_bytes < mp->m_sb.sb_blocksize) {
+ min_bytes = 0;
+ max_bytes = 0;
+ }
+
+ btp->bt_bdev_awu_min = min_bytes;
+ btp->bt_bdev_awu_max = max_bytes;
+}
+
+/* Configure a buffer target that abstracts a block device. */
int
-xfs_setsize_buftarg(
+xfs_configure_buftarg(
struct xfs_buftarg *btp,
unsigned int sectorsize)
{
+ int error;
+
+ ASSERT(btp->bt_bdev != NULL);
+
/* Set up metadata sector size info */
btp->bt_meta_sectorsize = sectorsize;
btp->bt_meta_sectormask = sectorsize - 1;
- if (set_blocksize(btp->bt_bdev_file, sectorsize)) {
+ error = bdev_validate_blocksize(btp->bt_bdev, sectorsize);
+ if (error) {
xfs_warn(btp->bt_mount,
- "Cannot set_blocksize to %u on device %pg",
- sectorsize, btp->bt_bdev);
+ "Cannot use blocksize %u on device %pg, err %d",
+ sectorsize, btp->bt_bdev, error);
return -EINVAL;
}
- return 0;
+ /*
+ * Flush the block device pagecache so our bios see anything dirtied
+ * before mount.
+ */
+ if (bdev_can_atomic_write(btp->bt_bdev))
+ xfs_configure_buftarg_atomic_writes(btp);
+
+ return sync_blockdev(btp->bt_bdev);
}
int
@@ -1779,6 +1794,8 @@ xfs_alloc_buftarg(
{
struct xfs_buftarg *btp;
const struct dax_holder_operations *ops = NULL;
+ int error;
+
#if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
ops = &xfs_dax_holder_operations;
@@ -1792,28 +1809,31 @@ xfs_alloc_buftarg(
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
mp, ops);
- if (bdev_can_atomic_write(btp->bt_bdev)) {
- btp->bt_bdev_awu_min = bdev_atomic_write_unit_min_bytes(
- btp->bt_bdev);
- btp->bt_bdev_awu_max = bdev_atomic_write_unit_max_bytes(
- btp->bt_bdev);
- }
+ /*
+ * Flush and invalidate all devices' pagecaches before reading any
+ * metadata because XFS doesn't use the bdev pagecache.
+ */
+ error = sync_blockdev(btp->bt_bdev);
+ if (error)
+ goto error_free;
/*
* When allocating the buftargs we have not yet read the super block and
* thus don't know the file system sector size yet.
*/
- if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
- goto error_free;
- if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
- mp->m_super->s_id))
+ btp->bt_meta_sectorsize = bdev_logical_block_size(btp->bt_bdev);
+ btp->bt_meta_sectormask = btp->bt_meta_sectorsize - 1;
+
+ error = xfs_init_buftarg(btp, btp->bt_meta_sectorsize,
+ mp->m_super->s_id);
+ if (error)
goto error_free;
return btp;
error_free:
kfree(btp);
- return NULL;
+ return ERR_PTR(error);
}
static inline void
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d0b065a9a9f0..9d2ab567cf81 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -112,7 +112,7 @@ struct xfs_buftarg {
struct percpu_counter bt_readahead_count;
struct ratelimit_state bt_ioerror_rl;
- /* Atomic write unit values */
+ /* Atomic write unit values, bytes */
unsigned int bt_bdev_awu_min;
unsigned int bt_bdev_awu_max;
@@ -374,7 +374,7 @@ struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
-extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int);
+int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 19eb0b7a3e58..90139e0f3271 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -104,6 +104,25 @@ xfs_buf_item_size_segment(
}
/*
+ * Compute the worst case log item overhead for an invalidated buffer with the
+ * given map count and block size.
+ */
+unsigned int
+xfs_buf_inval_log_space(
+ unsigned int map_count,
+ unsigned int blocksize)
+{
+ unsigned int chunks = DIV_ROUND_UP(blocksize, XFS_BLF_CHUNK);
+ unsigned int bitmap_size = DIV_ROUND_UP(chunks, NBWORD);
+ unsigned int ret =
+ offsetof(struct xfs_buf_log_format, blf_data_map) +
+ (bitmap_size * sizeof_field(struct xfs_buf_log_format,
+ blf_data_map[0]));
+
+ return ret * map_count;
+}
+
+/*
* Return the number of log iovecs and space needed to log the given buf log
* item.
*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 8cde85259a58..e10e324cd245 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -64,6 +64,9 @@ static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp)
void xfs_buf_iodone(struct xfs_buf *);
bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
+unsigned int xfs_buf_inval_log_space(unsigned int map_count,
+ unsigned int blocksize);
+
extern struct kmem_cache *xfs_buf_item_cache;
#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index c1a306268ae4..94d0873bcd62 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -167,6 +167,14 @@ xfs_discard_extents(
return error;
}
+/*
+ * Care must be taken setting up the trim cursor as the perags may not have been
+ * initialised when the cursor is initialised. e.g. a clean mount which hasn't
+ * read in AGFs and the first operation run on the mounted fs is a trim. This
+ * can result in perag fields that aren't initialised until
+ * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for
+ * the free space search.
+ */
struct xfs_trim_cur {
xfs_agblock_t start;
xfs_extlen_t count;
@@ -204,6 +212,14 @@ xfs_trim_gather_extents(
if (error)
goto out_trans_cancel;
+ /*
+ * First time through tcur->count will not have been initialised as
+ * pag->pagf_longest is not guaranteed to be valid before we read
+ * the AGF buffer above.
+ */
+ if (!tcur->count)
+ tcur->count = pag->pagf_longest;
+
if (tcur->by_bno) {
/* sub-AG discard request always starts at tcur->start */
cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
@@ -350,7 +366,6 @@ xfs_trim_perag_extents(
{
struct xfs_trim_cur tcur = {
.start = start,
- .count = pag->pagf_longest,
.end = end,
.minlen = minlen,
};
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 777438b853da..d574f5f639fa 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -83,6 +83,11 @@ xfs_efi_item_size(
*nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents);
}
+unsigned int xfs_efi_log_space(unsigned int nr)
+{
+ return xlog_item_space(1, xfs_efi_log_format_sizeof(nr));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given efi log item. We use only 1 iovec, and we point that
@@ -254,6 +259,11 @@ xfs_efd_item_size(
*nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents);
}
+unsigned int xfs_efd_log_space(unsigned int nr)
+{
+ return xlog_item_space(1, xfs_efd_log_format_sizeof(nr));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given efd log item. We use only 1 iovec, and we point that
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 41b7c4306079..c8402040410b 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -94,4 +94,7 @@ void xfs_extent_free_defer_add(struct xfs_trans *tp,
struct xfs_extent_free_item *xefi,
struct xfs_defer_pending **dfpp);
+unsigned int xfs_efi_log_space(unsigned int nr);
+unsigned int xfs_efd_log_space(unsigned int nr);
+
#endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 84f08c976ac4..48254a72071b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -576,7 +576,10 @@ xfs_dio_write_end_io(
nofs_flag = memalloc_nofs_save();
if (flags & IOMAP_DIO_COW) {
- error = xfs_reflink_end_cow(ip, offset, size);
+ if (iocb->ki_flags & IOCB_ATOMIC)
+ error = xfs_reflink_end_atomic_cow(ip, offset, size);
+ else
+ error = xfs_reflink_end_cow(ip, offset, size);
if (error)
goto out;
}
@@ -726,6 +729,72 @@ xfs_file_dio_write_zoned(
}
/*
+ * Handle block atomic writes
+ *
+ * Two methods of atomic writes are supported:
+ * - REQ_ATOMIC-based, which would typically use some form of HW offload in the
+ * disk
+ * - COW-based, which uses a COW fork as a staging extent for data updates
+ * before atomically updating extent mappings for the range being written
+ *
+ */
+static noinline ssize_t
+xfs_file_dio_write_atomic(
+ struct xfs_inode *ip,
+ struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ unsigned int iolock = XFS_IOLOCK_SHARED;
+ ssize_t ret, ocount = iov_iter_count(from);
+ const struct iomap_ops *dops;
+
+ /*
+ * HW offload should be faster, so try that first if it is already
+ * known that the write length is not too large.
+ */
+ if (ocount > xfs_inode_buftarg(ip)->bt_bdev_awu_max)
+ dops = &xfs_atomic_write_cow_iomap_ops;
+ else
+ dops = &xfs_direct_write_iomap_ops;
+
+retry:
+ ret = xfs_ilock_iocb_for_write(iocb, &iolock);
+ if (ret)
+ return ret;
+
+ ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
+ if (ret)
+ goto out_unlock;
+
+ /* Demote similar to xfs_file_dio_write_aligned() */
+ if (iolock == XFS_IOLOCK_EXCL) {
+ xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ iolock = XFS_IOLOCK_SHARED;
+ }
+
+ trace_xfs_file_direct_write(iocb, from);
+ ret = iomap_dio_rw(iocb, from, dops, &xfs_dio_write_ops,
+ 0, NULL, 0);
+
+ /*
+ * The retry mechanism is based on the ->iomap_begin method returning
+ * -ENOPROTOOPT, which would be when the REQ_ATOMIC-based write is not
+ * possible. The REQ_ATOMIC-based method typically not be possible if
+ * the write spans multiple extents or the disk blocks are misaligned.
+ */
+ if (ret == -ENOPROTOOPT && dops == &xfs_direct_write_iomap_ops) {
+ xfs_iunlock(ip, iolock);
+ dops = &xfs_atomic_write_cow_iomap_ops;
+ goto retry;
+ }
+
+out_unlock:
+ if (iolock)
+ xfs_iunlock(ip, iolock);
+ return ret;
+}
+
+/*
* Handle block unaligned direct I/O writes
*
* In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
@@ -840,6 +909,8 @@ xfs_file_dio_write(
return xfs_file_dio_write_unaligned(ip, iocb, from);
if (xfs_is_zoned_inode(ip))
return xfs_file_dio_write_zoned(ip, iocb, from);
+ if (iocb->ki_flags & IOCB_ATOMIC)
+ return xfs_file_dio_write_atomic(ip, iocb, from);
return xfs_file_dio_write_aligned(ip, iocb, from,
&xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
}
@@ -1032,14 +1103,12 @@ xfs_file_write_iter(
return xfs_file_dax_write(iocb, from);
if (iocb->ki_flags & IOCB_ATOMIC) {
- /*
- * Currently only atomic writing of a single FS block is
- * supported. It would be possible to atomic write smaller than
- * a FS block, but there is no requirement to support this.
- * Note that iomap also does not support this yet.
- */
- if (ocount != ip->i_mount->m_sb.sb_blocksize)
+ if (ocount < xfs_get_atomic_write_min(ip))
return -EINVAL;
+
+ if (ocount > xfs_get_atomic_write_max(ip))
+ return -EINVAL;
+
ret = generic_atomic_write_valid(iocb, from);
if (ret)
return ret;
@@ -1488,7 +1557,7 @@ xfs_file_open(
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
- if (xfs_inode_can_atomicwrite(XFS_I(inode)))
+ if (xfs_get_atomic_write_min(XFS_I(inode)) > 0)
file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
return generic_file_open(inode, file);
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a961aa420c48..044918fbae06 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -304,11 +304,9 @@ xfs_filestream_create_association(
* for us, so all we need to do here is take another active reference to
* the perag for the cached association.
*
- * If we fail to store the association, we need to drop the fstrms
- * counter as well as drop the perag reference we take here for the
- * item. We do not need to return an error for this failure - as long as
- * we return a referenced AG, the allocation can still go ahead just
- * fine.
+ * If we fail to store the association, we do not need to return an
+ * error for this failure - as long as we return a referenced AG, the
+ * allocation can still go ahead just fine.
*/
item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!item)
@@ -316,14 +314,9 @@ xfs_filestream_create_association(
atomic_inc(&pag_group(args->pag)->xg_active_ref);
item->pag = args->pag;
- error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
- if (error)
- goto out_free_item;
+ xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
return 0;
-out_free_item:
- xfs_perag_rele(item->pag);
- kfree(item);
out_put_fstrms:
atomic_dec(&args->pag->pagf_fstrms);
return 0;
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index f18fec0adf66..f6f628c01feb 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -23,8 +23,6 @@ xfs_param_t xfs_params = {
.inherit_sync = { 0, 1, 1 },
.inherit_nodump = { 0, 1, 1 },
.inherit_noatim = { 0, 1, 1 },
- .xfs_buf_timer = { 100/2, 1*100, 30*100 },
- .xfs_buf_age = { 1*100, 15*100, 7200*100},
.inherit_nosym = { 0, 0, 1 },
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index eae0159983ca..d7e2b902ef5c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -356,19 +356,9 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
(XFS_IS_REALTIME_INODE(ip) ? \
(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
-static inline bool
-xfs_inode_can_atomicwrite(
- struct xfs_inode *ip)
+static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
-
- if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
- return false;
- if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
- return false;
-
- return true;
+ return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0;
}
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index cb23c8871f81..ff05e6b1b0bb 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -798,6 +798,38 @@ imap_spans_range(
return true;
}
+static bool
+xfs_bmap_hw_atomic_write_possible(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t end_fsb)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fsize_t len = XFS_FSB_TO_B(mp, end_fsb - offset_fsb);
+
+ /*
+ * atomic writes are required to be naturally aligned for disk blocks,
+ * which ensures that we adhere to block layer rules that we won't
+ * straddle any boundary or violate write alignment requirement.
+ */
+ if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
+ return false;
+
+ /*
+ * Spanning multiple extents would mean that multiple BIOs would be
+ * issued, and so would lose atomicity required for REQ_ATOMIC-based
+ * atomics.
+ */
+ if (!imap_spans_range(imap, offset_fsb, end_fsb))
+ return false;
+
+ /*
+ * The ->iomap_begin caller should ensure this, but check anyway.
+ */
+ return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max;
+}
+
static int
xfs_direct_write_iomap_begin(
struct inode *inode,
@@ -812,9 +844,11 @@ xfs_direct_write_iomap_begin(
struct xfs_bmbt_irec imap, cmap;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+ xfs_fileoff_t orig_end_fsb = end_fsb;
int nimaps = 1, error = 0;
bool shared = false;
u16 iomap_flags = 0;
+ bool needs_alloc;
unsigned int lockmode;
u64 seq;
@@ -875,13 +909,37 @@ relock:
(flags & IOMAP_DIRECT) || IS_DAX(inode));
if (error)
goto out_unlock;
- if (shared)
+ if (shared) {
+ if ((flags & IOMAP_ATOMIC) &&
+ !xfs_bmap_hw_atomic_write_possible(ip, &cmap,
+ offset_fsb, end_fsb)) {
+ error = -ENOPROTOOPT;
+ goto out_unlock;
+ }
goto out_found_cow;
+ }
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
}
- if (imap_needs_alloc(inode, flags, &imap, nimaps))
+ needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
+
+ if (flags & IOMAP_ATOMIC) {
+ error = -ENOPROTOOPT;
+ /*
+ * If we allocate less than what is required for the write
+ * then we may end up with multiple extents, which means that
+ * REQ_ATOMIC-based cannot be used, so avoid this possibility.
+ */
+ if (needs_alloc && orig_end_fsb - offset_fsb > 1)
+ goto out_unlock;
+
+ if (!xfs_bmap_hw_atomic_write_possible(ip, &imap, offset_fsb,
+ orig_end_fsb))
+ goto out_unlock;
+ }
+
+ if (needs_alloc)
goto allocate_blocks;
/*
@@ -1023,6 +1081,134 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
#endif /* CONFIG_XFS_RT */
static int
+xfs_atomic_write_cow_iomap_begin(
+ struct inode *inode,
+ loff_t offset,
+ loff_t length,
+ unsigned flags,
+ struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+ xfs_filblks_t count_fsb = end_fsb - offset_fsb;
+ int nmaps = 1;
+ xfs_filblks_t resaligned;
+ struct xfs_bmbt_irec cmap;
+ struct xfs_iext_cursor icur;
+ struct xfs_trans *tp;
+ unsigned int dblocks = 0, rblocks = 0;
+ int error;
+ u64 seq;
+
+ ASSERT(flags & IOMAP_WRITE);
+ ASSERT(flags & IOMAP_DIRECT);
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+
+ if (!xfs_can_sw_atomic_write(mp)) {
+ ASSERT(xfs_can_sw_atomic_write(mp));
+ return -EINVAL;
+ }
+
+ /* blocks are always allocated in this path */
+ if (flags & IOMAP_NOWAIT)
+ return -EAGAIN;
+
+ trace_xfs_iomap_atomic_write_cow(ip, offset, length);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ if (!ip->i_cowfp) {
+ ASSERT(!xfs_is_reflink_inode(ip));
+ xfs_ifork_init_cow(ip);
+ }
+
+ if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
+ cmap.br_startoff = end_fsb;
+ if (cmap.br_startoff <= offset_fsb) {
+ xfs_trim_extent(&cmap, offset_fsb, count_fsb);
+ goto found;
+ }
+
+ end_fsb = cmap.br_startoff;
+ count_fsb = end_fsb - offset_fsb;
+
+ resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
+ xfs_get_cowextsz_hint(ip));
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ rblocks = resaligned;
+ } else {
+ dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+ rblocks = 0;
+ }
+
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
+ rblocks, false, &tp);
+ if (error)
+ return error;
+
+ /* extent layout could have changed since the unlock, so check again */
+ if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
+ cmap.br_startoff = end_fsb;
+ if (cmap.br_startoff <= offset_fsb) {
+ xfs_trim_extent(&cmap, offset_fsb, count_fsb);
+ xfs_trans_cancel(tp);
+ goto found;
+ }
+
+ /*
+ * Allocate the entire reservation as unwritten blocks.
+ *
+ * Use XFS_BMAPI_EXTSZALIGN to hint at aligning new extents according to
+ * extszhint, such that there will be a greater chance that future
+ * atomic writes to that same range will be aligned (and don't require
+ * this COW-based method).
+ */
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC |
+ XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps);
+ if (error) {
+ xfs_trans_cancel(tp);
+ goto out_unlock;
+ }
+
+ xfs_inode_set_cowblocks_tag(ip);
+ error = xfs_trans_commit(tp);
+ if (error)
+ goto out_unlock;
+
+found:
+ if (cmap.br_state != XFS_EXT_NORM) {
+ error = xfs_reflink_convert_cow_locked(ip, offset_fsb,
+ count_fsb);
+ if (error)
+ goto out_unlock;
+ cmap.br_state = XFS_EXT_NORM;
+ }
+
+ length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
+ trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
+
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
+const struct iomap_ops xfs_atomic_write_cow_iomap_ops = {
+ .iomap_begin = xfs_atomic_write_cow_iomap_begin,
+};
+
+static int
xfs_dax_write_iomap_end(
struct inode *inode,
loff_t pos,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index d330c4a581b1..674f8ac1b9bd 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -56,5 +56,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
extern const struct iomap_ops xfs_xattr_iomap_ops;
extern const struct iomap_ops xfs_dax_write_iomap_ops;
+extern const struct iomap_ops xfs_atomic_write_cow_iomap_ops;
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 756bd3ca8e00..8cddbb7c149b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -601,16 +601,82 @@ xfs_report_dioalign(
stat->dio_offset_align = stat->dio_read_offset_align;
}
+unsigned int
+xfs_get_atomic_write_min(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ /*
+ * If we can complete an atomic write via atomic out of place writes,
+ * then advertise a minimum size of one fsblock. Without this
+ * mechanism, we can only guarantee atomic writes up to a single LBA.
+ *
+ * If out of place writes are not available, we can guarantee an atomic
+ * write of exactly one single fsblock if the bdev will make that
+ * guarantee for us.
+ */
+ if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp))
+ return mp->m_sb.sb_blocksize;
+
+ return 0;
+}
+
+unsigned int
+xfs_get_atomic_write_max(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ /*
+ * If out of place writes are not available, we can guarantee an atomic
+ * write of exactly one single fsblock if the bdev will make that
+ * guarantee for us.
+ */
+ if (!xfs_can_sw_atomic_write(mp)) {
+ if (xfs_inode_can_hw_atomic_write(ip))
+ return mp->m_sb.sb_blocksize;
+ return 0;
+ }
+
+ /*
+ * If we can complete an atomic write via atomic out of place writes,
+ * then advertise a maximum size of whatever we can complete through
+ * that means. Hardware support is reported via max_opt, not here.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
+ return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
+}
+
+unsigned int
+xfs_get_atomic_write_max_opt(
+ struct xfs_inode *ip)
+{
+ unsigned int awu_max = xfs_get_atomic_write_max(ip);
+
+ /* if the max is 1x block, then just keep behaviour that opt is 0 */
+ if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+ return 0;
+
+ /*
+ * Advertise the maximum size of an atomic write that we can tell the
+ * block device to perform for us. In general the bdev limit will be
+ * less than our out of place write limit, but we don't want to exceed
+ * the awu_max.
+ */
+ return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max);
+}
+
static void
xfs_report_atomic_write(
struct xfs_inode *ip,
struct kstat *stat)
{
- unsigned int unit_min = 0, unit_max = 0;
-
- if (xfs_inode_can_atomicwrite(ip))
- unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize;
- generic_fill_statx_atomic_writes(stat, unit_min, unit_max);
+ generic_fill_statx_atomic_writes(stat,
+ xfs_get_atomic_write_min(ip),
+ xfs_get_atomic_write_max(ip),
+ xfs_get_atomic_write_max_opt(ip));
}
STATIC int
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 3c1a2605ffd2..0896f6b8b3b8 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -19,5 +19,8 @@ int xfs_inode_init_security(struct inode *inode, struct inode *dir,
extern void xfs_setup_inode(struct xfs_inode *ip);
extern void xfs_setup_iops(struct xfs_inode *ip);
extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
+unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip);
#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 980aabc49512..793468b4d30d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1607,27 +1607,6 @@ xlog_bio_end_io(
&iclog->ic_end_io_work);
}
-static int
-xlog_map_iclog_data(
- struct bio *bio,
- void *data,
- size_t count)
-{
- do {
- struct page *page = kmem_to_page(data);
- unsigned int off = offset_in_page(data);
- size_t len = min_t(size_t, count, PAGE_SIZE - off);
-
- if (bio_add_page(bio, page, len, off) != len)
- return -EIO;
-
- data += len;
- count -= len;
- } while (count);
-
- return 0;
-}
-
STATIC void
xlog_write_iclog(
struct xlog *log,
@@ -1693,11 +1672,12 @@ xlog_write_iclog(
iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
- if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
- goto shutdown;
-
- if (is_vmalloc_addr(iclog->ic_data))
- flush_kernel_vmap_range(iclog->ic_data, count);
+ if (is_vmalloc_addr(iclog->ic_data)) {
+ if (!bio_add_vmalloc(&iclog->ic_bio, iclog->ic_data, count))
+ goto shutdown;
+ } else {
+ bio_add_virt_nofail(&iclog->ic_bio, iclog->ic_data, count);
+ }
/*
* If this log buffer would straddle the end of the log we will have
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 1ca406ec1b40..f66d2d430e4f 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -309,9 +309,7 @@ xlog_cil_alloc_shadow_bufs(
* Then round nbytes up to 64-bit alignment so that the initial
* buffer alignment is easy to calculate and verify.
*/
- nbytes += niovecs *
- (sizeof(uint64_t) + sizeof(struct xlog_op_header));
- nbytes = round_up(nbytes, sizeof(uint64_t));
+ nbytes = xlog_item_space(niovecs, nbytes);
/*
* The data buffer needs to start 64-bit aligned, so round up
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index f3d78869e5e5..39a102cc1b43 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -698,4 +698,17 @@ xlog_kvmalloc(
return p;
}
+/*
+ * Given a count of iovecs and space for a log item, compute the space we need
+ * in the log to store that data plus the log headers.
+ */
+static inline unsigned int
+xlog_item_space(
+ unsigned int niovecs,
+ unsigned int nbytes)
+{
+ nbytes += niovecs * (sizeof(uint64_t) + sizeof(struct xlog_op_header));
+ return round_up(nbytes, sizeof(uint64_t));
+}
+
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 15d410d16bb2..19aba2c3d525 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -141,14 +141,6 @@ xfs_warn_experimental(
const char *name;
long opstate;
} features[] = {
- [XFS_EXPERIMENTAL_PNFS] = {
- .opstate = XFS_OPSTATE_WARNED_PNFS,
- .name = "pNFS",
- },
- [XFS_EXPERIMENTAL_SCRUB] = {
- .opstate = XFS_OPSTATE_WARNED_SCRUB,
- .name = "online scrub",
- },
[XFS_EXPERIMENTAL_SHRINK] = {
.opstate = XFS_OPSTATE_WARNED_SHRINK,
.name = "online shrink",
@@ -161,14 +153,6 @@ xfs_warn_experimental(
.opstate = XFS_OPSTATE_WARNED_LBS,
.name = "large block size",
},
- [XFS_EXPERIMENTAL_EXCHRANGE] = {
- .opstate = XFS_OPSTATE_WARNED_EXCHRANGE,
- .name = "exchange range",
- },
- [XFS_EXPERIMENTAL_PPTR] = {
- .opstate = XFS_OPSTATE_WARNED_PPTR,
- .name = "parent pointer",
- },
[XFS_EXPERIMENTAL_METADIR] = {
.opstate = XFS_OPSTATE_WARNED_METADIR,
.name = "metadata directory tree",
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index a92a4d09c8e9..d68e72379f9d 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -91,13 +91,9 @@ void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
const char *fmt, ...);
enum xfs_experimental_feat {
- XFS_EXPERIMENTAL_PNFS,
- XFS_EXPERIMENTAL_SCRUB,
XFS_EXPERIMENTAL_SHRINK,
XFS_EXPERIMENTAL_LARP,
XFS_EXPERIMENTAL_LBS,
- XFS_EXPERIMENTAL_EXCHRANGE,
- XFS_EXPERIMENTAL_PPTR,
XFS_EXPERIMENTAL_METADIR,
XFS_EXPERIMENTAL_ZONED,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 00b53f479ece..29276fe60df9 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -666,6 +666,158 @@ xfs_agbtree_compute_maxlevels(
mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
}
+/* Maximum atomic write IO size that the kernel allows. */
+static inline xfs_extlen_t xfs_calc_atomic_write_max(struct xfs_mount *mp)
+{
+ return rounddown_pow_of_two(XFS_B_TO_FSB(mp, MAX_RW_COUNT));
+}
+
+static inline unsigned int max_pow_of_two_factor(const unsigned int nr)
+{
+ return 1 << (ffs(nr) - 1);
+}
+
+/*
+ * If the data device advertises atomic write support, limit the size of data
+ * device atomic writes to the greatest power-of-two factor of the AG size so
+ * that every atomic write unit aligns with the start of every AG. This is
+ * required so that the per-AG allocations for an atomic write will always be
+ * aligned compatibly with the alignment requirements of the storage.
+ *
+ * If the data device doesn't advertise atomic writes, then there are no
+ * alignment restrictions and the largest out-of-place write we can do
+ * ourselves is the number of blocks that user files can allocate from any AG.
+ */
+static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp)
+{
+ if (mp->m_ddev_targp->bt_bdev_awu_min > 0)
+ return max_pow_of_two_factor(mp->m_sb.sb_agblocks);
+ return rounddown_pow_of_two(mp->m_ag_max_usable);
+}
+
+/*
+ * Reflink on the realtime device requires rtgroups, and atomic writes require
+ * reflink.
+ *
+ * If the realtime device advertises atomic write support, limit the size of
+ * data device atomic writes to the greatest power-of-two factor of the rtgroup
+ * size so that every atomic write unit aligns with the start of every rtgroup.
+ * This is required so that the per-rtgroup allocations for an atomic write
+ * will always be aligned compatibly with the alignment requirements of the
+ * storage.
+ *
+ * If the rt device doesn't advertise atomic writes, then there are no
+ * alignment restrictions and the largest out-of-place write we can do
+ * ourselves is the number of blocks that user files can allocate from any
+ * rtgroup.
+ */
+static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp)
+{
+ struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+
+ if (rgs->blocks == 0)
+ return 0;
+ if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0)
+ return max_pow_of_two_factor(rgs->blocks);
+ return rounddown_pow_of_two(rgs->blocks);
+}
+
+/* Compute the maximum atomic write unit size for each section. */
+static inline void
+xfs_calc_atomic_write_unit_max(
+ struct xfs_mount *mp)
+{
+ struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG];
+ struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+
+ const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp);
+ const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp);
+ const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp);
+ const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp);
+
+ ags->awu_max = min3(max_write, max_ioend, max_agsize);
+ rgs->awu_max = min3(max_write, max_ioend, max_rgsize);
+
+ trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend,
+ max_agsize, max_rgsize);
+}
+
+/*
+ * Try to set the atomic write maximum to a new value that we got from
+ * userspace via mount option.
+ */
+int
+xfs_set_max_atomic_write_opt(
+ struct xfs_mount *mp,
+ unsigned long long new_max_bytes)
+{
+ const xfs_filblks_t new_max_fsbs = XFS_B_TO_FSBT(mp, new_max_bytes);
+ const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp);
+ const xfs_extlen_t max_group =
+ max(mp->m_groups[XG_TYPE_AG].blocks,
+ mp->m_groups[XG_TYPE_RTG].blocks);
+ const xfs_extlen_t max_group_write =
+ max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp));
+ int error;
+
+ if (new_max_bytes == 0)
+ goto set_limit;
+
+ ASSERT(max_write <= U32_MAX);
+
+ /* generic_atomic_write_valid enforces power of two length */
+ if (!is_power_of_2(new_max_bytes)) {
+ xfs_warn(mp,
+ "max atomic write size of %llu bytes is not a power of 2",
+ new_max_bytes);
+ return -EINVAL;
+ }
+
+ if (new_max_bytes & mp->m_blockmask) {
+ xfs_warn(mp,
+ "max atomic write size of %llu bytes not aligned with fsblock",
+ new_max_bytes);
+ return -EINVAL;
+ }
+
+ if (new_max_fsbs > max_write) {
+ xfs_warn(mp,
+ "max atomic write size of %lluk cannot be larger than max write size %lluk",
+ new_max_bytes >> 10,
+ XFS_FSB_TO_B(mp, max_write) >> 10);
+ return -EINVAL;
+ }
+
+ if (new_max_fsbs > max_group) {
+ xfs_warn(mp,
+ "max atomic write size of %lluk cannot be larger than allocation group size %lluk",
+ new_max_bytes >> 10,
+ XFS_FSB_TO_B(mp, max_group) >> 10);
+ return -EINVAL;
+ }
+
+ if (new_max_fsbs > max_group_write) {
+ xfs_warn(mp,
+ "max atomic write size of %lluk cannot be larger than max allocation group write size %lluk",
+ new_max_bytes >> 10,
+ XFS_FSB_TO_B(mp, max_group_write) >> 10);
+ return -EINVAL;
+ }
+
+set_limit:
+ error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs);
+ if (error) {
+ xfs_warn(mp,
+ "cannot support completing atomic writes of %lluk",
+ new_max_bytes >> 10);
+ return error;
+ }
+
+ xfs_calc_atomic_write_unit_max(mp);
+ mp->m_awu_max_bytes = new_max_bytes;
+ return 0;
+}
+
/* Compute maximum possible height for realtime btree types for this fs. */
static inline void
xfs_rtbtree_compute_maxlevels(
@@ -1082,6 +1234,15 @@ xfs_mountfs(
xfs_zone_gc_start(mp);
}
+ /*
+ * Pre-calculate atomic write unit max. This involves computations
+ * derived from transaction reservations, so we must do this after the
+ * log is fully initialized.
+ */
+ error = xfs_set_max_atomic_write_opt(mp, mp->m_awu_max_bytes);
+ if (error)
+ goto out_agresv;
+
return 0;
out_agresv:
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e5192c12e7ac..d85084f9f317 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -119,6 +119,12 @@ struct xfs_groups {
* SMR hard drives.
*/
xfs_fsblock_t start_fsb;
+
+ /*
+ * Maximum length of an atomic write for files stored in this
+ * collection of allocation groups, in fsblocks.
+ */
+ xfs_extlen_t awu_max;
};
struct xfs_freecounter {
@@ -230,6 +236,10 @@ typedef struct xfs_mount {
bool m_update_sb; /* sb needs update in mount */
unsigned int m_max_open_zones;
unsigned int m_zonegc_low_space;
+ struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */
+
+ /* max_atomic_write mount option value */
+ unsigned long long m_awu_max_bytes;
/*
* Bitsets of per-fs metadata that have been checked and/or are sick.
@@ -464,6 +474,11 @@ static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
return !xfs_has_zoned(mp);
}
+static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp)
+{
+ return xfs_has_reflink(mp);
+}
+
/*
* Some features are always on for v5 file systems, allow the compiler to
* eliminiate dead code when building without v4 support.
@@ -543,10 +558,6 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
*/
#define XFS_OPSTATE_BLOCKGC_ENABLED 6
-/* Kernel has logged a warning about pNFS being used on this fs. */
-#define XFS_OPSTATE_WARNED_PNFS 7
-/* Kernel has logged a warning about online fsck being used on this fs. */
-#define XFS_OPSTATE_WARNED_SCRUB 8
/* Kernel has logged a warning about shrink being used on this fs. */
#define XFS_OPSTATE_WARNED_SHRINK 9
/* Kernel has logged a warning about logged xattr updates being used. */
@@ -559,10 +570,6 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
#define XFS_OPSTATE_USE_LARP 13
/* Kernel has logged a warning about blocksize > pagesize on this fs. */
#define XFS_OPSTATE_WARNED_LBS 14
-/* Kernel has logged a warning about exchange-range being used on this fs. */
-#define XFS_OPSTATE_WARNED_EXCHRANGE 15
-/* Kernel has logged a warning about parent pointers being used on this fs. */
-#define XFS_OPSTATE_WARNED_PPTR 16
/* Kernel has logged a warning about metadata dirs being used on this fs. */
#define XFS_OPSTATE_WARNED_METADIR 17
/* Filesystem should use qflags to determine quotaon status */
@@ -631,7 +638,6 @@ xfs_should_warn(struct xfs_mount *mp, long nr)
{ (1UL << XFS_OPSTATE_READONLY), "read_only" }, \
{ (1UL << XFS_OPSTATE_INODEGC_ENABLED), "inodegc" }, \
{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \
- { (1UL << XFS_OPSTATE_WARNED_SCRUB), "wscrub" }, \
{ (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \
{ (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }, \
{ (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING), "quotacheck" }, \
@@ -793,4 +799,7 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
percpu_counter_add(&mp->m_delalloc_blks, delta);
}
+int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
+ unsigned long long new_max_bytes);
+
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index d0f5b403bdbe..08443ceec329 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -414,6 +414,8 @@ xfs_mru_cache_destroy(
* To insert an element, call xfs_mru_cache_insert() with the data store, the
* element's key and the client data pointer. This function returns 0 on
* success or ENOMEM if memory for the data element couldn't be allocated.
+ *
+ * The passed in elem is freed through the per-cache free_func on failure.
*/
int
xfs_mru_cache_insert(
@@ -421,14 +423,15 @@ xfs_mru_cache_insert(
unsigned long key,
struct xfs_mru_cache_elem *elem)
{
- int error;
+ int error = -EINVAL;
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
- return -EINVAL;
+ goto out_free;
+ error = -ENOMEM;
if (radix_tree_preload(GFP_KERNEL))
- return -ENOMEM;
+ goto out_free;
INIT_LIST_HEAD(&elem->list_node);
elem->key = key;
@@ -440,6 +443,12 @@ xfs_mru_cache_insert(
_xfs_mru_cache_list_insert(mru, elem);
spin_unlock(&mru->lock);
+ if (error)
+ goto out_free;
+ return 0;
+
+out_free:
+ mru->free_func(mru->data, elem);
return error;
}
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index ed8d8ed42f0a..3545dc1d953c 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -127,7 +127,7 @@ xfs_dax_notify_failure_freeze(
struct super_block *sb = mp->m_super;
int error;
- error = freeze_super(sb, FREEZE_HOLDER_KERNEL);
+ error = freeze_super(sb, FREEZE_HOLDER_KERNEL, NULL);
if (error)
xfs_emerg(mp, "already frozen by kernel, err=%d", error);
@@ -143,7 +143,7 @@ xfs_dax_notify_failure_thaw(
int error;
if (kernel_frozen) {
- error = thaw_super(sb, FREEZE_HOLDER_KERNEL);
+ error = thaw_super(sb, FREEZE_HOLDER_KERNEL, NULL);
if (error)
xfs_emerg(mp, "still frozen after notify failure, err=%d",
error);
@@ -153,7 +153,7 @@ xfs_dax_notify_failure_thaw(
* Also thaw userspace call anyway because the device is about to be
* removed immediately.
*/
- thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}
static int
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 6f4479deac6d..afe7497012d4 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -58,8 +58,6 @@ xfs_fs_get_uuid(
{
struct xfs_mount *mp = XFS_M(sb);
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PNFS);
-
if (*len < sizeof(uuid_t))
return -EINVAL;
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fe2d7aab8554..076501123d89 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -78,6 +78,11 @@ xfs_cui_item_size(
*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
}
+unsigned int xfs_cui_log_space(unsigned int nr)
+{
+ return xlog_item_space(1, xfs_cui_log_format_sizeof(nr));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given cui log item. We use only 1 iovec, and we point that
@@ -179,6 +184,11 @@ xfs_cud_item_size(
*nbytes += sizeof(struct xfs_cud_log_format);
}
+unsigned int xfs_cud_log_space(void)
+{
+ return xlog_item_space(1, sizeof(struct xfs_cud_log_format));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given cud log item. We use only 1 iovec, and we point that
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index bfee8f30c63c..0fc3f493342b 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -76,4 +76,7 @@ struct xfs_refcount_intent;
void xfs_refcount_defer_add(struct xfs_trans *tp,
struct xfs_refcount_intent *ri);
+unsigned int xfs_cui_log_space(unsigned int nr);
+unsigned int xfs_cud_log_space(void);
+
#endif /* __XFS_REFCOUNT_ITEM_H__ */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cc3b4df88110..ad3bcb76d805 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -293,7 +293,7 @@ xfs_bmap_trim_cow(
return xfs_reflink_trim_around_shared(ip, imap, shared);
}
-static int
+int
xfs_reflink_convert_cow_locked(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
@@ -786,35 +786,19 @@ xfs_reflink_update_quota(
* requirements as low as possible.
*/
STATIC int
-xfs_reflink_end_cow_extent(
+xfs_reflink_end_cow_extent_locked(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
xfs_fileoff_t *offset_fsb,
xfs_fileoff_t end_fsb)
{
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec got, del, data;
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
- unsigned int resblks;
int nmaps;
bool isrt = XFS_IS_REALTIME_INODE(ip);
int error;
- resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
- XFS_TRANS_RESERVE, &tp);
- if (error)
- return error;
-
- /*
- * Lock the inode. We have to ijoin without automatic unlock because
- * the lead transaction is the refcountbt record deletion; the data
- * fork update follows as a deferred log item.
- */
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
-
/*
* In case of racing, overlapping AIO writes no COW extents might be
* left by the time I/O completes for the loser of the race. In that
@@ -823,7 +807,7 @@ xfs_reflink_end_cow_extent(
if (!xfs_iext_lookup_extent(ip, ifp, *offset_fsb, &icur, &got) ||
got.br_startoff >= end_fsb) {
*offset_fsb = end_fsb;
- goto out_cancel;
+ return 0;
}
/*
@@ -837,7 +821,7 @@ xfs_reflink_end_cow_extent(
if (!xfs_iext_next_extent(ifp, &icur, &got) ||
got.br_startoff >= end_fsb) {
*offset_fsb = end_fsb;
- goto out_cancel;
+ return 0;
}
}
del = got;
@@ -846,14 +830,14 @@ xfs_reflink_end_cow_extent(
error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
XFS_IEXT_REFLINK_END_COW_CNT);
if (error)
- goto out_cancel;
+ return error;
/* Grab the corresponding mapping in the data fork. */
nmaps = 1;
error = xfs_bmapi_read(ip, del.br_startoff, del.br_blockcount, &data,
&nmaps, 0);
if (error)
- goto out_cancel;
+ return error;
/* We can only remap the smaller of the two extent sizes. */
data.br_blockcount = min(data.br_blockcount, del.br_blockcount);
@@ -882,7 +866,7 @@ xfs_reflink_end_cow_extent(
error = xfs_bunmapi(NULL, ip, data.br_startoff,
data.br_blockcount, 0, 1, &done);
if (error)
- goto out_cancel;
+ return error;
ASSERT(done);
}
@@ -899,17 +883,45 @@ xfs_reflink_end_cow_extent(
/* Remove the mapping from the CoW fork. */
xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
- error = xfs_trans_commit(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- return error;
-
/* Update the caller about how much progress we made. */
*offset_fsb = del.br_startoff + del.br_blockcount;
return 0;
+}
-out_cancel:
- xfs_trans_cancel(tp);
+/*
+ * Remap part of the CoW fork into the data fork.
+ *
+ * We aim to remap the range starting at @offset_fsb and ending at @end_fsb
+ * into the data fork; this function will remap what it can (at the end of the
+ * range) and update @end_fsb appropriately. Each remap gets its own
+ * transaction because we can end up merging and splitting bmbt blocks for
+ * every remap operation and we'd like to keep the block reservation
+ * requirements as low as possible.
+ */
+STATIC int
+xfs_reflink_end_cow_extent(
+ struct xfs_inode *ip,
+ xfs_fileoff_t *offset_fsb,
+ xfs_fileoff_t end_fsb)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ unsigned int resblks;
+ int error;
+
+ resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
+ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+
+ error = xfs_reflink_end_cow_extent_locked(tp, ip, offset_fsb, end_fsb);
+ if (error)
+ xfs_trans_cancel(tp);
+ else
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -973,6 +985,78 @@ xfs_reflink_end_cow(
}
/*
+ * Fully remap all of the file's data fork at once, which is the critical part
+ * in achieving atomic behaviour.
+ * The regular CoW end path does not use function as to keep the block
+ * reservation per transaction as low as possible.
+ */
+int
+xfs_reflink_end_atomic_cow(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ xfs_fileoff_t offset_fsb;
+ xfs_fileoff_t end_fsb;
+ int error = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ unsigned int resblks;
+
+ trace_xfs_reflink_end_cow(ip, offset, count);
+
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ end_fsb = XFS_B_TO_FSB(mp, offset + count);
+
+ /*
+ * Each remapping operation could cause a btree split, so in the worst
+ * case that's one for each block.
+ */
+ resblks = (end_fsb - offset_fsb) *
+ XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_atomic_ioend, resblks, 0,
+ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+
+ while (end_fsb > offset_fsb && !error) {
+ error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
+ end_fsb);
+ }
+ if (error) {
+ trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
+ goto out_cancel;
+ }
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+out_cancel:
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
+/* Compute the largest atomic write that we can complete through software. */
+xfs_extlen_t
+xfs_reflink_max_atomic_cow(
+ struct xfs_mount *mp)
+{
+ /* We cannot do any atomic writes without out of place writes. */
+ if (!xfs_can_sw_atomic_write(mp))
+ return 0;
+
+ /*
+ * Atomic write limits must always be a power-of-2, according to
+ * generic_atomic_write_valid.
+ */
+ return rounddown_pow_of_two(xfs_calc_max_atomic_write_fsblocks(mp));
+}
+
+/*
* Free all CoW staging blocks that are still referenced by the ondisk refcount
* metadata. The ondisk metadata does not track which inode created the
* staging extent, so callers must ensure that there are no cached inodes with
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index cc4e92278279..36cda724da89 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -35,6 +35,8 @@ int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
bool convert_now);
extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
+int xfs_reflink_convert_cow_locked(struct xfs_inode *ip,
+ xfs_fileoff_t offset_fsb, xfs_filblks_t count_fsb);
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
@@ -43,6 +45,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count, bool cancel_real);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
+int xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, loff_t len,
@@ -64,4 +68,6 @@ extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
bool xfs_reflink_supports_rextsize(struct xfs_mount *mp, unsigned int rextsize);
+xfs_extlen_t xfs_reflink_max_atomic_cow(struct xfs_mount *mp);
+
#endif /* __XFS_REFLINK_H */
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 89decffe76c8..c99700318ec2 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -77,6 +77,11 @@ xfs_rui_item_size(
*nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
}
+unsigned int xfs_rui_log_space(unsigned int nr)
+{
+ return xlog_item_space(1, xfs_rui_log_format_sizeof(nr));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given rui log item. We use only 1 iovec, and we point that
@@ -180,6 +185,11 @@ xfs_rud_item_size(
*nbytes += sizeof(struct xfs_rud_log_format);
}
+unsigned int xfs_rud_log_space(void)
+{
+ return xlog_item_space(1, sizeof(struct xfs_rud_log_format));
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given rud log item. We use only 1 iovec, and we point that
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 40d331555675..3a99f0117f2d 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -75,4 +75,7 @@ struct xfs_rmap_intent;
void xfs_rmap_defer_add(struct xfs_trans *tp, struct xfs_rmap_intent *ri);
+unsigned int xfs_rui_log_space(unsigned int nr);
+unsigned int xfs_rud_log_space(void);
+
#endif /* __XFS_RMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index b2dd0c0bf509..0bc4b5489078 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -111,7 +111,7 @@ enum {
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
- Opt_lifetime, Opt_nolifetime,
+ Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
};
static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -159,6 +159,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_u32("max_open_zones", Opt_max_open_zones),
fsparam_flag("lifetime", Opt_lifetime),
fsparam_flag("nolifetime", Opt_nolifetime),
+ fsparam_string("max_atomic_write", Opt_max_atomic_write),
{}
};
@@ -241,6 +242,9 @@ xfs_fs_show_options(
if (mp->m_max_open_zones)
seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
+ if (mp->m_awu_max_bytes)
+ seq_printf(m, ",max_atomic_write=%lluk",
+ mp->m_awu_max_bytes >> 10);
return 0;
}
@@ -380,10 +384,11 @@ xfs_blkdev_get(
struct file **bdev_filep)
{
int error = 0;
+ blk_mode_t mode;
- *bdev_filep = bdev_file_open_by_path(name,
- BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
- mp->m_super, &fs_holder_ops);
+ mode = sb_open_mode(mp->m_super->s_flags);
+ *bdev_filep = bdev_file_open_by_path(name, mode,
+ mp->m_super, &fs_holder_ops);
if (IS_ERR(*bdev_filep)) {
error = PTR_ERR(*bdev_filep);
*bdev_filep = NULL;
@@ -481,21 +486,29 @@ xfs_open_devices(
/*
* Setup xfs_mount buffer target pointers
*/
- error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
- if (!mp->m_ddev_targp)
+ if (IS_ERR(mp->m_ddev_targp)) {
+ error = PTR_ERR(mp->m_ddev_targp);
+ mp->m_ddev_targp = NULL;
goto out_close_rtdev;
+ }
if (rtdev_file) {
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
- if (!mp->m_rtdev_targp)
+ if (IS_ERR(mp->m_rtdev_targp)) {
+ error = PTR_ERR(mp->m_rtdev_targp);
+ mp->m_rtdev_targp = NULL;
goto out_free_ddev_targ;
+ }
}
if (logdev_file && file_bdev(logdev_file) != ddev) {
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
- if (!mp->m_logdev_targp)
+ if (IS_ERR(mp->m_logdev_targp)) {
+ error = PTR_ERR(mp->m_logdev_targp);
+ mp->m_logdev_targp = NULL;
goto out_free_rtdev_targ;
+ }
} else {
mp->m_logdev_targp = mp->m_ddev_targp;
/* Handle won't be used, drop it */
@@ -528,7 +541,7 @@ xfs_setup_devices(
{
int error;
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
+ error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -537,7 +550,7 @@ xfs_setup_devices(
if (xfs_has_sector(mp))
log_sector_size = mp->m_sb.sb_logsectsize;
- error = xfs_setsize_buftarg(mp->m_logdev_targp,
+ error = xfs_configure_buftarg(mp->m_logdev_targp,
log_sector_size);
if (error)
return error;
@@ -551,7 +564,7 @@ xfs_setup_devices(
}
mp->m_rtdev_targp = mp->m_ddev_targp;
} else if (mp->m_rtname) {
- error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+ error = xfs_configure_buftarg(mp->m_rtdev_targp,
mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -1149,7 +1162,7 @@ xfs_init_percpu_counters(
return 0;
free_freecounters:
- while (--i > 0)
+ while (--i >= 0)
percpu_counter_destroy(&mp->m_free[i].count);
percpu_counter_destroy(&mp->m_delalloc_rtextents);
free_delalloc:
@@ -1334,6 +1347,42 @@ suffix_kstrtoint(
return ret;
}
+static int
+suffix_kstrtoull(
+ const char *s,
+ unsigned int base,
+ unsigned long long *res)
+{
+ int last, shift_left_factor = 0;
+ unsigned long long _res;
+ char *value;
+ int ret = 0;
+
+ value = kstrdup(s, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ last = strlen(value) - 1;
+ if (value[last] == 'K' || value[last] == 'k') {
+ shift_left_factor = 10;
+ value[last] = '\0';
+ }
+ if (value[last] == 'M' || value[last] == 'm') {
+ shift_left_factor = 20;
+ value[last] = '\0';
+ }
+ if (value[last] == 'G' || value[last] == 'g') {
+ shift_left_factor = 30;
+ value[last] = '\0';
+ }
+
+ if (kstrtoull(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
+ *res = _res << shift_left_factor;
+ return ret;
+}
+
static inline void
xfs_fs_warn_deprecated(
struct fs_context *fc,
@@ -1518,6 +1567,14 @@ xfs_fs_parse_param(
case Opt_nolifetime:
parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
return 0;
+ case Opt_max_atomic_write:
+ if (suffix_kstrtoull(param->string, 10,
+ &parsing_mp->m_awu_max_bytes)) {
+ xfs_warn(parsing_mp,
+ "max atomic write size must be positive integer");
+ return -EINVAL;
+ }
+ return 0;
default:
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -1897,13 +1954,6 @@ xfs_fs_fill_super(
}
}
-
- if (xfs_has_exchange_range(mp))
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
-
- if (xfs_has_parent(mp))
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR);
-
/*
* If no quota mount options were provided, maybe we'll try to pick
* up the quota accounting and enforcement flags from the ondisk sb.
@@ -1969,6 +2019,20 @@ xfs_remount_rw(
struct xfs_sb *sbp = &mp->m_sb;
int error;
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
+ bdev_read_only(mp->m_logdev_targp->bt_bdev)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited by read-only logdev");
+ return -EACCES;
+ }
+
+ if (mp->m_rtdev_targp &&
+ bdev_read_only(mp->m_rtdev_targp->bt_bdev)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited by read-only rtdev");
+ return -EACCES;
+ }
+
if (xfs_has_norecovery(mp)) {
xfs_warn(mp,
"ro->rw transition prohibited on norecovery mount");
@@ -2114,6 +2178,29 @@ xfs_fs_reconfigure(
if (error)
return error;
+ /* attr2 -> noattr2 */
+ if (xfs_has_noattr2(new_mp)) {
+ if (xfs_has_crc(mp)) {
+ xfs_warn(mp,
+ "attr2 is always enabled for a V5 filesystem - can't be changed.");
+ return -EINVAL;
+ }
+ mp->m_features &= ~XFS_FEAT_ATTR2;
+ mp->m_features |= XFS_FEAT_NOATTR2;
+ } else if (xfs_has_attr2(new_mp)) {
+ /* noattr2 -> attr2 */
+ mp->m_features &= ~XFS_FEAT_NOATTR2;
+ mp->m_features |= XFS_FEAT_ATTR2;
+ }
+
+ /* Validate new max_atomic_write option before making other changes */
+ if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
+ error = xfs_set_max_atomic_write_opt(mp,
+ new_mp->m_awu_max_bytes);
+ if (error)
+ return error;
+ }
+
/* inode32 -> inode64 */
if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
@@ -2126,6 +2213,17 @@ xfs_fs_reconfigure(
mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
}
+ /*
+ * Now that mp has been modified according to the remount options, we
+ * do a final option validation with xfs_finish_flags() just like it is
+ * just like it is done during mount. We cannot use
+ * done during mount. We cannot use xfs_finish_flags() on new_mp as it
+ * contains only the user given options.
+ */
+ error = xfs_finish_flags(mp);
+ if (error)
+ return error;
+
/* ro -> rw */
if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
error = xfs_remount_rw(mp);
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 276696a07040..51646f066c4f 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -29,8 +29,6 @@ typedef struct xfs_param {
xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */
xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
- xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
- xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */
xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e56ba1963160..01d284a1c759 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -170,6 +170,99 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
+TRACE_EVENT(xfs_calc_atomic_write_unit_max,
+ TP_PROTO(struct xfs_mount *mp, unsigned int max_write,
+ unsigned int max_ioend, unsigned int max_agsize,
+ unsigned int max_rgsize),
+ TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, max_write)
+ __field(unsigned int, max_ioend)
+ __field(unsigned int, max_agsize)
+ __field(unsigned int, max_rgsize)
+ __field(unsigned int, data_awu_max)
+ __field(unsigned int, rt_awu_max)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->max_write = max_write;
+ __entry->max_ioend = max_ioend;
+ __entry->max_agsize = max_agsize;
+ __entry->max_rgsize = max_rgsize;
+ __entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max;
+ __entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max;
+ ),
+ TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->max_write,
+ __entry->max_ioend,
+ __entry->max_agsize,
+ __entry->max_rgsize,
+ __entry->data_awu_max,
+ __entry->rt_awu_max)
+);
+
+TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
+ TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
+ unsigned int step_size, unsigned int logres,
+ unsigned int blockcount),
+ TP_ARGS(mp, per_intent, step_size, logres, blockcount),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, per_intent)
+ __field(unsigned int, step_size)
+ __field(unsigned int, logres)
+ __field(unsigned int, blockcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->per_intent = per_intent;
+ __entry->step_size = step_size;
+ __entry->logres = logres;
+ __entry->blockcount = blockcount;
+ ),
+ TP_printk("dev %d:%d per_intent %u step_size %u logres %u blockcount %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->per_intent,
+ __entry->step_size,
+ __entry->logres,
+ __entry->blockcount)
+);
+
+TRACE_EVENT(xfs_calc_max_atomic_write_log_geometry,
+ TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
+ unsigned int step_size, unsigned int blockcount,
+ unsigned int min_logblocks, unsigned int logres),
+ TP_ARGS(mp, per_intent, step_size, blockcount, min_logblocks, logres),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, per_intent)
+ __field(unsigned int, step_size)
+ __field(unsigned int, blockcount)
+ __field(unsigned int, min_logblocks)
+ __field(unsigned int, cur_logblocks)
+ __field(unsigned int, logres)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->per_intent = per_intent;
+ __entry->step_size = step_size;
+ __entry->blockcount = blockcount;
+ __entry->min_logblocks = min_logblocks;
+ __entry->cur_logblocks = mp->m_sb.sb_logblocks;
+ __entry->logres = logres;
+ ),
+ TP_printk("dev %d:%d per_intent %u step_size %u blockcount %u min_logblocks %u logblocks %u logres %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->per_intent,
+ __entry->step_size,
+ __entry->blockcount,
+ __entry->min_logblocks,
+ __entry->cur_logblocks,
+ __entry->logres)
+);
+
TRACE_EVENT(xlog_intent_recovery_failed,
TP_PROTO(struct xfs_mount *mp, const struct xfs_defer_op_type *ops,
int error),
@@ -1657,6 +1750,28 @@ DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
DEFINE_RW_EVENT(xfs_reflink_bounce_dio_write);
+TRACE_EVENT(xfs_iomap_atomic_write_cow,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+ TP_ARGS(ip, offset, count),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_off_t, offset)
+ __field(ssize_t, count)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->offset = offset;
+ __entry->count = count;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pos 0x%llx bytecount 0x%zx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->offset,
+ __entry->count)
+)
+
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int whichfork, struct xfs_bmbt_irec *irec),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 85a649fec6ac..67c328d23e4a 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -315,7 +315,7 @@ xfs_ail_splice(
}
/*
- * Delete the given item from the AIL. Return a pointer to the item.
+ * Delete the given item from the AIL.
*/
static void
xfs_ail_delete(
@@ -777,26 +777,28 @@ xfs_ail_update_finish(
}
/*
- * xfs_trans_ail_update - bulk AIL insertion operation.
+ * xfs_trans_ail_update_bulk - bulk AIL insertion operation.
*
- * @xfs_trans_ail_update takes an array of log items that all need to be
+ * @xfs_trans_ail_update_bulk takes an array of log items that all need to be
* positioned at the same LSN in the AIL. If an item is not in the AIL, it will
- * be added. Otherwise, it will be repositioned by removing it and re-adding
- * it to the AIL. If we move the first item in the AIL, update the log tail to
- * match the new minimum LSN in the AIL.
+ * be added. Otherwise, it will be repositioned by removing it and re-adding
+ * it to the AIL.
*
- * This function takes the AIL lock once to execute the update operations on
- * all the items in the array, and as such should not be called with the AIL
- * lock held. As a result, once we have the AIL lock, we need to check each log
- * item LSN to confirm it needs to be moved forward in the AIL.
+ * If we move the first item in the AIL, update the log tail to match the new
+ * minimum LSN in the AIL.
*
- * To optimise the insert operation, we delete all the items from the AIL in
- * the first pass, moving them into a temporary list, then splice the temporary
- * list into the correct position in the AIL. This avoids needing to do an
- * insert operation on every item.
+ * This function should be called with the AIL lock held.
*
- * This function must be called with the AIL lock held. The lock is dropped
- * before returning.
+ * To optimise the insert operation, we add all items to a temporary list, then
+ * splice this list into the correct position in the AIL.
+ *
+ * Items that are already in the AIL are first deleted from their current
+ * location before being added to the temporary list.
+ *
+ * This avoids needing to do an insert operation on every item.
+ *
+ * The AIL lock is dropped by xfs_ail_update_finish() before returning to
+ * the caller.
*/
void
xfs_trans_ail_update_bulk(
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index d509e49b2aaa..80add26c0111 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -24,6 +24,7 @@
#include "xfs_zone_priv.h"
#include "xfs_zones.h"
#include "xfs_trace.h"
+#include "xfs_mru_cache.h"
void
xfs_open_zone_put(
@@ -796,6 +797,100 @@ xfs_submit_zoned_bio(
submit_bio(&ioend->io_bio);
}
+/*
+ * Cache the last zone written to for an inode so that it is considered first
+ * for subsequent writes.
+ */
+struct xfs_zone_cache_item {
+ struct xfs_mru_cache_elem mru;
+ struct xfs_open_zone *oz;
+};
+
+static inline struct xfs_zone_cache_item *
+xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
+{
+ return container_of(mru, struct xfs_zone_cache_item, mru);
+}
+
+static void
+xfs_zone_cache_free_func(
+ void *data,
+ struct xfs_mru_cache_elem *mru)
+{
+ struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
+
+ xfs_open_zone_put(item->oz);
+ kfree(item);
+}
+
+/*
+ * Check if we have a cached last open zone available for the inode and
+ * if yes return a reference to it.
+ */
+static struct xfs_open_zone *
+xfs_cached_zone(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mru_cache_elem *mru;
+ struct xfs_open_zone *oz;
+
+ mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
+ if (!mru)
+ return NULL;
+ oz = xfs_zone_cache_item(mru)->oz;
+ if (oz) {
+ /*
+ * GC only steals open zones at mount time, so no GC zones
+ * should end up in the cache.
+ */
+ ASSERT(!oz->oz_is_gc);
+ ASSERT(atomic_read(&oz->oz_ref) > 0);
+ atomic_inc(&oz->oz_ref);
+ }
+ xfs_mru_cache_done(mp->m_zone_cache);
+ return oz;
+}
+
+/*
+ * Update the last used zone cache for a given inode.
+ *
+ * The caller must have a reference on the open zone.
+ */
+static void
+xfs_zone_cache_create_association(
+ struct xfs_inode *ip,
+ struct xfs_open_zone *oz)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_zone_cache_item *item = NULL;
+ struct xfs_mru_cache_elem *mru;
+
+ ASSERT(atomic_read(&oz->oz_ref) > 0);
+ atomic_inc(&oz->oz_ref);
+
+ mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
+ if (mru) {
+ /*
+ * If we have an association already, update it to point to the
+ * new zone.
+ */
+ item = xfs_zone_cache_item(mru);
+ xfs_open_zone_put(item->oz);
+ item->oz = oz;
+ xfs_mru_cache_done(mp->m_zone_cache);
+ return;
+ }
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ xfs_open_zone_put(oz);
+ return;
+ }
+ item->oz = oz;
+ xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
+}
+
void
xfs_zone_alloc_and_submit(
struct iomap_ioend *ioend,
@@ -819,11 +914,16 @@ xfs_zone_alloc_and_submit(
*/
if (!*oz && ioend->io_offset)
*oz = xfs_last_used_zone(ioend);
+ if (!*oz)
+ *oz = xfs_cached_zone(mp, ip);
+
if (!*oz) {
select_zone:
*oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz)
goto out_error;
+
+ xfs_zone_cache_create_association(ip, *oz);
}
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -1211,6 +1311,14 @@ xfs_mount_zones(
error = xfs_zone_gc_mount(mp);
if (error)
goto out_free_zone_info;
+
+ /*
+ * Set up a mru cache to track inode to open zone for data placement
+ * purposes. The magic values for group count and life time is the
+ * same as the defaults for file streams, which seems sane enough.
+ */
+ xfs_mru_cache_create(&mp->m_zone_cache, mp,
+ 5000, 10, xfs_zone_cache_free_func);
return 0;
out_free_zone_info:
@@ -1224,4 +1332,5 @@ xfs_unmount_zones(
{
xfs_zone_gc_unmount(mp);
xfs_free_zone_info(mp->m_zone_info);
+ xfs_mru_cache_destroy(mp->m_zone_cache);
}
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 81c94dd1d596..d613a4094db6 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -807,7 +807,8 @@ xfs_zone_gc_write_chunk(
{
struct xfs_zone_gc_data *data = chunk->data;
struct xfs_mount *mp = chunk->ip->i_mount;
- unsigned int folio_offset = chunk->bio.bi_io_vec->bv_offset;
+ phys_addr_t bvec_paddr =
+ bvec_phys(bio_first_bvec_all(&chunk->bio));
struct xfs_gc_bio *split_chunk;
if (chunk->bio.bi_status)
@@ -822,7 +823,7 @@ xfs_zone_gc_write_chunk(
bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE);
bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len,
- folio_offset);
+ offset_in_folio(chunk->scratch->folio, bvec_paddr));
while ((split_chunk = xfs_zone_gc_split_write(data, chunk)))
xfs_zone_gc_submit_write(data, split_chunk);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index faf1eb87895d..d165eb979f21 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1111,28 +1111,19 @@ static int zonefs_read_super(struct super_block *sb)
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
struct zonefs_super *super;
u32 crc, stored_crc;
- struct page *page;
- struct bio_vec bio_vec;
- struct bio bio;
int ret;
- page = alloc_page(GFP_KERNEL);
- if (!page)
+ super = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!super)
return -ENOMEM;
- bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
- bio.bi_iter.bi_sector = 0;
- __bio_add_page(&bio, page, PAGE_SIZE, 0);
-
- ret = submit_bio_wait(&bio);
+ ret = bdev_rw_virt(sb->s_bdev, 0, super, PAGE_SIZE, REQ_OP_READ);
if (ret)
- goto free_page;
-
- super = page_address(page);
+ goto free_super;
ret = -EINVAL;
if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
- goto free_page;
+ goto free_super;
stored_crc = le32_to_cpu(super->s_crc);
super->s_crc = 0;
@@ -1140,14 +1131,14 @@ static int zonefs_read_super(struct super_block *sb)
if (crc != stored_crc) {
zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)",
crc, stored_crc);
- goto free_page;
+ goto free_super;
}
sbi->s_features = le64_to_cpu(super->s_features);
if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) {
zonefs_err(sb, "Unknown features set 0x%llx\n",
sbi->s_features);
- goto free_page;
+ goto free_super;
}
if (sbi->s_features & ZONEFS_F_UID) {
@@ -1155,7 +1146,7 @@ static int zonefs_read_super(struct super_block *sb)
le32_to_cpu(super->s_uid));
if (!uid_valid(sbi->s_uid)) {
zonefs_err(sb, "Invalid UID feature\n");
- goto free_page;
+ goto free_super;
}
}
@@ -1164,7 +1155,7 @@ static int zonefs_read_super(struct super_block *sb)
le32_to_cpu(super->s_gid));
if (!gid_valid(sbi->s_gid)) {
zonefs_err(sb, "Invalid GID feature\n");
- goto free_page;
+ goto free_super;
}
}
@@ -1173,15 +1164,14 @@ static int zonefs_read_super(struct super_block *sb)
if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) {
zonefs_err(sb, "Reserved area is being used\n");
- goto free_page;
+ goto free_super;
}
import_uuid(&sbi->s_uuid, super->s_uuid);
ret = 0;
-free_page:
- __free_page(page);
-
+free_super:
+ kfree(super);
return ret;
}
diff --git a/include/acpi/acbuffer.h b/include/acpi/acbuffer.h
index 252b235dce5a..cbc9aeabcd99 100644
--- a/include/acpi/acbuffer.h
+++ b/include/acpi/acbuffer.h
@@ -3,7 +3,7 @@
*
* Name: acbuffer.h - Support for buffers returned by ACPI predefined names
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 2da5f4a6e814..521d4bfa6ef0 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -3,7 +3,7 @@
*
* Name: acconfig.h - Global configuration constants
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index c5ecd0a0170c..53c98f5fe3c3 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -3,7 +3,7 @@
*
* Name: acexcep.h - Exception codes returned by the ACPI subsystem
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index 76aa6aa346ba..cb6a4dcc4e8e 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -3,7 +3,7 @@
*
* Name: acnames.h - Global names and strings
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 5e0346142f98..3584f33e352c 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -3,7 +3,7 @@
*
* Name: acoutput.h -- debug output
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h
index 8b4a497c1300..92bf80937e5f 100644
--- a/include/acpi/acpi.h
+++ b/include/acpi/acpi.h
@@ -3,7 +3,7 @@
*
* Name: acpi.h - Master public include file used to interface to ACPICA
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 914c029f64c9..65c5737b6286 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -5,7 +5,7 @@
* interfaces must be implemented by OSL to interface the
* ACPI components to the host operating system.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 78b24b090488..b49396aa4058 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -3,7 +3,7 @@
*
* Name: acpixf.h - External interfaces to the ACPI subsystem
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -12,7 +12,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20240827
+#define ACPI_CA_VERSION 0x20250404
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index efef208b0324..842f932e2c2b 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -3,7 +3,7 @@
*
* Name: acrestyp.h - Defines, types, and structures for resource descriptors
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 2fc89704be17..243097a3da63 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -3,7 +3,7 @@
*
* Name: actbl.h - Basic ACPI Table Definitions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -66,12 +66,12 @@
******************************************************************************/
struct acpi_table_header {
- char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */
+ char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; /* ASCII table signature */
u32 length; /* Length of table in bytes, including this header */
u8 revision; /* ACPI Specification minor version number */
u8 checksum; /* To make sum of entire table == 0 */
- char oem_id[ACPI_OEM_ID_SIZE]; /* ASCII OEM identification */
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; /* ASCII OEM table identification */
+ char oem_id[ACPI_OEM_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM identification */
+ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM table identification */
u32 oem_revision; /* OEM revision number */
char asl_compiler_id[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */
u32 asl_compiler_revision; /* ASL compiler version */
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 387fc821703a..99fd1588ff38 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -3,7 +3,7 @@
*
* Name: actbl1.h - Additional ACPI table definitions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -155,6 +155,13 @@ struct acpi_aspt_acpi_mbox_regs {
u64 reserved2[2];
};
+/* Larger subtable header (when Length can exceed 255) */
+
+struct acpi_subtbl_hdr_16 {
+ u16 type;
+ u16 length;
+};
+
/*******************************************************************************
*
* ASF - Alert Standard Format table (Signature "ASF!")
@@ -819,7 +826,8 @@ enum acpi_dmar_type {
ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3,
ACPI_DMAR_TYPE_NAMESPACE = 4,
ACPI_DMAR_TYPE_SATC = 5,
- ACPI_DMAR_TYPE_RESERVED = 6 /* 6 and greater are reserved */
+ ACPI_DMAR_TYPE_SIDP = 6,
+ ACPI_DMAR_TYPE_RESERVED = 7 /* 7 and greater are reserved */
};
/* DMAR Device Scope structure */
@@ -827,7 +835,8 @@ enum acpi_dmar_type {
struct acpi_dmar_device_scope {
u8 entry_type;
u8 length;
- u16 reserved;
+ u8 flags;
+ u8 reserved;
u8 enumeration_id;
u8 bus;
};
@@ -923,6 +932,15 @@ struct acpi_dmar_satc {
u8 reserved;
u16 segment;
};
+
+/* 6: so_c Integrated Device Property Reporting Structure */
+
+struct acpi_dmar_sidp {
+ struct acpi_dmar_header header;
+ u16 reserved;
+ u16 segment;
+};
+
/*******************************************************************************
*
* DRTM - Dynamic Root of Trust for Measurement table
@@ -1024,17 +1042,18 @@ struct acpi_einj_entry {
/* Values for Action field above */
enum acpi_einj_actions {
- ACPI_EINJ_BEGIN_OPERATION = 0,
- ACPI_EINJ_GET_TRIGGER_TABLE = 1,
- ACPI_EINJ_SET_ERROR_TYPE = 2,
- ACPI_EINJ_GET_ERROR_TYPE = 3,
- ACPI_EINJ_END_OPERATION = 4,
- ACPI_EINJ_EXECUTE_OPERATION = 5,
- ACPI_EINJ_CHECK_BUSY_STATUS = 6,
- ACPI_EINJ_GET_COMMAND_STATUS = 7,
- ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
- ACPI_EINJ_GET_EXECUTE_TIMINGS = 9,
- ACPI_EINJ_ACTION_RESERVED = 10, /* 10 and greater are reserved */
+ ACPI_EINJ_BEGIN_OPERATION = 0x0,
+ ACPI_EINJ_GET_TRIGGER_TABLE = 0x1,
+ ACPI_EINJ_SET_ERROR_TYPE = 0x2,
+ ACPI_EINJ_GET_ERROR_TYPE = 0x3,
+ ACPI_EINJ_END_OPERATION = 0x4,
+ ACPI_EINJ_EXECUTE_OPERATION = 0x5,
+ ACPI_EINJ_CHECK_BUSY_STATUS = 0x6,
+ ACPI_EINJ_GET_COMMAND_STATUS = 0x7,
+ ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 0x8,
+ ACPI_EINJ_GET_EXECUTE_TIMINGS = 0x9,
+ ACPI_EINJV2_GET_ERROR_TYPE = 0x11,
+ ACPI_EINJ_ACTION_RESERVED = 0x12, /* 0x12 and greater are reserved */
ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
};
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 2e917a8f8bca..048f5f47f8b8 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
/******************************************************************************
*
- * Name: actbl2.h - ACPI Table Definitions (tables not in ACPI spec)
+ * Name: actbl2.h - ACPI Table Definitions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -29,6 +29,7 @@
#define ACPI_SIG_BDAT "BDAT" /* BIOS Data ACPI Table */
#define ACPI_SIG_CCEL "CCEL" /* CC Event Log Table */
#define ACPI_SIG_CDAT "CDAT" /* Coherent Device Attribute Table */
+#define ACPI_SIG_ERDT "ERDT" /* Enhanced Resource Director Technology */
#define ACPI_SIG_IORT "IORT" /* IO Remapping Table */
#define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */
#define ACPI_SIG_LPIT "LPIT" /* Low Power Idle Table */
@@ -37,6 +38,7 @@
#define ACPI_SIG_MCHI "MCHI" /* Management Controller Host Interface table */
#define ACPI_SIG_MPAM "MPAM" /* Memory System Resource Partitioning and Monitoring Table */
#define ACPI_SIG_MPST "MPST" /* Memory Power State Table */
+#define ACPI_SIG_MRRM "MRRM" /* Memory Range and Region Mapping table */
#define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */
#define ACPI_SIG_NFIT "NFIT" /* NVDIMM Firmware Interface Table */
#define ACPI_SIG_NHLT "NHLT" /* Non HD Audio Link Table */
@@ -50,6 +52,7 @@
#define ACPI_SIG_RAS2 "RAS2" /* RAS2 Feature table */
#define ACPI_SIG_RGRT "RGRT" /* Regulatory Graphics Resource Table */
#define ACPI_SIG_RHCT "RHCT" /* RISC-V Hart Capabilities Table */
+#define ACPI_SIG_RIMT "RIMT" /* RISC-V IO Mapping Table */
#define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */
#define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */
#define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */
@@ -450,6 +453,195 @@ struct acpi_table_ccel {
/*******************************************************************************
*
+ * ERDT - Enhanced Resource Director Technology (ERDT) table
+ *
+ * Conforms to "Intel Resource Director Technology Architecture Specification"
+ * Version 1.1, January 2025
+ *
+ ******************************************************************************/
+
+struct acpi_table_erdt {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u32 max_clos; /* Maximum classes of service */
+ u8 reserved[24];
+ u8 erdt_substructures[];
+};
+
+/* Values for subtable type in struct acpi_subtbl_hdr_16 */
+
+enum acpi_erdt_type {
+ ACPI_ERDT_TYPE_RMDD = 0,
+ ACPI_ERDT_TYPE_CACD = 1,
+ ACPI_ERDT_TYPE_DACD = 2,
+ ACPI_ERDT_TYPE_CMRC = 3,
+ ACPI_ERDT_TYPE_MMRC = 4,
+ ACPI_ERDT_TYPE_MARC = 5,
+ ACPI_ERDT_TYPE_CARC = 6,
+ ACPI_ERDT_TYPE_CMRD = 7,
+ ACPI_ERDT_TYPE_IBRD = 8,
+ ACPI_ERDT_TYPE_IBAD = 9,
+ ACPI_ERDT_TYPE_CARD = 10,
+ ACPI_ERDT_TYPE_RESERVED = 11 /* 11 and above are reserved */
+};
+
+/*
+ * ERDT Subtables, correspond to Type in struct acpi_subtbl_hdr_16
+ */
+
+/* 0: RMDD - Resource Management Domain Description */
+
+struct acpi_erdt_rmdd {
+ struct acpi_subtbl_hdr_16 header;
+ u16 flags;
+ u16 IO_l3_slices; /* Number of slices in IO cache */
+ u8 IO_l3_sets; /* Number of sets in IO cache */
+ u8 IO_l3_ways; /* Number of ways in IO cache */
+ u64 reserved;
+ u16 domain_id; /* Unique domain ID */
+ u32 max_rmid; /* Maximun RMID supported */
+ u64 creg_base; /* Control Register Base Address */
+ u16 creg_size; /* Control Register Size (4K pages) */
+ u8 rmdd_structs[];
+};
+
+/* 1: CACD - CPU Agent Collection Description */
+
+struct acpi_erdt_cacd {
+ struct acpi_subtbl_hdr_16 header;
+ u16 reserved;
+ u16 domain_id; /* Unique domain ID */
+ u32 X2APICIDS[];
+};
+
+/* 2: DACD - Device Agent Collection Description */
+
+struct acpi_erdt_dacd {
+ struct acpi_subtbl_hdr_16 header;
+ u16 reserved;
+ u16 domain_id; /* Unique domain ID */
+ u8 dev_paths[];
+};
+
+struct acpi_erdt_dacd_dev_paths {
+ struct acpi_subtable_header header;
+ u16 segment;
+ u8 reserved;
+ u8 start_bus;
+ u8 path[];
+};
+
+/* 3: CMRC - Cache Monitoring Registers for CPU Agents */
+
+struct acpi_erdt_cmrc {
+ struct acpi_subtbl_hdr_16 header;
+ u32 reserved1;
+ u32 flags;
+ u8 index_fn;
+ u8 reserved2[11];
+ u64 cmt_reg_base;
+ u32 cmt_reg_size;
+ u16 clump_size;
+ u16 clump_stride;
+ u64 up_scale;
+};
+
+/* 4: MMRC - Memory-bandwidth Monitoring Registers for CPU Agents */
+
+struct acpi_erdt_mmrc {
+ struct acpi_subtbl_hdr_16 header;
+ u32 reserved1;
+ u32 flags;
+ u8 index_fn;
+ u8 reserved2[11];
+ u64 reg_base;
+ u32 reg_size;
+ u8 counter_width;
+ u64 up_scale;
+ u8 reserved3[7];
+ u32 corr_factor_list_len;
+ u32 corr_factor_list[];
+};
+
+/* 5: MARC - Memory-bandwidth Allocation Registers for CPU Agents */
+
+struct acpi_erdt_marc {
+ struct acpi_subtbl_hdr_16 header;
+ u16 reserved1;
+ u16 flags;
+ u8 index_fn;
+ u8 reserved2[7];
+ u64 reg_base_opt;
+ u64 reg_base_min;
+ u64 reg_base_max;
+ u32 mba_reg_size;
+ u32 mba_ctrl_range;
+};
+
+/* 6: CARC - Cache Allocation Registers for CPU Agents */
+
+struct acpi_erdt_carc {
+ struct acpi_subtbl_hdr_16 header;
+};
+
+/* 7: CMRD - Cache Monitoring Registers for Device Agents */
+
+struct acpi_erdt_cmrd {
+ struct acpi_subtbl_hdr_16 header;
+ u32 reserved1;
+ u32 flags;
+ u8 index_fn;
+ u8 reserved2[11];
+ u64 reg_base;
+ u32 reg_size;
+ u16 cmt_reg_off;
+ u16 cmt_clump_size;
+ u64 up_scale;
+};
+
+/* 8: IBRD - Cache Monitoring Registers for Device Agents */
+
+struct acpi_erdt_ibrd {
+ struct acpi_subtbl_hdr_16 header;
+ u32 reserved1;
+ u32 flags;
+ u8 index_fn;
+ u8 reserved2[11];
+ u64 reg_base;
+ u32 reg_size;
+ u16 total_bw_offset;
+ u16 Iomiss_bw_offset;
+ u16 total_bw_clump;
+ u16 Iomiss_bw_clump;
+ u8 reserved3[7];
+ u8 counter_width;
+ u64 up_scale;
+ u32 corr_factor_list_len;
+ u32 corr_factor_list[];
+};
+
+/* 9: IBAD - IO bandwidth Allocation Registers for device agents */
+
+struct acpi_erdt_ibad {
+ struct acpi_subtbl_hdr_16 header;
+};
+
+/* 10: CARD - IO bandwidth Allocation Registers for Device Agents */
+
+struct acpi_erdt_card {
+ struct acpi_subtbl_hdr_16 header;
+ u32 reserved1;
+ u32 flags;
+ u32 contention_mask;
+ u8 index_fn;
+ u8 reserved2[7];
+ u64 reg_base;
+ u32 reg_size;
+ u16 cat_reg_offset;
+ u16 cat_reg_block_size;
+};
+
+/*******************************************************************************
+ *
* IORT - IO Remapping Table
*
* Conforms to "IO Remapping Table System Software on ARM Platforms",
@@ -1738,6 +1930,47 @@ struct acpi_msct_proximity {
/*******************************************************************************
*
+ * MRRM - Memory Range and Region Mapping (MRRM) table
+ * Conforms to "Intel Resource Director Technology Architecture Specification"
+ * Version 1.1, January 2025
+ *
+ ******************************************************************************/
+
+struct acpi_table_mrrm {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u8 max_mem_region; /* Max Memory Regions supported */
+ u8 flags; /* Region assignment type */
+ u8 reserved[26];
+ u8 memory_range_entry[];
+};
+
+/* Flags */
+#define ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS (1<<0)
+
+/*******************************************************************************
+ *
+ * Memory Range entry - Memory Range entry in MRRM table
+ *
+ ******************************************************************************/
+
+struct acpi_mrrm_mem_range_entry {
+ struct acpi_subtbl_hdr_16 header;
+ u32 reserved0; /* Reserved */
+ u64 addr_base; /* Base addr of the mem range */
+ u64 addr_len; /* Length of the mem range */
+ u16 region_id_flags; /* Valid local or remote Region-ID */
+ u8 local_region_id; /* Platform-assigned static local Region-ID */
+ u8 remote_region_id; /* Platform-assigned static remote Region-ID */
+ u32 reserved1; /* Reserved */
+ /* Region-ID Programming Registers[] */
+};
+
+/* Values for region_id_flags above */
+#define ACPI_MRRM_VALID_REGION_ID_FLAGS_LOCAL (1<<0)
+#define ACPI_MRRM_VALID_REGION_ID_FLAGS_REMOTE (1<<1)
+
+/*******************************************************************************
+ *
* MSDM - Microsoft Data Management table
*
* Conforms to "Microsoft Software Licensing Tables (SLIC and MSDM)",
@@ -2802,15 +3035,15 @@ struct acpi_ras2_pcc_desc {
/* RAS2 Platform Communication Channel Shared Memory Region */
-struct acpi_ras2_shared_memory {
+struct acpi_ras2_shmem {
u32 signature;
u16 command;
u16 status;
u16 version;
u8 features[16];
- u8 set_capabilities[16];
- u16 num_parameter_blocks;
- u32 set_capabilities_status;
+ u8 set_caps[16];
+ u16 num_param_blks;
+ u32 set_caps_status;
};
/* RAS2 Parameter Block Structure for PATROL_SCRUB */
@@ -2823,11 +3056,11 @@ struct acpi_ras2_parameter_block {
/* RAS2 Parameter Block Structure for PATROL_SCRUB */
-struct acpi_ras2_patrol_scrub_parameter {
+struct acpi_ras2_patrol_scrub_param {
struct acpi_ras2_parameter_block header;
- u16 patrol_scrub_command;
- u64 requested_address_range[2];
- u64 actual_address_range[2];
+ u16 command;
+ u64 req_addr_range[2];
+ u64 actl_addr_range[2];
u32 flags;
u32 scrub_params_out;
u32 scrub_params_in;
@@ -3004,6 +3237,88 @@ struct acpi_rhct_hart_info {
/*******************************************************************************
*
+ * RIMT - RISC-V IO Remapping Table
+ *
+ * https://github.com/riscv-non-isa/riscv-acpi-rimt
+ *
+ ******************************************************************************/
+
+struct acpi_table_rimt {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u32 num_nodes; /* Number of RIMT Nodes */
+ u32 node_offset; /* Offset to RIMT Node Array */
+ u32 reserved;
+};
+
+struct acpi_rimt_node {
+ u8 type;
+ u8 revision;
+ u16 length;
+ u16 reserved;
+ u16 id;
+ char node_data[];
+};
+
+enum acpi_rimt_node_type {
+ ACPI_RIMT_NODE_TYPE_IOMMU = 0x0,
+ ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX = 0x1,
+ ACPI_RIMT_NODE_TYPE_PLAT_DEVICE = 0x2,
+};
+
+struct acpi_rimt_iommu {
+ u8 hardware_id[8]; /* Hardware ID */
+ u64 base_address; /* Base Address */
+ u32 flags; /* Flags */
+ u32 proximity_domain; /* Proximity Domain */
+ u16 pcie_segment_number; /* PCIe Segment number */
+ u16 pcie_bdf; /* PCIe B/D/F */
+ u16 num_interrupt_wires; /* Number of interrupt wires */
+ u16 interrupt_wire_offset; /* Interrupt wire array offset */
+ u64 interrupt_wire[]; /* Interrupt wire array */
+};
+
+/* IOMMU Node Flags */
+#define ACPI_RIMT_IOMMU_FLAGS_PCIE (1)
+#define ACPI_RIMT_IOMMU_FLAGS_PXM_VALID (1 << 1)
+
+/* Interrupt Wire Structure */
+struct acpi_rimt_iommu_wire_gsi {
+ u32 irq_num; /* Interrupt Number */
+ u32 flags; /* Flags */
+};
+
+/* Interrupt Wire Flags */
+#define ACPI_RIMT_GSI_LEVEL_TRIGGERRED (1)
+#define ACPI_RIMT_GSI_ACTIVE_HIGH (1 << 1)
+
+struct acpi_rimt_id_mapping {
+ u32 source_id_base; /* Source ID Base */
+ u32 num_ids; /* Number of IDs */
+ u32 dest_id_base; /* Destination Device ID Base */
+ u32 dest_offset; /* Destination IOMMU Offset */
+ u32 flags; /* Flags */
+};
+
+struct acpi_rimt_pcie_rc {
+ u32 flags; /* Flags */
+ u16 reserved; /* Reserved */
+ u16 pcie_segment_number; /* PCIe Segment number */
+ u16 id_mapping_offset; /* ID mapping array offset */
+ u16 num_id_mappings; /* Number of ID mappings */
+};
+
+/* PCIe Root Complex Node Flags */
+#define ACPI_RIMT_PCIE_ATS_SUPPORTED (1)
+#define ACPI_RIMT_PCIE_PRI_SUPPORTED (1 << 1)
+
+struct acpi_rimt_platform_device {
+ u16 id_mapping_offset; /* ID Mapping array offset */
+ u16 num_id_mappings; /* Number of ID mappings */
+ char device_name[]; /* Device Object Name */
+};
+
+/*******************************************************************************
+ *
* SBST - Smart Battery Specification Table
* Version 1
*
diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index a97b1dbab975..79d3aa5a4bad 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -3,7 +3,7 @@
*
* Name: actbl3.h - ACPI Table Definitions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -269,7 +269,7 @@ struct acpi_srat_gicc_affinity {
#define ACPI_SRAT_GICC_ENABLED (1) /* 00: Use affinity structure */
-/* 4: GCC ITS Affinity (ACPI 6.2) */
+/* 4: GIC ITS Affinity (ACPI 6.2) */
struct acpi_srat_gic_its_affinity {
struct acpi_subtable_header header;
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 80767e8bf3ad..8fe893d776dd 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -3,7 +3,7 @@
*
* Name: actypes.h - Common data types for the entire ACPI subsystem
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -522,12 +522,12 @@ typedef u64 acpi_integer;
#define ACPI_COPY_NAMESEG(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src)))
#else
#define ACPI_COMPARE_NAMESEG(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAMESEG_SIZE))
-#define ACPI_COPY_NAMESEG(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAMESEG_SIZE))
+#define ACPI_COPY_NAMESEG(dest,src) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAMESEG_SIZE))
#endif
/* Support for the special RSDP signature (8 characters) */
-#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8))
+#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, (sizeof(a) < 8) ? ACPI_NAMESEG_SIZE : 8))
#define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8))
/* Support for OEMx signature (x can be any character) */
@@ -1327,4 +1327,8 @@ typedef enum {
#define ACPI_FLEX_ARRAY(TYPE, NAME) TYPE NAME[0]
#endif
+#ifndef ACPI_NONSTRING
+#define ACPI_NONSTRING /* No terminating NUL character */
+#endif
+
#endif /* __ACTYPES_H__ */
diff --git a/include/acpi/acuuid.h b/include/acpi/acuuid.h
index 52a84523bfac..25dd3e998727 100644
--- a/include/acpi/acuuid.h
+++ b/include/acpi/acuuid.h
@@ -3,7 +3,7 @@
*
* Name: acuuid.h - ACPI-related UUID/GUID definitions
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 62d368bcd9ec..325e9543e08f 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -32,6 +32,15 @@
#define CMD_READ 0
#define CMD_WRITE 1
+#define CPPC_AUTO_ACT_WINDOW_SIG_BIT_SIZE (7)
+#define CPPC_AUTO_ACT_WINDOW_EXP_BIT_SIZE (3)
+#define CPPC_AUTO_ACT_WINDOW_MAX_SIG ((1 << CPPC_AUTO_ACT_WINDOW_SIG_BIT_SIZE) - 1)
+#define CPPC_AUTO_ACT_WINDOW_MAX_EXP ((1 << CPPC_AUTO_ACT_WINDOW_EXP_BIT_SIZE) - 1)
+/* CPPC_AUTO_ACT_WINDOW_MAX_SIG is 127, so 128 and 129 will decay to 127 when writing */
+#define CPPC_AUTO_ACT_WINDOW_SIG_CARRY_THRESH 129
+
+#define CPPC_ENERGY_PERF_MAX (0xFF)
+
/* Each register has the folowing format. */
struct cpc_reg {
u8 descriptor;
@@ -159,7 +168,10 @@ extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val);
extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val);
extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf);
extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
-extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps);
+extern int cppc_set_epp(int cpu, u64 epp_val);
+extern int cppc_get_auto_act_window(int cpu, u64 *auto_act_window);
+extern int cppc_set_auto_act_window(int cpu, u64 auto_act_window);
+extern int cppc_get_auto_sel(int cpu, bool *enable);
extern int cppc_set_auto_sel(int cpu, bool enable);
extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf);
extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
@@ -229,11 +241,23 @@ static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
{
return -EOPNOTSUPP;
}
-static inline int cppc_set_auto_sel(int cpu, bool enable)
+static inline int cppc_set_epp(int cpu, u64 epp_val)
{
return -EOPNOTSUPP;
}
-static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps)
+static inline int cppc_get_auto_act_window(int cpu, u64 *auto_act_window)
+{
+ return -EOPNOTSUPP;
+}
+static inline int cppc_set_auto_act_window(int cpu, u64 auto_act_window)
+{
+ return -EOPNOTSUPP;
+}
+static inline int cppc_get_auto_sel(int cpu, bool *enable)
+{
+ return -EOPNOTSUPP;
+}
+static inline int cppc_set_auto_sel(int cpu, bool enable)
{
return -EOPNOTSUPP;
}
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 3f31df09a9d6..a11fa83955f8 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -3,7 +3,7 @@
*
* Name: acenv.h - Host and compiler configuration
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h
index 7e67e3503f7b..8ffc4e1c87cf 100644
--- a/include/acpi/platform/acenvex.h
+++ b/include/acpi/platform/acenvex.h
@@ -3,7 +3,7 @@
*
* Name: acenvex.h - Extra host and compiler configuration
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 04b4bf620517..8e4cf2f6b383 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -3,7 +3,7 @@
*
* Name: acgcc.h - GCC specific defines, etc.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -72,4 +72,12 @@
TYPE NAME[]; \
}
+/*
+ * Explicitly mark strings that lack a terminating NUL character so
+ * that ACPICA can be built with -Wunterminated-string-initialization.
+ */
+#if __has_attribute(__nonstring__)
+#define ACPI_NONSTRING __attribute__((__nonstring__))
+#endif
+
#endif /* __ACGCC_H__ */
diff --git a/include/acpi/platform/acgccex.h b/include/acpi/platform/acgccex.h
index 7c9f10e9633a..4a3c019a4d03 100644
--- a/include/acpi/platform/acgccex.h
+++ b/include/acpi/platform/acgccex.h
@@ -3,7 +3,7 @@
*
* Name: acgccex.h - Extra GCC specific defines, etc.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index f3249b7df5cb..edbbc9061d1e 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -3,7 +3,7 @@
*
* Name: aclinux.h - OS specific defines, etc. for Linux
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index eeff40295b4b..73265650f46b 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -3,7 +3,7 @@
*
* Name: aclinuxex.h - Extra OS specific defines, etc. for Linux
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/acpi/platform/aczephyr.h b/include/acpi/platform/aczephyr.h
index 703db4dc740d..03d9a4a39c80 100644
--- a/include/acpi/platform/aczephyr.h
+++ b/include/acpi/platform/aczephyr.h
@@ -3,7 +3,7 @@
*
* Module Name: aczephyr.h - OS specific defines, etc.
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
index d0343d58a74a..ac29a22eb7cf 100644
--- a/include/asm-generic/simd.h
+++ b/include/asm-generic/simd.h
@@ -1,6 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_SIMD_H
+#define _ASM_GENERIC_SIMD_H
-#include <linux/hardirq.h>
+#include <linux/compiler_attributes.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
@@ -13,3 +17,5 @@ static __must_check inline bool may_use_simd(void)
{
return !in_interrupt();
}
+
+#endif /* _ASM_GENERIC_SIMD_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 58a635a6d5bd..66409bc3a4e0 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -108,13 +108,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
#define TEXT_MAIN .text
#endif
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
-#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
+#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data.rel.* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral*
#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
#else
-#define DATA_MAIN .data
+#define DATA_MAIN .data .data.rel .data.rel.local
#define SDATA_MAIN .sdata
#define RODATA_MAIN .rodata
#define BSS_MAIN .bss
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index c497c73baf13..9eacb9fa375d 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -32,30 +32,28 @@
/* Set this bit for if virtual address destination cannot be used for DMA. */
#define CRYPTO_ACOMP_REQ_DST_NONDMA 0x00000010
-/* Set this bit if source is a folio. */
-#define CRYPTO_ACOMP_REQ_SRC_FOLIO 0x00000020
-
-/* Set this bit if destination is a folio. */
-#define CRYPTO_ACOMP_REQ_DST_FOLIO 0x00000040
+/* Private flags that should not be touched by the user. */
+#define CRYPTO_ACOMP_REQ_PRIVATE \
+ (CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | \
+ CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA)
#define CRYPTO_ACOMP_DST_MAX 131072
#define MAX_SYNC_COMP_REQSIZE 0
-#define ACOMP_REQUEST_ALLOC(name, tfm, gfp) \
+#define ACOMP_REQUEST_ON_STACK(name, tfm) \
char __##name##_req[sizeof(struct acomp_req) + \
MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \
struct acomp_req *name = acomp_request_on_stack_init( \
- __##name##_req, (tfm), (gfp), false)
+ __##name##_req, (tfm))
+
+#define ACOMP_REQUEST_CLONE(name, gfp) \
+ acomp_request_clone(name, sizeof(__##name##_req), gfp)
struct acomp_req;
struct folio;
struct acomp_req_chain {
- struct list_head head;
- struct acomp_req *req0;
- struct acomp_req *cur;
- int (*op)(struct acomp_req *req);
crypto_completion_t compl;
void *data;
struct scatterlist ssg;
@@ -68,8 +66,6 @@ struct acomp_req_chain {
u8 *dst;
struct folio *dfolio;
};
- size_t soff;
- size_t doff;
u32 flags;
};
@@ -81,10 +77,6 @@ struct acomp_req_chain {
* @dst: Destination scatterlist
* @svirt: Source virtual address
* @dvirt: Destination virtual address
- * @sfolio: Source folio
- * @soff: Source folio offset
- * @dfolio: Destination folio
- * @doff: Destination folio offset
* @slen: Size of the input buffer
* @dlen: Size of the output buffer and number of bytes produced
* @chain: Private API code data, do not use
@@ -95,15 +87,11 @@ struct acomp_req {
union {
struct scatterlist *src;
const u8 *svirt;
- struct folio *sfolio;
};
union {
struct scatterlist *dst;
u8 *dvirt;
- struct folio *dfolio;
};
- size_t soff;
- size_t doff;
unsigned int slen;
unsigned int dlen;
@@ -126,18 +114,11 @@ struct crypto_acomp {
int (*compress)(struct acomp_req *req);
int (*decompress)(struct acomp_req *req);
unsigned int reqsize;
- struct crypto_acomp *fb;
struct crypto_tfm base;
};
-struct crypto_acomp_stream {
- spinlock_t lock;
- void *ctx;
-};
-
#define COMP_ALG_COMMON { \
struct crypto_alg base; \
- struct crypto_acomp_stream __percpu *stream; \
}
struct comp_alg_common COMP_ALG_COMMON;
@@ -213,7 +194,7 @@ static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm)
static inline void acomp_request_set_tfm(struct acomp_req *req,
struct crypto_acomp *tfm)
{
- req->base.tfm = crypto_acomp_tfm(tfm);
+ crypto_request_set_tfm(&req->base, crypto_acomp_tfm(tfm));
}
static inline bool acomp_is_async(struct crypto_acomp *tfm)
@@ -310,6 +291,11 @@ static inline void *acomp_request_extra(struct acomp_req *req)
return (void *)((char *)req + len);
}
+static inline bool acomp_req_on_stack(struct acomp_req *req)
+{
+ return crypto_req_on_stack(&req->base);
+}
+
/**
* acomp_request_free() -- zeroize and free asynchronous (de)compression
* request as well as the output buffer if allocated
@@ -319,7 +305,7 @@ static inline void *acomp_request_extra(struct acomp_req *req)
*/
static inline void acomp_request_free(struct acomp_req *req)
{
- if (!req || (req->base.flags & CRYPTO_TFM_REQ_ON_STACK))
+ if (!req || acomp_req_on_stack(req))
return;
kfree_sensitive(req);
}
@@ -340,17 +326,9 @@ static inline void acomp_request_set_callback(struct acomp_req *req,
crypto_completion_t cmpl,
void *data)
{
- u32 keep = CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA |
- CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA |
- CRYPTO_ACOMP_REQ_SRC_FOLIO | CRYPTO_ACOMP_REQ_DST_FOLIO |
- CRYPTO_TFM_REQ_ON_STACK;
-
- req->base.complete = cmpl;
- req->base.data = data;
- req->base.flags &= keep;
- req->base.flags |= flgs & ~keep;
-
- crypto_reqchain_init(&req->base);
+ flgs &= ~CRYPTO_ACOMP_REQ_PRIVATE;
+ flgs |= req->base.flags & CRYPTO_ACOMP_REQ_PRIVATE;
+ crypto_request_set_callback(&req->base, flgs, cmpl, data);
}
/**
@@ -379,8 +357,6 @@ static inline void acomp_request_set_params(struct acomp_req *req,
req->base.flags &= ~(CRYPTO_ACOMP_REQ_SRC_VIRT |
CRYPTO_ACOMP_REQ_SRC_NONDMA |
- CRYPTO_ACOMP_REQ_SRC_FOLIO |
- CRYPTO_ACOMP_REQ_DST_FOLIO |
CRYPTO_ACOMP_REQ_DST_VIRT |
CRYPTO_ACOMP_REQ_DST_NONDMA);
}
@@ -403,7 +379,6 @@ static inline void acomp_request_set_src_sg(struct acomp_req *req,
req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA;
req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_VIRT;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO;
}
/**
@@ -423,7 +398,6 @@ static inline void acomp_request_set_src_dma(struct acomp_req *req,
req->slen = slen;
req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO;
req->base.flags |= CRYPTO_ACOMP_REQ_SRC_VIRT;
}
@@ -444,7 +418,6 @@ static inline void acomp_request_set_src_nondma(struct acomp_req *req,
req->svirt = src;
req->slen = slen;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO;
req->base.flags |= CRYPTO_ACOMP_REQ_SRC_NONDMA;
req->base.flags |= CRYPTO_ACOMP_REQ_SRC_VIRT;
}
@@ -463,13 +436,9 @@ static inline void acomp_request_set_src_folio(struct acomp_req *req,
struct folio *folio, size_t off,
unsigned int len)
{
- req->sfolio = folio;
- req->soff = off;
- req->slen = len;
-
- req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_VIRT;
- req->base.flags |= CRYPTO_ACOMP_REQ_SRC_FOLIO;
+ sg_init_table(&req->chain.ssg, 1);
+ sg_set_folio(&req->chain.ssg, folio, len, off);
+ acomp_request_set_src_sg(req, &req->chain.ssg, len);
}
/**
@@ -490,7 +459,6 @@ static inline void acomp_request_set_dst_sg(struct acomp_req *req,
req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA;
req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_VIRT;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO;
}
/**
@@ -510,7 +478,6 @@ static inline void acomp_request_set_dst_dma(struct acomp_req *req,
req->dlen = dlen;
req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO;
req->base.flags |= CRYPTO_ACOMP_REQ_DST_VIRT;
}
@@ -530,7 +497,6 @@ static inline void acomp_request_set_dst_nondma(struct acomp_req *req,
req->dvirt = dst;
req->dlen = dlen;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO;
req->base.flags |= CRYPTO_ACOMP_REQ_DST_NONDMA;
req->base.flags |= CRYPTO_ACOMP_REQ_DST_VIRT;
}
@@ -549,19 +515,9 @@ static inline void acomp_request_set_dst_folio(struct acomp_req *req,
struct folio *folio, size_t off,
unsigned int len)
{
- req->dfolio = folio;
- req->doff = off;
- req->dlen = len;
-
- req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA;
- req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_VIRT;
- req->base.flags |= CRYPTO_ACOMP_REQ_DST_FOLIO;
-}
-
-static inline void acomp_request_chain(struct acomp_req *req,
- struct acomp_req *head)
-{
- crypto_request_chain(&req->base, &head->base);
+ sg_init_table(&req->chain.dsg, 1);
+ sg_set_folio(&req->chain.dsg, folio, len, off);
+ acomp_request_set_dst_sg(req, &req->chain.dsg, len);
}
/**
@@ -587,18 +543,15 @@ int crypto_acomp_compress(struct acomp_req *req);
int crypto_acomp_decompress(struct acomp_req *req);
static inline struct acomp_req *acomp_request_on_stack_init(
- char *buf, struct crypto_acomp *tfm, gfp_t gfp, bool stackonly)
+ char *buf, struct crypto_acomp *tfm)
{
- struct acomp_req *req;
-
- if (!stackonly && (req = acomp_request_alloc(tfm, gfp)))
- return req;
-
- req = (void *)buf;
- acomp_request_set_tfm(req, tfm->fb);
- req->base.flags = CRYPTO_TFM_REQ_ON_STACK;
+ struct acomp_req *req = (void *)buf;
+ crypto_stack_request_init(&req->base, crypto_acomp_tfm(tfm));
return req;
}
+struct acomp_req *acomp_request_clone(struct acomp_req *req,
+ size_t total, gfp_t gfp);
+
#endif
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 6e07bbc04089..188eface0a11 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -68,16 +68,17 @@ struct crypto_instance {
struct crypto_spawn *spawns;
};
- struct work_struct free_work;
-
void *__ctx[] CRYPTO_MINALIGN_ATTR;
};
struct crypto_template {
struct list_head list;
struct hlist_head instances;
+ struct hlist_head dead;
struct module *module;
+ struct work_struct free_work;
+
int (*create)(struct crypto_template *tmpl, struct rtattr **tb);
char name[CRYPTO_MAX_ALG_NAME];
@@ -106,18 +107,6 @@ struct crypto_queue {
unsigned int max_qlen;
};
-struct scatter_walk {
- /* Must be the first member, see struct skcipher_walk. */
- union {
- void *const addr;
-
- /* Private API field, do not touch. */
- union crypto_no_such_thing *__addr;
- };
- struct scatterlist *sg;
- unsigned int offset;
-};
-
struct crypto_attr_alg {
char name[CRYPTO_MAX_ALG_NAME];
};
@@ -157,8 +146,16 @@ void *crypto_spawn_tfm2(struct crypto_spawn *spawn);
struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb);
int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret);
const char *crypto_attr_alg_name(struct rtattr *rta);
-int crypto_inst_setname(struct crypto_instance *inst, const char *name,
- struct crypto_alg *alg);
+int __crypto_inst_setname(struct crypto_instance *inst, const char *name,
+ const char *driver, struct crypto_alg *alg);
+
+#define crypto_inst_setname(inst, name, ...) \
+ CONCATENATE(crypto_inst_setname_, COUNT_ARGS(__VA_ARGS__))( \
+ inst, name, ##__VA_ARGS__)
+#define crypto_inst_setname_1(inst, name, alg) \
+ __crypto_inst_setname(inst, name, name, alg)
+#define crypto_inst_setname_2(inst, name, driver, alg) \
+ __crypto_inst_setname(inst, name, driver, alg)
void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen);
int crypto_enqueue_request(struct crypto_queue *queue,
@@ -266,14 +263,14 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm)
return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
}
-static inline bool crypto_request_chained(struct crypto_async_request *req)
+static inline bool crypto_tfm_req_virt(struct crypto_tfm *tfm)
{
- return !list_empty(&req->list);
+ return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_VIRT;
}
-static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm)
+static inline u32 crypto_request_flags(struct crypto_async_request *req)
{
- return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN;
+ return req->flags & ~CRYPTO_TFM_REQ_ON_STACK;
}
#endif /* _CRYPTO_ALGAPI_H */
diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h
index 0c0176285349..dd7694477e50 100644
--- a/include/crypto/blake2b.h
+++ b/include/crypto/blake2b.h
@@ -7,10 +7,20 @@
#include <linux/types.h>
#include <linux/string.h>
+struct blake2b_state {
+ /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
+ u64 h[8];
+ u64 t[2];
+ /* The true state ends here. The rest is temporary storage. */
+ u64 f[2];
+};
+
enum blake2b_lengths {
BLAKE2B_BLOCK_SIZE = 128,
BLAKE2B_HASH_SIZE = 64,
BLAKE2B_KEY_SIZE = 64,
+ BLAKE2B_STATE_SIZE = offsetof(struct blake2b_state, f),
+ BLAKE2B_DESC_SIZE = sizeof(struct blake2b_state),
BLAKE2B_160_HASH_SIZE = 20,
BLAKE2B_256_HASH_SIZE = 32,
@@ -18,16 +28,6 @@ enum blake2b_lengths {
BLAKE2B_512_HASH_SIZE = 64,
};
-struct blake2b_state {
- /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
- u64 h[8];
- u64 t[2];
- u64 f[2];
- u8 buf[BLAKE2B_BLOCK_SIZE];
- unsigned int buflen;
- unsigned int outlen;
-};
-
enum blake2b_iv {
BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL,
BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL,
@@ -40,7 +40,7 @@ enum blake2b_iv {
};
static inline void __blake2b_init(struct blake2b_state *state, size_t outlen,
- const void *key, size_t keylen)
+ size_t keylen)
{
state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
state->h[1] = BLAKE2B_IV1;
@@ -52,15 +52,6 @@ static inline void __blake2b_init(struct blake2b_state *state, size_t outlen,
state->h[7] = BLAKE2B_IV7;
state->t[0] = 0;
state->t[1] = 0;
- state->f[0] = 0;
- state->f[1] = 0;
- state->buflen = 0;
- state->outlen = outlen;
- if (keylen) {
- memcpy(state->buf, key, keylen);
- memset(&state->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen);
- state->buflen = BLAKE2B_BLOCK_SIZE;
- }
}
#endif /* _CRYPTO_BLAKE2B_H */
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index f8cc073bba41..91f6b4cf561c 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -16,6 +16,7 @@
#define _CRYPTO_CHACHA_H
#include <linux/unaligned.h>
+#include <linux/string.h>
#include <linux/types.h>
/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
@@ -25,21 +26,32 @@
#define CHACHA_BLOCK_SIZE 64
#define CHACHAPOLY_IV_SIZE 12
-#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
+#define CHACHA_KEY_WORDS 8
+#define CHACHA_STATE_WORDS 16
+#define HCHACHA_OUT_WORDS 8
/* 192-bit nonce, then 64-bit stream position */
#define XCHACHA_IV_SIZE 32
-void chacha_block_generic(u32 *state, u8 *stream, int nrounds);
-static inline void chacha20_block(u32 *state, u8 *stream)
+struct chacha_state {
+ u32 x[CHACHA_STATE_WORDS];
+};
+
+void chacha_block_generic(struct chacha_state *state,
+ u8 out[CHACHA_BLOCK_SIZE], int nrounds);
+static inline void chacha20_block(struct chacha_state *state,
+ u8 out[CHACHA_BLOCK_SIZE])
{
- chacha_block_generic(state, stream, 20);
+ chacha_block_generic(state, out, 20);
}
-void hchacha_block_arch(const u32 *state, u32 *out, int nrounds);
-void hchacha_block_generic(const u32 *state, u32 *out, int nrounds);
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
+void hchacha_block_generic(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds);
-static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
+static inline void hchacha_block(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
hchacha_block_arch(state, out, nrounds);
@@ -54,37 +66,40 @@ enum chacha_constants { /* expand 32-byte k */
CHACHA_CONSTANT_TE_K = 0x6b206574U
};
-static inline void chacha_init_consts(u32 *state)
+static inline void chacha_init_consts(struct chacha_state *state)
{
- state[0] = CHACHA_CONSTANT_EXPA;
- state[1] = CHACHA_CONSTANT_ND_3;
- state[2] = CHACHA_CONSTANT_2_BY;
- state[3] = CHACHA_CONSTANT_TE_K;
+ state->x[0] = CHACHA_CONSTANT_EXPA;
+ state->x[1] = CHACHA_CONSTANT_ND_3;
+ state->x[2] = CHACHA_CONSTANT_2_BY;
+ state->x[3] = CHACHA_CONSTANT_TE_K;
}
-static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv)
+static inline void chacha_init(struct chacha_state *state,
+ const u32 key[CHACHA_KEY_WORDS],
+ const u8 iv[CHACHA_IV_SIZE])
{
chacha_init_consts(state);
- state[4] = key[0];
- state[5] = key[1];
- state[6] = key[2];
- state[7] = key[3];
- state[8] = key[4];
- state[9] = key[5];
- state[10] = key[6];
- state[11] = key[7];
- state[12] = get_unaligned_le32(iv + 0);
- state[13] = get_unaligned_le32(iv + 4);
- state[14] = get_unaligned_le32(iv + 8);
- state[15] = get_unaligned_le32(iv + 12);
+ state->x[4] = key[0];
+ state->x[5] = key[1];
+ state->x[6] = key[2];
+ state->x[7] = key[3];
+ state->x[8] = key[4];
+ state->x[9] = key[5];
+ state->x[10] = key[6];
+ state->x[11] = key[7];
+ state->x[12] = get_unaligned_le32(iv + 0);
+ state->x[13] = get_unaligned_le32(iv + 4);
+ state->x[14] = get_unaligned_le32(iv + 8);
+ state->x[15] = get_unaligned_le32(iv + 12);
}
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds);
-void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
+void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds);
-static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src,
+static inline void chacha_crypt(struct chacha_state *state,
+ u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
@@ -93,10 +108,24 @@ static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src,
chacha_crypt_generic(state, dst, src, bytes, nrounds);
}
-static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
+static inline void chacha20_crypt(struct chacha_state *state,
+ u8 *dst, const u8 *src, unsigned int bytes)
{
chacha_crypt(state, dst, src, bytes, 20);
}
+static inline void chacha_zeroize_state(struct chacha_state *state)
+{
+ memzero_explicit(state, sizeof(*state));
+}
+
+#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)
+bool chacha_is_arch_optimized(void);
+#else
+static inline bool chacha_is_arch_optimized(void)
+{
+ return false;
+}
+#endif
+
#endif /* _CRYPTO_CHACHA_H */
diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h
index da1ee73e9ce9..06984a26c8cf 100644
--- a/include/crypto/ctr.h
+++ b/include/crypto/ctr.h
@@ -8,58 +8,8 @@
#ifndef _CRYPTO_CTR_H
#define _CRYPTO_CTR_H
-#include <crypto/algapi.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
#define CTR_RFC3686_NONCE_SIZE 4
#define CTR_RFC3686_IV_SIZE 8
#define CTR_RFC3686_BLOCK_SIZE 16
-static inline int crypto_ctr_encrypt_walk(struct skcipher_request *req,
- void (*fn)(struct crypto_skcipher *,
- const u8 *, u8 *))
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- int blocksize = crypto_skcipher_chunksize(tfm);
- u8 buf[MAX_CIPHER_BLOCKSIZE];
- struct skcipher_walk walk;
- int err;
-
- /* avoid integer division due to variable blocksize parameter */
- if (WARN_ON_ONCE(!is_power_of_2(blocksize)))
- return -EINVAL;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while (walk.nbytes > 0) {
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- int nbytes = walk.nbytes;
- int tail = 0;
-
- if (nbytes < walk.total) {
- tail = walk.nbytes & (blocksize - 1);
- nbytes -= tail;
- }
-
- do {
- int bsize = min(nbytes, blocksize);
-
- fn(tfm, walk.iv, buf);
-
- crypto_xor_cpy(dst, src, buf, bsize);
- crypto_inc(walk.iv, blocksize);
-
- dst += bsize;
- src += bsize;
- nbytes -= bsize;
- } while (nbytes > 0);
-
- err = skcipher_walk_done(&walk, tail);
- }
- return err;
-}
-
#endif /* _CRYPTO_CTR_H */
diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h
index f832c9f2aca3..043d938e9a2c 100644
--- a/include/crypto/ghash.h
+++ b/include/crypto/ghash.h
@@ -7,18 +7,18 @@
#define __CRYPTO_GHASH_H__
#include <linux/types.h>
-#include <crypto/gf128mul.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
+struct gf128mul_4k;
+
struct ghash_ctx {
struct gf128mul_4k *gf128;
};
struct ghash_desc_ctx {
u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
};
#endif
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index a67988316d06..6f6b9de12cd3 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -8,14 +8,17 @@
#ifndef _CRYPTO_HASH_H
#define _CRYPTO_HASH_H
-#include <linux/atomic.h>
#include <linux/crypto.h>
+#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/string.h>
/* Set this bit for virtual address instead of SG list. */
#define CRYPTO_AHASH_REQ_VIRT 0x00000001
+#define CRYPTO_AHASH_REQ_PRIVATE \
+ CRYPTO_AHASH_REQ_VIRT
+
struct crypto_ahash;
/**
@@ -62,6 +65,10 @@ struct ahash_request {
};
u8 *result;
+ struct scatterlist sg_head[2];
+ crypto_completion_t saved_complete;
+ void *saved_data;
+
void *__ctx[] CRYPTO_MINALIGN_ATTR;
};
@@ -82,6 +89,8 @@ struct ahash_request {
* transformation object. Data processing can happen synchronously
* [SHASH] or asynchronously [AHASH] at this point. Driver must not use
* req->result.
+ * For block-only algorithms, @update must return the number
+ * of bytes to store in the API partial block buffer.
* @final: **[mandatory]** Retrieve result from the driver. This function finalizes the
* transformation and retrieves the resulting hash from the driver and
* pushes it back to upper layers. No data processing happens at this
@@ -124,6 +133,10 @@ struct ahash_request {
* data so the transformation can continue from this point onward. No
* data processing happens at this point. Driver must not use
* req->result.
+ * @export_core: Export partial state without partial block. Only defined
+ * for algorithms that are not block-only.
+ * @import_core: Import partial state without partial block. Only defined
+ * for algorithms that are not block-only.
* @init_tfm: Initialize the cryptographic transformation object.
* This function is called only once at the instantiation
* time, right after the transformation context was
@@ -136,7 +149,6 @@ struct ahash_request {
* This is a counterpart to @init_tfm, used to remove
* various changes set in @init_tfm.
* @clone_tfm: Copy transform into new object, may allocate memory.
- * @reqsize: Size of the request context.
* @halg: see struct hash_alg_common
*/
struct ahash_alg {
@@ -147,14 +159,14 @@ struct ahash_alg {
int (*digest)(struct ahash_request *req);
int (*export)(struct ahash_request *req, void *out);
int (*import)(struct ahash_request *req, const void *in);
+ int (*export_core)(struct ahash_request *req, void *out);
+ int (*import_core)(struct ahash_request *req, const void *in);
int (*setkey)(struct crypto_ahash *tfm, const u8 *key,
unsigned int keylen);
int (*init_tfm)(struct crypto_ahash *tfm);
void (*exit_tfm)(struct crypto_ahash *tfm);
int (*clone_tfm)(struct crypto_ahash *dst, struct crypto_ahash *src);
- unsigned int reqsize;
-
struct hash_alg_common halg;
};
@@ -165,17 +177,31 @@ struct shash_desc {
#define HASH_MAX_DIGESTSIZE 64
+/* Worst case is sha3-224. */
+#define HASH_MAX_STATESIZE 200 + 144 + 1
+
/*
- * Worst case is hmac(sha3-224-generic). Its context is a nested 'shash_desc'
- * containing a 'struct sha3_state'.
+ * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc'
+ * containing a 'struct s390_sha_ctx'.
*/
#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360)
+#define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \
+ HASH_MAX_DESCSIZE)
#define SHASH_DESC_ON_STACK(shash, ctx) \
char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \
__aligned(__alignof__(struct shash_desc)); \
struct shash_desc *shash = (struct shash_desc *)__##shash##_desc
+#define HASH_REQUEST_ON_STACK(name, _tfm) \
+ char __##name##_req[sizeof(struct ahash_request) + \
+ MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \
+ struct ahash_request *name = \
+ ahash_request_on_stack_init(__##name##_req, (_tfm))
+
+#define HASH_REQUEST_CLONE(name, gfp) \
+ hash_request_clone(name, sizeof(__##name##_req), gfp)
+
/**
* struct shash_alg - synchronous message digest definition
* @init: see struct ahash_alg
@@ -185,6 +211,8 @@ struct shash_desc {
* @digest: see struct ahash_alg
* @export: see struct ahash_alg
* @import: see struct ahash_alg
+ * @export_core: see struct ahash_alg
+ * @import_core: see struct ahash_alg
* @setkey: see struct ahash_alg
* @init_tfm: Initialize the cryptographic transformation object.
* This function is called only once at the instantiation
@@ -215,6 +243,8 @@ struct shash_alg {
unsigned int len, u8 *out);
int (*export)(struct shash_desc *desc, void *out);
int (*import)(struct shash_desc *desc, const void *in);
+ int (*export_core)(struct shash_desc *desc, void *out);
+ int (*import_core)(struct shash_desc *desc, const void *in);
int (*setkey)(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen);
int (*init_tfm)(struct crypto_shash *tfm);
@@ -238,7 +268,6 @@ struct crypto_ahash {
};
struct crypto_shash {
- unsigned int descsize;
struct crypto_tfm base;
};
@@ -252,6 +281,11 @@ struct crypto_shash {
* CRYPTO_ALG_TYPE_SKCIPHER API applies here as well.
*/
+static inline bool ahash_req_on_stack(struct ahash_request *req)
+{
+ return crypto_req_on_stack(&req->base);
+}
+
static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm)
{
return container_of(tfm, struct crypto_ahash, base);
@@ -459,7 +493,11 @@ int crypto_ahash_finup(struct ahash_request *req);
* -EBUSY if queue is full and request should be resubmitted later;
* other < 0 if an error occurred
*/
-int crypto_ahash_final(struct ahash_request *req);
+static inline int crypto_ahash_final(struct ahash_request *req)
+{
+ req->nbytes = 0;
+ return crypto_ahash_finup(req);
+}
/**
* crypto_ahash_digest() - calculate message digest for a buffer
@@ -548,7 +586,7 @@ int crypto_ahash_update(struct ahash_request *req);
static inline void ahash_request_set_tfm(struct ahash_request *req,
struct crypto_ahash *tfm)
{
- req->base.tfm = crypto_ahash_tfm(tfm);
+ crypto_request_set_tfm(&req->base, crypto_ahash_tfm(tfm));
}
/**
@@ -582,9 +620,12 @@ static inline struct ahash_request *ahash_request_alloc_noprof(
* ahash_request_free() - zeroize and free the request data structure
* @req: request data structure cipher handle to be freed
*/
-static inline void ahash_request_free(struct ahash_request *req)
+void ahash_request_free(struct ahash_request *req);
+
+static inline void ahash_request_zero(struct ahash_request *req)
{
- kfree_sensitive(req);
+ memzero_explicit(req, sizeof(*req) +
+ crypto_ahash_reqsize(crypto_ahash_reqtfm(req)));
}
static inline struct ahash_request *ahash_request_cast(
@@ -623,14 +664,9 @@ static inline void ahash_request_set_callback(struct ahash_request *req,
crypto_completion_t compl,
void *data)
{
- u32 keep = CRYPTO_AHASH_REQ_VIRT;
-
- req->base.complete = compl;
- req->base.data = data;
- flags &= ~keep;
- req->base.flags &= keep;
- req->base.flags |= flags;
- crypto_reqchain_init(&req->base);
+ flags &= ~CRYPTO_AHASH_REQ_PRIVATE;
+ flags |= req->base.flags & CRYPTO_AHASH_REQ_PRIVATE;
+ crypto_request_set_callback(&req->base, flags, compl, data);
}
/**
@@ -679,12 +715,6 @@ static inline void ahash_request_set_virt(struct ahash_request *req,
req->base.flags |= CRYPTO_AHASH_REQ_VIRT;
}
-static inline void ahash_request_chain(struct ahash_request *req,
- struct ahash_request *head)
-{
- crypto_request_chain(&req->base, &head->base);
-}
-
/**
* DOC: Synchronous Message Digest API
*
@@ -820,7 +850,7 @@ static inline void crypto_shash_clear_flags(struct crypto_shash *tfm, u32 flags)
*/
static inline unsigned int crypto_shash_descsize(struct crypto_shash *tfm)
{
- return tfm->descsize;
+ return crypto_shash_alg(tfm)->descsize;
}
static inline void *shash_desc_ctx(struct shash_desc *desc)
@@ -838,7 +868,7 @@ static inline void *shash_desc_ctx(struct shash_desc *desc)
* cipher handle must point to a keyed message digest cipher in order for this
* function to succeed.
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the setting of the key was successful; < 0 if an error occurred
*/
int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
@@ -855,7 +885,7 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
* crypto_shash_update and crypto_shash_final. The parameters have the same
* meaning as discussed for those separate three functions.
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the message digest creation was successful; < 0 if an error
* occurred
*/
@@ -875,12 +905,15 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
* directly, and it allocates a hash descriptor on the stack internally.
* Note that this stack allocation may be fairly large.
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 on success; < 0 if an error occurred.
*/
int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data,
unsigned int len, u8 *out);
+int crypto_hash_digest(struct crypto_ahash *tfm, const u8 *data,
+ unsigned int len, u8 *out);
+
/**
* crypto_shash_export() - extract operational state for message digest
* @desc: reference to the operational state handle whose state is exported
@@ -890,7 +923,7 @@ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data,
* caller-allocated output buffer out which must have sufficient size (e.g. by
* calling crypto_shash_descsize).
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the export creation was successful; < 0 if an error occurred
*/
int crypto_shash_export(struct shash_desc *desc, void *out);
@@ -904,7 +937,7 @@ int crypto_shash_export(struct shash_desc *desc, void *out);
* the input buffer. That buffer should have been generated with the
* crypto_ahash_export function.
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the import was successful; < 0 if an error occurred
*/
int crypto_shash_import(struct shash_desc *desc, const void *in);
@@ -917,19 +950,29 @@ int crypto_shash_import(struct shash_desc *desc, const void *in);
* operational state handle. Any potentially existing state created by
* previous operations is discarded.
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the message digest initialization was successful; < 0 if an
* error occurred
*/
-static inline int crypto_shash_init(struct shash_desc *desc)
-{
- struct crypto_shash *tfm = desc->tfm;
+int crypto_shash_init(struct shash_desc *desc);
- if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
- return -ENOKEY;
-
- return crypto_shash_alg(tfm)->init(desc);
-}
+/**
+ * crypto_shash_finup() - calculate message digest of buffer
+ * @desc: see crypto_shash_final()
+ * @data: see crypto_shash_update()
+ * @len: see crypto_shash_update()
+ * @out: see crypto_shash_final()
+ *
+ * This function is a "short-hand" for the function calls of
+ * crypto_shash_update and crypto_shash_final. The parameters have the same
+ * meaning as discussed for those separate functions.
+ *
+ * Context: Softirq or process context.
+ * Return: 0 if the message digest creation was successful; < 0 if an error
+ * occurred
+ */
+int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out);
/**
* crypto_shash_update() - add data to message digest for processing
@@ -939,12 +982,15 @@ static inline int crypto_shash_init(struct shash_desc *desc)
*
* Updates the message digest state of the operational state handle.
*
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the message digest update was successful; < 0 if an error
* occurred
*/
-int crypto_shash_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
+static inline int crypto_shash_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ return crypto_shash_finup(desc, data, len, NULL);
+}
/**
* crypto_shash_final() - calculate message digest
@@ -956,29 +1002,14 @@ int crypto_shash_update(struct shash_desc *desc, const u8 *data,
* into the output buffer. The caller must ensure that the output buffer is
* large enough by using crypto_shash_digestsize.
*
- * Context: Any context.
- * Return: 0 if the message digest creation was successful; < 0 if an error
- * occurred
- */
-int crypto_shash_final(struct shash_desc *desc, u8 *out);
-
-/**
- * crypto_shash_finup() - calculate message digest of buffer
- * @desc: see crypto_shash_final()
- * @data: see crypto_shash_update()
- * @len: see crypto_shash_update()
- * @out: see crypto_shash_final()
- *
- * This function is a "short-hand" for the function calls of
- * crypto_shash_update and crypto_shash_final. The parameters have the same
- * meaning as discussed for those separate functions.
- *
- * Context: Any context.
+ * Context: Softirq or process context.
* Return: 0 if the message digest creation was successful; < 0 if an error
* occurred
*/
-int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
+static inline int crypto_shash_final(struct shash_desc *desc, u8 *out)
+{
+ return crypto_shash_finup(desc, NULL, 0, out);
+}
static inline void shash_desc_zero(struct shash_desc *desc)
{
@@ -986,14 +1017,25 @@ static inline void shash_desc_zero(struct shash_desc *desc)
sizeof(*desc) + crypto_shash_descsize(desc->tfm));
}
-static inline int ahash_request_err(struct ahash_request *req)
+static inline bool ahash_is_async(struct crypto_ahash *tfm)
{
- return req->base.err;
+ return crypto_tfm_is_async(&tfm->base);
}
-static inline bool ahash_is_async(struct crypto_ahash *tfm)
+static inline struct ahash_request *ahash_request_on_stack_init(
+ char *buf, struct crypto_ahash *tfm)
{
- return crypto_tfm_is_async(&tfm->base);
+ struct ahash_request *req = (void *)buf;
+
+ crypto_stack_request_init(&req->base, crypto_ahash_tfm(tfm));
+ return req;
+}
+
+static inline struct ahash_request *ahash_request_clone(
+ struct ahash_request *req, size_t total, gfp_t gfp)
+{
+ return container_of(crypto_request_clone(&req->base, total, gfp),
+ struct ahash_request, base);
}
#endif /* _CRYPTO_HASH_H */
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index aaf59f3236fa..ffffd88bbbad 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -11,12 +11,17 @@
#include <crypto/acompress.h>
#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/compiler_types.h>
+#include <linux/cpumask_types.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue_types.h>
-#define ACOMP_REQUEST_ON_STACK(name, tfm) \
+#define ACOMP_FBREQ_ON_STACK(name, req) \
char __##name##_req[sizeof(struct acomp_req) + \
MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \
- struct acomp_req *name = acomp_request_on_stack_init( \
- __##name##_req, (tfm), 0, true)
+ struct acomp_req *name = acomp_fbreq_on_stack_init( \
+ __##name##_req, (req))
/**
* struct acomp_alg - asynchronous compression algorithm
@@ -35,9 +40,7 @@
* counterpart to @init, used to remove various changes set in
* @init.
*
- * @reqsize: Context size for (de)compression requests
* @base: Common crypto API algorithm data structure
- * @stream: Per-cpu memory for algorithm
* @calg: Cmonn algorithm data structure shared with scomp
*/
struct acomp_alg {
@@ -46,14 +49,61 @@ struct acomp_alg {
int (*init)(struct crypto_acomp *tfm);
void (*exit)(struct crypto_acomp *tfm);
- unsigned int reqsize;
-
union {
struct COMP_ALG_COMMON;
struct comp_alg_common calg;
};
};
+struct crypto_acomp_stream {
+ spinlock_t lock;
+ void *ctx;
+};
+
+struct crypto_acomp_streams {
+ /* These must come first because of struct scomp_alg. */
+ void *(*alloc_ctx)(void);
+ union {
+ void (*free_ctx)(void *);
+ void (*cfree_ctx)(const void *);
+ };
+
+ struct crypto_acomp_stream __percpu *streams;
+ struct work_struct stream_work;
+ cpumask_t stream_want;
+};
+
+struct acomp_walk {
+ union {
+ /* Virtual address of the source. */
+ struct {
+ struct {
+ const void *const addr;
+ } virt;
+ } src;
+
+ /* Private field for the API, do not use. */
+ struct scatter_walk in;
+ };
+
+ union {
+ /* Virtual address of the destination. */
+ struct {
+ struct {
+ void *const addr;
+ } virt;
+ } dst;
+
+ /* Private field for the API, do not use. */
+ struct scatter_walk out;
+ };
+
+ unsigned int slen;
+ unsigned int dlen;
+
+ int flags;
+};
+
/*
* Transform internal helpers.
*/
@@ -98,17 +148,10 @@ void crypto_unregister_acomp(struct acomp_alg *alg);
int crypto_register_acomps(struct acomp_alg *algs, int count);
void crypto_unregister_acomps(struct acomp_alg *algs, int count);
-static inline bool acomp_request_chained(struct acomp_req *req)
-{
- return crypto_request_chained(&req->base);
-}
-
static inline bool acomp_request_issg(struct acomp_req *req)
{
return !(req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT |
- CRYPTO_ACOMP_REQ_DST_VIRT |
- CRYPTO_ACOMP_REQ_SRC_FOLIO |
- CRYPTO_ACOMP_REQ_DST_FOLIO));
+ CRYPTO_ACOMP_REQ_DST_VIRT));
}
static inline bool acomp_request_src_isvirt(struct acomp_req *req)
@@ -143,19 +186,62 @@ static inline bool acomp_request_isnondma(struct acomp_req *req)
CRYPTO_ACOMP_REQ_DST_NONDMA);
}
-static inline bool acomp_request_src_isfolio(struct acomp_req *req)
+static inline bool crypto_acomp_req_virt(struct crypto_acomp *tfm)
+{
+ return crypto_tfm_req_virt(&tfm->base);
+}
+
+void crypto_acomp_free_streams(struct crypto_acomp_streams *s);
+int crypto_acomp_alloc_streams(struct crypto_acomp_streams *s);
+
+struct crypto_acomp_stream *crypto_acomp_lock_stream_bh(
+ struct crypto_acomp_streams *s) __acquires(stream);
+
+static inline void crypto_acomp_unlock_stream_bh(
+ struct crypto_acomp_stream *stream) __releases(stream)
+{
+ spin_unlock_bh(&stream->lock);
+}
+
+void acomp_walk_done_src(struct acomp_walk *walk, int used);
+void acomp_walk_done_dst(struct acomp_walk *walk, int used);
+int acomp_walk_next_src(struct acomp_walk *walk);
+int acomp_walk_next_dst(struct acomp_walk *walk);
+int acomp_walk_virt(struct acomp_walk *__restrict walk,
+ struct acomp_req *__restrict req, bool atomic);
+
+static inline bool acomp_walk_more_src(const struct acomp_walk *walk, int cur)
+{
+ return walk->slen != cur;
+}
+
+static inline u32 acomp_request_flags(struct acomp_req *req)
{
- return req->base.flags & CRYPTO_ACOMP_REQ_SRC_FOLIO;
+ return crypto_request_flags(&req->base) & ~CRYPTO_ACOMP_REQ_PRIVATE;
}
-static inline bool acomp_request_dst_isfolio(struct acomp_req *req)
+static inline struct crypto_acomp *crypto_acomp_fb(struct crypto_acomp *tfm)
{
- return req->base.flags & CRYPTO_ACOMP_REQ_DST_FOLIO;
+ return __crypto_acomp_tfm(crypto_acomp_tfm(tfm)->fb);
}
-static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm)
+static inline struct acomp_req *acomp_fbreq_on_stack_init(
+ char *buf, struct acomp_req *old)
{
- return crypto_tfm_req_chain(&tfm->base);
+ struct crypto_acomp *tfm = crypto_acomp_reqtfm(old);
+ struct acomp_req *req = (void *)buf;
+
+ crypto_stack_request_init(&req->base,
+ crypto_acomp_tfm(crypto_acomp_fb(tfm)));
+ acomp_request_set_callback(req, acomp_request_flags(old), NULL, NULL);
+ req->base.flags &= ~CRYPTO_ACOMP_REQ_PRIVATE;
+ req->base.flags |= old->base.flags & CRYPTO_ACOMP_REQ_PRIVATE;
+ req->src = old->src;
+ req->dst = old->dst;
+ req->slen = old->slen;
+ req->dlen = old->dlen;
+
+ return req;
}
#endif
diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h
index 982fe5e8471c..3e09e2485306 100644
--- a/include/crypto/internal/blake2b.h
+++ b/include/crypto/internal/blake2b.h
@@ -7,65 +7,36 @@
#ifndef _CRYPTO_INTERNAL_BLAKE2B_H
#define _CRYPTO_INTERNAL_BLAKE2B_H
+#include <asm/byteorder.h>
#include <crypto/blake2b.h>
#include <crypto/internal/hash.h>
+#include <linux/array_size.h>
+#include <linux/compiler.h>
+#include <linux/build_bug.h>
+#include <linux/errno.h>
+#include <linux/math.h>
#include <linux/string.h>
-
-void blake2b_compress_generic(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc);
+#include <linux/types.h>
static inline void blake2b_set_lastblock(struct blake2b_state *state)
{
state->f[0] = -1;
+ state->f[1] = 0;
}
-typedef void (*blake2b_compress_t)(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc);
-
-static inline void __blake2b_update(struct blake2b_state *state,
- const u8 *in, size_t inlen,
- blake2b_compress_t compress)
+static inline void blake2b_set_nonlast(struct blake2b_state *state)
{
- const size_t fill = BLAKE2B_BLOCK_SIZE - state->buflen;
-
- if (unlikely(!inlen))
- return;
- if (inlen > fill) {
- memcpy(state->buf + state->buflen, in, fill);
- (*compress)(state, state->buf, 1, BLAKE2B_BLOCK_SIZE);
- state->buflen = 0;
- in += fill;
- inlen -= fill;
- }
- if (inlen > BLAKE2B_BLOCK_SIZE) {
- const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2B_BLOCK_SIZE);
- /* Hash one less (full) block than strictly possible */
- (*compress)(state, in, nblocks - 1, BLAKE2B_BLOCK_SIZE);
- in += BLAKE2B_BLOCK_SIZE * (nblocks - 1);
- inlen -= BLAKE2B_BLOCK_SIZE * (nblocks - 1);
- }
- memcpy(state->buf + state->buflen, in, inlen);
- state->buflen += inlen;
+ state->f[0] = 0;
+ state->f[1] = 0;
}
-static inline void __blake2b_final(struct blake2b_state *state, u8 *out,
- blake2b_compress_t compress)
-{
- int i;
-
- blake2b_set_lastblock(state);
- memset(state->buf + state->buflen, 0,
- BLAKE2B_BLOCK_SIZE - state->buflen); /* Padding */
- (*compress)(state, state->buf, 1, state->buflen);
- for (i = 0; i < ARRAY_SIZE(state->h); i++)
- __cpu_to_le64s(&state->h[i]);
- memcpy(out, state->h, state->outlen);
-}
+typedef void (*blake2b_compress_t)(struct blake2b_state *state,
+ const u8 *block, size_t nblocks, u32 inc);
/* Helper functions for shash implementations of BLAKE2b */
struct blake2b_tfm_ctx {
- u8 key[BLAKE2B_KEY_SIZE];
+ u8 key[BLAKE2B_BLOCK_SIZE];
unsigned int keylen;
};
@@ -74,10 +45,13 @@ static inline int crypto_blake2b_setkey(struct crypto_shash *tfm,
{
struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm);
- if (keylen == 0 || keylen > BLAKE2B_KEY_SIZE)
+ if (keylen > BLAKE2B_KEY_SIZE)
return -EINVAL;
+ BUILD_BUG_ON(BLAKE2B_KEY_SIZE > BLAKE2B_BLOCK_SIZE);
+
memcpy(tctx->key, key, keylen);
+ memset(tctx->key + keylen, 0, BLAKE2B_BLOCK_SIZE - keylen);
tctx->keylen = keylen;
return 0;
@@ -89,26 +63,38 @@ static inline int crypto_blake2b_init(struct shash_desc *desc)
struct blake2b_state *state = shash_desc_ctx(desc);
unsigned int outlen = crypto_shash_digestsize(desc->tfm);
- __blake2b_init(state, outlen, tctx->key, tctx->keylen);
- return 0;
+ __blake2b_init(state, outlen, tctx->keylen);
+ return tctx->keylen ?
+ crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0;
}
-static inline int crypto_blake2b_update(struct shash_desc *desc,
- const u8 *in, unsigned int inlen,
- blake2b_compress_t compress)
+static inline int crypto_blake2b_update_bo(struct shash_desc *desc,
+ const u8 *in, unsigned int inlen,
+ blake2b_compress_t compress)
{
struct blake2b_state *state = shash_desc_ctx(desc);
- __blake2b_update(state, in, inlen, compress);
- return 0;
+ blake2b_set_nonlast(state);
+ compress(state, in, inlen / BLAKE2B_BLOCK_SIZE, BLAKE2B_BLOCK_SIZE);
+ return inlen - round_down(inlen, BLAKE2B_BLOCK_SIZE);
}
-static inline int crypto_blake2b_final(struct shash_desc *desc, u8 *out,
+static inline int crypto_blake2b_finup(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen, u8 *out,
blake2b_compress_t compress)
{
struct blake2b_state *state = shash_desc_ctx(desc);
+ u8 buf[BLAKE2B_BLOCK_SIZE];
+ int i;
- __blake2b_final(state, out, compress);
+ memcpy(buf, in, inlen);
+ memset(buf + inlen, 0, BLAKE2B_BLOCK_SIZE - inlen);
+ blake2b_set_lastblock(state);
+ compress(state, buf, 1, inlen);
+ for (i = 0; i < ARRAY_SIZE(state->h); i++)
+ __cpu_to_le64s(&state->h[i]);
+ memcpy(out, state->h, crypto_shash_digestsize(desc->tfm));
+ memzero_explicit(buf, sizeof(buf));
return 0;
}
diff --git a/include/crypto/internal/blockhash.h b/include/crypto/internal/blockhash.h
new file mode 100644
index 000000000000..52d9d4c82493
--- /dev/null
+++ b/include/crypto/internal/blockhash.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Handle partial blocks for block hash.
+ *
+ * Copyright (c) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (c) 2025 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#ifndef _CRYPTO_INTERNAL_BLOCKHASH_H
+#define _CRYPTO_INTERNAL_BLOCKHASH_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define BLOCK_HASH_UPDATE_BASE(block_fn, state, src, nbytes, bs, dv, \
+ buf, buflen) \
+ ({ \
+ typeof(block_fn) *_block_fn = &(block_fn); \
+ typeof(state + 0) _state = (state); \
+ unsigned int _buflen = (buflen); \
+ size_t _nbytes = (nbytes); \
+ unsigned int _bs = (bs); \
+ const u8 *_src = (src); \
+ u8 *_buf = (buf); \
+ while ((_buflen + _nbytes) >= _bs) { \
+ const u8 *data = _src; \
+ size_t len = _nbytes; \
+ size_t blocks; \
+ int remain; \
+ if (_buflen) { \
+ remain = _bs - _buflen; \
+ memcpy(_buf + _buflen, _src, remain); \
+ data = _buf; \
+ len = _bs; \
+ } \
+ remain = len % bs; \
+ blocks = (len - remain) / (dv); \
+ (*_block_fn)(_state, data, blocks); \
+ _src += len - remain - _buflen; \
+ _nbytes -= len - remain - _buflen; \
+ _buflen = 0; \
+ } \
+ memcpy(_buf + _buflen, _src, _nbytes); \
+ _buflen += _nbytes; \
+ })
+
+#define BLOCK_HASH_UPDATE(block, state, src, nbytes, bs, buf, buflen) \
+ BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, 1, buf, buflen)
+#define BLOCK_HASH_UPDATE_BLOCKS(block, state, src, nbytes, bs, buf, buflen) \
+ BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, bs, buf, buflen)
+
+#endif /* _CRYPTO_INTERNAL_BLOCKHASH_H */
diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
deleted file mode 100644
index b085dc1ac151..000000000000
--- a/include/crypto/internal/chacha.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _CRYPTO_INTERNAL_CHACHA_H
-#define _CRYPTO_INTERNAL_CHACHA_H
-
-#include <crypto/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/crypto.h>
-
-struct chacha_ctx {
- u32 key[8];
- int nrounds;
-};
-
-static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize, int nrounds)
-{
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- int i;
-
- if (keysize != CHACHA_KEY_SIZE)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
- ctx->nrounds = nrounds;
- return 0;
-}
-
-static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
-{
- return chacha_setkey(tfm, key, keysize, 20);
-}
-
-static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
-{
- return chacha_setkey(tfm, key, keysize, 12);
-}
-
-#endif /* _CRYPTO_CHACHA_H */
diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h
index fbf4be56cf12..b6a4ea2240fc 100644
--- a/include/crypto/internal/engine.h
+++ b/include/crypto/internal/engine.h
@@ -27,10 +27,10 @@ struct device;
* @retry_support: indication that the hardware allows re-execution
* of a failed backlog request
* crypto-engine, in head position to keep order
+ * @rt: whether this queue is set to run as a realtime task
* @list: link with the global crypto engine list
* @queue_lock: spinlock to synchronise access to request queue
* @queue: the crypto queue of the engine
- * @rt: whether this queue is set to run as a realtime task
* @prepare_crypt_hardware: a request will soon arrive from the queue
* so the subsystem requests the driver to prepare the hardware
* by issuing this call
@@ -51,14 +51,13 @@ struct crypto_engine {
bool running;
bool retry_support;
+ bool rt;
struct list_head list;
spinlock_t queue_lock;
struct crypto_queue queue;
struct device *dev;
- bool rt;
-
int (*prepare_crypt_hardware)(struct crypto_engine *engine);
int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
int (*do_batch_requests)(struct crypto_engine *engine);
diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h
index 7fd7126f593a..012f5fb22d43 100644
--- a/include/crypto/internal/geniv.h
+++ b/include/crypto/internal/geniv.h
@@ -15,7 +15,6 @@
struct aead_geniv_ctx {
spinlock_t lock;
struct crypto_aead *child;
- struct crypto_sync_skcipher *sknull;
u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
};
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 052ac7924af3..0f85c543f80b 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -11,6 +11,24 @@
#include <crypto/algapi.h>
#include <crypto/hash.h>
+/* Set this bit to handle partial blocks in the API. */
+#define CRYPTO_AHASH_ALG_BLOCK_ONLY 0x01000000
+
+/* Set this bit if final requires at least one byte. */
+#define CRYPTO_AHASH_ALG_FINAL_NONZERO 0x02000000
+
+/* Set this bit if finup can deal with multiple blocks. */
+#define CRYPTO_AHASH_ALG_FINUP_MAX 0x04000000
+
+/* This bit is set by the Crypto API if export_core is not supported. */
+#define CRYPTO_AHASH_ALG_NO_EXPORT_CORE 0x08000000
+
+#define HASH_FBREQ_ON_STACK(name, req) \
+ char __##name##_req[sizeof(struct ahash_request) + \
+ MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \
+ struct ahash_request *name = ahash_fbreq_on_stack_init( \
+ __##name##_req, (req))
+
struct ahash_request;
struct ahash_instance {
@@ -49,6 +67,7 @@ int crypto_register_ahashes(struct ahash_alg *algs, int count);
void crypto_unregister_ahashes(struct ahash_alg *algs, int count);
int ahash_register_instance(struct crypto_template *tmpl,
struct ahash_instance *inst);
+void ahash_free_singlespawn_instance(struct ahash_instance *inst);
int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen);
@@ -58,12 +77,20 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
return alg->setkey != shash_no_setkey;
}
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg);
+
static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg)
{
return crypto_shash_alg_has_setkey(alg) &&
!(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY);
}
+static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg)
+{
+ return crypto_hash_alg_has_setkey(alg) &&
+ !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY);
+}
+
int crypto_grab_ahash(struct crypto_ahash_spawn *spawn,
struct crypto_instance *inst,
const char *name, u32 type, u32 mask);
@@ -187,7 +214,7 @@ static inline void ahash_request_complete(struct ahash_request *req, int err)
static inline u32 ahash_request_flags(struct ahash_request *req)
{
- return req->base.flags;
+ return crypto_request_flags(&req->base) & ~CRYPTO_AHASH_REQ_PRIVATE;
}
static inline struct crypto_ahash *crypto_spawn_ahash(
@@ -247,20 +274,96 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm)
return container_of(tfm, struct crypto_shash, base);
}
-static inline bool ahash_request_chained(struct ahash_request *req)
+static inline bool ahash_request_isvirt(struct ahash_request *req)
{
- return false;
+ return req->base.flags & CRYPTO_AHASH_REQ_VIRT;
}
-static inline bool ahash_request_isvirt(struct ahash_request *req)
+static inline bool crypto_ahash_req_virt(struct crypto_ahash *tfm)
{
- return req->base.flags & CRYPTO_AHASH_REQ_VIRT;
+ return crypto_tfm_req_virt(&tfm->base);
}
-static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm)
+static inline struct crypto_ahash *crypto_ahash_fb(struct crypto_ahash *tfm)
{
- return crypto_tfm_req_chain(&tfm->base);
+ return __crypto_ahash_cast(crypto_ahash_tfm(tfm)->fb);
}
+static inline struct ahash_request *ahash_fbreq_on_stack_init(
+ char *buf, struct ahash_request *old)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(old);
+ struct ahash_request *req = (void *)buf;
+
+ crypto_stack_request_init(&req->base,
+ crypto_ahash_tfm(crypto_ahash_fb(tfm)));
+ ahash_request_set_callback(req, ahash_request_flags(old), NULL, NULL);
+ req->base.flags &= ~CRYPTO_AHASH_REQ_PRIVATE;
+ req->base.flags |= old->base.flags & CRYPTO_AHASH_REQ_PRIVATE;
+ req->src = old->src;
+ req->result = old->result;
+ req->nbytes = old->nbytes;
+
+ return req;
+}
+
+/* Return the state size without partial block for block-only algorithms. */
+static inline unsigned int crypto_shash_coresize(struct crypto_shash *tfm)
+{
+ return crypto_shash_statesize(tfm) - crypto_shash_blocksize(tfm) - 1;
+}
+
+/* This can only be used if the request was never cloned. */
+#define HASH_REQUEST_ZERO(name) \
+ memzero_explicit(__##name##_req, sizeof(__##name##_req))
+
+/**
+ * crypto_ahash_export_core() - extract core state for message digest
+ * @req: reference to the ahash_request handle whose state is exported
+ * @out: output buffer of sufficient size that can hold the hash state
+ *
+ * Export the hash state without the partial block buffer.
+ *
+ * Context: Softirq or process context.
+ * Return: 0 if the export creation was successful; < 0 if an error occurred
+ */
+int crypto_ahash_export_core(struct ahash_request *req, void *out);
+
+/**
+ * crypto_ahash_import_core() - import core state
+ * @req: reference to ahash_request handle the state is imported into
+ * @in: buffer holding the state
+ *
+ * Import the hash state without the partial block buffer.
+ *
+ * Context: Softirq or process context.
+ * Return: 0 if the import was successful; < 0 if an error occurred
+ */
+int crypto_ahash_import_core(struct ahash_request *req, const void *in);
+
+/**
+ * crypto_shash_export_core() - extract core state for message digest
+ * @desc: reference to the operational state handle whose state is exported
+ * @out: output buffer of sufficient size that can hold the hash state
+ *
+ * Export the hash state without the partial block buffer.
+ *
+ * Context: Softirq or process context.
+ * Return: 0 if the export creation was successful; < 0 if an error occurred
+ */
+int crypto_shash_export_core(struct shash_desc *desc, void *out);
+
+/**
+ * crypto_shash_import_core() - import core state
+ * @desc: reference to the operational state handle the state imported into
+ * @in: buffer holding the state
+ *
+ * Import the hash state without the partial block buffer.
+ *
+ * Context: Softirq or process context.
+ * Return: 0 if the import was successful; < 0 if an error occurred
+ */
+int crypto_shash_import_core(struct shash_desc *desc, const void *in);
+
#endif /* _CRYPTO_INTERNAL_HASH_H */
diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
index e614594f88c1..c60315f47562 100644
--- a/include/crypto/internal/poly1305.h
+++ b/include/crypto/internal/poly1305.h
@@ -6,9 +6,8 @@
#ifndef _CRYPTO_INTERNAL_POLY1305_H
#define _CRYPTO_INTERNAL_POLY1305_H
-#include <linux/unaligned.h>
-#include <linux/types.h>
#include <crypto/poly1305.h>
+#include <linux/types.h>
/*
* Poly1305 core functions. These only accept whole blocks; the caller must
@@ -31,4 +30,29 @@ void poly1305_core_blocks(struct poly1305_state *state,
void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
void *dst);
+void poly1305_block_init_arch(struct poly1305_block_state *state,
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
+void poly1305_block_init_generic(struct poly1305_block_state *state,
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
+void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
+ unsigned int len, u32 padbit);
+
+static inline void poly1305_blocks_generic(struct poly1305_block_state *state,
+ const u8 *src, unsigned int len,
+ u32 padbit)
+{
+ poly1305_core_blocks(&state->h, &state->core_r, src,
+ len / POLY1305_BLOCK_SIZE, padbit);
+}
+
+void poly1305_emit_arch(const struct poly1305_state *state,
+ u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]);
+
+static inline void poly1305_emit_generic(const struct poly1305_state *state,
+ u8 digest[POLY1305_DIGEST_SIZE],
+ const u32 nonce[4])
+{
+ poly1305_core_emit(state, nonce, digest);
+}
+
#endif
diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h
index f25aa2ea3b48..533d6c16a491 100644
--- a/include/crypto/internal/scompress.h
+++ b/include/crypto/internal/scompress.h
@@ -9,10 +9,7 @@
#ifndef _CRYPTO_SCOMP_INT_H
#define _CRYPTO_SCOMP_INT_H
-#include <crypto/acompress.h>
-#include <crypto/algapi.h>
-
-struct acomp_req;
+#include <crypto/internal/acompress.h>
struct crypto_scomp {
struct crypto_tfm base;
@@ -26,12 +23,10 @@ struct crypto_scomp {
* @compress: Function performs a compress operation
* @decompress: Function performs a de-compress operation
* @base: Common crypto API algorithm data structure
- * @stream: Per-cpu memory for algorithm
+ * @streams: Per-cpu memory for algorithm
* @calg: Cmonn algorithm data structure shared with acomp
*/
struct scomp_alg {
- void *(*alloc_ctx)(void);
- void (*free_ctx)(void *ctx);
int (*compress)(struct crypto_scomp *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen,
void *ctx);
@@ -40,6 +35,14 @@ struct scomp_alg {
void *ctx);
union {
+ struct {
+ void *(*alloc_ctx)(void);
+ void (*free_ctx)(void *ctx);
+ };
+ struct crypto_acomp_streams streams;
+ };
+
+ union {
struct COMP_ALG_COMMON;
struct comp_alg_common calg;
};
diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h
new file mode 100644
index 000000000000..b9bccd3ff57f
--- /dev/null
+++ b/include/crypto/internal/sha2.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _CRYPTO_INTERNAL_SHA2_H
+#define _CRYPTO_INTERNAL_SHA2_H
+
+#include <crypto/internal/simd.h>
+#include <crypto/sha2.h>
+#include <linux/compiler_attributes.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/unaligned.h>
+
+#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256)
+bool sha256_is_arch_optimized(void);
+#else
+static inline bool sha256_is_arch_optimized(void)
+{
+ return false;
+}
+#endif
+void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks);
+
+static inline void sha256_choose_blocks(
+ u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks,
+ bool force_generic, bool force_simd)
+{
+ if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic)
+ sha256_blocks_generic(state, data, nblocks);
+ else if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD) &&
+ (force_simd || crypto_simd_usable()))
+ sha256_blocks_simd(state, data, nblocks);
+ else
+ sha256_blocks_arch(state, data, nblocks);
+}
+
+static __always_inline void sha256_finup(
+ struct crypto_sha256_state *sctx, u8 buf[SHA256_BLOCK_SIZE],
+ size_t len, u8 out[SHA256_DIGEST_SIZE], size_t digest_size,
+ bool force_generic, bool force_simd)
+{
+ const size_t bit_offset = SHA256_BLOCK_SIZE - 8;
+ __be64 *bits = (__be64 *)&buf[bit_offset];
+ int i;
+
+ buf[len++] = 0x80;
+ if (len > bit_offset) {
+ memset(&buf[len], 0, SHA256_BLOCK_SIZE - len);
+ sha256_choose_blocks(sctx->state, buf, 1, force_generic,
+ force_simd);
+ len = 0;
+ }
+
+ memset(&buf[len], 0, bit_offset - len);
+ *bits = cpu_to_be64(sctx->count << 3);
+ sha256_choose_blocks(sctx->state, buf, 1, force_generic, force_simd);
+
+ for (i = 0; i < digest_size; i += 4)
+ put_unaligned_be32(sctx->state[i / 4], out + i);
+}
+
+#endif /* _CRYPTO_INTERNAL_SHA2_H */
diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h
index be97b97a75dd..7e7f1ac3b7fd 100644
--- a/include/crypto/internal/simd.h
+++ b/include/crypto/internal/simd.h
@@ -6,6 +6,7 @@
#ifndef _CRYPTO_INTERNAL_SIMD_H
#define _CRYPTO_INTERNAL_SIMD_H
+#include <asm/simd.h>
#include <linux/percpu.h>
#include <linux/types.h>
@@ -43,14 +44,9 @@ void simd_unregister_aeads(struct aead_alg *algs, int count,
*
* This delegates to may_use_simd(), except that this also returns false if SIMD
* in crypto code has been temporarily disabled on this CPU by the crypto
- * self-tests, in order to test the no-SIMD fallback code. This override is
- * currently limited to configurations where the extra self-tests are enabled,
- * because it might be a bit too invasive to be part of the regular self-tests.
- *
- * This is a macro so that <asm/simd.h>, which some architectures don't have,
- * doesn't have to be included directly here.
+ * self-tests, in order to test the no-SIMD fallback code.
*/
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
+#ifdef CONFIG_CRYPTO_SELFTESTS
DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test);
#define crypto_simd_usable() \
(may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test))
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index a958ab0636ad..d5aa535263f6 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -10,6 +10,7 @@
#include <crypto/algapi.h>
#include <crypto/internal/cipher.h>
+#include <crypto/scatterwalk.h>
#include <crypto/skcipher.h>
#include <linux/types.h>
@@ -54,48 +55,6 @@ struct crypto_lskcipher_spawn {
struct crypto_spawn base;
};
-struct skcipher_walk {
- union {
- /* Virtual address of the source. */
- struct {
- struct {
- const void *const addr;
- } virt;
- } src;
-
- /* Private field for the API, do not use. */
- struct scatter_walk in;
- };
-
- unsigned int nbytes;
-
- union {
- /* Virtual address of the destination. */
- struct {
- struct {
- void *const addr;
- } virt;
- } dst;
-
- /* Private field for the API, do not use. */
- struct scatter_walk out;
- };
-
- unsigned int total;
-
- u8 *page;
- u8 *buffer;
- u8 *oiv;
- void *iv;
-
- unsigned int ivsize;
-
- int flags;
- unsigned int blocksize;
- unsigned int stride;
- unsigned int alignmask;
-};
-
static inline struct crypto_instance *skcipher_crypto_instance(
struct skcipher_instance *inst)
{
@@ -212,7 +171,6 @@ void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count);
int lskcipher_register_instance(struct crypto_template *tmpl,
struct lskcipher_instance *inst);
-int skcipher_walk_done(struct skcipher_walk *walk, int res);
int skcipher_walk_virt(struct skcipher_walk *__restrict walk,
struct skcipher_request *__restrict req,
bool atomic);
@@ -223,11 +181,6 @@ int skcipher_walk_aead_decrypt(struct skcipher_walk *__restrict walk,
struct aead_request *__restrict req,
bool atomic);
-static inline void skcipher_walk_abort(struct skcipher_walk *walk)
-{
- skcipher_walk_done(walk, -ECANCELED);
-}
-
static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm)
{
return crypto_tfm_ctx(&tfm->base);
diff --git a/include/crypto/md5.h b/include/crypto/md5.h
index cf9e9dec3d21..198b5d69b92f 100644
--- a/include/crypto/md5.h
+++ b/include/crypto/md5.h
@@ -8,6 +8,7 @@
#define MD5_HMAC_BLOCK_SIZE 64
#define MD5_BLOCK_WORDS 16
#define MD5_HASH_WORDS 4
+#define MD5_STATE_SIZE 24
#define MD5_H0 0x67452301UL
#define MD5_H1 0xefcdab89UL
@@ -18,8 +19,8 @@ extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE];
struct md5_state {
u32 hash[MD5_HASH_WORDS];
- u32 block[MD5_BLOCK_WORDS];
u64 byte_count;
+ u32 block[MD5_BLOCK_WORDS];
};
#endif
diff --git a/include/crypto/null.h b/include/crypto/null.h
index 0ef577cc00e3..1c66abf9de3b 100644
--- a/include/crypto/null.h
+++ b/include/crypto/null.h
@@ -9,7 +9,4 @@
#define NULL_DIGEST_SIZE 0
#define NULL_IV_SIZE 0
-struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void);
-void crypto_put_default_null_skcipher(void);
-
#endif
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index 090692ec3bc7..e54abda8cfe9 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -7,7 +7,6 @@
#define _CRYPTO_POLY1305_H
#include <linux/types.h>
-#include <linux/crypto.h>
#define POLY1305_BLOCK_SIZE 16
#define POLY1305_KEY_SIZE 32
@@ -38,17 +37,8 @@ struct poly1305_state {
};
};
-struct poly1305_desc_ctx {
- /* partial buffer */
- u8 buf[POLY1305_BLOCK_SIZE];
- /* bytes used in partial buffer */
- unsigned int buflen;
- /* how many keys have been set in r[] */
- unsigned short rset;
- /* whether s[] has been set */
- bool sset;
- /* finalize key */
- u32 s[4];
+/* Combined state for block function. */
+struct poly1305_block_state {
/* accumulator */
struct poly1305_state h;
/* key */
@@ -58,42 +48,29 @@ struct poly1305_desc_ctx {
};
};
-void poly1305_init_arch(struct poly1305_desc_ctx *desc,
- const u8 key[POLY1305_KEY_SIZE]);
-void poly1305_init_generic(struct poly1305_desc_ctx *desc,
- const u8 key[POLY1305_KEY_SIZE]);
-
-static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
-{
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
- poly1305_init_arch(desc, key);
- else
- poly1305_init_generic(desc, key);
-}
-
-void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src,
- unsigned int nbytes);
-void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
- unsigned int nbytes);
-
-static inline void poly1305_update(struct poly1305_desc_ctx *desc,
- const u8 *src, unsigned int nbytes)
-{
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
- poly1305_update_arch(desc, src, nbytes);
- else
- poly1305_update_generic(desc, src, nbytes);
-}
+struct poly1305_desc_ctx {
+ /* partial buffer */
+ u8 buf[POLY1305_BLOCK_SIZE];
+ /* bytes used in partial buffer */
+ unsigned int buflen;
+ /* finalize key */
+ u32 s[4];
+ struct poly1305_block_state state;
+};
-void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest);
-void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest);
+void poly1305_init(struct poly1305_desc_ctx *desc,
+ const u8 key[POLY1305_KEY_SIZE]);
+void poly1305_update(struct poly1305_desc_ctx *desc,
+ const u8 *src, unsigned int nbytes);
+void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest);
-static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest)
+#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)
+bool poly1305_is_arch_optimized(void);
+#else
+static inline bool poly1305_is_arch_optimized(void)
{
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
- poly1305_final_arch(desc, digest);
- else
- poly1305_final_generic(desc, digest);
+ return false;
}
+#endif
#endif
diff --git a/include/crypto/polyval.h b/include/crypto/polyval.h
index 1d630f371f77..d2e63743e592 100644
--- a/include/crypto/polyval.h
+++ b/include/crypto/polyval.h
@@ -8,15 +8,7 @@
#ifndef _CRYPTO_POLYVAL_H
#define _CRYPTO_POLYVAL_H
-#include <linux/types.h>
-#include <linux/crypto.h>
-
#define POLYVAL_BLOCK_SIZE 16
#define POLYVAL_DIGEST_SIZE 16
-void polyval_mul_non4k(u8 *op1, const u8 *op2);
-
-void polyval_update_non4k(const u8 *key, const u8 *in,
- size_t nblocks, u8 *accumulator);
-
#endif
diff --git a/include/crypto/rng.h b/include/crypto/rng.h
index 5ac4388f50e1..f8224cc390f8 100644
--- a/include/crypto/rng.h
+++ b/include/crypto/rng.h
@@ -102,12 +102,10 @@ static inline struct rng_alg *__crypto_rng_alg(struct crypto_alg *alg)
}
/**
- * crypto_rng_alg - obtain name of RNG
- * @tfm: cipher handle
- *
- * Return the generic name (cra_name) of the initialized random number generator
+ * crypto_rng_alg() - obtain 'struct rng_alg' pointer from RNG handle
+ * @tfm: RNG handle
*
- * Return: generic name string
+ * Return: Pointer to 'struct rng_alg', derived from @tfm RNG handle
*/
static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm)
{
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 94a8585f26b2..15ab743f68c8 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -11,11 +11,64 @@
#ifndef _CRYPTO_SCATTERWALK_H
#define _CRYPTO_SCATTERWALK_H
-#include <crypto/algapi.h>
-
+#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
+#include <linux/types.h>
+
+struct scatter_walk {
+ /* Must be the first member, see struct skcipher_walk. */
+ union {
+ void *const addr;
+
+ /* Private API field, do not touch. */
+ union crypto_no_such_thing *__addr;
+ };
+ struct scatterlist *sg;
+ unsigned int offset;
+};
+
+struct skcipher_walk {
+ union {
+ /* Virtual address of the source. */
+ struct {
+ struct {
+ const void *const addr;
+ } virt;
+ } src;
+
+ /* Private field for the API, do not use. */
+ struct scatter_walk in;
+ };
+
+ union {
+ /* Virtual address of the destination. */
+ struct {
+ struct {
+ void *const addr;
+ } virt;
+ } dst;
+
+ /* Private field for the API, do not use. */
+ struct scatter_walk out;
+ };
+
+ unsigned int nbytes;
+ unsigned int total;
+
+ u8 *page;
+ u8 *buffer;
+ u8 *oiv;
+ void *iv;
+
+ unsigned int ivsize;
+
+ int flags;
+ unsigned int blocksize;
+ unsigned int stride;
+ unsigned int alignmask;
+};
static inline void scatterwalk_crypto_chain(struct scatterlist *head,
struct scatterlist *sg, int num)
@@ -243,4 +296,12 @@ struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
struct scatterlist *src,
unsigned int len);
+int skcipher_walk_first(struct skcipher_walk *walk, bool atomic);
+int skcipher_walk_done(struct skcipher_walk *walk, int res);
+
+static inline void skcipher_walk_abort(struct skcipher_walk *walk)
+{
+ skcipher_walk_done(walk, -ECANCELED);
+}
+
#endif /* _CRYPTO_SCATTERWALK_H */
diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h
index 044ecea60ac8..f48230b1413c 100644
--- a/include/crypto/sha1.h
+++ b/include/crypto/sha1.h
@@ -10,6 +10,7 @@
#define SHA1_DIGEST_SIZE 20
#define SHA1_BLOCK_SIZE 64
+#define SHA1_STATE_SIZE offsetof(struct sha1_state, buffer)
#define SHA1_H0 0x67452301UL
#define SHA1_H1 0xefcdab89UL
@@ -25,14 +26,6 @@ struct sha1_state {
u8 buffer[SHA1_BLOCK_SIZE];
};
-struct shash_desc;
-
-extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-
/*
* An implementation of SHA-1's compression function. Don't use in new code!
* You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't
diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h
index 0c342ed0d038..62701d136c79 100644
--- a/include/crypto/sha1_base.h
+++ b/include/crypto/sha1_base.h
@@ -10,10 +10,9 @@
#include <crypto/internal/hash.h>
#include <crypto/sha1.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
+#include <linux/math.h>
#include <linux/string.h>
-
+#include <linux/types.h>
#include <linux/unaligned.h>
typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks);
@@ -32,63 +31,38 @@ static inline int sha1_base_init(struct shash_desc *desc)
return 0;
}
-static inline int sha1_base_do_update(struct shash_desc *desc,
- const u8 *data,
- unsigned int len,
- sha1_block_fn *block_fn)
+static inline int sha1_base_do_update_blocks(struct shash_desc *desc,
+ const u8 *data,
+ unsigned int len,
+ sha1_block_fn *block_fn)
{
+ unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE);
struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- sctx->count += len;
-
- if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) {
- int blocks;
-
- if (partial) {
- int p = SHA1_BLOCK_SIZE - partial;
-
- memcpy(sctx->buffer + partial, data, p);
- data += p;
- len -= p;
-
- block_fn(sctx, sctx->buffer, 1);
- }
- blocks = len / SHA1_BLOCK_SIZE;
- len %= SHA1_BLOCK_SIZE;
-
- if (blocks) {
- block_fn(sctx, data, blocks);
- data += blocks * SHA1_BLOCK_SIZE;
- }
- partial = 0;
- }
- if (len)
- memcpy(sctx->buffer + partial, data, len);
-
- return 0;
+ sctx->count += len - remain;
+ block_fn(sctx, data, len / SHA1_BLOCK_SIZE);
+ return remain;
}
-static inline int sha1_base_do_finalize(struct shash_desc *desc,
- sha1_block_fn *block_fn)
+static inline int sha1_base_do_finup(struct shash_desc *desc,
+ const u8 *src, unsigned int len,
+ sha1_block_fn *block_fn)
{
- const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
+ unsigned int bit_offset = SHA1_BLOCK_SIZE / 8 - 1;
struct sha1_state *sctx = shash_desc_ctx(desc);
- __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- sctx->buffer[partial++] = 0x80;
- if (partial > bit_offset) {
- memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial);
- partial = 0;
-
- block_fn(sctx, sctx->buffer, 1);
- }
-
- memset(sctx->buffer + partial, 0x0, bit_offset - partial);
- *bits = cpu_to_be64(sctx->count << 3);
- block_fn(sctx, sctx->buffer, 1);
+ union {
+ __be64 b64[SHA1_BLOCK_SIZE / 4];
+ u8 u8[SHA1_BLOCK_SIZE * 2];
+ } block = {};
+
+ if (len >= bit_offset * 8)
+ bit_offset += SHA1_BLOCK_SIZE / 8;
+ memcpy(&block, src, len);
+ block.u8[len] = 0x80;
+ sctx->count += len;
+ block.b64[bit_offset] = cpu_to_be64(sctx->count << 3);
+ block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SHA1_BLOCK_SIZE);
+ memzero_explicit(&block, sizeof(block));
return 0;
}
@@ -102,7 +76,6 @@ static inline int sha1_base_finish(struct shash_desc *desc, u8 *out)
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], digest++);
- memzero_explicit(sctx, sizeof(*sctx));
return 0;
}
diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h
index b9e9281d76c9..4912572578dc 100644
--- a/include/crypto/sha2.h
+++ b/include/crypto/sha2.h
@@ -13,12 +13,14 @@
#define SHA256_DIGEST_SIZE 32
#define SHA256_BLOCK_SIZE 64
+#define SHA256_STATE_WORDS 8
#define SHA384_DIGEST_SIZE 48
#define SHA384_BLOCK_SIZE 128
#define SHA512_DIGEST_SIZE 64
#define SHA512_BLOCK_SIZE 128
+#define SHA512_STATE_SIZE 80
#define SHA224_H0 0xc1059ed8UL
#define SHA224_H1 0x367cd507UL
@@ -64,9 +66,19 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE];
extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
-struct sha256_state {
- u32 state[SHA256_DIGEST_SIZE / 4];
+struct crypto_sha256_state {
+ u32 state[SHA256_STATE_WORDS];
u64 count;
+};
+
+struct sha256_state {
+ union {
+ struct crypto_sha256_state ctx;
+ struct {
+ u32 state[SHA256_STATE_WORDS];
+ u64 count;
+ };
+ };
u8 buf[SHA256_BLOCK_SIZE];
};
@@ -76,31 +88,7 @@ struct sha512_state {
u8 buf[SHA512_BLOCK_SIZE];
};
-struct shash_desc;
-
-extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-
-extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-
-/*
- * Stand-alone implementation of the SHA256 algorithm. It is designed to
- * have as little dependencies as possible so it can be used in the
- * kexec_file purgatory. In other cases you should generally use the
- * hash APIs from include/crypto/hash.h. Especially when hashing large
- * amounts of data as those APIs may be hw-accelerated.
- *
- * For details see lib/crypto/sha256.c
- */
-
-static inline void sha256_init(struct sha256_state *sctx)
+static inline void sha256_block_init(struct crypto_sha256_state *sctx)
{
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
@@ -112,11 +100,16 @@ static inline void sha256_init(struct sha256_state *sctx)
sctx->state[7] = SHA256_H7;
sctx->count = 0;
}
-void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len);
-void sha256_final(struct sha256_state *sctx, u8 *out);
-void sha256(const u8 *data, unsigned int len, u8 *out);
-static inline void sha224_init(struct sha256_state *sctx)
+static inline void sha256_init(struct sha256_state *sctx)
+{
+ sha256_block_init(&sctx->ctx);
+}
+void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len);
+void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]);
+void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]);
+
+static inline void sha224_block_init(struct crypto_sha256_state *sctx)
{
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
@@ -128,7 +121,12 @@ static inline void sha224_init(struct sha256_state *sctx)
sctx->state[7] = SHA224_H7;
sctx->count = 0;
}
+
+static inline void sha224_init(struct sha256_state *sctx)
+{
+ sha224_block_init(&sctx->ctx);
+}
/* Simply use sha256_update as it is equivalent to sha224_update. */
-void sha224_final(struct sha256_state *sctx, u8 *out);
+void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]);
#endif /* _CRYPTO_SHA2_H */
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
deleted file mode 100644
index e0418818d63c..000000000000
--- a/include/crypto/sha256_base.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * sha256_base.h - core logic for SHA-256 implementations
- *
- * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#ifndef _CRYPTO_SHA256_BASE_H
-#define _CRYPTO_SHA256_BASE_H
-
-#include <asm/byteorder.h>
-#include <linux/unaligned.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha2.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-typedef void (sha256_block_fn)(struct sha256_state *sst, u8 const *src,
- int blocks);
-
-static inline int sha224_base_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- sha224_init(sctx);
- return 0;
-}
-
-static inline int sha256_base_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- sha256_init(sctx);
- return 0;
-}
-
-static inline int lib_sha256_base_do_update(struct sha256_state *sctx,
- const u8 *data,
- unsigned int len,
- sha256_block_fn *block_fn)
-{
- unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
- sctx->count += len;
-
- if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) {
- int blocks;
-
- if (partial) {
- int p = SHA256_BLOCK_SIZE - partial;
-
- memcpy(sctx->buf + partial, data, p);
- data += p;
- len -= p;
-
- block_fn(sctx, sctx->buf, 1);
- }
-
- blocks = len / SHA256_BLOCK_SIZE;
- len %= SHA256_BLOCK_SIZE;
-
- if (blocks) {
- block_fn(sctx, data, blocks);
- data += blocks * SHA256_BLOCK_SIZE;
- }
- partial = 0;
- }
- if (len)
- memcpy(sctx->buf + partial, data, len);
-
- return 0;
-}
-
-static inline int sha256_base_do_update(struct shash_desc *desc,
- const u8 *data,
- unsigned int len,
- sha256_block_fn *block_fn)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- return lib_sha256_base_do_update(sctx, data, len, block_fn);
-}
-
-static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx,
- sha256_block_fn *block_fn)
-{
- const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64);
- __be64 *bits = (__be64 *)(sctx->buf + bit_offset);
- unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
- sctx->buf[partial++] = 0x80;
- if (partial > bit_offset) {
- memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial);
- partial = 0;
-
- block_fn(sctx, sctx->buf, 1);
- }
-
- memset(sctx->buf + partial, 0x0, bit_offset - partial);
- *bits = cpu_to_be64(sctx->count << 3);
- block_fn(sctx, sctx->buf, 1);
-
- return 0;
-}
-
-static inline int sha256_base_do_finalize(struct shash_desc *desc,
- sha256_block_fn *block_fn)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- return lib_sha256_base_do_finalize(sctx, block_fn);
-}
-
-static inline int lib_sha256_base_finish(struct sha256_state *sctx, u8 *out,
- unsigned int digest_size)
-{
- __be32 *digest = (__be32 *)out;
- int i;
-
- for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32))
- put_unaligned_be32(sctx->state[i], digest++);
-
- memzero_explicit(sctx, sizeof(*sctx));
- return 0;
-}
-
-static inline int sha256_base_finish(struct shash_desc *desc, u8 *out)
-{
- unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- return lib_sha256_base_finish(sctx, out, digest_size);
-}
-
-#endif /* _CRYPTO_SHA256_BASE_H */
diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h
index 080f60c2e6b1..41e1b83a6d91 100644
--- a/include/crypto/sha3.h
+++ b/include/crypto/sha3.h
@@ -5,30 +5,32 @@
#ifndef __CRYPTO_SHA3_H__
#define __CRYPTO_SHA3_H__
+#include <linux/types.h>
+
#define SHA3_224_DIGEST_SIZE (224 / 8)
#define SHA3_224_BLOCK_SIZE (200 - 2 * SHA3_224_DIGEST_SIZE)
+#define SHA3_224_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_224_BLOCK_SIZE + 1
#define SHA3_256_DIGEST_SIZE (256 / 8)
#define SHA3_256_BLOCK_SIZE (200 - 2 * SHA3_256_DIGEST_SIZE)
+#define SHA3_256_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_256_BLOCK_SIZE + 1
#define SHA3_384_DIGEST_SIZE (384 / 8)
#define SHA3_384_BLOCK_SIZE (200 - 2 * SHA3_384_DIGEST_SIZE)
+#define SHA3_384_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_384_BLOCK_SIZE + 1
#define SHA3_512_DIGEST_SIZE (512 / 8)
#define SHA3_512_BLOCK_SIZE (200 - 2 * SHA3_512_DIGEST_SIZE)
+#define SHA3_512_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_512_BLOCK_SIZE + 1
-struct sha3_state {
- u64 st[25];
- unsigned int rsiz;
- unsigned int rsizw;
+#define SHA3_STATE_SIZE 200
- unsigned int partial;
- u8 buf[SHA3_224_BLOCK_SIZE];
+struct shash_desc;
+
+struct sha3_state {
+ u64 st[SHA3_STATE_SIZE / 8];
};
int crypto_sha3_init(struct shash_desc *desc);
-int crypto_sha3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-int crypto_sha3_final(struct shash_desc *desc, u8 *out);
#endif
diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h
index 679916a84cb2..aa814bab442d 100644
--- a/include/crypto/sha512_base.h
+++ b/include/crypto/sha512_base.h
@@ -10,10 +10,10 @@
#include <crypto/internal/hash.h>
#include <crypto/sha2.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/math.h>
#include <linux/string.h>
-
+#include <linux/types.h>
#include <linux/unaligned.h>
typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src,
@@ -53,66 +53,51 @@ static inline int sha512_base_init(struct shash_desc *desc)
return 0;
}
-static inline int sha512_base_do_update(struct shash_desc *desc,
- const u8 *data,
- unsigned int len,
- sha512_block_fn *block_fn)
+static inline int sha512_base_do_update_blocks(struct shash_desc *desc,
+ const u8 *data,
+ unsigned int len,
+ sha512_block_fn *block_fn)
{
+ unsigned int remain = len - round_down(len, SHA512_BLOCK_SIZE);
struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+ len -= remain;
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
-
- if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) {
- int blocks;
-
- if (partial) {
- int p = SHA512_BLOCK_SIZE - partial;
-
- memcpy(sctx->buf + partial, data, p);
- data += p;
- len -= p;
-
- block_fn(sctx, sctx->buf, 1);
- }
-
- blocks = len / SHA512_BLOCK_SIZE;
- len %= SHA512_BLOCK_SIZE;
-
- if (blocks) {
- block_fn(sctx, data, blocks);
- data += blocks * SHA512_BLOCK_SIZE;
- }
- partial = 0;
- }
- if (len)
- memcpy(sctx->buf + partial, data, len);
-
- return 0;
+ block_fn(sctx, data, len / SHA512_BLOCK_SIZE);
+ return remain;
}
-static inline int sha512_base_do_finalize(struct shash_desc *desc,
- sha512_block_fn *block_fn)
+static inline int sha512_base_do_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len,
+ sha512_block_fn *block_fn)
{
- const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]);
+ unsigned int bit_offset = SHA512_BLOCK_SIZE / 8 - 2;
struct sha512_state *sctx = shash_desc_ctx(desc);
- __be64 *bits = (__be64 *)(sctx->buf + bit_offset);
- unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+ union {
+ __be64 b64[SHA512_BLOCK_SIZE / 4];
+ u8 u8[SHA512_BLOCK_SIZE * 2];
+ } block = {};
- sctx->buf[partial++] = 0x80;
- if (partial > bit_offset) {
- memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial);
- partial = 0;
+ if (len >= SHA512_BLOCK_SIZE) {
+ int remain;
- block_fn(sctx, sctx->buf, 1);
+ remain = sha512_base_do_update_blocks(desc, src, len, block_fn);
+ src += len - remain;
+ len = remain;
}
- memset(sctx->buf + partial, 0x0, bit_offset - partial);
- bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
- bits[1] = cpu_to_be64(sctx->count[0] << 3);
- block_fn(sctx, sctx->buf, 1);
+ if (len >= bit_offset * 8)
+ bit_offset += SHA512_BLOCK_SIZE / 8;
+ memcpy(&block, src, len);
+ block.u8[len] = 0x80;
+ sctx->count[0] += len;
+ block.b64[bit_offset] = cpu_to_be64(sctx->count[1] << 3 |
+ sctx->count[0] >> 61);
+ block.b64[bit_offset + 1] = cpu_to_be64(sctx->count[0] << 3);
+ block_fn(sctx, block.u8, (bit_offset + 2) * 8 / SHA512_BLOCK_SIZE);
+ memzero_explicit(&block, sizeof(block));
return 0;
}
@@ -126,9 +111,10 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out)
for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64))
put_unaligned_be64(sctx->state[i], digest++);
-
- memzero_explicit(sctx, sizeof(*sctx));
return 0;
}
+void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
+ int blocks);
+
#endif /* _CRYPTO_SHA512_BASE_H */
diff --git a/include/crypto/sig.h b/include/crypto/sig.h
index 11024708c069..fa6dafafab3f 100644
--- a/include/crypto/sig.h
+++ b/include/crypto/sig.h
@@ -128,7 +128,7 @@ static inline void crypto_free_sig(struct crypto_sig *tfm)
/**
* crypto_sig_keysize() - Get key size
*
- * Function returns the key size in bytes.
+ * Function returns the key size in bits.
* Function assumes that the key is already set in the transformation. If this
* function is called without a setkey or with a failed setkey, you may end up
* in a NULL dereference.
diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h
index 1f021ad0533f..c8d02c86c298 100644
--- a/include/crypto/sm3.h
+++ b/include/crypto/sm3.h
@@ -14,6 +14,7 @@
#define SM3_DIGEST_SIZE 32
#define SM3_BLOCK_SIZE 64
+#define SM3_STATE_SIZE 40
#define SM3_T1 0x79CC4519
#define SM3_T2 0x7A879D8A
@@ -58,7 +59,6 @@ static inline void sm3_init(struct sm3_state *sctx)
sctx->count = 0;
}
-void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len);
-void sm3_final(struct sm3_state *sctx, u8 *out);
+void sm3_block_generic(struct sm3_state *sctx, u8 const *data, int blocks);
#endif
diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h
index b33ed39c2bce..7c53570bc05e 100644
--- a/include/crypto/sm3_base.h
+++ b/include/crypto/sm3_base.h
@@ -11,87 +11,59 @@
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
-#include <linux/crypto.h>
+#include <linux/math.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/types.h>
#include <linux/unaligned.h>
typedef void (sm3_block_fn)(struct sm3_state *sst, u8 const *src, int blocks);
static inline int sm3_base_init(struct shash_desc *desc)
{
- struct sm3_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SM3_IVA;
- sctx->state[1] = SM3_IVB;
- sctx->state[2] = SM3_IVC;
- sctx->state[3] = SM3_IVD;
- sctx->state[4] = SM3_IVE;
- sctx->state[5] = SM3_IVF;
- sctx->state[6] = SM3_IVG;
- sctx->state[7] = SM3_IVH;
- sctx->count = 0;
-
+ sm3_init(shash_desc_ctx(desc));
return 0;
}
-static inline int sm3_base_do_update(struct shash_desc *desc,
- const u8 *data,
- unsigned int len,
- sm3_block_fn *block_fn)
+static inline int sm3_base_do_update_blocks(struct shash_desc *desc,
+ const u8 *data, unsigned int len,
+ sm3_block_fn *block_fn)
{
+ unsigned int remain = len - round_down(len, SM3_BLOCK_SIZE);
struct sm3_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
-
- sctx->count += len;
-
- if (unlikely((partial + len) >= SM3_BLOCK_SIZE)) {
- int blocks;
-
- if (partial) {
- int p = SM3_BLOCK_SIZE - partial;
-
- memcpy(sctx->buffer + partial, data, p);
- data += p;
- len -= p;
- block_fn(sctx, sctx->buffer, 1);
- }
-
- blocks = len / SM3_BLOCK_SIZE;
- len %= SM3_BLOCK_SIZE;
-
- if (blocks) {
- block_fn(sctx, data, blocks);
- data += blocks * SM3_BLOCK_SIZE;
- }
- partial = 0;
- }
- if (len)
- memcpy(sctx->buffer + partial, data, len);
-
- return 0;
+ sctx->count += len - remain;
+ block_fn(sctx, data, len / SM3_BLOCK_SIZE);
+ return remain;
}
-static inline int sm3_base_do_finalize(struct shash_desc *desc,
- sm3_block_fn *block_fn)
+static inline int sm3_base_do_finup(struct shash_desc *desc,
+ const u8 *src, unsigned int len,
+ sm3_block_fn *block_fn)
{
- const int bit_offset = SM3_BLOCK_SIZE - sizeof(__be64);
+ unsigned int bit_offset = SM3_BLOCK_SIZE / 8 - 1;
struct sm3_state *sctx = shash_desc_ctx(desc);
- __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
- unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+ union {
+ __be64 b64[SM3_BLOCK_SIZE / 4];
+ u8 u8[SM3_BLOCK_SIZE * 2];
+ } block = {};
- sctx->buffer[partial++] = 0x80;
- if (partial > bit_offset) {
- memset(sctx->buffer + partial, 0x0, SM3_BLOCK_SIZE - partial);
- partial = 0;
+ if (len >= SM3_BLOCK_SIZE) {
+ int remain;
- block_fn(sctx, sctx->buffer, 1);
+ remain = sm3_base_do_update_blocks(desc, src, len, block_fn);
+ src += len - remain;
+ len = remain;
}
- memset(sctx->buffer + partial, 0x0, bit_offset - partial);
- *bits = cpu_to_be64(sctx->count << 3);
- block_fn(sctx, sctx->buffer, 1);
+ if (len >= bit_offset * 8)
+ bit_offset += SM3_BLOCK_SIZE / 8;
+ memcpy(&block, src, len);
+ block.u8[len] = 0x80;
+ sctx->count += len;
+ block.b64[bit_offset] = cpu_to_be64(sctx->count << 3);
+ block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SM3_BLOCK_SIZE);
+ memzero_explicit(&block, sizeof(block));
return 0;
}
@@ -104,8 +76,6 @@ static inline int sm3_base_finish(struct shash_desc *desc, u8 *out)
for (i = 0; i < SM3_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], digest++);
-
- memzero_explicit(sctx, sizeof(*sctx));
return 0;
}
diff --git a/include/crypto/streebog.h b/include/crypto/streebog.h
index cae1b4a01971..570f720a113b 100644
--- a/include/crypto/streebog.h
+++ b/include/crypto/streebog.h
@@ -23,15 +23,10 @@ struct streebog_uint512 {
};
struct streebog_state {
- union {
- u8 buffer[STREEBOG_BLOCK_SIZE];
- struct streebog_uint512 m;
- };
struct streebog_uint512 hash;
struct streebog_uint512 h;
struct streebog_uint512 N;
struct streebog_uint512 Sigma;
- size_t fillsize;
};
#endif /* !_CRYPTO_STREEBOG_H_ */
diff --git a/include/drm/Makefile b/include/drm/Makefile
index a7bd15d2803e..1df6962556ef 100644
--- a/include/drm/Makefile
+++ b/include/drm/Makefile
@@ -11,7 +11,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \
quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
cmd_hdrtest = \
$(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \
- $(srctree)/scripts/kernel-doc -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
+ PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
touch $@
$(obj)/%.hdrtest: $(src)/%.h FORCE
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index df120b4d1f83..eaf704d3d05e 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops {
* @ops: Pointer to the operations structure for GPU SVM device memory
* @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
* @size: Size of device memory allocation
+ * @timeslice_expiration: Timeslice expiration in jiffies
*/
struct drm_gpusvm_devmem {
struct device *dev;
@@ -97,6 +98,7 @@ struct drm_gpusvm_devmem {
const struct drm_gpusvm_devmem_ops *ops;
struct drm_pagemap *dpagemap;
size_t size;
+ u64 timeslice_expiration;
};
/**
@@ -186,6 +188,31 @@ struct drm_gpusvm_notifier {
};
/**
+ * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags
+ *
+ * @migrate_devmem: Flag indicating whether the range can be migrated to device memory
+ * @unmapped: Flag indicating if the range has been unmapped
+ * @partial_unmap: Flag indicating if the range has been partially unmapped
+ * @has_devmem_pages: Flag indicating if the range has devmem pages
+ * @has_dma_mapping: Flag indicating if the range has a DMA mapping
+ * @__flags: Flags for range in u16 form (used for READ_ONCE)
+ */
+struct drm_gpusvm_range_flags {
+ union {
+ struct {
+ /* All flags below must be set upon creation */
+ u16 migrate_devmem : 1;
+ /* All flags below must be set / cleared under notifier lock */
+ u16 unmapped : 1;
+ u16 partial_unmap : 1;
+ u16 has_devmem_pages : 1;
+ u16 has_dma_mapping : 1;
+ };
+ u16 __flags;
+ };
+};
+
+/**
* struct drm_gpusvm_range - Structure representing a GPU SVM range
*
* @gpusvm: Pointer to the GPU SVM structure
@@ -198,11 +225,6 @@ struct drm_gpusvm_notifier {
* @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
* Note this is assuming only one drm_pagemap per range is allowed.
* @flags: Flags for range
- * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory
- * @flags.unmapped: Flag indicating if the range has been unmapped
- * @flags.partial_unmap: Flag indicating if the range has been partially unmapped
- * @flags.has_devmem_pages: Flag indicating if the range has devmem pages
- * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping
*
* This structure represents a GPU SVM range used for tracking memory ranges
* mapped in a DRM device.
@@ -216,15 +238,7 @@ struct drm_gpusvm_range {
unsigned long notifier_seq;
struct drm_pagemap_device_addr *dma_addr;
struct drm_pagemap *dpagemap;
- struct {
- /* All flags below must be set upon creation */
- u16 migrate_devmem : 1;
- /* All flags below must be set / cleared under notifier lock */
- u16 unmapped : 1;
- u16 partial_unmap : 1;
- u16 has_devmem_pages : 1;
- u16 has_dma_mapping : 1;
- } flags;
+ struct drm_gpusvm_range_flags flags;
};
/**
@@ -283,17 +297,22 @@ struct drm_gpusvm {
* @check_pages_threshold: Check CPU pages for present if chunk is less than or
* equal to threshold. If not present, reduce chunk
* size.
+ * @timeslice_ms: The timeslice MS which in minimum time a piece of memory
+ * remains with either exclusive GPU or CPU access.
* @in_notifier: entering from a MMU notifier
* @read_only: operating on read-only memory
* @devmem_possible: possible to use device memory
+ * @devmem_only: use only device memory
*
* Context that is DRM GPUSVM is operating in (i.e. user arguments).
*/
struct drm_gpusvm_ctx {
unsigned long check_pages_threshold;
+ unsigned long timeslice_ms;
unsigned int in_notifier :1;
unsigned int read_only :1;
unsigned int devmem_possible :1;
+ unsigned int devmem_only :1;
};
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index d212848d07f3..a7ce9523c50d 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -861,6 +861,10 @@
MACRO__(0xB081, ## __VA_ARGS__), \
MACRO__(0xB082, ## __VA_ARGS__), \
MACRO__(0xB083, ## __VA_ARGS__), \
+ MACRO__(0xB084, ## __VA_ARGS__), \
+ MACRO__(0xB085, ## __VA_ARGS__), \
+ MACRO__(0xB086, ## __VA_ARGS__), \
+ MACRO__(0xB087, ## __VA_ARGS__), \
MACRO__(0xB08F, ## __VA_ARGS__), \
MACRO__(0xB090, ## __VA_ARGS__), \
MACRO__(0xB0A0, ## __VA_ARGS__), \
diff --git a/include/dt-bindings/power/mediatek,mt6893-power.h b/include/dt-bindings/power/mediatek,mt6893-power.h
new file mode 100644
index 000000000000..aeab51bb2ad8
--- /dev/null
+++ b/include/dt-bindings/power/mediatek,mt6893-power.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Copyright (c) 2025 Collabora Ltd
+ * AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef _DT_BINDINGS_POWER_MT6893_POWER_H
+#define _DT_BINDINGS_POWER_MT6893_POWER_H
+
+#define MT6893_POWER_DOMAIN_CONN 0
+#define MT6893_POWER_DOMAIN_MFG0 1
+#define MT6893_POWER_DOMAIN_MFG1 2
+#define MT6893_POWER_DOMAIN_MFG2 3
+#define MT6893_POWER_DOMAIN_MFG3 4
+#define MT6893_POWER_DOMAIN_MFG4 5
+#define MT6893_POWER_DOMAIN_MFG5 6
+#define MT6893_POWER_DOMAIN_MFG6 7
+#define MT6893_POWER_DOMAIN_ISP 8
+#define MT6893_POWER_DOMAIN_ISP2 9
+#define MT6893_POWER_DOMAIN_IPE 10
+#define MT6893_POWER_DOMAIN_VDEC0 11
+#define MT6893_POWER_DOMAIN_VDEC1 12
+#define MT6893_POWER_DOMAIN_VENC0 13
+#define MT6893_POWER_DOMAIN_VENC1 14
+#define MT6893_POWER_DOMAIN_MDP 15
+#define MT6893_POWER_DOMAIN_DISP 16
+#define MT6893_POWER_DOMAIN_AUDIO 17
+#define MT6893_POWER_DOMAIN_ADSP 18
+#define MT6893_POWER_DOMAIN_CAM 19
+#define MT6893_POWER_DOMAIN_CAM_RAWA 20
+#define MT6893_POWER_DOMAIN_CAM_RAWB 21
+#define MT6893_POWER_DOMAIN_CAM_RAWC 22
+#define MT6893_POWER_DOMAIN_DP_TX 23
+
+#endif /* _DT_BINDINGS_POWER_MT6893_POWER_H */
diff --git a/include/dt-bindings/power/rockchip,rk3562-power.h b/include/dt-bindings/power/rockchip,rk3562-power.h
new file mode 100644
index 000000000000..5182c2427a55
--- /dev/null
+++ b/include/dt-bindings/power/rockchip,rk3562-power.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2022-2024 Rockchip Electronics Co., Ltd.
+ */
+#ifndef __DT_BINDINGS_POWER_RK3562_POWER_H__
+#define __DT_BINDINGS_POWER_RK3562_POWER_H__
+
+/* VD_CORE */
+#define RK3562_PD_CPU_0 0
+#define RK3562_PD_CPU_1 1
+#define RK3562_PD_CPU_2 2
+#define RK3562_PD_CPU_3 3
+#define RK3562_PD_CORE_ALIVE 4
+
+/* VD_PMU */
+#define RK3562_PD_PMU 5
+#define RK3562_PD_PMU_ALIVE 6
+
+/* VD_NPU */
+#define RK3562_PD_NPU 7
+
+/* VD_GPU */
+#define RK3562_PD_GPU 8
+
+/* VD_LOGIC */
+#define RK3562_PD_DDR 9
+#define RK3562_PD_VEPU 10
+#define RK3562_PD_VDPU 11
+#define RK3562_PD_VI 12
+#define RK3562_PD_VO 13
+#define RK3562_PD_RGA 14
+#define RK3562_PD_PHP 15
+#define RK3562_PD_LOGIC_ALIVE 16
+
+#endif
diff --git a/include/dt-bindings/sound/cs48l32.h b/include/dt-bindings/sound/cs48l32.h
new file mode 100644
index 000000000000..4e82260fff67
--- /dev/null
+++ b/include/dt-bindings/sound/cs48l32.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+/*
+ * Device Tree defines for CS48L32 DSP.
+ *
+ * Copyright (C) 2016-2018, 2022, 2025 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef DT_BINDINGS_SOUND_CS48L32_H
+#define DT_BINDINGS_SOUND_CS48L32_H
+
+/* Values for cirrus,in-type */
+#define CS48L32_IN_TYPE_DIFF 0
+#define CS48L32_IN_TYPE_SE 1
+
+/* Values for cirrus,pdm-sup */
+#define CS48L32_PDM_SUP_VOUT_MIC 0
+#define CS48L32_PDM_SUP_MICBIAS1 1
+
+#endif
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index abf0bd76e370..68606fa5fe73 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -1013,7 +1013,7 @@ enum hv_register_name {
/*
* To support arch-generic code calling hv_set/get_register:
- * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl
+ * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrq/wrmsrq
* - On ARM, HV_MSR_ indicates a VP register accessed via hypercall
*/
#define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3f2e93ed9730..f4b3d442b7df 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -772,6 +772,10 @@ int acpi_get_local_u64_address(acpi_handle handle, u64 *addr);
int acpi_get_local_address(acpi_handle handle, u32 *addr);
const char *acpi_get_subsystem_id(acpi_handle handle);
+#ifdef CONFIG_ACPI_MRRM
+int acpi_mrrm_max_mem_region(void);
+#endif
+
#else /* !CONFIG_ACPI */
#define acpi_disabled 1
@@ -1092,6 +1096,11 @@ static inline acpi_handle acpi_get_processor_handle(int cpu)
return NULL;
}
+static inline int acpi_mrrm_max_mem_region(void)
+{
+ return 1;
+}
+
#endif /* !CONFIG_ACPI */
#ifdef CONFIG_ACPI_HMAT
@@ -1125,13 +1134,13 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
u32 val_a, u32 val_b);
-#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
struct acpi_s2idle_dev_ops {
struct list_head list_node;
void (*prepare)(void);
void (*check)(void);
void (*restore)(void);
};
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
int acpi_get_lps0_constraint(struct acpi_device *adev);
@@ -1140,6 +1149,13 @@ static inline int acpi_get_lps0_constraint(struct device *dev)
{
return ACPI_STATE_UNKNOWN;
}
+static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg)
+{
+ return -ENODEV;
+}
+static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg)
+{
+}
#endif /* CONFIG_SUSPEND && CONFIG_X86 */
void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
#else
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index a946e0203e6d..8f7931eb7d16 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
#else /* ARCH_NEEDS_WEAK_PER_CPU */
+#ifdef MODULE
+
+#define DEFINE_ALLOC_TAG(_alloc_tag) \
+ static struct alloc_tag _alloc_tag __used __aligned(8) \
+ __section(ALLOC_TAG_SECTION_NAME) = { \
+ .ct = CODE_TAG_INIT, \
+ .counters = NULL };
+
+#else /* MODULE */
+
#define DEFINE_ALLOC_TAG(_alloc_tag) \
static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \
static struct alloc_tag _alloc_tag __used __aligned(8) \
@@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
.ct = CODE_TAG_INIT, \
.counters = &_alloc_tag_cntr };
+#endif /* MODULE */
+
#endif /* ARCH_NEEDS_WEAK_PER_CPU */
DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 2222e8b03ff4..d72d6e5aa200 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -14,14 +14,6 @@ int topology_update_cpu_topology(void);
struct device_node;
bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
-DECLARE_PER_CPU(unsigned long, cpu_scale);
-
-static inline unsigned long topology_get_cpu_scale(int cpu)
-{
- return per_cpu(cpu_scale, cpu);
-}
-
-void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
DECLARE_PER_CPU(unsigned long, capacity_freq_ref);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 1625c8529e70..65abd5ab8836 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -90,7 +90,6 @@ struct linux_binfmt {
struct list_head lh;
struct module *module;
int (*load_binary)(struct linux_binprm *);
- int (*load_shlib)(struct file *);
#ifdef CONFIG_COREDUMP
int (*core_dump)(struct coredump_params *cprm);
unsigned long min_coredump; /* minimal dump size */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index cafc7c215de8..9c37c66ef9ca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -11,6 +11,7 @@
#include <linux/uio.h>
#define BIO_MAX_VECS 256U
+#define BIO_MAX_INLINE_VECS UIO_MAXIOV
struct queue_limits;
@@ -402,7 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
struct request_queue;
-extern int submit_bio_wait(struct bio *bio);
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf);
extern void bio_uninit(struct bio *);
@@ -417,6 +417,30 @@ void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
size_t off);
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
+
+/**
+ * bio_add_max_vecs - number of bio_vecs needed to add data to a bio
+ * @kaddr: kernel virtual address to add
+ * @len: length in bytes to add
+ *
+ * Calculate how many bio_vecs need to be allocated to add the kernel virtual
+ * address range in [@kaddr:@len] in the worse case.
+ */
+static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
+{
+ if (is_vmalloc_addr(kaddr))
+ return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE);
+ return 1;
+}
+
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len);
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len);
+
+int submit_bio_wait(struct bio *bio);
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+ size_t len, enum req_op op);
+
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8eb9b3310167..de8c85a03bb7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -9,6 +9,7 @@
#include <linux/prefetch.h>
#include <linux/srcu.h>
#include <linux/rw_hint.h>
+#include <linux/rwsem.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -506,6 +507,9 @@ enum hctx_type {
* request_queue.tag_set_list.
* @srcu: Use as lock when type of the request queue is blocking
* (BLK_MQ_F_BLOCKING).
+ * @update_nr_hwq_lock:
+ * Synchronize updating nr_hw_queues with add/del disk &
+ * switching elevator.
*/
struct blk_mq_tag_set {
const struct blk_mq_ops *ops;
@@ -527,6 +531,8 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
struct srcu_struct *srcu;
+
+ struct rw_semaphore update_nr_hwq_lock;
};
/**
@@ -1031,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *,
int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *);
-int blk_rq_map_kern(struct request_queue *, struct request *, void *,
- unsigned int, gfp_t);
+int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
+ gfp_t gfp);
int blk_rq_append_bio(struct request *rq, struct bio *bio);
void blk_execute_rq_nowait(struct request *rq, bool at_head);
blk_status_t blk_execute_rq(struct request *rq, bool at_head);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dce7615c35e7..3d1577f07c1c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -220,6 +220,7 @@ struct bio {
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
enum rw_hint bi_write_hint;
+ u8 bi_write_stream;
blk_status_t bi_status;
atomic_t __bi_remaining;
@@ -286,7 +287,6 @@ struct bio {
enum {
BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */
BIO_CLONED, /* doesn't own data */
- BIO_BOUNCED, /* bio is a bounce bio */
BIO_QUIET, /* Make BIO Quiet */
BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
BIO_REFFED, /* bio has elevated ->bi_cnt */
@@ -296,6 +296,14 @@ enum {
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
+ /*
+ * This bio has completed bps throttling at the single tg granularity,
+ * which is different from BIO_BPS_THROTTLED. When the bio is enqueued
+ * into the sq->queued of the upper tg, or is about to be dispatched,
+ * this flag needs to be cleared. Since blk-throttle and rq_qos are not
+ * on the same hierarchical level, reuse the value.
+ */
+ BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED,
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9a1f0ee40b56..332b56f323d9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -182,7 +182,6 @@ struct gendisk {
struct list_head slave_bdevs;
#endif
struct timer_rand_state *random;
- atomic_t sync_io; /* RAID */
struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_ZONED
@@ -218,6 +217,8 @@ struct gendisk {
* devices that do not have multiple independent access ranges.
*/
struct blk_independent_access_ranges *ia_ranges;
+
+ struct mutex rqos_state_mutex; /* rqos state change mutex */
};
/**
@@ -331,9 +332,6 @@ typedef unsigned int __bitwise blk_features_t;
/* skip this queue in blk_mq_(un)quiesce_tagset */
#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13))
-/* bounce all highmem pages */
-#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14))
-
/* undocumented magic for bcache */
#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
((__force blk_features_t)(1u << 15))
@@ -347,7 +345,7 @@ typedef unsigned int __bitwise blk_features_t;
*/
#define BLK_FEAT_INHERIT_MASK \
(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
- BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+ BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \
BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
/* internal flags in queue_limits.flags */
@@ -405,6 +403,9 @@ struct queue_limits {
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
+ unsigned short max_write_streams;
+ unsigned int write_stream_granularity;
+
unsigned int max_open_zones;
unsigned int max_active_zones;
@@ -644,6 +645,8 @@ enum {
QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */
QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */
QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
+ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
+ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
QUEUE_FLAG_MAX
};
@@ -679,6 +682,10 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
#define blk_queue_skip_tagset_quiesce(q) \
((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
+#define blk_queue_disable_wbt(q) \
+ test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
+#define blk_queue_no_elv_switch(q) \
+ test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -1288,6 +1295,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
return queue_max_segments(bdev_get_queue(bdev));
}
+static inline unsigned short bdev_max_write_streams(struct block_device *bdev)
+{
+ if (bdev_is_partition(bdev))
+ return 0;
+ return bdev_limits(bdev)->max_write_streams;
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
return q->limits.logical_block_size;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9de7adb68294..60d1511b4f4d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -114,8 +114,7 @@ struct bpf_prog_list {
u32 flags;
};
-int cgroup_bpf_inherit(struct cgroup *cgrp);
-void cgroup_bpf_offline(struct cgroup *cgrp);
+void __init cgroup_bpf_lifetime_notifier_init(void);
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
@@ -431,8 +430,10 @@ const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
#else
-static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
-static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_lifetime_notifier_init(void)
+{
+ return;
+}
static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype,
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5bc8f55c8cca..e61687d5e496 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -170,6 +170,23 @@ struct cgroup_subsys_state {
struct percpu_ref refcnt;
/*
+ * Depending on the context, this field is initialized
+ * via css_rstat_init() at different places:
+ *
+ * when css is associated with cgroup::self
+ * when css->cgroup is the root cgroup
+ * performed in cgroup_init()
+ * when css->cgroup is not the root cgroup
+ * performed in cgroup_create()
+ * when css is associated with a subsystem
+ * when css->cgroup is the root cgroup
+ * performed in cgroup_init_subsys() in the non-early path
+ * when css->cgroup is not the root cgroup
+ * performed in css_create()
+ */
+ struct css_rstat_cpu __percpu *rstat_cpu;
+
+ /*
* siblings list anchored at the parent's ->children
*
* linkage is protected by cgroup_mutex or RCU
@@ -177,9 +194,6 @@ struct cgroup_subsys_state {
struct list_head sibling;
struct list_head children;
- /* flush target list anchored at cgrp->rstat_css_list */
- struct list_head rstat_css_node;
-
/*
* PI: Subsys-unique ID. 0 is unused and root is always 1. The
* matching css can be looked up using css_from_id().
@@ -219,6 +233,16 @@ struct cgroup_subsys_state {
* Protected by cgroup_mutex.
*/
int nr_descendants;
+
+ /*
+ * A singly-linked list of css structures to be rstat flushed.
+ * This is a scratch field to be used exclusively by
+ * css_rstat_flush().
+ *
+ * Protected by rstat_base_lock when css is cgroup::self.
+ * Protected by css->ss->rstat_ss_lock otherwise.
+ */
+ struct cgroup_subsys_state *rstat_flush_next;
};
/*
@@ -329,10 +353,10 @@ struct cgroup_base_stat {
/*
* rstat - cgroup scalable recursive statistics. Accounting is done
- * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
+ * per-cpu in css_rstat_cpu which is then lazily propagated up the
* hierarchy on reads.
*
- * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
+ * When a stat gets updated, the css_rstat_cpu and its ancestors are
* linked into the updated tree. On the following read, propagation only
* considers and consumes the updated tree. This makes reading O(the
* number of descendants which have been active since last read) instead of
@@ -344,10 +368,29 @@ struct cgroup_base_stat {
* frequency decreases the cost of each read.
*
* This struct hosts both the fields which implement the above -
- * updated_children and updated_next - and the fields which track basic
- * resource statistics on top of it - bsync, bstat and last_bstat.
+ * updated_children and updated_next.
*/
-struct cgroup_rstat_cpu {
+struct css_rstat_cpu {
+ /*
+ * Child cgroups with stat updates on this cpu since the last read
+ * are linked on the parent's ->updated_children through
+ * ->updated_next. updated_children is terminated by its container css.
+ *
+ * In addition to being more compact, singly-linked list pointing to
+ * the css makes it unnecessary for each per-cpu struct to point back
+ * to the associated css.
+ *
+ * Protected by per-cpu css->ss->rstat_ss_cpu_lock.
+ */
+ struct cgroup_subsys_state *updated_children;
+ struct cgroup_subsys_state *updated_next; /* NULL if not on the list */
+};
+
+/*
+ * This struct hosts the fields which track basic resource statistics on
+ * top of it - bsync, bstat and last_bstat.
+ */
+struct cgroup_rstat_base_cpu {
/*
* ->bsync protects ->bstat. These are the only fields which get
* updated in the hot path.
@@ -374,20 +417,6 @@ struct cgroup_rstat_cpu {
* deltas to propagate to the per-cpu subtree_bstat.
*/
struct cgroup_base_stat last_subtree_bstat;
-
- /*
- * Child cgroups with stat updates on this cpu since the last read
- * are linked on the parent's ->updated_children through
- * ->updated_next.
- *
- * In addition to being more compact, singly-linked list pointing
- * to the cgroup makes it unnecessary for each per-cpu struct to
- * point back to the associated cgroup.
- *
- * Protected by per-cpu cgroup_rstat_cpu_lock.
- */
- struct cgroup *updated_children; /* terminated by self cgroup */
- struct cgroup *updated_next; /* NULL iff not on the list */
};
struct cgroup_freezer_state {
@@ -516,23 +545,23 @@ struct cgroup {
struct cgroup *dom_cgrp;
struct cgroup *old_dom_cgrp; /* used while enabling threaded */
- /* per-cpu recursive resource statistics */
- struct cgroup_rstat_cpu __percpu *rstat_cpu;
- struct list_head rstat_css_list;
-
/*
- * Add padding to separate the read mostly rstat_cpu and
- * rstat_css_list into a different cacheline from the following
- * rstat_flush_next and *bstat fields which can have frequent updates.
+ * Depending on the context, this field is initialized via
+ * css_rstat_init() at different places:
+ *
+ * when cgroup is the root cgroup
+ * performed in cgroup_setup_root()
+ * otherwise
+ * performed in cgroup_create()
*/
- CACHELINE_PADDING(_pad_);
+ struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
/*
- * A singly-linked list of cgroup structures to be rstat flushed.
- * This is a scratch field to be used exclusively by
- * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
+ * Add padding to keep the read mostly rstat per-cpu pointer on a
+ * different cacheline than the following *bstat fields which can have
+ * frequent updates.
*/
- struct cgroup *rstat_flush_next;
+ CACHELINE_PADDING(_pad_);
/* cgroup basic resource statistics */
struct cgroup_base_stat last_bstat;
@@ -790,6 +819,9 @@ struct cgroup_subsys {
* specifies the mask of subsystems that this one depends on.
*/
unsigned int depends_on;
+
+ spinlock_t rstat_ss_lock;
+ raw_spinlock_t __percpu *rstat_ss_cpu_lock;
};
extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e7da3c3b098b..b18fb5fcb38e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -19,6 +19,7 @@
#include <linux/kernfs.h>
#include <linux/jump_label.h>
#include <linux/types.h>
+#include <linux/notifier.h>
#include <linux/ns_common.h>
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>
@@ -40,7 +41,7 @@ struct kernel_clone_args;
#ifdef CONFIG_CGROUPS
-enum {
+enum css_task_iter_flags {
CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */
CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */
CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */
@@ -66,10 +67,16 @@ struct css_task_iter {
struct list_head iters_node; /* css_set->task_iters */
};
+enum cgroup_lifetime_events {
+ CGROUP_LIFETIME_ONLINE,
+ CGROUP_LIFETIME_OFFLINE,
+};
+
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
extern spinlock_t css_set_lock;
+extern struct blocking_notifier_head cgroup_lifetime_notifier;
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
@@ -347,6 +354,17 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
return css->flags & CSS_DYING;
}
+static inline bool css_is_self(struct cgroup_subsys_state *css)
+{
+ if (css == &css->cgroup->self) {
+ /* cgroup::self should not have subsystem association */
+ WARN_ON(css->ss != NULL);
+ return true;
+ }
+
+ return false;
+}
+
static inline void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
@@ -688,8 +706,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
/*
* cgroup scalable recursive statistics.
*/
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
-void cgroup_rstat_flush(struct cgroup *cgrp);
+void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
+void css_rstat_flush(struct cgroup_subsys_state *css);
/*
* Basic resource stats.
@@ -785,6 +803,17 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns);
+static inline void get_cgroup_ns(struct cgroup_namespace *ns)
+{
+ refcount_inc(&ns->ns.count);
+}
+
+static inline void put_cgroup_ns(struct cgroup_namespace *ns)
+{
+ if (refcount_dec_and_test(&ns->ns.count))
+ free_cgroup_ns(ns);
+}
+
#else /* !CONFIG_CGROUPS */
static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
@@ -795,19 +824,10 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
return old_ns;
}
-#endif /* !CONFIG_CGROUPS */
+static inline void get_cgroup_ns(struct cgroup_namespace *ns) { }
+static inline void put_cgroup_ns(struct cgroup_namespace *ns) { }
-static inline void get_cgroup_ns(struct cgroup_namespace *ns)
-{
- if (ns)
- refcount_inc(&ns->ns.count);
-}
-
-static inline void put_cgroup_ns(struct cgroup_namespace *ns)
-{
- if (ns && refcount_dec_and_test(&ns->ns.count))
- free_cgroup_ns(ns);
-}
+#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUPS
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 7e57047e1564..7093e1d08af0 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -216,6 +216,25 @@ const volatile void * __must_check_fn(const volatile void *val)
#define return_ptr(p) return no_free_ptr(p)
+/*
+ * Only for situations where an allocation is handed in to another function
+ * and consumed by that function on success.
+ *
+ * struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL);
+ *
+ * setup(f);
+ * if (some_condition)
+ * return -EINVAL;
+ * ....
+ * ret = bar(f);
+ * if (!ret)
+ * retain_and_null_ptr(f);
+ * return ret;
+ *
+ * After retain_and_null_ptr(f) the variable f is NULL and cannot be
+ * dereferenced anymore.
+ */
+#define retain_and_null_ptr(p) ((void)__get_and_null(p, NULL))
/*
* DEFINE_CLASS(name, type, exit, init, init_args...):
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index d14dbd26b370..0ee4c21c6dbc 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -36,10 +36,10 @@ union codetag_ref {
struct codetag_type_desc {
const char *section;
size_t tag_size;
- void (*module_load)(struct codetag_type *cttype,
- struct codetag_module *cmod);
- void (*module_unload)(struct codetag_type *cttype,
- struct codetag_module *cmod);
+ void (*module_load)(struct module *mod,
+ struct codetag *start, struct codetag *end);
+ void (*module_unload)(struct module *mod,
+ struct codetag *start, struct codetag *end);
#ifdef CONFIG_MODULES
void (*module_replaced)(struct module *mod, struct module *new_mod);
bool (*needs_section_mem)(struct module *mod, unsigned long size);
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index c771e9d0d0b9..698520b1bfdb 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -120,15 +120,19 @@ struct configfs_attribute {
ssize_t (*store)(struct config_item *, const char *, size_t);
};
-#define CONFIGFS_ATTR(_pfx, _name) \
+#define CONFIGFS_ATTR_PERM(_pfx, _name, _perm) \
static struct configfs_attribute _pfx##attr_##_name = { \
.ca_name = __stringify(_name), \
- .ca_mode = S_IRUGO | S_IWUSR, \
+ .ca_mode = _perm, \
.ca_owner = THIS_MODULE, \
.show = _pfx##_name##_show, \
.store = _pfx##_name##_store, \
}
+#define CONFIGFS_ATTR(_pfx, _name) CONFIGFS_ATTR_PERM( \
+ _pfx, _name, S_IRUGO | S_IWUSR \
+)
+
#define CONFIGFS_ATTR_RO(_pfx, _name) \
static struct configfs_attribute _pfx##attr_##_name = { \
.ca_name = __stringify(_name), \
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 77e6e195d1d6..76e41805b92d 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -28,6 +28,7 @@ struct coredump_params {
int vma_count;
size_t vma_data_size;
struct core_vma_metadata *vma_meta;
+ struct pid *pid;
};
extern unsigned int core_file_note_size_limit;
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e3049543008b..e6089abc28e2 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -78,6 +78,10 @@ extern ssize_t cpu_show_gds(struct device *dev,
extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_old_microcode(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 7a5b391dcc01..95f3807c8c55 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -170,6 +170,12 @@ struct cpufreq_policy {
struct notifier_block nb_max;
};
+DEFINE_GUARD(cpufreq_policy_write, struct cpufreq_policy *,
+ down_write(&_T->rwsem), up_write(&_T->rwsem))
+
+DEFINE_GUARD(cpufreq_policy_read, struct cpufreq_policy *,
+ down_read(&_T->rwsem), up_read(&_T->rwsem))
+
/*
* Used for passing new cpufreq policy data to the cpufreq driver's ->verify()
* callback for sanitization. That callback is only expected to modify the min
@@ -235,9 +241,6 @@ void disable_cpufreq(void);
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
-struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu);
-void cpufreq_cpu_release(struct cpufreq_policy *policy);
-int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
void refresh_frequency_limits(struct cpufreq_policy *policy);
void cpufreq_update_policy(unsigned int cpu);
void cpufreq_update_limits(unsigned int cpu);
@@ -395,7 +398,7 @@ struct cpufreq_driver {
unsigned int (*get)(unsigned int cpu);
/* Called to update policy limits on firmware notifications. */
- void (*update_limits)(unsigned int cpu);
+ void (*update_limits)(struct cpufreq_policy *policy);
/* optional */
int (*bios_limit)(int cpu, unsigned int *limit);
@@ -647,6 +650,15 @@ module_exit(__governor##_exit)
struct cpufreq_governor *cpufreq_default_governor(void);
struct cpufreq_governor *cpufreq_fallback_governor(void);
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+bool sugov_is_governor(struct cpufreq_policy *policy);
+#else
+static inline bool sugov_is_governor(struct cpufreq_policy *policy)
+{
+ return false;
+}
+#endif
+
static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy)
{
if (policy->max < policy->cur)
@@ -1225,6 +1237,8 @@ void cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask);
+
static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy)
{
dev_pm_opp_of_register_em(get_cpu_device(policy->cpu),
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 1987400000b4..df366ee15456 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -60,7 +60,6 @@ enum cpuhp_state {
/* PREPARE section invoked on a control CPU */
CPUHP_OFFLINE = 0,
CPUHP_CREATE_THREADS,
- CPUHP_PERF_PREPARE,
CPUHP_PERF_X86_PREPARE,
CPUHP_PERF_X86_AMD_UNCORE_PREP,
CPUHP_PERF_POWER,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index f9a868384083..6a569c7534db 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -179,6 +179,19 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
}
/**
+ * cpumask_first_andnot - return the first cpu from *srcp1 & ~*srcp2
+ * @srcp1: the first input
+ * @srcp2: the second input
+ *
+ * Return: >= nr_cpu_ids if no such cpu found.
+ */
+static __always_inline
+unsigned int cpumask_first_andnot(const struct cpumask *srcp1, const struct cpumask *srcp2)
+{
+ return find_first_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
+}
+
+/**
* cpumask_first_and_and - return the first cpu from *srcp1 & *srcp2 & *srcp3
* @srcp1: the first input
* @srcp2: the second input
@@ -285,6 +298,25 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
}
/**
+ * cpumask_next_andnot - get the next cpu in *src1p & ~*src2p
+ * @n: the cpu prior to the place to search (i.e. return will be > @n)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
+ *
+ * Return: >= nr_cpu_ids if no further cpus set in both.
+ */
+static __always_inline
+unsigned int cpumask_next_andnot(int n, const struct cpumask *src1p,
+ const struct cpumask *src2p)
+{
+ /* -1 is a legal arg here. */
+ if (n != -1)
+ cpumask_check(n);
+ return find_next_andnot_bit(cpumask_bits(src1p), cpumask_bits(src2p),
+ small_cpumask_bits, n + 1);
+}
+
+/**
* cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from
* @n+1. If nothing found, wrap around and start from
* the beginning
@@ -413,14 +445,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src)
* @cpu: the cpu to ignore.
*
* Often used to find any cpu but smp_processor_id() in a mask.
+ * If @cpu == -1, the function is equivalent to cpumask_any().
* Return: >= nr_cpu_ids if no cpus set.
*/
static __always_inline
-unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
+unsigned int cpumask_any_but(const struct cpumask *mask, int cpu)
{
unsigned int i;
- cpumask_check(cpu);
+ /* -1 is a legal arg here. */
+ if (cpu != -1)
+ cpumask_check(cpu);
+
for_each_cpu(i, mask)
if (i != cpu)
break;
@@ -433,16 +469,20 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
* @mask2: the second input cpumask
* @cpu: the cpu to ignore
*
+ * If @cpu == -1, the function is equivalent to cpumask_any_and().
* Returns >= nr_cpu_ids if no cpus set.
*/
static __always_inline
unsigned int cpumask_any_and_but(const struct cpumask *mask1,
const struct cpumask *mask2,
- unsigned int cpu)
+ int cpu)
{
unsigned int i;
- cpumask_check(cpu);
+ /* -1 is a legal arg here. */
+ if (cpu != -1)
+ cpumask_check(cpu);
+
i = cpumask_first_and(mask1, mask2);
if (i != cpu)
return i;
@@ -451,6 +491,33 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1,
}
/**
+ * cpumask_any_andnot_but - pick an arbitrary cpu from *mask1 & ~*mask2, but not this one.
+ * @mask1: the first input cpumask
+ * @mask2: the second input cpumask
+ * @cpu: the cpu to ignore
+ *
+ * If @cpu == -1, the function returns the first matching cpu.
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+static __always_inline
+unsigned int cpumask_any_andnot_but(const struct cpumask *mask1,
+ const struct cpumask *mask2,
+ int cpu)
+{
+ unsigned int i;
+
+ /* -1 is a legal arg here. */
+ if (cpu != -1)
+ cpumask_check(cpu);
+
+ i = cpumask_first_andnot(mask1, mask2);
+ if (i != cpu)
+ return i;
+
+ return cpumask_next_andnot(cpu, mask1, mask2);
+}
+
+/**
* cpumask_nth - get the Nth cpu in a cpumask
* @srcp: the cpumask pointer
* @cpu: the Nth cpu to find, starting from 0
diff --git a/include/linux/crc16.h b/include/linux/crc16.h
index 9fa74529b317..b861d969b161 100644
--- a/include/linux/crc16.h
+++ b/include/linux/crc16.h
@@ -15,14 +15,7 @@
#include <linux/types.h>
-extern u16 const crc16_table[256];
-
-extern u16 crc16(u16 crc, const u8 *buffer, size_t len);
-
-static inline u16 crc16_byte(u16 crc, const u8 data)
-{
- return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff];
-}
+u16 crc16(u16 crc, const u8 *p, size_t len);
#endif /* __CRC16_H */
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 69c2e8bb3782..569dc13f139f 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -1,7 +1,4 @@
-/*
- * crc32.h
- * See linux/lib/crc32.c for license and changes
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _LINUX_CRC32_H
#define _LINUX_CRC32_H
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 1e3809d28abd..b50f1954d1bb 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -14,8 +14,7 @@
#include <linux/completion.h>
#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/refcount.h>
+#include <linux/refcount_types.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -51,6 +50,15 @@
#define CRYPTO_ALG_NEED_FALLBACK 0x00000100
/*
+ * Set if the algorithm data structure should be duplicated into
+ * kmalloc memory before registration. This is useful for hardware
+ * that can be disconnected at will. Do not use this if the data
+ * structure is embedded into a bigger one. Duplicate the overall
+ * data structure in the driver in that case.
+ */
+#define CRYPTO_ALG_DUP_FIRST 0x00000200
+
+/*
* Set if the algorithm has passed automated run-time testing. Note that
* if there is no run-time testing for a given algorithm it is considered
* to have passed.
@@ -125,8 +133,10 @@
*/
#define CRYPTO_ALG_FIPS_INTERNAL 0x00020000
-/* Set if the algorithm supports request chains and virtual addresses. */
-#define CRYPTO_ALG_REQ_CHAIN 0x00040000
+/* Set if the algorithm supports virtual addresses. */
+#define CRYPTO_ALG_REQ_VIRT 0x00040000
+
+/* The high bits 0xff000000 are reserved for type-specific flags. */
/*
* Transform masks and values (for crt_flags).
@@ -179,7 +189,6 @@ struct crypto_async_request {
struct crypto_tfm *tfm;
u32 flags;
- int err;
};
/**
@@ -278,6 +287,7 @@ struct cipher_alg {
* to the alignmask of the algorithm being used, in order to
* avoid the API having to realign them. Note: the alignmask is
* not supported for hash algorithms and is always 0 for them.
+ * @cra_reqsize: Size of the request context for this algorithm.
* @cra_priority: Priority of this transformation implementation. In case
* multiple transformations with same @cra_name are available to
* the Crypto API, the kernel will use the one with highest
@@ -302,17 +312,8 @@ struct cipher_alg {
* by @cra_type and @cra_flags above, the associated structure must be
* filled with callbacks. This field might be empty. This is the case
* for ahash, shash.
- * @cra_init: Initialize the cryptographic transformation object. This function
- * is used to initialize the cryptographic transformation object.
- * This function is called only once at the instantiation time, right
- * after the transformation context was allocated. In case the
- * cryptographic hardware has some special requirements which need to
- * be handled by software, this function shall check for the precise
- * requirement of the transformation and put any software fallbacks
- * in place.
- * @cra_exit: Deinitialize the cryptographic transformation object. This is a
- * counterpart to @cra_init, used to remove various changes set in
- * @cra_init.
+ * @cra_init: Deprecated, do not use.
+ * @cra_exit: Deprecated, do not use.
* @cra_u.cipher: Union member which contains a single-block symmetric cipher
* definition. See @struct @cipher_alg.
* @cra_module: Owner of this transformation implementation. Set to THIS_MODULE
@@ -333,6 +334,7 @@ struct crypto_alg {
unsigned int cra_blocksize;
unsigned int cra_ctxsize;
unsigned int cra_alignmask;
+ unsigned int cra_reqsize;
int cra_priority;
refcount_t cra_refcnt;
@@ -409,9 +411,11 @@ struct crypto_tfm {
u32 crt_flags;
int node;
-
+
+ struct crypto_tfm *fb;
+
void (*exit)(struct crypto_tfm *tfm);
-
+
struct crypto_alg *__crt_alg;
void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
@@ -452,6 +456,11 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
return tfm->__crt_alg->cra_alignmask;
}
+static inline unsigned int crypto_tfm_alg_reqsize(struct crypto_tfm *tfm)
+{
+ return tfm->__crt_alg->cra_reqsize;
+}
+
static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm)
{
return tfm->crt_flags;
@@ -473,22 +482,44 @@ static inline unsigned int crypto_tfm_ctx_alignment(void)
return __alignof__(tfm->__crt_ctx);
}
-static inline void crypto_reqchain_init(struct crypto_async_request *req)
+static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm)
{
- req->err = -EINPROGRESS;
- INIT_LIST_HEAD(&req->list);
+ return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
}
-static inline void crypto_request_chain(struct crypto_async_request *req,
- struct crypto_async_request *head)
+static inline bool crypto_req_on_stack(struct crypto_async_request *req)
{
- req->err = -EINPROGRESS;
- list_add_tail(&req->list, &head->list);
+ return req->flags & CRYPTO_TFM_REQ_ON_STACK;
}
-static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm)
+static inline void crypto_request_set_callback(
+ struct crypto_async_request *req, u32 flags,
+ crypto_completion_t compl, void *data)
{
- return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+ u32 keep = CRYPTO_TFM_REQ_ON_STACK;
+
+ req->complete = compl;
+ req->data = data;
+ req->flags &= keep;
+ req->flags |= flags & ~keep;
+}
+
+static inline void crypto_request_set_tfm(struct crypto_async_request *req,
+ struct crypto_tfm *tfm)
+{
+ req->tfm = tfm;
+ req->flags &= ~CRYPTO_TFM_REQ_ON_STACK;
+}
+
+struct crypto_async_request *crypto_request_clone(
+ struct crypto_async_request *req, size_t total, gfp_t gfp);
+
+static inline void crypto_stack_request_init(struct crypto_async_request *req,
+ struct crypto_tfm *tfm)
+{
+ req->flags = 0;
+ crypto_request_set_tfm(req, tfm);
+ req->flags |= CRYPTO_TFM_REQ_ON_STACK;
}
#endif /* _LINUX_CRYPTO_H */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e9f07e37dd6f..e29823c701ac 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -57,7 +57,8 @@ struct qstr {
};
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
-#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n))
+#define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l)
+#define QSTR(n) QSTR_LEN(n, strlen(n))
extern const struct qstr empty_name;
extern const struct qstr slash_name;
@@ -281,7 +282,6 @@ extern void d_exchange(struct dentry *, struct dentry *);
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
extern struct dentry *d_lookup(const struct dentry *, const struct qstr *);
-extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
static inline unsigned d_count(const struct dentry *dentry)
{
diff --git a/include/linux/device.h b/include/linux/device.h
index 79e49fe494b7..4940db137fff 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -281,44 +281,6 @@ int __must_check device_create_bin_file(struct device *dev,
void device_remove_bin_file(struct device *dev,
const struct bin_attribute *attr);
-/* allows to add/remove a custom action to devres stack */
-int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data);
-
-/**
- * devm_remove_action() - removes previously added custom action
- * @dev: Device that owns the action
- * @action: Function implementing the action
- * @data: Pointer to data passed to @action implementation
- *
- * Removes instance of @action previously added by devm_add_action().
- * Both action and data should match one of the existing entries.
- */
-static inline
-void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
-{
- WARN_ON(devm_remove_action_nowarn(dev, action, data));
-}
-
-void devm_release_action(struct device *dev, void (*action)(void *), void *data);
-
-int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name);
-#define devm_add_action(dev, action, data) \
- __devm_add_action(dev, action, data, #action)
-
-static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *),
- void *data, const char *name)
-{
- int ret;
-
- ret = __devm_add_action(dev, action, data, name);
- if (ret)
- action(data);
-
- return ret;
-}
-#define devm_add_action_or_reset(dev, action, data) \
- __devm_add_action_or_reset(dev, action, data, #action)
-
/**
* devm_alloc_percpu - Resource-managed alloc_percpu
* @dev: Device to allocate per-cpu memory for
diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h
index 9b49f9915850..ae696d10faff 100644
--- a/include/linux/device/devres.h
+++ b/include/linux/device/devres.h
@@ -8,6 +8,7 @@
#include <linux/overflow.h>
#include <linux/stdarg.h>
#include <linux/types.h>
+#include <asm/bug.h>
struct device;
struct device_node;
@@ -126,4 +127,44 @@ void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int in
#endif
+/* allows to add/remove a custom action to devres stack */
+int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data);
+
+/**
+ * devm_remove_action() - removes previously added custom action
+ * @dev: Device that owns the action
+ * @action: Function implementing the action
+ * @data: Pointer to data passed to @action implementation
+ *
+ * Removes instance of @action previously added by devm_add_action().
+ * Both action and data should match one of the existing entries.
+ */
+static inline
+void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
+{
+ WARN_ON(devm_remove_action_nowarn(dev, action, data));
+}
+
+void devm_release_action(struct device *dev, void (*action)(void *), void *data);
+
+int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name);
+#define devm_add_action(dev, action, data) \
+ __devm_add_action(dev, action, data, #action)
+
+static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *),
+ void *data, const char *name)
+{
+ int ret;
+
+ ret = __devm_add_action(dev, action, data, name);
+ if (ret)
+ action(data);
+
+ return ret;
+}
+#define devm_add_action_or_reset(dev, action, data) \
+ __devm_add_action_or_reset(dev, action, data, #action)
+
+bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data);
+
#endif /* _DEVICE_DEVRES_H_ */
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index d02f32b7514e..0864773a57e8 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -18,15 +18,16 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{
short type, access = 0;
+ if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)))
+ return 0;
+
if (likely(!inode->i_rdev))
return 0;
if (S_ISBLK(inode->i_mode))
type = DEVCG_DEV_BLOCK;
- else if (S_ISCHR(inode->i_mode))
+ else /* S_ISCHR by the test above */
type = DEVCG_DEV_CHAR;
- else
- return 0;
if (mask & MAY_WRITE)
access |= DEVCG_ACC_WRITE;
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index e172522cd936..f48e5fb88bd5 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -434,58 +434,4 @@ static inline void debug_dma_dump_mappings(struct device *dev)
#endif /* CONFIG_DMA_API_DEBUG */
extern const struct dma_map_ops dma_dummy_ops;
-
-enum pci_p2pdma_map_type {
- /*
- * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping
- * type hasn't been calculated yet. Functions that return this enum
- * never return this value.
- */
- PCI_P2PDMA_MAP_UNKNOWN = 0,
-
- /*
- * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
- * traverse the host bridge and the host bridge is not in the
- * allowlist. DMA Mapping routines should return an error when
- * this is returned.
- */
- PCI_P2PDMA_MAP_NOT_SUPPORTED,
-
- /*
- * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to
- * each other directly through a PCI switch and the transaction will
- * not traverse the host bridge. Such a mapping should program
- * the DMA engine with PCI bus addresses.
- */
- PCI_P2PDMA_MAP_BUS_ADDR,
-
- /*
- * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk
- * to each other, but the transaction traverses a host bridge on the
- * allowlist. In this case, a normal mapping either with CPU physical
- * addresses (in the case of dma-direct) or IOVA addresses (in the
- * case of IOMMUs) should be used to program the DMA engine.
- */
- PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
-};
-
-struct pci_p2pdma_map_state {
- struct dev_pagemap *pgmap;
- int map;
- u64 bus_off;
-};
-
-#ifdef CONFIG_PCI_P2PDMA
-enum pci_p2pdma_map_type
-pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
- struct scatterlist *sg);
-#else /* CONFIG_PCI_P2PDMA */
-static inline enum pci_p2pdma_map_type
-pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
- struct scatterlist *sg)
-{
- return PCI_P2PDMA_MAP_NOT_SUPPORTED;
-}
-#endif /* CONFIG_PCI_P2PDMA */
-
#endif /* _LINUX_DMA_MAP_OPS_H */
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 85ab710ec0e7..55c03e5fe8cb 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -72,6 +72,22 @@
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
+struct dma_iova_state {
+ dma_addr_t addr;
+ u64 __size;
+};
+
+/*
+ * Use the high bit to mark if we used swiotlb for one or more ranges.
+ */
+#define DMA_IOVA_USE_SWIOTLB (1ULL << 63)
+
+static inline size_t dma_iova_size(struct dma_iova_state *state)
+{
+ /* Casting is needed for 32-bits systems */
+ return (size_t)(state->__size & ~DMA_IOVA_USE_SWIOTLB);
+}
+
#ifdef CONFIG_DMA_API_DEBUG
void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
void debug_dma_map_single(struct device *dev, const void *addr,
@@ -277,6 +293,70 @@ static inline int dma_mmap_noncontiguous(struct device *dev,
}
#endif /* CONFIG_HAS_DMA */
+#ifdef CONFIG_IOMMU_DMA
+/**
+ * dma_use_iova - check if the IOVA API is used for this state
+ * @state: IOVA state
+ *
+ * Return %true if the DMA transfers uses the dma_iova_*() calls or %false if
+ * they can't be used.
+ */
+static inline bool dma_use_iova(struct dma_iova_state *state)
+{
+ return state->__size != 0;
+}
+
+bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t size);
+void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs);
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size);
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+#else /* CONFIG_IOMMU_DMA */
+static inline bool dma_use_iova(struct dma_iova_state *state)
+{
+ return false;
+}
+static inline bool dma_iova_try_alloc(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t size)
+{
+ return false;
+}
+static inline void dma_iova_free(struct device *dev,
+ struct dma_iova_state *state)
+{
+}
+static inline void dma_iova_destroy(struct device *dev,
+ struct dma_iova_state *state, size_t mapped_len,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+}
+static inline int dma_iova_sync(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+static inline int dma_iova_link(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ return -EOPNOTSUPP;
+}
+static inline void dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+}
+#endif /* CONFIG_IOMMU_DMA */
+
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir);
@@ -326,6 +406,7 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
}
+bool dma_need_unmap(struct device *dev);
#else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
static inline bool dma_dev_need_sync(const struct device *dev)
{
@@ -351,6 +432,10 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return false;
}
+static inline bool dma_need_unmap(struct device *dev)
+{
+ return false;
+}
#endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
struct page *dma_alloc_pages(struct device *dev, size_t size,
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb4238..06c4de602b2f 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -11,6 +11,7 @@
#ifndef LINUX_DMAPOOL_H
#define LINUX_DMAPOOL_H
+#include <linux/nodemask_types.h>
#include <linux/scatterlist.h>
#include <asm/io.h>
@@ -18,8 +19,8 @@ struct device;
#ifdef CONFIG_HAS_DMA
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+ size_t size, size_t align, size_t boundary, int node);
void dma_pool_destroy(struct dma_pool *pool);
@@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
void dmam_pool_destroy(struct dma_pool *pool);
#else /* !CONFIG_HAS_DMA */
-static inline struct dma_pool *dma_pool_create(const char *name,
- struct device *dev, size_t size, size_t align, size_t allocation)
-{ return NULL; }
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+ struct device *dev, size_t size, size_t align, size_t boundary,
+ int node)
+{
+ return NULL;
+}
static inline void dma_pool_destroy(struct dma_pool *pool) { }
static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle) { return NULL; }
@@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name,
static inline void dmam_pool_destroy(struct dma_pool *pool) { }
#endif /* !CONFIG_HAS_DMA */
+static inline struct dma_pool *dma_pool_create(const char *name,
+ struct device *dev, size_t size, size_t align, size_t boundary)
+{
+ return dma_pool_create_node(name, dev, size, align, boundary,
+ NUMA_NO_NODE);
+}
+
static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle)
{
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index d8eabbf86a5b..7fa1eb3cc823 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
int em_dev_update_chip_binning(struct device *dev);
int em_update_performance_limits(struct em_perf_domain *pd,
unsigned long freq_min_khz, unsigned long freq_max_khz);
+void em_adjust_cpu_capacity(unsigned int cpu);
void em_rebuild_sched_domains(void);
/**
@@ -403,6 +404,7 @@ int em_update_performance_limits(struct em_perf_domain *pd,
{
return -EINVAL;
}
+static inline void em_adjust_cpu_capacity(unsigned int cpu) {}
static inline void em_rebuild_sched_domains(void) {}
#endif
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index fc61d0205c97..f94f3fdf15fc 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -14,6 +14,7 @@
#include <linux/kmsan.h>
#include <asm/entry-common.h>
+#include <asm/syscall.h>
/*
* Define dummy _TIF work flags if not defined by the architecture or for
@@ -367,6 +368,15 @@ static __always_inline void exit_to_user_mode(void)
}
/**
+ * syscall_exit_work - Handle work before returning to user mode
+ * @regs: Pointer to current pt_regs
+ * @work: Current thread syscall work
+ *
+ * Do one-time syscall specific work.
+ */
+void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+
+/**
* syscall_exit_to_user_mode_work - Handle work before returning to user mode
* @regs: Pointer to currents pt_regs
*
@@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void)
* make the final state transitions. Interrupts must stay disabled between
* return from this function and the invocation of exit_to_user_mode().
*/
-void syscall_exit_to_user_mode_work(struct pt_regs *regs);
+static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
+{
+ unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+ unsigned long nr = syscall_get_nr(current, regs);
+
+ CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
+
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+ if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
+ local_irq_enable();
+ }
+
+ rseq_syscall(regs);
+
+ /*
+ * Do one-time syscall specific work. If these work items are
+ * enabled, we want to run them exactly once per syscall exit with
+ * interrupts enabled.
+ */
+ if (unlikely(work & SYSCALL_WORK_EXIT))
+ syscall_exit_work(regs, work);
+ local_irq_disable_exit_to_user();
+ exit_to_user_mode_prepare(regs);
+}
/**
* syscall_exit_to_user_mode - Handle work before returning to user mode
@@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs);
* exit_to_user_mode(). This function is preferred unless there is a
* compelling architectural reason to use the separate functions.
*/
-void syscall_exit_to_user_mode(struct pt_regs *regs);
+static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
+{
+ instrumentation_begin();
+ syscall_exit_to_user_mode_work(regs);
+ instrumentation_end();
+ exit_to_user_mode();
+}
/**
* irqentry_enter_from_user_mode - Establish state before invoking the irq handler
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 65655a5d1be2..ca42d5e46ccc 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/moduleloader.h>
+#include <linux/cleanup.h>
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
!defined(CONFIG_KASAN_VMALLOC)
@@ -53,7 +54,7 @@ enum execmem_range_flags {
EXECMEM_ROX_CACHE = (1 << 1),
};
-#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM)
/**
* execmem_fill_trapping_insns - set memory to contain instructions that
* will trap
@@ -93,9 +94,15 @@ int execmem_make_temp_rw(void *ptr, size_t size);
* Return: 0 on success or negative error code on failure.
*/
int execmem_restore_rox(void *ptr, size_t size);
+
+/*
+ * Called from mark_readonly(), where the system transitions to ROX.
+ */
+void execmem_cache_make_ro(void);
#else
static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
+static inline void execmem_cache_make_ro(void) { }
#endif
/**
@@ -170,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size);
*/
void execmem_free(void *ptr);
+DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T));
+
#ifdef CONFIG_MMU
/**
* execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory
diff --git a/include/linux/file.h b/include/linux/file.h
index 302f11355b10..af1768d934a0 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -59,7 +59,7 @@ static inline struct fd CLONED_FD(struct file *f)
static inline void fdput(struct fd fd)
{
- if (fd.word & FDPUT_FPUT)
+ if (unlikely(fd.word & FDPUT_FPUT))
fput(fd_file(fd));
}
diff --git a/include/linux/find.h b/include/linux/find.h
index 68685714bc18..5a2c267ea7f9 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -29,6 +29,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsign
unsigned long n);
extern unsigned long _find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size);
+unsigned long _find_first_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size);
unsigned long _find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2,
const unsigned long *addr3, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
@@ -348,6 +350,29 @@ unsigned long find_first_and_bit(const unsigned long *addr1,
#endif
/**
+ * find_first_andnot_bit - find the first bit set in 1st memory region and unset in 2nd
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the first set bit
+ * If no bits are set, returns >= @size.
+ */
+static __always_inline
+unsigned long find_first_andnot_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_andnot_bit(addr1, addr2, size);
+}
+
+/**
* find_first_and_and_bit - find the first set bit in 3 memory regions
* @addr1: The first address to base the search on
* @addr2: The second address to base the search on
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..0db87f8e676c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -408,6 +408,7 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
+ u8 ki_write_stream;
union {
/*
* Only used for async buffered reads, where it denotes the
@@ -433,7 +434,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
}
struct address_space_operations {
- int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*read_folio)(struct file *, struct folio *);
/* Write back some dirty pages from this mapping. */
@@ -867,6 +867,11 @@ static inline void inode_lock(struct inode *inode)
down_write(&inode->i_rwsem);
}
+static inline __must_check int inode_lock_killable(struct inode *inode)
+{
+ return down_write_killable(&inode->i_rwsem);
+}
+
static inline void inode_unlock(struct inode *inode)
{
up_write(&inode->i_rwsem);
@@ -877,6 +882,11 @@ static inline void inode_lock_shared(struct inode *inode)
down_read(&inode->i_rwsem);
}
+static inline __must_check int inode_lock_shared_killable(struct inode *inode)
+{
+ return down_read_killable(&inode->i_rwsem);
+}
+
static inline void inode_unlock_shared(struct inode *inode)
{
up_read(&inode->i_rwsem);
@@ -1307,6 +1317,7 @@ struct sb_writers {
unsigned short frozen; /* Is sb frozen? */
int freeze_kcount; /* How many kernel freeze requests? */
int freeze_ucount; /* How many userspace freeze requests? */
+ const void *freeze_owner; /* Owner of the freeze */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1780,7 +1791,7 @@ static inline void __sb_end_write(struct super_block *sb, int level)
static inline void __sb_start_write(struct super_block *sb, int level)
{
- percpu_down_read(sb->s_writers.rw_sem + level - 1);
+ percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true);
}
static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
@@ -2071,8 +2082,18 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
struct dir_context {
filldir_t actor;
loff_t pos;
+ /*
+ * Filesystems MUST NOT MODIFY count, but may use as a hint:
+ * 0 unknown
+ * > 0 space in buffer (assume at least one entry)
+ * INT_MAX unlimited
+ */
+ int count;
};
+/* If OR-ed with d_type, pending signals are not checked */
+#define FILLDIR_FLAG_NOINTR 0x1000
+
/*
* These flags let !MMU mmap() govern direct device mapping vs immediate
* copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@ -2186,7 +2207,7 @@ struct file_operations {
/* Supports asynchronous lock callbacks */
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* File system supports uncached read/write buffered IO */
-#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7))
+#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -2269,6 +2290,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
* @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
* @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
* @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
+ * @FREEZE_EXCL: a freeze that can only be undone by the owner
*
* Indicate who the owner of the freeze or thaw request is and whether
* the freeze needs to be exclusive or can nest.
@@ -2282,6 +2304,7 @@ enum freeze_holder {
FREEZE_HOLDER_KERNEL = (1U << 0),
FREEZE_HOLDER_USERSPACE = (1U << 1),
FREEZE_MAY_NEST = (1U << 2),
+ FREEZE_EXCL = (1U << 3),
};
struct super_operations {
@@ -2295,9 +2318,9 @@ struct super_operations {
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *, enum freeze_holder who);
+ int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *, enum freeze_holder who);
+ int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -2344,6 +2367,7 @@ struct super_operations {
#define S_CASEFOLD (1 << 15) /* Casefolded file */
#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
+#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2400,6 +2424,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
+#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE)
static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
struct inode *inode)
@@ -2705,8 +2730,10 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-int freeze_super(struct super_block *super, enum freeze_holder who);
-int thaw_super(struct super_block *super, enum freeze_holder who);
+int freeze_super(struct super_block *super, enum freeze_holder who,
+ const void *freeze_owner);
+int thaw_super(struct super_block *super, enum freeze_holder who,
+ const void *freeze_owner);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
@@ -3475,7 +3502,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
void generic_fill_statx_atomic_writes(struct kstat *stat,
unsigned int unit_min,
- unsigned int unit_max);
+ unsigned int unit_max,
+ unsigned int unit_max_opt);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -3515,9 +3543,11 @@ extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
-extern void iterate_supers(void (*)(struct super_block *, void *), void *);
+extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg);
extern void iterate_supers_type(struct file_system_type *,
void (*)(struct super_block *, void *), void *);
+void filesystems_freeze(void);
+void filesystems_thaw(void);
extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 53e566efd5fd..5a0e897cae80 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -87,14 +87,9 @@ extern int lookup_constant(const struct constant_table tbl[], const char *name,
extern const struct constant_table bool_names[];
#ifdef CONFIG_VALIDATE_FS_PARSER
-extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
- int low, int high, int special);
extern bool fs_validate_description(const char *name,
const struct fs_parameter_spec *desc);
#else
-static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
- int low, int high, int special)
-{ return true; }
static inline bool fs_validate_description(const char *name,
const struct fs_parameter_spec *desc)
{ return true; }
@@ -125,8 +120,6 @@ static inline bool fs_validate_description(const char *name,
#define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL)
#define fsparam_u32oct(NAME, OPT) \
__fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8)
-#define fsparam_u32hex(NAME, OPT) \
- __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16)
#define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL)
#define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL)
#define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fbabc3d848b3..95851a6fb942 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -569,8 +569,6 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
#ifdef CONFIG_STACK_TRACER
-extern int stack_tracer_enabled;
-
int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
@@ -1298,16 +1296,9 @@ static inline void unpause_graph_tracing(void) { }
#ifdef CONFIG_TRACING
enum ftrace_dump_mode;
-#define MAX_TRACER_SIZE 100
-extern char ftrace_dump_on_oops[];
extern int ftrace_dump_on_oops_enabled(void);
-extern int tracepoint_printk;
extern void disable_trace_on_warning(void);
-extern int __disable_trace_on_warning;
-
-int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
#else /* CONFIG_TRACING */
static inline void disable_trace_on_warning(void) { }
diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85..005b040c4791 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -4,11 +4,11 @@
#include <linux/sched.h>
#include <linux/ktime.h>
+#include <linux/mm_types.h>
#include <uapi/linux/futex.h>
struct inode;
-struct mm_struct;
struct task_struct;
/*
@@ -34,6 +34,7 @@ union futex_key {
u64 i_seq;
unsigned long pgoff;
unsigned int offset;
+ /* unsigned int node; */
} shared;
struct {
union {
@@ -42,11 +43,13 @@ union futex_key {
};
unsigned long address;
unsigned int offset;
+ /* unsigned int node; */
} private;
struct {
u64 ptr;
unsigned long word;
unsigned int offset;
+ unsigned int node; /* NOT hashed! */
} both;
};
@@ -77,7 +80,25 @@ void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
-#else
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+int futex_hash_allocate_default(void);
+void futex_hash_free(struct mm_struct *mm);
+
+static inline void futex_mm_init(struct mm_struct *mm)
+{
+ RCU_INIT_POINTER(mm->futex_phash, NULL);
+ mutex_init(&mm->futex_hash_lock);
+}
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline int futex_hash_allocate_default(void) { return 0; }
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
+
+#else /* !CONFIG_FUTEX */
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
static inline void futex_exit_release(struct task_struct *tsk) { }
@@ -88,6 +109,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
{
return -EINVAL;
}
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+ return -EINVAL;
+}
+static inline int futex_hash_allocate_default(void)
+{
+ return 0;
+}
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+
#endif
#endif
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 8adc8e9cb4a7..f0b1982da0cc 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -181,6 +181,8 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev,
enum gpiod_flags flags,
const char *label);
+bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other);
+
#else /* CONFIG_GPIOLIB */
#include <linux/bug.h>
@@ -548,6 +550,13 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev,
return ERR_PTR(-ENOSYS);
}
+static inline bool
+gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other)
+{
+ WARN_ON(desc || other);
+ return false;
+}
+
#endif /* CONFIG_GPIOLIB */
#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_HTE)
@@ -588,7 +597,7 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev,
struct acpi_gpio_params {
unsigned int crs_entry_index;
- unsigned int line_index;
+ unsigned short line_index;
bool active_low;
};
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 4c0294a9104d..b53233051bee 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -287,8 +287,9 @@ struct gpio_irq_chip {
/**
* @first:
*
- * Required for static IRQ allocation. If set, irq_domain_add_simple()
- * will allocate and map all IRQs during initialization.
+ * Required for static IRQ allocation. If set,
+ * irq_domain_create_simple() will allocate and map all IRQs
+ * during initialization.
*/
unsigned int first;
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 5c6bea81a90e..c698f8415675 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio,
const char *from = kmap_local_folio(folio, offset);
size_t chunk = len;
- if (folio_test_highmem(folio) &&
+ if (folio_test_partial_kmap(folio) &&
chunk > PAGE_SIZE - offset_in_page(offset))
chunk = PAGE_SIZE - offset_in_page(offset);
memcpy(to, from, chunk);
@@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset,
char *to = kmap_local_folio(folio, offset);
size_t chunk = len;
- if (folio_test_highmem(folio) &&
+ if (folio_test_partial_kmap(folio) &&
chunk > PAGE_SIZE - offset_in_page(offset))
chunk = PAGE_SIZE - offset_in_page(offset);
memcpy(to, from, chunk);
@@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio,
{
size_t len = folio_size(folio) - offset;
- if (folio_test_highmem(folio)) {
+ if (folio_test_partial_kmap(folio)) {
size_t max = PAGE_SIZE - offset_in_page(offset);
while (len > max) {
@@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset,
VM_BUG_ON(offset + len > folio_size(folio));
- if (folio_test_highmem(folio)) {
+ if (folio_test_partial_kmap(folio)) {
size_t max = PAGE_SIZE - offset_in_page(offset);
while (len > max) {
@@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio,
size_t offset = offset_in_folio(folio, pos);
char *from = kmap_local_folio(folio, offset);
- if (folio_test_highmem(folio)) {
+ if (folio_test_partial_kmap(folio)) {
offset = offset_in_page(offset);
len = min_t(size_t, len, PAGE_SIZE - offset);
} else
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d6ffe01962c2..b52ac40d5830 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1167,13 +1167,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel,
enum vmbus_packet_type type,
u32 flags);
-extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount,
- void *buffer,
- u32 bufferlen,
- u64 requestid);
-
extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct vmbus_packet_mpb_array *mpb,
u32 desc_size,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c782a74d2a30..51b6484c0493 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -140,7 +140,7 @@ extern irqreturn_t no_action(int cpl, void *dev_id);
/*
* If a (PCI) device interrupt is not connected we set dev->irq to
* IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
- * can distingiush that case from other error returns.
+ * can distinguish that case from other error returns.
*
* 0x80000000 is guaranteed to be outside the available range of interrupts
* and easy to distinguish from other possible incorrect values.
diff --git a/include/linux/io.h b/include/linux/io.h
index 6a6bc4d46d0a..0642c7ee41db 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base,
int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start,
resource_size_t size);
+#ifdef CONFIG_STRICT_DEVMEM
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+ u64 from = ((u64)pfn) << PAGE_SHIFT;
+ u64 to = from + size;
+ u64 cursor = from;
+
+ while (cursor < to) {
+ if (!devmem_is_allowed(pfn))
+ return 0;
+ cursor += PAGE_SIZE;
+ pfn++;
+ }
+ return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+ return 1;
+}
+#endif
#endif /* _LINUX_IO_H */
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 0634a3de1782..53408124c1e5 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur
return cmd_to_io_kiocb(cmd)->async_data;
}
+/*
+ * Return uring_cmd's context reference as its context handle for driver to
+ * track per-context resource, such as registered kernel IO buffer
+ */
+static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
+{
+ return cmd_to_io_kiocb(cmd)->ctx;
+}
+
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
void (*release)(void *), unsigned int index,
unsigned int issue_flags);
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b44d201520d8..2922635986f5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -40,8 +40,6 @@ enum io_uring_cmd_flags {
IO_URING_F_TASK_DEAD = (1 << 13),
};
-struct io_zcrx_ifq;
-
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -343,7 +341,6 @@ struct io_ring_ctx {
unsigned cached_cq_tail;
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
- unsigned cq_extra;
void *cq_wait_arg;
size_t cq_wait_size;
@@ -394,7 +391,8 @@ struct io_ring_ctx {
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
- struct io_zcrx_ifq *ifq;
+ /* Stores zcrx object pointers of type struct io_zcrx_ifq */
+ struct xarray zcrx_ctxs;
u32 pers_next;
struct xarray personalities;
@@ -418,6 +416,7 @@ struct io_ring_ctx {
struct callback_head poll_wq_task_work;
struct list_head defer_list;
+ unsigned nr_drained;
struct io_alloc_cache msg_cache;
spinlock_t msg_lock;
@@ -436,6 +435,7 @@ struct io_ring_ctx {
/* protected by ->completion_lock */
unsigned evfd_last_cq_tail;
+ unsigned nr_req_allocated;
/*
* Protection for resize vs mmap races - both the mmap and resize
@@ -448,8 +448,6 @@ struct io_ring_ctx {
struct io_mapped_region ring_region;
/* used for optimised request parameter and wait argument passing */
struct io_mapped_region param_region;
- /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */
- struct io_mapped_region zcrx_region;
};
/*
@@ -653,8 +651,7 @@ struct io_kiocb {
u8 iopoll_completed;
/*
* Can be either a fixed buffer index, or used with provided buffers.
- * For the latter, before issue it points to the buffer group ID,
- * and after selection it points to the buffer ID itself.
+ * For the latter, it points to the selected buffer ID.
*/
u16 buf_index;
@@ -713,7 +710,7 @@ struct io_kiocb {
const struct cred *creds;
struct io_wq_work work;
- struct {
+ struct io_big_cqe {
u64 extra1;
u64 extra2;
} big_cqe;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3a8d35d41fda..15cdadace993 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -872,6 +872,10 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+int iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern size_t iommu_unmap_fast(struct iommu_domain *domain,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index dd5df1e2d032..1d6b606a81ef 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -597,7 +597,6 @@ enum {
struct irqaction;
extern int setup_percpu_irq(unsigned int irq, struct irqaction *new);
-extern void remove_percpu_irq(unsigned int irq, struct irqaction *act);
#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
extern void irq_cpu_online(void);
@@ -700,7 +699,7 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret);
extern int noirqdebug_setup(char *str);
/* Checks whether the interrupt can be requested by request_irq(): */
-extern int can_request_irq(unsigned int irq, unsigned long irqflags);
+extern bool can_request_irq(unsigned int irq, unsigned long irqflags);
/* Dummy irq-chip implementations: */
extern struct irq_chip no_irq_chip;
@@ -1222,31 +1221,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
#define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX)
-#ifdef CONFIG_SMP
-static inline void irq_gc_lock(struct irq_chip_generic *gc)
-{
- raw_spin_lock(&gc->lock);
-}
-
-static inline void irq_gc_unlock(struct irq_chip_generic *gc)
-{
- raw_spin_unlock(&gc->lock);
-}
-#else
-static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
-static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
-#endif
-
-/*
- * The irqsave variants are for usage in non interrupt code. Do not use
- * them in irq_chip callbacks. Use irq_gc_lock() instead.
- */
-#define irq_gc_lock_irqsave(gc, flags) \
- raw_spin_lock_irqsave(&(gc)->lock, flags)
-
-#define irq_gc_unlock_irqrestore(gc, flags) \
- raw_spin_unlock_irqrestore(&(gc)->lock, flags)
-
static inline void irq_reg_writel(struct irq_chip_generic *gc,
u32 val, int reg_offset)
{
diff --git a/drivers/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h
index 681ceabb7bc7..dd8d1d138544 100644
--- a/drivers/irqchip/irq-msi-lib.h
+++ b/include/linux/irqchip/irq-msi-lib.h
@@ -2,8 +2,8 @@
// Copyright (C) 2022 Linutronix GmbH
// Copyright (C) 2022 Intel
-#ifndef _DRIVERS_IRQCHIP_IRQ_MSI_LIB_H
-#define _DRIVERS_IRQCHIP_IRQ_MSI_LIB_H
+#ifndef _IRQCHIP_IRQ_MSI_LIB_H
+#define _IRQCHIP_IRQ_MSI_LIB_H
#include <linux/bits.h>
#include <linux/irqdomain.h>
@@ -24,4 +24,4 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
struct irq_domain *real_parent,
struct msi_domain_info *info);
-#endif /* _DRIVERS_IRQCHIP_IRQ_MSI_LIB_H */
+#endif /* _IRQCHIP_IRQ_MSI_LIB_H */
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index bb7111105296..7387d183029b 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -1,30 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * irq_domain - IRQ translation domains
+ * irq_domain - IRQ Translation Domains
*
- * Translation infrastructure between hw and linux irq numbers. This is
- * helpful for interrupt controllers to implement mapping between hardware
- * irq numbers and the Linux irq number space.
- *
- * irq_domains also have hooks for translating device tree or other
- * firmware interrupt representations into a hardware irq number that
- * can be mapped back to a Linux irq number without any extra platform
- * support code.
- *
- * Interrupt controller "domain" data structure. This could be defined as a
- * irq domain controller. That is, it handles the mapping between hardware
- * and virtual interrupt numbers for a given interrupt domain. The domain
- * structure is generally created by the PIC code for a given PIC instance
- * (though a domain can cover more than one PIC if they have a flat number
- * model). It's the domain callbacks that are responsible for setting the
- * irq_chip on a given irq_desc after it's been mapped.
- *
- * The host code and data structures use a fwnode_handle pointer to
- * identify the domain. In some cases, and in order to preserve source
- * code compatibility, this fwnode pointer is "upgraded" to a DT
- * device_node. For those firmware infrastructures that do not provide
- * a unique identifier for an interrupt controller, the irq_domain
- * code offers a fwnode allocator.
+ * See Documentation/core-api/irq/irq-domain.rst for the details.
*/
#ifndef _LINUX_IRQDOMAIN_H
@@ -61,9 +39,9 @@ struct msi_parent_ops;
* pass a device-specific description of an interrupt.
*/
struct irq_fwspec {
- struct fwnode_handle *fwnode;
- int param_count;
- u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS];
+ struct fwnode_handle *fwnode;
+ int param_count;
+ u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS];
};
/* Conversion function from of_phandle_args fields to fwspec */
@@ -72,26 +50,26 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
/**
* struct irq_domain_ops - Methods for irq_domain objects
- * @match: Match an interrupt controller device node to a domain, returns
- * 1 on a match
- * @select: Match an interrupt controller fw specification. It is more generic
- * than @match as it receives a complete struct irq_fwspec. Therefore,
- * @select is preferred if provided. Returns 1 on a match.
- * @map: Create or update a mapping between a virtual irq number and a hw
- * irq number. This is called only once for a given mapping.
- * @unmap: Dispose of such a mapping
- * @xlate: Given a device tree node and interrupt specifier, decode
- * the hardware irq number and linux irq type value.
- * @alloc: Allocate @nr_irqs interrupts starting from @virq.
- * @free: Free @nr_irqs interrupts starting from @virq.
- * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only
- * reserve the vector. If unset, assign the vector (called from
- * request_irq()).
- * @deactivate: Disarm one interrupt (@irqd).
- * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and
- * linux irq type value (@out_type). This is a generalised @xlate
- * (over struct irq_fwspec) and is preferred if provided.
- * @debug_show: For domains to show specific data for an interrupt in debugfs.
+ * @match: Match an interrupt controller device node to a domain, returns
+ * 1 on a match
+ * @select: Match an interrupt controller fw specification. It is more generic
+ * than @match as it receives a complete struct irq_fwspec. Therefore,
+ * @select is preferred if provided. Returns 1 on a match.
+ * @map: Create or update a mapping between a virtual irq number and a hw
+ * irq number. This is called only once for a given mapping.
+ * @unmap: Dispose of such a mapping
+ * @xlate: Given a device tree node and interrupt specifier, decode
+ * the hardware irq number and linux irq type value.
+ * @alloc: Allocate @nr_irqs interrupts starting from @virq.
+ * @free: Free @nr_irqs interrupts starting from @virq.
+ * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only
+ * reserve the vector. If unset, assign the vector (called from
+ * request_irq()).
+ * @deactivate: Disarm one interrupt (@irqd).
+ * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and
+ * linux irq type value (@out_type). This is a generalised @xlate
+ * (over struct irq_fwspec) and is preferred if provided.
+ * @debug_show: For domains to show specific data for an interrupt in debugfs.
*
* Functions below are provided by the driver and called whenever a new mapping
* is created or an old mapping is disposed. The driver can then proceed to
@@ -99,29 +77,29 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
* to setup the irq_desc when returning from map().
*/
struct irq_domain_ops {
- int (*match)(struct irq_domain *d, struct device_node *node,
- enum irq_domain_bus_token bus_token);
- int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec,
- enum irq_domain_bus_token bus_token);
- int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw);
- void (*unmap)(struct irq_domain *d, unsigned int virq);
- int (*xlate)(struct irq_domain *d, struct device_node *node,
- const u32 *intspec, unsigned int intsize,
- unsigned long *out_hwirq, unsigned int *out_type);
+ int (*match)(struct irq_domain *d, struct device_node *node,
+ enum irq_domain_bus_token bus_token);
+ int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token);
+ int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw);
+ void (*unmap)(struct irq_domain *d, unsigned int virq);
+ int (*xlate)(struct irq_domain *d, struct device_node *node,
+ const u32 *intspec, unsigned int intsize,
+ unsigned long *out_hwirq, unsigned int *out_type);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
/* extended V2 interfaces to support hierarchy irq_domains */
- int (*alloc)(struct irq_domain *d, unsigned int virq,
- unsigned int nr_irqs, void *arg);
- void (*free)(struct irq_domain *d, unsigned int virq,
- unsigned int nr_irqs);
- int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
- void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
- int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
- unsigned long *out_hwirq, unsigned int *out_type);
+ int (*alloc)(struct irq_domain *d, unsigned int virq,
+ unsigned int nr_irqs, void *arg);
+ void (*free)(struct irq_domain *d, unsigned int virq,
+ unsigned int nr_irqs);
+ int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
+ void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
+ int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq, unsigned int *out_type);
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
- void (*debug_show)(struct seq_file *m, struct irq_domain *d,
- struct irq_data *irqd, int ind);
+ void (*debug_show)(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind);
#endif
};
@@ -231,6 +209,9 @@ enum {
/* Irq domain must destroy generic chips when removed */
IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10),
+ /* Address and data pair is mutable when irq_set_affinity() */
+ IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11),
+
/*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
@@ -244,8 +225,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
return to_of_node(d->fwnode);
}
-static inline void irq_domain_set_pm_device(struct irq_domain *d,
- struct device *dev)
+static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev)
{
if (d)
d->pm_dev = dev;
@@ -261,14 +241,12 @@ enum {
IRQCHIP_FWNODE_NAMED_ID,
};
-static inline
-struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name)
+static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name)
{
return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL);
}
-static inline
-struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
+static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
{
return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name,
NULL);
@@ -281,6 +259,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
+DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T))
+
struct irq_domain_chip_generic_info;
/**
@@ -333,36 +313,19 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
const struct irq_domain_info *info);
-struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
- unsigned int size,
+struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size,
unsigned int first_irq,
- const struct irq_domain_ops *ops,
- void *host_data);
-struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
- unsigned int size,
- unsigned int first_irq,
- irq_hw_number_t first_hwirq,
- const struct irq_domain_ops *ops,
- void *host_data);
-struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
- unsigned int size,
- unsigned int first_irq,
- irq_hw_number_t first_hwirq,
- const struct irq_domain_ops *ops,
- void *host_data);
+ const struct irq_domain_ops *ops, void *host_data);
+struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size,
+ unsigned int first_irq, irq_hw_number_t first_hwirq,
+ const struct irq_domain_ops *ops, void *host_data);
struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
void irq_set_default_domain(struct irq_domain *domain);
struct irq_domain *irq_get_default_domain(void);
-int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
- irq_hw_number_t hwirq, int node,
+int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node,
const struct irq_affinity_desc *affinity);
-static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
-{
- return node ? &node->fwnode : NULL;
-}
-
extern const struct fwnode_operations irqchip_fwnode_ops;
static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
@@ -370,12 +333,10 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
return fwnode && fwnode->ops == &irqchip_fwnode_ops;
}
-void irq_domain_update_bus_token(struct irq_domain *domain,
- enum irq_domain_bus_token bus_token);
+void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token);
-static inline
-struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
- enum irq_domain_bus_token bus_token)
+static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+ enum irq_domain_bus_token bus_token)
{
struct irq_fwspec fwspec = {
.fwnode = fwnode,
@@ -387,7 +348,7 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
static inline struct irq_domain *irq_find_matching_host(struct device_node *node,
enum irq_domain_bus_token bus_token)
{
- return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token);
+ return irq_find_matching_fwnode(of_fwnode_handle(node), bus_token);
}
static inline struct irq_domain *irq_find_host(struct device_node *node)
@@ -401,128 +362,92 @@ static inline struct irq_domain *irq_find_host(struct device_node *node)
return d;
}
-static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
- unsigned int size,
- unsigned int first_irq,
- const struct irq_domain_ops *ops,
- void *host_data)
-{
- return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data);
-}
-
-/**
- * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
- * @of_node: pointer to interrupt controller's device tree node.
- * @size: Number of interrupts in the domain.
- * @ops: map/unmap domain callbacks
- * @host_data: Controller private data pointer
- */
-static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
- unsigned int size,
- const struct irq_domain_ops *ops,
- void *host_data)
-{
- struct irq_domain_info info = {
- .fwnode = of_node_to_fwnode(of_node),
- .size = size,
- .hwirq_max = size,
- .ops = ops,
- .host_data = host_data,
- };
- struct irq_domain *d;
-
- d = irq_domain_instantiate(&info);
- return IS_ERR(d) ? NULL : d;
-}
-
#ifdef CONFIG_IRQ_DOMAIN_NOMAP
-static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
- unsigned int max_irq,
- const struct irq_domain_ops *ops,
- void *host_data)
+static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode,
+ unsigned int max_irq,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
- struct irq_domain_info info = {
- .fwnode = of_node_to_fwnode(of_node),
+ const struct irq_domain_info info = {
+ .fwnode = fwnode,
.hwirq_max = max_irq,
.direct_max = max_irq,
.ops = ops,
.host_data = host_data,
};
- struct irq_domain *d;
+ struct irq_domain *d = irq_domain_instantiate(&info);
- d = irq_domain_instantiate(&info);
return IS_ERR(d) ? NULL : d;
}
unsigned int irq_create_direct_mapping(struct irq_domain *domain);
#endif
-static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
- const struct irq_domain_ops *ops,
- void *host_data)
-{
- struct irq_domain_info info = {
- .fwnode = of_node_to_fwnode(of_node),
- .hwirq_max = ~0U,
- .ops = ops,
- .host_data = host_data,
- };
- struct irq_domain *d;
-
- d = irq_domain_instantiate(&info);
- return IS_ERR(d) ? NULL : d;
-}
-
+/**
+ * irq_domain_create_linear - Allocate and register a linear revmap irq_domain.
+ * @fwnode: pointer to interrupt controller's FW node.
+ * @size: Number of interrupts in the domain.
+ * @ops: map/unmap domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Returns: Newly created irq_domain
+ */
static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode,
- unsigned int size,
- const struct irq_domain_ops *ops,
- void *host_data)
+ unsigned int size,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
- struct irq_domain_info info = {
+ const struct irq_domain_info info = {
.fwnode = fwnode,
.size = size,
.hwirq_max = size,
.ops = ops,
.host_data = host_data,
};
- struct irq_domain *d;
+ struct irq_domain *d = irq_domain_instantiate(&info);
- d = irq_domain_instantiate(&info);
return IS_ERR(d) ? NULL : d;
}
static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode,
- const struct irq_domain_ops *ops,
- void *host_data)
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
- struct irq_domain_info info = {
+ const struct irq_domain_info info = {
.fwnode = fwnode,
.hwirq_max = ~0,
.ops = ops,
.host_data = host_data,
};
- struct irq_domain *d;
+ struct irq_domain *d = irq_domain_instantiate(&info);
- d = irq_domain_instantiate(&info);
return IS_ERR(d) ? NULL : d;
}
void irq_domain_remove(struct irq_domain *domain);
-int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
- irq_hw_number_t hwirq);
-void irq_domain_associate_many(struct irq_domain *domain,
- unsigned int irq_base,
+int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq);
+void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
irq_hw_number_t hwirq_base, int count);
-unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
- irq_hw_number_t hwirq,
+unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq,
const struct irq_affinity_desc *affinity);
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
void irq_dispose_mapping(unsigned int virq);
-static inline unsigned int irq_create_mapping(struct irq_domain *domain,
- irq_hw_number_t hwirq)
+/**
+ * irq_create_mapping - Map a hardware interrupt into linux irq space
+ * @domain: domain owning this hardware interrupt or NULL for default domain
+ * @hwirq: hardware irq number in that domain space
+ *
+ * Only one mapping per hardware interrupt is permitted.
+ *
+ * If the sense/trigger is to be specified, set_irq_type() should be called
+ * on the number returned from that call.
+ *
+ * Returns: Linux irq number or 0 on error
+ */
+static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq)
{
return irq_create_mapping_affinity(domain, hwirq, NULL);
}
@@ -531,6 +456,13 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq,
unsigned int *irq);
+/**
+ * irq_resolve_mapping - Find a linux irq from a hw irq number.
+ * @domain: domain owning this hardware interrupt
+ * @hwirq: hardware irq number in that domain space
+ *
+ * Returns: Interrupt descriptor
+ */
static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
@@ -539,8 +471,10 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
/**
* irq_find_mapping() - Find a linux irq from a hw irq number.
- * @domain: domain owning this hardware interrupt
- * @hwirq: hardware irq number in that domain space
+ * @domain: domain owning this hardware interrupt
+ * @hwirq: hardware irq number in that domain space
+ *
+ * Returns: Linux irq number or 0 if not found
*/
static inline unsigned int irq_find_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
@@ -553,107 +487,115 @@ static inline unsigned int irq_find_mapping(struct irq_domain *domain,
return 0;
}
-static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
- irq_hw_number_t hwirq)
-{
- return irq_find_mapping(domain, hwirq);
-}
-
extern const struct irq_domain_ops irq_domain_simple_ops;
/* stock xlate functions */
int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq, unsigned int *out_type);
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type);
int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq, unsigned int *out_type);
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type);
int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq, unsigned int *out_type);
-
-int irq_domain_translate_twocell(struct irq_domain *d,
- struct irq_fwspec *fwspec,
- unsigned long *out_hwirq,
- unsigned int *out_type);
-
-int irq_domain_translate_onecell(struct irq_domain *d,
- struct irq_fwspec *fwspec,
- unsigned long *out_hwirq,
- unsigned int *out_type);
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type);
+int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type);
+
+int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq, unsigned int *out_type);
+int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq, unsigned int *out_type);
+int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq, unsigned int *out_type);
/* IPI functions */
int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest);
int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
/* V2 interfaces to support hierarchy IRQ domains. */
-struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
- unsigned int virq);
-void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq,
- const struct irq_chip *chip,
- void *chip_data, irq_flow_handler_t handler,
+struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq);
+void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq,
+ const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name);
void irq_domain_reset_irq_data(struct irq_data *irq_data);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
- unsigned int flags,
- unsigned int size,
- struct fwnode_handle *fwnode,
- const struct irq_domain_ops *ops,
- void *host_data);
-
-static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent,
- unsigned int flags,
- unsigned int size,
- struct device_node *node,
- const struct irq_domain_ops *ops,
- void *host_data)
-{
- return irq_domain_create_hierarchy(parent, flags, size,
- of_node_to_fwnode(node),
- ops, host_data);
-}
-
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc,
+/**
+ * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy
+ * @parent: Parent irq domain to associate with the new domain
+ * @flags: Irq domain flags associated to the domain
+ * @size: Size of the domain. See below
+ * @fwnode: Optional fwnode of the interrupt controller
+ * @ops: Pointer to the interrupt domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * If @size is 0 a tree domain is created, otherwise a linear domain.
+ *
+ * If successful the parent is associated to the new domain and the
+ * domain flags are set.
+ *
+ * Returns: A pointer to IRQ domain, or %NULL on failure.
+ */
+static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
+ unsigned int flags, unsigned int size,
+ struct fwnode_handle *fwnode,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ const struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = size,
+ .hwirq_max = size ? : ~0U,
+ .ops = ops,
+ .host_data = host_data,
+ .domain_flags = flags,
+ .parent = parent,
+ };
+ struct irq_domain *d = irq_domain_instantiate(&info);
+
+ return IS_ERR(d) ? NULL : d;
+}
+
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs,
+ int node, void *arg, bool realloc,
const struct irq_affinity_desc *affinity);
void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
void irq_domain_deactivate_irq(struct irq_data *irq_data);
-static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
- unsigned int nr_irqs, int node, void *arg)
+/**
+ * irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ *
+ * See __irq_domain_alloc_irqs()' documentation.
+ */
+static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs,
+ int node, void *arg)
{
- return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false,
- NULL);
+ return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL);
}
-int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
- unsigned int virq,
- irq_hw_number_t hwirq,
- const struct irq_chip *chip,
+int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq, const struct irq_chip *chip,
void *chip_data);
-void irq_domain_free_irqs_common(struct irq_domain *domain,
- unsigned int virq,
+void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs);
-void irq_domain_free_irqs_top(struct irq_domain *domain,
- unsigned int virq, unsigned int nr_irqs);
+void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs);
int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg);
int irq_domain_pop_irq(struct irq_domain *domain, int virq);
-int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
- unsigned int irq_base,
+int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base,
unsigned int nr_irqs, void *arg);
-void irq_domain_free_irqs_parent(struct irq_domain *domain,
- unsigned int irq_base,
+void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base,
unsigned int nr_irqs);
-int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
- unsigned int virq);
+int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq);
static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
{
@@ -662,8 +604,7 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
static inline bool irq_domain_is_ipi(struct irq_domain *domain)
{
- return domain->flags &
- (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE);
+ return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE);
}
static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain)
@@ -691,15 +632,18 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE;
}
+static inline bool irq_domain_is_msi_immutable(struct irq_domain *domain)
+{
+ return domain->flags & IRQ_DOMAIN_FLAG_MSI_IMMUTABLE;
+}
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
-static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
- unsigned int nr_irqs, int node, void *arg)
+static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs,
+ int node, void *arg)
{
return -1;
}
-static inline void irq_domain_free_irqs(unsigned int virq,
- unsigned int nr_irqs) { }
+static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { }
static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
{
@@ -739,8 +683,7 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#ifdef CONFIG_GENERIC_MSI_IRQ
-int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
- unsigned int type);
+int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type);
void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq);
#else
static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
@@ -755,10 +698,50 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig
}
#endif
+/* Deprecated functions. Will be removed in the merge window */
+static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
+{
+ return node ? &node->fwnode : NULL;
+}
+
+static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(of_node),
+ .hwirq_max = ~0U,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
+}
+
+static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
+ unsigned int size,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(of_node),
+ .size = size,
+ .hwirq_max = size,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
+}
+
#else /* CONFIG_IRQ_DOMAIN */
static inline void irq_dispose_mapping(unsigned int virq) { }
-static inline struct irq_domain *irq_find_matching_fwnode(
- struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token)
+static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+ enum irq_domain_bus_token bus_token)
{
return NULL;
}
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 0ea8c9887429..91b20788273d 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -59,7 +59,7 @@
/* LATCH is used in the interval timer and ftape setup. */
#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
-extern int register_refined_jiffies(long clock_tick_rate);
+extern void register_refined_jiffies(long clock_tick_rate);
/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
index 013794fb5da0..065c185f2763 100644
--- a/include/linux/livepatch_sched.h
+++ b/include/linux/livepatch_sched.h
@@ -3,27 +3,23 @@
#define _LINUX_LIVEPATCH_SCHED_H_
#include <linux/jump_label.h>
-#include <linux/static_call_types.h>
+#include <linux/sched.h>
#ifdef CONFIG_LIVEPATCH
void __klp_sched_try_switch(void);
-#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-static __always_inline void klp_sched_try_switch(void)
+static __always_inline void klp_sched_try_switch(struct task_struct *curr)
{
- if (static_branch_unlikely(&klp_sched_try_switch_key))
+ if (static_branch_unlikely(&klp_sched_try_switch_key) &&
+ READ_ONCE(curr->__state) & TASK_FREEZABLE)
__klp_sched_try_switch();
}
-#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
#else /* !CONFIG_LIVEPATCH */
-static inline void klp_sched_try_switch(void) {}
-static inline void __klp_sched_try_switch(void) {}
+static inline void klp_sched_try_switch(struct task_struct *curr) {}
#endif /* CONFIG_LIVEPATCH */
#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h
new file mode 100644
index 000000000000..c6face34e385
--- /dev/null
+++ b/include/linux/mfd/max77759.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2020 Google Inc.
+ * Copyright 2025 Linaro Ltd.
+ *
+ * Maxim MAX77759 core driver
+ */
+
+#ifndef __LINUX_MFD_MAX77759_H
+#define __LINUX_MFD_MAX77759_H
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/regmap.h>
+
+#define MAX77759_PMIC_REG_PMIC_ID 0x00
+#define MAX77759_PMIC_REG_PMIC_REVISION 0x01
+#define MAX77759_PMIC_REG_OTP_REVISION 0x02
+#define MAX77759_PMIC_REG_INTSRC 0x22
+#define MAX77759_PMIC_REG_INTSRCMASK 0x23
+#define MAX77759_PMIC_REG_INTSRC_MAXQ BIT(3)
+#define MAX77759_PMIC_REG_INTSRC_TOPSYS BIT(1)
+#define MAX77759_PMIC_REG_INTSRC_CHGR BIT(0)
+#define MAX77759_PMIC_REG_TOPSYS_INT 0x24
+#define MAX77759_PMIC_REG_TOPSYS_INT_MASK 0x26
+#define MAX77759_PMIC_REG_TOPSYS_INT_TSHDN BIT(6)
+#define MAX77759_PMIC_REG_TOPSYS_INT_SYSOVLO BIT(5)
+#define MAX77759_PMIC_REG_TOPSYS_INT_SYSUVLO BIT(4)
+#define MAX77759_PMIC_REG_TOPSYS_INT_FSHIP BIT(0)
+#define MAX77759_PMIC_REG_I2C_CNFG 0x40
+#define MAX77759_PMIC_REG_SWRESET 0x50
+#define MAX77759_PMIC_REG_CONTROL_FG 0x51
+
+#define MAX77759_MAXQ_REG_UIC_INT1 0x64
+#define MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI BIT(7)
+#define MAX77759_MAXQ_REG_UIC_INT1_SYSMSGI BIT(6)
+#define MAX77759_MAXQ_REG_UIC_INT1_GPIO6I BIT(1)
+#define MAX77759_MAXQ_REG_UIC_INT1_GPIO5I BIT(0)
+#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, en) (((en) & 1) << (offs))
+#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(offs) \
+ MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, ~0)
+#define MAX77759_MAXQ_REG_UIC_INT2 0x65
+#define MAX77759_MAXQ_REG_UIC_INT3 0x66
+#define MAX77759_MAXQ_REG_UIC_INT4 0x67
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS1 0x68
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS2 0x69
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS3 0x6a
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS4 0x6b
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS5 0x6c
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS6 0x6d
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS7 0x6f
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS8 0x6f
+#define MAX77759_MAXQ_REG_UIC_INT1_M 0x70
+#define MAX77759_MAXQ_REG_UIC_INT2_M 0x71
+#define MAX77759_MAXQ_REG_UIC_INT3_M 0x72
+#define MAX77759_MAXQ_REG_UIC_INT4_M 0x73
+#define MAX77759_MAXQ_REG_AP_DATAOUT0 0x81
+#define MAX77759_MAXQ_REG_AP_DATAOUT32 0xa1
+#define MAX77759_MAXQ_REG_AP_DATAIN0 0xb1
+#define MAX77759_MAXQ_REG_UIC_SWRST 0xe0
+
+#define MAX77759_CHGR_REG_CHG_INT 0xb0
+#define MAX77759_CHGR_REG_CHG_INT2 0xb1
+#define MAX77759_CHGR_REG_CHG_INT_MASK 0xb2
+#define MAX77759_CHGR_REG_CHG_INT2_MASK 0xb3
+#define MAX77759_CHGR_REG_CHG_INT_OK 0xb4
+#define MAX77759_CHGR_REG_CHG_DETAILS_00 0xb5
+#define MAX77759_CHGR_REG_CHG_DETAILS_01 0xb6
+#define MAX77759_CHGR_REG_CHG_DETAILS_02 0xb7
+#define MAX77759_CHGR_REG_CHG_DETAILS_03 0xb8
+#define MAX77759_CHGR_REG_CHG_CNFG_00 0xb9
+#define MAX77759_CHGR_REG_CHG_CNFG_01 0xba
+#define MAX77759_CHGR_REG_CHG_CNFG_02 0xbb
+#define MAX77759_CHGR_REG_CHG_CNFG_03 0xbc
+#define MAX77759_CHGR_REG_CHG_CNFG_04 0xbd
+#define MAX77759_CHGR_REG_CHG_CNFG_05 0xbe
+#define MAX77759_CHGR_REG_CHG_CNFG_06 0xbf
+#define MAX77759_CHGR_REG_CHG_CNFG_07 0xc0
+#define MAX77759_CHGR_REG_CHG_CNFG_08 0xc1
+#define MAX77759_CHGR_REG_CHG_CNFG_09 0xc2
+#define MAX77759_CHGR_REG_CHG_CNFG_10 0xc3
+#define MAX77759_CHGR_REG_CHG_CNFG_11 0xc4
+#define MAX77759_CHGR_REG_CHG_CNFG_12 0xc5
+#define MAX77759_CHGR_REG_CHG_CNFG_13 0xc6
+#define MAX77759_CHGR_REG_CHG_CNFG_14 0xc7
+#define MAX77759_CHGR_REG_CHG_CNFG_15 0xc8
+#define MAX77759_CHGR_REG_CHG_CNFG_16 0xc9
+#define MAX77759_CHGR_REG_CHG_CNFG_17 0xca
+#define MAX77759_CHGR_REG_CHG_CNFG_18 0xcb
+#define MAX77759_CHGR_REG_CHG_CNFG_19 0xcc
+
+/* MaxQ opcodes for max77759_maxq_command() */
+#define MAX77759_MAXQ_OPCODE_MAXLENGTH (MAX77759_MAXQ_REG_AP_DATAOUT32 - \
+ MAX77759_MAXQ_REG_AP_DATAOUT0 + \
+ 1)
+
+#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_READ 0x21
+#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_WRITE 0x22
+#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_READ 0x23
+#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_WRITE 0x24
+#define MAX77759_MAXQ_OPCODE_USER_SPACE_READ 0x81
+#define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE 0x82
+
+/**
+ * struct max77759 - core max77759 internal data structure
+ *
+ * @regmap_top: Regmap for accessing TOP registers
+ * @maxq_lock: Lock for serializing access to MaxQ
+ * @regmap_maxq: Regmap for accessing MaxQ registers
+ * @cmd_done: Used to signal completion of a MaxQ command
+ * @regmap_charger: Regmap for accessing charger registers
+ *
+ * The MAX77759 comprises several sub-blocks, namely TOP, MaxQ, Charger,
+ * Fuel Gauge, and TCPCI.
+ */
+struct max77759 {
+ struct regmap *regmap_top;
+
+ /* This protects MaxQ commands - only one can be active */
+ struct mutex maxq_lock;
+ struct regmap *regmap_maxq;
+ struct completion cmd_done;
+
+ struct regmap *regmap_charger;
+};
+
+/**
+ * struct max77759_maxq_command - structure containing the MaxQ command to
+ * send
+ *
+ * @length: The number of bytes to send.
+ * @cmd: The data to send.
+ */
+struct max77759_maxq_command {
+ u8 length;
+ u8 cmd[] __counted_by(length);
+};
+
+/**
+ * struct max77759_maxq_response - structure containing the MaxQ response
+ *
+ * @length: The number of bytes to receive.
+ * @rsp: The data received. Must have at least @length bytes space.
+ */
+struct max77759_maxq_response {
+ u8 length;
+ u8 rsp[] __counted_by(length);
+};
+
+/**
+ * max77759_maxq_command() - issue a MaxQ command and wait for the response
+ * and associated data
+ *
+ * @max77759: The core max77759 device handle.
+ * @cmd: The command to be sent.
+ * @rsp: Any response data associated with the command will be copied here;
+ * can be %NULL if the command has no response (other than ACK).
+ *
+ * Return: 0 on success, a negative error number otherwise.
+ */
+int max77759_maxq_command(struct max77759 *max77759,
+ const struct max77759_maxq_command *cmd,
+ struct max77759_maxq_response *rsp);
+
+#endif /* __LINUX_MFD_MAX77759_H */
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 591bf5b5e8dc..9af01bdd86d2 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -44,7 +44,6 @@
#define MICREL_PHY_50MHZ_CLK BIT(0)
#define MICREL_PHY_FXEN BIT(1)
#define MICREL_KSZ8_P1_ERRATA BIT(2)
-#define MICREL_NO_EEE BIT(3)
#define MICREL_KSZ9021_EXTREG_CTRL 0xB
#define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bf55206935c4..fdda6b16263b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -385,7 +385,7 @@ extern unsigned int kobjsize(const void *objp);
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-# define VM_UFFD_MINOR_BIT 38
+# define VM_UFFD_MINOR_BIT 41
# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
# define VM_UFFD_MINOR VM_NONE
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56d07edd01f9..32ba5126e221 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -31,6 +31,7 @@
#define INIT_PASID 0
struct address_space;
+struct futex_private_hash;
struct mem_cgroup;
/*
@@ -1031,7 +1032,11 @@ struct mm_struct {
*/
seqcount_t mm_lock_seq;
#endif
-
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+ struct mutex futex_hash_lock;
+ struct futex_private_hash __rcu *futex_phash;
+ struct futex_private_hash *futex_phash_new;
+#endif
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
diff --git a/include/linux/mman.h b/include/linux/mman.h
index bce214fece16..f4c6346a8fcd 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags)
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
_calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
+#endif
arch_calc_vm_flag_bits(file, flags);
}
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 4706c6769902..e0eddfd306ef 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -7,6 +7,7 @@
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
+#include <linux/cleanup.h>
#define MMAP_LOCK_INITIALIZER(name) \
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
up_read(&mm->mmap_lock);
}
+DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
+ mmap_read_lock(_T), mmap_read_unlock(_T))
+
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
__mmap_lock_trace_released(mm, false);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 526fce581657..ddcdf23d731c 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -329,6 +329,7 @@ struct mmc_card {
#define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */
#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */
#define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */
+#define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */
bool written_flag; /* Indicates eMMC has been written since power on */
bool reenable_cmdq; /* Re-enable Command Queue */
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index 1ed7b0d1e4f9..23ac5696fa38 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -24,7 +24,7 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config);
int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on);
void mmc_gpiod_request_cd_irq(struct mmc_host *host);
-bool mmc_can_gpio_cd(struct mmc_host *host);
-bool mmc_can_gpio_ro(struct mmc_host *host);
+bool mmc_host_can_gpio_cd(struct mmc_host *host);
+bool mmc_host_can_gpio_ro(struct mmc_host *host);
#endif
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ccec1bf2896..b1c459f7a485 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -148,7 +148,6 @@ enum zone_stat_item {
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
/* Second 128 byte cacheline */
- NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
diff --git a/include/linux/module.h b/include/linux/module.h
index b3329110d668..8050f77c3b64 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -586,6 +586,11 @@ struct module {
atomic_t refcnt;
#endif
+#ifdef CONFIG_MITIGATION_ITS
+ int its_num_pages;
+ void **its_page_array;
+#endif
+
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index dcc17ce8a959..6904ad33ee7a 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -22,48 +22,51 @@ struct fs_context;
struct file;
struct path;
-#define MNT_NOSUID 0x01
-#define MNT_NODEV 0x02
-#define MNT_NOEXEC 0x04
-#define MNT_NOATIME 0x08
-#define MNT_NODIRATIME 0x10
-#define MNT_RELATIME 0x20
-#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
-#define MNT_NOSYMFOLLOW 0x80
-
-#define MNT_SHRINKABLE 0x100
-#define MNT_WRITE_HOLD 0x200
-
-#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
-#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
-/*
- * MNT_SHARED_MASK is the set of flags that should be cleared when a
- * mount becomes shared. Currently, this is only the flag that says a
- * mount cannot be bind mounted, since this is how we create a mount
- * that shares events with another mount. If you add a new MNT_*
- * flag, consider how it interacts with shared mounts.
- */
-#define MNT_SHARED_MASK (MNT_UNBINDABLE)
-#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
- | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
- | MNT_READONLY | MNT_NOSYMFOLLOW)
-#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
-
-#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
- MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
-
-#define MNT_INTERNAL 0x4000
-
-#define MNT_LOCK_ATIME 0x040000
-#define MNT_LOCK_NOEXEC 0x080000
-#define MNT_LOCK_NOSUID 0x100000
-#define MNT_LOCK_NODEV 0x200000
-#define MNT_LOCK_READONLY 0x400000
-#define MNT_LOCKED 0x800000
-#define MNT_DOOMED 0x1000000
-#define MNT_SYNC_UMOUNT 0x2000000
-#define MNT_MARKED 0x4000000
-#define MNT_UMOUNT 0x8000000
+enum mount_flags {
+ MNT_NOSUID = 0x01,
+ MNT_NODEV = 0x02,
+ MNT_NOEXEC = 0x04,
+ MNT_NOATIME = 0x08,
+ MNT_NODIRATIME = 0x10,
+ MNT_RELATIME = 0x20,
+ MNT_READONLY = 0x40, /* does the user want this to be r/o? */
+ MNT_NOSYMFOLLOW = 0x80,
+
+ MNT_SHRINKABLE = 0x100,
+ MNT_WRITE_HOLD = 0x200,
+
+ MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */
+ MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */
+
+ MNT_INTERNAL = 0x4000,
+
+ MNT_LOCK_ATIME = 0x040000,
+ MNT_LOCK_NOEXEC = 0x080000,
+ MNT_LOCK_NOSUID = 0x100000,
+ MNT_LOCK_NODEV = 0x200000,
+ MNT_LOCK_READONLY = 0x400000,
+ MNT_LOCKED = 0x800000,
+ MNT_DOOMED = 0x1000000,
+ MNT_SYNC_UMOUNT = 0x2000000,
+ MNT_MARKED = 0x4000000,
+ MNT_UMOUNT = 0x8000000,
+
+ /*
+ * MNT_SHARED_MASK is the set of flags that should be cleared when a
+ * mount becomes shared. Currently, this is only the flag that says a
+ * mount cannot be bind mounted, since this is how we create a mount
+ * that shares events with another mount. If you add a new MNT_*
+ * flag, consider how it interacts with shared mounts.
+ */
+ MNT_SHARED_MASK = MNT_UNBINDABLE,
+ MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC
+ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME
+ | MNT_READONLY | MNT_NOSYMFOLLOW,
+ MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
+
+ MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED,
+};
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 58a2401e4b55..0075f6e5c3da 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -262,6 +262,11 @@ struct mr_table {
int mroute_reg_vif_num;
};
+static inline bool mr_can_free_table(struct net *net)
+{
+ return !check_net(net) || !net_initialized(net);
+}
+
#ifdef CONFIG_IP_MROUTE_COMMON
void vif_device_init(struct vif_device *v,
struct net_device *dev,
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 86e42742fd0f..6863540f4b71 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -229,8 +229,11 @@ struct msi_dev_domain {
int msi_setup_device_data(struct device *dev);
-void msi_lock_descs(struct device *dev);
-void msi_unlock_descs(struct device *dev);
+void __msi_lock_descs(struct device *dev);
+void __msi_unlock_descs(struct device *dev);
+
+DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock),
+ __msi_unlock_descs(_T->lock));
struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
enum msi_desc_filter filter);
@@ -420,6 +423,7 @@ struct msi_domain_info;
* @msi_init: Domain specific init function for MSI interrupts
* @msi_free: Domain specific function to free a MSI interrupts
* @msi_prepare: Prepare the allocation of the interrupts in the domain
+ * @msi_teardown: Reverse the effects of @msi_prepare
* @prepare_desc: Optional function to prepare the allocated MSI descriptor
* in the domain
* @set_desc: Set the msi descriptor for an interrupt
@@ -435,8 +439,9 @@ struct msi_domain_info;
* @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
* irqdomain.
*
- * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the
- * msi_domain_alloc/free_irqs*() variants.
+ * @msi_check, @msi_prepare, @msi_teardown, @prepare_desc and
+ * @set_desc are callbacks used by the msi_domain_alloc/free_irqs*()
+ * variants.
*
* @domain_alloc_irqs, @domain_free_irqs can be used to override the
* default allocation/free functions (__msi_domain_alloc/free_irqs). This
@@ -458,6 +463,8 @@ struct msi_domain_ops {
int (*msi_prepare)(struct irq_domain *domain,
struct device *dev, int nvec,
msi_alloc_info_t *arg);
+ void (*msi_teardown)(struct irq_domain *domain,
+ msi_alloc_info_t *arg);
void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg,
struct msi_desc *desc);
void (*set_desc)(msi_alloc_info_t *arg,
@@ -486,6 +493,7 @@ struct msi_domain_ops {
* @handler: Optional: associated interrupt flow handler
* @handler_data: Optional: associated interrupt flow handler data
* @handler_name: Optional: associated interrupt flow handler name
+ * @alloc_data: Optional: associated interrupt allocation data
* @data: Optional: domain specific data
*/
struct msi_domain_info {
@@ -498,6 +506,7 @@ struct msi_domain_info {
irq_flow_handler_t handler;
void *handler_data;
const char *handler_name;
+ msi_alloc_info_t *alloc_data;
void *data;
};
@@ -507,12 +516,14 @@ struct msi_domain_info {
* @chip: Interrupt chip for this domain
* @ops: MSI domain ops
* @info: MSI domain info data
+ * @alloc_info: MSI domain allocation data (architecture specific)
*/
struct msi_domain_template {
char name[48];
struct irq_chip chip;
struct msi_domain_ops ops;
struct msi_domain_info info;
+ msi_alloc_info_t alloc_info;
};
/*
@@ -625,6 +636,10 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
+struct irq_domain_info;
+struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info,
+ const struct msi_parent_ops *msi_parent_ops);
+
bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
const struct msi_domain_template *template,
unsigned int hwsize, void *domain_data,
diff --git a/include/linux/namei.h b/include/linux/namei.h
index bbaf55fb3101..5d085428e471 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -70,17 +70,16 @@ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *,
unsigned int, struct path *);
-extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
-extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
-extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
-extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
-struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int);
+extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *);
+extern struct dentry *lookup_noperm(struct qstr *, struct dentry *);
+extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *);
+extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *);
+struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *);
struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
- const char *name, struct dentry *base,
- int len);
+ struct qstr *name, struct dentry *base);
struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
- const char *name,
- struct dentry *base, int len);
+ struct qstr *name,
+ struct dentry *base);
extern int follow_down_one(struct path *);
extern int follow_down(struct path *path, unsigned int flags);
diff --git a/include/linux/net.h b/include/linux/net.h
index 0ff950eecc6b..f60fff91e1df 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -70,6 +70,7 @@ enum sock_type {
SOCK_DCCP = 6,
SOCK_PACKET = 10,
};
+#endif /* ARCH_HAS_SOCKET_TYPES */
#define SOCK_MAX (SOCK_PACKET + 1)
/* Mask which covers at least up to SOCK_MASK-1. The
@@ -81,8 +82,7 @@ enum sock_type {
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif
-
-#endif /* ARCH_HAS_SOCKET_TYPES */
+#define SOCK_COREDUMP O_NOCTTY
/**
* enum sock_shutdown_cmd - Shutdown types
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 71319637a84e..ee03f3cef30c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -213,6 +213,15 @@ struct nfs_server {
char *fscache_uniq; /* Uniquifier (or NULL) */
#endif
+ /* The following #defines numerically match the NFSv4 equivalents */
+#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1)
+#define NFS_FH_VOLATILE_ANY (0x2)
+#define NFS_FH_VOL_MIGRATION (0x4)
+#define NFS_FH_VOL_RENAME (0x8)
+#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME)
+ u32 fh_expire_type; /* V4 bitmask representing file
+ handle volatility type for
+ this filesystem */
u32 pnfs_blksize; /* layout_blksize attr */
#if IS_ENABLED(CONFIG_NFS_V4)
u32 attr_bitmask[3];/* V4 bitmask representing the set
@@ -236,9 +245,6 @@ struct nfs_server {
u32 acl_bitmask; /* V4 bitmask representing the ACEs
that are supported on this
filesystem */
- u32 fh_expire_type; /* V4 bitmask representing file
- handle volatility type for
- this filesystem */
struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
struct rpc_wait_queue roc_rpcwaitq;
void *pnfs_ld_data; /* per mount point data */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2479ed10f53e..51308f65b72f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -303,6 +303,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
NVME_CTRL_ATTR_RHII = (1 << 18),
+ NVME_CTRL_ATTR_FDPS = (1 << 19),
};
struct nvme_id_ctrl {
@@ -689,6 +690,44 @@ struct nvme_rotational_media_log {
__u8 rsvd24[488];
};
+struct nvme_fdp_config {
+ __u8 flags;
+#define FDPCFG_FDPE (1U << 0)
+ __u8 fdpcidx;
+ __le16 reserved;
+};
+
+struct nvme_fdp_ruh_desc {
+ __u8 ruht;
+ __u8 reserved[3];
+};
+
+struct nvme_fdp_config_desc {
+ __le16 dsze;
+ __u8 fdpa;
+ __u8 vss;
+ __le32 nrg;
+ __le16 nruh;
+ __le16 maxpids;
+ __le32 nns;
+ __le64 runs;
+ __le32 erutl;
+ __u8 rsvd28[36];
+ struct nvme_fdp_ruh_desc ruhs[];
+};
+
+struct nvme_fdp_config_log {
+ __le16 numfdpc;
+ __u8 ver;
+ __u8 rsvd3;
+ __le32 sze;
+ __u8 rsvd8[8];
+ /*
+ * This is followed by variable number of nvme_fdp_config_desc
+ * structures, but sparse doesn't like nested variable sized arrays.
+ */
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -915,6 +954,7 @@ enum nvme_opcode {
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_io_mgmt_recv = 0x12,
nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
@@ -936,6 +976,7 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_register), \
nvme_opcode_name(nvme_cmd_resv_report), \
nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_io_mgmt_recv), \
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
@@ -1087,6 +1128,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_RW_DTYPE_DPLCMT = 2 << 4,
NVME_WZ_DEAC = 1 << 9,
};
@@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd {
__le32 cdw14[2];
};
+struct nvme_io_mgmt_recv_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 mo;
+ __u8 rsvd11;
+ __u16 mos;
+ __le32 numd;
+ __le32 cdw12[4];
+};
+
+enum {
+ NVME_IO_MGMT_RECV_MO_RUHS = 1,
+};
+
+struct nvme_fdp_ruh_status_desc {
+ __le16 pid;
+ __le16 ruhid;
+ __le32 earutr;
+ __le64 ruamw;
+ __u8 reserved[16];
+};
+
+struct nvme_fdp_ruh_status {
+ __u8 rsvd0[14];
+ __le16 nruhsd;
+ struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
@@ -1309,6 +1383,7 @@ enum {
NVME_FEAT_PLM_WINDOW = 0x14,
NVME_FEAT_HOST_BEHAVIOR = 0x16,
NVME_FEAT_SANITIZE = 0x17,
+ NVME_FEAT_FDP = 0x1d,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
@@ -1329,6 +1404,7 @@ enum {
NVME_LOG_ANA = 0x0c,
NVME_LOG_FEATURES = 0x12,
NVME_LOG_RMI = 0x16,
+ NVME_LOG_FDP_CONFIGS = 0x20,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -1923,6 +1999,7 @@ struct nvme_command {
struct nvmf_auth_receive_command auth_receive;
struct nvme_dbbuf dbbuf;
struct nvme_directive_cmd directive;
+ struct nvme_io_mgmt_recv_cmd imr;
};
};
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e6a21b62dcce..3b814ce08331 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE)
PAGEFLAG_FALSE(HighMem, highmem)
#endif
+/* Does kmap_local_folio() only allow access to one page of the folio? */
+#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
+#define folio_test_partial_kmap(f) true
+#else
+#define folio_test_partial_kmap(f) folio_test_highmem(f)
+#endif
+
#ifdef CONFIG_SWAP
static __always_inline bool folio_test_swapcache(const struct folio *folio)
{
diff --git a/include/linux/panic.h b/include/linux/panic.h
index 2494d51707ef..4adc65766935 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -20,8 +20,6 @@ extern bool panic_triggering_all_cpu_backtrace;
extern int panic_timeout;
extern unsigned long panic_print;
extern int panic_on_oops;
-extern int panic_on_unrecovered_nmi;
-extern int panic_on_io_nmi;
extern int panic_on_warn;
extern unsigned long panic_on_taint;
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index c5e9cac0575e..eeeff2a04529 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value)
#define part_stat_local_read_cpu(part, field, cpu) \
local_read(&(part_stat_get_cpu(part, field, cpu)))
+unsigned int bdev_count_inflight(struct block_device *part);
+
#endif /* _LINUX_PART_STAT_H */
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 2c07aa6b7665..075c20b161d9 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -104,4 +104,89 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client)
return pci_p2pmem_find_many(&client, 1);
}
+enum pci_p2pdma_map_type {
+ /*
+ * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before
+ * the mapping type has been calculated. Exported routines for the API
+ * will never return this value.
+ */
+ PCI_P2PDMA_MAP_UNKNOWN = 0,
+
+ /*
+ * Not a PCI P2PDMA transfer.
+ */
+ PCI_P2PDMA_MAP_NONE,
+
+ /*
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
+ * traverse the host bridge and the host bridge is not in the
+ * allowlist. DMA Mapping routines should return an error when
+ * this is returned.
+ */
+ PCI_P2PDMA_MAP_NOT_SUPPORTED,
+
+ /*
+ * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to
+ * each other directly through a PCI switch and the transaction will
+ * not traverse the host bridge. Such a mapping should program
+ * the DMA engine with PCI bus addresses.
+ */
+ PCI_P2PDMA_MAP_BUS_ADDR,
+
+ /*
+ * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk
+ * to each other, but the transaction traverses a host bridge on the
+ * allowlist. In this case, a normal mapping either with CPU physical
+ * addresses (in the case of dma-direct) or IOVA addresses (in the
+ * case of IOMMUs) should be used to program the DMA engine.
+ */
+ PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
+};
+
+struct pci_p2pdma_map_state {
+ struct dev_pagemap *pgmap;
+ enum pci_p2pdma_map_type map;
+ u64 bus_off;
+};
+
+/* helper for pci_p2pdma_state(), do not use directly */
+void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
+ struct device *dev, struct page *page);
+
+/**
+ * pci_p2pdma_state - check the P2P transfer state of a page
+ * @state: P2P state structure
+ * @dev: device to transfer to/from
+ * @page: page to map
+ *
+ * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the
+ * map type, and updates @state with all information needed for a P2P transfer.
+ */
+static inline enum pci_p2pdma_map_type
+pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev,
+ struct page *page)
+{
+ if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
+ if (state->pgmap != page_pgmap(page))
+ __pci_p2pdma_update_state(state, dev, page);
+ return state->map;
+ }
+ return PCI_P2PDMA_MAP_NONE;
+}
+
+/**
+ * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address
+ * for a PCI_P2PDMA_MAP_BUS_ADDR transfer.
+ * @state: P2P state structure
+ * @paddr: physical address to map
+ *
+ * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer.
+ */
+static inline dma_addr_t
+pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr)
+{
+ WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR);
+ return paddr + state->bus_off;
+}
+
#endif /* _LINUX_PCI_P2P_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 51e2bd6405cd..b231cbc67a35 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1671,7 +1671,7 @@ void pci_disable_msi(struct pci_dev *dev);
int pci_msix_vec_count(struct pci_dev *dev);
void pci_disable_msix(struct pci_dev *dev);
void pci_restore_msi_state(struct pci_dev *dev);
-int pci_msi_enabled(void);
+bool pci_msi_enabled(void);
int pci_enable_msi(struct pci_dev *dev);
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec);
@@ -1704,7 +1704,7 @@ static inline void pci_disable_msi(struct pci_dev *dev) { }
static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; }
static inline void pci_disable_msix(struct pci_dev *dev) { }
static inline void pci_restore_msi_state(struct pci_dev *dev) { }
-static inline int pci_msi_enabled(void) { return 0; }
+static inline bool pci_msi_enabled(void) { return false; }
static inline int pci_enable_msi(struct pci_dev *dev)
{ return -ENOSYS; }
static inline int pci_enable_msix_range(struct pci_dev *dev,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2e28182c3af0..e2d71b6fdd84 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3049,6 +3049,7 @@
#define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d
#define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55
#define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58
+#define PCI_DEVICE_ID_INTEL_HDA_WCL 0x4d28
#define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8
#define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90
#define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91
@@ -3070,6 +3071,7 @@
#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6
#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff
+#define PCI_DEVICE_ID_INTEL_HDA_FCL 0x67a8
#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index af7d75ede619..288f5235649a 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = { \
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, static)
-extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool);
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem,
+ bool freezable)
{
might_sleep();
@@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss)))
this_cpu_inc(*sem->read_count);
else
- __percpu_down_read(sem, false); /* Unconditional memory barrier */
+ __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */
/*
* The preempt_enable() prevents the compiler from
* bleeding the critical section out.
@@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
preempt_enable();
}
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+ percpu_down_read_internal(sem, false);
+}
+
+static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem,
+ bool freeze)
+{
+ percpu_down_read_internal(sem, freeze);
+}
+
static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
bool ret = true;
@@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss)))
this_cpu_inc(*sem->read_count);
else
- ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+ ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */
preempt_enable();
/*
* The barrier() from preempt_enable() prevents the compiler from
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 52b5ea663b9f..85bf8dd9f087 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -15,11 +15,7 @@
/* enough to cover all DEFINE_PER_CPUs in modules */
#ifdef CONFIG_MODULES
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-#define PERCPU_MODULE_RESERVE (8 << 13)
-#else
#define PERCPU_MODULE_RESERVE (8 << 10)
-#endif
#else
#define PERCPU_MODULE_RESERVE 0
#endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0069ba6866a4..52dc7cfab0e0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -26,18 +26,9 @@
# include <asm/local64.h>
#endif
-#define PERF_GUEST_ACTIVE 0x01
-#define PERF_GUEST_USER 0x02
-
-struct perf_guest_info_callbacks {
- unsigned int (*state)(void);
- unsigned long (*get_ip)(void);
- unsigned int (*handle_intel_pt_intr)(void);
-};
-
#ifdef CONFIG_HAVE_HW_BREAKPOINT
-#include <linux/rhashtable-types.h>
-#include <asm/hw_breakpoint.h>
+# include <linux/rhashtable-types.h>
+# include <asm/hw_breakpoint.h>
#endif
#include <linux/list.h>
@@ -62,19 +53,20 @@ struct perf_guest_info_callbacks {
#include <linux/security.h>
#include <linux/static_call.h>
#include <linux/lockdep.h>
+
#include <asm/local.h>
struct perf_callchain_entry {
- __u64 nr;
- __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
+ u64 nr;
+ u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
};
struct perf_callchain_entry_ctx {
- struct perf_callchain_entry *entry;
- u32 max_stack;
- u32 nr;
- short contexts;
- bool contexts_maxed;
+ struct perf_callchain_entry *entry;
+ u32 max_stack;
+ u32 nr;
+ short contexts;
+ bool contexts_maxed;
};
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
@@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
* already stored in age order, the hw_idx should be 0.
*/
struct perf_branch_stack {
- __u64 nr;
- __u64 hw_idx;
+ u64 nr;
+ u64 hw_idx;
struct perf_branch_entry entries[];
};
@@ -132,10 +124,10 @@ struct task_struct;
* extra PMU register associated with an event
*/
struct hw_perf_event_extra {
- u64 config; /* register value */
- unsigned int reg; /* register address or index */
- int alloc; /* extra register already allocated */
- int idx; /* index in shared_regs->regs[] */
+ u64 config; /* register value */
+ unsigned int reg; /* register address or index */
+ int alloc; /* extra register already allocated */
+ int idx; /* index in shared_regs->regs[] */
};
/**
@@ -144,8 +136,8 @@ struct hw_perf_event_extra {
* PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
* usage.
*/
-#define PERF_EVENT_FLAG_ARCH 0x000fffff
-#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
+#define PERF_EVENT_FLAG_ARCH 0x0fffffff
+#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
@@ -157,7 +149,9 @@ struct hw_perf_event {
union {
struct { /* hardware */
u64 config;
+ u64 config1;
u64 last_tag;
+ u64 dyn_constraint;
unsigned long config_base;
unsigned long event_base;
int event_base_rdpmc;
@@ -225,9 +219,14 @@ struct hw_perf_event {
/*
* hw_perf_event::state flags; used to track the PERF_EF_* state.
*/
-#define PERF_HES_STOPPED 0x01 /* the counter is stopped */
-#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
-#define PERF_HES_ARCH 0x04
+
+/* the counter is stopped */
+#define PERF_HES_STOPPED 0x01
+
+/* event->count up-to-date */
+#define PERF_HES_UPTODATE 0x02
+
+#define PERF_HES_ARCH 0x04
int state;
@@ -276,7 +275,7 @@ struct hw_perf_event {
*/
u64 freq_time_stamp;
u64 freq_count_stamp;
-#endif
+#endif /* CONFIG_PERF_EVENTS */
};
struct perf_event;
@@ -285,28 +284,33 @@ struct perf_event_pmu_context;
/*
* Common implementation detail of pmu::{start,commit,cancel}_txn
*/
-#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */
-#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */
+
+/* txn to add/schedule event on PMU */
+#define PERF_PMU_TXN_ADD 0x1
+
+/* txn to read event group from PMU */
+#define PERF_PMU_TXN_READ 0x2
/**
* pmu::capabilities flags
*/
-#define PERF_PMU_CAP_NO_INTERRUPT 0x0001
-#define PERF_PMU_CAP_NO_NMI 0x0002
-#define PERF_PMU_CAP_AUX_NO_SG 0x0004
-#define PERF_PMU_CAP_EXTENDED_REGS 0x0008
-#define PERF_PMU_CAP_EXCLUSIVE 0x0010
-#define PERF_PMU_CAP_ITRACE 0x0020
-#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
-#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
-#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
-#define PERF_PMU_CAP_AUX_PAUSE 0x0200
+#define PERF_PMU_CAP_NO_INTERRUPT 0x0001
+#define PERF_PMU_CAP_NO_NMI 0x0002
+#define PERF_PMU_CAP_AUX_NO_SG 0x0004
+#define PERF_PMU_CAP_EXTENDED_REGS 0x0008
+#define PERF_PMU_CAP_EXCLUSIVE 0x0010
+#define PERF_PMU_CAP_ITRACE 0x0020
+#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
+#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
+#define PERF_PMU_CAP_AUX_PAUSE 0x0200
+#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400
/**
* pmu::scope
*/
enum perf_pmu_scope {
- PERF_PMU_SCOPE_NONE = 0,
+ PERF_PMU_SCOPE_NONE = 0,
PERF_PMU_SCOPE_CORE,
PERF_PMU_SCOPE_DIE,
PERF_PMU_SCOPE_CLUSTER,
@@ -325,6 +329,9 @@ struct perf_output_handle;
struct pmu {
struct list_head entry;
+ spinlock_t events_lock;
+ struct list_head events;
+
struct module *module;
struct device *dev;
struct device *parent;
@@ -387,11 +394,21 @@ struct pmu {
* Flags for ->add()/->del()/ ->start()/->stop(). There are
* matching hw_perf_event::state flags.
*/
-#define PERF_EF_START 0x01 /* start the counter when adding */
-#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
-#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
-#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
-#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
+
+/* start the counter when adding */
+#define PERF_EF_START 0x01
+
+/* reload the counter when starting */
+#define PERF_EF_RELOAD 0x02
+
+/* update the counter when stopping */
+#define PERF_EF_UPDATE 0x04
+
+/* AUX area event, pause tracing */
+#define PERF_EF_PAUSE 0x08
+
+/* AUX area event, resume tracing */
+#define PERF_EF_RESUME 0x10
/*
* Adds/Removes a counter to/from the PMU, can be done inside a
@@ -590,10 +607,10 @@ enum perf_addr_filter_action_t {
* This is a hardware-agnostic filter configuration as specified by the user.
*/
struct perf_addr_filter {
- struct list_head entry;
- struct path path;
- unsigned long offset;
- unsigned long size;
+ struct list_head entry;
+ struct path path;
+ unsigned long offset;
+ unsigned long size;
enum perf_addr_filter_action_t action;
};
@@ -608,23 +625,24 @@ struct perf_addr_filter {
* bundled together; see perf_event_addr_filters().
*/
struct perf_addr_filters_head {
- struct list_head list;
- raw_spinlock_t lock;
- unsigned int nr_file_filters;
+ struct list_head list;
+ raw_spinlock_t lock;
+ unsigned int nr_file_filters;
};
struct perf_addr_filter_range {
- unsigned long start;
- unsigned long size;
+ unsigned long start;
+ unsigned long size;
};
/**
* enum perf_event_state - the states of an event:
*/
enum perf_event_state {
- PERF_EVENT_STATE_DEAD = -4,
- PERF_EVENT_STATE_EXIT = -3,
- PERF_EVENT_STATE_ERROR = -2,
+ PERF_EVENT_STATE_DEAD = -5,
+ PERF_EVENT_STATE_REVOKED = -4, /* pmu gone, must not touch */
+ PERF_EVENT_STATE_EXIT = -3, /* task died, still inherit */
+ PERF_EVENT_STATE_ERROR = -2, /* scheduling error, can enable */
PERF_EVENT_STATE_OFF = -1,
PERF_EVENT_STATE_INACTIVE = 0,
PERF_EVENT_STATE_ACTIVE = 1,
@@ -662,24 +680,24 @@ struct swevent_hlist {
struct rcu_head rcu_head;
};
-#define PERF_ATTACH_CONTEXT 0x0001
-#define PERF_ATTACH_GROUP 0x0002
-#define PERF_ATTACH_TASK 0x0004
-#define PERF_ATTACH_TASK_DATA 0x0008
-#define PERF_ATTACH_GLOBAL_DATA 0x0010
-#define PERF_ATTACH_SCHED_CB 0x0020
-#define PERF_ATTACH_CHILD 0x0040
-#define PERF_ATTACH_EXCLUSIVE 0x0080
-#define PERF_ATTACH_CALLCHAIN 0x0100
-#define PERF_ATTACH_ITRACE 0x0200
+#define PERF_ATTACH_CONTEXT 0x0001
+#define PERF_ATTACH_GROUP 0x0002
+#define PERF_ATTACH_TASK 0x0004
+#define PERF_ATTACH_TASK_DATA 0x0008
+#define PERF_ATTACH_GLOBAL_DATA 0x0010
+#define PERF_ATTACH_SCHED_CB 0x0020
+#define PERF_ATTACH_CHILD 0x0040
+#define PERF_ATTACH_EXCLUSIVE 0x0080
+#define PERF_ATTACH_CALLCHAIN 0x0100
+#define PERF_ATTACH_ITRACE 0x0200
struct bpf_prog;
struct perf_cgroup;
struct perf_buffer;
struct pmu_event_list {
- raw_spinlock_t lock;
- struct list_head list;
+ raw_spinlock_t lock;
+ struct list_head list;
};
/*
@@ -689,12 +707,12 @@ struct pmu_event_list {
* disabled is sufficient since it will hold-off the IPIs.
*/
#ifdef CONFIG_PROVE_LOCKING
-#define lockdep_assert_event_ctx(event) \
+# define lockdep_assert_event_ctx(event) \
WARN_ON_ONCE(__lockdep_enabled && \
(this_cpu_read(hardirqs_enabled) && \
lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
#else
-#define lockdep_assert_event_ctx(event)
+# define lockdep_assert_event_ctx(event)
#endif
#define for_each_sibling_event(sibling, event) \
@@ -852,9 +870,9 @@ struct perf_event {
#ifdef CONFIG_EVENT_TRACING
struct trace_event_call *tp_event;
struct event_filter *filter;
-#ifdef CONFIG_FUNCTION_TRACER
+# ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops ftrace_ops;
-#endif
+# endif
#endif
#ifdef CONFIG_CGROUP_PERF
@@ -865,6 +883,7 @@ struct perf_event {
void *security;
#endif
struct list_head sb_list;
+ struct list_head pmu_list;
/*
* Certain events gets forwarded to another pmu internally by over-
@@ -872,7 +891,7 @@ struct perf_event {
* of it. event->orig_type contains original 'type' requested by
* user.
*/
- __u32 orig_type;
+ u32 orig_type;
#endif /* CONFIG_PERF_EVENTS */
};
@@ -937,8 +956,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
}
struct perf_event_groups {
- struct rb_root tree;
- u64 index;
+ struct rb_root tree;
+ u64 index;
};
@@ -1155,7 +1174,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
extern void perf_event_itrace_started(struct perf_event *event);
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
-extern void perf_pmu_unregister(struct pmu *pmu);
+extern int perf_pmu_unregister(struct pmu *pmu);
extern void __perf_event_task_sched_in(struct task_struct *prev,
struct task_struct *task);
@@ -1181,16 +1200,18 @@ extern void perf_pmu_resched(struct pmu *pmu);
extern int perf_event_refresh(struct perf_event *event, int refresh);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
+
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
- int cpu,
- struct task_struct *task,
- perf_overflow_handler_t callback,
- void *context);
+ int cpu,
+ struct task_struct *task,
+ perf_overflow_handler_t callback,
+ void *context);
+
extern void perf_pmu_migrate_context(struct pmu *pmu,
- int src_cpu, int dst_cpu);
-int perf_event_read_local(struct perf_event *event, u64 *value,
- u64 *enabled, u64 *running);
+ int src_cpu, int dst_cpu);
+extern int perf_event_read_local(struct perf_event *event, u64 *value,
+ u64 *enabled, u64 *running);
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
@@ -1407,14 +1428,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data,
*/
static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
{
- br->mispred = 0;
- br->predicted = 0;
- br->in_tx = 0;
- br->abort = 0;
- br->cycles = 0;
- br->type = 0;
- br->spec = PERF_BR_SPEC_NA;
- br->reserved = 0;
+ br->mispred = 0;
+ br->predicted = 0;
+ br->in_tx = 0;
+ br->abort = 0;
+ br->cycles = 0;
+ br->type = 0;
+ br->spec = PERF_BR_SPEC_NA;
+ br->reserved = 0;
}
extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1603,7 +1624,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
enum perf_bpf_event_type type,
u16 flags);
+#define PERF_GUEST_ACTIVE 0x01
+#define PERF_GUEST_USER 0x02
+
+struct perf_guest_info_callbacks {
+ unsigned int (*state)(void);
+ unsigned long (*get_ip)(void);
+ unsigned int (*handle_intel_pt_intr)(void);
+};
+
#ifdef CONFIG_GUEST_PERF_EVENTS
+
extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
@@ -1614,21 +1645,27 @@ static inline unsigned int perf_guest_state(void)
{
return static_call(__perf_guest_state)();
}
+
static inline unsigned long perf_guest_get_ip(void)
{
return static_call(__perf_guest_get_ip)();
}
+
static inline unsigned int perf_guest_handle_intel_pt_intr(void)
{
return static_call(__perf_guest_handle_intel_pt_intr)();
}
+
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
-#else
+
+#else /* !CONFIG_GUEST_PERF_EVENTS: */
+
static inline unsigned int perf_guest_state(void) { return 0; }
static inline unsigned long perf_guest_get_ip(void) { return 0; }
static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
-#endif /* CONFIG_GUEST_PERF_EVENTS */
+
+#endif /* !CONFIG_GUEST_PERF_EVENTS */
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
@@ -1658,6 +1695,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *
{
if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
struct perf_callchain_entry *entry = ctx->entry;
+
entry->ip[entry->nr++] = ip;
++ctx->contexts;
return 0;
@@ -1671,6 +1709,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64
{
if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
struct perf_callchain_entry *entry = ctx->entry;
+
entry->ip[entry->nr++] = ip;
++ctx->nr;
return 0;
@@ -1697,7 +1736,7 @@ static inline int perf_is_paranoid(void)
return sysctl_perf_event_paranoid > -1;
}
-int perf_allow_kernel(void);
+extern int perf_allow_kernel(void);
static inline int perf_allow_cpu(void)
{
@@ -1760,7 +1799,7 @@ static inline bool needs_branch_stack(struct perf_event *event)
static inline bool has_aux(struct perf_event *event)
{
- return event->pmu->setup_aux;
+ return event->pmu && event->pmu->setup_aux;
}
static inline bool has_aux_action(struct perf_event *event)
@@ -1819,7 +1858,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle,
extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
- const void *buf, unsigned int len);
+ const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
unsigned int len);
extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
@@ -1836,7 +1875,9 @@ extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);
+
#else /* !CONFIG_PERF_EVENTS: */
+
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event) { return NULL; }
@@ -1914,19 +1955,14 @@ static inline void perf_event_disable(struct perf_event *event) { }
static inline int __perf_event_disable(void *info) { return -1; }
static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
-static inline int perf_event_period(struct perf_event *event, u64 value)
-{
- return -EINVAL;
-}
-static inline u64 perf_event_pause(struct perf_event *event, bool reset)
-{
- return 0;
-}
-static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
-{
- return 0;
-}
-#endif
+static inline int
+perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; }
+static inline u64
+perf_event_pause(struct perf_event *event, bool reset) { return 0; }
+static inline int
+perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { return 0; }
+
+#endif /* !CONFIG_PERF_EVENTS */
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
@@ -1934,31 +1970,31 @@ extern void perf_restore_debug_store(void);
static inline void perf_restore_debug_store(void) { }
#endif
-#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
+#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
struct perf_pmu_events_attr {
- struct device_attribute attr;
- u64 id;
- const char *event_str;
+ struct device_attribute attr;
+ u64 id;
+ const char *event_str;
};
struct perf_pmu_events_ht_attr {
- struct device_attribute attr;
- u64 id;
- const char *event_str_ht;
- const char *event_str_noht;
+ struct device_attribute attr;
+ u64 id;
+ const char *event_str_ht;
+ const char *event_str_noht;
};
struct perf_pmu_events_hybrid_attr {
- struct device_attribute attr;
- u64 id;
- const char *event_str;
- u64 pmu_type;
+ struct device_attribute attr;
+ u64 id;
+ const char *event_str;
+ u64 pmu_type;
};
struct perf_pmu_format_hybrid_attr {
- struct device_attribute attr;
- u64 pmu_type;
+ struct device_attribute attr;
+ u64 pmu_type;
};
ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
@@ -2000,11 +2036,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
/* Performance counter hotplug functions */
#ifdef CONFIG_PERF_EVENTS
-int perf_event_init_cpu(unsigned int cpu);
-int perf_event_exit_cpu(unsigned int cpu);
+extern int perf_event_init_cpu(unsigned int cpu);
+extern int perf_event_exit_cpu(unsigned int cpu);
#else
-#define perf_event_init_cpu NULL
-#define perf_event_exit_cpu NULL
+# define perf_event_init_cpu NULL
+# define perf_event_exit_cpu NULL
#endif
extern void arch_perf_update_userpage(struct perf_event *event,
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index c74077977830..8a7f4f802c57 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
return tag;
}
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
+{
+ if (mem_alloc_profiling_enabled())
+ return __pgalloc_tag_get(page);
+ return NULL;
+}
+
void pgalloc_tag_split(struct folio *folio, int old_order, int new_order);
void pgalloc_tag_swap(struct folio *new, struct folio *old);
@@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {}
static inline void alloc_tag_sec_init(void) {}
static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {}
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
#endif /* CONFIG_MEM_ALLOC_PROFILING */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 311ecebd7d56..453ae6d8a68d 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,7 +77,7 @@ struct file;
struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file);
void do_notify_pidfd(struct task_struct *task);
static inline struct pid *get_pid(struct pid *pid)
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 05e6f8f4a026..77e7db194914 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -2,11 +2,19 @@
#ifndef _LINUX_PID_FS_H
#define _LINUX_PID_FS_H
+struct coredump_params;
+
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid);
void pidfs_exit(struct task_struct *tsk);
+#ifdef CONFIG_COREDUMP
+void pidfs_coredump(const struct coredump_params *cprm);
+#endif
extern const struct dentry_operations pidfs_dentry_operations;
+int pidfs_register_pid(struct pid *pid);
+void pidfs_get_pid(struct pid *pid);
+void pidfs_put_pid(struct pid *pid);
#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index d56a78af4af1..0b18160901a2 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -142,6 +142,8 @@ struct genpd_governor_data {
bool max_off_time_changed;
ktime_t next_wakeup;
ktime_t next_hrtimer;
+ ktime_t last_enter;
+ bool reflect_residency;
bool cached_power_down_ok;
bool cached_power_down_state_idx;
};
@@ -153,6 +155,8 @@ struct genpd_power_state {
s64 residency_ns;
u64 usage;
u64 rejected;
+ u64 above;
+ u64 below;
struct fwnode_handle *fwnode;
u64 idle_time;
void *data;
@@ -285,6 +289,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
int pm_genpd_init(struct generic_pm_domain *genpd,
struct dev_power_governor *gov, bool is_off);
int pm_genpd_remove(struct generic_pm_domain *genpd);
+void pm_genpd_inc_rejected(struct generic_pm_domain *genpd,
+ unsigned int state_idx);
struct device *dev_to_genpd_dev(struct device *dev);
int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state);
int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb);
@@ -336,6 +342,10 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd)
return -EOPNOTSUPP;
}
+static inline void pm_genpd_inc_rejected(struct generic_pm_domain *genpd,
+ unsigned int state_idx)
+{ }
+
static inline struct device *dev_to_genpd_dev(struct device *dev)
{
return ERR_PTR(-EOPNOTSUPP);
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index c247317aae38..cf477beae4bb 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -11,6 +11,7 @@
#ifndef __LINUX_OPP_H__
#define __LINUX_OPP_H__
+#include <linux/cleanup.h>
#include <linux/energy_model.h>
#include <linux/err.h>
#include <linux/notifier.h>
@@ -100,7 +101,7 @@ struct dev_pm_opp_data {
#if defined(CONFIG_PM_OPP)
struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
-void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table);
+struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table);
void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index);
@@ -161,7 +162,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
unsigned int *bw, int index);
-void dev_pm_opp_get(struct dev_pm_opp *opp);
+struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp);
void dev_pm_opp_put(struct dev_pm_opp *opp);
int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp);
@@ -196,6 +197,7 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
void dev_pm_opp_remove_table(struct device *dev);
void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask);
int dev_pm_opp_sync_regulators(struct device *dev);
+
#else
static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
{
@@ -207,7 +209,10 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *
return ERR_PTR(-EOPNOTSUPP);
}
-static inline void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) {}
+static inline struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table)
+{
+ return opp_table;
+}
static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
@@ -345,7 +350,10 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
return ERR_PTR(-EOPNOTSUPP);
}
-static inline void dev_pm_opp_get(struct dev_pm_opp *opp) {}
+static inline struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp)
+{
+ return opp;
+}
static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {}
@@ -573,6 +581,12 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta
}
#endif
+/* Scope based cleanup macro for OPP reference counting */
+DEFINE_FREE(put_opp, struct dev_pm_opp *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put(_T))
+
+/* Scope based cleanup macro for OPP table reference counting */
+DEFINE_FREE(put_opp_table, struct opp_table *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put_opp_table(_T))
+
/* OPP Configuration helpers */
static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
@@ -704,4 +718,14 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
return dev_pm_opp_get_freq_indexed(opp, 0);
}
+static inline int dev_pm_opp_set_level(struct device *dev, unsigned int level)
+{
+ struct dev_pm_opp *opp __free(put_opp) = dev_pm_opp_find_level_exact(dev, level);
+
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+
+ return dev_pm_opp_set_opp(dev, opp);
+}
+
#endif /* __LINUX_OPP_H__ */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7fb5a459847e..756b842dcd30 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -96,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev);
extern void pm_runtime_drop_link(struct device_link *link);
extern void pm_runtime_release_supplier(struct device_link *link);
+int devm_pm_runtime_set_active_enabled(struct device *dev);
extern int devm_pm_runtime_enable(struct device *dev);
+int devm_pm_runtime_get_noresume(struct device *dev);
/**
* pm_suspend_ignore_children - Set runtime PM behavior regarding children.
@@ -294,7 +296,9 @@ static inline bool pm_runtime_blocked(struct device *dev) { return true; }
static inline void pm_runtime_allow(struct device *dev) {}
static inline void pm_runtime_forbid(struct device *dev) {}
+static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; }
static inline int devm_pm_runtime_enable(struct device *dev) { return 0; }
+static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; }
static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
static inline void pm_runtime_get_noresume(struct device *dev) {}
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 888824592953..c4cb854971f5 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -288,6 +288,7 @@ struct power_supply_desc {
struct power_supply_ext {
const char *const name;
u8 charge_behaviours;
+ u32 charge_types;
const enum power_supply_property *properties;
size_t num_properties;
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index f3cad182d4ef..0b3a36bdaa90 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -954,6 +954,7 @@ int sev_do_cmd(int cmd, void *data, int *psp_ret);
void *psp_copy_user_blob(u64 uaddr, u32 len);
void *snp_alloc_firmware_page(gfp_t mask);
void snp_free_firmware_page(void *addr);
+void sev_platform_shutdown(void);
#else /* !CONFIG_CRYPTO_DEV_SP_PSP */
@@ -988,6 +989,8 @@ static inline void *snp_alloc_firmware_page(gfp_t mask)
static inline void snp_free_firmware_page(void *addr) { }
+static inline void sev_platform_shutdown(void) { }
+
#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
#endif /* __PSP_SEV_H__ */
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 9ece4e5d3815..63a17d2b4ec8 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -218,6 +218,8 @@ static inline void pwm_init_state(const struct pwm_device *pwm,
*
* pwm_get_state(pwm, &state);
* duty = pwm_get_relative_duty_cycle(&state, 100);
+ *
+ * Returns: rounded relative duty cycle multiplied by @scale
*/
static inline unsigned int
pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale)
@@ -244,8 +246,8 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale)
* pwm_set_relative_duty_cycle(&state, 50, 100);
* pwm_apply_might_sleep(pwm, &state);
*
- * This functions returns -EINVAL if @duty_cycle and/or @scale are
- * inconsistent (@scale == 0 or @duty_cycle > @scale).
+ * Returns: 0 on success or ``-EINVAL`` if @duty_cycle and/or @scale are
+ * inconsistent (@scale == 0 or @duty_cycle > @scale)
*/
static inline int
pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle,
@@ -351,7 +353,7 @@ struct pwm_chip {
* pwmchip_supports_waveform() - checks if the given chip supports waveform callbacks
* @chip: The pwm_chip to test
*
- * Returns true iff the pwm chip support the waveform functions like
+ * Returns: true iff the pwm chip support the waveform functions like
* pwm_set_waveform_might_sleep() and pwm_round_waveform_might_sleep()
*/
static inline bool pwmchip_supports_waveform(struct pwm_chip *chip)
@@ -369,7 +371,7 @@ static inline struct device *pwmchip_parent(const struct pwm_chip *chip)
return chip->dev.parent;
}
-static inline void *pwmchip_get_drvdata(struct pwm_chip *chip)
+static inline void *pwmchip_get_drvdata(const struct pwm_chip *chip)
{
return dev_get_drvdata(&chip->dev);
}
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index b17e0cd0a30c..7aaad158ee37 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -22,16 +22,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
DEFAULT_RATELIMIT_BURST);
}
+static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs)
+{
+ atomic_inc(&rs->missed);
+}
+
+static inline int ratelimit_state_get_miss(struct ratelimit_state *rs)
+{
+ return atomic_read(&rs->missed);
+}
+
+static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs)
+{
+ return atomic_xchg_relaxed(&rs->missed, 0);
+}
+
+static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rs->lock, flags);
+ rs->interval = interval_init;
+ rs->flags &= ~RATELIMIT_INITIALIZED;
+ atomic_set(&rs->rs_n_left, rs->burst);
+ ratelimit_state_reset_miss(rs);
+ raw_spin_unlock_irqrestore(&rs->lock, flags);
+}
+
static inline void ratelimit_state_exit(struct ratelimit_state *rs)
{
+ int m;
+
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
return;
- if (rs->missed) {
- pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
- current->comm, rs->missed);
- rs->missed = 0;
- }
+ m = ratelimit_state_reset_miss(rs);
+ if (m)
+ pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m);
}
static inline void
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index 765232ce0b5e..b19c4354540a 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -11,14 +11,15 @@
/* issue num suppressed message on exit */
#define RATELIMIT_MSG_ON_RELEASE BIT(0)
+#define RATELIMIT_INITIALIZED BIT(1)
struct ratelimit_state {
raw_spinlock_t lock; /* protect the state */
int interval;
int burst;
- int printed;
- int missed;
+ atomic_t rs_n_left;
+ atomic_t missed;
unsigned int flags;
unsigned long begin;
};
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
index 6322d8c1c6b4..2fb2af6d9824 100644
--- a/include/linux/rcuref.h
+++ b/include/linux/rcuref.h
@@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
* rcuref_read - Read the number of held reference counts of a rcuref
* @ref: Pointer to the reference count
*
- * Return: The number of held references (0 ... N)
+ * Return: The number of held references (0 ... N). The value 0 does not
+ * indicate that it is safe to schedule the object, protected by this reference
+ * counter, for deconstruction.
+ * If you want to know if the reference counter has been marked DEAD (as
+ * signaled by rcuref_put()) please use rcuread_is_dead().
*/
static inline unsigned int rcuref_read(rcuref_t *ref)
{
@@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref)
return c >= RCUREF_RELEASED ? 0 : c + 1;
}
+/**
+ * rcuref_is_dead - Check if the rcuref has been already marked dead
+ * @ref: Pointer to the reference count
+ *
+ * Return: True if the object has been marked DEAD. This signals that a previous
+ * invocation of rcuref_put() returned true on this reference counter meaning
+ * the protected object can safely be scheduled for deconstruction.
+ * Otherwise, returns false.
+ */
+static inline bool rcuref_is_dead(rcuref_t *ref)
+{
+ unsigned int c = atomic_read(&ref->refcnt);
+
+ return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF);
+}
+
extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
/**
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index d17c5ea3d55d..02b83f5499b8 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1641,6 +1641,8 @@ struct regmap_irq_chip_data;
* @ack_invert: Inverted ack register: cleared bits for ack.
* @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts.
* @status_invert: Inverted status register: cleared bits are active interrupts.
+ * @status_is_level: Status register is actuall signal level: Xor status
+ * register with previous value to get active interrupts.
* @wake_invert: Inverted wake register: cleared bits are wake enabled.
* @type_in_mask: Use the mask registers for controlling irq type. Use this if
* the hardware provides separate bits for rising/falling edge
@@ -1704,6 +1706,7 @@ struct regmap_irq_chip {
unsigned int ack_invert:1;
unsigned int clear_ack:1;
unsigned int status_invert:1;
+ unsigned int status_is_level:1;
unsigned int wake_invert:1;
unsigned int type_in_mask:1;
unsigned int clear_on_unmask:1;
diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h
index 8712c091abf0..61dcd8e00a2f 100644
--- a/include/linux/regulator/max8952.h
+++ b/include/linux/regulator/max8952.h
@@ -2,7 +2,7 @@
/*
* max8952.h - Voltage regulation for the Maxim 8952
*
- * Copyright (C) 2010 Samsung Electrnoics
+ * Copyright (C) 2010 Samsung Electronics
* MyungJoo Ham <myungjoo.ham@samsung.com>
*/
diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h
index b427b5873de1..85b4fecc10d8 100644
--- a/include/linux/regulator/pca9450.h
+++ b/include/linux/regulator/pca9450.h
@@ -35,6 +35,8 @@ enum {
PCA9450_DVS_LEVEL_MAX,
};
+#define PCA9450_RESTART_HANDLER_PRIORITY 130
+
#define PCA9450_BUCK1_VOLTAGE_NUM 0x80
#define PCA9450_BUCK2_VOLTAGE_NUM 0x80
#define PCA9450_BUCK3_VOLTAGE_NUM 0x80
@@ -235,4 +237,7 @@ enum {
#define I2C_LT_ON_RUN 0x02
#define I2C_LT_FORCE_ENABLE 0x03
+/* PCA9450_REG_SW_RST command */
+#define SW_RST_COMMAND 0x14
+
#endif /* __LINUX_REG_PCA9450_H__ */
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 880351ca3dfc..9ba771f2ddea 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -8,6 +8,10 @@
#include <linux/pid.h>
#include <linux/resctrl_types.h>
+#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL
+#include <asm/resctrl.h>
+#endif
+
/* CLOSID, RMID value used by the default control group */
#define RESCTRL_RESERVED_CLOSID 0
#define RESCTRL_RESERVED_RMID 0
@@ -44,6 +48,16 @@ int proc_resctrl_show(struct seq_file *m,
for_each_rdt_resource((r)) \
if ((r)->mon_capable)
+enum resctrl_res_level {
+ RDT_RESOURCE_L3,
+ RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
+ RDT_RESOURCE_SMBA,
+
+ /* Must be the last */
+ RDT_NUM_RESOURCES,
+};
+
/**
* enum resctrl_conf_type - The type of configuration.
* @CDP_NONE: No prioritisation, both code and data are controlled or monitored.
@@ -358,7 +372,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
u32 resctrl_arch_system_num_rmid_idx(void);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
-__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
+bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
/**
* resctrl_arch_mon_event_config_write() - Write the config for an event.
@@ -399,6 +413,9 @@ static inline u32 resctrl_get_config_index(u32 closid,
}
}
+bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l);
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+
/*
* Update the ctrl_val and apply this config right now.
* Must be called on one of the domain's CPUs.
@@ -514,7 +531,20 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
-int __init resctrl_init(void);
-void __exit resctrl_exit(void);
-
+int resctrl_init(void);
+void resctrl_exit(void);
+
+#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
+u64 resctrl_arch_get_prefetch_disable_bits(void);
+int resctrl_arch_pseudo_lock_fn(void *_plr);
+int resctrl_arch_measure_cycles_lat_fn(void *_plr);
+int resctrl_arch_measure_l2_residency(void *_plr);
+int resctrl_arch_measure_l3_residency(void *_plr);
+#else
+static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; }
+static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; }
+static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; }
+static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; }
+static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; }
+#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */
#endif /* _RESCTRL_H */
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index f26450b3326b..a25fb9c4070d 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -7,6 +7,9 @@
#ifndef __LINUX_RESCTRL_TYPES_H
#define __LINUX_RESCTRL_TYPES_H
+#define MAX_MBA_BW 100u
+#define MBM_OVERFLOW_INTERVAL 1000
+
/* Reads to Local DRAM Memory */
#define READS_TO_LOCAL_MEM BIT(0)
@@ -31,16 +34,6 @@
/* Max event bits supported */
#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
-enum resctrl_res_level {
- RDT_RESOURCE_L3,
- RDT_RESOURCE_L2,
- RDT_RESOURCE_MBA,
- RDT_RESOURCE_SMBA,
-
- /* Must be the last */
- RDT_NUM_RESOURCES,
-};
-
/*
* Event IDs, the values match those used to program IA32_QM_EVTSEL before
* reading IA32_QM_CTR on RDT systems.
@@ -49,6 +42,9 @@ enum resctrl_event_id {
QOS_L3_OCCUP_EVENT_ID = 0x01,
QOS_L3_MBM_TOTAL_EVENT_ID = 0x02,
QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
+
+ /* Must be the last */
+ QOS_NUM_EVENTS,
};
#endif /* __LINUX_RESCTRL_TYPES_H */
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 13f17676c5f4..7e50bbc94e47 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -26,7 +26,7 @@ struct restart_block {
unsigned long arch_data;
long (*fn)(struct restart_block *);
union {
- /* For futex_wait and futex_wait_requeue_pi */
+ /* For futex_wait() */
struct {
u32 __user *uaddr;
u32 val;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..45e5953b8f32 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -44,7 +44,6 @@
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
-#include <linux/livepatch_sched.h>
#include <linux/uidgid_types.h>
#include <linux/tracepoint-defs.h>
#include <asm/kmap_size.h>
@@ -1646,22 +1645,15 @@ struct task_struct {
struct user_event_mm *user_event_mm;
#endif
- /*
- * New fields for task_struct should be added above here, so that
- * they are included in the randomized portion of task_struct.
- */
- randomized_struct_fields_end
-
/* CPU-specific state of this task: */
struct thread_struct thread;
/*
- * WARNING: on x86, 'thread_struct' contains a variable-sized
- * structure. It *MUST* be at the end of 'task_struct'.
- *
- * Do not put anything below here!
+ * New fields for task_struct should be added above here, so that
+ * they are included in the randomized portion of task_struct.
*/
-};
+ randomized_struct_fields_end
+} __attribute__ ((aligned (64)));
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
@@ -2089,9 +2081,6 @@ extern int __cond_resched(void);
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-void sched_dynamic_klp_enable(void);
-void sched_dynamic_klp_disable(void);
-
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
static __always_inline int _cond_resched(void)
@@ -2112,7 +2101,6 @@ static __always_inline int _cond_resched(void)
static inline int _cond_resched(void)
{
- klp_sched_try_switch();
return __cond_resched();
}
@@ -2122,7 +2110,6 @@ static inline int _cond_resched(void)
static inline int _cond_resched(void)
{
- klp_sched_try_switch();
return 0;
}
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 7b4301b7235f..198bb5cc1774 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -195,6 +195,8 @@ struct sched_domain_topology_level {
};
extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
+extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);
+
# define SD_INIT_NAME(type) .name = #type
@@ -223,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu)
return true;
}
+static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
+{
+}
+
#endif /* !CONFIG_SMP */
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 0b273a7b9f01..5f03a39a26f7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -104,10 +104,11 @@ static inline bool shmem_mapping(struct address_space *mapping)
return false;
}
#endif /* CONFIG_SHMEM */
-extern void shmem_unlock_mapping(struct address_space *mapping);
-extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+void shmem_unlock_mapping(struct address_space *mapping);
+struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
-extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
+int shmem_writeout(struct folio *folio, struct writeback_control *wbc);
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 493d9de4e472..dc6ebaee3d18 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -365,7 +365,7 @@ struct sdw_intel_res {
* on e.g. which machine driver to select (I2S mode, HDaudio or
* SoundWire).
*/
-int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+int sdw_intel_acpi_scan(acpi_handle parent_handle,
struct sdw_intel_acpi_info *info);
void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx);
diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h
index f950d280461b..9fbef3fd4056 100644
--- a/include/linux/spi/sh_msiof.h
+++ b/include/linux/spi/sh_msiof.h
@@ -2,6 +2,131 @@
#ifndef __SPI_SH_MSIOF_H__
#define __SPI_SH_MSIOF_H__
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+#define SITMDR1 0x00 /* Transmit Mode Register 1 */
+#define SITMDR2 0x04 /* Transmit Mode Register 2 */
+#define SITMDR3 0x08 /* Transmit Mode Register 3 */
+#define SIRMDR1 0x10 /* Receive Mode Register 1 */
+#define SIRMDR2 0x14 /* Receive Mode Register 2 */
+#define SIRMDR3 0x18 /* Receive Mode Register 3 */
+#define SITSCR 0x20 /* Transmit Clock Select Register */
+#define SIRSCR 0x22 /* Receive Clock Select Register (SH, A1, APE6) */
+#define SICTR 0x28 /* Control Register */
+#define SIFCTR 0x30 /* FIFO Control Register */
+#define SISTR 0x40 /* Status Register */
+#define SIIER 0x44 /* Interrupt Enable Register */
+#define SITDR1 0x48 /* Transmit Control Data Register 1 (SH, A1) */
+#define SITDR2 0x4c /* Transmit Control Data Register 2 (SH, A1) */
+#define SITFDR 0x50 /* Transmit FIFO Data Register */
+#define SIRDR1 0x58 /* Receive Control Data Register 1 (SH, A1) */
+#define SIRDR2 0x5c /* Receive Control Data Register 2 (SH, A1) */
+#define SIRFDR 0x60 /* Receive FIFO Data Register */
+
+/* SITMDR1 and SIRMDR1 */
+#define SIMDR1_TRMD BIT(31) /* Transfer Mode (1 = Master mode) */
+#define SIMDR1_SYNCMD GENMASK(29, 28) /* SYNC Mode */
+#define SIMDR1_SYNCMD_PULSE 0U /* Frame start sync pulse */
+#define SIMDR1_SYNCMD_SPI 2U /* Level mode/SPI */
+#define SIMDR1_SYNCMD_LR 3U /* L/R mode */
+#define SIMDR1_SYNCAC BIT(25) /* Sync Polarity (1 = Active-low) */
+#define SIMDR1_BITLSB BIT(24) /* MSB/LSB First (1 = LSB first) */
+#define SIMDR1_DTDL GENMASK(22, 20) /* Data Pin Bit Delay for MSIOF_SYNC */
+#define SIMDR1_SYNCDL GENMASK(18, 16) /* Frame Sync Signal Timing Delay */
+#define SIMDR1_FLD GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */
+#define SIMDR1_XXSTP BIT(0) /* Transmission/Reception Stop on FIFO */
+/* SITMDR1 */
+#define SITMDR1_PCON BIT(30) /* Transfer Signal Connection */
+#define SITMDR1_SYNCCH GENMASK(27, 26) /* Sync Signal Channel Select */
+ /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
+
+/* SITMDR2 and SIRMDR2 */
+#define SIMDR2_GRP GENMASK(31, 30) /* Group Count */
+#define SIMDR2_BITLEN1 GENMASK(28, 24) /* Data Size (8-32 bits) */
+#define SIMDR2_WDLEN1 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */
+#define SIMDR2_GRPMASK GENMASK(3, 0) /* Group Output Mask 1-4 (SH, A1) */
+
+/* SITMDR3 and SIRMDR3 */
+#define SIMDR3_BITLEN2 GENMASK(28, 24) /* Data Size (8-32 bits) */
+#define SIMDR3_WDLEN2 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */
+
+/* SITSCR and SIRSCR */
+#define SISCR_BRPS GENMASK(12, 8) /* Prescaler Setting (1-32) */
+#define SISCR_BRDV GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */
+
+/* SICTR */
+#define SICTR_TSCKIZ GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */
+#define SICTR_TSCKIZ_SCK BIT(31) /* Disable SCK when TX disabled */
+#define SICTR_TSCKIZ_POL BIT(30) /* Transmit Clock Polarity */
+#define SICTR_RSCKIZ GENMASK(29, 28) /* Receive Clock Polarity Select */
+#define SICTR_RSCKIZ_SCK BIT(29) /* Must match CTR_TSCKIZ_SCK */
+#define SICTR_RSCKIZ_POL BIT(28) /* Receive Clock Polarity */
+#define SICTR_TEDG BIT(27) /* Transmit Timing (1 = falling edge) */
+#define SICTR_REDG BIT(26) /* Receive Timing (1 = falling edge) */
+#define SICTR_TXDIZ GENMASK(23, 22) /* Pin Output When TX is Disabled */
+#define SICTR_TXDIZ_LOW 0U /* 0 */
+#define SICTR_TXDIZ_HIGH 1U /* 1 */
+#define SICTR_TXDIZ_HIZ 2U /* High-impedance */
+#define SICTR_TSCKE BIT(15) /* Transmit Serial Clock Output Enable */
+#define SICTR_TFSE BIT(14) /* Transmit Frame Sync Signal Output Enable */
+#define SICTR_TXE BIT(9) /* Transmit Enable */
+#define SICTR_RXE BIT(8) /* Receive Enable */
+#define SICTR_TXRST BIT(1) /* Transmit Reset */
+#define SICTR_RXRST BIT(0) /* Receive Reset */
+
+/* SIFCTR */
+#define SIFCTR_TFWM GENMASK(31, 29) /* Transmit FIFO Watermark */
+#define SIFCTR_TFWM_64 0U /* Transfer Request when 64 empty stages */
+#define SIFCTR_TFWM_32 1U /* Transfer Request when 32 empty stages */
+#define SIFCTR_TFWM_24 2U /* Transfer Request when 24 empty stages */
+#define SIFCTR_TFWM_16 3U /* Transfer Request when 16 empty stages */
+#define SIFCTR_TFWM_12 4U /* Transfer Request when 12 empty stages */
+#define SIFCTR_TFWM_8 5U /* Transfer Request when 8 empty stages */
+#define SIFCTR_TFWM_4 6U /* Transfer Request when 4 empty stages */
+#define SIFCTR_TFWM_1 7U /* Transfer Request when 1 empty stage */
+#define SIFCTR_TFUA GENMASK(28, 20) /* Transmit FIFO Usable Area */
+#define SIFCTR_RFWM GENMASK(15, 13) /* Receive FIFO Watermark */
+#define SIFCTR_RFWM_1 0U /* Transfer Request when 1 valid stages */
+#define SIFCTR_RFWM_4 1U /* Transfer Request when 4 valid stages */
+#define SIFCTR_RFWM_8 2U /* Transfer Request when 8 valid stages */
+#define SIFCTR_RFWM_16 3U /* Transfer Request when 16 valid stages */
+#define SIFCTR_RFWM_32 4U /* Transfer Request when 32 valid stages */
+#define SIFCTR_RFWM_64 5U /* Transfer Request when 64 valid stages */
+#define SIFCTR_RFWM_128 6U /* Transfer Request when 128 valid stages */
+#define SIFCTR_RFWM_256 7U /* Transfer Request when 256 valid stages */
+#define SIFCTR_RFUA GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */
+
+/* SISTR */
+#define SISTR_TFEMP BIT(29) /* Transmit FIFO Empty */
+#define SISTR_TDREQ BIT(28) /* Transmit Data Transfer Request */
+#define SISTR_TEOF BIT(23) /* Frame Transmission End */
+#define SISTR_TFSERR BIT(21) /* Transmit Frame Synchronization Error */
+#define SISTR_TFOVF BIT(20) /* Transmit FIFO Overflow */
+#define SISTR_TFUDF BIT(19) /* Transmit FIFO Underflow */
+#define SISTR_RFFUL BIT(13) /* Receive FIFO Full */
+#define SISTR_RDREQ BIT(12) /* Receive Data Transfer Request */
+#define SISTR_REOF BIT(7) /* Frame Reception End */
+#define SISTR_RFSERR BIT(5) /* Receive Frame Synchronization Error */
+#define SISTR_RFUDF BIT(4) /* Receive FIFO Underflow */
+#define SISTR_RFOVF BIT(3) /* Receive FIFO Overflow */
+
+/* SIIER */
+#define SIIER_TDMAE BIT(31) /* Transmit Data DMA Transfer Req. Enable */
+#define SIIER_TFEMPE BIT(29) /* Transmit FIFO Empty Enable */
+#define SIIER_TDREQE BIT(28) /* Transmit Data Transfer Request Enable */
+#define SIIER_TEOFE BIT(23) /* Frame Transmission End Enable */
+#define SIIER_TFSERRE BIT(21) /* Transmit Frame Sync Error Enable */
+#define SIIER_TFOVFE BIT(20) /* Transmit FIFO Overflow Enable */
+#define SIIER_TFUDFE BIT(19) /* Transmit FIFO Underflow Enable */
+#define SIIER_RDMAE BIT(15) /* Receive Data DMA Transfer Req. Enable */
+#define SIIER_RFFULE BIT(13) /* Receive FIFO Full Enable */
+#define SIIER_RDREQE BIT(12) /* Receive Data Transfer Request Enable */
+#define SIIER_REOFE BIT(7) /* Frame Reception End Enable */
+#define SIIER_RFSERRE BIT(5) /* Receive Frame Sync Error Enable */
+#define SIIER_RFUDFE BIT(4) /* Receive FIFO Underflow Enable */
+#define SIIER_RFOVFE BIT(3) /* Receive FIFO Overflow Enable */
+
enum {
MSIOF_SPI_HOST,
MSIOF_SPI_TARGET,
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0ba5e49bace4..4789f91dae94 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -136,13 +136,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* @max_speed_hz: Maximum clock rate to be used with this chip
* (on this board); may be changed by the device's driver.
* The spi_transfer.speed_hz can override this for each transfer.
- * @chip_select: Array of physical chipselect, spi->chipselect[i] gives
- * the corresponding physical CS for logical CS i.
- * @mode: The spi mode defines how data is clocked out and in.
- * This may be changed by the device's driver.
- * The "active low" default for chipselect mode can be overridden
- * (by specifying SPI_CS_HIGH) as can the "MSB first" default for
- * each word in a transfer (by specifying SPI_LSB_FIRST).
* @bits_per_word: Data transfers involve one or more words; word sizes
* like eight or 12 bits are common. In-memory wordsizes are
* powers of two bytes (e.g. 20 bit samples use 32 bits).
@@ -150,6 +143,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* default (0) indicating protocol words are eight bit bytes.
* The spi_transfer.bits_per_word can override this for each transfer.
* @rt: Make the pump thread real time priority.
+ * @mode: The spi mode defines how data is clocked out and in.
+ * This may be changed by the device's driver.
+ * The "active low" default for chipselect mode can be overridden
+ * (by specifying SPI_CS_HIGH) as can the "MSB first" default for
+ * each word in a transfer (by specifying SPI_LSB_FIRST).
* @irq: Negative, or the number passed to request_irq() to receive
* interrupts from this device.
* @controller_state: Controller's runtime state
@@ -162,8 +160,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* the device will bind to the named driver and only the named driver.
* Do not set directly, because core frees it; use driver_set_override() to
* set or clear it.
- * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines
- * (optional, NULL when not using a GPIO line)
+ * @pcpu_statistics: statistics for the spi_device
* @word_delay: delay to be inserted between consecutive
* words of a transfer
* @cs_setup: delay to be introduced by the controller after CS is asserted
@@ -171,8 +168,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* @cs_inactive: delay to be introduced by the controller after CS is
* deasserted. If @cs_change_delay is used from @spi_transfer, then the
* two delays will be added up.
- * @pcpu_statistics: statistics for the spi_device
+ * @chip_select: Array of physical chipselect, spi->chipselect[i] gives
+ * the corresponding physical CS for logical CS i.
* @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array
+ * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines
+ * (optional, NULL when not using a GPIO line)
*
* A @spi_device is used to interchange data between an SPI target device
* (usually a discrete chip) and CPU memory.
@@ -187,7 +187,6 @@ struct spi_device {
struct device dev;
struct spi_controller *controller;
u32 max_speed_hz;
- u8 chip_select[SPI_CS_CNT_MAX];
u8 bits_per_word;
bool rt;
#define SPI_NO_TX BIT(31) /* No transmit wire */
@@ -218,23 +217,29 @@ struct spi_device {
void *controller_data;
char modalias[SPI_NAME_SIZE];
const char *driver_override;
- struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */
+
+ /* The statistics */
+ struct spi_statistics __percpu *pcpu_statistics;
+
struct spi_delay word_delay; /* Inter-word delay */
+
/* CS delays */
struct spi_delay cs_setup;
struct spi_delay cs_hold;
struct spi_delay cs_inactive;
- /* The statistics */
- struct spi_statistics __percpu *pcpu_statistics;
+ u8 chip_select[SPI_CS_CNT_MAX];
- /* Bit mask of the chipselect(s) that the driver need to use from
- * the chipselect array.When the controller is capable to handle
+ /*
+ * Bit mask of the chipselect(s) that the driver need to use from
+ * the chipselect array. When the controller is capable to handle
* multiple chip selects & memories are connected in parallel
* then more than one bit need to be set in cs_index_mask.
*/
u32 cs_index_mask : SPI_CS_CNT_MAX;
+ struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */
+
/*
* Likely need more hooks for more protocol options affecting how
* the controller talks to each chip, like:
@@ -249,10 +254,7 @@ struct spi_device {
static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0,
"SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap");
-static inline struct spi_device *to_spi_device(const struct device *dev)
-{
- return dev ? container_of(dev, struct spi_device, dev) : NULL;
-}
+#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev)
/* Most drivers won't need to care about device refcounting */
static inline struct spi_device *spi_dev_get(struct spi_device *spi)
@@ -503,6 +505,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* found.
* @put_offload: release the offload instance acquired by @get_offload.
* @mem_caps: controller capabilities for the handling of memory operations.
+ * @dtr_caps: true if controller has dtr(single/dual transfer rate) capability.
+ * QSPI based controller should fill this based on controller's capability.
* @unprepare_message: undo any work done by prepare_message().
* @target_abort: abort the ongoing transfer request on an SPI target controller
* @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS
@@ -746,6 +750,9 @@ struct spi_controller {
const struct spi_controller_mem_ops *mem_ops;
const struct spi_controller_mem_caps *mem_caps;
+ /* SPI or QSPI controller can set to true if supports SDR/DDR transfer rate */
+ bool dtr_caps;
+
struct spi_offload *(*get_offload)(struct spi_device *spi,
const struct spi_offload_config *config);
void (*put_offload)(struct spi_offload *offload);
@@ -998,6 +1005,7 @@ struct spi_res {
* processed the word, i.e. the "pre" timestamp should be taken before
* transmitting the "pre" word, and the "post" timestamp after receiving
* transmit confirmation from the controller for the "post" word.
+ * @dtr_mode: true if supports double transfer rate.
* @timestamped: true if the transfer has been timestamped
* @error: Error status logged by SPI controller driver.
*
@@ -1049,6 +1057,9 @@ struct spi_res {
* two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x)
* SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer.
*
+ * User may also set dtr_mode to true to use dual transfer mode if desired. if
+ * not, default considered as single transfer mode.
+ *
* The code that submits an spi_message (and its spi_transfers)
* to the lower layers is responsible for managing its memory.
* Zero-initialize every field you don't set up explicitly, to
@@ -1083,6 +1094,7 @@ struct spi_transfer {
unsigned tx_nbits:4;
unsigned rx_nbits:4;
unsigned timestamped:1;
+ bool dtr_mode;
#define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */
#define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */
#define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */
@@ -1326,6 +1338,32 @@ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw)
}
/**
+ * spi_bpw_to_bytes - Covert bits per word to bytes
+ * @bpw: Bits per word
+ *
+ * This function converts the given @bpw to bytes. The result is always
+ * power-of-two, e.g.,
+ *
+ * =============== =================
+ * Input (in bits) Output (in bytes)
+ * =============== =================
+ * 5 1
+ * 9 2
+ * 21 4
+ * 37 8
+ * =============== =================
+ *
+ * It will return 0 for the 0 input.
+ *
+ * Returns:
+ * Bytes for the given @bpw.
+ */
+static inline u32 spi_bpw_to_bytes(u32 bpw)
+{
+ return roundup_pow_of_two(BITS_TO_BYTES(bpw));
+}
+
+/**
* spi_controller_xfer_timeout - Compute a suitable timeout value
* @ctlr: SPI device
* @xfer: Transfer descriptor
diff --git a/include/linux/stat.h b/include/linux/stat.h
index be7496a6a0dd..e3d00e7bb26d 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -57,6 +57,7 @@ struct kstat {
u32 dio_read_offset_align;
u32 atomic_write_unit_min;
u32 atomic_write_unit_max;
+ u32 atomic_write_unit_max_opt;
u32 atomic_write_segments_max;
};
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index e93fbb5b0c01..3fb88a1e9898 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -31,6 +31,7 @@ enum string_size_units {
int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
char *buf, int len);
+int parse_int_array(const char *buf, size_t count, int **array);
int parse_int_array_user(const char __user *from, size_t count, int **array);
#define UNESCAPE_SPACE BIT(0)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index da6ebca3ff77..b1c76c8f2c82 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -298,6 +298,11 @@ static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {}
static inline void s2idle_wake(void) {}
#endif /* !CONFIG_SUSPEND */
+static inline bool pm_suspend_in_progress(void)
+{
+ return pm_suspend_target_state != PM_SUSPEND_ON;
+}
+
/* struct pbe is used for creating lists of pages that should be restored
* atomically during the resume from disk, because the page frames they have
* occupied before the suspend are in use.
@@ -470,6 +475,8 @@ extern void pm_print_active_wakeup_sources(void);
extern unsigned int lock_system_sleep(void);
extern void unlock_system_sleep(unsigned int);
+extern bool pm_sleep_transition_in_progress(void);
+
#else /* !CONFIG_PM_SLEEP */
static inline int register_pm_notifier(struct notifier_block *nb)
@@ -498,6 +505,8 @@ static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
static inline unsigned int lock_system_sleep(void) { return 0; }
static inline void unlock_system_sleep(unsigned int flags) {}
+static inline bool pm_sleep_transition_in_progress(void) { return false; }
+
#endif /* !CONFIG_PM_SLEEP */
#ifdef CONFIG_PM_SLEEP_DEBUG
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b8ddc8e631a3..ac76ae9fa36d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -195,12 +195,6 @@ static inline bool tick_nohz_full_enabled(void)
__ret; \
})
-static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
-{
- if (tick_nohz_full_enabled())
- cpumask_or(mask, mask, tick_nohz_full_mask);
-}
-
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
@@ -281,7 +275,6 @@ extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
#else
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
-static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 10596d7c3a34..f636f55c427d 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -67,44 +67,44 @@
/*
* LOCKDEP and DEBUG timer interfaces.
*/
-void init_timer_key(struct timer_list *timer,
+void timer_init_key(struct timer_list *timer,
void (*func)(struct timer_list *), unsigned int flags,
const char *name, struct lock_class_key *key);
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-extern void init_timer_on_stack_key(struct timer_list *timer,
+extern void timer_init_key_on_stack(struct timer_list *timer,
void (*func)(struct timer_list *),
unsigned int flags, const char *name,
struct lock_class_key *key);
#else
-static inline void init_timer_on_stack_key(struct timer_list *timer,
+static inline void timer_init_key_on_stack(struct timer_list *timer,
void (*func)(struct timer_list *),
unsigned int flags,
const char *name,
struct lock_class_key *key)
{
- init_timer_key(timer, func, flags, name, key);
+ timer_init_key(timer, func, flags, name, key);
}
#endif
#ifdef CONFIG_LOCKDEP
-#define __init_timer(_timer, _fn, _flags) \
+#define __timer_init(_timer, _fn, _flags) \
do { \
static struct lock_class_key __key; \
- init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\
+ timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\
} while (0)
-#define __init_timer_on_stack(_timer, _fn, _flags) \
+#define __timer_init_on_stack(_timer, _fn, _flags) \
do { \
static struct lock_class_key __key; \
- init_timer_on_stack_key((_timer), (_fn), (_flags), \
+ timer_init_key_on_stack((_timer), (_fn), (_flags), \
#_timer, &__key); \
} while (0)
#else
-#define __init_timer(_timer, _fn, _flags) \
- init_timer_key((_timer), (_fn), (_flags), NULL, NULL)
-#define __init_timer_on_stack(_timer, _fn, _flags) \
- init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL)
+#define __timer_init(_timer, _fn, _flags) \
+ timer_init_key((_timer), (_fn), (_flags), NULL, NULL)
+#define __timer_init_on_stack(_timer, _fn, _flags) \
+ timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL)
#endif
/**
@@ -115,18 +115,18 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
*
* Regular timer initialization should use either DEFINE_TIMER() above,
* or timer_setup(). For timers on the stack, timer_setup_on_stack() must
- * be used and must be balanced with a call to destroy_timer_on_stack().
+ * be used and must be balanced with a call to timer_destroy_on_stack().
*/
#define timer_setup(timer, callback, flags) \
- __init_timer((timer), (callback), (flags))
+ __timer_init((timer), (callback), (flags))
#define timer_setup_on_stack(timer, callback, flags) \
- __init_timer_on_stack((timer), (callback), (flags))
+ __timer_init_on_stack((timer), (callback), (flags))
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-extern void destroy_timer_on_stack(struct timer_list *timer);
+extern void timer_destroy_on_stack(struct timer_list *timer);
#else
-static inline void destroy_timer_on_stack(struct timer_list *timer) { }
+static inline void timer_destroy_on_stack(struct timer_list *timer) { }
#endif
#define from_timer(var, callback_timer, timer_fieldname) \
@@ -156,28 +156,26 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires);
* The jiffies value which is added to now, when there is no timer
* in the timer wheel:
*/
-#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)
+#define TIMER_NEXT_MAX_DELTA ((1UL << 30) - 1)
extern void add_timer(struct timer_list *timer);
extern void add_timer_local(struct timer_list *timer);
extern void add_timer_global(struct timer_list *timer);
-extern int try_to_del_timer_sync(struct timer_list *timer);
+extern int timer_delete_sync_try(struct timer_list *timer);
extern int timer_delete_sync(struct timer_list *timer);
extern int timer_delete(struct timer_list *timer);
extern int timer_shutdown_sync(struct timer_list *timer);
extern int timer_shutdown(struct timer_list *timer);
-extern void init_timers(void);
+extern void timers_init(void);
struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
-unsigned long __round_jiffies(unsigned long j, int cpu);
unsigned long __round_jiffies_relative(unsigned long j, int cpu);
unsigned long round_jiffies(unsigned long j);
unsigned long round_jiffies_relative(unsigned long j);
-unsigned long __round_jiffies_up(unsigned long j, int cpu);
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu);
unsigned long round_jiffies_up(unsigned long j);
unsigned long round_jiffies_up_relative(unsigned long j);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 24e715f0f6d2..cd6b4bdc9cfd 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -332,4 +332,13 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops)
!IS_ERR_OR_NULL(mask); \
__hops++)
+DECLARE_PER_CPU(unsigned long, cpu_scale);
+
+static inline unsigned long topology_get_cpu_scale(int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
+
#endif /* _LINUX_TOPOLOGY_H */
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 6c3125300c00..a3d8305e88a5 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -224,7 +224,7 @@ enum tpm2_const {
enum tpm2_timeouts {
TPM2_TIMEOUT_A = 750,
- TPM2_TIMEOUT_B = 2000,
+ TPM2_TIMEOUT_B = 4000,
TPM2_TIMEOUT_C = 200,
TPM2_TIMEOUT_D = 30,
TPM2_DURATION_SHORT = 20,
@@ -257,6 +257,7 @@ enum tpm2_return_codes {
TPM2_RC_TESTING = 0x090A, /* RC_WARN */
TPM2_RC_REFERENCE_H0 = 0x0910,
TPM2_RC_RETRY = 0x0922,
+ TPM2_RC_SESSION_MEMORY = 0x0903,
};
enum tpm2_command_codes {
@@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc)
return (rc & BIT(7)) ? rc & 0xbf : rc;
}
+/*
+ * Convert a return value from tpm_transmit_cmd() to POSIX error code.
+ */
+static inline ssize_t tpm_ret_to_err(ssize_t ret)
+{
+ if (ret < 0)
+ return ret;
+
+ switch (tpm2_rc_value(ret)) {
+ case TPM2_RC_SUCCESS:
+ return 0;
+ case TPM2_RC_SESSION_MEMORY:
+ return -ENOMEM;
+ default:
+ return -EFAULT;
+ }
+}
+
#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
extern int tpm_is_tpm2(struct tpm_chip *chip);
diff --git a/include/linux/tpm_svsm.h b/include/linux/tpm_svsm.h
new file mode 100644
index 000000000000..38e341f9761a
--- /dev/null
+++ b/include/linux/tpm_svsm.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 James.Bottomley@HansenPartnership.com
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ *
+ * Helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by the
+ * AMD SVSM spec [1].
+ *
+ * The vTPM protocol follows the Official TPM 2.0 Reference Implementation
+ * (originally by Microsoft, now part of the TCG) simulator protocol.
+ *
+ * [1] "Secure VM Service Module for SEV-SNP Guests"
+ * Publication # 58019 Revision: 1.00
+ */
+#ifndef _TPM_SVSM_H_
+#define _TPM_SVSM_H_
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define SVSM_VTPM_MAX_BUFFER 4096 /* max req/resp buffer size */
+
+/**
+ * struct svsm_vtpm_request - Generic request for single word command
+ * @cmd: The command to send
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 15: vTPM Common Request/Response Structure
+ * Byte Size     In/Out    Description
+ * Offset    (Bytes)
+ * 0x000     4          In        Platform command
+ *                         Out       Platform command response size
+ */
+struct svsm_vtpm_request {
+ u32 cmd;
+};
+
+/**
+ * struct svsm_vtpm_response - Generic response
+ * @size: The response size (zero if nothing follows)
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 15: vTPM Common Request/Response Structure
+ * Byte Size     In/Out    Description
+ * Offset    (Bytes)
+ * 0x000     4          In        Platform command
+ *                         Out       Platform command response size
+ *
+ * Note: most TCG Simulator commands simply return zero here with no indication
+ * of success or failure.
+ */
+struct svsm_vtpm_response {
+ u32 size;
+};
+
+/**
+ * struct svsm_vtpm_cmd_request - Structure for a TPM_SEND_COMMAND request
+ * @cmd: The command to send (must be TPM_SEND_COMMAND)
+ * @locality: The locality
+ * @buf_size: The size of the input buffer following
+ * @buf: A buffer of size buf_size
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 16: TPM_SEND_COMMAND Request Structure
+ * Byte Size Meaning
+ * Offset    (Bytes)
+ * 0x000     4          Platform command (8)
+ * 0x004     1          Locality (must-be-0)
+ * 0x005     4          TPM Command size (in bytes)
+ * 0x009     Variable   TPM Command
+ *
+ * Note: the TCG Simulator expects @buf_size to be equal to the size of the
+ * specific TPM command, otherwise an TPM_RC_COMMAND_SIZE error is returned.
+ */
+struct svsm_vtpm_cmd_request {
+ u32 cmd;
+ u8 locality;
+ u32 buf_size;
+ u8 buf[];
+} __packed;
+
+/**
+ * struct svsm_vtpm_cmd_response - Structure for a TPM_SEND_COMMAND response
+ * @buf_size: The size of the output buffer following
+ * @buf: A buffer of size buf_size
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 17: TPM_SEND_COMMAND Response Structure
+ * Byte Size Meaning
+ * Offset    (Bytes)
+ * 0x000     4          Response size (in bytes)
+ * 0x004     Variable   Response
+ */
+struct svsm_vtpm_cmd_response {
+ u32 buf_size;
+ u8 buf[];
+};
+
+/**
+ * svsm_vtpm_cmd_request_fill() - Fill a TPM_SEND_COMMAND request to be sent to SVSM
+ * @req: The struct svsm_vtpm_cmd_request to fill
+ * @locality: The locality
+ * @buf: The buffer from where to copy the payload of the command
+ * @len: The size of the buffer
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static inline int
+svsm_vtpm_cmd_request_fill(struct svsm_vtpm_cmd_request *req, u8 locality,
+ const u8 *buf, size_t len)
+{
+ if (len > SVSM_VTPM_MAX_BUFFER - sizeof(*req))
+ return -EINVAL;
+
+ req->cmd = 8; /* TPM_SEND_COMMAND */
+ req->locality = locality;
+ req->buf_size = len;
+
+ memcpy(req->buf, buf, len);
+
+ return 0;
+}
+
+/**
+ * svsm_vtpm_cmd_response_parse() - Parse a TPM_SEND_COMMAND response received from SVSM
+ * @resp: The struct svsm_vtpm_cmd_response to parse
+ * @buf: The buffer where to copy the response
+ * @len: The size of the buffer
+ *
+ * Return: buffer size filled with the response on success, negative error
+ * code on failure.
+ */
+static inline int
+svsm_vtpm_cmd_response_parse(const struct svsm_vtpm_cmd_response *resp, u8 *buf,
+ size_t len)
+{
+ if (len < resp->buf_size)
+ return -E2BIG;
+
+ if (resp->buf_size > SVSM_VTPM_MAX_BUFFER - sizeof(*resp))
+ return -EINVAL; // Invalid response from the platform TPM
+
+ memcpy(buf, resp->buf, resp->buf_size);
+
+ return resp->buf_size;
+}
+
+#endif /* _TPM_SVSM_H_ */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 5ca8d4dd149d..a40a905e5e1b 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -169,8 +169,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m
int node, const void *caller) __alloc_size(1);
#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
-void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
-#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1);
+#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__))
+
+static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+{
+ return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE);
+}
extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b0dc957c3e56..6e30f275da77 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -316,7 +316,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
#define __INIT_DELAYED_WORK(_work, _func, _tflags) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
- __init_timer(&(_work)->timer, \
+ __timer_init(&(_work)->timer, \
delayed_work_timer_fn, \
(_tflags) | TIMER_IRQSAFE); \
} while (0)
@@ -324,7 +324,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
#define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \
do { \
INIT_WORK_ONSTACK(&(_work)->work, (_func)); \
- __init_timer_on_stack(&(_work)->timer, \
+ __timer_init_on_stack(&(_work)->timer, \
delayed_work_timer_fn, \
(_tflags) | TIMER_IRQSAFE); \
} while (0)
@@ -480,7 +480,7 @@ void workqueue_softirq_dead(unsigned int cpu);
* executing at most one work item for the workqueue.
*
* For unbound workqueues, @max_active limits the number of in-flight work items
- * for the whole system. e.g. @max_active of 16 indicates that that there can be
+ * for the whole system. e.g. @max_active of 16 indicates that there can be
* at most 16 work items executing for the workqueue in the whole system.
*
* As sharing the same active counter for an unbound workqueue across multiple
diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h
index b8fa30fd6b50..53663c4e5ae3 100644
--- a/include/memory/renesas-rpc-if.h
+++ b/include/memory/renesas-rpc-if.h
@@ -61,12 +61,14 @@ enum rpcif_type {
RPCIF_RCAR_GEN3,
RPCIF_RCAR_GEN4,
RPCIF_RZ_G2L,
+ XSPI_RZ_G3E,
};
struct rpcif {
struct device *dev;
void __iomem *dirmap;
size_t size;
+ bool xspi;
};
int rpcif_sw_init(struct rpcif *rpc, struct device *dev);
@@ -75,5 +77,7 @@ void rpcif_prepare(struct device *dev, const struct rpcif_op *op, u64 *offs,
size_t *len);
int rpcif_manual_xfer(struct device *dev);
ssize_t rpcif_dirmap_read(struct device *dev, u64 offs, size_t len, void *buf);
+ssize_t xspi_dirmap_write(struct device *dev, u64 offs, size_t len,
+ const void *buf);
#endif // __RENESAS_RPC_IF_H
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 522d837a23fa..54bfeeaa0995 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1798,6 +1798,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
void hci_uuids_clear(struct hci_dev *hdev);
void hci_link_keys_clear(struct hci_dev *hdev);
+u8 *hci_conn_key_enc_size(struct hci_conn *conn);
struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr);
struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn,
bdaddr_t *bdaddr, u8 *val, u8 type,
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
index c316b551df8d..0ee5bc766810 100644
--- a/include/net/netdev_lock.h
+++ b/include/net/netdev_lock.h
@@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
&qdisc_xmit_lock_key); \
}
+#define netdev_lock_dereference(p, dev) \
+ rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock))
+
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
void *ptr);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d48c657191cd..1c05fed05f2b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
return skb;
}
+static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&sch->gso_skb);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ if (direct)
+ return __qdisc_dequeue_head(&sch->q);
+ else
+ return sch->dequeue(sch);
+}
+
static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 39365fd2ea17..06ab2a3d2ebd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -236,7 +236,6 @@ struct xfrm_state {
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
- struct sock __rcu *encap_sk;
/* NAT keepalive */
u32 nat_keepalive_interval; /* seconds */
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 26bc23419cfd..c53812b9026f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -670,8 +670,6 @@ struct Scsi_Host {
/* The transport requires the LUN bits NOT to be stored in CDB[1] */
unsigned no_scsi2_lun_in_cdb:1;
- unsigned no_highmem:1;
-
/*
* Optional work queue to be utilized by the transport
*/
diff --git a/include/sound/core.h b/include/sound/core.h
index 1f3f5dccd736..64327e971122 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -326,7 +326,6 @@ void snd_device_disconnect(struct snd_card *card, void *device_data);
void snd_device_disconnect_all(struct snd_card *card);
void snd_device_free(struct snd_card *card, void *device_data);
void snd_device_free_all(struct snd_card *card);
-int snd_device_get_state(struct snd_card *card, void *device_data);
/* isadma.c */
diff --git a/include/sound/cs-amp-lib.h b/include/sound/cs-amp-lib.h
index f481148735e1..5459c221badf 100644
--- a/include/sound/cs-amp-lib.h
+++ b/include/sound/cs-amp-lib.h
@@ -23,7 +23,7 @@ struct cirrus_amp_cal_data {
struct cirrus_amp_efi_data {
u32 size;
u32 count;
- struct cirrus_amp_cal_data data[];
+ struct cirrus_amp_cal_data data[] __counted_by(count);
} __packed;
/**
diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index 5d653a3491d0..e17c4cadd04d 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -71,6 +71,8 @@
#define CS35L56_DSP_VIRTUAL1_MBOX_6 0x0011034
#define CS35L56_DSP_VIRTUAL1_MBOX_7 0x0011038
#define CS35L56_DSP_VIRTUAL1_MBOX_8 0x001103C
+#define CS35L56_DIE_STS1 0x0017040
+#define CS35L56_DIE_STS2 0x0017044
#define CS35L56_DSP_RESTRICT_STS1 0x00190F0
#define CS35L56_DSP1_XMEM_PACKED_0 0x2000000
#define CS35L56_DSP1_XMEM_PACKED_6143 0x2005FFC
@@ -104,6 +106,15 @@
#define CS35L56_DSP1_PMEM_0 0x3800000
#define CS35L56_DSP1_PMEM_5114 0x3804FE8
+#define CS35L63_DSP1_FW_VER CS35L56_DSP1_FW_VER
+#define CS35L63_DSP1_HALO_STATE 0x280396C
+#define CS35L63_DSP1_PM_CUR_STATE 0x28042C8
+#define CS35L63_PROTECTION_STATUS 0x340009C
+#define CS35L63_TRANSDUCER_ACTUAL_PS 0x34000F4
+#define CS35L63_MAIN_RENDER_USER_MUTE 0x3400020
+#define CS35L63_MAIN_RENDER_USER_VOLUME 0x3400028
+#define CS35L63_MAIN_POSTURE_NUMBER 0x3400068
+
/* DEVID */
#define CS35L56_DEVID_MASK 0x00FFFFFF
@@ -267,6 +278,17 @@ struct cs35l56_spi_payload {
} __packed;
static_assert(sizeof(struct cs35l56_spi_payload) == 10);
+struct cs35l56_fw_reg {
+ unsigned int fw_ver;
+ unsigned int halo_state;
+ unsigned int pm_cur_stat;
+ unsigned int prot_sts;
+ unsigned int transducer_actual_ps;
+ unsigned int user_mute;
+ unsigned int user_volume;
+ unsigned int posture_number;
+};
+
struct cs35l56_base {
struct device *dev;
struct regmap *regmap;
@@ -283,6 +305,7 @@ struct cs35l56_base {
struct cirrus_amp_cal_data cal_data;
struct gpio_desc *reset_gpio;
struct cs35l56_spi_payload *spi_payload_buf;
+ const struct cs35l56_fw_reg *fw_reg;
};
static inline bool cs35l56_is_otp_register(unsigned int reg)
@@ -310,6 +333,11 @@ static inline bool cs35l56_is_spi(struct cs35l56_base *cs35l56)
extern const struct regmap_config cs35l56_regmap_i2c;
extern const struct regmap_config cs35l56_regmap_spi;
extern const struct regmap_config cs35l56_regmap_sdw;
+extern const struct regmap_config cs35l63_regmap_i2c;
+extern const struct regmap_config cs35l63_regmap_sdw;
+
+extern const struct cs35l56_fw_reg cs35l56_fw_reg;
+extern const struct cs35l56_fw_reg cs35l63_fw_reg;
extern const struct cirrus_amp_cal_controls cs35l56_calibration_controls;
@@ -332,6 +360,7 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds
int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base);
int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
bool *fw_missing, unsigned int *fw_version);
+void cs35l56_log_tuning(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp);
int cs35l56_hw_init(struct cs35l56_base *cs35l56_base);
int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base);
int cs35l56_get_bclk_freq_id(unsigned int freq);
diff --git a/include/sound/cs42l52.h b/include/sound/cs42l52.h
deleted file mode 100644
index c20649666abe..000000000000
--- a/include/sound/cs42l52.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/sound/cs42l52.h -- Platform data for CS42L52
- *
- * Copyright (c) 2012 Cirrus Logic Inc.
- */
-
-#ifndef __CS42L52_H
-#define __CS42L52_H
-
-struct cs42l52_platform_data {
-
- /* MICBIAS Level. Check datasheet Pg48 */
- unsigned int micbias_lvl;
-
- /* MICA mode selection Differential or Single-ended */
- bool mica_diff_cfg;
-
- /* MICB mode selection Differential or Single-ended */
- bool micb_diff_cfg;
-
- /* Charge Pump Freq. Check datasheet Pg73 */
- unsigned int chgfreq;
-
- /* Reset GPIO */
- unsigned int reset_gpio;
-};
-
-#endif /* __CS42L52_H */
diff --git a/include/sound/cs42l56.h b/include/sound/cs42l56.h
deleted file mode 100644
index 62e9f7a3b414..000000000000
--- a/include/sound/cs42l56.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/sound/cs42l56.h -- Platform data for CS42L56
- *
- * Copyright (c) 2014 Cirrus Logic Inc.
- */
-
-#ifndef __CS42L56_H
-#define __CS42L56_H
-
-struct cs42l56_platform_data {
-
- /* GPIO for Reset */
- unsigned int gpio_nreset;
-
- /* MICBIAS Level. Check datasheet Pg48 */
- unsigned int micbias_lvl;
-
- /* Analog Input 1A Reference 0=Single 1=Pseudo-Differential */
- unsigned int ain1a_ref_cfg;
-
- /* Analog Input 2A Reference 0=Single 1=Pseudo-Differential */
- unsigned int ain2a_ref_cfg;
-
- /* Analog Input 1B Reference 0=Single 1=Pseudo-Differential */
- unsigned int ain1b_ref_cfg;
-
- /* Analog Input 2B Reference 0=Single 1=Pseudo-Differential */
- unsigned int ain2b_ref_cfg;
-
- /* Charge Pump Freq. Check datasheet Pg62 */
- unsigned int chgfreq;
-
- /* HighPass Filter Right Channel Corner Frequency */
- unsigned int hpfb_freq;
-
- /* HighPass Filter Left Channel Corner Frequency */
- unsigned int hpfa_freq;
-
- /* Adaptive Power Control for LO/HP */
- unsigned int adaptive_pwr;
-
-};
-
-#endif /* __CS42L56_H */
diff --git a/include/sound/cs42l73.h b/include/sound/cs42l73.h
deleted file mode 100644
index 5a93393b6124..000000000000
--- a/include/sound/cs42l73.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/sound/cs42l73.h -- Platform data for CS42L73
- *
- * Copyright (c) 2012 Cirrus Logic Inc.
- */
-
-#ifndef __CS42L73_H
-#define __CS42L73_H
-
-struct cs42l73_platform_data {
- /* RST GPIO */
- unsigned int reset_gpio;
- unsigned int chgfreq;
- int jack_detection;
- unsigned int mclk_freq;
-};
-
-#endif /* __CS42L73_H */
diff --git a/include/sound/cs48l32.h b/include/sound/cs48l32.h
new file mode 100644
index 000000000000..27b3e7cf999a
--- /dev/null
+++ b/include/sound/cs48l32.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Register definitions for Cirrus Logic CS48L32
+ *
+ * Copyright (C) 2017-2018, 2020, 2022, 2025 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef CS48L32_H
+#define CS48L32_H
+
+/* pll_id for snd_soc_component_set_pll() */
+#define CS48L32_FLL1_REFCLK 1
+
+/* source for snd_soc_component_set_pll() */
+#define CS48L32_FLL_SRC_NONE -1
+#define CS48L32_FLL_SRC_MCLK1 0
+#define CS48L32_FLL_SRC_PDMCLK 5
+#define CS48L32_FLL_SRC_ASP1_BCLK 8
+#define CS48L32_FLL_SRC_ASP2_BCLK 9
+#define CS48L32_FLL_SRC_ASP1_FSYNC 12
+#define CS48L32_FLL_SRC_ASP2_FSYNC 13
+
+/* clk_id for snd_soc_component_set_sysclk() and snd_soc_dai_set_sysclk() */
+#define CS48L32_CLK_SYSCLK_1 1
+#define CS48L32_CLK_SYSCLK_2 2
+#define CS48L32_CLK_SYSCLK_3 3
+#define CS48L32_CLK_SYSCLK_4 4
+#define CS48L32_CLK_DSPCLK 7
+#define CS48L32_CLK_PDM_FLLCLK 13
+
+/* source for snd_soc_component_set_sysclk() */
+#define CS48L32_CLK_SRC_MCLK1 0x0
+#define CS48L32_CLK_SRC_FLL1 0x4
+#define CS48L32_CLK_SRC_ASP1_BCLK 0x8
+#define CS48L32_CLK_SRC_ASP2_BCLK 0x9
+
+struct cs48l32 {
+ struct regmap *regmap;
+ struct device *dev;
+ struct gpio_desc *reset_gpio;
+ struct clk *mclk1;
+ struct regulator_bulk_data core_supplies[2];
+ struct regulator *vdd_d;
+ int irq;
+};
+#endif
diff --git a/include/sound/cs48l32_registers.h b/include/sound/cs48l32_registers.h
new file mode 100644
index 000000000000..f29410fdf76f
--- /dev/null
+++ b/include/sound/cs48l32_registers.h
@@ -0,0 +1,530 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Register definitions for Cirrus Logic CS48L32
+ *
+ * Copyright (C) 2017-2018, 2020, 2022, 2025 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef CS48L32_REGISTERS_H
+#define CS48L32_REGISTERS_H
+
+/* Register Addresses. */
+#define CS48L32_DEVID 0x0
+#define CS48L32_REVID 0x4
+#define CS48L32_OTPID 0x10
+#define CS48L32_SFT_RESET 0x20
+#define CS48L32_CTRL_IF_DEBUG3 0xA8
+#define CS48L32_MCU_CTRL1 0x804
+#define CS48L32_GPIO1_CTRL1 0xc08
+#define CS48L32_GPIO3_CTRL1 0xc10
+#define CS48L32_GPIO7_CTRL1 0xc20
+#define CS48L32_GPIO16_CTRL1 0xc44
+#define CS48L32_OUTPUT_SYS_CLK 0x1020
+#define CS48L32_AUXPDM_CTRL 0x1044
+#define CS48L32_AUXPDM_CTRL2 0x105c
+#define CS48L32_CLOCK32K 0x1400
+#define CS48L32_SYSTEM_CLOCK1 0x1404
+#define CS48L32_SYSTEM_CLOCK2 0x1408
+#define CS48L32_SAMPLE_RATE1 0x1420
+#define CS48L32_SAMPLE_RATE2 0x1424
+#define CS48L32_SAMPLE_RATE3 0x1428
+#define CS48L32_SAMPLE_RATE4 0x142c
+#define CS48L32_DSP_CLOCK1 0x1510
+#define CS48L32_FLL1_CONTROL1 0x1c00
+#define CS48L32_FLL1_CONTROL5 0x1c10
+#define CS48L32_FLL1_CONTROL6 0x1c14
+#define CS48L32_FLL1_GPIO_CLOCK 0x1ca0
+#define CS48L32_CHARGE_PUMP1 0x2000
+#define CS48L32_LDO2_CTRL1 0x2408
+#define CS48L32_MICBIAS_CTRL1 0x2410
+#define CS48L32_MICBIAS_CTRL5 0x2418
+#define CS48L32_IRQ1_CTRL_AOD 0x2710
+#define CS48L32_AOD_PAD_CTRL 0x2718
+#define CS48L32_INPUT_CONTROL 0x4000
+#define CS48L32_INPUT_STATUS 0x4004
+#define CS48L32_INPUT_RATE_CONTROL 0x4008
+#define CS48L32_INPUT_CONTROL2 0x400c
+#define CS48L32_INPUT_CONTROL3 0x4014
+#define CS48L32_INPUT1_CONTROL1 0x4020
+#define CS48L32_IN1L_CONTROL1 0x4024
+#define CS48L32_IN1L_CONTROL2 0x4028
+#define CS48L32_IN1R_CONTROL1 0x4044
+#define CS48L32_IN1R_CONTROL2 0x4048
+#define CS48L32_INPUT2_CONTROL1 0x4060
+#define CS48L32_IN2L_CONTROL1 0x4064
+#define CS48L32_IN2L_CONTROL2 0x4068
+#define CS48L32_IN2R_CONTROL1 0x4084
+#define CS48L32_IN2R_CONTROL2 0x4088
+#define CS48L32_INPUT_HPF_CONTROL 0x4244
+#define CS48L32_INPUT_VOL_CONTROL 0x4248
+#define CS48L32_AUXPDM_CONTROL1 0x4300
+#define CS48L32_AUXPDM_CONTROL2 0x4304
+#define CS48L32_AUXPDM1_CONTROL1 0x4308
+#define CS48L32_AUXPDM2_CONTROL1 0x4310
+#define CS48L32_ADC1L_ANA_CONTROL1 0x4688
+#define CS48L32_ADC1R_ANA_CONTROL1 0x468c
+#define CS48L32_ASP1_ENABLES1 0x6000
+#define CS48L32_ASP1_CONTROL3 0x600C
+#define CS48L32_ASP1_DATA_CONTROL5 0x6040
+#define CS48L32_ASP2_ENABLES1 0x6080
+#define CS48L32_ASP2_CONTROL3 0x608C
+#define CS48L32_ASP2_DATA_CONTROL5 0x60c0
+#define CS48L32_ASP1TX1_INPUT1 0x8200
+#define CS48L32_ASP1TX2_INPUT1 0x8210
+#define CS48L32_ASP1TX3_INPUT1 0x8220
+#define CS48L32_ASP1TX4_INPUT1 0x8230
+#define CS48L32_ASP1TX5_INPUT1 0x8240
+#define CS48L32_ASP1TX6_INPUT1 0x8250
+#define CS48L32_ASP1TX7_INPUT1 0x8260
+#define CS48L32_ASP1TX8_INPUT1 0x8270
+#define CS48L32_ASP1TX8_INPUT4 0x827c
+#define CS48L32_ASP2TX1_INPUT1 0x8300
+#define CS48L32_ASP2TX2_INPUT1 0x8310
+#define CS48L32_ASP2TX3_INPUT1 0x8320
+#define CS48L32_ASP2TX4_INPUT1 0x8330
+#define CS48L32_ASP2TX4_INPUT4 0x833c
+#define CS48L32_ISRC1INT1_INPUT1 0x8980
+#define CS48L32_ISRC1INT2_INPUT1 0x8990
+#define CS48L32_ISRC1INT3_INPUT1 0x89a0
+#define CS48L32_ISRC1INT4_INPUT1 0x89b0
+#define CS48L32_ISRC1DEC1_INPUT1 0x89c0
+#define CS48L32_ISRC1DEC2_INPUT1 0x89d0
+#define CS48L32_ISRC1DEC3_INPUT1 0x89e0
+#define CS48L32_ISRC1DEC4_INPUT1 0x89f0
+#define CS48L32_ISRC2INT1_INPUT1 0x8a00
+#define CS48L32_ISRC2INT2_INPUT1 0x8a10
+#define CS48L32_ISRC2DEC1_INPUT1 0x8a40
+#define CS48L32_ISRC2DEC2_INPUT1 0x8a50
+#define CS48L32_ISRC3INT1_INPUT1 0x8a80
+#define CS48L32_ISRC3INT2_INPUT1 0x8a90
+#define CS48L32_ISRC3DEC1_INPUT1 0x8ac0
+#define CS48L32_ISRC3DEC2_INPUT1 0x8ad0
+#define CS48L32_EQ1_INPUT1 0x8b80
+#define CS48L32_EQ2_INPUT1 0x8b90
+#define CS48L32_EQ3_INPUT1 0x8ba0
+#define CS48L32_EQ4_INPUT1 0x8bb0
+#define CS48L32_EQ4_INPUT4 0x8bbc
+#define CS48L32_DRC1L_INPUT1 0x8c00
+#define CS48L32_DRC1R_INPUT1 0x8c10
+#define CS48L32_DRC1R_INPUT4 0x8c1c
+#define CS48L32_DRC2L_INPUT1 0x8c20
+#define CS48L32_DRC2R_INPUT1 0x8c30
+#define CS48L32_DRC2R_INPUT4 0x8c3c
+#define CS48L32_LHPF1_INPUT1 0x8c80
+#define CS48L32_LHPF1_INPUT4 0x8c8c
+#define CS48L32_LHPF2_INPUT1 0x8c90
+#define CS48L32_LHPF2_INPUT4 0x8c9c
+#define CS48L32_LHPF3_INPUT1 0x8ca0
+#define CS48L32_LHPF3_INPUT4 0x8cac
+#define CS48L32_LHPF4_INPUT1 0x8cb0
+#define CS48L32_LHPF4_INPUT4 0x8cbc
+#define CS48L32_DSP1RX1_INPUT1 0x9000
+#define CS48L32_DSP1RX2_INPUT1 0x9010
+#define CS48L32_DSP1RX3_INPUT1 0x9020
+#define CS48L32_DSP1RX4_INPUT1 0x9030
+#define CS48L32_DSP1RX5_INPUT1 0x9040
+#define CS48L32_DSP1RX6_INPUT1 0x9050
+#define CS48L32_DSP1RX7_INPUT1 0x9060
+#define CS48L32_DSP1RX8_INPUT1 0x9070
+#define CS48L32_DSP1RX8_INPUT4 0x907c
+#define CS48L32_ISRC1_CONTROL1 0xa400
+#define CS48L32_ISRC1_CONTROL2 0xa404
+#define CS48L32_ISRC2_CONTROL1 0xa510
+#define CS48L32_ISRC2_CONTROL2 0xa514
+#define CS48L32_ISRC3_CONTROL1 0xa620
+#define CS48L32_ISRC3_CONTROL2 0xa624
+#define CS48L32_FX_SAMPLE_RATE 0xa800
+#define CS48L32_EQ_CONTROL1 0xa808
+#define CS48L32_EQ_CONTROL2 0xa80c
+#define CS48L32_EQ1_GAIN1 0xa810
+#define CS48L32_EQ1_GAIN2 0xa814
+#define CS48L32_EQ1_BAND1_COEFF1 0xa818
+#define CS48L32_EQ1_BAND1_COEFF2 0xa81c
+#define CS48L32_EQ1_BAND1_PG 0xa820
+#define CS48L32_EQ1_BAND2_COEFF1 0xa824
+#define CS48L32_EQ1_BAND2_COEFF2 0xa828
+#define CS48L32_EQ1_BAND2_PG 0xa82c
+#define CS48L32_EQ1_BAND3_COEFF1 0xa830
+#define CS48L32_EQ1_BAND3_COEFF2 0xa834
+#define CS48L32_EQ1_BAND3_PG 0xa838
+#define CS48L32_EQ1_BAND4_COEFF1 0xa83c
+#define CS48L32_EQ1_BAND4_COEFF2 0xa840
+#define CS48L32_EQ1_BAND4_PG 0xa844
+#define CS48L32_EQ1_BAND5_COEFF1 0xa848
+#define CS48L32_EQ1_BAND5_PG 0xa850
+#define CS48L32_EQ2_GAIN1 0xa854
+#define CS48L32_EQ2_GAIN2 0xa858
+#define CS48L32_EQ2_BAND1_COEFF1 0xa85c
+#define CS48L32_EQ2_BAND1_COEFF2 0xa860
+#define CS48L32_EQ2_BAND1_PG 0xa864
+#define CS48L32_EQ2_BAND2_COEFF1 0xa868
+#define CS48L32_EQ2_BAND2_COEFF2 0xa86c
+#define CS48L32_EQ2_BAND2_PG 0xa870
+#define CS48L32_EQ2_BAND3_COEFF1 0xa874
+#define CS48L32_EQ2_BAND3_COEFF2 0xa878
+#define CS48L32_EQ2_BAND3_PG 0xa87c
+#define CS48L32_EQ2_BAND4_COEFF1 0xa880
+#define CS48L32_EQ2_BAND4_COEFF2 0xa884
+#define CS48L32_EQ2_BAND4_PG 0xa888
+#define CS48L32_EQ2_BAND5_COEFF1 0xa88c
+#define CS48L32_EQ2_BAND5_PG 0xa894
+#define CS48L32_EQ3_GAIN1 0xa898
+#define CS48L32_EQ3_GAIN2 0xa89c
+#define CS48L32_EQ3_BAND1_COEFF1 0xa8a0
+#define CS48L32_EQ3_BAND1_COEFF2 0xa8a4
+#define CS48L32_EQ3_BAND1_PG 0xa8a8
+#define CS48L32_EQ3_BAND2_COEFF1 0xa8ac
+#define CS48L32_EQ3_BAND2_COEFF2 0xa8b0
+#define CS48L32_EQ3_BAND2_PG 0xa8b4
+#define CS48L32_EQ3_BAND3_COEFF1 0xa8b8
+#define CS48L32_EQ3_BAND3_COEFF2 0xa8bc
+#define CS48L32_EQ3_BAND3_PG 0xa8c0
+#define CS48L32_EQ3_BAND4_COEFF1 0xa8c4
+#define CS48L32_EQ3_BAND4_COEFF2 0xa8c8
+#define CS48L32_EQ3_BAND4_PG 0xa8cc
+#define CS48L32_EQ3_BAND5_COEFF1 0xa8d0
+#define CS48L32_EQ3_BAND5_PG 0xa8d8
+#define CS48L32_EQ4_GAIN1 0xa8dc
+#define CS48L32_EQ4_GAIN2 0xa8e0
+#define CS48L32_EQ4_BAND1_COEFF1 0xa8e4
+#define CS48L32_EQ4_BAND1_COEFF2 0xa8e8
+#define CS48L32_EQ4_BAND1_PG 0xa8ec
+#define CS48L32_EQ4_BAND2_COEFF1 0xa8f0
+#define CS48L32_EQ4_BAND2_COEFF2 0xa8f4
+#define CS48L32_EQ4_BAND2_PG 0xa8f8
+#define CS48L32_EQ4_BAND3_COEFF1 0xa8fc
+#define CS48L32_EQ4_BAND3_COEFF2 0xa900
+#define CS48L32_EQ4_BAND3_PG 0xa904
+#define CS48L32_EQ4_BAND4_COEFF1 0xa908
+#define CS48L32_EQ4_BAND4_COEFF2 0xa90c
+#define CS48L32_EQ4_BAND4_PG 0xa910
+#define CS48L32_EQ4_BAND5_COEFF1 0xa914
+#define CS48L32_EQ4_BAND5_PG 0xa91c
+#define CS48L32_LHPF_CONTROL1 0xaa30
+#define CS48L32_LHPF_CONTROL2 0xaa34
+#define CS48L32_LHPF1_COEFF 0xaa38
+#define CS48L32_LHPF2_COEFF 0xaa3c
+#define CS48L32_LHPF3_COEFF 0xaa40
+#define CS48L32_LHPF4_COEFF 0xaa44
+#define CS48L32_DRC1_CONTROL1 0xab00
+#define CS48L32_DRC1_CONTROL4 0xab0c
+#define CS48L32_DRC2_CONTROL1 0xab14
+#define CS48L32_DRC2_CONTROL4 0xab20
+#define CS48L32_TONE_GENERATOR1 0xb000
+#define CS48L32_TONE_GENERATOR2 0xb004
+#define CS48L32_COMFORT_NOISE_GENERATOR 0xb400
+#define CS48L32_US_CONTROL 0xb800
+#define CS48L32_US1_CONTROL 0xb804
+#define CS48L32_US1_DET_CONTROL 0xb808
+#define CS48L32_US2_CONTROL 0xb814
+#define CS48L32_US2_DET_CONTROL 0xb818
+#define CS48L32_DSP1_XM_SRAM_IBUS_SETUP_0 0x1700c
+#define CS48L32_DSP1_XM_SRAM_IBUS_SETUP_1 0x17010
+#define CS48L32_DSP1_XM_SRAM_IBUS_SETUP_24 0x1706c
+#define CS48L32_DSP1_YM_SRAM_IBUS_SETUP_0 0x17070
+#define CS48L32_DSP1_YM_SRAM_IBUS_SETUP_1 0x17074
+#define CS48L32_DSP1_YM_SRAM_IBUS_SETUP_8 0x17090
+#define CS48L32_DSP1_PM_SRAM_IBUS_SETUP_0 0x17094
+#define CS48L32_DSP1_PM_SRAM_IBUS_SETUP_1 0x17098
+#define CS48L32_DSP1_PM_SRAM_IBUS_SETUP_7 0x170b0
+#define CS48L32_IRQ1_STATUS 0x18004
+#define CS48L32_IRQ1_EINT_1 0x18010
+#define CS48L32_IRQ1_EINT_2 0x18014
+#define CS48L32_IRQ1_EINT_7 0x18028
+#define CS48L32_IRQ1_EINT_9 0x18030
+#define CS48L32_IRQ1_EINT_11 0x18038
+#define CS48L32_IRQ1_STS_1 0x18090
+#define CS48L32_IRQ1_STS_6 0x180a4
+#define CS48L32_IRQ1_STS_11 0x180b8
+#define CS48L32_IRQ1_MASK_1 0x18110
+#define CS48L32_IRQ1_MASK_2 0x18114
+#define CS48L32_IRQ1_MASK_7 0x18128
+#define CS48L32_IRQ1_MASK_9 0x18130
+#define CS48L32_IRQ1_MASK_11 0x18138
+#define CS48L32_DSP1_XMEM_PACKED_0 0x2000000
+#define CS48L32_DSP1_XMEM_PACKED_LAST 0x208fff0
+#define CS48L32_DSP1_SYS_INFO_ID 0x25e0000
+#define CS48L32_DSP1_AHBM_WINDOW_DEBUG_1 0x25e2044
+#define CS48L32_DSP1_XMEM_UNPACKED24_0 0x2800000
+#define CS48L32_DSP1_XMEM_UNPACKED24_LAST 0x28bfff4
+#define CS48L32_DSP1_CLOCK_FREQ 0x2b80000
+#define CS48L32_DSP1_SAMPLE_RATE_TX8 0x2b802b8
+#define CS48L32_DSP1_SCRATCH1 0x2b805c0
+#define CS48L32_DSP1_SCRATCH4 0x2b805d8
+#define CS48L32_DSP1_CCM_CORE_CONTROL 0x2bc1000
+#define CS48L32_DSP1_STREAM_ARB_RESYNC_MSK1 0x2bc5a00
+#define CS48L32_DSP1_YMEM_PACKED_0 0x2c00000
+#define CS48L32_DSP1_YMEM_PACKED_LAST 0x2c2fff0
+#define CS48L32_DSP1_YMEM_UNPACKED24_0 0x3400000
+#define CS48L32_DSP1_YMEM_UNPACKED24_LAST 0x343fff4
+#define CS48L32_DSP1_PMEM_0 0x3800000
+#define CS48L32_DSP1_PMEM_LAST 0x3845fe8
+
+/* (0x0) DEVID */
+#define CS48L32_DEVID_MASK 0x00ffffff
+#define CS48L32_DEVID_SHIFT 0
+
+/* (0x4) REVID */
+#define CS48L32_AREVID_MASK 0x000000f0
+#define CS48L32_AREVID_SHIFT 4
+#define CS48L32_MTLREVID_MASK 0x0000000f
+#define CS48L32_MTLREVID_SHIFT 0
+
+/* (0x10) OTPID */
+#define CS48L32_OTPID_MASK 0x0000000f
+
+/* (0x0804) MCU_CTRL1 */
+#define CS48L32_MCU_STS_MASK 0x0000ff00
+#define CS48L32_MCU_STS_SHIFT 8
+
+/* (0xc08) GPIO1_CTRL1 */
+#define CS48L32_GPIOX_CTRL1_FN_MASK 0x000003ff
+
+/* (0x1020) OUTPUT_SYS_CLK */
+#define CS48L32_OPCLK_EN_SHIFT 15
+#define CS48L32_OPCLK_DIV_MASK 0x000000f8
+#define CS48L32_OPCLK_DIV_SHIFT 3
+#define CS48L32_OPCLK_SEL_MASK 0x00000007
+
+/* (0x105c) AUXPDM_CTRL2 */
+#define CS48L32_AUXPDMDAT2_SRC_SHIFT 4
+#define CS48L32_AUXPDMDAT1_SRC_SHIFT 0
+
+/* (0x1400) CLOCK32K */
+#define CS48L32_CLK_32K_EN_MASK 0x00000040
+#define CS48L32_CLK_32K_SRC_MASK 0x00000003
+
+/* (0x1404) SYSTEM_CLOCK1 */
+#define CS48L32_SYSCLK_FRAC_MASK 0x00008000
+#define CS48L32_SYSCLK_FREQ_MASK 0x00000700
+#define CS48L32_SYSCLK_FREQ_SHIFT 8
+#define CS48L32_SYSCLK_EN_SHIFT 6
+#define CS48L32_SYSCLK_SRC_MASK 0x0000001f
+#define CS48L32_SYSCLK_SRC_SHIFT 0
+
+/* (0x1408) SYSTEM_CLOCK2 */
+#define CS48L32_SYSCLK_FREQ_STS_MASK 0x00000700
+#define CS48L32_SYSCLK_FREQ_STS_SHIFT 8
+
+/* (0x1420) SAMPLE_RATE1 */
+#define CS48L32_SAMPLE_RATE_1_MASK 0x0000001f
+#define CS48L32_SAMPLE_RATE_1_SHIFT 0
+
+/* (0x1510) DSP_CLOCK1 */
+#define CS48L32_DSP_CLK_FREQ_MASK 0xffff0000
+#define CS48L32_DSP_CLK_FREQ_SHIFT 16
+
+/* (0x1c00) FLL_CONTROL1 */
+#define CS48L32_FLL_CTRL_UPD_MASK 0x00000004
+#define CS48L32_FLL_HOLD_MASK 0x00000002
+#define CS48L32_FLL_EN_MASK 0x00000001
+
+/* (0x1c04) FLL_CONTROL2 */
+#define CS48L32_FLL_LOCKDET_THR_MASK 0xf0000000
+#define CS48L32_FLL_LOCKDET_THR_SHIFT 28
+#define CS48L32_FLL_LOCKDET_MASK 0x08000000
+#define CS48L32_FLL_PHASEDET_MASK 0x00400000
+#define CS48L32_FLL_PHASEDET_SHIFT 22
+#define CS48L32_FLL_REFCLK_DIV_MASK 0x00030000
+#define CS48L32_FLL_REFCLK_DIV_SHIFT 16
+#define CS48L32_FLL_REFCLK_SRC_MASK 0x0000f000
+#define CS48L32_FLL_REFCLK_SRC_SHIFT 12
+#define CS48L32_FLL_N_MASK 0x000003ff
+#define CS48L32_FLL_N_SHIFT 0
+
+/* (0x1c08) FLL_CONTROL3 */
+#define CS48L32_FLL_LAMBDA_MASK 0xffff0000
+#define CS48L32_FLL_LAMBDA_SHIFT 16
+#define CS48L32_FLL_THETA_MASK 0x0000ffff
+#define CS48L32_FLL_THETA_SHIFT 0
+
+/* (0x1c0c) FLL_CONTROL4 */
+#define CS48L32_FLL_FD_GAIN_COARSE_SHIFT 16
+#define CS48L32_FLL_HP_MASK 0x00003000
+#define CS48L32_FLL_HP_SHIFT 12
+#define CS48L32_FLL_FB_DIV_MASK 0x000003ff
+#define CS48L32_FLL_FB_DIV_SHIFT 0
+
+/* (0x1c10) FLL_CONTROL5 */
+#define CS48L32_FLL_FRC_INTEG_UPD_MASK 0x00008000
+
+/* (0x2000) CHARGE_PUMP1 */
+#define CS48L32_CP2_BYPASS_SHIFT 1
+#define CS48L32_CP2_EN_SHIFT 0
+
+/* (0x2408) LDO2_CTRL1 */
+#define CS48L32_LDO2_VSEL_MASK 0x000007e0
+#define CS48L32_LDO2_VSEL_SHIFT 5
+
+/* (0x2410) MICBIAS_CTRL1 */
+#define CS48L32_MICB1_LVL_MASK 0x000001e0
+#define CS48L32_MICB1_LVL_SHIFT 5
+#define CS48L32_MICB1_EN_SHIFT 0
+
+/* (0x2418) MICBIAS_CTRL5 */
+#define CS48L32_MICB1C_EN_SHIFT 8
+#define CS48L32_MICB1B_EN_SHIFT 4
+#define CS48L32_MICB1A_EN_SHIFT 0
+
+/* (0x2710) IRQ1_CTRL_AOD */
+#define CS48L32_IRQ_POL_MASK 0x00000400
+
+/* (0x4000) INPUT_CONTROL */
+#define CS48L32_IN2L_EN_SHIFT 3
+#define CS48L32_IN2R_EN_SHIFT 2
+#define CS48L32_IN1L_EN_SHIFT 1
+#define CS48L32_IN1R_EN_SHIFT 0
+
+/* (0x400c) INPUT_CONTROL2 */
+#define CS48L32_PDM_FLLCLK_SRC_MASK 0x0000000f
+#define CS48L32_PDM_FLLCLK_SRC_SHIFT 0
+
+/* (0x4014) INPUT_CONTROL3 */
+#define CS48L32_IN_VU 0x20000000
+#define CS48L32_IN_VU_MASK 0x20000000
+#define CS48L32_IN_VU_SHIFT 29
+#define CS48L32_IN_VU_WIDTH 1
+
+/* (0x4020) INPUT1_CONTROL1 */
+#define CS48L32_IN1_OSR_SHIFT 16
+#define CS48L32_IN1_PDM_SUP_MASK 0x00000300
+#define CS48L32_IN1_PDM_SUP_SHIFT 8
+#define CS48L32_IN1_MODE_SHIFT 0
+
+/*
+ * (0x4024) IN1L_CONTROL1
+ * (0x4044) IN1R_CONTROL1
+ */
+#define CS48L32_INx_SRC_MASK 0x30000000
+#define CS48L32_INx_SRC_SHIFT 28
+#define CS48L32_INx_RATE_MASK 0x0000f800
+#define CS48L32_INx_RATE_SHIFT 11
+#define CS48L32_INx_HPF_SHIFT 2
+#define CS48L32_INx_LP_MODE_SHIFT 0
+
+/*
+ * (0x4028) IN1L_CONTROL2
+ * (0x4048) IN1R_CONTROL2
+ */
+#define CS48L32_INx_MUTE_MASK 0x10000000
+#define CS48L32_INx_VOL_SHIFT 16
+#define CS48L32_INx_PGA_VOL_SHIFT 1
+
+/* (0x4244) INPUT_HPF_CONTROL */
+#define CS48L32_IN_HPF_CUT_SHIFT 0
+
+/* (0x4248) INPUT_VOL_CONTROL */
+#define CS48L32_IN_VD_RAMP_SHIFT 4
+#define CS48L32_IN_VI_RAMP_SHIFT 0
+
+/* (0x4308) AUXPDM1_CONTROL1 */
+#define CS48L32_AUXPDM1_FREQ_SHIFT 16
+#define CS48L32_AUXPDM1_SRC_MASK 0x00000f00
+#define CS48L32_AUXPDM1_SRC_SHIFT 8
+
+/* (0x4688) ADC1L_ANA_CONTROL1 */
+/* (0x468c) ADC1R_ANA_CONTROL1 */
+#define CS48L32_ADC1x_INT_ENA_FRC_MASK 0x00000002
+
+/* (0x6004) ASPn_CONTROL1 */
+#define CS48L32_ASP_RATE_MASK 0x00001f00
+#define CS48L32_ASP_RATE_SHIFT 8
+#define CS48L32_ASP_BCLK_FREQ_MASK 0x0000003f
+
+/* (0x6008) ASPn_CONTROL2 */
+#define CS48L32_ASP_RX_WIDTH_MASK 0xff000000
+#define CS48L32_ASP_RX_WIDTH_SHIFT 24
+#define CS48L32_ASP_TX_WIDTH_MASK 0x00ff0000
+#define CS48L32_ASP_TX_WIDTH_SHIFT 16
+#define CS48L32_ASP_FMT_MASK 0x00000700
+#define CS48L32_ASP_FMT_SHIFT 8
+#define CS48L32_ASP_BCLK_INV_MASK 0x00000040
+#define CS48L32_ASP_BCLK_MSTR_MASK 0x00000010
+#define CS48L32_ASP_FSYNC_INV_MASK 0x00000004
+#define CS48L32_ASP_FSYNC_MSTR_MASK 0x00000001
+
+/* (0x6010) ASPn_CONTROL3 */
+#define CS48L32_ASP_DOUT_HIZ_MASK 0x00000003
+
+/* (0x6030) ASPn_DATA_CONTROL1 */
+#define CS48L32_ASP_TX_WL_MASK 0x0000003f
+
+/* (0x6040) ASPn_DATA_CONTROL5 */
+#define CS48L32_ASP_RX_WL_MASK 0x0000003f
+
+/* (0x82xx - 0x90xx) *_INPUT[1-4] */
+#define CS48L32_MIXER_VOL_MASK 0x00FE0000
+#define CS48L32_MIXER_VOL_SHIFT 17
+#define CS48L32_MIXER_VOL_WIDTH 7
+#define CS48L32_MIXER_SRC_MASK 0x000001ff
+#define CS48L32_MIXER_SRC_SHIFT 0
+#define CS48L32_MIXER_SRC_WIDTH 9
+
+/* (0xa400) ISRC1_CONTROL1 */
+#define CS48L32_ISRC1_FSL_MASK 0xf8000000
+#define CS48L32_ISRC1_FSL_SHIFT 27
+#define CS48L32_ISRC1_FSH_MASK 0x0000f800
+#define CS48L32_ISRC1_FSH_SHIFT 11
+
+/* (0xa404) ISRC1_CONTROL2 */
+#define CS48L32_ISRC1_INT4_EN_SHIFT 11
+#define CS48L32_ISRC1_INT3_EN_SHIFT 10
+#define CS48L32_ISRC1_INT2_EN_SHIFT 9
+#define CS48L32_ISRC1_INT1_EN_SHIFT 8
+#define CS48L32_ISRC1_DEC4_EN_SHIFT 3
+#define CS48L32_ISRC1_DEC3_EN_SHIFT 2
+#define CS48L32_ISRC1_DEC2_EN_SHIFT 1
+#define CS48L32_ISRC1_DEC1_EN_SHIFT 0
+
+/* (0xa800) FX_SAMPLE_RATE */
+#define CS48L32_FX_RATE_MASK 0x0000f800
+#define CS48L32_FX_RATE_SHIFT 11
+
+/* (0xab00) DRC1_CONTROL1 */
+#define CS48L32_DRC1L_EN_SHIFT 1
+#define CS48L32_DRC1R_EN_SHIFT 0
+
+/* (0xb400) Comfort_Noise_Generator */
+#define CS48L32_NOISE_GEN_RATE_MASK 0x0000f800
+#define CS48L32_NOISE_GEN_RATE_SHIFT 11
+#define CS48L32_NOISE_GEN_EN_SHIFT 5
+#define CS48L32_NOISE_GEN_GAIN_SHIFT 0
+
+/* (0xb800) US_CONTROL */
+#define CS48L32_US1_DET_EN_SHIFT 8
+
+/* (0xb804) US1_CONTROL */
+#define CS48L32_US1_RATE_MASK 0xf8000000
+#define CS48L32_US1_RATE_SHIFT 27
+#define CS48L32_US1_GAIN_SHIFT 12
+#define CS48L32_US1_SRC_MASK 0x00000f00
+#define CS48L32_US1_SRC_SHIFT 8
+#define CS48L32_US1_FREQ_MASK 0x00000070
+#define CS48L32_US1_FREQ_SHIFT 4
+
+/* (0xb808) US1_DET_CONTROL */
+#define CS48L32_US1_DET_DCY_SHIFT 28
+#define CS48L32_US1_DET_HOLD_SHIFT 24
+#define CS48L32_US1_DET_NUM_SHIFT 20
+#define CS48L32_US1_DET_THR_SHIFT 16
+#define CS48L32_US1_DET_LPF_CUT_SHIFT 5
+#define CS48L32_US1_DET_LPF_SHIFT 4
+
+/* (0x18004) IRQ1_STATUS */
+#define CS48L32_IRQ1_STS_MASK 0x00000001
+
+/* (0x18014) IRQ1_EINT_2 */
+#define CS48L32_BOOT_DONE_EINT1_MASK 0x00000008
+
+/* (0x18028) IRQ1_EINT_7 */
+#define CS48L32_DSP1_MPU_ERR_EINT1_MASK 0x00200000
+#define CS48L32_DSP1_WDT_EXPIRE_EINT1_MASK 0x00100000
+
+/* (0x18030) IRQ1_EINT_9 */
+#define CS48L32_DSP1_IRQ0_EINT1_MASK 0x00000001
+
+/* (0x180a4) IRQ1_STS_6 */
+#define CS48L32_FLL1_LOCK_STS1_MASK 0x00000001
+
+#endif
diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index f6baa9a01868..1ef13bcdc43f 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -38,8 +38,6 @@ int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream,
int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream);
int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream);
-int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream,
- dma_filter_fn filter_fn, void *filter_data);
int snd_dmaengine_pcm_close_release_chan(struct snd_pcm_substream *substream);
struct dma_chan *snd_dmaengine_pcm_request_channel(dma_filter_fn filter_fn,
diff --git a/include/sound/gus.h b/include/sound/gus.h
index cd8da68cab92..1c8fb6c93e50 100644
--- a/include/sound/gus.h
+++ b/include/sound/gus.h
@@ -513,22 +513,6 @@ struct _SND_IW_LFO_PROGRAM {
unsigned short depth;
};
-#if 0
-extern irqreturn_t snd_gf1_lfo_effect_interrupt(struct snd_gus_card * gus, snd_gf1_voice_t * voice);
-#endif
-extern void snd_gf1_lfo_init(struct snd_gus_card * gus);
-extern void snd_gf1_lfo_done(struct snd_gus_card * gus);
-extern void snd_gf1_lfo_program(struct snd_gus_card * gus, int voice, int lfo_type, struct _SND_IW_LFO_PROGRAM *program);
-extern void snd_gf1_lfo_enable(struct snd_gus_card * gus, int voice, int lfo_type);
-extern void snd_gf1_lfo_disable(struct snd_gus_card * gus, int voice, int lfo_type);
-extern void snd_gf1_lfo_change_freq(struct snd_gus_card * gus, int voice, int lfo_type, int freq);
-extern void snd_gf1_lfo_change_depth(struct snd_gus_card * gus, int voice, int lfo_type, int depth);
-extern void snd_gf1_lfo_setup(struct snd_gus_card * gus, int voice, int lfo_type, int freq, int current_depth, int depth, int sweep, int shape);
-extern void snd_gf1_lfo_shutdown(struct snd_gus_card * gus, int voice, int lfo_type);
-#if 0
-extern void snd_gf1_lfo_command(struct snd_gus_card * gus, int voice, unsigned char *command);
-#endif
-
/* gus_mem.c */
void snd_gf1_mem_lock(struct snd_gf1_mem * alloc, int xup);
@@ -578,14 +562,8 @@ int snd_gf1_new_mixer(struct snd_gus_card * gus);
int snd_gf1_pcm_new(struct snd_gus_card *gus, int pcm_dev, int control_index);
-#ifdef CONFIG_SND_DEBUG
-extern void snd_gf1_print_voice_registers(struct snd_gus_card * gus);
-#endif
-
/* gus.c */
-int snd_gus_use_inc(struct snd_gus_card * gus);
-void snd_gus_use_dec(struct snd_gus_card * gus);
int snd_gus_create(struct snd_card *card,
unsigned long port,
int irq, int dma1, int dma2,
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index b098ceadbe74..25668eee65cf 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -223,7 +223,7 @@ struct hdac_driver {
struct device_driver driver;
int type;
const struct hda_device_id *id_table;
- int (*match)(struct hdac_device *dev, struct hdac_driver *drv);
+ int (*match)(struct hdac_device *dev, const struct hdac_driver *drv);
void (*unsol_event)(struct hdac_device *dev, unsigned int event);
/* fields used by ext bus APIs */
@@ -235,7 +235,7 @@ struct hdac_driver {
#define drv_to_hdac_driver(_drv) container_of(_drv, struct hdac_driver, driver)
const struct hda_device_id *
-hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv);
+hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv);
/*
* Bus verb operators
@@ -598,8 +598,6 @@ void snd_hdac_stream_spbcap_enable(struct hdac_bus *chip,
bool enable, int index);
int snd_hdac_stream_set_spib(struct hdac_bus *bus,
struct hdac_stream *azx_dev, u32 value);
-int snd_hdac_stream_get_spbmaxfifo(struct hdac_bus *bus,
- struct hdac_stream *azx_dev);
void snd_hdac_stream_drsm_enable(struct hdac_bus *bus,
bool enable, int index);
int snd_hdac_stream_wait_drsm(struct hdac_stream *azx_dev);
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index 4c7a40e149a5..7de390022ac2 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -22,6 +22,7 @@ void snd_hdac_ext_bus_ppcap_enable(struct hdac_bus *chip, bool enable);
void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_bus *chip, bool enable);
int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus);
+struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id);
struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_addr(struct hdac_bus *bus, int addr);
struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_name(struct hdac_bus *bus,
const char *codec_name);
@@ -97,12 +98,17 @@ struct hdac_ext_link {
void __iomem *ml_addr; /* link output stream reg pointer */
u32 lcaps; /* link capablities */
u16 lsdiid; /* link sdi identifier */
+ u32 id;
+ u8 slcount;
int ref_count;
struct list_head list;
};
+#define hdac_ext_link_alt(link) ((link)->lcaps & AZX_ML_HDA_LCAP_ALT)
+#define hdac_ext_link_ofls(link) ((link)->lcaps & AZX_ML_HDA_LCAP_OFLS)
+
int snd_hdac_ext_bus_link_power_up(struct hdac_ext_link *hlink);
int snd_hdac_ext_bus_link_power_down(struct hdac_ext_link *hlink);
int snd_hdac_ext_bus_link_power_up_all(struct hdac_bus *bus);
diff --git a/include/sound/jack.h b/include/sound/jack.h
index 1ed90e2109e9..36dc104c1145 100644
--- a/include/sound/jack.h
+++ b/include/sound/jack.h
@@ -79,7 +79,6 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
struct snd_jack **jack, bool initial_kctl, bool phantom_jack);
int snd_jack_add_new_kctl(struct snd_jack *jack, const char * name, int mask);
#ifdef CONFIG_SND_JACK_INPUT_DEV
-void snd_jack_set_parent(struct snd_jack *jack, struct device *parent);
int snd_jack_set_key(struct snd_jack *jack, enum snd_jack_types type,
int keytype);
#endif
@@ -104,11 +103,6 @@ static inline void snd_jack_report(struct snd_jack *jack, int status)
#endif
#if !defined(CONFIG_SND_JACK) || !defined(CONFIG_SND_JACK_INPUT_DEV)
-static inline void snd_jack_set_parent(struct snd_jack *jack,
- struct device *parent)
-{
-}
-
static inline int snd_jack_set_key(struct snd_jack *jack,
enum snd_jack_types type,
int keytype)
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 8becb4504887..58fd6e84f961 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1251,8 +1251,6 @@ unsigned int snd_pcm_rate_to_rate_bit(unsigned int rate);
unsigned int snd_pcm_rate_bit_to_rate(unsigned int rate_bit);
unsigned int snd_pcm_rate_mask_intersect(unsigned int rates_a,
unsigned int rates_b);
-unsigned int snd_pcm_rate_range_to_bits(unsigned int rate_min,
- unsigned int rate_max);
/**
* snd_pcm_set_runtime_buffer - Set the PCM runtime buffer
@@ -1404,6 +1402,8 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s
#define snd_pcm_lib_mmap_iomem NULL
#endif
+void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime);
+
/**
* snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer
* @dma: DMA number
diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h
new file mode 100644
index 000000000000..9121531f0826
--- /dev/null
+++ b/include/sound/sdca_asoc.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The MIPI SDCA specification is available for public downloads at
+ * https://www.mipi.org/mipi-sdca-v1-0-download
+ *
+ * Copyright (C) 2025 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef __SDCA_ASOC_H__
+#define __SDCA_ASOC_H__
+
+struct device;
+struct sdca_function_data;
+struct snd_kcontrol_new;
+struct snd_soc_component_driver;
+struct snd_soc_dai_driver;
+struct snd_soc_dai_ops;
+struct snd_soc_dapm_route;
+struct snd_soc_dapm_widget;
+
+int sdca_asoc_count_component(struct device *dev, struct sdca_function_data *function,
+ int *num_widgets, int *num_routes, int *num_controls,
+ int *num_dais);
+
+int sdca_asoc_populate_dapm(struct device *dev, struct sdca_function_data *function,
+ struct snd_soc_dapm_widget *widgets,
+ struct snd_soc_dapm_route *routes);
+int sdca_asoc_populate_controls(struct device *dev,
+ struct sdca_function_data *function,
+ struct snd_kcontrol_new *kctl);
+int sdca_asoc_populate_dais(struct device *dev, struct sdca_function_data *function,
+ struct snd_soc_dai_driver *dais,
+ const struct snd_soc_dai_ops *ops);
+
+int sdca_asoc_populate_component(struct device *dev,
+ struct sdca_function_data *function,
+ struct snd_soc_component_driver *component_drv,
+ struct snd_soc_dai_driver **dai_drv, int *num_dai_drv,
+ const struct snd_soc_dai_ops *ops);
+
+#endif // __SDCA_ASOC_H__
diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h
index 253654568a41..eaedb54a8322 100644
--- a/include/sound/sdca_function.h
+++ b/include/sound/sdca_function.h
@@ -125,7 +125,7 @@ struct sdca_init_write {
* macros.
*
* Short hand to specific a Control type statically for example:
- * SDAC_CTL_TYPE_S(IT, MIC_BIAS).
+ * SDCA_CTL_TYPE_S(IT, MIC_BIAS).
*/
#define SDCA_CTL_TYPE_S(ent, sel) SDCA_CTL_TYPE(SDCA_ENTITY_TYPE_##ent, \
SDCA_CTL_##ent##_##sel)
@@ -169,6 +169,20 @@ enum sdca_ot_controls {
};
/**
+ * enum sdca_usage_range - Column definitions for Usage
+ */
+enum sdca_usage_range {
+ SDCA_USAGE_NUMBER = 0,
+ SDCA_USAGE_CBN = 1,
+ SDCA_USAGE_SAMPLE_RATE = 2,
+ SDCA_USAGE_SAMPLE_WIDTH = 3,
+ SDCA_USAGE_FULL_SCALE = 4,
+ SDCA_USAGE_NOISE_FLOOR = 5,
+ SDCA_USAGE_TAG = 6,
+ SDCA_USAGE_NCOLS = 7,
+};
+
+/**
* enum sdca_mu_controls - SDCA Controls for Mixer Unit
*
* Control Selectors for Mixer Unit from SDCA specification v1.0
@@ -207,6 +221,16 @@ enum sdca_fu_controls {
};
/**
+ * enum sdca_volume_range - Column definitions for Q7.8dB volumes/gains
+ */
+enum sdca_volume_range {
+ SDCA_VOLUME_LINEAR_MIN = 0,
+ SDCA_VOLUME_LINEAR_MAX = 1,
+ SDCA_VOLUME_LINEAR_STEP = 2,
+ SDCA_VOLUME_LINEAR_NCOLS = 3,
+};
+
+/**
* enum sdca_xu_controls - SDCA Controls for Extension Unit
*
* Control Selectors for Extension Unit from SDCA specification v1.0
@@ -237,6 +261,15 @@ enum sdca_cs_controls {
};
/**
+ * enum sdca_samplerateindex_range - Column definitions for SampleRateIndex
+ */
+enum sdca_samplerateindex_range {
+ SDCA_SAMPLERATEINDEX_INDEX = 0,
+ SDCA_SAMPLERATEINDEX_RATE = 1,
+ SDCA_SAMPLERATEINDEX_NCOLS = 2,
+};
+
+/**
* enum sdca_cx_controls - SDCA Controls for Clock Selector
*
* Control Selectors for Clock Selector from SDCA specification v1.0
@@ -258,6 +291,14 @@ enum sdca_pde_controls {
};
/**
+ * enum sdca_requested_ps_range - Column definitions for Requested PS
+ */
+enum sdca_requested_ps_range {
+ SDCA_REQUESTED_PS_STATE = 0,
+ SDCA_REQUESTED_PS_NCOLS = 1,
+};
+
+/**
* enum sdca_ge_controls - SDCA Controls for Group Unit
*
* Control Selectors for Group Unit from SDCA specification v1.0
@@ -269,6 +310,15 @@ enum sdca_ge_controls {
};
/**
+ * enum sdca_selected_mode_range - Column definitions for Selected Mode
+ */
+enum sdca_selected_mode_range {
+ SDCA_SELECTED_MODE_INDEX = 0,
+ SDCA_SELECTED_MODE_TERM_TYPE = 1,
+ SDCA_SELECTED_MODE_NCOLS = 2,
+};
+
+/**
* enum sdca_spe_controls - SDCA Controls for Security & Privacy Unit
*
* Control Selectors for Security & Privacy Unit from SDCA
@@ -773,6 +823,25 @@ enum sdca_terminal_type {
SDCA_TERM_TYPE_PRIVACY_INDICATORS = 0x747,
};
+#define SDCA_TERM_TYPE_LINEIN_STEREO_NAME "LineIn Stereo"
+#define SDCA_TERM_TYPE_LINEIN_FRONT_LR_NAME "LineIn Front-LR"
+#define SDCA_TERM_TYPE_LINEIN_CENTER_LFE_NAME "LineIn Center-LFE"
+#define SDCA_TERM_TYPE_LINEIN_SURROUND_LR_NAME "LineIn Surround-LR"
+#define SDCA_TERM_TYPE_LINEIN_REAR_LR_NAME "LineIn Rear-LR"
+#define SDCA_TERM_TYPE_LINEOUT_STEREO_NAME "LineOut Stereo"
+#define SDCA_TERM_TYPE_LINEOUT_FRONT_LR_NAME "LineOut Front-LR"
+#define SDCA_TERM_TYPE_LINEOUT_CENTER_LFE_NAME "LineOut Center-LFE"
+#define SDCA_TERM_TYPE_LINEOUT_SURROUND_LR_NAME "LineOut Surround-LR"
+#define SDCA_TERM_TYPE_LINEOUT_REAR_LR_NAME "LineOut Rear-LR"
+#define SDCA_TERM_TYPE_MIC_JACK_NAME "Microphone"
+#define SDCA_TERM_TYPE_STEREO_JACK_NAME "Speaker Stereo"
+#define SDCA_TERM_TYPE_FRONT_LR_JACK_NAME "Speaker Front-LR"
+#define SDCA_TERM_TYPE_CENTER_LFE_JACK_NAME "Speaker Center-LFE"
+#define SDCA_TERM_TYPE_SURROUND_LR_JACK_NAME "Speaker Surround-LR"
+#define SDCA_TERM_TYPE_REAR_LR_JACK_NAME "Speaker Rear-LR"
+#define SDCA_TERM_TYPE_HEADPHONE_JACK_NAME "Headphone"
+#define SDCA_TERM_TYPE_HEADSET_JACK_NAME "Headset"
+
/**
* enum sdca_connector_type - SDCA Connector Types
*
diff --git a/include/sound/snd_wavefront.h b/include/sound/snd_wavefront.h
index 27f7e8a477c2..30f508a56766 100644
--- a/include/sound/snd_wavefront.h
+++ b/include/sound/snd_wavefront.h
@@ -110,12 +110,8 @@ struct _snd_wavefront_card {
};
extern void snd_wavefront_internal_interrupt (snd_wavefront_card_t *card);
-extern int snd_wavefront_detect_irq (snd_wavefront_t *dev) ;
-extern int snd_wavefront_check_irq (snd_wavefront_t *dev, int irq);
-extern int snd_wavefront_restart (snd_wavefront_t *dev);
extern int snd_wavefront_start (snd_wavefront_t *dev);
extern int snd_wavefront_detect (snd_wavefront_card_t *card);
-extern int snd_wavefront_config_midi (snd_wavefront_t *dev) ;
extern int snd_wavefront_cmd (snd_wavefront_t *, int, unsigned char *,
unsigned char *);
diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 72e371a21767..b8af309c2683 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -10,6 +10,7 @@
#include <linux/acpi.h>
#include <linux/mod_devicetable.h>
#include <linux/soundwire/sdw.h>
+#include <sound/soc.h>
struct snd_soc_acpi_package_context {
char *name; /* package name */
@@ -193,6 +194,15 @@ struct snd_soc_acpi_link_adr {
* is not constant since this field may be updated at run-time
* @sof_tplg_filename: Sound Open Firmware topology file name, if enabled
* @tplg_quirk_mask: quirks to select different topology files dynamically
+ * @get_function_tplg_files: This is an optional callback, if specified then instead of
+ * the single sof_tplg_filename the callback will return the list of function topology
+ * files to be loaded.
+ * Return value: The number of the files or negative ERRNO. 0 means that the single topology
+ * file should be used, no function topology split can be used on the machine.
+ * @card: the pointer of the card
+ * @mach: the pointer of the machine driver
+ * @prefix: the prefix of the topology file name. Typically, it is the path.
+ * @tplg_files: the pointer of the array of the topology file names.
*/
/* Descriptor for SST ASoC machine driver */
struct snd_soc_acpi_mach {
@@ -212,6 +222,9 @@ struct snd_soc_acpi_mach {
struct snd_soc_acpi_mach_params mach_params;
const char *sof_tplg_filename;
const u32 tplg_quirk_mask;
+ int (*get_function_tplg_files)(struct snd_soc_card *card,
+ const struct snd_soc_acpi_mach *mach,
+ const char *prefix, const char ***tplg_files);
};
#define SND_SOC_ACPI_MAX_CODECS 3
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index af802ef536e7..400584474bc8 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -445,6 +445,10 @@ int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *uncontrol);
int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *uncontrol);
+int snd_soc_dapm_get_component_pin_switch(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *uncontrol);
+int snd_soc_dapm_put_component_pin_switch(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *uncontrol);
int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm,
const struct snd_soc_dapm_widget *widget, unsigned int num);
struct snd_soc_dapm_widget *snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm,
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 952ed77b8c87..1fffef311c41 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -394,27 +394,20 @@ struct platform_device;
#define SOC_ENUM_SINGLE_VIRT_DECL(name, xtexts) \
const struct soc_enum name = SOC_ENUM_SINGLE_VIRT(ARRAY_SIZE(xtexts), xtexts)
-struct snd_jack;
struct snd_soc_card;
-struct snd_soc_pcm_stream;
-struct snd_soc_ops;
struct snd_soc_pcm_runtime;
struct snd_soc_dai;
struct snd_soc_dai_driver;
struct snd_soc_dai_link;
struct snd_soc_component;
struct snd_soc_component_driver;
-struct soc_enum;
struct snd_soc_jack;
-struct snd_soc_jack_zone;
struct snd_soc_jack_pin;
#include <sound/soc-dapm.h>
#include <sound/soc-dpcm.h>
#include <sound/soc-topology.h>
-struct snd_soc_jack_gpio;
-
enum snd_soc_pcm_subclass {
SND_SOC_PCM_CLASS_PCM = 0,
SND_SOC_PCM_CLASS_BE = 1,
@@ -423,6 +416,7 @@ enum snd_soc_pcm_subclass {
int snd_soc_register_card(struct snd_soc_card *card);
void snd_soc_unregister_card(struct snd_soc_card *card);
int devm_snd_soc_register_card(struct device *dev, struct snd_soc_card *card);
+int devm_snd_soc_register_deferrable_card(struct device *dev, struct snd_soc_card *card);
#ifdef CONFIG_PM_SLEEP
int snd_soc_suspend(struct device *dev);
int snd_soc_resume(struct device *dev);
@@ -450,7 +444,7 @@ int snd_soc_register_component(struct device *dev,
int devm_snd_soc_register_component(struct device *dev,
const struct snd_soc_component_driver *component_driver,
struct snd_soc_dai_driver *dai_drv, int num_dai);
-void snd_soc_unregister_component(struct device *dev);
+#define snd_soc_unregister_component(dev) snd_soc_unregister_component_by_driver(dev, NULL)
void snd_soc_unregister_component_by_driver(struct device *dev,
const struct snd_soc_component_driver *component_driver);
struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev,
@@ -468,8 +462,6 @@ static inline int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd)
}
#endif
-void snd_soc_disconnect_sync(struct device *dev);
-
struct snd_soc_pcm_runtime *snd_soc_get_pcm_runtime(struct snd_soc_card *card,
struct snd_soc_dai_link *dai_link);
@@ -935,7 +927,7 @@ snd_soc_link_to_platform(struct snd_soc_dai_link *link, int n) {
extern struct snd_soc_dai_link_component null_dailink_component[0];
extern struct snd_soc_dai_link_component snd_soc_dummy_dlc;
-
+int snd_soc_dlc_is_dummy(struct snd_soc_dai_link_component *dlc);
struct snd_soc_codec_conf {
/*
@@ -1087,6 +1079,7 @@ struct snd_soc_card {
unsigned int fully_routed:1;
unsigned int probed:1;
unsigned int component_chaining:1;
+ struct device *devres_dev;
void *drvdata;
};
diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h
index d8bd5d37131a..b63021f5afaf 100644
--- a/include/sound/soc_sdw_utils.h
+++ b/include/sound/soc_sdw_utils.h
@@ -159,9 +159,8 @@ void asoc_sdw_init_dai_link(struct device *dev, struct snd_soc_dai_link *dai_lin
int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links,
int *be_id, char *name, int playback, int capture,
const char *cpu_dai_name, const char *platform_comp_name,
- int num_platforms, const char *codec_name,
- const char *codec_dai_name, int no_pcm,
- int (*init)(struct snd_soc_pcm_runtime *rtd),
+ const char *codec_name, const char *codec_dai_name,
+ int no_pcm, int (*init)(struct snd_soc_pcm_runtime *rtd),
const struct snd_soc_ops *ops);
int asoc_sdw_count_sdw_endpoints(struct snd_soc_card *card, int *num_devs, int *num_ends);
diff --git a/include/sound/sof.h b/include/sound/sof.h
index 64fd5504cb2b..eddea82c7b5a 100644
--- a/include/sound/sof.h
+++ b/include/sound/sof.h
@@ -106,6 +106,7 @@ struct snd_sof_pdata {
const char *fw_filename;
const char *tplg_filename_prefix;
const char *tplg_filename;
+ bool disable_function_topology;
/* loadable external libraries available under this directory */
const char *fw_lib_prefix;
diff --git a/include/sound/tas2781-comlib-i2c.h b/include/sound/tas2781-comlib-i2c.h
new file mode 100644
index 000000000000..a1afa5c444ba
--- /dev/null
+++ b/include/sound/tas2781-comlib-i2c.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+//
+// ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier
+//
+// Copyright (C) 2025 Texas Instruments Incorporated
+// https://www.ti.com
+//
+// The TAS2563/TAS2781 driver implements a flexible and configurable
+// algo coefficient setting for one, two, or even multiple
+// TAS2563/TAS2781 chips.
+//
+// Author: Shenghao Ding <shenghao-ding@ti.com>
+//
+
+#ifndef __TAS2781_COMLIB_I2C_H__
+#define __TAS2781_COMLIB_I2C_H__
+
+void tasdevice_reset(struct tasdevice_priv *tas_dev);
+int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+ struct module *module,
+ void (*cont)(const struct firmware *fw, void *context));
+struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c);
+int tasdevice_init(struct tasdevice_priv *tas_priv);
+int tasdev_chn_switch(struct tasdevice_priv *tas_priv,
+ unsigned short chn);
+int tasdevice_dev_update_bits(
+ struct tasdevice_priv *tasdevice, unsigned short chn,
+ unsigned int reg, unsigned int mask, unsigned int value);
+int tasdevice_amp_putvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
+int tasdevice_amp_getvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
+int tasdevice_digital_getvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
+int tasdevice_digital_putvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
+#endif /* __TAS2781_COMLIB_I2C_H__ */
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index eff011444cc8..40cd3bd079b5 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -32,6 +32,8 @@
SNDRV_PCM_FMTBIT_S24_LE | \
SNDRV_PCM_FMTBIT_S32_LE)
+#define TASDEVICE_CRC8_POLYNOMIAL 0x4d
+
/* PAGE Control Register (available in page0 of each book) */
#define TASDEVICE_PAGE_SELECT 0x00
#define TASDEVICE_BOOKCTL_PAGE 0x00
@@ -47,7 +49,7 @@
#define TASDEVICE_REG_SWRESET TASDEVICE_REG(0x0, 0x0, 0x01)
#define TASDEVICE_REG_SWRESET_RESET BIT(0)
-/* I2C Checksum */
+/* Checksum */
#define TASDEVICE_CHECKSUM_REG TASDEVICE_REG(0x0, 0x0, 0x7e)
/* XM_340 */
@@ -103,11 +105,6 @@
#define TAS2781_RUNTIME_RE_REG_TF TASDEVICE_REG(0x64, 0x62, 0x48)
#define TAS2781_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x63, 0x44)
-#define TASDEVICE_CMD_SING_W 0x1
-#define TASDEVICE_CMD_BURST 0x2
-#define TASDEVICE_CMD_DELAY 0x3
-#define TASDEVICE_CMD_FIELD_W 0x4
-
enum audio_device {
TAS2563,
TAS2781,
@@ -119,11 +116,6 @@ enum dspbin_type {
TASDEV_BETA,
};
-enum device_catlog_id {
- LENOVO = 0,
- OTHERS
-};
-
struct bulk_reg_val {
int reg;
unsigned char val[4];
@@ -159,10 +151,33 @@ struct calidata {
unsigned int cali_dat_sz_per_dev;
};
+/*
+ * To enable CONFIG_SND_SOC_TAS2781_ACOUST_I2C will create a bridge to the
+ * acoustic tuning tool which can tune the chips' acoustic effect. Due to the
+ * whole directly exposing the registers, there exist some potential risks. So
+ * this define is invisible in Kconfig, anyone who wants to use acoustic tool
+ * have to edit the source manually.
+ */
+#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C
+#define TASDEV_DATA_PAYLOAD_SIZE 128
+struct acoustic_data {
+ unsigned char len;
+ unsigned char id;
+ unsigned char addr;
+ unsigned char book;
+ unsigned char page;
+ unsigned char reg;
+ unsigned char data[TASDEV_DATA_PAYLOAD_SIZE];
+};
+#endif
+
struct tasdevice_priv {
struct tasdevice tasdevice[TASDEVICE_MAX_CHANNELS];
struct tasdevice_rca rcabin;
struct calidata cali_data;
+#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C
+ struct acoustic_data acou_data;
+#endif
struct tasdevice_fw *fmw;
struct gpio_desc *speaker_id;
struct gpio_desc *reset;
@@ -170,7 +185,6 @@ struct tasdevice_priv {
struct regmap *regmap;
struct device *dev;
- enum device_catlog_id catlog_id;
unsigned char cal_binaryname[TASDEVICE_MAX_CHANNELS][64];
unsigned char crc8_lkp_tbl[CRC8_TABLE_SIZE];
unsigned char coef_binaryname[64];
@@ -193,6 +207,7 @@ struct tasdevice_priv {
bool force_fwload_status;
bool playback_started;
bool isacpi;
+ bool isspi;
bool is_user_space_calidata;
unsigned int global_addr;
@@ -210,41 +225,27 @@ struct tasdevice_priv {
int (*tasdevice_load_block)(struct tasdevice_priv *tas_priv,
struct tasdev_blk *block);
- int (*save_calibration)(struct tasdevice_priv *tas_priv);
- void (*apply_calibration)(struct tasdevice_priv *tas_priv);
+ int (*change_chn_book)(struct tasdevice_priv *tas_priv,
+ unsigned short chn, int book);
+ int (*update_bits)(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned int mask,
+ unsigned int value);
+ int (*dev_read)(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned int *value);
+ int (*dev_bulk_read)(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned char *p_data,
+ unsigned int n_length);
};
-void tasdevice_reset(struct tasdevice_priv *tas_dev);
-int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
- struct module *module,
- void (*cont)(const struct firmware *fw, void *context));
-struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c);
-int tasdevice_init(struct tasdevice_priv *tas_priv);
-void tasdevice_remove(struct tasdevice_priv *tas_priv);
-int tasdevice_save_calibration(struct tasdevice_priv *tas_priv);
-void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv);
-int tasdev_chn_switch(struct tasdevice_priv *tas_priv,
- unsigned short chn);
int tasdevice_dev_read(struct tasdevice_priv *tas_priv,
unsigned short chn, unsigned int reg, unsigned int *value);
+int tasdevice_dev_bulk_read(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned char *p_data,
+ unsigned int n_length);
int tasdevice_dev_write(struct tasdevice_priv *tas_priv,
unsigned short chn, unsigned int reg, unsigned int value);
int tasdevice_dev_bulk_write(
struct tasdevice_priv *tas_priv, unsigned short chn,
unsigned int reg, unsigned char *p_data, unsigned int n_length);
-int tasdevice_dev_bulk_read(struct tasdevice_priv *tas_priv,
- unsigned short chn, unsigned int reg, unsigned char *p_data,
- unsigned int n_length);
-int tasdevice_dev_update_bits(
- struct tasdevice_priv *tasdevice, unsigned short chn,
- unsigned int reg, unsigned int mask, unsigned int value);
-int tasdevice_amp_putvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
-int tasdevice_amp_getvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
-int tasdevice_digital_putvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
-int tasdevice_digital_getvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc);
-
+void tasdevice_remove(struct tasdevice_priv *tas_priv);
#endif /* __TAS2781_H__ */
diff --git a/include/sound/tpa6130a2-plat.h b/include/sound/tpa6130a2-plat.h
deleted file mode 100644
index a60930e36e93..000000000000
--- a/include/sound/tpa6130a2-plat.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * TPA6130A2 driver platform header
- *
- * Copyright (C) Nokia Corporation
- *
- * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
- */
-
-#ifndef TPA6130A2_PLAT_H
-#define TPA6130A2_PLAT_H
-
-struct tpa6130a2_platform_data {
- int power_gpio;
-};
-
-#endif
diff --git a/include/sound/ump_msg.h b/include/sound/ump_msg.h
index 72f60ddfea75..9556b4755a1e 100644
--- a/include/sound/ump_msg.h
+++ b/include/sound/ump_msg.h
@@ -604,7 +604,7 @@ struct snd_ump_stream_msg_ep_info {
} __packed;
/* UMP Stream Message: Device Info Notification (128bit) */
-struct snd_ump_stream_msg_devince_info {
+struct snd_ump_stream_msg_device_info {
#ifdef __BIG_ENDIAN_BITFIELD
/* 0 */
u32 type:4;
@@ -754,7 +754,7 @@ struct snd_ump_stream_msg_fb_name {
union snd_ump_stream_msg {
struct snd_ump_stream_msg_ep_discovery ep_discovery;
struct snd_ump_stream_msg_ep_info ep_info;
- struct snd_ump_stream_msg_devince_info device_info;
+ struct snd_ump_stream_msg_device_info device_info;
struct snd_ump_stream_msg_stream_cfg stream_cfg;
struct snd_ump_stream_msg_fb_discovery fb_discovery;
struct snd_ump_stream_msg_fb_info fb_info;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index bd0ea07338eb..14a924c0e303 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -11,7 +11,7 @@
#include <linux/tracepoint.h>
#include <uapi/linux/ioprio.h>
-#define RWBS_LEN 8
+#define RWBS_LEN 9
#define IOPRIO_CLASS_STRINGS \
{ IOPRIO_CLASS_NONE, "none" }, \
@@ -361,21 +361,6 @@ DECLARE_EVENT_CLASS(block_bio,
);
/**
- * block_bio_bounce - used bounce buffer when processing block operation
- * @bio: block operation
- *
- * A bounce buffer was used to handle the block operation @bio in @q.
- * This occurs when hardware limitations prevent a direct transfer of
- * data between the @bio data memory area and the IO device. Use of a
- * bounce buffer requires extra copying of data and decreases
- * performance.
- */
-DEFINE_EVENT(block_bio, block_bio_bounce,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
-
-/**
* block_bio_backmerge - merging block operation to the end of an existing operation
* @bio: new block operation to merge
*
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 3efc00cc1bcd..bebc252db865 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -143,7 +143,6 @@ FLUSH_STATES
#define EXTENT_FLAGS \
{ EXTENT_DIRTY, "DIRTY"}, \
- { EXTENT_UPTODATE, "UPTODATE"}, \
{ EXTENT_LOCKED, "LOCKED"}, \
{ EXTENT_NEW, "NEW"}, \
{ EXTENT_DELALLOC, "DELALLOC"}, \
@@ -224,8 +223,7 @@ DECLARE_EVENT_CLASS(btrfs__inode,
__entry->generation = BTRFS_I(inode)->generation;
__entry->last_trans = BTRFS_I(inode)->last_trans;
__entry->logged_trans = BTRFS_I(inode)->logged_trans;
- __entry->root_objectid =
- BTRFS_I(inode)->root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root);
),
TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu "
@@ -297,7 +295,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->root_objectid = root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(root);
__entry->ino = btrfs_ino(inode);
__entry->start = map->start;
__entry->len = map->len;
@@ -376,7 +374,7 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
),
TP_fast_assign_btrfs(bi->root->fs_info,
- __entry->root_obj = bi->root->root_key.objectid;
+ __entry->root_obj = btrfs_root_id(bi->root);
__entry->ino = btrfs_ino(bi);
__entry->isize = bi->vfs_inode.i_size;
__entry->disk_isize = bi->disk_i_size;
@@ -427,7 +425,7 @@ DECLARE_EVENT_CLASS(
TP_fast_assign_btrfs(
bi->root->fs_info,
- __entry->root_obj = bi->root->root_key.objectid;
+ __entry->root_obj = btrfs_root_id(bi->root);
__entry->ino = btrfs_ino(bi);
__entry->isize = bi->vfs_inode.i_size;
__entry->disk_isize = bi->disk_i_size;
@@ -527,7 +525,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
__entry->flags = ordered->flags;
__entry->compress_type = ordered->compress_type;
__entry->refs = refcount_read(&ordered->refs);
- __entry->root_objectid = inode->root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(inode->root);
__entry->truncated_len = ordered->truncated_len;
),
@@ -664,7 +662,7 @@ TRACE_EVENT(btrfs_finish_ordered_extent,
__entry->start = start;
__entry->len = len;
__entry->uptodate = uptodate;
- __entry->root_objectid = inode->root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(inode->root);
),
TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d",
@@ -705,8 +703,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
__entry->for_reclaim = wbc->for_reclaim;
__entry->range_cyclic = wbc->range_cyclic;
__entry->writeback_index = inode->i_mapping->writeback_index;
- __entry->root_objectid =
- BTRFS_I(inode)->root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root);
),
TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu "
@@ -750,7 +747,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
__entry->start = start;
__entry->end = end;
__entry->uptodate = uptodate;
- __entry->root_objectid = inode->root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(inode->root);
),
TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu end=%llu uptodate=%d",
@@ -780,8 +777,7 @@ TRACE_EVENT(btrfs_sync_file,
__entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent)));
__entry->datasync = datasync;
- __entry->root_objectid =
- BTRFS_I(inode)->root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root);
),
TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d",
@@ -1052,7 +1048,7 @@ DECLARE_EVENT_CLASS(btrfs__chunk,
__entry->sub_stripes = map->sub_stripes;
__entry->offset = offset;
__entry->size = size;
- __entry->root_objectid = fs_info->chunk_root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(fs_info->chunk_root);
),
TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu "
@@ -1097,7 +1093,7 @@ TRACE_EVENT(btrfs_cow_block,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->root_objectid = root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(root);
__entry->buf_start = buf->start;
__entry->refs = atomic_read(&buf->refs);
__entry->cow_start = cow->start;
@@ -1240,7 +1236,7 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free,
TP_ARGS(fs_info, start, len)
);
-TRACE_EVENT(find_free_extent,
+TRACE_EVENT(btrfs_find_free_extent,
TP_PROTO(const struct btrfs_root *root,
const struct find_free_extent_ctl *ffe_ctl),
@@ -1255,7 +1251,7 @@ TRACE_EVENT(find_free_extent,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->root_objectid = root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(root);
__entry->num_bytes = ffe_ctl->num_bytes;
__entry->empty_size = ffe_ctl->empty_size;
__entry->flags = ffe_ctl->flags;
@@ -1268,7 +1264,7 @@ TRACE_EVENT(find_free_extent,
BTRFS_GROUP_FLAGS))
);
-TRACE_EVENT(find_free_extent_search_loop,
+TRACE_EVENT(btrfs_find_free_extent_search_loop,
TP_PROTO(const struct btrfs_root *root,
const struct find_free_extent_ctl *ffe_ctl),
@@ -1284,7 +1280,7 @@ TRACE_EVENT(find_free_extent_search_loop,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->root_objectid = root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(root);
__entry->num_bytes = ffe_ctl->num_bytes;
__entry->empty_size = ffe_ctl->empty_size;
__entry->flags = ffe_ctl->flags;
@@ -1298,7 +1294,7 @@ TRACE_EVENT(find_free_extent_search_loop,
__entry->loop)
);
-TRACE_EVENT(find_free_extent_have_block_group,
+TRACE_EVENT(btrfs_find_free_extent_have_block_group,
TP_PROTO(const struct btrfs_root *root,
const struct find_free_extent_ctl *ffe_ctl,
@@ -1318,7 +1314,7 @@ TRACE_EVENT(find_free_extent_have_block_group,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->root_objectid = root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(root);
__entry->num_bytes = ffe_ctl->num_bytes;
__entry->empty_size = ffe_ctl->empty_size;
__entry->flags = ffe_ctl->flags;
@@ -1480,7 +1476,7 @@ TRACE_EVENT(btrfs_setup_cluster,
);
struct extent_state;
-TRACE_EVENT(alloc_extent_state,
+TRACE_EVENT(btrfs_alloc_extent_state,
TP_PROTO(const struct extent_state *state,
gfp_t mask, unsigned long IP),
@@ -1503,7 +1499,7 @@ TRACE_EVENT(alloc_extent_state,
show_gfp_flags(__entry->mask), __entry->ip)
);
-TRACE_EVENT(free_extent_state,
+TRACE_EVENT(btrfs_free_extent_state,
TP_PROTO(const struct extent_state *state, unsigned long IP),
@@ -1672,8 +1668,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
),
TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
- __entry->rootid =
- BTRFS_I(inode)->root->root_key.objectid;
+ __entry->rootid = btrfs_root_id(BTRFS_I(inode)->root);
__entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->start = start;
__entry->len = len;
@@ -1744,7 +1739,7 @@ DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent,
TP_ARGS(fs_info, rec, bytenr)
);
-TRACE_EVENT(qgroup_num_dirty_extents,
+TRACE_EVENT(btrfs_qgroup_num_dirty_extents,
TP_PROTO(const struct btrfs_fs_info *fs_info, u64 transid,
u64 num_dirty_extents),
@@ -1798,7 +1793,7 @@ TRACE_EVENT(btrfs_qgroup_account_extent,
__entry->nr_new_roots)
);
-TRACE_EVENT(qgroup_update_counters,
+TRACE_EVENT(btrfs_qgroup_update_counters,
TP_PROTO(const struct btrfs_fs_info *fs_info,
const struct btrfs_qgroup *qgroup,
@@ -1827,7 +1822,7 @@ TRACE_EVENT(qgroup_update_counters,
__entry->cur_old_count, __entry->cur_new_count)
);
-TRACE_EVENT(qgroup_update_reserve,
+TRACE_EVENT(btrfs_qgroup_update_reserve,
TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup,
s64 diff, int type),
@@ -1853,7 +1848,7 @@ TRACE_EVENT(qgroup_update_reserve,
__entry->cur_reserved, __entry->diff)
);
-TRACE_EVENT(qgroup_meta_reserve,
+TRACE_EVENT(btrfs_qgroup_meta_reserve,
TP_PROTO(const struct btrfs_root *root, s64 diff, int type),
@@ -1866,7 +1861,7 @@ TRACE_EVENT(qgroup_meta_reserve,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->refroot = root->root_key.objectid;
+ __entry->refroot = btrfs_root_id(root);
__entry->diff = diff;
__entry->type = type;
),
@@ -1876,7 +1871,7 @@ TRACE_EVENT(qgroup_meta_reserve,
__print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
);
-TRACE_EVENT(qgroup_meta_convert,
+TRACE_EVENT(btrfs_qgroup_meta_convert,
TP_PROTO(const struct btrfs_root *root, s64 diff),
@@ -1888,7 +1883,7 @@ TRACE_EVENT(qgroup_meta_convert,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->refroot = root->root_key.objectid;
+ __entry->refroot = btrfs_root_id(root);
__entry->diff = diff;
),
@@ -1899,7 +1894,7 @@ TRACE_EVENT(qgroup_meta_convert,
__entry->diff)
);
-TRACE_EVENT(qgroup_meta_free_all_pertrans,
+TRACE_EVENT(btrfs_qgroup_meta_free_all_pertrans,
TP_PROTO(struct btrfs_root *root),
@@ -1912,7 +1907,7 @@ TRACE_EVENT(qgroup_meta_free_all_pertrans,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->refroot = root->root_key.objectid;
+ __entry->refroot = btrfs_root_id(root);
spin_lock(&root->qgroup_meta_rsv_lock);
__entry->diff = -(s64)root->qgroup_meta_rsv_pertrans;
spin_unlock(&root->qgroup_meta_rsv_lock);
@@ -1994,7 +1989,7 @@ TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
),
TP_fast_assign_btrfs(root->fs_info,
- __entry->root_objectid = root->root_key.objectid;
+ __entry->root_objectid = btrfs_root_id(root);
__entry->ino = ino;
__entry->mod = mod;
__entry->outstanding = outstanding;
@@ -2074,12 +2069,12 @@ TRACE_EVENT(btrfs_set_extent_bit,
__field( unsigned, set_bits)
),
- TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree),
- const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree);
+ TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree),
+ const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree);
__entry->owner = tree->owner;
__entry->ino = inode ? btrfs_ino(inode) : 0;
- __entry->rootid = inode ? inode->root->root_key.objectid : 0;
+ __entry->rootid = inode ? btrfs_root_id(inode->root) : 0;
__entry->start = start;
__entry->len = len;
__entry->set_bits = set_bits;
@@ -2107,12 +2102,12 @@ TRACE_EVENT(btrfs_clear_extent_bit,
__field( unsigned, clear_bits)
),
- TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree),
- const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree);
+ TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree),
+ const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree);
__entry->owner = tree->owner;
__entry->ino = inode ? btrfs_ino(inode) : 0;
- __entry->rootid = inode ? inode->root->root_key.objectid : 0;
+ __entry->rootid = inode ? btrfs_root_id(inode->root) : 0;
__entry->start = start;
__entry->len = len;
__entry->clear_bits = clear_bits;
@@ -2141,12 +2136,12 @@ TRACE_EVENT(btrfs_convert_extent_bit,
__field( unsigned, clear_bits)
),
- TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree),
- const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree);
+ TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree),
+ const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree);
__entry->owner = tree->owner;
__entry->ino = inode ? btrfs_ino(inode) : 0;
- __entry->rootid = inode ? inode->root->root_key.objectid : 0;
+ __entry->rootid = inode ? btrfs_root_id(inode->root) : 0;
__entry->start = start;
__entry->len = len;
__entry->set_bits = set_bits;
@@ -2341,11 +2336,7 @@ DEFINE_EVENT(btrfs_locking_events, name, \
DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_unlock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write);
DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
DECLARE_EVENT_CLASS(btrfs__space_info_update,
@@ -2621,7 +2612,7 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
TP_fast_assign_btrfs(inode->root->fs_info,
__entry->ino = btrfs_ino(inode);
- __entry->root_id = inode->root->root_key.objectid;
+ __entry->root_id = btrfs_root_id(inode->root);
__entry->start = em->start;
__entry->len = em->len;
__entry->flags = em->flags;
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index af2755bda6eb..7d332387be6c 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -231,7 +231,11 @@ DECLARE_EVENT_CLASS(cgroup_rstat,
__entry->cpu, __entry->contended)
);
-/* Related to global: cgroup_rstat_lock */
+/*
+ * Related to locks:
+ * global rstat_base_lock for base stats
+ * cgroup_subsys::rstat_ss_lock for subsystem stats
+ */
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended,
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
@@ -253,7 +257,11 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
TP_ARGS(cgrp, cpu, contended)
);
-/* Related to per CPU: cgroup_rstat_cpu_lock */
+/*
+ * Related to per CPU locks:
+ * global rstat_base_cpu_lock for base stats
+ * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats
+ */
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index c69c7b1e41d1..a5f4b9234f46 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -113,7 +113,7 @@ TRACE_EVENT(erofs_read_folio,
__entry->raw)
);
-TRACE_EVENT(erofs_readpages,
+TRACE_EVENT(erofs_readahead,
TP_PROTO(struct inode *inode, pgoff_t start, unsigned int nrpage,
bool raw),
diff --git a/arch/x86/include/asm/trace/exceptions.h b/include/trace/events/exceptions.h
index 6b1e87194809..a631f8de8917 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/include/trace/events/exceptions.h
@@ -6,12 +6,8 @@
#define _TRACE_PAGE_FAULT_H
#include <linux/tracepoint.h>
-#include <asm/trace/common.h>
-extern int trace_pagefault_reg(void);
-extern void trace_pagefault_unreg(void);
-
-DECLARE_EVENT_CLASS(x86_exceptions,
+DECLARE_EVENT_CLASS(exceptions,
TP_PROTO(unsigned long address, struct pt_regs *regs,
unsigned long error_code),
@@ -26,7 +22,7 @@ DECLARE_EVENT_CLASS(x86_exceptions,
TP_fast_assign(
__entry->address = address;
- __entry->ip = regs->ip;
+ __entry->ip = instruction_pointer(regs);
__entry->error_code = error_code;
),
@@ -34,20 +30,13 @@ DECLARE_EVENT_CLASS(x86_exceptions,
(void *)__entry->address, (void *)__entry->ip,
__entry->error_code) );
-#define DEFINE_PAGE_FAULT_EVENT(name) \
-DEFINE_EVENT_FN(x86_exceptions, name, \
- TP_PROTO(unsigned long address, struct pt_regs *regs, \
- unsigned long error_code), \
- TP_ARGS(address, regs, error_code), \
- trace_pagefault_reg, trace_pagefault_unreg);
-
-DEFINE_PAGE_FAULT_EVENT(page_fault_user);
-DEFINE_PAGE_FAULT_EVENT(page_fault_kernel);
+DEFINE_EVENT(exceptions, page_fault_user,
+ TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code),
+ TP_ARGS(address, regs, error_code));
+DEFINE_EVENT(exceptions, page_fault_kernel,
+ TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code),
+ TP_ARGS(address, regs, error_code));
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE exceptions
#endif /* _TRACE_PAGE_FAULT_H */
/* This part must be outside protection */
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index fb81c533b310..178ab6f611be 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -645,7 +645,7 @@ TRACE_EVENT(io_uring_short_write,
/*
* io_uring_local_work_run - ran ring local task work
*
- * @tctx: pointer to a io_uring_ctx
+ * @ctx: pointer to an io_ring_ctx
* @count: how many functions it ran
* @loops: how many loops it ran
*
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 8994e97d86c1..3bec9fb73a36 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -326,11 +326,37 @@ DEFINE_EVENT(sched_process_template, sched_process_free,
TP_ARGS(p));
/*
- * Tracepoint for a task exiting:
+ * Tracepoint for a task exiting.
+ * Note, it's a superset of sched_process_template and should be kept
+ * compatible as much as possible. sched_process_exits has an extra
+ * `group_dead` argument, so sched_process_template can't be used,
+ * unfortunately, just like sched_migrate_task above.
*/
-DEFINE_EVENT(sched_process_template, sched_process_exit,
- TP_PROTO(struct task_struct *p),
- TP_ARGS(p));
+TRACE_EVENT(sched_process_exit,
+
+ TP_PROTO(struct task_struct *p, bool group_dead),
+
+ TP_ARGS(p, group_dead),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( int, prio )
+ __field( bool, group_dead )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ __entry->group_dead = group_dead;
+ ),
+
+ TP_printk("comm=%s pid=%d prio=%d group_dead=%s",
+ __entry->comm, __entry->pid, __entry->prio,
+ __entry->group_dead ? "true" : "false"
+ )
+);
/*
* Tracepoint for waiting on task to unschedule:
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 690621b610e5..1bfb635e309b 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -49,7 +49,7 @@ enum blktrace_act {
__BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
__BLK_TA_INSERT, /* insert request */
__BLK_TA_SPLIT, /* bio was split */
- __BLK_TA_BOUNCE, /* bio was bounced */
+ __BLK_TA_BOUNCE, /* unused, was: bio was bounced */
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 7a8f4c290187..3aff99f2696a 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -119,7 +119,7 @@ struct fscrypt_key_specifier {
*/
struct fscrypt_provisioning_key_payload {
__u32 type;
- __u32 __reserved;
+ __u32 flags;
__u8 raw[];
};
@@ -128,7 +128,9 @@ struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 key_id;
- __u32 __reserved[8];
+#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
+ __u32 flags;
+ __u32 __reserved[7];
__u8 raw[];
};
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index d2ee625ea189..7e2744ec8933 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -63,7 +63,7 @@
#define FUTEX2_SIZE_U32 0x02
#define FUTEX2_SIZE_U64 0x03
#define FUTEX2_NUMA 0x04
- /* 0x08 */
+#define FUTEX2_MPOL 0x08
/* 0x10 */
/* 0x20 */
/* 0x40 */
@@ -75,6 +75,13 @@
#define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */
/*
+ * When FUTEX2_NUMA doubles the futex word, the second word is a node value.
+ * The special value -1 indicates no-node. This is the same value as
+ * NUMA_NO_NODE, except that value is not ABI, this is.
+ */
+#define FUTEX_NO_NODE (-1)
+
+/*
* Max numbers of elements in a futex_waitv array
*/
#define FUTEX_WAITV_MAX 128
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8f1fc12bac46..cfd17e382082 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -73,6 +73,7 @@ struct io_uring_sqe {
__u32 futex_flags;
__u32 install_fd_flags;
__u32 nop_flags;
+ __u32 pipe_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -93,6 +94,10 @@ struct io_uring_sqe {
__u16 addr_len;
__u16 __pad3[1];
};
+ struct {
+ __u8 write_stream;
+ __u8 __pad4[3];
+ };
};
union {
struct {
@@ -283,6 +288,7 @@ enum io_uring_op {
IORING_OP_EPOLL_WAIT,
IORING_OP_READV_FIXED,
IORING_OP_WRITEV_FIXED,
+ IORING_OP_PIPE,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -988,12 +994,16 @@ struct io_uring_zcrx_offsets {
__u64 __resv[2];
};
+enum io_uring_zcrx_area_flags {
+ IORING_ZCRX_AREA_DMABUF = 1,
+};
+
struct io_uring_zcrx_area_reg {
__u64 addr;
__u64 len;
__u64 rq_area_token;
__u32 flags;
- __u32 __resv1;
+ __u32 dmabuf_fd;
__u64 __resv2[2];
};
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 5fc753c23734..78a362b80027 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -39,18 +39,21 @@ enum perf_type_id {
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ *
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
+ *
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ *
+ * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
*/
-#define PERF_PMU_TYPE_SHIFT 32
-#define PERF_HW_EVENT_MASK 0xffffffff
+#define PERF_PMU_TYPE_SHIFT 32
+#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
@@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id {
/*
* Special "software" events provided by the kernel, even if the hardware
* does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
+ * physical and SW events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
@@ -167,8 +170,9 @@ enum perf_event_sample_format {
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
@@ -178,20 +182,20 @@ enum perf_event_sample_format {
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type_shift {
- PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
- PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
@@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift {
};
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
- PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
- PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
- PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
- PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
- PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
- PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
- PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
- PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
- PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
- PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
- PERF_SAMPLE_BRANCH_TYPE_SAVE =
- 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
- PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
- PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
- * Common flow change classification
+ * Common control flow change classifications:
*/
enum {
- PERF_BR_UNKNOWN = 0, /* unknown */
- PERF_BR_COND = 1, /* conditional */
- PERF_BR_UNCOND = 2, /* unconditional */
- PERF_BR_IND = 3, /* indirect */
- PERF_BR_CALL = 4, /* function call */
- PERF_BR_IND_CALL = 5, /* indirect function call */
- PERF_BR_RET = 6, /* function return */
- PERF_BR_SYSCALL = 7, /* syscall */
- PERF_BR_SYSRET = 8, /* syscall return */
- PERF_BR_COND_CALL = 9, /* conditional function call */
- PERF_BR_COND_RET = 10, /* conditional function return */
- PERF_BR_ERET = 11, /* exception return */
- PERF_BR_IRQ = 12, /* irq */
- PERF_BR_SERROR = 13, /* system error */
- PERF_BR_NO_TX = 14, /* not in transaction */
- PERF_BR_EXTEND_ABI = 15, /* extend ABI */
+ PERF_BR_UNKNOWN = 0, /* Unknown */
+ PERF_BR_COND = 1, /* Conditional */
+ PERF_BR_UNCOND = 2, /* Unconditional */
+ PERF_BR_IND = 3, /* Indirect */
+ PERF_BR_CALL = 4, /* Function call */
+ PERF_BR_IND_CALL = 5, /* Indirect function call */
+ PERF_BR_RET = 6, /* Function return */
+ PERF_BR_SYSCALL = 7, /* Syscall */
+ PERF_BR_SYSRET = 8, /* Syscall return */
+ PERF_BR_COND_CALL = 9, /* Conditional function call */
+ PERF_BR_COND_RET = 10, /* Conditional function return */
+ PERF_BR_ERET = 11, /* Exception return */
+ PERF_BR_IRQ = 12, /* IRQ */
+ PERF_BR_SERROR = 13, /* System error */
+ PERF_BR_NO_TX = 14, /* Not in transaction */
+ PERF_BR_EXTEND_ABI = 15, /* Extend ABI */
PERF_BR_MAX,
};
/*
- * Common branch speculation outcome classification
+ * Common branch speculation outcome classifications:
*/
enum {
- PERF_BR_SPEC_NA = 0, /* Not available */
- PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
- PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
- PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
- PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
- PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
- PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
- PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
- PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
- PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
- PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
- PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
+ PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
+ PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
+ PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
+ PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
+ PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
+ PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
+ PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
+ PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
- PERF_BR_PRIV_UNKNOWN = 0,
- PERF_BR_PRIV_USER = 1,
- PERF_BR_PRIV_KERNEL = 2,
- PERF_BR_PRIV_HV = 3,
+ PERF_BR_PRIV_UNKNOWN = 0,
+ PERF_BR_PRIV_USER = 1,
+ PERF_BR_PRIV_KERNEL = 2,
+ PERF_BR_PRIV_HV = 3,
};
-#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
-#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
-#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
-#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
-#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
+#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
@@ -310,9 +313,9 @@ enum {
* Values to determine ABI of the registers dump.
*/
enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
};
/*
@@ -320,21 +323,21 @@ enum perf_sample_regs_abi {
* abort events. Multiple bits can be set.
*/
enum {
- PERF_TXN_ELISION = (1 << 0), /* From elision */
- PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
- PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
- PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
- PERF_TXN_RETRY = (1 << 4), /* Retry possible */
- PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
- PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
- PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
- PERF_TXN_MAX = (1 << 8), /* non-ABI */
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
- /* bits 32..63 are reserved for the abort code */
+ /* Bits 32..63 are reserved for the abort code */
- PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
- PERF_TXN_ABORT_SHIFT = 32,
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
};
/*
@@ -369,24 +372,22 @@ enum perf_event_read_format {
PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
-#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
-#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
-#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */
+#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */
+ /* Add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
+#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
/*
- * Hardware event_id to monitor via a performance monitoring event:
- *
- * @sample_max_stack: Max number of frame pointers in a callchain,
- * should be < /proc/sys/kernel/perf_event_max_stack
- * Max number of entries of branch stack
- * should be < hardware limit
+ * 'struct perf_event_attr' contains various attributes that define
+ * a performance event - most of them hardware related configuration
+ * details, but also a lot of behavioral switches and values implemented
+ * by the kernel.
*/
struct perf_event_attr {
@@ -396,7 +397,7 @@ struct perf_event_attr {
__u32 type;
/*
- * Size of the attr structure, for fwd/bwd compat.
+ * Size of the attr structure, for forward/backwards compatibility.
*/
__u32 size;
@@ -451,21 +452,21 @@ struct perf_event_attr {
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end to beginning */
+ write_backward : 1, /* write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
+ bpf_event : 1, /* include BPF events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
+ build_id : 1, /* use build ID in mmap2 events */
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
__reserved_1 : 26;
union {
- __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_events; /* wake up every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
@@ -474,13 +475,13 @@ struct perf_event_attr {
__u64 bp_addr;
__u64 kprobe_func; /* for perf_kprobe */
__u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
+ __u64 config1; /* extension of config */
};
union {
__u64 bp_len;
- __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
__u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
+ __u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -510,7 +511,16 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
+
+ /*
+ * Max number of frame pointers in a callchain, should be
+ * lower than /proc/sys/kernel/perf_event_max_stack.
+ *
+ * Max number of entries of branch stack should be lower
+ * than the hardware limit.
+ */
__u16 sample_max_stack;
+
__u16 __reserved_2;
__u32 aux_sample_size;
@@ -537,7 +547,7 @@ struct perf_event_attr {
/*
* Structure used by below PERF_EVENT_IOC_QUERY_BPF command
- * to query bpf programs attached to the same perf tracepoint
+ * to query BPF programs attached to the same perf tracepoint
* as the given perf event.
*/
struct perf_event_query_bpf {
@@ -559,21 +569,21 @@ struct perf_event_query_bpf {
/*
* Ioctls that can be done on a perf event fd:
*/
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
-#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
-#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *)
enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
+ PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
@@ -584,7 +594,7 @@ struct perf_event_mmap_page {
__u32 compat_version; /* lowest version this is compat with */
/*
- * Bits needed to read the hw events in user-space.
+ * Bits needed to read the HW events in user-space.
*
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
@@ -622,7 +632,7 @@ struct perf_event_mmap_page {
__u32 index; /* hardware event identifier */
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
+ __u64 time_running; /* time event on CPU */
union {
__u64 capabilities;
struct {
@@ -650,7 +660,7 @@ struct perf_event_mmap_page {
/*
* If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
+ * delta since time_enabled (in ns) using RDTSC or similar.
*
* u64 quot, rem;
* u64 delta;
@@ -723,7 +733,7 @@ struct perf_event_mmap_page {
* after reading this value.
*
* When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
+ * written by user-space to reflect the last read data, after issuing
* an smp_mb() to separate the data read from the ->data_tail store.
* In this case the kernel will not over-write unread data.
*
@@ -739,7 +749,7 @@ struct perf_event_mmap_page {
/*
* AUX area is defined by aux_{offset,size} fields that should be set
- * by the userspace, so that
+ * by the user-space, so that
*
* aux_offset >= data_offset + data_size
*
@@ -813,7 +823,7 @@ struct perf_event_mmap_page {
* Indicates that thread was preempted in TASK_RUNNING state.
*
* PERF_RECORD_MISC_MMAP_BUILD_ID:
- * Indicates that mmap2 event carries build id data.
+ * Indicates that mmap2 event carries build ID data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
@@ -824,26 +834,26 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
+ __u32 type;
+ __u16 misc;
+ __u16 size;
};
struct perf_ns_link_info {
- __u64 dev;
- __u64 ino;
+ __u64 dev;
+ __u64 ino;
};
enum {
- NET_NS_INDEX = 0,
- UTS_NS_INDEX = 1,
- IPC_NS_INDEX = 2,
- PID_NS_INDEX = 3,
- USER_NS_INDEX = 4,
- MNT_NS_INDEX = 5,
- CGROUP_NS_INDEX = 6,
-
- NR_NAMESPACES, /* number of available namespaces */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
@@ -859,11 +869,11 @@ enum perf_event_type {
* optional fields being ignored.
*
* struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 id; } && PERF_SAMPLE_IDENTIFIER
* } && perf_event_attr::sample_id_all
*
@@ -874,7 +884,7 @@ enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
+ * correlate user-space IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
@@ -884,7 +894,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
@@ -894,7 +904,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -905,7 +915,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -916,7 +926,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -927,7 +937,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -939,7 +949,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -950,7 +960,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -1005,12 +1015,12 @@ enum perf_event_type {
* { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
*
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { union perf_sample_weight
* {
@@ -1035,10 +1045,11 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
- * { u64 size;
- * char data[size]; } && PERF_SAMPLE_AUX
+ * { u64 cgroup;} && PERF_SAMPLE_CGROUP
* { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
* { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
+ * { u64 size;
+ * char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -1070,7 +1081,7 @@ enum perf_event_type {
* };
* u32 prot, flags;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP2 = 10,
@@ -1079,12 +1090,12 @@ enum perf_event_type {
* Records that new data landed in the AUX buffer part.
*
* struct {
- * struct perf_event_header header;
+ * struct perf_event_header header;
*
- * u64 aux_offset;
- * u64 aux_size;
+ * u64 aux_offset;
+ * u64 aux_size;
* u64 flags;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_AUX = 11,
@@ -1167,7 +1178,7 @@ enum perf_event_type {
PERF_RECORD_KSYMBOL = 17,
/*
- * Record bpf events:
+ * Record BPF events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
@@ -1245,181 +1256,181 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
- PERF_BPF_EVENT_UNKNOWN = 0,
- PERF_BPF_EVENT_PROG_LOAD = 1,
- PERF_BPF_EVENT_PROG_UNLOAD = 2,
- PERF_BPF_EVENT_MAX, /* non-ABI */
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
};
-#define PERF_MAX_STACK_DEPTH 127
-#define PERF_MAX_CONTEXTS_PER_STACK 8
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
- PERF_CONTEXT_MAX = (__u64)-4095,
+ PERF_CONTEXT_MAX = (__u64)-4095,
};
/**
* PERF_RECORD_AUX::flags bits
*/
-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */
/* CoreSight PMU AUX buffer formats */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
-#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#if defined(__LITTLE_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_remote:1, /* remote */
- mem_snoopx:2, /* snoop mode, ext */
- mem_blk:3, /* access blocked */
- mem_hops:3, /* hop level */
- mem_rsvd:18;
+ __u64 mem_op : 5, /* Type of opcode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lock : 2, /* Lock instr */
+ mem_dtlb : 7, /* TLB access */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_remote : 1, /* Remote */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_blk : 3, /* Access blocked */
+ mem_hops : 3, /* Hop level */
+ mem_rsvd : 18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:18,
- mem_hops:3, /* hop level */
- mem_blk:3, /* access blocked */
- mem_snoopx:2, /* snoop mode, ext */
- mem_remote:1, /* remote */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_dtlb:7, /* tlb access */
- mem_lock:2, /* lock instr */
- mem_snoop:5, /* snoop mode */
- mem_lvl:14, /* memory hierarchy level */
- mem_op:5; /* type of opcode */
+ __u64 mem_rsvd : 18,
+ mem_hops : 3, /* Hop level */
+ mem_blk : 3, /* Access blocked */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_remote : 1, /* Remote */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_dtlb : 7, /* TLB access */
+ mem_lock : 2, /* Lock instr */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_op : 5; /* Type of opcode */
};
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
+/* Type of memory opcode: */
+#define PERF_MEM_OP_NA 0x0001 /* Not available */
+#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */
+#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */
+#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */
+#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */
+#define PERF_MEM_OP_SHIFT 0
/*
- * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
- * Supporting this namespace inorder to not break defined ABIs.
+ * We support this namespace in order to not break defined ABIs.
*
- * memory hierarchy (memory level, hit or miss)
+ * Memory hierarchy (memory level, hit or miss)
*/
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
-#define PERF_MEM_REMOTE_SHIFT 37
-
-#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
-#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
-#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
-#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
-#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
-/* 0x7 available */
-#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
-#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
-#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
-#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
-#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
-#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
-
-#define PERF_MEM_LVLNUM_SHIFT 33
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
-#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
-#define PERF_MEM_SNOOPX_SHIFT 38
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
+#define PERF_MEM_LVL_NA 0x0001 /* Not available */
+#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */
+#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */
+#define PERF_MEM_LVL_L1 0x0008 /* L1 */
+#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x0020 /* L2 */
+#define PERF_MEM_LVL_L3 0x0040 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
+#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
+/* 0x007 available */
+#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
+#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
+#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* Snoop mode */
+#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */
+#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */
+#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */
+#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* Locked instruction */
+#define PERF_MEM_LOCK_NA 0x0001 /* Not available */
+#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
+#define PERF_MEM_TLB_NA 0x0001 /* Not available */
+#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */
+#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */
+#define PERF_MEM_TLB_L1 0x0008 /* L1 */
+#define PERF_MEM_TLB_L2 0x0010 /* L2 */
+#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
-#define PERF_MEM_BLK_NA 0x01 /* not available */
-#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
-#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
-#define PERF_MEM_BLK_SHIFT 40
-
-/* hop level */
-#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_BLK_NA 0x0001 /* Not available */
+#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
+/* Hop level */
+#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */
+#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */
+#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */
+#define PERF_MEM_HOPS_3 0x0004 /* Remote board */
/* 5-7 available */
-#define PERF_MEM_HOPS_SHIFT 43
+#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
/*
- * single taken branch record layout:
+ * Layout of single taken branch records:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
@@ -1438,37 +1449,37 @@ union perf_mem_data_src {
struct perf_branch_entry {
__u64 from;
__u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- cycles:16, /* cycle count to last branch */
- type:4, /* branch type */
- spec:2, /* branch speculation info */
- new_type:4, /* additional branch type */
- priv:3, /* privilege level */
- reserved:31;
+ __u64 mispred : 1, /* target mispredicted */
+ predicted : 1, /* target predicted */
+ in_tx : 1, /* in transaction */
+ abort : 1, /* transaction abort */
+ cycles : 16, /* cycle count to last branch */
+ type : 4, /* branch type */
+ spec : 2, /* branch speculation info */
+ new_type : 4, /* additional branch type */
+ priv : 3, /* privilege level */
+ reserved : 31;
};
/* Size of used info bits in struct perf_branch_entry */
#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
union perf_sample_weight {
- __u64 full;
+ __u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
- __u32 var1_dw;
- __u16 var2_w;
- __u16 var3_w;
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
- __u16 var3_w;
- __u16 var2_w;
- __u32 var1_dw;
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
};
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 2970ef44655a..c27a4e238e4b 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -12,7 +12,7 @@
#define PIDFD_THREAD O_EXCL
#ifdef __KERNEL__
#include <linux/sched.h>
-#define PIDFD_CLONE CLONE_PIDFD
+#define PIDFD_STALE CLONE_PIDFD
#endif
/* Flags for pidfd_send_signal(). */
@@ -25,10 +25,24 @@
#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
+#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
/*
+ * Values for @coredump_mask in pidfd_info.
+ * Only valid if PIDFD_INFO_COREDUMP is set in @mask.
+ *
+ * Note, the @PIDFD_COREDUMP_ROOT flag indicates that the generated
+ * coredump should be treated as sensitive and access should only be
+ * granted to privileged users.
+ */
+#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
+#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */
+#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */
+#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */
+
+/*
* The concept of process and threads in userland and the kernel is a confusing
* one - within the kernel every thread is a 'task' with its own individual PID,
* however from userland's point of view threads are grouped by a single PID,
@@ -92,6 +106,8 @@ struct pidfd_info {
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
+ __u32 coredump_mask;
+ __u32 __spare1;
};
#define PIDFS_IOCTL_MAGIC 0xFF
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 15c18ef4eb11..43dec6eed559 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -364,4 +364,11 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index f78ee3670dd5..1686861aae20 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -182,8 +182,12 @@ struct statx {
/* File offset alignment for direct I/O reads */
__u32 stx_dio_read_offset_align;
- /* 0xb8 */
- __u64 __spare3[9]; /* Spare space for future expansion */
+ /* Optimised max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max_opt;
+ __u32 __spare2[1];
+
+ /* 0xc0 */
+ __u64 __spare3[8]; /* Spare space for future expansion */
/* 0x100 */
};
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index 95762232e018..5929030d4e8b 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
*/
-#define TASKSTATS_VERSION 15
+#define TASKSTATS_VERSION 16
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -72,8 +72,6 @@ struct taskstats {
*/
__u64 cpu_count __attribute__((aligned(8)));
__u64 cpu_delay_total;
- __u64 cpu_delay_max;
- __u64 cpu_delay_min;
/* Following four fields atomically updated using task->delays->lock */
@@ -82,14 +80,10 @@ struct taskstats {
*/
__u64 blkio_count;
__u64 blkio_delay_total;
- __u64 blkio_delay_max;
- __u64 blkio_delay_min;
/* Delay waiting for page fault I/O (swap in only) */
__u64 swapin_count;
__u64 swapin_delay_total;
- __u64 swapin_delay_max;
- __u64 swapin_delay_min;
/* cpu "wall-clock" running time
* On some architectures, value will adjust for cpu time stolen
@@ -172,14 +166,11 @@ struct taskstats {
/* Delay waiting for memory reclaim */
__u64 freepages_count;
__u64 freepages_delay_total;
- __u64 freepages_delay_max;
- __u64 freepages_delay_min;
+
/* Delay waiting for thrashing page */
__u64 thrashing_count;
__u64 thrashing_delay_total;
- __u64 thrashing_delay_max;
- __u64 thrashing_delay_min;
/* v10: 64-bit btime to avoid overflow */
__u64 ac_btime64; /* 64-bit begin time */
@@ -187,8 +178,6 @@ struct taskstats {
/* v11: Delay waiting for memory compact */
__u64 compact_count;
__u64 compact_delay_total;
- __u64 compact_delay_max;
- __u64 compact_delay_min;
/* v12 begin */
__u32 ac_tgid; /* thread group ID */
@@ -210,15 +199,37 @@ struct taskstats {
/* v13: Delay waiting for write-protect copy */
__u64 wpcopy_count;
__u64 wpcopy_delay_total;
- __u64 wpcopy_delay_max;
- __u64 wpcopy_delay_min;
/* v14: Delay waiting for IRQ/SOFTIRQ */
__u64 irq_count;
__u64 irq_delay_total;
- __u64 irq_delay_max;
- __u64 irq_delay_min;
- /* v15: add Delay max */
+
+ /* v15: add Delay max and Delay min */
+
+ /* v16: move Delay max and Delay min to the end of taskstat */
+ __u64 cpu_delay_max;
+ __u64 cpu_delay_min;
+
+ __u64 blkio_delay_max;
+ __u64 blkio_delay_min;
+
+ __u64 swapin_delay_max;
+ __u64 swapin_delay_min;
+
+ __u64 freepages_delay_max;
+ __u64 freepages_delay_min;
+
+ __u64 thrashing_delay_max;
+ __u64 thrashing_delay_min;
+
+ __u64 compact_delay_max;
+ __u64 compact_delay_min;
+
+ __u64 wpcopy_delay_max;
+ __u64 wpcopy_delay_min;
+
+ __u64 irq_delay_max;
+ __u64 irq_delay_min;
};
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 583b86681c93..56c7e3fc666f 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -51,6 +51,10 @@
_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV_ASYNC \
_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_UPDATE_SIZE \
+ _IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_QUIESCE_DEV \
+ _IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
/*
* 64bits are enough now, and it should be easy to extend in case of
@@ -211,6 +215,63 @@
*/
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
+/*
+ * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE
+ * New size is passed in cmd->data[0] and is in units of sectors
+ */
+#define UBLK_F_UPDATE_SIZE (1ULL << 10)
+
+/*
+ * request buffer is registered automatically to uring_cmd's io_uring
+ * context before delivering this io command to ublk server, meantime
+ * it is un-registered automatically when completing this io command.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create sparse buffer table on the same `io_ring_ctx`
+ * for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`.
+ * If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's
+ * responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF`
+ * manually, otherwise this ublk request won't complete.
+ *
+ * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
+ * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
+ * the definition of ublk_sqe_addr_to_auto_buf_reg()
+ *
+ * - pass buffer index from `ublk_auto_buf_reg.index`
+ *
+ * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
+ *
+ * - pass flags from `ublk_auto_buf_reg.flags` if needed
+ *
+ * This way avoids extra cost from two uring_cmd, but also simplifies backend
+ * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
+ * IO_UNREGISTER_IO_BUF becomes not necessary.
+ *
+ * If wrong data or flags are provided, both IO_FETCH_REQ and
+ * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
+ * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
+ * successfully
+ */
+#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
+
+/*
+ * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device,
+ * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or
+ * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for
+ * handling `UBLK_IO_RES_ABORT` correctly.
+ *
+ * Typical use case is for supporting to upgrade ublk server application,
+ * meantime keep ublk block device persistent during the period.
+ *
+ * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
+ *
+ * Note, this command returns -EBUSY in case that all IO commands are being
+ * handled by ublk server and not completed in specified time period which
+ * is passed from the control command parameter.
+ */
+#define UBLK_F_QUIESCE (1ULL << 12)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -297,6 +358,17 @@ struct ublksrv_ctrl_dev_info {
#define UBLK_IO_F_FUA (1U << 13)
#define UBLK_IO_F_NOUNMAP (1U << 15)
#define UBLK_IO_F_SWAP (1U << 16)
+/*
+ * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
+ *
+ * This flag is set if auto buffer register is failed & ublk server passes
+ * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
+ * manually for handling the delivered IO command if this flag is observed
+ *
+ * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
+ * passed in.
+ */
+#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
/*
* io cmd is described by this structure, and stored in share memory, indexed
@@ -331,6 +403,62 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
return iod->op_flags >> 8;
}
+/*
+ * If this flag is set, fallback by completing the uring_cmd and setting
+ * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
+ * otherwise the client ublk request is failed silently
+ *
+ * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
+ * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
+ * ublk server needs to register io buffer manually for handling IO command.
+ */
+#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0)
+#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK
+
+struct ublk_auto_buf_reg {
+ /* index for registering the delivered request buffer */
+ __u16 index;
+ __u8 flags;
+ __u8 reserved0;
+
+ /*
+ * io_ring FD can be passed via the reserve field in future for
+ * supporting to register io buffer to external io_uring
+ */
+ __u32 reserved1;
+};
+
+/*
+ * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
+ * uring_cmd's sqe->addr:
+ *
+ * - bit0 ~ bit15: buffer index
+ * - bit16 ~ bit23: flags
+ * - bit24 ~ bit31: reserved0
+ * - bit32 ~ bit63: reserved1
+ */
+static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
+ __u64 sqe_addr)
+{
+ struct ublk_auto_buf_reg reg = {
+ .index = sqe_addr & 0xffff,
+ .flags = (sqe_addr >> 16) & 0xff,
+ .reserved0 = (sqe_addr >> 24) & 0xff,
+ .reserved1 = sqe_addr >> 32,
+ };
+
+ return reg;
+}
+
+static inline __u64
+ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
+{
+ __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 |
+ (__u64)buf->reserved1 << 32;
+
+ return addr;
+}
+
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
__u16 q_id;
diff --git a/init/Kconfig b/init/Kconfig
index 4cdd1049283c..ae5d00baeafb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -87,11 +87,6 @@ config CC_CAN_LINK
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag))
-config CC_CAN_LINK_STATIC
- bool
- default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT
- default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static)
-
# Fixed in GCC 14, 13.3, 12.4 and 11.5
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
config GCC_ASM_GOTO_OUTPUT_BROKEN
@@ -482,16 +477,6 @@ config CROSS_MEMORY_ATTACH
to directly read from or write to another process' address space.
See the man page for more details.
-config USELIB
- bool "uselib syscall (for libc5 and earlier)"
- default ALPHA || M68K || SPARC
- help
- This option enables the uselib syscall, a system call used in the
- dynamic linker from libc5 and earlier. glibc does not use this
- system call. If you intend to run programs built on libc5 or
- earlier, you may need to enable this syscall. Current systems
- running glibc can safely disable this.
-
config AUDIT
bool "Auditing support"
depends on NET
@@ -1090,6 +1075,17 @@ config RT_GROUP_SCHED
realtime bandwidth for them.
See Documentation/scheduler/sched-rt-group.rst for more information.
+config RT_GROUP_SCHED_DEFAULT_DISABLED
+ bool "Require boot parameter to enable group scheduling for SCHED_RR/FIFO"
+ depends on RT_GROUP_SCHED
+ default n
+ help
+ When set, the RT group scheduling is disabled by default. The option
+ is in inverted form so that mere RT_GROUP_SCHED enables the group
+ scheduling.
+
+ Say N if unsure.
+
config EXT_GROUP_SCHED
bool
depends on SCHED_CLASS_EXT && CGROUP_SCHED
@@ -1702,6 +1698,16 @@ config FUTEX_PI
depends on FUTEX && RT_MUTEXES
default y
+config FUTEX_PRIVATE_HASH
+ bool
+ depends on FUTEX && !BASE_SMALL && MMU
+ default y
+
+config FUTEX_MPOL
+ bool
+ depends on FUTEX && NUMA
+ default y
+
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
diff --git a/init/main.c b/init/main.c
index 7f0a2a3dbd29..bf9c5d22953b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1002,7 +1002,7 @@ void start_kernel(void)
init_IRQ();
tick_init();
rcu_init_nohz();
- init_timers();
+ timers_init();
srcu_init();
hrtimers_init();
softirq_init();
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 3e28a741ca15..d97c6b51d584 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -7,11 +7,11 @@ GCOV_PROFILE := y
endif
obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
- tctx.o filetable.o rw.o net.o poll.o \
+ tctx.o filetable.o rw.o poll.o \
eventfd.o uring_cmd.o openclose.o \
sqpoll.o xattr.o nop.o fs.o splice.o \
sync.o msg_ring.o advise.o openclose.o \
- statx.o timeout.o fdinfo.o cancel.o \
+ statx.o timeout.o cancel.o \
waitid.o register.o truncate.o \
memmap.o alloc_cache.o
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
@@ -19,3 +19,5 @@ obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_EPOLL) += epoll.o
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
+obj-$(CONFIG_NET) += net.o cmd_net.o
+obj-$(CONFIG_PROC_FS) += fdinfo.o
diff --git a/io_uring/advise.c b/io_uring/advise.c
index cb7b881665e5..0073f74e3658 100644
--- a/io_uring/advise.c
+++ b/io_uring/advise.c
@@ -58,7 +58,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
#else
return -EOPNOTSUPP;
#endif
@@ -104,5 +104,5 @@ int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 0870060bac7c..6d57602304df 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -229,7 +229,7 @@ done:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
static int __io_sync_cancel(struct io_uring_task *tctx,
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
new file mode 100644
index 000000000000..e99170c7d41a
--- /dev/null
+++ b/io_uring/cmd_net.c
@@ -0,0 +1,83 @@
+#include <asm/ioctls.h>
+#include <linux/io_uring/net.h>
+#include <net/sock.h>
+
+#include "uring_cmd.h"
+
+static inline int io_uring_cmd_getsockopt(struct socket *sock,
+ struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ const struct io_uring_sqe *sqe = cmd->sqe;
+ bool compat = !!(issue_flags & IO_URING_F_COMPAT);
+ int optlen, optname, level, err;
+ void __user *optval;
+
+ level = READ_ONCE(sqe->level);
+ if (level != SOL_SOCKET)
+ return -EOPNOTSUPP;
+
+ optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
+ optname = READ_ONCE(sqe->optname);
+ optlen = READ_ONCE(sqe->optlen);
+
+ err = do_sock_getsockopt(sock, compat, level, optname,
+ USER_SOCKPTR(optval),
+ KERNEL_SOCKPTR(&optlen));
+ if (err)
+ return err;
+
+ /* On success, return optlen */
+ return optlen;
+}
+
+static inline int io_uring_cmd_setsockopt(struct socket *sock,
+ struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ const struct io_uring_sqe *sqe = cmd->sqe;
+ bool compat = !!(issue_flags & IO_URING_F_COMPAT);
+ int optname, optlen, level;
+ void __user *optval;
+ sockptr_t optval_s;
+
+ optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
+ optname = READ_ONCE(sqe->optname);
+ optlen = READ_ONCE(sqe->optlen);
+ level = READ_ONCE(sqe->level);
+ optval_s = USER_SOCKPTR(optval);
+
+ return do_sock_setsockopt(sock, compat, level, optname, optval_s,
+ optlen);
+}
+
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct socket *sock = cmd->file->private_data;
+ struct sock *sk = sock->sk;
+ struct proto *prot = READ_ONCE(sk->sk_prot);
+ int ret, arg = 0;
+
+ if (!prot || !prot->ioctl)
+ return -EOPNOTSUPP;
+
+ switch (cmd->cmd_op) {
+ case SOCKET_URING_OP_SIOCINQ:
+ ret = prot->ioctl(sk, SIOCINQ, &arg);
+ if (ret)
+ return ret;
+ return arg;
+ case SOCKET_URING_OP_SIOCOUTQ:
+ ret = prot->ioctl(sk, SIOCOUTQ, &arg);
+ if (ret)
+ return ret;
+ return arg;
+ case SOCKET_URING_OP_GETSOCKOPT:
+ return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
+ case SOCKET_URING_OP_SETSOCKOPT:
+ return io_uring_cmd_setsockopt(sock, cmd, issue_flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 6d2c48ba1923..8d4610246ba0 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -61,7 +61,7 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -88,5 +88,5 @@ int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 100d5da94cb9..78f8ab7db104 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -47,13 +47,6 @@ static void io_eventfd_do_signal(struct rcu_head *rcu)
io_eventfd_put(ev_fd);
}
-static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
-{
- if (put_ref)
- io_eventfd_put(ev_fd);
- rcu_read_unlock();
-}
-
/*
* Returns true if the caller should put the ev_fd reference, false if not.
*/
@@ -72,63 +65,34 @@ static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
/*
* Trigger if eventfd_async isn't set, or if it's set and the caller is
- * an async worker. If ev_fd isn't valid, obviously return false.
+ * an async worker.
*/
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
- if (ev_fd)
- return !ev_fd->eventfd_async || io_wq_current_is_worker();
- return false;
+ return !ev_fd->eventfd_async || io_wq_current_is_worker();
}
-/*
- * On success, returns with an ev_fd reference grabbed and the RCU read
- * lock held.
- */
-static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
+void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
+ bool skip = false;
struct io_ev_fd *ev_fd;
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
- return NULL;
-
- rcu_read_lock();
+ return;
- /*
- * rcu_dereference ctx->io_ev_fd once and use it for both for checking
- * and eventfd_signal
- */
+ guard(rcu)();
ev_fd = rcu_dereference(ctx->io_ev_fd);
-
/*
* Check again if ev_fd exists in case an io_eventfd_unregister call
* completed between the NULL check of ctx->io_ev_fd at the start of
* the function and rcu_read_lock.
*/
- if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
- return ev_fd;
-
- rcu_read_unlock();
- return NULL;
-}
-
-void io_eventfd_signal(struct io_ring_ctx *ctx)
-{
- struct io_ev_fd *ev_fd;
-
- ev_fd = io_eventfd_grab(ctx);
- if (ev_fd)
- io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
-}
-
-void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-{
- struct io_ev_fd *ev_fd;
-
- ev_fd = io_eventfd_grab(ctx);
- if (ev_fd) {
- bool skip, put_ref = true;
+ if (!ev_fd)
+ return;
+ if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
+ return;
+ if (cqe_event) {
/*
* Eventfd should only get triggered when at least one event
* has been posted. Some applications rely on the eventfd
@@ -142,12 +106,10 @@ void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
ev_fd->last_cq_tail = ctx->cached_cq_tail;
spin_unlock(&ctx->completion_lock);
-
- if (!skip)
- put_ref = __io_eventfd_signal(ev_fd);
-
- io_eventfd_release(ev_fd, put_ref);
}
+
+ if (skip || __io_eventfd_signal(ev_fd))
+ io_eventfd_put(ev_fd);
}
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
diff --git a/io_uring/eventfd.h b/io_uring/eventfd.h
index d394f49c6321..e2f1985c2cf9 100644
--- a/io_uring/eventfd.h
+++ b/io_uring/eventfd.h
@@ -4,5 +4,4 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned int eventfd_async);
int io_eventfd_unregister(struct io_ring_ctx *ctx);
-void io_eventfd_flush_signal(struct io_ring_ctx *ctx);
-void io_eventfd_signal(struct io_ring_ctx *ctx);
+void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event);
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 9414ca6d101c..e9355276ab5d 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -15,37 +15,6 @@
#include "cancel.h"
#include "rsrc.h"
-#ifdef CONFIG_PROC_FS
-static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
- const struct cred *cred)
-{
- struct user_namespace *uns = seq_user_ns(m);
- struct group_info *gi;
- kernel_cap_t cap;
- int g;
-
- seq_printf(m, "%5d\n", id);
- seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
- seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
- seq_puts(m, "\n\tGroups:\t");
- gi = cred->group_info;
- for (g = 0; g < gi->ngroups; g++) {
- seq_put_decimal_ull(m, g ? " " : "",
- from_kgid_munged(uns, gi->gid[g]));
- }
- seq_puts(m, "\n\tCapEff:\t");
- cap = cred->cap_effective;
- seq_put_hex_ll(m, NULL, cap.val, 16);
- seq_putc(m, '\n');
- return 0;
-}
-
#ifdef CONFIG_NET_RX_BUSY_POLL
static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx,
struct seq_file *m,
@@ -86,13 +55,8 @@ static inline void napi_show_fdinfo(struct io_ring_ctx *ctx,
}
#endif
-/*
- * Caller holds a reference to the file already, we don't need to do
- * anything else to get an extra reference.
- */
-__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
+static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
{
- struct io_ring_ctx *ctx = file->private_data;
struct io_overflow_cqe *ocqe;
struct io_rings *r = ctx->rings;
struct rusage sq_usage;
@@ -106,7 +70,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
unsigned int sq_entries, cq_entries;
int sq_pid = -1, sq_cpu = -1;
u64 sq_total_time = 0, sq_work_time = 0;
- bool has_lock;
unsigned int i;
if (ctx->flags & IORING_SETUP_CQE32)
@@ -176,15 +139,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
seq_printf(m, "\n");
}
- /*
- * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
- * since fdinfo case grabs it in the opposite direction of normal use
- * cases. If we fail to get the lock, we just don't iterate any
- * structures that could be going away outside the io_uring mutex.
- */
- has_lock = mutex_trylock(&ctx->uring_lock);
-
- if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
struct io_sq_data *sq = ctx->sq_data;
/*
@@ -206,7 +161,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr);
- for (i = 0; has_lock && i < ctx->file_table.data.nr; i++) {
+ for (i = 0; i < ctx->file_table.data.nr; i++) {
struct file *f = NULL;
if (ctx->file_table.data.nodes[i])
@@ -218,7 +173,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
}
}
seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr);
- for (i = 0; has_lock && i < ctx->buf_table.nr; i++) {
+ for (i = 0; i < ctx->buf_table.nr; i++) {
struct io_mapped_ubuf *buf = NULL;
if (ctx->buf_table.nodes[i])
@@ -228,17 +183,9 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
else
seq_printf(m, "%5u: <none>\n", i);
}
- if (has_lock && !xa_empty(&ctx->personalities)) {
- unsigned long index;
- const struct cred *cred;
-
- seq_printf(m, "Personalities:\n");
- xa_for_each(&ctx->personalities, index, cred)
- io_uring_show_cred(m, index, cred);
- }
seq_puts(m, "PollList:\n");
- for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) {
+ for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) {
struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
struct io_kiocb *req;
@@ -247,9 +194,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
task_work_pending(req->tctx->task));
}
- if (has_lock)
- mutex_unlock(&ctx->uring_lock);
-
seq_puts(m, "CqOverflowList:\n");
spin_lock(&ctx->completion_lock);
list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
@@ -262,4 +206,22 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
spin_unlock(&ctx->completion_lock);
napi_show_fdinfo(ctx, m);
}
-#endif
+
+/*
+ * Caller holds a reference to the file already, we don't need to do
+ * anything else to get an extra reference.
+ */
+__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
+{
+ struct io_ring_ctx *ctx = file->private_data;
+
+ /*
+ * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+ * since fdinfo case grabs it in the opposite direction of normal use
+ * cases.
+ */
+ if (mutex_trylock(&ctx->uring_lock)) {
+ __io_uring_show_fdinfo(ctx, m);
+ mutex_unlock(&ctx->uring_lock);
+ }
+}
diff --git a/io_uring/fs.c b/io_uring/fs.c
index eccea851dd5a..37079a414eab 100644
--- a/io_uring/fs.c
+++ b/io_uring/fs.c
@@ -90,7 +90,7 @@ int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_renameat_cleanup(struct io_kiocb *req)
@@ -141,7 +141,7 @@ int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_unlinkat_cleanup(struct io_kiocb *req)
@@ -185,7 +185,7 @@ int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_mkdirat_cleanup(struct io_kiocb *req)
@@ -235,7 +235,7 @@ int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -281,7 +281,7 @@ int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_link_cleanup(struct io_kiocb *req)
diff --git a/io_uring/futex.c b/io_uring/futex.c
index 0ea4820cd8ff..fa374afbaa51 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -234,7 +234,7 @@ int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
kfree(futexv);
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
- return IOU_OK;
+ return IOU_COMPLETE;
}
/*
@@ -273,7 +273,6 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
struct io_ring_ctx *ctx = req->ctx;
struct io_futex_data *ifd = NULL;
- struct futex_hash_bucket *hb;
int ret;
if (!iof->futex_mask) {
@@ -295,12 +294,11 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
ifd->req = req;
ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
- &ifd->q, &hb);
+ &ifd->q, NULL, NULL);
if (!ret) {
hlist_add_head(&req->hash_node, &ctx->futex_list);
io_ring_submit_unlock(ctx, issue_flags);
- futex_queue(&ifd->q, hb, NULL);
return IOU_ISSUE_SKIP_COMPLETE;
}
@@ -311,7 +309,7 @@ done:
req_set_fail(req);
io_req_set_res(req, ret, 0);
kfree(ifd);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
@@ -328,5 +326,5 @@ int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 04a75d666195..cd1fcb115739 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -114,9 +114,6 @@ enum {
struct io_wq {
unsigned long state;
- free_work_fn *free_work;
- io_wq_work_fn *do_work;
-
struct io_wq_hash *hash;
atomic_t worker_refs;
@@ -153,6 +150,16 @@ static bool io_acct_cancel_pending_work(struct io_wq *wq,
static void create_worker_cb(struct callback_head *cb);
static void io_wq_cancel_tw_create(struct io_wq *wq);
+static inline unsigned int __io_get_work_hash(unsigned int work_flags)
+{
+ return work_flags >> IO_WQ_HASH_SHIFT;
+}
+
+static inline unsigned int io_get_work_hash(struct io_wq_work *work)
+{
+ return __io_get_work_hash(atomic_read(&work->flags));
+}
+
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@@ -412,6 +419,30 @@ fail:
return false;
}
+/* Defer if current and next work are both hashed to the same chain */
+static bool io_wq_hash_defer(struct io_wq_work *work, struct io_wq_acct *acct)
+{
+ unsigned int hash, work_flags;
+ struct io_wq_work *next;
+
+ lockdep_assert_held(&acct->lock);
+
+ work_flags = atomic_read(&work->flags);
+ if (!__io_wq_is_hashed(work_flags))
+ return false;
+
+ /* should not happen, io_acct_run_queue() said we had work */
+ if (wq_list_empty(&acct->work_list))
+ return true;
+
+ hash = __io_get_work_hash(work_flags);
+ next = container_of(acct->work_list.first, struct io_wq_work, list);
+ work_flags = atomic_read(&next->flags);
+ if (!__io_wq_is_hashed(work_flags))
+ return false;
+ return hash == __io_get_work_hash(work_flags);
+}
+
static void io_wq_dec_running(struct io_worker *worker)
{
struct io_wq_acct *acct = io_wq_get_acct(worker);
@@ -422,8 +453,14 @@ static void io_wq_dec_running(struct io_worker *worker)
if (!atomic_dec_and_test(&acct->nr_running))
return;
+ if (!worker->cur_work)
+ return;
if (!io_acct_run_queue(acct))
return;
+ if (io_wq_hash_defer(worker->cur_work, acct)) {
+ raw_spin_unlock(&acct->lock);
+ return;
+ }
raw_spin_unlock(&acct->lock);
atomic_inc(&acct->nr_running);
@@ -457,16 +494,6 @@ static void __io_worker_idle(struct io_wq_acct *acct, struct io_worker *worker)
}
}
-static inline unsigned int __io_get_work_hash(unsigned int work_flags)
-{
- return work_flags >> IO_WQ_HASH_SHIFT;
-}
-
-static inline unsigned int io_get_work_hash(struct io_wq_work *work)
-{
- return __io_get_work_hash(atomic_read(&work->flags));
-}
-
static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
{
bool ret = false;
@@ -612,10 +639,10 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
if (do_kill &&
(work_flags & IO_WQ_WORK_UNBOUND))
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
- wq->do_work(work);
+ io_wq_submit_work(work);
io_assign_current_work(worker, NULL);
- linked = wq->free_work(work);
+ linked = io_wq_free_work(work);
work = next_hashed;
if (!work && linked && !io_wq_is_hashed(linked)) {
work = linked;
@@ -934,8 +961,8 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
do {
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
- wq->do_work(work);
- work = wq->free_work(work);
+ io_wq_submit_work(work);
+ work = io_wq_free_work(work);
} while (work);
}
@@ -1195,8 +1222,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
int ret, i;
struct io_wq *wq;
- if (WARN_ON_ONCE(!data->free_work || !data->do_work))
- return ERR_PTR(-EINVAL);
if (WARN_ON_ONCE(!bounded))
return ERR_PTR(-EINVAL);
@@ -1206,8 +1231,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
refcount_inc(&data->hash->refs);
wq->hash = data->hash;
- wq->free_work = data->free_work;
- wq->do_work = data->do_work;
ret = -ENOMEM;
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index d4fb2940e435..774abab54732 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -21,9 +21,6 @@ enum io_wq_cancel {
IO_WQ_CANCEL_NOTFOUND, /* work not found */
};
-typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
-typedef void (io_wq_work_fn)(struct io_wq_work *);
-
struct io_wq_hash {
refcount_t refs;
unsigned long map;
@@ -39,8 +36,6 @@ static inline void io_wq_put_hash(struct io_wq_hash *hash)
struct io_wq_data {
struct io_wq_hash *hash;
struct task_struct *task;
- io_wq_work_fn *do_work;
- free_work_fn *free_work;
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 541e65a1eebf..c7a9cecf528e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -129,7 +129,6 @@
struct io_defer_entry {
struct list_head list;
struct io_kiocb *req;
- u32 seq;
};
/* requests with any of those set should undergo io_disarm_next() */
@@ -149,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
bool is_sqpoll_thread);
static void io_queue_sqe(struct io_kiocb *req);
+static void __io_req_caches_free(struct io_ring_ctx *ctx);
static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
@@ -359,6 +359,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->tctx_list);
ctx->submit_state.free_list.next = NULL;
INIT_HLIST_HEAD(&ctx->waitid_list);
+ xa_init_flags(&ctx->zcrx_ctxs, XA_FLAGS_ALLOC);
#ifdef CONFIG_FUTEX
INIT_HLIST_HEAD(&ctx->futex_list);
#endif
@@ -380,25 +381,6 @@ err:
return NULL;
}
-static void io_account_cq_overflow(struct io_ring_ctx *ctx)
-{
- struct io_rings *r = ctx->rings;
-
- WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
- ctx->cq_extra--;
-}
-
-static bool req_need_defer(struct io_kiocb *req, u32 seq)
-{
- if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
- struct io_ring_ctx *ctx = req->ctx;
-
- return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
- }
-
- return false;
-}
-
static void io_clean_op(struct io_kiocb *req)
{
if (unlikely(req->flags & REQ_F_BUFFER_SELECTED))
@@ -537,20 +519,37 @@ void io_req_queue_iowq(struct io_kiocb *req)
io_req_task_work_add(req);
}
+static unsigned io_linked_nr(struct io_kiocb *req)
+{
+ struct io_kiocb *tmp;
+ unsigned nr = 0;
+
+ io_for_each_link(tmp, req)
+ nr++;
+ return nr;
+}
+
static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
{
- spin_lock(&ctx->completion_lock);
+ bool drain_seen = false, first = true;
+
+ lockdep_assert_held(&ctx->uring_lock);
+ __io_req_caches_free(ctx);
+
while (!list_empty(&ctx->defer_list)) {
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
- if (req_need_defer(de->req, de->seq))
- break;
+ drain_seen |= de->req->flags & REQ_F_IO_DRAIN;
+ if ((drain_seen || first) && ctx->nr_req_allocated != ctx->nr_drained)
+ return;
+
list_del_init(&de->list);
+ ctx->nr_drained -= io_linked_nr(de->req);
io_req_task_queue(de->req);
kfree(de);
+ first = false;
}
- spin_unlock(&ctx->completion_lock);
}
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
@@ -559,10 +558,8 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
io_poll_wq_wake(ctx);
if (ctx->off_timeout_used)
io_flush_timeouts(ctx);
- if (ctx->drain_active)
- io_queue_deferred(ctx);
if (ctx->has_evfd)
- io_eventfd_flush_signal(ctx);
+ io_eventfd_signal(ctx, true);
}
static inline void __io_cq_lock(struct io_ring_ctx *ctx)
@@ -636,6 +633,7 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying)
* to care for a non-real case.
*/
if (need_resched()) {
+ ctx->cqe_sentinel = ctx->cqe_cached;
io_cq_unlock_post(ctx);
mutex_unlock(&ctx->uring_lock);
cond_resched();
@@ -700,27 +698,20 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
}
}
-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags, u64 extra1, u64 extra2)
+static __cold bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
+ struct io_overflow_cqe *ocqe)
{
- struct io_overflow_cqe *ocqe;
- size_t ocq_size = sizeof(struct io_overflow_cqe);
- bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
-
lockdep_assert_held(&ctx->completion_lock);
- if (is_cqe32)
- ocq_size += sizeof(struct io_uring_cqe);
-
- ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
- trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
if (!ocqe) {
+ struct io_rings *r = ctx->rings;
+
/*
* If we're in ring overflow flush mode, or in task cancel mode,
* or cannot allocate an overflow entry, then we need to drop it
* on the floor.
*/
- io_account_cq_overflow(ctx);
+ WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
return false;
}
@@ -729,23 +720,35 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
}
- ocqe->cqe.user_data = user_data;
- ocqe->cqe.res = res;
- ocqe->cqe.flags = cflags;
- if (is_cqe32) {
- ocqe->cqe.big_cqe[0] = extra1;
- ocqe->cqe.big_cqe[1] = extra2;
- }
list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
return true;
}
-static void io_req_cqe_overflow(struct io_kiocb *req)
+static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
+ struct io_cqe *cqe,
+ struct io_big_cqe *big_cqe, gfp_t gfp)
{
- io_cqring_event_overflow(req->ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1, req->big_cqe.extra2);
- memset(&req->big_cqe, 0, sizeof(req->big_cqe));
+ struct io_overflow_cqe *ocqe;
+ size_t ocq_size = sizeof(struct io_overflow_cqe);
+ bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
+
+ if (is_cqe32)
+ ocq_size += sizeof(struct io_uring_cqe);
+
+ ocqe = kzalloc(ocq_size, gfp | __GFP_ACCOUNT);
+ trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe);
+ if (ocqe) {
+ ocqe->cqe.user_data = cqe->user_data;
+ ocqe->cqe.res = cqe->res;
+ ocqe->cqe.flags = cqe->flags;
+ if (is_cqe32 && big_cqe) {
+ ocqe->cqe.big_cqe[0] = big_cqe->extra1;
+ ocqe->cqe.big_cqe[1] = big_cqe->extra2;
+ }
+ }
+ if (big_cqe)
+ big_cqe->extra1 = big_cqe->extra2 = 0;
+ return ocqe;
}
/*
@@ -790,13 +793,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
{
struct io_uring_cqe *cqe;
- ctx->cq_extra++;
-
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
if (likely(io_get_cqe(ctx, &cqe))) {
WRITE_ONCE(cqe->user_data, user_data);
WRITE_ONCE(cqe->res, res);
@@ -813,14 +809,43 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
return false;
}
+static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags)
+{
+ return (struct io_cqe) { .user_data = user_data, .res = res, .flags = cflags };
+}
+
+static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe,
+ struct io_big_cqe *big_cqe)
+{
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_alloc_ocqe(ctx, cqe, big_cqe, GFP_KERNEL);
+ spin_lock(&ctx->completion_lock);
+ io_cqring_add_overflow(ctx, ocqe);
+ spin_unlock(&ctx->completion_lock);
+}
+
+static __cold bool io_cqe_overflow_locked(struct io_ring_ctx *ctx,
+ struct io_cqe *cqe,
+ struct io_big_cqe *big_cqe)
+{
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_alloc_ocqe(ctx, cqe, big_cqe, GFP_ATOMIC);
+ return io_cqring_add_overflow(ctx, ocqe);
+}
+
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
bool filled;
io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
- if (!filled)
- filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+ if (unlikely(!filled)) {
+ struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
+
+ filled = io_cqe_overflow_locked(ctx, &cqe, NULL);
+ }
io_cq_unlock_post(ctx);
return filled;
}
@@ -831,10 +856,13 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
*/
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
+ lockdep_assert_held(&ctx->uring_lock);
+ lockdep_assert(ctx->lockless_cq);
+
if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
- spin_lock(&ctx->completion_lock);
- io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
- spin_unlock(&ctx->completion_lock);
+ struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
+
+ io_cqe_overflow(ctx, &cqe, NULL);
}
ctx->submit_state.cq_flush = true;
}
@@ -924,22 +952,6 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res)
}
/*
- * Don't initialise the fields below on every allocation, but do that in
- * advance and keep them valid across allocations.
- */
-static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
-{
- req->ctx = ctx;
- req->buf_node = NULL;
- req->file_node = NULL;
- req->link = NULL;
- req->async_data = NULL;
- /* not necessary, but safer to zero */
- memset(&req->cqe, 0, sizeof(req->cqe));
- memset(&req->big_cqe, 0, sizeof(req->big_cqe));
-}
-
-/*
* A request might get retired back into the request caches even before opcode
* handlers and io_issue_sqe() are done with it, e.g. inline completion path.
* Because of that, io_alloc_req() should be called only under ->uring_lock
@@ -948,7 +960,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
__cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
- gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO;
void *reqs[IO_REQ_ALLOC_BATCH];
int ret;
@@ -966,10 +978,11 @@ __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
}
percpu_ref_get_many(&ctx->refs, ret);
+ ctx->nr_req_allocated += ret;
+
while (ret--) {
struct io_kiocb *req = reqs[ret];
- io_preinit_req(req, ctx);
io_req_add_to_cache(req, ctx);
}
return true;
@@ -1191,7 +1204,7 @@ static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
if (ctx->has_evfd)
- io_eventfd_signal(ctx);
+ io_eventfd_signal(ctx, false);
}
nr_wait = atomic_read(&ctx->cq_wait_nr);
@@ -1383,6 +1396,16 @@ void io_queue_next(struct io_kiocb *req)
io_req_task_queue(nxt);
}
+static inline void io_req_put_rsrc_nodes(struct io_kiocb *req)
+{
+ if (req->file_node) {
+ io_put_rsrc_node(req->ctx, req->file_node);
+ req->file_node = NULL;
+ }
+ if (req->flags & REQ_F_BUF_NODE)
+ io_put_rsrc_node(req->ctx, req->buf_node);
+}
+
static void io_free_batch_list(struct io_ring_ctx *ctx,
struct io_wq_work_node *node)
__must_hold(&ctx->uring_lock)
@@ -1443,13 +1466,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
*/
if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
unlikely(!io_fill_cqe_req(ctx, req))) {
- if (ctx->lockless_cq) {
- spin_lock(&ctx->completion_lock);
- io_req_cqe_overflow(req);
- spin_unlock(&ctx->completion_lock);
- } else {
- io_req_cqe_overflow(req);
- }
+ if (ctx->lockless_cq)
+ io_cqe_overflow(ctx, &req->cqe, &req->big_cqe);
+ else
+ io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe);
}
}
__io_cq_unlock_post(ctx);
@@ -1458,6 +1478,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
io_free_batch_list(ctx, state->compl_reqs.first);
INIT_WQ_LIST(&state->compl_reqs);
}
+
+ if (unlikely(ctx->drain_active))
+ io_queue_deferred(ctx);
+
ctx->submit_state.cq_flush = false;
}
@@ -1645,56 +1669,28 @@ io_req_flags_t io_file_get_flags(struct file *file)
return res;
}
-static u32 io_get_sequence(struct io_kiocb *req)
-{
- u32 seq = req->ctx->cached_sq_head;
- struct io_kiocb *cur;
-
- /* need original cached_sq_head, but it was increased for each req */
- io_for_each_link(cur, req)
- seq--;
- return seq;
-}
-
static __cold void io_drain_req(struct io_kiocb *req)
__must_hold(&ctx->uring_lock)
{
struct io_ring_ctx *ctx = req->ctx;
+ bool drain = req->flags & IOSQE_IO_DRAIN;
struct io_defer_entry *de;
- int ret;
- u32 seq = io_get_sequence(req);
-
- /* Still need defer if there is pending req in defer list. */
- spin_lock(&ctx->completion_lock);
- if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) {
- spin_unlock(&ctx->completion_lock);
-queue:
- ctx->drain_active = false;
- io_req_task_queue(req);
- return;
- }
- spin_unlock(&ctx->completion_lock);
- io_prep_async_link(req);
- de = kmalloc(sizeof(*de), GFP_KERNEL);
+ de = kmalloc(sizeof(*de), GFP_KERNEL_ACCOUNT);
if (!de) {
- ret = -ENOMEM;
- io_req_defer_failed(req, ret);
+ io_req_defer_failed(req, -ENOMEM);
return;
}
- spin_lock(&ctx->completion_lock);
- if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
- spin_unlock(&ctx->completion_lock);
- kfree(de);
- goto queue;
- }
-
+ io_prep_async_link(req);
trace_io_uring_defer(req);
de->req = req;
- de->seq = seq;
+
+ ctx->nr_drained += io_linked_nr(req);
list_add_tail(&de->list, &ctx->defer_list);
- spin_unlock(&ctx->completion_lock);
+ io_queue_deferred(ctx);
+ if (!drain && list_empty(&ctx->defer_list))
+ ctx->drain_active = false;
}
static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
@@ -1756,7 +1752,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
ret = __io_issue_sqe(req, issue_flags, def);
- if (ret == IOU_OK) {
+ if (ret == IOU_COMPLETE) {
if (issue_flags & IO_URING_F_COMPLETE_DEFER)
io_req_complete_defer(req);
else
@@ -1815,7 +1811,7 @@ void io_wq_submit_work(struct io_wq_work *work)
bool needs_poll = false;
int ret = 0, err = -ECANCELED;
- /* one will be dropped by ->io_wq_free_work() after returning to io-wq */
+ /* one will be dropped by io_wq_free_work() after returning to io-wq */
if (!(req->flags & REQ_F_REFCOUNT))
__io_req_set_refcount(req, 2);
else
@@ -1913,7 +1909,8 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
io_ring_submit_lock(ctx, issue_flags);
node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
if (node) {
- io_req_assign_rsrc_node(&req->file_node, node);
+ node->refs++;
+ req->file_node = node;
req->flags |= io_slot_flags(node);
file = io_slot_file(node);
}
@@ -2046,7 +2043,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
int personality;
u8 opcode;
- /* req is partially pre-initialised, see io_preinit_req() */
+ req->ctx = ctx;
req->opcode = opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
sqe_flags = READ_ONCE(sqe->flags);
@@ -2277,10 +2274,6 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
(!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
head = READ_ONCE(ctx->sq_array[head]);
if (unlikely(head >= ctx->sq_entries)) {
- /* drop invalid entries */
- spin_lock(&ctx->completion_lock);
- ctx->cq_extra--;
- spin_unlock(&ctx->completion_lock);
WRITE_ONCE(ctx->rings->sq_dropped,
READ_ONCE(ctx->rings->sq_dropped) + 1);
return false;
@@ -2698,21 +2691,26 @@ unsigned long rings_size(unsigned int flags, unsigned int sq_entries,
return off;
}
-static void io_req_caches_free(struct io_ring_ctx *ctx)
+static __cold void __io_req_caches_free(struct io_ring_ctx *ctx)
{
struct io_kiocb *req;
int nr = 0;
- mutex_lock(&ctx->uring_lock);
-
while (!io_req_cache_empty(ctx)) {
req = io_extract_req(ctx);
kmem_cache_free(req_cachep, req);
nr++;
}
- if (nr)
+ if (nr) {
+ ctx->nr_req_allocated -= nr;
percpu_ref_put_many(&ctx->refs, nr);
- mutex_unlock(&ctx->uring_lock);
+ }
+}
+
+static __cold void io_req_caches_free(struct io_ring_ctx *ctx)
+{
+ guard(mutex)(&ctx->uring_lock);
+ __io_req_caches_free(ctx);
}
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
@@ -2748,6 +2746,9 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
io_req_caches_free(ctx);
+
+ WARN_ON_ONCE(ctx->nr_req_allocated);
+
if (ctx->hash_map)
io_wq_put_hash(ctx->hash_map);
io_napi_free(ctx);
@@ -2882,7 +2883,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
io_cqring_overflow_kill(ctx);
mutex_unlock(&ctx->uring_lock);
}
- if (ctx->ifq) {
+ if (!xa_empty(&ctx->zcrx_ctxs)) {
mutex_lock(&ctx->uring_lock);
io_shutdown_zcrx_ifqs(ctx);
mutex_unlock(&ctx->uring_lock);
@@ -3014,20 +3015,19 @@ static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
struct io_defer_entry *de;
LIST_HEAD(list);
- spin_lock(&ctx->completion_lock);
list_for_each_entry_reverse(de, &ctx->defer_list, list) {
if (io_match_task_safe(de->req, tctx, cancel_all)) {
list_cut_position(&list, &ctx->defer_list, &de->list);
break;
}
}
- spin_unlock(&ctx->completion_lock);
if (list_empty(&list))
return false;
while (!list_empty(&list)) {
de = list_first_entry(&list, struct io_defer_entry, list);
list_del_init(&de->list);
+ ctx->nr_drained -= io_linked_nr(de->req);
io_req_task_queue_fail(de->req, -ECANCELED);
kfree(de);
}
@@ -3102,8 +3102,8 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
io_allowed_defer_tw_run(ctx))
ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
- ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
mutex_lock(&ctx->uring_lock);
+ ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
ret |= io_poll_remove_all(ctx, tctx, cancel_all);
ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
ret |= io_futex_remove_all(ctx, tctx, cancel_all);
@@ -3913,6 +3913,8 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_SQE_ELEM(44, __u32, file_index);
BUILD_BUG_SQE_ELEM(44, __u16, addr_len);
+ BUILD_BUG_SQE_ELEM(44, __u8, write_stream);
+ BUILD_BUG_SQE_ELEM(45, __u8, __pad4[0]);
BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]);
BUILD_BUG_SQE_ELEM(48, __u64, addr3);
BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index e4050b2d0821..0ea7a435d1de 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -19,7 +19,6 @@
#endif
enum {
- IOU_OK = 0, /* deprecated, use IOU_COMPLETE */
IOU_COMPLETE = 0,
IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
@@ -196,7 +195,6 @@ static inline bool io_defer_get_uncommited_cqe(struct io_ring_ctx *ctx,
{
io_lockdep_assert_cq_locked(ctx);
- ctx->cq_extra++;
ctx->submit_state.cq_flush = true;
return io_get_cqe(ctx, cqe_ret);
}
@@ -414,7 +412,7 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
- if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+ if (unlikely(ctx->off_timeout_used ||
ctx->has_evfd || ctx->poll_activated))
__io_commit_cqring_flush(ctx);
}
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 953d5e742569..8cce3ebd813f 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -92,7 +92,6 @@ void io_kbuf_drop_legacy(struct io_kiocb *req)
{
if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
return;
- req->buf_index = req->kbuf->bgid;
req->flags &= ~REQ_F_BUFFER_SELECTED;
kfree(req->kbuf);
req->kbuf = NULL;
@@ -110,7 +109,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
bl = io_buffer_get_list(ctx, buf->bgid);
list_add(&buf->list, &bl->buf_list);
req->flags &= ~REQ_F_BUFFER_SELECTED;
- req->buf_index = buf->bgid;
io_ring_submit_unlock(ctx, issue_flags);
return true;
@@ -193,7 +191,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
}
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
- unsigned int issue_flags)
+ unsigned buf_group, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_buffer_list *bl;
@@ -201,7 +199,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
io_ring_submit_lock(req->ctx, issue_flags);
- bl = io_buffer_get_list(ctx, req->buf_index);
+ bl = io_buffer_get_list(ctx, buf_group);
if (likely(bl)) {
if (bl->flags & IOBL_BUF_RING)
ret = io_ring_buffer_select(req, len, bl, issue_flags);
@@ -302,7 +300,7 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
int ret = -ENOENT;
io_ring_submit_lock(ctx, issue_flags);
- bl = io_buffer_get_list(ctx, req->buf_index);
+ bl = io_buffer_get_list(ctx, arg->buf_group);
if (unlikely(!bl))
goto out_unlock;
@@ -335,7 +333,7 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
lockdep_assert_held(&ctx->uring_lock);
- bl = io_buffer_get_list(ctx, req->buf_index);
+ bl = io_buffer_get_list(ctx, arg->buf_group);
if (unlikely(!bl))
return -ENOENT;
@@ -355,10 +353,9 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
struct io_buffer_list *bl = req->buf_list;
bool ret = true;
- if (bl) {
+ if (bl)
ret = io_kbuf_commit(req, bl, len, nr);
- req->buf_index = bl->bgid;
- }
+
req->flags &= ~REQ_F_BUFFER_RING;
return ret;
}
@@ -379,45 +376,33 @@ unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
return ret;
}
-static int __io_remove_buffers(struct io_ring_ctx *ctx,
- struct io_buffer_list *bl, unsigned nbufs)
+static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
+ struct io_buffer_list *bl,
+ unsigned long nbufs)
{
- unsigned i = 0;
-
- /* shouldn't happen */
- if (!nbufs)
- return 0;
-
- if (bl->flags & IOBL_BUF_RING) {
- i = bl->buf_ring->tail - bl->head;
- io_free_region(ctx, &bl->region);
- /* make sure it's seen as empty */
- INIT_LIST_HEAD(&bl->buf_list);
- bl->flags &= ~IOBL_BUF_RING;
- return i;
- }
+ unsigned long i = 0;
+ struct io_buffer *nxt;
/* protects io_buffers_cache */
lockdep_assert_held(&ctx->uring_lock);
+ WARN_ON_ONCE(bl->flags & IOBL_BUF_RING);
- while (!list_empty(&bl->buf_list)) {
- struct io_buffer *nxt;
-
+ for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) {
nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
list_del(&nxt->list);
kfree(nxt);
-
- if (++i == nbufs)
- return i;
cond_resched();
}
-
return i;
}
static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
- __io_remove_buffers(ctx, bl, -1U);
+ if (bl->flags & IOBL_BUF_RING)
+ io_free_region(ctx, &bl->region);
+ else
+ io_remove_buffers_legacy(ctx, bl, -1U);
+
kfree(bl);
}
@@ -465,30 +450,6 @@ int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
- struct io_ring_ctx *ctx = req->ctx;
- struct io_buffer_list *bl;
- int ret = 0;
-
- io_ring_submit_lock(ctx, issue_flags);
-
- ret = -ENOENT;
- bl = io_buffer_get_list(ctx, p->bgid);
- if (bl) {
- ret = -EINVAL;
- /* can't use provide/remove buffers command on mapped buffers */
- if (!(bl->flags & IOBL_BUF_RING))
- ret = __io_remove_buffers(ctx, bl, p->nbufs);
- }
- io_ring_submit_unlock(ctx, issue_flags);
- if (ret < 0)
- req_set_fail(req);
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
unsigned long size, tmp_check;
@@ -512,8 +473,6 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return -EOVERFLOW;
if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
return -EOVERFLOW;
-
- size = (unsigned long)p->len * p->nbufs;
if (!access_ok(u64_to_user_ptr(p->addr), size))
return -EFAULT;
@@ -552,49 +511,56 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
return i ? 0 : -ENOMEM;
}
-int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_manage_buffers_legacy(struct io_kiocb *req,
+ struct io_buffer_list *bl)
{
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
- struct io_ring_ctx *ctx = req->ctx;
- struct io_buffer_list *bl;
- int ret = 0;
-
- io_ring_submit_lock(ctx, issue_flags);
+ int ret;
- bl = io_buffer_get_list(ctx, p->bgid);
- if (unlikely(!bl)) {
+ if (!bl) {
+ if (req->opcode != IORING_OP_PROVIDE_BUFFERS)
+ return -ENOENT;
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
- if (!bl) {
- ret = -ENOMEM;
- goto err;
- }
+ if (!bl)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&bl->buf_list);
- ret = io_buffer_add_list(ctx, bl, p->bgid);
+ ret = io_buffer_add_list(req->ctx, bl, p->bgid);
if (ret) {
kfree(bl);
- goto err;
+ return ret;
}
}
- /* can't add buffers via this command for a mapped buffer ring */
- if (bl->flags & IOBL_BUF_RING) {
- ret = -EINVAL;
- goto err;
- }
+ /* can't use provide/remove buffers command on mapped buffers */
+ if (bl->flags & IOBL_BUF_RING)
+ return -EINVAL;
+ if (req->opcode == IORING_OP_PROVIDE_BUFFERS)
+ return io_add_buffers(req->ctx, p, bl);
+ return io_remove_buffers_legacy(req->ctx, bl, p->nbufs);
+}
+
+int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer_list *bl;
+ int ret;
- ret = io_add_buffers(ctx, p, bl);
-err:
+ io_ring_submit_lock(ctx, issue_flags);
+ bl = io_buffer_get_list(ctx, p->bgid);
+ ret = __io_manage_buffers_legacy(req, bl);
io_ring_submit_unlock(ctx, issue_flags);
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_reg reg;
- struct io_buffer_list *bl, *free_bl = NULL;
+ struct io_buffer_list *bl;
struct io_uring_region_desc rd;
struct io_uring_buf_ring *br;
unsigned long mmap_offset;
@@ -605,8 +571,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;
-
- if (reg.resv[0] || reg.resv[1] || reg.resv[2])
+ if (!mem_is_zero(reg.resv, sizeof(reg.resv)))
return -EINVAL;
if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
return -EINVAL;
@@ -624,7 +589,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
io_destroy_bl(ctx, bl);
}
- free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
if (!bl)
return -ENOMEM;
@@ -669,7 +634,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
return 0;
fail:
io_free_region(ctx, &bl->region);
- kfree(free_bl);
+ kfree(bl);
return ret;
}
@@ -682,9 +647,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;
- if (reg.resv[0] || reg.resv[1] || reg.resv[2])
- return -EINVAL;
- if (reg.flags)
+ if (!mem_is_zero(reg.resv, sizeof(reg.resv)) || reg.flags)
return -EINVAL;
bl = io_buffer_get_list(ctx, reg.bgid);
@@ -704,14 +667,11 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_status buf_status;
struct io_buffer_list *bl;
- int i;
if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
return -EFAULT;
-
- for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
- if (buf_status.resv[i])
- return -EINVAL;
+ if (!mem_is_zero(buf_status.resv, sizeof(buf_status.resv)))
+ return -EINVAL;
bl = io_buffer_get_list(ctx, buf_status.buf_group);
if (!bl)
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 2ec0b983ce24..4d2c209d1a41 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -55,20 +55,19 @@ struct buf_sel_arg {
size_t max_len;
unsigned short nr_iovs;
unsigned short mode;
+ unsigned buf_group;
};
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
- unsigned int issue_flags);
+ unsigned buf_group, unsigned int issue_flags);
int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
unsigned int issue_flags);
int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
void io_destroy_buffers(struct io_ring_ctx *ctx);
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags);
-
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
+int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags);
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
@@ -94,7 +93,6 @@ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
* to monopolize the buffer.
*/
if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
return true;
}
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 76fcc79656b0..725dc0bec24c 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -13,6 +13,7 @@
#include "memmap.h"
#include "kbuf.h"
#include "rsrc.h"
+#include "zcrx.h"
static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
size_t size, gfp_t gfp)
@@ -116,7 +117,7 @@ static int io_region_init_ptr(struct io_mapped_region *mr)
void *ptr;
if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) {
- if (ifd.nr_folios == 1) {
+ if (ifd.nr_folios == 1 && !PageHighMem(mr->pages[0])) {
mr->ptr = page_address(mr->pages[0]);
return 0;
}
@@ -258,7 +259,8 @@ static struct io_mapped_region *io_mmap_get_region(struct io_ring_ctx *ctx,
loff_t pgoff)
{
loff_t offset = pgoff << PAGE_SHIFT;
- unsigned int bgid;
+ unsigned int id;
+
switch (offset & IORING_OFF_MMAP_MASK) {
case IORING_OFF_SQ_RING:
@@ -267,12 +269,13 @@ static struct io_mapped_region *io_mmap_get_region(struct io_ring_ctx *ctx,
case IORING_OFF_SQES:
return &ctx->sq_region;
case IORING_OFF_PBUF_RING:
- bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
- return io_pbuf_get_region(ctx, bgid);
+ id = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
+ return io_pbuf_get_region(ctx, id);
case IORING_MAP_OFF_PARAM_REGION:
return &ctx->param_region;
case IORING_MAP_OFF_ZCRX_REGION:
- return &ctx->zcrx_region;
+ id = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_ZCRX_SHIFT;
+ return io_zcrx_get_region(ctx, id);
}
return NULL;
}
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index dad0aa5b1b45..08419684e4bc 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -4,7 +4,9 @@
#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
#define IORING_MAP_OFF_ZCRX_REGION 0x30000000ULL
-struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
+#define IORING_OFF_ZCRX_SHIFT 16
+
+struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages);
#ifndef CONFIG_MMU
unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 50a958e9c921..71400d6cefc8 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -328,7 +328,7 @@ done:
req_set_fail(req);
}
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
diff --git a/io_uring/net.c b/io_uring/net.c
index 24040bc3916a..d13f3e8f6c72 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -18,7 +18,6 @@
#include "rsrc.h"
#include "zcrx.h"
-#if defined(CONFIG_NET)
struct io_shutdown {
struct file *file;
int how;
@@ -129,7 +128,7 @@ int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
ret = __sys_shutdown_sock(sock, shutdown->how);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
static bool io_net_retry(struct socket *sock, int flags)
@@ -190,7 +189,6 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
sr->done_io = 0;
sr->retry = false;
sr->len = 0; /* get from the provided buffer */
- req->buf_index = sr->buf_group;
}
static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
@@ -359,15 +357,13 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kmsg->msg.msg_name = &kmsg->addr;
kmsg->msg.msg_namelen = addr_len;
}
- if (sr->flags & IORING_RECVSEND_FIXED_BUF)
+ if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
+ req->flags |= REQ_F_IMPORT_BUFFER;
return 0;
- if (!io_do_buffer_select(req)) {
- ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
- &kmsg->msg.msg_iter);
- if (unlikely(ret < 0))
- return ret;
}
- return 0;
+ if (req->flags & REQ_F_BUFFER_SELECT)
+ return 0;
+ return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}
static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -409,13 +405,12 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
if (sr->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
+ if (req->flags & REQ_F_BUFFER_SELECT)
+ sr->buf_group = req->buf_index;
if (sr->flags & IORING_RECVSEND_BUNDLE) {
if (req->opcode == IORING_OP_SENDMSG)
return -EINVAL;
- if (!(req->flags & REQ_F_BUFFER_SELECT))
- return -EINVAL;
sr->msg_flags |= MSG_WAITALL;
- sr->buf_group = req->buf_index;
req->buf_list = NULL;
req->flags |= REQ_F_MULTISHOT;
}
@@ -507,7 +502,7 @@ static inline bool io_send_finish(struct io_kiocb *req, int *ret,
/* Otherwise stop bundle and use the current result. */
finish:
io_req_set_res(req, *ret, cflags);
- *ret = IOU_OK;
+ *ret = IOU_COMPLETE;
return true;
}
@@ -558,7 +553,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
else if (sr->done_io)
ret = sr->done_io;
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
@@ -571,6 +566,7 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
.iovs = &kmsg->fast_iov,
.max_len = min_not_zero(sr->len, INT_MAX),
.nr_iovs = 1,
+ .buf_group = sr->buf_group,
};
if (kmsg->vec.iovec) {
@@ -723,7 +719,6 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *kmsg;
- int ret;
kmsg = io_msg_alloc_async(req);
if (unlikely(!kmsg))
@@ -739,13 +734,10 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
kmsg->msg.msg_iocb = NULL;
kmsg->msg.msg_ubuf = NULL;
- if (!io_do_buffer_select(req)) {
- ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
- &kmsg->msg.msg_iter);
- if (unlikely(ret))
- return ret;
- }
- return 0;
+ if (req->flags & REQ_F_BUFFER_SELECT)
+ return 0;
+ return import_ubuf(ITER_DEST, sr->buf, sr->len,
+ &kmsg->msg.msg_iter);
}
return io_recvmsg_copy_hdr(req, kmsg);
@@ -827,18 +819,24 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
if (sr->flags & IORING_RECVSEND_BUNDLE) {
- cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
+ size_t this_ret = *ret - sr->done_io;
+
+ cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
if (sr->retry)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
goto finish;
- /* if more is available, retry and append to this one */
- if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
+ /*
+ * If more is available AND it was a full transfer, retry and
+ * append to this one
+ */
+ if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
+ !iov_iter_count(&kmsg->msg.msg_iter)) {
req->cqe.flags = cflags & ~CQE_F_MASK;
sr->len = kmsg->msg.msg_inq;
- sr->done_io += *ret;
+ sr->done_io += this_ret;
sr->retry = true;
return false;
}
@@ -985,7 +983,7 @@ retry_multishot:
void __user *buf;
size_t len = sr->len;
- buf = io_buffer_select(req, &len, issue_flags);
+ buf = io_buffer_select(req, &len, sr->buf_group, issue_flags);
if (!buf)
return -ENOBUFS;
@@ -1063,6 +1061,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
.iovs = &kmsg->fast_iov,
.nr_iovs = 1,
.mode = KBUF_MODE_EXPAND,
+ .buf_group = sr->buf_group,
};
if (kmsg->vec.iovec) {
@@ -1095,7 +1094,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
void __user *buf;
*len = sr->len;
- buf = io_buffer_select(req, len, issue_flags);
+ buf = io_buffer_select(req, len, sr->buf_group, issue_flags);
if (!buf)
return -ENOBUFS;
sr->buf = buf;
@@ -1191,16 +1190,14 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
unsigned ifq_idx;
- if (unlikely(sqe->file_index || sqe->addr2 || sqe->addr ||
- sqe->addr3))
+ if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
return -EINVAL;
ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
- if (ifq_idx != 0)
- return -EINVAL;
- zc->ifq = req->ctx->ifq;
+ zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
if (!zc->ifq)
return -EINVAL;
+
zc->len = READ_ONCE(sqe->len);
zc->flags = READ_ONCE(sqe->ioprio);
zc->msg_flags = READ_ONCE(sqe->msg_flags);
@@ -1321,8 +1318,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -ENOMEM;
if (req->opcode == IORING_OP_SEND_ZC) {
- if (zc->flags & IORING_RECVSEND_FIXED_BUF)
- req->flags |= REQ_F_IMPORT_BUFFER;
ret = io_send_setup(req, sqe);
} else {
if (unlikely(sqe->addr2 || sqe->file_index))
@@ -1470,7 +1465,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
io_req_msg_cleanup(req, 0);
}
io_req_set_res(req, ret, IORING_CQE_F_MORE);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
@@ -1541,7 +1536,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
io_req_msg_cleanup(req, 0);
}
io_req_set_res(req, ret, IORING_CQE_F_MORE);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_sendrecv_fail(struct io_kiocb *req)
@@ -1705,7 +1700,7 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags)
sock->file_slot);
}
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -1772,7 +1767,7 @@ out:
req_set_fail(req);
io_req_msg_cleanup(req, issue_flags);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -1846,4 +1841,3 @@ void io_netmsg_cache_free(const void *entry)
io_vec_free(&kmsg->vec);
kfree(kmsg);
}
-#endif
diff --git a/io_uring/nop.c b/io_uring/nop.c
index 28f06285fdc2..6ac2de761fd3 100644
--- a/io_uring/nop.c
+++ b/io_uring/nop.c
@@ -68,5 +68,5 @@ done:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, nop->result, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/notif.c b/io_uring/notif.c
index 7bd92538dccb..9a6f6e92d742 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -112,6 +112,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
if (unlikely(!io_alloc_req(ctx, &notif)))
return NULL;
+ notif->ctx = ctx;
notif->opcode = IORING_OP_NOP;
notif->flags = 0;
notif->file = NULL;
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 489384c0438b..6e0882b051f9 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -333,13 +333,13 @@ const struct io_issue_def io_issue_defs[] = {
.audit_skip = 1,
.iopoll = 1,
.prep = io_provide_buffers_prep,
- .issue = io_provide_buffers,
+ .issue = io_manage_buffers_legacy,
},
[IORING_OP_REMOVE_BUFFERS] = {
.audit_skip = 1,
.iopoll = 1,
.prep = io_remove_buffers_prep,
- .issue = io_remove_buffers,
+ .issue = io_manage_buffers_legacy,
},
[IORING_OP_TEE] = {
.needs_file = 1,
@@ -569,6 +569,10 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_prep_writev_fixed,
.issue = io_write,
},
+ [IORING_OP_PIPE] = {
+ .prep = io_pipe_prep,
+ .issue = io_pipe,
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -815,6 +819,9 @@ const struct io_cold_def io_cold_defs[] = {
.cleanup = io_readv_writev_cleanup,
.fail = io_rw_fail,
},
+ [IORING_OP_PIPE] = {
+ .name = "PIPE",
+ },
};
const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index e3357dfa14ca..83e36ad4e31b 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -6,6 +6,8 @@
#include <linux/fdtable.h>
#include <linux/fsnotify.h>
#include <linux/namei.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/watch_queue.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
@@ -169,7 +171,7 @@ err:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_openat(struct io_kiocb *req, unsigned int issue_flags)
@@ -257,7 +259,7 @@ err:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -300,5 +302,136 @@ int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
+}
+
+struct io_pipe {
+ struct file *file;
+ int __user *fds;
+ int flags;
+ int file_slot;
+ unsigned long nofile;
+};
+
+int io_pipe_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+
+ if (sqe->fd || sqe->off || sqe->addr3)
+ return -EINVAL;
+
+ p->fds = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ p->flags = READ_ONCE(sqe->pipe_flags);
+ if (p->flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
+ return -EINVAL;
+
+ p->file_slot = READ_ONCE(sqe->file_index);
+ p->nofile = rlimit(RLIMIT_NOFILE);
+ return 0;
+}
+
+static int io_pipe_fixed(struct io_kiocb *req, struct file **files,
+ unsigned int issue_flags)
+{
+ struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+ struct io_ring_ctx *ctx = req->ctx;
+ int ret, fds[2] = { -1, -1 };
+ int slot = p->file_slot;
+
+ if (p->flags & O_CLOEXEC)
+ return -EINVAL;
+
+ io_ring_submit_lock(ctx, issue_flags);
+
+ ret = __io_fixed_fd_install(ctx, files[0], slot);
+ if (ret < 0)
+ goto err;
+ fds[0] = ret;
+ files[0] = NULL;
+
+ /*
+ * If a specific slot is given, next one will be used for
+ * the write side.
+ */
+ if (slot != IORING_FILE_INDEX_ALLOC)
+ slot++;
+
+ ret = __io_fixed_fd_install(ctx, files[1], slot);
+ if (ret < 0)
+ goto err;
+ fds[1] = ret;
+ files[1] = NULL;
+
+ io_ring_submit_unlock(ctx, issue_flags);
+
+ if (!copy_to_user(p->fds, fds, sizeof(fds)))
+ return 0;
+
+ ret = -EFAULT;
+ io_ring_submit_lock(ctx, issue_flags);
+err:
+ if (fds[0] != -1)
+ io_fixed_fd_remove(ctx, fds[0]);
+ if (fds[1] != -1)
+ io_fixed_fd_remove(ctx, fds[1]);
+ io_ring_submit_unlock(ctx, issue_flags);
+ return ret;
+}
+
+static int io_pipe_fd(struct io_kiocb *req, struct file **files)
+{
+ struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+ int ret, fds[2] = { -1, -1 };
+
+ ret = __get_unused_fd_flags(p->flags, p->nofile);
+ if (ret < 0)
+ goto err;
+ fds[0] = ret;
+
+ ret = __get_unused_fd_flags(p->flags, p->nofile);
+ if (ret < 0)
+ goto err;
+ fds[1] = ret;
+
+ if (!copy_to_user(p->fds, fds, sizeof(fds))) {
+ fd_install(fds[0], files[0]);
+ fd_install(fds[1], files[1]);
+ return 0;
+ }
+ ret = -EFAULT;
+err:
+ if (fds[0] != -1)
+ put_unused_fd(fds[0]);
+ if (fds[1] != -1)
+ put_unused_fd(fds[1]);
+ return ret;
+}
+
+int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+ struct file *files[2];
+ int ret;
+
+ ret = create_pipe_files(files, p->flags);
+ if (ret)
+ return ret;
+ files[0]->f_mode |= FMODE_NOWAIT;
+ files[1]->f_mode |= FMODE_NOWAIT;
+
+ if (!!p->file_slot)
+ ret = io_pipe_fixed(req, files, issue_flags);
+ else
+ ret = io_pipe_fd(req, files);
+
+ io_req_set_res(req, ret, 0);
+ if (!ret)
+ return IOU_COMPLETE;
+
+ req_set_fail(req);
+ if (files[0])
+ fput(files[0]);
+ if (files[1])
+ fput(files[1]);
+ return ret;
}
diff --git a/io_uring/openclose.h b/io_uring/openclose.h
index 8a93c98ad0ad..4ca2a9935abc 100644
--- a/io_uring/openclose.h
+++ b/io_uring/openclose.h
@@ -13,5 +13,8 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags);
int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_close(struct io_kiocb *req, unsigned int issue_flags);
+int io_pipe_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_pipe(struct io_kiocb *req, unsigned int issue_flags);
+
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 8eb744eb9f4c..0526062e2f81 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -893,7 +893,7 @@ int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
if (ret > 0) {
io_req_set_res(req, ipt.result_mask, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}
@@ -948,5 +948,5 @@ out:
}
/* complete update request, we're done with it */
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index f80a77c4973f..c592ceace97d 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -80,10 +80,21 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
return 0;
}
-int io_buffer_validate(struct iovec *iov)
+int io_validate_user_buf_range(u64 uaddr, u64 ulen)
{
- unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
+ unsigned long tmp, base = (unsigned long)uaddr;
+ unsigned long acct_len = (unsigned long)PAGE_ALIGN(ulen);
+ /* arbitrary limit, but we need something */
+ if (ulen > SZ_1G || !ulen)
+ return -EFAULT;
+ if (check_add_overflow(base, acct_len, &tmp))
+ return -EOVERFLOW;
+ return 0;
+}
+
+static int io_buffer_validate(struct iovec *iov)
+{
/*
* Don't impose further limits on the size and buffer
* constraints here, we'll -EINVAL later when IO is
@@ -91,17 +102,9 @@ int io_buffer_validate(struct iovec *iov)
*/
if (!iov->iov_base)
return iov->iov_len ? -EFAULT : 0;
- if (!iov->iov_len)
- return -EFAULT;
-
- /* arbitrary limit, but we need something */
- if (iov->iov_len > SZ_1G)
- return -EFAULT;
- if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp))
- return -EOVERFLOW;
-
- return 0;
+ return io_validate_user_buf_range((unsigned long)iov->iov_base,
+ iov->iov_len);
}
static void io_release_ubuf(void *priv)
@@ -497,7 +500,7 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
@@ -685,38 +688,34 @@ static bool io_coalesce_buffer(struct page ***pages, int *nr_pages,
struct io_imu_folio_data *data)
{
struct page **page_array = *pages, **new_array = NULL;
- int nr_pages_left = *nr_pages, i, j;
- int nr_folios = data->nr_folios;
+ unsigned nr_pages_left = *nr_pages;
+ unsigned nr_folios = data->nr_folios;
+ unsigned i, j;
/* Store head pages only*/
- new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
- GFP_KERNEL);
+ new_array = kvmalloc_array(nr_folios, sizeof(struct page *), GFP_KERNEL);
if (!new_array)
return false;
- new_array[0] = compound_head(page_array[0]);
- /*
- * The pages are bound to the folio, it doesn't
- * actually unpin them but drops all but one reference,
- * which is usually put down by io_buffer_unmap().
- * Note, needs a better helper.
- */
- if (data->nr_pages_head > 1)
- unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
-
- j = data->nr_pages_head;
- nr_pages_left -= data->nr_pages_head;
- for (i = 1; i < nr_folios; i++) {
- unsigned int nr_unpin;
-
- new_array[i] = page_array[j];
- nr_unpin = min_t(unsigned int, nr_pages_left - 1,
- data->nr_pages_mid - 1);
- if (nr_unpin)
- unpin_user_pages(&page_array[j+1], nr_unpin);
- j += data->nr_pages_mid;
- nr_pages_left -= data->nr_pages_mid;
+ for (i = 0, j = 0; i < nr_folios; i++) {
+ struct page *p = compound_head(page_array[j]);
+ struct folio *folio = page_folio(p);
+ unsigned int nr;
+
+ WARN_ON_ONCE(i > 0 && p != page_array[j]);
+
+ nr = i ? data->nr_pages_mid : data->nr_pages_head;
+ nr = min(nr, nr_pages_left);
+ /* Drop all but one ref, the entire folio will remain pinned. */
+ if (nr > 1)
+ unpin_user_folio(folio, nr - 1);
+ j += nr;
+ nr_pages_left -= nr;
+ new_array[i] = p;
}
+
+ WARN_ON_ONCE(j != *nr_pages);
+
kvfree(page_array);
*pages = new_array;
*nr_pages = nr_folios;
@@ -1062,8 +1061,6 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
size_t offset;
int ret;
- if (WARN_ON_ONCE(!imu))
- return -EFAULT;
ret = validate_fixed_range(buf_addr, len, imu);
if (unlikely(ret))
return ret;
@@ -1110,13 +1107,19 @@ inline struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
if (req->flags & REQ_F_BUF_NODE)
return req->buf_node;
+ req->flags |= REQ_F_BUF_NODE;
io_ring_submit_lock(ctx, issue_flags);
node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
- if (node)
- io_req_assign_buf_node(req, node);
+ if (node) {
+ node->refs++;
+ req->buf_node = node;
+ io_ring_submit_unlock(ctx, issue_flags);
+ return node;
+ }
+ req->flags &= ~REQ_F_BUF_NODE;
io_ring_submit_unlock(ctx, issue_flags);
- return node;
+ return NULL;
}
int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index b52242852ff3..0d2138f16322 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -83,7 +83,7 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
unsigned size, unsigned type);
int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
unsigned int size, unsigned int type);
-int io_buffer_validate(struct iovec *iov);
+int io_validate_user_buf_range(u64 uaddr, u64 ulen);
bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
struct io_imu_folio_data *data);
@@ -115,32 +115,6 @@ static inline bool io_reset_rsrc_node(struct io_ring_ctx *ctx,
return true;
}
-static inline void io_req_put_rsrc_nodes(struct io_kiocb *req)
-{
- if (req->file_node) {
- io_put_rsrc_node(req->ctx, req->file_node);
- req->file_node = NULL;
- }
- if (req->flags & REQ_F_BUF_NODE) {
- io_put_rsrc_node(req->ctx, req->buf_node);
- req->buf_node = NULL;
- }
-}
-
-static inline void io_req_assign_rsrc_node(struct io_rsrc_node **dst_node,
- struct io_rsrc_node *node)
-{
- node->refs++;
- *dst_node = node;
-}
-
-static inline void io_req_assign_buf_node(struct io_kiocb *req,
- struct io_rsrc_node *node)
-{
- io_req_assign_rsrc_node(&req->buf_node, node);
- req->flags |= REQ_F_BUF_NODE;
-}
-
int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 039e063f7091..710d8cd53ebb 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -119,7 +119,7 @@ static int __io_import_rw_buffer(int ddir, struct io_kiocb *req,
return io_import_vec(ddir, req, io, buf, sqe_len);
if (io_do_buffer_select(req)) {
- buf = io_buffer_select(req, &sqe_len, issue_flags);
+ buf = io_buffer_select(req, &sqe_len, io->buf_group, issue_flags);
if (!buf)
return -ENOBUFS;
rw->addr = (unsigned long) buf;
@@ -253,16 +253,19 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
int ddir)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ struct io_async_rw *io;
unsigned ioprio;
u64 attr_type_mask;
int ret;
if (io_rw_alloc_async(req))
return -ENOMEM;
+ io = req->async_data;
rw->kiocb.ki_pos = READ_ONCE(sqe->off);
/* used for fixed read/write too - just read unconditionally */
req->buf_index = READ_ONCE(sqe->buf_index);
+ io->buf_group = req->buf_index;
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -276,6 +279,7 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
rw->kiocb.dio_complete = NULL;
rw->kiocb.ki_flags = 0;
+ rw->kiocb.ki_write_stream = READ_ONCE(sqe->write_stream);
if (req->ctx->flags & IORING_SETUP_IOPOLL)
rw->kiocb.ki_complete = io_complete_rw_iopoll;
@@ -657,7 +661,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
io_req_io_end(req);
io_req_set_res(req, final_ret, io_put_kbuf(req, ret, issue_flags));
io_req_rw_cleanup(req, issue_flags);
- return IOU_OK;
+ return IOU_COMPLETE;
} else {
io_rw_done(req, ret);
}
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 81d6d9a8cf69..129a53fe5482 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -16,6 +16,8 @@ struct io_async_rw {
struct iov_iter iter;
struct iov_iter_state iter_state;
struct iovec fast_iov;
+ unsigned buf_group;
+
/*
* wpq is for buffered io, while meta fields are used with
* direct io
diff --git a/io_uring/splice.c b/io_uring/splice.c
index 7b89bd84d486..35ce4e60b495 100644
--- a/io_uring/splice.c
+++ b/io_uring/splice.c
@@ -103,7 +103,7 @@ done:
if (ret != sp->len)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -144,5 +144,5 @@ done:
if (ret != sp->len)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/statx.c b/io_uring/statx.c
index 6bc4651700a2..5111e9befbfe 100644
--- a/io_uring/statx.c
+++ b/io_uring/statx.c
@@ -59,7 +59,7 @@ int io_statx(struct io_kiocb *req, unsigned int issue_flags)
ret = do_statx(sx->dfd, sx->filename, sx->flags, sx->mask, sx->buffer);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
void io_statx_cleanup(struct io_kiocb *req)
diff --git a/io_uring/sync.c b/io_uring/sync.c
index 255f68c37e55..cea2d381ffd2 100644
--- a/io_uring/sync.c
+++ b/io_uring/sync.c
@@ -47,7 +47,7 @@ int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
ret = sync_file_range(req->file, sync->off, sync->len, sync->flags);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -79,7 +79,7 @@ int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
ret = vfs_fsync_range(req->file, sync->off, end > 0 ? end : LLONG_MAX,
sync->flags & IORING_FSYNC_DATASYNC);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_fallocate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -108,5 +108,5 @@ int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
if (ret >= 0)
fsnotify_modify(req->file);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index adc6e42c14df..5b66755579c0 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -35,8 +35,6 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
data.hash = hash;
data.task = task;
- data.free_work = io_wq_free_work;
- data.do_work = io_wq_submit_work;
/* Do QD, or 4 * CPUS, whatever is smallest */
concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 2a107665230b..7f13bfa9f2b6 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -35,6 +35,9 @@ struct io_timeout_rem {
bool ltimeout;
};
+static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
+ struct io_kiocb *link);
+
static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
@@ -218,7 +221,9 @@ void io_disarm_next(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
raw_spin_lock_irq(&ctx->timeout_lock);
- link = io_disarm_linked_timeout(req);
+ if (req->link && req->link->opcode == IORING_OP_LINK_TIMEOUT)
+ link = __io_disarm_linked_timeout(req, req->link);
+
raw_spin_unlock_irq(&ctx->timeout_lock);
if (link)
io_req_queue_tw_complete(link, -ECANCELED);
@@ -228,8 +233,8 @@ void io_disarm_next(struct io_kiocb *req)
io_fail_links(req);
}
-struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
- struct io_kiocb *link)
+static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
+ struct io_kiocb *link)
__must_hold(&req->ctx->completion_lock)
__must_hold(&req->ctx->timeout_lock)
{
@@ -500,7 +505,7 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
static int __io_timeout_prep(struct io_kiocb *req,
diff --git a/io_uring/timeout.h b/io_uring/timeout.h
index e91b32448dcf..2b7c9ad72992 100644
--- a/io_uring/timeout.h
+++ b/io_uring/timeout.h
@@ -8,19 +8,6 @@ struct io_timeout_data {
u32 flags;
};
-struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
- struct io_kiocb *link);
-
-static inline struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req)
-{
- struct io_kiocb *link = req->link;
-
- if (link && link->opcode == IORING_OP_LINK_TIMEOUT)
- return __io_disarm_linked_timeout(req, link);
-
- return NULL;
-}
-
__cold void io_flush_timeouts(struct io_ring_ctx *ctx);
struct io_cancel_data;
int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd);
diff --git a/io_uring/truncate.c b/io_uring/truncate.c
index 62ee73d34d72..487baf23b44e 100644
--- a/io_uring/truncate.c
+++ b/io_uring/truncate.c
@@ -44,5 +44,5 @@ int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags)
ret = do_ftruncate(req->file, ft->len, 1);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index a9ea7d29cdd9..929cad6ee326 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -3,13 +3,10 @@
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring/cmd.h>
-#include <linux/io_uring/net.h>
#include <linux/security.h>
#include <linux/nospec.h>
-#include <net/sock.h>
#include <uapi/linux/io_uring.h>
-#include <asm/ioctls.h>
#include "io_uring.h"
#include "alloc_cache.h"
@@ -254,6 +251,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
return -EOPNOTSUPP;
issue_flags |= IO_URING_F_IOPOLL;
req->iopoll_completed = 0;
+ if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
+ /* make sure every req only blocks once */
+ req->flags &= ~REQ_F_IOPOLL_STATE;
+ req->iopoll_start = ktime_get_ns();
+ }
}
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
@@ -263,7 +265,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
req_set_fail(req);
io_req_uring_cleanup(req, issue_flags);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
@@ -273,6 +275,9 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
{
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
+ if (WARN_ON_ONCE(!(ioucmd->flags & IORING_URING_CMD_FIXED)))
+ return -EINVAL;
+
return io_import_reg_buf(req, iter, ubuf, len, rw, issue_flags);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
@@ -287,6 +292,9 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
struct io_async_cmd *ac = req->async_data;
int ret;
+ if (WARN_ON_ONCE(!(ioucmd->flags & IORING_URING_CMD_FIXED)))
+ return -EINVAL;
+
ret = io_prep_reg_iovec(req, &ac->vec, uvec, uvec_segs);
if (ret)
return ret;
@@ -302,83 +310,3 @@ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
io_req_queue_iowq(req);
}
-
-static inline int io_uring_cmd_getsockopt(struct socket *sock,
- struct io_uring_cmd *cmd,
- unsigned int issue_flags)
-{
- const struct io_uring_sqe *sqe = cmd->sqe;
- bool compat = !!(issue_flags & IO_URING_F_COMPAT);
- int optlen, optname, level, err;
- void __user *optval;
-
- level = READ_ONCE(sqe->level);
- if (level != SOL_SOCKET)
- return -EOPNOTSUPP;
-
- optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
- optname = READ_ONCE(sqe->optname);
- optlen = READ_ONCE(sqe->optlen);
-
- err = do_sock_getsockopt(sock, compat, level, optname,
- USER_SOCKPTR(optval),
- KERNEL_SOCKPTR(&optlen));
- if (err)
- return err;
-
- /* On success, return optlen */
- return optlen;
-}
-
-static inline int io_uring_cmd_setsockopt(struct socket *sock,
- struct io_uring_cmd *cmd,
- unsigned int issue_flags)
-{
- const struct io_uring_sqe *sqe = cmd->sqe;
- bool compat = !!(issue_flags & IO_URING_F_COMPAT);
- int optname, optlen, level;
- void __user *optval;
- sockptr_t optval_s;
-
- optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
- optname = READ_ONCE(sqe->optname);
- optlen = READ_ONCE(sqe->optlen);
- level = READ_ONCE(sqe->level);
- optval_s = USER_SOCKPTR(optval);
-
- return do_sock_setsockopt(sock, compat, level, optname, optval_s,
- optlen);
-}
-
-#if defined(CONFIG_NET)
-int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
-{
- struct socket *sock = cmd->file->private_data;
- struct sock *sk = sock->sk;
- struct proto *prot = READ_ONCE(sk->sk_prot);
- int ret, arg = 0;
-
- if (!prot || !prot->ioctl)
- return -EOPNOTSUPP;
-
- switch (cmd->cmd_op) {
- case SOCKET_URING_OP_SIOCINQ:
- ret = prot->ioctl(sk, SIOCINQ, &arg);
- if (ret)
- return ret;
- return arg;
- case SOCKET_URING_OP_SIOCOUTQ:
- ret = prot->ioctl(sk, SIOCOUTQ, &arg);
- if (ret)
- return ret;
- return arg;
- case SOCKET_URING_OP_GETSOCKOPT:
- return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
- case SOCKET_URING_OP_SETSOCKOPT:
- return io_uring_cmd_setsockopt(sock, cmd, issue_flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
-#endif
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index b04686b6b5d2..e6a5142c890e 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -17,9 +17,3 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
struct io_uring_task *tctx, bool cancel_all);
void io_cmd_cache_free(const void *entry);
-
-int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
- const struct iovec __user *uvec,
- size_t uvec_segs,
- int ddir, struct iov_iter *iter,
- unsigned issue_flags);
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 54e69984cd8a..e07a94694397 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -323,5 +323,5 @@ done:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/xattr.c b/io_uring/xattr.c
index de5064fcae8a..322b94ff9e4b 100644
--- a/io_uring/xattr.c
+++ b/io_uring/xattr.c
@@ -109,7 +109,7 @@ int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
ret = file_getxattr(req->file, &ix->ctx);
io_xattr_finish(req, ret);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
@@ -122,7 +122,7 @@ int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
ret = filename_getxattr(AT_FDCWD, ix->filename, LOOKUP_FOLLOW, &ix->ctx);
ix->filename = NULL;
io_xattr_finish(req, ret);
- return IOU_OK;
+ return IOU_COMPLETE;
}
static int __io_setxattr_prep(struct io_kiocb *req,
@@ -190,7 +190,7 @@ int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
ret = file_setxattr(req->file, &ix->ctx);
io_xattr_finish(req, ret);
- return IOU_OK;
+ return IOU_COMPLETE;
}
int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
@@ -203,5 +203,5 @@ int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
ret = filename_setxattr(AT_FDCWD, ix->filename, LOOKUP_FOLLOW, &ix->ctx);
ix->filename = NULL;
io_xattr_finish(req, ret);
- return IOU_OK;
+ return IOU_COMPLETE;
}
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index fe86606b9f30..9a568d049204 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -26,29 +26,207 @@
#include "zcrx.h"
#include "rsrc.h"
+#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
static inline struct io_zcrx_ifq *io_pp_to_ifq(struct page_pool *pp)
{
return pp->mp_priv;
}
-#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov)
+{
+ struct net_iov_area *owner = net_iov_owner(niov);
-static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
- struct io_zcrx_area *area, int nr_mapped)
+ return container_of(owner, struct io_zcrx_area, nia);
+}
+
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+ struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+ return area->mem.pages[net_iov_idx(niov)];
+}
+
+static void io_release_dmabuf(struct io_zcrx_mem *mem)
+{
+ if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+ return;
+
+ if (mem->sgt)
+ dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt,
+ DMA_FROM_DEVICE);
+ if (mem->attach)
+ dma_buf_detach(mem->dmabuf, mem->attach);
+ if (mem->dmabuf)
+ dma_buf_put(mem->dmabuf);
+
+ mem->sgt = NULL;
+ mem->attach = NULL;
+ mem->dmabuf = NULL;
+}
+
+static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_mem *mem,
+ struct io_uring_zcrx_area_reg *area_reg)
+{
+ unsigned long off = (unsigned long)area_reg->addr;
+ unsigned long len = (unsigned long)area_reg->len;
+ unsigned long total_size = 0;
+ struct scatterlist *sg;
+ int dmabuf_fd = area_reg->dmabuf_fd;
+ int i, ret;
+
+ if (WARN_ON_ONCE(!ifq->dev))
+ return -EFAULT;
+ if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+ return -EINVAL;
+
+ mem->is_dmabuf = true;
+ mem->dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(mem->dmabuf)) {
+ ret = PTR_ERR(mem->dmabuf);
+ mem->dmabuf = NULL;
+ goto err;
+ }
+
+ mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev);
+ if (IS_ERR(mem->attach)) {
+ ret = PTR_ERR(mem->attach);
+ mem->attach = NULL;
+ goto err;
+ }
+
+ mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE);
+ if (IS_ERR(mem->sgt)) {
+ ret = PTR_ERR(mem->sgt);
+ mem->sgt = NULL;
+ goto err;
+ }
+
+ for_each_sgtable_dma_sg(mem->sgt, sg, i)
+ total_size += sg_dma_len(sg);
+
+ if (total_size < off + len)
+ return -EINVAL;
+
+ mem->dmabuf_offset = off;
+ mem->size = len;
+ return 0;
+err:
+ io_release_dmabuf(mem);
+ return ret;
+}
+
+static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+ unsigned long off = area->mem.dmabuf_offset;
+ struct scatterlist *sg;
+ unsigned i, niov_idx = 0;
+
+ if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+ return -EINVAL;
+
+ for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
+ dma_addr_t dma = sg_dma_address(sg);
+ unsigned long sg_len = sg_dma_len(sg);
+ unsigned long sg_off = min(sg_len, off);
+
+ off -= sg_off;
+ sg_len -= sg_off;
+ dma += sg_off;
+
+ while (sg_len && niov_idx < area->nia.num_niovs) {
+ struct net_iov *niov = &area->nia.niovs[niov_idx];
+
+ if (net_mp_niov_set_dma_addr(niov, dma))
+ return 0;
+ sg_len -= PAGE_SIZE;
+ dma += PAGE_SIZE;
+ niov_idx++;
+ }
+ }
+ return niov_idx;
+}
+
+static int io_import_umem(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_mem *mem,
+ struct io_uring_zcrx_area_reg *area_reg)
+{
+ struct page **pages;
+ int nr_pages;
+
+ if (area_reg->dmabuf_fd)
+ return -EINVAL;
+ if (!area_reg->addr)
+ return -EFAULT;
+ pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
+ &nr_pages);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ mem->pages = pages;
+ mem->nr_folios = nr_pages;
+ mem->size = area_reg->len;
+ return 0;
+}
+
+static void io_release_area_mem(struct io_zcrx_mem *mem)
+{
+ if (mem->is_dmabuf) {
+ io_release_dmabuf(mem);
+ return;
+ }
+ if (mem->pages) {
+ unpin_user_pages(mem->pages, mem->nr_folios);
+ kvfree(mem->pages);
+ }
+}
+
+static int io_import_area(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_mem *mem,
+ struct io_uring_zcrx_area_reg *area_reg)
+{
+ int ret;
+
+ ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
+ if (ret)
+ return ret;
+ if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
+ return -EINVAL;
+
+ if (area_reg->flags & IORING_ZCRX_AREA_DMABUF)
+ return io_import_dmabuf(ifq, mem, area_reg);
+ return io_import_umem(ifq, mem, area_reg);
+}
+
+static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_area *area, int nr_mapped)
{
int i;
for (i = 0; i < nr_mapped; i++) {
- struct net_iov *niov = &area->nia.niovs[i];
- dma_addr_t dma;
+ netmem_ref netmem = net_iov_to_netmem(&area->nia.niovs[i]);
+ dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
- dma = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
DMA_FROM_DEVICE, IO_DMA_ATTR);
- net_mp_niov_set_dma_addr(niov, 0);
}
}
+static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_area *area, int nr_mapped)
+{
+ int i;
+
+ if (area->mem.is_dmabuf)
+ io_release_dmabuf(&area->mem);
+ else
+ io_zcrx_unmap_umem(ifq, area, nr_mapped);
+
+ for (i = 0; i < area->nia.num_niovs; i++)
+ net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0);
+}
+
static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
guard(mutex)(&ifq->dma_lock);
@@ -58,20 +236,16 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *are
area->is_mapped = false;
}
-static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+static int io_zcrx_map_area_umem(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
int i;
- guard(mutex)(&ifq->dma_lock);
- if (area->is_mapped)
- return 0;
-
for (i = 0; i < area->nia.num_niovs; i++) {
struct net_iov *niov = &area->nia.niovs[i];
dma_addr_t dma;
- dma = dma_map_page_attrs(ifq->dev, area->pages[i], 0, PAGE_SIZE,
- DMA_FROM_DEVICE, IO_DMA_ATTR);
+ dma = dma_map_page_attrs(ifq->dev, area->mem.pages[i], 0,
+ PAGE_SIZE, DMA_FROM_DEVICE, IO_DMA_ATTR);
if (dma_mapping_error(ifq->dev, dma))
break;
if (net_mp_niov_set_dma_addr(niov, dma)) {
@@ -80,9 +254,24 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
break;
}
}
+ return i;
+}
+
+static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+ unsigned nr;
+
+ guard(mutex)(&ifq->dma_lock);
+ if (area->is_mapped)
+ return 0;
+
+ if (area->mem.is_dmabuf)
+ nr = io_zcrx_map_area_dmabuf(ifq, area);
+ else
+ nr = io_zcrx_map_area_umem(ifq, area);
- if (i != area->nia.num_niovs) {
- __io_zcrx_unmap_area(ifq, area, i);
+ if (nr != area->nia.num_niovs) {
+ __io_zcrx_unmap_area(ifq, area, nr);
return -EINVAL;
}
@@ -118,13 +307,6 @@ struct io_zcrx_args {
static const struct memory_provider_ops io_uring_pp_zc_ops;
-static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov)
-{
- struct net_iov_area *owner = net_iov_owner(niov);
-
- return container_of(owner, struct io_zcrx_area, nia);
-}
-
static inline atomic_t *io_get_user_counter(struct net_iov *niov)
{
struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
@@ -147,17 +329,12 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
atomic_inc(io_get_user_counter(niov));
}
-static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
-{
- struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
-
- return area->pages[net_iov_idx(niov)];
-}
-
static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
struct io_uring_zcrx_ifq_reg *reg,
- struct io_uring_region_desc *rd)
+ struct io_uring_region_desc *rd,
+ u32 id)
{
+ u64 mmap_offset;
size_t off, size;
void *ptr;
int ret;
@@ -167,12 +344,14 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
if (size > rd->size)
return -EINVAL;
- ret = io_create_region_mmap_safe(ifq->ctx, &ifq->ctx->zcrx_region, rd,
- IORING_MAP_OFF_ZCRX_REGION);
+ mmap_offset = IORING_MAP_OFF_ZCRX_REGION;
+ mmap_offset += id << IORING_OFF_PBUF_SHIFT;
+
+ ret = io_create_region(ifq->ctx, &ifq->region, rd, mmap_offset);
if (ret < 0)
return ret;
- ptr = io_region_get_ptr(&ifq->ctx->zcrx_region);
+ ptr = io_region_get_ptr(&ifq->region);
ifq->rq_ring = (struct io_uring *)ptr;
ifq->rqes = (struct io_uring_zcrx_rqe *)(ptr + off);
return 0;
@@ -180,7 +359,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
{
- io_free_region(ifq->ctx, &ifq->ctx->zcrx_region);
+ io_free_region(ifq->ctx, &ifq->region);
ifq->rq_ring = NULL;
ifq->rqes = NULL;
}
@@ -188,53 +367,44 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
static void io_zcrx_free_area(struct io_zcrx_area *area)
{
io_zcrx_unmap_area(area->ifq, area);
+ io_release_area_mem(&area->mem);
kvfree(area->freelist);
kvfree(area->nia.niovs);
kvfree(area->user_refs);
- if (area->pages) {
- unpin_user_pages(area->pages, area->nr_folios);
- kvfree(area->pages);
- }
kfree(area);
}
+#define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF)
+
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
struct io_zcrx_area **res,
struct io_uring_zcrx_area_reg *area_reg)
{
struct io_zcrx_area *area;
- int i, ret, nr_pages, nr_iovs;
- struct iovec iov;
+ unsigned nr_iovs;
+ int i, ret;
- if (area_reg->flags || area_reg->rq_area_token)
+ if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
return -EINVAL;
- if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
+ if (area_reg->rq_area_token)
return -EINVAL;
- if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
+ if (area_reg->__resv2[0] || area_reg->__resv2[1])
return -EINVAL;
- iov.iov_base = u64_to_user_ptr(area_reg->addr);
- iov.iov_len = area_reg->len;
- ret = io_buffer_validate(&iov);
- if (ret)
- return ret;
-
ret = -ENOMEM;
area = kzalloc(sizeof(*area), GFP_KERNEL);
if (!area)
goto err;
- area->pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
- &nr_pages);
- if (IS_ERR(area->pages)) {
- ret = PTR_ERR(area->pages);
- area->pages = NULL;
+ ret = io_import_area(ifq, &area->mem, area_reg);
+ if (ret)
goto err;
- }
- area->nr_folios = nr_iovs = nr_pages;
+
+ nr_iovs = area->mem.size >> PAGE_SHIFT;
area->nia.num_niovs = nr_iovs;
+ ret = -ENOMEM;
area->nia.niovs = kvmalloc_array(nr_iovs, sizeof(area->nia.niovs[0]),
GFP_KERNEL | __GFP_ZERO);
if (!area->nia.niovs)
@@ -245,9 +415,6 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
if (!area->freelist)
goto err;
- for (i = 0; i < nr_iovs; i++)
- area->freelist[i] = i;
-
area->user_refs = kvmalloc_array(nr_iovs, sizeof(area->user_refs[0]),
GFP_KERNEL | __GFP_ZERO);
if (!area->user_refs)
@@ -341,6 +508,16 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
kfree(ifq);
}
+struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
+ unsigned int id)
+{
+ struct io_zcrx_ifq *ifq = xa_load(&ctx->zcrx_ctxs, id);
+
+ lockdep_assert_held(&ctx->mmap_lock);
+
+ return ifq ? &ifq->region : NULL;
+}
+
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg)
{
@@ -350,6 +527,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_region_desc rd;
struct io_zcrx_ifq *ifq;
int ret;
+ u32 id;
/*
* 1. Interface queue allocation.
@@ -362,8 +540,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
ctx->flags & IORING_SETUP_CQE32))
return -EINVAL;
- if (ctx->ifq)
- return -EBUSY;
if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;
if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
@@ -386,29 +562,37 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
ifq = io_zcrx_ifq_alloc(ctx);
if (!ifq)
return -ENOMEM;
+ ifq->rq_entries = reg.rq_entries;
- ret = io_allocate_rbuf_ring(ifq, &reg, &rd);
- if (ret)
- goto err;
+ scoped_guard(mutex, &ctx->mmap_lock) {
+ /* preallocate id */
+ ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
+ if (ret)
+ goto ifq_free;
+ }
- ret = io_zcrx_create_area(ifq, &ifq->area, &area);
+ ret = io_allocate_rbuf_ring(ifq, &reg, &rd, id);
if (ret)
goto err;
- ifq->rq_entries = reg.rq_entries;
-
- ret = -ENODEV;
ifq->netdev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx,
&ifq->netdev_tracker, GFP_KERNEL);
- if (!ifq->netdev)
+ if (!ifq->netdev) {
+ ret = -ENODEV;
goto err;
+ }
ifq->dev = ifq->netdev->dev.parent;
- ret = -EOPNOTSUPP;
- if (!ifq->dev)
+ if (!ifq->dev) {
+ ret = -EOPNOTSUPP;
goto err;
+ }
get_device(ifq->dev);
+ ret = io_zcrx_create_area(ifq, &ifq->area, &area);
+ if (ret)
+ goto err;
+
mp_param.mp_ops = &io_uring_pp_zc_ops;
mp_param.mp_priv = ifq;
ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param);
@@ -419,6 +603,14 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
reg.offsets.rqes = sizeof(struct io_uring);
reg.offsets.head = offsetof(struct io_uring, head);
reg.offsets.tail = offsetof(struct io_uring, tail);
+ reg.zcrx_id = id;
+
+ scoped_guard(mutex, &ctx->mmap_lock) {
+ /* publish ifq */
+ ret = -ENOMEM;
+ if (xa_store(&ctx->zcrx_ctxs, id, ifq, GFP_KERNEL))
+ goto err;
+ }
if (copy_to_user(arg, &reg, sizeof(reg)) ||
copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
@@ -426,24 +618,34 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
ret = -EFAULT;
goto err;
}
- ctx->ifq = ifq;
return 0;
err:
+ scoped_guard(mutex, &ctx->mmap_lock)
+ xa_erase(&ctx->zcrx_ctxs, id);
+ifq_free:
io_zcrx_ifq_free(ifq);
return ret;
}
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
{
- struct io_zcrx_ifq *ifq = ctx->ifq;
+ struct io_zcrx_ifq *ifq;
+ unsigned long id;
lockdep_assert_held(&ctx->uring_lock);
- if (!ifq)
- return;
+ while (1) {
+ scoped_guard(mutex, &ctx->mmap_lock) {
+ ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
+ if (ifq)
+ xa_erase(&ctx->zcrx_ctxs, id);
+ }
+ if (!ifq)
+ break;
+ io_zcrx_ifq_free(ifq);
+ }
- ctx->ifq = NULL;
- io_zcrx_ifq_free(ifq);
+ xa_destroy(&ctx->zcrx_ctxs);
}
static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area)
@@ -500,12 +702,15 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
{
+ struct io_zcrx_ifq *ifq;
+ unsigned long index;
+
lockdep_assert_held(&ctx->uring_lock);
- if (!ctx->ifq)
- return;
- io_zcrx_scrub(ctx->ifq);
- io_close_queue(ctx->ifq);
+ xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
+ io_zcrx_scrub(ifq);
+ io_close_queue(ifq);
+ }
}
static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
@@ -742,6 +947,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
size_t copied = 0;
int ret = 0;
+ if (area->mem.is_dmabuf)
+ return -EFAULT;
+
while (len) {
size_t copy_size = min_t(size_t, PAGE_SIZE, len);
const int dst_off = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index f2bc811f022c..2f5e26389f22 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -3,10 +3,24 @@
#define IOU_ZC_RX_H
#include <linux/io_uring_types.h>
+#include <linux/dma-buf.h>
#include <linux/socket.h>
#include <net/page_pool/types.h>
#include <net/net_trackers.h>
+struct io_zcrx_mem {
+ unsigned long size;
+ bool is_dmabuf;
+
+ struct page **pages;
+ unsigned long nr_folios;
+
+ struct dma_buf_attachment *attach;
+ struct dma_buf *dmabuf;
+ struct sg_table *sgt;
+ unsigned long dmabuf_offset;
+};
+
struct io_zcrx_area {
struct net_iov_area nia;
struct io_zcrx_ifq *ifq;
@@ -14,13 +28,13 @@ struct io_zcrx_area {
bool is_mapped;
u16 area_id;
- struct page **pages;
- unsigned long nr_folios;
/* freelist */
spinlock_t freelist_lock ____cacheline_aligned_in_smp;
u32 free_count;
u32 *freelist;
+
+ struct io_zcrx_mem mem;
};
struct io_zcrx_ifq {
@@ -39,6 +53,7 @@ struct io_zcrx_ifq {
netdevice_tracker netdev_tracker;
spinlock_t lock;
struct mutex dma_lock;
+ struct io_mapped_region region;
};
#if defined(CONFIG_IO_URING_ZCRX)
@@ -49,6 +64,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx);
int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
struct socket *sock, unsigned int flags,
unsigned issue_flags, unsigned int *len);
+struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
+ unsigned int id);
#else
static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg)
@@ -67,6 +84,11 @@ static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
{
return -EOPNOTSUPP;
}
+static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
+ unsigned int id)
+{
+ return NULL;
+}
#endif
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 35b4f8659904..82ed2d3c9846 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -913,7 +913,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
ro = mnt_want_write(mnt); /* we'll drop it in any case */
inode_lock(d_inode(root));
- path.dentry = lookup_one_len(name->name, root, strlen(name->name));
+ path.dentry = lookup_noperm(&QSTR(name->name), root);
if (IS_ERR(path.dentry)) {
error = PTR_ERR(path.dentry);
goto out_putfd;
@@ -969,8 +969,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
if (err)
goto out_name;
inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
- dentry = lookup_one_len(name->name, mnt->mnt_root,
- strlen(name->name));
+ dentry = lookup_noperm(&QSTR(name->name), mnt->mnt_root);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_unlock;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 84f58f3d028a..cad0194552fb 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -41,6 +41,19 @@ static int __init cgroup_bpf_wq_init(void)
}
core_initcall(cgroup_bpf_wq_init);
+static int cgroup_bpf_lifetime_notify(struct notifier_block *nb,
+ unsigned long action, void *data);
+
+static struct notifier_block cgroup_bpf_lifetime_nb = {
+ .notifier_call = cgroup_bpf_lifetime_notify,
+};
+
+void __init cgroup_bpf_lifetime_notifier_init(void)
+{
+ BUG_ON(blocking_notifier_chain_register(&cgroup_lifetime_notifier,
+ &cgroup_bpf_lifetime_nb));
+}
+
/* __always_inline is necessary to prevent indirect call through run_prog
* function pointer.
*/
@@ -206,7 +219,7 @@ bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
}
#endif /* CONFIG_BPF_LSM */
-void cgroup_bpf_offline(struct cgroup *cgrp)
+static void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
@@ -491,7 +504,7 @@ static void activate_effective_progs(struct cgroup *cgrp,
* cgroup_bpf_inherit() - inherit effective programs from parent
* @cgrp: the cgroup to modify
*/
-int cgroup_bpf_inherit(struct cgroup *cgrp)
+static int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use marco instead of const int, since compiler thinks
* that array below is variable length
@@ -534,6 +547,27 @@ cleanup:
return -ENOMEM;
}
+static int cgroup_bpf_lifetime_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct cgroup *cgrp = data;
+ int ret = 0;
+
+ if (cgrp->root != &cgrp_dfl_root)
+ return NOTIFY_OK;
+
+ switch (action) {
+ case CGROUP_LIFETIME_ONLINE:
+ ret = cgroup_bpf_inherit(cgrp);
+ break;
+ case CGROUP_LIFETIME_OFFLINE:
+ cgroup_bpf_offline(cgrp);
+ break;
+ }
+
+ return notifier_from_errno(ret);
+}
+
static int update_effective_progs(struct cgroup *cgrp,
enum cgroup_bpf_attach_type atype)
{
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index dc3aa91a6ba0..5c2e96b19392 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -421,7 +421,7 @@ static int bpf_iter_link_pin_kernel(struct dentry *parent,
int ret;
inode_lock(parent->d_inode);
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (IS_ERR(dentry)) {
inode_unlock(parent->d_inode);
return PTR_ERR(dentry);
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 95ab39e1ec8f..b14e61c64a34 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -270,9 +270,9 @@ int cgroup_task_count(const struct cgroup *cgrp);
/*
* rstat.c
*/
-int cgroup_rstat_init(struct cgroup *cgrp);
-void cgroup_rstat_exit(struct cgroup *cgrp);
-void cgroup_rstat_boot(void);
+int css_rstat_init(struct cgroup_subsys_state *css);
+void css_rstat_exit(struct cgroup_subsys_state *css);
+int ss_rstat_init(struct cgroup_subsys *ss);
void cgroup_base_stat_cputime_show(struct seq_file *seq);
/*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 63e5b90da1f3..a723b7dc6e4e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -95,6 +95,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif
+struct blocking_notifier_head cgroup_lifetime_notifier =
+ BLOCKING_NOTIFIER_INIT(cgroup_lifetime_notifier);
+
DEFINE_SPINLOCK(trace_cgroup_path_lock);
char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
static bool cgroup_debug __read_mostly;
@@ -161,10 +164,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
};
#undef SUBSYS
-static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
+static DEFINE_PER_CPU(struct css_rstat_cpu, root_rstat_cpu);
+static DEFINE_PER_CPU(struct cgroup_rstat_base_cpu, root_rstat_base_cpu);
/* the default hierarchy */
-struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
+struct cgroup_root cgrp_dfl_root = {
+ .cgrp.self.rstat_cpu = &root_rstat_cpu,
+ .cgrp.rstat_base_cpu = &root_rstat_base_cpu,
+};
EXPORT_SYMBOL_GPL(cgrp_dfl_root);
/*
@@ -1335,6 +1342,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
{
struct cgroup *cgrp = &root->cgrp;
struct cgrp_cset_link *link, *tmp_link;
+ int ret;
trace_cgroup_destroy_root(root);
@@ -1343,6 +1351,10 @@ static void cgroup_destroy_root(struct cgroup_root *root)
BUG_ON(atomic_read(&root->nr_cgrps));
BUG_ON(!list_empty(&cgrp->self.children));
+ ret = blocking_notifier_call_chain(&cgroup_lifetime_notifier,
+ CGROUP_LIFETIME_OFFLINE, cgrp);
+ WARN_ON_ONCE(notifier_to_errno(ret));
+
/* Rebind all subsystems back to the default hierarchy */
WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
@@ -1371,7 +1383,6 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_unlock();
- cgroup_rstat_exit(cgrp);
kernfs_destroy_root(root->kf_root);
cgroup_free_root(root);
}
@@ -1715,7 +1726,7 @@ static void css_clear_dir(struct cgroup_subsys_state *css)
css->flags &= ~CSS_VISIBLE;
- if (!css->ss) {
+ if (css_is_self(css)) {
if (cgroup_on_dfl(cgrp)) {
cgroup_addrm_files(css, cgrp,
cgroup_base_files, false);
@@ -1747,7 +1758,7 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
if (css->flags & CSS_VISIBLE)
return 0;
- if (!css->ss) {
+ if (css_is_self(css)) {
if (cgroup_on_dfl(cgrp)) {
ret = cgroup_addrm_files(css, cgrp,
cgroup_base_files, true);
@@ -1876,13 +1887,6 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
}
spin_unlock_irq(&css_set_lock);
- if (ss->css_rstat_flush) {
- list_del_rcu(&css->rstat_css_node);
- synchronize_rcu();
- list_add_rcu(&css->rstat_css_node,
- &dcgrp->rstat_css_list);
- }
-
/* default hierarchy doesn't enable controllers by default */
dst_root->subsys_mask |= 1 << ssid;
if (dst_root == &cgrp_dfl_root) {
@@ -2065,7 +2069,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
cgrp->dom_cgrp = cgrp;
cgrp->max_descendants = INT_MAX;
cgrp->max_depth = INT_MAX;
- INIT_LIST_HEAD(&cgrp->rstat_css_list);
prev_cputime_init(&cgrp->prev_cputime);
for_each_subsys(ss, ssid)
@@ -2146,7 +2149,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
if (ret)
goto destroy_root;
- ret = cgroup_rstat_init(root_cgrp);
+ ret = css_rstat_init(&root_cgrp->self);
if (ret)
goto destroy_root;
@@ -2154,10 +2157,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
if (ret)
goto exit_stats;
- if (root == &cgrp_dfl_root) {
- ret = cgroup_bpf_inherit(root_cgrp);
- WARN_ON_ONCE(ret);
- }
+ ret = blocking_notifier_call_chain(&cgroup_lifetime_notifier,
+ CGROUP_LIFETIME_ONLINE, root_cgrp);
+ WARN_ON_ONCE(notifier_to_errno(ret));
trace_cgroup_setup_root(root);
@@ -2188,7 +2190,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
goto out;
exit_stats:
- cgroup_rstat_exit(root_cgrp);
+ css_rstat_exit(&root_cgrp->self);
destroy_root:
kernfs_destroy_root(root->kf_root);
root->kf_root = NULL;
@@ -5444,8 +5446,9 @@ static void css_free_rwork_fn(struct work_struct *work)
struct cgroup *cgrp = css->cgroup;
percpu_ref_exit(&css->refcnt);
+ css_rstat_exit(css);
- if (ss) {
+ if (!css_is_self(css)) {
/* css free path */
struct cgroup_subsys_state *parent = css->parent;
int id = css->id;
@@ -5474,7 +5477,6 @@ static void css_free_rwork_fn(struct work_struct *work)
cgroup_put(cgroup_parent(cgrp));
kernfs_put(cgrp->kn);
psi_cgroup_free(cgrp);
- cgroup_rstat_exit(cgrp);
kfree(cgrp);
} else {
/*
@@ -5499,14 +5501,10 @@ static void css_release_work_fn(struct work_struct *work)
css->flags |= CSS_RELEASED;
list_del_rcu(&css->sibling);
- if (ss) {
+ if (!css_is_self(css)) {
struct cgroup *parent_cgrp;
- /* css release path */
- if (!list_empty(&css->rstat_css_node)) {
- cgroup_rstat_flush(cgrp);
- list_del_rcu(&css->rstat_css_node);
- }
+ css_rstat_flush(css);
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
if (ss->css_released)
@@ -5532,7 +5530,7 @@ static void css_release_work_fn(struct work_struct *work)
/* cgroup release path */
TRACE_CGROUP_PATH(release, cgrp);
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
spin_lock_irq(&css_set_lock);
for (tcgrp = cgroup_parent(cgrp); tcgrp;
@@ -5580,7 +5578,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css->id = -1;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
- INIT_LIST_HEAD(&css->rstat_css_node);
css->serial_nr = css_serial_nr_next++;
atomic_set(&css->online_cnt, 0);
@@ -5589,9 +5586,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css_get(css->parent);
}
- if (ss->css_rstat_flush)
- list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
-
BUG_ON(cgroup_css(cgrp, ss));
}
@@ -5684,6 +5678,10 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
goto err_free_css;
css->id = err;
+ err = css_rstat_init(css);
+ if (err)
+ goto err_free_css;
+
/* @css is ready to be brought online now, make it visible */
list_add_tail_rcu(&css->sibling, &parent_css->children);
cgroup_idr_replace(&ss->css_idr, css, css->id);
@@ -5697,7 +5695,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
err_list_del:
list_del_rcu(&css->sibling);
err_free_css:
- list_del_rcu(&css->rstat_css_node);
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
return ERR_PTR(err);
@@ -5713,7 +5710,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
struct cgroup_root *root = parent->root;
struct cgroup *cgrp, *tcgrp;
struct kernfs_node *kn;
- int level = parent->level + 1;
+ int i, level = parent->level + 1;
int ret;
/* allocate the cgroup and its ID, 0 is reserved for the root */
@@ -5725,17 +5722,13 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
if (ret)
goto out_free_cgrp;
- ret = cgroup_rstat_init(cgrp);
- if (ret)
- goto out_cancel_ref;
-
/* create the directory */
kn = kernfs_create_dir_ns(parent->kn, name, mode,
current_fsuid(), current_fsgid(),
cgrp, NULL);
if (IS_ERR(kn)) {
ret = PTR_ERR(kn);
- goto out_stat_exit;
+ goto out_cancel_ref;
}
cgrp->kn = kn;
@@ -5745,15 +5738,20 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
cgrp->root = root;
cgrp->level = level;
- ret = psi_cgroup_alloc(cgrp);
+ /*
+ * Now that init_cgroup_housekeeping() has been called and cgrp->self
+ * is setup, it is safe to perform rstat initialization on it.
+ */
+ ret = css_rstat_init(&cgrp->self);
if (ret)
goto out_kernfs_remove;
- if (cgrp->root == &cgrp_dfl_root) {
- ret = cgroup_bpf_inherit(cgrp);
- if (ret)
- goto out_psi_free;
- }
+ ret = psi_cgroup_alloc(cgrp);
+ if (ret)
+ goto out_stat_exit;
+
+ for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
+ cgrp->ancestors[tcgrp->level] = tcgrp;
/*
* New cgroup inherits effective freeze counter, and
@@ -5771,24 +5769,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
set_bit(CGRP_FROZEN, &cgrp->flags);
}
- spin_lock_irq(&css_set_lock);
- for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
- cgrp->ancestors[tcgrp->level] = tcgrp;
-
- if (tcgrp != cgrp) {
- tcgrp->nr_descendants++;
-
- /*
- * If the new cgroup is frozen, all ancestor cgroups
- * get a new frozen descendant, but their state can't
- * change because of this.
- */
- if (cgrp->freezer.e_freeze)
- tcgrp->freezer.nr_frozen_descendants++;
- }
- }
- spin_unlock_irq(&css_set_lock);
-
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -5797,7 +5777,29 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
cgrp->self.serial_nr = css_serial_nr_next++;
+ ret = blocking_notifier_call_chain_robust(&cgroup_lifetime_notifier,
+ CGROUP_LIFETIME_ONLINE,
+ CGROUP_LIFETIME_OFFLINE, cgrp);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto out_psi_free;
+
/* allocation complete, commit to creation */
+ spin_lock_irq(&css_set_lock);
+ for (i = 0; i < level; i++) {
+ tcgrp = cgrp->ancestors[i];
+ tcgrp->nr_descendants++;
+
+ /*
+ * If the new cgroup is frozen, all ancestor cgroups get a new
+ * frozen descendant, but their state can't change because of
+ * this.
+ */
+ if (cgrp->freezer.e_freeze)
+ tcgrp->freezer.nr_frozen_descendants++;
+ }
+ spin_unlock_irq(&css_set_lock);
+
list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
atomic_inc(&root->nr_cgrps);
cgroup_get_live(parent);
@@ -5815,10 +5817,10 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
out_psi_free:
psi_cgroup_free(cgrp);
+out_stat_exit:
+ css_rstat_exit(&cgrp->self);
out_kernfs_remove:
kernfs_remove(cgrp->kn);
-out_stat_exit:
- cgroup_rstat_exit(cgrp);
out_cancel_ref:
percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
@@ -6015,7 +6017,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
struct cgroup_subsys_state *css;
struct cgrp_cset_link *link;
- int ssid;
+ int ssid, ret;
lockdep_assert_held(&cgroup_mutex);
@@ -6073,8 +6075,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
cgroup1_check_for_release(parent);
- if (cgrp->root == &cgrp_dfl_root)
- cgroup_bpf_offline(cgrp);
+ ret = blocking_notifier_call_chain(&cgroup_lifetime_notifier,
+ CGROUP_LIFETIME_OFFLINE, cgrp);
+ WARN_ON_ONCE(notifier_to_errno(ret));
/* put the base reference */
percpu_ref_kill(&cgrp->self.refcnt);
@@ -6136,6 +6139,9 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
} else {
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
BUG_ON(css->id < 0);
+
+ BUG_ON(ss_rstat_init(ss));
+ BUG_ON(css_rstat_init(css));
}
/* Update the init_css_set to contain a subsys
@@ -6184,6 +6190,8 @@ int __init cgroup_init_early(void)
ss->id, ss->name);
WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
"cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
+ WARN(ss->early_init && ss->css_rstat_flush,
+ "cgroup rstat cannot be used with early init subsystem\n");
ss->id = i;
ss->name = cgroup_subsys_name[i];
@@ -6212,7 +6220,7 @@ int __init cgroup_init(void)
BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
- cgroup_rstat_boot();
+ BUG_ON(ss_rstat_init(NULL));
get_user_ns(init_cgroup_ns.user_ns);
@@ -6225,6 +6233,8 @@ int __init cgroup_init(void)
hash_add(css_set_table, &init_css_set.hlist,
css_set_hash(init_css_set.subsys));
+ cgroup_bpf_lifetime_notifier_init();
+
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
cgroup_unlock();
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 306b60430091..6d3ac19cc2ac 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -192,6 +192,20 @@ static inline void notify_partition_change(struct cpuset *cs, int old_prs)
WRITE_ONCE(cs->prs_err, PERR_NONE);
}
+/*
+ * The top_cpuset is always synchronized to cpu_active_mask and we should avoid
+ * using cpu_online_mask as much as possible. An active CPU is always an online
+ * CPU, but not vice versa. cpu_active_mask and cpu_online_mask can differ
+ * during hotplug operations. A CPU is marked active at the last stage of CPU
+ * bringup (CPUHP_AP_ACTIVE). It is also the stage where cpuset hotplug code
+ * will be called to update the sched domains so that the scheduler can move
+ * a normal task to a newly active CPU or remove tasks away from a newly
+ * inactivated CPU. The online bit is set much earlier in the CPU bringup
+ * process and cleared much later in CPU teardown.
+ *
+ * If cpu_online_mask is used while a hotunplug operation is happening in
+ * parallel, we may leave an offline CPU in cpu_allowed or some other masks.
+ */
static struct cpuset top_cpuset = {
.flags = BIT(CS_ONLINE) | BIT(CS_CPU_EXCLUSIVE) |
BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE),
@@ -355,18 +369,18 @@ static inline bool partition_is_populated(struct cpuset *cs,
* appropriate cpus.
*
* One way or another, we guarantee to return some non-empty subset
- * of cpu_online_mask.
+ * of cpu_active_mask.
*
* Call with callback_lock or cpuset_mutex held.
*/
-static void guarantee_online_cpus(struct task_struct *tsk,
+static void guarantee_active_cpus(struct task_struct *tsk,
struct cpumask *pmask)
{
const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
struct cpuset *cs;
- if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
- cpumask_copy(pmask, cpu_online_mask);
+ if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_active_mask)))
+ cpumask_copy(pmask, cpu_active_mask);
rcu_read_lock();
cs = task_cs(tsk);
@@ -1116,9 +1130,11 @@ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
if (top_cs) {
/*
- * Percpu kthreads in top_cpuset are ignored
+ * PF_NO_SETAFFINITY tasks are ignored.
+ * All per cpu kthreads should have PF_NO_SETAFFINITY
+ * flag set, see kthread_set_per_cpu().
*/
- if (kthread_is_per_cpu(task))
+ if (task->flags & PF_NO_SETAFFINITY)
continue;
cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus);
} else {
@@ -1388,14 +1404,12 @@ static int compute_effective_exclusive_cpumask(struct cpuset *cs,
if (sibling == cs)
continue;
- if (!cpumask_empty(sibling->exclusive_cpus) &&
- cpumask_intersects(xcpus, sibling->exclusive_cpus)) {
+ if (cpumask_intersects(xcpus, sibling->exclusive_cpus)) {
cpumask_andnot(xcpus, xcpus, sibling->exclusive_cpus);
retval++;
continue;
}
- if (!cpumask_empty(sibling->effective_xcpus) &&
- cpumask_intersects(xcpus, sibling->effective_xcpus)) {
+ if (cpumask_intersects(xcpus, sibling->effective_xcpus)) {
cpumask_andnot(xcpus, xcpus, sibling->effective_xcpus);
retval++;
}
@@ -1439,13 +1453,15 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
* The requested exclusive_cpus must not be allocated to other
* partitions and it can't use up all the root's effective_cpus.
*
- * Note that if there is any local partition root above it or
- * remote partition root underneath it, its exclusive_cpus must
- * have overlapped with subpartitions_cpus.
+ * The effective_xcpus mask can contain offline CPUs, but there must
+ * be at least one or more online CPUs present before it can be enabled.
+ *
+ * Note that creating a remote partition with any local partition root
+ * above it or remote partition root underneath it is not allowed.
*/
compute_effective_exclusive_cpumask(cs, tmp->new_cpus, NULL);
- if (cpumask_empty(tmp->new_cpus) ||
- cpumask_intersects(tmp->new_cpus, subpartitions_cpus) ||
+ WARN_ON_ONCE(cpumask_intersects(tmp->new_cpus, subpartitions_cpus));
+ if (!cpumask_intersects(tmp->new_cpus, cpu_active_mask) ||
cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus))
return PERR_INVCPUS;
@@ -1541,6 +1557,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
* left in the top cpuset.
*/
if (adding) {
+ WARN_ON_ONCE(cpumask_intersects(tmp->addmask, subpartitions_cpus));
if (!capable(CAP_SYS_ADMIN))
cs->prs_err = PERR_ACCESS;
else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
@@ -1650,7 +1667,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
bool nocpu;
lockdep_assert_held(&cpuset_mutex);
- WARN_ON_ONCE(is_remote_partition(cs));
+ WARN_ON_ONCE(is_remote_partition(cs)); /* For local partition only */
/*
* new_prs will only be changed for the partcmd_update and
@@ -1696,7 +1713,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
* exclusive_cpus not set. Sibling conflict should only happen
* if exclusive_cpus isn't set.
*/
- xcpus = tmp->new_cpus;
+ xcpus = tmp->delmask;
if (compute_effective_exclusive_cpumask(cs, xcpus, NULL))
WARN_ON_ONCE(!cpumask_empty(cs->exclusive_cpus));
@@ -1717,9 +1734,20 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
if (nocpu)
return PERR_NOCPUS;
- deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus);
- if (deleting)
- subparts_delta++;
+ /*
+ * This function will only be called when all the preliminary
+ * checks have passed. At this point, the following condition
+ * should hold.
+ *
+ * (cs->effective_xcpus & cpu_active_mask) ⊆ parent->effective_cpus
+ *
+ * Warn if it is not the case.
+ */
+ cpumask_and(tmp->new_cpus, xcpus, cpu_active_mask);
+ WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, parent->effective_cpus));
+
+ deleting = true;
+ subparts_delta++;
new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED;
} else if (cmd == partcmd_disable) {
/*
@@ -1774,6 +1802,15 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
parent->effective_xcpus);
}
/*
+ * The new CPUs to be removed from parent's effective CPUs
+ * must be present.
+ */
+ if (deleting) {
+ cpumask_and(tmp->new_cpus, tmp->delmask, cpu_active_mask);
+ WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, parent->effective_cpus));
+ }
+
+ /*
* Make partition invalid if parent's effective_cpus could
* become empty and there are tasks in the parent.
*/
@@ -2263,7 +2300,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
bool force = false;
int old_prs = cs->partition_root_state;
- /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
+ /* top_cpuset.cpus_allowed tracks cpu_active_mask; it's read-only */
if (cs == &top_cpuset)
return -EACCES;
@@ -3082,7 +3119,7 @@ static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
lockdep_assert_held(&cpuset_mutex);
if (cs != &top_cpuset)
- guarantee_online_cpus(task, cpus_attach);
+ guarantee_active_cpus(task, cpus_attach);
else
cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
subpartitions_cpus);
@@ -3524,11 +3561,7 @@ out_unlock:
* will call rebuild_sched_domains_locked(). That is not needed
* in the default hierarchy where only changes in partition
* will cause repartitioning.
- *
- * If the cpuset has the 'sched.partition' flag enabled, simulate
- * turning 'sched.partition" off.
*/
-
static void cpuset_css_offline(struct cgroup_subsys_state *css)
{
struct cpuset *cs = css_cs(css);
@@ -3546,6 +3579,11 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
cpus_read_unlock();
}
+/*
+ * If a dying cpuset has the 'cpus.partition' enabled, turn it off by
+ * changing it back to member to free its exclusive CPUs back to the pool to
+ * be used by other online cpusets.
+ */
static void cpuset_css_killed(struct cgroup_subsys_state *css)
{
struct cpuset *cs = css_cs(css);
@@ -4026,7 +4064,7 @@ void __init cpuset_init_smp(void)
*
* Description: Returns the cpumask_var_t cpus_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
- * subset of cpu_online_mask, even if this means going outside the
+ * subset of cpu_active_mask, even if this means going outside the
* tasks cpuset, except when the task is in the top cpuset.
**/
@@ -4040,7 +4078,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
cs = task_cs(tsk);
if (cs != &top_cpuset)
- guarantee_online_cpus(tsk, pmask);
+ guarantee_active_cpus(tsk, pmask);
/*
* Tasks in the top cpuset won't get update to their cpumasks
* when a hotplug online/offline event happens. So we include all
@@ -4054,7 +4092,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
* allowable online cpu left, we fall back to all possible cpus.
*/
cpumask_andnot(pmask, possible_mask, subpartitions_cpus);
- if (!cpumask_intersects(pmask, cpu_online_mask))
+ if (!cpumask_intersects(pmask, cpu_active_mask))
cpumask_copy(pmask, possible_mask);
}
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index b2239156b7de..ce4752ab9e09 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -9,18 +9,52 @@
#include <trace/events/cgroup.h>
-static DEFINE_SPINLOCK(cgroup_rstat_lock);
-static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
+static DEFINE_SPINLOCK(rstat_base_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, rstat_base_cpu_lock);
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
-static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
+/*
+ * Determines whether a given css can participate in rstat.
+ * css's that are cgroup::self use rstat for base stats.
+ * Other css's associated with a subsystem use rstat only when
+ * they define the ss->css_rstat_flush callback.
+ */
+static inline bool css_uses_rstat(struct cgroup_subsys_state *css)
+{
+ return css_is_self(css) || css->ss->css_rstat_flush != NULL;
+}
+
+static struct css_rstat_cpu *css_rstat_cpu(
+ struct cgroup_subsys_state *css, int cpu)
+{
+ return per_cpu_ptr(css->rstat_cpu, cpu);
+}
+
+static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu(
+ struct cgroup *cgrp, int cpu)
{
- return per_cpu_ptr(cgrp->rstat_cpu, cpu);
+ return per_cpu_ptr(cgrp->rstat_base_cpu, cpu);
+}
+
+static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss)
+{
+ if (ss)
+ return &ss->rstat_ss_lock;
+
+ return &rstat_base_lock;
+}
+
+static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu)
+{
+ if (ss)
+ return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu);
+
+ return per_cpu_ptr(&rstat_base_cpu_lock, cpu);
}
/*
- * Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
+ * Helper functions for rstat per CPU locks.
*
* This makes it easier to diagnose locking issues and contention in
* production environments. The parameter @fast_path determine the
@@ -28,20 +62,23 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
* operations without handling high-frequency fast-path "update" events.
*/
static __always_inline
-unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
- struct cgroup *cgrp, const bool fast_path)
+unsigned long _css_rstat_cpu_lock(struct cgroup_subsys_state *css, int cpu,
+ const bool fast_path)
{
+ struct cgroup *cgrp = css->cgroup;
+ raw_spinlock_t *cpu_lock;
unsigned long flags;
bool contended;
/*
- * The _irqsave() is needed because cgroup_rstat_lock is
- * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
- * this lock with the _irq() suffix only disables interrupts on
- * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
- * interrupts on both configurations. The _irqsave() ensures
- * that interrupts are always disabled and later restored.
+ * The _irqsave() is needed because the locks used for flushing are
+ * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring this lock
+ * with the _irq() suffix only disables interrupts on a non-PREEMPT_RT
+ * kernel. The raw_spinlock_t below disables interrupts on both
+ * configurations. The _irqsave() ensures that interrupts are always
+ * disabled and later restored.
*/
+ cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
if (contended) {
if (fast_path)
@@ -61,50 +98,59 @@ unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
}
static __always_inline
-void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
- struct cgroup *cgrp, unsigned long flags,
- const bool fast_path)
+void _css_rstat_cpu_unlock(struct cgroup_subsys_state *css, int cpu,
+ unsigned long flags, const bool fast_path)
{
+ struct cgroup *cgrp = css->cgroup;
+ raw_spinlock_t *cpu_lock;
+
if (fast_path)
trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
else
trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);
+ cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
raw_spin_unlock_irqrestore(cpu_lock, flags);
}
/**
- * cgroup_rstat_updated - keep track of updated rstat_cpu
- * @cgrp: target cgroup
+ * css_rstat_updated - keep track of updated rstat_cpu
+ * @css: target cgroup subsystem state
* @cpu: cpu on which rstat_cpu was updated
*
- * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
- * rstat_cpu->updated_children list. See the comment on top of
- * cgroup_rstat_cpu definition for details.
+ * @css's rstat_cpu on @cpu was updated. Put it on the parent's matching
+ * rstat_cpu->updated_children list. See the comment on top of
+ * css_rstat_cpu definition for details.
*/
-__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
+__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
- raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
unsigned long flags;
/*
+ * Since bpf programs can call this function, prevent access to
+ * uninitialized rstat pointers.
+ */
+ if (!css_uses_rstat(css))
+ return;
+
+ /*
* Speculative already-on-list test. This may race leading to
* temporary inaccuracies, which is fine.
*
* Because @parent's updated_children is terminated with @parent
- * instead of NULL, we can tell whether @cgrp is on the list by
+ * instead of NULL, we can tell whether @css is on the list by
* testing the next pointer for NULL.
*/
- if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
+ if (data_race(css_rstat_cpu(css, cpu)->updated_next))
return;
- flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);
+ flags = _css_rstat_cpu_lock(css, cpu, true);
- /* put @cgrp and all ancestors on the corresponding updated lists */
+ /* put @css and all ancestors on the corresponding updated lists */
while (true) {
- struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
- struct cgroup *parent = cgroup_parent(cgrp);
- struct cgroup_rstat_cpu *prstatc;
+ struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu);
+ struct cgroup_subsys_state *parent = css->parent;
+ struct css_rstat_cpu *prstatc;
/*
* Both additions and removals are bottom-up. If a cgroup
@@ -115,53 +161,78 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
/* Root has no parent to link it to, but mark it busy */
if (!parent) {
- rstatc->updated_next = cgrp;
+ rstatc->updated_next = css;
break;
}
- prstatc = cgroup_rstat_cpu(parent, cpu);
+ prstatc = css_rstat_cpu(parent, cpu);
rstatc->updated_next = prstatc->updated_children;
- prstatc->updated_children = cgrp;
+ prstatc->updated_children = css;
- cgrp = parent;
+ css = parent;
}
- _cgroup_rstat_cpu_unlock(cpu_lock, cpu, cgrp, flags, true);
+ _css_rstat_cpu_unlock(css, cpu, flags, true);
}
/**
- * cgroup_rstat_push_children - push children cgroups into the given list
+ * css_rstat_push_children - push children css's into the given list
* @head: current head of the list (= subtree root)
* @child: first child of the root
* @cpu: target cpu
- * Return: A new singly linked list of cgroups to be flush
+ * Return: A new singly linked list of css's to be flushed
*
- * Iteratively traverse down the cgroup_rstat_cpu updated tree level by
+ * Iteratively traverse down the css_rstat_cpu updated tree level by
* level and push all the parents first before their next level children
- * into a singly linked list built from the tail backward like "pushing"
- * cgroups into a stack. The root is pushed by the caller.
+ * into a singly linked list via the rstat_flush_next pointer built from the
+ * tail backward like "pushing" css's into a stack. The root is pushed by
+ * the caller.
*/
-static struct cgroup *cgroup_rstat_push_children(struct cgroup *head,
- struct cgroup *child, int cpu)
+static struct cgroup_subsys_state *css_rstat_push_children(
+ struct cgroup_subsys_state *head,
+ struct cgroup_subsys_state *child, int cpu)
{
- struct cgroup *chead = child; /* Head of child cgroup level */
- struct cgroup *ghead = NULL; /* Head of grandchild cgroup level */
- struct cgroup *parent, *grandchild;
- struct cgroup_rstat_cpu *crstatc;
+ struct cgroup_subsys_state *cnext = child; /* Next head of child css level */
+ struct cgroup_subsys_state *ghead = NULL; /* Head of grandchild css level */
+ struct cgroup_subsys_state *parent, *grandchild;
+ struct css_rstat_cpu *crstatc;
child->rstat_flush_next = NULL;
+ /*
+ * The subsystem rstat lock must be held for the whole duration from
+ * here as the rstat_flush_next list is being constructed to when
+ * it is consumed later in css_rstat_flush().
+ */
+ lockdep_assert_held(ss_rstat_lock(head->ss));
+
+ /*
+ * Notation: -> updated_next pointer
+ * => rstat_flush_next pointer
+ *
+ * Assuming the following sample updated_children lists:
+ * P: C1 -> C2 -> P
+ * C1: G11 -> G12 -> C1
+ * C2: G21 -> G22 -> C2
+ *
+ * After 1st iteration:
+ * head => C2 => C1 => NULL
+ * ghead => G21 => G11 => NULL
+ *
+ * After 2nd iteration:
+ * head => G12 => G11 => G22 => G21 => C2 => C1 => NULL
+ */
next_level:
- while (chead) {
- child = chead;
- chead = child->rstat_flush_next;
- parent = cgroup_parent(child);
+ while (cnext) {
+ child = cnext;
+ cnext = child->rstat_flush_next;
+ parent = child->parent;
- /* updated_next is parent cgroup terminated */
+ /* updated_next is parent cgroup terminated if !NULL */
while (child != parent) {
child->rstat_flush_next = head;
head = child;
- crstatc = cgroup_rstat_cpu(child, cpu);
+ crstatc = css_rstat_cpu(child, cpu);
grandchild = crstatc->updated_children;
if (grandchild != child) {
/* Push the grand child to the next level */
@@ -175,7 +246,7 @@ next_level:
}
if (ghead) {
- chead = ghead;
+ cnext = ghead;
ghead = NULL;
goto next_level;
}
@@ -183,31 +254,31 @@ next_level:
}
/**
- * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed
- * @root: root of the cgroup subtree to traverse
+ * css_rstat_updated_list - build a list of updated css's to be flushed
+ * @root: root of the css subtree to traverse
* @cpu: target cpu
- * Return: A singly linked list of cgroups to be flushed
+ * Return: A singly linked list of css's to be flushed
*
* Walks the updated rstat_cpu tree on @cpu from @root. During traversal,
- * each returned cgroup is unlinked from the updated tree.
+ * each returned css is unlinked from the updated tree.
*
* The only ordering guarantee is that, for a parent and a child pair
* covered by a given traversal, the child is before its parent in
* the list.
*
* Note that updated_children is self terminated and points to a list of
- * child cgroups if not empty. Whereas updated_next is like a sibling link
- * within the children list and terminated by the parent cgroup. An exception
- * here is the cgroup root whose updated_next can be self terminated.
+ * child css's if not empty. Whereas updated_next is like a sibling link
+ * within the children list and terminated by the parent css. An exception
+ * here is the css root whose updated_next can be self terminated.
*/
-static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
+static struct cgroup_subsys_state *css_rstat_updated_list(
+ struct cgroup_subsys_state *root, int cpu)
{
- raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
- struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu);
- struct cgroup *head = NULL, *parent, *child;
+ struct css_rstat_cpu *rstatc = css_rstat_cpu(root, cpu);
+ struct cgroup_subsys_state *head = NULL, *parent, *child;
unsigned long flags;
- flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, root, false);
+ flags = _css_rstat_cpu_lock(root, cpu, false);
/* Return NULL if this subtree is not on-list */
if (!rstatc->updated_next)
@@ -217,17 +288,17 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
* Unlink @root from its parent. As the updated_children list is
* singly linked, we have to walk it to find the removal point.
*/
- parent = cgroup_parent(root);
+ parent = root->parent;
if (parent) {
- struct cgroup_rstat_cpu *prstatc;
- struct cgroup **nextp;
+ struct css_rstat_cpu *prstatc;
+ struct cgroup_subsys_state **nextp;
- prstatc = cgroup_rstat_cpu(parent, cpu);
+ prstatc = css_rstat_cpu(parent, cpu);
nextp = &prstatc->updated_children;
while (*nextp != root) {
- struct cgroup_rstat_cpu *nrstatc;
+ struct css_rstat_cpu *nrstatc;
- nrstatc = cgroup_rstat_cpu(*nextp, cpu);
+ nrstatc = css_rstat_cpu(*nextp, cpu);
WARN_ON_ONCE(*nextp == parent);
nextp = &nrstatc->updated_next;
}
@@ -242,16 +313,16 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
child = rstatc->updated_children;
rstatc->updated_children = root;
if (child != root)
- head = cgroup_rstat_push_children(head, child, cpu);
+ head = css_rstat_push_children(head, child, cpu);
unlock_ret:
- _cgroup_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
+ _css_rstat_cpu_unlock(root, cpu, flags, false);
return head;
}
/*
* A hook for bpf stat collectors to attach to and flush their stats.
- * Together with providing bpf kfuncs for cgroup_rstat_updated() and
- * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that
+ * Together with providing bpf kfuncs for css_rstat_updated() and
+ * css_rstat_flush(), this enables a complete workflow where bpf progs that
* collect cgroup stats can integrate with rstat for efficient flushing.
*
* A static noinline declaration here could cause the compiler to optimize away
@@ -271,7 +342,7 @@ __weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
__bpf_hook_end();
/*
- * Helper functions for locking cgroup_rstat_lock.
+ * Helper functions for locking.
*
* This makes it easier to diagnose locking issues and contention in
* production environments. The parameter @cpu_in_loop indicate lock
@@ -279,115 +350,186 @@ __bpf_hook_end();
* value -1 is used when obtaining the main lock else this is the CPU
* number processed last.
*/
-static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop)
- __acquires(&cgroup_rstat_lock)
+static inline void __css_rstat_lock(struct cgroup_subsys_state *css,
+ int cpu_in_loop)
+ __acquires(ss_rstat_lock(css->ss))
{
+ struct cgroup *cgrp = css->cgroup;
+ spinlock_t *lock;
bool contended;
- contended = !spin_trylock_irq(&cgroup_rstat_lock);
+ lock = ss_rstat_lock(css->ss);
+ contended = !spin_trylock_irq(lock);
if (contended) {
trace_cgroup_rstat_lock_contended(cgrp, cpu_in_loop, contended);
- spin_lock_irq(&cgroup_rstat_lock);
+ spin_lock_irq(lock);
}
trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended);
}
-static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
- __releases(&cgroup_rstat_lock)
+static inline void __css_rstat_unlock(struct cgroup_subsys_state *css,
+ int cpu_in_loop)
+ __releases(ss_rstat_lock(css->ss))
{
+ struct cgroup *cgrp = css->cgroup;
+ spinlock_t *lock;
+
+ lock = ss_rstat_lock(css->ss);
trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false);
- spin_unlock_irq(&cgroup_rstat_lock);
+ spin_unlock_irq(lock);
}
/**
- * cgroup_rstat_flush - flush stats in @cgrp's subtree
- * @cgrp: target cgroup
+ * css_rstat_flush - flush stats in @css's rstat subtree
+ * @css: target cgroup subsystem state
*
- * Collect all per-cpu stats in @cgrp's subtree into the global counters
- * and propagate them upwards. After this function returns, all cgroups in
- * the subtree have up-to-date ->stat.
+ * Collect all per-cpu stats in @css's subtree into the global counters
+ * and propagate them upwards. After this function returns, all rstat
+ * nodes in the subtree have up-to-date ->stat.
*
- * This also gets all cgroups in the subtree including @cgrp off the
+ * This also gets all rstat nodes in the subtree including @css off the
* ->updated_children lists.
*
* This function may block.
*/
-__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
+__bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
{
int cpu;
+ bool is_self = css_is_self(css);
+
+ /*
+ * Since bpf programs can call this function, prevent access to
+ * uninitialized rstat pointers.
+ */
+ if (!css_uses_rstat(css))
+ return;
might_sleep();
for_each_possible_cpu(cpu) {
- struct cgroup *pos;
+ struct cgroup_subsys_state *pos;
/* Reacquire for each CPU to avoid disabling IRQs too long */
- __cgroup_rstat_lock(cgrp, cpu);
- pos = cgroup_rstat_updated_list(cgrp, cpu);
+ __css_rstat_lock(css, cpu);
+ pos = css_rstat_updated_list(css, cpu);
for (; pos; pos = pos->rstat_flush_next) {
- struct cgroup_subsys_state *css;
-
- cgroup_base_stat_flush(pos, cpu);
- bpf_rstat_flush(pos, cgroup_parent(pos), cpu);
-
- rcu_read_lock();
- list_for_each_entry_rcu(css, &pos->rstat_css_list,
- rstat_css_node)
- css->ss->css_rstat_flush(css, cpu);
- rcu_read_unlock();
+ if (is_self) {
+ cgroup_base_stat_flush(pos->cgroup, cpu);
+ bpf_rstat_flush(pos->cgroup,
+ cgroup_parent(pos->cgroup), cpu);
+ } else
+ pos->ss->css_rstat_flush(pos, cpu);
}
- __cgroup_rstat_unlock(cgrp, cpu);
+ __css_rstat_unlock(css, cpu);
if (!cond_resched())
cpu_relax();
}
}
-int cgroup_rstat_init(struct cgroup *cgrp)
+int css_rstat_init(struct cgroup_subsys_state *css)
{
+ struct cgroup *cgrp = css->cgroup;
int cpu;
+ bool is_self = css_is_self(css);
+
+ if (is_self) {
+ /* the root cgrp has rstat_base_cpu preallocated */
+ if (!cgrp->rstat_base_cpu) {
+ cgrp->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
+ if (!cgrp->rstat_base_cpu)
+ return -ENOMEM;
+ }
+ } else if (css->ss->css_rstat_flush == NULL)
+ return 0;
+
+ /* the root cgrp's self css has rstat_cpu preallocated */
+ if (!css->rstat_cpu) {
+ css->rstat_cpu = alloc_percpu(struct css_rstat_cpu);
+ if (!css->rstat_cpu) {
+ if (is_self)
+ free_percpu(cgrp->rstat_base_cpu);
- /* the root cgrp has rstat_cpu preallocated */
- if (!cgrp->rstat_cpu) {
- cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
- if (!cgrp->rstat_cpu)
return -ENOMEM;
+ }
}
/* ->updated_children list is self terminated */
for_each_possible_cpu(cpu) {
- struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+ struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu);
+
+ rstatc->updated_children = css;
+
+ if (is_self) {
+ struct cgroup_rstat_base_cpu *rstatbc;
- rstatc->updated_children = cgrp;
- u64_stats_init(&rstatc->bsync);
+ rstatbc = cgroup_rstat_base_cpu(cgrp, cpu);
+ u64_stats_init(&rstatbc->bsync);
+ }
}
return 0;
}
-void cgroup_rstat_exit(struct cgroup *cgrp)
+void css_rstat_exit(struct cgroup_subsys_state *css)
{
int cpu;
- cgroup_rstat_flush(cgrp);
+ if (!css_uses_rstat(css))
+ return;
+
+ css_rstat_flush(css);
/* sanity check */
for_each_possible_cpu(cpu) {
- struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+ struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu);
- if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
+ if (WARN_ON_ONCE(rstatc->updated_children != css) ||
WARN_ON_ONCE(rstatc->updated_next))
return;
}
- free_percpu(cgrp->rstat_cpu);
- cgrp->rstat_cpu = NULL;
+ if (css_is_self(css)) {
+ struct cgroup *cgrp = css->cgroup;
+
+ free_percpu(cgrp->rstat_base_cpu);
+ cgrp->rstat_base_cpu = NULL;
+ }
+
+ free_percpu(css->rstat_cpu);
+ css->rstat_cpu = NULL;
}
-void __init cgroup_rstat_boot(void)
+/**
+ * ss_rstat_init - subsystem-specific rstat initialization
+ * @ss: target subsystem
+ *
+ * If @ss is NULL, the static locks associated with the base stats
+ * are initialized. If @ss is non-NULL, the subsystem-specific locks
+ * are initialized.
+ */
+int __init ss_rstat_init(struct cgroup_subsys *ss)
{
int cpu;
+#ifdef CONFIG_SMP
+ /*
+ * On uniprocessor machines, arch_spinlock_t is defined as an empty
+ * struct. Avoid allocating a size of zero by having this block
+ * excluded in this case. It's acceptable to leave the subsystem locks
+ * unitialized since the associated lock functions are no-ops in the
+ * non-smp case.
+ */
+ if (ss) {
+ ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t);
+ if (!ss->rstat_ss_cpu_lock)
+ return -ENOMEM;
+ }
+#endif
+
+ spin_lock_init(ss_rstat_lock(ss));
for_each_possible_cpu(cpu)
- raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
+ raw_spin_lock_init(ss_rstat_cpu_lock(ss, cpu));
+
+ return 0;
}
/*
@@ -420,9 +562,9 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
- struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+ struct cgroup_rstat_base_cpu *rstatbc = cgroup_rstat_base_cpu(cgrp, cpu);
struct cgroup *parent = cgroup_parent(cgrp);
- struct cgroup_rstat_cpu *prstatc;
+ struct cgroup_rstat_base_cpu *prstatbc;
struct cgroup_base_stat delta;
unsigned seq;
@@ -432,15 +574,15 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
/* fetch the current per-cpu values */
do {
- seq = __u64_stats_fetch_begin(&rstatc->bsync);
- delta = rstatc->bstat;
- } while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
+ seq = __u64_stats_fetch_begin(&rstatbc->bsync);
+ delta = rstatbc->bstat;
+ } while (__u64_stats_fetch_retry(&rstatbc->bsync, seq));
/* propagate per-cpu delta to cgroup and per-cpu global statistics */
- cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
+ cgroup_base_stat_sub(&delta, &rstatbc->last_bstat);
cgroup_base_stat_add(&cgrp->bstat, &delta);
- cgroup_base_stat_add(&rstatc->last_bstat, &delta);
- cgroup_base_stat_add(&rstatc->subtree_bstat, &delta);
+ cgroup_base_stat_add(&rstatbc->last_bstat, &delta);
+ cgroup_base_stat_add(&rstatbc->subtree_bstat, &delta);
/* propagate cgroup and per-cpu global delta to parent (unless that's root) */
if (cgroup_parent(parent)) {
@@ -449,73 +591,73 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
cgroup_base_stat_add(&parent->bstat, &delta);
cgroup_base_stat_add(&cgrp->last_bstat, &delta);
- delta = rstatc->subtree_bstat;
- prstatc = cgroup_rstat_cpu(parent, cpu);
- cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat);
- cgroup_base_stat_add(&prstatc->subtree_bstat, &delta);
- cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta);
+ delta = rstatbc->subtree_bstat;
+ prstatbc = cgroup_rstat_base_cpu(parent, cpu);
+ cgroup_base_stat_sub(&delta, &rstatbc->last_subtree_bstat);
+ cgroup_base_stat_add(&prstatbc->subtree_bstat, &delta);
+ cgroup_base_stat_add(&rstatbc->last_subtree_bstat, &delta);
}
}
-static struct cgroup_rstat_cpu *
+static struct cgroup_rstat_base_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{
- struct cgroup_rstat_cpu *rstatc;
+ struct cgroup_rstat_base_cpu *rstatbc;
- rstatc = get_cpu_ptr(cgrp->rstat_cpu);
- *flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
- return rstatc;
+ rstatbc = get_cpu_ptr(cgrp->rstat_base_cpu);
+ *flags = u64_stats_update_begin_irqsave(&rstatbc->bsync);
+ return rstatbc;
}
static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
- struct cgroup_rstat_cpu *rstatc,
+ struct cgroup_rstat_base_cpu *rstatbc,
unsigned long flags)
{
- u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
- cgroup_rstat_updated(cgrp, smp_processor_id());
- put_cpu_ptr(rstatc);
+ u64_stats_update_end_irqrestore(&rstatbc->bsync, flags);
+ css_rstat_updated(&cgrp->self, smp_processor_id());
+ put_cpu_ptr(rstatbc);
}
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
- struct cgroup_rstat_cpu *rstatc;
+ struct cgroup_rstat_base_cpu *rstatbc;
unsigned long flags;
- rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
- rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
- cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
+ rstatbc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
+ rstatbc->bstat.cputime.sum_exec_runtime += delta_exec;
+ cgroup_base_stat_cputime_account_end(cgrp, rstatbc, flags);
}
void __cgroup_account_cputime_field(struct cgroup *cgrp,
enum cpu_usage_stat index, u64 delta_exec)
{
- struct cgroup_rstat_cpu *rstatc;
+ struct cgroup_rstat_base_cpu *rstatbc;
unsigned long flags;
- rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
+ rstatbc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
switch (index) {
case CPUTIME_NICE:
- rstatc->bstat.ntime += delta_exec;
+ rstatbc->bstat.ntime += delta_exec;
fallthrough;
case CPUTIME_USER:
- rstatc->bstat.cputime.utime += delta_exec;
+ rstatbc->bstat.cputime.utime += delta_exec;
break;
case CPUTIME_SYSTEM:
case CPUTIME_IRQ:
case CPUTIME_SOFTIRQ:
- rstatc->bstat.cputime.stime += delta_exec;
+ rstatbc->bstat.cputime.stime += delta_exec;
break;
#ifdef CONFIG_SCHED_CORE
case CPUTIME_FORCEIDLE:
- rstatc->bstat.forceidle_sum += delta_exec;
+ rstatbc->bstat.forceidle_sum += delta_exec;
break;
#endif
default:
break;
}
- cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
+ cgroup_base_stat_cputime_account_end(cgrp, rstatbc, flags);
}
/*
@@ -574,12 +716,12 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
struct cgroup_base_stat bstat;
if (cgroup_parent(cgrp)) {
- cgroup_rstat_flush(cgrp);
- __cgroup_rstat_lock(cgrp, -1);
+ css_rstat_flush(&cgrp->self);
+ __css_rstat_lock(&cgrp->self, -1);
bstat = cgrp->bstat;
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
&bstat.cputime.utime, &bstat.cputime.stime);
- __cgroup_rstat_unlock(cgrp, -1);
+ __css_rstat_unlock(&cgrp->self, -1);
} else {
root_cgroup_cputime(&bstat);
}
@@ -601,10 +743,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
cgroup_force_idle_show(seq, &bstat);
}
-/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
+/* Add bpf kfuncs for css_rstat_updated() and css_rstat_flush() */
BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
-BTF_ID_FLAGS(func, cgroup_rstat_updated)
-BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, css_rstat_updated)
+BTF_ID_FLAGS(func, css_rstat_flush, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
diff --git a/kernel/configs/xen.config b/kernel/configs/xen.config
index 6878b9a49be8..1875a0a5047a 100644
--- a/kernel/configs/xen.config
+++ b/kernel/configs/xen.config
@@ -13,6 +13,8 @@ CONFIG_SCSI=y
CONFIG_FB=y
CONFIG_INPUT_MISC=y
CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_ZONE_DEVICE=y
CONFIG_TTY=y
# Technically not required but otherwise produces
# pretty useless systems starting from allnoconfig
@@ -47,3 +49,4 @@ CONFIG_XEN_GNTDEV=m
CONFIG_XEN_GRANT_DEV_ALLOC=m
CONFIG_SWIOTLB_XEN=y
CONFIG_XEN_PRIVCMD=m
+CONFIG_XEN_UNPOPULATED_ALLOC=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b08bb34b1718..a59e009e0be4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2069,11 +2069,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.teardown.single = NULL,
.cant_stop = true,
},
- [CPUHP_PERF_PREPARE] = {
- .name = "perf:prepare",
- .startup.single = perf_event_init_cpu,
- .teardown.single = perf_event_exit_cpu,
- },
[CPUHP_RANDOM_PREPARE] = {
.name = "random:prepare",
.startup.single = random_prepare_cpu,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index b8fe0b3d0ffb..24c359d9c879 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -13,6 +13,7 @@
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/slab.h>
+#include <linux/pci-p2pdma.h>
#include "direct.h"
/*
@@ -462,34 +463,33 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
struct pci_p2pdma_map_state p2pdma_state = {};
- enum pci_p2pdma_map_type map;
struct scatterlist *sg;
int i, ret;
for_each_sg(sgl, sg, nents, i) {
- if (is_pci_p2pdma_page(sg_page(sg))) {
- map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg);
- switch (map) {
- case PCI_P2PDMA_MAP_BUS_ADDR:
- continue;
- case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
- /*
- * Any P2P mapping that traverses the PCI
- * host bridge must be mapped with CPU physical
- * address and not PCI bus addresses. This is
- * done with dma_direct_map_page() below.
- */
- break;
- default:
- ret = -EREMOTEIO;
+ switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ /*
+ * Any P2P mapping that traverses the PCI host bridge
+ * must be mapped with CPU physical address and not PCI
+ * bus addresses.
+ */
+ break;
+ case PCI_P2PDMA_MAP_NONE:
+ sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
+ sg->offset, sg->length, dir, attrs);
+ if (sg->dma_address == DMA_MAPPING_ERROR) {
+ ret = -EIO;
goto out_unmap;
}
- }
-
- sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
- sg->offset, sg->length, dir, attrs);
- if (sg->dma_address == DMA_MAPPING_ERROR) {
- ret = -EIO;
+ break;
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ sg->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state,
+ sg_phys(sg));
+ sg_dma_mark_bus_address(sg);
+ continue;
+ default:
+ ret = -EREMOTEIO;
goto out_unmap;
}
sg_dma_len(sg) = sg->length;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 051a32988040..107e4a4d251d 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -443,6 +443,24 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
}
EXPORT_SYMBOL_GPL(__dma_need_sync);
+/**
+ * dma_need_unmap - does this device need dma_unmap_* operations
+ * @dev: device to check
+ *
+ * If this function returns %false, drivers can skip calling dma_unmap_* after
+ * finishing an I/O. This function must be called after all mappings that might
+ * need to be unmapped have been performed.
+ */
+bool dma_need_unmap(struct device *dev)
+{
+ if (!dma_map_direct(dev, get_dma_ops(dev)))
+ return true;
+ if (!dev->dma_skip_sync)
+ return true;
+ return IS_ENABLED(CONFIG_DMA_API_DEBUG);
+}
+EXPORT_SYMBOL_GPL(dma_need_unmap);
+
static void dma_setup_need_sync(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 20154572ede9..a8dd1f27417c 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -146,7 +146,7 @@ static inline bool report_single_step(unsigned long work)
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}
-static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
bool step;
@@ -173,53 +173,6 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
ptrace_report_syscall_exit(regs, step);
}
-/*
- * Syscall specific exit to user mode preparation. Runs with interrupts
- * enabled.
- */
-static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
-{
- unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
- unsigned long nr = syscall_get_nr(current, regs);
-
- CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
-
- if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
- if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
- local_irq_enable();
- }
-
- rseq_syscall(regs);
-
- /*
- * Do one-time syscall specific work. If these work items are
- * enabled, we want to run them exactly once per syscall exit with
- * interrupts enabled.
- */
- if (unlikely(work & SYSCALL_WORK_EXIT))
- syscall_exit_work(regs, work);
-}
-
-static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
- syscall_exit_to_user_mode_prepare(regs);
- local_irq_disable_exit_to_user();
- exit_to_user_mode_prepare(regs);
-}
-
-void syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
- __syscall_exit_to_user_mode_work(regs);
-}
-
-__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
-{
- instrumentation_begin();
- __syscall_exit_to_user_mode_work(regs);
- instrumentation_end();
- exit_to_user_mode();
-}
-
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
enter_from_user_mode(regs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 95e703891b24..f34c99f8ce8f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1270,6 +1270,10 @@ static void put_ctx(struct perf_event_context *ctx)
if (ctx->task && ctx->task != TASK_TOMBSTONE)
put_task_struct(ctx->task);
call_rcu(&ctx->rcu_head, free_ctx);
+ } else {
+ smp_mb__after_atomic(); /* pairs with wait_var_event() */
+ if (ctx->task == TASK_TOMBSTONE)
+ wake_up_var(&ctx->refcount);
}
}
@@ -2167,7 +2171,7 @@ static void perf_put_aux_event(struct perf_event *event)
* If the event is an aux_event, tear down all links to
* it from other events.
*/
- for_each_sibling_event(iter, event->group_leader) {
+ for_each_sibling_event(iter, event) {
if (iter->aux_event != event)
continue;
@@ -2325,7 +2329,11 @@ static void perf_child_detach(struct perf_event *event)
if (WARN_ON_ONCE(!parent_event))
return;
+ /*
+ * Can't check this from an IPI, the holder is likey another CPU.
+ *
lockdep_assert_held(&parent_event->child_mutex);
+ */
sync_child_event(event);
list_del_init(&event->child_list);
@@ -2343,6 +2351,11 @@ event_filter_match(struct perf_event *event)
perf_cgroup_match(event);
}
+static inline bool is_event_in_freq_mode(struct perf_event *event)
+{
+ return event->attr.freq && event->attr.sample_freq;
+}
+
static void
event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
{
@@ -2380,7 +2393,7 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
if (!is_software_event(event))
cpc->active_oncpu--;
- if (event->attr.freq && event->attr.sample_freq) {
+ if (is_event_in_freq_mode(event)) {
ctx->nr_freq--;
epc->nr_freq--;
}
@@ -2450,8 +2463,9 @@ ctx_time_update_event(struct perf_event_context *ctx, struct perf_event *event)
#define DETACH_GROUP 0x01UL
#define DETACH_CHILD 0x02UL
-#define DETACH_DEAD 0x04UL
-#define DETACH_EXIT 0x08UL
+#define DETACH_EXIT 0x04UL
+#define DETACH_REVOKE 0x08UL
+#define DETACH_DEAD 0x10UL
/*
* Cross CPU call to remove a performance event
@@ -2477,12 +2491,15 @@ __perf_remove_from_context(struct perf_event *event,
*/
if (flags & DETACH_EXIT)
state = PERF_EVENT_STATE_EXIT;
+ if (flags & DETACH_REVOKE)
+ state = PERF_EVENT_STATE_REVOKED;
if (flags & DETACH_DEAD) {
event->pending_disable = 1;
state = PERF_EVENT_STATE_DEAD;
}
event_sched_out(event, ctx);
perf_event_set_state(event, min(event->state, state));
+
if (flags & DETACH_GROUP)
perf_group_detach(event);
if (flags & DETACH_CHILD)
@@ -2628,6 +2645,41 @@ void perf_event_disable_inatomic(struct perf_event *event)
static void perf_log_throttle(struct perf_event *event, int enable);
static void perf_log_itrace_start(struct perf_event *event);
+static void perf_event_unthrottle(struct perf_event *event, bool start)
+{
+ event->hw.interrupts = 0;
+ if (start)
+ event->pmu->start(event, 0);
+ if (event == event->group_leader)
+ perf_log_throttle(event, 1);
+}
+
+static void perf_event_throttle(struct perf_event *event)
+{
+ event->pmu->stop(event, 0);
+ event->hw.interrupts = MAX_INTERRUPTS;
+ if (event == event->group_leader)
+ perf_log_throttle(event, 0);
+}
+
+static void perf_event_unthrottle_group(struct perf_event *event, bool skip_start_event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+
+ perf_event_unthrottle(leader, skip_start_event ? leader != event : true);
+ for_each_sibling_event(sibling, leader)
+ perf_event_unthrottle(sibling, skip_start_event ? sibling != event : true);
+}
+
+static void perf_event_throttle_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+
+ perf_event_throttle(leader);
+ for_each_sibling_event(sibling, leader)
+ perf_event_throttle(sibling);
+}
+
static int
event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
{
@@ -2656,10 +2708,8 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
* ticks already, also for a heavily scheduling task there is little
* guarantee it'll get a tick in a timely manner.
*/
- if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
- perf_log_throttle(event, 1);
- event->hw.interrupts = 0;
- }
+ if (unlikely(event->hw.interrupts == MAX_INTERRUPTS))
+ perf_event_unthrottle(event, false);
perf_pmu_disable(event->pmu);
@@ -2674,7 +2724,7 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
if (!is_software_event(event))
cpc->active_oncpu++;
- if (event->attr.freq && event->attr.sample_freq) {
+ if (is_event_in_freq_mode(event)) {
ctx->nr_freq++;
epc->nr_freq++;
}
@@ -4237,14 +4287,10 @@ static void perf_adjust_freq_unthr_events(struct list_head *event_list)
hwc = &event->hw;
- if (hwc->interrupts == MAX_INTERRUPTS) {
- hwc->interrupts = 0;
- perf_log_throttle(event, 1);
- if (!event->attr.freq || !event->attr.sample_freq)
- event->pmu->start(event, 0);
- }
+ if (hwc->interrupts == MAX_INTERRUPTS)
+ perf_event_unthrottle_group(event, is_event_in_freq_mode(event));
- if (!event->attr.freq || !event->attr.sample_freq)
+ if (!is_event_in_freq_mode(event))
continue;
/*
@@ -4516,7 +4562,8 @@ out:
static void perf_remove_from_owner(struct perf_event *event);
static void perf_event_exit_event(struct perf_event *event,
- struct perf_event_context *ctx);
+ struct perf_event_context *ctx,
+ bool revoke);
/*
* Removes all events from the current task that have been marked
@@ -4543,7 +4590,7 @@ static void perf_event_remove_on_exec(struct perf_event_context *ctx)
modified = true;
- perf_event_exit_event(event, ctx);
+ perf_event_exit_event(event, ctx, false);
}
raw_spin_lock_irqsave(&ctx->lock, flags);
@@ -5125,6 +5172,7 @@ static bool is_sb_event(struct perf_event *event)
attr->context_switch || attr->text_poke ||
attr->bpf_event)
return true;
+
return false;
}
@@ -5521,6 +5569,8 @@ static void perf_free_addr_filters(struct perf_event *event);
/* vs perf_event_alloc() error */
static void __free_event(struct perf_event *event)
{
+ struct pmu *pmu = event->pmu;
+
if (event->attach_state & PERF_ATTACH_CALLCHAIN)
put_callchain_buffers();
@@ -5550,6 +5600,7 @@ static void __free_event(struct perf_event *event)
* put_pmu_ctx() needs an event->ctx reference, because of
* epc->ctx.
*/
+ WARN_ON_ONCE(!pmu);
WARN_ON_ONCE(!event->ctx);
WARN_ON_ONCE(event->pmu_ctx->ctx != event->ctx);
put_pmu_ctx(event->pmu_ctx);
@@ -5562,8 +5613,13 @@ static void __free_event(struct perf_event *event)
if (event->ctx)
put_ctx(event->ctx);
- if (event->pmu)
- module_put(event->pmu->module);
+ if (pmu) {
+ module_put(pmu->module);
+ scoped_guard (spinlock, &pmu->events_lock) {
+ list_del(&event->pmu_list);
+ wake_up_var(pmu);
+ }
+ }
call_rcu(&event->rcu_head, free_event_rcu);
}
@@ -5600,13 +5656,13 @@ static void _free_event(struct perf_event *event)
/*
* Used to free events which have a known refcount of 1, such as in error paths
- * where the event isn't exposed yet and inherited events.
+ * of inherited events.
*/
static void free_event(struct perf_event *event)
{
if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
- "unexpected event refcount: %ld; ptr=%p\n",
- atomic_long_read(&event->refcount), event)) {
+ "unexpected event refcount: %ld; ptr=%p\n",
+ atomic_long_read(&event->refcount), event)) {
/* leak to avoid use-after-free */
return;
}
@@ -5689,7 +5745,6 @@ int perf_event_release_kernel(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct perf_event *child, *tmp;
- LIST_HEAD(free_list);
/*
* If we got here through err_alloc: free_event(event); we will not
@@ -5718,15 +5773,17 @@ int perf_event_release_kernel(struct perf_event *event)
* Thus this guarantees that we will in fact observe and kill _ALL_
* child events.
*/
- perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
+ if (event->state > PERF_EVENT_STATE_REVOKED) {
+ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
+ } else {
+ event->state = PERF_EVENT_STATE_DEAD;
+ }
perf_event_ctx_unlock(event, ctx);
again:
mutex_lock(&event->child_mutex);
list_for_each_entry(child, &event->child_list, child_list) {
- void *var = NULL;
-
/*
* Cannot change, child events are not migrated, see the
* comment with perf_event_ctx_lock_nested().
@@ -5759,44 +5816,24 @@ again:
tmp = list_first_entry_or_null(&event->child_list,
struct perf_event, child_list);
if (tmp == child) {
- perf_remove_from_context(child, DETACH_GROUP);
- list_move(&child->child_list, &free_list);
+ perf_remove_from_context(child, DETACH_GROUP | DETACH_CHILD);
} else {
- var = &ctx->refcount;
+ child = NULL;
}
mutex_unlock(&event->child_mutex);
mutex_unlock(&ctx->mutex);
- put_ctx(ctx);
- if (var) {
- /*
- * If perf_event_free_task() has deleted all events from the
- * ctx while the child_mutex got released above, make sure to
- * notify about the preceding put_ctx().
- */
- smp_mb(); /* pairs with wait_var_event() */
- wake_up_var(var);
+ if (child) {
+ /* Last reference unless ->pending_task work is pending */
+ put_event(child);
}
+ put_ctx(ctx);
+
goto again;
}
mutex_unlock(&event->child_mutex);
- list_for_each_entry_safe(child, tmp, &free_list, child_list) {
- void *var = &child->ctx->refcount;
-
- list_del(&child->child_list);
- /* Last reference unless ->pending_task work is pending */
- put_event(child);
-
- /*
- * Wake any perf_event_free_task() waiting for this event to be
- * freed.
- */
- smp_mb(); /* pairs with wait_var_event() */
- wake_up_var(var);
- }
-
no_ctx:
/*
* Last reference unless ->pending_task work is pending on this event
@@ -6068,8 +6105,14 @@ static __poll_t perf_poll(struct file *file, poll_table *wait)
struct perf_buffer *rb;
__poll_t events = EPOLLHUP;
+ if (event->state <= PERF_EVENT_STATE_REVOKED)
+ return EPOLLERR;
+
poll_wait(file, &event->waitq, wait);
+ if (event->state <= PERF_EVENT_STATE_REVOKED)
+ return EPOLLERR;
+
if (is_event_hup(event))
return events;
@@ -6167,14 +6210,6 @@ static void __perf_event_period(struct perf_event *event,
active = (event->state == PERF_EVENT_STATE_ACTIVE);
if (active) {
perf_pmu_disable(event->pmu);
- /*
- * We could be throttled; unthrottle now to avoid the tick
- * trying to unthrottle while we already re-started the event.
- */
- if (event->hw.interrupts == MAX_INTERRUPTS) {
- event->hw.interrupts = 0;
- perf_log_throttle(event, 1);
- }
event->pmu->stop(event, PERF_EF_UPDATE);
}
@@ -6182,6 +6217,14 @@ static void __perf_event_period(struct perf_event *event,
if (active) {
event->pmu->start(event, PERF_EF_RELOAD);
+ /*
+ * Once the period is force-reset, the event starts immediately.
+ * But the event/group could be throttled. Unthrottle the
+ * event/group now to avoid the next tick trying to unthrottle
+ * while we already re-started the event/group.
+ */
+ if (event->hw.interrupts == MAX_INTERRUPTS)
+ perf_event_unthrottle_group(event, true);
perf_pmu_enable(event->pmu);
}
}
@@ -6239,12 +6282,18 @@ static int perf_event_set_output(struct perf_event *event,
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static int perf_copy_attr(struct perf_event_attr __user *uattr,
struct perf_event_attr *attr);
+static int __perf_event_set_bpf_prog(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie);
static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
{
void (*func)(struct perf_event *);
u32 flags = arg;
+ if (event->state <= PERF_EVENT_STATE_REVOKED)
+ return -ENODEV;
+
switch (cmd) {
case PERF_EVENT_IOC_ENABLE:
func = _perf_event_enable;
@@ -6301,7 +6350,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = perf_event_set_bpf_prog(event, prog, 0);
+ err = __perf_event_set_bpf_prog(event, prog, 0);
if (err) {
bpf_prog_put(prog);
return err;
@@ -6620,9 +6669,22 @@ void ring_buffer_put(struct perf_buffer *rb)
call_rcu(&rb->rcu_head, rb_free_rcu);
}
+typedef void (*mapped_f)(struct perf_event *event, struct mm_struct *mm);
+
+#define get_mapped(event, func) \
+({ struct pmu *pmu; \
+ mapped_f f = NULL; \
+ guard(rcu)(); \
+ pmu = READ_ONCE(event->pmu); \
+ if (pmu) \
+ f = pmu->func; \
+ f; \
+})
+
static void perf_mmap_open(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
+ mapped_f mapped = get_mapped(event, event_mapped);
atomic_inc(&event->mmap_count);
atomic_inc(&event->rb->mmap_count);
@@ -6630,8 +6692,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
if (vma->vm_pgoff)
atomic_inc(&event->rb->aux_mmap_count);
- if (event->pmu->event_mapped)
- event->pmu->event_mapped(event, vma->vm_mm);
+ if (mapped)
+ mapped(event, vma->vm_mm);
}
static void perf_pmu_output_stop(struct perf_event *event);
@@ -6647,14 +6709,16 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
+ mapped_f unmapped = get_mapped(event, event_unmapped);
struct perf_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
bool detach_rest = false;
- if (event->pmu->event_unmapped)
- event->pmu->event_unmapped(event, vma->vm_mm);
+ /* FIXIES vs perf_pmu_unregister() */
+ if (unmapped)
+ unmapped(event, vma->vm_mm);
/*
* The AUX buffer is strictly a sub-buffer, serialize using aux_mutex
@@ -6847,6 +6911,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unsigned long nr_pages;
long user_extra = 0, extra = 0;
int ret, flags = 0;
+ mapped_f mapped;
/*
* Don't allow mmap() of inherited per-task counters. This would
@@ -6877,6 +6942,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
mutex_lock(&event->mmap_mutex);
ret = -EINVAL;
+ /*
+ * This relies on __pmu_detach_event() taking mmap_mutex after marking
+ * the event REVOKED. Either we observe the state, or __pmu_detach_event()
+ * will detach the rb created here.
+ */
+ if (event->state <= PERF_EVENT_STATE_REVOKED) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+
if (vma->vm_pgoff == 0) {
nr_pages -= 1;
@@ -7055,8 +7130,9 @@ aux_unlock:
if (!ret)
ret = map_range(rb, vma);
- if (!ret && event->pmu->event_mapped)
- event->pmu->event_mapped(event, vma->vm_mm);
+ mapped = get_mapped(event, event_mapped);
+ if (mapped)
+ mapped(event, vma->vm_mm);
return ret;
}
@@ -7067,6 +7143,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
struct perf_event *event = filp->private_data;
int retval;
+ if (event->state <= PERF_EVENT_STATE_REVOKED)
+ return -ENODEV;
+
inode_lock(inode);
retval = fasync_helper(fd, filp, on, &event->fasync);
inode_unlock(inode);
@@ -9946,7 +10025,7 @@ void perf_event_text_poke(const void *addr, const void *old_bytes,
void perf_event_itrace_started(struct perf_event *event)
{
- event->attach_state |= PERF_ATTACH_ITRACE;
+ WRITE_ONCE(event->attach_state, event->attach_state | PERF_ATTACH_ITRACE);
}
static void perf_log_itrace_start(struct perf_event *event)
@@ -10029,14 +10108,13 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle)
hwc->interrupts = 1;
} else {
hwc->interrupts++;
- if (unlikely(throttle &&
- hwc->interrupts > max_samples_per_tick)) {
- __this_cpu_inc(perf_throttled_count);
- tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
- hwc->interrupts = MAX_INTERRUPTS;
- perf_log_throttle(event, 0);
- ret = 1;
- }
+ }
+
+ if (unlikely(throttle && hwc->interrupts >= max_samples_per_tick)) {
+ __this_cpu_inc(perf_throttled_count);
+ tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
+ perf_event_throttle_group(event);
+ ret = 1;
}
if (event->attr.freq) {
@@ -11069,11 +11147,15 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
return false;
}
-int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
- u64 bpf_cookie)
+static int __perf_event_set_bpf_prog(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp;
+ if (event->state <= PERF_EVENT_STATE_REVOKED)
+ return -ENODEV;
+
if (!perf_event_is_tracing(event))
return perf_event_set_bpf_handler(event, prog, bpf_cookie);
@@ -11108,6 +11190,20 @@ int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
return perf_event_attach_bpf_prog(event, prog, bpf_cookie);
}
+int perf_event_set_bpf_prog(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
+{
+ struct perf_event_context *ctx;
+ int ret;
+
+ ctx = perf_event_ctx_lock(event);
+ ret = __perf_event_set_bpf_prog(event, prog, bpf_cookie);
+ perf_event_ctx_unlock(event, ctx);
+
+ return ret;
+}
+
void perf_event_free_bpf_prog(struct perf_event *event)
{
if (!event->prog)
@@ -11130,7 +11226,15 @@ static void perf_event_free_filter(struct perf_event *event)
{
}
-int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+static int __perf_event_set_bpf_prog(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
+{
+ return -ENOENT;
+}
+
+int perf_event_set_bpf_prog(struct perf_event *event,
+ struct bpf_prog *prog,
u64 bpf_cookie)
{
return -ENOENT;
@@ -12235,6 +12339,9 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;
+ INIT_LIST_HEAD(&pmu->events);
+ spin_lock_init(&pmu->events_lock);
+
/*
* Now that the PMU is complete, make it visible to perf_try_init_event().
*/
@@ -12248,21 +12355,143 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
}
EXPORT_SYMBOL_GPL(perf_pmu_register);
-void perf_pmu_unregister(struct pmu *pmu)
+static void __pmu_detach_event(struct pmu *pmu, struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ /*
+ * De-schedule the event and mark it REVOKED.
+ */
+ perf_event_exit_event(event, ctx, true);
+
+ /*
+ * All _free_event() bits that rely on event->pmu:
+ *
+ * Notably, perf_mmap() relies on the ordering here.
+ */
+ scoped_guard (mutex, &event->mmap_mutex) {
+ WARN_ON_ONCE(pmu->event_unmapped);
+ /*
+ * Mostly an empty lock sequence, such that perf_mmap(), which
+ * relies on mmap_mutex, is sure to observe the state change.
+ */
+ }
+
+ perf_event_free_bpf_prog(event);
+ perf_free_addr_filters(event);
+
+ if (event->destroy) {
+ event->destroy(event);
+ event->destroy = NULL;
+ }
+
+ if (event->pmu_ctx) {
+ put_pmu_ctx(event->pmu_ctx);
+ event->pmu_ctx = NULL;
+ }
+
+ exclusive_event_destroy(event);
+ module_put(pmu->module);
+
+ event->pmu = NULL; /* force fault instead of UAF */
+}
+
+static void pmu_detach_event(struct pmu *pmu, struct perf_event *event)
+{
+ struct perf_event_context *ctx;
+
+ ctx = perf_event_ctx_lock(event);
+ __pmu_detach_event(pmu, event, ctx);
+ perf_event_ctx_unlock(event, ctx);
+
+ scoped_guard (spinlock, &pmu->events_lock)
+ list_del(&event->pmu_list);
+}
+
+static struct perf_event *pmu_get_event(struct pmu *pmu)
+{
+ struct perf_event *event;
+
+ guard(spinlock)(&pmu->events_lock);
+ list_for_each_entry(event, &pmu->events, pmu_list) {
+ if (atomic_long_inc_not_zero(&event->refcount))
+ return event;
+ }
+
+ return NULL;
+}
+
+static bool pmu_empty(struct pmu *pmu)
+{
+ guard(spinlock)(&pmu->events_lock);
+ return list_empty(&pmu->events);
+}
+
+static void pmu_detach_events(struct pmu *pmu)
+{
+ struct perf_event *event;
+
+ for (;;) {
+ event = pmu_get_event(pmu);
+ if (!event)
+ break;
+
+ pmu_detach_event(pmu, event);
+ put_event(event);
+ }
+
+ /*
+ * wait for pending _free_event()s
+ */
+ wait_var_event(pmu, pmu_empty(pmu));
+}
+
+int perf_pmu_unregister(struct pmu *pmu)
{
scoped_guard (mutex, &pmus_lock) {
+ if (!idr_cmpxchg(&pmu_idr, pmu->type, pmu, NULL))
+ return -EINVAL;
+
list_del_rcu(&pmu->entry);
- idr_remove(&pmu_idr, pmu->type);
}
/*
* We dereference the pmu list under both SRCU and regular RCU, so
* synchronize against both of those.
+ *
+ * Notably, the entirety of event creation, from perf_init_event()
+ * (which will now fail, because of the above) until
+ * perf_install_in_context() should be under SRCU such that
+ * this synchronizes against event creation. This avoids trying to
+ * detach events that are not fully formed.
*/
synchronize_srcu(&pmus_srcu);
synchronize_rcu();
+ if (pmu->event_unmapped && !pmu_empty(pmu)) {
+ /*
+ * Can't force remove events when pmu::event_unmapped()
+ * is used in perf_mmap_close().
+ */
+ guard(mutex)(&pmus_lock);
+ idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu);
+ list_add_rcu(&pmu->entry, &pmus);
+ return -EBUSY;
+ }
+
+ scoped_guard (mutex, &pmus_lock)
+ idr_remove(&pmu_idr, pmu->type);
+
+ /*
+ * PMU is removed from the pmus list, so no new events will
+ * be created, now take care of the existing ones.
+ */
+ pmu_detach_events(pmu);
+
+ /*
+ * PMU is unused, make it go away.
+ */
perf_pmu_free(pmu);
+ return 0;
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
@@ -12356,7 +12585,7 @@ static struct pmu *perf_init_event(struct perf_event *event)
struct pmu *pmu;
int type, ret;
- guard(srcu)(&pmus_srcu);
+ guard(srcu)(&pmus_srcu); /* pmu idr/list access */
/*
* Save original type before calling pmu->event_init() since certain
@@ -12580,6 +12809,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
INIT_LIST_HEAD(&event->active_entry);
INIT_LIST_HEAD(&event->addr_filters.list);
INIT_HLIST_NODE(&event->hlist_entry);
+ INIT_LIST_HEAD(&event->pmu_list);
init_waitqueue_head(&event->waitq);
@@ -12651,7 +12881,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
hwc = &event->hw;
hwc->sample_period = attr->sample_period;
- if (attr->freq && attr->sample_freq)
+ if (is_event_in_freq_mode(event))
hwc->sample_period = 1;
hwc->last_period = hwc->sample_period;
@@ -12758,6 +12988,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
/* symmetric to unaccount_event() in _free_event() */
account_event(event);
+ /*
+ * Event creation should be under SRCU, see perf_pmu_unregister().
+ */
+ lockdep_assert_held(&pmus_srcu);
+ scoped_guard (spinlock, &pmu->events_lock)
+ list_add(&event->pmu_list, &pmu->events);
+
return_ptr(event);
}
@@ -12957,6 +13194,9 @@ set:
goto unlock;
if (output_event) {
+ if (output_event->state <= PERF_EVENT_STATE_REVOKED)
+ goto unlock;
+
/* get the rb we want to redirect to */
rb = ring_buffer_get(output_event);
if (!rb)
@@ -13138,6 +13378,11 @@ SYSCALL_DEFINE5(perf_event_open,
if (event_fd < 0)
return event_fd;
+ /*
+ * Event creation should be under SRCU, see perf_pmu_unregister().
+ */
+ guard(srcu)(&pmus_srcu);
+
CLASS(fd, group)(group_fd); // group_fd == -1 => empty
if (group_fd != -1) {
if (!is_perf_file(group)) {
@@ -13145,6 +13390,10 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_fd;
}
group_leader = fd_file(group)->private_data;
+ if (group_leader->state <= PERF_EVENT_STATE_REVOKED) {
+ err = -ENODEV;
+ goto err_fd;
+ }
if (flags & PERF_FLAG_FD_OUTPUT)
output_event = group_leader;
if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -13441,7 +13690,7 @@ err_cred:
if (task)
up_read(&task->signal->exec_update_lock);
err_alloc:
- free_event(event);
+ put_event(event);
err_task:
if (task)
put_task_struct(task);
@@ -13478,6 +13727,11 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
if (attr->aux_output || attr->aux_action)
return ERR_PTR(-EINVAL);
+ /*
+ * Event creation should be under SRCU, see perf_pmu_unregister().
+ */
+ guard(srcu)(&pmus_srcu);
+
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
if (IS_ERR(event)) {
@@ -13549,7 +13803,7 @@ err_unlock:
perf_unpin_context(ctx);
put_ctx(ctx);
err_alloc:
- free_event(event);
+ put_event(event);
err:
return ERR_PTR(err);
}
@@ -13689,10 +13943,12 @@ static void sync_child_event(struct perf_event *child_event)
}
static void
-perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+perf_event_exit_event(struct perf_event *event,
+ struct perf_event_context *ctx, bool revoke)
{
struct perf_event *parent_event = event->parent;
- unsigned long detach_flags = 0;
+ unsigned long detach_flags = DETACH_EXIT;
+ unsigned int attach_state;
if (parent_event) {
/*
@@ -13707,22 +13963,38 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
* Do destroy all inherited groups, we don't care about those
* and being thorough is better.
*/
- detach_flags = DETACH_GROUP | DETACH_CHILD;
+ detach_flags |= DETACH_GROUP | DETACH_CHILD;
mutex_lock(&parent_event->child_mutex);
+ /* PERF_ATTACH_ITRACE might be set concurrently */
+ attach_state = READ_ONCE(event->attach_state);
}
- perf_remove_from_context(event, detach_flags | DETACH_EXIT);
+ if (revoke)
+ detach_flags |= DETACH_GROUP | DETACH_REVOKE;
+ perf_remove_from_context(event, detach_flags);
/*
* Child events can be freed.
*/
if (parent_event) {
mutex_unlock(&parent_event->child_mutex);
+
/*
- * Kick perf_poll() for is_event_hup();
+ * Match the refcount initialization. Make sure it doesn't happen
+ * twice if pmu_detach_event() calls it on an already exited task.
*/
- perf_event_wakeup(parent_event);
- put_event(event);
+ if (attach_state & PERF_ATTACH_CHILD) {
+ /*
+ * Kick perf_poll() for is_event_hup();
+ */
+ perf_event_wakeup(parent_event);
+ /*
+ * pmu_detach_event() will have an extra refcount.
+ * perf_pending_task() might have one too.
+ */
+ put_event(event);
+ }
+
return;
}
@@ -13732,15 +14004,13 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
perf_event_wakeup(event);
}
-static void perf_event_exit_task_context(struct task_struct *child)
+static void perf_event_exit_task_context(struct task_struct *task, bool exit)
{
- struct perf_event_context *child_ctx, *clone_ctx = NULL;
+ struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_event *child_event, *next;
- WARN_ON_ONCE(child != current);
-
- child_ctx = perf_pin_task_context(child);
- if (!child_ctx)
+ ctx = perf_pin_task_context(task);
+ if (!ctx)
return;
/*
@@ -13753,27 +14023,28 @@ static void perf_event_exit_task_context(struct task_struct *child)
* without ctx::mutex (it cannot because of the move_group double mutex
* lock thing). See the comments in perf_install_in_context().
*/
- mutex_lock(&child_ctx->mutex);
+ mutex_lock(&ctx->mutex);
/*
* In a single ctx::lock section, de-schedule the events and detach the
* context from the task such that we cannot ever get it scheduled back
* in.
*/
- raw_spin_lock_irq(&child_ctx->lock);
- task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
+ raw_spin_lock_irq(&ctx->lock);
+ if (exit)
+ task_ctx_sched_out(ctx, NULL, EVENT_ALL);
/*
* Now that the context is inactive, destroy the task <-> ctx relation
* and mark the context dead.
*/
- RCU_INIT_POINTER(child->perf_event_ctxp, NULL);
- put_ctx(child_ctx); /* cannot be last */
- WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
- put_task_struct(current); /* cannot be last */
+ RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
+ put_ctx(ctx); /* cannot be last */
+ WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+ put_task_struct(task); /* cannot be last */
- clone_ctx = unclone_ctx(child_ctx);
- raw_spin_unlock_irq(&child_ctx->lock);
+ clone_ctx = unclone_ctx(ctx);
+ raw_spin_unlock_irq(&ctx->lock);
if (clone_ctx)
put_ctx(clone_ctx);
@@ -13783,28 +14054,48 @@ static void perf_event_exit_task_context(struct task_struct *child)
* won't get any samples after PERF_RECORD_EXIT. We can however still
* get a few PERF_RECORD_READ events.
*/
- perf_event_task(child, child_ctx, 0);
+ if (exit)
+ perf_event_task(task, ctx, 0);
- list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
- perf_event_exit_event(child_event, child_ctx);
+ list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry)
+ perf_event_exit_event(child_event, ctx, false);
- mutex_unlock(&child_ctx->mutex);
+ mutex_unlock(&ctx->mutex);
- put_ctx(child_ctx);
+ if (!exit) {
+ /*
+ * perf_event_release_kernel() could still have a reference on
+ * this context. In that case we must wait for these events to
+ * have been freed (in particular all their references to this
+ * task must've been dropped).
+ *
+ * Without this copy_process() will unconditionally free this
+ * task (irrespective of its reference count) and
+ * _free_event()'s put_task_struct(event->hw.target) will be a
+ * use-after-free.
+ *
+ * Wait for all events to drop their context reference.
+ */
+ wait_var_event(&ctx->refcount,
+ refcount_read(&ctx->refcount) == 1);
+ }
+ put_ctx(ctx);
}
/*
- * When a child task exits, feed back event values to parent events.
+ * When a task exits, feed back event values to parent events.
*
* Can be called with exec_update_lock held when called from
* setup_new_exec().
*/
-void perf_event_exit_task(struct task_struct *child)
+void perf_event_exit_task(struct task_struct *task)
{
struct perf_event *event, *tmp;
- mutex_lock(&child->perf_event_mutex);
- list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+ WARN_ON_ONCE(task != current);
+
+ mutex_lock(&task->perf_event_mutex);
+ list_for_each_entry_safe(event, tmp, &task->perf_event_list,
owner_entry) {
list_del_init(&event->owner_entry);
@@ -13815,42 +14106,23 @@ void perf_event_exit_task(struct task_struct *child)
*/
smp_store_release(&event->owner, NULL);
}
- mutex_unlock(&child->perf_event_mutex);
+ mutex_unlock(&task->perf_event_mutex);
- perf_event_exit_task_context(child);
+ perf_event_exit_task_context(task, true);
/*
* The perf_event_exit_task_context calls perf_event_task
- * with child's task_ctx, which generates EXIT events for
- * child contexts and sets child->perf_event_ctxp[] to NULL.
+ * with task's task_ctx, which generates EXIT events for
+ * task contexts and sets task->perf_event_ctxp[] to NULL.
* At this point we need to send EXIT events to cpu contexts.
*/
- perf_event_task(child, NULL, 0);
+ perf_event_task(task, NULL, 0);
/*
* Detach the perf_ctx_data for the system-wide event.
*/
guard(percpu_read)(&global_ctx_data_rwsem);
- detach_task_ctx_data(child);
-}
-
-static void perf_free_event(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- struct perf_event *parent = event->parent;
-
- if (WARN_ON_ONCE(!parent))
- return;
-
- mutex_lock(&parent->child_mutex);
- list_del_init(&event->child_list);
- mutex_unlock(&parent->child_mutex);
-
- raw_spin_lock_irq(&ctx->lock);
- perf_group_detach(event);
- list_del_event(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
- put_event(event);
+ detach_task_ctx_data(task);
}
/*
@@ -13862,48 +14134,7 @@ static void perf_free_event(struct perf_event *event,
*/
void perf_event_free_task(struct task_struct *task)
{
- struct perf_event_context *ctx;
- struct perf_event *event, *tmp;
-
- ctx = rcu_access_pointer(task->perf_event_ctxp);
- if (!ctx)
- return;
-
- mutex_lock(&ctx->mutex);
- raw_spin_lock_irq(&ctx->lock);
- /*
- * Destroy the task <-> ctx relation and mark the context dead.
- *
- * This is important because even though the task hasn't been
- * exposed yet the context has been (through child_list).
- */
- RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
- WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
- put_task_struct(task); /* cannot be last */
- raw_spin_unlock_irq(&ctx->lock);
-
-
- list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
- perf_free_event(event, ctx);
-
- mutex_unlock(&ctx->mutex);
-
- /*
- * perf_event_release_kernel() could've stolen some of our
- * child events and still have them on its free_list. In that
- * case we must wait for these events to have been freed (in
- * particular all their references to this task must've been
- * dropped).
- *
- * Without this copy_process() will unconditionally free this
- * task (irrespective of its reference count) and
- * _free_event()'s put_task_struct(event->hw.target) will be a
- * use-after-free.
- *
- * Wait for all events to drop their context reference.
- */
- wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
- put_ctx(ctx); /* must be last */
+ perf_event_exit_task_context(task, false);
}
void perf_event_delayed_put(struct task_struct *task)
@@ -13980,6 +14211,14 @@ inherit_event(struct perf_event *parent_event,
if (parent_event->parent)
parent_event = parent_event->parent;
+ if (parent_event->state <= PERF_EVENT_STATE_REVOKED)
+ return NULL;
+
+ /*
+ * Event creation should be under SRCU, see perf_pmu_unregister().
+ */
+ guard(srcu)(&pmus_srcu);
+
child_event = perf_event_alloc(&parent_event->attr,
parent_event->cpu,
child,
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 5130b119d0ae..d2aef87c7e9f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -679,7 +679,15 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
{
bool overwrite = !(flags & RING_BUFFER_WRITABLE);
int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
- int ret = -ENOMEM, max_order;
+ bool use_contiguous_pages = event->pmu->capabilities & (
+ PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_PREFER_LARGE);
+ /*
+ * Initialize max_order to 0 for page allocation. This allocates single
+ * pages to minimize memory fragmentation. This is overridden if the
+ * PMU needs or prefers contiguous pages (use_contiguous_pages = true).
+ */
+ int max_order = 0;
+ int ret = -ENOMEM;
if (!has_aux(event))
return -EOPNOTSUPP;
@@ -689,8 +697,8 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
if (!overwrite) {
/*
- * Watermark defaults to half the buffer, and so does the
- * max_order, to aid PMU drivers in double buffering.
+ * Watermark defaults to half the buffer, to aid PMU drivers
+ * in double buffering.
*/
if (!watermark)
watermark = min_t(unsigned long,
@@ -698,16 +706,19 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
(unsigned long)nr_pages << (PAGE_SHIFT - 1));
/*
- * Use aux_watermark as the basis for chunking to
- * help PMU drivers honor the watermark.
+ * If using contiguous pages, use aux_watermark as the basis
+ * for chunking to help PMU drivers honor the watermark.
*/
- max_order = get_order(watermark);
+ if (use_contiguous_pages)
+ max_order = get_order(watermark);
} else {
/*
- * We need to start with the max_order that fits in nr_pages,
- * not the other way around, hence ilog2() and not get_order.
+ * If using contiguous pages, we need to start with the
+ * max_order that fits in nr_pages, not the other way around,
+ * hence ilog2() and not get_order.
*/
- max_order = ilog2(nr_pages);
+ if (use_contiguous_pages)
+ max_order = ilog2(nr_pages);
watermark = 0;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 1b51dc099f1e..38645039dd8f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -133,8 +133,13 @@ struct release_task_post {
static void __unhash_process(struct release_task_post *post, struct task_struct *p,
bool group_dead)
{
+ struct pid *pid = task_pid(p);
+
nr_threads--;
+
detach_pid(post->pids, p, PIDTYPE_PID);
+ wake_up_all(&pid->wait_pidfd);
+
if (group_dead) {
detach_pid(post->pids, p, PIDTYPE_TGID);
detach_pid(post->pids, p, PIDTYPE_PGID);
@@ -253,7 +258,8 @@ repeat:
pidfs_exit(p);
cgroup_release(p);
- thread_pid = get_pid(p->thread_pid);
+ /* Retrieve @thread_pid before __unhash_process() may set it to NULL. */
+ thread_pid = task_pid(p);
write_lock_irq(&tasklist_lock);
ptrace_release_task(p);
@@ -282,8 +288,8 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
+ /* @thread_pid can't go away until free_pids() below */
proc_flush_pid(thread_pid);
- put_pid(thread_pid);
add_device_randomness(&p->se.sum_exec_runtime,
sizeof(p->se.sum_exec_runtime));
free_pids(post.pids);
@@ -936,12 +942,12 @@ void __noreturn do_exit(long code)
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
+ trace_sched_process_exit(tsk, group_dead);
exit_mm();
if (group_dead)
acct_process();
- trace_sched_process_exit(tsk);
exit_sem(tsk);
exit_shm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index c4b26cd8998b..85afccfdf3b1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -498,10 +498,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
vma_numab_state_init(new);
dup_anon_vma_name(orig, new);
- /* track_pfn_copy() will later take care of copying internal state. */
- if (unlikely(new->vm_flags & VM_PFNMAP))
- untrack_pfn_clear(new);
-
return new;
}
@@ -672,6 +668,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
tmp = vm_area_dup(mpnt);
if (!tmp)
goto fail_nomem;
+
+ /* track_pfn_copy() will later take care of copying internal state. */
+ if (unlikely(tmp->vm_flags & VM_PFNMAP))
+ untrack_pfn_clear(tmp);
+
retval = vma_dup_policy(mpnt, tmp);
if (retval)
goto fail_nomem_policy;
@@ -1305,6 +1306,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
+ futex_mm_init(mm);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
mm->pmd_huge_pte = NULL;
#endif
@@ -1387,6 +1389,7 @@ static inline void __mmput(struct mm_struct *mm)
if (mm->binfmt)
module_put(mm->binfmt->module);
lru_gen_del_mm(mm);
+ futex_hash_free(mm);
mmdrop(mm);
}
@@ -2036,17 +2039,16 @@ static inline void rcu_copy_process(struct task_struct *p)
}
/**
- * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
+ * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd
- * @ret: Where to return the file for the pidfd.
+ * @ret_file: return the new pidfs file
*
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
*
- * The helper doesn't perform checks on @pid which makes it useful for pidfds
- * created via CLONE_PIDFD where @pid has no task attached when the pidfd and
- * pidfd file are prepared.
+ * The helper verifies that @pid is still in use, without PIDFD_THREAD the
+ * task identified by @pid must be a thread-group leader.
*
* If this function returns successfully the caller is responsible to either
* call fd_install() passing the returned pidfd and pidfd file as arguments in
@@ -2063,59 +2065,50 @@ static inline void rcu_copy_process(struct task_struct *p)
* error, a negative error code is returned from the function and the
* last argument remains unchanged.
*/
-static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file)
{
- struct file *pidfd_file;
+ struct file *pidfs_file;
+
+ /*
+ * PIDFD_STALE is only allowed to be passed if the caller knows
+ * that @pid is already registered in pidfs and thus
+ * PIDFD_INFO_EXIT information is guaranteed to be available.
+ */
+ if (!(flags & PIDFD_STALE)) {
+ /*
+ * While holding the pidfd waitqueue lock removing the
+ * task linkage for the thread-group leader pid
+ * (PIDTYPE_TGID) isn't possible. Thus, if there's still
+ * task linkage for PIDTYPE_PID not having thread-group
+ * leader linkage for the pid means it wasn't a
+ * thread-group leader in the first place.
+ */
+ guard(spinlock_irq)(&pid->wait_pidfd.lock);
+
+ /* Task has already been reaped. */
+ if (!pid_has_task(pid, PIDTYPE_PID))
+ return -ESRCH;
+ /*
+ * If this struct pid isn't used as a thread-group
+ * leader but the caller requested to create a
+ * thread-group leader pidfd then report ENOENT.
+ */
+ if (!(flags & PIDFD_THREAD) && !pid_has_task(pid, PIDTYPE_TGID))
+ return -ENOENT;
+ }
CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
if (pidfd < 0)
return pidfd;
- pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR);
- if (IS_ERR(pidfd_file))
- return PTR_ERR(pidfd_file);
+ pidfs_file = pidfs_alloc_file(pid, flags | O_RDWR);
+ if (IS_ERR(pidfs_file))
+ return PTR_ERR(pidfs_file);
- *ret = pidfd_file;
+ *ret_file = pidfs_file;
return take_fd(pidfd);
}
-/**
- * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
- * @pid: the struct pid for which to create a pidfd
- * @flags: flags of the new @pidfd
- * @ret: Where to return the pidfd.
- *
- * Allocate a new file that stashes @pid and reserve a new pidfd number in the
- * caller's file descriptor table. The pidfd is reserved but not installed yet.
- *
- * The helper verifies that @pid is still in use, without PIDFD_THREAD the
- * task identified by @pid must be a thread-group leader.
- *
- * If this function returns successfully the caller is responsible to either
- * call fd_install() passing the returned pidfd and pidfd file as arguments in
- * order to install the pidfd into its file descriptor table or they must use
- * put_unused_fd() and fput() on the returned pidfd and pidfd file
- * respectively.
- *
- * This function is useful when a pidfd must already be reserved but there
- * might still be points of failure afterwards and the caller wants to ensure
- * that no pidfd is leaked into its file descriptor table.
- *
- * Return: On success, a reserved pidfd is returned from the function and a new
- * pidfd file is returned in the last argument to the function. On
- * error, a negative error code is returned from the function and the
- * last argument remains unchanged.
- */
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
-{
- bool thread = flags & PIDFD_THREAD;
-
- if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID))
- return -EINVAL;
-
- return __pidfd_prepare(pid, flags, ret);
-}
-
static void __delayed_free_task(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@ -2162,6 +2155,13 @@ static void rv_task_fork(struct task_struct *p)
#define rv_task_fork(p) do {} while (0)
#endif
+static bool need_futex_hash_allocate_default(u64 clone_flags)
+{
+ if ((clone_flags & (CLONE_THREAD | CLONE_VM)) != (CLONE_THREAD | CLONE_VM))
+ return false;
+ return true;
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -2462,7 +2462,7 @@ __latent_entropy struct task_struct *copy_process(
* Note that no task has been attached to @pid yet indicate
* that via CLONE_PIDFD.
*/
- retval = __pidfd_prepare(pid, flags | PIDFD_CLONE, &pidfile);
+ retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
@@ -2543,6 +2543,21 @@ __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup;
/*
+ * Allocate a default futex hash for the user process once the first
+ * thread spawns.
+ */
+ if (need_futex_hash_allocate_default(clone_flags)) {
+ retval = futex_hash_allocate_default();
+ if (retval)
+ goto bad_fork_core_free;
+ /*
+ * If we fail beyond this point we don't free the allocated
+ * futex hash map. We assume that another thread will be created
+ * and makes use of it. The hash map will be freed once the main
+ * thread terminates.
+ */
+ }
+ /*
* From this point on we must avoid any synchronous user-space
* communication until we take the tasklist-lock. In particular, we do
* not want user-space to be able to predict the process start-time by
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index cca15859a50b..19a2c65f3d37 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -36,9 +36,15 @@
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/plist.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/slab.h>
+#include <linux/prctl.h>
+#include <linux/rcuref.h>
+#include <linux/mempolicy.h>
+#include <linux/mmap_lock.h>
#include "futex.h"
#include "../locking/rtmutex_common.h"
@@ -49,12 +55,24 @@
* reside in the same cacheline.
*/
static struct {
- struct futex_hash_bucket *queues;
unsigned long hashmask;
+ unsigned int hashshift;
+ struct futex_hash_bucket *queues[MAX_NUMNODES];
} __futex_data __read_mostly __aligned(2*sizeof(long));
-#define futex_queues (__futex_data.queues)
-#define futex_hashmask (__futex_data.hashmask)
+#define futex_hashmask (__futex_data.hashmask)
+#define futex_hashshift (__futex_data.hashshift)
+#define futex_queues (__futex_data.queues)
+
+struct futex_private_hash {
+ rcuref_t users;
+ unsigned int hash_mask;
+ struct rcu_head rcu;
+ void *mm;
+ bool custom;
+ bool immutable;
+ struct futex_hash_bucket queues[];
+};
/*
* Fault injections for futexes.
@@ -107,21 +125,328 @@ late_initcall(fail_futex_debugfs);
#endif /* CONFIG_FAIL_FUTEX */
+static struct futex_hash_bucket *
+__futex_hash(union futex_key *key, struct futex_private_hash *fph);
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+static inline bool futex_key_is_private(union futex_key *key)
+{
+ /*
+ * Relies on get_futex_key() to set either bit for shared
+ * futexes -- see comment with union futex_key.
+ */
+ return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
+}
+
+bool futex_private_hash_get(struct futex_private_hash *fph)
+{
+ if (fph->immutable)
+ return true;
+ return rcuref_get(&fph->users);
+}
+
+void futex_private_hash_put(struct futex_private_hash *fph)
+{
+ /* Ignore return value, last put is verified via rcuref_is_dead() */
+ if (fph->immutable)
+ return;
+ if (rcuref_put(&fph->users))
+ wake_up_var(fph->mm);
+}
+
/**
- * futex_hash - Return the hash bucket in the global hash
- * @key: Pointer to the futex key for which the hash is calculated
+ * futex_hash_get - Get an additional reference for the local hash.
+ * @hb: ptr to the private local hash.
*
- * We hash on the keys returned from get_futex_key (see below) and return the
- * corresponding hash bucket in the global hash.
+ * Obtain an additional reference for the already obtained hash bucket. The
+ * caller must already own an reference.
*/
+void futex_hash_get(struct futex_hash_bucket *hb)
+{
+ struct futex_private_hash *fph = hb->priv;
+
+ if (!fph)
+ return;
+ WARN_ON_ONCE(!futex_private_hash_get(fph));
+}
+
+void futex_hash_put(struct futex_hash_bucket *hb)
+{
+ struct futex_private_hash *fph = hb->priv;
+
+ if (!fph)
+ return;
+ futex_private_hash_put(fph);
+}
+
+static struct futex_hash_bucket *
+__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
+{
+ u32 hash;
+
+ if (!futex_key_is_private(key))
+ return NULL;
+
+ if (!fph)
+ fph = rcu_dereference(key->private.mm->futex_phash);
+ if (!fph || !fph->hash_mask)
+ return NULL;
+
+ hash = jhash2((void *)&key->private.address,
+ sizeof(key->private.address) / 4,
+ key->both.offset);
+ return &fph->queues[hash & fph->hash_mask];
+}
+
+static void futex_rehash_private(struct futex_private_hash *old,
+ struct futex_private_hash *new)
+{
+ struct futex_hash_bucket *hb_old, *hb_new;
+ unsigned int slots = old->hash_mask + 1;
+ unsigned int i;
+
+ for (i = 0; i < slots; i++) {
+ struct futex_q *this, *tmp;
+
+ hb_old = &old->queues[i];
+
+ spin_lock(&hb_old->lock);
+ plist_for_each_entry_safe(this, tmp, &hb_old->chain, list) {
+
+ plist_del(&this->list, &hb_old->chain);
+ futex_hb_waiters_dec(hb_old);
+
+ WARN_ON_ONCE(this->lock_ptr != &hb_old->lock);
+
+ hb_new = __futex_hash(&this->key, new);
+ futex_hb_waiters_inc(hb_new);
+ /*
+ * The new pointer isn't published yet but an already
+ * moved user can be unqueued due to timeout or signal.
+ */
+ spin_lock_nested(&hb_new->lock, SINGLE_DEPTH_NESTING);
+ plist_add(&this->list, &hb_new->chain);
+ this->lock_ptr = &hb_new->lock;
+ spin_unlock(&hb_new->lock);
+ }
+ spin_unlock(&hb_old->lock);
+ }
+}
+
+static bool __futex_pivot_hash(struct mm_struct *mm,
+ struct futex_private_hash *new)
+{
+ struct futex_private_hash *fph;
+
+ WARN_ON_ONCE(mm->futex_phash_new);
+
+ fph = rcu_dereference_protected(mm->futex_phash,
+ lockdep_is_held(&mm->futex_hash_lock));
+ if (fph) {
+ if (!rcuref_is_dead(&fph->users)) {
+ mm->futex_phash_new = new;
+ return false;
+ }
+
+ futex_rehash_private(fph, new);
+ }
+ rcu_assign_pointer(mm->futex_phash, new);
+ kvfree_rcu(fph, rcu);
+ return true;
+}
+
+static void futex_pivot_hash(struct mm_struct *mm)
+{
+ scoped_guard(mutex, &mm->futex_hash_lock) {
+ struct futex_private_hash *fph;
+
+ fph = mm->futex_phash_new;
+ if (fph) {
+ mm->futex_phash_new = NULL;
+ __futex_pivot_hash(mm, fph);
+ }
+ }
+}
+
+struct futex_private_hash *futex_private_hash(void)
+{
+ struct mm_struct *mm = current->mm;
+ /*
+ * Ideally we don't loop. If there is a replacement in progress
+ * then a new private hash is already prepared and a reference can't be
+ * obtained once the last user dropped it's.
+ * In that case we block on mm_struct::futex_hash_lock and either have
+ * to perform the replacement or wait while someone else is doing the
+ * job. Eitherway, on the second iteration we acquire a reference on the
+ * new private hash or loop again because a new replacement has been
+ * requested.
+ */
+again:
+ scoped_guard(rcu) {
+ struct futex_private_hash *fph;
+
+ fph = rcu_dereference(mm->futex_phash);
+ if (!fph)
+ return NULL;
+
+ if (fph->immutable)
+ return fph;
+ if (rcuref_get(&fph->users))
+ return fph;
+ }
+ futex_pivot_hash(mm);
+ goto again;
+}
+
struct futex_hash_bucket *futex_hash(union futex_key *key)
{
- u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
- key->both.offset);
+ struct futex_private_hash *fph;
+ struct futex_hash_bucket *hb;
+
+again:
+ scoped_guard(rcu) {
+ hb = __futex_hash(key, NULL);
+ fph = hb->priv;
+
+ if (!fph || futex_private_hash_get(fph))
+ return hb;
+ }
+ futex_pivot_hash(key->private.mm);
+ goto again;
+}
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+
+static struct futex_hash_bucket *
+__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
+{
+ return NULL;
+}
+
+struct futex_hash_bucket *futex_hash(union futex_key *key)
+{
+ return __futex_hash(key, NULL);
+}
+
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
+
+#ifdef CONFIG_FUTEX_MPOL
+
+static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
+{
+ struct vm_area_struct *vma = vma_lookup(mm, addr);
+ struct mempolicy *mpol;
+ int node = FUTEX_NO_NODE;
+
+ if (!vma)
+ return FUTEX_NO_NODE;
+
+ mpol = vma_policy(vma);
+ if (!mpol)
+ return FUTEX_NO_NODE;
+
+ switch (mpol->mode) {
+ case MPOL_PREFERRED:
+ node = first_node(mpol->nodes);
+ break;
+ case MPOL_PREFERRED_MANY:
+ case MPOL_BIND:
+ if (mpol->home_node != NUMA_NO_NODE)
+ node = mpol->home_node;
+ break;
+ default:
+ break;
+ }
+
+ return node;
+}
+
+static int futex_key_to_node_opt(struct mm_struct *mm, unsigned long addr)
+{
+ int seq, node;
+
+ guard(rcu)();
+
+ if (!mmap_lock_speculate_try_begin(mm, &seq))
+ return -EBUSY;
+
+ node = __futex_key_to_node(mm, addr);
+
+ if (mmap_lock_speculate_retry(mm, seq))
+ return -EAGAIN;
+
+ return node;
+}
+
+static int futex_mpol(struct mm_struct *mm, unsigned long addr)
+{
+ int node;
+
+ node = futex_key_to_node_opt(mm, addr);
+ if (node >= FUTEX_NO_NODE)
+ return node;
- return &futex_queues[hash & futex_hashmask];
+ guard(mmap_read_lock)(mm);
+ return __futex_key_to_node(mm, addr);
}
+#else /* !CONFIG_FUTEX_MPOL */
+
+static int futex_mpol(struct mm_struct *mm, unsigned long addr)
+{
+ return FUTEX_NO_NODE;
+}
+
+#endif /* CONFIG_FUTEX_MPOL */
+
+/**
+ * __futex_hash - Return the hash bucket
+ * @key: Pointer to the futex key for which the hash is calculated
+ * @fph: Pointer to private hash if known
+ *
+ * We hash on the keys returned from get_futex_key (see below) and return the
+ * corresponding hash bucket.
+ * If the FUTEX is PROCESS_PRIVATE then a per-process hash bucket (from the
+ * private hash) is returned if existing. Otherwise a hash bucket from the
+ * global hash is returned.
+ */
+static struct futex_hash_bucket *
+__futex_hash(union futex_key *key, struct futex_private_hash *fph)
+{
+ int node = key->both.node;
+ u32 hash;
+
+ if (node == FUTEX_NO_NODE) {
+ struct futex_hash_bucket *hb;
+
+ hb = __futex_hash_private(key, fph);
+ if (hb)
+ return hb;
+ }
+
+ hash = jhash2((u32 *)key,
+ offsetof(typeof(*key), both.offset) / sizeof(u32),
+ key->both.offset);
+
+ if (node == FUTEX_NO_NODE) {
+ /*
+ * In case of !FLAGS_NUMA, use some unused hash bits to pick a
+ * node -- this ensures regular futexes are interleaved across
+ * the nodes and avoids having to allocate multiple
+ * hash-tables.
+ *
+ * NOTE: this isn't perfectly uniform, but it is fast and
+ * handles sparse node masks.
+ */
+ node = (hash >> futex_hashshift) % nr_node_ids;
+ if (!node_possible(node)) {
+ node = find_next_bit_wrap(node_possible_map.bits,
+ nr_node_ids, node);
+ }
+ }
+
+ return &futex_queues[node][hash & futex_hashmask];
+}
/**
* futex_setup_timer - set up the sleeping hrtimer.
@@ -227,25 +552,60 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
struct page *page;
struct folio *folio;
struct address_space *mapping;
- int err, ro = 0;
+ int node, err, size, ro = 0;
+ bool node_updated = false;
bool fshared;
fshared = flags & FLAGS_SHARED;
+ size = futex_size(flags);
+ if (flags & FLAGS_NUMA)
+ size *= 2;
/*
* The futex address must be "naturally" aligned.
*/
key->both.offset = address % PAGE_SIZE;
- if (unlikely((address % sizeof(u32)) != 0))
+ if (unlikely((address % size) != 0))
return -EINVAL;
address -= key->both.offset;
- if (unlikely(!access_ok(uaddr, sizeof(u32))))
+ if (unlikely(!access_ok(uaddr, size)))
return -EFAULT;
if (unlikely(should_fail_futex(fshared)))
return -EFAULT;
+ node = FUTEX_NO_NODE;
+
+ if (flags & FLAGS_NUMA) {
+ u32 __user *naddr = (void *)uaddr + size / 2;
+
+ if (futex_get_value(&node, naddr))
+ return -EFAULT;
+
+ if (node != FUTEX_NO_NODE &&
+ (node >= MAX_NUMNODES || !node_possible(node)))
+ return -EINVAL;
+ }
+
+ if (node == FUTEX_NO_NODE && (flags & FLAGS_MPOL)) {
+ node = futex_mpol(mm, address);
+ node_updated = true;
+ }
+
+ if (flags & FLAGS_NUMA) {
+ u32 __user *naddr = (void *)uaddr + size / 2;
+
+ if (node == FUTEX_NO_NODE) {
+ node = numa_node_id();
+ node_updated = true;
+ }
+ if (node_updated && futex_put_value(node, naddr))
+ return -EFAULT;
+ }
+
+ key->both.node = node;
+
/*
* PROCESS_PRIVATE futexes are fast.
* As the mm cannot disappear under us and the 'key' only needs
@@ -502,13 +862,9 @@ void __futex_unqueue(struct futex_q *q)
}
/* The key must be already stored in q->key. */
-struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
+void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb)
__acquires(&hb->lock)
{
- struct futex_hash_bucket *hb;
-
- hb = futex_hash(&q->key);
-
/*
* Increment the counter before taking the lock so that
* a potential waker won't miss a to-be-slept task that is
@@ -522,14 +878,13 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
q->lock_ptr = &hb->lock;
spin_lock(&hb->lock);
- return hb;
}
void futex_q_unlock(struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
- spin_unlock(&hb->lock);
futex_hb_waiters_dec(hb);
+ spin_unlock(&hb->lock);
}
void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
@@ -568,6 +923,8 @@ int futex_unqueue(struct futex_q *q)
spinlock_t *lock_ptr;
int ret = 0;
+ /* RCU so lock_ptr is not going away during locking. */
+ guard(rcu)();
/* In the common case we don't take the spinlock, which is nice. */
retry:
/*
@@ -606,6 +963,24 @@ retry:
return ret;
}
+void futex_q_lockptr_lock(struct futex_q *q)
+{
+ spinlock_t *lock_ptr;
+
+ /*
+ * See futex_unqueue() why lock_ptr can change.
+ */
+ guard(rcu)();
+retry:
+ lock_ptr = READ_ONCE(q->lock_ptr);
+ spin_lock(lock_ptr);
+
+ if (unlikely(lock_ptr != q->lock_ptr)) {
+ spin_unlock(lock_ptr);
+ goto retry;
+ }
+}
+
/*
* PI futexes can not be requeued and must remove themselves from the hash
* bucket. The hash bucket lock (i.e. lock_ptr) is held.
@@ -949,10 +1324,20 @@ static void exit_pi_state_list(struct task_struct *curr)
{
struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state;
- struct futex_hash_bucket *hb;
union futex_key key = FUTEX_KEY_INIT;
/*
+ * The mutex mm_struct::futex_hash_lock might be acquired.
+ */
+ might_sleep();
+ /*
+ * Ensure the hash remains stable (no resize) during the while loop
+ * below. The hb pointer is acquired under the pi_lock so we can't block
+ * on the mutex.
+ */
+ WARN_ON(curr != current);
+ guard(private_hash)();
+ /*
* We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful
* versus waiters unqueueing themselves:
@@ -962,50 +1347,52 @@ static void exit_pi_state_list(struct task_struct *curr)
next = head->next;
pi_state = list_entry(next, struct futex_pi_state, list);
key = pi_state->key;
- hb = futex_hash(&key);
-
- /*
- * We can race against put_pi_state() removing itself from the
- * list (a waiter going away). put_pi_state() will first
- * decrement the reference count and then modify the list, so
- * its possible to see the list entry but fail this reference
- * acquire.
- *
- * In that case; drop the locks to let put_pi_state() make
- * progress and retry the loop.
- */
- if (!refcount_inc_not_zero(&pi_state->refcount)) {
+ if (1) {
+ CLASS(hb, hb)(&key);
+
+ /*
+ * We can race against put_pi_state() removing itself from the
+ * list (a waiter going away). put_pi_state() will first
+ * decrement the reference count and then modify the list, so
+ * its possible to see the list entry but fail this reference
+ * acquire.
+ *
+ * In that case; drop the locks to let put_pi_state() make
+ * progress and retry the loop.
+ */
+ if (!refcount_inc_not_zero(&pi_state->refcount)) {
+ raw_spin_unlock_irq(&curr->pi_lock);
+ cpu_relax();
+ raw_spin_lock_irq(&curr->pi_lock);
+ continue;
+ }
raw_spin_unlock_irq(&curr->pi_lock);
- cpu_relax();
- raw_spin_lock_irq(&curr->pi_lock);
- continue;
- }
- raw_spin_unlock_irq(&curr->pi_lock);
- spin_lock(&hb->lock);
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
- raw_spin_lock(&curr->pi_lock);
- /*
- * We dropped the pi-lock, so re-check whether this
- * task still owns the PI-state:
- */
- if (head->next != next) {
- /* retain curr->pi_lock for the loop invariant */
- raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+ spin_lock(&hb->lock);
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock(&curr->pi_lock);
+ /*
+ * We dropped the pi-lock, so re-check whether this
+ * task still owns the PI-state:
+ */
+ if (head->next != next) {
+ /* retain curr->pi_lock for the loop invariant */
+ raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+ spin_unlock(&hb->lock);
+ put_pi_state(pi_state);
+ continue;
+ }
+
+ WARN_ON(pi_state->owner != curr);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ pi_state->owner = NULL;
+
+ raw_spin_unlock(&curr->pi_lock);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
- put_pi_state(pi_state);
- continue;
}
- WARN_ON(pi_state->owner != curr);
- WARN_ON(list_empty(&pi_state->list));
- list_del_init(&pi_state->list);
- pi_state->owner = NULL;
-
- raw_spin_unlock(&curr->pi_lock);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- spin_unlock(&hb->lock);
-
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
@@ -1125,30 +1512,304 @@ void futex_exit_release(struct task_struct *tsk)
futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}
+static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
+ struct futex_private_hash *fph)
+{
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+ fhb->priv = fph;
+#endif
+ atomic_set(&fhb->waiters, 0);
+ plist_head_init(&fhb->chain);
+ spin_lock_init(&fhb->lock);
+}
+
+#define FH_CUSTOM 0x01
+#define FH_IMMUTABLE 0x02
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+void futex_hash_free(struct mm_struct *mm)
+{
+ struct futex_private_hash *fph;
+
+ kvfree(mm->futex_phash_new);
+ fph = rcu_dereference_raw(mm->futex_phash);
+ if (fph) {
+ WARN_ON_ONCE(rcuref_read(&fph->users) > 1);
+ kvfree(fph);
+ }
+}
+
+static bool futex_pivot_pending(struct mm_struct *mm)
+{
+ struct futex_private_hash *fph;
+
+ guard(rcu)();
+
+ if (!mm->futex_phash_new)
+ return true;
+
+ fph = rcu_dereference(mm->futex_phash);
+ return rcuref_is_dead(&fph->users);
+}
+
+static bool futex_hash_less(struct futex_private_hash *a,
+ struct futex_private_hash *b)
+{
+ /* user provided always wins */
+ if (!a->custom && b->custom)
+ return true;
+ if (a->custom && !b->custom)
+ return false;
+
+ /* zero-sized hash wins */
+ if (!b->hash_mask)
+ return true;
+ if (!a->hash_mask)
+ return false;
+
+ /* keep the biggest */
+ if (a->hash_mask < b->hash_mask)
+ return true;
+ if (a->hash_mask > b->hash_mask)
+ return false;
+
+ return false; /* equal */
+}
+
+static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct futex_private_hash *fph;
+ bool custom = flags & FH_CUSTOM;
+ int i;
+
+ if (hash_slots && (hash_slots == 1 || !is_power_of_2(hash_slots)))
+ return -EINVAL;
+
+ /*
+ * Once we've disabled the global hash there is no way back.
+ */
+ scoped_guard(rcu) {
+ fph = rcu_dereference(mm->futex_phash);
+ if (fph && (!fph->hash_mask || fph->immutable)) {
+ if (custom)
+ return -EBUSY;
+ return 0;
+ }
+ }
+
+ fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!fph)
+ return -ENOMEM;
+
+ rcuref_init(&fph->users, 1);
+ fph->hash_mask = hash_slots ? hash_slots - 1 : 0;
+ fph->custom = custom;
+ fph->immutable = !!(flags & FH_IMMUTABLE);
+ fph->mm = mm;
+
+ for (i = 0; i < hash_slots; i++)
+ futex_hash_bucket_init(&fph->queues[i], fph);
+
+ if (custom) {
+ /*
+ * Only let prctl() wait / retry; don't unduly delay clone().
+ */
+again:
+ wait_var_event(mm, futex_pivot_pending(mm));
+ }
+
+ scoped_guard(mutex, &mm->futex_hash_lock) {
+ struct futex_private_hash *free __free(kvfree) = NULL;
+ struct futex_private_hash *cur, *new;
+
+ cur = rcu_dereference_protected(mm->futex_phash,
+ lockdep_is_held(&mm->futex_hash_lock));
+ new = mm->futex_phash_new;
+ mm->futex_phash_new = NULL;
+
+ if (fph) {
+ if (cur && !new) {
+ /*
+ * If we have an existing hash, but do not yet have
+ * allocated a replacement hash, drop the initial
+ * reference on the existing hash.
+ */
+ futex_private_hash_put(cur);
+ }
+
+ if (new) {
+ /*
+ * Two updates raced; throw out the lesser one.
+ */
+ if (futex_hash_less(new, fph)) {
+ free = new;
+ new = fph;
+ } else {
+ free = fph;
+ }
+ } else {
+ new = fph;
+ }
+ fph = NULL;
+ }
+
+ if (new) {
+ /*
+ * Will set mm->futex_phash_new on failure;
+ * futex_private_hash_get() will try again.
+ */
+ if (!__futex_pivot_hash(mm, new) && custom)
+ goto again;
+ }
+ }
+ return 0;
+}
+
+int futex_hash_allocate_default(void)
+{
+ unsigned int threads, buckets, current_buckets = 0;
+ struct futex_private_hash *fph;
+
+ if (!current->mm)
+ return 0;
+
+ scoped_guard(rcu) {
+ threads = min_t(unsigned int,
+ get_nr_threads(current),
+ num_online_cpus());
+
+ fph = rcu_dereference(current->mm->futex_phash);
+ if (fph) {
+ if (fph->custom)
+ return 0;
+
+ current_buckets = fph->hash_mask + 1;
+ }
+ }
+
+ /*
+ * The default allocation will remain within
+ * 16 <= threads * 4 <= global hash size
+ */
+ buckets = roundup_pow_of_two(4 * threads);
+ buckets = clamp(buckets, 16, futex_hashmask + 1);
+
+ if (current_buckets >= buckets)
+ return 0;
+
+ return futex_hash_allocate(buckets, 0);
+}
+
+static int futex_hash_get_slots(void)
+{
+ struct futex_private_hash *fph;
+
+ guard(rcu)();
+ fph = rcu_dereference(current->mm->futex_phash);
+ if (fph && fph->hash_mask)
+ return fph->hash_mask + 1;
+ return 0;
+}
+
+static int futex_hash_get_immutable(void)
+{
+ struct futex_private_hash *fph;
+
+ guard(rcu)();
+ fph = rcu_dereference(current->mm->futex_phash);
+ if (fph && fph->immutable)
+ return 1;
+ if (fph && !fph->hash_mask)
+ return 1;
+ return 0;
+}
+
+#else
+
+static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
+{
+ return -EINVAL;
+}
+
+static int futex_hash_get_slots(void)
+{
+ return 0;
+}
+
+static int futex_hash_get_immutable(void)
+{
+ return 0;
+}
+#endif
+
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+ unsigned int flags = FH_CUSTOM;
+ int ret;
+
+ switch (arg2) {
+ case PR_FUTEX_HASH_SET_SLOTS:
+ if (arg4 & ~FH_FLAG_IMMUTABLE)
+ return -EINVAL;
+ if (arg4 & FH_FLAG_IMMUTABLE)
+ flags |= FH_IMMUTABLE;
+ ret = futex_hash_allocate(arg3, flags);
+ break;
+
+ case PR_FUTEX_HASH_GET_SLOTS:
+ ret = futex_hash_get_slots();
+ break;
+
+ case PR_FUTEX_HASH_GET_IMMUTABLE:
+ ret = futex_hash_get_immutable();
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
static int __init futex_init(void)
{
unsigned long hashsize, i;
- unsigned int futex_shift;
+ unsigned int order, n;
+ unsigned long size;
#ifdef CONFIG_BASE_SMALL
hashsize = 16;
#else
- hashsize = roundup_pow_of_two(256 * num_possible_cpus());
+ hashsize = 256 * num_possible_cpus();
+ hashsize /= num_possible_nodes();
+ hashsize = max(4, hashsize);
+ hashsize = roundup_pow_of_two(hashsize);
#endif
+ futex_hashshift = ilog2(hashsize);
+ size = sizeof(struct futex_hash_bucket) * hashsize;
+ order = get_order(size);
+
+ for_each_node(n) {
+ struct futex_hash_bucket *table;
+
+ if (order > MAX_PAGE_ORDER)
+ table = vmalloc_huge_node(size, GFP_KERNEL, n);
+ else
+ table = alloc_pages_exact_nid(n, size, GFP_KERNEL);
+
+ BUG_ON(!table);
- futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
- hashsize, 0, 0,
- &futex_shift, NULL,
- hashsize, hashsize);
- hashsize = 1UL << futex_shift;
+ for (i = 0; i < hashsize; i++)
+ futex_hash_bucket_init(&table[i], NULL);
- for (i = 0; i < hashsize; i++) {
- atomic_set(&futex_queues[i].waiters, 0);
- plist_head_init(&futex_queues[i].chain);
- spin_lock_init(&futex_queues[i].lock);
+ futex_queues[n] = table;
}
futex_hashmask = hashsize - 1;
+ pr_info("futex hash table entries: %lu (%lu bytes on %d NUMA nodes, total %lu KiB, %s).\n",
+ hashsize, size, num_possible_nodes(), size * num_possible_nodes() / 1024,
+ order > MAX_PAGE_ORDER ? "vmalloc" : "linear");
return 0;
}
core_initcall(futex_init);
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 6b2f4c7eb720..fcd1617212ee 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -7,6 +7,7 @@
#include <linux/sched/wake_q.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
+#include <linux/cleanup.h>
#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>
@@ -38,6 +39,7 @@
#define FLAGS_HAS_TIMEOUT 0x0040
#define FLAGS_NUMA 0x0080
#define FLAGS_STRICT 0x0100
+#define FLAGS_MPOL 0x0200
/* FUTEX_ to FLAGS_ */
static inline unsigned int futex_to_flags(unsigned int op)
@@ -53,7 +55,7 @@ static inline unsigned int futex_to_flags(unsigned int op)
return flags;
}
-#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
+#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE)
/* FUTEX2_ to FLAGS_ */
static inline unsigned int futex2_to_flags(unsigned int flags2)
@@ -66,6 +68,9 @@ static inline unsigned int futex2_to_flags(unsigned int flags2)
if (flags2 & FUTEX2_NUMA)
flags |= FLAGS_NUMA;
+ if (flags2 & FUTEX2_MPOL)
+ flags |= FLAGS_MPOL;
+
return flags;
}
@@ -86,6 +91,19 @@ static inline bool futex_flags_valid(unsigned int flags)
if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
return false;
+ /*
+ * Must be able to represent both FUTEX_NO_NODE and every valid nodeid
+ * in a futex word.
+ */
+ if (flags & FLAGS_NUMA) {
+ int bits = 8 * futex_size(flags);
+ u64 max = ~0ULL;
+
+ max >>= 64 - bits;
+ if (nr_node_ids >= max)
+ return false;
+ }
+
return true;
}
@@ -117,6 +135,7 @@ struct futex_hash_bucket {
atomic_t waiters;
spinlock_t lock;
struct plist_head chain;
+ struct futex_private_hash *priv;
} ____cacheline_aligned_in_smp;
/*
@@ -156,6 +175,7 @@ typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);
* @requeue_pi_key: the requeue_pi target futex key
* @bitset: bitset for the optional bitmasked wakeup
* @requeue_state: State field for futex_requeue_pi()
+ * @drop_hb_ref: Waiter should drop the extra hash bucket reference if true
* @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
*
* We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
@@ -182,6 +202,7 @@ struct futex_q {
union futex_key *requeue_pi_key;
u32 bitset;
atomic_t requeue_state;
+ bool drop_hb_ref;
#ifdef CONFIG_PREEMPT_RT
struct rcuwait requeue_wait;
#endif
@@ -196,12 +217,35 @@ enum futex_access {
extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
enum futex_access rw);
-
+extern void futex_q_lockptr_lock(struct futex_q *q);
extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns);
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+extern void futex_hash_get(struct futex_hash_bucket *hb);
+extern void futex_hash_put(struct futex_hash_bucket *hb);
+
+extern struct futex_private_hash *futex_private_hash(void);
+extern bool futex_private_hash_get(struct futex_private_hash *fph);
+extern void futex_private_hash_put(struct futex_private_hash *fph);
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
+static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
+static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
+static inline bool futex_private_hash_get(void) { return false; }
+static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
+#endif
+
+DEFINE_CLASS(hb, struct futex_hash_bucket *,
+ if (_T) futex_hash_put(_T),
+ futex_hash(key), union futex_key *key);
+
+DEFINE_CLASS(private_hash, struct futex_private_hash *,
+ if (_T) futex_private_hash_put(_T),
+ futex_private_hash(), void);
/**
* futex_match - Check whether two futex keys are equal
@@ -219,9 +263,9 @@ static inline int futex_match(union futex_key *key1, union futex_key *key2)
}
extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- struct futex_q *q, struct futex_hash_bucket **hb);
-extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
- struct hrtimer_sleeper *timeout);
+ struct futex_q *q, union futex_key *key2,
+ struct task_struct *task);
+extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout);
extern bool __futex_wake_mark(struct futex_q *q);
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
@@ -256,7 +300,7 @@ static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32
* This looks a bit overkill, but generally just results in a couple
* of instructions.
*/
-static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+static __always_inline int futex_get_value(u32 *dest, u32 __user *from)
{
u32 val;
@@ -273,12 +317,26 @@ Efault:
return -EFAULT;
}
+static __always_inline int futex_put_value(u32 val, u32 __user *to)
+{
+ if (can_do_masked_user_access())
+ to = masked_user_access_begin(to);
+ else if (!user_read_access_begin(to, sizeof(*to)))
+ return -EFAULT;
+ unsafe_put_user(val, to, Efault);
+ user_read_access_end();
+ return 0;
+Efault:
+ user_read_access_end();
+ return -EFAULT;
+}
+
static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
{
int ret;
pagefault_disable();
- ret = futex_read_inatomic(dest, from);
+ ret = futex_get_value(dest, from);
pagefault_enable();
return ret;
@@ -354,7 +412,7 @@ static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
#endif
}
-extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
+extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb);
extern void futex_q_unlock(struct futex_hash_bucket *hb);
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index 7a941845f7ee..dacb2330f1fb 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -806,7 +806,7 @@ handle_err:
break;
}
- spin_lock(q->lock_ptr);
+ futex_q_lockptr_lock(q);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
/*
@@ -920,7 +920,6 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
struct hrtimer_sleeper timeout, *to;
struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter;
- struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
DEFINE_WAKE_Q(wake_q);
int res, ret;
@@ -939,151 +938,183 @@ retry:
goto out;
retry_private:
- hb = futex_q_lock(&q);
+ if (1) {
+ CLASS(hb, hb)(&q.key);
- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
- &exiting, 0);
- if (unlikely(ret)) {
- /*
- * Atomic work succeeded and we got the lock,
- * or failed. Either way, we do _not_ block.
- */
- switch (ret) {
- case 1:
- /* We got the lock. */
- ret = 0;
- goto out_unlock_put_key;
- case -EFAULT:
- goto uaddr_faulted;
- case -EBUSY:
- case -EAGAIN:
- /*
- * Two reasons for this:
- * - EBUSY: Task is exiting and we just wait for the
- * exit to complete.
- * - EAGAIN: The user space value changed.
- */
- futex_q_unlock(hb);
+ futex_q_lock(&q, hb);
+
+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+ &exiting, 0);
+ if (unlikely(ret)) {
/*
- * Handle the case where the owner is in the middle of
- * exiting. Wait for the exit to complete otherwise
- * this task might loop forever, aka. live lock.
+ * Atomic work succeeded and we got the lock,
+ * or failed. Either way, we do _not_ block.
*/
- wait_for_owner_exiting(ret, exiting);
- cond_resched();
- goto retry;
- default:
- goto out_unlock_put_key;
+ switch (ret) {
+ case 1:
+ /* We got the lock. */
+ ret = 0;
+ goto out_unlock_put_key;
+ case -EFAULT:
+ goto uaddr_faulted;
+ case -EBUSY:
+ case -EAGAIN:
+ /*
+ * Two reasons for this:
+ * - EBUSY: Task is exiting and we just wait for the
+ * exit to complete.
+ * - EAGAIN: The user space value changed.
+ */
+ futex_q_unlock(hb);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
+ cond_resched();
+ goto retry;
+ default:
+ goto out_unlock_put_key;
+ }
}
- }
- WARN_ON(!q.pi_state);
+ WARN_ON(!q.pi_state);
- /*
- * Only actually queue now that the atomic ops are done:
- */
- __futex_queue(&q, hb, current);
+ /*
+ * Only actually queue now that the atomic ops are done:
+ */
+ __futex_queue(&q, hb, current);
- if (trylock) {
- ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
- /* Fixup the trylock return value: */
- ret = ret ? 0 : -EWOULDBLOCK;
- goto no_block;
- }
+ if (trylock) {
+ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
+ /* Fixup the trylock return value: */
+ ret = ret ? 0 : -EWOULDBLOCK;
+ goto no_block;
+ }
- /*
- * Must be done before we enqueue the waiter, here is unfortunately
- * under the hb lock, but that *should* work because it does nothing.
- */
- rt_mutex_pre_schedule();
+ /*
+ * Caution; releasing @hb in-scope. The hb->lock is still locked
+ * while the reference is dropped. The reference can not be dropped
+ * after the unlock because if a user initiated resize is in progress
+ * then we might need to wake him. This can not be done after the
+ * rt_mutex_pre_schedule() invocation. The hb will remain valid because
+ * the thread, performing resize, will block on hb->lock during
+ * the requeue.
+ */
+ futex_hash_put(no_free_ptr(hb));
+ /*
+ * Must be done before we enqueue the waiter, here is unfortunately
+ * under the hb lock, but that *should* work because it does nothing.
+ */
+ rt_mutex_pre_schedule();
- rt_mutex_init_waiter(&rt_waiter);
+ rt_mutex_init_waiter(&rt_waiter);
- /*
- * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
- * hold it while doing rt_mutex_start_proxy(), because then it will
- * include hb->lock in the blocking chain, even through we'll not in
- * fact hold it while blocking. This will lead it to report -EDEADLK
- * and BUG when futex_unlock_pi() interleaves with this.
- *
- * Therefore acquire wait_lock while holding hb->lock, but drop the
- * latter before calling __rt_mutex_start_proxy_lock(). This
- * interleaves with futex_unlock_pi() -- which does a similar lock
- * handoff -- such that the latter can observe the futex_q::pi_state
- * before __rt_mutex_start_proxy_lock() is done.
- */
- raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
- spin_unlock(q.lock_ptr);
- /*
- * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
- * such that futex_unlock_pi() is guaranteed to observe the waiter when
- * it sees the futex_q::pi_state.
- */
- ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
- raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);
+ /*
+ * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
+ * hold it while doing rt_mutex_start_proxy(), because then it will
+ * include hb->lock in the blocking chain, even through we'll not in
+ * fact hold it while blocking. This will lead it to report -EDEADLK
+ * and BUG when futex_unlock_pi() interleaves with this.
+ *
+ * Therefore acquire wait_lock while holding hb->lock, but drop the
+ * latter before calling __rt_mutex_start_proxy_lock(). This
+ * interleaves with futex_unlock_pi() -- which does a similar lock
+ * handoff -- such that the latter can observe the futex_q::pi_state
+ * before __rt_mutex_start_proxy_lock() is done.
+ */
+ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+ spin_unlock(q.lock_ptr);
+ /*
+ * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
+ * such that futex_unlock_pi() is guaranteed to observe the waiter when
+ * it sees the futex_q::pi_state.
+ */
+ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
+ raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);
- if (ret) {
- if (ret == 1)
- ret = 0;
- goto cleanup;
- }
+ if (ret) {
+ if (ret == 1)
+ ret = 0;
+ goto cleanup;
+ }
- if (unlikely(to))
- hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
+ if (unlikely(to))
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
- ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
+ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
cleanup:
- /*
- * If we failed to acquire the lock (deadlock/signal/timeout), we must
- * must unwind the above, however we canont lock hb->lock because
- * rt_mutex already has a waiter enqueued and hb->lock can itself try
- * and enqueue an rt_waiter through rtlock.
- *
- * Doing the cleanup without holding hb->lock can cause inconsistent
- * state between hb and pi_state, but only in the direction of not
- * seeing a waiter that is leaving.
- *
- * See futex_unlock_pi(), it deals with this inconsistency.
- *
- * There be dragons here, since we must deal with the inconsistency on
- * the way out (here), it is impossible to detect/warn about the race
- * the other way around (missing an incoming waiter).
- *
- * What could possibly go wrong...
- */
- if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
- ret = 0;
+ /*
+ * If we failed to acquire the lock (deadlock/signal/timeout), we must
+ * unwind the above, however we canont lock hb->lock because
+ * rt_mutex already has a waiter enqueued and hb->lock can itself try
+ * and enqueue an rt_waiter through rtlock.
+ *
+ * Doing the cleanup without holding hb->lock can cause inconsistent
+ * state between hb and pi_state, but only in the direction of not
+ * seeing a waiter that is leaving.
+ *
+ * See futex_unlock_pi(), it deals with this inconsistency.
+ *
+ * There be dragons here, since we must deal with the inconsistency on
+ * the way out (here), it is impossible to detect/warn about the race
+ * the other way around (missing an incoming waiter).
+ *
+ * What could possibly go wrong...
+ */
+ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
+ ret = 0;
- /*
- * Now that the rt_waiter has been dequeued, it is safe to use
- * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
- * the
- */
- spin_lock(q.lock_ptr);
- /*
- * Waiter is unqueued.
- */
- rt_mutex_post_schedule();
+ /*
+ * Now that the rt_waiter has been dequeued, it is safe to use
+ * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
+ * the
+ */
+ futex_q_lockptr_lock(&q);
+ /*
+ * Waiter is unqueued.
+ */
+ rt_mutex_post_schedule();
no_block:
- /*
- * Fixup the pi_state owner and possibly acquire the lock if we
- * haven't already.
- */
- res = fixup_pi_owner(uaddr, &q, !ret);
- /*
- * If fixup_pi_owner() returned an error, propagate that. If it acquired
- * the lock, clear our -ETIMEDOUT or -EINTR.
- */
- if (res)
- ret = (res < 0) ? res : 0;
-
- futex_unqueue_pi(&q);
- spin_unlock(q.lock_ptr);
- goto out;
+ /*
+ * Fixup the pi_state owner and possibly acquire the lock if we
+ * haven't already.
+ */
+ res = fixup_pi_owner(uaddr, &q, !ret);
+ /*
+ * If fixup_pi_owner() returned an error, propagate that. If it acquired
+ * the lock, clear our -ETIMEDOUT or -EINTR.
+ */
+ if (res)
+ ret = (res < 0) ? res : 0;
+
+ futex_unqueue_pi(&q);
+ spin_unlock(q.lock_ptr);
+ if (q.drop_hb_ref) {
+ CLASS(hb, hb)(&q.key);
+ /* Additional reference from futex_unlock_pi() */
+ futex_hash_put(hb);
+ }
+ goto out;
out_unlock_put_key:
- futex_q_unlock(hb);
+ futex_q_unlock(hb);
+ goto out;
+
+uaddr_faulted:
+ futex_q_unlock(hb);
+
+ ret = fault_in_user_writeable(uaddr);
+ if (ret)
+ goto out;
+
+ if (!(flags & FLAGS_SHARED))
+ goto retry_private;
+
+ goto retry;
+ }
out:
if (to) {
@@ -1091,18 +1122,6 @@ out:
destroy_hrtimer_on_stack(&to->timer);
}
return ret != -EINTR ? ret : -ERESTARTNOINTR;
-
-uaddr_faulted:
- futex_q_unlock(hb);
-
- ret = fault_in_user_writeable(uaddr);
- if (ret)
- goto out;
-
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-
- goto retry;
}
/*
@@ -1114,7 +1133,6 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
u32 curval, uval, vpid = task_pid_vnr(current);
union futex_key key = FUTEX_KEY_INIT;
- struct futex_hash_bucket *hb;
struct futex_q *top_waiter;
int ret;
@@ -1134,7 +1152,7 @@ retry:
if (ret)
return ret;
- hb = futex_hash(&key);
+ CLASS(hb, hb)(&key);
spin_lock(&hb->lock);
retry_hb:
@@ -1187,6 +1205,12 @@ retry_hb:
*/
rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
if (!rt_waiter) {
+ /*
+ * Acquire a reference for the leaving waiter to ensure
+ * valid futex_q::lock_ptr.
+ */
+ futex_hash_get(hb);
+ top_waiter->drop_hb_ref = true;
__futex_unqueue(top_waiter);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
goto retry_hb;
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index b47bb764b352..c716a66f8692 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -87,6 +87,11 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
futex_hb_waiters_inc(hb2);
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
+ /*
+ * hb1 and hb2 belong to the same futex_hash_bucket_private
+ * because if we managed get a reference on hb1 then it can't be
+ * replaced. Therefore we avoid put(hb1)+get(hb2) here.
+ */
}
q->key = *key2;
}
@@ -231,7 +236,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
-
+ /*
+ * Acquire a reference for the waiter to ensure valid
+ * futex_q::lock_ptr.
+ */
+ futex_hash_get(hb);
+ q->drop_hb_ref = true;
q->lock_ptr = &hb->lock;
/* Signal locked state to the waiter */
@@ -371,7 +381,6 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int task_count = 0, ret;
struct futex_pi_state *pi_state = NULL;
- struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
DEFINE_WAKE_Q(wake_q);
@@ -443,240 +452,242 @@ retry:
if (requeue_pi && futex_match(&key1, &key2))
return -EINVAL;
- hb1 = futex_hash(&key1);
- hb2 = futex_hash(&key2);
-
retry_private:
- futex_hb_waiters_inc(hb2);
- double_lock_hb(hb1, hb2);
+ if (1) {
+ CLASS(hb, hb1)(&key1);
+ CLASS(hb, hb2)(&key2);
- if (likely(cmpval != NULL)) {
- u32 curval;
+ futex_hb_waiters_inc(hb2);
+ double_lock_hb(hb1, hb2);
- ret = futex_get_value_locked(&curval, uaddr1);
+ if (likely(cmpval != NULL)) {
+ u32 curval;
- if (unlikely(ret)) {
- double_unlock_hb(hb1, hb2);
- futex_hb_waiters_dec(hb2);
+ ret = futex_get_value_locked(&curval, uaddr1);
- ret = get_user(curval, uaddr1);
- if (ret)
- return ret;
+ if (unlikely(ret)) {
+ futex_hb_waiters_dec(hb2);
+ double_unlock_hb(hb1, hb2);
- if (!(flags1 & FLAGS_SHARED))
- goto retry_private;
+ ret = get_user(curval, uaddr1);
+ if (ret)
+ return ret;
- goto retry;
- }
- if (curval != *cmpval) {
- ret = -EAGAIN;
- goto out_unlock;
- }
- }
+ if (!(flags1 & FLAGS_SHARED))
+ goto retry_private;
- if (requeue_pi) {
- struct task_struct *exiting = NULL;
+ goto retry;
+ }
+ if (curval != *cmpval) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ }
- /*
- * Attempt to acquire uaddr2 and wake the top waiter. If we
- * intend to requeue waiters, force setting the FUTEX_WAITERS
- * bit. We force this here where we are able to easily handle
- * faults rather in the requeue loop below.
- *
- * Updates topwaiter::requeue_state if a top waiter exists.
- */
- ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
- &key2, &pi_state,
- &exiting, nr_requeue);
+ if (requeue_pi) {
+ struct task_struct *exiting = NULL;
- /*
- * At this point the top_waiter has either taken uaddr2 or
- * is waiting on it. In both cases pi_state has been
- * established and an initial refcount on it. In case of an
- * error there's nothing.
- *
- * The top waiter's requeue_state is up to date:
- *
- * - If the lock was acquired atomically (ret == 1), then
- * the state is Q_REQUEUE_PI_LOCKED.
- *
- * The top waiter has been dequeued and woken up and can
- * return to user space immediately. The kernel/user
- * space state is consistent. In case that there must be
- * more waiters requeued the WAITERS bit in the user
- * space futex is set so the top waiter task has to go
- * into the syscall slowpath to unlock the futex. This
- * will block until this requeue operation has been
- * completed and the hash bucket locks have been
- * dropped.
- *
- * - If the trylock failed with an error (ret < 0) then
- * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
- * happened", or Q_REQUEUE_PI_IGNORE when there was an
- * interleaved early wakeup.
- *
- * - If the trylock did not succeed (ret == 0) then the
- * state is either Q_REQUEUE_PI_IN_PROGRESS or
- * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
- * This will be cleaned up in the loop below, which
- * cannot fail because futex_proxy_trylock_atomic() did
- * the same sanity checks for requeue_pi as the loop
- * below does.
- */
- switch (ret) {
- case 0:
- /* We hold a reference on the pi state. */
- break;
-
- case 1:
/*
- * futex_proxy_trylock_atomic() acquired the user space
- * futex. Adjust task_count.
+ * Attempt to acquire uaddr2 and wake the top waiter. If we
+ * intend to requeue waiters, force setting the FUTEX_WAITERS
+ * bit. We force this here where we are able to easily handle
+ * faults rather in the requeue loop below.
+ *
+ * Updates topwaiter::requeue_state if a top waiter exists.
*/
- task_count++;
- ret = 0;
- break;
+ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+ &key2, &pi_state,
+ &exiting, nr_requeue);
- /*
- * If the above failed, then pi_state is NULL and
- * waiter::requeue_state is correct.
- */
- case -EFAULT:
- double_unlock_hb(hb1, hb2);
- futex_hb_waiters_dec(hb2);
- ret = fault_in_user_writeable(uaddr2);
- if (!ret)
- goto retry;
- return ret;
- case -EBUSY:
- case -EAGAIN:
- /*
- * Two reasons for this:
- * - EBUSY: Owner is exiting and we just wait for the
- * exit to complete.
- * - EAGAIN: The user space value changed.
- */
- double_unlock_hb(hb1, hb2);
- futex_hb_waiters_dec(hb2);
/*
- * Handle the case where the owner is in the middle of
- * exiting. Wait for the exit to complete otherwise
- * this task might loop forever, aka. live lock.
+ * At this point the top_waiter has either taken uaddr2 or
+ * is waiting on it. In both cases pi_state has been
+ * established and an initial refcount on it. In case of an
+ * error there's nothing.
+ *
+ * The top waiter's requeue_state is up to date:
+ *
+ * - If the lock was acquired atomically (ret == 1), then
+ * the state is Q_REQUEUE_PI_LOCKED.
+ *
+ * The top waiter has been dequeued and woken up and can
+ * return to user space immediately. The kernel/user
+ * space state is consistent. In case that there must be
+ * more waiters requeued the WAITERS bit in the user
+ * space futex is set so the top waiter task has to go
+ * into the syscall slowpath to unlock the futex. This
+ * will block until this requeue operation has been
+ * completed and the hash bucket locks have been
+ * dropped.
+ *
+ * - If the trylock failed with an error (ret < 0) then
+ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
+ * happened", or Q_REQUEUE_PI_IGNORE when there was an
+ * interleaved early wakeup.
+ *
+ * - If the trylock did not succeed (ret == 0) then the
+ * state is either Q_REQUEUE_PI_IN_PROGRESS or
+ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
+ * This will be cleaned up in the loop below, which
+ * cannot fail because futex_proxy_trylock_atomic() did
+ * the same sanity checks for requeue_pi as the loop
+ * below does.
*/
- wait_for_owner_exiting(ret, exiting);
- cond_resched();
- goto retry;
- default:
- goto out_unlock;
- }
- }
-
- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
- if (task_count - nr_wake >= nr_requeue)
- break;
-
- if (!futex_match(&this->key, &key1))
- continue;
-
- /*
- * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
- * be paired with each other and no other futex ops.
- *
- * We should never be requeueing a futex_q with a pi_state,
- * which is awaiting a futex_unlock_pi().
- */
- if ((requeue_pi && !this->rt_waiter) ||
- (!requeue_pi && this->rt_waiter) ||
- this->pi_state) {
- ret = -EINVAL;
- break;
+ switch (ret) {
+ case 0:
+ /* We hold a reference on the pi state. */
+ break;
+
+ case 1:
+ /*
+ * futex_proxy_trylock_atomic() acquired the user space
+ * futex. Adjust task_count.
+ */
+ task_count++;
+ ret = 0;
+ break;
+
+ /*
+ * If the above failed, then pi_state is NULL and
+ * waiter::requeue_state is correct.
+ */
+ case -EFAULT:
+ futex_hb_waiters_dec(hb2);
+ double_unlock_hb(hb1, hb2);
+ ret = fault_in_user_writeable(uaddr2);
+ if (!ret)
+ goto retry;
+ return ret;
+ case -EBUSY:
+ case -EAGAIN:
+ /*
+ * Two reasons for this:
+ * - EBUSY: Owner is exiting and we just wait for the
+ * exit to complete.
+ * - EAGAIN: The user space value changed.
+ */
+ futex_hb_waiters_dec(hb2);
+ double_unlock_hb(hb1, hb2);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
+ cond_resched();
+ goto retry;
+ default:
+ goto out_unlock;
+ }
}
- /* Plain futexes just wake or requeue and are done */
- if (!requeue_pi) {
- if (++task_count <= nr_wake)
- this->wake(&wake_q, this);
- else
- requeue_futex(this, hb1, hb2, &key2);
- continue;
- }
+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
+ if (task_count - nr_wake >= nr_requeue)
+ break;
- /* Ensure we requeue to the expected futex for requeue_pi. */
- if (!futex_match(this->requeue_pi_key, &key2)) {
- ret = -EINVAL;
- break;
- }
+ if (!futex_match(&this->key, &key1))
+ continue;
- /*
- * Requeue nr_requeue waiters and possibly one more in the case
- * of requeue_pi if we couldn't acquire the lock atomically.
- *
- * Prepare the waiter to take the rt_mutex. Take a refcount
- * on the pi_state and store the pointer in the futex_q
- * object of the waiter.
- */
- get_pi_state(pi_state);
-
- /* Don't requeue when the waiter is already on the way out. */
- if (!futex_requeue_pi_prepare(this, pi_state)) {
/*
- * Early woken waiter signaled that it is on the
- * way out. Drop the pi_state reference and try the
- * next waiter. @this->pi_state is still NULL.
+ * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+ * be paired with each other and no other futex ops.
+ *
+ * We should never be requeueing a futex_q with a pi_state,
+ * which is awaiting a futex_unlock_pi().
*/
- put_pi_state(pi_state);
- continue;
- }
+ if ((requeue_pi && !this->rt_waiter) ||
+ (!requeue_pi && this->rt_waiter) ||
+ this->pi_state) {
+ ret = -EINVAL;
+ break;
+ }
+
+ /* Plain futexes just wake or requeue and are done */
+ if (!requeue_pi) {
+ if (++task_count <= nr_wake)
+ this->wake(&wake_q, this);
+ else
+ requeue_futex(this, hb1, hb2, &key2);
+ continue;
+ }
+
+ /* Ensure we requeue to the expected futex for requeue_pi. */
+ if (!futex_match(this->requeue_pi_key, &key2)) {
+ ret = -EINVAL;
+ break;
+ }
- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
- this->rt_waiter,
- this->task);
-
- if (ret == 1) {
- /*
- * We got the lock. We do neither drop the refcount
- * on pi_state nor clear this->pi_state because the
- * waiter needs the pi_state for cleaning up the
- * user space value. It will drop the refcount
- * after doing so. this::requeue_state is updated
- * in the wakeup as well.
- */
- requeue_pi_wake_futex(this, &key2, hb2);
- task_count++;
- } else if (!ret) {
- /* Waiter is queued, move it to hb2 */
- requeue_futex(this, hb1, hb2, &key2);
- futex_requeue_pi_complete(this, 0);
- task_count++;
- } else {
/*
- * rt_mutex_start_proxy_lock() detected a potential
- * deadlock when we tried to queue that waiter.
- * Drop the pi_state reference which we took above
- * and remove the pointer to the state from the
- * waiters futex_q object.
+ * Requeue nr_requeue waiters and possibly one more in the case
+ * of requeue_pi if we couldn't acquire the lock atomically.
+ *
+ * Prepare the waiter to take the rt_mutex. Take a refcount
+ * on the pi_state and store the pointer in the futex_q
+ * object of the waiter.
*/
- this->pi_state = NULL;
- put_pi_state(pi_state);
- futex_requeue_pi_complete(this, ret);
- /*
- * We stop queueing more waiters and let user space
- * deal with the mess.
- */
- break;
+ get_pi_state(pi_state);
+
+ /* Don't requeue when the waiter is already on the way out. */
+ if (!futex_requeue_pi_prepare(this, pi_state)) {
+ /*
+ * Early woken waiter signaled that it is on the
+ * way out. Drop the pi_state reference and try the
+ * next waiter. @this->pi_state is still NULL.
+ */
+ put_pi_state(pi_state);
+ continue;
+ }
+
+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+ this->rt_waiter,
+ this->task);
+
+ if (ret == 1) {
+ /*
+ * We got the lock. We do neither drop the refcount
+ * on pi_state nor clear this->pi_state because the
+ * waiter needs the pi_state for cleaning up the
+ * user space value. It will drop the refcount
+ * after doing so. this::requeue_state is updated
+ * in the wakeup as well.
+ */
+ requeue_pi_wake_futex(this, &key2, hb2);
+ task_count++;
+ } else if (!ret) {
+ /* Waiter is queued, move it to hb2 */
+ requeue_futex(this, hb1, hb2, &key2);
+ futex_requeue_pi_complete(this, 0);
+ task_count++;
+ } else {
+ /*
+ * rt_mutex_start_proxy_lock() detected a potential
+ * deadlock when we tried to queue that waiter.
+ * Drop the pi_state reference which we took above
+ * and remove the pointer to the state from the
+ * waiters futex_q object.
+ */
+ this->pi_state = NULL;
+ put_pi_state(pi_state);
+ futex_requeue_pi_complete(this, ret);
+ /*
+ * We stop queueing more waiters and let user space
+ * deal with the mess.
+ */
+ break;
+ }
}
- }
- /*
- * We took an extra initial reference to the pi_state in
- * futex_proxy_trylock_atomic(). We need to drop it here again.
- */
- put_pi_state(pi_state);
+ /*
+ * We took an extra initial reference to the pi_state in
+ * futex_proxy_trylock_atomic(). We need to drop it here again.
+ */
+ put_pi_state(pi_state);
out_unlock:
- double_unlock_hb(hb1, hb2);
+ futex_hb_waiters_dec(hb2);
+ double_unlock_hb(hb1, hb2);
+ }
wake_up_q(&wake_q);
- futex_hb_waiters_dec(hb2);
return ret ? ret : task_count;
}
@@ -769,7 +780,6 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
{
struct hrtimer_sleeper timeout, *to;
struct rt_mutex_waiter rt_waiter;
- struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
struct rt_mutex_base *pi_mutex;
@@ -805,35 +815,28 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* Prepare to wait on uaddr. On success, it holds hb->lock and q
* is initialized.
*/
- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
+ ret = futex_wait_setup(uaddr, val, flags, &q, &key2, current);
if (ret)
goto out;
- /*
- * The check above which compares uaddrs is not sufficient for
- * shared futexes. We need to compare the keys:
- */
- if (futex_match(&q.key, &key2)) {
- futex_q_unlock(hb);
- ret = -EINVAL;
- goto out;
- }
-
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
- futex_wait_queue(hb, &q, to);
+ futex_do_wait(&q, to);
switch (futex_requeue_pi_wakeup_sync(&q)) {
case Q_REQUEUE_PI_IGNORE:
- /* The waiter is still on uaddr1 */
- spin_lock(&hb->lock);
- ret = handle_early_requeue_pi_wakeup(hb, &q, to);
- spin_unlock(&hb->lock);
+ {
+ CLASS(hb, hb)(&q.key);
+ /* The waiter is still on uaddr1 */
+ spin_lock(&hb->lock);
+ ret = handle_early_requeue_pi_wakeup(hb, &q, to);
+ spin_unlock(&hb->lock);
+ }
break;
case Q_REQUEUE_PI_LOCKED:
/* The requeue acquired the lock */
if (q.pi_state && (q.pi_state->owner != current)) {
- spin_lock(q.lock_ptr);
+ futex_q_lockptr_lock(&q);
ret = fixup_pi_owner(uaddr2, &q, true);
/*
* Drop the reference to the pi state which the
@@ -860,7 +863,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
ret = 0;
- spin_lock(q.lock_ptr);
+ futex_q_lockptr_lock(&q);
debug_rt_mutex_free_waiter(&rt_waiter);
/*
* Fixup the pi_state owner and possibly acquire the lock if we
@@ -892,6 +895,11 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
default:
BUG();
}
+ if (q.drop_hb_ref) {
+ CLASS(hb, hb)(&q.key);
+ /* Additional reference from requeue_pi_wake_futex() */
+ futex_hash_put(hb);
+ }
out:
if (to) {
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 25877d4f2f8f..e2bbe5509ec2 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -154,7 +154,6 @@ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
*/
int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
- struct futex_hash_bucket *hb;
struct futex_q *this, *next;
union futex_key key = FUTEX_KEY_INIT;
DEFINE_WAKE_Q(wake_q);
@@ -170,7 +169,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if ((flags & FLAGS_STRICT) && !nr_wake)
return 0;
- hb = futex_hash(&key);
+ CLASS(hb, hb)(&key);
/* Make sure we really have tasks to wakeup */
if (!futex_hb_waiters_pending(hb))
@@ -253,7 +252,6 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
int nr_wake, int nr_wake2, int op)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
- struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
int ret, op_ret;
DEFINE_WAKE_Q(wake_q);
@@ -266,67 +264,69 @@ retry:
if (unlikely(ret != 0))
return ret;
- hb1 = futex_hash(&key1);
- hb2 = futex_hash(&key2);
-
retry_private:
- double_lock_hb(hb1, hb2);
- op_ret = futex_atomic_op_inuser(op, uaddr2);
- if (unlikely(op_ret < 0)) {
- double_unlock_hb(hb1, hb2);
-
- if (!IS_ENABLED(CONFIG_MMU) ||
- unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
- /*
- * we don't get EFAULT from MMU faults if we don't have
- * an MMU, but we might get them from range checking
- */
- ret = op_ret;
- return ret;
- }
-
- if (op_ret == -EFAULT) {
- ret = fault_in_user_writeable(uaddr2);
- if (ret)
+ if (1) {
+ CLASS(hb, hb1)(&key1);
+ CLASS(hb, hb2)(&key2);
+
+ double_lock_hb(hb1, hb2);
+ op_ret = futex_atomic_op_inuser(op, uaddr2);
+ if (unlikely(op_ret < 0)) {
+ double_unlock_hb(hb1, hb2);
+
+ if (!IS_ENABLED(CONFIG_MMU) ||
+ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
+ /*
+ * we don't get EFAULT from MMU faults if we don't have
+ * an MMU, but we might get them from range checking
+ */
+ ret = op_ret;
return ret;
- }
-
- cond_resched();
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
- goto retry;
- }
+ }
- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
- if (futex_match (&this->key, &key1)) {
- if (this->pi_state || this->rt_waiter) {
- ret = -EINVAL;
- goto out_unlock;
+ if (op_ret == -EFAULT) {
+ ret = fault_in_user_writeable(uaddr2);
+ if (ret)
+ return ret;
}
- this->wake(&wake_q, this);
- if (++ret >= nr_wake)
- break;
+
+ cond_resched();
+ if (!(flags & FLAGS_SHARED))
+ goto retry_private;
+ goto retry;
}
- }
- if (op_ret > 0) {
- op_ret = 0;
- plist_for_each_entry_safe(this, next, &hb2->chain, list) {
- if (futex_match (&this->key, &key2)) {
+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
+ if (futex_match(&this->key, &key1)) {
if (this->pi_state || this->rt_waiter) {
ret = -EINVAL;
goto out_unlock;
}
this->wake(&wake_q, this);
- if (++op_ret >= nr_wake2)
+ if (++ret >= nr_wake)
break;
}
}
- ret += op_ret;
- }
+
+ if (op_ret > 0) {
+ op_ret = 0;
+ plist_for_each_entry_safe(this, next, &hb2->chain, list) {
+ if (futex_match(&this->key, &key2)) {
+ if (this->pi_state || this->rt_waiter) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ this->wake(&wake_q, this);
+ if (++op_ret >= nr_wake2)
+ break;
+ }
+ }
+ ret += op_ret;
+ }
out_unlock:
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb(hb1, hb2);
+ }
wake_up_q(&wake_q);
return ret;
}
@@ -334,23 +334,12 @@ out_unlock:
static long futex_wait_restart(struct restart_block *restart);
/**
- * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
- * @hb: the futex hash bucket, must be locked by the caller
+ * futex_do_wait() - wait for wakeup, timeout, or signal
* @q: the futex_q to queue up on
* @timeout: the prepared hrtimer_sleeper, or null for no timeout
*/
-void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
- struct hrtimer_sleeper *timeout)
+void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout)
{
- /*
- * The task state is guaranteed to be set before another task can
- * wake it. set_current_state() is implemented using smp_store_mb() and
- * futex_queue() calls spin_unlock() upon completion, both serializing
- * access to the hash list and forcing another memory barrier.
- */
- set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
- futex_queue(q, hb, current);
-
/* Arm the timer */
if (timeout)
hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
@@ -412,12 +401,17 @@ int futex_unqueue_multiple(struct futex_vector *v, int count)
*/
int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
{
- struct futex_hash_bucket *hb;
bool retry = false;
int ret, i;
u32 uval;
/*
+ * Make sure to have a reference on the private_hash such that we
+ * don't block on rehash after changing the task state below.
+ */
+ guard(private_hash)();
+
+ /*
* Enqueuing multiple futexes is tricky, because we need to enqueue
* each futex on the list before dealing with the next one to avoid
* deadlocking on the hash bucket. But, before enqueuing, we need to
@@ -451,20 +445,24 @@ retry:
struct futex_q *q = &vs[i].q;
u32 val = vs[i].w.val;
- hb = futex_q_lock(q);
- ret = futex_get_value_locked(&uval, uaddr);
+ if (1) {
+ CLASS(hb, hb)(&q->key);
- if (!ret && uval == val) {
- /*
- * The bucket lock can't be held while dealing with the
- * next futex. Queue each futex at this moment so hb can
- * be unlocked.
- */
- futex_queue(q, hb, current);
- continue;
- }
+ futex_q_lock(q, hb);
+ ret = futex_get_value_locked(&uval, uaddr);
+
+ if (!ret && uval == val) {
+ /*
+ * The bucket lock can't be held while dealing with the
+ * next futex. Queue each futex at this moment so hb can
+ * be unlocked.
+ */
+ futex_queue(q, hb, current);
+ continue;
+ }
- futex_q_unlock(hb);
+ futex_q_unlock(hb);
+ }
__set_current_state(TASK_RUNNING);
/*
@@ -578,7 +576,8 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
* @val: the expected value
* @flags: futex flags (FLAGS_SHARED, etc.)
* @q: the associated futex_q
- * @hb: storage for hash_bucket pointer to be returned to caller
+ * @key2: the second futex_key if used for requeue PI
+ * @task: Task queueing this futex
*
* Setup the futex_q and locate the hash_bucket. Get the futex value and
* compare it with the expected value. Handle atomic faults internally.
@@ -586,10 +585,12 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
*
* Return:
* - 0 - uaddr contains val and hb has been locked;
- * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
+ * - <0 - On error and the hb is unlocked. A possible reason: the uaddr can not
+ * be read, does not contain the expected value or is not properly aligned.
*/
int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- struct futex_q *q, struct futex_hash_bucket **hb)
+ struct futex_q *q, union futex_key *key2,
+ struct task_struct *task)
{
u32 uval;
int ret;
@@ -618,26 +619,45 @@ retry:
return ret;
retry_private:
- *hb = futex_q_lock(q);
+ if (1) {
+ CLASS(hb, hb)(&q->key);
- ret = futex_get_value_locked(&uval, uaddr);
+ futex_q_lock(q, hb);
- if (ret) {
- futex_q_unlock(*hb);
+ ret = futex_get_value_locked(&uval, uaddr);
- ret = get_user(uval, uaddr);
- if (ret)
- return ret;
+ if (ret) {
+ futex_q_unlock(hb);
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
+ ret = get_user(uval, uaddr);
+ if (ret)
+ return ret;
- goto retry;
- }
+ if (!(flags & FLAGS_SHARED))
+ goto retry_private;
+
+ goto retry;
+ }
- if (uval != val) {
- futex_q_unlock(*hb);
- ret = -EWOULDBLOCK;
+ if (uval != val) {
+ futex_q_unlock(hb);
+ return -EWOULDBLOCK;
+ }
+
+ if (key2 && futex_match(&q->key, key2)) {
+ futex_q_unlock(hb);
+ return -EINVAL;
+ }
+
+ /*
+ * The task state is guaranteed to be set before another task can
+ * wake it. set_current_state() is implemented using smp_store_mb() and
+ * futex_queue() calls spin_unlock() upon completion, both serializing
+ * access to the hash list and forcing another memory barrier.
+ */
+ if (task == current)
+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+ futex_queue(q, hb, task);
}
return ret;
@@ -647,7 +667,6 @@ int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
struct hrtimer_sleeper *to, u32 bitset)
{
struct futex_q q = futex_q_init;
- struct futex_hash_bucket *hb;
int ret;
if (!bitset)
@@ -660,12 +679,12 @@ retry:
* Prepare to wait on uaddr. On success, it holds hb->lock and q
* is initialized.
*/
- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
+ ret = futex_wait_setup(uaddr, val, flags, &q, NULL, current);
if (ret)
return ret;
/* futex_queue and wait for wakeup, timeout, or a signal. */
- futex_wait_queue(hb, &q, to);
+ futex_do_wait(&q, to);
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!futex_unqueue(&q))
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index ae60cae24e9a..d0af8a8b3ae6 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -43,18 +43,16 @@ unsigned long probe_irq_on(void)
* flush such a longstanding irq before considering it as spurious.
*/
for_each_irq_desc_reverse(i, desc) {
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
if (!desc->action && irq_settings_can_probe(desc)) {
/*
* Some chips need to know about probing in
* progress:
*/
if (desc->irq_data.chip->irq_set_type)
- desc->irq_data.chip->irq_set_type(&desc->irq_data,
- IRQ_TYPE_PROBE);
+ desc->irq_data.chip->irq_set_type(&desc->irq_data, IRQ_TYPE_PROBE);
irq_activate_and_startup(desc, IRQ_NORESEND);
}
- raw_spin_unlock_irq(&desc->lock);
}
/* Wait for longstanding interrupts to trigger. */
@@ -66,13 +64,12 @@ unsigned long probe_irq_on(void)
* happened in the previous stage, it may have masked itself)
*/
for_each_irq_desc_reverse(i, desc) {
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
if (!desc->action && irq_settings_can_probe(desc)) {
desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
if (irq_activate_and_startup(desc, IRQ_NORESEND))
desc->istate |= IRQS_PENDING;
}
- raw_spin_unlock_irq(&desc->lock);
}
/*
@@ -84,18 +81,16 @@ unsigned long probe_irq_on(void)
* Now filter out any obviously spurious interrupts
*/
for_each_irq_desc(i, desc) {
- raw_spin_lock_irq(&desc->lock);
-
+ guard(raw_spinlock_irq)(&desc->lock);
if (desc->istate & IRQS_AUTODETECT) {
/* It triggered already - consider it spurious. */
if (!(desc->istate & IRQS_WAITING)) {
desc->istate &= ~IRQS_AUTODETECT;
irq_shutdown_and_deactivate(desc);
- } else
- if (i < 32)
- mask |= 1 << i;
+ } else if (i < 32) {
+ mask |= 1 << i;
+ }
}
- raw_spin_unlock_irq(&desc->lock);
}
return mask;
@@ -121,7 +116,7 @@ unsigned int probe_irq_mask(unsigned long val)
int i;
for_each_irq_desc(i, desc) {
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
if (desc->istate & IRQS_AUTODETECT) {
if (i < 16 && !(desc->istate & IRQS_WAITING))
mask |= 1 << i;
@@ -129,7 +124,6 @@ unsigned int probe_irq_mask(unsigned long val)
desc->istate &= ~IRQS_AUTODETECT;
irq_shutdown_and_deactivate(desc);
}
- raw_spin_unlock_irq(&desc->lock);
}
mutex_unlock(&probing_active);
@@ -160,8 +154,7 @@ int probe_irq_off(unsigned long val)
struct irq_desc *desc;
for_each_irq_desc(i, desc) {
- raw_spin_lock_irq(&desc->lock);
-
+ guard(raw_spinlock_irq)(&desc->lock);
if (desc->istate & IRQS_AUTODETECT) {
if (!(desc->istate & IRQS_WAITING)) {
if (!nr_of_irqs)
@@ -171,7 +164,6 @@ int probe_irq_off(unsigned long val)
desc->istate &= ~IRQS_AUTODETECT;
irq_shutdown_and_deactivate(desc);
}
- raw_spin_unlock_irq(&desc->lock);
}
mutex_unlock(&probing_active);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 36cf1b09cc84..b0e0a7332993 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,98 +34,80 @@ struct irqaction chained_action = {
};
/**
- * irq_set_chip - set the irq chip for an irq
- * @irq: irq number
- * @chip: pointer to irq chip description structure
+ * irq_set_chip - set the irq chip for an irq
+ * @irq: irq number
+ * @chip: pointer to irq chip description structure
*/
int irq_set_chip(unsigned int irq, const struct irq_chip *chip)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
+ int ret = -EINVAL;
- if (!desc)
- return -EINVAL;
-
- desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
- irq_put_desc_unlock(desc, flags);
- /*
- * For !CONFIG_SPARSE_IRQ make the irq show up in
- * allocated_irqs.
- */
- irq_mark_irq(irq);
- return 0;
+ scoped_irqdesc_get_and_lock(irq, 0) {
+ scoped_irqdesc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
+ ret = 0;
+ }
+ /* For !CONFIG_SPARSE_IRQ make the irq show up in allocated_irqs. */
+ if (!ret)
+ irq_mark_irq(irq);
+ return ret;
}
EXPORT_SYMBOL(irq_set_chip);
/**
- * irq_set_irq_type - set the irq trigger type for an irq
- * @irq: irq number
- * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
+ * irq_set_irq_type - set the irq trigger type for an irq
+ * @irq: irq number
+ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
*/
int irq_set_irq_type(unsigned int irq, unsigned int type)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
- int ret = 0;
-
- if (!desc)
- return -EINVAL;
-
- ret = __irq_set_trigger(desc, type);
- irq_put_desc_busunlock(desc, flags);
- return ret;
+ scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL)
+ return __irq_set_trigger(scoped_irqdesc, type);
+ return -EINVAL;
}
EXPORT_SYMBOL(irq_set_irq_type);
/**
- * irq_set_handler_data - set irq handler data for an irq
- * @irq: Interrupt number
- * @data: Pointer to interrupt specific data
+ * irq_set_handler_data - set irq handler data for an irq
+ * @irq: Interrupt number
+ * @data: Pointer to interrupt specific data
*
- * Set the hardware irq controller data for an irq
+ * Set the hardware irq controller data for an irq
*/
int irq_set_handler_data(unsigned int irq, void *data)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
-
- if (!desc)
- return -EINVAL;
- desc->irq_common_data.handler_data = data;
- irq_put_desc_unlock(desc, flags);
- return 0;
+ scoped_irqdesc_get_and_lock(irq, 0) {
+ scoped_irqdesc->irq_common_data.handler_data = data;
+ return 0;
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL(irq_set_handler_data);
/**
- * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
- * @irq_base: Interrupt number base
- * @irq_offset: Interrupt number offset
- * @entry: Pointer to MSI descriptor data
+ * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
+ * @irq_base: Interrupt number base
+ * @irq_offset: Interrupt number offset
+ * @entry: Pointer to MSI descriptor data
*
- * Set the MSI descriptor entry for an irq at offset
+ * Set the MSI descriptor entry for an irq at offset
*/
-int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
- struct msi_desc *entry)
-{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
-
- if (!desc)
- return -EINVAL;
- desc->irq_common_data.msi_desc = entry;
- if (entry && !irq_offset)
- entry->irq = irq_base;
- irq_put_desc_unlock(desc, flags);
- return 0;
+int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, struct msi_desc *entry)
+{
+ scoped_irqdesc_get_and_lock(irq_base + irq_offset, IRQ_GET_DESC_CHECK_GLOBAL) {
+ scoped_irqdesc->irq_common_data.msi_desc = entry;
+ if (entry && !irq_offset)
+ entry->irq = irq_base;
+ return 0;
+ }
+ return -EINVAL;
}
/**
- * irq_set_msi_desc - set MSI descriptor data for an irq
- * @irq: Interrupt number
- * @entry: Pointer to MSI descriptor data
+ * irq_set_msi_desc - set MSI descriptor data for an irq
+ * @irq: Interrupt number
+ * @entry: Pointer to MSI descriptor data
*
- * Set the MSI descriptor entry for an irq
+ * Set the MSI descriptor entry for an irq
*/
int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
{
@@ -133,22 +115,19 @@ int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
}
/**
- * irq_set_chip_data - set irq chip data for an irq
- * @irq: Interrupt number
- * @data: Pointer to chip specific data
+ * irq_set_chip_data - set irq chip data for an irq
+ * @irq: Interrupt number
+ * @data: Pointer to chip specific data
*
- * Set the hardware irq chip data for an irq
+ * Set the hardware irq chip data for an irq
*/
int irq_set_chip_data(unsigned int irq, void *data)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
-
- if (!desc)
- return -EINVAL;
- desc->irq_data.chip_data = data;
- irq_put_desc_unlock(desc, flags);
- return 0;
+ scoped_irqdesc_get_and_lock(irq, 0) {
+ scoped_irqdesc->irq_data.chip_data = data;
+ return 0;
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL(irq_set_chip_data);
@@ -223,6 +202,19 @@ __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
return IRQ_STARTUP_ABORT;
return IRQ_STARTUP_MANAGED;
}
+
+void irq_startup_managed(struct irq_desc *desc)
+{
+ /*
+ * Only start it up when the disable depth is 1, so that a disable,
+ * hotunplug, hotplug sequence does not end up enabling it during
+ * hotplug unconditionally.
+ */
+ desc->depth--;
+ if (!desc->depth)
+ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+}
+
#else
static __always_inline int
__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
@@ -290,6 +282,7 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
ret = __irq_startup(desc);
break;
case IRQ_STARTUP_ABORT:
+ desc->depth = 1;
irqd_set_managed_shutdown(d);
return 0;
}
@@ -322,7 +315,13 @@ void irq_shutdown(struct irq_desc *desc)
{
if (irqd_is_started(&desc->irq_data)) {
clear_irq_resend(desc);
- desc->depth = 1;
+ /*
+ * Increment disable depth, so that a managed shutdown on
+ * CPU hotunplug preserves the actual disabled state when the
+ * CPU comes back online. See irq_startup_managed().
+ */
+ desc->depth++;
+
if (desc->irq_data.chip->irq_shutdown) {
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
irq_state_set_disabled(desc);
@@ -450,48 +449,6 @@ void unmask_threaded_irq(struct irq_desc *desc)
unmask_irq(desc);
}
-/*
- * handle_nested_irq - Handle a nested irq from a irq thread
- * @irq: the interrupt number
- *
- * Handle interrupts which are nested into a threaded interrupt
- * handler. The handler function is called inside the calling
- * threads context.
- */
-void handle_nested_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irqaction *action;
- irqreturn_t action_ret;
-
- might_sleep();
-
- raw_spin_lock_irq(&desc->lock);
-
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-
- action = desc->action;
- if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
- raw_spin_unlock_irq(&desc->lock);
- return;
- }
-
- kstat_incr_irqs_this_cpu(desc);
- atomic_inc(&desc->threads_active);
- raw_spin_unlock_irq(&desc->lock);
-
- action_ret = IRQ_NONE;
- for_each_action_of_desc(desc, action)
- action_ret |= action->thread_fn(action->irq, action->dev_id);
-
- if (!irq_settings_no_debug(desc))
- note_interrupt(desc, action_ret);
-
- wake_threads_waitq(desc);
-}
-EXPORT_SYMBOL_GPL(handle_nested_irq);
-
static bool irq_check_poll(struct irq_desc *desc)
{
if (!(desc->istate & IRQS_POLL_INPROGRESS))
@@ -499,7 +456,7 @@ static bool irq_check_poll(struct irq_desc *desc)
return irq_wait_for_poll(desc);
}
-static bool irq_may_run(struct irq_desc *desc)
+static bool irq_can_handle_pm(struct irq_desc *desc)
{
unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED;
@@ -524,77 +481,111 @@ static bool irq_may_run(struct irq_desc *desc)
return irq_check_poll(desc);
}
+static inline bool irq_can_handle_actions(struct irq_desc *desc)
+{
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+
+ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
+ desc->istate |= IRQS_PENDING;
+ return false;
+ }
+ return true;
+}
+
+static inline bool irq_can_handle(struct irq_desc *desc)
+{
+ if (!irq_can_handle_pm(desc))
+ return false;
+
+ return irq_can_handle_actions(desc);
+}
+
/**
- * handle_simple_irq - Simple and software-decoded IRQs.
- * @desc: the interrupt description structure for this irq
- *
- * Simple interrupts are either sent from a demultiplexing interrupt
- * handler or come from hardware, where no interrupt hardware control
- * is necessary.
+ * handle_nested_irq - Handle a nested irq from a irq thread
+ * @irq: the interrupt number
*
- * Note: The caller is expected to handle the ack, clear, mask and
- * unmask issues if necessary.
+ * Handle interrupts which are nested into a threaded interrupt
+ * handler. The handler function is called inside the calling threads
+ * context.
*/
-void handle_simple_irq(struct irq_desc *desc)
+void handle_nested_irq(unsigned int irq)
{
- raw_spin_lock(&desc->lock);
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irqaction *action;
+ irqreturn_t action_ret;
- if (!irq_may_run(desc))
- goto out_unlock;
+ might_sleep();
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+ scoped_guard(raw_spinlock_irq, &desc->lock) {
+ if (!irq_can_handle_actions(desc))
+ return;
- if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
- goto out_unlock;
+ action = desc->action;
+ kstat_incr_irqs_this_cpu(desc);
+ atomic_inc(&desc->threads_active);
}
+ action_ret = IRQ_NONE;
+ for_each_action_of_desc(desc, action)
+ action_ret |= action->thread_fn(action->irq, action->dev_id);
+
+ if (!irq_settings_no_debug(desc))
+ note_interrupt(desc, action_ret);
+
+ wake_threads_waitq(desc);
+}
+EXPORT_SYMBOL_GPL(handle_nested_irq);
+
+/**
+ * handle_simple_irq - Simple and software-decoded IRQs.
+ * @desc: the interrupt description structure for this irq
+ *
+ * Simple interrupts are either sent from a demultiplexing interrupt
+ * handler or come from hardware, where no interrupt hardware control is
+ * necessary.
+ *
+ * Note: The caller is expected to handle the ack, clear, mask and unmask
+ * issues if necessary.
+ */
+void handle_simple_irq(struct irq_desc *desc)
+{
+ guard(raw_spinlock)(&desc->lock);
+
+ if (!irq_can_handle(desc))
+ return;
+
kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
-
-out_unlock:
- raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_simple_irq);
/**
- * handle_untracked_irq - Simple and software-decoded IRQs.
- * @desc: the interrupt description structure for this irq
+ * handle_untracked_irq - Simple and software-decoded IRQs.
+ * @desc: the interrupt description structure for this irq
*
- * Untracked interrupts are sent from a demultiplexing interrupt
- * handler when the demultiplexer does not know which device it its
- * multiplexed irq domain generated the interrupt. IRQ's handled
- * through here are not subjected to stats tracking, randomness, or
- * spurious interrupt detection.
+ * Untracked interrupts are sent from a demultiplexing interrupt handler
+ * when the demultiplexer does not know which device it its multiplexed irq
+ * domain generated the interrupt. IRQ's handled through here are not
+ * subjected to stats tracking, randomness, or spurious interrupt
+ * detection.
*
- * Note: Like handle_simple_irq, the caller is expected to handle
- * the ack, clear, mask and unmask issues if necessary.
+ * Note: Like handle_simple_irq, the caller is expected to handle the ack,
+ * clear, mask and unmask issues if necessary.
*/
void handle_untracked_irq(struct irq_desc *desc)
{
- raw_spin_lock(&desc->lock);
-
- if (!irq_may_run(desc))
- goto out_unlock;
-
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+ scoped_guard(raw_spinlock, &desc->lock) {
+ if (!irq_can_handle(desc))
+ return;
- if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
- goto out_unlock;
+ desc->istate &= ~IRQS_PENDING;
+ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
}
- desc->istate &= ~IRQS_PENDING;
- irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
- raw_spin_unlock(&desc->lock);
-
__handle_irq_event_percpu(desc);
- raw_spin_lock(&desc->lock);
- irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
-
-out_unlock:
- raw_spin_unlock(&desc->lock);
+ scoped_guard(raw_spinlock, &desc->lock)
+ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
}
EXPORT_SYMBOL_GPL(handle_untracked_irq);
@@ -617,40 +608,26 @@ static void cond_unmask_irq(struct irq_desc *desc)
}
/**
- * handle_level_irq - Level type irq handler
- * @desc: the interrupt description structure for this irq
+ * handle_level_irq - Level type irq handler
+ * @desc: the interrupt description structure for this irq
*
- * Level type interrupts are active as long as the hardware line has
- * the active level. This may require to mask the interrupt and unmask
- * it after the associated handler has acknowledged the device, so the
- * interrupt line is back to inactive.
+ * Level type interrupts are active as long as the hardware line has the
+ * active level. This may require to mask the interrupt and unmask it after
+ * the associated handler has acknowledged the device, so the interrupt
+ * line is back to inactive.
*/
void handle_level_irq(struct irq_desc *desc)
{
- raw_spin_lock(&desc->lock);
+ guard(raw_spinlock)(&desc->lock);
mask_ack_irq(desc);
- if (!irq_may_run(desc))
- goto out_unlock;
-
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-
- /*
- * If its disabled or no action available
- * keep it masked and get out of here
- */
- if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
- goto out_unlock;
- }
+ if (!irq_can_handle(desc))
+ return;
kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
cond_unmask_irq(desc);
-
-out_unlock:
- raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_level_irq);
@@ -675,42 +652,43 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
}
}
+static inline void cond_eoi_irq(struct irq_chip *chip, struct irq_data *data)
+{
+ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED))
+ chip->irq_eoi(data);
+}
+
/**
- * handle_fasteoi_irq - irq handler for transparent controllers
- * @desc: the interrupt description structure for this irq
+ * handle_fasteoi_irq - irq handler for transparent controllers
+ * @desc: the interrupt description structure for this irq
*
- * Only a single callback will be issued to the chip: an ->eoi()
- * call when the interrupt has been serviced. This enables support
- * for modern forms of interrupt handlers, which handle the flow
- * details in hardware, transparently.
+ * Only a single callback will be issued to the chip: an ->eoi() call when
+ * the interrupt has been serviced. This enables support for modern forms
+ * of interrupt handlers, which handle the flow details in hardware,
+ * transparently.
*/
void handle_fasteoi_irq(struct irq_desc *desc)
{
struct irq_chip *chip = desc->irq_data.chip;
- raw_spin_lock(&desc->lock);
+ guard(raw_spinlock)(&desc->lock);
/*
* When an affinity change races with IRQ handling, the next interrupt
* can arrive on the new CPU before the original CPU has completed
* handling the previous one - it may need to be resent.
*/
- if (!irq_may_run(desc)) {
+ if (!irq_can_handle_pm(desc)) {
if (irqd_needs_resend_when_in_progress(&desc->irq_data))
desc->istate |= IRQS_PENDING;
- goto out;
+ cond_eoi_irq(chip, &desc->irq_data);
+ return;
}
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-
- /*
- * If its disabled or no action available
- * then mask it and get out of here:
- */
- if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
+ if (!irq_can_handle_actions(desc)) {
mask_irq(desc);
- goto out;
+ cond_eoi_irq(chip, &desc->irq_data);
+ return;
}
kstat_incr_irqs_this_cpu(desc);
@@ -726,13 +704,6 @@ void handle_fasteoi_irq(struct irq_desc *desc)
*/
if (unlikely(desc->istate & IRQS_PENDING))
check_irq_resend(desc, false);
-
- raw_spin_unlock(&desc->lock);
- return;
-out:
- if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED))
- chip->irq_eoi(&desc->irq_data);
- raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
@@ -770,40 +741,27 @@ void handle_fasteoi_nmi(struct irq_desc *desc)
EXPORT_SYMBOL_GPL(handle_fasteoi_nmi);
/**
- * handle_edge_irq - edge type IRQ handler
- * @desc: the interrupt description structure for this irq
+ * handle_edge_irq - edge type IRQ handler
+ * @desc: the interrupt description structure for this irq
*
- * Interrupt occurs on the falling and/or rising edge of a hardware
- * signal. The occurrence is latched into the irq controller hardware
- * and must be acked in order to be reenabled. After the ack another
- * interrupt can happen on the same source even before the first one
- * is handled by the associated event handler. If this happens it
- * might be necessary to disable (mask) the interrupt depending on the
- * controller hardware. This requires to reenable the interrupt inside
- * of the loop which handles the interrupts which have arrived while
- * the handler was running. If all pending interrupts are handled, the
- * loop is left.
+ * Interrupt occurs on the falling and/or rising edge of a hardware
+ * signal. The occurrence is latched into the irq controller hardware and
+ * must be acked in order to be reenabled. After the ack another interrupt
+ * can happen on the same source even before the first one is handled by
+ * the associated event handler. If this happens it might be necessary to
+ * disable (mask) the interrupt depending on the controller hardware. This
+ * requires to reenable the interrupt inside of the loop which handles the
+ * interrupts which have arrived while the handler was running. If all
+ * pending interrupts are handled, the loop is left.
*/
void handle_edge_irq(struct irq_desc *desc)
{
- raw_spin_lock(&desc->lock);
-
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-
- if (!irq_may_run(desc)) {
- desc->istate |= IRQS_PENDING;
- mask_ack_irq(desc);
- goto out_unlock;
- }
+ guard(raw_spinlock)(&desc->lock);
- /*
- * If its disabled or no action available then mask it and get
- * out of here.
- */
- if (irqd_irq_disabled(&desc->irq_data) || !desc->action) {
+ if (!irq_can_handle(desc)) {
desc->istate |= IRQS_PENDING;
mask_ack_irq(desc);
- goto out_unlock;
+ return;
}
kstat_incr_irqs_this_cpu(desc);
@@ -814,7 +772,7 @@ void handle_edge_irq(struct irq_desc *desc)
do {
if (unlikely(!desc->action)) {
mask_irq(desc);
- goto out_unlock;
+ return;
}
/*
@@ -830,11 +788,7 @@ void handle_edge_irq(struct irq_desc *desc)
handle_irq_event(desc);
- } while ((desc->istate & IRQS_PENDING) &&
- !irqd_irq_disabled(&desc->irq_data));
-
-out_unlock:
- raw_spin_unlock(&desc->lock);
+ } while ((desc->istate & IRQS_PENDING) && !irqd_irq_disabled(&desc->irq_data));
}
EXPORT_SYMBOL(handle_edge_irq);
@@ -1007,35 +961,23 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
}
}
-void
-__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
- const char *name)
+void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0);
-
- if (!desc)
- return;
-
- __irq_do_set_handler(desc, handle, is_chained, name);
- irq_put_desc_busunlock(desc, flags);
+ scoped_irqdesc_get_and_lock(irq, 0)
+ __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name);
}
EXPORT_SYMBOL_GPL(__irq_set_handler);
-void
-irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
- void *data)
+void irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
+ void *data)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0);
-
- if (!desc)
- return;
-
- desc->irq_common_data.handler_data = data;
- __irq_do_set_handler(desc, handle, 1, NULL);
+ scoped_irqdesc_get_and_buslock(irq, 0) {
+ struct irq_desc *desc = scoped_irqdesc;
- irq_put_desc_busunlock(desc, flags);
+ desc->irq_common_data.handler_data = data;
+ __irq_do_set_handler(desc, handle, 1, NULL);
+ }
}
EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data);
@@ -1050,38 +992,34 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name);
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
{
- unsigned long flags, trigger, tmp;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
-
- if (!desc)
- return;
-
- /*
- * Warn when a driver sets the no autoenable flag on an already
- * active interrupt.
- */
- WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN));
-
- irq_settings_clr_and_set(desc, clr, set);
+ scoped_irqdesc_get_and_lock(irq, 0) {
+ struct irq_desc *desc = scoped_irqdesc;
+ unsigned long trigger, tmp;
+ /*
+ * Warn when a driver sets the no autoenable flag on an already
+ * active interrupt.
+ */
+ WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN));
- trigger = irqd_get_trigger_type(&desc->irq_data);
+ irq_settings_clr_and_set(desc, clr, set);
- irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
- IRQD_TRIGGER_MASK | IRQD_LEVEL);
- if (irq_settings_has_no_balance_set(desc))
- irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
- if (irq_settings_is_per_cpu(desc))
- irqd_set(&desc->irq_data, IRQD_PER_CPU);
- if (irq_settings_is_level(desc))
- irqd_set(&desc->irq_data, IRQD_LEVEL);
+ trigger = irqd_get_trigger_type(&desc->irq_data);
- tmp = irq_settings_get_trigger_mask(desc);
- if (tmp != IRQ_TYPE_NONE)
- trigger = tmp;
+ irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
+ IRQD_TRIGGER_MASK | IRQD_LEVEL);
+ if (irq_settings_has_no_balance_set(desc))
+ irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ if (irq_settings_is_per_cpu(desc))
+ irqd_set(&desc->irq_data, IRQD_PER_CPU);
+ if (irq_settings_is_level(desc))
+ irqd_set(&desc->irq_data, IRQD_LEVEL);
- irqd_set(&desc->irq_data, trigger);
+ tmp = irq_settings_get_trigger_mask(desc);
+ if (tmp != IRQ_TYPE_NONE)
+ trigger = tmp;
- irq_put_desc_unlock(desc, flags);
+ irqd_set(&desc->irq_data, trigger);
+ }
}
EXPORT_SYMBOL_GPL(irq_modify_status);
@@ -1094,25 +1032,21 @@ EXPORT_SYMBOL_GPL(irq_modify_status);
*/
void irq_cpu_online(void)
{
- struct irq_desc *desc;
- struct irq_chip *chip;
- unsigned long flags;
unsigned int irq;
for_each_active_irq(irq) {
- desc = irq_to_desc(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_chip *chip;
+
if (!desc)
continue;
- raw_spin_lock_irqsave(&desc->lock, flags);
-
+ guard(raw_spinlock_irqsave)(&desc->lock);
chip = irq_data_get_irq_chip(&desc->irq_data);
if (chip && chip->irq_cpu_online &&
(!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) ||
!irqd_irq_disabled(&desc->irq_data)))
chip->irq_cpu_online(&desc->irq_data);
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}
@@ -1124,25 +1058,21 @@ void irq_cpu_online(void)
*/
void irq_cpu_offline(void)
{
- struct irq_desc *desc;
- struct irq_chip *chip;
- unsigned long flags;
unsigned int irq;
for_each_active_irq(irq) {
- desc = irq_to_desc(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_chip *chip;
+
if (!desc)
continue;
- raw_spin_lock_irqsave(&desc->lock, flags);
-
+ guard(raw_spinlock_irqsave)(&desc->lock);
chip = irq_data_get_irq_chip(&desc->irq_data);
if (chip && chip->irq_cpu_offline &&
(!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) ||
!irqd_irq_disabled(&desc->irq_data)))
chip->irq_cpu_offline(&desc->irq_data);
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}
#endif
@@ -1151,102 +1081,69 @@ void irq_cpu_offline(void)
#ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS
/**
- * handle_fasteoi_ack_irq - irq handler for edge hierarchy
- * stacked on transparent controllers
+ * handle_fasteoi_ack_irq - irq handler for edge hierarchy stacked on
+ * transparent controllers
*
- * @desc: the interrupt description structure for this irq
+ * @desc: the interrupt description structure for this irq
*
- * Like handle_fasteoi_irq(), but for use with hierarchy where
- * the irq_chip also needs to have its ->irq_ack() function
- * called.
+ * Like handle_fasteoi_irq(), but for use with hierarchy where the irq_chip
+ * also needs to have its ->irq_ack() function called.
*/
void handle_fasteoi_ack_irq(struct irq_desc *desc)
{
struct irq_chip *chip = desc->irq_data.chip;
- raw_spin_lock(&desc->lock);
-
- if (!irq_may_run(desc))
- goto out;
+ guard(raw_spinlock)(&desc->lock);
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+ if (!irq_can_handle_pm(desc)) {
+ cond_eoi_irq(chip, &desc->irq_data);
+ return;
+ }
- /*
- * If its disabled or no action available
- * then mask it and get out of here:
- */
- if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
+ if (unlikely(!irq_can_handle_actions(desc))) {
mask_irq(desc);
- goto out;
+ cond_eoi_irq(chip, &desc->irq_data);
+ return;
}
kstat_incr_irqs_this_cpu(desc);
if (desc->istate & IRQS_ONESHOT)
mask_irq(desc);
- /* Start handling the irq */
desc->irq_data.chip->irq_ack(&desc->irq_data);
handle_irq_event(desc);
cond_unmask_eoi_irq(desc, chip);
-
- raw_spin_unlock(&desc->lock);
- return;
-out:
- if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED))
- chip->irq_eoi(&desc->irq_data);
- raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq);
/**
- * handle_fasteoi_mask_irq - irq handler for level hierarchy
- * stacked on transparent controllers
+ * handle_fasteoi_mask_irq - irq handler for level hierarchy stacked on
+ * transparent controllers
*
- * @desc: the interrupt description structure for this irq
+ * @desc: the interrupt description structure for this irq
*
- * Like handle_fasteoi_irq(), but for use with hierarchy where
- * the irq_chip also needs to have its ->irq_mask_ack() function
- * called.
+ * Like handle_fasteoi_irq(), but for use with hierarchy where the irq_chip
+ * also needs to have its ->irq_mask_ack() function called.
*/
void handle_fasteoi_mask_irq(struct irq_desc *desc)
{
struct irq_chip *chip = desc->irq_data.chip;
- raw_spin_lock(&desc->lock);
+ guard(raw_spinlock)(&desc->lock);
mask_ack_irq(desc);
- if (!irq_may_run(desc))
- goto out;
-
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-
- /*
- * If its disabled or no action available
- * then mask it and get out of here:
- */
- if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
- desc->istate |= IRQS_PENDING;
- mask_irq(desc);
- goto out;
+ if (!irq_can_handle(desc)) {
+ cond_eoi_irq(chip, &desc->irq_data);
+ return;
}
kstat_incr_irqs_this_cpu(desc);
- if (desc->istate & IRQS_ONESHOT)
- mask_irq(desc);
handle_irq_event(desc);
cond_unmask_eoi_irq(desc, chip);
-
- raw_spin_unlock(&desc->lock);
- return;
-out:
- if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED))
- chip->irq_eoi(&desc->irq_data);
- raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 15a7654eff68..f07529ae4895 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -177,9 +177,8 @@ void irq_migrate_all_off_this_cpu(void)
bool affinity_broken;
desc = irq_to_desc(irq);
- raw_spin_lock(&desc->lock);
- affinity_broken = migrate_one_irq(desc);
- raw_spin_unlock(&desc->lock);
+ scoped_guard(raw_spinlock, &desc->lock)
+ affinity_broken = migrate_one_irq(desc);
if (affinity_broken) {
pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n",
@@ -219,7 +218,7 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
return;
if (irqd_is_managed_and_shutdown(data))
- irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ irq_startup_managed(desc);
/*
* If the interrupt can only be directed to a single target
@@ -244,9 +243,8 @@ int irq_affinity_online_cpu(unsigned int cpu)
irq_lock_sparse();
for_each_active_irq(irq) {
desc = irq_to_desc(irq);
- raw_spin_lock_irq(&desc->lock);
- irq_restore_affinity_of_irq(desc, cpu);
- raw_spin_unlock_irq(&desc->lock);
+ scoped_guard(raw_spinlock_irq, &desc->lock)
+ irq_restore_affinity_of_irq(desc, cpu);
}
irq_unlock_sparse();
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index ca142b9a4db3..3527defd2890 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -160,7 +160,7 @@ static int irq_debug_show(struct seq_file *m, void *p)
struct irq_desc *desc = m->private;
struct irq_data *data;
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
data = irq_desc_get_irq_data(desc);
seq_printf(m, "handler: %ps\n", desc->handle_irq);
seq_printf(m, "device: %s\n", desc->dev_name);
@@ -178,7 +178,6 @@ static int irq_debug_show(struct seq_file *m, void *p)
seq_printf(m, "node: %d\n", irq_data_get_node(data));
irq_debug_show_masks(m, desc);
irq_debug_show_data(m, data, 0);
- raw_spin_unlock_irq(&desc->lock);
return 0;
}
@@ -226,12 +225,12 @@ void irq_debugfs_copy_devname(int irq, struct device *dev)
void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc)
{
- char name [10];
+ char name [12];
if (!irq_dir || !desc || desc->debugfs_file)
return;
- sprintf(name, "%d", irq);
+ sprintf(name, "%u", irq);
desc->debugfs_file = debugfs_create_file(name, 0644, irq_dir, desc,
&dfs_irq_ops);
}
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index c4a8bca5f2b0..bf59e37d650a 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -40,10 +40,9 @@ void irq_gc_mask_disable_reg(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, mask, ct->regs.disable);
*ct->mask_cache &= ~mask;
- irq_gc_unlock(gc);
}
EXPORT_SYMBOL_GPL(irq_gc_mask_disable_reg);
@@ -60,10 +59,9 @@ void irq_gc_mask_set_bit(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
*ct->mask_cache |= mask;
irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
- irq_gc_unlock(gc);
}
EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit);
@@ -80,10 +78,9 @@ void irq_gc_mask_clr_bit(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
*ct->mask_cache &= ~mask;
irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
- irq_gc_unlock(gc);
}
EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit);
@@ -100,10 +97,9 @@ void irq_gc_unmask_enable_reg(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, mask, ct->regs.enable);
*ct->mask_cache |= mask;
- irq_gc_unlock(gc);
}
EXPORT_SYMBOL_GPL(irq_gc_unmask_enable_reg);
@@ -117,9 +113,8 @@ void irq_gc_ack_set_bit(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, mask, ct->regs.ack);
- irq_gc_unlock(gc);
}
EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit);
@@ -133,9 +128,8 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = ~d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, mask, ct->regs.ack);
- irq_gc_unlock(gc);
}
/**
@@ -156,11 +150,10 @@ void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, mask, ct->regs.disable);
*ct->mask_cache &= ~mask;
irq_reg_writel(gc, mask, ct->regs.ack);
- irq_gc_unlock(gc);
}
EXPORT_SYMBOL_GPL(irq_gc_mask_disable_and_ack_set);
@@ -174,9 +167,8 @@ void irq_gc_eoi(struct irq_data *d)
struct irq_chip_type *ct = irq_data_get_chip_type(d);
u32 mask = d->mask;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
irq_reg_writel(gc, mask, ct->regs.eoi);
- irq_gc_unlock(gc);
}
/**
@@ -196,12 +188,11 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on)
if (!(mask & gc->wake_enabled))
return -EINVAL;
- irq_gc_lock(gc);
+ guard(raw_spinlock)(&gc->lock);
if (on)
gc->wake_active |= mask;
else
gc->wake_active &= ~mask;
- irq_gc_unlock(gc);
return 0;
}
EXPORT_SYMBOL_GPL(irq_gc_set_wake);
@@ -288,7 +279,6 @@ int irq_domain_alloc_generic_chips(struct irq_domain *d,
{
struct irq_domain_chip_generic *dgc;
struct irq_chip_generic *gc;
- unsigned long flags;
int numchips, i;
size_t dgc_sz;
size_t gc_sz;
@@ -340,9 +330,8 @@ int irq_domain_alloc_generic_chips(struct irq_domain *d,
goto err;
}
- raw_spin_lock_irqsave(&gc_lock, flags);
- list_add_tail(&gc->list, &gc_list);
- raw_spin_unlock_irqrestore(&gc_lock, flags);
+ scoped_guard (raw_spinlock_irqsave, &gc_lock)
+ list_add_tail(&gc->list, &gc_list);
/* Calc pointer to the next generic chip */
tmp += gc_sz;
}
@@ -459,7 +448,6 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
struct irq_chip_generic *gc;
struct irq_chip_type *ct;
struct irq_chip *chip;
- unsigned long flags;
int idx;
gc = __irq_get_domain_generic_chip(d, hw_irq);
@@ -479,9 +467,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
/* We only init the cache for the first mapping of a generic chip */
if (!gc->installed) {
- raw_spin_lock_irqsave(&gc->lock, flags);
+ guard(raw_spinlock_irqsave)(&gc->lock);
irq_gc_init_mask_cache(gc, dgc->gc_flags);
- raw_spin_unlock_irqrestore(&gc->lock, flags);
}
/* Mark the interrupt as installed */
@@ -548,9 +535,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
struct irq_chip *chip = &ct->chip;
unsigned int i;
- raw_spin_lock(&gc_lock);
- list_add_tail(&gc->list, &gc_list);
- raw_spin_unlock(&gc_lock);
+ scoped_guard (raw_spinlock, &gc_lock)
+ list_add_tail(&gc->list, &gc_list);
irq_gc_init_mask_cache(gc, flags);
@@ -616,9 +602,8 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
{
unsigned int i, virq;
- raw_spin_lock(&gc_lock);
- list_del(&gc->list);
- raw_spin_unlock(&gc_lock);
+ scoped_guard (raw_spinlock, &gc_lock)
+ list_del(&gc->list);
for (i = 0; msk; msk >>= 1, i++) {
if (!(msk & 0x01))
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b0290849c395..aebfe225c9a6 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -87,6 +87,7 @@ extern void __enable_irq(struct irq_desc *desc);
extern int irq_activate(struct irq_desc *desc);
extern int irq_activate_and_startup(struct irq_desc *desc, bool resend);
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
+extern void irq_startup_managed(struct irq_desc *desc);
extern void irq_shutdown(struct irq_desc *desc);
extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
@@ -141,6 +142,10 @@ extern int irq_setup_affinity(struct irq_desc *desc);
static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; }
#endif
+
+#define for_each_action_of_desc(desc, act) \
+ for (act = desc->action; act; act = act->next)
+
/* Inline functions for support of irq chips on slow busses */
static inline void chip_bus_lock(struct irq_desc *desc)
{
@@ -160,38 +165,33 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
#define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK)
#define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU)
-#define for_each_action_of_desc(desc, act) \
- for (act = desc->action; act; act = act->next)
-
-struct irq_desc *
-__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
- unsigned int check);
+struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
+ unsigned int check);
void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus);
-static inline struct irq_desc *
-irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check)
-{
- return __irq_get_desc_lock(irq, flags, true, check);
-}
+__DEFINE_CLASS_IS_CONDITIONAL(irqdesc_lock, true);
+__DEFINE_UNLOCK_GUARD(irqdesc_lock, struct irq_desc,
+ __irq_put_desc_unlock(_T->lock, _T->flags, _T->bus),
+ unsigned long flags; bool bus);
-static inline void
-irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags)
+static inline class_irqdesc_lock_t class_irqdesc_lock_constructor(unsigned int irq, bool bus,
+ unsigned int check)
{
- __irq_put_desc_unlock(desc, flags, true);
-}
+ class_irqdesc_lock_t _t = { .bus = bus, };
-static inline struct irq_desc *
-irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check)
-{
- return __irq_get_desc_lock(irq, flags, false, check);
-}
+ _t.lock = __irq_get_desc_lock(irq, &_t.flags, bus, check);
-static inline void
-irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
-{
- __irq_put_desc_unlock(desc, flags, false);
+ return _t;
}
+#define scoped_irqdesc_get_and_lock(_irq, _check) \
+ scoped_guard(irqdesc_lock, _irq, false, _check)
+
+#define scoped_irqdesc_get_and_buslock(_irq, _check) \
+ scoped_guard(irqdesc_lock, _irq, true, _check)
+
+#define scoped_irqdesc ((struct irq_desc *)(__guard_ptr(irqdesc_lock)(&scope)))
+
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
static inline unsigned int irqd_get(struct irq_data *d)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4258cd6bd3b4..b64c57b44c20 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -246,8 +246,7 @@ static struct kobject *irq_kobj_base;
#define IRQ_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
-static ssize_t per_cpu_count_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t per_cpu_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
ssize_t ret = 0;
@@ -257,112 +256,83 @@ static ssize_t per_cpu_count_show(struct kobject *kobj,
for_each_possible_cpu(cpu) {
unsigned int c = irq_desc_kstat_cpu(desc, cpu);
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
+ ret += sysfs_emit_at(buf, ret, "%s%u", p, c);
p = ",";
}
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ ret += sysfs_emit_at(buf, ret, "\n");
return ret;
}
IRQ_ATTR_RO(per_cpu_count);
-static ssize_t chip_name_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t chip_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
- ssize_t ret = 0;
-
- raw_spin_lock_irq(&desc->lock);
- if (desc->irq_data.chip && desc->irq_data.chip->name) {
- ret = scnprintf(buf, PAGE_SIZE, "%s\n",
- desc->irq_data.chip->name);
- }
- raw_spin_unlock_irq(&desc->lock);
- return ret;
+ guard(raw_spinlock_irq)(&desc->lock);
+ if (desc->irq_data.chip && desc->irq_data.chip->name)
+ return sysfs_emit(buf, "%s\n", desc->irq_data.chip->name);
+ return 0;
}
IRQ_ATTR_RO(chip_name);
-static ssize_t hwirq_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t hwirq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
- ssize_t ret = 0;
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
if (desc->irq_data.domain)
- ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq);
- raw_spin_unlock_irq(&desc->lock);
-
- return ret;
+ return sysfs_emit(buf, "%lu\n", desc->irq_data.hwirq);
+ return 0;
}
IRQ_ATTR_RO(hwirq);
-static ssize_t type_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
- ssize_t ret = 0;
- raw_spin_lock_irq(&desc->lock);
- ret = sprintf(buf, "%s\n",
- irqd_is_level_type(&desc->irq_data) ? "level" : "edge");
- raw_spin_unlock_irq(&desc->lock);
-
- return ret;
+ guard(raw_spinlock_irq)(&desc->lock);
+ return sysfs_emit(buf, "%s\n", irqd_is_level_type(&desc->irq_data) ? "level" : "edge");
}
IRQ_ATTR_RO(type);
-static ssize_t wakeup_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t wakeup_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
- ssize_t ret = 0;
-
- raw_spin_lock_irq(&desc->lock);
- ret = sprintf(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data)));
- raw_spin_unlock_irq(&desc->lock);
-
- return ret;
+ guard(raw_spinlock_irq)(&desc->lock);
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data)));
}
IRQ_ATTR_RO(wakeup);
-static ssize_t name_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
- ssize_t ret = 0;
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
if (desc->name)
- ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name);
- raw_spin_unlock_irq(&desc->lock);
-
- return ret;
+ return sysfs_emit(buf, "%s\n", desc->name);
+ return 0;
}
IRQ_ATTR_RO(name);
-static ssize_t actions_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t actions_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
struct irqaction *action;
ssize_t ret = 0;
char *p = "";
- raw_spin_lock_irq(&desc->lock);
- for_each_action_of_desc(desc, action) {
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
- p, action->name);
- p = ",";
+ scoped_guard(raw_spinlock_irq, &desc->lock) {
+ for_each_action_of_desc(desc, action) {
+ ret += sysfs_emit_at(buf, ret, "%s%s", p, action->name);
+ p = ",";
+ }
}
- raw_spin_unlock_irq(&desc->lock);
if (ret)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
-
+ ret += sysfs_emit_at(buf, ret, "\n");
return ret;
}
IRQ_ATTR_RO(actions);
@@ -418,19 +388,14 @@ static int __init irq_sysfs_init(void)
int irq;
/* Prevent concurrent irq alloc/free */
- irq_lock_sparse();
-
+ guard(mutex)(&sparse_irq_lock);
irq_kobj_base = kobject_create_and_add("irq", kernel_kobj);
- if (!irq_kobj_base) {
- irq_unlock_sparse();
+ if (!irq_kobj_base)
return -ENOMEM;
- }
/* Add the already allocated interrupts */
for_each_irq_desc(irq, desc)
irq_sysfs_add(irq, desc);
- irq_unlock_sparse();
-
return 0;
}
postcore_initcall(irq_sysfs_init);
@@ -573,12 +538,12 @@ err:
return -ENOMEM;
}
-static int irq_expand_nr_irqs(unsigned int nr)
+static bool irq_expand_nr_irqs(unsigned int nr)
{
if (nr > MAX_SPARSE_IRQS)
- return -ENOMEM;
+ return false;
nr_irqs = nr;
- return 0;
+ return true;
}
int __init early_irq_init(void)
@@ -656,11 +621,9 @@ EXPORT_SYMBOL(irq_to_desc);
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
- raw_spin_lock_irqsave(&desc->lock, flags);
- desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irqsave, &desc->lock)
+ desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
delete_irq_desc(irq);
}
@@ -679,16 +642,15 @@ static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
return start;
}
-static int irq_expand_nr_irqs(unsigned int nr)
+static inline bool irq_expand_nr_irqs(unsigned int nr)
{
- return -ENOMEM;
+ return false;
}
void irq_mark_irq(unsigned int irq)
{
- mutex_lock(&sparse_irq_lock);
+ guard(mutex)(&sparse_irq_lock);
irq_insert_desc(irq, irq_desc + irq);
- mutex_unlock(&sparse_irq_lock);
}
#ifdef CONFIG_GENERIC_IRQ_LEGACY
@@ -827,11 +789,9 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
if (from >= nr_irqs || (from + cnt) > nr_irqs)
return;
- mutex_lock(&sparse_irq_lock);
+ guard(mutex)(&sparse_irq_lock);
for (i = 0; i < cnt; i++)
free_desc(from + i);
-
- mutex_unlock(&sparse_irq_lock);
}
EXPORT_SYMBOL_GPL(irq_free_descs);
@@ -848,11 +808,10 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
*
* Returns the first irq number or error code
*/
-int __ref
-__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
- struct module *owner, const struct irq_affinity_desc *affinity)
+int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner, const struct irq_affinity_desc *affinity)
{
- int start, ret;
+ int start;
if (!cnt)
return -EINVAL;
@@ -870,22 +829,17 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
from = arch_dynirq_lower_bound(from);
}
- mutex_lock(&sparse_irq_lock);
+ guard(mutex)(&sparse_irq_lock);
start = irq_find_free_area(from, cnt);
- ret = -EEXIST;
if (irq >=0 && start != irq)
- goto unlock;
+ return -EEXIST;
if (start + cnt > nr_irqs) {
- ret = irq_expand_nr_irqs(start + cnt);
- if (ret)
- goto unlock;
+ if (!irq_expand_nr_irqs(start + cnt))
+ return -ENOMEM;
}
- ret = alloc_descs(start, cnt, node, affinity, owner);
-unlock:
- mutex_unlock(&sparse_irq_lock);
- return ret;
+ return alloc_descs(start, cnt, node, affinity, owner);
}
EXPORT_SYMBOL_GPL(__irq_alloc_descs);
@@ -900,27 +854,27 @@ unsigned int irq_get_next_irq(unsigned int offset)
return irq_find_at_or_after(offset);
}
-struct irq_desc *
-__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
- unsigned int check)
+struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
+ unsigned int check)
{
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_desc *desc;
- if (desc) {
- if (check & _IRQ_DESC_CHECK) {
- if ((check & _IRQ_DESC_PERCPU) &&
- !irq_settings_is_per_cpu_devid(desc))
- return NULL;
-
- if (!(check & _IRQ_DESC_PERCPU) &&
- irq_settings_is_per_cpu_devid(desc))
- return NULL;
- }
+ desc = irq_to_desc(irq);
+ if (!desc)
+ return NULL;
+
+ if (check & _IRQ_DESC_CHECK) {
+ if ((check & _IRQ_DESC_PERCPU) && !irq_settings_is_per_cpu_devid(desc))
+ return NULL;
- if (bus)
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, *flags);
+ if (!(check & _IRQ_DESC_PERCPU) && irq_settings_is_per_cpu_devid(desc))
+ return NULL;
}
+
+ if (bus)
+ chip_bus_lock(desc);
+ raw_spin_lock_irqsave(&desc->lock, *flags);
+
return desc;
}
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9d5c8651492d..c8b6de09047b 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -480,33 +480,6 @@ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
}
EXPORT_SYMBOL_GPL(irq_domain_create_simple);
-/**
- * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
- * @of_node: pointer to interrupt controller's device tree node.
- * @size: total number of irqs in legacy mapping
- * @first_irq: first number of irq block assigned to the domain
- * @first_hwirq: first hwirq number to use for the translation. Should normally
- * be '0', but a positive integer can be used if the effective
- * hwirqs numbering does not begin at zero.
- * @ops: map/unmap domain callbacks
- * @host_data: Controller private data pointer
- *
- * Note: the map() callback will be called before this function returns
- * for all legacy interrupts except 0 (which is always the invalid irq for
- * a legacy controller).
- */
-struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
- unsigned int size,
- unsigned int first_irq,
- irq_hw_number_t first_hwirq,
- const struct irq_domain_ops *ops,
- void *host_data)
-{
- return irq_domain_create_legacy(of_node_to_fwnode(of_node), size,
- first_irq, first_hwirq, ops, host_data);
-}
-EXPORT_SYMBOL_GPL(irq_domain_add_legacy);
-
struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
unsigned int size,
unsigned int first_irq,
@@ -885,7 +858,7 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
{
int i;
- fwspec->fwnode = of_node_to_fwnode(np);
+ fwspec->fwnode = of_fwnode_handle(np);
fwspec->param_count = count;
for (i = 0; i < count; i++)
@@ -1133,6 +1106,31 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
/**
+ * irq_domain_xlate_twothreecell() - Generic xlate for direct two or three cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
+ *
+ * Device Tree interrupt specifier translation function for two or three
+ * cell bindings, where the cell values map directly to the hardware
+ * interrupt number and the type specifier.
+ */
+int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+ struct irq_fwspec fwspec;
+
+ of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec);
+
+ return irq_domain_translate_twothreecell(d, &fwspec, out_hwirq, out_type);
+}
+EXPORT_SYMBOL_GPL(irq_domain_xlate_twothreecell);
+
+/**
* irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings
* @d: Interrupt domain involved in the translation
* @ctrlr: The device tree node for the device whose interrupt is translated
@@ -1216,6 +1214,37 @@ int irq_domain_translate_twocell(struct irq_domain *d,
}
EXPORT_SYMBOL_GPL(irq_domain_translate_twocell);
+/**
+ * irq_domain_translate_twothreecell() - Generic translate for direct two or three cell
+ * bindings
+ * @d: Interrupt domain involved in the translation
+ * @fwspec: The firmware interrupt specifier to translate
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
+ *
+ * Firmware interrupt specifier translation function for two or three cell
+ * specifications, where the parameter values map directly to the hardware
+ * interrupt number and the type specifier.
+ */
+int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq, unsigned int *out_type)
+{
+ if (fwspec->param_count == 2) {
+ *out_hwirq = fwspec->param[0];
+ *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+ return 0;
+ }
+
+ if (fwspec->param_count == 3) {
+ *out_hwirq = fwspec->param[1];
+ *out_type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(irq_domain_translate_twothreecell);
+
int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
int node, const struct irq_affinity_desc *affinity)
{
@@ -1252,47 +1281,6 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data)
EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-/**
- * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy
- * @parent: Parent irq domain to associate with the new domain
- * @flags: Irq domain flags associated to the domain
- * @size: Size of the domain. See below
- * @fwnode: Optional fwnode of the interrupt controller
- * @ops: Pointer to the interrupt domain callbacks
- * @host_data: Controller private data pointer
- *
- * If @size is 0 a tree domain is created, otherwise a linear domain.
- *
- * If successful the parent is associated to the new domain and the
- * domain flags are set.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
- unsigned int flags,
- unsigned int size,
- struct fwnode_handle *fwnode,
- const struct irq_domain_ops *ops,
- void *host_data)
-{
- struct irq_domain_info info = {
- .fwnode = fwnode,
- .size = size,
- .hwirq_max = size,
- .ops = ops,
- .host_data = host_data,
- .domain_flags = flags,
- .parent = parent,
- };
- struct irq_domain *d;
-
- if (!info.size)
- info.hwirq_max = ~0U;
-
- d = irq_domain_instantiate(&info);
- return IS_ERR(d) ? NULL : d;
-}
-EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy);
-
static void irq_domain_insert_irq(int virq)
{
struct irq_data *data;
@@ -2008,7 +1996,7 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY;
}
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
-/**
+/*
* irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
* @domain: domain to match
* @virq: IRQ number to get irq_data
@@ -2022,7 +2010,7 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
}
EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
-/**
+/*
* irq_domain_set_info - Set the complete data for a @virq in @domain
* @domain: Interrupt domain to match
* @virq: IRQ number
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 753eef8e041c..c94837382037 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -43,8 +43,6 @@ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
bool inprogress;
do {
- unsigned long flags;
-
/*
* Wait until we're out of the critical section. This might
* give the wrong answer due to the lack of memory barriers.
@@ -53,7 +51,7 @@ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
cpu_relax();
/* Ok, that indicated we're done: double-check carefully. */
- raw_spin_lock_irqsave(&desc->lock, flags);
+ guard(raw_spinlock_irqsave)(&desc->lock);
inprogress = irqd_irq_inprogress(&desc->irq_data);
/*
@@ -69,33 +67,30 @@ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
__irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
&inprogress);
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
/* Oops, that failed? */
} while (inprogress);
}
/**
- * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs)
- * @irq: interrupt number to wait for
+ * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs)
+ * @irq: interrupt number to wait for
*
- * This function waits for any pending hard IRQ handlers for this
- * interrupt to complete before returning. If you use this
- * function while holding a resource the IRQ handler may need you
- * will deadlock. It does not take associated threaded handlers
- * into account.
+ * This function waits for any pending hard IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while holding a
+ * resource the IRQ handler may need you will deadlock. It does not take
+ * associated threaded handlers into account.
*
- * Do not use this for shutdown scenarios where you must be sure
- * that all parts (hardirq and threaded handler) have completed.
+ * Do not use this for shutdown scenarios where you must be sure that all
+ * parts (hardirq and threaded handler) have completed.
*
- * Returns: false if a threaded handler is active.
+ * Returns: false if a threaded handler is active.
*
- * This function may be called - with care - from IRQ context.
+ * This function may be called - with care - from IRQ context.
*
- * It does not check whether there is an interrupt in flight at the
- * hardware level, but not serviced yet, as this might deadlock when
- * called with interrupts disabled and the target CPU of the interrupt
- * is the current CPU.
+ * It does not check whether there is an interrupt in flight at the
+ * hardware level, but not serviced yet, as this might deadlock when called
+ * with interrupts disabled and the target CPU of the interrupt is the
+ * current CPU.
*/
bool synchronize_hardirq(unsigned int irq)
{
@@ -121,19 +116,19 @@ static void __synchronize_irq(struct irq_desc *desc)
}
/**
- * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
- * @irq: interrupt number to wait for
+ * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
+ * @irq: interrupt number to wait for
*
- * This function waits for any pending IRQ handlers for this interrupt
- * to complete before returning. If you use this function while
- * holding a resource the IRQ handler may need you will deadlock.
+ * This function waits for any pending IRQ handlers for this interrupt to
+ * complete before returning. If you use this function while holding a
+ * resource the IRQ handler may need you will deadlock.
*
- * Can only be called from preemptible code as it might sleep when
- * an interrupt thread is associated to @irq.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
*
- * It optionally makes sure (when the irq chip supports that method)
- * that the interrupt is not pending in any CPU and waiting for
- * service.
+ * It optionally makes sure (when the irq chip supports that method)
+ * that the interrupt is not pending in any CPU and waiting for
+ * service.
*/
void synchronize_irq(unsigned int irq)
{
@@ -156,8 +151,8 @@ static bool __irq_can_set_affinity(struct irq_desc *desc)
}
/**
- * irq_can_set_affinity - Check if the affinity of a given irq can be set
- * @irq: Interrupt to check
+ * irq_can_set_affinity - Check if the affinity of a given irq can be set
+ * @irq: Interrupt to check
*
*/
int irq_can_set_affinity(unsigned int irq)
@@ -181,13 +176,13 @@ bool irq_can_set_affinity_usr(unsigned int irq)
}
/**
- * irq_set_thread_affinity - Notify irq threads to adjust affinity
- * @desc: irq descriptor which has affinity changed
+ * irq_set_thread_affinity - Notify irq threads to adjust affinity
+ * @desc: irq descriptor which has affinity changed
*
- * We just set IRQTF_AFFINITY and delegate the affinity setting
- * to the interrupt thread itself. We can not call
- * set_cpus_allowed_ptr() here as we hold desc->lock and this
- * code can be called from hard interrupt context.
+ * Just set IRQTF_AFFINITY and delegate the affinity setting to the
+ * interrupt thread itself. We can not call set_cpus_allowed_ptr() here as
+ * we hold desc->lock and this code can be called from hard interrupt
+ * context.
*/
static void irq_set_thread_affinity(struct irq_desc *desc)
{
@@ -400,14 +395,8 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
* an interrupt which is already started or which has already been configured
* as managed will also fail, as these mean invalid init state or double init.
*/
-int irq_update_affinity_desc(unsigned int irq,
- struct irq_affinity_desc *affinity)
+int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity)
{
- struct irq_desc *desc;
- unsigned long flags;
- bool activated;
- int ret = 0;
-
/*
* Supporting this with the reservation scheme used by x86 needs
* some more thought. Fail it for now.
@@ -415,60 +404,50 @@ int irq_update_affinity_desc(unsigned int irq,
if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
return -EOPNOTSUPP;
- desc = irq_get_desc_buslock(irq, &flags, 0);
- if (!desc)
- return -EINVAL;
-
- /* Requires the interrupt to be shut down */
- if (irqd_is_started(&desc->irq_data)) {
- ret = -EBUSY;
- goto out_unlock;
- }
+ scoped_irqdesc_get_and_buslock(irq, 0) {
+ struct irq_desc *desc = scoped_irqdesc;
+ bool activated;
- /* Interrupts which are already managed cannot be modified */
- if (irqd_affinity_is_managed(&desc->irq_data)) {
- ret = -EBUSY;
- goto out_unlock;
- }
-
- /*
- * Deactivate the interrupt. That's required to undo
- * anything an earlier activation has established.
- */
- activated = irqd_is_activated(&desc->irq_data);
- if (activated)
- irq_domain_deactivate_irq(&desc->irq_data);
+ /* Requires the interrupt to be shut down */
+ if (irqd_is_started(&desc->irq_data))
+ return -EBUSY;
- if (affinity->is_managed) {
- irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED);
- irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN);
- }
+ /* Interrupts which are already managed cannot be modified */
+ if (irqd_affinity_is_managed(&desc->irq_data))
+ return -EBUSY;
+ /*
+ * Deactivate the interrupt. That's required to undo
+ * anything an earlier activation has established.
+ */
+ activated = irqd_is_activated(&desc->irq_data);
+ if (activated)
+ irq_domain_deactivate_irq(&desc->irq_data);
- cpumask_copy(desc->irq_common_data.affinity, &affinity->mask);
+ if (affinity->is_managed) {
+ irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED);
+ irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN);
+ }
- /* Restore the activation state */
- if (activated)
- irq_domain_activate_irq(&desc->irq_data, false);
+ cpumask_copy(desc->irq_common_data.affinity, &affinity->mask);
-out_unlock:
- irq_put_desc_busunlock(desc, flags);
- return ret;
+ /* Restore the activation state */
+ if (activated)
+ irq_domain_activate_irq(&desc->irq_data, false);
+ return 0;
+ }
+ return -EINVAL;
}
static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask,
bool force)
{
struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
- int ret;
if (!desc)
return -EINVAL;
- raw_spin_lock_irqsave(&desc->lock, flags);
- ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
+ guard(raw_spinlock_irqsave)(&desc->lock);
+ return irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force);
}
/**
@@ -501,39 +480,36 @@ int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask)
}
EXPORT_SYMBOL_GPL(irq_force_affinity);
-int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m,
- bool setaffinity)
+int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, bool setaffinity)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+ int ret = -EINVAL;
- if (!desc)
- return -EINVAL;
- desc->affinity_hint = m;
- irq_put_desc_unlock(desc, flags);
- if (m && setaffinity)
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ scoped_irqdesc->affinity_hint = m;
+ ret = 0;
+ }
+
+ if (!ret && m && setaffinity)
__irq_set_affinity(irq, m, false);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint);
static void irq_affinity_notify(struct work_struct *work)
{
- struct irq_affinity_notify *notify =
- container_of(work, struct irq_affinity_notify, work);
+ struct irq_affinity_notify *notify = container_of(work, struct irq_affinity_notify, work);
struct irq_desc *desc = irq_to_desc(notify->irq);
cpumask_var_t cpumask;
- unsigned long flags;
if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL))
goto out;
- raw_spin_lock_irqsave(&desc->lock, flags);
- if (irq_move_pending(&desc->irq_data))
- irq_get_pending(cpumask, desc);
- else
- cpumask_copy(cpumask, desc->irq_common_data.affinity);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+ if (irq_move_pending(&desc->irq_data))
+ irq_get_pending(cpumask, desc);
+ else
+ cpumask_copy(cpumask, desc->irq_common_data.affinity);
+ }
notify->notify(notify, cpumask);
@@ -543,22 +519,20 @@ out:
}
/**
- * irq_set_affinity_notifier - control notification of IRQ affinity changes
- * @irq: Interrupt for which to enable/disable notification
- * @notify: Context for notification, or %NULL to disable
- * notification. Function pointers must be initialised;
- * the other fields will be initialised by this function.
- *
- * Must be called in process context. Notification may only be enabled
- * after the IRQ is allocated and must be disabled before the IRQ is
- * freed using free_irq().
+ * irq_set_affinity_notifier - control notification of IRQ affinity changes
+ * @irq: Interrupt for which to enable/disable notification
+ * @notify: Context for notification, or %NULL to disable
+ * notification. Function pointers must be initialised;
+ * the other fields will be initialised by this function.
+ *
+ * Must be called in process context. Notification may only be enabled
+ * after the IRQ is allocated and must be disabled before the IRQ is freed
+ * using free_irq().
*/
-int
-irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_affinity_notify *old_notify;
- unsigned long flags;
/* The release function is promised process context */
might_sleep();
@@ -573,10 +547,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
INIT_WORK(&notify->work, irq_affinity_notify);
}
- raw_spin_lock_irqsave(&desc->lock, flags);
- old_notify = desc->affinity_notify;
- desc->affinity_notify = notify;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+ old_notify = desc->affinity_notify;
+ desc->affinity_notify = notify;
+ }
if (old_notify) {
if (cancel_work_sync(&old_notify->work)) {
@@ -597,7 +571,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
int irq_setup_affinity(struct irq_desc *desc)
{
struct cpumask *set = irq_default_affinity;
- int ret, node = irq_desc_get_node(desc);
+ int node = irq_desc_get_node(desc);
+
static DEFINE_RAW_SPINLOCK(mask_lock);
static struct cpumask mask;
@@ -605,7 +580,7 @@ int irq_setup_affinity(struct irq_desc *desc)
if (!__irq_can_set_affinity(desc))
return 0;
- raw_spin_lock(&mask_lock);
+ guard(raw_spinlock)(&mask_lock);
/*
* Preserve the managed affinity setting and a userspace affinity
* setup, but make sure that one of the targets is online.
@@ -630,9 +605,7 @@ int irq_setup_affinity(struct irq_desc *desc)
if (cpumask_intersects(&mask, nodemask))
cpumask_and(&mask, &mask, nodemask);
}
- ret = irq_do_set_affinity(&desc->irq_data, &mask, false);
- raw_spin_unlock(&mask_lock);
- return ret;
+ return irq_do_set_affinity(&desc->irq_data, &mask, false);
}
#else
/* Wrapper for ALPHA specific affinity selector magic */
@@ -645,44 +618,36 @@ int irq_setup_affinity(struct irq_desc *desc)
/**
- * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
- * @irq: interrupt number to set affinity
- * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU
- * specific data for percpu_devid interrupts
- *
- * This function uses the vCPU specific data to set the vCPU
- * affinity for an irq. The vCPU specific data is passed from
- * outside, such as KVM. One example code path is as below:
- * KVM -> IOMMU -> irq_set_vcpu_affinity().
+ * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
+ * @irq: interrupt number to set affinity
+ * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU
+ * specific data for percpu_devid interrupts
+ *
+ * This function uses the vCPU specific data to set the vCPU affinity for
+ * an irq. The vCPU specific data is passed from outside, such as KVM. One
+ * example code path is as below: KVM -> IOMMU -> irq_set_vcpu_affinity().
*/
int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
- struct irq_data *data;
- struct irq_chip *chip;
- int ret = -ENOSYS;
+ scoped_irqdesc_get_and_lock(irq, 0) {
+ struct irq_desc *desc = scoped_irqdesc;
+ struct irq_data *data;
+ struct irq_chip *chip;
- if (!desc)
- return -EINVAL;
-
- data = irq_desc_get_irq_data(desc);
- do {
- chip = irq_data_get_irq_chip(data);
- if (chip && chip->irq_set_vcpu_affinity)
- break;
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- data = data->parent_data;
-#else
- data = NULL;
-#endif
- } while (data);
+ data = irq_desc_get_irq_data(desc);
+ do {
+ chip = irq_data_get_irq_chip(data);
+ if (chip && chip->irq_set_vcpu_affinity)
+ break;
- if (data)
- ret = chip->irq_set_vcpu_affinity(data, vcpu_info);
- irq_put_desc_unlock(desc, flags);
+ data = irqd_get_parent_data(data);
+ } while (data);
- return ret;
+ if (!data)
+ return -ENOSYS;
+ return chip->irq_set_vcpu_affinity(data, vcpu_info);
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity);
@@ -694,26 +659,23 @@ void __disable_irq(struct irq_desc *desc)
static int __disable_irq_nosync(unsigned int irq)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
-
- if (!desc)
- return -EINVAL;
- __disable_irq(desc);
- irq_put_desc_busunlock(desc, flags);
- return 0;
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ __disable_irq(scoped_irqdesc);
+ return 0;
+ }
+ return -EINVAL;
}
/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
+ * disable_irq_nosync - disable an irq without waiting
+ * @irq: Interrupt to disable
*
- * Disable the selected interrupt line. Disables and Enables are
- * nested.
- * Unlike disable_irq(), this function does not ensure existing
- * instances of the IRQ handler have completed before returning.
+ * Disable the selected interrupt line. Disables and Enables are
+ * nested.
+ * Unlike disable_irq(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
*
- * This function may be called from IRQ context.
+ * This function may be called from IRQ context.
*/
void disable_irq_nosync(unsigned int irq)
{
@@ -722,17 +684,17 @@ void disable_irq_nosync(unsigned int irq)
EXPORT_SYMBOL(disable_irq_nosync);
/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
+ * disable_irq - disable an irq and wait for completion
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Enables and Disables are nested.
*
- * Disable the selected interrupt line. Enables and Disables are
- * nested.
- * This function waits for any pending IRQ handlers for this interrupt
- * to complete before returning. If you use this function while
- * holding a resource the IRQ handler may need you will deadlock.
+ * This function waits for any pending IRQ handlers for this interrupt to
+ * complete before returning. If you use this function while holding a
+ * resource the IRQ handler may need you will deadlock.
*
- * Can only be called from preemptible code as it might sleep when
- * an interrupt thread is associated to @irq.
+ * Can only be called from preemptible code as it might sleep when an
+ * interrupt thread is associated to @irq.
*
*/
void disable_irq(unsigned int irq)
@@ -744,40 +706,39 @@ void disable_irq(unsigned int irq)
EXPORT_SYMBOL(disable_irq);
/**
- * disable_hardirq - disables an irq and waits for hardirq completion
- * @irq: Interrupt to disable
+ * disable_hardirq - disables an irq and waits for hardirq completion
+ * @irq: Interrupt to disable
*
- * Disable the selected interrupt line. Enables and Disables are
- * nested.
- * This function waits for any pending hard IRQ handlers for this
- * interrupt to complete before returning. If you use this function while
- * holding a resource the hard IRQ handler may need you will deadlock.
+ * Disable the selected interrupt line. Enables and Disables are nested.
*
- * When used to optimistically disable an interrupt from atomic context
- * the return value must be checked.
+ * This function waits for any pending hard IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while holding a
+ * resource the hard IRQ handler may need you will deadlock.
*
- * Returns: false if a threaded handler is active.
+ * When used to optimistically disable an interrupt from atomic context the
+ * return value must be checked.
*
- * This function may be called - with care - from IRQ context.
+ * Returns: false if a threaded handler is active.
+ *
+ * This function may be called - with care - from IRQ context.
*/
bool disable_hardirq(unsigned int irq)
{
if (!__disable_irq_nosync(irq))
return synchronize_hardirq(irq);
-
return false;
}
EXPORT_SYMBOL_GPL(disable_hardirq);
/**
- * disable_nmi_nosync - disable an nmi without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Disables and enables are
- * nested.
- * The interrupt to disable must have been requested through request_nmi.
- * Unlike disable_nmi(), this function does not ensure existing
- * instances of the IRQ handler have completed before returning.
+ * disable_nmi_nosync - disable an nmi without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and enables are nested.
+ *
+ * The interrupt to disable must have been requested through request_nmi.
+ * Unlike disable_nmi(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
*/
void disable_nmi_nosync(unsigned int irq)
{
@@ -817,41 +778,34 @@ void __enable_irq(struct irq_desc *desc)
}
/**
- * enable_irq - enable handling of an irq
- * @irq: Interrupt to enable
+ * enable_irq - enable handling of an irq
+ * @irq: Interrupt to enable
*
- * Undoes the effect of one call to disable_irq(). If this
- * matches the last disable, processing of interrupts on this
- * IRQ line is re-enabled.
+ * Undoes the effect of one call to disable_irq(). If this matches the
+ * last disable, processing of interrupts on this IRQ line is re-enabled.
*
- * This function may be called from IRQ context only when
- * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL !
+ * This function may be called from IRQ context only when
+ * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL !
*/
void enable_irq(unsigned int irq)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ struct irq_desc *desc = scoped_irqdesc;
- if (!desc)
- return;
- if (WARN(!desc->irq_data.chip,
- KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
- goto out;
-
- __enable_irq(desc);
-out:
- irq_put_desc_busunlock(desc, flags);
+ if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq))
+ return;
+ __enable_irq(desc);
+ }
}
EXPORT_SYMBOL(enable_irq);
/**
- * enable_nmi - enable handling of an nmi
- * @irq: Interrupt to enable
+ * enable_nmi - enable handling of an nmi
+ * @irq: Interrupt to enable
*
- * The interrupt to enable must have been requested through request_nmi.
- * Undoes the effect of one call to disable_nmi(). If this
- * matches the last disable, processing of interrupts on this
- * IRQ line is re-enabled.
+ * The interrupt to enable must have been requested through request_nmi.
+ * Undoes the effect of one call to disable_nmi(). If this matches the last
+ * disable, processing of interrupts on this IRQ line is re-enabled.
*/
void enable_nmi(unsigned int irq)
{
@@ -873,65 +827,59 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
}
/**
- * irq_set_irq_wake - control irq power management wakeup
- * @irq: interrupt to control
- * @on: enable/disable power management wakeup
- *
- * Enable/disable power management wakeup mode, which is
- * disabled by default. Enables and disables must match,
- * just as they match for non-wakeup mode support.
- *
- * Wakeup mode lets this IRQ wake the system from sleep
- * states like "suspend to RAM".
- *
- * Note: irq enable/disable state is completely orthogonal
- * to the enable/disable state of irq wake. An irq can be
- * disabled with disable_irq() and still wake the system as
- * long as the irq has wake enabled. If this does not hold,
- * then the underlying irq chip and the related driver need
- * to be investigated.
+ * irq_set_irq_wake - control irq power management wakeup
+ * @irq: interrupt to control
+ * @on: enable/disable power management wakeup
+ *
+ * Enable/disable power management wakeup mode, which is disabled by
+ * default. Enables and disables must match, just as they match for
+ * non-wakeup mode support.
+ *
+ * Wakeup mode lets this IRQ wake the system from sleep states like
+ * "suspend to RAM".
+ *
+ * Note: irq enable/disable state is completely orthogonal to the
+ * enable/disable state of irq wake. An irq can be disabled with
+ * disable_irq() and still wake the system as long as the irq has wake
+ * enabled. If this does not hold, then the underlying irq chip and the
+ * related driver need to be investigated.
*/
int irq_set_irq_wake(unsigned int irq, unsigned int on)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
- int ret = 0;
+ scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ struct irq_desc *desc = scoped_irqdesc;
+ int ret = 0;
- if (!desc)
- return -EINVAL;
-
- /* Don't use NMIs as wake up interrupts please */
- if (irq_is_nmi(desc)) {
- ret = -EINVAL;
- goto out_unlock;
- }
+ /* Don't use NMIs as wake up interrupts please */
+ if (irq_is_nmi(desc))
+ return -EINVAL;
- /* wakeup-capable irqs can be shared between drivers that
- * don't need to have the same sleep mode behaviors.
- */
- if (on) {
- if (desc->wake_depth++ == 0) {
- ret = set_irq_wake_real(irq, on);
- if (ret)
- desc->wake_depth = 0;
- else
- irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE);
- }
- } else {
- if (desc->wake_depth == 0) {
- WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
- } else if (--desc->wake_depth == 0) {
- ret = set_irq_wake_real(irq, on);
- if (ret)
- desc->wake_depth = 1;
- else
- irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
+ /*
+ * wakeup-capable irqs can be shared between drivers that
+ * don't need to have the same sleep mode behaviors.
+ */
+ if (on) {
+ if (desc->wake_depth++ == 0) {
+ ret = set_irq_wake_real(irq, on);
+ if (ret)
+ desc->wake_depth = 0;
+ else
+ irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE);
+ }
+ } else {
+ if (desc->wake_depth == 0) {
+ WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
+ } else if (--desc->wake_depth == 0) {
+ ret = set_irq_wake_real(irq, on);
+ if (ret)
+ desc->wake_depth = 1;
+ else
+ irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
+ }
}
+ return ret;
}
-
-out_unlock:
- irq_put_desc_busunlock(desc, flags);
- return ret;
+ return -EINVAL;
}
EXPORT_SYMBOL(irq_set_irq_wake);
@@ -940,22 +888,17 @@ EXPORT_SYMBOL(irq_set_irq_wake);
* particular irq has been exclusively allocated or is available
* for driver use.
*/
-int can_request_irq(unsigned int irq, unsigned long irqflags)
+bool can_request_irq(unsigned int irq, unsigned long irqflags)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
- int canrequest = 0;
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ struct irq_desc *desc = scoped_irqdesc;
- if (!desc)
- return 0;
-
- if (irq_settings_can_request(desc)) {
- if (!desc->action ||
- irqflags & desc->action->flags & IRQF_SHARED)
- canrequest = 1;
+ if (irq_settings_can_request(desc)) {
+ if (!desc->action || irqflags & desc->action->flags & IRQF_SHARED)
+ return true;
+ }
}
- irq_put_desc_unlock(desc, flags);
- return canrequest;
+ return false;
}
int __irq_set_trigger(struct irq_desc *desc, unsigned long flags)
@@ -1016,16 +959,11 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags)
#ifdef CONFIG_HARDIRQS_SW_RESEND
int irq_set_parent(int irq, int parent_irq)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
-
- if (!desc)
- return -EINVAL;
-
- desc->parent_irq = parent_irq;
-
- irq_put_desc_unlock(desc, flags);
- return 0;
+ scoped_irqdesc_get_and_lock(irq, 0) {
+ scoped_irqdesc->parent_irq = parent_irq;
+ return 0;
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(irq_set_parent);
#endif
@@ -1079,19 +1017,19 @@ static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *a
return;
}
- raw_spin_lock_irq(&desc->lock);
- /*
- * This code is triggered unconditionally. Check the affinity
- * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
- */
- if (cpumask_available(desc->irq_common_data.affinity)) {
- const struct cpumask *m;
+ scoped_guard(raw_spinlock_irq, &desc->lock) {
+ /*
+ * This code is triggered unconditionally. Check the affinity
+ * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
+ */
+ if (cpumask_available(desc->irq_common_data.affinity)) {
+ const struct cpumask *m;
- m = irq_data_get_effective_affinity_mask(&desc->irq_data);
- cpumask_copy(mask, m);
- valid = true;
+ m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+ cpumask_copy(mask, m);
+ valid = true;
+ }
}
- raw_spin_unlock_irq(&desc->lock);
if (valid)
set_cpus_allowed_ptr(current, mask);
@@ -1259,9 +1197,8 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
if (WARN_ON_ONCE(!secondary))
return;
- raw_spin_lock_irq(&desc->lock);
+ guard(raw_spinlock_irq)(&desc->lock);
__irq_wake_thread(desc, secondary);
- raw_spin_unlock_irq(&desc->lock);
}
/*
@@ -1334,21 +1271,19 @@ static int irq_thread(void *data)
}
/**
- * irq_wake_thread - wake the irq thread for the action identified by dev_id
- * @irq: Interrupt line
- * @dev_id: Device identity for which the thread should be woken
- *
+ * irq_wake_thread - wake the irq thread for the action identified by dev_id
+ * @irq: Interrupt line
+ * @dev_id: Device identity for which the thread should be woken
*/
void irq_wake_thread(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
- unsigned long flags;
if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc)))
return;
- raw_spin_lock_irqsave(&desc->lock, flags);
+ guard(raw_spinlock_irqsave)(&desc->lock);
for_each_action_of_desc(desc, action) {
if (action->dev_id == dev_id) {
if (action->thread)
@@ -1356,7 +1291,6 @@ void irq_wake_thread(unsigned int irq, void *dev_id)
break;
}
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL_GPL(irq_wake_thread);
@@ -1987,9 +1921,8 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
* There is no interrupt on the fly anymore. Deactivate it
* completely.
*/
- raw_spin_lock_irqsave(&desc->lock, flags);
- irq_domain_deactivate_irq(&desc->irq_data);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irqsave, &desc->lock)
+ irq_domain_deactivate_irq(&desc->irq_data);
irq_release_resources(desc);
chip_bus_sync_unlock(desc);
@@ -2005,20 +1938,19 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
}
/**
- * free_irq - free an interrupt allocated with request_irq
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
+ * free_irq - free an interrupt allocated with request_irq
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
*
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function. The function
- * does not return until any executing interrupts for this IRQ
- * have completed.
+ * Remove an interrupt handler. The handler is removed and if the interrupt
+ * line is no longer in use by any driver it is disabled. On a shared IRQ
+ * the caller must ensure the interrupt is disabled on the card it drives
+ * before calling this function. The function does not return until any
+ * executing interrupts for this IRQ have completed.
*
- * This function must not be called from interrupt context.
+ * This function must not be called from interrupt context.
*
- * Returns the devname argument passed to request_irq.
+ * Returns the devname argument passed to request_irq.
*/
const void *free_irq(unsigned int irq, void *dev_id)
{
@@ -2075,8 +2007,6 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
const void *free_nmi(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
- const void *devname;
if (!desc || WARN_ON(!irq_is_nmi(desc)))
return NULL;
@@ -2088,53 +2018,46 @@ const void *free_nmi(unsigned int irq, void *dev_id)
if (WARN_ON(desc->depth == 0))
disable_nmi_nosync(irq);
- raw_spin_lock_irqsave(&desc->lock, flags);
-
+ guard(raw_spinlock_irqsave)(&desc->lock);
irq_nmi_teardown(desc);
- devname = __cleanup_nmi(irq, desc);
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
- return devname;
+ return __cleanup_nmi(irq, desc);
}
/**
- * request_threaded_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs.
- * Primary handler for threaded interrupts.
- * If handler is NULL and thread_fn != NULL
- * the default primary handler is installed.
- * @thread_fn: Function called from the irq handler thread
- * If NULL, no irq thread is created
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * If you want to set up a threaded irq handler for your device
- * then you need to supply @handler and @thread_fn. @handler is
- * still called in hard interrupt context and has to check
- * whether the interrupt originates from the device. If yes it
- * needs to disable the interrupt on the device and return
- * IRQ_WAKE_THREAD which will wake up the handler thread and run
- * @thread_fn. This split handler design is necessary to support
- * shared interrupts.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
+ * request_threaded_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * Primary handler for threaded interrupts.
+ * If handler is NULL and thread_fn != NULL
+ * the default primary handler is installed.
+ * @thread_fn: Function called from the irq handler thread
+ * If NULL, no irq thread is created
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the interrupt line
+ * and IRQ handling. From the point this call is made your handler function
+ * may be invoked. Since your handler function must clear any interrupt the
+ * board raises, you must take care both to initialise your hardware and to
+ * set up the interrupt handler in the right order.
+ *
+ * If you want to set up a threaded irq handler for your device then you
+ * need to supply @handler and @thread_fn. @handler is still called in hard
+ * interrupt context and has to check whether the interrupt originates from
+ * the device. If yes it needs to disable the interrupt on the device and
+ * return IRQ_WAKE_THREAD which will wake up the handler thread and run
+ * @thread_fn. This split handler design is necessary to support shared
+ * interrupts.
+ *
+ * @dev_id must be globally unique. Normally the address of the device data
+ * structure is used as the cookie. Since the handler receives this value
+ * it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id as this is
+ * required when freeing the interrupt.
+ *
+ * Flags:
*
* IRQF_SHARED Interrupt is shared
* IRQF_TRIGGER_* Specify active edge(s) or level
@@ -2232,21 +2155,20 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
EXPORT_SYMBOL(request_threaded_irq);
/**
- * request_any_context_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs.
- * Threaded handler for threaded interrupts.
- * @flags: Interrupt type flags
- * @name: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. It selects either a
- * hardirq or threaded handling method depending on the
- * context.
- *
- * On failure, it returns a negative value. On success,
- * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
+ * request_any_context_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * Threaded handler for threaded interrupts.
+ * @flags: Interrupt type flags
+ * @name: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the interrupt line
+ * and IRQ handling. It selects either a hardirq or threaded handling
+ * method depending on the context.
+ *
+ * Returns: On failure, it returns a negative value. On success, it returns either
+ * IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
*/
int request_any_context_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *name, void *dev_id)
@@ -2273,37 +2195,35 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
EXPORT_SYMBOL_GPL(request_any_context_irq);
/**
- * request_nmi - allocate an interrupt line for NMI delivery
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs.
- * Threaded handler for threaded interrupts.
- * @irqflags: Interrupt type flags
- * @name: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. It sets up the IRQ line
- * to be handled as an NMI.
- *
- * An interrupt line delivering NMIs cannot be shared and IRQ handling
- * cannot be threaded.
- *
- * Interrupt lines requested for NMI delivering must produce per cpu
- * interrupts and have auto enabling setting disabled.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If the interrupt line cannot be used to deliver NMIs, function
- * will fail and return a negative value.
+ * request_nmi - allocate an interrupt line for NMI delivery
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * Threaded handler for threaded interrupts.
+ * @irqflags: Interrupt type flags
+ * @name: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the interrupt line
+ * and IRQ handling. It sets up the IRQ line to be handled as an NMI.
+ *
+ * An interrupt line delivering NMIs cannot be shared and IRQ handling
+ * cannot be threaded.
+ *
+ * Interrupt lines requested for NMI delivering must produce per cpu
+ * interrupts and have auto enabling setting disabled.
+ *
+ * @dev_id must be globally unique. Normally the address of the device data
+ * structure is used as the cookie. Since the handler receives this value
+ * it makes sense to use it.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, function will fail
+ * and return a negative value.
*/
int request_nmi(unsigned int irq, irq_handler_t handler,
unsigned long irqflags, const char *name, void *dev_id)
{
struct irqaction *action;
struct irq_desc *desc;
- unsigned long flags;
int retval;
if (irq == IRQ_NOTCONNECTED)
@@ -2345,21 +2265,17 @@ int request_nmi(unsigned int irq, irq_handler_t handler,
if (retval)
goto err_irq_setup;
- raw_spin_lock_irqsave(&desc->lock, flags);
-
- /* Setup NMI state */
- desc->istate |= IRQS_NMI;
- retval = irq_nmi_setup(desc);
- if (retval) {
- __cleanup_nmi(irq, desc);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return -EINVAL;
+ scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+ /* Setup NMI state */
+ desc->istate |= IRQS_NMI;
+ retval = irq_nmi_setup(desc);
+ if (retval) {
+ __cleanup_nmi(irq, desc);
+ return -EINVAL;
+ }
+ return 0;
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
- return 0;
-
err_irq_setup:
irq_chip_pm_put(&desc->irq_data);
err_out:
@@ -2370,35 +2286,25 @@ err_out:
void enable_percpu_irq(unsigned int irq, unsigned int type)
{
- unsigned int cpu = smp_processor_id();
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) {
+ struct irq_desc *desc = scoped_irqdesc;
- if (!desc)
- return;
-
- /*
- * If the trigger type is not specified by the caller, then
- * use the default for this interrupt.
- */
- type &= IRQ_TYPE_SENSE_MASK;
- if (type == IRQ_TYPE_NONE)
- type = irqd_get_trigger_type(&desc->irq_data);
-
- if (type != IRQ_TYPE_NONE) {
- int ret;
-
- ret = __irq_set_trigger(desc, type);
-
- if (ret) {
- WARN(1, "failed to set type for IRQ%d\n", irq);
- goto out;
+ /*
+ * If the trigger type is not specified by the caller, then
+ * use the default for this interrupt.
+ */
+ type &= IRQ_TYPE_SENSE_MASK;
+ if (type == IRQ_TYPE_NONE)
+ type = irqd_get_trigger_type(&desc->irq_data);
+
+ if (type != IRQ_TYPE_NONE) {
+ if (__irq_set_trigger(desc, type)) {
+ WARN(1, "failed to set type for IRQ%d\n", irq);
+ return;
+ }
}
+ irq_percpu_enable(desc, smp_processor_id());
}
-
- irq_percpu_enable(desc, cpu);
-out:
- irq_put_desc_unlock(desc, flags);
}
EXPORT_SYMBOL_GPL(enable_percpu_irq);
@@ -2416,33 +2322,16 @@ void enable_percpu_nmi(unsigned int irq, unsigned int type)
*/
bool irq_percpu_is_enabled(unsigned int irq)
{
- unsigned int cpu = smp_processor_id();
- struct irq_desc *desc;
- unsigned long flags;
- bool is_enabled;
-
- desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
- if (!desc)
- return false;
-
- is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
- irq_put_desc_unlock(desc, flags);
-
- return is_enabled;
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU)
+ return cpumask_test_cpu(smp_processor_id(), scoped_irqdesc->percpu_enabled);
+ return false;
}
EXPORT_SYMBOL_GPL(irq_percpu_is_enabled);
void disable_percpu_irq(unsigned int irq)
{
- unsigned int cpu = smp_processor_id();
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
-
- if (!desc)
- return;
-
- irq_percpu_disable(desc, cpu);
- irq_put_desc_unlock(desc, flags);
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU)
+ irq_percpu_disable(scoped_irqdesc, smp_processor_id());
}
EXPORT_SYMBOL_GPL(disable_percpu_irq);
@@ -2458,71 +2347,47 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
- unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
if (!desc)
return NULL;
- raw_spin_lock_irqsave(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+ action = desc->action;
+ if (!action || action->percpu_dev_id != dev_id) {
+ WARN(1, "Trying to free already-free IRQ %d\n", irq);
+ return NULL;
+ }
- action = desc->action;
- if (!action || action->percpu_dev_id != dev_id) {
- WARN(1, "Trying to free already-free IRQ %d\n", irq);
- goto bad;
- }
+ if (!cpumask_empty(desc->percpu_enabled)) {
+ WARN(1, "percpu IRQ %d still enabled on CPU%d!\n",
+ irq, cpumask_first(desc->percpu_enabled));
+ return NULL;
+ }
- if (!cpumask_empty(desc->percpu_enabled)) {
- WARN(1, "percpu IRQ %d still enabled on CPU%d!\n",
- irq, cpumask_first(desc->percpu_enabled));
- goto bad;
+ /* Found it - now remove it from the list of entries: */
+ desc->action = NULL;
+ desc->istate &= ~IRQS_NMI;
}
- /* Found it - now remove it from the list of entries: */
- desc->action = NULL;
-
- desc->istate &= ~IRQS_NMI;
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
unregister_handler_proc(irq, action);
-
irq_chip_pm_put(&desc->irq_data);
module_put(desc->owner);
return action;
-
-bad:
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return NULL;
}
/**
- * remove_percpu_irq - free a per-cpu interrupt
- * @irq: Interrupt line to free
- * @act: irqaction for the interrupt
+ * free_percpu_irq - free an interrupt allocated with request_percpu_irq
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
*
- * Used to remove interrupts statically setup by the early boot process.
- */
-void remove_percpu_irq(unsigned int irq, struct irqaction *act)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (desc && irq_settings_is_per_cpu_devid(desc))
- __free_percpu_irq(irq, act->percpu_dev_id);
-}
-
-/**
- * free_percpu_irq - free an interrupt allocated with request_percpu_irq
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
+ * Remove a percpu interrupt handler. The handler is removed, but the
+ * interrupt line is not disabled. This must be done on each CPU before
+ * calling this function. The function does not return until any executing
+ * interrupts for this IRQ have completed.
*
- * Remove a percpu interrupt handler. The handler is removed, but
- * the interrupt line is not disabled. This must be done on each
- * CPU before calling this function. The function does not return
- * until any executing interrupts for this IRQ have completed.
- *
- * This function must not be called from interrupt context.
+ * This function must not be called from interrupt context.
*/
void free_percpu_irq(unsigned int irq, void __percpu *dev_id)
{
@@ -2551,9 +2416,9 @@ void free_percpu_nmi(unsigned int irq, void __percpu *dev_id)
}
/**
- * setup_percpu_irq - setup a per-cpu interrupt
- * @irq: Interrupt line to setup
- * @act: irqaction for the interrupt
+ * setup_percpu_irq - setup a per-cpu interrupt
+ * @irq: Interrupt line to setup
+ * @act: irqaction for the interrupt
*
* Used to statically setup per-cpu interrupts in the early boot process.
*/
@@ -2578,21 +2443,20 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
}
/**
- * __request_percpu_irq - allocate a percpu interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs.
- * @flags: Interrupt type flags (IRQF_TIMER only)
- * @devname: An ascii name for the claiming device
- * @dev_id: A percpu cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt on the local CPU. If the interrupt is supposed to be
- * enabled on other CPUs, it has to be done on each CPU using
- * enable_percpu_irq().
- *
- * Dev_id must be globally unique. It is a per-cpu variable, and
- * the handler gets called with the interrupted CPU's instance of
- * that variable.
+ * __request_percpu_irq - allocate a percpu interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * @flags: Interrupt type flags (IRQF_TIMER only)
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A percpu cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the interrupt on the
+ * local CPU. If the interrupt is supposed to be enabled on other CPUs, it
+ * has to be done on each CPU using enable_percpu_irq().
+ *
+ * @dev_id must be globally unique. It is a per-cpu variable, and
+ * the handler gets called with the interrupted CPU's instance of
+ * that variable.
*/
int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *devname,
@@ -2640,32 +2504,31 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
EXPORT_SYMBOL_GPL(__request_percpu_irq);
/**
- * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs.
- * @name: An ascii name for the claiming device
- * @dev_id: A percpu cookie passed back to the handler function
+ * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * @name: An ascii name for the claiming device
+ * @dev_id: A percpu cookie passed back to the handler function
*
- * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs
- * have to be setup on each CPU by calling prepare_percpu_nmi() before
- * being enabled on the same CPU by using enable_percpu_nmi().
+ * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs
+ * have to be setup on each CPU by calling prepare_percpu_nmi() before
+ * being enabled on the same CPU by using enable_percpu_nmi().
*
- * Dev_id must be globally unique. It is a per-cpu variable, and
- * the handler gets called with the interrupted CPU's instance of
- * that variable.
+ * @dev_id must be globally unique. It is a per-cpu variable, and the
+ * handler gets called with the interrupted CPU's instance of that
+ * variable.
*
- * Interrupt lines requested for NMI delivering should have auto enabling
- * setting disabled.
+ * Interrupt lines requested for NMI delivering should have auto enabling
+ * setting disabled.
*
- * If the interrupt line cannot be used to deliver NMIs, function
- * will fail returning a negative value.
+ * If the interrupt line cannot be used to deliver NMIs, function
+ * will fail returning a negative value.
*/
int request_percpu_nmi(unsigned int irq, irq_handler_t handler,
const char *name, void __percpu *dev_id)
{
struct irqaction *action;
struct irq_desc *desc;
- unsigned long flags;
int retval;
if (!handler)
@@ -2701,10 +2564,8 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler,
if (retval)
goto err_irq_setup;
- raw_spin_lock_irqsave(&desc->lock, flags);
- desc->istate |= IRQS_NMI;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ scoped_guard(raw_spinlock_irqsave, &desc->lock)
+ desc->istate |= IRQS_NMI;
return 0;
err_irq_setup:
@@ -2716,79 +2577,55 @@ err_out:
}
/**
- * prepare_percpu_nmi - performs CPU local setup for NMI delivery
- * @irq: Interrupt line to prepare for NMI delivery
+ * prepare_percpu_nmi - performs CPU local setup for NMI delivery
+ * @irq: Interrupt line to prepare for NMI delivery
*
- * This call prepares an interrupt line to deliver NMI on the current CPU,
- * before that interrupt line gets enabled with enable_percpu_nmi().
+ * This call prepares an interrupt line to deliver NMI on the current CPU,
+ * before that interrupt line gets enabled with enable_percpu_nmi().
*
- * As a CPU local operation, this should be called from non-preemptible
- * context.
+ * As a CPU local operation, this should be called from non-preemptible
+ * context.
*
- * If the interrupt line cannot be used to deliver NMIs, function
- * will fail returning a negative value.
+ * If the interrupt line cannot be used to deliver NMIs, function will fail
+ * returning a negative value.
*/
int prepare_percpu_nmi(unsigned int irq)
{
- unsigned long flags;
- struct irq_desc *desc;
- int ret = 0;
+ int ret = -EINVAL;
WARN_ON(preemptible());
- desc = irq_get_desc_lock(irq, &flags,
- IRQ_GET_DESC_CHECK_PERCPU);
- if (!desc)
- return -EINVAL;
-
- if (WARN(!irq_is_nmi(desc),
- KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n",
- irq)) {
- ret = -EINVAL;
- goto out;
- }
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) {
+ if (WARN(!irq_is_nmi(scoped_irqdesc),
+ "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", irq))
+ return -EINVAL;
- ret = irq_nmi_setup(desc);
- if (ret) {
- pr_err("Failed to setup NMI delivery: irq %u\n", irq);
- goto out;
+ ret = irq_nmi_setup(scoped_irqdesc);
+ if (ret)
+ pr_err("Failed to setup NMI delivery: irq %u\n", irq);
}
-
-out:
- irq_put_desc_unlock(desc, flags);
return ret;
}
/**
- * teardown_percpu_nmi - undoes NMI setup of IRQ line
- * @irq: Interrupt line from which CPU local NMI configuration should be
- * removed
- *
- * This call undoes the setup done by prepare_percpu_nmi().
+ * teardown_percpu_nmi - undoes NMI setup of IRQ line
+ * @irq: Interrupt line from which CPU local NMI configuration should be removed
*
- * IRQ line should not be enabled for the current CPU.
+ * This call undoes the setup done by prepare_percpu_nmi().
*
- * As a CPU local operation, this should be called from non-preemptible
- * context.
+ * IRQ line should not be enabled for the current CPU.
+ * As a CPU local operation, this should be called from non-preemptible
+ * context.
*/
void teardown_percpu_nmi(unsigned int irq)
{
- unsigned long flags;
- struct irq_desc *desc;
-
WARN_ON(preemptible());
- desc = irq_get_desc_lock(irq, &flags,
- IRQ_GET_DESC_CHECK_PERCPU);
- if (!desc)
- return;
-
- if (WARN_ON(!irq_is_nmi(desc)))
- goto out;
-
- irq_nmi_teardown(desc);
-out:
- irq_put_desc_unlock(desc, flags);
+ scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) {
+ if (WARN_ON(!irq_is_nmi(scoped_irqdesc)))
+ return;
+ irq_nmi_teardown(scoped_irqdesc);
+ }
}
static int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state)
@@ -2815,87 +2652,62 @@ static int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state
}
/**
- * irq_get_irqchip_state - returns the irqchip state of a interrupt.
- * @irq: Interrupt line that is forwarded to a VM
- * @which: One of IRQCHIP_STATE_* the caller wants to know about
- * @state: a pointer to a boolean where the state is to be stored
+ * irq_get_irqchip_state - returns the irqchip state of a interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+ * @which: One of IRQCHIP_STATE_* the caller wants to know about
+ * @state: a pointer to a boolean where the state is to be stored
*
- * This call snapshots the internal irqchip state of an
- * interrupt, returning into @state the bit corresponding to
- * stage @which
+ * This call snapshots the internal irqchip state of an interrupt,
+ * returning into @state the bit corresponding to stage @which
*
- * This function should be called with preemption disabled if the
- * interrupt controller has per-cpu registers.
+ * This function should be called with preemption disabled if the interrupt
+ * controller has per-cpu registers.
*/
-int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
- bool *state)
+int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state)
{
- struct irq_desc *desc;
- struct irq_data *data;
- unsigned long flags;
- int err = -EINVAL;
-
- desc = irq_get_desc_buslock(irq, &flags, 0);
- if (!desc)
- return err;
+ scoped_irqdesc_get_and_buslock(irq, 0) {
+ struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc);
- data = irq_desc_get_irq_data(desc);
-
- err = __irq_get_irqchip_state(data, which, state);
-
- irq_put_desc_busunlock(desc, flags);
- return err;
+ return __irq_get_irqchip_state(data, which, state);
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
/**
- * irq_set_irqchip_state - set the state of a forwarded interrupt.
- * @irq: Interrupt line that is forwarded to a VM
- * @which: State to be restored (one of IRQCHIP_STATE_*)
- * @val: Value corresponding to @which
+ * irq_set_irqchip_state - set the state of a forwarded interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+ * @which: State to be restored (one of IRQCHIP_STATE_*)
+ * @val: Value corresponding to @which
*
- * This call sets the internal irqchip state of an interrupt,
- * depending on the value of @which.
+ * This call sets the internal irqchip state of an interrupt, depending on
+ * the value of @which.
*
- * This function should be called with migration disabled if the
- * interrupt controller has per-cpu registers.
+ * This function should be called with migration disabled if the interrupt
+ * controller has per-cpu registers.
*/
-int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
- bool val)
+int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool val)
{
- struct irq_desc *desc;
- struct irq_data *data;
- struct irq_chip *chip;
- unsigned long flags;
- int err = -EINVAL;
+ scoped_irqdesc_get_and_buslock(irq, 0) {
+ struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc);
+ struct irq_chip *chip;
- desc = irq_get_desc_buslock(irq, &flags, 0);
- if (!desc)
- return err;
+ do {
+ chip = irq_data_get_irq_chip(data);
- data = irq_desc_get_irq_data(desc);
+ if (WARN_ON_ONCE(!chip))
+ return -ENODEV;
- do {
- chip = irq_data_get_irq_chip(data);
- if (WARN_ON_ONCE(!chip)) {
- err = -ENODEV;
- goto out_unlock;
- }
- if (chip->irq_set_irqchip_state)
- break;
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- data = data->parent_data;
-#else
- data = NULL;
-#endif
- } while (data);
+ if (chip->irq_set_irqchip_state)
+ break;
- if (data)
- err = chip->irq_set_irqchip_state(data, which, val);
+ data = irqd_get_parent_data(data);
+ } while (data);
-out_unlock:
- irq_put_desc_busunlock(desc, flags);
- return err;
+ if (data)
+ return chip->irq_set_irqchip_state(data, which, val);
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(irq_set_irqchip_state);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index c05ba7ca00fa..9febe797a5f6 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -59,7 +59,8 @@ struct msi_ctrl {
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
-
+static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg);
/**
* msi_alloc_desc - Allocate an initialized msi_desc
@@ -343,26 +344,30 @@ int msi_setup_device_data(struct device *dev)
}
/**
- * msi_lock_descs - Lock the MSI descriptor storage of a device
+ * __msi_lock_descs - Lock the MSI descriptor storage of a device
* @dev: Device to operate on
+ *
+ * Internal function for guard(msi_descs_lock). Don't use in code.
*/
-void msi_lock_descs(struct device *dev)
+void __msi_lock_descs(struct device *dev)
{
mutex_lock(&dev->msi.data->mutex);
}
-EXPORT_SYMBOL_GPL(msi_lock_descs);
+EXPORT_SYMBOL_GPL(__msi_lock_descs);
/**
- * msi_unlock_descs - Unlock the MSI descriptor storage of a device
+ * __msi_unlock_descs - Unlock the MSI descriptor storage of a device
* @dev: Device to operate on
+ *
+ * Internal function for guard(msi_descs_lock). Don't use in code.
*/
-void msi_unlock_descs(struct device *dev)
+void __msi_unlock_descs(struct device *dev)
{
/* Invalidate the index which was cached by the iterator */
dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
mutex_unlock(&dev->msi.data->mutex);
}
-EXPORT_SYMBOL_GPL(msi_unlock_descs);
+EXPORT_SYMBOL_GPL(__msi_unlock_descs);
static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
enum msi_desc_filter filter)
@@ -448,7 +453,6 @@ EXPORT_SYMBOL_GPL(msi_next_desc);
unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
{
struct msi_desc *desc;
- unsigned int ret = 0;
bool pcimsi = false;
struct xarray *xa;
@@ -462,7 +466,7 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
pcimsi = to_pci_dev(dev)->msi_enabled;
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
xa = &dev->msi.data->__domains[domid].store;
desc = xa_load(xa, pcimsi ? 0 : index);
if (desc && desc->irq) {
@@ -471,16 +475,12 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
* PCI-MSIX and platform MSI use a descriptor per
* interrupt.
*/
- if (pcimsi) {
- if (index < desc->nvec_used)
- ret = desc->irq + index;
- } else {
- ret = desc->irq;
- }
+ if (!pcimsi)
+ return desc->irq;
+ if (index < desc->nvec_used)
+ return desc->irq + index;
}
-
- msi_unlock_descs(dev);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(msi_domain_get_virq);
@@ -796,6 +796,10 @@ static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
return 0;
}
+static void msi_domain_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
+{
+}
+
static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
struct msi_desc *desc)
{
@@ -821,6 +825,7 @@ static struct msi_domain_ops msi_domain_ops_default = {
.get_hwirq = msi_domain_ops_get_hwirq,
.msi_init = msi_domain_ops_init,
.msi_prepare = msi_domain_ops_prepare,
+ .msi_teardown = msi_domain_ops_teardown,
.set_desc = msi_domain_ops_set_desc,
};
@@ -842,6 +847,8 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info)
ops->msi_init = msi_domain_ops_default.msi_init;
if (ops->msi_prepare == NULL)
ops->msi_prepare = msi_domain_ops_default.msi_prepare;
+ if (ops->msi_teardown == NULL)
+ ops->msi_teardown = msi_domain_ops_default.msi_teardown;
if (ops->set_desc == NULL)
ops->set_desc = msi_domain_ops_default.set_desc;
}
@@ -905,6 +912,32 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
}
/**
+ * msi_create_parent_irq_domain - Create an MSI-parent interrupt domain
+ * @info: MSI irqdomain creation info
+ * @msi_parent_ops: MSI parent callbacks and configuration
+ *
+ * Return: pointer to the created &struct irq_domain or %NULL on failure
+ */
+struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info,
+ const struct msi_parent_ops *msi_parent_ops)
+{
+ struct irq_domain *d;
+
+ info->hwirq_max = max(info->hwirq_max, info->size);
+ info->size = info->hwirq_max;
+ info->domain_flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ info->bus_token = msi_parent_ops->bus_select_token;
+
+ d = irq_domain_instantiate(info);
+ if (IS_ERR(d))
+ return NULL;
+
+ d->msi_parent_ops = msi_parent_ops;
+ return d;
+}
+EXPORT_SYMBOL_GPL(msi_create_parent_irq_domain);
+
+/**
* msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
* in the domain hierarchy
* @dev: The device for which the domain should be created
@@ -998,9 +1031,8 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
void *chip_data)
{
struct irq_domain *domain, *parent = dev->msi.domain;
- struct fwnode_handle *fwnode, *fwnalloced = NULL;
- struct msi_domain_template *bundle;
const struct msi_parent_ops *pops;
+ struct fwnode_handle *fwnode;
if (!irq_domain_is_msi_parent(parent))
return false;
@@ -1008,7 +1040,8 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
return false;
- bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
+ struct msi_domain_template *bundle __free(kfree) =
+ kmemdup(template, sizeof(*bundle), GFP_KERNEL);
if (!bundle)
return false;
@@ -1017,6 +1050,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
bundle->info.ops = &bundle->ops;
bundle->info.data = domain_data;
bundle->info.chip_data = chip_data;
+ bundle->info.alloc_data = &bundle->alloc_info;
pops = parent->msi_parent_ops;
snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
@@ -1031,41 +1065,43 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
* node as they are not guaranteed to have a fwnode. They are never
* looked up and always handled in the context of the device.
*/
- if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)
- fwnode = dev->fwnode;
+ struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL;
+
+ if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE))
+ fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name);
else
- fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name);
+ fwnode = dev->fwnode;
if (!fwnode)
- goto free_bundle;
+ return false;
if (msi_setup_device_data(dev))
- goto free_fwnode;
-
- msi_lock_descs(dev);
+ return false;
+ guard(msi_descs_lock)(dev);
if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
- goto fail;
+ return false;
if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
- goto fail;
+ return false;
domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
if (!domain)
- goto fail;
+ return false;
domain->dev = dev;
dev->msi.data->__domains[domid].domain = domain;
- msi_unlock_descs(dev);
- return true;
-fail:
- msi_unlock_descs(dev);
-free_fwnode:
- irq_domain_free_fwnode(fwnalloced);
-free_bundle:
- kfree(bundle);
- return false;
+ if (msi_domain_prepare_irqs(domain, dev, hwsize, &bundle->alloc_info)) {
+ dev->msi.data->__domains[domid].domain = NULL;
+ irq_domain_remove(domain);
+ return false;
+ }
+
+ /* @bundle and @fwnode_alloced are now in use. Prevent cleanup */
+ retain_and_null_ptr(bundle);
+ retain_and_null_ptr(fwnode_alloced);
+ return true;
}
/**
@@ -1079,23 +1115,21 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
struct msi_domain_info *info;
struct irq_domain *domain;
- msi_lock_descs(dev);
-
+ guard(msi_descs_lock)(dev);
domain = msi_get_device_domain(dev, domid);
-
if (!domain || !irq_domain_is_msi_device(domain))
- goto unlock;
+ return;
dev->msi.data->__domains[domid].domain = NULL;
info = domain->host_data;
+
+ info->ops->msi_teardown(domain, info->alloc_data);
+
if (irq_domain_is_msi_device(domain))
fwnode = domain->fwnode;
irq_domain_remove(domain);
irq_domain_free_fwnode(fwnode);
kfree(container_of(info, struct msi_domain_template, info));
-
-unlock:
- msi_unlock_descs(dev);
}
/**
@@ -1111,16 +1145,14 @@ bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
{
struct msi_domain_info *info;
struct irq_domain *domain;
- bool ret = false;
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
domain = msi_get_device_domain(dev, domid);
if (domain && irq_domain_is_msi_device(domain)) {
info = domain->host_data;
- ret = info->bus_token == bus_token;
+ return info->bus_token == bus_token;
}
- msi_unlock_descs(dev);
- return ret;
+ return false;
}
static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
@@ -1238,6 +1270,24 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
return 0;
}
+static int populate_alloc_info(struct irq_domain *domain, struct device *dev,
+ unsigned int nirqs, msi_alloc_info_t *arg)
+{
+ struct msi_domain_info *info = domain->host_data;
+
+ /*
+ * If the caller has provided a template alloc info, use that. Once
+ * all users of msi_create_irq_domain() have been eliminated, this
+ * should be the only source of allocation information, and the
+ * prepare call below should be finally removed.
+ */
+ if (!info->alloc_data)
+ return msi_domain_prepare_irqs(domain, dev, nirqs, arg);
+
+ *arg = *info->alloc_data;
+ return 0;
+}
+
static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
struct msi_ctrl *ctrl)
{
@@ -1250,7 +1300,7 @@ static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain
unsigned long idx;
int i, ret, virq;
- ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
+ ret = populate_alloc_info(domain, dev, ctrl->nirqs, &arg);
if (ret)
return ret;
@@ -1391,12 +1441,9 @@ int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
unsigned int first, unsigned int last)
{
- int ret;
- msi_lock_descs(dev);
- ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
- msi_unlock_descs(dev);
- return ret;
+ guard(msi_descs_lock)(dev);
+ return msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
}
EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
@@ -1500,12 +1547,8 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
const struct irq_affinity_desc *affdesc,
union msi_instance_cookie *icookie)
{
- struct msi_map map;
-
- msi_lock_descs(dev);
- map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
- msi_unlock_descs(dev);
- return map;
+ guard(msi_descs_lock)(dev);
+ return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
}
/**
@@ -1542,13 +1585,11 @@ int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
icookie.value = ((u64)type << 32) | hwirq;
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
map.index = -EINVAL;
else
map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
- msi_unlock_descs(dev);
-
return map.index >= 0 ? map.virq : map.index;
}
@@ -1641,9 +1682,8 @@ void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
unsigned int first, unsigned int last)
{
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
msi_domain_free_irqs_range_locked(dev, domid, first, last);
- msi_unlock_descs(dev);
}
EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
@@ -1673,9 +1713,8 @@ void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
*/
void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
{
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
msi_domain_free_irqs_all_locked(dev, domid);
- msi_unlock_descs(dev);
}
/**
@@ -1694,12 +1733,11 @@ void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
return;
- msi_lock_descs(dev);
- if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) {
- msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
- desc->msi_index);
- }
- msi_unlock_descs(dev);
+ guard(msi_descs_lock)(dev);
+ if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain))
+ return;
+ msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
+ desc->msi_index);
}
/**
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index c556bc49d213..445912d51033 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -46,8 +46,7 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
desc->cond_suspend_depth++;
WARN_ON_ONCE(desc->no_suspend_depth &&
- (desc->no_suspend_depth +
- desc->cond_suspend_depth) != desc->nr_actions);
+ (desc->no_suspend_depth + desc->cond_suspend_depth) != desc->nr_actions);
}
/*
@@ -134,14 +133,12 @@ void suspend_device_irqs(void)
int irq;
for_each_irq_desc(irq, desc) {
- unsigned long flags;
bool sync;
if (irq_settings_is_nested_thread(desc))
continue;
- raw_spin_lock_irqsave(&desc->lock, flags);
- sync = suspend_device_irq(desc);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irqsave, &desc->lock)
+ sync = suspend_device_irq(desc);
if (sync)
synchronize_irq(irq);
@@ -186,18 +183,15 @@ static void resume_irqs(bool want_early)
int irq;
for_each_irq_desc(irq, desc) {
- unsigned long flags;
- bool is_early = desc->action &&
- desc->action->flags & IRQF_EARLY_RESUME;
+ bool is_early = desc->action && desc->action->flags & IRQF_EARLY_RESUME;
if (!is_early && want_early)
continue;
if (irq_settings_is_nested_thread(desc))
continue;
- raw_spin_lock_irqsave(&desc->lock, flags);
+ guard(raw_spinlock_irqsave)(&desc->lock);
resume_irq(desc);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}
@@ -207,22 +201,16 @@ static void resume_irqs(bool want_early)
*/
void rearm_wake_irq(unsigned int irq)
{
- unsigned long flags;
- struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+ scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ struct irq_desc *desc = scoped_irqdesc;
- if (!desc)
- return;
-
- if (!(desc->istate & IRQS_SUSPENDED) ||
- !irqd_is_wakeup_set(&desc->irq_data))
- goto unlock;
-
- desc->istate &= ~IRQS_SUSPENDED;
- irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
- __enable_irq(desc);
+ if (!(desc->istate & IRQS_SUSPENDED) || !irqd_is_wakeup_set(&desc->irq_data))
+ return;
-unlock:
- irq_put_desc_busunlock(desc, flags);
+ desc->istate &= ~IRQS_SUSPENDED;
+ irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
+ __enable_irq(desc);
+ }
}
/**
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 8e29809de38d..29c2404e743b 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -81,20 +81,18 @@ static int show_irq_affinity(int type, struct seq_file *m)
static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
- unsigned long flags;
cpumask_var_t mask;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
- raw_spin_lock_irqsave(&desc->lock, flags);
- if (desc->affinity_hint)
- cpumask_copy(mask, desc->affinity_hint);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ scoped_guard(raw_spinlock_irq, &desc->lock) {
+ if (desc->affinity_hint)
+ cpumask_copy(mask, desc->affinity_hint);
+ }
seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
free_cpumask_var(mask);
-
return 0;
}
@@ -295,32 +293,26 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)
#define MAX_NAMELEN 128
-static int name_unique(unsigned int irq, struct irqaction *new_action)
+static bool name_unique(unsigned int irq, struct irqaction *new_action)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
- unsigned long flags;
- int ret = 1;
- raw_spin_lock_irqsave(&desc->lock, flags);
+ guard(raw_spinlock_irq)(&desc->lock);
for_each_action_of_desc(desc, action) {
if ((action != new_action) && action->name &&
- !strcmp(new_action->name, action->name)) {
- ret = 0;
- break;
- }
+ !strcmp(new_action->name, action->name))
+ return false;
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
+ return true;
}
void register_handler_proc(unsigned int irq, struct irqaction *action)
{
- char name [MAX_NAMELEN];
+ char name[MAX_NAMELEN];
struct irq_desc *desc = irq_to_desc(irq);
- if (!desc->dir || action->dir || !action->name ||
- !name_unique(irq, action))
+ if (!desc->dir || action->dir || !action->name || !name_unique(irq, action))
return;
snprintf(name, MAX_NAMELEN, "%s", action->name);
@@ -347,17 +339,16 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
* added, not when the descriptor is created, so multiple
* tasks might try to register at the same time.
*/
- mutex_lock(&register_lock);
+ guard(mutex)(&register_lock);
if (desc->dir)
- goto out_unlock;
-
- sprintf(name, "%d", irq);
+ return;
/* create /proc/irq/1234 */
+ sprintf(name, "%u", irq);
desc->dir = proc_mkdir(name, root_irq_dir);
if (!desc->dir)
- goto out_unlock;
+ return;
#ifdef CONFIG_SMP
umode_t umode = S_IRUGO;
@@ -366,31 +357,27 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
umode |= S_IWUSR;
/* create /proc/irq/<irq>/smp_affinity */
- proc_create_data("smp_affinity", umode, desc->dir,
- &irq_affinity_proc_ops, irqp);
+ proc_create_data("smp_affinity", umode, desc->dir, &irq_affinity_proc_ops, irqp);
/* create /proc/irq/<irq>/affinity_hint */
proc_create_single_data("affinity_hint", 0444, desc->dir,
- irq_affinity_hint_proc_show, irqp);
+ irq_affinity_hint_proc_show, irqp);
/* create /proc/irq/<irq>/smp_affinity_list */
proc_create_data("smp_affinity_list", umode, desc->dir,
&irq_affinity_list_proc_ops, irqp);
- proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show,
- irqp);
+ proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, irqp);
# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
proc_create_single_data("effective_affinity", 0444, desc->dir,
- irq_effective_aff_proc_show, irqp);
+ irq_effective_aff_proc_show, irqp);
proc_create_single_data("effective_affinity_list", 0444, desc->dir,
- irq_effective_aff_list_proc_show, irqp);
+ irq_effective_aff_list_proc_show, irqp);
# endif
#endif
proc_create_single_data("spurious", 0444, desc->dir,
- irq_spurious_proc_show, (void *)(long)irq);
+ irq_spurious_proc_show, (void *)(long)irq);
-out_unlock:
- mutex_unlock(&register_lock);
}
void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
@@ -468,7 +455,6 @@ int show_interrupts(struct seq_file *p, void *v)
int i = *(loff_t *) v, j;
struct irqaction *action;
struct irq_desc *desc;
- unsigned long flags;
if (i > ACTUAL_NR_IRQS)
return 0;
@@ -487,13 +473,13 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
}
- rcu_read_lock();
+ guard(rcu)();
desc = irq_to_desc(i);
if (!desc || irq_settings_is_hidden(desc))
- goto outsparse;
+ return 0;
if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs)
- goto outsparse;
+ return 0;
seq_printf(p, "%*d:", prec, i);
for_each_online_cpu(j) {
@@ -503,7 +489,7 @@ int show_interrupts(struct seq_file *p, void *v)
}
seq_putc(p, ' ');
- raw_spin_lock_irqsave(&desc->lock, flags);
+ guard(raw_spinlock_irq)(&desc->lock);
if (desc->irq_data.chip) {
if (desc->irq_data.chip->irq_print_chip)
desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
@@ -532,9 +518,6 @@ int show_interrupts(struct seq_file *p, void *v)
}
seq_putc(p, '\n');
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-outsparse:
- rcu_read_unlock();
return 0;
}
#endif
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 1b7fa72968bd..ca9cc1b806a9 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -30,18 +30,17 @@ static DEFINE_RAW_SPINLOCK(irq_resend_lock);
*/
static void resend_irqs(struct tasklet_struct *unused)
{
- struct irq_desc *desc;
-
- raw_spin_lock_irq(&irq_resend_lock);
+ guard(raw_spinlock_irq)(&irq_resend_lock);
while (!hlist_empty(&irq_resend_list)) {
- desc = hlist_entry(irq_resend_list.first, struct irq_desc,
- resend_node);
+ struct irq_desc *desc;
+
+ desc = hlist_entry(irq_resend_list.first, struct irq_desc, resend_node);
hlist_del_init(&desc->resend_node);
+
raw_spin_unlock(&irq_resend_lock);
desc->handle_irq(desc);
raw_spin_lock(&irq_resend_lock);
}
- raw_spin_unlock_irq(&irq_resend_lock);
}
/* Tasklet to handle resend: */
@@ -75,19 +74,18 @@ static int irq_sw_resend(struct irq_desc *desc)
}
/* Add to resend_list and activate the softirq: */
- raw_spin_lock(&irq_resend_lock);
- if (hlist_unhashed(&desc->resend_node))
- hlist_add_head(&desc->resend_node, &irq_resend_list);
- raw_spin_unlock(&irq_resend_lock);
+ scoped_guard(raw_spinlock, &irq_resend_lock) {
+ if (hlist_unhashed(&desc->resend_node))
+ hlist_add_head(&desc->resend_node, &irq_resend_list);
+ }
tasklet_schedule(&resend_tasklet);
return 0;
}
void clear_irq_resend(struct irq_desc *desc)
{
- raw_spin_lock(&irq_resend_lock);
+ guard(raw_spinlock)(&irq_resend_lock);
hlist_del_init(&desc->resend_node);
- raw_spin_unlock(&irq_resend_lock);
}
void irq_resend_init(struct irq_desc *desc)
@@ -172,30 +170,24 @@ int check_irq_resend(struct irq_desc *desc, bool inject)
*/
int irq_inject_interrupt(unsigned int irq)
{
- struct irq_desc *desc;
- unsigned long flags;
- int err;
+ int err = -EINVAL;
/* Try the state injection hardware interface first */
if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true))
return 0;
/* That failed, try via the resend mechanism */
- desc = irq_get_desc_buslock(irq, &flags, 0);
- if (!desc)
- return -EINVAL;
+ scoped_irqdesc_get_and_buslock(irq, 0) {
+ struct irq_desc *desc = scoped_irqdesc;
- /*
- * Only try to inject when the interrupt is:
- * - not NMI type
- * - activated
- */
- if (irq_is_nmi(desc) || !irqd_is_activated(&desc->irq_data))
- err = -EINVAL;
- else
- err = check_irq_resend(desc, true);
-
- irq_put_desc_busunlock(desc, flags);
+ /*
+ * Only try to inject when the interrupt is:
+ * - not NMI type
+ * - activated
+ */
+ if (!irq_is_nmi(desc) && irqd_is_activated(&desc->irq_data))
+ err = check_irq_resend(desc, true);
+ }
return err;
}
EXPORT_SYMBOL_GPL(irq_inject_interrupt);
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..8f26982e7300 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -34,8 +34,9 @@ static atomic_t irq_poll_active;
* true and let the handler run.
*/
bool irq_wait_for_poll(struct irq_desc *desc)
- __must_hold(&desc->lock)
{
+ lockdep_assert_held(&desc->lock);
+
if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
"irq poll in progress on cpu %d for irq %d\n",
smp_processor_id(), desc->irq_data.irq))
@@ -59,37 +60,35 @@ bool irq_wait_for_poll(struct irq_desc *desc)
/*
* Recovery handler for misrouted interrupts.
*/
-static int try_one_irq(struct irq_desc *desc, bool force)
+static bool try_one_irq(struct irq_desc *desc, bool force)
{
- irqreturn_t ret = IRQ_NONE;
struct irqaction *action;
+ bool ret = false;
- raw_spin_lock(&desc->lock);
+ guard(raw_spinlock)(&desc->lock);
/*
* PER_CPU, nested thread interrupts and interrupts explicitly
* marked polled are excluded from polling.
*/
- if (irq_settings_is_per_cpu(desc) ||
- irq_settings_is_nested_thread(desc) ||
+ if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc) ||
irq_settings_is_polled(desc))
- goto out;
+ return false;
/*
* Do not poll disabled interrupts unless the spurious
* disabled poller asks explicitly.
*/
if (irqd_irq_disabled(&desc->irq_data) && !force)
- goto out;
+ return false;
/*
* All handlers must agree on IRQF_SHARED, so we test just the
* first.
*/
action = desc->action;
- if (!action || !(action->flags & IRQF_SHARED) ||
- (action->flags & __IRQF_TIMER))
- goto out;
+ if (!action || !(action->flags & IRQF_SHARED) || (action->flags & __IRQF_TIMER))
+ return false;
/* Already running on another processor */
if (irqd_irq_inprogress(&desc->irq_data)) {
@@ -98,21 +97,19 @@ static int try_one_irq(struct irq_desc *desc, bool force)
* CPU to go looking for our mystery interrupt too
*/
desc->istate |= IRQS_PENDING;
- goto out;
+ return false;
}
/* Mark it poll in progress */
desc->istate |= IRQS_POLL_INPROGRESS;
do {
if (handle_irq_event(desc) == IRQ_HANDLED)
- ret = IRQ_HANDLED;
+ ret = true;
/* Make sure that there is still a valid action */
action = desc->action;
} while ((desc->istate & IRQS_PENDING) && action);
desc->istate &= ~IRQS_POLL_INPROGRESS;
-out:
- raw_spin_unlock(&desc->lock);
- return ret == IRQ_HANDLED;
+ return ret;
}
static int misrouted_irq(int irq)
@@ -157,8 +154,7 @@ static void poll_spurious_irqs(struct timer_list *unused)
continue;
/* Racy but it doesn't matter */
- state = desc->istate;
- barrier();
+ state = READ_ONCE(desc->istate);
if (!(state & IRQS_SPURIOUS_DISABLED))
continue;
@@ -168,8 +164,7 @@ static void poll_spurious_irqs(struct timer_list *unused)
}
out:
atomic_dec(&irq_poll_active);
- mod_timer(&poll_spurious_irq_timer,
- jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+ mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
static inline int bad_action_ret(irqreturn_t action_ret)
@@ -193,17 +188,13 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
unsigned int irq = irq_desc_get_irq(desc);
struct irqaction *action;
- unsigned long flags;
- if (bad_action_ret(action_ret)) {
- printk(KERN_ERR "irq event %d: bogus return value %x\n",
- irq, action_ret);
- } else {
- printk(KERN_ERR "irq %d: nobody cared (try booting with "
- "the \"irqpoll\" option)\n", irq);
- }
+ if (bad_action_ret(action_ret))
+ pr_err("irq event %d: bogus return value %x\n", irq, action_ret);
+ else
+ pr_err("irq %d: nobody cared (try booting with the \"irqpoll\" option)\n", irq);
dump_stack();
- printk(KERN_ERR "handlers:\n");
+ pr_err("handlers:\n");
/*
* We need to take desc->lock here. note_interrupt() is called
@@ -211,15 +202,13 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
* with something else removing an action. It's ok to take
* desc->lock here. See synchronize_irq().
*/
- raw_spin_lock_irqsave(&desc->lock, flags);
+ guard(raw_spinlock_irqsave)(&desc->lock);
for_each_action_of_desc(desc, action) {
- printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler);
+ pr_err("[<%p>] %ps", action->handler, action->handler);
if (action->thread_fn)
- printk(KERN_CONT " threaded [<%p>] %ps",
- action->thread_fn, action->thread_fn);
- printk(KERN_CONT "\n");
+ pr_cont(" threaded [<%p>] %ps", action->thread_fn, action->thread_fn);
+ pr_cont("\n");
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
}
static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
@@ -232,18 +221,17 @@ static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
}
}
-static inline int
-try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
- irqreturn_t action_ret)
+static inline bool try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
+ irqreturn_t action_ret)
{
struct irqaction *action;
if (!irqfixup)
- return 0;
+ return false;
/* We didn't actually handle the IRQ - see if it was misrouted? */
if (action_ret == IRQ_NONE)
- return 1;
+ return true;
/*
* But for 'irqfixup == 2' we also do it for handled interrupts if
@@ -251,19 +239,16 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
* traditional PC timer interrupt.. Legacy)
*/
if (irqfixup < 2)
- return 0;
+ return false;
if (!irq)
- return 1;
+ return true;
/*
* Since we don't get the descriptor lock, "action" can
- * change under us. We don't really care, but we don't
- * want to follow a NULL pointer. So tell the compiler to
- * just load it once by using a barrier.
+ * change under us.
*/
- action = desc->action;
- barrier();
+ action = READ_ONCE(desc->action);
return action && (action->flags & IRQF_IRQPOLL);
}
@@ -273,8 +258,7 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
unsigned int irq;
- if (desc->istate & IRQS_POLL_INPROGRESS ||
- irq_settings_is_polled(desc))
+ if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc))
return;
if (bad_action_ret(action_ret)) {
@@ -420,13 +404,12 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
/*
* Now kill the IRQ
*/
- printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
+ pr_emerg("Disabling IRQ #%d\n", irq);
desc->istate |= IRQS_SPURIOUS_DISABLED;
desc->depth++;
irq_disable(desc);
- mod_timer(&poll_spurious_irq_timer,
- jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+ mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
desc->irqs_unhandled = 0;
}
@@ -436,11 +419,9 @@ bool noirqdebug __read_mostly;
int noirqdebug_setup(char *str)
{
noirqdebug = 1;
- printk(KERN_INFO "IRQ lockup detection disabled\n");
-
+ pr_info("IRQ lockup detection disabled\n");
return 1;
}
-
__setup("noirqdebug", noirqdebug_setup);
module_param(noirqdebug, bool, 0644);
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
@@ -452,12 +433,10 @@ static int __init irqfixup_setup(char *str)
return 1;
}
irqfixup = 1;
- printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
- printk(KERN_WARNING "This may impact system performance.\n");
-
+ pr_warn("Misrouted IRQ fixup support enabled.\n");
+ pr_warn("This may impact system performance.\n");
return 1;
}
-
__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);
@@ -468,11 +447,8 @@ static int __init irqpoll_setup(char *str)
return 1;
}
irqfixup = 2;
- printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
- "enabled\n");
- printk(KERN_WARNING "This may significantly impact system "
- "performance\n");
+ pr_warn("Misrouted IRQ fixup and polling support enabled\n");
+ pr_warn("This may significantly impact system performance\n");
return 1;
}
-
__setup("irqpoll", irqpoll_setup);
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 6ce73cceaf53..c2871180edcc 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -1501,7 +1501,7 @@ static int access_thread(void *arg)
}
} while (!torture_must_stop());
timer_delete_sync(&timer);
- destroy_timer_on_stack(&timer);
+ timer_destroy_on_stack(&timer);
torture_kthread_stopping("access_thread");
return 0;
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index ba069459c101..2351a19ac2a9 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -29,22 +29,13 @@ static unsigned int klp_signals_cnt;
/*
* When a livepatch is in progress, enable klp stack checking in
- * cond_resched(). This helps CPU-bound kthreads get patched.
+ * schedule(). This helps CPU-bound kthreads get patched.
*/
-#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
-#define klp_cond_resched_enable() sched_dynamic_klp_enable()
-#define klp_cond_resched_disable() sched_dynamic_klp_disable()
-
-#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-EXPORT_SYMBOL(klp_sched_try_switch_key);
-#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
-#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
-
-#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+#define klp_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
+#define klp_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
/*
* This work can be performed periodically to finish patching or unpatching any
@@ -365,26 +356,18 @@ static bool klp_try_switch_task(struct task_struct *task)
void __klp_sched_try_switch(void)
{
- if (likely(!klp_patch_pending(current)))
- return;
-
/*
- * This function is called from cond_resched() which is called in many
- * places throughout the kernel. Using the klp_mutex here might
- * deadlock.
- *
- * Instead, disable preemption to prevent racing with other callers of
- * klp_try_switch_task(). Thanks to task_call_func() they won't be
- * able to switch this task while it's running.
+ * This function is called from __schedule() while a context switch is
+ * about to happen. Preemption is already disabled and klp_mutex
+ * can't be acquired.
+ * Disabled preemption is used to prevent racing with other callers of
+ * klp_try_switch_task(). Thanks to task_call_func() they won't be
+ * able to switch to this task while it's running.
*/
- preempt_disable();
+ lockdep_assert_preemption_disabled();
- /*
- * Make sure current didn't get patched between the above check and
- * preempt_disable().
- */
- if (unlikely(!klp_patch_pending(current)))
- goto out;
+ if (likely(!klp_patch_pending(current)))
+ return;
/*
* Enforce the order of the TIF_PATCH_PENDING read above and the
@@ -395,11 +378,7 @@ void __klp_sched_try_switch(void)
smp_rmb();
klp_try_switch_task(current);
-
-out:
- preempt_enable();
}
-EXPORT_SYMBOL(__klp_sched_try_switch);
/*
* Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
@@ -508,7 +487,7 @@ void klp_try_complete_transition(void)
}
/* Done! Now cleanup the data structures. */
- klp_cond_resched_disable();
+ klp_resched_disable();
patch = klp_transition_patch;
klp_complete_transition();
@@ -560,7 +539,7 @@ void klp_start_transition(void)
set_tsk_thread_flag(task, TIF_PATCH_PENDING);
}
- klp_cond_resched_enable();
+ klp_resched_enable();
klp_signals_cnt = 0;
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 58d78a33ac65..dd2bbf73718b 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -219,6 +219,7 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
unsigned long nr_lock_classes;
unsigned long nr_zapped_classes;
+unsigned long nr_dynamic_keys;
unsigned long max_lock_class_idx;
struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
@@ -1238,6 +1239,7 @@ void lockdep_register_key(struct lock_class_key *key)
goto out_unlock;
}
hlist_add_head_rcu(&key->hash_entry, hash_head);
+ nr_dynamic_keys++;
out_unlock:
graph_unlock();
restore_irqs:
@@ -1977,41 +1979,6 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
}
/*
- * We are about to add A -> B into the dependency graph, and in __bfs() a
- * strong dependency path A -> .. -> B is found: hlock_class equals
- * entry->class.
- *
- * If A -> .. -> B can replace A -> B in any __bfs() search (means the former
- * is _stronger_ than or equal to the latter), we consider A -> B as redundant.
- * For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A
- * -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
- * dependency graph, as any strong path ..-> A -> B ->.. we can get with
- * having dependency A -> B, we could already get a equivalent path ..-> A ->
- * .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
- *
- * We need to make sure both the start and the end of A -> .. -> B is not
- * weaker than A -> B. For the start part, please see the comment in
- * check_redundant(). For the end part, we need:
- *
- * Either
- *
- * a) A -> B is -(*R)-> (everything is not weaker than that)
- *
- * or
- *
- * b) A -> .. -> B is -(*N)-> (nothing is stronger than this)
- *
- */
-static inline bool hlock_equal(struct lock_list *entry, void *data)
-{
- struct held_lock *hlock = (struct held_lock *)data;
-
- return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */
- (hlock->read == 2 || /* A -> B is -(*R)-> */
- !entry->only_xr); /* A -> .. -> B is -(*N)-> */
-}
-
-/*
* We are about to add B -> A into the dependency graph, and in __bfs() a
* strong dependency path A -> .. -> B is found: hlock_class equals
* entry->class.
@@ -2916,6 +2883,41 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
#ifdef CONFIG_LOCKDEP_SMALL
/*
+ * We are about to add A -> B into the dependency graph, and in __bfs() a
+ * strong dependency path A -> .. -> B is found: hlock_class equals
+ * entry->class.
+ *
+ * If A -> .. -> B can replace A -> B in any __bfs() search (means the former
+ * is _stronger_ than or equal to the latter), we consider A -> B as redundant.
+ * For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A
+ * -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
+ * dependency graph, as any strong path ..-> A -> B ->.. we can get with
+ * having dependency A -> B, we could already get a equivalent path ..-> A ->
+ * .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
+ *
+ * We need to make sure both the start and the end of A -> .. -> B is not
+ * weaker than A -> B. For the start part, please see the comment in
+ * check_redundant(). For the end part, we need:
+ *
+ * Either
+ *
+ * a) A -> B is -(*R)-> (everything is not weaker than that)
+ *
+ * or
+ *
+ * b) A -> .. -> B is -(*N)-> (nothing is stronger than this)
+ *
+ */
+static inline bool hlock_equal(struct lock_list *entry, void *data)
+{
+ struct held_lock *hlock = (struct held_lock *)data;
+
+ return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */
+ (hlock->read == 2 || /* A -> B is -(*R)-> */
+ !entry->only_xr); /* A -> .. -> B is -(*N)-> */
+}
+
+/*
* Check that the dependency graph starting at <src> can lead to
* <target> or not. If it can, <src> -> <target> dependency is already
* in the graph.
@@ -5101,6 +5103,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
lockevent_inc(lockdep_nocheck);
}
+ if (DEBUG_LOCKS_WARN_ON(subclass >= MAX_LOCKDEP_SUBCLASSES))
+ return 0;
+
if (subclass < NR_LOCKDEP_CACHING_CLASSES)
class = lock->class_cache[subclass];
/*
@@ -6606,6 +6611,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
pf = get_pending_free();
__lockdep_free_key_range(pf, key, 1);
need_callback = prepare_call_rcu_zapped(pf);
+ nr_dynamic_keys--;
}
lockdep_unlock();
raw_local_irq_restore(flags);
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 20f9ef58d3d0..82156caf77d1 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -138,6 +138,7 @@ extern unsigned long nr_lock_classes;
extern unsigned long nr_zapped_classes;
extern unsigned long nr_zapped_lock_chains;
extern unsigned long nr_list_entries;
+extern unsigned long nr_dynamic_keys;
long lockdep_next_lockchain(long i);
unsigned long lock_chain_count(void);
extern unsigned long nr_stack_trace_entries;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 6db0f43fc4df..b52c07c4707c 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -286,6 +286,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
+ seq_printf(m, " dynamic-keys: %11lu\n",
+ nr_dynamic_keys);
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
nr_list_entries, MAX_LOCKDEP_ENTRIES);
seq_printf(m, " indirect dependencies: %11lu\n",
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index d6964fc29f51..ef234469baac 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -138,7 +138,8 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
return !reader; /* wake (readers until) 1 writer */
}
-static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader,
+ bool freeze)
{
DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
bool wait;
@@ -156,7 +157,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
spin_unlock_irq(&sem->waiters.lock);
while (wait) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE |
+ (freeze ? TASK_FREEZABLE : 0));
if (!smp_load_acquire(&wq_entry.private))
break;
schedule();
@@ -164,7 +166,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
__set_current_state(TASK_RUNNING);
}
-bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try,
+ bool freeze)
{
if (__percpu_down_read_trylock(sem))
return true;
@@ -174,7 +177,7 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
preempt_enable();
- percpu_rwsem_wait(sem, /* .reader = */ true);
+ percpu_rwsem_wait(sem, /* .reader = */ true, freeze);
preempt_disable();
trace_contention_end(sem, 0);
@@ -237,7 +240,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
*/
if (!__percpu_down_write_trylock(sem)) {
trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
- percpu_rwsem_wait(sem, /* .reader = */ false);
+ percpu_rwsem_wait(sem, /* .reader = */ false, false);
contended = true;
}
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index d7762ef5949a..39278737bb68 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -192,6 +192,11 @@ config GENDWARFKSYMS
depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
# Requires ELF object files.
depends on !LTO
+ # To avoid conflicts with the discarded __gendwarfksyms_ptr symbols on
+ # X86, requires pahole before commit 47dcb534e253 ("btf_encoder: Stop
+ # indexing symbols for VARs") or after commit 9810758003ce ("btf_encoder:
+ # Verify 0 address DWARF variables are in ELF section").
+ depends on !X86 || !DEBUG_INFO_BTF || PAHOLE_VERSION < 128 || PAHOLE_VERSION > 129
help
Calculate symbol versions from DWARF debugging information using
gendwarfksyms. Requires DEBUG_INFO to be enabled.
diff --git a/kernel/module/main.c b/kernel/module/main.c
index a2859dc3eea6..5c6ab20240a6 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2829,6 +2829,7 @@ static void module_deallocate(struct module *mod, struct load_info *info)
{
percpu_modfree(mod);
module_arch_freeing_init(mod);
+ codetag_free_module_sections(mod);
free_mod_mem(mod);
}
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index c9d97ed20122..5f31fdff8a38 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -128,17 +128,13 @@ out_time:
out_net:
put_cgroup_ns(new_nsp->cgroup_ns);
out_cgroup:
- if (new_nsp->pid_ns_for_children)
- put_pid_ns(new_nsp->pid_ns_for_children);
+ put_pid_ns(new_nsp->pid_ns_for_children);
out_pid:
- if (new_nsp->ipc_ns)
- put_ipc_ns(new_nsp->ipc_ns);
+ put_ipc_ns(new_nsp->ipc_ns);
out_ipc:
- if (new_nsp->uts_ns)
- put_uts_ns(new_nsp->uts_ns);
+ put_uts_ns(new_nsp->uts_ns);
out_uts:
- if (new_nsp->mnt_ns)
- put_mnt_ns(new_nsp->mnt_ns);
+ put_mnt_ns(new_nsp->mnt_ns);
out_ns:
kmem_cache_free(nsproxy_cachep, new_nsp);
return ERR_PTR(err);
@@ -189,18 +185,12 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
void free_nsproxy(struct nsproxy *ns)
{
- if (ns->mnt_ns)
- put_mnt_ns(ns->mnt_ns);
- if (ns->uts_ns)
- put_uts_ns(ns->uts_ns);
- if (ns->ipc_ns)
- put_ipc_ns(ns->ipc_ns);
- if (ns->pid_ns_for_children)
- put_pid_ns(ns->pid_ns_for_children);
- if (ns->time_ns)
- put_time_ns(ns->time_ns);
- if (ns->time_ns_for_children)
- put_time_ns(ns->time_ns_for_children);
+ put_mnt_ns(ns->mnt_ns);
+ put_uts_ns(ns->uts_ns);
+ put_ipc_ns(ns->ipc_ns);
+ put_pid_ns(ns->pid_ns_for_children);
+ put_time_ns(ns->time_ns);
+ put_time_ns(ns->time_ns_for_children);
put_cgroup_ns(ns->cgroup_ns);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
diff --git a/kernel/padata.c b/kernel/padata.c
index b3d4eacc4f5d..7eee94166357 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -358,7 +358,8 @@ static void padata_reorder(struct parallel_data *pd)
* To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish.
*/
padata_get_pd(pd);
- queue_work(pinst->serial_wq, &pd->reorder_work);
+ if (!queue_work(pinst->serial_wq, &pd->reorder_work))
+ padata_put_pd(pd);
}
}
diff --git a/kernel/panic.c b/kernel/panic.c
index a3889f38153d..047ea3215312 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -97,6 +97,36 @@ static const struct ctl_table kern_panic_table[] = {
},
#endif
{
+ .procname = "panic",
+ .data = &panic_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "panic_on_oops",
+ .data = &panic_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "panic_print",
+ .data = &panic_print,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "panic_on_warn",
+ .data = &panic_on_warn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "warn_limit",
.data = &warn_limit,
.maxlen = sizeof(warn_limit),
diff --git a/kernel/pid.c b/kernel/pid.c
index 4ac2ce46817f..8317bcbc7cf7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -100,6 +100,7 @@ void put_pid(struct pid *pid)
ns = pid->numbers[pid->level].ns;
if (refcount_dec_and_test(&pid->count)) {
+ WARN_ON_ONCE(pid->stashed);
kmem_cache_free(ns->pid_cachep, pid);
put_pid_ns(ns);
}
@@ -359,11 +360,6 @@ static void __change_pid(struct pid **pids, struct task_struct *task,
hlist_del_rcu(&task->pid_links[type]);
*pid_ptr = new;
- if (type == PIDTYPE_PID) {
- WARN_ON_ONCE(pid_has_task(pid, PIDTYPE_PID));
- wake_up_all(&pid->wait_pidfd);
- }
-
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
if (pid_has_task(pid, tmp))
return;
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index d9b7e2b38c7a..ea7995a25780 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -233,6 +233,10 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table,
unsigned long prev_cost = ULONG_MAX;
int i, ret;
+ /* This is needed only for CPUs and EAS skip other devices */
+ if (!_is_cpu_device(dev))
+ return 0;
+
/* Compute the cost of each performance state. */
for (i = nr_states - 1; i >= 0; i--) {
unsigned long power_res, cost;
@@ -698,10 +702,12 @@ static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd,
{
int ret;
- ret = em_compute_costs(dev, em_table->state, NULL, pd->nr_perf_states,
- pd->flags);
- if (ret)
- goto free_em_table;
+ if (!em_is_artificial(pd)) {
+ ret = em_compute_costs(dev, em_table->state, NULL,
+ pd->nr_perf_states, pd->flags);
+ if (ret)
+ goto free_em_table;
+ }
ret = em_dev_update_perf_domain(dev, em_table);
if (ret)
@@ -721,10 +727,24 @@ free_em_table:
* Adjustment of CPU performance values after boot, when all CPUs capacites
* are correctly calculated.
*/
-static void em_adjust_new_capacity(struct device *dev,
+static void em_adjust_new_capacity(unsigned int cpu, struct device *dev,
struct em_perf_domain *pd)
{
+ unsigned long cpu_capacity = arch_scale_cpu_capacity(cpu);
struct em_perf_table *em_table;
+ struct em_perf_state *table;
+ unsigned long em_max_perf;
+
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
+ em_max_perf = table[pd->nr_perf_states - 1].performance;
+ rcu_read_unlock();
+
+ if (em_max_perf == cpu_capacity)
+ return;
+
+ pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", cpu,
+ cpu_capacity, em_max_perf);
em_table = em_table_dup(pd);
if (!em_table) {
@@ -737,12 +757,27 @@ static void em_adjust_new_capacity(struct device *dev,
em_recalc_and_update(dev, pd, em_table);
}
+/**
+ * em_adjust_cpu_capacity() - Adjust the EM for a CPU after a capacity update.
+ * @cpu: Target CPU.
+ *
+ * Adjust the existing EM for @cpu after a capacity update under the assumption
+ * that the capacity has been updated in the same way for all of the CPUs in
+ * the same perf domain.
+ */
+void em_adjust_cpu_capacity(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
+ struct em_perf_domain *pd;
+
+ pd = em_pd_get(dev);
+ if (pd)
+ em_adjust_new_capacity(cpu, dev, pd);
+}
+
static void em_check_capacity_update(void)
{
cpumask_var_t cpu_done_mask;
- struct em_perf_state *table;
- struct em_perf_domain *pd;
- unsigned long cpu_capacity;
int cpu;
if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) {
@@ -753,7 +788,7 @@ static void em_check_capacity_update(void)
/* Check if CPUs capacity has changed than update EM */
for_each_possible_cpu(cpu) {
struct cpufreq_policy *policy;
- unsigned long em_max_perf;
+ struct em_perf_domain *pd;
struct device *dev;
if (cpumask_test_cpu(cpu, cpu_done_mask))
@@ -776,24 +811,7 @@ static void em_check_capacity_update(void)
cpumask_or(cpu_done_mask, cpu_done_mask,
em_span_cpus(pd));
- cpu_capacity = arch_scale_cpu_capacity(cpu);
-
- rcu_read_lock();
- table = em_perf_state_from_pd(pd);
- em_max_perf = table[pd->nr_perf_states - 1].performance;
- rcu_read_unlock();
-
- /*
- * Check if the CPU capacity has been adjusted during boot
- * and trigger the update for new performance values.
- */
- if (em_max_perf == cpu_capacity)
- continue;
-
- pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
- cpu, cpu_capacity, em_max_perf);
-
- em_adjust_new_capacity(dev, pd);
+ em_adjust_new_capacity(cpu, dev, pd);
}
free_cpumask_var(cpu_done_mask);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 23c0f4e6cb2f..519fb09de5e0 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -90,6 +90,11 @@ void hibernate_release(void)
atomic_inc(&hibernate_atomic);
}
+bool hibernation_in_progress(void)
+{
+ return !atomic_read(&hibernate_atomic);
+}
+
bool hibernation_available(void)
{
return nohibernate == 0 &&
@@ -133,10 +138,15 @@ bool system_entering_hibernation(void)
EXPORT_SYMBOL(system_entering_hibernation);
#ifdef CONFIG_PM_DEBUG
+static unsigned int pm_test_delay = 5;
+module_param(pm_test_delay, uint, 0644);
+MODULE_PARM_DESC(pm_test_delay,
+ "Number of seconds to wait before resuming from hibernation test");
static void hibernation_debug_sleep(void)
{
- pr_info("debug: Waiting for 5 seconds.\n");
- mdelay(5000);
+ pr_info("hibernation debug: Waiting for %d second(s).\n",
+ pm_test_delay);
+ mdelay(pm_test_delay * 1000);
}
static int hibernation_test(int level)
@@ -757,7 +767,7 @@ int hibernate(void)
* Query for the compression algorithm support if compression is enabled.
*/
if (!nocompress) {
- strscpy(hib_comp_algo, hibernate_compressor, sizeof(hib_comp_algo));
+ strscpy(hib_comp_algo, hibernate_compressor);
if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) {
pr_err("%s compression is not available\n", hib_comp_algo);
return -EOPNOTSUPP;
@@ -778,6 +788,8 @@ int hibernate(void)
goto Restore;
ksys_sync_helper();
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
error = freeze_processes();
if (error)
@@ -846,6 +858,7 @@ int hibernate(void)
/* Don't bother checking whether freezer_test_done is true */
freezer_test_done = false;
Exit:
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_HIBERNATION);
Restore:
pm_restore_console();
@@ -882,6 +895,9 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
if (error)
goto restore;
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
+
error = freeze_processes();
if (error)
goto exit;
@@ -941,6 +957,7 @@ thaw:
thaw_processes();
exit:
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_HIBERNATION);
restore:
@@ -1006,9 +1023,9 @@ static int software_resume(void)
*/
if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE)) {
if (swsusp_header_flags & SF_COMPRESSION_ALG_LZ4)
- strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4, sizeof(hib_comp_algo));
+ strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4);
else
- strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO, sizeof(hib_comp_algo));
+ strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO);
if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) {
pr_err("%s compression is not available\n", hib_comp_algo);
error = -EOPNOTSUPP;
@@ -1029,19 +1046,26 @@ static int software_resume(void)
if (error)
goto Restore;
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
+
pm_pr_dbg("Preparing processes for hibernation restore.\n");
error = freeze_processes();
- if (error)
+ if (error) {
+ filesystems_thaw();
goto Close_Finish;
+ }
error = freeze_kernel_threads();
if (error) {
thaw_processes();
+ filesystems_thaw();
goto Close_Finish;
}
error = load_image_and_restore();
thaw_processes();
+ filesystems_thaw();
Finish:
pm_notifier_call_chain(PM_POST_RESTORE);
Restore:
@@ -1456,8 +1480,7 @@ static int hibernate_compressor_param_set(const char *compressor,
if (index >= 0) {
ret = param_set_copystring(comp_alg_enabled[index], kp);
if (!ret)
- strscpy(hib_comp_algo, comp_alg_enabled[index],
- sizeof(hib_comp_algo));
+ strscpy(hib_comp_algo, comp_alg_enabled[index]);
} else {
ret = index;
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6254814d4817..3d484630505a 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -557,6 +557,10 @@ static int __init pm_debugfs_init(void)
late_initcall(pm_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+bool pm_sleep_transition_in_progress(void)
+{
+ return pm_suspend_in_progress() || hibernation_in_progress();
+}
#endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_PM_SLEEP_DEBUG
@@ -594,7 +598,7 @@ power_attr(pm_print_times);
static inline void pm_print_times_init(void)
{
- pm_print_times_enabled = !!initcall_debug;
+ pm_print_times_enabled = initcall_debug;
}
static ssize_t pm_wakeup_irq_show(struct kobject *kobj,
@@ -613,7 +617,7 @@ bool pm_debug_messages_on __read_mostly;
bool pm_debug_messages_should_print(void)
{
- return pm_debug_messages_on && pm_suspend_target_state != PM_SUSPEND_ON;
+ return pm_debug_messages_on && pm_sleep_transition_in_progress();
}
EXPORT_SYMBOL_GPL(pm_debug_messages_should_print);
@@ -962,6 +966,34 @@ power_attr(pm_freeze_timeout);
#endif /* CONFIG_FREEZER*/
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+bool filesystem_freeze_enabled = false;
+
+static ssize_t freeze_filesystems_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", filesystem_freeze_enabled);
+}
+
+static ssize_t freeze_filesystems_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+
+ if (val > 1)
+ return -EINVAL;
+
+ filesystem_freeze_enabled = !!val;
+ return n;
+}
+
+power_attr(freeze_filesystems);
+#endif /* CONFIG_SUSPEND || CONFIG_HIBERNATION */
+
static struct attribute * g[] = {
&state_attr.attr,
#ifdef CONFIG_PM_TRACE
@@ -992,6 +1024,9 @@ static struct attribute * g[] = {
#ifdef CONFIG_FREEZER
&pm_freeze_timeout_attr.attr,
#endif
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+ &freeze_filesystems_attr.attr,
+#endif
NULL,
};
diff --git a/kernel/power/power.h b/kernel/power/power.h
index c352dea2f67b..cb1d71562002 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -18,6 +18,10 @@ struct swsusp_info {
unsigned long size;
} __aligned(PAGE_SIZE);
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+extern bool filesystem_freeze_enabled;
+#endif
+
#ifdef CONFIG_HIBERNATION
/* kernel/power/snapshot.c */
extern void __init hibernate_reserved_size_init(void);
@@ -71,10 +75,14 @@ extern void enable_restore_image_protection(void);
static inline void enable_restore_image_protection(void) {}
#endif /* CONFIG_STRICT_KERNEL_RWX */
+extern bool hibernation_in_progress(void);
+
#else /* !CONFIG_HIBERNATION */
static inline void hibernate_reserved_size_init(void) {}
static inline void hibernate_image_size_init(void) {}
+
+static inline bool hibernation_in_progress(void) { return false; }
#endif /* !CONFIG_HIBERNATION */
#define power_attr(_name) \
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 66ac067d9ae6..dc0dfc349f22 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -189,7 +189,7 @@ void thaw_processes(void)
oom_killer_enable();
- pr_info("Restarting tasks ... ");
+ pr_info("Restarting tasks: Starting\n");
__usermodehelper_set_disable_depth(UMH_FREEZING);
thaw_workqueues();
@@ -208,7 +208,7 @@ void thaw_processes(void)
usermodehelper_enable();
schedule();
- pr_cont("done.\n");
+ pr_info("Restarting tasks: Done\n");
trace_suspend_resume(TPS("thaw_processes"), 0, false);
}
@@ -217,7 +217,7 @@ void thaw_kernel_threads(void)
struct task_struct *g, *p;
pm_nosig_freezing = false;
- pr_info("Restarting kernel threads ... ");
+ pr_info("Restarting kernel threads ...\n");
thaw_workqueues();
@@ -229,5 +229,5 @@ void thaw_kernel_threads(void)
read_unlock(&tasklist_lock);
schedule();
- pr_cont("done.\n");
+ pr_info("Done restarting kernel threads.\n");
}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8eaec4ab121d..76b141b9aac0 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -30,6 +30,7 @@
#include <trace/events/power.h>
#include <linux/compiler.h>
#include <linux/moduleparam.h>
+#include <linux/fs.h>
#include "power.h"
@@ -374,6 +375,8 @@ static int suspend_prepare(suspend_state_t state)
if (error)
goto Restore;
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
trace_suspend_resume(TPS("freeze_processes"), 0, true);
error = suspend_freeze_processes();
trace_suspend_resume(TPS("freeze_processes"), 0, false);
@@ -550,6 +553,7 @@ int suspend_devices_and_enter(suspend_state_t state)
static void suspend_finish(void)
{
suspend_thaw_processes();
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
}
@@ -588,6 +592,8 @@ static int enter_state(suspend_state_t state)
ksys_sync_helper();
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
}
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
@@ -609,6 +615,7 @@ static int enter_state(suspend_state_t state)
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
+ filesystems_thaw();
mutex_unlock(&system_transition_mutex);
return error;
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 80ff5f933a62..ad13c461b657 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -268,35 +268,26 @@ static void hib_end_io(struct bio *bio)
bio_put(bio);
}
-static int hib_submit_io(blk_opf_t opf, pgoff_t page_off, void *addr,
+static int hib_submit_io_sync(blk_opf_t opf, pgoff_t page_off, void *addr)
+{
+ return bdev_rw_virt(file_bdev(hib_resume_bdev_file),
+ page_off * (PAGE_SIZE >> 9), addr, PAGE_SIZE, opf);
+}
+
+static int hib_submit_io_async(blk_opf_t opf, pgoff_t page_off, void *addr,
struct hib_bio_batch *hb)
{
- struct page *page = virt_to_page(addr);
struct bio *bio;
- int error = 0;
bio = bio_alloc(file_bdev(hib_resume_bdev_file), 1, opf,
GFP_NOIO | __GFP_HIGH);
bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- pr_err("Adding page to bio failed at %llu\n",
- (unsigned long long)bio->bi_iter.bi_sector);
- bio_put(bio);
- return -EFAULT;
- }
-
- if (hb) {
- bio->bi_end_io = hib_end_io;
- bio->bi_private = hb;
- atomic_inc(&hb->count);
- submit_bio(bio);
- } else {
- error = submit_bio_wait(bio);
- bio_put(bio);
- }
-
- return error;
+ bio_add_virt_nofail(bio, addr, PAGE_SIZE);
+ bio->bi_end_io = hib_end_io;
+ bio->bi_private = hb;
+ atomic_inc(&hb->count);
+ submit_bio(bio);
+ return 0;
}
static int hib_wait_io(struct hib_bio_batch *hb)
@@ -316,7 +307,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
int error;
- hib_submit_io(REQ_OP_READ, swsusp_resume_block, swsusp_header, NULL);
+ hib_submit_io_sync(REQ_OP_READ, swsusp_resume_block, swsusp_header);
if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
@@ -329,8 +320,8 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
swsusp_header->flags = flags;
if (flags & SF_CRC32_MODE)
swsusp_header->crc32 = handle->crc32;
- error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC,
- swsusp_resume_block, swsusp_header, NULL);
+ error = hib_submit_io_sync(REQ_OP_WRITE | REQ_SYNC,
+ swsusp_resume_block, swsusp_header);
} else {
pr_err("Swap header not found!\n");
error = -ENODEV;
@@ -380,36 +371,30 @@ static int swsusp_swap_check(void)
static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
{
+ gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
void *src;
int ret;
if (!offset)
return -ENOSPC;
- if (hb) {
- src = (void *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY);
- if (src) {
- copy_page(src, buf);
- } else {
- ret = hib_wait_io(hb); /* Free pages */
- if (ret)
- return ret;
- src = (void *)__get_free_page(GFP_NOIO |
- __GFP_NOWARN |
- __GFP_NORETRY);
- if (src) {
- copy_page(src, buf);
- } else {
- WARN_ON_ONCE(1);
- hb = NULL; /* Go synchronous */
- src = buf;
- }
- }
- } else {
- src = buf;
+ if (!hb)
+ goto sync_io;
+
+ src = (void *)__get_free_page(gfp);
+ if (!src) {
+ ret = hib_wait_io(hb); /* Free pages */
+ if (ret)
+ return ret;
+ src = (void *)__get_free_page(gfp);
+ if (WARN_ON_ONCE(!src))
+ goto sync_io;
}
- return hib_submit_io(REQ_OP_WRITE | REQ_SYNC, offset, src, hb);
+
+ copy_page(src, buf);
+ return hib_submit_io_async(REQ_OP_WRITE | REQ_SYNC, offset, src, hb);
+sync_io:
+ return hib_submit_io_sync(REQ_OP_WRITE | REQ_SYNC, offset, buf);
}
static void release_swap_writer(struct swap_map_handle *handle)
@@ -1041,7 +1026,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
return -ENOMEM;
}
- error = hib_submit_io(REQ_OP_READ, offset, tmp->map, NULL);
+ error = hib_submit_io_sync(REQ_OP_READ, offset, tmp->map);
if (error) {
release_swap_reader(handle);
return error;
@@ -1065,7 +1050,10 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
offset = handle->cur->entries[handle->k];
if (!offset)
return -EFAULT;
- error = hib_submit_io(REQ_OP_READ, offset, buf, hb);
+ if (hb)
+ error = hib_submit_io_async(REQ_OP_READ, offset, buf, hb);
+ else
+ error = hib_submit_io_sync(REQ_OP_READ, offset, buf);
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
@@ -1590,8 +1578,8 @@ int swsusp_check(bool exclusive)
BLK_OPEN_READ, holder, NULL);
if (!IS_ERR(hib_resume_bdev_file)) {
clear_page(swsusp_header);
- error = hib_submit_io(REQ_OP_READ, swsusp_resume_block,
- swsusp_header, NULL);
+ error = hib_submit_io_sync(REQ_OP_READ, swsusp_resume_block,
+ swsusp_header);
if (error)
goto put;
@@ -1599,9 +1587,9 @@ int swsusp_check(bool exclusive)
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
swsusp_header_flags = swsusp_header->flags;
/* Reset swap signature now */
- error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC,
+ error = hib_submit_io_sync(REQ_OP_WRITE | REQ_SYNC,
swsusp_resume_block,
- swsusp_header, NULL);
+ swsusp_header);
} else {
error = -EINVAL;
}
@@ -1650,13 +1638,12 @@ int swsusp_unmark(void)
{
int error;
- hib_submit_io(REQ_OP_READ, swsusp_resume_block,
- swsusp_header, NULL);
+ hib_submit_io_sync(REQ_OP_READ, swsusp_resume_block, swsusp_header);
if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10);
- error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC,
+ error = hib_submit_io_sync(REQ_OP_WRITE | REQ_SYNC,
swsusp_resume_block,
- swsusp_header, NULL);
+ swsusp_header);
} else {
pr_err("Cannot find swsusp signature!\n");
error = -ENODEV;
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 52571dcad768..4e941999a53b 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -49,6 +49,9 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active)
len += sysfs_emit_at(buf, len, "%s ", wl->name);
}
+ if (len > 0)
+ --len;
+
len += sysfs_emit_at(buf, len, "\n");
mutex_unlock(&wakelocks_lock);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index eed2951a4962..9cf01832a6c3 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -57,6 +57,9 @@
/* Low-order bit definition for polled grace-period APIs. */
#define RCU_GET_STATE_COMPLETED 0x1
+/* A complete grace period count */
+#define RCU_SEQ_GP (RCU_SEQ_STATE_MASK + 1)
+
extern int sysctl_sched_rt_runtime;
/*
@@ -157,12 +160,21 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
* Given a snapshot from rcu_seq_snap(), determine whether or not a
* full update-side operation has occurred, but do not allow the
* (ULONG_MAX / 2) safety-factor/guard-band.
+ *
+ * The token returned by get_state_synchronize_rcu_full() is based on
+ * rcu_state.gp_seq but it is tested in poll_state_synchronize_rcu_full()
+ * against the root rnp->gp_seq. Since rcu_seq_start() is first called
+ * on rcu_state.gp_seq and only later reflected on the root rnp->gp_seq,
+ * it is possible that rcu_seq_snap(rcu_state.gp_seq) returns 2 full grace
+ * periods ahead of the root rnp->gp_seq. To prevent false-positives with the
+ * full polling API that a wrap around instantly completed the GP, when nothing
+ * like that happened, adjust for the 2 GPs in the ULONG_CMP_LT().
*/
static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
{
unsigned long cur_s = READ_ONCE(*sp);
- return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1));
+ return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_GP));
}
/*
@@ -572,6 +584,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long c_old,
unsigned long c);
void rcu_gp_set_torture_wait(int duration);
+void rcu_set_gpwrap_lag(unsigned long lag);
+int rcu_get_gpwrap_count(int cpu);
#else
static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
{
@@ -589,6 +603,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
do { } while (0)
#endif
static inline void rcu_gp_set_torture_wait(int duration) { }
+static inline void rcu_set_gpwrap_lag(unsigned long lag) { }
+static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
#endif
unsigned long long rcutorture_gather_gp_seqs(void);
void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 0f3059b1b80d..b521d0455992 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -762,7 +762,7 @@ kfree_scale_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kcalloc(kfree_mult, sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 4fa7772be183..70ec0f21abc3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -115,6 +115,10 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(bool, gpwrap_lag, true, "Enable grace-period wrap lag testing");
+torture_param(int, gpwrap_lag_gps, 8, "Value to set for set_gpwrap_lag during an active testing period.");
+torture_param(int, gpwrap_lag_cycle_mins, 30, "Total cycle duration for gpwrap lag testing (in minutes)");
+torture_param(int, gpwrap_lag_active_mins, 5, "Duration for which gpwrap lag is active within each cycle (in minutes)");
torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
@@ -413,6 +417,8 @@ struct rcu_torture_ops {
bool (*reader_blocked)(void);
unsigned long long (*gather_gp_seqs)(void);
void (*format_gp_seqs)(unsigned long long seqs, char *cp, size_t len);
+ void (*set_gpwrap_lag)(unsigned long lag);
+ int (*get_gpwrap_count)(int cpu);
long cbflood_max;
int irq_capable;
int can_boost;
@@ -619,6 +625,8 @@ static struct rcu_torture_ops rcu_ops = {
: NULL,
.gather_gp_seqs = rcutorture_gather_gp_seqs,
.format_gp_seqs = rcutorture_format_gp_seqs,
+ .set_gpwrap_lag = rcu_set_gpwrap_lag,
+ .get_gpwrap_count = rcu_get_gpwrap_count,
.irq_capable = 1,
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
.extendables = RCUTORTURE_MAX_EXTEND,
@@ -2164,53 +2172,70 @@ rcutorture_loop_extend(int *readstate, bool insoftirq, struct torture_random_sta
return &rtrsp[j];
}
-/*
- * Do one read-side critical section, returning false if there was
- * no data to read. Can be invoked both from process context and
- * from a timer handler.
- */
-static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
-{
- bool checkpolling = !(torture_random(trsp) & 0xfff);
+struct rcu_torture_one_read_state {
+ bool checkpolling;
unsigned long cookie;
struct rcu_gp_oldstate cookie_full;
- int i;
unsigned long started;
- unsigned long completed;
- int newstate;
struct rcu_torture *p;
- int pipe_count;
- bool preempted = false;
- int readstate = 0;
- struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } };
- struct rt_read_seg *rtrsp = &rtseg[0];
- struct rt_read_seg *rtrsp1;
+ int readstate;
+ struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS];
+ struct rt_read_seg *rtrsp;
unsigned long long ts;
+};
- WARN_ON_ONCE(!rcu_is_watching());
- newstate = rcutorture_extend_mask(readstate, trsp);
- rcutorture_one_extend(&readstate, newstate, myid < 0, trsp, rtrsp++);
- if (checkpolling) {
+static void init_rcu_torture_one_read_state(struct rcu_torture_one_read_state *rtorsp,
+ struct torture_random_state *trsp)
+{
+ memset(rtorsp, 0, sizeof(*rtorsp));
+ rtorsp->checkpolling = !(torture_random(trsp) & 0xfff);
+ rtorsp->rtrsp = &rtorsp->rtseg[0];
+}
+
+/*
+ * Set up the first segment of a series of overlapping read-side
+ * critical sections. The caller must have actually initiated the
+ * outermost read-side critical section.
+ */
+static bool rcu_torture_one_read_start(struct rcu_torture_one_read_state *rtorsp,
+ struct torture_random_state *trsp, long myid)
+{
+ if (rtorsp->checkpolling) {
if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- cookie = cur_ops->get_gp_state();
+ rtorsp->cookie = cur_ops->get_gp_state();
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
- cur_ops->get_gp_state_full(&cookie_full);
+ cur_ops->get_gp_state_full(&rtorsp->cookie_full);
}
- started = cur_ops->get_gp_seq();
- ts = rcu_trace_clock_local();
- p = rcu_dereference_check(rcu_torture_current,
+ rtorsp->started = cur_ops->get_gp_seq();
+ rtorsp->ts = rcu_trace_clock_local();
+ rtorsp->p = rcu_dereference_check(rcu_torture_current,
!cur_ops->readlock_held || cur_ops->readlock_held());
- if (p == NULL) {
+ if (rtorsp->p == NULL) {
/* Wait for rcu_torture_writer to get underway */
- rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp);
+ rcutorture_one_extend(&rtorsp->readstate, 0, myid < 0, trsp, rtorsp->rtrsp);
return false;
}
- if (p->rtort_mbtest == 0)
+ if (rtorsp->p->rtort_mbtest == 0)
atomic_inc(&n_rcu_torture_mberror);
- rcu_torture_reader_do_mbchk(myid, p, trsp);
- rtrsp = rcutorture_loop_extend(&readstate, myid < 0, trsp, rtrsp);
+ rcu_torture_reader_do_mbchk(myid, rtorsp->p, trsp);
+ return true;
+}
+
+/*
+ * Complete the last segment of a series of overlapping read-side
+ * critical sections and check for errors.
+ */
+static void rcu_torture_one_read_end(struct rcu_torture_one_read_state *rtorsp,
+ struct torture_random_state *trsp, long myid)
+{
+ int i;
+ unsigned long completed;
+ int pipe_count;
+ bool preempted = false;
+ struct rt_read_seg *rtrsp1;
+
preempt_disable();
- pipe_count = READ_ONCE(p->rtort_pipe_count);
+ pipe_count = READ_ONCE(rtorsp->p->rtort_pipe_count);
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
// Should not happen in a correct RCU implementation,
// happens quite often for torture_type=busted.
@@ -2218,28 +2243,28 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
}
completed = cur_ops->get_gp_seq();
if (pipe_count > 1) {
- do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
- ts, started, completed);
+ do_trace_rcu_torture_read(cur_ops->name, &rtorsp->p->rtort_rcu,
+ rtorsp->ts, rtorsp->started, completed);
rcu_ftrace_dump(DUMP_ALL);
}
__this_cpu_inc(rcu_torture_count[pipe_count]);
- completed = rcutorture_seq_diff(completed, started);
+ completed = rcutorture_seq_diff(completed, rtorsp->started);
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
__this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
- if (checkpolling) {
+ if (rtorsp->checkpolling) {
if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- WARN_ONCE(cur_ops->poll_gp_state(cookie),
+ WARN_ONCE(cur_ops->poll_gp_state(rtorsp->cookie),
"%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
__func__,
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
- cookie, cur_ops->get_gp_state());
+ rtorsp->cookie, cur_ops->get_gp_state());
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
- WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full),
+ WARN_ONCE(cur_ops->poll_gp_state_full(&rtorsp->cookie_full),
"%s: Cookie check 6 failed %s(%d) online %*pbl\n",
__func__,
rcu_torture_writer_state_getname(),
@@ -2248,21 +2273,42 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
}
if (cur_ops->reader_blocked)
preempted = cur_ops->reader_blocked();
- rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp);
- WARN_ON_ONCE(readstate);
+ rcutorture_one_extend(&rtorsp->readstate, 0, myid < 0, trsp, rtorsp->rtrsp);
+ WARN_ON_ONCE(rtorsp->readstate);
// This next splat is expected behavior if leakpointer, especially
// for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
- WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
+ WARN_ON_ONCE(leakpointer && READ_ONCE(rtorsp->p->rtort_pipe_count) > 1);
/* If error or close call, record the sequence of reader protections. */
if ((pipe_count > 1 || completed > 1) && !xchg(&err_segs_recorded, 1)) {
i = 0;
- for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++)
+ for (rtrsp1 = &rtorsp->rtseg[0]; rtrsp1 < rtorsp->rtrsp; rtrsp1++)
err_segs[i++] = *rtrsp1;
rt_read_nsegs = i;
rt_read_preempted = preempted;
}
+}
+/*
+ * Do one read-side critical section, returning false if there was
+ * no data to read. Can be invoked both from process context and
+ * from a timer handler.
+ */
+static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
+{
+ int newstate;
+ struct rcu_torture_one_read_state rtors;
+
+ WARN_ON_ONCE(!rcu_is_watching());
+ init_rcu_torture_one_read_state(&rtors, trsp);
+ newstate = rcutorture_extend_mask(rtors.readstate, trsp);
+ rcutorture_one_extend(&rtors.readstate, newstate, myid < 0, trsp, rtors.rtrsp++);
+ if (!rcu_torture_one_read_start(&rtors, trsp, myid)) {
+ rcutorture_one_extend(&rtors.readstate, 0, myid < 0, trsp, rtors.rtrsp);
+ return false;
+ }
+ rtors.rtrsp = rcutorture_loop_extend(&rtors.readstate, myid < 0, trsp, rtors.rtrsp);
+ rcu_torture_one_read_end(&rtors, trsp, myid);
return true;
}
@@ -2307,7 +2353,7 @@ rcu_torture_reader(void *arg)
set_user_nice(current, MAX_NICE);
if (irqreader && cur_ops->irq_capable)
timer_setup_on_stack(&t, rcu_torture_timer, 0);
- tick_dep_set_task(current, TICK_DEP_BIT_RCU);
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU); // CPU bound, so need tick.
do {
if (irqreader && cur_ops->irq_capable) {
if (!timer_pending(&t))
@@ -2325,7 +2371,7 @@ rcu_torture_reader(void *arg)
} while (!torture_must_stop());
if (irqreader && cur_ops->irq_capable) {
timer_delete_sync(&t);
- destroy_timer_on_stack(&t);
+ timer_destroy_on_stack(&t);
}
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
torture_kthread_stopping("rcu_torture_reader");
@@ -2394,6 +2440,7 @@ rcu_torture_stats_print(void)
int i;
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+ long n_gpwraps = 0;
struct rcu_torture *rtcp;
static unsigned long rtcv_snap = ULONG_MAX;
static bool splatted;
@@ -2404,6 +2451,8 @@ rcu_torture_stats_print(void)
pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
}
+ if (cur_ops->get_gpwrap_count)
+ n_gpwraps += cur_ops->get_gpwrap_count(cpu);
}
for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
if (pipesummary[i] != 0)
@@ -2435,8 +2484,9 @@ rcu_torture_stats_print(void)
data_race(n_barrier_attempts),
data_race(n_rcu_torture_barrier_error));
pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
- pr_cont("nocb-toggles: %ld:%ld\n",
+ pr_cont("nocb-toggles: %ld:%ld ",
atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
+ pr_cont("gpwraps: %ld\n", n_gpwraps);
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
if (atomic_read(&n_rcu_torture_mberror) ||
@@ -3036,7 +3086,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
cver = READ_ONCE(rcu_torture_current_version);
gps = cur_ops->get_gp_seq();
rfp->rcu_launder_gp_seq_start = gps;
- tick_dep_set_task(current, TICK_DEP_BIT_RCU);
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU); // CPU bound, so need tick.
while (time_before(jiffies, stopat) &&
!shutdown_time_arrived() &&
!READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
@@ -3607,6 +3657,57 @@ static int rcu_torture_preempt(void *unused)
static enum cpuhp_state rcutor_hp;
+static struct hrtimer gpwrap_lag_timer;
+static bool gpwrap_lag_active;
+
+/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
+static enum hrtimer_restart rcu_gpwrap_lag_timer(struct hrtimer *timer)
+{
+ ktime_t next_delay;
+
+ if (gpwrap_lag_active) {
+ pr_alert("rcu-torture: Disabling gpwrap lag (value=0)\n");
+ cur_ops->set_gpwrap_lag(0);
+ gpwrap_lag_active = false;
+ next_delay = ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0);
+ } else {
+ pr_alert("rcu-torture: Enabling gpwrap lag (value=%d)\n", gpwrap_lag_gps);
+ cur_ops->set_gpwrap_lag(gpwrap_lag_gps);
+ gpwrap_lag_active = true;
+ next_delay = ktime_set(gpwrap_lag_active_mins * 60, 0);
+ }
+
+ if (torture_must_stop_irq())
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward_now(timer, next_delay);
+ return HRTIMER_RESTART;
+}
+
+static int rcu_gpwrap_lag_init(void)
+{
+ if (!gpwrap_lag)
+ return 0;
+
+ if (gpwrap_lag_cycle_mins <= 0 || gpwrap_lag_active_mins <= 0) {
+ pr_alert("rcu-torture: lag timing parameters must be positive\n");
+ return -EINVAL;
+ }
+
+ hrtimer_setup(&gpwrap_lag_timer, rcu_gpwrap_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ gpwrap_lag_active = false;
+ hrtimer_start(&gpwrap_lag_timer,
+ ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0), HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+static void rcu_gpwrap_lag_cleanup(void)
+{
+ hrtimer_cancel(&gpwrap_lag_timer);
+ cur_ops->set_gpwrap_lag(0);
+ gpwrap_lag_active = false;
+}
static void
rcu_torture_cleanup(void)
{
@@ -3776,6 +3877,9 @@ rcu_torture_cleanup(void)
torture_cleanup_end();
if (cur_ops->gp_slow_unregister)
cur_ops->gp_slow_unregister(NULL);
+
+ if (gpwrap_lag && cur_ops->set_gpwrap_lag)
+ rcu_gpwrap_lag_cleanup();
}
static void rcu_torture_leak_cb(struct rcu_head *rhp)
@@ -4272,9 +4376,17 @@ rcu_torture_init(void)
}
if (object_debug)
rcu_test_debug_objects();
- torture_init_end();
+
if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
+
+ if (gpwrap_lag && cur_ops->set_gpwrap_lag) {
+ firsterr = rcu_gpwrap_lag_init();
+ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+
+ torture_init_end();
return 0;
unwind:
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9a59b071501b..48047260697e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1589,7 +1589,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
{
if (cookie != SRCU_GET_STATE_COMPLETED &&
- !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
+ !rcu_seq_done_exact(&ssp->srcu_sup->srcu_gp_seq, cookie))
return false;
// Ensure that the end of the SRCU grace period happens before
// any subsequent code that the caller might execute.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 659f83e71048..e8a4b720d7d2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.gpwrap = true,
};
+
+int rcu_get_gpwrap_count(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ return READ_ONCE(rdp->gpwrap_count);
+}
+EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
+
static struct rcu_state rcu_state = {
.level = { &rcu_state.node[0] },
.gp_state = RCU_GP_IDLE,
@@ -757,6 +766,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}
+static unsigned long seq_gpwrap_lag = ULONG_MAX / 4;
+
+/**
+ * rcu_set_gpwrap_lag - Set RCU GP sequence overflow lag value.
+ * @lag_gps: Set overflow lag to this many grace period worth of counters
+ * which is used by rcutorture to quickly force a gpwrap situation.
+ * @lag_gps = 0 means we reset it back to the boot-time value.
+ */
+void rcu_set_gpwrap_lag(unsigned long lag_gps)
+{
+ unsigned long lag_seq_count;
+
+ lag_seq_count = (lag_gps == 0)
+ ? ULONG_MAX / 4
+ : lag_gps << RCU_SEQ_CTR_SHIFT;
+ WRITE_ONCE(seq_gpwrap_lag, lag_seq_count);
+}
+EXPORT_SYMBOL_GPL(rcu_set_gpwrap_lag);
+
/*
* When trying to report a quiescent state on behalf of some other CPU,
* it is our responsibility to check for and handle potential overflow
@@ -767,9 +795,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
raw_lockdep_assert_held_rcu_node(rnp);
- if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
- rnp->gp_seq))
+ if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_gpwrap_lag,
+ rnp->gp_seq)) {
WRITE_ONCE(rdp->gpwrap, true);
+ WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
+ }
if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
}
@@ -801,6 +831,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp)
return 0;
}
+#ifndef arch_irq_stat_cpu
+#define arch_irq_stat_cpu(cpu) 0
+#endif
+
/*
* Returns positive if the specified CPU has passed through a quiescent state
* by virtue of being in or having passed through an dynticks idle state since
@@ -936,9 +970,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp)
rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
- rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
- rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
- rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
+ rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu);
+ rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu);
+ rsrp->nr_csw = nr_context_switches_cpu(cpu);
rsrp->jiffies = jiffies;
rsrp->gp_seq = rdp->gp_seq;
}
@@ -1060,38 +1094,6 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
return needmore;
}
-static void swake_up_one_online_ipi(void *arg)
-{
- struct swait_queue_head *wqh = arg;
-
- swake_up_one(wqh);
-}
-
-static void swake_up_one_online(struct swait_queue_head *wqh)
-{
- int cpu = get_cpu();
-
- /*
- * If called from rcutree_report_cpu_starting(), wake up
- * is dangerous that late in the CPU-down hotplug process. The
- * scheduler might queue an ignored hrtimer. Defer the wake up
- * to an online CPU instead.
- */
- if (unlikely(cpu_is_offline(cpu))) {
- int target;
-
- target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU),
- cpu_online_mask);
-
- smp_call_function_single(target, swake_up_one_online_ipi,
- wqh, 0);
- put_cpu();
- } else {
- put_cpu();
- swake_up_one(wqh);
- }
-}
-
/*
* Awaken the grace-period kthread. Don't do a self-awaken (unless in an
* interrupt or softirq handler, in which case we just might immediately
@@ -1116,7 +1118,7 @@ static void rcu_gp_kthread_wake(void)
return;
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
- swake_up_one_online(&rcu_state.gp_wq);
+ swake_up_one(&rcu_state.gp_wq);
}
/*
@@ -1798,6 +1800,7 @@ static noinline_for_stack bool rcu_gp_init(void)
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root();
bool start_new_poll;
+ unsigned long old_gp_seq;
WRITE_ONCE(rcu_state.gp_activity, jiffies);
raw_spin_lock_irq_rcu_node(rnp);
@@ -1825,7 +1828,12 @@ static noinline_for_stack bool rcu_gp_init(void)
*/
start_new_poll = rcu_sr_normal_gp_init();
/* Record GP times before starting GP, hence rcu_seq_start(). */
+ old_gp_seq = rcu_state.gp_seq;
rcu_seq_start(&rcu_state.gp_seq);
+ /* Ensure that rcu_seq_done_exact() guardband doesn't give false positives. */
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
+ rcu_seq_done_exact(&old_gp_seq, rcu_seq_snap(&rcu_state.gp_seq)));
+
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a9a811d9d7a3..3830c19cf2f6 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -168,7 +168,7 @@ struct rcu_snap_record {
u64 cputime_irq; /* Accumulated cputime of hard irqs */
u64 cputime_softirq;/* Accumulated cputime of soft irqs */
u64 cputime_system; /* Accumulated cputime of kernel tasks */
- unsigned long nr_hardirqs; /* Accumulated number of hard irqs */
+ u64 nr_hardirqs; /* Accumulated number of hard irqs */
unsigned int nr_softirqs; /* Accumulated number of soft irqs */
unsigned long long nr_csw; /* Accumulated number of task switches */
unsigned long jiffies; /* Track jiffies value */
@@ -183,6 +183,7 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiescent state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
+ unsigned int gpwrap_count; /* Count of GP sequence wrap. */
bool cpu_started; /* RCU watching this onlining CPU. */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 8d4895c854c5..c36c7d5575ca 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -200,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake)
- swake_up_one_online(&rcu_state.expedited_wq);
+ swake_up_one(&rcu_state.expedited_wq);
break;
}
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fa269d34167a..1596812f7f12 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -216,7 +216,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
if (needwake) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
- swake_up_one_online(&rdp_gp->nocb_gp_wq);
+ swake_up_one(&rdp_gp->nocb_gp_wq);
}
return needwake;
@@ -554,19 +554,13 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
- } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) {
+ } else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- /*
- * Don't do the wake-up upfront on fragile paths.
- * Also offline CPUs can't call swake_up_one_online() from
- * (soft-)IRQs. Rely on the final deferred wake-up from
- * rcutree_report_cpu_dead()
- */
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3c0bbbbb686f..0b0f56f6abc8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -29,7 +29,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
lockdep_is_held(&rdp->nocb_lock) ||
lockdep_is_held(&rcu_state.nocb_mutex) ||
- (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
+ ((!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) || softirq_count()) &&
rdp == this_cpu_ptr(&rcu_data)) ||
rcu_current_is_nocb_kthread(rdp)),
"Unsafe read of RCU_NOCB offloaded state"
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 925fcdad5dea..56b21219442b 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu)
rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
pr_err("\t hardirqs softirqs csw/system\n");
- pr_err("\t number: %8ld %10d %12lld\n",
- kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
+ pr_err("\t number: %8lld %10d %12lld\n",
+ kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs,
kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
nr_context_switches_cpu(cpu) - rsrp->nr_csw);
pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c81cf642dba0..62b3416f5e43 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -66,6 +66,7 @@
#include <linux/vtime.h>
#include <linux/wait_api.h>
#include <linux/workqueue_api.h>
+#include <linux/livepatch_sched.h>
#ifdef CONFIG_PREEMPT_DYNAMIC
# ifdef CONFIG_GENERIC_ENTRY
@@ -1752,7 +1753,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
}
}
-static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
{
enum uclamp_id clamp_id;
@@ -1768,7 +1769,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
- if (p->se.sched_delayed)
+ /* Only inc the delayed task which being woken up. */
+ if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
return;
for_each_clamp_id(clamp_id)
@@ -2036,7 +2038,7 @@ static void __init init_uclamp(void)
}
#else /* !CONFIG_UCLAMP_TASK */
-static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
@@ -2072,12 +2074,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
- p->sched_class->enqueue_task(rq, p, flags);
/*
- * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
- * ->sched_delayed.
+ * Can be before ->enqueue_task() because uclamp considers the
+ * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
+ * in ->enqueue_task().
*/
- uclamp_rq_inc(rq, p);
+ uclamp_rq_inc(rq, p, flags);
+
+ p->sched_class->enqueue_task(rq, p, flags);
psi_enqueue(p, flags);
@@ -2283,6 +2287,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* just go back and repeat.
*/
rq = task_rq_lock(p, &rf);
+ /*
+ * If task is sched_delayed, force dequeue it, to avoid always
+ * hitting the tick timeout in the queued case
+ */
+ if (p->se.sched_delayed)
+ dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
trace_sched_wait_task(p);
running = task_on_cpu(rq, p);
queued = task_on_rq_queued(p);
@@ -6571,12 +6581,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* Otherwise marks the task's __state as RUNNING
*/
static bool try_to_block_task(struct rq *rq, struct task_struct *p,
- unsigned long task_state)
+ unsigned long *task_state_p)
{
+ unsigned long task_state = *task_state_p;
int flags = DEQUEUE_NOCLOCK;
if (signal_pending_state(task_state, p)) {
WRITE_ONCE(p->__state, TASK_RUNNING);
+ *task_state_p = TASK_RUNNING;
return false;
}
@@ -6668,6 +6680,8 @@ static void __sched notrace __schedule(int sched_mode)
if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
hrtick_clear(rq);
+ klp_sched_try_switch(prev);
+
local_irq_disable();
rcu_note_context_switch(preempt);
@@ -6713,7 +6727,7 @@ static void __sched notrace __schedule(int sched_mode)
goto picked;
}
} else if (!preempt && prev_state) {
- try_to_block_task(rq, prev, prev_state);
+ try_to_block_task(rq, prev, &prev_state);
switch_count = &prev->nvcsw;
}
@@ -7328,7 +7342,6 @@ EXPORT_STATIC_CALL_TRAMP(might_resched);
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
int __sched dynamic_cond_resched(void)
{
- klp_sched_try_switch();
if (!static_branch_unlikely(&sk_dynamic_cond_resched))
return 0;
return __cond_resched();
@@ -7500,7 +7513,6 @@ int sched_dynamic_mode(const char *str)
#endif
static DEFINE_MUTEX(sched_dynamic_mutex);
-static bool klp_override;
static void __sched_dynamic_update(int mode)
{
@@ -7508,8 +7520,7 @@ static void __sched_dynamic_update(int mode)
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
* the ZERO state, which is invalid.
*/
- if (!klp_override)
- preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(cond_resched);
preempt_dynamic_enable(might_resched);
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
@@ -7518,8 +7529,7 @@ static void __sched_dynamic_update(int mode)
switch (mode) {
case preempt_dynamic_none:
- if (!klp_override)
- preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(cond_resched);
preempt_dynamic_disable(might_resched);
preempt_dynamic_disable(preempt_schedule);
preempt_dynamic_disable(preempt_schedule_notrace);
@@ -7530,8 +7540,7 @@ static void __sched_dynamic_update(int mode)
break;
case preempt_dynamic_voluntary:
- if (!klp_override)
- preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(cond_resched);
preempt_dynamic_enable(might_resched);
preempt_dynamic_disable(preempt_schedule);
preempt_dynamic_disable(preempt_schedule_notrace);
@@ -7542,8 +7551,7 @@ static void __sched_dynamic_update(int mode)
break;
case preempt_dynamic_full:
- if (!klp_override)
- preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(cond_resched);
preempt_dynamic_disable(might_resched);
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
@@ -7554,8 +7562,7 @@ static void __sched_dynamic_update(int mode)
break;
case preempt_dynamic_lazy:
- if (!klp_override)
- preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(cond_resched);
preempt_dynamic_disable(might_resched);
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
@@ -7576,36 +7583,6 @@ void sched_dynamic_update(int mode)
mutex_unlock(&sched_dynamic_mutex);
}
-#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL
-
-static int klp_cond_resched(void)
-{
- __klp_sched_try_switch();
- return __cond_resched();
-}
-
-void sched_dynamic_klp_enable(void)
-{
- mutex_lock(&sched_dynamic_mutex);
-
- klp_override = true;
- static_call_update(cond_resched, klp_cond_resched);
-
- mutex_unlock(&sched_dynamic_mutex);
-}
-
-void sched_dynamic_klp_disable(void)
-{
- mutex_lock(&sched_dynamic_mutex);
-
- klp_override = false;
- __sched_dynamic_update(preempt_dynamic_mode);
-
- mutex_unlock(&sched_dynamic_mutex);
-}
-
-#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
static int __init setup_preempt_mode(char *str)
{
int mode = sched_dynamic_mode(str);
@@ -9018,7 +8995,7 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
unsigned long flags;
spin_lock_irqsave(&task_group_lock, flags);
- list_add_rcu(&tg->list, &task_groups);
+ list_add_tail_rcu(&tg->list, &task_groups);
/* Root should already exist: */
WARN_ON(!parent);
@@ -9204,11 +9181,15 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
struct task_struct *task;
struct cgroup_subsys_state *css;
+ if (!rt_group_sched_enabled())
+ goto scx_check;
+
cgroup_taskset_for_each(task, css, tset) {
if (!sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
}
-#endif
+scx_check:
+#endif /* CONFIG_RT_GROUP_SCHED */
return scx_cgroup_can_attach(tset);
}
@@ -9861,18 +9842,6 @@ static struct cftype cpu_legacy_files[] = {
.seq_show = cpu_cfs_local_stat_show,
},
#endif
-#ifdef CONFIG_RT_GROUP_SCHED
- {
- .name = "rt_runtime_us",
- .read_s64 = cpu_rt_runtime_read,
- .write_s64 = cpu_rt_runtime_write,
- },
- {
- .name = "rt_period_us",
- .read_u64 = cpu_rt_period_read_uint,
- .write_u64 = cpu_rt_period_write_uint,
- },
-#endif
#ifdef CONFIG_UCLAMP_TASK_GROUP
{
.name = "uclamp.min",
@@ -9890,6 +9859,55 @@ static struct cftype cpu_legacy_files[] = {
{ } /* Terminate */
};
+#ifdef CONFIG_RT_GROUP_SCHED
+static struct cftype rt_group_files[] = {
+ {
+ .name = "rt_runtime_us",
+ .read_s64 = cpu_rt_runtime_read,
+ .write_s64 = cpu_rt_runtime_write,
+ },
+ {
+ .name = "rt_period_us",
+ .read_u64 = cpu_rt_period_read_uint,
+ .write_u64 = cpu_rt_period_write_uint,
+ },
+ { } /* Terminate */
+};
+
+# ifdef CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED
+DEFINE_STATIC_KEY_FALSE(rt_group_sched);
+# else
+DEFINE_STATIC_KEY_TRUE(rt_group_sched);
+# endif
+
+static int __init setup_rt_group_sched(char *str)
+{
+ long val;
+
+ if (kstrtol(str, 0, &val) || val < 0 || val > 1) {
+ pr_warn("Unable to set rt_group_sched\n");
+ return 1;
+ }
+ if (val)
+ static_branch_enable(&rt_group_sched);
+ else
+ static_branch_disable(&rt_group_sched);
+
+ return 1;
+}
+__setup("rt_group_sched=", setup_rt_group_sched);
+
+static int __init cpu_rt_group_init(void)
+{
+ if (!rt_group_sched_enabled())
+ return 0;
+
+ WARN_ON(cgroup_add_legacy_cftypes(&cpu_cgrp_subsys, rt_group_files));
+ return 0;
+}
+subsys_initcall(cpu_rt_group_init);
+#endif /* CONFIG_RT_GROUP_SCHED */
+
static int cpu_extra_stat_show(struct seq_file *sf,
struct cgroup_subsys_state *css)
{
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 816f07f9d30f..461242ec958a 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -630,7 +630,7 @@ static const struct kobj_type sugov_tunables_ktype = {
/********************** cpufreq governor interface *********************/
-struct cpufreq_governor schedutil_gov;
+static struct cpufreq_governor schedutil_gov;
static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
@@ -909,7 +909,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
WRITE_ONCE(sg_policy->limits_changed, true);
}
-struct cpufreq_governor schedutil_gov = {
+static struct cpufreq_governor schedutil_gov = {
.name = "schedutil",
.owner = THIS_MODULE,
.flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
@@ -927,4 +927,9 @@ struct cpufreq_governor *cpufreq_default_governor(void)
}
#endif
+bool sugov_is_governor(struct cpufreq_policy *policy)
+{
+ return policy->governor == &schedutil_gov;
+}
+
cpufreq_governor_init(schedutil_gov);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 56ae54e0ce6a..557246880a7e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -588,6 +588,10 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
+
+ if (sd->flags & SD_ASYM_PACKING)
+ debugfs_create_u32("group_asym_prefer_cpu", 0444, parent,
+ (u32 *)&sd->groups->asym_prefer_cpu);
}
void update_sched_domain_debugfs(void)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index fdbf249d1c68..793e288f63cf 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -26,7 +26,7 @@ enum scx_consts {
* Iterating all tasks may take a while. Periodically drop
* scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
*/
- SCX_OPS_TASK_ITER_BATCH = 32,
+ SCX_TASK_ITER_BATCH = 32,
};
enum scx_exit_kind {
@@ -44,9 +44,9 @@ enum scx_exit_kind {
};
/*
- * An exit code can be specified when exiting with scx_bpf_exit() or
- * scx_ops_exit(), corresponding to exit_kind UNREG_BPF and UNREG_KERN
- * respectively. The codes are 64bit of the format:
+ * An exit code can be specified when exiting with scx_bpf_exit() or scx_exit(),
+ * corresponding to exit_kind UNREG_BPF and UNREG_KERN respectively. The codes
+ * are 64bit of the format:
*
* Bits: [63 .. 48 47 .. 32 31 .. 0]
* [ SYS ACT ] [ SYS RSN ] [ USR ]
@@ -163,16 +163,21 @@ enum scx_ops_flags {
/*
* CPU cgroup support flags
*/
- SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */
+ SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */
- SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
- SCX_OPS_ENQ_LAST |
- SCX_OPS_ENQ_EXITING |
- SCX_OPS_ENQ_MIGRATION_DISABLED |
- SCX_OPS_ALLOW_QUEUED_WAKEUP |
- SCX_OPS_SWITCH_PARTIAL |
- SCX_OPS_BUILTIN_IDLE_PER_NODE |
- SCX_OPS_HAS_CGROUP_WEIGHT,
+ SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
+ SCX_OPS_ENQ_LAST |
+ SCX_OPS_ENQ_EXITING |
+ SCX_OPS_ENQ_MIGRATION_DISABLED |
+ SCX_OPS_ALLOW_QUEUED_WAKEUP |
+ SCX_OPS_SWITCH_PARTIAL |
+ SCX_OPS_BUILTIN_IDLE_PER_NODE |
+ SCX_OPS_HAS_CGROUP_WEIGHT,
+
+ /* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
+ __SCX_OPS_INTERNAL_MASK = 0xffLLU << 56,
+
+ SCX_OPS_HAS_CPU_PREEMPT = 1LLU << 56,
};
/* argument container for ops.init_task() */
@@ -368,6 +373,15 @@ struct sched_ext_ops {
* @running: A task is starting to run on its associated CPU
* @p: task starting to run
*
+ * Note that this callback may be called from a CPU other than the
+ * one the task is going to run on. This can happen when a task
+ * property is changed (i.e., affinity), since scx_next_task_scx(),
+ * which triggers this callback, may run on a CPU different from
+ * the task's assigned CPU.
+ *
+ * Therefore, always use scx_bpf_task_cpu(@p) to determine the
+ * target CPU the task is going to use.
+ *
* See ->runnable() for explanation on the task state notifiers.
*/
void (*running)(struct task_struct *p);
@@ -377,6 +391,15 @@ struct sched_ext_ops {
* @p: task stopping to run
* @runnable: is task @p still runnable?
*
+ * Note that this callback may be called from a CPU other than the
+ * one the task was running on. This can happen when a task
+ * property is changed (i.e., affinity), since dequeue_task_scx(),
+ * which triggers this callback, may run on a CPU different from
+ * the task's assigned CPU.
+ *
+ * Therefore, always use scx_bpf_task_cpu(@p) to retrieve the CPU
+ * the task was running on.
+ *
* See ->runnable() for explanation on the task state notifiers. If
* !@runnable, ->quiescent() will be invoked after this operation
* returns.
@@ -465,6 +488,7 @@ struct sched_ext_ops {
* idle CPU tracking and the following helpers become unavailable:
*
* - scx_bpf_select_cpu_dfl()
+ * - scx_bpf_select_cpu_and()
* - scx_bpf_test_and_clear_cpu_idle()
* - scx_bpf_pick_idle_cpu()
*
@@ -728,6 +752,9 @@ struct sched_ext_ops {
* BPF scheduler is enabled.
*/
char name[SCX_OPS_NAME_LEN];
+
+ /* internal use only, must be NULL */
+ void *priv;
};
enum scx_opi {
@@ -739,6 +766,98 @@ enum scx_opi {
SCX_OPI_END = SCX_OP_IDX(init),
};
+/*
+ * Collection of event counters. Event types are placed in descending order.
+ */
+struct scx_event_stats {
+ /*
+ * If ops.select_cpu() returns a CPU which can't be used by the task,
+ * the core scheduler code silently picks a fallback CPU.
+ */
+ s64 SCX_EV_SELECT_CPU_FALLBACK;
+
+ /*
+ * When dispatching to a local DSQ, the CPU may have gone offline in
+ * the meantime. In this case, the task is bounced to the global DSQ.
+ */
+ s64 SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE;
+
+ /*
+ * If SCX_OPS_ENQ_LAST is not set, the number of times that a task
+ * continued to run because there were no other tasks on the CPU.
+ */
+ s64 SCX_EV_DISPATCH_KEEP_LAST;
+
+ /*
+ * If SCX_OPS_ENQ_EXITING is not set, the number of times that a task
+ * is dispatched to a local DSQ when exiting.
+ */
+ s64 SCX_EV_ENQ_SKIP_EXITING;
+
+ /*
+ * If SCX_OPS_ENQ_MIGRATION_DISABLED is not set, the number of times a
+ * migration disabled task skips ops.enqueue() and is dispatched to its
+ * local DSQ.
+ */
+ s64 SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
+
+ /*
+ * Total number of times a task's time slice was refilled with the
+ * default value (SCX_SLICE_DFL).
+ */
+ s64 SCX_EV_REFILL_SLICE_DFL;
+
+ /*
+ * The total duration of bypass modes in nanoseconds.
+ */
+ s64 SCX_EV_BYPASS_DURATION;
+
+ /*
+ * The number of tasks dispatched in the bypassing mode.
+ */
+ s64 SCX_EV_BYPASS_DISPATCH;
+
+ /*
+ * The number of times the bypassing mode has been activated.
+ */
+ s64 SCX_EV_BYPASS_ACTIVATE;
+};
+
+struct scx_sched {
+ struct sched_ext_ops ops;
+ DECLARE_BITMAP(has_op, SCX_OPI_END);
+
+ /*
+ * Dispatch queues.
+ *
+ * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
+ * This is to avoid live-locking in bypass mode where all tasks are
+ * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
+ * per-node split isn't sufficient, it can be further split.
+ */
+ struct rhashtable dsq_hash;
+ struct scx_dispatch_q **global_dsqs;
+
+ /*
+ * The event counters are in a per-CPU variable to minimize the
+ * accounting overhead. A system-wide view on the event counter is
+ * constructed when requested by scx_bpf_events().
+ */
+ struct scx_event_stats __percpu *event_stats_cpu;
+
+ bool warned_zero_slice;
+
+ atomic_t exit_kind;
+ struct scx_exit_info *exit_info;
+
+ struct kobject kobj;
+
+ struct kthread_worker *helper;
+ struct irq_work error_irq_work;
+ struct kthread_work disable_work;
+ struct rcu_work rcu_work;
+};
+
enum scx_wake_flags {
/* expose select WF_* flags as enums */
SCX_WAKE_FORK = WF_FORK,
@@ -838,18 +957,18 @@ enum scx_tg_flags {
SCX_TG_INITED = 1U << 1,
};
-enum scx_ops_enable_state {
- SCX_OPS_ENABLING,
- SCX_OPS_ENABLED,
- SCX_OPS_DISABLING,
- SCX_OPS_DISABLED,
+enum scx_enable_state {
+ SCX_ENABLING,
+ SCX_ENABLED,
+ SCX_DISABLING,
+ SCX_DISABLED,
};
-static const char *scx_ops_enable_state_str[] = {
- [SCX_OPS_ENABLING] = "enabling",
- [SCX_OPS_ENABLED] = "enabled",
- [SCX_OPS_DISABLING] = "disabling",
- [SCX_OPS_DISABLED] = "disabled",
+static const char *scx_enable_state_str[] = {
+ [SCX_ENABLING] = "enabling",
+ [SCX_ENABLED] = "enabled",
+ [SCX_DISABLING] = "disabling",
+ [SCX_DISABLED] = "disabled",
};
/*
@@ -898,6 +1017,16 @@ enum scx_ops_state {
#define SCX_OPSS_QSEQ_MASK (~SCX_OPSS_STATE_MASK)
/*
+ * NOTE: sched_ext is in the process of growing multiple scheduler support and
+ * scx_root usage is in a transitional state. Naked dereferences are safe if the
+ * caller is one of the tasks attached to SCX and explicit RCU dereference is
+ * necessary otherwise. Naked scx_root dereferences trigger sparse warnings but
+ * are used as temporary markers to indicate that the dereferences need to be
+ * updated to point to the associated scheduler instances rather than scx_root.
+ */
+static struct scx_sched __rcu *scx_root;
+
+/*
* During exit, a task may schedule after losing its PIDs. When disabling the
* BPF scheduler, we need to be able to iterate tasks in every state to
* guarantee system safety. Maintain a dedicated task list which contains every
@@ -907,33 +1036,17 @@ static DEFINE_SPINLOCK(scx_tasks_lock);
static LIST_HEAD(scx_tasks);
/* ops enable/disable */
-static struct kthread_worker *scx_ops_helper;
-static DEFINE_MUTEX(scx_ops_enable_mutex);
-DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
+static DEFINE_MUTEX(scx_enable_mutex);
+DEFINE_STATIC_KEY_FALSE(__scx_enabled);
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
-static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
+static atomic_t scx_enable_state_var = ATOMIC_INIT(SCX_DISABLED);
static unsigned long scx_in_softlockup;
-static atomic_t scx_ops_breather_depth = ATOMIC_INIT(0);
-static int scx_ops_bypass_depth;
-static bool scx_ops_init_task_enabled;
+static atomic_t scx_breather_depth = ATOMIC_INIT(0);
+static int scx_bypass_depth;
+static bool scx_init_task_enabled;
static bool scx_switching_all;
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
-static struct sched_ext_ops scx_ops;
-static bool scx_warned_zero_slice;
-
-DEFINE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
-static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last);
-static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting);
-static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled);
-static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);
-
-static struct static_key_false scx_has_op[SCX_OPI_END] =
- { [0 ... SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT };
-
-static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE);
-static struct scx_exit_info *scx_exit_info;
-
static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
@@ -947,7 +1060,7 @@ static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
/*
* The maximum amount of time in jiffies that a task may be runnable without
* being scheduled on a CPU. If this timeout is exceeded, it will trigger
- * scx_ops_error().
+ * scx_error().
*/
static unsigned long scx_watchdog_timeout;
@@ -973,23 +1086,12 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
*/
static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);
-/*
- * Dispatch queues.
- *
- * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
- * to avoid live-locking in bypass mode where all tasks are dispatched to
- * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
- * sufficient, it can be further split.
- */
-static struct scx_dispatch_q **global_dsqs;
-
static const struct rhashtable_params dsq_hash_params = {
.key_len = sizeof_field(struct scx_dispatch_q, id),
.key_offset = offsetof(struct scx_dispatch_q, id),
.head_offset = offsetof(struct scx_dispatch_q, hash_node),
};
-static struct rhashtable dsq_hash;
static LLIST_HEAD(dsqs_to_free);
/* dispatch buf */
@@ -1036,27 +1138,46 @@ static struct scx_dump_data scx_dump_data = {
/* /sys/kernel/sched_ext interface */
static struct kset *scx_kset;
-static struct kobject *scx_root_kobj;
#define CREATE_TRACE_POINTS
#include <trace/events/sched_ext.h>
static void process_ddsp_deferred_locals(struct rq *rq);
static void scx_bpf_kick_cpu(s32 cpu, u64 flags);
-static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
- s64 exit_code,
- const char *fmt, ...);
+static void scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
+ s64 exit_code, const char *fmt, va_list args);
+
+static __printf(4, 5) void scx_exit(struct scx_sched *sch,
+ enum scx_exit_kind kind, s64 exit_code,
+ const char *fmt, ...)
+{
+ va_list args;
-#define scx_ops_error_kind(err, fmt, args...) \
- scx_ops_exit_kind((err), 0, fmt, ##args)
+ va_start(args, fmt);
+ scx_vexit(sch, kind, exit_code, fmt, args);
+ va_end(args);
+}
+
+static __printf(3, 4) void scx_kf_exit(enum scx_exit_kind kind, s64 exit_code,
+ const char *fmt, ...)
+{
+ struct scx_sched *sch;
+ va_list args;
-#define scx_ops_exit(code, fmt, args...) \
- scx_ops_exit_kind(SCX_EXIT_UNREG_KERN, (code), fmt, ##args)
+ rcu_read_lock();
+ sch = rcu_dereference(scx_root);
+ if (sch) {
+ va_start(args, fmt);
+ scx_vexit(sch, kind, exit_code, fmt, args);
+ va_end(args);
+ }
+ rcu_read_unlock();
+}
-#define scx_ops_error(fmt, args...) \
- scx_ops_error_kind(SCX_EXIT_ERROR, fmt, ##args)
+#define scx_error(sch, fmt, args...) scx_exit((sch), SCX_EXIT_ERROR, 0, fmt, ##args)
+#define scx_kf_error(fmt, args...) scx_kf_exit(SCX_EXIT_ERROR, 0, fmt, ##args)
-#define SCX_HAS_OP(op) static_branch_likely(&scx_has_op[SCX_OP_IDX(op)])
+#define SCX_HAS_OP(sch, op) test_bit(SCX_OP_IDX(op), (sch)->has_op)
static long jiffies_delta_msecs(unsigned long at, unsigned long now)
{
@@ -1086,12 +1207,14 @@ static bool u32_before(u32 a, u32 b)
static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
{
- return global_dsqs[cpu_to_node(task_cpu(p))];
+ struct scx_sched *sch = scx_root;
+
+ return sch->global_dsqs[cpu_to_node(task_cpu(p))];
}
-static struct scx_dispatch_q *find_user_dsq(u64 dsq_id)
+static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
{
- return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params);
+ return rhashtable_lookup_fast(&sch->dsq_hash, &dsq_id, dsq_hash_params);
}
/*
@@ -1118,27 +1241,61 @@ static void scx_kf_disallow(u32 mask)
current->scx.kf_mask &= ~mask;
}
-#define SCX_CALL_OP(mask, op, args...) \
+/*
+ * Track the rq currently locked.
+ *
+ * This allows kfuncs to safely operate on rq from any scx ops callback,
+ * knowing which rq is already locked.
+ */
+static DEFINE_PER_CPU(struct rq *, locked_rq);
+
+static inline void update_locked_rq(struct rq *rq)
+{
+ /*
+ * Check whether @rq is actually locked. This can help expose bugs
+ * or incorrect assumptions about the context in which a kfunc or
+ * callback is executed.
+ */
+ if (rq)
+ lockdep_assert_rq_held(rq);
+ __this_cpu_write(locked_rq, rq);
+}
+
+/*
+ * Return the rq currently locked from an scx callback, or NULL if no rq is
+ * locked.
+ */
+static inline struct rq *scx_locked_rq(void)
+{
+ return __this_cpu_read(locked_rq);
+}
+
+#define SCX_CALL_OP(sch, mask, op, rq, args...) \
do { \
+ update_locked_rq(rq); \
if (mask) { \
scx_kf_allow(mask); \
- scx_ops.op(args); \
+ (sch)->ops.op(args); \
scx_kf_disallow(mask); \
} else { \
- scx_ops.op(args); \
+ (sch)->ops.op(args); \
} \
+ update_locked_rq(NULL); \
} while (0)
-#define SCX_CALL_OP_RET(mask, op, args...) \
+#define SCX_CALL_OP_RET(sch, mask, op, rq, args...) \
({ \
- __typeof__(scx_ops.op(args)) __ret; \
+ __typeof__((sch)->ops.op(args)) __ret; \
+ \
+ update_locked_rq(rq); \
if (mask) { \
scx_kf_allow(mask); \
- __ret = scx_ops.op(args); \
+ __ret = (sch)->ops.op(args); \
scx_kf_disallow(mask); \
} else { \
- __ret = scx_ops.op(args); \
+ __ret = (sch)->ops.op(args); \
} \
+ update_locked_rq(NULL); \
__ret; \
})
@@ -1153,31 +1310,31 @@ do { \
* scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
* the specific task.
*/
-#define SCX_CALL_OP_TASK(mask, op, task, args...) \
+#define SCX_CALL_OP_TASK(sch, mask, op, rq, task, args...) \
do { \
BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
current->scx.kf_tasks[0] = task; \
- SCX_CALL_OP(mask, op, task, ##args); \
+ SCX_CALL_OP((sch), mask, op, rq, task, ##args); \
current->scx.kf_tasks[0] = NULL; \
} while (0)
-#define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \
+#define SCX_CALL_OP_TASK_RET(sch, mask, op, rq, task, args...) \
({ \
- __typeof__(scx_ops.op(task, ##args)) __ret; \
+ __typeof__((sch)->ops.op(task, ##args)) __ret; \
BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
current->scx.kf_tasks[0] = task; \
- __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \
+ __ret = SCX_CALL_OP_RET((sch), mask, op, rq, task, ##args); \
current->scx.kf_tasks[0] = NULL; \
__ret; \
})
-#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \
+#define SCX_CALL_OP_2TASKS_RET(sch, mask, op, rq, task0, task1, args...) \
({ \
- __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \
+ __typeof__((sch)->ops.op(task0, task1, ##args)) __ret; \
BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
current->scx.kf_tasks[0] = task0; \
current->scx.kf_tasks[1] = task1; \
- __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \
+ __ret = SCX_CALL_OP_RET((sch), mask, op, rq, task0, task1, ##args); \
current->scx.kf_tasks[0] = NULL; \
current->scx.kf_tasks[1] = NULL; \
__ret; \
@@ -1187,8 +1344,8 @@ do { \
static __always_inline bool scx_kf_allowed(u32 mask)
{
if (unlikely(!(current->scx.kf_mask & mask))) {
- scx_ops_error("kfunc with mask 0x%x called from an operation only allowing 0x%x",
- mask, current->scx.kf_mask);
+ scx_kf_error("kfunc with mask 0x%x called from an operation only allowing 0x%x",
+ mask, current->scx.kf_mask);
return false;
}
@@ -1201,13 +1358,13 @@ static __always_inline bool scx_kf_allowed(u32 mask)
*/
if (unlikely(highest_bit(mask) == SCX_KF_CPU_RELEASE &&
(current->scx.kf_mask & higher_bits(SCX_KF_CPU_RELEASE)))) {
- scx_ops_error("cpu_release kfunc called from a nested operation");
+ scx_kf_error("cpu_release kfunc called from a nested operation");
return false;
}
if (unlikely(highest_bit(mask) == SCX_KF_DISPATCH &&
(current->scx.kf_mask & higher_bits(SCX_KF_DISPATCH)))) {
- scx_ops_error("dispatch kfunc called from a nested operation");
+ scx_kf_error("dispatch kfunc called from a nested operation");
return false;
}
@@ -1223,18 +1380,13 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
if (unlikely((p != current->scx.kf_tasks[0] &&
p != current->scx.kf_tasks[1]))) {
- scx_ops_error("called on a task not being operated on");
+ scx_kf_error("called on a task not being operated on");
return false;
}
return true;
}
-static bool scx_kf_allowed_if_unlocked(void)
-{
- return !current->scx.kf_mask;
-}
-
/**
* nldsq_next_task - Iterate to the next task in a non-local DSQ
* @dsq: user dsq being iterated
@@ -1402,15 +1554,15 @@ static void scx_task_iter_stop(struct scx_task_iter *iter)
* @iter: iterator to walk
*
* Visit the next task. See scx_task_iter_start() for details. Locks are dropped
- * and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing
- * stalls by holding scx_tasks_lock for too long.
+ * and re-acquired every %SCX_TASK_ITER_BATCH iterations to avoid causing stalls
+ * by holding scx_tasks_lock for too long.
*/
static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
{
struct list_head *cursor = &iter->cursor.tasks_node;
struct sched_ext_entity *pos;
- if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) {
+ if (!(++iter->cnt % SCX_TASK_ITER_BATCH)) {
scx_task_iter_unlock(iter);
cond_resched();
scx_task_iter_relock(iter);
@@ -1481,91 +1633,29 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
return p;
}
-/*
- * Collection of event counters. Event types are placed in descending order.
- */
-struct scx_event_stats {
- /*
- * If ops.select_cpu() returns a CPU which can't be used by the task,
- * the core scheduler code silently picks a fallback CPU.
- */
- s64 SCX_EV_SELECT_CPU_FALLBACK;
-
- /*
- * When dispatching to a local DSQ, the CPU may have gone offline in
- * the meantime. In this case, the task is bounced to the global DSQ.
- */
- s64 SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE;
-
- /*
- * If SCX_OPS_ENQ_LAST is not set, the number of times that a task
- * continued to run because there were no other tasks on the CPU.
- */
- s64 SCX_EV_DISPATCH_KEEP_LAST;
-
- /*
- * If SCX_OPS_ENQ_EXITING is not set, the number of times that a task
- * is dispatched to a local DSQ when exiting.
- */
- s64 SCX_EV_ENQ_SKIP_EXITING;
-
- /*
- * If SCX_OPS_ENQ_MIGRATION_DISABLED is not set, the number of times a
- * migration disabled task skips ops.enqueue() and is dispatched to its
- * local DSQ.
- */
- s64 SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
-
- /*
- * The total number of tasks enqueued (or pick_task-ed) with a
- * default time slice (SCX_SLICE_DFL).
- */
- s64 SCX_EV_ENQ_SLICE_DFL;
-
- /*
- * The total duration of bypass modes in nanoseconds.
- */
- s64 SCX_EV_BYPASS_DURATION;
-
- /*
- * The number of tasks dispatched in the bypassing mode.
- */
- s64 SCX_EV_BYPASS_DISPATCH;
-
- /*
- * The number of times the bypassing mode has been activated.
- */
- s64 SCX_EV_BYPASS_ACTIVATE;
-};
-
-/*
- * The event counter is organized by a per-CPU variable to minimize the
- * accounting overhead without synchronization. A system-wide view on the
- * event counter is constructed when requested by scx_bpf_get_event_stat().
- */
-static DEFINE_PER_CPU(struct scx_event_stats, event_stats_cpu);
-
/**
* scx_add_event - Increase an event counter for 'name' by 'cnt'
+ * @sch: scx_sched to account events for
* @name: an event name defined in struct scx_event_stats
* @cnt: the number of the event occured
*
* This can be used when preemption is not disabled.
*/
-#define scx_add_event(name, cnt) do { \
- this_cpu_add(event_stats_cpu.name, cnt); \
- trace_sched_ext_event(#name, cnt); \
+#define scx_add_event(sch, name, cnt) do { \
+ this_cpu_add((sch)->event_stats_cpu->name, (cnt)); \
+ trace_sched_ext_event(#name, (cnt)); \
} while(0)
/**
* __scx_add_event - Increase an event counter for 'name' by 'cnt'
+ * @sch: scx_sched to account events for
* @name: an event name defined in struct scx_event_stats
* @cnt: the number of the event occured
*
* This should be used only when preemption is disabled.
*/
-#define __scx_add_event(name, cnt) do { \
- __this_cpu_add(event_stats_cpu.name, cnt); \
+#define __scx_add_event(sch, name, cnt) do { \
+ __this_cpu_add((sch)->event_stats_cpu->name, (cnt)); \
trace_sched_ext_event(#name, cnt); \
} while(0)
@@ -1590,25 +1680,25 @@ static DEFINE_PER_CPU(struct scx_event_stats, event_stats_cpu);
} while (0)
-static void scx_bpf_events(struct scx_event_stats *events, size_t events__sz);
+static void scx_read_events(struct scx_sched *sch,
+ struct scx_event_stats *events);
-static enum scx_ops_enable_state scx_ops_enable_state(void)
+static enum scx_enable_state scx_enable_state(void)
{
- return atomic_read(&scx_ops_enable_state_var);
+ return atomic_read(&scx_enable_state_var);
}
-static enum scx_ops_enable_state
-scx_ops_set_enable_state(enum scx_ops_enable_state to)
+static enum scx_enable_state scx_set_enable_state(enum scx_enable_state to)
{
- return atomic_xchg(&scx_ops_enable_state_var, to);
+ return atomic_xchg(&scx_enable_state_var, to);
}
-static bool scx_ops_tryset_enable_state(enum scx_ops_enable_state to,
- enum scx_ops_enable_state from)
+static bool scx_tryset_enable_state(enum scx_enable_state to,
+ enum scx_enable_state from)
{
int from_v = from;
- return atomic_try_cmpxchg(&scx_ops_enable_state_var, &from_v, to);
+ return atomic_try_cmpxchg(&scx_enable_state_var, &from_v, to);
}
static bool scx_rq_bypassing(struct rq *rq)
@@ -1633,8 +1723,14 @@ static void wait_ops_state(struct task_struct *p, unsigned long opss)
} while (atomic_long_read_acquire(&p->scx.ops_state) == opss);
}
+static inline bool __cpu_valid(s32 cpu)
+{
+ return likely(cpu >= 0 && cpu < nr_cpu_ids && cpu_possible(cpu));
+}
+
/**
- * ops_cpu_valid - Verify a cpu number
+ * ops_cpu_valid - Verify a cpu number, to be used on ops input args
+ * @sch: scx_sched to abort on error
* @cpu: cpu number which came from a BPF ops
* @where: extra information reported on error
*
@@ -1642,35 +1738,52 @@ static void wait_ops_state(struct task_struct *p, unsigned long opss)
* Verify that it is in range and one of the possible cpus. If invalid, trigger
* an ops error.
*/
-static bool ops_cpu_valid(s32 cpu, const char *where)
+static bool ops_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
{
- if (likely(cpu >= 0 && cpu < nr_cpu_ids && cpu_possible(cpu))) {
+ if (__cpu_valid(cpu)) {
return true;
} else {
- scx_ops_error("invalid CPU %d%s%s", cpu,
- where ? " " : "", where ?: "");
+ scx_error(sch, "invalid CPU %d%s%s", cpu, where ? " " : "", where ?: "");
+ return false;
+ }
+}
+
+/**
+ * kf_cpu_valid - Verify a CPU number, to be used on kfunc input args
+ * @cpu: cpu number which came from a BPF ops
+ * @where: extra information reported on error
+ *
+ * The same as ops_cpu_valid() but @sch is implicit.
+ */
+static bool kf_cpu_valid(u32 cpu, const char *where)
+{
+ if (__cpu_valid(cpu)) {
+ return true;
+ } else {
+ scx_kf_error("invalid CPU %d%s%s", cpu, where ? " " : "", where ?: "");
return false;
}
}
/**
* ops_sanitize_err - Sanitize a -errno value
+ * @sch: scx_sched to error out on error
* @ops_name: operation to blame on failure
* @err: -errno value to sanitize
*
- * Verify @err is a valid -errno. If not, trigger scx_ops_error() and return
+ * Verify @err is a valid -errno. If not, trigger scx_error() and return
* -%EPROTO. This is necessary because returning a rogue -errno up the chain can
* cause misbehaviors. For an example, a large negative return from
* ops.init_task() triggers an oops when passed up the call chain because the
* value fails IS_ERR() test after being encoded with ERR_PTR() and then is
* handled as a pointer.
*/
-static int ops_sanitize_err(const char *ops_name, s32 err)
+static int ops_sanitize_err(struct scx_sched *sch, const char *ops_name, s32 err)
{
if (err < 0 && err >= -MAX_ERRNO)
return err;
- scx_ops_error("ops.%s() returned an invalid errno %d", ops_name, err);
+ scx_error(sch, "ops.%s() returned an invalid errno %d", ops_name, err);
return -EPROTO;
}
@@ -1777,7 +1890,7 @@ static void touch_core_sched_dispatch(struct rq *rq, struct task_struct *p)
lockdep_assert_rq_held(rq);
#ifdef CONFIG_SCHED_CORE
- if (SCX_HAS_OP(core_sched_before))
+ if (unlikely(SCX_HAS_OP(scx_root, core_sched_before)))
touch_core_sched(rq, p);
#endif
}
@@ -1815,8 +1928,14 @@ static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta)
WRITE_ONCE(dsq->nr, dsq->nr + delta);
}
-static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
- u64 enq_flags)
+static void refill_task_slice_dfl(struct task_struct *p)
+{
+ p->scx.slice = SCX_SLICE_DFL;
+ __scx_add_event(scx_root, SCX_EV_REFILL_SLICE_DFL, 1);
+}
+
+static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
+ struct task_struct *p, u64 enq_flags)
{
bool is_local = dsq->id == SCX_DSQ_LOCAL;
@@ -1827,7 +1946,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
if (!is_local) {
raw_spin_lock(&dsq->lock);
if (unlikely(dsq->id == SCX_DSQ_INVALID)) {
- scx_ops_error("attempting to dispatch to a destroyed dsq");
+ scx_error(sch, "attempting to dispatch to a destroyed dsq");
/* fall back to the global dsq */
raw_spin_unlock(&dsq->lock);
dsq = find_global_dsq(p);
@@ -1844,7 +1963,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
* disallow any internal DSQ from doing vtime ordering of
* tasks.
*/
- scx_ops_error("cannot use vtime ordering for built-in DSQs");
+ scx_error(sch, "cannot use vtime ordering for built-in DSQs");
enq_flags &= ~SCX_ENQ_DSQ_PRIQ;
}
@@ -1858,8 +1977,8 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
*/
if (unlikely(RB_EMPTY_ROOT(&dsq->priq) &&
nldsq_next_task(dsq, NULL, false)))
- scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks",
- dsq->id);
+ scx_error(sch, "DSQ ID 0x%016llx already had FIFO-enqueued tasks",
+ dsq->id);
p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ;
rb_add(&p->scx.dsq_priq, &dsq->priq, scx_dsq_priq_less);
@@ -1880,8 +1999,8 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
} else {
/* a FIFO DSQ shouldn't be using PRIQ enqueuing */
if (unlikely(!RB_EMPTY_ROOT(&dsq->priq)))
- scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks",
- dsq->id);
+ scx_error(sch, "DSQ ID 0x%016llx already had PRIQ-enqueued tasks",
+ dsq->id);
if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT))
list_add(&p->scx.dsq_list.node, &dsq->list);
@@ -1996,7 +2115,8 @@ static void dispatch_dequeue(struct rq *rq, struct task_struct *p)
raw_spin_unlock(&dsq->lock);
}
-static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id,
+static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
+ struct rq *rq, u64 dsq_id,
struct task_struct *p)
{
struct scx_dispatch_q *dsq;
@@ -2007,7 +2127,7 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id,
if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (!ops_cpu_valid(cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
+ if (!ops_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
return find_global_dsq(p);
return &cpu_rq(cpu)->scx.local_dsq;
@@ -2016,11 +2136,11 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id,
if (dsq_id == SCX_DSQ_GLOBAL)
dsq = find_global_dsq(p);
else
- dsq = find_user_dsq(dsq_id);
+ dsq = find_user_dsq(sch, dsq_id);
if (unlikely(!dsq)) {
- scx_ops_error("non-existent DSQ 0x%llx for %s[%d]",
- dsq_id, p->comm, p->pid);
+ scx_error(sch, "non-existent DSQ 0x%llx for %s[%d]",
+ dsq_id, p->comm, p->pid);
return find_global_dsq(p);
}
@@ -2041,12 +2161,12 @@ static void mark_direct_dispatch(struct task_struct *ddsp_task,
/* @p must match the task on the enqueue path */
if (unlikely(p != ddsp_task)) {
if (IS_ERR(ddsp_task))
- scx_ops_error("%s[%d] already direct-dispatched",
- p->comm, p->pid);
+ scx_kf_error("%s[%d] already direct-dispatched",
+ p->comm, p->pid);
else
- scx_ops_error("scheduling for %s[%d] but trying to direct-dispatch %s[%d]",
- ddsp_task->comm, ddsp_task->pid,
- p->comm, p->pid);
+ scx_kf_error("scheduling for %s[%d] but trying to direct-dispatch %s[%d]",
+ ddsp_task->comm, ddsp_task->pid,
+ p->comm, p->pid);
return;
}
@@ -2057,11 +2177,12 @@ static void mark_direct_dispatch(struct task_struct *ddsp_task,
p->scx.ddsp_enq_flags = enq_flags;
}
-static void direct_dispatch(struct task_struct *p, u64 enq_flags)
+static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
+ u64 enq_flags)
{
struct rq *rq = task_rq(p);
struct scx_dispatch_q *dsq =
- find_dsq_for_dispatch(rq, p->scx.ddsp_dsq_id, p);
+ find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
touch_core_sched_dispatch(rq, p);
@@ -2102,7 +2223,8 @@ static void direct_dispatch(struct task_struct *p, u64 enq_flags)
return;
}
- dispatch_enqueue(dsq, p, p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
+ dispatch_enqueue(sch, dsq, p,
+ p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
}
static bool scx_rq_online(struct rq *rq)
@@ -2120,6 +2242,7 @@ static bool scx_rq_online(struct rq *rq)
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
int sticky_cpu)
{
+ struct scx_sched *sch = scx_root;
struct task_struct **ddsp_taskp;
unsigned long qseq;
@@ -2138,7 +2261,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
goto local;
if (scx_rq_bypassing(rq)) {
- __scx_add_event(SCX_EV_BYPASS_DISPATCH, 1);
+ __scx_add_event(sch, SCX_EV_BYPASS_DISPATCH, 1);
goto global;
}
@@ -2146,20 +2269,20 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
goto direct;
/* see %SCX_OPS_ENQ_EXITING */
- if (!static_branch_unlikely(&scx_ops_enq_exiting) &&
+ if (!(sch->ops.flags & SCX_OPS_ENQ_EXITING) &&
unlikely(p->flags & PF_EXITING)) {
- __scx_add_event(SCX_EV_ENQ_SKIP_EXITING, 1);
+ __scx_add_event(sch, SCX_EV_ENQ_SKIP_EXITING, 1);
goto local;
}
/* see %SCX_OPS_ENQ_MIGRATION_DISABLED */
- if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) &&
+ if (!(sch->ops.flags & SCX_OPS_ENQ_MIGRATION_DISABLED) &&
is_migration_disabled(p)) {
- __scx_add_event(SCX_EV_ENQ_SKIP_MIGRATION_DISABLED, 1);
+ __scx_add_event(sch, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED, 1);
goto local;
}
- if (!SCX_HAS_OP(enqueue))
+ if (unlikely(!SCX_HAS_OP(sch, enqueue)))
goto global;
/* DSQ bypass didn't trigger, enqueue on the BPF scheduler */
@@ -2172,7 +2295,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
WARN_ON_ONCE(*ddsp_taskp);
*ddsp_taskp = p;
- SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
+ SCX_CALL_OP_TASK(sch, SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags);
*ddsp_taskp = NULL;
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2186,7 +2309,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
return;
direct:
- direct_dispatch(p, enq_flags);
+ direct_dispatch(sch, p, enq_flags);
return;
local:
@@ -2196,17 +2319,15 @@ local:
* higher priority it becomes from scx_prio_less()'s POV.
*/
touch_core_sched(rq, p);
- p->scx.slice = SCX_SLICE_DFL;
- __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
+ refill_task_slice_dfl(p);
local_norefill:
- dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags);
+ dispatch_enqueue(sch, &rq->scx.local_dsq, p, enq_flags);
return;
global:
touch_core_sched(rq, p); /* see the comment in local: */
- p->scx.slice = SCX_SLICE_DFL;
- __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
- dispatch_enqueue(find_global_dsq(p), p, enq_flags);
+ refill_task_slice_dfl(p);
+ dispatch_enqueue(sch, find_global_dsq(p), p, enq_flags);
}
static bool task_runnable(const struct task_struct *p)
@@ -2224,7 +2345,7 @@ static void set_task_runnable(struct rq *rq, struct task_struct *p)
}
/*
- * list_add_tail() must be used. scx_ops_bypass() depends on tasks being
+ * list_add_tail() must be used. scx_bypass() depends on tasks being
* appended to the runnable_list.
*/
list_add_tail(&p->scx.runnable_node, &rq->scx.runnable_list);
@@ -2239,6 +2360,7 @@ static void clr_task_runnable(struct task_struct *p, bool reset_runnable_at)
static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags)
{
+ struct scx_sched *sch = scx_root;
int sticky_cpu = p->scx.sticky_cpu;
if (enq_flags & ENQUEUE_WAKEUP)
@@ -2268,8 +2390,8 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
rq->scx.nr_running++;
add_nr_running(rq, 1);
- if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p))
- SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
+ if (SCX_HAS_OP(sch, runnable) && !task_on_rq_migrating(p))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, runnable, rq, p, enq_flags);
if (enq_flags & SCX_ENQ_WAKEUP)
touch_core_sched(rq, p);
@@ -2280,11 +2402,12 @@ out:
if ((enq_flags & SCX_ENQ_CPU_SELECTED) &&
unlikely(cpu_of(rq) != p->scx.selected_cpu))
- __scx_add_event(SCX_EV_SELECT_CPU_FALLBACK, 1);
+ __scx_add_event(sch, SCX_EV_SELECT_CPU_FALLBACK, 1);
}
-static void ops_dequeue(struct task_struct *p, u64 deq_flags)
+static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
{
+ struct scx_sched *sch = scx_root;
unsigned long opss;
/* dequeue is always temporary, don't reset runnable_at */
@@ -2303,8 +2426,9 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
*/
BUG();
case SCX_OPSS_QUEUED:
- if (SCX_HAS_OP(dequeue))
- SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
+ if (SCX_HAS_OP(sch, dequeue))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, dequeue, rq,
+ p, deq_flags);
if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
SCX_OPSS_NONE))
@@ -2332,12 +2456,14 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
{
+ struct scx_sched *sch = scx_root;
+
if (!(p->scx.flags & SCX_TASK_QUEUED)) {
WARN_ON_ONCE(task_runnable(p));
return true;
}
- ops_dequeue(p, deq_flags);
+ ops_dequeue(rq, p, deq_flags);
/*
* A currently running task which is going off @rq first gets dequeued
@@ -2351,13 +2477,13 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
* information meaningful to the BPF scheduler and can be suppressed by
* skipping the callbacks if the task is !QUEUED.
*/
- if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
+ if (SCX_HAS_OP(sch, stopping) && task_current(rq, p)) {
update_curr_scx(rq);
- SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, stopping, rq, p, false);
}
- if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p))
- SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
+ if (SCX_HAS_OP(sch, quiescent) && !task_on_rq_migrating(p))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, quiescent, rq, p, deq_flags);
if (deq_flags & SCX_DEQ_SLEEP)
p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
@@ -2374,20 +2500,23 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
static void yield_task_scx(struct rq *rq)
{
+ struct scx_sched *sch = scx_root;
struct task_struct *p = rq->curr;
- if (SCX_HAS_OP(yield))
- SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
+ if (SCX_HAS_OP(sch, yield))
+ SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq, p, NULL);
else
p->scx.slice = 0;
}
static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
{
+ struct scx_sched *sch = scx_root;
struct task_struct *from = rq->curr;
- if (SCX_HAS_OP(yield))
- return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
+ if (SCX_HAS_OP(sch, yield))
+ return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq,
+ from, to);
else
return false;
}
@@ -2467,7 +2596,8 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
*
* The caller must ensure that @p and @rq are on different CPUs.
*/
-static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
+static bool task_can_run_on_remote_rq(struct scx_sched *sch,
+ struct task_struct *p, struct rq *rq,
bool enforce)
{
int cpu = cpu_of(rq);
@@ -2488,8 +2618,8 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
*/
if (unlikely(is_migration_disabled(p))) {
if (enforce)
- scx_ops_error("SCX_DSQ_LOCAL[_ON] cannot move migration disabled %s[%d] from CPU %d to %d",
- p->comm, p->pid, task_cpu(p), cpu);
+ scx_error(sch, "SCX_DSQ_LOCAL[_ON] cannot move migration disabled %s[%d] from CPU %d to %d",
+ p->comm, p->pid, task_cpu(p), cpu);
return false;
}
@@ -2501,14 +2631,15 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
*/
if (!task_allowed_on_cpu(p, cpu)) {
if (enforce)
- scx_ops_error("SCX_DSQ_LOCAL[_ON] target CPU %d not allowed for %s[%d]",
- cpu, p->comm, p->pid);
+ scx_error(sch, "SCX_DSQ_LOCAL[_ON] target CPU %d not allowed for %s[%d]",
+ cpu, p->comm, p->pid);
return false;
}
if (!scx_rq_online(rq)) {
if (enforce)
- __scx_add_event(SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE, 1);
+ __scx_add_event(scx_root,
+ SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE, 1);
return false;
}
@@ -2580,12 +2711,13 @@ static bool consume_remote_task(struct rq *this_rq, struct task_struct *p,
}
#else /* CONFIG_SMP */
static inline void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, struct rq *src_rq, struct rq *dst_rq) { WARN_ON_ONCE(1); }
-static inline bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool enforce) { return false; }
+static inline bool task_can_run_on_remote_rq(struct scx_sched *sch, struct task_struct *p, struct rq *rq, bool enforce) { return false; }
static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; }
#endif /* CONFIG_SMP */
/**
* move_task_between_dsqs() - Move a task from one DSQ to another
+ * @sch: scx_sched being operated on
* @p: target task
* @enq_flags: %SCX_ENQ_*
* @src_dsq: DSQ @p is currently on, must not be a local DSQ
@@ -2599,7 +2731,8 @@ static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p
* On return, @src_dsq is unlocked and only @p's new task_rq, which is the
* return value, is locked.
*/
-static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags,
+static struct rq *move_task_between_dsqs(struct scx_sched *sch,
+ struct task_struct *p, u64 enq_flags,
struct scx_dispatch_q *src_dsq,
struct scx_dispatch_q *dst_dsq)
{
@@ -2612,7 +2745,7 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags,
if (dst_dsq->id == SCX_DSQ_LOCAL) {
dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
if (src_rq != dst_rq &&
- unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
+ unlikely(!task_can_run_on_remote_rq(sch, p, dst_rq, true))) {
dst_dsq = find_global_dsq(p);
dst_rq = src_rq;
}
@@ -2646,7 +2779,7 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags,
p->scx.dsq = NULL;
raw_spin_unlock(&src_dsq->lock);
- dispatch_enqueue(dst_dsq, p, enq_flags);
+ dispatch_enqueue(sch, dst_dsq, p, enq_flags);
}
return dst_rq;
@@ -2658,13 +2791,13 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags,
* to the bypass mode can take a long time. Inject artificial delays while the
* bypass mode is switching to guarantee timely completion.
*/
-static void scx_ops_breather(struct rq *rq)
+static void scx_breather(struct rq *rq)
{
u64 until;
lockdep_assert_rq_held(rq);
- if (likely(!atomic_read(&scx_ops_breather_depth)))
+ if (likely(!atomic_read(&scx_breather_depth)))
return;
raw_spin_rq_unlock(rq);
@@ -2673,25 +2806,26 @@ static void scx_ops_breather(struct rq *rq)
do {
int cnt = 1024;
- while (atomic_read(&scx_ops_breather_depth) && --cnt)
+ while (atomic_read(&scx_breather_depth) && --cnt)
cpu_relax();
- } while (atomic_read(&scx_ops_breather_depth) &&
+ } while (atomic_read(&scx_breather_depth) &&
time_before64(ktime_get_ns(), until));
raw_spin_rq_lock(rq);
}
-static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
+static bool consume_dispatch_q(struct scx_sched *sch, struct rq *rq,
+ struct scx_dispatch_q *dsq)
{
struct task_struct *p;
retry:
/*
- * This retry loop can repeatedly race against scx_ops_bypass()
- * dequeueing tasks from @dsq trying to put the system into the bypass
- * mode. On some multi-socket machines (e.g. 2x Intel 8480c), this can
- * live-lock the machine into soft lockups. Give a breather.
+ * This retry loop can repeatedly race against scx_bypass() dequeueing
+ * tasks from @dsq trying to put the system into the bypass mode. On
+ * some multi-socket machines (e.g. 2x Intel 8480c), this can live-lock
+ * the machine into soft lockups. Give a breather.
*/
- scx_ops_breather(rq);
+ scx_breather(rq);
/*
* The caller can't expect to successfully consume a task if the task's
@@ -2713,7 +2847,7 @@ retry:
return true;
}
- if (task_can_run_on_remote_rq(p, rq, false)) {
+ if (task_can_run_on_remote_rq(sch, p, rq, false)) {
if (likely(consume_remote_task(rq, p, dsq, task_rq)))
return true;
goto retry;
@@ -2724,15 +2858,16 @@ retry:
return false;
}
-static bool consume_global_dsq(struct rq *rq)
+static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
{
int node = cpu_to_node(cpu_of(rq));
- return consume_dispatch_q(rq, global_dsqs[node]);
+ return consume_dispatch_q(sch, rq, sch->global_dsqs[node]);
}
/**
* dispatch_to_local_dsq - Dispatch a task to a local dsq
+ * @sch: scx_sched being operated on
* @rq: current rq which is locked
* @dst_dsq: destination DSQ
* @p: task to dispatch
@@ -2745,7 +2880,8 @@ static bool consume_global_dsq(struct rq *rq)
* The caller must have exclusive ownership of @p (e.g. through
* %SCX_OPSS_DISPATCHING).
*/
-static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
+static void dispatch_to_local_dsq(struct scx_sched *sch, struct rq *rq,
+ struct scx_dispatch_q *dst_dsq,
struct task_struct *p, u64 enq_flags)
{
struct rq *src_rq = task_rq(p);
@@ -2761,14 +2897,15 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
* If dispatching to @rq that @p is already on, no lock dancing needed.
*/
if (rq == src_rq && rq == dst_rq) {
- dispatch_enqueue(dst_dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
+ dispatch_enqueue(sch, dst_dsq, p,
+ enq_flags | SCX_ENQ_CLEAR_OPSS);
return;
}
#ifdef CONFIG_SMP
if (src_rq != dst_rq &&
- unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
- dispatch_enqueue(find_global_dsq(p), p,
+ unlikely(!task_can_run_on_remote_rq(sch, p, dst_rq, true))) {
+ dispatch_enqueue(sch, find_global_dsq(p), p,
enq_flags | SCX_ENQ_CLEAR_OPSS);
return;
}
@@ -2806,7 +2943,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
*/
if (src_rq == dst_rq) {
p->scx.holding_cpu = -1;
- dispatch_enqueue(&dst_rq->scx.local_dsq, p, enq_flags);
+ dispatch_enqueue(sch, &dst_rq->scx.local_dsq, p,
+ enq_flags);
} else {
move_remote_task_to_local_dsq(p, enq_flags,
src_rq, dst_rq);
@@ -2848,7 +2986,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
* was valid in the first place. Make sure that the task is still owned by the
* BPF scheduler and claim the ownership before dispatching.
*/
-static void finish_dispatch(struct rq *rq, struct task_struct *p,
+static void finish_dispatch(struct scx_sched *sch, struct rq *rq,
+ struct task_struct *p,
unsigned long qseq_at_dispatch,
u64 dsq_id, u64 enq_flags)
{
@@ -2901,15 +3040,15 @@ retry:
BUG_ON(!(p->scx.flags & SCX_TASK_QUEUED));
- dsq = find_dsq_for_dispatch(this_rq(), dsq_id, p);
+ dsq = find_dsq_for_dispatch(sch, this_rq(), dsq_id, p);
if (dsq->id == SCX_DSQ_LOCAL)
- dispatch_to_local_dsq(rq, dsq, p, enq_flags);
+ dispatch_to_local_dsq(sch, rq, dsq, p, enq_flags);
else
- dispatch_enqueue(dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
+ dispatch_enqueue(sch, dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
}
-static void flush_dispatch_buf(struct rq *rq)
+static void flush_dispatch_buf(struct scx_sched *sch, struct rq *rq)
{
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
u32 u;
@@ -2917,7 +3056,7 @@ static void flush_dispatch_buf(struct rq *rq)
for (u = 0; u < dspc->cursor; u++) {
struct scx_dsp_buf_ent *ent = &dspc->buf[u];
- finish_dispatch(rq, ent->task, ent->qseq, ent->dsq_id,
+ finish_dispatch(sch, rq, ent->task, ent->qseq, ent->dsq_id,
ent->enq_flags);
}
@@ -2927,6 +3066,7 @@ static void flush_dispatch_buf(struct rq *rq)
static int balance_one(struct rq *rq, struct task_struct *prev)
{
+ struct scx_sched *sch = scx_root;
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
bool prev_on_scx = prev->sched_class == &ext_sched_class;
bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
@@ -2936,7 +3076,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
rq->scx.flags |= SCX_RQ_IN_BALANCE;
rq->scx.flags &= ~(SCX_RQ_BAL_PENDING | SCX_RQ_BAL_KEEP);
- if (static_branch_unlikely(&scx_ops_cpu_preempt) &&
+ if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
unlikely(rq->scx.cpu_released)) {
/*
* If the previous sched_class for the current CPU was not SCX,
@@ -2944,8 +3084,9 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
* core. This callback complements ->cpu_release(), which is
* emitted in switch_class().
*/
- if (SCX_HAS_OP(cpu_acquire))
- SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL);
+ if (SCX_HAS_OP(sch, cpu_acquire))
+ SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq,
+ cpu_of(rq), NULL);
rq->scx.cpu_released = false;
}
@@ -2959,8 +3100,8 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
* scheduler wants to handle this explicitly, it should
* implement ->cpu_release().
*
- * See scx_ops_disable_workfn() for the explanation on the
- * bypassing test.
+ * See scx_disable_workfn() for the explanation on the bypassing
+ * test.
*/
if (prev_on_rq && prev->scx.slice && !scx_rq_bypassing(rq)) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
@@ -2972,10 +3113,11 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
if (rq->scx.local_dsq.nr)
goto has_tasks;
- if (consume_global_dsq(rq))
+ if (consume_global_dsq(sch, rq))
goto has_tasks;
- if (!SCX_HAS_OP(dispatch) || scx_rq_bypassing(rq) || !scx_rq_online(rq))
+ if (unlikely(!SCX_HAS_OP(sch, dispatch)) ||
+ scx_rq_bypassing(rq) || !scx_rq_online(rq))
goto no_tasks;
dspc->rq = rq;
@@ -2990,10 +3132,10 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
do {
dspc->nr_tasks = 0;
- SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq),
- prev_on_scx ? prev : NULL);
+ SCX_CALL_OP(sch, SCX_KF_DISPATCH, dispatch, rq,
+ cpu_of(rq), prev_on_scx ? prev : NULL);
- flush_dispatch_buf(rq);
+ flush_dispatch_buf(sch, rq);
if (prev_on_rq && prev->scx.slice) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
@@ -3001,7 +3143,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
}
if (rq->scx.local_dsq.nr)
goto has_tasks;
- if (consume_global_dsq(rq))
+ if (consume_global_dsq(sch, rq))
goto has_tasks;
/*
@@ -3024,10 +3166,10 @@ no_tasks:
* Didn't find another task to run. Keep running @prev unless
* %SCX_OPS_ENQ_LAST is in effect.
*/
- if (prev_on_rq && (!static_branch_unlikely(&scx_ops_enq_last) ||
- scx_rq_bypassing(rq))) {
+ if (prev_on_rq &&
+ (!(sch->ops.flags & SCX_OPS_ENQ_LAST) || scx_rq_bypassing(rq))) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
- __scx_add_event(SCX_EV_DISPATCH_KEEP_LAST, 1);
+ __scx_add_event(sch, SCX_EV_DISPATCH_KEEP_LAST, 1);
goto has_tasks;
}
rq->scx.flags &= ~SCX_RQ_IN_BALANCE;
@@ -3087,32 +3229,36 @@ static void process_ddsp_deferred_locals(struct rq *rq)
*/
while ((p = list_first_entry_or_null(&rq->scx.ddsp_deferred_locals,
struct task_struct, scx.dsq_list.node))) {
+ struct scx_sched *sch = scx_root;
struct scx_dispatch_q *dsq;
list_del_init(&p->scx.dsq_list.node);
- dsq = find_dsq_for_dispatch(rq, p->scx.ddsp_dsq_id, p);
+ dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
- dispatch_to_local_dsq(rq, dsq, p, p->scx.ddsp_enq_flags);
+ dispatch_to_local_dsq(sch, rq, dsq, p,
+ p->scx.ddsp_enq_flags);
}
}
static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
{
+ struct scx_sched *sch = scx_root;
+
if (p->scx.flags & SCX_TASK_QUEUED) {
/*
* Core-sched might decide to execute @p before it is
* dispatched. Call ops_dequeue() to notify the BPF scheduler.
*/
- ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC);
+ ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC);
dispatch_dequeue(rq, p);
}
p->se.exec_start = rq_clock_task(rq);
/* see dequeue_task_scx() on why we skip when !QUEUED */
- if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
- SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
+ if (SCX_HAS_OP(sch, running) && (p->scx.flags & SCX_TASK_QUEUED))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, running, rq, p);
clr_task_runnable(p, true);
@@ -3155,6 +3301,7 @@ preempt_reason_from_class(const struct sched_class *class)
static void switch_class(struct rq *rq, struct task_struct *next)
{
+ struct scx_sched *sch = scx_root;
const struct sched_class *next_class = next->sched_class;
#ifdef CONFIG_SMP
@@ -3165,7 +3312,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
*/
smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
#endif
- if (!static_branch_unlikely(&scx_ops_cpu_preempt))
+ if (!(sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT))
return;
/*
@@ -3187,14 +3334,14 @@ static void switch_class(struct rq *rq, struct task_struct *next)
* next time that balance_scx() is invoked.
*/
if (!rq->scx.cpu_released) {
- if (SCX_HAS_OP(cpu_release)) {
+ if (SCX_HAS_OP(sch, cpu_release)) {
struct scx_cpu_release_args args = {
.reason = preempt_reason_from_class(next_class),
.task = next,
};
- SCX_CALL_OP(SCX_KF_CPU_RELEASE,
- cpu_release, cpu_of(rq), &args);
+ SCX_CALL_OP(sch, SCX_KF_CPU_RELEASE, cpu_release, rq,
+ cpu_of(rq), &args);
}
rq->scx.cpu_released = true;
}
@@ -3203,11 +3350,12 @@ static void switch_class(struct rq *rq, struct task_struct *next)
static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
struct task_struct *next)
{
+ struct scx_sched *sch = scx_root;
update_curr_scx(rq);
/* see dequeue_task_scx() on why we skip when !QUEUED */
- if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
- SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
+ if (SCX_HAS_OP(sch, stopping) && (p->scx.flags & SCX_TASK_QUEUED))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, stopping, rq, p, true);
if (p->scx.flags & SCX_TASK_QUEUED) {
set_task_runnable(rq, p);
@@ -3219,7 +3367,8 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
* DSQ.
*/
if (p->scx.slice && !scx_rq_bypassing(rq)) {
- dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
+ dispatch_enqueue(sch, &rq->scx.local_dsq, p,
+ SCX_ENQ_HEAD);
goto switch_class;
}
@@ -3230,7 +3379,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
* which should trigger an explicit follow-up scheduling event.
*/
if (sched_class_above(&ext_sched_class, next->sched_class)) {
- WARN_ON_ONCE(!static_branch_unlikely(&scx_ops_enq_last));
+ WARN_ON_ONCE(!(sch->ops.flags & SCX_OPS_ENQ_LAST));
do_enqueue_task(rq, p, SCX_ENQ_LAST, -1);
} else {
do_enqueue_task(rq, p, 0, -1);
@@ -3283,7 +3432,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
* Can happen while enabling as SCX_RQ_BAL_PENDING assertion is
* conditional on scx_enabled() and may have been skipped.
*/
- WARN_ON_ONCE(scx_ops_enable_state() == SCX_OPS_ENABLED);
+ WARN_ON_ONCE(scx_enable_state() == SCX_ENABLED);
keep_prev = false;
}
@@ -3294,10 +3443,8 @@ static struct task_struct *pick_task_scx(struct rq *rq)
*/
if (keep_prev) {
p = prev;
- if (!p->scx.slice) {
- p->scx.slice = SCX_SLICE_DFL;
- __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
- }
+ if (!p->scx.slice)
+ refill_task_slice_dfl(p);
} else {
p = first_local_task(rq);
if (!p) {
@@ -3307,13 +3454,14 @@ static struct task_struct *pick_task_scx(struct rq *rq)
}
if (unlikely(!p->scx.slice)) {
- if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
+ struct scx_sched *sch = scx_root;
+
+ if (!scx_rq_bypassing(rq) && !sch->warned_zero_slice) {
printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
p->comm, p->pid, __func__);
- scx_warned_zero_slice = true;
+ sch->warned_zero_slice = true;
}
- p->scx.slice = SCX_SLICE_DFL;
- __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
+ refill_task_slice_dfl(p);
}
}
@@ -3342,13 +3490,17 @@ static struct task_struct *pick_task_scx(struct rq *rq)
bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
bool in_fi)
{
+ struct scx_sched *sch = scx_root;
+
/*
* The const qualifiers are dropped from task_struct pointers when
* calling ops.core_sched_before(). Accesses are controlled by the
* verifier.
*/
- if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a)))
- return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before,
+ if (SCX_HAS_OP(sch, core_sched_before) &&
+ !scx_rq_bypassing(task_rq(a)))
+ return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, core_sched_before,
+ NULL,
(struct task_struct *)a,
(struct task_struct *)b);
else
@@ -3360,6 +3512,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
{
+ struct scx_sched *sch = scx_root;
bool rq_bypass;
/*
@@ -3376,7 +3529,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
return prev_cpu;
rq_bypass = scx_rq_bypassing(task_rq(p));
- if (SCX_HAS_OP(select_cpu) && !rq_bypass) {
+ if (likely(SCX_HAS_OP(sch, select_cpu)) && !rq_bypass) {
s32 cpu;
struct task_struct **ddsp_taskp;
@@ -3384,29 +3537,30 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
WARN_ON_ONCE(*ddsp_taskp);
*ddsp_taskp = p;
- cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
- select_cpu, p, prev_cpu, wake_flags);
+ cpu = SCX_CALL_OP_TASK_RET(sch,
+ SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
+ select_cpu, NULL, p, prev_cpu,
+ wake_flags);
p->scx.selected_cpu = cpu;
*ddsp_taskp = NULL;
- if (ops_cpu_valid(cpu, "from ops.select_cpu()"))
+ if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
return cpu;
else
return prev_cpu;
} else {
s32 cpu;
- cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
+ cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, NULL, 0);
if (cpu >= 0) {
- p->scx.slice = SCX_SLICE_DFL;
+ refill_task_slice_dfl(p);
p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
- __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
} else {
cpu = prev_cpu;
}
p->scx.selected_cpu = cpu;
if (rq_bypass)
- __scx_add_event(SCX_EV_BYPASS_DISPATCH, 1);
+ __scx_add_event(sch, SCX_EV_BYPASS_DISPATCH, 1);
return cpu;
}
}
@@ -3419,6 +3573,8 @@ static void task_woken_scx(struct rq *rq, struct task_struct *p)
static void set_cpus_allowed_scx(struct task_struct *p,
struct affinity_context *ac)
{
+ struct scx_sched *sch = scx_root;
+
set_cpus_allowed_common(p, ac);
/*
@@ -3429,28 +3585,38 @@ static void set_cpus_allowed_scx(struct task_struct *p,
* Fine-grained memory write control is enforced by BPF making the const
* designation pointless. Cast it away when calling the operation.
*/
- if (SCX_HAS_OP(set_cpumask))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
- (struct cpumask *)p->cpus_ptr);
+ if (SCX_HAS_OP(sch, set_cpumask))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_cpumask, NULL,
+ p, (struct cpumask *)p->cpus_ptr);
}
static void handle_hotplug(struct rq *rq, bool online)
{
+ struct scx_sched *sch = scx_root;
int cpu = cpu_of(rq);
atomic_long_inc(&scx_hotplug_seq);
+ /*
+ * scx_root updates are protected by cpus_read_lock() and will stay
+ * stable here. Note that we can't depend on scx_enabled() test as the
+ * hotplug ops need to be enabled before __scx_enabled is set.
+ */
+ if (unlikely(!sch))
+ return;
+
if (scx_enabled())
- scx_idle_update_selcpu_topology(&scx_ops);
+ scx_idle_update_selcpu_topology(&sch->ops);
- if (online && SCX_HAS_OP(cpu_online))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu);
- else if (!online && SCX_HAS_OP(cpu_offline))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu);
+ if (online && SCX_HAS_OP(sch, cpu_online))
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cpu_online, NULL, cpu);
+ else if (!online && SCX_HAS_OP(sch, cpu_offline))
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cpu_offline, NULL, cpu);
else
- scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
- "cpu %d going %s, exiting scheduler", cpu,
- online ? "online" : "offline");
+ scx_exit(sch, SCX_EXIT_UNREG_KERN,
+ SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
+ "cpu %d going %s, exiting scheduler", cpu,
+ online ? "online" : "offline");
}
void scx_rq_activate(struct rq *rq)
@@ -3477,11 +3643,16 @@ static void rq_offline_scx(struct rq *rq)
static bool check_rq_for_timeouts(struct rq *rq)
{
+ struct scx_sched *sch;
struct task_struct *p;
struct rq_flags rf;
bool timed_out = false;
rq_lock_irqsave(rq, &rf);
+ sch = rcu_dereference_bh(scx_root);
+ if (unlikely(!sch))
+ goto out_unlock;
+
list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) {
unsigned long last_runnable = p->scx.runnable_at;
@@ -3489,16 +3660,15 @@ static bool check_rq_for_timeouts(struct rq *rq)
last_runnable + scx_watchdog_timeout))) {
u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
- scx_ops_error_kind(SCX_EXIT_ERROR_STALL,
- "%s[%d] failed to run for %u.%03us",
- p->comm, p->pid,
- dur_ms / 1000, dur_ms % 1000);
+ scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
+ "%s[%d] failed to run for %u.%03us",
+ p->comm, p->pid, dur_ms / 1000, dur_ms % 1000);
timed_out = true;
break;
}
}
+out_unlock:
rq_unlock_irqrestore(rq, &rf);
-
return timed_out;
}
@@ -3520,19 +3690,24 @@ static void scx_watchdog_workfn(struct work_struct *work)
void scx_tick(struct rq *rq)
{
+ struct scx_sched *sch;
unsigned long last_check;
if (!scx_enabled())
return;
+ sch = rcu_dereference_bh(scx_root);
+ if (unlikely(!sch))
+ return;
+
last_check = READ_ONCE(scx_watchdog_timestamp);
if (unlikely(time_after(jiffies,
last_check + READ_ONCE(scx_watchdog_timeout)))) {
u32 dur_ms = jiffies_to_msecs(jiffies - last_check);
- scx_ops_error_kind(SCX_EXIT_ERROR_STALL,
- "watchdog failed to check in for %u.%03us",
- dur_ms / 1000, dur_ms % 1000);
+ scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
+ "watchdog failed to check in for %u.%03us",
+ dur_ms / 1000, dur_ms % 1000);
}
update_other_load_avgs(rq);
@@ -3540,6 +3715,8 @@ void scx_tick(struct rq *rq)
static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
{
+ struct scx_sched *sch = scx_root;
+
update_curr_scx(rq);
/*
@@ -3549,8 +3726,8 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
if (scx_rq_bypassing(rq)) {
curr->scx.slice = 0;
touch_core_sched(rq, curr);
- } else if (SCX_HAS_OP(tick)) {
- SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr);
+ } else if (SCX_HAS_OP(sch, tick)) {
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, tick, rq, curr);
}
if (!curr->scx.slice)
@@ -3615,21 +3792,23 @@ static void scx_set_task_state(struct task_struct *p, enum scx_task_state state)
p->scx.flags |= state << SCX_TASK_STATE_SHIFT;
}
-static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool fork)
+static int scx_init_task(struct task_struct *p, struct task_group *tg, bool fork)
{
+ struct scx_sched *sch = scx_root;
int ret;
p->scx.disallow = false;
- if (SCX_HAS_OP(init_task)) {
+ if (SCX_HAS_OP(sch, init_task)) {
struct scx_init_task_args args = {
SCX_INIT_TASK_ARGS_CGROUP(tg)
.fork = fork,
};
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args);
+ ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init_task, NULL,
+ p, &args);
if (unlikely(ret)) {
- ret = ops_sanitize_err("init_task", ret);
+ ret = ops_sanitize_err(sch, "init_task", ret);
return ret;
}
}
@@ -3657,8 +3836,8 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool
task_rq_unlock(rq, p, &rf);
} else if (p->policy == SCHED_EXT) {
- scx_ops_error("ops.init_task() set task->scx.disallow for %s[%d] during fork",
- p->comm, p->pid);
+ scx_error(sch, "ops.init_task() set task->scx.disallow for %s[%d] during fork",
+ p->comm, p->pid);
}
}
@@ -3666,11 +3845,13 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool
return 0;
}
-static void scx_ops_enable_task(struct task_struct *p)
+static void scx_enable_task(struct task_struct *p)
{
+ struct scx_sched *sch = scx_root;
+ struct rq *rq = task_rq(p);
u32 weight;
- lockdep_assert_rq_held(task_rq(p));
+ lockdep_assert_rq_held(rq);
/*
* Set the weight before calling ops.enable() so that the scheduler
@@ -3683,26 +3864,31 @@ static void scx_ops_enable_task(struct task_struct *p)
p->scx.weight = sched_weight_to_cgroup(weight);
- if (SCX_HAS_OP(enable))
- SCX_CALL_OP_TASK(SCX_KF_REST, enable, p);
+ if (SCX_HAS_OP(sch, enable))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, enable, rq, p);
scx_set_task_state(p, SCX_TASK_ENABLED);
- if (SCX_HAS_OP(set_weight))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
+ if (SCX_HAS_OP(sch, set_weight))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq,
+ p, p->scx.weight);
}
-static void scx_ops_disable_task(struct task_struct *p)
+static void scx_disable_task(struct task_struct *p)
{
- lockdep_assert_rq_held(task_rq(p));
+ struct scx_sched *sch = scx_root;
+ struct rq *rq = task_rq(p);
+
+ lockdep_assert_rq_held(rq);
WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
- if (SCX_HAS_OP(disable))
- SCX_CALL_OP_TASK(SCX_KF_REST, disable, p);
+ if (SCX_HAS_OP(sch, disable))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
scx_set_task_state(p, SCX_TASK_READY);
}
-static void scx_ops_exit_task(struct task_struct *p)
+static void scx_exit_task(struct task_struct *p)
{
+ struct scx_sched *sch = scx_root;
struct scx_exit_task_args args = {
.cancelled = false,
};
@@ -3718,15 +3904,16 @@ static void scx_ops_exit_task(struct task_struct *p)
case SCX_TASK_READY:
break;
case SCX_TASK_ENABLED:
- scx_ops_disable_task(p);
+ scx_disable_task(p);
break;
default:
WARN_ON_ONCE(true);
return;
}
- if (SCX_HAS_OP(exit_task))
- SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args);
+ if (SCX_HAS_OP(sch, exit_task))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, exit_task, task_rq(p),
+ p, &args);
scx_set_task_state(p, SCX_TASK_NONE);
}
@@ -3758,15 +3945,15 @@ int scx_fork(struct task_struct *p)
{
percpu_rwsem_assert_held(&scx_fork_rwsem);
- if (scx_ops_init_task_enabled)
- return scx_ops_init_task(p, task_group(p), true);
+ if (scx_init_task_enabled)
+ return scx_init_task(p, task_group(p), true);
else
return 0;
}
void scx_post_fork(struct task_struct *p)
{
- if (scx_ops_init_task_enabled) {
+ if (scx_init_task_enabled) {
scx_set_task_state(p, SCX_TASK_READY);
/*
@@ -3779,7 +3966,7 @@ void scx_post_fork(struct task_struct *p)
struct rq *rq;
rq = task_rq_lock(p, &rf);
- scx_ops_enable_task(p);
+ scx_enable_task(p);
task_rq_unlock(rq, p, &rf);
}
}
@@ -3799,7 +3986,7 @@ void scx_cancel_fork(struct task_struct *p)
rq = task_rq_lock(p, &rf);
WARN_ON_ONCE(scx_get_task_state(p) >= SCX_TASK_READY);
- scx_ops_exit_task(p);
+ scx_exit_task(p);
task_rq_unlock(rq, p, &rf);
}
@@ -3815,15 +4002,15 @@ void sched_ext_free(struct task_struct *p)
spin_unlock_irqrestore(&scx_tasks_lock, flags);
/*
- * @p is off scx_tasks and wholly ours. scx_ops_enable()'s READY ->
- * ENABLED transitions can't race us. Disable ops for @p.
+ * @p is off scx_tasks and wholly ours. scx_enable()'s READY -> ENABLED
+ * transitions can't race us. Disable ops for @p.
*/
if (scx_get_task_state(p) != SCX_TASK_NONE) {
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(p, &rf);
- scx_ops_exit_task(p);
+ scx_exit_task(p);
task_rq_unlock(rq, p, &rf);
}
}
@@ -3831,11 +4018,14 @@ void sched_ext_free(struct task_struct *p)
static void reweight_task_scx(struct rq *rq, struct task_struct *p,
const struct load_weight *lw)
{
+ struct scx_sched *sch = scx_root;
+
lockdep_assert_rq_held(task_rq(p));
p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
- if (SCX_HAS_OP(set_weight))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
+ if (SCX_HAS_OP(sch, set_weight))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq,
+ p, p->scx.weight);
}
static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
@@ -3844,20 +4034,22 @@ static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
static void switching_to_scx(struct rq *rq, struct task_struct *p)
{
- scx_ops_enable_task(p);
+ struct scx_sched *sch = scx_root;
+
+ scx_enable_task(p);
/*
* set_cpus_allowed_scx() is not called while @p is associated with a
* different scheduler class. Keep the BPF scheduler up-to-date.
*/
- if (SCX_HAS_OP(set_cpumask))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
- (struct cpumask *)p->cpus_ptr);
+ if (SCX_HAS_OP(sch, set_cpumask))
+ SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_cpumask, rq,
+ p, (struct cpumask *)p->cpus_ptr);
}
static void switched_from_scx(struct rq *rq, struct task_struct *p)
{
- scx_ops_disable_task(p);
+ scx_disable_task(p);
}
static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {}
@@ -3902,6 +4094,7 @@ static bool scx_cgroup_enabled;
int scx_tg_online(struct task_group *tg)
{
+ struct scx_sched *sch = scx_root;
int ret = 0;
WARN_ON_ONCE(tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED));
@@ -3909,14 +4102,14 @@ int scx_tg_online(struct task_group *tg)
percpu_down_read(&scx_cgroup_rwsem);
if (scx_cgroup_enabled) {
- if (SCX_HAS_OP(cgroup_init)) {
+ if (SCX_HAS_OP(sch, cgroup_init)) {
struct scx_cgroup_init_args args =
{ .weight = tg->scx_weight };
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
- tg->css.cgroup, &args);
+ ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init,
+ NULL, tg->css.cgroup, &args);
if (ret)
- ret = ops_sanitize_err("cgroup_init", ret);
+ ret = ops_sanitize_err(sch, "cgroup_init", ret);
}
if (ret == 0)
tg->scx_flags |= SCX_TG_ONLINE | SCX_TG_INITED;
@@ -3930,12 +4123,16 @@ int scx_tg_online(struct task_group *tg)
void scx_tg_offline(struct task_group *tg)
{
+ struct scx_sched *sch = scx_root;
+
WARN_ON_ONCE(!(tg->scx_flags & SCX_TG_ONLINE));
percpu_down_read(&scx_cgroup_rwsem);
- if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup);
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_exit) &&
+ (tg->scx_flags & SCX_TG_INITED))
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL,
+ tg->css.cgroup);
tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
percpu_up_read(&scx_cgroup_rwsem);
@@ -3943,6 +4140,7 @@ void scx_tg_offline(struct task_group *tg)
int scx_cgroup_can_attach(struct cgroup_taskset *tset)
{
+ struct scx_sched *sch = scx_root;
struct cgroup_subsys_state *css;
struct task_struct *p;
int ret;
@@ -3967,8 +4165,9 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
if (from == to)
continue;
- if (SCX_HAS_OP(cgroup_prep_move)) {
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move,
+ if (SCX_HAS_OP(sch, cgroup_prep_move)) {
+ ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED,
+ cgroup_prep_move, NULL,
p, from, css->cgroup);
if (ret)
goto err;
@@ -3981,18 +4180,21 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
err:
cgroup_taskset_for_each(p, css, tset) {
- if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
- p->scx.cgrp_moving_from, css->cgroup);
+ if (SCX_HAS_OP(sch, cgroup_cancel_move) &&
+ p->scx.cgrp_moving_from)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_cancel_move, NULL,
+ p, p->scx.cgrp_moving_from, css->cgroup);
p->scx.cgrp_moving_from = NULL;
}
percpu_up_read(&scx_cgroup_rwsem);
- return ops_sanitize_err("cgroup_prep_move", ret);
+ return ops_sanitize_err(sch, "cgroup_prep_move", ret);
}
void scx_cgroup_move_task(struct task_struct *p)
{
+ struct scx_sched *sch = scx_root;
+
if (!scx_cgroup_enabled)
return;
@@ -4000,9 +4202,11 @@ void scx_cgroup_move_task(struct task_struct *p)
* @p must have ops.cgroup_prep_move() called on it and thus
* cgrp_moving_from set.
*/
- if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
- SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p,
- p->scx.cgrp_moving_from, tg_cgrp(task_group(p)));
+ if (SCX_HAS_OP(sch, cgroup_move) &&
+ !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
+ SCX_CALL_OP_TASK(sch, SCX_KF_UNLOCKED, cgroup_move, NULL,
+ p, p->scx.cgrp_moving_from,
+ tg_cgrp(task_group(p)));
p->scx.cgrp_moving_from = NULL;
}
@@ -4013,6 +4217,7 @@ void scx_cgroup_finish_attach(void)
void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
+ struct scx_sched *sch = scx_root;
struct cgroup_subsys_state *css;
struct task_struct *p;
@@ -4020,9 +4225,10 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
goto out_unlock;
cgroup_taskset_for_each(p, css, tset) {
- if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
- p->scx.cgrp_moving_from, css->cgroup);
+ if (SCX_HAS_OP(sch, cgroup_cancel_move) &&
+ p->scx.cgrp_moving_from)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_cancel_move, NULL,
+ p, p->scx.cgrp_moving_from, css->cgroup);
p->scx.cgrp_moving_from = NULL;
}
out_unlock:
@@ -4031,11 +4237,13 @@ out_unlock:
void scx_group_set_weight(struct task_group *tg, unsigned long weight)
{
+ struct scx_sched *sch = scx_root;
+
percpu_down_read(&scx_cgroup_rwsem);
if (scx_cgroup_enabled && tg->scx_weight != weight) {
- if (SCX_HAS_OP(cgroup_set_weight))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight,
+ if (SCX_HAS_OP(sch, cgroup_set_weight))
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
tg_cgrp(tg), weight);
tg->scx_weight = weight;
}
@@ -4124,29 +4332,6 @@ static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
dsq->id = dsq_id;
}
-static struct scx_dispatch_q *create_dsq(u64 dsq_id, int node)
-{
- struct scx_dispatch_q *dsq;
- int ret;
-
- if (dsq_id & SCX_DSQ_FLAG_BUILTIN)
- return ERR_PTR(-EINVAL);
-
- dsq = kmalloc_node(sizeof(*dsq), GFP_KERNEL, node);
- if (!dsq)
- return ERR_PTR(-ENOMEM);
-
- init_dsq(dsq, dsq_id);
-
- ret = rhashtable_lookup_insert_fast(&dsq_hash, &dsq->hash_node,
- dsq_hash_params);
- if (ret) {
- kfree(dsq);
- return ERR_PTR(ret);
- }
- return dsq;
-}
-
static void free_dsq_irq_workfn(struct irq_work *irq_work)
{
struct llist_node *to_free = llist_del_all(&dsqs_to_free);
@@ -4158,26 +4343,27 @@ static void free_dsq_irq_workfn(struct irq_work *irq_work)
static DEFINE_IRQ_WORK(free_dsq_irq_work, free_dsq_irq_workfn);
-static void destroy_dsq(u64 dsq_id)
+static void destroy_dsq(struct scx_sched *sch, u64 dsq_id)
{
struct scx_dispatch_q *dsq;
unsigned long flags;
rcu_read_lock();
- dsq = find_user_dsq(dsq_id);
+ dsq = find_user_dsq(sch, dsq_id);
if (!dsq)
goto out_unlock_rcu;
raw_spin_lock_irqsave(&dsq->lock, flags);
if (dsq->nr) {
- scx_ops_error("attempting to destroy in-use dsq 0x%016llx (nr=%u)",
- dsq->id, dsq->nr);
+ scx_error(sch, "attempting to destroy in-use dsq 0x%016llx (nr=%u)",
+ dsq->id, dsq->nr);
goto out_unlock_dsq;
}
- if (rhashtable_remove_fast(&dsq_hash, &dsq->hash_node, dsq_hash_params))
+ if (rhashtable_remove_fast(&sch->dsq_hash, &dsq->hash_node,
+ dsq_hash_params))
goto out_unlock_dsq;
/*
@@ -4197,7 +4383,7 @@ out_unlock_rcu:
}
#ifdef CONFIG_EXT_GROUP_SCHED
-static void scx_cgroup_exit(void)
+static void scx_cgroup_exit(struct scx_sched *sch)
{
struct cgroup_subsys_state *css;
@@ -4217,14 +4403,15 @@ static void scx_cgroup_exit(void)
continue;
tg->scx_flags &= ~SCX_TG_INITED;
- if (!scx_ops.cgroup_exit)
+ if (!sch->ops.cgroup_exit)
continue;
if (WARN_ON_ONCE(!css_tryget(css)))
continue;
rcu_read_unlock();
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup);
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL,
+ css->cgroup);
rcu_read_lock();
css_put(css);
@@ -4232,7 +4419,7 @@ static void scx_cgroup_exit(void)
rcu_read_unlock();
}
-static int scx_cgroup_init(void)
+static int scx_cgroup_init(struct scx_sched *sch)
{
struct cgroup_subsys_state *css;
int ret;
@@ -4252,7 +4439,7 @@ static int scx_cgroup_init(void)
(SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE)
continue;
- if (!scx_ops.cgroup_init) {
+ if (!sch->ops.cgroup_init) {
tg->scx_flags |= SCX_TG_INITED;
continue;
}
@@ -4261,11 +4448,11 @@ static int scx_cgroup_init(void)
continue;
rcu_read_unlock();
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
+ ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL,
css->cgroup, &args);
if (ret) {
css_put(css);
- scx_ops_error("ops.cgroup_init() failed (%d)", ret);
+ scx_error(sch, "ops.cgroup_init() failed (%d)", ret);
return ret;
}
tg->scx_flags |= SCX_TG_INITED;
@@ -4282,8 +4469,8 @@ static int scx_cgroup_init(void)
}
#else
-static void scx_cgroup_exit(void) {}
-static int scx_cgroup_init(void) { return 0; }
+static void scx_cgroup_exit(struct scx_sched *sch) {}
+static int scx_cgroup_init(struct scx_sched *sch) { return 0; }
#endif
@@ -4300,8 +4487,7 @@ static int scx_cgroup_init(void) { return 0; }
static ssize_t scx_attr_state_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
- return sysfs_emit(buf, "%s\n",
- scx_ops_enable_state_str[scx_ops_enable_state()]);
+ return sysfs_emit(buf, "%s\n", scx_enable_state_str[scx_enable_state()]);
}
SCX_ATTR(state);
@@ -4346,15 +4532,51 @@ static const struct attribute_group scx_global_attr_group = {
.attrs = scx_global_attrs,
};
+static void free_exit_info(struct scx_exit_info *ei);
+
+static void scx_sched_free_rcu_work(struct work_struct *work)
+{
+ struct rcu_work *rcu_work = to_rcu_work(work);
+ struct scx_sched *sch = container_of(rcu_work, struct scx_sched, rcu_work);
+ struct rhashtable_iter rht_iter;
+ struct scx_dispatch_q *dsq;
+ int node;
+
+ kthread_stop(sch->helper->task);
+ free_percpu(sch->event_stats_cpu);
+
+ for_each_node_state(node, N_POSSIBLE)
+ kfree(sch->global_dsqs[node]);
+ kfree(sch->global_dsqs);
+
+ rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
+ do {
+ rhashtable_walk_start(&rht_iter);
+
+ while ((dsq = rhashtable_walk_next(&rht_iter)) && !IS_ERR(dsq))
+ destroy_dsq(sch, dsq->id);
+
+ rhashtable_walk_stop(&rht_iter);
+ } while (dsq == ERR_PTR(-EAGAIN));
+ rhashtable_walk_exit(&rht_iter);
+
+ rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
+ free_exit_info(sch->exit_info);
+ kfree(sch);
+}
+
static void scx_kobj_release(struct kobject *kobj)
{
- kfree(kobj);
+ struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj);
+
+ INIT_RCU_WORK(&sch->rcu_work, scx_sched_free_rcu_work);
+ queue_rcu_work(system_unbound_wq, &sch->rcu_work);
}
static ssize_t scx_attr_ops_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
- return sysfs_emit(buf, "%s\n", scx_ops.name);
+ return sysfs_emit(buf, "%s\n", scx_root->ops.name);
}
SCX_ATTR(ops);
@@ -4365,16 +4587,17 @@ SCX_ATTR(ops);
static ssize_t scx_attr_events_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
+ struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj);
struct scx_event_stats events;
int at = 0;
- scx_bpf_events(&events, sizeof(events));
+ scx_read_events(sch, &events);
at += scx_attr_event_show(buf, at, &events, SCX_EV_SELECT_CPU_FALLBACK);
at += scx_attr_event_show(buf, at, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
at += scx_attr_event_show(buf, at, &events, SCX_EV_DISPATCH_KEEP_LAST);
at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SKIP_EXITING);
at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
- at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SLICE_DFL);
+ at += scx_attr_event_show(buf, at, &events, SCX_EV_REFILL_SLICE_DFL);
at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DURATION);
at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DISPATCH);
at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_ACTIVATE);
@@ -4397,7 +4620,7 @@ static const struct kobj_type scx_ktype = {
static int scx_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
- return add_uevent_var(env, "SCXOPS=%s", scx_ops.name);
+ return add_uevent_var(env, "SCXOPS=%s", scx_root->ops.name);
}
static const struct kset_uevent_ops scx_uevent_ops = {
@@ -4410,14 +4633,20 @@ static const struct kset_uevent_ops scx_uevent_ops = {
*/
bool task_should_scx(int policy)
{
- if (!scx_enabled() ||
- unlikely(scx_ops_enable_state() == SCX_OPS_DISABLING))
+ if (!scx_enabled() || unlikely(scx_enable_state() == SCX_DISABLING))
return false;
if (READ_ONCE(scx_switching_all))
return true;
return policy == SCHED_EXT;
}
+bool scx_allow_ttwu_queue(const struct task_struct *p)
+{
+ return !scx_enabled() ||
+ (scx_root->ops.flags & SCX_OPS_ALLOW_QUEUED_WAKEUP) ||
+ p->sched_class != &ext_sched_class;
+}
+
/**
* scx_softlockup - sched_ext softlockup handler
* @dur_s: number of seconds of CPU stuck due to soft lockup
@@ -4430,40 +4659,48 @@ bool task_should_scx(int policy)
*/
void scx_softlockup(u32 dur_s)
{
- switch (scx_ops_enable_state()) {
- case SCX_OPS_ENABLING:
- case SCX_OPS_ENABLED:
+ struct scx_sched *sch;
+
+ rcu_read_lock();
+
+ sch = rcu_dereference(scx_root);
+ if (unlikely(!sch))
+ goto out_unlock;
+
+ switch (scx_enable_state()) {
+ case SCX_ENABLING:
+ case SCX_ENABLED:
break;
default:
- return;
+ goto out_unlock;
}
- /* allow only one instance, cleared at the end of scx_ops_bypass() */
+ /* allow only one instance, cleared at the end of scx_bypass() */
if (test_and_set_bit(0, &scx_in_softlockup))
- return;
+ goto out_unlock;
printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
- smp_processor_id(), dur_s, scx_ops.name);
+ smp_processor_id(), dur_s, scx_root->ops.name);
/*
* Some CPUs may be trapped in the dispatch paths. Enable breather
- * immediately; otherwise, we might even be able to get to
- * scx_ops_bypass().
+ * immediately; otherwise, we might even be able to get to scx_bypass().
*/
- atomic_inc(&scx_ops_breather_depth);
+ atomic_inc(&scx_breather_depth);
- scx_ops_error("soft lockup - CPU#%d stuck for %us",
- smp_processor_id(), dur_s);
+ scx_error(sch, "soft lockup - CPU#%d stuck for %us", smp_processor_id(), dur_s);
+out_unlock:
+ rcu_read_unlock();
}
static void scx_clear_softlockup(void)
{
if (test_and_clear_bit(0, &scx_in_softlockup))
- atomic_dec(&scx_ops_breather_depth);
+ atomic_dec(&scx_breather_depth);
}
/**
- * scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress
+ * scx_bypass - [Un]bypass scx_ops and guarantee forward progress
* @bypass: true for bypass, false for unbypass
*
* Bypassing guarantees that all runnable tasks make forward progress without
@@ -4493,32 +4730,36 @@ static void scx_clear_softlockup(void)
*
* - scx_prio_less() reverts to the default core_sched_at order.
*/
-static void scx_ops_bypass(bool bypass)
+static void scx_bypass(bool bypass)
{
static DEFINE_RAW_SPINLOCK(bypass_lock);
static unsigned long bypass_timestamp;
-
- int cpu;
+ struct scx_sched *sch;
unsigned long flags;
+ int cpu;
raw_spin_lock_irqsave(&bypass_lock, flags);
+ sch = rcu_dereference_bh(scx_root);
+
if (bypass) {
- scx_ops_bypass_depth++;
- WARN_ON_ONCE(scx_ops_bypass_depth <= 0);
- if (scx_ops_bypass_depth != 1)
+ scx_bypass_depth++;
+ WARN_ON_ONCE(scx_bypass_depth <= 0);
+ if (scx_bypass_depth != 1)
goto unlock;
bypass_timestamp = ktime_get_ns();
- scx_add_event(SCX_EV_BYPASS_ACTIVATE, 1);
+ if (sch)
+ scx_add_event(sch, SCX_EV_BYPASS_ACTIVATE, 1);
} else {
- scx_ops_bypass_depth--;
- WARN_ON_ONCE(scx_ops_bypass_depth < 0);
- if (scx_ops_bypass_depth != 0)
+ scx_bypass_depth--;
+ WARN_ON_ONCE(scx_bypass_depth < 0);
+ if (scx_bypass_depth != 0)
goto unlock;
- scx_add_event(SCX_EV_BYPASS_DURATION,
- ktime_get_ns() - bypass_timestamp);
+ if (sch)
+ scx_add_event(sch, SCX_EV_BYPASS_DURATION,
+ ktime_get_ns() - bypass_timestamp);
}
- atomic_inc(&scx_ops_breather_depth);
+ atomic_inc(&scx_breather_depth);
/*
* No task property is changing. We just need to make sure all currently
@@ -4576,7 +4817,7 @@ static void scx_ops_bypass(bool bypass)
raw_spin_rq_unlock(rq);
}
- atomic_dec(&scx_ops_breather_depth);
+ atomic_dec(&scx_breather_depth);
unlock:
raw_spin_unlock_irqrestore(&bypass_lock, flags);
scx_clear_softlockup();
@@ -4632,42 +4873,36 @@ static const char *scx_exit_reason(enum scx_exit_kind kind)
}
}
-static void scx_ops_disable_workfn(struct kthread_work *work)
+static void scx_disable_workfn(struct kthread_work *work)
{
- struct scx_exit_info *ei = scx_exit_info;
+ struct scx_sched *sch = container_of(work, struct scx_sched, disable_work);
+ struct scx_exit_info *ei = sch->exit_info;
struct scx_task_iter sti;
struct task_struct *p;
- struct rhashtable_iter rht_iter;
- struct scx_dispatch_q *dsq;
- int i, kind, cpu;
+ int kind, cpu;
- kind = atomic_read(&scx_exit_kind);
+ kind = atomic_read(&sch->exit_kind);
while (true) {
- /*
- * NONE indicates that a new scx_ops has been registered since
- * disable was scheduled - don't kill the new ops. DONE
- * indicates that the ops has already been disabled.
- */
- if (kind == SCX_EXIT_NONE || kind == SCX_EXIT_DONE)
+ if (kind == SCX_EXIT_DONE) /* already disabled? */
return;
- if (atomic_try_cmpxchg(&scx_exit_kind, &kind, SCX_EXIT_DONE))
+ WARN_ON_ONCE(kind == SCX_EXIT_NONE);
+ if (atomic_try_cmpxchg(&sch->exit_kind, &kind, SCX_EXIT_DONE))
break;
}
ei->kind = kind;
ei->reason = scx_exit_reason(ei->kind);
/* guarantee forward progress by bypassing scx_ops */
- scx_ops_bypass(true);
+ scx_bypass(true);
- switch (scx_ops_set_enable_state(SCX_OPS_DISABLING)) {
- case SCX_OPS_DISABLING:
+ switch (scx_set_enable_state(SCX_DISABLING)) {
+ case SCX_DISABLING:
WARN_ONCE(true, "sched_ext: duplicate disabling instance?");
break;
- case SCX_OPS_DISABLED:
+ case SCX_DISABLED:
pr_warn("sched_ext: ops error detected without ops (%s)\n",
- scx_exit_info->msg);
- WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_DISABLED) !=
- SCX_OPS_DISABLING);
+ sch->exit_info->msg);
+ WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
goto done;
default:
break;
@@ -4678,17 +4913,17 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
* we can safely use blocking synchronization constructs. Actually
* disable ops.
*/
- mutex_lock(&scx_ops_enable_mutex);
+ mutex_lock(&scx_enable_mutex);
static_branch_disable(&__scx_switched_all);
WRITE_ONCE(scx_switching_all, false);
/*
* Shut down cgroup support before tasks so that the cgroup attach path
- * doesn't race against scx_ops_exit_task().
+ * doesn't race against scx_exit_task().
*/
scx_cgroup_lock();
- scx_cgroup_exit();
+ scx_cgroup_exit(sch);
scx_cgroup_unlock();
/*
@@ -4697,7 +4932,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
*/
percpu_down_write(&scx_fork_rwsem);
- scx_ops_init_task_enabled = false;
+ scx_init_task_enabled = false;
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
@@ -4717,7 +4952,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
sched_enq_and_set_task(&ctx);
check_class_changed(task_rq(p), p, old_class, p->prio);
- scx_ops_exit_task(p);
+ scx_exit_task(p);
}
scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
@@ -4732,98 +4967,71 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
}
/* no task is on scx, turn off all the switches and flush in-progress calls */
- static_branch_disable(&__scx_ops_enabled);
- for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++)
- static_branch_disable(&scx_has_op[i]);
- static_branch_disable(&scx_ops_allow_queued_wakeup);
- static_branch_disable(&scx_ops_enq_last);
- static_branch_disable(&scx_ops_enq_exiting);
- static_branch_disable(&scx_ops_enq_migration_disabled);
- static_branch_disable(&scx_ops_cpu_preempt);
+ static_branch_disable(&__scx_enabled);
+ bitmap_zero(sch->has_op, SCX_OPI_END);
scx_idle_disable();
synchronize_rcu();
if (ei->kind >= SCX_EXIT_ERROR) {
pr_err("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
- scx_ops.name, ei->reason);
+ sch->ops.name, ei->reason);
if (ei->msg[0] != '\0')
- pr_err("sched_ext: %s: %s\n", scx_ops.name, ei->msg);
+ pr_err("sched_ext: %s: %s\n", sch->ops.name, ei->msg);
#ifdef CONFIG_STACKTRACE
stack_trace_print(ei->bt, ei->bt_len, 2);
#endif
} else {
pr_info("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
- scx_ops.name, ei->reason);
+ sch->ops.name, ei->reason);
}
- if (scx_ops.exit)
- SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei);
+ if (sch->ops.exit)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, ei);
cancel_delayed_work_sync(&scx_watchdog_work);
/*
- * Delete the kobject from the hierarchy eagerly in addition to just
- * dropping a reference. Otherwise, if the object is deleted
- * asynchronously, sysfs could observe an object of the same name still
- * in the hierarchy when another scheduler is loaded.
+ * scx_root clearing must be inside cpus_read_lock(). See
+ * handle_hotplug().
*/
- kobject_del(scx_root_kobj);
- kobject_put(scx_root_kobj);
- scx_root_kobj = NULL;
-
- memset(&scx_ops, 0, sizeof(scx_ops));
-
- rhashtable_walk_enter(&dsq_hash, &rht_iter);
- do {
- rhashtable_walk_start(&rht_iter);
-
- while ((dsq = rhashtable_walk_next(&rht_iter)) && !IS_ERR(dsq))
- destroy_dsq(dsq->id);
+ cpus_read_lock();
+ RCU_INIT_POINTER(scx_root, NULL);
+ cpus_read_unlock();
- rhashtable_walk_stop(&rht_iter);
- } while (dsq == ERR_PTR(-EAGAIN));
- rhashtable_walk_exit(&rht_iter);
+ /*
+ * Delete the kobject from the hierarchy synchronously. Otherwise, sysfs
+ * could observe an object of the same name still in the hierarchy when
+ * the next scheduler is loaded.
+ */
+ kobject_del(&sch->kobj);
free_percpu(scx_dsp_ctx);
scx_dsp_ctx = NULL;
scx_dsp_max_batch = 0;
- free_exit_info(scx_exit_info);
- scx_exit_info = NULL;
-
- mutex_unlock(&scx_ops_enable_mutex);
+ mutex_unlock(&scx_enable_mutex);
- WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_DISABLED) !=
- SCX_OPS_DISABLING);
+ WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
done:
- scx_ops_bypass(false);
+ scx_bypass(false);
}
-static DEFINE_KTHREAD_WORK(scx_ops_disable_work, scx_ops_disable_workfn);
-
-static void schedule_scx_ops_disable_work(void)
-{
- struct kthread_worker *helper = READ_ONCE(scx_ops_helper);
-
- /*
- * We may be called spuriously before the first bpf_sched_ext_reg(). If
- * scx_ops_helper isn't set up yet, there's nothing to do.
- */
- if (helper)
- kthread_queue_work(helper, &scx_ops_disable_work);
-}
-
-static void scx_ops_disable(enum scx_exit_kind kind)
+static void scx_disable(enum scx_exit_kind kind)
{
int none = SCX_EXIT_NONE;
+ struct scx_sched *sch;
if (WARN_ON_ONCE(kind == SCX_EXIT_NONE || kind == SCX_EXIT_DONE))
kind = SCX_EXIT_ERROR;
- atomic_try_cmpxchg(&scx_exit_kind, &none, kind);
-
- schedule_scx_ops_disable_work();
+ rcu_read_lock();
+ sch = rcu_dereference(scx_root);
+ if (sch) {
+ atomic_try_cmpxchg(&sch->exit_kind, &none, kind);
+ kthread_queue_work(sch->helper, &sch->disable_work);
+ }
+ rcu_read_unlock();
}
static void dump_newline(struct seq_buf *s)
@@ -4941,6 +5149,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
struct task_struct *p, char marker)
{
static unsigned long bt[SCX_EXIT_BT_LEN];
+ struct scx_sched *sch = scx_root;
char dsq_id_buf[19] = "(n/a)";
unsigned long ops_state = atomic_long_read(&p->scx.ops_state);
unsigned int bt_len = 0;
@@ -4963,9 +5172,9 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
p->scx.dsq_vtime, p->scx.slice, p->scx.weight);
dump_line(s, " cpus=%*pb", cpumask_pr_args(p->cpus_ptr));
- if (SCX_HAS_OP(dump_task)) {
+ if (SCX_HAS_OP(sch, dump_task)) {
ops_dump_init(s, " ");
- SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p);
+ SCX_CALL_OP(sch, SCX_KF_REST, dump_task, NULL, dctx, p);
ops_dump_exit();
}
@@ -4982,6 +5191,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
{
static DEFINE_SPINLOCK(dump_lock);
static const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
+ struct scx_sched *sch = scx_root;
struct scx_dump_ctx dctx = {
.kind = ei->kind,
.exit_code = ei->exit_code,
@@ -5010,9 +5220,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
dump_stack_trace(&s, " ", ei->bt, ei->bt_len);
}
- if (SCX_HAS_OP(dump)) {
+ if (SCX_HAS_OP(sch, dump)) {
ops_dump_init(&s, "");
- SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx);
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, dump, NULL, &dctx);
ops_dump_exit();
}
@@ -5033,7 +5243,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
idle = list_empty(&rq->scx.runnable_list) &&
rq->curr->sched_class == &idle_sched_class;
- if (idle && !SCX_HAS_OP(dump_cpu))
+ if (idle && !SCX_HAS_OP(sch, dump_cpu))
goto next;
/*
@@ -5067,9 +5277,10 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
cpumask_pr_args(rq->scx.cpus_to_wait));
used = seq_buf_used(&ns);
- if (SCX_HAS_OP(dump_cpu)) {
+ if (SCX_HAS_OP(sch, dump_cpu)) {
ops_dump_init(&ns, " ");
- SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle);
+ SCX_CALL_OP(sch, SCX_KF_REST, dump_cpu, NULL,
+ &dctx, cpu, idle);
ops_dump_exit();
}
@@ -5103,13 +5314,13 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
dump_line(&s, "Event counters");
dump_line(&s, "--------------");
- scx_bpf_events(&events, sizeof(events));
+ scx_read_events(sch, &events);
scx_dump_event(s, &events, SCX_EV_SELECT_CPU_FALLBACK);
scx_dump_event(s, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST);
scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING);
scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
- scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL);
+ scx_dump_event(s, &events, SCX_EV_REFILL_SLICE_DFL);
scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE);
@@ -5121,27 +5332,25 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
spin_unlock_irqrestore(&dump_lock, flags);
}
-static void scx_ops_error_irq_workfn(struct irq_work *irq_work)
+static void scx_error_irq_workfn(struct irq_work *irq_work)
{
- struct scx_exit_info *ei = scx_exit_info;
+ struct scx_sched *sch = container_of(irq_work, struct scx_sched, error_irq_work);
+ struct scx_exit_info *ei = sch->exit_info;
if (ei->kind >= SCX_EXIT_ERROR)
- scx_dump_state(ei, scx_ops.exit_dump_len);
+ scx_dump_state(ei, sch->ops.exit_dump_len);
- schedule_scx_ops_disable_work();
+ kthread_queue_work(sch->helper, &sch->disable_work);
}
-static DEFINE_IRQ_WORK(scx_ops_error_irq_work, scx_ops_error_irq_workfn);
-
-static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
- s64 exit_code,
- const char *fmt, ...)
+static void scx_vexit(struct scx_sched *sch,
+ enum scx_exit_kind kind, s64 exit_code,
+ const char *fmt, va_list args)
{
- struct scx_exit_info *ei = scx_exit_info;
+ struct scx_exit_info *ei = sch->exit_info;
int none = SCX_EXIT_NONE;
- va_list args;
- if (!atomic_try_cmpxchg(&scx_exit_kind, &none, kind))
+ if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind))
return;
ei->exit_code = exit_code;
@@ -5149,31 +5358,98 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
if (kind >= SCX_EXIT_ERROR)
ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);
#endif
- va_start(args, fmt);
vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
- va_end(args);
/*
* Set ei->kind and ->reason for scx_dump_state(). They'll be set again
- * in scx_ops_disable_workfn().
+ * in scx_disable_workfn().
*/
ei->kind = kind;
ei->reason = scx_exit_reason(ei->kind);
- irq_work_queue(&scx_ops_error_irq_work);
+ irq_work_queue(&sch->error_irq_work);
}
-static struct kthread_worker *scx_create_rt_helper(const char *name)
+static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
{
- struct kthread_worker *helper;
+ struct scx_sched *sch;
+ int node, ret;
- helper = kthread_run_worker(0, name);
- if (helper)
- sched_set_fifo(helper->task);
- return helper;
-}
+ sch = kzalloc(sizeof(*sch), GFP_KERNEL);
+ if (!sch)
+ return ERR_PTR(-ENOMEM);
+
+ sch->exit_info = alloc_exit_info(ops->exit_dump_len);
+ if (!sch->exit_info) {
+ ret = -ENOMEM;
+ goto err_free_sch;
+ }
+
+ ret = rhashtable_init(&sch->dsq_hash, &dsq_hash_params);
+ if (ret < 0)
+ goto err_free_ei;
-static void check_hotplug_seq(const struct sched_ext_ops *ops)
+ sch->global_dsqs = kcalloc(nr_node_ids, sizeof(sch->global_dsqs[0]),
+ GFP_KERNEL);
+ if (!sch->global_dsqs) {
+ ret = -ENOMEM;
+ goto err_free_hash;
+ }
+
+ for_each_node_state(node, N_POSSIBLE) {
+ struct scx_dispatch_q *dsq;
+
+ dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
+ if (!dsq) {
+ ret = -ENOMEM;
+ goto err_free_gdsqs;
+ }
+
+ init_dsq(dsq, SCX_DSQ_GLOBAL);
+ sch->global_dsqs[node] = dsq;
+ }
+
+ sch->event_stats_cpu = alloc_percpu(struct scx_event_stats);
+ if (!sch->event_stats_cpu)
+ goto err_free_gdsqs;
+
+ sch->helper = kthread_run_worker(0, "sched_ext_helper");
+ if (!sch->helper)
+ goto err_free_event_stats;
+ sched_set_fifo(sch->helper->task);
+
+ atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
+ init_irq_work(&sch->error_irq_work, scx_error_irq_workfn);
+ kthread_init_work(&sch->disable_work, scx_disable_workfn);
+ sch->ops = *ops;
+ ops->priv = sch;
+
+ sch->kobj.kset = scx_kset;
+ ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
+ if (ret < 0)
+ goto err_stop_helper;
+
+ return sch;
+
+err_stop_helper:
+ kthread_stop(sch->helper->task);
+err_free_event_stats:
+ free_percpu(sch->event_stats_cpu);
+err_free_gdsqs:
+ for_each_node_state(node, N_POSSIBLE)
+ kfree(sch->global_dsqs[node]);
+ kfree(sch->global_dsqs);
+err_free_hash:
+ rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
+err_free_ei:
+ free_exit_info(sch->exit_info);
+err_free_sch:
+ kfree(sch);
+ return ERR_PTR(ret);
+}
+
+static void check_hotplug_seq(struct scx_sched *sch,
+ const struct sched_ext_ops *ops)
{
unsigned long long global_hotplug_seq;
@@ -5185,21 +5461,22 @@ static void check_hotplug_seq(const struct sched_ext_ops *ops)
if (ops->hotplug_seq) {
global_hotplug_seq = atomic_long_read(&scx_hotplug_seq);
if (ops->hotplug_seq != global_hotplug_seq) {
- scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
- "expected hotplug seq %llu did not match actual %llu",
- ops->hotplug_seq, global_hotplug_seq);
+ scx_exit(sch, SCX_EXIT_UNREG_KERN,
+ SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
+ "expected hotplug seq %llu did not match actual %llu",
+ ops->hotplug_seq, global_hotplug_seq);
}
}
}
-static int validate_ops(const struct sched_ext_ops *ops)
+static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops)
{
/*
* It doesn't make sense to specify the SCX_OPS_ENQ_LAST flag if the
* ops.enqueue() callback isn't implemented.
*/
if ((ops->flags & SCX_OPS_ENQ_LAST) && !ops->enqueue) {
- scx_ops_error("SCX_OPS_ENQ_LAST requires ops.enqueue() to be implemented");
+ scx_error(sch, "SCX_OPS_ENQ_LAST requires ops.enqueue() to be implemented");
return -EINVAL;
}
@@ -5209,7 +5486,7 @@ static int validate_ops(const struct sched_ext_ops *ops)
*/
if ((ops->flags & SCX_OPS_BUILTIN_IDLE_PER_NODE) &&
(ops->update_idle && !(ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE))) {
- scx_ops_error("SCX_OPS_BUILTIN_IDLE_PER_NODE requires CPU idle selection enabled");
+ scx_error(sch, "SCX_OPS_BUILTIN_IDLE_PER_NODE requires CPU idle selection enabled");
return -EINVAL;
}
@@ -5219,12 +5496,13 @@ static int validate_ops(const struct sched_ext_ops *ops)
return 0;
}
-static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
{
+ struct scx_sched *sch;
struct scx_task_iter sti;
struct task_struct *p;
unsigned long timeout;
- int i, cpu, node, ret;
+ int i, cpu, ret;
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
cpu_possible_mask)) {
@@ -5232,87 +5510,25 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
return -EINVAL;
}
- mutex_lock(&scx_ops_enable_mutex);
-
- /*
- * Clear event counters so a new scx scheduler gets
- * fresh event counter values.
- */
- for_each_possible_cpu(cpu) {
- struct scx_event_stats *e = per_cpu_ptr(&event_stats_cpu, cpu);
- memset(e, 0, sizeof(*e));
- }
-
- if (!scx_ops_helper) {
- WRITE_ONCE(scx_ops_helper,
- scx_create_rt_helper("sched_ext_ops_helper"));
- if (!scx_ops_helper) {
- ret = -ENOMEM;
- goto err_unlock;
- }
- }
-
- if (!global_dsqs) {
- struct scx_dispatch_q **dsqs;
-
- dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL);
- if (!dsqs) {
- ret = -ENOMEM;
- goto err_unlock;
- }
+ mutex_lock(&scx_enable_mutex);
- for_each_node_state(node, N_POSSIBLE) {
- struct scx_dispatch_q *dsq;
-
- dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
- if (!dsq) {
- for_each_node_state(node, N_POSSIBLE)
- kfree(dsqs[node]);
- kfree(dsqs);
- ret = -ENOMEM;
- goto err_unlock;
- }
-
- init_dsq(dsq, SCX_DSQ_GLOBAL);
- dsqs[node] = dsq;
- }
-
- global_dsqs = dsqs;
- }
-
- if (scx_ops_enable_state() != SCX_OPS_DISABLED) {
+ if (scx_enable_state() != SCX_DISABLED) {
ret = -EBUSY;
goto err_unlock;
}
- scx_root_kobj = kzalloc(sizeof(*scx_root_kobj), GFP_KERNEL);
- if (!scx_root_kobj) {
- ret = -ENOMEM;
+ sch = scx_alloc_and_add_sched(ops);
+ if (IS_ERR(sch)) {
+ ret = PTR_ERR(sch);
goto err_unlock;
}
- scx_root_kobj->kset = scx_kset;
- ret = kobject_init_and_add(scx_root_kobj, &scx_ktype, NULL, "root");
- if (ret < 0)
- goto err;
-
- scx_exit_info = alloc_exit_info(ops->exit_dump_len);
- if (!scx_exit_info) {
- ret = -ENOMEM;
- goto err_del;
- }
-
/*
- * Set scx_ops, transition to ENABLING and clear exit info to arm the
- * disable path. Failure triggers full disabling from here on.
+ * Transition to ENABLING and clear exit info to arm the disable path.
+ * Failure triggers full disabling from here on.
*/
- scx_ops = *ops;
-
- WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_ENABLING) !=
- SCX_OPS_DISABLED);
-
- atomic_set(&scx_exit_kind, SCX_EXIT_NONE);
- scx_warned_zero_slice = false;
+ WARN_ON_ONCE(scx_set_enable_state(SCX_ENABLING) != SCX_DISABLED);
+ WARN_ON_ONCE(scx_root);
atomic_long_set(&scx_nr_rejected, 0);
@@ -5325,28 +5541,34 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
*/
cpus_read_lock();
+ /*
+ * Make the scheduler instance visible. Must be inside cpus_read_lock().
+ * See handle_hotplug().
+ */
+ rcu_assign_pointer(scx_root, sch);
+
scx_idle_enable(ops);
- if (scx_ops.init) {
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
+ if (sch->ops.init) {
+ ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init, NULL);
if (ret) {
- ret = ops_sanitize_err("init", ret);
+ ret = ops_sanitize_err(sch, "init", ret);
cpus_read_unlock();
- scx_ops_error("ops.init() failed (%d)", ret);
+ scx_error(sch, "ops.init() failed (%d)", ret);
goto err_disable;
}
}
for (i = SCX_OPI_CPU_HOTPLUG_BEGIN; i < SCX_OPI_CPU_HOTPLUG_END; i++)
if (((void (**)(void))ops)[i])
- static_branch_enable_cpuslocked(&scx_has_op[i]);
+ set_bit(i, sch->has_op);
- check_hotplug_seq(ops);
+ check_hotplug_seq(sch, ops);
scx_idle_update_selcpu_topology(ops);
cpus_read_unlock();
- ret = validate_ops(ops);
+ ret = validate_ops(sch, ops);
if (ret)
goto err_disable;
@@ -5371,27 +5593,19 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
scx_watchdog_timeout / 2);
/*
- * Once __scx_ops_enabled is set, %current can be switched to SCX
- * anytime. This can lead to stalls as some BPF schedulers (e.g.
- * userspace scheduling) may not function correctly before all tasks are
- * switched. Init in bypass mode to guarantee forward progress.
+ * Once __scx_enabled is set, %current can be switched to SCX anytime.
+ * This can lead to stalls as some BPF schedulers (e.g. userspace
+ * scheduling) may not function correctly before all tasks are switched.
+ * Init in bypass mode to guarantee forward progress.
*/
- scx_ops_bypass(true);
+ scx_bypass(true);
for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
if (((void (**)(void))ops)[i])
- static_branch_enable(&scx_has_op[i]);
-
- if (ops->flags & SCX_OPS_ALLOW_QUEUED_WAKEUP)
- static_branch_enable(&scx_ops_allow_queued_wakeup);
- if (ops->flags & SCX_OPS_ENQ_LAST)
- static_branch_enable(&scx_ops_enq_last);
- if (ops->flags & SCX_OPS_ENQ_EXITING)
- static_branch_enable(&scx_ops_enq_exiting);
- if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED)
- static_branch_enable(&scx_ops_enq_migration_disabled);
- if (scx_ops.cpu_acquire || scx_ops.cpu_release)
- static_branch_enable(&scx_ops_cpu_preempt);
+ set_bit(i, sch->has_op);
+
+ if (sch->ops.cpu_acquire || sch->ops.cpu_release)
+ sch->ops.flags |= SCX_OPS_HAS_CPU_PREEMPT;
/*
* Lock out forks, cgroup on/offlining and moves before opening the
@@ -5399,8 +5613,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
*/
percpu_down_write(&scx_fork_rwsem);
- WARN_ON_ONCE(scx_ops_init_task_enabled);
- scx_ops_init_task_enabled = true;
+ WARN_ON_ONCE(scx_init_task_enabled);
+ scx_init_task_enabled = true;
/*
* Enable ops for every task. Fork is excluded by scx_fork_rwsem
@@ -5409,14 +5623,14 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
* tasks. Prep all tasks first and then enable them with preemption
* disabled.
*
- * All cgroups should be initialized before scx_ops_init_task() so that
- * the BPF scheduler can reliably track each task's cgroup membership
- * from scx_ops_init_task(). Lock out cgroup on/offlining and task
- * migrations while tasks are being initialized so that
- * scx_cgroup_can_attach() never sees uninitialized tasks.
+ * All cgroups should be initialized before scx_init_task() so that the
+ * BPF scheduler can reliably track each task's cgroup membership from
+ * scx_init_task(). Lock out cgroup on/offlining and task migrations
+ * while tasks are being initialized so that scx_cgroup_can_attach()
+ * never sees uninitialized tasks.
*/
scx_cgroup_lock();
- ret = scx_cgroup_init();
+ ret = scx_cgroup_init(sch);
if (ret)
goto err_disable_unlock_all;
@@ -5432,13 +5646,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
scx_task_iter_unlock(&sti);
- ret = scx_ops_init_task(p, task_group(p), false);
+ ret = scx_init_task(p, task_group(p), false);
if (ret) {
put_task_struct(p);
scx_task_iter_relock(&sti);
scx_task_iter_stop(&sti);
- scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
- ret, p->comm, p->pid);
+ scx_error(sch, "ops.init_task() failed (%d) for %s[%d]",
+ ret, p->comm, p->pid);
goto err_disable_unlock_all;
}
@@ -5456,7 +5670,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
* all eligible tasks.
*/
WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL));
- static_branch_enable(&__scx_ops_enabled);
+ static_branch_enable(&__scx_enabled);
/*
* We're fully committed and can't fail. The task READY -> ENABLED
@@ -5487,10 +5701,10 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
- scx_ops_bypass(false);
+ scx_bypass(false);
- if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
- WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
+ if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) {
+ WARN_ON_ONCE(atomic_read(&sch->exit_kind) == SCX_EXIT_NONE);
goto err_disable;
}
@@ -5498,44 +5712,35 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
static_branch_enable(&__scx_switched_all);
pr_info("sched_ext: BPF scheduler \"%s\" enabled%s\n",
- scx_ops.name, scx_switched_all() ? "" : " (partial)");
- kobject_uevent(scx_root_kobj, KOBJ_ADD);
- mutex_unlock(&scx_ops_enable_mutex);
+ sch->ops.name, scx_switched_all() ? "" : " (partial)");
+ kobject_uevent(&sch->kobj, KOBJ_ADD);
+ mutex_unlock(&scx_enable_mutex);
atomic_long_inc(&scx_enable_seq);
return 0;
-err_del:
- kobject_del(scx_root_kobj);
-err:
- kobject_put(scx_root_kobj);
- scx_root_kobj = NULL;
- if (scx_exit_info) {
- free_exit_info(scx_exit_info);
- scx_exit_info = NULL;
- }
err_unlock:
- mutex_unlock(&scx_ops_enable_mutex);
+ mutex_unlock(&scx_enable_mutex);
return ret;
err_disable_unlock_all:
scx_cgroup_unlock();
percpu_up_write(&scx_fork_rwsem);
- scx_ops_bypass(false);
+ scx_bypass(false);
err_disable:
- mutex_unlock(&scx_ops_enable_mutex);
+ mutex_unlock(&scx_enable_mutex);
/*
* Returning an error code here would not pass all the error information
- * to userspace. Record errno using scx_ops_error() for cases
- * scx_ops_error() wasn't already invoked and exit indicating success so
- * that the error is notified through ops.exit() with all the details.
+ * to userspace. Record errno using scx_error() for cases scx_error()
+ * wasn't already invoked and exit indicating success so that the error
+ * is notified through ops.exit() with all the details.
*
- * Flush scx_ops_disable_work to ensure that error is reported before
- * init completion.
+ * Flush scx_disable_work to ensure that error is reported before init
+ * completion. sch's base reference will be put by bpf_scx_unreg().
*/
- scx_ops_error("scx_ops_enable() failed (%d)", ret);
- kthread_flush_work(&scx_ops_disable_work);
+ scx_error(sch, "scx_enable() failed (%d)", ret);
+ kthread_flush_work(&sch->disable_work);
return 0;
}
@@ -5679,13 +5884,17 @@ static int bpf_scx_check_member(const struct btf_type *t,
static int bpf_scx_reg(void *kdata, struct bpf_link *link)
{
- return scx_ops_enable(kdata, link);
+ return scx_enable(kdata, link);
}
static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
{
- scx_ops_disable(SCX_EXIT_UNREG);
- kthread_flush_work(&scx_ops_disable_work);
+ struct sched_ext_ops *ops = kdata;
+ struct scx_sched *sch = ops->priv;
+
+ scx_disable(SCX_EXIT_UNREG);
+ kthread_flush_work(&sch->disable_work);
+ kobject_put(&sch->kobj);
}
static int bpf_scx_init(struct btf *btf)
@@ -5807,10 +6016,7 @@ static struct bpf_struct_ops bpf_sched_ext_ops = {
static void sysrq_handle_sched_ext_reset(u8 key)
{
- if (scx_ops_helper)
- scx_ops_disable(SCX_EXIT_SYSRQ);
- else
- pr_info("sched_ext: BPF scheduler not yet used\n");
+ scx_disable(SCX_EXIT_SYSRQ);
}
static const struct sysrq_key_op sysrq_sched_ext_reset_op = {
@@ -5958,13 +6164,14 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
*/
void print_scx_info(const char *log_lvl, struct task_struct *p)
{
- enum scx_ops_enable_state state = scx_ops_enable_state();
+ struct scx_sched *sch = scx_root;
+ enum scx_enable_state state = scx_enable_state();
const char *all = READ_ONCE(scx_switching_all) ? "+all" : "";
char runnable_at_buf[22] = "?";
struct sched_class *class;
unsigned long runnable_at;
- if (state == SCX_OPS_DISABLED)
+ if (state == SCX_DISABLED)
return;
/*
@@ -5973,8 +6180,8 @@ void print_scx_info(const char *log_lvl, struct task_struct *p)
*/
if (copy_from_kernel_nofault(&class, &p->sched_class, sizeof(class)) ||
class != &ext_sched_class) {
- printk("%sSched_ext: %s (%s%s)", log_lvl, scx_ops.name,
- scx_ops_enable_state_str[state], all);
+ printk("%sSched_ext: %s (%s%s)", log_lvl, sch->ops.name,
+ scx_enable_state_str[state], all);
return;
}
@@ -5985,7 +6192,7 @@ void print_scx_info(const char *log_lvl, struct task_struct *p)
/* print everything onto one line to conserve console space */
printk("%sSched_ext: %s (%s%s), task: runnable_at=%s",
- log_lvl, scx_ops.name, scx_ops_enable_state_str[state], all,
+ log_lvl, sch->ops.name, scx_enable_state_str[state], all,
runnable_at_buf);
}
@@ -6001,12 +6208,12 @@ static int scx_pm_handler(struct notifier_block *nb, unsigned long event, void *
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
case PM_RESTORE_PREPARE:
- scx_ops_bypass(true);
+ scx_bypass(true);
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
case PM_POST_RESTORE:
- scx_ops_bypass(false);
+ scx_bypass(false);
break;
}
@@ -6029,7 +6236,6 @@ void __init init_sched_ext_class(void)
WRITE_ONCE(v, SCX_ENQ_WAKEUP | SCX_DEQ_SLEEP | SCX_KICK_PREEMPT |
SCX_TG_ONLINE);
- BUG_ON(rhashtable_init(&dsq_hash, &dsq_hash_params));
scx_idle_init_masks();
scx_kick_cpus_pnt_seqs =
@@ -6073,12 +6279,12 @@ static bool scx_dsq_insert_preamble(struct task_struct *p, u64 enq_flags)
lockdep_assert_irqs_disabled();
if (unlikely(!p)) {
- scx_ops_error("called with NULL task");
+ scx_kf_error("called with NULL task");
return false;
}
if (unlikely(enq_flags & __SCX_ENQ_INTERNAL_MASK)) {
- scx_ops_error("invalid enq_flags 0x%llx", enq_flags);
+ scx_kf_error("invalid enq_flags 0x%llx", enq_flags);
return false;
}
@@ -6098,7 +6304,7 @@ static void scx_dsq_insert_commit(struct task_struct *p, u64 dsq_id,
}
if (unlikely(dspc->cursor >= scx_dsp_max_batch)) {
- scx_ops_error("dispatch buffer overflow");
+ scx_kf_error("dispatch buffer overflow");
return;
}
@@ -6230,6 +6436,7 @@ static const struct btf_kfunc_id_set scx_kfunc_set_enqueue_dispatch = {
static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
struct task_struct *p, u64 dsq_id, u64 enq_flags)
{
+ struct scx_sched *sch = scx_root;
struct scx_dispatch_q *src_dsq = kit->dsq, *dst_dsq;
struct rq *this_rq, *src_rq, *locked_rq;
bool dispatched = false;
@@ -6264,7 +6471,7 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
* cause similar live-lock conditions as consume_dispatch_q(). Insert a
* breather if necessary.
*/
- scx_ops_breather(src_rq);
+ scx_breather(src_rq);
locked_rq = src_rq;
raw_spin_lock(&src_dsq->lock);
@@ -6282,7 +6489,7 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
}
/* @p is still on $src_dsq and stable, determine the destination */
- dst_dsq = find_dsq_for_dispatch(this_rq, dsq_id, p);
+ dst_dsq = find_dsq_for_dispatch(sch, this_rq, dsq_id, p);
/*
* Apply vtime and slice updates before moving so that the new time is
@@ -6295,7 +6502,7 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
p->scx.slice = kit->slice;
/* execute move */
- locked_rq = move_task_between_dsqs(p, enq_flags, src_dsq, dst_dsq);
+ locked_rq = move_task_between_dsqs(sch, p, enq_flags, src_dsq, dst_dsq);
dispatched = true;
out:
if (in_balance) {
@@ -6343,7 +6550,7 @@ __bpf_kfunc void scx_bpf_dispatch_cancel(void)
if (dspc->cursor > 0)
dspc->cursor--;
else
- scx_ops_error("dispatch buffer underflow");
+ scx_kf_error("dispatch buffer underflow");
}
/**
@@ -6362,21 +6569,22 @@ __bpf_kfunc void scx_bpf_dispatch_cancel(void)
*/
__bpf_kfunc bool scx_bpf_dsq_move_to_local(u64 dsq_id)
{
+ struct scx_sched *sch = scx_root;
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
struct scx_dispatch_q *dsq;
if (!scx_kf_allowed(SCX_KF_DISPATCH))
return false;
- flush_dispatch_buf(dspc->rq);
+ flush_dispatch_buf(sch, dspc->rq);
- dsq = find_user_dsq(dsq_id);
+ dsq = find_user_dsq(sch, dsq_id);
if (unlikely(!dsq)) {
- scx_ops_error("invalid DSQ ID 0x%016llx", dsq_id);
+ scx_error(sch, "invalid DSQ ID 0x%016llx", dsq_id);
return false;
}
- if (consume_dispatch_q(dspc->rq, dsq)) {
+ if (consume_dispatch_q(sch, dspc->rq, dsq)) {
/*
* A successfully consumed task can be dequeued before it starts
* running while the CPU is trying to migrate other dispatched
@@ -6630,10 +6838,36 @@ __bpf_kfunc_start_defs();
*/
__bpf_kfunc s32 scx_bpf_create_dsq(u64 dsq_id, s32 node)
{
+ struct scx_dispatch_q *dsq;
+ struct scx_sched *sch;
+ s32 ret;
+
if (unlikely(node >= (int)nr_node_ids ||
(node < 0 && node != NUMA_NO_NODE)))
return -EINVAL;
- return PTR_ERR_OR_ZERO(create_dsq(dsq_id, node));
+
+ if (unlikely(dsq_id & SCX_DSQ_FLAG_BUILTIN))
+ return -EINVAL;
+
+ dsq = kmalloc_node(sizeof(*dsq), GFP_KERNEL, node);
+ if (!dsq)
+ return -ENOMEM;
+
+ init_dsq(dsq, dsq_id);
+
+ rcu_read_lock();
+
+ sch = rcu_dereference(scx_root);
+ if (sch)
+ ret = rhashtable_lookup_insert_fast(&sch->dsq_hash, &dsq->hash_node,
+ dsq_hash_params);
+ else
+ ret = -ENODEV;
+
+ rcu_read_unlock();
+ if (ret)
+ kfree(dsq);
+ return ret;
}
__bpf_kfunc_end_defs();
@@ -6672,7 +6906,7 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags)
struct rq *this_rq;
unsigned long irq_flags;
- if (!ops_cpu_valid(cpu, NULL))
+ if (!kf_cpu_valid(cpu, NULL))
return;
local_irq_save(irq_flags);
@@ -6696,7 +6930,7 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags)
struct rq *target_rq = cpu_rq(cpu);
if (unlikely(flags & (SCX_KICK_PREEMPT | SCX_KICK_WAIT)))
- scx_ops_error("PREEMPT/WAIT cannot be used with SCX_KICK_IDLE");
+ scx_kf_error("PREEMPT/WAIT cannot be used with SCX_KICK_IDLE");
if (raw_spin_rq_trylock(target_rq)) {
if (can_skip_idle_kick(target_rq)) {
@@ -6729,23 +6963,30 @@ out:
*/
__bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id)
{
+ struct scx_sched *sch;
struct scx_dispatch_q *dsq;
s32 ret;
preempt_disable();
+ sch = rcu_dereference_sched(scx_root);
+ if (unlikely(!sch)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
if (dsq_id == SCX_DSQ_LOCAL) {
ret = READ_ONCE(this_rq()->scx.local_dsq.nr);
goto out;
} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (ops_cpu_valid(cpu, NULL)) {
+ if (ops_cpu_valid(sch, cpu, NULL)) {
ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr);
goto out;
}
} else {
- dsq = find_user_dsq(dsq_id);
+ dsq = find_user_dsq(sch, dsq_id);
if (dsq) {
ret = READ_ONCE(dsq->nr);
goto out;
@@ -6768,7 +7009,13 @@ out:
*/
__bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id)
{
- destroy_dsq(dsq_id);
+ struct scx_sched *sch;
+
+ rcu_read_lock();
+ sch = rcu_dereference(scx_root);
+ if (sch)
+ destroy_dsq(sch, dsq_id);
+ rcu_read_unlock();
}
/**
@@ -6785,16 +7032,27 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
u64 flags)
{
struct bpf_iter_scx_dsq_kern *kit = (void *)it;
+ struct scx_sched *sch;
BUILD_BUG_ON(sizeof(struct bpf_iter_scx_dsq_kern) >
sizeof(struct bpf_iter_scx_dsq));
BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) !=
__alignof__(struct bpf_iter_scx_dsq));
+ /*
+ * next() and destroy() will be called regardless of the return value.
+ * Always clear $kit->dsq.
+ */
+ kit->dsq = NULL;
+
+ sch = rcu_dereference_check(scx_root, rcu_read_lock_bh_held());
+ if (unlikely(!sch))
+ return -ENODEV;
+
if (flags & ~__SCX_DSQ_ITER_USER_FLAGS)
return -EINVAL;
- kit->dsq = find_user_dsq(dsq_id);
+ kit->dsq = find_user_dsq(sch, dsq_id);
if (!kit->dsq)
return -ENOENT;
@@ -6884,21 +7142,20 @@ static s32 __bstr_format(u64 *data_buf, char *line_buf, size_t line_size,
if (data__sz % 8 || data__sz > MAX_BPRINTF_VARARGS * 8 ||
(data__sz && !data)) {
- scx_ops_error("invalid data=%p and data__sz=%u",
- (void *)data, data__sz);
+ scx_kf_error("invalid data=%p and data__sz=%u", (void *)data, data__sz);
return -EINVAL;
}
ret = copy_from_kernel_nofault(data_buf, data, data__sz);
if (ret < 0) {
- scx_ops_error("failed to read data fields (%d)", ret);
+ scx_kf_error("failed to read data fields (%d)", ret);
return ret;
}
ret = bpf_bprintf_prepare(fmt, UINT_MAX, data_buf, data__sz / 8,
&bprintf_data);
if (ret < 0) {
- scx_ops_error("format preparation failed (%d)", ret);
+ scx_kf_error("format preparation failed (%d)", ret);
return ret;
}
@@ -6906,8 +7163,7 @@ static s32 __bstr_format(u64 *data_buf, char *line_buf, size_t line_size,
bprintf_data.bin_args);
bpf_bprintf_cleanup(&bprintf_data);
if (ret < 0) {
- scx_ops_error("(\"%s\", %p, %u) failed to format",
- fmt, data, data__sz);
+ scx_kf_error("(\"%s\", %p, %u) failed to format", fmt, data, data__sz);
return ret;
}
@@ -6940,8 +7196,7 @@ __bpf_kfunc void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
raw_spin_lock_irqsave(&scx_exit_bstr_buf_lock, flags);
if (bstr_format(&scx_exit_bstr_buf, fmt, data, data__sz) >= 0)
- scx_ops_exit_kind(SCX_EXIT_UNREG_BPF, exit_code, "%s",
- scx_exit_bstr_buf.line);
+ scx_kf_exit(SCX_EXIT_UNREG_BPF, exit_code, "%s", scx_exit_bstr_buf.line);
raw_spin_unlock_irqrestore(&scx_exit_bstr_buf_lock, flags);
}
@@ -6961,8 +7216,7 @@ __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data,
raw_spin_lock_irqsave(&scx_exit_bstr_buf_lock, flags);
if (bstr_format(&scx_exit_bstr_buf, fmt, data, data__sz) >= 0)
- scx_ops_exit_kind(SCX_EXIT_ERROR_BPF, 0, "%s",
- scx_exit_bstr_buf.line);
+ scx_kf_exit(SCX_EXIT_ERROR_BPF, 0, "%s", scx_exit_bstr_buf.line);
raw_spin_unlock_irqrestore(&scx_exit_bstr_buf_lock, flags);
}
@@ -6986,7 +7240,7 @@ __bpf_kfunc void scx_bpf_dump_bstr(char *fmt, unsigned long long *data,
s32 ret;
if (raw_smp_processor_id() != dd->cpu) {
- scx_ops_error("scx_bpf_dump() must only be called from ops.dump() and friends");
+ scx_kf_error("scx_bpf_dump() must only be called from ops.dump() and friends");
return;
}
@@ -7027,7 +7281,7 @@ __bpf_kfunc void scx_bpf_dump_bstr(char *fmt, unsigned long long *data,
*/
__bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu)
{
- if (ops_cpu_valid(cpu, NULL))
+ if (kf_cpu_valid(cpu, NULL))
return arch_scale_cpu_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -7049,7 +7303,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu)
*/
__bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu)
{
- if (ops_cpu_valid(cpu, NULL))
+ if (kf_cpu_valid(cpu, NULL))
return arch_scale_freq_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -7072,18 +7326,37 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu)
__bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf)
{
if (unlikely(perf > SCX_CPUPERF_ONE)) {
- scx_ops_error("Invalid cpuperf target %u for CPU %d", perf, cpu);
+ scx_kf_error("Invalid cpuperf target %u for CPU %d", perf, cpu);
return;
}
- if (ops_cpu_valid(cpu, NULL)) {
- struct rq *rq = cpu_rq(cpu);
+ if (kf_cpu_valid(cpu, NULL)) {
+ struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq();
+ struct rq_flags rf;
+
+ /*
+ * When called with an rq lock held, restrict the operation
+ * to the corresponding CPU to prevent ABBA deadlocks.
+ */
+ if (locked_rq && rq != locked_rq) {
+ scx_kf_error("Invalid target CPU %d", cpu);
+ return;
+ }
+
+ /*
+ * If no rq lock is held, allow to operate on any CPU by
+ * acquiring the corresponding rq lock.
+ */
+ if (!locked_rq) {
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
+ }
rq->scx.cpuperf_target = perf;
+ cpufreq_update_util(rq, 0);
- rcu_read_lock_sched_notrace();
- cpufreq_update_util(cpu_rq(cpu), 0);
- rcu_read_unlock_sched_notrace();
+ if (!locked_rq)
+ rq_unlock_irqrestore(rq, &rf);
}
}
@@ -7161,7 +7434,7 @@ __bpf_kfunc s32 scx_bpf_task_cpu(const struct task_struct *p)
*/
__bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu)
{
- if (!ops_cpu_valid(cpu, NULL))
+ if (!kf_cpu_valid(cpu, NULL))
return NULL;
return cpu_rq(cpu);
@@ -7257,6 +7530,27 @@ __bpf_kfunc u64 scx_bpf_now(void)
return clock;
}
+static void scx_read_events(struct scx_sched *sch, struct scx_event_stats *events)
+{
+ struct scx_event_stats *e_cpu;
+ int cpu;
+
+ /* Aggregate per-CPU event counters into @events. */
+ memset(events, 0, sizeof(*events));
+ for_each_possible_cpu(cpu) {
+ e_cpu = per_cpu_ptr(sch->event_stats_cpu, cpu);
+ scx_agg_event(events, e_cpu, SCX_EV_SELECT_CPU_FALLBACK);
+ scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
+ scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
+ scx_agg_event(events, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
+ scx_agg_event(events, e_cpu, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
+ scx_agg_event(events, e_cpu, SCX_EV_REFILL_SLICE_DFL);
+ scx_agg_event(events, e_cpu, SCX_EV_BYPASS_DURATION);
+ scx_agg_event(events, e_cpu, SCX_EV_BYPASS_DISPATCH);
+ scx_agg_event(events, e_cpu, SCX_EV_BYPASS_ACTIVATE);
+ }
+}
+
/*
* scx_bpf_events - Get a system-wide event counter to
* @events: output buffer from a BPF program
@@ -7265,23 +7559,16 @@ __bpf_kfunc u64 scx_bpf_now(void)
__bpf_kfunc void scx_bpf_events(struct scx_event_stats *events,
size_t events__sz)
{
- struct scx_event_stats e_sys, *e_cpu;
- int cpu;
+ struct scx_sched *sch;
+ struct scx_event_stats e_sys;
- /* Aggregate per-CPU event counters into the system-wide counters. */
- memset(&e_sys, 0, sizeof(e_sys));
- for_each_possible_cpu(cpu) {
- e_cpu = per_cpu_ptr(&event_stats_cpu, cpu);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_SELECT_CPU_FALLBACK);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH);
- scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE);
- }
+ rcu_read_lock();
+ sch = rcu_dereference(scx_root);
+ if (sch)
+ scx_read_events(sch, &e_sys);
+ else
+ memset(&e_sys, 0, sizeof(e_sys));
+ rcu_read_unlock();
/*
* We cannot entirely trust a BPF-provided size since a BPF program
@@ -7314,12 +7601,6 @@ BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids)
BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
-BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle)
-BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_cpu_rq)
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 1bda96b19a1b..6e5072f57771 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -8,6 +8,11 @@
*/
#ifdef CONFIG_SCHED_CLASS_EXT
+static inline bool scx_kf_allowed_if_unlocked(void)
+{
+ return !current->scx.kf_mask;
+}
+
DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
void scx_tick(struct rq *rq);
@@ -21,6 +26,7 @@ void scx_rq_activate(struct rq *rq);
void scx_rq_deactivate(struct rq *rq);
int scx_check_setscheduler(struct task_struct *p, int policy);
bool task_should_scx(int policy);
+bool scx_allow_ttwu_queue(const struct task_struct *p);
void init_sched_ext_class(void);
static inline u32 scx_cpuperf_target(s32 cpu)
@@ -36,13 +42,6 @@ static inline bool task_on_scx(const struct task_struct *p)
return scx_enabled() && p->sched_class == &ext_sched_class;
}
-static inline bool scx_allow_ttwu_queue(const struct task_struct *p)
-{
- return !scx_enabled() ||
- static_branch_likely(&scx_ops_allow_queued_wakeup) ||
- p->sched_class != &ext_sched_class;
-}
-
#ifdef CONFIG_SCHED_CORE
bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
bool in_fi);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index cb343ca889e0..66da03cc0b33 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -47,6 +47,13 @@ static struct scx_idle_cpus scx_idle_global_masks;
static struct scx_idle_cpus **scx_idle_node_masks;
/*
+ * Local per-CPU cpumasks (used to generate temporary idle cpumasks).
+ */
+static DEFINE_PER_CPU(cpumask_var_t, local_idle_cpumask);
+static DEFINE_PER_CPU(cpumask_var_t, local_llc_idle_cpumask);
+static DEFINE_PER_CPU(cpumask_var_t, local_numa_idle_cpumask);
+
+/*
* Return the idle masks associated to a target @node.
*
* NUMA_NO_NODE identifies the global idle cpumask.
@@ -392,6 +399,14 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
}
/*
+ * Return true if @p can run on all possible CPUs, false otherwise.
+ */
+static inline bool task_affinity_all(const struct task_struct *p)
+{
+ return p->nr_cpus_allowed >= num_possible_cpus();
+}
+
+/*
* Built-in CPU idle selection policy:
*
* 1. Prioritize full-idle cores:
@@ -403,13 +418,15 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
* branch prediction optimizations.
*
* 3. Pick a CPU within the same LLC (Last-Level Cache):
- * - if the above conditions aren't met, pick a CPU that shares the same LLC
- * to maintain cache locality.
+ * - if the above conditions aren't met, pick a CPU that shares the same
+ * LLC, if the LLC domain is a subset of @cpus_allowed, to maintain
+ * cache locality.
*
* 4. Pick a CPU within the same NUMA node, if enabled:
- * - choose a CPU from the same NUMA node to reduce memory access latency.
+ * - choose a CPU from the same NUMA node, if the node cpumask is a
+ * subset of @cpus_allowed, to reduce memory access latency.
*
- * 5. Pick any idle CPU usable by the task.
+ * 5. Pick any idle CPU within the @cpus_allowed domain.
*
* Step 3 and 4 are performed only if the system has, respectively,
* multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
@@ -424,35 +441,77 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
* NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
* we never call ops.select_cpu() for them, see select_task_rq().
*/
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags)
{
- const struct cpumask *llc_cpus = NULL;
- const struct cpumask *numa_cpus = NULL;
+ const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
+ const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr;
int node = scx_cpu_node_if_enabled(prev_cpu);
s32 cpu;
+ preempt_disable();
+
+ /*
+ * Determine the subset of CPUs usable by @p within @cpus_allowed.
+ */
+ if (allowed != p->cpus_ptr) {
+ struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_idle_cpumask);
+
+ if (task_affinity_all(p)) {
+ allowed = cpus_allowed;
+ } else if (cpumask_and(local_cpus, cpus_allowed, p->cpus_ptr)) {
+ allowed = local_cpus;
+ } else {
+ cpu = -EBUSY;
+ goto out_enable;
+ }
+
+ /*
+ * If @prev_cpu is not in the allowed CPUs, skip topology
+ * optimizations and try to pick any idle CPU usable by the
+ * task.
+ *
+ * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
+ * the current node, as it may optimize some waker->wakee
+ * workloads.
+ */
+ if (!cpumask_test_cpu(prev_cpu, allowed)) {
+ node = scx_cpu_node_if_enabled(smp_processor_id());
+ cpu = scx_pick_idle_cpu(allowed, node, flags);
+ goto out_enable;
+ }
+ }
+
/*
* This is necessary to protect llc_cpus.
*/
rcu_read_lock();
/*
- * Determine the scheduling domain only if the task is allowed to run
- * on all CPUs.
+ * Determine the subset of CPUs that the task can use in its
+ * current LLC and node.
*
- * This is done primarily for efficiency, as it avoids the overhead of
- * updating a cpumask every time we need to select an idle CPU (which
- * can be costly in large SMP systems), but it also aligns logically:
- * if a task's scheduling domain is restricted by user-space (through
- * CPU affinity), the task will simply use the flat scheduling domain
- * defined by user-space.
+ * If the task can run on all CPUs, use the node and LLC cpumasks
+ * directly.
*/
- if (p->nr_cpus_allowed >= num_possible_cpus()) {
- if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
- numa_cpus = numa_span(prev_cpu);
+ if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) {
+ struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_numa_idle_cpumask);
+ const struct cpumask *cpus = numa_span(prev_cpu);
+
+ if (allowed == p->cpus_ptr && task_affinity_all(p))
+ numa_cpus = cpus;
+ else if (cpus && cpumask_and(local_cpus, allowed, cpus))
+ numa_cpus = local_cpus;
+ }
+
+ if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
+ struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_llc_idle_cpumask);
+ const struct cpumask *cpus = llc_span(prev_cpu);
- if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
- llc_cpus = llc_span(prev_cpu);
+ if (allowed == p->cpus_ptr && task_affinity_all(p))
+ llc_cpus = cpus;
+ else if (cpus && cpumask_and(local_cpus, allowed, cpus))
+ llc_cpus = local_cpus;
}
/*
@@ -490,7 +549,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
(!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
!cpumask_empty(idle_cpumask(waker_node)->cpu)) {
- if (cpumask_test_cpu(cpu, p->cpus_ptr))
+ if (cpumask_test_cpu(cpu, allowed))
goto out_unlock;
}
}
@@ -535,7 +594,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
* begin in prev_cpu's node and proceed to other nodes in
* order of increasing distance.
*/
- cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE);
+ cpu = scx_pick_idle_cpu(allowed, node, flags | SCX_PICK_IDLE_CORE);
if (cpu >= 0)
goto out_unlock;
@@ -583,10 +642,12 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
* in prev_cpu's node and proceed to other nodes in order of
* increasing distance.
*/
- cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
+ cpu = scx_pick_idle_cpu(allowed, node, flags);
out_unlock:
rcu_read_unlock();
+out_enable:
+ preempt_enable();
return cpu;
}
@@ -596,7 +657,7 @@ out_unlock:
*/
void scx_idle_init_masks(void)
{
- int node;
+ int i;
/* Allocate global idle cpumasks */
BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.cpu, GFP_KERNEL));
@@ -607,13 +668,23 @@ void scx_idle_init_masks(void)
sizeof(*scx_idle_node_masks), GFP_KERNEL);
BUG_ON(!scx_idle_node_masks);
- for_each_node(node) {
- scx_idle_node_masks[node] = kzalloc_node(sizeof(**scx_idle_node_masks),
- GFP_KERNEL, node);
- BUG_ON(!scx_idle_node_masks[node]);
+ for_each_node(i) {
+ scx_idle_node_masks[i] = kzalloc_node(sizeof(**scx_idle_node_masks),
+ GFP_KERNEL, i);
+ BUG_ON(!scx_idle_node_masks[i]);
+
+ BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[i]->cpu, GFP_KERNEL, i));
+ BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[i]->smt, GFP_KERNEL, i));
+ }
- BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[node]->cpu, GFP_KERNEL, node));
- BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[node]->smt, GFP_KERNEL, node));
+ /* Allocate local per-cpu idle cpumasks */
+ for_each_possible_cpu(i) {
+ BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_idle_cpumask, i),
+ GFP_KERNEL, cpu_to_node(i)));
+ BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_llc_idle_cpumask, i),
+ GFP_KERNEL, cpu_to_node(i)));
+ BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_numa_idle_cpumask, i),
+ GFP_KERNEL, cpu_to_node(i)));
}
}
@@ -662,21 +733,12 @@ static void update_builtin_idle(int cpu, bool idle)
*/
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
+ struct scx_sched *sch = scx_root;
int cpu = cpu_of(rq);
lockdep_assert_rq_held(rq);
/*
- * Trigger ops.update_idle() only when transitioning from a task to
- * the idle thread and vice versa.
- *
- * Idle transitions are indicated by do_notify being set to true,
- * managed by put_prev_task_idle()/set_next_task_idle().
- */
- if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
- SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
-
- /*
* Update the idle masks:
* - for real idle transitions (do_notify == true)
* - for idle-to-idle transitions (indicated by the previous task
@@ -693,6 +755,21 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
if (static_branch_likely(&scx_builtin_idle_enabled))
if (do_notify || is_idle_task(rq->curr))
update_builtin_idle(cpu, idle);
+
+ /*
+ * Trigger ops.update_idle() only when transitioning from a task to
+ * the idle thread and vice versa.
+ *
+ * Idle transitions are indicated by do_notify being set to true,
+ * managed by put_prev_task_idle()/set_next_task_idle().
+ *
+ * This must come after builtin idle update so that BPF schedulers can
+ * create interlocking between ops.update_idle() and ops.enqueue() -
+ * either enqueue() sees the idle bit or update_idle() sees the task
+ * that enqueue() queued.
+ */
+ if (SCX_HAS_OP(sch, update_idle) && do_notify && !scx_rq_bypassing(rq))
+ SCX_CALL_OP(sch, SCX_KF_REST, update_idle, rq, cpu_of(rq), idle);
}
static void reset_idle_masks(struct sched_ext_ops *ops)
@@ -748,7 +825,7 @@ void scx_idle_disable(void)
static int validate_node(int node)
{
if (!static_branch_likely(&scx_builtin_idle_per_node)) {
- scx_ops_error("per-node idle tracking is disabled");
+ scx_kf_error("per-node idle tracking is disabled");
return -EOPNOTSUPP;
}
@@ -758,13 +835,13 @@ static int validate_node(int node)
/* Make sure node is in a valid range */
if (node < 0 || node >= nr_node_ids) {
- scx_ops_error("invalid node %d", node);
+ scx_kf_error("invalid node %d", node);
return -EINVAL;
}
/* Make sure the node is part of the set of possible nodes */
if (!node_possible(node)) {
- scx_ops_error("unavailable node %d", node);
+ scx_kf_error("unavailable node %d", node);
return -EINVAL;
}
@@ -778,10 +855,72 @@ static bool check_builtin_idle_enabled(void)
if (static_branch_likely(&scx_builtin_idle_enabled))
return true;
- scx_ops_error("built-in idle tracking is disabled");
+ scx_kf_error("built-in idle tracking is disabled");
return false;
}
+s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *allowed, u64 flags)
+{
+ struct rq *rq;
+ struct rq_flags rf;
+ s32 cpu;
+
+ if (!kf_cpu_valid(prev_cpu, NULL))
+ return -EINVAL;
+
+ if (!check_builtin_idle_enabled())
+ return -EBUSY;
+
+ /*
+ * If called from an unlocked context, acquire the task's rq lock,
+ * so that we can safely access p->cpus_ptr and p->nr_cpus_allowed.
+ *
+ * Otherwise, allow to use this kfunc only from ops.select_cpu()
+ * and ops.select_enqueue().
+ */
+ if (scx_kf_allowed_if_unlocked()) {
+ rq = task_rq_lock(p, &rf);
+ } else {
+ if (!scx_kf_allowed(SCX_KF_SELECT_CPU | SCX_KF_ENQUEUE))
+ return -EPERM;
+ rq = scx_locked_rq();
+ }
+
+ /*
+ * Validate locking correctness to access p->cpus_ptr and
+ * p->nr_cpus_allowed: if we're holding an rq lock, we're safe;
+ * otherwise, assert that p->pi_lock is held.
+ */
+ if (!rq)
+ lockdep_assert_held(&p->pi_lock);
+
+#ifdef CONFIG_SMP
+ /*
+ * This may also be called from ops.enqueue(), so we need to handle
+ * per-CPU tasks as well. For these tasks, we can skip all idle CPU
+ * selection optimizations and simply check whether the previously
+ * used CPU is idle and within the allowed cpumask.
+ */
+ if (p->nr_cpus_allowed == 1) {
+ if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) &&
+ scx_idle_test_and_clear_cpu(prev_cpu))
+ cpu = prev_cpu;
+ else
+ cpu = -EBUSY;
+ } else {
+ cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags,
+ allowed ?: p->cpus_ptr, flags);
+ }
+#else
+ cpu = -EBUSY;
+#endif
+ if (scx_kf_allowed_if_unlocked())
+ task_rq_unlock(rq, p, &rf);
+
+ return cpu;
+}
+
/**
* scx_bpf_cpu_node - Return the NUMA node the given @cpu belongs to, or
* trigger an error if @cpu is invalid
@@ -790,7 +929,7 @@ static bool check_builtin_idle_enabled(void)
__bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
{
#ifdef CONFIG_NUMA
- if (!ops_cpu_valid(cpu, NULL))
+ if (!kf_cpu_valid(cpu, NULL))
return NUMA_NO_NODE;
return cpu_to_node(cpu);
@@ -806,9 +945,10 @@ __bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
* @wake_flags: %SCX_WAKE_* flags
* @is_idle: out parameter indicating whether the returned CPU is idle
*
- * Can only be called from ops.select_cpu() if the built-in CPU selection is
- * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set.
- * @p, @prev_cpu and @wake_flags match ops.select_cpu().
+ * Can be called from ops.select_cpu(), ops.enqueue(), or from an unlocked
+ * context such as a BPF test_run() call, as long as built-in CPU selection
+ * is enabled: ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE
+ * is set.
*
* Returns the picked CPU with *@is_idle indicating whether the picked CPU is
* currently idle and thus a good candidate for direct dispatching.
@@ -816,39 +956,52 @@ __bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *is_idle)
{
-#ifdef CONFIG_SMP
s32 cpu;
-#endif
- if (!ops_cpu_valid(prev_cpu, NULL))
- goto prev_cpu;
- if (!check_builtin_idle_enabled())
- goto prev_cpu;
-
- if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
- goto prev_cpu;
-
-#ifdef CONFIG_SMP
- cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
+ cpu = select_cpu_from_kfunc(p, prev_cpu, wake_flags, NULL, 0);
if (cpu >= 0) {
*is_idle = true;
return cpu;
}
-#endif
-
-prev_cpu:
*is_idle = false;
+
return prev_cpu;
}
/**
+ * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p,
+ * prioritizing those in @cpus_allowed
+ * @p: task_struct to select a CPU for
+ * @prev_cpu: CPU @p was on previously
+ * @wake_flags: %SCX_WAKE_* flags
+ * @cpus_allowed: cpumask of allowed CPUs
+ * @flags: %SCX_PICK_IDLE* flags
+ *
+ * Can be called from ops.select_cpu(), ops.enqueue(), or from an unlocked
+ * context such as a BPF test_run() call, as long as built-in CPU selection
+ * is enabled: ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE
+ * is set.
+ *
+ * @p, @prev_cpu and @wake_flags match ops.select_cpu().
+ *
+ * Returns the selected idle CPU, which will be automatically awakened upon
+ * returning from ops.select_cpu() and can be used for direct dispatch, or
+ * a negative value if no idle CPU is available.
+ */
+__bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags)
+{
+ return select_cpu_from_kfunc(p, prev_cpu, wake_flags, cpus_allowed, flags);
+}
+
+/**
* scx_bpf_get_idle_cpumask_node - Get a referenced kptr to the
* idle-tracking per-CPU cpumask of a target NUMA node.
* @node: target NUMA node
*
* Returns an empty cpumask if idle tracking is not enabled, if @node is
* not valid, or running on a UP kernel. In this case the actual error will
- * be reported to the BPF scheduler via scx_ops_error().
+ * be reported to the BPF scheduler via scx_error().
*/
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
{
@@ -873,7 +1026,7 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
{
if (static_branch_unlikely(&scx_builtin_idle_per_node)) {
- scx_ops_error("SCX_OPS_BUILTIN_IDLE_PER_NODE enabled");
+ scx_kf_error("SCX_OPS_BUILTIN_IDLE_PER_NODE enabled");
return cpu_none_mask;
}
@@ -895,7 +1048,7 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
*
* Returns an empty cpumask if idle tracking is not enabled, if @node is
* not valid, or running on a UP kernel. In this case the actual error will
- * be reported to the BPF scheduler via scx_ops_error().
+ * be reported to the BPF scheduler via scx_error().
*/
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask_node(int node)
{
@@ -924,7 +1077,7 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask_node(int node)
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
{
if (static_branch_unlikely(&scx_builtin_idle_per_node)) {
- scx_ops_error("SCX_OPS_BUILTIN_IDLE_PER_NODE enabled");
+ scx_kf_error("SCX_OPS_BUILTIN_IDLE_PER_NODE enabled");
return cpu_none_mask;
}
@@ -971,7 +1124,7 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
if (!check_builtin_idle_enabled())
return false;
- if (ops_cpu_valid(cpu, NULL))
+ if (kf_cpu_valid(cpu, NULL))
return scx_idle_test_and_clear_cpu(cpu);
else
return false;
@@ -1032,7 +1185,7 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
u64 flags)
{
if (static_branch_maybe(CONFIG_NUMA, &scx_builtin_idle_per_node)) {
- scx_ops_error("per-node idle tracking is enabled");
+ scx_kf_error("per-node idle tracking is enabled");
return -EBUSY;
}
@@ -1109,7 +1262,7 @@ __bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed,
s32 cpu;
if (static_branch_maybe(CONFIG_NUMA, &scx_builtin_idle_per_node)) {
- scx_ops_error("per-node idle tracking is enabled");
+ scx_kf_error("per-node idle tracking is enabled");
return -EBUSY;
}
@@ -1140,6 +1293,8 @@ BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu_node, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_select_cpu_and, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_idle)
static const struct btf_kfunc_id_set scx_kfunc_set_idle = {
@@ -1147,21 +1302,11 @@ static const struct btf_kfunc_id_set scx_kfunc_set_idle = {
.set = &scx_kfunc_ids_idle,
};
-BTF_KFUNCS_START(scx_kfunc_ids_select_cpu)
-BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
-BTF_KFUNCS_END(scx_kfunc_ids_select_cpu)
-
-static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = {
- .owner = THIS_MODULE,
- .set = &scx_kfunc_ids_select_cpu,
-};
-
int scx_idle_init(void)
{
int ret;
- ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_select_cpu) ||
- register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_idle) ||
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_idle) ||
register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_idle) ||
register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_idle);
diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h
index 511cc2221f7a..37be78a7502b 100644
--- a/kernel/sched/ext_idle.h
+++ b/kernel/sched/ext_idle.h
@@ -27,7 +27,8 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node
}
#endif /* CONFIG_SMP */
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags);
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags);
void scx_idle_enable(struct sched_ext_ops *ops);
void scx_idle_disable(void);
int scx_idle_init(void);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995a47..125912c0e9dd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3795,6 +3795,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
update_entity_lag(cfs_rq, se);
se->deadline -= se->vruntime;
se->rel_deadline = 1;
+ cfs_rq->nr_queued--;
if (!curr)
__dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
@@ -3821,10 +3822,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
enqueue_load_avg(cfs_rq, se);
if (se->on_rq) {
- update_load_add(&cfs_rq->load, se->load.weight);
place_entity(cfs_rq, se, 0);
+ update_load_add(&cfs_rq->load, se->load.weight);
if (!curr)
__enqueue_entity(cfs_rq, se);
+ cfs_rq->nr_queued++;
/*
* The entity's vruntime has been adjusted, so let's check
@@ -4933,13 +4935,6 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
goto done;
/*
- * To avoid overestimation of actual task utilization, skip updates if
- * we cannot grant there is idle time in this CPU.
- */
- if (dequeued > arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq))))
- return;
-
- /*
* To avoid underestimate of task utilization, skip updates of EWMA if
* we cannot grant that thread got all CPU time it wanted.
*/
@@ -6941,7 +6936,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* Let's add the task's estimated utilization to the cfs_rq's
* estimated utilization, before we update schedutil.
*/
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
+ if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
util_est_enqueue(&rq->cfs, p);
if (flags & ENQUEUE_DELAYED) {
@@ -7181,7 +7176,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
*/
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
+ if (!p->se.sched_delayed)
util_est_dequeue(&rq->cfs, p);
util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
@@ -7196,6 +7191,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
return true;
}
+static inline unsigned int cfs_h_nr_delayed(struct rq *rq)
+{
+ return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable);
+}
+
#ifdef CONFIG_SMP
/* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */
@@ -7357,8 +7357,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
- if (sync && cpu_rq(this_cpu)->nr_running == 1)
- return this_cpu;
+ if (sync) {
+ struct rq *rq = cpu_rq(this_cpu);
+
+ if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1)
+ return this_cpu;
+ }
if (available_idle_cpu(prev_cpu))
return prev_cpu;
@@ -10256,7 +10260,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group
(sgs->group_weight - sgs->idle_cpus != 1))
return false;
- return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
+ return sched_asym(env->sd, env->dst_cpu, READ_ONCE(group->asym_prefer_cpu));
}
/* One group has more than one SMT CPU while the other group does not */
@@ -10493,7 +10497,8 @@ static bool update_sd_pick_busiest(struct lb_env *env,
case group_asym_packing:
/* Prefer to move from lowest priority CPU's work */
- return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
+ return sched_asym_prefer(READ_ONCE(sds->busiest->asym_prefer_cpu),
+ READ_ONCE(sg->asym_prefer_cpu));
case group_misfit_task:
/*
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 81bc8b329ef1..93b038d48900 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -40,7 +40,7 @@ int housekeeping_any_cpu(enum hk_type type)
if (cpu < nr_cpu_ids)
return cpu;
- cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
+ cpu = cpumask_any_and_distribute(housekeeping.cpumasks[type], cpu_online_mask);
if (likely(cpu < nr_cpu_ids))
return cpu;
/*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index fa03ec3ed56a..e40422c37033 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -89,6 +89,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
rt_rq->rt_throttled = 0;
rt_rq->rt_runtime = 0;
raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+ rt_rq->tg = &root_task_group;
#endif
}
@@ -175,11 +176,14 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
+ /* Cannot fold with non-CONFIG_RT_GROUP_SCHED version, layout */
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
return rt_rq->rq;
}
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
+ WARN_ON(!rt_group_sched_enabled() && rt_se->rt_rq->tg != &root_task_group);
return rt_se->rt_rq;
}
@@ -187,11 +191,15 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
struct rt_rq *rt_rq = rt_se->rt_rq;
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
return rt_rq->rq;
}
void unregister_rt_sched_group(struct task_group *tg)
{
+ if (!rt_group_sched_enabled())
+ return;
+
if (tg->rt_se)
destroy_rt_bandwidth(&tg->rt_bandwidth);
}
@@ -200,6 +208,9 @@ void free_rt_sched_group(struct task_group *tg)
{
int i;
+ if (!rt_group_sched_enabled())
+ return;
+
for_each_possible_cpu(i) {
if (tg->rt_rq)
kfree(tg->rt_rq[i]);
@@ -244,6 +255,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
struct sched_rt_entity *rt_se;
int i;
+ if (!rt_group_sched_enabled())
+ return 1;
+
tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
if (!tg->rt_rq)
goto err;
@@ -482,9 +496,6 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
- if (!rt_rq->tg)
- return RUNTIME_INF;
-
return rt_rq->rt_runtime;
}
@@ -497,6 +508,11 @@ typedef struct task_group *rt_rq_iter_t;
static inline struct task_group *next_task_group(struct task_group *tg)
{
+ if (!rt_group_sched_enabled()) {
+ WARN_ON(tg != &root_task_group);
+ return NULL;
+ }
+
do {
tg = list_entry_rcu(tg->list.next,
typeof(struct task_group), list);
@@ -509,9 +525,9 @@ static inline struct task_group *next_task_group(struct task_group *tg)
}
#define for_each_rt_rq(rt_rq, iter, rq) \
- for (iter = container_of(&task_groups, typeof(*iter), list); \
- (iter = next_task_group(iter)) && \
- (rt_rq = iter->rt_rq[cpu_of(rq)]);)
+ for (iter = &root_task_group; \
+ iter && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+ iter = next_task_group(iter))
#define for_each_sched_rt_entity(rt_se) \
for (; rt_se; rt_se = rt_se->parent)
@@ -1066,13 +1082,12 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
-#ifdef CONFIG_RT_GROUP_SCHED
/*
* Change rq's cpupri only if rt_rq is the top queue.
*/
- if (&rq->rt != rt_rq)
+ if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
return;
-#endif
+
if (rq->online && prio < prev_prio)
cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}
@@ -1082,13 +1097,12 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
-#ifdef CONFIG_RT_GROUP_SCHED
/*
* Change rq's cpupri only if rt_rq is the top queue.
*/
- if (&rq->rt != rt_rq)
+ if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
return;
-#endif
+
if (rq->online && rt_rq->highest_prio.curr != prev_prio)
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
@@ -1156,8 +1170,7 @@ inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted++;
- if (rt_rq->tg)
- start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
+ start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}
static void
@@ -1257,11 +1270,9 @@ static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_arr
static inline struct sched_statistics *
__schedstats_from_rt_se(struct sched_rt_entity *rt_se)
{
-#ifdef CONFIG_RT_GROUP_SCHED
/* schedstats is not supported for rt group. */
if (!rt_entity_is_task(rt_se))
return NULL;
-#endif
return &rt_task_of(rt_se)->stats;
}
@@ -1883,6 +1894,27 @@ static int find_lowest_rq(struct task_struct *task)
return -1;
}
+static struct task_struct *pick_next_pushable_task(struct rq *rq)
+{
+ struct task_struct *p;
+
+ if (!has_pushable_tasks(rq))
+ return NULL;
+
+ p = plist_first_entry(&rq->rt.pushable_tasks,
+ struct task_struct, pushable_tasks);
+
+ BUG_ON(rq->cpu != task_cpu(p));
+ BUG_ON(task_current(rq, p));
+ BUG_ON(task_current_donor(rq, p));
+ BUG_ON(p->nr_cpus_allowed <= 1);
+
+ BUG_ON(!task_on_rq_queued(p));
+ BUG_ON(!rt_task(p));
+
+ return p;
+}
+
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
@@ -1913,18 +1945,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
/*
* We had to unlock the run queue. In
* the mean time, task could have
- * migrated already or had its affinity changed.
- * Also make sure that it wasn't scheduled on its rq.
+ * migrated already or had its affinity changed,
+ * therefore check if the task is still at the
+ * head of the pushable tasks list.
* It is possible the task was scheduled, set
* "migrate_disabled" and then got preempted, so we must
* check the task migration disable flag here too.
*/
- if (unlikely(task_rq(task) != rq ||
+ if (unlikely(is_migration_disabled(task) ||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
- task_on_cpu(rq, task) ||
- !rt_task(task) ||
- is_migration_disabled(task) ||
- !task_on_rq_queued(task))) {
+ task != pick_next_pushable_task(rq))) {
double_unlock_balance(rq, lowest_rq);
lowest_rq = NULL;
@@ -1944,27 +1974,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
return lowest_rq;
}
-static struct task_struct *pick_next_pushable_task(struct rq *rq)
-{
- struct task_struct *p;
-
- if (!has_pushable_tasks(rq))
- return NULL;
-
- p = plist_first_entry(&rq->rt.pushable_tasks,
- struct task_struct, pushable_tasks);
-
- BUG_ON(rq->cpu != task_cpu(p));
- BUG_ON(task_current(rq, p));
- BUG_ON(task_current_donor(rq, p));
- BUG_ON(p->nr_cpus_allowed <= 1);
-
- BUG_ON(!task_on_rq_queued(p));
- BUG_ON(!rt_task(p));
-
- return p;
-}
-
/*
* If the current CPU has more than one RT task, see if the non
* running task can migrate over to a CPU that is running a task
@@ -2602,8 +2611,9 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu)
{
struct rt_rq *rt_rq;
-#ifdef CONFIG_RT_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED // XXX maybe add task_rt_rq(), see also sched_rt_period_rt_rq
rt_rq = task_group(p)->rt_rq[cpu];
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
#else
rt_rq = &cpu_rq(cpu)->rt;
#endif
@@ -2713,6 +2723,9 @@ static int tg_rt_schedulable(struct task_group *tg, void *data)
tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
return -EBUSY;
+ if (WARN_ON(!rt_group_sched_enabled() && tg != &root_task_group))
+ return -EBUSY;
+
total = to_ratio(period, runtime);
/*
@@ -2868,7 +2881,7 @@ static int sched_rt_global_constraints(void)
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
/* Don't accept real-time tasks when there is no way for them to run */
- if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+ if (rt_group_sched_enabled() && rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
return 0;
return 1;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47972f34ea70..475bb5998295 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -813,15 +813,17 @@ struct rt_rq {
#ifdef CONFIG_RT_GROUP_SCHED
int rt_throttled;
- u64 rt_time;
- u64 rt_runtime;
+ u64 rt_time; /* consumed RT time, goes up in update_curr_rt */
+ u64 rt_runtime; /* allotted RT time, "slice" from rt_bandwidth, RT sharing/balancing */
/* Nests inside the rq lock: */
raw_spinlock_t rt_runtime_lock;
unsigned int rt_nr_boosted;
- struct rq *rq;
- struct task_group *tg;
+ struct rq *rq; /* this is always top-level rq, cache? */
+#endif
+#ifdef CONFIG_CGROUP_SCHED
+ struct task_group *tg; /* this tg has "this" rt_rq on given CPU for runnable entities */
#endif
};
@@ -1498,6 +1500,23 @@ static inline bool sched_group_cookie_match(struct rq *rq,
}
#endif /* !CONFIG_SCHED_CORE */
+#ifdef CONFIG_RT_GROUP_SCHED
+# ifdef CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED
+DECLARE_STATIC_KEY_FALSE(rt_group_sched);
+static inline bool rt_group_sched_enabled(void)
+{
+ return static_branch_unlikely(&rt_group_sched);
+}
+# else
+DECLARE_STATIC_KEY_TRUE(rt_group_sched);
+static inline bool rt_group_sched_enabled(void)
+{
+ return static_branch_likely(&rt_group_sched);
+}
+# endif /* CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED */
+#else
+# define rt_group_sched_enabled() false
+#endif /* CONFIG_RT_GROUP_SCHED */
static inline void lockdep_assert_rq_held(struct rq *rq)
{
@@ -1717,10 +1736,10 @@ extern struct balance_callback balance_push_callback;
#ifdef CONFIG_SCHED_CLASS_EXT
extern const struct sched_class ext_sched_class;
-DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */
+DECLARE_STATIC_KEY_FALSE(__scx_enabled); /* SCX BPF scheduler loaded */
DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */
-#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled)
+#define scx_enabled() static_branch_unlikely(&__scx_enabled)
#define scx_switched_all() static_branch_unlikely(&__scx_switched_all)
static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
@@ -2146,6 +2165,13 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif
#ifdef CONFIG_RT_GROUP_SCHED
+ /*
+ * p->rt.rt_rq is NULL initially and it is easier to assign
+ * root_task_group's rt_rq than switching in rt_rq_of_se()
+ * Clobbers tg(!)
+ */
+ if (!rt_group_sched_enabled())
+ tg = &root_task_group;
p->rt.rt_rq = tg->rt_rq[cpu];
p->rt.parent = tg->rt_se[cpu];
#endif
@@ -3509,8 +3535,6 @@ static inline bool sched_energy_enabled(void)
return static_branch_unlikely(&sched_energy_present);
}
-extern struct cpufreq_governor schedutil_gov;
-
#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
#define perf_domain_span(pd) NULL
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index c326de1344fb..547c1f05b667 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -634,13 +634,14 @@ change:
* Do not allow real-time tasks into groups that have no runtime
* assigned.
*/
- if (rt_bandwidth_enabled() && rt_policy(policy) &&
+ if (rt_group_sched_enabled() &&
+ rt_bandwidth_enabled() && rt_policy(policy) &&
task_group(p)->rt_bandwidth.rt_runtime == 0 &&
!task_group_is_autogroup(task_group(p))) {
retval = -EPERM;
goto unlock;
}
-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_SMP
if (dl_bandwidth_enabled() && dl_policy(policy) &&
!(attr->sched_flags & SCHED_FLAG_SUGOV)) {
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f1ebc60d967f..b958fe48e020 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -212,8 +212,6 @@ static bool sched_energy_update;
static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
{
bool any_asym_capacity = false;
- struct cpufreq_policy *policy;
- struct cpufreq_governor *gov;
int i;
/* EAS is enabled for asymmetric CPU capacity topologies. */
@@ -248,25 +246,12 @@ static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
return false;
}
- /* Do not attempt EAS if schedutil is not being used. */
- for_each_cpu(i, cpu_mask) {
- policy = cpufreq_cpu_get(i);
- if (!policy) {
- if (sched_debug()) {
- pr_info("rd %*pbl: Checking EAS, cpufreq policy not set for CPU: %d",
- cpumask_pr_args(cpu_mask), i);
- }
- return false;
- }
- gov = policy->governor;
- cpufreq_cpu_put(policy);
- if (gov != &schedutil_gov) {
- if (sched_debug()) {
- pr_info("rd %*pbl: Checking EAS, schedutil is mandatory\n",
- cpumask_pr_args(cpu_mask));
- }
- return false;
+ if (!cpufreq_ready_for_eas(cpu_mask)) {
+ if (sched_debug()) {
+ pr_info("rd %*pbl: Checking EAS: cpufreq is not ready\n",
+ cpumask_pr_args(cpu_mask));
}
+ return false;
}
return true;
@@ -1333,6 +1318,64 @@ next:
update_group_capacity(sd, cpu);
}
+#ifdef CONFIG_SMP
+
+/* Update the "asym_prefer_cpu" when arch_asym_cpu_priority() changes. */
+void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
+{
+ int asym_prefer_cpu = cpu;
+ struct sched_domain *sd;
+
+ guard(rcu)();
+
+ for_each_domain(cpu, sd) {
+ struct sched_group *sg;
+ int group_cpu;
+
+ if (!(sd->flags & SD_ASYM_PACKING))
+ continue;
+
+ /*
+ * Groups of overlapping domain are replicated per NUMA
+ * node and will require updating "asym_prefer_cpu" on
+ * each local copy.
+ *
+ * If you are hitting this warning, consider moving
+ * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
+ * which is shared by all the overlapping groups.
+ */
+ WARN_ON_ONCE(sd->flags & SD_OVERLAP);
+
+ sg = sd->groups;
+ if (cpu != sg->asym_prefer_cpu) {
+ /*
+ * Since the parent is a superset of the current group,
+ * if the cpu is not the "asym_prefer_cpu" at the
+ * current level, it cannot be the preferred CPU at a
+ * higher levels either.
+ */
+ if (!sched_asym_prefer(cpu, sg->asym_prefer_cpu))
+ return;
+
+ WRITE_ONCE(sg->asym_prefer_cpu, cpu);
+ continue;
+ }
+
+ /* Ranking has improved; CPU is still the preferred one. */
+ if (new_prio >= old_prio)
+ continue;
+
+ for_each_cpu(group_cpu, sched_group_span(sg)) {
+ if (sched_asym_prefer(group_cpu, asym_prefer_cpu))
+ asym_prefer_cpu = group_cpu;
+ }
+
+ WRITE_ONCE(sg->asym_prefer_cpu, asym_prefer_cpu);
+ }
+}
+
+#endif /* CONFIG_SMP */
+
/*
* Set of available CPUs grouped by their corresponding capacities
* Each list entry contains a CPU mask reflecting CPUs that share the same
@@ -2098,7 +2141,7 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
for (i = 0; i < sched_domains_numa_levels; i++) {
if (!masks[i][j])
break;
- cpu = cpumask_any_and(cpus, masks[i][j]);
+ cpu = cpumask_any_and_distribute(cpus, masks[i][j]);
if (cpu < nr_cpu_ids) {
found = cpu;
break;
@@ -2347,35 +2390,54 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
/*
* Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
- * any two given CPUs at this (non-NUMA) topology level.
+ * any two given CPUs on non-NUMA topology levels.
*/
-static bool topology_span_sane(struct sched_domain_topology_level *tl,
- const struct cpumask *cpu_map, int cpu)
+static bool topology_span_sane(const struct cpumask *cpu_map)
{
- int i = cpu + 1;
+ struct sched_domain_topology_level *tl;
+ struct cpumask *covered, *id_seen;
+ int cpu;
- /* NUMA levels are allowed to overlap */
- if (tl->flags & SDTL_OVERLAP)
- return true;
+ lockdep_assert_held(&sched_domains_mutex);
+ covered = sched_domains_tmpmask;
+ id_seen = sched_domains_tmpmask2;
+
+ for_each_sd_topology(tl) {
+
+ /* NUMA levels are allowed to overlap */
+ if (tl->flags & SDTL_OVERLAP)
+ continue;
+
+ cpumask_clear(covered);
+ cpumask_clear(id_seen);
- /*
- * Non-NUMA levels cannot partially overlap - they must be either
- * completely equal or completely disjoint. Otherwise we can end up
- * breaking the sched_group lists - i.e. a later get_group() pass
- * breaks the linking done for an earlier span.
- */
- for_each_cpu_from(i, cpu_map) {
/*
- * We should 'and' all those masks with 'cpu_map' to exactly
- * match the topology we're about to build, but that can only
- * remove CPUs, which only lessens our ability to detect
- * overlaps
+ * Non-NUMA levels cannot partially overlap - they must be either
+ * completely equal or completely disjoint. Otherwise we can end up
+ * breaking the sched_group lists - i.e. a later get_group() pass
+ * breaks the linking done for an earlier span.
*/
- if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
- cpumask_intersects(tl->mask(cpu), tl->mask(i)))
- return false;
- }
+ for_each_cpu(cpu, cpu_map) {
+ const struct cpumask *tl_cpu_mask = tl->mask(cpu);
+ int id;
+
+ /* lowest bit set in this mask is used as a unique id */
+ id = cpumask_first(tl_cpu_mask);
+
+ if (cpumask_test_cpu(id, id_seen)) {
+ /* First CPU has already been seen, ensure identical spans */
+ if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
+ return false;
+ } else {
+ /* First CPU hasn't been seen before, ensure it's a completely new span */
+ if (cpumask_intersects(tl_cpu_mask, covered))
+ return false;
+ cpumask_or(covered, covered, tl_cpu_mask);
+ cpumask_set_cpu(id, id_seen);
+ }
+ }
+ }
return true;
}
@@ -2408,9 +2470,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
sd = NULL;
for_each_sd_topology(tl) {
- if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
- goto error;
-
sd = build_sched_domain(tl, cpu_map, attr, sd, i);
has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
@@ -2424,6 +2483,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
}
}
+ if (WARN_ON(!topology_span_sane(cpu_map)))
+ goto error;
+
/* Build the groups for the domains */
for_each_cpu(i, cpu_map) {
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
diff --git a/kernel/signal.c b/kernel/signal.c
index f8859faa26c5..148082db9a55 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4981,9 +4981,20 @@ static const struct ctl_table signal_debug_table[] = {
#endif
};
+static const struct ctl_table signal_table[] = {
+ {
+ .procname = "print-fatal-signals",
+ .data = &print_fatal_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+};
+
static int __init init_signal_sysctls(void)
{
register_sysctl_init("debug", signal_debug_table);
+ register_sysctl_init("kernel", signal_table);
return 0;
}
early_initcall(init_signal_sysctls);
diff --git a/kernel/sys.c b/kernel/sys.c
index c434968e9f5d..adc0de0aa364 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>
+#include <linux/futex.h>
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
@@ -2820,6 +2821,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return -EINVAL;
error = posixtimer_create_prctl(arg2);
break;
+ case PR_FUTEX_HASH:
+ error = futex_hash_prctl(arg2, arg3, arg4);
+ break;
default:
trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
error = -EINVAL;
diff --git a/kernel/sysctl-test.c b/kernel/sysctl-test.c
index eb2842bd0557..92f94ea28957 100644
--- a/kernel/sysctl-test.c
+++ b/kernel/sysctl-test.c
@@ -367,54 +367,6 @@ static void sysctl_test_api_dointvec_write_single_greater_int_max(
KUNIT_EXPECT_EQ(test, 0, *((int *)table.data));
}
-/*
- * Test that registering an invalid extra value is not allowed.
- */
-static void sysctl_test_register_sysctl_sz_invalid_extra_value(
- struct kunit *test)
-{
- unsigned char data = 0;
- const struct ctl_table table_foo[] = {
- {
- .procname = "foo",
- .data = &data,
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_FOUR,
- .extra2 = SYSCTL_ONE_THOUSAND,
- },
- };
-
- const struct ctl_table table_bar[] = {
- {
- .procname = "bar",
- .data = &data,
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_NEG_ONE,
- .extra2 = SYSCTL_ONE_HUNDRED,
- },
- };
-
- const struct ctl_table table_qux[] = {
- {
- .procname = "qux",
- .data = &data,
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_TWO_HUNDRED,
- },
- };
-
- KUNIT_EXPECT_NULL(test, register_sysctl("foo", table_foo));
- KUNIT_EXPECT_NULL(test, register_sysctl("foo", table_bar));
- KUNIT_EXPECT_NOT_NULL(test, register_sysctl("foo", table_qux));
-}
-
static struct kunit_case sysctl_test_cases[] = {
KUNIT_CASE(sysctl_test_api_dointvec_null_tbl_data),
KUNIT_CASE(sysctl_test_api_dointvec_table_maxlen_unset),
@@ -426,7 +378,6 @@ static struct kunit_case sysctl_test_cases[] = {
KUNIT_CASE(sysctl_test_dointvec_write_happy_single_negative),
KUNIT_CASE(sysctl_test_api_dointvec_write_single_less_int_min),
KUNIT_CASE(sysctl_test_api_dointvec_write_single_greater_int_max),
- KUNIT_CASE(sysctl_test_register_sysctl_sz_invalid_extra_value),
{}
};
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3b7a7308e35b..9b4f0cff76ea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -22,8 +22,6 @@
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/bitmap.h>
-#include <linux/signal.h>
-#include <linux/panic.h>
#include <linux/printk.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
@@ -46,7 +44,6 @@
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
-#include <linux/ftrace.h>
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
@@ -61,12 +58,8 @@
#ifdef CONFIG_X86
#include <asm/nmi.h>
-#include <asm/stacktrace.h>
#include <asm/io.h>
#endif
-#ifdef CONFIG_SPARC
-#include <asm/setup.h>
-#endif
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
@@ -1588,13 +1581,6 @@ int proc_do_static_key(const struct ctl_table *table, int write,
}
static const struct ctl_table kern_table[] = {
- {
- .procname = "panic",
- .data = &panic_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
@@ -1612,45 +1598,6 @@ static const struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
- {
- .procname = "print-fatal-signals",
- .data = &print_fatal_signals,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#ifdef CONFIG_SPARC
- {
- .procname = "reboot-cmd",
- .data = reboot_command,
- .maxlen = 256,
- .mode = 0644,
- .proc_handler = proc_dostring,
- },
- {
- .procname = "stop-a",
- .data = &stop_a_enabled,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "scons-poweroff",
- .data = &scons_pwroff,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_SPARC64
- {
- .procname = "tsb-ratio",
- .data = &sysctl_tsb_ratio,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_PARISC
{
.procname = "soft-power",
@@ -1669,38 +1616,6 @@ static const struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_STACK_TRACER
- {
- .procname = "stack_tracer_enabled",
- .data = &stack_tracer_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = stack_trace_sysctl,
- },
-#endif
-#ifdef CONFIG_TRACING
- {
- .procname = "ftrace_dump_on_oops",
- .data = &ftrace_dump_on_oops,
- .maxlen = MAX_TRACER_SIZE,
- .mode = 0644,
- .proc_handler = proc_dostring,
- },
- {
- .procname = "traceoff_on_warning",
- .data = &__disable_trace_on_warning,
- .maxlen = sizeof(__disable_trace_on_warning),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tracepoint_printk",
- .data = &tracepoint_printk,
- .maxlen = sizeof(tracepoint_printk),
- .mode = 0644,
- .proc_handler = tracepoint_printk_sysctl,
- },
-#endif
#ifdef CONFIG_MODULES
{
.procname = "modprobe",
@@ -1773,20 +1688,6 @@ static const struct ctl_table kern_table[] = {
.extra2 = SYSCTL_MAXOLDUID,
},
{
- .procname = "panic_on_oops",
- .data = &panic_on_oops,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "panic_print",
- .data = &panic_print,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
.procname = "ngroups_max",
.data = (void *)&ngroups_max,
.maxlen = sizeof (int),
@@ -1837,15 +1738,6 @@ static const struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
- {
- .procname = "panic_on_warn",
- .data = &panic_on_warn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
#ifdef CONFIG_TREE_RCU
{
.procname = "panic_on_rcu_stall",
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0ddccdff119a..577f0e6842d4 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -70,12 +70,10 @@ static DEFINE_SPINLOCK(rtcdev_lock);
*/
struct rtc_device *alarmtimer_get_rtcdev(void)
{
- unsigned long flags;
struct rtc_device *ret;
- spin_lock_irqsave(&rtcdev_lock, flags);
+ guard(spinlock_irqsave)(&rtcdev_lock);
ret = rtcdev;
- spin_unlock_irqrestore(&rtcdev_lock, flags);
return ret;
}
@@ -83,7 +81,6 @@ EXPORT_SYMBOL_GPL(alarmtimer_get_rtcdev);
static int alarmtimer_rtc_add_device(struct device *dev)
{
- unsigned long flags;
struct rtc_device *rtc = to_rtc_device(dev);
struct platform_device *pdev;
int ret = 0;
@@ -101,25 +98,18 @@ static int alarmtimer_rtc_add_device(struct device *dev)
if (!IS_ERR(pdev))
device_init_wakeup(&pdev->dev, true);
- spin_lock_irqsave(&rtcdev_lock, flags);
- if (!IS_ERR(pdev) && !rtcdev) {
- if (!try_module_get(rtc->owner)) {
+ scoped_guard(spinlock_irqsave, &rtcdev_lock) {
+ if (!IS_ERR(pdev) && !rtcdev && try_module_get(rtc->owner)) {
+ rtcdev = rtc;
+ /* hold a reference so it doesn't go away */
+ get_device(dev);
+ pdev = NULL;
+ } else {
ret = -1;
- goto unlock;
}
-
- rtcdev = rtc;
- /* hold a reference so it doesn't go away */
- get_device(dev);
- pdev = NULL;
- } else {
- ret = -1;
}
-unlock:
- spin_unlock_irqrestore(&rtcdev_lock, flags);
platform_device_unregister(pdev);
-
return ret;
}
@@ -198,7 +188,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
struct alarm *alarm = container_of(timer, struct alarm, timer);
struct alarm_base *base = &alarm_bases[alarm->type];
- scoped_guard (spinlock_irqsave, &base->lock)
+ scoped_guard(spinlock_irqsave, &base->lock)
alarmtimer_dequeue(base, alarm);
if (alarm->function)
@@ -228,17 +218,16 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
static int alarmtimer_suspend(struct device *dev)
{
ktime_t min, now, expires;
- int i, ret, type;
struct rtc_device *rtc;
- unsigned long flags;
struct rtc_time tm;
+ int i, ret, type;
- spin_lock_irqsave(&freezer_delta_lock, flags);
- min = freezer_delta;
- expires = freezer_expires;
- type = freezer_alarmtype;
- freezer_delta = 0;
- spin_unlock_irqrestore(&freezer_delta_lock, flags);
+ scoped_guard(spinlock_irqsave, &freezer_delta_lock) {
+ min = freezer_delta;
+ expires = freezer_expires;
+ type = freezer_alarmtype;
+ freezer_delta = 0;
+ }
rtc = alarmtimer_get_rtcdev();
/* If we have no rtcdev, just return */
@@ -251,9 +240,8 @@ static int alarmtimer_suspend(struct device *dev)
struct timerqueue_node *next;
ktime_t delta;
- spin_lock_irqsave(&base->lock, flags);
- next = timerqueue_getnext(&base->timerqueue);
- spin_unlock_irqrestore(&base->lock, flags);
+ scoped_guard(spinlock_irqsave, &base->lock)
+ next = timerqueue_getnext(&base->timerqueue);
if (!next)
continue;
delta = ktime_sub(next->expires, base->get_ktime());
@@ -352,13 +340,12 @@ EXPORT_SYMBOL_GPL(alarm_init);
void alarm_start(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- unsigned long flags;
- spin_lock_irqsave(&base->lock, flags);
- alarm->node.expires = start;
- alarmtimer_enqueue(base, alarm);
- hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
- spin_unlock_irqrestore(&base->lock, flags);
+ scoped_guard(spinlock_irqsave, &base->lock) {
+ alarm->node.expires = start;
+ alarmtimer_enqueue(base, alarm);
+ hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
+ }
trace_alarmtimer_start(alarm, base->get_ktime());
}
@@ -381,13 +368,11 @@ EXPORT_SYMBOL_GPL(alarm_start_relative);
void alarm_restart(struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- unsigned long flags;
- spin_lock_irqsave(&base->lock, flags);
+ guard(spinlock_irqsave)(&base->lock);
hrtimer_set_expires(&alarm->timer, alarm->node.expires);
hrtimer_restart(&alarm->timer);
alarmtimer_enqueue(base, alarm);
- spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(alarm_restart);
@@ -401,14 +386,13 @@ EXPORT_SYMBOL_GPL(alarm_restart);
int alarm_try_to_cancel(struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- unsigned long flags;
int ret;
- spin_lock_irqsave(&base->lock, flags);
- ret = hrtimer_try_to_cancel(&alarm->timer);
- if (ret >= 0)
- alarmtimer_dequeue(base, alarm);
- spin_unlock_irqrestore(&base->lock, flags);
+ scoped_guard(spinlock_irqsave, &base->lock) {
+ ret = hrtimer_try_to_cancel(&alarm->timer);
+ if (ret >= 0)
+ alarmtimer_dequeue(base, alarm);
+ }
trace_alarmtimer_cancel(alarm, base->get_ktime());
return ret;
@@ -479,7 +463,6 @@ EXPORT_SYMBOL_GPL(alarm_forward_now);
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
{
struct alarm_base *base;
- unsigned long flags;
ktime_t delta;
switch(type) {
@@ -498,13 +481,12 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
delta = ktime_sub(absexp, base->get_ktime());
- spin_lock_irqsave(&freezer_delta_lock, flags);
+ guard(spinlock_irqsave)(&freezer_delta_lock);
if (!freezer_delta || (delta < freezer_delta)) {
freezer_delta = delta;
freezer_expires = absexp;
freezer_alarmtype = type;
}
- spin_unlock_irqrestore(&freezer_delta_lock, flags);
}
/**
@@ -515,9 +497,9 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid)
{
if (clockid == CLOCK_REALTIME_ALARM)
return ALARM_REALTIME;
- if (clockid == CLOCK_BOOTTIME_ALARM)
- return ALARM_BOOTTIME;
- return -1;
+
+ WARN_ON_ONCE(clockid != CLOCK_BOOTTIME_ALARM);
+ return ALARM_BOOTTIME;
}
/**
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index bb48498ebb5a..6a8bc7da9062 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -310,7 +310,7 @@ static void clocksource_verify_choose_cpus(void)
{
int cpu, i, n = verify_n_cpus;
- if (n < 0) {
+ if (n < 0 || n >= num_online_cpus()) {
/* Check all of the CPUs. */
cpumask_copy(&cpus_chosen, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index bc4db9e5ab70..34eeacac2253 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -75,13 +75,11 @@ struct clocksource * __init __weak clocksource_default_clock(void)
static struct clocksource refined_jiffies;
-int register_refined_jiffies(long cycles_per_second)
+void __init register_refined_jiffies(long cycles_per_second)
{
u64 nsec_per_tick, shift_hz;
long cycles_per_tick;
-
-
refined_jiffies = clocksource_jiffies;
refined_jiffies.name = "refined-jiffies";
refined_jiffies.rating++;
@@ -100,5 +98,4 @@ int register_refined_jiffies(long cycles_per_second)
refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
__clocksource_register(&refined_jiffies);
- return 0;
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 6222112533a7..2053b1a4c9e4 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -30,8 +30,6 @@
#include "timekeeping.h"
#include "posix-timers.h"
-static struct kmem_cache *posix_timers_cache;
-
/*
* Timers are managed in a hash table for lockless lookup. The hash key is
* constructed from current::signal and the timer ID and the timer is
@@ -49,10 +47,12 @@ struct timer_hash_bucket {
static struct {
struct timer_hash_bucket *buckets;
unsigned long mask;
-} __timer_data __ro_after_init __aligned(2*sizeof(long));
+ struct kmem_cache *cache;
+} __timer_data __ro_after_init __aligned(4*sizeof(long));
-#define timer_buckets (__timer_data.buckets)
-#define timer_hashmask (__timer_data.mask)
+#define timer_buckets (__timer_data.buckets)
+#define timer_hashmask (__timer_data.mask)
+#define posix_timers_cache (__timer_data.cache)
static const struct k_clock * const posix_clocks[];
static const struct k_clock *clockid_to_kclock(const clockid_t id);
@@ -283,14 +283,6 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
return 0;
}
-static __init int init_posix_timers(void)
-{
- posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof(struct k_itimer),
- __alignof__(struct k_itimer), SLAB_ACCOUNT, NULL);
- return 0;
-}
-__initcall(init_posix_timers);
-
/*
* The siginfo si_overrun field and the return value of timer_getoverrun(2)
* are of type int. Clamp the overrun value to INT_MAX
@@ -1556,6 +1548,11 @@ static int __init posixtimer_init(void)
unsigned long i, size;
unsigned int shift;
+ posix_timers_cache = kmem_cache_create("posix_timers_cache",
+ sizeof(struct k_itimer),
+ __alignof__(struct k_itimer),
+ SLAB_ACCOUNT, NULL);
+
if (IS_ENABLED(CONFIG_BASE_SMALL))
size = 512;
else
diff --git a/kernel/time/sleep_timeout.c b/kernel/time/sleep_timeout.c
index c0e960a5de39..5aa38b2cf40a 100644
--- a/kernel/time/sleep_timeout.c
+++ b/kernel/time/sleep_timeout.c
@@ -100,7 +100,7 @@ signed long __sched schedule_timeout(signed long timeout)
timer_delete_sync(&timer.timer);
/* Remove the timer from the object tracker */
- destroy_timer_on_stack(&timer.timer);
+ timer_destroy_on_stack(&timer.timer);
timeout = expire - jiffies;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 4d915c0a263c..553fa469d7cc 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -386,32 +386,6 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
}
/**
- * __round_jiffies - function to round jiffies to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * __round_jiffies() rounds an absolute time in the future (in jiffies)
- * up or down to (approximately) full seconds. This is useful for timers
- * for which the exact time they fire does not matter too much, as long as
- * they fire approximately every X seconds.
- *
- * By rounding these timers to whole seconds, all such timers will fire
- * at the same time, rather than at various times spread out. The goal
- * of this is to have the CPU wake up less, which saves power.
- *
- * The exact rounding is skewed for each processor to avoid all
- * processors firing at the exact same time, which could lead
- * to lock contention or spurious cache line bouncing.
- *
- * The return value is the rounded version of the @j parameter.
- */
-unsigned long __round_jiffies(unsigned long j, int cpu)
-{
- return round_jiffies_common(j, cpu, false);
-}
-EXPORT_SYMBOL_GPL(__round_jiffies);
-
-/**
* __round_jiffies_relative - function to round jiffies to a full second
* @j: the time in (relative) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
@@ -483,22 +457,6 @@ unsigned long round_jiffies_relative(unsigned long j)
EXPORT_SYMBOL_GPL(round_jiffies_relative);
/**
- * __round_jiffies_up - function to round jiffies up to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * This is the same as __round_jiffies() except that it will never
- * round down. This is useful for timeouts for which the exact time
- * of firing does not matter too much, as long as they don't fire too
- * early.
- */
-unsigned long __round_jiffies_up(unsigned long j, int cpu)
-{
- return round_jiffies_common(j, cpu, true);
-}
-EXPORT_SYMBOL_GPL(__round_jiffies_up);
-
-/**
* __round_jiffies_up_relative - function to round jiffies up to a full second
* @j: the time in (relative) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
@@ -850,7 +808,7 @@ static void do_init_timer(struct timer_list *timer,
unsigned int flags,
const char *name, struct lock_class_key *key);
-void init_timer_on_stack_key(struct timer_list *timer,
+void timer_init_key_on_stack(struct timer_list *timer,
void (*func)(struct timer_list *),
unsigned int flags,
const char *name, struct lock_class_key *key)
@@ -858,13 +816,13 @@ void init_timer_on_stack_key(struct timer_list *timer,
debug_object_init_on_stack(timer, &timer_debug_descr);
do_init_timer(timer, func, flags, name, key);
}
-EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
+EXPORT_SYMBOL_GPL(timer_init_key_on_stack);
-void destroy_timer_on_stack(struct timer_list *timer)
+void timer_destroy_on_stack(struct timer_list *timer)
{
debug_object_free(timer, &timer_debug_descr);
}
-EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
+EXPORT_SYMBOL_GPL(timer_destroy_on_stack);
#else
static inline void debug_timer_init(struct timer_list *timer) { }
@@ -904,7 +862,7 @@ static void do_init_timer(struct timer_list *timer,
}
/**
- * init_timer_key - initialize a timer
+ * timer_init_key - initialize a timer
* @timer: the timer to be initialized
* @func: timer callback function
* @flags: timer flags
@@ -912,17 +870,17 @@ static void do_init_timer(struct timer_list *timer,
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
*
- * init_timer_key() must be done to a timer prior to calling *any* of the
+ * timer_init_key() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
-void init_timer_key(struct timer_list *timer,
+void timer_init_key(struct timer_list *timer,
void (*func)(struct timer_list *), unsigned int flags,
const char *name, struct lock_class_key *key)
{
debug_init(timer);
do_init_timer(timer, func, flags, name, key);
}
-EXPORT_SYMBOL(init_timer_key);
+EXPORT_SYMBOL(timer_init_key);
static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
@@ -1511,7 +1469,7 @@ static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown)
}
/**
- * try_to_del_timer_sync - Try to deactivate a timer
+ * timer_delete_sync_try - Try to deactivate a timer
* @timer: Timer to deactivate
*
* This function tries to deactivate a timer. On success the timer is not
@@ -1526,11 +1484,11 @@ static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown)
* * %1 - The timer was pending and deactivated
* * %-1 - The timer callback function is running on a different CPU
*/
-int try_to_del_timer_sync(struct timer_list *timer)
+int timer_delete_sync_try(struct timer_list *timer)
{
return __try_to_del_timer_sync(timer, false);
}
-EXPORT_SYMBOL(try_to_del_timer_sync);
+EXPORT_SYMBOL(timer_delete_sync_try);
#ifdef CONFIG_PREEMPT_RT
static __init void timer_base_init_expiry_lock(struct timer_base *base)
@@ -1900,7 +1858,7 @@ static void timer_recalc_next_expiry(struct timer_base *base)
unsigned long clk, next, adj;
unsigned lvl, offset = 0;
- next = base->clk + NEXT_TIMER_MAX_DELTA;
+ next = base->clk + TIMER_NEXT_MAX_DELTA;
clk = base->clk;
for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
@@ -1963,7 +1921,7 @@ static void timer_recalc_next_expiry(struct timer_base *base)
WRITE_ONCE(base->next_expiry, next);
base->next_expiry_recalc = false;
- base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
+ base->timers_pending = !(next == base->clk + TIMER_NEXT_MAX_DELTA);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -2015,7 +1973,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
* easy comparable to find out which base holds the first pending timer.
*/
if (!base->timers_pending)
- WRITE_ONCE(base->next_expiry, basej + NEXT_TIMER_MAX_DELTA);
+ WRITE_ONCE(base->next_expiry, basej + TIMER_NEXT_MAX_DELTA);
return base->next_expiry;
}
@@ -2399,7 +2357,7 @@ static inline void __run_timers(struct timer_base *base)
* timer at this clk are that all matching timers have been
* dequeued or no timer has been queued since
* base::next_expiry was set to base::clk +
- * NEXT_TIMER_MAX_DELTA.
+ * TIMER_NEXT_MAX_DELTA.
*/
WARN_ON_ONCE(!levels && !base->next_expiry_recalc
&& base->timers_pending);
@@ -2544,7 +2502,7 @@ int timers_prepare_cpu(unsigned int cpu)
for (b = 0; b < NR_BASES; b++) {
base = per_cpu_ptr(&timer_bases[b], cpu);
base->clk = jiffies;
- base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ base->next_expiry = base->clk + TIMER_NEXT_MAX_DELTA;
base->next_expiry_recalc = false;
base->timers_pending = false;
base->is_idle = false;
@@ -2599,7 +2557,7 @@ static void __init init_timer_cpu(int cpu)
base->cpu = cpu;
raw_spin_lock_init(&base->lock);
base->clk = jiffies;
- base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ base->next_expiry = base->clk + TIMER_NEXT_MAX_DELTA;
timer_base_init_expiry_lock(base);
}
}
@@ -2612,7 +2570,7 @@ static void __init init_timer_cpus(void)
init_timer_cpu(cpu);
}
-void __init init_timers(void)
+void __init timers_init(void)
{
init_timer_cpus();
posix_cputimers_init_work();
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 3679a6d18934..3f6a7bdc6edf 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -893,11 +893,6 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
rcu_read_unlock();
}
-static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio)
-{
- blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BOUNCE, 0);
-}
-
static void blk_add_trace_bio_complete(void *ignore,
struct request_queue *q, struct bio *bio)
{
@@ -1089,8 +1084,6 @@ static void blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
WARN_ON(ret);
- ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
- WARN_ON(ret);
ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
WARN_ON(ret);
ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
@@ -1125,7 +1118,6 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
- unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
@@ -1462,7 +1454,6 @@ static const struct {
[__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
[__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
[__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
- [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
[__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
};
@@ -1896,6 +1887,8 @@ void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
rwbs[i++] = 'S';
if (opf & REQ_META)
rwbs[i++] = 'M';
+ if (opf & REQ_ATOMIC)
+ rwbs[i++] = 'U';
rwbs[i] = '\0';
}
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 95c6e3473a76..ba7ff14f5339 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -454,7 +454,8 @@ static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *
struct fprobe_hlist_node *node;
int ret = 0;
- hlist_for_each_entry_rcu(node, head, hlist) {
+ hlist_for_each_entry_rcu(node, head, hlist,
+ lockdep_is_held(&fprobe_mutex)) {
if (!within_module(node->addr, mod))
continue;
if (delete_fprobe_node(node))
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c0f877d39a24..3f9bf562beea 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1887,10 +1887,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
head_page = cpu_buffer->head_page;
- /* If both the head and commit are on the reader_page then we are done. */
- if (head_page == cpu_buffer->reader_page &&
- head_page == cpu_buffer->commit_page)
+ /* If the commit_buffer is the reader page, update the commit page */
+ if (meta->commit_buffer == (unsigned long)cpu_buffer->reader_page->page) {
+ cpu_buffer->commit_page = cpu_buffer->reader_page;
+ /* Nothing more to do, the only page is the reader page */
goto done;
+ }
/* Iterate until finding the commit page */
for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5b8db27fb6ef..d3459e715fbc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -120,6 +120,7 @@ static int tracing_disabled = 1;
cpumask_var_t __read_mostly tracing_buffer_mask;
+#define MAX_TRACER_SIZE 100
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
*
@@ -142,7 +143,40 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
/* When set, tracing will stop when a WARN*() is hit */
-int __disable_trace_on_warning;
+static int __disable_trace_on_warning;
+
+int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+static const struct ctl_table trace_sysctl_table[] = {
+ {
+ .procname = "ftrace_dump_on_oops",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = MAX_TRACER_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "traceoff_on_warning",
+ .data = &__disable_trace_on_warning,
+ .maxlen = sizeof(__disable_trace_on_warning),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tracepoint_printk",
+ .data = &tracepoint_printk,
+ .maxlen = sizeof(tracepoint_printk),
+ .mode = 0644,
+ .proc_handler = tracepoint_printk_sysctl,
+ },
+};
+
+static int __init init_trace_sysctls(void)
+{
+ register_sysctl_init("kernel", trace_sysctl_table);
+ return 0;
+}
+subsys_initcall(init_trace_sysctls);
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index a322e4f249a5..5d64a18cacac 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -16,7 +16,7 @@
#include "trace_output.h" /* for trace_event_sem */
#include "trace_dynevent.h"
-static DEFINE_MUTEX(dyn_event_ops_mutex);
+DEFINE_MUTEX(dyn_event_ops_mutex);
static LIST_HEAD(dyn_event_ops_list);
bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
@@ -116,6 +116,20 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
return ret;
}
+/*
+ * Locked version of event creation. The event creation must be protected by
+ * dyn_event_ops_mutex because of protecting trace_probe_log.
+ */
+int dyn_event_create(const char *raw_command, struct dyn_event_operations *type)
+{
+ int ret;
+
+ mutex_lock(&dyn_event_ops_mutex);
+ ret = type->create(raw_command);
+ mutex_unlock(&dyn_event_ops_mutex);
+ return ret;
+}
+
static int create_dyn_event(const char *raw_command)
{
struct dyn_event_operations *ops;
diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h
index 936477a111d3..beee3f8d7544 100644
--- a/kernel/trace/trace_dynevent.h
+++ b/kernel/trace/trace_dynevent.h
@@ -100,6 +100,7 @@ void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos);
void dyn_event_seq_stop(struct seq_file *m, void *v);
int dyn_events_release_all(struct dyn_event_operations *type);
int dyn_event_release(const char *raw_command, struct dyn_event_operations *type);
+int dyn_event_create(const char *raw_command, struct dyn_event_operations *type);
/*
* for_each_dyn_event - iterate over the dyn_event list
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index c08355c3ef32..916555f0de81 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -969,10 +969,13 @@ static int __trace_eprobe_create(int argc, const char *argv[])
goto error;
}
}
+ trace_probe_log_clear();
return ret;
+
parse_error:
ret = -EINVAL;
error:
+ trace_probe_log_clear();
trace_event_probe_cleanup(ep);
return ret;
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index b66b6d235d91..6e87ae2a1a66 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1560,7 +1560,7 @@ stacktrace_trigger(struct event_trigger_data *data,
struct trace_event_file *file = data->private_data;
if (file)
- __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP);
+ __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP);
else
trace_dump_stack(STACK_SKIP);
}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 98ccf3f00c51..4e37a0f6aaa3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -633,11 +633,7 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip,
static __always_inline void trace_stack(struct trace_array *tr)
{
- unsigned int trace_ctx;
-
- trace_ctx = tracing_gen_ctx();
-
- __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP);
+ __trace_stack(tr, tracing_gen_ctx_dec(), FTRACE_STACK_SKIP);
}
static void
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2703b96d8990..3e5c47b6d7b2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1089,7 +1089,7 @@ static int create_or_delete_trace_kprobe(const char *raw_command)
if (raw_command[0] == '-')
return dyn_event_release(raw_command, &trace_kprobe_ops);
- ret = trace_kprobe_create(raw_command);
+ ret = dyn_event_create(raw_command, &trace_kprobe_ops);
return ret == -ECANCELED ? -EINVAL : ret;
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2eeecb6c95ee..424751cdf31f 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -154,9 +154,12 @@ fail:
}
static struct trace_probe_log trace_probe_log;
+extern struct mutex dyn_event_ops_mutex;
void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
{
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
trace_probe_log.subsystem = subsystem;
trace_probe_log.argc = argc;
trace_probe_log.argv = argv;
@@ -165,11 +168,15 @@ void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
void trace_probe_log_clear(void)
{
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
memset(&trace_probe_log, 0, sizeof(trace_probe_log));
}
void trace_probe_log_set_index(int index)
{
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
trace_probe_log.index = index;
}
@@ -178,6 +185,8 @@ void __trace_probe_log_err(int offset, int err_type)
char *command, *p;
int i, len = 0, pos = 0;
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
if (!trace_probe_log.argv)
return;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 14c6f272c4d8..e34223c8065d 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -32,7 +32,7 @@ static arch_spinlock_t stack_trace_max_lock =
DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);
-int stack_tracer_enabled;
+static int stack_tracer_enabled;
static void print_max_stack(void)
{
@@ -578,3 +578,23 @@ static __init int stack_trace_init(void)
}
device_initcall(stack_trace_init);
+
+
+static const struct ctl_table trace_stack_sysctl_table[] = {
+ {
+ .procname = "stack_tracer_enabled",
+ .data = &stack_tracer_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = stack_trace_sysctl,
+ },
+};
+
+static int __init init_trace_stack_sysctls(void)
+{
+ register_sysctl_init("kernel", trace_stack_sysctl_table);
+ return 0;
+}
+subsys_initcall(init_trace_stack_sysctls);
+
+
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 3386439ec9f6..35cf76c75dd7 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -741,7 +741,7 @@ static int create_or_delete_trace_uprobe(const char *raw_command)
if (raw_command[0] == '-')
return dyn_event_release(raw_command, &trace_uprobe_ops);
- ret = trace_uprobe_create(raw_command);
+ ret = dyn_event_create(raw_command, &trace_uprobe_ops);
return ret == -ECANCELED ? -EINVAL : ret;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cf6203282737..3bef0754cf73 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -686,7 +686,7 @@ EXPORT_SYMBOL_GPL(destroy_work_on_stack);
void destroy_delayed_work_on_stack(struct delayed_work *work)
{
- destroy_timer_on_stack(&work->timer);
+ timer_destroy_on_stack(&work->timer);
debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
@@ -3241,7 +3241,7 @@ __acquires(&pool->lock)
* point will only record its address.
*/
trace_workqueue_execute_end(work, worker->current_func);
- pwq->stats[PWQ_STAT_COMPLETED]++;
+
lock_map_release(&lockdep_map);
if (!bh_draining)
lock_map_release(pwq->wq->lockdep_map);
@@ -3272,6 +3272,8 @@ __acquires(&pool->lock)
raw_spin_lock_irq(&pool->lock);
+ pwq->stats[PWQ_STAT_COMPLETED]++;
+
/*
* In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked
* CPU intensive by wq_worker_tick() if @work hogged CPU longer than
@@ -5837,6 +5839,17 @@ static bool pwq_busy(struct pool_workqueue *pwq)
* @wq: target workqueue
*
* Safely destroy a workqueue. All work currently pending will be done first.
+ *
+ * This function does NOT guarantee that non-pending work that has been
+ * submitted with queue_delayed_work() and similar functions will be done
+ * before destroying the workqueue. The fundamental problem is that, currently,
+ * the workqueue has no way of accessing non-pending delayed_work. delayed_work
+ * is only linked on the timer-side. All delayed_work must, therefore, be
+ * canceled before calling this function.
+ *
+ * TODO: It would be better if the problem described above wouldn't exist and
+ * destroy_workqueue() would cleanly cancel all pending and non-pending
+ * delayed_work.
*/
void destroy_workqueue(struct workqueue_struct *wq)
{
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 25ecc1334b67..c7f602fa7b23 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -350,18 +350,28 @@ static bool needs_section_mem(struct module *mod, unsigned long size)
return size >= sizeof(struct alloc_tag);
}
-static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to)
+static bool clean_unused_counters(struct alloc_tag *start_tag,
+ struct alloc_tag *end_tag)
{
- while (from <= to) {
+ struct alloc_tag *tag;
+ bool ret = true;
+
+ for (tag = start_tag; tag <= end_tag; tag++) {
struct alloc_tag_counters counter;
- counter = alloc_tag_read(from);
- if (counter.bytes)
- return from;
- from++;
+ if (!tag->counters)
+ continue;
+
+ counter = alloc_tag_read(tag);
+ if (!counter.bytes) {
+ free_percpu(tag->counters);
+ tag->counters = NULL;
+ } else {
+ ret = false;
+ }
}
- return NULL;
+ return ret;
}
/* Called with mod_area_mt locked */
@@ -371,12 +381,16 @@ static void clean_unused_module_areas_locked(void)
struct module *val;
mas_for_each(&mas, val, module_tags.size) {
+ struct alloc_tag *start_tag;
+ struct alloc_tag *end_tag;
+
if (val != &unloaded_mod)
continue;
/* Release area if all tags are unused */
- if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
- (struct alloc_tag *)(module_tags.start_addr + mas.last)))
+ start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
+ end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
+ if (clean_unused_counters(start_tag, end_tag))
mas_erase(&mas);
}
}
@@ -561,7 +575,8 @@ unlock:
static void release_module_tags(struct module *mod, bool used)
{
MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
- struct alloc_tag *tag;
+ struct alloc_tag *start_tag;
+ struct alloc_tag *end_tag;
struct module *val;
mas_lock(&mas);
@@ -575,15 +590,22 @@ static void release_module_tags(struct module *mod, bool used)
if (!used)
goto release_area;
- /* Find out if the area is used */
- tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
- (struct alloc_tag *)(module_tags.start_addr + mas.last));
- if (tag) {
- struct alloc_tag_counters counter = alloc_tag_read(tag);
+ start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
+ end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
+ if (!clean_unused_counters(start_tag, end_tag)) {
+ struct alloc_tag *tag;
+
+ for (tag = start_tag; tag <= end_tag; tag++) {
+ struct alloc_tag_counters counter;
+
+ if (!tag->counters)
+ continue;
- pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
- tag->ct.filename, tag->ct.lineno, tag->ct.modname,
- tag->ct.function, counter.bytes);
+ counter = alloc_tag_read(tag);
+ pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
+ tag->ct.filename, tag->ct.lineno, tag->ct.modname,
+ tag->ct.function, counter.bytes);
+ }
} else {
used = false;
}
@@ -596,6 +618,34 @@ out:
mas_unlock(&mas);
}
+static void load_module(struct module *mod, struct codetag *start, struct codetag *stop)
+{
+ /* Allocate module alloc_tag percpu counters */
+ struct alloc_tag *start_tag;
+ struct alloc_tag *stop_tag;
+ struct alloc_tag *tag;
+
+ if (!mod)
+ return;
+
+ start_tag = ct_to_alloc_tag(start);
+ stop_tag = ct_to_alloc_tag(stop);
+ for (tag = start_tag; tag < stop_tag; tag++) {
+ WARN_ON(tag->counters);
+ tag->counters = alloc_percpu(struct alloc_tag_counters);
+ if (!tag->counters) {
+ while (--tag >= start_tag) {
+ free_percpu(tag->counters);
+ tag->counters = NULL;
+ }
+ shutdown_mem_profiling(true);
+ pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n",
+ mod->name);
+ break;
+ }
+ }
+}
+
static void replace_module(struct module *mod, struct module *new_mod)
{
MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
@@ -757,6 +807,7 @@ static int __init alloc_tag_init(void)
.needs_section_mem = needs_section_mem,
.alloc_section_mem = reserve_module_tags,
.free_section_mem = release_module_tags,
+ .module_load = load_module,
.module_replaced = replace_module,
#endif
};
diff --git a/lib/codetag.c b/lib/codetag.c
index 42aadd6c1454..de332e98d6f5 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -194,7 +194,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
if (err >= 0) {
cttype->count += range_size(cttype, &range);
if (cttype->desc.module_load)
- cttype->desc.module_load(cttype, cmod);
+ cttype->desc.module_load(mod, range.start, range.stop);
}
up_write(&cttype->mod_lock);
@@ -333,7 +333,8 @@ void codetag_unload_module(struct module *mod)
}
if (found) {
if (cttype->desc.module_unload)
- cttype->desc.module_unload(cttype, cmod);
+ cttype->desc.module_unload(cmod->mod,
+ cmod->range.start, cmod->range.stop);
cttype->count -= range_size(cttype, &cmod->range);
idr_remove(&cttype->mod_idr, mod_id);
diff --git a/lib/crc16.c b/lib/crc16.c
index 5c3a803c01e0..9c71eda9bf4b 100644
--- a/lib/crc16.c
+++ b/lib/crc16.c
@@ -8,7 +8,7 @@
#include <linux/crc16.h>
/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */
-u16 const crc16_table[256] = {
+static const u16 crc16_table[256] = {
0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
@@ -42,20 +42,19 @@ u16 const crc16_table[256] = {
0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
};
-EXPORT_SYMBOL(crc16_table);
/**
* crc16 - compute the CRC-16 for the data buffer
* @crc: previous CRC value
- * @buffer: data pointer
+ * @p: data pointer
* @len: number of bytes in the buffer
*
* Returns the updated CRC value.
*/
-u16 crc16(u16 crc, u8 const *buffer, size_t len)
+u16 crc16(u16 crc, const u8 *p, size_t len)
{
while (len--)
- crc = crc16_byte(crc, *buffer++);
+ crc = (crc >> 8) ^ crc16_table[(crc & 0xff) ^ *p++];
return crc;
}
EXPORT_SYMBOL(crc16);
diff --git a/lib/crc32.c b/lib/crc32.c
index fddd424ff224..e690026f44f7 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin
* cleaned up code to current version of sparse and added the slicing-by-8
@@ -19,9 +20,6 @@
* drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
* fs/jffs2 uses seed 0, doesn't xor with ~0.
* fs/partitions/efi.c uses seed ~0, xor's with ~0.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
/* see: Documentation/staging/crc32.rst for a description of algorithms */
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 798972b29b68..1ec1466108cc 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -50,22 +50,16 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA
config CRYPTO_LIB_CHACHA_GENERIC
tristate
+ default CRYPTO_LIB_CHACHA if !CRYPTO_ARCH_HAVE_LIB_CHACHA
select CRYPTO_LIB_UTILS
help
- This symbol can be depended upon by arch implementations of the
- ChaCha library interface that require the generic code as a
- fallback, e.g., for SIMD implementations. If no arch specific
- implementation is enabled, this implementation serves the users
- of CRYPTO_LIB_CHACHA.
-
-config CRYPTO_LIB_CHACHA_INTERNAL
- tristate
- select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
+ This symbol can be selected by arch implementations of the ChaCha
+ library interface that require the generic code as a fallback, e.g.,
+ for SIMD implementations. If no arch specific implementation is
+ enabled, this implementation serves the users of CRYPTO_LIB_CHACHA.
config CRYPTO_LIB_CHACHA
tristate
- select CRYPTO
- select CRYPTO_LIB_CHACHA_INTERNAL
help
Enable the ChaCha library interface. This interface may be fulfilled
by either the generic implementation or an arch-specific one, if one
@@ -120,21 +114,15 @@ config CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_LIB_POLY1305_GENERIC
tristate
+ default CRYPTO_LIB_POLY1305 if !CRYPTO_ARCH_HAVE_LIB_POLY1305
help
- This symbol can be depended upon by arch implementations of the
- Poly1305 library interface that require the generic code as a
- fallback, e.g., for SIMD implementations. If no arch specific
- implementation is enabled, this implementation serves the users
- of CRYPTO_LIB_POLY1305.
-
-config CRYPTO_LIB_POLY1305_INTERNAL
- tristate
- select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n
+ This symbol can be selected by arch implementations of the Poly1305
+ library interface that require the generic code as a fallback, e.g.,
+ for SIMD implementations. If no arch specific implementation is
+ enabled, this implementation serves the users of CRYPTO_LIB_POLY1305.
config CRYPTO_LIB_POLY1305
tristate
- select CRYPTO
- select CRYPTO_LIB_POLY1305_INTERNAL
help
Enable the Poly1305 library interface. This interface may be fulfilled
by either the generic implementation or an arch-specific one, if one
@@ -151,5 +139,62 @@ config CRYPTO_LIB_SHA1
config CRYPTO_LIB_SHA256
tristate
+ help
+ Enable the SHA-256 library interface. This interface may be fulfilled
+ by either the generic implementation or an arch-specific one, if one
+ is available and enabled.
+
+config CRYPTO_ARCH_HAVE_LIB_SHA256
+ bool
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the SHA-256 library interface.
+
+config CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
+ bool
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the SHA-256 library interface
+ that is SIMD-based and therefore not usable in hardirq
+ context.
+
+config CRYPTO_LIB_SHA256_GENERIC
+ tristate
+ default CRYPTO_LIB_SHA256 if !CRYPTO_ARCH_HAVE_LIB_SHA256
+ help
+ This symbol can be selected by arch implementations of the SHA-256
+ library interface that require the generic code as a fallback, e.g.,
+ for SIMD implementations. If no arch specific implementation is
+ enabled, this implementation serves the users of CRYPTO_LIB_SHA256.
+
+config CRYPTO_LIB_SM3
+ tristate
+
+if !KMSAN # avoid false positives from assembly
+if ARM
+source "arch/arm/lib/crypto/Kconfig"
+endif
+if ARM64
+source "arch/arm64/lib/crypto/Kconfig"
+endif
+if MIPS
+source "arch/mips/lib/crypto/Kconfig"
+endif
+if PPC
+source "arch/powerpc/lib/crypto/Kconfig"
+endif
+if RISCV
+source "arch/riscv/lib/crypto/Kconfig"
+endif
+if S390
+source "arch/s390/lib/crypto/Kconfig"
+endif
+if SPARC
+source "arch/sparc/lib/crypto/Kconfig"
+endif
+if X86
+source "arch/x86/lib/crypto/Kconfig"
+endif
+endif
endmenu
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 01fac1cd05a1..3e79283b617d 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -25,9 +25,11 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o
obj-y += libblake2s.o
libblake2s-y := blake2s.o
libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o
+libblake2s-$(CONFIG_CRYPTO_SELFTESTS) += blake2s-selftest.o
obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
libchacha20poly1305-y += chacha20poly1305.o
+libchacha20poly1305-$(CONFIG_CRYPTO_SELFTESTS) += chacha20poly1305-selftest.o
obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o
libcurve25519-generic-y := curve25519-fiat32.o
@@ -36,27 +38,31 @@ libcurve25519-generic-y += curve25519-generic.o
obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o
libcurve25519-y += curve25519.o
+libcurve25519-$(CONFIG_CRYPTO_SELFTESTS) += curve25519-selftest.o
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
libdes-y := des.o
-obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
-libpoly1305-y := poly1305-donna32.o
-libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o
+obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o
libpoly1305-y += poly1305.o
+obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305-generic.o
+libpoly1305-generic-y := poly1305-donna32.o
+libpoly1305-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o
+libpoly1305-generic-y += poly1305-generic.o
+
obj-$(CONFIG_CRYPTO_LIB_SHA1) += libsha1.o
libsha1-y := sha1.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
-ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
-libblake2s-y += blake2s-selftest.o
-libchacha20poly1305-y += chacha20poly1305-selftest.o
-libcurve25519-y += curve25519-selftest.o
-endif
+obj-$(CONFIG_CRYPTO_LIB_SHA256_GENERIC) += libsha256-generic.o
+libsha256-generic-y := sha256-generic.o
obj-$(CONFIG_MPILIB) += mpi/
-obj-$(CONFIG_CRYPTO_MANAGER_EXTRA_TESTS) += simd.o
+obj-$(CONFIG_CRYPTO_SELFTESTS) += simd.o
+
+obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o
+libsm3-y := sm3.o
diff --git a/lib/crypto/aescfb.c b/lib/crypto/aescfb.c
index 749dc1258a44..437613265e14 100644
--- a/lib/crypto/aescfb.c
+++ b/lib/crypto/aescfb.c
@@ -99,7 +99,7 @@ MODULE_DESCRIPTION("Generic AES-CFB library");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
MODULE_LICENSE("GPL");
-#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+#ifdef CONFIG_CRYPTO_SELFTESTS
/*
* Test code below. Vectors taken from crypto/testmgr.h
diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c
index 902e49410aaf..277824d6b4af 100644
--- a/lib/crypto/aesgcm.c
+++ b/lib/crypto/aesgcm.c
@@ -199,7 +199,7 @@ MODULE_DESCRIPTION("Generic AES-GCM library");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
MODULE_LICENSE("GPL");
-#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+#ifdef CONFIG_CRYPTO_SELFTESTS
/*
* Test code below. Vectors taken from crypto/testmgr.h
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index 71a316552cc5..b0f9a678300b 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(blake2s_final);
static int __init blake2s_mod_init(void)
{
- if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+ if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) &&
WARN_ON(!blake2s_selftest()))
return -ENODEV;
return 0;
diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c
index 3cdda3b5ee06..ced87dd31a97 100644
--- a/lib/crypto/chacha.c
+++ b/lib/crypto/chacha.c
@@ -13,8 +13,9 @@
#include <linux/unaligned.h>
#include <crypto/chacha.h>
-static void chacha_permute(u32 *x, int nrounds)
+static void chacha_permute(struct chacha_state *state, int nrounds)
{
+ u32 *x = state->x;
int i;
/* whitelist the allowed round counts */
@@ -65,34 +66,34 @@ static void chacha_permute(u32 *x, int nrounds)
/**
* chacha_block_generic - generate one keystream block and increment block counter
- * @state: input state matrix (16 32-bit words)
- * @stream: output keystream block (64 bytes)
+ * @state: input state matrix
+ * @out: output keystream block
* @nrounds: number of rounds (20 or 12; 20 is recommended)
*
* This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
* The caller has already converted the endianness of the input. This function
* also handles incrementing the block counter in the input matrix.
*/
-void chacha_block_generic(u32 *state, u8 *stream, int nrounds)
+void chacha_block_generic(struct chacha_state *state,
+ u8 out[CHACHA_BLOCK_SIZE], int nrounds)
{
- u32 x[16];
+ struct chacha_state permuted_state = *state;
int i;
- memcpy(x, state, 64);
+ chacha_permute(&permuted_state, nrounds);
- chacha_permute(x, nrounds);
+ for (i = 0; i < ARRAY_SIZE(state->x); i++)
+ put_unaligned_le32(permuted_state.x[i] + state->x[i],
+ &out[i * sizeof(u32)]);
- for (i = 0; i < ARRAY_SIZE(x); i++)
- put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
-
- state[12]++;
+ state->x[12]++;
}
EXPORT_SYMBOL(chacha_block_generic);
/**
* hchacha_block_generic - abbreviated ChaCha core, for XChaCha
- * @state: input state matrix (16 32-bit words)
- * @stream: output (8 32-bit words)
+ * @state: input state matrix
+ * @out: the output words
* @nrounds: number of rounds (20 or 12; 20 is recommended)
*
* HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
@@ -100,15 +101,14 @@ EXPORT_SYMBOL(chacha_block_generic);
* skips the final addition of the initial state, and outputs only certain words
* of the state. It should not be used for streaming directly.
*/
-void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds)
+void hchacha_block_generic(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
- u32 x[16];
-
- memcpy(x, state, 64);
+ struct chacha_state permuted_state = *state;
- chacha_permute(x, nrounds);
+ chacha_permute(&permuted_state, nrounds);
- memcpy(&stream[0], &x[0], 16);
- memcpy(&stream[4], &x[12], 16);
+ memcpy(&out[0], &permuted_state.x[0], 16);
+ memcpy(&out[4], &permuted_state.x[12], 16);
}
EXPORT_SYMBOL(hchacha_block_generic);
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index 2ea61c28be4f..e4c85bc5a6d7 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -8832,7 +8832,7 @@ chacha20poly1305_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len,
{
const u8 *pad0 = page_address(ZERO_PAGE(0));
struct poly1305_desc_ctx poly1305_state;
- u32 chacha20_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha20_state;
union {
u8 block0[POLY1305_KEY_SIZE];
__le64 lens[2];
@@ -8844,12 +8844,12 @@ chacha20poly1305_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len,
memcpy(&bottom_row[4], nonce, 12);
for (i = 0; i < 8; ++i)
le_key[i] = get_unaligned_le32(key + sizeof(le_key[i]) * i);
- chacha_init(chacha20_state, le_key, bottom_row);
- chacha20_crypt(chacha20_state, b.block0, b.block0, sizeof(b.block0));
+ chacha_init(&chacha20_state, le_key, bottom_row);
+ chacha20_crypt(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
- chacha20_crypt(chacha20_state, dst, src, src_len);
+ chacha20_crypt(&chacha20_state, dst, src, src_len);
poly1305_update(&poly1305_state, dst, src_len);
poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
b.lens[0] = cpu_to_le64(ad_len);
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index 9cfa886f1f89..e29eed49a5a1 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -18,8 +18,6 @@
#include <linux/mm.h>
#include <linux/module.h>
-#define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32))
-
static void chacha_load_key(u32 *k, const u8 *in)
{
k[0] = get_unaligned_le32(in);
@@ -32,7 +30,8 @@ static void chacha_load_key(u32 *k, const u8 *in)
k[7] = get_unaligned_le32(in + 28);
}
-static void xchacha_init(u32 *chacha_state, const u8 *key, const u8 *nonce)
+static void xchacha_init(struct chacha_state *chacha_state,
+ const u8 *key, const u8 *nonce)
{
u32 k[CHACHA_KEY_WORDS];
u8 iv[CHACHA_IV_SIZE];
@@ -54,7 +53,8 @@ static void xchacha_init(u32 *chacha_state, const u8 *key, const u8 *nonce)
static void
__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
- const u8 *ad, const size_t ad_len, u32 *chacha_state)
+ const u8 *ad, const size_t ad_len,
+ struct chacha_state *chacha_state)
{
const u8 *pad0 = page_address(ZERO_PAGE(0));
struct poly1305_desc_ctx poly1305_state;
@@ -82,7 +82,7 @@ __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
poly1305_final(&poly1305_state, dst + src_len);
- memzero_explicit(chacha_state, CHACHA_STATE_WORDS * sizeof(u32));
+ chacha_zeroize_state(chacha_state);
memzero_explicit(&b, sizeof(b));
}
@@ -91,7 +91,7 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u32 k[CHACHA_KEY_WORDS];
__le64 iv[2];
@@ -100,8 +100,9 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
iv[0] = 0;
iv[1] = cpu_to_le64(nonce);
- chacha_init(chacha_state, k, (u8 *)iv);
- __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state);
+ chacha_init(&chacha_state, k, (u8 *)iv);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
+ &chacha_state);
memzero_explicit(iv, sizeof(iv));
memzero_explicit(k, sizeof(k));
@@ -113,16 +114,18 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
- xchacha_init(chacha_state, key, nonce);
- __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state);
+ xchacha_init(&chacha_state, key, nonce);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
+ &chacha_state);
}
EXPORT_SYMBOL(xchacha20poly1305_encrypt);
static bool
__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
- const u8 *ad, const size_t ad_len, u32 *chacha_state)
+ const u8 *ad, const size_t ad_len,
+ struct chacha_state *chacha_state)
{
const u8 *pad0 = page_address(ZERO_PAGE(0));
struct poly1305_desc_ctx poly1305_state;
@@ -169,7 +172,7 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u32 k[CHACHA_KEY_WORDS];
__le64 iv[2];
bool ret;
@@ -179,11 +182,11 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
iv[0] = 0;
iv[1] = cpu_to_le64(nonce);
- chacha_init(chacha_state, k, (u8 *)iv);
+ chacha_init(&chacha_state, k, (u8 *)iv);
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
- chacha_state);
+ &chacha_state);
- memzero_explicit(chacha_state, sizeof(chacha_state));
+ chacha_zeroize_state(&chacha_state);
memzero_explicit(iv, sizeof(iv));
memzero_explicit(k, sizeof(k));
return ret;
@@ -195,11 +198,11 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
- xchacha_init(chacha_state, key, nonce);
+ xchacha_init(&chacha_state, key, nonce);
return __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
- chacha_state);
+ &chacha_state);
}
EXPORT_SYMBOL(xchacha20poly1305_decrypt);
@@ -213,7 +216,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
{
const u8 *pad0 = page_address(ZERO_PAGE(0));
struct poly1305_desc_ctx poly1305_state;
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
struct sg_mapping_iter miter;
size_t partial = 0;
unsigned int flags;
@@ -240,8 +243,8 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
b.iv[0] = 0;
b.iv[1] = cpu_to_le64(nonce);
- chacha_init(chacha_state, b.k, (u8 *)b.iv);
- chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
+ chacha_init(&chacha_state, b.k, (u8 *)b.iv);
+ chacha20_crypt(&chacha_state, b.block0, pad0, sizeof(b.block0));
poly1305_init(&poly1305_state, b.block0);
if (unlikely(ad_len)) {
@@ -276,13 +279,13 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
if (unlikely(length < sl))
l &= ~(CHACHA_BLOCK_SIZE - 1);
- chacha20_crypt(chacha_state, addr, addr, l);
+ chacha20_crypt(&chacha_state, addr, addr, l);
addr += l;
length -= l;
}
if (unlikely(length > 0)) {
- chacha20_crypt(chacha_state, b.chacha_stream, pad0,
+ chacha20_crypt(&chacha_state, b.chacha_stream, pad0,
CHACHA_BLOCK_SIZE);
crypto_xor(addr, b.chacha_stream, length);
partial = length;
@@ -323,7 +326,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
!crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE);
}
- memzero_explicit(chacha_state, sizeof(chacha_state));
+ chacha_zeroize_state(&chacha_state);
memzero_explicit(&b, sizeof(b));
return ret;
@@ -355,7 +358,7 @@ EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
static int __init chacha20poly1305_init(void)
{
- if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+ if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) &&
WARN_ON(!chacha20poly1305_selftest()))
return -ENODEV;
return 0;
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index 064b352c6907..6850b76a80c9 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -15,7 +15,7 @@
static int __init curve25519_init(void)
{
- if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+ if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) &&
WARN_ON(!curve25519_selftest()))
return -ENODEV;
return 0;
diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c
index cc1be0496eb9..ebcca381e248 100644
--- a/lib/crypto/libchacha.c
+++ b/lib/crypto/libchacha.c
@@ -12,7 +12,7 @@
#include <crypto/algapi.h> // for crypto_xor_cpy
#include <crypto/chacha.h>
-void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
+void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
/* aligned to potentially speed up crypto_xor() */
diff --git a/lib/crypto/poly1305-generic.c b/lib/crypto/poly1305-generic.c
new file mode 100644
index 000000000000..a73f700fa1fb
--- /dev/null
+++ b/lib/crypto/poly1305-generic.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ */
+
+#include <crypto/internal/poly1305.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+void poly1305_block_init_generic(struct poly1305_block_state *desc,
+ const u8 raw_key[POLY1305_BLOCK_SIZE])
+{
+ poly1305_core_init(&desc->h);
+ poly1305_core_setkey(&desc->core_r, raw_key);
+}
+EXPORT_SYMBOL_GPL(poly1305_block_init_generic);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("Poly1305 algorithm (generic implementation)");
diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
index 6e80214ebad8..5f2f2af3b59f 100644
--- a/lib/crypto/poly1305.c
+++ b/lib/crypto/poly1305.c
@@ -7,72 +7,67 @@
* Based on public domain code by Andrew Moon and Daniel J. Bernstein.
*/
+#include <crypto/internal/blockhash.h>
#include <crypto/internal/poly1305.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
#include <linux/unaligned.h>
-void poly1305_init_generic(struct poly1305_desc_ctx *desc,
- const u8 key[POLY1305_KEY_SIZE])
+void poly1305_init(struct poly1305_desc_ctx *desc,
+ const u8 key[POLY1305_KEY_SIZE])
{
- poly1305_core_setkey(&desc->core_r, key);
desc->s[0] = get_unaligned_le32(key + 16);
desc->s[1] = get_unaligned_le32(key + 20);
desc->s[2] = get_unaligned_le32(key + 24);
desc->s[3] = get_unaligned_le32(key + 28);
- poly1305_core_init(&desc->h);
desc->buflen = 0;
- desc->sset = true;
- desc->rset = 2;
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_block_init_arch(&desc->state, key);
+ else
+ poly1305_block_init_generic(&desc->state, key);
}
-EXPORT_SYMBOL_GPL(poly1305_init_generic);
+EXPORT_SYMBOL(poly1305_init);
-void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
- unsigned int nbytes)
+static inline void poly1305_blocks(struct poly1305_block_state *state,
+ const u8 *src, unsigned int len)
{
- unsigned int bytes;
-
- if (unlikely(desc->buflen)) {
- bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen);
- memcpy(desc->buf + desc->buflen, src, bytes);
- src += bytes;
- nbytes -= bytes;
- desc->buflen += bytes;
-
- if (desc->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf,
- 1, 1);
- desc->buflen = 0;
- }
- }
-
- if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
- poly1305_core_blocks(&desc->h, &desc->core_r, src,
- nbytes / POLY1305_BLOCK_SIZE, 1);
- src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
- nbytes %= POLY1305_BLOCK_SIZE;
- }
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_blocks_arch(state, src, len, 1);
+ else
+ poly1305_blocks_generic(state, src, len, 1);
+}
- if (unlikely(nbytes)) {
- desc->buflen = nbytes;
- memcpy(desc->buf, src, nbytes);
- }
+void poly1305_update(struct poly1305_desc_ctx *desc,
+ const u8 *src, unsigned int nbytes)
+{
+ desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state,
+ src, nbytes, POLY1305_BLOCK_SIZE,
+ desc->buf, desc->buflen);
}
-EXPORT_SYMBOL_GPL(poly1305_update_generic);
+EXPORT_SYMBOL(poly1305_update);
-void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
+void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst)
{
if (unlikely(desc->buflen)) {
desc->buf[desc->buflen++] = 1;
memset(desc->buf + desc->buflen, 0,
POLY1305_BLOCK_SIZE - desc->buflen);
- poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0);
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_blocks_arch(&desc->state, desc->buf,
+ POLY1305_BLOCK_SIZE, 0);
+ else
+ poly1305_blocks_generic(&desc->state, desc->buf,
+ POLY1305_BLOCK_SIZE, 0);
}
- poly1305_core_emit(&desc->h, desc->s, dst);
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
+ poly1305_emit_arch(&desc->state.h, dst, desc->s);
+ else
+ poly1305_emit_generic(&desc->state.h, dst, desc->s);
*desc = (struct poly1305_desc_ctx){};
}
-EXPORT_SYMBOL_GPL(poly1305_final_generic);
+EXPORT_SYMBOL(poly1305_final);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
diff --git a/lib/crypto/sha256-generic.c b/lib/crypto/sha256-generic.c
new file mode 100644
index 000000000000..a16ad4f25ebb
--- /dev/null
+++ b/lib/crypto/sha256-generic.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2014 Red Hat Inc.
+ */
+
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+
+static const u32 SHA256_K[] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+static inline u32 Ch(u32 x, u32 y, u32 z)
+{
+ return z ^ (x & (y ^ z));
+}
+
+static inline u32 Maj(u32 x, u32 y, u32 z)
+{
+ return (x & y) | (z & (x | y));
+}
+
+#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
+#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
+#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
+#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
+
+static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+{
+ W[I] = get_unaligned_be32((__u32 *)input + I);
+}
+
+static inline void BLEND_OP(int I, u32 *W)
+{
+ W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
+}
+
+#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \
+ u32 t1, t2; \
+ t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \
+ t2 = e0(a) + Maj(a, b, c); \
+ d += t1; \
+ h = t1 + t2; \
+} while (0)
+
+static void sha256_block_generic(u32 state[SHA256_STATE_WORDS],
+ const u8 *input, u32 W[64])
+{
+ u32 a, b, c, d, e, f, g, h;
+ int i;
+
+ /* load the input */
+ for (i = 0; i < 16; i += 8) {
+ LOAD_OP(i + 0, W, input);
+ LOAD_OP(i + 1, W, input);
+ LOAD_OP(i + 2, W, input);
+ LOAD_OP(i + 3, W, input);
+ LOAD_OP(i + 4, W, input);
+ LOAD_OP(i + 5, W, input);
+ LOAD_OP(i + 6, W, input);
+ LOAD_OP(i + 7, W, input);
+ }
+
+ /* now blend */
+ for (i = 16; i < 64; i += 8) {
+ BLEND_OP(i + 0, W);
+ BLEND_OP(i + 1, W);
+ BLEND_OP(i + 2, W);
+ BLEND_OP(i + 3, W);
+ BLEND_OP(i + 4, W);
+ BLEND_OP(i + 5, W);
+ BLEND_OP(i + 6, W);
+ BLEND_OP(i + 7, W);
+ }
+
+ /* load the state into our registers */
+ a = state[0]; b = state[1]; c = state[2]; d = state[3];
+ e = state[4]; f = state[5]; g = state[6]; h = state[7];
+
+ /* now iterate */
+ for (i = 0; i < 64; i += 8) {
+ SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
+ SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
+ SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
+ SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
+ SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
+ SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
+ SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
+ SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
+ }
+
+ state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+ state[4] += e; state[5] += f; state[6] += g; state[7] += h;
+}
+
+void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ u32 W[64];
+
+ do {
+ sha256_block_generic(state, data, W);
+ data += SHA256_BLOCK_SIZE;
+ } while (--nblocks);
+
+ memzero_explicit(W, sizeof(W));
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_generic);
+
+MODULE_DESCRIPTION("SHA-256 Algorithm (generic implementation)");
+MODULE_LICENSE("GPL");
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index 04c1f2557e6c..107e5162507a 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -11,151 +11,65 @@
* Copyright (c) 2014 Red Hat Inc.
*/
-#include <linux/unaligned.h>
-#include <crypto/sha256_base.h>
+#include <crypto/internal/blockhash.h>
+#include <crypto/internal/sha2.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
-static const u32 SHA256_K[] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
-};
+/*
+ * If __DISABLE_EXPORTS is defined, then this file is being compiled for a
+ * pre-boot environment. In that case, ignore the kconfig options, pull the
+ * generic code into the same translation unit, and use that only.
+ */
+#ifdef __DISABLE_EXPORTS
+#include "sha256-generic.c"
+#endif
-static inline u32 Ch(u32 x, u32 y, u32 z)
+static inline bool sha256_purgatory(void)
{
- return z ^ (x & (y ^ z));
+ return __is_defined(__DISABLE_EXPORTS);
}
-static inline u32 Maj(u32 x, u32 y, u32 z)
+static inline void sha256_blocks(u32 state[SHA256_STATE_WORDS], const u8 *data,
+ size_t nblocks)
{
- return (x & y) | (z & (x | y));
+ sha256_choose_blocks(state, data, nblocks, sha256_purgatory(), false);
}
-#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
-#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
-#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
-#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
-
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len)
{
- W[I] = get_unaligned_be32((__u32 *)input + I);
-}
+ size_t partial = sctx->count % SHA256_BLOCK_SIZE;
-static inline void BLEND_OP(int I, u32 *W)
-{
- W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
-}
-
-#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \
- u32 t1, t2; \
- t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \
- t2 = e0(a) + Maj(a, b, c); \
- d += t1; \
- h = t1 + t2; \
-} while (0)
-
-static void sha256_transform(u32 *state, const u8 *input, u32 *W)
-{
- u32 a, b, c, d, e, f, g, h;
- int i;
-
- /* load the input */
- for (i = 0; i < 16; i += 8) {
- LOAD_OP(i + 0, W, input);
- LOAD_OP(i + 1, W, input);
- LOAD_OP(i + 2, W, input);
- LOAD_OP(i + 3, W, input);
- LOAD_OP(i + 4, W, input);
- LOAD_OP(i + 5, W, input);
- LOAD_OP(i + 6, W, input);
- LOAD_OP(i + 7, W, input);
- }
-
- /* now blend */
- for (i = 16; i < 64; i += 8) {
- BLEND_OP(i + 0, W);
- BLEND_OP(i + 1, W);
- BLEND_OP(i + 2, W);
- BLEND_OP(i + 3, W);
- BLEND_OP(i + 4, W);
- BLEND_OP(i + 5, W);
- BLEND_OP(i + 6, W);
- BLEND_OP(i + 7, W);
- }
-
- /* load the state into our registers */
- a = state[0]; b = state[1]; c = state[2]; d = state[3];
- e = state[4]; f = state[5]; g = state[6]; h = state[7];
-
- /* now iterate */
- for (i = 0; i < 64; i += 8) {
- SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
- SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
- SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
- SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
- SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
- SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
- SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
- SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
- }
-
- state[0] += a; state[1] += b; state[2] += c; state[3] += d;
- state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-}
-
-static void sha256_transform_blocks(struct sha256_state *sctx,
- const u8 *input, int blocks)
-{
- u32 W[64];
-
- do {
- sha256_transform(sctx->state, input, W);
- input += SHA256_BLOCK_SIZE;
- } while (--blocks);
-
- memzero_explicit(W, sizeof(W));
-}
-
-void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
-{
- lib_sha256_base_do_update(sctx, data, len, sha256_transform_blocks);
+ sctx->count += len;
+ BLOCK_HASH_UPDATE_BLOCKS(sha256_blocks, sctx->ctx.state, data, len,
+ SHA256_BLOCK_SIZE, sctx->buf, partial);
}
EXPORT_SYMBOL(sha256_update);
-static void __sha256_final(struct sha256_state *sctx, u8 *out, int digest_size)
+static inline void __sha256_final(struct sha256_state *sctx, u8 *out,
+ size_t digest_size)
{
- lib_sha256_base_do_finalize(sctx, sha256_transform_blocks);
- lib_sha256_base_finish(sctx, out, digest_size);
+ size_t partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ sha256_finup(&sctx->ctx, sctx->buf, partial, out, digest_size,
+ sha256_purgatory(), false);
+ memzero_explicit(sctx, sizeof(*sctx));
}
-void sha256_final(struct sha256_state *sctx, u8 *out)
+void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE])
{
- __sha256_final(sctx, out, 32);
+ __sha256_final(sctx, out, SHA256_DIGEST_SIZE);
}
EXPORT_SYMBOL(sha256_final);
-void sha224_final(struct sha256_state *sctx, u8 *out)
+void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE])
{
- __sha256_final(sctx, out, 28);
+ __sha256_final(sctx, out, SHA224_DIGEST_SIZE);
}
EXPORT_SYMBOL(sha224_final);
-void sha256(const u8 *data, unsigned int len, u8 *out)
+void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE])
{
struct sha256_state sctx;
diff --git a/crypto/sm3.c b/lib/crypto/sm3.c
index 18c2fb73ba16..efff0e267d84 100644
--- a/crypto/sm3.c
+++ b/lib/crypto/sm3.c
@@ -8,9 +8,11 @@
* Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
+#include <crypto/sm3.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
#include <linux/unaligned.h>
-#include <crypto/sm3.h>
static const u32 ____cacheline_aligned K[64] = {
0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
@@ -166,81 +168,18 @@ static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16])
#undef W1
#undef W2
-static inline void sm3_block(struct sm3_state *sctx,
- u8 const *data, int blocks, u32 W[16])
-{
- while (blocks--) {
- sm3_transform(sctx, data, W);
- data += SM3_BLOCK_SIZE;
- }
-}
-
-void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len)
+void sm3_block_generic(struct sm3_state *sctx, u8 const *data, int blocks)
{
- unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
u32 W[16];
- sctx->count += len;
-
- if ((partial + len) >= SM3_BLOCK_SIZE) {
- int blocks;
-
- if (partial) {
- int p = SM3_BLOCK_SIZE - partial;
-
- memcpy(sctx->buffer + partial, data, p);
- data += p;
- len -= p;
-
- sm3_block(sctx, sctx->buffer, 1, W);
- }
-
- blocks = len / SM3_BLOCK_SIZE;
- len %= SM3_BLOCK_SIZE;
-
- if (blocks) {
- sm3_block(sctx, data, blocks, W);
- data += blocks * SM3_BLOCK_SIZE;
- }
-
- memzero_explicit(W, sizeof(W));
-
- partial = 0;
- }
- if (len)
- memcpy(sctx->buffer + partial, data, len);
-}
-EXPORT_SYMBOL_GPL(sm3_update);
-
-void sm3_final(struct sm3_state *sctx, u8 *out)
-{
- const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64);
- __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
- __be32 *digest = (__be32 *)out;
- unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
- u32 W[16];
- int i;
-
- sctx->buffer[partial++] = 0x80;
- if (partial > bit_offset) {
- memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial);
- partial = 0;
-
- sm3_block(sctx, sctx->buffer, 1, W);
- }
-
- memset(sctx->buffer + partial, 0, bit_offset - partial);
- *bits = cpu_to_be64(sctx->count << 3);
- sm3_block(sctx, sctx->buffer, 1, W);
-
- for (i = 0; i < 8; i++)
- put_unaligned_be32(sctx->state[i], digest++);
+ do {
+ sm3_transform(sctx, data, W);
+ data += SM3_BLOCK_SIZE;
+ } while (--blocks);
- /* Zeroize sensitive information. */
memzero_explicit(W, sizeof(W));
- memzero_explicit(sctx, sizeof(*sctx));
}
-EXPORT_SYMBOL_GPL(sm3_final);
+EXPORT_SYMBOL_GPL(sm3_block_generic);
MODULE_DESCRIPTION("Generic SM3 library");
MODULE_LICENSE("GPL v2");
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 0836bb3d76c5..06b6342aa3ae 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -117,6 +117,17 @@ EXPORT_SYMBOL(_find_first_and_bit);
#endif
/*
+ * Find the first bit set in 1st memory region and unset in 2nd.
+ */
+unsigned long _find_first_andnot_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ return FIND_FIRST_BIT(addr1[idx] & ~addr2[idx], /* nop */, size);
+}
+EXPORT_SYMBOL(_find_first_andnot_bit);
+
+/*
* Find the first set bit in three memory regions.
*/
unsigned long _find_first_and_and_bit(const unsigned long *addr1,
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 3f39955cb0f1..0061d4c7e351 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -177,7 +177,7 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set,
const size_t max = suite_set->end - suite_set->start;
- copy = kcalloc(max, sizeof(*filtered.start), GFP_KERNEL);
+ copy = kcalloc(max, sizeof(*copy), GFP_KERNEL);
if (!copy) { /* won't be able to run anything, return an empty set */
return filtered;
}
diff --git a/lib/kunit/static_stub.c b/lib/kunit/static_stub.c
index 92b2cccd5e76..484fd85251b4 100644
--- a/lib/kunit/static_stub.c
+++ b/lib/kunit/static_stub.c
@@ -96,7 +96,7 @@ void __kunit_activate_static_stub(struct kunit *test,
/* If the replacement address is NULL, deactivate the stub. */
if (!replacement_addr) {
- kunit_deactivate_static_stub(test, replacement_addr);
+ kunit_deactivate_static_stub(test, real_fn_addr);
return;
}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index ce945c17980b..859c251b23ce 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -33,44 +33,73 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
int interval = READ_ONCE(rs->interval);
int burst = READ_ONCE(rs->burst);
unsigned long flags;
- int ret;
+ int ret = 0;
- if (!interval)
- return 1;
+ /*
+ * Zero interval says never limit, otherwise, non-positive burst
+ * says always limit.
+ */
+ if (interval <= 0 || burst <= 0) {
+ WARN_ONCE(interval < 0 || burst < 0, "Negative interval (%d) or burst (%d): Uninitialized ratelimit_state structure?\n", interval, burst);
+ ret = interval == 0 || burst > 0;
+ if (!(READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED) || (!interval && !burst) ||
+ !raw_spin_trylock_irqsave(&rs->lock, flags))
+ goto nolock_ret;
+
+ /* Force re-initialization once re-enabled. */
+ rs->flags &= ~RATELIMIT_INITIALIZED;
+ goto unlock_ret;
+ }
/*
- * If we contend on this state's lock then almost
- * by definition we are too busy to print a message,
- * in addition to the one that will be printed by
- * the entity that is holding the lock already:
+ * If we contend on this state's lock then just check if
+ * the current burst is used or not. It might cause
+ * false positive when we are past the interval and
+ * the current lock owner is just about to reset it.
*/
- if (!raw_spin_trylock_irqsave(&rs->lock, flags))
- return 0;
+ if (!raw_spin_trylock_irqsave(&rs->lock, flags)) {
+ if (READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED &&
+ atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
+ ret = 1;
+ goto nolock_ret;
+ }
- if (!rs->begin)
+ if (!(rs->flags & RATELIMIT_INITIALIZED)) {
rs->begin = jiffies;
+ rs->flags |= RATELIMIT_INITIALIZED;
+ atomic_set(&rs->rs_n_left, rs->burst);
+ }
if (time_is_before_jiffies(rs->begin + interval)) {
- if (rs->missed) {
- if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ int m;
+
+ /*
+ * Reset rs_n_left ASAP to reduce false positives
+ * in parallel calls, see above.
+ */
+ atomic_set(&rs->rs_n_left, rs->burst);
+ rs->begin = jiffies;
+
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ m = ratelimit_state_reset_miss(rs);
+ if (m) {
printk_deferred(KERN_WARNING
- "%s: %d callbacks suppressed\n",
- func, rs->missed);
- rs->missed = 0;
+ "%s: %d callbacks suppressed\n", func, m);
}
}
- rs->begin = jiffies;
- rs->printed = 0;
}
- if (burst && burst > rs->printed) {
- rs->printed++;
+
+ /* Note that the burst might be taken by a parallel call. */
+ if (atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
ret = 1;
- } else {
- rs->missed++;
- ret = 0;
- }
+
+unlock_ret:
raw_spin_unlock_irqrestore(&rs->lock, flags);
+nolock_ret:
+ if (!ret)
+ ratelimit_state_inc_miss(rs);
+
return ret;
}
EXPORT_SYMBOL(___ratelimit);
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 91fa37b5c510..ffb8ead6d4cd 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -138,6 +138,25 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
}
EXPORT_SYMBOL(string_get_size);
+int parse_int_array(const char *buf, size_t count, int **array)
+{
+ int *ints, nints;
+
+ get_options(buf, 0, &nints);
+ if (!nints)
+ return -ENOENT;
+
+ ints = kcalloc(nints + 1, sizeof(*ints), GFP_KERNEL);
+ if (!ints)
+ return -ENOMEM;
+
+ get_options(buf, nints + 1, ints);
+ *array = ints;
+
+ return 0;
+}
+EXPORT_SYMBOL(parse_int_array);
+
/**
* parse_int_array_user - Split string into a sequence of integers
* @from: The user space buffer to read from
@@ -153,30 +172,14 @@ EXPORT_SYMBOL(string_get_size);
*/
int parse_int_array_user(const char __user *from, size_t count, int **array)
{
- int *ints, nints;
char *buf;
- int ret = 0;
+ int ret;
buf = memdup_user_nul(from, count);
if (IS_ERR(buf))
return PTR_ERR(buf);
- get_options(buf, 0, &nints);
- if (!nints) {
- ret = -ENOENT;
- goto free_buf;
- }
-
- ints = kcalloc(nints + 1, sizeof(*ints), GFP_KERNEL);
- if (!ints) {
- ret = -ENOMEM;
- goto free_buf;
- }
-
- get_options(buf, nints + 1, ints);
- *array = ints;
-
-free_buf:
+ ret = parse_int_array(buf, count, array);
kfree(buf);
return ret;
}
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 4249e0cc8aaf..c02aa9c868f2 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -30,15 +30,17 @@ static int i_zero;
static int i_one_hundred = 100;
static int match_int_ok = 1;
+enum {
+ TEST_H_SETUP_NODE,
+ TEST_H_MNT,
+ TEST_H_MNTERROR,
+ TEST_H_EMPTY_ADD,
+ TEST_H_EMPTY,
+ TEST_H_U8,
+ TEST_H_SIZE /* Always at the end */
+};
-static struct {
- struct ctl_table_header *test_h_setup_node;
- struct ctl_table_header *test_h_mnt;
- struct ctl_table_header *test_h_mnterror;
- struct ctl_table_header *empty_add;
- struct ctl_table_header *empty;
-} sysctl_test_headers;
-
+static struct ctl_table_header *ctl_headers[TEST_H_SIZE] = {};
struct test_sysctl_data {
int int_0001;
int int_0002;
@@ -167,8 +169,8 @@ static int test_sysctl_setup_node_tests(void)
test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
if (!test_data.bitmap_0001)
return -ENOMEM;
- sysctl_test_headers.test_h_setup_node = register_sysctl("debug/test_sysctl", test_table);
- if (!sysctl_test_headers.test_h_setup_node) {
+ ctl_headers[TEST_H_SETUP_NODE] = register_sysctl("debug/test_sysctl", test_table);
+ if (!ctl_headers[TEST_H_SETUP_NODE]) {
kfree(test_data.bitmap_0001);
return -ENOMEM;
}
@@ -202,12 +204,12 @@ static int test_sysctl_run_unregister_nested(void)
static int test_sysctl_run_register_mount_point(void)
{
- sysctl_test_headers.test_h_mnt
+ ctl_headers[TEST_H_MNT]
= register_sysctl_mount_point("debug/test_sysctl/mnt");
- if (!sysctl_test_headers.test_h_mnt)
+ if (!ctl_headers[TEST_H_MNT])
return -ENOMEM;
- sysctl_test_headers.test_h_mnterror
+ ctl_headers[TEST_H_MNTERROR]
= register_sysctl("debug/test_sysctl/mnt/mnt_error",
test_table_unregister);
/*
@@ -225,39 +227,94 @@ static const struct ctl_table test_table_empty[] = { };
static int test_sysctl_run_register_empty(void)
{
/* Tets that an empty dir can be created */
- sysctl_test_headers.empty_add
+ ctl_headers[TEST_H_EMPTY_ADD]
= register_sysctl("debug/test_sysctl/empty_add", test_table_empty);
- if (!sysctl_test_headers.empty_add)
+ if (!ctl_headers[TEST_H_EMPTY_ADD])
return -ENOMEM;
/* Test that register on top of an empty dir works */
- sysctl_test_headers.empty
+ ctl_headers[TEST_H_EMPTY]
= register_sysctl("debug/test_sysctl/empty_add/empty", test_table_empty);
- if (!sysctl_test_headers.empty)
+ if (!ctl_headers[TEST_H_EMPTY])
return -ENOMEM;
return 0;
}
-static int __init test_sysctl_init(void)
+static const struct ctl_table table_u8_over[] = {
+ {
+ .procname = "u8_over",
+ .data = &test_data.uint_0001,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_FOUR,
+ .extra2 = SYSCTL_ONE_THOUSAND,
+ },
+};
+
+static const struct ctl_table table_u8_under[] = {
+ {
+ .procname = "u8_under",
+ .data = &test_data.uint_0001,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_NEG_ONE,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+};
+
+static const struct ctl_table table_u8_valid[] = {
+ {
+ .procname = "u8_valid",
+ .data = &test_data.uint_0001,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO_HUNDRED,
+ },
+};
+
+static int test_sysctl_register_u8_extra(void)
{
- int err;
+ /* should fail because it's over */
+ ctl_headers[TEST_H_U8]
+ = register_sysctl("debug/test_sysctl", table_u8_over);
+ if (ctl_headers[TEST_H_U8])
+ return -ENOMEM;
+
+ /* should fail because it's under */
+ ctl_headers[TEST_H_U8]
+ = register_sysctl("debug/test_sysctl", table_u8_under);
+ if (ctl_headers[TEST_H_U8])
+ return -ENOMEM;
- err = test_sysctl_setup_node_tests();
- if (err)
- goto out;
+ /* should not fail because it's valid */
+ ctl_headers[TEST_H_U8]
+ = register_sysctl("debug/test_sysctl", table_u8_valid);
+ if (!ctl_headers[TEST_H_U8])
+ return -ENOMEM;
- err = test_sysctl_run_unregister_nested();
- if (err)
- goto out;
+ return 0;
+}
- err = test_sysctl_run_register_mount_point();
- if (err)
- goto out;
+static int __init test_sysctl_init(void)
+{
+ int err = 0;
+
+ int (*func_array[])(void) = {
+ test_sysctl_setup_node_tests,
+ test_sysctl_run_unregister_nested,
+ test_sysctl_run_register_mount_point,
+ test_sysctl_run_register_empty,
+ test_sysctl_register_u8_extra
+ };
- err = test_sysctl_run_register_empty();
+ for (int i = 0; !err && i < ARRAY_SIZE(func_array); i++)
+ err = func_array[i]();
-out:
return err;
}
module_init(test_sysctl_init);
@@ -265,16 +322,10 @@ module_init(test_sysctl_init);
static void __exit test_sysctl_exit(void)
{
kfree(test_data.bitmap_0001);
- if (sysctl_test_headers.test_h_setup_node)
- unregister_sysctl_table(sysctl_test_headers.test_h_setup_node);
- if (sysctl_test_headers.test_h_mnt)
- unregister_sysctl_table(sysctl_test_headers.test_h_mnt);
- if (sysctl_test_headers.test_h_mnterror)
- unregister_sysctl_table(sysctl_test_headers.test_h_mnterror);
- if (sysctl_test_headers.empty)
- unregister_sysctl_table(sysctl_test_headers.empty);
- if (sysctl_test_headers.empty_add)
- unregister_sysctl_table(sysctl_test_headers.empty_add);
+ for (int i = 0; i < TEST_H_SIZE; i++) {
+ if (ctl_headers[i])
+ unregister_sysctl_table(ctl_headers[i]);
+ }
}
module_exit(test_sysctl_exit);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 01699852f30c..e40843cb807e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1981,15 +1981,11 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
if (check_pointer(&buf, end, clk, spec))
return buf;
- switch (fmt[1]) {
- case 'n':
- default:
#ifdef CONFIG_COMMON_CLK
- return string(buf, end, __clk_get_name(clk), spec);
+ return string(buf, end, __clk_get_name(clk), spec);
#else
- return ptr_to_id(buf, end, clk, spec);
+ return ptr_to_id(buf, end, clk, spec);
#endif
- }
}
static
@@ -2391,8 +2387,6 @@ early_param("no_hash_pointers", no_hash_pointers_enable);
* T time64_t
* - 'C' For a clock, it prints the name (Common Clock Framework) or address
* (legacy clock framework) of the clock
- * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address
- * (legacy clock framework) of the clock
* - 'G' For flags to be printed as a collection of symbolic strings that would
* construct the specific value. Supported flags given by option:
* p page flags (see struct page) given as pointer to unsigned long
diff --git a/mm/cma.c b/mm/cma.c
index 15632939f20a..c04be488b099 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -608,7 +608,10 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
* complain. Find the boundary by adding one to the last valid
* address.
*/
- highmem_start = __pa(high_memory - 1) + 1;
+ if (IS_ENABLED(CONFIG_HIGHMEM))
+ highmem_start = __pa(high_memory - 1) + 1;
+ else
+ highmem_start = memblock_end_of_DRAM();
pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
__func__, &size, &base, &limit, &alignment);
diff --git a/mm/dmapool.c b/mm/dmapool.c
index f0bfc6c490f4..5be8cc1c6529 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -56,6 +56,7 @@ struct dma_pool { /* the pool */
unsigned int size;
unsigned int allocation;
unsigned int boundary;
+ int node;
char name[32];
struct list_head pools;
};
@@ -199,12 +200,13 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
/**
- * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
+ * dma_pool_create_node - Creates a pool of consistent memory blocks, for dma.
* @name: name of pool, for diagnostics
* @dev: device that will be doing the DMA
* @size: size of the blocks in this pool.
* @align: alignment requirement for blocks; must be a power of two
* @boundary: returned blocks won't cross this power of two boundary
+ * @node: optional NUMA node to allocate structs 'dma_pool' and 'dma_page' on
* Context: not in_interrupt()
*
* Given one of these pools, dma_pool_alloc()
@@ -221,8 +223,8 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
* Return: a dma allocation pool with the requested characteristics, or
* %NULL if one can't be created.
*/
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t boundary)
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+ size_t size, size_t align, size_t boundary, int node)
{
struct dma_pool *retval;
size_t allocation;
@@ -251,7 +253,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
boundary = min(boundary, allocation);
- retval = kzalloc(sizeof(*retval), GFP_KERNEL);
+ retval = kzalloc_node(sizeof(*retval), GFP_KERNEL, node);
if (!retval)
return retval;
@@ -264,6 +266,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
retval->size = size;
retval->boundary = boundary;
retval->allocation = allocation;
+ retval->node = node;
INIT_LIST_HEAD(&retval->pools);
/*
@@ -295,7 +298,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
mutex_unlock(&pools_reg_lock);
return retval;
}
-EXPORT_SYMBOL(dma_pool_create);
+EXPORT_SYMBOL(dma_pool_create_node);
static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
{
@@ -335,7 +338,7 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
{
struct dma_page *page;
- page = kmalloc(sizeof(*page), mem_flags);
+ page = kmalloc_node(sizeof(*page), mem_flags, pool->node);
if (!page)
return NULL;
diff --git a/mm/execmem.c b/mm/execmem.c
index e6c4f5076ca8..6f7a2653b280 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -254,6 +254,34 @@ out_unlock:
return ptr;
}
+static bool execmem_cache_rox = false;
+
+void execmem_cache_make_ro(void)
+{
+ struct maple_tree *free_areas = &execmem_cache.free_areas;
+ struct maple_tree *busy_areas = &execmem_cache.busy_areas;
+ MA_STATE(mas_free, free_areas, 0, ULONG_MAX);
+ MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
+ struct mutex *mutex = &execmem_cache.mutex;
+ void *area;
+
+ execmem_cache_rox = true;
+
+ mutex_lock(mutex);
+
+ mas_for_each(&mas_free, area, ULONG_MAX) {
+ unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT;
+ set_memory_ro(mas_free.index, pages);
+ }
+
+ mas_for_each(&mas_busy, area, ULONG_MAX) {
+ unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT;
+ set_memory_ro(mas_busy.index, pages);
+ }
+
+ mutex_unlock(mutex);
+}
+
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
@@ -274,9 +302,15 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
/* fill memory with instructions that will trap */
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
- err = set_memory_rox((unsigned long)p, vm->nr_pages);
- if (err)
- goto err_free_mem;
+ if (execmem_cache_rox) {
+ err = set_memory_rox((unsigned long)p, vm->nr_pages);
+ if (err)
+ goto err_free_mem;
+ } else {
+ err = set_memory_x((unsigned long)p, vm->nr_pages);
+ if (err)
+ goto err_free_mem;
+ }
err = execmem_cache_add(p, alloc_size);
if (err)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -2949,12 +2949,20 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
+
+ /*
+ * The folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
+ spin_lock_irq(&hugetlb_lock);
if (folio_test_hugetlb(folio)) {
h = folio_hstate(folio);
} else {
+ spin_unlock_irq(&hugetlb_lock);
start_pfn++;
continue;
}
+ spin_unlock_irq(&hugetlb_lock);
if (!folio_ref_count(folio)) {
ret = alloc_and_dissolve_hugetlb_folio(h, folio,
@@ -3010,7 +3018,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3171,16 @@ out_uncharge_cgroup_reservation:
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7255,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7390,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
@@ -7915,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/internal.h b/mm/internal.h
index 25a29872c634..5c7a2b43ad76 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1590,7 +1590,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
#ifdef CONFIG_UNACCEPTED_MEMORY
void accept_page(struct page *page);
-void unaccepted_cleanup_work(struct work_struct *work);
#else /* CONFIG_UNACCEPTED_MEMORY */
static inline void accept_page(struct page *page)
{
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 88d1c9dcb507..d2c70cd2afb1 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_vm_area_shadow(void *start,
{
}
+struct vmalloc_populate_data {
+ unsigned long start;
+ struct page **pages;
+};
+
static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
- void *unused)
+ void *_data)
{
- unsigned long page;
+ struct vmalloc_populate_data *data = _data;
+ struct page *page;
pte_t pte;
+ int index;
if (likely(!pte_none(ptep_get(ptep))))
return 0;
- page = __get_free_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
-
- __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
- pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
+ index = PFN_DOWN(addr - data->start);
+ page = data->pages[index];
+ __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE);
+ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL);
spin_lock(&init_mm.page_table_lock);
if (likely(pte_none(ptep_get(ptep)))) {
set_pte_at(&init_mm, addr, ptep, pte);
- page = 0;
+ data->pages[index] = NULL;
}
spin_unlock(&init_mm.page_table_lock);
- if (page)
- free_page(page);
+
+ return 0;
+}
+
+static void ___free_pages_bulk(struct page **pages, int nr_pages)
+{
+ int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (pages[i]) {
+ __free_pages(pages[i], 0);
+ pages[i] = NULL;
+ }
+ }
+}
+
+static int ___alloc_pages_bulk(struct page **pages, int nr_pages)
+{
+ unsigned long nr_populated, nr_total = nr_pages;
+ struct page **page_array = pages;
+
+ while (nr_pages) {
+ nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages);
+ if (!nr_populated) {
+ ___free_pages_bulk(page_array, nr_total - nr_pages);
+ return -ENOMEM;
+ }
+ pages += nr_populated;
+ nr_pages -= nr_populated;
+ }
+
return 0;
}
+static int __kasan_populate_vmalloc(unsigned long start, unsigned long end)
+{
+ unsigned long nr_pages, nr_total = PFN_UP(end - start);
+ struct vmalloc_populate_data data;
+ int ret = 0;
+
+ data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!data.pages)
+ return -ENOMEM;
+
+ while (nr_total) {
+ nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0]));
+ ret = ___alloc_pages_bulk(data.pages, nr_pages);
+ if (ret)
+ break;
+
+ data.start = start;
+ ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE,
+ kasan_populate_vmalloc_pte, &data);
+ ___free_pages_bulk(data.pages, nr_pages);
+ if (ret)
+ break;
+
+ start += nr_pages * PAGE_SIZE;
+ nr_total -= nr_pages;
+ }
+
+ free_page((unsigned long)data.pages);
+
+ return ret;
+}
+
int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
unsigned long shadow_start, shadow_end;
@@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
shadow_start = PAGE_ALIGN_DOWN(shadow_start);
shadow_end = PAGE_ALIGN(shadow_end);
- ret = apply_to_page_range(&init_mm, shadow_start,
- shadow_end - shadow_start,
- kasan_populate_vmalloc_pte, NULL);
+ ret = __kasan_populate_vmalloc(shadow_start, shadow_end);
if (ret)
return ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c96c1f2b9cf5..ec39e62b172e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -582,7 +582,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
if (!val)
return;
- cgroup_rstat_updated(memcg->css.cgroup, cpu);
+ css_rstat_updated(&memcg->css, cpu);
statc = this_cpu_ptr(memcg->vmstats_percpu);
for (; statc; statc = statc->parent) {
stats_updates = READ_ONCE(statc->stats_updates) + abs(val);
@@ -614,7 +614,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
if (mem_cgroup_is_root(memcg))
WRITE_ONCE(flush_last_time, jiffies_64);
- cgroup_rstat_flush(memcg->css.cgroup);
+ css_rstat_flush(&memcg->css);
}
/*
@@ -1168,7 +1168,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
{
struct mem_cgroup *iter;
int ret = 0;
- int i = 0;
BUG_ON(mem_cgroup_is_root(memcg));
@@ -1178,10 +1177,9 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
while (!ret && (task = css_task_iter_next(&it))) {
- /* Avoid potential softlockup warning */
- if ((++i & 1023) == 0)
- cond_resched();
ret = fn(task, arg);
+ /* Avoid potential softlockup warning */
+ cond_resched();
}
css_task_iter_end(&it);
if (ret) {
diff --git a/mm/memory.c b/mm/memory.c
index ba3ea0a82f7f..49199410805c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3751,7 +3751,7 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio,
/* Stabilize the mapcount vs. refcount and recheck. */
folio_lock_large_mapcount(folio);
- VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio));
+ VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_ref_count(folio), folio);
if (folio_test_large_maybe_mapped_shared(folio))
goto unlock;
diff --git a/mm/migrate.c b/mm/migrate.c
index 676d9cfc7059..c80591514e66 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -947,66 +947,20 @@ int filemap_migrate_folio(struct address_space *mapping,
EXPORT_SYMBOL_GPL(filemap_migrate_folio);
/*
- * Writeback a folio to clean the dirty state
- */
-static int writeout(struct address_space *mapping, struct folio *folio)
-{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_NONE,
- .nr_to_write = 1,
- .range_start = 0,
- .range_end = LLONG_MAX,
- .for_reclaim = 1
- };
- int rc;
-
- if (!mapping->a_ops->writepage)
- /* No write method for the address space */
- return -EINVAL;
-
- if (!folio_clear_dirty_for_io(folio))
- /* Someone else already triggered a write */
- return -EAGAIN;
-
- /*
- * A dirty folio may imply that the underlying filesystem has
- * the folio on some queue. So the folio must be clean for
- * migration. Writeout may mean we lose the lock and the
- * folio state is no longer what we checked for earlier.
- * At this point we know that the migration attempt cannot
- * be successful.
- */
- remove_migration_ptes(folio, folio, 0);
-
- rc = mapping->a_ops->writepage(&folio->page, &wbc);
-
- if (rc != AOP_WRITEPAGE_ACTIVATE)
- /* unlocked. Relock */
- folio_lock(folio);
-
- return (rc < 0) ? -EIO : -EAGAIN;
-}
-
-/*
* Default handling if a filesystem does not provide a migration function.
*/
static int fallback_migrate_folio(struct address_space *mapping,
struct folio *dst, struct folio *src, enum migrate_mode mode)
{
- if (folio_test_dirty(src)) {
- /* Only writeback folios in full synchronous migration */
- switch (mode) {
- case MIGRATE_SYNC:
- break;
- default:
- return -EBUSY;
- }
- return writeout(mapping, src);
- }
+ WARN_ONCE(mapping->a_ops->writepages,
+ "%ps does not implement migrate_folio\n",
+ mapping->a_ops);
+ if (folio_test_dirty(src))
+ return -EBUSY;
/*
- * Buffers may be managed in a filesystem specific way.
- * We must have no buffers or drop them.
+ * Filesystem may have private data at folio->private that we
+ * can't migrate automatically.
*/
if (!filemap_release_folio(src, GFP_KERNEL))
return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 327764ca0ee4..eedce9321e13 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifdef CONFIG_UNACCEPTED_MEMORY
INIT_LIST_HEAD(&zone->unaccepted_pages);
- INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
#endif
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/nommu.c b/mm/nommu.c
index 617e7ba8022f..70f92f9a7fab 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -200,7 +200,23 @@ void *vmalloc_noprof(unsigned long size)
}
EXPORT_SYMBOL(vmalloc_noprof);
-void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof);
+/*
+ * vmalloc_huge_node - allocate virtually contiguous memory, on a node
+ *
+ * @size: allocation size
+ * @gfp_mask: flags for the page level allocator
+ * @node: node to use for allocation or NUMA_NO_NODE
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Due to NOMMU implications the node argument and HUGE page attribute is
+ * ignored.
+ */
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
+{
+ return __vmalloc_noprof(size, gfp_mask);
+}
/*
* vzalloc - allocate virtually contiguous memory with zero fill
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c81624bc3969..76200cd85fe7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2621,27 +2621,6 @@ int write_cache_pages(struct address_space *mapping,
}
EXPORT_SYMBOL(write_cache_pages);
-static int writeback_use_writepage(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct folio *folio = NULL;
- struct blk_plug plug;
- int err;
-
- blk_start_plug(&plug);
- while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
- err = mapping->a_ops->writepage(&folio->page, wbc);
- if (err == AOP_WRITEPAGE_ACTIVATE) {
- folio_unlock(folio);
- err = 0;
- }
- mapping_set_error(mapping, err);
- }
- blk_finish_plug(&plug);
-
- return err;
-}
-
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
int ret;
@@ -2652,14 +2631,11 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
wb = inode_to_wb_wbc(mapping->host, wbc);
wb_bandwidth_estimate_start(wb);
while (1) {
- if (mapping->a_ops->writepages) {
+ if (mapping->a_ops->writepages)
ret = mapping->a_ops->writepages(mapping, wbc);
- } else if (mapping->a_ops->writepage) {
- ret = writeback_use_writepage(mapping, wbc);
- } else {
+ else
/* deal with chardevs and other special files */
ret = 0;
- }
if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL)
break;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5669baf2a6fe..47fa713ccb4d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -290,7 +290,8 @@ EXPORT_SYMBOL(nr_online_nodes);
#endif
static bool page_contains_unaccepted(struct page *page, unsigned int order);
-static bool cond_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+ int alloc_flags);
static bool __free_unaccepted(struct page *page);
int page_group_by_mobility_disabled __read_mostly;
@@ -1151,14 +1152,9 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
__pgalloc_tag_sub(page, nr);
}
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
+/* When tag is not NULL, assuming mem_alloc_profiling_enabled */
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
{
- struct alloc_tag *tag;
-
- if (!mem_alloc_profiling_enabled())
- return;
-
- tag = __pgalloc_tag_get(page);
if (tag)
this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
}
@@ -1168,7 +1164,7 @@ static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {}
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
#endif /* CONFIG_MEM_ALLOC_PROFILING */
@@ -3616,7 +3612,7 @@ retry:
}
}
- cond_accept_memory(zone, order);
+ cond_accept_memory(zone, order, alloc_flags);
/*
* Detect whether the number of free pages is below high
@@ -3643,7 +3639,7 @@ check_alloc_wmark:
gfp_mask)) {
int ret;
- if (cond_accept_memory(zone, order))
+ if (cond_accept_memory(zone, order, alloc_flags))
goto try_this_zone;
/*
@@ -3696,7 +3692,7 @@ try_this_zone:
return page;
} else {
- if (cond_accept_memory(zone, order))
+ if (cond_accept_memory(zone, order, alloc_flags))
goto try_this_zone;
/* Try again if zone has deferred pages */
@@ -4566,6 +4562,14 @@ restart:
}
retry:
+ /*
+ * Deal with possible cpuset update races or zonelist updates to avoid
+ * infinite retries.
+ */
+ if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
+ check_retry_zonelist(zonelist_iter_cookie))
+ goto restart;
+
/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
if (alloc_flags & ALLOC_KSWAPD)
wake_all_kswapds(order, gfp_mask, ac);
@@ -4849,7 +4853,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
goto failed;
}
- cond_accept_memory(zone, 0);
+ cond_accept_memory(zone, 0, alloc_flags);
retry_this_zone:
mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
if (zone_watermark_fast(zone, 0, mark,
@@ -4858,7 +4862,7 @@ retry_this_zone:
break;
}
- if (cond_accept_memory(zone, 0))
+ if (cond_accept_memory(zone, 0, alloc_flags))
goto retry_this_zone;
/* Try again if zone has deferred pages */
@@ -5065,11 +5069,13 @@ static void ___free_pages(struct page *page, unsigned int order,
{
/* get PageHead before we drop reference */
int head = PageHead(page);
+ /* get alloc tag in case the page is released by others */
+ struct alloc_tag *tag = pgalloc_tag_get(page);
if (put_page_testzero(page))
__free_frozen_pages(page, order, fpi_flags);
else if (!head) {
- pgalloc_tag_sub_pages(page, (1 << order) - 1);
+ pgalloc_tag_sub_pages(tag, (1 << order) - 1);
while (order-- > 0)
__free_frozen_pages(page + (1 << order), order,
fpi_flags);
@@ -7174,16 +7180,8 @@ bool has_managed_dma(void)
#ifdef CONFIG_UNACCEPTED_MEMORY
-/* Counts number of zones with unaccepted pages. */
-static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
-
static bool lazy_accept = true;
-void unaccepted_cleanup_work(struct work_struct *work)
-{
- static_branch_dec(&zones_with_unaccepted_pages);
-}
-
static int __init accept_memory_parse(char *p)
{
if (!strcmp(p, "lazy")) {
@@ -7208,11 +7206,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
static void __accept_page(struct zone *zone, unsigned long *flags,
struct page *page)
{
- bool last;
-
list_del(&page->lru);
- last = list_empty(&zone->unaccepted_pages);
-
account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
__ClearPageUnaccepted(page);
@@ -7221,28 +7215,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
-
- if (last) {
- /*
- * There are two corner cases:
- *
- * - If allocation occurs during the CPU bring up,
- * static_branch_dec() cannot be used directly as
- * it causes a deadlock on cpu_hotplug_lock.
- *
- * Instead, use schedule_work() to prevent deadlock.
- *
- * - If allocation occurs before workqueues are initialized,
- * static_branch_dec() should be called directly.
- *
- * Workqueues are initialized before CPU bring up, so this
- * will not conflict with the first scenario.
- */
- if (system_wq)
- schedule_work(&zone->unaccepted_cleanup);
- else
- unaccepted_cleanup_work(&zone->unaccepted_cleanup);
- }
}
void accept_page(struct page *page)
@@ -7279,20 +7251,17 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
-static inline bool has_unaccepted_memory(void)
-{
- return static_branch_unlikely(&zones_with_unaccepted_pages);
-}
-
-static bool cond_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+ int alloc_flags)
{
long to_accept, wmark;
bool ret = false;
- if (!has_unaccepted_memory())
+ if (list_empty(&zone->unaccepted_pages))
return false;
- if (list_empty(&zone->unaccepted_pages))
+ /* Bailout, since try_to_accept_memory_one() needs to take a lock */
+ if (alloc_flags & ALLOC_TRYLOCK)
return false;
wmark = promo_wmark_pages(zone);
@@ -7325,22 +7294,17 @@ static bool __free_unaccepted(struct page *page)
{
struct zone *zone = page_zone(page);
unsigned long flags;
- bool first = false;
if (!lazy_accept)
return false;
spin_lock_irqsave(&zone->lock, flags);
- first = list_empty(&zone->unaccepted_pages);
list_add_tail(&page->lru, &zone->unaccepted_pages);
account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
__SetPageUnaccepted(page);
spin_unlock_irqrestore(&zone->lock, flags);
- if (first)
- static_branch_inc(&zones_with_unaccepted_pages);
-
return true;
}
@@ -7351,7 +7315,8 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
return false;
}
-static bool cond_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+ int alloc_flags)
{
return false;
}
@@ -7422,11 +7387,6 @@ struct page *try_alloc_pages_noprof(int nid, unsigned int order)
if (!pcp_allowed_order(order))
return NULL;
-#ifdef CONFIG_UNACCEPTED_MEMORY
- /* Bailout, since try_to_accept_memory_one() needs to take a lock */
- if (has_unaccepted_memory())
- return NULL;
-#endif
/* Bailout, since _deferred_grow_zone() needs to take a lock */
if (deferred_pages_enabled())
return NULL;
diff --git a/mm/page_io.c b/mm/page_io.c
index 4bce19df557b..f7716b6569fa 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -237,9 +237,8 @@ static void swap_zeromap_folio_clear(struct folio *folio)
* We may have stale swap cache pages in memory: notice
* them here and get rid of the unnecessary final write.
*/
-int swap_writepage(struct page *page, struct writeback_control *wbc)
+int swap_writeout(struct folio *folio, struct writeback_control *wbc)
{
- struct folio *folio = page_folio(page);
int ret;
if (folio_free_swap(folio)) {
diff --git a/mm/readahead.c b/mm/readahead.c
index 6a4e96b69702..20d36d6b055e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -690,9 +690,15 @@ EXPORT_SYMBOL_GPL(page_cache_async_ra);
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
+ struct file *file;
+ const struct inode *inode;
+
CLASS(fd, f)(fd);
+ if (fd_empty(f))
+ return -EBADF;
- if (fd_empty(f) || !(fd_file(f)->f_mode & FMODE_READ))
+ file = fd_file(f);
+ if (!(file->f_mode & FMODE_READ))
return -EBADF;
/*
@@ -700,9 +706,15 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
* that can execute readahead. If readahead is not possible
* on this file, then we must return -EINVAL.
*/
- if (!fd_file(f)->f_mapping || !fd_file(f)->f_mapping->a_ops ||
- (!S_ISREG(file_inode(fd_file(f))->i_mode) &&
- !S_ISBLK(file_inode(fd_file(f))->i_mode)))
+ if (!file->f_mapping)
+ return -EINVAL;
+ if (!file->f_mapping->a_ops)
+ return -EINVAL;
+
+ inode = file_inode(file);
+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+ return -EINVAL;
+ if (IS_ANON_FILE(inode))
return -EINVAL;
return vfs_fadvise(fd_file(f), offset, count, POSIX_FADV_WILLNEED);
diff --git a/mm/shmem.c b/mm/shmem.c
index 99327c30507c..858cee02ca49 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -98,7 +98,7 @@ static struct vfsmount *shm_mnt __ro_after_init;
#define SHORT_SYMLINK_LEN 128
/*
- * shmem_fallocate communicates with shmem_fault or shmem_writepage via
+ * shmem_fallocate communicates with shmem_fault or shmem_writeout via
* inode->i_private (with i_rwsem making sure that it has only one user at
* a time): we would prefer not to enlarge the shmem inode just for that.
*/
@@ -107,7 +107,7 @@ struct shmem_falloc {
pgoff_t start; /* start of range currently being fallocated */
pgoff_t next; /* the next page offset to be fallocated */
pgoff_t nr_falloced; /* how many new pages have been fallocated */
- pgoff_t nr_unswapped; /* how often writepage refused to swap out */
+ pgoff_t nr_unswapped; /* how often writeout refused to swap out */
};
struct shmem_options {
@@ -446,7 +446,7 @@ static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
/*
* Special case: whereas normally shmem_recalc_inode() is called
* after i_mapping->nrpages has already been adjusted (up or down),
- * shmem_writepage() has to raise swapped before nrpages is lowered -
+ * shmem_writeout() has to raise swapped before nrpages is lowered -
* to stop a racing shmem_recalc_inode() from thinking that a page has
* been freed. Compensate here, to avoid the need for a followup call.
*/
@@ -1536,12 +1536,15 @@ int shmem_unuse(unsigned int type)
return error;
}
-/*
- * Move the page from the page cache to the swap cache.
+/**
+ * shmem_writeout - Write the folio to swap
+ * @folio: The folio to write
+ * @wbc: How writeback is to be done
+ *
+ * Move the folio from the page cache to the swap cache.
*/
-static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+int shmem_writeout(struct folio *folio, struct writeback_control *wbc)
{
- struct folio *folio = page_folio(page);
struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1550,13 +1553,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
int nr_pages;
bool split = false;
- /*
- * Our capabilities prevent regular writeback or sync from ever calling
- * shmem_writepage; but a stacking filesystem might use ->writepage of
- * its underlying filesystem, in which case tmpfs should write out to
- * swap only in response to memory pressure, and not for the writeback
- * threads or sync.
- */
if (WARN_ON_ONCE(!wbc->for_reclaim))
goto redirty;
@@ -1586,9 +1582,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
try_split:
/* Ensure the subpages are still dirty */
folio_test_set_dirty(folio);
- if (split_huge_page_to_list_to_order(page, wbc->list, 0))
+ if (split_folio_to_list(folio, wbc->list))
goto redirty;
- folio = page_folio(page);
folio_clear_dirty(folio);
}
@@ -1646,7 +1641,7 @@ try_split:
mutex_unlock(&shmem_swaplist_mutex);
BUG_ON(folio_mapped(folio));
- return swap_writepage(&folio->page, wbc);
+ return swap_writeout(folio, wbc);
}
list_del_init(&info->swaplist);
@@ -1660,6 +1655,7 @@ redirty:
folio_unlock(folio);
return 0;
}
+EXPORT_SYMBOL_GPL(shmem_writeout);
#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
@@ -3768,7 +3764,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
index--;
/*
- * Inform shmem_writepage() how far we have reached.
+ * Inform shmem_writeout() how far we have reached.
* No need for lock or barrier: we have the page lock.
*/
if (!folio_test_uptodate(folio))
@@ -5191,7 +5187,6 @@ static int shmem_error_remove_folio(struct address_space *mapping,
}
static const struct address_space_operations shmem_aops = {
- .writepage = shmem_writepage,
.dirty_folio = noop_dirty_folio,
#ifdef CONFIG_TMPFS
.write_begin = shmem_write_begin,
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 6af13bcd2ab3..5acb51a9fc49 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -223,7 +223,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
global_node_page_state(NR_SHMEM),
global_node_page_state(NR_PAGETABLE),
global_node_page_state(NR_SECONDARY_PAGETABLE),
- global_zone_page_state(NR_BOUNCE),
+ 0UL,
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
global_zone_page_state(NR_FREE_PAGES),
free_pcp,
@@ -341,7 +341,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
K(zone->present_pages),
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
- K(zone_page_state(zone, NR_BOUNCE)),
+ 0UL,
K(free_pcp),
K(this_cpu_read(zone->per_cpu_pageset->count)),
K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
diff --git a/mm/swap.h b/mm/swap.h
index 6f4a3f927edb..aa62463976d5 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -20,7 +20,7 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
__swap_read_unplug(plug);
}
void swap_write_unplug(struct swap_iocb *sio);
-int swap_writepage(struct page *page, struct writeback_control *wbc);
+int swap_writeout(struct folio *folio, struct writeback_control *wbc);
void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
/* linux/mm/swap_state.c */
@@ -141,7 +141,7 @@ static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
return NULL;
}
-static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
+static inline int swap_writeout(struct folio *f, struct writeback_control *wbc)
{
return 0;
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 68fd981b514f..ec2b1c9c9926 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -30,7 +30,6 @@
* vmscan's shrink_folio_list.
*/
static const struct address_space_operations swap_aops = {
- .writepage = swap_writepage,
.dirty_folio = noop_dirty_folio,
#ifdef CONFIG_MIGRATION
.migrate_folio = migrate_folio,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f214843612dc..86643b181098 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2368,7 +2368,7 @@ retry:
* Limit the number of retries? No: when mmget_not_zero()
* above fails, that mm is likely to be freeing swap from
* exit_mmap(), which proceeds at its own independent pace;
- * and even shmem_writepage() could have been preempted after
+ * and even shmem_writeout() could have been preempted after
* folio_alloc_swap(), temporarily hiding that swap. It's easy
* and robust (though cpu-intensive) just to keep retrying.
*/
@@ -3332,6 +3332,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/*
+ * The swap subsystem needs a major overhaul to support this.
+ * It doesn't work yet so just disable it for now.
+ */
+ if (mapping_min_folio_order(mapping) > 0) {
+ error = -EINVAL;
+ goto bad_swap_unlock_inode;
+ }
+
+ /*
* Read the swap header.
*/
if (!mapping->a_ops->read_folio) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 5d98054094d1..f2aaf99f2990 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -191,6 +191,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
{
loff_t pos = folio_pos(folio);
+ size_t size = folio_size(folio);
unsigned int offset, length;
struct page *split_at, *split_at2;
@@ -198,14 +199,13 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
offset = start - pos;
else
offset = 0;
- length = folio_size(folio);
- if (pos + length <= (u64)end)
- length = length - offset;
+ if (pos + size <= (u64)end)
+ length = size - offset;
else
length = end + 1 - pos - offset;
folio_wait_writeback(folio);
- if (length == folio_size(folio)) {
+ if (length == size) {
truncate_inode_folio(folio->mapping, folio);
return true;
}
@@ -224,16 +224,20 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
return true;
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- split_at2 = folio_page(folio,
- PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE);
-
if (!try_folio_split(folio, split_at, NULL)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
* for shmem truncate
*/
- struct folio *folio2 = page_folio(split_at2);
+ struct folio *folio2;
+
+ if (offset + length == size)
+ goto no_split;
+
+ split_at2 = folio_page(folio,
+ PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE);
+ folio2 = page_folio(split_at2);
if (!folio_try_get(folio2))
goto no_split;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7d5d709cc838..e0db855c89b4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1064,8 +1064,13 @@ static int move_present_pte(struct mm_struct *mm,
src_folio->index = linear_page_index(dst_vma, dst_addr);
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
- /* Follow mremap() behavior and treat the entry dirty after the move */
- orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
+ /* Set soft dirty bit so userspace can notice the pte was moved */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
+#endif
+ if (pte_dirty(orig_src_pte))
+ orig_dst_pte = pte_mkdirty(orig_dst_pte);
+ orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte);
out:
@@ -1100,6 +1105,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
}
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+#endif
set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
double_pt_unlock(dst_ptl, src_ptl);
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2d7511654831..8a1f7783bbdb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3944,9 +3944,10 @@ void *vmalloc_noprof(unsigned long size)
EXPORT_SYMBOL(vmalloc_noprof);
/**
- * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
+ * vmalloc_huge_node - allocate virtually contiguous memory, allow huge pages
* @size: allocation size
* @gfp_mask: flags for the page level allocator
+ * @node: node to use for allocation or NUMA_NO_NODE
*
* Allocate enough pages to cover @size from the page level
* allocator and map them into contiguous kernel virtual space.
@@ -3955,13 +3956,13 @@ EXPORT_SYMBOL(vmalloc_noprof);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask)
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
{
return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
- gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
- NUMA_NO_NODE, __builtin_return_address(0));
+ gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ node, __builtin_return_address(0));
}
-EXPORT_SYMBOL_GPL(vmalloc_huge_noprof);
+EXPORT_SYMBOL_GPL(vmalloc_huge_node_noprof);
/**
* vzalloc - allocate virtually contiguous memory with zero fill
@@ -4093,8 +4094,8 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
* would be a good heuristic for when to shrink the vm_area?
*/
if (size <= old_size) {
- /* Zero out "freed" memory. */
- if (want_init_on_free())
+ /* Zero out "freed" memory, potentially for future realloc. */
+ if (want_init_on_free() || want_init_on_alloc(flags))
memset((void *)p + size, 0, old_size - size);
vm->requested_size = size;
kasan_poison_vmalloc(p + size, old_size - size);
@@ -4107,10 +4108,13 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
if (size <= alloced_size) {
kasan_unpoison_vmalloc(p + old_size, size - old_size,
KASAN_VMALLOC_PROT_NORMAL);
- /* Zero out "alloced" memory. */
- if (want_init_on_alloc(flags))
- memset((void *)p + old_size, 0, size - old_size);
+ /*
+ * No need to zero memory here, as unused memory will have
+ * already been zeroed at initial allocation time or during
+ * realloc shrink time.
+ */
vm->requested_size = size;
+ return (void *)p;
}
/* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3783e45bfc92..b6f4db6c240f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -648,21 +648,20 @@ typedef enum {
/*
* pageout is called by shrink_folio_list() for each dirty folio.
- * Calls ->writepage().
*/
static pageout_t pageout(struct folio *folio, struct address_space *mapping,
struct swap_iocb **plug, struct list_head *folio_list)
{
+ int (*writeout)(struct folio *, struct writeback_control *);
+
/*
- * If the folio is dirty, only perform writeback if that write
- * will be non-blocking. To prevent this allocation from being
- * stalled by pagecache activity. But note that there may be
- * stalls if we need to run get_block(). We could test
- * PagePrivate for that.
- *
- * If this process is currently in __generic_file_write_iter() against
- * this folio's queue, we can perform writeback even if that
- * will block.
+ * We no longer attempt to writeback filesystem folios here, other
+ * than tmpfs/shmem. That's taken care of in page-writeback.
+ * If we find a dirty filesystem folio at the end of the LRU list,
+ * typically that means the filesystem is saturating the storage
+ * with contiguous writes and telling it to write a folio here
+ * would only make the situation worse by injecting an element
+ * of random access.
*
* If the folio is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
@@ -685,7 +684,11 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping,
}
return PAGE_KEEP;
}
- if (mapping->a_ops->writepage == NULL)
+ if (shmem_mapping(mapping))
+ writeout = shmem_writeout;
+ else if (folio_test_anon(folio))
+ writeout = swap_writeout;
+ else
return PAGE_ACTIVATE;
if (folio_clear_dirty_for_io(folio)) {
@@ -708,7 +711,7 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping,
wbc.list = folio_list;
folio_set_reclaim(folio);
- res = mapping->a_ops->writepage(&folio->page, &wbc);
+ res = writeout(folio, &wbc);
if (res < 0)
handle_write_error(mapping, folio, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
@@ -717,7 +720,7 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping,
}
if (!folio_test_writeback(folio)) {
- /* synchronous write or broken a_ops? */
+ /* synchronous write? */
folio_clear_reclaim(folio);
}
trace_mm_vmscan_write_folio(folio);
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 961b270f023c..d14a7e317ac8 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1243,19 +1243,19 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
class = zspage_class(pool, zspage);
off = offset_in_page(class->size * obj_idx);
- if (off + class->size <= PAGE_SIZE) {
+ if (!ZsHugePage(zspage))
+ off += ZS_HANDLE_SIZE;
+
+ if (off + mem_len <= PAGE_SIZE) {
/* this object is contained entirely within a page */
void *dst = kmap_local_zpdesc(zpdesc);
- if (!ZsHugePage(zspage))
- off += ZS_HANDLE_SIZE;
memcpy(dst + off, handle_mem, mem_len);
kunmap_local(dst);
} else {
/* this object spans two pages */
size_t sizes[2];
- off += ZS_HANDLE_SIZE;
sizes[0] = PAGE_SIZE - off;
sizes[1] = mem_len - sizes[0];
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 7cd4bdcee439..558d39dffc23 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -506,28 +506,32 @@ batadv_hardif_is_iface_up(const struct batadv_hard_iface *hard_iface)
return false;
}
-static void batadv_check_known_mac_addr(const struct net_device *net_dev)
+static void batadv_check_known_mac_addr(const struct batadv_hard_iface *hard_iface)
{
- const struct batadv_hard_iface *hard_iface;
+ const struct net_device *mesh_iface = hard_iface->mesh_iface;
+ const struct batadv_hard_iface *tmp_hard_iface;
- rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->if_status != BATADV_IF_ACTIVE &&
- hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
+ if (!mesh_iface)
+ return;
+
+ list_for_each_entry(tmp_hard_iface, &batadv_hardif_list, list) {
+ if (tmp_hard_iface == hard_iface)
+ continue;
+
+ if (tmp_hard_iface->mesh_iface != mesh_iface)
continue;
- if (hard_iface->net_dev == net_dev)
+ if (tmp_hard_iface->if_status == BATADV_IF_NOT_IN_USE)
continue;
- if (!batadv_compare_eth(hard_iface->net_dev->dev_addr,
- net_dev->dev_addr))
+ if (!batadv_compare_eth(tmp_hard_iface->net_dev->dev_addr,
+ hard_iface->net_dev->dev_addr))
continue;
pr_warn("The newly added mac address (%pM) already exists on: %s\n",
- net_dev->dev_addr, hard_iface->net_dev->name);
+ hard_iface->net_dev->dev_addr, tmp_hard_iface->net_dev->name);
pr_warn("It is strongly recommended to keep mac addresses unique to avoid problems!\n");
}
- rcu_read_unlock();
}
/**
@@ -763,6 +767,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->net_dev->name, hardif_mtu,
required_mtu);
+ batadv_check_known_mac_addr(hard_iface);
+
if (batadv_hardif_is_iface_up(hard_iface))
batadv_hardif_activate_interface(hard_iface);
else
@@ -901,7 +907,6 @@ batadv_hardif_add_interface(struct net_device *net_dev)
batadv_v_hardif_init(hard_iface);
- batadv_check_known_mac_addr(hard_iface->net_dev);
kref_get(&hard_iface->refcount);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
batadv_hardif_generation++;
@@ -988,7 +993,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (hard_iface->if_status == BATADV_IF_NOT_IN_USE)
goto hardif_put;
- batadv_check_known_mac_addr(hard_iface->net_dev);
+ batadv_check_known_mac_addr(hard_iface);
bat_priv = netdev_priv(hard_iface->mesh_iface);
bat_priv->algo_ops->iface.update_mac(hard_iface);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6533e281ada3..946d2ae551f8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -3023,3 +3023,27 @@ void hci_conn_tx_dequeue(struct hci_conn *conn)
kfree_skb(skb);
}
+
+u8 *hci_conn_key_enc_size(struct hci_conn *conn)
+{
+ if (conn->type == ACL_LINK) {
+ struct link_key *key;
+
+ key = hci_find_link_key(conn->hdev, &conn->dst);
+ if (!key)
+ return NULL;
+
+ return &key->pin_len;
+ } else if (conn->type == LE_LINK) {
+ struct smp_ltk *ltk;
+
+ ltk = hci_find_ltk(conn->hdev, &conn->dst, conn->dst_type,
+ conn->role);
+ if (!ltk)
+ return NULL;
+
+ return &ltk->enc_size;
+ }
+
+ return NULL;
+}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6d6061111ac5..c38ada69c3d7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -739,10 +739,17 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data,
handle);
conn->enc_key_size = 0;
} else {
+ u8 *key_enc_size = hci_conn_key_enc_size(conn);
+
conn->enc_key_size = rp->key_size;
status = 0;
- if (conn->enc_key_size < hdev->min_enc_key_size) {
+ /* Attempt to check if the key size is too small or if it has
+ * been downgraded from the last time it was stored as part of
+ * the link_key.
+ */
+ if (conn->enc_key_size < hdev->min_enc_key_size ||
+ (key_enc_size && conn->enc_key_size < *key_enc_size)) {
/* As slave role, the conn->state has been set to
* BT_CONNECTED and l2cap conn req might not be received
* yet, at this moment the l2cap layer almost does
@@ -755,6 +762,10 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data,
clear_bit(HCI_CONN_ENCRYPT, &conn->flags);
clear_bit(HCI_CONN_AES_CCM, &conn->flags);
}
+
+ /* Update the key encryption size with the connection one */
+ if (key_enc_size && *key_enc_size != conn->enc_key_size)
+ *key_enc_size = conn->enc_key_size;
}
hci_encrypt_cfm(conn, status);
@@ -3065,6 +3076,34 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata,
hci_dev_unlock(hdev);
}
+static int hci_read_enc_key_size(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ struct hci_cp_read_enc_key_size cp;
+ u8 *key_enc_size = hci_conn_key_enc_size(conn);
+
+ if (!read_key_size_capable(hdev)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ return -EOPNOTSUPP;
+ }
+
+ bt_dev_dbg(hdev, "hcon %p", conn);
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(conn->handle);
+
+ /* If the key enc_size is already known, use it as conn->enc_key_size,
+ * otherwise use hdev->min_enc_key_size so the likes of
+ * l2cap_check_enc_key_size don't fail while waiting for
+ * HCI_OP_READ_ENC_KEY_SIZE response.
+ */
+ if (key_enc_size && *key_enc_size)
+ conn->enc_key_size = *key_enc_size;
+ else
+ conn->enc_key_size = hdev->min_enc_key_size;
+
+ return hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
+}
+
static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3157,23 +3196,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
ev->link_type == ACL_LINK) {
struct link_key *key;
- struct hci_cp_read_enc_key_size cp;
key = hci_find_link_key(hdev, &ev->bdaddr);
if (key) {
set_bit(HCI_CONN_ENCRYPT, &conn->flags);
-
- if (!read_key_size_capable(hdev)) {
- conn->enc_key_size = HCI_LINK_KEY_SIZE;
- } else {
- cp.handle = cpu_to_le16(conn->handle);
- if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
- sizeof(cp), &cp)) {
- bt_dev_err(hdev, "sending read key size failed");
- conn->enc_key_size = HCI_LINK_KEY_SIZE;
- }
- }
-
+ hci_read_enc_key_size(hdev, conn);
hci_encrypt_cfm(conn, ev->status);
}
}
@@ -3612,24 +3639,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
/* Try reading the encryption key size for encrypted ACL links */
if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
- struct hci_cp_read_enc_key_size cp;
-
- /* Only send HCI_Read_Encryption_Key_Size if the
- * controller really supports it. If it doesn't, assume
- * the default size (16).
- */
- if (!read_key_size_capable(hdev)) {
- conn->enc_key_size = HCI_LINK_KEY_SIZE;
- goto notify;
- }
-
- cp.handle = cpu_to_le16(conn->handle);
- if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
- sizeof(cp), &cp)) {
- bt_dev_err(hdev, "sending read key size failed");
- conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ if (hci_read_enc_key_size(hdev, conn))
goto notify;
- }
goto unlock;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 73472756618a..042d3ac3b4a3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1411,7 +1411,8 @@ static void l2cap_request_info(struct l2cap_conn *conn)
sizeof(req), &req);
}
-static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
+static bool l2cap_check_enc_key_size(struct hci_conn *hcon,
+ struct l2cap_chan *chan)
{
/* The minimum encryption key size needs to be enforced by the
* host stack before establishing any L2CAP connections. The
@@ -1425,7 +1426,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
int min_key_size = hcon->hdev->min_enc_key_size;
/* On FIPS security level, key size must be 16 bytes */
- if (hcon->sec_level == BT_SECURITY_FIPS)
+ if (chan->sec_level == BT_SECURITY_FIPS)
min_key_size = 16;
return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
@@ -1453,7 +1454,7 @@ static void l2cap_do_start(struct l2cap_chan *chan)
!__l2cap_no_conn_pending(chan))
return;
- if (l2cap_check_enc_key_size(conn->hcon))
+ if (l2cap_check_enc_key_size(conn->hcon, chan))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -1528,7 +1529,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
continue;
}
- if (l2cap_check_enc_key_size(conn->hcon))
+ if (l2cap_check_enc_key_size(conn->hcon, chan))
l2cap_start_connection(chan);
else
l2cap_chan_close(chan, ECONNREFUSED);
@@ -3992,7 +3993,7 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd,
/* Check if the ACL is secure enough (if not SDP) */
if (psm != cpu_to_le16(L2CAP_PSM_SDP) &&
(!hci_conn_check_link_mode(conn->hcon) ||
- !l2cap_check_enc_key_size(conn->hcon))) {
+ !l2cap_check_enc_key_size(conn->hcon, pchan))) {
conn->disc_reason = HCI_ERROR_AUTH_FAILURE;
result = L2CAP_CR_SEC_BLOCK;
goto response;
@@ -7352,7 +7353,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
}
if (chan->state == BT_CONNECT) {
- if (!status && l2cap_check_enc_key_size(hcon))
+ if (!status && l2cap_check_enc_key_size(hcon, chan))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -7362,7 +7363,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
struct l2cap_conn_rsp rsp;
__u16 res, stat;
- if (!status && l2cap_check_enc_key_size(hcon)) {
+ if (!status && l2cap_check_enc_key_size(hcon, chan)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
res = L2CAP_CR_PEND;
stat = L2CAP_CS_AUTHOR_PEND;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c1e1e529e26c..46b22708dfbd 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7506,11 +7506,16 @@ static void add_device_complete(struct hci_dev *hdev, void *data, int err)
struct mgmt_cp_add_device *cp = cmd->param;
if (!err) {
+ struct hci_conn_params *params;
+
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type,
cp->action);
device_flags_changed(NULL, hdev, &cp->addr.bdaddr,
cp->addr.type, hdev->conn_flags,
- PTR_UINT(cmd->user_data));
+ params ? params->flags : 0);
}
mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE,
@@ -7613,8 +7618,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- cmd->user_data = UINT_PTR(current_flags);
-
err = hci_cmd_sync_queue(hdev, add_device_sync, cmd,
add_device_complete);
if (err < 0) {
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dcbf058de1e3..dc331b59b965 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2061,7 +2061,7 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
- if (try_to_del_timer_sync(&query->timer) >= 0 ||
+ if (timer_delete_sync_try(&query->timer) >= 0 ||
timer_delete(&query->timer))
mod_timer(&query->timer, jiffies);
}
@@ -3480,7 +3480,7 @@ static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx,
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
- try_to_del_timer_sync(&mp->timer) >= 0))
+ timer_delete_sync_try(&mp->timer) >= 0))
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
@@ -3488,7 +3488,7 @@ static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx,
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
- try_to_del_timer_sync(&p->timer) >= 0 &&
+ timer_delete_sync_try(&p->timer) >= 0 &&
(brmctx->multicast_igmp_version == 2 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
@@ -3569,7 +3569,7 @@ static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx,
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
- try_to_del_timer_sync(&mp->timer) >= 0))
+ timer_delete_sync_try(&mp->timer) >= 0))
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
@@ -3577,7 +3577,7 @@ static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx,
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
- try_to_del_timer_sync(&p->timer) >= 0 &&
+ timer_delete_sync_try(&p->timer) >= 0 &&
(brmctx->multicast_mld_version == 1 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
@@ -3649,7 +3649,7 @@ br_multicast_leave_group(struct net_bridge_mcast *brmctx,
if (!hlist_unhashed(&p->mglist) &&
(timer_pending(&p->timer) ?
time_after(p->timer.expires, time) :
- try_to_del_timer_sync(&p->timer) >= 0)) {
+ timer_delete_sync_try(&p->timer) >= 0)) {
mod_timer(&p->timer, time);
}
@@ -3665,7 +3665,7 @@ br_multicast_leave_group(struct net_bridge_mcast *brmctx,
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
- try_to_del_timer_sync(&mp->timer) >= 0)) {
+ timer_delete_sync_try(&mp->timer) >= 0)) {
mod_timer(&mp->timer, time);
}
@@ -3681,7 +3681,7 @@ br_multicast_leave_group(struct net_bridge_mcast *brmctx,
if (!hlist_unhashed(&p->mglist) &&
(timer_pending(&p->timer) ?
time_after(p->timer.expires, time) :
- try_to_del_timer_sync(&p->timer) >= 0)) {
+ timer_delete_sync_try(&p->timer) >= 0)) {
mod_timer(&p->timer, time);
}
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 98aea5485aae..a8c67035e23c 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = {
* ipt_REJECT needs it. Future netfilter modules might
* require us to fill additional fields.
*/
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
- [RTAX_MTU - 1] = 1500,
-};
-
void br_netfilter_rtable_init(struct net_bridge *br)
{
struct rtable *rt = &br->fake_rtable;
rcuref_init(&rt->dst.__rcuref, 1);
rt->dst.dev = br->dev;
- dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+ dst_init_metrics(&rt->dst, br->metrics, false);
+ dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu);
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d5b3c5936a79..4715a8d6dc32 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -505,6 +505,7 @@ struct net_bridge {
struct rtable fake_rtable;
struct rt6_info fake_rt6_info;
};
+ u32 metrics[RTAX_MAX];
#endif
u16 group_fwd_mask;
u16 group_fwd_mask_required;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 0bca1b9b3f70..6bc1cc4c94c5 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -58,6 +58,7 @@
#include <linux/can/skb.h>
#include <linux/can/bcm.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <net/sock.h>
#include <net/net_namespace.h>
@@ -122,6 +123,7 @@ struct bcm_op {
struct canfd_frame last_sframe;
struct sock *sk;
struct net_device *rx_reg_dev;
+ spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */
};
struct bcm_sock {
@@ -217,7 +219,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex));
seq_printf(m, " <<<\n");
- list_for_each_entry(op, &bo->rx_ops, list) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(op, &bo->rx_ops, list) {
unsigned long reduction;
@@ -273,6 +277,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, "# sent %ld\n", op->frames_abs);
}
seq_putc(m, '\n');
+
+ rcu_read_unlock();
+
return 0;
}
#endif /* CONFIG_PROC_FS */
@@ -285,13 +292,18 @@ static void bcm_can_tx(struct bcm_op *op)
{
struct sk_buff *skb;
struct net_device *dev;
- struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
+ struct canfd_frame *cf;
int err;
/* no target device? => exit */
if (!op->ifindex)
return;
+ /* read currframe under lock protection */
+ spin_lock_bh(&op->bcm_tx_lock);
+ cf = op->frames + op->cfsiz * op->currframe;
+ spin_unlock_bh(&op->bcm_tx_lock);
+
dev = dev_get_by_index(sock_net(op->sk), op->ifindex);
if (!dev) {
/* RFC: should this bcm_op remove itself here? */
@@ -312,6 +324,10 @@ static void bcm_can_tx(struct bcm_op *op)
skb->dev = dev;
can_skb_set_owner(skb, op->sk);
err = can_send(skb, 1);
+
+ /* update currframe and count under lock protection */
+ spin_lock_bh(&op->bcm_tx_lock);
+
if (!err)
op->frames_abs++;
@@ -320,6 +336,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* reached last frame? */
if (op->currframe >= op->nframes)
op->currframe = 0;
+
+ if (op->count > 0)
+ op->count--;
+
+ spin_unlock_bh(&op->bcm_tx_lock);
out:
dev_put(dev);
}
@@ -430,7 +451,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
struct bcm_msg_head msg_head;
if (op->kt_ival1 && (op->count > 0)) {
- op->count--;
+ bcm_can_tx(op);
if (!op->count && (op->flags & TX_COUNTEVT)) {
/* create notification to user */
@@ -445,7 +466,6 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
bcm_send_to_user(op, &msg_head, NULL, 0);
}
- bcm_can_tx(op);
} else if (op->kt_ival2) {
bcm_can_tx(op);
@@ -843,7 +863,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
REGMASK(op->can_id),
bcm_rx_handler, op);
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return 1; /* done */
}
@@ -863,7 +883,7 @@ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh,
list_for_each_entry_safe(op, n, ops, list) {
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return 1; /* done */
}
@@ -956,6 +976,27 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
op->flags = msg_head->flags;
+ /* only lock for unlikely count/nframes/currframe changes */
+ if (op->nframes != msg_head->nframes ||
+ op->flags & TX_RESET_MULTI_IDX ||
+ op->flags & SETTIMER) {
+
+ spin_lock_bh(&op->bcm_tx_lock);
+
+ if (op->nframes != msg_head->nframes ||
+ op->flags & TX_RESET_MULTI_IDX) {
+ /* potentially update changed nframes */
+ op->nframes = msg_head->nframes;
+ /* restart multiple frame transmission */
+ op->currframe = 0;
+ }
+
+ if (op->flags & SETTIMER)
+ op->count = msg_head->count;
+
+ spin_unlock_bh(&op->bcm_tx_lock);
+ }
+
} else {
/* insert new BCM operation for the given can_id */
@@ -963,9 +1004,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
+ spin_lock_init(&op->bcm_tx_lock);
op->can_id = msg_head->can_id;
op->cfsiz = CFSIZ(msg_head->flags);
op->flags = msg_head->flags;
+ op->nframes = msg_head->nframes;
+
+ if (op->flags & SETTIMER)
+ op->count = msg_head->count;
/* create array for CAN frames and copy the data */
if (msg_head->nframes > 1) {
@@ -1023,22 +1069,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
} /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
- if (op->nframes != msg_head->nframes) {
- op->nframes = msg_head->nframes;
- /* start multiple frame transmission with index 0 */
- op->currframe = 0;
- }
-
- /* check flags */
-
- if (op->flags & TX_RESET_MULTI_IDX) {
- /* start multiple frame transmission with index 0 */
- op->currframe = 0;
- }
-
if (op->flags & SETTIMER) {
/* set timer values */
- op->count = msg_head->count;
op->ival1 = msg_head->ival1;
op->ival2 = msg_head->ival2;
op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
@@ -1055,11 +1087,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->flags |= TX_ANNOUNCE;
}
- if (op->flags & TX_ANNOUNCE) {
+ if (op->flags & TX_ANNOUNCE)
bcm_can_tx(op);
- if (op->count)
- op->count--;
- }
if (op->flags & STARTTIMER)
bcm_tx_start_timer(op);
@@ -1272,7 +1301,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
bcm_rx_handler, op, "bcm", sk);
if (err) {
/* this bcm rx op is broken -> remove it */
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return err;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 11da1e272ec2..0d891634c692 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10441,6 +10441,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
if (!(features & feature) && (lower->features & feature)) {
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
&feature, lower->name);
+ netdev_lock_ops(lower);
lower->wanted_features &= ~feature;
__netdev_update_features(lower);
@@ -10449,6 +10450,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
&feature, lower->name);
else
netdev_features_change(lower);
+ netdev_unlock_ops(lower);
}
}
}
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 6e27a47d0493..2db428ab6b8b 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -200,6 +200,8 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
refcount_set(&binding->ref, 1);
+ mutex_init(&binding->lock);
+
binding->dmabuf = dmabuf;
binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
@@ -379,6 +381,11 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv,
xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
if (bound_rxq == rxq) {
xa_erase(&binding->bound_rxqs, xa_idx);
+ if (xa_empty(&binding->bound_rxqs)) {
+ mutex_lock(&binding->lock);
+ binding->dev = NULL;
+ mutex_unlock(&binding->lock);
+ }
break;
}
}
diff --git a/net/core/devmem.h b/net/core/devmem.h
index 7fc158d52729..a1aabc9685cc 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -20,6 +20,8 @@ struct net_devmem_dmabuf_binding {
struct sg_table *sgt;
struct net_device *dev;
struct gen_pool *chunk_pool;
+ /* Protect dev */
+ struct mutex lock;
/* The user holds a ref (via the netlink API) for as long as they want
* the binding to remain alive. Each page pool using this binding holds
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index dae9f0d432fb..a877693fecd6 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -979,14 +979,25 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
{
struct net_devmem_dmabuf_binding *binding;
struct net_devmem_dmabuf_binding *temp;
+ netdevice_tracker dev_tracker;
struct net_device *dev;
mutex_lock(&priv->lock);
list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
+ mutex_lock(&binding->lock);
dev = binding->dev;
+ if (!dev) {
+ mutex_unlock(&binding->lock);
+ net_devmem_unbind_dmabuf(binding);
+ continue;
+ }
+ netdev_hold(dev, &dev_tracker, GFP_KERNEL);
+ mutex_unlock(&binding->lock);
+
netdev_lock(dev);
net_devmem_unbind_dmabuf(binding);
netdev_unlock(dev);
+ netdev_put(dev, &dev_tracker);
}
mutex_unlock(&priv->lock);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index e54449c9ab0b..1d9466a1f54e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -148,6 +148,8 @@
#include <linux/ethtool.h>
+#include <uapi/linux/pidfd.h>
+
#include "dev.h"
static DEFINE_MUTEX(proto_list_mutex);
@@ -1879,6 +1881,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
{
struct pid *peer_pid;
struct file *pidfd_file = NULL;
+ unsigned int flags = 0;
int pidfd;
if (len > sizeof(pidfd))
@@ -1891,7 +1894,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
if (!peer_pid)
return -ENODATA;
- pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
+ /* The use of PIDFD_STALE requires stashing of struct pid
+ * on pidfs with pidfs_register_pid() and only AF_UNIX
+ * were prepared for this.
+ */
+ if (sk->sk_family == AF_UNIX)
+ flags = PIDFD_STALE;
+
+ pidfd = pidfd_prepare(peer_pid, flags, &pidfd_file);
put_pid(peer_pid);
if (pidfd < 0)
return pidfd;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index c33d4bf17929..0b7564b53790 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -140,7 +140,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
- u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ u8 *tag;
+
+ if (skb_linearize(skb))
+ return NULL;
+
+ tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M,
KSZ_EGRESS_TAG_LEN);
@@ -311,10 +316,16 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
- /* Tag decoding */
- u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
unsigned int len = KSZ_EGRESS_TAG_LEN;
+ unsigned int port;
+ u8 *tag;
+
+ if (skb_linearize(skb))
+ return NULL;
+
+ /* Tag decoding */
+ tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
/* Extra 4-bytes PTP timestamp */
if (tag[0] & KSZ9477_PTP_TAG_INDICATION) {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0e4076866c0a..f14a41ee4aa1 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
@@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -199,8 +154,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
sk = esp_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
- if (err)
+ if (err) {
+ kfree_skb(skb);
goto out;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -209,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -392,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a8b04d4abcaa..85dc208f32e9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -120,11 +120,6 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv4.mr_tables))
-static bool ipmr_can_free_table(struct net *net)
-{
- return !check_net(net) || !net_initialized(net);
-}
-
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -317,11 +312,6 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
-static bool ipmr_can_free_table(struct net *net)
-{
- return !check_net(net);
-}
-
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -437,7 +427,7 @@ static void ipmr_free_table(struct mr_table *mrt)
{
struct net *net = read_pnet(&mrt->net);
- WARN_ON_ONCE(!ipmr_can_free_table(net));
+ WARN_ON_ONCE(!mr_can_free_table(net));
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index b5b06323cfd9..0d31a8c108d4 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
- int ret;
-
- offset = offset - sizeof(struct udphdr);
+ int len, dlen;
+ __u8 *udpdata;
+ __be32 *udpdata32;
- if (!pskb_pull(skb, offset))
+ len = skb->len - offset;
+ dlen = offset + min(len, 8);
+ udpdata = skb_gro_header(skb, dlen, offset);
+ udpdata32 = (__be32 *)udpdata;
+ if (unlikely(!udpdata))
return NULL;
rcu_read_lock();
@@ -194,11 +198,10 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (!ops || !ops->callbacks.gro_receive)
goto out;
- ret = __xfrm4_udp_encap_rcv(sk, skb, false);
- if (ret)
+ /* check if it is a keepalive or IKE packet */
+ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -208,7 +211,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
out:
rcu_read_unlock();
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9e73944e3b53..72adfc107b55 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET6_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
@@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -216,8 +171,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
sk = esp6_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
- if (err)
+ if (err) {
+ kfree_skb(skb);
goto out;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -226,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -422,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index b413c9c8a21c..3276cde5ebd7 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -108,11 +108,6 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv6.mr6_tables))
-static bool ip6mr_can_free_table(struct net *net)
-{
- return !check_net(net) || !net_initialized(net);
-}
-
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -306,11 +301,6 @@ EXPORT_SYMBOL(ip6mr_rule_default);
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static bool ip6mr_can_free_table(struct net *net)
-{
- return !check_net(net);
-}
-
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -416,7 +406,7 @@ static void ip6mr_free_table(struct mr_table *mrt)
{
struct net *net = read_pnet(&mrt->net);
- WARN_ON_ONCE(!ip6mr_can_free_table(net));
+ WARN_ON_ONCE(!mr_can_free_table(net));
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4abc5e9d6322..841c81abaaf4 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
- int ret;
+ int len, dlen;
+ __u8 *udpdata;
+ __be32 *udpdata32;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_gro_udp_encap_rcv(sk, head, skb);
- offset = offset - sizeof(struct udphdr);
-
- if (!pskb_pull(skb, offset))
+ len = skb->len - offset;
+ dlen = offset + min(len, 8);
+ udpdata = skb_gro_header(skb, dlen, offset);
+ udpdata32 = (__be32 *)udpdata;
+ if (unlikely(!udpdata))
return NULL;
rcu_read_lock();
@@ -194,11 +198,10 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (!ops || !ops->callbacks.gro_receive)
goto out;
- ret = __xfrm6_udp_encap_rcv(sk, skb, false);
- if (ret)
+ /* check if it is a keepalive or IKE packet */
+ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -208,7 +211,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
out:
rcu_read_unlock();
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 0259cde394ba..cc77ec5769d8 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -887,15 +887,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (sk->sk_type != SOCK_STREAM)
goto copy_uaddr;
+ /* Partial read */
+ if (used + offset < skb_len)
+ continue;
+
if (!(flags & MSG_PEEK)) {
skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
*seq = 0;
}
-
- /* Partial read */
- if (used + offset < skb_len)
- continue;
} while (len > 0);
out:
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 741e6c7edcb7..6b6de43d9420 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1354,10 +1354,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR);
- local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) +
- sizeof(void *) * channels, GFP_KERNEL);
+ local->int_scan_req = kzalloc(struct_size(local->int_scan_req,
+ channels, channels),
+ GFP_KERNEL);
if (!local->int_scan_req)
return -ENOMEM;
+ local->int_scan_req->n_channels = channels;
eth_broadcast_addr(local->int_scan_req->bssid);
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 8e0724c56723..7c0dcf3df319 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -117,11 +117,18 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
struct ifaddrmsg *hdr;
struct mctp_dev *mdev;
- int ifindex, rc;
-
- hdr = nlmsg_data(cb->nlh);
- // filter by ifindex if requested
- ifindex = hdr->ifa_index;
+ int ifindex = 0, rc;
+
+ /* Filter by ifindex if a header is provided */
+ if (cb->nlh->nlmsg_len >= nlmsg_msg_size(sizeof(*hdr))) {
+ hdr = nlmsg_data(cb->nlh);
+ ifindex = hdr->ifa_index;
+ } else {
+ if (cb->strict_check) {
+ NL_SET_ERR_MSG(cb->extack, "mctp: Invalid header for addr dump request");
+ return -EINVAL;
+ }
+ }
rcu_read_lock();
for_each_netdev_dump(net, dev, mcb->ifindex) {
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 4c460160914f..d9c8e5a5f9ce 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -313,8 +313,10 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
key = flow->key;
- if (WARN_ON(key->dev && key->dev != dev))
+ if (key->dev) {
+ WARN_ON(key->dev != dev);
return;
+ }
mctp_dev_set_key(dev, key);
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 12dd71139da3..c93761040c6e 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -144,7 +144,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 2ca5332cfcc5..902ff5470607 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1136,7 +1136,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
}
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
if (!skb)
break;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6c9029f71e88..2a0f3a513bfa 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -441,7 +441,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
while (sch->q.qlen > sch->limit ||
q->memory_usage > q->memory_limit) {
- struct sk_buff *skb = fq_codel_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
q->cstats.drop_len += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index f3b8203d3e85..df7fac95ab15 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -366,7 +366,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
len_dropped += qdisc_pkt_len(skb);
num_dropped += 1;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index cb8c525ea20e..7986145a527c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1569,6 +1569,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
return err;
}
+ sch->qstats.backlog += len;
+ sch->q.qlen++;
+
if (first && !cl->cl_nactive) {
if (cl->cl_flags & HFSC_RSC)
init_ed(cl, len);
@@ -1584,9 +1587,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 44d9efe1a96a..5aa434b46707 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -564,7 +564,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
qlen = sch->q.qlen;
prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = hhf_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
rtnl_kfree_skbs(skb, skb);
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 3771d000b30d..ff49a6c97033 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -195,7 +195,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index eadc00410ebc..98f78cd55905 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -631,7 +631,7 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
const char *name)
{
struct qstr q = QSTR(name);
- struct dentry *dentry = d_hash_and_lookup(parent, &q);
+ struct dentry *dentry = try_lookup_noperm(&q, parent);
if (!dentry) {
dentry = d_alloc(parent, &q);
if (!dentry)
@@ -658,7 +658,7 @@ static void __rpc_depopulate(struct dentry *parent,
for (i = start; i < eof; i++) {
name.name = files[i].name;
name.len = strlen(files[i].name);
- dentry = d_hash_and_lookup(parent, &name);
+ dentry = try_lookup_noperm(&name, parent);
if (dentry == NULL)
continue;
@@ -1190,7 +1190,7 @@ static const struct rpc_filelist files[] = {
struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name)
{
- return d_hash_and_lookup(sb->s_root, &QSTR(dir_name));
+ return try_lookup_noperm(&QSTR(dir_name), sb->s_root);
}
EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
@@ -1301,7 +1301,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
struct dentry *pipe_dentry = NULL;
/* We should never get this far if "gssd" doesn't exist */
- gssd_dentry = d_hash_and_lookup(root, &QSTR(files[RPCAUTH_gssd].name));
+ gssd_dentry = try_lookup_noperm(&QSTR(files[RPCAUTH_gssd].name), root);
if (!gssd_dentry)
return ERR_PTR(-ENOENT);
@@ -1311,8 +1311,8 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
goto out;
}
- clnt_dentry = d_hash_and_lookup(gssd_dentry,
- &QSTR(gssd_dummy_clnt_dir[0].name));
+ clnt_dentry = try_lookup_noperm(&QSTR(gssd_dummy_clnt_dir[0].name),
+ gssd_dentry);
if (!clnt_dentry) {
__rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(-ENOENT);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c524421ec652..8584893b4785 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
goto exit;
}
+ /* Get net to avoid freed tipc_crypto when delete namespace */
+ get_net(aead->crypto->net);
+
/* Now, do encrypt */
rc = crypto_aead_encrypt(req);
if (rc == -EINPROGRESS || rc == -EBUSY)
return rc;
tipc_bearer_put(b);
+ put_net(aead->crypto->net);
exit:
kfree(ctx);
@@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(void *data, int err)
kfree(tx_ctx);
tipc_bearer_put(b);
tipc_aead_put(aead);
+ put_net(net);
}
/**
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 77e33e1e340e..65b0da6fdf6a 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -396,7 +396,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
shinfo = skb_shinfo(strp->anchor);
- shinfo->frag_list = NULL;
/* If we don't know the length go max plus page for cipher overhead */
need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
@@ -412,6 +411,8 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
page, 0, 0);
}
+ shinfo->frag_list = NULL;
+
strp->copy_mode = 1;
strp->stm.offset = 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f78a2492826f..59a64b2ced6e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -85,10 +85,13 @@
#include <linux/file.h>
#include <linux/filter.h>
#include <linux/fs.h>
+#include <linux/fs_struct.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mount.h>
#include <linux/namei.h>
+#include <linux/net.h>
+#include <linux/pidfs.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/sched/signal.h>
@@ -643,6 +646,9 @@ static void unix_sock_destructor(struct sock *sk)
return;
}
+ if (sk->sk_peer_pid)
+ pidfs_put_pid(sk->sk_peer_pid);
+
if (u->addr)
unix_release_addr(u->addr);
@@ -734,13 +740,48 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_gc(); /* Garbage collect fds */
}
-static void init_peercred(struct sock *sk)
+struct unix_peercred {
+ struct pid *peer_pid;
+ const struct cred *peer_cred;
+};
+
+static inline int prepare_peercred(struct unix_peercred *peercred)
{
- sk->sk_peer_pid = get_pid(task_tgid(current));
- sk->sk_peer_cred = get_current_cred();
+ struct pid *pid;
+ int err;
+
+ pid = task_tgid(current);
+ err = pidfs_register_pid(pid);
+ if (likely(!err)) {
+ peercred->peer_pid = get_pid(pid);
+ peercred->peer_cred = get_current_cred();
+ }
+ return err;
}
-static void update_peercred(struct sock *sk)
+static void drop_peercred(struct unix_peercred *peercred)
+{
+ const struct cred *cred = NULL;
+ struct pid *pid = NULL;
+
+ might_sleep();
+
+ swap(peercred->peer_pid, pid);
+ swap(peercred->peer_cred, cred);
+
+ pidfs_put_pid(pid);
+ put_pid(pid);
+ put_cred(cred);
+}
+
+static inline void init_peercred(struct sock *sk,
+ const struct unix_peercred *peercred)
+{
+ sk->sk_peer_pid = peercred->peer_pid;
+ sk->sk_peer_cred = peercred->peer_cred;
+}
+
+static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
{
const struct cred *old_cred;
struct pid *old_pid;
@@ -748,11 +789,11 @@ static void update_peercred(struct sock *sk)
spin_lock(&sk->sk_peer_lock);
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
- init_peercred(sk);
+ init_peercred(sk, peercred);
spin_unlock(&sk->sk_peer_lock);
- put_pid(old_pid);
- put_cred(old_cred);
+ peercred->peer_pid = old_pid;
+ peercred->peer_cred = old_cred;
}
static void copy_peercred(struct sock *sk, struct sock *peersk)
@@ -761,6 +802,7 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
spin_lock(&sk->sk_peer_lock);
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+ pidfs_get_pid(sk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
spin_unlock(&sk->sk_peer_lock);
}
@@ -770,6 +812,7 @@ static int unix_listen(struct socket *sock, int backlog)
int err;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
+ struct unix_peercred peercred = {};
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -777,6 +820,9 @@ static int unix_listen(struct socket *sock, int backlog)
err = -EINVAL;
if (!READ_ONCE(u->addr))
goto out; /* No listens on an unbound socket */
+ err = prepare_peercred(&peercred);
+ if (err)
+ goto out;
unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
goto out_unlock;
@@ -786,11 +832,12 @@ static int unix_listen(struct socket *sock, int backlog)
WRITE_ONCE(sk->sk_state, TCP_LISTEN);
/* set credentials so connect can copy them */
- update_peercred(sk);
+ update_peercred(sk, &peercred);
err = 0;
out_unlock:
unix_state_unlock(sk);
+ drop_peercred(&peercred);
out:
return err;
}
@@ -1101,7 +1148,7 @@ static int unix_release(struct socket *sock)
}
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
- int type)
+ int type, int flags)
{
struct inode *inode;
struct path path;
@@ -1109,13 +1156,39 @@ static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
int err;
unix_mkname_bsd(sunaddr, addr_len);
- err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
- if (err)
- goto fail;
- err = path_permission(&path, MAY_WRITE);
- if (err)
- goto path_put;
+ if (flags & SOCK_COREDUMP) {
+ const struct cred *cred;
+ struct cred *kcred;
+ struct path root;
+
+ kcred = prepare_kernel_cred(&init_task);
+ if (!kcred) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ task_lock(&init_task);
+ get_fs_root(init_task.fs, &root);
+ task_unlock(&init_task);
+
+ cred = override_creds(kcred);
+ err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
+ LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
+ LOOKUP_NO_MAGICLINKS, &path);
+ put_cred(revert_creds(cred));
+ path_put(&root);
+ if (err)
+ goto fail;
+ } else {
+ err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
+ if (err)
+ goto fail;
+
+ err = path_permission(&path, MAY_WRITE);
+ if (err)
+ goto path_put;
+ }
err = -ECONNREFUSED;
inode = d_backing_inode(path.dentry);
@@ -1165,12 +1238,12 @@ static struct sock *unix_find_abstract(struct net *net,
static struct sock *unix_find_other(struct net *net,
struct sockaddr_un *sunaddr,
- int addr_len, int type)
+ int addr_len, int type, int flags)
{
struct sock *sk;
if (sunaddr->sun_path[0])
- sk = unix_find_bsd(sunaddr, addr_len, type);
+ sk = unix_find_bsd(sunaddr, addr_len, type, flags);
else
sk = unix_find_abstract(net, sunaddr, addr_len, type);
@@ -1428,7 +1501,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
}
restart:
- other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
+ other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
if (IS_ERR(other)) {
err = PTR_ERR(other);
goto out;
@@ -1525,6 +1598,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
+ struct unix_peercred peercred = {};
struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
unsigned char state;
@@ -1561,6 +1635,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out;
}
+ err = prepare_peercred(&peercred);
+ if (err)
+ goto out;
+
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
if (!skb) {
@@ -1570,7 +1648,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
restart:
/* Find listening sock. */
- other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
+ other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
if (IS_ERR(other)) {
err = PTR_ERR(other);
goto out_free_skb;
@@ -1636,7 +1714,7 @@ restart:
unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
- init_peercred(newsk);
+ init_peercred(newsk, &peercred);
newu = unix_sk(newsk);
newu->listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
@@ -1695,20 +1773,33 @@ out_free_skb:
out_free_sk:
unix_release_sock(newsk, 0);
out:
+ drop_peercred(&peercred);
return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
+ struct unix_peercred ska_peercred = {}, skb_peercred = {};
struct sock *ska = socka->sk, *skb = sockb->sk;
+ int err;
+
+ err = prepare_peercred(&ska_peercred);
+ if (err)
+ return err;
+
+ err = prepare_peercred(&skb_peercred);
+ if (err) {
+ drop_peercred(&ska_peercred);
+ return err;
+ }
/* Join our sockets back to back */
sock_hold(ska);
sock_hold(skb);
unix_peer(ska) = skb;
unix_peer(skb) = ska;
- init_peercred(ska);
- init_peercred(skb);
+ init_peercred(ska, &ska_peercred);
+ init_peercred(skb, &skb_peercred);
ska->sk_state = TCP_ESTABLISHED;
skb->sk_state = TCP_ESTABLISHED;
@@ -2026,7 +2117,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_namelen) {
lookup:
other = unix_find_other(sock_net(sk), msg->msg_name,
- msg->msg_namelen, sk->sk_type);
+ msg->msg_namelen, sk->sk_type, 0);
if (IS_ERR(other)) {
err = PTR_ERR(other);
goto out_free;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4abc81f33d3e..72c000c0ae5f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1304,7 +1304,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
- if (xs->zc && qid < dev->real_num_rx_queues) {
+ if (qid < dev->real_num_rx_queues) {
struct netdev_rx_queue *rxq;
rxq = __netif_get_rx_queue(dev, qid);
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index fe82e2d07300..fc7a603b04f1 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -171,8 +171,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
struct espintcp_ctx *ctx = espintcp_getctx(sk);
if (skb_queue_len(&ctx->out_queue) >=
- READ_ONCE(net_hotdata.max_backlog))
+ READ_ONCE(net_hotdata.max_backlog)) {
+ kfree_skb(skb);
return -ENOBUFS;
+ }
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 0c1420534394..907c3ccb440d 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -48,7 +48,6 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen)
{
struct acomp_req *req = ipcomp_cb(skb)->req;
struct ipcomp_req_extra *extra;
- const int plen = skb->data_len;
struct scatterlist *dsg;
int len, dlen;
@@ -64,7 +63,7 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen)
/* Only update truesize on input. */
if (!hlen)
- skb->truesize += dlen - plen;
+ skb->truesize += dlen;
skb->data_len = dlen;
skb->len += dlen;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 143ac3aa7537..f4bad8c895d6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1581,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *delpol;
struct hlist_head *chain;
+ /* Sanitize mark before store */
+ policy->mark.v &= policy->mark.m;
+
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
if (chain)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 341d79ecb5c2..07fe8e5daa32 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -838,9 +838,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
- if (x->encap_sk)
- sock_put(rcu_dereference_raw(x->encap_sk));
-
xfrm_dev_state_delete(x);
/* All xfrm_state objects are created by xfrm_state_alloc.
@@ -1721,6 +1718,9 @@ static void __xfrm_state_insert(struct xfrm_state *x)
list_add(&x->km.all, &net->xfrm.state_all);
+ /* Sanitize mark before store */
+ x->mark.v &= x->mark.m;
+
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index ab37e1d35c70..1a532b83a9af 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -10,6 +10,7 @@
#include <linux/blk-mq.h>
#include <linux/blk_types.h>
#include <linux/blkdev.h>
+#include <linux/configfs.h>
#include <linux/cpumask.h>
#include <linux/cred.h>
#include <linux/device/faux.h>
diff --git a/rust/helpers/mutex.c b/rust/helpers/mutex.c
index 06575553eda5..3e9b910a88e9 100644
--- a/rust/helpers/mutex.c
+++ b/rust/helpers/mutex.c
@@ -17,3 +17,8 @@ void rust_helper_mutex_assert_is_held(struct mutex *mutex)
{
lockdep_assert_held(mutex);
}
+
+void rust_helper_mutex_destroy(struct mutex *lock)
+{
+ mutex_destroy(lock);
+}
diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs
new file mode 100644
index 000000000000..b93ac7b0bebc
--- /dev/null
+++ b/rust/kernel/configfs.rs
@@ -0,0 +1,1049 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! configfs interface: Userspace-driven Kernel Object Configuration
+//!
+//! configfs is an in-memory pseudo file system for configuration of kernel
+//! modules. Please see the [C documentation] for details and intended use of
+//! configfs.
+//!
+//! This module does not support the following configfs features:
+//!
+//! - Items. All group children are groups.
+//! - Symlink support.
+//! - `disconnect_notify` hook.
+//! - Default groups.
+//!
+//! See the [`rust_configfs.rs`] sample for a full example use of this module.
+//!
+//! C header: [`include/linux/configfs.h`](srctree/include/linux/configfs.h)
+//!
+//! # Example
+//!
+//! ```ignore
+//! use kernel::alloc::flags;
+//! use kernel::c_str;
+//! use kernel::configfs_attrs;
+//! use kernel::configfs;
+//! use kernel::new_mutex;
+//! use kernel::page::PAGE_SIZE;
+//! use kernel::sync::Mutex;
+//! use kernel::ThisModule;
+//!
+//! #[pin_data]
+//! struct RustConfigfs {
+//! #[pin]
+//! config: configfs::Subsystem<Configuration>,
+//! }
+//!
+//! impl kernel::InPlaceModule for RustConfigfs {
+//! fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
+//! pr_info!("Rust configfs sample (init)\n");
+//!
+//! let item_type = configfs_attrs! {
+//! container: configfs::Subsystem<Configuration>,
+//! data: Configuration,
+//! attributes: [
+//! message: 0,
+//! bar: 1,
+//! ],
+//! };
+//!
+//! try_pin_init!(Self {
+//! config <- configfs::Subsystem::new(
+//! c_str!("rust_configfs"), item_type, Configuration::new()
+//! ),
+//! })
+//! }
+//! }
+//!
+//! #[pin_data]
+//! struct Configuration {
+//! message: &'static CStr,
+//! #[pin]
+//! bar: Mutex<(KBox<[u8; PAGE_SIZE]>, usize)>,
+//! }
+//!
+//! impl Configuration {
+//! fn new() -> impl PinInit<Self, Error> {
+//! try_pin_init!(Self {
+//! message: c_str!("Hello World\n"),
+//! bar <- new_mutex!((KBox::new([0; PAGE_SIZE], flags::GFP_KERNEL)?, 0)),
+//! })
+//! }
+//! }
+//!
+//! #[vtable]
+//! impl configfs::AttributeOperations<0> for Configuration {
+//! type Data = Configuration;
+//!
+//! fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+//! pr_info!("Show message\n");
+//! let data = container.message;
+//! page[0..data.len()].copy_from_slice(data);
+//! Ok(data.len())
+//! }
+//! }
+//!
+//! #[vtable]
+//! impl configfs::AttributeOperations<1> for Configuration {
+//! type Data = Configuration;
+//!
+//! fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+//! pr_info!("Show bar\n");
+//! let guard = container.bar.lock();
+//! let data = guard.0.as_slice();
+//! let len = guard.1;
+//! page[0..len].copy_from_slice(&data[0..len]);
+//! Ok(len)
+//! }
+//!
+//! fn store(container: &Configuration, page: &[u8]) -> Result {
+//! pr_info!("Store bar\n");
+//! let mut guard = container.bar.lock();
+//! guard.0[0..page.len()].copy_from_slice(page);
+//! guard.1 = page.len();
+//! Ok(())
+//! }
+//! }
+//! ```
+//!
+//! [C documentation]: srctree/Documentation/filesystems/configfs.rst
+//! [`rust_configfs.rs`]: srctree/samples/rust/rust_configfs.rs
+
+use crate::alloc::flags;
+use crate::container_of;
+use crate::page::PAGE_SIZE;
+use crate::prelude::*;
+use crate::str::CString;
+use crate::sync::Arc;
+use crate::sync::ArcBorrow;
+use crate::types::Opaque;
+use core::cell::UnsafeCell;
+use core::marker::PhantomData;
+
+/// A configfs subsystem.
+///
+/// This is the top level entrypoint for a configfs hierarchy. To register
+/// with configfs, embed a field of this type into your kernel module struct.
+#[pin_data(PinnedDrop)]
+pub struct Subsystem<Data> {
+ #[pin]
+ subsystem: Opaque<bindings::configfs_subsystem>,
+ #[pin]
+ data: Data,
+}
+
+// SAFETY: We do not provide any operations on `Subsystem`.
+unsafe impl<Data> Sync for Subsystem<Data> {}
+
+// SAFETY: Ownership of `Subsystem` can safely be transferred to other threads.
+unsafe impl<Data> Send for Subsystem<Data> {}
+
+impl<Data> Subsystem<Data> {
+ /// Create an initializer for a [`Subsystem`].
+ ///
+ /// The subsystem will appear in configfs as a directory name given by
+ /// `name`. The attributes available in directory are specified by
+ /// `item_type`.
+ pub fn new(
+ name: &'static CStr,
+ item_type: &'static ItemType<Subsystem<Data>, Data>,
+ data: impl PinInit<Data, Error>,
+ ) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ subsystem <- pin_init::zeroed().chain(
+ |place: &mut Opaque<bindings::configfs_subsystem>| {
+ // SAFETY: We initialized the required fields of `place.group` above.
+ unsafe {
+ bindings::config_group_init_type_name(
+ &mut (*place.get()).su_group,
+ name.as_ptr(),
+ item_type.as_ptr(),
+ )
+ };
+
+ // SAFETY: `place.su_mutex` is valid for use as a mutex.
+ unsafe {
+ bindings::__mutex_init(
+ &mut (*place.get()).su_mutex,
+ kernel::optional_name!().as_char_ptr(),
+ kernel::static_lock_class!().as_ptr(),
+ )
+ }
+ Ok(())
+ }
+ ),
+ data <- data,
+ })
+ .pin_chain(|this| {
+ crate::error::to_result(
+ // SAFETY: We initialized `this.subsystem` according to C API contract above.
+ unsafe { bindings::configfs_register_subsystem(this.subsystem.get()) },
+ )
+ })
+ }
+}
+
+#[pinned_drop]
+impl<Data> PinnedDrop for Subsystem<Data> {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY: We registered `self.subsystem` in the initializer returned by `Self::new`.
+ unsafe { bindings::configfs_unregister_subsystem(self.subsystem.get()) };
+ // SAFETY: We initialized the mutex in `Subsystem::new`.
+ unsafe { bindings::mutex_destroy(&raw mut (*self.subsystem.get()).su_mutex) };
+ }
+}
+
+/// Trait that allows offset calculations for structs that embed a
+/// `bindings::config_group`.
+///
+/// Users of the configfs API should not need to implement this trait.
+///
+/// # Safety
+///
+/// - Implementers of this trait must embed a `bindings::config_group`.
+/// - Methods must be implemented according to method documentation.
+pub unsafe trait HasGroup<Data> {
+ /// Return the address of the `bindings::config_group` embedded in [`Self`].
+ ///
+ /// # Safety
+ ///
+ /// - `this` must be a valid allocation of at least the size of [`Self`].
+ unsafe fn group(this: *const Self) -> *const bindings::config_group;
+
+ /// Return the address of the [`Self`] that `group` is embedded in.
+ ///
+ /// # Safety
+ ///
+ /// - `group` must point to the `bindings::config_group` that is embedded in
+ /// [`Self`].
+ unsafe fn container_of(group: *const bindings::config_group) -> *const Self;
+}
+
+// SAFETY: `Subsystem<Data>` embeds a field of type `bindings::config_group`
+// within the `subsystem` field.
+unsafe impl<Data> HasGroup<Data> for Subsystem<Data> {
+ unsafe fn group(this: *const Self) -> *const bindings::config_group {
+ // SAFETY: By impl and function safety requirement this projection is in bounds.
+ unsafe { &raw const (*(*this).subsystem.get()).su_group }
+ }
+
+ unsafe fn container_of(group: *const bindings::config_group) -> *const Self {
+ // SAFETY: By impl and function safety requirement this projection is in bounds.
+ let c_subsys_ptr = unsafe { container_of!(group, bindings::configfs_subsystem, su_group) };
+ let opaque_ptr = c_subsys_ptr.cast::<Opaque<bindings::configfs_subsystem>>();
+ // SAFETY: By impl and function safety requirement, `opaque_ptr` and the
+ // pointer it returns, are within the same allocation.
+ unsafe { container_of!(opaque_ptr, Subsystem<Data>, subsystem) }
+ }
+}
+
+/// A configfs group.
+///
+/// To add a subgroup to configfs, pass this type as `ctype` to
+/// [`crate::configfs_attrs`] when creating a group in [`GroupOperations::make_group`].
+#[pin_data]
+pub struct Group<Data> {
+ #[pin]
+ group: Opaque<bindings::config_group>,
+ #[pin]
+ data: Data,
+}
+
+impl<Data> Group<Data> {
+ /// Create an initializer for a new group.
+ ///
+ /// When instantiated, the group will appear as a directory with the name
+ /// given by `name` and it will contain attributes specified by `item_type`.
+ pub fn new(
+ name: CString,
+ item_type: &'static ItemType<Group<Data>, Data>,
+ data: impl PinInit<Data, Error>,
+ ) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ group <- pin_init::zeroed().chain(|v: &mut Opaque<bindings::config_group>| {
+ let place = v.get();
+ let name = name.as_bytes_with_nul().as_ptr();
+ // SAFETY: It is safe to initialize a group once it has been zeroed.
+ unsafe {
+ bindings::config_group_init_type_name(place, name.cast(), item_type.as_ptr())
+ };
+ Ok(())
+ }),
+ data <- data,
+ })
+ }
+}
+
+// SAFETY: `Group<Data>` embeds a field of type `bindings::config_group`
+// within the `group` field.
+unsafe impl<Data> HasGroup<Data> for Group<Data> {
+ unsafe fn group(this: *const Self) -> *const bindings::config_group {
+ Opaque::raw_get(
+ // SAFETY: By impl and function safety requirements this field
+ // projection is within bounds of the allocation.
+ unsafe { &raw const (*this).group },
+ )
+ }
+
+ unsafe fn container_of(group: *const bindings::config_group) -> *const Self {
+ let opaque_ptr = group.cast::<Opaque<bindings::config_group>>();
+ // SAFETY: By impl and function safety requirement, `opaque_ptr` and
+ // pointer it returns will be in the same allocation.
+ unsafe { container_of!(opaque_ptr, Self, group) }
+ }
+}
+
+/// # Safety
+///
+/// `this` must be a valid pointer.
+///
+/// If `this` does not represent the root group of a configfs subsystem,
+/// `this` must be a pointer to a `bindings::config_group` embedded in a
+/// `Group<Parent>`.
+///
+/// Otherwise, `this` must be a pointer to a `bindings::config_group` that
+/// is embedded in a `bindings::configfs_subsystem` that is embedded in a
+/// `Subsystem<Parent>`.
+unsafe fn get_group_data<'a, Parent>(this: *mut bindings::config_group) -> &'a Parent {
+ // SAFETY: `this` is a valid pointer.
+ let is_root = unsafe { (*this).cg_subsys.is_null() };
+
+ if !is_root {
+ // SAFETY: By C API contact,`this` was returned from a call to
+ // `make_group`. The pointer is known to be embedded within a
+ // `Group<Parent>`.
+ unsafe { &(*Group::<Parent>::container_of(this)).data }
+ } else {
+ // SAFETY: By C API contract, `this` is a pointer to the
+ // `bindings::config_group` field within a `Subsystem<Parent>`.
+ unsafe { &(*Subsystem::container_of(this)).data }
+ }
+}
+
+struct GroupOperationsVTable<Parent, Child>(PhantomData<(Parent, Child)>);
+
+impl<Parent, Child> GroupOperationsVTable<Parent, Child>
+where
+ Parent: GroupOperations<Child = Child>,
+ Child: 'static,
+{
+ /// # Safety
+ ///
+ /// `this` must be a valid pointer.
+ ///
+ /// If `this` does not represent the root group of a configfs subsystem,
+ /// `this` must be a pointer to a `bindings::config_group` embedded in a
+ /// `Group<Parent>`.
+ ///
+ /// Otherwise, `this` must be a pointer to a `bindings::config_group` that
+ /// is embedded in a `bindings::configfs_subsystem` that is embedded in a
+ /// `Subsystem<Parent>`.
+ ///
+ /// `name` must point to a null terminated string.
+ unsafe extern "C" fn make_group(
+ this: *mut bindings::config_group,
+ name: *const kernel::ffi::c_char,
+ ) -> *mut bindings::config_group {
+ // SAFETY: By function safety requirements of this function, this call
+ // is safe.
+ let parent_data = unsafe { get_group_data(this) };
+
+ let group_init = match Parent::make_group(
+ parent_data,
+ // SAFETY: By function safety requirements, name points to a null
+ // terminated string.
+ unsafe { CStr::from_char_ptr(name) },
+ ) {
+ Ok(init) => init,
+ Err(e) => return e.to_ptr(),
+ };
+
+ let child_group = <Arc<Group<Child>> as InPlaceInit<Group<Child>>>::try_pin_init(
+ group_init,
+ flags::GFP_KERNEL,
+ );
+
+ match child_group {
+ Ok(child_group) => {
+ let child_group_ptr = child_group.into_raw();
+ // SAFETY: We allocated the pointee of `child_ptr` above as a
+ // `Group<Child>`.
+ unsafe { Group::<Child>::group(child_group_ptr) }.cast_mut()
+ }
+ Err(e) => e.to_ptr(),
+ }
+ }
+
+ /// # Safety
+ ///
+ /// If `this` does not represent the root group of a configfs subsystem,
+ /// `this` must be a pointer to a `bindings::config_group` embedded in a
+ /// `Group<Parent>`.
+ ///
+ /// Otherwise, `this` must be a pointer to a `bindings::config_group` that
+ /// is embedded in a `bindings::configfs_subsystem` that is embedded in a
+ /// `Subsystem<Parent>`.
+ ///
+ /// `item` must point to a `bindings::config_item` within a
+ /// `bindings::config_group` within a `Group<Child>`.
+ unsafe extern "C" fn drop_item(
+ this: *mut bindings::config_group,
+ item: *mut bindings::config_item,
+ ) {
+ // SAFETY: By function safety requirements of this function, this call
+ // is safe.
+ let parent_data = unsafe { get_group_data(this) };
+
+ // SAFETY: By function safety requirements, `item` is embedded in a
+ // `config_group`.
+ let c_child_group_ptr = unsafe { container_of!(item, bindings::config_group, cg_item) };
+ // SAFETY: By function safety requirements, `c_child_group_ptr` is
+ // embedded within a `Group<Child>`.
+ let r_child_group_ptr = unsafe { Group::<Child>::container_of(c_child_group_ptr) };
+
+ if Parent::HAS_DROP_ITEM {
+ // SAFETY: We called `into_raw` to produce `r_child_group_ptr` in
+ // `make_group`.
+ let arc: Arc<Group<Child>> = unsafe { Arc::from_raw(r_child_group_ptr.cast_mut()) };
+
+ Parent::drop_item(parent_data, arc.as_arc_borrow());
+ arc.into_raw();
+ }
+
+ // SAFETY: By C API contract, we are required to drop a refcount on
+ // `item`.
+ unsafe { bindings::config_item_put(item) };
+ }
+
+ const VTABLE: bindings::configfs_group_operations = bindings::configfs_group_operations {
+ make_item: None,
+ make_group: Some(Self::make_group),
+ disconnect_notify: None,
+ drop_item: Some(Self::drop_item),
+ is_visible: None,
+ is_bin_visible: None,
+ };
+
+ const fn vtable_ptr() -> *const bindings::configfs_group_operations {
+ &Self::VTABLE as *const bindings::configfs_group_operations
+ }
+}
+
+struct ItemOperationsVTable<Container, Data>(PhantomData<(Container, Data)>);
+
+impl<Data> ItemOperationsVTable<Group<Data>, Data>
+where
+ Data: 'static,
+{
+ /// # Safety
+ ///
+ /// `this` must be a pointer to a `bindings::config_group` embedded in a
+ /// `Group<Parent>`.
+ ///
+ /// This function will destroy the pointee of `this`. The pointee of `this`
+ /// must not be accessed after the function returns.
+ unsafe extern "C" fn release(this: *mut bindings::config_item) {
+ // SAFETY: By function safety requirements, `this` is embedded in a
+ // `config_group`.
+ let c_group_ptr = unsafe { kernel::container_of!(this, bindings::config_group, cg_item) };
+ // SAFETY: By function safety requirements, `c_group_ptr` is
+ // embedded within a `Group<Data>`.
+ let r_group_ptr = unsafe { Group::<Data>::container_of(c_group_ptr) };
+
+ // SAFETY: We called `into_raw` on `r_group_ptr` in
+ // `make_group`.
+ let pin_self: Arc<Group<Data>> = unsafe { Arc::from_raw(r_group_ptr.cast_mut()) };
+ drop(pin_self);
+ }
+
+ const VTABLE: bindings::configfs_item_operations = bindings::configfs_item_operations {
+ release: Some(Self::release),
+ allow_link: None,
+ drop_link: None,
+ };
+
+ const fn vtable_ptr() -> *const bindings::configfs_item_operations {
+ &Self::VTABLE as *const bindings::configfs_item_operations
+ }
+}
+
+impl<Data> ItemOperationsVTable<Subsystem<Data>, Data> {
+ const VTABLE: bindings::configfs_item_operations = bindings::configfs_item_operations {
+ release: None,
+ allow_link: None,
+ drop_link: None,
+ };
+
+ const fn vtable_ptr() -> *const bindings::configfs_item_operations {
+ &Self::VTABLE as *const bindings::configfs_item_operations
+ }
+}
+
+/// Operations implemented by configfs groups that can create subgroups.
+///
+/// Implement this trait on structs that embed a [`Subsystem`] or a [`Group`].
+#[vtable]
+pub trait GroupOperations {
+ /// The child data object type.
+ ///
+ /// This group will create subgroups (subdirectories) backed by this kind of
+ /// object.
+ type Child: 'static;
+
+ /// Creates a new subgroup.
+ ///
+ /// The kernel will call this method in response to `mkdir(2)` in the
+ /// directory representing `this`.
+ ///
+ /// To accept the request to create a group, implementations should
+ /// return an initializer of a `Group<Self::Child>`. To prevent creation,
+ /// return a suitable error.
+ fn make_group(&self, name: &CStr) -> Result<impl PinInit<Group<Self::Child>, Error>>;
+
+ /// Prepares the group for removal from configfs.
+ ///
+ /// The kernel will call this method before the directory representing `_child` is removed from
+ /// configfs.
+ ///
+ /// Implementations can use this method to do house keeping before configfs drops its
+ /// reference to `Child`.
+ ///
+ /// NOTE: "drop" in the name of this function is not related to the Rust drop term. Rather, the
+ /// name is inherited from the callback name in the underlying C code.
+ fn drop_item(&self, _child: ArcBorrow<'_, Group<Self::Child>>) {
+ kernel::build_error!(kernel::error::VTABLE_DEFAULT_ERROR)
+ }
+}
+
+/// A configfs attribute.
+///
+/// An attribute appears as a file in configfs, inside a folder that represent
+/// the group that the attribute belongs to.
+#[repr(transparent)]
+pub struct Attribute<const ID: u64, O, Data> {
+ attribute: Opaque<bindings::configfs_attribute>,
+ _p: PhantomData<(O, Data)>,
+}
+
+// SAFETY: We do not provide any operations on `Attribute`.
+unsafe impl<const ID: u64, O, Data> Sync for Attribute<ID, O, Data> {}
+
+// SAFETY: Ownership of `Attribute` can safely be transferred to other threads.
+unsafe impl<const ID: u64, O, Data> Send for Attribute<ID, O, Data> {}
+
+impl<const ID: u64, O, Data> Attribute<ID, O, Data>
+where
+ O: AttributeOperations<ID, Data = Data>,
+{
+ /// # Safety
+ ///
+ /// `item` must be embedded in a `bindings::config_group`.
+ ///
+ /// If `item` does not represent the root group of a configfs subsystem,
+ /// the group must be embedded in a `Group<Data>`.
+ ///
+ /// Otherwise, the group must be a embedded in a
+ /// `bindings::configfs_subsystem` that is embedded in a `Subsystem<Data>`.
+ ///
+ /// `page` must point to a writable buffer of size at least [`PAGE_SIZE`].
+ unsafe extern "C" fn show(
+ item: *mut bindings::config_item,
+ page: *mut kernel::ffi::c_char,
+ ) -> isize {
+ let c_group: *mut bindings::config_group =
+ // SAFETY: By function safety requirements, `item` is embedded in a
+ // `config_group`.
+ unsafe { container_of!(item, bindings::config_group, cg_item) }.cast_mut();
+
+ // SAFETY: The function safety requirements for this function satisfy
+ // the conditions for this call.
+ let data: &Data = unsafe { get_group_data(c_group) };
+
+ // SAFETY: By function safety requirements, `page` is writable for `PAGE_SIZE`.
+ let ret = O::show(data, unsafe { &mut *(page as *mut [u8; PAGE_SIZE]) });
+
+ match ret {
+ Ok(size) => size as isize,
+ Err(err) => err.to_errno() as isize,
+ }
+ }
+
+ /// # Safety
+ ///
+ /// `item` must be embedded in a `bindings::config_group`.
+ ///
+ /// If `item` does not represent the root group of a configfs subsystem,
+ /// the group must be embedded in a `Group<Data>`.
+ ///
+ /// Otherwise, the group must be a embedded in a
+ /// `bindings::configfs_subsystem` that is embedded in a `Subsystem<Data>`.
+ ///
+ /// `page` must point to a readable buffer of size at least `size`.
+ unsafe extern "C" fn store(
+ item: *mut bindings::config_item,
+ page: *const kernel::ffi::c_char,
+ size: usize,
+ ) -> isize {
+ let c_group: *mut bindings::config_group =
+ // SAFETY: By function safety requirements, `item` is embedded in a
+ // `config_group`.
+ unsafe { container_of!(item, bindings::config_group, cg_item) }.cast_mut();
+
+ // SAFETY: The function safety requirements for this function satisfy
+ // the conditions for this call.
+ let data: &Data = unsafe { get_group_data(c_group) };
+
+ let ret = O::store(
+ data,
+ // SAFETY: By function safety requirements, `page` is readable
+ // for at least `size`.
+ unsafe { core::slice::from_raw_parts(page.cast(), size) },
+ );
+
+ match ret {
+ Ok(()) => size as isize,
+ Err(err) => err.to_errno() as isize,
+ }
+ }
+
+ /// Create a new attribute.
+ ///
+ /// The attribute will appear as a file with name given by `name`.
+ pub const fn new(name: &'static CStr) -> Self {
+ Self {
+ attribute: Opaque::new(bindings::configfs_attribute {
+ ca_name: name.as_char_ptr(),
+ ca_owner: core::ptr::null_mut(),
+ ca_mode: 0o660,
+ show: Some(Self::show),
+ store: if O::HAS_STORE {
+ Some(Self::store)
+ } else {
+ None
+ },
+ }),
+ _p: PhantomData,
+ }
+ }
+}
+
+/// Operations supported by an attribute.
+///
+/// Implement this trait on type and pass that type as generic parameter when
+/// creating an [`Attribute`]. The type carrying the implementation serve no
+/// purpose other than specifying the attribute operations.
+///
+/// This trait must be implemented on the `Data` type of for types that
+/// implement `HasGroup<Data>`. The trait must be implemented once for each
+/// attribute of the group. The constant type parameter `ID` maps the
+/// implementation to a specific `Attribute`. `ID` must be passed when declaring
+/// attributes via the [`kernel::configfs_attrs`] macro, to tie
+/// `AttributeOperations` implementations to concrete named attributes.
+#[vtable]
+pub trait AttributeOperations<const ID: u64 = 0> {
+ /// The type of the object that contains the field that is backing the
+ /// attribute for this operation.
+ type Data;
+
+ /// Renders the value of an attribute.
+ ///
+ /// This function is called by the kernel to read the value of an attribute.
+ ///
+ /// Implementations should write the rendering of the attribute to `page`
+ /// and return the number of bytes written.
+ fn show(data: &Self::Data, page: &mut [u8; PAGE_SIZE]) -> Result<usize>;
+
+ /// Stores the value of an attribute.
+ ///
+ /// This function is called by the kernel to update the value of an attribute.
+ ///
+ /// Implementations should parse the value from `page` and update internal
+ /// state to reflect the parsed value.
+ fn store(_data: &Self::Data, _page: &[u8]) -> Result {
+ kernel::build_error!(kernel::error::VTABLE_DEFAULT_ERROR)
+ }
+}
+
+/// A list of attributes.
+///
+/// This type is used to construct a new [`ItemType`]. It represents a list of
+/// [`Attribute`] that will appear in the directory representing a [`Group`].
+/// Users should not directly instantiate this type, rather they should use the
+/// [`kernel::configfs_attrs`] macro to declare a static set of attributes for a
+/// group.
+///
+/// # Note
+///
+/// Instances of this type are constructed statically at compile by the
+/// [`kernel::configfs_attrs`] macro.
+#[repr(transparent)]
+pub struct AttributeList<const N: usize, Data>(
+ /// Null terminated Array of pointers to [`Attribute`]. The type is [`c_void`]
+ /// to conform to the C API.
+ UnsafeCell<[*mut kernel::ffi::c_void; N]>,
+ PhantomData<Data>,
+);
+
+// SAFETY: Ownership of `AttributeList` can safely be transferred to other threads.
+unsafe impl<const N: usize, Data> Send for AttributeList<N, Data> {}
+
+// SAFETY: We do not provide any operations on `AttributeList` that need synchronization.
+unsafe impl<const N: usize, Data> Sync for AttributeList<N, Data> {}
+
+impl<const N: usize, Data> AttributeList<N, Data> {
+ /// # Safety
+ ///
+ /// This function must only be called by the [`kernel::configfs_attrs`]
+ /// macro.
+ #[doc(hidden)]
+ pub const unsafe fn new() -> Self {
+ Self(UnsafeCell::new([core::ptr::null_mut(); N]), PhantomData)
+ }
+
+ /// # Safety
+ ///
+ /// The caller must ensure that there are no other concurrent accesses to
+ /// `self`. That is, the caller has exclusive access to `self.`
+ #[doc(hidden)]
+ pub const unsafe fn add<const I: usize, const ID: u64, O>(
+ &'static self,
+ attribute: &'static Attribute<ID, O, Data>,
+ ) where
+ O: AttributeOperations<ID, Data = Data>,
+ {
+ // We need a space at the end of our list for a null terminator.
+ const { assert!(I < N - 1, "Invalid attribute index") };
+
+ // SAFETY: By function safety requirements, we have exclusive access to
+ // `self` and the reference created below will be exclusive.
+ unsafe {
+ (&mut *self.0.get())[I] = (attribute as *const Attribute<ID, O, Data>)
+ .cast_mut()
+ .cast()
+ };
+ }
+}
+
+/// A representation of the attributes that will appear in a [`Group`] or
+/// [`Subsystem`].
+///
+/// Users should not directly instantiate objects of this type. Rather, they
+/// should use the [`kernel::configfs_attrs`] macro to statically declare the
+/// shape of a [`Group`] or [`Subsystem`].
+#[pin_data]
+pub struct ItemType<Container, Data> {
+ #[pin]
+ item_type: Opaque<bindings::config_item_type>,
+ _p: PhantomData<(Container, Data)>,
+}
+
+// SAFETY: We do not provide any operations on `ItemType` that need synchronization.
+unsafe impl<Container, Data> Sync for ItemType<Container, Data> {}
+
+// SAFETY: Ownership of `ItemType` can safely be transferred to other threads.
+unsafe impl<Container, Data> Send for ItemType<Container, Data> {}
+
+macro_rules! impl_item_type {
+ ($tpe:ty) => {
+ impl<Data> ItemType<$tpe, Data> {
+ #[doc(hidden)]
+ pub const fn new_with_child_ctor<const N: usize, Child>(
+ owner: &'static ThisModule,
+ attributes: &'static AttributeList<N, Data>,
+ ) -> Self
+ where
+ Data: GroupOperations<Child = Child>,
+ Child: 'static,
+ {
+ Self {
+ item_type: Opaque::new(bindings::config_item_type {
+ ct_owner: owner.as_ptr(),
+ ct_group_ops: GroupOperationsVTable::<Data, Child>::vtable_ptr().cast_mut(),
+ ct_item_ops: ItemOperationsVTable::<$tpe, Data>::vtable_ptr().cast_mut(),
+ ct_attrs: (attributes as *const AttributeList<N, Data>)
+ .cast_mut()
+ .cast(),
+ ct_bin_attrs: core::ptr::null_mut(),
+ }),
+ _p: PhantomData,
+ }
+ }
+
+ #[doc(hidden)]
+ pub const fn new<const N: usize>(
+ owner: &'static ThisModule,
+ attributes: &'static AttributeList<N, Data>,
+ ) -> Self {
+ Self {
+ item_type: Opaque::new(bindings::config_item_type {
+ ct_owner: owner.as_ptr(),
+ ct_group_ops: core::ptr::null_mut(),
+ ct_item_ops: ItemOperationsVTable::<$tpe, Data>::vtable_ptr().cast_mut(),
+ ct_attrs: (attributes as *const AttributeList<N, Data>)
+ .cast_mut()
+ .cast(),
+ ct_bin_attrs: core::ptr::null_mut(),
+ }),
+ _p: PhantomData,
+ }
+ }
+ }
+ };
+}
+
+impl_item_type!(Subsystem<Data>);
+impl_item_type!(Group<Data>);
+
+impl<Container, Data> ItemType<Container, Data> {
+ fn as_ptr(&self) -> *const bindings::config_item_type {
+ self.item_type.get()
+ }
+}
+
+/// Define a list of configfs attributes statically.
+///
+/// Invoking the macro in the following manner:
+///
+/// ```ignore
+/// let item_type = configfs_attrs! {
+/// container: configfs::Subsystem<Configuration>,
+/// data: Configuration,
+/// child: Child,
+/// attributes: [
+/// message: 0,
+/// bar: 1,
+/// ],
+/// };
+/// ```
+///
+/// Expands the following output:
+///
+/// ```ignore
+/// let item_type = {
+/// static CONFIGURATION_MESSAGE_ATTR: kernel::configfs::Attribute<
+/// 0,
+/// Configuration,
+/// Configuration,
+/// > = unsafe {
+/// kernel::configfs::Attribute::new({
+/// const S: &str = "message\u{0}";
+/// const C: &kernel::str::CStr = match kernel::str::CStr::from_bytes_with_nul(
+/// S.as_bytes()
+/// ) {
+/// Ok(v) => v,
+/// Err(_) => {
+/// core::panicking::panic_fmt(core::const_format_args!(
+/// "string contains interior NUL"
+/// ));
+/// }
+/// };
+/// C
+/// })
+/// };
+///
+/// static CONFIGURATION_BAR_ATTR: kernel::configfs::Attribute<
+/// 1,
+/// Configuration,
+/// Configuration
+/// > = unsafe {
+/// kernel::configfs::Attribute::new({
+/// const S: &str = "bar\u{0}";
+/// const C: &kernel::str::CStr = match kernel::str::CStr::from_bytes_with_nul(
+/// S.as_bytes()
+/// ) {
+/// Ok(v) => v,
+/// Err(_) => {
+/// core::panicking::panic_fmt(core::const_format_args!(
+/// "string contains interior NUL"
+/// ));
+/// }
+/// };
+/// C
+/// })
+/// };
+///
+/// const N: usize = (1usize + (1usize + 0usize)) + 1usize;
+///
+/// static CONFIGURATION_ATTRS: kernel::configfs::AttributeList<N, Configuration> =
+/// unsafe { kernel::configfs::AttributeList::new() };
+///
+/// {
+/// const N: usize = 0usize;
+/// unsafe { CONFIGURATION_ATTRS.add::<N, 0, _>(&CONFIGURATION_MESSAGE_ATTR) };
+/// }
+///
+/// {
+/// const N: usize = (1usize + 0usize);
+/// unsafe { CONFIGURATION_ATTRS.add::<N, 1, _>(&CONFIGURATION_BAR_ATTR) };
+/// }
+///
+/// static CONFIGURATION_TPE:
+/// kernel::configfs::ItemType<configfs::Subsystem<Configuration> ,Configuration>
+/// = kernel::configfs::ItemType::<
+/// configfs::Subsystem<Configuration>,
+/// Configuration
+/// >::new_with_child_ctor::<N,Child>(
+/// &THIS_MODULE,
+/// &CONFIGURATION_ATTRS
+/// );
+///
+/// &CONFIGURATION_TPE
+/// }
+/// ```
+#[macro_export]
+macro_rules! configfs_attrs {
+ (
+ container: $container:ty,
+ data: $data:ty,
+ attributes: [
+ $($name:ident: $attr:literal),* $(,)?
+ ] $(,)?
+ ) => {
+ $crate::configfs_attrs!(
+ count:
+ @container($container),
+ @data($data),
+ @child(),
+ @no_child(x),
+ @attrs($($name $attr)*),
+ @eat($($name $attr,)*),
+ @assign(),
+ @cnt(0usize),
+ )
+ };
+ (
+ container: $container:ty,
+ data: $data:ty,
+ child: $child:ty,
+ attributes: [
+ $($name:ident: $attr:literal),* $(,)?
+ ] $(,)?
+ ) => {
+ $crate::configfs_attrs!(
+ count:
+ @container($container),
+ @data($data),
+ @child($child),
+ @no_child(),
+ @attrs($($name $attr)*),
+ @eat($($name $attr,)*),
+ @assign(),
+ @cnt(0usize),
+ )
+ };
+ (count:
+ @container($container:ty),
+ @data($data:ty),
+ @child($($child:ty)?),
+ @no_child($($no_child:ident)?),
+ @attrs($($aname:ident $aattr:literal)*),
+ @eat($name:ident $attr:literal, $($rname:ident $rattr:literal,)*),
+ @assign($($assign:block)*),
+ @cnt($cnt:expr),
+ ) => {
+ $crate::configfs_attrs!(
+ count:
+ @container($container),
+ @data($data),
+ @child($($child)?),
+ @no_child($($no_child)?),
+ @attrs($($aname $aattr)*),
+ @eat($($rname $rattr,)*),
+ @assign($($assign)* {
+ const N: usize = $cnt;
+ // The following macro text expands to a call to `Attribute::add`.
+
+ // SAFETY: By design of this macro, the name of the variable we
+ // invoke the `add` method on below, is not visible outside of
+ // the macro expansion. The macro does not operate concurrently
+ // on this variable, and thus we have exclusive access to the
+ // variable.
+ unsafe {
+ $crate::macros::paste!(
+ [< $data:upper _ATTRS >]
+ .add::<N, $attr, _>(&[< $data:upper _ $name:upper _ATTR >])
+ )
+ };
+ }),
+ @cnt(1usize + $cnt),
+ )
+ };
+ (count:
+ @container($container:ty),
+ @data($data:ty),
+ @child($($child:ty)?),
+ @no_child($($no_child:ident)?),
+ @attrs($($aname:ident $aattr:literal)*),
+ @eat(),
+ @assign($($assign:block)*),
+ @cnt($cnt:expr),
+ ) =>
+ {
+ $crate::configfs_attrs!(
+ final:
+ @container($container),
+ @data($data),
+ @child($($child)?),
+ @no_child($($no_child)?),
+ @attrs($($aname $aattr)*),
+ @assign($($assign)*),
+ @cnt($cnt),
+ )
+ };
+ (final:
+ @container($container:ty),
+ @data($data:ty),
+ @child($($child:ty)?),
+ @no_child($($no_child:ident)?),
+ @attrs($($name:ident $attr:literal)*),
+ @assign($($assign:block)*),
+ @cnt($cnt:expr),
+ ) =>
+ {
+ $crate::macros::paste!{
+ {
+ $(
+ // SAFETY: We are expanding `configfs_attrs`.
+ static [< $data:upper _ $name:upper _ATTR >]:
+ $crate::configfs::Attribute<$attr, $data, $data> =
+ unsafe {
+ $crate::configfs::Attribute::new(c_str!(::core::stringify!($name)))
+ };
+ )*
+
+
+ // We need space for a null terminator.
+ const N: usize = $cnt + 1usize;
+
+ // SAFETY: We are expanding `configfs_attrs`.
+ static [< $data:upper _ATTRS >]:
+ $crate::configfs::AttributeList<N, $data> =
+ unsafe { $crate::configfs::AttributeList::new() };
+
+ $($assign)*
+
+ $(
+ const [<$no_child:upper>]: bool = true;
+
+ static [< $data:upper _TPE >] : $crate::configfs::ItemType<$container, $data> =
+ $crate::configfs::ItemType::<$container, $data>::new::<N>(
+ &THIS_MODULE, &[<$ data:upper _ATTRS >]
+ );
+ )?
+
+ $(
+ static [< $data:upper _TPE >]:
+ $crate::configfs::ItemType<$container, $data> =
+ $crate::configfs::ItemType::<$container, $data>::
+ new_with_child_ctor::<N, $child>(
+ &THIS_MODULE, &[<$ data:upper _ATTRS >]
+ );
+ )?
+
+ & [< $data:upper _TPE >]
+ }
+ }
+ };
+
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index de07aadd1ff5..bcbf5026449d 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -42,6 +42,8 @@ pub mod alloc;
pub mod block;
#[doc(hidden)]
pub mod build_assert;
+#[cfg(CONFIG_CONFIGFS_FS)]
+pub mod configfs;
pub mod cred;
pub mod device;
pub mod device_id;
diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs
index b51d9150ffe2..a32bef6e490b 100644
--- a/rust/kernel/sync/rcu.rs
+++ b/rust/kernel/sync/rcu.rs
@@ -17,6 +17,7 @@ pub struct Guard(NotThreadSafe);
impl Guard {
/// Acquires the RCU read side lock and returns a guard.
+ #[inline]
pub fn new() -> Self {
// SAFETY: An FFI call with no additional requirements.
unsafe { bindings::rcu_read_lock() };
@@ -25,16 +26,19 @@ impl Guard {
}
/// Explicitly releases the RCU read side lock.
+ #[inline]
pub fn unlock(self) {}
}
impl Default for Guard {
+ #[inline]
fn default() -> Self {
Self::new()
}
}
impl Drop for Guard {
+ #[inline]
fn drop(&mut self) {
// SAFETY: By the type invariants, the RCU read side is locked, so it is ok to unlock it.
unsafe { bindings::rcu_read_unlock() };
@@ -42,6 +46,7 @@ impl Drop for Guard {
}
/// Acquires the RCU read side lock.
+#[inline]
pub fn read_lock() -> Guard {
Guard::new()
}
diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c
index dac67c367457..4147616102f9 100644
--- a/samples/ftrace/sample-trace-array.c
+++ b/samples/ftrace/sample-trace-array.c
@@ -112,7 +112,7 @@ static int __init sample_trace_array_init(void)
/*
* If context specific per-cpu buffers havent already been allocated.
*/
- trace_printk_init_buffers();
+ trace_array_init_printk(tr);
simple_tsk = kthread_run(simple_thread, NULL, "sample-instance");
if (IS_ERR(simple_tsk)) {
diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig
index cad52b7120b5..be491ad9b3af 100644
--- a/samples/rust/Kconfig
+++ b/samples/rust/Kconfig
@@ -10,6 +10,17 @@ menuconfig SAMPLES_RUST
if SAMPLES_RUST
+config SAMPLE_RUST_CONFIGFS
+ tristate "Configfs sample"
+ depends on CONFIGFS_FS
+ help
+ This option builds the Rust configfs sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_configfs.
+
+ If unsure, say N.
+
config SAMPLE_RUST_MINIMAL
tristate "Minimal"
help
diff --git a/samples/rust/Makefile b/samples/rust/Makefile
index c6a2479f7d9c..b3c9178d654a 100644
--- a/samples/rust/Makefile
+++ b/samples/rust/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_SAMPLE_RUST_DMA) += rust_dma.o
obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o
obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o
obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o
+obj-$(CONFIG_SAMPLE_RUST_CONFIGFS) += rust_configfs.o
rust_print-y := rust_print_main.o rust_print_events.o
diff --git a/samples/rust/rust_configfs.rs b/samples/rust/rust_configfs.rs
new file mode 100644
index 000000000000..60ddbe62cda3
--- /dev/null
+++ b/samples/rust/rust_configfs.rs
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust configfs sample.
+
+use kernel::alloc::flags;
+use kernel::c_str;
+use kernel::configfs;
+use kernel::configfs_attrs;
+use kernel::new_mutex;
+use kernel::page::PAGE_SIZE;
+use kernel::prelude::*;
+use kernel::sync::Mutex;
+
+module! {
+ type: RustConfigfs,
+ name: "rust_configfs",
+ author: "Rust for Linux Contributors",
+ description: "Rust configfs sample",
+ license: "GPL",
+}
+
+#[pin_data]
+struct RustConfigfs {
+ #[pin]
+ config: configfs::Subsystem<Configuration>,
+}
+
+#[pin_data]
+struct Configuration {
+ message: &'static CStr,
+ #[pin]
+ bar: Mutex<(KBox<[u8; PAGE_SIZE]>, usize)>,
+}
+
+impl Configuration {
+ fn new() -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ message: c_str!("Hello World\n"),
+ bar <- new_mutex!((KBox::new([0; PAGE_SIZE], flags::GFP_KERNEL)?, 0)),
+ })
+ }
+}
+
+impl kernel::InPlaceModule for RustConfigfs {
+ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
+ pr_info!("Rust configfs sample (init)\n");
+
+ // Define a subsystem with the data type `Configuration`, two
+ // attributes, `message` and `bar` and child group type `Child`. `mkdir`
+ // in the directory representing this subsystem will create directories
+ // backed by the `Child` type.
+ let item_type = configfs_attrs! {
+ container: configfs::Subsystem<Configuration>,
+ data: Configuration,
+ child: Child,
+ attributes: [
+ message: 0,
+ bar: 1,
+ ],
+ };
+
+ try_pin_init!(Self {
+ config <- configfs::Subsystem::new(
+ c_str!("rust_configfs"), item_type, Configuration::new()
+ ),
+ })
+ }
+}
+
+#[vtable]
+impl configfs::GroupOperations for Configuration {
+ type Child = Child;
+
+ fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<Child>, Error>> {
+ // Define a group with data type `Child`, one attribute `baz` and child
+ // group type `GrandChild`. `mkdir` in the directory representing this
+ // group will create directories backed by the `GrandChild` type.
+ let tpe = configfs_attrs! {
+ container: configfs::Group<Child>,
+ data: Child,
+ child: GrandChild,
+ attributes: [
+ baz: 0,
+ ],
+ };
+
+ Ok(configfs::Group::new(name.try_into()?, tpe, Child::new()))
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for Configuration {
+ type Data = Configuration;
+
+ fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show message\n");
+ let data = container.message;
+ page[0..data.len()].copy_from_slice(data);
+ Ok(data.len())
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<1> for Configuration {
+ type Data = Configuration;
+
+ fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show bar\n");
+ let guard = container.bar.lock();
+ let data = guard.0.as_slice();
+ let len = guard.1;
+ page[0..len].copy_from_slice(&data[0..len]);
+ Ok(len)
+ }
+
+ fn store(container: &Configuration, page: &[u8]) -> Result {
+ pr_info!("Store bar\n");
+ let mut guard = container.bar.lock();
+ guard.0[0..page.len()].copy_from_slice(page);
+ guard.1 = page.len();
+ Ok(())
+ }
+}
+
+// `pin_data` cannot handle structs without braces.
+#[pin_data]
+struct Child {}
+
+impl Child {
+ fn new() -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {})
+ }
+}
+
+#[vtable]
+impl configfs::GroupOperations for Child {
+ type Child = GrandChild;
+
+ fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<GrandChild>, Error>> {
+ // Define a group with data type `GrandChild`, one attribute `gc`. As no
+ // child type is specified, it will not be possible to create subgroups
+ // in this group, and `mkdir`in the directory representing this group
+ // will return an error.
+ let tpe = configfs_attrs! {
+ container: configfs::Group<GrandChild>,
+ data: GrandChild,
+ attributes: [
+ gc: 0,
+ ],
+ };
+
+ Ok(configfs::Group::new(
+ name.try_into()?,
+ tpe,
+ GrandChild::new(),
+ ))
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for Child {
+ type Data = Child;
+
+ fn show(_container: &Child, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show baz\n");
+ let data = c"Hello Baz\n".to_bytes();
+ page[0..data.len()].copy_from_slice(data);
+ Ok(data.len())
+ }
+}
+
+// `pin_data` cannot handle structs without braces.
+#[pin_data]
+struct GrandChild {}
+
+impl GrandChild {
+ fn new() -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {})
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for GrandChild {
+ type Data = GrandChild;
+
+ fn show(_container: &GrandChild, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show grand child\n");
+ let data = c"Hello GC\n".to_bytes();
+ page[0..data.len()].copy_from_slice(data);
+ Ok(data.len())
+ }
+}
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 13dcd86e74ca..884dc86ce04e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -83,7 +83,7 @@ else ifeq ($(KBUILD_CHECKSRC),2)
endif
ifneq ($(KBUILD_EXTRA_WARN),)
- cmd_checkdoc = $(srctree)/scripts/kernel-doc -none $(KDOCFLAGS) \
+ cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(KDOCFLAGS) \
$(if $(findstring 2, $(KBUILD_EXTRA_WARN)), -Wall) \
$<
endif
diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler
index f4fcc1eaaeae..65cfa72e376b 100644
--- a/scripts/Makefile.compiler
+++ b/scripts/Makefile.compiler
@@ -43,7 +43,7 @@ as-instr = $(call try-run,\
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
__cc-option = $(call try-run,\
- $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4))
+ $(1) -Werror $(2) $(3:-Wno-%=-W%) -c -x c /dev/null -o "$$TMP",$(3),$(4))
# cc-option
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
@@ -57,7 +57,7 @@ cc-option-yn = $(if $(call cc-option,$1),y,n)
# cc-disable-warning
# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
-cc-disable-warning = $(if $(call cc-option,-W$(strip $1)),-Wno-$(strip $1))
+cc-disable-warning = $(call cc-option,-Wno-$(strip $1))
# gcc-min-version
# Usage: cflags-$(call gcc-min-version, 70100) += -foo
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index d88acdf40855..540f3db5cd86 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -16,8 +16,8 @@ KBUILD_CFLAGS += -Werror=return-type
KBUILD_CFLAGS += -Werror=strict-prototypes
KBUILD_CFLAGS += -Wno-format-security
KBUILD_CFLAGS += -Wno-trigraphs
-KBUILD_CFLAGS += $(call cc-disable-warning, frame-address)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+KBUILD_CFLAGS += $(call cc-option, -Wno-frame-address)
+KBUILD_CFLAGS += $(call cc-option, -Wno-address-of-packed-member)
KBUILD_CFLAGS += -Wmissing-declarations
KBUILD_CFLAGS += -Wmissing-prototypes
@@ -35,8 +35,20 @@ KBUILD_CFLAGS += -Wno-gnu
# Clang checks for overflow/truncation with '%p', while GCC does not:
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219
-KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf)
+KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow-non-kprintf)
+KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation-non-kprintf)
+
+# Clang may emit a warning when a const variable, such as the dummy variables
+# in typecheck(), or const member of an aggregate type are not initialized,
+# which can result in unexpected behavior. However, in many audited cases of
+# the "field" variant of the warning, this is intentional because the field is
+# never used within a particular call path, the field is within a union with
+# other non-const members, or the containing object is not const so the field
+# can be modified via memcpy() / memset(). While the variable warning also gets
+# disabled with this same switch, there should not be too much coverage lost
+# because -Wuninitialized will still flag when an uninitialized const variable
+# is used.
+KBUILD_CFLAGS += $(call cc-option, -Wno-default-const-init-unsafe)
else
# gcc inanely warns about local variables called 'main'
@@ -44,7 +56,7 @@ KBUILD_CFLAGS += -Wno-main
endif
# These result in bogus false positives
-KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
+KBUILD_CFLAGS += $(call cc-option, -Wno-dangling-pointer)
# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
KBUILD_CFLAGS += -Wvla
@@ -58,11 +70,11 @@ KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
# Currently, disable -Wstringop-overflow for GCC 11, globally.
-KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow)
+KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow)
KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow)
# Currently, disable -Wunterminated-string-initialization as broken
-KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization)
+KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization)
# The allocators already balk at large sizes, so silence the compiler
# warnings for bounds checks involving those possible values. While
@@ -108,14 +120,14 @@ else
# Some diagnostics enabled by default are noisy.
# Suppress them by using -Wno... except for W=1.
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
+KBUILD_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-option, -Wno-unused-const-variable)
+KBUILD_CFLAGS += $(call cc-option, -Wno-packed-not-aligned)
+KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow)
ifdef CONFIG_CC_IS_GCC
-KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
+KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation)
endif
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
+KBUILD_CFLAGS += $(call cc-option, -Wno-stringop-truncation)
KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang
@@ -133,9 +145,9 @@ ifeq ($(call clang-min-version, 120000),y)
KBUILD_CFLAGS += -Wformat-insufficient-args
endif
endif
-KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
+KBUILD_CFLAGS += $(call cc-option, -Wno-pointer-to-enum-cast)
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
-KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
+KBUILD_CFLAGS += $(call cc-option, -Wno-unaligned-access)
KBUILD_CFLAGS += -Wno-enum-compare-conditional
endif
diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux
index 85d60d986401..b64862dc6f08 100644
--- a/scripts/Makefile.vmlinux
+++ b/scripts/Makefile.vmlinux
@@ -94,10 +94,10 @@ $(vmlinux-final): $(RESOLVE_BTFIDS)
endif
ifdef CONFIG_BUILDTIME_TABLE_SORT
-vmlinux: scripts/sorttable
+$(vmlinux-final): scripts/sorttable
endif
-# module.builtin.ranges
+# modules.builtin.ranges
# ---------------------------------------------------------------------------
ifdef CONFIG_BUILTIN_MODULE_RANGES
__default: modules.builtin.ranges
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 938c7457717e..b024ffb3e201 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -73,7 +73,7 @@ vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
targets += vmlinux.o
-# module.builtin.modinfo
+# modules.builtin.modinfo
# ---------------------------------------------------------------------------
OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary
@@ -82,7 +82,7 @@ targets += modules.builtin.modinfo
modules.builtin.modinfo: vmlinux.o FORCE
$(call if_changed,objcopy)
-# module.builtin
+# modules.builtin
# ---------------------------------------------------------------------------
# The second line aids cases where multiple modules share the same object.
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3d22bf863eec..b3b1939ccd19 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -839,6 +839,8 @@ our %deprecated_apis = (
"kunmap" => "kunmap_local",
"kmap_atomic" => "kmap_local_page",
"kunmap_atomic" => "kunmap_local",
+ "srcu_read_lock_lite" => "srcu_read_lock_fast",
+ "srcu_read_unlock_lite" => "srcu_read_unlock_fast",
);
#Create a search pattern for all these strings to speed up a loop below
diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh
index ee6a50e33aba..d6d397fbf917 100755
--- a/scripts/find-unused-docs.sh
+++ b/scripts/find-unused-docs.sh
@@ -54,7 +54,7 @@ for file in `find $1 -name '*.c'`; do
if [[ ${FILES_INCLUDED[$file]+_} ]]; then
continue;
fi
- str=$(scripts/kernel-doc -export "$file" 2>/dev/null)
+ str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null)
if [[ -n "$str" ]]; then
echo "$file"
fi
diff --git a/scripts/gdb/linux/pgtable.py b/scripts/gdb/linux/pgtable.py
index 30d837f3dfae..09aac2421fb8 100644
--- a/scripts/gdb/linux/pgtable.py
+++ b/scripts/gdb/linux/pgtable.py
@@ -29,11 +29,9 @@ def page_mask(level=1):
raise Exception(f'Unknown page level: {level}')
-#page_offset_base in case CONFIG_DYNAMIC_MEMORY_LAYOUT is disabled
-POB_NO_DYNAMIC_MEM_LAYOUT = '0xffff888000000000'
def _page_offset_base():
pob_symbol = gdb.lookup_global_symbol('page_offset_base')
- pob = pob_symbol.name if pob_symbol else POB_NO_DYNAMIC_MEM_LAYOUT
+ pob = pob_symbol.name
return gdb.parse_and_eval(pob)
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index af6cf408b96d..3b6ef807791a 100755..120000
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1,2439 +1 @@
-#!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-# vim: softtabstop=4
-
-use warnings;
-use strict;
-
-## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ##
-## Copyright (C) 2000, 1 Tim Waugh <twaugh@redhat.com> ##
-## Copyright (C) 2001 Simon Huggins ##
-## Copyright (C) 2005-2012 Randy Dunlap ##
-## Copyright (C) 2012 Dan Luedtke ##
-## ##
-## #define enhancements by Armin Kuster <akuster@mvista.com> ##
-## Copyright (c) 2000 MontaVista Software, Inc. ##
-#
-# Copyright (C) 2022 Tomasz Warniełło (POD)
-
-use Pod::Usage qw/pod2usage/;
-
-=head1 NAME
-
-kernel-doc - Print formatted kernel documentation to stdout
-
-=head1 SYNOPSIS
-
- kernel-doc [-h] [-v] [-Werror] [-Wall] [-Wreturn] [-Wshort-desc[ription]] [-Wcontents-before-sections]
- [ -man |
- -rst [-enable-lineno] |
- -none
- ]
- [
- -export |
- -internal |
- [-function NAME] ... |
- [-nosymbol NAME] ...
- ]
- [-no-doc-sections]
- [-export-file FILE] ...
- FILE ...
-
-Run `kernel-doc -h` for details.
-
-=head1 DESCRIPTION
-
-Read C language source or header FILEs, extract embedded documentation comments,
-and print formatted documentation to standard output.
-
-The documentation comments are identified by the "/**" opening comment mark.
-
-See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
-
-=cut
-
-# more perldoc at the end of the file
-
-## init lots of data
-
-my $errors = 0;
-my $warnings = 0;
-my $anon_struct_union = 0;
-
-# match expressions used to find embedded type information
-my $type_constant = '\b``([^\`]+)``\b';
-my $type_constant2 = '\%([-_*\w]+)';
-my $type_func = '(\w+)\(\)';
-my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
-my $type_param_ref = '([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
-my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params
-my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params
-my $type_env = '(\$\w+)';
-my $type_enum = '\&(enum\s*([_\w]+))';
-my $type_struct = '\&(struct\s*([_\w]+))';
-my $type_typedef = '\&(typedef\s*([_\w]+))';
-my $type_union = '\&(union\s*([_\w]+))';
-my $type_member = '\&([_\w]+)(\.|->)([_\w]+)';
-my $type_fallback = '\&([_\w]+)';
-my $type_member_func = $type_member . '\(\)';
-
-# Output conversion substitutions.
-# One for each output format
-
-# these are pretty rough
-my @highlights_man = (
- [$type_constant, "\$1"],
- [$type_constant2, "\$1"],
- [$type_func, "\\\\fB\$1\\\\fP"],
- [$type_enum, "\\\\fI\$1\\\\fP"],
- [$type_struct, "\\\\fI\$1\\\\fP"],
- [$type_typedef, "\\\\fI\$1\\\\fP"],
- [$type_union, "\\\\fI\$1\\\\fP"],
- [$type_param, "\\\\fI\$1\\\\fP"],
- [$type_param_ref, "\\\\fI\$1\$2\\\\fP"],
- [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
- [$type_fallback, "\\\\fI\$1\\\\fP"]
- );
-my $blankline_man = "";
-
-# rst-mode
-my @highlights_rst = (
- [$type_constant, "``\$1``"],
- [$type_constant2, "``\$1``"],
-
- # Note: need to escape () to avoid func matching later
- [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
- [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
- [$type_fp_param, "**\$1\\\\(\\\\)**"],
- [$type_fp_param2, "**\$1\\\\(\\\\)**"],
- [$type_func, "\$1()"],
- [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
- [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
- [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
- [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
-
- # in rst this can refer to any type
- [$type_fallback, "\\:c\\:type\\:`\$1`"],
- [$type_param_ref, "**\$1\$2**"]
- );
-my $blankline_rst = "\n";
-
-# read arguments
-if ($#ARGV == -1) {
- pod2usage(
- -message => "No arguments!\n",
- -exitval => 1,
- -verbose => 99,
- -sections => 'SYNOPSIS',
- -output => \*STDERR,
- );
-}
-
-my $kernelversion;
-
-my $dohighlight = "";
-
-my $verbose = 0;
-my $Werror = 0;
-my $Wreturn = 0;
-my $Wshort_desc = 0;
-my $output_mode = "rst";
-my $output_preformatted = 0;
-my $no_doc_sections = 0;
-my $enable_lineno = 0;
-my @highlights = @highlights_rst;
-my $blankline = $blankline_rst;
-my $modulename = "Kernel API";
-
-use constant {
- OUTPUT_ALL => 0, # output all symbols and doc sections
- OUTPUT_INCLUDE => 1, # output only specified symbols
- OUTPUT_EXPORTED => 2, # output exported symbols
- OUTPUT_INTERNAL => 3, # output non-exported symbols
-};
-my $output_selection = OUTPUT_ALL;
-my $show_not_found = 0; # No longer used
-
-my @export_file_list;
-
-my @build_time;
-if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) &&
- (my $seconds = `date -d "${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') {
- @build_time = gmtime($seconds);
-} else {
- @build_time = localtime;
-}
-
-my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
- 'July', 'August', 'September', 'October',
- 'November', 'December')[$build_time[4]] .
- " " . ($build_time[5]+1900);
-
-# Essentially these are globals.
-# They probably want to be tidied up, made more localised or something.
-# CAVEAT EMPTOR! Some of the others I localised may not want to be, which
-# could cause "use of undefined value" or other bugs.
-my ($function, %function_table, %parametertypes, $declaration_purpose);
-my %nosymbol_table = ();
-my $declaration_start_line;
-my ($type, $declaration_name, $return_type);
-my ($newsection, $newcontents, $prototype, $brcount);
-
-if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') {
- $verbose = 1;
-}
-
-if (defined($ENV{'KCFLAGS'})) {
- my $kcflags = "$ENV{'KCFLAGS'}";
-
- if ($kcflags =~ /(\s|^)-Werror(\s|$)/) {
- $Werror = 1;
- }
-}
-
-# reading this variable is for backwards compat just in case
-# someone was calling it with the variable from outside the
-# kernel's build system
-if (defined($ENV{'KDOC_WERROR'})) {
- $Werror = "$ENV{'KDOC_WERROR'}";
-}
-# other environment variables are converted to command-line
-# arguments in cmd_checkdoc in the build system
-
-# Generated docbook code is inserted in a template at a point where
-# docbook v3.1 requires a non-zero sequence of RefEntry's; see:
-# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html
-# We keep track of number of generated entries and generate a dummy
-# if needs be to ensure the expanded template can be postprocessed
-# into html.
-my $section_counter = 0;
-
-my $lineprefix="";
-
-# Parser states
-use constant {
- STATE_NORMAL => 0, # normal code
- STATE_NAME => 1, # looking for function name
- STATE_BODY_MAYBE => 2, # body - or maybe more description
- STATE_BODY => 3, # the body of the comment
- STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line
- STATE_PROTO => 5, # scanning prototype
- STATE_DOCBLOCK => 6, # documentation block
- STATE_INLINE => 7, # gathering doc outside main block
-};
-my $state;
-my $leading_space;
-
-# Inline documentation state
-use constant {
- STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE)
- STATE_INLINE_NAME => 1, # looking for member name (@foo:)
- STATE_INLINE_TEXT => 2, # looking for member documentation
- STATE_INLINE_END => 3, # done
- STATE_INLINE_ERROR => 4, # error - Comment without header was found.
- # Spit a warning as it's not
- # proper kernel-doc and ignore the rest.
-};
-my $inline_doc_state;
-
-#declaration types: can be
-# 'function', 'struct', 'union', 'enum', 'typedef'
-my $decl_type;
-
-# Name of the kernel-doc identifier for non-DOC markups
-my $identifier;
-
-my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start.
-my $doc_end = '\*/';
-my $doc_com = '\s*\*\s*';
-my $doc_com_body = '\s*\* ?';
-my $doc_decl = $doc_com . '(\w+)';
-# @params and a strictly limited set of supported section names
-# Specifically:
-# Match @word:
-# @...:
-# @{section-name}:
-# while trying to not match literal block starts like "example::"
-#
-my $doc_sect = $doc_com .
- '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$';
-my $doc_content = $doc_com_body . '(.*)';
-my $doc_block = $doc_com . 'DOC:\s*(.*)?';
-my $doc_inline_start = '^\s*/\*\*\s*$';
-my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
-my $doc_inline_end = '^\s*\*/\s*$';
-my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
-my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
-my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;';
-my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
-my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
-
-my %parameterdescs;
-my %parameterdesc_start_lines;
-my @parameterlist;
-my %sections;
-my @sectionlist;
-my %section_start_lines;
-my $sectcheck;
-my $struct_actual;
-
-my $contents = "";
-my $new_start_line = 0;
-
-# the canonical section names. see also $doc_sect above.
-my $section_default = "Description"; # default section
-my $section_intro = "Introduction";
-my $section = $section_default;
-my $section_context = "Context";
-my $section_return = "Return";
-
-my $undescribed = "-- undescribed --";
-
-reset_state();
-
-while ($ARGV[0] =~ m/^--?(.*)/) {
- my $cmd = $1;
- shift @ARGV;
- if ($cmd eq "man") {
- $output_mode = "man";
- @highlights = @highlights_man;
- $blankline = $blankline_man;
- } elsif ($cmd eq "rst") {
- $output_mode = "rst";
- @highlights = @highlights_rst;
- $blankline = $blankline_rst;
- } elsif ($cmd eq "none") {
- $output_mode = "none";
- } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document
- $modulename = shift @ARGV;
- } elsif ($cmd eq "function") { # to only output specific functions
- $output_selection = OUTPUT_INCLUDE;
- $function = shift @ARGV;
- $function_table{$function} = 1;
- } elsif ($cmd eq "nosymbol") { # Exclude specific symbols
- my $symbol = shift @ARGV;
- $nosymbol_table{$symbol} = 1;
- } elsif ($cmd eq "export") { # only exported symbols
- $output_selection = OUTPUT_EXPORTED;
- %function_table = ();
- } elsif ($cmd eq "internal") { # only non-exported symbols
- $output_selection = OUTPUT_INTERNAL;
- %function_table = ();
- } elsif ($cmd eq "export-file") {
- my $file = shift @ARGV;
- push(@export_file_list, $file);
- } elsif ($cmd eq "v") {
- $verbose = 1;
- } elsif ($cmd eq "Werror") {
- $Werror = 1;
- } elsif ($cmd eq "Wreturn") {
- $Wreturn = 1;
- } elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") {
- $Wshort_desc = 1;
- } elsif ($cmd eq "Wall") {
- $Wreturn = 1;
- $Wshort_desc = 1;
- } elsif (($cmd eq "h") || ($cmd eq "help")) {
- pod2usage(-exitval => 0, -verbose => 2);
- } elsif ($cmd eq 'no-doc-sections') {
- $no_doc_sections = 1;
- } elsif ($cmd eq 'enable-lineno') {
- $enable_lineno = 1;
- } elsif ($cmd eq 'show-not-found') {
- $show_not_found = 1; # A no-op but don't fail
- } else {
- # Unknown argument
- pod2usage(
- -message => "Argument unknown!\n",
- -exitval => 1,
- -verbose => 99,
- -sections => 'SYNOPSIS',
- -output => \*STDERR,
- );
- }
- if ($#ARGV < 0){
- pod2usage(
- -message => "FILE argument missing\n",
- -exitval => 1,
- -verbose => 99,
- -sections => 'SYNOPSIS',
- -output => \*STDERR,
- );
- }
-}
-
-# continue execution near EOF;
-
-sub findprog($)
-{
- foreach(split(/:/, $ENV{PATH})) {
- return "$_/$_[0]" if(-x "$_/$_[0]");
- }
-}
-
-# get kernel version from env
-sub get_kernel_version() {
- my $version = 'unknown kernel version';
-
- if (defined($ENV{'KERNELVERSION'})) {
- $version = $ENV{'KERNELVERSION'};
- }
- return $version;
-}
-
-#
-sub print_lineno {
- my $lineno = shift;
- if ($enable_lineno && defined($lineno)) {
- print ".. LINENO " . $lineno . "\n";
- }
-}
-
-sub emit_warning {
- my $location = shift;
- my $msg = shift;
- print STDERR "$location: warning: $msg";
- ++$warnings;
-}
-##
-# dumps section contents to arrays/hashes intended for that purpose.
-#
-sub dump_section {
- my $file = shift;
- my $name = shift;
- my $contents = join "\n", @_;
-
- if ($name =~ m/$type_param/) {
- $name = $1;
- $parameterdescs{$name} = $contents;
- $sectcheck = $sectcheck . $name . " ";
- $parameterdesc_start_lines{$name} = $new_start_line;
- $new_start_line = 0;
- } elsif ($name eq "@\.\.\.") {
- $name = "...";
- $parameterdescs{$name} = $contents;
- $sectcheck = $sectcheck . $name . " ";
- $parameterdesc_start_lines{$name} = $new_start_line;
- $new_start_line = 0;
- } else {
- if (defined($sections{$name}) && ($sections{$name} ne "")) {
- # Only warn on user specified duplicate section names.
- if ($name ne $section_default) {
- emit_warning("${file}:$.", "duplicate section name '$name'\n");
- }
- $sections{$name} .= $contents;
- } else {
- $sections{$name} = $contents;
- push @sectionlist, $name;
- $section_start_lines{$name} = $new_start_line;
- $new_start_line = 0;
- }
- }
-}
-
-##
-# dump DOC: section after checking that it should go out
-#
-sub dump_doc_section {
- my $file = shift;
- my $name = shift;
- my $contents = join "\n", @_;
-
- if ($no_doc_sections) {
- return;
- }
-
- return if (defined($nosymbol_table{$name}));
-
- if (($output_selection == OUTPUT_ALL) ||
- (($output_selection == OUTPUT_INCLUDE) &&
- defined($function_table{$name})))
- {
- dump_section($file, $name, $contents);
- output_blockhead({'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'module' => $modulename,
- 'content-only' => ($output_selection != OUTPUT_ALL), });
- }
-}
-
-##
-# output function
-#
-# parameterdescs, a hash.
-# function => "function name"
-# parameterlist => @list of parameters
-# parameterdescs => %parameter descriptions
-# sectionlist => @list of sections
-# sections => %section descriptions
-#
-
-sub output_highlight {
- my $contents = join "\n",@_;
- my $line;
-
-# DEBUG
-# if (!defined $contents) {
-# use Carp;
-# confess "output_highlight got called with no args?\n";
-# }
-
-# print STDERR "contents b4:$contents\n";
- eval $dohighlight;
- die $@ if $@;
-# print STDERR "contents af:$contents\n";
-
- foreach $line (split "\n", $contents) {
- if (! $output_preformatted) {
- $line =~ s/^\s*//;
- }
- if ($line eq ""){
- if (! $output_preformatted) {
- print $lineprefix, $blankline;
- }
- } else {
- if ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
- print "\\&$line";
- } else {
- print $lineprefix, $line;
- }
- }
- print "\n";
- }
-}
-
-##
-# output function in man
-sub output_function_man(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
- my $count;
- my $func_macro = $args{'func_macro'};
- my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty
-
- print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n";
-
- print ".SH NAME\n";
- print $args{'function'} . " \\- " . $args{'purpose'} . "\n";
-
- print ".SH SYNOPSIS\n";
- if ($args{'functiontype'} ne "") {
- print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n";
- } else {
- print ".B \"" . $args{'function'} . "\n";
- }
- $count = 0;
- my $parenth = "(";
- my $post = ",";
- foreach my $parameter (@{$args{'parameterlist'}}) {
- if ($count == $#{$args{'parameterlist'}}) {
- $post = ");";
- }
- $type = $args{'parametertypes'}{$parameter};
- if ($type =~ m/$function_pointer/) {
- # pointer-to-function
- print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
- } else {
- $type =~ s/([^\*])$/$1 /;
- print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n";
- }
- $count++;
- $parenth = "";
- }
-
- $paramcount = $#{$args{'parameterlist'}}; # -1 is empty
- if ($paramcount >= 0) {
- print ".SH ARGUMENTS\n";
- }
- foreach $parameter (@{$args{'parameterlist'}}) {
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
-
- print ".IP \"" . $parameter . "\" 12\n";
- output_highlight($args{'parameterdescs'}{$parameter_name});
- }
- foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"", uc $section, "\"\n";
- output_highlight($args{'sections'}{$section});
- }
-}
-
-##
-# output enum in man
-sub output_enum_man(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
- my $count;
-
- print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n";
-
- print ".SH NAME\n";
- print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n";
-
- print ".SH SYNOPSIS\n";
- print "enum " . $args{'enum'} . " {\n";
- $count = 0;
- foreach my $parameter (@{$args{'parameterlist'}}) {
- print ".br\n.BI \" $parameter\"\n";
- if ($count == $#{$args{'parameterlist'}}) {
- print "\n};\n";
- last;
- } else {
- print ", \n.br\n";
- }
- $count++;
- }
-
- print ".SH Constants\n";
- foreach $parameter (@{$args{'parameterlist'}}) {
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
-
- print ".IP \"" . $parameter . "\" 12\n";
- output_highlight($args{'parameterdescs'}{$parameter_name});
- }
- foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
- }
-}
-
-##
-# output struct in man
-sub output_struct_man(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
-
- print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n";
-
- print ".SH NAME\n";
- print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n";
-
- my $declaration = $args{'definition'};
- $declaration =~ s/\t/ /g;
- $declaration =~ s/\n/"\n.br\n.BI \"/g;
- print ".SH SYNOPSIS\n";
- print $args{'type'} . " " . $args{'struct'} . " {\n.br\n";
- print ".BI \"$declaration\n};\n.br\n\n";
-
- print ".SH Members\n";
- foreach $parameter (@{$args{'parameterlist'}}) {
- ($parameter =~ /^#/) && next;
-
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
-
- ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
- print ".IP \"" . $parameter . "\" 12\n";
- output_highlight($args{'parameterdescs'}{$parameter_name});
- }
- foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
- }
-}
-
-##
-# output typedef in man
-sub output_typedef_man(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
-
- print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n";
-
- print ".SH NAME\n";
- print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n";
-
- foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
- }
-}
-
-sub output_blockhead_man(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
- my $count;
-
- print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n";
-
- foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
- }
-}
-
-##
-# output in restructured text
-#
-
-#
-# This could use some work; it's used to output the DOC: sections, and
-# starts by putting out the name of the doc section itself, but that tends
-# to duplicate a header already in the template file.
-#
-sub output_blockhead_rst(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
-
- foreach $section (@{$args{'sectionlist'}}) {
- next if (defined($nosymbol_table{$section}));
-
- if ($output_selection != OUTPUT_INCLUDE) {
- print ".. _$section:\n\n";
- print "**$section**\n\n";
- }
- print_lineno($section_start_lines{$section});
- output_highlight_rst($args{'sections'}{$section});
- print "\n";
- }
-}
-
-#
-# Apply the RST highlights to a sub-block of text.
-#
-sub highlight_block($) {
- # The dohighlight kludge requires the text be called $contents
- my $contents = shift;
- eval $dohighlight;
- die $@ if $@;
- return $contents;
-}
-
-#
-# Regexes used only here.
-#
-my $sphinx_literal = '^[^.].*::$';
-my $sphinx_cblock = '^\.\.\ +code-block::';
-
-sub output_highlight_rst {
- my $input = join "\n",@_;
- my $output = "";
- my $line;
- my $in_literal = 0;
- my $litprefix;
- my $block = "";
-
- foreach $line (split "\n",$input) {
- #
- # If we're in a literal block, see if we should drop out
- # of it. Otherwise pass the line straight through unmunged.
- #
- if ($in_literal) {
- if (! ($line =~ /^\s*$/)) {
- #
- # If this is the first non-blank line in a literal
- # block we need to figure out what the proper indent is.
- #
- if ($litprefix eq "") {
- $line =~ /^(\s*)/;
- $litprefix = '^' . $1;
- $output .= $line . "\n";
- } elsif (! ($line =~ /$litprefix/)) {
- $in_literal = 0;
- } else {
- $output .= $line . "\n";
- }
- } else {
- $output .= $line . "\n";
- }
- }
- #
- # Not in a literal block (or just dropped out)
- #
- if (! $in_literal) {
- $block .= $line . "\n";
- if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
- $in_literal = 1;
- $litprefix = "";
- $output .= highlight_block($block);
- $block = ""
- }
- }
- }
-
- if ($block) {
- $output .= highlight_block($block);
- }
-
- $output =~ s/^\n+//g;
- $output =~ s/\n+$//g;
-
- foreach $line (split "\n", $output) {
- print $lineprefix . $line . "\n";
- }
-}
-
-sub output_function_rst(%) {
- my %args = %{$_[0]};
- my ($parameter, $section);
- my $oldprefix = $lineprefix;
-
- my $signature = "";
- my $func_macro = $args{'func_macro'};
- my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty
-
- if ($func_macro) {
- $signature = $args{'function'};
- } else {
- if ($args{'functiontype'}) {
- $signature = $args{'functiontype'} . " ";
- }
- $signature .= $args{'function'} . " (";
- }
-
- my $count = 0;
- foreach my $parameter (@{$args{'parameterlist'}}) {
- if ($count ne 0) {
- $signature .= ", ";
- }
- $count++;
- $type = $args{'parametertypes'}{$parameter};
-
- if ($type =~ m/$function_pointer/) {
- # pointer-to-function
- $signature .= $1 . $parameter . ") (" . $2 . ")";
- } else {
- $signature .= $type;
- }
- }
-
- if (!$func_macro) {
- $signature .= ")";
- }
-
- if ($args{'typedef'} || $args{'functiontype'} eq "") {
- print ".. c:macro:: ". $args{'function'} . "\n\n";
-
- if ($args{'typedef'}) {
- print_lineno($declaration_start_line);
- print " **Typedef**: ";
- $lineprefix = "";
- output_highlight_rst($args{'purpose'});
- print "\n\n**Syntax**\n\n";
- print " ``$signature``\n\n";
- } else {
- print "``$signature``\n\n";
- }
- } else {
- print ".. c:function:: $signature\n\n";
- }
-
- if (!$args{'typedef'}) {
- print_lineno($declaration_start_line);
- $lineprefix = " ";
- output_highlight_rst($args{'purpose'});
- print "\n";
- }
-
- #
- # Put our descriptive text into a container (thus an HTML <div>) to help
- # set the function prototypes apart.
- #
- $lineprefix = " ";
- if ($paramcount >= 0) {
- print ".. container:: kernelindent\n\n";
- print $lineprefix . "**Parameters**\n\n";
- }
- foreach $parameter (@{$args{'parameterlist'}}) {
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
- $type = $args{'parametertypes'}{$parameter};
-
- if ($type ne "") {
- print $lineprefix . "``$type``\n";
- } else {
- print $lineprefix . "``$parameter``\n";
- }
-
- print_lineno($parameterdesc_start_lines{$parameter_name});
-
- $lineprefix = " ";
- if (defined($args{'parameterdescs'}{$parameter_name}) &&
- $args{'parameterdescs'}{$parameter_name} ne $undescribed) {
- output_highlight_rst($args{'parameterdescs'}{$parameter_name});
- } else {
- print $lineprefix . "*undescribed*\n";
- }
- $lineprefix = " ";
- print "\n";
- }
-
- output_section_rst(@_);
- $lineprefix = $oldprefix;
-}
-
-sub output_section_rst(%) {
- my %args = %{$_[0]};
- my $section;
- my $oldprefix = $lineprefix;
-
- foreach $section (@{$args{'sectionlist'}}) {
- print $lineprefix . "**$section**\n\n";
- print_lineno($section_start_lines{$section});
- output_highlight_rst($args{'sections'}{$section});
- print "\n";
- }
- print "\n";
-}
-
-sub output_enum_rst(%) {
- my %args = %{$_[0]};
- my ($parameter);
- my $oldprefix = $lineprefix;
- my $count;
- my $outer;
-
- my $name = $args{'enum'};
- print "\n\n.. c:enum:: " . $name . "\n\n";
-
- print_lineno($declaration_start_line);
- $lineprefix = " ";
- output_highlight_rst($args{'purpose'});
- print "\n";
-
- print ".. container:: kernelindent\n\n";
- $outer = $lineprefix . " ";
- $lineprefix = $outer . " ";
- print $outer . "**Constants**\n\n";
- foreach $parameter (@{$args{'parameterlist'}}) {
- print $outer . "``$parameter``\n";
-
- if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
- output_highlight_rst($args{'parameterdescs'}{$parameter});
- } else {
- print $lineprefix . "*undescribed*\n";
- }
- print "\n";
- }
- print "\n";
- $lineprefix = $oldprefix;
- output_section_rst(@_);
-}
-
-sub output_typedef_rst(%) {
- my %args = %{$_[0]};
- my ($parameter);
- my $oldprefix = $lineprefix;
- my $name;
-
- $name = $args{'typedef'};
-
- print "\n\n.. c:type:: " . $name . "\n\n";
- print_lineno($declaration_start_line);
- $lineprefix = " ";
- output_highlight_rst($args{'purpose'});
- print "\n";
-
- $lineprefix = $oldprefix;
- output_section_rst(@_);
-}
-
-sub output_struct_rst(%) {
- my %args = %{$_[0]};
- my ($parameter);
- my $oldprefix = $lineprefix;
-
- my $name = $args{'struct'};
- if ($args{'type'} eq 'union') {
- print "\n\n.. c:union:: " . $name . "\n\n";
- } else {
- print "\n\n.. c:struct:: " . $name . "\n\n";
- }
-
- print_lineno($declaration_start_line);
- $lineprefix = " ";
- output_highlight_rst($args{'purpose'});
- print "\n";
-
- print ".. container:: kernelindent\n\n";
- print $lineprefix . "**Definition**::\n\n";
- my $declaration = $args{'definition'};
- $lineprefix = $lineprefix . " ";
- $declaration =~ s/\t/$lineprefix/g;
- print $lineprefix . $args{'type'} . " " . $args{'struct'} . " {\n$declaration" . $lineprefix . "};\n\n";
-
- $lineprefix = " ";
- print $lineprefix . "**Members**\n\n";
- foreach $parameter (@{$args{'parameterlist'}}) {
- ($parameter =~ /^#/) && next;
-
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
-
- ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
- $type = $args{'parametertypes'}{$parameter};
- print_lineno($parameterdesc_start_lines{$parameter_name});
- print $lineprefix . "``" . $parameter . "``\n";
- $lineprefix = " ";
- output_highlight_rst($args{'parameterdescs'}{$parameter_name});
- $lineprefix = " ";
- print "\n";
- }
- print "\n";
-
- $lineprefix = $oldprefix;
- output_section_rst(@_);
-}
-
-## none mode output functions
-
-sub output_function_none(%) {
-}
-
-sub output_enum_none(%) {
-}
-
-sub output_typedef_none(%) {
-}
-
-sub output_struct_none(%) {
-}
-
-sub output_blockhead_none(%) {
-}
-
-##
-# generic output function for all types (function, struct/union, typedef, enum);
-# calls the generated, variable output_ function name based on
-# functype and output_mode
-sub output_declaration {
- no strict 'refs';
- my $name = shift;
- my $functype = shift;
- my $func = "output_${functype}_$output_mode";
-
- return if (defined($nosymbol_table{$name}));
-
- if (($output_selection == OUTPUT_ALL) ||
- (($output_selection == OUTPUT_INCLUDE ||
- $output_selection == OUTPUT_EXPORTED) &&
- defined($function_table{$name})) ||
- ($output_selection == OUTPUT_INTERNAL &&
- !($functype eq "function" && defined($function_table{$name}))))
- {
- &$func(@_);
- $section_counter++;
- }
-}
-
-##
-# generic output function - calls the right one based on current output mode.
-sub output_blockhead {
- no strict 'refs';
- my $func = "output_blockhead_" . $output_mode;
- &$func(@_);
- $section_counter++;
-}
-
-##
-# takes a declaration (struct, union, enum, typedef) and
-# invokes the right handler. NOT called for functions.
-sub dump_declaration($$) {
- no strict 'refs';
- my ($prototype, $file) = @_;
- my $func = "dump_" . $decl_type;
- &$func(@_);
-}
-
-sub dump_union($$) {
- dump_struct(@_);
-}
-
-sub dump_struct($$) {
- my $x = shift;
- my $file = shift;
- my $decl_type;
- my $members;
- my $type = qr{struct|union};
- # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
- my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned};
- my $definition_body = qr{\{(.*)\}\s*$qualifiers*};
- my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
-
- if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
- $decl_type = $1;
- $declaration_name = $2;
- $members = $3;
- } elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) {
- $decl_type = $1;
- $declaration_name = $3;
- $members = $2;
- }
-
- if ($members) {
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n");
- return;
- }
-
- # ignore members marked private:
- $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
- $members =~ s/\/\*\s*private:.*//gosi;
- # strip comments:
- $members =~ s/\/\*.*?\*\///gos;
- # strip attributes
- $members =~ s/\s*$attribute/ /gi;
- $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
- $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos;
- $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos;
- $members =~ s/\s*__packed\s*/ /gos;
- $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
- $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
- $members =~ s/\s*____cacheline_aligned/ /gos;
- # unwrap struct_group():
- # - first eat non-declaration parameters and rewrite for final match
- # - then remove macro, outer parens, and trailing semicolon
- $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
- $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
- $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos;
- $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
- $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
-
- my $args = qr{([^,)]+)};
- # replace DECLARE_BITMAP
- $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
- $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos;
- $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
- # replace DECLARE_HASHTABLE
- $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
- # replace DECLARE_KFIFO
- $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
- # replace DECLARE_KFIFO_PTR
- $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
- # replace DECLARE_FLEX_ARRAY
- $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos;
- #replace DEFINE_DMA_UNMAP_ADDR
- $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos;
- #replace DEFINE_DMA_UNMAP_LEN
- $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos;
- my $declaration = $members;
-
- # Split nested struct/union elements as newer ones
- while ($members =~ m/$struct_members/) {
- my $newmember;
- my $maintype = $1;
- my $ids = $4;
- my $content = $3;
- foreach my $id(split /,/, $ids) {
- $newmember .= "$maintype $id; ";
-
- $id =~ s/[:\[].*//;
- $id =~ s/^\s*\**(\S+)\s*/$1/;
- foreach my $arg (split /;/, $content) {
- next if ($arg =~ m/^\s*$/);
- if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) {
- # pointer-to-function
- my $type = $1;
- my $name = $2;
- my $extra = $3;
- next if (!$name);
- if ($id =~ m/^\s*$/) {
- # anonymous struct/union
- $newmember .= "$type$name$extra; ";
- } else {
- $newmember .= "$type$id.$name$extra; ";
- }
- } else {
- my $type;
- my $names;
- $arg =~ s/^\s+//;
- $arg =~ s/\s+$//;
- # Handle bitmaps
- $arg =~ s/:\s*\d+\s*//g;
- # Handle arrays
- $arg =~ s/\[.*\]//g;
- # The type may have multiple words,
- # and multiple IDs can be defined, like:
- # const struct foo, *bar, foobar
- # So, we remove spaces when parsing the
- # names, in order to match just names
- # and commas for the names
- $arg =~ s/\s*,\s*/,/g;
- if ($arg =~ m/(.*)\s+([\S+,]+)/) {
- $type = $1;
- $names = $2;
- } else {
- $newmember .= "$arg; ";
- next;
- }
- foreach my $name (split /,/, $names) {
- $name =~ s/^\s*\**(\S+)\s*/$1/;
- next if (($name =~ m/^\s*$/));
- if ($id =~ m/^\s*$/) {
- # anonymous struct/union
- $newmember .= "$type $name; ";
- } else {
- $newmember .= "$type $id.$name; ";
- }
- }
- }
- }
- }
- $members =~ s/$struct_members/$newmember/;
- }
-
- # Ignore other nested elements, like enums
- $members =~ s/(\{[^\{\}]*\})//g;
-
- create_parameterlist($members, ';', $file, $declaration_name);
- check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual);
-
- # Adjust declaration for better display
- $declaration =~ s/([\{;])/$1\n/g;
- $declaration =~ s/\}\s+;/};/g;
- # Better handle inlined enums
- do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/);
-
- my @def_args = split /\n/, $declaration;
- my $level = 1;
- $declaration = "";
- foreach my $clause (@def_args) {
- $clause =~ s/^\s+//;
- $clause =~ s/\s+$//;
- $clause =~ s/\s+/ /;
- next if (!$clause);
- $level-- if ($clause =~ m/(\})/ && $level > 1);
- if (!($clause =~ m/^\s*#/)) {
- $declaration .= "\t" x $level;
- }
- $declaration .= "\t" . $clause . "\n";
- $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/));
- }
- output_declaration($declaration_name,
- 'struct',
- {'struct' => $declaration_name,
- 'module' => $modulename,
- 'definition' => $declaration,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose,
- 'type' => $decl_type
- });
- } else {
- print STDERR "${file}:$.: error: Cannot parse struct or union!\n";
- ++$errors;
- }
-}
-
-
-sub show_warnings($$) {
- my $functype = shift;
- my $name = shift;
-
- return 0 if (defined($nosymbol_table{$name}));
-
- return 1 if ($output_selection == OUTPUT_ALL);
-
- if ($output_selection == OUTPUT_EXPORTED) {
- if (defined($function_table{$name})) {
- return 1;
- } else {
- return 0;
- }
- }
- if ($output_selection == OUTPUT_INTERNAL) {
- if (!($functype eq "function" && defined($function_table{$name}))) {
- return 1;
- } else {
- return 0;
- }
- }
- if ($output_selection == OUTPUT_INCLUDE) {
- if (defined($function_table{$name})) {
- return 1;
- } else {
- return 0;
- }
- }
- die("Please add the new output type at show_warnings()");
-}
-
-sub dump_enum($$) {
- my $x = shift;
- my $file = shift;
- my $members;
-
- # ignore members marked private:
- $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
- $x =~ s/\/\*\s*private:.*}/}/gosi;
-
- $x =~ s@/\*.*?\*/@@gos; # strip comments.
- # strip #define macros inside enums
- $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos;
-
- if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) {
- $declaration_name = $2;
- $members = $1;
- } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
- $declaration_name = $1;
- $members = $2;
- }
-
- if ($members) {
- if ($identifier ne $declaration_name) {
- if ($identifier eq "") {
- emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n");
- } else {
- emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n");
- }
- return;
- }
- $declaration_name = "(anonymous)" if ($declaration_name eq "");
-
- my %_members;
-
- $members =~ s/\s+$//;
- $members =~ s/\([^;]*?[\)]//g;
-
- foreach my $arg (split ',', $members) {
- $arg =~ s/^\s*(\w+).*/$1/;
- push @parameterlist, $arg;
- if (!$parameterdescs{$arg}) {
- $parameterdescs{$arg} = $undescribed;
- if (show_warnings("enum", $declaration_name)) {
- emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n");
- }
- }
- $_members{$arg} = 1;
- }
-
- while (my ($k, $v) = each %parameterdescs) {
- if (!exists($_members{$k})) {
- if (show_warnings("enum", $declaration_name)) {
- emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n");
- }
- }
- }
-
- output_declaration($declaration_name,
- 'enum',
- {'enum' => $declaration_name,
- 'module' => $modulename,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
- } else {
- print STDERR "${file}:$.: error: Cannot parse enum!\n";
- ++$errors;
- }
-}
-
-my $typedef_type = qr { ((?:\s+[\w\*]+\b){1,8})\s* }x;
-my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x;
-my $typedef_args = qr { \s*\((.*)\); }x;
-
-my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x;
-my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x;
-
-sub dump_typedef($$) {
- my $x = shift;
- my $file = shift;
-
- $x =~ s@/\*.*?\*/@@gos; # strip comments.
-
- # Parse function typedef prototypes
- if ($x =~ $typedef1 || $x =~ $typedef2) {
- $return_type = $1;
- $declaration_name = $2;
- my $args = $3;
- $return_type =~ s/^\s+//;
-
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
- return;
- }
-
- create_parameterlist($args, ',', $file, $declaration_name);
-
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'typedef' => 1,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
- return;
- }
-
- while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) {
- $x =~ s/\(*.\)\s*;$/;/;
- $x =~ s/\[*.\]\s*;$/;/;
- }
-
- if ($x =~ /typedef.*\s+(\w+)\s*;/) {
- $declaration_name = $1;
-
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
- return;
- }
-
- output_declaration($declaration_name,
- 'typedef',
- {'typedef' => $declaration_name,
- 'module' => $modulename,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
- } else {
- print STDERR "${file}:$.: error: Cannot parse typedef!\n";
- ++$errors;
- }
-}
-
-sub save_struct_actual($) {
- my $actual = shift;
-
- # strip all spaces from the actual param so that it looks like one string item
- $actual =~ s/\s*//g;
- $struct_actual = $struct_actual . $actual . " ";
-}
-
-sub create_parameterlist($$$$) {
- my $args = shift;
- my $splitter = shift;
- my $file = shift;
- my $declaration_name = shift;
- my $type;
- my $param;
-
- # temporarily replace commas inside function pointer definition
- my $arg_expr = qr{\([^\),]+};
- while ($args =~ /$arg_expr,/) {
- $args =~ s/($arg_expr),/$1#/g;
- }
-
- foreach my $arg (split($splitter, $args)) {
- # strip comments
- $arg =~ s/\/\*.*\*\///;
- # ignore argument attributes
- $arg =~ s/\sPOS0?\s/ /;
- # strip leading/trailing spaces
- $arg =~ s/^\s*//;
- $arg =~ s/\s*$//;
- $arg =~ s/\s+/ /;
-
- if ($arg =~ /^#/) {
- # Treat preprocessor directive as a typeless variable just to fill
- # corresponding data structures "correctly". Catch it later in
- # output_* subs.
- push_parameter($arg, "", "", $file);
- } elsif ($arg =~ m/\(.+\)\s*\(/) {
- # pointer-to-function
- $arg =~ tr/#/,/;
- $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
- $param = $1;
- $type = $arg;
- $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
- save_struct_actual($param);
- push_parameter($param, $type, $arg, $file, $declaration_name);
- } elsif ($arg =~ m/\(.+\)\s*\[/) {
- # array-of-pointers
- $arg =~ tr/#/,/;
- $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/;
- $param = $1;
- $type = $arg;
- $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
- save_struct_actual($param);
- push_parameter($param, $type, $arg, $file, $declaration_name);
- } elsif ($arg) {
- $arg =~ s/\s*:\s*/:/g;
- $arg =~ s/\s*\[/\[/g;
-
- my @args = split('\s*,\s*', $arg);
- if ($args[0] =~ m/\*/) {
- $args[0] =~ s/(\*+)\s*/ $1/;
- }
-
- my @first_arg;
- if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) {
- shift @args;
- push(@first_arg, split('\s+', $1));
- push(@first_arg, $2);
- } else {
- @first_arg = split('\s+', shift @args);
- }
-
- unshift(@args, pop @first_arg);
- $type = join " ", @first_arg;
-
- foreach $param (@args) {
- if ($param =~ m/^(\*+)\s*(.*)/) {
- save_struct_actual($2);
-
- push_parameter($2, "$type $1", $arg, $file, $declaration_name);
- } elsif ($param =~ m/(.*?):(\w+)/) {
- if ($type ne "") { # skip unnamed bit-fields
- save_struct_actual($1);
- push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
- }
- } else {
- save_struct_actual($param);
- push_parameter($param, $type, $arg, $file, $declaration_name);
- }
- }
- }
- }
-}
-
-sub push_parameter($$$$$) {
- my $param = shift;
- my $type = shift;
- my $org_arg = shift;
- my $file = shift;
- my $declaration_name = shift;
-
- if (($anon_struct_union == 1) && ($type eq "") &&
- ($param eq "}")) {
- return; # ignore the ending }; from anon. struct/union
- }
-
- $anon_struct_union = 0;
- $param =~ s/[\[\)].*//;
-
- if ($type eq "" && $param =~ /\.\.\.$/)
- {
- if (!$param =~ /\w\.\.\.$/) {
- # handles unnamed variable parameters
- $param = "...";
- } elsif ($param =~ /\w\.\.\.$/) {
- # for named variable parameters of the form `x...`, remove the dots
- $param =~ s/\.\.\.$//;
- }
- if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
- $parameterdescs{$param} = "variable arguments";
- }
- }
- elsif ($type eq "" && ($param eq "" or $param eq "void"))
- {
- $param="void";
- $parameterdescs{void} = "no arguments";
- }
- elsif ($type eq "" && ($param eq "struct" or $param eq "union"))
- # handle unnamed (anonymous) union or struct:
- {
- $type = $param;
- $param = "{unnamed_" . $param . "}";
- $parameterdescs{$param} = "anonymous\n";
- $anon_struct_union = 1;
- }
- elsif ($param =~ "__cacheline_group" )
- # handle cache group enforcing variables: they do not need be described in header files
- {
- return; # ignore __cacheline_group_begin and __cacheline_group_end
- }
-
- # warn if parameter has no description
- # (but ignore ones starting with # as these are not parameters
- # but inline preprocessor statements);
- # Note: It will also ignore void params and unnamed structs/unions
- if (!defined $parameterdescs{$param} && $param !~ /^#/) {
- $parameterdescs{$param} = $undescribed;
-
- if (show_warnings($type, $declaration_name) && $param !~ /\./) {
- emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n");
- }
- }
-
- # strip spaces from $param so that it is one continuous string
- # on @parameterlist;
- # this fixes a problem where check_sections() cannot find
- # a parameter like "addr[6 + 2]" because it actually appears
- # as "addr[6", "+", "2]" on the parameter list;
- # but it's better to maintain the param string unchanged for output,
- # so just weaken the string compare in check_sections() to ignore
- # "[blah" in a parameter string;
- ###$param =~ s/\s*//g;
- push @parameterlist, $param;
- $org_arg =~ s/\s\s+/ /g;
- $parametertypes{$param} = $org_arg;
-}
-
-sub check_sections($$$$$) {
- my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_;
- my @sects = split ' ', $sectcheck;
- my @prms = split ' ', $prmscheck;
- my $err;
- my ($px, $sx);
- my $prm_clean; # strip trailing "[array size]" and/or beginning "*"
-
- foreach $sx (0 .. $#sects) {
- $err = 1;
- foreach $px (0 .. $#prms) {
- $prm_clean = $prms[$px];
- $prm_clean =~ s/\[.*\]//;
- $prm_clean =~ s/$attribute//i;
- # ignore array size in a parameter string;
- # however, the original param string may contain
- # spaces, e.g.: addr[6 + 2]
- # and this appears in @prms as "addr[6" since the
- # parameter list is split at spaces;
- # hence just ignore "[..." for the sections check;
- $prm_clean =~ s/\[.*//;
-
- ##$prm_clean =~ s/^\**//;
- if ($prm_clean eq $sects[$sx]) {
- $err = 0;
- last;
- }
- }
- if ($err) {
- if ($decl_type eq "function") {
- emit_warning("${file}:$.",
- "Excess function parameter " .
- "'$sects[$sx]' " .
- "description in '$decl_name'\n");
- } elsif (($decl_type eq "struct") or
- ($decl_type eq "union")) {
- emit_warning("${file}:$.",
- "Excess $decl_type member " .
- "'$sects[$sx]' " .
- "description in '$decl_name'\n");
- }
- }
- }
-}
-
-##
-# Checks the section describing the return value of a function.
-sub check_return_section {
- my $file = shift;
- my $declaration_name = shift;
- my $return_type = shift;
-
- # Ignore an empty return type (It's a macro)
- # Ignore functions with a "void" return type. (But don't ignore "void *")
- if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) {
- return;
- }
-
- if (!defined($sections{$section_return}) ||
- $sections{$section_return} eq "")
- {
- emit_warning("${file}:$.",
- "No description found for return value of " .
- "'$declaration_name'\n");
- }
-}
-
-##
-# takes a function prototype and the name of the current file being
-# processed and spits out all the details stored in the global
-# arrays/hashes.
-sub dump_function($$) {
- my $prototype = shift;
- my $file = shift;
- my $func_macro = 0;
-
- print_lineno($new_start_line);
-
- $prototype =~ s/^static +//;
- $prototype =~ s/^extern +//;
- $prototype =~ s/^asmlinkage +//;
- $prototype =~ s/^inline +//;
- $prototype =~ s/^__inline__ +//;
- $prototype =~ s/^__inline +//;
- $prototype =~ s/^__always_inline +//;
- $prototype =~ s/^noinline +//;
- $prototype =~ s/^__FORTIFY_INLINE +//;
- $prototype =~ s/__init +//;
- $prototype =~ s/__init_or_module +//;
- $prototype =~ s/__deprecated +//;
- $prototype =~ s/__flatten +//;
- $prototype =~ s/__meminit +//;
- $prototype =~ s/__must_check +//;
- $prototype =~ s/__weak +//;
- $prototype =~ s/__sched +//;
- $prototype =~ s/_noprof//;
- $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
- $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
- $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
- $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/;
- my $define = $prototype =~ s/^#\s*define\s+//; #ak added
- $prototype =~ s/__attribute_const__ +//;
- $prototype =~ s/__attribute__\s*\(\(
- (?:
- [\w\s]++ # attribute name
- (?:\([^)]*+\))? # attribute arguments
- \s*+,? # optional comma at the end
- )+
- \)\)\s+//x;
-
- # Yes, this truly is vile. We are looking for:
- # 1. Return type (may be nothing if we're looking at a macro)
- # 2. Function name
- # 3. Function parameters.
- #
- # All the while we have to watch out for function pointer parameters
- # (which IIRC is what the two sections are for), C types (these
- # regexps don't even start to express all the possibilities), and
- # so on.
- #
- # If you mess with these regexps, it's a good idea to check that
- # the following functions' documentation still comes out right:
- # - parport_register_device (function pointer parameters)
- # - atomic_set (macro)
- # - pci_match_device, __copy_to_user (long return type)
- my $name = qr{[a-zA-Z0-9_~:]+};
- my $prototype_end1 = qr{[^\(]*};
- my $prototype_end2 = qr{[^\{]*};
- my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
- my $type1 = qr{[\w\s]+};
- my $type2 = qr{$type1\*+};
-
- if ($define && $prototype =~ m/^()($name)\s+/) {
- # This is an object-like macro, it has no return type and no parameter
- # list.
- # Function-like macros are not allowed to have spaces between
- # declaration_name and opening parenthesis (notice the \s+).
- $return_type = $1;
- $declaration_name = $2;
- $func_macro = 1;
- } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
- $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
- $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) {
- $return_type = $1;
- $declaration_name = $2;
- my $args = $3;
-
- create_parameterlist($args, ',', $file, $declaration_name);
- } else {
- emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n");
- return;
- }
-
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n");
- return;
- }
-
- my $prms = join " ", @parameterlist;
- check_sections($file, $declaration_name, "function", $sectcheck, $prms);
-
- # This check emits a lot of warnings at the moment, because many
- # functions don't have a 'Return' doc section. So until the number
- # of warnings goes sufficiently down, the check is only performed in
- # -Wreturn mode.
- # TODO: always perform the check.
- if ($Wreturn && !$func_macro) {
- check_return_section($file, $declaration_name, $return_type);
- }
-
- # The function parser can be called with a typedef parameter.
- # Handle it.
- if ($return_type =~ /typedef/) {
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'typedef' => 1,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose,
- 'func_macro' => $func_macro
- });
- } else {
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose,
- 'func_macro' => $func_macro
- });
- }
-}
-
-sub reset_state {
- $function = "";
- %parameterdescs = ();
- %parametertypes = ();
- @parameterlist = ();
- %sections = ();
- @sectionlist = ();
- $sectcheck = "";
- $struct_actual = "";
- $prototype = "";
-
- $state = STATE_NORMAL;
- $inline_doc_state = STATE_INLINE_NA;
-}
-
-sub tracepoint_munge($) {
- my $file = shift;
- my $tracepointname = 0;
- my $tracepointargs = 0;
-
- if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
- $tracepointname = $1;
- }
- if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
- $tracepointname = $1;
- }
- if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
- $tracepointname = $2;
- }
- $tracepointname =~ s/^\s+//; #strip leading whitespace
- if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
- $tracepointargs = $1;
- }
- if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
- emit_warning("${file}:$.", "Unrecognized tracepoint format: \n".
- "$prototype\n");
- } else {
- $prototype = "static inline void trace_$tracepointname($tracepointargs)";
- $identifier = "trace_$identifier";
- }
-}
-
-sub syscall_munge() {
- my $void = 0;
-
- $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's
-## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) {
- if ($prototype =~ m/SYSCALL_DEFINE0/) {
- $void = 1;
-## $prototype = "long sys_$1(void)";
- }
-
- $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name
- if ($prototype =~ m/long (sys_.*?),/) {
- $prototype =~ s/,/\(/;
- } elsif ($void) {
- $prototype =~ s/\)/\(void\)/;
- }
-
- # now delete all of the odd-number commas in $prototype
- # so that arg types & arg names don't have a comma between them
- my $count = 0;
- my $len = length($prototype);
- if ($void) {
- $len = 0; # skip the for-loop
- }
- for (my $ix = 0; $ix < $len; $ix++) {
- if (substr($prototype, $ix, 1) eq ',') {
- $count++;
- if ($count % 2 == 1) {
- substr($prototype, $ix, 1) = ' ';
- }
- }
- }
-}
-
-sub process_proto_function($$) {
- my $x = shift;
- my $file = shift;
-
- $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
-
- if ($x =~ /^#/ && $x !~ /^#\s*define/) {
- # do nothing
- } elsif ($x =~ /([^\{]*)/) {
- $prototype .= $1;
- }
-
- if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) {
- $prototype =~ s@/\*.*?\*/@@gos; # strip comments.
- $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
- $prototype =~ s@^\s+@@gos; # strip leading spaces
-
- # Handle prototypes for function pointers like:
- # int (*pcs_config)(struct foo)
- $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos;
-
- if ($prototype =~ /SYSCALL_DEFINE/) {
- syscall_munge();
- }
- if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
- $prototype =~ /DEFINE_SINGLE_EVENT/)
- {
- tracepoint_munge($file);
- }
- dump_function($prototype, $file);
- reset_state();
- }
-}
-
-sub process_proto_type($$) {
- my $x = shift;
- my $file = shift;
-
- $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
- $x =~ s@^\s+@@gos; # strip leading spaces
- $x =~ s@\s+$@@gos; # strip trailing spaces
- $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
-
- if ($x =~ /^#/) {
- # To distinguish preprocessor directive from regular declaration later.
- $x .= ";";
- }
-
- while (1) {
- if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) {
- if( length $prototype ) {
- $prototype .= " "
- }
- $prototype .= $1 . $2;
- ($2 eq '{') && $brcount++;
- ($2 eq '}') && $brcount--;
- if (($2 eq ';') && ($brcount == 0)) {
- dump_declaration($prototype, $file);
- reset_state();
- last;
- }
- $x = $3;
- } else {
- $prototype .= $x;
- last;
- }
- }
-}
-
-
-sub map_filename($) {
- my $file;
- my ($orig_file) = @_;
-
- if (defined($ENV{'SRCTREE'})) {
- $file = "$ENV{'SRCTREE'}" . "/" . $orig_file;
- } else {
- $file = $orig_file;
- }
-
- return $file;
-}
-
-sub process_export_file($) {
- my ($orig_file) = @_;
- my $file = map_filename($orig_file);
-
- if (!open(IN,"<$file")) {
- print STDERR "Error: Cannot open file $file\n";
- ++$errors;
- return;
- }
-
- while (<IN>) {
- if (/$export_symbol/) {
- next if (defined($nosymbol_table{$2}));
- $function_table{$2} = 1;
- }
- if (/$export_symbol_ns/) {
- next if (defined($nosymbol_table{$2}));
- $function_table{$2} = 1;
- }
- }
-
- close(IN);
-}
-
-#
-# Parsers for the various processing states.
-#
-# STATE_NORMAL: looking for the /** to begin everything.
-#
-sub process_normal() {
- if (/$doc_start/o) {
- $state = STATE_NAME; # next line is always the function name
- $declaration_start_line = $. + 1;
- }
-}
-
-#
-# STATE_NAME: Looking for the "name - description" line
-#
-sub process_name($$) {
- my $file = shift;
- my $descr;
-
- if (/$doc_block/o) {
- $state = STATE_DOCBLOCK;
- $contents = "";
- $new_start_line = $.;
-
- if ( $1 eq "" ) {
- $section = $section_intro;
- } else {
- $section = $1;
- }
- } elsif (/$doc_decl/o) {
- $identifier = $1;
- my $is_kernel_comment = 0;
- my $decl_start = qr{$doc_com};
- # test for pointer declaration type, foo * bar() - desc
- my $fn_type = qr{\w+\s*\*\s*};
- my $parenthesis = qr{\(\w*\)};
- my $decl_end = qr{[-:].*};
- if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
- $identifier = $1;
- }
- if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
- $decl_type = $1;
- $identifier = $2;
- $is_kernel_comment = 1;
- }
- # Look for foo() or static void foo() - description; or misspelt
- # identifier
- elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
- /^$decl_start$fn_type?(\w+[^-:]*)$parenthesis?\s*$decl_end$/) {
- $identifier = $1;
- $decl_type = 'function';
- $identifier =~ s/^define\s+//;
- $is_kernel_comment = 1;
- }
- $identifier =~ s/\s+$//;
-
- $state = STATE_BODY;
- # if there's no @param blocks need to set up default section
- # here
- $contents = "";
- $section = $section_default;
- $new_start_line = $. + 1;
- if (/[-:](.*)/) {
- # strip leading/trailing/multiple spaces
- $descr= $1;
- $descr =~ s/^\s*//;
- $descr =~ s/\s*$//;
- $descr =~ s/\s+/ /g;
- $declaration_purpose = $descr;
- $state = STATE_BODY_MAYBE;
- } else {
- $declaration_purpose = "";
- }
-
- if (!$is_kernel_comment) {
- emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_");
- $state = STATE_NORMAL;
- }
-
- if (($declaration_purpose eq "") && $Wshort_desc) {
- emit_warning("${file}:$.", "missing initial short description on line:\n$_");
- }
-
- if ($identifier eq "" && $decl_type ne "enum") {
- emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_");
- $state = STATE_NORMAL;
- }
-
- if ($verbose) {
- print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n";
- }
- } else {
- emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n");
- $state = STATE_NORMAL;
- }
-}
-
-
-#
-# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment.
-#
-sub process_body($$) {
- my $file = shift;
-
- if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $new_start_line = $.;
- $contents = "";
- }
-
- if (/$doc_sect/i) { # case insensitive for supported section names
- $newsection = $1;
- $newcontents = $2;
-
- # map the supported section names to the canonical names
- if ($newsection =~ m/^description$/i) {
- $newsection = $section_default;
- } elsif ($newsection =~ m/^context$/i) {
- $newsection = $section_context;
- } elsif ($newsection =~ m/^returns?$/i) {
- $newsection = $section_return;
- } elsif ($newsection =~ m/^\@return$/) {
- # special: @return is a section, not a param description
- $newsection = $section_return;
- }
-
- if (($contents ne "") && ($contents ne "\n")) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- }
-
- $state = STATE_BODY;
- $contents = $newcontents;
- $new_start_line = $.;
- while (substr($contents, 0, 1) eq " ") {
- $contents = substr($contents, 1);
- }
- if ($contents ne "") {
- $contents .= "\n";
- }
- $section = $newsection;
- $leading_space = undef;
- } elsif (/$doc_end/) {
- if (($contents ne "") && ($contents ne "\n")) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- }
- # look for doc_com + <text> + doc_end:
- if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') {
- emit_warning("${file}:$.", "suspicious ending line: $_");
- }
-
- $prototype = "";
- $state = STATE_PROTO;
- $brcount = 0;
- $new_start_line = $. + 1;
- } elsif (/$doc_content/) {
- if ($1 eq "") {
- if ($section eq $section_context) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- $new_start_line = $.;
- $state = STATE_BODY;
- } else {
- if ($section ne $section_default) {
- $state = STATE_BODY_WITH_BLANK_LINE;
- } else {
- $state = STATE_BODY;
- }
- $contents .= "\n";
- }
- } elsif ($state == STATE_BODY_MAYBE) {
- # Continued declaration purpose
- chomp($declaration_purpose);
- $declaration_purpose .= " " . $1;
- $declaration_purpose =~ s/\s+/ /g;
- } else {
- my $cont = $1;
- if ($section =~ m/^@/ || $section eq $section_context) {
- if (!defined $leading_space) {
- if ($cont =~ m/^(\s+)/) {
- $leading_space = $1;
- } else {
- $leading_space = "";
- }
- }
- $cont =~ s/^$leading_space//;
- }
- $contents .= $cont . "\n";
- }
- } else {
- # i dont know - bad line? ignore.
- emit_warning("${file}:$.", "bad line: $_");
- }
-}
-
-
-#
-# STATE_PROTO: reading a function/whatever prototype.
-#
-sub process_proto($$) {
- my $file = shift;
-
- if (/$doc_inline_oneline/) {
- $section = $1;
- $contents = $2;
- if ($contents ne "") {
- $contents .= "\n";
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- }
- } elsif (/$doc_inline_start/) {
- $state = STATE_INLINE;
- $inline_doc_state = STATE_INLINE_NAME;
- } elsif ($decl_type eq 'function') {
- process_proto_function($_, $file);
- } else {
- process_proto_type($_, $file);
- }
-}
-
-#
-# STATE_DOCBLOCK: within a DOC: block.
-#
-sub process_docblock($$) {
- my $file = shift;
-
- if (/$doc_end/) {
- dump_doc_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- $function = "";
- %parameterdescs = ();
- %parametertypes = ();
- @parameterlist = ();
- %sections = ();
- @sectionlist = ();
- $prototype = "";
- $state = STATE_NORMAL;
- } elsif (/$doc_content/) {
- if ( $1 eq "" ) {
- $contents .= $blankline;
- } else {
- $contents .= $1 . "\n";
- }
- }
-}
-
-#
-# STATE_INLINE: docbook comments within a prototype.
-#
-sub process_inline($$) {
- my $file = shift;
-
- # First line (state 1) needs to be a @parameter
- if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) {
- $section = $1;
- $contents = $2;
- $new_start_line = $.;
- if ($contents ne "") {
- while (substr($contents, 0, 1) eq " ") {
- $contents = substr($contents, 1);
- }
- $contents .= "\n";
- }
- $inline_doc_state = STATE_INLINE_TEXT;
- # Documentation block end */
- } elsif (/$doc_inline_end/) {
- if (($contents ne "") && ($contents ne "\n")) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- }
- $state = STATE_PROTO;
- $inline_doc_state = STATE_INLINE_NA;
- # Regular text
- } elsif (/$doc_content/) {
- if ($inline_doc_state == STATE_INLINE_TEXT) {
- $contents .= $1 . "\n";
- # nuke leading blank lines
- if ($contents =~ /^\s*$/) {
- $contents = "";
- }
- } elsif ($inline_doc_state == STATE_INLINE_NAME) {
- $inline_doc_state = STATE_INLINE_ERROR;
- emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_");
- }
- }
-}
-
-
-sub process_file($) {
- my $file;
- my ($orig_file) = @_;
-
- $file = map_filename($orig_file);
-
- if (!open(IN_FILE,"<$file")) {
- print STDERR "Error: Cannot open file $file\n";
- ++$errors;
- return;
- }
-
- $. = 1;
-
- $section_counter = 0;
- while (<IN_FILE>) {
- while (!/^ \*/ && s/\\\s*$//) {
- $_ .= <IN_FILE>;
- }
- # Replace tabs by spaces
- while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
- # Hand this line to the appropriate state handler
- if ($state == STATE_NORMAL) {
- process_normal();
- } elsif ($state == STATE_NAME) {
- process_name($file, $_);
- } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE ||
- $state == STATE_BODY_WITH_BLANK_LINE) {
- process_body($file, $_);
- } elsif ($state == STATE_INLINE) { # scanning for inline parameters
- process_inline($file, $_);
- } elsif ($state == STATE_PROTO) {
- process_proto($file, $_);
- } elsif ($state == STATE_DOCBLOCK) {
- process_docblock($file, $_);
- }
- }
-
- # Make sure we got something interesting.
- if (!$section_counter && $output_mode ne "none") {
- if ($output_selection == OUTPUT_INCLUDE) {
- emit_warning("${file}:1", "'$_' not found\n")
- for keys %function_table;
- } else {
- emit_warning("${file}:1", "no structured comments found\n");
- }
- }
- close IN_FILE;
-}
-
-$kernelversion = get_kernel_version();
-
-# generate a sequence of code that will splice in highlighting information
-# using the s// operator.
-for (my $k = 0; $k < @highlights; $k++) {
- my $pattern = $highlights[$k][0];
- my $result = $highlights[$k][1];
-# print STDERR "scanning pattern:$pattern, highlight:($result)\n";
- $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n";
-}
-
-if ($output_selection == OUTPUT_EXPORTED ||
- $output_selection == OUTPUT_INTERNAL) {
-
- push(@export_file_list, @ARGV);
-
- foreach (@export_file_list) {
- chomp;
- process_export_file($_);
- }
-}
-
-foreach (@ARGV) {
- chomp;
- process_file($_);
-}
-if ($verbose && $errors) {
- print STDERR "$errors errors\n";
-}
-if ($verbose && $warnings) {
- print STDERR "$warnings warnings\n";
-}
-
-if ($Werror && $warnings) {
- print STDERR "$warnings warnings as Errors\n";
- exit($warnings);
-} else {
- exit($output_mode eq "none" ? 0 : $errors)
-}
-
-__END__
-
-=head1 OPTIONS
-
-=head2 Output format selection (mutually exclusive):
-
-=over 8
-
-=item -man
-
-Output troff manual page format.
-
-=item -rst
-
-Output reStructuredText format. This is the default.
-
-=item -none
-
-Do not output documentation, only warnings.
-
-=back
-
-=head2 Output format modifiers
-
-=head3 reStructuredText only
-
-=head2 Output selection (mutually exclusive):
-
-=over 8
-
-=item -export
-
-Only output documentation for the symbols that have been exported using
-EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
-
-=item -internal
-
-Only output documentation for the symbols that have NOT been exported using
-EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
-
-=item -function NAME
-
-Only output documentation for the given function or DOC: section title.
-All other functions and DOC: sections are ignored.
-
-May be specified multiple times.
-
-=item -nosymbol NAME
-
-Exclude the specified symbol from the output documentation.
-
-May be specified multiple times.
-
-=back
-
-=head2 Output selection modifiers:
-
-=over 8
-
-=item -no-doc-sections
-
-Do not output DOC: sections.
-
-=item -export-file FILE
-
-Specify an additional FILE in which to look for EXPORT_SYMBOL information.
-
-To be used with -export or -internal.
-
-May be specified multiple times.
-
-=back
-
-=head3 reStructuredText only
-
-=over 8
-
-=item -enable-lineno
-
-Enable output of .. LINENO lines.
-
-=back
-
-=head2 Other parameters:
-
-=over 8
-
-=item -h, -help
-
-Print this help.
-
-=item -v
-
-Verbose output, more warnings and other information.
-
-=item -Werror
-
-Treat warnings as errors.
-
-=back
-
-=cut
+kernel-doc.py \ No newline at end of file
diff --git a/scripts/kernel-doc.pl b/scripts/kernel-doc.pl
new file mode 100755
index 000000000000..5db23cbf4eb2
--- /dev/null
+++ b/scripts/kernel-doc.pl
@@ -0,0 +1,2439 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+# vim: softtabstop=4
+
+use warnings;
+use strict;
+
+## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ##
+## Copyright (C) 2000, 1 Tim Waugh <twaugh@redhat.com> ##
+## Copyright (C) 2001 Simon Huggins ##
+## Copyright (C) 2005-2012 Randy Dunlap ##
+## Copyright (C) 2012 Dan Luedtke ##
+## ##
+## #define enhancements by Armin Kuster <akuster@mvista.com> ##
+## Copyright (c) 2000 MontaVista Software, Inc. ##
+#
+# Copyright (C) 2022 Tomasz Warniełło (POD)
+
+use Pod::Usage qw/pod2usage/;
+
+=head1 NAME
+
+kernel-doc - Print formatted kernel documentation to stdout
+
+=head1 SYNOPSIS
+
+ kernel-doc [-h] [-v] [-Werror] [-Wall] [-Wreturn] [-Wshort-desc[ription]] [-Wcontents-before-sections]
+ [ -man |
+ -rst [-enable-lineno] |
+ -none
+ ]
+ [
+ -export |
+ -internal |
+ [-function NAME] ... |
+ [-nosymbol NAME] ...
+ ]
+ [-no-doc-sections]
+ [-export-file FILE] ...
+ FILE ...
+
+Run `kernel-doc -h` for details.
+
+=head1 DESCRIPTION
+
+Read C language source or header FILEs, extract embedded documentation comments,
+and print formatted documentation to standard output.
+
+The documentation comments are identified by the "/**" opening comment mark.
+
+See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
+
+=cut
+
+# more perldoc at the end of the file
+
+## init lots of data
+
+my $errors = 0;
+my $warnings = 0;
+my $anon_struct_union = 0;
+
+# match expressions used to find embedded type information
+my $type_constant = '\b``([^\`]+)``\b';
+my $type_constant2 = '\%([-_*\w]+)';
+my $type_func = '(\w+)\(\)';
+my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
+my $type_param_ref = '([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
+my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params
+my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params
+my $type_env = '(\$\w+)';
+my $type_enum = '\&(enum\s*([_\w]+))';
+my $type_struct = '\&(struct\s*([_\w]+))';
+my $type_typedef = '\&(typedef\s*([_\w]+))';
+my $type_union = '\&(union\s*([_\w]+))';
+my $type_member = '\&([_\w]+)(\.|->)([_\w]+)';
+my $type_fallback = '\&([_\w]+)';
+my $type_member_func = $type_member . '\(\)';
+
+# Output conversion substitutions.
+# One for each output format
+
+# these are pretty rough
+my @highlights_man = (
+ [$type_constant, "\$1"],
+ [$type_constant2, "\$1"],
+ [$type_func, "\\\\fB\$1\\\\fP"],
+ [$type_enum, "\\\\fI\$1\\\\fP"],
+ [$type_struct, "\\\\fI\$1\\\\fP"],
+ [$type_typedef, "\\\\fI\$1\\\\fP"],
+ [$type_union, "\\\\fI\$1\\\\fP"],
+ [$type_param, "\\\\fI\$1\\\\fP"],
+ [$type_param_ref, "\\\\fI\$1\$2\\\\fP"],
+ [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
+ [$type_fallback, "\\\\fI\$1\\\\fP"]
+ );
+my $blankline_man = "";
+
+# rst-mode
+my @highlights_rst = (
+ [$type_constant, "``\$1``"],
+ [$type_constant2, "``\$1``"],
+
+ # Note: need to escape () to avoid func matching later
+ [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
+ [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
+ [$type_fp_param, "**\$1\\\\(\\\\)**"],
+ [$type_fp_param2, "**\$1\\\\(\\\\)**"],
+ [$type_func, "\$1()"],
+ [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
+ [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
+ [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
+ [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
+
+ # in rst this can refer to any type
+ [$type_fallback, "\\:c\\:type\\:`\$1`"],
+ [$type_param_ref, "**\$1\$2**"]
+ );
+my $blankline_rst = "\n";
+
+# read arguments
+if ($#ARGV == -1) {
+ pod2usage(
+ -message => "No arguments!\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
+}
+
+my $kernelversion;
+
+my $dohighlight = "";
+
+my $verbose = 0;
+my $Werror = 0;
+my $Wreturn = 0;
+my $Wshort_desc = 0;
+my $output_mode = "rst";
+my $output_preformatted = 0;
+my $no_doc_sections = 0;
+my $enable_lineno = 0;
+my @highlights = @highlights_rst;
+my $blankline = $blankline_rst;
+my $modulename = "Kernel API";
+
+use constant {
+ OUTPUT_ALL => 0, # output all symbols and doc sections
+ OUTPUT_INCLUDE => 1, # output only specified symbols
+ OUTPUT_EXPORTED => 2, # output exported symbols
+ OUTPUT_INTERNAL => 3, # output non-exported symbols
+};
+my $output_selection = OUTPUT_ALL;
+my $show_not_found = 0; # No longer used
+
+my @export_file_list;
+
+my @build_time;
+if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) &&
+ (my $seconds = `date -d "${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') {
+ @build_time = gmtime($seconds);
+} else {
+ @build_time = localtime;
+}
+
+my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October',
+ 'November', 'December')[$build_time[4]] .
+ " " . ($build_time[5]+1900);
+
+# Essentially these are globals.
+# They probably want to be tidied up, made more localised or something.
+# CAVEAT EMPTOR! Some of the others I localised may not want to be, which
+# could cause "use of undefined value" or other bugs.
+my ($function, %function_table, %parametertypes, $declaration_purpose);
+my %nosymbol_table = ();
+my $declaration_start_line;
+my ($type, $declaration_name, $return_type);
+my ($newsection, $newcontents, $prototype, $brcount);
+
+if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') {
+ $verbose = 1;
+}
+
+if (defined($ENV{'KCFLAGS'})) {
+ my $kcflags = "$ENV{'KCFLAGS'}";
+
+ if ($kcflags =~ /(\s|^)-Werror(\s|$)/) {
+ $Werror = 1;
+ }
+}
+
+# reading this variable is for backwards compat just in case
+# someone was calling it with the variable from outside the
+# kernel's build system
+if (defined($ENV{'KDOC_WERROR'})) {
+ $Werror = "$ENV{'KDOC_WERROR'}";
+}
+# other environment variables are converted to command-line
+# arguments in cmd_checkdoc in the build system
+
+# Generated docbook code is inserted in a template at a point where
+# docbook v3.1 requires a non-zero sequence of RefEntry's; see:
+# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html
+# We keep track of number of generated entries and generate a dummy
+# if needs be to ensure the expanded template can be postprocessed
+# into html.
+my $section_counter = 0;
+
+my $lineprefix="";
+
+# Parser states
+use constant {
+ STATE_NORMAL => 0, # normal code
+ STATE_NAME => 1, # looking for function name
+ STATE_BODY_MAYBE => 2, # body - or maybe more description
+ STATE_BODY => 3, # the body of the comment
+ STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line
+ STATE_PROTO => 5, # scanning prototype
+ STATE_DOCBLOCK => 6, # documentation block
+ STATE_INLINE => 7, # gathering doc outside main block
+};
+my $state;
+my $leading_space;
+
+# Inline documentation state
+use constant {
+ STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE)
+ STATE_INLINE_NAME => 1, # looking for member name (@foo:)
+ STATE_INLINE_TEXT => 2, # looking for member documentation
+ STATE_INLINE_END => 3, # done
+ STATE_INLINE_ERROR => 4, # error - Comment without header was found.
+ # Spit a warning as it's not
+ # proper kernel-doc and ignore the rest.
+};
+my $inline_doc_state;
+
+#declaration types: can be
+# 'function', 'struct', 'union', 'enum', 'typedef'
+my $decl_type;
+
+# Name of the kernel-doc identifier for non-DOC markups
+my $identifier;
+
+my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start.
+my $doc_end = '\*/';
+my $doc_com = '\s*\*\s*';
+my $doc_com_body = '\s*\* ?';
+my $doc_decl = $doc_com . '(\w+)';
+# @params and a strictly limited set of supported section names
+# Specifically:
+# Match @word:
+# @...:
+# @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+my $doc_sect = $doc_com .
+ '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$';
+my $doc_content = $doc_com_body . '(.*)';
+my $doc_block = $doc_com . 'DOC:\s*(.*)?';
+my $doc_inline_start = '^\s*/\*\*\s*$';
+my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
+my $doc_inline_end = '^\s*\*/\s*$';
+my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
+my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;';
+my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
+my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
+
+my %parameterdescs;
+my %parameterdesc_start_lines;
+my @parameterlist;
+my %sections;
+my @sectionlist;
+my %section_start_lines;
+my $sectcheck;
+my $struct_actual;
+
+my $contents = "";
+my $new_start_line = 0;
+
+# the canonical section names. see also $doc_sect above.
+my $section_default = "Description"; # default section
+my $section_intro = "Introduction";
+my $section = $section_default;
+my $section_context = "Context";
+my $section_return = "Return";
+
+my $undescribed = "-- undescribed --";
+
+reset_state();
+
+while ($ARGV[0] =~ m/^--?(.*)/) {
+ my $cmd = $1;
+ shift @ARGV;
+ if ($cmd eq "man") {
+ $output_mode = "man";
+ @highlights = @highlights_man;
+ $blankline = $blankline_man;
+ } elsif ($cmd eq "rst") {
+ $output_mode = "rst";
+ @highlights = @highlights_rst;
+ $blankline = $blankline_rst;
+ } elsif ($cmd eq "none") {
+ $output_mode = "none";
+ } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document
+ $modulename = shift @ARGV;
+ } elsif ($cmd eq "function") { # to only output specific functions
+ $output_selection = OUTPUT_INCLUDE;
+ $function = shift @ARGV;
+ $function_table{$function} = 1;
+ } elsif ($cmd eq "nosymbol") { # Exclude specific symbols
+ my $symbol = shift @ARGV;
+ $nosymbol_table{$symbol} = 1;
+ } elsif ($cmd eq "export") { # only exported symbols
+ $output_selection = OUTPUT_EXPORTED;
+ %function_table = ();
+ } elsif ($cmd eq "internal") { # only non-exported symbols
+ $output_selection = OUTPUT_INTERNAL;
+ %function_table = ();
+ } elsif ($cmd eq "export-file") {
+ my $file = shift @ARGV;
+ push(@export_file_list, $file);
+ } elsif ($cmd eq "v") {
+ $verbose = 1;
+ } elsif ($cmd eq "Werror") {
+ $Werror = 1;
+ } elsif ($cmd eq "Wreturn") {
+ $Wreturn = 1;
+ } elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") {
+ $Wshort_desc = 1;
+ } elsif ($cmd eq "Wall") {
+ $Wreturn = 1;
+ $Wshort_desc = 1;
+ } elsif (($cmd eq "h") || ($cmd eq "help")) {
+ pod2usage(-exitval => 0, -verbose => 2);
+ } elsif ($cmd eq 'no-doc-sections') {
+ $no_doc_sections = 1;
+ } elsif ($cmd eq 'enable-lineno') {
+ $enable_lineno = 1;
+ } elsif ($cmd eq 'show-not-found') {
+ $show_not_found = 1; # A no-op but don't fail
+ } else {
+ # Unknown argument
+ pod2usage(
+ -message => "Argument unknown!\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
+ }
+ if ($#ARGV < 0){
+ pod2usage(
+ -message => "FILE argument missing\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
+ }
+}
+
+# continue execution near EOF;
+
+sub findprog($)
+{
+ foreach(split(/:/, $ENV{PATH})) {
+ return "$_/$_[0]" if(-x "$_/$_[0]");
+ }
+}
+
+# get kernel version from env
+sub get_kernel_version() {
+ my $version = 'unknown kernel version';
+
+ if (defined($ENV{'KERNELVERSION'})) {
+ $version = $ENV{'KERNELVERSION'};
+ }
+ return $version;
+}
+
+#
+sub print_lineno {
+ my $lineno = shift;
+ if ($enable_lineno && defined($lineno)) {
+ print ".. LINENO " . $lineno . "\n";
+ }
+}
+
+sub emit_warning {
+ my $location = shift;
+ my $msg = shift;
+ print STDERR "$location: warning: $msg";
+ ++$warnings;
+}
+##
+# dumps section contents to arrays/hashes intended for that purpose.
+#
+sub dump_section {
+ my $file = shift;
+ my $name = shift;
+ my $contents = join "\n", @_;
+
+ if ($name =~ m/$type_param/) {
+ $name = $1;
+ $parameterdescs{$name} = $contents;
+ $sectcheck = $sectcheck . $name . " ";
+ $parameterdesc_start_lines{$name} = $new_start_line;
+ $new_start_line = 0;
+ } elsif ($name eq "@\.\.\.") {
+ $name = "...";
+ $parameterdescs{$name} = $contents;
+ $sectcheck = $sectcheck . $name . " ";
+ $parameterdesc_start_lines{$name} = $new_start_line;
+ $new_start_line = 0;
+ } else {
+ if (defined($sections{$name}) && ($sections{$name} ne "")) {
+ # Only warn on user specified duplicate section names.
+ if ($name ne $section_default) {
+ emit_warning("${file}:$.", "duplicate section name '$name'\n");
+ }
+ $sections{$name} .= $contents;
+ } else {
+ $sections{$name} = $contents;
+ push @sectionlist, $name;
+ $section_start_lines{$name} = $new_start_line;
+ $new_start_line = 0;
+ }
+ }
+}
+
+##
+# dump DOC: section after checking that it should go out
+#
+sub dump_doc_section {
+ my $file = shift;
+ my $name = shift;
+ my $contents = join "\n", @_;
+
+ if ($no_doc_sections) {
+ return;
+ }
+
+ return if (defined($nosymbol_table{$name}));
+
+ if (($output_selection == OUTPUT_ALL) ||
+ (($output_selection == OUTPUT_INCLUDE) &&
+ defined($function_table{$name})))
+ {
+ dump_section($file, $name, $contents);
+ output_blockhead({'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'module' => $modulename,
+ 'content-only' => ($output_selection != OUTPUT_ALL), });
+ }
+}
+
+##
+# output function
+#
+# parameterdescs, a hash.
+# function => "function name"
+# parameterlist => @list of parameters
+# parameterdescs => %parameter descriptions
+# sectionlist => @list of sections
+# sections => %section descriptions
+#
+
+sub output_highlight {
+ my $contents = join "\n",@_;
+ my $line;
+
+# DEBUG
+# if (!defined $contents) {
+# use Carp;
+# confess "output_highlight got called with no args?\n";
+# }
+
+# print STDERR "contents b4:$contents\n";
+ eval $dohighlight;
+ die $@ if $@;
+# print STDERR "contents af:$contents\n";
+
+ foreach $line (split "\n", $contents) {
+ if (! $output_preformatted) {
+ $line =~ s/^\s*//;
+ }
+ if ($line eq ""){
+ if (! $output_preformatted) {
+ print $lineprefix, $blankline;
+ }
+ } else {
+ if ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
+ print "\\&$line";
+ } else {
+ print $lineprefix, $line;
+ }
+ }
+ print "\n";
+ }
+}
+
+##
+# output function in man
+sub output_function_man(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+ my $count;
+ my $func_macro = $args{'func_macro'};
+ my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty
+
+ print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n";
+
+ print ".SH NAME\n";
+ print $args{'function'} . " \\- " . $args{'purpose'} . "\n";
+
+ print ".SH SYNOPSIS\n";
+ if ($args{'functiontype'} ne "") {
+ print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n";
+ } else {
+ print ".B \"" . $args{'function'} . "\n";
+ }
+ $count = 0;
+ my $parenth = "(";
+ my $post = ",";
+ foreach my $parameter (@{$args{'parameterlist'}}) {
+ if ($count == $#{$args{'parameterlist'}}) {
+ $post = ");";
+ }
+ $type = $args{'parametertypes'}{$parameter};
+ if ($type =~ m/$function_pointer/) {
+ # pointer-to-function
+ print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
+ } else {
+ $type =~ s/([^\*])$/$1 /;
+ print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n";
+ }
+ $count++;
+ $parenth = "";
+ }
+
+ $paramcount = $#{$args{'parameterlist'}}; # -1 is empty
+ if ($paramcount >= 0) {
+ print ".SH ARGUMENTS\n";
+ }
+ foreach $parameter (@{$args{'parameterlist'}}) {
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
+
+ print ".IP \"" . $parameter . "\" 12\n";
+ output_highlight($args{'parameterdescs'}{$parameter_name});
+ }
+ foreach $section (@{$args{'sectionlist'}}) {
+ print ".SH \"", uc $section, "\"\n";
+ output_highlight($args{'sections'}{$section});
+ }
+}
+
+##
+# output enum in man
+sub output_enum_man(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+ my $count;
+
+ print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n";
+
+ print ".SH NAME\n";
+ print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n";
+
+ print ".SH SYNOPSIS\n";
+ print "enum " . $args{'enum'} . " {\n";
+ $count = 0;
+ foreach my $parameter (@{$args{'parameterlist'}}) {
+ print ".br\n.BI \" $parameter\"\n";
+ if ($count == $#{$args{'parameterlist'}}) {
+ print "\n};\n";
+ last;
+ } else {
+ print ", \n.br\n";
+ }
+ $count++;
+ }
+
+ print ".SH Constants\n";
+ foreach $parameter (@{$args{'parameterlist'}}) {
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
+
+ print ".IP \"" . $parameter . "\" 12\n";
+ output_highlight($args{'parameterdescs'}{$parameter_name});
+ }
+ foreach $section (@{$args{'sectionlist'}}) {
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
+ }
+}
+
+##
+# output struct in man
+sub output_struct_man(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+
+ print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n";
+
+ print ".SH NAME\n";
+ print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n";
+
+ my $declaration = $args{'definition'};
+ $declaration =~ s/\t/ /g;
+ $declaration =~ s/\n/"\n.br\n.BI \"/g;
+ print ".SH SYNOPSIS\n";
+ print $args{'type'} . " " . $args{'struct'} . " {\n.br\n";
+ print ".BI \"$declaration\n};\n.br\n\n";
+
+ print ".SH Members\n";
+ foreach $parameter (@{$args{'parameterlist'}}) {
+ ($parameter =~ /^#/) && next;
+
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
+
+ ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+ print ".IP \"" . $parameter . "\" 12\n";
+ output_highlight($args{'parameterdescs'}{$parameter_name});
+ }
+ foreach $section (@{$args{'sectionlist'}}) {
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
+ }
+}
+
+##
+# output typedef in man
+sub output_typedef_man(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+
+ print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n";
+
+ print ".SH NAME\n";
+ print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n";
+
+ foreach $section (@{$args{'sectionlist'}}) {
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
+ }
+}
+
+sub output_blockhead_man(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+ my $count;
+
+ print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n";
+
+ foreach $section (@{$args{'sectionlist'}}) {
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
+ }
+}
+
+##
+# output in restructured text
+#
+
+#
+# This could use some work; it's used to output the DOC: sections, and
+# starts by putting out the name of the doc section itself, but that tends
+# to duplicate a header already in the template file.
+#
+sub output_blockhead_rst(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+
+ foreach $section (@{$args{'sectionlist'}}) {
+ next if (defined($nosymbol_table{$section}));
+
+ if ($output_selection != OUTPUT_INCLUDE) {
+ print ".. _$section:\n\n";
+ print "**$section**\n\n";
+ }
+ print_lineno($section_start_lines{$section});
+ output_highlight_rst($args{'sections'}{$section});
+ print "\n";
+ }
+}
+
+#
+# Apply the RST highlights to a sub-block of text.
+#
+sub highlight_block($) {
+ # The dohighlight kludge requires the text be called $contents
+ my $contents = shift;
+ eval $dohighlight;
+ die $@ if $@;
+ return $contents;
+}
+
+#
+# Regexes used only here.
+#
+my $sphinx_literal = '^[^.].*::$';
+my $sphinx_cblock = '^\.\.\ +code-block::';
+
+sub output_highlight_rst {
+ my $input = join "\n",@_;
+ my $output = "";
+ my $line;
+ my $in_literal = 0;
+ my $litprefix;
+ my $block = "";
+
+ foreach $line (split "\n",$input) {
+ #
+ # If we're in a literal block, see if we should drop out
+ # of it. Otherwise pass the line straight through unmunged.
+ #
+ if ($in_literal) {
+ if (! ($line =~ /^\s*$/)) {
+ #
+ # If this is the first non-blank line in a literal
+ # block we need to figure out what the proper indent is.
+ #
+ if ($litprefix eq "") {
+ $line =~ /^(\s*)/;
+ $litprefix = '^' . $1;
+ $output .= $line . "\n";
+ } elsif (! ($line =~ /$litprefix/)) {
+ $in_literal = 0;
+ } else {
+ $output .= $line . "\n";
+ }
+ } else {
+ $output .= $line . "\n";
+ }
+ }
+ #
+ # Not in a literal block (or just dropped out)
+ #
+ if (! $in_literal) {
+ $block .= $line . "\n";
+ if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
+ $in_literal = 1;
+ $litprefix = "";
+ $output .= highlight_block($block);
+ $block = ""
+ }
+ }
+ }
+
+ if ($block) {
+ $output .= highlight_block($block);
+ }
+
+ $output =~ s/^\n+//g;
+ $output =~ s/\n+$//g;
+
+ foreach $line (split "\n", $output) {
+ print $lineprefix . $line . "\n";
+ }
+}
+
+sub output_function_rst(%) {
+ my %args = %{$_[0]};
+ my ($parameter, $section);
+ my $oldprefix = $lineprefix;
+
+ my $signature = "";
+ my $func_macro = $args{'func_macro'};
+ my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty
+
+ if ($func_macro) {
+ $signature = $args{'function'};
+ } else {
+ if ($args{'functiontype'}) {
+ $signature = $args{'functiontype'} . " ";
+ }
+ $signature .= $args{'function'} . " (";
+ }
+
+ my $count = 0;
+ foreach my $parameter (@{$args{'parameterlist'}}) {
+ if ($count ne 0) {
+ $signature .= ", ";
+ }
+ $count++;
+ $type = $args{'parametertypes'}{$parameter};
+
+ if ($type =~ m/$function_pointer/) {
+ # pointer-to-function
+ $signature .= $1 . $parameter . ") (" . $2 . ")";
+ } else {
+ $signature .= $type;
+ }
+ }
+
+ if (!$func_macro) {
+ $signature .= ")";
+ }
+
+ if ($args{'typedef'} || $args{'functiontype'} eq "") {
+ print ".. c:macro:: ". $args{'function'} . "\n\n";
+
+ if ($args{'typedef'}) {
+ print_lineno($declaration_start_line);
+ print " **Typedef**: ";
+ $lineprefix = "";
+ output_highlight_rst($args{'purpose'});
+ print "\n\n**Syntax**\n\n";
+ print " ``$signature``\n\n";
+ } else {
+ print "``$signature``\n\n";
+ }
+ } else {
+ print ".. c:function:: $signature\n\n";
+ }
+
+ if (!$args{'typedef'}) {
+ print_lineno($declaration_start_line);
+ $lineprefix = " ";
+ output_highlight_rst($args{'purpose'});
+ print "\n";
+ }
+
+ #
+ # Put our descriptive text into a container (thus an HTML <div>) to help
+ # set the function prototypes apart.
+ #
+ $lineprefix = " ";
+ if ($paramcount >= 0) {
+ print ".. container:: kernelindent\n\n";
+ print $lineprefix . "**Parameters**\n\n";
+ }
+ foreach $parameter (@{$args{'parameterlist'}}) {
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
+ $type = $args{'parametertypes'}{$parameter};
+
+ if ($type ne "") {
+ print $lineprefix . "``$type``\n";
+ } else {
+ print $lineprefix . "``$parameter``\n";
+ }
+
+ print_lineno($parameterdesc_start_lines{$parameter_name});
+
+ $lineprefix = " ";
+ if (defined($args{'parameterdescs'}{$parameter_name}) &&
+ $args{'parameterdescs'}{$parameter_name} ne $undescribed) {
+ output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+ } else {
+ print $lineprefix . "*undescribed*\n";
+ }
+ $lineprefix = " ";
+ print "\n";
+ }
+
+ output_section_rst(@_);
+ $lineprefix = $oldprefix;
+}
+
+sub output_section_rst(%) {
+ my %args = %{$_[0]};
+ my $section;
+ my $oldprefix = $lineprefix;
+
+ foreach $section (@{$args{'sectionlist'}}) {
+ print $lineprefix . "**$section**\n\n";
+ print_lineno($section_start_lines{$section});
+ output_highlight_rst($args{'sections'}{$section});
+ print "\n";
+ }
+ print "\n";
+}
+
+sub output_enum_rst(%) {
+ my %args = %{$_[0]};
+ my ($parameter);
+ my $oldprefix = $lineprefix;
+ my $count;
+ my $outer;
+
+ my $name = $args{'enum'};
+ print "\n\n.. c:enum:: " . $name . "\n\n";
+
+ print_lineno($declaration_start_line);
+ $lineprefix = " ";
+ output_highlight_rst($args{'purpose'});
+ print "\n";
+
+ print ".. container:: kernelindent\n\n";
+ $outer = $lineprefix . " ";
+ $lineprefix = $outer . " ";
+ print $outer . "**Constants**\n\n";
+ foreach $parameter (@{$args{'parameterlist'}}) {
+ print $outer . "``$parameter``\n";
+
+ if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
+ output_highlight_rst($args{'parameterdescs'}{$parameter});
+ } else {
+ print $lineprefix . "*undescribed*\n";
+ }
+ print "\n";
+ }
+ print "\n";
+ $lineprefix = $oldprefix;
+ output_section_rst(@_);
+}
+
+sub output_typedef_rst(%) {
+ my %args = %{$_[0]};
+ my ($parameter);
+ my $oldprefix = $lineprefix;
+ my $name;
+
+ $name = $args{'typedef'};
+
+ print "\n\n.. c:type:: " . $name . "\n\n";
+ print_lineno($declaration_start_line);
+ $lineprefix = " ";
+ output_highlight_rst($args{'purpose'});
+ print "\n";
+
+ $lineprefix = $oldprefix;
+ output_section_rst(@_);
+}
+
+sub output_struct_rst(%) {
+ my %args = %{$_[0]};
+ my ($parameter);
+ my $oldprefix = $lineprefix;
+
+ my $name = $args{'struct'};
+ if ($args{'type'} eq 'union') {
+ print "\n\n.. c:union:: " . $name . "\n\n";
+ } else {
+ print "\n\n.. c:struct:: " . $name . "\n\n";
+ }
+
+ print_lineno($declaration_start_line);
+ $lineprefix = " ";
+ output_highlight_rst($args{'purpose'});
+ print "\n";
+
+ print ".. container:: kernelindent\n\n";
+ print $lineprefix . "**Definition**::\n\n";
+ my $declaration = $args{'definition'};
+ $lineprefix = $lineprefix . " ";
+ $declaration =~ s/\t/$lineprefix/g;
+ print $lineprefix . $args{'type'} . " " . $args{'struct'} . " {\n$declaration" . $lineprefix . "};\n\n";
+
+ $lineprefix = " ";
+ print $lineprefix . "**Members**\n\n";
+ foreach $parameter (@{$args{'parameterlist'}}) {
+ ($parameter =~ /^#/) && next;
+
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
+
+ ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+ $type = $args{'parametertypes'}{$parameter};
+ print_lineno($parameterdesc_start_lines{$parameter_name});
+ print $lineprefix . "``" . $parameter . "``\n";
+ $lineprefix = " ";
+ output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+ $lineprefix = " ";
+ print "\n";
+ }
+ print "\n";
+
+ $lineprefix = $oldprefix;
+ output_section_rst(@_);
+}
+
+## none mode output functions
+
+sub output_function_none(%) {
+}
+
+sub output_enum_none(%) {
+}
+
+sub output_typedef_none(%) {
+}
+
+sub output_struct_none(%) {
+}
+
+sub output_blockhead_none(%) {
+}
+
+##
+# generic output function for all types (function, struct/union, typedef, enum);
+# calls the generated, variable output_ function name based on
+# functype and output_mode
+sub output_declaration {
+ no strict 'refs';
+ my $name = shift;
+ my $functype = shift;
+ my $func = "output_${functype}_$output_mode";
+
+ return if (defined($nosymbol_table{$name}));
+
+ if (($output_selection == OUTPUT_ALL) ||
+ (($output_selection == OUTPUT_INCLUDE ||
+ $output_selection == OUTPUT_EXPORTED) &&
+ defined($function_table{$name})) ||
+ ($output_selection == OUTPUT_INTERNAL &&
+ !($functype eq "function" && defined($function_table{$name}))))
+ {
+ &$func(@_);
+ $section_counter++;
+ }
+}
+
+##
+# generic output function - calls the right one based on current output mode.
+sub output_blockhead {
+ no strict 'refs';
+ my $func = "output_blockhead_" . $output_mode;
+ &$func(@_);
+ $section_counter++;
+}
+
+##
+# takes a declaration (struct, union, enum, typedef) and
+# invokes the right handler. NOT called for functions.
+sub dump_declaration($$) {
+ no strict 'refs';
+ my ($prototype, $file) = @_;
+ my $func = "dump_" . $decl_type;
+ &$func(@_);
+}
+
+sub dump_union($$) {
+ dump_struct(@_);
+}
+
+sub dump_struct($$) {
+ my $x = shift;
+ my $file = shift;
+ my $decl_type;
+ my $members;
+ my $type = qr{struct|union};
+ # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
+ my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned};
+ my $definition_body = qr{\{(.*)\}\s*$qualifiers*};
+ my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
+
+ if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
+ $decl_type = $1;
+ $declaration_name = $2;
+ $members = $3;
+ } elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) {
+ $decl_type = $1;
+ $declaration_name = $3;
+ $members = $2;
+ }
+
+ if ($members) {
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n");
+ return;
+ }
+
+ # ignore members marked private:
+ $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
+ $members =~ s/\/\*\s*private:.*//gosi;
+ # strip comments:
+ $members =~ s/\/\*.*?\*\///gos;
+ # strip attributes
+ $members =~ s/\s*$attribute/ /gi;
+ $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
+ $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos;
+ $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos;
+ $members =~ s/\s*__packed\s*/ /gos;
+ $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
+ $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
+ $members =~ s/\s*____cacheline_aligned/ /gos;
+ # unwrap struct_group():
+ # - first eat non-declaration parameters and rewrite for final match
+ # - then remove macro, outer parens, and trailing semicolon
+ $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
+ $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
+ $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos;
+ $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
+ $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
+
+ my $args = qr{([^,)]+)};
+ # replace DECLARE_BITMAP
+ $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
+ $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos;
+ $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+ # replace DECLARE_HASHTABLE
+ $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+ # replace DECLARE_KFIFO
+ $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
+ # replace DECLARE_KFIFO_PTR
+ $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
+ # replace DECLARE_FLEX_ARRAY
+ $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos;
+ #replace DEFINE_DMA_UNMAP_ADDR
+ $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos;
+ #replace DEFINE_DMA_UNMAP_LEN
+ $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos;
+ my $declaration = $members;
+
+ # Split nested struct/union elements as newer ones
+ while ($members =~ m/$struct_members/) {
+ my $newmember;
+ my $maintype = $1;
+ my $ids = $4;
+ my $content = $3;
+ foreach my $id(split /,/, $ids) {
+ $newmember .= "$maintype $id; ";
+
+ $id =~ s/[:\[].*//;
+ $id =~ s/^\s*\**(\S+)\s*/$1/;
+ foreach my $arg (split /;/, $content) {
+ next if ($arg =~ m/^\s*$/);
+ if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) {
+ # pointer-to-function
+ my $type = $1;
+ my $name = $2;
+ my $extra = $3;
+ next if (!$name);
+ if ($id =~ m/^\s*$/) {
+ # anonymous struct/union
+ $newmember .= "$type$name$extra; ";
+ } else {
+ $newmember .= "$type$id.$name$extra; ";
+ }
+ } else {
+ my $type;
+ my $names;
+ $arg =~ s/^\s+//;
+ $arg =~ s/\s+$//;
+ # Handle bitmaps
+ $arg =~ s/:\s*\d+\s*//g;
+ # Handle arrays
+ $arg =~ s/\[.*\]//g;
+ # The type may have multiple words,
+ # and multiple IDs can be defined, like:
+ # const struct foo, *bar, foobar
+ # So, we remove spaces when parsing the
+ # names, in order to match just names
+ # and commas for the names
+ $arg =~ s/\s*,\s*/,/g;
+ if ($arg =~ m/(.*)\s+([\S+,]+)/) {
+ $type = $1;
+ $names = $2;
+ } else {
+ $newmember .= "$arg; ";
+ next;
+ }
+ foreach my $name (split /,/, $names) {
+ $name =~ s/^\s*\**(\S+)\s*/$1/;
+ next if (($name =~ m/^\s*$/));
+ if ($id =~ m/^\s*$/) {
+ # anonymous struct/union
+ $newmember .= "$type $name; ";
+ } else {
+ $newmember .= "$type $id.$name; ";
+ }
+ }
+ }
+ }
+ }
+ $members =~ s/$struct_members/$newmember/;
+ }
+
+ # Ignore other nested elements, like enums
+ $members =~ s/(\{[^\{\}]*\})//g;
+
+ create_parameterlist($members, ';', $file, $declaration_name);
+ check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual);
+
+ # Adjust declaration for better display
+ $declaration =~ s/([\{;])/$1\n/g;
+ $declaration =~ s/\}\s+;/};/g;
+ # Better handle inlined enums
+ do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/);
+
+ my @def_args = split /\n/, $declaration;
+ my $level = 1;
+ $declaration = "";
+ foreach my $clause (@def_args) {
+ $clause =~ s/^\s+//;
+ $clause =~ s/\s+$//;
+ $clause =~ s/\s+/ /;
+ next if (!$clause);
+ $level-- if ($clause =~ m/(\})/ && $level > 1);
+ if (!($clause =~ m/^\s*#/)) {
+ $declaration .= "\t" x $level;
+ }
+ $declaration .= "\t" . $clause . "\n";
+ $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/));
+ }
+ output_declaration($declaration_name,
+ 'struct',
+ {'struct' => $declaration_name,
+ 'module' => $modulename,
+ 'definition' => $declaration,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose,
+ 'type' => $decl_type
+ });
+ } else {
+ print STDERR "${file}:$.: error: Cannot parse struct or union!\n";
+ ++$errors;
+ }
+}
+
+
+sub show_warnings($$) {
+ my $functype = shift;
+ my $name = shift;
+
+ return 0 if (defined($nosymbol_table{$name}));
+
+ return 1 if ($output_selection == OUTPUT_ALL);
+
+ if ($output_selection == OUTPUT_EXPORTED) {
+ if (defined($function_table{$name})) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ if ($output_selection == OUTPUT_INTERNAL) {
+ if (!($functype eq "function" && defined($function_table{$name}))) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ if ($output_selection == OUTPUT_INCLUDE) {
+ if (defined($function_table{$name})) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ die("Please add the new output type at show_warnings()");
+}
+
+sub dump_enum($$) {
+ my $x = shift;
+ my $file = shift;
+ my $members;
+
+ # ignore members marked private:
+ $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
+ $x =~ s/\/\*\s*private:.*}/}/gosi;
+
+ $x =~ s@/\*.*?\*/@@gos; # strip comments.
+ # strip #define macros inside enums
+ $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos;
+
+ if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) {
+ $declaration_name = $2;
+ $members = $1;
+ } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
+ $declaration_name = $1;
+ $members = $2;
+ }
+
+ if ($members) {
+ if ($identifier ne $declaration_name) {
+ if ($identifier eq "") {
+ emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n");
+ } else {
+ emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n");
+ }
+ return;
+ }
+ $declaration_name = "(anonymous)" if ($declaration_name eq "");
+
+ my %_members;
+
+ $members =~ s/\s+$//;
+ $members =~ s/\([^;]*?[\)]//g;
+
+ foreach my $arg (split ',', $members) {
+ $arg =~ s/^\s*(\w+).*/$1/;
+ push @parameterlist, $arg;
+ if (!$parameterdescs{$arg}) {
+ $parameterdescs{$arg} = $undescribed;
+ if (show_warnings("enum", $declaration_name)) {
+ emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n");
+ }
+ }
+ $_members{$arg} = 1;
+ }
+
+ while (my ($k, $v) = each %parameterdescs) {
+ if (!exists($_members{$k})) {
+ if (show_warnings("enum", $declaration_name)) {
+ emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n");
+ }
+ }
+ }
+
+ output_declaration($declaration_name,
+ 'enum',
+ {'enum' => $declaration_name,
+ 'module' => $modulename,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ } else {
+ print STDERR "${file}:$.: error: Cannot parse enum!\n";
+ ++$errors;
+ }
+}
+
+my $typedef_type = qr { ((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s* }x;
+my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x;
+my $typedef_args = qr { \s*\((.*)\); }x;
+
+my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x;
+my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x;
+
+sub dump_typedef($$) {
+ my $x = shift;
+ my $file = shift;
+
+ $x =~ s@/\*.*?\*/@@gos; # strip comments.
+
+ # Parse function typedef prototypes
+ if ($x =~ $typedef1 || $x =~ $typedef2) {
+ $return_type = $1;
+ $declaration_name = $2;
+ my $args = $3;
+ $return_type =~ s/^\s+//;
+
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
+ return;
+ }
+
+ create_parameterlist($args, ',', $file, $declaration_name);
+
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'typedef' => 1,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ return;
+ }
+
+ while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) {
+ $x =~ s/\(*.\)\s*;$/;/;
+ $x =~ s/\[*.\]\s*;$/;/;
+ }
+
+ if ($x =~ /typedef.*\s+(\w+)\s*;/) {
+ $declaration_name = $1;
+
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
+ return;
+ }
+
+ output_declaration($declaration_name,
+ 'typedef',
+ {'typedef' => $declaration_name,
+ 'module' => $modulename,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ } else {
+ print STDERR "${file}:$.: error: Cannot parse typedef!\n";
+ ++$errors;
+ }
+}
+
+sub save_struct_actual($) {
+ my $actual = shift;
+
+ # strip all spaces from the actual param so that it looks like one string item
+ $actual =~ s/\s*//g;
+ $struct_actual = $struct_actual . $actual . " ";
+}
+
+sub create_parameterlist($$$$) {
+ my $args = shift;
+ my $splitter = shift;
+ my $file = shift;
+ my $declaration_name = shift;
+ my $type;
+ my $param;
+
+ # temporarily replace commas inside function pointer definition
+ my $arg_expr = qr{\([^\),]+};
+ while ($args =~ /$arg_expr,/) {
+ $args =~ s/($arg_expr),/$1#/g;
+ }
+
+ foreach my $arg (split($splitter, $args)) {
+ # strip comments
+ $arg =~ s/\/\*.*\*\///;
+ # ignore argument attributes
+ $arg =~ s/\sPOS0?\s/ /;
+ # strip leading/trailing spaces
+ $arg =~ s/^\s*//;
+ $arg =~ s/\s*$//;
+ $arg =~ s/\s+/ /;
+
+ if ($arg =~ /^#/) {
+ # Treat preprocessor directive as a typeless variable just to fill
+ # corresponding data structures "correctly". Catch it later in
+ # output_* subs.
+ push_parameter($arg, "", "", $file);
+ } elsif ($arg =~ m/\(.+\)\s*\(/) {
+ # pointer-to-function
+ $arg =~ tr/#/,/;
+ $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
+ $param = $1;
+ $type = $arg;
+ $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
+ save_struct_actual($param);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
+ } elsif ($arg =~ m/\(.+\)\s*\[/) {
+ # array-of-pointers
+ $arg =~ tr/#/,/;
+ $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/;
+ $param = $1;
+ $type = $arg;
+ $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
+ save_struct_actual($param);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
+ } elsif ($arg) {
+ $arg =~ s/\s*:\s*/:/g;
+ $arg =~ s/\s*\[/\[/g;
+
+ my @args = split('\s*,\s*', $arg);
+ if ($args[0] =~ m/\*/) {
+ $args[0] =~ s/(\*+)\s*/ $1/;
+ }
+
+ my @first_arg;
+ if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) {
+ shift @args;
+ push(@first_arg, split('\s+', $1));
+ push(@first_arg, $2);
+ } else {
+ @first_arg = split('\s+', shift @args);
+ }
+
+ unshift(@args, pop @first_arg);
+ $type = join " ", @first_arg;
+
+ foreach $param (@args) {
+ if ($param =~ m/^(\*+)\s*(.*)/) {
+ save_struct_actual($2);
+
+ push_parameter($2, "$type $1", $arg, $file, $declaration_name);
+ } elsif ($param =~ m/(.*?):(\w+)/) {
+ if ($type ne "") { # skip unnamed bit-fields
+ save_struct_actual($1);
+ push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
+ }
+ } else {
+ save_struct_actual($param);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
+ }
+ }
+ }
+ }
+}
+
+sub push_parameter($$$$$) {
+ my $param = shift;
+ my $type = shift;
+ my $org_arg = shift;
+ my $file = shift;
+ my $declaration_name = shift;
+
+ if (($anon_struct_union == 1) && ($type eq "") &&
+ ($param eq "}")) {
+ return; # ignore the ending }; from anon. struct/union
+ }
+
+ $anon_struct_union = 0;
+ $param =~ s/[\[\)].*//;
+
+ if ($type eq "" && $param =~ /\.\.\.$/)
+ {
+ if (!$param =~ /\w\.\.\.$/) {
+ # handles unnamed variable parameters
+ $param = "...";
+ } elsif ($param =~ /\w\.\.\.$/) {
+ # for named variable parameters of the form `x...`, remove the dots
+ $param =~ s/\.\.\.$//;
+ }
+ if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
+ $parameterdescs{$param} = "variable arguments";
+ }
+ }
+ elsif ($type eq "" && ($param eq "" or $param eq "void"))
+ {
+ $param="void";
+ $parameterdescs{void} = "no arguments";
+ }
+ elsif ($type eq "" && ($param eq "struct" or $param eq "union"))
+ # handle unnamed (anonymous) union or struct:
+ {
+ $type = $param;
+ $param = "{unnamed_" . $param . "}";
+ $parameterdescs{$param} = "anonymous\n";
+ $anon_struct_union = 1;
+ }
+ elsif ($param =~ "__cacheline_group" )
+ # handle cache group enforcing variables: they do not need be described in header files
+ {
+ return; # ignore __cacheline_group_begin and __cacheline_group_end
+ }
+
+ # warn if parameter has no description
+ # (but ignore ones starting with # as these are not parameters
+ # but inline preprocessor statements);
+ # Note: It will also ignore void params and unnamed structs/unions
+ if (!defined $parameterdescs{$param} && $param !~ /^#/) {
+ $parameterdescs{$param} = $undescribed;
+
+ if (show_warnings($type, $declaration_name) && $param !~ /\./) {
+ emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n");
+ }
+ }
+
+ # strip spaces from $param so that it is one continuous string
+ # on @parameterlist;
+ # this fixes a problem where check_sections() cannot find
+ # a parameter like "addr[6 + 2]" because it actually appears
+ # as "addr[6", "+", "2]" on the parameter list;
+ # but it's better to maintain the param string unchanged for output,
+ # so just weaken the string compare in check_sections() to ignore
+ # "[blah" in a parameter string;
+ ###$param =~ s/\s*//g;
+ push @parameterlist, $param;
+ $org_arg =~ s/\s\s+/ /g;
+ $parametertypes{$param} = $org_arg;
+}
+
+sub check_sections($$$$$) {
+ my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_;
+ my @sects = split ' ', $sectcheck;
+ my @prms = split ' ', $prmscheck;
+ my $err;
+ my ($px, $sx);
+ my $prm_clean; # strip trailing "[array size]" and/or beginning "*"
+
+ foreach $sx (0 .. $#sects) {
+ $err = 1;
+ foreach $px (0 .. $#prms) {
+ $prm_clean = $prms[$px];
+ $prm_clean =~ s/\[.*\]//;
+ $prm_clean =~ s/$attribute//i;
+ # ignore array size in a parameter string;
+ # however, the original param string may contain
+ # spaces, e.g.: addr[6 + 2]
+ # and this appears in @prms as "addr[6" since the
+ # parameter list is split at spaces;
+ # hence just ignore "[..." for the sections check;
+ $prm_clean =~ s/\[.*//;
+
+ ##$prm_clean =~ s/^\**//;
+ if ($prm_clean eq $sects[$sx]) {
+ $err = 0;
+ last;
+ }
+ }
+ if ($err) {
+ if ($decl_type eq "function") {
+ emit_warning("${file}:$.",
+ "Excess function parameter " .
+ "'$sects[$sx]' " .
+ "description in '$decl_name'\n");
+ } elsif (($decl_type eq "struct") or
+ ($decl_type eq "union")) {
+ emit_warning("${file}:$.",
+ "Excess $decl_type member " .
+ "'$sects[$sx]' " .
+ "description in '$decl_name'\n");
+ }
+ }
+ }
+}
+
+##
+# Checks the section describing the return value of a function.
+sub check_return_section {
+ my $file = shift;
+ my $declaration_name = shift;
+ my $return_type = shift;
+
+ # Ignore an empty return type (It's a macro)
+ # Ignore functions with a "void" return type. (But don't ignore "void *")
+ if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) {
+ return;
+ }
+
+ if (!defined($sections{$section_return}) ||
+ $sections{$section_return} eq "")
+ {
+ emit_warning("${file}:$.",
+ "No description found for return value of " .
+ "'$declaration_name'\n");
+ }
+}
+
+##
+# takes a function prototype and the name of the current file being
+# processed and spits out all the details stored in the global
+# arrays/hashes.
+sub dump_function($$) {
+ my $prototype = shift;
+ my $file = shift;
+ my $func_macro = 0;
+
+ print_lineno($new_start_line);
+
+ $prototype =~ s/^static +//;
+ $prototype =~ s/^extern +//;
+ $prototype =~ s/^asmlinkage +//;
+ $prototype =~ s/^inline +//;
+ $prototype =~ s/^__inline__ +//;
+ $prototype =~ s/^__inline +//;
+ $prototype =~ s/^__always_inline +//;
+ $prototype =~ s/^noinline +//;
+ $prototype =~ s/^__FORTIFY_INLINE +//;
+ $prototype =~ s/__init +//;
+ $prototype =~ s/__init_or_module +//;
+ $prototype =~ s/__deprecated +//;
+ $prototype =~ s/__flatten +//;
+ $prototype =~ s/__meminit +//;
+ $prototype =~ s/__must_check +//;
+ $prototype =~ s/__weak +//;
+ $prototype =~ s/__sched +//;
+ $prototype =~ s/_noprof//;
+ $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
+ $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
+ $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
+ $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/;
+ my $define = $prototype =~ s/^#\s*define\s+//; #ak added
+ $prototype =~ s/__attribute_const__ +//;
+ $prototype =~ s/__attribute__\s*\(\(
+ (?:
+ [\w\s]++ # attribute name
+ (?:\([^)]*+\))? # attribute arguments
+ \s*+,? # optional comma at the end
+ )+
+ \)\)\s+//x;
+
+ # Yes, this truly is vile. We are looking for:
+ # 1. Return type (may be nothing if we're looking at a macro)
+ # 2. Function name
+ # 3. Function parameters.
+ #
+ # All the while we have to watch out for function pointer parameters
+ # (which IIRC is what the two sections are for), C types (these
+ # regexps don't even start to express all the possibilities), and
+ # so on.
+ #
+ # If you mess with these regexps, it's a good idea to check that
+ # the following functions' documentation still comes out right:
+ # - parport_register_device (function pointer parameters)
+ # - atomic_set (macro)
+ # - pci_match_device, __copy_to_user (long return type)
+ my $name = qr{[a-zA-Z0-9_~:]+};
+ my $prototype_end1 = qr{[^\(]*};
+ my $prototype_end2 = qr{[^\{]*};
+ my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
+ my $type1 = qr{[\w\s]+};
+ my $type2 = qr{$type1\*+};
+
+ if ($define && $prototype =~ m/^()($name)\s+/) {
+ # This is an object-like macro, it has no return type and no parameter
+ # list.
+ # Function-like macros are not allowed to have spaces between
+ # declaration_name and opening parenthesis (notice the \s+).
+ $return_type = $1;
+ $declaration_name = $2;
+ $func_macro = 1;
+ } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
+ $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
+ $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) {
+ $return_type = $1;
+ $declaration_name = $2;
+ my $args = $3;
+
+ create_parameterlist($args, ',', $file, $declaration_name);
+ } else {
+ emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n");
+ return;
+ }
+
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n");
+ return;
+ }
+
+ my $prms = join " ", @parameterlist;
+ check_sections($file, $declaration_name, "function", $sectcheck, $prms);
+
+ # This check emits a lot of warnings at the moment, because many
+ # functions don't have a 'Return' doc section. So until the number
+ # of warnings goes sufficiently down, the check is only performed in
+ # -Wreturn mode.
+ # TODO: always perform the check.
+ if ($Wreturn && !$func_macro) {
+ check_return_section($file, $declaration_name, $return_type);
+ }
+
+ # The function parser can be called with a typedef parameter.
+ # Handle it.
+ if ($return_type =~ /typedef/) {
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'typedef' => 1,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose,
+ 'func_macro' => $func_macro
+ });
+ } else {
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose,
+ 'func_macro' => $func_macro
+ });
+ }
+}
+
+sub reset_state {
+ $function = "";
+ %parameterdescs = ();
+ %parametertypes = ();
+ @parameterlist = ();
+ %sections = ();
+ @sectionlist = ();
+ $sectcheck = "";
+ $struct_actual = "";
+ $prototype = "";
+
+ $state = STATE_NORMAL;
+ $inline_doc_state = STATE_INLINE_NA;
+}
+
+sub tracepoint_munge($) {
+ my $file = shift;
+ my $tracepointname = 0;
+ my $tracepointargs = 0;
+
+ if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
+ $tracepointname = $1;
+ }
+ if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
+ $tracepointname = $1;
+ }
+ if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
+ $tracepointname = $2;
+ }
+ $tracepointname =~ s/^\s+//; #strip leading whitespace
+ if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
+ $tracepointargs = $1;
+ }
+ if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+ emit_warning("${file}:$.", "Unrecognized tracepoint format: \n".
+ "$prototype\n");
+ } else {
+ $prototype = "static inline void trace_$tracepointname($tracepointargs)";
+ $identifier = "trace_$identifier";
+ }
+}
+
+sub syscall_munge() {
+ my $void = 0;
+
+ $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's
+## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) {
+ if ($prototype =~ m/SYSCALL_DEFINE0/) {
+ $void = 1;
+## $prototype = "long sys_$1(void)";
+ }
+
+ $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name
+ if ($prototype =~ m/long (sys_.*?),/) {
+ $prototype =~ s/,/\(/;
+ } elsif ($void) {
+ $prototype =~ s/\)/\(void\)/;
+ }
+
+ # now delete all of the odd-number commas in $prototype
+ # so that arg types & arg names don't have a comma between them
+ my $count = 0;
+ my $len = length($prototype);
+ if ($void) {
+ $len = 0; # skip the for-loop
+ }
+ for (my $ix = 0; $ix < $len; $ix++) {
+ if (substr($prototype, $ix, 1) eq ',') {
+ $count++;
+ if ($count % 2 == 1) {
+ substr($prototype, $ix, 1) = ' ';
+ }
+ }
+ }
+}
+
+sub process_proto_function($$) {
+ my $x = shift;
+ my $file = shift;
+
+ $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
+
+ if ($x =~ /^#/ && $x !~ /^#\s*define/) {
+ # do nothing
+ } elsif ($x =~ /([^\{]*)/) {
+ $prototype .= $1;
+ }
+
+ if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) {
+ $prototype =~ s@/\*.*?\*/@@gos; # strip comments.
+ $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
+ $prototype =~ s@^\s+@@gos; # strip leading spaces
+
+ # Handle prototypes for function pointers like:
+ # int (*pcs_config)(struct foo)
+ $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos;
+
+ if ($prototype =~ /SYSCALL_DEFINE/) {
+ syscall_munge();
+ }
+ if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
+ $prototype =~ /DEFINE_SINGLE_EVENT/)
+ {
+ tracepoint_munge($file);
+ }
+ dump_function($prototype, $file);
+ reset_state();
+ }
+}
+
+sub process_proto_type($$) {
+ my $x = shift;
+ my $file = shift;
+
+ $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
+ $x =~ s@^\s+@@gos; # strip leading spaces
+ $x =~ s@\s+$@@gos; # strip trailing spaces
+ $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
+
+ if ($x =~ /^#/) {
+ # To distinguish preprocessor directive from regular declaration later.
+ $x .= ";";
+ }
+
+ while (1) {
+ if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) {
+ if( length $prototype ) {
+ $prototype .= " "
+ }
+ $prototype .= $1 . $2;
+ ($2 eq '{') && $brcount++;
+ ($2 eq '}') && $brcount--;
+ if (($2 eq ';') && ($brcount == 0)) {
+ dump_declaration($prototype, $file);
+ reset_state();
+ last;
+ }
+ $x = $3;
+ } else {
+ $prototype .= $x;
+ last;
+ }
+ }
+}
+
+
+sub map_filename($) {
+ my $file;
+ my ($orig_file) = @_;
+
+ if (defined($ENV{'SRCTREE'})) {
+ $file = "$ENV{'SRCTREE'}" . "/" . $orig_file;
+ } else {
+ $file = $orig_file;
+ }
+
+ return $file;
+}
+
+sub process_export_file($) {
+ my ($orig_file) = @_;
+ my $file = map_filename($orig_file);
+
+ if (!open(IN,"<$file")) {
+ print STDERR "Error: Cannot open file $file\n";
+ ++$errors;
+ return;
+ }
+
+ while (<IN>) {
+ if (/$export_symbol/) {
+ next if (defined($nosymbol_table{$2}));
+ $function_table{$2} = 1;
+ }
+ if (/$export_symbol_ns/) {
+ next if (defined($nosymbol_table{$2}));
+ $function_table{$2} = 1;
+ }
+ }
+
+ close(IN);
+}
+
+#
+# Parsers for the various processing states.
+#
+# STATE_NORMAL: looking for the /** to begin everything.
+#
+sub process_normal() {
+ if (/$doc_start/o) {
+ $state = STATE_NAME; # next line is always the function name
+ $declaration_start_line = $. + 1;
+ }
+}
+
+#
+# STATE_NAME: Looking for the "name - description" line
+#
+sub process_name($$) {
+ my $file = shift;
+ my $descr;
+
+ if (/$doc_block/o) {
+ $state = STATE_DOCBLOCK;
+ $contents = "";
+ $new_start_line = $.;
+
+ if ( $1 eq "" ) {
+ $section = $section_intro;
+ } else {
+ $section = $1;
+ }
+ } elsif (/$doc_decl/o) {
+ $identifier = $1;
+ my $is_kernel_comment = 0;
+ my $decl_start = qr{$doc_com};
+ # test for pointer declaration type, foo * bar() - desc
+ my $fn_type = qr{\w+\s*\*\s*};
+ my $parenthesis = qr{\(\w*\)};
+ my $decl_end = qr{[-:].*};
+ if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+ $identifier = $1;
+ }
+ if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
+ $decl_type = $1;
+ $identifier = $2;
+ $is_kernel_comment = 1;
+ }
+ # Look for foo() or static void foo() - description; or misspelt
+ # identifier
+ elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+ /^$decl_start$fn_type?(\w+[^-:]*)$parenthesis?\s*$decl_end$/) {
+ $identifier = $1;
+ $decl_type = 'function';
+ $identifier =~ s/^define\s+//;
+ $is_kernel_comment = 1;
+ }
+ $identifier =~ s/\s+$//;
+
+ $state = STATE_BODY;
+ # if there's no @param blocks need to set up default section
+ # here
+ $contents = "";
+ $section = $section_default;
+ $new_start_line = $. + 1;
+ if (/[-:](.*)/) {
+ # strip leading/trailing/multiple spaces
+ $descr= $1;
+ $descr =~ s/^\s*//;
+ $descr =~ s/\s*$//;
+ $descr =~ s/\s+/ /g;
+ $declaration_purpose = $descr;
+ $state = STATE_BODY_MAYBE;
+ } else {
+ $declaration_purpose = "";
+ }
+
+ if (!$is_kernel_comment) {
+ emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_");
+ $state = STATE_NORMAL;
+ }
+
+ if (($declaration_purpose eq "") && $Wshort_desc) {
+ emit_warning("${file}:$.", "missing initial short description on line:\n$_");
+ }
+
+ if ($identifier eq "" && $decl_type ne "enum") {
+ emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_");
+ $state = STATE_NORMAL;
+ }
+
+ if ($verbose) {
+ print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n";
+ }
+ } else {
+ emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n");
+ $state = STATE_NORMAL;
+ }
+}
+
+
+#
+# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment.
+#
+sub process_body($$) {
+ my $file = shift;
+
+ if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $new_start_line = $.;
+ $contents = "";
+ }
+
+ if (/$doc_sect/i) { # case insensitive for supported section names
+ $newsection = $1;
+ $newcontents = $2;
+
+ # map the supported section names to the canonical names
+ if ($newsection =~ m/^description$/i) {
+ $newsection = $section_default;
+ } elsif ($newsection =~ m/^context$/i) {
+ $newsection = $section_context;
+ } elsif ($newsection =~ m/^returns?$/i) {
+ $newsection = $section_return;
+ } elsif ($newsection =~ m/^\@return$/) {
+ # special: @return is a section, not a param description
+ $newsection = $section_return;
+ }
+
+ if (($contents ne "") && ($contents ne "\n")) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ }
+
+ $state = STATE_BODY;
+ $contents = $newcontents;
+ $new_start_line = $.;
+ while (substr($contents, 0, 1) eq " ") {
+ $contents = substr($contents, 1);
+ }
+ if ($contents ne "") {
+ $contents .= "\n";
+ }
+ $section = $newsection;
+ $leading_space = undef;
+ } elsif (/$doc_end/) {
+ if (($contents ne "") && ($contents ne "\n")) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
+ # look for doc_com + <text> + doc_end:
+ if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') {
+ emit_warning("${file}:$.", "suspicious ending line: $_");
+ }
+
+ $prototype = "";
+ $state = STATE_PROTO;
+ $brcount = 0;
+ $new_start_line = $. + 1;
+ } elsif (/$doc_content/) {
+ if ($1 eq "") {
+ if ($section eq $section_context) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ $new_start_line = $.;
+ $state = STATE_BODY;
+ } else {
+ if ($section ne $section_default) {
+ $state = STATE_BODY_WITH_BLANK_LINE;
+ } else {
+ $state = STATE_BODY;
+ }
+ $contents .= "\n";
+ }
+ } elsif ($state == STATE_BODY_MAYBE) {
+ # Continued declaration purpose
+ chomp($declaration_purpose);
+ $declaration_purpose .= " " . $1;
+ $declaration_purpose =~ s/\s+/ /g;
+ } else {
+ my $cont = $1;
+ if ($section =~ m/^@/ || $section eq $section_context) {
+ if (!defined $leading_space) {
+ if ($cont =~ m/^(\s+)/) {
+ $leading_space = $1;
+ } else {
+ $leading_space = "";
+ }
+ }
+ $cont =~ s/^$leading_space//;
+ }
+ $contents .= $cont . "\n";
+ }
+ } else {
+ # i dont know - bad line? ignore.
+ emit_warning("${file}:$.", "bad line: $_");
+ }
+}
+
+
+#
+# STATE_PROTO: reading a function/whatever prototype.
+#
+sub process_proto($$) {
+ my $file = shift;
+
+ if (/$doc_inline_oneline/) {
+ $section = $1;
+ $contents = $2;
+ if ($contents ne "") {
+ $contents .= "\n";
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
+ } elsif (/$doc_inline_start/) {
+ $state = STATE_INLINE;
+ $inline_doc_state = STATE_INLINE_NAME;
+ } elsif ($decl_type eq 'function') {
+ process_proto_function($_, $file);
+ } else {
+ process_proto_type($_, $file);
+ }
+}
+
+#
+# STATE_DOCBLOCK: within a DOC: block.
+#
+sub process_docblock($$) {
+ my $file = shift;
+
+ if (/$doc_end/) {
+ dump_doc_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ $function = "";
+ %parameterdescs = ();
+ %parametertypes = ();
+ @parameterlist = ();
+ %sections = ();
+ @sectionlist = ();
+ $prototype = "";
+ $state = STATE_NORMAL;
+ } elsif (/$doc_content/) {
+ if ( $1 eq "" ) {
+ $contents .= $blankline;
+ } else {
+ $contents .= $1 . "\n";
+ }
+ }
+}
+
+#
+# STATE_INLINE: docbook comments within a prototype.
+#
+sub process_inline($$) {
+ my $file = shift;
+
+ # First line (state 1) needs to be a @parameter
+ if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) {
+ $section = $1;
+ $contents = $2;
+ $new_start_line = $.;
+ if ($contents ne "") {
+ while (substr($contents, 0, 1) eq " ") {
+ $contents = substr($contents, 1);
+ }
+ $contents .= "\n";
+ }
+ $inline_doc_state = STATE_INLINE_TEXT;
+ # Documentation block end */
+ } elsif (/$doc_inline_end/) {
+ if (($contents ne "") && ($contents ne "\n")) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
+ $state = STATE_PROTO;
+ $inline_doc_state = STATE_INLINE_NA;
+ # Regular text
+ } elsif (/$doc_content/) {
+ if ($inline_doc_state == STATE_INLINE_TEXT) {
+ $contents .= $1 . "\n";
+ # nuke leading blank lines
+ if ($contents =~ /^\s*$/) {
+ $contents = "";
+ }
+ } elsif ($inline_doc_state == STATE_INLINE_NAME) {
+ $inline_doc_state = STATE_INLINE_ERROR;
+ emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_");
+ }
+ }
+}
+
+
+sub process_file($) {
+ my $file;
+ my ($orig_file) = @_;
+
+ $file = map_filename($orig_file);
+
+ if (!open(IN_FILE,"<$file")) {
+ print STDERR "Error: Cannot open file $file\n";
+ ++$errors;
+ return;
+ }
+
+ $. = 1;
+
+ $section_counter = 0;
+ while (<IN_FILE>) {
+ while (!/^ \*/ && s/\\\s*$//) {
+ $_ .= <IN_FILE>;
+ }
+ # Replace tabs by spaces
+ while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
+ # Hand this line to the appropriate state handler
+ if ($state == STATE_NORMAL) {
+ process_normal();
+ } elsif ($state == STATE_NAME) {
+ process_name($file, $_);
+ } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE ||
+ $state == STATE_BODY_WITH_BLANK_LINE) {
+ process_body($file, $_);
+ } elsif ($state == STATE_INLINE) { # scanning for inline parameters
+ process_inline($file, $_);
+ } elsif ($state == STATE_PROTO) {
+ process_proto($file, $_);
+ } elsif ($state == STATE_DOCBLOCK) {
+ process_docblock($file, $_);
+ }
+ }
+
+ # Make sure we got something interesting.
+ if (!$section_counter && $output_mode ne "none") {
+ if ($output_selection == OUTPUT_INCLUDE) {
+ emit_warning("${file}:1", "'$_' not found\n")
+ for keys %function_table;
+ } else {
+ emit_warning("${file}:1", "no structured comments found\n");
+ }
+ }
+ close IN_FILE;
+}
+
+$kernelversion = get_kernel_version();
+
+# generate a sequence of code that will splice in highlighting information
+# using the s// operator.
+for (my $k = 0; $k < @highlights; $k++) {
+ my $pattern = $highlights[$k][0];
+ my $result = $highlights[$k][1];
+# print STDERR "scanning pattern:$pattern, highlight:($result)\n";
+ $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n";
+}
+
+if ($output_selection == OUTPUT_EXPORTED ||
+ $output_selection == OUTPUT_INTERNAL) {
+
+ push(@export_file_list, @ARGV);
+
+ foreach (@export_file_list) {
+ chomp;
+ process_export_file($_);
+ }
+}
+
+foreach (@ARGV) {
+ chomp;
+ process_file($_);
+}
+if ($verbose && $errors) {
+ print STDERR "$errors errors\n";
+}
+if ($verbose && $warnings) {
+ print STDERR "$warnings warnings\n";
+}
+
+if ($Werror && $warnings) {
+ print STDERR "$warnings warnings as Errors\n";
+ exit($warnings);
+} else {
+ exit($output_mode eq "none" ? 0 : $errors)
+}
+
+__END__
+
+=head1 OPTIONS
+
+=head2 Output format selection (mutually exclusive):
+
+=over 8
+
+=item -man
+
+Output troff manual page format.
+
+=item -rst
+
+Output reStructuredText format. This is the default.
+
+=item -none
+
+Do not output documentation, only warnings.
+
+=back
+
+=head2 Output format modifiers
+
+=head3 reStructuredText only
+
+=head2 Output selection (mutually exclusive):
+
+=over 8
+
+=item -export
+
+Only output documentation for the symbols that have been exported using
+EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
+
+=item -internal
+
+Only output documentation for the symbols that have NOT been exported using
+EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
+
+=item -function NAME
+
+Only output documentation for the given function or DOC: section title.
+All other functions and DOC: sections are ignored.
+
+May be specified multiple times.
+
+=item -nosymbol NAME
+
+Exclude the specified symbol from the output documentation.
+
+May be specified multiple times.
+
+=back
+
+=head2 Output selection modifiers:
+
+=over 8
+
+=item -no-doc-sections
+
+Do not output DOC: sections.
+
+=item -export-file FILE
+
+Specify an additional FILE in which to look for EXPORT_SYMBOL information.
+
+To be used with -export or -internal.
+
+May be specified multiple times.
+
+=back
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -enable-lineno
+
+Enable output of .. LINENO lines.
+
+=back
+
+=head2 Other parameters:
+
+=over 8
+
+=item -h, -help
+
+Print this help.
+
+=item -v
+
+Verbose output, more warnings and other information.
+
+=item -Werror
+
+Treat warnings as errors.
+
+=back
+
+=cut
diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py
new file mode 100755
index 000000000000..12ae66f40bd7
--- /dev/null
+++ b/scripts/kernel-doc.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0103,R0915
+#
+# Converted from the kernel-doc script originally written in Perl
+# under GPLv2, copyrighted since 1998 by the following authors:
+#
+# Aditya Srivastava <yashsri421@gmail.com>
+# Akira Yokosawa <akiyks@gmail.com>
+# Alexander A. Klimov <grandmaster@al2klimov.de>
+# Alexander Lobakin <aleksander.lobakin@intel.com>
+# André Almeida <andrealmeid@igalia.com>
+# Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+# Anna-Maria Behnsen <anna-maria@linutronix.de>
+# Armin Kuster <akuster@mvista.com>
+# Bart Van Assche <bart.vanassche@sandisk.com>
+# Ben Hutchings <ben@decadent.org.uk>
+# Borislav Petkov <bbpetkov@yahoo.de>
+# Chen-Yu Tsai <wenst@chromium.org>
+# Coco Li <lixiaoyan@google.com>
+# Conchúr Navid <conchur@web.de>
+# Daniel Santos <daniel.santos@pobox.com>
+# Danilo Cesar Lemes de Paula <danilo.cesar@collabora.co.uk>
+# Dan Luedtke <mail@danrl.de>
+# Donald Hunter <donald.hunter@gmail.com>
+# Gabriel Krisman Bertazi <krisman@collabora.co.uk>
+# Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+# Harvey Harrison <harvey.harrison@gmail.com>
+# Horia Geanta <horia.geanta@freescale.com>
+# Ilya Dryomov <idryomov@gmail.com>
+# Jakub Kicinski <kuba@kernel.org>
+# Jani Nikula <jani.nikula@intel.com>
+# Jason Baron <jbaron@redhat.com>
+# Jason Gunthorpe <jgg@nvidia.com>
+# Jérémy Bobbio <lunar@debian.org>
+# Johannes Berg <johannes.berg@intel.com>
+# Johannes Weiner <hannes@cmpxchg.org>
+# Jonathan Cameron <Jonathan.Cameron@huawei.com>
+# Jonathan Corbet <corbet@lwn.net>
+# Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+# Kamil Rytarowski <n54@gmx.com>
+# Kees Cook <kees@kernel.org>
+# Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+# Levin, Alexander (Sasha Levin) <alexander.levin@verizon.com>
+# Linus Torvalds <torvalds@linux-foundation.org>
+# Lucas De Marchi <lucas.demarchi@profusion.mobi>
+# Mark Rutland <mark.rutland@arm.com>
+# Markus Heiser <markus.heiser@darmarit.de>
+# Martin Waitz <tali@admingilde.org>
+# Masahiro Yamada <masahiroy@kernel.org>
+# Matthew Wilcox <willy@infradead.org>
+# Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+# Michal Wajdeczko <michal.wajdeczko@intel.com>
+# Michael Zucchi
+# Mike Rapoport <rppt@linux.ibm.com>
+# Niklas Söderlund <niklas.soderlund@corigine.com>
+# Nishanth Menon <nm@ti.com>
+# Paolo Bonzini <pbonzini@redhat.com>
+# Pavan Kumar Linga <pavan.kumar.linga@intel.com>
+# Pavel Pisa <pisa@cmp.felk.cvut.cz>
+# Peter Maydell <peter.maydell@linaro.org>
+# Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+# Randy Dunlap <rdunlap@infradead.org>
+# Richard Kennedy <richard@rsk.demon.co.uk>
+# Rich Walker <rw@shadow.org.uk>
+# Rolf Eike Beer <eike-kernel@sf-tec.de>
+# Sakari Ailus <sakari.ailus@linux.intel.com>
+# Silvio Fricke <silvio.fricke@gmail.com>
+# Simon Huggins
+# Tim Waugh <twaugh@redhat.com>
+# Tomasz Warniełło <tomasz.warniello@gmail.com>
+# Utkarsh Tripathi <utripathi2002@gmail.com>
+# valdis.kletnieks@vt.edu <valdis.kletnieks@vt.edu>
+# Vegard Nossum <vegard.nossum@oracle.com>
+# Will Deacon <will.deacon@arm.com>
+# Yacine Belkadi <yacine.belkadi.1@gmail.com>
+# Yujie Liu <yujie.liu@intel.com>
+
+"""
+kernel_doc
+==========
+
+Print formatted kernel documentation to stdout
+
+Read C language source or header FILEs, extract embedded
+documentation comments, and print formatted documentation
+to standard output.
+
+The documentation comments are identified by the "/**"
+opening comment mark.
+
+See Documentation/doc-guide/kernel-doc.rst for the
+documentation comment syntax.
+"""
+
+import argparse
+import logging
+import os
+import sys
+
+# Import Python modules
+
+LIB_DIR = "lib/kdoc"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from kdoc_files import KernelFiles # pylint: disable=C0413
+from kdoc_output import RestFormat, ManFormat # pylint: disable=C0413
+
+DESC = """
+Read C language source or header FILEs, extract embedded documentation comments,
+and print formatted documentation to standard output.
+
+The documentation comments are identified by the "/**" opening comment mark.
+
+See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
+"""
+
+EXPORT_FILE_DESC = """
+Specify an additional FILE in which to look for EXPORT_SYMBOL information.
+
+May be used multiple times.
+"""
+
+EXPORT_DESC = """
+Only output documentation for the symbols that have been
+exported using EXPORT_SYMBOL() and related macros in any input
+FILE or -export-file FILE.
+"""
+
+INTERNAL_DESC = """
+Only output documentation for the symbols that have NOT been
+exported using EXPORT_SYMBOL() and related macros in any input
+FILE or -export-file FILE.
+"""
+
+FUNCTION_DESC = """
+Only output documentation for the given function or DOC: section
+title. All other functions and DOC: sections are ignored.
+
+May be used multiple times.
+"""
+
+NOSYMBOL_DESC = """
+Exclude the specified symbol from the output documentation.
+
+May be used multiple times.
+"""
+
+FILES_DESC = """
+Header and C source files to be parsed.
+"""
+
+WARN_CONTENTS_BEFORE_SECTIONS_DESC = """
+Warns if there are contents before sections (deprecated).
+
+This option is kept just for backward-compatibility, but it does nothing,
+neither here nor at the original Perl script.
+"""
+
+
+class MsgFormatter(logging.Formatter):
+ """Helper class to format warnings on a similar way to kernel-doc.pl"""
+
+ def format(self, record):
+ record.levelname = record.levelname.capitalize()
+ return logging.Formatter.format(self, record)
+
+def main():
+ """Main program"""
+
+ parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
+ description=DESC)
+
+ # Normal arguments
+
+ parser.add_argument("-v", "-verbose", "--verbose", action="store_true",
+ help="Verbose output, more warnings and other information.")
+
+ parser.add_argument("-d", "-debug", "--debug", action="store_true",
+ help="Enable debug messages")
+
+ parser.add_argument("-M", "-modulename", "--modulename",
+ default="Kernel API",
+ help="Allow setting a module name at the output.")
+
+ parser.add_argument("-l", "-enable-lineno", "--enable_lineno",
+ action="store_true",
+ help="Enable line number output (only in ReST mode)")
+
+ # Arguments to control the warning behavior
+
+ parser.add_argument("-Wreturn", "--wreturn", action="store_true",
+ help="Warns about the lack of a return markup on functions.")
+
+ parser.add_argument("-Wshort-desc", "-Wshort-description", "--wshort-desc",
+ action="store_true",
+ help="Warns if initial short description is missing")
+
+ parser.add_argument("-Wcontents-before-sections",
+ "--wcontents-before-sections", action="store_true",
+ help=WARN_CONTENTS_BEFORE_SECTIONS_DESC)
+
+ parser.add_argument("-Wall", "--wall", action="store_true",
+ help="Enable all types of warnings")
+
+ parser.add_argument("-Werror", "--werror", action="store_true",
+ help="Treat warnings as errors.")
+
+ parser.add_argument("-export-file", "--export-file", action='append',
+ help=EXPORT_FILE_DESC)
+
+ # Output format mutually-exclusive group
+
+ out_group = parser.add_argument_group("Output format selection (mutually exclusive)")
+
+ out_fmt = out_group.add_mutually_exclusive_group()
+
+ out_fmt.add_argument("-m", "-man", "--man", action="store_true",
+ help="Output troff manual page format.")
+ out_fmt.add_argument("-r", "-rst", "--rst", action="store_true",
+ help="Output reStructuredText format (default).")
+ out_fmt.add_argument("-N", "-none", "--none", action="store_true",
+ help="Do not output documentation, only warnings.")
+
+ # Output selection mutually-exclusive group
+
+ sel_group = parser.add_argument_group("Output selection (mutually exclusive)")
+ sel_mut = sel_group.add_mutually_exclusive_group()
+
+ sel_mut.add_argument("-e", "-export", "--export", action='store_true',
+ help=EXPORT_DESC)
+
+ sel_mut.add_argument("-i", "-internal", "--internal", action='store_true',
+ help=INTERNAL_DESC)
+
+ sel_mut.add_argument("-s", "-function", "--symbol", action='append',
+ help=FUNCTION_DESC)
+
+ # Those are valid for all 3 types of filter
+ parser.add_argument("-n", "-nosymbol", "--nosymbol", action='append',
+ help=NOSYMBOL_DESC)
+
+ parser.add_argument("-D", "-no-doc-sections", "--no-doc-sections",
+ action='store_true', help="Don't outputt DOC sections")
+
+ parser.add_argument("files", metavar="FILE",
+ nargs="+", help=FILES_DESC)
+
+ args = parser.parse_args()
+
+ if args.wall:
+ args.wreturn = True
+ args.wshort_desc = True
+ args.wcontents_before_sections = True
+
+ logger = logging.getLogger()
+
+ if not args.debug:
+ logger.setLevel(logging.INFO)
+ else:
+ logger.setLevel(logging.DEBUG)
+
+ formatter = MsgFormatter('%(levelname)s: %(message)s')
+
+ handler = logging.StreamHandler()
+ handler.setFormatter(formatter)
+
+ logger.addHandler(handler)
+
+ if args.man:
+ out_style = ManFormat(modulename=args.modulename)
+ elif args.none:
+ out_style = None
+ else:
+ out_style = RestFormat()
+
+ kfiles = KernelFiles(verbose=args.verbose,
+ out_style=out_style, werror=args.werror,
+ wreturn=args.wreturn, wshort_desc=args.wshort_desc,
+ wcontents_before_sections=args.wcontents_before_sections)
+
+ kfiles.parse(args.files, export_file=args.export_file)
+
+ for t in kfiles.msg(enable_lineno=args.enable_lineno, export=args.export,
+ internal=args.internal, symbol=args.symbol,
+ nosymbol=args.nosymbol, export_file=args.export_file,
+ no_doc_sections=args.no_doc_sections):
+ msg = t[1]
+ if msg:
+ print(msg)
+
+ error_count = kfiles.errors
+ if not error_count:
+ sys.exit(0)
+
+ if args.werror:
+ print(f"{error_count} warnings as errors")
+ sys.exit(error_count)
+
+ if args.verbose:
+ print(f"{error_count} errors")
+
+ if args.none:
+ sys.exit(0)
+
+ sys.exit(error_count)
+
+
+# Call main method
+if __name__ == "__main__":
+ main()
diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py
new file mode 100644
index 000000000000..9be4a64df71d
--- /dev/null
+++ b/scripts/lib/kdoc/kdoc_files.py
@@ -0,0 +1,291 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=R0903,R0913,R0914,R0917
+
+"""
+Parse lernel-doc tags on multiple kernel source files.
+"""
+
+import argparse
+import logging
+import os
+import re
+
+from kdoc_parser import KernelDoc
+from kdoc_output import OutputFormat
+
+
+class GlobSourceFiles:
+ """
+ Parse C source code file names and directories via an Interactor.
+ """
+
+ def __init__(self, srctree=None, valid_extensions=None):
+ """
+ Initialize valid extensions with a tuple.
+
+ If not defined, assume default C extensions (.c and .h)
+
+ It would be possible to use python's glob function, but it is
+ very slow, and it is not interactive. So, it would wait to read all
+ directories before actually do something.
+
+ So, let's use our own implementation.
+ """
+
+ if not valid_extensions:
+ self.extensions = (".c", ".h")
+ else:
+ self.extensions = valid_extensions
+
+ self.srctree = srctree
+
+ def _parse_dir(self, dirname):
+ """Internal function to parse files recursively"""
+
+ with os.scandir(dirname) as obj:
+ for entry in obj:
+ name = os.path.join(dirname, entry.name)
+
+ if entry.is_dir():
+ yield from self._parse_dir(name)
+
+ if not entry.is_file():
+ continue
+
+ basename = os.path.basename(name)
+
+ if not basename.endswith(self.extensions):
+ continue
+
+ yield name
+
+ def parse_files(self, file_list, file_not_found_cb):
+ """
+ Define an interator to parse all source files from file_list,
+ handling directories if any
+ """
+
+ if not file_list:
+ return
+
+ for fname in file_list:
+ if self.srctree:
+ f = os.path.join(self.srctree, fname)
+ else:
+ f = fname
+
+ if os.path.isdir(f):
+ yield from self._parse_dir(f)
+ elif os.path.isfile(f):
+ yield f
+ elif file_not_found_cb:
+ file_not_found_cb(fname)
+
+
+class KernelFiles():
+ """
+ Parse kernel-doc tags on multiple kernel source files.
+
+ There are two type of parsers defined here:
+ - self.parse_file(): parses both kernel-doc markups and
+ EXPORT_SYMBOL* macros;
+ - self.process_export_file(): parses only EXPORT_SYMBOL* macros.
+ """
+
+ def warning(self, msg):
+ """Ancillary routine to output a warning and increment error count"""
+
+ self.config.log.warning(msg)
+ self.errors += 1
+
+ def error(self, msg):
+ """Ancillary routine to output an error and increment error count"""
+
+ self.config.log.error(msg)
+ self.errors += 1
+
+ def parse_file(self, fname):
+ """
+ Parse a single Kernel source.
+ """
+
+ # Prevent parsing the same file twice if results are cached
+ if fname in self.files:
+ return
+
+ doc = KernelDoc(self.config, fname)
+ export_table, entries = doc.parse_kdoc()
+
+ self.export_table[fname] = export_table
+
+ self.files.add(fname)
+ self.export_files.add(fname) # parse_kdoc() already check exports
+
+ self.results[fname] = entries
+
+ def process_export_file(self, fname):
+ """
+ Parses EXPORT_SYMBOL* macros from a single Kernel source file.
+ """
+
+ # Prevent parsing the same file twice if results are cached
+ if fname in self.export_files:
+ return
+
+ doc = KernelDoc(self.config, fname)
+ export_table = doc.parse_export()
+
+ if not export_table:
+ self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}")
+ export_table = set()
+
+ self.export_table[fname] = export_table
+ self.export_files.add(fname)
+
+ def file_not_found_cb(self, fname):
+ """
+ Callback to warn if a file was not found.
+ """
+
+ self.error(f"Cannot find file {fname}")
+
+ def __init__(self, verbose=False, out_style=None,
+ werror=False, wreturn=False, wshort_desc=False,
+ wcontents_before_sections=False,
+ logger=None):
+ """
+ Initialize startup variables and parse all files
+ """
+
+ if not verbose:
+ verbose = bool(os.environ.get("KBUILD_VERBOSE", 0))
+
+ if out_style is None:
+ out_style = OutputFormat()
+
+ if not werror:
+ kcflags = os.environ.get("KCFLAGS", None)
+ if kcflags:
+ match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags)
+ if match:
+ werror = True
+
+ # reading this variable is for backwards compat just in case
+ # someone was calling it with the variable from outside the
+ # kernel's build system
+ kdoc_werror = os.environ.get("KDOC_WERROR", None)
+ if kdoc_werror:
+ werror = kdoc_werror
+
+ # Some variables are global to the parser logic as a whole as they are
+ # used to send control configuration to KernelDoc class. As such,
+ # those variables are read-only inside the KernelDoc.
+ self.config = argparse.Namespace
+
+ self.config.verbose = verbose
+ self.config.werror = werror
+ self.config.wreturn = wreturn
+ self.config.wshort_desc = wshort_desc
+ self.config.wcontents_before_sections = wcontents_before_sections
+
+ if not logger:
+ self.config.log = logging.getLogger("kernel-doc")
+ else:
+ self.config.log = logger
+
+ self.config.warning = self.warning
+
+ self.config.src_tree = os.environ.get("SRCTREE", None)
+
+ # Initialize variables that are internal to KernelFiles
+
+ self.out_style = out_style
+
+ self.errors = 0
+ self.results = {}
+
+ self.files = set()
+ self.export_files = set()
+ self.export_table = {}
+
+ def parse(self, file_list, export_file=None):
+ """
+ Parse all files
+ """
+
+ glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+ for fname in glob.parse_files(file_list, self.file_not_found_cb):
+ self.parse_file(fname)
+
+ for fname in glob.parse_files(export_file, self.file_not_found_cb):
+ self.process_export_file(fname)
+
+ def out_msg(self, fname, name, arg):
+ """
+ Return output messages from a file name using the output style
+ filtering.
+
+ If output type was not handled by the syler, return None.
+ """
+
+ # NOTE: we can add rules here to filter out unwanted parts,
+ # although OutputFormat.msg already does that.
+
+ return self.out_style.msg(fname, name, arg)
+
+ def msg(self, enable_lineno=False, export=False, internal=False,
+ symbol=None, nosymbol=None, no_doc_sections=False,
+ filenames=None, export_file=None):
+ """
+ Interacts over the kernel-doc results and output messages,
+ returning kernel-doc markups on each interaction
+ """
+
+ self.out_style.set_config(self.config)
+
+ if not filenames:
+ filenames = sorted(self.results.keys())
+
+ glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+ for fname in filenames:
+ function_table = set()
+
+ if internal or export:
+ if not export_file:
+ export_file = [fname]
+
+ for f in glob.parse_files(export_file, self.file_not_found_cb):
+ function_table |= self.export_table[f]
+
+ if symbol:
+ for s in symbol:
+ function_table.add(s)
+
+ self.out_style.set_filter(export, internal, symbol, nosymbol,
+ function_table, enable_lineno,
+ no_doc_sections)
+
+ msg = ""
+ if fname not in self.results:
+ self.config.log.warning("No kernel-doc for file %s", fname)
+ continue
+
+ for name, arg in self.results[fname]:
+ m = self.out_msg(fname, name, arg)
+
+ if m is None:
+ ln = arg.get("ln", 0)
+ dtype = arg.get('type', "")
+
+ self.config.log.warning("%s:%d Can't handle %s",
+ fname, ln, dtype)
+ else:
+ msg += m
+
+ if msg:
+ yield fname, msg
diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py
new file mode 100644
index 000000000000..86102e628d91
--- /dev/null
+++ b/scripts/lib/kdoc/kdoc_output.py
@@ -0,0 +1,793 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
+
+"""
+Implement output filters to print kernel-doc documentation.
+
+The implementation uses a virtual base class (OutputFormat) which
+contains a dispatches to virtual methods, and some code to filter
+out output messages.
+
+The actual implementation is done on one separate class per each type
+of output. Currently, there are output classes for ReST and man/troff.
+"""
+
+import os
+import re
+from datetime import datetime
+
+from kdoc_parser import KernelDoc, type_param
+from kdoc_re import KernRe
+
+
+function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
+
+# match expressions used to find embedded type information
+type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
+type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
+type_func = KernRe(r"(\w+)\(\)", cache=False)
+type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+# Special RST handling for func ptr params
+type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
+
+# Special RST handling for structs with func ptr params
+type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
+
+type_env = KernRe(r"(\$\w+)", cache=False)
+type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
+type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
+type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
+type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
+type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
+type_fallback = KernRe(r"\&([_\w]+)", cache=False)
+type_member_func = type_member + KernRe(r"\(\)", cache=False)
+
+
+class OutputFormat:
+ """
+ Base class for OutputFormat. If used as-is, it means that only
+ warnings will be displayed.
+ """
+
+ # output mode.
+ OUTPUT_ALL = 0 # output all symbols and doc sections
+ OUTPUT_INCLUDE = 1 # output only specified symbols
+ OUTPUT_EXPORTED = 2 # output exported symbols
+ OUTPUT_INTERNAL = 3 # output non-exported symbols
+
+ # Virtual member to be overriden at the inherited classes
+ highlights = []
+
+ def __init__(self):
+ """Declare internal vars and set mode to OUTPUT_ALL"""
+
+ self.out_mode = self.OUTPUT_ALL
+ self.enable_lineno = None
+ self.nosymbol = {}
+ self.symbol = None
+ self.function_table = None
+ self.config = None
+ self.no_doc_sections = False
+
+ self.data = ""
+
+ def set_config(self, config):
+ """
+ Setup global config variables used by both parser and output.
+ """
+
+ self.config = config
+
+ def set_filter(self, export, internal, symbol, nosymbol, function_table,
+ enable_lineno, no_doc_sections):
+ """
+ Initialize filter variables according with the requested mode.
+
+ Only one choice is valid between export, internal and symbol.
+
+ The nosymbol filter can be used on all modes.
+ """
+
+ self.enable_lineno = enable_lineno
+ self.no_doc_sections = no_doc_sections
+ self.function_table = function_table
+
+ if symbol:
+ self.out_mode = self.OUTPUT_INCLUDE
+ elif export:
+ self.out_mode = self.OUTPUT_EXPORTED
+ elif internal:
+ self.out_mode = self.OUTPUT_INTERNAL
+ else:
+ self.out_mode = self.OUTPUT_ALL
+
+ if nosymbol:
+ self.nosymbol = set(nosymbol)
+
+
+ def highlight_block(self, block):
+ """
+ Apply the RST highlights to a sub-block of text.
+ """
+
+ for r, sub in self.highlights:
+ block = r.sub(sub, block)
+
+ return block
+
+ def out_warnings(self, args):
+ """
+ Output warnings for identifiers that will be displayed.
+ """
+
+ warnings = args.get('warnings', [])
+
+ for log_msg in warnings:
+ self.config.warning(log_msg)
+
+ def check_doc(self, name, args):
+ """Check if DOC should be output"""
+
+ if self.no_doc_sections:
+ return False
+
+ if name in self.nosymbol:
+ return False
+
+ if self.out_mode == self.OUTPUT_ALL:
+ self.out_warnings(args)
+ return True
+
+ if self.out_mode == self.OUTPUT_INCLUDE:
+ if name in self.function_table:
+ self.out_warnings(args)
+ return True
+
+ return False
+
+ def check_declaration(self, dtype, name, args):
+ """
+ Checks if a declaration should be output or not based on the
+ filtering criteria.
+ """
+
+ if name in self.nosymbol:
+ return False
+
+ if self.out_mode == self.OUTPUT_ALL:
+ self.out_warnings(args)
+ return True
+
+ if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
+ if name in self.function_table:
+ return True
+
+ if self.out_mode == self.OUTPUT_INTERNAL:
+ if dtype != "function":
+ self.out_warnings(args)
+ return True
+
+ if name not in self.function_table:
+ self.out_warnings(args)
+ return True
+
+ return False
+
+ def msg(self, fname, name, args):
+ """
+ Handles a single entry from kernel-doc parser
+ """
+
+ self.data = ""
+
+ dtype = args.get('type', "")
+
+ if dtype == "doc":
+ self.out_doc(fname, name, args)
+ return self.data
+
+ if not self.check_declaration(dtype, name, args):
+ return self.data
+
+ if dtype == "function":
+ self.out_function(fname, name, args)
+ return self.data
+
+ if dtype == "enum":
+ self.out_enum(fname, name, args)
+ return self.data
+
+ if dtype == "typedef":
+ self.out_typedef(fname, name, args)
+ return self.data
+
+ if dtype in ["struct", "union"]:
+ self.out_struct(fname, name, args)
+ return self.data
+
+ # Warn if some type requires an output logic
+ self.config.log.warning("doesn't now how to output '%s' block",
+ dtype)
+
+ return None
+
+ # Virtual methods to be overridden by inherited classes
+ # At the base class, those do nothing.
+ def out_doc(self, fname, name, args):
+ """Outputs a DOC block"""
+
+ def out_function(self, fname, name, args):
+ """Outputs a function"""
+
+ def out_enum(self, fname, name, args):
+ """Outputs an enum"""
+
+ def out_typedef(self, fname, name, args):
+ """Outputs a typedef"""
+
+ def out_struct(self, fname, name, args):
+ """Outputs a struct"""
+
+
+class RestFormat(OutputFormat):
+ """Consts and functions used by ReST output"""
+
+ highlights = [
+ (type_constant, r"``\1``"),
+ (type_constant2, r"``\1``"),
+
+ # Note: need to escape () to avoid func matching later
+ (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
+ (type_member, r":c:type:`\1\2\3 <\1>`"),
+ (type_fp_param, r"**\1\\(\\)**"),
+ (type_fp_param2, r"**\1\\(\\)**"),
+ (type_func, r"\1()"),
+ (type_enum, r":c:type:`\1 <\2>`"),
+ (type_struct, r":c:type:`\1 <\2>`"),
+ (type_typedef, r":c:type:`\1 <\2>`"),
+ (type_union, r":c:type:`\1 <\2>`"),
+
+ # in rst this can refer to any type
+ (type_fallback, r":c:type:`\1`"),
+ (type_param_ref, r"**\1\2**")
+ ]
+ blankline = "\n"
+
+ sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
+ sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)
+
+ def __init__(self):
+ """
+ Creates class variables.
+
+ Not really mandatory, but it is a good coding style and makes
+ pylint happy.
+ """
+
+ super().__init__()
+ self.lineprefix = ""
+
+ def print_lineno(self, ln):
+ """Outputs a line number"""
+
+ if self.enable_lineno and ln is not None:
+ ln += 1
+ self.data += f".. LINENO {ln}\n"
+
+ def output_highlight(self, args):
+ """
+ Outputs a C symbol that may require being converted to ReST using
+ the self.highlights variable
+ """
+
+ input_text = args
+ output = ""
+ in_literal = False
+ litprefix = ""
+ block = ""
+
+ for line in input_text.strip("\n").split("\n"):
+
+ # If we're in a literal block, see if we should drop out of it.
+ # Otherwise, pass the line straight through unmunged.
+ if in_literal:
+ if line.strip(): # If the line is not blank
+ # If this is the first non-blank line in a literal block,
+ # figure out the proper indent.
+ if not litprefix:
+ r = KernRe(r'^(\s*)')
+ if r.match(line):
+ litprefix = '^' + r.group(1)
+ else:
+ litprefix = ""
+
+ output += line + "\n"
+ elif not KernRe(litprefix).match(line):
+ in_literal = False
+ else:
+ output += line + "\n"
+ else:
+ output += line + "\n"
+
+ # Not in a literal block (or just dropped out)
+ if not in_literal:
+ block += line + "\n"
+ if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
+ in_literal = True
+ litprefix = ""
+ output += self.highlight_block(block)
+ block = ""
+
+ # Handle any remaining block
+ if block:
+ output += self.highlight_block(block)
+
+ # Print the output with the line prefix
+ for line in output.strip("\n").split("\n"):
+ self.data += self.lineprefix + line + "\n"
+
+ def out_section(self, args, out_docblock=False):
+ """
+ Outputs a block section.
+
+ This could use some work; it's used to output the DOC: sections, and
+ starts by putting out the name of the doc section itself, but that
+ tends to duplicate a header already in the template file.
+ """
+
+ sectionlist = args.get('sectionlist', [])
+ sections = args.get('sections', {})
+ section_start_lines = args.get('section_start_lines', {})
+
+ for section in sectionlist:
+ # Skip sections that are in the nosymbol_table
+ if section in self.nosymbol:
+ continue
+
+ if out_docblock:
+ if not self.out_mode == self.OUTPUT_INCLUDE:
+ self.data += f".. _{section}:\n\n"
+ self.data += f'{self.lineprefix}**{section}**\n\n'
+ else:
+ self.data += f'{self.lineprefix}**{section}**\n\n'
+
+ self.print_lineno(section_start_lines.get(section, 0))
+ self.output_highlight(sections[section])
+ self.data += "\n"
+ self.data += "\n"
+
+ def out_doc(self, fname, name, args):
+ if not self.check_doc(name, args):
+ return
+ self.out_section(args, out_docblock=True)
+
+ def out_function(self, fname, name, args):
+
+ oldprefix = self.lineprefix
+ signature = ""
+
+ func_macro = args.get('func_macro', False)
+ if func_macro:
+ signature = args['function']
+ else:
+ if args.get('functiontype'):
+ signature = args['functiontype'] + " "
+ signature += args['function'] + " ("
+
+ parameterlist = args.get('parameterlist', [])
+ parameterdescs = args.get('parameterdescs', {})
+ parameterdesc_start_lines = args.get('parameterdesc_start_lines', {})
+
+ ln = args.get('declaration_start_line', 0)
+
+ count = 0
+ for parameter in parameterlist:
+ if count != 0:
+ signature += ", "
+ count += 1
+ dtype = args['parametertypes'].get(parameter, "")
+
+ if function_pointer.search(dtype):
+ signature += function_pointer.group(1) + parameter + function_pointer.group(3)
+ else:
+ signature += dtype
+
+ if not func_macro:
+ signature += ")"
+
+ self.print_lineno(ln)
+ if args.get('typedef') or not args.get('functiontype'):
+ self.data += f".. c:macro:: {args['function']}\n\n"
+
+ if args.get('typedef'):
+ self.data += " **Typedef**: "
+ self.lineprefix = ""
+ self.output_highlight(args.get('purpose', ""))
+ self.data += "\n\n**Syntax**\n\n"
+ self.data += f" ``{signature}``\n\n"
+ else:
+ self.data += f"``{signature}``\n\n"
+ else:
+ self.data += f".. c:function:: {signature}\n\n"
+
+ if not args.get('typedef'):
+ self.print_lineno(ln)
+ self.lineprefix = " "
+ self.output_highlight(args.get('purpose', ""))
+ self.data += "\n"
+
+ # Put descriptive text into a container (HTML <div>) to help set
+ # function prototypes apart
+ self.lineprefix = " "
+
+ if parameterlist:
+ self.data += ".. container:: kernelindent\n\n"
+ self.data += f"{self.lineprefix}**Parameters**\n\n"
+
+ for parameter in parameterlist:
+ parameter_name = KernRe(r'\[.*').sub('', parameter)
+ dtype = args['parametertypes'].get(parameter, "")
+
+ if dtype:
+ self.data += f"{self.lineprefix}``{dtype}``\n"
+ else:
+ self.data += f"{self.lineprefix}``{parameter}``\n"
+
+ self.print_lineno(parameterdesc_start_lines.get(parameter_name, 0))
+
+ self.lineprefix = " "
+ if parameter_name in parameterdescs and \
+ parameterdescs[parameter_name] != KernelDoc.undescribed:
+
+ self.output_highlight(parameterdescs[parameter_name])
+ self.data += "\n"
+ else:
+ self.data += f"{self.lineprefix}*undescribed*\n\n"
+ self.lineprefix = " "
+
+ self.out_section(args)
+ self.lineprefix = oldprefix
+
+ def out_enum(self, fname, name, args):
+
+ oldprefix = self.lineprefix
+ name = args.get('enum', '')
+ parameterlist = args.get('parameterlist', [])
+ parameterdescs = args.get('parameterdescs', {})
+ ln = args.get('declaration_start_line', 0)
+
+ self.data += f"\n\n.. c:enum:: {name}\n\n"
+
+ self.print_lineno(ln)
+ self.lineprefix = " "
+ self.output_highlight(args.get('purpose', ''))
+ self.data += "\n"
+
+ self.data += ".. container:: kernelindent\n\n"
+ outer = self.lineprefix + " "
+ self.lineprefix = outer + " "
+ self.data += f"{outer}**Constants**\n\n"
+
+ for parameter in parameterlist:
+ self.data += f"{outer}``{parameter}``\n"
+
+ if parameterdescs.get(parameter, '') != KernelDoc.undescribed:
+ self.output_highlight(parameterdescs[parameter])
+ else:
+ self.data += f"{self.lineprefix}*undescribed*\n\n"
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+ def out_typedef(self, fname, name, args):
+
+ oldprefix = self.lineprefix
+ name = args.get('typedef', '')
+ ln = args.get('declaration_start_line', 0)
+
+ self.data += f"\n\n.. c:type:: {name}\n\n"
+
+ self.print_lineno(ln)
+ self.lineprefix = " "
+
+ self.output_highlight(args.get('purpose', ''))
+
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+ def out_struct(self, fname, name, args):
+
+ name = args.get('struct', "")
+ purpose = args.get('purpose', "")
+ declaration = args.get('definition', "")
+ dtype = args.get('type', "struct")
+ ln = args.get('declaration_start_line', 0)
+
+ parameterlist = args.get('parameterlist', [])
+ parameterdescs = args.get('parameterdescs', {})
+ parameterdesc_start_lines = args.get('parameterdesc_start_lines', {})
+
+ self.data += f"\n\n.. c:{dtype}:: {name}\n\n"
+
+ self.print_lineno(ln)
+
+ oldprefix = self.lineprefix
+ self.lineprefix += " "
+
+ self.output_highlight(purpose)
+ self.data += "\n"
+
+ self.data += ".. container:: kernelindent\n\n"
+ self.data += f"{self.lineprefix}**Definition**::\n\n"
+
+ self.lineprefix = self.lineprefix + " "
+
+ declaration = declaration.replace("\t", self.lineprefix)
+
+ self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
+ self.data += f"{declaration}{self.lineprefix}" + "};\n\n"
+
+ self.lineprefix = " "
+ self.data += f"{self.lineprefix}**Members**\n\n"
+ for parameter in parameterlist:
+ if not parameter or parameter.startswith("#"):
+ continue
+
+ parameter_name = parameter.split("[", maxsplit=1)[0]
+
+ if parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+ continue
+
+ self.print_lineno(parameterdesc_start_lines.get(parameter_name, 0))
+
+ self.data += f"{self.lineprefix}``{parameter}``\n"
+
+ self.lineprefix = " "
+ self.output_highlight(parameterdescs[parameter_name])
+ self.lineprefix = " "
+
+ self.data += "\n"
+
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+
+class ManFormat(OutputFormat):
+ """Consts and functions used by man pages output"""
+
+ highlights = (
+ (type_constant, r"\1"),
+ (type_constant2, r"\1"),
+ (type_func, r"\\fB\1\\fP"),
+ (type_enum, r"\\fI\1\\fP"),
+ (type_struct, r"\\fI\1\\fP"),
+ (type_typedef, r"\\fI\1\\fP"),
+ (type_union, r"\\fI\1\\fP"),
+ (type_param, r"\\fI\1\\fP"),
+ (type_param_ref, r"\\fI\1\2\\fP"),
+ (type_member, r"\\fI\1\2\3\\fP"),
+ (type_fallback, r"\\fI\1\\fP")
+ )
+ blankline = ""
+
+ date_formats = [
+ "%a %b %d %H:%M:%S %Z %Y",
+ "%a %b %d %H:%M:%S %Y",
+ "%Y-%m-%d",
+ "%b %d %Y",
+ "%B %d %Y",
+ "%m %d %Y",
+ ]
+
+ def __init__(self, modulename):
+ """
+ Creates class variables.
+
+ Not really mandatory, but it is a good coding style and makes
+ pylint happy.
+ """
+
+ super().__init__()
+ self.modulename = modulename
+
+ dt = None
+ tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
+ if tstamp:
+ for fmt in self.date_formats:
+ try:
+ dt = datetime.strptime(tstamp, fmt)
+ break
+ except ValueError:
+ pass
+
+ if not dt:
+ dt = datetime.now()
+
+ self.man_date = dt.strftime("%B %Y")
+
+ def output_highlight(self, block):
+ """
+ Outputs a C symbol that may require being highlighted with
+ self.highlights variable using troff syntax
+ """
+
+ contents = self.highlight_block(block)
+
+ if isinstance(contents, list):
+ contents = "\n".join(contents)
+
+ for line in contents.strip("\n").split("\n"):
+ line = KernRe(r"^\s*").sub("", line)
+ if not line:
+ continue
+
+ if line[0] == ".":
+ self.data += "\\&" + line + "\n"
+ else:
+ self.data += line + "\n"
+
+ def out_doc(self, fname, name, args):
+ sectionlist = args.get('sectionlist', [])
+ sections = args.get('sections', {})
+
+ if not self.check_doc(name, args):
+ return
+
+ self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ for section in sectionlist:
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(sections.get(section))
+
+ def out_function(self, fname, name, args):
+ """output function in man"""
+
+ parameterlist = args.get('parameterlist', [])
+ parameterdescs = args.get('parameterdescs', {})
+ sectionlist = args.get('sectionlist', [])
+ sections = args.get('sections', {})
+
+ self.data += f'.TH "{args["function"]}" 9 "{args["function"]}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"{args['function']} \\- {args['purpose']}\n"
+
+ self.data += ".SH SYNOPSIS\n"
+ if args.get('functiontype', ''):
+ self.data += f'.B "{args["functiontype"]}" {args["function"]}' + "\n"
+ else:
+ self.data += f'.B "{args["function"]}' + "\n"
+
+ count = 0
+ parenth = "("
+ post = ","
+
+ for parameter in parameterlist:
+ if count == len(parameterlist) - 1:
+ post = ");"
+
+ dtype = args['parametertypes'].get(parameter, "")
+ if function_pointer.match(dtype):
+ # Pointer-to-function
+ self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
+ else:
+ dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)
+
+ self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n"
+ count += 1
+ parenth = ""
+
+ if parameterlist:
+ self.data += ".SH ARGUMENTS\n"
+
+ for parameter in parameterlist:
+ parameter_name = re.sub(r'\[.*', '', parameter)
+
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(parameterdescs.get(parameter_name, ""))
+
+ for section in sectionlist:
+ self.data += f'.SH "{section.upper()}"' + "\n"
+ self.output_highlight(sections[section])
+
+ def out_enum(self, fname, name, args):
+
+ name = args.get('enum', '')
+ parameterlist = args.get('parameterlist', [])
+ sectionlist = args.get('sectionlist', [])
+ sections = args.get('sections', {})
+
+ self.data += f'.TH "{self.modulename}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"enum {args['enum']} \\- {args['purpose']}\n"
+
+ self.data += ".SH SYNOPSIS\n"
+ self.data += f"enum {args['enum']}" + " {\n"
+
+ count = 0
+ for parameter in parameterlist:
+ self.data += f'.br\n.BI " {parameter}"' + "\n"
+ if count == len(parameterlist) - 1:
+ self.data += "\n};\n"
+ else:
+ self.data += ", \n.br\n"
+
+ count += 1
+
+ self.data += ".SH Constants\n"
+
+ for parameter in parameterlist:
+ parameter_name = KernRe(r'\[.*').sub('', parameter)
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(args['parameterdescs'].get(parameter_name, ""))
+
+ for section in sectionlist:
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(sections[section])
+
+ def out_typedef(self, fname, name, args):
+ module = self.modulename
+ typedef = args.get('typedef')
+ purpose = args.get('purpose')
+ sectionlist = args.get('sectionlist', [])
+ sections = args.get('sections', {})
+
+ self.data += f'.TH "{module}" 9 "{typedef}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"typedef {typedef} \\- {purpose}\n"
+
+ for section in sectionlist:
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(sections.get(section))
+
+ def out_struct(self, fname, name, args):
+ module = self.modulename
+ struct_type = args.get('type')
+ struct_name = args.get('struct')
+ purpose = args.get('purpose')
+ definition = args.get('definition')
+ sectionlist = args.get('sectionlist', [])
+ parameterlist = args.get('parameterlist', [])
+ sections = args.get('sections', {})
+ parameterdescs = args.get('parameterdescs', {})
+
+ self.data += f'.TH "{module}" 9 "{struct_type} {struct_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"{struct_type} {struct_name} \\- {purpose}\n"
+
+ # Replace tabs with two spaces and handle newlines
+ declaration = definition.replace("\t", " ")
+ declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
+
+ self.data += ".SH SYNOPSIS\n"
+ self.data += f"{struct_type} {struct_name} " + "{" + "\n.br\n"
+ self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
+
+ self.data += ".SH Members\n"
+ for parameter in parameterlist:
+ if parameter.startswith("#"):
+ continue
+
+ parameter_name = re.sub(r"\[.*", "", parameter)
+
+ if parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+ continue
+
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(parameterdescs.get(parameter_name))
+
+ for section in sectionlist:
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(sections.get(section))
diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py
new file mode 100644
index 000000000000..062453eefc7a
--- /dev/null
+++ b/scripts/lib/kdoc/kdoc_parser.py
@@ -0,0 +1,1745 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
+
+"""
+kdoc_parser
+===========
+
+Read a C language source or header FILE and extract embedded
+documentation comments
+"""
+
+import re
+from pprint import pformat
+
+from kdoc_re import NestedMatch, KernRe
+
+
+#
+# Regular expressions used to parse kernel-doc markups at KernelDoc class.
+#
+# Let's declare them in lowercase outside any class to make easier to
+# convert from the python script.
+#
+# As those are evaluated at the beginning, no need to cache them
+#
+
+# Allow whitespace at end of comment start.
+doc_start = KernRe(r'^/\*\*\s*$', cache=False)
+
+doc_end = KernRe(r'\*/', cache=False)
+doc_com = KernRe(r'\s*\*\s*', cache=False)
+doc_com_body = KernRe(r'\s*\* ?', cache=False)
+doc_decl = doc_com + KernRe(r'(\w+)', cache=False)
+
+# @params and a strictly limited set of supported section names
+# Specifically:
+# Match @word:
+# @...:
+# @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+doc_sect = doc_com + \
+ KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$',
+ flags=re.I, cache=False)
+
+doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
+doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
+doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
+doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
+doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
+doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)
+attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)",
+ flags=re.I | re.S, cache=False)
+
+export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
+export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)
+
+type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+class state:
+ """
+ State machine enums
+ """
+
+ # Parser states
+ NORMAL = 0 # normal code
+ NAME = 1 # looking for function name
+ BODY_MAYBE = 2 # body - or maybe more description
+ BODY = 3 # the body of the comment
+ BODY_WITH_BLANK_LINE = 4 # the body which has a blank line
+ PROTO = 5 # scanning prototype
+ DOCBLOCK = 6 # documentation block
+ INLINE = 7 # gathering doc outside main block
+
+ name = [
+ "NORMAL",
+ "NAME",
+ "BODY_MAYBE",
+ "BODY",
+ "BODY_WITH_BLANK_LINE",
+ "PROTO",
+ "DOCBLOCK",
+ "INLINE",
+ ]
+
+ # Inline documentation state
+ INLINE_NA = 0 # not applicable ($state != INLINE)
+ INLINE_NAME = 1 # looking for member name (@foo:)
+ INLINE_TEXT = 2 # looking for member documentation
+ INLINE_END = 3 # done
+ INLINE_ERROR = 4 # error - Comment without header was found.
+ # Spit a warning as it's not
+ # proper kernel-doc and ignore the rest.
+
+ inline_name = [
+ "",
+ "_NAME",
+ "_TEXT",
+ "_END",
+ "_ERROR",
+ ]
+
+SECTION_DEFAULT = "Description" # default section
+
+class KernelEntry:
+
+ def __init__(self, config, ln):
+ self.config = config
+
+ self.contents = ""
+ self.function = ""
+ self.sectcheck = ""
+ self.struct_actual = ""
+ self.prototype = ""
+
+ self.warnings = []
+
+ self.parameterlist = []
+ self.parameterdescs = {}
+ self.parametertypes = {}
+ self.parameterdesc_start_lines = {}
+
+ self.section_start_lines = {}
+ self.sectionlist = []
+ self.sections = {}
+
+ self.anon_struct_union = False
+
+ self.leading_space = None
+
+ # State flags
+ self.brcount = 0
+
+ self.in_doc_sect = False
+ self.declaration_start_line = ln + 1
+
+ # TODO: rename to emit_message after removal of kernel-doc.pl
+ def emit_msg(self, log_msg, warning=True):
+ """Emit a message"""
+
+ if not warning:
+ self.config.log.info(log_msg)
+ return
+
+ # Delegate warning output to output logic, as this way it
+ # will report warnings/info only for symbols that are output
+
+ self.warnings.append(log_msg)
+ return
+
+ def dump_section(self, start_new=True):
+ """
+ Dumps section contents to arrays/hashes intended for that purpose.
+ """
+
+ name = self.section
+ contents = self.contents
+
+ if type_param.match(name):
+ name = type_param.group(1)
+
+ self.parameterdescs[name] = contents
+ self.parameterdesc_start_lines[name] = self.new_start_line
+
+ self.sectcheck += name + " "
+ self.new_start_line = 0
+
+ elif name == "@...":
+ name = "..."
+ self.parameterdescs[name] = contents
+ self.sectcheck += name + " "
+ self.parameterdesc_start_lines[name] = self.new_start_line
+ self.new_start_line = 0
+
+ else:
+ if name in self.sections and self.sections[name] != "":
+ # Only warn on user-specified duplicate section names
+ if name != SECTION_DEFAULT:
+ self.emit_msg(self.new_start_line,
+ f"duplicate section name '{name}'\n")
+ self.sections[name] += contents
+ else:
+ self.sections[name] = contents
+ self.sectionlist.append(name)
+ self.section_start_lines[name] = self.new_start_line
+ self.new_start_line = 0
+
+# self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
+
+ if start_new:
+ self.section = SECTION_DEFAULT
+ self.contents = ""
+
+
+class KernelDoc:
+ """
+ Read a C language source or header FILE and extract embedded
+ documentation comments.
+ """
+
+ # Section names
+
+ section_intro = "Introduction"
+ section_context = "Context"
+ section_return = "Return"
+
+ undescribed = "-- undescribed --"
+
+ def __init__(self, config, fname):
+ """Initialize internal variables"""
+
+ self.fname = fname
+ self.config = config
+
+ # Initial state for the state machines
+ self.state = state.NORMAL
+ self.inline_doc_state = state.INLINE_NA
+
+ # Store entry currently being processed
+ self.entry = None
+
+ # Place all potential outputs into an array
+ self.entries = []
+
+ def emit_msg(self, ln, msg, warning=True):
+ """Emit a message"""
+
+ log_msg = f"{self.fname}:{ln} {msg}"
+
+ if self.entry:
+ self.entry.emit_msg(log_msg, warning)
+ return
+
+ if warning:
+ self.config.log.warning(log_msg)
+ else:
+ self.config.log.info(log_msg)
+
+ def dump_section(self, start_new=True):
+ """
+ Dumps section contents to arrays/hashes intended for that purpose.
+ """
+
+ if self.entry:
+ self.entry.dump_section(start_new)
+
+ # TODO: rename it to store_declaration after removal of kernel-doc.pl
+ def output_declaration(self, dtype, name, **args):
+ """
+ Stores the entry into an entry array.
+
+ The actual output and output filters will be handled elsewhere
+ """
+
+ # The implementation here is different than the original kernel-doc:
+ # instead of checking for output filters or actually output anything,
+ # it just stores the declaration content at self.entries, as the
+ # output will happen on a separate class.
+ #
+ # For now, we're keeping the same name of the function just to make
+ # easier to compare the source code of both scripts
+
+ args["declaration_start_line"] = self.entry.declaration_start_line
+ args["type"] = dtype
+ args["warnings"] = self.entry.warnings
+
+ # TODO: use colletions.OrderedDict to remove sectionlist
+
+ sections = args.get('sections', {})
+ sectionlist = args.get('sectionlist', [])
+
+ # Drop empty sections
+ # TODO: improve empty sections logic to emit warnings
+ for section in ["Description", "Return"]:
+ if section in sectionlist:
+ if not sections[section].rstrip():
+ del sections[section]
+ sectionlist.remove(section)
+
+ self.entries.append((name, args))
+
+ self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
+
+ def reset_state(self, ln):
+ """
+ Ancillary routine to create a new entry. It initializes all
+ variables used by the state machine.
+ """
+
+ self.entry = KernelEntry(self.config, ln)
+
+ # State flags
+ self.state = state.NORMAL
+ self.inline_doc_state = state.INLINE_NA
+
+ def push_parameter(self, ln, decl_type, param, dtype,
+ org_arg, declaration_name):
+ """
+ Store parameters and their descriptions at self.entry.
+ """
+
+ if self.entry.anon_struct_union and dtype == "" and param == "}":
+ return # Ignore the ending }; from anonymous struct/union
+
+ self.entry.anon_struct_union = False
+
+ param = KernRe(r'[\[\)].*').sub('', param, count=1)
+
+ if dtype == "" and param.endswith("..."):
+ if KernRe(r'\w\.\.\.$').search(param):
+ # For named variable parameters of the form `x...`,
+ # remove the dots
+ param = param[:-3]
+ else:
+ # Handles unnamed variable parameters
+ param = "..."
+
+ if param not in self.entry.parameterdescs or \
+ not self.entry.parameterdescs[param]:
+
+ self.entry.parameterdescs[param] = "variable arguments"
+
+ elif dtype == "" and (not param or param == "void"):
+ param = "void"
+ self.entry.parameterdescs[param] = "no arguments"
+
+ elif dtype == "" and param in ["struct", "union"]:
+ # Handle unnamed (anonymous) union or struct
+ dtype = param
+ param = "{unnamed_" + param + "}"
+ self.entry.parameterdescs[param] = "anonymous\n"
+ self.entry.anon_struct_union = True
+
+ # Handle cache group enforcing variables: they do not need
+ # to be described in header files
+ elif "__cacheline_group" in param:
+ # Ignore __cacheline_group_begin and __cacheline_group_end
+ return
+
+ # Warn if parameter has no description
+ # (but ignore ones starting with # as these are not parameters
+ # but inline preprocessor statements)
+ if param not in self.entry.parameterdescs and not param.startswith("#"):
+ self.entry.parameterdescs[param] = self.undescribed
+
+ if "." not in param:
+ if decl_type == 'function':
+ dname = f"{decl_type} parameter"
+ else:
+ dname = f"{decl_type} member"
+
+ self.emit_msg(ln,
+ f"{dname} '{param}' not described in '{declaration_name}'")
+
+ # Strip spaces from param so that it is one continuous string on
+ # parameterlist. This fixes a problem where check_sections()
+ # cannot find a parameter like "addr[6 + 2]" because it actually
+ # appears as "addr[6", "+", "2]" on the parameter list.
+ # However, it's better to maintain the param string unchanged for
+ # output, so just weaken the string compare in check_sections()
+ # to ignore "[blah" in a parameter string.
+
+ self.entry.parameterlist.append(param)
+ org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
+ self.entry.parametertypes[param] = org_arg
+
+ def save_struct_actual(self, actual):
+ """
+ Strip all spaces from the actual param so that it looks like
+ one string item.
+ """
+
+ actual = KernRe(r'\s*').sub("", actual, count=1)
+
+ self.entry.struct_actual += actual + " "
+
+ def create_parameter_list(self, ln, decl_type, args,
+ splitter, declaration_name):
+ """
+ Creates a list of parameters, storing them at self.entry.
+ """
+
+ # temporarily replace all commas inside function pointer definition
+ arg_expr = KernRe(r'(\([^\),]+),')
+ while arg_expr.search(args):
+ args = arg_expr.sub(r"\1#", args)
+
+ for arg in args.split(splitter):
+ # Strip comments
+ arg = KernRe(r'\/\*.*\*\/').sub('', arg)
+
+ # Ignore argument attributes
+ arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
+
+ # Strip leading/trailing spaces
+ arg = arg.strip()
+ arg = KernRe(r'\s+').sub(' ', arg, count=1)
+
+ if arg.startswith('#'):
+ # Treat preprocessor directive as a typeless variable just to fill
+ # corresponding data structures "correctly". Catch it later in
+ # output_* subs.
+
+ # Treat preprocessor directive as a typeless variable
+ self.push_parameter(ln, decl_type, arg, "",
+ "", declaration_name)
+
+ elif KernRe(r'\(.+\)\s*\(').search(arg):
+ # Pointer-to-function
+
+ arg = arg.replace('#', ',')
+
+ r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)')
+ if r.match(arg):
+ param = r.group(1)
+ else:
+ self.emit_msg(ln, f"Invalid param: {arg}")
+ param = arg
+
+ dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)
+ self.save_struct_actual(param)
+ self.push_parameter(ln, decl_type, param, dtype,
+ arg, declaration_name)
+
+ elif KernRe(r'\(.+\)\s*\[').search(arg):
+ # Array-of-pointers
+
+ arg = arg.replace('#', ',')
+ r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)')
+ if r.match(arg):
+ param = r.group(1)
+ else:
+ self.emit_msg(ln, f"Invalid param: {arg}")
+ param = arg
+
+ dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)
+
+ self.save_struct_actual(param)
+ self.push_parameter(ln, decl_type, param, dtype,
+ arg, declaration_name)
+
+ elif arg:
+ arg = KernRe(r'\s*:\s*').sub(":", arg)
+ arg = KernRe(r'\s*\[').sub('[', arg)
+
+ args = KernRe(r'\s*,\s*').split(arg)
+ if args[0] and '*' in args[0]:
+ args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
+
+ first_arg = []
+ r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$')
+ if args[0] and r.match(args[0]):
+ args.pop(0)
+ first_arg.extend(r.group(1))
+ first_arg.append(r.group(2))
+ else:
+ first_arg = KernRe(r'\s+').split(args.pop(0))
+
+ args.insert(0, first_arg.pop())
+ dtype = ' '.join(first_arg)
+
+ for param in args:
+ if KernRe(r'^(\*+)\s*(.*)').match(param):
+ r = KernRe(r'^(\*+)\s*(.*)')
+ if not r.match(param):
+ self.emit_msg(ln, f"Invalid param: {param}")
+ continue
+
+ param = r.group(1)
+
+ self.save_struct_actual(r.group(2))
+ self.push_parameter(ln, decl_type, r.group(2),
+ f"{dtype} {r.group(1)}",
+ arg, declaration_name)
+
+ elif KernRe(r'(.*?):(\w+)').search(param):
+ r = KernRe(r'(.*?):(\w+)')
+ if not r.match(param):
+ self.emit_msg(ln, f"Invalid param: {param}")
+ continue
+
+ if dtype != "": # Skip unnamed bit-fields
+ self.save_struct_actual(r.group(1))
+ self.push_parameter(ln, decl_type, r.group(1),
+ f"{dtype}:{r.group(2)}",
+ arg, declaration_name)
+ else:
+ self.save_struct_actual(param)
+ self.push_parameter(ln, decl_type, param, dtype,
+ arg, declaration_name)
+
+ def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck):
+ """
+ Check for errors inside sections, emitting warnings if not found
+ parameters are described.
+ """
+
+ sects = sectcheck.split()
+ prms = prmscheck.split()
+ err = False
+
+ for sx in range(len(sects)): # pylint: disable=C0200
+ err = True
+ for px in range(len(prms)): # pylint: disable=C0200
+ prm_clean = prms[px]
+ prm_clean = KernRe(r'\[.*\]').sub('', prm_clean)
+ prm_clean = attribute.sub('', prm_clean)
+
+ # ignore array size in a parameter string;
+ # however, the original param string may contain
+ # spaces, e.g.: addr[6 + 2]
+ # and this appears in @prms as "addr[6" since the
+ # parameter list is split at spaces;
+ # hence just ignore "[..." for the sections check;
+ prm_clean = KernRe(r'\[.*').sub('', prm_clean)
+
+ if prm_clean == sects[sx]:
+ err = False
+ break
+
+ if err:
+ if decl_type == 'function':
+ dname = f"{decl_type} parameter"
+ else:
+ dname = f"{decl_type} member"
+
+ self.emit_msg(ln,
+ f"Excess {dname} '{sects[sx]}' description in '{decl_name}'")
+
+ def check_return_section(self, ln, declaration_name, return_type):
+ """
+ If the function doesn't return void, warns about the lack of a
+ return description.
+ """
+
+ if not self.config.wreturn:
+ return
+
+ # Ignore an empty return type (It's a macro)
+ # Ignore functions with a "void" return type (but not "void *")
+ if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
+ return
+
+ if not self.entry.sections.get("Return", None):
+ self.emit_msg(ln,
+ f"No description found for return value of '{declaration_name}'")
+
+ def dump_struct(self, ln, proto):
+ """
+ Store an entry for an struct or union
+ """
+
+ type_pattern = r'(struct|union)'
+
+ qualifiers = [
+ "__attribute__",
+ "__packed",
+ "__aligned",
+ "____cacheline_aligned_in_smp",
+ "____cacheline_aligned",
+ ]
+
+ definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
+ struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)')
+
+ # Extract struct/union definition
+ members = None
+ declaration_name = None
+ decl_type = None
+
+ r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
+ if r.search(proto):
+ decl_type = r.group(1)
+ declaration_name = r.group(2)
+ members = r.group(3)
+ else:
+ r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
+
+ if r.search(proto):
+ decl_type = r.group(1)
+ declaration_name = r.group(3)
+ members = r.group(2)
+
+ if not members:
+ self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
+ return
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln,
+ f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
+ return
+
+ args_pattern = r'([^,)]+)'
+
+ sub_prefixes = [
+ (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''),
+ (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''),
+
+ # Strip comments
+ (KernRe(r'\/\*.*?\*\/', re.S), ''),
+
+ # Strip attributes
+ (attribute, ' '),
+ (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__packed\s*', re.S), ' '),
+ (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+
+ # Unwrap struct_group macros based on this definition:
+ # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
+ # which has variants like: struct_group(NAME, MEMBERS...)
+ # Only MEMBERS arguments require documentation.
+ #
+ # Parsing them happens on two steps:
+ #
+ # 1. drop struct group arguments that aren't at MEMBERS,
+ # storing them as STRUCT_GROUP(MEMBERS)
+ #
+ # 2. remove STRUCT_GROUP() ancillary macro.
+ #
+ # The original logic used to remove STRUCT_GROUP() using an
+ # advanced regex:
+ #
+ # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
+ #
+ # with two patterns that are incompatible with
+ # Python re module, as it has:
+ #
+ # - a recursive pattern: (?1)
+ # - an atomic grouping: (?>...)
+ #
+ # I tried a simpler version: but it didn't work either:
+ # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
+ #
+ # As it doesn't properly match the end parenthesis on some cases.
+ #
+ # So, a better solution was crafted: there's now a NestedMatch
+ # class that ensures that delimiters after a search are properly
+ # matched. So, the implementation to drop STRUCT_GROUP() will be
+ # handled in separate.
+
+ (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
+ (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
+ (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
+ (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
+
+ # Replace macros
+ #
+ # TODO: use NestedMatch for FOO($1, $2, ...) matches
+ #
+ # it is better to also move those to the NestedMatch logic,
+ # to ensure that parenthesis will be properly matched.
+
+ (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+ (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+ (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+ (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+ (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),
+ (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+ (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'),
+ ]
+
+ # Regexes here are guaranteed to have the end limiter matching
+ # the start delimiter. Yet, right now, only one replace group
+ # is allowed.
+
+ sub_nested_prefixes = [
+ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
+ ]
+
+ for search, sub in sub_prefixes:
+ members = search.sub(sub, members)
+
+ nested = NestedMatch()
+
+ for search, sub in sub_nested_prefixes:
+ members = nested.sub(search, sub, members)
+
+ # Keeps the original declaration as-is
+ declaration = members
+
+ # Split nested struct/union elements
+ #
+ # This loop was simpler at the original kernel-doc perl version, as
+ # while ($members =~ m/$struct_members/) { ... }
+ # reads 'members' string on each interaction.
+ #
+ # Python behavior is different: it parses 'members' only once,
+ # creating a list of tuples from the first interaction.
+ #
+ # On other words, this won't get nested structs.
+ #
+ # So, we need to have an extra loop on Python to override such
+ # re limitation.
+
+ while True:
+ tuples = struct_members.findall(members)
+ if not tuples:
+ break
+
+ for t in tuples:
+ newmember = ""
+ maintype = t[0]
+ s_ids = t[5]
+ content = t[3]
+
+ oldmember = "".join(t)
+
+ for s_id in s_ids.split(','):
+ s_id = s_id.strip()
+
+ newmember += f"{maintype} {s_id}; "
+ s_id = KernRe(r'[:\[].*').sub('', s_id)
+ s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
+
+ for arg in content.split(';'):
+ arg = arg.strip()
+
+ if not arg:
+ continue
+
+ r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
+ if r.match(arg):
+ # Pointer-to-function
+ dtype = r.group(1)
+ name = r.group(2)
+ extra = r.group(3)
+
+ if not name:
+ continue
+
+ if not s_id:
+ # Anonymous struct/union
+ newmember += f"{dtype}{name}{extra}; "
+ else:
+ newmember += f"{dtype}{s_id}.{name}{extra}; "
+
+ else:
+ arg = arg.strip()
+ # Handle bitmaps
+ arg = KernRe(r':\s*\d+\s*').sub('', arg)
+
+ # Handle arrays
+ arg = KernRe(r'\[.*\]').sub('', arg)
+
+ # Handle multiple IDs
+ arg = KernRe(r'\s*,\s*').sub(',', arg)
+
+ r = KernRe(r'(.*)\s+([\S+,]+)')
+
+ if r.search(arg):
+ dtype = r.group(1)
+ names = r.group(2)
+ else:
+ newmember += f"{arg}; "
+ continue
+
+ for name in names.split(','):
+ name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()
+
+ if not name:
+ continue
+
+ if not s_id:
+ # Anonymous struct/union
+ newmember += f"{dtype} {name}; "
+ else:
+ newmember += f"{dtype} {s_id}.{name}; "
+
+ members = members.replace(oldmember, newmember)
+
+ # Ignore other nested elements, like enums
+ members = re.sub(r'(\{[^\{\}]*\})', '', members)
+
+ self.create_parameter_list(ln, decl_type, members, ';',
+ declaration_name)
+ self.check_sections(ln, declaration_name, decl_type,
+ self.entry.sectcheck, self.entry.struct_actual)
+
+ # Adjust declaration for better display
+ declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
+ declaration = KernRe(r'\}\s+;').sub('};', declaration)
+
+ # Better handle inlined enums
+ while True:
+ r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
+ if not r.search(declaration):
+ break
+
+ declaration = r.sub(r'\1,\n\2', declaration)
+
+ def_args = declaration.split('\n')
+ level = 1
+ declaration = ""
+ for clause in def_args:
+
+ clause = clause.strip()
+ clause = KernRe(r'\s+').sub(' ', clause, count=1)
+
+ if not clause:
+ continue
+
+ if '}' in clause and level > 1:
+ level -= 1
+
+ if not KernRe(r'^\s*#').match(clause):
+ declaration += "\t" * level
+
+ declaration += "\t" + clause + "\n"
+ if "{" in clause and "}" not in clause:
+ level += 1
+
+ self.output_declaration(decl_type, declaration_name,
+ struct=declaration_name,
+ definition=declaration,
+ parameterlist=self.entry.parameterlist,
+ parameterdescs=self.entry.parameterdescs,
+ parametertypes=self.entry.parametertypes,
+ parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines,
+ purpose=self.entry.declaration_purpose)
+
+ def dump_enum(self, ln, proto):
+ """
+ Stores an enum inside self.entries array.
+ """
+
+ # Ignore members marked private
+ proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto)
+ proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto)
+
+ # Strip comments
+ proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto)
+
+ # Strip #define macros inside enums
+ proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
+
+ members = None
+ declaration_name = None
+
+ r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
+ if r.search(proto):
+ declaration_name = r.group(2)
+ members = r.group(1).rstrip()
+ else:
+ r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
+ if r.match(proto):
+ declaration_name = r.group(1)
+ members = r.group(2).rstrip()
+
+ if not members:
+ self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
+ return
+
+ if self.entry.identifier != declaration_name:
+ if self.entry.identifier == "":
+ self.emit_msg(ln,
+ f"{proto}: wrong kernel-doc identifier on prototype")
+ else:
+ self.emit_msg(ln,
+ f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead")
+ return
+
+ if not declaration_name:
+ declaration_name = "(anonymous)"
+
+ member_set = set()
+
+ members = KernRe(r'\([^;]*?[\)]').sub('', members)
+
+ for arg in members.split(','):
+ if not arg:
+ continue
+ arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+ self.entry.parameterlist.append(arg)
+ if arg not in self.entry.parameterdescs:
+ self.entry.parameterdescs[arg] = self.undescribed
+ self.emit_msg(ln,
+ f"Enum value '{arg}' not described in enum '{declaration_name}'")
+ member_set.add(arg)
+
+ for k in self.entry.parameterdescs:
+ if k not in member_set:
+ self.emit_msg(ln,
+ f"Excess enum value '%{k}' description in '{declaration_name}'")
+
+ self.output_declaration('enum', declaration_name,
+ enum=declaration_name,
+ parameterlist=self.entry.parameterlist,
+ parameterdescs=self.entry.parameterdescs,
+ parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines,
+ purpose=self.entry.declaration_purpose)
+
+ def dump_declaration(self, ln, prototype):
+ """
+ Stores a data declaration inside self.entries array.
+ """
+
+ if self.entry.decl_type == "enum":
+ self.dump_enum(ln, prototype)
+ return
+
+ if self.entry.decl_type == "typedef":
+ self.dump_typedef(ln, prototype)
+ return
+
+ if self.entry.decl_type in ["union", "struct"]:
+ self.dump_struct(ln, prototype)
+ return
+
+ self.output_declaration(self.entry.decl_type, prototype,
+ entry=self.entry)
+
+ def dump_function(self, ln, prototype):
+ """
+ Stores a function of function macro inside self.entries array.
+ """
+
+ func_macro = False
+ return_type = ''
+ decl_type = 'function'
+
+ # Prefixes that would be removed
+ sub_prefixes = [
+ (r"^static +", "", 0),
+ (r"^extern +", "", 0),
+ (r"^asmlinkage +", "", 0),
+ (r"^inline +", "", 0),
+ (r"^__inline__ +", "", 0),
+ (r"^__inline +", "", 0),
+ (r"^__always_inline +", "", 0),
+ (r"^noinline +", "", 0),
+ (r"^__FORTIFY_INLINE +", "", 0),
+ (r"__init +", "", 0),
+ (r"__init_or_module +", "", 0),
+ (r"__deprecated +", "", 0),
+ (r"__flatten +", "", 0),
+ (r"__meminit +", "", 0),
+ (r"__must_check +", "", 0),
+ (r"__weak +", "", 0),
+ (r"__sched +", "", 0),
+ (r"_noprof", "", 0),
+ (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0),
+ (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0),
+ (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0),
+ (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0),
+ (r"__attribute_const__ +", "", 0),
+
+ # It seems that Python support for re.X is broken:
+ # At least for me (Python 3.13), this didn't work
+# (r"""
+# __attribute__\s*\(\(
+# (?:
+# [\w\s]+ # attribute name
+# (?:\([^)]*\))? # attribute arguments
+# \s*,? # optional comma at the end
+# )+
+# \)\)\s+
+# """, "", re.X),
+
+ # So, remove whitespaces and comments from it
+ (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0),
+ ]
+
+ for search, sub, flags in sub_prefixes:
+ prototype = KernRe(search, flags).sub(sub, prototype)
+
+ # Macros are a special case, as they change the prototype format
+ new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
+ if new_proto != prototype:
+ is_define_proto = True
+ prototype = new_proto
+ else:
+ is_define_proto = False
+
+ # Yes, this truly is vile. We are looking for:
+ # 1. Return type (may be nothing if we're looking at a macro)
+ # 2. Function name
+ # 3. Function parameters.
+ #
+ # All the while we have to watch out for function pointer parameters
+ # (which IIRC is what the two sections are for), C types (these
+ # regexps don't even start to express all the possibilities), and
+ # so on.
+ #
+ # If you mess with these regexps, it's a good idea to check that
+ # the following functions' documentation still comes out right:
+ # - parport_register_device (function pointer parameters)
+ # - atomic_set (macro)
+ # - pci_match_device, __copy_to_user (long return type)
+
+ name = r'[a-zA-Z0-9_~:]+'
+ prototype_end1 = r'[^\(]*'
+ prototype_end2 = r'[^\{]*'
+ prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)'
+
+ # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group.
+ # So, this needs to be mapped in Python with (?:...)? or (?:...)+
+
+ type1 = r'(?:[\w\s]+)?'
+ type2 = r'(?:[\w\s]+\*+)+'
+
+ found = False
+
+ if is_define_proto:
+ r = KernRe(r'^()(' + name + r')\s+')
+
+ if r.search(prototype):
+ return_type = ''
+ declaration_name = r.group(2)
+ func_macro = True
+
+ found = True
+
+ if not found:
+ patterns = [
+ rf'^()({name})\s*{prototype_end}',
+ rf'^({type1})\s+({name})\s*{prototype_end}',
+ rf'^({type2})\s*({name})\s*{prototype_end}',
+ ]
+
+ for p in patterns:
+ r = KernRe(p)
+
+ if r.match(prototype):
+
+ return_type = r.group(1)
+ declaration_name = r.group(2)
+ args = r.group(3)
+
+ self.create_parameter_list(ln, decl_type, args, ',',
+ declaration_name)
+
+ found = True
+ break
+ if not found:
+ self.emit_msg(ln,
+ f"cannot understand function prototype: '{prototype}'")
+ return
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln,
+ f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")
+ return
+
+ prms = " ".join(self.entry.parameterlist)
+ self.check_sections(ln, declaration_name, "function",
+ self.entry.sectcheck, prms)
+
+ self.check_return_section(ln, declaration_name, return_type)
+
+ if 'typedef' in return_type:
+ self.output_declaration(decl_type, declaration_name,
+ function=declaration_name,
+ typedef=True,
+ functiontype=return_type,
+ parameterlist=self.entry.parameterlist,
+ parameterdescs=self.entry.parameterdescs,
+ parametertypes=self.entry.parametertypes,
+ parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines,
+ purpose=self.entry.declaration_purpose,
+ func_macro=func_macro)
+ else:
+ self.output_declaration(decl_type, declaration_name,
+ function=declaration_name,
+ typedef=False,
+ functiontype=return_type,
+ parameterlist=self.entry.parameterlist,
+ parameterdescs=self.entry.parameterdescs,
+ parametertypes=self.entry.parametertypes,
+ parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines,
+ purpose=self.entry.declaration_purpose,
+ func_macro=func_macro)
+
+ def dump_typedef(self, ln, proto):
+ """
+ Stores a typedef inside self.entries array.
+ """
+
+ typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
+ typedef_ident = r'\*?\s*(\w\S+)\s*'
+ typedef_args = r'\s*\((.*)\);'
+
+ typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
+ typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args)
+
+ # Strip comments
+ proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto)
+
+ # Parse function typedef prototypes
+ for r in [typedef1, typedef2]:
+ if not r.match(proto):
+ continue
+
+ return_type = r.group(1).strip()
+ declaration_name = r.group(2)
+ args = r.group(3)
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln,
+ f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+ return
+
+ decl_type = 'function'
+ self.create_parameter_list(ln, decl_type, args, ',', declaration_name)
+
+ self.output_declaration(decl_type, declaration_name,
+ function=declaration_name,
+ typedef=True,
+ functiontype=return_type,
+ parameterlist=self.entry.parameterlist,
+ parameterdescs=self.entry.parameterdescs,
+ parametertypes=self.entry.parametertypes,
+ parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines,
+ purpose=self.entry.declaration_purpose)
+ return
+
+ # Handle nested parentheses or brackets
+ r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$')
+ while r.search(proto):
+ proto = r.sub('', proto)
+
+ # Parse simple typedefs
+ r = KernRe(r'typedef.*\s+(\w+)\s*;')
+ if r.match(proto):
+ declaration_name = r.group(1)
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln,
+ f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+ return
+
+ self.output_declaration('typedef', declaration_name,
+ typedef=declaration_name,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines,
+ purpose=self.entry.declaration_purpose)
+ return
+
+ self.emit_msg(ln, "error: Cannot parse typedef!")
+
+ @staticmethod
+ def process_export(function_set, line):
+ """
+ process EXPORT_SYMBOL* tags
+
+ This method doesn't use any variable from the class, so declare it
+ with a staticmethod decorator.
+ """
+
+ # Note: it accepts only one EXPORT_SYMBOL* per line, as having
+ # multiple export lines would violate Kernel coding style.
+
+ if export_symbol.search(line):
+ symbol = export_symbol.group(2)
+ function_set.add(symbol)
+ return
+
+ if export_symbol_ns.search(line):
+ symbol = export_symbol_ns.group(2)
+ function_set.add(symbol)
+
+ def process_normal(self, ln, line):
+ """
+ STATE_NORMAL: looking for the /** to begin everything.
+ """
+
+ if not doc_start.match(line):
+ return
+
+ # start a new entry
+ self.reset_state(ln)
+ self.entry.in_doc_sect = False
+
+ # next line is always the function name
+ self.state = state.NAME
+
+ def process_name(self, ln, line):
+ """
+ STATE_NAME: Looking for the "name - description" line
+ """
+
+ if doc_block.search(line):
+ self.entry.new_start_line = ln
+
+ if not doc_block.group(1):
+ self.entry.section = self.section_intro
+ else:
+ self.entry.section = doc_block.group(1)
+
+ self.entry.identifier = self.entry.section
+ self.state = state.DOCBLOCK
+ return
+
+ if doc_decl.search(line):
+ self.entry.identifier = doc_decl.group(1)
+ self.entry.is_kernel_comment = False
+
+ decl_start = str(doc_com) # comment block asterisk
+ fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions)
+ parenthesis = r"(?:\(\w*\))?" # optional parenthesis on function
+ decl_end = r"(?:[-:].*)" # end of the name part
+
+ # test for pointer declaration type, foo * bar() - desc
+ r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$")
+ if r.search(line):
+ self.entry.identifier = r.group(1)
+
+ # Test for data declaration
+ r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)")
+ if r.search(line):
+ self.entry.decl_type = r.group(1)
+ self.entry.identifier = r.group(2)
+ self.entry.is_kernel_comment = True
+ else:
+ # Look for foo() or static void foo() - description;
+ # or misspelt identifier
+
+ r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$")
+ r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$")
+
+ for r in [r1, r2]:
+ if r.search(line):
+ self.entry.identifier = r.group(1)
+ self.entry.decl_type = "function"
+
+ r = KernRe(r"define\s+")
+ self.entry.identifier = r.sub("", self.entry.identifier)
+ self.entry.is_kernel_comment = True
+ break
+
+ self.entry.identifier = self.entry.identifier.strip(" ")
+
+ self.state = state.BODY
+
+ # if there's no @param blocks need to set up default section here
+ self.entry.section = SECTION_DEFAULT
+ self.entry.new_start_line = ln + 1
+
+ r = KernRe("[-:](.*)")
+ if r.search(line):
+ # strip leading/trailing/multiple spaces
+ self.entry.descr = r.group(1).strip(" ")
+
+ r = KernRe(r"\s+")
+ self.entry.descr = r.sub(" ", self.entry.descr)
+ self.entry.declaration_purpose = self.entry.descr
+ self.state = state.BODY_MAYBE
+ else:
+ self.entry.declaration_purpose = ""
+
+ if not self.entry.is_kernel_comment:
+ self.emit_msg(ln,
+ f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}")
+ self.state = state.NORMAL
+
+ if not self.entry.declaration_purpose and self.config.wshort_desc:
+ self.emit_msg(ln,
+ f"missing initial short description on line:\n{line}")
+
+ if not self.entry.identifier and self.entry.decl_type != "enum":
+ self.emit_msg(ln,
+ f"wrong kernel-doc identifier on line:\n{line}")
+ self.state = state.NORMAL
+
+ if self.config.verbose:
+ self.emit_msg(ln,
+ f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
+ warning=False)
+
+ return
+
+ # Failed to find an identifier. Emit a warning
+ self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
+
+ def process_body(self, ln, line):
+ """
+ STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment.
+ """
+
+ if self.state == state.BODY_WITH_BLANK_LINE:
+ r = KernRe(r"\s*\*\s?\S")
+ if r.match(line):
+ self.dump_section()
+ self.entry.section = SECTION_DEFAULT
+ self.entry.new_start_line = ln
+ self.entry.contents = ""
+
+ if doc_sect.search(line):
+ self.entry.in_doc_sect = True
+ newsection = doc_sect.group(1)
+
+ if newsection.lower() in ["description", "context"]:
+ newsection = newsection.title()
+
+ # Special case: @return is a section, not a param description
+ if newsection.lower() in ["@return", "@returns",
+ "return", "returns"]:
+ newsection = "Return"
+
+ # Perl kernel-doc has a check here for contents before sections.
+ # the logic there is always false, as in_doc_sect variable is
+ # always true. So, just don't implement Wcontents_before_sections
+
+ # .title()
+ newcontents = doc_sect.group(2)
+ if not newcontents:
+ newcontents = ""
+
+ if self.entry.contents.strip("\n"):
+ self.dump_section()
+
+ self.entry.new_start_line = ln
+ self.entry.section = newsection
+ self.entry.leading_space = None
+
+ self.entry.contents = newcontents.lstrip()
+ if self.entry.contents:
+ self.entry.contents += "\n"
+
+ self.state = state.BODY
+ return
+
+ if doc_end.search(line):
+ self.dump_section()
+
+ # Look for doc_com + <text> + doc_end:
+ r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/')
+ if r.match(line):
+ self.emit_msg(ln, f"suspicious ending line: {line}")
+
+ self.entry.prototype = ""
+ self.entry.new_start_line = ln + 1
+
+ self.state = state.PROTO
+ return
+
+ if doc_content.search(line):
+ cont = doc_content.group(1)
+
+ if cont == "":
+ if self.entry.section == self.section_context:
+ self.dump_section()
+
+ self.entry.new_start_line = ln
+ self.state = state.BODY
+ else:
+ if self.entry.section != SECTION_DEFAULT:
+ self.state = state.BODY_WITH_BLANK_LINE
+ else:
+ self.state = state.BODY
+
+ self.entry.contents += "\n"
+
+ elif self.state == state.BODY_MAYBE:
+
+ # Continued declaration purpose
+ self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip()
+ self.entry.declaration_purpose += " " + cont
+
+ r = KernRe(r"\s+")
+ self.entry.declaration_purpose = r.sub(' ',
+ self.entry.declaration_purpose)
+
+ else:
+ if self.entry.section.startswith('@') or \
+ self.entry.section == self.section_context:
+ if self.entry.leading_space is None:
+ r = KernRe(r'^(\s+)')
+ if r.match(cont):
+ self.entry.leading_space = len(r.group(1))
+ else:
+ self.entry.leading_space = 0
+
+ # Double-check if leading space are realy spaces
+ pos = 0
+ for i in range(0, self.entry.leading_space):
+ if cont[i] != " ":
+ break
+ pos += 1
+
+ cont = cont[pos:]
+
+ # NEW LOGIC:
+ # In case it is different, update it
+ if self.entry.leading_space != pos:
+ self.entry.leading_space = pos
+
+ self.entry.contents += cont + "\n"
+ return
+
+ # Unknown line, ignore
+ self.emit_msg(ln, f"bad line: {line}")
+
+ def process_inline(self, ln, line):
+ """STATE_INLINE: docbook comments within a prototype."""
+
+ if self.inline_doc_state == state.INLINE_NAME and \
+ doc_inline_sect.search(line):
+ self.entry.section = doc_inline_sect.group(1)
+ self.entry.new_start_line = ln
+
+ self.entry.contents = doc_inline_sect.group(2).lstrip()
+ if self.entry.contents != "":
+ self.entry.contents += "\n"
+
+ self.inline_doc_state = state.INLINE_TEXT
+ # Documentation block end */
+ return
+
+ if doc_inline_end.search(line):
+ if self.entry.contents not in ["", "\n"]:
+ self.dump_section()
+
+ self.state = state.PROTO
+ self.inline_doc_state = state.INLINE_NA
+ return
+
+ if doc_content.search(line):
+ if self.inline_doc_state == state.INLINE_TEXT:
+ self.entry.contents += doc_content.group(1) + "\n"
+ if not self.entry.contents.strip(" ").rstrip("\n"):
+ self.entry.contents = ""
+
+ elif self.inline_doc_state == state.INLINE_NAME:
+ self.emit_msg(ln,
+ f"Incorrect use of kernel-doc format: {line}")
+
+ self.inline_doc_state = state.INLINE_ERROR
+
+ def syscall_munge(self, ln, proto): # pylint: disable=W0613
+ """
+ Handle syscall definitions
+ """
+
+ is_void = False
+
+ # Strip newlines/CR's
+ proto = re.sub(r'[\r\n]+', ' ', proto)
+
+ # Check if it's a SYSCALL_DEFINE0
+ if 'SYSCALL_DEFINE0' in proto:
+ is_void = True
+
+ # Replace SYSCALL_DEFINE with correct return type & function name
+ proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
+
+ r = KernRe(r'long\s+(sys_.*?),')
+ if r.search(proto):
+ proto = KernRe(',').sub('(', proto, count=1)
+ elif is_void:
+ proto = KernRe(r'\)').sub('(void)', proto, count=1)
+
+ # Now delete all of the odd-numbered commas in the proto
+ # so that argument types & names don't have a comma between them
+ count = 0
+ length = len(proto)
+
+ if is_void:
+ length = 0 # skip the loop if is_void
+
+ for ix in range(length):
+ if proto[ix] == ',':
+ count += 1
+ if count % 2 == 1:
+ proto = proto[:ix] + ' ' + proto[ix + 1:]
+
+ return proto
+
+ def tracepoint_munge(self, ln, proto):
+ """
+ Handle tracepoint definitions
+ """
+
+ tracepointname = None
+ tracepointargs = None
+
+ # Match tracepoint name based on different patterns
+ r = KernRe(r'TRACE_EVENT\((.*?),')
+ if r.search(proto):
+ tracepointname = r.group(1)
+
+ r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
+ if r.search(proto):
+ tracepointname = r.group(1)
+
+ r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
+ if r.search(proto):
+ tracepointname = r.group(2)
+
+ if tracepointname:
+ tracepointname = tracepointname.lstrip()
+
+ r = KernRe(r'TP_PROTO\((.*?)\)')
+ if r.search(proto):
+ tracepointargs = r.group(1)
+
+ if not tracepointname or not tracepointargs:
+ self.emit_msg(ln,
+ f"Unrecognized tracepoint format:\n{proto}\n")
+ else:
+ proto = f"static inline void trace_{tracepointname}({tracepointargs})"
+ self.entry.identifier = f"trace_{self.entry.identifier}"
+
+ return proto
+
+ def process_proto_function(self, ln, line):
+ """Ancillary routine to process a function prototype"""
+
+ # strip C99-style comments to end of line
+ r = KernRe(r"\/\/.*$", re.S)
+ line = r.sub('', line)
+
+ if KernRe(r'\s*#\s*define').match(line):
+ self.entry.prototype = line
+ elif line.startswith('#'):
+ # Strip other macros like #ifdef/#ifndef/#endif/...
+ pass
+ else:
+ r = KernRe(r'([^\{]*)')
+ if r.match(line):
+ self.entry.prototype += r.group(1) + " "
+
+ if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
+ # strip comments
+ r = KernRe(r'/\*.*?\*/')
+ self.entry.prototype = r.sub('', self.entry.prototype)
+
+ # strip newlines/cr's
+ r = KernRe(r'[\r\n]+')
+ self.entry.prototype = r.sub(' ', self.entry.prototype)
+
+ # strip leading spaces
+ r = KernRe(r'^\s+')
+ self.entry.prototype = r.sub('', self.entry.prototype)
+
+ # Handle self.entry.prototypes for function pointers like:
+ # int (*pcs_config)(struct foo)
+
+ r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
+ self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
+
+ if 'SYSCALL_DEFINE' in self.entry.prototype:
+ self.entry.prototype = self.syscall_munge(ln,
+ self.entry.prototype)
+
+ r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
+ if r.search(self.entry.prototype):
+ self.entry.prototype = self.tracepoint_munge(ln,
+ self.entry.prototype)
+
+ self.dump_function(ln, self.entry.prototype)
+ self.reset_state(ln)
+
+ def process_proto_type(self, ln, line):
+ """Ancillary routine to process a type"""
+
+ # Strip newlines/cr's.
+ line = KernRe(r'[\r\n]+', re.S).sub(' ', line)
+
+ # Strip leading spaces
+ line = KernRe(r'^\s+', re.S).sub('', line)
+
+ # Strip trailing spaces
+ line = KernRe(r'\s+$', re.S).sub('', line)
+
+ # Strip C99-style comments to the end of the line
+ line = KernRe(r"\/\/.*$", re.S).sub('', line)
+
+ # To distinguish preprocessor directive from regular declaration later.
+ if line.startswith('#'):
+ line += ";"
+
+ r = KernRe(r'([^\{\};]*)([\{\};])(.*)')
+ while True:
+ if r.search(line):
+ if self.entry.prototype:
+ self.entry.prototype += " "
+ self.entry.prototype += r.group(1) + r.group(2)
+
+ self.entry.brcount += r.group(2).count('{')
+ self.entry.brcount -= r.group(2).count('}')
+
+ self.entry.brcount = max(self.entry.brcount, 0)
+
+ if r.group(2) == ';' and self.entry.brcount == 0:
+ self.dump_declaration(ln, self.entry.prototype)
+ self.reset_state(ln)
+ break
+
+ line = r.group(3)
+ else:
+ self.entry.prototype += line
+ break
+
+ def process_proto(self, ln, line):
+ """STATE_PROTO: reading a function/whatever prototype."""
+
+ if doc_inline_oneline.search(line):
+ self.entry.section = doc_inline_oneline.group(1)
+ self.entry.contents = doc_inline_oneline.group(2)
+
+ if self.entry.contents != "":
+ self.entry.contents += "\n"
+ self.dump_section(start_new=False)
+
+ elif doc_inline_start.search(line):
+ self.state = state.INLINE
+ self.inline_doc_state = state.INLINE_NAME
+
+ elif self.entry.decl_type == 'function':
+ self.process_proto_function(ln, line)
+
+ else:
+ self.process_proto_type(ln, line)
+
+ def process_docblock(self, ln, line):
+ """STATE_DOCBLOCK: within a DOC: block."""
+
+ if doc_end.search(line):
+ self.dump_section()
+ self.output_declaration("doc", self.entry.identifier,
+ sectionlist=self.entry.sectionlist,
+ sections=self.entry.sections,
+ section_start_lines=self.entry.section_start_lines)
+ self.reset_state(ln)
+
+ elif doc_content.search(line):
+ self.entry.contents += doc_content.group(1) + "\n"
+
+ def parse_export(self):
+ """
+ Parses EXPORT_SYMBOL* macros from a single Kernel source file.
+ """
+
+ export_table = set()
+
+ try:
+ with open(self.fname, "r", encoding="utf8",
+ errors="backslashreplace") as fp:
+
+ for line in fp:
+ self.process_export(export_table, line)
+
+ except IOError:
+ return None
+
+ return export_table
+
+ def parse_kdoc(self):
+ """
+ Open and process each line of a C source file.
+ The parsing is controlled via a state machine, and the line is passed
+ to a different process function depending on the state. The process
+ function may update the state as needed.
+
+ Besides parsing kernel-doc tags, it also parses export symbols.
+ """
+
+ cont = False
+ prev = ""
+ prev_ln = None
+ export_table = set()
+
+ try:
+ with open(self.fname, "r", encoding="utf8",
+ errors="backslashreplace") as fp:
+ for ln, line in enumerate(fp):
+
+ line = line.expandtabs().strip("\n")
+
+ # Group continuation lines on prototypes
+ if self.state == state.PROTO:
+ if line.endswith("\\"):
+ prev += line.rstrip("\\")
+ cont = True
+
+ if not prev_ln:
+ prev_ln = ln
+
+ continue
+
+ if cont:
+ ln = prev_ln
+ line = prev + line
+ prev = ""
+ cont = False
+ prev_ln = None
+
+ self.config.log.debug("%d %s%s: %s",
+ ln, state.name[self.state],
+ state.inline_name[self.inline_doc_state],
+ line)
+
+ # This is an optimization over the original script.
+ # There, when export_file was used for the same file,
+ # it was read twice. Here, we use the already-existing
+ # loop to parse exported symbols as well.
+ #
+ # TODO: It should be noticed that not all states are
+ # needed here. On a future cleanup, process export only
+ # at the states that aren't handling comment markups.
+ self.process_export(export_table, line)
+
+ # Hand this line to the appropriate state handler
+ if self.state == state.NORMAL:
+ self.process_normal(ln, line)
+ elif self.state == state.NAME:
+ self.process_name(ln, line)
+ elif self.state in [state.BODY, state.BODY_MAYBE,
+ state.BODY_WITH_BLANK_LINE]:
+ self.process_body(ln, line)
+ elif self.state == state.INLINE: # scanning for inline parameters
+ self.process_inline(ln, line)
+ elif self.state == state.PROTO:
+ self.process_proto(ln, line)
+ elif self.state == state.DOCBLOCK:
+ self.process_docblock(ln, line)
+ except OSError:
+ self.config.log.error(f"Error: Cannot open file {self.fname}")
+
+ return export_table, self.entries
diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py
new file mode 100644
index 000000000000..e81695b273bf
--- /dev/null
+++ b/scripts/lib/kdoc/kdoc_re.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+"""
+
+import re
+
+# Local cache for regular expressions
+re_cache = {}
+
+
+class KernRe:
+ """
+ Helper class to simplify regex declaration and usage,
+
+ It calls re.compile for a given pattern. It also allows adding
+ regular expressions and define sub at class init time.
+
+ Regular expressions can be cached via an argument, helping to speedup
+ searches.
+ """
+
+ def _add_regex(self, string, flags):
+ """
+ Adds a new regex or re-use it from the cache.
+ """
+
+ if string in re_cache:
+ self.regex = re_cache[string]
+ else:
+ self.regex = re.compile(string, flags=flags)
+
+ if self.cache:
+ re_cache[string] = self.regex
+
+ def __init__(self, string, cache=True, flags=0):
+ """
+ Compile a regular expression and initialize internal vars.
+ """
+
+ self.cache = cache
+ self.last_match = None
+
+ self._add_regex(string, flags)
+
+ def __str__(self):
+ """
+ Return the regular expression pattern.
+ """
+ return self.regex.pattern
+
+ def __add__(self, other):
+ """
+ Allows adding two regular expressions into one.
+ """
+
+ return KernRe(str(self) + str(other), cache=self.cache or other.cache,
+ flags=self.regex.flags | other.regex.flags)
+
+ def match(self, string):
+ """
+ Handles a re.match storing its results
+ """
+
+ self.last_match = self.regex.match(string)
+ return self.last_match
+
+ def search(self, string):
+ """
+ Handles a re.search storing its results
+ """
+
+ self.last_match = self.regex.search(string)
+ return self.last_match
+
+ def findall(self, string):
+ """
+ Alias to re.findall
+ """
+
+ return self.regex.findall(string)
+
+ def split(self, string):
+ """
+ Alias to re.split
+ """
+
+ return self.regex.split(string)
+
+ def sub(self, sub, string, count=0):
+ """
+ Alias to re.sub
+ """
+
+ return self.regex.sub(sub, string, count=count)
+
+ def group(self, num):
+ """
+ Returns the group results of the last match
+ """
+
+ return self.last_match.group(num)
+
+
+class NestedMatch:
+ """
+ Finding nested delimiters is hard with regular expressions. It is
+ even harder on Python with its normal re module, as there are several
+ advanced regular expressions that are missing.
+
+ This is the case of this pattern:
+
+ '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+ which is used to properly match open/close parenthesis of the
+ string search STRUCT_GROUP(),
+
+ Add a class that counts pairs of delimiters, using it to match and
+ replace nested expressions.
+
+ The original approach was suggested by:
+ https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+
+ Although I re-implemented it to make it more generic and match 3 types
+ of delimiters. The logic checks if delimiters are paired. If not, it
+ will ignore the search string.
+ """
+
+ # TODO: make NestedMatch handle multiple match groups
+ #
+ # Right now, regular expressions to match it are defined only up to
+ # the start delimiter, e.g.:
+ #
+ # \bSTRUCT_GROUP\(
+ #
+ # is similar to: STRUCT_GROUP\((.*)\)
+ # except that the content inside the match group is delimiter's aligned.
+ #
+ # The content inside parenthesis are converted into a single replace
+ # group (e.g. r`\1').
+ #
+ # It would be nice to change such definition to support multiple
+ # match groups, allowing a regex equivalent to.
+ #
+ # FOO\((.*), (.*), (.*)\)
+ #
+ # it is probably easier to define it not as a regular expression, but
+ # with some lexical definition like:
+ #
+ # FOO(arg1, arg2, arg3)
+
+ DELIMITER_PAIRS = {
+ '{': '}',
+ '(': ')',
+ '[': ']',
+ }
+
+ RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
+
+ def _search(self, regex, line):
+ """
+ Finds paired blocks for a regex that ends with a delimiter.
+
+ The suggestion of using finditer to match pairs came from:
+ https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+ but I ended using a different implementation to align all three types
+ of delimiters and seek for an initial regular expression.
+
+ The algorithm seeks for open/close paired delimiters and place them
+ into a stack, yielding a start/stop position of each match when the
+ stack is zeroed.
+
+ The algorithm shoud work fine for properly paired lines, but will
+ silently ignore end delimiters that preceeds an start delimiter.
+ This should be OK for kernel-doc parser, as unaligned delimiters
+ would cause compilation errors. So, we don't need to rise exceptions
+ to cover such issues.
+ """
+
+ stack = []
+
+ for match_re in regex.finditer(line):
+ start = match_re.start()
+ offset = match_re.end()
+
+ d = line[offset - 1]
+ if d not in self.DELIMITER_PAIRS:
+ continue
+
+ end = self.DELIMITER_PAIRS[d]
+ stack.append(end)
+
+ for match in self.RE_DELIM.finditer(line[offset:]):
+ pos = match.start() + offset
+
+ d = line[pos]
+
+ if d in self.DELIMITER_PAIRS:
+ end = self.DELIMITER_PAIRS[d]
+
+ stack.append(end)
+ continue
+
+ # Does the end delimiter match what it is expected?
+ if stack and d == stack[-1]:
+ stack.pop()
+
+ if not stack:
+ yield start, offset, pos + 1
+ break
+
+ def search(self, regex, line):
+ """
+ This is similar to re.search:
+
+ It matches a regex that it is followed by a delimiter,
+ returning occurrences only if all delimiters are paired.
+ """
+
+ for t in self._search(regex, line):
+
+ yield line[t[0]:t[2]]
+
+ def sub(self, regex, sub, line, count=0):
+ """
+ This is similar to re.sub:
+
+ It matches a regex that it is followed by a delimiter,
+ replacing occurrences only if all delimiters are paired.
+
+ if r'\1' is used, it works just like re: it places there the
+ matched paired data with the delimiter stripped.
+
+ If count is different than zero, it will replace at most count
+ items.
+ """
+ out = ""
+
+ cur_pos = 0
+ n = 0
+
+ for start, end, pos in self._search(regex, line):
+ out += line[cur_pos:start]
+
+ # Value, ignoring start/end delimiters
+ value = line[end:pos - 1]
+
+ # replaces \1 at the sub string, if \1 is used there
+ new_sub = sub
+ new_sub = new_sub.replace(r'\1', value)
+
+ out += new_sub
+
+ # Drop end ';' if any
+ if line[pos] == ';':
+ pos += 1
+
+ cur_pos = pos
+ n += 1
+
+ if count and count >= n:
+ break
+
+ # Append the remaining string
+ l = len(line)
+ out += line[cur_pos:l]
+
+ return out
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
index 726f34e11960..98f206cb7c60 100644
--- a/scripts/package/kernel.spec
+++ b/scripts/package/kernel.spec
@@ -16,6 +16,7 @@ Source1: config
Source2: diff.patch
Provides: kernel-%{KERNELRELEASE}
BuildRequires: bc binutils bison dwarves
+BuildRequires: (elfutils-devel or libdw-devel)
BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
BuildRequires: gcc make openssl openssl-devel perl python3 rsync
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 744ddba01d93..d4b007b38a47 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -210,7 +210,7 @@ Rules-Requires-Root: no
Build-Depends: debhelper-compat (= 12)
Build-Depends-Arch: bc, bison, flex,
gcc-${host_gnu} <!pkg.${sourcename}.nokernelheaders>,
- kmod, libelf-dev:native,
+ kmod, libdw-dev:native, libelf-dev:native,
libssl-dev:native, libssl-dev <!pkg.${sourcename}.nokernelheaders>,
python3:native, rsync
Homepage: https://www.kernel.org/
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 6039afae4bfc..0aef34b9609b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -283,7 +283,7 @@ static struct dentry *aafs_create(const char *name, umode_t mode,
dir = d_inode(parent);
inode_lock(dir);
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
goto fail_lock;
@@ -2551,7 +2551,7 @@ static int aa_mk_null_file(struct dentry *parent)
return error;
inode_lock(d_inode(parent));
- dentry = lookup_one_len(NULL_FILE_NAME, parent, strlen(NULL_FILE_NAME));
+ dentry = lookup_noperm(&QSTR(NULL_FILE_NAME), parent);
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
goto out;
diff --git a/security/inode.c b/security/inode.c
index da3ab44c8e57..3913501621fa 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -128,7 +128,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
dir = d_inode(parent);
inode_lock(dir);
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (IS_ERR(dentry))
goto out;
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 7e5e0ed0e4e5..c52d079cdb77 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -175,7 +175,7 @@ static void test_get_hierarchy(struct kunit *const test)
KUNIT_EXPECT_EQ(test, 10, get_hierarchy(&dom2, 0)->id);
KUNIT_EXPECT_EQ(test, 20, get_hierarchy(&dom2, 1)->id);
KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, 2)->id);
- KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, -1)->id);
+ /* KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, -1)->id); */
}
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
@@ -437,7 +437,7 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
return;
/* Checks if the current exec was restricting itself. */
- if (subject->domain_exec & (1 << youngest_layer)) {
+ if (subject->domain_exec & BIT(youngest_layer)) {
/* Ignores denials for the same execution. */
if (!youngest_denied->log_same_exec)
return;
diff --git a/security/landlock/id.c b/security/landlock/id.c
index 11fab9259c15..56f7cc0fc744 100644
--- a/security/landlock/id.c
+++ b/security/landlock/id.c
@@ -7,6 +7,7 @@
#include <kunit/test.h>
#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/random.h>
#include <linux/spinlock.h>
@@ -25,7 +26,7 @@ static void __init init_id(atomic64_t *const counter, const u32 random_32bits)
* Ensures sure 64-bit values are always used by user space (or may
* fail with -EOVERFLOW), and makes this testable.
*/
- init = 1ULL << 32;
+ init = BIT_ULL(32);
/*
* Makes a large (2^32) boot-time value to limit ID collision in logs
@@ -105,7 +106,7 @@ static u64 get_id_range(size_t number_of_ids, atomic64_t *const counter,
* to get a new ID (e.g. a full landlock_restrict_self() call), and the
* cost of draining all available IDs during the system's uptime.
*/
- random_4bits = random_4bits % (1 << 4);
+ random_4bits &= 0b1111;
step = number_of_ids + random_4bits;
/* It is safe to cast a signed atomic to an unsigned value. */
@@ -144,6 +145,19 @@ static void test_range1_rand1(struct kunit *const test)
init + 2);
}
+static void test_range1_rand15(struct kunit *const test)
+{
+ atomic64_t counter;
+ u64 init;
+
+ init = get_random_u32();
+ atomic64_set(&counter, init);
+ KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 15), init);
+ KUNIT_EXPECT_EQ(
+ test, get_id_range(get_random_u8(), &counter, get_random_u8()),
+ init + 16);
+}
+
static void test_range1_rand16(struct kunit *const test)
{
atomic64_t counter;
@@ -196,6 +210,19 @@ static void test_range2_rand2(struct kunit *const test)
init + 4);
}
+static void test_range2_rand15(struct kunit *const test)
+{
+ atomic64_t counter;
+ u64 init;
+
+ init = get_random_u32();
+ atomic64_set(&counter, init);
+ KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 15), init);
+ KUNIT_EXPECT_EQ(
+ test, get_id_range(get_random_u8(), &counter, get_random_u8()),
+ init + 17);
+}
+
static void test_range2_rand16(struct kunit *const test)
{
atomic64_t counter;
@@ -232,10 +259,12 @@ static struct kunit_case __refdata test_cases[] = {
KUNIT_CASE(test_init_once),
KUNIT_CASE(test_range1_rand0),
KUNIT_CASE(test_range1_rand1),
+ KUNIT_CASE(test_range1_rand15),
KUNIT_CASE(test_range1_rand16),
KUNIT_CASE(test_range2_rand0),
KUNIT_CASE(test_range2_rand1),
KUNIT_CASE(test_range2_rand2),
+ KUNIT_CASE(test_range2_rand15),
KUNIT_CASE(test_range2_rand16),
{}
/* clang-format on */
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index b9561e3417ae..33eafb71e4f3 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -9,6 +9,7 @@
#include <asm/current.h>
#include <linux/anon_inodes.h>
+#include <linux/bitops.h>
#include <linux/build_bug.h>
#include <linux/capability.h>
#include <linux/cleanup.h>
@@ -563,7 +564,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
new_llcred->domain = new_dom;
#ifdef CONFIG_AUDIT
- new_llcred->domain_exec |= 1 << (new_dom->num_layers - 1);
+ new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
#endif /* CONFIG_AUDIT */
return commit_creds(new_cred);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 47480eb2189b..e67a8ce4b64c 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2158,8 +2158,8 @@ static int __init init_sel_fs(void)
return err;
}
- selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
- &null_name);
+ selinux_null.dentry = try_lookup_noperm(&null_name,
+ selinux_null.mnt->mnt_root);
if (IS_ERR(selinux_null.dentry)) {
pr_err("selinuxfs: could not lookup null!\n");
err = PTR_ERR(selinux_null.dentry);
diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c
index 84e264f335ca..693d48f08b88 100644
--- a/sound/atmel/ac97c.c
+++ b/sound/atmel/ac97c.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/io.h>
@@ -589,7 +590,7 @@ static int atmel_ac97c_pcm_new(struct atmel_ac97c *chip)
pcm->private_data = chip;
pcm->info_flags = 0;
- strcpy(pcm->name, chip->card->shortname);
+ strscpy(pcm->name, chip->card->shortname);
chip->pcm = pcm;
return 0;
@@ -748,9 +749,9 @@ static int atmel_ac97c_probe(struct platform_device *pdev)
spin_lock_init(&chip->lock);
- strcpy(card->driver, "Atmel AC97C");
- strcpy(card->shortname, "Atmel AC97C");
- sprintf(card->longname, "Atmel AC97 controller");
+ strscpy(card->driver, "Atmel AC97C");
+ strscpy(card->shortname, "Atmel AC97C");
+ strscpy(card->longname, "Atmel AC97 controller");
chip->card = card;
chip->pclk = pclk;
diff --git a/sound/core/control.c b/sound/core/control.c
index 0ddade871b52..11f660fc6f2b 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1405,7 +1405,7 @@ static bool check_user_elem_overflow(struct snd_card *card, ssize_t add)
static int snd_ctl_elem_user_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
- struct user_element *ue = kcontrol->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kcontrol);
unsigned int offset;
offset = snd_ctl_get_ioff(kcontrol, &uinfo->id);
@@ -1418,7 +1418,7 @@ static int snd_ctl_elem_user_info(struct snd_kcontrol *kcontrol,
static int snd_ctl_elem_user_enum_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
- struct user_element *ue = kcontrol->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kcontrol);
const char *names;
unsigned int item;
unsigned int offset;
@@ -1443,7 +1443,7 @@ static int snd_ctl_elem_user_enum_info(struct snd_kcontrol *kcontrol,
static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct user_element *ue = kcontrol->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kcontrol);
unsigned int size = ue->elem_data_size;
char *src = ue->elem_data +
snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size;
@@ -1456,7 +1456,7 @@ static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
int err, change;
- struct user_element *ue = kcontrol->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kcontrol);
unsigned int size = ue->elem_data_size;
char *dst = ue->elem_data +
snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size;
@@ -1475,7 +1475,7 @@ static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol,
static int replace_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf,
unsigned int size)
{
- struct user_element *ue = kctl->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kctl);
unsigned int *container;
unsigned int mask = 0;
int i;
@@ -1528,7 +1528,7 @@ static int replace_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf,
static int read_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf,
unsigned int size)
{
- struct user_element *ue = kctl->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kctl);
if (ue->tlv_data_size == 0 || ue->tlv_data == NULL)
return -ENXIO;
@@ -1598,7 +1598,7 @@ static size_t compute_user_elem_size(size_t size, unsigned int count)
static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol)
{
- struct user_element *ue = kcontrol->private_data;
+ struct user_element *ue = snd_kcontrol_chip(kcontrol);
// decrement the allocation size.
ue->card->user_ctl_alloc_size -= compute_user_elem_size(ue->elem_data_size, kcontrol->count);
diff --git a/sound/core/device.c b/sound/core/device.c
index b57d80a17052..cdc5af526739 100644
--- a/sound/core/device.c
+++ b/sound/core/device.c
@@ -237,26 +237,3 @@ void snd_device_free_all(struct snd_card *card)
list_for_each_entry_safe_reverse(dev, next, &card->devices, list)
__snd_device_free(dev);
}
-
-/**
- * snd_device_get_state - Get the current state of the given device
- * @card: the card instance
- * @device_data: the data pointer to release
- *
- * Returns the current state of the given device object. For the valid
- * device, either @SNDRV_DEV_BUILD, @SNDRV_DEV_REGISTERED or
- * @SNDRV_DEV_DISCONNECTED is returned.
- * Or for a non-existing device, -1 is returned as an error.
- *
- * Return: the current state, or -1 if not found
- */
-int snd_device_get_state(struct snd_card *card, void *device_data)
-{
- struct snd_device *dev;
-
- dev = look_for_dev(card, device_data);
- if (dev)
- return dev->state;
- return -1;
-}
-EXPORT_SYMBOL_GPL(snd_device_get_state);
diff --git a/sound/core/jack.c b/sound/core/jack.c
index e4bcecdf89b7..850f82340278 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -575,25 +575,6 @@ EXPORT_SYMBOL(snd_jack_new);
#ifdef CONFIG_SND_JACK_INPUT_DEV
/**
- * snd_jack_set_parent - Set the parent device for a jack
- *
- * @jack: The jack to configure
- * @parent: The device to set as parent for the jack.
- *
- * Set the parent for the jack devices in the device tree. This
- * function is only valid prior to registration of the jack. If no
- * parent is configured then the parent device will be the sound card.
- */
-void snd_jack_set_parent(struct snd_jack *jack, struct device *parent)
-{
- WARN_ON(jack->registered);
- guard(mutex)(&jack->input_dev_lock);
- if (jack->input_dev)
- jack->input_dev->dev.parent = parent;
-}
-EXPORT_SYMBOL(snd_jack_set_parent);
-
-/**
* snd_jack_set_key - Set a key mapping on a jack
*
* @jack: The jack to configure
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 4683b9139c56..4ecb17bd5436 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1074,8 +1074,7 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream)
runtime->oss.params = 0;
runtime->oss.prepare = 1;
runtime->oss.buffer_used = 0;
- if (runtime->dma_area)
- snd_pcm_format_set_silence(runtime->format, runtime->dma_area, bytes_to_samples(runtime, runtime->dma_bytes));
+ snd_pcm_runtime_buffer_set_silence(runtime);
runtime->oss.period_frames = snd_pcm_alsa_frames(substream, oss_period_size);
diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
index b134a51b3fd5..72040964b6fd 100644
--- a/sound/core/pcm_dmaengine.c
+++ b/sound/core/pcm_dmaengine.c
@@ -328,27 +328,6 @@ int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream,
}
EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open);
-/**
- * snd_dmaengine_pcm_open_request_chan - Open a dmaengine based PCM substream and request channel
- * @substream: PCM substream
- * @filter_fn: Filter function used to request the DMA channel
- * @filter_data: Data passed to the DMA filter function
- *
- * This function will request a DMA channel using the passed filter function and
- * data. The function should usually be called from the pcm open callback. Note
- * that this function will use private_data field of the substream's runtime. So
- * it is not available to your pcm driver implementation.
- *
- * Return: 0 on success, a negative error code otherwise
- */
-int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream,
- dma_filter_fn filter_fn, void *filter_data)
-{
- return snd_dmaengine_pcm_open(substream,
- snd_dmaengine_pcm_request_channel(filter_fn, filter_data));
-}
-EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan);
-
int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream)
{
struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index ea3941f8666b..56725d36825b 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -458,7 +458,7 @@ int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size)
substream->stream,
size, dmab) < 0) {
kfree(dmab);
- pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n",
+ pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot allocate for size %zu\n",
substream->pcm->card->number, substream->pcm->device,
substream->stream ? 'c' : 'p', substream->number,
substream->pcm->name, size);
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index d3a08e292072..71eec32a7a0a 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -586,33 +586,3 @@ unsigned int snd_pcm_rate_mask_intersect(unsigned int rates_a,
return rates_a & rates_b;
}
EXPORT_SYMBOL_GPL(snd_pcm_rate_mask_intersect);
-
-/**
- * snd_pcm_rate_range_to_bits - converts rate range to SNDRV_PCM_RATE_xxx bit
- * @rate_min: the minimum sample rate
- * @rate_max: the maximum sample rate
- *
- * This function has an implicit assumption: the rates in the given range have
- * only the pre-defined rates like 44100 or 16000.
- *
- * Return: The SNDRV_PCM_RATE_xxx flag that corresponds to the given rate range,
- * or SNDRV_PCM_RATE_KNOT for an unknown range.
- */
-unsigned int snd_pcm_rate_range_to_bits(unsigned int rate_min,
- unsigned int rate_max)
-{
- unsigned int rates = 0;
- int i;
-
- for (i = 0; i < snd_pcm_known_rates.count; i++) {
- if (snd_pcm_known_rates.list[i] >= rate_min
- && snd_pcm_known_rates.list[i] <= rate_max)
- rates |= 1 << i;
- }
-
- if (!rates)
- rates = SNDRV_PCM_RATE_KNOT;
-
- return rates;
-}
-EXPORT_SYMBOL_GPL(snd_pcm_rate_range_to_bits);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 6c2b6a62d9d2..853ac5bb33ff 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -723,6 +723,17 @@ static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime)
atomic_inc(&runtime->buffer_accessing);
}
+/* fill the PCM buffer with the current silence format; called from pcm_oss.c */
+void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime)
+{
+ snd_pcm_buffer_access_lock(runtime);
+ if (runtime->dma_area)
+ snd_pcm_format_set_silence(runtime->format, runtime->dma_area,
+ bytes_to_samples(runtime, runtime->dma_bytes));
+ snd_pcm_buffer_access_unlock(runtime);
+}
+EXPORT_SYMBOL_GPL(snd_pcm_runtime_buffer_set_silence);
+
#if IS_ENABLED(CONFIG_SND_PCM_OSS)
#define is_oss_stream(substream) ((substream)->oss.oss)
#else
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 198c598a5393..880240924bfd 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -732,15 +732,21 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client,
*/
static int __deliver_to_subscribers(struct snd_seq_client *client,
struct snd_seq_event *event,
- struct snd_seq_client_port *src_port,
- int atomic, int hop)
+ int port, int atomic, int hop)
{
+ struct snd_seq_client_port *src_port;
struct snd_seq_subscribers *subs;
int err, result = 0, num_ev = 0;
union __snd_seq_event event_saved;
size_t saved_size;
struct snd_seq_port_subs_info *grp;
+ if (port < 0)
+ return 0;
+ src_port = snd_seq_port_use_ptr(client, port);
+ if (!src_port)
+ return 0;
+
/* save original event record */
saved_size = snd_seq_event_packet_size(event);
memcpy(&event_saved, event, saved_size);
@@ -775,6 +781,7 @@ static int __deliver_to_subscribers(struct snd_seq_client *client,
read_unlock(&grp->list_lock);
else
up_read(&grp->list_mutex);
+ snd_seq_port_unlock(src_port);
memcpy(event, &event_saved, saved_size);
return (result < 0) ? result : num_ev;
}
@@ -783,25 +790,32 @@ static int deliver_to_subscribers(struct snd_seq_client *client,
struct snd_seq_event *event,
int atomic, int hop)
{
- struct snd_seq_client_port *src_port;
- int ret = 0, ret2;
-
- src_port = snd_seq_port_use_ptr(client, event->source.port);
- if (src_port) {
- ret = __deliver_to_subscribers(client, event, src_port, atomic, hop);
- snd_seq_port_unlock(src_port);
- }
-
- if (client->ump_endpoint_port < 0 ||
- event->source.port == client->ump_endpoint_port)
- return ret;
+ int ret;
+#if IS_ENABLED(CONFIG_SND_SEQ_UMP)
+ int ret2;
+#endif
- src_port = snd_seq_port_use_ptr(client, client->ump_endpoint_port);
- if (!src_port)
+ ret = __deliver_to_subscribers(client, event,
+ event->source.port, atomic, hop);
+#if IS_ENABLED(CONFIG_SND_SEQ_UMP)
+ if (!snd_seq_client_is_ump(client) || client->ump_endpoint_port < 0)
return ret;
- ret2 = __deliver_to_subscribers(client, event, src_port, atomic, hop);
- snd_seq_port_unlock(src_port);
- return ret2 < 0 ? ret2 : ret;
+ /* If it's an event from EP port (and with a UMP group),
+ * deliver to subscribers of the corresponding UMP group port, too.
+ * Or, if it's from non-EP port, deliver to subscribers of EP port, too.
+ */
+ if (event->source.port == client->ump_endpoint_port)
+ ret2 = __deliver_to_subscribers(client, event,
+ snd_seq_ump_group_port(event),
+ atomic, hop);
+ else
+ ret2 = __deliver_to_subscribers(client, event,
+ client->ump_endpoint_port,
+ atomic, hop);
+ if (ret2 < 0)
+ return ret2;
+#endif
+ return ret;
}
/* deliver an event to the destination port(s).
diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
index 5df26788dda4..10add922323d 100644
--- a/sound/core/seq/seq_queue.c
+++ b/sound/core/seq/seq_queue.c
@@ -564,22 +564,6 @@ void snd_seq_queue_client_leave(int client)
/*----------------------------------------------------------------*/
-/* remove cells from all queues */
-void snd_seq_queue_client_leave_cells(int client)
-{
- int i;
- struct snd_seq_queue *q;
-
- for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
- q = queueptr(i);
- if (!q)
- continue;
- snd_seq_prioq_leave(q->tickq, client, 0);
- snd_seq_prioq_leave(q->timeq, client, 0);
- queuefree(q);
- }
-}
-
/* remove cells based on flush criteria */
void snd_seq_queue_remove_cells(int client, struct snd_seq_remove_events *info)
{
diff --git a/sound/core/seq/seq_queue.h b/sound/core/seq/seq_queue.h
index 74cc31aacdac..b81379c9af43 100644
--- a/sound/core/seq/seq_queue.h
+++ b/sound/core/seq/seq_queue.h
@@ -66,7 +66,6 @@ void snd_seq_queue_client_leave(int client);
int snd_seq_enqueue_event(struct snd_seq_event_cell *cell, int atomic, int hop);
/* Remove events */
-void snd_seq_queue_client_leave_cells(int client);
void snd_seq_queue_remove_cells(int client, struct snd_seq_remove_events *info);
/* return pointer to queue structure for specified id */
diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index ff7e558b4d51..db2f169cae11 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -1285,3 +1285,21 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source,
else
return cvt_to_ump_midi1(dest, dest_port, event, atomic, hop);
}
+
+/* return the UMP group-port number of the event;
+ * return -1 if groupless or non-UMP event
+ */
+int snd_seq_ump_group_port(const struct snd_seq_event *event)
+{
+ const struct snd_seq_ump_event *ump_ev =
+ (const struct snd_seq_ump_event *)event;
+ unsigned char type;
+
+ if (!snd_seq_ev_is_ump(event))
+ return -1;
+ type = ump_message_type(ump_ev->ump[0]);
+ if (ump_is_groupless_msg(type))
+ return -1;
+ /* group-port number starts from 1 */
+ return ump_message_group(ump_ev->ump[0]) + 1;
+}
diff --git a/sound/core/seq/seq_ump_convert.h b/sound/core/seq/seq_ump_convert.h
index 6c146d803280..4abf0a7637d7 100644
--- a/sound/core/seq/seq_ump_convert.h
+++ b/sound/core/seq/seq_ump_convert.h
@@ -18,5 +18,6 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source,
struct snd_seq_client_port *dest_port,
struct snd_seq_event *event,
int atomic, int hop);
+int snd_seq_ump_group_port(const struct snd_seq_event *event);
#endif /* __SEQ_UMP_CONVERT_H */
diff --git a/sound/core/seq_device.c b/sound/core/seq_device.c
index 4492be5d2317..bac9f8603734 100644
--- a/sound/core/seq_device.c
+++ b/sound/core/seq_device.c
@@ -43,7 +43,7 @@ MODULE_LICENSE("GPL");
static int snd_seq_bus_match(struct device *dev, const struct device_driver *drv)
{
struct snd_seq_device *sdev = to_seq_dev(dev);
- struct snd_seq_driver *sdrv = to_seq_drv(drv);
+ const struct snd_seq_driver *sdrv = to_seq_drv(drv);
return strcmp(sdrv->id, sdev->id) == 0 &&
sdrv->argsize == sdev->argsize;
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index 6199bb60ccf0..c84754434d16 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -9,6 +9,7 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <sound/hda_register.h>
@@ -81,6 +82,7 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus)
int idx;
u32 link_count;
struct hdac_ext_link *hlink;
+ u32 leptr;
link_count = readl(bus->mlcap + AZX_REG_ML_MLCD) + 1;
@@ -96,6 +98,12 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus)
(AZX_ML_INTERVAL * idx);
hlink->lcaps = readl(hlink->ml_addr + AZX_REG_ML_LCAP);
hlink->lsdiid = readw(hlink->ml_addr + AZX_REG_ML_LSDIID);
+ hlink->slcount = FIELD_GET(AZX_ML_HDA_LCAP_SLCOUNT, hlink->lcaps) + 1;
+
+ if (hdac_ext_link_alt(hlink)) {
+ leptr = readl(hlink->ml_addr + AZX_REG_ML_LEPTR);
+ hlink->id = FIELD_GET(AZX_REG_ML_LEPTR_ID, leptr);
+ }
/* since link in On, update the ref */
hlink->ref_count = 1;
@@ -125,6 +133,17 @@ void snd_hdac_ext_link_free_all(struct hdac_bus *bus)
}
EXPORT_SYMBOL_GPL(snd_hdac_ext_link_free_all);
+struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id)
+{
+ struct hdac_ext_link *hlink;
+
+ list_for_each_entry(hlink, &bus->hlink_list, list)
+ if (hdac_ext_link_alt(hlink) && hlink->id == id)
+ return hlink;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_get_hlink_by_id);
+
/**
* snd_hdac_ext_bus_get_hlink_by_addr - get hlink at specified address
* @bus: hlink's parent bus device
diff --git a/sound/hda/hda_bus_type.c b/sound/hda/hda_bus_type.c
index 7545ace7b0ee..eb72a7af2e56 100644
--- a/sound/hda/hda_bus_type.c
+++ b/sound/hda/hda_bus_type.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
* driver id_table and returns the matching device id entry.
*/
const struct hda_device_id *
-hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv)
+hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv)
{
if (drv->id_table) {
const struct hda_device_id *id = drv->id_table;
@@ -38,7 +38,7 @@ hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv)
}
EXPORT_SYMBOL_GPL(hdac_get_device_id);
-static int hdac_codec_match(struct hdac_device *dev, struct hdac_driver *drv)
+static int hdac_codec_match(struct hdac_device *dev, const struct hdac_driver *drv)
{
if (hdac_get_device_id(dev, drv))
return 1;
@@ -49,7 +49,7 @@ static int hdac_codec_match(struct hdac_device *dev, struct hdac_driver *drv)
static int hda_bus_match(struct device *dev, const struct device_driver *drv)
{
struct hdac_device *hdev = dev_to_hdac_dev(dev);
- struct hdac_driver *hdrv = drv_to_hdac_driver(drv);
+ const struct hdac_driver *hdrv = drv_to_hdac_driver(drv);
if (hdev->type != hdrv->type)
return 0;
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index 3fbb9793dcfc..0053831eed2d 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -801,7 +801,7 @@ unsigned int snd_hdac_stream_format(unsigned int channels, unsigned int bits, un
if (!rate_bits[i].hz)
return 0;
- if (channels == 0 || channels > 8)
+ if (channels == 0 || channels > 16)
return 0;
val |= channels - 1;
diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c
index 4e85a838ad7e..e7f6208af5b0 100644
--- a/sound/hda/hdac_stream.c
+++ b/sound/hda/hdac_stream.c
@@ -826,25 +826,6 @@ int snd_hdac_stream_set_spib(struct hdac_bus *bus,
EXPORT_SYMBOL_GPL(snd_hdac_stream_set_spib);
/**
- * snd_hdac_stream_get_spbmaxfifo - gets the spib value of a stream
- * @bus: HD-audio core bus
- * @azx_dev: hdac_stream
- *
- * Return maxfifo for the stream
- */
-int snd_hdac_stream_get_spbmaxfifo(struct hdac_bus *bus,
- struct hdac_stream *azx_dev)
-{
- if (!bus->spbcap) {
- dev_err(bus->dev, "Address of SPB capability is NULL\n");
- return -EINVAL;
- }
-
- return readl(azx_dev->fifo_addr);
-}
-EXPORT_SYMBOL_GPL(snd_hdac_stream_get_spbmaxfifo);
-
-/**
* snd_hdac_stream_drsm_enable - enable DMA resume for a stream
* @bus: HD-audio core bus
* @enable: flag to enable/disable DRSM
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index 5103e37be428..3cb1e7fc3b3b 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -112,6 +112,10 @@ static const struct config_entry config_table[] = {
.flags = FLAG_SST,
.device = PCI_DEVICE_ID_INTEL_HDA_RPL_M,
},
+ {
+ .flags = FLAG_SST,
+ .device = PCI_DEVICE_ID_INTEL_HDA_FCL,
+ },
#endif
#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
{
@@ -537,7 +541,7 @@ static const struct config_entry config_table[] = {
},
#endif
- /* Panther Lake */
+ /* Panther Lake, Wildcat Lake */
#if IS_ENABLED(CONFIG_SND_SOC_SOF_PANTHERLAKE)
{
.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
@@ -547,6 +551,10 @@ static const struct config_entry config_table[] = {
.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
.device = PCI_DEVICE_ID_INTEL_HDA_PTL_H,
},
+ {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+ .device = PCI_DEVICE_ID_INTEL_HDA_WCL,
+ },
#endif
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 088cff799e0b..6d72a871bda0 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -350,8 +350,11 @@ int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
struct nhlt_endpoint *epnt;
int i;
- if (!nhlt)
+ if (!nhlt) {
+ dev_err(dev, "%s: NHLT table is missing (query for SSP%d)\n",
+ __func__, virtual_bus_id);
return -EINVAL;
+ }
epnt = (struct nhlt_endpoint *)nhlt->desc;
for (i = 0; i < nhlt->endpoint_count; i++) {
@@ -366,6 +369,20 @@ int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
}
+ dev_err(dev, "%s: No match for SSP%d in NHLT table\n", __func__,
+ virtual_bus_id);
+
+ dev_dbg(dev, "Available endpoints:\n");
+ epnt = (struct nhlt_endpoint *)nhlt->desc;
+ for (i = 0; i < nhlt->endpoint_count; i++) {
+ dev_dbg(dev,
+ "%d: link_type: %d, vbus_id: %d, dir: %d, dev_type: %d\n",
+ i, epnt->linktype, epnt->virtual_bus_id,
+ epnt->direction, epnt->device_type);
+
+ epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
+ }
+
return -EINVAL;
}
EXPORT_SYMBOL(intel_nhlt_ssp_device_type);
diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
index 8686adaf4531..d3511135f7d3 100644
--- a/sound/hda/intel-sdw-acpi.c
+++ b/sound/hda/intel-sdw-acpi.c
@@ -177,7 +177,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
* sdw_intel_startup() is required for creation of devices and bus
* startup
*/
-int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+int sdw_intel_acpi_scan(acpi_handle parent_handle,
struct sdw_intel_acpi_info *info)
{
acpi_status status;
diff --git a/sound/i2c/other/pt2258.c b/sound/i2c/other/pt2258.c
index ba38c285241c..0fbac827124b 100644
--- a/sound/i2c/other/pt2258.c
+++ b/sound/i2c/other/pt2258.c
@@ -80,7 +80,7 @@ static int pt2258_stereo_volume_info(struct snd_kcontrol *kcontrol,
static int pt2258_stereo_volume_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_pt2258 *pt = kcontrol->private_data;
+ struct snd_pt2258 *pt = snd_kcontrol_chip(kcontrol);
int base = kcontrol->private_value;
/* chip does not support register reads */
@@ -92,7 +92,7 @@ static int pt2258_stereo_volume_get(struct snd_kcontrol *kcontrol,
static int pt2258_stereo_volume_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_pt2258 *pt = kcontrol->private_data;
+ struct snd_pt2258 *pt = snd_kcontrol_chip(kcontrol);
int base = kcontrol->private_value;
unsigned char bytes[2];
int val0, val1;
@@ -133,7 +133,7 @@ static int pt2258_stereo_volume_put(struct snd_kcontrol *kcontrol,
static int pt2258_switch_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_pt2258 *pt = kcontrol->private_data;
+ struct snd_pt2258 *pt = snd_kcontrol_chip(kcontrol);
ucontrol->value.integer.value[0] = !pt->mute;
return 0;
@@ -142,7 +142,7 @@ static int pt2258_switch_get(struct snd_kcontrol *kcontrol,
static int pt2258_switch_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_pt2258 *pt = kcontrol->private_data;
+ struct snd_pt2258 *pt = snd_kcontrol_chip(kcontrol);
unsigned char bytes[2];
int val;
diff --git a/sound/isa/gus/gus_io.c b/sound/isa/gus/gus_io.c
index 1dc9e0edb3d9..f167eb8d516b 100644
--- a/sound/isa/gus/gus_io.c
+++ b/sound/isa/gus/gus_io.c
@@ -228,49 +228,6 @@ unsigned short snd_gf1_i_look16(struct snd_gus_card * gus, unsigned char reg)
return res;
}
-#if 0
-
-void snd_gf1_i_adlib_write(struct snd_gus_card * gus,
- unsigned char reg,
- unsigned char data)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&gus->reg_lock, flags);
- __snd_gf1_adlib_write(gus, reg, data);
- spin_unlock_irqrestore(&gus->reg_lock, flags);
-}
-
-void snd_gf1_i_write_addr(struct snd_gus_card * gus, unsigned char reg,
- unsigned int addr, short w_16bit)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&gus->reg_lock, flags);
- __snd_gf1_write_addr(gus, reg, addr, w_16bit);
- spin_unlock_irqrestore(&gus->reg_lock, flags);
-}
-
-#endif /* 0 */
-
-#ifdef CONFIG_SND_DEBUG
-static unsigned int snd_gf1_i_read_addr(struct snd_gus_card * gus,
- unsigned char reg, short w_16bit)
-{
- unsigned int res;
- unsigned long flags;
-
- spin_lock_irqsave(&gus->reg_lock, flags);
- res = __snd_gf1_read_addr(gus, reg, w_16bit);
- spin_unlock_irqrestore(&gus->reg_lock, flags);
- return res;
-}
-#endif
-
-/*
-
- */
-
void snd_gf1_dram_addr(struct snd_gus_card * gus, unsigned int addr)
{
outb(0x43, gus->gf1.reg_regsel);
@@ -418,189 +375,3 @@ void snd_gf1_select_active_voices(struct snd_gus_card * gus)
udelay(100);
}
}
-
-#ifdef CONFIG_SND_DEBUG
-
-void snd_gf1_print_voice_registers(struct snd_gus_card * gus)
-{
- unsigned char mode;
- int voice, ctrl;
-
- voice = gus->gf1.active_voice;
- dev_info(gus->card->dev,
- " -%i- GF1 voice ctrl, ramp ctrl = 0x%x, 0x%x\n",
- voice, ctrl = snd_gf1_i_read8(gus, 0), snd_gf1_i_read8(gus, 0x0d));
- dev_info(gus->card->dev,
- " -%i- GF1 frequency = 0x%x\n",
- voice, snd_gf1_i_read16(gus, 1));
- dev_info(gus->card->dev,
- " -%i- GF1 loop start, end = 0x%x (0x%x), 0x%x (0x%x)\n",
- voice, snd_gf1_i_read_addr(gus, 2, ctrl & 4),
- snd_gf1_i_read_addr(gus, 2, (ctrl & 4) ^ 4),
- snd_gf1_i_read_addr(gus, 4, ctrl & 4),
- snd_gf1_i_read_addr(gus, 4, (ctrl & 4) ^ 4));
- dev_info(gus->card->dev,
- " -%i- GF1 ramp start, end, rate = 0x%x, 0x%x, 0x%x\n",
- voice, snd_gf1_i_read8(gus, 7), snd_gf1_i_read8(gus, 8),
- snd_gf1_i_read8(gus, 6));
- dev_info(gus->card->dev,
- " -%i- GF1 volume = 0x%x\n",
- voice, snd_gf1_i_read16(gus, 9));
- dev_info(gus->card->dev,
- " -%i- GF1 position = 0x%x (0x%x)\n",
- voice, snd_gf1_i_read_addr(gus, 0x0a, ctrl & 4),
- snd_gf1_i_read_addr(gus, 0x0a, (ctrl & 4) ^ 4));
- if (gus->interwave && snd_gf1_i_read8(gus, 0x19) & 0x01) { /* enhanced mode */
- mode = snd_gf1_i_read8(gus, 0x15);
- dev_info(gus->card->dev,
- " -%i- GFA1 mode = 0x%x\n",
- voice, mode);
- if (mode & 0x01) { /* Effect processor */
- dev_info(gus->card->dev,
- " -%i- GFA1 effect address = 0x%x\n",
- voice, snd_gf1_i_read_addr(gus, 0x11, ctrl & 4));
- dev_info(gus->card->dev,
- " -%i- GFA1 effect volume = 0x%x\n",
- voice, snd_gf1_i_read16(gus, 0x16));
- dev_info(gus->card->dev,
- " -%i- GFA1 effect volume final = 0x%x\n",
- voice, snd_gf1_i_read16(gus, 0x1d));
- dev_info(gus->card->dev,
- " -%i- GFA1 effect accumulator = 0x%x\n",
- voice, snd_gf1_i_read8(gus, 0x14));
- }
- if (mode & 0x20) {
- dev_info(gus->card->dev,
- " -%i- GFA1 left offset = 0x%x (%i)\n",
- voice, snd_gf1_i_read16(gus, 0x13),
- snd_gf1_i_read16(gus, 0x13) >> 4);
- dev_info(gus->card->dev,
- " -%i- GFA1 left offset final = 0x%x (%i)\n",
- voice, snd_gf1_i_read16(gus, 0x1c),
- snd_gf1_i_read16(gus, 0x1c) >> 4);
- dev_info(gus->card->dev,
- " -%i- GFA1 right offset = 0x%x (%i)\n",
- voice, snd_gf1_i_read16(gus, 0x0c),
- snd_gf1_i_read16(gus, 0x0c) >> 4);
- dev_info(gus->card->dev,
- " -%i- GFA1 right offset final = 0x%x (%i)\n",
- voice, snd_gf1_i_read16(gus, 0x1b),
- snd_gf1_i_read16(gus, 0x1b) >> 4);
- } else
- dev_info(gus->card->dev,
- " -%i- GF1 pan = 0x%x\n",
- voice, snd_gf1_i_read8(gus, 0x0c));
- } else
- dev_info(gus->card->dev,
- " -%i- GF1 pan = 0x%x\n",
- voice, snd_gf1_i_read8(gus, 0x0c));
-}
-
-#if 0
-
-void snd_gf1_print_global_registers(struct snd_gus_card * gus)
-{
- unsigned char global_mode = 0x00;
-
- dev_info(gus->card->dev,
- " -G- GF1 active voices = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_ACTIVE_VOICES));
- if (gus->interwave) {
- global_mode = snd_gf1_i_read8(gus, SNDRV_GF1_GB_GLOBAL_MODE);
- dev_info(gus->card->dev,
- " -G- GF1 global mode = 0x%x\n",
- global_mode);
- }
- if (global_mode & 0x02) /* LFO enabled? */
- dev_info(gus->card->dev,
- " -G- GF1 LFO base = 0x%x\n",
- snd_gf1_i_look16(gus, SNDRV_GF1_GW_LFO_BASE));
- dev_info(gus->card->dev,
- " -G- GF1 voices IRQ read = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_VOICES_IRQ_READ));
- dev_info(gus->card->dev,
- " -G- GF1 DRAM DMA control = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_DRAM_DMA_CONTROL));
- dev_info(gus->card->dev,
- " -G- GF1 DRAM DMA high/low = 0x%x/0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_DRAM_DMA_HIGH),
- snd_gf1_i_read16(gus, SNDRV_GF1_GW_DRAM_DMA_LOW));
- dev_info(gus->card->dev,
- " -G- GF1 DRAM IO high/low = 0x%x/0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_DRAM_IO_HIGH),
- snd_gf1_i_read16(gus, SNDRV_GF1_GW_DRAM_IO_LOW));
- if (!gus->interwave)
- dev_info(gus->card->dev,
- " -G- GF1 record DMA control = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_REC_DMA_CONTROL));
- dev_info(gus->card->dev,
- " -G- GF1 DRAM IO 16 = 0x%x\n",
- snd_gf1_i_look16(gus, SNDRV_GF1_GW_DRAM_IO16));
- if (gus->gf1.enh_mode) {
- dev_info(gus->card->dev,
- " -G- GFA1 memory config = 0x%x\n",
- snd_gf1_i_look16(gus, SNDRV_GF1_GW_MEMORY_CONFIG));
- dev_info(gus->card->dev,
- " -G- GFA1 memory control = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_MEMORY_CONTROL));
- dev_info(gus->card->dev,
- " -G- GFA1 FIFO record base = 0x%x\n",
- snd_gf1_i_look16(gus, SNDRV_GF1_GW_FIFO_RECORD_BASE_ADDR));
- dev_info(gus->card->dev,
- " -G- GFA1 FIFO playback base = 0x%x\n",
- snd_gf1_i_look16(gus, SNDRV_GF1_GW_FIFO_PLAY_BASE_ADDR));
- dev_info(gus->card->dev,
- " -G- GFA1 interleave control = 0x%x\n",
- snd_gf1_i_look16(gus, SNDRV_GF1_GW_INTERLEAVE));
- }
-}
-
-void snd_gf1_print_setup_registers(struct snd_gus_card * gus)
-{
- dev_info(gus->card->dev,
- " -S- mix control = 0x%x\n",
- inb(GUSP(gus, MIXCNTRLREG)));
- dev_info(gus->card->dev,
- " -S- IRQ status = 0x%x\n",
- inb(GUSP(gus, IRQSTAT)));
- dev_info(gus->card->dev,
- " -S- timer control = 0x%x\n",
- inb(GUSP(gus, TIMERCNTRL)));
- dev_info(gus->card->dev,
- " -S- timer data = 0x%x\n",
- inb(GUSP(gus, TIMERDATA)));
- dev_info(gus->card->dev,
- " -S- status read = 0x%x\n",
- inb(GUSP(gus, REGCNTRLS)));
- dev_info(gus->card->dev,
- " -S- Sound Blaster control = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_SOUND_BLASTER_CONTROL));
- dev_info(gus->card->dev,
- " -S- AdLib timer 1/2 = 0x%x/0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_ADLIB_TIMER_1),
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_ADLIB_TIMER_2));
- dev_info(gus->card->dev,
- " -S- reset = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_RESET));
- if (gus->interwave) {
- dev_info(gus->card->dev,
- " -S- compatibility = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_COMPATIBILITY));
- dev_info(gus->card->dev,
- " -S- decode control = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_DECODE_CONTROL));
- dev_info(gus->card->dev,
- " -S- version number = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_VERSION_NUMBER));
- dev_info(gus->card->dev,
- " -S- MPU-401 emul. control A/B = 0x%x/0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_MPU401_CONTROL_A),
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_MPU401_CONTROL_B));
- dev_info(gus->card->dev,
- " -S- emulation IRQ = 0x%x\n",
- snd_gf1_i_look8(gus, SNDRV_GF1_GB_EMULATION_IRQ));
- }
-}
-#endif /* 0 */
-
-#endif
diff --git a/sound/isa/gus/gus_main.c b/sound/isa/gus/gus_main.c
index 51ce405eba7a..7166869e423d 100644
--- a/sound/isa/gus/gus_main.c
+++ b/sound/isa/gus/gus_main.c
@@ -22,18 +22,6 @@ MODULE_LICENSE("GPL");
static int snd_gus_init_dma_irq(struct snd_gus_card * gus, int latches);
-int snd_gus_use_inc(struct snd_gus_card * gus)
-{
- if (!try_module_get(gus->card->module))
- return 0;
- return 1;
-}
-
-void snd_gus_use_dec(struct snd_gus_card * gus)
-{
- module_put(gus->card->module);
-}
-
static int snd_gus_joystick_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
{
uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
@@ -443,8 +431,6 @@ EXPORT_SYMBOL(snd_gf1_new_mixer);
/* gus_pcm.c */
EXPORT_SYMBOL(snd_gf1_pcm_new);
/* gus.c */
-EXPORT_SYMBOL(snd_gus_use_inc);
-EXPORT_SYMBOL(snd_gus_use_dec);
EXPORT_SYMBOL(snd_gus_create);
EXPORT_SYMBOL(snd_gus_initialize);
/* gus_irq.c */
diff --git a/sound/isa/gus/gus_reset.c b/sound/isa/gus/gus_reset.c
index ac5da1281042..326bc6028885 100644
--- a/sound/isa/gus/gus_reset.c
+++ b/sound/isa/gus/gus_reset.c
@@ -141,10 +141,6 @@ void snd_gf1_stop_voice(struct snd_gus_card * gus, unsigned short voice)
if (gus->gf1.enh_mode)
snd_gf1_write8(gus, SNDRV_GF1_VB_ACCUMULATOR, 0);
spin_unlock_irqrestore(&gus->reg_lock, flags);
-#if 0
- snd_gf1_lfo_shutdown(gus, voice, ULTRA_LFO_VIBRATO);
- snd_gf1_lfo_shutdown(gus, voice, ULTRA_LFO_TREMOLO);
-#endif
}
static void snd_gf1_clear_voices(struct snd_gus_card * gus, unsigned short v_min,
@@ -182,10 +178,6 @@ static void snd_gf1_clear_voices(struct snd_gus_card * gus, unsigned short v_min
snd_gf1_write16(gus, SNDRV_GF1_VW_EFFECT_VOLUME_FINAL, 0);
}
spin_unlock_irqrestore(&gus->reg_lock, flags);
-#if 0
- snd_gf1_lfo_shutdown(gus, i, ULTRA_LFO_VIBRATO);
- snd_gf1_lfo_shutdown(gus, i, ULTRA_LFO_TREMOLO);
-#endif
}
}
@@ -335,9 +327,7 @@ int snd_gf1_start(struct snd_gus_card * gus)
} else {
gus->gf1.sw_lfo = 1;
}
-#if 0
- snd_gf1_lfo_init(gus);
-#endif
+
if (gus->gf1.memory > 0)
for (i = 0; i < 4; i++)
snd_gf1_poke(gus, gus->gf1.default_voice_address + i, 0);
@@ -391,8 +381,6 @@ int snd_gf1_stop(struct snd_gus_card * gus)
snd_gf1_i_write8(gus, SNDRV_GF1_GB_RESET, 1); /* disable IRQ & DAC */
snd_gf1_timers_done(gus);
snd_gf1_mem_done(gus);
-#if 0
- snd_gf1_lfo_done(gus);
-#endif
+
return 0;
}
diff --git a/sound/isa/msnd/Makefile b/sound/isa/msnd/Makefile
index 5f8d6b472722..d56412aae857 100644
--- a/sound/isa/msnd/Makefile
+++ b/sound/isa/msnd/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-snd-msnd-lib-y := msnd.o msnd_midi.o msnd_pinnacle_mixer.o
+snd-msnd-lib-y := msnd.o msnd_pinnacle_mixer.o
snd-msnd-pinnacle-y := msnd_pinnacle.o
snd-msnd-classic-y := msnd_classic.o
diff --git a/sound/isa/msnd/msnd.h b/sound/isa/msnd/msnd.h
index 533d71cee9ba..3d7810ed9186 100644
--- a/sound/isa/msnd/msnd.h
+++ b/sound/isa/msnd/msnd.h
@@ -216,7 +216,6 @@ struct snd_msnd {
int captureLimit;
int capturePeriods;
struct snd_card *card;
- void *msndmidi_mpu;
struct snd_rawmidi *rmidi;
/* Hardware resources */
@@ -286,9 +285,6 @@ int snd_msnd_DAPQ(struct snd_msnd *chip, int start);
int snd_msnd_DARQ(struct snd_msnd *chip, int start);
int snd_msnd_pcm(struct snd_card *card, int device);
-int snd_msndmidi_new(struct snd_card *card, int device);
-void snd_msndmidi_input_read(void *mpu);
-
void snd_msndmix_setup(struct snd_msnd *chip);
int snd_msndmix_new(struct snd_card *card);
int snd_msndmix_force_recsrc(struct snd_msnd *chip, int recsrc);
diff --git a/sound/isa/msnd/msnd_midi.c b/sound/isa/msnd/msnd_midi.c
deleted file mode 100644
index 3ffc8758bec2..000000000000
--- a/sound/isa/msnd/msnd_midi.c
+++ /dev/null
@@ -1,163 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) by Jaroslav Kysela <perex@perex.cz>
- * Copyright (c) 2009 by Krzysztof Helt
- * Routines for control of MPU-401 in UART mode
- *
- * MPU-401 supports UART mode which is not capable generate transmit
- * interrupts thus output is done via polling. Also, if irq < 0, then
- * input is done also via polling. Do not expect good performance.
- */
-
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <linux/export.h>
-#include <sound/core.h>
-#include <sound/rawmidi.h>
-
-#include "msnd.h"
-
-#define MSNDMIDI_MODE_BIT_INPUT 0
-#define MSNDMIDI_MODE_BIT_OUTPUT 1
-#define MSNDMIDI_MODE_BIT_INPUT_TRIGGER 2
-#define MSNDMIDI_MODE_BIT_OUTPUT_TRIGGER 3
-
-struct snd_msndmidi {
- struct snd_msnd *dev;
-
- unsigned long mode; /* MSNDMIDI_MODE_XXXX */
-
- struct snd_rawmidi_substream *substream_input;
-
- spinlock_t input_lock;
-};
-
-/*
- * input/output open/close - protected by open_mutex in rawmidi.c
- */
-static int snd_msndmidi_input_open(struct snd_rawmidi_substream *substream)
-{
- struct snd_msndmidi *mpu;
-
- mpu = substream->rmidi->private_data;
-
- mpu->substream_input = substream;
-
- snd_msnd_enable_irq(mpu->dev);
-
- snd_msnd_send_dsp_cmd(mpu->dev, HDEX_MIDI_IN_START);
- set_bit(MSNDMIDI_MODE_BIT_INPUT, &mpu->mode);
- return 0;
-}
-
-static int snd_msndmidi_input_close(struct snd_rawmidi_substream *substream)
-{
- struct snd_msndmidi *mpu;
-
- mpu = substream->rmidi->private_data;
- snd_msnd_send_dsp_cmd(mpu->dev, HDEX_MIDI_IN_STOP);
- clear_bit(MSNDMIDI_MODE_BIT_INPUT, &mpu->mode);
- mpu->substream_input = NULL;
- snd_msnd_disable_irq(mpu->dev);
- return 0;
-}
-
-static void snd_msndmidi_input_drop(struct snd_msndmidi *mpu)
-{
- u16 tail;
-
- tail = readw(mpu->dev->MIDQ + JQS_wTail);
- writew(tail, mpu->dev->MIDQ + JQS_wHead);
-}
-
-/*
- * trigger input
- */
-static void snd_msndmidi_input_trigger(struct snd_rawmidi_substream *substream,
- int up)
-{
- unsigned long flags;
- struct snd_msndmidi *mpu;
-
- mpu = substream->rmidi->private_data;
- spin_lock_irqsave(&mpu->input_lock, flags);
- if (up) {
- if (!test_and_set_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER,
- &mpu->mode))
- snd_msndmidi_input_drop(mpu);
- } else {
- clear_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER, &mpu->mode);
- }
- spin_unlock_irqrestore(&mpu->input_lock, flags);
- if (up)
- snd_msndmidi_input_read(mpu);
-}
-
-void snd_msndmidi_input_read(void *mpuv)
-{
- unsigned long flags;
- struct snd_msndmidi *mpu = mpuv;
- void __iomem *pwMIDQData = mpu->dev->mappedbase + MIDQ_DATA_BUFF;
- u16 head, tail, size;
-
- spin_lock_irqsave(&mpu->input_lock, flags);
- head = readw(mpu->dev->MIDQ + JQS_wHead);
- tail = readw(mpu->dev->MIDQ + JQS_wTail);
- size = readw(mpu->dev->MIDQ + JQS_wSize);
- if (head > size || tail > size)
- goto out;
- while (head != tail) {
- unsigned char val = readw(pwMIDQData + 2 * head);
-
- if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER, &mpu->mode))
- snd_rawmidi_receive(mpu->substream_input, &val, 1);
- if (++head > size)
- head = 0;
- writew(head, mpu->dev->MIDQ + JQS_wHead);
- }
- out:
- spin_unlock_irqrestore(&mpu->input_lock, flags);
-}
-EXPORT_SYMBOL(snd_msndmidi_input_read);
-
-static const struct snd_rawmidi_ops snd_msndmidi_input = {
- .open = snd_msndmidi_input_open,
- .close = snd_msndmidi_input_close,
- .trigger = snd_msndmidi_input_trigger,
-};
-
-static void snd_msndmidi_free(struct snd_rawmidi *rmidi)
-{
- struct snd_msndmidi *mpu = rmidi->private_data;
- kfree(mpu);
-}
-
-int snd_msndmidi_new(struct snd_card *card, int device)
-{
- struct snd_msnd *chip = card->private_data;
- struct snd_msndmidi *mpu;
- struct snd_rawmidi *rmidi;
- int err;
-
- err = snd_rawmidi_new(card, "MSND-MIDI", device, 1, 1, &rmidi);
- if (err < 0)
- return err;
- mpu = kzalloc(sizeof(*mpu), GFP_KERNEL);
- if (mpu == NULL) {
- snd_device_free(card, rmidi);
- return -ENOMEM;
- }
- mpu->dev = chip;
- chip->msndmidi_mpu = mpu;
- rmidi->private_data = mpu;
- rmidi->private_free = snd_msndmidi_free;
- spin_lock_init(&mpu->input_lock);
- strcpy(rmidi->name, "MSND MIDI");
- snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT,
- &snd_msndmidi_input);
- rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT;
- return 0;
-}
diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index 635403301a15..8caf431677e5 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c
@@ -142,11 +142,6 @@ static void snd_msnd_eval_dsp_msg(struct snd_msnd *chip, u16 wMessage)
}
break;
- case HIMT_MIDI_IN_UCHAR:
- if (chip->msndmidi_mpu)
- snd_msndmidi_input_read(chip->msndmidi_mpu);
- break;
-
default:
dev_dbg(chip->card->dev, LOGNAME ": HIMT message %d 0x%02x\n",
HIBYTE(wMessage), HIBYTE(wMessage));
diff --git a/sound/isa/sb/emu8000.c b/sound/isa/sb/emu8000.c
index 52884e6b9193..312b217491d4 100644
--- a/sound/isa/sb/emu8000.c
+++ b/sound/isa/sb/emu8000.c
@@ -14,6 +14,7 @@
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/io.h>
+#include <linux/string.h>
#include <sound/core.h>
#include <sound/emu8000.h>
#include <sound/emu8000_reg.h>
@@ -1096,7 +1097,7 @@ snd_emu8000_new(struct snd_card *card, int index, long port, int seq_ports,
#if IS_ENABLED(CONFIG_SND_SEQUENCER)
if (snd_seq_device_new(card, index, SNDRV_SEQ_DEV_ID_EMU8000,
sizeof(struct snd_emu8000*), &awe) >= 0) {
- strcpy(awe->name, "EMU-8000");
+ strscpy(awe->name, "EMU-8000");
*(struct snd_emu8000 **)SNDRV_SEQ_DEVICE_ARGPTR(awe) = hw;
}
#else
diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c
index b28490973892..69d9bfb6c14c 100644
--- a/sound/isa/sb/jazz16.c
+++ b/sound/isa/sb/jazz16.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/io.h>
#include <linux/delay.h>
+#include <linux/string.h>
#include <asm/dma.h>
#include <linux/isa.h>
#include <sound/core.h>
@@ -286,8 +287,8 @@ static int snd_jazz16_probe(struct device *devptr, unsigned int dev)
jazz16->chip = chip;
- strcpy(card->driver, "jazz16");
- strcpy(card->shortname, "Media Vision Jazz16");
+ strscpy(card->driver, "jazz16");
+ strscpy(card->shortname, "Media Vision Jazz16");
sprintf(card->longname,
"Media Vision Jazz16 at 0x%lx, irq %d, dma8 %d, dma16 %d",
port[dev], xirq, xdma8, xdma16);
diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c
index 2f7505ad855c..752762117338 100644
--- a/sound/isa/sb/sb16.c
+++ b/sound/isa/sb/sb16.c
@@ -10,6 +10,7 @@
#include <linux/err.h>
#include <linux/isa.h>
#include <linux/module.h>
+#include <linux/string.h>
#include <sound/core.h>
#include <sound/sb.h>
#include <sound/sb16_csp.h>
@@ -337,12 +338,12 @@ static int snd_sb16_probe(struct snd_card *card, int dev)
if (err < 0)
return err;
- strcpy(card->driver,
+ strscpy(card->driver,
#ifdef SNDRV_SBAWE_EMU8000
awe_port[dev] > 0 ? "SB AWE" :
#endif
"SB16");
- strcpy(card->shortname, chip->name);
+ strscpy(card->shortname, chip->name);
sprintf(card->longname, "%s at 0x%lx, irq %i, dma ",
chip->name,
chip->port,
diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index 8726778c815e..6d5131265913 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -9,6 +9,7 @@
#include <linux/isa.h>
#include <linux/ioport.h>
#include <linux/module.h>
+#include <linux/string.h>
#include <sound/core.h>
#include <sound/sb.h>
#include <sound/opl3.h>
@@ -162,8 +163,8 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev)
if (err < 0)
return err;
- strcpy(card->driver, chip->hardware == SB_HW_PRO ? "SB Pro" : "SB8");
- strcpy(card->shortname, chip->name);
+ strscpy(card->driver, chip->hardware == SB_HW_PRO ? "SB Pro" : "SB8");
+ strscpy(card->shortname, chip->name);
sprintf(card->longname, "%s at 0x%lx, irq %d, dma %d",
chip->name,
chip->port,
diff --git a/sound/isa/sb/sb8_midi.c b/sound/isa/sb/sb8_midi.c
index d2908fc280f8..57867e51d367 100644
--- a/sound/isa/sb/sb8_midi.c
+++ b/sound/isa/sb/sb8_midi.c
@@ -14,6 +14,7 @@
*/
#include <linux/io.h>
+#include <linux/string.h>
#include <linux/time.h>
#include <sound/core.h>
#include <sound/sb.h>
@@ -254,7 +255,7 @@ int snd_sb8dsp_midi(struct snd_sb *chip, int device)
err = snd_rawmidi_new(chip->card, "SB8 MIDI", device, 1, 1, &rmidi);
if (err < 0)
return err;
- strcpy(rmidi->name, "SB8 MIDI");
+ strscpy(rmidi->name, "SB8 MIDI");
snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &snd_sb8dsp_midi_output);
snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &snd_sb8dsp_midi_input);
rmidi->info_flags |= SNDRV_RAWMIDI_INFO_OUTPUT | SNDRV_RAWMIDI_INFO_INPUT;
diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c
index 9d23b7a4570b..b2709ed134b4 100644
--- a/sound/isa/sb/sb_mixer.c
+++ b/sound/isa/sb/sb_mixer.c
@@ -6,6 +6,7 @@
#include <linux/io.h>
#include <linux/delay.h>
+#include <linux/string.h>
#include <linux/time.h>
#include <sound/core.h>
#include <sound/sb.h>
@@ -718,7 +719,7 @@ static int snd_sbmixer_init(struct snd_sb *chip,
return err;
}
snd_component_add(card, name);
- strcpy(card->mixername, name);
+ strscpy(card->mixername, name);
return 0;
}
@@ -799,7 +800,7 @@ int snd_sbmixer_new(struct snd_sb *chip)
return err;
break;
default:
- strcpy(card->mixername, "???");
+ strscpy(card->mixername, "???");
}
return 0;
}
diff --git a/sound/mips/snd-n64.c b/sound/mips/snd-n64.c
index bff6d85b8fe2..e1b2ff65d850 100644
--- a/sound/mips/snd-n64.c
+++ b/sound/mips/snd-n64.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
+#include <linux/string.h>
#include <sound/control.h>
#include <sound/core.h>
@@ -327,14 +328,14 @@ static int __init n64audio_probe(struct platform_device *pdev)
goto fail_dma_alloc;
pcm->private_data = priv;
- strcpy(pcm->name, "N64 Audio");
+ strscpy(pcm->name, "N64 Audio");
snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &n64audio_pcm_ops);
snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC, card->dev, 0, 0);
- strcpy(card->driver, "N64 Audio");
- strcpy(card->shortname, "N64 Audio");
- strcpy(card->longname, "N64 Audio");
+ strscpy(card->driver, "N64 Audio");
+ strscpy(card->shortname, "N64 Audio");
+ strscpy(card->longname, "N64 Audio");
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c
index 9ed778b6b03c..ac27a93ce4ff 100644
--- a/sound/pci/ad1889.c
+++ b/sound/pci/ad1889.c
@@ -810,12 +810,11 @@ snd_ad1889_create(struct snd_card *card, struct pci_dev *pci)
chip->irq = -1;
/* (1) PCI resource allocation */
- err = pcim_iomap_regions(pci, 1 << 0, card->driver);
- if (err < 0)
- return err;
+ chip->iobase = pcim_iomap_region(pci, 0, card->driver);
+ if (IS_ERR(chip->iobase))
+ return PTR_ERR(chip->iobase);
chip->bar = pci_resource_start(pci, 0);
- chip->iobase = pcim_iomap_table(pci)[0];
pci_set_master(pci);
diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c
index 793d2f13267e..69c02bdd38ce 100644
--- a/sound/pci/ali5451/ali5451.c
+++ b/sound/pci/ali5451/ali5451.c
@@ -1688,7 +1688,7 @@ static int snd_ali_build_pcms(struct snd_ali *codec)
static int snd_ali5451_spdif_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_ali *codec = kcontrol->private_data;
+ struct snd_ali *codec = snd_kcontrol_chip(kcontrol);
unsigned int spdif_enable;
spdif_enable = ucontrol->value.integer.value[0] ? 1 : 0;
@@ -1716,7 +1716,7 @@ static int snd_ali5451_spdif_get(struct snd_kcontrol *kcontrol,
static int snd_ali5451_spdif_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_ali *codec = kcontrol->private_data;
+ struct snd_ali *codec = snd_kcontrol_chip(kcontrol);
unsigned int change = 0, spdif_enable = 0;
spdif_enable = ucontrol->value.integer.value[0] ? 1 : 0;
@@ -1989,7 +1989,7 @@ static int snd_ali_resources(struct snd_ali *codec)
int err;
dev_dbg(codec->card->dev, "resources allocation ...\n");
- err = pci_request_regions(codec->pci, "ALI 5451");
+ err = pcim_request_all_regions(codec->pci, "ALI 5451");
if (err < 0)
return err;
codec->port = pci_resource_start(codec->pci, 0);
diff --git a/sound/pci/als300.c b/sound/pci/als300.c
index c7c481203ef8..43f98719e61b 100644
--- a/sound/pci/als300.c
+++ b/sound/pci/als300.c
@@ -617,7 +617,7 @@ static int snd_als300_create(struct snd_card *card,
chip->chip_type = chip_type;
spin_lock_init(&chip->reg_lock);
- err = pci_request_regions(pci, "ALS300");
+ err = pcim_request_all_regions(pci, "ALS300");
if (err < 0)
return err;
diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c
index 022473594c73..3f4f3037f71f 100644
--- a/sound/pci/als4000.c
+++ b/sound/pci/als4000.c
@@ -836,7 +836,7 @@ static int __snd_card_als4000_probe(struct pci_dev *pci,
return -ENXIO;
}
- err = pci_request_regions(pci, "ALS4000");
+ err = pcim_request_all_regions(pci, "ALS4000");
if (err < 0)
return err;
iobase = pci_resource_start(pci, 0);
diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index 65100f925b72..7e5ce96954b9 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c
@@ -2300,8 +2300,7 @@ static const char * const sampleclock_sources[] = {
static int snd_asihpi_clksrc_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
- struct snd_card_asihpi *asihpi =
- (struct snd_card_asihpi *)(kcontrol->private_data);
+ struct snd_card_asihpi *asihpi = snd_kcontrol_chip(kcontrol);
struct clk_cache *clkcache = &asihpi->cc;
uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
uinfo->count = 1;
@@ -2319,8 +2318,7 @@ static int snd_asihpi_clksrc_info(struct snd_kcontrol *kcontrol,
static int snd_asihpi_clksrc_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_card_asihpi *asihpi =
- (struct snd_card_asihpi *)(kcontrol->private_data);
+ struct snd_card_asihpi *asihpi = snd_kcontrol_chip(kcontrol);
struct clk_cache *clkcache = &asihpi->cc;
u32 h_control = kcontrol->private_value;
u16 source, srcindex = 0;
@@ -2347,8 +2345,7 @@ static int snd_asihpi_clksrc_get(struct snd_kcontrol *kcontrol,
static int snd_asihpi_clksrc_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_card_asihpi *asihpi =
- (struct snd_card_asihpi *)(kcontrol->private_data);
+ struct snd_card_asihpi *asihpi = snd_kcontrol_chip(kcontrol);
struct clk_cache *clkcache = &asihpi->cc;
unsigned int item;
int change;
diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c
index df2fef726d60..427006be240b 100644
--- a/sound/pci/atiixp.c
+++ b/sound/pci/atiixp.c
@@ -1544,11 +1544,10 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci)
chip->card = card;
chip->pci = pci;
chip->irq = -1;
- err = pcim_iomap_regions(pci, 1 << 0, "ATI IXP AC97");
- if (err < 0)
- return err;
+ chip->remap_addr = pcim_iomap_region(pci, 0, "ATI IXP AC97");
+ if (IS_ERR(chip->remap_addr))
+ return PTR_ERR(chip->remap_addr);
chip->addr = pci_resource_start(pci, 0);
- chip->remap_addr = pcim_iomap_table(pci)[0];
if (devm_request_irq(&pci->dev, pci->irq, snd_atiixp_interrupt,
IRQF_SHARED, KBUILD_MODNAME, chip)) {
diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c
index eb569539f322..8d3083b9b024 100644
--- a/sound/pci/atiixp_modem.c
+++ b/sound/pci/atiixp_modem.c
@@ -1174,11 +1174,10 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci)
chip->card = card;
chip->pci = pci;
chip->irq = -1;
- err = pcim_iomap_regions(pci, 1 << 0, "ATI IXP MC97");
- if (err < 0)
- return err;
+ chip->remap_addr = pcim_iomap_region(pci, 0, "ATI IXP MC97");
+ if (IS_ERR(chip->remap_addr))
+ return PTR_ERR(chip->remap_addr);
chip->addr = pci_resource_start(pci, 0);
- chip->remap_addr = pcim_iomap_table(pci)[0];
if (devm_request_irq(&pci->dev, pci->irq, snd_atiixp_interrupt,
IRQF_SHARED, KBUILD_MODNAME, chip)) {
diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c
index 62b10b0e07b1..fd986247331a 100644
--- a/sound/pci/au88x0/au88x0.c
+++ b/sound/pci/au88x0/au88x0.c
@@ -160,12 +160,11 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci)
// (1) PCI resource allocation
// Get MMIO area
//
- err = pcim_iomap_regions(pci, 1 << 0, CARD_NAME_SHORT);
- if (err)
- return err;
+ chip->mmio = pcim_iomap_region(pci, 0, KBUILD_MODNAME);
+ if (IS_ERR(chip->mmio))
+ return PTR_ERR(chip->mmio);
chip->io = pci_resource_start(pci, 0);
- chip->mmio = pcim_iomap_table(pci)[0];
/* Init audio core.
* This must be done before we do request_irq otherwise we can get spurious
diff --git a/sound/pci/au88x0/au88x0_a3d.c b/sound/pci/au88x0/au88x0_a3d.c
index eabaee0463fe..d5cafaa229f1 100644
--- a/sound/pci/au88x0/au88x0_a3d.c
+++ b/sound/pci/au88x0/au88x0_a3d.c
@@ -754,7 +754,7 @@ snd_vortex_a3d_filter_info(struct snd_kcontrol *kcontrol,
static int
snd_vortex_a3d_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
{
- //a3dsrc_t *a = kcontrol->private_data;
+ //a3dsrc_t *a = snd_kcontrol_chip(kcontrol);
/* No read yet. Would this be really useable/needed ? */
return 0;
@@ -764,7 +764,7 @@ static int
snd_vortex_a3d_hrtf_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- a3dsrc_t *a = kcontrol->private_data;
+ a3dsrc_t *a = snd_kcontrol_chip(kcontrol);
int i;
int coord[6];
for (i = 0; i < 6; i++)
@@ -781,7 +781,7 @@ static int
snd_vortex_a3d_itd_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- a3dsrc_t *a = kcontrol->private_data;
+ a3dsrc_t *a = snd_kcontrol_chip(kcontrol);
int coord[6];
int i;
for (i = 0; i < 6; i++)
@@ -800,7 +800,7 @@ static int
snd_vortex_a3d_ild_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- a3dsrc_t *a = kcontrol->private_data;
+ a3dsrc_t *a = snd_kcontrol_chip(kcontrol);
int l, r;
/* There may be some scale tranlation needed here. */
l = ucontrol->value.integer.value[0];
@@ -816,7 +816,7 @@ static int
snd_vortex_a3d_filter_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- a3dsrc_t *a = kcontrol->private_data;
+ a3dsrc_t *a = snd_kcontrol_chip(kcontrol);
int i;
int params[6];
for (i = 0; i < 6; i++)
diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
index 29a4bcdec237..7b4b8f785517 100644
--- a/sound/pci/aw2/aw2-alsa.c
+++ b/sound/pci/aw2/aw2-alsa.c
@@ -225,11 +225,10 @@ static int snd_aw2_create(struct snd_card *card,
chip->irq = -1;
/* (1) PCI resource allocation */
- err = pcim_iomap_regions(pci, 1 << 0, "Audiowerk2");
- if (err < 0)
- return err;
+ chip->iobase_virt = pcim_iomap_region(pci, 0, "Audiowerk2");
+ if (IS_ERR(chip->iobase_virt))
+ return PTR_ERR(chip->iobase_virt);
chip->iobase_phys = pci_resource_start(pci, 0);
- chip->iobase_virt = pcim_iomap_table(pci)[0];
/* (2) initialization of the chip hardware */
snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c
index 8a895d838005..053a18f434bf 100644
--- a/sound/pci/azt3328.c
+++ b/sound/pci/azt3328.c
@@ -2347,7 +2347,7 @@ snd_azf3328_create(struct snd_card *card,
return -ENXIO;
}
- err = pci_request_regions(pci, "Aztech AZF3328");
+ err = pcim_request_all_regions(pci, "Aztech AZF3328");
if (err < 0)
return err;
diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
index 621985bfee5d..91492dd2b38a 100644
--- a/sound/pci/bt87x.c
+++ b/sound/pci/bt87x.c
@@ -696,10 +696,9 @@ static int snd_bt87x_create(struct snd_card *card,
chip->irq = -1;
spin_lock_init(&chip->reg_lock);
- err = pcim_iomap_regions(pci, 1 << 0, "Bt87x audio");
- if (err < 0)
- return err;
- chip->mmio = pcim_iomap_table(pci)[0];
+ chip->mmio = pcim_iomap_region(pci, 0, "Bt87x audio");
+ if (IS_ERR(chip->mmio))
+ return PTR_ERR(chip->mmio);
chip->reg_control = CTL_A_PWRDN | CTL_DA_ES2 |
CTL_PKTP_16 | (15 << CTL_DA_SDR_SHIFT);
diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
index cf1bac7a435f..7c7119cad63c 100644
--- a/sound/pci/ca0106/ca0106_main.c
+++ b/sound/pci/ca0106/ca0106_main.c
@@ -1593,7 +1593,7 @@ static int snd_ca0106_create(int dev, struct snd_card *card,
spin_lock_init(&chip->emu_lock);
- err = pci_request_regions(pci, "snd_ca0106");
+ err = pcim_request_all_regions(pci, "snd_ca0106");
if (err < 0)
return err;
chip->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
index cb8593c376ee..b00df0a60d3f 100644
--- a/sound/pci/cmipci.c
+++ b/sound/pci/cmipci.c
@@ -2980,7 +2980,7 @@ static int snd_cmipci_create(struct snd_card *card, struct pci_dev *pci,
cm->channel[1].ch = 1;
cm->channel[0].is_dac = cm->channel[1].is_dac = 1; /* dual DAC mode */
- err = pci_request_regions(pci, card->driver);
+ err = pcim_request_all_regions(pci, card->driver);
if (err < 0)
return err;
cm->iobase = pci_resource_start(pci, 0);
diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c
index 0cc86e73cc62..90958a422b75 100644
--- a/sound/pci/cs4281.c
+++ b/sound/pci/cs4281.c
@@ -1302,14 +1302,15 @@ static int snd_cs4281_create(struct snd_card *card,
}
chip->dual_codec = dual_codec;
- err = pcim_iomap_regions(pci, 0x03, "CS4281"); /* 2 BARs */
- if (err < 0)
- return err;
+ chip->ba0 = pcim_iomap_region(pci, 0, "CS4281");
+ if (IS_ERR(chip->ba0))
+ return PTR_ERR(chip->ba0);
chip->ba0_addr = pci_resource_start(pci, 0);
- chip->ba1_addr = pci_resource_start(pci, 1);
- chip->ba0 = pcim_iomap_table(pci)[0];
- chip->ba1 = pcim_iomap_table(pci)[1];
+ chip->ba1 = pcim_iomap_region(pci, 1, "CS4281");
+ if (IS_ERR(chip->ba1))
+ return PTR_ERR(chip->ba1);
+ chip->ba1_addr = pci_resource_start(pci, 1);
if (devm_request_irq(&pci->dev, pci->irq, snd_cs4281_interrupt,
IRQF_SHARED, KBUILD_MODNAME, chip)) {
diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c
index f3a94bb537bd..fb733633740b 100644
--- a/sound/pci/cs46xx/cs46xx_lib.c
+++ b/sound/pci/cs46xx/cs46xx_lib.c
@@ -3839,7 +3839,7 @@ int snd_cs46xx_create(struct snd_card *card,
chip->pci = pci;
chip->irq = -1;
- err = pci_request_regions(pci, "CS46xx");
+ err = pcim_request_all_regions(pci, "CS46xx");
if (err < 0)
return err;
chip->ba0_addr = pci_resource_start(pci, 0);
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 1f90ca723f4d..28faca268196 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -201,13 +201,6 @@ void cs46xx_dsp_remove_scb (struct snd_cs46xx *chip, struct dsp_scb_descriptor *
if (ins->scb_highest_frag_index > ins->nscb) {
ins->scb_highest_frag_index = ins->nscb;
}
-
-#if 0
- /* !!!! THIS IS A PIECE OF SHIT MADE BY ME !!! */
- for(i = scb->index + 1;i < ins->nscb; ++i) {
- ins->scbs[i - 1].index = i - 1;
- }
-#endif
}
diff --git a/sound/pci/cs5530.c b/sound/pci/cs5530.c
index 93ff029e6583..532891e67c34 100644
--- a/sound/pci/cs5530.c
+++ b/sound/pci/cs5530.c
@@ -91,11 +91,10 @@ static int snd_cs5530_create(struct snd_card *card,
chip->card = card;
chip->pci = pci;
- err = pcim_iomap_regions(pci, 1 << 0, "CS5530");
- if (err < 0)
- return err;
+ mem = pcim_iomap_region(pci, 0, "CS5530");
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
chip->pci_base = pci_resource_start(pci, 0);
- mem = pcim_iomap_table(pci)[0];
map = readw(mem + 0x18);
/* Map bits
diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c
index 440b8f9b40c9..0f319013a2a2 100644
--- a/sound/pci/cs5535audio/cs5535audio.c
+++ b/sound/pci/cs5535audio/cs5535audio.c
@@ -262,7 +262,7 @@ static int snd_cs5535audio_create(struct snd_card *card,
cs5535au->pci = pci;
cs5535au->irq = -1;
- err = pci_request_regions(pci, "CS5535 Audio");
+ err = pcim_request_all_regions(pci, "CS5535 Audio");
if (err < 0)
return err;
diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
index 89e47fa14f70..aa179644b5c9 100644
--- a/sound/pci/ctxfi/cttimer.c
+++ b/sound/pci/ctxfi/cttimer.c
@@ -119,7 +119,7 @@ static void ct_systimer_stop(struct ct_timer_instance *ti)
static void ct_systimer_prepare(struct ct_timer_instance *ti)
{
ct_systimer_stop(ti);
- try_to_del_timer_sync(&ti->timer);
+ timer_delete_sync_try(&ti->timer);
}
#define ct_systimer_free ct_systimer_prepare
diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c
index 4effcc0090ac..80d8ce75fdbb 100644
--- a/sound/pci/echoaudio/echoaudio.c
+++ b/sound/pci/echoaudio/echoaudio.c
@@ -1910,7 +1910,7 @@ static int snd_echo_create(struct snd_card *card,
chip->can_set_rate = 1;
/* PCI resource allocation */
- err = pci_request_regions(pci, ECHOCARD_NAME);
+ err = pcim_request_all_regions(pci, ECHOCARD_NAME);
if (err < 0)
return err;
diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c
index 5b8a5ba825bd..bbe252b8916c 100644
--- a/sound/pci/emu10k1/emu10k1_main.c
+++ b/sound/pci/emu10k1/emu10k1_main.c
@@ -1563,7 +1563,7 @@ int snd_emu10k1_create(struct snd_card *card,
else
emu->gpr_base = FXGPREGBASE;
- err = pci_request_regions(pci, "EMU10K1");
+ err = pcim_request_all_regions(pci, "EMU10K1");
if (err < 0)
return err;
emu->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index 89043392f3ec..30ac37b5a214 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -884,7 +884,7 @@ static int snd_emu10k1x_create(struct snd_card *card,
spin_lock_init(&chip->emu_lock);
spin_lock_init(&chip->voice_lock);
- err = pci_request_regions(pci, "EMU10K1X");
+ err = pcim_request_all_regions(pci, "EMU10K1X");
if (err < 0)
return err;
chip->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c
index 71c89e4e3090..1e6adf1ae304 100644
--- a/sound/pci/ens1370.c
+++ b/sound/pci/ens1370.c
@@ -2022,7 +2022,7 @@ static int snd_ensoniq_create(struct snd_card *card,
ensoniq->card = card;
ensoniq->pci = pci;
ensoniq->irq = -1;
- err = pci_request_regions(pci, "Ensoniq AudioPCI");
+ err = pcim_request_all_regions(pci, "Ensoniq AudioPCI");
if (err < 0)
return err;
ensoniq->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c
index 018a8d53ca53..27728bdfac57 100644
--- a/sound/pci/es1938.c
+++ b/sound/pci/es1938.c
@@ -1531,7 +1531,7 @@ static int snd_es1938_create(struct snd_card *card,
chip->card = card;
chip->pci = pci;
chip->irq = -1;
- err = pci_request_regions(pci, "ESS Solo-1");
+ err = pcim_request_all_regions(pci, "ESS Solo-1");
if (err < 0)
return err;
chip->io_port = pci_resource_start(pci, 0);
diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c
index c6c018b40c69..37bac890613e 100644
--- a/sound/pci/es1968.c
+++ b/sound/pci/es1968.c
@@ -1561,7 +1561,7 @@ static int snd_es1968_capture_open(struct snd_pcm_substream *substream)
struct snd_pcm_runtime *runtime = substream->runtime;
struct es1968 *chip = snd_pcm_substream_chip(substream);
struct esschan *es;
- int apu1, apu2;
+ int err, apu1, apu2;
apu1 = snd_es1968_alloc_apu_pair(chip, ESM_APU_PCM_CAPTURE);
if (apu1 < 0)
@@ -1605,7 +1605,9 @@ static int snd_es1968_capture_open(struct snd_pcm_substream *substream)
runtime->hw = snd_es1968_capture;
runtime->hw.buffer_bytes_max = runtime->hw.period_bytes_max =
calc_available_memory_size(chip) - 1024; /* keep MIXBUF size */
- snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES);
+ err = snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES);
+ if (err < 0)
+ return err;
spin_lock_irq(&chip->substream_lock);
list_add(&es->list, &chip->substream_list);
@@ -2647,7 +2649,7 @@ static int snd_es1968_create(struct snd_card *card,
chip->playback_streams = play_streams;
chip->capture_streams = capt_streams;
- err = pci_request_regions(pci, "ESS Maestro");
+ err = pcim_request_all_regions(pci, "ESS Maestro");
if (err < 0)
return err;
chip->io_port = pci_resource_start(pci, 0);
diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index 7f4834c2d5e6..f283256eda0d 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -1191,7 +1191,7 @@ static int snd_fm801_create(struct snd_card *card,
chip->dev = &pci->dev;
chip->irq = -1;
chip->tea575x_tuner = tea575x_tuner;
- err = pci_request_regions(pci, "FM801");
+ err = pcim_request_all_regions(pci, "FM801");
if (err < 0)
return err;
chip->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 9c427270ff4f..745f120a5cee 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -42,6 +42,17 @@ config SND_HDA_TEGRA
To compile this driver as a module, choose M here: the module
will be called snd-hda-tegra.
+config SND_HDA_ACPI
+ tristate "HD Audio ACPI"
+ depends on ACPI
+ select SND_HDA
+ help
+ Say Y here to include support for Azalia-compatible HDA controllers
+ which are advertised via ACPI objects.
+
+ To compile this driver as a module, choose M here: the module
+ will be called snd-hda-acpi.
+
if SND_HDA
config SND_HDA_HWDEP
@@ -109,9 +120,6 @@ config SND_HDA_SCODEC_CS35L41
tristate
select SND_HDA_GENERIC
select REGMAP_IRQ
-
-config SND_HDA_CS_DSP_CONTROLS
- tristate
select FW_CS_DSP
config SND_HDA_SCODEC_COMPONENT
@@ -125,7 +133,6 @@ config SND_HDA_SCODEC_CS35L41_I2C
depends on SND_SOC
select SND_SOC_CS35L41_LIB
select SND_HDA_SCODEC_CS35L41
- select SND_HDA_CS_DSP_CONTROLS
select SND_SOC_CS_AMP_LIB
help
Say Y or M here to include CS35L41 I2C HD-audio side codec support
@@ -142,7 +149,6 @@ config SND_HDA_SCODEC_CS35L41_SPI
depends on SND_SOC
select SND_SOC_CS35L41_LIB
select SND_HDA_SCODEC_CS35L41
- select SND_HDA_CS_DSP_CONTROLS
select SND_SOC_CS_AMP_LIB
help
Say Y or M here to include CS35L41 SPI HD-audio side codec support
@@ -157,7 +163,7 @@ config SND_HDA_SCODEC_CS35L56
config SND_HDA_SCODEC_CS35L56_I2C
tristate "Build CS35L56 HD-audio side codec support for I2C Bus"
depends on I2C
- depends on ACPI || COMPILE_TEST
+ depends on ACPI
depends on SND_SOC
select FW_CS_DSP
imply SERIAL_MULTI_INSTANTIATE
@@ -165,7 +171,6 @@ config SND_HDA_SCODEC_CS35L56_I2C
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
select SND_HDA_CIRRUS_SCODEC
- select SND_HDA_CS_DSP_CONTROLS
select SND_SOC_CS_AMP_LIB
help
Say Y or M here to include CS35L56 amplifier support with
@@ -174,7 +179,7 @@ config SND_HDA_SCODEC_CS35L56_I2C
config SND_HDA_SCODEC_CS35L56_SPI
tristate "Build CS35L56 HD-audio side codec support for SPI Bus"
depends on SPI_MASTER
- depends on ACPI || COMPILE_TEST
+ depends on ACPI
depends on SND_SOC
select FW_CS_DSP
imply SERIAL_MULTI_INSTANTIATE
@@ -182,19 +187,23 @@ config SND_HDA_SCODEC_CS35L56_SPI
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
select SND_HDA_CIRRUS_SCODEC
- select SND_HDA_CS_DSP_CONTROLS
select SND_SOC_CS_AMP_LIB
help
Say Y or M here to include CS35L56 amplifier support with
SPI control.
+config SND_HDA_SCODEC_TAS2781
+ tristate
+ select SND_HDA_GENERIC
+
config SND_HDA_SCODEC_TAS2781_I2C
tristate "Build TAS2781 HD-audio side codec support for I2C Bus"
depends on I2C
depends on ACPI
depends on EFI
depends on SND_SOC
- select SND_SOC_TAS2781_COMLIB
+ select SND_HDA_SCODEC_TAS2781
+ select SND_SOC_TAS2781_COMLIB_I2C
select SND_SOC_TAS2781_FMWLIB
select CRC32
help
@@ -210,6 +219,10 @@ config SND_HDA_SCODEC_TAS2781_SPI
depends on ACPI
depends on EFI
depends on SND_SOC
+ select SND_HDA_SCODEC_TAS2781
+ select SND_SOC_TAS2781_COMLIB
+ select SND_SOC_TAS2781_FMWLIB
+ select CRC8
select CRC32
help
Say Y or M here to include TAS2781 SPI HD-audio side codec support
diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile
index 210c406dfbc5..a5ab8ee2d7f9 100644
--- a/sound/pci/hda/Makefile
+++ b/sound/pci/hda/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
snd-hda-intel-y := hda_intel.o
snd-hda-tegra-y := hda_tegra.o
+snd-hda-acpi-y := hda_acpi.o
snd-hda-codec-y := hda_bind.o hda_codec.o hda_jack.o hda_auto_parser.o hda_sysfs.o
snd-hda-codec-y += hda_controller.o
@@ -37,10 +38,10 @@ snd-hda-scodec-cs35l41-spi-y := cs35l41_hda_spi.o
snd-hda-scodec-cs35l56-y := cs35l56_hda.o
snd-hda-scodec-cs35l56-i2c-y := cs35l56_hda_i2c.o
snd-hda-scodec-cs35l56-spi-y := cs35l56_hda_spi.o
-snd-hda-cs-dsp-ctls-y := hda_cs_dsp_ctl.o
snd-hda-scodec-component-y := hda_component.o
+snd-hda-scodec-tas2781-y := tas2781_hda.o
snd-hda-scodec-tas2781-i2c-y := tas2781_hda_i2c.o
-snd-hda-scodec-tas2781-spi-y := tas2781_hda_spi.o tas2781_spi_fwlib.o
+snd-hda-scodec-tas2781-spi-y := tas2781_hda_spi.o
# common driver
obj-$(CONFIG_SND_HDA) := snd-hda-codec.o
@@ -70,8 +71,8 @@ obj-$(CONFIG_SND_HDA_SCODEC_CS35L41_SPI) += snd-hda-scodec-cs35l41-spi.o
obj-$(CONFIG_SND_HDA_SCODEC_CS35L56) += snd-hda-scodec-cs35l56.o
obj-$(CONFIG_SND_HDA_SCODEC_CS35L56_I2C) += snd-hda-scodec-cs35l56-i2c.o
obj-$(CONFIG_SND_HDA_SCODEC_CS35L56_SPI) += snd-hda-scodec-cs35l56-spi.o
-obj-$(CONFIG_SND_HDA_CS_DSP_CONTROLS) += snd-hda-cs-dsp-ctls.o
obj-$(CONFIG_SND_HDA_SCODEC_COMPONENT) += snd-hda-scodec-component.o
+obj-$(CONFIG_SND_HDA_SCODEC_TAS2781) += snd-hda-scodec-tas2781.o
obj-$(CONFIG_SND_HDA_SCODEC_TAS2781_I2C) += snd-hda-scodec-tas2781-i2c.o
obj-$(CONFIG_SND_HDA_SCODEC_TAS2781_SPI) += snd-hda-scodec-tas2781-spi.o
@@ -80,3 +81,4 @@ obj-$(CONFIG_SND_HDA_SCODEC_TAS2781_SPI) += snd-hda-scodec-tas2781-spi.o
# when built in kernel
obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-intel.o
obj-$(CONFIG_SND_HDA_TEGRA) += snd-hda-tegra.o
+obj-$(CONFIG_SND_HDA_ACPI) += snd-hda-acpi.o
diff --git a/sound/pci/hda/cirrus_scodec_test.c b/sound/pci/hda/cirrus_scodec_test.c
index f5d6241daee4..93b9cbf1f08a 100644
--- a/sound/pci/hda/cirrus_scodec_test.c
+++ b/sound/pci/hda/cirrus_scodec_test.c
@@ -5,20 +5,30 @@
// Copyright (C) 2023 Cirrus Logic, Inc. and
// Cirrus Logic International Semiconductor Ltd.
+#include <kunit/platform_device.h>
+#include <kunit/resource.h>
#include <kunit/test.h>
+#include <linux/device.h>
+#include <linux/device/faux.h>
#include <linux/gpio/driver.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include "cirrus_scodec.h"
+KUNIT_DEFINE_ACTION_WRAPPER(faux_device_destroy_wrapper, faux_device_destroy,
+ struct faux_device *)
+KUNIT_DEFINE_ACTION_WRAPPER(device_remove_software_node_wrapper,
+ device_remove_software_node,
+ struct device *)
+
struct cirrus_scodec_test_gpio {
unsigned int pin_state;
struct gpio_chip chip;
};
struct cirrus_scodec_test_priv {
- struct platform_device amp_pdev;
+ struct faux_device *amp_dev;
struct platform_device *gpio_pdev;
struct cirrus_scodec_test_gpio *gpio_priv;
};
@@ -48,9 +58,10 @@ static int cirrus_scodec_test_gpio_direction_out(struct gpio_chip *chip,
return -EOPNOTSUPP;
}
-static void cirrus_scodec_test_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int cirrus_scodec_test_gpio_set(struct gpio_chip *chip,
+ unsigned int offset, int value)
{
+ return -EOPNOTSUPP;
}
static int cirrus_scodec_test_gpio_set_config(struct gpio_chip *gc,
@@ -75,7 +86,7 @@ static const struct gpio_chip cirrus_scodec_test_gpio_chip = {
.direction_input = cirrus_scodec_test_gpio_direction_in,
.get = cirrus_scodec_test_gpio_get,
.direction_output = cirrus_scodec_test_gpio_direction_out,
- .set = cirrus_scodec_test_gpio_set,
+ .set_rv = cirrus_scodec_test_gpio_set,
.set_config = cirrus_scodec_test_gpio_set_config,
.base = -1,
.ngpio = 32,
@@ -103,6 +114,7 @@ static int cirrus_scodec_test_gpio_probe(struct platform_device *pdev)
static struct platform_driver cirrus_scodec_test_gpio_driver = {
.driver.name = "cirrus_scodec_test_gpio_drv",
+ .driver.owner = THIS_MODULE,
.probe = cirrus_scodec_test_gpio_probe,
};
@@ -111,37 +123,28 @@ static const struct software_node cirrus_scodec_test_gpio_swnode = {
.name = "cirrus_scodec_test_gpio",
};
-static int cirrus_scodec_test_create_gpio(struct kunit *test)
+static void cirrus_scodec_test_create_gpio(struct kunit *test)
{
struct cirrus_scodec_test_priv *priv = test->priv;
- int ret;
- priv->gpio_pdev = platform_device_alloc(cirrus_scodec_test_gpio_driver.driver.name, -1);
- if (!priv->gpio_pdev)
- return -ENOMEM;
+ KUNIT_ASSERT_EQ(test, 0,
+ kunit_platform_driver_register(test, &cirrus_scodec_test_gpio_driver));
- ret = device_add_software_node(&priv->gpio_pdev->dev, &cirrus_scodec_test_gpio_swnode);
- if (ret) {
- platform_device_put(priv->gpio_pdev);
- KUNIT_FAIL(test, "Failed to add swnode to gpio: %d\n", ret);
- return ret;
- }
+ priv->gpio_pdev = kunit_platform_device_alloc(test,
+ cirrus_scodec_test_gpio_driver.driver.name,
+ PLATFORM_DEVID_NONE);
+ KUNIT_ASSERT_NOT_NULL(test, priv->gpio_pdev);
- ret = platform_device_add(priv->gpio_pdev);
- if (ret) {
- platform_device_put(priv->gpio_pdev);
- KUNIT_FAIL(test, "Failed to add gpio platform device: %d\n", ret);
- return ret;
- }
+ KUNIT_ASSERT_EQ(test, 0, device_add_software_node(&priv->gpio_pdev->dev,
+ &cirrus_scodec_test_gpio_swnode));
+ KUNIT_ASSERT_EQ(test, 0, kunit_add_action_or_reset(test,
+ device_remove_software_node_wrapper,
+ &priv->gpio_pdev->dev));
- priv->gpio_priv = dev_get_drvdata(&priv->gpio_pdev->dev);
- if (!priv->gpio_priv) {
- platform_device_put(priv->gpio_pdev);
- KUNIT_FAIL(test, "Failed to get gpio private data\n");
- return -EINVAL;
- }
+ KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_add(test, priv->gpio_pdev));
- return 0;
+ priv->gpio_priv = dev_get_drvdata(&priv->gpio_pdev->dev);
+ KUNIT_ASSERT_NOT_NULL(test, priv->gpio_priv);
}
static void cirrus_scodec_test_set_gpio_ref_arg(struct software_node_ref_args *arg,
@@ -191,7 +194,7 @@ static void cirrus_scodec_test_spkid_parse(struct kunit *test)
const struct cirrus_scodec_test_spkid_param *param = test->param_value;
int num_spk_id_refs = param->num_amps * param->gpios_per_amp;
struct software_node_ref_args *refs;
- struct device *dev = &priv->amp_pdev.dev;
+ struct device *dev = &priv->amp_dev->dev;
unsigned int v;
int i, ret;
@@ -234,21 +237,16 @@ static void cirrus_scodec_test_spkid_parse(struct kunit *test)
static void cirrus_scodec_test_no_spkid(struct kunit *test)
{
struct cirrus_scodec_test_priv *priv = test->priv;
- struct device *dev = &priv->amp_pdev.dev;
+ struct device *dev = &priv->amp_dev->dev;
int ret;
ret = cirrus_scodec_get_speaker_id(dev, 0, 4, -1);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
}
-static void cirrus_scodec_test_dev_release(struct device *dev)
-{
-}
-
static int cirrus_scodec_test_case_init(struct kunit *test)
{
struct cirrus_scodec_test_priv *priv;
- int ret;
priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -257,52 +255,18 @@ static int cirrus_scodec_test_case_init(struct kunit *test)
test->priv = priv;
/* Create dummy GPIO */
- ret = cirrus_scodec_test_create_gpio(test);
- if (ret < 0)
- return ret;
+ cirrus_scodec_test_create_gpio(test);
/* Create dummy amp driver dev */
- priv->amp_pdev.name = "cirrus_scodec_test_amp_drv";
- priv->amp_pdev.id = -1;
- priv->amp_pdev.dev.release = cirrus_scodec_test_dev_release;
- ret = platform_device_register(&priv->amp_pdev);
- KUNIT_ASSERT_GE_MSG(test, ret, 0, "Failed to register amp platform device\n");
-
- return 0;
-}
-
-static void cirrus_scodec_test_case_exit(struct kunit *test)
-{
- struct cirrus_scodec_test_priv *priv = test->priv;
-
- if (priv->amp_pdev.name)
- platform_device_unregister(&priv->amp_pdev);
-
- if (priv->gpio_pdev) {
- device_remove_software_node(&priv->gpio_pdev->dev);
- platform_device_unregister(priv->gpio_pdev);
- }
-}
-
-static int cirrus_scodec_test_suite_init(struct kunit_suite *suite)
-{
- int ret;
-
- /* Register mock GPIO driver */
- ret = platform_driver_register(&cirrus_scodec_test_gpio_driver);
- if (ret < 0) {
- kunit_err(suite, "Failed to register gpio platform driver, %d\n", ret);
- return ret;
- }
+ priv->amp_dev = faux_device_create("cirrus_scodec_test_amp_drv", NULL, NULL);
+ KUNIT_ASSERT_NOT_NULL(test, priv->amp_dev);
+ KUNIT_ASSERT_EQ(test, 0, kunit_add_action_or_reset(test,
+ faux_device_destroy_wrapper,
+ priv->amp_dev));
return 0;
}
-static void cirrus_scodec_test_suite_exit(struct kunit_suite *suite)
-{
- platform_driver_unregister(&cirrus_scodec_test_gpio_driver);
-}
-
static const struct cirrus_scodec_test_spkid_param cirrus_scodec_test_spkid_param_cases[] = {
{ .num_amps = 2, .gpios_per_amp = 1, .num_amps_sharing = 1 },
{ .num_amps = 2, .gpios_per_amp = 2, .num_amps_sharing = 1 },
@@ -356,10 +320,7 @@ static struct kunit_case cirrus_scodec_test_cases[] = {
static struct kunit_suite cirrus_scodec_test_suite = {
.name = "snd-hda-scodec-cs35l56-test",
- .suite_init = cirrus_scodec_test_suite_init,
- .suite_exit = cirrus_scodec_test_suite_exit,
.init = cirrus_scodec_test_case_init,
- .exit = cirrus_scodec_test_case_exit,
.test_cases = cirrus_scodec_test_cases,
};
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index 5dc021976c79..d9c8872b1866 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -20,7 +20,6 @@
#include "hda_generic.h"
#include "hda_component.h"
#include "cs35l41_hda.h"
-#include "hda_cs_dsp_ctl.h"
#include "cs35l41_hda_property.h"
#define CS35L41_PART "cs35l41"
@@ -74,6 +73,21 @@ static const struct cirrus_amp_cal_controls cs35l41_calibration_controls = {
.checksum = CAL_CHECKSUM_DSP_CTL_NAME,
};
+enum cs35l41_hda_fw_id {
+ CS35L41_HDA_FW_SPK_PROT,
+ CS35L41_HDA_FW_SPK_CALI,
+ CS35L41_HDA_FW_SPK_DIAG,
+ CS35L41_HDA_FW_MISC,
+ CS35L41_HDA_NUM_FW
+};
+
+static const char * const cs35l41_hda_fw_ids[CS35L41_HDA_NUM_FW] = {
+ [CS35L41_HDA_FW_SPK_PROT] = "spk-prot",
+ [CS35L41_HDA_FW_SPK_CALI] = "spk-cali",
+ [CS35L41_HDA_FW_SPK_DIAG] = "spk-diag",
+ [CS35L41_HDA_FW_MISC] = "misc",
+};
+
static bool firmware_autostart = 1;
module_param(firmware_autostart, bool, 0444);
MODULE_PARM_DESC(firmware_autostart, "Allow automatic firmware download on boot"
@@ -169,23 +183,23 @@ static int cs35l41_request_firmware_file(struct cs35l41_hda *cs35l41,
if (spkid > -1 && ssid && amp_name)
*filename = kasprintf(GFP_KERNEL, "cirrus/%s-%s-%s-%s-spkid%d-%s.%s", CS35L41_PART,
- dsp_name, hda_cs_dsp_fw_ids[cs35l41->firmware_type],
+ dsp_name, cs35l41_hda_fw_ids[cs35l41->firmware_type],
ssid, spkid, amp_name, filetype);
else if (spkid > -1 && ssid)
*filename = kasprintf(GFP_KERNEL, "cirrus/%s-%s-%s-%s-spkid%d.%s", CS35L41_PART,
- dsp_name, hda_cs_dsp_fw_ids[cs35l41->firmware_type],
+ dsp_name, cs35l41_hda_fw_ids[cs35l41->firmware_type],
ssid, spkid, filetype);
else if (ssid && amp_name)
*filename = kasprintf(GFP_KERNEL, "cirrus/%s-%s-%s-%s-%s.%s", CS35L41_PART,
- dsp_name, hda_cs_dsp_fw_ids[cs35l41->firmware_type],
+ dsp_name, cs35l41_hda_fw_ids[cs35l41->firmware_type],
ssid, amp_name, filetype);
else if (ssid)
*filename = kasprintf(GFP_KERNEL, "cirrus/%s-%s-%s-%s.%s", CS35L41_PART,
- dsp_name, hda_cs_dsp_fw_ids[cs35l41->firmware_type],
+ dsp_name, cs35l41_hda_fw_ids[cs35l41->firmware_type],
ssid, filetype);
else
*filename = kasprintf(GFP_KERNEL, "cirrus/%s-%s-%s.%s", CS35L41_PART,
- dsp_name, hda_cs_dsp_fw_ids[cs35l41->firmware_type],
+ dsp_name, cs35l41_hda_fw_ids[cs35l41->firmware_type],
filetype);
if (*filename == NULL)
@@ -588,7 +602,7 @@ static int cs35l41_init_dsp(struct cs35l41_hda *cs35l41)
}
ret = cs_dsp_power_up(dsp, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename,
- hda_cs_dsp_fw_ids[cs35l41->firmware_type]);
+ cs35l41_hda_fw_ids[cs35l41->firmware_type]);
if (ret)
goto err;
@@ -1108,6 +1122,18 @@ err:
return ret;
}
+static int cs35l41_hda_read_ctl(struct cs_dsp *dsp, const char *name, int type,
+ unsigned int alg, void *buf, size_t len)
+{
+ int ret;
+
+ mutex_lock(&dsp->pwr_lock);
+ ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(dsp, name, type, alg), 0, buf, len);
+ mutex_unlock(&dsp->pwr_lock);
+
+ return ret;
+}
+
static int cs35l41_smart_amp(struct cs35l41_hda *cs35l41)
{
unsigned int fw_status;
@@ -1137,7 +1163,7 @@ static int cs35l41_smart_amp(struct cs35l41_hda *cs35l41)
goto clean_dsp;
}
- ret = read_poll_timeout(hda_cs_dsp_read_ctl, ret,
+ ret = read_poll_timeout(cs35l41_hda_read_ctl, ret,
be32_to_cpu(halo_sts) == HALO_STATE_CODE_RUN,
1000, 15000, false, &cs35l41->cs_dsp, HALO_STATE_DSP_CTL_NAME,
HALO_STATE_DSP_CTL_TYPE, HALO_STATE_DSP_CTL_ALG,
@@ -1174,7 +1200,7 @@ static int cs35l41_smart_amp(struct cs35l41_hda *cs35l41)
}
dev_info(cs35l41->dev, "Firmware Loaded - Type: %s, Gain: %d\n",
- hda_cs_dsp_fw_ids[cs35l41->firmware_type], cs35l41->tuning_gain);
+ cs35l41_hda_fw_ids[cs35l41->firmware_type], cs35l41->tuning_gain);
return 0;
@@ -1276,7 +1302,7 @@ static int cs35l41_fw_type_ctl_put(struct snd_kcontrol *kcontrol,
{
struct cs35l41_hda *cs35l41 = snd_kcontrol_chip(kcontrol);
- if (ucontrol->value.enumerated.item[0] < HDA_CS_DSP_NUM_FW) {
+ if (ucontrol->value.enumerated.item[0] < CS35L41_HDA_NUM_FW) {
if (cs35l41->firmware_type != ucontrol->value.enumerated.item[0]) {
cs35l41->firmware_type = ucontrol->value.enumerated.item[0];
return 1;
@@ -1290,7 +1316,7 @@ static int cs35l41_fw_type_ctl_put(struct snd_kcontrol *kcontrol,
static int cs35l41_fw_type_ctl_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
{
- return snd_ctl_enum_info(uinfo, 1, ARRAY_SIZE(hda_cs_dsp_fw_ids), hda_cs_dsp_fw_ids);
+ return snd_ctl_enum_info(uinfo, 1, ARRAY_SIZE(cs35l41_hda_fw_ids), cs35l41_hda_fw_ids);
}
static int cs35l41_create_controls(struct cs35l41_hda *cs35l41)
@@ -1430,7 +1456,7 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas
strscpy(comp->name, dev_name(dev), sizeof(comp->name));
- cs35l41->firmware_type = HDA_CS_DSP_FW_SPK_PROT;
+ cs35l41->firmware_type = CS35L41_HDA_FW_SPK_PROT;
if (firmware_autostart) {
dev_dbg(cs35l41->dev, "Firmware Autostart.\n");
@@ -2055,7 +2081,6 @@ const struct dev_pm_ops cs35l41_hda_pm_ops = {
EXPORT_SYMBOL_NS_GPL(cs35l41_hda_pm_ops, "SND_HDA_SCODEC_CS35L41");
MODULE_DESCRIPTION("CS35L41 HDA Driver");
-MODULE_IMPORT_NS("SND_HDA_CS_DSP_CONTROLS");
MODULE_IMPORT_NS("SND_SOC_CS_AMP_LIB");
MODULE_AUTHOR("Lucas Tanure, Cirrus Logic Inc, <tanureal@opensource.cirrus.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c
index 61d2314834e7..d8249d997c2a 100644
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
@@ -31,6 +31,9 @@ struct cs35l41_config {
};
static const struct cs35l41_config cs35l41_config_table[] = {
+ { "10251826", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+ { "1025182C", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+ { "10251844", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
{ "10280B27", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "10280B28", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "10280BEB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
@@ -452,6 +455,9 @@ struct cs35l41_prop_model {
static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CLSA0100", NULL, lenovo_legion_no_acpi },
{ "CLSA0101", NULL, lenovo_legion_no_acpi },
+ { "CSC3551", "10251826", generic_dsd_config },
+ { "CSC3551", "1025182C", generic_dsd_config },
+ { "CSC3551", "10251844", generic_dsd_config },
{ "CSC3551", "10280B27", generic_dsd_config },
{ "CSC3551", "10280B28", generic_dsd_config },
{ "CSC3551", "10280BEB", generic_dsd_config },
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 235d22049aa9..3f2fd32f4ad9 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -20,7 +20,6 @@
#include "cirrus_scodec.h"
#include "cs35l56_hda.h"
#include "hda_component.h"
-#include "hda_cs_dsp_ctl.h"
#include "hda_generic.h"
/*
@@ -68,7 +67,7 @@ static void cs35l56_hda_play(struct cs35l56_hda *cs35l56)
if (ret == 0) {
/* Wait for firmware to enter PS0 power state */
ret = regmap_read_poll_timeout(cs35l56->base.regmap,
- CS35L56_TRANSDUCER_ACTUAL_PS,
+ cs35l56->base.fw_reg->transducer_actual_ps,
val, (val == CS35L56_PS0),
CS35L56_PS0_POLL_US,
CS35L56_PS0_TIMEOUT_US);
@@ -180,7 +179,7 @@ static int cs35l56_hda_mixer_info(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int reg_val;
int i;
@@ -202,7 +201,7 @@ static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_mixer_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int item = ucontrol->value.enumerated.item[0];
bool changed;
@@ -231,13 +230,14 @@ static int cs35l56_hda_posture_info(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int pos;
int ret;
cs35l56_hda_wait_dsp_ready(cs35l56);
- ret = regmap_read(cs35l56->base.regmap, CS35L56_MAIN_POSTURE_NUMBER, &pos);
+ ret = regmap_read(cs35l56->base.regmap,
+ cs35l56->base.fw_reg->posture_number, &pos);
if (ret)
return ret;
@@ -249,7 +249,7 @@ static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned long pos = ucontrol->value.integer.value[0];
bool changed;
int ret;
@@ -260,10 +260,8 @@ static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol,
cs35l56_hda_wait_dsp_ready(cs35l56);
- ret = regmap_update_bits_check(cs35l56->base.regmap,
- CS35L56_MAIN_POSTURE_NUMBER,
- CS35L56_MAIN_POSTURE_MASK,
- pos, &changed);
+ ret = regmap_update_bits_check(cs35l56->base.regmap, cs35l56->base.fw_reg->posture_number,
+ CS35L56_MAIN_POSTURE_MASK, pos, &changed);
if (ret)
return ret;
@@ -298,14 +296,14 @@ static int cs35l56_hda_vol_info(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_vol_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int raw_vol;
int vol;
int ret;
cs35l56_hda_wait_dsp_ready(cs35l56);
- ret = regmap_read(cs35l56->base.regmap, CS35L56_MAIN_RENDER_USER_VOLUME, &raw_vol);
+ ret = regmap_read(cs35l56->base.regmap, cs35l56->base.fw_reg->user_volume, &raw_vol);
if (ret)
return ret;
@@ -324,7 +322,7 @@ static int cs35l56_hda_vol_get(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_vol_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
long vol = ucontrol->value.integer.value[0];
unsigned int raw_vol;
bool changed;
@@ -339,10 +337,8 @@ static int cs35l56_hda_vol_put(struct snd_kcontrol *kcontrol,
cs35l56_hda_wait_dsp_ready(cs35l56);
- ret = regmap_update_bits_check(cs35l56->base.regmap,
- CS35L56_MAIN_RENDER_USER_VOLUME,
- CS35L56_MAIN_RENDER_USER_VOLUME_MASK,
- raw_vol, &changed);
+ ret = regmap_update_bits_check(cs35l56->base.regmap, cs35l56->base.fw_reg->user_volume,
+ CS35L56_MAIN_RENDER_USER_VOLUME_MASK, raw_vol, &changed);
if (ret)
return ret;
@@ -665,7 +661,8 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
regcache_sync(cs35l56->base.regmap);
- regmap_clear_bits(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS,
+ regmap_clear_bits(cs35l56->base.regmap,
+ cs35l56->base.fw_reg->prot_sts,
CS35L56_FIRMWARE_MISSING);
cs35l56->base.fw_patched = true;
@@ -678,6 +675,8 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
if (ret)
cs_dsp_stop(&cs35l56->cs_dsp);
+ cs35l56_log_tuning(&cs35l56->base, &cs35l56->cs_dsp);
+
err_powered_up:
if (!cs35l56->base.fw_patched)
cs_dsp_power_down(&cs35l56->cs_dsp);
@@ -1118,7 +1117,6 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_hda_pm_ops, "SND_HDA_SCODEC_CS35L56");
MODULE_DESCRIPTION("CS35L56 HDA Driver");
MODULE_IMPORT_NS("FW_CS_DSP");
MODULE_IMPORT_NS("SND_HDA_CIRRUS_SCODEC");
-MODULE_IMPORT_NS("SND_HDA_CS_DSP_CONTROLS");
MODULE_IMPORT_NS("SND_SOC_CS35L56_SHARED");
MODULE_IMPORT_NS("SND_SOC_CS_AMP_LIB");
MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
index c7b836613149..d10209e4eddd 100644
--- a/sound/pci/hda/cs35l56_hda_i2c.c
+++ b/sound/pci/hda/cs35l56_hda_i2c.c
@@ -26,6 +26,9 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt)
#ifdef CS35L56_WAKE_HOLD_TIME_US
cs35l56->base.can_hibernate = true;
#endif
+
+ cs35l56->base.fw_reg = &cs35l56_fw_reg;
+
cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c
index 903578466905..f57533d3d728 100644
--- a/sound/pci/hda/cs35l56_hda_spi.c
+++ b/sound/pci/hda/cs35l56_hda_spi.c
@@ -29,6 +29,9 @@ static int cs35l56_hda_spi_probe(struct spi_device *spi)
#ifdef CS35L56_WAKE_HOLD_TIME_US
cs35l56->base.can_hibernate = true;
#endif
+
+ cs35l56->base.fw_reg = &cs35l56_fw_reg;
+
cs35l56->base.regmap = devm_regmap_init_spi(spi, &cs35l56_regmap_spi);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
diff --git a/sound/pci/hda/hda_acpi.c b/sound/pci/hda/hda_acpi.c
new file mode 100644
index 000000000000..505cc97e0ee9
--- /dev/null
+++ b/sound/pci/hda/hda_acpi.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ALSA driver for ACPI-based HDA Controllers.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+#include <sound/hda_codec.h>
+
+#include "hda_controller.h"
+
+struct hda_acpi {
+ struct azx azx;
+ struct snd_card *card;
+ struct platform_device *pdev;
+ void __iomem *regs;
+ struct work_struct probe_work;
+ const struct hda_data *data;
+};
+
+/**
+ * struct hda_data - Optional device-specific data
+ * @short_name: Used for the ALSA card name; defaults to KBUILD_MODNAME
+ * @long_name: Used for longer description; defaults to short_name
+ * @flags: Passed to &azx->driver_caps
+ *
+ * A pointer to a record of this type may be stored in the
+ * &acpi_device_id->driver_data field of an ACPI match table entry in order to
+ * customize the naming and behavior of a particular device. All fields are
+ * optional and sensible defaults will be selected in their absence.
+ */
+struct hda_data {
+ const char *short_name;
+ const char *long_name;
+ unsigned long flags;
+};
+
+static int hda_acpi_dev_disconnect(struct snd_device *device)
+{
+ struct azx *chip = device->device_data;
+
+ chip->bus.shutdown = 1;
+ return 0;
+}
+
+static int hda_acpi_dev_free(struct snd_device *device)
+{
+ struct azx *azx = device->device_data;
+ struct hda_acpi *hda = container_of(azx, struct hda_acpi, azx);
+
+ cancel_work_sync(&hda->probe_work);
+ if (azx_bus(azx)->chip_init) {
+ azx_stop_all_streams(azx);
+ azx_stop_chip(azx);
+ }
+
+ azx_free_stream_pages(azx);
+ azx_free_streams(azx);
+ snd_hdac_bus_exit(azx_bus(azx));
+
+ return 0;
+}
+
+static int hda_acpi_init(struct hda_acpi *hda)
+{
+ struct hdac_bus *bus = azx_bus(&hda->azx);
+ struct snd_card *card = hda->azx.card;
+ struct device *dev = &hda->pdev->dev;
+ struct azx *azx = &hda->azx;
+ struct resource *res;
+ unsigned short gcap;
+ const char *sname, *lname;
+ int err, irq;
+
+ /* The base address for the HDA registers and the interrupt are wrapped
+ * in an ACPI _CRS object which can be parsed by platform_get_irq() and
+ * devm_platform_get_and_ioremap_resource()
+ */
+
+ irq = platform_get_irq(hda->pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ hda->regs = devm_platform_get_and_ioremap_resource(hda->pdev, 0, &res);
+ if (IS_ERR(hda->regs))
+ return PTR_ERR(hda->regs);
+
+ bus->remap_addr = hda->regs;
+ bus->addr = res->start;
+
+ err = devm_request_irq(dev, irq, azx_interrupt,
+ IRQF_SHARED, KBUILD_MODNAME, azx);
+ if (err) {
+ dev_err(dev, "unable to request IRQ %d, disabling device\n",
+ irq);
+ return err;
+ }
+ bus->irq = irq;
+ bus->dma_stop_delay = 100;
+ card->sync_irq = bus->irq;
+
+ gcap = azx_readw(azx, GCAP);
+ dev_dbg(dev, "chipset global capabilities = 0x%x\n", gcap);
+
+ azx->align_buffer_size = 1;
+
+ azx->capture_streams = (gcap >> 8) & 0x0f;
+ azx->playback_streams = (gcap >> 12) & 0x0f;
+
+ azx->capture_index_offset = 0;
+ azx->playback_index_offset = azx->capture_streams;
+ azx->num_streams = azx->playback_streams + azx->capture_streams;
+
+ err = azx_init_streams(azx);
+ if (err < 0) {
+ dev_err(dev, "failed to initialize streams: %d\n", err);
+ return err;
+ }
+
+ err = azx_alloc_stream_pages(azx);
+ if (err < 0) {
+ dev_err(dev, "failed to allocate stream pages: %d\n", err);
+ return err;
+ }
+
+ azx_init_chip(azx, 1);
+
+ if (!bus->codec_mask) {
+ dev_err(dev, "no codecs found!\n");
+ return -ENODEV;
+ }
+
+ strscpy(card->driver, "hda-acpi");
+
+ sname = hda->data->short_name ? hda->data->short_name : KBUILD_MODNAME;
+
+ if (strlen(sname) > sizeof(card->shortname))
+ dev_info(dev, "truncating shortname for card %s\n", sname);
+ strscpy(card->shortname, sname);
+
+ lname = hda->data->long_name ? hda->data->long_name : sname;
+
+ snprintf(card->longname, sizeof(card->longname),
+ "%s at 0x%lx irq %i", lname, bus->addr, bus->irq);
+
+ return 0;
+}
+
+static void hda_acpi_probe_work(struct work_struct *work)
+{
+ struct hda_acpi *hda = container_of(work, struct hda_acpi, probe_work);
+ struct azx *chip = &hda->azx;
+ int err;
+
+ err = hda_acpi_init(hda);
+ if (err < 0)
+ return;
+
+ err = azx_probe_codecs(chip, 8);
+ if (err < 0)
+ return;
+
+ err = azx_codec_configure(chip);
+ if (err < 0)
+ return;
+
+ err = snd_card_register(chip->card);
+ if (err < 0)
+ return;
+
+ chip->running = 1;
+}
+
+static int hda_acpi_create(struct hda_acpi *hda)
+{
+ static const struct snd_device_ops ops = {
+ .dev_disconnect = hda_acpi_dev_disconnect,
+ .dev_free = hda_acpi_dev_free,
+ };
+ static const struct hda_controller_ops null_ops;
+ struct azx *azx = &hda->azx;
+ int err;
+
+ mutex_init(&azx->open_mutex);
+ azx->card = hda->card;
+ INIT_LIST_HEAD(&azx->pcm_list);
+
+ azx->ops = &null_ops;
+ azx->driver_caps = hda->data->flags;
+ azx->driver_type = hda->data->flags & 0xff;
+ azx->codec_probe_mask = -1;
+
+ err = azx_bus_init(azx, NULL);
+ if (err < 0)
+ return err;
+
+ err = snd_device_new(hda->card, SNDRV_DEV_LOWLEVEL, &hda->azx, &ops);
+ if (err < 0) {
+ dev_err(&hda->pdev->dev, "Error creating device\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static int hda_acpi_probe(struct platform_device *pdev)
+{
+ struct hda_acpi *hda;
+ int err;
+
+ hda = devm_kzalloc(&pdev->dev, sizeof(*hda), GFP_KERNEL);
+ if (!hda)
+ return -ENOMEM;
+
+ hda->pdev = pdev;
+ hda->data = acpi_device_get_match_data(&pdev->dev);
+
+ /* Fall back to defaults if the table didn't have a *struct hda_data */
+ if (!hda->data)
+ hda->data = devm_kzalloc(&pdev->dev, sizeof(*hda->data),
+ GFP_KERNEL);
+ if (!hda->data)
+ return -ENOMEM;
+
+ err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+ THIS_MODULE, 0, &hda->card);
+ if (err < 0) {
+ dev_err(&pdev->dev, "Error creating card!\n");
+ return err;
+ }
+
+ INIT_WORK(&hda->probe_work, hda_acpi_probe_work);
+
+ err = hda_acpi_create(hda);
+ if (err < 0)
+ goto out_free;
+ hda->card->private_data = &hda->azx;
+
+ dev_set_drvdata(&pdev->dev, hda->card);
+
+ schedule_work(&hda->probe_work);
+
+ return 0;
+
+out_free:
+ snd_card_free(hda->card);
+ return err;
+}
+
+static void hda_acpi_remove(struct platform_device *pdev)
+{
+ snd_card_free(dev_get_drvdata(&pdev->dev));
+}
+
+static void hda_acpi_shutdown(struct platform_device *pdev)
+{
+ struct snd_card *card = dev_get_drvdata(&pdev->dev);
+ struct azx *chip;
+
+ if (!card)
+ return;
+ chip = card->private_data;
+ if (chip && chip->running)
+ azx_stop_chip(chip);
+}
+
+static int hda_acpi_suspend(struct device *dev)
+{
+ struct snd_card *card = dev_get_drvdata(dev);
+ int rc;
+
+ rc = pm_runtime_force_suspend(dev);
+ if (rc < 0)
+ return rc;
+ snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
+
+ return 0;
+}
+
+static int hda_acpi_resume(struct device *dev)
+{
+ struct snd_card *card = dev_get_drvdata(dev);
+ int rc;
+
+ rc = pm_runtime_force_resume(dev);
+ if (rc < 0)
+ return rc;
+ snd_power_change_state(card, SNDRV_CTL_POWER_D0);
+
+ return 0;
+}
+
+static const struct dev_pm_ops hda_acpi_pm = {
+ SYSTEM_SLEEP_PM_OPS(hda_acpi_suspend, hda_acpi_resume)
+};
+
+static const struct hda_data nvidia_hda_data = {
+ .short_name = "NVIDIA",
+ .long_name = "NVIDIA HDA Controller",
+ .flags = AZX_DCAPS_CORBRP_SELF_CLEAR,
+};
+
+static const struct acpi_device_id hda_acpi_match[] = {
+ { .id = "NVDA2014", .driver_data = (uintptr_t) &nvidia_hda_data },
+ { .id = "NVDA2015", .driver_data = (uintptr_t) &nvidia_hda_data },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, hda_acpi_match);
+
+static struct platform_driver hda_acpi_platform_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .pm = &hda_acpi_pm,
+ .acpi_match_table = hda_acpi_match,
+ },
+ .probe = hda_acpi_probe,
+ .remove = hda_acpi_remove,
+ .shutdown = hda_acpi_shutdown,
+};
+module_platform_driver(hda_acpi_platform_driver);
+
+MODULE_DESCRIPTION("Driver for ACPI-based HDA Controllers");
+MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c
index 9521e5e0e6e6..1fef350d821e 100644
--- a/sound/pci/hda/hda_bind.c
+++ b/sound/pci/hda/hda_bind.c
@@ -18,10 +18,10 @@
/*
* find a matching codec id
*/
-static int hda_codec_match(struct hdac_device *dev, struct hdac_driver *drv)
+static int hda_codec_match(struct hdac_device *dev, const struct hdac_driver *drv)
{
struct hda_codec *codec = container_of(dev, struct hda_codec, core);
- struct hda_codec_driver *driver =
+ const struct hda_codec_driver *driver =
container_of(drv, struct hda_codec_driver, core);
const struct hda_device_id *list;
/* check probe_id instead of vendor_id if set */
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index b436d436831b..c018beeecd3d 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1732,37 +1732,6 @@ int snd_hda_ctl_add(struct hda_codec *codec, hda_nid_t nid,
EXPORT_SYMBOL_GPL(snd_hda_ctl_add);
/**
- * snd_hda_add_nid - Assign a NID to a control element
- * @codec: HD-audio codec
- * @kctl: the control element to assign
- * @index: index to kctl
- * @nid: corresponding NID (optional)
- *
- * Add the given control element to an array inside the codec instance.
- * This function is used when #snd_hda_ctl_add cannot be used for 1:1
- * NID:KCTL mapping - for example "Capture Source" selector.
- */
-int snd_hda_add_nid(struct hda_codec *codec, struct snd_kcontrol *kctl,
- unsigned int index, hda_nid_t nid)
-{
- struct hda_nid_item *item;
-
- if (nid > 0) {
- item = snd_array_new(&codec->nids);
- if (!item)
- return -ENOMEM;
- item->kctl = kctl;
- item->index = index;
- item->nid = nid;
- return 0;
- }
- codec_err(codec, "no NID for mapping control %s:%d:%d\n",
- kctl->id.name, kctl->id.index, index);
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(snd_hda_add_nid);
-
-/**
* snd_hda_ctls_clear - Clear all controls assigned to the given codec
* @codec: HD-audio codec
*/
diff --git a/sound/pci/hda/hda_cs_dsp_ctl.c b/sound/pci/hda/hda_cs_dsp_ctl.c
deleted file mode 100644
index 18fa6e7edb49..000000000000
--- a/sound/pci/hda/hda_cs_dsp_ctl.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-//
-// HDA DSP ALSA Control Driver
-//
-// Copyright 2022 Cirrus Logic, Inc.
-//
-// Author: Stefan Binding <sbinding@opensource.cirrus.com>
-
-#include <linux/module.h>
-#include <sound/soc.h>
-#include <linux/cleanup.h>
-#include <linux/firmware/cirrus/cs_dsp.h>
-#include <linux/firmware/cirrus/wmfw.h>
-#include "hda_cs_dsp_ctl.h"
-
-#define ADSP_MAX_STD_CTRL_SIZE 512
-
-struct hda_cs_dsp_coeff_ctl {
- struct cs_dsp_coeff_ctl *cs_ctl;
- struct snd_card *card;
- struct snd_kcontrol *kctl;
-};
-
-static const char * const hda_cs_dsp_fw_text[HDA_CS_DSP_NUM_FW] = {
- [HDA_CS_DSP_FW_SPK_PROT] = "Prot",
- [HDA_CS_DSP_FW_SPK_CALI] = "Cali",
- [HDA_CS_DSP_FW_SPK_DIAG] = "Diag",
- [HDA_CS_DSP_FW_MISC] = "Misc",
-};
-
-const char * const hda_cs_dsp_fw_ids[HDA_CS_DSP_NUM_FW] = {
- [HDA_CS_DSP_FW_SPK_PROT] = "spk-prot",
- [HDA_CS_DSP_FW_SPK_CALI] = "spk-cali",
- [HDA_CS_DSP_FW_SPK_DIAG] = "spk-diag",
- [HDA_CS_DSP_FW_MISC] = "misc",
-};
-EXPORT_SYMBOL_NS_GPL(hda_cs_dsp_fw_ids, "SND_HDA_CS_DSP_CONTROLS");
-
-static int hda_cs_dsp_coeff_info(struct snd_kcontrol *kctl, struct snd_ctl_elem_info *uinfo)
-{
- struct hda_cs_dsp_coeff_ctl *ctl = (struct hda_cs_dsp_coeff_ctl *)snd_kcontrol_chip(kctl);
- struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
- uinfo->count = cs_ctl->len;
-
- return 0;
-}
-
-static int hda_cs_dsp_coeff_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol)
-{
- struct hda_cs_dsp_coeff_ctl *ctl = (struct hda_cs_dsp_coeff_ctl *)snd_kcontrol_chip(kctl);
- struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
- char *p = ucontrol->value.bytes.data;
-
- return cs_dsp_coeff_lock_and_write_ctrl(cs_ctl, 0, p, cs_ctl->len);
-}
-
-static int hda_cs_dsp_coeff_get(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol)
-{
- struct hda_cs_dsp_coeff_ctl *ctl = (struct hda_cs_dsp_coeff_ctl *)snd_kcontrol_chip(kctl);
- struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
- char *p = ucontrol->value.bytes.data;
-
- return cs_dsp_coeff_lock_and_read_ctrl(cs_ctl, 0, p, cs_ctl->len);
-}
-
-static unsigned int wmfw_convert_flags(unsigned int in)
-{
- unsigned int out, rd, wr, vol;
-
- rd = SNDRV_CTL_ELEM_ACCESS_READ;
- wr = SNDRV_CTL_ELEM_ACCESS_WRITE;
- vol = SNDRV_CTL_ELEM_ACCESS_VOLATILE;
-
- out = 0;
-
- if (in) {
- out |= rd;
- if (in & WMFW_CTL_FLAG_WRITEABLE)
- out |= wr;
- if (in & WMFW_CTL_FLAG_VOLATILE)
- out |= vol;
- } else {
- out |= rd | wr | vol;
- }
-
- return out;
-}
-
-static void hda_cs_dsp_free_kcontrol(struct snd_kcontrol *kctl)
-{
- struct hda_cs_dsp_coeff_ctl *ctl = (struct hda_cs_dsp_coeff_ctl *)snd_kcontrol_chip(kctl);
- struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
-
- /* NULL priv to prevent a double-free in hda_cs_dsp_control_remove() */
- cs_ctl->priv = NULL;
- kfree(ctl);
-}
-
-static void hda_cs_dsp_add_kcontrol(struct cs_dsp_coeff_ctl *cs_ctl,
- const struct hda_cs_dsp_ctl_info *info,
- const char *name)
-{
- struct snd_kcontrol_new kcontrol = {0};
- struct snd_kcontrol *kctl;
- struct hda_cs_dsp_coeff_ctl *ctl __free(kfree) = NULL;
- int ret = 0;
-
- if (cs_ctl->len > ADSP_MAX_STD_CTRL_SIZE) {
- dev_err(cs_ctl->dsp->dev, "KControl %s: length %zu exceeds maximum %d\n", name,
- cs_ctl->len, ADSP_MAX_STD_CTRL_SIZE);
- return;
- }
-
- ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
- if (!ctl)
- return;
-
- ctl->cs_ctl = cs_ctl;
- ctl->card = info->card;
-
- kcontrol.name = name;
- kcontrol.info = hda_cs_dsp_coeff_info;
- kcontrol.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
- kcontrol.access = wmfw_convert_flags(cs_ctl->flags);
- kcontrol.get = hda_cs_dsp_coeff_get;
- kcontrol.put = hda_cs_dsp_coeff_put;
-
- kctl = snd_ctl_new1(&kcontrol, (void *)ctl);
- if (!kctl)
- return;
-
- kctl->private_free = hda_cs_dsp_free_kcontrol;
- ctl->kctl = kctl;
-
- /* snd_ctl_add() calls our private_free on error, which will kfree(ctl) */
- cs_ctl->priv = no_free_ptr(ctl);
- ret = snd_ctl_add(info->card, kctl);
- if (ret) {
- dev_err(cs_ctl->dsp->dev, "Failed to add KControl %s = %d\n", kcontrol.name, ret);
- return;
- }
-
- dev_dbg(cs_ctl->dsp->dev, "Added KControl: %s\n", kcontrol.name);
-}
-
-static void hda_cs_dsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl,
- const struct hda_cs_dsp_ctl_info *info)
-{
- struct cs_dsp *cs_dsp = cs_ctl->dsp;
- char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
- const char *region_name;
- int ret;
-
- region_name = cs_dsp_mem_region_name(cs_ctl->alg_region.type);
- if (!region_name) {
- dev_warn(cs_dsp->dev, "Unknown region type: %d\n", cs_ctl->alg_region.type);
- return;
- }
-
- ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s %.12s %x", info->device_name,
- cs_dsp->name, hda_cs_dsp_fw_text[info->fw_type], cs_ctl->alg_region.alg);
-
- if (cs_ctl->subname) {
- int avail = SNDRV_CTL_ELEM_ID_NAME_MAXLEN - ret - 2;
- int skip = 0;
-
- /* Truncate the subname from the start if it is too long */
- if (cs_ctl->subname_len > avail)
- skip = cs_ctl->subname_len - avail;
-
- snprintf(name + ret, SNDRV_CTL_ELEM_ID_NAME_MAXLEN - ret,
- " %.*s", cs_ctl->subname_len - skip, cs_ctl->subname + skip);
- }
-
- hda_cs_dsp_add_kcontrol(cs_ctl, info, name);
-}
-
-void hda_cs_dsp_add_controls(struct cs_dsp *dsp, const struct hda_cs_dsp_ctl_info *info)
-{
- struct cs_dsp_coeff_ctl *cs_ctl;
-
- /*
- * pwr_lock would cause mutex inversion with ALSA control lock compared
- * to the get/put functions.
- * It is safe to walk the list without holding a mutex because entries
- * are persistent and only cs_dsp_power_up() or cs_dsp_remove() can
- * change the list.
- */
- lockdep_assert_not_held(&dsp->pwr_lock);
-
- list_for_each_entry(cs_ctl, &dsp->ctl_list, list) {
- if (cs_ctl->flags & WMFW_CTL_FLAG_SYS)
- continue;
-
- if (cs_ctl->priv)
- continue;
-
- hda_cs_dsp_control_add(cs_ctl, info);
- }
-}
-EXPORT_SYMBOL_NS_GPL(hda_cs_dsp_add_controls, "SND_HDA_CS_DSP_CONTROLS");
-
-void hda_cs_dsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
-{
- struct hda_cs_dsp_coeff_ctl *ctl = cs_ctl->priv;
-
- /* ctl and kctl may already have been removed by ALSA private_free */
- if (ctl)
- snd_ctl_remove(ctl->card, ctl->kctl);
-}
-EXPORT_SYMBOL_NS_GPL(hda_cs_dsp_control_remove, "SND_HDA_CS_DSP_CONTROLS");
-
-int hda_cs_dsp_write_ctl(struct cs_dsp *dsp, const char *name, int type,
- unsigned int alg, const void *buf, size_t len)
-{
- struct cs_dsp_coeff_ctl *cs_ctl;
- int ret;
-
- mutex_lock(&dsp->pwr_lock);
- cs_ctl = cs_dsp_get_ctl(dsp, name, type, alg);
- ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
- mutex_unlock(&dsp->pwr_lock);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(hda_cs_dsp_write_ctl, "SND_HDA_CS_DSP_CONTROLS");
-
-int hda_cs_dsp_read_ctl(struct cs_dsp *dsp, const char *name, int type,
- unsigned int alg, void *buf, size_t len)
-{
- int ret;
-
- mutex_lock(&dsp->pwr_lock);
- ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(dsp, name, type, alg), 0, buf, len);
- mutex_unlock(&dsp->pwr_lock);
-
- return ret;
-
-}
-EXPORT_SYMBOL_NS_GPL(hda_cs_dsp_read_ctl, "SND_HDA_CS_DSP_CONTROLS");
-
-MODULE_DESCRIPTION("CS_DSP ALSA Control HDA Library");
-MODULE_AUTHOR("Stefan Binding, <sbinding@opensource.cirrus.com>");
-MODULE_LICENSE("GPL");
-MODULE_IMPORT_NS("FW_CS_DSP");
diff --git a/sound/pci/hda/hda_cs_dsp_ctl.h b/sound/pci/hda/hda_cs_dsp_ctl.h
deleted file mode 100644
index 2cf93359c4f2..000000000000
--- a/sound/pci/hda/hda_cs_dsp_ctl.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * HDA DSP ALSA Control Driver
- *
- * Copyright 2022 Cirrus Logic, Inc.
- *
- * Author: Stefan Binding <sbinding@opensource.cirrus.com>
- */
-
-#ifndef __HDA_CS_DSP_CTL_H__
-#define __HDA_CS_DSP_CTL_H__
-
-#include <sound/soc.h>
-#include <linux/firmware/cirrus/cs_dsp.h>
-
-enum hda_cs_dsp_fw_id {
- HDA_CS_DSP_FW_SPK_PROT,
- HDA_CS_DSP_FW_SPK_CALI,
- HDA_CS_DSP_FW_SPK_DIAG,
- HDA_CS_DSP_FW_MISC,
- HDA_CS_DSP_NUM_FW
-};
-
-struct hda_cs_dsp_ctl_info {
- struct snd_card *card;
- enum hda_cs_dsp_fw_id fw_type;
- const char *device_name;
-};
-
-extern const char * const hda_cs_dsp_fw_ids[HDA_CS_DSP_NUM_FW];
-
-void hda_cs_dsp_add_controls(struct cs_dsp *dsp, const struct hda_cs_dsp_ctl_info *info);
-void hda_cs_dsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl);
-int hda_cs_dsp_write_ctl(struct cs_dsp *dsp, const char *name, int type,
- unsigned int alg, const void *buf, size_t len);
-int hda_cs_dsp_read_ctl(struct cs_dsp *dsp, const char *name, int type,
- unsigned int alg, void *buf, size_t len);
-
-#endif /*__HDA_CS_DSP_CTL_H__*/
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 512fb22f5e5e..e6df706f740d 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -240,6 +240,7 @@ enum {
AZX_DRIVER_CTHDA,
AZX_DRIVER_CMEDIA,
AZX_DRIVER_ZHAOXIN,
+ AZX_DRIVER_ZHAOXINHDMI,
AZX_DRIVER_LOONGSON,
AZX_DRIVER_GENERIC,
AZX_NUM_DRIVERS, /* keep this as last entry */
@@ -355,6 +356,7 @@ static const char * const driver_short_names[] = {
[AZX_DRIVER_CTHDA] = "HDA Creative",
[AZX_DRIVER_CMEDIA] = "HDA C-Media",
[AZX_DRIVER_ZHAOXIN] = "HDA Zhaoxin",
+ [AZX_DRIVER_ZHAOXINHDMI] = "HDA Zhaoxin HDMI",
[AZX_DRIVER_LOONGSON] = "HDA Loongson",
[AZX_DRIVER_GENERIC] = "HD-Audio Generic",
};
@@ -1750,6 +1752,8 @@ static int default_bdl_pos_adj(struct azx *chip)
case AZX_DRIVER_ICH:
case AZX_DRIVER_PCH:
return 1;
+ case AZX_DRIVER_ZHAOXINHDMI:
+ return 128;
default:
return 32;
}
@@ -1877,12 +1881,14 @@ static int azx_first_init(struct azx *chip)
chip->jackpoll_interval = msecs_to_jiffies(1500);
}
- err = pcim_iomap_regions(pci, 1 << 0, "ICH HD audio");
- if (err < 0)
- return err;
+ if (chip->driver_type == AZX_DRIVER_ZHAOXINHDMI)
+ bus->polling_mode = 1;
+
+ bus->remap_addr = pcim_iomap_region(pci, 0, "ICH HD audio");
+ if (IS_ERR(bus->remap_addr))
+ return PTR_ERR(bus->remap_addr);
bus->addr = pci_resource_start(pci, 0);
- bus->remap_addr = pcim_iomap_table(pci)[0];
if (chip->driver_type == AZX_DRIVER_SKL)
snd_hdac_bus_parse_capabilities(bus);
@@ -1974,6 +1980,7 @@ static int azx_first_init(struct azx *chip)
chip->capture_streams = ATIHDMI_NUM_CAPTURE;
break;
case AZX_DRIVER_GFHDMI:
+ case AZX_DRIVER_ZHAOXINHDMI:
case AZX_DRIVER_GENERIC:
default:
chip->playback_streams = ICH6_NUM_PLAYBACK;
@@ -2542,6 +2549,8 @@ static const struct pci_device_id azx_ids[] = {
{ PCI_DEVICE_DATA(INTEL, HDA_PTL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) },
/* Panther Lake-H */
{ PCI_DEVICE_DATA(INTEL, HDA_PTL_H, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) },
+ /* Wildcat Lake */
+ { PCI_DEVICE_DATA(INTEL, HDA_WCL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) },
/* Apollolake (Broxton-P) */
{ PCI_DEVICE_DATA(INTEL, HDA_APL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON) },
/* Gemini-Lake */
@@ -2782,6 +2791,21 @@ static const struct pci_device_id azx_ids[] = {
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_HDMI },
/* Zhaoxin */
{ PCI_VDEVICE(ZHAOXIN, 0x3288), .driver_data = AZX_DRIVER_ZHAOXIN },
+ { PCI_VDEVICE(ZHAOXIN, 0x9141),
+ .driver_data = AZX_DRIVER_ZHAOXINHDMI | AZX_DCAPS_POSFIX_LPIB |
+ AZX_DCAPS_NO_MSI | AZX_DCAPS_NO_64BIT },
+ { PCI_VDEVICE(ZHAOXIN, 0x9142),
+ .driver_data = AZX_DRIVER_ZHAOXINHDMI | AZX_DCAPS_POSFIX_LPIB |
+ AZX_DCAPS_NO_MSI | AZX_DCAPS_NO_64BIT },
+ { PCI_VDEVICE(ZHAOXIN, 0x9144),
+ .driver_data = AZX_DRIVER_ZHAOXINHDMI | AZX_DCAPS_POSFIX_LPIB |
+ AZX_DCAPS_NO_MSI | AZX_DCAPS_NO_64BIT },
+ { PCI_VDEVICE(ZHAOXIN, 0x9145),
+ .driver_data = AZX_DRIVER_ZHAOXINHDMI | AZX_DCAPS_POSFIX_LPIB |
+ AZX_DCAPS_NO_MSI | AZX_DCAPS_NO_64BIT },
+ { PCI_VDEVICE(ZHAOXIN, 0x9146),
+ .driver_data = AZX_DRIVER_ZHAOXINHDMI | AZX_DCAPS_POSFIX_LPIB |
+ AZX_DCAPS_NO_MSI | AZX_DCAPS_NO_64BIT },
/* Loongson HDAudio*/
{ PCI_VDEVICE(LOONGSON, PCI_DEVICE_ID_LOONGSON_HDA),
.driver_data = AZX_DRIVER_LOONGSON | AZX_DCAPS_NO_TCSEL },
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 4714057dba85..68c31f5354b7 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -571,8 +571,6 @@ struct hda_nid_item {
int snd_hda_ctl_add(struct hda_codec *codec, hda_nid_t nid,
struct snd_kcontrol *kctl);
-int snd_hda_add_nid(struct hda_codec *codec, struct snd_kcontrol *kctl,
- unsigned int index, hda_nid_t nid);
void snd_hda_ctls_clear(struct hda_codec *codec);
/*
diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index a590d431c5ff..6ab338f37db5 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -72,6 +72,10 @@
struct hda_tegra_soc {
bool has_hda2codec_2x_reset;
bool has_hda2hdmi;
+ bool has_hda2codec_2x;
+ bool input_stream;
+ bool always_on;
+ bool requires_init;
};
struct hda_tegra {
@@ -187,7 +191,9 @@ static int hda_tegra_runtime_resume(struct device *dev)
if (rc != 0)
return rc;
if (chip->running) {
- hda_tegra_init(hda);
+ if (hda->soc->requires_init)
+ hda_tegra_init(hda);
+
azx_init_chip(chip, 1);
/* disable controller wake up event*/
azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
@@ -250,7 +256,8 @@ static int hda_tegra_init_chip(struct azx *chip, struct platform_device *pdev)
bus->remap_addr = hda->regs + HDA_BAR0;
bus->addr = res->start + HDA_BAR0;
- hda_tegra_init(hda);
+ if (hda->soc->requires_init)
+ hda_tegra_init(hda);
return 0;
}
@@ -323,7 +330,7 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev)
* starts with offset 0 which is wrong as HW register for output stream
* offset starts with 4.
*/
- if (of_device_is_compatible(np, "nvidia,tegra234-hda"))
+ if (!hda->soc->input_stream)
chip->capture_streams = 4;
chip->playback_streams = (gcap >> 12) & 0x0f;
@@ -377,14 +384,14 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev)
}
/* driver name */
- strscpy(card->driver, drv_name, sizeof(card->driver));
+ strscpy(card->driver, drv_name);
/* shortname for card */
sname = of_get_property(np, "nvidia,model", NULL);
if (!sname)
sname = drv_name;
if (strlen(sname) > sizeof(card->shortname))
dev_info(card->dev, "truncating shortname for card\n");
- strscpy(card->shortname, sname, sizeof(card->shortname));
+ strscpy(card->shortname, sname);
/* longname for card */
snprintf(card->longname, sizeof(card->longname),
@@ -419,7 +426,6 @@ static int hda_tegra_create(struct snd_card *card,
chip->driver_caps = driver_caps;
chip->driver_type = driver_caps & 0xff;
chip->dev_index = 0;
- chip->jackpoll_interval = msecs_to_jiffies(5000);
INIT_LIST_HEAD(&chip->pcm_list);
chip->codec_probe_mask = -1;
@@ -436,7 +442,16 @@ static int hda_tegra_create(struct snd_card *card,
chip->bus.core.sync_write = 0;
chip->bus.core.needs_damn_long_delay = 1;
chip->bus.core.aligned_mmio = 1;
- chip->bus.jackpoll_in_suspend = 1;
+
+ /*
+ * HDA power domain and clocks are always on for Tegra264 and
+ * the jack detection logic would work always, so no need of
+ * jack polling mechanism running.
+ */
+ if (!hda->soc->always_on) {
+ chip->jackpoll_interval = msecs_to_jiffies(5000);
+ chip->bus.jackpoll_in_suspend = 1;
+ }
err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
if (err < 0) {
@@ -450,22 +465,44 @@ static int hda_tegra_create(struct snd_card *card,
static const struct hda_tegra_soc tegra30_data = {
.has_hda2codec_2x_reset = true,
.has_hda2hdmi = true,
+ .has_hda2codec_2x = true,
+ .input_stream = true,
+ .always_on = false,
+ .requires_init = true,
};
static const struct hda_tegra_soc tegra194_data = {
.has_hda2codec_2x_reset = false,
.has_hda2hdmi = true,
+ .has_hda2codec_2x = true,
+ .input_stream = true,
+ .always_on = false,
+ .requires_init = true,
};
static const struct hda_tegra_soc tegra234_data = {
.has_hda2codec_2x_reset = true,
.has_hda2hdmi = false,
+ .has_hda2codec_2x = true,
+ .input_stream = false,
+ .always_on = false,
+ .requires_init = true,
+};
+
+static const struct hda_tegra_soc tegra264_data = {
+ .has_hda2codec_2x_reset = true,
+ .has_hda2hdmi = false,
+ .has_hda2codec_2x = false,
+ .input_stream = false,
+ .always_on = true,
+ .requires_init = false,
};
static const struct of_device_id hda_tegra_match[] = {
{ .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
{ .compatible = "nvidia,tegra194-hda", .data = &tegra194_data },
{ .compatible = "nvidia,tegra234-hda", .data = &tegra234_data },
+ { .compatible = "nvidia,tegra264-hda", .data = &tegra264_data },
{},
};
MODULE_DEVICE_TABLE(of, hda_tegra_match);
@@ -520,7 +557,9 @@ static int hda_tegra_probe(struct platform_device *pdev)
hda->clocks[hda->nclocks++].id = "hda";
if (hda->soc->has_hda2hdmi)
hda->clocks[hda->nclocks++].id = "hda2hdmi";
- hda->clocks[hda->nclocks++].id = "hda2codec_2x";
+
+ if (hda->soc->has_hda2codec_2x)
+ hda->clocks[hda->nclocks++].id = "hda2codec_2x";
err = devm_clk_bulk_get(&pdev->dev, hda->nclocks, hda->clocks);
if (err < 0)
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 7167989a8d86..08308231b4ed 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -4551,6 +4551,7 @@ HDA_CODEC_ENTRY(0x10de002e, "Tegra186 HDMI/DP1", patch_tegra_hdmi),
HDA_CODEC_ENTRY(0x10de002f, "Tegra194 HDMI/DP2", patch_tegra_hdmi),
HDA_CODEC_ENTRY(0x10de0030, "Tegra194 HDMI/DP3", patch_tegra_hdmi),
HDA_CODEC_ENTRY(0x10de0031, "Tegra234 HDMI/DP", patch_tegra234_hdmi),
+HDA_CODEC_ENTRY(0x10de0034, "Tegra264 HDMI/DP", patch_tegra234_hdmi),
HDA_CODEC_ENTRY(0x10de0040, "GPU 40 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi),
@@ -4610,6 +4611,17 @@ HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi),
HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP", patch_via_hdmi),
HDA_CODEC_ENTRY(0x11069f84, "VX11 HDMI/DP", patch_generic_hdmi),
HDA_CODEC_ENTRY(0x11069f85, "VX11 HDMI/DP", patch_generic_hdmi),
+HDA_CODEC_ENTRY(0x1d179f86, "ZX-100S HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f87, "ZX-100S HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f88, "KX-5000 HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f89, "KX-5000 HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f8a, "KX-6000 HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f8b, "KX-6000 HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f8c, "KX-6000G HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f8d, "KX-6000G HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f8e, "KX-7000 HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f8f, "KX-7000 HDMI/DP", patch_gf_hdmi),
+HDA_CODEC_ENTRY(0x1d179f90, "KX-7000 HDMI/DP", patch_gf_hdmi),
HDA_CODEC_ENTRY(0x80860054, "IbexPeak HDMI", patch_i915_cpt_hdmi),
HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
HDA_CODEC_ENTRY(0x80862801, "Bearlake HDMI", patch_generic_hdmi),
@@ -4640,6 +4652,7 @@ HDA_CODEC_ENTRY(0x8086281e, "Battlemage HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x8086281f, "Raptor Lake P HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x80862820, "Lunar Lake HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x80862822, "Panther Lake HDMI", patch_i915_adlp_hdmi),
+HDA_CODEC_ENTRY(0x80862823, "Wildcat Lake HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi),
HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8a2b09e4a7d5..cd0d7ba7320e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6830,7 +6830,10 @@ static void alc256_fixup_chromebook(struct hda_codec *codec,
switch (action) {
case HDA_FIXUP_ACT_PRE_PROBE:
- spec->gen.suppress_auto_mute = 1;
+ if (codec->core.subsystem_id == 0x10280d76)
+ spec->gen.suppress_auto_mute = 0;
+ else
+ spec->gen.suppress_auto_mute = 1;
spec->gen.suppress_auto_mic = 1;
spec->en_3kpull_low = false;
break;
@@ -8026,6 +8029,7 @@ enum {
ALC283_FIXUP_DELL_HP_RESUME,
ALC294_FIXUP_ASUS_CS35L41_SPI_2,
ALC274_FIXUP_HP_AIO_BIND_DACS,
+ ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9298,6 +9302,12 @@ static const struct hda_fixup alc269_fixups[] = {
{ }
}
},
+ [ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l41_fixup_i2c_two,
+ .chained = true,
+ .chain_id = ALC255_FIXUP_PREDATOR_SUBWOOFER
+ },
[ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -10453,6 +10463,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1534, "Acer Predator PH315-54", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x159c, "Acer Nitro 5 AN515-58", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED),
+ SND_PCI_QUIRK(0x1025, 0x1826, "Acer Helios ZPC", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1025, 0x182c, "Acer Helios ZPD", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1025, 0x1844, "Acer Helios ZPS", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
@@ -10882,9 +10895,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
@@ -11300,6 +11316,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
SND_PCI_QUIRK(0x17aa, 0x38fd, "ThinkBook plus Gen5 Hybrid", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC),
SND_PCI_QUIRK(0x17aa, 0x391f, "Yoga S990-16 pro Quad YC Quad", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TAS2781_I2C),
diff --git a/sound/pci/hda/tas2781-spi.h b/sound/pci/hda/tas2781-spi.h
deleted file mode 100644
index 7a0faceeb675..000000000000
--- a/sound/pci/hda/tas2781-spi.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-//
-// ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier
-//
-// Copyright (C) 2024 Texas Instruments Incorporated
-// https://www.ti.com
-//
-// The TAS2781 driver implements a flexible and configurable
-// algo coefficient setting for TAS2781 chips.
-//
-// Author: Baojun Xu <baojun.xu@ti.com>
-//
-
-#ifndef __TAS2781_SPI_H__
-#define __TAS2781_SPI_H__
-
-#define TASDEVICE_RATES \
- (SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | \
- SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_88200)
-
-#define TASDEVICE_FORMATS \
- (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | \
- SNDRV_PCM_FMTBIT_S32_LE)
-
-#define TASDEVICE_MAX_BOOK_NUM 256
-#define TASDEVICE_MAX_PAGE 256
-
-#define TASDEVICE_MAX_SIZE (TASDEVICE_MAX_BOOK_NUM * TASDEVICE_MAX_PAGE)
-
-/* PAGE Control Register (available in page0 of each book) */
-#define TASDEVICE_PAGE_SELECT 0x00
-#define TASDEVICE_BOOKCTL_PAGE 0x00
-#define TASDEVICE_BOOKCTL_REG GENMASK(7, 1)
-#define TASDEVICE_BOOK_ID(reg) (((reg) & GENMASK(24, 16)) >> 16)
-#define TASDEVICE_PAGE_ID(reg) (((reg) & GENMASK(15, 8)) >> 8)
-#define TASDEVICE_REG_ID(reg) (((reg) & GENMASK(7, 1)) >> 1)
-#define TASDEVICE_PAGE_REG(reg) ((reg) & GENMASK(15, 1))
-#define TASDEVICE_REG(book, page, reg) \
- (((book) << 16) | ((page) << 8) | ((reg) << 1))
-
-/* Software Reset */
-#define TAS2781_REG_SWRESET TASDEVICE_REG(0x0, 0x0, 0x01)
-#define TAS2781_REG_SWRESET_RESET BIT(0)
-
-/* System Reset Check Register */
-#define TAS2781_REG_CLK_CONFIG TASDEVICE_REG(0x0, 0x0, 0x5c)
-#define TAS2781_REG_CLK_CONFIG_RESET (0x19)
-#define TAS2781_PRE_POST_RESET_CFG 3
-
-/* Block Checksum */
-#define TASDEVICE_CHECKSUM TASDEVICE_REG(0x0, 0x0, 0x7e)
-
-/* Volume control */
-#define TAS2781_DVC_LVL TASDEVICE_REG(0x0, 0x0, 0x1a)
-#define TAS2781_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03)
-#define TAS2781_AMP_LEVEL_MASK GENMASK(5, 1)
-
-#define TASDEVICE_CMD_SING_W 0x1
-#define TASDEVICE_CMD_BURST 0x2
-#define TASDEVICE_CMD_DELAY 0x3
-#define TASDEVICE_CMD_FIELD_W 0x4
-
-#define TAS2781_SPI_MAX_FREQ (4 * HZ_PER_MHZ)
-
-#define TASDEVICE_CRC8_POLYNOMIAL 0x4d
-#define TASDEVICE_SPEAKER_CALIBRATION_SIZE 20
-
-/* Flag of calibration registers address. */
-#define TASDEVICE_CALIBRATION_REG_ADDRESS BIT(7)
-
-#define TASDEVICE_CALIBRATION_DATA_NAME L"CALI_DATA"
-#define TASDEVICE_CALIBRATION_DATA_SIZE 256
-
-enum calib_data {
- R0_VAL = 0,
- INV_R0,
- R0LOW,
- POWER,
- TLIM,
- CALIB_MAX
-};
-
-struct tasdevice_priv {
- struct tasdevice_fw *cali_data_fmw;
- struct tasdevice_rca rcabin;
- struct tasdevice_fw *fmw;
- struct gpio_desc *reset;
- struct mutex codec_lock;
- struct regmap *regmap;
- struct device *dev;
-
- unsigned char crc8_lkp_tbl[CRC8_TABLE_SIZE];
- unsigned char coef_binaryname[64];
- unsigned char rca_binaryname[64];
- unsigned char dev_name[32];
-
- bool force_fwload_status;
- bool playback_started;
- bool is_loading;
- bool is_loaderr;
- unsigned int cali_reg_array[CALIB_MAX];
- unsigned int cali_data[CALIB_MAX];
- unsigned int err_code;
- void *codec;
- int cur_book;
- int cur_prog;
- int cur_conf;
- int fw_state;
- int index;
- int irq;
-
- int (*fw_parse_variable_header)(struct tasdevice_priv *tas_priv,
- const struct firmware *fmw,
- int offset);
- int (*fw_parse_program_data)(struct tasdevice_priv *tas_priv,
- struct tasdevice_fw *tas_fmw,
- const struct firmware *fmw, int offset);
- int (*fw_parse_configuration_data)(struct tasdevice_priv *tas_priv,
- struct tasdevice_fw *tas_fmw,
- const struct firmware *fmw,
- int offset);
- int (*tasdevice_load_block)(struct tasdevice_priv *tas_priv,
- struct tasdev_blk *block);
-
- int (*save_calibration)(struct tasdevice_priv *tas_priv);
- void (*apply_calibration)(struct tasdevice_priv *tas_priv);
-};
-
-int tasdevice_spi_dev_read(struct tasdevice_priv *tas_priv,
- unsigned int reg, unsigned int *value);
-int tasdevice_spi_dev_write(struct tasdevice_priv *tas_priv,
- unsigned int reg, unsigned int value);
-int tasdevice_spi_dev_bulk_write(struct tasdevice_priv *tas_priv,
- unsigned int reg, unsigned char *p_data,
- unsigned int n_length);
-int tasdevice_spi_dev_bulk_read(struct tasdevice_priv *tas_priv,
- unsigned int reg, unsigned char *p_data,
- unsigned int n_length);
-int tasdevice_spi_dev_update_bits(struct tasdevice_priv *tasdevice,
- unsigned int reg, unsigned int mask,
- unsigned int value);
-
-void tasdevice_spi_select_cfg_blk(void *context, int conf_no,
- unsigned char block_type);
-void tasdevice_spi_config_info_remove(void *context);
-int tasdevice_spi_dsp_parser(void *context);
-int tasdevice_spi_rca_parser(void *context, const struct firmware *fmw);
-void tasdevice_spi_dsp_remove(void *context);
-void tasdevice_spi_calbin_remove(void *context);
-int tasdevice_spi_select_tuningprm_cfg(void *context, int prm, int cfg_no,
- int rca_conf_no);
-int tasdevice_spi_prmg_load(void *context, int prm_no);
-int tasdevice_spi_prmg_calibdata_load(void *context, int prm_no);
-void tasdevice_spi_tuning_switch(void *context, int state);
-int tas2781_spi_load_calibration(void *context, char *file_name,
- unsigned short i);
-#endif /* __TAS2781_SPI_H__ */
diff --git a/sound/pci/hda/tas2781_hda.c b/sound/pci/hda/tas2781_hda.c
new file mode 100644
index 000000000000..5f1d4b3e9688
--- /dev/null
+++ b/sound/pci/hda/tas2781_hda.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// TAS2781 HDA Shared Lib for I2C&SPI driver
+//
+// Copyright 2025 Texas Instruments, Inc.
+//
+// Author: Shenghao Ding <shenghao-ding@ti.com>
+
+#include <linux/component.h>
+#include <linux/crc8.h>
+#include <linux/crc32.h>
+#include <linux/efi.h>
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include <linux/pm_runtime.h>
+#include <sound/soc.h>
+#include <sound/tas2781.h>
+
+#include "tas2781_hda.h"
+
+const efi_guid_t tasdev_fct_efi_guid[] = {
+ /* DELL */
+ EFI_GUID(0xcc92382d, 0x6337, 0x41cb, 0xa8, 0x8b, 0x8e, 0xce, 0x74,
+ 0x91, 0xea, 0x9f),
+ /* HP */
+ EFI_GUID(0x02f9af02, 0x7734, 0x4233, 0xb4, 0x3d, 0x93, 0xfe, 0x5a,
+ 0xa3, 0x5d, 0xb3),
+ /* LENOVO & OTHERS */
+ EFI_GUID(0x1f52d2a1, 0xbb3a, 0x457d, 0xbc, 0x09, 0x43, 0xa3, 0xf4,
+ 0x31, 0x0a, 0x92),
+};
+EXPORT_SYMBOL_NS_GPL(tasdev_fct_efi_guid, "SND_HDA_SCODEC_TAS2781");
+
+static void tas2781_apply_calib(struct tasdevice_priv *p)
+{
+ struct calidata *cali_data = &p->cali_data;
+ struct cali_reg *r = &cali_data->cali_reg_array;
+ unsigned char *data = cali_data->data;
+ unsigned int *tmp_val = (unsigned int *)data;
+ unsigned int cali_reg[TASDEV_CALIB_N] = {
+ TASDEVICE_REG(0, 0x17, 0x74),
+ TASDEVICE_REG(0, 0x18, 0x0c),
+ TASDEVICE_REG(0, 0x18, 0x14),
+ TASDEVICE_REG(0, 0x13, 0x70),
+ TASDEVICE_REG(0, 0x18, 0x7c),
+ };
+ unsigned int crc, oft;
+ unsigned char *buf;
+ int i, j, k, l;
+
+ if (tmp_val[0] == 2781) {
+ /*
+ * New features were added in calibrated Data V3:
+ * 1. Added calibration registers address define in
+ * a node, marked as Device id == 0x80.
+ * New features were added in calibrated Data V2:
+ * 1. Added some the fields to store the link_id and
+ * uniqie_id for multi-link solutions
+ * 2. Support flexible number of devices instead of
+ * fixed one in V1.
+ * Layout of calibrated data V2 in UEFI(total 256 bytes):
+ * ChipID (2781, 4 bytes)
+ * Data-Group-Sum (4 bytes)
+ * TimeStamp of Calibration (4 bytes)
+ * for (i = 0; i < Data-Group-Sum; i++) {
+ * if (Data type != 0x80) (4 bytes)
+ * Calibrated Data of Device #i (20 bytes)
+ * else
+ * Calibration registers address (5*4 = 20 bytes)
+ * # V2: No reg addr in data grp section.
+ * # V3: Normally the last grp is the reg addr.
+ * }
+ * CRC (4 bytes)
+ * Reserved (the rest)
+ */
+ crc = crc32(~0, data, (3 + tmp_val[1] * 6) * 4) ^ ~0;
+
+ if (crc != tmp_val[3 + tmp_val[1] * 6]) {
+ cali_data->total_sz = 0;
+ dev_err(p->dev, "%s: CRC error\n", __func__);
+ return;
+ }
+
+ for (j = 0, k = 0; j < tmp_val[1]; j++) {
+ oft = j * 6 + 3;
+ if (tmp_val[oft] == TASDEV_UEFI_CALI_REG_ADDR_FLG) {
+ for (i = 0; i < TASDEV_CALIB_N; i++) {
+ buf = &data[(oft + i + 1) * 4];
+ cali_reg[i] = TASDEVICE_REG(buf[1],
+ buf[2], buf[3]);
+ }
+ } else {
+ l = j * (cali_data->cali_dat_sz_per_dev + 1);
+ if (k >= p->ndev || l > oft * 4) {
+ dev_err(p->dev, "%s: dev sum error\n",
+ __func__);
+ cali_data->total_sz = 0;
+ return;
+ }
+
+ data[l] = k;
+ for (i = 0; i < TASDEV_CALIB_N * 4; i++)
+ data[l + i] = data[4 * oft + i];
+ k++;
+ }
+ }
+ } else {
+ /*
+ * Calibration data is in V1 format.
+ * struct cali_data {
+ * char cali_data[20];
+ * }
+ *
+ * struct {
+ * struct cali_data cali_data[4];
+ * int TimeStamp of Calibration (4 bytes)
+ * int CRC (4 bytes)
+ * } ueft;
+ */
+ crc = crc32(~0, data, 84) ^ ~0;
+ if (crc != tmp_val[21]) {
+ cali_data->total_sz = 0;
+ dev_err(p->dev, "%s: V1 CRC error\n", __func__);
+ return;
+ }
+
+ for (j = p->ndev - 1; j >= 0; j--) {
+ l = j * (cali_data->cali_dat_sz_per_dev + 1);
+ for (i = TASDEV_CALIB_N * 4; i > 0 ; i--)
+ data[l + i] = data[p->index * 5 + i];
+ data[l+i] = j;
+ }
+ }
+
+ if (p->dspbin_typ == TASDEV_BASIC) {
+ r->r0_reg = cali_reg[0];
+ r->invr0_reg = cali_reg[1];
+ r->r0_low_reg = cali_reg[2];
+ r->pow_reg = cali_reg[3];
+ r->tlimit_reg = cali_reg[4];
+ }
+
+ p->is_user_space_calidata = true;
+ cali_data->total_sz = p->ndev * (cali_data->cali_dat_sz_per_dev + 1);
+}
+
+/*
+ * Update the calibration data, including speaker impedance, f0, etc,
+ * into algo. Calibrate data is done by manufacturer in the factory.
+ * The data is used by Algo for calculating the speaker temperature,
+ * speaker membrane excursion and f0 in real time during playback.
+ * Calibration data format in EFI is V2, since 2024.
+ */
+int tas2781_save_calibration(struct tas2781_hda *hda)
+{
+ /*
+ * GUID was used for data access in BIOS, it was provided by board
+ * manufactory.
+ */
+ efi_guid_t efi_guid = tasdev_fct_efi_guid[LENOVO];
+ static efi_char16_t efi_name[] = TASDEVICE_CALIBRATION_DATA_NAME;
+ struct tasdevice_priv *p = hda->priv;
+ struct calidata *cali_data = &p->cali_data;
+ unsigned long total_sz = 0;
+ unsigned int attr, size;
+ unsigned char *data;
+ efi_status_t status;
+
+ if (hda->catlog_id < LENOVO)
+ efi_guid = tasdev_fct_efi_guid[hda->catlog_id];
+
+ cali_data->cali_dat_sz_per_dev = 20;
+ size = p->ndev * (cali_data->cali_dat_sz_per_dev + 1);
+ /* Get real size of UEFI variable */
+ status = efi.get_variable(efi_name, &efi_guid, &attr, &total_sz, NULL);
+ cali_data->total_sz = total_sz > size ? total_sz : size;
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ /* Allocate data buffer of data_size bytes */
+ data = p->cali_data.data = devm_kzalloc(p->dev,
+ p->cali_data.total_sz, GFP_KERNEL);
+ if (!data) {
+ p->cali_data.total_sz = 0;
+ return -ENOMEM;
+ }
+ /* Get variable contents into buffer */
+ status = efi.get_variable(efi_name, &efi_guid, &attr,
+ &p->cali_data.total_sz, data);
+ }
+ if (status != EFI_SUCCESS) {
+ p->cali_data.total_sz = 0;
+ return status;
+ }
+
+ tas2781_apply_calib(p);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tas2781_save_calibration, "SND_HDA_SCODEC_TAS2781");
+
+void tas2781_hda_remove(struct device *dev,
+ const struct component_ops *ops)
+{
+ struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+
+ component_del(tas_hda->dev, ops);
+
+ pm_runtime_get_sync(tas_hda->dev);
+ pm_runtime_disable(tas_hda->dev);
+
+ pm_runtime_put_noidle(tas_hda->dev);
+
+ tasdevice_remove(tas_hda->priv);
+}
+EXPORT_SYMBOL_NS_GPL(tas2781_hda_remove, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_info_profile(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = tas_priv->rcabin.ncfgs - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_info_profile, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_info_programs(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = tas_priv->fmw->nr_programs - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_info_programs, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_info_config(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ struct tasdevice_fw *tas_fw = tas_priv->fmw;
+
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = tas_fw->nr_configurations - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_info_config, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+
+ ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id;
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", __func__,
+ kcontrol->id.name, tas_priv->rcabin.profile_cfg_id);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_get_profile_id, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ int profile_id = ucontrol->value.integer.value[0];
+ int max = tas_priv->rcabin.ncfgs - 1;
+ int val, ret = 0;
+
+ val = clamp(profile_id, 0, max);
+
+ guard(mutex)(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", __func__,
+ kcontrol->id.name, tas_priv->rcabin.profile_cfg_id, val);
+
+ if (tas_priv->rcabin.profile_cfg_id != val) {
+ tas_priv->rcabin.profile_cfg_id = val;
+ ret = 1;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_set_profile_id, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_program_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+
+ ucontrol->value.integer.value[0] = tas_priv->cur_prog;
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", __func__,
+ kcontrol->id.name, tas_priv->cur_prog);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_program_get, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_program_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ struct tasdevice_fw *tas_fw = tas_priv->fmw;
+ int nr_program = ucontrol->value.integer.value[0];
+ int max = tas_fw->nr_programs - 1;
+ int val, ret = 0;
+
+ val = clamp(nr_program, 0, max);
+
+ guard(mutex)(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", __func__,
+ kcontrol->id.name, tas_priv->cur_prog, val);
+
+ if (tas_priv->cur_prog != val) {
+ tas_priv->cur_prog = val;
+ ret = 1;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_program_put, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_config_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+
+ ucontrol->value.integer.value[0] = tas_priv->cur_conf;
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", __func__,
+ kcontrol->id.name, tas_priv->cur_conf);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_config_get, "SND_HDA_SCODEC_TAS2781");
+
+int tasdevice_config_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ struct tasdevice_fw *tas_fw = tas_priv->fmw;
+ int nr_config = ucontrol->value.integer.value[0];
+ int max = tas_fw->nr_configurations - 1;
+ int val, ret = 0;
+
+ val = clamp(nr_config, 0, max);
+
+ guard(mutex)(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", __func__,
+ kcontrol->id.name, tas_priv->cur_conf, val);
+
+ if (tas_priv->cur_conf != val) {
+ tas_priv->cur_conf = val;
+ ret = 1;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(tasdevice_config_put, "SND_HDA_SCODEC_TAS2781");
+
+MODULE_DESCRIPTION("TAS2781 HDA Driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Shenghao Ding, TI, <shenghao-ding@ti.com>");
diff --git a/sound/pci/hda/tas2781_hda.h b/sound/pci/hda/tas2781_hda.h
new file mode 100644
index 000000000000..575a701c8dfb
--- /dev/null
+++ b/sound/pci/hda/tas2781_hda.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * HDA audio driver for Texas Instruments TAS2781 smart amp
+ *
+ * Copyright (C) 2025 Texas Instruments, Inc.
+ */
+#ifndef __TAS2781_HDA_H__
+#define __TAS2781_HDA_H__
+
+#include <sound/asound.h>
+
+/* Flag of calibration registers address. */
+#define TASDEV_UEFI_CALI_REG_ADDR_FLG BIT(7)
+#define TASDEVICE_CALIBRATION_DATA_NAME L"CALI_DATA"
+#define TASDEV_CALIB_N 5
+
+/*
+ * No standard control callbacks for SNDRV_CTL_ELEM_IFACE_CARD
+ * Define two controls, one is Volume control callbacks, the other is
+ * flag setting control callbacks.
+ */
+
+/* Volume control callbacks for tas2781 */
+#define ACARD_SINGLE_RANGE_EXT_TLV(xname, xreg, xshift, xmin, xmax, xinvert, \
+ xhandler_get, xhandler_put, tlv_array) { \
+ .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = (xname), \
+ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
+ SNDRV_CTL_ELEM_ACCESS_READWRITE, \
+ .tlv.p = (tlv_array), \
+ .info = snd_soc_info_volsw, \
+ .get = xhandler_get, .put = xhandler_put, \
+ .private_value = (unsigned long)&(struct soc_mixer_control) { \
+ .reg = xreg, .rreg = xreg, \
+ .shift = xshift, .rshift = xshift,\
+ .min = xmin, .max = xmax, .invert = xinvert, \
+ } \
+}
+
+/* Flag control callbacks for tas2781 */
+#define ACARD_SINGLE_BOOL_EXT(xname, xdata, xhandler_get, xhandler_put) { \
+ .iface = SNDRV_CTL_ELEM_IFACE_CARD, \
+ .name = xname, \
+ .info = snd_ctl_boolean_mono_info, \
+ .get = xhandler_get, \
+ .put = xhandler_put, \
+ .private_value = xdata, \
+}
+
+enum device_catlog_id {
+ DELL = 0,
+ HP,
+ LENOVO,
+ OTHERS
+};
+
+struct tas2781_hda {
+ struct device *dev;
+ struct tasdevice_priv *priv;
+ struct snd_kcontrol *dsp_prog_ctl;
+ struct snd_kcontrol *dsp_conf_ctl;
+ struct snd_kcontrol *prof_ctl;
+ enum device_catlog_id catlog_id;
+ void *hda_priv;
+};
+
+extern const efi_guid_t tasdev_fct_efi_guid[];
+
+int tas2781_save_calibration(struct tas2781_hda *p);
+void tas2781_hda_remove(struct device *dev,
+ const struct component_ops *ops);
+int tasdevice_info_profile(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_info *uctl);
+int tasdevice_info_programs(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_info *uctl);
+int tasdevice_info_config(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_info *uctl);
+int tasdevice_set_profile_id(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_value *uctl);
+int tasdevice_get_profile_id(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_value *uctl);
+int tasdevice_program_get(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_value *uctl);
+int tasdevice_program_put(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_value *uctl);
+int tasdevice_config_put(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_value *uctl);
+int tasdevice_config_get(struct snd_kcontrol *kctl,
+ struct snd_ctl_elem_value *uctl);
+
+#endif
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c
index 29dc4f500580..d91eed9f7804 100644
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -2,7 +2,7 @@
//
// TAS2781 HDA I2C driver
//
-// Copyright 2023 - 2024 Texas Instruments, Inc.
+// Copyright 2023 - 2025 Texas Instruments, Inc.
//
// Author: Shenghao Ding <shenghao-ding@ti.com>
// Current maintainer: Baojun Xu <baojun.xu@ti.com>
@@ -22,6 +22,7 @@
#include <sound/hda_codec.h>
#include <sound/soc.h>
#include <sound/tas2781.h>
+#include <sound/tas2781-comlib-i2c.h>
#include <sound/tlv.h>
#include <sound/tas2781-tlv.h>
@@ -30,69 +31,23 @@
#include "hda_component.h"
#include "hda_jack.h"
#include "hda_generic.h"
+#include "tas2781_hda.h"
-#define TASDEVICE_SPEAKER_CALIBRATION_SIZE 20
-
-/* No standard control callbacks for SNDRV_CTL_ELEM_IFACE_CARD
- * Define two controls, one is Volume control callbacks, the other is
- * flag setting control callbacks.
- */
-
-/* Volume control callbacks for tas2781 */
-#define ACARD_SINGLE_RANGE_EXT_TLV(xname, xreg, xshift, xmin, xmax, xinvert, \
- xhandler_get, xhandler_put, tlv_array) \
-{ .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = (xname),\
- .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ |\
- SNDRV_CTL_ELEM_ACCESS_READWRITE,\
- .tlv.p = (tlv_array), \
- .info = snd_soc_info_volsw, \
- .get = xhandler_get, .put = xhandler_put, \
- .private_value = (unsigned long)&(struct soc_mixer_control) \
- {.reg = xreg, .rreg = xreg, .shift = xshift, \
- .rshift = xshift, .min = xmin, .max = xmax, \
- .invert = xinvert} }
-
-/* Flag control callbacks for tas2781 */
-#define ACARD_SINGLE_BOOL_EXT(xname, xdata, xhandler_get, xhandler_put) \
-{ .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = xname, \
- .info = snd_ctl_boolean_mono_info, \
- .get = xhandler_get, .put = xhandler_put, \
- .private_value = xdata }
-
-enum calib_data {
- R0_VAL = 0,
- INV_R0,
- R0LOW,
- POWER,
- TLIM,
- CALIB_MAX
-};
-
-#define TAS2563_MAX_CHANNELS 4
-
-#define TAS2563_CAL_POWER TASDEVICE_REG(0, 0x0d, 0x3c)
-#define TAS2563_CAL_R0 TASDEVICE_REG(0, 0x0f, 0x34)
-#define TAS2563_CAL_INVR0 TASDEVICE_REG(0, 0x0f, 0x40)
-#define TAS2563_CAL_R0_LOW TASDEVICE_REG(0, 0x0f, 0x48)
-#define TAS2563_CAL_TLIM TASDEVICE_REG(0, 0x10, 0x14)
-#define TAS2563_CAL_N 5
-#define TAS2563_CAL_DATA_SIZE 4
-#define TAS2563_CAL_CH_SIZE 20
-#define TAS2563_CAL_ARRAY_SIZE 80
-
-static unsigned int cal_regs[TAS2563_CAL_N] = {
- TAS2563_CAL_POWER, TAS2563_CAL_R0, TAS2563_CAL_INVR0,
- TAS2563_CAL_R0_LOW, TAS2563_CAL_TLIM,
-};
+#define TAS2563_CAL_VAR_NAME_MAX 16
+#define TAS2563_CAL_ARRAY_SIZE 80
+#define TAS2563_CAL_DATA_SIZE 4
+#define TAS2563_MAX_CHANNELS 4
+#define TAS2563_CAL_CH_SIZE 20
+#define TAS2563_CAL_R0_LOW TASDEVICE_REG(0, 0x0f, 0x48)
+#define TAS2563_CAL_POWER TASDEVICE_REG(0, 0x0d, 0x3c)
+#define TAS2563_CAL_INVR0 TASDEVICE_REG(0, 0x0f, 0x40)
+#define TAS2563_CAL_TLIM TASDEVICE_REG(0, 0x10, 0x14)
+#define TAS2563_CAL_R0 TASDEVICE_REG(0, 0x0f, 0x34)
-struct tas2781_hda {
- struct device *dev;
- struct tasdevice_priv *priv;
- struct snd_kcontrol *dsp_prog_ctl;
- struct snd_kcontrol *dsp_conf_ctl;
- struct snd_kcontrol *prof_ctl;
+struct tas2781_hda_i2c_priv {
struct snd_kcontrol *snd_ctls[2];
+ int (*save_calibration)(struct tas2781_hda *h);
};
static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data)
@@ -210,176 +165,6 @@ static void tas2781_hda_playback_hook(struct device *dev, int action)
}
}
-static int tasdevice_info_profile(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *uinfo)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
- uinfo->count = 1;
- uinfo->value.integer.min = 0;
- uinfo->value.integer.max = tas_priv->rcabin.ncfgs - 1;
-
- return 0;
-}
-
-static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- mutex_lock(&tas_priv->codec_lock);
-
- ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id;
-
- dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
- __func__, kcontrol->id.name, tas_priv->rcabin.profile_cfg_id);
-
- mutex_unlock(&tas_priv->codec_lock);
-
- return 0;
-}
-
-static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- int nr_profile = ucontrol->value.integer.value[0];
- int max = tas_priv->rcabin.ncfgs - 1;
- int val, ret = 0;
-
- val = clamp(nr_profile, 0, max);
-
- mutex_lock(&tas_priv->codec_lock);
-
- dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
- __func__, kcontrol->id.name,
- tas_priv->rcabin.profile_cfg_id, val);
-
- if (tas_priv->rcabin.profile_cfg_id != val) {
- tas_priv->rcabin.profile_cfg_id = val;
- ret = 1;
- }
-
- mutex_unlock(&tas_priv->codec_lock);
-
- return ret;
-}
-
-static int tasdevice_info_programs(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *uinfo)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct tasdevice_fw *tas_fw = tas_priv->fmw;
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
- uinfo->count = 1;
- uinfo->value.integer.min = 0;
- uinfo->value.integer.max = tas_fw->nr_programs - 1;
-
- return 0;
-}
-
-static int tasdevice_info_config(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *uinfo)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct tasdevice_fw *tas_fw = tas_priv->fmw;
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
- uinfo->count = 1;
- uinfo->value.integer.min = 0;
- uinfo->value.integer.max = tas_fw->nr_configurations - 1;
-
- return 0;
-}
-
-static int tasdevice_program_get(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- mutex_lock(&tas_priv->codec_lock);
-
- ucontrol->value.integer.value[0] = tas_priv->cur_prog;
-
- dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
- __func__, kcontrol->id.name, tas_priv->cur_prog);
-
- mutex_unlock(&tas_priv->codec_lock);
-
- return 0;
-}
-
-static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct tasdevice_fw *tas_fw = tas_priv->fmw;
- int nr_program = ucontrol->value.integer.value[0];
- int max = tas_fw->nr_programs - 1;
- int val, ret = 0;
-
- val = clamp(nr_program, 0, max);
-
- mutex_lock(&tas_priv->codec_lock);
-
- dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
- __func__, kcontrol->id.name, tas_priv->cur_prog, val);
-
- if (tas_priv->cur_prog != val) {
- tas_priv->cur_prog = val;
- ret = 1;
- }
-
- mutex_unlock(&tas_priv->codec_lock);
-
- return ret;
-}
-
-static int tasdevice_config_get(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- mutex_lock(&tas_priv->codec_lock);
-
- ucontrol->value.integer.value[0] = tas_priv->cur_conf;
-
- dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
- __func__, kcontrol->id.name, tas_priv->cur_conf);
-
- mutex_unlock(&tas_priv->codec_lock);
-
- return 0;
-}
-
-static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct tasdevice_fw *tas_fw = tas_priv->fmw;
- int nr_config = ucontrol->value.integer.value[0];
- int max = tas_fw->nr_configurations - 1;
- int val, ret = 0;
-
- val = clamp(nr_config, 0, max);
-
- mutex_lock(&tas_priv->codec_lock);
-
- dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
- __func__, kcontrol->id.name, tas_priv->cur_conf, val);
-
- if (tas_priv->cur_conf != val) {
- tas_priv->cur_conf = val;
- ret = 1;
- }
-
- mutex_unlock(&tas_priv->codec_lock);
-
- return ret;
-}
-
static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
@@ -493,170 +278,103 @@ static const struct snd_kcontrol_new tas2781_dsp_conf_ctrl = {
.put = tasdevice_config_put,
};
-static void tas2563_apply_calib(struct tasdevice_priv *tas_priv)
+static int tas2563_save_calibration(struct tas2781_hda *h)
{
- int offset = 0;
- __be32 data;
- int ret;
-
- for (int i = 0; i < tas_priv->ndev; i++) {
- for (int j = 0; j < TAS2563_CAL_N; ++j) {
- data = cpu_to_be32(
- *(uint32_t *)&tas_priv->cali_data.data[offset]);
- ret = tasdevice_dev_bulk_write(tas_priv, i, cal_regs[j],
- (unsigned char *)&data, TAS2563_CAL_DATA_SIZE);
- if (ret)
- dev_err(tas_priv->dev,
- "Error writing calib regs\n");
- offset += TAS2563_CAL_DATA_SIZE;
- }
- }
-}
-
-static int tas2563_save_calibration(struct tasdevice_priv *tas_priv)
-{
- static efi_guid_t efi_guid = EFI_GUID(0x1f52d2a1, 0xbb3a, 0x457d, 0xbc,
- 0x09, 0x43, 0xa3, 0xf4, 0x31, 0x0a, 0x92);
-
- static efi_char16_t *efi_vars[TAS2563_MAX_CHANNELS][TAS2563_CAL_N] = {
- { L"Power_1", L"R0_1", L"InvR0_1", L"R0_Low_1", L"TLim_1" },
- { L"Power_2", L"R0_2", L"InvR0_2", L"R0_Low_2", L"TLim_2" },
- { L"Power_3", L"R0_3", L"InvR0_3", L"R0_Low_3", L"TLim_3" },
- { L"Power_4", L"R0_4", L"InvR0_4", L"R0_Low_4", L"TLim_4" },
+ efi_guid_t efi_guid = tasdev_fct_efi_guid[LENOVO];
+ char *vars[TASDEV_CALIB_N] = {
+ "R0_%d", "InvR0_%d", "R0_Low_%d", "Power_%d", "TLim_%d"
};
-
+ efi_char16_t efi_name[TAS2563_CAL_VAR_NAME_MAX];
unsigned long max_size = TAS2563_CAL_DATA_SIZE;
+ unsigned char var8[TAS2563_CAL_VAR_NAME_MAX];
+ struct tasdevice_priv *p = h->hda_priv;
+ struct calidata *cd = &p->cali_data;
+ struct cali_reg *r = &cd->cali_reg_array;
unsigned int offset = 0;
+ unsigned char *data;
efi_status_t status;
unsigned int attr;
+ int ret, i, j, k;
+
+ cd->cali_dat_sz_per_dev = TAS2563_CAL_DATA_SIZE * TASDEV_CALIB_N;
- tas_priv->cali_data.data = devm_kzalloc(tas_priv->dev,
- TAS2563_CAL_ARRAY_SIZE, GFP_KERNEL);
- if (!tas_priv->cali_data.data)
+ /* extra byte for each device is the device number */
+ cd->total_sz = (cd->cali_dat_sz_per_dev + 1) * p->ndev;
+ data = cd->data = devm_kzalloc(p->dev, cd->total_sz,
+ GFP_KERNEL);
+ if (!data)
return -ENOMEM;
- for (int i = 0; i < tas_priv->ndev; ++i) {
- for (int j = 0; j < TAS2563_CAL_N; ++j) {
- status = efi.get_variable(efi_vars[i][j],
+ for (i = 0; i < p->ndev; ++i) {
+ data[offset] = i;
+ offset++;
+ for (j = 0; j < TASDEV_CALIB_N; ++j) {
+ ret = snprintf(var8, sizeof(var8), vars[j], i);
+
+ if (ret < 0 || ret >= sizeof(var8) - 1) {
+ dev_err(p->dev, "%s: Read %s failed\n",
+ __func__, var8);
+ return -EINVAL;
+ }
+ /*
+ * Our variable names are ASCII by construction, but
+ * EFI names are wide chars. Convert and zero-pad.
+ */
+ memset(efi_name, 0, sizeof(efi_name));
+ for (k = 0; k < sizeof(var8) && var8[k]; k++)
+ efi_name[k] = var8[k];
+ status = efi.get_variable(efi_name,
&efi_guid, &attr, &max_size,
- &tas_priv->cali_data.data[offset]);
+ &data[offset]);
if (status != EFI_SUCCESS ||
max_size != TAS2563_CAL_DATA_SIZE) {
- dev_warn(tas_priv->dev,
- "Calibration data read failed %ld\n", status);
+ dev_warn(p->dev,
+ "Dev %d: Caldat[%d] read failed %ld\n",
+ i, j, status);
return -EINVAL;
}
offset += TAS2563_CAL_DATA_SIZE;
}
}
- tas_priv->cali_data.total_sz = offset;
- tasdevice_apply_calibration(tas_priv);
-
- return 0;
-}
-
-static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
-{
- struct calidata *cali_data = &tas_priv->cali_data;
- struct cali_reg *r = &cali_data->cali_reg_array;
- unsigned int cali_reg[CALIB_MAX] = {
- TASDEVICE_REG(0, 0x17, 0x74),
- TASDEVICE_REG(0, 0x18, 0x0c),
- TASDEVICE_REG(0, 0x18, 0x14),
- TASDEVICE_REG(0, 0x13, 0x70),
- TASDEVICE_REG(0, 0x18, 0x7c),
- };
- int i, j, rc;
- int oft = 0;
- __be32 data;
-
- if (tas_priv->dspbin_typ != TASDEV_BASIC) {
- cali_reg[0] = r->r0_reg;
- cali_reg[1] = r->invr0_reg;
- cali_reg[2] = r->r0_low_reg;
- cali_reg[3] = r->pow_reg;
- cali_reg[4] = r->tlimit_reg;
- }
-
- for (i = 0; i < tas_priv->ndev; i++) {
- for (j = 0; j < CALIB_MAX; j++) {
- data = cpu_to_be32(
- *(uint32_t *)&tas_priv->cali_data.data[oft]);
- rc = tasdevice_dev_bulk_write(tas_priv, i,
- cali_reg[j], (unsigned char *)&data, 4);
- if (rc < 0)
- dev_err(tas_priv->dev,
- "chn %d calib %d bulk_wr err = %d\n",
- i, j, rc);
- oft += 4;
- }
- }
-}
-
-/* Update the calibration data, including speaker impedance, f0, etc, into algo.
- * Calibrate data is done by manufacturer in the factory. These data are used
- * by Algo for calculating the speaker temperature, speaker membrane excursion
- * and f0 in real time during playback.
- */
-static int tas2781_save_calibration(struct tasdevice_priv *tas_priv)
-{
- efi_guid_t efi_guid = EFI_GUID(0x02f9af02, 0x7734, 0x4233, 0xb4, 0x3d,
- 0x93, 0xfe, 0x5a, 0xa3, 0x5d, 0xb3);
- static efi_char16_t efi_name[] = L"CALI_DATA";
- unsigned int attr, crc;
- unsigned int *tmp_val;
- efi_status_t status;
-
- /* Lenovo devices */
- if (tas_priv->catlog_id == LENOVO)
- efi_guid = EFI_GUID(0x1f52d2a1, 0xbb3a, 0x457d, 0xbc, 0x09,
- 0x43, 0xa3, 0xf4, 0x31, 0x0a, 0x92);
-
- tas_priv->cali_data.total_sz = 0;
- /* Get real size of UEFI variable */
- status = efi.get_variable(efi_name, &efi_guid, &attr,
- &tas_priv->cali_data.total_sz, tas_priv->cali_data.data);
- if (status == EFI_BUFFER_TOO_SMALL) {
- /* Allocate data buffer of data_size bytes */
- tas_priv->cali_data.data = devm_kzalloc(tas_priv->dev,
- tas_priv->cali_data.total_sz, GFP_KERNEL);
- if (!tas_priv->cali_data.data)
- return -ENOMEM;
- /* Get variable contents into buffer */
- status = efi.get_variable(efi_name, &efi_guid, &attr,
- &tas_priv->cali_data.total_sz,
- tas_priv->cali_data.data);
- }
- if (status != EFI_SUCCESS)
+ if (cd->total_sz != offset) {
+ dev_err(p->dev, "%s: tot_size(%lu) and offset(%u) dismatch\n",
+ __func__, cd->total_sz, offset);
return -EINVAL;
+ }
- tmp_val = (unsigned int *)tas_priv->cali_data.data;
-
- crc = crc32(~0, tas_priv->cali_data.data, 84) ^ ~0;
- dev_dbg(tas_priv->dev, "cali crc 0x%08x PK tmp_val 0x%08x\n",
- crc, tmp_val[21]);
-
- if (crc == tmp_val[21]) {
- time64_t seconds = tmp_val[20];
+ r->r0_reg = TAS2563_CAL_R0;
+ r->invr0_reg = TAS2563_CAL_INVR0;
+ r->r0_low_reg = TAS2563_CAL_R0_LOW;
+ r->pow_reg = TAS2563_CAL_POWER;
+ r->tlimit_reg = TAS2563_CAL_TLIM;
- dev_dbg(tas_priv->dev, "%ptTsr\n", &seconds);
- tasdevice_apply_calibration(tas_priv);
- } else
- tas_priv->cali_data.total_sz = 0;
+ /*
+ * TAS2781_FMWLIB supports two solutions of calibrated data. One is
+ * from the driver itself: driver reads the calibrated files directly
+ * during probe; The other from user space: during init of audio hal,
+ * the audio hal will pass the calibrated data via kcontrol interface.
+ * Driver will store this data in "struct calidata" for use. For hda
+ * device, calibrated data are usunally saved into UEFI. So Hda side
+ * codec driver use the mixture of these two solutions, driver reads
+ * the data from UEFI, then store this data in "struct calidata" for
+ * use.
+ */
+ p->is_user_space_calidata = true;
return 0;
}
static void tas2781_hda_remove_controls(struct tas2781_hda *tas_hda)
{
+ struct tas2781_hda_i2c_priv *hda_priv = tas_hda->hda_priv;
struct hda_codec *codec = tas_hda->priv->codec;
snd_ctl_remove(codec->card, tas_hda->dsp_prog_ctl);
snd_ctl_remove(codec->card, tas_hda->dsp_conf_ctl);
- for (int i = ARRAY_SIZE(tas_hda->snd_ctls) - 1; i >= 0; i--)
- snd_ctl_remove(codec->card, tas_hda->snd_ctls[i]);
+ for (int i = ARRAY_SIZE(hda_priv->snd_ctls) - 1; i >= 0; i--)
+ snd_ctl_remove(codec->card, hda_priv->snd_ctls[i]);
snd_ctl_remove(codec->card, tas_hda->prof_ctl);
}
@@ -665,6 +383,7 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context)
{
struct tasdevice_priv *tas_priv = context;
struct tas2781_hda *tas_hda = dev_get_drvdata(tas_priv->dev);
+ struct tas2781_hda_i2c_priv *hda_priv = tas_hda->hda_priv;
struct hda_codec *codec = tas_priv->codec;
int i, ret, spk_id;
@@ -685,9 +404,9 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context)
}
for (i = 0; i < ARRAY_SIZE(tas2781_snd_controls); i++) {
- tas_hda->snd_ctls[i] = snd_ctl_new1(&tas2781_snd_controls[i],
+ hda_priv->snd_ctls[i] = snd_ctl_new1(&tas2781_snd_controls[i],
tas_priv);
- ret = snd_ctl_add(codec->card, tas_hda->snd_ctls[i]);
+ ret = snd_ctl_add(codec->card, hda_priv->snd_ctls[i]);
if (ret) {
dev_err(tas_priv->dev,
"Failed to add KControl %s = %d\n",
@@ -756,7 +475,7 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context)
/* If calibrated data occurs error, dsp will still works with default
* calibrated data inside algo.
*/
- tasdevice_save_calibration(tas_priv);
+ hda_priv->save_calibration(tas_hda);
tasdevice_tuning_switch(tas_hda->priv, 0);
tas_hda->priv->playback_started = true;
@@ -789,11 +508,11 @@ static int tas2781_hda_bind(struct device *dev, struct device *master,
subid = codec->core.subsystem_id >> 16;
switch (subid) {
- case 0x17aa:
- tas_hda->priv->catlog_id = LENOVO;
+ case 0x1028:
+ tas_hda->catlog_id = DELL;
break;
default:
- tas_hda->priv->catlog_id = OTHERS;
+ tas_hda->catlog_id = LENOVO;
break;
}
@@ -840,31 +559,23 @@ static const struct component_ops tas2781_hda_comp_ops = {
.unbind = tas2781_hda_unbind,
};
-static void tas2781_hda_remove(struct device *dev)
-{
- struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
-
- component_del(tas_hda->dev, &tas2781_hda_comp_ops);
-
- pm_runtime_get_sync(tas_hda->dev);
- pm_runtime_disable(tas_hda->dev);
-
- pm_runtime_put_noidle(tas_hda->dev);
-
- tasdevice_remove(tas_hda->priv);
-}
-
static int tas2781_hda_i2c_probe(struct i2c_client *clt)
{
+ struct tas2781_hda_i2c_priv *hda_priv;
struct tas2781_hda *tas_hda;
const char *device_name;
int ret;
-
tas_hda = devm_kzalloc(&clt->dev, sizeof(*tas_hda), GFP_KERNEL);
if (!tas_hda)
return -ENOMEM;
+ hda_priv = devm_kzalloc(&clt->dev, sizeof(*hda_priv), GFP_KERNEL);
+ if (!hda_priv)
+ return -ENOMEM;
+
+ tas_hda->hda_priv = hda_priv;
+
dev_set_drvdata(&clt->dev, tas_hda);
tas_hda->dev = &clt->dev;
@@ -874,13 +585,11 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt)
if (strstr(dev_name(&clt->dev), "TIAS2781")) {
device_name = "TIAS2781";
- tas_hda->priv->save_calibration = tas2781_save_calibration;
- tas_hda->priv->apply_calibration = tas2781_apply_calib;
+ hda_priv->save_calibration = tas2781_save_calibration;
tas_hda->priv->global_addr = TAS2781_GLOBAL_ADDR;
} else if (strstr(dev_name(&clt->dev), "INT8866")) {
device_name = "INT8866";
- tas_hda->priv->save_calibration = tas2563_save_calibration;
- tas_hda->priv->apply_calibration = tas2563_apply_calib;
+ hda_priv->save_calibration = tas2563_save_calibration;
tas_hda->priv->global_addr = TAS2563_GLOBAL_ADDR;
} else
return -ENODEV;
@@ -911,13 +620,13 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt)
err:
if (ret)
- tas2781_hda_remove(&clt->dev);
+ tas2781_hda_remove(&clt->dev, &tas2781_hda_comp_ops);
return ret;
}
static void tas2781_hda_i2c_remove(struct i2c_client *clt)
{
- tas2781_hda_remove(&clt->dev);
+ tas2781_hda_remove(&clt->dev, &tas2781_hda_comp_ops);
}
static int tas2781_runtime_suspend(struct device *dev)
@@ -951,11 +660,6 @@ static int tas2781_runtime_resume(struct device *dev)
tasdevice_prmg_load(tas_hda->priv, tas_hda->priv->cur_prog);
- /* If calibrated data occurs error, dsp will still works with default
- * calibrated data inside algo.
- */
- tasdevice_apply_calibration(tas_hda->priv);
-
mutex_unlock(&tas_hda->priv->codec_lock);
return 0;
@@ -999,11 +703,6 @@ static int tas2781_system_resume(struct device *dev)
tasdevice_reset(tas_hda->priv);
tasdevice_prmg_load(tas_hda->priv, tas_hda->priv->cur_prog);
- /* If calibrated data occurs error, dsp will still work with default
- * calibrated data inside algo.
- */
- tasdevice_apply_calibration(tas_hda->priv);
-
if (tas_hda->priv->playback_started)
tasdevice_tuning_switch(tas_hda->priv, 0);
@@ -1045,3 +744,4 @@ MODULE_DESCRIPTION("TAS2781 HDA Driver");
MODULE_AUTHOR("Shenghao Ding, TI, <shenghao-ding@ti.com>");
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS("SND_SOC_TAS2781_FMWLIB");
+MODULE_IMPORT_NS("SND_HDA_SCODEC_TAS2781");
diff --git a/sound/pci/hda/tas2781_hda_spi.c b/sound/pci/hda/tas2781_hda_spi.c
index 25175ff4b3aa..5c03e9d2283a 100644
--- a/sound/pci/hda/tas2781_hda_spi.c
+++ b/sound/pci/hda/tas2781_hda_spi.c
@@ -2,7 +2,7 @@
//
// TAS2781 HDA SPI driver
//
-// Copyright 2024 Texas Instruments, Inc.
+// Copyright 2024 - 2025 Texas Instruments, Inc.
//
// Author: Baojun Xu <baojun.xu@ti.com>
@@ -27,68 +27,41 @@
#include <sound/hda_codec.h>
#include <sound/soc.h>
-#include <sound/tas2781-dsp.h>
+#include <sound/tas2781.h>
#include <sound/tlv.h>
#include <sound/tas2781-tlv.h>
-#include "tas2781-spi.h"
-
#include "hda_local.h"
#include "hda_auto_parser.h"
#include "hda_component.h"
#include "hda_jack.h"
#include "hda_generic.h"
+#include "tas2781_hda.h"
-/*
- * No standard control callbacks for SNDRV_CTL_ELEM_IFACE_CARD
- * Define two controls, one is Volume control callbacks, the other is
- * flag setting control callbacks.
- */
-
-/* Volume control callbacks for tas2781 */
-#define ACARD_SINGLE_RANGE_EXT_TLV(xname, xreg, xshift, xmin, xmax, xinvert, \
- xhandler_get, xhandler_put, tlv_array) { \
- .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = (xname), \
- .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
- SNDRV_CTL_ELEM_ACCESS_READWRITE, \
- .tlv.p = (tlv_array), \
- .info = snd_soc_info_volsw, \
- .get = xhandler_get, .put = xhandler_put, \
- .private_value = (unsigned long)&(struct soc_mixer_control) { \
- .reg = xreg, .rreg = xreg, \
- .shift = xshift, .rshift = xshift,\
- .min = xmin, .max = xmax, .invert = xinvert, \
- } \
-}
+#define TASDEVICE_RANGE_MAX_SIZE (256 * 128)
+#define TASDEVICE_WIN_LEN 128
+#define TAS2781_SPI_MAX_FREQ (4 * HZ_PER_MHZ)
+/* Flag of calibration registers address. */
+#define TASDEVICE_CALIBRATION_REG_ADDRESS BIT(7)
+#define TASDEV_UEFI_CALI_REG_ADDR_FLG BIT(7)
-/* Flag control callbacks for tas2781 */
-#define ACARD_SINGLE_BOOL_EXT(xname, xdata, xhandler_get, xhandler_put) { \
- .iface = SNDRV_CTL_ELEM_IFACE_CARD, \
- .name = xname, \
- .info = snd_ctl_boolean_mono_info, \
- .get = xhandler_get, \
- .put = xhandler_put, \
- .private_value = xdata, \
-}
+/* System Reset Check Register */
+#define TAS2781_REG_CLK_CONFIG TASDEVICE_REG(0x0, 0x0, 0x5c)
+#define TAS2781_REG_CLK_CONFIG_RESET 0x19
-struct tas2781_hda {
- struct tasdevice_priv *priv;
- struct acpi_device *dacpi;
- struct snd_kcontrol *dsp_prog_ctl;
- struct snd_kcontrol *dsp_conf_ctl;
+struct tas2781_hda_spi_priv {
struct snd_kcontrol *snd_ctls[3];
- struct snd_kcontrol *prof_ctl;
};
static const struct regmap_range_cfg tasdevice_ranges[] = {
{
.range_min = 0,
- .range_max = TASDEVICE_MAX_SIZE,
+ .range_max = TASDEVICE_RANGE_MAX_SIZE,
.selector_reg = TASDEVICE_PAGE_SELECT,
.selector_mask = GENMASK(7, 0),
.selector_shift = 0,
.window_start = 0,
- .window_len = TASDEVICE_MAX_PAGE,
+ .window_len = TASDEVICE_WIN_LEN,
},
};
@@ -96,39 +69,19 @@ static const struct regmap_config tasdevice_regmap = {
.reg_bits = 8,
.val_bits = 8,
.zero_flag_mask = true,
+ .read_flag_mask = 0x01,
+ .reg_shift = -1,
.cache_type = REGCACHE_NONE,
.ranges = tasdevice_ranges,
.num_ranges = ARRAY_SIZE(tasdevice_ranges),
- .max_register = TASDEVICE_MAX_SIZE,
+ .max_register = TASDEVICE_RANGE_MAX_SIZE,
};
-static int tasdevice_spi_switch_book(struct tasdevice_priv *tas_priv, int reg)
-{
- struct regmap *map = tas_priv->regmap;
-
- if (tas_priv->cur_book != TASDEVICE_BOOK_ID(reg)) {
- int ret = regmap_write(map, TASDEVICE_BOOKCTL_REG,
- TASDEVICE_BOOK_ID(reg));
- if (ret < 0) {
- dev_err(tas_priv->dev, "Switch Book E=%d\n", ret);
- return ret;
- }
- tas_priv->cur_book = TASDEVICE_BOOK_ID(reg);
- }
- return 0;
-}
-
-int tasdevice_spi_dev_read(struct tasdevice_priv *tas_priv,
- unsigned int reg,
- unsigned int *val)
+static int tasdevice_spi_dev_read(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned int *val)
{
- struct regmap *map = tas_priv->regmap;
int ret;
- ret = tasdevice_spi_switch_book(tas_priv, reg);
- if (ret < 0)
- return ret;
-
/*
* In our TAS2781 SPI mode, if read from other book (not book 0),
* or read from page number larger than 1 in book 0, one more byte
@@ -137,11 +90,11 @@ int tasdevice_spi_dev_read(struct tasdevice_priv *tas_priv,
if ((TASDEVICE_BOOK_ID(reg) > 0) || (TASDEVICE_PAGE_ID(reg) > 1)) {
unsigned char data[2];
- ret = regmap_bulk_read(map, TASDEVICE_PAGE_REG(reg) | 1,
+ ret = tasdevice_dev_bulk_read(tas_priv, chn, reg,
data, sizeof(data));
*val = data[1];
} else {
- ret = regmap_read(map, TASDEVICE_PAGE_REG(reg) | 1, val);
+ ret = tasdevice_dev_read(tas_priv, chn, reg, val);
}
if (ret < 0)
dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
@@ -149,71 +102,25 @@ int tasdevice_spi_dev_read(struct tasdevice_priv *tas_priv,
return ret;
}
-int tasdevice_spi_dev_write(struct tasdevice_priv *tas_priv,
- unsigned int reg,
- unsigned int value)
-{
- struct regmap *map = tas_priv->regmap;
- int ret;
-
- ret = tasdevice_spi_switch_book(tas_priv, reg);
- if (ret < 0)
- return ret;
-
- ret = regmap_write(map, TASDEVICE_PAGE_REG(reg), value);
- if (ret < 0)
- dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
-
- return ret;
-}
-
-int tasdevice_spi_dev_bulk_write(struct tasdevice_priv *tas_priv,
- unsigned int reg,
- unsigned char *data,
- unsigned int len)
-{
- struct regmap *map = tas_priv->regmap;
- int ret;
-
- ret = tasdevice_spi_switch_book(tas_priv, reg);
- if (ret < 0)
- return ret;
-
- ret = regmap_bulk_write(map, TASDEVICE_PAGE_REG(reg), data, len);
- if (ret < 0)
- dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
-
- return ret;
-}
-
-int tasdevice_spi_dev_bulk_read(struct tasdevice_priv *tas_priv,
- unsigned int reg,
- unsigned char *data,
- unsigned int len)
+static int tasdevice_spi_dev_bulk_read(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned char *data,
+ unsigned int len)
{
- struct regmap *map = tas_priv->regmap;
int ret;
- ret = tasdevice_spi_switch_book(tas_priv, reg);
- if (ret < 0)
- return ret;
-
- if (len > TASDEVICE_MAX_PAGE)
- len = TASDEVICE_MAX_PAGE;
/*
* In our TAS2781 SPI mode, if read from other book (not book 0),
* or read from page number larger than 1 in book 0, one more byte
* read is needed, and first byte is a dummy byte, need to be ignored.
*/
if ((TASDEVICE_BOOK_ID(reg) > 0) || (TASDEVICE_PAGE_ID(reg) > 1)) {
- unsigned char buf[TASDEVICE_MAX_PAGE+1];
+ unsigned char buf[TASDEVICE_WIN_LEN + 1];
- ret = regmap_bulk_read(map, TASDEVICE_PAGE_REG(reg) | 1, buf,
- len + 1);
+ ret = tasdevice_dev_bulk_read(tas_priv, chn, reg,
+ buf, len + 1);
memcpy(data, buf + 1, len);
} else {
- ret = regmap_bulk_read(map, TASDEVICE_PAGE_REG(reg) | 1, data,
- len);
+ ret = tasdevice_dev_bulk_read(tas_priv, chn, reg, data, len);
}
if (ret < 0)
dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
@@ -221,31 +128,55 @@ int tasdevice_spi_dev_bulk_read(struct tasdevice_priv *tas_priv,
return ret;
}
-int tasdevice_spi_dev_update_bits(struct tasdevice_priv *tas_priv,
- unsigned int reg,
- unsigned int mask,
- unsigned int value)
+static int tasdevice_spi_dev_update_bits(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned int mask,
+ unsigned int value)
{
- struct regmap *map = tas_priv->regmap;
int ret, val;
/*
* In our TAS2781 SPI mode, read/write was masked in last bit of
* address, it cause regmap_update_bits() not work as expected.
*/
- ret = tasdevice_spi_dev_read(tas_priv, reg, &val);
+ ret = tasdevice_dev_read(tas_priv, chn, reg, &val);
if (ret < 0) {
dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
return ret;
}
- ret = regmap_write(map, TASDEVICE_PAGE_REG(reg),
- (val & ~mask) | (mask & value));
+
+ ret = tasdevice_dev_write(tas_priv, chn, TASDEVICE_PAGE_REG(reg),
+ (val & ~mask) | (mask & value));
if (ret < 0)
dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
return ret;
}
+static int tasdevice_spi_change_chn_book(struct tasdevice_priv *p,
+ unsigned short chn, int book)
+{
+ int ret = 0;
+
+ if (chn == p->index) {
+ struct tasdevice *tasdev = &p->tasdevice[chn];
+ struct regmap *map = p->regmap;
+
+ if (tasdev->cur_book != book) {
+ ret = regmap_write(map, TASDEVICE_BOOKCTL_REG, book);
+ if (ret < 0)
+ dev_err(p->dev, "%s, E=%d\n", __func__, ret);
+ else
+ tasdev->cur_book = book;
+ }
+ } else {
+ ret = -EXDEV;
+ dev_dbg(p->dev, "Not error, %s ignore channel(%d)\n",
+ __func__, chn);
+ }
+
+ return ret;
+}
+
static void tas2781_spi_reset(struct tasdevice_priv *tas_dev)
{
int ret;
@@ -254,12 +185,15 @@ static void tas2781_spi_reset(struct tasdevice_priv *tas_dev)
gpiod_set_value_cansleep(tas_dev->reset, 0);
fsleep(800);
gpiod_set_value_cansleep(tas_dev->reset, 1);
+ } else {
+ ret = tasdevice_dev_write(tas_dev, tas_dev->index,
+ TASDEVICE_REG_SWRESET, TASDEVICE_REG_SWRESET_RESET);
+ if (ret < 0) {
+ dev_err(tas_dev->dev, "dev sw-reset fail, %d\n", ret);
+ return;
+ }
+ fsleep(1000);
}
- ret = tasdevice_spi_dev_write(tas_dev, TAS2781_REG_SWRESET,
- TAS2781_REG_SWRESET_RESET);
- if (ret < 0)
- dev_err(tas_dev->dev, "dev sw-reset fail, %d\n", ret);
- fsleep(1000);
}
static int tascodec_spi_init(struct tasdevice_priv *tas_priv,
@@ -276,7 +210,7 @@ static int tascodec_spi_init(struct tasdevice_priv *tas_priv,
scnprintf(tas_priv->rca_binaryname,
sizeof(tas_priv->rca_binaryname), "%sRCA%d.bin",
- tas_priv->dev_name, tas_priv->index);
+ tas_priv->dev_name, tas_priv->ndev);
crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL);
tas_priv->codec = codec;
ret = request_firmware_nowait(module, FW_ACTION_UEVENT,
@@ -291,26 +225,22 @@ static int tascodec_spi_init(struct tasdevice_priv *tas_priv,
static void tasdevice_spi_init(struct tasdevice_priv *tas_priv)
{
- tas_priv->cur_prog = -1;
- tas_priv->cur_conf = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_book = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_conf = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_prog = -1;
- tas_priv->cur_book = -1;
- tas_priv->cur_prog = -1;
- tas_priv->cur_conf = -1;
+ tas_priv->isspi = true;
- /* Store default registers address for calibration data. */
- tas_priv->cali_reg_array[0] = TASDEVICE_REG(0, 0x17, 0x74);
- tas_priv->cali_reg_array[1] = TASDEVICE_REG(0, 0x18, 0x0c);
- tas_priv->cali_reg_array[2] = TASDEVICE_REG(0, 0x18, 0x14);
- tas_priv->cali_reg_array[3] = TASDEVICE_REG(0, 0x13, 0x70);
- tas_priv->cali_reg_array[4] = TASDEVICE_REG(0, 0x18, 0x7c);
+ tas_priv->update_bits = tasdevice_spi_dev_update_bits;
+ tas_priv->change_chn_book = tasdevice_spi_change_chn_book;
+ tas_priv->dev_read = tasdevice_spi_dev_read;
+ tas_priv->dev_bulk_read = tasdevice_spi_dev_bulk_read;
mutex_init(&tas_priv->codec_lock);
}
static int tasdevice_spi_amp_putvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol,
- struct soc_mixer_control *mc)
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
{
unsigned int invert = mc->invert;
unsigned char mask;
@@ -321,7 +251,8 @@ static int tasdevice_spi_amp_putvol(struct tasdevice_priv *tas_priv,
mask <<= mc->shift;
val = clamp(invert ? max - ucontrol->value.integer.value[0] :
ucontrol->value.integer.value[0], 0, max);
- ret = tasdevice_spi_dev_update_bits(tas_priv,
+
+ ret = tasdevice_spi_dev_update_bits(tas_priv, tas_priv->index,
mc->reg, mask, (unsigned int)(val << mc->shift));
if (ret)
dev_err(tas_priv->dev, "set AMP vol error in dev %d\n",
@@ -331,16 +262,14 @@ static int tasdevice_spi_amp_putvol(struct tasdevice_priv *tas_priv,
}
static int tasdevice_spi_amp_getvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol,
- struct soc_mixer_control *mc)
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
{
unsigned int invert = mc->invert;
unsigned char mask = 0;
int max = mc->max;
int ret, val;
- /* Read the primary device */
- ret = tasdevice_spi_dev_read(tas_priv, mc->reg, &val);
+ ret = tasdevice_spi_dev_read(tas_priv, tas_priv->index, mc->reg, &val);
if (ret) {
dev_err(tas_priv->dev, "%s, get AMP vol error\n", __func__);
return ret;
@@ -355,9 +284,8 @@ static int tasdevice_spi_amp_getvol(struct tasdevice_priv *tas_priv,
return ret;
}
-static int tasdevice_spi_digital_putvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol,
- struct soc_mixer_control *mc)
+static int tasdevice_spi_digital_putvol(struct tasdevice_priv *p,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
{
unsigned int invert = mc->invert;
int max = mc->max;
@@ -365,26 +293,23 @@ static int tasdevice_spi_digital_putvol(struct tasdevice_priv *tas_priv,
val = clamp(invert ? max - ucontrol->value.integer.value[0] :
ucontrol->value.integer.value[0], 0, max);
- ret = tasdevice_spi_dev_write(tas_priv, mc->reg, (unsigned int)val);
+ ret = tasdevice_dev_write(p, p->index, mc->reg, (unsigned int)val);
if (ret)
- dev_err(tas_priv->dev, "set digital vol err in dev %d\n",
- tas_priv->index);
+ dev_err(p->dev, "set digital vol err in dev %d\n", p->index);
return ret;
}
-static int tasdevice_spi_digital_getvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol,
- struct soc_mixer_control *mc)
+static int tasdevice_spi_digital_getvol(struct tasdevice_priv *p,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
{
unsigned int invert = mc->invert;
int max = mc->max;
int ret, val;
- /* Read the primary device as the whole */
- ret = tasdevice_spi_dev_read(tas_priv, mc->reg, &val);
+ ret = tasdevice_spi_dev_read(p, p->index, mc->reg, &val);
if (ret) {
- dev_err(tas_priv->dev, "%s, get digital vol err\n", __func__);
+ dev_err(p->dev, "%s, get digital vol err\n", __func__);
return ret;
}
@@ -395,8 +320,7 @@ static int tasdevice_spi_digital_getvol(struct tasdevice_priv *tas_priv,
}
static int tas2781_read_acpi(struct tas2781_hda *tas_hda,
- const char *hid,
- int id)
+ const char *hid, int id)
{
struct tasdevice_priv *p = tas_hda->priv;
struct acpi_device *adev;
@@ -413,7 +337,6 @@ static int tas2781_read_acpi(struct tas2781_hda *tas_hda,
}
strscpy(p->dev_name, hid, sizeof(p->dev_name));
- tas_hda->dacpi = adev;
physdev = get_device(acpi_get_first_physical_node(adev));
acpi_dev_put(adev);
@@ -423,7 +346,7 @@ static int tas2781_read_acpi(struct tas2781_hda *tas_hda,
ret = -EINVAL;
goto err;
}
- nval = ret;
+ p->ndev = nval = ret;
ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
@@ -466,139 +389,22 @@ err:
static void tas2781_hda_playback_hook(struct device *dev, int action)
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+ struct tasdevice_priv *tas_priv = tas_hda->priv;
if (action == HDA_GEN_PCM_ACT_OPEN) {
pm_runtime_get_sync(dev);
- guard(mutex)(&tas_hda->priv->codec_lock);
- tasdevice_spi_tuning_switch(tas_hda->priv, 0);
+ guard(mutex)(&tas_priv->codec_lock);
+ if (tas_priv->fw_state == TASDEVICE_DSP_FW_ALL_OK)
+ tasdevice_tuning_switch(tas_hda->priv, 0);
} else if (action == HDA_GEN_PCM_ACT_CLOSE) {
- guard(mutex)(&tas_hda->priv->codec_lock);
- tasdevice_spi_tuning_switch(tas_hda->priv, 1);
+ guard(mutex)(&tas_priv->codec_lock);
+ if (tas_priv->fw_state == TASDEVICE_DSP_FW_ALL_OK)
+ tasdevice_tuning_switch(tas_priv, 1);
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
}
}
-static int tasdevice_info_profile(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *uinfo)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
- uinfo->count = 1;
- uinfo->value.integer.min = 0;
- uinfo->value.integer.max = tas_priv->rcabin.ncfgs - 1;
-
- return 0;
-}
-
-static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id;
-
- return 0;
-}
-
-static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- int max = tas_priv->rcabin.ncfgs - 1;
- int val;
-
- val = clamp(ucontrol->value.integer.value[0], 0, max);
- if (tas_priv->rcabin.profile_cfg_id != val) {
- tas_priv->rcabin.profile_cfg_id = val;
- return 1;
- }
-
- return 0;
-}
-
-static int tasdevice_info_programs(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *uinfo)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
- uinfo->count = 1;
- uinfo->value.integer.min = 0;
- uinfo->value.integer.max = tas_priv->fmw->nr_programs - 1;
-
- return 0;
-}
-
-static int tasdevice_info_config(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *uinfo)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
- uinfo->count = 1;
- uinfo->value.integer.min = 0;
- uinfo->value.integer.max = tas_priv->fmw->nr_configurations - 1;
-
- return 0;
-}
-
-static int tasdevice_program_get(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- ucontrol->value.integer.value[0] = tas_priv->cur_prog;
-
- return 0;
-}
-
-static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- int nr_program = ucontrol->value.integer.value[0];
- int max = tas_priv->fmw->nr_programs - 1;
- int val;
-
- val = clamp(nr_program, 0, max);
-
- if (tas_priv->cur_prog != val) {
- tas_priv->cur_prog = val;
- return 1;
- }
-
- return 0;
-}
-
-static int tasdevice_config_get(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
-
- ucontrol->value.integer.value[0] = tas_priv->cur_conf;
-
- return 0;
-}
-
-static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- int max = tas_priv->fmw->nr_configurations - 1;
- int val;
-
- val = clamp(ucontrol->value.integer.value[0], 0, max);
-
- if (tas_priv->cur_conf != val) {
- tas_priv->cur_conf = val;
- return 1;
- }
-
- return 0;
-}
-
/*
* tas2781_digital_getvol - get the volum control
* @kcontrol: control pointer
@@ -620,6 +426,7 @@ static int tas2781_digital_getvol(struct snd_kcontrol *kcontrol,
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ guard(mutex)(&tas_priv->codec_lock);
return tasdevice_spi_digital_getvol(tas_priv, ucontrol, mc);
}
@@ -630,6 +437,7 @@ static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ guard(mutex)(&tas_priv->codec_lock);
return tasdevice_spi_amp_getvol(tas_priv, ucontrol, mc);
}
@@ -640,7 +448,7 @@ static int tas2781_digital_putvol(struct snd_kcontrol *kcontrol,
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
- /* The check of the given value is in tasdevice_digital_putvol. */
+ guard(mutex)(&tas_priv->codec_lock);
return tasdevice_spi_digital_putvol(tas_priv, ucontrol, mc);
}
@@ -651,7 +459,7 @@ static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol,
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
- /* The check of the given value is in tasdevice_amp_putvol. */
+ guard(mutex)(&tas_priv->codec_lock);
return tasdevice_spi_amp_putvol(tas_priv, ucontrol, mc);
}
@@ -685,223 +493,139 @@ static int tas2781_force_fwload_put(struct snd_kcontrol *kcontrol,
return change;
}
-static const struct snd_kcontrol_new tas2781_snd_controls[] = {
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain 0", TAS2781_AMP_LEVEL,
- 1, 0, 20, 0, tas2781_amp_getvol,
- tas2781_amp_putvol, amp_vol_tlv),
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain 0", TAS2781_DVC_LVL,
- 0, 0, 200, 1, tas2781_digital_getvol,
- tas2781_digital_putvol, dvc_tlv),
- ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load 0", 0,
- tas2781_force_fwload_get, tas2781_force_fwload_put),
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain 1", TAS2781_AMP_LEVEL,
- 1, 0, 20, 0, tas2781_amp_getvol,
- tas2781_amp_putvol, amp_vol_tlv),
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain 1", TAS2781_DVC_LVL,
- 0, 0, 200, 1, tas2781_digital_getvol,
- tas2781_digital_putvol, dvc_tlv),
- ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load 1", 0,
- tas2781_force_fwload_get, tas2781_force_fwload_put),
+static struct snd_kcontrol_new tas2781_snd_ctls[] = {
+ ACARD_SINGLE_RANGE_EXT_TLV(NULL, TAS2781_AMP_LEVEL, 1, 0, 20, 0,
+ tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv),
+ ACARD_SINGLE_RANGE_EXT_TLV(NULL, TAS2781_DVC_LVL, 0, 0, 200, 1,
+ tas2781_digital_getvol, tas2781_digital_putvol, dvc_tlv),
+ ACARD_SINGLE_BOOL_EXT(NULL, 0, tas2781_force_fwload_get,
+ tas2781_force_fwload_put),
};
-static const struct snd_kcontrol_new tas2781_prof_ctrl[] = {
-{
- .name = "Speaker Profile Id - 0",
+static struct snd_kcontrol_new tas2781_prof_ctl = {
.iface = SNDRV_CTL_ELEM_IFACE_CARD,
.info = tasdevice_info_profile,
.get = tasdevice_get_profile_id,
.put = tasdevice_set_profile_id,
-},
-{
- .name = "Speaker Profile Id - 1",
- .iface = SNDRV_CTL_ELEM_IFACE_CARD,
- .info = tasdevice_info_profile,
- .get = tasdevice_get_profile_id,
- .put = tasdevice_set_profile_id,
-},
-};
-static const struct snd_kcontrol_new tas2781_dsp_prog_ctrl[] = {
-{
- .name = "Speaker Program Id 0",
- .iface = SNDRV_CTL_ELEM_IFACE_CARD,
- .info = tasdevice_info_programs,
- .get = tasdevice_program_get,
- .put = tasdevice_program_put,
-},
-{
- .name = "Speaker Program Id 1",
- .iface = SNDRV_CTL_ELEM_IFACE_CARD,
- .info = tasdevice_info_programs,
- .get = tasdevice_program_get,
- .put = tasdevice_program_put,
-},
};
-static const struct snd_kcontrol_new tas2781_dsp_conf_ctrl[] = {
-{
- .name = "Speaker Config Id 0",
- .iface = SNDRV_CTL_ELEM_IFACE_CARD,
- .info = tasdevice_info_config,
- .get = tasdevice_config_get,
- .put = tasdevice_config_put,
-},
-{
- .name = "Speaker Config Id 1",
- .iface = SNDRV_CTL_ELEM_IFACE_CARD,
- .info = tasdevice_info_config,
- .get = tasdevice_config_get,
- .put = tasdevice_config_put,
-},
+static struct snd_kcontrol_new tas2781_dsp_ctls[] = {
+ /* Speaker Program */
+ {
+ .iface = SNDRV_CTL_ELEM_IFACE_CARD,
+ .info = tasdevice_info_programs,
+ .get = tasdevice_program_get,
+ .put = tasdevice_program_put,
+ },
+ /* Speaker Config */
+ {
+ .iface = SNDRV_CTL_ELEM_IFACE_CARD,
+ .info = tasdevice_info_config,
+ .get = tasdevice_config_get,
+ .put = tasdevice_config_put,
+ },
};
-static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
+static void tas2781_hda_remove_controls(struct tas2781_hda *tas_hda)
{
- int i, rc;
+ struct hda_codec *codec = tas_hda->priv->codec;
+ struct tas2781_hda_spi_priv *h_priv = tas_hda->hda_priv;
- /*
- * If no calibration data exist in tasdevice_priv *tas_priv,
- * calibration apply will be ignored, and use default values
- * in firmware binary, which was loaded during firmware download.
- */
- if (tas_priv->cali_data[0] == 0)
- return;
- /*
- * Calibration data was saved in tasdevice_priv *tas_priv as:
- * unsigned int cali_data[CALIB_MAX];
- * and every data (in 4 bytes) will be saved in register which in
- * book 0, and page number in page_array[], offset was saved in
- * rgno_array[].
- */
- for (i = 0; i < CALIB_MAX; i++) {
- rc = tasdevice_spi_dev_bulk_write(tas_priv,
- tas_priv->cali_reg_array[i],
- (unsigned char *)&tas_priv->cali_data[i], 4);
- if (rc < 0)
- dev_err(tas_priv->dev,
- "chn %d calib %d bulk_wr err = %d\n",
- tas_priv->index, i, rc);
- }
+ snd_ctl_remove(codec->card, tas_hda->dsp_prog_ctl);
+
+ snd_ctl_remove(codec->card, tas_hda->dsp_conf_ctl);
+
+ for (int i = ARRAY_SIZE(h_priv->snd_ctls) - 1; i >= 0; i--)
+ snd_ctl_remove(codec->card, h_priv->snd_ctls[i]);
+
+ snd_ctl_remove(codec->card, tas_hda->prof_ctl);
}
-/*
- * Update the calibration data, including speaker impedance, f0, etc,
- * into algo. Calibrate data is done by manufacturer in the factory.
- * These data are used by Algo for calculating the speaker temperature,
- * speaker membrane excursion and f0 in real time during playback.
- * Calibration data format in EFI is V2, since 2024.
- */
-static int tas2781_save_calibration(struct tasdevice_priv *tas_priv)
-{
- /*
- * GUID was used for data access in BIOS, it was provided by board
- * manufactory, like HP: "{02f9af02-7734-4233-b43d-93fe5aa35db3}"
- */
- efi_guid_t efi_guid =
- EFI_GUID(0x02f9af02, 0x7734, 0x4233,
- 0xb4, 0x3d, 0x93, 0xfe, 0x5a, 0xa3, 0x5d, 0xb3);
- static efi_char16_t efi_name[] = TASDEVICE_CALIBRATION_DATA_NAME;
- unsigned char data[TASDEVICE_CALIBRATION_DATA_SIZE], *buf;
- unsigned int attr, crc, offset, *tmp_val;
- unsigned long total_sz = 0;
- efi_status_t status;
-
- tas_priv->cali_data[0] = 0;
- status = efi.get_variable(efi_name, &efi_guid, &attr, &total_sz, data);
- if (status == EFI_BUFFER_TOO_SMALL) {
- if (total_sz > TASDEVICE_CALIBRATION_DATA_SIZE)
- return -ENOMEM;
- /* Get variable contents into buffer */
- status = efi.get_variable(efi_name, &efi_guid, &attr,
- &total_sz, data);
+static int tas2781_hda_spi_prf_ctl(struct tas2781_hda *h)
+{
+ struct tasdevice_priv *p = h->priv;
+ struct hda_codec *c = p->codec;
+ char name[64];
+ int rc;
+
+ snprintf(name, sizeof(name), "Speaker-%d Profile Id", p->index);
+ tas2781_prof_ctl.name = name;
+ h->prof_ctl = snd_ctl_new1(&tas2781_prof_ctl, p);
+ rc = snd_ctl_add(c->card, h->prof_ctl);
+ if (rc)
+ dev_err(p->dev, "Failed to add KControl: %s, rc = %d\n",
+ tas2781_prof_ctl.name, rc);
+ return rc;
+}
+
+static int tas2781_hda_spi_snd_ctls(struct tas2781_hda *h)
+{
+ struct tas2781_hda_spi_priv *h_priv = h->hda_priv;
+ struct tasdevice_priv *p = h->priv;
+ struct hda_codec *c = p->codec;
+ char name[64];
+ int i = 0;
+ int rc;
+
+ snprintf(name, sizeof(name), "Speaker-%d Analog Volume", p->index);
+ tas2781_snd_ctls[i].name = name;
+ h_priv->snd_ctls[i] = snd_ctl_new1(&tas2781_snd_ctls[i], p);
+ rc = snd_ctl_add(c->card, h_priv->snd_ctls[i]);
+ if (rc) {
+ dev_err(p->dev, "Failed to add KControl: %s, rc = %d\n",
+ tas2781_snd_ctls[i].name, rc);
+ return rc;
}
- if (status != EFI_SUCCESS)
- return status;
-
- tmp_val = (unsigned int *)data;
- if (tmp_val[0] == 2781) {
- /*
- * New features were added in calibrated Data V3:
- * 1. Added calibration registers address define in
- * a node, marked as Device id == 0x80.
- * New features were added in calibrated Data V2:
- * 1. Added some the fields to store the link_id and
- * uniqie_id for multi-link solutions
- * 2. Support flexible number of devices instead of
- * fixed one in V1.
- * Layout of calibrated data V2 in UEFI(total 256 bytes):
- * ChipID (2781, 4 bytes)
- * Device-Sum (4 bytes)
- * TimeStamp of Calibration (4 bytes)
- * for (i = 0; i < Device-Sum; i++) {
- * Device #i index_info () {
- * SDW link id (2bytes)
- * SDW unique_id (2bytes)
- * } // if Device number is 0x80, mean it's
- * calibration registers address.
- * Calibrated Data of Device #i (20 bytes)
- * }
- * CRC (4 bytes)
- * Reserved (the rest)
- */
- crc = crc32(~0, data, (3 + tmp_val[1] * 6) * 4) ^ ~0;
-
- if (crc != tmp_val[3 + tmp_val[1] * 6])
- return 0;
-
- for (int j = 0; j < tmp_val[1]; j++) {
- offset = j * 6 + 3;
- if (tmp_val[offset] == tas_priv->index) {
- for (int i = 0; i < CALIB_MAX; i++)
- tas_priv->cali_data[i] =
- tmp_val[offset + i + 1];
- } else if (tmp_val[offset] ==
- TASDEVICE_CALIBRATION_REG_ADDRESS) {
- for (int i = 0; i < CALIB_MAX; i++) {
- buf = &data[(offset + i + 1) * 4];
- tas_priv->cali_reg_array[i] =
- TASDEVICE_REG(buf[1], buf[2],
- buf[3]);
- }
- }
- tas_priv->apply_calibration(tas_priv);
- }
- } else {
- /*
- * Calibration data is in V1 format.
- * struct cali_data {
- * char cali_data[20];
- * }
- *
- * struct {
- * struct cali_data cali_data[4];
- * int TimeStamp of Calibration (4 bytes)
- * int CRC (4 bytes)
- * } ueft;
- */
- crc = crc32(~0, data, 84) ^ ~0;
- if (crc == tmp_val[21]) {
- for (int i = 0; i < CALIB_MAX; i++)
- tas_priv->cali_data[i] =
- tmp_val[tas_priv->index * 5 + i];
- tas_priv->apply_calibration(tas_priv);
- }
+ i++;
+ snprintf(name, sizeof(name), "Speaker-%d Digital Volume", p->index);
+ tas2781_snd_ctls[i].name = name;
+ h_priv->snd_ctls[i] = snd_ctl_new1(&tas2781_snd_ctls[i], p);
+ rc = snd_ctl_add(c->card, h_priv->snd_ctls[i]);
+ if (rc) {
+ dev_err(p->dev, "Failed to add KControl: %s, rc = %d\n",
+ tas2781_snd_ctls[i].name, rc);
+ return rc;
}
-
- return 0;
+ i++;
+ snprintf(name, sizeof(name), "Froce Speaker-%d FW Load", p->index);
+ tas2781_snd_ctls[i].name = name;
+ h_priv->snd_ctls[i] = snd_ctl_new1(&tas2781_snd_ctls[i], p);
+ rc = snd_ctl_add(c->card, h_priv->snd_ctls[i]);
+ if (rc) {
+ dev_err(p->dev, "Failed to add KControl: %s, rc = %d\n",
+ tas2781_snd_ctls[i].name, rc);
+ }
+ return rc;
}
-static void tas2781_hda_remove_controls(struct tas2781_hda *tas_hda)
+static int tas2781_hda_spi_dsp_ctls(struct tas2781_hda *h)
{
- struct hda_codec *codec = tas_hda->priv->codec;
+ struct tasdevice_priv *p = h->priv;
+ struct hda_codec *c = p->codec;
+ char name[64];
+ int i = 0;
+ int rc;
- snd_ctl_remove(codec->card, tas_hda->dsp_prog_ctl);
-
- snd_ctl_remove(codec->card, tas_hda->dsp_conf_ctl);
-
- for (int i = ARRAY_SIZE(tas_hda->snd_ctls) - 1; i >= 0; i--)
- snd_ctl_remove(codec->card, tas_hda->snd_ctls[i]);
+ snprintf(name, sizeof(name), "Speaker-%d Program Id", p->index);
+ tas2781_dsp_ctls[i].name = name;
+ h->dsp_prog_ctl = snd_ctl_new1(&tas2781_dsp_ctls[i], p);
+ rc = snd_ctl_add(c->card, h->dsp_prog_ctl);
+ if (rc) {
+ dev_err(p->dev, "Failed to add KControl: %s, rc = %d\n",
+ tas2781_dsp_ctls[i].name, rc);
+ return rc;
+ }
+ i++;
+ snprintf(name, sizeof(name), "Speaker-%d Config Id", p->index);
+ tas2781_dsp_ctls[i].name = name;
+ h->dsp_conf_ctl = snd_ctl_new1(&tas2781_dsp_ctls[i], p);
+ rc = snd_ctl_add(c->card, h->dsp_conf_ctl);
+ if (rc) {
+ dev_err(p->dev, "Failed to add KControl: %s, rc = %d\n",
+ tas2781_dsp_ctls[i].name, rc);
+ }
- snd_ctl_remove(codec->card, tas_hda->prof_ctl);
+ return rc;
}
static void tasdev_fw_ready(const struct firmware *fmw, void *context)
@@ -909,44 +633,30 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context)
struct tasdevice_priv *tas_priv = context;
struct tas2781_hda *tas_hda = dev_get_drvdata(tas_priv->dev);
struct hda_codec *codec = tas_priv->codec;
- int i, j, ret, val;
+ int ret, val;
pm_runtime_get_sync(tas_priv->dev);
guard(mutex)(&tas_priv->codec_lock);
- ret = tasdevice_spi_rca_parser(tas_priv, fmw);
+ ret = tasdevice_rca_parser(tas_priv, fmw);
if (ret)
goto out;
/* Add control one time only. */
- tas_hda->prof_ctl = snd_ctl_new1(&tas2781_prof_ctrl[tas_priv->index],
- tas_priv);
- ret = snd_ctl_add(codec->card, tas_hda->prof_ctl);
- if (ret) {
- dev_err(tas_priv->dev, "Failed to add KControl %s = %d\n",
- tas2781_prof_ctrl[tas_priv->index].name, ret);
+ ret = tas2781_hda_spi_prf_ctl(tas_hda);
+ if (ret)
+ goto out;
+
+ ret = tas2781_hda_spi_snd_ctls(tas_hda);
+ if (ret)
goto out;
- }
- j = tas_priv->index * ARRAY_SIZE(tas2781_snd_controls) / 2;
- for (i = 0; i < 3; i++) {
- tas_hda->snd_ctls[i] = snd_ctl_new1(&tas2781_snd_controls[i+j],
- tas_priv);
- ret = snd_ctl_add(codec->card, tas_hda->snd_ctls[i]);
- if (ret) {
- dev_err(tas_priv->dev,
- "Failed to add KControl %s = %d\n",
- tas2781_snd_controls[i+tas_priv->index*3].name,
- ret);
- goto out;
- }
- }
- tasdevice_spi_dsp_remove(tas_priv);
+ tasdevice_dsp_remove(tas_priv);
tas_priv->fw_state = TASDEVICE_DSP_FW_PENDING;
- scnprintf(tas_priv->coef_binaryname, 64, "TAS2XXX%08X-%01d.bin",
- codec->core.subsystem_id, tas_priv->index);
- ret = tasdevice_spi_dsp_parser(tas_priv);
+ scnprintf(tas_priv->coef_binaryname, 64, "TAS2XXX%04X-%01d.bin",
+ lower_16_bits(codec->core.subsystem_id), tas_priv->index);
+ ret = tasdevice_dsp_parser(tas_priv);
if (ret) {
dev_err(tas_priv->dev, "dspfw load %s error\n",
tas_priv->coef_binaryname);
@@ -954,54 +664,38 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context)
goto out;
}
- /* Add control one time only. */
- tas_hda->dsp_prog_ctl =
- snd_ctl_new1(&tas2781_dsp_prog_ctrl[tas_priv->index],
- tas_priv);
- ret = snd_ctl_add(codec->card, tas_hda->dsp_prog_ctl);
- if (ret) {
- dev_err(tas_priv->dev,
- "Failed to add KControl %s = %d\n",
- tas2781_dsp_prog_ctrl[tas_priv->index].name, ret);
- goto out;
- }
-
- tas_hda->dsp_conf_ctl =
- snd_ctl_new1(&tas2781_dsp_conf_ctrl[tas_priv->index],
- tas_priv);
- ret = snd_ctl_add(codec->card, tas_hda->dsp_conf_ctl);
- if (ret) {
- dev_err(tas_priv->dev, "Failed to add KControl %s = %d\n",
- tas2781_dsp_conf_ctrl[tas_priv->index].name, ret);
+ ret = tas2781_hda_spi_dsp_ctls(tas_hda);
+ if (ret)
goto out;
- }
-
/* Perform AMP reset before firmware download. */
- tas_priv->rcabin.profile_cfg_id = TAS2781_PRE_POST_RESET_CFG;
tas2781_spi_reset(tas_priv);
tas_priv->rcabin.profile_cfg_id = 0;
tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK;
- ret = tasdevice_spi_dev_read(tas_priv, TAS2781_REG_CLK_CONFIG, &val);
+ ret = tasdevice_spi_dev_read(tas_priv, tas_priv->index,
+ TAS2781_REG_CLK_CONFIG, &val);
if (ret < 0)
goto out;
- if (val == TAS2781_REG_CLK_CONFIG_RESET)
- ret = tasdevice_spi_prmg_load(tas_priv, 0);
- if (ret < 0) {
- dev_err(tas_priv->dev, "FW download failed = %d\n", ret);
- goto out;
+ if (val == TAS2781_REG_CLK_CONFIG_RESET) {
+ ret = tasdevice_prmg_load(tas_priv, 0);
+ if (ret < 0) {
+ dev_err(tas_priv->dev, "FW download failed = %d\n",
+ ret);
+ goto out;
+ }
+ tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK;
}
if (tas_priv->fmw->nr_programs > 0)
- tas_priv->cur_prog = 0;
+ tas_priv->tasdevice[tas_priv->index].cur_prog = 0;
if (tas_priv->fmw->nr_configurations > 0)
- tas_priv->cur_conf = 0;
+ tas_priv->tasdevice[tas_priv->index].cur_conf = 0;
/*
* If calibrated data occurs error, dsp will still works with default
* calibrated data inside algo.
*/
-
+ tas2781_save_calibration(tas_hda);
out:
release_firmware(fmw);
pm_runtime_mark_last_busy(tas_hda->priv->dev);
@@ -1048,9 +742,10 @@ static void tas2781_hda_unbind(struct device *dev, struct device *master,
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
struct hda_component_parent *parent = master_data;
+ struct tasdevice_priv *tas_priv = tas_hda->priv;
struct hda_component *comp;
- comp = hda_component_from_index(parent, tas_hda->priv->index);
+ comp = hda_component_from_index(parent, tas_priv->index);
if (comp && (comp->dev == dev)) {
comp->dev = NULL;
memset(comp->name, 0, sizeof(comp->name));
@@ -1059,8 +754,8 @@ static void tas2781_hda_unbind(struct device *dev, struct device *master,
tas2781_hda_remove_controls(tas_hda);
- tasdevice_spi_config_info_remove(tas_hda->priv);
- tasdevice_spi_dsp_remove(tas_hda->priv);
+ tasdevice_config_info_remove(tas_priv);
+ tasdevice_dsp_remove(tas_priv);
tas_hda->priv->fw_state = TASDEVICE_DSP_FW_PENDING;
}
@@ -1070,22 +765,9 @@ static const struct component_ops tas2781_hda_comp_ops = {
.unbind = tas2781_hda_unbind,
};
-static void tas2781_hda_remove(struct device *dev)
-{
- struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
-
- component_del(tas_hda->priv->dev, &tas2781_hda_comp_ops);
-
- pm_runtime_get_sync(tas_hda->priv->dev);
- pm_runtime_disable(tas_hda->priv->dev);
-
- pm_runtime_put_noidle(tas_hda->priv->dev);
-
- mutex_destroy(&tas_hda->priv->codec_lock);
-}
-
static int tas2781_hda_spi_probe(struct spi_device *spi)
{
+ struct tas2781_hda_spi_priv *hda_priv;
struct tasdevice_priv *tas_priv;
struct tas2781_hda *tas_hda;
const char *device_name;
@@ -1095,6 +777,11 @@ static int tas2781_hda_spi_probe(struct spi_device *spi)
if (!tas_hda)
return -ENOMEM;
+ hda_priv = devm_kzalloc(&spi->dev, sizeof(*hda_priv), GFP_KERNEL);
+ if (!hda_priv)
+ return -ENOMEM;
+
+ tas_hda->hda_priv = hda_priv;
spi->max_speed_hz = TAS2781_SPI_MAX_FREQ;
tas_priv = devm_kzalloc(&spi->dev, sizeof(*tas_priv), GFP_KERNEL);
@@ -1111,8 +798,6 @@ static int tas2781_hda_spi_probe(struct spi_device *spi)
}
if (strstr(dev_name(&spi->dev), "TXNW2781")) {
device_name = "TXNW2781";
- tas_priv->save_calibration = tas2781_save_calibration;
- tas_priv->apply_calibration = tas2781_apply_calib;
} else {
dev_err(tas_priv->dev, "Unmatched spi dev %s\n",
dev_name(&spi->dev));
@@ -1125,16 +810,10 @@ static int tas2781_hda_spi_probe(struct spi_device *spi)
spi_get_chipselect(spi, 0));
if (ret)
return dev_err_probe(tas_priv->dev, ret,
- "Platform not supported\n");
+ "Platform not supported\n");
tasdevice_spi_init(tas_priv);
- ret = component_add(tas_priv->dev, &tas2781_hda_comp_ops);
- if (ret) {
- dev_err(tas_priv->dev, "Register component fail: %d\n", ret);
- return ret;
- }
-
pm_runtime_set_autosuspend_delay(tas_priv->dev, 3000);
pm_runtime_use_autosuspend(tas_priv->dev);
pm_runtime_mark_last_busy(tas_priv->dev);
@@ -1144,25 +823,34 @@ static int tas2781_hda_spi_probe(struct spi_device *spi)
pm_runtime_put_autosuspend(tas_priv->dev);
- return 0;
+ ret = component_add(tas_priv->dev, &tas2781_hda_comp_ops);
+ if (ret) {
+ dev_err(tas_priv->dev, "Register component fail: %d\n", ret);
+ pm_runtime_disable(tas_priv->dev);
+ tas2781_hda_remove(&spi->dev, &tas2781_hda_comp_ops);
+ }
+
+ return ret;
}
static void tas2781_hda_spi_remove(struct spi_device *spi)
{
- tas2781_hda_remove(&spi->dev);
+ tas2781_hda_remove(&spi->dev, &tas2781_hda_comp_ops);
}
static int tas2781_runtime_suspend(struct device *dev)
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+ struct tasdevice_priv *tas_priv = tas_hda->priv;
- guard(mutex)(&tas_hda->priv->codec_lock);
+ guard(mutex)(&tas_priv->codec_lock);
- if (tas_hda->priv->playback_started)
- tasdevice_spi_tuning_switch(tas_hda->priv, 1);
+ if (tas_priv->fw_state == TASDEVICE_DSP_FW_ALL_OK
+ && tas_priv->playback_started)
+ tasdevice_tuning_switch(tas_priv, 1);
- tas_hda->priv->cur_book = -1;
- tas_hda->priv->cur_conf = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_book = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_conf = -1;
return 0;
}
@@ -1170,11 +858,13 @@ static int tas2781_runtime_suspend(struct device *dev)
static int tas2781_runtime_resume(struct device *dev)
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+ struct tasdevice_priv *tas_priv = tas_hda->priv;
- guard(mutex)(&tas_hda->priv->codec_lock);
+ guard(mutex)(&tas_priv->codec_lock);
- if (tas_hda->priv->playback_started)
- tasdevice_spi_tuning_switch(tas_hda->priv, 0);
+ if (tas_priv->fw_state == TASDEVICE_DSP_FW_ALL_OK
+ && tas_priv->playback_started)
+ tasdevice_tuning_switch(tas_priv, 0);
return 0;
}
@@ -1182,6 +872,7 @@ static int tas2781_runtime_resume(struct device *dev)
static int tas2781_system_suspend(struct device *dev)
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+ struct tasdevice_priv *tas_priv = tas_hda->priv;
int ret;
ret = pm_runtime_force_suspend(dev);
@@ -1189,8 +880,9 @@ static int tas2781_system_suspend(struct device *dev)
return ret;
/* Shutdown chip before system suspend */
- if (tas_hda->priv->playback_started)
- tasdevice_spi_tuning_switch(tas_hda->priv, 1);
+ if (tas_priv->fw_state == TASDEVICE_DSP_FW_ALL_OK
+ && tas_priv->playback_started)
+ tasdevice_tuning_switch(tas_priv, 1);
return 0;
}
@@ -1198,32 +890,34 @@ static int tas2781_system_suspend(struct device *dev)
static int tas2781_system_resume(struct device *dev)
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+ struct tasdevice_priv *tas_priv = tas_hda->priv;
int ret, val;
ret = pm_runtime_force_resume(dev);
if (ret)
return ret;
- guard(mutex)(&tas_hda->priv->codec_lock);
- ret = tasdevice_spi_dev_read(tas_hda->priv, TAS2781_REG_CLK_CONFIG,
- &val);
+ guard(mutex)(&tas_priv->codec_lock);
+ ret = tas_priv->dev_read(tas_priv, tas_priv->index,
+ TAS2781_REG_CLK_CONFIG, &val);
if (ret < 0)
return ret;
if (val == TAS2781_REG_CLK_CONFIG_RESET) {
- tas_hda->priv->cur_book = -1;
- tas_hda->priv->cur_conf = -1;
- tas_hda->priv->cur_prog = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_book = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_conf = -1;
+ tas_priv->tasdevice[tas_priv->index].cur_prog = -1;
- ret = tasdevice_spi_prmg_load(tas_hda->priv, 0);
+ ret = tasdevice_prmg_load(tas_priv, 0);
if (ret < 0) {
- dev_err(tas_hda->priv->dev,
+ dev_err(tas_priv->dev,
"FW download failed = %d\n", ret);
return ret;
}
+ tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK;
- if (tas_hda->priv->playback_started)
- tasdevice_spi_tuning_switch(tas_hda->priv, 0);
+ if (tas_priv->playback_started)
+ tasdevice_tuning_switch(tas_priv, 0);
}
return ret;
@@ -1260,3 +954,5 @@ module_spi_driver(tas2781_hda_spi_driver);
MODULE_DESCRIPTION("TAS2781 HDA SPI Driver");
MODULE_AUTHOR("Baojun, Xu, <baojun.xug@ti.com>");
MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("SND_SOC_TAS2781_FMWLIB");
+MODULE_IMPORT_NS("SND_HDA_SCODEC_TAS2781");
diff --git a/sound/pci/hda/tas2781_spi_fwlib.c b/sound/pci/hda/tas2781_spi_fwlib.c
deleted file mode 100644
index d90d022d8449..000000000000
--- a/sound/pci/hda/tas2781_spi_fwlib.c
+++ /dev/null
@@ -1,2006 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-//
-// TAS2781 HDA SPI driver
-//
-// Copyright 2024 - 2025 Texas Instruments, Inc.
-//
-// Author: Baojun Xu <baojun.xu@ti.com>
-
-#include <linux/crc8.h>
-#include <linux/firmware.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/unaligned.h>
-#include <sound/pcm_params.h>
-#include <sound/soc.h>
-#include <sound/tas2781-dsp.h>
-#include <sound/tlv.h>
-
-#include "tas2781-spi.h"
-
-#define OFFSET_ERROR_BIT BIT(31)
-
-#define ERROR_PRAM_CRCCHK 0x0000000
-#define ERROR_YRAM_CRCCHK 0x0000001
-#define PPC_DRIVER_CRCCHK 0x00000200
-
-#define TAS2781_SA_COEFF_SWAP_REG TASDEVICE_REG(0, 0x35, 0x2c)
-#define TAS2781_YRAM_BOOK1 140
-#define TAS2781_YRAM1_PAGE 42
-#define TAS2781_YRAM1_START_REG 88
-
-#define TAS2781_YRAM2_START_PAGE 43
-#define TAS2781_YRAM2_END_PAGE 49
-#define TAS2781_YRAM2_START_REG 8
-#define TAS2781_YRAM2_END_REG 127
-
-#define TAS2781_YRAM3_PAGE 50
-#define TAS2781_YRAM3_START_REG 8
-#define TAS2781_YRAM3_END_REG 27
-
-/* should not include B0_P53_R44-R47 */
-#define TAS2781_YRAM_BOOK2 0
-#define TAS2781_YRAM4_START_PAGE 50
-#define TAS2781_YRAM4_END_PAGE 60
-
-#define TAS2781_YRAM5_PAGE 61
-#define TAS2781_YRAM5_START_REG TAS2781_YRAM3_START_REG
-#define TAS2781_YRAM5_END_REG TAS2781_YRAM3_END_REG
-
-#define TASDEVICE_MAXPROGRAM_NUM_KERNEL 5
-#define TASDEVICE_MAXCONFIG_NUM_KERNEL_MULTIPLE_AMPS 64
-#define TASDEVICE_MAXCONFIG_NUM_KERNEL 10
-#define MAIN_ALL_DEVICES_1X 0x01
-#define MAIN_DEVICE_A_1X 0x02
-#define MAIN_DEVICE_B_1X 0x03
-#define MAIN_DEVICE_C_1X 0x04
-#define MAIN_DEVICE_D_1X 0x05
-#define COEFF_DEVICE_A_1X 0x12
-#define COEFF_DEVICE_B_1X 0x13
-#define COEFF_DEVICE_C_1X 0x14
-#define COEFF_DEVICE_D_1X 0x15
-#define PRE_DEVICE_A_1X 0x22
-#define PRE_DEVICE_B_1X 0x23
-#define PRE_DEVICE_C_1X 0x24
-#define PRE_DEVICE_D_1X 0x25
-#define PRE_SOFTWARE_RESET_DEVICE_A 0x41
-#define PRE_SOFTWARE_RESET_DEVICE_B 0x42
-#define PRE_SOFTWARE_RESET_DEVICE_C 0x43
-#define PRE_SOFTWARE_RESET_DEVICE_D 0x44
-#define POST_SOFTWARE_RESET_DEVICE_A 0x45
-#define POST_SOFTWARE_RESET_DEVICE_B 0x46
-#define POST_SOFTWARE_RESET_DEVICE_C 0x47
-#define POST_SOFTWARE_RESET_DEVICE_D 0x48
-
-struct tas_crc {
- unsigned char offset;
- unsigned char len;
-};
-
-struct blktyp_devidx_map {
- unsigned char blktyp;
- unsigned char dev_idx;
-};
-
-/* fixed m68k compiling issue: mapping table can save code field */
-static const struct blktyp_devidx_map ppc3_tas2781_mapping_table[] = {
- { MAIN_ALL_DEVICES_1X, 0x80 },
- { MAIN_DEVICE_A_1X, 0x81 },
- { COEFF_DEVICE_A_1X, 0x81 },
- { PRE_DEVICE_A_1X, 0x81 },
- { PRE_SOFTWARE_RESET_DEVICE_A, 0xC1 },
- { POST_SOFTWARE_RESET_DEVICE_A, 0xC1 },
- { MAIN_DEVICE_B_1X, 0x82 },
- { COEFF_DEVICE_B_1X, 0x82 },
- { PRE_DEVICE_B_1X, 0x82 },
- { PRE_SOFTWARE_RESET_DEVICE_B, 0xC2 },
- { POST_SOFTWARE_RESET_DEVICE_B, 0xC2 },
- { MAIN_DEVICE_C_1X, 0x83 },
- { COEFF_DEVICE_C_1X, 0x83 },
- { PRE_DEVICE_C_1X, 0x83 },
- { PRE_SOFTWARE_RESET_DEVICE_C, 0xC3 },
- { POST_SOFTWARE_RESET_DEVICE_C, 0xC3 },
- { MAIN_DEVICE_D_1X, 0x84 },
- { COEFF_DEVICE_D_1X, 0x84 },
- { PRE_DEVICE_D_1X, 0x84 },
- { PRE_SOFTWARE_RESET_DEVICE_D, 0xC4 },
- { POST_SOFTWARE_RESET_DEVICE_D, 0xC4 },
-};
-
-static const struct blktyp_devidx_map ppc3_mapping_table[] = {
- { MAIN_ALL_DEVICES_1X, 0x80 },
- { MAIN_DEVICE_A_1X, 0x81 },
- { COEFF_DEVICE_A_1X, 0xC1 },
- { PRE_DEVICE_A_1X, 0xC1 },
- { MAIN_DEVICE_B_1X, 0x82 },
- { COEFF_DEVICE_B_1X, 0xC2 },
- { PRE_DEVICE_B_1X, 0xC2 },
- { MAIN_DEVICE_C_1X, 0x83 },
- { COEFF_DEVICE_C_1X, 0xC3 },
- { PRE_DEVICE_C_1X, 0xC3 },
- { MAIN_DEVICE_D_1X, 0x84 },
- { COEFF_DEVICE_D_1X, 0xC4 },
- { PRE_DEVICE_D_1X, 0xC4 },
-};
-
-static const struct blktyp_devidx_map non_ppc3_mapping_table[] = {
- { MAIN_ALL_DEVICES, 0x80 },
- { MAIN_DEVICE_A, 0x81 },
- { COEFF_DEVICE_A, 0xC1 },
- { PRE_DEVICE_A, 0xC1 },
- { MAIN_DEVICE_B, 0x82 },
- { COEFF_DEVICE_B, 0xC2 },
- { PRE_DEVICE_B, 0xC2 },
- { MAIN_DEVICE_C, 0x83 },
- { COEFF_DEVICE_C, 0xC3 },
- { PRE_DEVICE_C, 0xC3 },
- { MAIN_DEVICE_D, 0x84 },
- { COEFF_DEVICE_D, 0xC4 },
- { PRE_DEVICE_D, 0xC4 },
-};
-
-/*
- * Device support different configurations for different scene,
- * like voice, music, calibration, was write in regbin file.
- * Will be stored into tas_priv after regbin was loaded.
- */
-static struct tasdevice_config_info *tasdevice_add_config(
- struct tasdevice_priv *tas_priv, unsigned char *config_data,
- unsigned int config_size, int *status)
-{
- struct tasdevice_config_info *cfg_info;
- struct tasdev_blk_data **bk_da;
- unsigned int config_offset = 0;
- unsigned int i;
-
- /*
- * In most projects are many audio cases, such as music, handfree,
- * receiver, games, audio-to-haptics, PMIC record, bypass mode,
- * portrait, landscape, etc. Even in multiple audios, one or
- * two of the chips will work for the special case, such as
- * ultrasonic application. In order to support these variable-numbers
- * of audio cases, flexible configs have been introduced in the
- * DSP firmware.
- */
- cfg_info = kzalloc(sizeof(*cfg_info), GFP_KERNEL);
- if (!cfg_info) {
- *status = -ENOMEM;
- return NULL;
- }
-
- if (tas_priv->rcabin.fw_hdr.binary_version_num >= 0x105) {
- if ((config_offset + 64) > config_size) {
- *status = -EINVAL;
- dev_err(tas_priv->dev, "add conf: Out of boundary\n");
- goto config_err;
- }
- config_offset += 64;
- }
-
- if ((config_offset + 4) > config_size) {
- *status = -EINVAL;
- dev_err(tas_priv->dev, "add config: Out of boundary\n");
- goto config_err;
- }
-
- /*
- * convert data[offset], data[offset + 1], data[offset + 2] and
- * data[offset + 3] into host
- */
- cfg_info->nblocks = get_unaligned_be32(&config_data[config_offset]);
- config_offset += 4;
-
- /*
- * Several kinds of dsp/algorithm firmwares can run on tas2781,
- * the number and size of blk are not fixed and different among
- * these firmwares.
- */
- bk_da = cfg_info->blk_data = kcalloc(cfg_info->nblocks,
- sizeof(*bk_da), GFP_KERNEL);
- if (!bk_da) {
- *status = -ENOMEM;
- goto config_err;
- }
- cfg_info->real_nblocks = 0;
- for (i = 0; i < cfg_info->nblocks; i++) {
- if (config_offset + 12 > config_size) {
- *status = -EINVAL;
- dev_err(tas_priv->dev,
- "%s: Out of boundary: i = %d nblocks = %u!\n",
- __func__, i, cfg_info->nblocks);
- goto block_err;
- }
- bk_da[i] = kzalloc(sizeof(*bk_da[i]), GFP_KERNEL);
- if (!bk_da[i]) {
- *status = -ENOMEM;
- goto block_err;
- }
-
- bk_da[i]->dev_idx = config_data[config_offset];
- config_offset++;
-
- bk_da[i]->block_type = config_data[config_offset];
- config_offset++;
-
- bk_da[i]->yram_checksum =
- get_unaligned_be16(&config_data[config_offset]);
- config_offset += 2;
- bk_da[i]->block_size =
- get_unaligned_be32(&config_data[config_offset]);
- config_offset += 4;
-
- bk_da[i]->n_subblks =
- get_unaligned_be32(&config_data[config_offset]);
-
- config_offset += 4;
-
- if (config_offset + bk_da[i]->block_size > config_size) {
- *status = -EINVAL;
- dev_err(tas_priv->dev,
- "%s: Out of boundary: i = %d blks = %u!\n",
- __func__, i, cfg_info->nblocks);
- goto block_err;
- }
- /* instead of kzalloc+memcpy */
- bk_da[i]->regdata = kmemdup(&config_data[config_offset],
- bk_da[i]->block_size, GFP_KERNEL);
- if (!bk_da[i]->regdata) {
- *status = -ENOMEM;
- i++;
- goto block_err;
- }
-
- config_offset += bk_da[i]->block_size;
- cfg_info->real_nblocks += 1;
- }
-
- return cfg_info;
-block_err:
- for (int j = 0; j < i; j++)
- kfree(bk_da[j]);
- kfree(bk_da);
-config_err:
- kfree(cfg_info);
- return NULL;
-}
-
-/* Regbin file parser function. */
-int tasdevice_spi_rca_parser(void *context, const struct firmware *fmw)
-{
- struct tasdevice_priv *tas_priv = context;
- struct tasdevice_config_info **cfg_info;
- struct tasdevice_rca_hdr *fw_hdr;
- struct tasdevice_rca *rca;
- unsigned int total_config_sz = 0;
- int offset = 0, ret = 0, i;
- unsigned char *buf;
-
- rca = &tas_priv->rcabin;
- fw_hdr = &rca->fw_hdr;
- if (!fmw || !fmw->data) {
- dev_err(tas_priv->dev, "Failed to read %s\n",
- tas_priv->rca_binaryname);
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return -EINVAL;
- }
- buf = (unsigned char *)fmw->data;
- fw_hdr->img_sz = get_unaligned_be32(&buf[offset]);
- offset += 4;
- if (fw_hdr->img_sz != fmw->size) {
- dev_err(tas_priv->dev,
- "File size not match, %d %u", (int)fmw->size,
- fw_hdr->img_sz);
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return -EINVAL;
- }
-
- fw_hdr->checksum = get_unaligned_be32(&buf[offset]);
- offset += 4;
- fw_hdr->binary_version_num = get_unaligned_be32(&buf[offset]);
- if (fw_hdr->binary_version_num < 0x103) {
- dev_err(tas_priv->dev, "File version 0x%04x is too low",
- fw_hdr->binary_version_num);
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return -EINVAL;
- }
- offset += 4;
- fw_hdr->drv_fw_version = get_unaligned_be32(&buf[offset]);
- offset += 8;
- fw_hdr->plat_type = buf[offset++];
- fw_hdr->dev_family = buf[offset++];
- fw_hdr->reserve = buf[offset++];
- fw_hdr->ndev = buf[offset++];
- if (offset + TASDEVICE_DEVICE_SUM > fw_hdr->img_sz) {
- dev_err(tas_priv->dev, "rca_ready: Out of boundary!\n");
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return -EINVAL;
- }
-
- for (i = 0; i < TASDEVICE_DEVICE_SUM; i++, offset++)
- fw_hdr->devs[i] = buf[offset];
-
- fw_hdr->nconfig = get_unaligned_be32(&buf[offset]);
- offset += 4;
-
- for (i = 0; i < TASDEVICE_CONFIG_SUM; i++) {
- fw_hdr->config_size[i] = get_unaligned_be32(&buf[offset]);
- offset += 4;
- total_config_sz += fw_hdr->config_size[i];
- }
-
- if (fw_hdr->img_sz - total_config_sz != (unsigned int)offset) {
- dev_err(tas_priv->dev, "Bin file err %d - %d != %d!\n",
- fw_hdr->img_sz, total_config_sz, (int)offset);
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return -EINVAL;
- }
-
- cfg_info = kcalloc(fw_hdr->nconfig, sizeof(*cfg_info), GFP_KERNEL);
- if (!cfg_info) {
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return -ENOMEM;
- }
- rca->cfg_info = cfg_info;
- rca->ncfgs = 0;
- for (i = 0; i < (int)fw_hdr->nconfig; i++) {
- rca->ncfgs += 1;
- cfg_info[i] = tasdevice_add_config(tas_priv, &buf[offset],
- fw_hdr->config_size[i], &ret);
- if (ret) {
- tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL;
- return ret;
- }
- offset += (int)fw_hdr->config_size[i];
- }
-
- return ret;
-}
-
-/* fixed m68k compiling issue: mapping table can save code field */
-static unsigned char map_dev_idx(struct tasdevice_fw *tas_fmw,
- struct tasdev_blk *block)
-{
- struct blktyp_devidx_map *p =
- (struct blktyp_devidx_map *)non_ppc3_mapping_table;
- struct tasdevice_dspfw_hdr *fw_hdr = &tas_fmw->fw_hdr;
- struct tasdevice_fw_fixed_hdr *fw_fixed_hdr = &fw_hdr->fixed_hdr;
- int i, n = ARRAY_SIZE(non_ppc3_mapping_table);
- unsigned char dev_idx = 0;
-
- if (fw_fixed_hdr->ppcver >= PPC3_VERSION_TAS2781_BASIC_MIN) {
- p = (struct blktyp_devidx_map *)ppc3_tas2781_mapping_table;
- n = ARRAY_SIZE(ppc3_tas2781_mapping_table);
- } else if (fw_fixed_hdr->ppcver >= PPC3_VERSION_BASE) {
- p = (struct blktyp_devidx_map *)ppc3_mapping_table;
- n = ARRAY_SIZE(ppc3_mapping_table);
- }
-
- for (i = 0; i < n; i++) {
- if (block->type == p[i].blktyp) {
- dev_idx = p[i].dev_idx;
- break;
- }
- }
-
- return dev_idx;
-}
-
-/* Block parser function. */
-static int fw_parse_block_data_kernel(struct tasdevice_fw *tas_fmw,
- struct tasdev_blk *block, const struct firmware *fmw, int offset)
-{
- const unsigned char *data = fmw->data;
-
- if (offset + 16 > fmw->size) {
- dev_err(tas_fmw->dev, "%s: File Size error\n", __func__);
- return -EINVAL;
- }
-
- /*
- * Convert data[offset], data[offset + 1], data[offset + 2] and
- * data[offset + 3] into host.
- */
- block->type = get_unaligned_be32(&data[offset]);
- offset += 4;
-
- block->is_pchksum_present = data[offset++];
- block->pchksum = data[offset++];
- block->is_ychksum_present = data[offset++];
- block->ychksum = data[offset++];
- block->blk_size = get_unaligned_be32(&data[offset]);
- offset += 4;
- block->nr_subblocks = get_unaligned_be32(&data[offset]);
- offset += 4;
-
- /*
- * Fixed m68k compiling issue:
- * 1. mapping table can save code field.
- * 2. storing the dev_idx as a member of block can reduce unnecessary
- * time and system resource comsumption of dev_idx mapping every
- * time the block data writing to the dsp.
- */
- block->dev_idx = map_dev_idx(tas_fmw, block);
-
- if (offset + block->blk_size > fmw->size) {
- dev_err(tas_fmw->dev, "%s: nSublocks error\n", __func__);
- return -EINVAL;
- }
- /* instead of kzalloc+memcpy */
- block->data = kmemdup(&data[offset], block->blk_size, GFP_KERNEL);
- if (!block->data)
- return -ENOMEM;
-
- offset += block->blk_size;
-
- return offset;
-}
-
-/* Data of block parser function. */
-static int fw_parse_data_kernel(struct tasdevice_fw *tas_fmw,
- struct tasdevice_data *img_data, const struct firmware *fmw,
- int offset)
-{
- const unsigned char *data = fmw->data;
- struct tasdev_blk *blk;
- unsigned int i;
-
- if (offset + 4 > fmw->size) {
- dev_err(tas_fmw->dev, "%s: File Size error\n", __func__);
- return -EINVAL;
- }
- img_data->nr_blk = get_unaligned_be32(&data[offset]);
- offset += 4;
-
- img_data->dev_blks = kcalloc(img_data->nr_blk,
- sizeof(struct tasdev_blk), GFP_KERNEL);
- if (!img_data->dev_blks)
- return -ENOMEM;
-
- for (i = 0; i < img_data->nr_blk; i++) {
- blk = &img_data->dev_blks[i];
- offset = fw_parse_block_data_kernel(
- tas_fmw, blk, fmw, offset);
- if (offset < 0) {
- kfree(img_data->dev_blks);
- return -EINVAL;
- }
- }
-
- return offset;
-}
-
-/* Data of DSP program parser function. */
-static int fw_parse_program_data_kernel(
- struct tasdevice_priv *tas_priv, struct tasdevice_fw *tas_fmw,
- const struct firmware *fmw, int offset)
-{
- struct tasdevice_prog *program;
- unsigned int i;
-
- for (i = 0; i < tas_fmw->nr_programs; i++) {
- program = &tas_fmw->programs[i];
- if (offset + 72 > fmw->size) {
- dev_err(tas_priv->dev, "%s: mpName error\n", __func__);
- return -EINVAL;
- }
- /* skip 72 unused byts */
- offset += 72;
-
- offset = fw_parse_data_kernel(tas_fmw, &program->dev_data,
- fmw, offset);
- if (offset < 0)
- break;
- }
-
- return offset;
-}
-
-/* Data of DSP configurations parser function. */
-static int fw_parse_configuration_data_kernel(struct tasdevice_priv *tas_priv,
- struct tasdevice_fw *tas_fmw, const struct firmware *fmw, int offset)
-{
- const unsigned char *data = fmw->data;
- struct tasdevice_config *config;
- unsigned int i;
-
- for (i = 0; i < tas_fmw->nr_configurations; i++) {
- config = &tas_fmw->configs[i];
- if (offset + 80 > fmw->size) {
- dev_err(tas_priv->dev, "%s: mpName error\n", __func__);
- return -EINVAL;
- }
- memcpy(config->name, &data[offset], 64);
- /* skip extra 16 bytes */
- offset += 80;
-
- offset = fw_parse_data_kernel(tas_fmw, &config->dev_data,
- fmw, offset);
- if (offset < 0)
- break;
- }
-
- return offset;
-}
-
-/* DSP firmware file header parser function for early PPC3 firmware binary. */
-static int fw_parse_variable_header_kernel(struct tasdevice_priv *tas_priv,
- const struct firmware *fmw, int offset)
-{
- struct tasdevice_fw *tas_fmw = tas_priv->fmw;
- struct tasdevice_dspfw_hdr *fw_hdr = &tas_fmw->fw_hdr;
- struct tasdevice_config *config;
- struct tasdevice_prog *program;
- const unsigned char *buf = fmw->data;
- unsigned short max_confs;
- unsigned int i;
-
- if (offset + 12 + 4 * TASDEVICE_MAXPROGRAM_NUM_KERNEL > fmw->size) {
- dev_err(tas_priv->dev, "%s: File Size error\n", __func__);
- return -EINVAL;
- }
- fw_hdr->device_family = get_unaligned_be16(&buf[offset]);
- if (fw_hdr->device_family != 0) {
- dev_err(tas_priv->dev, "%s:not TAS device\n", __func__);
- return -EINVAL;
- }
- offset += 2;
- fw_hdr->device = get_unaligned_be16(&buf[offset]);
- if (fw_hdr->device >= TASDEVICE_DSP_TAS_MAX_DEVICE ||
- fw_hdr->device == 6) {
- dev_err(tas_priv->dev, "Unsupported dev %d\n", fw_hdr->device);
- return -EINVAL;
- }
- offset += 2;
-
- tas_fmw->nr_programs = get_unaligned_be32(&buf[offset]);
- offset += 4;
-
- if (tas_fmw->nr_programs == 0 ||
- tas_fmw->nr_programs > TASDEVICE_MAXPROGRAM_NUM_KERNEL) {
- dev_err(tas_priv->dev, "mnPrograms is invalid\n");
- return -EINVAL;
- }
-
- tas_fmw->programs = kcalloc(tas_fmw->nr_programs,
- sizeof(*tas_fmw->programs), GFP_KERNEL);
- if (!tas_fmw->programs)
- return -ENOMEM;
-
- for (i = 0; i < tas_fmw->nr_programs; i++) {
- program = &tas_fmw->programs[i];
- program->prog_size = get_unaligned_be32(&buf[offset]);
- offset += 4;
- }
-
- /* Skip the unused prog_size */
- offset += 4 * (TASDEVICE_MAXPROGRAM_NUM_KERNEL - tas_fmw->nr_programs);
-
- tas_fmw->nr_configurations = get_unaligned_be32(&buf[offset]);
- offset += 4;
-
- /*
- * The max number of config in firmware greater than 4 pieces of
- * tas2781s is different from the one lower than 4 pieces of
- * tas2781s.
- */
- max_confs = TASDEVICE_MAXCONFIG_NUM_KERNEL;
- if (tas_fmw->nr_configurations == 0 ||
- tas_fmw->nr_configurations > max_confs) {
- dev_err(tas_priv->dev, "%s: Conf is invalid\n", __func__);
- kfree(tas_fmw->programs);
- return -EINVAL;
- }
-
- if (offset + 4 * max_confs > fmw->size) {
- dev_err(tas_priv->dev, "%s: mpConfigurations err\n", __func__);
- kfree(tas_fmw->programs);
- return -EINVAL;
- }
-
- tas_fmw->configs = kcalloc(tas_fmw->nr_configurations,
- sizeof(*tas_fmw->configs), GFP_KERNEL);
- if (!tas_fmw->configs) {
- kfree(tas_fmw->programs);
- return -ENOMEM;
- }
-
- for (i = 0; i < tas_fmw->nr_programs; i++) {
- config = &tas_fmw->configs[i];
- config->cfg_size = get_unaligned_be32(&buf[offset]);
- offset += 4;
- }
-
- /* Skip the unused configs */
- offset += 4 * (max_confs - tas_fmw->nr_programs);
-
- return offset;
-}
-
-/*
- * In sub-block data, have three type sub-block:
- * 1. Single byte write.
- * 2. Multi-byte write.
- * 3. Delay.
- * 4. Bits update.
- * This function perform single byte write to device.
- */
-static int tasdevice_single_byte_wr(void *context, int dev_idx,
- unsigned char *data, int sublocksize)
-{
- struct tasdevice_priv *tas_priv = context;
- unsigned short len = get_unaligned_be16(&data[2]);
- int i, subblk_offset, rc;
-
- subblk_offset = 4;
- if (subblk_offset + 4 * len > sublocksize) {
- dev_err(tas_priv->dev, "process_block: Out of boundary\n");
- return 0;
- }
-
- for (i = 0; i < len; i++) {
- if (dev_idx == (tas_priv->index + 1) || dev_idx == 0) {
- rc = tasdevice_spi_dev_write(tas_priv,
- TASDEVICE_REG(data[subblk_offset],
- data[subblk_offset + 1],
- data[subblk_offset + 2]),
- data[subblk_offset + 3]);
- if (rc < 0) {
- dev_err(tas_priv->dev,
- "process_block: single write error\n");
- subblk_offset |= OFFSET_ERROR_BIT;
- }
- }
- subblk_offset += 4;
- }
-
- return subblk_offset;
-}
-
-/*
- * In sub-block data, have three type sub-block:
- * 1. Single byte write.
- * 2. Multi-byte write.
- * 3. Delay.
- * 4. Bits update.
- * This function perform multi-write to device.
- */
-static int tasdevice_burst_wr(void *context, int dev_idx, unsigned char *data,
- int sublocksize)
-{
- struct tasdevice_priv *tas_priv = context;
- unsigned short len = get_unaligned_be16(&data[2]);
- int subblk_offset, rc;
-
- subblk_offset = 4;
- if (subblk_offset + 4 + len > sublocksize) {
- dev_err(tas_priv->dev, "%s: BST Out of boundary\n", __func__);
- subblk_offset |= OFFSET_ERROR_BIT;
- }
- if (len % 4) {
- dev_err(tas_priv->dev, "%s:Bst-len(%u)not div by 4\n",
- __func__, len);
- subblk_offset |= OFFSET_ERROR_BIT;
- }
-
- if (dev_idx == (tas_priv->index + 1) || dev_idx == 0) {
- rc = tasdevice_spi_dev_bulk_write(tas_priv,
- TASDEVICE_REG(data[subblk_offset],
- data[subblk_offset + 1],
- data[subblk_offset + 2]),
- &data[subblk_offset + 4], len);
- if (rc < 0) {
- dev_err(tas_priv->dev, "%s: bulk_write error = %d\n",
- __func__, rc);
- subblk_offset |= OFFSET_ERROR_BIT;
- }
- }
- subblk_offset += (len + 4);
-
- return subblk_offset;
-}
-
-/* Just delay for ms.*/
-static int tasdevice_delay(void *context, int dev_idx, unsigned char *data,
- int sublocksize)
-{
- struct tasdevice_priv *tas_priv = context;
- unsigned int sleep_time, subblk_offset = 2;
-
- if (subblk_offset + 2 > sublocksize) {
- dev_err(tas_priv->dev, "%s: delay Out of boundary\n",
- __func__);
- subblk_offset |= OFFSET_ERROR_BIT;
- }
- if (dev_idx == (tas_priv->index + 1) || dev_idx == 0) {
- sleep_time = get_unaligned_be16(&data[2]) * 1000;
- fsleep(sleep_time);
- }
- subblk_offset += 2;
-
- return subblk_offset;
-}
-
-/*
- * In sub-block data, have three type sub-block:
- * 1. Single byte write.
- * 2. Multi-byte write.
- * 3. Delay.
- * 4. Bits update.
- * This function perform bits update.
- */
-static int tasdevice_field_wr(void *context, int dev_idx, unsigned char *data,
- int sublocksize)
-{
- struct tasdevice_priv *tas_priv = context;
- int rc, subblk_offset = 2;
-
- if (subblk_offset + 6 > sublocksize) {
- dev_err(tas_priv->dev, "%s: bit write Out of boundary\n",
- __func__);
- subblk_offset |= OFFSET_ERROR_BIT;
- }
- if (dev_idx == (tas_priv->index + 1) || dev_idx == 0) {
- rc = tasdevice_spi_dev_update_bits(tas_priv,
- TASDEVICE_REG(data[subblk_offset + 2],
- data[subblk_offset + 3],
- data[subblk_offset + 4]),
- data[subblk_offset + 1],
- data[subblk_offset + 5]);
- if (rc < 0) {
- dev_err(tas_priv->dev, "%s: update_bits error = %d\n",
- __func__, rc);
- subblk_offset |= OFFSET_ERROR_BIT;
- }
- }
- subblk_offset += 6;
-
- return subblk_offset;
-}
-
-/* Data block process function. */
-static int tasdevice_process_block(void *context, unsigned char *data,
- unsigned char dev_idx, int sublocksize)
-{
- struct tasdevice_priv *tas_priv = context;
- int blktyp = dev_idx & 0xC0, subblk_offset;
- unsigned char subblk_typ = data[1];
-
- switch (subblk_typ) {
- case TASDEVICE_CMD_SING_W:
- subblk_offset = tasdevice_single_byte_wr(tas_priv,
- dev_idx & 0x3f, data, sublocksize);
- break;
- case TASDEVICE_CMD_BURST:
- subblk_offset = tasdevice_burst_wr(tas_priv,
- dev_idx & 0x3f, data, sublocksize);
- break;
- case TASDEVICE_CMD_DELAY:
- subblk_offset = tasdevice_delay(tas_priv,
- dev_idx & 0x3f, data, sublocksize);
- break;
- case TASDEVICE_CMD_FIELD_W:
- subblk_offset = tasdevice_field_wr(tas_priv,
- dev_idx & 0x3f, data, sublocksize);
- break;
- default:
- subblk_offset = 2;
- break;
- }
- if (((subblk_offset & OFFSET_ERROR_BIT) != 0) && blktyp != 0) {
- if (blktyp == 0x80) {
- tas_priv->cur_prog = -1;
- tas_priv->cur_conf = -1;
- } else
- tas_priv->cur_conf = -1;
- }
- subblk_offset &= ~OFFSET_ERROR_BIT;
-
- return subblk_offset;
-}
-
-/*
- * Device support different configurations for different scene,
- * this function was used for choose different config.
- */
-void tasdevice_spi_select_cfg_blk(void *pContext, int conf_no,
- unsigned char block_type)
-{
- struct tasdevice_priv *tas_priv = pContext;
- struct tasdevice_rca *rca = &tas_priv->rcabin;
- struct tasdevice_config_info **cfg_info = rca->cfg_info;
- struct tasdev_blk_data **blk_data;
- unsigned int j, k;
-
- if (conf_no >= rca->ncfgs || conf_no < 0 || !cfg_info) {
- dev_err(tas_priv->dev, "conf_no should be not more than %u\n",
- rca->ncfgs);
- return;
- }
- blk_data = cfg_info[conf_no]->blk_data;
-
- for (j = 0; j < cfg_info[conf_no]->real_nblocks; j++) {
- unsigned int length = 0, rc = 0;
-
- if (block_type > 5 || block_type < 2) {
- dev_err(tas_priv->dev,
- "block_type should be in range from 2 to 5\n");
- break;
- }
- if (block_type != blk_data[j]->block_type)
- continue;
-
- for (k = 0; k < blk_data[j]->n_subblks; k++) {
- tas_priv->is_loading = true;
-
- rc = tasdevice_process_block(tas_priv,
- blk_data[j]->regdata + length,
- blk_data[j]->dev_idx,
- blk_data[j]->block_size - length);
- length += rc;
- if (blk_data[j]->block_size < length) {
- dev_err(tas_priv->dev,
- "%s: %u %u out of boundary\n",
- __func__, length,
- blk_data[j]->block_size);
- break;
- }
- }
- if (length != blk_data[j]->block_size)
- dev_err(tas_priv->dev, "%s: %u %u size is not same\n",
- __func__, length, blk_data[j]->block_size);
- }
-}
-
-/* Block process function. */
-static int tasdevice_load_block_kernel(
- struct tasdevice_priv *tasdevice, struct tasdev_blk *block)
-{
- const unsigned int blk_size = block->blk_size;
- unsigned char *data = block->data;
- unsigned int i, length;
-
- for (i = 0, length = 0; i < block->nr_subblocks; i++) {
- int rc = tasdevice_process_block(tasdevice, data + length,
- block->dev_idx, blk_size - length);
- if (rc < 0) {
- dev_err(tasdevice->dev,
- "%s: %u %u sublock write error\n",
- __func__, length, blk_size);
- return rc;
- }
- length += rc;
- if (blk_size < length) {
- dev_err(tasdevice->dev, "%s: %u %u out of boundary\n",
- __func__, length, blk_size);
- rc = -ENOMEM;
- return rc;
- }
- }
-
- return 0;
-}
-
-/* DSP firmware file header parser function. */
-static int fw_parse_variable_hdr(struct tasdevice_priv *tas_priv,
- struct tasdevice_dspfw_hdr *fw_hdr,
- const struct firmware *fmw, int offset)
-{
- const unsigned char *buf = fmw->data;
- int len = strlen((char *)&buf[offset]);
-
- len++;
-
- if (offset + len + 8 > fmw->size) {
- dev_err(tas_priv->dev, "%s: File Size error\n", __func__);
- return -EINVAL;
- }
-
- offset += len;
-
- fw_hdr->device_family = get_unaligned_be32(&buf[offset]);
- if (fw_hdr->device_family != 0) {
- dev_err(tas_priv->dev, "%s: not TAS device\n", __func__);
- return -EINVAL;
- }
- offset += 4;
-
- fw_hdr->device = get_unaligned_be32(&buf[offset]);
- if (fw_hdr->device >= TASDEVICE_DSP_TAS_MAX_DEVICE ||
- fw_hdr->device == 6) {
- dev_err(tas_priv->dev, "Unsupported dev %d\n", fw_hdr->device);
- return -EINVAL;
- }
- offset += 4;
- fw_hdr->ndev = 1;
-
- return offset;
-}
-
-/* DSP firmware file header parser function for size variabled header. */
-static int fw_parse_variable_header_git(struct tasdevice_priv
- *tas_priv, const struct firmware *fmw, int offset)
-{
- struct tasdevice_fw *tas_fmw = tas_priv->fmw;
- struct tasdevice_dspfw_hdr *fw_hdr = &tas_fmw->fw_hdr;
-
- offset = fw_parse_variable_hdr(tas_priv, fw_hdr, fmw, offset);
-
- return offset;
-}
-
-/* DSP firmware file block parser function. */
-static int fw_parse_block_data(struct tasdevice_fw *tas_fmw,
- struct tasdev_blk *block, const struct firmware *fmw, int offset)
-{
- unsigned char *data = (unsigned char *)fmw->data;
- int n;
-
- if (offset + 8 > fmw->size) {
- dev_err(tas_fmw->dev, "%s: Type error\n", __func__);
- return -EINVAL;
- }
- block->type = get_unaligned_be32(&data[offset]);
- offset += 4;
-
- if (tas_fmw->fw_hdr.fixed_hdr.drv_ver >= PPC_DRIVER_CRCCHK) {
- if (offset + 8 > fmw->size) {
- dev_err(tas_fmw->dev, "PChkSumPresent error\n");
- return -EINVAL;
- }
- block->is_pchksum_present = data[offset];
- offset++;
-
- block->pchksum = data[offset];
- offset++;
-
- block->is_ychksum_present = data[offset];
- offset++;
-
- block->ychksum = data[offset];
- offset++;
- } else {
- block->is_pchksum_present = 0;
- block->is_ychksum_present = 0;
- }
-
- block->nr_cmds = get_unaligned_be32(&data[offset]);
- offset += 4;
-
- n = block->nr_cmds * 4;
- if (offset + n > fmw->size) {
- dev_err(tas_fmw->dev,
- "%s: File Size(%lu) error offset = %d n = %d\n",
- __func__, (unsigned long)fmw->size, offset, n);
- return -EINVAL;
- }
- /* instead of kzalloc+memcpy */
- block->data = kmemdup(&data[offset], n, GFP_KERNEL);
- if (!block->data)
- return -ENOMEM;
-
- offset += n;
-
- return offset;
-}
-
-/*
- * When parsing error occurs, all the memory resource will be released
- * in the end of tasdevice_rca_ready.
- */
-static int fw_parse_data(struct tasdevice_fw *tas_fmw,
- struct tasdevice_data *img_data, const struct firmware *fmw,
- int offset)
-{
- const unsigned char *data = (unsigned char *)fmw->data;
- struct tasdev_blk *blk;
- unsigned int i, n;
-
- if (offset + 64 > fmw->size) {
- dev_err(tas_fmw->dev, "%s: Name error\n", __func__);
- return -EINVAL;
- }
- memcpy(img_data->name, &data[offset], 64);
- offset += 64;
-
- n = strlen((char *)&data[offset]);
- n++;
- if (offset + n + 2 > fmw->size) {
- dev_err(tas_fmw->dev, "%s: Description error\n", __func__);
- return -EINVAL;
- }
- offset += n;
- img_data->nr_blk = get_unaligned_be16(&data[offset]);
- offset += 2;
-
- img_data->dev_blks = kcalloc(img_data->nr_blk,
- sizeof(*img_data->dev_blks), GFP_KERNEL);
- if (!img_data->dev_blks)
- return -ENOMEM;
-
- for (i = 0; i < img_data->nr_blk; i++) {
- blk = &img_data->dev_blks[i];
- offset = fw_parse_block_data(tas_fmw, blk, fmw, offset);
- if (offset < 0)
- return -EINVAL;
- }
-
- return offset;
-}
-
-/*
- * When parsing error occurs, all the memory resource will be released
- * in the end of tasdevice_rca_ready.
- */
-static int fw_parse_program_data(struct tasdevice_priv *tas_priv,
- struct tasdevice_fw *tas_fmw, const struct firmware *fmw, int offset)
-{
- unsigned char *buf = (unsigned char *)fmw->data;
- struct tasdevice_prog *program;
- int i;
-
- if (offset + 2 > fmw->size) {
- dev_err(tas_priv->dev, "%s: File Size error\n", __func__);
- return -EINVAL;
- }
- tas_fmw->nr_programs = get_unaligned_be16(&buf[offset]);
- offset += 2;
-
- if (tas_fmw->nr_programs == 0) {
- /* Not error in calibration Data file, return directly */
- dev_dbg(tas_priv->dev, "%s: No Programs data, maybe calbin\n",
- __func__);
- return offset;
- }
-
- tas_fmw->programs =
- kcalloc(tas_fmw->nr_programs, sizeof(*tas_fmw->programs),
- GFP_KERNEL);
- if (!tas_fmw->programs)
- return -ENOMEM;
-
- for (i = 0; i < tas_fmw->nr_programs; i++) {
- int n = 0;
-
- program = &tas_fmw->programs[i];
- if (offset + 64 > fmw->size) {
- dev_err(tas_priv->dev, "%s: mpName error\n", __func__);
- return -EINVAL;
- }
- offset += 64;
-
- n = strlen((char *)&buf[offset]);
- /* skip '\0' and 5 unused bytes */
- n += 6;
- if (offset + n > fmw->size) {
- dev_err(tas_priv->dev, "Description err\n");
- return -EINVAL;
- }
-
- offset += n;
-
- offset = fw_parse_data(tas_fmw, &program->dev_data, fmw,
- offset);
- if (offset < 0)
- return offset;
- }
-
- return offset;
-}
-
-/*
- * When parsing error occurs, all the memory resource will be released
- * in the end of tasdevice_rca_ready.
- */
-static int fw_parse_configuration_data(struct tasdevice_priv *tas_priv,
- struct tasdevice_fw *tas_fmw, const struct firmware *fmw, int offset)
-{
- unsigned char *data = (unsigned char *)fmw->data;
- struct tasdevice_config *config;
- unsigned int i, n;
-
- if (offset + 2 > fmw->size) {
- dev_err(tas_priv->dev, "%s: File Size error\n", __func__);
- return -EINVAL;
- }
- tas_fmw->nr_configurations = get_unaligned_be16(&data[offset]);
- offset += 2;
-
- if (tas_fmw->nr_configurations == 0) {
- dev_err(tas_priv->dev, "%s: Conf is zero\n", __func__);
- /* Not error for calibration Data file, return directly */
- return offset;
- }
- tas_fmw->configs = kcalloc(tas_fmw->nr_configurations,
- sizeof(*tas_fmw->configs), GFP_KERNEL);
- if (!tas_fmw->configs)
- return -ENOMEM;
- for (i = 0; i < tas_fmw->nr_configurations; i++) {
- config = &tas_fmw->configs[i];
- if (offset + 64 > fmw->size) {
- dev_err(tas_priv->dev, "File Size err\n");
- return -EINVAL;
- }
- memcpy(config->name, &data[offset], 64);
- offset += 64;
-
- n = strlen((char *)&data[offset]);
- n += 15;
- if (offset + n > fmw->size) {
- dev_err(tas_priv->dev, "Description err\n");
- return -EINVAL;
- }
- offset += n;
- offset = fw_parse_data(tas_fmw, &config->dev_data,
- fmw, offset);
- if (offset < 0)
- break;
- }
-
- return offset;
-}
-
-/* yram5 page check. */
-static bool check_inpage_yram_rg(struct tas_crc *cd,
- unsigned char reg, unsigned char len)
-{
- bool in = false;
-
- if (reg <= TAS2781_YRAM5_END_REG &&
- reg >= TAS2781_YRAM5_START_REG) {
- if (reg + len > TAS2781_YRAM5_END_REG)
- cd->len = TAS2781_YRAM5_END_REG - reg + 1;
- else
- cd->len = len;
- cd->offset = reg;
- in = true;
- } else if (reg < TAS2781_YRAM5_START_REG) {
- if (reg + len > TAS2781_YRAM5_START_REG) {
- cd->offset = TAS2781_YRAM5_START_REG;
- cd->len = len - TAS2781_YRAM5_START_REG + reg;
- in = true;
- }
- }
-
- return in;
-}
-
-/* DSP firmware yram block check. */
-static bool check_inpage_yram_bk1(struct tas_crc *cd,
- unsigned char page, unsigned char reg, unsigned char len)
-{
- bool in = false;
-
- if (page == TAS2781_YRAM1_PAGE) {
- if (reg >= TAS2781_YRAM1_START_REG) {
- cd->offset = reg;
- cd->len = len;
- in = true;
- } else if (reg + len > TAS2781_YRAM1_START_REG) {
- cd->offset = TAS2781_YRAM1_START_REG;
- cd->len = len - TAS2781_YRAM1_START_REG + reg;
- in = true;
- }
- } else if (page == TAS2781_YRAM3_PAGE) {
- in = check_inpage_yram_rg(cd, reg, len);
- }
-
- return in;
-}
-
-/*
- * Return Code:
- * true -- the registers are in the inpage yram
- * false -- the registers are NOT in the inpage yram
- */
-static bool check_inpage_yram(struct tas_crc *cd, unsigned char book,
- unsigned char page, unsigned char reg, unsigned char len)
-{
- bool in = false;
-
- if (book == TAS2781_YRAM_BOOK1)
- in = check_inpage_yram_bk1(cd, page, reg, len);
- else if (book == TAS2781_YRAM_BOOK2 && page == TAS2781_YRAM5_PAGE)
- in = check_inpage_yram_rg(cd, reg, len);
-
- return in;
-}
-
-/* yram4 page check. */
-static bool check_inblock_yram_bk(struct tas_crc *cd,
- unsigned char page, unsigned char reg, unsigned char len)
-{
- bool in = false;
-
- if ((page >= TAS2781_YRAM4_START_PAGE &&
- page <= TAS2781_YRAM4_END_PAGE) ||
- (page >= TAS2781_YRAM2_START_PAGE &&
- page <= TAS2781_YRAM2_END_PAGE)) {
- if (reg <= TAS2781_YRAM2_END_REG &&
- reg >= TAS2781_YRAM2_START_REG) {
- cd->offset = reg;
- cd->len = len;
- in = true;
- } else if (reg < TAS2781_YRAM2_START_REG) {
- if (reg + len - 1 >= TAS2781_YRAM2_START_REG) {
- cd->offset = TAS2781_YRAM2_START_REG;
- cd->len = reg + len - TAS2781_YRAM2_START_REG;
- in = true;
- }
- }
- }
-
- return in;
-}
-
-/*
- * Return Code:
- * true -- the registers are in the inblock yram
- * false -- the registers are NOT in the inblock yram
- */
-static bool check_inblock_yram(struct tas_crc *cd, unsigned char book,
- unsigned char page, unsigned char reg, unsigned char len)
-{
- bool in = false;
-
- if (book == TAS2781_YRAM_BOOK1 || book == TAS2781_YRAM_BOOK2)
- in = check_inblock_yram_bk(cd, page, reg, len);
-
- return in;
-}
-
-/* yram page check. */
-static bool check_yram(struct tas_crc *cd, unsigned char book,
- unsigned char page, unsigned char reg, unsigned char len)
-{
- bool in;
-
- in = check_inpage_yram(cd, book, page, reg, len);
- if (!in)
- in = check_inblock_yram(cd, book, page, reg, len);
-
- return in;
-}
-
-/* Checksum for data block. */
-static int tasdev_multibytes_chksum(struct tasdevice_priv *tasdevice,
- unsigned char book, unsigned char page,
- unsigned char reg, unsigned int len)
-{
- struct tas_crc crc_data;
- unsigned char crc_chksum = 0;
- unsigned char nBuf1[128];
- int ret = 0, i;
- bool in;
-
- if ((reg + len - 1) > 127) {
- ret = -EINVAL;
- dev_err(tasdevice->dev, "firmware error\n");
- goto end;
- }
-
- if ((book == TASDEVICE_BOOK_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (page == TASDEVICE_PAGE_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (reg == TASDEVICE_REG_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (len == 4)) {
- /* DSP swap command, pass */
- ret = 0;
- goto end;
- }
-
- in = check_yram(&crc_data, book, page, reg, len);
- if (!in)
- goto end;
-
- if (len == 1) {
- dev_err(tasdevice->dev, "firmware error\n");
- ret = -EINVAL;
- goto end;
- }
-
- ret = tasdevice_spi_dev_bulk_read(tasdevice,
- TASDEVICE_REG(book, page, crc_data.offset),
- nBuf1, crc_data.len);
- if (ret < 0)
- goto end;
-
- for (i = 0; i < crc_data.len; i++) {
- if ((book == TASDEVICE_BOOK_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (page == TASDEVICE_PAGE_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- ((i + crc_data.offset) >=
- TASDEVICE_REG_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- ((i + crc_data.offset) <=
- (TASDEVICE_REG_ID(TAS2781_SA_COEFF_SWAP_REG) + 4)))
- /* DSP swap command, bypass */
- continue;
- else
- crc_chksum += crc8(tasdevice->crc8_lkp_tbl, &nBuf1[i],
- 1, 0);
- }
-
- ret = crc_chksum;
-
-end:
- return ret;
-}
-
-/* Checksum for single register. */
-static int do_singlereg_checksum(struct tasdevice_priv *tasdevice,
- unsigned char book, unsigned char page,
- unsigned char reg, unsigned char val)
-{
- struct tas_crc crc_data;
- unsigned int nData1;
- int ret = 0;
- bool in;
-
- /* DSP swap command, pass */
- if ((book == TASDEVICE_BOOK_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (page == TASDEVICE_PAGE_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (reg >= TASDEVICE_REG_ID(TAS2781_SA_COEFF_SWAP_REG)) &&
- (reg <= (TASDEVICE_REG_ID(TAS2781_SA_COEFF_SWAP_REG) + 4)))
- return 0;
-
- in = check_yram(&crc_data, book, page, reg, 1);
- if (!in)
- return 0;
- ret = tasdevice_spi_dev_read(tasdevice,
- TASDEVICE_REG(book, page, reg), &nData1);
- if (ret < 0)
- return ret;
-
- if (nData1 != val) {
- dev_err(tasdevice->dev,
- "B[0x%x]P[0x%x]R[0x%x] W[0x%x], R[0x%x]\n",
- book, page, reg, val, nData1);
- tasdevice->err_code |= ERROR_YRAM_CRCCHK;
- return -EAGAIN;
- }
-
- ret = crc8(tasdevice->crc8_lkp_tbl, &val, 1, 0);
-
- return ret;
-}
-
-/* Block type check. */
-static void set_err_prg_cfg(unsigned int type, struct tasdevice_priv *p)
-{
- if ((type == MAIN_ALL_DEVICES) || (type == MAIN_DEVICE_A) ||
- (type == MAIN_DEVICE_B) || (type == MAIN_DEVICE_C) ||
- (type == MAIN_DEVICE_D))
- p->cur_prog = -1;
- else
- p->cur_conf = -1;
-}
-
-/* Checksum for data bytes. */
-static int tasdev_bytes_chksum(struct tasdevice_priv *tas_priv,
- struct tasdev_blk *block, unsigned char book,
- unsigned char page, unsigned char reg, unsigned int len,
- unsigned char val, unsigned char *crc_chksum)
-{
- int ret;
-
- if (len > 1)
- ret = tasdev_multibytes_chksum(tas_priv, book, page, reg,
- len);
- else
- ret = do_singlereg_checksum(tas_priv, book, page, reg, val);
-
- if (ret > 0) {
- *crc_chksum += ret;
- goto end;
- }
-
- if (ret != -EAGAIN)
- goto end;
-
- block->nr_retry--;
- if (block->nr_retry > 0)
- goto end;
-
- set_err_prg_cfg(block->type, tas_priv);
-
-end:
- return ret;
-}
-
-/* Multi-data byte write. */
-static int tasdev_multibytes_wr(struct tasdevice_priv *tas_priv,
- struct tasdev_blk *block, unsigned char book,
- unsigned char page, unsigned char reg, unsigned char *data,
- unsigned int len, unsigned int *nr_cmds,
- unsigned char *crc_chksum)
-{
- int ret;
-
- if (len > 1) {
- ret = tasdevice_spi_dev_bulk_write(tas_priv,
- TASDEVICE_REG(book, page, reg), data + 3, len);
- if (ret < 0)
- return ret;
- if (block->is_ychksum_present)
- ret = tasdev_bytes_chksum(tas_priv, block,
- book, page, reg, len, 0, crc_chksum);
- } else {
- ret = tasdevice_spi_dev_write(tas_priv,
- TASDEVICE_REG(book, page, reg), data[3]);
- if (ret < 0)
- return ret;
- if (block->is_ychksum_present)
- ret = tasdev_bytes_chksum(tas_priv, block, book,
- page, reg, 1, data[3], crc_chksum);
- }
-
- if (!block->is_ychksum_present || ret >= 0) {
- *nr_cmds += 1;
- if (len >= 2)
- *nr_cmds += ((len - 2) / 4) + 1;
- }
-
- return ret;
-}
-
-/* Checksum for block. */
-static int tasdev_block_chksum(struct tasdevice_priv *tas_priv,
- struct tasdev_blk *block)
-{
- unsigned int nr_value;
- int ret;
-
- ret = tasdevice_spi_dev_read(tas_priv, TASDEVICE_CHECKSUM, &nr_value);
- if (ret < 0) {
- dev_err(tas_priv->dev, "%s: read error %d.\n", __func__, ret);
- set_err_prg_cfg(block->type, tas_priv);
- return ret;
- }
-
- if ((nr_value & 0xff) != block->pchksum) {
- dev_err(tas_priv->dev, "%s: PChkSum err %d ", __func__, ret);
- dev_err(tas_priv->dev, "PChkSum = 0x%x, Reg = 0x%x\n",
- block->pchksum, (nr_value & 0xff));
- tas_priv->err_code |= ERROR_PRAM_CRCCHK;
- ret = -EAGAIN;
- block->nr_retry--;
-
- if (block->nr_retry <= 0)
- set_err_prg_cfg(block->type, tas_priv);
- } else {
- tas_priv->err_code &= ~ERROR_PRAM_CRCCHK;
- }
-
- return ret;
-}
-
-/* Firmware block load function. */
-static int tasdev_load_blk(struct tasdevice_priv *tas_priv,
- struct tasdev_blk *block)
-{
- unsigned int sleep_time, len, nr_cmds;
- unsigned char offset, book, page, val;
- unsigned char *data = block->data;
- unsigned char crc_chksum = 0;
- int ret = 0;
-
- while (block->nr_retry > 0) {
- if (block->is_pchksum_present) {
- ret = tasdevice_spi_dev_write(tas_priv,
- TASDEVICE_CHECKSUM, 0);
- if (ret < 0)
- break;
- }
-
- if (block->is_ychksum_present)
- crc_chksum = 0;
-
- nr_cmds = 0;
-
- while (nr_cmds < block->nr_cmds) {
- data = block->data + nr_cmds * 4;
-
- book = data[0];
- page = data[1];
- offset = data[2];
- val = data[3];
-
- nr_cmds++;
- /* Single byte write */
- if (offset <= 0x7F) {
- ret = tasdevice_spi_dev_write(tas_priv,
- TASDEVICE_REG(book, page, offset),
- val);
- if (ret < 0)
- break;
- if (block->is_ychksum_present) {
- ret = tasdev_bytes_chksum(tas_priv,
- block, book, page, offset,
- 1, val, &crc_chksum);
- if (ret < 0)
- break;
- }
- continue;
- }
- /* sleep command */
- if (offset == 0x81) {
- /* book -- data[0] page -- data[1] */
- sleep_time = ((book << 8) + page)*1000;
- fsleep(sleep_time);
- continue;
- }
- /* Multiple bytes write */
- if (offset == 0x85) {
- data += 4;
- len = (book << 8) + page;
- book = data[0];
- page = data[1];
- offset = data[2];
- ret = tasdev_multibytes_wr(tas_priv,
- block, book, page, offset, data,
- len, &nr_cmds, &crc_chksum);
- if (ret < 0)
- break;
- }
- }
- if (ret == -EAGAIN) {
- if (block->nr_retry > 0)
- continue;
- } else if (ret < 0) {
- /* err in current device, skip it */
- break;
- }
-
- if (block->is_pchksum_present) {
- ret = tasdev_block_chksum(tas_priv, block);
- if (ret == -EAGAIN) {
- if (block->nr_retry > 0)
- continue;
- } else if (ret < 0) {
- /* err in current device, skip it */
- break;
- }
- }
-
- if (block->is_ychksum_present) {
- /* TBD, open it when FW ready */
- dev_err(tas_priv->dev,
- "Blk YChkSum: FW = 0x%x, YCRC = 0x%x\n",
- block->ychksum, crc_chksum);
-
- tas_priv->err_code &=
- ~ERROR_YRAM_CRCCHK;
- ret = 0;
- }
- /* skip current blk */
- break;
- }
-
- return ret;
-}
-
-/* Firmware block load function. */
-static int tasdevice_load_block(struct tasdevice_priv *tas_priv,
- struct tasdev_blk *block)
-{
- int ret = 0;
-
- block->nr_retry = 6;
- if (tas_priv->is_loading == false)
- return 0;
- ret = tasdev_load_blk(tas_priv, block);
- if (ret < 0)
- dev_err(tas_priv->dev, "Blk (%d) load error\n", block->type);
-
- return ret;
-}
-
-/*
- * Select firmware binary parser & load callback functions by ppc3 version
- * and firmware binary version.
- */
-static int dspfw_default_callback(struct tasdevice_priv *tas_priv,
- unsigned int drv_ver, unsigned int ppcver)
-{
- int rc = 0;
-
- if (drv_ver == 0x100) {
- if (ppcver >= PPC3_VERSION_BASE) {
- tas_priv->fw_parse_variable_header =
- fw_parse_variable_header_kernel;
- tas_priv->fw_parse_program_data =
- fw_parse_program_data_kernel;
- tas_priv->fw_parse_configuration_data =
- fw_parse_configuration_data_kernel;
- tas_priv->tasdevice_load_block =
- tasdevice_load_block_kernel;
- } else if (ppcver == 0x00) {
- tas_priv->fw_parse_variable_header =
- fw_parse_variable_header_git;
- tas_priv->fw_parse_program_data =
- fw_parse_program_data;
- tas_priv->fw_parse_configuration_data =
- fw_parse_configuration_data;
- tas_priv->tasdevice_load_block =
- tasdevice_load_block;
- } else {
- dev_err(tas_priv->dev,
- "Wrong PPCVer :0x%08x\n", ppcver);
- rc = -EINVAL;
- }
- } else {
- dev_err(tas_priv->dev, "Wrong DrvVer : 0x%02x\n", drv_ver);
- rc = -EINVAL;
- }
-
- return rc;
-}
-
-/* DSP firmware binary file header parser function. */
-static int fw_parse_header(struct tasdevice_priv *tas_priv,
- struct tasdevice_fw *tas_fmw, const struct firmware *fmw, int offset)
-{
- struct tasdevice_dspfw_hdr *fw_hdr = &tas_fmw->fw_hdr;
- struct tasdevice_fw_fixed_hdr *fw_fixed_hdr = &fw_hdr->fixed_hdr;
- static const unsigned char magic_number[] = {0x35, 0x35, 0x35, 0x32, };
- const unsigned char *buf = (unsigned char *)fmw->data;
-
- if (offset + 92 > fmw->size) {
- dev_err(tas_priv->dev, "%s: File Size error\n", __func__);
- offset = -EINVAL;
- goto out;
- }
- if (memcmp(&buf[offset], magic_number, 4)) {
- dev_err(tas_priv->dev, "%s: Magic num NOT match\n", __func__);
- offset = -EINVAL;
- goto out;
- }
- offset += 4;
-
- /*
- * Convert data[offset], data[offset + 1], data[offset + 2] and
- * data[offset + 3] into host
- */
- fw_fixed_hdr->fwsize = get_unaligned_be32(&buf[offset]);
- offset += 4;
- if (fw_fixed_hdr->fwsize != fmw->size) {
- dev_err(tas_priv->dev, "File size not match, %lu %u",
- (unsigned long)fmw->size, fw_fixed_hdr->fwsize);
- offset = -EINVAL;
- goto out;
- }
- offset += 4;
- fw_fixed_hdr->ppcver = get_unaligned_be32(&buf[offset]);
- offset += 8;
- fw_fixed_hdr->drv_ver = get_unaligned_be32(&buf[offset]);
- offset += 72;
-
-out:
- return offset;
-}
-
-/* DSP firmware binary file parser function. */
-static int tasdevice_dspfw_ready(const struct firmware *fmw, void *context)
-{
- struct tasdevice_priv *tas_priv = context;
- struct tasdevice_fw_fixed_hdr *fw_fixed_hdr;
- struct tasdevice_fw *tas_fmw;
- int offset = 0, ret = 0;
-
- if (!fmw || !fmw->data) {
- dev_err(tas_priv->dev, "%s: Failed to read firmware %s\n",
- __func__, tas_priv->coef_binaryname);
- return -EINVAL;
- }
-
- tas_priv->fmw = kzalloc(sizeof(*tas_priv->fmw), GFP_KERNEL);
- if (!tas_priv->fmw)
- return -ENOMEM;
- tas_fmw = tas_priv->fmw;
- tas_fmw->dev = tas_priv->dev;
- offset = fw_parse_header(tas_priv, tas_fmw, fmw, offset);
-
- if (offset == -EINVAL)
- return -EINVAL;
-
- fw_fixed_hdr = &tas_fmw->fw_hdr.fixed_hdr;
- /* Support different versions of firmware */
- switch (fw_fixed_hdr->drv_ver) {
- case 0x301:
- case 0x302:
- case 0x502:
- case 0x503:
- tas_priv->fw_parse_variable_header =
- fw_parse_variable_header_kernel;
- tas_priv->fw_parse_program_data =
- fw_parse_program_data_kernel;
- tas_priv->fw_parse_configuration_data =
- fw_parse_configuration_data_kernel;
- tas_priv->tasdevice_load_block =
- tasdevice_load_block_kernel;
- break;
- case 0x202:
- case 0x400:
- tas_priv->fw_parse_variable_header =
- fw_parse_variable_header_git;
- tas_priv->fw_parse_program_data =
- fw_parse_program_data;
- tas_priv->fw_parse_configuration_data =
- fw_parse_configuration_data;
- tas_priv->tasdevice_load_block =
- tasdevice_load_block;
- break;
- default:
- ret = dspfw_default_callback(tas_priv,
- fw_fixed_hdr->drv_ver, fw_fixed_hdr->ppcver);
- if (ret)
- return ret;
- break;
- }
-
- offset = tas_priv->fw_parse_variable_header(tas_priv, fmw, offset);
- if (offset < 0)
- return offset;
-
- offset = tas_priv->fw_parse_program_data(tas_priv, tas_fmw, fmw,
- offset);
- if (offset < 0)
- return offset;
-
- offset = tas_priv->fw_parse_configuration_data(tas_priv,
- tas_fmw, fmw, offset);
- if (offset < 0)
- ret = offset;
-
- return ret;
-}
-
-/* DSP firmware binary file parser function. */
-int tasdevice_spi_dsp_parser(void *context)
-{
- struct tasdevice_priv *tas_priv = context;
- const struct firmware *fw_entry;
- int ret;
-
- ret = request_firmware(&fw_entry, tas_priv->coef_binaryname,
- tas_priv->dev);
- if (ret) {
- dev_err(tas_priv->dev, "%s: load %s error\n", __func__,
- tas_priv->coef_binaryname);
- return ret;
- }
-
- ret = tasdevice_dspfw_ready(fw_entry, tas_priv);
- release_firmware(fw_entry);
- fw_entry = NULL;
-
- return ret;
-}
-
-/* DSP firmware program block data remove function. */
-static void tasdev_dsp_prog_blk_remove(struct tasdevice_prog *prog)
-{
- struct tasdevice_data *tas_dt;
- struct tasdev_blk *blk;
- unsigned int i;
-
- if (!prog)
- return;
-
- tas_dt = &prog->dev_data;
-
- if (!tas_dt->dev_blks)
- return;
-
- for (i = 0; i < tas_dt->nr_blk; i++) {
- blk = &tas_dt->dev_blks[i];
- kfree(blk->data);
- }
- kfree(tas_dt->dev_blks);
-}
-
-/* DSP firmware program block data remove function. */
-static void tasdev_dsp_prog_remove(struct tasdevice_prog *prog,
- unsigned short nr)
-{
- int i;
-
- for (i = 0; i < nr; i++)
- tasdev_dsp_prog_blk_remove(&prog[i]);
- kfree(prog);
-}
-
-/* DSP firmware config block data remove function. */
-static void tasdev_dsp_cfg_blk_remove(struct tasdevice_config *cfg)
-{
- struct tasdevice_data *tas_dt;
- struct tasdev_blk *blk;
- unsigned int i;
-
- if (cfg) {
- tas_dt = &cfg->dev_data;
-
- if (!tas_dt->dev_blks)
- return;
-
- for (i = 0; i < tas_dt->nr_blk; i++) {
- blk = &tas_dt->dev_blks[i];
- kfree(blk->data);
- }
- kfree(tas_dt->dev_blks);
- }
-}
-
-/* DSP firmware config remove function. */
-static void tasdev_dsp_cfg_remove(struct tasdevice_config *config,
- unsigned short nr)
-{
- int i;
-
- for (i = 0; i < nr; i++)
- tasdev_dsp_cfg_blk_remove(&config[i]);
- kfree(config);
-}
-
-/* DSP firmware remove function. */
-void tasdevice_spi_dsp_remove(void *context)
-{
- struct tasdevice_priv *tas_dev = context;
-
- if (!tas_dev->fmw)
- return;
-
- if (tas_dev->fmw->programs)
- tasdev_dsp_prog_remove(tas_dev->fmw->programs,
- tas_dev->fmw->nr_programs);
- if (tas_dev->fmw->configs)
- tasdev_dsp_cfg_remove(tas_dev->fmw->configs,
- tas_dev->fmw->nr_configurations);
- kfree(tas_dev->fmw);
- tas_dev->fmw = NULL;
-}
-
-/* DSP firmware calibration data remove function. */
-static void tas2781_clear_calfirmware(struct tasdevice_fw *tas_fmw)
-{
- struct tasdevice_calibration *calibration;
- struct tasdev_blk *block;
- unsigned int blks;
- int i;
-
- if (!tas_fmw->calibrations)
- goto out;
-
- for (i = 0; i < tas_fmw->nr_calibrations; i++) {
- calibration = &tas_fmw->calibrations[i];
- if (!calibration)
- continue;
-
- if (!calibration->dev_data.dev_blks)
- continue;
-
- for (blks = 0; blks < calibration->dev_data.nr_blk; blks++) {
- block = &calibration->dev_data.dev_blks[blks];
- if (!block)
- continue;
- kfree(block->data);
- }
- kfree(calibration->dev_data.dev_blks);
- }
- kfree(tas_fmw->calibrations);
-out:
- kfree(tas_fmw);
-}
-
-/* Calibration data from firmware remove function. */
-void tasdevice_spi_calbin_remove(void *context)
-{
- struct tasdevice_priv *tas_priv = context;
-
- if (tas_priv->cali_data_fmw) {
- tas2781_clear_calfirmware(tas_priv->cali_data_fmw);
- tas_priv->cali_data_fmw = NULL;
- }
-}
-
-/* Configuration remove function. */
-void tasdevice_spi_config_info_remove(void *context)
-{
- struct tasdevice_priv *tas_priv = context;
- struct tasdevice_rca *rca = &tas_priv->rcabin;
- struct tasdevice_config_info **ci = rca->cfg_info;
- unsigned int i, j;
-
- if (!ci)
- return;
- for (i = 0; i < rca->ncfgs; i++) {
- if (!ci[i])
- continue;
- if (ci[i]->blk_data) {
- for (j = 0; j < ci[i]->real_nblocks; j++) {
- if (!ci[i]->blk_data[j])
- continue;
- kfree(ci[i]->blk_data[j]->regdata);
- kfree(ci[i]->blk_data[j]);
- }
- kfree(ci[i]->blk_data);
- }
- kfree(ci[i]);
- }
- kfree(ci);
-}
-
-/* DSP firmware program block data load function. */
-static int tasdevice_load_data(struct tasdevice_priv *tas_priv,
- struct tasdevice_data *dev_data)
-{
- struct tasdev_blk *block;
- unsigned int i;
- int ret = 0;
-
- for (i = 0; i < dev_data->nr_blk; i++) {
- block = &dev_data->dev_blks[i];
- ret = tas_priv->tasdevice_load_block(tas_priv, block);
- if (ret < 0)
- break;
- }
-
- return ret;
-}
-
-/* DSP firmware program load interface function. */
-int tasdevice_spi_prmg_load(void *context, int prm_no)
-{
- struct tasdevice_priv *tas_priv = context;
- struct tasdevice_fw *tas_fmw = tas_priv->fmw;
- struct tasdevice_prog *program;
- struct tasdevice_config *conf;
- int ret = 0;
-
- if (!tas_fmw) {
- dev_err(tas_priv->dev, "%s: Firmware is NULL\n", __func__);
- return -EINVAL;
- }
- if (prm_no >= 0 && prm_no <= tas_fmw->nr_programs) {
- tas_priv->cur_conf = 0;
- tas_priv->is_loading = true;
- program = &tas_fmw->programs[prm_no];
- ret = tasdevice_load_data(tas_priv, &program->dev_data);
- if (ret < 0) {
- dev_err(tas_priv->dev, "Program failed %d.\n", ret);
- return ret;
- }
- tas_priv->cur_prog = prm_no;
-
- conf = &tas_fmw->configs[tas_priv->cur_conf];
- ret = tasdevice_load_data(tas_priv, &conf->dev_data);
- if (ret < 0)
- dev_err(tas_priv->dev, "Config failed %d.\n", ret);
- } else {
- dev_err(tas_priv->dev,
- "%s: prm(%d) is not in range of Programs %u\n",
- __func__, prm_no, tas_fmw->nr_programs);
- return -EINVAL;
- }
-
- return ret;
-}
-
-/* RCABIN configuration switch interface function. */
-void tasdevice_spi_tuning_switch(void *context, int state)
-{
- struct tasdevice_priv *tas_priv = context;
- int profile_cfg_id = tas_priv->rcabin.profile_cfg_id;
-
- if (tas_priv->fw_state == TASDEVICE_DSP_FW_FAIL) {
- dev_err(tas_priv->dev, "DSP bin file not loaded\n");
- return;
- }
-
- if (state == 0)
- tasdevice_spi_select_cfg_blk(tas_priv, profile_cfg_id,
- TASDEVICE_BIN_BLK_PRE_POWER_UP);
- else
- tasdevice_spi_select_cfg_blk(tas_priv, profile_cfg_id,
- TASDEVICE_BIN_BLK_PRE_SHUTDOWN);
-}
diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c
index 3b0c3e70987b..a8ac14887676 100644
--- a/sound/pci/ice1712/ice1712.c
+++ b/sound/pci/ice1712/ice1712.c
@@ -2502,7 +2502,7 @@ static int snd_ice1712_create(struct snd_card *card,
pci_write_config_word(ice->pci, 0x42, 0x0006);
snd_ice1712_proc_init(ice);
- err = pci_request_regions(pci, "ICE1712");
+ err = pcim_request_all_regions(pci, "ICE1712");
if (err < 0)
return err;
ice->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 1dc776acd637..be22b159e65a 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -2491,7 +2491,7 @@ static int snd_vt1724_create(struct snd_card *card,
pci_set_master(pci);
snd_vt1724_proc_init(ice);
- err = pci_request_regions(pci, "ICE1724");
+ err = pcim_request_all_regions(pci, "ICE1724");
if (err < 0)
return err;
ice->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index e4bb99f71c2c..51e7f1f1a48e 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -2926,7 +2926,7 @@ static int snd_intel8x0_init(struct snd_card *card,
pci->device == PCI_DEVICE_ID_INTEL_440MX)
chip->fix_nocache = 1; /* enable workaround */
- err = pci_request_regions(pci, card->shortname);
+ err = pcim_request_all_regions(pci, card->shortname);
if (err < 0)
return err;
diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c
index 38f8de51d641..1ce775fe8a70 100644
--- a/sound/pci/intel8x0m.c
+++ b/sound/pci/intel8x0m.c
@@ -1060,7 +1060,7 @@ static int snd_intel8x0m_init(struct snd_card *card,
chip->pci = pci;
chip->irq = -1;
- err = pci_request_regions(pci, card->shortname);
+ err = pcim_request_all_regions(pci, card->shortname);
if (err < 0)
return err;
diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c
index 49b71082c485..56dea5b10828 100644
--- a/sound/pci/korg1212/korg1212.c
+++ b/sound/pci/korg1212/korg1212.c
@@ -308,9 +308,6 @@ struct snd_korg1212 {
spinlock_t lock;
struct mutex open_mutex;
- struct timer_list timer; /* timer callback for checking ack of stop request */
- int stop_pending_cnt; /* counter for stop pending check */
-
wait_queue_head_t wait;
unsigned long iomem;
@@ -382,7 +379,7 @@ struct snd_korg1212 {
unsigned long totalerrorcnt; // Total Error Count
int dsp_is_loaded;
- int dsp_stop_is_processed;
+ int dsp_stop_processing;
};
@@ -565,52 +562,17 @@ static int snd_korg1212_Send1212Command(struct snd_korg1212 *korg1212,
/* spinlock already held */
static void snd_korg1212_SendStop(struct snd_korg1212 *korg1212)
{
- if (! korg1212->stop_pending_cnt) {
- korg1212->sharedBufferPtr->cardCommand = 0xffffffff;
- /* program the timer */
- korg1212->stop_pending_cnt = HZ;
- mod_timer(&korg1212->timer, jiffies + 1);
- }
+ korg1212->dsp_stop_processing = 1;
+ korg1212->sharedBufferPtr->cardCommand = 0xffffffff;
}
static void snd_korg1212_SendStopAndWait(struct snd_korg1212 *korg1212)
{
unsigned long flags;
spin_lock_irqsave(&korg1212->lock, flags);
- korg1212->dsp_stop_is_processed = 0;
snd_korg1212_SendStop(korg1212);
spin_unlock_irqrestore(&korg1212->lock, flags);
- wait_event_timeout(korg1212->wait, korg1212->dsp_stop_is_processed, (HZ * 3) / 2);
-}
-
-/* timer callback for checking the ack of stop request */
-static void snd_korg1212_timer_func(struct timer_list *t)
-{
- struct snd_korg1212 *korg1212 = from_timer(korg1212, t, timer);
- unsigned long flags;
-
- spin_lock_irqsave(&korg1212->lock, flags);
- if (korg1212->sharedBufferPtr->cardCommand == 0) {
- /* ack'ed */
- korg1212->stop_pending_cnt = 0;
- korg1212->dsp_stop_is_processed = 1;
- wake_up(&korg1212->wait);
- K1212_DEBUG_PRINTK_VERBOSE("K1212_DEBUG: Stop ack'ed [%s]\n",
- stateName[korg1212->cardState]);
- } else {
- if (--korg1212->stop_pending_cnt > 0) {
- /* reprogram timer */
- mod_timer(&korg1212->timer, jiffies + 1);
- } else {
- dev_dbg(korg1212->card->dev, "korg1212_timer_func timeout\n");
- korg1212->sharedBufferPtr->cardCommand = 0;
- korg1212->dsp_stop_is_processed = 1;
- wake_up(&korg1212->wait);
- K1212_DEBUG_PRINTK("K1212_DEBUG: Stop timeout [%s]\n",
- stateName[korg1212->cardState]);
- }
- }
- spin_unlock_irqrestore(&korg1212->lock, flags);
+ wait_event_timeout(korg1212->wait, !korg1212->dsp_stop_processing, HZ);
}
static int snd_korg1212_TurnOnIdleMonitor(struct snd_korg1212 *korg1212)
@@ -1135,7 +1097,9 @@ static irqreturn_t snd_korg1212_interrupt(int irq, void *dev_id)
korg1212->errorcnt++;
korg1212->totalerrorcnt++;
korg1212->sharedBufferPtr->cardCommand = 0;
+ korg1212->dsp_stop_processing = 0;
snd_korg1212_setCardState(korg1212, K1212_STATE_ERRORSTOP);
+ wake_up(&korg1212->wait);
break;
// ------------------------------------------------------------------------
@@ -1147,6 +1111,8 @@ static irqreturn_t snd_korg1212_interrupt(int irq, void *dev_id)
korg1212->irqcount, doorbellValue,
stateName[korg1212->cardState]);
korg1212->sharedBufferPtr->cardCommand = 0;
+ korg1212->dsp_stop_processing = 0;
+ wake_up(&korg1212->wait);
break;
default:
@@ -1535,6 +1501,14 @@ static int snd_korg1212_hw_params(struct snd_pcm_substream *substream,
return 0;
}
+static int snd_korg1212_sync_stop(struct snd_pcm_substream *substream)
+{
+ struct snd_korg1212 *korg1212 = snd_pcm_substream_chip(substream);
+
+ wait_event_timeout(korg1212->wait, !korg1212->dsp_stop_processing, HZ);
+ return 0;
+}
+
static int snd_korg1212_prepare(struct snd_pcm_substream *substream)
{
struct snd_korg1212 *korg1212 = snd_pcm_substream_chip(substream);
@@ -1544,19 +1518,7 @@ static int snd_korg1212_prepare(struct snd_pcm_substream *substream)
stateName[korg1212->cardState]);
spin_lock_irq(&korg1212->lock);
-
- /* FIXME: we should wait for ack! */
- if (korg1212->stop_pending_cnt > 0) {
- K1212_DEBUG_PRINTK("K1212_DEBUG: snd_korg1212_prepare - Stop is pending... [%s]\n",
- stateName[korg1212->cardState]);
- spin_unlock_irq(&korg1212->lock);
- return -EAGAIN;
- /*
- korg1212->sharedBufferPtr->cardCommand = 0;
- del_timer(&korg1212->timer);
- korg1212->stop_pending_cnt = 0;
- */
- }
+ korg1212->dsp_stop_processing = 0;
rc = snd_korg1212_SetupForPlay(korg1212);
@@ -1668,6 +1630,7 @@ static const struct snd_pcm_ops snd_korg1212_playback_ops = {
.hw_params = snd_korg1212_hw_params,
.prepare = snd_korg1212_prepare,
.trigger = snd_korg1212_trigger,
+ .sync_stop = snd_korg1212_sync_stop,
.pointer = snd_korg1212_playback_pointer,
.copy = snd_korg1212_playback_copy,
.fill_silence = snd_korg1212_playback_silence,
@@ -1680,6 +1643,7 @@ static const struct snd_pcm_ops snd_korg1212_capture_ops = {
.hw_params = snd_korg1212_hw_params,
.prepare = snd_korg1212_prepare,
.trigger = snd_korg1212_trigger,
+ .sync_stop = snd_korg1212_sync_stop,
.pointer = snd_korg1212_capture_pointer,
.copy = snd_korg1212_capture_copy,
};
@@ -2086,7 +2050,6 @@ static int snd_korg1212_create(struct snd_card *card, struct pci_dev *pci)
init_waitqueue_head(&korg1212->wait);
spin_lock_init(&korg1212->lock);
mutex_init(&korg1212->open_mutex);
- timer_setup(&korg1212->timer, snd_korg1212_timer_func, 0);
korg1212->irq = -1;
korg1212->clkSource = K1212_CLKIDX_Local;
diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c
index 1aa30e90b86a..fb8bd54e4c2d 100644
--- a/sound/pci/lola/lola.c
+++ b/sound/pci/lola/lola.c
@@ -541,6 +541,7 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev)
struct lola *chip = card->private_data;
int err;
unsigned int dever;
+ void __iomem *iomem;
err = pcim_enable_device(pci);
if (err < 0)
@@ -580,14 +581,19 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev)
chip->sample_rate_min = 16000;
}
- err = pcim_iomap_regions(pci, (1 << 0) | (1 << 2), DRVNAME);
- if (err < 0)
- return err;
+ iomem = pcim_iomap_region(pci, 0, DRVNAME);
+ if (IS_ERR(iomem))
+ return PTR_ERR(iomem);
+ chip->bar[0].remap_addr = iomem;
chip->bar[0].addr = pci_resource_start(pci, 0);
- chip->bar[0].remap_addr = pcim_iomap_table(pci)[0];
+
+ iomem = pcim_iomap_region(pci, 2, DRVNAME);
+ if (IS_ERR(iomem))
+ return PTR_ERR(iomem);
+
+ chip->bar[1].remap_addr = iomem;
chip->bar[1].addr = pci_resource_start(pci, 2);
- chip->bar[1].remap_addr = pcim_iomap_table(pci)[2];
pci_set_master(pci);
diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
index bd9b6148dd6f..63ebf9803ea8 100644
--- a/sound/pci/lx6464es/lx6464es.c
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -944,7 +944,7 @@ static int snd_lx6464es_create(struct snd_card *card,
mutex_init(&chip->setup_mutex);
/* request resources */
- err = pci_request_regions(pci, card_name);
+ err = pcim_request_all_regions(pci, card_name);
if (err < 0)
return err;
diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c
index f4d211970d7e..e61e15774706 100644
--- a/sound/pci/maestro3.c
+++ b/sound/pci/maestro3.c
@@ -2552,7 +2552,7 @@ snd_m3_create(struct snd_card *card, struct pci_dev *pci,
if (err < 0)
return err;
- err = pci_request_regions(pci, card->driver);
+ err = pcim_request_all_regions(pci, card->driver);
if (err < 0)
return err;
diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c
index 44085237fb44..cd4dc43dbff1 100644
--- a/sound/pci/nm256/nm256.c
+++ b/sound/pci/nm256/nm256.c
@@ -1447,7 +1447,7 @@ snd_nm256_create(struct snd_card *card, struct pci_dev *pci)
chip->buffer_addr = pci_resource_start(pci, 0);
chip->cport_addr = pci_resource_start(pci, 1);
- err = pci_request_regions(pci, card->driver);
+ err = pcim_request_all_regions(pci, card->driver);
if (err < 0)
return err;
diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c
index 9340d3c9ffd6..39b8ccf37cdd 100644
--- a/sound/pci/oxygen/oxygen_lib.c
+++ b/sound/pci/oxygen/oxygen_lib.c
@@ -609,7 +609,7 @@ static int __oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
if (err < 0)
return err;
- err = pci_request_regions(pci, DRIVER);
+ err = pcim_request_all_regions(pci, DRIVER);
if (err < 0) {
dev_err(card->dev, "cannot reserve PCI resources\n");
return err;
diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index 329816f37b76..578be0755b8a 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -1831,7 +1831,7 @@ snd_riptide_create(struct snd_card *card, struct pci_dev *pci)
chip->cif = NULL;
card->private_free = snd_riptide_free;
- err = pci_request_regions(pci, "RIPTIDE");
+ err = pcim_request_all_regions(pci, "RIPTIDE");
if (err < 0)
return err;
hwport = (struct riptideport *)chip->port;
diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c
index a8c2ceaadef5..4bf122abea48 100644
--- a/sound/pci/rme32.c
+++ b/sound/pci/rme32.c
@@ -1284,7 +1284,7 @@ static int snd_rme32_create(struct rme32 *rme32)
if (err < 0)
return err;
- err = pci_request_regions(pci, "RME32");
+ err = pcim_request_all_regions(pci, "RME32");
if (err < 0)
return err;
rme32->port = pci_resource_start(rme32->pci, 0);
diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c
index 1265a7efac60..01029843d7f3 100644
--- a/sound/pci/rme96.c
+++ b/sound/pci/rme96.c
@@ -1575,7 +1575,7 @@ snd_rme96_create(struct rme96 *rme96)
if (err < 0)
return err;
- err = pci_request_regions(pci, "RME96");
+ err = pcim_request_all_regions(pci, "RME96");
if (err < 0)
return err;
rme96->port = pci_resource_start(rme96->pci, 0);
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index dc326face54a..873b7eadfc50 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -5277,7 +5277,7 @@ static int snd_hdsp_create(struct snd_card *card,
pci_set_master(hdsp->pci);
- err = pci_request_regions(pci, "hdsp");
+ err = pcim_request_all_regions(pci, "hdsp");
if (err < 0)
return err;
hdsp->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index 1935de046f00..64de54089955 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -6558,13 +6558,12 @@ static int snd_hdspm_create(struct snd_card *card,
pci_set_master(hdspm->pci);
- err = pcim_iomap_regions(pci, 1 << 0, "hdspm");
- if (err < 0)
- return err;
+ hdspm->iobase = pcim_iomap_region(pci, 0, "hdspm");
+ if (IS_ERR(hdspm->iobase))
+ return PTR_ERR(hdspm->iobase);
hdspm->port = pci_resource_start(pci, 0);
io_extent = pci_resource_len(pci, 0);
- hdspm->iobase = pcim_iomap_table(pci)[0];
dev_dbg(card->dev, "remapped region (0x%lx) 0x%lx-0x%lx\n",
(unsigned long)hdspm->iobase, hdspm->port,
hdspm->port + io_extent - 1);
diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c
index 5b8dd7b0a02c..34d9c7995ddd 100644
--- a/sound/pci/rme9652/rme9652.c
+++ b/sound/pci/rme9652/rme9652.c
@@ -2406,7 +2406,7 @@ static int snd_rme9652_create(struct snd_card *card,
spin_lock_init(&rme9652->lock);
- err = pci_request_regions(pci, "rme9652");
+ err = pcim_request_all_regions(pci, "rme9652");
if (err < 0)
return err;
rme9652->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c
index 53206beb2cb5..42b22f123fa7 100644
--- a/sound/pci/sis7019.c
+++ b/sound/pci/sis7019.c
@@ -1273,7 +1273,7 @@ static int sis_chip_create(struct snd_card *card,
sis->irq = -1;
sis->ioport = pci_resource_start(pci, 0);
- rc = pci_request_regions(pci, "SiS7019");
+ rc = pcim_request_all_regions(pci, "SiS7019");
if (rc) {
dev_err(&pci->dev, "unable request regions\n");
return rc;
diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c
index c30eaf1038e7..808a793ff4da 100644
--- a/sound/pci/sonicvibes.c
+++ b/sound/pci/sonicvibes.c
@@ -1227,7 +1227,7 @@ static int snd_sonicvibes_create(struct snd_card *card,
sonic->pci = pci;
sonic->irq = -1;
- err = pci_request_regions(pci, "S3 SonicVibes");
+ err = pcim_request_all_regions(pci, "S3 SonicVibes");
if (err < 0)
return err;
diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c
index 8039f445bee2..4e16b79d6584 100644
--- a/sound/pci/trident/trident_main.c
+++ b/sound/pci/trident/trident_main.c
@@ -3533,7 +3533,7 @@ int snd_trident_create(struct snd_card *card,
trident->midi_port = TRID_REG(trident, T4D_MPU401_BASE);
pci_set_master(pci);
- err = pci_request_regions(pci, "Trident Audio");
+ err = pcim_request_all_regions(pci, "Trident Audio");
if (err < 0)
return err;
trident->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index 89838b4fb118..a04dbc0a420f 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -2326,7 +2326,7 @@ static int snd_via82xx_create(struct snd_card *card,
pci_write_config_byte(chip->pci, VIA_FUNC_ENABLE,
chip->old_legacy & ~(VIA_FUNC_ENABLE_SB|VIA_FUNC_ENABLE_FM));
- err = pci_request_regions(pci, card->driver);
+ err = pcim_request_all_regions(pci, card->driver);
if (err < 0)
return err;
chip->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c
index a0a49b8d1511..eef0f9ddaae0 100644
--- a/sound/pci/via82xx_modem.c
+++ b/sound/pci/via82xx_modem.c
@@ -1071,7 +1071,7 @@ static int snd_via82xx_create(struct snd_card *card,
chip->pci = pci;
chip->irq = -1;
- err = pci_request_regions(pci, card->driver);
+ err = pcim_request_all_regions(pci, card->driver);
if (err < 0)
return err;
chip->port = pci_resource_start(pci, 0);
diff --git a/sound/pci/vx222/vx222.c b/sound/pci/vx222/vx222.c
index fdb039896205..693a4e471cf7 100644
--- a/sound/pci/vx222/vx222.c
+++ b/sound/pci/vx222/vx222.c
@@ -123,7 +123,7 @@ static int snd_vx222_create(struct snd_card *card, struct pci_dev *pci,
vx = to_vx222(chip);
vx->pci = pci;
- err = pci_request_regions(pci, CARD_NAME);
+ err = pcim_request_all_regions(pci, KBUILD_MODNAME);
if (err < 0)
return err;
for (i = 0; i < 2; i++)
diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index 6b8d8690b6b2..d495f53a8324 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -2307,7 +2307,7 @@ int snd_ymfpci_create(struct snd_card *card,
chip->device_id = pci->device;
chip->rev = pci->revision;
- err = pci_request_regions(pci, "YMFPCI");
+ err = pcim_request_all_regions(pci, "YMFPCI");
if (err < 0)
return err;
diff --git a/sound/sh/Kconfig b/sound/sh/Kconfig
index b75fbb3236a7..f5fa09d740b4 100644
--- a/sound/sh/Kconfig
+++ b/sound/sh/Kconfig
@@ -14,7 +14,7 @@ if SND_SUPERH
config SND_AICA
tristate "Dreamcast Yamaha AICA sound"
- depends on SH_DREAMCAST
+ depends on SH_DREAMCAST && SH_DMA_API
select SND_PCM
select G2_DMA
help
diff --git a/sound/sh/aica.c b/sound/sh/aica.c
index 5a93f4587356..f88331a48638 100644
--- a/sound/sh/aica.c
+++ b/sound/sh/aica.c
@@ -469,8 +469,8 @@ static int aica_pcmvolume_info(struct snd_kcontrol *kcontrol,
static int aica_pcmvolume_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_card_aica *dreamcastcard;
- dreamcastcard = kcontrol->private_data;
+ struct snd_card_aica *dreamcastcard = snd_kcontrol_chip(kcontrol);
+
if (unlikely(!dreamcastcard->channel))
return -ETXTBSY; /* we've not yet been set up */
ucontrol->value.integer.value[0] = dreamcastcard->channel->vol;
@@ -480,9 +480,9 @@ static int aica_pcmvolume_get(struct snd_kcontrol *kcontrol,
static int aica_pcmvolume_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct snd_card_aica *dreamcastcard;
+ struct snd_card_aica *dreamcastcard = snd_kcontrol_chip(kcontrol);
unsigned int vol;
- dreamcastcard = kcontrol->private_data;
+
if (unlikely(!dreamcastcard->channel))
return -ETXTBSY;
vol = ucontrol->value.integer.value[0];
diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c
index 02b04f355ca6..42aa009c4e13 100644
--- a/sound/soc/amd/acp-da7219-max98357a.c
+++ b/sound/soc/amd/acp-da7219-max98357a.c
@@ -517,11 +517,11 @@ static const struct snd_soc_ops cz_rt5682_dmic1_cap_ops = {
};
SND_SOC_DAILINK_DEF(designware1,
- DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.1.auto")));
+ DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.1")));
SND_SOC_DAILINK_DEF(designware2,
- DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.2.auto")));
+ DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.2")));
SND_SOC_DAILINK_DEF(designware3,
- DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.3.auto")));
+ DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.3")));
SND_SOC_DAILINK_DEF(dlgs,
DAILINK_COMP_ARRAY(COMP_CODEC("i2c-DLGS7219:00", "da7219-hifi")));
@@ -533,7 +533,7 @@ SND_SOC_DAILINK_DEF(adau,
DAILINK_COMP_ARRAY(COMP_CODEC("ADAU7002:00", "adau7002-hifi")));
SND_SOC_DAILINK_DEF(platform,
- DAILINK_COMP_ARRAY(COMP_PLATFORM("acp_audio_dma.0.auto")));
+ DAILINK_COMP_ARRAY(COMP_PLATFORM("acp_audio_dma.0")));
static struct snd_soc_dai_link cz_dai_7219_98357[] = {
{
diff --git a/sound/soc/amd/acp-es8336.c b/sound/soc/amd/acp-es8336.c
index 0193b3eae7a6..b16dde0e2987 100644
--- a/sound/soc/amd/acp-es8336.c
+++ b/sound/soc/amd/acp-es8336.c
@@ -137,11 +137,11 @@ static const struct snd_soc_ops st_es8336_ops = {
};
SND_SOC_DAILINK_DEF(designware1,
- DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.2.auto")));
+ DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.1")));
SND_SOC_DAILINK_DEF(codec,
DAILINK_COMP_ARRAY(COMP_CODEC("i2c-ESSX8336:00", "ES8316 HiFi")));
SND_SOC_DAILINK_DEF(platform,
- DAILINK_COMP_ARRAY(COMP_PLATFORM("acp_audio_dma.1.auto")));
+ DAILINK_COMP_ARRAY(COMP_PLATFORM("acp_audio_dma.0")));
static struct snd_soc_dai_link st_dai_es8336[] = {
{
diff --git a/sound/soc/amd/acp-rt5645.c b/sound/soc/amd/acp-rt5645.c
index 72ddad24dbda..11d373169380 100644
--- a/sound/soc/amd/acp-rt5645.c
+++ b/sound/soc/amd/acp-rt5645.c
@@ -108,15 +108,15 @@ static const struct snd_soc_ops cz_aif1_ops = {
};
SND_SOC_DAILINK_DEF(designware1,
- DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.1.auto")));
+ DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.1")));
SND_SOC_DAILINK_DEF(designware2,
- DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.2.auto")));
+ DAILINK_COMP_ARRAY(COMP_CPU("designware-i2s.2")));
SND_SOC_DAILINK_DEF(codec,
DAILINK_COMP_ARRAY(COMP_CODEC("i2c-10EC5650:00", "rt5645-aif1")));
SND_SOC_DAILINK_DEF(platform,
- DAILINK_COMP_ARRAY(COMP_PLATFORM("acp_audio_dma.0.auto")));
+ DAILINK_COMP_ARRAY(COMP_PLATFORM("acp_audio_dma.0")));
static struct snd_soc_dai_link cz_dai_rt5650[] = {
{
diff --git a/sound/soc/amd/acp/acp-rembrandt.c b/sound/soc/amd/acp/acp-rembrandt.c
index cccdd10c345e..aeffd24710e7 100644
--- a/sound/soc/amd/acp/acp-rembrandt.c
+++ b/sound/soc/amd/acp/acp-rembrandt.c
@@ -22,7 +22,7 @@
#include <linux/pci.h>
#include <linux/pm_runtime.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "amd.h"
#include "../mach-config.h"
diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c
index 2020c5cfb3d5..6c24f9d8694e 100644
--- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c
+++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c
@@ -272,7 +272,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card,
/* generate DAI links by each sdw link */
while (soc_dais->initialised) {
- int current_be_id;
+ int current_be_id = 0;
ret = create_sdw_dailink(card, soc_dais, dai_links,
&current_be_id, codec_conf, sdw_platform_component);
@@ -321,7 +321,7 @@ static int create_dmic_dailinks(struct snd_soc_card *card,
*be_id = ACP_DMIC_BE_ID;
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, "acp-dmic-codec",
0, 1, // DMIC only supports capture
- pdm_cpu->name, pdm_platform->name, 1,
+ pdm_cpu->name, pdm_platform->name,
"dmic-codec.0", "dmic-hifi", no_pcm,
asoc_sdw_dmic_init, NULL);
if (ret)
diff --git a/sound/soc/amd/acp/acp-sdw-sof-mach.c b/sound/soc/amd/acp/acp-sdw-sof-mach.c
index c09b1f118a6c..654fe78b2e2e 100644
--- a/sound/soc/amd/acp/acp-sdw-sof-mach.c
+++ b/sound/soc/amd/acp/acp-sdw-sof-mach.c
@@ -128,6 +128,13 @@ static int create_sdw_dailink(struct snd_soc_card *card,
if (ret)
return ret;
break;
+ case ACP70_PCI_REV:
+ case ACP71_PCI_REV:
+ ret = get_acp70_cpu_pin_id(ffs(sof_end->link_mask - 1),
+ *be_id, &cpu_pin_id, dev);
+ if (ret)
+ return ret;
+ break;
default:
return -EINVAL;
}
@@ -219,7 +226,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card,
/* generate DAI links by each sdw link */
while (sof_dais->initialised) {
- int current_be_id;
+ int current_be_id = 0;
ret = create_sdw_dailink(card, sof_dais, dai_links,
&current_be_id, codec_conf);
@@ -245,7 +252,6 @@ static int create_dmic_dailinks(struct snd_soc_card *card,
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, "acp-dmic-codec",
0, 1, // DMIC only supports capture
"acp-sof-dmic", platform_component->name,
- ARRAY_SIZE(platform_component),
"dmic-codec", "dmic-hifi", no_pcm,
asoc_sdw_dmic_init, NULL);
if (ret)
diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c
index d7b54f12f406..6215e31ecedd 100644
--- a/sound/soc/amd/acp/acp-sof-mach.c
+++ b/sound/soc/amd/acp/acp-sof-mach.c
@@ -88,7 +88,7 @@ static struct acp_card_drvdata sof_nau8821_max98388_data = {
static int acp_sof_probe(struct platform_device *pdev)
{
- struct snd_soc_card *card = NULL;
+ struct snd_soc_card *card;
struct device *dev = &pdev->dev;
struct snd_soc_acpi_mach *mach = dev_get_platdata(&pdev->dev);
const struct dmi_system_id *dmi_id;
diff --git a/sound/soc/amd/acp/acp63.c b/sound/soc/amd/acp/acp63.c
index 1f15c96a9b94..10fb416b959d 100644
--- a/sound/soc/amd/acp/acp63.c
+++ b/sound/soc/amd/acp/acp63.c
@@ -21,7 +21,7 @@
#include <linux/pm_runtime.h>
#include <linux/pci.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "amd.h"
#include "acp-mach.h"
diff --git a/sound/soc/amd/acp/acp70.c b/sound/soc/amd/acp/acp70.c
index 217b717e9beb..b95e3949e70b 100644
--- a/sound/soc/amd/acp/acp70.c
+++ b/sound/soc/amd/acp/acp70.c
@@ -23,7 +23,7 @@
#include "amd.h"
#include "acp-mach.h"
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#define DRV_NAME "acp_asoc_acp70"
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index e632f16c9102..3d9da93d22ee 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -315,6 +315,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "83HN"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "83L3"),
}
},
@@ -360,7 +367,7 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "M5402RA"),
}
},
- {
+ {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
diff --git a/sound/soc/apple/mca.c b/sound/soc/apple/mca.c
index b4f4696809dd..5dd24ab90d0f 100644
--- a/sound/soc/apple/mca.c
+++ b/sound/soc/apple/mca.c
@@ -464,6 +464,28 @@ err:
return -EINVAL;
}
+static int mca_fe_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct mca_cluster *cl = mca_dai_to_cluster(dai);
+ unsigned int mask, nchannels;
+
+ if (cl->tdm_slots) {
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ mask = cl->tdm_tx_mask;
+ else
+ mask = cl->tdm_rx_mask;
+
+ nchannels = hweight32(mask);
+ } else {
+ nchannels = 2;
+ }
+
+ return snd_pcm_hw_constraint_minmax(substream->runtime,
+ SNDRV_PCM_HW_PARAM_CHANNELS,
+ 1, nchannels);
+}
+
static int mca_fe_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
unsigned int rx_mask, int slots, int slot_width)
{
@@ -680,6 +702,7 @@ static int mca_fe_hw_params(struct snd_pcm_substream *substream,
}
static const struct snd_soc_dai_ops mca_fe_ops = {
+ .startup = mca_fe_startup,
.set_fmt = mca_fe_set_fmt,
.set_bclk_ratio = mca_set_bclk_ratio,
.set_tdm_slot = mca_fe_set_tdm_slot,
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 20f99cbee29b..126f897312d4 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -103,6 +103,7 @@ config SND_SOC_ALL_CODECS
imply SND_SOC_CS47L85
imply SND_SOC_CS47L90
imply SND_SOC_CS47L92
+ imply SND_SOC_CS48L32
imply SND_SOC_CS53L30
imply SND_SOC_CS530X_I2C
imply SND_SOC_CX20442
@@ -119,6 +120,8 @@ config SND_SOC_ALL_CODECS
imply SND_SOC_ES8326
imply SND_SOC_ES8328_SPI
imply SND_SOC_ES8328_I2C
+ imply SND_SOC_ES8375
+ imply SND_SOC_ES8389
imply SND_SOC_ES7134
imply SND_SOC_ES7241
imply SND_SOC_FRAMER
@@ -234,6 +237,7 @@ config SND_SOC_ALL_CODECS
imply SND_SOC_RT1318_SDW
imply SND_SOC_RT1320_SDW
imply SND_SOC_RT9120
+ imply SND_SOC_RT9123
imply SND_SOC_RTQ9128
imply SND_SOC_SDW_MOCKUP
imply SND_SOC_SGTL5000
@@ -260,6 +264,7 @@ config SND_SOC_ALL_CODECS
imply SND_SOC_TAS2770
imply SND_SOC_TAS2780
imply SND_SOC_TAS2781_COMLIB
+ imply SND_SOC_TAS2781_COMLIB_I2C
imply SND_SOC_TAS2781_FMWLIB
imply SND_SOC_TAS2781_I2C
imply SND_SOC_TAS5086
@@ -403,6 +408,7 @@ config SND_SOC_WM_ADSP
default y if SND_SOC_CS35L45_SPI=y
default y if SND_SOC_CS35L45_I2C=y
default y if SND_SOC_CS35L56=y
+ default y if SND_SOC_CS48L32=y
default m if SND_SOC_MADERA=m
default m if SND_SOC_CS47L24=m
default m if SND_SOC_WM5102=m
@@ -413,6 +419,7 @@ config SND_SOC_WM_ADSP
default m if SND_SOC_CS35L45_SPI=m
default m if SND_SOC_CS35L45_I2C=m
default m if SND_SOC_CS35L56=m
+ default m if SND_SOC_CS48L32=m
config SND_SOC_AB8500_CODEC
tristate
@@ -1048,6 +1055,13 @@ config SND_SOC_CS47L92
tristate
depends on MFD_CS47L92
+config SND_SOC_CS48L32
+ tristate "Cirrus Logic CS48L32 audio DSP"
+ depends on SPI_MASTER
+ select REGMAP_SPI
+ help
+ Build the codec driver for the Cirrus Logic CS48L32 audio DSP.
+
# Cirrus Logic Quad-Channel ADC
config SND_SOC_CS53L30
tristate "Cirrus Logic CS53L30 CODEC"
@@ -1199,6 +1213,14 @@ config SND_SOC_ES8328_SPI
depends on SPI_MASTER
select SND_SOC_ES8328
+config SND_SOC_ES8375
+ tristate "Everest Semi ES8375 CODEC"
+ depends on I2C
+
+config SND_SOC_ES8389
+ tristate "Everest Semi ES8389 CODEC"
+ depends on I2C
+
config SND_SOC_FRAMER
tristate "Framer codec"
depends on GENERIC_FRAMER
@@ -1822,6 +1844,20 @@ config SND_SOC_RT9120
Enable support for Richtek RT9120 20W, stereo, inductor-less,
high-efficiency Class-D audio amplifier.
+config SND_SOC_RT9123
+ tristate "Richtek RT9123 Mono Class-D Amplifier"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ Enable support for the I2C control mode of Richtek RT9123 3.2W mono
+ Class-D audio amplifier.
+
+config SND_SOC_RT9123P
+ tristate "Richtek RT9123P Mono Class-D Amplifier"
+ help
+ Enable support for the HW control mode of Richtek RT9123P 3.2W mono
+ Class-D audio amplifier.
+
config SND_SOC_RTQ9128
tristate "Richtek RTQ9128 45W Digital Input Amplifier"
depends on I2C
@@ -1990,20 +2026,24 @@ config SND_SOC_TAS2780
digital input mono Class-D audio power amplifiers.
config SND_SOC_TAS2781_COMLIB
+ tristate
+
+config SND_SOC_TAS2781_COMLIB_I2C
depends on I2C
select CRC8
select REGMAP_I2C
tristate
config SND_SOC_TAS2781_FMWLIB
- depends on SND_SOC_TAS2781_COMLIB
+ select SND_SOC_TAS2781_COMLIB
+ select CRC8
tristate
default n
config SND_SOC_TAS2781_I2C
tristate "Texas Instruments TAS2781 speaker amplifier based on I2C"
depends on I2C
- select SND_SOC_TAS2781_COMLIB
+ select SND_SOC_TAS2781_COMLIB_I2C
select SND_SOC_TAS2781_FMWLIB
help
Enable support for Texas Instruments TAS2781 Smart Amplifier
@@ -2238,6 +2278,7 @@ config SND_SOC_WCD938X
tristate
depends on SOUNDWIRE || !SOUNDWIRE
select SND_SOC_WCD_CLASSH
+ select MULTIPLEXER
config SND_SOC_WCD938X_SDW
tristate "WCD9380/WCD9385 Codec - SDW"
@@ -2478,7 +2519,7 @@ config SND_SOC_WM8997
depends on MFD_WM8997 && MFD_ARIZONA
config SND_SOC_WM8998
- tristate
+ tristate "Wolfson Microelectronics WM8998 codec driver"
depends on MFD_WM8998 && MFD_ARIZONA
config SND_SOC_WM9081
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 10f726066b6c..6d7aa109ede7 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -111,6 +111,7 @@ snd-soc-cs47l35-y := cs47l35.o
snd-soc-cs47l85-y := cs47l85.o
snd-soc-cs47l90-y := cs47l90.o
snd-soc-cs47l92-y := cs47l92.o
+snd-soc-cs48l32-y := cs48l32.o cs48l32-tables.o
snd-soc-cs53l30-y := cs53l30.o
snd-soc-cs530x-y := cs530x.o
snd-soc-cs530x-i2c-y := cs530x-i2c.o
@@ -133,6 +134,8 @@ snd-soc-es8326-y := es8326.o
snd-soc-es8328-y := es8328.o
snd-soc-es8328-i2c-y := es8328-i2c.o
snd-soc-es8328-spi-y := es8328-spi.o
+snd-soc-es8375-y := es8375.o
+snd-soc-es8389-y := es8389.o
snd-soc-framer-y := framer-codec.o
snd-soc-gtm601-y := gtm601.o
snd-soc-hdac-hdmi-y := hdac_hdmi.o
@@ -270,6 +273,8 @@ snd-soc-rt715-sdca-y := rt715-sdca.o rt715-sdca-sdw.o
snd-soc-rt721-sdca-y := rt721-sdca.o rt721-sdca-sdw.o
snd-soc-rt722-sdca-y := rt722-sdca.o rt722-sdca-sdw.o
snd-soc-rt9120-y := rt9120.o
+snd-soc-rt9123-y := rt9123.o
+snd-soc-rt9123p-y := rt9123p.o
snd-soc-rtq9128-y := rtq9128.o
snd-soc-sdw-mockup-y := sdw-mockup.o
snd-soc-sgtl5000-y := sgtl5000.o
@@ -305,6 +310,7 @@ snd-soc-tas6424-y := tas6424.o
snd-soc-tda7419-y := tda7419.o
snd-soc-tas2770-y := tas2770.o
snd-soc-tas2781-comlib-y := tas2781-comlib.o
+snd-soc-tas2781-comlib-i2c-y := tas2781-comlib-i2c.o
snd-soc-tas2781-fmwlib-y := tas2781-fmwlib.o
snd-soc-tas2781-i2c-y := tas2781-i2c.o
snd-soc-tfa9879-y := tfa9879.o
@@ -529,6 +535,7 @@ obj-$(CONFIG_SND_SOC_CS47L35) += snd-soc-cs47l35.o
obj-$(CONFIG_SND_SOC_CS47L85) += snd-soc-cs47l85.o
obj-$(CONFIG_SND_SOC_CS47L90) += snd-soc-cs47l90.o
obj-$(CONFIG_SND_SOC_CS47L92) += snd-soc-cs47l92.o
+obj-$(CONFIG_SND_SOC_CS48L32) += snd-soc-cs48l32.o
obj-$(CONFIG_SND_SOC_CS53L30) += snd-soc-cs53l30.o
obj-$(CONFIG_SND_SOC_CS530X) += snd-soc-cs530x.o
obj-$(CONFIG_SND_SOC_CS530X_I2C) += snd-soc-cs530x-i2c.o
@@ -551,6 +558,8 @@ obj-$(CONFIG_SND_SOC_ES8326) += snd-soc-es8326.o
obj-$(CONFIG_SND_SOC_ES8328) += snd-soc-es8328.o
obj-$(CONFIG_SND_SOC_ES8328_I2C)+= snd-soc-es8328-i2c.o
obj-$(CONFIG_SND_SOC_ES8328_SPI)+= snd-soc-es8328-spi.o
+obj-$(CONFIG_SND_SOC_ES8375) += snd-soc-es8375.o
+obj-$(CONFIG_SND_SOC_ES8389) += snd-soc-es8389.o
obj-$(CONFIG_SND_SOC_FRAMER) += snd-soc-framer.o
obj-$(CONFIG_SND_SOC_GTM601) += snd-soc-gtm601.o
obj-$(CONFIG_SND_SOC_HDAC_HDMI) += snd-soc-hdac-hdmi.o
@@ -684,6 +693,8 @@ obj-$(CONFIG_SND_SOC_RT715_SDCA_SDW) += snd-soc-rt715-sdca.o
obj-$(CONFIG_SND_SOC_RT721_SDCA_SDW) += snd-soc-rt721-sdca.o
obj-$(CONFIG_SND_SOC_RT722_SDCA_SDW) += snd-soc-rt722-sdca.o
obj-$(CONFIG_SND_SOC_RT9120) += snd-soc-rt9120.o
+obj-$(CONFIG_SND_SOC_RT9123) += snd-soc-rt9123.o
+obj-$(CONFIG_SND_SOC_RT9123P) += snd-soc-rt9123p.o
obj-$(CONFIG_SND_SOC_RTQ9128) += snd-soc-rtq9128.o
obj-$(CONFIG_SND_SOC_SDW_MOCKUP) += snd-soc-sdw-mockup.o
obj-$(CONFIG_SND_SOC_SGTL5000) += snd-soc-sgtl5000.o
@@ -713,6 +724,7 @@ obj-$(CONFIG_SND_SOC_TAS2562) += snd-soc-tas2562.o
obj-$(CONFIG_SND_SOC_TAS2764) += snd-soc-tas2764.o
obj-$(CONFIG_SND_SOC_TAS2780) += snd-soc-tas2780.o
obj-$(CONFIG_SND_SOC_TAS2781_COMLIB) += snd-soc-tas2781-comlib.o
+obj-$(CONFIG_SND_SOC_TAS2781_COMLIB_I2C) += snd-soc-tas2781-comlib-i2c.o
obj-$(CONFIG_SND_SOC_TAS2781_FMWLIB) += snd-soc-tas2781-fmwlib.o
obj-$(CONFIG_SND_SOC_TAS2781_I2C) += snd-soc-tas2781-i2c.o
obj-$(CONFIG_SND_SOC_TAS5086) += snd-soc-tas5086.o
@@ -841,4 +853,4 @@ obj-$(CONFIG_SND_SOC_LPASS_RX_MACRO) += snd-soc-lpass-rx-macro.o
obj-$(CONFIG_SND_SOC_LPASS_TX_MACRO) += snd-soc-lpass-tx-macro.o
# Mux
-obj-$(CONFIG_SND_SOC_SIMPLE_MUX) += snd-soc-simple-mux.o
+obj-$(CONFIG_SND_SOC_SIMPLE_MUX) += snd-soc-simple-mux.o \ No newline at end of file
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index 0e013edfe63d..d8444a083af2 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <sound/core.h>
#include <sound/pcm.h>
#include <sound/ac97_codec.h>
@@ -127,9 +128,18 @@ static int ac97_probe(struct platform_device *pdev)
&soc_component_dev_ac97, &ac97_dai, 1);
}
+#ifdef CONFIG_OF
+static const struct of_device_id ac97_codec_of_match[] = {
+ { .compatible = "realtek,alc203", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, ac97_codec_of_match);
+#endif
+
static struct platform_driver ac97_codec_driver = {
.driver = {
.name = "ac97-codec",
+ .of_match_table = of_match_ptr(ac97_codec_of_match),
},
.probe = ac97_probe,
diff --git a/sound/soc/codecs/adau7118.c b/sound/soc/codecs/adau7118.c
index abc4764697a5..14259807c872 100644
--- a/sound/soc/codecs/adau7118.c
+++ b/sound/soc/codecs/adau7118.c
@@ -169,6 +169,12 @@ static int adau7118_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
case SND_SOC_DAIFMT_RIGHT_J:
st->right_j = true;
break;
+ case SND_SOC_DAIFMT_DSP_A:
+ ret = snd_soc_component_update_bits(dai->component,
+ ADAU7118_REG_SPT_CTRL1,
+ ADAU7118_DATA_FMT_MASK,
+ ADAU7118_DATA_FMT(1));
+ break;
default:
dev_err(st->dev, "Invalid format %d",
fmt & SND_SOC_DAIFMT_FORMAT_MASK);
diff --git a/sound/soc/codecs/ak4458.c b/sound/soc/codecs/ak4458.c
index 5f3a68dfe7bd..57cf601d3df3 100644
--- a/sound/soc/codecs/ak4458.c
+++ b/sound/soc/codecs/ak4458.c
@@ -586,13 +586,9 @@ static const struct snd_pcm_hw_constraint_list ak4458_rate_constraints = {
static int ak4458_startup(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
- int ret;
-
- ret = snd_pcm_hw_constraint_list(substream->runtime, 0,
- SNDRV_PCM_HW_PARAM_RATE,
- &ak4458_rate_constraints);
-
- return ret;
+ return snd_pcm_hw_constraint_list(substream->runtime, 0,
+ SNDRV_PCM_HW_PARAM_RATE,
+ &ak4458_rate_constraints);
}
static const struct snd_soc_dai_ops ak4458_dai_ops = {
diff --git a/sound/soc/codecs/ak5386.c b/sound/soc/codecs/ak5386.c
index 21a44476f48d..6525d50b7ab2 100644
--- a/sound/soc/codecs/ak5386.c
+++ b/sound/soc/codecs/ak5386.c
@@ -6,11 +6,13 @@
* (c) 2013 Daniel Mack <zonque@gmail.com>
*/
+#include <linux/device.h>
+#include <linux/dev_printk.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
#include <sound/soc.h>
#include <sound/pcm.h>
#include <sound/initval.h>
@@ -20,7 +22,7 @@ static const char * const supply_names[] = {
};
struct ak5386_priv {
- int reset_gpio;
+ struct gpio_desc *reset_gpio;
struct regulator_bulk_data supplies[ARRAY_SIZE(supply_names)];
};
@@ -110,8 +112,7 @@ static int ak5386_hw_params(struct snd_pcm_substream *substream,
* the AK5386 in power-down mode (PDN pin = “Lâ€).
*/
- if (gpio_is_valid(priv->reset_gpio))
- gpio_set_value(priv->reset_gpio, 1);
+ gpiod_set_value(priv->reset_gpio, 1);
return 0;
}
@@ -122,8 +123,7 @@ static int ak5386_hw_free(struct snd_pcm_substream *substream,
struct snd_soc_component *component = dai->component;
struct ak5386_priv *priv = snd_soc_component_get_drvdata(component);
- if (gpio_is_valid(priv->reset_gpio))
- gpio_set_value(priv->reset_gpio, 0);
+ gpiod_set_value(priv->reset_gpio, 0);
return 0;
}
@@ -177,14 +177,12 @@ static int ak5386_probe(struct platform_device *pdev)
if (ret < 0)
return ret;
- priv->reset_gpio = of_get_named_gpio(dev->of_node,
- "reset-gpio", 0);
+ priv->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->reset_gpio))
+ return dev_err_probe(dev, PTR_ERR(priv->reset_gpio),
+ "Failed to get AK5386 reset GPIO\n");
- if (gpio_is_valid(priv->reset_gpio))
- if (devm_gpio_request_one(dev, priv->reset_gpio,
- GPIOF_OUT_INIT_LOW,
- "AK5386 Reset"))
- priv->reset_gpio = -EINVAL;
+ gpiod_set_consumer_name(priv->reset_gpio, "AK5386 Reset");
return devm_snd_soc_register_component(dev, &soc_component_ak5386,
&ak5386_dai, 1);
diff --git a/sound/soc/codecs/aw88166.c b/sound/soc/codecs/aw88166.c
index 6c50c4a18b6a..4f76ebe11cc7 100644
--- a/sound/soc/codecs/aw88166.c
+++ b/sound/soc/codecs/aw88166.c
@@ -11,6 +11,7 @@
#include <linux/firmware.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
+#include <linux/minmax.h>
#include <linux/regmap.h>
#include <sound/soc.h>
#include "aw88166.h"
@@ -909,11 +910,7 @@ static int aw_dev_dsp_update_container(struct aw_device *aw_dev,
goto error_operation;
for (i = 0; i < len; i += AW88166_MAX_RAM_WRITE_BYTE_SIZE) {
- if ((len - i) < AW88166_MAX_RAM_WRITE_BYTE_SIZE)
- tmp_len = len - i;
- else
- tmp_len = AW88166_MAX_RAM_WRITE_BYTE_SIZE;
-
+ tmp_len = min(len - i, AW88166_MAX_RAM_WRITE_BYTE_SIZE);
ret = regmap_raw_write(aw_dev->regmap, AW88166_DSPMDAT_REG,
&data[i], tmp_len);
if (ret)
diff --git a/sound/soc/codecs/aw88395/aw88395_device.c b/sound/soc/codecs/aw88395/aw88395_device.c
index b7ea8be0d0cb..e1430940015d 100644
--- a/sound/soc/codecs/aw88395/aw88395_device.c
+++ b/sound/soc/codecs/aw88395/aw88395_device.c
@@ -10,6 +10,7 @@
#include <linux/crc32.h>
#include <linux/i2c.h>
+#include <linux/minmax.h>
#include <linux/regmap.h>
#include "aw88395_device.h"
#include "aw88395_reg.h"
@@ -1114,11 +1115,7 @@ static int aw_dev_dsp_update_container(struct aw_device *aw_dev,
goto error_operation;
for (i = 0; i < len; i += AW88395_MAX_RAM_WRITE_BYTE_SIZE) {
- if ((len - i) < AW88395_MAX_RAM_WRITE_BYTE_SIZE)
- tmp_len = len - i;
- else
- tmp_len = AW88395_MAX_RAM_WRITE_BYTE_SIZE;
-
+ tmp_len = min(len - i, AW88395_MAX_RAM_WRITE_BYTE_SIZE);
ret = regmap_raw_write(aw_dev->regmap, AW88395_DSPMDAT_REG,
&data[i], tmp_len);
if (ret)
diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c
index ee3cc2a95f85..4b90133e5ab4 100644
--- a/sound/soc/codecs/aw88399.c
+++ b/sound/soc/codecs/aw88399.c
@@ -11,6 +11,7 @@
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
#include <linux/firmware.h>
+#include <linux/minmax.h>
#include <linux/regmap.h>
#include <sound/soc.h>
#include "aw88399.h"
@@ -872,11 +873,7 @@ static int aw_dev_dsp_update_container(struct aw_device *aw_dev,
goto error_operation;
for (i = 0; i < len; i += AW88399_MAX_RAM_WRITE_BYTE_SIZE) {
- if ((len - i) < AW88399_MAX_RAM_WRITE_BYTE_SIZE)
- tmp_len = len - i;
- else
- tmp_len = AW88399_MAX_RAM_WRITE_BYTE_SIZE;
-
+ tmp_len = min(len - i, AW88399_MAX_RAM_WRITE_BYTE_SIZE);
ret = regmap_raw_write(aw_dev->regmap, AW88399_DSPMDAT_REG,
&data[i], tmp_len);
if (ret)
diff --git a/sound/soc/codecs/cs-amp-lib-test.c b/sound/soc/codecs/cs-amp-lib-test.c
index 45626f99a417..f53650128fc3 100644
--- a/sound/soc/codecs/cs-amp-lib-test.c
+++ b/sound/soc/codecs/cs-amp-lib-test.c
@@ -5,19 +5,25 @@
// Copyright (C) 2024 Cirrus Logic, Inc. and
// Cirrus Logic International Semiconductor Ltd.
+#include <kunit/resource.h>
#include <kunit/test.h>
#include <kunit/static_stub.h>
+#include <linux/device/faux.h>
#include <linux/firmware/cirrus/cs_dsp.h>
#include <linux/firmware/cirrus/wmfw.h>
#include <linux/gpio/driver.h>
#include <linux/list.h>
#include <linux/module.h>
+#include <linux/overflow.h>
#include <linux/platform_device.h>
#include <linux/random.h>
#include <sound/cs-amp-lib.h>
+KUNIT_DEFINE_ACTION_WRAPPER(faux_device_destroy_wrapper, faux_device_destroy,
+ struct faux_device *)
+
struct cs_amp_lib_test_priv {
- struct platform_device amp_pdev;
+ struct faux_device *amp_dev;
struct cirrus_amp_efi_data *cal_blob;
struct list_head ctl_write_list;
@@ -40,8 +46,7 @@ static void cs_amp_lib_test_init_dummy_cal_blob(struct kunit *test, int num_amps
unsigned int blob_size;
int i;
- blob_size = offsetof(struct cirrus_amp_efi_data, data) +
- sizeof(struct cirrus_amp_cal_data) * num_amps;
+ blob_size = struct_size(priv->cal_blob, data, num_amps);
priv->cal_blob = kunit_kzalloc(test, blob_size, GFP_KERNEL);
KUNIT_ASSERT_NOT_NULL(test, priv->cal_blob);
@@ -49,7 +54,7 @@ static void cs_amp_lib_test_init_dummy_cal_blob(struct kunit *test, int num_amps
priv->cal_blob->size = blob_size;
priv->cal_blob->count = num_amps;
- get_random_bytes(priv->cal_blob->data, sizeof(struct cirrus_amp_cal_data) * num_amps);
+ get_random_bytes(priv->cal_blob->data, flex_array_size(priv->cal_blob, data, num_amps));
/* Ensure all timestamps are non-zero to mark the entry valid. */
for (i = 0; i < num_amps; i++)
@@ -99,7 +104,7 @@ static void cs_amp_lib_test_cal_data_too_short_test(struct kunit *test)
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable_nohead);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0, 0, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0, 0, &result_data);
KUNIT_EXPECT_EQ(test, ret, -EOVERFLOW);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -142,7 +147,7 @@ static void cs_amp_lib_test_cal_count_too_big_test(struct kunit *test)
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable_bad_count);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0, 0, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0, 0, &result_data);
KUNIT_EXPECT_EQ(test, ret, -EOVERFLOW);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -169,7 +174,7 @@ static void cs_amp_lib_test_no_cal_data_test(struct kunit *test)
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable_none);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0, 0, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0, 0, &result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -223,7 +228,7 @@ static void cs_amp_lib_test_get_efi_cal_by_uid_test(struct kunit *test)
cs_amp_lib_test_get_efi_variable);
target_uid = cs_amp_lib_test_get_target_uid(test);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, target_uid, -1, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, target_uid, -1, &result_data);
KUNIT_EXPECT_EQ(test, ret, 0);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -257,7 +262,7 @@ static void cs_amp_lib_test_get_efi_cal_by_index_unchecked_test(struct kunit *te
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0,
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0,
param->amp_index, &result_data);
KUNIT_EXPECT_EQ(test, ret, 0);
@@ -292,7 +297,7 @@ static void cs_amp_lib_test_get_efi_cal_by_index_checked_test(struct kunit *test
cs_amp_lib_test_get_efi_variable);
target_uid = cs_amp_lib_test_get_target_uid(test);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, target_uid,
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, target_uid,
param->amp_index, &result_data);
KUNIT_EXPECT_EQ(test, ret, 0);
@@ -331,7 +336,7 @@ static void cs_amp_lib_test_get_efi_cal_by_index_uid_mismatch_test(struct kunit
/* Get a target UID that won't match the entry */
target_uid = ~cs_amp_lib_test_get_target_uid(test);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, target_uid,
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, target_uid,
param->amp_index, &result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
@@ -363,7 +368,7 @@ static void cs_amp_lib_test_get_efi_cal_by_index_fallback_test(struct kunit *tes
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, bad_target_uid,
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, bad_target_uid,
param->amp_index, &result_data);
KUNIT_EXPECT_EQ(test, ret, 0);
@@ -405,7 +410,7 @@ static void cs_amp_lib_test_get_efi_cal_uid_not_found_noindex_test(struct kunit
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, bad_target_uid, -1,
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, bad_target_uid, -1,
&result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
@@ -436,7 +441,7 @@ static void cs_amp_lib_test_get_efi_cal_uid_not_found_index_not_found_test(struc
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, bad_target_uid, 99,
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, bad_target_uid, 99,
&result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
@@ -460,7 +465,7 @@ static void cs_amp_lib_test_get_efi_cal_no_uid_index_not_found_test(struct kunit
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0, 99, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0, 99, &result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -480,7 +485,7 @@ static void cs_amp_lib_test_get_efi_cal_no_uid_no_index_test(struct kunit *test)
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0, -1, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0, -1, &result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -509,7 +514,7 @@ static void cs_amp_lib_test_get_efi_cal_zero_not_matched_test(struct kunit *test
cs_amp_test_hooks->get_efi_variable,
cs_amp_lib_test_get_efi_variable);
- ret = cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev, 0, -1, &result_data);
+ ret = cs_amp_get_efi_calibration_data(&priv->amp_dev->dev, 0, -1, &result_data);
KUNIT_EXPECT_EQ(test, ret, -ENOENT);
kunit_deactivate_static_stub(test, cs_amp_test_hooks->get_efi_variable);
@@ -543,14 +548,14 @@ static void cs_amp_lib_test_get_efi_cal_empty_entry_test(struct kunit *test)
/* Lookup by UID should not find it */
KUNIT_EXPECT_EQ(test,
- cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev,
+ cs_amp_get_efi_calibration_data(&priv->amp_dev->dev,
uid, -1,
&result_data),
-ENOENT);
/* Get by index should ignore it */
KUNIT_EXPECT_EQ(test,
- cs_amp_get_efi_calibration_data(&priv->amp_pdev.dev,
+ cs_amp_get_efi_calibration_data(&priv->amp_dev->dev,
0, 2,
&result_data),
-ENOENT);
@@ -600,7 +605,7 @@ static void cs_amp_lib_test_write_cal_data_test(struct kunit *test)
dsp = kunit_kzalloc(test, sizeof(*dsp), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dsp);
- dsp->dev = &priv->amp_pdev.dev;
+ dsp->dev = &priv->amp_dev->dev;
get_random_bytes(&data, sizeof(data));
@@ -637,14 +642,9 @@ static void cs_amp_lib_test_write_cal_data_test(struct kunit *test)
KUNIT_EXPECT_EQ(test, entry->value, data.calStatus);
}
-static void cs_amp_lib_test_dev_release(struct device *dev)
-{
-}
-
static int cs_amp_lib_test_case_init(struct kunit *test)
{
struct cs_amp_lib_test_priv *priv;
- int ret;
KUNIT_ASSERT_NOT_NULL(test, cs_amp_test_hooks);
@@ -656,23 +656,16 @@ static int cs_amp_lib_test_case_init(struct kunit *test)
INIT_LIST_HEAD(&priv->ctl_write_list);
/* Create dummy amp driver dev */
- priv->amp_pdev.name = "cs_amp_lib_test_drv";
- priv->amp_pdev.id = -1;
- priv->amp_pdev.dev.release = cs_amp_lib_test_dev_release;
- ret = platform_device_register(&priv->amp_pdev);
- KUNIT_ASSERT_GE_MSG(test, ret, 0, "Failed to register amp platform device\n");
+ priv->amp_dev = faux_device_create("cs_amp_lib_test_drv", NULL, NULL);
+ KUNIT_ASSERT_NOT_NULL(test, priv->amp_dev);
+ KUNIT_ASSERT_EQ(test, 0,
+ kunit_add_action_or_reset(test,
+ faux_device_destroy_wrapper,
+ priv->amp_dev));
return 0;
}
-static void cs_amp_lib_test_case_exit(struct kunit *test)
-{
- struct cs_amp_lib_test_priv *priv = test->priv;
-
- if (priv->amp_pdev.name)
- platform_device_unregister(&priv->amp_pdev);
-}
-
static const struct cs_amp_lib_test_param cs_amp_lib_test_get_cal_param_cases[] = {
{ .num_amps = 2, .amp_index = 0 },
{ .num_amps = 2, .amp_index = 1 },
@@ -750,7 +743,6 @@ static struct kunit_case cs_amp_lib_test_cases[] = {
static struct kunit_suite cs_amp_lib_test_suite = {
.name = "snd-soc-cs-amp-lib-test",
.init = cs_amp_lib_test_case_init,
- .exit = cs_amp_lib_test_case_exit,
.test_cases = cs_amp_lib_test_cases,
};
diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c
index c677868c5d5f..808e67c90f7c 100644
--- a/sound/soc/codecs/cs-amp-lib.c
+++ b/sound/soc/codecs/cs-amp-lib.c
@@ -11,6 +11,7 @@
#include <linux/efi.h>
#include <linux/firmware/cirrus/cs_dsp.h>
#include <linux/module.h>
+#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <sound/cs-amp-lib.h>
@@ -147,7 +148,7 @@ static struct cirrus_amp_efi_data *cs_amp_get_cal_efi_buffer(struct device *dev)
dev_dbg(dev, "Calibration: Size=%d, Amp Count=%d\n", efi_data->size, efi_data->count);
if ((efi_data->count > 128) ||
- offsetof(struct cirrus_amp_efi_data, data[efi_data->count]) > data_size) {
+ struct_size(efi_data, data, efi_data->count) > data_size) {
dev_err(dev, "EFI cal variable truncated\n");
ret = -EOVERFLOW;
goto err;
diff --git a/sound/soc/codecs/cs35l56-i2c.c b/sound/soc/codecs/cs35l56-i2c.c
index 8a518df1e16e..073f1796ae29 100644
--- a/sound/soc/codecs/cs35l56-i2c.c
+++ b/sound/soc/codecs/cs35l56-i2c.c
@@ -17,9 +17,10 @@
static int cs35l56_i2c_probe(struct i2c_client *client)
{
+ unsigned int id = (u32)(uintptr_t)i2c_get_match_data(client);
struct cs35l56_private *cs35l56;
struct device *dev = &client->dev;
- const struct regmap_config *regmap_config = &cs35l56_regmap_i2c;
+ const struct regmap_config *regmap_config;
int ret;
cs35l56 = devm_kzalloc(dev, sizeof(struct cs35l56_private), GFP_KERNEL);
@@ -30,6 +31,20 @@ static int cs35l56_i2c_probe(struct i2c_client *client)
cs35l56->base.can_hibernate = true;
i2c_set_clientdata(client, cs35l56);
+
+ switch (id) {
+ case 0x3556:
+ regmap_config = &cs35l56_regmap_i2c;
+ cs35l56->base.fw_reg = &cs35l56_fw_reg;
+ break;
+ case 0x3563:
+ regmap_config = &cs35l63_regmap_i2c;
+ cs35l56->base.fw_reg = &cs35l63_fw_reg;
+ break;
+ default:
+ return -ENODEV;
+ }
+
cs35l56->base.regmap = devm_regmap_init_i2c(client, regmap_config);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
@@ -57,14 +72,16 @@ static void cs35l56_i2c_remove(struct i2c_client *client)
}
static const struct i2c_device_id cs35l56_id_i2c[] = {
- { "cs35l56" },
+ { "cs35l56", 0x3556 },
+ { "cs35l63", 0x3563 },
{}
};
MODULE_DEVICE_TABLE(i2c, cs35l56_id_i2c);
#ifdef CONFIG_ACPI
static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
- { "CSC355C", 0 },
+ { "CSC355C", 0x3556 },
+ { "CSC356C", 0x3563 },
{},
};
MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c
index 3f91cb3f9ae7..13f602f51bf3 100644
--- a/sound/soc/codecs/cs35l56-sdw.c
+++ b/sound/soc/codecs/cs35l56-sdw.c
@@ -393,6 +393,74 @@ static int cs35l56_sdw_update_status(struct sdw_slave *peripheral,
return 0;
}
+static int cs35l63_sdw_kick_divider(struct cs35l56_private *cs35l56,
+ struct sdw_slave *peripheral)
+{
+ unsigned int curr_scale_reg, next_scale_reg;
+ int curr_scale, next_scale, ret;
+
+ if (!cs35l56->base.init_done)
+ return 0;
+
+ if (peripheral->bus->params.curr_bank) {
+ curr_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B1;
+ next_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B0;
+ } else {
+ curr_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B0;
+ next_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B1;
+ }
+
+ /*
+ * Current clock scale value must be different to new value.
+ * Modify current to guarantee this. If next still has the dummy
+ * value we wrote when it was current, the core code has not set
+ * a new scale so restore its original good value
+ */
+ curr_scale = sdw_read_no_pm(peripheral, curr_scale_reg);
+ if (curr_scale < 0) {
+ dev_err(cs35l56->base.dev, "Failed to read current clock scale: %d\n", curr_scale);
+ return curr_scale;
+ }
+
+ next_scale = sdw_read_no_pm(peripheral, next_scale_reg);
+ if (next_scale < 0) {
+ dev_err(cs35l56->base.dev, "Failed to read next clock scale: %d\n", next_scale);
+ return next_scale;
+ }
+
+ if (next_scale == CS35L56_SDW_INVALID_BUS_SCALE) {
+ next_scale = cs35l56->old_sdw_clock_scale;
+ ret = sdw_write_no_pm(peripheral, next_scale_reg, next_scale);
+ if (ret < 0) {
+ dev_err(cs35l56->base.dev, "Failed to modify current clock scale: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ cs35l56->old_sdw_clock_scale = curr_scale;
+ ret = sdw_write_no_pm(peripheral, curr_scale_reg, CS35L56_SDW_INVALID_BUS_SCALE);
+ if (ret < 0) {
+ dev_err(cs35l56->base.dev, "Failed to modify current clock scale: %d\n", ret);
+ return ret;
+ }
+
+ dev_dbg(cs35l56->base.dev, "Next bus scale: %#x\n", next_scale);
+
+ return 0;
+}
+
+static int cs35l56_sdw_bus_config(struct sdw_slave *peripheral,
+ struct sdw_bus_params *params)
+{
+ struct cs35l56_private *cs35l56 = dev_get_drvdata(&peripheral->dev);
+
+ if ((cs35l56->base.type == 0x63) && (cs35l56->base.rev < 0xa1))
+ return cs35l63_sdw_kick_divider(cs35l56, peripheral);
+
+ return 0;
+}
+
static int __maybe_unused cs35l56_sdw_clk_stop(struct sdw_slave *peripheral,
enum sdw_clk_stop_mode mode,
enum sdw_clk_stop_type type)
@@ -408,6 +476,7 @@ static const struct sdw_slave_ops cs35l56_sdw_ops = {
.read_prop = cs35l56_sdw_read_prop,
.interrupt_callback = cs35l56_sdw_interrupt,
.update_status = cs35l56_sdw_update_status,
+ .bus_config = cs35l56_sdw_bus_config,
#ifdef DEBUG
.clk_stop = cs35l56_sdw_clk_stop,
#endif
@@ -509,6 +578,7 @@ static int cs35l56_sdw_probe(struct sdw_slave *peripheral, const struct sdw_devi
{
struct device *dev = &peripheral->dev;
struct cs35l56_private *cs35l56;
+ const struct regmap_config *regmap_config;
int ret;
cs35l56 = devm_kzalloc(dev, sizeof(*cs35l56), GFP_KERNEL);
@@ -521,8 +591,22 @@ static int cs35l56_sdw_probe(struct sdw_slave *peripheral, const struct sdw_devi
dev_set_drvdata(dev, cs35l56);
+ switch ((unsigned int)id->driver_data) {
+ case 0x3556:
+ case 0x3557:
+ regmap_config = &cs35l56_regmap_sdw;
+ cs35l56->base.fw_reg = &cs35l56_fw_reg;
+ break;
+ case 0x3563:
+ regmap_config = &cs35l63_regmap_sdw;
+ cs35l56->base.fw_reg = &cs35l63_fw_reg;
+ break;
+ default:
+ return -ENODEV;
+ }
+
cs35l56->base.regmap = devm_regmap_init(dev, &cs35l56_regmap_bus_sdw,
- peripheral, &cs35l56_regmap_sdw);
+ peripheral, regmap_config);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
return dev_err_probe(dev, ret, "Failed to allocate register map\n");
@@ -562,8 +646,9 @@ static const struct dev_pm_ops cs35l56_sdw_pm = {
};
static const struct sdw_device_id cs35l56_sdw_id[] = {
- SDW_SLAVE_ENTRY(0x01FA, 0x3556, 0),
- SDW_SLAVE_ENTRY(0x01FA, 0x3557, 0),
+ SDW_SLAVE_ENTRY(0x01FA, 0x3556, 0x3556),
+ SDW_SLAVE_ENTRY(0x01FA, 0x3557, 0x3557),
+ SDW_SLAVE_ENTRY(0x01FA, 0x3563, 0x3563),
{},
};
MODULE_DEVICE_TABLE(sdw, cs35l56_sdw_id);
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c
index e28bfefa72f3..d0831d609584 100644
--- a/sound/soc/codecs/cs35l56-shared.c
+++ b/sound/soc/codecs/cs35l56-shared.c
@@ -38,17 +38,48 @@ static const struct reg_sequence cs35l56_patch[] = {
{ CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 },
{ CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 },
{ CS35L56_IRQ1_MASK_18, 0x1f7df0ff },
+};
+static const struct reg_sequence cs35l56_patch_fw[] = {
/* These are not reset by a soft-reset, so patch to defaults. */
{ CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 },
{ CS35L56_MAIN_RENDER_USER_VOLUME, 0x00000000 },
{ CS35L56_MAIN_POSTURE_NUMBER, 0x00000000 },
};
+static const struct reg_sequence cs35l63_patch_fw[] = {
+ /* These are not reset by a soft-reset, so patch to defaults. */
+ { CS35L63_MAIN_RENDER_USER_MUTE, 0x00000000 },
+ { CS35L63_MAIN_RENDER_USER_VOLUME, 0x00000000 },
+ { CS35L63_MAIN_POSTURE_NUMBER, 0x00000000 },
+};
+
int cs35l56_set_patch(struct cs35l56_base *cs35l56_base)
{
- return regmap_register_patch(cs35l56_base->regmap, cs35l56_patch,
+ int ret;
+
+ ret = regmap_register_patch(cs35l56_base->regmap, cs35l56_patch,
ARRAY_SIZE(cs35l56_patch));
+ if (ret)
+ return ret;
+
+
+ switch (cs35l56_base->type) {
+ case 0x54:
+ case 0x56:
+ case 0x57:
+ ret = regmap_register_patch(cs35l56_base->regmap, cs35l56_patch_fw,
+ ARRAY_SIZE(cs35l56_patch_fw));
+ break;
+ case 0x63:
+ ret = regmap_register_patch(cs35l56_base->regmap, cs35l63_patch_fw,
+ ARRAY_SIZE(cs35l63_patch_fw));
+ break;
+ default:
+ break;
+ }
+
+ return ret;
}
EXPORT_SYMBOL_NS_GPL(cs35l56_set_patch, "SND_SOC_CS35L56_SHARED");
@@ -82,6 +113,36 @@ static const struct reg_default cs35l56_reg_defaults[] = {
{ CS35L56_MAIN_POSTURE_NUMBER, 0x00000000 },
};
+static const struct reg_default cs35l63_reg_defaults[] = {
+ /* no defaults for OTP_MEM - first read populates cache */
+
+ { CS35L56_ASP1_ENABLES1, 0x00000000 },
+ { CS35L56_ASP1_CONTROL1, 0x00000028 },
+ { CS35L56_ASP1_CONTROL2, 0x18180200 },
+ { CS35L56_ASP1_CONTROL3, 0x00000002 },
+ { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 },
+ { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 },
+ { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 },
+ { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 },
+ { CS35L56_ASP1TX1_INPUT, 0x00000000 },
+ { CS35L56_ASP1TX2_INPUT, 0x00000000 },
+ { CS35L56_ASP1TX3_INPUT, 0x00000000 },
+ { CS35L56_ASP1TX4_INPUT, 0x00000000 },
+ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 },
+ { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 },
+ { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 },
+ { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 },
+ { CS35L56_IRQ1_MASK_1, 0x8003ffff },
+ { CS35L56_IRQ1_MASK_2, 0xffff7fff },
+ { CS35L56_IRQ1_MASK_4, 0xe0ffffff },
+ { CS35L56_IRQ1_MASK_8, 0x8c000fff },
+ { CS35L56_IRQ1_MASK_18, 0x0760f000 },
+ { CS35L56_IRQ1_MASK_20, 0x15c00000 },
+ { CS35L63_MAIN_RENDER_USER_MUTE, 0x00000000 },
+ { CS35L63_MAIN_RENDER_USER_VOLUME, 0x00000000 },
+ { CS35L63_MAIN_POSTURE_NUMBER, 0x00000000 },
+};
+
static bool cs35l56_is_dsp_memory(unsigned int reg)
{
switch (reg) {
@@ -153,6 +214,8 @@ static bool cs35l56_readable_reg(struct device *dev, unsigned int reg)
case CS35L56_DSP_VIRTUAL1_MBOX_6:
case CS35L56_DSP_VIRTUAL1_MBOX_7:
case CS35L56_DSP_VIRTUAL1_MBOX_8:
+ case CS35L56_DIE_STS1:
+ case CS35L56_DIE_STS2:
case CS35L56_DSP_RESTRICT_STS1:
case CS35L56_DSP1_SYS_INFO_ID ... CS35L56_DSP1_SYS_INFO_END:
case CS35L56_DSP1_AHBM_WINDOW_DEBUG_0:
@@ -179,7 +242,7 @@ static bool cs35l56_precious_reg(struct device *dev, unsigned int reg)
}
}
-static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
+static bool cs35l56_common_volatile_reg(unsigned int reg)
{
switch (reg) {
case CS35L56_DEVID:
@@ -217,12 +280,32 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
case CS35L56_DSP1_SCRATCH3:
case CS35L56_DSP1_SCRATCH4:
return true;
+ default:
+ return cs35l56_is_dsp_memory(reg);
+ }
+}
+
+static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
case CS35L56_MAIN_RENDER_USER_MUTE:
case CS35L56_MAIN_RENDER_USER_VOLUME:
case CS35L56_MAIN_POSTURE_NUMBER:
return false;
default:
- return cs35l56_is_dsp_memory(reg);
+ return cs35l56_common_volatile_reg(reg);
+ }
+}
+
+static bool cs35l63_volatile_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case CS35L63_MAIN_RENDER_USER_MUTE:
+ case CS35L63_MAIN_RENDER_USER_VOLUME:
+ case CS35L63_MAIN_POSTURE_NUMBER:
+ return false;
+ default:
+ return cs35l56_common_volatile_reg(reg);
}
}
@@ -253,7 +336,8 @@ int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base)
if (ret)
return ret;
- ret = regmap_read_poll_timeout(cs35l56_base->regmap, CS35L56_DSP1_PM_CUR_STATE,
+ ret = regmap_read_poll_timeout(cs35l56_base->regmap,
+ cs35l56_base->fw_reg->pm_cur_stat,
val, (val == CS35L56_HALO_STATE_SHUTDOWN),
CS35L56_HALO_STATE_POLL_US,
CS35L56_HALO_STATE_TIMEOUT_US);
@@ -278,7 +362,9 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base)
CS35L56_HALO_STATE_POLL_US,
CS35L56_HALO_STATE_TIMEOUT_US,
false,
- cs35l56_base->regmap, CS35L56_DSP1_HALO_STATE, &val);
+ cs35l56_base->regmap,
+ cs35l56_base->fw_reg->halo_state,
+ &val);
if (poll_ret) {
dev_err(cs35l56_base->dev, "Firmware boot timed out(%d): HALO_STATE=%#x\n",
@@ -382,6 +468,11 @@ static const struct reg_sequence cs35l56_system_reset_seq[] = {
REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET),
};
+static const struct reg_sequence cs35l63_system_reset_seq[] = {
+ REG_SEQ0(CS35L63_DSP1_HALO_STATE, 0),
+ REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET),
+};
+
void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire)
{
/*
@@ -395,9 +486,22 @@ void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire)
return;
}
- regmap_multi_reg_write_bypassed(cs35l56_base->regmap,
- cs35l56_system_reset_seq,
- ARRAY_SIZE(cs35l56_system_reset_seq));
+ switch (cs35l56_base->type) {
+ case 0x54:
+ case 0x56:
+ case 0x57:
+ regmap_multi_reg_write_bypassed(cs35l56_base->regmap,
+ cs35l56_system_reset_seq,
+ ARRAY_SIZE(cs35l56_system_reset_seq));
+ break;
+ case 0x63:
+ regmap_multi_reg_write_bypassed(cs35l56_base->regmap,
+ cs35l63_system_reset_seq,
+ ARRAY_SIZE(cs35l63_system_reset_seq));
+ break;
+ default:
+ break;
+ }
/* On SoundWire the registers won't be accessible until it re-enumerates. */
if (is_soundwire)
@@ -514,7 +618,9 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base)
return ret;
}
- ret = regmap_read(cs35l56_base->regmap, CS35L56_PROTECTION_STATUS, &val);
+ ret = regmap_read(cs35l56_base->regmap,
+ cs35l56_base->fw_reg->prot_sts,
+ &val);
if (ret)
dev_err(cs35l56_base->dev, "Failed to read PROTECTION_STATUS: %d\n", ret);
else
@@ -562,7 +668,7 @@ int cs35l56_runtime_suspend_common(struct cs35l56_base *cs35l56_base)
/* Firmware must have entered a power-save state */
ret = regmap_read_poll_timeout(cs35l56_base->regmap,
- CS35L56_TRANSDUCER_ACTUAL_PS,
+ cs35l56_base->fw_reg->transducer_actual_ps,
val, (val >= CS35L56_PS3),
CS35L56_PS3_POLL_US,
CS35L56_PS3_TIMEOUT_US);
@@ -698,13 +804,29 @@ static int cs35l56_read_silicon_uid(struct cs35l56_base *cs35l56_base, u64 *uid)
unique_id |= (u32)pte.x | ((u32)pte.y << 8) | ((u32)pte.wafer_id << 16) |
((u32)pte.dvs << 24);
- dev_dbg(cs35l56_base->dev, "UniqueID = %#llx\n", unique_id);
-
*uid = unique_id;
return 0;
}
+static int cs35l63_read_silicon_uid(struct cs35l56_base *cs35l56_base, u64 *uid)
+{
+ u32 tmp[2];
+ int ret;
+
+ ret = regmap_bulk_read(cs35l56_base->regmap, CS35L56_DIE_STS1, tmp, ARRAY_SIZE(tmp));
+ if (ret) {
+ dev_err(cs35l56_base->dev, "Cannot obtain CS35L56_DIE_STS: %d\n", ret);
+ return ret;
+ }
+
+ *uid = tmp[1];
+ *uid <<= 32;
+ *uid |= tmp[0];
+
+ return 0;
+}
+
/* Firmware calibration controls */
const struct cirrus_amp_cal_controls cs35l56_calibration_controls = {
.alg_id = 0x9f210,
@@ -725,10 +847,25 @@ int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base)
if (cs35l56_base->secured)
return 0;
- ret = cs35l56_read_silicon_uid(cs35l56_base, &silicon_uid);
+ switch (cs35l56_base->type) {
+ case 0x54:
+ case 0x56:
+ case 0x57:
+ ret = cs35l56_read_silicon_uid(cs35l56_base, &silicon_uid);
+ break;
+ case 0x63:
+ ret = cs35l63_read_silicon_uid(cs35l56_base, &silicon_uid);
+ break;
+ default:
+ ret = -ENODEV;
+ break;
+ }
+
if (ret < 0)
return ret;
+ dev_dbg(cs35l56_base->dev, "UniqueID = %#llx\n", silicon_uid);
+
ret = cs_amp_get_efi_calibration_data(cs35l56_base->dev, silicon_uid,
cs35l56_base->cal_index,
&cs35l56_base->cal_data);
@@ -752,7 +889,8 @@ int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
unsigned int prot_status;
int ret;
- ret = regmap_read(cs35l56_base->regmap, CS35L56_PROTECTION_STATUS, &prot_status);
+ ret = regmap_read(cs35l56_base->regmap,
+ cs35l56_base->fw_reg->prot_sts, &prot_status);
if (ret) {
dev_err(cs35l56_base->dev, "Get PROTECTION_STATUS failed: %d\n", ret);
return ret;
@@ -760,7 +898,8 @@ int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
*fw_missing = !!(prot_status & CS35L56_FIRMWARE_MISSING);
- ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP1_FW_VER, fw_version);
+ ret = regmap_read(cs35l56_base->regmap,
+ cs35l56_base->fw_reg->fw_ver, fw_version);
if (ret) {
dev_err(cs35l56_base->dev, "Get FW VER failed: %d\n", ret);
return ret;
@@ -770,6 +909,33 @@ int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
}
EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, "SND_SOC_CS35L56_SHARED");
+void cs35l56_log_tuning(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp)
+{
+ __be32 pid, sid, tid;
+ int ret;
+
+ scoped_guard(mutex, &cs_dsp->pwr_lock) {
+ ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_PRJCT_ID",
+ WMFW_ADSP2_XM, 0x9f212),
+ 0, &pid, sizeof(pid));
+ if (!ret)
+ ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_CHNNL_ID",
+ WMFW_ADSP2_XM, 0x9f212),
+ 0, &sid, sizeof(sid));
+ if (!ret)
+ ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_SNPSHT_ID",
+ WMFW_ADSP2_XM, 0x9f212),
+ 0, &tid, sizeof(tid));
+ }
+
+ if (ret)
+ dev_warn(cs35l56_base->dev, "Can't read tuning IDs");
+ else
+ dev_info(cs35l56_base->dev, "Tuning PID: %#x, SID: %#x, TID: %#x\n",
+ be32_to_cpu(pid), be32_to_cpu(sid), be32_to_cpu(tid));
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_log_tuning, "SND_SOC_CS35L56_SHARED");
+
int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
{
int ret;
@@ -809,6 +975,9 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
case 0x35A56:
case 0x35A57:
break;
+ case 0x35A630:
+ devid = devid >> 4;
+ break;
default:
dev_err(cs35l56_base->dev, "Unknown device %x\n", devid);
return ret;
@@ -1045,8 +1214,66 @@ const struct regmap_config cs35l56_regmap_sdw = {
};
EXPORT_SYMBOL_NS_GPL(cs35l56_regmap_sdw, "SND_SOC_CS35L56_SHARED");
+const struct regmap_config cs35l63_regmap_i2c = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .reg_base = 0x8000,
+ .reg_format_endian = REGMAP_ENDIAN_BIG,
+ .val_format_endian = REGMAP_ENDIAN_BIG,
+ .max_register = CS35L56_DSP1_PMEM_5114,
+ .reg_defaults = cs35l63_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(cs35l63_reg_defaults),
+ .volatile_reg = cs35l63_volatile_reg,
+ .readable_reg = cs35l56_readable_reg,
+ .precious_reg = cs35l56_precious_reg,
+ .cache_type = REGCACHE_MAPLE,
+};
+EXPORT_SYMBOL_NS_GPL(cs35l63_regmap_i2c, "SND_SOC_CS35L56_SHARED");
+
+const struct regmap_config cs35l63_regmap_sdw = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .reg_format_endian = REGMAP_ENDIAN_LITTLE,
+ .val_format_endian = REGMAP_ENDIAN_BIG,
+ .max_register = CS35L56_DSP1_PMEM_5114,
+ .reg_defaults = cs35l63_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(cs35l63_reg_defaults),
+ .volatile_reg = cs35l63_volatile_reg,
+ .readable_reg = cs35l56_readable_reg,
+ .precious_reg = cs35l56_precious_reg,
+ .cache_type = REGCACHE_MAPLE,
+};
+EXPORT_SYMBOL_NS_GPL(cs35l63_regmap_sdw, "SND_SOC_CS35L56_SHARED");
+
+const struct cs35l56_fw_reg cs35l56_fw_reg = {
+ .fw_ver = CS35L56_DSP1_FW_VER,
+ .halo_state = CS35L56_DSP1_HALO_STATE,
+ .pm_cur_stat = CS35L56_DSP1_PM_CUR_STATE,
+ .prot_sts = CS35L56_PROTECTION_STATUS,
+ .transducer_actual_ps = CS35L56_TRANSDUCER_ACTUAL_PS,
+ .user_mute = CS35L56_MAIN_RENDER_USER_MUTE,
+ .user_volume = CS35L56_MAIN_RENDER_USER_VOLUME,
+ .posture_number = CS35L56_MAIN_POSTURE_NUMBER,
+};
+EXPORT_SYMBOL_NS_GPL(cs35l56_fw_reg, "SND_SOC_CS35L56_SHARED");
+
+const struct cs35l56_fw_reg cs35l63_fw_reg = {
+ .fw_ver = CS35L63_DSP1_FW_VER,
+ .halo_state = CS35L63_DSP1_HALO_STATE,
+ .pm_cur_stat = CS35L63_DSP1_PM_CUR_STATE,
+ .prot_sts = CS35L63_PROTECTION_STATUS,
+ .transducer_actual_ps = CS35L63_TRANSDUCER_ACTUAL_PS,
+ .user_mute = CS35L63_MAIN_RENDER_USER_MUTE,
+ .user_volume = CS35L63_MAIN_RENDER_USER_VOLUME,
+ .posture_number = CS35L63_MAIN_POSTURE_NUMBER,
+};
+EXPORT_SYMBOL_NS_GPL(cs35l63_fw_reg, "SND_SOC_CS35L56_SHARED");
+
MODULE_DESCRIPTION("ASoC CS35L56 Shared");
MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
MODULE_AUTHOR("Simon Trimmer <simont@opensource.cirrus.com>");
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS("SND_SOC_CS_AMP_LIB");
+MODULE_IMPORT_NS("FW_CS_DSP");
diff --git a/sound/soc/codecs/cs35l56-spi.c b/sound/soc/codecs/cs35l56-spi.c
index ca6c03a8766d..c2ddee22cd23 100644
--- a/sound/soc/codecs/cs35l56-spi.c
+++ b/sound/soc/codecs/cs35l56-spi.c
@@ -25,6 +25,9 @@ static int cs35l56_spi_probe(struct spi_device *spi)
return -ENOMEM;
spi_set_drvdata(spi, cs35l56);
+
+ cs35l56->base.fw_reg = &cs35l56_fw_reg;
+
cs35l56->base.regmap = devm_regmap_init_spi(spi, regmap_config);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index b3158a84b87a..c78e4746e428 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -84,6 +84,25 @@ static const struct snd_kcontrol_new cs35l56_controls[] = {
cs35l56_dspwait_get_volsw, cs35l56_dspwait_put_volsw),
};
+static const struct snd_kcontrol_new cs35l63_controls[] = {
+ SOC_SINGLE_EXT("Speaker Switch",
+ CS35L63_MAIN_RENDER_USER_MUTE, 0, 1, 1,
+ cs35l56_dspwait_get_volsw, cs35l56_dspwait_put_volsw),
+ SOC_SINGLE_S_EXT_TLV("Speaker Volume",
+ CS35L63_MAIN_RENDER_USER_VOLUME,
+ CS35L56_MAIN_RENDER_USER_VOLUME_SHIFT,
+ CS35L56_MAIN_RENDER_USER_VOLUME_MIN,
+ CS35L56_MAIN_RENDER_USER_VOLUME_MAX,
+ CS35L56_MAIN_RENDER_USER_VOLUME_SIGNBIT,
+ 0,
+ cs35l56_dspwait_get_volsw,
+ cs35l56_dspwait_put_volsw,
+ vol_tlv),
+ SOC_SINGLE_EXT("Posture Number", CS35L63_MAIN_POSTURE_NUMBER,
+ 0, 255, 0,
+ cs35l56_dspwait_get_volsw, cs35l56_dspwait_put_volsw),
+};
+
static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx1_enum,
CS35L56_ASP1TX1_INPUT,
0, CS35L56_ASP_TXn_SRC_MASK,
@@ -174,7 +193,7 @@ static int cs35l56_play_event(struct snd_soc_dapm_widget *w,
case SND_SOC_DAPM_POST_PMU:
/* Wait for firmware to enter PS0 power state */
ret = regmap_read_poll_timeout(cs35l56->base.regmap,
- CS35L56_TRANSDUCER_ACTUAL_PS,
+ cs35l56->base.fw_reg->transducer_actual_ps,
val, (val == CS35L56_PS0),
CS35L56_PS0_POLL_US,
CS35L56_PS0_TIMEOUT_US);
@@ -760,7 +779,8 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing
goto err_unlock;
}
- regmap_clear_bits(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS,
+ regmap_clear_bits(cs35l56->base.regmap,
+ cs35l56->base.fw_reg->prot_sts,
CS35L56_FIRMWARE_MISSING);
cs35l56->base.fw_patched = true;
@@ -827,6 +847,7 @@ static void cs35l56_dsp_work(struct work_struct *work)
else
cs35l56_patch(cs35l56, firmware_missing);
+ cs35l56_log_tuning(&cs35l56->base, &cs35l56->dsp.cs_dsp);
err:
pm_runtime_mark_last_busy(cs35l56->base.dev);
pm_runtime_put_autosuspend(cs35l56->base.dev);
@@ -837,6 +858,7 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
struct dentry *debugfs_root = component->debugfs_root;
unsigned short vendor, device;
+ int ret;
BUILD_BUG_ON(ARRAY_SIZE(cs35l56_tx_input_texts) != ARRAY_SIZE(cs35l56_tx_input_values));
@@ -876,6 +898,26 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
debugfs_create_bool("can_hibernate", 0444, debugfs_root, &cs35l56->base.can_hibernate);
debugfs_create_bool("fw_patched", 0444, debugfs_root, &cs35l56->base.fw_patched);
+
+ switch (cs35l56->base.type) {
+ case 0x54:
+ case 0x56:
+ case 0x57:
+ ret = snd_soc_add_component_controls(component, cs35l56_controls,
+ ARRAY_SIZE(cs35l56_controls));
+ break;
+ case 0x63:
+ ret = snd_soc_add_component_controls(component, cs35l63_controls,
+ ARRAY_SIZE(cs35l63_controls));
+ break;
+ default:
+ ret = -ENODEV;
+ break;
+ }
+
+ if (ret)
+ return dev_err_probe(cs35l56->base.dev, ret, "unable to add controls\n");
+
queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work);
return 0;
@@ -931,8 +973,6 @@ static const struct snd_soc_component_driver soc_component_dev_cs35l56 = {
.num_dapm_widgets = ARRAY_SIZE(cs35l56_dapm_widgets),
.dapm_routes = cs35l56_audio_map,
.num_dapm_routes = ARRAY_SIZE(cs35l56_audio_map),
- .controls = cs35l56_controls,
- .num_controls = ARRAY_SIZE(cs35l56_controls),
.set_bias_level = cs35l56_set_bias_level,
diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h
index 8a987ec01507..200f695efca3 100644
--- a/sound/soc/codecs/cs35l56.h
+++ b/sound/soc/codecs/cs35l56.h
@@ -51,6 +51,7 @@ struct cs35l56_private {
u8 asp_slot_count;
bool tdm_mode;
bool sysclk_set;
+ u8 old_sdw_clock_scale;
};
extern const struct dev_pm_ops cs35l56_pm_ops_i2c_spi;
diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c
index cd3f92c19045..a9ffba62aaf8 100644
--- a/sound/soc/codecs/cs42l52.c
+++ b/sound/soc/codecs/cs42l52.c
@@ -8,27 +8,26 @@
* Author: Brian Austin <brian.austin@cirrus.com>
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/of_gpio.h>
-#include <linux/pm.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
+#include <linux/init.h>
#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/platform_device.h>
#include <sound/core.h>
+#include <sound/initval.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/soc-dapm.h>
-#include <sound/initval.h>
#include <sound/tlv.h>
-#include <sound/cs42l52.h>
#include "cs42l52.h"
struct sp_config {
@@ -36,6 +35,24 @@ struct sp_config {
u32 srate;
};
+struct cs42l52_platform_data {
+
+ /* MICBIAS Level. Check datasheet Pg48 */
+ unsigned int micbias_lvl;
+
+ /* MICA mode selection Differential or Single-ended */
+ bool mica_diff_cfg;
+
+ /* MICB mode selection Differential or Single-ended */
+ bool micb_diff_cfg;
+
+ /* Charge Pump Freq. Check datasheet Pg73 */
+ unsigned int chgfreq;
+
+ /* Reset GPIO */
+ struct gpio_desc *reset_gpio;
+};
+
struct cs42l52_private {
struct regmap *regmap;
struct snd_soc_component *component;
@@ -1090,7 +1107,7 @@ static const struct regmap_config cs42l52_regmap = {
static int cs42l52_i2c_probe(struct i2c_client *i2c_client)
{
struct cs42l52_private *cs42l52;
- struct cs42l52_platform_data *pdata = dev_get_platdata(&i2c_client->dev);
+ struct cs42l52_platform_data *pdata;
int ret;
unsigned int devid;
unsigned int reg;
@@ -1107,50 +1124,43 @@ static int cs42l52_i2c_probe(struct i2c_client *i2c_client)
dev_err(&i2c_client->dev, "regmap_init() failed: %d\n", ret);
return ret;
}
- if (pdata) {
- cs42l52->pdata = *pdata;
- } else {
- pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata),
- GFP_KERNEL);
- if (!pdata)
- return -ENOMEM;
-
- if (i2c_client->dev.of_node) {
- if (of_property_read_bool(i2c_client->dev.of_node,
- "cirrus,mica-differential-cfg"))
- pdata->mica_diff_cfg = true;
-
- if (of_property_read_bool(i2c_client->dev.of_node,
- "cirrus,micb-differential-cfg"))
- pdata->micb_diff_cfg = true;
-
- if (of_property_read_u32(i2c_client->dev.of_node,
- "cirrus,micbias-lvl", &val32) >= 0)
- pdata->micbias_lvl = val32;
-
- if (of_property_read_u32(i2c_client->dev.of_node,
- "cirrus,chgfreq-divisor", &val32) >= 0)
- pdata->chgfreq = val32;
-
- pdata->reset_gpio =
- of_get_named_gpio(i2c_client->dev.of_node,
- "cirrus,reset-gpio", 0);
- }
- cs42l52->pdata = *pdata;
+
+ pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ if (i2c_client->dev.of_node) {
+ if (of_property_read_bool(i2c_client->dev.of_node,
+ "cirrus,mica-differential-cfg"))
+ pdata->mica_diff_cfg = true;
+
+ if (of_property_read_bool(i2c_client->dev.of_node,
+ "cirrus,micb-differential-cfg"))
+ pdata->micb_diff_cfg = true;
+
+ if (of_property_read_u32(i2c_client->dev.of_node,
+ "cirrus,micbias-lvl", &val32) >= 0)
+ pdata->micbias_lvl = val32;
+
+ if (of_property_read_u32(i2c_client->dev.of_node,
+ "cirrus,chgfreq-divisor", &val32) >= 0)
+ pdata->chgfreq = val32;
+
+ pdata->reset_gpio = devm_gpiod_get_optional(&i2c_client->dev,
+ "cirrus,reset",
+ GPIOD_OUT_LOW);
+
+ if (IS_ERR(pdata->reset_gpio))
+ return PTR_ERR(pdata->reset_gpio);
+
+ gpiod_set_consumer_name(pdata->reset_gpio, "CS42L52 /RST");
}
+ cs42l52->pdata = *pdata;
+
if (cs42l52->pdata.reset_gpio) {
- ret = devm_gpio_request_one(&i2c_client->dev,
- cs42l52->pdata.reset_gpio,
- GPIOF_OUT_INIT_HIGH,
- "CS42L52 /RST");
- if (ret < 0) {
- dev_err(&i2c_client->dev, "Failed to request /RST %d: %d\n",
- cs42l52->pdata.reset_gpio, ret);
- return ret;
- }
- gpio_set_value_cansleep(cs42l52->pdata.reset_gpio, 0);
- gpio_set_value_cansleep(cs42l52->pdata.reset_gpio, 1);
+ gpiod_set_value_cansleep(cs42l52->pdata.reset_gpio, 1);
+ gpiod_set_value_cansleep(cs42l52->pdata.reset_gpio, 0);
}
i2c_set_clientdata(i2c_client, cs42l52);
diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c
index aaf90c8b7339..98fa812bc07b 100644
--- a/sound/soc/codecs/cs42l56.c
+++ b/sound/soc/codecs/cs42l56.c
@@ -7,32 +7,64 @@
* Author: Brian Austin <brian.austin@cirrus.com>
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/pm.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
+#include <linux/init.h>
#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <sound/core.h>
+#include <sound/initval.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/soc-dapm.h>
-#include <sound/initval.h>
#include <sound/tlv.h>
-#include <sound/cs42l56.h>
#include "cs42l56.h"
#define CS42L56_NUM_SUPPLIES 3
+
+struct cs42l56_platform_data {
+ /* GPIO for Reset */
+ struct gpio_desc *gpio_nreset;
+
+ /* MICBIAS Level. Check datasheet Pg48 */
+ unsigned int micbias_lvl;
+
+ /* Analog Input 1A Reference 0=Single 1=Pseudo-Differential */
+ unsigned int ain1a_ref_cfg;
+
+ /* Analog Input 2A Reference 0=Single 1=Pseudo-Differential */
+ unsigned int ain2a_ref_cfg;
+
+ /* Analog Input 1B Reference 0=Single 1=Pseudo-Differential */
+ unsigned int ain1b_ref_cfg;
+
+ /* Analog Input 2B Reference 0=Single 1=Pseudo-Differential */
+ unsigned int ain2b_ref_cfg;
+
+ /* Charge Pump Freq. Check datasheet Pg62 */
+ unsigned int chgfreq;
+
+ /* HighPass Filter Right Channel Corner Frequency */
+ unsigned int hpfb_freq;
+
+ /* HighPass Filter Left Channel Corner Frequency */
+ unsigned int hpfa_freq;
+
+ /* Adaptive Power Control for LO/HP */
+ unsigned int adaptive_pwr;
+};
+
static const char *const cs42l56_supply_names[CS42L56_NUM_SUPPLIES] = {
"VA",
"VCP",
@@ -1161,7 +1193,13 @@ static int cs42l56_handle_of_data(struct i2c_client *i2c_client,
if (of_property_read_u32(np, "cirrus,hpf-left-freq", &val32) >= 0)
pdata->hpfb_freq = val32;
- pdata->gpio_nreset = of_get_named_gpio(np, "cirrus,gpio-nreset", 0);
+ pdata->gpio_nreset = devm_gpiod_get_optional(&i2c_client->dev, "cirrus,gpio-nreset",
+ GPIOD_OUT_LOW);
+
+ if (IS_ERR(pdata->gpio_nreset))
+ return PTR_ERR(pdata->gpio_nreset);
+
+ gpiod_set_consumer_name(pdata->gpio_nreset, "CS42L56 /RST");
return 0;
}
@@ -1169,8 +1207,6 @@ static int cs42l56_handle_of_data(struct i2c_client *i2c_client,
static int cs42l56_i2c_probe(struct i2c_client *i2c_client)
{
struct cs42l56_private *cs42l56;
- struct cs42l56_platform_data *pdata =
- dev_get_platdata(&i2c_client->dev);
int ret, i;
unsigned int devid;
unsigned int alpha_rev, metal_rev;
@@ -1188,31 +1224,17 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client)
return ret;
}
- if (pdata) {
- cs42l56->pdata = *pdata;
- } else {
- if (i2c_client->dev.of_node) {
- ret = cs42l56_handle_of_data(i2c_client,
- &cs42l56->pdata);
- if (ret != 0)
- return ret;
- }
+ if (i2c_client->dev.of_node) {
+ ret = cs42l56_handle_of_data(i2c_client, &cs42l56->pdata);
+ if (ret != 0)
+ return ret;
}
if (cs42l56->pdata.gpio_nreset) {
- ret = gpio_request_one(cs42l56->pdata.gpio_nreset,
- GPIOF_OUT_INIT_HIGH, "CS42L56 /RST");
- if (ret < 0) {
- dev_err(&i2c_client->dev,
- "Failed to request /RST %d: %d\n",
- cs42l56->pdata.gpio_nreset, ret);
- return ret;
- }
- gpio_set_value_cansleep(cs42l56->pdata.gpio_nreset, 0);
- gpio_set_value_cansleep(cs42l56->pdata.gpio_nreset, 1);
+ gpiod_set_value_cansleep(cs42l56->pdata.gpio_nreset, 1);
+ gpiod_set_value_cansleep(cs42l56->pdata.gpio_nreset, 0);
}
-
i2c_set_clientdata(i2c_client, cs42l56);
for (i = 0; i < ARRAY_SIZE(cs42l56->supplies); i++)
diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c
index ddf36001100e..535a867f9f2a 100644
--- a/sound/soc/codecs/cs42l73.c
+++ b/sound/soc/codecs/cs42l73.c
@@ -8,26 +8,33 @@
* Brian Austin, Cirrus Logic Inc, <brian.austin@cirrus.com>
*/
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/of_gpio.h>
#include <linux/pm.h>
-#include <linux/i2c.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <sound/core.h>
+#include <sound/initval.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/soc-dapm.h>
-#include <sound/initval.h>
#include <sound/tlv.h>
-#include <sound/cs42l73.h>
-#include "cs42l73.h"
#include "cirrus_legacy.h"
+#include "cs42l73.h"
+
+struct cs42l73_platform_data {
+ /* RST GPIO */
+ struct gpio_desc *reset_gpio;
+ unsigned int chgfreq;
+ int jack_detection;
+ unsigned int mclk_freq;
+};
struct sp_config {
u8 spc, mmcc, spfs;
@@ -1276,7 +1283,7 @@ static const struct regmap_config cs42l73_regmap = {
static int cs42l73_i2c_probe(struct i2c_client *i2c_client)
{
struct cs42l73_private *cs42l73;
- struct cs42l73_platform_data *pdata = dev_get_platdata(&i2c_client->dev);
+ struct cs42l73_platform_data *pdata;
int ret, devid;
unsigned int reg;
u32 val32;
@@ -1292,38 +1299,27 @@ static int cs42l73_i2c_probe(struct i2c_client *i2c_client)
return ret;
}
- if (pdata) {
- cs42l73->pdata = *pdata;
- } else {
- pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata),
- GFP_KERNEL);
- if (!pdata)
- return -ENOMEM;
-
- if (i2c_client->dev.of_node) {
- if (of_property_read_u32(i2c_client->dev.of_node,
- "chgfreq", &val32) >= 0)
- pdata->chgfreq = val32;
- }
- pdata->reset_gpio = of_get_named_gpio(i2c_client->dev.of_node,
- "reset-gpio", 0);
- cs42l73->pdata = *pdata;
+ pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ if (i2c_client->dev.of_node) {
+ if (of_property_read_u32(i2c_client->dev.of_node, "chgfreq", &val32) >= 0)
+ pdata->chgfreq = val32;
}
+ pdata->reset_gpio = devm_gpiod_get_optional(&i2c_client->dev, "reset", GPIOD_OUT_LOW);
+
+ if (IS_ERR(pdata->reset_gpio))
+ return PTR_ERR(pdata->reset_gpio);
+
+ gpiod_set_consumer_name(pdata->reset_gpio, "CS42L73 /RST");
+ cs42l73->pdata = *pdata;
i2c_set_clientdata(i2c_client, cs42l73);
if (cs42l73->pdata.reset_gpio) {
- ret = devm_gpio_request_one(&i2c_client->dev,
- cs42l73->pdata.reset_gpio,
- GPIOF_OUT_INIT_HIGH,
- "CS42L73 /RST");
- if (ret < 0) {
- dev_err(&i2c_client->dev, "Failed to request /RST %d: %d\n",
- cs42l73->pdata.reset_gpio, ret);
- return ret;
- }
- gpio_set_value_cansleep(cs42l73->pdata.reset_gpio, 0);
- gpio_set_value_cansleep(cs42l73->pdata.reset_gpio, 1);
+ gpiod_set_value_cansleep(cs42l73->pdata.reset_gpio, 1);
+ gpiod_set_value_cansleep(cs42l73->pdata.reset_gpio, 0);
}
/* initialize codec */
@@ -1360,7 +1356,7 @@ static int cs42l73_i2c_probe(struct i2c_client *i2c_client)
return 0;
err_reset:
- gpio_set_value_cansleep(cs42l73->pdata.reset_gpio, 0);
+ gpiod_set_value_cansleep(cs42l73->pdata.reset_gpio, 1);
return ret;
}
diff --git a/sound/soc/codecs/cs48l32-tables.c b/sound/soc/codecs/cs48l32-tables.c
new file mode 100644
index 000000000000..59eaa9a5029f
--- /dev/null
+++ b/sound/soc/codecs/cs48l32-tables.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Regmap tables and other data for Cirrus Logic CS48L32 audio DSP.
+//
+// Copyright (C) 2018, 2020, 2022, 2025 Cirrus Logic, Inc. and
+// Cirrus Logic International Semiconductor Ltd.
+
+#include <linux/array_size.h>
+#include <linux/build_bug.h>
+#include <linux/device.h>
+#include <linux/linear_range.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <sound/cs48l32.h>
+#include <sound/cs48l32_registers.h>
+
+#include "cs48l32.h"
+
+static const struct reg_sequence cs48l32_reva_patch[] = {
+ { 0x00001044, 0x0005000f },
+ { 0x00001c34, 0x000037e8 },
+ { 0x000046d8, 0x00000fe0 },
+};
+
+int cs48l32_apply_patch(struct cs48l32 *cs48l32)
+{
+ int ret;
+
+ ret = regmap_register_patch(cs48l32->regmap, cs48l32_reva_patch,
+ ARRAY_SIZE(cs48l32_reva_patch));
+ if (ret < 0)
+ return dev_err_probe(cs48l32->dev, ret, "Failed to apply patch\n");
+
+ return 0;
+}
+
+static const struct reg_default cs48l32_reg_default[] = {
+ { 0x00000c08, 0xe1000001 }, /* GPIO1_CTRL1 */
+ { 0x00000c0c, 0xe1000001 }, /* GPIO2_CTRL1 */
+ { 0x00000c10, 0xe1000001 }, /* GPIO3_CTRL1 */
+ { 0x00000c14, 0xe1000001 }, /* GPIO4_CTRL1 */
+ { 0x00000c18, 0xe1000001 }, /* GPIO5_CTRL1 */
+ { 0x00000c1c, 0xe1000001 }, /* GPIO6_CTRL1 */
+ { 0x00000c20, 0xe1000001 }, /* GPIO7_CTRL1 */
+ { 0x00000c24, 0xe1000001 }, /* GPIO8_CTRL1 */
+ { 0x00000c28, 0xe1000001 }, /* GPIO9_CTRL1 */
+ { 0x00000c2c, 0xe1000001 }, /* GPIO10_CTRL1 */
+ { 0x00000c30, 0xe1000001 }, /* GPIO11_CTRL1 */
+ { 0x00000c34, 0xe1000001 }, /* GPIO12_CTRL1 */
+ { 0x00000c38, 0xe1000001 }, /* GPIO13_CTRL1 */
+ { 0x00000c3c, 0xe1000001 }, /* GPIO14_CTRL1 */
+ { 0x00000c40, 0xe1000001 }, /* GPIO15_CTRL1 */
+ { 0x00000c44, 0xe1000001 }, /* GPIO16_CTRL1 */
+ { 0x00001020, 0x00000000 }, /* OUTPUT_SYS_CLK */
+ { 0x00001044, 0x0005000f }, /* AUXPDM_CTRL */
+ { 0x0000105c, 0x00000000 }, /* AUXPDM_CTRL2 */
+ { 0x00001400, 0x00000002 }, /* CLOCK32K */
+ { 0x00001404, 0x00000404 }, /* SYSTEM_CLOCK1 */
+ { 0x00001420, 0x00000003 }, /* SAMPLE_RATE1 */
+ { 0x00001424, 0x00000003 }, /* SAMPLE_RATE2 */
+ { 0x00001428, 0x00000003 }, /* SAMPLE_RATE3 */
+ { 0x0000142c, 0x00000003 }, /* SAMPLE_RATE4 */
+ { 0x00001c00, 0x00000002 }, /* FLL1_CONTROL1 */
+ { 0x00001c04, 0x88203004 }, /* FLL1_CONTROL2 */
+ { 0x00001c08, 0x00000000 }, /* FLL1_CONTROL3 */
+ { 0x00001c0c, 0x21f05001 }, /* FLL1_CONTROL4 */
+ { 0x00001ca0, 0x00000c04 }, /* FLL1_GPIO_CLOCK */
+ { 0x00002000, 0x00000006 }, /* CHARGE_PUMP1 */
+ { 0x00002408, 0x000003e4 }, /* LDO2_CTRL1 */
+ { 0x00002410, 0x000000e6 }, /* MICBIAS_CTRL1 */
+ { 0x00002418, 0x00000222 }, /* MICBIAS_CTRL5 */
+ { 0x00002710, 0x00004600 }, /* IRQ1_CTRL_AOD */
+ { 0x00004000, 0x00000000 }, /* INPUT_CONTROL */
+ { 0x00004008, 0x00000400 }, /* INPUT_RATE_CONTROL */
+ { 0x0000400c, 0x00000000 }, /* INPUT_CONTROL2 */
+ { 0x00004020, 0x00050020 }, /* INPUT1_CONTROL1 */
+ { 0x00004024, 0x00000000 }, /* IN1L_CONTROL1 */
+ { 0x00004028, 0x10800080 }, /* IN1L_CONTROL2 */
+ { 0x00004044, 0x00000000 }, /* IN1R_CONTROL1 */
+ { 0x00004048, 0x10800080 }, /* IN1R_CONTROL2 */
+ { 0x00004060, 0x00050020 }, /* INPUT2_CONTROL1 */
+ { 0x00004064, 0x00000000 }, /* IN2L_CONTROL1 */
+ { 0x00004068, 0x10800000 }, /* IN2L_CONTROL2 */
+ { 0x00004084, 0x00000000 }, /* IN2R_CONTROL1 */
+ { 0x00004088, 0x10800000 }, /* IN2R_CONTROL2 */
+ { 0x00004244, 0x00000002 }, /* INPUT_HPF_CONTROL */
+ { 0x00004248, 0x00000022 }, /* INPUT_VOL_CONTROL */
+ { 0x00004300, 0x00000000 }, /* AUXPDM_CONTROL1 */
+ { 0x00004304, 0x00000000 }, /* AUXPDM_CONTROL2 */
+ { 0x00004308, 0x00010008 }, /* AUXPDM1_CONTROL1 */
+ { 0x00004310, 0x00010008 }, /* AUXPDM2_CONTROL1 */
+ { 0x00004688, 0x00000000 }, /* ADC1L_ANA_CONTROL1 */
+ { 0x0000468c, 0x00000000 }, /* ADC1R_ANA_CONTROL1 */
+ { 0x00006000, 0x00000000 }, /* ASP1_ENABLES1 */
+ { 0x00006004, 0x00000028 }, /* ASP1_CONTROL1 */
+ { 0x00006008, 0x18180200 }, /* ASP1_CONTROL2 */
+ { 0x0000600c, 0x00000002 }, /* ASP1_CONTROL3 */
+ { 0x00006010, 0x03020100 }, /* ASP1_FRAME_CONTROL1 */
+ { 0x00006014, 0x07060504 }, /* ASP1_FRAME_CONTROL2 */
+ { 0x00006020, 0x03020100 }, /* ASP1_FRAME_CONTROL5 */
+ { 0x00006024, 0x07060504 }, /* ASP1_FRAME_CONTROL6 */
+ { 0x00006030, 0x00000020 }, /* ASP1_DATA_CONTROL1 */
+ { 0x00006040, 0x00000020 }, /* ASP1_DATA_CONTROL5 */
+ { 0x00006080, 0x00000000 }, /* ASP2_ENABLES1 */
+ { 0x00006084, 0x00000028 }, /* ASP2_CONTROL1 */
+ { 0x00006088, 0x18180200 }, /* ASP2_CONTROL2 */
+ { 0x0000608c, 0x00000002 }, /* ASP2_CONTROL3 */
+ { 0x00006090, 0x03020100 }, /* ASP2_FRAME_CONTROL1 */
+ { 0x000060a0, 0x03020100 }, /* ASP2_FRAME_CONTROL5 */
+ { 0x000060b0, 0x00000020 }, /* ASP2_DATA_CONTROL1 */
+ { 0x000060c0, 0x00000020 }, /* ASP2_DATA_CONTROL5 */
+ { 0x00008200, 0x00800000 }, /* ASP1TX1_INPUT1 */
+ { 0x00008204, 0x00800000 }, /* ASP1TX1_INPUT2 */
+ { 0x00008208, 0x00800000 }, /* ASP1TX1_INPUT3 */
+ { 0x0000820c, 0x00800000 }, /* ASP1TX1_INPUT4 */
+ { 0x00008210, 0x00800000 }, /* ASP1TX2_INPUT1 */
+ { 0x00008214, 0x00800000 }, /* ASP1TX2_INPUT2 */
+ { 0x00008218, 0x00800000 }, /* ASP1TX2_INPUT3 */
+ { 0x0000821c, 0x00800000 }, /* ASP1TX2_INPUT4 */
+ { 0x00008220, 0x00800000 }, /* ASP1TX3_INPUT1 */
+ { 0x00008224, 0x00800000 }, /* ASP1TX3_INPUT2 */
+ { 0x00008228, 0x00800000 }, /* ASP1TX3_INPUT3 */
+ { 0x0000822c, 0x00800000 }, /* ASP1TX3_INPUT4 */
+ { 0x00008230, 0x00800000 }, /* ASP1TX4_INPUT1 */
+ { 0x00008234, 0x00800000 }, /* ASP1TX4_INPUT2 */
+ { 0x00008238, 0x00800000 }, /* ASP1TX4_INPUT3 */
+ { 0x0000823c, 0x00800000 }, /* ASP1TX4_INPUT4 */
+ { 0x00008240, 0x00800000 }, /* ASP1TX5_INPUT1 */
+ { 0x00008244, 0x00800000 }, /* ASP1TX5_INPUT2 */
+ { 0x00008248, 0x00800000 }, /* ASP1TX5_INPUT3 */
+ { 0x0000824c, 0x00800000 }, /* ASP1TX5_INPUT4 */
+ { 0x00008250, 0x00800000 }, /* ASP1TX6_INPUT1 */
+ { 0x00008254, 0x00800000 }, /* ASP1TX6_INPUT2 */
+ { 0x00008258, 0x00800000 }, /* ASP1TX6_INPUT3 */
+ { 0x0000825c, 0x00800000 }, /* ASP1TX6_INPUT4 */
+ { 0x00008260, 0x00800000 }, /* ASP1TX7_INPUT1 */
+ { 0x00008264, 0x00800000 }, /* ASP1TX7_INPUT2 */
+ { 0x00008268, 0x00800000 }, /* ASP1TX7_INPUT3 */
+ { 0x0000826c, 0x00800000 }, /* ASP1TX7_INPUT4 */
+ { 0x00008270, 0x00800000 }, /* ASP1TX8_INPUT1 */
+ { 0x00008274, 0x00800000 }, /* ASP1TX8_INPUT2 */
+ { 0x00008278, 0x00800000 }, /* ASP1TX8_INPUT3 */
+ { 0x0000827c, 0x00800000 }, /* ASP1TX8_INPUT4 */
+ { 0x00008300, 0x00800000 }, /* ASP2TX1_INPUT1 */
+ { 0x00008304, 0x00800000 }, /* ASP2TX1_INPUT2 */
+ { 0x00008308, 0x00800000 }, /* ASP2TX1_INPUT3 */
+ { 0x0000830c, 0x00800000 }, /* ASP2TX1_INPUT4 */
+ { 0x00008310, 0x00800000 }, /* ASP2TX2_INPUT1 */
+ { 0x00008314, 0x00800000 }, /* ASP2TX2_INPUT2 */
+ { 0x00008318, 0x00800000 }, /* ASP2TX2_INPUT3 */
+ { 0x0000831c, 0x00800000 }, /* ASP2TX2_INPUT4 */
+ { 0x00008320, 0x00800000 }, /* ASP2TX3_INPUT1 */
+ { 0x00008324, 0x00800000 }, /* ASP2TX3_INPUT2 */
+ { 0x00008328, 0x00800000 }, /* ASP2TX3_INPUT3 */
+ { 0x0000832c, 0x00800000 }, /* ASP2TX3_INPUT4 */
+ { 0x00008330, 0x00800000 }, /* ASP2TX4_INPUT1 */
+ { 0x00008334, 0x00800000 }, /* ASP2TX4_INPUT2 */
+ { 0x00008338, 0x00800000 }, /* ASP2TX4_INPUT3 */
+ { 0x0000833c, 0x00800000 }, /* ASP2TX4_INPUT4 */
+ { 0x00008980, 0x00000000 }, /* ISRC1INT1_INPUT1 */
+ { 0x00008990, 0x00000000 }, /* ISRC1INT2_INPUT1 */
+ { 0x000089a0, 0x00000000 }, /* ISRC1INT3_INPUT1 */
+ { 0x000089b0, 0x00000000 }, /* ISRC1INT4_INPUT1 */
+ { 0x000089c0, 0x00000000 }, /* ISRC1DEC1_INPUT1 */
+ { 0x000089d0, 0x00000000 }, /* ISRC1DEC2_INPUT1 */
+ { 0x000089e0, 0x00000000 }, /* ISRC1DEC3_INPUT1 */
+ { 0x000089f0, 0x00000000 }, /* ISRC1DEC4_INPUT1 */
+ { 0x00008a00, 0x00000000 }, /* ISRC2INT1_INPUT1 */
+ { 0x00008a10, 0x00000000 }, /* ISRC2INT2_INPUT1 */
+ { 0x00008a40, 0x00000000 }, /* ISRC2DEC1_INPUT1 */
+ { 0x00008a50, 0x00000000 }, /* ISRC2DEC2_INPUT1 */
+ { 0x00008a80, 0x00000000 }, /* ISRC3INT1_INPUT1 */
+ { 0x00008a90, 0x00000000 }, /* ISRC3INT2_INPUT1 */
+ { 0x00008ac0, 0x00000000 }, /* ISRC3DEC1_INPUT1 */
+ { 0x00008ad0, 0x00000000 }, /* ISRC3DEC2_INPUT1 */
+ { 0x00008b80, 0x00800000 }, /* EQ1_INPUT1 */
+ { 0x00008b84, 0x00800000 }, /* EQ1_INPUT2 */
+ { 0x00008b88, 0x00800000 }, /* EQ1_INPUT3 */
+ { 0x00008b8c, 0x00800000 }, /* EQ1_INPUT4 */
+ { 0x00008b90, 0x00800000 }, /* EQ2_INPUT1 */
+ { 0x00008b94, 0x00800000 }, /* EQ2_INPUT2 */
+ { 0x00008b98, 0x00800000 }, /* EQ2_INPUT3 */
+ { 0x00008b9c, 0x00800000 }, /* EQ2_INPUT4 */
+ { 0x00008ba0, 0x00800000 }, /* EQ3_INPUT1 */
+ { 0x00008ba4, 0x00800000 }, /* EQ3_INPUT2 */
+ { 0x00008ba8, 0x00800000 }, /* EQ3_INPUT3 */
+ { 0x00008bac, 0x00800000 }, /* EQ3_INPUT4 */
+ { 0x00008bb0, 0x00800000 }, /* EQ4_INPUT1 */
+ { 0x00008bb4, 0x00800000 }, /* EQ4_INPUT2 */
+ { 0x00008bb8, 0x00800000 }, /* EQ4_INPUT3 */
+ { 0x00008bbc, 0x00800000 }, /* EQ4_INPUT4 */
+ { 0x00008c00, 0x00800000 }, /* DRC1L_INPUT1 */
+ { 0x00008c04, 0x00800000 }, /* DRC1L_INPUT2 */
+ { 0x00008c08, 0x00800000 }, /* DRC1L_INPUT3 */
+ { 0x00008c0c, 0x00800000 }, /* DRC1L_INPUT4 */
+ { 0x00008c10, 0x00800000 }, /* DRC1R_INPUT1 */
+ { 0x00008c14, 0x00800000 }, /* DRC1R_INPUT2 */
+ { 0x00008c18, 0x00800000 }, /* DRC1R_INPUT3 */
+ { 0x00008c1c, 0x00800000 }, /* DRC1R_INPUT4 */
+ { 0x00008c20, 0x00800000 }, /* DRC2L_INPUT1 */
+ { 0x00008c24, 0x00800000 }, /* DRC2L_INPUT2 */
+ { 0x00008c28, 0x00800000 }, /* DRC2L_INPUT3 */
+ { 0x00008c2c, 0x00800000 }, /* DRC2L_INPUT4 */
+ { 0x00008c30, 0x00800000 }, /* DRC2R_INPUT1 */
+ { 0x00008c34, 0x00800000 }, /* DRC2R_INPUT2 */
+ { 0x00008c38, 0x00800000 }, /* DRC2R_INPUT3 */
+ { 0x00008c3c, 0x00800000 }, /* DRC2R_INPUT4 */
+ { 0x00008c80, 0x00800000 }, /* LHPF1_INPUT1 */
+ { 0x00008c84, 0x00800000 }, /* LHPF1_INPUT2 */
+ { 0x00008c88, 0x00800000 }, /* LHPF1_INPUT3 */
+ { 0x00008c8c, 0x00800000 }, /* LHPF1_INPUT4 */
+ { 0x00008c90, 0x00800000 }, /* LHPF2_INPUT1 */
+ { 0x00008c94, 0x00800000 }, /* LHPF2_INPUT2 */
+ { 0x00008c98, 0x00800000 }, /* LHPF2_INPUT3 */
+ { 0x00008c9c, 0x00800000 }, /* LHPF2_INPUT4 */
+ { 0x00008ca0, 0x00800000 }, /* LHPF3_INPUT1 */
+ { 0x00008ca4, 0x00800000 }, /* LHPF3_INPUT2 */
+ { 0x00008ca8, 0x00800000 }, /* LHPF3_INPUT3 */
+ { 0x00008cac, 0x00800000 }, /* LHPF3_INPUT4 */
+ { 0x00008cb0, 0x00800000 }, /* LHPF4_INPUT1 */
+ { 0x00008cb4, 0x00800000 }, /* LHPF4_INPUT2 */
+ { 0x00008cb8, 0x00800000 }, /* LHPF4_INPUT3 */
+ { 0x00008cbc, 0x00800000 }, /* LHPF4_INPUT4 */
+ { 0x00009000, 0x00800000 }, /* DSP1RX1_INPUT1 */
+ { 0x00009004, 0x00800000 }, /* DSP1RX1_INPUT2 */
+ { 0x00009008, 0x00800000 }, /* DSP1RX1_INPUT3 */
+ { 0x0000900c, 0x00800000 }, /* DSP1RX1_INPUT4 */
+ { 0x00009010, 0x00800000 }, /* DSP1RX2_INPUT1 */
+ { 0x00009014, 0x00800000 }, /* DSP1RX2_INPUT2 */
+ { 0x00009018, 0x00800000 }, /* DSP1RX2_INPUT3 */
+ { 0x0000901c, 0x00800000 }, /* DSP1RX2_INPUT4 */
+ { 0x00009020, 0x00800000 }, /* DSP1RX3_INPUT1 */
+ { 0x00009024, 0x00800000 }, /* DSP1RX3_INPUT2 */
+ { 0x00009028, 0x00800000 }, /* DSP1RX3_INPUT3 */
+ { 0x0000902c, 0x00800000 }, /* DSP1RX3_INPUT4 */
+ { 0x00009030, 0x00800000 }, /* DSP1RX4_INPUT1 */
+ { 0x00009034, 0x00800000 }, /* DSP1RX4_INPUT2 */
+ { 0x00009038, 0x00800000 }, /* DSP1RX4_INPUT3 */
+ { 0x0000903c, 0x00800000 }, /* DSP1RX4_INPUT4 */
+ { 0x00009040, 0x00800000 }, /* DSP1RX5_INPUT1 */
+ { 0x00009044, 0x00800000 }, /* DSP1RX5_INPUT2 */
+ { 0x00009048, 0x00800000 }, /* DSP1RX5_INPUT3 */
+ { 0x0000904c, 0x00800000 }, /* DSP1RX5_INPUT4 */
+ { 0x00009050, 0x00800000 }, /* DSP1RX6_INPUT1 */
+ { 0x00009054, 0x00800000 }, /* DSP1RX6_INPUT2 */
+ { 0x00009058, 0x00800000 }, /* DSP1RX6_INPUT3 */
+ { 0x0000905c, 0x00800000 }, /* DSP1RX6_INPUT4 */
+ { 0x00009060, 0x00800000 }, /* DSP1RX7_INPUT1 */
+ { 0x00009064, 0x00800000 }, /* DSP1RX7_INPUT2 */
+ { 0x00009068, 0x00800000 }, /* DSP1RX7_INPUT3 */
+ { 0x0000906c, 0x00800000 }, /* DSP1RX7_INPUT4 */
+ { 0x00009070, 0x00800000 }, /* DSP1RX8_INPUT1 */
+ { 0x00009074, 0x00800000 }, /* DSP1RX8_INPUT2 */
+ { 0x00009078, 0x00800000 }, /* DSP1RX8_INPUT3 */
+ { 0x0000907c, 0x00800000 }, /* DSP1RX8_INPUT4 */
+ { 0x0000a400, 0x00000000 }, /* ISRC1_CONTROL1 */
+ { 0x0000a404, 0x00000000 }, /* ISRC1_CONTROL2 */
+ { 0x0000a510, 0x00000000 }, /* ISRC2_CONTROL1 */
+ { 0x0000a514, 0x00000000 }, /* ISRC2_CONTROL2 */
+ { 0x0000a620, 0x00000000 }, /* ISRC3_CONTROL1 */
+ { 0x0000a624, 0x00000000 }, /* ISRC3_CONTROL2 */
+ { 0x0000a800, 0x00000000 }, /* FX_SAMPLE_RATE */
+ { 0x0000a808, 0x00000000 }, /* EQ_CONTROL1 */
+ { 0x0000a80c, 0x00000000 }, /* EQ_CONTROL2 */
+ { 0x0000a810, 0x0c0c0c0c }, /* EQ1_GAIN1 */
+ { 0x0000a814, 0x0000000c }, /* EQ1_GAIN2 */
+ { 0x0000a818, 0x03fe0fc8 }, /* EQ1_BAND1_COEFF1 */
+ { 0x0000a81c, 0x00000b75 }, /* EQ1_BAND1_COEFF2 */
+ { 0x0000a820, 0x000000e0 }, /* EQ1_BAND1_PG */
+ { 0x0000a824, 0xf1361ec4 }, /* EQ1_BAND2_COEFF1 */
+ { 0x0000a828, 0x00000409 }, /* EQ1_BAND2_COEFF2 */
+ { 0x0000a82c, 0x000004cc }, /* EQ1_BAND2_PG */
+ { 0x0000a830, 0xf3371c9b }, /* EQ1_BAND3_COEFF1 */
+ { 0x0000a834, 0x0000040b }, /* EQ1_BAND3_COEFF2 */
+ { 0x0000a838, 0x00000cbb }, /* EQ1_BAND3_PG */
+ { 0x0000a83c, 0xf7d916f8 }, /* EQ1_BAND4_COEFF1 */
+ { 0x0000a840, 0x0000040a }, /* EQ1_BAND4_COEFF2 */
+ { 0x0000a844, 0x00001f14 }, /* EQ1_BAND4_PG */
+ { 0x0000a848, 0x0563058c }, /* EQ1_BAND5_COEFF1 */
+ { 0x0000a84c, 0x00000000 }, /* EQ1_BAND5_COEFF1 + 4 */
+ { 0x0000a850, 0x00004000 }, /* EQ1_BAND5_PG */
+ { 0x0000a854, 0x0c0c0c0c }, /* EQ2_GAIN1 */
+ { 0x0000a858, 0x0000000c }, /* EQ2_GAIN2 */
+ { 0x0000a85c, 0x03fe0fc8 }, /* EQ2_BAND1_COEFF1 */
+ { 0x0000a860, 0x00000b75 }, /* EQ2_BAND1_COEFF2 */
+ { 0x0000a864, 0x000000e0 }, /* EQ2_BAND1_PG */
+ { 0x0000a868, 0xf1361ec4 }, /* EQ2_BAND2_COEFF1 */
+ { 0x0000a86c, 0x00000409 }, /* EQ2_BAND2_COEFF2 */
+ { 0x0000a870, 0x000004cc }, /* EQ2_BAND2_PG */
+ { 0x0000a874, 0xf3371c9b }, /* EQ2_BAND3_COEFF1 */
+ { 0x0000a878, 0x0000040b }, /* EQ2_BAND3_COEFF2 */
+ { 0x0000a87c, 0x00000cbb }, /* EQ2_BAND3_PG */
+ { 0x0000a880, 0xf7d916f8 }, /* EQ2_BAND4_COEFF1 */
+ { 0x0000a884, 0x0000040a }, /* EQ2_BAND4_COEFF2 */
+ { 0x0000a888, 0x00001f14 }, /* EQ2_BAND4_PG */
+ { 0x0000a88c, 0x0563058c }, /* EQ2_BAND5_COEFF1 */
+ { 0x0000a890, 0x00000000 }, /* EQ2_BAND5_COEFF1 + 4 */
+ { 0x0000a894, 0x00004000 }, /* EQ2_BAND5_PG */
+ { 0x0000a898, 0x0c0c0c0c }, /* EQ3_GAIN1 */
+ { 0x0000a89c, 0x0000000c }, /* EQ3_GAIN2 */
+ { 0x0000a8a0, 0x03fe0fc8 }, /* EQ3_BAND1_COEFF1 */
+ { 0x0000a8a4, 0x00000b75 }, /* EQ3_BAND1_COEFF2 */
+ { 0x0000a8a8, 0x000000e0 }, /* EQ3_BAND1_PG */
+ { 0x0000a8ac, 0xf1361ec4 }, /* EQ3_BAND2_COEFF1 */
+ { 0x0000a8b0, 0x00000409 }, /* EQ3_BAND2_COEFF2 */
+ { 0x0000a8b4, 0x000004cc }, /* EQ3_BAND2_PG */
+ { 0x0000a8b8, 0xf3371c9b }, /* EQ3_BAND3_COEFF1 */
+ { 0x0000a8bc, 0x0000040b }, /* EQ3_BAND3_COEFF2 */
+ { 0x0000a8c0, 0x00000cbb }, /* EQ3_BAND3_PG */
+ { 0x0000a8c4, 0xf7d916f8 }, /* EQ3_BAND4_COEFF1 */
+ { 0x0000a8c8, 0x0000040a }, /* EQ3_BAND4_COEFF2 */
+ { 0x0000a8cc, 0x00001f14 }, /* EQ3_BAND4_PG */
+ { 0x0000a8d0, 0x0563058c }, /* EQ3_BAND5_COEFF1 */
+ { 0x0000a8d4, 0x00000000 }, /* EQ3_BAND5_COEFF1 + 4 */
+ { 0x0000a8d8, 0x00004000 }, /* EQ3_BAND5_PG */
+ { 0x0000a8dc, 0x0c0c0c0c }, /* EQ4_GAIN1 */
+ { 0x0000a8e0, 0x0000000c }, /* EQ4_GAIN2 */
+ { 0x0000a8e4, 0x03fe0fc8 }, /* EQ4_BAND1_COEFF1 */
+ { 0x0000a8e8, 0x00000b75 }, /* EQ4_BAND1_COEFF2 */
+ { 0x0000a8ec, 0x000000e0 }, /* EQ4_BAND1_PG */
+ { 0x0000a8f0, 0xf1361ec4 }, /* EQ4_BAND2_COEFF1 */
+ { 0x0000a8f4, 0x00000409 }, /* EQ4_BAND2_COEFF2 */
+ { 0x0000a8f8, 0x000004cc }, /* EQ4_BAND2_PG */
+ { 0x0000a8fc, 0xf3371c9b }, /* EQ4_BAND3_COEFF1 */
+ { 0x0000a900, 0x0000040b }, /* EQ4_BAND3_COEFF2 */
+ { 0x0000a904, 0x00000cbb }, /* EQ4_BAND3_PG */
+ { 0x0000a908, 0xf7d916f8 }, /* EQ4_BAND4_COEFF1 */
+ { 0x0000a90c, 0x0000040a }, /* EQ4_BAND4_COEFF2 */
+ { 0x0000a910, 0x00001f14 }, /* EQ4_BAND4_PG */
+ { 0x0000a914, 0x0563058c }, /* EQ4_BAND5_COEFF1 */
+ { 0x0000a918, 0x00000000 }, /* EQ4_BAND5_COEFF1 + 4 */
+ { 0x0000a91c, 0x00004000 }, /* EQ4_BAND5_PG */
+ { 0x0000aa30, 0x00000000 }, /* LHPF_CONTROL1 */
+ { 0x0000aa34, 0x00000000 }, /* LHPF_CONTROL2 */
+ { 0x0000aa38, 0x00000000 }, /* LHPF1_COEFF */
+ { 0x0000aa3c, 0x00000000 }, /* LHPF2_COEFF */
+ { 0x0000aa40, 0x00000000 }, /* LHPF3_COEFF */
+ { 0x0000aa44, 0x00000000 }, /* LHPF4_COEFF */
+ { 0x0000ab00, 0x00000000 }, /* DRC1_CONTROL1 */
+ { 0x0000ab04, 0x49130018 }, /* DRC1_CONTROL2 */
+ { 0x0000ab08, 0x00000018 }, /* DRC1_CONTROL3 */
+ { 0x0000ab0c, 0x00000000 }, /* DRC1_CONTROL4 */
+ { 0x0000ab14, 0x00000000 }, /* DRC2_CONTROL1 */
+ { 0x0000ab18, 0x49130018 }, /* DRC2_CONTROL2 */
+ { 0x0000ab1c, 0x00000018 }, /* DRC2_CONTROL3 */
+ { 0x0000ab20, 0x00000000 }, /* DRC2_CONTROL4 */
+ { 0x0000b000, 0x00000000 }, /* TONE_GENERATOR1 */
+ { 0x0000b004, 0x00100000 }, /* TONE_GENERATOR2 */
+ { 0x0000b400, 0x00000000 }, /* COMFORT_NOISE_GENERATOR */
+ { 0x0000b800, 0x00000000 }, /* US_CONTROL */
+ { 0x0000b804, 0x00002020 }, /* US1_CONTROL */
+ { 0x0000b808, 0x00000000 }, /* US1_DET_CONTROL */
+ { 0x0000b814, 0x00002020 }, /* US2_CONTROL */
+ { 0x0000b818, 0x00000000 }, /* US2_DET_CONTROL */
+ { 0x00018110, 0x00000700 }, /* IRQ1_MASK_1 */
+ { 0x00018114, 0x00000004 }, /* IRQ1_MASK_2 */
+ { 0x00018120, 0x03ff0000 }, /* IRQ1_MASK_5 */
+ { 0x00018124, 0x00000103 }, /* IRQ1_MASK_6 */
+ { 0x00018128, 0x003f0000 }, /* IRQ1_MASK_7 */
+ { 0x00018130, 0xff00000f }, /* IRQ1_MASK_9 */
+ { 0x00018138, 0xffff0000 }, /* IRQ1_MASK_11 */
+};
+
+static bool cs48l32_readable_register(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case CS48L32_DEVID:
+ case CS48L32_REVID:
+ case CS48L32_OTPID:
+ case CS48L32_SFT_RESET:
+ case CS48L32_CTRL_IF_DEBUG3:
+ case CS48L32_MCU_CTRL1:
+ case CS48L32_GPIO1_CTRL1 ... CS48L32_GPIO16_CTRL1:
+ case CS48L32_OUTPUT_SYS_CLK:
+ case CS48L32_AUXPDM_CTRL:
+ case CS48L32_AUXPDM_CTRL2:
+ case CS48L32_CLOCK32K:
+ case CS48L32_SYSTEM_CLOCK1 ... CS48L32_SYSTEM_CLOCK2:
+ case CS48L32_SAMPLE_RATE1 ... CS48L32_SAMPLE_RATE4:
+ case CS48L32_FLL1_CONTROL1 ... CS48L32_FLL1_GPIO_CLOCK:
+ case CS48L32_CHARGE_PUMP1:
+ case CS48L32_LDO2_CTRL1:
+ case CS48L32_MICBIAS_CTRL1:
+ case CS48L32_MICBIAS_CTRL5:
+ case CS48L32_IRQ1_CTRL_AOD:
+ case CS48L32_INPUT_CONTROL:
+ case CS48L32_INPUT_STATUS:
+ case CS48L32_INPUT_RATE_CONTROL:
+ case CS48L32_INPUT_CONTROL2:
+ case CS48L32_INPUT_CONTROL3:
+ case CS48L32_INPUT1_CONTROL1:
+ case CS48L32_IN1L_CONTROL1 ... CS48L32_IN1L_CONTROL2:
+ case CS48L32_IN1R_CONTROL1 ... CS48L32_IN1R_CONTROL2:
+ case CS48L32_INPUT2_CONTROL1:
+ case CS48L32_IN2L_CONTROL1 ... CS48L32_IN2L_CONTROL2:
+ case CS48L32_IN2R_CONTROL1 ... CS48L32_IN2R_CONTROL2:
+ case CS48L32_INPUT_HPF_CONTROL:
+ case CS48L32_INPUT_VOL_CONTROL:
+ case CS48L32_AUXPDM_CONTROL1:
+ case CS48L32_AUXPDM_CONTROL2:
+ case CS48L32_AUXPDM1_CONTROL1:
+ case CS48L32_AUXPDM2_CONTROL1:
+ case CS48L32_ADC1L_ANA_CONTROL1:
+ case CS48L32_ADC1R_ANA_CONTROL1:
+ case CS48L32_ASP1_ENABLES1 ... CS48L32_ASP1_DATA_CONTROL5:
+ case CS48L32_ASP2_ENABLES1 ... CS48L32_ASP2_DATA_CONTROL5:
+ case CS48L32_ASP1TX1_INPUT1 ... CS48L32_ASP1TX8_INPUT4:
+ case CS48L32_ASP2TX1_INPUT1 ... CS48L32_ASP2TX4_INPUT4:
+ case CS48L32_ISRC1INT1_INPUT1 ... CS48L32_ISRC1DEC4_INPUT1:
+ case CS48L32_ISRC2INT1_INPUT1 ... CS48L32_ISRC2DEC2_INPUT1:
+ case CS48L32_ISRC3INT1_INPUT1 ... CS48L32_ISRC3DEC2_INPUT1:
+ case CS48L32_EQ1_INPUT1 ... CS48L32_EQ4_INPUT4:
+ case CS48L32_DRC1L_INPUT1 ... CS48L32_DRC1R_INPUT4:
+ case CS48L32_DRC2L_INPUT1 ... CS48L32_DRC2R_INPUT4:
+ case CS48L32_LHPF1_INPUT1 ... CS48L32_LHPF1_INPUT4:
+ case CS48L32_LHPF2_INPUT1 ... CS48L32_LHPF2_INPUT4:
+ case CS48L32_LHPF3_INPUT1 ... CS48L32_LHPF3_INPUT4:
+ case CS48L32_LHPF4_INPUT1 ... CS48L32_LHPF4_INPUT4:
+ case CS48L32_DSP1RX1_INPUT1 ... CS48L32_DSP1RX8_INPUT4:
+ case CS48L32_ISRC1_CONTROL1 ... CS48L32_ISRC1_CONTROL2:
+ case CS48L32_ISRC2_CONTROL1 ... CS48L32_ISRC2_CONTROL2:
+ case CS48L32_ISRC3_CONTROL1 ... CS48L32_ISRC3_CONTROL2:
+ case CS48L32_FX_SAMPLE_RATE:
+ case CS48L32_EQ_CONTROL1 ... CS48L32_EQ_CONTROL2:
+ case CS48L32_EQ1_GAIN1 ... CS48L32_EQ1_BAND5_PG:
+ case CS48L32_EQ2_GAIN1 ... CS48L32_EQ2_BAND5_PG:
+ case CS48L32_EQ3_GAIN1 ... CS48L32_EQ3_BAND5_PG:
+ case CS48L32_EQ4_GAIN1 ... CS48L32_EQ4_BAND5_PG:
+ case CS48L32_LHPF_CONTROL1 ... CS48L32_LHPF_CONTROL2:
+ case CS48L32_LHPF1_COEFF ... CS48L32_LHPF4_COEFF:
+ case CS48L32_DRC1_CONTROL1 ... CS48L32_DRC1_CONTROL4:
+ case CS48L32_DRC2_CONTROL1 ... CS48L32_DRC2_CONTROL4:
+ case CS48L32_TONE_GENERATOR1 ... CS48L32_TONE_GENERATOR2:
+ case CS48L32_COMFORT_NOISE_GENERATOR:
+ case CS48L32_US_CONTROL:
+ case CS48L32_US1_CONTROL:
+ case CS48L32_US1_DET_CONTROL:
+ case CS48L32_US2_CONTROL:
+ case CS48L32_US2_DET_CONTROL:
+ case CS48L32_DSP1_XM_SRAM_IBUS_SETUP_0 ... CS48L32_DSP1_XM_SRAM_IBUS_SETUP_24:
+ case CS48L32_DSP1_YM_SRAM_IBUS_SETUP_0 ... CS48L32_DSP1_YM_SRAM_IBUS_SETUP_8:
+ case CS48L32_DSP1_PM_SRAM_IBUS_SETUP_0 ... CS48L32_DSP1_PM_SRAM_IBUS_SETUP_7:
+ case CS48L32_IRQ1_STATUS:
+ case CS48L32_IRQ1_EINT_1 ... CS48L32_IRQ1_EINT_11:
+ case CS48L32_IRQ1_STS_1 ... CS48L32_IRQ1_STS_11:
+ case CS48L32_IRQ1_MASK_1 ... CS48L32_IRQ1_MASK_11:
+ case CS48L32_DSP1_XMEM_PACKED_0 ... CS48L32_DSP1_XMEM_PACKED_LAST:
+ case CS48L32_DSP1_SYS_INFO_ID ... CS48L32_DSP1_AHBM_WINDOW_DEBUG_1:
+ case CS48L32_DSP1_XMEM_UNPACKED24_0 ... CS48L32_DSP1_XMEM_UNPACKED24_LAST:
+ case CS48L32_DSP1_CLOCK_FREQ ... CS48L32_DSP1_SAMPLE_RATE_TX8:
+ case CS48L32_DSP1_SCRATCH1 ... CS48L32_DSP1_SCRATCH4:
+ case CS48L32_DSP1_CCM_CORE_CONTROL ... CS48L32_DSP1_STREAM_ARB_RESYNC_MSK1:
+ case CS48L32_DSP1_YMEM_PACKED_0 ... CS48L32_DSP1_YMEM_PACKED_LAST:
+ case CS48L32_DSP1_YMEM_UNPACKED24_0 ... CS48L32_DSP1_YMEM_UNPACKED24_LAST:
+ case CS48L32_DSP1_PMEM_0 ... CS48L32_DSP1_PMEM_LAST:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool cs48l32_volatile_register(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case CS48L32_DEVID:
+ case CS48L32_REVID:
+ case CS48L32_OTPID:
+ case CS48L32_SFT_RESET:
+ case CS48L32_CTRL_IF_DEBUG3:
+ case CS48L32_MCU_CTRL1:
+ case CS48L32_SYSTEM_CLOCK2:
+ case CS48L32_FLL1_CONTROL5:
+ case CS48L32_FLL1_CONTROL6:
+ case CS48L32_INPUT_STATUS:
+ case CS48L32_INPUT_CONTROL3:
+ case CS48L32_DSP1_XM_SRAM_IBUS_SETUP_0 ... CS48L32_DSP1_XM_SRAM_IBUS_SETUP_24:
+ case CS48L32_DSP1_YM_SRAM_IBUS_SETUP_0 ... CS48L32_DSP1_YM_SRAM_IBUS_SETUP_8:
+ case CS48L32_DSP1_PM_SRAM_IBUS_SETUP_0 ... CS48L32_DSP1_PM_SRAM_IBUS_SETUP_7:
+ case CS48L32_IRQ1_STATUS:
+ case CS48L32_IRQ1_EINT_1 ... CS48L32_IRQ1_EINT_11:
+ case CS48L32_IRQ1_STS_1 ... CS48L32_IRQ1_STS_11:
+ case CS48L32_DSP1_XMEM_PACKED_0 ... CS48L32_DSP1_XMEM_PACKED_LAST:
+ case CS48L32_DSP1_SYS_INFO_ID ... CS48L32_DSP1_AHBM_WINDOW_DEBUG_1:
+ case CS48L32_DSP1_XMEM_UNPACKED24_0 ... CS48L32_DSP1_XMEM_UNPACKED24_LAST:
+ case CS48L32_DSP1_CLOCK_FREQ ... CS48L32_DSP1_SAMPLE_RATE_TX8:
+ case CS48L32_DSP1_SCRATCH1 ... CS48L32_DSP1_SCRATCH4:
+ case CS48L32_DSP1_CCM_CORE_CONTROL ... CS48L32_DSP1_STREAM_ARB_RESYNC_MSK1:
+ case CS48L32_DSP1_YMEM_PACKED_0 ... CS48L32_DSP1_YMEM_PACKED_LAST:
+ case CS48L32_DSP1_YMEM_UNPACKED24_0 ... CS48L32_DSP1_YMEM_UNPACKED24_LAST:
+ case CS48L32_DSP1_PMEM_0 ... CS48L32_DSP1_PMEM_LAST:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * The bus bridge requires DSP packed memory registers to be accessed in
+ * aligned block multiples.
+ * Mark precious to prevent regmap debugfs causing an illegal bus transaction.
+ */
+static bool cs48l32_precious_register(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case CS48L32_DSP1_XMEM_PACKED_0 ... CS48L32_DSP1_XMEM_PACKED_LAST:
+ case CS48L32_DSP1_YMEM_PACKED_0 ... CS48L32_DSP1_YMEM_PACKED_LAST:
+ case CS48L32_DSP1_PMEM_0 ... CS48L32_DSP1_PMEM_LAST:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config cs48l32_regmap = {
+ .name = "cs48l32",
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .pad_bits = 32,
+ .val_bits = 32,
+ .reg_format_endian = REGMAP_ENDIAN_BIG,
+ .val_format_endian = REGMAP_ENDIAN_BIG,
+
+ .max_register = CS48L32_DSP1_PMEM_LAST,
+ .readable_reg = &cs48l32_readable_register,
+ .volatile_reg = &cs48l32_volatile_register,
+ .precious_reg = &cs48l32_precious_register,
+
+ .cache_type = REGCACHE_MAPLE,
+ .reg_defaults = cs48l32_reg_default,
+ .num_reg_defaults = ARRAY_SIZE(cs48l32_reg_default),
+};
+
+int cs48l32_create_regmap(struct spi_device *spi, struct cs48l32 *cs48l32)
+{
+ cs48l32->regmap = devm_regmap_init_spi(spi, &cs48l32_regmap);
+ if (IS_ERR(cs48l32->regmap))
+ return PTR_ERR(cs48l32->regmap);
+
+ return 0;
+}
diff --git a/sound/soc/codecs/cs48l32.c b/sound/soc/codecs/cs48l32.c
new file mode 100644
index 000000000000..90a795230d27
--- /dev/null
+++ b/sound/soc/codecs/cs48l32.c
@@ -0,0 +1,4073 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Cirrus Logic CS48L32 audio DSP.
+//
+// Copyright (C) 2016-2018, 2020, 2022, 2025 Cirrus Logic, Inc. and
+// Cirrus Logic International Semiconductor Ltd.
+
+#include <dt-bindings/sound/cs48l32.h>
+#include <linux/array_size.h>
+#include <linux/build_bug.h>
+#include <linux/clk.h>
+#include <linux/container_of.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gcd.h>
+#include <linux/gpio/consumer.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/string_choices.h>
+#include <sound/cs48l32.h>
+#include <sound/cs48l32_registers.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-component.h>
+#include <sound/soc-dai.h>
+#include <sound/soc-dapm.h>
+#include <sound/tlv.h>
+
+#include "cs48l32.h"
+
+static const char * const cs48l32_core_supplies[] = { "vdd-a", "vdd-io" };
+
+static const struct cs_dsp_region cs48l32_dsp1_regions[] = {
+ { .type = WMFW_HALO_PM_PACKED, .base = 0x3800000 },
+ { .type = WMFW_HALO_XM_PACKED, .base = 0x2000000 },
+ { .type = WMFW_ADSP2_XM, .base = 0x2800000 },
+ { .type = WMFW_HALO_YM_PACKED, .base = 0x2C00000 },
+ { .type = WMFW_ADSP2_YM, .base = 0x3400000 },
+};
+
+static const struct cs48l32_dsp_power_reg_block cs48l32_dsp1_sram_ext_regs[] = {
+ { CS48L32_DSP1_XM_SRAM_IBUS_SETUP_1, CS48L32_DSP1_XM_SRAM_IBUS_SETUP_24 },
+ { CS48L32_DSP1_YM_SRAM_IBUS_SETUP_1, CS48L32_DSP1_YM_SRAM_IBUS_SETUP_8 },
+ { CS48L32_DSP1_PM_SRAM_IBUS_SETUP_1, CS48L32_DSP1_PM_SRAM_IBUS_SETUP_7 },
+};
+
+static const unsigned int cs48l32_dsp1_sram_pwd_regs[] = {
+ CS48L32_DSP1_XM_SRAM_IBUS_SETUP_0,
+ CS48L32_DSP1_YM_SRAM_IBUS_SETUP_0,
+ CS48L32_DSP1_PM_SRAM_IBUS_SETUP_0,
+};
+
+static const struct cs48l32_dsp_power_regs cs48l32_dsp_sram_regs = {
+ .ext = cs48l32_dsp1_sram_ext_regs,
+ .n_ext = ARRAY_SIZE(cs48l32_dsp1_sram_ext_regs),
+ .pwd = cs48l32_dsp1_sram_pwd_regs,
+ .n_pwd = ARRAY_SIZE(cs48l32_dsp1_sram_pwd_regs),
+};
+
+static const char * const cs48l32_mixer_texts[] = {
+ "None",
+ "Tone Generator 1",
+ "Tone Generator 2",
+ "Noise Generator",
+ "IN1L",
+ "IN1R",
+ "IN2L",
+ "IN2R",
+ "ASP1RX1",
+ "ASP1RX2",
+ "ASP1RX3",
+ "ASP1RX4",
+ "ASP1RX5",
+ "ASP1RX6",
+ "ASP1RX7",
+ "ASP1RX8",
+ "ASP2RX1",
+ "ASP2RX2",
+ "ASP2RX3",
+ "ASP2RX4",
+ "ISRC1INT1",
+ "ISRC1INT2",
+ "ISRC1INT3",
+ "ISRC1INT4",
+ "ISRC1DEC1",
+ "ISRC1DEC2",
+ "ISRC1DEC3",
+ "ISRC1DEC4",
+ "ISRC2INT1",
+ "ISRC2INT2",
+ "ISRC2DEC1",
+ "ISRC2DEC2",
+ "ISRC3INT1",
+ "ISRC3INT2",
+ "ISRC3DEC1",
+ "ISRC3DEC2",
+ "EQ1",
+ "EQ2",
+ "EQ3",
+ "EQ4",
+ "DRC1L",
+ "DRC1R",
+ "DRC2L",
+ "DRC2R",
+ "LHPF1",
+ "LHPF2",
+ "LHPF3",
+ "LHPF4",
+ "Ultrasonic 1",
+ "Ultrasonic 2",
+ "DSP1.1",
+ "DSP1.2",
+ "DSP1.3",
+ "DSP1.4",
+ "DSP1.5",
+ "DSP1.6",
+ "DSP1.7",
+ "DSP1.8",
+};
+
+static unsigned int cs48l32_mixer_values[] = {
+ 0x000, /* Silence (mute) */
+ 0x004, /* Tone generator 1 */
+ 0x005, /* Tone generator 2 */
+ 0x00C, /* Noise Generator */
+ 0x010, /* IN1L signal path */
+ 0x011, /* IN1R signal path */
+ 0x012, /* IN2L signal path */
+ 0x013, /* IN2R signal path */
+ 0x020, /* ASP1 RX1 */
+ 0x021, /* ASP1 RX2 */
+ 0x022, /* ASP1 RX3 */
+ 0x023, /* ASP1 RX4 */
+ 0x024, /* ASP1 RX5 */
+ 0x025, /* ASP1 RX6 */
+ 0x026, /* ASP1 RX7 */
+ 0x027, /* ASP1 RX8 */
+ 0x030, /* ASP2 RX1 */
+ 0x031, /* ASP2 RX2 */
+ 0x032, /* ASP2 RX3 */
+ 0x033, /* ASP2 RX4 */
+ 0x098, /* ISRC1 INT1 */
+ 0x099, /* ISRC1 INT2 */
+ 0x09a, /* ISRC1 INT3 */
+ 0x09b, /* ISRC1 INT4 */
+ 0x09C, /* ISRC1 DEC1 */
+ 0x09D, /* ISRC1 DEC2 */
+ 0x09e, /* ISRC1 DEC3 */
+ 0x09f, /* ISRC1 DEC4 */
+ 0x0A0, /* ISRC2 INT1 */
+ 0x0A1, /* ISRC2 INT2 */
+ 0x0A4, /* ISRC2 DEC1 */
+ 0x0A5, /* ISRC2 DEC2 */
+ 0x0A8, /* ISRC3 INT1 */
+ 0x0A9, /* ISRC3 INT2 */
+ 0x0AC, /* ISRC3 DEC1 */
+ 0x0AD, /* ISRC3 DEC2 */
+ 0x0B8, /* EQ1 */
+ 0x0B9, /* EQ2 */
+ 0x0BA, /* EQ3 */
+ 0x0BB, /* EQ4 */
+ 0x0C0, /* DRC1 Left */
+ 0x0C1, /* DRC1 Right */
+ 0x0C2, /* DRC2 Left */
+ 0x0C3, /* DRC2 Right */
+ 0x0C8, /* LHPF1 */
+ 0x0C9, /* LHPF2 */
+ 0x0CA, /* LHPF3 */
+ 0x0CB, /* LHPF4 */
+ 0x0D8, /* Ultrasonic 1 */
+ 0x0D9, /* Ultrasonic 2 */
+ 0x100, /* DSP1 channel 1 */
+ 0x101, /* DSP1 channel 2 */
+ 0x102, /* DSP1 channel 3 */
+ 0x103, /* DSP1 channel 4 */
+ 0x104, /* DSP1 channel 5 */
+ 0x105, /* DSP1 channel 6 */
+ 0x106, /* DSP1 channel 7 */
+ 0x107, /* DSP1 channel 8 */
+};
+static_assert(ARRAY_SIZE(cs48l32_mixer_texts) == ARRAY_SIZE(cs48l32_mixer_values));
+#define CS48L32_NUM_MIXER_INPUTS ARRAY_SIZE(cs48l32_mixer_values)
+
+static const DECLARE_TLV_DB_SCALE(cs48l32_ana_tlv, 0, 100, 0);
+static const DECLARE_TLV_DB_SCALE(cs48l32_eq_tlv, -1200, 100, 0);
+static const DECLARE_TLV_DB_SCALE(cs48l32_digital_tlv, -6400, 50, 0);
+static const DECLARE_TLV_DB_SCALE(cs48l32_noise_tlv, -10800, 600, 0);
+static const DECLARE_TLV_DB_SCALE(cs48l32_mixer_tlv, -3200, 100, 0);
+static const DECLARE_TLV_DB_SCALE(cs48l32_us_tlv, 0, 600, 0);
+
+static void cs48l32_spin_sysclk(struct cs48l32_codec *cs48l32_codec)
+{
+ struct cs48l32 *cs48l32 = &cs48l32_codec->core;
+ unsigned int val;
+ int ret, i;
+
+ /* Skip this if the chip is down */
+ if (pm_runtime_suspended(cs48l32->dev))
+ return;
+
+ /*
+ * Just read a register a few times to ensure the internal
+ * oscillator sends out some clocks.
+ */
+ for (i = 0; i < 4; i++) {
+ ret = regmap_read(cs48l32->regmap, CS48L32_DEVID, &val);
+ if (ret)
+ dev_err(cs48l32_codec->core.dev, "%s Failed to read register: %d (%d)\n",
+ __func__, ret, i);
+ }
+
+ udelay(300);
+}
+
+static const char * const cs48l32_rate_text[] = {
+ "Sample Rate 1", "Sample Rate 2", "Sample Rate 3", "Sample Rate 4",
+};
+
+static const unsigned int cs48l32_rate_val[] = {
+ 0x0, 0x1, 0x2, 0x3,
+};
+static_assert(ARRAY_SIZE(cs48l32_rate_val) == ARRAY_SIZE(cs48l32_rate_text));
+
+static int cs48l32_rate_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ int ret;
+
+ /* Prevent any mixer mux changes while we do this */
+ mutex_lock(&cs48l32_codec->rate_lock);
+
+ /* The write must be guarded by a number of SYSCLK cycles */
+ cs48l32_spin_sysclk(cs48l32_codec);
+ ret = snd_soc_put_enum_double(kcontrol, ucontrol);
+ cs48l32_spin_sysclk(cs48l32_codec);
+
+ mutex_unlock(&cs48l32_codec->rate_lock);
+
+ return ret;
+}
+
+static const char * const cs48l32_sample_rate_text[] = {
+ "12kHz",
+ "24kHz",
+ "48kHz",
+ "96kHz",
+ "192kHz",
+ "384kHz",
+ "768kHz",
+ "11.025kHz",
+ "22.05kHz",
+ "44.1kHz",
+ "88.2kHz",
+ "176.4kHz",
+ "352.8kHz",
+ "705.6kHz",
+ "8kHz",
+ "16kHz",
+ "32kHz",
+};
+
+static const unsigned int cs48l32_sample_rate_val[] = {
+ 0x01, /* 12kHz */
+ 0x02, /* 24kHz */
+ 0x03, /* 48kHz */
+ 0x04, /* 96kHz */
+ 0x05, /* 192kHz */
+ 0x06, /* 384kHz */
+ 0x07, /* 768kHz */
+ 0x09, /* 11.025kHz */
+ 0x0a, /* 22.05kHz */
+ 0x0b, /* 44.1kHz */
+ 0x0c, /* 88.2kHz */
+ 0x0d, /* 176.4kHz */
+ 0x0e, /* 352.8kHz */
+ 0x0f, /* 705.6kHz */
+ 0x11, /* 8kHz */
+ 0x12, /* 16kHz */
+ 0x13, /* 32kHz */
+};
+static_assert(ARRAY_SIZE(cs48l32_sample_rate_val) == ARRAY_SIZE(cs48l32_sample_rate_text));
+#define CS48L32_SAMPLE_RATE_ENUM_SIZE ARRAY_SIZE(cs48l32_sample_rate_val)
+
+static const struct soc_enum cs48l32_sample_rate[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_SAMPLE_RATE1,
+ CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_1_MASK >> CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_ENUM_SIZE,
+ cs48l32_sample_rate_text,
+ cs48l32_sample_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_SAMPLE_RATE2,
+ CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_1_MASK >> CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_ENUM_SIZE,
+ cs48l32_sample_rate_text,
+ cs48l32_sample_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_SAMPLE_RATE3,
+ CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_1_MASK >> CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_ENUM_SIZE,
+ cs48l32_sample_rate_text,
+ cs48l32_sample_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_SAMPLE_RATE4,
+ CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_1_MASK >> CS48L32_SAMPLE_RATE_1_SHIFT,
+ CS48L32_SAMPLE_RATE_ENUM_SIZE,
+ cs48l32_sample_rate_text,
+ cs48l32_sample_rate_val),
+};
+
+static int cs48l32_inmux_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *) kcontrol->private_value;
+ unsigned int mux, src_val, in_type;
+ int ret;
+
+ mux = ucontrol->value.enumerated.item[0];
+ if (mux > 1)
+ return -EINVAL;
+
+ switch (e->reg) {
+ case CS48L32_IN1L_CONTROL1:
+ in_type = cs48l32_codec->in_type[0][mux];
+ break;
+ case CS48L32_IN1R_CONTROL1:
+ in_type = cs48l32_codec->in_type[1][mux];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ src_val = mux << e->shift_l;
+
+ if (in_type == CS48L32_IN_TYPE_SE)
+ src_val |= 1 << CS48L32_INx_SRC_SHIFT;
+
+ ret = snd_soc_component_update_bits(dapm->component,
+ e->reg,
+ CS48L32_INx_SRC_MASK,
+ src_val);
+ if (ret > 0)
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+
+ return ret;
+}
+
+static const char * const cs48l32_inmux_texts[] = {
+ "Analog 1", "Analog 2",
+};
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_in1muxl_enum,
+ CS48L32_IN1L_CONTROL1,
+ CS48L32_INx_SRC_SHIFT + 1,
+ cs48l32_inmux_texts);
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_in1muxr_enum,
+ CS48L32_IN1R_CONTROL1,
+ CS48L32_INx_SRC_SHIFT + 1,
+ cs48l32_inmux_texts);
+
+static const struct snd_kcontrol_new cs48l32_inmux[] = {
+ SOC_DAPM_ENUM_EXT("IN1L Mux", cs48l32_in1muxl_enum,
+ snd_soc_dapm_get_enum_double, cs48l32_inmux_put),
+ SOC_DAPM_ENUM_EXT("IN1R Mux", cs48l32_in1muxr_enum,
+ snd_soc_dapm_get_enum_double, cs48l32_inmux_put),
+};
+
+static const char * const cs48l32_dmode_texts[] = {
+ "Analog", "Digital",
+};
+
+static int cs48l32_dmode_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
+ struct snd_soc_component *component = snd_soc_dapm_to_component(dapm);
+ struct soc_enum *e = (struct soc_enum *) kcontrol->private_value;
+ unsigned int mode;
+ int ret, result;
+
+ mode = ucontrol->value.enumerated.item[0];
+ switch (mode) {
+ case 0:
+ ret = snd_soc_component_update_bits(component,
+ CS48L32_ADC1L_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK);
+ if (ret < 0) {
+ dev_err(component->dev,
+ "Failed to set ADC1L_INT_ENA_FRC: %d\n", ret);
+ return ret;
+ }
+
+ ret = snd_soc_component_update_bits(component,
+ CS48L32_ADC1R_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK);
+ if (ret < 0) {
+ dev_err(component->dev,
+ "Failed to set ADC1R_INT_ENA_FRC: %d\n", ret);
+ return ret;
+ }
+
+ result = snd_soc_component_update_bits(component,
+ e->reg,
+ BIT(CS48L32_IN1_MODE_SHIFT),
+ 0);
+ if (result < 0) {
+ dev_err(component->dev, "Failed to set input mode: %d\n", result);
+ return result;
+ }
+
+ usleep_range(200, 300);
+
+ ret = snd_soc_component_update_bits(component,
+ CS48L32_ADC1L_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ 0);
+ if (ret < 0) {
+ dev_err(component->dev,
+ "Failed to clear ADC1L_INT_ENA_FRC: %d\n", ret);
+ return ret;
+ }
+
+ ret = snd_soc_component_update_bits(component,
+ CS48L32_ADC1R_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ 0);
+ if (ret < 0) {
+ dev_err(component->dev,
+ "Failed to clear ADC1R_INT_ENA_FRC: %d\n", ret);
+ return ret;
+ }
+
+ if (result > 0)
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, mode, e, NULL);
+
+ return result;
+ case 1:
+ return snd_soc_dapm_put_enum_double(kcontrol, ucontrol);
+ default:
+ return -EINVAL;
+ }
+}
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_in1dmode_enum,
+ CS48L32_INPUT1_CONTROL1,
+ CS48L32_IN1_MODE_SHIFT,
+ cs48l32_dmode_texts);
+
+static const struct snd_kcontrol_new cs48l32_dmode_mux[] = {
+ SOC_DAPM_ENUM_EXT("IN1 Mode", cs48l32_in1dmode_enum,
+ snd_soc_dapm_get_enum_double, cs48l32_dmode_put),
+};
+
+static const char * const cs48l32_in_texts[] = {
+ "IN1L", "IN1R", "IN2L", "IN2R",
+};
+static_assert(ARRAY_SIZE(cs48l32_in_texts) == CS48L32_MAX_INPUT);
+
+static const char * const cs48l32_us_freq_texts[] = {
+ "16-24kHz", "20-28kHz",
+};
+
+static const unsigned int cs48l32_us_freq_val[] = {
+ 0x2, 0x3,
+};
+
+static const struct soc_enum cs48l32_us_freq[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_US1_CONTROL,
+ CS48L32_US1_FREQ_SHIFT,
+ CS48L32_US1_FREQ_MASK >> CS48L32_US1_FREQ_SHIFT,
+ ARRAY_SIZE(cs48l32_us_freq_val),
+ cs48l32_us_freq_texts,
+ cs48l32_us_freq_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_US2_CONTROL,
+ CS48L32_US1_FREQ_SHIFT,
+ CS48L32_US1_FREQ_MASK >> CS48L32_US1_FREQ_SHIFT,
+ ARRAY_SIZE(cs48l32_us_freq_val),
+ cs48l32_us_freq_texts,
+ cs48l32_us_freq_val),
+};
+
+static const unsigned int cs48l32_us_in_val[] = {
+ 0x0, 0x1, 0x2, 0x3,
+};
+
+static const struct soc_enum cs48l32_us_inmux_enum[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_US1_CONTROL,
+ CS48L32_US1_SRC_SHIFT,
+ CS48L32_US1_SRC_MASK >> CS48L32_US1_SRC_SHIFT,
+ ARRAY_SIZE(cs48l32_us_in_val),
+ cs48l32_in_texts,
+ cs48l32_us_in_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_US2_CONTROL,
+ CS48L32_US1_SRC_SHIFT,
+ CS48L32_US1_SRC_MASK >> CS48L32_US1_SRC_SHIFT,
+ ARRAY_SIZE(cs48l32_us_in_val),
+ cs48l32_in_texts,
+ cs48l32_us_in_val),
+};
+
+static const struct snd_kcontrol_new cs48l32_us_inmux[] = {
+ SOC_DAPM_ENUM("Ultrasonic 1 Input", cs48l32_us_inmux_enum[0]),
+ SOC_DAPM_ENUM("Ultrasonic 2 Input", cs48l32_us_inmux_enum[1]),
+};
+
+static const char * const cs48l32_us_det_thr_texts[] = {
+ "-6dB", "-9dB", "-12dB", "-15dB", "-18dB", "-21dB", "-24dB", "-27dB",
+};
+
+static const struct soc_enum cs48l32_us_det_thr[] = {
+ SOC_ENUM_SINGLE(CS48L32_US1_DET_CONTROL,
+ CS48L32_US1_DET_THR_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_thr_texts),
+ cs48l32_us_det_thr_texts),
+ SOC_ENUM_SINGLE(CS48L32_US2_DET_CONTROL,
+ CS48L32_US1_DET_THR_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_thr_texts),
+ cs48l32_us_det_thr_texts),
+};
+
+static const char * const cs48l32_us_det_num_texts[] = {
+ "1 Sample",
+ "2 Samples",
+ "4 Samples",
+ "8 Samples",
+ "16 Samples",
+ "32 Samples",
+ "64 Samples",
+ "128 Samples",
+ "256 Samples",
+ "512 Samples",
+ "1024 Samples",
+ "2048 Samples",
+ "4096 Samples",
+ "8192 Samples",
+ "16384 Samples",
+ "32768 Samples",
+};
+
+static const struct soc_enum cs48l32_us_det_num[] = {
+ SOC_ENUM_SINGLE(CS48L32_US1_DET_CONTROL,
+ CS48L32_US1_DET_NUM_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_num_texts),
+ cs48l32_us_det_num_texts),
+ SOC_ENUM_SINGLE(CS48L32_US2_DET_CONTROL,
+ CS48L32_US1_DET_NUM_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_num_texts),
+ cs48l32_us_det_num_texts),
+};
+
+static const char * const cs48l32_us_det_hold_texts[] = {
+ "0 Samples",
+ "31 Samples",
+ "63 Samples",
+ "127 Samples",
+ "255 Samples",
+ "511 Samples",
+ "1023 Samples",
+ "2047 Samples",
+ "4095 Samples",
+ "8191 Samples",
+ "16383 Samples",
+ "32767 Samples",
+ "65535 Samples",
+ "131071 Samples",
+ "262143 Samples",
+ "524287 Samples",
+};
+
+static const struct soc_enum cs48l32_us_det_hold[] = {
+ SOC_ENUM_SINGLE(CS48L32_US1_DET_CONTROL,
+ CS48L32_US1_DET_HOLD_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_hold_texts),
+ cs48l32_us_det_hold_texts),
+ SOC_ENUM_SINGLE(CS48L32_US2_DET_CONTROL,
+ CS48L32_US1_DET_HOLD_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_hold_texts),
+ cs48l32_us_det_hold_texts),
+};
+
+static const struct soc_enum cs48l32_us_output_rate[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_US1_CONTROL,
+ CS48L32_US1_RATE_SHIFT,
+ CS48L32_US1_RATE_MASK >> CS48L32_US1_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_US2_CONTROL,
+ CS48L32_US1_RATE_SHIFT,
+ CS48L32_US1_RATE_MASK >> CS48L32_US1_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+};
+
+static const char * const cs48l32_us_det_lpf_cut_texts[] = {
+ "1722Hz", "833Hz", "408Hz", "203Hz",
+};
+
+static const struct soc_enum cs48l32_us_det_lpf_cut[] = {
+ SOC_ENUM_SINGLE(CS48L32_US1_DET_CONTROL,
+ CS48L32_US1_DET_LPF_CUT_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_lpf_cut_texts),
+ cs48l32_us_det_lpf_cut_texts),
+ SOC_ENUM_SINGLE(CS48L32_US2_DET_CONTROL,
+ CS48L32_US1_DET_LPF_CUT_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_lpf_cut_texts),
+ cs48l32_us_det_lpf_cut_texts),
+};
+
+static const char * const cs48l32_us_det_dcy_texts[] = {
+ "0 ms", "0.79 ms", "1.58 ms", "3.16 ms", "6.33 ms", "12.67 ms", "25.34 ms", "50.69 ms",
+};
+
+static const struct soc_enum cs48l32_us_det_dcy[] = {
+ SOC_ENUM_SINGLE(CS48L32_US1_DET_CONTROL,
+ CS48L32_US1_DET_DCY_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_dcy_texts),
+ cs48l32_us_det_dcy_texts),
+ SOC_ENUM_SINGLE(CS48L32_US2_DET_CONTROL,
+ CS48L32_US1_DET_DCY_SHIFT,
+ ARRAY_SIZE(cs48l32_us_det_dcy_texts),
+ cs48l32_us_det_dcy_texts),
+};
+
+static const struct snd_kcontrol_new cs48l32_us_switch[] = {
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+};
+
+static const char * const cs48l32_vol_ramp_text[] = {
+ "0ms/6dB", "0.5ms/6dB", "1ms/6dB", "2ms/6dB", "4ms/6dB", "8ms/6dB", "16ms/6dB", "32ms/6dB",
+};
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_in_vd_ramp,
+ CS48L32_INPUT_VOL_CONTROL,
+ CS48L32_IN_VD_RAMP_SHIFT,
+ cs48l32_vol_ramp_text);
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_in_vi_ramp,
+ CS48L32_INPUT_VOL_CONTROL,
+ CS48L32_IN_VI_RAMP_SHIFT,
+ cs48l32_vol_ramp_text);
+
+static const char * const cs48l32_in_hpf_cut_text[] = {
+ "2.5Hz", "5Hz", "10Hz", "20Hz", "40Hz"
+};
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_in_hpf_cut_enum,
+ CS48L32_INPUT_HPF_CONTROL,
+ CS48L32_IN_HPF_CUT_SHIFT,
+ cs48l32_in_hpf_cut_text);
+
+static const char * const cs48l32_in_dmic_osr_text[] = {
+ "384kHz", "768kHz", "1.536MHz", "2.048MHz", "2.4576MHz", "3.072MHz", "6.144MHz",
+};
+
+static const struct soc_enum cs48l32_in_dmic_osr[] = {
+ SOC_ENUM_SINGLE(CS48L32_INPUT1_CONTROL1,
+ CS48L32_IN1_OSR_SHIFT,
+ ARRAY_SIZE(cs48l32_in_dmic_osr_text),
+ cs48l32_in_dmic_osr_text),
+ SOC_ENUM_SINGLE(CS48L32_INPUT2_CONTROL1,
+ CS48L32_IN1_OSR_SHIFT,
+ ARRAY_SIZE(cs48l32_in_dmic_osr_text),
+ cs48l32_in_dmic_osr_text),
+};
+
+static bool cs48l32_is_input_enabled(struct snd_soc_component *component,
+ unsigned int reg)
+{
+ unsigned int input_active;
+
+ input_active = snd_soc_component_read(component, CS48L32_INPUT_CONTROL);
+ switch (reg) {
+ case CS48L32_IN1L_CONTROL1:
+ return input_active & BIT(CS48L32_IN1L_EN_SHIFT);
+ case CS48L32_IN1R_CONTROL1:
+ return input_active & BIT(CS48L32_IN1R_EN_SHIFT);
+ case CS48L32_IN2L_CONTROL1:
+ return input_active & BIT(CS48L32_IN2L_EN_SHIFT);
+ case CS48L32_IN2R_CONTROL1:
+ return input_active & BIT(CS48L32_IN2R_EN_SHIFT);
+ default:
+ return false;
+ }
+}
+
+static int cs48l32_in_rate_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int ret;
+
+ snd_soc_dapm_mutex_lock(dapm);
+
+ /* Cannot change rate on an active input */
+ if (cs48l32_is_input_enabled(component, e->reg)) {
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ ret = snd_soc_put_enum_double(kcontrol, ucontrol);
+exit:
+ snd_soc_dapm_mutex_unlock(dapm);
+
+ return ret;
+}
+
+static const struct soc_enum cs48l32_input_rate[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_IN1L_CONTROL1,
+ CS48L32_INx_RATE_SHIFT,
+ CS48L32_INx_RATE_MASK >> CS48L32_INx_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_IN1R_CONTROL1,
+ CS48L32_INx_RATE_SHIFT,
+ CS48L32_INx_RATE_MASK >> CS48L32_INx_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_IN2L_CONTROL1,
+ CS48L32_INx_RATE_SHIFT,
+ CS48L32_INx_RATE_MASK >> CS48L32_INx_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_IN2R_CONTROL1,
+ CS48L32_INx_RATE_SHIFT,
+ CS48L32_INx_RATE_MASK >> CS48L32_INx_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+};
+
+static int cs48l32_low_power_mode_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value;
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+ int ret;
+
+ snd_soc_dapm_mutex_lock(dapm);
+
+ /* Cannot change rate on an active input */
+ if (cs48l32_is_input_enabled(component, mc->reg)) {
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ ret = snd_soc_put_volsw(kcontrol, ucontrol);
+
+exit:
+ snd_soc_dapm_mutex_unlock(dapm);
+ return ret;
+}
+
+static const struct soc_enum noise_gen_rate =
+ SOC_VALUE_ENUM_SINGLE(CS48L32_COMFORT_NOISE_GENERATOR,
+ CS48L32_NOISE_GEN_RATE_SHIFT,
+ CS48L32_NOISE_GEN_RATE_MASK >> CS48L32_NOISE_GEN_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val);
+
+static const char * const cs48l32_auxpdm_freq_texts[] = {
+ "3.072MHz", "2.048MHz", "1.536MHz", "768kHz",
+};
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_auxpdm1_freq,
+ CS48L32_AUXPDM1_CONTROL1,
+ CS48L32_AUXPDM1_FREQ_SHIFT,
+ cs48l32_auxpdm_freq_texts);
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_auxpdm2_freq,
+ CS48L32_AUXPDM2_CONTROL1,
+ CS48L32_AUXPDM1_FREQ_SHIFT,
+ cs48l32_auxpdm_freq_texts);
+
+static const char * const cs48l32_auxpdm_src_texts[] = {
+ "Analog", "IN1 Digital", "IN2 Digital",
+};
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_auxpdm1_in,
+ CS48L32_AUXPDM_CTRL2,
+ CS48L32_AUXPDMDAT1_SRC_SHIFT,
+ cs48l32_auxpdm_src_texts);
+
+static SOC_ENUM_SINGLE_DECL(cs48l32_auxpdm2_in,
+ CS48L32_AUXPDM_CTRL2,
+ CS48L32_AUXPDMDAT2_SRC_SHIFT,
+ cs48l32_auxpdm_src_texts);
+
+static const struct snd_kcontrol_new cs48l32_auxpdm_inmux[] = {
+ SOC_DAPM_ENUM("AUXPDM1 Input", cs48l32_auxpdm1_in),
+ SOC_DAPM_ENUM("AUXPDM2 Input", cs48l32_auxpdm2_in),
+};
+
+static const unsigned int cs48l32_auxpdm_analog_in_val[] = {
+ 0x0, 0x1,
+};
+
+static const struct soc_enum cs48l32_auxpdm_analog_inmux_enum[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_AUXPDM1_CONTROL1,
+ CS48L32_AUXPDM1_SRC_SHIFT,
+ CS48L32_AUXPDM1_SRC_MASK >> CS48L32_AUXPDM1_SRC_SHIFT,
+ ARRAY_SIZE(cs48l32_auxpdm_analog_in_val),
+ cs48l32_in_texts,
+ cs48l32_auxpdm_analog_in_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_AUXPDM2_CONTROL1,
+ CS48L32_AUXPDM1_SRC_SHIFT,
+ CS48L32_AUXPDM1_SRC_MASK >> CS48L32_AUXPDM1_SRC_SHIFT,
+ ARRAY_SIZE(cs48l32_auxpdm_analog_in_val),
+ cs48l32_in_texts,
+ cs48l32_auxpdm_analog_in_val),
+};
+
+static const struct snd_kcontrol_new cs48l32_auxpdm_analog_inmux[] = {
+ SOC_DAPM_ENUM("AUXPDM1 Analog Input", cs48l32_auxpdm_analog_inmux_enum[0]),
+ SOC_DAPM_ENUM("AUXPDM2 Analog Input", cs48l32_auxpdm_analog_inmux_enum[1]),
+};
+
+static const struct snd_kcontrol_new cs48l32_auxpdm_switch[] = {
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+};
+
+static const struct soc_enum cs48l32_isrc_fsh[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_ISRC1_CONTROL1,
+ CS48L32_ISRC1_FSH_SHIFT,
+ CS48L32_ISRC1_FSH_MASK >> CS48L32_ISRC1_FSH_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_ISRC2_CONTROL1,
+ CS48L32_ISRC1_FSH_SHIFT,
+ CS48L32_ISRC1_FSH_MASK >> CS48L32_ISRC1_FSH_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_ISRC3_CONTROL1,
+ CS48L32_ISRC1_FSH_SHIFT,
+ CS48L32_ISRC1_FSH_MASK >> CS48L32_ISRC1_FSH_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+};
+
+static const struct soc_enum cs48l32_isrc_fsl[] = {
+ SOC_VALUE_ENUM_SINGLE(CS48L32_ISRC1_CONTROL1,
+ CS48L32_ISRC1_FSL_SHIFT,
+ CS48L32_ISRC1_FSL_MASK >> CS48L32_ISRC1_FSL_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_ISRC2_CONTROL1,
+ CS48L32_ISRC1_FSL_SHIFT,
+ CS48L32_ISRC1_FSL_MASK >> CS48L32_ISRC1_FSL_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(CS48L32_ISRC3_CONTROL1,
+ CS48L32_ISRC1_FSL_SHIFT,
+ CS48L32_ISRC1_FSL_MASK >> CS48L32_ISRC1_FSL_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val),
+};
+
+static const struct soc_enum cs48l32_fx_rate =
+ SOC_VALUE_ENUM_SINGLE(CS48L32_FX_SAMPLE_RATE,
+ CS48L32_FX_RATE_SHIFT,
+ CS48L32_FX_RATE_MASK >> CS48L32_FX_RATE_SHIFT,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text,
+ cs48l32_rate_val);
+
+static const char * const cs48l32_lhpf_mode_text[] = {
+ "Low-pass", "High-pass"
+};
+
+static const struct soc_enum cs48l32_lhpf_mode[] = {
+ SOC_ENUM_SINGLE(CS48L32_LHPF_CONTROL2, 0,
+ ARRAY_SIZE(cs48l32_lhpf_mode_text), cs48l32_lhpf_mode_text),
+ SOC_ENUM_SINGLE(CS48L32_LHPF_CONTROL2, 1,
+ ARRAY_SIZE(cs48l32_lhpf_mode_text), cs48l32_lhpf_mode_text),
+ SOC_ENUM_SINGLE(CS48L32_LHPF_CONTROL2, 2,
+ ARRAY_SIZE(cs48l32_lhpf_mode_text), cs48l32_lhpf_mode_text),
+ SOC_ENUM_SINGLE(CS48L32_LHPF_CONTROL2, 3,
+ ARRAY_SIZE(cs48l32_lhpf_mode_text), cs48l32_lhpf_mode_text),
+};
+
+static int cs48l32_lhpf_coeff_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ __be32 *data = (__be32 *)ucontrol->value.bytes.data;
+ s16 val = (s16)be32_to_cpu(*data);
+
+ if (abs(val) > CS48L32_LHPF_MAX_COEFF) {
+ dev_err(cs48l32_codec->core.dev, "Rejecting unstable LHPF coefficients\n");
+ return -EINVAL;
+ }
+
+ return snd_soc_bytes_put(kcontrol, ucontrol);
+}
+
+static const char * const cs48l32_eq_mode_text[] = {
+ "Low-pass", "High-pass",
+};
+
+static const struct soc_enum cs48l32_eq_mode[] = {
+ SOC_ENUM_SINGLE(CS48L32_EQ_CONTROL2, 0,
+ ARRAY_SIZE(cs48l32_eq_mode_text),
+ cs48l32_eq_mode_text),
+ SOC_ENUM_SINGLE(CS48L32_EQ_CONTROL2, 1,
+ ARRAY_SIZE(cs48l32_eq_mode_text),
+ cs48l32_eq_mode_text),
+ SOC_ENUM_SINGLE(CS48L32_EQ_CONTROL2, 2,
+ ARRAY_SIZE(cs48l32_eq_mode_text),
+ cs48l32_eq_mode_text),
+ SOC_ENUM_SINGLE(CS48L32_EQ_CONTROL2, 3,
+ ARRAY_SIZE(cs48l32_eq_mode_text),
+ cs48l32_eq_mode_text),
+};
+
+static int cs48l32_eq_mode_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *) kcontrol->private_value;
+ unsigned int item;
+
+ item = snd_soc_enum_val_to_item(e, cs48l32_codec->eq_mode[e->shift_l]);
+ ucontrol->value.enumerated.item[0] = item;
+
+ return 0;
+}
+
+static int cs48l32_eq_mode_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *) kcontrol->private_value;
+ unsigned int *item = ucontrol->value.enumerated.item;
+ unsigned int val;
+ bool changed = false;
+
+ if (item[0] >= e->items)
+ return -EINVAL;
+
+ val = snd_soc_enum_item_to_val(e, item[0]);
+
+ snd_soc_dapm_mutex_lock(dapm);
+ if (cs48l32_codec->eq_mode[e->shift_l] != val) {
+ cs48l32_codec->eq_mode[e->shift_l] = val;
+ changed = true;
+ }
+ snd_soc_dapm_mutex_unlock(dapm);
+
+ return changed;
+}
+
+static int cs48l32_eq_coeff_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ struct cs48l32_eq_control *ctl = (void *) kcontrol->private_value;
+
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = ctl->max;
+
+ return 0;
+}
+
+static int cs48l32_eq_coeff_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct cs48l32_eq_control *params = (void *)kcontrol->private_value;
+ __be16 *coeffs;
+ unsigned int coeff_idx;
+ int block_idx;
+
+ block_idx = ((int) params->block_base - (int) CS48L32_EQ1_BAND1_COEFF1);
+ block_idx /= (CS48L32_EQ2_BAND1_COEFF1 - CS48L32_EQ1_BAND1_COEFF1);
+
+ coeffs = &cs48l32_codec->eq_coefficients[block_idx][0];
+ coeff_idx = (params->reg - params->block_base) / 2;
+
+ /* High __be16 is in [coeff_idx] and low __be16 in [coeff_idx + 1] */
+ if (params->shift == 0)
+ coeff_idx++;
+
+ ucontrol->value.integer.value[0] = be16_to_cpu(coeffs[coeff_idx]);
+
+ return 0;
+}
+
+static int cs48l32_eq_coeff_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct cs48l32_eq_control *params = (void *)kcontrol->private_value;
+ __be16 *coeffs;
+ unsigned int coeff_idx;
+ int block_idx;
+
+ block_idx = ((int) params->block_base - (int) CS48L32_EQ1_BAND1_COEFF1);
+ block_idx /= (CS48L32_EQ2_BAND1_COEFF1 - CS48L32_EQ1_BAND1_COEFF1);
+
+ coeffs = &cs48l32_codec->eq_coefficients[block_idx][0];
+ coeff_idx = (params->reg - params->block_base) / 2;
+
+ /* Put high __be16 in [coeff_idx] and low __be16 in [coeff_idx + 1] */
+ if (params->shift == 0)
+ coeff_idx++;
+
+ snd_soc_dapm_mutex_lock(dapm);
+ coeffs[coeff_idx] = cpu_to_be16(ucontrol->value.integer.value[0]);
+ snd_soc_dapm_mutex_unlock(dapm);
+
+ return 0;
+}
+
+static const struct snd_kcontrol_new cs48l32_drc_activity_output_mux[] = {
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new cs48l32_dsp_trigger_output_mux[] = {
+ SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0),
+};
+
+static int cs48l32_dsp_rate_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *) kcontrol->private_value;
+ unsigned int cached_rate;
+ const unsigned int rate_num = e->mask;
+ int item;
+
+ if (rate_num >= ARRAY_SIZE(cs48l32_codec->dsp_dma_rates))
+ return -EINVAL;
+
+ cached_rate = cs48l32_codec->dsp_dma_rates[rate_num];
+ item = snd_soc_enum_val_to_item(e, cached_rate);
+ ucontrol->value.enumerated.item[0] = item;
+
+ return 0;
+}
+
+static int cs48l32_dsp_rate_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *) kcontrol->private_value;
+ const unsigned int rate_num = e->mask;
+ const unsigned int item = ucontrol->value.enumerated.item[0];
+ unsigned int val;
+ bool changed = false;
+
+ if (item >= e->items)
+ return -EINVAL;
+
+ if (rate_num >= ARRAY_SIZE(cs48l32_codec->dsp_dma_rates))
+ return -EINVAL;
+
+ val = snd_soc_enum_item_to_val(e, item);
+
+ snd_soc_dapm_mutex_lock(dapm);
+ if (cs48l32_codec->dsp_dma_rates[rate_num] != val) {
+ cs48l32_codec->dsp_dma_rates[rate_num] = val;
+ changed = true;
+ }
+ snd_soc_dapm_mutex_unlock(dapm);
+
+ return changed;
+}
+
+static const struct soc_enum cs48l32_dsp_rate_enum[] = {
+ /* RX rates */
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 0,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 1,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 2,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 3,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 4,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 5,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 6,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 7,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ /* TX rates */
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 8,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 9,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 10,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 11,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 12,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 13,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 14,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+ SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0,
+ 15,
+ ARRAY_SIZE(cs48l32_rate_text),
+ cs48l32_rate_text, cs48l32_rate_val),
+};
+
+static int cs48l32_dsp_pre_run(struct wm_adsp *dsp)
+{
+ struct cs48l32_codec *cs48l32_codec = container_of(dsp, struct cs48l32_codec, dsp);
+ unsigned int reg;
+ const u8 *rate = cs48l32_codec->dsp_dma_rates;
+ int i;
+
+ reg = dsp->cs_dsp.base + CS48L32_HALO_SAMPLE_RATE_RX1;
+ for (i = 0; i < CS48L32_DSP_N_RX_CHANNELS; ++i) {
+ regmap_update_bits(dsp->cs_dsp.regmap, reg, CS48L32_HALO_DSP_RATE_MASK, *rate);
+ reg += 8;
+ rate++;
+ }
+
+ reg = dsp->cs_dsp.base + CS48L32_HALO_SAMPLE_RATE_TX1;
+ for (i = 0; i < CS48L32_DSP_N_TX_CHANNELS; ++i) {
+ regmap_update_bits(dsp->cs_dsp.regmap, reg, CS48L32_HALO_DSP_RATE_MASK, *rate);
+ reg += 8;
+ rate++;
+ }
+
+ usleep_range(300, 600);
+
+ return 0;
+}
+
+static void cs48l32_dsp_memory_disable(struct cs48l32_codec *cs48l32_codec,
+ const struct cs48l32_dsp_power_regs *regs)
+{
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ int i, j, ret;
+
+ for (i = 0; i < regs->n_pwd; ++i) {
+ ret = regmap_write(regmap, regs->pwd[i], 0);
+ if (ret)
+ goto err;
+ }
+
+ for (i = 0; i < regs->n_ext; ++i) {
+ for (j = regs->ext[i].start; j <= regs->ext[i].end; j += 4) {
+ ret = regmap_write(regmap, j, 0);
+ if (ret)
+ goto err;
+ }
+ }
+
+ return;
+
+err:
+ dev_warn(cs48l32_codec->core.dev, "Failed to write SRAM enables (%d)\n", ret);
+}
+
+static int cs48l32_dsp_memory_enable(struct cs48l32_codec *cs48l32_codec,
+ const struct cs48l32_dsp_power_regs *regs)
+{
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ int i, j, ret;
+
+ /* disable power-off */
+ for (i = 0; i < regs->n_ext; ++i) {
+ for (j = regs->ext[i].start; j <= regs->ext[i].end; j += 4) {
+ ret = regmap_write(regmap, j, 0x3);
+ if (ret)
+ goto err;
+ }
+ }
+
+ /* power-up the banks in sequence */
+ for (i = 0; i < regs->n_pwd; ++i) {
+ ret = regmap_write(regmap, regs->pwd[i], 0x1);
+ if (ret)
+ goto err;
+
+ udelay(1); /* allow bank to power-up */
+
+ ret = regmap_write(regmap, regs->pwd[i], 0x3);
+ if (ret)
+ goto err;
+
+ udelay(1); /* allow bank to power-up */
+ }
+
+ return 0;
+
+err:
+ dev_err(cs48l32_codec->core.dev, "Failed to write SRAM enables (%d)\n", ret);
+ cs48l32_dsp_memory_disable(cs48l32_codec, regs);
+
+ return ret;
+}
+
+static int cs48l32_dsp_freq_update(struct snd_soc_dapm_widget *w, unsigned int freq_reg,
+ unsigned int freqsel_reg)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ struct wm_adsp *dsp = &cs48l32_codec->dsp;
+ int ret;
+ unsigned int freq, freq_sel, freq_sts;
+
+ if (!freq_reg)
+ return -EINVAL;
+
+ ret = regmap_read(regmap, freq_reg, &freq);
+ if (ret) {
+ dev_err(component->dev, "Failed to read #%x: %d\n", freq_reg, ret);
+ return ret;
+ }
+
+ if (freqsel_reg) {
+ freq_sts = (freq & CS48L32_SYSCLK_FREQ_STS_MASK) >> CS48L32_SYSCLK_FREQ_STS_SHIFT;
+
+ ret = regmap_read(regmap, freqsel_reg, &freq_sel);
+ if (ret) {
+ dev_err(component->dev, "Failed to read #%x: %d\n", freqsel_reg, ret);
+ return ret;
+ }
+ freq_sel = (freq_sel & CS48L32_SYSCLK_FREQ_MASK) >> CS48L32_SYSCLK_FREQ_SHIFT;
+
+ if (freq_sts != freq_sel) {
+ dev_err(component->dev, "SYSCLK FREQ (#%x) != FREQ STS (#%x)\n",
+ freq_sel, freq_sts);
+ return -ETIMEDOUT;
+ }
+ }
+
+ freq &= CS48L32_DSP_CLK_FREQ_MASK;
+ freq >>= CS48L32_DSP_CLK_FREQ_SHIFT;
+
+ ret = regmap_write(dsp->cs_dsp.regmap,
+ dsp->cs_dsp.base + CS48L32_DSP_CLOCK_FREQ_OFFS, freq);
+ if (ret) {
+ dev_err(component->dev, "Failed to set HALO clock freq: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cs48l32_dsp_freq_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMU:
+ return cs48l32_dsp_freq_update(w, CS48L32_SYSTEM_CLOCK2, CS48L32_SYSTEM_CLOCK1);
+ default:
+ return 0;
+ }
+}
+
+static irqreturn_t cs48l32_irq(int irq, void *data)
+{
+ static const unsigned int eint1_regs[] = {
+ CS48L32_IRQ1_EINT_9, CS48L32_IRQ1_MASK_9,
+ CS48L32_IRQ1_EINT_7, CS48L32_IRQ1_MASK_7
+ };
+ u32 reg_vals[4];
+ struct cs48l32_codec *cs48l32_codec = data;
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ irqreturn_t result = IRQ_NONE;
+ unsigned int eint_pending;
+ int i, ret;
+
+ static_assert(ARRAY_SIZE(eint1_regs) == ARRAY_SIZE(reg_vals));
+
+ ret = pm_runtime_resume_and_get(cs48l32_codec->core.dev);
+ if (ret) {
+ dev_warn(cs48l32_codec->core.dev, "irq could not get pm runtime: %d\n", ret);
+ return IRQ_NONE;
+ }
+
+ ret = regmap_read(regmap, CS48L32_IRQ1_STATUS, &eint_pending);
+ if (ret) {
+ dev_warn(cs48l32_codec->core.dev, "Read IRQ1_STATUS failed: %d\n", ret);
+ return IRQ_NONE;
+ }
+ if ((eint_pending & CS48L32_IRQ1_STS_MASK) == 0)
+ goto out;
+
+ ret = regmap_multi_reg_read(regmap, eint1_regs, reg_vals, ARRAY_SIZE(reg_vals));
+ if (ret) {
+ dev_warn(cs48l32_codec->core.dev, "Read IRQ regs failed: %d\n", ret);
+ return IRQ_NONE;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(reg_vals); i += 2) {
+ reg_vals[i] &= ~reg_vals[i + 1];
+ regmap_write(regmap, eint1_regs[i], reg_vals[i]);
+ }
+
+ if (reg_vals[0] & CS48L32_DSP1_IRQ0_EINT1_MASK)
+ wm_adsp_compr_handle_irq(&cs48l32_codec->dsp);
+
+ if (reg_vals[2] & CS48L32_DSP1_MPU_ERR_EINT1_MASK) {
+ dev_warn(cs48l32_codec->core.dev, "MPU err IRQ\n");
+ wm_halo_bus_error(irq, &cs48l32_codec->dsp);
+ }
+
+ if (reg_vals[2] & CS48L32_DSP1_WDT_EXPIRE_EINT1_MASK) {
+ dev_warn(cs48l32_codec->core.dev, "WDT expire IRQ\n");
+ wm_halo_wdt_expire(irq, &cs48l32_codec->dsp);
+ }
+
+ result = IRQ_HANDLED;
+
+out:
+ pm_runtime_mark_last_busy(cs48l32_codec->core.dev);
+ pm_runtime_put_autosuspend(cs48l32_codec->core.dev);
+
+ return result;
+}
+
+static int cs48l32_get_dspclk_setting(struct cs48l32_codec *cs48l32_codec, unsigned int freq,
+ int src, unsigned int *val)
+{
+ freq /= 15625; /* convert to 1/64ths of 1MHz */
+ *val |= freq << CS48L32_DSP_CLK_FREQ_SHIFT;
+
+ return 0;
+}
+
+static int cs48l32_get_sysclk_setting(unsigned int freq)
+{
+ switch (freq) {
+ case 0:
+ case 5644800:
+ case 6144000:
+ return CS48L32_SYSCLK_RATE_6MHZ;
+ case 11289600:
+ case 12288000:
+ return CS48L32_SYSCLK_RATE_12MHZ << CS48L32_SYSCLK_FREQ_SHIFT;
+ case 22579200:
+ case 24576000:
+ return CS48L32_SYSCLK_RATE_24MHZ << CS48L32_SYSCLK_FREQ_SHIFT;
+ case 45158400:
+ case 49152000:
+ return CS48L32_SYSCLK_RATE_49MHZ << CS48L32_SYSCLK_FREQ_SHIFT;
+ case 90316800:
+ case 98304000:
+ return CS48L32_SYSCLK_RATE_98MHZ << CS48L32_SYSCLK_FREQ_SHIFT;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int cs48l32_set_pdm_fllclk(struct snd_soc_component *component, int source)
+{
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ unsigned int val;
+
+ switch (source) {
+ case CS48L32_PDMCLK_SRC_IN1_PDMCLK:
+ case CS48L32_PDMCLK_SRC_IN2_PDMCLK:
+ case CS48L32_PDMCLK_SRC_IN3_PDMCLK:
+ case CS48L32_PDMCLK_SRC_IN4_PDMCLK:
+ case CS48L32_PDMCLK_SRC_AUXPDM1_CLK:
+ case CS48L32_PDMCLK_SRC_AUXPDM2_CLK:
+ val = source << CS48L32_PDM_FLLCLK_SRC_SHIFT;
+ break;
+ default:
+ dev_err(cs48l32_codec->core.dev, "Invalid PDM FLLCLK src %d\n", source);
+ return -EINVAL;
+ }
+
+ return regmap_update_bits(regmap, CS48L32_INPUT_CONTROL2,
+ CS48L32_PDM_FLLCLK_SRC_MASK, val);
+}
+
+static int cs48l32_set_sysclk(struct snd_soc_component *component, int clk_id, int source,
+ unsigned int freq, int dir)
+{
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ char *name;
+ unsigned int reg;
+ unsigned int mask = CS48L32_SYSCLK_SRC_MASK;
+ unsigned int val = source << CS48L32_SYSCLK_SRC_SHIFT;
+ int clk_freq_sel, *clk;
+
+ switch (clk_id) {
+ case CS48L32_CLK_SYSCLK_1:
+ name = "SYSCLK";
+ reg = CS48L32_SYSTEM_CLOCK1;
+ clk = &cs48l32_codec->sysclk;
+ clk_freq_sel = cs48l32_get_sysclk_setting(freq);
+ mask |= CS48L32_SYSCLK_FREQ_MASK | CS48L32_SYSCLK_FRAC_MASK;
+ break;
+ case CS48L32_CLK_DSPCLK:
+ name = "DSPCLK";
+ reg = CS48L32_DSP_CLOCK1;
+ clk = &cs48l32_codec->dspclk;
+ clk_freq_sel = cs48l32_get_dspclk_setting(cs48l32_codec, freq, source, &val);
+ mask |= CS48L32_DSP_CLK_FREQ_MASK;
+ break;
+ case CS48L32_CLK_PDM_FLLCLK:
+ return cs48l32_set_pdm_fllclk(component, source);
+ default:
+ return -EINVAL;
+ }
+
+ if (clk_freq_sel < 0) {
+ dev_err(cs48l32_codec->core.dev, "Failed to get %s setting for %dHZ\n", name, freq);
+ return clk_freq_sel;
+ }
+
+ *clk = freq;
+
+ if (freq == 0) {
+ dev_dbg(cs48l32_codec->core.dev, "%s cleared\n", name);
+ return 0;
+ }
+
+ val |= clk_freq_sel;
+
+ if (freq % 6144000)
+ val |= CS48L32_SYSCLK_FRAC_MASK;
+
+ dev_dbg(cs48l32_codec->core.dev, "%s set to %uHz", name, freq);
+
+ return regmap_update_bits(regmap, reg, mask, val);
+}
+
+static int cs48l32_is_enabled_fll(struct cs48l32_fll *fll, int base)
+{
+ struct regmap *regmap = fll->codec->core.regmap;
+ unsigned int reg;
+ int ret;
+
+ ret = regmap_read(regmap, base + CS48L32_FLL_CONTROL1_OFFS, &reg);
+ if (ret != 0) {
+ cs48l32_fll_err(fll, "Failed to read current state: %d\n", ret);
+ return ret;
+ }
+
+ return reg & CS48L32_FLL_EN_MASK;
+}
+
+static int cs48l32_wait_for_fll(struct cs48l32_fll *fll, bool requested)
+{
+ struct regmap *regmap = fll->codec->core.regmap;
+ unsigned int val = 0;
+ int i;
+
+ cs48l32_fll_dbg(fll, "Waiting for FLL...\n");
+
+ for (i = 0; i < 30; i++) {
+ regmap_read(regmap, fll->sts_addr, &val);
+ if (!!(val & fll->sts_mask) == requested)
+ return 0;
+
+ switch (i) {
+ case 0 ... 5:
+ usleep_range(75, 125);
+ break;
+ case 6 ... 20:
+ usleep_range(750, 1250);
+ break;
+ default:
+ fsleep(20000);
+ break;
+ }
+ }
+
+ cs48l32_fll_warn(fll, "Timed out waiting for %s\n", requested ? "lock" : "unlock");
+
+ return -ETIMEDOUT;
+}
+
+static int cs48l32_fllhj_disable(struct cs48l32_fll *fll)
+{
+ struct cs48l32 *cs48l32 = &fll->codec->core;
+ bool change;
+
+ cs48l32_fll_dbg(fll, "Disabling FLL\n");
+
+ /*
+ * Disable lockdet, but don't set ctrl_upd update bit. This allows the
+ * lock status bit to clear as normal, but should the FLL be enabled
+ * again due to a control clock being required, the lock won't re-assert
+ * as the FLL config registers are automatically applied when the FLL
+ * enables.
+ */
+ regmap_set_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_HOLD_MASK);
+ regmap_clear_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL2_OFFS,
+ CS48L32_FLL_LOCKDET_MASK);
+ regmap_set_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL5_OFFS,
+ CS48L32_FLL_FRC_INTEG_UPD_MASK);
+ regmap_update_bits_check(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_EN_MASK,
+ 0,
+ &change);
+
+ cs48l32_wait_for_fll(fll, false);
+
+ /*
+ * ctrl_up gates the writes to all the fll's registers, setting it to 0
+ * here ensures that after a runtime suspend/resume cycle when one
+ * enables the fll then ctrl_up is the last bit that is configured
+ * by the fll enable code rather than the cache sync operation which
+ * would have updated it much earlier before writing out all fll
+ * registers
+ */
+ regmap_clear_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_CTRL_UPD_MASK);
+
+ if (change)
+ pm_runtime_put_autosuspend(cs48l32->dev);
+
+ return 0;
+}
+
+static int cs48l32_fllhj_apply(struct cs48l32_fll *fll, int fin)
+{
+ struct regmap *regmap = fll->codec->core.regmap;
+ int refdiv, fref, fout, lockdet_thr, fbdiv, fllgcd;
+ bool frac = false;
+ unsigned int fll_n, min_n, max_n, ratio, theta, lambda, hp;
+ unsigned int gains, num;
+
+ cs48l32_fll_dbg(fll, "fin=%d, fout=%d\n", fin, fll->fout);
+
+ for (refdiv = 0; refdiv < 4; refdiv++) {
+ if ((fin / (1 << refdiv)) <= CS48L32_FLLHJ_MAX_THRESH)
+ break;
+ }
+
+ fref = fin / (1 << refdiv);
+ fout = fll->fout;
+ frac = fout % fref;
+
+ /*
+ * Use simple heuristic approach to find a configuration that
+ * should work for most input clocks.
+ */
+ if (fref < CS48L32_FLLHJ_LOW_THRESH) {
+ lockdet_thr = 2;
+ gains = CS48L32_FLLHJ_LOW_GAINS;
+
+ if (frac)
+ fbdiv = 256;
+ else
+ fbdiv = 4;
+ } else if (fref < CS48L32_FLLHJ_MID_THRESH) {
+ lockdet_thr = 8;
+ gains = CS48L32_FLLHJ_MID_GAINS;
+ fbdiv = (frac) ? 16 : 2;
+ } else {
+ lockdet_thr = 8;
+ gains = CS48L32_FLLHJ_HIGH_GAINS;
+ fbdiv = 1;
+ }
+ /* Use high performance mode for fractional configurations. */
+ if (frac) {
+ hp = 3;
+ min_n = CS48L32_FLLHJ_FRAC_MIN_N;
+ max_n = CS48L32_FLLHJ_FRAC_MAX_N;
+ } else {
+ if (fref < CS48L32_FLLHJ_LP_INT_MODE_THRESH)
+ hp = 0;
+ else
+ hp = 1;
+
+ min_n = CS48L32_FLLHJ_INT_MIN_N;
+ max_n = CS48L32_FLLHJ_INT_MAX_N;
+ }
+
+ ratio = fout / fref;
+
+ cs48l32_fll_dbg(fll, "refdiv=%d, fref=%d, frac:%d\n", refdiv, fref, frac);
+
+ while (ratio / fbdiv < min_n) {
+ fbdiv /= 2;
+ if (fbdiv < min_n) {
+ cs48l32_fll_err(fll, "FBDIV (%u) < minimum N (%u)\n", fbdiv, min_n);
+ return -EINVAL;
+ }
+ }
+ while (frac && (ratio / fbdiv > max_n)) {
+ fbdiv *= 2;
+ if (fbdiv >= 1024) {
+ cs48l32_fll_err(fll, "FBDIV (%u) >= 1024\n", fbdiv);
+ return -EINVAL;
+ }
+ }
+
+ cs48l32_fll_dbg(fll, "lockdet=%d, hp=#%x, fbdiv:%d\n", lockdet_thr, hp, fbdiv);
+
+ /* Calculate N.K values */
+ fllgcd = gcd(fout, fbdiv * fref);
+ num = fout / fllgcd;
+ lambda = (fref * fbdiv) / fllgcd;
+ fll_n = num / lambda;
+ theta = num % lambda;
+
+ cs48l32_fll_dbg(fll, "fll_n=%d, gcd=%d, theta=%d, lambda=%d\n",
+ fll_n, fllgcd, theta, lambda);
+
+ /* Some sanity checks before any registers are written. */
+ if (fll_n < min_n || fll_n > max_n) {
+ cs48l32_fll_err(fll, "N not in valid %s mode range %d-%d: %d\n",
+ frac ? "fractional" : "integer", min_n, max_n, fll_n);
+ return -EINVAL;
+ }
+ if (fbdiv < 1 || (frac && fbdiv >= 1024) || (!frac && fbdiv >= 256)) {
+ cs48l32_fll_err(fll, "Invalid fbdiv for %s mode (%u)\n",
+ frac ? "fractional" : "integer", fbdiv);
+ return -EINVAL;
+ }
+
+ /* clear the ctrl_upd bit to guarantee we write to it later. */
+ regmap_update_bits(regmap,
+ fll->base + CS48L32_FLL_CONTROL2_OFFS,
+ CS48L32_FLL_LOCKDET_THR_MASK |
+ CS48L32_FLL_PHASEDET_MASK |
+ CS48L32_FLL_REFCLK_DIV_MASK |
+ CS48L32_FLL_N_MASK |
+ CS48L32_FLL_CTRL_UPD_MASK,
+ (lockdet_thr << CS48L32_FLL_LOCKDET_THR_SHIFT) |
+ (1 << CS48L32_FLL_PHASEDET_SHIFT) |
+ (refdiv << CS48L32_FLL_REFCLK_DIV_SHIFT) |
+ (fll_n << CS48L32_FLL_N_SHIFT));
+
+ regmap_update_bits(regmap,
+ fll->base + CS48L32_FLL_CONTROL3_OFFS,
+ CS48L32_FLL_LAMBDA_MASK |
+ CS48L32_FLL_THETA_MASK,
+ (lambda << CS48L32_FLL_LAMBDA_SHIFT) |
+ (theta << CS48L32_FLL_THETA_SHIFT));
+
+ regmap_update_bits(regmap,
+ fll->base + CS48L32_FLL_CONTROL4_OFFS,
+ (0xffff << CS48L32_FLL_FD_GAIN_COARSE_SHIFT) |
+ CS48L32_FLL_HP_MASK |
+ CS48L32_FLL_FB_DIV_MASK,
+ (gains << CS48L32_FLL_FD_GAIN_COARSE_SHIFT) |
+ (hp << CS48L32_FLL_HP_SHIFT) |
+ (fbdiv << CS48L32_FLL_FB_DIV_SHIFT));
+
+ return 0;
+}
+
+static int cs48l32_fllhj_enable(struct cs48l32_fll *fll)
+{
+ struct cs48l32 *cs48l32 = &fll->codec->core;
+ int already_enabled = cs48l32_is_enabled_fll(fll, fll->base);
+ int ret;
+
+ if (already_enabled < 0)
+ return already_enabled;
+
+ if (!already_enabled)
+ pm_runtime_get_sync(cs48l32->dev);
+
+ cs48l32_fll_dbg(fll, "Enabling FLL, initially %s\n",
+ str_enabled_disabled(already_enabled));
+
+ /* FLLn_HOLD must be set before configuring any registers */
+ regmap_set_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_HOLD_MASK);
+
+ /* Apply refclk */
+ ret = cs48l32_fllhj_apply(fll, fll->ref_freq);
+ if (ret) {
+ cs48l32_fll_err(fll, "Failed to set FLL: %d\n", ret);
+ goto out;
+ }
+ regmap_update_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL2_OFFS,
+ CS48L32_FLL_REFCLK_SRC_MASK,
+ fll->ref_src << CS48L32_FLL_REFCLK_SRC_SHIFT);
+
+ regmap_set_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_EN_MASK);
+
+out:
+ regmap_set_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL2_OFFS,
+ CS48L32_FLL_LOCKDET_MASK);
+
+ regmap_set_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_CTRL_UPD_MASK);
+
+ /* Release the hold so that flln locks to external frequency */
+ regmap_clear_bits(cs48l32->regmap,
+ fll->base + CS48L32_FLL_CONTROL1_OFFS,
+ CS48L32_FLL_HOLD_MASK);
+
+ if (!already_enabled)
+ cs48l32_wait_for_fll(fll, true);
+
+ return 0;
+}
+
+static int cs48l32_fllhj_validate(struct cs48l32_fll *fll,
+ unsigned int ref_in,
+ unsigned int fout)
+{
+ if (fout && !ref_in) {
+ cs48l32_fll_err(fll, "fllout set without valid input clk\n");
+ return -EINVAL;
+ }
+
+ if (fll->fout && fout != fll->fout) {
+ cs48l32_fll_err(fll, "Can't change output on active FLL\n");
+ return -EINVAL;
+ }
+
+ if (ref_in / CS48L32_FLL_MAX_REFDIV > CS48L32_FLLHJ_MAX_THRESH) {
+ cs48l32_fll_err(fll, "Can't scale %dMHz to <=13MHz\n", ref_in);
+ return -EINVAL;
+ }
+
+ if (fout > CS48L32_FLL_MAX_FOUT) {
+ cs48l32_fll_err(fll, "Fout=%dMHz exceeds maximum %dMHz\n",
+ fout, CS48L32_FLL_MAX_FOUT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cs48l32_fllhj_set_refclk(struct cs48l32_fll *fll, int source,
+ unsigned int fin, unsigned int fout)
+{
+ int ret = 0;
+
+ if (fll->ref_src == source && fll->ref_freq == fin && fll->fout == fout)
+ return 0;
+
+ if (fin && fout && cs48l32_fllhj_validate(fll, fin, fout))
+ return -EINVAL;
+
+ fll->ref_src = source;
+ fll->ref_freq = fin;
+ fll->fout = fout;
+
+ if (fout)
+ ret = cs48l32_fllhj_enable(fll);
+ else
+ cs48l32_fllhj_disable(fll);
+
+ return ret;
+}
+
+static int cs48l32_init_fll(struct cs48l32_fll *fll)
+{
+ fll->ref_src = CS48L32_FLL_SRC_NONE;
+
+ return 0;
+}
+
+static int cs48l32_set_fll(struct snd_soc_component *component, int fll_id,
+ int source, unsigned int fref, unsigned int fout)
+{
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+
+ switch (fll_id) {
+ case CS48L32_FLL1_REFCLK:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return cs48l32_fllhj_set_refclk(&cs48l32_codec->fll, source, fref, fout);
+}
+
+static int cs48l32_asp_dai_probe(struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ unsigned int pin_reg, last_pin_reg, hiz_reg;
+
+ switch (dai->id) {
+ case 1:
+ pin_reg = CS48L32_GPIO3_CTRL1;
+ hiz_reg = CS48L32_ASP1_CONTROL3;
+ break;
+ case 2:
+ pin_reg = CS48L32_GPIO7_CTRL1;
+ hiz_reg = CS48L32_ASP2_CONTROL3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (last_pin_reg = pin_reg + 12; pin_reg <= last_pin_reg; ++pin_reg)
+ regmap_clear_bits(regmap, pin_reg, CS48L32_GPIOX_CTRL1_FN_MASK);
+
+ /* DOUT high-impendance when not transmitting */
+ regmap_set_bits(regmap, hiz_reg, CS48L32_ASP_DOUT_HIZ_MASK);
+
+ return 0;
+}
+
+static int cs48l32_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ unsigned int val = 0U;
+ unsigned int base = dai->driver->base;
+ unsigned int mask = CS48L32_ASP_FMT_MASK | CS48L32_ASP_BCLK_INV_MASK |
+ CS48L32_ASP_BCLK_MSTR_MASK |
+ CS48L32_ASP_FSYNC_INV_MASK |
+ CS48L32_ASP_FSYNC_MSTR_MASK;
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_DSP_A:
+ val |= (CS48L32_ASP_FMT_DSP_MODE_A << CS48L32_ASP_FMT_SHIFT);
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ if ((fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) != SND_SOC_DAIFMT_BP_FP) {
+ cs48l32_asp_err(dai, "DSP_B cannot be clock consumer\n");
+ return -EINVAL;
+ }
+ val |= (CS48L32_ASP_FMT_DSP_MODE_B << CS48L32_ASP_FMT_SHIFT);
+ break;
+ case SND_SOC_DAIFMT_I2S:
+ val |= (CS48L32_ASP_FMT_I2S_MODE << CS48L32_ASP_FMT_SHIFT);
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ if ((fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) != SND_SOC_DAIFMT_BP_FP) {
+ cs48l32_asp_err(dai, "LEFT_J cannot be clock consumer\n");
+ return -EINVAL;
+ }
+ val |= (CS48L32_ASP_FMT_LEFT_JUSTIFIED_MODE << CS48L32_ASP_FMT_SHIFT);
+ break;
+ default:
+ cs48l32_asp_err(dai, "Unsupported DAI format %d\n",
+ fmt & SND_SOC_DAIFMT_FORMAT_MASK);
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ case SND_SOC_DAIFMT_BC_FC:
+ break;
+ case SND_SOC_DAIFMT_BC_FP:
+ val |= CS48L32_ASP_FSYNC_MSTR_MASK;
+ break;
+ case SND_SOC_DAIFMT_BP_FC:
+ val |= CS48L32_ASP_BCLK_MSTR_MASK;
+ break;
+ case SND_SOC_DAIFMT_BP_FP:
+ val |= CS48L32_ASP_BCLK_MSTR_MASK;
+ val |= CS48L32_ASP_FSYNC_MSTR_MASK;
+ break;
+ default:
+ cs48l32_asp_err(dai, "Unsupported clock direction %d\n",
+ fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK);
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_NF:
+ break;
+ case SND_SOC_DAIFMT_IB_IF:
+ val |= CS48L32_ASP_BCLK_INV_MASK;
+ val |= CS48L32_ASP_FSYNC_INV_MASK;
+ break;
+ case SND_SOC_DAIFMT_IB_NF:
+ val |= CS48L32_ASP_BCLK_INV_MASK;
+ break;
+ case SND_SOC_DAIFMT_NB_IF:
+ val |= CS48L32_ASP_FSYNC_INV_MASK;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ regmap_update_bits(regmap, base + CS48L32_ASP_CONTROL2, mask, val);
+
+ return 0;
+}
+
+static const struct {
+ u32 freq;
+ u32 id;
+} cs48l32_sclk_rates[] = {
+ { 128000, 12 },
+ { 176400, 13 },
+ { 192000, 14 },
+ { 256000, 15 },
+ { 352800, 16 },
+ { 384000, 17 },
+ { 512000, 18 },
+ { 705600, 19 },
+ { 768000, 21 },
+ { 1024000, 23 },
+ { 1411200, 25 },
+ { 1536000, 27 },
+ { 2048000, 29 },
+ { 2822400, 31 },
+ { 3072000, 33 },
+ { 4096000, 36 },
+ { 5644800, 38 },
+ { 6144000, 40 },
+ { 8192000, 47 },
+ { 11289600, 49 },
+ { 12288000, 51 },
+ { 22579200, 57 },
+ { 24576000, 59 },
+};
+
+#define CS48L32_48K_RATE_MASK 0x0e00fe
+#define CS48L32_44K1_RATE_MASK 0x00fe00
+#define CS48L32_RATE_MASK (CS48L32_48K_RATE_MASK | CS48L32_44K1_RATE_MASK)
+
+static const unsigned int cs48l32_sr_vals[] = {
+ 0,
+ 12000, /* CS48L32_48K_RATE_MASK */
+ 24000, /* CS48L32_48K_RATE_MASK */
+ 48000, /* CS48L32_48K_RATE_MASK */
+ 96000, /* CS48L32_48K_RATE_MASK */
+ 192000, /* CS48L32_48K_RATE_MASK */
+ 384000, /* CS48L32_48K_RATE_MASK */
+ 768000, /* CS48L32_48K_RATE_MASK */
+ 0,
+ 11025, /* CS48L32_44K1_RATE_MASK */
+ 22050, /* CS48L32_44K1_RATE_MASK */
+ 44100, /* CS48L32_44K1_RATE_MASK */
+ 88200, /* CS48L32_44K1_RATE_MASK */
+ 176400, /* CS48L32_44K1_RATE_MASK */
+ 352800, /* CS48L32_44K1_RATE_MASK */
+ 705600, /* CS48L32_44K1_RATE_MASK */
+ 0,
+ 8000, /* CS48L32_48K_RATE_MASK */
+ 16000, /* CS48L32_48K_RATE_MASK */
+ 32000, /* CS48L32_48K_RATE_MASK */
+};
+
+static const struct snd_pcm_hw_constraint_list cs48l32_constraint = {
+ .count = ARRAY_SIZE(cs48l32_sr_vals),
+ .list = cs48l32_sr_vals,
+};
+
+static int cs48l32_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct cs48l32_dai_priv *dai_priv = &cs48l32_codec->dai[dai->id - 1];
+ unsigned int base_rate;
+
+ if (!substream->runtime)
+ return 0;
+
+ switch (dai_priv->clk) {
+ case CS48L32_CLK_SYSCLK_1:
+ case CS48L32_CLK_SYSCLK_2:
+ case CS48L32_CLK_SYSCLK_3:
+ case CS48L32_CLK_SYSCLK_4:
+ base_rate = cs48l32_codec->sysclk;
+ break;
+ default:
+ return 0;
+ }
+
+ if (base_rate == 0)
+ dai_priv->constraint.mask = CS48L32_RATE_MASK;
+ else if (base_rate % 4000)
+ dai_priv->constraint.mask = CS48L32_44K1_RATE_MASK;
+ else
+ dai_priv->constraint.mask = CS48L32_48K_RATE_MASK;
+
+ return snd_pcm_hw_constraint_list(substream->runtime, 0,
+ SNDRV_PCM_HW_PARAM_RATE,
+ &dai_priv->constraint);
+}
+
+static int cs48l32_hw_params_rate(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct cs48l32_dai_priv *dai_priv = &cs48l32_codec->dai[dai->id - 1];
+ unsigned int sr_val, sr_reg, rate;
+
+ rate = params_rate(params);
+ for (sr_val = 0; sr_val < ARRAY_SIZE(cs48l32_sr_vals); sr_val++)
+ if (cs48l32_sr_vals[sr_val] == rate)
+ break;
+
+ if (sr_val == ARRAY_SIZE(cs48l32_sr_vals)) {
+ cs48l32_asp_err(dai, "Unsupported sample rate %dHz\n", rate);
+ return -EINVAL;
+ }
+
+ switch (dai_priv->clk) {
+ case CS48L32_CLK_SYSCLK_1:
+ sr_reg = CS48L32_SAMPLE_RATE1;
+ break;
+ case CS48L32_CLK_SYSCLK_2:
+ sr_reg = CS48L32_SAMPLE_RATE2;
+ break;
+ case CS48L32_CLK_SYSCLK_3:
+ sr_reg = CS48L32_SAMPLE_RATE3;
+ break;
+ case CS48L32_CLK_SYSCLK_4:
+ sr_reg = CS48L32_SAMPLE_RATE4;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ snd_soc_component_update_bits(component, sr_reg, CS48L32_SAMPLE_RATE_1_MASK, sr_val);
+
+ return 0;
+}
+
+static bool cs48l32_asp_cfg_changed(struct snd_soc_component *component,
+ unsigned int base, unsigned int sclk,
+ unsigned int slotws, unsigned int dataw)
+{
+ unsigned int val;
+
+ val = snd_soc_component_read(component, base + CS48L32_ASP_CONTROL1);
+ if (sclk != (val & CS48L32_ASP_BCLK_FREQ_MASK))
+ return true;
+
+ val = snd_soc_component_read(component, base + CS48L32_ASP_CONTROL2);
+ if (slotws != (val & (CS48L32_ASP_RX_WIDTH_MASK | CS48L32_ASP_TX_WIDTH_MASK)))
+ return true;
+
+ val = snd_soc_component_read(component, base + CS48L32_ASP_DATA_CONTROL1);
+ if (dataw != (val & (CS48L32_ASP_TX_WL_MASK)))
+ return true;
+
+ val = snd_soc_component_read(component, base + CS48L32_ASP_DATA_CONTROL5);
+ if (dataw != (val & (CS48L32_ASP_RX_WL_MASK)))
+ return true;
+
+ return false;
+}
+
+static int cs48l32_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ int base = dai->driver->base;
+ int dai_id = dai->id - 1;
+ unsigned int rate = params_rate(params);
+ unsigned int dataw = snd_pcm_format_width(params_format(params));
+ unsigned int asp_state = 0;
+ int sclk, sclk_target;
+ unsigned int slotw, n_slots, n_slots_multiple, val;
+ int i, ret;
+
+ cs48l32_asp_dbg(dai, "hwparams in: ch:%u dataw:%u rate:%u\n",
+ params_channels(params), dataw, rate);
+ /*
+ * The following calculations hold only under the assumption that
+ * symmetric_[rates|channels|samplebits] are set to 1
+ */
+ if (cs48l32_codec->tdm_slots[dai_id]) {
+ n_slots = cs48l32_codec->tdm_slots[dai_id];
+ slotw = cs48l32_codec->tdm_width[dai_id];
+ } else {
+ n_slots = params_channels(params);
+ slotw = dataw;
+ }
+
+ val = snd_soc_component_read(component, base + CS48L32_ASP_CONTROL2);
+ val = (val & CS48L32_ASP_FMT_MASK) >> CS48L32_ASP_FMT_SHIFT;
+ if (val == CS48L32_ASP_FMT_I2S_MODE)
+ n_slots_multiple = 2;
+ else
+ n_slots_multiple = 1;
+
+ sclk_target = snd_soc_tdm_params_to_bclk(params, slotw, n_slots, n_slots_multiple);
+
+ for (i = 0; i < ARRAY_SIZE(cs48l32_sclk_rates); i++) {
+ if ((cs48l32_sclk_rates[i].freq >= sclk_target) &&
+ (cs48l32_sclk_rates[i].freq % rate == 0)) {
+ sclk = cs48l32_sclk_rates[i].id;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE(cs48l32_sclk_rates)) {
+ cs48l32_asp_err(dai, "Unsupported sample rate %dHz\n", rate);
+ return -EINVAL;
+ }
+
+ cs48l32_asp_dbg(dai, "hwparams out: n_slots:%u dataw:%u slotw:%u bclk:%u bclkid:%u\n",
+ n_slots, dataw, slotw, sclk_target, sclk);
+
+ slotw = (slotw << CS48L32_ASP_TX_WIDTH_SHIFT) |
+ (slotw << CS48L32_ASP_RX_WIDTH_SHIFT);
+
+ if (!cs48l32_asp_cfg_changed(component, base, sclk, slotw, dataw))
+ return cs48l32_hw_params_rate(substream, params, dai);
+
+ /* ASP must be disabled while changing configuration */
+ asp_state = snd_soc_component_read(component, base + CS48L32_ASP_ENABLES1);
+ regmap_clear_bits(regmap, base + CS48L32_ASP_ENABLES1, 0xff00ff);
+
+ ret = cs48l32_hw_params_rate(substream, params, dai);
+ if (ret != 0)
+ goto restore_asp;
+
+ regmap_update_bits_async(regmap,
+ base + CS48L32_ASP_CONTROL1,
+ CS48L32_ASP_BCLK_FREQ_MASK,
+ sclk);
+ regmap_update_bits_async(regmap,
+ base + CS48L32_ASP_CONTROL2,
+ CS48L32_ASP_RX_WIDTH_MASK | CS48L32_ASP_TX_WIDTH_MASK,
+ slotw);
+ regmap_update_bits_async(regmap,
+ base + CS48L32_ASP_DATA_CONTROL1,
+ CS48L32_ASP_TX_WL_MASK,
+ dataw);
+ regmap_update_bits(regmap,
+ base + CS48L32_ASP_DATA_CONTROL5,
+ CS48L32_ASP_RX_WL_MASK,
+ dataw);
+
+restore_asp:
+ /* Restore ASP TX/RX enable state */
+ regmap_update_bits(regmap,
+ base + CS48L32_ASP_ENABLES1,
+ 0xff00ff,
+ asp_state);
+ return ret;
+}
+
+static const char *cs48l32_dai_clk_str(int clk_id)
+{
+ switch (clk_id) {
+ case CS48L32_CLK_SYSCLK_1:
+ case CS48L32_CLK_SYSCLK_2:
+ case CS48L32_CLK_SYSCLK_3:
+ case CS48L32_CLK_SYSCLK_4:
+ return "SYSCLK";
+ default:
+ return "Unknown clock";
+ }
+}
+
+static int cs48l32_dai_set_sysclk(struct snd_soc_dai *dai,
+ int clk_id, unsigned int freq, int dir)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct cs48l32_dai_priv *dai_priv = &cs48l32_codec->dai[dai->id - 1];
+ unsigned int base = dai->driver->base;
+ unsigned int current_asp_rate, target_asp_rate;
+ bool change_rate_domain = false;
+ int ret;
+
+ if (clk_id == dai_priv->clk)
+ return 0;
+
+ if (snd_soc_dai_active(dai)) {
+ cs48l32_asp_err(dai, "Can't change clock on active DAI\n");
+ return -EBUSY;
+ }
+
+ switch (clk_id) {
+ case CS48L32_CLK_SYSCLK_1:
+ target_asp_rate = 0U << CS48L32_ASP_RATE_SHIFT;
+ break;
+ case CS48L32_CLK_SYSCLK_2:
+ target_asp_rate = 1U << CS48L32_ASP_RATE_SHIFT;
+ break;
+ case CS48L32_CLK_SYSCLK_3:
+ target_asp_rate = 2U << CS48L32_ASP_RATE_SHIFT;
+ break;
+ case CS48L32_CLK_SYSCLK_4:
+ target_asp_rate = 3U << CS48L32_ASP_RATE_SHIFT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dai_priv->clk = clk_id;
+ cs48l32_asp_dbg(dai, "Setting to %s\n", cs48l32_dai_clk_str(clk_id));
+
+ if (base) {
+ ret = regmap_read(cs48l32_codec->core.regmap,
+ base + CS48L32_ASP_CONTROL1,
+ &current_asp_rate);
+ if (ret != 0) {
+ cs48l32_asp_err(dai, "Failed to check rate: %d\n", ret);
+ return ret;
+ }
+
+ if ((current_asp_rate & CS48L32_ASP_RATE_MASK) !=
+ (target_asp_rate & CS48L32_ASP_RATE_MASK)) {
+ change_rate_domain = true;
+
+ mutex_lock(&cs48l32_codec->rate_lock);
+ /* Guard the rate change with SYSCLK cycles */
+ cs48l32_spin_sysclk(cs48l32_codec);
+ }
+
+ snd_soc_component_update_bits(component, base + CS48L32_ASP_CONTROL1,
+ CS48L32_ASP_RATE_MASK, target_asp_rate);
+
+ if (change_rate_domain) {
+ cs48l32_spin_sysclk(cs48l32_codec);
+ mutex_unlock(&cs48l32_codec->rate_lock);
+ }
+ }
+
+ return 0;
+}
+
+static void cs48l32_set_channels_to_mask(struct snd_soc_dai *dai,
+ unsigned int base,
+ int channels, unsigned int mask)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ int slot, i, j = 0, shift;
+ unsigned int frame_ctls[2] = {0, 0};
+
+ for (i = 0; i < channels; ++i) {
+ slot = ffs(mask) - 1;
+ if (slot < 0)
+ return;
+
+ if (i - (j * 4) >= 4) {
+ ++j;
+ if (j >= 2)
+ break;
+ }
+
+ shift = (8 * (i - j * 4));
+
+ frame_ctls[j] |= slot << shift;
+
+ mask &= ~(1 << slot); /* ? mask ^= 1 << slot ? */
+ }
+
+ regmap_write(regmap, base, frame_ctls[0]);
+ regmap_write(regmap, base + 0x4, frame_ctls[1]);
+
+ if (mask)
+ cs48l32_asp_warn(dai, "Too many channels in TDM mask\n");
+}
+
+static int cs48l32_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+ unsigned int rx_mask, int slots, int slot_width)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ int base = dai->driver->base;
+ int rx_max_chan = dai->driver->playback.channels_max;
+ int tx_max_chan = dai->driver->capture.channels_max;
+
+ /* Only support TDM for the physical ASPs */
+ if (dai->id > CS48L32_MAX_ASP)
+ return -EINVAL;
+
+ if (slots == 0) {
+ tx_mask = (1 << tx_max_chan) - 1;
+ rx_mask = (1 << rx_max_chan) - 1;
+ }
+
+ cs48l32_set_channels_to_mask(dai, base + CS48L32_ASP_FRAME_CONTROL1,
+ tx_max_chan, tx_mask);
+ cs48l32_set_channels_to_mask(dai, base + CS48L32_ASP_FRAME_CONTROL5,
+ rx_max_chan, rx_mask);
+
+ cs48l32_codec->tdm_width[dai->id - 1] = slot_width;
+ cs48l32_codec->tdm_slots[dai->id - 1] = slots;
+
+ return 0;
+}
+
+static const struct snd_soc_dai_ops cs48l32_dai_ops = {
+ .probe = &cs48l32_asp_dai_probe,
+ .startup = &cs48l32_startup,
+ .set_fmt = &cs48l32_set_fmt,
+ .set_tdm_slot = &cs48l32_set_tdm_slot,
+ .hw_params = &cs48l32_hw_params,
+ .set_sysclk = &cs48l32_dai_set_sysclk,
+};
+
+static int cs48l32_sysclk_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+
+ cs48l32_spin_sysclk(cs48l32_codec);
+
+ return 0;
+}
+
+static int cs48l32_in_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ unsigned int reg;
+
+ if (w->shift % 2)
+ reg = CS48L32_IN1L_CONTROL2;
+ else
+ reg = CS48L32_IN1R_CONTROL2;
+
+ reg += (w->shift / 2) * (CS48L32_IN2L_CONTROL2 - CS48L32_IN1L_CONTROL2);
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+ switch (w->shift) {
+ case CS48L32_IN1L_EN_SHIFT:
+ snd_soc_component_update_bits(component,
+ CS48L32_ADC1L_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK);
+ break;
+ case CS48L32_IN1R_EN_SHIFT:
+ snd_soc_component_update_bits(component,
+ CS48L32_ADC1R_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK);
+ break;
+ default:
+ break;
+ }
+ cs48l32_codec->in_up_pending++;
+ break;
+ case SND_SOC_DAPM_POST_PMU:
+ usleep_range(200, 300);
+
+ switch (w->shift) {
+ case CS48L32_IN1L_EN_SHIFT:
+ snd_soc_component_update_bits(component,
+ CS48L32_ADC1L_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ 0);
+ break;
+ case CS48L32_IN1R_EN_SHIFT:
+ snd_soc_component_update_bits(component,
+ CS48L32_ADC1R_ANA_CONTROL1,
+ CS48L32_ADC1x_INT_ENA_FRC_MASK,
+ 0);
+ break;
+
+ default:
+ break;
+ }
+ cs48l32_codec->in_up_pending--;
+ snd_soc_component_update_bits(component, reg, CS48L32_INx_MUTE_MASK, 0);
+
+ /* Uncached write-only register, no need for update_bits */
+ if (!cs48l32_codec->in_up_pending) {
+ snd_soc_component_write(component, cs48l32_codec->in_vu_reg,
+ CS48L32_IN_VU_MASK);
+ }
+ break;
+ case SND_SOC_DAPM_PRE_PMD:
+ snd_soc_component_update_bits(component, reg,
+ CS48L32_INx_MUTE_MASK, CS48L32_INx_MUTE_MASK);
+ snd_soc_component_write(component, cs48l32_codec->in_vu_reg,
+ CS48L32_IN_VU_MASK);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int cs48l32_in_put_volsw(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ int ret;
+
+ ret = snd_soc_put_volsw(kcontrol, ucontrol);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Uncached write-only register, no need for update_bits.
+ * Will fail if codec is off but that will be handled by cs48l32_in_ev
+ */
+ snd_soc_component_write(component, cs48l32_codec->in_vu_reg, CS48L32_IN_VU);
+
+ return ret;
+}
+
+static bool cs48l32_eq_filter_unstable(bool mode, __be16 in_a, __be16 in_b)
+{
+ s16 a = be16_to_cpu(in_a);
+ s16 b = be16_to_cpu(in_b);
+
+ if (!mode)
+ return abs(a) > CS48L32_EQ_MAX_COEFF;
+
+ if (abs(b) > CS48L32_EQ_MAX_COEFF)
+ return true;
+
+ if (abs((a << 16) / (CS48L32_EQ_MAX_COEFF + 1 - b)) >= ((CS48L32_EQ_MAX_COEFF + 1) << 4))
+ return true;
+
+ return false;
+}
+
+static int cs48l32_eq_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ unsigned int mode = cs48l32_codec->eq_mode[w->shift];
+ unsigned int reg;
+ __be16 *data = &cs48l32_codec->eq_coefficients[w->shift][0];
+ int ret = 0;
+
+ reg = CS48L32_EQ1_BAND1_COEFF1;
+ reg += w->shift * (CS48L32_EQ2_BAND1_COEFF1 - CS48L32_EQ1_BAND1_COEFF1);
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+ if (cs48l32_eq_filter_unstable(!!mode, data[1], data[0]) ||
+ cs48l32_eq_filter_unstable(true, data[7], data[6]) ||
+ cs48l32_eq_filter_unstable(true, data[13], data[12]) ||
+ cs48l32_eq_filter_unstable(true, data[19], data[18]) ||
+ cs48l32_eq_filter_unstable(false, data[25], data[24])) {
+ dev_err(cs48l32_codec->core.dev, "Rejecting unstable EQ coefficients.\n");
+ ret = -EINVAL;
+ } else {
+ ret = regmap_raw_write(regmap, reg, data, CS48L32_EQ_BLOCK_SZ);
+ if (ret < 0) {
+ dev_err(cs48l32_codec->core.dev,
+ "Error writing EQ coefficients: %d\n", ret);
+ goto out;
+ }
+
+ ret = snd_soc_component_update_bits(component,
+ CS48L32_EQ_CONTROL2,
+ w->mask,
+ mode << w->shift);
+ if (ret < 0) {
+ dev_err(cs48l32_codec->core.dev,
+ "Error writing EQ mode: %d\n", ret);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static const struct snd_kcontrol_new cs48l32_snd_controls[] = {
+SOC_ENUM("IN1 OSR", cs48l32_in_dmic_osr[0]),
+SOC_ENUM("IN2 OSR", cs48l32_in_dmic_osr[1]),
+
+SOC_SINGLE_RANGE_TLV("IN1L Volume", CS48L32_IN1L_CONTROL2,
+ CS48L32_INx_PGA_VOL_SHIFT, 0x40, 0x5f, 0, cs48l32_ana_tlv),
+SOC_SINGLE_RANGE_TLV("IN1R Volume", CS48L32_IN1R_CONTROL2,
+ CS48L32_INx_PGA_VOL_SHIFT, 0x40, 0x5f, 0, cs48l32_ana_tlv),
+
+SOC_ENUM("IN HPF Cutoff Frequency", cs48l32_in_hpf_cut_enum),
+
+SOC_SINGLE_EXT("IN1L LP Switch", CS48L32_IN1L_CONTROL1, CS48L32_INx_LP_MODE_SHIFT,
+ 1, 0, snd_soc_get_volsw, cs48l32_low_power_mode_put),
+SOC_SINGLE_EXT("IN1R LP Switch", CS48L32_IN1R_CONTROL1, CS48L32_INx_LP_MODE_SHIFT,
+ 1, 0, snd_soc_get_volsw, cs48l32_low_power_mode_put),
+
+SOC_SINGLE("IN1L HPF Switch", CS48L32_IN1L_CONTROL1, CS48L32_INx_HPF_SHIFT, 1, 0),
+SOC_SINGLE("IN1R HPF Switch", CS48L32_IN1R_CONTROL1, CS48L32_INx_HPF_SHIFT, 1, 0),
+SOC_SINGLE("IN2L HPF Switch", CS48L32_IN2L_CONTROL1, CS48L32_INx_HPF_SHIFT, 1, 0),
+SOC_SINGLE("IN2R HPF Switch", CS48L32_IN2R_CONTROL1, CS48L32_INx_HPF_SHIFT, 1, 0),
+
+SOC_SINGLE_EXT_TLV("IN1L Digital Volume", CS48L32_IN1L_CONTROL2,
+ CS48L32_INx_VOL_SHIFT, 0xbf, 0, snd_soc_get_volsw,
+ cs48l32_in_put_volsw, cs48l32_digital_tlv),
+SOC_SINGLE_EXT_TLV("IN1R Digital Volume", CS48L32_IN1R_CONTROL2,
+ CS48L32_INx_VOL_SHIFT, 0xbf, 0, snd_soc_get_volsw,
+ cs48l32_in_put_volsw, cs48l32_digital_tlv),
+SOC_SINGLE_EXT_TLV("IN2L Digital Volume", CS48L32_IN2L_CONTROL2,
+ CS48L32_INx_VOL_SHIFT, 0xbf, 0, snd_soc_get_volsw,
+ cs48l32_in_put_volsw, cs48l32_digital_tlv),
+SOC_SINGLE_EXT_TLV("IN2R Digital Volume", CS48L32_IN2R_CONTROL2,
+ CS48L32_INx_VOL_SHIFT, 0xbf, 0, snd_soc_get_volsw,
+ cs48l32_in_put_volsw, cs48l32_digital_tlv),
+
+SOC_ENUM("Input Ramp Up", cs48l32_in_vi_ramp),
+SOC_ENUM("Input Ramp Down", cs48l32_in_vd_ramp),
+
+CS48L32_RATE_ENUM("Ultrasonic 1 Rate", cs48l32_us_output_rate[0]),
+CS48L32_RATE_ENUM("Ultrasonic 2 Rate", cs48l32_us_output_rate[1]),
+
+SOC_ENUM("Ultrasonic 1 Freq", cs48l32_us_freq[0]),
+SOC_ENUM("Ultrasonic 2 Freq", cs48l32_us_freq[1]),
+
+SOC_SINGLE_TLV("Ultrasonic 1 Volume", CS48L32_US1_CONTROL, CS48L32_US1_GAIN_SHIFT,
+ 3, 0, cs48l32_us_tlv),
+SOC_SINGLE_TLV("Ultrasonic 2 Volume", CS48L32_US2_CONTROL, CS48L32_US1_GAIN_SHIFT,
+ 3, 0, cs48l32_us_tlv),
+
+SOC_ENUM("Ultrasonic 1 Detect Threshold", cs48l32_us_det_thr[0]),
+SOC_ENUM("Ultrasonic 2 Detect Threshold", cs48l32_us_det_thr[1]),
+
+SOC_ENUM("Ultrasonic 1 Detect Pulse Length", cs48l32_us_det_num[0]),
+SOC_ENUM("Ultrasonic 2 Detect Pulse Length", cs48l32_us_det_num[1]),
+
+SOC_ENUM("Ultrasonic 1 Detect Hold", cs48l32_us_det_hold[0]),
+SOC_ENUM("Ultrasonic 2 Detect Hold", cs48l32_us_det_hold[1]),
+
+SOC_ENUM("Ultrasonic 1 Detect Decay", cs48l32_us_det_dcy[0]),
+SOC_ENUM("Ultrasonic 2 Detect Decay", cs48l32_us_det_dcy[1]),
+
+SOC_SINGLE("Ultrasonic 1 Detect LPF Switch",
+ CS48L32_US1_DET_CONTROL, CS48L32_US1_DET_LPF_SHIFT, 1, 0),
+SOC_SINGLE("Ultrasonic 2 Detect LPF Switch",
+ CS48L32_US2_DET_CONTROL, CS48L32_US1_DET_LPF_SHIFT, 1, 0),
+
+SOC_ENUM("Ultrasonic 1 Detect LPF Cut-off", cs48l32_us_det_lpf_cut[0]),
+SOC_ENUM("Ultrasonic 2 Detect LPF Cut-off", cs48l32_us_det_lpf_cut[1]),
+
+CS48L32_MIXER_CONTROLS("EQ1", CS48L32_EQ1_INPUT1),
+CS48L32_MIXER_CONTROLS("EQ2", CS48L32_EQ2_INPUT1),
+CS48L32_MIXER_CONTROLS("EQ3", CS48L32_EQ3_INPUT1),
+CS48L32_MIXER_CONTROLS("EQ4", CS48L32_EQ4_INPUT1),
+
+SOC_ENUM_EXT("EQ1 Mode", cs48l32_eq_mode[0], cs48l32_eq_mode_get, cs48l32_eq_mode_put),
+
+CS48L32_EQ_COEFF_CONTROLS(EQ1),
+
+SOC_SINGLE_TLV("EQ1 B1 Volume", CS48L32_EQ1_GAIN1, 0, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ1 B2 Volume", CS48L32_EQ1_GAIN1, 8, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ1 B3 Volume", CS48L32_EQ1_GAIN1, 16, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ1 B4 Volume", CS48L32_EQ1_GAIN1, 24, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ1 B5 Volume", CS48L32_EQ1_GAIN2, 0, 24, 0, cs48l32_eq_tlv),
+
+SOC_ENUM_EXT("EQ2 Mode", cs48l32_eq_mode[1], cs48l32_eq_mode_get, cs48l32_eq_mode_put),
+CS48L32_EQ_COEFF_CONTROLS(EQ2),
+SOC_SINGLE_TLV("EQ2 B1 Volume", CS48L32_EQ2_GAIN1, 0, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ2 B2 Volume", CS48L32_EQ2_GAIN1, 8, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ2 B3 Volume", CS48L32_EQ2_GAIN1, 16, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ2 B4 Volume", CS48L32_EQ2_GAIN1, 24, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ2 B5 Volume", CS48L32_EQ2_GAIN2, 0, 24, 0, cs48l32_eq_tlv),
+
+SOC_ENUM_EXT("EQ3 Mode", cs48l32_eq_mode[2], cs48l32_eq_mode_get, cs48l32_eq_mode_put),
+CS48L32_EQ_COEFF_CONTROLS(EQ3),
+SOC_SINGLE_TLV("EQ3 B1 Volume", CS48L32_EQ3_GAIN1, 0, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ3 B2 Volume", CS48L32_EQ3_GAIN1, 8, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ3 B3 Volume", CS48L32_EQ3_GAIN1, 16, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ3 B4 Volume", CS48L32_EQ3_GAIN1, 24, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ3 B5 Volume", CS48L32_EQ3_GAIN2, 0, 24, 0, cs48l32_eq_tlv),
+
+SOC_ENUM_EXT("EQ4 Mode", cs48l32_eq_mode[3], cs48l32_eq_mode_get, cs48l32_eq_mode_put),
+CS48L32_EQ_COEFF_CONTROLS(EQ4),
+SOC_SINGLE_TLV("EQ4 B1 Volume", CS48L32_EQ4_GAIN1, 0, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ4 B2 Volume", CS48L32_EQ4_GAIN1, 8, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ4 B3 Volume", CS48L32_EQ4_GAIN1, 16, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ4 B4 Volume", CS48L32_EQ4_GAIN1, 24, 24, 0, cs48l32_eq_tlv),
+SOC_SINGLE_TLV("EQ4 B5 Volume", CS48L32_EQ4_GAIN2, 0, 24, 0, cs48l32_eq_tlv),
+
+CS48L32_MIXER_CONTROLS("DRC1L", CS48L32_DRC1L_INPUT1),
+CS48L32_MIXER_CONTROLS("DRC1R", CS48L32_DRC1R_INPUT1),
+CS48L32_MIXER_CONTROLS("DRC2L", CS48L32_DRC2L_INPUT1),
+CS48L32_MIXER_CONTROLS("DRC2R", CS48L32_DRC2R_INPUT1),
+
+SND_SOC_BYTES_MASK("DRC1 Coefficients", CS48L32_DRC1_CONTROL1, 4,
+ BIT(CS48L32_DRC1R_EN_SHIFT) | BIT(CS48L32_DRC1L_EN_SHIFT)),
+SND_SOC_BYTES_MASK("DRC2 Coefficients", CS48L32_DRC2_CONTROL1, 4,
+ BIT(CS48L32_DRC1R_EN_SHIFT) | BIT(CS48L32_DRC1L_EN_SHIFT)),
+
+CS48L32_MIXER_CONTROLS("LHPF1", CS48L32_LHPF1_INPUT1),
+CS48L32_MIXER_CONTROLS("LHPF2", CS48L32_LHPF2_INPUT1),
+CS48L32_MIXER_CONTROLS("LHPF3", CS48L32_LHPF3_INPUT1),
+CS48L32_MIXER_CONTROLS("LHPF4", CS48L32_LHPF4_INPUT1),
+
+CS48L32_LHPF_CONTROL("LHPF1 Coefficients", CS48L32_LHPF1_COEFF),
+CS48L32_LHPF_CONTROL("LHPF2 Coefficients", CS48L32_LHPF2_COEFF),
+CS48L32_LHPF_CONTROL("LHPF3 Coefficients", CS48L32_LHPF3_COEFF),
+CS48L32_LHPF_CONTROL("LHPF4 Coefficients", CS48L32_LHPF4_COEFF),
+
+SOC_ENUM("LHPF1 Mode", cs48l32_lhpf_mode[0]),
+SOC_ENUM("LHPF2 Mode", cs48l32_lhpf_mode[1]),
+SOC_ENUM("LHPF3 Mode", cs48l32_lhpf_mode[2]),
+SOC_ENUM("LHPF4 Mode", cs48l32_lhpf_mode[3]),
+
+CS48L32_RATE_CONTROL("Sample Rate 1", 1),
+CS48L32_RATE_CONTROL("Sample Rate 2", 2),
+CS48L32_RATE_CONTROL("Sample Rate 3", 3),
+CS48L32_RATE_CONTROL("Sample Rate 4", 4),
+
+CS48L32_RATE_ENUM("FX Rate", cs48l32_fx_rate),
+
+CS48L32_RATE_ENUM("ISRC1 FSL", cs48l32_isrc_fsl[0]),
+CS48L32_RATE_ENUM("ISRC2 FSL", cs48l32_isrc_fsl[1]),
+CS48L32_RATE_ENUM("ISRC3 FSL", cs48l32_isrc_fsl[2]),
+CS48L32_RATE_ENUM("ISRC1 FSH", cs48l32_isrc_fsh[0]),
+CS48L32_RATE_ENUM("ISRC2 FSH", cs48l32_isrc_fsh[1]),
+CS48L32_RATE_ENUM("ISRC3 FSH", cs48l32_isrc_fsh[2]),
+
+SOC_ENUM("AUXPDM1 Rate", cs48l32_auxpdm1_freq),
+SOC_ENUM("AUXPDM2 Rate", cs48l32_auxpdm2_freq),
+
+SOC_ENUM_EXT("IN1L Rate", cs48l32_input_rate[0], snd_soc_get_enum_double, cs48l32_in_rate_put),
+SOC_ENUM_EXT("IN1R Rate", cs48l32_input_rate[1], snd_soc_get_enum_double, cs48l32_in_rate_put),
+SOC_ENUM_EXT("IN2L Rate", cs48l32_input_rate[2], snd_soc_get_enum_double, cs48l32_in_rate_put),
+SOC_ENUM_EXT("IN2R Rate", cs48l32_input_rate[3], snd_soc_get_enum_double, cs48l32_in_rate_put),
+
+CS48L32_RATE_ENUM("Noise Generator Rate", noise_gen_rate),
+
+SOC_SINGLE_TLV("Noise Generator Volume", CS48L32_COMFORT_NOISE_GENERATOR,
+ CS48L32_NOISE_GEN_GAIN_SHIFT, 0x12, 0, cs48l32_noise_tlv),
+
+CS48L32_MIXER_CONTROLS("ASP1TX1", CS48L32_ASP1TX1_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX2", CS48L32_ASP1TX2_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX3", CS48L32_ASP1TX3_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX4", CS48L32_ASP1TX4_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX5", CS48L32_ASP1TX5_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX6", CS48L32_ASP1TX6_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX7", CS48L32_ASP1TX7_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP1TX8", CS48L32_ASP1TX8_INPUT1),
+
+CS48L32_MIXER_CONTROLS("ASP2TX1", CS48L32_ASP2TX1_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP2TX2", CS48L32_ASP2TX2_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP2TX3", CS48L32_ASP2TX3_INPUT1),
+CS48L32_MIXER_CONTROLS("ASP2TX4", CS48L32_ASP2TX4_INPUT1),
+
+WM_ADSP2_PRELOAD_SWITCH("DSP1", 1),
+
+CS48L32_MIXER_CONTROLS("DSP1RX1", CS48L32_DSP1RX1_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX2", CS48L32_DSP1RX2_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX3", CS48L32_DSP1RX3_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX4", CS48L32_DSP1RX4_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX5", CS48L32_DSP1RX5_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX6", CS48L32_DSP1RX6_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX7", CS48L32_DSP1RX7_INPUT1),
+CS48L32_MIXER_CONTROLS("DSP1RX8", CS48L32_DSP1RX8_INPUT1),
+
+WM_ADSP_FW_CONTROL("DSP1", 0),
+
+CS48L32_DSP_RATE_CONTROL("DSP1RX1", 0),
+CS48L32_DSP_RATE_CONTROL("DSP1RX2", 1),
+CS48L32_DSP_RATE_CONTROL("DSP1RX3", 2),
+CS48L32_DSP_RATE_CONTROL("DSP1RX4", 3),
+CS48L32_DSP_RATE_CONTROL("DSP1RX5", 4),
+CS48L32_DSP_RATE_CONTROL("DSP1RX6", 5),
+CS48L32_DSP_RATE_CONTROL("DSP1RX7", 6),
+CS48L32_DSP_RATE_CONTROL("DSP1RX8", 7),
+CS48L32_DSP_RATE_CONTROL("DSP1TX1", 8),
+CS48L32_DSP_RATE_CONTROL("DSP1TX2", 9),
+CS48L32_DSP_RATE_CONTROL("DSP1TX3", 10),
+CS48L32_DSP_RATE_CONTROL("DSP1TX4", 11),
+CS48L32_DSP_RATE_CONTROL("DSP1TX5", 12),
+CS48L32_DSP_RATE_CONTROL("DSP1TX6", 13),
+CS48L32_DSP_RATE_CONTROL("DSP1TX7", 14),
+CS48L32_DSP_RATE_CONTROL("DSP1TX8", 15),
+};
+
+CS48L32_MIXER_ENUMS(EQ1, CS48L32_EQ1_INPUT1);
+CS48L32_MIXER_ENUMS(EQ2, CS48L32_EQ2_INPUT1);
+CS48L32_MIXER_ENUMS(EQ3, CS48L32_EQ3_INPUT1);
+CS48L32_MIXER_ENUMS(EQ4, CS48L32_EQ4_INPUT1);
+
+CS48L32_MIXER_ENUMS(DRC1L, CS48L32_DRC1L_INPUT1);
+CS48L32_MIXER_ENUMS(DRC1R, CS48L32_DRC1R_INPUT1);
+CS48L32_MIXER_ENUMS(DRC2L, CS48L32_DRC2L_INPUT1);
+CS48L32_MIXER_ENUMS(DRC2R, CS48L32_DRC2R_INPUT1);
+
+CS48L32_MIXER_ENUMS(LHPF1, CS48L32_LHPF1_INPUT1);
+CS48L32_MIXER_ENUMS(LHPF2, CS48L32_LHPF2_INPUT1);
+CS48L32_MIXER_ENUMS(LHPF3, CS48L32_LHPF3_INPUT1);
+CS48L32_MIXER_ENUMS(LHPF4, CS48L32_LHPF4_INPUT1);
+
+CS48L32_MIXER_ENUMS(ASP1TX1, CS48L32_ASP1TX1_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX2, CS48L32_ASP1TX2_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX3, CS48L32_ASP1TX3_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX4, CS48L32_ASP1TX4_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX5, CS48L32_ASP1TX5_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX6, CS48L32_ASP1TX6_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX7, CS48L32_ASP1TX7_INPUT1);
+CS48L32_MIXER_ENUMS(ASP1TX8, CS48L32_ASP1TX8_INPUT1);
+
+CS48L32_MIXER_ENUMS(ASP2TX1, CS48L32_ASP2TX1_INPUT1);
+CS48L32_MIXER_ENUMS(ASP2TX2, CS48L32_ASP2TX2_INPUT1);
+CS48L32_MIXER_ENUMS(ASP2TX3, CS48L32_ASP2TX3_INPUT1);
+CS48L32_MIXER_ENUMS(ASP2TX4, CS48L32_ASP2TX4_INPUT1);
+
+CS48L32_MUX_ENUMS(ISRC1INT1, CS48L32_ISRC1INT1_INPUT1);
+CS48L32_MUX_ENUMS(ISRC1INT2, CS48L32_ISRC1INT2_INPUT1);
+CS48L32_MUX_ENUMS(ISRC1INT3, CS48L32_ISRC1INT3_INPUT1);
+CS48L32_MUX_ENUMS(ISRC1INT4, CS48L32_ISRC1INT4_INPUT1);
+
+CS48L32_MUX_ENUMS(ISRC1DEC1, CS48L32_ISRC1DEC1_INPUT1);
+CS48L32_MUX_ENUMS(ISRC1DEC2, CS48L32_ISRC1DEC2_INPUT1);
+CS48L32_MUX_ENUMS(ISRC1DEC3, CS48L32_ISRC1DEC3_INPUT1);
+CS48L32_MUX_ENUMS(ISRC1DEC4, CS48L32_ISRC1DEC4_INPUT1);
+
+CS48L32_MUX_ENUMS(ISRC2INT1, CS48L32_ISRC2INT1_INPUT1);
+CS48L32_MUX_ENUMS(ISRC2INT2, CS48L32_ISRC2INT2_INPUT1);
+
+CS48L32_MUX_ENUMS(ISRC2DEC1, CS48L32_ISRC2DEC1_INPUT1);
+CS48L32_MUX_ENUMS(ISRC2DEC2, CS48L32_ISRC2DEC2_INPUT1);
+
+CS48L32_MUX_ENUMS(ISRC3INT1, CS48L32_ISRC3INT1_INPUT1);
+CS48L32_MUX_ENUMS(ISRC3INT2, CS48L32_ISRC3INT2_INPUT1);
+
+CS48L32_MUX_ENUMS(ISRC3DEC1, CS48L32_ISRC3DEC1_INPUT1);
+CS48L32_MUX_ENUMS(ISRC3DEC2, CS48L32_ISRC3DEC2_INPUT1);
+
+CS48L32_MIXER_ENUMS(DSP1RX1, CS48L32_DSP1RX1_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX2, CS48L32_DSP1RX2_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX3, CS48L32_DSP1RX3_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX4, CS48L32_DSP1RX4_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX5, CS48L32_DSP1RX5_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX6, CS48L32_DSP1RX6_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX7, CS48L32_DSP1RX7_INPUT1);
+CS48L32_MIXER_ENUMS(DSP1RX8, CS48L32_DSP1RX8_INPUT1);
+
+static int cs48l32_dsp_mem_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMU:
+ return cs48l32_dsp_memory_enable(cs48l32_codec, &cs48l32_dsp_sram_regs);
+ case SND_SOC_DAPM_PRE_PMD:
+ cs48l32_dsp_memory_disable(cs48l32_codec, &cs48l32_dsp_sram_regs);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static const struct snd_soc_dapm_widget cs48l32_dapm_widgets[] = {
+SND_SOC_DAPM_SUPPLY("SYSCLK", CS48L32_SYSTEM_CLOCK1, CS48L32_SYSCLK_EN_SHIFT, 0,
+ cs48l32_sysclk_ev, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+SND_SOC_DAPM_REGULATOR_SUPPLY("vdd-cp", 20, 0),
+
+SND_SOC_DAPM_SUPPLY("VOUT_MIC", CS48L32_CHARGE_PUMP1, CS48L32_CP2_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("VOUT_MIC_REGULATED", CS48L32_CHARGE_PUMP1, CS48L32_CP2_BYPASS_SHIFT,
+ 1, NULL, 0),
+SND_SOC_DAPM_SUPPLY("MICBIAS1", CS48L32_MICBIAS_CTRL1, CS48L32_MICB1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("MICBIAS1A", CS48L32_MICBIAS_CTRL5, CS48L32_MICB1A_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("MICBIAS1B", CS48L32_MICBIAS_CTRL5, CS48L32_MICB1B_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("MICBIAS1C", CS48L32_MICBIAS_CTRL5, CS48L32_MICB1C_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_SUPPLY("DSP1MEM", SND_SOC_NOPM, 0, 0, cs48l32_dsp_mem_ev,
+ SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+CS48L32_DSP_FREQ_WIDGET_EV("DSP1", 0, cs48l32_dsp_freq_ev),
+
+SND_SOC_DAPM_SIGGEN("TONE"),
+SND_SOC_DAPM_SIGGEN("NOISE"),
+
+SND_SOC_DAPM_INPUT("IN1LN_1"),
+SND_SOC_DAPM_INPUT("IN1LN_2"),
+SND_SOC_DAPM_INPUT("IN1LP_1"),
+SND_SOC_DAPM_INPUT("IN1LP_2"),
+SND_SOC_DAPM_INPUT("IN1RN_1"),
+SND_SOC_DAPM_INPUT("IN1RN_2"),
+SND_SOC_DAPM_INPUT("IN1RP_1"),
+SND_SOC_DAPM_INPUT("IN1RP_2"),
+SND_SOC_DAPM_INPUT("IN1_PDMCLK"),
+SND_SOC_DAPM_INPUT("IN1_PDMDATA"),
+
+SND_SOC_DAPM_INPUT("IN2_PDMCLK"),
+SND_SOC_DAPM_INPUT("IN2_PDMDATA"),
+
+SND_SOC_DAPM_MUX("Ultrasonic 1 Input", SND_SOC_NOPM, 0, 0, &cs48l32_us_inmux[0]),
+SND_SOC_DAPM_MUX("Ultrasonic 2 Input", SND_SOC_NOPM, 0, 0, &cs48l32_us_inmux[1]),
+
+SND_SOC_DAPM_OUTPUT("DRC1 Signal Activity"),
+SND_SOC_DAPM_OUTPUT("DRC2 Signal Activity"),
+
+SND_SOC_DAPM_OUTPUT("DSP Trigger Out"),
+
+SND_SOC_DAPM_MUX("IN1L Mux", SND_SOC_NOPM, 0, 0, &cs48l32_inmux[0]),
+SND_SOC_DAPM_MUX("IN1R Mux", SND_SOC_NOPM, 0, 0, &cs48l32_inmux[1]),
+
+SND_SOC_DAPM_MUX("IN1L Mode", SND_SOC_NOPM, 0, 0, &cs48l32_dmode_mux[0]),
+SND_SOC_DAPM_MUX("IN1R Mode", SND_SOC_NOPM, 0, 0, &cs48l32_dmode_mux[0]),
+
+SND_SOC_DAPM_AIF_OUT("ASP1TX1", NULL, 0, CS48L32_ASP1_ENABLES1, 0, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX2", NULL, 1, CS48L32_ASP1_ENABLES1, 1, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX3", NULL, 2, CS48L32_ASP1_ENABLES1, 2, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX4", NULL, 3, CS48L32_ASP1_ENABLES1, 3, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX5", NULL, 4, CS48L32_ASP1_ENABLES1, 4, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX6", NULL, 5, CS48L32_ASP1_ENABLES1, 5, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX7", NULL, 6, CS48L32_ASP1_ENABLES1, 6, 0),
+SND_SOC_DAPM_AIF_OUT("ASP1TX8", NULL, 7, CS48L32_ASP1_ENABLES1, 7, 0),
+
+SND_SOC_DAPM_AIF_OUT("ASP2TX1", NULL, 0, CS48L32_ASP2_ENABLES1, 0, 0),
+SND_SOC_DAPM_AIF_OUT("ASP2TX2", NULL, 1, CS48L32_ASP2_ENABLES1, 1, 0),
+SND_SOC_DAPM_AIF_OUT("ASP2TX3", NULL, 2, CS48L32_ASP2_ENABLES1, 2, 0),
+SND_SOC_DAPM_AIF_OUT("ASP2TX4", NULL, 3, CS48L32_ASP2_ENABLES1, 3, 0),
+
+SND_SOC_DAPM_SWITCH("AUXPDM1 Output", CS48L32_AUXPDM_CONTROL1, 0, 0, &cs48l32_auxpdm_switch[0]),
+SND_SOC_DAPM_SWITCH("AUXPDM2 Output", CS48L32_AUXPDM_CONTROL1, 1, 0, &cs48l32_auxpdm_switch[1]),
+
+SND_SOC_DAPM_MUX("AUXPDM1 Input", SND_SOC_NOPM, 0, 0, &cs48l32_auxpdm_inmux[0]),
+SND_SOC_DAPM_MUX("AUXPDM2 Input", SND_SOC_NOPM, 0, 0, &cs48l32_auxpdm_inmux[1]),
+
+SND_SOC_DAPM_MUX("AUXPDM1 Analog Input", SND_SOC_NOPM, 0, 0,
+ &cs48l32_auxpdm_analog_inmux[0]),
+SND_SOC_DAPM_MUX("AUXPDM2 Analog Input", SND_SOC_NOPM, 0, 0,
+ &cs48l32_auxpdm_analog_inmux[1]),
+
+SND_SOC_DAPM_SWITCH("Ultrasonic 1 Detect", CS48L32_US_CONTROL,
+ CS48L32_US1_DET_EN_SHIFT, 0, &cs48l32_us_switch[0]),
+SND_SOC_DAPM_SWITCH("Ultrasonic 2 Detect", CS48L32_US_CONTROL,
+ CS48L32_US1_DET_EN_SHIFT, 0, &cs48l32_us_switch[1]),
+
+/*
+ * mux_in widgets : arranged in the order of sources
+ * specified in CS48L32_MIXER_INPUT_ROUTES
+ */
+SND_SOC_DAPM_PGA("Tone Generator 1", CS48L32_TONE_GENERATOR1, 0, 0, NULL, 0),
+SND_SOC_DAPM_PGA("Tone Generator 2", CS48L32_TONE_GENERATOR1, 1, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("Noise Generator", CS48L32_COMFORT_NOISE_GENERATOR,
+ CS48L32_NOISE_GEN_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA_E("IN1L PGA", CS48L32_INPUT_CONTROL, CS48L32_IN1L_EN_SHIFT,
+ 0, NULL, 0, cs48l32_in_ev,
+ SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+SND_SOC_DAPM_PGA_E("IN1R PGA", CS48L32_INPUT_CONTROL, CS48L32_IN1R_EN_SHIFT,
+ 0, NULL, 0, cs48l32_in_ev,
+ SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+SND_SOC_DAPM_PGA_E("IN2L PGA", CS48L32_INPUT_CONTROL, CS48L32_IN2L_EN_SHIFT,
+ 0, NULL, 0, cs48l32_in_ev,
+ SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+SND_SOC_DAPM_PGA_E("IN2R PGA", CS48L32_INPUT_CONTROL, CS48L32_IN2R_EN_SHIFT,
+ 0, NULL, 0, cs48l32_in_ev,
+ SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+
+SND_SOC_DAPM_AIF_IN("ASP1RX1", NULL, 0, CS48L32_ASP1_ENABLES1, 16, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX2", NULL, 1, CS48L32_ASP1_ENABLES1, 17, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX3", NULL, 2, CS48L32_ASP1_ENABLES1, 18, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX4", NULL, 3, CS48L32_ASP1_ENABLES1, 19, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX5", NULL, 4, CS48L32_ASP1_ENABLES1, 20, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX6", NULL, 5, CS48L32_ASP1_ENABLES1, 21, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX7", NULL, 6, CS48L32_ASP1_ENABLES1, 22, 0),
+SND_SOC_DAPM_AIF_IN("ASP1RX8", NULL, 7, CS48L32_ASP1_ENABLES1, 23, 0),
+
+SND_SOC_DAPM_AIF_IN("ASP2RX1", NULL, 0, CS48L32_ASP2_ENABLES1, 16, 0),
+SND_SOC_DAPM_AIF_IN("ASP2RX2", NULL, 1, CS48L32_ASP2_ENABLES1, 17, 0),
+SND_SOC_DAPM_AIF_IN("ASP2RX3", NULL, 2, CS48L32_ASP2_ENABLES1, 18, 0),
+SND_SOC_DAPM_AIF_IN("ASP2RX4", NULL, 3, CS48L32_ASP2_ENABLES1, 19, 0),
+
+SND_SOC_DAPM_PGA("ISRC1DEC1", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_DEC1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC1DEC2", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_DEC2_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC1DEC3", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_DEC3_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC1DEC4", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_DEC4_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("ISRC1INT1", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_INT1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC1INT2", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_INT2_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC1INT3", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_INT3_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC1INT4", CS48L32_ISRC1_CONTROL2, CS48L32_ISRC1_INT4_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("ISRC2DEC1", CS48L32_ISRC2_CONTROL2, CS48L32_ISRC1_DEC1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC2DEC2", CS48L32_ISRC2_CONTROL2, CS48L32_ISRC1_DEC2_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("ISRC2INT1", CS48L32_ISRC2_CONTROL2, CS48L32_ISRC1_INT1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC2INT2", CS48L32_ISRC2_CONTROL2, CS48L32_ISRC1_INT2_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("ISRC3DEC1", CS48L32_ISRC3_CONTROL2, CS48L32_ISRC1_DEC1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC3DEC2", CS48L32_ISRC3_CONTROL2, CS48L32_ISRC1_DEC2_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("ISRC3INT1", CS48L32_ISRC3_CONTROL2, CS48L32_ISRC1_INT1_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ISRC3INT2", CS48L32_ISRC3_CONTROL2, CS48L32_ISRC1_INT2_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA_E("EQ1", CS48L32_EQ_CONTROL1, 0, 0, NULL, 0, cs48l32_eq_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_PGA_E("EQ2", CS48L32_EQ_CONTROL1, 1, 0, NULL, 0, cs48l32_eq_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_PGA_E("EQ3", CS48L32_EQ_CONTROL1, 2, 0, NULL, 0, cs48l32_eq_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_PGA_E("EQ4", CS48L32_EQ_CONTROL1, 3, 0, NULL, 0, cs48l32_eq_ev, SND_SOC_DAPM_PRE_PMU),
+
+SND_SOC_DAPM_PGA("DRC1L", CS48L32_DRC1_CONTROL1, CS48L32_DRC1L_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("DRC1R", CS48L32_DRC1_CONTROL1, CS48L32_DRC1R_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("DRC2L", CS48L32_DRC2_CONTROL1, CS48L32_DRC1L_EN_SHIFT, 0, NULL, 0),
+SND_SOC_DAPM_PGA("DRC2R", CS48L32_DRC2_CONTROL1, CS48L32_DRC1R_EN_SHIFT, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("LHPF1", CS48L32_LHPF_CONTROL1, 0, 0, NULL, 0),
+SND_SOC_DAPM_PGA("LHPF2", CS48L32_LHPF_CONTROL1, 1, 0, NULL, 0),
+SND_SOC_DAPM_PGA("LHPF3", CS48L32_LHPF_CONTROL1, 2, 0, NULL, 0),
+SND_SOC_DAPM_PGA("LHPF4", CS48L32_LHPF_CONTROL1, 3, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("Ultrasonic 1", CS48L32_US_CONTROL, 0, 0, NULL, 0),
+SND_SOC_DAPM_PGA("Ultrasonic 2", CS48L32_US_CONTROL, 1, 0, NULL, 0),
+
+WM_ADSP2("DSP1", 0, wm_adsp_early_event),
+
+/* end of ordered widget list */
+
+CS48L32_MIXER_WIDGETS(EQ1, "EQ1"),
+CS48L32_MIXER_WIDGETS(EQ2, "EQ2"),
+CS48L32_MIXER_WIDGETS(EQ3, "EQ3"),
+CS48L32_MIXER_WIDGETS(EQ4, "EQ4"),
+
+CS48L32_MIXER_WIDGETS(DRC1L, "DRC1L"),
+CS48L32_MIXER_WIDGETS(DRC1R, "DRC1R"),
+CS48L32_MIXER_WIDGETS(DRC2L, "DRC2L"),
+CS48L32_MIXER_WIDGETS(DRC2R, "DRC2R"),
+
+SND_SOC_DAPM_SWITCH("DRC1 Activity Output", SND_SOC_NOPM, 0, 0,
+ &cs48l32_drc_activity_output_mux[0]),
+SND_SOC_DAPM_SWITCH("DRC2 Activity Output", SND_SOC_NOPM, 0, 0,
+ &cs48l32_drc_activity_output_mux[1]),
+
+CS48L32_MIXER_WIDGETS(LHPF1, "LHPF1"),
+CS48L32_MIXER_WIDGETS(LHPF2, "LHPF2"),
+CS48L32_MIXER_WIDGETS(LHPF3, "LHPF3"),
+CS48L32_MIXER_WIDGETS(LHPF4, "LHPF4"),
+
+CS48L32_MIXER_WIDGETS(ASP1TX1, "ASP1TX1"),
+CS48L32_MIXER_WIDGETS(ASP1TX2, "ASP1TX2"),
+CS48L32_MIXER_WIDGETS(ASP1TX3, "ASP1TX3"),
+CS48L32_MIXER_WIDGETS(ASP1TX4, "ASP1TX4"),
+CS48L32_MIXER_WIDGETS(ASP1TX5, "ASP1TX5"),
+CS48L32_MIXER_WIDGETS(ASP1TX6, "ASP1TX6"),
+CS48L32_MIXER_WIDGETS(ASP1TX7, "ASP1TX7"),
+CS48L32_MIXER_WIDGETS(ASP1TX8, "ASP1TX8"),
+
+CS48L32_MIXER_WIDGETS(ASP2TX1, "ASP2TX1"),
+CS48L32_MIXER_WIDGETS(ASP2TX2, "ASP2TX2"),
+CS48L32_MIXER_WIDGETS(ASP2TX3, "ASP2TX3"),
+CS48L32_MIXER_WIDGETS(ASP2TX4, "ASP2TX4"),
+
+CS48L32_MUX_WIDGETS(ISRC1DEC1, "ISRC1DEC1"),
+CS48L32_MUX_WIDGETS(ISRC1DEC2, "ISRC1DEC2"),
+CS48L32_MUX_WIDGETS(ISRC1DEC3, "ISRC1DEC3"),
+CS48L32_MUX_WIDGETS(ISRC1DEC4, "ISRC1DEC4"),
+
+CS48L32_MUX_WIDGETS(ISRC1INT1, "ISRC1INT1"),
+CS48L32_MUX_WIDGETS(ISRC1INT2, "ISRC1INT2"),
+CS48L32_MUX_WIDGETS(ISRC1INT3, "ISRC1INT3"),
+CS48L32_MUX_WIDGETS(ISRC1INT4, "ISRC1INT4"),
+
+CS48L32_MUX_WIDGETS(ISRC2DEC1, "ISRC2DEC1"),
+CS48L32_MUX_WIDGETS(ISRC2DEC2, "ISRC2DEC2"),
+
+CS48L32_MUX_WIDGETS(ISRC2INT1, "ISRC2INT1"),
+CS48L32_MUX_WIDGETS(ISRC2INT2, "ISRC2INT2"),
+
+CS48L32_MUX_WIDGETS(ISRC3DEC1, "ISRC3DEC1"),
+CS48L32_MUX_WIDGETS(ISRC3DEC2, "ISRC3DEC2"),
+
+CS48L32_MUX_WIDGETS(ISRC3INT1, "ISRC3INT1"),
+CS48L32_MUX_WIDGETS(ISRC3INT2, "ISRC3INT2"),
+
+CS48L32_MIXER_WIDGETS(DSP1RX1, "DSP1RX1"),
+CS48L32_MIXER_WIDGETS(DSP1RX2, "DSP1RX2"),
+CS48L32_MIXER_WIDGETS(DSP1RX3, "DSP1RX3"),
+CS48L32_MIXER_WIDGETS(DSP1RX4, "DSP1RX4"),
+CS48L32_MIXER_WIDGETS(DSP1RX5, "DSP1RX5"),
+CS48L32_MIXER_WIDGETS(DSP1RX6, "DSP1RX6"),
+CS48L32_MIXER_WIDGETS(DSP1RX7, "DSP1RX7"),
+CS48L32_MIXER_WIDGETS(DSP1RX8, "DSP1RX8"),
+
+SND_SOC_DAPM_SWITCH("DSP1 Trigger Output", SND_SOC_NOPM, 0, 0,
+ &cs48l32_dsp_trigger_output_mux[0]),
+
+SND_SOC_DAPM_OUTPUT("AUXPDM1_CLK"),
+SND_SOC_DAPM_OUTPUT("AUXPDM1_DOUT"),
+SND_SOC_DAPM_OUTPUT("AUXPDM2_CLK"),
+SND_SOC_DAPM_OUTPUT("AUXPDM2_DOUT"),
+
+SND_SOC_DAPM_OUTPUT("MICSUPP"),
+
+SND_SOC_DAPM_OUTPUT("Ultrasonic Dummy Output"),
+};
+
+static const struct snd_soc_dapm_route cs48l32_dapm_routes[] = {
+ { "IN1LN_1", NULL, "SYSCLK" },
+ { "IN1LN_2", NULL, "SYSCLK" },
+ { "IN1LP_1", NULL, "SYSCLK" },
+ { "IN1LP_2", NULL, "SYSCLK" },
+ { "IN1RN_1", NULL, "SYSCLK" },
+ { "IN1RN_2", NULL, "SYSCLK" },
+ { "IN1RP_1", NULL, "SYSCLK" },
+ { "IN1RP_2", NULL, "SYSCLK" },
+
+ { "IN1_PDMCLK", NULL, "SYSCLK" },
+ { "IN1_PDMDATA", NULL, "SYSCLK" },
+ { "IN2_PDMCLK", NULL, "SYSCLK" },
+ { "IN2_PDMDATA", NULL, "SYSCLK" },
+
+ { "DSP1 Preloader", NULL, "DSP1MEM" },
+ { "DSP1", NULL, "DSP1FREQ" },
+
+ { "Audio Trace DSP", NULL, "DSP1" },
+ { "Voice Ctrl DSP", NULL, "DSP1" },
+
+ { "VOUT_MIC_REGULATED", NULL, "VOUT_MIC" },
+ { "MICBIAS1", NULL, "VOUT_MIC_REGULATED" },
+ { "MICBIAS1A", NULL, "MICBIAS1" },
+ { "MICBIAS1B", NULL, "MICBIAS1" },
+ { "MICBIAS1C", NULL, "MICBIAS1" },
+
+ { "Tone Generator 1", NULL, "SYSCLK" },
+ { "Tone Generator 2", NULL, "SYSCLK" },
+ { "Noise Generator", NULL, "SYSCLK" },
+
+ { "Tone Generator 1", NULL, "TONE" },
+ { "Tone Generator 2", NULL, "TONE" },
+ { "Noise Generator", NULL, "NOISE" },
+
+ { "ASP1 Capture", NULL, "ASP1TX1" },
+ { "ASP1 Capture", NULL, "ASP1TX2" },
+ { "ASP1 Capture", NULL, "ASP1TX3" },
+ { "ASP1 Capture", NULL, "ASP1TX4" },
+ { "ASP1 Capture", NULL, "ASP1TX5" },
+ { "ASP1 Capture", NULL, "ASP1TX6" },
+ { "ASP1 Capture", NULL, "ASP1TX7" },
+ { "ASP1 Capture", NULL, "ASP1TX8" },
+
+ { "ASP1RX1", NULL, "ASP1 Playback" },
+ { "ASP1RX2", NULL, "ASP1 Playback" },
+ { "ASP1RX3", NULL, "ASP1 Playback" },
+ { "ASP1RX4", NULL, "ASP1 Playback" },
+ { "ASP1RX5", NULL, "ASP1 Playback" },
+ { "ASP1RX6", NULL, "ASP1 Playback" },
+ { "ASP1RX7", NULL, "ASP1 Playback" },
+ { "ASP1RX8", NULL, "ASP1 Playback" },
+
+ { "ASP2 Capture", NULL, "ASP2TX1" },
+ { "ASP2 Capture", NULL, "ASP2TX2" },
+ { "ASP2 Capture", NULL, "ASP2TX3" },
+ { "ASP2 Capture", NULL, "ASP2TX4" },
+
+ { "ASP2RX1", NULL, "ASP2 Playback" },
+ { "ASP2RX2", NULL, "ASP2 Playback" },
+ { "ASP2RX3", NULL, "ASP2 Playback" },
+ { "ASP2RX4", NULL, "ASP2 Playback" },
+
+ { "ASP1 Playback", NULL, "SYSCLK" },
+ { "ASP2 Playback", NULL, "SYSCLK" },
+
+ { "ASP1 Capture", NULL, "SYSCLK" },
+ { "ASP2 Capture", NULL, "SYSCLK" },
+
+ { "IN1L Mux", "Analog 1", "IN1LN_1" },
+ { "IN1L Mux", "Analog 2", "IN1LN_2" },
+ { "IN1L Mux", "Analog 1", "IN1LP_1" },
+ { "IN1L Mux", "Analog 2", "IN1LP_2" },
+ { "IN1R Mux", "Analog 1", "IN1RN_1" },
+ { "IN1R Mux", "Analog 2", "IN1RN_2" },
+ { "IN1R Mux", "Analog 1", "IN1RP_1" },
+ { "IN1R Mux", "Analog 2", "IN1RP_2" },
+
+ { "IN1L PGA", NULL, "IN1L Mode" },
+ { "IN1R PGA", NULL, "IN1R Mode" },
+
+ { "IN1L Mode", "Analog", "IN1L Mux" },
+ { "IN1R Mode", "Analog", "IN1R Mux" },
+
+ { "IN1L Mode", "Digital", "IN1_PDMCLK" },
+ { "IN1L Mode", "Digital", "IN1_PDMDATA" },
+ { "IN1R Mode", "Digital", "IN1_PDMCLK" },
+ { "IN1R Mode", "Digital", "IN1_PDMDATA" },
+
+ { "IN1L PGA", NULL, "VOUT_MIC" },
+ { "IN1R PGA", NULL, "VOUT_MIC" },
+
+ { "IN2L PGA", NULL, "VOUT_MIC" },
+ { "IN2R PGA", NULL, "VOUT_MIC" },
+
+ { "IN2L PGA", NULL, "IN2_PDMCLK" },
+ { "IN2R PGA", NULL, "IN2_PDMCLK" },
+ { "IN2L PGA", NULL, "IN2_PDMDATA" },
+ { "IN2R PGA", NULL, "IN2_PDMDATA" },
+
+ { "Ultrasonic 1", NULL, "Ultrasonic 1 Input" },
+ { "Ultrasonic 2", NULL, "Ultrasonic 2 Input" },
+
+ { "Ultrasonic 1 Input", "IN1L", "IN1L PGA" },
+ { "Ultrasonic 1 Input", "IN1R", "IN1R PGA" },
+ { "Ultrasonic 1 Input", "IN2L", "IN2L PGA" },
+ { "Ultrasonic 1 Input", "IN2R", "IN2R PGA" },
+
+ { "Ultrasonic 2 Input", "IN1L", "IN1L PGA" },
+ { "Ultrasonic 2 Input", "IN1R", "IN1R PGA" },
+ { "Ultrasonic 2 Input", "IN2L", "IN2L PGA" },
+ { "Ultrasonic 2 Input", "IN2R", "IN2R PGA" },
+
+ { "Ultrasonic 1 Detect", "Switch", "Ultrasonic 1 Input" },
+ { "Ultrasonic 2 Detect", "Switch", "Ultrasonic 2 Input" },
+
+ { "Ultrasonic Dummy Output", NULL, "Ultrasonic 1 Detect" },
+ { "Ultrasonic Dummy Output", NULL, "Ultrasonic 2 Detect" },
+
+ CS48L32_MIXER_ROUTES("ASP1TX1", "ASP1TX1"),
+ CS48L32_MIXER_ROUTES("ASP1TX2", "ASP1TX2"),
+ CS48L32_MIXER_ROUTES("ASP1TX3", "ASP1TX3"),
+ CS48L32_MIXER_ROUTES("ASP1TX4", "ASP1TX4"),
+ CS48L32_MIXER_ROUTES("ASP1TX5", "ASP1TX5"),
+ CS48L32_MIXER_ROUTES("ASP1TX6", "ASP1TX6"),
+ CS48L32_MIXER_ROUTES("ASP1TX7", "ASP1TX7"),
+ CS48L32_MIXER_ROUTES("ASP1TX8", "ASP1TX8"),
+
+ CS48L32_MIXER_ROUTES("ASP2TX1", "ASP2TX1"),
+ CS48L32_MIXER_ROUTES("ASP2TX2", "ASP2TX2"),
+ CS48L32_MIXER_ROUTES("ASP2TX3", "ASP2TX3"),
+ CS48L32_MIXER_ROUTES("ASP2TX4", "ASP2TX4"),
+
+ CS48L32_MIXER_ROUTES("EQ1", "EQ1"),
+ CS48L32_MIXER_ROUTES("EQ2", "EQ2"),
+ CS48L32_MIXER_ROUTES("EQ3", "EQ3"),
+ CS48L32_MIXER_ROUTES("EQ4", "EQ4"),
+
+ CS48L32_MIXER_ROUTES("DRC1L", "DRC1L"),
+ CS48L32_MIXER_ROUTES("DRC1R", "DRC1R"),
+ CS48L32_MIXER_ROUTES("DRC2L", "DRC2L"),
+ CS48L32_MIXER_ROUTES("DRC2R", "DRC2R"),
+
+ CS48L32_MIXER_ROUTES("LHPF1", "LHPF1"),
+ CS48L32_MIXER_ROUTES("LHPF2", "LHPF2"),
+ CS48L32_MIXER_ROUTES("LHPF3", "LHPF3"),
+ CS48L32_MIXER_ROUTES("LHPF4", "LHPF4"),
+
+ CS48L32_MUX_ROUTES("ISRC1INT1", "ISRC1INT1"),
+ CS48L32_MUX_ROUTES("ISRC1INT2", "ISRC1INT2"),
+ CS48L32_MUX_ROUTES("ISRC1INT3", "ISRC1INT3"),
+ CS48L32_MUX_ROUTES("ISRC1INT4", "ISRC1INT4"),
+
+ CS48L32_MUX_ROUTES("ISRC1DEC1", "ISRC1DEC1"),
+ CS48L32_MUX_ROUTES("ISRC1DEC2", "ISRC1DEC2"),
+ CS48L32_MUX_ROUTES("ISRC1DEC3", "ISRC1DEC3"),
+ CS48L32_MUX_ROUTES("ISRC1DEC4", "ISRC1DEC4"),
+
+ CS48L32_MUX_ROUTES("ISRC2INT1", "ISRC2INT1"),
+ CS48L32_MUX_ROUTES("ISRC2INT2", "ISRC2INT2"),
+
+ CS48L32_MUX_ROUTES("ISRC2DEC1", "ISRC2DEC1"),
+ CS48L32_MUX_ROUTES("ISRC2DEC2", "ISRC2DEC2"),
+
+ CS48L32_MUX_ROUTES("ISRC3INT1", "ISRC3INT1"),
+ CS48L32_MUX_ROUTES("ISRC3INT2", "ISRC3INT2"),
+
+ CS48L32_MUX_ROUTES("ISRC3DEC1", "ISRC3DEC1"),
+ CS48L32_MUX_ROUTES("ISRC3DEC2", "ISRC3DEC2"),
+
+ CS48L32_DSP_ROUTES_1_8_SYSCLK("DSP1"),
+
+ { "DSP Trigger Out", NULL, "DSP1 Trigger Output" },
+
+ { "DSP1 Trigger Output", "Switch", "DSP1" },
+
+ { "AUXPDM1 Analog Input", "IN1L", "IN1L PGA" },
+ { "AUXPDM1 Analog Input", "IN1R", "IN1R PGA" },
+
+ { "AUXPDM2 Analog Input", "IN1L", "IN1L PGA" },
+ { "AUXPDM2 Analog Input", "IN1R", "IN1R PGA" },
+
+ { "AUXPDM1 Input", "Analog", "AUXPDM1 Analog Input" },
+ { "AUXPDM1 Input", "IN1 Digital", "IN1L PGA" },
+ { "AUXPDM1 Input", "IN1 Digital", "IN1R PGA" },
+ { "AUXPDM1 Input", "IN2 Digital", "IN2L PGA" },
+ { "AUXPDM1 Input", "IN2 Digital", "IN2R PGA" },
+
+ { "AUXPDM2 Input", "Analog", "AUXPDM2 Analog Input" },
+ { "AUXPDM2 Input", "IN1 Digital", "IN1L PGA" },
+ { "AUXPDM2 Input", "IN1 Digital", "IN1R PGA" },
+ { "AUXPDM2 Input", "IN2 Digital", "IN2L PGA" },
+ { "AUXPDM2 Input", "IN2 Digital", "IN2R PGA" },
+
+ { "AUXPDM1 Output", "Switch", "AUXPDM1 Input" },
+ { "AUXPDM1_CLK", NULL, "AUXPDM1 Output" },
+ { "AUXPDM1_DOUT", NULL, "AUXPDM1 Output" },
+
+ { "AUXPDM2 Output", "Switch", "AUXPDM2 Input" },
+ { "AUXPDM2_CLK", NULL, "AUXPDM2 Output" },
+ { "AUXPDM2_DOUT", NULL, "AUXPDM2 Output" },
+
+ { "MICSUPP", NULL, "SYSCLK" },
+
+ { "DRC1 Signal Activity", NULL, "DRC1 Activity Output" },
+ { "DRC2 Signal Activity", NULL, "DRC2 Activity Output" },
+ { "DRC1 Activity Output", "Switch", "DRC1L" },
+ { "DRC1 Activity Output", "Switch", "DRC1R" },
+ { "DRC2 Activity Output", "Switch", "DRC2L" },
+ { "DRC2 Activity Output", "Switch", "DRC2R" },
+};
+
+static int cs48l32_compr_open(struct snd_soc_component *component,
+ struct snd_compr_stream *stream)
+{
+ struct snd_soc_pcm_runtime *rtd = stream->private_data;
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+
+ if (strcmp(snd_soc_rtd_to_codec(rtd, 0)->name, "cs48l32-dsp-trace") &&
+ strcmp(snd_soc_rtd_to_codec(rtd, 0)->name, "cs48l32-dsp-voicectrl")) {
+ dev_err(cs48l32_codec->core.dev, "No suitable compressed stream for DAI '%s'\n",
+ snd_soc_rtd_to_codec(rtd, 0)->name);
+ return -EINVAL;
+ }
+
+ return wm_adsp_compr_open(&cs48l32_codec->dsp, stream);
+}
+
+static const struct snd_compress_ops cs48l32_compress_ops = {
+ .open = &cs48l32_compr_open,
+ .free = &wm_adsp_compr_free,
+ .set_params = &wm_adsp_compr_set_params,
+ .get_caps = &wm_adsp_compr_get_caps,
+ .trigger = &wm_adsp_compr_trigger,
+ .pointer = &wm_adsp_compr_pointer,
+ .copy = &wm_adsp_compr_copy,
+};
+
+static const struct snd_soc_dai_ops cs48l32_compress_dai_ops = {
+ .compress_new = snd_soc_new_compress,
+};
+
+static struct snd_soc_dai_driver cs48l32_dai[] = {
+ {
+ .name = "cs48l32-asp1",
+ .id = 1,
+ .base = CS48L32_ASP1_ENABLES1,
+ .playback = {
+ .stream_name = "ASP1 Playback",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ .capture = {
+ .stream_name = "ASP1 Capture",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ .ops = &cs48l32_dai_ops,
+ .symmetric_rate = 1,
+ .symmetric_sample_bits = 1,
+ },
+ {
+ .name = "cs48l32-asp2",
+ .id = 2,
+ .base = CS48L32_ASP2_ENABLES1,
+ .playback = {
+ .stream_name = "ASP2 Playback",
+ .channels_min = 1,
+ .channels_max = 4,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ .capture = {
+ .stream_name = "ASP2 Capture",
+ .channels_min = 1,
+ .channels_max = 4,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ .ops = &cs48l32_dai_ops,
+ .symmetric_rate = 1,
+ .symmetric_sample_bits = 1,
+ },
+ {
+ .name = "cs48l32-cpu-trace",
+ .id = 3,
+ .capture = {
+ .stream_name = "Audio Trace CPU",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ .ops = &cs48l32_compress_dai_ops,
+ },
+ {
+ .name = "cs48l32-dsp-trace",
+ .id = 4,
+ .capture = {
+ .stream_name = "Audio Trace DSP",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ },
+ {
+ .name = "cs48l32-cpu-voicectrl",
+ .id = 5,
+ .capture = {
+ .stream_name = "Voice Ctrl CPU",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ .ops = &cs48l32_compress_dai_ops,
+ },
+ {
+ .name = "cs48l32-dsp-voicectrl",
+ .id = 6,
+ .capture = {
+ .stream_name = "Voice Ctrl DSP",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = CS48L32_RATES,
+ .formats = CS48L32_FORMATS,
+ },
+ },
+};
+
+static int cs48l32_init_inputs(struct snd_soc_component *component)
+{
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ unsigned int ana_mode_l, ana_mode_r, dig_mode;
+ int i;
+
+ /*
+ * Initialize input modes from the A settings. For muxed inputs the
+ * B settings will be applied if the mux is changed
+ */
+ switch (cs48l32_codec->in_type[0][0]) {
+ default:
+ case CS48L32_IN_TYPE_DIFF:
+ ana_mode_l = 0;
+ break;
+ case CS48L32_IN_TYPE_SE:
+ ana_mode_l = 1 << CS48L32_INx_SRC_SHIFT;
+ break;
+ }
+
+ switch (cs48l32_codec->in_type[1][0]) {
+ default:
+ case CS48L32_IN_TYPE_DIFF:
+ ana_mode_r = 0;
+ break;
+ case CS48L32_IN_TYPE_SE:
+ ana_mode_r = 1 << CS48L32_INx_SRC_SHIFT;
+ break;
+ }
+
+ dev_dbg(cs48l32_codec->core.dev, "IN1_1 Analogue mode=#%x,#%x\n",
+ ana_mode_l, ana_mode_r);
+
+ regmap_update_bits(regmap,
+ CS48L32_IN1L_CONTROL1,
+ CS48L32_INx_SRC_MASK,
+ ana_mode_l);
+
+ regmap_update_bits(regmap,
+ CS48L32_IN1R_CONTROL1,
+ CS48L32_INx_SRC_MASK,
+ ana_mode_r);
+
+ for (i = 0; i < ARRAY_SIZE(cs48l32_codec->pdm_sup); i++) {
+ dig_mode = cs48l32_codec->pdm_sup[i] << CS48L32_IN1_PDM_SUP_SHIFT;
+
+ dev_dbg(cs48l32_codec->core.dev, "IN%d PDM_SUP=#%x\n", i + 1, dig_mode);
+
+ regmap_update_bits(regmap,
+ CS48L32_INPUT1_CONTROL1 + (i * 0x40),
+ CS48L32_IN1_PDM_SUP_MASK, dig_mode);
+ }
+
+ return 0;
+}
+
+static int cs48l32_init_dai(struct cs48l32_codec *cs48l32_codec, int id)
+{
+ struct cs48l32_dai_priv *dai_priv = &cs48l32_codec->dai[id];
+
+ dai_priv->clk = CS48L32_CLK_SYSCLK_1;
+ dai_priv->constraint = cs48l32_constraint;
+
+ return 0;
+}
+
+static int cs48l32_init_eq(struct cs48l32_codec *cs48l32_codec)
+{
+ struct regmap *regmap = cs48l32_codec->core.regmap;
+ unsigned int reg = CS48L32_EQ1_BAND1_COEFF1, mode;
+ __be16 *data;
+ int i, ret;
+
+ ret = regmap_read(regmap, CS48L32_EQ_CONTROL2, &mode);
+ if (ret < 0) {
+ dev_err(cs48l32_codec->core.dev, "Error reading EQ mode: %d\n", ret);
+ goto out;
+ }
+
+ for (i = 0; i < 4; ++i) {
+ cs48l32_codec->eq_mode[i] = (mode >> i) & 0x1;
+
+ data = &cs48l32_codec->eq_coefficients[i][0];
+ ret = regmap_raw_read(regmap, reg + (i * 68), data,
+ CS48L32_EQ_BLOCK_SZ);
+ if (ret < 0) {
+ dev_err(cs48l32_codec->core.dev,
+ "Error reading EQ coefficients: %d\n", ret);
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int cs48l32_component_probe(struct snd_soc_component *component)
+{
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+ int i, ret;
+
+ snd_soc_component_init_regmap(component, cs48l32_codec->core.regmap);
+
+ ret = cs48l32_init_inputs(component);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(cs48l32_dai); i++)
+ cs48l32_init_dai(cs48l32_codec, i);
+
+ ret = cs48l32_init_eq(cs48l32_codec);
+ if (ret)
+ return ret;
+
+ wm_adsp2_component_probe(&cs48l32_codec->dsp, component);
+
+ /* Unmask DSP IRQs */
+ regmap_clear_bits(cs48l32_codec->core.regmap, CS48L32_IRQ1_MASK_7,
+ CS48L32_DSP1_MPU_ERR_EINT1_MASK | CS48L32_DSP1_WDT_EXPIRE_EINT1_MASK);
+ regmap_clear_bits(cs48l32_codec->core.regmap, CS48L32_IRQ1_MASK_9,
+ CS48L32_DSP1_IRQ0_EINT1_MASK);
+
+ return 0;
+}
+
+static void cs48l32_component_remove(struct snd_soc_component *component)
+{
+ struct cs48l32_codec *cs48l32_codec = snd_soc_component_get_drvdata(component);
+
+ /* Mask DSP IRQs */
+ regmap_set_bits(cs48l32_codec->core.regmap, CS48L32_IRQ1_MASK_7,
+ CS48L32_DSP1_MPU_ERR_EINT1_MASK | CS48L32_DSP1_WDT_EXPIRE_EINT1_MASK);
+ regmap_set_bits(cs48l32_codec->core.regmap, CS48L32_IRQ1_MASK_9,
+ CS48L32_DSP1_IRQ0_EINT1_MASK);
+
+ wm_adsp2_component_remove(&cs48l32_codec->dsp, component);
+}
+
+static const struct snd_soc_component_driver cs48l32_soc_component_drv = {
+ .probe = &cs48l32_component_probe,
+ .remove = &cs48l32_component_remove,
+ .set_sysclk = &cs48l32_set_sysclk,
+ .set_pll = &cs48l32_set_fll,
+ .name = "cs48l32-codec",
+ .compress_ops = &cs48l32_compress_ops,
+ .controls = cs48l32_snd_controls,
+ .num_controls = ARRAY_SIZE(cs48l32_snd_controls),
+ .dapm_widgets = cs48l32_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(cs48l32_dapm_widgets),
+ .dapm_routes = cs48l32_dapm_routes,
+ .num_dapm_routes = ARRAY_SIZE(cs48l32_dapm_routes),
+ .use_pmdown_time = 1,
+ .endianness = 1,
+};
+
+static int cs48l32_prop_read_u32_array(struct cs48l32_codec *cs48l32_codec,
+ const char *propname,
+ u32 *dest,
+ int n_max)
+{
+ struct cs48l32 *cs48l32 = &cs48l32_codec->core;
+ int ret;
+
+ ret = device_property_read_u32_array(cs48l32->dev, propname, dest, n_max);
+ if (ret == -EINVAL)
+ return -ENOENT;
+
+ if (ret < 0)
+ return dev_err_probe(cs48l32->dev, ret, "%s malformed\n", propname);
+
+ return 0;
+}
+
+static void cs48l32_prop_get_in_type(struct cs48l32_codec *cs48l32_codec)
+{
+ const char *propname = "cirrus,in-type";
+ u32 tmp[CS48L32_MAX_ANALOG_INPUT * CS48L32_MAX_IN_MUX_WAYS];
+ int i, in_idx, mux_way_idx, ret;
+
+ static_assert(ARRAY_SIZE(tmp) ==
+ ARRAY_SIZE(cs48l32_codec->in_type) * ARRAY_SIZE(cs48l32_codec->in_type[0]));
+
+ ret = cs48l32_prop_read_u32_array(cs48l32_codec, propname, tmp, ARRAY_SIZE(tmp));
+ if (ret < 0)
+ return;
+
+ in_idx = 0;
+ mux_way_idx = 0;
+ for (i = 0; i < ARRAY_SIZE(tmp); ++i) {
+ switch (tmp[i]) {
+ case CS48L32_IN_TYPE_DIFF:
+ case CS48L32_IN_TYPE_SE:
+ cs48l32_codec->in_type[in_idx][mux_way_idx] = tmp[i];
+ break;
+ default:
+ dev_warn(cs48l32_codec->core.dev, "Illegal %s value %d ignored\n",
+ propname, tmp[i]);
+ break;
+ }
+
+ /*
+ * Property array is [mux_way][in_channel]. Swap to
+ * [in_channel][mux_way] for convenience.
+ */
+ if (++in_idx == ARRAY_SIZE(cs48l32_codec->in_type)) {
+ in_idx = 0;
+ ++mux_way_idx;
+ }
+ }
+}
+
+static void cs48l32_prop_get_pdm_sup(struct cs48l32_codec *cs48l32_codec)
+{
+ const char *propname = "cirrus,pdm-sup";
+ u32 tmp[CS48L32_MAX_ANALOG_INPUT];
+ int i;
+
+ static_assert(ARRAY_SIZE(tmp) == ARRAY_SIZE(cs48l32_codec->pdm_sup));
+
+ cs48l32_prop_read_u32_array(cs48l32_codec, propname, tmp, ARRAY_SIZE(tmp));
+
+ for (i = 0; i < ARRAY_SIZE(cs48l32_codec->pdm_sup); i++) {
+ switch (tmp[i]) {
+ case CS48L32_PDM_SUP_VOUT_MIC:
+ case CS48L32_PDM_SUP_MICBIAS1:
+ cs48l32_codec->pdm_sup[i] = tmp[i];
+ break;
+ default:
+ dev_warn(cs48l32_codec->core.dev, "Illegal %s value %d ignored\n",
+ propname, cs48l32_codec->pdm_sup[i]);
+ break;
+ }
+ }
+}
+
+static void cs48l32_handle_properties(struct cs48l32_codec *cs48l32_codec)
+{
+ cs48l32_prop_get_in_type(cs48l32_codec);
+ cs48l32_prop_get_pdm_sup(cs48l32_codec);
+}
+
+static int cs48l32_request_interrupt(struct cs48l32_codec *cs48l32_codec)
+{
+ int irq = cs48l32_codec->core.irq;
+ int ret;
+
+ if (irq < 1)
+ return 0;
+
+ /*
+ * Don't use devm because this must be freed before destroying the
+ * rest of the driver
+ */
+ ret = request_threaded_irq(irq, NULL, cs48l32_irq,
+ IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_LOW,
+ "cs48l32", cs48l32_codec);
+ if (ret)
+ return dev_err_probe(cs48l32_codec->core.dev, ret, "Failed to get IRQ\n");
+
+ return 0;
+}
+
+static int cs48l32_create_codec_component(struct cs48l32_codec *cs48l32_codec)
+{
+ struct wm_adsp *dsp;
+ int ret;
+
+ ASSERT_STRUCT_OFFSET(struct cs48l32_codec, dsp, 0);
+ static_assert(ARRAY_SIZE(cs48l32_dai) == ARRAY_SIZE(cs48l32_codec->dai));
+
+ cs48l32_handle_properties(cs48l32_codec);
+
+ dsp = &cs48l32_codec->dsp;
+ dsp->part = "cs48l32";
+ dsp->cs_dsp.num = 1;
+ dsp->cs_dsp.type = WMFW_HALO;
+ dsp->cs_dsp.rev = 0;
+ dsp->cs_dsp.dev = cs48l32_codec->core.dev;
+ dsp->cs_dsp.regmap = cs48l32_codec->core.regmap;
+ dsp->cs_dsp.base = CS48L32_DSP1_CLOCK_FREQ;
+ dsp->cs_dsp.base_sysinfo = CS48L32_DSP1_SYS_INFO_ID;
+ dsp->cs_dsp.mem = cs48l32_dsp1_regions;
+ dsp->cs_dsp.num_mems = ARRAY_SIZE(cs48l32_dsp1_regions);
+ dsp->pre_run = cs48l32_dsp_pre_run;
+
+ ret = wm_halo_init(dsp);
+ if (ret != 0)
+ return ret;
+
+ cs48l32_codec->fll.codec = cs48l32_codec;
+ cs48l32_codec->fll.id = 1;
+ cs48l32_codec->fll.base = CS48L32_FLL1_CONTROL1;
+ cs48l32_codec->fll.sts_addr = CS48L32_IRQ1_STS_6;
+ cs48l32_codec->fll.sts_mask = CS48L32_FLL1_LOCK_STS1_MASK;
+ cs48l32_init_fll(&cs48l32_codec->fll);
+
+ ret = cs48l32_request_interrupt(cs48l32_codec);
+ if (ret)
+ goto err_dsp;
+
+ ret = devm_snd_soc_register_component(cs48l32_codec->core.dev,
+ &cs48l32_soc_component_drv,
+ cs48l32_dai,
+ ARRAY_SIZE(cs48l32_dai));
+ if (ret < 0) {
+ dev_err_probe(cs48l32_codec->core.dev, ret, "Failed to register component\n");
+ goto err_dsp;
+ }
+
+ return 0;
+
+err_dsp:
+ wm_adsp2_remove(&cs48l32_codec->dsp);
+
+ return ret;
+}
+
+static int cs48l32_wait_for_boot(struct cs48l32 *cs48l32)
+{
+ unsigned int val;
+ int ret;
+
+ ret = regmap_read_poll_timeout(cs48l32->regmap, CS48L32_IRQ1_EINT_2, val,
+ ((val < 0xffffffff) && (val & CS48L32_BOOT_DONE_EINT1_MASK)),
+ 1000, CS48L32_BOOT_TIMEOUT_US);
+ if (ret) {
+ dev_err(cs48l32->dev, "BOOT_DONE timed out\n");
+ return -ETIMEDOUT;
+ }
+
+ ret = regmap_read(cs48l32->regmap, CS48L32_MCU_CTRL1, &val);
+ if (ret) {
+ dev_err(cs48l32->dev, "Failed to read MCU_CTRL1: %d\n", ret);
+ return ret;
+ }
+
+ if (val & BIT(CS48L32_MCU_STS_SHIFT)) {
+ dev_err(cs48l32->dev, "MCU boot failed\n");
+ return -EIO;
+ }
+
+ pm_runtime_mark_last_busy(cs48l32->dev);
+
+ return 0;
+}
+
+static int cs48l32_soft_reset(struct cs48l32 *cs48l32)
+{
+ int ret;
+
+ ret = regmap_write(cs48l32->regmap, CS48L32_SFT_RESET, CS48L32_SFT_RESET_MAGIC);
+ if (ret != 0) {
+ dev_err(cs48l32->dev, "Failed to write soft reset: %d\n", ret);
+ return ret;
+ }
+
+ usleep_range(CS48L32_SOFT_RESET_US, CS48L32_SOFT_RESET_US + 1000);
+
+ return 0;
+}
+
+static void cs48l32_enable_hard_reset(struct cs48l32 *cs48l32)
+{
+ if (cs48l32->reset_gpio)
+ gpiod_set_raw_value_cansleep(cs48l32->reset_gpio, 0);
+}
+
+static void cs48l32_disable_hard_reset(struct cs48l32 *cs48l32)
+{
+ if (cs48l32->reset_gpio) {
+ gpiod_set_raw_value_cansleep(cs48l32->reset_gpio, 1);
+ usleep_range(CS48L32_HARD_RESET_MIN_US, CS48L32_HARD_RESET_MIN_US + 1000);
+ }
+}
+
+static int cs48l32_runtime_resume(struct device *dev)
+{
+ struct cs48l32_codec *cs48l32_codec = dev_get_drvdata(dev);
+ struct cs48l32 *cs48l32 = &cs48l32_codec->core;
+ unsigned int val;
+ int ret;
+
+ ret = regulator_enable(cs48l32->vdd_d);
+ if (ret) {
+ dev_err(cs48l32->dev, "Failed to enable VDD_D: %d\n", ret);
+ return ret;
+ }
+
+ usleep_range(CS48L32_SOFT_RESET_US, CS48L32_SOFT_RESET_US + 1000);
+
+ regcache_cache_only(cs48l32->regmap, false);
+
+ ret = cs48l32_wait_for_boot(cs48l32);
+ if (ret)
+ goto err;
+
+ /* Check whether registers reset during suspend */
+ regmap_read(cs48l32->regmap, CS48L32_CTRL_IF_DEBUG3, &val);
+ if (!val)
+ regcache_mark_dirty(cs48l32->regmap);
+ else
+ dev_dbg(cs48l32->dev, "Did not reset during suspend\n");
+
+ ret = regcache_sync(cs48l32->regmap);
+ if (ret) {
+ dev_err(cs48l32->dev, "Failed to restore register cache\n");
+ goto err;
+ }
+
+ return 0;
+
+err:
+ regcache_cache_only(cs48l32->regmap, true);
+ regulator_disable(cs48l32->vdd_d);
+
+ return ret;
+}
+
+static int cs48l32_runtime_suspend(struct device *dev)
+{
+ struct cs48l32_codec *cs48l32_codec = dev_get_drvdata(dev);
+ struct cs48l32 *cs48l32 = &cs48l32_codec->core;
+
+ /* Flag to detect if the registers reset during suspend */
+ regmap_write(cs48l32->regmap, CS48L32_CTRL_IF_DEBUG3, 1);
+
+ regcache_cache_only(cs48l32->regmap, true);
+ regulator_disable(cs48l32->vdd_d);
+
+ return 0;
+}
+
+static const struct dev_pm_ops cs48l32_pm_ops = {
+ RUNTIME_PM_OPS(cs48l32_runtime_suspend, cs48l32_runtime_resume, NULL)
+};
+
+static int cs48l32_configure_clk32k(struct cs48l32 *cs48l32)
+{
+ int ret = 0;
+
+ ret = clk_prepare_enable(cs48l32->mclk1);
+ if (ret)
+ return dev_err_probe(cs48l32->dev, ret, "Failed to enable 32k clock\n");
+
+ ret = regmap_update_bits(cs48l32->regmap, CS48L32_CLOCK32K,
+ CS48L32_CLK_32K_EN_MASK | CS48L32_CLK_32K_SRC_MASK,
+ CS48L32_CLK_32K_EN_MASK | CS48L32_32K_MCLK1);
+ if (ret) {
+ clk_disable_unprepare(cs48l32->mclk1);
+ return dev_err_probe(cs48l32->dev, ret, "Failed to init 32k clock\n");
+ }
+
+ return 0;
+}
+
+static int cs48l32_get_clocks(struct cs48l32 *cs48l32)
+{
+ cs48l32->mclk1 = devm_clk_get_optional(cs48l32->dev, "mclk1");
+ if (IS_ERR(cs48l32->mclk1))
+ return dev_err_probe(cs48l32->dev, PTR_ERR(cs48l32->mclk1),
+ "Failed to get mclk1\n");
+
+ return 0;
+}
+
+static int cs48l32_get_reset_gpio(struct cs48l32 *cs48l32)
+{
+ struct gpio_desc *reset;
+
+ reset = devm_gpiod_get_optional(cs48l32->dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(reset))
+ return dev_err_probe(cs48l32->dev, PTR_ERR(reset), "Failed to request /RESET\n");
+
+ /* ACPI can override the GPIOD_OUT_LOW so ensure it starts low */
+ gpiod_set_raw_value_cansleep(reset, 0);
+
+ cs48l32->reset_gpio = reset;
+
+ return 0;
+}
+
+static int cs48l32_spi_probe(struct spi_device *spi)
+{
+ struct device *dev = &spi->dev;
+ struct cs48l32_codec *cs48l32_codec;
+ struct cs48l32 *cs48l32;
+ unsigned int hwid, rev, otp_rev;
+ int i, ret;
+
+ cs48l32_codec = devm_kzalloc(&spi->dev, sizeof(*cs48l32_codec), GFP_KERNEL);
+ if (!cs48l32_codec)
+ return -ENOMEM;
+
+ cs48l32 = &cs48l32_codec->core;
+ cs48l32->dev = dev;
+ cs48l32->irq = spi->irq;
+ mutex_init(&cs48l32_codec->rate_lock);
+ cs48l32_codec->in_vu_reg = CS48L32_INPUT_CONTROL3;
+
+ dev_set_drvdata(cs48l32->dev, cs48l32_codec);
+
+ ret = cs48l32_create_regmap(spi, cs48l32);
+ if (ret)
+ return dev_err_probe(&spi->dev, ret, "Failed to allocate regmap\n");
+
+ regcache_cache_only(cs48l32->regmap, true);
+
+ ret = cs48l32_get_reset_gpio(cs48l32);
+ if (ret)
+ return ret;
+
+ ret = cs48l32_get_clocks(cs48l32);
+ if (ret)
+ return ret;
+
+ static_assert(ARRAY_SIZE(cs48l32_core_supplies) == ARRAY_SIZE(cs48l32->core_supplies));
+ for (i = 0; i < ARRAY_SIZE(cs48l32->core_supplies); i++)
+ cs48l32->core_supplies[i].supply = cs48l32_core_supplies[i];
+
+ ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(cs48l32->core_supplies),
+ cs48l32->core_supplies);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to request core supplies\n");
+
+ cs48l32->vdd_d = devm_regulator_get(cs48l32->dev, "vdd-d");
+ if (IS_ERR(cs48l32->vdd_d))
+ return dev_err_probe(dev, PTR_ERR(cs48l32->vdd_d), "Failed to request vdd-d\n");
+
+ ret = regulator_bulk_enable(ARRAY_SIZE(cs48l32->core_supplies), cs48l32->core_supplies);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to enable core supplies\n");
+
+ ret = regulator_enable(cs48l32->vdd_d);
+ if (ret) {
+ dev_err(dev, "Failed to enable vdd-d: %d\n", ret);
+ goto err_enable;
+ }
+
+ cs48l32_disable_hard_reset(cs48l32);
+
+ regcache_cache_only(cs48l32->regmap, false);
+
+ /* If we don't have a reset GPIO use a soft reset */
+ if (!cs48l32->reset_gpio) {
+ ret = cs48l32_soft_reset(cs48l32);
+ if (ret)
+ goto err_reset;
+ }
+
+ ret = cs48l32_wait_for_boot(cs48l32);
+ if (ret) {
+ dev_err(cs48l32->dev, "Device failed initial boot: %d\n", ret);
+ goto err_reset;
+ }
+
+ ret = regmap_read(cs48l32->regmap, CS48L32_DEVID, &hwid);
+ if (ret) {
+ dev_err(dev, "Failed to read ID register: %d\n", ret);
+ goto err_reset;
+ }
+ hwid &= CS48L32_DEVID_MASK;
+
+ switch (hwid) {
+ case CS48L32_SILICON_ID:
+ break;
+ default:
+ ret = -ENODEV;
+ dev_err_probe(cs48l32->dev, ret, "Unknown device ID: %#x\n", hwid);
+ goto err_reset;
+ }
+
+ ret = regmap_read(cs48l32->regmap, CS48L32_REVID, &rev);
+ if (ret) {
+ dev_err(dev, "Failed to read revision register: %d\n", ret);
+ goto err_reset;
+ }
+ rev &= CS48L32_AREVID_MASK | CS48L32_MTLREVID_MASK;
+
+ ret = regmap_read(cs48l32->regmap, CS48L32_OTPID, &otp_rev);
+ if (ret) {
+ dev_err(dev, "Failed to read OTP revision register: %d\n", ret);
+ goto err_reset;
+ }
+ otp_rev &= CS48L32_OTPID_MASK;
+
+ dev_info(dev, "CS48L%x revision %X%u OTP%u\n", hwid & 0xff,
+ rev >> CS48L32_AREVID_SHIFT, rev & CS48L32_MTLREVID_MASK, otp_rev);
+
+ /* Apply hardware patch */
+ ret = cs48l32_apply_patch(cs48l32);
+ if (ret) {
+ dev_err(cs48l32->dev, "Failed to apply patch %d\n", ret);
+ goto err_reset;
+ }
+
+ /* BOOT_DONE interrupt is unmasked by default, so mask it */
+ ret = regmap_set_bits(cs48l32->regmap, CS48L32_IRQ1_MASK_2, CS48L32_BOOT_DONE_EINT1_MASK);
+
+ ret = cs48l32_configure_clk32k(cs48l32);
+ if (ret)
+ goto err_reset;
+
+ pm_runtime_set_active(cs48l32->dev);
+ pm_runtime_set_autosuspend_delay(cs48l32->dev, 100);
+ pm_runtime_use_autosuspend(cs48l32->dev);
+ pm_runtime_enable(cs48l32->dev);
+
+ ret = cs48l32_create_codec_component(cs48l32_codec);
+ if (ret)
+ goto err_clk32k;
+
+ return 0;
+
+err_clk32k:
+ clk_disable_unprepare(cs48l32->mclk1);
+err_reset:
+ cs48l32_enable_hard_reset(cs48l32);
+ regulator_disable(cs48l32->vdd_d);
+err_enable:
+ regulator_bulk_disable(ARRAY_SIZE(cs48l32->core_supplies), cs48l32->core_supplies);
+
+ return ret;
+}
+
+static void cs48l32_spi_remove(struct spi_device *spi)
+{
+ struct cs48l32_codec *cs48l32_codec = spi_get_drvdata(spi);
+ struct cs48l32 *cs48l32 = &cs48l32_codec->core;
+
+ /* Remove IRQ handler before destroying anything else */
+ if (cs48l32->irq >= 1)
+ free_irq(cs48l32->irq, cs48l32_codec);
+
+ pm_runtime_disable(cs48l32->dev);
+ regulator_disable(cs48l32->vdd_d);
+ clk_disable_unprepare(cs48l32->mclk1);
+ cs48l32_enable_hard_reset(cs48l32);
+ regulator_bulk_disable(ARRAY_SIZE(cs48l32->core_supplies), cs48l32->core_supplies);
+
+ mutex_destroy(&cs48l32_codec->rate_lock);
+}
+
+static const struct of_device_id cs48l32_of_match[] = {
+ { .compatible = "cirrus,cs48l32", },
+ {},
+};
+
+static const struct spi_device_id cs48l32_spi_ids[] = {
+ { "cs48l32", },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, cs48l32_spi_ids);
+
+static struct spi_driver cs48l32_spi_driver = {
+ .driver = {
+ .name = "cs48l32",
+ .pm = pm_ptr(&cs48l32_pm_ops),
+ .of_match_table = cs48l32_of_match,
+ },
+ .probe = &cs48l32_spi_probe,
+ .remove = &cs48l32_spi_remove,
+ .id_table = cs48l32_spi_ids,
+};
+module_spi_driver(cs48l32_spi_driver);
+
+MODULE_DESCRIPTION("CS48L32 ASoC codec driver");
+MODULE_AUTHOR("Stuart Henderson <stuarth@opensource.cirrus.com>");
+MODULE_AUTHOR("Piotr Stankiewicz <piotrs@opensource.cirrus.com>");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/cs48l32.h b/sound/soc/codecs/cs48l32.h
new file mode 100644
index 000000000000..c1b4e13feae4
--- /dev/null
+++ b/sound/soc/codecs/cs48l32.h
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Cirrus Logic CS48L32 audio DSP.
+ *
+ * Copyright (C) 2016-2018, 2020, 2022, 2025 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+#ifndef SND_SOC_CS48L32_H
+#define SND_SOC_CS48L32_H
+
+#include <linux/bits.h>
+#include <sound/soc.h>
+#include "wm_adsp.h"
+
+#define CS48L32_SILICON_ID 0x48a32
+
+#define CS48L32_32K_MCLK1 0
+
+#define CS48L32_SFT_RESET_MAGIC 0x5a000000
+#define CS48L32_SOFT_RESET_US 2000
+#define CS48L32_HARD_RESET_MIN_US 1000
+
+#define CS48L32_SEEN_BOOT_DONE BIT(0)
+#define CS48L32_BOOT_TIMEOUT_US 25000
+
+#define CS48L32_ASP_ENABLES1 0x00
+#define CS48L32_ASP_CONTROL1 0x04
+#define CS48L32_ASP_CONTROL2 0x08
+#define CS48L32_ASP_CONTROL3 0x0c
+#define CS48L32_ASP_FRAME_CONTROL1 0x10
+#define CS48L32_ASP_FRAME_CONTROL2 0x14
+#define CS48L32_ASP_FRAME_CONTROL5 0x20
+#define CS48L32_ASP_FRAME_CONTROL6 0x24
+#define CS48L32_ASP_DATA_CONTROL1 0x30
+#define CS48L32_ASP_DATA_CONTROL5 0x40
+#define CS48L32_SYSCLK_RATE_6MHZ 0
+#define CS48L32_SYSCLK_RATE_12MHZ 1
+#define CS48L32_SYSCLK_RATE_24MHZ 2
+#define CS48L32_SYSCLK_RATE_49MHZ 3
+#define CS48L32_SYSCLK_RATE_98MHZ 4
+#define CS48L32_FLLHJ_INT_MAX_N 1023
+#define CS48L32_FLLHJ_INT_MIN_N 1
+#define CS48L32_FLLHJ_FRAC_MAX_N 255
+#define CS48L32_FLLHJ_FRAC_MIN_N 2
+#define CS48L32_FLLHJ_LP_INT_MODE_THRESH 100000
+#define CS48L32_FLLHJ_LOW_THRESH 192000
+#define CS48L32_FLLHJ_MID_THRESH 1152000
+#define CS48L32_FLLHJ_MAX_THRESH 13000000
+#define CS48L32_FLLHJ_LOW_GAINS 0x23f0
+#define CS48L32_FLLHJ_MID_GAINS 0x22f2
+#define CS48L32_FLLHJ_HIGH_GAINS 0x21f0
+#define CS48L32_FLL_MAX_FOUT 50000000
+#define CS48L32_FLL_MAX_REFDIV 8
+#define CS48L32_FLL_CONTROL1_OFFS 0x00
+#define CS48L32_FLL_CONTROL2_OFFS 0x04
+#define CS48L32_FLL_CONTROL3_OFFS 0x08
+#define CS48L32_FLL_CONTROL4_OFFS 0x0c
+#define CS48L32_FLL_CONTROL5_OFFS 0x10
+#define CS48L32_FLL_CONTROL6_OFFS 0x14
+#define CS48L32_FLL_DIGITAL_TEST2_OFFS 0x34
+#define CS48L32_FLL_GPIO_CLOCK_OFFS 0xa0
+#define CS48L32_DSP_CLOCK_FREQ_OFFS 0x00000
+#define CS48L32_ASP_FMT_DSP_MODE_A 0
+#define CS48L32_ASP_FMT_DSP_MODE_B 1
+#define CS48L32_ASP_FMT_I2S_MODE 2
+#define CS48L32_ASP_FMT_LEFT_JUSTIFIED_MODE 3
+#define CS48L32_HALO_SAMPLE_RATE_RX1 0x00080
+#define CS48L32_HALO_SAMPLE_RATE_TX1 0x00280
+#define CS48L32_HALO_DSP_RATE_MASK 0x1f
+
+#define CS48L32_PDMCLK_SRC_IN1_PDMCLK 0x0
+#define CS48L32_PDMCLK_SRC_IN2_PDMCLK 0x1
+#define CS48L32_PDMCLK_SRC_IN3_PDMCLK 0x2
+#define CS48L32_PDMCLK_SRC_IN4_PDMCLK 0x3
+#define CS48L32_PDMCLK_SRC_AUXPDM1_CLK 0x8
+#define CS48L32_PDMCLK_SRC_AUXPDM2_CLK 0x9
+
+#define CS48L32_MAX_DAI 6
+#define CS48L32_MAX_INPUT 4
+#define CS48L32_MAX_ANALOG_INPUT 2
+#define CS48L32_MAX_IN_MUX_WAYS 2
+#define CS48L32_MAX_ASP 2
+
+#define CS48L32_EQ_BLOCK_SZ 60
+#define CS48L32_N_EQ_BLOCKS 4
+
+#define CS48L32_DSP_N_RX_CHANNELS 8
+#define CS48L32_DSP_N_TX_CHANNELS 8
+
+#define CS48L32_LHPF_MAX_COEFF 4095
+#define CS48L32_EQ_MAX_COEFF 4095
+
+#define CS48L32_MIXER_CONTROLS(name, base) \
+ SOC_SINGLE_RANGE_TLV(name " Input 1 Volume", base, \
+ CS48L32_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \
+ cs48l32_mixer_tlv), \
+ SOC_SINGLE_RANGE_TLV(name " Input 2 Volume", base + 4, \
+ CS48L32_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \
+ cs48l32_mixer_tlv), \
+ SOC_SINGLE_RANGE_TLV(name " Input 3 Volume", base + 8, \
+ CS48L32_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \
+ cs48l32_mixer_tlv), \
+ SOC_SINGLE_RANGE_TLV(name " Input 4 Volume", base + 12, \
+ CS48L32_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \
+ cs48l32_mixer_tlv)
+
+#define CS48L32_MUX_ENUM_DECL(name, reg) \
+ SOC_VALUE_ENUM_SINGLE_DECL( \
+ name, reg, 0, CS48L32_MIXER_SRC_MASK, \
+ cs48l32_mixer_texts, cs48l32_mixer_values)
+
+#define CS48L32_MUX_CTL_DECL(name) \
+ const struct snd_kcontrol_new name##_mux = SOC_DAPM_ENUM("Route", name##_enum)
+
+#define CS48L32_MUX_ENUMS(name, base_reg) \
+ static CS48L32_MUX_ENUM_DECL(name##_enum, base_reg); \
+ static CS48L32_MUX_CTL_DECL(name)
+
+#define CS48L32_MIXER_ENUMS(name, base_reg) \
+ CS48L32_MUX_ENUMS(name##_in1, base_reg); \
+ CS48L32_MUX_ENUMS(name##_in2, base_reg + 4); \
+ CS48L32_MUX_ENUMS(name##_in3, base_reg + 8); \
+ CS48L32_MUX_ENUMS(name##_in4, base_reg + 12)
+
+#define CS48L32_MUX(name, ctrl) SND_SOC_DAPM_MUX(name, SND_SOC_NOPM, 0, 0, ctrl)
+
+#define CS48L32_MUX_WIDGETS(name, name_str) CS48L32_MUX(name_str " Input 1", &name##_mux)
+
+#define CS48L32_MIXER_WIDGETS(name, name_str) \
+ CS48L32_MUX(name_str " Input 1", &name##_in1_mux), \
+ CS48L32_MUX(name_str " Input 2", &name##_in2_mux), \
+ CS48L32_MUX(name_str " Input 3", &name##_in3_mux), \
+ CS48L32_MUX(name_str " Input 4", &name##_in4_mux), \
+ SND_SOC_DAPM_MIXER(name_str " Mixer", SND_SOC_NOPM, 0, 0, NULL, 0)
+
+#define CS48L32_MUX_ROUTES(widget, name) \
+ { widget, NULL, name " Input 1" }, \
+ CS48L32_MIXER_INPUT_ROUTES(name " Input 1")
+
+#define CS48L32_MIXER_ROUTES(widget, name) \
+ { widget, NULL, name " Mixer" }, \
+ { name " Mixer", NULL, name " Input 1" }, \
+ { name " Mixer", NULL, name " Input 2" }, \
+ { name " Mixer", NULL, name " Input 3" }, \
+ { name " Mixer", NULL, name " Input 4" }, \
+ CS48L32_MIXER_INPUT_ROUTES(name " Input 1"), \
+ CS48L32_MIXER_INPUT_ROUTES(name " Input 2"), \
+ CS48L32_MIXER_INPUT_ROUTES(name " Input 3"), \
+ CS48L32_MIXER_INPUT_ROUTES(name " Input 4")
+
+#define CS48L32_DSP_ROUTES_1_8_SYSCLK(name) \
+ { name, NULL, name " Preloader" }, \
+ { name, NULL, "SYSCLK" }, \
+ { name " Preload", NULL, name " Preloader" }, \
+ CS48L32_MIXER_ROUTES(name, name "RX1"), \
+ CS48L32_MIXER_ROUTES(name, name "RX2"), \
+ CS48L32_MIXER_ROUTES(name, name "RX3"), \
+ CS48L32_MIXER_ROUTES(name, name "RX4"), \
+ CS48L32_MIXER_ROUTES(name, name "RX5"), \
+ CS48L32_MIXER_ROUTES(name, name "RX6"), \
+ CS48L32_MIXER_ROUTES(name, name "RX7"), \
+ CS48L32_MIXER_ROUTES(name, name "RX8") \
+
+#define CS48L32_DSP_ROUTES_1_8(name) \
+ { name, NULL, "DSPCLK" }, \
+ CS48L32_DSP_ROUTES_1_8_SYSCLK(name) \
+
+#define CS48L32_RATE_CONTROL(name, domain) SOC_ENUM(name, cs48l32_sample_rate[(domain) - 1])
+
+#define CS48L32_RATE_ENUM(name, enum) \
+ SOC_ENUM_EXT(name, enum, snd_soc_get_enum_double, cs48l32_rate_put)
+
+#define CS48L32_DSP_RATE_CONTROL(name, num) \
+ SOC_ENUM_EXT(name " Rate", cs48l32_dsp_rate_enum[num], \
+ cs48l32_dsp_rate_get, cs48l32_dsp_rate_put)
+
+#define CS48L32_EQ_COEFF_CONTROL(xname, xreg, xbase, xshift) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+ .info = cs48l32_eq_coeff_info, .get = cs48l32_eq_coeff_get, \
+ .put = cs48l32_eq_coeff_put, .private_value = \
+ (unsigned long)&(struct cs48l32_eq_control) { .reg = xreg,\
+ .shift = xshift, .block_base = xbase, .max = 65535 } }
+
+#define CS48L32_EQ_REG_NAME_PASTER(eq, band, type) \
+ CS48L32_ ## eq ## _ ## band ## _ ## type
+
+#define CS48L32_EQ_BAND_COEFF_CONTROLS(name, band) \
+ CS48L32_EQ_COEFF_CONTROL(#name " " #band " A", \
+ CS48L32_EQ_REG_NAME_PASTER(name, band, COEFF1), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 0), \
+ CS48L32_EQ_COEFF_CONTROL(#name " " #band " B", \
+ CS48L32_EQ_REG_NAME_PASTER(name, band, COEFF1), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 16), \
+ CS48L32_EQ_COEFF_CONTROL(#name " " #band " C", \
+ CS48L32_EQ_REG_NAME_PASTER(name, band, COEFF2), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 0), \
+ CS48L32_EQ_COEFF_CONTROL(#name " " #band " PG", \
+ CS48L32_EQ_REG_NAME_PASTER(name, band, PG), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 0)
+
+#define CS48L32_EQ_COEFF_CONTROLS(name) \
+ CS48L32_EQ_BAND_COEFF_CONTROLS(name, BAND1), \
+ CS48L32_EQ_BAND_COEFF_CONTROLS(name, BAND2), \
+ CS48L32_EQ_BAND_COEFF_CONTROLS(name, BAND3), \
+ CS48L32_EQ_BAND_COEFF_CONTROLS(name, BAND4), \
+ CS48L32_EQ_COEFF_CONTROL(#name " BAND5 A", \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND5, COEFF1), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 0), \
+ CS48L32_EQ_COEFF_CONTROL(#name " BAND5 B", \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND5, COEFF1), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 16), \
+ CS48L32_EQ_COEFF_CONTROL(#name " BAND5 PG", \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND5, PG), \
+ CS48L32_EQ_REG_NAME_PASTER(name, BAND1, COEFF1), \
+ 0)
+
+#define CS48L32_LHPF_CONTROL(xname, xbase) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+ .info = snd_soc_bytes_info, .get = snd_soc_bytes_get, \
+ .put = cs48l32_lhpf_coeff_put, .private_value = \
+ ((unsigned long)&(struct soc_bytes) { .base = xbase, \
+ .num_regs = 1 }) }
+
+/* these have a subseq number so they run after SYSCLK and DSPCLK widgets */
+#define CS48L32_DSP_FREQ_WIDGET_EV(name, num, event) \
+ SND_SOC_DAPM_SUPPLY_S(name "FREQ", 100, SND_SOC_NOPM, num, 0, \
+ event, SND_SOC_DAPM_POST_PMU)
+
+#define CS48L32_RATES SNDRV_PCM_RATE_KNOT
+
+#define CS48L32_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+ SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+#define CS48L32_MIXER_INPUT_ROUTES(name) \
+ { name, "Tone Generator 1", "Tone Generator 1" }, \
+ { name, "Tone Generator 2", "Tone Generator 2" }, \
+ { name, "Noise Generator", "Noise Generator" }, \
+ { name, "IN1L", "IN1L PGA" }, \
+ { name, "IN1R", "IN1R PGA" }, \
+ { name, "IN2L", "IN2L PGA" }, \
+ { name, "IN2R", "IN2R PGA" }, \
+ { name, "ASP1RX1", "ASP1RX1" }, \
+ { name, "ASP1RX2", "ASP1RX2" }, \
+ { name, "ASP1RX3", "ASP1RX3" }, \
+ { name, "ASP1RX4", "ASP1RX4" }, \
+ { name, "ASP1RX5", "ASP1RX5" }, \
+ { name, "ASP1RX6", "ASP1RX6" }, \
+ { name, "ASP1RX7", "ASP1RX7" }, \
+ { name, "ASP1RX8", "ASP1RX8" }, \
+ { name, "ASP2RX1", "ASP2RX1" }, \
+ { name, "ASP2RX2", "ASP2RX2" }, \
+ { name, "ASP2RX3", "ASP2RX3" }, \
+ { name, "ASP2RX4", "ASP2RX4" }, \
+ { name, "ISRC1DEC1", "ISRC1DEC1" }, \
+ { name, "ISRC1DEC2", "ISRC1DEC2" }, \
+ { name, "ISRC1DEC3", "ISRC1DEC3" }, \
+ { name, "ISRC1DEC4", "ISRC1DEC4" }, \
+ { name, "ISRC1INT1", "ISRC1INT1" }, \
+ { name, "ISRC1INT2", "ISRC1INT2" }, \
+ { name, "ISRC1INT3", "ISRC1INT3" }, \
+ { name, "ISRC1INT4", "ISRC1INT4" }, \
+ { name, "ISRC2DEC1", "ISRC2DEC1" }, \
+ { name, "ISRC2DEC2", "ISRC2DEC2" }, \
+ { name, "ISRC2INT1", "ISRC2INT1" }, \
+ { name, "ISRC2INT2", "ISRC2INT2" }, \
+ { name, "ISRC3DEC1", "ISRC3DEC1" }, \
+ { name, "ISRC3DEC2", "ISRC3DEC2" }, \
+ { name, "ISRC3INT1", "ISRC3INT1" }, \
+ { name, "ISRC3INT2", "ISRC3INT2" }, \
+ { name, "EQ1", "EQ1" }, \
+ { name, "EQ2", "EQ2" }, \
+ { name, "EQ3", "EQ3" }, \
+ { name, "EQ4", "EQ4" }, \
+ { name, "DRC1L", "DRC1L" }, \
+ { name, "DRC1R", "DRC1R" }, \
+ { name, "DRC2L", "DRC2L" }, \
+ { name, "DRC2R", "DRC2R" }, \
+ { name, "LHPF1", "LHPF1" }, \
+ { name, "LHPF2", "LHPF2" }, \
+ { name, "LHPF3", "LHPF3" }, \
+ { name, "LHPF4", "LHPF4" }, \
+ { name, "Ultrasonic 1", "Ultrasonic 1" }, \
+ { name, "Ultrasonic 2", "Ultrasonic 2" }, \
+ { name, "DSP1.1", "DSP1" }, \
+ { name, "DSP1.2", "DSP1" }, \
+ { name, "DSP1.3", "DSP1" }, \
+ { name, "DSP1.4", "DSP1" }, \
+ { name, "DSP1.5", "DSP1" }, \
+ { name, "DSP1.6", "DSP1" }, \
+ { name, "DSP1.7", "DSP1" }, \
+ { name, "DSP1.8", "DSP1" }
+
+struct cs48l32_enum {
+ struct soc_enum mixer_enum;
+ int val;
+};
+
+struct cs48l32_eq_control {
+ unsigned int reg;
+ unsigned int shift;
+ unsigned int block_base;
+ unsigned int max;
+};
+
+struct cs48l32_dai_priv {
+ int clk;
+ struct snd_pcm_hw_constraint_list constraint;
+};
+
+struct cs48l32_dsp_power_reg_block {
+ unsigned int start;
+ unsigned int end;
+};
+
+struct cs48l32_dsp_power_regs {
+ const unsigned int *pwd;
+ unsigned int n_pwd;
+ const struct cs48l32_dsp_power_reg_block *ext;
+ unsigned int n_ext;
+};
+
+struct cs48l32;
+struct cs48l32_codec;
+struct spi_device;
+
+struct cs48l32_fll_cfg {
+ int n;
+ unsigned int theta;
+ unsigned int lambda;
+ int refdiv;
+ int fratio;
+ int gain;
+ int alt_gain;
+};
+
+struct cs48l32_fll {
+ struct cs48l32_codec *codec;
+ int id;
+ unsigned int base;
+
+ unsigned int sts_addr;
+ unsigned int sts_mask;
+ unsigned int fout;
+ int ref_src;
+ unsigned int ref_freq;
+
+ struct cs48l32_fll_cfg ref_cfg;
+};
+
+struct cs48l32_codec {
+ struct wm_adsp dsp; /* must be first */
+ struct cs48l32 core;
+ int sysclk;
+ int dspclk;
+ struct cs48l32_dai_priv dai[CS48L32_MAX_DAI];
+ struct cs48l32_fll fll;
+
+ unsigned int in_up_pending;
+ unsigned int in_vu_reg;
+
+ struct mutex rate_lock;
+
+ u8 dsp_dma_rates[CS48L32_DSP_N_RX_CHANNELS + CS48L32_DSP_N_TX_CHANNELS];
+
+ u8 in_type[CS48L32_MAX_ANALOG_INPUT][CS48L32_MAX_IN_MUX_WAYS];
+ u8 pdm_sup[CS48L32_MAX_ANALOG_INPUT];
+ u8 tdm_width[CS48L32_MAX_ASP];
+ u8 tdm_slots[CS48L32_MAX_ASP];
+
+ unsigned int eq_mode[CS48L32_N_EQ_BLOCKS];
+ __be16 eq_coefficients[CS48L32_N_EQ_BLOCKS][CS48L32_EQ_BLOCK_SZ / 2];
+
+ const struct cs48l32_dsp_power_regs *dsp_power_regs;
+};
+
+#define cs48l32_fll_err(_fll, fmt, ...) \
+ dev_err(_fll->codec->core.dev, "FLL%d: " fmt, _fll->id, ##__VA_ARGS__)
+#define cs48l32_fll_warn(_fll, fmt, ...) \
+ dev_warn(_fll->codec->core.dev, "FLL%d: " fmt, _fll->id, ##__VA_ARGS__)
+#define cs48l32_fll_dbg(_fll, fmt, ...) \
+ dev_dbg(_fll->codec->core.dev, "FLL%d: " fmt, _fll->id, ##__VA_ARGS__)
+
+#define cs48l32_asp_err(_dai, fmt, ...) \
+ dev_err(_dai->component->dev, "ASP%d: " fmt, _dai->id, ##__VA_ARGS__)
+#define cs48l32_asp_warn(_dai, fmt, ...) \
+ dev_warn(_dai->component->dev, "ASP%d: " fmt, _dai->id, ##__VA_ARGS__)
+#define cs48l32_asp_dbg(_dai, fmt, ...) \
+ dev_dbg(_dai->component->dev, "ASP%d: " fmt, _dai->id, ##__VA_ARGS__)
+
+int cs48l32_apply_patch(struct cs48l32 *cs48l32);
+int cs48l32_create_regmap(struct spi_device *spi, struct cs48l32 *cs48l32);
+int cs48l32_enable_asp1_pins(struct cs48l32_codec *cs48l32_codec);
+int cs48l32_enable_asp2_pins(struct cs48l32_codec *cs48l32_codec);
+int cs48l32_micvdd_voltage_index(u32 voltage);
+int cs48l32_micbias1_voltage_index(u32 voltage);
+
+#endif
diff --git a/sound/soc/codecs/es8375.c b/sound/soc/codecs/es8375.c
new file mode 100644
index 000000000000..decc86c92427
--- /dev/null
+++ b/sound/soc/codecs/es8375.c
@@ -0,0 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * es8375.c -- ES8375 ALSA SoC Audio Codec
+ *
+ * Copyright Everest Semiconductor Co., Ltd
+ *
+ * Authors: Michael Zhang (zhangyi@everest-semi.com)
+ */
+
+#include <linux/gpio/consumer.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/tlv.h>
+#include <sound/soc.h>
+#include <linux/acpi.h>
+#include "es8375.h"
+
+struct es8375_priv {
+ struct regmap *regmap;
+ struct clk *mclk;
+ struct regulator_bulk_data core_supply[2];
+ unsigned int mclk_freq;
+ int mastermode;
+ u8 mclk_src;
+ u8 vddd;
+ enum snd_soc_bias_level bias_level;
+};
+
+static const char * const es8375_core_supplies[] = {
+ "vddd",
+ "vdda",
+};
+
+static const DECLARE_TLV_DB_SCALE(es8375_adc_osr_gain_tlv, -3100, 100, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_adc_volume_tlv, -9550, 50, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_adc_automute_attn_tlv, 0, 100, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_adc_dmic_volume_tlv, 0, 600, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_dac_volume_tlv, -9550, 50, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_dac_vppscale_tlv, -388, 12, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_dac_automute_attn_tlv, 0, 400, 0);
+static const DECLARE_TLV_DB_SCALE(es8375_automute_ng_tlv, -9600, 600, 0);
+
+static const char *const es8375_ramprate_txt[] = {
+ "0.125dB/LRCK",
+ "0.125dB/2LRCK",
+ "0.125dB/4LRCK",
+ "0.125dB/8LRCK",
+ "0.125dB/16LRCK",
+ "0.125dB/32LRCK",
+ "0.125dB/64LRCK",
+ "0.125dB/128LRCK",
+ "disable softramp",
+};
+static SOC_ENUM_SINGLE_DECL(es8375_adc_ramprate, ES8375_ADC2,
+ ADC_RAMPRATE_SHIFT_0, es8375_ramprate_txt);
+static SOC_ENUM_SINGLE_DECL(es8375_dac_ramprate, ES8375_DAC2,
+ DAC_RAMPRATE_SHIFT_0, es8375_ramprate_txt);
+
+static const char *const es8375_automute_ws_txt[] = {
+ "256 samples",
+ "512 samples",
+ "1024 samples",
+ "2048 samples",
+ "4096 samples",
+ "8192 samples",
+ "16384 samples",
+ "32768 samples",
+};
+static SOC_ENUM_SINGLE_DECL(es8375_adc_automute_ws, ES8375_ADC_AUTOMUTE,
+ ADC_AUTOMUTE_WS_SHIFT_3, es8375_automute_ws_txt);
+static SOC_ENUM_SINGLE_DECL(es8375_dac_automute_ws, ES8375_DAC_AUTOMUTE,
+ DAC_AUTOMUTE_WS_SHIFT_5, es8375_automute_ws_txt);
+
+static const char *const es8375_dmic_pol_txt[] = {
+ "Low",
+ "High",
+};
+
+static SOC_ENUM_SINGLE_DECL(es8375_dmic_pol, ES8375_ADC1,
+ DMIC_POL_SHIFT_4, es8375_dmic_pol_txt);
+
+static const char *const es8375_adc_hpf_txt[] = {
+ "Freeze Offset",
+ "Dynamic HPF",
+};
+
+static SOC_ENUM_SINGLE_DECL(es8375_adc_hpf, ES8375_HPF1,
+ ADC_HPF_SHIFT_5, es8375_adc_hpf_txt);
+
+static const char *const es8375_dmic_mux_txt[] = {
+ "AMIC",
+ "DMIC",
+};
+static const struct soc_enum es8375_dmic_mux_enum =
+ SOC_ENUM_SINGLE(ES8375_ADC1, ADC_SRC_SHIFT_7,
+ ARRAY_SIZE(es8375_dmic_mux_txt), es8375_dmic_mux_txt);
+
+static const struct snd_kcontrol_new es8375_dmic_mux_controls =
+ SOC_DAPM_ENUM("ADC MUX", es8375_dmic_mux_enum);
+
+static const struct snd_kcontrol_new es8375_snd_controls[] = {
+ SOC_SINGLE_TLV("ADC OSR Volume", ES8375_ADC_OSR_GAIN,
+ ADC_OSR_GAIN_SHIFT_0, ES8375_ADC_OSR_GAIN_MAX, 0,
+ es8375_adc_osr_gain_tlv),
+ SOC_SINGLE("ADC Invert Switch", ES8375_ADC1, ADC_INV_SHIFT_6, 1, 0),
+ SOC_SINGLE("ADC RAM Clear", ES8375_ADC1, ADC_RAMCLR_SHIFT_5, 1, 0),
+ SOC_ENUM("DMIC Polarity", es8375_dmic_pol),
+ SOC_SINGLE_TLV("DMIC Volume", ES8375_ADC1,
+ DMIC_GAIN_SHIFT_2, ES8375_DMIC_GAIN_MAX,
+ 0, es8375_adc_dmic_volume_tlv),
+ SOC_ENUM("ADC Ramp Rate", es8375_adc_ramprate),
+ SOC_SINGLE_TLV("ADC Volume", ES8375_ADC_VOLUME,
+ ADC_VOLUME_SHIFT_0, ES8375_ADC_VOLUME_MAX,
+ 0, es8375_adc_volume_tlv),
+ SOC_SINGLE("ADC Automute Switch", ES8375_ADC_AUTOMUTE,
+ ADC_AUTOMUTE_SHIFT_7, 1, 0),
+ SOC_ENUM("ADC Automute Winsize", es8375_adc_automute_ws),
+ SOC_SINGLE_TLV("ADC Automute Noise Gate", ES8375_ADC_AUTOMUTE,
+ ADC_AUTOMUTE_NG_SHIFT_0, ES8375_AUTOMUTE_NG_MAX,
+ 0, es8375_automute_ng_tlv),
+ SOC_SINGLE_TLV("ADC Automute Volume", ES8375_ADC_AUTOMUTE_ATTN,
+ ADC_AUTOMUTE_ATTN_SHIFT_0, ES8375_ADC_AUTOMUTE_ATTN_MAX,
+ 0, es8375_adc_automute_attn_tlv),
+ SOC_ENUM("ADC HPF", es8375_adc_hpf),
+
+ SOC_SINGLE("DAC DSM Mute Switch", ES8375_DAC1, DAC_DSMMUTE_SHIFT_7, 1, 0),
+ SOC_SINGLE("DAC DEM Mute Switch", ES8375_DAC1, DAC_DEMMUTE_SHIFT_6, 1, 0),
+ SOC_SINGLE("DAC Invert Switch", ES8375_DAC1, DAC_INV_SHIFT_5, 1, 0),
+ SOC_SINGLE("DAC RAM Clear", ES8375_DAC1, DAC_RAMCLR_SHIFT_4, 1, 0),
+ SOC_ENUM("DAC Ramp Rate", es8375_dac_ramprate),
+ SOC_SINGLE_TLV("DAC Volume", ES8375_DAC_VOLUME,
+ DAC_VOLUME_SHIFT_0, ES8375_DAC_VOLUME_MAX,
+ 0, es8375_dac_volume_tlv),
+ SOC_SINGLE_TLV("DAC VPP Scale", ES8375_DAC_VPPSCALE,
+ DAC_VPPSCALE_SHIFT_0, ES8375_DAC_VPPSCALE_MAX,
+ 0, es8375_dac_vppscale_tlv),
+ SOC_SINGLE("DAC Automute Switch", ES8375_DAC_AUTOMUTE1,
+ DAC_AUTOMUTE_EN_SHIFT_7, 1, 0),
+ SOC_SINGLE_TLV("DAC Automute Noise Gate", ES8375_DAC_AUTOMUTE1,
+ DAC_AUTOMUTE_NG_SHIFT_0, ES8375_AUTOMUTE_NG_MAX,
+ 0, es8375_automute_ng_tlv),
+ SOC_ENUM("DAC Automute Winsize", es8375_dac_automute_ws),
+ SOC_SINGLE_TLV("DAC Automute Volume", ES8375_DAC_AUTOMUTE,
+ DAC_AUTOMUTE_ATTN_SHIFT_0, ES8375_DAC_AUTOMUTE_ATTN_MAX,
+ 0, es8375_dac_automute_attn_tlv),
+};
+
+static const struct snd_soc_dapm_widget es8375_dapm_widgets[] = {
+ SND_SOC_DAPM_INPUT("MIC1"),
+ SND_SOC_DAPM_INPUT("DMIC"),
+ SND_SOC_DAPM_PGA("PGA", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_ADC("Mono ADC", NULL, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_AIF_OUT("AIF1TX", "AIF1 Capture", 0, ES8375_SDP2,
+ ES8375_ADC_P2S_MUTE_SHIFT_5, 1),
+
+ SND_SOC_DAPM_MUX("ADC MUX", SND_SOC_NOPM, 0, 0, &es8375_dmic_mux_controls),
+
+ SND_SOC_DAPM_AIF_IN("AIF1RX", "AIF1 Playback", 0, ES8375_SDP,
+ SND_SOC_NOPM, 0),
+ SND_SOC_DAPM_DAC("Mono DAC", NULL, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_OUTPUT("OUT"),
+};
+
+static const struct snd_soc_dapm_route es8375_dapm_routes[] = {
+ {"ADC MUX", "AMIC", "MIC1"},
+ {"ADC MUX", "DMIC", "DMIC"},
+ {"PGA", NULL, "ADC MUX"},
+ {"Mono ADC", NULL, "PGA"},
+ {"AIF1TX", NULL, "Mono ADC"},
+
+ {"Mono DAC", NULL, "AIF1RX"},
+ {"OUT", NULL, "Mono DAC"},
+};
+
+struct _coeff_div {
+ u16 mclk_lrck_ratio;
+ u32 mclk;
+ u32 rate;
+ u8 Reg0x04;
+ u8 Reg0x05;
+ u8 Reg0x06;
+ u8 Reg0x07;
+ u8 Reg0x08;
+ u8 Reg0x09;
+ u8 Reg0x0A;
+ u8 Reg0x0B;
+ u8 Reg0x19;
+ u8 dvdd_vol;
+ u8 dmic_sel;
+};
+
+static const struct _coeff_div coeff_div[] = {
+ {32, 256000, 8000, 0x05, 0x34, 0xDD, 0x55, 0x1F, 0x00, 0x95, 0x00, 0x1F, 2, 2},
+ {32, 512000, 16000, 0x05, 0x34, 0xDD, 0x55, 0x1F, 0x00, 0x94, 0x00, 0x1F, 2, 2},
+ {32, 1536000, 48000, 0x05, 0x33, 0xD5, 0x55, 0x1F, 0x00, 0x93, 0x00, 0x1F, 2, 2},
+ {36, 288000, 8000, 0x05, 0x34, 0xDD, 0x55, 0x23, 0x08, 0x95, 0x00, 0x1F, 2, 2},
+ {36, 576000, 16000, 0x05, 0x34, 0xDD, 0x55, 0x23, 0x08, 0x94, 0x00, 0x1F, 2, 2},
+ {36, 1728000, 48000, 0x05, 0x33, 0xD5, 0x55, 0x23, 0x08, 0x93, 0x00, 0x1F, 2, 2},
+ {48, 384000, 8000, 0x05, 0x14, 0x5D, 0x55, 0x17, 0x20, 0x94, 0x00, 0x28, 2, 2},
+ {48, 768000, 16000, 0x05, 0x14, 0x5D, 0x55, 0x17, 0x20, 0x94, 0x00, 0x28, 2, 2},
+ {48, 2304000, 48000, 0x05, 0x11, 0x53, 0x55, 0x17, 0x20, 0x92, 0x00, 0x28, 2, 2},
+ {50, 400000, 8000, 0x05, 0x14, 0x5D, 0x55, 0x18, 0x24, 0x94, 0x00, 0x27, 2, 2},
+ {50, 800000, 16000, 0x05, 0x14, 0x5D, 0x55, 0x18, 0x24, 0x94, 0x00, 0x27, 2, 2},
+ {50, 2400000, 48000, 0x05, 0x11, 0x53, 0x55, 0x18, 0x24, 0x92, 0x00, 0x27, 2, 2},
+ {64, 512000, 8000, 0x05, 0x14, 0x5D, 0x33, 0x1F, 0x00, 0x94, 0x00, 0x1F, 2, 2},
+ {64, 1024000, 16000, 0x05, 0x13, 0x55, 0x33, 0x1F, 0x00, 0x93, 0x00, 0x1F, 2, 2},
+ {64, 3072000, 48000, 0x05, 0x11, 0x53, 0x33, 0x1F, 0x00, 0x92, 0x00, 0x1F, 2, 2},
+ {72, 576000, 8000, 0x05, 0x14, 0x5D, 0x33, 0x23, 0x08, 0x94, 0x00, 0x1F, 2, 2},
+ {72, 1152000, 16000, 0x05, 0x13, 0x55, 0x33, 0x23, 0x08, 0x93, 0x00, 0x1F, 2, 2},
+ {72, 3456000, 48000, 0x05, 0x11, 0x53, 0x33, 0x23, 0x08, 0x92, 0x00, 0x1F, 2, 2},
+ {96, 768000, 8000, 0x15, 0x34, 0xDD, 0x55, 0x1F, 0x00, 0x94, 0x00, 0x1F, 2, 2},
+ {96, 1536000, 16000, 0x15, 0x34, 0xDD, 0x55, 0x1F, 0x00, 0x93, 0x00, 0x1F, 2, 2},
+ {96, 4608000, 48000, 0x15, 0x33, 0xD5, 0x55, 0x1F, 0x00, 0x92, 0x00, 0x1F, 2, 2},
+ {100, 800000, 8000, 0x05, 0x03, 0x35, 0x33, 0x18, 0x24, 0x94, 0x00, 0x27, 2, 2},
+ {100, 1600000, 16000, 0x05, 0x03, 0x35, 0x33, 0x18, 0x24, 0x93, 0x00, 0x27, 2, 2},
+ {100, 4800000, 48000, 0x03, 0x00, 0x31, 0x33, 0x18, 0x24, 0x92, 0x00, 0x27, 2, 2},
+ {128, 1024000, 8000, 0x05, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x93, 0x01, 0x1F, 2, 2},
+ {128, 2048000, 16000, 0x03, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x01, 0x1F, 2, 2},
+ {128, 6144000, 48000, 0x03, 0x00, 0x31, 0x11, 0x1F, 0x00, 0x92, 0x01, 0x1F, 2, 2},
+ {144, 1152000, 8000, 0x05, 0x03, 0x35, 0x11, 0x23, 0x08, 0x93, 0x01, 0x1F, 2, 2},
+ {144, 2304000, 16000, 0x03, 0x01, 0x33, 0x11, 0x23, 0x08, 0x92, 0x01, 0x1F, 2, 2},
+ {144, 6912000, 48000, 0x03, 0x00, 0x31, 0x11, 0x23, 0x08, 0x92, 0x01, 0x1F, 2, 2},
+ {192, 1536000, 8000, 0x15, 0x14, 0x5D, 0x33, 0x1F, 0x00, 0x93, 0x02, 0x1F, 2, 2},
+ {192, 3072000, 16000, 0x15, 0x13, 0x55, 0x33, 0x1F, 0x00, 0x92, 0x02, 0x1F, 2, 2},
+ {192, 9216000, 48000, 0x15, 0x11, 0x53, 0x33, 0x1F, 0x00, 0x92, 0x02, 0x1F, 2, 2},
+ {250, 12000000, 48000, 0x25, 0x11, 0x53, 0x55, 0x18, 0x24, 0x92, 0x04, 0x27, 2, 2},
+ {256, 2048000, 8000, 0x0D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x03, 0x1F, 2, 2},
+ {256, 4096000, 16000, 0x0B, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x03, 0x1F, 2, 2},
+ {256, 12288000, 48000, 0x0B, 0x00, 0x31, 0x11, 0x1F, 0x00, 0x92, 0x03, 0x1F, 2, 2},
+ {384, 3072000, 8000, 0x15, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x05, 0x1F, 2, 2},
+ {384, 6144000, 16000, 0x13, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x05, 0x1F, 2, 2},
+ {384, 18432000, 48000, 0x13, 0x00, 0x31, 0x11, 0x1F, 0x00, 0x92, 0x05, 0x1F, 2, 2},
+ {400, 19200000, 48000, 0x1B, 0x00, 0x31, 0x33, 0x18, 0x24, 0x92, 0x04, 0x27, 2, 2},
+ {500, 24000000, 48000, 0x23, 0x00, 0x31, 0x33, 0x18, 0x24, 0x92, 0x04, 0x27, 2, 2},
+ {512, 4096000, 8000, 0x1D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x07, 0x1F, 2, 2},
+ {512, 8192000, 16000, 0x1B, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x07, 0x1F, 2, 2},
+ {512, 24576000, 48000, 0x1B, 0x00, 0x31, 0x11, 0x1F, 0x00, 0x92, 0x07, 0x1F, 2, 2},
+ {768, 6144000, 8000, 0x2D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x0B, 0x1F, 2, 2},
+ {768, 12288000, 16000, 0x2B, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x0B, 0x1F, 2, 2},
+ {1024, 8192000, 8000, 0x3D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x0F, 0x1F, 2, 2},
+ {1024, 16384000, 16000, 0x3B, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x0F, 0x1F, 2, 2},
+ {1152, 9216000, 8000, 0x45, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x0F, 0x1F, 2, 2},
+ {1152, 18432000, 16000, 0x43, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x0F, 0x1F, 2, 2},
+ {1200, 9600000, 8000, 0x5D, 0x03, 0x35, 0x33, 0x18, 0x24, 0x92, 0x11, 0x27, 2, 2},
+ {1200, 19200000, 16000, 0x5D, 0x03, 0x35, 0x33, 0x18, 0x24, 0x92, 0x11, 0x27, 2, 2},
+ {1536, 12288000, 8000, 0x5D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x17, 0x1F, 2, 2},
+ {1536, 24576000, 16000, 0x5B, 0x01, 0x33, 0x11, 0x1F, 0x00, 0x92, 0x17, 0x1F, 2, 2},
+ {2048, 16384000, 8000, 0x7D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x1F, 0x1F, 2, 2},
+ {2304, 18432000, 8000, 0x8D, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x23, 0x1F, 2, 2},
+ {2400, 19200000, 8000, 0xBD, 0x03, 0x35, 0x33, 0x18, 0x24, 0x92, 0x25, 0x27, 2, 2},
+ {3072, 24576000, 8000, 0xBD, 0x03, 0x35, 0x11, 0x1F, 0x00, 0x92, 0x2F, 0x1F, 2, 2},
+ {32, 3072000, 96000, 0x05, 0x11, 0x53, 0x55, 0x0F, 0x00, 0x92, 0x00, 0x37, 2, 2},
+ {64, 6144000, 96000, 0x03, 0x00, 0x31, 0x33, 0x0F, 0x00, 0x92, 0x00, 0x37, 2, 2},
+ {96, 9216000, 96000, 0x15, 0x11, 0x53, 0x55, 0x0F, 0x00, 0x92, 0x00, 0x37, 2, 2},
+ {128, 12288000, 96000, 0x0B, 0x00, 0x31, 0x33, 0x0F, 0x00, 0x92, 0x01, 0x37, 2, 2},
+};
+
+static inline int get_coeff(u8 vddd, u8 dmic, int mclk, int rate)
+{
+ int i;
+ u8 dmic_det, vddd_det;
+
+ for (i = 0; i < ARRAY_SIZE(coeff_div); i++) {
+ if (coeff_div[i].rate == rate && coeff_div[i].mclk == mclk) {
+ vddd_det = ~(coeff_div[i].dvdd_vol ^ vddd) & 0x01;
+ dmic_det = ~(coeff_div[i].dmic_sel ^ dmic) & 0x01;
+ vddd_det |= ~(coeff_div[i].dvdd_vol % 2) & 0x01;
+ dmic_det |= ~(coeff_div[i].dmic_sel % 2) & 0x01;
+
+ if (vddd_det && dmic_det)
+ return i;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int es8375_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+ int par_width = params_width(params);
+ u8 dmic_enable, iface = 0;
+ unsigned int regv;
+ int coeff, ret;
+
+ if (es8375->mclk_src == ES8375_BCLK_PIN) {
+ regmap_update_bits(es8375->regmap,
+ ES8375_MCLK_SEL, 0x80, 0x80);
+
+ es8375->mclk_freq = 2 * (unsigned int)par_width * params_rate(params);
+ }
+
+ regmap_read(es8375->regmap, ES8375_ADC1, &regv);
+ dmic_enable = regv >> 7 & 0x01;
+
+ ret = regulator_get_voltage(es8375->core_supply[ES8375_SUPPLY_VD].consumer);
+ switch (ret) {
+ case 1800000 ... 2000000:
+ es8375->vddd = ES8375_1V8;
+ break;
+ case 2500000 ... 3300000:
+ es8375->vddd = ES8375_3V3;
+ break;
+ default:
+ es8375->vddd = ES8375_3V3;
+ break;
+ }
+
+ coeff = get_coeff(es8375->vddd, dmic_enable, es8375->mclk_freq, params_rate(params));
+ if (coeff < 0) {
+ dev_warn(component->dev, "Clock coefficients do not match");
+ }
+ regmap_write(es8375->regmap, ES8375_CLK_MGR4,
+ coeff_div[coeff].Reg0x04);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR5,
+ coeff_div[coeff].Reg0x05);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR6,
+ coeff_div[coeff].Reg0x06);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR7,
+ coeff_div[coeff].Reg0x07);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR8,
+ coeff_div[coeff].Reg0x08);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR9,
+ coeff_div[coeff].Reg0x09);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR10,
+ coeff_div[coeff].Reg0x0A);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR11,
+ coeff_div[coeff].Reg0x0B);
+ regmap_write(es8375->regmap, ES8375_ADC_OSR_GAIN,
+ coeff_div[coeff].Reg0x19);
+
+ switch (params_format(params)) {
+ case SNDRV_PCM_FORMAT_S16_LE:
+ iface |= 0x0c;
+ break;
+ case SNDRV_PCM_FORMAT_S20_3LE:
+ iface |= 0x04;
+ break;
+ case SNDRV_PCM_FORMAT_S24_LE:
+ break;
+ case SNDRV_PCM_FORMAT_S32_LE:
+ iface |= 0x10;
+ break;
+ }
+
+ regmap_update_bits(es8375->regmap, ES8375_SDP, 0x1c, iface);
+
+ return 0;
+}
+
+static int es8375_set_sysclk(struct snd_soc_dai *dai, int clk_id,
+ unsigned int freq, int dir)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+
+ es8375->mclk_freq = freq;
+
+ return 0;
+}
+
+static int es8375_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+ unsigned int iface, codeciface;
+
+ regmap_read(es8375->regmap, ES8375_SDP, &codeciface);
+
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ case SND_SOC_DAIFMT_CBC_CFP:
+ es8375->mastermode = 1;
+ regmap_update_bits(es8375->regmap, ES8375_RESET1,
+ 0x80, 0x80);
+ break;
+ case SND_SOC_DAIFMT_CBC_CFC:
+ es8375->mastermode = 0;
+ regmap_update_bits(es8375->regmap, ES8375_RESET1,
+ 0x80, 0x00);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ codeciface &= 0xFC;
+ break;
+ case SND_SOC_DAIFMT_RIGHT_J:
+ return -EINVAL;
+ case SND_SOC_DAIFMT_LEFT_J:
+ codeciface &= 0xFC;
+ codeciface |= 0x01;
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ codeciface &= 0xDC;
+ codeciface |= 0x03;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ codeciface &= 0xDC;
+ codeciface |= 0x23;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ regmap_read(es8375->regmap, ES8375_CLK_MGR3, &iface);
+
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_NF:
+ iface &= 0xFE;
+ codeciface &= 0xDF;
+ break;
+ case SND_SOC_DAIFMT_IB_IF:
+ iface |= 0x01;
+ codeciface |= 0x20;
+ break;
+ case SND_SOC_DAIFMT_IB_NF:
+ iface |= 0x01;
+ codeciface &= 0xDF;
+ break;
+ case SND_SOC_DAIFMT_NB_IF:
+ iface &= 0xFE;
+ codeciface |= 0x20;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ regmap_write(es8375->regmap, ES8375_CLK_MGR3, iface);
+ regmap_write(es8375->regmap, ES8375_SDP, codeciface);
+
+ return 0;
+}
+
+static int es8375_set_bias_level(struct snd_soc_component *component,
+ enum snd_soc_bias_level level)
+{
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+ int ret;
+
+ switch (level) {
+ case SND_SOC_BIAS_ON:
+ ret = clk_prepare_enable(es8375->mclk);
+ if (ret) {
+ dev_err(component->dev, "unable to prepare mclk\n");
+ return ret;
+ }
+ regmap_write(es8375->regmap, ES8375_CSM1, 0xA6);
+ break;
+ case SND_SOC_BIAS_PREPARE:
+ break;
+ case SND_SOC_BIAS_STANDBY:
+ regmap_write(es8375->regmap, ES8375_CSM1, 0x96);
+ clk_disable_unprepare(es8375->mclk);
+ break;
+ case SND_SOC_BIAS_OFF:
+ break;
+ }
+ return 0;
+}
+
+static int es8375_mute(struct snd_soc_dai *dai, int mute, int stream)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+
+ if (mute) {
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+ regmap_update_bits(es8375->regmap, ES8375_SDP, 0x40, 0x40);
+ else
+ regmap_update_bits(es8375->regmap, ES8375_SDP2, 0x20, 0x20);
+ } else {
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+ regmap_update_bits(es8375->regmap, ES8375_SDP, 0x40, 0x00);
+ else
+ regmap_update_bits(es8375->regmap, ES8375_SDP2, 0x20, 0x00);
+ }
+
+ return 0;
+}
+
+#define es8375_RATES SNDRV_PCM_RATE_8000_96000
+
+#define es8375_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\
+ SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static const struct snd_soc_dai_ops es8375_ops = {
+ .hw_params = es8375_hw_params,
+ .mute_stream = es8375_mute,
+ .set_sysclk = es8375_set_sysclk,
+ .set_fmt = es8375_set_dai_fmt,
+};
+
+static struct snd_soc_dai_driver es8375_dai = {
+ .name = "ES8375 HiFi",
+ .playback = {
+ .stream_name = "AIF1 Playback",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = es8375_RATES,
+ .formats = es8375_FORMATS,
+ },
+ .capture = {
+ .stream_name = "AIF1 Capture",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = es8375_RATES,
+ .formats = es8375_FORMATS,
+ },
+ .ops = &es8375_ops,
+ .symmetric_rate = 1,
+};
+
+static void es8375_init(struct snd_soc_component *component)
+{
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+
+ regmap_write(es8375->regmap, ES8375_CLK_MGR10, 0x95);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR3, 0x48);
+ regmap_write(es8375->regmap, ES8375_DIV_SPKCLK, 0x18);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR4, 0x02);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR5, 0x05);
+ regmap_write(es8375->regmap, ES8375_CSM1, 0x82);
+ regmap_write(es8375->regmap, ES8375_VMID_CHARGE2, 0x20);
+ regmap_write(es8375->regmap, ES8375_VMID_CHARGE3, 0x20);
+ regmap_write(es8375->regmap, ES8375_DAC_CAL, 0x28);
+ regmap_write(es8375->regmap, ES8375_ANALOG_SPK1, 0xFC);
+ regmap_write(es8375->regmap, ES8375_ANALOG_SPK2, 0xE0);
+ regmap_write(es8375->regmap, ES8375_VMID_SEL, 0xFE);
+ regmap_write(es8375->regmap, ES8375_ANALOG1, 0xB8);
+ regmap_write(es8375->regmap, ES8375_SYS_CTRL2, 0x03);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR2, 0x16);
+ regmap_write(es8375->regmap, ES8375_RESET1, 0x00);
+ msleep(80);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR3, 0x00);
+ regmap_write(es8375->regmap, ES8375_CSM1, 0x86);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR4, 0x0B);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR5, 0x00);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR6, 0x31);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR7, 0x11);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR8, 0x1F);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR9, 0x00);
+ regmap_write(es8375->regmap, ES8375_ADC_OSR_GAIN, 0x1F);
+ regmap_write(es8375->regmap, ES8375_ADC2, 0x00);
+ regmap_write(es8375->regmap, ES8375_DAC2, 0x00);
+ regmap_write(es8375->regmap, ES8375_DAC_OTP, 0x88);
+ regmap_write(es8375->regmap, ES8375_ANALOG_SPK2, 0xE7);
+ regmap_write(es8375->regmap, ES8375_ANALOG2, 0xF0);
+ regmap_write(es8375->regmap, ES8375_ANALOG3, 0x40);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR2, 0xFE);
+
+ regmap_update_bits(es8375->regmap, ES8375_SDP, 0x40, 0x40);
+ regmap_update_bits(es8375->regmap, ES8375_SDP2, 0x20, 0x20);
+}
+
+static int es8375_suspend(struct snd_soc_component *component)
+{
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+
+ regmap_write(es8375->regmap, ES8375_CSM1, 0x96);
+ regcache_cache_only(es8375->regmap, true);
+ regcache_mark_dirty(es8375->regmap);
+ return 0;
+}
+
+static int es8375_resume(struct snd_soc_component *component)
+{
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+ unsigned int reg;
+
+ regcache_cache_only(es8375->regmap, false);
+ regcache_cache_bypass(es8375->regmap, true);
+ regmap_read(es8375->regmap, ES8375_CLK_MGR2, &reg);
+ regcache_cache_bypass(es8375->regmap, false);
+
+ if (reg == 0x00)
+ es8375_init(component);
+ else
+ es8375_set_bias_level(component, SND_SOC_BIAS_ON);
+
+ regcache_sync(es8375->regmap);
+
+ return 0;
+}
+
+static int es8375_codec_probe(struct snd_soc_component *component)
+{
+ struct es8375_priv *es8375 = snd_soc_component_get_drvdata(component);
+
+ es8375->mastermode = 0;
+
+ es8375_init(component);
+
+ return 0;
+}
+
+static bool es8375_writeable_register(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case ES8375_CHIP_VERSION:
+ case ES8375_CHIP_ID0:
+ case ES8375_CHIP_ID1:
+ case ES8375_SPK_OFFSET:
+ case ES8375_FLAGS2:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static struct regmap_config es8375_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = ES8375_REG_MAX,
+ .cache_type = REGCACHE_MAPLE,
+ .use_single_read = true,
+ .use_single_write = true,
+ .writeable_reg = es8375_writeable_register,
+};
+
+static struct snd_soc_component_driver es8375_codec_driver = {
+ .probe = es8375_codec_probe,
+ .suspend = es8375_suspend,
+ .resume = es8375_resume,
+ .set_bias_level = es8375_set_bias_level,
+ .controls = es8375_snd_controls,
+ .num_controls = ARRAY_SIZE(es8375_snd_controls),
+ .dapm_widgets = es8375_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(es8375_dapm_widgets),
+ .dapm_routes = es8375_dapm_routes,
+ .num_dapm_routes = ARRAY_SIZE(es8375_dapm_routes),
+
+ .idle_bias_on = 1,
+ .suspend_bias_off = 1,
+};
+
+static int es8375_read_device_properities(struct device *dev, struct es8375_priv *es8375)
+{
+ int ret, i;
+
+ ret = device_property_read_u8(dev, "everest,mclk-src", &es8375->mclk_src);
+ if (ret != 0)
+ es8375->mclk_src = ES8375_MCLK_SOURCE;
+ dev_dbg(dev, "mclk-src %x", es8375->mclk_src);
+
+ for (i = 0; i < ARRAY_SIZE(es8375_core_supplies); i++)
+ es8375->core_supply[i].supply = es8375_core_supplies[i];
+ ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(es8375_core_supplies), es8375->core_supply);
+ if (ret) {
+ dev_err(dev, "Failed to request core supplies %d\n", ret);
+ return ret;
+ }
+
+ es8375->mclk = devm_clk_get(dev, "mclk");
+ if (IS_ERR(es8375->mclk))
+ return dev_err_probe(dev, PTR_ERR(es8375->mclk), "unable to get mclk\n");
+
+ if (!es8375->mclk)
+ dev_warn(dev, "assuming static mclk\n");
+
+ ret = clk_prepare_enable(es8375->mclk);
+ if (ret) {
+ dev_err(dev, "unable to enable mclk\n");
+ return ret;
+ }
+ ret = regulator_bulk_enable(ARRAY_SIZE(es8375_core_supplies), es8375->core_supply);
+ if (ret) {
+ dev_err(dev, "Failed to enable core supplies: %d\n", ret);
+ clk_disable_unprepare(es8375->mclk);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int es8375_i2c_probe(struct i2c_client *i2c_client)
+{
+ struct es8375_priv *es8375;
+ struct device *dev = &i2c_client->dev;
+ int ret;
+ unsigned int val;
+
+ es8375 = devm_kzalloc(&i2c_client->dev, sizeof(*es8375), GFP_KERNEL);
+ if (!es8375)
+ return -ENOMEM;
+
+ es8375->regmap = devm_regmap_init_i2c(i2c_client,
+ &es8375_regmap_config);
+ if (IS_ERR(es8375->regmap))
+ return dev_err_probe(&i2c_client->dev, PTR_ERR(es8375->regmap),
+ "regmap_init() failed\n");
+
+ i2c_set_clientdata(i2c_client, es8375);
+
+ ret = regmap_read(es8375->regmap, ES8375_CHIP_ID1, &val);
+ if (ret < 0) {
+ dev_err(&i2c_client->dev, "failed to read i2c at addr %X\n",
+ i2c_client->addr);
+ return ret;
+ }
+
+ if (val != 0x83) {
+ dev_err(&i2c_client->dev, "device at addr %X is not an es8375\n",
+ i2c_client->addr);
+ return -ENODEV;
+ }
+
+ ret = regmap_read(es8375->regmap, ES8375_CHIP_ID0, &val);
+ if (val != 0x75) {
+ dev_err(&i2c_client->dev, "device at addr %X is not an es8375\n",
+ i2c_client->addr);
+ return -ENODEV;
+ }
+
+ ret = es8375_read_device_properities(dev, es8375);
+ if (ret != 0) {
+ dev_err(&i2c_client->dev, "get an error from dts info %X\n", ret);
+ return ret;
+ }
+
+ return devm_snd_soc_register_component(&i2c_client->dev, &es8375_codec_driver,
+ &es8375_dai, 1);
+}
+
+static void es8375_i2c_shutdown(struct i2c_client *i2c)
+{
+ struct es8375_priv *es8375;
+
+ es8375 = i2c_get_clientdata(i2c);
+
+ regmap_write(es8375->regmap, ES8375_CSM1, 0x3C);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR3, 0x48);
+ regmap_write(es8375->regmap, ES8375_CSM2, 0x80);
+ regmap_write(es8375->regmap, ES8375_CSM1, 0x3E);
+ regmap_write(es8375->regmap, ES8375_CLK_MGR10, 0x15);
+ regmap_write(es8375->regmap, ES8375_SYS_CTRL2, 0x0C);
+ regmap_write(es8375->regmap, ES8375_RESET1, 0x00);
+ regmap_write(es8375->regmap, ES8375_CSM2, 0x00);
+
+ regulator_bulk_disable(ARRAY_SIZE(es8375_core_supplies), es8375->core_supply);
+ clk_disable_unprepare(es8375->mclk);
+}
+
+static const struct i2c_device_id es8375_id[] = {
+ {"es8375"},
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, es8375_id);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id es8375_acpi_match[] = {
+ {"ESSX8375", 0},
+ {},
+};
+
+MODULE_DEVICE_TABLE(acpi, es8375_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id es8375_of_match[] = {
+ {.compatible = "everest,es8375",},
+ {}
+};
+
+MODULE_DEVICE_TABLE(of, es8375_of_match);
+#endif
+
+static struct i2c_driver es8375_i2c_driver = {
+ .driver = {
+ .name = "es8375",
+ .of_match_table = of_match_ptr(es8375_of_match),
+ .acpi_match_table = ACPI_PTR(es8375_acpi_match),
+ },
+ .shutdown = es8375_i2c_shutdown,
+ .probe = es8375_i2c_probe,
+ .id_table = es8375_id,
+};
+module_i2c_driver(es8375_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC ES8375 driver");
+MODULE_AUTHOR("Michael Zhang <zhangyi@everest-semi.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/es8375.h b/sound/soc/codecs/es8375.h
new file mode 100644
index 000000000000..11e3ceec9b68
--- /dev/null
+++ b/sound/soc/codecs/es8375.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+* ES8375.h -- ES8375 ALSA SoC Audio Codec
+*
+* Authors:
+*
+* Based on ES8375.h by Michael Zhang
+*/
+#ifndef _ES8375_H
+#define _ES8375_H
+
+// Registors
+#define ES8375_RESET1 0x00
+#define ES8375_MCLK_SEL 0x01
+#define ES8375_CLK_MGR2 0x02
+#define ES8375_CLK_MGR3 0x03
+#define ES8375_CLK_MGR4 0x04
+#define ES8375_CLK_MGR5 0x05
+#define ES8375_CLK_MGR6 0x06
+#define ES8375_CLK_MGR7 0x07
+#define ES8375_CLK_MGR8 0x08
+#define ES8375_CLK_MGR9 0x09
+#define ES8375_CLK_MGR10 0x0A
+#define ES8375_CLK_MGR11 0x0B
+#define ES8375_CLK_MGR12 0x0C
+#define ES8375_DIV_SPKCLK 0x0E
+#define ES8375_CSM1 0x0F
+#define ES8375_CSM2 0x10
+#define ES8375_VMID_CHARGE2 0x11
+#define ES8375_VMID_CHARGE3 0x12
+#define ES8375_SDP 0x15
+#define ES8375_SDP2 0x16
+#define ES8375_ADC1 0x17
+#define ES8375_ADC2 0x18
+#define ES8375_ADC_OSR_GAIN 0x19
+#define ES8375_ADC_VOLUME 0x1A
+#define ES8375_ADC_AUTOMUTE 0x1B
+#define ES8375_ADC_AUTOMUTE_ATTN 0x1C
+#define ES8375_HPF1 0x1D
+#define ES8375_DAC1 0x1F
+#define ES8375_DAC2 0x20
+#define ES8375_DAC_VOLUME 0x21
+#define ES8375_DAC_VPPSCALE 0x22
+#define ES8375_DAC_AUTOMUTE1 0x23
+#define ES8375_DAC_AUTOMUTE 0x24
+#define ES8375_DAC_CAL 0x25
+#define ES8375_DAC_OTP 0x27
+#define ES8375_ANALOG_SPK1 0x28
+#define ES8375_ANALOG_SPK2 0x29
+#define ES8375_VMID_SEL 0x2D
+#define ES8375_ANALOG1 0x2E
+#define ES8375_ANALOG2 0x32
+#define ES8375_ANALOG3 0x37
+#define ES8375_ADC2DAC_CLKTRI 0xF8
+#define ES8375_SYS_CTRL2 0xF9
+#define ES8375_FLAGS2 0xFB
+#define ES8375_SPK_OFFSET 0xFC
+#define ES8375_CHIP_ID1 0xFD
+#define ES8375_CHIP_ID0 0xFE
+#define ES8375_CHIP_VERSION 0xFF
+
+// Bit Shifts
+#define ADC_OSR_GAIN_SHIFT_0 0
+#define ADC_RAMPRATE_SHIFT_0 0
+#define ADC_VOLUME_SHIFT_0 0
+#define ADC_AUTOMUTE_NG_SHIFT_0 0
+#define ADC_AUTOMUTE_ATTN_SHIFT_0 0
+#define DAC_RAMPRATE_SHIFT_0 0
+#define DAC_VOLUME_SHIFT_0 0
+#define DAC_VPPSCALE_SHIFT_0 0
+#define DAC_AUTOMUTE_NG_SHIFT_0 0
+#define DAC_AUTOMUTE_ATTN_SHIFT_0 0
+#define DMIC_GAIN_SHIFT_2 2
+#define ADC_AUTOMUTE_WS_SHIFT_3 3
+#define DMIC_POL_SHIFT_4 4
+#define DAC_RAMCLR_SHIFT_4 4
+#define ES8375_EN_MODL_SHIFT_4 4
+#define ADC_RAMCLR_SHIFT_5 5
+#define ADC_HPF_SHIFT_5 5
+#define DAC_INV_SHIFT_5 5
+#define DAC_AUTOMUTE_WS_SHIFT_5 5
+#define ES8375_EN_PGAL_SHIFT_5 5
+#define ES8375_ADC_P2S_MUTE_SHIFT_5 5
+#define ADC_INV_SHIFT_6 6
+#define DAC_DEMMUTE_SHIFT_6 6
+#define ES8375_DAC_S2P_MUTE_SHIFT_6 6
+#define ADC_SRC_SHIFT_7 7
+#define ADC_AUTOMUTE_SHIFT_7 7
+#define DAC_DSMMUTE_SHIFT_7 7
+#define DAC_AUTOMUTE_EN_SHIFT_7 7
+
+// Function values
+#define ES8375_ADC_OSR_GAIN_MAX 0x3F
+#define ES8375_DMIC_GAIN_MAX 0x04
+#define ES8375_ADC_AUTOMUTE_ATTN_MAX 0x1F
+#define ES8375_AUTOMUTE_NG_MAX 0x07
+#define ES8375_ADC_VOLUME_MAX 0xFF
+#define ES8375_DAC_VOLUME_MAX 0xFF
+#define ES8375_DAC_VPPSCALE_MAX 0x3F
+#define ES8375_DAC_AUTOMUTE_ATTN_MAX 0x17
+#define ES8375_REG_MAX 0xFF
+
+enum ES8375_supplies {
+ ES8375_SUPPLY_VD = 0,
+ ES8375_SUPPLY_VA,
+};
+
+// Properties
+#define ES8375_3V3 1
+#define ES8375_1V8 0
+
+#define ES8375_MCLK_PIN 0
+#define ES8375_BCLK_PIN 1
+#define ES8375_MCLK_SOURCE ES8375_MCLK_PIN
+
+#define DMIC_POSITIVE_EDGE 0
+#define DMIC_NEGATIVE_EDGE 1
+#define DMIC_POL DMIC_POSITIVE_EDGE
+
+#define PA_SHUTDOWN 0
+#define PA_ENABLE 1
+
+#endif
diff --git a/sound/soc/codecs/es8389.c b/sound/soc/codecs/es8389.c
new file mode 100644
index 000000000000..ba1763f36f17
--- /dev/null
+++ b/sound/soc/codecs/es8389.c
@@ -0,0 +1,962 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * es8389.c -- ES8389 ALSA SoC Audio Codec
+ *
+ * Copyright Everest Semiconductor Co., Ltd
+ *
+ * Authors: Michael Zhang (zhangyi@everest-semi.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/tlv.h>
+#include <sound/soc.h>
+
+#include "es8389.h"
+
+
+/* codec private data */
+
+struct es8389_private {
+ struct regmap *regmap;
+ struct clk *mclk;
+ unsigned int sysclk;
+ int mastermode;
+
+ u8 mclk_src;
+ enum snd_soc_bias_level bias_level;
+};
+
+static bool es8389_volatile_register(struct device *dev,
+ unsigned int reg)
+{
+ if ((reg <= 0xff))
+ return true;
+ else
+ return false;
+}
+
+static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -9550, 50, 0);
+static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -9550, 50, 0);
+static const DECLARE_TLV_DB_SCALE(pga_vol_tlv, 0, 300, 0);
+static const DECLARE_TLV_DB_SCALE(mix_vol_tlv, -9500, 100, 0);
+static const DECLARE_TLV_DB_SCALE(alc_target_tlv, -3200, 200, 0);
+static const DECLARE_TLV_DB_SCALE(alc_max_level, -3200, 200, 0);
+
+static int es8389_dmic_set(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ unsigned int val;
+ bool changed1, changed2;
+
+ val = ucontrol->value.integer.value[0];
+ if (val > 1)
+ return -EINVAL;
+
+ if (val) {
+ regmap_update_bits_check(es8389->regmap, ES8389_DMIC_EN, 0xC0, 0xC0, &changed1);
+ regmap_update_bits_check(es8389->regmap, ES8389_ADC_MODE, 0x03, 0x03, &changed2);
+ } else {
+ regmap_update_bits_check(es8389->regmap, ES8389_DMIC_EN, 0xC0, 0x00, &changed1);
+ regmap_update_bits_check(es8389->regmap, ES8389_ADC_MODE, 0x03, 0x00, &changed2);
+ }
+
+ if (changed1 & changed2)
+ return snd_soc_dapm_mux_update_power(dapm, kcontrol, val, e, NULL);
+ else
+ return 0;
+}
+
+static const char *const alc[] = {
+ "ALC OFF",
+ "ADCR ALC ON",
+ "ADCL ALC ON",
+ "ADCL & ADCL ALC ON",
+};
+
+static const char *const ramprate[] = {
+ "0.125db/1 LRCK",
+ "0.125db/4 LRCK",
+ "0.125db/8 LRCK",
+ "0.125db/16 LRCK",
+ "0.125db/32 LRCK",
+ "0.125db/64 LRCK",
+ "0.125db/128 LRCK",
+ "0.125db/256 LRCK",
+ "0.125db/512 LRCK",
+ "0.125db/1024 LRCK",
+ "0.125db/2048 LRCK",
+ "0.125db/4096 LRCK",
+ "0.125db/8192 LRCK",
+ "0.125db/16384 LRCK",
+ "0.125db/32768 LRCK",
+ "0.125db/65536 LRCK",
+};
+
+static const char *const winsize[] = {
+ "2 LRCK",
+ "4 LRCK",
+ "8 LRCK",
+ "16 LRCK",
+ "32 LRCK",
+ "64 LRCK",
+ "128 LRCK",
+ "256 LRCK",
+ "512 LRCK",
+ "1024 LRCK",
+ "2048 LRCK",
+ "4096 LRCK",
+ "8192 LRCK",
+ "16384 LRCK",
+ "32768 LRCK",
+ "65536 LRCK",
+};
+
+static const struct soc_enum alc_enable =
+ SOC_ENUM_SINGLE(ES8389_ALC_ON, 5, 4, alc);
+static const struct soc_enum alc_ramprate =
+ SOC_ENUM_SINGLE(ES8389_ALC_CTL, 4, 16, ramprate);
+static const struct soc_enum alc_winsize =
+ SOC_ENUM_SINGLE(ES8389_ALC_CTL, 0, 16, winsize);
+
+static const char *const es8389_outl_mux_txt[] = {
+ "Normal",
+ "DAC2 channel to DAC1 channel",
+};
+
+static const char *const es8389_outr_mux_txt[] = {
+ "Normal",
+ "DAC1 channel to DAC2 channel",
+};
+
+static const char *const es8389_dmic_mux_txt[] = {
+ "AMIC",
+ "DMIC",
+};
+
+static const char *const es8389_pga1_texts[] = {
+ "DifferentialL", "Line 1P", "Line 2P"
+};
+
+static const char *const es8389_pga2_texts[] = {
+ "DifferentialR", "Line 2N", "Line 1N"
+};
+
+static const unsigned int es8389_pga_values[] = {
+ 1, 5, 6
+};
+
+static const struct soc_enum es8389_outl_mux_enum =
+ SOC_ENUM_SINGLE(ES8389_DAC_MIX, 5,
+ ARRAY_SIZE(es8389_outl_mux_txt), es8389_outl_mux_txt);
+
+static const struct snd_kcontrol_new es8389_outl_mux_controls =
+ SOC_DAPM_ENUM("OUTL MUX", es8389_outl_mux_enum);
+
+static const struct soc_enum es8389_outr_mux_enum =
+ SOC_ENUM_SINGLE(ES8389_DAC_MIX, 4,
+ ARRAY_SIZE(es8389_outr_mux_txt), es8389_outr_mux_txt);
+
+static const struct snd_kcontrol_new es8389_outr_mux_controls =
+ SOC_DAPM_ENUM("OUTR MUX", es8389_outr_mux_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+ es8389_dmic_mux_enum, ES8389_DMIC_EN, 6, es8389_dmic_mux_txt);
+
+static const struct soc_enum es8389_pgal_enum =
+ SOC_VALUE_ENUM_SINGLE(ES8389_MIC1_GAIN, 4, 7,
+ ARRAY_SIZE(es8389_pga1_texts), es8389_pga1_texts,
+ es8389_pga_values);
+
+static const struct soc_enum es8389_pgar_enum =
+ SOC_VALUE_ENUM_SINGLE(ES8389_MIC2_GAIN, 4, 7,
+ ARRAY_SIZE(es8389_pga2_texts), es8389_pga2_texts,
+ es8389_pga_values);
+
+static const struct snd_kcontrol_new es8389_dmic_mux_controls =
+ SOC_DAPM_ENUM_EXT("ADC MUX", es8389_dmic_mux_enum,
+ snd_soc_dapm_get_enum_double, es8389_dmic_set);
+
+static const struct snd_kcontrol_new es8389_left_mixer_controls[] = {
+ SOC_DAPM_SINGLE("DACR DACL Mixer", ES8389_DAC_MIX, 3, 1, 0),
+};
+
+static const struct snd_kcontrol_new es8389_right_mixer_controls[] = {
+ SOC_DAPM_SINGLE("DACL DACR Mixer", ES8389_DAC_MIX, 2, 1, 0),
+};
+
+static const struct snd_kcontrol_new es8389_leftadc_mixer_controls[] = {
+ SOC_DAPM_SINGLE("ADCL DACL Mixer", ES8389_DAC_MIX, 1, 1, 0),
+};
+
+static const struct snd_kcontrol_new es8389_rightadc_mixer_controls[] = {
+ SOC_DAPM_SINGLE("ADCR DACR Mixer", ES8389_DAC_MIX, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new es8389_adc_mixer_controls[] = {
+ SOC_DAPM_SINGLE("DACL ADCL Mixer", ES8389_ADC_RESET, 7, 1, 0),
+ SOC_DAPM_SINGLE("DACR ADCR Mixer", ES8389_ADC_RESET, 6, 1, 0),
+};
+
+static const struct snd_kcontrol_new es8389_snd_controls[] = {
+ SOC_SINGLE_TLV("ADCL Capture Volume", ES8389_ADCL_VOL, 0, 0xFF, 0, adc_vol_tlv),
+ SOC_SINGLE_TLV("ADCR Capture Volume", ES8389_ADCR_VOL, 0, 0xFF, 0, adc_vol_tlv),
+ SOC_SINGLE_TLV("ADCL PGA Volume", ES8389_MIC1_GAIN, 0, 0x0E, 0, pga_vol_tlv),
+ SOC_SINGLE_TLV("ADCR PGA Volume", ES8389_MIC2_GAIN, 0, 0x0E, 0, pga_vol_tlv),
+
+ SOC_ENUM("PGAL Select", es8389_pgal_enum),
+ SOC_ENUM("PGAR Select", es8389_pgar_enum),
+ SOC_ENUM("ALC Capture Switch", alc_enable),
+ SOC_SINGLE_TLV("ALC Capture Target Level", ES8389_ALC_TARGET,
+ 0, 0x0f, 0, alc_target_tlv),
+ SOC_SINGLE_TLV("ALC Capture Max Gain", ES8389_ALC_GAIN,
+ 0, 0x0f, 0, alc_max_level),
+ SOC_ENUM("ADC Ramp Rate", alc_ramprate),
+ SOC_ENUM("ALC Capture Winsize", alc_winsize),
+ SOC_DOUBLE("ADC OSR Volume ON Switch", ES8389_ADC_MUTE, 6, 7, 1, 0),
+ SOC_SINGLE_TLV("ADC OSR Volume", ES8389_OSR_VOL, 0, 0xFF, 0, adc_vol_tlv),
+ SOC_DOUBLE("ADC OUTPUT Invert Switch", ES8389_ADC_HPF2, 5, 6, 1, 0),
+
+ SOC_SINGLE_TLV("DACL Playback Volume", ES8389_DACL_VOL, 0, 0xFF, 0, dac_vol_tlv),
+ SOC_SINGLE_TLV("DACR Playback Volume", ES8389_DACR_VOL, 0, 0xFF, 0, dac_vol_tlv),
+ SOC_DOUBLE("DAC OUTPUT Invert Switch", ES8389_DAC_INV, 5, 6, 1, 0),
+ SOC_SINGLE_TLV("ADC2DAC Mixer Volume", ES8389_MIX_VOL, 0, 0x7F, 0, mix_vol_tlv),
+};
+
+static const struct snd_soc_dapm_widget es8389_dapm_widgets[] = {
+ /*Input Side*/
+ SND_SOC_DAPM_INPUT("INPUT1"),
+ SND_SOC_DAPM_INPUT("INPUT2"),
+ SND_SOC_DAPM_INPUT("DMIC"),
+ SND_SOC_DAPM_PGA("PGAL", SND_SOC_NOPM, 4, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("PGAR", SND_SOC_NOPM, 4, 0, NULL, 0),
+
+ /*ADCs*/
+ SND_SOC_DAPM_ADC("ADCL", NULL, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_ADC("ADCR", NULL, SND_SOC_NOPM, 0, 0),
+
+ /* Audio Interface */
+ SND_SOC_DAPM_AIF_OUT("I2S OUT", "I2S Capture", 0, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_AIF_IN("I2S IN", "I2S Playback", 0, SND_SOC_NOPM, 0, 0),
+
+ /*DACs*/
+ SND_SOC_DAPM_DAC("DACL", NULL, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_DAC("DACR", NULL, SND_SOC_NOPM, 0, 0),
+
+ /*Output Side*/
+ SND_SOC_DAPM_OUTPUT("HPOL"),
+ SND_SOC_DAPM_OUTPUT("HPOR"),
+
+ /* Digital Interface */
+ SND_SOC_DAPM_PGA("IF DAC", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("IF DACL1", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("IF DACR1", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("IF DACL2", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("IF DACR2", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("IF DACL3", SND_SOC_NOPM, 0, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("IF DACR3", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+ /* Digital Interface Select */
+ SND_SOC_DAPM_MIXER("IF DACL Mixer", SND_SOC_NOPM, 0, 0,
+ &es8389_left_mixer_controls[0],
+ ARRAY_SIZE(es8389_left_mixer_controls)),
+ SND_SOC_DAPM_MIXER("IF DACR Mixer", SND_SOC_NOPM, 0, 0,
+ &es8389_right_mixer_controls[0],
+ ARRAY_SIZE(es8389_right_mixer_controls)),
+ SND_SOC_DAPM_MIXER("IF ADCDACL Mixer", SND_SOC_NOPM, 0, 0,
+ &es8389_leftadc_mixer_controls[0],
+ ARRAY_SIZE(es8389_leftadc_mixer_controls)),
+ SND_SOC_DAPM_MIXER("IF ADCDACR Mixer", SND_SOC_NOPM, 0, 0,
+ &es8389_rightadc_mixer_controls[0],
+ ARRAY_SIZE(es8389_rightadc_mixer_controls)),
+
+ SND_SOC_DAPM_MIXER("ADC Mixer", SND_SOC_NOPM, 0, 0,
+ &es8389_adc_mixer_controls[0],
+ ARRAY_SIZE(es8389_adc_mixer_controls)),
+ SND_SOC_DAPM_MUX("ADC MUX", SND_SOC_NOPM, 0, 0, &es8389_dmic_mux_controls),
+
+ SND_SOC_DAPM_MUX("OUTL MUX", SND_SOC_NOPM, 0, 0, &es8389_outl_mux_controls),
+ SND_SOC_DAPM_MUX("OUTR MUX", SND_SOC_NOPM, 0, 0, &es8389_outr_mux_controls),
+};
+
+
+static const struct snd_soc_dapm_route es8389_dapm_routes[] = {
+ {"PGAL", NULL, "INPUT1"},
+ {"PGAR", NULL, "INPUT2"},
+
+ {"ADCL", NULL, "PGAL"},
+ {"ADCR", NULL, "PGAR"},
+
+ {"ADC Mixer", "DACL ADCL Mixer", "DACL"},
+ {"ADC Mixer", "DACR ADCR Mixer", "DACR"},
+ {"ADC Mixer", NULL, "ADCL"},
+ {"ADC Mixer", NULL, "ADCR"},
+
+ {"ADC MUX", "AMIC", "ADC Mixer"},
+ {"ADC MUX", "DMIC", "DMIC"},
+
+ {"I2S OUT", NULL, "ADC MUX"},
+
+ {"DACL", NULL, "I2S IN"},
+ {"DACR", NULL, "I2S IN"},
+
+ {"IF DACL1", NULL, "DACL"},
+ {"IF DACR1", NULL, "DACR"},
+ {"IF DACL2", NULL, "DACL"},
+ {"IF DACR2", NULL, "DACR"},
+ {"IF DACL3", NULL, "DACL"},
+ {"IF DACR3", NULL, "DACR"},
+
+ {"IF DACL Mixer", NULL, "IF DACL2"},
+ {"IF DACL Mixer", "DACR DACL Mixer", "IF DACR1"},
+ {"IF DACR Mixer", NULL, "IF DACR2"},
+ {"IF DACR Mixer", "DACL DACR Mixer", "IF DACL1"},
+
+ {"IF ADCDACL Mixer", NULL, "IF DACL Mixer"},
+ {"IF ADCDACL Mixer", "ADCL DACL Mixer", "IF DACL3"},
+ {"IF ADCDACR Mixer", NULL, "IF DACR Mixer"},
+ {"IF ADCDACR Mixer", "ADCR DACR Mixer", "IF DACR3"},
+
+ {"OUTL MUX", "Normal", "IF ADCDACL Mixer"},
+ {"OUTL MUX", "DAC2 channel to DAC1 channel", "IF ADCDACR Mixer"},
+ {"OUTR MUX", "Normal", "IF ADCDACR Mixer"},
+ {"OUTR MUX", "DAC1 channel to DAC2 channel", "IF ADCDACL Mixer"},
+
+ {"HPOL", NULL, "OUTL MUX"},
+ {"HPOR", NULL, "OUTR MUX"},
+
+};
+
+struct _coeff_div {
+ u16 fs;
+ u32 mclk;
+ u32 rate;
+ u8 Reg0x04;
+ u8 Reg0x05;
+ u8 Reg0x06;
+ u8 Reg0x07;
+ u8 Reg0x08;
+ u8 Reg0x09;
+ u8 Reg0x0A;
+ u8 Reg0x0F;
+ u8 Reg0x11;
+ u8 Reg0x21;
+ u8 Reg0x22;
+ u8 Reg0x26;
+ u8 Reg0x30;
+ u8 Reg0x41;
+ u8 Reg0x42;
+ u8 Reg0x43;
+ u8 Reg0xF0;
+ u8 Reg0xF1;
+ u8 Reg0x16;
+ u8 Reg0x18;
+ u8 Reg0x19;
+};
+
+/* codec hifi mclk clock divider coefficients */
+static const struct _coeff_div coeff_div[] = {
+ {32, 256000, 8000, 0x00, 0x57, 0x84, 0xD0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {36, 288000, 8000, 0x00, 0x55, 0x84, 0xD0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x23, 0x8F, 0xB7, 0xC0, 0x1F, 0x8F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {48, 384000, 8000, 0x02, 0x5F, 0x04, 0xC0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {64, 512000, 8000, 0x00, 0x4D, 0x24, 0xC0, 0x03, 0xD1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {72, 576000, 8000, 0x00, 0x45, 0x24, 0xC0, 0x01, 0xD1, 0x90, 0x00, 0x00, 0x23, 0x8F, 0xB7, 0xC0, 0x1F, 0x8F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {96, 768000, 8000, 0x02, 0x57, 0x84, 0xD0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {128, 1024000, 8000, 0x00, 0x45, 0x04, 0xD0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {192, 1536000, 8000, 0x02, 0x4D, 0x24, 0xC0, 0x03, 0xD1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {256, 2048000, 8000, 0x01, 0x45, 0x04, 0xD0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {288, 2304000, 8000, 0x01, 0x51, 0x00, 0xC0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x23, 0x8F, 0xB7, 0xC0, 0x1F, 0x8F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {384, 3072000, 8000, 0x02, 0x45, 0x04, 0xD0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {512, 4096000, 8000, 0x00, 0x41, 0x04, 0xE0, 0x00, 0xD1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {768, 6144000, 8000, 0x05, 0x45, 0x04, 0xD0, 0x03, 0xC1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {1024, 8192000, 8000, 0x01, 0x41, 0x06, 0xE0, 0x00, 0xD1, 0xB0, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {1536, 12288000, 8000, 0x02, 0x41, 0x04, 0xE0, 0x00, 0xD1, 0xB0, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {1625, 13000000, 8000, 0x40, 0x6E, 0x05, 0xC8, 0x01, 0xC2, 0x90, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {2048, 16384000, 8000, 0x03, 0x44, 0x01, 0xC0, 0x00, 0xD2, 0x80, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {2304, 18432000, 8000, 0x11, 0x45, 0x25, 0xF0, 0x00, 0xD1, 0xB0, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {3072, 24576000, 8000, 0x05, 0x44, 0x01, 0xC0, 0x00, 0xD2, 0x80, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {32, 512000, 16000, 0x00, 0x55, 0x84, 0xD0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {36, 576000, 16000, 0x00, 0x55, 0x84, 0xD0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x23, 0x8F, 0xB7, 0xC0, 0x1F, 0x8F, 0x01, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {48, 768000, 16000, 0x02, 0x57, 0x04, 0xC0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {50, 800000, 16000, 0x00, 0x7E, 0x01, 0xD9, 0x00, 0xC2, 0x80, 0x00, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0xC7, 0x95, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {64, 1024000, 16000, 0x00, 0x45, 0x24, 0xC0, 0x01, 0xD1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {72, 1152000, 16000, 0x00, 0x45, 0x24, 0xC0, 0x01, 0xD1, 0x90, 0x00, 0x00, 0x23, 0x8F, 0xB7, 0xC0, 0x1F, 0x8F, 0x01, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {96, 1536000, 16000, 0x02, 0x55, 0x84, 0xD0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {128, 2048000, 16000, 0x00, 0x51, 0x04, 0xD0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {144, 2304000, 16000, 0x00, 0x51, 0x00, 0xC0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x23, 0x8F, 0xB7, 0xC0, 0x1F, 0x8F, 0x01, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {192, 3072000, 16000, 0x02, 0x65, 0x25, 0xE0, 0x00, 0xE1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {256, 4096000, 16000, 0x00, 0x41, 0x04, 0xC0, 0x01, 0xD1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {300, 4800000, 16000, 0x02, 0x66, 0x01, 0xD9, 0x00, 0xC2, 0x80, 0x00, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0xC7, 0x95, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {384, 6144000, 16000, 0x02, 0x51, 0x04, 0xD0, 0x01, 0xC1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {512, 8192000, 16000, 0x01, 0x41, 0x04, 0xC0, 0x01, 0xD1, 0x90, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {750, 12000000, 16000, 0x0E, 0x7E, 0x01, 0xC9, 0x00, 0xC2, 0x80, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0xC7, 0x95, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {768, 12288000, 16000, 0x02, 0x41, 0x04, 0xC0, 0x01, 0xD1, 0x90, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {1024, 16384000, 16000, 0x03, 0x41, 0x04, 0xC0, 0x01, 0xD1, 0x90, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {1152, 18432000, 16000, 0x08, 0x51, 0x04, 0xD0, 0x01, 0xC1, 0x90, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {1200, 19200000, 16000, 0x0B, 0x66, 0x01, 0xD9, 0x00, 0xC2, 0x80, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0xC7, 0x95, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {1500, 24000000, 16000, 0x0E, 0x26, 0x01, 0xD9, 0x00, 0xC2, 0x80, 0xC0, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0xC7, 0x95, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {1536, 24576000, 16000, 0x05, 0x41, 0x04, 0xC0, 0x01, 0xD1, 0x90, 0xC0, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0xFF, 0x7F, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {1625, 26000000, 16000, 0x40, 0x6E, 0x05, 0xC8, 0x01, 0xC2, 0x90, 0xC0, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x12, 0x31, 0x0E},
+ {800, 19200000, 24000, 0x07, 0x66, 0x01, 0xD9, 0x00, 0xC2, 0x80, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0xC7, 0x95, 0x00, 0x12, 0x00, 0x1A, 0x49, 0x14},
+ {600, 19200000, 32000, 0x05, 0x46, 0x01, 0xD8, 0x10, 0xD2, 0x80, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x23, 0x61, 0x1B},
+ {32, 1411200, 44100, 0x00, 0x45, 0xA4, 0xD0, 0x10, 0xD1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {64, 2822400, 44100, 0x00, 0x51, 0x00, 0xC0, 0x10, 0xC1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {128, 5644800, 44100, 0x00, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {256, 11289600, 44100, 0x01, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {512, 22579200, 44100, 0x03, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0xC0, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {32, 1536000, 48000, 0x00, 0x45, 0xA4, 0xD0, 0x10, 0xD1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {48, 2304000, 48000, 0x02, 0x55, 0x04, 0xC0, 0x10, 0xC1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {50, 2400000, 48000, 0x00, 0x76, 0x01, 0xC8, 0x10, 0xC2, 0x80, 0x00, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {64, 3072000, 48000, 0x00, 0x51, 0x04, 0xC0, 0x10, 0xC1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {100, 4800000, 48000, 0x00, 0x46, 0x01, 0xD8, 0x10, 0xD2, 0x80, 0x00, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {125, 6000000, 48000, 0x04, 0x6E, 0x05, 0xC8, 0x10, 0xC2, 0x80, 0x00, 0x01, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {128, 6144000, 48000, 0x00, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0x00, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {200, 9600000, 48000, 0x01, 0x46, 0x01, 0xD8, 0x10, 0xD2, 0x80, 0x00, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {250, 12000000, 48000, 0x04, 0x76, 0x01, 0xC8, 0x10, 0xC2, 0x80, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {256, 12288000, 48000, 0x01, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {384, 18432000, 48000, 0x02, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0x40, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {400, 19200000, 48000, 0x03, 0x46, 0x01, 0xD8, 0x10, 0xD2, 0x80, 0x40, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {500, 24000000, 48000, 0x04, 0x46, 0x01, 0xD8, 0x10, 0xD2, 0x80, 0xC0, 0x00, 0x18, 0x95, 0xD0, 0xC0, 0x63, 0x95, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {512, 24576000, 48000, 0x03, 0x41, 0x04, 0xD0, 0x10, 0xD1, 0x80, 0xC0, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {800, 38400000, 48000, 0x18, 0x45, 0x04, 0xC0, 0x10, 0xC1, 0x80, 0xC0, 0x00, 0x1F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x00, 0x12, 0x00, 0x35, 0x91, 0x28},
+ {128, 11289600, 88200, 0x00, 0x50, 0x00, 0xC0, 0x10, 0xC1, 0x80, 0x40, 0x00, 0x9F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x80, 0x12, 0xC0, 0x32, 0x89, 0x25},
+ {64, 6144000, 96000, 0x00, 0x41, 0x00, 0xD0, 0x10, 0xD1, 0x80, 0x00, 0x00, 0x9F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x80, 0x12, 0xC0, 0x35, 0x91, 0x28},
+ {128, 12288000, 96000, 0x00, 0x50, 0x00, 0xC0, 0x10, 0xC1, 0x80, 0xC0, 0x00, 0x9F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x80, 0x12, 0xC0, 0x35, 0x91, 0x28},
+ {256, 24576000, 96000, 0x00, 0x40, 0x00, 0xC0, 0x10, 0xC1, 0x80, 0xC0, 0x00, 0x9F, 0x7F, 0xBF, 0xC0, 0x7F, 0x7F, 0x80, 0x12, 0xC0, 0x35, 0x91, 0x28},
+ {128, 24576000, 192000, 0x00, 0x50, 0x00, 0xC0, 0x18, 0xC1, 0x81, 0xC0, 0x00, 0x8F, 0x7F, 0xEF, 0xC0, 0x3F, 0x7F, 0x80, 0x12, 0xC0, 0x3F, 0xF9, 0x3F},
+
+ {50, 400000, 8000, 0x00, 0x75, 0x05, 0xC8, 0x01, 0xC1, 0x90, 0x10, 0x00, 0x18, 0xC7, 0xD0, 0xC0, 0x8F, 0xC7, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {600, 4800000, 8000, 0x05, 0x65, 0x25, 0xF9, 0x00, 0xD1, 0x90, 0x10, 0x00, 0x18, 0xC7, 0xD0, 0xC0, 0x8F, 0xC7, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {1500, 12000000, 8000, 0x0E, 0x25, 0x25, 0xE8, 0x00, 0xD1, 0x90, 0x40, 0x00, 0x31, 0xC7, 0xC5, 0x00, 0x8F, 0xC7, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {2400, 19200000, 8000, 0x0B, 0x01, 0x00, 0xD0, 0x00, 0xD1, 0x80, 0x90, 0x00, 0x31, 0xC7, 0xC5, 0x00, 0xC7, 0xC7, 0x00, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {3000, 24000000, 8000, 0x0E, 0x24, 0x05, 0xD0, 0x00, 0xC2, 0x80, 0xC0, 0x00, 0x31, 0xC7, 0xC5, 0x00, 0x8F, 0xC7, 0x01, 0x12, 0x00, 0x09, 0x19, 0x07},
+ {3250, 26000000, 8000, 0x40, 0x05, 0xA4, 0xC0, 0x00, 0xD1, 0x80, 0xD0, 0x00, 0x31, 0xC7, 0xC5, 0x00, 0xC7, 0xC7, 0x00, 0x12, 0x00, 0x09, 0x19, 0x07},
+};
+
+static inline int get_coeff(int mclk, int rate)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(coeff_div); i++) {
+ if (coeff_div[i].rate == rate && coeff_div[i].mclk == mclk)
+ return i;
+ }
+ return -EINVAL;
+}
+
+/*
+ * if PLL not be used, use internal clk1 for mclk,otherwise, use internal clk2 for PLL source.
+ */
+static int es8389_set_dai_sysclk(struct snd_soc_dai *dai,
+ int clk_id, unsigned int freq, int dir)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ es8389->sysclk = freq;
+
+ return 0;
+}
+
+static int es8389_set_tdm_slot(struct snd_soc_dai *dai,
+ unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ regmap_update_bits(es8389->regmap, ES8389_PTDM_SLOT,
+ ES8389_TDM_SLOT, (slots << ES8389_TDM_SHIFT));
+ regmap_update_bits(es8389->regmap, ES8389_DAC_RAMP,
+ ES8389_TDM_SLOT, (slots << ES8389_TDM_SHIFT));
+
+ return 0;
+}
+
+static int es8389_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+ u8 state = 0;
+
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ case SND_SOC_DAIFMT_CBC_CFP:
+ regmap_update_bits(es8389->regmap, ES8389_MASTER_MODE,
+ ES8389_MASTER_MODE_EN, ES8389_MASTER_MODE_EN);
+ es8389->mastermode = 1;
+ break;
+ case SND_SOC_DAIFMT_CBC_CFC:
+ es8389->mastermode = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ state |= ES8389_DAIFMT_I2S;
+ break;
+ case SND_SOC_DAIFMT_RIGHT_J:
+ dev_err(component->dev, "component driver does not support right justified\n");
+ return -EINVAL;
+ case SND_SOC_DAIFMT_LEFT_J:
+ state |= ES8389_DAIFMT_LEFT_J;
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ state |= ES8389_DAIFMT_DSP_A;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ state |= ES8389_DAIFMT_DSP_B;
+ break;
+ default:
+ break;
+ }
+ regmap_update_bits(es8389->regmap, ES8389_ADC_FORMAT_MUTE, ES8389_DAIFMT_MASK, state);
+ regmap_update_bits(es8389->regmap, ES8389_DAC_FORMAT_MUTE, ES8389_DAIFMT_MASK, state);
+
+ return 0;
+}
+
+static int es8389_pcm_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+ int coeff;
+ u8 state = 0;
+
+ switch (params_format(params)) {
+ case SNDRV_PCM_FORMAT_S16_LE:
+ state |= ES8389_S16_LE;
+ break;
+ case SNDRV_PCM_FORMAT_S20_3LE:
+ state |= ES8389_S20_3_LE;
+ break;
+ case SNDRV_PCM_FORMAT_S18_3LE:
+ state |= ES8389_S18_LE;
+ break;
+ case SNDRV_PCM_FORMAT_S24_LE:
+ state |= ES8389_S24_LE;
+ break;
+ case SNDRV_PCM_FORMAT_S32_LE:
+ state |= ES8389_S32_LE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ regmap_update_bits(es8389->regmap, ES8389_ADC_FORMAT_MUTE, ES8389_DATA_LEN_MASK, state);
+ regmap_update_bits(es8389->regmap, ES8389_DAC_FORMAT_MUTE, ES8389_DATA_LEN_MASK, state);
+
+ if (es8389->mclk_src == ES8389_SCLK_PIN) {
+ regmap_update_bits(es8389->regmap, ES8389_MASTER_CLK,
+ ES8389_MCLK_SOURCE, es8389->mclk_src);
+ es8389->sysclk = params_channels(params) * params_width(params) * params_rate(params);
+ }
+
+ coeff = get_coeff(es8389->sysclk, params_rate(params));
+ if (coeff >= 0) {
+ regmap_write(es8389->regmap, ES8389_CLK_DIV1, coeff_div[coeff].Reg0x04);
+ regmap_write(es8389->regmap, ES8389_CLK_MUL, coeff_div[coeff].Reg0x05);
+ regmap_write(es8389->regmap, ES8389_CLK_MUX1, coeff_div[coeff].Reg0x06);
+ regmap_write(es8389->regmap, ES8389_CLK_MUX2, coeff_div[coeff].Reg0x07);
+ regmap_write(es8389->regmap, ES8389_CLK_CTL1, coeff_div[coeff].Reg0x08);
+ regmap_write(es8389->regmap, ES8389_CLK_CTL2, coeff_div[coeff].Reg0x09);
+ regmap_write(es8389->regmap, ES8389_CLK_CTL3, coeff_div[coeff].Reg0x0A);
+ regmap_update_bits(es8389->regmap, ES8389_OSC_CLK,
+ 0xC0, coeff_div[coeff].Reg0x0F);
+ regmap_write(es8389->regmap, ES8389_CLK_DIV2, coeff_div[coeff].Reg0x11);
+ regmap_write(es8389->regmap, ES8389_ADC_OSR, coeff_div[coeff].Reg0x21);
+ regmap_write(es8389->regmap, ES8389_ADC_DSP, coeff_div[coeff].Reg0x22);
+ regmap_write(es8389->regmap, ES8389_OSR_VOL, coeff_div[coeff].Reg0x26);
+ regmap_update_bits(es8389->regmap, ES8389_SYSTEM30,
+ 0xC0, coeff_div[coeff].Reg0x30);
+ regmap_write(es8389->regmap, ES8389_DAC_DSM_OSR, coeff_div[coeff].Reg0x41);
+ regmap_write(es8389->regmap, ES8389_DAC_DSP_OSR, coeff_div[coeff].Reg0x42);
+ regmap_update_bits(es8389->regmap, ES8389_DAC_MISC,
+ 0x81, coeff_div[coeff].Reg0x43);
+ regmap_update_bits(es8389->regmap, ES8389_CHIP_MISC,
+ 0x72, coeff_div[coeff].Reg0xF0);
+ regmap_write(es8389->regmap, ES8389_CSM_STATE1, coeff_div[coeff].Reg0xF1);
+ regmap_write(es8389->regmap, ES8389_SYSTEM16, coeff_div[coeff].Reg0x16);
+ regmap_write(es8389->regmap, ES8389_SYSTEM18, coeff_div[coeff].Reg0x18);
+ regmap_write(es8389->regmap, ES8389_SYSTEM19, coeff_div[coeff].Reg0x19);
+ } else {
+ dev_warn(component->dev, "Clock coefficients do not match");
+ }
+
+ return 0;
+}
+
+static int es8389_set_bias_level(struct snd_soc_component *component,
+ enum snd_soc_bias_level level)
+{
+ int ret;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ switch (level) {
+ case SND_SOC_BIAS_ON:
+ ret = clk_prepare_enable(es8389->mclk);
+ if (ret)
+ return ret;
+
+ regmap_update_bits(es8389->regmap, ES8389_HPSW, 0x20, 0x20);
+ regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0xD9);
+ regmap_write(es8389->regmap, ES8389_ADC_EN, 0x8F);
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0xE4);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x01);
+ regmap_write(es8389->regmap, ES8389_CLK_OFF1, 0xC3);
+ regmap_update_bits(es8389->regmap, ES8389_ADC_HPF1, 0x0f, 0x0a);
+ regmap_update_bits(es8389->regmap, ES8389_ADC_HPF2, 0x0f, 0x0a);
+ usleep_range(70000, 72000);
+ regmap_write(es8389->regmap, ES8389_DAC_RESET, 0X00);
+ break;
+ case SND_SOC_BIAS_PREPARE:
+ break;
+ case SND_SOC_BIAS_STANDBY:
+ regmap_update_bits(es8389->regmap, ES8389_ADC_HPF1, 0x0f, 0x04);
+ regmap_update_bits(es8389->regmap, ES8389_ADC_HPF2, 0x0f, 0x04);
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0xD4);
+ usleep_range(70000, 72000);
+ regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0x59);
+ regmap_write(es8389->regmap, ES8389_ADC_EN, 0x00);
+ regmap_write(es8389->regmap, ES8389_CLK_OFF1, 0x00);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x7E);
+ regmap_update_bits(es8389->regmap, ES8389_DAC_INV, 0x80, 0x80);
+ usleep_range(8000, 8500);
+ regmap_update_bits(es8389->regmap, ES8389_DAC_INV, 0x80, 0x00);
+
+ clk_disable_unprepare(es8389->mclk);
+ break;
+ case SND_SOC_BIAS_OFF:
+ break;
+ }
+ return 0;
+}
+
+
+
+static int es8389_mute(struct snd_soc_dai *dai, int mute, int direction)
+{
+ struct snd_soc_component *component = dai->component;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ if (mute) {
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ regmap_update_bits(es8389->regmap, ES8389_DAC_FORMAT_MUTE,
+ 0x03, 0x03);
+ } else {
+ regmap_update_bits(es8389->regmap, ES8389_ADC_FORMAT_MUTE,
+ 0x03, 0x03);
+ }
+ } else {
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ regmap_update_bits(es8389->regmap, ES8389_DAC_FORMAT_MUTE,
+ 0x03, 0x00);
+ } else {
+ regmap_update_bits(es8389->regmap, ES8389_ADC_FORMAT_MUTE,
+ 0x03, 0x00);
+ }
+ }
+
+ return 0;
+}
+
+#define es8389_RATES SNDRV_PCM_RATE_8000_96000
+
+#define es8389_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\
+ SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static const struct snd_soc_dai_ops es8389_ops = {
+ .hw_params = es8389_pcm_hw_params,
+ .set_fmt = es8389_set_dai_fmt,
+ .set_sysclk = es8389_set_dai_sysclk,
+ .set_tdm_slot = es8389_set_tdm_slot,
+ .mute_stream = es8389_mute,
+};
+
+static struct snd_soc_dai_driver es8389_dai = {
+ .name = "ES8389 HiFi",
+ .playback = {
+ .stream_name = "Playback",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = es8389_RATES,
+ .formats = es8389_FORMATS,
+ },
+ .capture = {
+ .stream_name = "Capture",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = es8389_RATES,
+ .formats = es8389_FORMATS,
+ },
+ .ops = &es8389_ops,
+ .symmetric_rate = 1,
+};
+
+static void es8389_init(struct snd_soc_component *component)
+{
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ regmap_write(es8389->regmap, ES8389_ISO_CTL, 0x00);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x7E);
+ regmap_write(es8389->regmap, ES8389_ISO_CTL, 0x38);
+ regmap_write(es8389->regmap, ES8389_ADC_HPF1, 0x64);
+ regmap_write(es8389->regmap, ES8389_ADC_HPF2, 0x04);
+ regmap_write(es8389->regmap, ES8389_DAC_INV, 0x03);
+
+ regmap_write(es8389->regmap, ES8389_VMID, 0x2A);
+ regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0xC9);
+ regmap_write(es8389->regmap, ES8389_ANA_VSEL, 0x4F);
+ regmap_write(es8389->regmap, ES8389_ANA_CTL2, 0x06);
+ regmap_write(es8389->regmap, ES8389_LOW_POWER1, 0x00);
+ regmap_write(es8389->regmap, ES8389_DMIC_EN, 0x16);
+
+ regmap_write(es8389->regmap, ES8389_PGA_SW, 0xAA);
+ regmap_write(es8389->regmap, ES8389_MOD_SW1, 0x66);
+ regmap_write(es8389->regmap, ES8389_MOD_SW2, 0x99);
+ regmap_write(es8389->regmap, ES8389_ADC_MODE, (0x00 | ES8389_TDM_MODE));
+ regmap_update_bits(es8389->regmap, ES8389_DMIC_EN, 0xC0, 0x00);
+ regmap_update_bits(es8389->regmap, ES8389_ADC_MODE, 0x03, 0x00);
+
+ regmap_update_bits(es8389->regmap, ES8389_MIC1_GAIN,
+ ES8389_MIC_SEL_MASK, ES8389_MIC_DEFAULT);
+ regmap_update_bits(es8389->regmap, ES8389_MIC2_GAIN,
+ ES8389_MIC_SEL_MASK, ES8389_MIC_DEFAULT);
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0xC4);
+ regmap_write(es8389->regmap, ES8389_MASTER_MODE, 0x08);
+ regmap_write(es8389->regmap, ES8389_CSM_STATE1, 0x00);
+ regmap_write(es8389->regmap, ES8389_SYSTEM12, 0x01);
+ regmap_write(es8389->regmap, ES8389_SYSTEM13, 0x01);
+ regmap_write(es8389->regmap, ES8389_SYSTEM14, 0x01);
+ regmap_write(es8389->regmap, ES8389_SYSTEM15, 0x01);
+ regmap_write(es8389->regmap, ES8389_SYSTEM16, 0x35);
+ regmap_write(es8389->regmap, ES8389_SYSTEM17, 0x09);
+ regmap_write(es8389->regmap, ES8389_SYSTEM18, 0x91);
+ regmap_write(es8389->regmap, ES8389_SYSTEM19, 0x28);
+ regmap_write(es8389->regmap, ES8389_SYSTEM1A, 0x01);
+ regmap_write(es8389->regmap, ES8389_SYSTEM1B, 0x01);
+ regmap_write(es8389->regmap, ES8389_SYSTEM1C, 0x11);
+
+ regmap_write(es8389->regmap, ES8389_CHIP_MISC, 0x13);
+ regmap_write(es8389->regmap, ES8389_MASTER_CLK, 0x00);
+ regmap_write(es8389->regmap, ES8389_CLK_DIV1, 0x00);
+ regmap_write(es8389->regmap, ES8389_CLK_MUL, 0x10);
+ regmap_write(es8389->regmap, ES8389_CLK_MUX1, 0x00);
+ regmap_write(es8389->regmap, ES8389_CLK_MUX2, 0xC0);
+ regmap_write(es8389->regmap, ES8389_CLK_CTL1, 0x00);
+ regmap_write(es8389->regmap, ES8389_CLK_CTL2, 0xC0);
+ regmap_write(es8389->regmap, ES8389_CLK_CTL3, 0x80);
+ regmap_write(es8389->regmap, ES8389_SCLK_DIV, 0x04);
+ regmap_write(es8389->regmap, ES8389_LRCK_DIV1, 0x01);
+ regmap_write(es8389->regmap, ES8389_LRCK_DIV2, 0x00);
+ regmap_write(es8389->regmap, ES8389_OSC_CLK, 0x00);
+ regmap_write(es8389->regmap, ES8389_ADC_OSR, 0x1F);
+ regmap_write(es8389->regmap, ES8389_ADC_DSP, 0x7F);
+ regmap_write(es8389->regmap, ES8389_ADC_MUTE, 0xC0);
+ regmap_write(es8389->regmap, ES8389_SYSTEM30, 0xF4);
+ regmap_write(es8389->regmap, ES8389_DAC_DSM_OSR, 0x7F);
+ regmap_write(es8389->regmap, ES8389_DAC_DSP_OSR, 0x7F);
+ regmap_write(es8389->regmap, ES8389_DAC_MISC, 0x10);
+ regmap_write(es8389->regmap, ES8389_DAC_RAMP, 0x0F);
+ regmap_write(es8389->regmap, ES8389_SYSTEM4C, 0xC0);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x00);
+ regmap_write(es8389->regmap, ES8389_CLK_OFF1, 0xC1);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x01);
+ regmap_write(es8389->regmap, ES8389_DAC_RESET, 0x02);
+
+ regmap_update_bits(es8389->regmap, ES8389_ADC_FORMAT_MUTE, 0x03, 0x03);
+ regmap_update_bits(es8389->regmap, ES8389_DAC_FORMAT_MUTE, 0x03, 0x03);
+}
+
+static int es8389_suspend(struct snd_soc_component *component)
+{
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ es8389_set_bias_level(component, SND_SOC_BIAS_STANDBY);
+ regcache_cache_only(es8389->regmap, true);
+ regcache_mark_dirty(es8389->regmap);
+
+ return 0;
+}
+
+static int es8389_resume(struct snd_soc_component *component)
+{
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+ unsigned int regv;
+
+ regcache_cache_only(es8389->regmap, false);
+ regcache_cache_bypass(es8389->regmap, true);
+ regmap_read(es8389->regmap, ES8389_RESET, &regv);
+ regcache_cache_bypass(es8389->regmap, false);
+
+ if (regv == 0xff)
+ es8389_init(component);
+ else
+ es8389_set_bias_level(component, SND_SOC_BIAS_ON);
+
+ regcache_sync(es8389->regmap);
+
+ return 0;
+}
+
+static int es8389_probe(struct snd_soc_component *component)
+{
+ int ret;
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ ret = device_property_read_u8(component->dev, "everest,mclk-src", &es8389->mclk_src);
+ if (ret != 0) {
+ dev_dbg(component->dev, "mclk-src return %d", ret);
+ es8389->mclk_src = ES8389_MCLK_SOURCE;
+ }
+
+ es8389->mclk = devm_clk_get(component->dev, "mclk");
+ if (IS_ERR(es8389->mclk))
+ return dev_err_probe(component->dev, PTR_ERR(es8389->mclk),
+ "ES8389 is unable to get mclk\n");
+
+ if (!es8389->mclk)
+ dev_err(component->dev, "%s, assuming static mclk\n", __func__);
+
+ ret = clk_prepare_enable(es8389->mclk);
+ if (ret) {
+ dev_err(component->dev, "%s, unable to enable mclk\n", __func__);
+ return ret;
+ }
+
+ es8389_init(component);
+ es8389_set_bias_level(component, SND_SOC_BIAS_STANDBY);
+
+ return 0;
+}
+
+static void es8389_remove(struct snd_soc_component *component)
+{
+ struct es8389_private *es8389 = snd_soc_component_get_drvdata(component);
+
+ regmap_write(es8389->regmap, ES8389_MASTER_MODE, 0x28);
+ regmap_write(es8389->regmap, ES8389_HPSW, 0x00);
+ regmap_write(es8389->regmap, ES8389_VMID, 0x00);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x00);
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0xCC);
+ usleep_range(500000, 550000);//500MS
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0x00);
+ regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0x08);
+ regmap_write(es8389->regmap, ES8389_ISO_CTL, 0xC1);
+ regmap_write(es8389->regmap, ES8389_PULL_DOWN, 0x00);
+
+}
+
+static const struct snd_soc_component_driver soc_codec_dev_es8389 = {
+ .probe = es8389_probe,
+ .remove = es8389_remove,
+ .suspend = es8389_suspend,
+ .resume = es8389_resume,
+ .set_bias_level = es8389_set_bias_level,
+
+ .controls = es8389_snd_controls,
+ .num_controls = ARRAY_SIZE(es8389_snd_controls),
+ .dapm_widgets = es8389_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(es8389_dapm_widgets),
+ .dapm_routes = es8389_dapm_routes,
+ .num_dapm_routes = ARRAY_SIZE(es8389_dapm_routes),
+ .idle_bias_on = 1,
+ .use_pmdown_time = 1,
+};
+
+static const struct regmap_config es8389_regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = ES8389_MAX_REGISTER,
+
+ .volatile_reg = es8389_volatile_register,
+ .cache_type = REGCACHE_MAPLE,
+};
+
+static void es8389_i2c_shutdown(struct i2c_client *i2c)
+{
+ struct es8389_private *es8389;
+
+ es8389 = i2c_get_clientdata(i2c);
+
+ regmap_write(es8389->regmap, ES8389_MASTER_MODE, 0x28);
+ regmap_write(es8389->regmap, ES8389_HPSW, 0x00);
+ regmap_write(es8389->regmap, ES8389_VMID, 0x00);
+ regmap_write(es8389->regmap, ES8389_RESET, 0x00);
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0xCC);
+ usleep_range(500000, 550000);//500MS
+ regmap_write(es8389->regmap, ES8389_CSM_JUMP, 0x00);
+ regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0x08);
+ regmap_write(es8389->regmap, ES8389_ISO_CTL, 0xC1);
+ regmap_write(es8389->regmap, ES8389_PULL_DOWN, 0x00);
+}
+
+static int es8389_i2c_probe(struct i2c_client *i2c_client)
+{
+ struct es8389_private *es8389;
+ int ret;
+
+ es8389 = devm_kzalloc(&i2c_client->dev, sizeof(*es8389), GFP_KERNEL);
+ if (es8389 == NULL)
+ return -ENOMEM;
+
+ i2c_set_clientdata(i2c_client, es8389);
+ es8389->regmap = devm_regmap_init_i2c(i2c_client, &es8389_regmap);
+ if (IS_ERR(es8389->regmap))
+ return dev_err_probe(&i2c_client->dev, PTR_ERR(es8389->regmap),
+ "regmap_init() failed\n");
+
+ ret = devm_snd_soc_register_component(&i2c_client->dev,
+ &soc_codec_dev_es8389,
+ &es8389_dai,
+ 1);
+
+ return ret;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id es8389_if_dt_ids[] = {
+ { .compatible = "everest,es8389", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, es8389_if_dt_ids);
+#endif
+
+static const struct i2c_device_id es8389_i2c_id[] = {
+ {"es8389"},
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, es8389_i2c_id);
+
+static struct i2c_driver es8389_i2c_driver = {
+ .driver = {
+ .name = "es8389",
+ .of_match_table = of_match_ptr(es8389_if_dt_ids),
+ },
+ .shutdown = es8389_i2c_shutdown,
+ .probe = es8389_i2c_probe,
+ .id_table = es8389_i2c_id,
+};
+module_i2c_driver(es8389_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC es8389 driver");
+MODULE_AUTHOR("Michael Zhang <zhangyi@everest-semi.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/es8389.h b/sound/soc/codecs/es8389.h
new file mode 100644
index 000000000000..123d1e4b2d53
--- /dev/null
+++ b/sound/soc/codecs/es8389.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+* ES8389.h -- ES8389 ALSA SoC Audio Codec
+*
+* Authors:
+*
+* Based on ES8374.h by Michael Zhang
+*/
+
+#ifndef _ES8389_H
+#define _ES8389_H
+
+/*
+* ES8389_REGISTER NAME_REG_REGISTER ADDRESS
+*/
+#define ES8389_RESET 0x00 /*reset digital,csm,clock manager etc.*/
+
+/*
+* Clock Scheme Register definition
+*/
+#define ES8389_MASTER_MODE 0x01
+#define ES8389_MASTER_CLK 0x02
+#define ES8389_CLK_OFF1 0x03
+#define ES8389_CLK_DIV1 0x04
+#define ES8389_CLK_MUL 0x05
+#define ES8389_CLK_MUX1 0x06
+#define ES8389_CLK_MUX2 0x07
+#define ES8389_CLK_CTL1 0x08
+#define ES8389_CLK_CTL2 0x09
+#define ES8389_CLK_CTL3 0x0A
+#define ES8389_SCLK_DIV 0x0B
+#define ES8389_LRCK_DIV1 0x0C
+#define ES8389_LRCK_DIV2 0x0D
+#define ES8389_CLK_OFF2 0x0E
+#define ES8389_OSC_CLK 0x0F
+#define ES8389_CSM_JUMP 0x10
+#define ES8389_CLK_DIV2 0x11
+#define ES8389_SYSTEM12 0x12
+#define ES8389_SYSTEM13 0x13
+#define ES8389_SYSTEM14 0x14
+#define ES8389_SYSTEM15 0x15
+#define ES8389_SYSTEM16 0x16
+#define ES8389_SYSTEM17 0x17
+#define ES8389_SYSTEM18 0x18
+#define ES8389_SYSTEM19 0x19
+#define ES8389_SYSTEM1A 0x1A
+#define ES8389_SYSTEM1B 0x1B
+#define ES8389_SYSTEM1C 0x1C
+#define ES8389_ADC_FORMAT_MUTE 0x20
+#define ES8389_ADC_OSR 0x21
+#define ES8389_ADC_DSP 0x22
+#define ES8389_ADC_MODE 0x23
+#define ES8389_ADC_HPF1 0x24
+#define ES8389_ADC_HPF2 0x25
+#define ES8389_OSR_VOL 0x26
+#define ES8389_ADCL_VOL 0x27
+#define ES8389_ADCR_VOL 0x28
+#define ES8389_ALC_CTL 0x29
+#define ES8389_PTDM_SLOT 0x2A
+#define ES8389_ALC_ON 0x2B
+#define ES8389_ALC_TARGET 0x2C
+#define ES8389_ALC_GAIN 0x2D
+#define ES8389_SYSTEM2E 0x2E
+#define ES8389_ADC_MUTE 0x2F
+#define ES8389_SYSTEM30 0x30
+#define ES8389_ADC_RESET 0x31
+#define ES8389_DAC_FORMAT_MUTE 0x40
+#define ES8389_DAC_DSM_OSR 0x41
+#define ES8389_DAC_DSP_OSR 0x42
+#define ES8389_DAC_MISC 0x43
+#define ES8389_DAC_MIX 0x44
+#define ES8389_DAC_INV 0x45
+#define ES8389_DACL_VOL 0x46
+#define ES8389_DACR_VOL 0x47
+#define ES8389_MIX_VOL 0x48
+#define ES8389_DAC_RAMP 0x49
+#define ES8389_SYSTEM4C 0x4C
+#define ES8389_DAC_RESET 0x4D
+#define ES8389_VMID 0x60
+#define ES8389_ANA_CTL1 0x61
+#define ES8389_ANA_VSEL 0x62
+#define ES8389_ANA_CTL2 0x63
+#define ES8389_ADC_EN 0x64
+#define ES8389_HPSW 0x69
+#define ES8389_LOW_POWER1 0x6B
+#define ES8389_LOW_POWER2 0x6C
+#define ES8389_DMIC_EN 0x6D
+#define ES8389_PGA_SW 0x6E
+#define ES8389_MOD_SW1 0x6F
+#define ES8389_MOD_SW2 0x70
+#define ES8389_MOD_SW3 0x71
+#define ES8389_MIC1_GAIN 0x72
+#define ES8389_MIC2_GAIN 0x73
+
+#define ES8389_CHIP_MISC 0xF0
+#define ES8389_CSM_STATE1 0xF1
+#define ES8389_PULL_DOWN 0xF2
+#define ES8389_ISO_CTL 0xF3
+#define ES8389_CSM_STATE2 0xF4
+
+#define ES8389_CHIP_ID0 0xFD
+#define ES8389_CHIP_ID1 0xFE
+
+#define ES8389_MAX_REGISTER 0xFF
+
+#define ES8389_MIC_SEL_MASK (7 << 4)
+#define ES8389_MIC_DEFAULT (1 << 4)
+
+#define ES8389_MASTER_MODE_EN (1 << 0)
+
+#define ES8389_TDM_OFF (0 << 0)
+#define ES8389_STDM_ON (1 << 7)
+#define ES8389_PTDM_ON (1 << 6)
+
+#define ES8389_TDM_MODE ES8389_TDM_OFF
+#define ES8389_TDM_SLOT (0x70 << 0)
+#define ES8389_TDM_SHIFT 4
+
+#define ES8389_MCLK_SOURCE (1 << 6)
+#define ES8389_MCLK_PIN (1 << 6)
+#define ES8389_SCLK_PIN (0 << 6)
+
+/* ES8389_FMT */
+#define ES8389_S24_LE (0 << 5)
+#define ES8389_S20_3_LE (1 << 5)
+#define ES8389_S18_LE (2 << 5)
+#define ES8389_S16_LE (3 << 5)
+#define ES8389_S32_LE (4 << 5)
+#define ES8389_DATA_LEN_MASK (7 << 5)
+
+#define ES8389_DAIFMT_MASK (7 << 2)
+#define ES8389_DAIFMT_I2S 0
+#define ES8389_DAIFMT_LEFT_J (1 << 2)
+#define ES8389_DAIFMT_DSP_A (1 << 3)
+#define ES8389_DAIFMT_DSP_B (3 << 3)
+
+#define ES8389_STATE_ON (13 << 0)
+#define ES8389_STATE_STANDBY (7 << 0)
+
+#endif
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index bc01ff65bd6f..31121f9c18c9 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -1026,6 +1026,7 @@ static const struct snd_soc_dai_ops hdmi_codec_spdif_dai_ops = {
.startup = hdmi_codec_startup,
.shutdown = hdmi_codec_shutdown,
.hw_params = hdmi_codec_hw_params,
+ .prepare = hdmi_codec_prepare,
.mute_stream = hdmi_codec_mute,
.pcm_new = hdmi_codec_pcm_new,
};
diff --git a/sound/soc/codecs/idt821034.c b/sound/soc/codecs/idt821034.c
index cb7a68c799f8..55e90604bbaa 100644
--- a/sound/soc/codecs/idt821034.c
+++ b/sound/soc/codecs/idt821034.c
@@ -957,7 +957,8 @@ static const struct snd_soc_component_driver idt821034_component_driver = {
#define IDT821034_GPIO_OFFSET_TO_SLIC_CHANNEL(_offset) (((_offset) / 5) % 4)
#define IDT821034_GPIO_OFFSET_TO_SLIC_MASK(_offset) BIT((_offset) % 5)
-static void idt821034_chip_gpio_set(struct gpio_chip *c, unsigned int offset, int val)
+static int idt821034_chip_gpio_set(struct gpio_chip *c, unsigned int offset,
+ int val)
{
u8 ch = IDT821034_GPIO_OFFSET_TO_SLIC_CHANNEL(offset);
u8 mask = IDT821034_GPIO_OFFSET_TO_SLIC_MASK(offset);
@@ -973,12 +974,14 @@ static void idt821034_chip_gpio_set(struct gpio_chip *c, unsigned int offset, in
else
slic_raw &= ~mask;
ret = idt821034_write_slic_raw(idt821034, ch, slic_raw);
- if (ret) {
+
+ mutex_unlock(&idt821034->mutex);
+
+ if (ret)
dev_err(&idt821034->spi->dev, "set gpio %d (%u, 0x%x) failed (%d)\n",
offset, ch, mask, ret);
- }
- mutex_unlock(&idt821034->mutex);
+ return ret;
}
static int idt821034_chip_gpio_get(struct gpio_chip *c, unsigned int offset)
@@ -1054,7 +1057,9 @@ static int idt821034_chip_direction_output(struct gpio_chip *c, unsigned int off
u8 slic_conf;
int ret;
- idt821034_chip_gpio_set(c, offset, val);
+ ret = idt821034_chip_gpio_set(c, offset, val);
+ if (ret)
+ return ret;
mutex_lock(&idt821034->mutex);
@@ -1112,7 +1117,7 @@ static int idt821034_gpio_init(struct idt821034 *idt821034)
idt821034->gpio_chip.direction_input = idt821034_chip_direction_input;
idt821034->gpio_chip.direction_output = idt821034_chip_direction_output;
idt821034->gpio_chip.get = idt821034_chip_gpio_get;
- idt821034->gpio_chip.set = idt821034_chip_gpio_set;
+ idt821034->gpio_chip.set_rv = idt821034_chip_gpio_set;
idt821034->gpio_chip.can_sleep = true;
return devm_gpiochip_add_data(&idt821034->spi->dev, &idt821034->gpio_chip,
diff --git a/sound/soc/codecs/pcm6240.c b/sound/soc/codecs/pcm6240.c
index b2bd2f172ae7..75af12231d1d 100644
--- a/sound/soc/codecs/pcm6240.c
+++ b/sound/soc/codecs/pcm6240.c
@@ -1642,8 +1642,7 @@ static int pcmdevice_comp_probe(struct snd_soc_component *comp)
}
ret = pcmdev_profile_ctrl_add(pcm_dev);
out:
- if (fw_entry)
- release_firmware(fw_entry);
+ release_firmware(fw_entry);
mutex_unlock(&pcm_dev->codec_lock);
return ret;
diff --git a/sound/soc/codecs/peb2466.c b/sound/soc/codecs/peb2466.c
index a989cfe058f0..b8905c03445e 100644
--- a/sound/soc/codecs/peb2466.c
+++ b/sound/soc/codecs/peb2466.c
@@ -1726,7 +1726,8 @@ end:
return ret;
}
-static void peb2466_chip_gpio_set(struct gpio_chip *c, unsigned int offset, int val)
+static int peb2466_chip_gpio_set(struct gpio_chip *c, unsigned int offset,
+ int val)
{
struct peb2466 *peb2466 = gpiochip_get_data(c);
unsigned int xr_reg;
@@ -1740,14 +1741,14 @@ static void peb2466_chip_gpio_set(struct gpio_chip *c, unsigned int offset, int
*/
dev_warn(&peb2466->spi->dev, "cannot set gpio %d (read-only)\n",
offset);
- return;
+ return -EINVAL;
}
ret = peb2466_chip_gpio_offset_to_data_regmask(offset, &xr_reg, &mask);
if (ret) {
dev_err(&peb2466->spi->dev, "cannot set gpio %d (%d)\n",
offset, ret);
- return;
+ return ret;
}
ret = peb2466_chip_gpio_update_bits(peb2466, xr_reg, mask, val ? mask : 0);
@@ -1755,6 +1756,8 @@ static void peb2466_chip_gpio_set(struct gpio_chip *c, unsigned int offset, int
dev_err(&peb2466->spi->dev, "set gpio %d (0x%x, 0x%x) failed (%d)\n",
offset, xr_reg, mask, ret);
}
+
+ return ret;
}
static int peb2466_chip_gpio_get(struct gpio_chip *c, unsigned int offset)
@@ -1879,7 +1882,9 @@ static int peb2466_chip_direction_output(struct gpio_chip *c, unsigned int offse
return -EINVAL;
}
- peb2466_chip_gpio_set(c, offset, val);
+ ret = peb2466_chip_gpio_set(c, offset, val);
+ if (ret)
+ return ret;
if (offset < 16) {
/* SOx_{0,1} */
@@ -1940,7 +1945,7 @@ static int peb2466_gpio_init(struct peb2466 *peb2466)
peb2466->gpio.gpio_chip.direction_input = peb2466_chip_direction_input;
peb2466->gpio.gpio_chip.direction_output = peb2466_chip_direction_output;
peb2466->gpio.gpio_chip.get = peb2466_chip_gpio_get;
- peb2466->gpio.gpio_chip.set = peb2466_chip_gpio_set;
+ peb2466->gpio.gpio_chip.set_rv = peb2466_chip_gpio_set;
peb2466->gpio.gpio_chip.can_sleep = true;
return devm_gpiochip_add_data(&peb2466->spi->dev, &peb2466->gpio.gpio_chip,
diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c
index bcb6d7c6f301..b16b2c66e754 100644
--- a/sound/soc/codecs/rt5665.c
+++ b/sound/soc/codecs/rt5665.c
@@ -1022,102 +1022,6 @@ static int rt5665_mono_vol_put(struct snd_kcontrol *kcontrol,
return ret;
}
-/**
- * rt5665_sel_asrc_clk_src - select ASRC clock source for a set of filters
- * @component: SoC audio component device.
- * @filter_mask: mask of filters.
- * @clk_src: clock source
- *
- * The ASRC function is for asynchronous MCLK and LRCK. Also, since RT5665 can
- * only support standard 32fs or 64fs i2s format, ASRC should be enabled to
- * support special i2s clock format such as Intel's 100fs(100 * sampling rate).
- * ASRC function will track i2s clock and generate a corresponding system clock
- * for codec. This function provides an API to select the clock source for a
- * set of filters specified by the mask. And the codec driver will turn on ASRC
- * for these filters if ASRC is selected as their clock source.
- */
-int rt5665_sel_asrc_clk_src(struct snd_soc_component *component,
- unsigned int filter_mask, unsigned int clk_src)
-{
- unsigned int asrc2_mask = 0;
- unsigned int asrc2_value = 0;
- unsigned int asrc3_mask = 0;
- unsigned int asrc3_value = 0;
-
- switch (clk_src) {
- case RT5665_CLK_SEL_SYS:
- case RT5665_CLK_SEL_I2S1_ASRC:
- case RT5665_CLK_SEL_I2S2_ASRC:
- case RT5665_CLK_SEL_I2S3_ASRC:
- case RT5665_CLK_SEL_SYS2:
- case RT5665_CLK_SEL_SYS3:
- case RT5665_CLK_SEL_SYS4:
- break;
-
- default:
- return -EINVAL;
- }
-
- if (filter_mask & RT5665_DA_STEREO1_FILTER) {
- asrc2_mask |= RT5665_DA_STO1_CLK_SEL_MASK;
- asrc2_value = (asrc2_value & ~RT5665_DA_STO1_CLK_SEL_MASK)
- | (clk_src << RT5665_DA_STO1_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_DA_STEREO2_FILTER) {
- asrc2_mask |= RT5665_DA_STO2_CLK_SEL_MASK;
- asrc2_value = (asrc2_value & ~RT5665_DA_STO2_CLK_SEL_MASK)
- | (clk_src << RT5665_DA_STO2_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_DA_MONO_L_FILTER) {
- asrc2_mask |= RT5665_DA_MONOL_CLK_SEL_MASK;
- asrc2_value = (asrc2_value & ~RT5665_DA_MONOL_CLK_SEL_MASK)
- | (clk_src << RT5665_DA_MONOL_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_DA_MONO_R_FILTER) {
- asrc2_mask |= RT5665_DA_MONOR_CLK_SEL_MASK;
- asrc2_value = (asrc2_value & ~RT5665_DA_MONOR_CLK_SEL_MASK)
- | (clk_src << RT5665_DA_MONOR_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_AD_STEREO1_FILTER) {
- asrc3_mask |= RT5665_AD_STO1_CLK_SEL_MASK;
- asrc3_value = (asrc2_value & ~RT5665_AD_STO1_CLK_SEL_MASK)
- | (clk_src << RT5665_AD_STO1_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_AD_STEREO2_FILTER) {
- asrc3_mask |= RT5665_AD_STO2_CLK_SEL_MASK;
- asrc3_value = (asrc2_value & ~RT5665_AD_STO2_CLK_SEL_MASK)
- | (clk_src << RT5665_AD_STO2_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_AD_MONO_L_FILTER) {
- asrc3_mask |= RT5665_AD_MONOL_CLK_SEL_MASK;
- asrc3_value = (asrc3_value & ~RT5665_AD_MONOL_CLK_SEL_MASK)
- | (clk_src << RT5665_AD_MONOL_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5665_AD_MONO_R_FILTER) {
- asrc3_mask |= RT5665_AD_MONOR_CLK_SEL_MASK;
- asrc3_value = (asrc3_value & ~RT5665_AD_MONOR_CLK_SEL_MASK)
- | (clk_src << RT5665_AD_MONOR_CLK_SEL_SFT);
- }
-
- if (asrc2_mask)
- snd_soc_component_update_bits(component, RT5665_ASRC_2,
- asrc2_mask, asrc2_value);
-
- if (asrc3_mask)
- snd_soc_component_update_bits(component, RT5665_ASRC_3,
- asrc3_mask, asrc3_value);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rt5665_sel_asrc_clk_src);
-
static int rt5665_button_detect(struct snd_soc_component *component)
{
int btn_type, val;
diff --git a/sound/soc/codecs/rt5665.h b/sound/soc/codecs/rt5665.h
index 12ab28e5f10d..089e4078d37a 100644
--- a/sound/soc/codecs/rt5665.h
+++ b/sound/soc/codecs/rt5665.h
@@ -1999,7 +1999,4 @@ enum {
RT5665_CLK_SEL_SYS4,
};
-int rt5665_sel_asrc_clk_src(struct snd_soc_component *component,
- unsigned int filter_mask, unsigned int clk_src);
-
#endif /* __RT5665_H__ */
diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c
index f626453f332b..8442dd09cfaf 100644
--- a/sound/soc/codecs/rt5668.c
+++ b/sound/soc/codecs/rt5668.c
@@ -799,49 +799,6 @@ static void rt5668_reset(struct regmap *regmap)
regmap_write(regmap, RT5668_RESET, 0);
regmap_write(regmap, RT5668_I2C_MODE, 1);
}
-/**
- * rt5668_sel_asrc_clk_src - select ASRC clock source for a set of filters
- * @component: SoC audio component device.
- * @filter_mask: mask of filters.
- * @clk_src: clock source
- *
- * The ASRC function is for asynchronous MCLK and LRCK. Also, since RT5668 can
- * only support standard 32fs or 64fs i2s format, ASRC should be enabled to
- * support special i2s clock format such as Intel's 100fs(100 * sampling rate).
- * ASRC function will track i2s clock and generate a corresponding system clock
- * for codec. This function provides an API to select the clock source for a
- * set of filters specified by the mask. And the component driver will turn on
- * ASRC for these filters if ASRC is selected as their clock source.
- */
-int rt5668_sel_asrc_clk_src(struct snd_soc_component *component,
- unsigned int filter_mask, unsigned int clk_src)
-{
-
- switch (clk_src) {
- case RT5668_CLK_SEL_SYS:
- case RT5668_CLK_SEL_I2S1_ASRC:
- case RT5668_CLK_SEL_I2S2_ASRC:
- break;
-
- default:
- return -EINVAL;
- }
-
- if (filter_mask & RT5668_DA_STEREO1_FILTER) {
- snd_soc_component_update_bits(component, RT5668_PLL_TRACK_2,
- RT5668_FILTER_CLK_SEL_MASK,
- clk_src << RT5668_FILTER_CLK_SEL_SFT);
- }
-
- if (filter_mask & RT5668_AD_STEREO1_FILTER) {
- snd_soc_component_update_bits(component, RT5668_PLL_TRACK_3,
- RT5668_FILTER_CLK_SEL_MASK,
- clk_src << RT5668_FILTER_CLK_SEL_SFT);
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rt5668_sel_asrc_clk_src);
static int rt5668_button_detect(struct snd_soc_component *component)
{
diff --git a/sound/soc/codecs/rt5668.h b/sound/soc/codecs/rt5668.h
index 6b851ddcc58a..b34a61d2109c 100644
--- a/sound/soc/codecs/rt5668.h
+++ b/sound/soc/codecs/rt5668.h
@@ -1309,7 +1309,4 @@ enum {
RT5668_CLK_SEL_I2S2_ASRC,
};
-int rt5668_sel_asrc_clk_src(struct snd_soc_component *component,
- unsigned int filter_mask, unsigned int clk_src);
-
#endif /* __RT5668_H__ */
diff --git a/sound/soc/codecs/rt5677-spi.c b/sound/soc/codecs/rt5677-spi.c
index abe0a5a95770..885edcf0a3a5 100644
--- a/sound/soc/codecs/rt5677-spi.c
+++ b/sound/soc/codecs/rt5677-spi.c
@@ -365,8 +365,8 @@ static void rt5677_spi_copy_work(struct work_struct *work)
new_bytes -= copy_bytes;
}
- delay = bytes_to_frames(runtime, period_bytes) / (runtime->rate / 1000);
- schedule_delayed_work(&rt5677_dsp->copy_work, msecs_to_jiffies(delay));
+ delay = bytes_to_frames(runtime, period_bytes) / runtime->rate;
+ schedule_delayed_work(&rt5677_dsp->copy_work, secs_to_jiffies(delay));
done:
mutex_unlock(&rt5677_dsp->dma_lock);
}
diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c
index 6e4774148566..69a0fb8d7f77 100644
--- a/sound/soc/codecs/rt5677.c
+++ b/sound/soc/codecs/rt5677.c
@@ -4725,13 +4725,14 @@ static int rt5677_update_gpio_bits(struct rt5677_priv *rt5677, unsigned offset,
}
#ifdef CONFIG_GPIOLIB
-static void rt5677_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int rt5677_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct rt5677_priv *rt5677 = gpiochip_get_data(chip);
int level = value ? RT5677_GPIOx_OUT_HI : RT5677_GPIOx_OUT_LO;
int m = RT5677_GPIOx_OUT_MASK;
- rt5677_update_gpio_bits(rt5677, offset, m, level);
+ return rt5677_update_gpio_bits(rt5677, offset, m, level);
}
static int rt5677_gpio_direction_out(struct gpio_chip *chip,
@@ -4834,7 +4835,7 @@ static const struct gpio_chip rt5677_template_chip = {
.label = RT5677_DRV_NAME,
.owner = THIS_MODULE,
.direction_output = rt5677_gpio_direction_out,
- .set = rt5677_gpio_set,
+ .set_rv = rt5677_gpio_set,
.direction_input = rt5677_gpio_direction_in,
.get = rt5677_gpio_get,
.to_irq = rt5677_to_irq,
diff --git a/sound/soc/codecs/rt712-sdca-dmic.c b/sound/soc/codecs/rt712-sdca-dmic.c
index db011da63bd9..4d044dfa3136 100644
--- a/sound/soc/codecs/rt712-sdca-dmic.c
+++ b/sound/soc/codecs/rt712-sdca-dmic.c
@@ -263,12 +263,8 @@ static int rt712_sdca_dmic_set_gain_get(struct snd_kcontrol *kcontrol,
if (!adc_vol_flag) /* boost gain */
ctl = regvalue / 0x0a00;
- else { /* ADC gain */
- if (adc_vol_flag)
- ctl = p->max - (((0x1e00 - regvalue) & 0xffff) / interval_offset);
- else
- ctl = p->max - (((0 - regvalue) & 0xffff) / interval_offset);
- }
+ else /* ADC gain */
+ ctl = p->max - (((0x1e00 - regvalue) & 0xffff) / interval_offset);
ucontrol->value.integer.value[i] = ctl;
}
diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c
index 19d99b9d4ab2..570c2af1245d 100644
--- a/sound/soc/codecs/rt712-sdca.c
+++ b/sound/soc/codecs/rt712-sdca.c
@@ -1065,12 +1065,8 @@ static int rt712_sdca_dmic_set_gain_get(struct snd_kcontrol *kcontrol,
if (!adc_vol_flag) /* boost gain */
ctl = regvalue / 0x0a00;
- else { /* ADC gain */
- if (adc_vol_flag)
- ctl = p->max - (((0x1e00 - regvalue) & 0xffff) / interval_offset);
- else
- ctl = p->max - (((0 - regvalue) & 0xffff) / interval_offset);
- }
+ else /* ADC gain */
+ ctl = p->max - (((0x1e00 - regvalue) & 0xffff) / interval_offset);
ucontrol->value.integer.value[i] = ctl;
}
diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index 11e2e8f68a98..609ca0d6c83a 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -24,6 +24,7 @@ static int rt722_sdca_mbq_size(struct device *dev, unsigned int reg)
case 0x2f50:
case 0x2f54:
case 0x2f58 ... 0x2f5d:
+ case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_GE49, RT722_SDCA_CTL_SELECTED_MODE,
0):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_GE49, RT722_SDCA_CTL_DETECTED_MODE,
@@ -42,8 +43,12 @@ static int rt722_sdca_mbq_size(struct device *dev, unsigned int reg)
RT722_SDCA_CTL_FU_MUTE, CH_R):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40,
RT722_SDCA_CTL_REQ_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40,
+ RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12,
RT722_SDCA_CTL_REQ_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12,
+ RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_CS01,
RT722_SDCA_CTL_SAMPLE_FREQ_INDEX, 0):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_CS11,
@@ -56,6 +61,9 @@ static int rt722_sdca_mbq_size(struct device *dev, unsigned int reg)
RT722_SDCA_CTL_VENDOR_DEF, 0):
case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A,
RT722_SDCA_CTL_REQ_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A,
+ RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0):
case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_CS1F,
RT722_SDCA_CTL_SAMPLE_FREQ_INDEX, 0):
case SDW_SDCA_CTL(FUNC_NUM_HID, RT722_SDCA_ENT_HID01,
@@ -70,6 +78,9 @@ static int rt722_sdca_mbq_size(struct device *dev, unsigned int reg)
RT722_SDCA_CTL_VENDOR_DEF, CH_08):
case SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23,
RT722_SDCA_CTL_REQ_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23,
+ RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0):
case SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_CS31,
RT722_SDCA_CTL_SAMPLE_FREQ_INDEX, 0):
case RT722_BUF_ADDR_HID1 ... RT722_BUF_ADDR_HID2:
@@ -150,11 +161,18 @@ static bool rt722_sdca_volatile_register(struct device *dev, unsigned int reg)
switch (reg) {
case 0x2f01:
case 0x2f54:
+ case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12, RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40, RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
case SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_GE49, RT722_SDCA_CTL_DETECTED_MODE,
0):
+ case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A, RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
case SDW_SDCA_CTL(FUNC_NUM_HID, RT722_SDCA_ENT_HID01, RT722_SDCA_CTL_HIDTX_CURRENT_OWNER,
0) ... SDW_SDCA_CTL(FUNC_NUM_HID, RT722_SDCA_ENT_HID01,
RT722_SDCA_CTL_HIDTX_MESSAGE_LENGTH, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0):
+ case SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23, RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0):
case RT722_BUF_ADDR_HID1 ... RT722_BUF_ADDR_HID2:
case 0x2000000:
case 0x200000d:
@@ -166,6 +184,8 @@ static bool rt722_sdca_volatile_register(struct device *dev, unsigned int reg)
case 0x2000084:
case 0x2000086:
case 0x3110000:
+ case 0x5800003:
+ case 0x5810000:
return true;
default:
return false;
diff --git a/sound/soc/codecs/rt722-sdca-sdw.h b/sound/soc/codecs/rt722-sdca-sdw.h
index 80b014456940..c5dd472a2c00 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.h
+++ b/sound/soc/codecs/rt722-sdca-sdw.h
@@ -34,6 +34,7 @@ static const struct reg_default rt722_sdca_reg_defaults[] = {
{ 0x200003c, 0xc214 },
{ 0x2000046, 0x8004 },
{ 0x5810000, 0x702d },
+ { 0x6100000, 0x0201 },
{ 0x6100006, 0x0005 },
{ 0x6100010, 0x2630 },
{ 0x6100011, 0x152f },
diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c
index f093ce841b3f..ac9588284a95 100644
--- a/sound/soc/codecs/rt722-sdca.c
+++ b/sound/soc/codecs/rt722-sdca.c
@@ -842,6 +842,7 @@ static int rt722_sdca_fu113_event(struct snd_soc_dapm_widget *w,
case SND_SOC_DAPM_POST_PMU:
rt722->fu1e_dapm_mute = false;
rt722_sdca_set_fu1e_capture_ctl(rt722);
+ usleep_range(150000, 160000);
break;
case SND_SOC_DAPM_PRE_PMD:
rt722->fu1e_dapm_mute = true;
@@ -871,6 +872,28 @@ static int rt722_sdca_fu36_event(struct snd_soc_dapm_widget *w,
return 0;
}
+static void rt722_pde_transition_delay(struct rt722_sdca_priv *rt722, unsigned char func,
+ unsigned char entity, unsigned char ps)
+{
+ unsigned int delay = 1000, val;
+
+ pm_runtime_mark_last_busy(&rt722->slave->dev);
+
+ /* waiting for Actual PDE becomes to PS0/PS3 */
+ while (delay) {
+ regmap_read(rt722->regmap,
+ SDW_SDCA_CTL(func, entity, RT722_SDCA_CTL_ACTUAL_POWER_STATE, 0), &val);
+ if (val == ps)
+ break;
+
+ usleep_range(1000, 1500);
+ delay--;
+ }
+ if (!delay) {
+ dev_warn(&rt722->slave->dev, "%s PDE to %s is NOT ready", __func__, ps?"PS3":"PS0");
+ }
+}
+
static int rt722_sdca_pde47_event(struct snd_soc_dapm_widget *w,
struct snd_kcontrol *kcontrol, int event)
{
@@ -884,11 +907,13 @@ static int rt722_sdca_pde47_event(struct snd_soc_dapm_widget *w,
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps0);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40, ps0);
break;
case SND_SOC_DAPM_PRE_PMD:
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps3);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE40, ps3);
break;
}
return 0;
@@ -907,11 +932,13 @@ static int rt722_sdca_pde23_event(struct snd_soc_dapm_widget *w,
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps0);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23, ps0);
break;
case SND_SOC_DAPM_PRE_PMD:
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps3);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_AMP, RT722_SDCA_ENT_PDE23, ps3);
break;
}
return 0;
@@ -930,11 +957,13 @@ static int rt722_sdca_pde11_event(struct snd_soc_dapm_widget *w,
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps0);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A, ps0);
break;
case SND_SOC_DAPM_PRE_PMD:
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps3);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_PDE2A, ps3);
break;
}
return 0;
@@ -953,11 +982,13 @@ static int rt722_sdca_pde12_event(struct snd_soc_dapm_widget *w,
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps0);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12, ps0);
break;
case SND_SOC_DAPM_PRE_PMD:
regmap_write(rt722->regmap,
SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12,
RT722_SDCA_CTL_REQ_POWER_STATE, 0), ps3);
+ rt722_pde_transition_delay(rt722, FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT_PDE12, ps3);
break;
}
return 0;
@@ -1278,6 +1309,8 @@ int rt722_sdca_init(struct device *dev, struct regmap *regmap, struct sdw_slave
rt722->slave = slave;
rt722->regmap = regmap;
+ regcache_cache_only(rt722->regmap, true);
+
mutex_init(&rt722->calibrate_mutex);
mutex_init(&rt722->disable_irq_lock);
@@ -1302,140 +1335,183 @@ int rt722_sdca_init(struct device *dev, struct regmap *regmap, struct sdw_slave
static void rt722_sdca_dmic_preset(struct rt722_sdca_priv *rt722)
{
- /* Set AD07 power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_ADC0A_08_PDE_FLOAT_CTL, 0x2a29);
- /* Set AD10 power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_ADC10_PDE_FLOAT_CTL, 0x2a00);
- /* Set DMIC1/DMIC2 power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_DMIC1_2_PDE_FLOAT_CTL, 0x2a2a);
- /* Set DMIC2 IT entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_DMIC_ENT_FLOAT_CTL, 0x2626);
- /* Set AD10 FU entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_ADC_ENT_FLOAT_CTL, 0x1e00);
- /* Set DMIC2 FU entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_DMIC_GAIN_ENT_FLOAT_CTL0, 0x1515);
- /* Set AD10 FU channel floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_ADC_VOL_CH_FLOAT_CTL, 0x0304);
- /* Set DMIC2 FU channel floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_DMIC_GAIN_ENT_FLOAT_CTL2, 0x0304);
- /* vf71f_r12_07_06 and vf71f_r13_07_06 = 2’b00 */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
- RT722_HDA_LEGACY_CONFIG_CTL0, 0x0000);
- /* Enable vf707_r12_05/vf707_r13_05 */
- regmap_write(rt722->regmap,
- SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_IT26,
- RT722_SDCA_CTL_VENDOR_DEF, 0), 0x01);
- /* Fine tune PDE2A latency */
- regmap_write(rt722->regmap, 0x2f5c, 0x25);
+ unsigned int mic_func_status;
+ struct device *dev = &rt722->slave->dev;
+
+ regmap_read(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0), &mic_func_status);
+ dev_dbg(dev, "%s mic func_status=0x%x\n", __func__, mic_func_status);
+
+ if ((mic_func_status & FUNCTION_NEEDS_INITIALIZATION) || (!rt722->first_hw_init)) {
+ /* Set AD07 power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_ADC0A_08_PDE_FLOAT_CTL, 0x2a29);
+ /* Set AD10 power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_ADC10_PDE_FLOAT_CTL, 0x2a00);
+ /* Set DMIC1/DMIC2 power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_DMIC1_2_PDE_FLOAT_CTL, 0x2a2a);
+ /* Set DMIC2 IT entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_DMIC_ENT_FLOAT_CTL, 0x2626);
+ /* Set AD10 FU entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_ADC_ENT_FLOAT_CTL, 0x1e00);
+ /* Set DMIC2 FU entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_DMIC_GAIN_ENT_FLOAT_CTL0, 0x1515);
+ /* Set AD10 FU channel floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_ADC_VOL_CH_FLOAT_CTL, 0x0304);
+ /* Set DMIC2 FU channel floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_DMIC_GAIN_ENT_FLOAT_CTL2, 0x0304);
+ /* vf71f_r12_07_06 and vf71f_r13_07_06 = 2’b00 */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL,
+ RT722_HDA_LEGACY_CONFIG_CTL0, 0x0000);
+ /* Enable vf707_r12_05/vf707_r13_05 */
+ regmap_write(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_IT26,
+ RT722_SDCA_CTL_VENDOR_DEF, 0), 0x01);
+ /* Fine tune PDE2A latency */
+ regmap_write(rt722->regmap, 0x2f5c, 0x25);
+ /* PHYtiming TDZ/TZD control */
+ regmap_write(rt722->regmap, 0x2f03, 0x06);
+
+ /* clear flag */
+ regmap_write(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0),
+ FUNCTION_NEEDS_INITIALIZATION);
+ }
}
static void rt722_sdca_amp_preset(struct rt722_sdca_priv *rt722)
{
- /* Set DVQ=01 */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_CLSD_CTRL6,
- 0xc215);
- /* Reset dc_cal_top */
- rt722_sdca_index_write(rt722, RT722_VENDOR_CALI, RT722_DC_CALIB_CTRL,
- 0x702c);
- /* W1C Trigger Calibration */
- rt722_sdca_index_write(rt722, RT722_VENDOR_CALI, RT722_DC_CALIB_CTRL,
- 0xf02d);
- /* Set DAC02/ClassD power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_AMP_PDE_FLOAT_CTL,
- 0x2323);
- /* Set EAPD high */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_EAPD_CTL,
- 0x0002);
- /* Enable vf707_r14 */
- regmap_write(rt722->regmap,
- SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_OT23,
- RT722_SDCA_CTL_VENDOR_DEF, CH_08), 0x04);
+ unsigned int amp_func_status;
+ struct device *dev = &rt722->slave->dev;
+
+ regmap_read(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0), &amp_func_status);
+ dev_dbg(dev, "%s amp func_status=0x%x\n", __func__, amp_func_status);
+
+ if ((amp_func_status & FUNCTION_NEEDS_INITIALIZATION) || (!rt722->first_hw_init)) {
+ /* Set DVQ=01 */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_CLSD_CTRL6,
+ 0xc215);
+ /* Reset dc_cal_top */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_CALI, RT722_DC_CALIB_CTRL,
+ 0x702c);
+ /* W1C Trigger Calibration */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_CALI, RT722_DC_CALIB_CTRL,
+ 0xf02d);
+ /* Set DAC02/ClassD power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_AMP_PDE_FLOAT_CTL,
+ 0x2323);
+ /* Set EAPD high */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_EAPD_CTL,
+ 0x0002);
+ /* Enable vf707_r14 */
+ regmap_write(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT_OT23,
+ RT722_SDCA_CTL_VENDOR_DEF, CH_08), 0x04);
+
+ /* clear flag */
+ regmap_write(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_AMP, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0),
+ FUNCTION_NEEDS_INITIALIZATION);
+ }
}
static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722)
{
int loop_check, chk_cnt = 100, ret;
unsigned int calib_status = 0;
+ unsigned int jack_func_status;
+ struct device *dev = &rt722->slave->dev;
+
+ regmap_read(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0), &jack_func_status);
+ dev_dbg(dev, "%s jack func_status=0x%x\n", __func__, jack_func_status);
+
+ if ((jack_func_status & FUNCTION_NEEDS_INITIALIZATION) || (!rt722->first_hw_init)) {
+ /* Config analog bias */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_ANALOG_BIAS_CTL3,
+ 0xa081);
+ /* GE related settings */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_GE_RELATED_CTL2,
+ 0xa009);
+ /* Button A, B, C, D bypass mode */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL4,
+ 0xcf00);
+ /* HID1 slot enable */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL5,
+ 0x000f);
+ /* Report ID for HID1 */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL0,
+ 0x1100);
+ /* OSC/OOC for slot 2, 3 */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL7,
+ 0x0c12);
+ /* Set JD de-bounce clock control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_JD_CTRL1,
+ 0x7002);
+ /* Set DVQ=01 */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_CLSD_CTRL6,
+ 0xc215);
+ /* FSM switch to calibration manual mode */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_FSM_CTL,
+ 0x4100);
+ /* W1C Trigger DC calibration (HP) */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_CALI, RT722_DAC_DC_CALI_CTL3,
+ 0x008d);
+ /* check HP calibration FSM status */
+ for (loop_check = 0; loop_check < chk_cnt; loop_check++) {
+ usleep_range(10000, 11000);
+ ret = rt722_sdca_index_read(rt722, RT722_VENDOR_CALI,
+ RT722_DAC_DC_CALI_CTL3, &calib_status);
+ if (ret < 0)
+ dev_dbg(&rt722->slave->dev, "calibration failed!, ret=%d\n", ret);
+ if ((calib_status & 0x0040) == 0x0)
+ break;
+ }
- /* Config analog bias */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_ANALOG_BIAS_CTL3,
- 0xa081);
- /* GE related settings */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_GE_RELATED_CTL2,
- 0xa009);
- /* Button A, B, C, D bypass mode */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL4,
- 0xcf00);
- /* HID1 slot enable */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL5,
- 0x000f);
- /* Report ID for HID1 */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL0,
- 0x1100);
- /* OSC/OOC for slot 2, 3 */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL7,
- 0x0c12);
- /* Set JD de-bounce clock control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_JD_CTRL1,
- 0x7002);
- /* Set DVQ=01 */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_CLSD_CTRL6,
- 0xc215);
- /* FSM switch to calibration manual mode */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_FSM_CTL,
- 0x4100);
- /* W1C Trigger DC calibration (HP) */
- rt722_sdca_index_write(rt722, RT722_VENDOR_CALI, RT722_DAC_DC_CALI_CTL3,
- 0x008d);
- /* check HP calibration FSM status */
- for (loop_check = 0; loop_check < chk_cnt; loop_check++) {
- usleep_range(10000, 11000);
- ret = rt722_sdca_index_read(rt722, RT722_VENDOR_CALI,
- RT722_DAC_DC_CALI_CTL3, &calib_status);
- if (ret < 0)
- dev_dbg(&rt722->slave->dev, "calibration failed!, ret=%d\n", ret);
- if ((calib_status & 0x0040) == 0x0)
- break;
+ if (loop_check == chk_cnt)
+ dev_dbg(&rt722->slave->dev, "%s, calibration time-out!\n", __func__);
+
+ /* Set ADC09 power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ADC0A_08_PDE_FLOAT_CTL,
+ 0x2a12);
+ /* Set MIC2 and LINE1 power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_MIC2_LINE2_PDE_FLOAT_CTL,
+ 0x3429);
+ /* Set ET41h and LINE2 power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ET41_LINE2_PDE_FLOAT_CTL,
+ 0x4112);
+ /* Set DAC03 and HP power entity floating control */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_DAC03_HP_PDE_FLOAT_CTL,
+ 0x4040);
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ENT_FLOAT_CTRL_1,
+ 0x4141);
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_FLOAT_CTRL_1,
+ 0x0101);
+ /* Fine tune PDE40 latency */
+ regmap_write(rt722->regmap, 0x2f58, 0x07);
+ regmap_write(rt722->regmap, 0x2f03, 0x06);
+ /* MIC VRefo */
+ rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG,
+ RT722_COMBO_JACK_AUTO_CTL1, 0x0200, 0x0200);
+ rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG,
+ RT722_VREFO_GAT, 0x4000, 0x4000);
+ /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4,
+ 0x0010);
+
+ /* clear flag */
+ regmap_write(rt722->regmap,
+ SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT722_SDCA_ENT0, RT722_SDCA_CTL_FUNC_STATUS, 0),
+ FUNCTION_NEEDS_INITIALIZATION);
}
-
- if (loop_check == chk_cnt)
- dev_dbg(&rt722->slave->dev, "%s, calibration time-out!\n", __func__);
-
- /* Set ADC09 power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ADC0A_08_PDE_FLOAT_CTL,
- 0x2a12);
- /* Set MIC2 and LINE1 power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_MIC2_LINE2_PDE_FLOAT_CTL,
- 0x3429);
- /* Set ET41h and LINE2 power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ET41_LINE2_PDE_FLOAT_CTL,
- 0x4112);
- /* Set DAC03 and HP power entity floating control */
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_DAC03_HP_PDE_FLOAT_CTL,
- 0x4040);
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ENT_FLOAT_CTRL_1,
- 0x4141);
- rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_FLOAT_CTRL_1,
- 0x0101);
- /* Fine tune PDE40 latency */
- regmap_write(rt722->regmap, 0x2f58, 0x07);
- regmap_write(rt722->regmap, 0x2f03, 0x06);
- /* MIC VRefo */
- rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG,
- RT722_COMBO_JACK_AUTO_CTL1, 0x0200, 0x0200);
- rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG,
- RT722_VREFO_GAT, 0x4000, 0x4000);
- /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4,
- 0x0010);
}
int rt722_sdca_io_init(struct device *dev, struct sdw_slave *slave)
@@ -1447,8 +1523,8 @@ int rt722_sdca_io_init(struct device *dev, struct sdw_slave *slave)
if (rt722->hw_init)
return 0;
+ regcache_cache_only(rt722->regmap, false);
if (rt722->first_hw_init) {
- regcache_cache_only(rt722->regmap, false);
regcache_cache_bypass(rt722->regmap, true);
} else {
/*
diff --git a/sound/soc/codecs/rt722-sdca.h b/sound/soc/codecs/rt722-sdca.h
index 04c3b4232ef3..3c383705dd3c 100644
--- a/sound/soc/codecs/rt722-sdca.h
+++ b/sound/soc/codecs/rt722-sdca.h
@@ -183,6 +183,7 @@ struct rt722_sdca_dmic_kctrl_priv {
#define RT722_SDCA_ENT_PLATFORM_FU44 0x44
#define RT722_SDCA_ENT_XU03 0x03
#define RT722_SDCA_ENT_XU0D 0x0d
+#define RT722_SDCA_ENT0 0x00
/* RT722 SDCA control */
#define RT722_SDCA_CTL_SAMPLE_FREQ_INDEX 0x10
@@ -197,6 +198,8 @@ struct rt722_sdca_dmic_kctrl_priv {
#define RT722_SDCA_CTL_REQ_POWER_STATE 0x01
#define RT722_SDCA_CTL_VENDOR_DEF 0x30
#define RT722_SDCA_CTL_FU_CH_GAIN 0x0b
+#define RT722_SDCA_CTL_FUNC_STATUS 0x10
+#define RT722_SDCA_CTL_ACTUAL_POWER_STATE 0x10
/* RT722 SDCA channel */
#define CH_L 0x01
@@ -215,6 +218,9 @@ struct rt722_sdca_dmic_kctrl_priv {
#define RT722_SDCA_RATE_96000HZ 0x0b
#define RT722_SDCA_RATE_192000HZ 0x0d
+/* Function_Status */
+#define FUNCTION_NEEDS_INITIALIZATION BIT(5)
+
enum {
RT722_AIF1, /* For headset mic and headphone */
RT722_AIF2, /* For speaker */
diff --git a/sound/soc/codecs/rt9123.c b/sound/soc/codecs/rt9123.c
new file mode 100644
index 000000000000..242e8c975a62
--- /dev/null
+++ b/sound/soc/codecs/rt9123.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// rt9123.c -- RT9123 (SW I2C Mode) ALSA SoC Codec driver
+//
+// Author: ChiYuan Huang <cy_huang@richtek.com>
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/byteorder/generic.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dai.h>
+#include <sound/soc-dapm.h>
+#include <sound/tlv.h>
+
+#define RT9123_REG_AMPCTRL 0x01
+#define RT9123_REG_I2SOPT 0x02
+#define RT9123_REG_TDMRX 0x03
+#define RT9123_REG_SILVOLEN 0x04
+#define RT9123_REG_VOLGAIN 0x12
+#define RT9123_REG_ANAFLAG 0x36
+#define RT9123_REG_COMBOID 0xF7
+
+#define RT9123_MASK_SWRST BIT(15)
+#define RT9123_MASK_SWMUTE BIT(14)
+#define RT9123_MASK_AMPON BIT(12)
+#define RT9123_MASK_AUDBIT GENMASK(14, 12)
+#define RT9123_MASK_AUDFMT GENMASK(11, 8)
+#define RT9123_MASK_TDMRXLOC GENMASK(4, 0)
+#define RT9123_MASK_VENID GENMASK(15, 4)
+
+#define RT9123_FIXED_VENID 0x340
+
+struct rt9123_priv {
+ struct gpio_desc *enable;
+ unsigned int dai_fmt;
+ int tdm_slots;
+ int tdm_slot_width;
+};
+
+static int rt9123_enable_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol,
+ int event)
+{
+ struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm);
+ struct device *dev = comp->dev;
+ unsigned int enable;
+ int ret;
+
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMU:
+ enable = 1;
+ break;
+ case SND_SOC_DAPM_POST_PMD:
+ enable = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ /* AMPON bit is located in volatile RG, use pm_runtime to guarantee the RG access */
+ snd_soc_component_write_field(comp, RT9123_REG_AMPCTRL, RT9123_MASK_AMPON, enable);
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+
+ return 0;
+}
+
+static const struct snd_soc_dapm_widget rt9123_dapm_widgets[] = {
+ SND_SOC_DAPM_OUTPUT("SPK"),
+ SND_SOC_DAPM_OUT_DRV_E("Amp Drv", SND_SOC_NOPM, 0, 0, NULL, 0, rt9123_enable_event,
+ SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD),
+};
+
+static const struct snd_soc_dapm_route rt9123_dapm_routes[] = {
+ { "Amp Drv", NULL, "HiFi Playback" },
+ { "SPK", NULL, "Amp Drv" },
+};
+
+static const DECLARE_TLV_DB_SCALE(dig_tlv, -10375, 25, 0);
+static const DECLARE_TLV_DB_RANGE(ana_tlv,
+ 0, 0, TLV_DB_SCALE_ITEM(-1200, 0, 0),
+ 1, 9, TLV_DB_SCALE_ITEM(0, 150, 0),
+ 10, 10, TLV_DB_SCALE_ITEM(1400, 0, 0));
+static const char * const pwmfreq_text[] = { "300KHz", "325KHz", "350KHz", "375KHz" };
+static const struct soc_enum rt9123_pwm_freq_enum =
+ SOC_ENUM_SINGLE(RT9123_REG_AMPCTRL, 4, ARRAY_SIZE(pwmfreq_text), pwmfreq_text);
+static const char * const i2sch_text[] = { "(L+R)/2", "LCH", "RCH", "(L+R)/2" };
+static const struct soc_enum rt9123_i2sch_select_enum =
+ SOC_ENUM_SINGLE(RT9123_REG_I2SOPT, 4, ARRAY_SIZE(i2sch_text), i2sch_text);
+
+static int rt9123_kcontrol_name_comp(struct snd_kcontrol *kcontrol, const char *s)
+{
+ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+ const char *kctlname = kcontrol->id.name;
+
+ if (comp && comp->name_prefix)
+ kctlname += strlen(comp->name_prefix) + 1;
+
+ return strcmp(kctlname, s);
+}
+
+static int rt9123_xhandler_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+ struct device *dev = comp->dev;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ /*
+ * Since the RG bitfield for 'Speaker Volume' and 'PWM Frequency Select' are located in
+ * volatile RG address, special handling here with pm runtime API to guarantee RG read
+ * operation.
+ */
+ if (rt9123_kcontrol_name_comp(kcontrol, "Speaker Volume") == 0)
+ ret = snd_soc_get_volsw(kcontrol, ucontrol);
+ else
+ ret = snd_soc_get_enum_double(kcontrol, ucontrol);
+
+ if (ret < 0)
+ dev_err(dev, "Failed to get control (%d)\n", ret);
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+ return ret;
+}
+
+static int rt9123_xhandler_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+ struct device *dev = comp->dev;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ /*
+ * Since the RG bitfield for 'Speaker Volume' and 'PWM Frequency Select' are located in
+ * volatile RG address, special handling here with pm runtime API to guarantee RG write
+ * operation.
+ */
+ if (rt9123_kcontrol_name_comp(kcontrol, "Speaker Volume") == 0)
+ ret = snd_soc_put_volsw(kcontrol, ucontrol);
+ else
+ ret = snd_soc_put_enum_double(kcontrol, ucontrol);
+
+ if (ret < 0)
+ dev_err(dev, "Failed to put control (%d)\n", ret);
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+ return ret;
+}
+
+static const struct snd_kcontrol_new rt9123_controls[] = {
+ SOC_SINGLE_TLV("Master Volume", RT9123_REG_VOLGAIN, 2, 511, 1, dig_tlv),
+ SOC_SINGLE_EXT_TLV("Speaker Volume", RT9123_REG_AMPCTRL, 0, 10, 0, rt9123_xhandler_get,
+ rt9123_xhandler_put, ana_tlv),
+ SOC_ENUM_EXT("PWM Frequency Select", rt9123_pwm_freq_enum, rt9123_xhandler_get,
+ rt9123_xhandler_put),
+ SOC_ENUM("I2S CH Select", rt9123_i2sch_select_enum),
+ SOC_SINGLE("Silence Detect Switch", RT9123_REG_SILVOLEN, 14, 1, 0),
+};
+
+static const struct snd_soc_component_driver rt9123_comp_driver = {
+ .controls = rt9123_controls,
+ .num_controls = ARRAY_SIZE(rt9123_controls),
+ .dapm_widgets = rt9123_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(rt9123_dapm_widgets),
+ .dapm_routes = rt9123_dapm_routes,
+ .num_dapm_routes = ARRAY_SIZE(rt9123_dapm_routes),
+ .use_pmdown_time = 1,
+ .endianness = 1,
+};
+
+static int rt9123_dai_set_format(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ struct rt9123_priv *rt9123 = snd_soc_dai_get_drvdata(dai);
+
+ rt9123->dai_fmt = fmt;
+ return 0;
+}
+
+static int rt9123_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+ unsigned int rx_mask, int slots, int slot_width)
+{
+ struct rt9123_priv *rt9123 = snd_soc_dai_get_drvdata(dai);
+ struct snd_soc_component *comp = dai->component;
+ struct device *dev = dai->dev;
+ unsigned int rx_loc;
+
+ dev_dbg(dev, "(slots, slot_width) = (%d, %d), (txmask, rxmask) = 0x%x, 0x%x\n", slots,
+ slot_width, tx_mask, rx_mask);
+
+ if (slots <= 0 || slot_width <= 0 || slots % 2 || slot_width % 8 ||
+ slots * slot_width > 256) {
+ dev_err(dev, "Invalid slot parameter (%d, %d)\n", slots, slot_width);
+ return -EINVAL;
+ }
+
+ if (!rx_mask || hweight_long(rx_mask) > 1 || ffs(rx_mask) > slots) {
+ dev_err(dev, "Invalid rx_mask 0x%08x, slots = %d\n", rx_mask, slots);
+ return -EINVAL;
+ }
+
+ /* Configure rx channel data location */
+ rx_loc = (ffs(rx_mask) - 1) * slot_width / 8;
+ snd_soc_component_write_field(comp, RT9123_REG_TDMRX, RT9123_MASK_TDMRXLOC, rx_loc);
+
+ rt9123->tdm_slots = slots;
+ rt9123->tdm_slot_width = slot_width;
+
+ return 0;
+}
+
+static int rt9123_dai_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *param, struct snd_soc_dai *dai)
+{
+ struct rt9123_priv *rt9123 = snd_soc_dai_get_drvdata(dai);
+ struct snd_soc_component *comp = dai->component;
+ unsigned int fmtval, width, slot_width;
+ struct device *dev = dai->dev;
+ unsigned int audfmt, audbit;
+
+ fmtval = FIELD_GET(SND_SOC_DAIFMT_FORMAT_MASK, rt9123->dai_fmt);
+ if (rt9123->tdm_slots && fmtval != SND_SOC_DAIFMT_DSP_A && fmtval != SND_SOC_DAIFMT_DSP_B) {
+ dev_err(dev, "TDM only can support DSP_A or DSP_B format\n");
+ return -EINVAL;
+ }
+
+ switch (fmtval) {
+ case SND_SOC_DAIFMT_I2S:
+ audfmt = 0;
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ audfmt = 1;
+ break;
+ case SND_SOC_DAIFMT_RIGHT_J:
+ audfmt = 2;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ audfmt = rt9123->tdm_slots ? 4 : 3;
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ audfmt = rt9123->tdm_slots ? 12 : 11;
+ break;
+ default:
+ dev_err(dev, "Unsupported format %d\n", fmtval);
+ return -EINVAL;
+ }
+
+ switch (width = params_width(param)) {
+ case 16:
+ audbit = 0;
+ break;
+ case 20:
+ audbit = 1;
+ break;
+ case 24:
+ audbit = 2;
+ break;
+ case 32:
+ audbit = 3;
+ break;
+ case 8:
+ audbit = 4;
+ break;
+ default:
+ dev_err(dev, "Unsupported width %d\n", width);
+ return -EINVAL;
+ }
+
+ slot_width = params_physical_width(param);
+ if (rt9123->tdm_slots && slot_width > rt9123->tdm_slot_width) {
+ dev_err(dev, "Slot width is larger than TDM slot width\n");
+ return -EINVAL;
+ }
+
+ snd_soc_component_write_field(comp, RT9123_REG_I2SOPT, RT9123_MASK_AUDFMT, audfmt);
+ snd_soc_component_write_field(comp, RT9123_REG_I2SOPT, RT9123_MASK_AUDBIT, audbit);
+
+ return 0;
+}
+
+static const struct snd_soc_dai_ops rt9123_dai_ops = {
+ .set_fmt = rt9123_dai_set_format,
+ .set_tdm_slot = rt9123_dai_set_tdm_slot,
+ .hw_params = rt9123_dai_hw_params,
+};
+
+static struct snd_soc_dai_driver rt9123_dai_driver = {
+ .name = "HiFi",
+ .playback = {
+ .stream_name = "HiFi Playback",
+ .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S24 |
+ SNDRV_PCM_FMTBIT_S32,
+ .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+ SNDRV_PCM_RATE_22050 | SNDRV_PCM_RATE_24000 |
+ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |
+ SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
+ SNDRV_PCM_RATE_96000,
+ .rate_min = 8000,
+ .rate_max = 96000,
+ .channels_min = 1,
+ .channels_max = 2,
+ },
+ .ops = &rt9123_dai_ops,
+};
+
+static bool rt9123_readable_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case 0x00 ... 0x05:
+ case 0x12 ... 0x13:
+ case 0x20 ... 0x21:
+ case 0x36:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool rt9123_writeable_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case 0x01 ... 0x05:
+ case 0x12 ... 0x13:
+ case 0x20 ... 0x21:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool rt9123_volatile_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case 0x01:
+ case 0x20:
+ case 0x36:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config rt9123_regmap_config = {
+ .name = "rt9123",
+ .reg_bits = 8,
+ .val_bits = 16,
+ .val_format_endian = REGMAP_ENDIAN_BIG,
+ .readable_reg = rt9123_readable_reg,
+ .writeable_reg = rt9123_writeable_reg,
+ .volatile_reg = rt9123_volatile_reg,
+ .cache_type = REGCACHE_MAPLE,
+ .num_reg_defaults_raw = RT9123_REG_ANAFLAG + 1,
+};
+
+static int rt9123_i2c_probe(struct i2c_client *i2c)
+{
+ struct device *dev = &i2c->dev;
+ struct rt9123_priv *rt9123;
+ struct regmap *regmap;
+ __be16 value;
+ u16 venid;
+ int ret;
+
+ rt9123 = devm_kzalloc(dev, sizeof(*rt9123), GFP_KERNEL);
+ if (!rt9123)
+ return -ENOMEM;
+
+ rt9123->enable = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_HIGH);
+ if (IS_ERR(rt9123->enable))
+ return PTR_ERR(rt9123->enable);
+ else if (rt9123->enable)
+ usleep_range(250, 350);
+ else
+ dev_dbg(dev, "No 'enable' GPIO specified, treat it as default on\n");
+
+ /* Check vendor id information */
+ ret = i2c_smbus_read_i2c_block_data(i2c, RT9123_REG_COMBOID, sizeof(value), (u8 *)&value);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Failed to read vendor-id\n");
+
+ venid = be16_to_cpu(value);
+ if ((venid & RT9123_MASK_VENID) != RT9123_FIXED_VENID)
+ return dev_err_probe(dev, -ENODEV, "Incorrect vendor-id 0x%04x\n", venid);
+
+ /* Trigger RG reset before regmap init cache */
+ value = cpu_to_be16(RT9123_MASK_SWRST);
+ ret = i2c_smbus_write_i2c_block_data(i2c, RT9123_REG_AMPCTRL, sizeof(value), (u8 *)&value);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to trigger RG reset\n");
+
+ /* Need to wait 10ms for the reset to complete */
+ usleep_range(10000, 11000);
+
+ regmap = devm_regmap_init_i2c(i2c, &rt9123_regmap_config);
+ if (IS_ERR(regmap))
+ return dev_err_probe(dev, PTR_ERR(regmap), "Failed to init regmap\n");
+
+ i2c_set_clientdata(i2c, rt9123);
+
+ pm_runtime_set_autosuspend_delay(dev, 500);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_active(dev);
+ ret = devm_pm_runtime_enable(dev);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to enable pm runtime\n");
+
+ return devm_snd_soc_register_component(dev, &rt9123_comp_driver, &rt9123_dai_driver, 1);
+}
+
+#ifdef CONFIG_PM
+static int rt9123_runtime_suspend(struct device *dev)
+{
+ struct rt9123_priv *rt9123 = dev_get_drvdata(dev);
+ struct regmap *regmap = dev_get_regmap(dev, NULL);
+
+ if (rt9123->enable) {
+ regcache_cache_only(regmap, true);
+ regcache_mark_dirty(regmap);
+ gpiod_set_value(rt9123->enable, 0);
+ }
+
+ return 0;
+}
+
+static int rt9123_runtime_resume(struct device *dev)
+{
+ struct rt9123_priv *rt9123 = dev_get_drvdata(dev);
+ struct regmap *regmap = dev_get_regmap(dev, NULL);
+ int ret;
+
+ if (rt9123->enable) {
+ gpiod_set_value(rt9123->enable, 1);
+ usleep_range(250, 350);
+
+ regcache_cache_only(regmap, false);
+ ret = regcache_sync(regmap);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops rt9123_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(rt9123_runtime_suspend, rt9123_runtime_resume, NULL)
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id rt9123_device_id[] = {
+ { .compatible = "richtek,rt9123" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rt9123_device_id);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id rt9123_acpi_match[] = {
+ { "RT9123", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, rt9123_acpi_match);
+#endif
+
+static struct i2c_driver rt9123_i2c_driver = {
+ .driver = {
+ .name = "rt9123",
+ .of_match_table = of_match_ptr(rt9123_device_id),
+ .acpi_match_table = ACPI_PTR(rt9123_acpi_match),
+ .pm = pm_ptr(&rt9123_dev_pm_ops),
+ },
+ .probe = rt9123_i2c_probe,
+};
+module_i2c_driver(rt9123_i2c_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("ASoC rt9123 Driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/rt9123p.c b/sound/soc/codecs/rt9123p.c
new file mode 100644
index 000000000000..d509659e735b
--- /dev/null
+++ b/sound/soc/codecs/rt9123p.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// rt9123p.c -- RT9123 (HW Mode) ALSA SoC Codec driver
+//
+// Author: ChiYuan Huang <cy_huang@richtek.com>
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/soc-dai.h>
+#include <sound/soc-dapm.h>
+
+struct rt9123p_priv {
+ struct gpio_desc *enable;
+ unsigned int enable_delay;
+ int enable_switch;
+};
+
+static int rt9123p_daiops_trigger(struct snd_pcm_substream *substream, int cmd,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *comp = dai->component;
+ struct rt9123p_priv *rt9123p = snd_soc_component_get_drvdata(comp);
+
+ if (!rt9123p->enable)
+ return 0;
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ case SNDRV_PCM_TRIGGER_RESUME:
+ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+ mdelay(rt9123p->enable_delay);
+ if (rt9123p->enable_switch) {
+ gpiod_set_value(rt9123p->enable, 1);
+ dev_dbg(comp->dev, "set enable to 1");
+ }
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ gpiod_set_value(rt9123p->enable, 0);
+ dev_dbg(comp->dev, "set enable to 0");
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int rt9123p_enable_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol,
+ int event)
+{
+ struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm);
+ struct rt9123p_priv *rt9123p = snd_soc_component_get_drvdata(comp);
+
+ if (event & SND_SOC_DAPM_POST_PMU)
+ rt9123p->enable_switch = 1;
+ else if (event & SND_SOC_DAPM_POST_PMD)
+ rt9123p->enable_switch = 0;
+
+ return 0;
+}
+
+static const struct snd_soc_dapm_widget rt9123p_dapm_widgets[] = {
+ SND_SOC_DAPM_OUTPUT("SPK"),
+ SND_SOC_DAPM_OUT_DRV_E("Amp Drv", SND_SOC_NOPM, 0, 0, NULL, 0, rt9123p_enable_event,
+ SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD),
+};
+
+static const struct snd_soc_dapm_route rt9123p_dapm_routes[] = {
+ {"Amp Drv", NULL, "HiFi Playback"},
+ {"SPK", NULL, "Amp Drv"},
+};
+
+static const struct snd_soc_component_driver rt9123p_comp_driver = {
+ .dapm_widgets = rt9123p_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(rt9123p_dapm_widgets),
+ .dapm_routes = rt9123p_dapm_routes,
+ .num_dapm_routes = ARRAY_SIZE(rt9123p_dapm_routes),
+ .idle_bias_on = 1,
+ .use_pmdown_time = 1,
+ .endianness = 1,
+};
+
+static const struct snd_soc_dai_ops rt9123p_dai_ops = {
+ .trigger = rt9123p_daiops_trigger,
+};
+
+static struct snd_soc_dai_driver rt9123p_dai_driver = {
+ .name = "HiFi",
+ .playback = {
+ .stream_name = "HiFi Playback",
+ .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S24 |
+ SNDRV_PCM_FMTBIT_S32,
+ .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+ SNDRV_PCM_RATE_22050 | SNDRV_PCM_RATE_24000 |
+ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |
+ SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
+ SNDRV_PCM_RATE_96000,
+ .rate_min = 8000,
+ .rate_max = 96000,
+ .channels_min = 1,
+ .channels_max = 2,
+ },
+ .ops = &rt9123p_dai_ops,
+};
+
+static int rt9123p_platform_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rt9123p_priv *rt9123p;
+ int ret;
+
+ rt9123p = devm_kzalloc(dev, sizeof(*rt9123p), GFP_KERNEL);
+ if (!rt9123p)
+ return -ENOMEM;
+
+ rt9123p->enable = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW);
+ if (IS_ERR(rt9123p->enable))
+ return PTR_ERR(rt9123p->enable);
+
+ ret = device_property_read_u32(dev, "enable-delay-ms", &rt9123p->enable_delay);
+ if (ret) {
+ rt9123p->enable_delay = 0;
+ dev_dbg(dev, "no optional property 'enable-delay-ms' found, default: no delay\n");
+ }
+
+ platform_set_drvdata(pdev, rt9123p);
+
+ return devm_snd_soc_register_component(dev, &rt9123p_comp_driver, &rt9123p_dai_driver, 1);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id rt9123p_device_id[] = {
+ { .compatible = "richtek,rt9123p" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rt9123p_device_id);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id rt9123p_acpi_match[] = {
+ { "RT9123P", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, rt9123p_acpi_match);
+#endif
+
+static struct platform_driver rt9123p_platform_driver = {
+ .driver = {
+ .name = "rt9123p",
+ .of_match_table = of_match_ptr(rt9123p_device_id),
+ .acpi_match_table = ACPI_PTR(rt9123p_acpi_match),
+ },
+ .probe = rt9123p_platform_probe,
+};
+module_platform_driver(rt9123p_platform_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("ASoC rt9123p Driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c
index 498189ab691c..b3d401ada176 100644
--- a/sound/soc/codecs/sma1307.c
+++ b/sound/soc/codecs/sma1307.c
@@ -8,7 +8,6 @@
#include <linux/firmware.h>
#include <linux/i2c.h>
-#include <linux/of_gpio.h>
#include <linux/regmap.h>
#include <sound/pcm_params.h>
#include <sound/tlv.h>
diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c
index bd8848ea1ec2..24d4b643917d 100644
--- a/sound/soc/codecs/sta32x.c
+++ b/sound/soc/codecs/sta32x.c
@@ -983,8 +983,7 @@ static int sta32x_probe(struct snd_soc_component *component)
err_regulator_bulk_disable:
regulator_bulk_disable(ARRAY_SIZE(sta32x->supplies), sta32x->supplies);
err_clk_disable_unprepare:
- if (sta32x->xti_clk)
- clk_disable_unprepare(sta32x->xti_clk);
+ clk_disable_unprepare(sta32x->xti_clk);
return ret;
}
@@ -995,8 +994,7 @@ static void sta32x_remove(struct snd_soc_component *component)
sta32x_watchdog_stop(sta32x);
regulator_bulk_disable(ARRAY_SIZE(sta32x->supplies), sta32x->supplies);
- if (sta32x->xti_clk)
- clk_disable_unprepare(sta32x->xti_clk);
+ clk_disable_unprepare(sta32x->xti_clk);
}
static const struct snd_soc_component_driver sta32x_component = {
diff --git a/sound/soc/codecs/tas2764-quirks.h b/sound/soc/codecs/tas2764-quirks.h
new file mode 100644
index 000000000000..7a62b3ba5b40
--- /dev/null
+++ b/sound/soc/codecs/tas2764-quirks.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __TAS2764_QUIRKS__
+#define __TAS2764_QUIRKS__
+
+#include <linux/regmap.h>
+
+#include "tas2764.h"
+
+/* Bitmask of enabled Apple quirks */
+#define ENABLED_APPLE_QUIRKS 0x3f
+
+/*
+ * Disable noise gate and flip down reserved bit in NS_CFG0
+ */
+#define TAS2764_NOISE_GATE_DISABLE BIT(0)
+
+static const struct reg_sequence tas2764_noise_gate_dis_seq[] = {
+ REG_SEQ0(TAS2764_REG(0x0, 0x35), 0xb0)
+};
+
+/*
+ * CONV_VBAT_PVDD_MODE=1
+ */
+#define TAS2764_CONV_VBAT_PVDD_MODE BIT(1)
+
+static const struct reg_sequence tas2764_conv_vbat_pvdd_mode_seq[] = {
+ REG_SEQ0(TAS2764_REG(0x0, 0x6b), 0x41)
+};
+
+/*
+ * Reset of DAC modulator when DSP is OFF
+ */
+#define TAS2764_DMOD_RST BIT(2)
+
+static const struct reg_sequence tas2764_dmod_rst_seq[] = {
+ REG_SEQ0(TAS2764_REG(0x0, 0x76), 0x0)
+};
+
+/*
+ * Unknown 0x133/0x137 writes (maybe TDM related)
+ */
+#define TAS2764_UNK_SEQ0 BIT(3)
+
+static const struct reg_sequence tas2764_unk_seq0[] = {
+ REG_SEQ0(TAS2764_REG(0x1, 0x33), 0x80),
+ REG_SEQ0(TAS2764_REG(0x1, 0x37), 0x3a),
+};
+
+/*
+ * Unknown 0x614 - 0x61f writes
+ */
+#define TAS2764_APPLE_UNK_SEQ1 BIT(4)
+
+static const struct reg_sequence tas2764_unk_seq1[] = {
+ REG_SEQ0(TAS2764_REG(0x6, 0x14), 0x0),
+ REG_SEQ0(TAS2764_REG(0x6, 0x15), 0x13),
+ REG_SEQ0(TAS2764_REG(0x6, 0x16), 0x52),
+ REG_SEQ0(TAS2764_REG(0x6, 0x17), 0x0),
+ REG_SEQ0(TAS2764_REG(0x6, 0x18), 0xe4),
+ REG_SEQ0(TAS2764_REG(0x6, 0x19), 0xc),
+ REG_SEQ0(TAS2764_REG(0x6, 0x16), 0xaa),
+ REG_SEQ0(TAS2764_REG(0x6, 0x1b), 0x0),
+ REG_SEQ0(TAS2764_REG(0x6, 0x1c), 0x12),
+ REG_SEQ0(TAS2764_REG(0x6, 0x1d), 0xa0),
+ REG_SEQ0(TAS2764_REG(0x6, 0x1e), 0xd8),
+ REG_SEQ0(TAS2764_REG(0x6, 0x1f), 0x0),
+};
+
+/*
+ * Unknown writes in the 0xfd page (with secondary paging inside)
+ */
+#define TAS2764_APPLE_UNK_SEQ2 BIT(5)
+
+static const struct reg_sequence tas2764_unk_seq2[] = {
+ REG_SEQ0(TAS2764_REG(0xfd, 0x0d), 0xd),
+ REG_SEQ0(TAS2764_REG(0xfd, 0x6c), 0x2),
+ REG_SEQ0(TAS2764_REG(0xfd, 0x6d), 0xf),
+ REG_SEQ0(TAS2764_REG(0xfd, 0x0d), 0x0),
+};
+
+/*
+ * Disable 'Thermal Threshold 1'
+ */
+#define TAS2764_THERMAL_TH1_DISABLE BIT(6)
+
+static const struct reg_sequence tas2764_thermal_th1_dis_seq[] = {
+ REG_SEQ0(TAS2764_REG(0x1, 0x47), 0x2),
+};
+
+/*
+ * Imitate Apple's shutdown dance
+ */
+#define TAS2764_SHUTDOWN_DANCE BIT(7)
+
+static const struct reg_sequence tas2764_shutdown_dance_init_seq[] = {
+ /*
+ * SDZ_MODE=01 (immediate)
+ *
+ * We want the shutdown to happen under the influence of
+ * the magic writes in the 0xfdXX region, so make sure
+ * the shutdown is immediate and there's no grace period
+ * followed by the codec part.
+ */
+ REG_SEQ0(TAS2764_REG(0x0, 0x7), 0x60),
+};
+
+static const struct reg_sequence tas2764_pre_shutdown_seq[] = {
+ REG_SEQ0(TAS2764_REG(0xfd, 0x0d), 0xd), /* switch hidden page */
+ REG_SEQ0(TAS2764_REG(0xfd, 0x64), 0x4), /* do write (unknown semantics) */
+ REG_SEQ0(TAS2764_REG(0xfd, 0x0d), 0x0), /* switch hidden page back */
+};
+
+static const struct reg_sequence tas2764_post_shutdown_seq[] = {
+ REG_SEQ0(TAS2764_REG(0xfd, 0x0d), 0xd),
+ REG_SEQ0(TAS2764_REG(0xfd, 0x64), 0x0), /* revert write from pre sequence */
+ REG_SEQ0(TAS2764_REG(0xfd, 0x0d), 0x0),
+};
+
+static int tas2764_do_quirky_pwr_ctrl_change(struct tas2764_priv *tas2764,
+ unsigned int target)
+{
+ unsigned int curr;
+ int ret;
+
+ curr = snd_soc_component_read_field(tas2764->component,
+ TAS2764_PWR_CTRL,
+ TAS2764_PWR_CTRL_MASK);
+
+ if (target == curr)
+ return 0;
+
+ /* Handle power state transition to shutdown */
+ if (target == TAS2764_PWR_CTRL_SHUTDOWN &&
+ (curr == TAS2764_PWR_CTRL_MUTE || curr == TAS2764_PWR_CTRL_ACTIVE)) {
+ ret = regmap_multi_reg_write(tas2764->regmap, tas2764_pre_shutdown_seq,
+ ARRAY_SIZE(tas2764_pre_shutdown_seq));
+ if (!ret)
+ ret = snd_soc_component_update_bits(tas2764->component,
+ TAS2764_PWR_CTRL,
+ TAS2764_PWR_CTRL_MASK,
+ TAS2764_PWR_CTRL_SHUTDOWN);
+ if (!ret)
+ ret = regmap_multi_reg_write(tas2764->regmap,
+ tas2764_post_shutdown_seq,
+ ARRAY_SIZE(tas2764_post_shutdown_seq));
+ }
+
+ ret = snd_soc_component_update_bits(tas2764->component, TAS2764_PWR_CTRL,
+ TAS2764_PWR_CTRL_MASK, target);
+
+ return ret;
+}
+
+/*
+ * Via devicetree (TODO):
+ * - switch from spread spectrum to class-D switching
+ * - disable edge control
+ * - set BOP settings (the BOP config bits *and* BOP_SRC)
+ */
+
+/*
+ * Other setup TODOs:
+ * - DVC ramp rate
+ */
+
+static const struct tas2764_quirk_init_sequence {
+ const struct reg_sequence *seq;
+ int len;
+} tas2764_quirk_init_sequences[] = {
+ { tas2764_noise_gate_dis_seq, ARRAY_SIZE(tas2764_noise_gate_dis_seq) },
+ { tas2764_dmod_rst_seq, ARRAY_SIZE(tas2764_dmod_rst_seq) },
+ { tas2764_conv_vbat_pvdd_mode_seq, ARRAY_SIZE(tas2764_conv_vbat_pvdd_mode_seq) },
+ { tas2764_unk_seq0, ARRAY_SIZE(tas2764_unk_seq0) },
+ { tas2764_unk_seq1, ARRAY_SIZE(tas2764_unk_seq1) },
+ { tas2764_unk_seq2, ARRAY_SIZE(tas2764_unk_seq2) },
+ { tas2764_thermal_th1_dis_seq, ARRAY_SIZE(tas2764_thermal_th1_dis_seq) },
+ { tas2764_shutdown_dance_init_seq, ARRAY_SIZE(tas2764_shutdown_dance_init_seq) },
+};
+
+#endif /* __TAS2764_QUIRKS__ */
diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c
index 08aa7ee34256..36e25e48b354 100644
--- a/sound/soc/codecs/tas2764.c
+++ b/sound/soc/codecs/tas2764.c
@@ -8,6 +8,7 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/hwmon.h>
#include <linux/pm.h>
#include <linux/i2c.h>
#include <linux/gpio/consumer.h>
@@ -45,6 +46,8 @@ struct tas2764_priv {
bool unmuted;
};
+#include "tas2764-quirks.h"
+
static const char *tas2764_int_ltch0_msgs[8] = {
"fault: over temperature", /* INT_LTCH0 & BIT(0) */
"fault: over current",
@@ -122,6 +125,9 @@ static int tas2764_update_pwr_ctrl(struct tas2764_priv *tas2764)
else
val = TAS2764_PWR_CTRL_SHUTDOWN;
+ if (ENABLED_APPLE_QUIRKS & TAS2764_SHUTDOWN_DANCE)
+ return tas2764_do_quirky_pwr_ctrl_change(tas2764, val);
+
ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
TAS2764_PWR_CTRL_MASK, val);
if (ret < 0)
@@ -546,6 +552,106 @@ static uint8_t sn012776_bop_presets[] = {
0x06, 0x3e, 0x37, 0x30, 0xff, 0xe6
};
+static const struct regmap_config tas2764_i2c_regmap;
+
+static int tas2764_apply_init_quirks(struct tas2764_priv *tas2764)
+{
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(tas2764_quirk_init_sequences); i++) {
+ const struct tas2764_quirk_init_sequence *init_seq =
+ &tas2764_quirk_init_sequences[i];
+
+ if (!init_seq->seq)
+ continue;
+
+ if (!(BIT(i) & ENABLED_APPLE_QUIRKS))
+ continue;
+
+ ret = regmap_multi_reg_write(tas2764->regmap, init_seq->seq,
+ init_seq->len);
+
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int tas2764_read_die_temp(struct tas2764_priv *tas2764, long *result)
+{
+ int ret, reg;
+
+ ret = regmap_read(tas2764->regmap, TAS2764_TEMP, &reg);
+ if (ret)
+ return ret;
+ /*
+ * As per datasheet, subtract 93 from raw value to get degrees
+ * Celsius. hwmon wants millidegrees.
+ *
+ * NOTE: The chip will initialise the TAS2764_TEMP register to
+ * 2.6 *C to avoid triggering temperature protection. Since the
+ * ADC is powered down during software shutdown, this value will
+ * persist until the chip is fully powered up (e.g. the PCM it's
+ * attached to is opened). The ADC will power down again when
+ * the chip is put back into software shutdown, with the last
+ * value sampled persisting in the ADC's register.
+ */
+ *result = (reg - 93) * 1000;
+ return 0;
+}
+
+static umode_t tas2764_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel)
+{
+ if (type != hwmon_temp)
+ return 0;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ return 0444;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int tas2764_hwmon_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct tas2764_priv *tas2764 = dev_get_drvdata(dev);
+ int ret;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ ret = tas2764_read_die_temp(tas2764, val);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+static const struct hwmon_channel_info *const tas2764_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+ NULL
+};
+
+static const struct hwmon_ops tas2764_hwmon_ops = {
+ .is_visible = tas2764_hwmon_is_visible,
+ .read = tas2764_hwmon_read,
+};
+
+static const struct hwmon_chip_info tas2764_hwmon_chip_info = {
+ .ops = &tas2764_hwmon_ops,
+ .info = tas2764_hwmon_info,
+};
+
static int tas2764_codec_probe(struct snd_soc_component *component)
{
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
@@ -559,9 +665,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
}
tas2764_reset(tas2764);
+ regmap_reinit_cache(tas2764->regmap, &tas2764_i2c_regmap);
if (tas2764->irq) {
- ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0xff);
+ ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0x00);
if (ret < 0)
return ret;
@@ -614,6 +721,13 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
if (ret < 0)
return ret;
}
+
+ /* Apply all enabled Apple quirks */
+ ret = tas2764_apply_init_quirks(tas2764);
+
+ if (ret < 0)
+ return ret;
+
break;
default:
break;
@@ -682,7 +796,7 @@ static const struct reg_default tas2764_reg_defaults[] = {
static const struct regmap_range_cfg tas2764_regmap_ranges[] = {
{
.range_min = 0,
- .range_max = 1 * 128,
+ .range_max = 0xffff,
.selector_reg = TAS2764_PAGE,
.selector_mask = 0xff,
.selector_shift = 0,
@@ -698,6 +812,9 @@ static bool tas2764_volatile_register(struct device *dev, unsigned int reg)
case TAS2764_INT_LTCH0 ... TAS2764_INT_LTCH4:
case TAS2764_INT_CLK_CFG:
return true;
+ case TAS2764_REG(0xf0, 0x0) ... TAS2764_REG(0xff, 0x0):
+ /* TI's undocumented registers for the application of quirks */
+ return true;
default:
return false;
}
@@ -712,7 +829,7 @@ static const struct regmap_config tas2764_i2c_regmap = {
.cache_type = REGCACHE_RBTREE,
.ranges = tas2764_regmap_ranges,
.num_ranges = ARRAY_SIZE(tas2764_regmap_ranges),
- .max_register = 1 * 128,
+ .max_register = 0xffff,
};
static int tas2764_parse_dt(struct device *dev, struct tas2764_priv *tas2764)
@@ -759,7 +876,7 @@ static int tas2764_i2c_probe(struct i2c_client *client)
if (!tas2764)
return -ENOMEM;
- tas2764->devid = (enum tas2764_devid)of_device_get_match_data(&client->dev);
+ tas2764->devid = (kernel_ulong_t)of_device_get_match_data(&client->dev);
tas2764->dev = &client->dev;
tas2764->irq = client->irq;
@@ -783,6 +900,20 @@ static int tas2764_i2c_probe(struct i2c_client *client)
}
}
+ if (IS_REACHABLE(CONFIG_HWMON)) {
+ struct device *hwmon;
+
+ hwmon = devm_hwmon_device_register_with_info(&client->dev, "tas2764",
+ tas2764,
+ &tas2764_hwmon_chip_info,
+ NULL);
+ if (IS_ERR(hwmon)) {
+ return dev_err_probe(&client->dev, PTR_ERR(hwmon),
+ "Failed to register temp sensor\n");
+ }
+ }
+
+
return devm_snd_soc_register_component(tas2764->dev,
&soc_component_driver_tas2764,
tas2764_dai_driver,
diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h
index 3251dc0106e0..538290ed3d92 100644
--- a/sound/soc/codecs/tas2764.h
+++ b/sound/soc/codecs/tas2764.h
@@ -117,6 +117,9 @@
#define TAS2764_INT_LTCH3 TAS2764_REG(0x0, 0x50)
#define TAS2764_INT_LTCH4 TAS2764_REG(0x0, 0x51)
+/* Readout Registers */
+#define TAS2764_TEMP TAS2764_REG(0x0, 0x56)
+
/* Clock/IRQ Settings */
#define TAS2764_INT_CLK_CFG TAS2764_REG(0x0, 0x5c)
#define TAS2764_INT_CLK_CFG_IRQZ_CLR BIT(2)
diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c
index 7f219df8be70..6f878b01716f 100644
--- a/sound/soc/codecs/tas2770.c
+++ b/sound/soc/codecs/tas2770.c
@@ -12,6 +12,7 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/hwmon.h>
#include <linux/pm.h>
#include <linux/i2c.h>
#include <linux/gpio/consumer.h>
@@ -156,11 +157,37 @@ static const struct snd_kcontrol_new isense_switch =
static const struct snd_kcontrol_new vsense_switch =
SOC_DAPM_SINGLE("Switch", TAS2770_PWR_CTRL, 2, 1, 1);
+static int sense_event(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct tas2770_priv *tas2770 = snd_soc_component_get_drvdata(component);
+
+ /*
+ * Powering up ISENSE/VSENSE requires a trip through the shutdown state.
+ * Do that here to ensure that our changes are applied properly, otherwise
+ * we might end up with non-functional IVSENSE if playback started earlier,
+ * which would break software speaker protection.
+ */
+ switch (event) {
+ case SND_SOC_DAPM_PRE_REG:
+ return snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+ TAS2770_PWR_CTRL_MASK,
+ TAS2770_PWR_CTRL_SHUTDOWN);
+ case SND_SOC_DAPM_POST_REG:
+ return tas2770_update_pwr_ctrl(tas2770);
+ default:
+ return 0;
+ }
+}
+
static const struct snd_soc_dapm_widget tas2770_dapm_widgets[] = {
SND_SOC_DAPM_AIF_IN("ASI1", "ASI1 Playback", 0, SND_SOC_NOPM, 0, 0),
SND_SOC_DAPM_MUX("ASI1 Sel", SND_SOC_NOPM, 0, 0, &tas2770_asi1_mux),
- SND_SOC_DAPM_SWITCH("ISENSE", TAS2770_PWR_CTRL, 3, 1, &isense_switch),
- SND_SOC_DAPM_SWITCH("VSENSE", TAS2770_PWR_CTRL, 2, 1, &vsense_switch),
+ SND_SOC_DAPM_SWITCH_E("ISENSE", TAS2770_PWR_CTRL, 3, 1, &isense_switch,
+ sense_event, SND_SOC_DAPM_PRE_REG | SND_SOC_DAPM_POST_REG),
+ SND_SOC_DAPM_SWITCH_E("VSENSE", TAS2770_PWR_CTRL, 2, 1, &vsense_switch,
+ sense_event, SND_SOC_DAPM_PRE_REG | SND_SOC_DAPM_POST_REG),
SND_SOC_DAPM_DAC_E("DAC", NULL, SND_SOC_NOPM, 0, 0, tas2770_dac_event,
SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
SND_SOC_DAPM_OUTPUT("OUT"),
@@ -214,6 +241,19 @@ static int tas2770_set_ivsense_transmit(struct tas2770_priv *tas2770,
return 0;
}
+static int tas2770_set_pdm_transmit(struct tas2770_priv *tas2770, int slot)
+{
+ struct snd_soc_component *component = tas2770->component;
+ int ret;
+
+ ret = snd_soc_component_update_bits(component, TAS2770_TDM_CFG_REG7,
+ TAS2770_TDM_CFG_REG7_PDM_MASK |
+ TAS2770_TDM_CFG_REG7_50_MASK,
+ TAS2770_TDM_CFG_REG7_PDM_ENABLE |
+ slot);
+ return ret;
+}
+
static int tas2770_set_bitwidth(struct tas2770_priv *tas2770, int bitwidth)
{
int ret;
@@ -491,6 +531,88 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = {
},
};
+static int tas2770_read_die_temp(struct tas2770_priv *tas2770, long *result)
+{
+ int ret = 0;
+ int reading, msb, lsb;
+
+ ret = regmap_read(tas2770->regmap, TAS2770_TEMP_MSB, &msb);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(tas2770->regmap, TAS2770_TEMP_LSB, &lsb);
+ if (ret)
+ return ret;
+
+ reading = (msb << 4) | (lsb >> 4);
+
+ /*
+ * As per datasheet: divide register by 16 and subtract 93 to get
+ * degrees Celsius. hwmon requires millidegrees. Let's avoid rounding
+ * errors by subtracting 93 * 16 then multiplying by 1000 / 16.
+ *
+ * NOTE: The ADC registers are initialised to 0 on reset. This means
+ * that the temperature will read -93 *C until the chip is brought out
+ * of software shutdown (e.g. the PCM it's attached to is opened). The
+ * ADC is also shut down in software shutdown/low-power mode, so the
+ * value read back from its registers will be the last value sampled
+ * before entering software shutdown.
+ */
+ *result = (reading - (93 * 16)) * (1000 / 16);
+ return 0;
+}
+
+static umode_t tas2770_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel)
+{
+ if (type != hwmon_temp)
+ return 0;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ return 0444;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int tas2770_hwmon_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct tas2770_priv *tas2770 = dev_get_drvdata(dev);
+ int ret;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ ret = tas2770_read_die_temp(tas2770, val);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+static const struct hwmon_channel_info *const tas2770_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+ NULL
+};
+
+static const struct hwmon_ops tas2770_hwmon_ops = {
+ .is_visible = tas2770_hwmon_is_visible,
+ .read = tas2770_hwmon_read,
+};
+
+static const struct hwmon_chip_info tas2770_hwmon_chip_info = {
+ .ops = &tas2770_hwmon_ops,
+ .info = tas2770_hwmon_info,
+};
+
static const struct regmap_config tas2770_i2c_regmap;
static int tas2770_codec_probe(struct snd_soc_component *component)
@@ -517,6 +639,13 @@ static int tas2770_codec_probe(struct snd_soc_component *component)
return ret;
}
+ if (tas2770->pdm_slot != -1) {
+ ret = tas2770_set_pdm_transmit(tas2770, tas2770->pdm_slot);
+
+ if (ret < 0)
+ return ret;
+ }
+
return 0;
}
@@ -656,6 +785,11 @@ static int tas2770_parse_dt(struct device *dev, struct tas2770_priv *tas2770)
tas2770->v_sense_slot = -1;
}
+ rc = fwnode_property_read_u32(dev->fwnode, "ti,pdm-slot-no",
+ &tas2770->pdm_slot);
+ if (rc)
+ tas2770->pdm_slot = -1;
+
tas2770->sdz_gpio = devm_gpiod_get_optional(dev, "shutdown", GPIOD_OUT_HIGH);
if (IS_ERR(tas2770->sdz_gpio)) {
if (PTR_ERR(tas2770->sdz_gpio) == -EPROBE_DEFER)
@@ -707,6 +841,19 @@ static int tas2770_i2c_probe(struct i2c_client *client)
}
}
+ if (IS_REACHABLE(CONFIG_HWMON)) {
+ struct device *hwmon;
+
+ hwmon = devm_hwmon_device_register_with_info(&client->dev, "tas2770",
+ tas2770,
+ &tas2770_hwmon_chip_info,
+ NULL);
+ if (IS_ERR(hwmon)) {
+ return dev_err_probe(&client->dev, PTR_ERR(hwmon),
+ "Failed to register temp sensor\n");
+ }
+ }
+
result = tas2770_register_codec(tas2770);
if (result)
dev_err(tas2770->dev, "Register codec failed.\n");
diff --git a/sound/soc/codecs/tas2770.h b/sound/soc/codecs/tas2770.h
index f75f40781ab1..3fd2e7003c50 100644
--- a/sound/soc/codecs/tas2770.h
+++ b/sound/soc/codecs/tas2770.h
@@ -77,6 +77,11 @@
#define TAS2770_TDM_CFG_REG6_ISNS_MASK BIT(6)
#define TAS2770_TDM_CFG_REG6_ISNS_ENABLE BIT(6)
#define TAS2770_TDM_CFG_REG6_50_MASK GENMASK(5, 0)
+ /* TDM Configuration Reg10 */
+#define TAS2770_TDM_CFG_REG7 TAS2770_REG(0X0, 0x11)
+#define TAS2770_TDM_CFG_REG7_PDM_MASK BIT(6)
+#define TAS2770_TDM_CFG_REG7_PDM_ENABLE BIT(6)
+#define TAS2770_TDM_CFG_REG7_50_MASK GENMASK(5, 0)
/* Brown Out Prevention Reg0 */
#define TAS2770_BO_PRV_REG0 TAS2770_REG(0X0, 0x1B)
/* Interrupt MASK Reg0 */
@@ -138,6 +143,7 @@ struct tas2770_priv {
struct device *dev;
int v_sense_slot;
int i_sense_slot;
+ int pdm_slot;
bool dac_powered;
bool unmuted;
};
diff --git a/sound/soc/codecs/tas2781-comlib-i2c.c b/sound/soc/codecs/tas2781-comlib-i2c.c
new file mode 100644
index 000000000000..c078bb0a8437
--- /dev/null
+++ b/sound/soc/codecs/tas2781-comlib-i2c.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// TAS2563/TAS2781 Common functions for HDA and ASoC Audio drivers based on I2C
+//
+// Copyright 2025 Texas Instruments, Inc.
+//
+// Author: Shenghao Ding <shenghao-ding@ti.com>
+
+#include <linux/crc8.h>
+#include <linux/firmware.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/tas2781.h>
+#include <sound/tas2781-comlib-i2c.h>
+
+static const struct regmap_range_cfg tasdevice_ranges[] = {
+ {
+ .range_min = 0,
+ .range_max = 256 * 128,
+ .selector_reg = TASDEVICE_PAGE_SELECT,
+ .selector_mask = 0xff,
+ .selector_shift = 0,
+ .window_start = 0,
+ .window_len = 128,
+ },
+};
+
+static const struct regmap_config tasdevice_regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .cache_type = REGCACHE_NONE,
+ .ranges = tasdevice_ranges,
+ .num_ranges = ARRAY_SIZE(tasdevice_ranges),
+ .max_register = 256 * 128,
+};
+
+static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
+ unsigned short chn, int book)
+{
+ struct i2c_client *client = (struct i2c_client *)tas_priv->client;
+ int ret = 0;
+
+ if (chn < tas_priv->ndev) {
+ struct tasdevice *tasdev = &tas_priv->tasdevice[chn];
+ struct regmap *map = tas_priv->regmap;
+
+ if (client->addr != tasdev->dev_addr) {
+ client->addr = tasdev->dev_addr;
+ /* All tas2781s share the same regmap, clear the page
+ * inside regmap once switching to another tas2781.
+ * Register 0 at any pages and any books inside tas2781
+ * is the same one for page-switching.
+ */
+ ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
+ if (ret < 0) {
+ dev_err(tas_priv->dev, "%s, E=%d channel:%d\n",
+ __func__, ret, chn);
+ goto out;
+ }
+ }
+
+ if (tasdev->cur_book != book) {
+ ret = regmap_write(map, TASDEVICE_BOOKCTL_REG, book);
+ if (ret < 0) {
+ dev_err(tas_priv->dev, "%s, E=%d\n",
+ __func__, ret);
+ goto out;
+ }
+ tasdev->cur_book = book;
+ }
+ } else {
+ ret = -EINVAL;
+ dev_err(tas_priv->dev, "%s, no such channel(%d)\n", __func__,
+ chn);
+ }
+
+out:
+ return ret;
+}
+
+int tasdev_chn_switch(struct tasdevice_priv *tas_priv,
+ unsigned short chn)
+{
+ struct i2c_client *client = (struct i2c_client *)tas_priv->client;
+ struct tasdevice *tasdev = &tas_priv->tasdevice[chn];
+ struct regmap *map = tas_priv->regmap;
+ int ret;
+
+ if (client->addr != tasdev->dev_addr) {
+ client->addr = tasdev->dev_addr;
+ /* All devices share the same regmap, clear the page
+ * inside regmap once switching to another device.
+ * Register 0 at any pages and any books inside tas2781
+ * is the same one for page-switching.
+ */
+ ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
+ if (ret < 0) {
+ dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
+ return ret;
+ }
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tasdev_chn_switch);
+
+int tasdevice_dev_update_bits(
+ struct tasdevice_priv *tas_priv, unsigned short chn,
+ unsigned int reg, unsigned int mask, unsigned int value)
+{
+ int ret = 0;
+
+ if (chn < tas_priv->ndev) {
+ struct regmap *map = tas_priv->regmap;
+
+ ret = tas_priv->change_chn_book(tas_priv, chn,
+ TASDEVICE_BOOK_ID(reg));
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(map, TASDEVICE_PGRG(reg),
+ mask, value);
+ if (ret < 0)
+ dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
+ } else {
+ dev_err(tas_priv->dev, "%s, no such channel(%d)\n", __func__,
+ chn);
+ ret = -EINVAL;
+ }
+
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tasdevice_dev_update_bits);
+
+struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c)
+{
+ struct tasdevice_priv *tas_priv;
+
+ tas_priv = devm_kzalloc(&i2c->dev, sizeof(*tas_priv), GFP_KERNEL);
+ if (!tas_priv)
+ return NULL;
+ tas_priv->dev = &i2c->dev;
+ tas_priv->client = (void *)i2c;
+
+ return tas_priv;
+}
+EXPORT_SYMBOL_GPL(tasdevice_kzalloc);
+
+int tasdevice_init(struct tasdevice_priv *tas_priv)
+{
+ int ret = 0;
+ int i;
+
+ tas_priv->regmap = devm_regmap_init_i2c(tas_priv->client,
+ &tasdevice_regmap);
+ if (IS_ERR(tas_priv->regmap)) {
+ ret = PTR_ERR(tas_priv->regmap);
+ dev_err(tas_priv->dev, "Failed to allocate register map: %d\n",
+ ret);
+ goto out;
+ }
+
+ tas_priv->cur_prog = -1;
+ tas_priv->cur_conf = -1;
+ tas_priv->isspi = false;
+
+ for (i = 0; i < tas_priv->ndev; i++) {
+ tas_priv->tasdevice[i].cur_book = -1;
+ tas_priv->tasdevice[i].cur_prog = -1;
+ tas_priv->tasdevice[i].cur_conf = -1;
+ }
+
+ tas_priv->update_bits = tasdevice_dev_update_bits;
+ tas_priv->change_chn_book = tasdevice_change_chn_book;
+ tas_priv->dev_read = tasdevice_dev_read;
+ tas_priv->dev_bulk_read = tasdevice_dev_bulk_read;
+
+ mutex_init(&tas_priv->codec_lock);
+
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tasdevice_init);
+
+static int tasdevice_clamp(int val, int max, unsigned int invert)
+{
+ if (val > max)
+ val = max;
+ if (invert)
+ val = max - val;
+ if (val < 0)
+ val = 0;
+ return val;
+}
+
+int tasdevice_amp_putvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
+{
+ unsigned int invert = mc->invert;
+ unsigned char mask;
+ int max = mc->max;
+ int err_cnt = 0;
+ int val, i, ret;
+
+ mask = (1 << fls(max)) - 1;
+ mask <<= mc->shift;
+ val = tasdevice_clamp(ucontrol->value.integer.value[0], max, invert);
+ for (i = 0; i < tas_priv->ndev; i++) {
+ ret = tasdevice_dev_update_bits(tas_priv, i,
+ mc->reg, mask, (unsigned int)(val << mc->shift));
+ if (!ret)
+ continue;
+ err_cnt++;
+ dev_err(tas_priv->dev, "set AMP vol error in dev %d\n", i);
+ }
+
+ /* All the devices set error, return 0 */
+ return (err_cnt == tas_priv->ndev) ? 0 : 1;
+}
+EXPORT_SYMBOL_GPL(tasdevice_amp_putvol);
+
+int tasdevice_amp_getvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
+{
+ unsigned int invert = mc->invert;
+ unsigned char mask = 0;
+ int max = mc->max;
+ int ret = 0;
+ int val;
+
+ /* Read the primary device */
+ ret = tasdevice_dev_read(tas_priv, 0, mc->reg, &val);
+ if (ret) {
+ dev_err(tas_priv->dev, "%s, get AMP vol error\n", __func__);
+ goto out;
+ }
+
+ mask = (1 << fls(max)) - 1;
+ mask <<= mc->shift;
+ val = (val & mask) >> mc->shift;
+ val = tasdevice_clamp(val, max, invert);
+ ucontrol->value.integer.value[0] = val;
+
+out:
+ return ret;
+
+}
+EXPORT_SYMBOL_GPL(tasdevice_amp_getvol);
+
+int tasdevice_digital_getvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
+{
+ unsigned int invert = mc->invert;
+ int max = mc->max;
+ int ret, val;
+
+ /* Read the primary device as the whole */
+ ret = tasdevice_dev_read(tas_priv, 0, mc->reg, &val);
+ if (ret) {
+ dev_err(tas_priv->dev, "%s, get digital vol error\n",
+ __func__);
+ goto out;
+ }
+
+ val = tasdevice_clamp(val, max, invert);
+ ucontrol->value.integer.value[0] = val;
+
+out:
+ return ret;
+
+}
+EXPORT_SYMBOL_GPL(tasdevice_digital_getvol);
+
+int tasdevice_digital_putvol(struct tasdevice_priv *tas_priv,
+ struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
+{
+ unsigned int invert = mc->invert;
+ int max = mc->max;
+ int err_cnt = 0;
+ int ret;
+ int val, i;
+
+ val = tasdevice_clamp(ucontrol->value.integer.value[0], max, invert);
+
+ for (i = 0; i < tas_priv->ndev; i++) {
+ ret = tasdevice_dev_write(tas_priv, i, mc->reg,
+ (unsigned int)val);
+ if (!ret)
+ continue;
+ err_cnt++;
+ dev_err(tas_priv->dev,
+ "set digital vol err in dev %d\n", i);
+ }
+
+ /* All the devices set error, return 0 */
+ return (err_cnt == tas_priv->ndev) ? 0 : 1;
+
+}
+EXPORT_SYMBOL_GPL(tasdevice_digital_putvol);
+
+void tasdevice_reset(struct tasdevice_priv *tas_dev)
+{
+ int ret, i;
+
+ if (tas_dev->reset) {
+ gpiod_set_value_cansleep(tas_dev->reset, 0);
+ usleep_range(500, 1000);
+ gpiod_set_value_cansleep(tas_dev->reset, 1);
+ } else {
+ for (i = 0; i < tas_dev->ndev; i++) {
+ ret = tasdevice_dev_write(tas_dev, i,
+ TASDEVICE_REG_SWRESET,
+ TASDEVICE_REG_SWRESET_RESET);
+ if (ret < 0)
+ dev_err(tas_dev->dev,
+ "dev %d swreset fail, %d\n",
+ i, ret);
+ }
+ }
+ usleep_range(1000, 1050);
+}
+EXPORT_SYMBOL_GPL(tasdevice_reset);
+
+int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+ struct module *module,
+ void (*cont)(const struct firmware *fw, void *context))
+{
+ int ret = 0;
+
+ /* Codec Lock Hold to ensure that codec_probe and firmware parsing and
+ * loading do not simultaneously execute.
+ */
+ mutex_lock(&tas_priv->codec_lock);
+
+ if (tas_priv->name_prefix)
+ scnprintf(tas_priv->rca_binaryname, 64, "%s-%sRCA%d.bin",
+ tas_priv->name_prefix, tas_priv->dev_name,
+ tas_priv->ndev);
+ else
+ scnprintf(tas_priv->rca_binaryname, 64, "%sRCA%d.bin",
+ tas_priv->dev_name, tas_priv->ndev);
+ crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL);
+ tas_priv->codec = codec;
+ ret = request_firmware_nowait(module, FW_ACTION_UEVENT,
+ tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv,
+ cont);
+ if (ret)
+ dev_err(tas_priv->dev, "request_firmware_nowait err:0x%08x\n",
+ ret);
+
+ /* Codec Lock Release*/
+ mutex_unlock(&tas_priv->codec_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tascodec_init);
+
+MODULE_DESCRIPTION("TAS2781 common library for I2C");
+MODULE_AUTHOR("Shenghao Ding, TI, <shenghao-ding@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c
index 1e0b3aa95749..4cec9f8a00af 100644
--- a/sound/soc/codecs/tas2781-comlib.c
+++ b/sound/soc/codecs/tas2781-comlib.c
@@ -2,14 +2,14 @@
//
// TAS2563/TAS2781 Common functions for HDA and ASoC Audio drivers
//
-// Copyright 2023 - 2024 Texas Instruments, Inc.
+// Copyright 2023 - 2025 Texas Instruments, Inc.
//
// Author: Shenghao Ding <shenghao-ding@ti.com>
#include <linux/crc8.h>
+#include <linux/dev_printk.h>
#include <linux/firmware.h>
#include <linux/gpio/consumer.h>
-#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
@@ -17,67 +17,24 @@
#include <linux/of_irq.h>
#include <linux/regmap.h>
#include <linux/slab.h>
-#include <sound/pcm_params.h>
-#include <sound/soc.h>
#include <sound/tas2781.h>
-#define TASDEVICE_CRC8_POLYNOMIAL 0x4d
-
-static const struct regmap_range_cfg tasdevice_ranges[] = {
- {
- .range_min = 0,
- .range_max = 256 * 128,
- .selector_reg = TASDEVICE_PAGE_SELECT,
- .selector_mask = 0xff,
- .selector_shift = 0,
- .window_start = 0,
- .window_len = 128,
- },
-};
-
-static const struct regmap_config tasdevice_regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .cache_type = REGCACHE_NONE,
- .ranges = tasdevice_ranges,
- .num_ranges = ARRAY_SIZE(tasdevice_ranges),
- .max_register = 256 * 128,
-};
-
-static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
- unsigned short chn, int book)
+int tasdevice_dev_read(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned int *val)
{
- struct i2c_client *client = (struct i2c_client *)tas_priv->client;
int ret = 0;
if (chn < tas_priv->ndev) {
- struct tasdevice *tasdev = &tas_priv->tasdevice[chn];
struct regmap *map = tas_priv->regmap;
- if (client->addr != tasdev->dev_addr) {
- client->addr = tasdev->dev_addr;
- /* All tas2781s share the same regmap, clear the page
- * inside regmap once switching to another tas2781.
- * Register 0 at any pages and any books inside tas2781
- * is the same one for page-switching.
- */
- ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
- if (ret < 0) {
- dev_err(tas_priv->dev, "%s, E=%d channel:%d\n",
- __func__, ret, chn);
- goto out;
- }
- }
+ ret = tas_priv->change_chn_book(tas_priv, chn,
+ TASDEVICE_BOOK_ID(reg));
+ if (ret < 0)
+ goto out;
- if (tasdev->cur_book != book) {
- ret = regmap_write(map, TASDEVICE_BOOKCTL_REG, book);
- if (ret < 0) {
- dev_err(tas_priv->dev, "%s, E=%d\n",
- __func__, ret);
- goto out;
- }
- tasdev->cur_book = book;
- }
+ ret = regmap_read(map, TASDEVICE_PGRG(reg), val);
+ if (ret < 0)
+ dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
} else {
ret = -EINVAL;
dev_err(tas_priv->dev, "%s, no such channel(%d)\n", __func__,
@@ -87,59 +44,33 @@ static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
out:
return ret;
}
+EXPORT_SYMBOL_GPL(tasdevice_dev_read);
-int tasdev_chn_switch(struct tasdevice_priv *tas_priv,
- unsigned short chn)
-{
- struct i2c_client *client = (struct i2c_client *)tas_priv->client;
- struct tasdevice *tasdev = &tas_priv->tasdevice[chn];
- struct regmap *map = tas_priv->regmap;
- int ret;
-
- if (client->addr != tasdev->dev_addr) {
- client->addr = tasdev->dev_addr;
- /* All devices share the same regmap, clear the page
- * inside regmap once switching to another device.
- * Register 0 at any pages and any books inside tas2781
- * is the same one for page-switching.
- */
- ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
- if (ret < 0) {
- dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
- return ret;
- }
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(tasdev_chn_switch);
-
-int tasdevice_dev_read(struct tasdevice_priv *tas_priv,
- unsigned short chn, unsigned int reg, unsigned int *val)
+int tasdevice_dev_bulk_read(struct tasdevice_priv *tas_priv,
+ unsigned short chn, unsigned int reg, unsigned char *data,
+ unsigned int len)
{
int ret = 0;
if (chn < tas_priv->ndev) {
struct regmap *map = tas_priv->regmap;
- ret = tasdevice_change_chn_book(tas_priv, chn,
+ ret = tas_priv->change_chn_book(tas_priv, chn,
TASDEVICE_BOOK_ID(reg));
if (ret < 0)
goto out;
- ret = regmap_read(map, TASDEVICE_PGRG(reg), val);
+ ret = regmap_bulk_read(map, TASDEVICE_PGRG(reg), data, len);
if (ret < 0)
dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
- } else {
- ret = -EINVAL;
+ } else
dev_err(tas_priv->dev, "%s, no such channel(%d)\n", __func__,
chn);
- }
out:
return ret;
}
-EXPORT_SYMBOL_GPL(tasdevice_dev_read);
+EXPORT_SYMBOL_GPL(tasdevice_dev_bulk_read);
int tasdevice_dev_write(struct tasdevice_priv *tas_priv,
unsigned short chn, unsigned int reg, unsigned int value)
@@ -149,7 +80,7 @@ int tasdevice_dev_write(struct tasdevice_priv *tas_priv,
if (chn < tas_priv->ndev) {
struct regmap *map = tas_priv->regmap;
- ret = tasdevice_change_chn_book(tas_priv, chn,
+ ret = tas_priv->change_chn_book(tas_priv, chn,
TASDEVICE_BOOK_ID(reg));
if (ret < 0)
goto out;
@@ -179,7 +110,7 @@ int tasdevice_dev_bulk_write(
if (chn < tas_priv->ndev) {
struct regmap *map = tas_priv->regmap;
- ret = tasdevice_change_chn_book(tas_priv, chn,
+ ret = tas_priv->change_chn_book(tas_priv, chn,
TASDEVICE_BOOK_ID(reg));
if (ret < 0)
goto out;
@@ -199,161 +130,6 @@ out:
}
EXPORT_SYMBOL_GPL(tasdevice_dev_bulk_write);
-int tasdevice_dev_bulk_read(struct tasdevice_priv *tas_priv,
- unsigned short chn, unsigned int reg, unsigned char *data,
- unsigned int len)
-{
- int ret = 0;
-
- if (chn < tas_priv->ndev) {
- struct regmap *map = tas_priv->regmap;
-
- ret = tasdevice_change_chn_book(tas_priv, chn,
- TASDEVICE_BOOK_ID(reg));
- if (ret < 0)
- goto out;
-
- ret = regmap_bulk_read(map, TASDEVICE_PGRG(reg), data, len);
- if (ret < 0)
- dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
- } else
- dev_err(tas_priv->dev, "%s, no such channel(%d)\n", __func__,
- chn);
-
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(tasdevice_dev_bulk_read);
-
-int tasdevice_dev_update_bits(
- struct tasdevice_priv *tas_priv, unsigned short chn,
- unsigned int reg, unsigned int mask, unsigned int value)
-{
- int ret = 0;
-
- if (chn < tas_priv->ndev) {
- struct regmap *map = tas_priv->regmap;
-
- ret = tasdevice_change_chn_book(tas_priv, chn,
- TASDEVICE_BOOK_ID(reg));
- if (ret < 0)
- goto out;
-
- ret = regmap_update_bits(map, TASDEVICE_PGRG(reg),
- mask, value);
- if (ret < 0)
- dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret);
- } else {
- dev_err(tas_priv->dev, "%s, no such channel(%d)\n", __func__,
- chn);
- ret = -EINVAL;
- }
-
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(tasdevice_dev_update_bits);
-
-struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c)
-{
- struct tasdevice_priv *tas_priv;
-
- tas_priv = devm_kzalloc(&i2c->dev, sizeof(*tas_priv), GFP_KERNEL);
- if (!tas_priv)
- return NULL;
- tas_priv->dev = &i2c->dev;
- tas_priv->client = (void *)i2c;
-
- return tas_priv;
-}
-EXPORT_SYMBOL_GPL(tasdevice_kzalloc);
-
-void tasdevice_reset(struct tasdevice_priv *tas_dev)
-{
- int ret, i;
-
- if (tas_dev->reset) {
- gpiod_set_value_cansleep(tas_dev->reset, 0);
- usleep_range(500, 1000);
- gpiod_set_value_cansleep(tas_dev->reset, 1);
- } else {
- for (i = 0; i < tas_dev->ndev; i++) {
- ret = tasdevice_dev_write(tas_dev, i,
- TASDEVICE_REG_SWRESET,
- TASDEVICE_REG_SWRESET_RESET);
- if (ret < 0)
- dev_err(tas_dev->dev,
- "dev %d swreset fail, %d\n",
- i, ret);
- }
- }
- usleep_range(1000, 1050);
-}
-EXPORT_SYMBOL_GPL(tasdevice_reset);
-
-int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
- struct module *module,
- void (*cont)(const struct firmware *fw, void *context))
-{
- int ret = 0;
-
- /* Codec Lock Hold to ensure that codec_probe and firmware parsing and
- * loading do not simultaneously execute.
- */
- mutex_lock(&tas_priv->codec_lock);
-
- if (tas_priv->name_prefix)
- scnprintf(tas_priv->rca_binaryname, 64, "%s-%sRCA%d.bin",
- tas_priv->name_prefix, tas_priv->dev_name,
- tas_priv->ndev);
- else
- scnprintf(tas_priv->rca_binaryname, 64, "%sRCA%d.bin",
- tas_priv->dev_name, tas_priv->ndev);
- crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL);
- tas_priv->codec = codec;
- ret = request_firmware_nowait(module, FW_ACTION_UEVENT,
- tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv,
- cont);
- if (ret)
- dev_err(tas_priv->dev, "request_firmware_nowait err:0x%08x\n",
- ret);
-
- /* Codec Lock Release*/
- mutex_unlock(&tas_priv->codec_lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(tascodec_init);
-
-int tasdevice_init(struct tasdevice_priv *tas_priv)
-{
- int ret = 0;
- int i;
-
- tas_priv->regmap = devm_regmap_init_i2c(tas_priv->client,
- &tasdevice_regmap);
- if (IS_ERR(tas_priv->regmap)) {
- ret = PTR_ERR(tas_priv->regmap);
- dev_err(tas_priv->dev, "Failed to allocate register map: %d\n",
- ret);
- goto out;
- }
-
- tas_priv->cur_prog = -1;
- tas_priv->cur_conf = -1;
-
- for (i = 0; i < tas_priv->ndev; i++) {
- tas_priv->tasdevice[i].cur_book = -1;
- tas_priv->tasdevice[i].cur_prog = -1;
- tas_priv->tasdevice[i].cur_conf = -1;
- }
-
- mutex_init(&tas_priv->codec_lock);
-
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(tasdevice_init);
-
static void tasdev_dsp_prog_blk_remove(struct tasdevice_prog *prog)
{
struct tasdevice_data *tas_dt;
@@ -440,137 +216,6 @@ void tasdevice_remove(struct tasdevice_priv *tas_priv)
}
EXPORT_SYMBOL_GPL(tasdevice_remove);
-int tasdevice_save_calibration(struct tasdevice_priv *tas_priv)
-{
- if (tas_priv->save_calibration)
- return tas_priv->save_calibration(tas_priv);
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(tasdevice_save_calibration);
-
-void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv)
-{
- if (tas_priv->apply_calibration && tas_priv->cali_data.total_sz)
- tas_priv->apply_calibration(tas_priv);
-}
-EXPORT_SYMBOL_GPL(tasdevice_apply_calibration);
-
-static int tasdevice_clamp(int val, int max, unsigned int invert)
-{
- if (val > max)
- val = max;
- if (invert)
- val = max - val;
- if (val < 0)
- val = 0;
- return val;
-}
-
-int tasdevice_amp_putvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
-{
- unsigned int invert = mc->invert;
- unsigned char mask;
- int max = mc->max;
- int err_cnt = 0;
- int val, i, ret;
-
- mask = (1 << fls(max)) - 1;
- mask <<= mc->shift;
- val = tasdevice_clamp(ucontrol->value.integer.value[0], max, invert);
- for (i = 0; i < tas_priv->ndev; i++) {
- ret = tasdevice_dev_update_bits(tas_priv, i,
- mc->reg, mask, (unsigned int)(val << mc->shift));
- if (!ret)
- continue;
- err_cnt++;
- dev_err(tas_priv->dev, "set AMP vol error in dev %d\n", i);
- }
-
- /* All the devices set error, return 0 */
- return (err_cnt == tas_priv->ndev) ? 0 : 1;
-}
-EXPORT_SYMBOL_GPL(tasdevice_amp_putvol);
-
-int tasdevice_amp_getvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
-{
- unsigned int invert = mc->invert;
- unsigned char mask = 0;
- int max = mc->max;
- int ret = 0;
- int val;
-
- /* Read the primary device */
- ret = tasdevice_dev_read(tas_priv, 0, mc->reg, &val);
- if (ret) {
- dev_err(tas_priv->dev, "%s, get AMP vol error\n", __func__);
- goto out;
- }
-
- mask = (1 << fls(max)) - 1;
- mask <<= mc->shift;
- val = (val & mask) >> mc->shift;
- val = tasdevice_clamp(val, max, invert);
- ucontrol->value.integer.value[0] = val;
-
-out:
- return ret;
-
-}
-EXPORT_SYMBOL_GPL(tasdevice_amp_getvol);
-
-int tasdevice_digital_putvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
-{
- unsigned int invert = mc->invert;
- int max = mc->max;
- int err_cnt = 0;
- int ret;
- int val, i;
-
- val = tasdevice_clamp(ucontrol->value.integer.value[0], max, invert);
-
- for (i = 0; i < tas_priv->ndev; i++) {
- ret = tasdevice_dev_write(tas_priv, i, mc->reg,
- (unsigned int)val);
- if (!ret)
- continue;
- err_cnt++;
- dev_err(tas_priv->dev,
- "set digital vol err in dev %d\n", i);
- }
-
- /* All the devices set error, return 0 */
- return (err_cnt == tas_priv->ndev) ? 0 : 1;
-
-}
-EXPORT_SYMBOL_GPL(tasdevice_digital_putvol);
-
-int tasdevice_digital_getvol(struct tasdevice_priv *tas_priv,
- struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc)
-{
- unsigned int invert = mc->invert;
- int max = mc->max;
- int ret, val;
-
- /* Read the primary device as the whole */
- ret = tasdevice_dev_read(tas_priv, 0, mc->reg, &val);
- if (ret) {
- dev_err(tas_priv->dev, "%s, get digital vol error\n",
- __func__);
- goto out;
- }
-
- val = tasdevice_clamp(val, max, invert);
- ucontrol->value.integer.value[0] = val;
-
-out:
- return ret;
-
-}
-EXPORT_SYMBOL_GPL(tasdevice_digital_getvol);
-
MODULE_DESCRIPTION("TAS2781 common library");
MODULE_AUTHOR("Shenghao Ding, TI, <shenghao-ding@ti.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c
index 13a197468193..c9c1e608ddb7 100644
--- a/sound/soc/codecs/tas2781-fmwlib.c
+++ b/sound/soc/codecs/tas2781-fmwlib.c
@@ -50,6 +50,11 @@
#define TAS2781_YRAM5_START_REG TAS2781_YRAM3_START_REG
#define TAS2781_YRAM5_END_REG TAS2781_YRAM3_END_REG
+#define TASDEVICE_CMD_SING_W 0x1
+#define TASDEVICE_CMD_BURST 0x2
+#define TASDEVICE_CMD_DELAY 0x3
+#define TASDEVICE_CMD_FIELD_W 0x4
+
#define TASDEVICE_MAXPROGRAM_NUM_KERNEL 5
#define TASDEVICE_MAXCONFIG_NUM_KERNEL_MULTIPLE_AMPS 64
#define TASDEVICE_MAXCONFIG_NUM_KERNEL 10
@@ -803,8 +808,13 @@ static int tasdevice_process_block(void *context, unsigned char *data,
chn = idx - 1;
chnend = idx;
} else {
- chn = 0;
- chnend = tas_priv->ndev;
+ if (tas_priv->isspi) {
+ chn = tas_priv->index;
+ chnend = chn + 1;
+ } else {
+ chn = 0;
+ chnend = tas_priv->ndev;
+ }
}
for (; chn < chnend; chn++) {
@@ -896,7 +906,7 @@ static int tasdevice_process_block(void *context, unsigned char *data,
is_err = true;
break;
}
- rc = tasdevice_dev_update_bits(tas_priv, chn,
+ rc = tas_priv->update_bits(tas_priv, chn,
TASDEVICE_REG(data[subblk_offset + 2],
data[subblk_offset + 3],
data[subblk_offset + 4]),
@@ -1461,7 +1471,7 @@ static int tasdev_multibytes_chksum(struct tasdevice_priv *tasdevice,
goto end;
}
- ret = tasdevice_dev_bulk_read(tasdevice, chn,
+ ret = tasdevice->dev_bulk_read(tasdevice, chn,
TASDEVICE_REG(book, page, crc_data.offset),
nBuf1, crc_data.len);
if (ret < 0)
@@ -1511,7 +1521,7 @@ static int do_singlereg_checksum(struct tasdevice_priv *tasdevice,
in = check_yram(&crc_data, book, page, reg, 1);
if (!in)
goto end;
- ret = tasdevice_dev_read(tasdevice, chl,
+ ret = tasdevice->dev_read(tasdevice, chl,
TASDEVICE_REG(book, page, reg), &nData1);
if (ret < 0)
goto end;
@@ -1615,7 +1625,7 @@ static int tasdev_block_chksum(struct tasdevice_priv *tas_priv,
unsigned int nr_value;
int ret;
- ret = tasdevice_dev_read(tas_priv, chn, TASDEVICE_CHECKSUM_REG,
+ ret = tas_priv->dev_read(tas_priv, chn, TASDEVICE_CHECKSUM_REG,
&nr_value);
if (ret < 0) {
dev_err(tas_priv->dev, "%s: Chn %d\n", __func__, chn);
@@ -2074,8 +2084,7 @@ int tas2781_load_calibration(void *context, char *file_name,
}
out:
- if (fw_entry)
- release_firmware(fw_entry);
+ release_firmware(fw_entry);
return ret;
}
diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c
index b950914b7d48..c40d8f754d89 100644
--- a/sound/soc/codecs/tas2781-i2c.c
+++ b/sound/soc/codecs/tas2781-i2c.c
@@ -14,6 +14,9 @@
//
#include <linux/crc8.h>
+#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C
+#include <linux/debugfs.h>
+#endif
#include <linux/firmware.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
@@ -28,6 +31,7 @@
#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/tas2781.h>
+#include <sound/tas2781-comlib-i2c.h>
#include <sound/tlv.h>
#include <sound/tas2563-tlv.h>
#include <sound/tas2781-tlv.h>
@@ -1422,10 +1426,150 @@ static int tasdevice_create_cali_ctrls(struct tasdevice_priv *priv)
nctrls < i ? nctrls : i);
}
+#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C
+/*
+ * This debugfs node is a bridge to the acoustic tuning application
+ * tool which can tune the chips' acoustic effect.
+ *
+ * package structure for PPC3 communications:
+ * Pkg len (1 byte)
+ * Pkg id (1 byte, 'r' or 'w')
+ * Dev id (1 byte, i2c address)
+ * Book id (1 byte)
+ * Page id (1 byte)
+ * Reg id (1 byte)
+ * switch (pkg id) {
+ * case 'w':
+ * 1 byte, length of data to read
+ * case 'r':
+ * data payload (1~128 bytes)
+ * }
+ */
+static ssize_t acoustic_ctl_read(struct file *file, char __user *to,
+ size_t count, loff_t *ppos)
+{
+ struct snd_soc_component *comp = file->private_data;
+ struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(comp);
+ struct acoustic_data *p = &tas_priv->acou_data;
+ int ret = -1;
+
+ if (p->id == 'r' && p->len == count && count <= sizeof(*p))
+ ret = simple_read_from_buffer(to, count, ppos, p, p->len);
+ else
+ dev_err(tas_priv->dev, "Not ready for get.\n");
+ return ret;
+}
+
+static ssize_t acoustic_ctl_write(struct file *file,
+ const char __user *from, size_t count, loff_t *ppos)
+{
+ struct snd_soc_component *comp = file->private_data;
+ struct tasdevice_priv *priv = snd_soc_component_get_drvdata(comp);
+ struct acoustic_data *p = &priv->acou_data;
+ unsigned int max_pkg_len = sizeof(*p);
+ unsigned char *src;
+ int j, len, reg, val;
+ unsigned short chn;
+ int ret = -1;
+
+ if (count > sizeof(*p)) {
+ dev_err(priv->dev, "count(%u) is larger than max(%u).\n",
+ (unsigned int)count, max_pkg_len);
+ return ret;
+ }
+
+ src = memdup_user(from, count);
+ if (IS_ERR(src))
+ return PTR_ERR(src);
+
+ if (src[0] > max_pkg_len && src[0] != count) {
+ dev_err(priv->dev, "pkg(%u), max(%u), count(%u) dismatch.\n",
+ src[0], max_pkg_len, (unsigned int)count);
+ ret = 0;
+ goto exit;
+ }
+
+ switch (src[1]) {
+ case 'r':
+ /* length of data to read */
+ len = src[6];
+ break;
+ case 'w':
+ /* Skip 6 bytes for package type and register address */
+ len = src[0] - 6;
+ break;
+ default:
+ dev_err(priv->dev, "%s Wrong code %02x.\n", __func__, src[1]);
+ ret = 0;
+ goto exit;
+ }
+
+ if (len < 1) {
+ dev_err(priv->dev, "pkg fmt invalid %02x.\n", len);
+ ret = 0;
+ goto exit;
+ }
+
+ for (j = 0; j < priv->ndev; j++)
+ if (src[2] == priv->tasdevice[j].dev_addr) {
+ chn = j;
+ break;
+ }
+ if (j >= priv->ndev) {
+ dev_err(priv->dev, "no such device 0x%02x.\n", src[2]);
+ ret = 0;
+ goto exit;
+ }
+
+ reg = TASDEVICE_REG(src[3], src[4], src[5]);
+
+ guard(mutex)(&priv->codec_lock);
+
+ if (src[1] == 'w') {
+ if (len > 1)
+ ret = tasdevice_dev_bulk_write(priv, chn, reg,
+ &src[6], len);
+ else
+ ret = tasdevice_dev_write(priv, chn, reg, src[6]);
+ } else {
+ struct acoustic_data *p = &priv->acou_data;
+
+ memcpy(p, src, 6);
+ if (len > 1) {
+ ret = tasdevice_dev_bulk_read(priv, chn, reg,
+ p->data, len);
+ } else {
+ ret = tasdevice_dev_read(priv, chn, reg, &val);
+ p->data[0] = val;
+ }
+ p->len = len + 6;
+ }
+
+ if (ret)
+ dev_err(priv->dev, "i2c communication error.\n");
+ else
+ ret = count;
+exit:
+ kfree(src);
+ return ret;
+}
+
+static const struct file_operations acoustic_ctl_fops = {
+ .open = simple_open,
+ .read = acoustic_ctl_read,
+ .write = acoustic_ctl_write,
+};
+#endif
+
static void tasdevice_fw_ready(const struct firmware *fmw,
void *context)
{
struct tasdevice_priv *tas_priv = context;
+#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C
+ struct snd_soc_component *comp = tas_priv->codec;
+ struct dentry *debugfs_root = comp->debugfs_root;
+ char *acoustic_debugfs_node;
+#endif
int ret = 0;
int i;
@@ -1499,14 +1643,24 @@ static void tasdevice_fw_ready(const struct firmware *fmw,
tasdevice_prmg_load(tas_priv, 0);
tas_priv->cur_prog = 0;
+
+#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C
+ if (tas_priv->name_prefix)
+ acoustic_debugfs_node = devm_kasprintf(tas_priv->dev,
+ GFP_KERNEL, "%s_acoustic_ctl", tas_priv->name_prefix);
+ else
+ acoustic_debugfs_node = devm_kstrdup(tas_priv->dev,
+ "acoustic_ctl", GFP_KERNEL);
+ debugfs_create_file(acoustic_debugfs_node, 0644, debugfs_root,
+ comp, &acoustic_ctl_fops);
+#endif
out:
if (tas_priv->fw_state == TASDEVICE_RCA_FW_OK) {
/* If DSP FW fail, DSP kcontrol won't be created. */
tasdevice_dsp_remove(tas_priv);
}
mutex_unlock(&tas_priv->codec_lock);
- if (fmw)
- release_firmware(fmw);
+ release_firmware(fmw);
}
static int tasdevice_dapm_event(struct snd_soc_dapm_widget *w,
diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c
index 191e067ed1c9..1035ba17dc5d 100644
--- a/sound/soc/codecs/tlv320adc3xxx.c
+++ b/sound/soc/codecs/tlv320adc3xxx.c
@@ -1015,10 +1015,10 @@ static int adc3xxx_gpio_direction_out(struct gpio_chip *chip,
* so we set the output mode and output value in the same call. Hence
* .set in practice does the same thing as .direction_out .
*/
-static void adc3xxx_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int adc3xxx_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
- (void) adc3xxx_gpio_direction_out(chip, offset, value);
+ return adc3xxx_gpio_direction_out(chip, offset, value);
}
/* Even though we only support GPIO output for now, some GPIO clients
@@ -1052,7 +1052,7 @@ static const struct gpio_chip adc3xxx_gpio_chip = {
.owner = THIS_MODULE,
.request = adc3xxx_gpio_request,
.direction_output = adc3xxx_gpio_direction_out,
- .set = adc3xxx_gpio_set,
+ .set_rv = adc3xxx_gpio_set,
.get = adc3xxx_gpio_get,
.can_sleep = 1,
};
diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index 56e795a00e22..f1649df19738 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -1818,10 +1818,8 @@ int aic3x_probe(struct device *dev, struct regmap *regmap, kernel_ulong_t driver
ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(aic3x->supplies),
aic3x->supplies);
- if (ret) {
- dev_err(dev, "Failed to request supplies: %d\n", ret);
- return ret;
- }
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to request supplies\n");
aic3x_configure_ocmv(dev, aic3x);
diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c
index b5472fa1bdda..38cc000891ea 100644
--- a/sound/soc/codecs/tpa6130a2.c
+++ b/sound/soc/codecs/tpa6130a2.c
@@ -7,19 +7,17 @@
* Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
*/
-#include <linux/module.h>
-#include <linux/errno.h>
#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
-#include <sound/tpa6130a2-plat.h>
#include <sound/soc.h>
#include <sound/tlv.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/regmap.h>
#include "tpa6130a2.h"
@@ -33,7 +31,7 @@ struct tpa6130a2_data {
struct device *dev;
struct regmap *regmap;
struct regulator *supply;
- int power_gpio;
+ struct gpio_desc *power_gpio;
enum tpa_model id;
};
@@ -49,8 +47,7 @@ static int tpa6130a2_power(struct tpa6130a2_data *data, bool enable)
return ret;
}
/* Power on */
- if (data->power_gpio >= 0)
- gpio_set_value(data->power_gpio, 1);
+ gpiod_set_value(data->power_gpio, 1);
/* Sync registers */
regcache_cache_only(data->regmap, false);
@@ -59,8 +56,7 @@ static int tpa6130a2_power(struct tpa6130a2_data *data, bool enable)
dev_err(data->dev,
"Failed to sync registers: %d\n", ret);
regcache_cache_only(data->regmap, true);
- if (data->power_gpio >= 0)
- gpio_set_value(data->power_gpio, 0);
+ gpiod_set_value(data->power_gpio, 0);
ret2 = regulator_disable(data->supply);
if (ret2 != 0)
dev_err(data->dev,
@@ -76,8 +72,7 @@ static int tpa6130a2_power(struct tpa6130a2_data *data, bool enable)
regcache_cache_only(data->regmap, true);
/* Power off */
- if (data->power_gpio >= 0)
- gpio_set_value(data->power_gpio, 0);
+ gpiod_set_value(data->power_gpio, 0);
ret = regulator_disable(data->supply);
if (ret != 0) {
@@ -209,18 +204,10 @@ static const struct regmap_config tpa6130a2_regmap_config = {
.cache_type = REGCACHE_RBTREE,
};
-static const struct i2c_device_id tpa6130a2_id[] = {
- { "tpa6130a2", TPA6130A2 },
- { "tpa6140a2", TPA6140A2 },
- { }
-};
-MODULE_DEVICE_TABLE(i2c, tpa6130a2_id);
-
static int tpa6130a2_probe(struct i2c_client *client)
{
struct device *dev;
struct tpa6130a2_data *data;
- struct tpa6130a2_platform_data *pdata = client->dev.platform_data;
struct device_node *np = client->dev.of_node;
const char *regulator;
unsigned int version;
@@ -238,10 +225,13 @@ static int tpa6130a2_probe(struct i2c_client *client)
if (IS_ERR(data->regmap))
return PTR_ERR(data->regmap);
- if (pdata) {
- data->power_gpio = pdata->power_gpio;
- } else if (np) {
- data->power_gpio = of_get_named_gpio(np, "power-gpio", 0);
+ if (np) {
+ data->power_gpio = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
+ if (IS_ERR(data->power_gpio)) {
+ return dev_err_probe(dev, PTR_ERR(data->power_gpio),
+ "Failed to request power GPIO\n");
+ }
+ gpiod_set_consumer_name(data->power_gpio, "tpa6130a2 enable");
} else {
dev_err(dev, "Platform data not set\n");
dump_stack();
@@ -252,17 +242,6 @@ static int tpa6130a2_probe(struct i2c_client *client)
data->id = (uintptr_t)i2c_get_match_data(client);
- if (data->power_gpio >= 0) {
- ret = devm_gpio_request(dev, data->power_gpio,
- "tpa6130a2 enable");
- if (ret < 0) {
- dev_err(dev, "Failed to request power GPIO (%d)\n",
- data->power_gpio);
- return ret;
- }
- gpio_direction_output(data->power_gpio, 0);
- }
-
switch (data->id) {
default:
dev_warn(dev, "Unknown TPA model (%d). Assuming 6130A2\n",
@@ -318,7 +297,6 @@ static struct i2c_driver tpa6130a2_i2c_driver = {
.of_match_table = of_match_ptr(tpa6130a2_of_match),
},
.probe = tpa6130a2_probe,
- .id_table = tpa6130a2_id,
};
module_i2c_driver(tpa6130a2_i2c_driver);
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 609886461805..92194579e15b 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -5,28 +5,25 @@
* Author: Steve Sakoman, <steve@sakoman.com>
*/
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/mfd/twl.h>
+#include <linux/mfd/twl4030-audio.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/delay.h>
+#include <linux/of.h>
#include <linux/pm.h>
-#include <linux/i2c.h>
#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/mfd/twl.h>
#include <linux/slab.h>
-#include <linux/gpio.h>
#include <sound/core.h>
+#include <sound/initval.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>
-#include <sound/initval.h>
#include <sound/tlv.h>
-/* Register descriptions are here */
-#include <linux/mfd/twl4030-audio.h>
-
/* TWL4030 PMBR1 Register */
#define TWL4030_PMBR1_REG 0x0D
/* TWL4030 PMBR1 Register GPIO6 mux bits */
@@ -39,7 +36,7 @@ struct twl4030_board_params {
unsigned int ramp_delay_value;
unsigned int offset_cncl_path;
unsigned int hs_extmute:1;
- int hs_extmute_gpio;
+ struct gpio_desc *hs_extmute_gpio;
};
/* codec private data */
@@ -213,8 +210,7 @@ twl4030_get_board_param_values(struct twl4030_board_params *board_params,
if (!of_property_read_u32(node, "ti,hs_extmute", &value))
board_params->hs_extmute = value;
- board_params->hs_extmute_gpio = of_get_named_gpio(node, "ti,hs_extmute_gpio", 0);
- if (gpio_is_valid(board_params->hs_extmute_gpio))
+ if (of_property_present(node, "ti,hs_extmute_gpio"))
board_params->hs_extmute = 1;
}
@@ -242,7 +238,7 @@ twl4030_get_board_params(struct snd_soc_component *component)
return board_params;
}
-static void twl4030_init_chip(struct snd_soc_component *component)
+static int twl4030_init_chip(struct snd_soc_component *component)
{
struct twl4030_board_params *board_params;
struct twl4030_priv *twl4030 = snd_soc_component_get_drvdata(component);
@@ -252,24 +248,20 @@ static void twl4030_init_chip(struct snd_soc_component *component)
board_params = twl4030_get_board_params(component);
if (board_params && board_params->hs_extmute) {
- if (gpio_is_valid(board_params->hs_extmute_gpio)) {
- int ret;
-
- if (!board_params->hs_extmute_gpio)
- dev_warn(component->dev,
- "Extmute GPIO is 0 is this correct?\n");
-
- ret = gpio_request_one(board_params->hs_extmute_gpio,
- GPIOF_OUT_INIT_LOW,
- "hs_extmute");
- if (ret) {
- dev_err(component->dev,
- "Failed to get hs_extmute GPIO\n");
- board_params->hs_extmute_gpio = -1;
- }
+ board_params->hs_extmute_gpio = devm_gpiod_get_optional(component->dev,
+ "ti,hs_extmute",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(board_params->hs_extmute_gpio))
+ return dev_err_probe(component->dev, PTR_ERR(board_params->hs_extmute_gpio),
+ "Failed to get hs_extmute GPIO\n");
+
+ if (board_params->hs_extmute_gpio) {
+ gpiod_set_consumer_name(board_params->hs_extmute_gpio, "hs_extmute");
} else {
u8 pin_mux;
+ dev_info(component->dev, "use TWL4030 GPIO6\n");
+
/* Set TWL4030 GPIO6 as EXTMUTE signal */
twl_i2c_read_u8(TWL4030_MODULE_INTBR, &pin_mux,
TWL4030_PMBR1_REG);
@@ -297,7 +289,7 @@ static void twl4030_init_chip(struct snd_soc_component *component)
/* Machine dependent setup */
if (!board_params)
- return;
+ return 0;
twl4030->board_params = board_params;
@@ -332,6 +324,8 @@ static void twl4030_init_chip(struct snd_soc_component *component)
TWL4030_CNCL_OFFSET_START));
twl4030_codec_enable(component, 0);
+
+ return 0;
}
static void twl4030_apll_enable(struct snd_soc_component *component, int enable)
@@ -714,8 +708,8 @@ static void headset_ramp(struct snd_soc_component *component, int ramp)
/* Enable external mute control, this dramatically reduces
* the pop-noise */
if (board_params && board_params->hs_extmute) {
- if (gpio_is_valid(board_params->hs_extmute_gpio)) {
- gpio_set_value(board_params->hs_extmute_gpio, 1);
+ if (board_params->hs_extmute_gpio) {
+ gpiod_set_value(board_params->hs_extmute_gpio, 1);
} else {
hs_pop |= TWL4030_EXTMUTE;
twl4030_write(component, TWL4030_REG_HS_POPN_SET, hs_pop);
@@ -750,8 +744,8 @@ static void headset_ramp(struct snd_soc_component *component, int ramp)
/* Disable external mute */
if (board_params && board_params->hs_extmute) {
- if (gpio_is_valid(board_params->hs_extmute_gpio)) {
- gpio_set_value(board_params->hs_extmute_gpio, 0);
+ if (board_params->hs_extmute_gpio) {
+ gpiod_set_value(board_params->hs_extmute_gpio, 0);
} else {
hs_pop &= ~TWL4030_EXTMUTE;
twl4030_write(component, TWL4030_REG_HS_POPN_SET, hs_pop);
@@ -2168,24 +2162,11 @@ static int twl4030_soc_probe(struct snd_soc_component *component)
/* Set the defaults, and power up the codec */
twl4030->sysclk = twl4030_audio_get_mclk() / 1000;
- twl4030_init_chip(component);
-
- return 0;
-}
-
-static void twl4030_soc_remove(struct snd_soc_component *component)
-{
- struct twl4030_priv *twl4030 = snd_soc_component_get_drvdata(component);
- struct twl4030_board_params *board_params = twl4030->board_params;
-
- if (board_params && board_params->hs_extmute &&
- gpio_is_valid(board_params->hs_extmute_gpio))
- gpio_free(board_params->hs_extmute_gpio);
+ return twl4030_init_chip(component);
}
static const struct snd_soc_component_driver soc_component_dev_twl4030 = {
.probe = twl4030_soc_probe,
- .remove = twl4030_soc_remove,
.read = twl4030_read,
.write = twl4030_write,
.set_bias_level = twl4030_set_bias_level,
diff --git a/sound/soc/codecs/wcd-mbhc-v2.c b/sound/soc/codecs/wcd-mbhc-v2.c
index d589a212b768..4b7c3d6080a1 100644
--- a/sound/soc/codecs/wcd-mbhc-v2.c
+++ b/sound/soc/codecs/wcd-mbhc-v2.c
@@ -1260,7 +1260,7 @@ correct_plug_type:
if (pt_gnd_mic_swap_cnt == mbhc->swap_thr) {
/* US_EU gpio present, flip switch */
if (mbhc->cfg->swap_gnd_mic) {
- if (mbhc->cfg->swap_gnd_mic(component, true))
+ if (mbhc->cfg->swap_gnd_mic(component))
continue;
}
}
diff --git a/sound/soc/codecs/wcd-mbhc-v2.h b/sound/soc/codecs/wcd-mbhc-v2.h
index b977e8f87d7c..a5d52b9643f5 100644
--- a/sound/soc/codecs/wcd-mbhc-v2.h
+++ b/sound/soc/codecs/wcd-mbhc-v2.h
@@ -194,7 +194,7 @@ struct wcd_mbhc_config {
int num_btn;
bool mono_stero_detection;
bool typec_analog_mux;
- bool (*swap_gnd_mic)(struct snd_soc_component *component, bool active);
+ bool (*swap_gnd_mic)(struct snd_soc_component *component);
bool hs_ext_micbias;
bool gnd_det_en;
uint32_t linein_th;
diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c
index 7cef43bb2a88..8ee4360aff92 100644
--- a/sound/soc/codecs/wcd9335.c
+++ b/sound/soc/codecs/wcd9335.c
@@ -17,7 +17,7 @@
#include <sound/soc.h>
#include <sound/pcm_params.h>
#include <sound/soc-dapm.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <sound/tlv.h>
@@ -331,7 +331,7 @@ struct wcd9335_codec {
int comp_enabled[COMPANDER_MAX];
int intr1;
- int reset_gpio;
+ struct gpio_desc *reset_gpio;
struct regulator_bulk_data supplies[WCD9335_MAX_SUPPLY];
unsigned int rx_port_value[WCD9335_RX_MAX];
@@ -4975,12 +4975,11 @@ static const struct regmap_irq_chip wcd9335_regmap_irq1_chip = {
static int wcd9335_parse_dt(struct wcd9335_codec *wcd)
{
struct device *dev = wcd->dev;
- struct device_node *np = dev->of_node;
int ret;
- wcd->reset_gpio = of_get_named_gpio(np, "reset-gpios", 0);
- if (wcd->reset_gpio < 0)
- return dev_err_probe(dev, wcd->reset_gpio, "Reset GPIO missing from DT\n");
+ wcd->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(wcd->reset_gpio))
+ return dev_err_probe(dev, PTR_ERR(wcd->reset_gpio), "Reset GPIO missing from DT\n");
wcd->mclk = devm_clk_get(dev, "mclk");
if (IS_ERR(wcd->mclk))
@@ -5023,9 +5022,9 @@ static int wcd9335_power_on_reset(struct wcd9335_codec *wcd)
*/
usleep_range(600, 650);
- gpio_direction_output(wcd->reset_gpio, 0);
+ gpiod_set_value(wcd->reset_gpio, 1);
msleep(20);
- gpio_set_value(wcd->reset_gpio, 1);
+ gpiod_set_value(wcd->reset_gpio, 0);
msleep(20);
return 0;
diff --git a/sound/soc/codecs/wcd937x.c b/sound/soc/codecs/wcd937x.c
index dd2045a5d26d..3b1a1518e764 100644
--- a/sound/soc/codecs/wcd937x.c
+++ b/sound/soc/codecs/wcd937x.c
@@ -2656,7 +2656,7 @@ static void wcd937x_dt_parse_micbias_info(struct device *dev, struct wcd937x_pri
dev_warn(dev, "Micbias3 DT property not found\n");
}
-static bool wcd937x_swap_gnd_mic(struct snd_soc_component *component, bool active)
+static bool wcd937x_swap_gnd_mic(struct snd_soc_component *component)
{
int value;
struct wcd937x_priv *wcd937x;
diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c
index cabddadc90ef..e822cc145250 100644
--- a/sound/soc/codecs/wcd938x-sdw.c
+++ b/sound/soc/codecs/wcd938x-sdw.c
@@ -1225,7 +1225,7 @@ static int wcd9380_probe(struct sdw_slave *pdev,
if (!wcd)
return -ENOMEM;
- /**
+ /*
* Port map index starts with 0, however the data port for this codec
* are from index 1
*/
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index 1ae498c32391..d9b61eab029a 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -11,7 +11,6 @@
#include <linux/pm_runtime.h>
#include <linux/component.h>
#include <sound/tlv.h>
-#include <linux/of_gpio.h>
#include <linux/of.h>
#include <sound/jack.h>
#include <sound/pcm.h>
@@ -19,6 +18,7 @@
#include <linux/regmap.h>
#include <sound/soc.h>
#include <sound/soc-dapm.h>
+#include <linux/mux/consumer.h>
#include <linux/regulator/consumer.h>
#include "wcd-clsh-v2.h"
@@ -171,8 +171,10 @@ struct wcd938x_priv {
int flyback_cur_det_disable;
int ear_rx_path;
int variant;
- int reset_gpio;
+ struct gpio_desc *reset_gpio;
struct gpio_desc *us_euro_gpio;
+ struct mux_control *us_euro_mux;
+ unsigned int mux_state;
u32 micb1_mv;
u32 micb2_mv;
u32 micb3_mv;
@@ -183,6 +185,7 @@ struct wcd938x_priv {
bool comp1_enable;
bool comp2_enable;
bool ldoh;
+ bool mux_setup_done;
};
static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
@@ -3230,17 +3233,28 @@ static void wcd938x_dt_parse_micbias_info(struct device *dev, struct wcd938x_pri
dev_info(dev, "%s: Micbias4 DT property not found\n", __func__);
}
-static bool wcd938x_swap_gnd_mic(struct snd_soc_component *component, bool active)
+static bool wcd938x_swap_gnd_mic(struct snd_soc_component *component)
{
- int value;
-
- struct wcd938x_priv *wcd938x;
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+ struct device *dev = component->dev;
+ int ret;
- wcd938x = snd_soc_component_get_drvdata(component);
+ if (wcd938x->us_euro_mux) {
+ if (wcd938x->mux_setup_done)
+ mux_control_deselect(wcd938x->us_euro_mux);
- value = gpiod_get_value(wcd938x->us_euro_gpio);
+ ret = mux_control_try_select(wcd938x->us_euro_mux, !wcd938x->mux_state);
+ if (ret) {
+ dev_err(dev, "Error (%d) Unable to select us/euro mux state\n", ret);
+ wcd938x->mux_setup_done = false;
+ return false;
+ }
+ wcd938x->mux_setup_done = true;
+ } else {
+ gpiod_set_value(wcd938x->us_euro_gpio, !wcd938x->mux_state);
+ }
- gpiod_set_value(wcd938x->us_euro_gpio, !value);
+ wcd938x->mux_state = !wcd938x->mux_state;
return true;
}
@@ -3251,16 +3265,30 @@ static int wcd938x_populate_dt_data(struct wcd938x_priv *wcd938x, struct device
struct wcd_mbhc_config *cfg = &wcd938x->mbhc_cfg;
int ret;
- wcd938x->reset_gpio = of_get_named_gpio(dev->of_node, "reset-gpios", 0);
- if (wcd938x->reset_gpio < 0)
- return dev_err_probe(dev, wcd938x->reset_gpio,
+ wcd938x->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(wcd938x->reset_gpio))
+ return dev_err_probe(dev, PTR_ERR(wcd938x->reset_gpio),
"Failed to get reset gpio\n");
- wcd938x->us_euro_gpio = devm_gpiod_get_optional(dev, "us-euro",
- GPIOD_OUT_LOW);
- if (IS_ERR(wcd938x->us_euro_gpio))
- return dev_err_probe(dev, PTR_ERR(wcd938x->us_euro_gpio),
- "us-euro swap Control GPIO not found\n");
+ if (of_property_present(dev->of_node, "mux-controls")) {
+ wcd938x->us_euro_mux = devm_mux_control_get(dev, NULL);
+ if (IS_ERR(wcd938x->us_euro_mux)) {
+ ret = PTR_ERR(wcd938x->us_euro_mux);
+ return dev_err_probe(dev, ret, "failed to get mux control\n");
+ }
+
+ ret = mux_control_try_select(wcd938x->us_euro_mux, wcd938x->mux_state);
+ if (ret) {
+ dev_err(dev, "Error (%d) Unable to select us/euro mux state\n", ret);
+ return ret;
+ }
+ wcd938x->mux_setup_done = true;
+ } else {
+ wcd938x->us_euro_gpio = devm_gpiod_get_optional(dev, "us-euro", GPIOD_OUT_LOW);
+ if (IS_ERR(wcd938x->us_euro_gpio))
+ return dev_err_probe(dev, PTR_ERR(wcd938x->us_euro_gpio),
+ "us-euro swap Control GPIO not found\n");
+ }
cfg->swap_gnd_mic = wcd938x_swap_gnd_mic;
@@ -3297,10 +3325,10 @@ static int wcd938x_populate_dt_data(struct wcd938x_priv *wcd938x, struct device
static int wcd938x_reset(struct wcd938x_priv *wcd938x)
{
- gpio_direction_output(wcd938x->reset_gpio, 0);
+ gpiod_set_value(wcd938x->reset_gpio, 1);
/* 20us sleep required after pulling the reset gpio to LOW */
usleep_range(20, 30);
- gpio_set_value(wcd938x->reset_gpio, 1);
+ gpiod_set_value(wcd938x->reset_gpio, 0);
/* 20us sleep required after pulling the reset gpio to HIGH */
usleep_range(20, 30);
@@ -3576,6 +3604,9 @@ static void wcd938x_remove(struct platform_device *pdev)
pm_runtime_set_suspended(dev);
pm_runtime_dont_use_autosuspend(dev);
+ if (wcd938x->us_euro_mux && wcd938x->mux_setup_done)
+ mux_control_deselect(wcd938x->us_euro_mux);
+
regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies);
regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies);
}
diff --git a/sound/soc/codecs/wcd939x.c b/sound/soc/codecs/wcd939x.c
index 0a87a79772da..067d23c7ecf9 100644
--- a/sound/soc/codecs/wcd939x.c
+++ b/sound/soc/codecs/wcd939x.c
@@ -15,7 +15,6 @@
#include <linux/pm_runtime.h>
#include <linux/component.h>
#include <sound/tlv.h>
-#include <linux/of_gpio.h>
#include <linux/of_graph.h>
#include <linux/of.h>
#include <sound/jack.h>
@@ -201,7 +200,7 @@ struct wcd939x_priv {
u32 hph_mode;
u32 tx_mode[TX_ADC_MAX];
int variant;
- int reset_gpio;
+ struct gpio_desc *reset_gpio;
u32 micb1_mv;
u32 micb2_mv;
u32 micb3_mv;
@@ -3215,7 +3214,7 @@ static void wcd939x_dt_parse_micbias_info(struct device *dev, struct wcd939x_pri
}
#if IS_ENABLED(CONFIG_TYPEC)
-static bool wcd939x_swap_gnd_mic(struct snd_soc_component *component, bool active)
+static bool wcd939x_swap_gnd_mic(struct snd_soc_component *component)
{
struct wcd939x_priv *wcd939x = snd_soc_component_get_drvdata(component);
@@ -3239,10 +3238,11 @@ static int wcd939x_populate_dt_data(struct wcd939x_priv *wcd939x, struct device
#endif /* CONFIG_TYPEC */
int ret;
- wcd939x->reset_gpio = of_get_named_gpio(dev->of_node, "reset-gpios", 0);
- if (wcd939x->reset_gpio < 0)
- return dev_err_probe(dev, wcd939x->reset_gpio,
- "Failed to get reset gpio\n");
+ wcd939x->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(wcd939x->reset_gpio)) {
+ ret = PTR_ERR(wcd939x->reset_gpio);
+ return dev_err_probe(dev, ret, "Failed to get reset gpio\n");
+ }
wcd939x->supplies[0].supply = "vdd-rxtx";
wcd939x->supplies[1].supply = "vdd-io";
@@ -3290,10 +3290,10 @@ static int wcd939x_populate_dt_data(struct wcd939x_priv *wcd939x, struct device
static int wcd939x_reset(struct wcd939x_priv *wcd939x)
{
- gpio_direction_output(wcd939x->reset_gpio, 0);
+ gpiod_set_value(wcd939x->reset_gpio, 1);
/* 20us sleep required after pulling the reset gpio to LOW */
usleep_range(20, 30);
- gpio_set_value(wcd939x->reset_gpio, 1);
+ gpiod_set_value(wcd939x->reset_gpio, 0);
/* 20us sleep required after pulling the reset gpio to HIGH */
usleep_range(20, 30);
diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c
index d9e5762324df..fb5ed4ba7f60 100644
--- a/sound/soc/codecs/wm5100.c
+++ b/sound/soc/codecs/wm5100.c
@@ -2236,12 +2236,14 @@ static irqreturn_t wm5100_edge_irq(int irq, void *data)
}
#ifdef CONFIG_GPIOLIB
-static void wm5100_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int wm5100_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct wm5100_priv *wm5100 = gpiochip_get_data(chip);
- regmap_update_bits(wm5100->regmap, WM5100_GPIO_CTRL_1 + offset,
- WM5100_GP1_LVL, !!value << WM5100_GP1_LVL_SHIFT);
+ return regmap_update_bits(wm5100->regmap, WM5100_GPIO_CTRL_1 + offset,
+ WM5100_GP1_LVL,
+ !!value << WM5100_GP1_LVL_SHIFT);
}
static int wm5100_gpio_direction_out(struct gpio_chip *chip,
@@ -2288,7 +2290,7 @@ static const struct gpio_chip wm5100_template_chip = {
.label = "wm5100",
.owner = THIS_MODULE,
.direction_output = wm5100_gpio_direction_out,
- .set = wm5100_gpio_set,
+ .set_rv = wm5100_gpio_set,
.direction_input = wm5100_gpio_direction_in,
.get = wm5100_gpio_get,
.can_sleep = 1,
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 03902909f27e..2ed9f493d507 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1825,13 +1825,15 @@ static int wm8903_gpio_direction_out(struct gpio_chip *chip,
return 0;
}
-static void wm8903_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int wm8903_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct wm8903_priv *wm8903 = gpiochip_get_data(chip);
- regmap_update_bits(wm8903->regmap, WM8903_GPIO_CONTROL_1 + offset,
- WM8903_GP1_LVL_MASK,
- !!value << WM8903_GP1_LVL_SHIFT);
+ return regmap_update_bits(wm8903->regmap,
+ WM8903_GPIO_CONTROL_1 + offset,
+ WM8903_GP1_LVL_MASK,
+ !!value << WM8903_GP1_LVL_SHIFT);
}
static const struct gpio_chip wm8903_template_chip = {
@@ -1841,7 +1843,7 @@ static const struct gpio_chip wm8903_template_chip = {
.direction_input = wm8903_gpio_direction_in,
.get = wm8903_gpio_get,
.direction_output = wm8903_gpio_direction_out,
- .set = wm8903_gpio_set,
+ .set_rv = wm8903_gpio_set,
.can_sleep = 1,
};
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 68f746626c33..d69aa8b15629 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -3407,13 +3407,16 @@ static int wm8962_gpio_request(struct gpio_chip *chip, unsigned offset)
return 0;
}
-static void wm8962_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int wm8962_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct wm8962_priv *wm8962 = gpiochip_get_data(chip);
struct snd_soc_component *component = wm8962->component;
- snd_soc_component_update_bits(component, WM8962_GPIO_BASE + offset,
- WM8962_GP2_LVL, !!value << WM8962_GP2_LVL_SHIFT);
+ return snd_soc_component_update_bits(component,
+ WM8962_GPIO_BASE + offset,
+ WM8962_GP2_LVL,
+ !!value << WM8962_GP2_LVL_SHIFT);
}
static int wm8962_gpio_direction_out(struct gpio_chip *chip,
@@ -3439,7 +3442,7 @@ static const struct gpio_chip wm8962_template_chip = {
.owner = THIS_MODULE,
.request = wm8962_gpio_request,
.direction_output = wm8962_gpio_direction_out,
- .set = wm8962_gpio_set,
+ .set_rv = wm8962_gpio_set,
.can_sleep = 1,
};
diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index c2af8d7ecdd8..e364d0da9044 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -2136,12 +2136,14 @@ static int wm8996_set_fll(struct snd_soc_component *component, int fll_id, int s
}
#ifdef CONFIG_GPIOLIB
-static void wm8996_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static int wm8996_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct wm8996_priv *wm8996 = gpiochip_get_data(chip);
- regmap_update_bits(wm8996->regmap, WM8996_GPIO_1 + offset,
- WM8996_GP1_LVL, !!value << WM8996_GP1_LVL_SHIFT);
+ return regmap_update_bits(wm8996->regmap, WM8996_GPIO_1 + offset,
+ WM8996_GP1_LVL,
+ !!value << WM8996_GP1_LVL_SHIFT);
}
static int wm8996_gpio_direction_out(struct gpio_chip *chip,
@@ -2184,7 +2186,7 @@ static const struct gpio_chip wm8996_template_chip = {
.label = "wm8996",
.owner = THIS_MODULE,
.direction_output = wm8996_gpio_direction_out,
- .set = wm8996_gpio_set,
+ .set_rv = wm8996_gpio_set,
.direction_input = wm8996_gpio_direction_in,
.get = wm8996_gpio_get,
.can_sleep = 1,
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 91c8697c29c3..3c580faab3b7 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -8,6 +8,7 @@
*/
#include <linux/array_size.h>
+#include <linux/cleanup.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -19,7 +20,7 @@
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
+#include <linux/string.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <sound/core.h>
@@ -415,21 +416,12 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl,
(struct soc_bytes_ext *)kctl->private_value;
struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
- void *scratch;
- int ret = 0;
+ void *scratch __free(kvfree) = vmemdup_user(bytes, size);
- scratch = vmalloc(size);
- if (!scratch)
- return -ENOMEM;
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
- if (copy_from_user(scratch, bytes, size))
- ret = -EFAULT;
- else
- ret = cs_dsp_coeff_lock_and_write_ctrl(cs_ctl, 0, scratch, size);
-
- vfree(scratch);
-
- return ret;
+ return cs_dsp_coeff_lock_and_write_ctrl(cs_ctl, 0, scratch, size);
}
static int wm_coeff_put_acked(struct snd_kcontrol *kctl,
@@ -718,12 +710,10 @@ static void wm_adsp_release_firmware_files(struct wm_adsp *dsp,
const struct firmware *coeff_firmware,
char *coeff_filename)
{
- if (wmfw_firmware)
- release_firmware(wmfw_firmware);
+ release_firmware(wmfw_firmware);
kfree(wmfw_filename);
- if (coeff_firmware)
- release_firmware(coeff_firmware);
+ release_firmware(coeff_firmware);
kfree(coeff_filename);
}
@@ -785,7 +775,7 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp,
return ret;
}
-static const char *cirrus_dir = "cirrus/";
+static const char * const cirrus_dir = "cirrus/";
static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
const struct firmware **wmfw_firmware,
char **wmfw_filename,
diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c
index 1c9df7c061bd..f04d99c66f33 100644
--- a/sound/soc/codecs/wsa883x.c
+++ b/sound/soc/codecs/wsa883x.c
@@ -1584,7 +1584,7 @@ static int wsa883x_probe(struct sdw_slave *pdev,
wsa883x->sconfig.type = SDW_STREAM_PDM;
mutex_init(&wsa883x->sp_lock);
- /**
+ /*
* Port map index starts with 0, however the data port for this codec
* are from index 1
*/
diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c
index daada1a2a34c..fd6ebc25fe89 100644
--- a/sound/soc/codecs/wsa884x.c
+++ b/sound/soc/codecs/wsa884x.c
@@ -2085,7 +2085,7 @@ static int wsa884x_probe(struct sdw_slave *pdev,
wsa884x->sconfig.direction = SDW_DATA_DIR_RX;
wsa884x->sconfig.type = SDW_STREAM_PDM;
- /**
+ /*
* Port map index starts with 0, however the data port for this codec
* are from index 1
*/
diff --git a/sound/soc/codecs/zl38060.c b/sound/soc/codecs/zl38060.c
index 28c92d90299e..180d45a349ac 100644
--- a/sound/soc/codecs/zl38060.c
+++ b/sound/soc/codecs/zl38060.c
@@ -387,12 +387,12 @@ static const struct snd_soc_component_driver zl38_component_dev = {
.endianness = 1,
};
-static void chip_gpio_set(struct gpio_chip *c, unsigned int offset, int val)
+static int chip_gpio_set(struct gpio_chip *c, unsigned int offset, int val)
{
struct regmap *regmap = gpiochip_get_data(c);
unsigned int mask = BIT(offset);
- regmap_update_bits(regmap, REG_GPIO_DAT, mask, val ? mask : 0);
+ return regmap_update_bits(regmap, REG_GPIO_DAT, mask, val ? mask : 0);
}
static int chip_gpio_get(struct gpio_chip *c, unsigned int offset)
@@ -422,8 +422,12 @@ chip_direction_output(struct gpio_chip *c, unsigned int offset, int val)
{
struct regmap *regmap = gpiochip_get_data(c);
unsigned int mask = BIT(offset);
+ int ret;
+
+ ret = chip_gpio_set(c, offset, val);
+ if (ret)
+ return ret;
- chip_gpio_set(c, offset, val);
return regmap_update_bits(regmap, REG_GPIO_DIR, mask, mask);
}
@@ -436,7 +440,7 @@ static const struct gpio_chip template_chip = {
.direction_input = chip_direction_input,
.direction_output = chip_direction_output,
.get = chip_gpio_get,
- .set = chip_gpio_set,
+ .set_rv = chip_gpio_set,
.can_sleep = true,
};
diff --git a/sound/soc/fsl/fsl_rpmsg.c b/sound/soc/fsl/fsl_rpmsg.c
index 0a551be3053b..5708b3a9878d 100644
--- a/sound/soc/fsl/fsl_rpmsg.c
+++ b/sound/soc/fsl/fsl_rpmsg.c
@@ -24,6 +24,8 @@
/* 192kHz/32bit/2ch/60s size is 0x574e00 */
#define LPA_LARGE_BUFFER_SIZE (0x6000000)
+/* 16kHz/32bit/8ch/1s size is 0x7D000 */
+#define LPA_CAPTURE_BUFFER_SIZE (0x100000)
static const unsigned int fsl_rpmsg_rates[] = {
8000, 11025, 16000, 22050, 44100,
@@ -97,13 +99,9 @@ static int fsl_rpmsg_hw_free(struct snd_pcm_substream *substream,
static int fsl_rpmsg_startup(struct snd_pcm_substream *substream,
struct snd_soc_dai *cpu_dai)
{
- int ret;
-
- ret = snd_pcm_hw_constraint_list(substream->runtime, 0,
- SNDRV_PCM_HW_PARAM_RATE,
- &fsl_rpmsg_rate_constraints);
-
- return ret;
+ return snd_pcm_hw_constraint_list(substream->runtime, 0,
+ SNDRV_PCM_HW_PARAM_RATE,
+ &fsl_rpmsg_rate_constraints);
}
static const struct snd_soc_dai_ops fsl_rpmsg_dai_ops = {
@@ -233,11 +231,23 @@ static int fsl_rpmsg_probe(struct platform_device *pdev)
}
dai_drv->name = dai_name;
+ /* Setup cpu dai for sound card that sits on rpmsg-micfil-channel */
+ if (!strcmp(dai_name, "rpmsg-micfil-channel")) {
+ dai_drv->capture.channels_min = 1;
+ dai_drv->capture.channels_max = 8;
+ dai_drv->capture.rates = SNDRV_PCM_RATE_8000_48000;
+ dai_drv->capture.formats = SNDRV_PCM_FMTBIT_S32_LE;
+ if (of_device_is_compatible(np, "fsl,imx8mm-rpmsg-audio"))
+ dai_drv->capture.formats = SNDRV_PCM_FMTBIT_S16_LE;
+ }
+
if (of_property_read_bool(np, "fsl,enable-lpa")) {
rpmsg->enable_lpa = 1;
- rpmsg->buffer_size = LPA_LARGE_BUFFER_SIZE;
+ rpmsg->buffer_size[SNDRV_PCM_STREAM_PLAYBACK] = LPA_LARGE_BUFFER_SIZE;
+ rpmsg->buffer_size[SNDRV_PCM_STREAM_CAPTURE] = LPA_CAPTURE_BUFFER_SIZE;
} else {
- rpmsg->buffer_size = IMX_DEFAULT_DMABUF_SIZE;
+ rpmsg->buffer_size[SNDRV_PCM_STREAM_PLAYBACK] = IMX_DEFAULT_DMABUF_SIZE;
+ rpmsg->buffer_size[SNDRV_PCM_STREAM_CAPTURE] = IMX_DEFAULT_DMABUF_SIZE;
}
/* Get the optional clocks */
diff --git a/sound/soc/fsl/fsl_rpmsg.h b/sound/soc/fsl/fsl_rpmsg.h
index b04086fbf828..1b1683808507 100644
--- a/sound/soc/fsl/fsl_rpmsg.h
+++ b/sound/soc/fsl/fsl_rpmsg.h
@@ -42,6 +42,6 @@ struct fsl_rpmsg {
unsigned int mclk_streams;
int force_lpa;
int enable_lpa;
- int buffer_size;
+ int buffer_size[2];
};
#endif /* __FSL_RPMSG_H */
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index ed2b4780c470..af1a168d35e3 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -163,14 +163,46 @@ out:
return iret;
}
-static int fsl_sai_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask,
- u32 rx_mask, int slots, int slot_width)
+static int fsl_sai_set_dai_tdm_slot_tx(struct snd_soc_dai *cpu_dai, u32 tx_mask,
+ u32 rx_mask, int slots, int slot_width)
{
struct fsl_sai *sai = snd_soc_dai_get_drvdata(cpu_dai);
+ bool tx = true;
+
+ sai->slots[tx] = slots;
+ sai->slot_width[tx] = slot_width;
+
+ return 0;
+}
+
+static int fsl_sai_set_dai_tdm_slot_rx(struct snd_soc_dai *cpu_dai, u32 tx_mask,
+ u32 rx_mask, int slots, int slot_width)
+{
+ struct fsl_sai *sai = snd_soc_dai_get_drvdata(cpu_dai);
+ bool tx = false;
+
+ sai->slots[tx] = slots;
+ sai->slot_width[tx] = slot_width;
+
+ return 0;
+}
+
+static int fsl_sai_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask,
+ u32 rx_mask, int slots, int slot_width)
+{
+ int ret;
+
+ ret = fsl_sai_set_dai_tdm_slot_tx(cpu_dai, tx_mask, rx_mask, slots, slot_width);
+ if (ret)
+ return ret;
- sai->slots = slots;
- sai->slot_width = slot_width;
+ return fsl_sai_set_dai_tdm_slot_rx(cpu_dai, tx_mask, rx_mask, slots, slot_width);
+}
+static int fsl_sai_xlate_tdm_slot_mask(unsigned int slots,
+ unsigned int *tx_mask, unsigned int *rx_mask)
+{
+ /* Leave it empty, don't change the value of tx_mask and rx_mask */
return 0;
}
@@ -238,22 +270,22 @@ static int fsl_sai_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
if (dir == SND_SOC_CLOCK_IN)
return 0;
- if (freq > 0 && clk_id != FSL_SAI_CLK_BUS) {
- if (clk_id < 0 || clk_id >= FSL_SAI_MCLK_MAX) {
- dev_err(cpu_dai->dev, "Unknown clock id: %d\n", clk_id);
- return -EINVAL;
- }
+ if (clk_id < 0 || clk_id >= FSL_SAI_MCLK_MAX) {
+ dev_err(cpu_dai->dev, "Unknown clock id: %d\n", clk_id);
+ return -EINVAL;
+ }
- if (IS_ERR_OR_NULL(sai->mclk_clk[clk_id])) {
- dev_err(cpu_dai->dev, "Unassigned clock: %d\n", clk_id);
- return -EINVAL;
- }
+ if (IS_ERR_OR_NULL(sai->mclk_clk[clk_id])) {
+ dev_err(cpu_dai->dev, "Unassigned clock: %d\n", clk_id);
+ return -EINVAL;
+ }
- if (sai->mclk_streams == 0) {
- ret = fsl_sai_set_mclk_rate(cpu_dai, clk_id, freq);
- if (ret < 0)
- return ret;
- }
+ if (sai->mclk_streams == 0 && freq > 0) {
+ ret = fsl_sai_set_mclk_rate(cpu_dai,
+ clk_id ? clk_id : FSL_SAI_CLK_MAST1,
+ freq);
+ if (ret < 0)
+ return ret;
}
ret = fsl_sai_set_dai_sysclk_tr(cpu_dai, clk_id, freq, true);
@@ -280,7 +312,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai,
val_cr4 |= FSL_SAI_CR4_MF;
sai->is_pdm_mode = false;
- sai->is_dsp_mode = false;
+ sai->is_dsp_mode[tx] = false;
/* DAI mode */
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
case SND_SOC_DAIFMT_I2S:
@@ -309,7 +341,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai,
*/
val_cr2 |= FSL_SAI_CR2_BCP;
val_cr4 |= FSL_SAI_CR4_FSE;
- sai->is_dsp_mode = true;
+ sai->is_dsp_mode[tx] = true;
break;
case SND_SOC_DAIFMT_DSP_B:
/*
@@ -317,7 +349,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai,
* frame sync asserts with the first bit of the frame.
*/
val_cr2 |= FSL_SAI_CR2_BCP;
- sai->is_dsp_mode = true;
+ sai->is_dsp_mode[tx] = true;
break;
case SND_SOC_DAIFMT_PDM:
val_cr2 |= FSL_SAI_CR2_BCP;
@@ -541,11 +573,11 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
u32 watermark;
int ret, i;
- if (sai->slot_width)
- slot_width = sai->slot_width;
+ if (sai->slot_width[tx])
+ slot_width = sai->slot_width[tx];
- if (sai->slots)
- slots = sai->slots;
+ if (sai->slots[tx])
+ slots = sai->slots[tx];
else if (sai->bclk_ratio)
slots = sai->bclk_ratio / slot_width;
@@ -600,7 +632,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
}
}
- if (!sai->is_dsp_mode && !sai->is_pdm_mode)
+ if (!sai->is_dsp_mode[tx] && !sai->is_pdm_mode)
val_cr4 |= FSL_SAI_CR4_SYWD(slot_width);
val_cr5 |= FSL_SAI_CR5_WNW(slot_width);
@@ -932,7 +964,8 @@ static const struct snd_soc_dai_ops fsl_sai_pcm_dai_tx_ops = {
.set_bclk_ratio = fsl_sai_set_dai_bclk_ratio,
.set_sysclk = fsl_sai_set_dai_sysclk,
.set_fmt = fsl_sai_set_dai_fmt_tx,
- .set_tdm_slot = fsl_sai_set_dai_tdm_slot,
+ .set_tdm_slot = fsl_sai_set_dai_tdm_slot_tx,
+ .xlate_tdm_slot_mask = fsl_sai_xlate_tdm_slot_mask,
.hw_params = fsl_sai_hw_params,
.hw_free = fsl_sai_hw_free,
.trigger = fsl_sai_trigger,
@@ -944,7 +977,8 @@ static const struct snd_soc_dai_ops fsl_sai_pcm_dai_rx_ops = {
.set_bclk_ratio = fsl_sai_set_dai_bclk_ratio,
.set_sysclk = fsl_sai_set_dai_sysclk,
.set_fmt = fsl_sai_set_dai_fmt_rx,
- .set_tdm_slot = fsl_sai_set_dai_tdm_slot,
+ .set_tdm_slot = fsl_sai_set_dai_tdm_slot_rx,
+ .xlate_tdm_slot_mask = fsl_sai_xlate_tdm_slot_mask,
.hw_params = fsl_sai_hw_params,
.hw_free = fsl_sai_hw_free,
.trigger = fsl_sai_trigger,
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index 0e25e2fc7ce0..6c917f79c6b0 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -286,7 +286,7 @@ struct fsl_sai {
bool is_consumer_mode[2];
bool is_lsb_first;
- bool is_dsp_mode;
+ bool is_dsp_mode[2];
bool is_pdm_mode;
bool is_multi_fifo_dma;
bool synchronous[2];
@@ -296,8 +296,8 @@ struct fsl_sai {
unsigned int mclk_id[2];
unsigned int mclk_streams;
- unsigned int slots;
- unsigned int slot_width;
+ unsigned int slots[2];
+ unsigned int slot_width[2];
unsigned int bclk_ratio;
const struct fsl_sai_soc_data *soc_data;
diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index 83aea341c1b6..e3111dd80be4 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -1827,7 +1827,7 @@ static const struct dev_pm_ops fsl_xcvr_pm_ops = {
static struct platform_driver fsl_xcvr_driver = {
.probe = fsl_xcvr_probe,
.driver = {
- .name = "fsl,imx8mp-audio-xcvr",
+ .name = "fsl-xcvr",
.pm = pm_ptr(&fsl_xcvr_pm_ops),
.of_match_table = fsl_xcvr_dt_ids,
},
diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c
index 45e000f61ecc..9e668ae68039 100644
--- a/sound/soc/fsl/imx-card.c
+++ b/sound/soc/fsl/imx-card.c
@@ -670,9 +670,12 @@ static int imx_card_parse_of(struct imx_card_data *data)
}
} else if (!strncmp(link->name, "HiFi-ASRC-BE", 12)) {
/* DPCM backend */
+ /*
+ * No need to have link->platforms. alloced dlc[1] will be just wasted,
+ * but it won't leak.
+ */
link->no_pcm = 1;
- link->platforms->of_node = NULL;
- link->platforms->name = "snd-soc-dummy";
+ link->platforms = NULL;
link->be_hw_params_fixup = be_hw_params_fixup;
link->ops = &imx_aif_ops_be;
diff --git a/sound/soc/fsl/imx-pcm-rpmsg.c b/sound/soc/fsl/imx-pcm-rpmsg.c
index de5f87600fac..8ed62d43ffd5 100644
--- a/sound/soc/fsl/imx-pcm-rpmsg.c
+++ b/sound/soc/fsl/imx-pcm-rpmsg.c
@@ -261,7 +261,7 @@ static int imx_rpmsg_pcm_open(struct snd_soc_component *component,
info->send_message(msg, info);
pcm_hardware = imx_rpmsg_pcm_hardware;
- pcm_hardware.buffer_bytes_max = rpmsg->buffer_size;
+ pcm_hardware.buffer_bytes_max = rpmsg->buffer_size[substream->stream];
pcm_hardware.period_bytes_max = pcm_hardware.buffer_bytes_max / 2;
snd_soc_set_runtime_hwparams(substream, &pcm_hardware);
@@ -597,14 +597,29 @@ static int imx_rpmsg_pcm_new(struct snd_soc_component *component,
struct snd_pcm *pcm = rtd->pcm;
struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0);
struct fsl_rpmsg *rpmsg = dev_get_drvdata(cpu_dai->dev);
+ struct snd_pcm_substream *substream;
int ret;
ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
if (ret)
return ret;
- return snd_pcm_set_fixed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV_WC,
- pcm->card->dev, rpmsg->buffer_size);
+ substream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
+ if (substream) {
+ ret = snd_pcm_set_fixed_buffer(substream, SNDRV_DMA_TYPE_DEV_WC, pcm->card->dev,
+ rpmsg->buffer_size[SNDRV_PCM_STREAM_PLAYBACK]);
+ if (ret < 0)
+ return ret;
+ }
+ substream = pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
+ if (substream) {
+ ret = snd_pcm_set_fixed_buffer(substream, SNDRV_DMA_TYPE_DEV_WC, pcm->card->dev,
+ rpmsg->buffer_size[SNDRV_PCM_STREAM_CAPTURE]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
}
static const struct snd_soc_component_driver imx_rpmsg_soc_component = {
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 3ae2a212a2e3..355f7ec8943c 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -1119,12 +1119,16 @@ int graph_util_parse_dai(struct simple_util_priv *priv, struct device_node *ep,
args.np = ep;
dai = snd_soc_get_dai_via_args(&args);
if (dai) {
+ const char *dai_name = snd_soc_dai_name_get(dai);
+ const struct of_phandle_args *dai_args = snd_soc_copy_dai_args(dev, &args);
+
ret = -ENOMEM;
+ if (!dai_args)
+ goto err;
+
dlc->of_node = node;
- dlc->dai_name = snd_soc_dai_name_get(dai);
- dlc->dai_args = snd_soc_copy_dai_args(dev, &args);
- if (!dlc->dai_args)
- goto end;
+ dlc->dai_name = dai_name;
+ dlc->dai_args = dai_args;
goto parse_dai_end;
}
@@ -1154,16 +1158,17 @@ int graph_util_parse_dai(struct simple_util_priv *priv, struct device_node *ep,
* if he unbinded CPU or Codec.
*/
ret = snd_soc_get_dlc(&args, dlc);
- if (ret < 0) {
- of_node_put(node);
- goto end;
- }
+ if (ret < 0)
+ goto err;
parse_dai_end:
if (is_single_link)
*is_single_link = of_graph_get_endpoint_count(node) == 1;
ret = 0;
-end:
+err:
+ if (ret < 0)
+ of_node_put(node);
+
return simple_ret(priv, ret);
}
EXPORT_SYMBOL_GPL(graph_util_parse_dai);
diff --git a/sound/soc/generic/test-component.c b/sound/soc/generic/test-component.c
index 5430d25deaef..89b995987e2d 100644
--- a/sound/soc/generic/test-component.c
+++ b/sound/soc/generic/test-component.c
@@ -140,6 +140,15 @@ static int test_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
return 0;
}
+static int test_dai_set_tdm_slot(struct snd_soc_dai *dai,
+ unsigned int tx_mask, unsigned int rx_mask,
+ int slots, int slot_width)
+{
+ dev_info(dai->dev, "set tdm slot: tx_mask=0x%08X, rx_mask=0x%08X, slots=%d, slot_width=%d\n",
+ tx_mask, rx_mask, slots, slot_width);
+ return 0;
+}
+
static int test_dai_mute_stream(struct snd_soc_dai *dai, int mute, int stream)
{
mile_stone(dai);
@@ -203,6 +212,7 @@ static const u64 test_dai_formats =
static const struct snd_soc_dai_ops test_ops = {
.set_fmt = test_dai_set_fmt,
+ .set_tdm_slot = test_dai_set_tdm_slot,
.startup = test_dai_startup,
.shutdown = test_dai_shutdown,
.auto_selectable_formats = &test_dai_formats,
@@ -214,6 +224,7 @@ static const struct snd_soc_dai_ops test_verbose_ops = {
.set_pll = test_dai_set_pll,
.set_clkdiv = test_dai_set_clkdiv,
.set_fmt = test_dai_set_fmt,
+ .set_tdm_slot = test_dai_set_tdm_slot,
.mute_stream = test_dai_mute_stream,
.startup = test_dai_startup,
.shutdown = test_dai_shutdown,
diff --git a/sound/soc/intel/atom/sst/sst.h b/sound/soc/intel/atom/sst/sst.h
index 126903e126e4..c43946c5ecee 100644
--- a/sound/soc/intel/atom/sst/sst.h
+++ b/sound/soc/intel/atom/sst/sst.h
@@ -443,9 +443,6 @@ int sst_set_stream_param(int str_id, struct snd_sst_params *str_param);
int sst_set_metadata(int str_id, char *params);
int sst_get_stream(struct intel_sst_drv *ctx,
struct snd_sst_params *str_param);
-int sst_get_stream_allocated(struct intel_sst_drv *ctx,
- struct snd_sst_params *str_param,
- struct snd_sst_lib_download **lib_dnld);
int sst_drain_stream(struct intel_sst_drv *sst_drv_ctx,
int str_id, bool partial_drain);
int sst_post_message_mrfld(struct intel_sst_drv *sst_drv_ctx,
@@ -461,8 +458,6 @@ void sst_post_download_mrfld(struct intel_sst_drv *ctx);
int sst_get_block_stream(struct intel_sst_drv *sst_drv_ctx);
void sst_memcpy_free_resources(struct intel_sst_drv *sst_drv_ctx);
-int sst_wait_interruptible(struct intel_sst_drv *sst_drv_ctx,
- struct sst_block *block);
int sst_wait_timeout(struct intel_sst_drv *sst_drv_ctx,
struct sst_block *block);
int sst_create_ipc_msg(struct ipc_post **arg, bool large);
@@ -470,7 +465,6 @@ int free_stream_context(struct intel_sst_drv *ctx, unsigned int str_id);
void sst_clean_stream(struct stream_info *stream);
int intel_sst_register_compress(struct intel_sst_drv *sst);
int intel_sst_remove_compress(struct intel_sst_drv *sst);
-void sst_cdev_fragment_elapsed(struct intel_sst_drv *ctx, int str_id);
int sst_send_sync_msg(int ipc, int str_id);
int sst_get_num_channel(struct snd_sst_params *str_param);
int sst_get_sfreq(struct snd_sst_params *str_param);
diff --git a/sound/soc/intel/atom/sst/sst_drv_interface.c b/sound/soc/intel/atom/sst/sst_drv_interface.c
index dc31c2c8f54c..8bb27f86eb65 100644
--- a/sound/soc/intel/atom/sst/sst_drv_interface.c
+++ b/sound/soc/intel/atom/sst/sst_drv_interface.c
@@ -55,19 +55,6 @@ int free_stream_context(struct intel_sst_drv *ctx, unsigned int str_id)
return ret;
}
-int sst_get_stream_allocated(struct intel_sst_drv *ctx,
- struct snd_sst_params *str_param,
- struct snd_sst_lib_download **lib_dnld)
-{
- int retval;
-
- retval = ctx->ops->alloc_stream(ctx, str_param);
- if (retval > 0)
- dev_dbg(ctx->dev, "Stream allocated %d\n", retval);
- return retval;
-
-}
-
/*
* sst_get_sfreq - this function returns the frequency of the stream
*
@@ -430,17 +417,6 @@ static int sst_cdev_codec_caps(struct snd_compr_codec_caps *codec)
return 0;
}
-void sst_cdev_fragment_elapsed(struct intel_sst_drv *ctx, int str_id)
-{
- struct stream_info *stream;
-
- dev_dbg(ctx->dev, "fragment elapsed from firmware for str_id %d\n",
- str_id);
- stream = &ctx->streams[str_id];
- if (stream->compr_cb)
- stream->compr_cb(stream->compr_cb_param);
-}
-
/*
* sst_close_pcm_stream - Close PCM interface
*
diff --git a/sound/soc/intel/atom/sst/sst_pci.c b/sound/soc/intel/atom/sst/sst_pci.c
index d1e64c3500be..22ae2d22f121 100644
--- a/sound/soc/intel/atom/sst/sst_pci.c
+++ b/sound/soc/intel/atom/sst/sst_pci.c
@@ -26,7 +26,7 @@ static int sst_platform_get_resources(struct intel_sst_drv *ctx)
int ddr_base, ret = 0;
struct pci_dev *pci = ctx->pci;
- ret = pci_request_regions(pci, SST_DRV_NAME);
+ ret = pcim_request_all_regions(pci, SST_DRV_NAME);
if (ret)
return ret;
@@ -38,67 +38,57 @@ static int sst_platform_get_resources(struct intel_sst_drv *ctx)
ddr_base = relocate_imr_addr_mrfld(ctx->ddr_base);
if (!ctx->pdata->lib_info) {
dev_err(ctx->dev, "lib_info pointer NULL\n");
- ret = -EINVAL;
- goto do_release_regions;
+ return -EINVAL;
}
if (ddr_base != ctx->pdata->lib_info->mod_base) {
dev_err(ctx->dev,
"FW LSP DDR BASE does not match with IFWI\n");
- ret = -EINVAL;
- goto do_release_regions;
+ return -EINVAL;
}
ctx->ddr_end = pci_resource_end(pci, 0);
- ctx->ddr = pcim_iomap(pci, 0,
- pci_resource_len(pci, 0));
- if (!ctx->ddr) {
- ret = -EINVAL;
- goto do_release_regions;
- }
+ ctx->ddr = pcim_iomap(pci, 0, 0);
+ if (!ctx->ddr)
+ return -ENOMEM;
+
dev_dbg(ctx->dev, "sst: DDR Ptr %p\n", ctx->ddr);
} else {
ctx->ddr = NULL;
}
/* SHIM */
ctx->shim_phy_add = pci_resource_start(pci, 1);
- ctx->shim = pcim_iomap(pci, 1, pci_resource_len(pci, 1));
- if (!ctx->shim) {
- ret = -EINVAL;
- goto do_release_regions;
- }
+ ctx->shim = pcim_iomap(pci, 1, 0);
+ if (!ctx->shim)
+ return -ENOMEM;
+
dev_dbg(ctx->dev, "SST Shim Ptr %p\n", ctx->shim);
/* Shared SRAM */
ctx->mailbox_add = pci_resource_start(pci, 2);
- ctx->mailbox = pcim_iomap(pci, 2, pci_resource_len(pci, 2));
- if (!ctx->mailbox) {
- ret = -EINVAL;
- goto do_release_regions;
- }
+ ctx->mailbox = pcim_iomap(pci, 2, 0);
+ if (!ctx->mailbox)
+ return -ENOMEM;
+
dev_dbg(ctx->dev, "SRAM Ptr %p\n", ctx->mailbox);
/* IRAM */
ctx->iram_end = pci_resource_end(pci, 3);
ctx->iram_base = pci_resource_start(pci, 3);
- ctx->iram = pcim_iomap(pci, 3, pci_resource_len(pci, 3));
- if (!ctx->iram) {
- ret = -EINVAL;
- goto do_release_regions;
- }
+ ctx->iram = pcim_iomap(pci, 3, 0);
+ if (!ctx->iram)
+ return -ENOMEM;
+
dev_dbg(ctx->dev, "IRAM Ptr %p\n", ctx->iram);
/* DRAM */
ctx->dram_end = pci_resource_end(pci, 4);
ctx->dram_base = pci_resource_start(pci, 4);
- ctx->dram = pcim_iomap(pci, 4, pci_resource_len(pci, 4));
- if (!ctx->dram) {
- ret = -EINVAL;
- goto do_release_regions;
- }
+ ctx->dram = pcim_iomap(pci, 4, 0);
+ if (!ctx->dram)
+ return -ENOMEM;
+
dev_dbg(ctx->dev, "DRAM Ptr %p\n", ctx->dram);
-do_release_regions:
- pci_release_regions(pci);
- return ret;
+ return 0;
}
/*
@@ -167,7 +157,6 @@ static void intel_sst_remove(struct pci_dev *pci)
sst_context_cleanup(sst_drv_ctx);
pci_dev_put(sst_drv_ctx->pci);
- pci_release_regions(pci);
pci_set_drvdata(pci, NULL);
}
diff --git a/sound/soc/intel/atom/sst/sst_pvt.c b/sound/soc/intel/atom/sst/sst_pvt.c
index e6a5c18a7018..5d08f7d803f9 100644
--- a/sound/soc/intel/atom/sst/sst_pvt.c
+++ b/sound/soc/intel/atom/sst/sst_pvt.c
@@ -70,39 +70,6 @@ void sst_set_fw_state_locked(
}
/*
- * sst_wait_interruptible - wait on event
- *
- * @sst_drv_ctx: Driver context
- * @block: Driver block to wait on
- *
- * This function waits without a timeout (and is interruptable) for a
- * given block event
- */
-int sst_wait_interruptible(struct intel_sst_drv *sst_drv_ctx,
- struct sst_block *block)
-{
- int retval = 0;
-
- if (!wait_event_interruptible(sst_drv_ctx->wait_queue,
- block->condition)) {
- /* event wake */
- if (block->ret_code < 0) {
- dev_err(sst_drv_ctx->dev,
- "stream failed %d\n", block->ret_code);
- retval = -EBUSY;
- } else {
- dev_dbg(sst_drv_ctx->dev, "event up\n");
- retval = 0;
- }
- } else {
- dev_err(sst_drv_ctx->dev, "signal interrupted\n");
- retval = -EINTR;
- }
- return retval;
-
-}
-
-/*
* sst_wait_timeout - wait on event for timeout
*
* @sst_drv_ctx: Driver context
diff --git a/sound/soc/intel/avs/Makefile b/sound/soc/intel/avs/Makefile
index 5139a019a4ad..576dc0da381d 100644
--- a/sound/soc/intel/avs/Makefile
+++ b/sound/soc/intel/avs/Makefile
@@ -1,10 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
snd-soc-avs-y := dsp.o ipc.o messages.o utils.o core.o loader.o \
- topology.o path.o pcm.o board_selection.o control.o \
- sysfs.o
+ topology.o path.o pcm.o board_selection.o control.o \
+ sysfs.o
snd-soc-avs-y += cldma.o
-snd-soc-avs-y += skl.o apl.o cnl.o icl.o tgl.o
+snd-soc-avs-y += skl.o apl.o cnl.o icl.o tgl.o mtl.o lnl.o ptl.o
snd-soc-avs-y += trace.o
# tell define_trace.h where to find the trace header
diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h
index 585543f872fc..4c096afc5848 100644
--- a/sound/soc/intel/avs/avs.h
+++ b/sound/soc/intel/avs/avs.h
@@ -69,9 +69,12 @@ extern const struct avs_dsp_ops avs_apl_dsp_ops;
extern const struct avs_dsp_ops avs_cnl_dsp_ops;
extern const struct avs_dsp_ops avs_icl_dsp_ops;
extern const struct avs_dsp_ops avs_tgl_dsp_ops;
+extern const struct avs_dsp_ops avs_ptl_dsp_ops;
#define AVS_PLATATTR_CLDMA BIT_ULL(0)
#define AVS_PLATATTR_IMR BIT_ULL(1)
+#define AVS_PLATATTR_ACE BIT_ULL(2)
+#define AVS_PLATATTR_ALTHDA BIT_ULL(3)
#define avs_platattr_test(adev, attr) \
((adev)->spec->attributes & AVS_PLATATTR_##attr)
@@ -79,7 +82,6 @@ extern const struct avs_dsp_ops avs_tgl_dsp_ops;
struct avs_sram_spec {
const u32 base_offset;
const u32 window_size;
- const u32 rom_status_offset;
};
struct avs_hipc_spec {
@@ -91,6 +93,7 @@ struct avs_hipc_spec {
const u32 rsp_offset;
const u32 rsp_busy_mask;
const u32 ctl_offset;
+ const u32 sts_offset;
};
/* Platform specific descriptor */
@@ -265,8 +268,14 @@ void avs_ipc_block(struct avs_ipc *ipc);
int avs_dsp_disable_d0ix(struct avs_dev *adev);
int avs_dsp_enable_d0ix(struct avs_dev *adev);
+int avs_mtl_core_power(struct avs_dev *adev, u32 core_mask, bool power);
+int avs_mtl_core_reset(struct avs_dev *adev, u32 core_mask, bool power);
+int avs_mtl_core_stall(struct avs_dev *adev, u32 core_mask, bool stall);
+int avs_lnl_core_stall(struct avs_dev *adev, u32 core_mask, bool stall);
+void avs_mtl_interrupt_control(struct avs_dev *adev, bool enable);
void avs_skl_ipc_interrupt(struct avs_dev *adev);
irqreturn_t avs_cnl_dsp_interrupt(struct avs_dev *adev);
+irqreturn_t avs_mtl_dsp_interrupt(struct avs_dev *adev);
int avs_apl_enable_logs(struct avs_dev *adev, enum avs_log_enable enable, u32 aging_period,
u32 fifo_full_period, unsigned long resource_mask, u32 *priorities);
int avs_icl_enable_logs(struct avs_dev *adev, enum avs_log_enable enable, u32 aging_period,
@@ -340,7 +349,7 @@ struct avs_soc_component {
extern const struct snd_soc_dai_ops avs_dai_fe_ops;
int avs_soc_component_register(struct device *dev, const char *name,
- const struct snd_soc_component_driver *drv,
+ struct snd_soc_component_driver *drv,
struct snd_soc_dai_driver *cpu_dais, int num_cpu_dais);
int avs_dmic_platform_register(struct avs_dev *adev, const char *name);
int avs_i2s_platform_register(struct avs_dev *adev, const char *name, unsigned long port_mask,
diff --git a/sound/soc/intel/avs/board_selection.c b/sound/soc/intel/avs/board_selection.c
index 2d706edcbf92..636315060eb4 100644
--- a/sound/soc/intel/avs/board_selection.c
+++ b/sound/soc/intel/avs/board_selection.c
@@ -17,10 +17,15 @@
#include <sound/soc-acpi.h>
#include <sound/soc-component.h>
#include "avs.h"
+#include "utils.h"
-static bool i2s_test;
-module_param(i2s_test, bool, 0444);
-MODULE_PARM_DESC(i2s_test, "Probe I2S test-board and skip all other I2S boards");
+static char *i2s_test;
+module_param(i2s_test, charp, 0444);
+MODULE_PARM_DESC(i2s_test, "Use I2S test-board instead of ACPI, i2s_test=ssp0tdm,ssp1tdm,... 0 to ignore port");
+
+bool obsolete_card_names = IS_ENABLED(CONFIG_SND_SOC_INTEL_AVS_CARDNAME_OBSOLETE);
+module_param_named(obsolete_card_names, obsolete_card_names, bool, 0444);
+MODULE_PARM_DESC(obsolete_card_names, "Use obsolete card names 0=no, 1=yes");
static const struct dmi_system_id kbl_dmi_table[] = {
{
@@ -141,7 +146,7 @@ static struct snd_soc_acpi_mach avs_kbl_i2s_machines[] = {
.mach_params = {
.i2s_link_mask = AVS_SSP(0),
},
- .pdata = (unsigned long[]){ 0x2, 0, 0, 0, 0, 0 }, /* SSP0 TDMs */
+ .pdata = (struct avs_mach_pdata[]){ { .tdms = (unsigned long[]){ 0x2 } } },
.tplg_filename = "rt5514-tplg.bin",
},
{
@@ -202,7 +207,9 @@ static struct snd_soc_acpi_mach avs_apl_i2s_machines[] = {
.mach_params = {
.i2s_link_mask = AVS_SSP_RANGE(0, 5),
},
- .pdata = (unsigned long[]){ 0x1, 0x1, 0x14, 0x1, 0x1, 0x1 }, /* SSP2 TDMs */
+ .pdata = (struct avs_mach_pdata[]){ {
+ .tdms = (unsigned long[]){ 0x1, 0x1, 0x14, 0x1, 0x1, 0x1 }
+ } },
.tplg_filename = "tdf8532-tplg.bin",
},
{
@@ -324,52 +331,6 @@ static struct snd_soc_acpi_mach avs_mbl_i2s_machines[] = {
{}
};
-static struct snd_soc_acpi_mach avs_test_i2s_machines[] = {
- {
- .drv_name = "avs_i2s_test",
- .mach_params = {
- .i2s_link_mask = AVS_SSP(0),
- },
- .tplg_filename = "i2s-test-tplg.bin",
- },
- {
- .drv_name = "avs_i2s_test",
- .mach_params = {
- .i2s_link_mask = AVS_SSP(1),
- },
- .tplg_filename = "i2s-test-tplg.bin",
- },
- {
- .drv_name = "avs_i2s_test",
- .mach_params = {
- .i2s_link_mask = AVS_SSP(2),
- },
- .tplg_filename = "i2s-test-tplg.bin",
- },
- {
- .drv_name = "avs_i2s_test",
- .mach_params = {
- .i2s_link_mask = AVS_SSP(3),
- },
- .tplg_filename = "i2s-test-tplg.bin",
- },
- {
- .drv_name = "avs_i2s_test",
- .mach_params = {
- .i2s_link_mask = AVS_SSP(4),
- },
- .tplg_filename = "i2s-test-tplg.bin",
- },
- {
- .drv_name = "avs_i2s_test",
- .mach_params = {
- .i2s_link_mask = AVS_SSP(5),
- },
- .tplg_filename = "i2s-test-tplg.bin",
- },
- /* no NULL terminator, as we depend on ARRAY SIZE due to .id == NULL */
-};
-
struct avs_acpi_boards {
int id;
struct snd_soc_acpi_mach *machs;
@@ -394,7 +355,8 @@ static const struct avs_acpi_boards i2s_boards[] = {
AVS_MACH_ENTRY(HDA_ADL_P, avs_tgl_i2s_machines),
AVS_MACH_ENTRY(HDA_RPL_P_0, avs_tgl_i2s_machines),
AVS_MACH_ENTRY(HDA_RPL_M, avs_mbl_i2s_machines),
- {}
+ AVS_MACH_ENTRY(HDA_FCL, avs_tgl_i2s_machines),
+ { },
};
static const struct avs_acpi_boards *avs_get_i2s_boards(struct avs_dev *adev)
@@ -445,6 +407,7 @@ static int avs_register_dmic_board(struct avs_dev *adev)
{
struct platform_device *codec, *board;
struct snd_soc_acpi_mach mach = {{0}};
+ struct avs_mach_pdata *pdata;
int ret;
if (!acpi_nhlt_find_endpoint(ACPI_NHLT_LINKTYPE_PDM, -1, -1, -1)) {
@@ -468,6 +431,11 @@ static int avs_register_dmic_board(struct avs_dev *adev)
if (ret < 0)
return ret;
+ pdata = devm_kzalloc(adev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+ pdata->obsolete_card_names = obsolete_card_names;
+ mach.pdata = pdata;
mach.tplg_filename = "dmic-tplg.bin";
mach.mach_params.platform = "dmic-platform";
@@ -490,9 +458,11 @@ static int avs_register_dmic_board(struct avs_dev *adev)
static int avs_register_i2s_board(struct avs_dev *adev, struct snd_soc_acpi_mach *mach)
{
struct platform_device *board;
+ struct avs_mach_pdata *pdata;
int num_ssps;
char *name;
int ret;
+ int uid;
num_ssps = adev->hw_cfg.i2s_caps.ctrl_count;
if (fls(mach->mach_params.i2s_link_mask) > num_ssps) {
@@ -502,18 +472,29 @@ static int avs_register_i2s_board(struct avs_dev *adev, struct snd_soc_acpi_mach
return -ENODEV;
}
- name = devm_kasprintf(adev->dev, GFP_KERNEL, "%s.%d-platform", mach->drv_name,
- mach->mach_params.i2s_link_mask);
+ pdata = mach->pdata;
+ if (!pdata)
+ pdata = devm_kzalloc(adev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+ pdata->obsolete_card_names = obsolete_card_names;
+ mach->pdata = pdata;
+
+ uid = mach->mach_params.i2s_link_mask;
+ if (avs_mach_singular_ssp(mach))
+ uid = (uid << AVS_CHANNELS_MAX) + avs_mach_ssp_tdm(mach, avs_mach_ssp_port(mach));
+
+ name = devm_kasprintf(adev->dev, GFP_KERNEL, "%s.%d-platform", mach->drv_name, uid);
if (!name)
return -ENOMEM;
- ret = avs_i2s_platform_register(adev, name, mach->mach_params.i2s_link_mask, mach->pdata);
+ ret = avs_i2s_platform_register(adev, name, mach->mach_params.i2s_link_mask, pdata->tdms);
if (ret < 0)
return ret;
mach->mach_params.platform = name;
- board = platform_device_register_data(NULL, mach->drv_name, mach->mach_params.i2s_link_mask,
+ board = platform_device_register_data(NULL, mach->drv_name, uid,
(const void *)mach, sizeof(*mach));
if (IS_ERR(board)) {
dev_err(adev->dev, "ssp board register failed\n");
@@ -529,35 +510,82 @@ static int avs_register_i2s_board(struct avs_dev *adev, struct snd_soc_acpi_mach
return 0;
}
-static int avs_register_i2s_boards(struct avs_dev *adev)
+static int avs_register_i2s_test_board(struct avs_dev *adev, int ssp_port, int tdm_slot)
{
- const struct avs_acpi_boards *boards;
struct snd_soc_acpi_mach *mach;
+ int tdm_mask = BIT(tdm_slot);
+ unsigned long *tdm_cfg;
+ char *tplg_name;
int ret;
- if (!acpi_nhlt_find_endpoint(ACPI_NHLT_LINKTYPE_SSP, -1, -1, -1)) {
- dev_dbg(adev->dev, "no I2S endpoints present\n");
- return 0;
+ mach = devm_kzalloc(adev->dev, sizeof(*mach), GFP_KERNEL);
+ tdm_cfg = devm_kcalloc(adev->dev, ssp_port + 1, sizeof(unsigned long), GFP_KERNEL);
+ tplg_name = devm_kasprintf(adev->dev, GFP_KERNEL, AVS_STRING_FMT("i2s", "-test-tplg.bin",
+ ssp_port, tdm_slot));
+ if (!mach || !tdm_cfg || !tplg_name)
+ return -ENOMEM;
+
+ mach->drv_name = "avs_i2s_test";
+ mach->mach_params.i2s_link_mask = AVS_SSP(ssp_port);
+ tdm_cfg[ssp_port] = tdm_mask;
+ mach->pdata = tdm_cfg;
+ mach->tplg_filename = tplg_name;
+
+ ret = avs_register_i2s_board(adev, mach);
+ if (ret < 0) {
+ dev_warn(adev->dev, "register i2s %s failed: %d\n", mach->drv_name, ret);
+ return ret;
}
- if (i2s_test) {
- int i, num_ssps;
+ return 0;
+}
- num_ssps = adev->hw_cfg.i2s_caps.ctrl_count;
- /* constrain just in case FW says there can be more SSPs than possible */
- num_ssps = min_t(int, ARRAY_SIZE(avs_test_i2s_machines), num_ssps);
+static int avs_register_i2s_test_boards(struct avs_dev *adev)
+{
+ int max_ssps = adev->hw_cfg.i2s_caps.ctrl_count;
+ int ssp_port, tdm_slot, ret;
+ unsigned long tdm_slots;
+ u32 *array, num_elems;
- mach = avs_test_i2s_machines;
+ ret = parse_int_array(i2s_test, strlen(i2s_test), (int **)&array);
+ if (ret < 0) {
+ dev_err(adev->dev, "failed to parse i2s_test parameter\n");
+ return ret;
+ }
- for (i = 0; i < num_ssps; i++) {
- ret = avs_register_i2s_board(adev, &mach[i]);
- if (ret < 0)
- dev_warn(adev->dev, "register i2s %s failed: %d\n", mach->drv_name,
- ret);
+ num_elems = *array;
+ if (num_elems > max_ssps) {
+ dev_err(adev->dev, "board supports only %d SSP, %d specified\n",
+ max_ssps, num_elems);
+ return -EINVAL;
+ }
+
+ for (ssp_port = 0; ssp_port < num_elems; ssp_port++) {
+ tdm_slots = array[1 + ssp_port];
+ for_each_set_bit(tdm_slot, &tdm_slots, 16) {
+ ret = avs_register_i2s_test_board(adev, ssp_port, tdm_slot);
+ if (ret)
+ return ret;
}
+ }
+
+ return 0;
+}
+
+static int avs_register_i2s_boards(struct avs_dev *adev)
+{
+ const struct avs_acpi_boards *boards;
+ struct snd_soc_acpi_mach *mach;
+ int ret;
+
+ if (!acpi_nhlt_find_endpoint(ACPI_NHLT_LINKTYPE_SSP, -1, -1, -1)) {
+ dev_dbg(adev->dev, "no I2S endpoints present\n");
return 0;
}
+ if (i2s_test)
+ return avs_register_i2s_test_boards(adev);
+
boards = avs_get_i2s_boards(adev);
if (!boards) {
dev_dbg(adev->dev, "no I2S endpoints supported\n");
@@ -584,6 +612,7 @@ static int avs_register_hda_board(struct avs_dev *adev, struct hda_codec *codec)
{
struct snd_soc_acpi_mach mach = {{0}};
struct platform_device *board;
+ struct avs_mach_pdata *pdata;
struct hdac_device *hdev = &codec->core;
char *pname;
int ret, id;
@@ -592,11 +621,17 @@ static int avs_register_hda_board(struct avs_dev *adev, struct hda_codec *codec)
if (!pname)
return -ENOMEM;
+ pdata = devm_kzalloc(adev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+ pdata->obsolete_card_names = obsolete_card_names;
+ pdata->codec = codec;
+
ret = avs_hda_platform_register(adev, pname);
if (ret < 0)
return ret;
- mach.pdata = codec;
+ mach.pdata = pdata;
mach.mach_params.platform = pname;
mach.tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL, "hda-%08x-tplg.bin",
hdev->vendor_id);
diff --git a/sound/soc/intel/avs/boards/Kconfig b/sound/soc/intel/avs/boards/Kconfig
index ba4bee42124c..8b654181004e 100644
--- a/sound/soc/intel/avs/boards/Kconfig
+++ b/sound/soc/intel/avs/boards/Kconfig
@@ -4,6 +4,14 @@ menu "Intel AVS Machine drivers"
comment "Available DSP configurations"
+config SND_SOC_INTEL_AVS_CARDNAME_OBSOLETE
+ bool "Use obsolete card names"
+ default n
+ help
+ Use obsolete names for some of avs cards. This option should be
+ used if your system depends on old card names, for example having
+ not up to date UCM files.
+
config SND_SOC_INTEL_AVS_MACH_DA7219
tristate "da7219 I2S board"
depends on I2C
diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c
index 7217ae51e89c..3ef0db254142 100644
--- a/sound/soc/intel/avs/boards/da7219.c
+++ b/sound/soc/intel/avs/boards/da7219.c
@@ -214,6 +214,7 @@ static int avs_da7219_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct snd_soc_jack *jack;
struct device *dev = &pdev->dev;
@@ -222,6 +223,7 @@ static int avs_da7219_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -238,7 +240,12 @@ static int avs_da7219_probe(struct platform_device *pdev)
if (!jack || !card)
return -ENOMEM;
- card->name = "avs_da7219";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_da7219";
+ } else {
+ card->driver_name = "avs_da7219";
+ card->long_name = card->name = "AVS I2S DA7219";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = dai_link;
@@ -256,7 +263,7 @@ static int avs_da7219_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_da7219_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/dmic.c b/sound/soc/intel/avs/boards/dmic.c
index 4dd9591ee98b..a1448a98874d 100644
--- a/sound/soc/intel/avs/boards/dmic.c
+++ b/sound/soc/intel/avs/boards/dmic.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <sound/soc.h>
#include <sound/soc-acpi.h>
+#include "../utils.h"
SND_SOC_DAILINK_DEF(dmic_pin, DAILINK_COMP_ARRAY(COMP_CPU("DMIC Pin")));
SND_SOC_DAILINK_DEF(dmic_wov_pin, DAILINK_COMP_ARRAY(COMP_CPU("DMIC WoV Pin")));
@@ -49,17 +50,24 @@ static const struct snd_soc_dapm_route card_routes[] = {
static int avs_dmic_probe(struct platform_device *pdev)
{
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
int ret;
mach = dev_get_platdata(dev);
+ pdata = mach->pdata;
card = devm_kzalloc(dev, sizeof(*card), GFP_KERNEL);
if (!card)
return -ENOMEM;
- card->name = "avs_dmic";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_dmic";
+ } else {
+ card->driver_name = "avs_dmic";
+ card->long_name = card->name = "AVS DMIC";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = card_dai_links;
@@ -74,7 +82,7 @@ static int avs_dmic_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_dmic_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/es8336.c b/sound/soc/intel/avs/boards/es8336.c
index 6f3c4f6c9302..1955f2d383c5 100644
--- a/sound/soc/intel/avs/boards/es8336.c
+++ b/sound/soc/intel/avs/boards/es8336.c
@@ -259,6 +259,7 @@ static int avs_es8336_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct avs_card_drvdata *data;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
@@ -267,6 +268,7 @@ static int avs_es8336_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -283,7 +285,12 @@ static int avs_es8336_probe(struct platform_device *pdev)
if (!data || !card)
return -ENOMEM;
- card->name = "avs_es8336";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_es8336";
+ } else {
+ card->driver_name = "avs_es8336";
+ card->long_name = card->name = "AVS I2S ES8336";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -303,7 +310,7 @@ static int avs_es8336_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_es8336_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/hdaudio.c b/sound/soc/intel/avs/boards/hdaudio.c
index cb6d54db7189..19b2255a8ac3 100644
--- a/sound/soc/intel/avs/boards/hdaudio.c
+++ b/sound/soc/intel/avs/boards/hdaudio.c
@@ -13,6 +13,7 @@
#include <sound/soc.h>
#include <sound/soc-acpi.h>
#include "../../../codecs/hda.h"
+#include "../utils.h"
static int avs_create_dai_links(struct device *dev, struct hda_codec *codec, int pcm_count,
const char *platform_name, struct snd_soc_dai_link **links)
@@ -95,7 +96,8 @@ avs_card_hdmi_pcm_at(struct snd_soc_card *card, int hdmi_idx)
static int avs_card_late_probe(struct snd_soc_card *card)
{
struct snd_soc_acpi_mach *mach = dev_get_platdata(card->dev);
- struct hda_codec *codec = mach->pdata;
+ struct avs_mach_pdata *pdata = mach->pdata;
+ struct hda_codec *codec = pdata->codec;
struct hda_pcm *hpcm;
/* Topology pcm indexing is 1-based */
int i = 1;
@@ -124,6 +126,7 @@ static int avs_card_late_probe(struct snd_soc_card *card)
static int avs_probing_link_init(struct snd_soc_pcm_runtime *rtm)
{
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_dai_link *links = NULL;
struct snd_soc_card *card = rtm->card;
struct hda_codec *codec;
@@ -131,7 +134,8 @@ static int avs_probing_link_init(struct snd_soc_pcm_runtime *rtm)
int ret, pcm_count = 0;
mach = dev_get_platdata(card->dev);
- codec = mach->pdata;
+ pdata = mach->pdata;
+ codec = pdata->codec;
if (list_empty(&codec->pcm_list_head))
return -EINVAL;
@@ -167,12 +171,14 @@ static int avs_hdaudio_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *binder;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
struct hda_codec *codec;
mach = dev_get_platdata(dev);
- codec = mach->pdata;
+ pdata = mach->pdata;
+ codec = pdata->codec;
/* codec may be unloaded before card's probe() fires */
if (!device_is_registered(&codec->core.dev))
@@ -200,7 +206,16 @@ static int avs_hdaudio_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = binder->codecs->name;
+ if (pdata->obsolete_card_names) {
+ card->name = binder->codecs->name;
+ } else {
+ card->driver_name = "avs_hdaudio";
+ if (hda_codec_is_display(codec))
+ card->long_name = card->name = "AVS HDMI";
+ else
+ card->long_name = card->name = "AVS HD-Audio";
+ }
+
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = binder;
@@ -209,7 +224,7 @@ static int avs_hdaudio_probe(struct platform_device *pdev)
if (hda_codec_is_display(codec))
card->late_probe = avs_card_late_probe;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_hdaudio_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/i2s_test.c b/sound/soc/intel/avs/boards/i2s_test.c
index 4556f105c793..f7b6d7715738 100644
--- a/sound/soc/intel/avs/boards/i2s_test.c
+++ b/sound/soc/intel/avs/boards/i2s_test.c
@@ -56,6 +56,7 @@ static int avs_i2s_test_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
const char *pname;
@@ -63,6 +64,7 @@ static int avs_i2s_test_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
if (!avs_mach_singular_ssp(mach)) {
dev_err(dev, "Invalid SSP configuration\n");
@@ -80,8 +82,15 @@ static int avs_i2s_test_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = devm_kasprintf(dev, GFP_KERNEL,
- AVS_STRING_FMT("ssp", "-loopback", ssp_port, tdm_slot));
+ if (pdata->obsolete_card_names) {
+ card->name = devm_kasprintf(dev, GFP_KERNEL,
+ AVS_STRING_FMT("ssp", "-loopback", ssp_port, tdm_slot));
+ } else {
+ card->driver_name = "avs_i2s_test";
+ card->long_name = card->name = devm_kasprintf(dev, GFP_KERNEL,
+ AVS_STRING_FMT("AVS I2S TEST-", "",
+ ssp_port, tdm_slot));
+ }
if (!card->name)
return -ENOMEM;
@@ -101,7 +110,7 @@ static int avs_i2s_test_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_i2s_test_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c
index c55c1d60b777..72053f83e98b 100644
--- a/sound/soc/intel/avs/boards/max98357a.c
+++ b/sound/soc/intel/avs/boards/max98357a.c
@@ -93,6 +93,7 @@ static int avs_max98357a_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
const char *pname;
@@ -100,6 +101,7 @@ static int avs_max98357a_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -115,7 +117,12 @@ static int avs_max98357a_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = "avs_max98357a";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_max98357a";
+ } else {
+ card->driver_name = "avs_max98357a";
+ card->long_name = card->name = "AVS I2S MAX98357A";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = dai_link;
@@ -132,7 +139,7 @@ static int avs_max98357a_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_max98357a_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/max98373.c b/sound/soc/intel/avs/boards/max98373.c
index 970f82bf4ce8..cdba1c3ee20b 100644
--- a/sound/soc/intel/avs/boards/max98373.c
+++ b/sound/soc/intel/avs/boards/max98373.c
@@ -146,6 +146,7 @@ static int avs_max98373_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
const char *pname;
@@ -153,6 +154,7 @@ static int avs_max98373_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -168,7 +170,12 @@ static int avs_max98373_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = "avs_max98373";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_max98373";
+ } else {
+ card->driver_name = "avs_max98373";
+ card->long_name = card->name = "AVS I2S MAX98373";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = dai_link;
@@ -187,7 +194,7 @@ static int avs_max98373_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_max98373_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/max98927.c b/sound/soc/intel/avs/boards/max98927.c
index e4ce553bf1d6..a68e227044c5 100644
--- a/sound/soc/intel/avs/boards/max98927.c
+++ b/sound/soc/intel/avs/boards/max98927.c
@@ -143,6 +143,7 @@ static int avs_max98927_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
const char *pname;
@@ -150,6 +151,7 @@ static int avs_max98927_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -165,7 +167,12 @@ static int avs_max98927_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = "avs_max98927";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_max98927";
+ } else {
+ card->driver_name = "avs_max98927";
+ card->long_name = card->name = "AVS I2S MAX98927";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = dai_link;
@@ -184,7 +191,7 @@ static int avs_max98927_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_max98927_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c
index e203ceb83b87..3fb1a5d07ae1 100644
--- a/sound/soc/intel/avs/boards/nau8825.c
+++ b/sound/soc/intel/avs/boards/nau8825.c
@@ -246,6 +246,7 @@ static int avs_nau8825_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct snd_soc_jack *jack;
struct device *dev = &pdev->dev;
@@ -254,6 +255,7 @@ static int avs_nau8825_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -270,7 +272,12 @@ static int avs_nau8825_probe(struct platform_device *pdev)
if (!jack || !card)
return -ENOMEM;
- card->name = "avs_nau8825";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_nau8825";
+ } else {
+ card->driver_name = "avs_nau8825";
+ card->long_name = card->name = "AVS I2S NAU8825";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -290,7 +297,7 @@ static int avs_nau8825_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_nau8825_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/pcm3168a.c b/sound/soc/intel/avs/boards/pcm3168a.c
index 5d0e7a5bdc74..b5bebadbbcb2 100644
--- a/sound/soc/intel/avs/boards/pcm3168a.c
+++ b/sound/soc/intel/avs/boards/pcm3168a.c
@@ -11,6 +11,8 @@
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>
+#include <sound/soc-acpi.h>
+#include "../utils.h"
static const struct snd_soc_dapm_widget card_widgets[] = {
SND_SOC_DAPM_HP("CPB Stereo HP 1", NULL),
@@ -95,10 +97,15 @@ static int avs_create_dai_links(struct device *dev, struct snd_soc_dai_link **li
static int avs_pcm3168a_probe(struct platform_device *pdev)
{
+ struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct device *dev = &pdev->dev;
struct snd_soc_card *card;
int ret;
+ mach = dev_get_platdata(dev);
+ pdata = mach->pdata;
+
card = devm_kzalloc(dev, sizeof(*card), GFP_KERNEL);
if (!card)
return -ENOMEM;
@@ -107,7 +114,12 @@ static int avs_pcm3168a_probe(struct platform_device *pdev)
if (ret)
return ret;
- card->name = "avs_pcm3168a";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_pcm3168a";
+ } else {
+ card->driver_name = "avs_pcm3168a";
+ card->long_name = card->name = "AVS I2S PCM3168A";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dapm_widgets = card_widgets;
@@ -116,7 +128,7 @@ static int avs_pcm3168a_probe(struct platform_device *pdev)
card->num_dapm_routes = ARRAY_SIZE(card_routes);
card->fully_routed = true;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_pcm3168a_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/probe.c b/sound/soc/intel/avs/boards/probe.c
index 1cdc285ab810..06c1f19f27aa 100644
--- a/sound/soc/intel/avs/boards/probe.c
+++ b/sound/soc/intel/avs/boards/probe.c
@@ -36,7 +36,8 @@ static int avs_probe_mb_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = "avs_probe_mb";
+ card->driver_name = "avs_probe_mb";
+ card->long_name = card->name = "AVS PROBE";
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = probe_mb_dai_links;
@@ -47,7 +48,7 @@ static int avs_probe_mb_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_probe_mb_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/rt274.c b/sound/soc/intel/avs/boards/rt274.c
index 29977aee2153..ec5382925157 100644
--- a/sound/soc/intel/avs/boards/rt274.c
+++ b/sound/soc/intel/avs/boards/rt274.c
@@ -210,6 +210,7 @@ static int avs_rt274_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct snd_soc_jack *jack;
struct device *dev = &pdev->dev;
@@ -218,6 +219,7 @@ static int avs_rt274_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -234,7 +236,12 @@ static int avs_rt274_probe(struct platform_device *pdev)
if (!jack || !card)
return -ENOMEM;
- card->name = "avs_rt274";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_rt274";
+ } else {
+ card->driver_name = "avs_rt274";
+ card->long_name = card->name = "AVS I2S ALC274";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -254,7 +261,7 @@ static int avs_rt274_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_rt274_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/rt286.c b/sound/soc/intel/avs/boards/rt286.c
index 6a27c6a9f394..2566e971ce1c 100644
--- a/sound/soc/intel/avs/boards/rt286.c
+++ b/sound/soc/intel/avs/boards/rt286.c
@@ -179,6 +179,7 @@ static int avs_rt286_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct snd_soc_jack *jack;
struct device *dev = &pdev->dev;
@@ -187,6 +188,7 @@ static int avs_rt286_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -204,7 +206,12 @@ static int avs_rt286_probe(struct platform_device *pdev)
if (!jack || !card)
return -ENOMEM;
- card->name = "avs_rt286";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_rt286";
+ } else {
+ card->driver_name = "avs_rt286";
+ card->long_name = card->name = "AVS I2S ALC286";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -224,7 +231,7 @@ static int avs_rt286_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_rt286_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/rt298.c b/sound/soc/intel/avs/boards/rt298.c
index a7a463d6f852..7be34c8ad167 100644
--- a/sound/soc/intel/avs/boards/rt298.c
+++ b/sound/soc/intel/avs/boards/rt298.c
@@ -199,6 +199,7 @@ static int avs_rt298_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct snd_soc_jack *jack;
struct device *dev = &pdev->dev;
@@ -207,6 +208,7 @@ static int avs_rt298_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -223,7 +225,12 @@ static int avs_rt298_probe(struct platform_device *pdev)
if (!jack || !card)
return -ENOMEM;
- card->name = "avs_rt298";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_rt298";
+ } else {
+ card->driver_name = "avs_rt298";
+ card->long_name = card->name = "AVS I2S ALC298";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -243,7 +250,7 @@ static int avs_rt298_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_rt298_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/rt5514.c b/sound/soc/intel/avs/boards/rt5514.c
index d1c3e29a28ff..45f091f2ce22 100644
--- a/sound/soc/intel/avs/boards/rt5514.c
+++ b/sound/soc/intel/avs/boards/rt5514.c
@@ -133,6 +133,7 @@ static int avs_rt5514_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
const char *pname;
@@ -140,6 +141,7 @@ static int avs_rt5514_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -155,7 +157,12 @@ static int avs_rt5514_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = "avs_rt5514";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_rt5514";
+ } else {
+ card->driver_name = "avs_rt5514";
+ card->long_name = card->name = "AVS I2S ALC5514";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = dai_link;
@@ -170,7 +177,7 @@ static int avs_rt5514_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_rt5514_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/rt5663.c b/sound/soc/intel/avs/boards/rt5663.c
index e3310b3268ba..122b6c48fd80 100644
--- a/sound/soc/intel/avs/boards/rt5663.c
+++ b/sound/soc/intel/avs/boards/rt5663.c
@@ -198,6 +198,7 @@ static int avs_rt5663_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct rt5663_private *priv;
struct device *dev = &pdev->dev;
@@ -206,6 +207,7 @@ static int avs_rt5663_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -222,7 +224,12 @@ static int avs_rt5663_probe(struct platform_device *pdev)
if (!priv || !card)
return -ENOMEM;
- card->name = "avs_rt5663";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_rt5663";
+ } else {
+ card->driver_name = "avs_rt5663";
+ card->long_name = card->name = "AVS I2S ALC5640";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -242,7 +249,7 @@ static int avs_rt5663_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_rt5663_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c
index 339df0b944c1..9677b9ebeff1 100644
--- a/sound/soc/intel/avs/boards/rt5682.c
+++ b/sound/soc/intel/avs/boards/rt5682.c
@@ -268,6 +268,7 @@ static int avs_rt5682_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct snd_soc_jack *jack;
struct device *dev = &pdev->dev;
@@ -282,6 +283,7 @@ static int avs_rt5682_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -298,7 +300,12 @@ static int avs_rt5682_probe(struct platform_device *pdev)
if (!jack || !card)
return -ENOMEM;
- card->name = "avs_rt5682";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_rt5682";
+ } else {
+ card->driver_name = "avs_rt5682";
+ card->long_name = card->name = "AVS I2S ALC5682";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->suspend_pre = avs_card_suspend_pre;
@@ -318,7 +325,7 @@ static int avs_rt5682_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_rt5682_driver_ids[] = {
diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c
index 7667790d5273..3786eef8c494 100644
--- a/sound/soc/intel/avs/boards/ssm4567.c
+++ b/sound/soc/intel/avs/boards/ssm4567.c
@@ -132,6 +132,7 @@ static int avs_ssm4567_probe(struct platform_device *pdev)
{
struct snd_soc_dai_link *dai_link;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct snd_soc_card *card;
struct device *dev = &pdev->dev;
const char *pname;
@@ -139,6 +140,7 @@ static int avs_ssm4567_probe(struct platform_device *pdev)
mach = dev_get_platdata(dev);
pname = mach->mach_params.platform;
+ pdata = mach->pdata;
ret = avs_mach_get_ssp_tdm(dev, mach, &ssp_port, &tdm_slot);
if (ret)
@@ -154,7 +156,12 @@ static int avs_ssm4567_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = "avs_ssm4567";
+ if (pdata->obsolete_card_names) {
+ card->name = "avs_ssm4567";
+ } else {
+ card->driver_name = "avs_ssm4567";
+ card->long_name = card->name = "AVS I2S SSM4567";
+ }
card->dev = dev;
card->owner = THIS_MODULE;
card->dai_link = dai_link;
@@ -173,7 +180,7 @@ static int avs_ssm4567_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_snd_soc_register_card(dev, card);
+ return devm_snd_soc_register_deferrable_card(dev, card);
}
static const struct platform_device_id avs_ssm4567_driver_ids[] = {
diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c
index 8fbf33e30dfc..485842838025 100644
--- a/sound/soc/intel/avs/core.c
+++ b/sound/soc/intel/avs/core.c
@@ -54,14 +54,17 @@ void avs_hda_power_gating_enable(struct avs_dev *adev, bool enable)
{
u32 value = enable ? 0 : pgctl_mask;
- avs_hda_update_config_dword(&adev->base.core, AZX_PCIREG_PGCTL, pgctl_mask, value);
+ if (!avs_platattr_test(adev, ACE))
+ avs_hda_update_config_dword(&adev->base.core, AZX_PCIREG_PGCTL, pgctl_mask, value);
}
static void avs_hdac_clock_gating_enable(struct hdac_bus *bus, bool enable)
{
+ struct avs_dev *adev = hdac_to_avs(bus);
u32 value = enable ? cgctl_mask : 0;
- avs_hda_update_config_dword(bus, AZX_PCIREG_CGCTL, cgctl_mask, value);
+ if (!avs_platattr_test(adev, ACE))
+ avs_hda_update_config_dword(bus, AZX_PCIREG_CGCTL, cgctl_mask, value);
}
void avs_hda_clock_gating_enable(struct avs_dev *adev, bool enable)
@@ -71,6 +74,8 @@ void avs_hda_clock_gating_enable(struct avs_dev *adev, bool enable)
void avs_hda_l1sen_enable(struct avs_dev *adev, bool enable)
{
+ if (avs_platattr_test(adev, ACE))
+ return;
if (enable) {
if (atomic_inc_and_test(&adev->l1sen_counter))
snd_hdac_chip_updatel(&adev->base.core, VS_EM2, AZX_VS_EM2_L1SEN,
@@ -99,6 +104,7 @@ static int avs_hdac_bus_init_streams(struct hdac_bus *bus)
static bool avs_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset)
{
+ struct avs_dev *adev = hdac_to_avs(bus);
struct hdac_ext_link *hlink;
bool ret;
@@ -114,7 +120,8 @@ static bool avs_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset)
/* Set DUM bit to address incorrect position reporting for capture
* streams. In order to do so, CTRL needs to be out of reset state
*/
- snd_hdac_chip_updatel(bus, VS_EM2, AZX_VS_EM2_DUM, AZX_VS_EM2_DUM);
+ if (!avs_platattr_test(adev, ACE))
+ snd_hdac_chip_updatel(bus, VS_EM2, AZX_VS_EM2_DUM, AZX_VS_EM2_DUM);
return ret;
}
@@ -445,7 +452,7 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
return ret;
}
- ret = pci_request_regions(pci, "AVS HDAudio");
+ ret = pcim_request_all_regions(pci, "AVS HDAudio");
if (ret < 0)
return ret;
@@ -454,8 +461,7 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
bus->remap_addr = pci_ioremap_bar(pci, 0);
if (!bus->remap_addr) {
dev_err(bus->dev, "ioremap error\n");
- ret = -ENXIO;
- goto err_remap_bar0;
+ return -ENXIO;
}
adev->dsp_ba = pci_ioremap_bar(pci, 4);
@@ -512,8 +518,6 @@ err_init_streams:
iounmap(adev->dsp_ba);
err_remap_bar4:
iounmap(bus->remap_addr);
-err_remap_bar0:
- pci_release_regions(pci);
return ret;
}
@@ -584,7 +588,6 @@ static void avs_pci_remove(struct pci_dev *pci)
pci_free_irq_vectors(pci);
iounmap(bus->remap_addr);
iounmap(adev->dsp_ba);
- pci_release_regions(pci);
/* Firmware is not needed anymore */
avs_release_firmwares(adev);
@@ -748,13 +751,16 @@ static const struct dev_pm_ops avs_dev_pm = {
static const struct avs_sram_spec skl_sram_spec = {
.base_offset = SKL_ADSP_SRAM_BASE_OFFSET,
.window_size = SKL_ADSP_SRAM_WINDOW_SIZE,
- .rom_status_offset = SKL_ADSP_SRAM_BASE_OFFSET,
};
static const struct avs_sram_spec apl_sram_spec = {
.base_offset = APL_ADSP_SRAM_BASE_OFFSET,
.window_size = APL_ADSP_SRAM_WINDOW_SIZE,
- .rom_status_offset = APL_ADSP_SRAM_BASE_OFFSET,
+};
+
+static const struct avs_sram_spec mtl_sram_spec = {
+ .base_offset = MTL_ADSP_SRAM_BASE_OFFSET,
+ .window_size = MTL_ADSP_SRAM_WINDOW_SIZE,
};
static const struct avs_hipc_spec skl_hipc_spec = {
@@ -766,6 +772,19 @@ static const struct avs_hipc_spec skl_hipc_spec = {
.rsp_offset = SKL_ADSP_REG_HIPCT,
.rsp_busy_mask = SKL_ADSP_HIPCT_BUSY,
.ctl_offset = SKL_ADSP_REG_HIPCCTL,
+ .sts_offset = SKL_ADSP_SRAM_BASE_OFFSET,
+};
+
+static const struct avs_hipc_spec apl_hipc_spec = {
+ .req_offset = SKL_ADSP_REG_HIPCI,
+ .req_ext_offset = SKL_ADSP_REG_HIPCIE,
+ .req_busy_mask = SKL_ADSP_HIPCI_BUSY,
+ .ack_offset = SKL_ADSP_REG_HIPCIE,
+ .ack_done_mask = SKL_ADSP_HIPCIE_DONE,
+ .rsp_offset = SKL_ADSP_REG_HIPCT,
+ .rsp_busy_mask = SKL_ADSP_HIPCT_BUSY,
+ .ctl_offset = SKL_ADSP_REG_HIPCCTL,
+ .sts_offset = APL_ADSP_SRAM_BASE_OFFSET,
};
static const struct avs_hipc_spec cnl_hipc_spec = {
@@ -777,6 +796,19 @@ static const struct avs_hipc_spec cnl_hipc_spec = {
.rsp_offset = CNL_ADSP_REG_HIPCTDR,
.rsp_busy_mask = CNL_ADSP_HIPCTDR_BUSY,
.ctl_offset = CNL_ADSP_REG_HIPCCTL,
+ .sts_offset = APL_ADSP_SRAM_BASE_OFFSET,
+};
+
+static const struct avs_hipc_spec lnl_hipc_spec = {
+ .req_offset = MTL_REG_HfIPCxIDR,
+ .req_ext_offset = MTL_REG_HfIPCxIDD,
+ .req_busy_mask = MTL_HfIPCxIDR_BUSY,
+ .ack_offset = MTL_REG_HfIPCxIDA,
+ .ack_done_mask = MTL_HfIPCxIDA_DONE,
+ .rsp_offset = MTL_REG_HfIPCxTDR,
+ .rsp_busy_mask = MTL_HfIPCxTDR_BUSY,
+ .ctl_offset = MTL_REG_HfIPCxCTL,
+ .sts_offset = LNL_REG_HfDFR(0),
};
static const struct avs_spec skl_desc = {
@@ -796,7 +828,7 @@ static const struct avs_spec apl_desc = {
.core_init_mask = 3,
.attributes = AVS_PLATATTR_IMR,
.sram = &apl_sram_spec,
- .hipc = &skl_hipc_spec,
+ .hipc = &apl_hipc_spec,
};
static const struct avs_spec cnl_desc = {
@@ -846,6 +878,16 @@ AVS_TGL_BASED_SPEC(ehl, 30);
AVS_TGL_BASED_SPEC(adl, 35);
AVS_TGL_BASED_SPEC(adl_n, 35);
+static const struct avs_spec fcl_desc = {
+ .name = "fcl",
+ .min_fw_version = { 0 },
+ .dsp_ops = &avs_ptl_dsp_ops,
+ .core_init_mask = 1,
+ .attributes = AVS_PLATATTR_IMR | AVS_PLATATTR_ACE | AVS_PLATATTR_ALTHDA,
+ .sram = &mtl_sram_spec,
+ .hipc = &lnl_hipc_spec,
+};
+
static const struct pci_device_id avs_ids[] = {
{ PCI_DEVICE_DATA(INTEL, HDA_SKL_LP, &skl_desc) },
{ PCI_DEVICE_DATA(INTEL, HDA_SKL, &skl_desc) },
@@ -881,6 +923,7 @@ static const struct pci_device_id avs_ids[] = {
{ PCI_DEVICE_DATA(INTEL, HDA_RPL_P_1, &adl_desc) },
{ PCI_DEVICE_DATA(INTEL, HDA_RPL_M, &adl_desc) },
{ PCI_DEVICE_DATA(INTEL, HDA_RPL_PX, &adl_desc) },
+ { PCI_DEVICE_DATA(INTEL, HDA_FCL, &fcl_desc) },
{ 0 }
};
MODULE_DEVICE_TABLE(pci, avs_ids);
@@ -912,3 +955,4 @@ MODULE_FIRMWARE("intel/tgl/dsp_basefw.bin");
MODULE_FIRMWARE("intel/ehl/dsp_basefw.bin");
MODULE_FIRMWARE("intel/adl/dsp_basefw.bin");
MODULE_FIRMWARE("intel/adl_n/dsp_basefw.bin");
+MODULE_FIRMWARE("intel/fcl/dsp_basefw.bin");
diff --git a/sound/soc/intel/avs/dsp.c b/sound/soc/intel/avs/dsp.c
index b9de691e9b9b..464bd6859182 100644
--- a/sound/soc/intel/avs/dsp.c
+++ b/sound/soc/intel/avs/dsp.c
@@ -12,8 +12,6 @@
#include "registers.h"
#include "trace.h"
-#define AVS_ADSPCS_INTERVAL_US 500
-#define AVS_ADSPCS_TIMEOUT_US 50000
#define AVS_ADSPCS_DELAY_US 1000
int avs_dsp_core_power(struct avs_dev *adev, u32 core_mask, bool power)
diff --git a/sound/soc/intel/avs/lnl.c b/sound/soc/intel/avs/lnl.c
new file mode 100644
index 000000000000..03208596dfb1
--- /dev/null
+++ b/sound/soc/intel/avs/lnl.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2021-2025 Intel Corporation
+ *
+ * Authors: Cezary Rojewski <cezary.rojewski@intel.com>
+ * Amadeusz Slawinski <amadeuszx.slawinski@linux.intel.com>
+ */
+
+#include <sound/hdaudio_ext.h>
+#include "avs.h"
+#include "registers.h"
+
+int avs_lnl_core_stall(struct avs_dev *adev, u32 core_mask, bool stall)
+{
+ struct hdac_bus *bus = &adev->base.core;
+ struct hdac_ext_link *hlink;
+ int ret;
+
+ ret = avs_mtl_core_stall(adev, core_mask, stall);
+
+ /* On unstall, route interrupts from the links to the DSP firmware. */
+ if (!ret && !stall)
+ list_for_each_entry(hlink, &bus->hlink_list, list)
+ snd_hdac_updatel(hlink->ml_addr, AZX_REG_ML_LCTL, AZX_ML_LCTL_OFLEN,
+ AZX_ML_LCTL_OFLEN);
+ return ret;
+}
diff --git a/sound/soc/intel/avs/loader.c b/sound/soc/intel/avs/loader.c
index 0b29941feb0e..138e4e9de5e3 100644
--- a/sound/soc/intel/avs/loader.c
+++ b/sound/soc/intel/avs/loader.c
@@ -310,7 +310,7 @@ avs_hda_init_rom(struct avs_dev *adev, unsigned int dma_id, bool purge)
}
/* await ROM init */
- ret = snd_hdac_adsp_readl_poll(adev, spec->sram->rom_status_offset, reg,
+ ret = snd_hdac_adsp_readl_poll(adev, spec->hipc->sts_offset, reg,
(reg & 0xF) == AVS_ROM_INIT_DONE ||
(reg & 0xF) == APL_ROM_FW_ENTERED,
AVS_ROM_INIT_POLLING_US, APL_ROM_INIT_TIMEOUT_US);
@@ -683,6 +683,7 @@ int avs_dsp_boot_firmware(struct avs_dev *adev, bool purge)
static int avs_dsp_alloc_resources(struct avs_dev *adev)
{
+ struct hdac_ext_link *link;
int ret, i;
ret = avs_ipc_get_hw_config(adev, &adev->hw_cfg);
@@ -693,6 +694,14 @@ static int avs_dsp_alloc_resources(struct avs_dev *adev)
if (ret)
return AVS_IPC_RET(ret);
+ /* If hw allows, read capabilities directly from it. */
+ if (avs_platattr_test(adev, ALTHDA)) {
+ link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core,
+ AZX_REG_ML_LEPTR_ID_INTEL_SSP);
+ if (link)
+ adev->hw_cfg.i2s_caps.ctrl_count = link->slcount;
+ }
+
adev->core_refs = devm_kcalloc(adev->dev, adev->hw_cfg.dsp_cores,
sizeof(*adev->core_refs), GFP_KERNEL);
adev->lib_names = devm_kcalloc(adev->dev, adev->fw_cfg.max_libs_count,
diff --git a/sound/soc/intel/avs/messages.h b/sound/soc/intel/avs/messages.h
index 2f243802ccc2..55c04b0142ae 100644
--- a/sound/soc/intel/avs/messages.h
+++ b/sound/soc/intel/avs/messages.h
@@ -102,6 +102,8 @@ struct avs_tlv {
} __packed;
static_assert(sizeof(struct avs_tlv) == 8);
+#define avs_tlv_size(tlv) struct_size(tlv, value, (tlv)->length / 4)
+
enum avs_module_msg_type {
AVS_MOD_INIT_INSTANCE = 0,
AVS_MOD_LARGE_CONFIG_GET = 3,
@@ -697,8 +699,9 @@ enum avs_sample_type {
AVS_SAMPLE_TYPE_FLOAT = 4,
};
-#define AVS_CHANNELS_MAX 8
+#define AVS_COEFF_CHANNELS_MAX 8
#define AVS_ALL_CHANNELS_MASK UINT_MAX
+#define AVS_CHANNELS_MAX 16
struct avs_audio_format {
u32 sampling_freq;
@@ -786,6 +789,33 @@ union avs_gtw_attributes {
} __packed;
static_assert(sizeof(union avs_gtw_attributes) == 4);
+#define AVS_GTW_DMA_CONFIG_ID 0x1000
+#define AVS_DMA_METHOD_HDA 1
+
+struct avs_dma_device_stream_channel_map {
+ u32 device_address;
+ u32 channel_map;
+} __packed;
+static_assert(sizeof(struct avs_dma_device_stream_channel_map) == 8);
+
+struct avs_dma_stream_channel_map {
+ u32 device_count;
+ struct avs_dma_device_stream_channel_map map[16];
+} __packed;
+static_assert(sizeof(struct avs_dma_stream_channel_map) == 132);
+
+struct avs_dma_cfg {
+ u8 dma_method;
+ u8 pre_allocated;
+ u16 rsvd;
+ u32 dma_channel_id;
+ u32 stream_id;
+ struct avs_dma_stream_channel_map map;
+ u32 config_size;
+ u8 config[] __counted_by(config_size);
+} __packed;
+static_assert(sizeof(struct avs_dma_cfg) == 148);
+
struct avs_copier_gtw_cfg {
union avs_connector_node_id node_id;
u32 dma_buffer_size;
@@ -846,7 +876,7 @@ struct avs_updown_mixer_cfg {
struct avs_modcfg_base base;
u32 out_channel_config;
u32 coefficients_select;
- s32 coefficients[AVS_CHANNELS_MAX];
+ s32 coefficients[AVS_COEFF_CHANNELS_MAX];
u32 channel_map;
} __packed;
static_assert(sizeof(struct avs_updown_mixer_cfg) == 84);
diff --git a/sound/soc/intel/avs/mtl.c b/sound/soc/intel/avs/mtl.c
new file mode 100644
index 000000000000..e7b7915b2a82
--- /dev/null
+++ b/sound/soc/intel/avs/mtl.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2021-2025 Intel Corporation
+ *
+ * Authors: Cezary Rojewski <cezary.rojewski@intel.com>
+ * Amadeusz Slawinski <amadeuszx.slawinski@linux.intel.com>
+ */
+
+#include <sound/hdaudio_ext.h>
+#include "avs.h"
+#include "registers.h"
+#include "trace.h"
+
+#define MTL_HfDSSGBL_BASE 0x1000
+#define MTL_REG_HfDSSCS (MTL_HfDSSGBL_BASE + 0x0)
+#define MTL_HfDSSCS_SPA BIT(16)
+#define MTL_HfDSSCS_CPA BIT(24)
+
+#define MTL_DSPCS_BASE 0x178D00
+#define MTL_REG_DSPCCTL (MTL_DSPCS_BASE + 0x4)
+#define MTL_DSPCCTL_SPA BIT(0)
+#define MTL_DSPCCTL_CPA BIT(8)
+#define MTL_DSPCCTL_OSEL GENMASK(25, 24)
+#define MTL_DSPCCTL_OSEL_HOST BIT(25)
+
+#define MTL_HfINT_BASE 0x1100
+#define MTL_REG_HfINTIPPTR (MTL_HfINT_BASE + 0x8)
+#define MTL_REG_HfHIPCIE (MTL_HfINT_BASE + 0x40)
+#define MTL_HfINTIPPTR_PTR GENMASK(20, 0)
+#define MTL_HfHIPCIE_IE BIT(0)
+
+#define MTL_DWICTL_INTENL_IE BIT(0)
+#define MTL_DWICTL_FINALSTATUSL_IPC BIT(0) /* same as ADSPIS_IPC */
+
+static int avs_mtl_core_power_on(struct avs_dev *adev)
+{
+ u32 reg;
+ int ret;
+
+ /* Power up DSP domain. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfDSSCS, MTL_HfDSSCS_SPA, MTL_HfDSSCS_SPA);
+ trace_avs_dsp_core_op(1, AVS_MAIN_CORE_MASK, "power dsp", true);
+
+ ret = snd_hdac_adsp_readl_poll(adev, MTL_REG_HfDSSCS, reg,
+ (reg & MTL_HfDSSCS_CPA) == MTL_HfDSSCS_CPA,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adev->dev, "power on domain dsp failed: %d\n", ret);
+ return ret;
+ }
+
+ /* Prevent power gating of DSP domain. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfPWRCTL, MTL_HfPWRCTL_WPDSPHPxPG,
+ MTL_HfPWRCTL_WPDSPHPxPG);
+ trace_avs_dsp_core_op(1, AVS_MAIN_CORE_MASK, "prevent dsp PG", true);
+
+ ret = snd_hdac_adsp_readl_poll(adev, MTL_REG_HfPWRSTS, reg,
+ (reg & MTL_HfPWRSTS_DSPHPxPGS) == MTL_HfPWRSTS_DSPHPxPGS,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+
+ /* Set ownership to HOST. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_DSPCCTL, MTL_DSPCCTL_OSEL, MTL_DSPCCTL_OSEL_HOST);
+ return ret;
+}
+
+static int avs_mtl_core_power_off(struct avs_dev *adev)
+{
+ u32 reg;
+
+ /* Allow power gating of DSP domain. No STS polling as HOST is only one of its users. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfPWRCTL, MTL_HfPWRCTL_WPDSPHPxPG, 0);
+ trace_avs_dsp_core_op(0, AVS_MAIN_CORE_MASK, "allow dsp pg", false);
+
+ /* Power down DSP domain. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfDSSCS, MTL_HfDSSCS_SPA, 0);
+ trace_avs_dsp_core_op(0, AVS_MAIN_CORE_MASK, "power dsp", false);
+
+ return snd_hdac_adsp_readl_poll(adev, MTL_REG_HfDSSCS, reg,
+ (reg & MTL_HfDSSCS_CPA) == 0,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+}
+
+int avs_mtl_core_power(struct avs_dev *adev, u32 core_mask, bool power)
+{
+ core_mask &= AVS_MAIN_CORE_MASK;
+ if (!core_mask)
+ return 0;
+
+ if (power)
+ return avs_mtl_core_power_on(adev);
+ return avs_mtl_core_power_off(adev);
+}
+
+int avs_mtl_core_reset(struct avs_dev *adev, u32 core_mask, bool power)
+{
+ /* No logical equivalent on ACE 1.x. */
+ return 0;
+}
+
+int avs_mtl_core_stall(struct avs_dev *adev, u32 core_mask, bool stall)
+{
+ u32 value, reg;
+ int ret;
+
+ core_mask &= AVS_MAIN_CORE_MASK;
+ if (!core_mask)
+ return 0;
+
+ value = snd_hdac_adsp_readl(adev, MTL_REG_DSPCCTL);
+ trace_avs_dsp_core_op(value, core_mask, "stall", stall);
+ if (value == UINT_MAX)
+ return 0;
+
+ value = stall ? 0 : MTL_DSPCCTL_SPA;
+ snd_hdac_adsp_updatel(adev, MTL_REG_DSPCCTL, MTL_DSPCCTL_SPA, value);
+
+ value = stall ? 0 : MTL_DSPCCTL_CPA;
+ ret = snd_hdac_adsp_readl_poll(adev, MTL_REG_DSPCCTL,
+ reg, (reg & MTL_DSPCCTL_CPA) == value,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+ if (ret)
+ dev_err(adev->dev, "core_mask %d %sstall failed: %d\n",
+ core_mask, stall ? "" : "un", ret);
+ return ret;
+}
+
+static void avs_mtl_ipc_interrupt(struct avs_dev *adev)
+{
+ const struct avs_spec *spec = adev->spec;
+ u32 hipc_ack, hipc_rsp;
+
+ snd_hdac_adsp_updatel(adev, spec->hipc->ctl_offset,
+ AVS_ADSP_HIPCCTL_DONE | AVS_ADSP_HIPCCTL_BUSY, 0);
+
+ hipc_ack = snd_hdac_adsp_readl(adev, spec->hipc->ack_offset);
+ hipc_rsp = snd_hdac_adsp_readl(adev, spec->hipc->rsp_offset);
+
+ /* DSP acked host's request. */
+ if (hipc_ack & spec->hipc->ack_done_mask) {
+ complete(&adev->ipc->done_completion);
+
+ /* Tell DSP it has our attention. */
+ snd_hdac_adsp_updatel(adev, spec->hipc->ack_offset, spec->hipc->ack_done_mask,
+ spec->hipc->ack_done_mask);
+ }
+
+ /* DSP sent new response to process. */
+ if (hipc_rsp & spec->hipc->rsp_busy_mask) {
+ union avs_reply_msg msg;
+
+ msg.primary = snd_hdac_adsp_readl(adev, MTL_REG_HfIPCxTDR);
+ msg.ext.val = snd_hdac_adsp_readl(adev, MTL_REG_HfIPCxTDD);
+
+ avs_dsp_process_response(adev, msg.val);
+
+ /* Tell DSP we accepted its message. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfIPCxTDR,
+ MTL_HfIPCxTDR_BUSY, MTL_HfIPCxTDR_BUSY);
+ /* Ack this response. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfIPCxTDA, MTL_HfIPCxTDA_BUSY, 0);
+ }
+
+ snd_hdac_adsp_updatel(adev, spec->hipc->ctl_offset,
+ AVS_ADSP_HIPCCTL_DONE | AVS_ADSP_HIPCCTL_BUSY,
+ AVS_ADSP_HIPCCTL_DONE | AVS_ADSP_HIPCCTL_BUSY);
+}
+
+irqreturn_t avs_mtl_dsp_interrupt(struct avs_dev *adev)
+{
+ u32 adspis = snd_hdac_adsp_readl(adev, MTL_DWICTL_REG_FINALSTATUSL);
+ irqreturn_t ret = IRQ_NONE;
+
+ if (adspis == UINT_MAX)
+ return ret;
+
+ if (adspis & MTL_DWICTL_FINALSTATUSL_IPC) {
+ avs_mtl_ipc_interrupt(adev);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+void avs_mtl_interrupt_control(struct avs_dev *adev, bool enable)
+{
+ if (enable) {
+ snd_hdac_adsp_updatel(adev, MTL_DWICTL_REG_INTENL, MTL_DWICTL_INTENL_IE,
+ MTL_DWICTL_INTENL_IE);
+ snd_hdac_adsp_updatew(adev, MTL_REG_HfHIPCIE, MTL_HfHIPCIE_IE, MTL_HfHIPCIE_IE);
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfIPCxCTL, AVS_ADSP_HIPCCTL_DONE,
+ AVS_ADSP_HIPCCTL_DONE);
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfIPCxCTL, AVS_ADSP_HIPCCTL_BUSY,
+ AVS_ADSP_HIPCCTL_BUSY);
+ } else {
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfIPCxCTL, AVS_ADSP_HIPCCTL_BUSY, 0);
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfIPCxCTL, AVS_ADSP_HIPCCTL_DONE, 0);
+ snd_hdac_adsp_updatew(adev, MTL_REG_HfHIPCIE, MTL_HfHIPCIE_IE, 0);
+ snd_hdac_adsp_updatel(adev, MTL_DWICTL_REG_INTENL, MTL_DWICTL_INTENL_IE, 0);
+ }
+}
diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c
index cafb8c6198be..ed8f0ea0e10d 100644
--- a/sound/soc/intel/avs/path.c
+++ b/sound/soc/intel/avs/path.c
@@ -131,9 +131,9 @@ int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template
list_for_each_entry(path_template, &template->path_list, node)
i++;
- rlist = kcalloc(i, sizeof(rlist), GFP_KERNEL);
- clist = kcalloc(i, sizeof(clist), GFP_KERNEL);
- slist = kcalloc(i, sizeof(slist), GFP_KERNEL);
+ rlist = kcalloc(i, sizeof(*rlist), GFP_KERNEL);
+ clist = kcalloc(i, sizeof(*clist), GFP_KERNEL);
+ slist = kcalloc(i, sizeof(*slist), GFP_KERNEL);
i = 0;
list_for_each_entry(path_template, &template->path_list, node) {
@@ -282,8 +282,51 @@ avs_nhlt_config_or_default(struct avs_dev *adev, struct avs_tplg_module *t)
return &fmtcfg->config;
}
+static int avs_append_dma_cfg(struct avs_dev *adev, struct avs_copier_gtw_cfg *gtw,
+ struct avs_tplg_module *t, u32 dma_id, size_t *cfg_size)
+{
+ u32 dma_type = t->cfg_ext->copier.dma_type;
+ struct avs_dma_cfg *dma;
+ struct avs_tlv *tlv;
+ size_t tlv_size;
+
+ if (!avs_platattr_test(adev, ALTHDA))
+ return 0;
+
+ switch (dma_type) {
+ case AVS_DMA_HDA_HOST_OUTPUT:
+ case AVS_DMA_HDA_HOST_INPUT:
+ case AVS_DMA_HDA_LINK_OUTPUT:
+ case AVS_DMA_HDA_LINK_INPUT:
+ return 0;
+ default:
+ break;
+ }
+
+ tlv_size = sizeof(*tlv) + sizeof(*dma);
+ if (*cfg_size + tlv_size > AVS_MAILBOX_SIZE)
+ return -E2BIG;
+
+ /* DMA config is a TLV tailing the existing payload. */
+ tlv = (struct avs_tlv *)&gtw->config.blob[gtw->config_length];
+ tlv->type = AVS_GTW_DMA_CONFIG_ID;
+ tlv->length = sizeof(*dma);
+
+ dma = (struct avs_dma_cfg *)tlv->value;
+ memset(dma, 0, sizeof(*dma));
+ dma->dma_method = AVS_DMA_METHOD_HDA;
+ dma->pre_allocated = true;
+ dma->dma_channel_id = dma_id;
+ dma->stream_id = dma_id + 1;
+
+ gtw->config_length += tlv_size / sizeof(u32);
+ *cfg_size += tlv_size;
+
+ return 0;
+}
+
static int avs_fill_gtw_config(struct avs_dev *adev, struct avs_copier_gtw_cfg *gtw,
- struct avs_tplg_module *t, size_t *cfg_size)
+ struct avs_tplg_module *t, u32 dma_id, size_t *cfg_size)
{
struct acpi_nhlt_config *blob;
size_t gtw_size;
@@ -300,7 +343,7 @@ static int avs_fill_gtw_config(struct avs_dev *adev, struct avs_copier_gtw_cfg *
memcpy(gtw->config.blob, blob->capabilities, blob->capabilities_size);
*cfg_size += gtw_size;
- return 0;
+ return avs_append_dma_cfg(adev, gtw, t, dma_id, cfg_size);
}
static int avs_copier_create(struct avs_dev *adev, struct avs_path_module *mod)
@@ -317,7 +360,7 @@ static int avs_copier_create(struct avs_dev *adev, struct avs_path_module *mod)
dma_id = mod->owner->owner->dma_id;
cfg_size = offsetof(struct avs_copier_cfg, gtw_cfg.config);
- ret = avs_fill_gtw_config(adev, &cfg->gtw_cfg, t, &cfg_size);
+ ret = avs_fill_gtw_config(adev, &cfg->gtw_cfg, t, dma_id, &cfg_size);
if (ret)
return ret;
@@ -351,7 +394,7 @@ static int avs_whm_create(struct avs_dev *adev, struct avs_path_module *mod)
dma_id = mod->owner->owner->dma_id;
cfg_size = offsetof(struct avs_whm_cfg, gtw_cfg.config);
- ret = avs_fill_gtw_config(adev, &cfg->gtw_cfg, t, &cfg_size);
+ ret = avs_fill_gtw_config(adev, &cfg->gtw_cfg, t, dma_id, &cfg_size);
if (ret)
return ret;
@@ -524,7 +567,7 @@ static int avs_updown_mix_create(struct avs_dev *adev, struct avs_path_module *m
cfg.base.audio_fmt = *t->in_fmt;
cfg.out_channel_config = t->cfg_ext->updown_mix.out_channel_config;
cfg.coefficients_select = t->cfg_ext->updown_mix.coefficients_select;
- for (i = 0; i < AVS_CHANNELS_MAX; i++)
+ for (i = 0; i < AVS_COEFF_CHANNELS_MAX; i++)
cfg.coefficients[i] = t->cfg_ext->updown_mix.coefficients[i];
cfg.channel_map = t->cfg_ext->updown_mix.channel_map;
diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c
index d83ef504643b..405cfc1ab0cb 100644
--- a/sound/soc/intel/avs/pcm.c
+++ b/sound/soc/intel/avs/pcm.c
@@ -18,6 +18,7 @@
#include "path.h"
#include "pcm.h"
#include "topology.h"
+#include "utils.h"
#include "../../codecs/hda.h"
struct avs_dma_data {
@@ -36,6 +37,7 @@ struct avs_dma_data {
struct snd_pcm_hw_constraint_list sample_bits_list;
struct work_struct period_elapsed_work;
+ struct hdac_ext_link *link;
struct snd_pcm_substream *substream;
};
@@ -325,32 +327,75 @@ static const struct snd_soc_dai_ops avs_dai_nonhda_be_ops = {
.trigger = avs_dai_nonhda_be_trigger,
};
-static int avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+static int __avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai,
+ struct hdac_ext_link *link)
{
- struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct hdac_ext_stream *link_stream;
struct avs_dma_data *data;
- struct hda_codec *codec;
int ret;
ret = avs_dai_startup(substream, dai);
if (ret)
return ret;
- codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev);
- link_stream = snd_hdac_ext_stream_assign(&codec->bus->core, substream,
+ data = snd_soc_dai_get_dma_data(dai, substream);
+ link_stream = snd_hdac_ext_stream_assign(&data->adev->base.core, substream,
HDAC_EXT_STREAM_TYPE_LINK);
if (!link_stream) {
avs_dai_shutdown(substream, dai);
return -EBUSY;
}
- data = snd_soc_dai_get_dma_data(dai, substream);
data->link_stream = link_stream;
- substream->runtime->private_data = link_stream;
+ data->link = link;
return 0;
}
+static int avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+ struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+ struct hdac_ext_link *link;
+ struct avs_dma_data *data;
+ struct hda_codec *codec;
+ int ret;
+
+ codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev);
+
+ link = snd_hdac_ext_bus_get_hlink_by_addr(&codec->bus->core, codec->core.addr);
+ if (!link)
+ return -EINVAL;
+
+ ret = __avs_dai_hda_be_startup(substream, dai, link);
+ if (!ret) {
+ data = snd_soc_dai_get_dma_data(dai, substream);
+ substream->runtime->private_data = data->link_stream;
+ }
+
+ return ret;
+}
+
+static int avs_dai_i2shda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+ struct avs_dev *adev = to_avs_dev(dai->component->dev);
+ struct hdac_ext_link *link;
+
+ link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, AZX_REG_ML_LEPTR_ID_INTEL_SSP);
+ if (!link)
+ return -EINVAL;
+ return __avs_dai_hda_be_startup(substream, dai, link);
+}
+
+static int avs_dai_dmichda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+ struct avs_dev *adev = to_avs_dev(dai->component->dev);
+ struct hdac_ext_link *link;
+
+ link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, AZX_REG_ML_LEPTR_ID_INTEL_DMIC);
+ if (!link)
+ return -EINVAL;
+ return __avs_dai_hda_be_startup(substream, dai, link);
+}
+
static void avs_dai_hda_be_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
{
struct avs_dma_data *data = snd_soc_dai_get_dma_data(dai, substream);
@@ -360,6 +405,14 @@ static void avs_dai_hda_be_shutdown(struct snd_pcm_substream *substream, struct
avs_dai_shutdown(substream, dai);
}
+static void avs_dai_althda_be_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+ struct avs_dma_data *data = snd_soc_dai_get_dma_data(dai, substream);
+
+ snd_hdac_ext_stream_release(data->link_stream, HDAC_EXT_STREAM_TYPE_LINK);
+ avs_dai_shutdown(substream, dai);
+}
+
static int avs_dai_hda_be_hw_params(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *dai)
{
@@ -375,13 +428,8 @@ static int avs_dai_hda_be_hw_params(struct snd_pcm_substream *substream,
static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
{
- struct avs_dma_data *data;
- struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct hdac_ext_stream *link_stream;
- struct hdac_ext_link *link;
- struct hda_codec *codec;
-
- dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
+ struct avs_dma_data *data;
data = snd_soc_dai_get_dma_data(dai, substream);
if (!data->path)
@@ -393,27 +441,19 @@ static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct sn
data->path = NULL;
/* clear link <-> stream mapping */
- codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev);
- link = snd_hdac_ext_bus_get_hlink_by_addr(&codec->bus->core, codec->core.addr);
- if (!link)
- return -EINVAL;
-
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
- snd_hdac_ext_bus_link_clear_stream_id(link, hdac_stream(link_stream)->stream_tag);
+ snd_hdac_ext_bus_link_clear_stream_id(data->link,
+ hdac_stream(link_stream)->stream_tag);
return 0;
}
static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
{
- struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct snd_pcm_runtime *runtime = substream->runtime;
const struct snd_soc_pcm_stream *stream_info;
struct hdac_ext_stream *link_stream;
- struct hdac_ext_link *link;
struct avs_dma_data *data;
- struct hda_codec *codec;
- struct hdac_bus *bus;
unsigned int format_val;
unsigned int bits;
int ret;
@@ -424,23 +464,18 @@ static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct sn
if (link_stream->link_prepared)
return 0;
- codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev);
- bus = &codec->bus->core;
stream_info = snd_soc_dai_get_pcm_stream(dai, substream->stream);
bits = snd_hdac_stream_format_bits(runtime->format, runtime->subformat,
stream_info->sig_bits);
format_val = snd_hdac_stream_format(runtime->channels, bits, runtime->rate);
- snd_hdac_ext_stream_decouple(bus, link_stream, true);
+ snd_hdac_ext_stream_decouple(&data->adev->base.core, link_stream, true);
snd_hdac_ext_stream_reset(link_stream);
snd_hdac_ext_stream_setup(link_stream, format_val);
- link = snd_hdac_ext_bus_get_hlink_by_addr(bus, codec->core.addr);
- if (!link)
- return -EINVAL;
-
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
- snd_hdac_ext_bus_link_set_stream_id(link, hdac_stream(link_stream)->stream_tag);
+ snd_hdac_ext_bus_link_set_stream_id(data->link,
+ hdac_stream(link_stream)->stream_tag);
ret = avs_dai_prepare(substream, dai);
if (ret)
@@ -515,6 +550,24 @@ static const struct snd_soc_dai_ops avs_dai_hda_be_ops = {
.trigger = avs_dai_hda_be_trigger,
};
+static const struct snd_soc_dai_ops avs_dai_i2shda_be_ops = {
+ .startup = avs_dai_i2shda_be_startup,
+ .shutdown = avs_dai_althda_be_shutdown,
+ .hw_params = avs_dai_hda_be_hw_params,
+ .hw_free = avs_dai_hda_be_hw_free,
+ .prepare = avs_dai_hda_be_prepare,
+ .trigger = avs_dai_hda_be_trigger,
+};
+
+static const struct snd_soc_dai_ops avs_dai_dmichda_be_ops = {
+ .startup = avs_dai_dmichda_be_startup,
+ .shutdown = avs_dai_althda_be_shutdown,
+ .hw_params = avs_dai_hda_be_hw_params,
+ .hw_free = avs_dai_hda_be_hw_free,
+ .prepare = avs_dai_hda_be_prepare,
+ .trigger = avs_dai_hda_be_trigger,
+};
+
static int hw_rule_param_size(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule)
{
struct snd_interval *interval = hw_param_interval(params, rule->var);
@@ -1314,7 +1367,7 @@ static int avs_component_construct(struct snd_soc_component *component,
return 0;
}
-static const struct snd_soc_component_driver avs_component_driver = {
+static struct snd_soc_component_driver avs_component_driver = {
.name = "avs-pcm",
.probe = avs_component_probe,
.remove = avs_component_remove,
@@ -1329,7 +1382,7 @@ static const struct snd_soc_component_driver avs_component_driver = {
};
int avs_soc_component_register(struct device *dev, const char *name,
- const struct snd_soc_component_driver *drv,
+ struct snd_soc_component_driver *drv,
struct snd_soc_dai_driver *cpu_dais, int num_cpu_dais)
{
struct avs_soc_component *acomp;
@@ -1347,13 +1400,14 @@ int avs_soc_component_register(struct device *dev, const char *name,
acomp->base.name = name;
INIT_LIST_HEAD(&acomp->node);
+ drv->use_dai_pcm_id = !obsolete_card_names;
+
return snd_soc_add_component(&acomp->base, cpu_dais, num_cpu_dais);
}
static struct snd_soc_dai_driver dmic_cpu_dais[] = {
{
.name = "DMIC Pin",
- .ops = &avs_dai_nonhda_be_ops,
.capture = {
.stream_name = "DMIC Rx",
.channels_min = 1,
@@ -1364,7 +1418,6 @@ static struct snd_soc_dai_driver dmic_cpu_dais[] = {
},
{
.name = "DMIC WoV Pin",
- .ops = &avs_dai_nonhda_be_ops,
.capture = {
.stream_name = "DMIC WoV Rx",
.channels_min = 1,
@@ -1377,15 +1430,23 @@ static struct snd_soc_dai_driver dmic_cpu_dais[] = {
int avs_dmic_platform_register(struct avs_dev *adev, const char *name)
{
+ const struct snd_soc_dai_ops *ops;
+
+ if (avs_platattr_test(adev, ALTHDA))
+ ops = &avs_dai_dmichda_be_ops;
+ else
+ ops = &avs_dai_nonhda_be_ops;
+
+ dmic_cpu_dais[0].ops = ops;
+ dmic_cpu_dais[1].ops = ops;
return avs_soc_component_register(adev->dev, name, &avs_component_driver, dmic_cpu_dais,
ARRAY_SIZE(dmic_cpu_dais));
}
static const struct snd_soc_dai_driver i2s_dai_template = {
- .ops = &avs_dai_nonhda_be_ops,
.playback = {
.channels_min = 1,
- .channels_max = 8,
+ .channels_max = AVS_CHANNELS_MAX,
.rates = SNDRV_PCM_RATE_8000_192000 |
SNDRV_PCM_RATE_12000 |
SNDRV_PCM_RATE_24000 |
@@ -1398,7 +1459,7 @@ static const struct snd_soc_dai_driver i2s_dai_template = {
},
.capture = {
.channels_min = 1,
- .channels_max = 8,
+ .channels_max = AVS_CHANNELS_MAX,
.rates = SNDRV_PCM_RATE_8000_192000 |
SNDRV_PCM_RATE_12000 |
SNDRV_PCM_RATE_24000 |
@@ -1415,10 +1476,15 @@ int avs_i2s_platform_register(struct avs_dev *adev, const char *name, unsigned l
unsigned long *tdms)
{
struct snd_soc_dai_driver *cpus, *dai;
+ const struct snd_soc_dai_ops *ops;
size_t ssp_count, cpu_count;
int i, j;
ssp_count = adev->hw_cfg.i2s_caps.ctrl_count;
+ if (avs_platattr_test(adev, ALTHDA))
+ ops = &avs_dai_i2shda_be_ops;
+ else
+ ops = &avs_dai_nonhda_be_ops;
cpu_count = 0;
for_each_set_bit(i, &port_mask, ssp_count)
@@ -1446,6 +1512,7 @@ int avs_i2s_platform_register(struct avs_dev *adev, const char *name, unsigned l
if (!dai->name || !dai->playback.stream_name || !dai->capture.stream_name)
return -ENOMEM;
+ dai->ops = ops;
dai++;
}
}
@@ -1454,7 +1521,7 @@ int avs_i2s_platform_register(struct avs_dev *adev, const char *name, unsigned l
goto plat_register;
for_each_set_bit(i, &port_mask, ssp_count) {
- for_each_set_bit(j, &tdms[i], ssp_count) {
+ for_each_set_bit(j, &tdms[i], AVS_CHANNELS_MAX) {
memcpy(dai, &i2s_dai_template, sizeof(*dai));
dai->name =
@@ -1466,6 +1533,7 @@ int avs_i2s_platform_register(struct avs_dev *adev, const char *name, unsigned l
if (!dai->name || !dai->playback.stream_name || !dai->capture.stream_name)
return -ENOMEM;
+ dai->ops = ops;
dai++;
}
}
@@ -1479,7 +1547,7 @@ static const struct snd_soc_dai_driver hda_cpu_dai = {
.ops = &avs_dai_hda_be_ops,
.playback = {
.channels_min = 1,
- .channels_max = 8,
+ .channels_max = AVS_CHANNELS_MAX,
.rates = SNDRV_PCM_RATE_8000_192000,
.formats = SNDRV_PCM_FMTBIT_S16_LE |
SNDRV_PCM_FMTBIT_S32_LE,
@@ -1489,7 +1557,7 @@ static const struct snd_soc_dai_driver hda_cpu_dai = {
},
.capture = {
.channels_min = 1,
- .channels_max = 8,
+ .channels_max = AVS_CHANNELS_MAX,
.rates = SNDRV_PCM_RATE_8000_192000,
.formats = SNDRV_PCM_FMTBIT_S16_LE |
SNDRV_PCM_FMTBIT_S32_LE,
@@ -1528,6 +1596,7 @@ static int avs_component_hda_probe(struct snd_soc_component *component)
struct snd_soc_dapm_context *dapm;
struct snd_soc_dai_driver *dais;
struct snd_soc_acpi_mach *mach;
+ struct avs_mach_pdata *pdata;
struct hda_codec *codec;
struct hda_pcm *pcm;
const char *cname;
@@ -1537,7 +1606,8 @@ static int avs_component_hda_probe(struct snd_soc_component *component)
if (!mach)
return -EINVAL;
- codec = mach->pdata;
+ pdata = mach->pdata;
+ codec = pdata->codec;
if (list_empty(&codec->pcm_list_head))
return -EINVAL;
list_for_each_entry(pcm, &codec->pcm_list_head, list)
@@ -1671,7 +1741,7 @@ static int avs_component_hda_open(struct snd_soc_component *component,
return 0;
}
-static const struct snd_soc_component_driver avs_hda_component_driver = {
+static struct snd_soc_component_driver avs_hda_component_driver = {
.name = "avs-hda-pcm",
.probe = avs_component_hda_probe,
.remove = avs_component_hda_remove,
diff --git a/sound/soc/intel/avs/probes.c b/sound/soc/intel/avs/probes.c
index f0b010956303..a42736b9aa55 100644
--- a/sound/soc/intel/avs/probes.c
+++ b/sound/soc/intel/avs/probes.c
@@ -284,7 +284,7 @@ static struct snd_soc_dai_driver probe_cpu_dais[] = {
},
};
-static const struct snd_soc_component_driver avs_probe_component_driver = {
+static struct snd_soc_component_driver avs_probe_component_driver = {
.name = "avs-probe-compr",
.compress_ops = &avs_probe_compress_ops,
.module_get_upon_open = 1, /* increment refcount when a stream is opened */
diff --git a/sound/soc/intel/avs/ptl.c b/sound/soc/intel/avs/ptl.c
new file mode 100644
index 000000000000..2be4b545c91d
--- /dev/null
+++ b/sound/soc/intel/avs/ptl.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2024-2025 Intel Corporation
+ *
+ * Authors: Cezary Rojewski <cezary.rojewski@intel.com>
+ * Amadeusz Slawinski <amadeuszx.slawinski@linux.intel.com>
+ */
+
+#include <sound/hdaudio_ext.h>
+#include "avs.h"
+#include "registers.h"
+#include "trace.h"
+
+#define MTL_HfDSSGBL_BASE 0x1000
+#define MTL_REG_HfDSSCS (MTL_HfDSSGBL_BASE + 0x0)
+#define MTL_HfDSSCS_SPA BIT(16)
+#define MTL_HfDSSCS_CPA BIT(24)
+
+#define MTL_DSPCS_BASE 0x178D00
+#define MTL_REG_DSPCCTL (MTL_DSPCS_BASE + 0x4)
+#define MTL_DSPCCTL_OSEL GENMASK(25, 24)
+#define MTL_DSPCCTL_OSEL_HOST BIT(25)
+
+static int avs_ptl_core_power_on(struct avs_dev *adev)
+{
+ u32 reg;
+ int ret;
+
+ /* Power up DSP domain. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfDSSCS, MTL_HfDSSCS_SPA, MTL_HfDSSCS_SPA);
+ trace_avs_dsp_core_op(1, AVS_MAIN_CORE_MASK, "power dsp", true);
+
+ ret = snd_hdac_adsp_readl_poll(adev, MTL_REG_HfDSSCS, reg,
+ (reg & MTL_HfDSSCS_CPA) == MTL_HfDSSCS_CPA,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adev->dev, "power on domain dsp failed: %d\n", ret);
+ return ret;
+ }
+
+ /* Prevent power gating of DSP domain. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfPWRCTL2, MTL_HfPWRCTL2_WPDSPHPxPG,
+ MTL_HfPWRCTL2_WPDSPHPxPG);
+ trace_avs_dsp_core_op(1, AVS_MAIN_CORE_MASK, "prevent dsp PG", true);
+
+ ret = snd_hdac_adsp_readl_poll(adev, MTL_REG_HfPWRSTS2, reg,
+ (reg & MTL_HfPWRSTS2_DSPHPxPGS) == MTL_HfPWRSTS2_DSPHPxPGS,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+
+ /* Set ownership to HOST. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_DSPCCTL, MTL_DSPCCTL_OSEL, MTL_DSPCCTL_OSEL_HOST);
+ return ret;
+}
+
+static int avs_ptl_core_power_off(struct avs_dev *adev)
+{
+ u32 reg;
+
+ /* Allow power gating of DSP domain. No STS polling as HOST is only one of its users. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfPWRCTL2, MTL_HfPWRCTL2_WPDSPHPxPG, 0);
+ trace_avs_dsp_core_op(0, AVS_MAIN_CORE_MASK, "allow dsp pg", false);
+
+ /* Power down DSP domain. */
+ snd_hdac_adsp_updatel(adev, MTL_REG_HfDSSCS, MTL_HfDSSCS_SPA, 0);
+ trace_avs_dsp_core_op(0, AVS_MAIN_CORE_MASK, "power dsp", false);
+
+ return snd_hdac_adsp_readl_poll(adev, MTL_REG_HfDSSCS, reg,
+ (reg & MTL_HfDSSCS_CPA) == 0,
+ AVS_ADSPCS_INTERVAL_US, AVS_ADSPCS_TIMEOUT_US);
+}
+
+static int avs_ptl_core_power(struct avs_dev *adev, u32 core_mask, bool power)
+{
+ core_mask &= AVS_MAIN_CORE_MASK;
+ if (!core_mask)
+ return 0;
+
+ if (power)
+ return avs_ptl_core_power_on(adev);
+ return avs_ptl_core_power_off(adev);
+}
+
+const struct avs_dsp_ops avs_ptl_dsp_ops = {
+ .power = avs_ptl_core_power,
+ .reset = avs_mtl_core_reset,
+ .stall = avs_lnl_core_stall,
+ .dsp_interrupt = avs_mtl_dsp_interrupt,
+ .int_control = avs_mtl_interrupt_control,
+ .load_basefw = avs_hda_load_basefw,
+ .load_lib = avs_hda_load_library,
+ .transfer_mods = avs_hda_transfer_modules,
+ .log_buffer_offset = avs_icl_log_buffer_offset,
+ .log_buffer_status = avs_apl_log_buffer_status,
+ .coredump = avs_apl_coredump,
+ .d0ix_toggle = avs_icl_d0ix_toggle,
+ .set_d0ix = avs_icl_set_d0ix,
+ AVS_SET_ENABLE_LOGS_OP(icl)
+};
diff --git a/sound/soc/intel/avs/registers.h b/sound/soc/intel/avs/registers.h
index 368ede05f2cd..97767882ffa1 100644
--- a/sound/soc/intel/avs/registers.h
+++ b/sound/soc/intel/avs/registers.h
@@ -35,6 +35,8 @@
#define AVS_ADSPCS_CSTALL_MASK(cm) ((cm) << 8)
#define AVS_ADSPCS_SPA_MASK(cm) ((cm) << 16)
#define AVS_ADSPCS_CPA_MASK(cm) ((cm) << 24)
+#define AVS_ADSPCS_INTERVAL_US 500
+#define AVS_ADSPCS_TIMEOUT_US 10000
#define AVS_MAIN_CORE_MASK BIT(0)
#define AVS_ADSP_HIPCCTL_BUSY BIT(0)
@@ -67,14 +69,50 @@
#define CNL_ADSP_HIPCIDR_BUSY BIT(31)
#define CNL_ADSP_HIPCIDA_DONE BIT(31)
+/* MTL Intel HOST Inter-Processor Communication Registers */
+#define MTL_HfIPC_BASE 0x73000
+#define MTL_REG_HfIPCxTDR (MTL_HfIPC_BASE + 0x200)
+#define MTL_REG_HfIPCxTDA (MTL_HfIPC_BASE + 0x204)
+#define MTL_REG_HfIPCxIDR (MTL_HfIPC_BASE + 0x210)
+#define MTL_REG_HfIPCxIDA (MTL_HfIPC_BASE + 0x214)
+#define MTL_REG_HfIPCxCTL (MTL_HfIPC_BASE + 0x228)
+#define MTL_REG_HfIPCxTDD (MTL_HfIPC_BASE + 0x300)
+#define MTL_REG_HfIPCxIDD (MTL_HfIPC_BASE + 0x380)
+
+#define MTL_HfIPCxTDR_BUSY BIT(31)
+#define MTL_HfIPCxTDA_BUSY BIT(31)
+#define MTL_HfIPCxIDR_BUSY BIT(31)
+#define MTL_HfIPCxIDA_DONE BIT(31)
+
+#define MTL_HfFLV_BASE 0x162000
+#define MTL_REG_HfFLGP(x, y) (MTL_HfFLV_BASE + 0x1200 + (x) * 0x20 + (y) * 0x08)
+#define LNL_REG_HfDFR(x) (0x160200 + (x) * 0x8)
+
+#define MTL_DWICTL_BASE 0x1800
+#define MTL_DWICTL_REG_INTENL (MTL_DWICTL_BASE + 0x0)
+#define MTL_DWICTL_REG_FINALSTATUSL (MTL_DWICTL_BASE + 0x30)
+
+#define MTL_HfPMCCU_BASE 0x1D00
+#define MTL_REG_HfCLKCTL (MTL_HfPMCCU_BASE + 0x10)
+#define MTL_REG_HfPWRCTL (MTL_HfPMCCU_BASE + 0x18)
+#define MTL_REG_HfPWRSTS (MTL_HfPMCCU_BASE + 0x1C)
+#define MTL_REG_HfPWRCTL2 (MTL_HfPMCCU_BASE + 0x20)
+#define MTL_REG_HfPWRSTS2 (MTL_HfPMCCU_BASE + 0x24)
+#define MTL_HfPWRCTL_WPDSPHPxPG BIT(0)
+#define MTL_HfPWRSTS_DSPHPxPGS BIT(0)
+#define MTL_HfPWRCTL2_WPDSPHPxPG BIT(0)
+#define MTL_HfPWRSTS2_DSPHPxPGS BIT(0)
+
/* Intel HD Audio SRAM windows base addresses */
#define SKL_ADSP_SRAM_BASE_OFFSET 0x8000
#define SKL_ADSP_SRAM_WINDOW_SIZE 0x2000
#define APL_ADSP_SRAM_BASE_OFFSET 0x80000
#define APL_ADSP_SRAM_WINDOW_SIZE 0x20000
+#define MTL_ADSP_SRAM_BASE_OFFSET 0x180000
+#define MTL_ADSP_SRAM_WINDOW_SIZE 0x8000
/* Constants used when accessing SRAM, space shared with firmware */
-#define AVS_FW_REG_BASE(adev) ((adev)->spec->sram->base_offset)
+#define AVS_FW_REG_BASE(adev) ((adev)->spec->hipc->sts_offset)
#define AVS_FW_REG_STATUS(adev) (AVS_FW_REG_BASE(adev) + 0x0)
#define AVS_FW_REG_ERROR(adev) (AVS_FW_REG_BASE(adev) + 0x4)
diff --git a/sound/soc/intel/avs/tgl.c b/sound/soc/intel/avs/tgl.c
index 56905f2b9eb2..9dbb3ad0954a 100644
--- a/sound/soc/intel/avs/tgl.c
+++ b/sound/soc/intel/avs/tgl.c
@@ -47,7 +47,7 @@ static int avs_tgl_config_basefw(struct avs_dev *adev)
#ifdef CONFIG_X86
unsigned int ecx;
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
ecx = cpuid_ecx(CPUID_TSC_LEAF);
if (ecx) {
ret = avs_ipc_set_fw_config(adev, 1, AVS_FW_CFG_XTAL_FREQ_HZ, sizeof(ecx), &ecx);
diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c
index 3c222c352701..f2e4ad8b8e14 100644
--- a/sound/soc/intel/avs/topology.c
+++ b/sound/soc/intel/avs/topology.c
@@ -1668,8 +1668,8 @@ static int avs_widget_load(struct snd_soc_component *comp, int index,
/* See parse_link_formatted_string() for dynamic naming when(s). */
if (avs_mach_singular_tdm(mach, ssp_port)) {
- /* size is based on possible %d -> SSP:TDM, where SSP and TDM < 10 + '\0' */
- size_t size = strlen(dw->name) + 2;
+ /* size is based on possible %d -> SSP:TDM, where SSP and TDM < 16 + '\0' */
+ size_t size = strlen(dw->name) + 3;
char *buf;
tdm_slot = avs_mach_ssp_tdm(mach, ssp_port);
diff --git a/sound/soc/intel/avs/topology.h b/sound/soc/intel/avs/topology.h
index 304880997717..f5601a4e3ec8 100644
--- a/sound/soc/intel/avs/topology.h
+++ b/sound/soc/intel/avs/topology.h
@@ -87,7 +87,7 @@ struct avs_tplg_modcfg_ext {
struct {
u32 out_channel_config;
u32 coefficients_select;
- s32 coefficients[AVS_CHANNELS_MAX];
+ s32 coefficients[AVS_COEFF_CHANNELS_MAX];
u32 channel_map;
} updown_mix;
struct {
diff --git a/sound/soc/intel/avs/utils.h b/sound/soc/intel/avs/utils.h
index 5ee569c39380..955a40d2c30c 100644
--- a/sound/soc/intel/avs/utils.h
+++ b/sound/soc/intel/avs/utils.h
@@ -11,6 +11,16 @@
#include <sound/soc-acpi.h>
+extern bool obsolete_card_names;
+
+struct avs_mach_pdata {
+ struct hda_codec *codec;
+ unsigned long *tdms;
+ char *codec_name; /* DMIC only */
+
+ bool obsolete_card_names;
+};
+
static inline bool avs_mach_singular_ssp(struct snd_soc_acpi_mach *mach)
{
return hweight_long(mach->mach_params.i2s_link_mask) == 1;
@@ -23,14 +33,16 @@ static inline u32 avs_mach_ssp_port(struct snd_soc_acpi_mach *mach)
static inline bool avs_mach_singular_tdm(struct snd_soc_acpi_mach *mach, u32 port)
{
- unsigned long *tdms = mach->pdata;
+ struct avs_mach_pdata *pdata = mach->pdata;
+ unsigned long *tdms = pdata->tdms;
return !tdms || (hweight_long(tdms[port]) == 1);
}
static inline u32 avs_mach_ssp_tdm(struct snd_soc_acpi_mach *mach, u32 port)
{
- unsigned long *tdms = mach->pdata;
+ struct avs_mach_pdata *pdata = mach->pdata;
+ unsigned long *tdms = pdata->tdms;
return tdms ? __ffs(tdms[port]) : 0;
}
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index 9b80b19bb8d0..2df7afa2f469 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -252,14 +252,6 @@ config SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH
endif ## SND_SST_ATOM_HIFI2_PLATFORM
-config SND_SOC_INTEL_DA7219_MAX98357A_GENERIC
- tristate
- select SND_SOC_DA7219
- select SND_SOC_MAX98357A
- select SND_SOC_DMIC
- select SND_SOC_HDAC_HDMI
- select SND_SOC_INTEL_HDA_DSP_COMMON
-
if SND_SOC_SOF_APOLLOLAKE
config SND_SOC_INTEL_SOF_WM8804_MACH
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 095d08b3fc82..81a914bd7ec2 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -759,6 +759,24 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
SOF_BT_OFFLOAD_SSP(2) |
SOF_SSP_BT_OFFLOAD_PRESENT),
},
+ /* Wildcatlake devices*/
+ {
+ .callback = sof_sdw_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_wclrvp"),
+ },
+ .driver_data = (void *)(SOC_SDW_PCH_DMIC),
+ },
+ {
+ .callback = sof_sdw_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Ocelot"),
+ },
+ .driver_data = (void *)(SOC_SDW_PCH_DMIC |
+ SOF_BT_OFFLOAD_SSP(2) |
+ SOF_SSP_BT_OFFLOAD_PRESENT),
+ },
{}
};
@@ -780,13 +798,6 @@ static void sof_sdw_check_ssid_quirk(const struct snd_soc_acpi_mach *mach)
sof_sdw_quirk = quirk_entry->value;
}
-static struct snd_soc_dai_link_component platform_component[] = {
- {
- /* name might be overridden during probe */
- .name = "0000:00:1f.3"
- }
-};
-
static const struct snd_soc_ops sdw_ops = {
.startup = asoc_sdw_startup,
.prepare = asoc_sdw_prepare,
@@ -836,6 +847,7 @@ static int create_sdw_dailink(struct snd_soc_card *card,
struct snd_soc_dai_link_ch_map *codec_maps;
struct snd_soc_dai_link_component *codecs;
struct snd_soc_dai_link_component *cpus;
+ struct snd_soc_dai_link_component *platform;
int num_cpus = hweight32(sof_dai->link_mask[stream]);
int num_codecs = sof_dai->num_devs[stream];
int playback, capture;
@@ -876,6 +888,10 @@ static int create_sdw_dailink(struct snd_soc_card *card,
if (!codecs)
return -ENOMEM;
+ platform = devm_kzalloc(dev, sizeof(*platform), GFP_KERNEL);
+ if (!platform)
+ return -ENOMEM;
+
codec_maps = devm_kcalloc(dev, num_codecs, sizeof(*codec_maps), GFP_KERNEL);
if (!codec_maps)
return -ENOMEM;
@@ -917,8 +933,7 @@ static int create_sdw_dailink(struct snd_soc_card *card,
capture = (stream == SNDRV_PCM_STREAM_CAPTURE);
asoc_sdw_init_dai_link(dev, *dai_links, be_id, name, playback, capture,
- cpus, num_cpus, platform_component,
- ARRAY_SIZE(platform_component), codecs, num_codecs,
+ cpus, num_cpus, platform, 1, codecs, num_codecs,
1, asoc_sdw_rtd_init, &sdw_ops);
/*
@@ -994,8 +1009,7 @@ static int create_ssp_dailinks(struct snd_soc_card *card,
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, name,
playback, capture, cpu_dai_name,
- platform_component->name,
- ARRAY_SIZE(platform_component), codec_name,
+ "dummy", codec_name,
ssp_info->dais[0].dai_name, 1, NULL,
ssp_info->ops);
if (ret)
@@ -1019,8 +1033,7 @@ static int create_dmic_dailinks(struct snd_soc_card *card,
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, "dmic01",
0, 1, // DMIC only supports capture
- "DMIC01 Pin", platform_component->name,
- ARRAY_SIZE(platform_component),
+ "DMIC01 Pin", "dummy",
"dmic-codec", "dmic-hifi", 1,
asoc_sdw_dmic_init, NULL);
if (ret)
@@ -1030,8 +1043,7 @@ static int create_dmic_dailinks(struct snd_soc_card *card,
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, "dmic16k",
0, 1, // DMIC only supports capture
- "DMIC16k Pin", platform_component->name,
- ARRAY_SIZE(platform_component),
+ "DMIC16k Pin", "dummy",
"dmic-codec", "dmic-hifi", 1,
/* don't call asoc_sdw_dmic_init() twice */
NULL, NULL);
@@ -1074,8 +1086,7 @@ static int create_hdmi_dailinks(struct snd_soc_card *card,
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, name,
1, 0, // HDMI only supports playback
- cpu_dai_name, platform_component->name,
- ARRAY_SIZE(platform_component),
+ cpu_dai_name, "dummy",
codec_name, codec_dai_name, 1,
i == 0 ? sof_sdw_hdmi_init : NULL, NULL);
if (ret)
@@ -1101,8 +1112,7 @@ static int create_bt_dailinks(struct snd_soc_card *card,
int ret;
ret = asoc_sdw_init_simple_dai_link(dev, *dai_links, be_id, name,
- 1, 1, cpu_dai_name, platform_component->name,
- ARRAY_SIZE(platform_component),
+ 1, 1, cpu_dai_name, "dummy",
snd_soc_dummy_dlc.name, snd_soc_dummy_dlc.dai_name,
1, NULL, NULL);
if (ret)
diff --git a/sound/soc/intel/common/Makefile b/sound/soc/intel/common/Makefile
index 0afd114be9e5..7822bcae6c69 100644
--- a/sound/soc/intel/common/Makefile
+++ b/sound/soc/intel/common/Makefile
@@ -12,7 +12,7 @@ snd-soc-acpi-intel-match-y := soc-acpi-intel-byt-match.o soc-acpi-intel-cht-matc
soc-acpi-intel-lnl-match.o \
soc-acpi-intel-ptl-match.o \
soc-acpi-intel-hda-match.o \
- soc-acpi-intel-sdw-mockup-match.o
+ soc-acpi-intel-sdw-mockup-match.o sof-function-topology-lib.o
snd-soc-acpi-intel-match-y += soc-acpi-intel-ssp-common.o
diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c
index 32147dc9d2d6..73e581e93755 100644
--- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c
@@ -8,6 +8,7 @@
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
#include <sound/soc-acpi-intel-ssp-common.h>
+#include "sof-function-topology-lib.h"
static const struct snd_soc_acpi_endpoint single_endpoint = {
.num = 0,
@@ -436,42 +437,49 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = {
.links = arl_cs42l43_l0_cs35l56_l23,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l0-cs35l56-l23.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0) | BIT(2) | BIT(3),
.links = arl_cs42l43_l0_cs35l56_2_l23,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l0-cs35l56-l23.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0) | BIT(2) | BIT(3),
.links = arl_cs42l43_l0_cs35l56_3_l23,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l0-cs35l56-l23.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0) | BIT(2),
.links = arl_cs42l43_l0_cs35l56_l2,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l0-cs35l56-l2.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0),
.links = arl_cs42l43_l0,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(2),
.links = arl_cs42l43_l2,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l2.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(2) | BIT(3),
.links = arl_cs42l43_l2_cs35l56_l3,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = 0x1, /* link0 required */
@@ -490,6 +498,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = {
.links = arl_rt722_l0_rt1320_l2,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-arl-rt722-l0_rt1320-l2.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{},
};
diff --git a/sound/soc/intel/common/soc-acpi-intel-lnl-match.c b/sound/soc/intel/common/soc-acpi-intel-lnl-match.c
index e04f6de746eb..558dc4c91239 100644
--- a/sound/soc/intel/common/soc-acpi-intel-lnl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-lnl-match.c
@@ -8,6 +8,7 @@
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
+#include "sof-function-topology-lib.h"
#include "soc-acpi-intel-sdca-quirks.h"
#include "soc-acpi-intel-sdw-mockup-match.h"
@@ -136,7 +137,7 @@ static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = {
},
};
-static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints_endpoints[] = {
+static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints[] = {
/* Jack Endpoint */
{
.num = 0,
@@ -349,8 +350,8 @@ static const struct snd_soc_acpi_adr_device rt1712_3_single_adr[] = {
static const struct snd_soc_acpi_adr_device rt712_vb_2_group1_adr[] = {
{
.adr = 0x000230025D071201ull,
- .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints_endpoints),
- .endpoints = jack_amp_g1_dmic_endpoints_endpoints,
+ .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints),
+ .endpoints = jack_amp_g1_dmic_endpoints,
.name_prefix = "rt712"
}
};
@@ -712,6 +713,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_sdw_machines[] = {
.links = lnl_cs42l43_l0,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-lnl-cs42l43-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0),
@@ -730,6 +732,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_sdw_machines[] = {
.links = lnl_rt722_only,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-lnl-rt722-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = GENMASK(2, 0),
@@ -748,14 +751,16 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_sdw_machines[] = {
.links = lnl_sdw_rt712_vb_l2_rt1320_l1,
.drv_name = "sof_sdw",
.machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
- .sof_tplg_filename = "sof-lnl-rt712-l2-rt1320-l1.tplg"
+ .sof_tplg_filename = "sof-lnl-rt712-l2-rt1320-l1.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(1) | BIT(2) | BIT(3),
.links = lnl_sdw_rt713_vb_l2_rt1320_l13,
.drv_name = "sof_sdw",
.machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
- .sof_tplg_filename = "sof-lnl-rt713-l2-rt1320-l13.tplg"
+ .sof_tplg_filename = "sof-lnl-rt713-l2-rt1320-l13.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{},
};
diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c
index 9e611e3667ad..75dc8935a794 100644
--- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c
@@ -11,6 +11,7 @@
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
#include <sound/soc-acpi-intel-ssp-common.h>
+#include "sof-function-topology-lib.h"
#include "soc-acpi-intel-sdca-quirks.h"
#include "soc-acpi-intel-sdw-mockup-match.h"
@@ -729,6 +730,24 @@ static const struct snd_soc_acpi_adr_device cs35l56_3_l_adr[] = {
}
};
+static const struct snd_soc_acpi_adr_device cs35l63_1_fb_adr[] = {
+ {
+ .adr = 0x00013001FA356301ull,
+ .num_endpoints = ARRAY_SIZE(cs35l56_l_fb_endpoints),
+ .endpoints = cs35l56_l_fb_endpoints,
+ .name_prefix = "AMP1"
+ },
+};
+
+static const struct snd_soc_acpi_adr_device cs35l63_3_fb_adr[] = {
+ {
+ .adr = 0x00033101FA356301ull,
+ .num_endpoints = ARRAY_SIZE(cs35l56_r_fb_endpoints),
+ .endpoints = cs35l56_r_fb_endpoints,
+ .name_prefix = "AMP2"
+ },
+};
+
static const struct snd_soc_acpi_link_adr rt5682_link2_max98373_link0[] = {
/* Expected order: jack -> amp */
{
@@ -1026,6 +1045,20 @@ static const struct snd_soc_acpi_link_adr mtl_cs35l56_x8_link0_link1_fb[] = {
{}
};
+static const struct snd_soc_acpi_link_adr mtl_cs35l63_x2_link1_link3_fb[] = {
+ {
+ .mask = BIT(3),
+ .num_adr = ARRAY_SIZE(cs35l63_3_fb_adr),
+ .adr_d = cs35l63_3_fb_adr,
+ },
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(cs35l63_1_fb_adr),
+ .adr_d = cs35l63_1_fb_adr,
+ },
+ {}
+};
+
/* this table is used when there is no I2S codec present */
struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_sdw_machines[] = {
/* mockup tests need to be first */
@@ -1083,12 +1116,14 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_sdw_machines[] = {
.drv_name = "sof_sdw",
.machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
.sof_tplg_filename = "sof-mtl-rt712-vb-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0),
.links = mtl_712_l0,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-mtl-rt712-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = GENMASK(2, 0),
@@ -1101,30 +1136,41 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_sdw_machines[] = {
.links = cs42l43_link0_cs35l56_link2_link3,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-mtl-cs42l43-l0-cs35l56-l23.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0) | BIT(1) | BIT(3),
.links = cs42l43_link3_cs35l56_x4_link0_link1_spkagg,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-mtl-cs42l43-l3-cs35l56-l01-spkagg.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = GENMASK(2, 0),
.links = mtl_cs42l43_cs35l56,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-mtl-cs42l43-l0-cs35l56-l12.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0) | BIT(1),
.links = mtl_cs35l56_x8_link0_link1_fb,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-mtl-cs35l56-l01-fb8.tplg"
+ .sof_tplg_filename = "sof-mtl-cs35l56-l01-fb8.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0),
.links = mtl_cs42l43_l0,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-mtl-cs42l43-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
+ },
+ {
+ .link_mask = BIT(1) | BIT(3),
+ .links = mtl_cs35l63_x2_link1_link3_fb,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-mtl-cs35l56-l01-fb8.tplg",
},
{
.link_mask = GENMASK(3, 0),
@@ -1143,6 +1189,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_sdw_machines[] = {
.links = mtl_rt722_only,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-mtl-rt722-l0.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(0),
diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c
index c599eb43eeb1..eae75f3f0fa4 100644
--- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c
@@ -4,10 +4,14 @@
*
* Copyright (c) 2024, Intel Corporation.
*
+ * Order of entries in snd_soc_acpi_intel_ptl_sdw_machines[] matters.
+ * Check subset of link mask when matching the machine driver, rule is
+ * superset match should be ordered before subset matches.
*/
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
+#include "sof-function-topology-lib.h"
#include "soc-acpi-intel-sdca-quirks.h"
#include "soc-acpi-intel-sdw-mockup-match.h"
#include <sound/soc-acpi-intel-ssp-common.h>
@@ -134,7 +138,7 @@ static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = {
},
};
-static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints_endpoints[] = {
+static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints[] = {
/* Jack Endpoint */
{
.num = 0,
@@ -284,8 +288,8 @@ static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = {
static const struct snd_soc_acpi_adr_device rt712_vb_2_group1_adr[] = {
{
.adr = 0x000230025D071201ull,
- .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints_endpoints),
- .endpoints = jack_amp_g1_dmic_endpoints_endpoints,
+ .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints),
+ .endpoints = jack_amp_g1_dmic_endpoints,
.name_prefix = "rt712"
}
};
@@ -293,8 +297,8 @@ static const struct snd_soc_acpi_adr_device rt712_vb_2_group1_adr[] = {
static const struct snd_soc_acpi_adr_device rt712_vb_3_group1_adr[] = {
{
.adr = 0x000330025D071201ull,
- .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints_endpoints),
- .endpoints = jack_amp_g1_dmic_endpoints_endpoints,
+ .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints),
+ .endpoints = jack_amp_g1_dmic_endpoints,
.name_prefix = "rt712"
}
};
@@ -317,6 +321,15 @@ static const struct snd_soc_acpi_adr_device rt713_vb_3_adr[] = {
}
};
+static const struct snd_soc_acpi_adr_device rt1320_3_group1_adr[] = {
+ {
+ .adr = 0x000330025D132001ull,
+ .num_endpoints = 1,
+ .endpoints = &spk_r_endpoint,
+ .name_prefix = "rt1320-1"
+ }
+};
+
static const struct snd_soc_acpi_adr_device rt721_3_single_adr[] = {
{
.adr = 0x000330025d072101ull,
@@ -537,9 +550,23 @@ static const struct snd_soc_acpi_link_adr ptl_sdw_rt712_vb_l3_rt1320_l2[] = {
{}
};
+static const struct snd_soc_acpi_link_adr ptl_sdw_rt712_vb_l3_rt1320_l3[] = {
+ {
+ .mask = BIT(3),
+ .num_adr = ARRAY_SIZE(rt712_vb_3_group1_adr),
+ .adr_d = rt712_vb_3_group1_adr,
+ },
+ {
+ .mask = BIT(3),
+ .num_adr = ARRAY_SIZE(rt1320_3_group1_adr),
+ .adr_d = rt1320_3_group1_adr,
+ },
+ {}
+};
+
/* this table is used when there is no I2S codec present */
struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = {
- /* mockup tests need to be first */
+/* Order Priority: mockup > most links > most bit link-mask > alphabetical */
{
.link_mask = GENMASK(3, 0),
.links = sdw_mockup_headset_2amps_mic,
@@ -559,80 +586,97 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = {
.sof_tplg_filename = "sof-ptl-rt715-rt711-rt1308-mono.tplg",
},
{
+ .link_mask = BIT(0),
+ .links = sdw_mockup_multi_func,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-ptl-rt722.tplg", /* Reuse the existing tplg file */
+ },
+ {
.link_mask = BIT(1) | BIT(2) | BIT(3),
- .links = ptl_cs42l43_l2_cs35l56x6_l13,
+ .links = ptl_sdw_rt713_vb_l2_rt1320_l13,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-ptl-cs42l43-l2-cs35l56x6-l13.tplg",
+ .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
+ .sof_tplg_filename = "sof-ptl-rt713-l2-rt1320-l13.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(0),
- .links = sdw_mockup_multi_func,
+ .link_mask = BIT(1) | BIT(2) | BIT(3),
+ .links = ptl_sdw_rt713_vb_l3_rt1320_l12,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-ptl-rt722.tplg", /* Reuse the existing tplg file */
+ .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
+ .sof_tplg_filename = "sof-ptl-rt713-l3-rt1320-l12.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(0),
- .links = ptl_rvp,
+ .link_mask = BIT(1) | BIT(2) | BIT(3),
+ .links = ptl_cs42l43_l2_cs35l56x6_l13,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-ptl-rt711.tplg",
+ .sof_tplg_filename = "sof-ptl-cs42l43-l2-cs35l56x6-l13.tplg",
},
{
- .link_mask = BIT(3),
- .links = ptl_cs42l43_l3,
+ .link_mask = BIT(1) | BIT(2),
+ .links = ptl_sdw_rt712_vb_l2_rt1320_l1,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-ptl-cs42l43-l3.tplg",
+ .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
+ .sof_tplg_filename = "sof-ptl-rt712-l2-rt1320-l1.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(3),
- .links = ptl_rt721_l3,
+ .link_mask = BIT(2) | BIT(3),
+ .links = ptl_sdw_rt712_vb_l3_rt1320_l2,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-ptl-rt721.tplg",
+ .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
+ .sof_tplg_filename = "sof-ptl-rt712-l3-rt1320-l2.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
+ },
+ {
+ .link_mask = BIT(0),
+ .links = ptl_rvp,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-ptl-rt711.tplg",
},
{
.link_mask = BIT(0),
.links = ptl_rt722_only,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-ptl-rt722.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(1),
.links = ptl_rt722_l1,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-ptl-rt722.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
.link_mask = BIT(3),
- .links = ptl_rt722_l3,
- .drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-ptl-rt722.tplg",
- },
- {
- .link_mask = BIT(1) | BIT(2),
- .links = ptl_sdw_rt712_vb_l2_rt1320_l1,
+ .links = ptl_cs42l43_l3,
.drv_name = "sof_sdw",
- .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
- .sof_tplg_filename = "sof-ptl-rt712-l2-rt1320-l1.tplg"
+ .sof_tplg_filename = "sof-ptl-cs42l43-l3.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(2) | BIT(3),
- .links = ptl_sdw_rt712_vb_l3_rt1320_l2,
+ .link_mask = BIT(3),
+ .links = ptl_sdw_rt712_vb_l3_rt1320_l3,
.drv_name = "sof_sdw",
.machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
- .sof_tplg_filename = "sof-ptl-rt712-l3-rt1320-l2.tplg"
+ .sof_tplg_filename = "sof-ptl-rt712-l3-rt1320-l3.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(1) | BIT(2) | BIT(3),
- .links = ptl_sdw_rt713_vb_l2_rt1320_l13,
+ .link_mask = BIT(3),
+ .links = ptl_rt721_l3,
.drv_name = "sof_sdw",
- .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
- .sof_tplg_filename = "sof-ptl-rt713-l2-rt1320-l13.tplg"
+ .sof_tplg_filename = "sof-ptl-rt721.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(1) | BIT(2) | BIT(3),
- .links = ptl_sdw_rt713_vb_l3_rt1320_l12,
+ .link_mask = BIT(3),
+ .links = ptl_rt722_l3,
.drv_name = "sof_sdw",
- .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb,
- .sof_tplg_filename = "sof-ptl-rt713-l3-rt1320-l12.tplg"
+ .sof_tplg_filename = "sof-ptl-rt722.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
},
{},
};
diff --git a/sound/soc/intel/common/sof-function-topology-lib.c b/sound/soc/intel/common/sof-function-topology-lib.c
new file mode 100644
index 000000000000..90fe7aa3df1c
--- /dev/null
+++ b/sound/soc/intel/common/sof-function-topology-lib.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license. When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2025 Intel Corporation.
+//
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/firmware.h>
+#include <sound/soc.h>
+#include <sound/soc-acpi.h>
+#include "sof-function-topology-lib.h"
+
+enum tplg_device_id {
+ TPLG_DEVICE_SDCA_JACK,
+ TPLG_DEVICE_SDCA_AMP,
+ TPLG_DEVICE_SDCA_MIC,
+ TPLG_DEVICE_INTEL_PCH_DMIC,
+ TPLG_DEVICE_HDMI,
+ TPLG_DEVICE_MAX
+};
+
+#define SDCA_DEVICE_MASK (BIT(TPLG_DEVICE_SDCA_JACK) | BIT(TPLG_DEVICE_SDCA_AMP) | \
+ BIT(TPLG_DEVICE_SDCA_MIC))
+
+#define SOF_INTEL_PLATFORM_NAME_MAX 4
+
+int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_mach *mach,
+ const char *prefix, const char ***tplg_files)
+{
+ struct snd_soc_acpi_mach_params mach_params = mach->mach_params;
+ struct snd_soc_dai_link *dai_link;
+ const struct firmware *fw;
+ char platform[SOF_INTEL_PLATFORM_NAME_MAX];
+ unsigned long tplg_mask = 0;
+ int tplg_num = 0;
+ int tplg_dev;
+ int ret;
+ int i;
+
+ ret = sscanf(mach->sof_tplg_filename, "sof-%3s-*.tplg", platform);
+ if (ret != 1) {
+ dev_err(card->dev, "Invalid platform name %s of tplg %s\n",
+ platform, mach->sof_tplg_filename);
+ return -EINVAL;
+ }
+
+ for_each_card_prelinks(card, i, dai_link) {
+ char *tplg_dev_name;
+
+ dev_dbg(card->dev, "dai_link %s id %d\n", dai_link->name, dai_link->id);
+ if (strstr(dai_link->name, "SimpleJack")) {
+ tplg_dev = TPLG_DEVICE_SDCA_JACK;
+ tplg_dev_name = "sdca-jack";
+ } else if (strstr(dai_link->name, "SmartAmp")) {
+ tplg_dev = TPLG_DEVICE_SDCA_AMP;
+ tplg_dev_name = devm_kasprintf(card->dev, GFP_KERNEL,
+ "sdca-%damp", dai_link->num_cpus);
+ if (!tplg_dev_name)
+ return -ENOMEM;
+ } else if (strstr(dai_link->name, "SmartMic")) {
+ tplg_dev = TPLG_DEVICE_SDCA_MIC;
+ tplg_dev_name = "sdca-mic";
+ } else if (strstr(dai_link->name, "dmic")) {
+ switch (mach_params.dmic_num) {
+ case 2:
+ tplg_dev_name = "dmic-2ch";
+ break;
+ case 4:
+ tplg_dev_name = "dmic-4ch";
+ break;
+ default:
+ dev_warn(card->dev,
+ "only -2ch and -4ch are supported for dmic\n");
+ continue;
+ }
+ tplg_dev = TPLG_DEVICE_INTEL_PCH_DMIC;
+ } else if (strstr(dai_link->name, "iDisp")) {
+ tplg_dev = TPLG_DEVICE_HDMI;
+ tplg_dev_name = "hdmi-pcm5";
+
+ } else {
+ /* The dai link is not supported by separated tplg yet */
+ dev_dbg(card->dev,
+ "dai_link %s is not supported by separated tplg yet\n",
+ dai_link->name);
+ return 0;
+ }
+ if (tplg_mask & BIT(tplg_dev))
+ continue;
+
+ tplg_mask |= BIT(tplg_dev);
+
+ /*
+ * The tplg file naming rule is sof-<platform>-<function>-id<BE id number>.tplg
+ * where <platform> is only required for the DMIC function as the nhlt blob
+ * is platform dependent.
+ */
+ switch (tplg_dev) {
+ case TPLG_DEVICE_INTEL_PCH_DMIC:
+ (*tplg_files)[tplg_num] = devm_kasprintf(card->dev, GFP_KERNEL,
+ "%s/sof-%s-%s-id%d.tplg",
+ prefix, platform,
+ tplg_dev_name, dai_link->id);
+ break;
+ default:
+ (*tplg_files)[tplg_num] = devm_kasprintf(card->dev, GFP_KERNEL,
+ "%s/sof-%s-id%d.tplg",
+ prefix, tplg_dev_name,
+ dai_link->id);
+ break;
+ }
+ if (!(*tplg_files)[tplg_num])
+ return -ENOMEM;
+ tplg_num++;
+ }
+
+ dev_dbg(card->dev, "tplg_mask %#lx tplg_num %d\n", tplg_mask, tplg_num);
+
+ /* Check presence of sub-topologies */
+ for (i = 0; i < tplg_num; i++) {
+ ret = firmware_request_nowarn(&fw, (*tplg_files)[i], card->dev);
+ if (!ret) {
+ release_firmware(fw);
+ } else {
+ dev_dbg(card->dev, "Failed to open topology file: %s\n", (*tplg_files)[i]);
+ return 0;
+ }
+ }
+
+ return tplg_num;
+}
+
diff --git a/sound/soc/intel/common/sof-function-topology-lib.h b/sound/soc/intel/common/sof-function-topology-lib.h
new file mode 100644
index 000000000000..e7d0c39d0788
--- /dev/null
+++ b/sound/soc/intel/common/sof-function-topology-lib.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * soc-acpi-intel-get-tplg.h - get-tplg-files ops
+ *
+ * Copyright (c) 2025, Intel Corporation.
+ *
+ */
+
+#ifndef _SND_SOC_ACPI_INTEL_GET_TPLG_H
+#define _SND_SOC_ACPI_INTEL_GET_TPLG_H
+
+int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_mach *mach,
+ const char *prefix, const char ***tplg_files);
+
+#endif
diff --git a/sound/soc/loongson/Kconfig b/sound/soc/loongson/Kconfig
index 2d8291c1443c..1a3c28816e7a 100644
--- a/sound/soc/loongson/Kconfig
+++ b/sound/soc/loongson/Kconfig
@@ -37,3 +37,13 @@ config SND_SOC_LOONGSON_I2S_PLATFORM
The controller work as a platform device, we can found it in
Loongson-2K1000 SoCs.
endmenu
+
+config SND_LOONGSON1_AC97
+ tristate "Loongson1 AC97 Support"
+ depends on LOONGSON1_APB_DMA
+ select SND_SOC_AC97_CODEC
+ select SND_SOC_GENERIC_DMAENGINE_PCM
+ select REGMAP_MMIO
+ help
+ Say Y or M if you want to add support for codecs attached to
+ the Loongson1 AC97 controller.
diff --git a/sound/soc/loongson/Makefile b/sound/soc/loongson/Makefile
index c0cb1acb36e3..4c6d3130bcee 100644
--- a/sound/soc/loongson/Makefile
+++ b/sound/soc/loongson/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_SND_SOC_LOONGSON_I2S_PLATFORM) += snd-soc-loongson-i2s-plat.o snd-s
snd-soc-loongson-i2s-y := loongson_i2s.o
+obj-$(CONFIG_SND_LOONGSON1_AC97) += loongson1_ac97.o
+
#Machine Support
snd-soc-loongson-card-y := loongson_card.o
obj-$(CONFIG_SND_SOC_LOONGSON_CARD) += snd-soc-loongson-card.o
diff --git a/sound/soc/loongson/loongson1_ac97.c b/sound/soc/loongson/loongson1_ac97.c
new file mode 100644
index 000000000000..84901900ad43
--- /dev/null
+++ b/sound/soc/loongson/loongson1_ac97.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AC97 Controller Driver for Loongson-1 SoC
+ *
+ * Copyright (C) 2025 Keguang Zhang <keguang.zhang@gmail.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <sound/dmaengine_pcm.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+
+/* Loongson-1 AC97 Controller Registers */
+#define AC97_CSR 0x0
+#define AC97_OCC0 0x4
+#define AC97_ICC 0x10
+#define AC97_CRAC 0x18
+#define AC97_INTRAW 0x54
+#define AC97_INTM 0x58
+#define AC97_INT_CW_CLR 0x68
+#define AC97_INT_CR_CLR 0x6c
+
+/* Control Status Register Bits (CSR) */
+#define CSR_RESUME BIT(1)
+#define CSR_RST_FORCE BIT(0)
+
+/* MIC Channel Configuration Bits */
+#define M_DMA_EN BIT(22)
+#define M_FIFO_THRES GENMASK(21, 20)
+#define M_FIFO_THRES_FULL FIELD_PREP(M_FIFO_THRES, 3)
+#define M_FIFO_THRES_HALF FIELD_PREP(M_FIFO_THRES, 1)
+#define M_FIFO_THRES_QUARTER FIELD_PREP(M_FIFO_THRES, 0)
+#define M_SW GENMASK(19, 18)
+#define M_SW_16_BITS FIELD_PREP(M_SW, 2)
+#define M_SW_8_BITS FIELD_PREP(M_SW, 0)
+#define M_VSR BIT(17)
+#define M_CH_EN BIT(16)
+/* Right Channel Configuration Bits */
+#define R_DMA_EN BIT(14)
+#define R_FIFO_THRES GENMASK(13, 12)
+#define R_FIFO_THRES_EMPTY FIELD_PREP(R_FIFO_THRES, 3)
+#define R_FIFO_THRES_HALF FIELD_PREP(R_FIFO_THRES, 1)
+#define R_FIFO_THRES_QUARTER FIELD_PREP(R_FIFO_THRES, 0)
+#define R_SW GENMASK(11, 10)
+#define R_SW_16_BITS FIELD_PREP(R_SW, 2)
+#define R_SW_8_BITS FIELD_PREP(R_SW, 0)
+#define R_VSR BIT(9)
+#define R_CH_EN BIT(8)
+/* Left Channel Configuration Bits */
+#define L_DMA_EN BIT(6)
+#define L_FIFO_THRES GENMASK(5, 4)
+#define L_FIFO_THRES_EMPTY FIELD_PREP(L_FIFO_THRES, 3)
+#define L_FIFO_THRES_HALF FIELD_PREP(L_FIFO_THRES, 1)
+#define L_FIFO_THRES_QUARTER FIELD_PREP(L_FIFO_THRES, 0)
+#define L_SW GENMASK(3, 2)
+#define L_SW_16_BITS FIELD_PREP(L_SW, 2)
+#define L_SW_8_BITS FIELD_PREP(L_SW, 0)
+#define L_VSR BIT(1)
+#define L_CH_EN BIT(0)
+
+/* Codec Register Access Command Bits (CRAC) */
+#define CODEC_WR BIT(31)
+#define CODEC_ADR GENMASK(22, 16)
+#define CODEC_DAT GENMASK(15, 0)
+
+/* Interrupt Register (INTRAW) */
+#define CW_DONE BIT(1)
+#define CR_DONE BIT(0)
+
+#define LS1X_AC97_DMA_TX_EN BIT(31)
+#define LS1X_AC97_DMA_STEREO BIT(30)
+#define LS1X_AC97_DMA_TX_BYTES GENMASK(29, 28)
+#define LS1X_AC97_DMA_TX_4_BYTES FIELD_PREP(LS1X_AC97_DMA_TX_BYTES, 2)
+#define LS1X_AC97_DMA_TX_2_BYTES FIELD_PREP(LS1X_AC97_DMA_TX_BYTES, 1)
+#define LS1X_AC97_DMA_TX_1_BYTE FIELD_PREP(LS1X_AC97_DMA_TX_BYTES, 0)
+#define LS1X_AC97_DMA_DADDR_MASK GENMASK(27, 0)
+
+#define LS1X_AC97_DMA_FIFO_SIZE 128
+
+#define LS1X_AC97_TIMEOUT 3000
+
+struct ls1x_ac97 {
+ void __iomem *reg_base;
+ struct regmap *regmap;
+ dma_addr_t tx_dma_base;
+ dma_addr_t rx_dma_base;
+ struct snd_dmaengine_dai_dma_data capture_dma_data;
+ struct snd_dmaengine_dai_dma_data playback_dma_data;
+};
+
+static struct ls1x_ac97 *ls1x_ac97;
+
+static const struct regmap_config ls1x_ac97_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+};
+
+static void ls1x_ac97_reset(struct snd_ac97 *ac97)
+{
+ int val;
+
+ regmap_write(ls1x_ac97->regmap, AC97_CSR, CSR_RST_FORCE);
+ regmap_read_poll_timeout(ls1x_ac97->regmap, AC97_CSR, val,
+ !(val & CSR_RESUME), 0, LS1X_AC97_TIMEOUT);
+}
+
+static void ls1x_ac97_write(struct snd_ac97 *ac97, unsigned short reg, unsigned short val)
+{
+ int tmp, ret;
+
+ tmp = FIELD_PREP(CODEC_ADR, reg) | FIELD_PREP(CODEC_DAT, val);
+ regmap_write(ls1x_ac97->regmap, AC97_CRAC, tmp);
+ ret = regmap_read_poll_timeout(ls1x_ac97->regmap, AC97_INTRAW, tmp,
+ (tmp & CW_DONE), 0, LS1X_AC97_TIMEOUT);
+ if (ret)
+ pr_err("timeout on AC97 write! %d\n", ret);
+
+ regmap_read(ls1x_ac97->regmap, AC97_INT_CW_CLR, &ret);
+}
+
+static unsigned short ls1x_ac97_read(struct snd_ac97 *ac97, unsigned short reg)
+{
+ int val, ret;
+
+ val = CODEC_WR | FIELD_PREP(CODEC_ADR, reg);
+ regmap_write(ls1x_ac97->regmap, AC97_CRAC, val);
+ ret = regmap_read_poll_timeout(ls1x_ac97->regmap, AC97_INTRAW, val,
+ (val & CR_DONE), 0, LS1X_AC97_TIMEOUT);
+ if (ret) {
+ pr_err("timeout on AC97 read! %d\n", ret);
+ return ret;
+ }
+
+ regmap_read(ls1x_ac97->regmap, AC97_INT_CR_CLR, &ret);
+ regmap_read(ls1x_ac97->regmap, AC97_CRAC, &ret);
+
+ return (ret & CODEC_DAT);
+}
+
+static void ls1x_ac97_init(struct snd_ac97 *ac97)
+{
+ writel(0, ls1x_ac97->reg_base + AC97_INTRAW);
+ writel(0, ls1x_ac97->reg_base + AC97_INTM);
+
+ /* Config output channels */
+ regmap_update_bits(ls1x_ac97->regmap, AC97_OCC0,
+ R_DMA_EN | R_FIFO_THRES | R_CH_EN |
+ L_DMA_EN | L_FIFO_THRES | L_CH_EN,
+ R_DMA_EN | R_FIFO_THRES_EMPTY | R_CH_EN |
+ L_DMA_EN | L_FIFO_THRES_EMPTY | L_CH_EN);
+
+ /* Config inputs channel */
+ regmap_update_bits(ls1x_ac97->regmap, AC97_ICC,
+ M_DMA_EN | M_FIFO_THRES | M_CH_EN |
+ R_DMA_EN | R_FIFO_THRES | R_CH_EN |
+ L_DMA_EN | L_FIFO_THRES | L_CH_EN,
+ M_DMA_EN | M_FIFO_THRES_FULL | M_CH_EN |
+ R_DMA_EN | R_FIFO_THRES_EMPTY | R_CH_EN |
+ L_DMA_EN | L_FIFO_THRES_EMPTY | L_CH_EN);
+
+ if (ac97->ext_id & AC97_EI_VRA) {
+ regmap_update_bits(ls1x_ac97->regmap, AC97_OCC0, R_VSR | L_VSR, R_VSR | L_VSR);
+ regmap_update_bits(ls1x_ac97->regmap, AC97_ICC, M_VSR, M_VSR);
+ }
+}
+
+static struct snd_ac97_bus_ops ls1x_ac97_ops = {
+ .reset = ls1x_ac97_reset,
+ .write = ls1x_ac97_write,
+ .read = ls1x_ac97_read,
+ .init = ls1x_ac97_init,
+};
+
+static int ls1x_ac97_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *cpu_dai)
+{
+ struct ls1x_ac97 *ac97 = dev_get_drvdata(cpu_dai->dev);
+ struct snd_dmaengine_dai_dma_data *dma_data = snd_soc_dai_get_dma_data(cpu_dai, substream);
+
+ switch (params_channels(params)) {
+ case 1:
+ dma_data->addr &= ~LS1X_AC97_DMA_STEREO;
+ break;
+ case 2:
+ dma_data->addr |= LS1X_AC97_DMA_STEREO;
+ break;
+ default:
+ dev_err(cpu_dai->dev, "unsupported channels! %d\n", params_channels(params));
+ return -EINVAL;
+ }
+
+ switch (params_format(params)) {
+ case SNDRV_PCM_FORMAT_S8:
+ case SNDRV_PCM_FORMAT_U8:
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ regmap_update_bits(ac97->regmap, AC97_OCC0,
+ R_SW | L_SW,
+ R_SW_8_BITS | L_SW_8_BITS);
+ else
+ regmap_update_bits(ac97->regmap, AC97_ICC,
+ M_SW | R_SW | L_SW,
+ M_SW_8_BITS | R_SW_8_BITS | L_SW_8_BITS);
+ break;
+ case SNDRV_PCM_FORMAT_S16_LE:
+ case SNDRV_PCM_FORMAT_U16_LE:
+ case SNDRV_PCM_FORMAT_S16_BE:
+ case SNDRV_PCM_FORMAT_U16_BE:
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ regmap_update_bits(ac97->regmap, AC97_OCC0,
+ R_SW | L_SW,
+ R_SW_16_BITS | L_SW_16_BITS);
+ else
+ regmap_update_bits(ac97->regmap, AC97_ICC,
+ M_SW | R_SW | L_SW,
+ M_SW_16_BITS | R_SW_16_BITS | L_SW_16_BITS);
+ break;
+ default:
+ dev_err(cpu_dai->dev, "unsupported format! %d\n", params_format(params));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ls1x_ac97_dai_probe(struct snd_soc_dai *cpu_dai)
+{
+ struct ls1x_ac97 *ac97 = dev_get_drvdata(cpu_dai->dev);
+
+ ac97->capture_dma_data.addr = ac97->rx_dma_base & LS1X_AC97_DMA_DADDR_MASK;
+ ac97->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ ac97->capture_dma_data.fifo_size = LS1X_AC97_DMA_FIFO_SIZE;
+
+ ac97->playback_dma_data.addr = ac97->tx_dma_base & LS1X_AC97_DMA_DADDR_MASK;
+ ac97->playback_dma_data.addr |= LS1X_AC97_DMA_TX_4_BYTES;
+ ac97->playback_dma_data.addr |= LS1X_AC97_DMA_TX_EN;
+ ac97->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ ac97->playback_dma_data.fifo_size = LS1X_AC97_DMA_FIFO_SIZE;
+
+ snd_soc_dai_init_dma_data(cpu_dai, &ac97->playback_dma_data, &ac97->capture_dma_data);
+ snd_soc_dai_set_drvdata(cpu_dai, ac97);
+
+ return 0;
+}
+
+static const struct snd_soc_dai_ops ls1x_ac97_dai_ops = {
+ .probe = ls1x_ac97_dai_probe,
+ .hw_params = ls1x_ac97_hw_params,
+};
+
+#define LS1X_AC97_FMTS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_U8 |\
+ SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE |\
+ SNDRV_PCM_FMTBIT_U16_LE | SNDRV_PCM_FMTBIT_U16_BE)
+
+static struct snd_soc_dai_driver ls1x_ac97_dai[] = {
+ {
+ .name = "ls1x-ac97",
+ .playback = {
+ .stream_name = "AC97 Playback",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .formats = LS1X_AC97_FMTS,
+ },
+ .capture = {
+ .stream_name = "AC97 Capture",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .formats = LS1X_AC97_FMTS,
+ },
+ .ops = &ls1x_ac97_dai_ops,
+ },
+};
+
+static const struct snd_soc_component_driver ls1x_ac97_component = {
+ .name = KBUILD_MODNAME,
+ .legacy_dai_naming = 1,
+};
+
+static int ls1x_ac97_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct ls1x_ac97 *ac97;
+ struct resource *res;
+ int ret;
+
+ ac97 = devm_kzalloc(dev, sizeof(struct ls1x_ac97), GFP_KERNEL);
+ if (!ac97)
+ return -ENOMEM;
+ ls1x_ac97 = ac97;
+ platform_set_drvdata(pdev, ac97);
+
+ ac97->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ac97->reg_base))
+ return PTR_ERR(ac97->reg_base);
+
+ ac97->regmap = devm_regmap_init_mmio(dev, ac97->reg_base, &ls1x_ac97_regmap_config);
+ if (IS_ERR(ac97->regmap))
+ return dev_err_probe(dev, PTR_ERR(ac97->regmap), "devm_regmap_init_mmio failed\n");
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "audio-tx");
+ if (!res)
+ return dev_err_probe(dev, -EINVAL, "Missing 'audio-tx' in reg-names property\n");
+
+ ac97->tx_dma_base = dma_map_resource(dev, res->start, resource_size(res),
+ DMA_TO_DEVICE, 0);
+ if (dma_mapping_error(dev, ac97->tx_dma_base))
+ return -ENXIO;
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "audio-rx");
+ if (!res)
+ return dev_err_probe(dev, -EINVAL, "Missing 'audio-rx' in reg-names property\n");
+
+ ac97->rx_dma_base = dma_map_resource(dev, res->start, resource_size(res),
+ DMA_FROM_DEVICE, 0);
+ if (dma_mapping_error(dev, ac97->rx_dma_base))
+ return -ENXIO;
+
+ ret = devm_snd_dmaengine_pcm_register(dev, NULL, 0);
+ if (ret)
+ dev_err_probe(dev, ret, "failed to register PCM\n");
+
+ ret = devm_snd_soc_register_component(dev, &ls1x_ac97_component,
+ ls1x_ac97_dai, ARRAY_SIZE(ls1x_ac97_dai));
+ if (ret)
+ dev_err_probe(dev, ret, "failed to register DAI\n");
+
+ return snd_soc_set_ac97_ops(&ls1x_ac97_ops);
+}
+
+static void ls1x_ac97_remove(struct platform_device *pdev)
+{
+ ls1x_ac97 = NULL;
+ snd_soc_set_ac97_ops(NULL);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int ls1x_ac97_suspend(struct device *dev)
+{
+ int val;
+
+ regmap_clear_bits(ls1x_ac97->regmap, AC97_OCC0, R_DMA_EN | R_CH_EN | L_DMA_EN | L_CH_EN);
+ regmap_clear_bits(ls1x_ac97->regmap, AC97_ICC,
+ M_DMA_EN | M_CH_EN | R_DMA_EN | R_CH_EN | L_DMA_EN | L_CH_EN);
+ regmap_set_bits(ls1x_ac97->regmap, AC97_CSR, CSR_RESUME);
+
+ return regmap_read_poll_timeout(ls1x_ac97->regmap, AC97_CSR, val,
+ (val & CSR_RESUME), 0, LS1X_AC97_TIMEOUT);
+}
+
+static int ls1x_ac97_resume(struct device *dev)
+{
+ int val;
+
+ regmap_set_bits(ls1x_ac97->regmap, AC97_OCC0, R_DMA_EN | R_CH_EN | L_DMA_EN | L_CH_EN);
+ regmap_set_bits(ls1x_ac97->regmap, AC97_ICC,
+ M_DMA_EN | M_CH_EN | R_DMA_EN | R_CH_EN | L_DMA_EN | L_CH_EN);
+ regmap_set_bits(ls1x_ac97->regmap, AC97_CSR, CSR_RESUME);
+
+ return regmap_read_poll_timeout(ls1x_ac97->regmap, AC97_CSR, val,
+ !(val & CSR_RESUME), 0, LS1X_AC97_TIMEOUT);
+}
+#endif
+
+static const struct dev_pm_ops ls1x_ac97_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(ls1x_ac97_suspend, ls1x_ac97_resume)
+};
+
+static const struct of_device_id ls1x_ac97_match[] = {
+ { .compatible = "loongson,ls1b-ac97" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ls1x_ac97_match);
+
+static struct platform_driver ls1x_ac97_driver = {
+ .probe = ls1x_ac97_probe,
+ .remove = ls1x_ac97_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = ls1x_ac97_match,
+ .pm = &ls1x_ac97_pm_ops,
+ },
+};
+
+module_platform_driver(ls1x_ac97_driver);
+
+MODULE_AUTHOR("Keguang Zhang <keguang.zhang@gmail.com>");
+MODULE_DESCRIPTION("Loongson-1 AC97 Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/loongson/loongson_i2s_pci.c b/sound/soc/loongson/loongson_i2s_pci.c
index d2d0e5d8cac9..1ea5501a97f8 100644
--- a/sound/soc/loongson/loongson_i2s_pci.c
+++ b/sound/soc/loongson/loongson_i2s_pci.c
@@ -16,6 +16,8 @@
#include "loongson_i2s.h"
#include "loongson_dma.h"
+#define DRIVER_NAME "loongson-i2s-pci"
+
static bool loongson_i2s_wr_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
@@ -92,13 +94,12 @@ static int loongson_i2s_pci_probe(struct pci_dev *pdev,
i2s->dev = dev;
pci_set_drvdata(pdev, i2s);
- ret = pcim_iomap_regions(pdev, 1 << 0, dev_name(dev));
- if (ret < 0) {
- dev_err(dev, "iomap_regions failed\n");
- return ret;
+ i2s->reg_base = pcim_iomap_region(pdev, 0, DRIVER_NAME);
+ if (IS_ERR(i2s->reg_base)) {
+ dev_err(dev, "iomap_region failed\n");
+ return PTR_ERR(i2s->reg_base);
}
- i2s->reg_base = pcim_iomap_table(pdev)[0];
i2s->regmap = devm_regmap_init_mmio(dev, i2s->reg_base,
&loongson_i2s_regmap_config);
if (IS_ERR(i2s->regmap))
@@ -147,7 +148,7 @@ static const struct pci_device_id loongson_i2s_ids[] = {
MODULE_DEVICE_TABLE(pci, loongson_i2s_ids);
static struct pci_driver loongson_i2s_driver = {
- .name = "loongson-i2s-pci",
+ .name = DRIVER_NAME,
.id_table = loongson_i2s_ids,
.probe = loongson_i2s_pci_probe,
.driver = {
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index 3033e2d3fe16..90e367586493 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -228,6 +228,7 @@ config SND_SOC_MT8188
config SND_SOC_MT8188_MT6359
tristate "ASoC Audio driver for MT8188 with MT6359 and I2S codecs"
depends on SND_SOC_MT8188 && MTK_PMIC_WRAP
+ depends on SND_SOC_MT6359_ACCDET || !SND_SOC_MT6359_ACCDET
depends on I2C
select SND_SOC_MT6359
select SND_SOC_HDMI_CODEC
diff --git a/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c b/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c
index d083b4bf0f95..e8884354995c 100644
--- a/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c
+++ b/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c
@@ -424,342 +424,97 @@ static const struct snd_soc_component_driver mt8183_afe_pcm_dai_component = {
.name = "mt8183-afe-pcm-dai",
};
+#define MT8183_MEMIF_BASE(_id, _en_reg, _fs_reg, _mono_reg) \
+ [MT8183_MEMIF_##_id] = { \
+ .name = #_id, \
+ .id = MT8183_MEMIF_##_id, \
+ .reg_ofs_base = AFE_##_id##_BASE, \
+ .reg_ofs_cur = AFE_##_id##_CUR, \
+ .reg_ofs_end = AFE_##_id##_END, \
+ .fs_reg = (_fs_reg), \
+ .fs_shift = _id##_MODE_SFT, \
+ .fs_maskbit = _id##_MODE_MASK, \
+ .mono_reg = (_mono_reg), \
+ .mono_shift = _id##_DATA_SFT, \
+ .enable_reg = (_en_reg), \
+ .enable_shift = _id##_ON_SFT, \
+ .hd_reg = AFE_MEMIF_HD_MODE, \
+ .hd_align_reg = AFE_MEMIF_HDALIGN, \
+ .hd_shift = _id##_HD_SFT, \
+ .hd_align_mshift = _id##_HD_ALIGN_SFT, \
+ .agent_disable_reg = -1, \
+ .agent_disable_shift = -1, \
+ .msb_reg = -1, \
+ .msb_shift = -1, \
+ }
+
+#define MT8183_MEMIF(_id, _fs_reg, _mono_reg) \
+ MT8183_MEMIF_BASE(_id, AFE_DAC_CON0, _fs_reg, _mono_reg)
+
+/* For convenience with macros: missing register fields */
+#define MOD_DAI_DATA_SFT -1
+#define HDMI_MODE_SFT -1
+#define HDMI_MODE_MASK -1
+#define HDMI_DATA_SFT -1
+#define HDMI_ON_SFT -1
+
+/* For convenience with macros: register name differences */
+#define AFE_VUL12_BASE AFE_VUL_D2_BASE
+#define AFE_VUL12_CUR AFE_VUL_D2_CUR
+#define AFE_VUL12_END AFE_VUL_D2_END
+#define AWB2_HD_ALIGN_SFT AWB2_ALIGN_SFT
+#define VUL12_DATA_SFT VUL12_MONO_SFT
+#define AFE_HDMI_BASE AFE_HDMI_OUT_BASE
+#define AFE_HDMI_CUR AFE_HDMI_OUT_CUR
+#define AFE_HDMI_END AFE_HDMI_OUT_END
+
static const struct mtk_base_memif_data memif_data[MT8183_MEMIF_NUM] = {
- [MT8183_MEMIF_DL1] = {
- .name = "DL1",
- .id = MT8183_MEMIF_DL1,
- .reg_ofs_base = AFE_DL1_BASE,
- .reg_ofs_cur = AFE_DL1_CUR,
- .fs_reg = AFE_DAC_CON1,
- .fs_shift = DL1_MODE_SFT,
- .fs_maskbit = DL1_MODE_MASK,
- .mono_reg = AFE_DAC_CON1,
- .mono_shift = DL1_DATA_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = DL1_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = DL1_HD_SFT,
- .hd_align_mshift = DL1_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_DL2] = {
- .name = "DL2",
- .id = MT8183_MEMIF_DL2,
- .reg_ofs_base = AFE_DL2_BASE,
- .reg_ofs_cur = AFE_DL2_CUR,
- .fs_reg = AFE_DAC_CON1,
- .fs_shift = DL2_MODE_SFT,
- .fs_maskbit = DL2_MODE_MASK,
- .mono_reg = AFE_DAC_CON1,
- .mono_shift = DL2_DATA_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = DL2_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = DL2_HD_SFT,
- .hd_align_mshift = DL2_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_DL3] = {
- .name = "DL3",
- .id = MT8183_MEMIF_DL3,
- .reg_ofs_base = AFE_DL3_BASE,
- .reg_ofs_cur = AFE_DL3_CUR,
- .fs_reg = AFE_DAC_CON2,
- .fs_shift = DL3_MODE_SFT,
- .fs_maskbit = DL3_MODE_MASK,
- .mono_reg = AFE_DAC_CON1,
- .mono_shift = DL3_DATA_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = DL3_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = DL3_HD_SFT,
- .hd_align_mshift = DL3_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_VUL2] = {
- .name = "VUL2",
- .id = MT8183_MEMIF_VUL2,
- .reg_ofs_base = AFE_VUL2_BASE,
- .reg_ofs_cur = AFE_VUL2_CUR,
- .fs_reg = AFE_DAC_CON2,
- .fs_shift = VUL2_MODE_SFT,
- .fs_maskbit = VUL2_MODE_MASK,
- .mono_reg = AFE_DAC_CON2,
- .mono_shift = VUL2_DATA_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = VUL2_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = VUL2_HD_SFT,
- .hd_align_mshift = VUL2_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_AWB] = {
- .name = "AWB",
- .id = MT8183_MEMIF_AWB,
- .reg_ofs_base = AFE_AWB_BASE,
- .reg_ofs_cur = AFE_AWB_CUR,
- .fs_reg = AFE_DAC_CON1,
- .fs_shift = AWB_MODE_SFT,
- .fs_maskbit = AWB_MODE_MASK,
- .mono_reg = AFE_DAC_CON1,
- .mono_shift = AWB_DATA_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = AWB_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = AWB_HD_SFT,
- .hd_align_mshift = AWB_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_AWB2] = {
- .name = "AWB2",
- .id = MT8183_MEMIF_AWB2,
- .reg_ofs_base = AFE_AWB2_BASE,
- .reg_ofs_cur = AFE_AWB2_CUR,
- .fs_reg = AFE_DAC_CON2,
- .fs_shift = AWB2_MODE_SFT,
- .fs_maskbit = AWB2_MODE_MASK,
- .mono_reg = AFE_DAC_CON2,
- .mono_shift = AWB2_DATA_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = AWB2_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = AWB2_HD_SFT,
- .hd_align_mshift = AWB2_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_VUL12] = {
- .name = "VUL12",
- .id = MT8183_MEMIF_VUL12,
- .reg_ofs_base = AFE_VUL_D2_BASE,
- .reg_ofs_cur = AFE_VUL_D2_CUR,
- .fs_reg = AFE_DAC_CON0,
- .fs_shift = VUL12_MODE_SFT,
- .fs_maskbit = VUL12_MODE_MASK,
- .mono_reg = AFE_DAC_CON0,
- .mono_shift = VUL12_MONO_SFT,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = VUL12_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = VUL12_HD_SFT,
- .hd_align_mshift = VUL12_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_MOD_DAI] = {
- .name = "MOD_DAI",
- .id = MT8183_MEMIF_MOD_DAI,
- .reg_ofs_base = AFE_MOD_DAI_BASE,
- .reg_ofs_cur = AFE_MOD_DAI_CUR,
- .fs_reg = AFE_DAC_CON1,
- .fs_shift = MOD_DAI_MODE_SFT,
- .fs_maskbit = MOD_DAI_MODE_MASK,
- .mono_reg = -1,
- .mono_shift = 0,
- .enable_reg = AFE_DAC_CON0,
- .enable_shift = MOD_DAI_ON_SFT,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = MOD_DAI_HD_SFT,
- .hd_align_mshift = MOD_DAI_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
- [MT8183_MEMIF_HDMI] = {
- .name = "HDMI",
- .id = MT8183_MEMIF_HDMI,
- .reg_ofs_base = AFE_HDMI_OUT_BASE,
- .reg_ofs_cur = AFE_HDMI_OUT_CUR,
- .fs_reg = -1,
- .fs_shift = -1,
- .fs_maskbit = -1,
- .mono_reg = -1,
- .mono_shift = -1,
- .enable_reg = -1, /* control in tdm for sync start */
- .enable_shift = -1,
- .hd_reg = AFE_MEMIF_HD_MODE,
- .hd_align_reg = AFE_MEMIF_HDALIGN,
- .hd_shift = HDMI_HD_SFT,
- .hd_align_mshift = HDMI_HD_ALIGN_SFT,
- .agent_disable_reg = -1,
- .agent_disable_shift = -1,
- .msb_reg = -1,
- .msb_shift = -1,
- },
+ MT8183_MEMIF(DL1, AFE_DAC_CON1, AFE_DAC_CON1),
+ MT8183_MEMIF(DL2, AFE_DAC_CON1, AFE_DAC_CON1),
+ MT8183_MEMIF(DL3, AFE_DAC_CON2, AFE_DAC_CON1),
+ MT8183_MEMIF(VUL2, AFE_DAC_CON2, AFE_DAC_CON2),
+ MT8183_MEMIF(AWB, AFE_DAC_CON1, AFE_DAC_CON1),
+ MT8183_MEMIF(AWB2, AFE_DAC_CON2, AFE_DAC_CON2),
+ MT8183_MEMIF(VUL12, AFE_DAC_CON0, AFE_DAC_CON0),
+ MT8183_MEMIF(MOD_DAI, AFE_DAC_CON1, -1),
+ /* enable control in tdm for sync start */
+ MT8183_MEMIF_BASE(HDMI, -1, -1, -1),
};
+#define MT8183_AFE_IRQ_BASE(_id, _fs_reg, _fs_shift, _fs_maskbit) \
+ [MT8183_IRQ_##_id] = { \
+ .id = MT8183_IRQ_##_id, \
+ .irq_cnt_reg = AFE_IRQ_MCU_CNT##_id, \
+ .irq_cnt_shift = 0, \
+ .irq_cnt_maskbit = 0x3ffff, \
+ .irq_fs_reg = _fs_reg, \
+ .irq_fs_shift = _fs_shift, \
+ .irq_fs_maskbit = _fs_maskbit, \
+ .irq_en_reg = AFE_IRQ_MCU_CON0, \
+ .irq_en_shift = IRQ##_id##_MCU_ON_SFT, \
+ .irq_clr_reg = AFE_IRQ_MCU_CLR, \
+ .irq_clr_shift = IRQ##_id##_MCU_CLR_SFT, \
+ }
+
+#define MT8183_AFE_IRQ(_id) \
+ MT8183_AFE_IRQ_BASE(_id, AFE_IRQ_MCU_CON1 + _id / 8 * 4, \
+ IRQ##_id##_MCU_MODE_SFT, \
+ IRQ##_id##_MCU_MODE_MASK)
+
+#define MT8183_AFE_IRQ_NOFS(_id) MT8183_AFE_IRQ_BASE(_id, -1, -1, -1)
+
static const struct mtk_base_irq_data irq_data[MT8183_IRQ_NUM] = {
- [MT8183_IRQ_0] = {
- .id = MT8183_IRQ_0,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT0,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ0_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ0_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ0_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ0_MCU_CLR_SFT,
- },
- [MT8183_IRQ_1] = {
- .id = MT8183_IRQ_1,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT1,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ1_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ1_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ1_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ1_MCU_CLR_SFT,
- },
- [MT8183_IRQ_2] = {
- .id = MT8183_IRQ_2,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT2,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ2_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ2_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ2_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ2_MCU_CLR_SFT,
- },
- [MT8183_IRQ_3] = {
- .id = MT8183_IRQ_3,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT3,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ3_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ3_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ3_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ3_MCU_CLR_SFT,
- },
- [MT8183_IRQ_4] = {
- .id = MT8183_IRQ_4,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT4,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ4_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ4_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ4_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ4_MCU_CLR_SFT,
- },
- [MT8183_IRQ_5] = {
- .id = MT8183_IRQ_5,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT5,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ5_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ5_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ5_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ5_MCU_CLR_SFT,
- },
- [MT8183_IRQ_6] = {
- .id = MT8183_IRQ_6,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT6,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ6_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ6_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ6_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ6_MCU_CLR_SFT,
- },
- [MT8183_IRQ_7] = {
- .id = MT8183_IRQ_7,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT7,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON1,
- .irq_fs_shift = IRQ7_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ7_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ7_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ7_MCU_CLR_SFT,
- },
- [MT8183_IRQ_8] = {
- .id = MT8183_IRQ_8,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT8,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = -1,
- .irq_fs_shift = -1,
- .irq_fs_maskbit = -1,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ8_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ8_MCU_CLR_SFT,
- },
- [MT8183_IRQ_11] = {
- .id = MT8183_IRQ_11,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT11,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON2,
- .irq_fs_shift = IRQ11_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ11_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ11_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ11_MCU_CLR_SFT,
- },
- [MT8183_IRQ_12] = {
- .id = MT8183_IRQ_12,
- .irq_cnt_reg = AFE_IRQ_MCU_CNT12,
- .irq_cnt_shift = 0,
- .irq_cnt_maskbit = 0x3ffff,
- .irq_fs_reg = AFE_IRQ_MCU_CON2,
- .irq_fs_shift = IRQ12_MCU_MODE_SFT,
- .irq_fs_maskbit = IRQ12_MCU_MODE_MASK,
- .irq_en_reg = AFE_IRQ_MCU_CON0,
- .irq_en_shift = IRQ12_MCU_ON_SFT,
- .irq_clr_reg = AFE_IRQ_MCU_CLR,
- .irq_clr_shift = IRQ12_MCU_CLR_SFT,
- },
+ MT8183_AFE_IRQ(0),
+ MT8183_AFE_IRQ(1),
+ MT8183_AFE_IRQ(2),
+ MT8183_AFE_IRQ(3),
+ MT8183_AFE_IRQ(4),
+ MT8183_AFE_IRQ(5),
+ MT8183_AFE_IRQ(6),
+ MT8183_AFE_IRQ(7),
+ MT8183_AFE_IRQ_NOFS(8),
+ MT8183_AFE_IRQ(11),
+ MT8183_AFE_IRQ(12),
};
static bool mt8183_is_volatile_reg(struct device *dev, unsigned int reg)
@@ -767,86 +522,46 @@ static bool mt8183_is_volatile_reg(struct device *dev, unsigned int reg)
/* these auto-gen reg has read-only bit, so put it as volatile */
/* volatile reg cannot be cached, so cannot be set when power off */
switch (reg) {
- case AUDIO_TOP_CON0: /* reg bit controlled by CCF */
- case AUDIO_TOP_CON1: /* reg bit controlled by CCF */
+ case AUDIO_TOP_CON0 ... AUDIO_TOP_CON1: /* reg bit controlled by CCF */
case AUDIO_TOP_CON3:
- case AFE_DL1_CUR:
- case AFE_DL1_END:
- case AFE_DL2_CUR:
- case AFE_DL2_END:
- case AFE_AWB_END:
- case AFE_AWB_CUR:
- case AFE_VUL_END:
- case AFE_VUL_CUR:
- case AFE_MEMIF_MON0:
- case AFE_MEMIF_MON1:
- case AFE_MEMIF_MON2:
- case AFE_MEMIF_MON3:
- case AFE_MEMIF_MON4:
- case AFE_MEMIF_MON5:
- case AFE_MEMIF_MON6:
- case AFE_MEMIF_MON7:
- case AFE_MEMIF_MON8:
- case AFE_MEMIF_MON9:
- case AFE_ADDA_SRC_DEBUG_MON0:
- case AFE_ADDA_SRC_DEBUG_MON1:
- case AFE_ADDA_UL_SRC_MON0:
- case AFE_ADDA_UL_SRC_MON1:
+ case AFE_DL1_CUR ... AFE_DL1_END:
+ case AFE_DL2_CUR ... AFE_DL2_END:
+ case AFE_AWB_END ... AFE_AWB_CUR:
+ case AFE_VUL_END ... AFE_VUL_CUR:
+ case AFE_MEMIF_MON0 ... AFE_MEMIF_MON9:
+ case AFE_ADDA_SRC_DEBUG_MON0 ... AFE_ADDA_SRC_DEBUG_MON1:
+ case AFE_ADDA_UL_SRC_MON0 ... AFE_ADDA_UL_SRC_MON1:
case AFE_SIDETONE_MON:
- case AFE_SIDETONE_CON0:
- case AFE_SIDETONE_COEFF:
+ case AFE_SIDETONE_CON0 ... AFE_SIDETONE_COEFF:
case AFE_BUS_MON0:
- case AFE_MRGIF_MON0:
- case AFE_MRGIF_MON1:
- case AFE_MRGIF_MON2:
- case AFE_I2S_MON:
+ case AFE_MRGIF_MON0 ... AFE_I2S_MON:
case AFE_DAC_MON:
- case AFE_VUL2_END:
- case AFE_VUL2_CUR:
- case AFE_IRQ0_MCU_CNT_MON:
- case AFE_IRQ6_MCU_CNT_MON:
- case AFE_MOD_DAI_END:
- case AFE_MOD_DAI_CUR:
- case AFE_VUL_D2_END:
- case AFE_VUL_D2_CUR:
- case AFE_DL3_CUR:
- case AFE_DL3_END:
+ case AFE_VUL2_END ... AFE_VUL2_CUR:
+ case AFE_IRQ0_MCU_CNT_MON ... AFE_IRQ6_MCU_CNT_MON:
+ case AFE_MOD_DAI_END ... AFE_MOD_DAI_CUR:
+ case AFE_VUL_D2_END ... AFE_VUL_D2_CUR:
+ case AFE_DL3_CUR ... AFE_DL3_END:
case AFE_HDMI_OUT_CON0:
- case AFE_HDMI_OUT_CUR:
- case AFE_HDMI_OUT_END:
- case AFE_IRQ3_MCU_CNT_MON:
- case AFE_IRQ4_MCU_CNT_MON:
- case AFE_IRQ_MCU_STATUS:
- case AFE_IRQ_MCU_CLR:
+ case AFE_HDMI_OUT_CUR ... AFE_HDMI_OUT_END:
+ case AFE_IRQ3_MCU_CNT_MON... AFE_IRQ4_MCU_CNT_MON:
+ case AFE_IRQ_MCU_STATUS ... AFE_IRQ_MCU_CLR:
case AFE_IRQ_MCU_MON2:
- case AFE_IRQ1_MCU_CNT_MON:
- case AFE_IRQ2_MCU_CNT_MON:
- case AFE_IRQ1_MCU_EN_CNT_MON:
- case AFE_IRQ5_MCU_CNT_MON:
+ case AFE_IRQ1_MCU_CNT_MON ... AFE_IRQ5_MCU_CNT_MON:
case AFE_IRQ7_MCU_CNT_MON:
case AFE_GAIN1_CUR:
case AFE_GAIN2_CUR:
case AFE_SRAM_DELSEL_CON0:
- case AFE_SRAM_DELSEL_CON2:
- case AFE_SRAM_DELSEL_CON3:
- case AFE_ASRC_2CH_CON12:
- case AFE_ASRC_2CH_CON13:
+ case AFE_SRAM_DELSEL_CON2 ... AFE_SRAM_DELSEL_CON3:
+ case AFE_ASRC_2CH_CON12 ... AFE_ASRC_2CH_CON13:
case PCM_INTF_CON2:
- case FPGA_CFG0:
- case FPGA_CFG1:
- case FPGA_CFG2:
- case FPGA_CFG3:
- case AUDIO_TOP_DBG_MON0:
- case AUDIO_TOP_DBG_MON1:
- case AFE_IRQ8_MCU_CNT_MON:
- case AFE_IRQ11_MCU_CNT_MON:
- case AFE_IRQ12_MCU_CNT_MON:
+ case FPGA_CFG0 ... FPGA_CFG1:
+ case FPGA_CFG2 ... FPGA_CFG3:
+ case AUDIO_TOP_DBG_MON0 ... AUDIO_TOP_DBG_MON1:
+ case AFE_IRQ8_MCU_CNT_MON ... AFE_IRQ12_MCU_CNT_MON:
case AFE_CBIP_MON0:
- case AFE_CBIP_SLV_MUX_MON0:
- case AFE_CBIP_SLV_DECODER_MON0:
+ case AFE_CBIP_SLV_MUX_MON0 ... AFE_CBIP_SLV_DECODER_MON0:
case AFE_ADDA6_SRC_DEBUG_MON0:
- case AFE_ADD6A_UL_SRC_MON0:
- case AFE_ADDA6_UL_SRC_MON1:
+ case AFE_ADD6A_UL_SRC_MON0... AFE_ADDA6_UL_SRC_MON1:
case AFE_DL1_CUR_MSB:
case AFE_DL2_CUR_MSB:
case AFE_AWB_CUR_MSB:
@@ -856,55 +571,23 @@ static bool mt8183_is_volatile_reg(struct device *dev, unsigned int reg)
case AFE_VUL_D2_CUR_MSB:
case AFE_DL3_CUR_MSB:
case AFE_HDMI_OUT_CUR_MSB:
- case AFE_AWB2_END:
- case AFE_AWB2_CUR:
+ case AFE_AWB2_END ... AFE_AWB2_CUR:
case AFE_AWB2_CUR_MSB:
- case AFE_ADDA_DL_SDM_FIFO_MON:
- case AFE_ADDA_DL_SRC_LCH_MON:
- case AFE_ADDA_DL_SRC_RCH_MON:
- case AFE_ADDA_DL_SDM_OUT_MON:
- case AFE_CONNSYS_I2S_MON:
- case AFE_ASRC_2CH_CON0:
- case AFE_ASRC_2CH_CON2:
- case AFE_ASRC_2CH_CON3:
- case AFE_ASRC_2CH_CON4:
- case AFE_ASRC_2CH_CON5:
- case AFE_ASRC_2CH_CON7:
- case AFE_ASRC_2CH_CON8:
- case AFE_MEMIF_MON12:
- case AFE_MEMIF_MON13:
- case AFE_MEMIF_MON14:
- case AFE_MEMIF_MON15:
- case AFE_MEMIF_MON16:
- case AFE_MEMIF_MON17:
- case AFE_MEMIF_MON18:
- case AFE_MEMIF_MON19:
- case AFE_MEMIF_MON20:
- case AFE_MEMIF_MON21:
- case AFE_MEMIF_MON22:
- case AFE_MEMIF_MON23:
- case AFE_MEMIF_MON24:
- case AFE_ADDA_MTKAIF_MON0:
- case AFE_ADDA_MTKAIF_MON1:
+ case AFE_ADDA_DL_SDM_FIFO_MON ... AFE_ADDA_DL_SDM_OUT_MON:
+ case AFE_CONNSYS_I2S_MON ... AFE_ASRC_2CH_CON0:
+ case AFE_ASRC_2CH_CON2 ... AFE_ASRC_2CH_CON5:
+ case AFE_ASRC_2CH_CON7 ... AFE_ASRC_2CH_CON8:
+ case AFE_MEMIF_MON12 ... AFE_MEMIF_MON24:
+ case AFE_ADDA_MTKAIF_MON0 ... AFE_ADDA_MTKAIF_MON1:
case AFE_AUD_PAD_TOP:
case AFE_GENERAL1_ASRC_2CH_CON0:
- case AFE_GENERAL1_ASRC_2CH_CON2:
- case AFE_GENERAL1_ASRC_2CH_CON3:
- case AFE_GENERAL1_ASRC_2CH_CON4:
- case AFE_GENERAL1_ASRC_2CH_CON5:
- case AFE_GENERAL1_ASRC_2CH_CON7:
- case AFE_GENERAL1_ASRC_2CH_CON8:
- case AFE_GENERAL1_ASRC_2CH_CON12:
- case AFE_GENERAL1_ASRC_2CH_CON13:
+ case AFE_GENERAL1_ASRC_2CH_CON2 ... AFE_GENERAL1_ASRC_2CH_CON5:
+ case AFE_GENERAL1_ASRC_2CH_CON7 ... AFE_GENERAL1_ASRC_2CH_CON8:
+ case AFE_GENERAL1_ASRC_2CH_CON12 ... AFE_GENERAL1_ASRC_2CH_CON13:
case AFE_GENERAL2_ASRC_2CH_CON0:
- case AFE_GENERAL2_ASRC_2CH_CON2:
- case AFE_GENERAL2_ASRC_2CH_CON3:
- case AFE_GENERAL2_ASRC_2CH_CON4:
- case AFE_GENERAL2_ASRC_2CH_CON5:
- case AFE_GENERAL2_ASRC_2CH_CON7:
- case AFE_GENERAL2_ASRC_2CH_CON8:
- case AFE_GENERAL2_ASRC_2CH_CON12:
- case AFE_GENERAL2_ASRC_2CH_CON13:
+ case AFE_GENERAL2_ASRC_2CH_CON2 ... AFE_GENERAL2_ASRC_2CH_CON5:
+ case AFE_GENERAL2_ASRC_2CH_CON7 ... AFE_GENERAL2_ASRC_2CH_CON8:
+ case AFE_GENERAL2_ASRC_2CH_CON12 ... AFE_GENERAL2_ASRC_2CH_CON13:
return true;
default:
return false;
diff --git a/sound/soc/mediatek/mt8188/mt8188-mt6359.c b/sound/soc/mediatek/mt8188/mt8188-mt6359.c
index 20dc9470ba76..a2a76b6df631 100644
--- a/sound/soc/mediatek/mt8188/mt8188-mt6359.c
+++ b/sound/soc/mediatek/mt8188/mt8188-mt6359.c
@@ -1333,11 +1333,11 @@ static int mt8188_mt6359_soc_card_probe(struct mtk_soc_card_data *soc_card_data,
for_each_card_prelinks(card, i, dai_link) {
if (strcmp(dai_link->name, "DPTX_BE") == 0) {
if (dai_link->num_codecs &&
- strcmp(dai_link->codecs->dai_name, "snd-soc-dummy-dai"))
+ !snd_soc_dlc_is_dummy(dai_link->codecs))
dai_link->init = mt8188_dptx_codec_init;
} else if (strcmp(dai_link->name, "ETDM3_OUT_BE") == 0) {
if (dai_link->num_codecs &&
- strcmp(dai_link->codecs->dai_name, "snd-soc-dummy-dai"))
+ !snd_soc_dlc_is_dummy(dai_link->codecs))
dai_link->init = mt8188_hdmi_codec_init;
} else if (strcmp(dai_link->name, "DL_SRC_BE") == 0 ||
strcmp(dai_link->name, "UL_SRC_BE") == 0) {
@@ -1387,7 +1387,7 @@ static int mt8188_mt6359_soc_card_probe(struct mtk_soc_card_data *soc_card_data,
init_es8326 = true;
}
} else {
- if (strcmp(dai_link->codecs->dai_name, "snd-soc-dummy-dai")) {
+ if (!snd_soc_dlc_is_dummy(dai_link->codecs)) {
if (!init_dumb) {
dai_link->init = mt8188_dumb_amp_init;
init_dumb = true;
diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c
index df29a9fa5aee..e57391c213e7 100644
--- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c
+++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c
@@ -92,10 +92,6 @@ static const struct snd_soc_dapm_widget mt8195_mt6359_widgets[] = {
};
static const struct snd_soc_dapm_route mt8195_mt6359_routes[] = {
- /* headset */
- { "Headphone", NULL, "HPOL" },
- { "Headphone", NULL, "HPOR" },
- { "IN1P", NULL, "Headset Mic" },
/* SOF Uplink */
{SOF_DMA_UL4, NULL, "O034"},
{SOF_DMA_UL4, NULL, "O035"},
@@ -131,6 +127,13 @@ static const struct snd_kcontrol_new mt8195_speaker_controls[] = {
SOC_DAPM_PIN_SWITCH("Ext Spk"),
};
+static const struct snd_soc_dapm_route mt8195_rt5682_routes[] = {
+ /* headset */
+ { "Headphone", NULL, "HPOL" },
+ { "Headphone", NULL, "HPOR" },
+ { "IN1P", NULL, "Headset Mic" },
+};
+
static const struct snd_soc_dapm_route mt8195_rt1011_routes[] = {
{ "Left Spk", NULL, "Left SPO" },
{ "Right Spk", NULL, "Right SPO" },
@@ -447,6 +450,7 @@ static int mt8195_rt5682_init(struct snd_soc_pcm_runtime *rtd)
snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt_afe);
struct mt8195_afe_private *afe_priv = afe->platform_priv;
+ struct snd_soc_card *card = rtd->card;
int ret;
priv->i2so1_mclk = afe_priv->clk[MT8195_CLK_TOP_APLL12_DIV2];
@@ -473,7 +477,12 @@ static int mt8195_rt5682_init(struct snd_soc_pcm_runtime *rtd)
return ret;
}
- return 0;
+ ret = snd_soc_dapm_add_routes(&card->dapm, mt8195_rt5682_routes,
+ ARRAY_SIZE(mt8195_rt5682_routes));
+ if (ret)
+ dev_err(rtd->dev, "unable to add dapm routes, ret %d\n", ret);
+
+ return ret;
};
static int mt8195_rt1011_etdm_hw_params(struct snd_pcm_substream *substream,
@@ -822,12 +831,12 @@ SND_SOC_DAILINK_DEFS(ETDM1_IN_BE,
SND_SOC_DAILINK_DEFS(ETDM2_IN_BE,
DAILINK_COMP_ARRAY(COMP_CPU("ETDM2_IN")),
- DAILINK_COMP_ARRAY(COMP_EMPTY()),
+ DAILINK_COMP_ARRAY(COMP_DUMMY()),
DAILINK_COMP_ARRAY(COMP_EMPTY()));
SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE,
DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")),
- DAILINK_COMP_ARRAY(COMP_EMPTY()),
+ DAILINK_COMP_ARRAY(COMP_DUMMY()),
DAILINK_COMP_ARRAY(COMP_EMPTY()));
SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE,
@@ -1379,11 +1388,11 @@ static int mt8195_mt6359_soc_card_probe(struct mtk_soc_card_data *soc_card_data,
for_each_card_prelinks(card, i, dai_link) {
if (strcmp(dai_link->name, "DPTX_BE") == 0) {
if (dai_link->num_codecs &&
- strcmp(dai_link->codecs->dai_name, "snd-soc-dummy-dai"))
+ !snd_soc_dlc_is_dummy(dai_link->codecs))
dai_link->init = mt8195_dptx_codec_init;
} else if (strcmp(dai_link->name, "ETDM3_OUT_BE") == 0) {
if (dai_link->num_codecs &&
- strcmp(dai_link->codecs->dai_name, "snd-soc-dummy-dai"))
+ !snd_soc_dlc_is_dummy(dai_link->codecs))
dai_link->init = mt8195_hdmi_codec_init;
} else if (strcmp(dai_link->name, "DL_SRC_BE") == 0 ||
strcmp(dai_link->name, "UL_SRC1_BE") == 0 ||
@@ -1423,7 +1432,7 @@ static int mt8195_mt6359_soc_card_probe(struct mtk_soc_card_data *soc_card_data,
codec_init |= RT5682_CODEC_INIT;
}
} else {
- if (strcmp(dai_link->codecs->dai_name, "snd-soc-dummy-dai")) {
+ if (!snd_soc_dlc_is_dummy(dai_link->codecs)) {
if (!(codec_init & DUMB_CODEC_INIT)) {
dai_link->init = mt8195_dumb_amp_init;
codec_init |= DUMB_CODEC_INIT;
@@ -1515,6 +1524,18 @@ static const struct mtk_soundcard_pdata mt8195_mt6359_max98390_rt5682_card = {
.soc_probe = mt8195_mt6359_soc_card_probe
};
+static const struct mtk_soundcard_pdata mt8195_mt6359_card = {
+ .card_name = "mt8195_mt6359",
+ .card_data = &(struct mtk_platform_card_data) {
+ .card = &mt8195_mt6359_soc_card,
+ .num_jacks = MT8195_JACK_MAX,
+ .pcm_constraints = mt8195_pcm_constraints,
+ .num_pcm_constraints = ARRAY_SIZE(mt8195_pcm_constraints),
+ },
+ .sof_priv = &mt8195_sof_priv,
+ .soc_probe = mt8195_mt6359_soc_card_probe
+};
+
static const struct of_device_id mt8195_mt6359_dt_match[] = {
{
.compatible = "mediatek,mt8195_mt6359_rt1019_rt5682",
@@ -1528,6 +1549,10 @@ static const struct of_device_id mt8195_mt6359_dt_match[] = {
.compatible = "mediatek,mt8195_mt6359_max98390_rt5682",
.data = &mt8195_mt6359_max98390_rt5682_card,
},
+ {
+ .compatible = "mediatek,mt8195_mt6359",
+ .data = &mt8195_mt6359_card,
+ },
{},
};
MODULE_DEVICE_TABLE(of, mt8195_mt6359_dt_match);
diff --git a/sound/soc/meson/meson-card-utils.c b/sound/soc/meson/meson-card-utils.c
index cfc7f6e41ab5..68531183fb60 100644
--- a/sound/soc/meson/meson-card-utils.c
+++ b/sound/soc/meson/meson-card-utils.c
@@ -231,7 +231,7 @@ static int meson_card_parse_of_optional(struct snd_soc_card *card,
const char *p))
{
/* If property is not provided, don't fail ... */
- if (!of_property_read_bool(card->dev->of_node, propname))
+ if (!of_property_present(card->dev->of_node, propname))
return 0;
/* ... but do fail if it is provided and the parsing fails */
diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
index 9c98a35ad099..a0d90462fd6a 100644
--- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
+++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
@@ -206,7 +206,7 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s
rc = q6apm_graph_start(dai_data->graph[dai->id]);
if (rc < 0) {
- dev_err(dai->dev, "fail to start APM port %x\n", dai->id);
+ dev_err(dai->dev, "Failed to start APM port %d\n", dai->id);
goto err;
}
dai_data->is_port_started[dai->id] = true;
diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c
index 311377317176..99fd34728e38 100644
--- a/sound/soc/qcom/sc8280xp.c
+++ b/sound/soc/qcom/sc8280xp.c
@@ -186,6 +186,8 @@ static int sc8280xp_platform_probe(struct platform_device *pdev)
static const struct of_device_id snd_sc8280xp_dt_match[] = {
{.compatible = "qcom,qcm6490-idp-sndcard", "qcm6490"},
{.compatible = "qcom,qcs6490-rb3gen2-sndcard", "qcs6490"},
+ {.compatible = "qcom,qcs9075-sndcard", "qcs9075"},
+ {.compatible = "qcom,qcs9100-sndcard", "qcs9100"},
{.compatible = "qcom,sc8280xp-sndcard", "sc8280xp"},
{.compatible = "qcom,sm8450-sndcard", "sm8450"},
{.compatible = "qcom,sm8550-sndcard", "sm8550"},
diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c
index fcc7df75346f..a233b80049ee 100644
--- a/sound/soc/qcom/sdm845.c
+++ b/sound/soc/qcom/sdm845.c
@@ -91,6 +91,10 @@ static int sdm845_slim_snd_hw_params(struct snd_pcm_substream *substream,
else
ret = snd_soc_dai_set_channel_map(cpu_dai, tx_ch_cnt,
tx_ch, 0, NULL);
+ if (ret != 0 && ret != -ENOTSUPP) {
+ dev_err(rtd->dev, "failed to set cpu chan map, err:%d\n", ret);
+ return ret;
+ }
}
return 0;
diff --git a/sound/soc/renesas/Kconfig b/sound/soc/renesas/Kconfig
index cb01fb36355f..dabf02a955ca 100644
--- a/sound/soc/renesas/Kconfig
+++ b/sound/soc/renesas/Kconfig
@@ -46,6 +46,13 @@ config SND_SOC_RCAR
help
This option enables R-Car SRU/SCU/SSIU/SSI sound support
+config SND_SOC_MSIOF
+ tristate "R-Car series MSIOF support"
+ depends on OF
+ select SND_DMAENGINE_PCM
+ help
+ This option enables R-Car MSIOF sound support
+
config SND_SOC_RZ
tristate "RZ/G2L series SSIF-2 support"
depends on ARCH_RZG2L || COMPILE_TEST
diff --git a/sound/soc/renesas/rcar/Makefile b/sound/soc/renesas/rcar/Makefile
index 45eb875a912a..3a2c875595bd 100644
--- a/sound/soc/renesas/rcar/Makefile
+++ b/sound/soc/renesas/rcar/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
snd-soc-rcar-y := core.o gen.o dma.o adg.o ssi.o ssiu.o src.o ctu.o mix.o dvc.o cmd.o debugfs.o
obj-$(CONFIG_SND_SOC_RCAR) += snd-soc-rcar.o
+
+snd-soc-msiof-y := msiof.o
+obj-$(CONFIG_SND_SOC_MSIOF) += snd-soc-msiof.o
diff --git a/sound/soc/renesas/rcar/adg.c b/sound/soc/renesas/rcar/adg.c
index 191f212d338c..8641b73d1f77 100644
--- a/sound/soc/renesas/rcar/adg.c
+++ b/sound/soc/renesas/rcar/adg.c
@@ -19,6 +19,7 @@
#define CLKOUT3 3
#define CLKOUTMAX 4
+#define BRGCKR_31 (1 << 31)
#define BRRx_MASK(x) (0x3FF & x)
static struct rsnd_mod_ops adg_ops = {
@@ -30,6 +31,7 @@ static struct rsnd_mod_ops adg_ops = {
#define ADG_HZ_SIZE 2
struct rsnd_adg {
+ struct clk *adg;
struct clk *clkin[CLKINMAX];
struct clk *clkout[CLKOUTMAX];
struct clk *null_clk;
@@ -361,10 +363,13 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
rsnd_adg_set_ssi_clk(ssi_mod, data);
+ ckr = adg->ckr & ~BRGCKR_31;
if (0 == (rate % 8000))
- ckr = 0x80000000; /* BRGB output = 48kHz */
-
- rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
+ ckr |= BRGCKR_31; /* use BRGB output = 48kHz */
+ if (ckr != adg->ckr) {
+ rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr);
+ adg->ckr = ckr;
+ }
dev_dbg(dev, "CLKOUT is based on BRG%c (= %dHz)\n",
(ckr) ? 'B' : 'A',
@@ -382,6 +387,10 @@ int rsnd_adg_clk_control(struct rsnd_priv *priv, int enable)
int ret = 0, i;
if (enable) {
+ ret = clk_prepare_enable(adg->adg);
+ if (ret < 0)
+ return ret;
+
rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr);
rsnd_mod_write(adg_mod, BRRA, adg->brga);
rsnd_mod_write(adg_mod, BRRB, adg->brgb);
@@ -415,6 +424,10 @@ int rsnd_adg_clk_control(struct rsnd_priv *priv, int enable)
if (ret < 0)
rsnd_adg_clk_disable(priv);
+ /* disable adg */
+ if (!enable)
+ clk_disable_unprepare(adg->adg);
+
return ret;
}
@@ -471,6 +484,16 @@ static int rsnd_adg_get_clkin(struct rsnd_priv *priv)
clkin_size = ARRAY_SIZE(clkin_name_gen4);
}
+ /*
+ * get adg
+ * No "adg" is not error
+ */
+ clk = devm_clk_get(dev, "adg");
+ if (IS_ERR_OR_NULL(clk))
+ clk = rsnd_adg_null_clk_get(priv);
+ adg->adg = clk;
+
+ /* get clkin */
for (i = 0; i < clkin_size; i++) {
clk = devm_clk_get(dev, clkin_name[i]);
@@ -683,6 +706,9 @@ static int rsnd_adg_get_clkout(struct rsnd_priv *priv)
}
rsnd_adg_get_clkout_end:
+ if (0 == (req_rate[0] % 8000))
+ ckr |= BRGCKR_31; /* use BRGB output = 48kHz */
+
adg->ckr = ckr;
adg->brga = brga;
adg->brgb = brgb;
diff --git a/sound/soc/renesas/rcar/core.c b/sound/soc/renesas/rcar/core.c
index 30afc942d381..a72f36d3ca2c 100644
--- a/sound/soc/renesas/rcar/core.c
+++ b/sound/soc/renesas/rcar/core.c
@@ -597,7 +597,7 @@ int rsnd_dai_connect(struct rsnd_mod *mod,
dev_dbg(dev, "%s is connected to io (%s)\n",
rsnd_mod_name(mod),
- rsnd_io_is_play(io) ? "Playback" : "Capture");
+ snd_pcm_direction_name(io->substream->stream));
return 0;
}
@@ -1482,8 +1482,13 @@ static int rsnd_dai_probe(struct rsnd_priv *priv)
int dai_i;
nr = rsnd_dai_of_node(priv, &is_graph);
+
+ /*
+ * There is a case that it is used only for ADG (Sound Clock).
+ * No DAI is not error
+ */
if (!nr)
- return -EINVAL;
+ return 0;
rdrv = devm_kcalloc(dev, nr, sizeof(*rdrv), GFP_KERNEL);
rdai = devm_kcalloc(dev, nr, sizeof(*rdai), GFP_KERNEL);
diff --git a/sound/soc/renesas/rcar/msiof.c b/sound/soc/renesas/rcar/msiof.c
new file mode 100644
index 000000000000..75c9e91bada1
--- /dev/null
+++ b/sound/soc/renesas/rcar/msiof.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Renesas R-Car MSIOF (Clock-Synchronized Serial Interface with FIFO) I2S driver
+//
+// Copyright (C) 2025 Renesas Solutions Corp.
+// Author: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+//
+
+/*
+ * [NOTE]
+ *
+ * This driver doesn't support Clock/Frame Provider Mode
+ *
+ * Basically MSIOF is created for SPI, but we can use it as I2S (Sound), etc. Because of it, when
+ * we use it as I2S (Sound) with Provider Mode, we need to send dummy TX data even though it was
+ * used for RX. Because SPI HW needs TX Clock/Frame output for RX purpose.
+ * But it makes driver code complex in I2S (Sound).
+ *
+ * And when we use it as I2S (Sound) as Provider Mode, the clock source is [MSO clock] (= 133.33MHz)
+ * SoC internal clock. It is not for 48kHz/44.1kHz base clock. Thus the output/input will not be
+ * accurate sound.
+ *
+ * Because of these reasons, this driver doesn't support Clock/Frame Provider Mode. Use it as
+ * Clock/Frame Consumer Mode.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <sound/dmaengine_pcm.h>
+#include <sound/soc.h>
+
+/* register */
+#define SITMDR1 0x00
+#define SITMDR2 0x04
+#define SITMDR3 0x08
+#define SIRMDR1 0x10
+#define SIRMDR2 0x14
+#define SIRMDR3 0x18
+#define SICTR 0x28
+#define SISTR 0x40
+#define SIIER 0x44
+#define SITFDR 0x50
+#define SIRFDR 0x60
+
+/* SITMDR1/ SIRMDR1 */
+#define PCON (1 << 30) /* Transfer Signal Connection */
+#define SYNCMD_LR (3 << 28) /* L/R mode */
+#define SYNCAC (1 << 25) /* Sync Polarity (Active-low) */
+#define DTDL_1 (1 << 20) /* 1-clock-cycle delay */
+#define TXSTP (1 << 0) /* Transmission/Reception Stop on FIFO */
+
+/* SITMDR2 and SIRMDR2 */
+#define BITLEN1(x) (((x) - 1) << 24) /* Data Size (8-32 bits) */
+#define GRP (1 << 30) /* Group count */
+
+/* SICTR */
+#define TEDG (1 << 27) /* Transmit Timing (1 = falling edge) */
+#define REDG (1 << 26) /* Receive Timing (1 = rising edge) */
+#define TXE (1 << 9) /* Transmit Enable */
+#define RXE (1 << 8) /* Receive Enable */
+
+/* SISTR */
+#define TFSERR (1 << 21) /* Transmit Frame Synchronization Error */
+#define TFOVF (1 << 20) /* Transmit FIFO Overflow */
+#define TFUDF (1 << 19) /* Transmit FIFO Underflow */
+#define RFSERR (1 << 5) /* Receive Frame Synchronization Error */
+#define RFUDF (1 << 4) /* Receive FIFO Underflow */
+#define RFOVF (1 << 3) /* Receive FIFO Overflow */
+#define SISTR_ERR_TX (TFSERR | TFOVF | TFUDF)
+#define SISTR_ERR_RX (RFSERR | RFOVF | RFUDF)
+#define SISTR_ERR (SISTR_ERR_TX | SISTR_ERR_RX)
+
+/* SIIER */
+#define TDMAE (1 << 31) /* Transmit Data DMA Transfer Req. Enable */
+#define TDREQE (1 << 28) /* Transmit Data Transfer Request Enable */
+#define RDMAE (1 << 15) /* Receive Data DMA Transfer Req. Enable */
+#define RDREQE (1 << 12) /* Receive Data Transfer Request Enable */
+
+/*
+ * The data on memory in 24bit case is located at <right> side
+ * [ xxxxxx]
+ * [ xxxxxx]
+ * [ xxxxxx]
+ *
+ * HW assuming signal in 24bit case is located at <left> side
+ * ---+ +---------+
+ * +---------+ +---------+...
+ * [xxxxxx ][xxxxxx ][xxxxxx ]
+ *
+ * When we use 24bit data, it will be transferred via 32bit width via DMA,
+ * and MSIOF/DMA doesn't support data shift, we can't use 24bit data correctly.
+ * There is no such issue on 16/32bit data case.
+ */
+#define MSIOF_RATES SNDRV_PCM_RATE_8000_192000
+#define MSIOF_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\
+ SNDRV_PCM_FMTBIT_S32_LE)
+
+struct msiof_priv {
+ struct device *dev;
+ struct snd_pcm_substream *substream[SNDRV_PCM_STREAM_LAST + 1];
+ spinlock_t lock;
+ void __iomem *base;
+ resource_size_t phy_addr;
+
+ /* for error */
+ int err_syc[SNDRV_PCM_STREAM_LAST + 1];
+ int err_ovf[SNDRV_PCM_STREAM_LAST + 1];
+ int err_udf[SNDRV_PCM_STREAM_LAST + 1];
+
+ /* bit field */
+ u32 flags;
+#define MSIOF_FLAGS_NEED_DELAY (1 << 0)
+};
+#define msiof_flag_has(priv, flag) (priv->flags & flag)
+#define msiof_flag_set(priv, flag) (priv->flags |= flag)
+
+#define msiof_is_play(substream) ((substream)->stream == SNDRV_PCM_STREAM_PLAYBACK)
+#define msiof_read(priv, reg) ioread32((priv)->base + reg)
+#define msiof_write(priv, reg, val) iowrite32(val, (priv)->base + reg)
+#define msiof_status_clear(priv) msiof_write(priv, SISTR, SISTR_ERR)
+
+static void msiof_update(struct msiof_priv *priv, u32 reg, u32 mask, u32 val)
+{
+ u32 old = msiof_read(priv, reg);
+ u32 new = (old & ~mask) | (val & mask);
+
+ if (old != new)
+ msiof_write(priv, reg, new);
+}
+
+static void msiof_update_and_wait(struct msiof_priv *priv, u32 reg, u32 mask, u32 val, u32 expect)
+{
+ u32 data;
+ int ret;
+
+ msiof_update(priv, reg, mask, val);
+
+ ret = readl_poll_timeout_atomic(priv->base + reg, data,
+ (data & mask) == expect, 1, 128);
+ if (ret)
+ dev_warn(priv->dev, "write timeout [0x%02x] 0x%08x / 0x%08x\n",
+ reg, data, expect);
+}
+
+static int msiof_hw_start(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream, int cmd)
+{
+ struct msiof_priv *priv = snd_soc_component_get_drvdata(component);
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ int is_play = msiof_is_play(substream);
+ int width = snd_pcm_format_width(runtime->format);
+ u32 val;
+
+ /*
+ * see
+ * [NOTE] on top of this driver
+ */
+ /*
+ * see
+ * Datasheet 109.3.6 [Transmit and Receive Procedures]
+ *
+ * TX: Fig 109.14 - Fig 109.23
+ * RX: Fig 109.15
+ */
+
+ /* reset errors */
+ priv->err_syc[substream->stream] =
+ priv->err_ovf[substream->stream] =
+ priv->err_udf[substream->stream] = 0;
+
+ /* SITMDRx */
+ if (is_play) {
+ val = PCON | SYNCMD_LR | SYNCAC | TXSTP;
+ if (msiof_flag_has(priv, MSIOF_FLAGS_NEED_DELAY))
+ val |= DTDL_1;
+
+ msiof_write(priv, SITMDR1, val);
+
+ val = BITLEN1(width);
+ msiof_write(priv, SITMDR2, val | GRP);
+ msiof_write(priv, SITMDR3, val);
+
+ }
+ /* SIRMDRx */
+ else {
+ val = SYNCMD_LR | SYNCAC;
+ if (msiof_flag_has(priv, MSIOF_FLAGS_NEED_DELAY))
+ val |= DTDL_1;
+
+ msiof_write(priv, SIRMDR1, val);
+
+ val = BITLEN1(width);
+ msiof_write(priv, SIRMDR2, val | GRP);
+ msiof_write(priv, SIRMDR3, val);
+ }
+
+ /* SIIER */
+ if (is_play)
+ val = TDREQE | TDMAE | SISTR_ERR_TX;
+ else
+ val = RDREQE | RDMAE | SISTR_ERR_RX;
+ msiof_update(priv, SIIER, val, val);
+
+ /* SICTR */
+ if (is_play)
+ val = TXE | TEDG;
+ else
+ val = RXE | REDG;
+ msiof_update_and_wait(priv, SICTR, val, val, val);
+
+ msiof_status_clear(priv);
+
+ /* Start DMAC */
+ snd_dmaengine_pcm_trigger(substream, cmd);
+
+ return 0;
+}
+
+static int msiof_hw_stop(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream, int cmd)
+{
+ struct msiof_priv *priv = snd_soc_component_get_drvdata(component);
+ struct device *dev = component->dev;
+ int is_play = msiof_is_play(substream);
+ u32 val;
+
+ /* SIIER */
+ if (is_play)
+ val = TDREQE | TDMAE | SISTR_ERR_TX;
+ else
+ val = RDREQE | RDMAE | SISTR_ERR_RX;
+ msiof_update(priv, SIIER, val, 0);
+
+ /* Stop DMAC */
+ snd_dmaengine_pcm_trigger(substream, cmd);
+
+ /* SICTR */
+ if (is_play)
+ val = TXE;
+ else
+ val = RXE;
+ msiof_update_and_wait(priv, SICTR, val, 0, 0);
+
+ /* indicate error status if exist */
+ if (priv->err_syc[substream->stream] ||
+ priv->err_ovf[substream->stream] ||
+ priv->err_udf[substream->stream])
+ dev_warn(dev, "FSERR(%s) = %d, FOVF = %d, FUDF = %d\n",
+ snd_pcm_direction_name(substream->stream),
+ priv->err_syc[substream->stream],
+ priv->err_ovf[substream->stream],
+ priv->err_udf[substream->stream]);
+
+ return 0;
+}
+
+static int msiof_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ struct msiof_priv *priv = snd_soc_dai_get_drvdata(dai);
+
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ /*
+ * It supports Clock/Frame Consumer Mode only
+ * see
+ * [NOTE] on top of this driver
+ */
+ case SND_SOC_DAIFMT_BC_FC:
+ break;
+ /* others are error */
+ default:
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ /* it supports NB_NF only */
+ case SND_SOC_DAIFMT_NB_NF:
+ default:
+ break;
+ /* others are error */
+ case SND_SOC_DAIFMT_NB_IF:
+ case SND_SOC_DAIFMT_IB_NF:
+ case SND_SOC_DAIFMT_IB_IF:
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ msiof_flag_set(priv, MSIOF_FLAGS_NEED_DELAY);
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Select below from Sound Card, not auto
+ * SND_SOC_DAIFMT_CBC_CFC
+ * SND_SOC_DAIFMT_CBP_CFP
+ */
+static const u64 msiof_dai_formats = SND_SOC_POSSIBLE_DAIFMT_I2S |
+ SND_SOC_POSSIBLE_DAIFMT_LEFT_J |
+ SND_SOC_POSSIBLE_DAIFMT_NB_NF;
+
+static const struct snd_soc_dai_ops msiof_dai_ops = {
+ .set_fmt = msiof_dai_set_fmt,
+ .auto_selectable_formats = &msiof_dai_formats,
+ .num_auto_selectable_formats = 1,
+};
+
+static struct snd_soc_dai_driver msiof_dai_driver = {
+ .name = "msiof-dai",
+ .playback = {
+ .rates = MSIOF_RATES,
+ .formats = MSIOF_FMTS,
+ .channels_min = 2,
+ .channels_max = 2,
+ },
+ .capture = {
+ .rates = MSIOF_RATES,
+ .formats = MSIOF_FMTS,
+ .channels_min = 2,
+ .channels_max = 2,
+ },
+ .ops = &msiof_dai_ops,
+};
+
+static struct snd_pcm_hardware msiof_pcm_hardware = {
+ .info = SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_MMAP_VALID,
+ .buffer_bytes_max = 64 * 1024,
+ .period_bytes_min = 32,
+ .period_bytes_max = 8192,
+ .periods_min = 1,
+ .periods_max = 32,
+ .fifo_size = 64,
+};
+
+static int msiof_open(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct device *dev = component->dev;
+ struct dma_chan *chan;
+ static const char * const dma_names[] = {"rx", "tx"};
+ int is_play = msiof_is_play(substream);
+ int ret;
+
+ chan = of_dma_request_slave_channel(dev->of_node, dma_names[is_play]);
+ if (IS_ERR(chan))
+ return PTR_ERR(chan);
+
+ ret = snd_dmaengine_pcm_open(substream, chan);
+ if (ret < 0)
+ goto open_err_dma;
+
+ snd_soc_set_runtime_hwparams(substream, &msiof_pcm_hardware);
+
+ ret = snd_pcm_hw_constraint_integer(substream->runtime, SNDRV_PCM_HW_PARAM_PERIODS);
+
+open_err_dma:
+ if (ret < 0)
+ dma_release_channel(chan);
+
+ return ret;
+}
+
+static int msiof_close(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ return snd_dmaengine_pcm_close_release_chan(substream);
+}
+
+static snd_pcm_uframes_t msiof_pointer(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ return snd_dmaengine_pcm_pointer(substream);
+}
+
+#define PREALLOC_BUFFER (32 * 1024)
+#define PREALLOC_BUFFER_MAX (32 * 1024)
+static int msiof_new(struct snd_soc_component *component,
+ struct snd_soc_pcm_runtime *rtd)
+{
+ snd_pcm_set_managed_buffer_all(rtd->pcm, SNDRV_DMA_TYPE_DEV,
+ rtd->card->snd_card->dev,
+ PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
+ return 0;
+}
+
+static int msiof_trigger(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream, int cmd)
+{
+ struct device *dev = component->dev;
+ struct msiof_priv *priv = dev_get_drvdata(dev);
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ priv->substream[substream->stream] = substream;
+ fallthrough;
+ case SNDRV_PCM_TRIGGER_RESUME:
+ ret = msiof_hw_start(component, substream, cmd);
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ priv->substream[substream->stream] = NULL;
+ fallthrough;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ ret = msiof_hw_stop(component, substream, cmd);
+ break;
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
+}
+
+static int msiof_hw_params(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+{
+ struct msiof_priv *priv = dev_get_drvdata(component->dev);
+ struct dma_chan *chan = snd_dmaengine_pcm_get_chan(substream);
+ struct dma_slave_config cfg = {};
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ ret = snd_hwparams_to_dma_slave_config(substream, params, &cfg);
+ if (ret < 0)
+ goto hw_params_out;
+
+ cfg.dst_addr = priv->phy_addr + SITFDR;
+ cfg.src_addr = priv->phy_addr + SIRFDR;
+
+ ret = dmaengine_slave_config(chan, &cfg);
+hw_params_out:
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
+}
+
+static const struct snd_soc_component_driver msiof_component_driver = {
+ .name = "msiof",
+ .open = msiof_open,
+ .close = msiof_close,
+ .pointer = msiof_pointer,
+ .pcm_construct = msiof_new,
+ .trigger = msiof_trigger,
+ .hw_params = msiof_hw_params,
+};
+
+static irqreturn_t msiof_interrupt(int irq, void *data)
+{
+ struct msiof_priv *priv = data;
+ struct snd_pcm_substream *substream;
+ u32 sistr;
+
+ spin_lock(&priv->lock);
+
+ sistr = msiof_read(priv, SISTR);
+ msiof_status_clear(priv);
+
+ spin_unlock(&priv->lock);
+
+ /* overflow/underflow error */
+ substream = priv->substream[SNDRV_PCM_STREAM_PLAYBACK];
+ if (substream && (sistr & SISTR_ERR_TX)) {
+ // snd_pcm_stop_xrun(substream);
+ if (sistr & TFSERR)
+ priv->err_syc[SNDRV_PCM_STREAM_PLAYBACK]++;
+ if (sistr & TFOVF)
+ priv->err_ovf[SNDRV_PCM_STREAM_PLAYBACK]++;
+ if (sistr & TFUDF)
+ priv->err_udf[SNDRV_PCM_STREAM_PLAYBACK]++;
+ }
+
+ substream = priv->substream[SNDRV_PCM_STREAM_CAPTURE];
+ if (substream && (sistr & SISTR_ERR_RX)) {
+ // snd_pcm_stop_xrun(substream);
+ if (sistr & RFSERR)
+ priv->err_syc[SNDRV_PCM_STREAM_CAPTURE]++;
+ if (sistr & RFOVF)
+ priv->err_ovf[SNDRV_PCM_STREAM_CAPTURE]++;
+ if (sistr & RFUDF)
+ priv->err_udf[SNDRV_PCM_STREAM_CAPTURE]++;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int msiof_probe(struct platform_device *pdev)
+{
+ struct msiof_priv *priv;
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ int irq, ret;
+
+ /* Check MSIOF as Sound mode or SPI mode */
+ struct device_node *port __free(device_node) = of_graph_get_next_port(dev->of_node, NULL);
+ if (!port)
+ return -ENODEV;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq <= 0)
+ return irq;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ ret = devm_request_irq(dev, irq, msiof_interrupt, 0, dev_name(dev), priv);
+ if (ret)
+ return ret;
+
+ priv->dev = dev;
+ priv->phy_addr = res->start;
+
+ spin_lock_init(&priv->lock);
+ platform_set_drvdata(pdev, priv);
+
+ devm_pm_runtime_enable(dev);
+
+ ret = devm_snd_soc_register_component(dev, &msiof_component_driver,
+ &msiof_dai_driver, 1);
+
+ return ret;
+}
+
+static const struct of_device_id msiof_of_match[] = {
+ { .compatible = "renesas,rcar-gen4-msiof", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, msiof_of_match);
+
+static struct platform_driver msiof_driver = {
+ .driver = {
+ .name = "msiof-pcm-audio",
+ .of_match_table = msiof_of_match,
+ },
+ .probe = msiof_probe,
+};
+module_platform_driver(msiof_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Renesas R-Car MSIOF I2S audio driver");
+MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
diff --git a/sound/soc/rockchip/Kconfig b/sound/soc/rockchip/Kconfig
index f98a2fa85edd..a08544827b2a 100644
--- a/sound/soc/rockchip/Kconfig
+++ b/sound/soc/rockchip/Kconfig
@@ -37,6 +37,16 @@ config SND_SOC_ROCKCHIP_PDM
Rockchip PDM Controller. The Controller supports up to maximum of
8 channels record.
+config SND_SOC_ROCKCHIP_SAI
+ tristate "Rockchip SAI Controller Driver"
+ depends on HAVE_CLK && SND_SOC_ROCKCHIP
+ select SND_SOC_GENERIC_DMAENGINE_PCM
+ help
+ Say Y or M if you want to add support for the Rockchip Serial Audio
+ Interface controller found on Rockchip SoCs such as the RK3576. The
+ controller may support both playback and recording, with up to 4 lanes
+ for each and up to 128 channels per lane in TDM mode.
+
config SND_SOC_ROCKCHIP_SPDIF
tristate "Rockchip SPDIF Device Driver"
depends on HAVE_CLK && SND_SOC_ROCKCHIP
diff --git a/sound/soc/rockchip/Makefile b/sound/soc/rockchip/Makefile
index 2ee9c08131d1..af6dc1165347 100644
--- a/sound/soc/rockchip/Makefile
+++ b/sound/soc/rockchip/Makefile
@@ -3,10 +3,12 @@
snd-soc-rockchip-i2s-y := rockchip_i2s.o
snd-soc-rockchip-i2s-tdm-y := rockchip_i2s_tdm.o
snd-soc-rockchip-pdm-y := rockchip_pdm.o
+snd-soc-rockchip-sai-y := rockchip_sai.o
snd-soc-rockchip-spdif-y := rockchip_spdif.o
obj-$(CONFIG_SND_SOC_ROCKCHIP_I2S) += snd-soc-rockchip-i2s.o
obj-$(CONFIG_SND_SOC_ROCKCHIP_PDM) += snd-soc-rockchip-pdm.o
+obj-$(CONFIG_SND_SOC_ROCKCHIP_SAI) += snd-soc-rockchip-sai.o
obj-$(CONFIG_SND_SOC_ROCKCHIP_SPDIF) += snd-soc-rockchip-spdif.o
obj-$(CONFIG_SND_SOC_ROCKCHIP_I2S_TDM) += snd-soc-rockchip-i2s-tdm.o
diff --git a/sound/soc/rockchip/rockchip_sai.c b/sound/soc/rockchip/rockchip_sai.c
new file mode 100644
index 000000000000..602f1ddfad00
--- /dev/null
+++ b/sound/soc/rockchip/rockchip_sai.c
@@ -0,0 +1,1555 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ALSA SoC Audio Layer - Rockchip SAI Controller driver
+ *
+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd.
+ * Copyright (c) 2025 Collabora Ltd.
+ */
+
+#include <linux/module.h>
+#include <linux/mfd/syscon.h>
+#include <linux/delay.h>
+#include <linux/of_gpio.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/spinlock.h>
+#include <sound/pcm_params.h>
+#include <sound/dmaengine_pcm.h>
+#include <sound/tlv.h>
+
+#include "rockchip_sai.h"
+
+#define DRV_NAME "rockchip-sai"
+
+#define CLK_SHIFT_RATE_HZ_MAX 5
+#define FW_RATIO_MAX 8
+#define FW_RATIO_MIN 1
+#define MAXBURST_PER_FIFO 8
+
+#define TIMEOUT_US 1000
+#define WAIT_TIME_MS_MAX 10000
+
+#define MAX_LANES 4
+
+enum fpw_mode {
+ FPW_ONE_BCLK_WIDTH,
+ FPW_ONE_SLOT_WIDTH,
+ FPW_HALF_FRAME_WIDTH,
+};
+
+struct rk_sai_dev {
+ struct device *dev;
+ struct clk *hclk;
+ struct clk *mclk;
+ struct regmap *regmap;
+ struct reset_control *rst_h;
+ struct reset_control *rst_m;
+ struct snd_dmaengine_dai_dma_data capture_dma_data;
+ struct snd_dmaengine_dai_dma_data playback_dma_data;
+ struct snd_pcm_substream *substreams[SNDRV_PCM_STREAM_LAST + 1];
+ unsigned int mclk_rate;
+ unsigned int wait_time[SNDRV_PCM_STREAM_LAST + 1];
+ unsigned int tx_lanes;
+ unsigned int rx_lanes;
+ unsigned int sdi[MAX_LANES];
+ unsigned int sdo[MAX_LANES];
+ unsigned int version;
+ enum fpw_mode fpw;
+ int fw_ratio;
+ bool has_capture;
+ bool has_playback;
+ bool is_master_mode;
+ bool is_tdm;
+ bool initialized;
+ /* protects register writes that depend on the state of XFER[1:0] */
+ spinlock_t xfer_lock;
+};
+
+static bool rockchip_sai_stream_valid(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+
+ if (!substream)
+ return false;
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+ sai->has_playback)
+ return true;
+
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE &&
+ sai->has_capture)
+ return true;
+
+ return false;
+}
+
+static int rockchip_sai_fsync_lost_detect(struct rk_sai_dev *sai, bool en)
+{
+ unsigned int fw, cnt;
+
+ if (sai->is_master_mode || sai->version < SAI_VER_2311)
+ return 0;
+
+ regmap_read(sai->regmap, SAI_FSCR, &fw);
+ cnt = SAI_FSCR_FW_V(fw) << 1; /* two fsync lost */
+
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSLOSTC, SAI_INTCR_FSLOSTC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSLOST_MASK,
+ SAI_INTCR_FSLOST(en));
+ /*
+ * The `cnt` is the number of SCLK cycles of the CRU's SCLK signal that
+ * should be used as timeout. Consequently, in slave mode, this value
+ * is only correct if the CRU SCLK is equal to the external SCLK.
+ */
+ regmap_update_bits(sai->regmap, SAI_FS_TIMEOUT,
+ SAI_FS_TIMEOUT_VAL_MASK | SAI_FS_TIMEOUT_EN_MASK,
+ SAI_FS_TIMEOUT_VAL(cnt) | SAI_FS_TIMEOUT_EN(en));
+
+ return 0;
+}
+
+static int rockchip_sai_fsync_err_detect(struct rk_sai_dev *sai,
+ bool en)
+{
+ if (sai->is_master_mode || sai->version < SAI_VER_2311)
+ return 0;
+
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSERRC, SAI_INTCR_FSERRC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSERR_MASK,
+ SAI_INTCR_FSERR(en));
+
+ return 0;
+}
+
+static int rockchip_sai_poll_clk_idle(struct rk_sai_dev *sai)
+{
+ unsigned int reg, idle, val;
+ int ret;
+
+ if (sai->version >= SAI_VER_2307) {
+ reg = SAI_STATUS;
+ idle = SAI_STATUS_FS_IDLE;
+ idle = sai->version >= SAI_VER_2311 ? idle >> 1 : idle;
+ } else {
+ reg = SAI_XFER;
+ idle = SAI_XFER_FS_IDLE;
+ }
+
+ ret = regmap_read_poll_timeout_atomic(sai->regmap, reg, val,
+ (val & idle), 10, TIMEOUT_US);
+ if (ret < 0)
+ dev_warn(sai->dev, "Failed to idle FS\n");
+
+ return ret;
+}
+
+static int rockchip_sai_poll_stream_idle(struct rk_sai_dev *sai, bool playback, bool capture)
+{
+ unsigned int reg, val;
+ unsigned int idle = 0;
+ int ret;
+
+ if (sai->version >= SAI_VER_2307) {
+ reg = SAI_STATUS;
+ if (playback)
+ idle |= SAI_STATUS_TX_IDLE;
+ if (capture)
+ idle |= SAI_STATUS_RX_IDLE;
+ idle = sai->version >= SAI_VER_2311 ? idle >> 1 : idle;
+ } else {
+ reg = SAI_XFER;
+ if (playback)
+ idle |= SAI_XFER_TX_IDLE;
+ if (capture)
+ idle |= SAI_XFER_RX_IDLE;
+ }
+
+ ret = regmap_read_poll_timeout_atomic(sai->regmap, reg, val,
+ (val & idle), 10, TIMEOUT_US);
+ if (ret < 0)
+ dev_warn(sai->dev, "Failed to idle stream\n");
+
+ return ret;
+}
+
+/**
+ * rockchip_sai_xfer_clk_stop_and_wait() - stop the xfer clock and wait for it to be idle
+ * @sai: pointer to the driver instance's rk_sai_dev struct
+ * @to_restore: pointer to store the CLK/FSS register values in as they were
+ * found before they were cleared, or NULL.
+ *
+ * Clear the XFER_CLK and XFER_FSS registers if needed, then busy-waits for the
+ * XFER clocks to be idle. Before clearing the bits, it stores the state of the
+ * registers as it encountered them in to_restore if it isn't NULL.
+ *
+ * Context: Any context. Expects sai->xfer_lock to be held by caller.
+ */
+static void rockchip_sai_xfer_clk_stop_and_wait(struct rk_sai_dev *sai, unsigned int *to_restore)
+{
+ unsigned int mask = SAI_XFER_CLK_MASK | SAI_XFER_FSS_MASK;
+ unsigned int disable = SAI_XFER_CLK_DIS | SAI_XFER_FSS_DIS;
+ unsigned int val;
+
+ assert_spin_locked(&sai->xfer_lock);
+
+ regmap_read(sai->regmap, SAI_XFER, &val);
+ if ((val & mask) == disable)
+ goto wait_for_idle;
+
+ if (sai->is_master_mode)
+ regmap_update_bits(sai->regmap, SAI_XFER, mask, disable);
+
+wait_for_idle:
+ rockchip_sai_poll_clk_idle(sai);
+
+ if (to_restore)
+ *to_restore = val;
+}
+
+static int rockchip_sai_runtime_suspend(struct device *dev)
+{
+ struct rk_sai_dev *sai = dev_get_drvdata(dev);
+ unsigned long flags;
+
+ rockchip_sai_fsync_lost_detect(sai, 0);
+ rockchip_sai_fsync_err_detect(sai, 0);
+
+ spin_lock_irqsave(&sai->xfer_lock, flags);
+ rockchip_sai_xfer_clk_stop_and_wait(sai, NULL);
+ spin_unlock_irqrestore(&sai->xfer_lock, flags);
+
+ regcache_cache_only(sai->regmap, true);
+ /*
+ * After FS is idle, we should wait at least 2 BCLK cycles to make sure
+ * the CLK gate operation has completed, and only then disable mclk.
+ *
+ * Otherwise, the BCLK is still ungated, and once the mclk is enabled,
+ * there is a risk that a few BCLK cycles leak. This is true especially
+ * at low speeds, such as with a samplerate of 8k.
+ *
+ * Ideally we'd adjust the delay based on the samplerate, but it's such
+ * a tiny value that we can just delay for the maximum clock period
+ * for the sake of simplicity.
+ *
+ * The maximum BCLK period is 31us @ 8K-8Bit (64kHz BCLK). We wait for
+ * 40us to give ourselves a safety margin in case udelay falls short.
+ */
+ udelay(40);
+ clk_disable_unprepare(sai->mclk);
+ clk_disable_unprepare(sai->hclk);
+
+ return 0;
+}
+
+static int rockchip_sai_runtime_resume(struct device *dev)
+{
+ struct rk_sai_dev *sai = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_prepare_enable(sai->hclk);
+ if (ret)
+ goto err_hclk;
+
+ ret = clk_prepare_enable(sai->mclk);
+ if (ret)
+ goto err_mclk;
+
+ regcache_cache_only(sai->regmap, false);
+ regcache_mark_dirty(sai->regmap);
+ ret = regcache_sync(sai->regmap);
+ if (ret)
+ goto err_regmap;
+
+ return 0;
+
+err_regmap:
+ clk_disable_unprepare(sai->mclk);
+err_mclk:
+ clk_disable_unprepare(sai->hclk);
+err_hclk:
+ return ret;
+}
+
+static void rockchip_sai_fifo_xrun_detect(struct rk_sai_dev *sai,
+ int stream, bool en)
+{
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ /* clear irq status which was asserted before TXUIE enabled */
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_TXUIC, SAI_INTCR_TXUIC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_TXUIE_MASK,
+ SAI_INTCR_TXUIE(en));
+ } else {
+ /* clear irq status which was asserted before RXOIE enabled */
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_RXOIC, SAI_INTCR_RXOIC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_RXOIE_MASK,
+ SAI_INTCR_RXOIE(en));
+ }
+}
+
+static void rockchip_sai_dma_ctrl(struct rk_sai_dev *sai,
+ int stream, bool en)
+{
+ if (!en)
+ rockchip_sai_fifo_xrun_detect(sai, stream, 0);
+
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ regmap_update_bits(sai->regmap, SAI_DMACR,
+ SAI_DMACR_TDE_MASK,
+ SAI_DMACR_TDE(en));
+ } else {
+ regmap_update_bits(sai->regmap, SAI_DMACR,
+ SAI_DMACR_RDE_MASK,
+ SAI_DMACR_RDE(en));
+ }
+
+ if (en)
+ rockchip_sai_fifo_xrun_detect(sai, stream, 1);
+}
+
+static void rockchip_sai_reset(struct rk_sai_dev *sai)
+{
+ /*
+ * It is advised to reset the hclk domain before resetting the mclk
+ * domain, especially in slave mode without a clock input.
+ *
+ * To deal with the aforementioned case of slave mode without a clock
+ * input, we work around a potential issue by resetting the whole
+ * controller, bringing it back into master mode, and then recovering
+ * the controller configuration in the regmap.
+ */
+ reset_control_assert(sai->rst_h);
+ udelay(10);
+ reset_control_deassert(sai->rst_h);
+ udelay(10);
+ reset_control_assert(sai->rst_m);
+ udelay(10);
+ reset_control_deassert(sai->rst_m);
+ udelay(10);
+
+ /* recover regmap config */
+ regcache_mark_dirty(sai->regmap);
+ regcache_sync(sai->regmap);
+}
+
+static int rockchip_sai_clear(struct rk_sai_dev *sai, unsigned int clr)
+{
+ unsigned int val = 0;
+ int ret = 0;
+
+ regmap_update_bits(sai->regmap, SAI_CLR, clr, clr);
+ ret = regmap_read_poll_timeout_atomic(sai->regmap, SAI_CLR, val,
+ !(val & clr), 10, TIMEOUT_US);
+ if (ret < 0) {
+ dev_warn(sai->dev, "Failed to clear %u\n", clr);
+ rockchip_sai_reset(sai);
+ }
+
+ return ret;
+}
+
+static void rockchip_sai_xfer_start(struct rk_sai_dev *sai, int stream)
+{
+ unsigned int msk, val;
+
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ msk = SAI_XFER_TXS_MASK;
+ val = SAI_XFER_TXS_EN;
+
+ } else {
+ msk = SAI_XFER_RXS_MASK;
+ val = SAI_XFER_RXS_EN;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_XFER, msk, val);
+}
+
+static void rockchip_sai_xfer_stop(struct rk_sai_dev *sai, int stream)
+{
+ unsigned int msk = 0, val = 0, clr = 0;
+ bool playback;
+ bool capture;
+
+ if (stream < 0) {
+ playback = true;
+ capture = true;
+ } else if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ playback = true;
+ capture = false;
+ } else {
+ playback = true;
+ capture = false;
+ }
+
+ if (playback) {
+ msk |= SAI_XFER_TXS_MASK;
+ val |= SAI_XFER_TXS_DIS;
+ clr |= SAI_CLR_TXC;
+ }
+ if (capture) {
+ msk |= SAI_XFER_RXS_MASK;
+ val |= SAI_XFER_RXS_DIS;
+ clr |= SAI_CLR_RXC;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_XFER, msk, val);
+ rockchip_sai_poll_stream_idle(sai, playback, capture);
+
+ rockchip_sai_clear(sai, clr);
+}
+
+static void rockchip_sai_start(struct rk_sai_dev *sai, int stream)
+{
+ rockchip_sai_dma_ctrl(sai, stream, 1);
+ rockchip_sai_xfer_start(sai, stream);
+}
+
+static void rockchip_sai_stop(struct rk_sai_dev *sai, int stream)
+{
+ rockchip_sai_dma_ctrl(sai, stream, 0);
+ rockchip_sai_xfer_stop(sai, stream);
+}
+
+static void rockchip_sai_fmt_create(struct rk_sai_dev *sai, unsigned int fmt)
+{
+ unsigned int xcr_mask = 0, xcr_val = 0, xsft_mask = 0, xsft_val = 0;
+ unsigned int fscr_mask = 0, fscr_val = 0;
+
+ assert_spin_locked(&sai->xfer_lock);
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_RIGHT_J:
+ xcr_mask = SAI_XCR_VDJ_MASK | SAI_XCR_EDGE_SHIFT_MASK;
+ xcr_val = SAI_XCR_VDJ_R | SAI_XCR_EDGE_SHIFT_0;
+ xsft_mask = SAI_XSHIFT_RIGHT_MASK;
+ xsft_val = SAI_XSHIFT_RIGHT(0);
+ fscr_mask = SAI_FSCR_EDGE_MASK;
+ fscr_val = SAI_FSCR_EDGE_DUAL;
+ sai->fpw = FPW_HALF_FRAME_WIDTH;
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ xcr_mask = SAI_XCR_VDJ_MASK | SAI_XCR_EDGE_SHIFT_MASK;
+ xcr_val = SAI_XCR_VDJ_L | SAI_XCR_EDGE_SHIFT_0;
+ xsft_mask = SAI_XSHIFT_RIGHT_MASK;
+ xsft_val = SAI_XSHIFT_RIGHT(0);
+ fscr_mask = SAI_FSCR_EDGE_MASK;
+ fscr_val = SAI_FSCR_EDGE_DUAL;
+ sai->fpw = FPW_HALF_FRAME_WIDTH;
+ break;
+ case SND_SOC_DAIFMT_I2S:
+ xcr_mask = SAI_XCR_VDJ_MASK | SAI_XCR_EDGE_SHIFT_MASK;
+ xcr_val = SAI_XCR_VDJ_L | SAI_XCR_EDGE_SHIFT_1;
+ xsft_mask = SAI_XSHIFT_RIGHT_MASK;
+ if (sai->is_tdm)
+ xsft_val = SAI_XSHIFT_RIGHT(1);
+ else
+ xsft_val = SAI_XSHIFT_RIGHT(2);
+ fscr_mask = SAI_FSCR_EDGE_MASK;
+ fscr_val = SAI_FSCR_EDGE_DUAL;
+ sai->fpw = FPW_HALF_FRAME_WIDTH;
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ xcr_mask = SAI_XCR_VDJ_MASK | SAI_XCR_EDGE_SHIFT_MASK;
+ xcr_val = SAI_XCR_VDJ_L | SAI_XCR_EDGE_SHIFT_0;
+ xsft_mask = SAI_XSHIFT_RIGHT_MASK;
+ xsft_val = SAI_XSHIFT_RIGHT(2);
+ fscr_mask = SAI_FSCR_EDGE_MASK;
+ fscr_val = SAI_FSCR_EDGE_RISING;
+ sai->fpw = FPW_ONE_BCLK_WIDTH;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ xcr_mask = SAI_XCR_VDJ_MASK | SAI_XCR_EDGE_SHIFT_MASK;
+ xcr_val = SAI_XCR_VDJ_L | SAI_XCR_EDGE_SHIFT_0;
+ xsft_mask = SAI_XSHIFT_RIGHT_MASK;
+ xsft_val = SAI_XSHIFT_RIGHT(0);
+ fscr_mask = SAI_FSCR_EDGE_MASK;
+ fscr_val = SAI_FSCR_EDGE_RISING;
+ sai->fpw = FPW_ONE_BCLK_WIDTH;
+ break;
+ default:
+ dev_err(sai->dev, "Unsupported fmt %u\n", fmt);
+ break;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_TXCR, xcr_mask, xcr_val);
+ regmap_update_bits(sai->regmap, SAI_RXCR, xcr_mask, xcr_val);
+ regmap_update_bits(sai->regmap, SAI_TX_SHIFT, xsft_mask, xsft_val);
+ regmap_update_bits(sai->regmap, SAI_RX_SHIFT, xsft_mask, xsft_val);
+ regmap_update_bits(sai->regmap, SAI_FSCR, fscr_mask, fscr_val);
+}
+
+static int rockchip_sai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+ unsigned int mask = 0, val = 0;
+ unsigned int clk_gates;
+ unsigned long flags;
+ int ret = 0;
+
+ pm_runtime_get_sync(dai->dev);
+
+ mask = SAI_CKR_MSS_MASK;
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ case SND_SOC_DAIFMT_BP_FP:
+ val = SAI_CKR_MSS_MASTER;
+ sai->is_master_mode = true;
+ break;
+ case SND_SOC_DAIFMT_BC_FC:
+ val = SAI_CKR_MSS_SLAVE;
+ sai->is_master_mode = false;
+ break;
+ default:
+ ret = -EINVAL;
+ goto err_pm_put;
+ }
+
+ spin_lock_irqsave(&sai->xfer_lock, flags);
+ rockchip_sai_xfer_clk_stop_and_wait(sai, &clk_gates);
+ if (sai->initialized) {
+ if (sai->has_capture && sai->has_playback)
+ rockchip_sai_xfer_stop(sai, -1);
+ else if (sai->has_capture)
+ rockchip_sai_xfer_stop(sai, SNDRV_PCM_STREAM_CAPTURE);
+ else
+ rockchip_sai_xfer_stop(sai, SNDRV_PCM_STREAM_PLAYBACK);
+ } else {
+ rockchip_sai_clear(sai, 0);
+ sai->initialized = true;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_CKR, mask, val);
+
+ mask = SAI_CKR_CKP_MASK | SAI_CKR_FSP_MASK;
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_NF:
+ val = SAI_CKR_CKP_NORMAL | SAI_CKR_FSP_NORMAL;
+ break;
+ case SND_SOC_DAIFMT_NB_IF:
+ val = SAI_CKR_CKP_NORMAL | SAI_CKR_FSP_INVERTED;
+ break;
+ case SND_SOC_DAIFMT_IB_NF:
+ val = SAI_CKR_CKP_INVERTED | SAI_CKR_FSP_NORMAL;
+ break;
+ case SND_SOC_DAIFMT_IB_IF:
+ val = SAI_CKR_CKP_INVERTED | SAI_CKR_FSP_INVERTED;
+ break;
+ default:
+ ret = -EINVAL;
+ goto err_xfer_unlock;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_CKR, mask, val);
+
+ rockchip_sai_fmt_create(sai, fmt);
+
+err_xfer_unlock:
+ if (clk_gates)
+ regmap_update_bits(sai->regmap, SAI_XFER,
+ SAI_XFER_CLK_MASK | SAI_XFER_FSS_MASK,
+ clk_gates);
+ spin_unlock_irqrestore(&sai->xfer_lock, flags);
+err_pm_put:
+ pm_runtime_put(dai->dev);
+
+ return ret;
+}
+
+static int rockchip_sai_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+ struct snd_dmaengine_dai_dma_data *dma_data;
+ unsigned int mclk_rate, mclk_req_rate, bclk_rate, div_bclk;
+ unsigned int ch_per_lane, slot_width;
+ unsigned int val, fscr, reg;
+ unsigned int lanes, req_lanes;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!rockchip_sai_stream_valid(substream, dai))
+ return 0;
+
+ dma_data = snd_soc_dai_get_dma_data(dai, substream);
+ dma_data->maxburst = MAXBURST_PER_FIFO * params_channels(params) / 2;
+
+ pm_runtime_get_sync(sai->dev);
+
+ regmap_read(sai->regmap, SAI_DMACR, &val);
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ reg = SAI_TXCR;
+ lanes = sai->tx_lanes;
+ } else {
+ reg = SAI_RXCR;
+ lanes = sai->rx_lanes;
+ }
+
+ if (!sai->is_tdm) {
+ req_lanes = DIV_ROUND_UP(params_channels(params), 2);
+ if (lanes < req_lanes) {
+ dev_err(sai->dev, "not enough lanes (%d) for requested number of %s channels (%d)\n",
+ lanes, reg == SAI_TXCR ? "playback" : "capture",
+ params_channels(params));
+ ret = -EINVAL;
+ goto err_pm_put;
+ } else {
+ lanes = req_lanes;
+ }
+ }
+
+ dev_dbg(sai->dev, "using %d lanes totalling %d%s channels for %s\n",
+ lanes, params_channels(params), sai->is_tdm ? " (TDM)" : "",
+ reg == SAI_TXCR ? "playback" : "capture");
+
+ switch (params_format(params)) {
+ case SNDRV_PCM_FORMAT_S8:
+ case SNDRV_PCM_FORMAT_U8:
+ val = SAI_XCR_VDW(8);
+ break;
+ case SNDRV_PCM_FORMAT_S16_LE:
+ val = SAI_XCR_VDW(16);
+ break;
+ case SNDRV_PCM_FORMAT_S24_LE:
+ val = SAI_XCR_VDW(24);
+ break;
+ case SNDRV_PCM_FORMAT_S32_LE:
+ case SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE:
+ val = SAI_XCR_VDW(32);
+ break;
+ default:
+ ret = -EINVAL;
+ goto err_pm_put;
+ }
+
+ val |= SAI_XCR_CSR(lanes);
+
+ spin_lock_irqsave(&sai->xfer_lock, flags);
+
+ regmap_update_bits(sai->regmap, reg, SAI_XCR_VDW_MASK | SAI_XCR_CSR_MASK, val);
+
+ regmap_read(sai->regmap, reg, &val);
+
+ slot_width = SAI_XCR_SBW_V(val);
+ ch_per_lane = params_channels(params) / lanes;
+
+ regmap_update_bits(sai->regmap, reg, SAI_XCR_SNB_MASK,
+ SAI_XCR_SNB(ch_per_lane));
+
+ fscr = SAI_FSCR_FW(sai->fw_ratio * slot_width * ch_per_lane);
+
+ switch (sai->fpw) {
+ case FPW_ONE_BCLK_WIDTH:
+ fscr |= SAI_FSCR_FPW(1);
+ break;
+ case FPW_ONE_SLOT_WIDTH:
+ fscr |= SAI_FSCR_FPW(slot_width);
+ break;
+ case FPW_HALF_FRAME_WIDTH:
+ fscr |= SAI_FSCR_FPW(sai->fw_ratio * slot_width * ch_per_lane / 2);
+ break;
+ default:
+ dev_err(sai->dev, "Invalid Frame Pulse Width %d\n", sai->fpw);
+ ret = -EINVAL;
+ goto err_xfer_unlock;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_FSCR,
+ SAI_FSCR_FW_MASK | SAI_FSCR_FPW_MASK, fscr);
+
+ if (sai->is_master_mode) {
+ bclk_rate = sai->fw_ratio * slot_width * ch_per_lane * params_rate(params);
+ ret = clk_set_rate(sai->mclk, sai->mclk_rate);
+ if (ret) {
+ dev_err(sai->dev, "Failed to set mclk to %u: %pe\n",
+ sai->mclk_rate, ERR_PTR(ret));
+ goto err_xfer_unlock;
+ }
+
+ mclk_rate = clk_get_rate(sai->mclk);
+ if (mclk_rate < bclk_rate) {
+ dev_err(sai->dev, "Mismatch mclk: %u, at least %u\n",
+ mclk_rate, bclk_rate);
+ ret = -EINVAL;
+ goto err_xfer_unlock;
+ }
+
+ div_bclk = DIV_ROUND_CLOSEST(mclk_rate, bclk_rate);
+ mclk_req_rate = bclk_rate * div_bclk;
+
+ if (mclk_rate < mclk_req_rate - CLK_SHIFT_RATE_HZ_MAX ||
+ mclk_rate > mclk_req_rate + CLK_SHIFT_RATE_HZ_MAX) {
+ dev_err(sai->dev, "Mismatch mclk: %u, expected %u (+/- %dHz)\n",
+ mclk_rate, mclk_req_rate, CLK_SHIFT_RATE_HZ_MAX);
+ ret = -EINVAL;
+ goto err_xfer_unlock;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_CKR, SAI_CKR_MDIV_MASK,
+ SAI_CKR_MDIV(div_bclk));
+ }
+
+err_xfer_unlock:
+ spin_unlock_irqrestore(&sai->xfer_lock, flags);
+err_pm_put:
+ pm_runtime_put(sai->dev);
+
+ return ret;
+}
+
+static int rockchip_sai_prepare(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+ unsigned long flags;
+
+ if (!rockchip_sai_stream_valid(substream, dai))
+ return 0;
+
+ if (sai->is_master_mode) {
+ /*
+ * We should wait for the first BCLK pulse to have definitely
+ * occurred after any DIV settings have potentially been
+ * changed in order to guarantee a clean clock signal once we
+ * ungate the clock.
+ *
+ * Ideally, this would be done depending on the samplerate, but
+ * for the sake of simplicity, we'll just delay for the maximum
+ * possible clock offset time, which is quite a small value.
+ *
+ * The maximum BCLK offset is 15.6us @ 8K-8Bit (64kHz BCLK). We
+ * wait for 20us in order to give us a safety margin in case
+ * udelay falls short.
+ */
+ udelay(20);
+ spin_lock_irqsave(&sai->xfer_lock, flags);
+ regmap_update_bits(sai->regmap, SAI_XFER,
+ SAI_XFER_CLK_MASK |
+ SAI_XFER_FSS_MASK,
+ SAI_XFER_CLK_EN |
+ SAI_XFER_FSS_EN);
+ spin_unlock_irqrestore(&sai->xfer_lock, flags);
+ }
+
+ rockchip_sai_fsync_lost_detect(sai, 1);
+ rockchip_sai_fsync_err_detect(sai, 1);
+
+ return 0;
+}
+
+static void rockchip_sai_path_config(struct rk_sai_dev *sai,
+ int num, bool is_rx)
+{
+ int i;
+
+ if (is_rx)
+ for (i = 0; i < num; i++)
+ regmap_update_bits(sai->regmap, SAI_PATH_SEL,
+ SAI_RX_PATH_MASK(i),
+ SAI_RX_PATH(i, sai->sdi[i]));
+ else
+ for (i = 0; i < num; i++)
+ regmap_update_bits(sai->regmap, SAI_PATH_SEL,
+ SAI_TX_PATH_MASK(i),
+ SAI_TX_PATH(i, sai->sdo[i]));
+}
+
+static int rockchip_sai_path_prepare(struct rk_sai_dev *sai,
+ struct device_node *np,
+ bool is_rx)
+{
+ const char *path_prop;
+ unsigned int *data;
+ unsigned int *lanes;
+ int i, num, ret;
+
+ if (is_rx) {
+ path_prop = "rockchip,sai-rx-route";
+ data = sai->sdi;
+ lanes = &sai->rx_lanes;
+ } else {
+ path_prop = "rockchip,sai-tx-route";
+ data = sai->sdo;
+ lanes = &sai->tx_lanes;
+ }
+
+ num = of_count_phandle_with_args(np, path_prop, NULL);
+ if (num == -ENOENT) {
+ return 0;
+ } else if (num > MAX_LANES || num == 0) {
+ dev_err(sai->dev, "found %d entries in %s, outside of range 1 to %d\n",
+ num, path_prop, MAX_LANES);
+ return -EINVAL;
+ } else if (num < 0) {
+ dev_err(sai->dev, "error in %s property: %pe\n", path_prop,
+ ERR_PTR(num));
+ return num;
+ }
+
+ *lanes = num;
+ ret = device_property_read_u32_array(sai->dev, path_prop, data, num);
+ if (ret < 0) {
+ dev_err(sai->dev, "failed to read property '%s': %pe\n",
+ path_prop, ERR_PTR(ret));
+ return ret;
+ }
+
+ for (i = 0; i < num; i++) {
+ if (data[i] >= MAX_LANES) {
+ dev_err(sai->dev, "%s[%d] is %d, should be less than %d\n",
+ path_prop, i, data[i], MAX_LANES);
+ return -EINVAL;
+ }
+ }
+
+ rockchip_sai_path_config(sai, num, is_rx);
+
+ return 0;
+}
+
+static int rockchip_sai_parse_paths(struct rk_sai_dev *sai,
+ struct device_node *np)
+{
+ int ret;
+
+ if (sai->has_playback) {
+ sai->tx_lanes = 1;
+ ret = rockchip_sai_path_prepare(sai, np, false);
+ if (ret < 0) {
+ dev_err(sai->dev, "Failed to prepare TX path: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ if (sai->has_capture) {
+ sai->rx_lanes = 1;
+ ret = rockchip_sai_path_prepare(sai, np, true);
+ if (ret < 0) {
+ dev_err(sai->dev, "Failed to prepare RX path: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int rockchip_sai_trigger(struct snd_pcm_substream *substream,
+ int cmd, struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+ int ret = 0;
+
+ if (!rockchip_sai_stream_valid(substream, dai))
+ return 0;
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ case SNDRV_PCM_TRIGGER_RESUME:
+ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+ rockchip_sai_start(sai, substream->stream);
+ break;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ rockchip_sai_stop(sai, substream->stream);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+
+static int rockchip_sai_dai_probe(struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+
+ snd_soc_dai_init_dma_data(dai,
+ sai->has_playback ? &sai->playback_dma_data : NULL,
+ sai->has_capture ? &sai->capture_dma_data : NULL);
+
+ return 0;
+}
+
+static int rockchip_sai_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+ int stream = substream->stream;
+
+ if (!rockchip_sai_stream_valid(substream, dai))
+ return 0;
+
+ if (sai->substreams[stream])
+ return -EBUSY;
+
+ if (sai->wait_time[stream])
+ substream->wait_time = sai->wait_time[stream];
+
+ sai->substreams[stream] = substream;
+
+ return 0;
+}
+
+static void rockchip_sai_shutdown(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+
+ if (!rockchip_sai_stream_valid(substream, dai))
+ return;
+
+ sai->substreams[substream->stream] = NULL;
+}
+
+static int rockchip_sai_set_tdm_slot(struct snd_soc_dai *dai,
+ unsigned int tx_mask, unsigned int rx_mask,
+ int slots, int slot_width)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+ unsigned long flags;
+ unsigned int clk_gates;
+ int sw = slot_width;
+
+ if (!slots) {
+ /* Disabling TDM, set slot width back to 32 bits */
+ sai->is_tdm = false;
+ sw = 32;
+ } else {
+ sai->is_tdm = true;
+ }
+
+ if (sw < 16 || sw > 32)
+ return -EINVAL;
+
+ pm_runtime_get_sync(dai->dev);
+ spin_lock_irqsave(&sai->xfer_lock, flags);
+ rockchip_sai_xfer_clk_stop_and_wait(sai, &clk_gates);
+ regmap_update_bits(sai->regmap, SAI_TXCR, SAI_XCR_SBW_MASK,
+ SAI_XCR_SBW(sw));
+ regmap_update_bits(sai->regmap, SAI_RXCR, SAI_XCR_SBW_MASK,
+ SAI_XCR_SBW(sw));
+ regmap_update_bits(sai->regmap, SAI_XFER,
+ SAI_XFER_CLK_MASK | SAI_XFER_FSS_MASK,
+ clk_gates);
+ spin_unlock_irqrestore(&sai->xfer_lock, flags);
+ pm_runtime_put(dai->dev);
+
+ return 0;
+}
+
+static int rockchip_sai_set_sysclk(struct snd_soc_dai *dai, int stream,
+ unsigned int freq, int dir)
+{
+ struct rk_sai_dev *sai = snd_soc_dai_get_drvdata(dai);
+
+ sai->mclk_rate = freq;
+
+ return 0;
+}
+
+static const struct snd_soc_dai_ops rockchip_sai_dai_ops = {
+ .probe = rockchip_sai_dai_probe,
+ .startup = rockchip_sai_startup,
+ .shutdown = rockchip_sai_shutdown,
+ .hw_params = rockchip_sai_hw_params,
+ .set_fmt = rockchip_sai_set_fmt,
+ .set_sysclk = rockchip_sai_set_sysclk,
+ .prepare = rockchip_sai_prepare,
+ .trigger = rockchip_sai_trigger,
+ .set_tdm_slot = rockchip_sai_set_tdm_slot,
+};
+
+static const struct snd_soc_dai_driver rockchip_sai_dai = {
+ .ops = &rockchip_sai_dai_ops,
+ .symmetric_rate = 1,
+};
+
+static bool rockchip_sai_wr_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case SAI_TXCR:
+ case SAI_FSCR:
+ case SAI_RXCR:
+ case SAI_MONO_CR:
+ case SAI_XFER:
+ case SAI_CLR:
+ case SAI_CKR:
+ case SAI_DMACR:
+ case SAI_INTCR:
+ case SAI_TXDR:
+ case SAI_PATH_SEL:
+ case SAI_TX_SLOT_MASK0:
+ case SAI_TX_SLOT_MASK1:
+ case SAI_TX_SLOT_MASK2:
+ case SAI_TX_SLOT_MASK3:
+ case SAI_RX_SLOT_MASK0:
+ case SAI_RX_SLOT_MASK1:
+ case SAI_RX_SLOT_MASK2:
+ case SAI_RX_SLOT_MASK3:
+ case SAI_TX_SHIFT:
+ case SAI_RX_SHIFT:
+ case SAI_FSXN:
+ case SAI_FS_TIMEOUT:
+ case SAI_LOOPBACK_LR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool rockchip_sai_rd_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case SAI_TXCR:
+ case SAI_FSCR:
+ case SAI_RXCR:
+ case SAI_MONO_CR:
+ case SAI_XFER:
+ case SAI_CLR:
+ case SAI_CKR:
+ case SAI_TXFIFOLR:
+ case SAI_RXFIFOLR:
+ case SAI_DMACR:
+ case SAI_INTCR:
+ case SAI_INTSR:
+ case SAI_TXDR:
+ case SAI_RXDR:
+ case SAI_PATH_SEL:
+ case SAI_TX_SLOT_MASK0:
+ case SAI_TX_SLOT_MASK1:
+ case SAI_TX_SLOT_MASK2:
+ case SAI_TX_SLOT_MASK3:
+ case SAI_RX_SLOT_MASK0:
+ case SAI_RX_SLOT_MASK1:
+ case SAI_RX_SLOT_MASK2:
+ case SAI_RX_SLOT_MASK3:
+ case SAI_TX_DATA_CNT:
+ case SAI_RX_DATA_CNT:
+ case SAI_TX_SHIFT:
+ case SAI_RX_SHIFT:
+ case SAI_STATUS:
+ case SAI_VERSION:
+ case SAI_FSXN:
+ case SAI_FS_TIMEOUT:
+ case SAI_LOOPBACK_LR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool rockchip_sai_volatile_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case SAI_XFER:
+ case SAI_INTCR:
+ case SAI_INTSR:
+ case SAI_CLR:
+ case SAI_TXFIFOLR:
+ case SAI_RXFIFOLR:
+ case SAI_TXDR:
+ case SAI_RXDR:
+ case SAI_TX_DATA_CNT:
+ case SAI_RX_DATA_CNT:
+ case SAI_STATUS:
+ case SAI_VERSION:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool rockchip_sai_precious_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case SAI_RXDR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct reg_default rockchip_sai_reg_defaults[] = {
+ { SAI_TXCR, 0x00000bff },
+ { SAI_FSCR, 0x0001f03f },
+ { SAI_RXCR, 0x00000bff },
+ { SAI_PATH_SEL, 0x0000e4e4 },
+};
+
+static const struct regmap_config rockchip_sai_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = SAI_LOOPBACK_LR,
+ .reg_defaults = rockchip_sai_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(rockchip_sai_reg_defaults),
+ .writeable_reg = rockchip_sai_wr_reg,
+ .readable_reg = rockchip_sai_rd_reg,
+ .volatile_reg = rockchip_sai_volatile_reg,
+ .precious_reg = rockchip_sai_precious_reg,
+ .cache_type = REGCACHE_FLAT,
+};
+
+static int rockchip_sai_init_dai(struct rk_sai_dev *sai, struct resource *res,
+ struct snd_soc_dai_driver **dp)
+{
+ struct device_node *node = sai->dev->of_node;
+ struct snd_soc_dai_driver *dai;
+ struct property *dma_names;
+ const char *dma_name;
+
+ of_property_for_each_string(node, "dma-names", dma_names, dma_name) {
+ if (!strcmp(dma_name, "tx"))
+ sai->has_playback = true;
+ if (!strcmp(dma_name, "rx"))
+ sai->has_capture = true;
+ }
+
+ dai = devm_kmemdup(sai->dev, &rockchip_sai_dai,
+ sizeof(*dai), GFP_KERNEL);
+ if (!dai)
+ return -ENOMEM;
+
+ if (sai->has_playback) {
+ dai->playback.stream_name = "Playback";
+ dai->playback.channels_min = 1;
+ dai->playback.channels_max = 512;
+ dai->playback.rates = SNDRV_PCM_RATE_8000_384000;
+ dai->playback.formats = SNDRV_PCM_FMTBIT_S8 |
+ SNDRV_PCM_FMTBIT_S16_LE |
+ SNDRV_PCM_FMTBIT_S24_LE |
+ SNDRV_PCM_FMTBIT_S32_LE |
+ SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE;
+
+ sai->playback_dma_data.addr = res->start + SAI_TXDR;
+ sai->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ sai->playback_dma_data.maxburst = MAXBURST_PER_FIFO;
+ }
+
+ if (sai->has_capture) {
+ dai->capture.stream_name = "Capture";
+ dai->capture.channels_min = 1;
+ dai->capture.channels_max = 512;
+ dai->capture.rates = SNDRV_PCM_RATE_8000_384000;
+ dai->capture.formats = SNDRV_PCM_FMTBIT_S8 |
+ SNDRV_PCM_FMTBIT_S16_LE |
+ SNDRV_PCM_FMTBIT_S24_LE |
+ SNDRV_PCM_FMTBIT_S32_LE |
+ SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE;
+
+ sai->capture_dma_data.addr = res->start + SAI_RXDR;
+ sai->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ sai->capture_dma_data.maxburst = MAXBURST_PER_FIFO;
+ }
+
+ regmap_update_bits(sai->regmap, SAI_DMACR, SAI_DMACR_TDL_MASK,
+ SAI_DMACR_TDL(16));
+ regmap_update_bits(sai->regmap, SAI_DMACR, SAI_DMACR_RDL_MASK,
+ SAI_DMACR_RDL(16));
+
+ if (dp)
+ *dp = dai;
+
+ return 0;
+}
+
+static const char * const mono_text[] = { "Disable", "Enable" };
+
+static DECLARE_TLV_DB_SCALE(rmss_tlv, 0, 128, 0);
+
+static const char * const lplrc_text[] = { "L:MIC R:LP", "L:LP R:MIC" };
+static const char * const lplr_text[] = { "Disable", "Enable" };
+
+static const char * const lpx_text[] = {
+ "From SDO0", "From SDO1", "From SDO2", "From SDO3" };
+
+static const char * const lps_text[] = { "Disable", "Enable" };
+static const char * const sync_out_text[] = { "From CRU", "From IO" };
+static const char * const sync_in_text[] = { "From IO", "From Sync Port" };
+
+static const char * const rpaths_text[] = {
+ "From SDI0", "From SDI1", "From SDI2", "From SDI3" };
+
+static const char * const tpaths_text[] = {
+ "From PATH0", "From PATH1", "From PATH2", "From PATH3" };
+
+/* MONO_CR */
+static SOC_ENUM_SINGLE_DECL(rmono_switch, SAI_MONO_CR, 1, mono_text);
+static SOC_ENUM_SINGLE_DECL(tmono_switch, SAI_MONO_CR, 0, mono_text);
+
+/* PATH_SEL */
+static SOC_ENUM_SINGLE_DECL(lp3_enum, SAI_PATH_SEL, 28, lpx_text);
+static SOC_ENUM_SINGLE_DECL(lp2_enum, SAI_PATH_SEL, 26, lpx_text);
+static SOC_ENUM_SINGLE_DECL(lp1_enum, SAI_PATH_SEL, 24, lpx_text);
+static SOC_ENUM_SINGLE_DECL(lp0_enum, SAI_PATH_SEL, 22, lpx_text);
+static SOC_ENUM_SINGLE_DECL(lp3_switch, SAI_PATH_SEL, 21, lps_text);
+static SOC_ENUM_SINGLE_DECL(lp2_switch, SAI_PATH_SEL, 20, lps_text);
+static SOC_ENUM_SINGLE_DECL(lp1_switch, SAI_PATH_SEL, 19, lps_text);
+static SOC_ENUM_SINGLE_DECL(lp0_switch, SAI_PATH_SEL, 18, lps_text);
+static SOC_ENUM_SINGLE_DECL(sync_out_switch, SAI_PATH_SEL, 17, sync_out_text);
+static SOC_ENUM_SINGLE_DECL(sync_in_switch, SAI_PATH_SEL, 16, sync_in_text);
+static SOC_ENUM_SINGLE_DECL(rpath3_enum, SAI_PATH_SEL, 14, rpaths_text);
+static SOC_ENUM_SINGLE_DECL(rpath2_enum, SAI_PATH_SEL, 12, rpaths_text);
+static SOC_ENUM_SINGLE_DECL(rpath1_enum, SAI_PATH_SEL, 10, rpaths_text);
+static SOC_ENUM_SINGLE_DECL(rpath0_enum, SAI_PATH_SEL, 8, rpaths_text);
+static SOC_ENUM_SINGLE_DECL(tpath3_enum, SAI_PATH_SEL, 6, tpaths_text);
+static SOC_ENUM_SINGLE_DECL(tpath2_enum, SAI_PATH_SEL, 4, tpaths_text);
+static SOC_ENUM_SINGLE_DECL(tpath1_enum, SAI_PATH_SEL, 2, tpaths_text);
+static SOC_ENUM_SINGLE_DECL(tpath0_enum, SAI_PATH_SEL, 0, tpaths_text);
+
+/* LOOPBACK_LR */
+static SOC_ENUM_SINGLE_DECL(lp3lrc_enum, SAI_LOOPBACK_LR, 7, lplrc_text);
+static SOC_ENUM_SINGLE_DECL(lp2lrc_enum, SAI_LOOPBACK_LR, 6, lplrc_text);
+static SOC_ENUM_SINGLE_DECL(lp1lrc_enum, SAI_LOOPBACK_LR, 5, lplrc_text);
+static SOC_ENUM_SINGLE_DECL(lp0lrc_enum, SAI_LOOPBACK_LR, 4, lplrc_text);
+static SOC_ENUM_SINGLE_DECL(lp3lr_switch, SAI_LOOPBACK_LR, 3, lplr_text);
+static SOC_ENUM_SINGLE_DECL(lp2lr_switch, SAI_LOOPBACK_LR, 2, lplr_text);
+static SOC_ENUM_SINGLE_DECL(lp1lr_switch, SAI_LOOPBACK_LR, 1, lplr_text);
+static SOC_ENUM_SINGLE_DECL(lp0lr_switch, SAI_LOOPBACK_LR, 0, lplr_text);
+
+static int rockchip_sai_wait_time_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = WAIT_TIME_MS_MAX;
+ uinfo->value.integer.step = 1;
+
+ return 0;
+}
+
+static int rockchip_sai_rd_wait_time_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct rk_sai_dev *sai = snd_soc_component_get_drvdata(component);
+
+ ucontrol->value.integer.value[0] = sai->wait_time[SNDRV_PCM_STREAM_CAPTURE];
+
+ return 0;
+}
+
+static int rockchip_sai_rd_wait_time_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct rk_sai_dev *sai = snd_soc_component_get_drvdata(component);
+
+ if (ucontrol->value.integer.value[0] > WAIT_TIME_MS_MAX)
+ return -EINVAL;
+
+ sai->wait_time[SNDRV_PCM_STREAM_CAPTURE] = ucontrol->value.integer.value[0];
+
+ return 1;
+}
+
+static int rockchip_sai_wr_wait_time_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct rk_sai_dev *sai = snd_soc_component_get_drvdata(component);
+
+ ucontrol->value.integer.value[0] = sai->wait_time[SNDRV_PCM_STREAM_PLAYBACK];
+
+ return 0;
+}
+
+static int rockchip_sai_wr_wait_time_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct rk_sai_dev *sai = snd_soc_component_get_drvdata(component);
+
+ if (ucontrol->value.integer.value[0] > WAIT_TIME_MS_MAX)
+ return -EINVAL;
+
+ sai->wait_time[SNDRV_PCM_STREAM_PLAYBACK] = ucontrol->value.integer.value[0];
+
+ return 1;
+}
+
+#define SAI_PCM_WAIT_TIME(xname, xhandler_get, xhandler_put) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, \
+ .info = rockchip_sai_wait_time_info, \
+ .get = xhandler_get, .put = xhandler_put }
+
+static const struct snd_kcontrol_new rockchip_sai_controls[] = {
+ SOC_SINGLE_TLV("Receive Mono Slot Select", SAI_MONO_CR,
+ 2, 128, 0, rmss_tlv),
+ SOC_ENUM("Receive Mono Switch", rmono_switch),
+ SOC_ENUM("Transmit Mono Switch", tmono_switch),
+
+ SOC_ENUM("SDI3 Loopback I2S LR Channel Sel", lp3lrc_enum),
+ SOC_ENUM("SDI2 Loopback I2S LR Channel Sel", lp2lrc_enum),
+ SOC_ENUM("SDI1 Loopback I2S LR Channel Sel", lp1lrc_enum),
+ SOC_ENUM("SDI0 Loopback I2S LR Channel Sel", lp0lrc_enum),
+ SOC_ENUM("SDI3 Loopback I2S LR Switch", lp3lr_switch),
+ SOC_ENUM("SDI2 Loopback I2S LR Switch", lp2lr_switch),
+ SOC_ENUM("SDI1 Loopback I2S LR Switch", lp1lr_switch),
+ SOC_ENUM("SDI0 Loopback I2S LR Switch", lp0lr_switch),
+
+ SOC_ENUM("SDI3 Loopback Src Select", lp3_enum),
+ SOC_ENUM("SDI2 Loopback Src Select", lp2_enum),
+ SOC_ENUM("SDI1 Loopback Src Select", lp1_enum),
+ SOC_ENUM("SDI0 Loopback Src Select", lp0_enum),
+ SOC_ENUM("SDI3 Loopback Switch", lp3_switch),
+ SOC_ENUM("SDI2 Loopback Switch", lp2_switch),
+ SOC_ENUM("SDI1 Loopback Switch", lp1_switch),
+ SOC_ENUM("SDI0 Loopback Switch", lp0_switch),
+ SOC_ENUM("Sync Out Switch", sync_out_switch),
+ SOC_ENUM("Sync In Switch", sync_in_switch),
+ SOC_ENUM("Receive PATH3 Source Select", rpath3_enum),
+ SOC_ENUM("Receive PATH2 Source Select", rpath2_enum),
+ SOC_ENUM("Receive PATH1 Source Select", rpath1_enum),
+ SOC_ENUM("Receive PATH0 Source Select", rpath0_enum),
+ SOC_ENUM("Transmit SDO3 Source Select", tpath3_enum),
+ SOC_ENUM("Transmit SDO2 Source Select", tpath2_enum),
+ SOC_ENUM("Transmit SDO1 Source Select", tpath1_enum),
+ SOC_ENUM("Transmit SDO0 Source Select", tpath0_enum),
+
+ SAI_PCM_WAIT_TIME("PCM Read Wait Time MS",
+ rockchip_sai_rd_wait_time_get,
+ rockchip_sai_rd_wait_time_put),
+ SAI_PCM_WAIT_TIME("PCM Write Wait Time MS",
+ rockchip_sai_wr_wait_time_get,
+ rockchip_sai_wr_wait_time_put),
+};
+
+static const struct snd_soc_component_driver rockchip_sai_component = {
+ .name = DRV_NAME,
+ .controls = rockchip_sai_controls,
+ .num_controls = ARRAY_SIZE(rockchip_sai_controls),
+ .legacy_dai_naming = 1,
+};
+
+static irqreturn_t rockchip_sai_isr(int irq, void *devid)
+{
+ struct rk_sai_dev *sai = (struct rk_sai_dev *)devid;
+ struct snd_pcm_substream *substream;
+ u32 val;
+
+ regmap_read(sai->regmap, SAI_INTSR, &val);
+ if (val & SAI_INTSR_TXUI_ACT) {
+ dev_warn_ratelimited(sai->dev, "TX FIFO Underrun\n");
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_TXUIC, SAI_INTCR_TXUIC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_TXUIE_MASK,
+ SAI_INTCR_TXUIE(0));
+ substream = sai->substreams[SNDRV_PCM_STREAM_PLAYBACK];
+ if (substream)
+ snd_pcm_stop_xrun(substream);
+ }
+
+ if (val & SAI_INTSR_RXOI_ACT) {
+ dev_warn_ratelimited(sai->dev, "RX FIFO Overrun\n");
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_RXOIC, SAI_INTCR_RXOIC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_RXOIE_MASK,
+ SAI_INTCR_RXOIE(0));
+ substream = sai->substreams[SNDRV_PCM_STREAM_CAPTURE];
+ if (substream)
+ snd_pcm_stop_xrun(substream);
+ }
+
+ if (val & SAI_INTSR_FSERRI_ACT) {
+ dev_warn_ratelimited(sai->dev, "Frame Sync Error\n");
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSERRC, SAI_INTCR_FSERRC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSERR_MASK,
+ SAI_INTCR_FSERR(0));
+ }
+
+ if (val & SAI_INTSR_FSLOSTI_ACT) {
+ dev_warn_ratelimited(sai->dev, "Frame Sync Lost\n");
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSLOSTC, SAI_INTCR_FSLOSTC);
+ regmap_update_bits(sai->regmap, SAI_INTCR,
+ SAI_INTCR_FSLOST_MASK,
+ SAI_INTCR_FSLOST(0));
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int rockchip_sai_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct rk_sai_dev *sai;
+ struct snd_soc_dai_driver *dai;
+ struct resource *res;
+ void __iomem *regs;
+ int ret, irq;
+
+ sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL);
+ if (!sai)
+ return -ENOMEM;
+
+ sai->dev = &pdev->dev;
+ sai->fw_ratio = 1;
+ /* match to register default */
+ sai->is_master_mode = true;
+ dev_set_drvdata(&pdev->dev, sai);
+
+ spin_lock_init(&sai->xfer_lock);
+
+ sai->rst_h = devm_reset_control_get_optional_exclusive(&pdev->dev, "h");
+ if (IS_ERR(sai->rst_h))
+ return dev_err_probe(&pdev->dev, PTR_ERR(sai->rst_h),
+ "Error in 'h' reset control\n");
+
+ sai->rst_m = devm_reset_control_get_optional_exclusive(&pdev->dev, "m");
+ if (IS_ERR(sai->rst_m))
+ return dev_err_probe(&pdev->dev, PTR_ERR(sai->rst_m),
+ "Error in 'm' reset control\n");
+
+ regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(regs))
+ return dev_err_probe(&pdev->dev, PTR_ERR(regs),
+ "Failed to get and ioremap resource\n");
+
+ sai->regmap = devm_regmap_init_mmio(&pdev->dev, regs,
+ &rockchip_sai_regmap_config);
+ if (IS_ERR(sai->regmap))
+ return dev_err_probe(&pdev->dev, PTR_ERR(sai->regmap),
+ "Failed to initialize regmap\n");
+
+ irq = platform_get_irq_optional(pdev, 0);
+ if (irq > 0) {
+ ret = devm_request_irq(&pdev->dev, irq, rockchip_sai_isr,
+ IRQF_SHARED, node->name, sai);
+ if (ret) {
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to request irq %d\n", irq);
+ }
+ } else {
+ dev_dbg(&pdev->dev, "Asked for an IRQ but got %d\n", irq);
+ }
+
+ sai->mclk = devm_clk_get(&pdev->dev, "mclk");
+ if (IS_ERR(sai->mclk)) {
+ return dev_err_probe(&pdev->dev, PTR_ERR(sai->mclk),
+ "Failed to get mclk\n");
+ }
+
+ sai->hclk = devm_clk_get(&pdev->dev, "hclk");
+ if (IS_ERR(sai->hclk)) {
+ return dev_err_probe(&pdev->dev, PTR_ERR(sai->hclk),
+ "Failed to get hclk\n");
+ }
+
+ ret = clk_prepare_enable(sai->hclk);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to enable hclk\n");
+
+ regmap_read(sai->regmap, SAI_VERSION, &sai->version);
+
+ ret = rockchip_sai_init_dai(sai, res, &dai);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to initialize DAI: %d\n", ret);
+ goto err_disable_hclk;
+ }
+
+ ret = rockchip_sai_parse_paths(sai, node);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to parse paths: %d\n", ret);
+ goto err_disable_hclk;
+ }
+
+ /*
+ * From here on, all register accesses need to be wrapped in
+ * pm_runtime_get_sync/pm_runtime_put calls
+ *
+ * NB: we don't rely on _resume_and_get in case of !CONFIG_PM
+ */
+ devm_pm_runtime_enable(&pdev->dev);
+ pm_runtime_get_noresume(&pdev->dev);
+ ret = rockchip_sai_runtime_resume(&pdev->dev);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to resume device: %pe\n", ERR_PTR(ret));
+ goto err_disable_hclk;
+ }
+
+ ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register PCM: %d\n", ret);
+ goto err_runtime_suspend;
+ }
+
+ ret = devm_snd_soc_register_component(&pdev->dev,
+ &rockchip_sai_component,
+ dai, 1);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register component: %d\n", ret);
+ goto err_runtime_suspend;
+ }
+
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_put(&pdev->dev);
+
+ clk_disable_unprepare(sai->hclk);
+
+ return 0;
+
+err_runtime_suspend:
+ /* If we're !CONFIG_PM, we get -ENOSYS and disable manually */
+ if (pm_runtime_put(&pdev->dev))
+ rockchip_sai_runtime_suspend(&pdev->dev);
+err_disable_hclk:
+ clk_disable_unprepare(sai->hclk);
+
+ return ret;
+}
+
+static void rockchip_sai_remove(struct platform_device *pdev)
+{
+#ifndef CONFIG_PM
+ rockchip_sai_runtime_suspend(&pdev->dev);
+#endif
+}
+
+static const struct dev_pm_ops rockchip_sai_pm_ops = {
+ SET_RUNTIME_PM_OPS(rockchip_sai_runtime_suspend, rockchip_sai_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+};
+
+static const struct of_device_id rockchip_sai_match[] = {
+ { .compatible = "rockchip,rk3576-sai", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rockchip_sai_match);
+
+static struct platform_driver rockchip_sai_driver = {
+ .probe = rockchip_sai_probe,
+ .remove = rockchip_sai_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = rockchip_sai_match,
+ .pm = &rockchip_sai_pm_ops,
+ },
+};
+module_platform_driver(rockchip_sai_driver);
+
+MODULE_DESCRIPTION("Rockchip SAI ASoC Interface");
+MODULE_AUTHOR("Sugar Zhang <sugar.zhang@rock-chips.com>");
+MODULE_AUTHOR("Nicolas Frattaroli <nicolas.frattaroli@collabora.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/rockchip/rockchip_sai.h b/sound/soc/rockchip/rockchip_sai.h
new file mode 100644
index 000000000000..c359c5d0311c
--- /dev/null
+++ b/sound/soc/rockchip/rockchip_sai.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ALSA SoC Audio Layer - Rockchip SAI Controller driver
+ *
+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd.
+ */
+
+#ifndef _ROCKCHIP_SAI_H
+#define _ROCKCHIP_SAI_H
+
+/* XCR Transmit / Receive Control Register */
+#define SAI_XCR_START_SEL_MASK BIT(23)
+#define SAI_XCR_START_SEL_CHAINED BIT(23)
+#define SAI_XCR_START_SEL_STANDALONE 0
+#define SAI_XCR_EDGE_SHIFT_MASK BIT(22)
+#define SAI_XCR_EDGE_SHIFT_1 BIT(22)
+#define SAI_XCR_EDGE_SHIFT_0 0
+#define SAI_XCR_CSR_MASK GENMASK(21, 20)
+#define SAI_XCR_CSR(x) ((x - 1) << 20)
+#define SAI_XCR_CSR_V(v) ((((v) & SAI_XCR_CSR_MASK) >> 20) + 1)
+#define SAI_XCR_SJM_MASK BIT(19)
+#define SAI_XCR_SJM_L BIT(19)
+#define SAI_XCR_SJM_R 0
+#define SAI_XCR_FBM_MASK BIT(18)
+#define SAI_XCR_FBM_LSB BIT(18)
+#define SAI_XCR_FBM_MSB 0
+#define SAI_XCR_SNB_MASK GENMASK(17, 11)
+#define SAI_XCR_SNB(x) ((x - 1) << 11)
+#define SAI_XCR_VDJ_MASK BIT(10)
+#define SAI_XCR_VDJ_L BIT(10)
+#define SAI_XCR_VDJ_R 0
+#define SAI_XCR_SBW_MASK GENMASK(9, 5)
+#define SAI_XCR_SBW(x) ((x - 1) << 5)
+#define SAI_XCR_SBW_V(v) ((((v) & SAI_XCR_SBW_MASK) >> 5) + 1)
+#define SAI_XCR_VDW_MASK GENMASK(4, 0)
+#define SAI_XCR_VDW(x) ((x - 1) << 0)
+
+/* FSCR Frame Sync Control Register */
+#define SAI_FSCR_EDGE_MASK BIT(24)
+#define SAI_FSCR_EDGE_DUAL BIT(24)
+#define SAI_FSCR_EDGE_RISING 0
+#define SAI_FSCR_FPW_MASK GENMASK(23, 12)
+#define SAI_FSCR_FPW(x) ((x - 1) << 12)
+#define SAI_FSCR_FW_MASK GENMASK(11, 0)
+#define SAI_FSCR_FW(x) ((x - 1) << 0)
+#define SAI_FSCR_FW_V(v) ((((v) & SAI_FSCR_FW_MASK) >> 0) + 1)
+
+/* MONO_CR Mono Control Register */
+#define SAI_MCR_RX_MONO_SLOT_MASK GENMASK(8, 2)
+#define SAI_MCR_RX_MONO_SLOT_SEL(x) ((x - 1) << 2)
+#define SAI_MCR_RX_MONO_MASK BIT(1)
+#define SAI_MCR_RX_MONO_EN BIT(1)
+#define SAI_MCR_RX_MONO_DIS 0
+#define SAI_MCR_TX_MONO_MASK BIT(0)
+#define SAI_MCR_TX_MONO_EN BIT(0)
+#define SAI_MCR_TX_MONO_DIS 0
+
+/* XFER Transfer Start Register */
+#define SAI_XFER_RX_IDLE BIT(8)
+#define SAI_XFER_TX_IDLE BIT(7)
+#define SAI_XFER_FS_IDLE BIT(6)
+/*
+ * Used for TX only (VERSION >= SAI_VER_2311)
+ *
+ * SCLK/FSYNC auto gated when TX FIFO empty.
+ */
+#define SAI_XFER_TX_AUTO_MASK BIT(6)
+#define SAI_XFER_TX_AUTO_EN BIT(6)
+#define SAI_XFER_TX_AUTO_DIS 0
+#define SAI_XFER_RX_CNT_MASK BIT(5)
+#define SAI_XFER_RX_CNT_EN BIT(5)
+#define SAI_XFER_RX_CNT_DIS 0
+#define SAI_XFER_TX_CNT_MASK BIT(4)
+#define SAI_XFER_TX_CNT_EN BIT(4)
+#define SAI_XFER_TX_CNT_DIS 0
+#define SAI_XFER_RXS_MASK BIT(3)
+#define SAI_XFER_RXS_EN BIT(3)
+#define SAI_XFER_RXS_DIS 0
+#define SAI_XFER_TXS_MASK BIT(2)
+#define SAI_XFER_TXS_EN BIT(2)
+#define SAI_XFER_TXS_DIS 0
+#define SAI_XFER_FSS_MASK BIT(1)
+#define SAI_XFER_FSS_EN BIT(1)
+#define SAI_XFER_FSS_DIS 0
+#define SAI_XFER_CLK_MASK BIT(0)
+#define SAI_XFER_CLK_EN BIT(0)
+#define SAI_XFER_CLK_DIS 0
+
+/* CLR Clear Logic Register */
+#define SAI_CLR_FCR BIT(3) /* TODO: what is this? */
+#define SAI_CLR_FSC BIT(2)
+#define SAI_CLR_RXC BIT(1)
+#define SAI_CLR_TXC BIT(0)
+
+/* CKR Clock Generation Register */
+#define SAI_CKR_MDIV_MASK GENMASK(14, 3)
+#define SAI_CKR_MDIV(x) ((x - 1) << 3)
+#define SAI_CKR_MSS_MASK BIT(2)
+#define SAI_CKR_MSS_SLAVE BIT(2)
+#define SAI_CKR_MSS_MASTER 0
+#define SAI_CKR_CKP_MASK BIT(1)
+#define SAI_CKR_CKP_INVERTED BIT(1)
+#define SAI_CKR_CKP_NORMAL 0
+#define SAI_CKR_FSP_MASK BIT(0)
+#define SAI_CKR_FSP_INVERTED BIT(0)
+#define SAI_CKR_FSP_NORMAL 0
+
+/* DMACR DMA Control Register */
+#define SAI_DMACR_RDE_MASK BIT(24)
+#define SAI_DMACR_RDE(x) ((x) << 24)
+#define SAI_DMACR_RDL_MASK GENMASK(20, 16)
+#define SAI_DMACR_RDL(x) ((x - 1) << 16)
+#define SAI_DMACR_RDL_V(v) ((((v) & SAI_DMACR_RDL_MASK) >> 16) + 1)
+#define SAI_DMACR_TDE_MASK BIT(8)
+#define SAI_DMACR_TDE(x) ((x) << 8)
+#define SAI_DMACR_TDL_MASK GENMASK(4, 0)
+#define SAI_DMACR_TDL(x) ((x) << 0)
+#define SAI_DMACR_TDL_V(v) (((v) & SAI_DMACR_TDL_MASK) >> 0)
+
+/* INTCR Interrupt Ctrl Register */
+#define SAI_INTCR_FSLOSTC BIT(28)
+#define SAI_INTCR_FSLOST_MASK BIT(27)
+#define SAI_INTCR_FSLOST(x) ((x) << 27)
+#define SAI_INTCR_FSERRC BIT(26)
+#define SAI_INTCR_FSERR_MASK BIT(25)
+#define SAI_INTCR_FSERR(x) ((x) << 25)
+#define SAI_INTCR_RXOIC BIT(18)
+#define SAI_INTCR_RXOIE_MASK BIT(17)
+#define SAI_INTCR_RXOIE(x) ((x) << 17)
+#define SAI_INTCR_TXUIC BIT(2)
+#define SAI_INTCR_TXUIE_MASK BIT(1)
+#define SAI_INTCR_TXUIE(x) ((x) << 1)
+
+/* INTSR Interrupt Status Register */
+#define SAI_INTSR_FSLOSTI_INA 0
+#define SAI_INTSR_FSLOSTI_ACT BIT(19)
+#define SAI_INTSR_FSERRI_INA 0
+#define SAI_INTSR_FSERRI_ACT BIT(18)
+#define SAI_INTSR_RXOI_INA 0
+#define SAI_INTSR_RXOI_ACT BIT(17)
+#define SAI_INTSR_TXUI_INA 0
+#define SAI_INTSR_TXUI_ACT BIT(1)
+
+/* PATH_SEL: Transfer / Receive Path Select Register */
+#define SAI_RX_PATH_SHIFT(x) (8 + (x) * 2)
+#define SAI_RX_PATH_MASK(x) (0x3 << SAI_RX_PATH_SHIFT(x))
+#define SAI_RX_PATH(x, v) ((v) << SAI_RX_PATH_SHIFT(x))
+#define SAI_TX_PATH_SHIFT(x) (0 + (x) * 2)
+#define SAI_TX_PATH_MASK(x) (0x3 << SAI_TX_PATH_SHIFT(x))
+#define SAI_TX_PATH(x, v) ((v) << SAI_TX_PATH_SHIFT(x))
+
+/* XSHIFT: Transfer / Receive Frame Sync Shift Register */
+
+/*
+ * TX-ONLY: LEFT Direction Feature
+ * +------------------------------------------------+
+ * | DATA LEFTx (step: 0.5 cycle) | FSYNC Edge |
+ * +------------------------------------------------+
+ */
+#define SAI_XSHIFT_LEFT_MASK GENMASK(25, 24)
+#define SAI_XSHIFT_LEFT(x) ((x) << 24)
+/*
+ * +------------------------------------------------+
+ * | FSYNC Edge | DATA RIGHTx (step: 0.5 cycle) |
+ * +------------------------------------------------+
+ */
+#define SAI_XSHIFT_RIGHT_MASK GENMASK(23, 0)
+#define SAI_XSHIFT_RIGHT(x) (x)
+
+/* XFIFOLR: Transfer / Receive FIFO Level Register */
+#define SAI_FIFOLR_XFL3_SHIFT 18
+#define SAI_FIFOLR_XFL3_MASK GENMASK(23, 18)
+#define SAI_FIFOLR_XFL2_SHIFT 12
+#define SAI_FIFOLR_XFL2_MASK GENMASK(17, 12)
+#define SAI_FIFOLR_XFL1_SHIFT 6
+#define SAI_FIFOLR_XFL1_MASK GENMASK(11, 6)
+#define SAI_FIFOLR_XFL0_SHIFT 0
+#define SAI_FIFOLR_XFL0_MASK GENMASK(5, 0)
+
+/* STATUS Status Register (VERSION >= SAI_VER_2307) */
+#define SAI_STATUS_RX_IDLE BIT(3)
+#define SAI_STATUS_TX_IDLE BIT(2)
+#define SAI_STATUS_FS_IDLE BIT(1)
+
+/* VERSION */
+/*
+ * Updates:
+ *
+ * VERSION >= SAI_VER_2311
+ *
+ * Support Frame Sync xN (FSXN)
+ * Support Frame Sync Error Detect (FSE)
+ * Support Frame Sync Lost Detect (FSLOST)
+ * Support Force Clear (FCR)
+ * Support SAIn-Chained (e.g. SAI0-CLK-DATA + SAI3-DATA +...)
+ * Support Transmit Auto Gate Mode
+ * Support Timing Shift Left for TX
+ *
+ * Optimize SCLK/FSYNC Timing Alignment
+ *
+ * VERSION >= SAI_VER_2403
+ *
+ * Support Loopback LR Select (e.g. L:MIC R:LP)
+ *
+ */
+#define SAI_VER_2307 0x23073576
+#define SAI_VER_2311 0x23112118
+#define SAI_VER_2401 0x24013506
+#define SAI_VER_2403 0x24031103
+
+/* FS_TIMEOUT: Frame Sync Timeout Register */
+#define SAI_FS_TIMEOUT_VAL_MASK GENMASK(31, 1)
+#define SAI_FS_TIMEOUT_VAL(x) ((x) << 1)
+#define SAI_FS_TIMEOUT_EN_MASK BIT(0)
+#define SAI_FS_TIMEOUT_EN(x) ((x) << 0)
+
+/* SAI Registers */
+#define SAI_TXCR (0x0000)
+#define SAI_FSCR (0x0004)
+#define SAI_RXCR (0x0008)
+#define SAI_MONO_CR (0x000c)
+#define SAI_XFER (0x0010)
+#define SAI_CLR (0x0014)
+#define SAI_CKR (0x0018)
+#define SAI_TXFIFOLR (0x001c)
+#define SAI_RXFIFOLR (0x0020)
+#define SAI_DMACR (0x0024)
+#define SAI_INTCR (0x0028)
+#define SAI_INTSR (0x002c)
+#define SAI_TXDR (0x0030)
+#define SAI_RXDR (0x0034)
+#define SAI_PATH_SEL (0x0038)
+#define SAI_TX_SLOT_MASK0 (0x003c)
+#define SAI_TX_SLOT_MASK1 (0x0040)
+#define SAI_TX_SLOT_MASK2 (0x0044)
+#define SAI_TX_SLOT_MASK3 (0x0048)
+#define SAI_RX_SLOT_MASK0 (0x004c)
+#define SAI_RX_SLOT_MASK1 (0x0050)
+#define SAI_RX_SLOT_MASK2 (0x0054)
+#define SAI_RX_SLOT_MASK3 (0x0058)
+#define SAI_TX_DATA_CNT (0x005c)
+#define SAI_RX_DATA_CNT (0x0060)
+#define SAI_TX_SHIFT (0x0064)
+#define SAI_RX_SHIFT (0x0068)
+#define SAI_STATUS (0x006c)
+#define SAI_VERSION (0x0070)
+#define SAI_FSXN (0x0074)
+#define SAI_FS_TIMEOUT (0x0078)
+#define SAI_LOOPBACK_LR (0x007c)
+
+#endif /* _ROCKCHIP_SAI_H */
diff --git a/sound/soc/sdca/Makefile b/sound/soc/sdca/Makefile
index dddc3e694256..53344f108ca6 100644
--- a/sound/soc/sdca/Makefile
+++ b/sound/soc/sdca/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-snd-soc-sdca-y := sdca_functions.o sdca_device.o sdca_regmap.o
+snd-soc-sdca-y := sdca_functions.o sdca_device.o sdca_regmap.o sdca_asoc.o
obj-$(CONFIG_SND_SOC_SDCA) += snd-soc-sdca.o
diff --git a/sound/soc/sdca/sdca_asoc.c b/sound/soc/sdca/sdca_asoc.c
new file mode 100644
index 000000000000..7bc8f6069f3d
--- /dev/null
+++ b/sound/soc/sdca/sdca_asoc.c
@@ -0,0 +1,1311 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Cirrus Logic, Inc. and
+// Cirrus Logic International Semiconductor Ltd.
+
+/*
+ * The MIPI SDCA specification is available for public downloads at
+ * https://www.mipi.org/mipi-sdca-v1-0-download
+ */
+
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/soundwire/sdw_registers.h>
+#include <linux/string_helpers.h>
+#include <sound/control.h>
+#include <sound/sdca.h>
+#include <sound/sdca_asoc.h>
+#include <sound/sdca_function.h>
+#include <sound/soc.h>
+#include <sound/soc-component.h>
+#include <sound/soc-dai.h>
+#include <sound/soc-dapm.h>
+#include <sound/tlv.h>
+
+static struct sdca_control *selector_find_control(struct device *dev,
+ struct sdca_entity *entity,
+ const int sel)
+{
+ int i;
+
+ for (i = 0; i < entity->num_controls; i++) {
+ struct sdca_control *control = &entity->controls[i];
+
+ if (control->sel == sel)
+ return control;
+ }
+
+ dev_err(dev, "%s: control %#x: missing\n", entity->label, sel);
+ return NULL;
+}
+
+static struct sdca_control_range *control_find_range(struct device *dev,
+ struct sdca_entity *entity,
+ struct sdca_control *control,
+ int cols, int rows)
+{
+ struct sdca_control_range *range = &control->range;
+
+ if ((cols && range->cols != cols) || (rows && range->rows != rows) ||
+ !range->data) {
+ dev_err(dev, "%s: control %#x: ranges invalid (%d,%d)\n",
+ entity->label, control->sel, range->cols, range->rows);
+ return NULL;
+ }
+
+ return range;
+}
+
+static struct sdca_control_range *selector_find_range(struct device *dev,
+ struct sdca_entity *entity,
+ int sel, int cols, int rows)
+{
+ struct sdca_control *control;
+
+ control = selector_find_control(dev, entity, sel);
+ if (!control)
+ return NULL;
+
+ return control_find_range(dev, entity, control, cols, rows);
+}
+
+static bool exported_control(struct sdca_entity *entity, struct sdca_control *control)
+{
+ switch (SDCA_CTL_TYPE(entity->type, control->sel)) {
+ case SDCA_CTL_TYPE_S(GE, DETECTED_MODE):
+ return true;
+ default:
+ break;
+ }
+
+ return control->layers & (SDCA_ACCESS_LAYER_USER |
+ SDCA_ACCESS_LAYER_APPLICATION);
+}
+
+static bool readonly_control(struct sdca_control *control)
+{
+ return control->has_fixed || control->mode == SDCA_ACCESS_MODE_RO;
+}
+
+/**
+ * sdca_asoc_count_component - count the various component parts
+ * @function: Pointer to the Function information.
+ * @num_widgets: Output integer pointer, will be filled with the
+ * required number of DAPM widgets for the Function.
+ * @num_routes: Output integer pointer, will be filled with the
+ * required number of DAPM routes for the Function.
+ * @num_controls: Output integer pointer, will be filled with the
+ * required number of ALSA controls for the Function.
+ * @num_dais: Output integer pointer, will be filled with the
+ * required number of ASoC DAIs for the Function.
+ *
+ * This function counts various things within the SDCA Function such
+ * that the calling driver can allocate appropriate space before
+ * calling the appropriate population functions.
+ *
+ * Return: Returns zero on success, and a negative error code on failure.
+ */
+int sdca_asoc_count_component(struct device *dev, struct sdca_function_data *function,
+ int *num_widgets, int *num_routes, int *num_controls,
+ int *num_dais)
+{
+ int i, j;
+
+ *num_widgets = function->num_entities - 1;
+ *num_routes = 0;
+ *num_controls = 0;
+ *num_dais = 0;
+
+ for (i = 0; i < function->num_entities - 1; i++) {
+ struct sdca_entity *entity = &function->entities[i];
+
+ /* Add supply/DAI widget connections */
+ switch (entity->type) {
+ case SDCA_ENTITY_TYPE_IT:
+ case SDCA_ENTITY_TYPE_OT:
+ *num_routes += !!entity->iot.clock;
+ *num_routes += !!entity->iot.is_dataport;
+ *num_controls += !entity->iot.is_dataport;
+ *num_dais += !!entity->iot.is_dataport;
+ break;
+ case SDCA_ENTITY_TYPE_PDE:
+ *num_routes += entity->pde.num_managed;
+ break;
+ default:
+ break;
+ }
+
+ if (entity->group)
+ (*num_routes)++;
+
+ /* Add primary entity connections from DisCo */
+ *num_routes += entity->num_sources;
+
+ for (j = 0; j < entity->num_controls; j++) {
+ if (exported_control(entity, &entity->controls[j]))
+ (*num_controls)++;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS(sdca_asoc_count_component, "SND_SOC_SDCA");
+
+static const char *get_terminal_name(enum sdca_terminal_type type)
+{
+ switch (type) {
+ case SDCA_TERM_TYPE_LINEIN_STEREO:
+ return SDCA_TERM_TYPE_LINEIN_STEREO_NAME;
+ case SDCA_TERM_TYPE_LINEIN_FRONT_LR:
+ return SDCA_TERM_TYPE_LINEIN_FRONT_LR_NAME;
+ case SDCA_TERM_TYPE_LINEIN_CENTER_LFE:
+ return SDCA_TERM_TYPE_LINEIN_CENTER_LFE_NAME;
+ case SDCA_TERM_TYPE_LINEIN_SURROUND_LR:
+ return SDCA_TERM_TYPE_LINEIN_SURROUND_LR_NAME;
+ case SDCA_TERM_TYPE_LINEIN_REAR_LR:
+ return SDCA_TERM_TYPE_LINEIN_REAR_LR_NAME;
+ case SDCA_TERM_TYPE_LINEOUT_STEREO:
+ return SDCA_TERM_TYPE_LINEOUT_STEREO_NAME;
+ case SDCA_TERM_TYPE_LINEOUT_FRONT_LR:
+ return SDCA_TERM_TYPE_LINEOUT_FRONT_LR_NAME;
+ case SDCA_TERM_TYPE_LINEOUT_CENTER_LFE:
+ return SDCA_TERM_TYPE_LINEOUT_CENTER_LFE_NAME;
+ case SDCA_TERM_TYPE_LINEOUT_SURROUND_LR:
+ return SDCA_TERM_TYPE_LINEOUT_SURROUND_LR_NAME;
+ case SDCA_TERM_TYPE_LINEOUT_REAR_LR:
+ return SDCA_TERM_TYPE_LINEOUT_REAR_LR_NAME;
+ case SDCA_TERM_TYPE_MIC_JACK:
+ return SDCA_TERM_TYPE_MIC_JACK_NAME;
+ case SDCA_TERM_TYPE_STEREO_JACK:
+ return SDCA_TERM_TYPE_STEREO_JACK_NAME;
+ case SDCA_TERM_TYPE_FRONT_LR_JACK:
+ return SDCA_TERM_TYPE_FRONT_LR_JACK_NAME;
+ case SDCA_TERM_TYPE_CENTER_LFE_JACK:
+ return SDCA_TERM_TYPE_CENTER_LFE_JACK_NAME;
+ case SDCA_TERM_TYPE_SURROUND_LR_JACK:
+ return SDCA_TERM_TYPE_SURROUND_LR_JACK_NAME;
+ case SDCA_TERM_TYPE_REAR_LR_JACK:
+ return SDCA_TERM_TYPE_REAR_LR_JACK_NAME;
+ case SDCA_TERM_TYPE_HEADPHONE_JACK:
+ return SDCA_TERM_TYPE_HEADPHONE_JACK_NAME;
+ case SDCA_TERM_TYPE_HEADSET_JACK:
+ return SDCA_TERM_TYPE_HEADSET_JACK_NAME;
+ default:
+ return NULL;
+ }
+}
+
+static int entity_early_parse_ge(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity)
+{
+ struct sdca_control_range *range;
+ struct sdca_control *control;
+ struct snd_kcontrol_new *kctl;
+ struct soc_enum *soc_enum;
+ const char *control_name;
+ unsigned int *values;
+ const char **texts;
+ int i;
+
+ control = selector_find_control(dev, entity, SDCA_CTL_GE_SELECTED_MODE);
+ if (!control)
+ return -EINVAL;
+
+ if (control->layers != SDCA_ACCESS_LAYER_CLASS)
+ dev_warn(dev, "%s: unexpected access layer: %x\n",
+ entity->label, control->layers);
+
+ range = control_find_range(dev, entity, control, SDCA_SELECTED_MODE_NCOLS, 0);
+ if (!range)
+ return -EINVAL;
+
+ control_name = devm_kasprintf(dev, GFP_KERNEL, "%s %s",
+ entity->label, control->label);
+ if (!control_name)
+ return -ENOMEM;
+
+ kctl = devm_kmalloc(dev, sizeof(*kctl), GFP_KERNEL);
+ if (!kctl)
+ return -ENOMEM;
+
+ soc_enum = devm_kmalloc(dev, sizeof(*soc_enum), GFP_KERNEL);
+ if (!soc_enum)
+ return -ENOMEM;
+
+ texts = devm_kcalloc(dev, range->rows + 3, sizeof(*texts), GFP_KERNEL);
+ if (!texts)
+ return -ENOMEM;
+
+ values = devm_kcalloc(dev, range->rows + 3, sizeof(*values), GFP_KERNEL);
+ if (!values)
+ return -ENOMEM;
+
+ texts[0] = "No Jack";
+ texts[1] = "Jack Unknown";
+ texts[2] = "Detection in Progress";
+ values[0] = 0;
+ values[1] = 1;
+ values[2] = 2;
+ for (i = 0; i < range->rows; i++) {
+ enum sdca_terminal_type type;
+
+ type = sdca_range(range, SDCA_SELECTED_MODE_TERM_TYPE, i);
+
+ values[i + 3] = sdca_range(range, SDCA_SELECTED_MODE_INDEX, i);
+ texts[i + 3] = get_terminal_name(type);
+ if (!texts[i + 3]) {
+ dev_err(dev, "%s: unrecognised terminal type: %#x\n",
+ entity->label, type);
+ return -EINVAL;
+ }
+ }
+
+ soc_enum->reg = SDW_SDCA_CTL(function->desc->adr, entity->id, control->sel, 0);
+ soc_enum->items = range->rows + 3;
+ soc_enum->mask = roundup_pow_of_two(soc_enum->items) - 1;
+ soc_enum->texts = texts;
+ soc_enum->values = values;
+
+ kctl->iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+ kctl->name = control_name;
+ kctl->info = snd_soc_info_enum_double;
+ kctl->get = snd_soc_dapm_get_enum_double;
+ kctl->put = snd_soc_dapm_put_enum_double;
+ kctl->private_value = (unsigned long)soc_enum;
+
+ entity->ge.kctl = kctl;
+
+ return 0;
+}
+
+static void add_route(struct snd_soc_dapm_route **route, const char *sink,
+ const char *control, const char *source)
+{
+ (*route)->sink = sink;
+ (*route)->control = control;
+ (*route)->source = source;
+ (*route)++;
+}
+
+static int entity_parse_simple(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route,
+ enum snd_soc_dapm_type id)
+{
+ int i;
+
+ (*widget)->id = id;
+ (*widget)++;
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, NULL, entity->sources[i]->label);
+
+ return 0;
+}
+
+static int entity_parse_it(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ int i;
+
+ if (entity->iot.is_dataport) {
+ const char *aif_name = devm_kasprintf(dev, GFP_KERNEL, "%s %s",
+ entity->label, "Playback");
+ if (!aif_name)
+ return -ENOMEM;
+
+ (*widget)->id = snd_soc_dapm_aif_in;
+
+ add_route(route, entity->label, NULL, aif_name);
+ } else {
+ (*widget)->id = snd_soc_dapm_mic;
+ }
+
+ if (entity->iot.clock)
+ add_route(route, entity->label, NULL, entity->iot.clock->label);
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, NULL, entity->sources[i]->label);
+
+ (*widget)++;
+
+ return 0;
+}
+
+static int entity_parse_ot(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ int i;
+
+ if (entity->iot.is_dataport) {
+ const char *aif_name = devm_kasprintf(dev, GFP_KERNEL, "%s %s",
+ entity->label, "Capture");
+ if (!aif_name)
+ return -ENOMEM;
+
+ (*widget)->id = snd_soc_dapm_aif_out;
+
+ add_route(route, aif_name, NULL, entity->label);
+ } else {
+ (*widget)->id = snd_soc_dapm_spk;
+ }
+
+ if (entity->iot.clock)
+ add_route(route, entity->label, NULL, entity->iot.clock->label);
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, NULL, entity->sources[i]->label);
+
+ (*widget)++;
+
+ return 0;
+}
+
+static int entity_pde_event(struct snd_soc_dapm_widget *widget,
+ struct snd_kcontrol *kctl, int event)
+{
+ struct snd_soc_component *component = widget->dapm->component;
+ struct sdca_entity *entity = widget->priv;
+ static const int polls = 100;
+ unsigned int reg, val;
+ int from, to, i;
+ int poll_us;
+ int ret;
+
+ if (!component)
+ return -EIO;
+
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMD:
+ from = widget->on_val;
+ to = widget->off_val;
+ break;
+ case SND_SOC_DAPM_POST_PMU:
+ from = widget->off_val;
+ to = widget->on_val;
+ break;
+ }
+
+ for (i = 0; i < entity->pde.num_max_delay; i++) {
+ struct sdca_pde_delay *delay = &entity->pde.max_delay[i];
+
+ if (delay->from_ps == from && delay->to_ps == to) {
+ poll_us = delay->us / polls;
+ break;
+ }
+ }
+
+ reg = SDW_SDCA_CTL(SDW_SDCA_CTL_FUNC(widget->reg),
+ SDW_SDCA_CTL_ENT(widget->reg),
+ SDCA_CTL_PDE_ACTUAL_PS, 0);
+
+ for (i = 0; i < polls; i++) {
+ if (i)
+ fsleep(poll_us);
+
+ ret = regmap_read(component->regmap, reg, &val);
+ if (ret)
+ return ret;
+ else if (val == to)
+ return 0;
+ }
+
+ dev_err(component->dev, "%s: power transition failed: %x\n",
+ entity->label, val);
+ return -ETIMEDOUT;
+}
+
+static int entity_parse_pde(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ unsigned int target = (1 << SDCA_PDE_PS0) | (1 << SDCA_PDE_PS3);
+ struct sdca_control_range *range;
+ struct sdca_control *control;
+ unsigned int mask = 0;
+ int i;
+
+ control = selector_find_control(dev, entity, SDCA_CTL_PDE_REQUESTED_PS);
+ if (!control)
+ return -EINVAL;
+
+ /* Power should only be controlled by the driver */
+ if (control->layers != SDCA_ACCESS_LAYER_CLASS)
+ dev_warn(dev, "%s: unexpected access layer: %x\n",
+ entity->label, control->layers);
+
+ range = control_find_range(dev, entity, control, SDCA_REQUESTED_PS_NCOLS, 0);
+ if (!range)
+ return -EINVAL;
+
+ for (i = 0; i < range->rows; i++)
+ mask |= 1 << sdca_range(range, SDCA_REQUESTED_PS_STATE, i);
+
+ if ((mask & target) != target) {
+ dev_err(dev, "%s: power control missing states\n", entity->label);
+ return -EINVAL;
+ }
+
+ (*widget)->id = snd_soc_dapm_supply;
+ (*widget)->reg = SDW_SDCA_CTL(function->desc->adr, entity->id, control->sel, 0);
+ (*widget)->mask = GENMASK(control->nbits - 1, 0);
+ (*widget)->on_val = SDCA_PDE_PS0;
+ (*widget)->off_val = SDCA_PDE_PS3;
+ (*widget)->event_flags = SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD;
+ (*widget)->event = entity_pde_event;
+ (*widget)->priv = entity;
+ (*widget)++;
+
+ for (i = 0; i < entity->pde.num_managed; i++)
+ add_route(route, entity->pde.managed[i]->label, NULL, entity->label);
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, NULL, entity->sources[i]->label);
+
+ return 0;
+}
+
+/* Device selector units are controlled through a group entity */
+static int entity_parse_su_device(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ struct sdca_control_range *range;
+ int num_routes = 0;
+ int i, j;
+
+ if (!entity->group) {
+ dev_err(dev, "%s: device selector unit missing group\n", entity->label);
+ return -EINVAL;
+ }
+
+ range = selector_find_range(dev, entity->group, SDCA_CTL_GE_SELECTED_MODE,
+ SDCA_SELECTED_MODE_NCOLS, 0);
+ if (!range)
+ return -EINVAL;
+
+ (*widget)->id = snd_soc_dapm_mux;
+ (*widget)->kcontrol_news = entity->group->ge.kctl;
+ (*widget)->num_kcontrols = 1;
+ (*widget)++;
+
+ for (i = 0; i < entity->group->ge.num_modes; i++) {
+ struct sdca_ge_mode *mode = &entity->group->ge.modes[i];
+
+ for (j = 0; j < mode->num_controls; j++) {
+ struct sdca_ge_control *affected = &mode->controls[j];
+ int term;
+
+ if (affected->id != entity->id ||
+ affected->sel != SDCA_CTL_SU_SELECTOR ||
+ !affected->val)
+ continue;
+
+ if (affected->val - 1 >= entity->num_sources) {
+ dev_err(dev, "%s: bad control value: %#x\n",
+ entity->label, affected->val);
+ return -EINVAL;
+ }
+
+ if (++num_routes > entity->num_sources) {
+ dev_err(dev, "%s: too many input routes\n", entity->label);
+ return -EINVAL;
+ }
+
+ term = sdca_range_search(range, SDCA_SELECTED_MODE_INDEX,
+ mode->val, SDCA_SELECTED_MODE_TERM_TYPE);
+ if (!term) {
+ dev_err(dev, "%s: mode not found: %#x\n",
+ entity->label, mode->val);
+ return -EINVAL;
+ }
+
+ add_route(route, entity->label, get_terminal_name(term),
+ entity->sources[affected->val - 1]->label);
+ }
+ }
+
+ return 0;
+}
+
+/* Class selector units will be exported as an ALSA control */
+static int entity_parse_su_class(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct sdca_control *control,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ struct snd_kcontrol_new *kctl;
+ struct soc_enum *soc_enum;
+ const char **texts;
+ int i;
+
+ kctl = devm_kmalloc(dev, sizeof(*kctl), GFP_KERNEL);
+ if (!kctl)
+ return -ENOMEM;
+
+ soc_enum = devm_kmalloc(dev, sizeof(*soc_enum), GFP_KERNEL);
+ if (!soc_enum)
+ return -ENOMEM;
+
+ texts = devm_kcalloc(dev, entity->num_sources + 1, sizeof(*texts), GFP_KERNEL);
+ if (!texts)
+ return -ENOMEM;
+
+ texts[0] = "No Signal";
+ for (i = 0; i < entity->num_sources; i++)
+ texts[i + 1] = entity->sources[i]->label;
+
+ soc_enum->reg = SDW_SDCA_CTL(function->desc->adr, entity->id, control->sel, 0);
+ soc_enum->items = entity->num_sources + 1;
+ soc_enum->mask = roundup_pow_of_two(soc_enum->items) - 1;
+ soc_enum->texts = texts;
+
+ kctl->iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+ kctl->name = "Route";
+ kctl->info = snd_soc_info_enum_double;
+ kctl->get = snd_soc_dapm_get_enum_double;
+ kctl->put = snd_soc_dapm_put_enum_double;
+ kctl->private_value = (unsigned long)soc_enum;
+
+ (*widget)->id = snd_soc_dapm_mux;
+ (*widget)->kcontrol_news = kctl;
+ (*widget)->num_kcontrols = 1;
+ (*widget)++;
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, texts[i + 1], entity->sources[i]->label);
+
+ return 0;
+}
+
+static int entity_parse_su(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ struct sdca_control *control;
+
+ if (!entity->num_sources) {
+ dev_err(dev, "%s: selector with no inputs\n", entity->label);
+ return -EINVAL;
+ }
+
+ control = selector_find_control(dev, entity, SDCA_CTL_SU_SELECTOR);
+ if (!control)
+ return -EINVAL;
+
+ if (control->layers == SDCA_ACCESS_LAYER_DEVICE)
+ return entity_parse_su_device(dev, function, entity, widget, route);
+
+ if (control->layers != SDCA_ACCESS_LAYER_CLASS)
+ dev_warn(dev, "%s: unexpected access layer: %x\n",
+ entity->label, control->layers);
+
+ return entity_parse_su_class(dev, function, entity, control, widget, route);
+}
+
+static int entity_parse_mu(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ struct sdca_control *control;
+ struct snd_kcontrol_new *kctl;
+ int cn;
+ int i;
+
+ if (!entity->num_sources) {
+ dev_err(dev, "%s: selector 1 or more inputs\n", entity->label);
+ return -EINVAL;
+ }
+
+ control = selector_find_control(dev, entity, SDCA_CTL_MU_MIXER);
+ if (!control)
+ return -EINVAL;
+
+ /* MU control should be through DAPM */
+ if (control->layers != SDCA_ACCESS_LAYER_CLASS)
+ dev_warn(dev, "%s: unexpected access layer: %x\n",
+ entity->label, control->layers);
+
+ if (entity->num_sources != hweight64(control->cn_list)) {
+ dev_err(dev, "%s: mismatched control and sources\n", entity->label);
+ return -EINVAL;
+ }
+
+ kctl = devm_kcalloc(dev, entity->num_sources, sizeof(*kctl), GFP_KERNEL);
+ if (!kctl)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_set_bit(cn, (unsigned long *)&control->cn_list,
+ BITS_PER_TYPE(control->cn_list)) {
+ const char *control_name;
+ struct soc_mixer_control *mc;
+
+ control_name = devm_kasprintf(dev, GFP_KERNEL, "%s %d",
+ control->label, i + 1);
+ if (!control_name)
+ return -ENOMEM;
+
+ mc = devm_kmalloc(dev, sizeof(*mc), GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ mc->reg = SND_SOC_NOPM;
+ mc->rreg = SND_SOC_NOPM;
+ mc->invert = 1; // Ensure default is connected
+ mc->min = 0;
+ mc->max = 1;
+
+ kctl[i].name = control_name;
+ kctl[i].private_value = (unsigned long)mc;
+ kctl[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+ kctl[i].info = snd_soc_info_volsw;
+ kctl[i].get = snd_soc_dapm_get_volsw;
+ kctl[i].put = snd_soc_dapm_put_volsw;
+ i++;
+ }
+
+ (*widget)->id = snd_soc_dapm_mixer;
+ (*widget)->kcontrol_news = kctl;
+ (*widget)->num_kcontrols = entity->num_sources;
+ (*widget)++;
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, kctl[i].name, entity->sources[i]->label);
+
+ return 0;
+}
+
+static int entity_cs_event(struct snd_soc_dapm_widget *widget,
+ struct snd_kcontrol *kctl, int event)
+{
+ struct snd_soc_component *component = widget->dapm->component;
+ struct sdca_entity *entity = widget->priv;
+
+ if (!component)
+ return -EIO;
+
+ if (entity->cs.max_delay)
+ fsleep(entity->cs.max_delay);
+
+ return 0;
+}
+
+static int entity_parse_cs(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_dapm_widget **widget,
+ struct snd_soc_dapm_route **route)
+{
+ int i;
+
+ (*widget)->id = snd_soc_dapm_supply;
+ (*widget)->subseq = 1; /* Ensure these run after PDEs */
+ (*widget)->event_flags = SND_SOC_DAPM_POST_PMU;
+ (*widget)->event = entity_cs_event;
+ (*widget)->priv = entity;
+ (*widget)++;
+
+ for (i = 0; i < entity->num_sources; i++)
+ add_route(route, entity->label, NULL, entity->sources[i]->label);
+
+ return 0;
+}
+
+/**
+ * sdca_asoc_populate_dapm - fill in arrays of DAPM widgets and routes
+ * @dev: Pointer to the device against which allocations will be done.
+ * @function: Pointer to the Function information.
+ * @widget: Array of DAPM widgets to be populated.
+ * @route: Array of DAPM routes to be populated.
+ *
+ * This function populates arrays of DAPM widgets and routes from the
+ * DisCo information for a particular SDCA Function. Typically,
+ * snd_soc_asoc_count_component will be used to allocate appropriately
+ * sized arrays before calling this function.
+ *
+ * Return: Returns zero on success, and a negative error code on failure.
+ */
+int sdca_asoc_populate_dapm(struct device *dev, struct sdca_function_data *function,
+ struct snd_soc_dapm_widget *widget,
+ struct snd_soc_dapm_route *route)
+{
+ int ret;
+ int i;
+
+ for (i = 0; i < function->num_entities - 1; i++) {
+ struct sdca_entity *entity = &function->entities[i];
+
+ /*
+ * Some entities need to add controls "early" as they are
+ * referenced by other entities.
+ */
+ switch (entity->type) {
+ case SDCA_ENTITY_TYPE_GE:
+ ret = entity_early_parse_ge(dev, function, entity);
+ if (ret)
+ return ret;
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (i = 0; i < function->num_entities - 1; i++) {
+ struct sdca_entity *entity = &function->entities[i];
+
+ widget->name = entity->label;
+ widget->reg = SND_SOC_NOPM;
+
+ switch (entity->type) {
+ case SDCA_ENTITY_TYPE_IT:
+ ret = entity_parse_it(dev, function, entity, &widget, &route);
+ break;
+ case SDCA_ENTITY_TYPE_OT:
+ ret = entity_parse_ot(dev, function, entity, &widget, &route);
+ break;
+ case SDCA_ENTITY_TYPE_PDE:
+ ret = entity_parse_pde(dev, function, entity, &widget, &route);
+ break;
+ case SDCA_ENTITY_TYPE_SU:
+ ret = entity_parse_su(dev, function, entity, &widget, &route);
+ break;
+ case SDCA_ENTITY_TYPE_MU:
+ ret = entity_parse_mu(dev, function, entity, &widget, &route);
+ break;
+ case SDCA_ENTITY_TYPE_CS:
+ ret = entity_parse_cs(dev, function, entity, &widget, &route);
+ break;
+ case SDCA_ENTITY_TYPE_CX:
+ /*
+ * FIXME: For now we will just treat these as a supply,
+ * meaning all options are enabled.
+ */
+ dev_warn(dev, "%s: clock selectors not fully supported yet\n",
+ entity->label);
+ ret = entity_parse_simple(dev, function, entity, &widget,
+ &route, snd_soc_dapm_supply);
+ break;
+ case SDCA_ENTITY_TYPE_TG:
+ ret = entity_parse_simple(dev, function, entity, &widget,
+ &route, snd_soc_dapm_siggen);
+ break;
+ case SDCA_ENTITY_TYPE_GE:
+ ret = entity_parse_simple(dev, function, entity, &widget,
+ &route, snd_soc_dapm_supply);
+ break;
+ default:
+ ret = entity_parse_simple(dev, function, entity, &widget,
+ &route, snd_soc_dapm_pga);
+ break;
+ }
+ if (ret)
+ return ret;
+
+ if (entity->group)
+ add_route(&route, entity->label, NULL, entity->group->label);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS(sdca_asoc_populate_dapm, "SND_SOC_SDCA");
+
+static int control_limit_kctl(struct device *dev,
+ struct sdca_entity *entity,
+ struct sdca_control *control,
+ struct snd_kcontrol_new *kctl)
+{
+ struct soc_mixer_control *mc = (struct soc_mixer_control *)kctl->private_value;
+ struct sdca_control_range *range;
+ int min, max, step;
+ unsigned int *tlv;
+ int shift;
+
+ if (control->type != SDCA_CTL_DATATYPE_Q7P8DB)
+ return 0;
+
+ /*
+ * FIXME: For now only handle the simple case of a single linear range
+ */
+ range = control_find_range(dev, entity, control, SDCA_VOLUME_LINEAR_NCOLS, 1);
+ if (!range)
+ return -EINVAL;
+
+ min = sdca_range(range, SDCA_VOLUME_LINEAR_MIN, 0);
+ max = sdca_range(range, SDCA_VOLUME_LINEAR_MAX, 0);
+ step = sdca_range(range, SDCA_VOLUME_LINEAR_STEP, 0);
+
+ min = sign_extend32(min, control->nbits - 1);
+ max = sign_extend32(max, control->nbits - 1);
+
+ /*
+ * FIXME: Only support power of 2 step sizes as this can be supported
+ * by a simple shift.
+ */
+ if (hweight32(step) != 1) {
+ dev_err(dev, "%s: %s: currently unsupported step size\n",
+ entity->label, control->label);
+ return -EINVAL;
+ }
+
+ /*
+ * The SDCA volumes are in steps of 1/256th of a dB, a step down of
+ * 64 (shift of 6) gives 1/4dB. 1/4dB is the smallest unit that is also
+ * representable in the ALSA TLVs which are in 1/100ths of a dB.
+ */
+ shift = max(ffs(step) - 1, 6);
+
+ tlv = devm_kcalloc(dev, 4, sizeof(*tlv), GFP_KERNEL);
+ if (!tlv)
+ return -ENOMEM;
+
+ tlv[0] = SNDRV_CTL_TLVT_DB_SCALE;
+ tlv[1] = 2 * sizeof(*tlv);
+ tlv[2] = (min * 100) >> 8;
+ tlv[3] = ((1 << shift) * 100) >> 8;
+
+ mc->min = min >> shift;
+ mc->max = max >> shift;
+ mc->shift = shift;
+ mc->rshift = shift;
+ mc->sign_bit = 15 - shift;
+
+ kctl->tlv.p = tlv;
+ kctl->access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+
+ return 0;
+}
+
+static int populate_control(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct sdca_control *control,
+ struct snd_kcontrol_new **kctl)
+{
+ const char *control_suffix = "";
+ const char *control_name;
+ struct soc_mixer_control *mc;
+ int index = 0;
+ int ret;
+ int cn;
+
+ if (!exported_control(entity, control))
+ return 0;
+
+ if (control->type == SDCA_CTL_DATATYPE_ONEBIT)
+ control_suffix = " Switch";
+
+ control_name = devm_kasprintf(dev, GFP_KERNEL, "%s %s%s", entity->label,
+ control->label, control_suffix);
+ if (!control_name)
+ return -ENOMEM;
+
+ mc = devm_kmalloc(dev, sizeof(*mc), GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ for_each_set_bit(cn, (unsigned long *)&control->cn_list,
+ BITS_PER_TYPE(control->cn_list)) {
+ switch (index++) {
+ case 0:
+ mc->reg = SDW_SDCA_CTL(function->desc->adr, entity->id,
+ control->sel, cn);
+ mc->rreg = mc->reg;
+ break;
+ case 1:
+ mc->rreg = SDW_SDCA_CTL(function->desc->adr, entity->id,
+ control->sel, cn);
+ break;
+ default:
+ dev_err(dev, "%s: %s: only mono/stereo controls supported\n",
+ entity->label, control->label);
+ return -EINVAL;
+ }
+ }
+
+ mc->min = 0;
+ mc->max = clamp((0x1ull << control->nbits) - 1, 0, type_max(mc->max));
+
+ (*kctl)->name = control_name;
+ (*kctl)->private_value = (unsigned long)mc;
+ (*kctl)->iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+ (*kctl)->info = snd_soc_info_volsw;
+ (*kctl)->get = snd_soc_get_volsw;
+ (*kctl)->put = snd_soc_put_volsw;
+
+ if (readonly_control(control))
+ (*kctl)->access = SNDRV_CTL_ELEM_ACCESS_READ;
+ else
+ (*kctl)->access = SNDRV_CTL_ELEM_ACCESS_READWRITE;
+
+ ret = control_limit_kctl(dev, entity, control, *kctl);
+ if (ret)
+ return ret;
+
+ (*kctl)++;
+
+ return 0;
+}
+
+static int populate_pin_switch(struct device *dev,
+ struct sdca_entity *entity,
+ struct snd_kcontrol_new **kctl)
+{
+ const char *control_name;
+
+ control_name = devm_kasprintf(dev, GFP_KERNEL, "%s Switch", entity->label);
+ if (!control_name)
+ return -ENOMEM;
+
+ (*kctl)->name = control_name;
+ (*kctl)->private_value = (unsigned long)entity->label;
+ (*kctl)->iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+ (*kctl)->info = snd_soc_dapm_info_pin_switch;
+ (*kctl)->get = snd_soc_dapm_get_component_pin_switch;
+ (*kctl)->put = snd_soc_dapm_put_component_pin_switch;
+ (*kctl)++;
+
+ return 0;
+}
+
+/**
+ * sdca_asoc_populate_controls - fill in an array of ALSA controls for a Function
+ * @dev: Pointer to the device against which allocations will be done.
+ * @function: Pointer to the Function information.
+ * @route: Array of ALSA controls to be populated.
+ *
+ * This function populates an array of ALSA controls from the DisCo
+ * information for a particular SDCA Function. Typically,
+ * snd_soc_asoc_count_component will be used to allocate an
+ * appropriately sized array before calling this function.
+ *
+ * Return: Returns zero on success, and a negative error code on failure.
+ */
+int sdca_asoc_populate_controls(struct device *dev,
+ struct sdca_function_data *function,
+ struct snd_kcontrol_new *kctl)
+{
+ int i, j;
+ int ret;
+
+ for (i = 0; i < function->num_entities; i++) {
+ struct sdca_entity *entity = &function->entities[i];
+
+ switch (entity->type) {
+ case SDCA_ENTITY_TYPE_IT:
+ case SDCA_ENTITY_TYPE_OT:
+ if (!entity->iot.is_dataport) {
+ ret = populate_pin_switch(dev, entity, &kctl);
+ if (ret)
+ return ret;
+ }
+ break;
+ default:
+ break;
+ }
+
+ for (j = 0; j < entity->num_controls; j++) {
+ ret = populate_control(dev, function, entity,
+ &entity->controls[j], &kctl);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS(sdca_asoc_populate_controls, "SND_SOC_SDCA");
+
+static unsigned int rate_find_mask(unsigned int rate)
+{
+ switch (rate) {
+ case 0:
+ return SNDRV_PCM_RATE_8000_768000;
+ case 5512:
+ return SNDRV_PCM_RATE_5512;
+ case 8000:
+ return SNDRV_PCM_RATE_8000;
+ case 11025:
+ return SNDRV_PCM_RATE_11025;
+ case 16000:
+ return SNDRV_PCM_RATE_16000;
+ case 22050:
+ return SNDRV_PCM_RATE_22050;
+ case 32000:
+ return SNDRV_PCM_RATE_32000;
+ case 44100:
+ return SNDRV_PCM_RATE_44100;
+ case 48000:
+ return SNDRV_PCM_RATE_48000;
+ case 64000:
+ return SNDRV_PCM_RATE_64000;
+ case 88200:
+ return SNDRV_PCM_RATE_88200;
+ case 96000:
+ return SNDRV_PCM_RATE_96000;
+ case 176400:
+ return SNDRV_PCM_RATE_176400;
+ case 192000:
+ return SNDRV_PCM_RATE_192000;
+ case 352800:
+ return SNDRV_PCM_RATE_352800;
+ case 384000:
+ return SNDRV_PCM_RATE_384000;
+ case 705600:
+ return SNDRV_PCM_RATE_705600;
+ case 768000:
+ return SNDRV_PCM_RATE_768000;
+ case 12000:
+ return SNDRV_PCM_RATE_12000;
+ case 24000:
+ return SNDRV_PCM_RATE_24000;
+ case 128000:
+ return SNDRV_PCM_RATE_128000;
+ default:
+ return 0;
+ }
+}
+
+static u64 width_find_mask(unsigned int bits)
+{
+ switch (bits) {
+ case 0:
+ return SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE |
+ SNDRV_PCM_FMTBIT_S20_LE | SNDRV_PCM_FMTBIT_S24_LE |
+ SNDRV_PCM_FMTBIT_S32_LE;
+ case 8:
+ return SNDRV_PCM_FMTBIT_S8;
+ case 16:
+ return SNDRV_PCM_FMTBIT_S16_LE;
+ case 20:
+ return SNDRV_PCM_FMTBIT_S20_LE;
+ case 24:
+ return SNDRV_PCM_FMTBIT_S24_LE;
+ case 32:
+ return SNDRV_PCM_FMTBIT_S32_LE;
+ default:
+ return 0;
+ }
+}
+
+static int populate_rate_format(struct device *dev,
+ struct sdca_function_data *function,
+ struct sdca_entity *entity,
+ struct snd_soc_pcm_stream *stream)
+{
+ struct sdca_control_range *range;
+ unsigned int sample_rate, sample_width;
+ unsigned int clock_rates = 0;
+ unsigned int rates = 0;
+ u64 formats = 0;
+ int sel, i;
+
+ switch (entity->type) {
+ case SDCA_ENTITY_TYPE_IT:
+ sel = SDCA_CTL_IT_USAGE;
+ break;
+ case SDCA_ENTITY_TYPE_OT:
+ sel = SDCA_CTL_OT_USAGE;
+ break;
+ default:
+ dev_err(dev, "%s: entity type has no usage control\n",
+ entity->label);
+ return -EINVAL;
+ }
+
+ if (entity->iot.clock) {
+ range = selector_find_range(dev, entity->iot.clock,
+ SDCA_CTL_CS_SAMPLERATEINDEX,
+ SDCA_SAMPLERATEINDEX_NCOLS, 0);
+ if (!range)
+ return -EINVAL;
+
+ for (i = 0; i < range->rows; i++) {
+ sample_rate = sdca_range(range, SDCA_SAMPLERATEINDEX_RATE, i);
+ clock_rates |= rate_find_mask(sample_rate);
+ }
+ } else {
+ clock_rates = UINT_MAX;
+ }
+
+ range = selector_find_range(dev, entity, sel, SDCA_USAGE_NCOLS, 0);
+ if (!range)
+ return -EINVAL;
+
+ for (i = 0; i < range->rows; i++) {
+ sample_rate = sdca_range(range, SDCA_USAGE_SAMPLE_RATE, i);
+ sample_rate = rate_find_mask(sample_rate);
+
+ if (sample_rate & clock_rates) {
+ rates |= sample_rate;
+
+ sample_width = sdca_range(range, SDCA_USAGE_SAMPLE_WIDTH, i);
+ formats |= width_find_mask(sample_width);
+ }
+ }
+
+ stream->formats = formats;
+ stream->rates = rates;
+
+ return 0;
+}
+
+/**
+ * sdca_asoc_populate_dais - fill in an array of DAI drivers for a Function
+ * @dev: Pointer to the device against which allocations will be done.
+ * @function: Pointer to the Function information.
+ * @dais: Array of DAI drivers to be populated.
+ * @ops: DAI ops to be attached to each of the created DAI drivers.
+ *
+ * This function populates an array of ASoC DAI drivers from the DisCo
+ * information for a particular SDCA Function. Typically,
+ * snd_soc_asoc_count_component will be used to allocate an
+ * appropriately sized array before calling this function.
+ *
+ * Return: Returns zero on success, and a negative error code on failure.
+ */
+int sdca_asoc_populate_dais(struct device *dev, struct sdca_function_data *function,
+ struct snd_soc_dai_driver *dais,
+ const struct snd_soc_dai_ops *ops)
+{
+ int i, j;
+ int ret;
+
+ for (i = 0, j = 0; i < function->num_entities - 1; i++) {
+ struct sdca_entity *entity = &function->entities[i];
+ struct snd_soc_pcm_stream *stream;
+ const char *stream_suffix;
+
+ switch (entity->type) {
+ case SDCA_ENTITY_TYPE_IT:
+ stream = &dais[j].playback;
+ stream_suffix = "Playback";
+ break;
+ case SDCA_ENTITY_TYPE_OT:
+ stream = &dais[j].capture;
+ stream_suffix = "Capture";
+ break;
+ default:
+ continue;
+ }
+
+ /* Can't check earlier as only terminals have an iot member. */
+ if (!entity->iot.is_dataport)
+ continue;
+
+ stream->stream_name = devm_kasprintf(dev, GFP_KERNEL, "%s %s",
+ entity->label, stream_suffix);
+ if (!stream->stream_name)
+ return -ENOMEM;
+ /* Channels will be further limited by constraints */
+ stream->channels_min = 1;
+ stream->channels_max = SDCA_MAX_CHANNEL_COUNT;
+
+ ret = populate_rate_format(dev, function, entity, stream);
+ if (ret)
+ return ret;
+
+ dais[j].id = i;
+ dais[j].name = entity->label;
+ dais[j].ops = ops;
+ j++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS(sdca_asoc_populate_dais, "SND_SOC_SDCA");
+
+/**
+ * sdca_asoc_populate_component - fill in a component driver for a Function
+ * @dev: Pointer to the device against which allocations will be done.
+ * @function: Pointer to the Function information.
+ * @copmonent_drv: Pointer to the component driver to be populated.
+ *
+ * This function populates a snd_soc_component_driver structure based
+ * on the DisCo information for a particular SDCA Function. It does
+ * all allocation internally.
+ *
+ * Return: Returns zero on success, and a negative error code on failure.
+ */
+int sdca_asoc_populate_component(struct device *dev,
+ struct sdca_function_data *function,
+ struct snd_soc_component_driver *component_drv,
+ struct snd_soc_dai_driver **dai_drv, int *num_dai_drv,
+ const struct snd_soc_dai_ops *ops)
+{
+ struct snd_soc_dapm_widget *widgets;
+ struct snd_soc_dapm_route *routes;
+ struct snd_kcontrol_new *controls;
+ struct snd_soc_dai_driver *dais;
+ int num_widgets, num_routes, num_controls, num_dais;
+ int ret;
+
+ ret = sdca_asoc_count_component(dev, function, &num_widgets, &num_routes,
+ &num_controls, &num_dais);
+ if (ret)
+ return ret;
+
+ widgets = devm_kcalloc(dev, num_widgets, sizeof(*widgets), GFP_KERNEL);
+ if (!widgets)
+ return -ENOMEM;
+
+ routes = devm_kcalloc(dev, num_routes, sizeof(*routes), GFP_KERNEL);
+ if (!routes)
+ return -ENOMEM;
+
+ controls = devm_kcalloc(dev, num_controls, sizeof(*controls), GFP_KERNEL);
+ if (!controls)
+ return -ENOMEM;
+
+ dais = devm_kcalloc(dev, num_dais, sizeof(*dais), GFP_KERNEL);
+ if (!dais)
+ return -ENOMEM;
+
+ ret = sdca_asoc_populate_dapm(dev, function, widgets, routes);
+ if (ret)
+ return ret;
+
+ ret = sdca_asoc_populate_controls(dev, function, controls);
+ if (ret)
+ return ret;
+
+ ret = sdca_asoc_populate_dais(dev, function, dais, ops);
+ if (ret)
+ return ret;
+
+ component_drv->dapm_widgets = widgets;
+ component_drv->num_dapm_widgets = num_widgets;
+ component_drv->dapm_routes = routes;
+ component_drv->num_dapm_routes = num_routes;
+ component_drv->controls = controls;
+ component_drv->num_controls = num_controls;
+
+ *dai_drv = dais;
+ *num_dai_drv = num_dais;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS(sdca_asoc_populate_component, "SND_SOC_SDCA");
diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c
index 493f390f087a..64ac26443890 100644
--- a/sound/soc/sdca/sdca_functions.c
+++ b/sound/soc/sdca/sdca_functions.c
@@ -1105,12 +1105,6 @@ static int find_sdca_entity_pde(struct device *dev,
return -EINVAL;
}
- /* There are 3 values per delay */
- delays = devm_kcalloc(dev, num_delays / mult_delay,
- sizeof(*delays), GFP_KERNEL);
- if (!delays)
- return -ENOMEM;
-
delay_list = kcalloc(num_delays, sizeof(*delay_list), GFP_KERNEL);
if (!delay_list)
return -ENOMEM;
@@ -1121,6 +1115,10 @@ static int find_sdca_entity_pde(struct device *dev,
num_delays /= mult_delay;
+ delays = devm_kcalloc(dev, num_delays, sizeof(*delays), GFP_KERNEL);
+ if (!delays)
+ return -ENOMEM;
+
for (i = 0, j = 0; i < num_delays; i++) {
delays[i].from_ps = delay_list[j++];
delays[i].to_ps = delay_list[j++];
diff --git a/sound/soc/sdca/sdca_regmap.c b/sound/soc/sdca/sdca_regmap.c
index 4b78188cfceb..66e7eee7d7f4 100644
--- a/sound/soc/sdca/sdca_regmap.c
+++ b/sound/soc/sdca/sdca_regmap.c
@@ -316,6 +316,3 @@ int sdca_regmap_write_defaults(struct device *dev, struct regmap *regmap,
return 0;
}
EXPORT_SYMBOL_NS(sdca_regmap_write_defaults, "SND_SOC_SDCA");
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SDCA library");
diff --git a/sound/soc/sdw_utils/soc_sdw_rt_amp.c b/sound/soc/sdw_utils/soc_sdw_rt_amp.c
index 0538c252ba69..83c2368170cb 100644
--- a/sound/soc/sdw_utils/soc_sdw_rt_amp.c
+++ b/sound/soc/sdw_utils/soc_sdw_rt_amp.c
@@ -190,7 +190,7 @@ int asoc_sdw_rt_amp_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc
const struct snd_soc_dapm_route *rt_amp_map;
char codec_name[CODEC_NAME_SIZE];
struct snd_soc_dai *codec_dai;
- int ret;
+ int ret = -EINVAL;
int i;
rt_amp_map = get_codec_name_and_route(dai, codec_name);
diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c
index 5175818ff2c1..b7060b746356 100644
--- a/sound/soc/sdw_utils/soc_sdw_utils.c
+++ b/sound/soc/sdw_utils/soc_sdw_utils.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/soundwire/sdw.h>
#include <linux/soundwire/sdw_type.h>
+#include <sound/sdca_function.h>
#include <sound/soc_sdw_utils.h>
static const struct snd_soc_dapm_widget generic_dmic_widgets[] = {
@@ -510,6 +511,31 @@ struct asoc_sdw_codec_info codec_info_list[] = {
.dai_num = 2,
},
{
+ .part_id = 0x3563,
+ .dais = {
+ {
+ .direction = {true, false},
+ .dai_name = "cs35l56-sdw1",
+ .dai_type = SOC_SDW_DAI_TYPE_AMP,
+ .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID},
+ .init = asoc_sdw_cs_amp_init,
+ .rtd_init = asoc_sdw_cs_spk_rtd_init,
+ .controls = generic_spk_controls,
+ .num_controls = ARRAY_SIZE(generic_spk_controls),
+ .widgets = generic_spk_widgets,
+ .num_widgets = ARRAY_SIZE(generic_spk_widgets),
+ },
+ {
+ .direction = {false, true},
+ .dai_name = "cs35l56-sdw1c",
+ .dai_type = SOC_SDW_DAI_TYPE_AMP,
+ .dailink = {SOC_SDW_UNUSED_DAI_ID, SOC_SDW_AMP_IN_DAI_ID},
+ .rtd_init = asoc_sdw_cs_spk_feedback_rtd_init,
+ },
+ },
+ .dai_num = 2,
+ },
+ {
.part_id = 0x4242,
.dais = {
{
@@ -934,10 +960,10 @@ static bool asoc_sdw_is_unique_device(const struct snd_soc_acpi_link_adr *adr_li
return true;
}
-const char *asoc_sdw_get_codec_name(struct device *dev,
- const struct asoc_sdw_codec_info *codec_info,
- const struct snd_soc_acpi_link_adr *adr_link,
- int adr_index)
+static const char *_asoc_sdw_get_codec_name(struct device *dev,
+ const struct asoc_sdw_codec_info *codec_info,
+ const struct snd_soc_acpi_link_adr *adr_link,
+ int adr_index)
{
u64 adr = adr_link->adr_d[adr_index].adr;
unsigned int sdw_version = SDW_VERSION(adr);
@@ -947,17 +973,24 @@ const char *asoc_sdw_get_codec_name(struct device *dev,
unsigned int part_id = SDW_PART_ID(adr);
unsigned int class_id = SDW_CLASS_ID(adr);
- if (codec_info->codec_name)
- return devm_kstrdup(dev, codec_info->codec_name, GFP_KERNEL);
- else if (asoc_sdw_is_unique_device(adr_link, sdw_version, mfg_id, part_id,
- class_id, adr_index))
+ if (asoc_sdw_is_unique_device(adr_link, sdw_version, mfg_id, part_id,
+ class_id, adr_index))
return devm_kasprintf(dev, GFP_KERNEL, "sdw:0:%01x:%04x:%04x:%02x",
link_id, mfg_id, part_id, class_id);
- else
- return devm_kasprintf(dev, GFP_KERNEL, "sdw:0:%01x:%04x:%04x:%02x:%01x",
- link_id, mfg_id, part_id, class_id, unique_id);
- return NULL;
+ return devm_kasprintf(dev, GFP_KERNEL, "sdw:0:%01x:%04x:%04x:%02x:%01x",
+ link_id, mfg_id, part_id, class_id, unique_id);
+}
+
+const char *asoc_sdw_get_codec_name(struct device *dev,
+ const struct asoc_sdw_codec_info *codec_info,
+ const struct snd_soc_acpi_link_adr *adr_link,
+ int adr_index)
+{
+ if (codec_info->codec_name)
+ return devm_kstrdup(dev, codec_info->codec_name, GFP_KERNEL);
+
+ return _asoc_sdw_get_codec_name(dev, codec_info, adr_link, adr_index);
}
EXPORT_SYMBOL_NS(asoc_sdw_get_codec_name, "SND_SOC_SDW_UTILS");
@@ -1059,9 +1092,8 @@ EXPORT_SYMBOL_NS(asoc_sdw_init_dai_link, "SND_SOC_SDW_UTILS");
int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links,
int *be_id, char *name, int playback, int capture,
const char *cpu_dai_name, const char *platform_comp_name,
- int num_platforms, const char *codec_name,
- const char *codec_dai_name, int no_pcm,
- int (*init)(struct snd_soc_pcm_runtime *rtd),
+ const char *codec_name, const char *codec_dai_name,
+ int no_pcm, int (*init)(struct snd_soc_pcm_runtime *rtd),
const struct snd_soc_ops *ops)
{
struct snd_soc_dai_link_component *dlc;
@@ -1078,8 +1110,8 @@ int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *d
dlc[2].dai_name = codec_dai_name;
asoc_sdw_init_dai_link(dev, dai_links, be_id, name, playback, capture,
- &dlc[0], 1, &dlc[1], num_platforms,
- &dlc[2], 1, no_pcm, init, ops);
+ &dlc[0], 1, &dlc[1], 1, &dlc[2], 1,
+ no_pcm, init, ops);
return 0;
}
@@ -1124,6 +1156,106 @@ struct asoc_sdw_dailink *asoc_sdw_find_dailink(struct asoc_sdw_dailink *dailinks
}
EXPORT_SYMBOL_NS(asoc_sdw_find_dailink, "SND_SOC_SDW_UTILS");
+static int asoc_sdw_get_dai_type(u32 type)
+{
+ switch (type) {
+ case SDCA_FUNCTION_TYPE_SMART_AMP:
+ case SDCA_FUNCTION_TYPE_SIMPLE_AMP:
+ return SOC_SDW_DAI_TYPE_AMP;
+ case SDCA_FUNCTION_TYPE_SMART_MIC:
+ case SDCA_FUNCTION_TYPE_SIMPLE_MIC:
+ case SDCA_FUNCTION_TYPE_SPEAKER_MIC:
+ return SOC_SDW_DAI_TYPE_MIC;
+ case SDCA_FUNCTION_TYPE_UAJ:
+ case SDCA_FUNCTION_TYPE_RJ:
+ case SDCA_FUNCTION_TYPE_SIMPLE_JACK:
+ return SOC_SDW_DAI_TYPE_JACK;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Check if the SDCA endpoint is present by the SDW peripheral
+ *
+ * @dev: Device pointer
+ * @codec_info: Codec info pointer
+ * @adr_link: ACPI link address
+ * @adr_index: Index of the ACPI link address
+ * @end_index: Index of the endpoint
+ *
+ * Return: 1 if the endpoint is present,
+ * 0 if the endpoint is not present,
+ * negative error code.
+ */
+
+static int is_sdca_endpoint_present(struct device *dev,
+ struct asoc_sdw_codec_info *codec_info,
+ const struct snd_soc_acpi_link_adr *adr_link,
+ int adr_index, int end_index)
+{
+ const struct snd_soc_acpi_adr_device *adr_dev = &adr_link->adr_d[adr_index];
+ const struct snd_soc_acpi_endpoint *adr_end;
+ const struct asoc_sdw_dai_info *dai_info;
+ struct snd_soc_dai_link_component *dlc;
+ struct snd_soc_dai *codec_dai;
+ struct sdw_slave *slave;
+ struct device *sdw_dev;
+ const char *sdw_codec_name;
+ int i;
+
+ dlc = kzalloc(sizeof(*dlc), GFP_KERNEL);
+
+ adr_end = &adr_dev->endpoints[end_index];
+ dai_info = &codec_info->dais[adr_end->num];
+
+ dlc->dai_name = dai_info->dai_name;
+ codec_dai = snd_soc_find_dai_with_mutex(dlc);
+ if (!codec_dai) {
+ dev_warn(dev, "codec dai %s not registered yet\n", dlc->dai_name);
+ kfree(dlc);
+ return -EPROBE_DEFER;
+ }
+ kfree(dlc);
+
+ sdw_codec_name = _asoc_sdw_get_codec_name(dev, codec_info,
+ adr_link, adr_index);
+ if (!sdw_codec_name)
+ return -ENOMEM;
+
+ sdw_dev = bus_find_device_by_name(&sdw_bus_type, NULL, sdw_codec_name);
+ if (!sdw_dev) {
+ dev_err(dev, "codec %s not found\n", sdw_codec_name);
+ return -EINVAL;
+ }
+
+ slave = dev_to_sdw_dev(sdw_dev);
+ if (!slave)
+ return -EINVAL;
+
+ /* Make sure BIOS provides SDCA properties */
+ if (!slave->sdca_data.interface_revision) {
+ dev_warn(&slave->dev, "SDCA properties not found in the BIOS\n");
+ return 1;
+ }
+
+ for (i = 0; i < slave->sdca_data.num_functions; i++) {
+ int dai_type = asoc_sdw_get_dai_type(slave->sdca_data.function[i].type);
+
+ if (dai_type == dai_info->dai_type) {
+ dev_dbg(&slave->dev, "DAI type %d sdca function %s found\n",
+ dai_type, slave->sdca_data.function[i].name);
+ return 1;
+ }
+ }
+
+ dev_dbg(&slave->dev,
+ "SDCA device function for DAI type %d not supported, skip endpoint\n",
+ dai_info->dai_type);
+
+ return 0;
+}
+
int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card,
struct asoc_sdw_dailink *soc_dais,
struct asoc_sdw_endpoint *soc_ends,
@@ -1152,6 +1284,7 @@ int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card,
const struct snd_soc_acpi_adr_device *adr_dev = &adr_link->adr_d[i];
struct asoc_sdw_codec_info *codec_info;
const char *codec_name;
+ bool check_sdca = false;
if (!adr_dev->name_prefix) {
dev_err(dev, "codec 0x%llx does not have a name prefix\n",
@@ -1182,6 +1315,9 @@ int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card,
soc_end->include_sidecar = true;
}
+ if (SDW_CLASS_ID(adr_dev->adr) && adr_dev->num_endpoints > 1)
+ check_sdca = true;
+
for (j = 0; j < adr_dev->num_endpoints; j++) {
const struct snd_soc_acpi_endpoint *adr_end;
const struct asoc_sdw_dai_info *dai_info;
@@ -1192,9 +1328,35 @@ int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card,
dai_info = &codec_info->dais[adr_end->num];
soc_dai = asoc_sdw_find_dailink(soc_dais, adr_end);
- if (dai_info->quirk &&
- !(dai_info->quirk_exclude ^ !!(dai_info->quirk & ctx->mc_quirk)))
- continue;
+ /*
+ * quirk should have higher priority than the sdca properties
+ * in the BIOS. We can't always check the DAI quirk because we
+ * will set the mc_quirk when the BIOS doesn't provide the right
+ * information. The endpoint will be skipped if the dai_info->
+ * quirk_exclude and mc_quirk are both not set if we always skip
+ * the endpoint according to the quirk information. We need to
+ * keep the endpoint if it is present in the BIOS. So, only
+ * check the DAI quirk when the mc_quirk is set or SDCA endpoint
+ * present check is not needed.
+ */
+ if (dai_info->quirk & ctx->mc_quirk || !check_sdca) {
+ /*
+ * Check the endpoint if a matching quirk is set or SDCA
+ * endpoint check is not necessary
+ */
+ if (dai_info->quirk &&
+ !(dai_info->quirk_exclude ^ !!(dai_info->quirk & ctx->mc_quirk)))
+ continue;
+ } else {
+ /* Check SDCA codec endpoint if there is no matching quirk */
+ ret = is_sdca_endpoint_present(dev, codec_info, adr_link, i, j);
+ if (ret < 0)
+ return ret;
+
+ /* The endpoint is not present, skip */
+ if (!ret)
+ continue;
+ }
dev_dbg(dev,
"Add dev: %d, 0x%llx end: %d, dai: %d, %c/%c to %s: %d\n",
diff --git a/sound/soc/soc-ac97.c b/sound/soc/soc-ac97.c
index 079e4ff5a14e..29790807d785 100644
--- a/sound/soc/soc-ac97.c
+++ b/sound/soc/soc-ac97.c
@@ -87,8 +87,8 @@ static int snd_soc_ac97_gpio_get(struct gpio_chip *chip, unsigned int offset)
return !!(ret & (1 << offset));
}
-static void snd_soc_ac97_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int snd_soc_ac97_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct snd_ac97_gpio_priv *gpio_priv = gpiochip_get_data(chip);
struct snd_soc_component *component = gpio_to_component(chip);
@@ -98,15 +98,22 @@ static void snd_soc_ac97_gpio_set(struct gpio_chip *chip, unsigned int offset,
snd_soc_component_write(component, AC97_GPIO_STATUS,
gpio_priv->gpios_set);
dev_dbg(component->dev, "set gpio %d to %d\n", offset, !!value);
+
+ return 0;
}
static int snd_soc_ac97_gpio_direction_out(struct gpio_chip *chip,
unsigned offset, int value)
{
struct snd_soc_component *component = gpio_to_component(chip);
+ int ret;
dev_dbg(component->dev, "set gpio %d to output\n", offset);
- snd_soc_ac97_gpio_set(chip, offset, value);
+
+ ret = snd_soc_ac97_gpio_set(chip, offset, value);
+ if (ret)
+ return ret;
+
return snd_soc_component_update_bits(component, AC97_GPIO_CFG,
1 << offset, 0);
}
@@ -118,7 +125,7 @@ static const struct gpio_chip snd_soc_ac97_gpio_chip = {
.direction_input = snd_soc_ac97_gpio_direction_in,
.get = snd_soc_ac97_gpio_get,
.direction_output = snd_soc_ac97_gpio_direction_out,
- .set = snd_soc_ac97_gpio_set,
+ .set_rv = snd_soc_ac97_gpio_set,
.can_sleep = 1,
};
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 3f97d1f132c6..67bebc339148 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2134,18 +2134,13 @@ static void soc_cleanup_card_resources(struct snd_soc_card *card)
}
}
-static void snd_soc_unbind_card(struct snd_soc_card *card, bool unregister)
+static void snd_soc_unbind_card(struct snd_soc_card *card)
{
if (snd_soc_card_is_instantiated(card)) {
card->instantiated = false;
snd_soc_flush_all_delayed_work(card);
soc_cleanup_card_resources(card);
- if (!unregister)
- list_add(&card->list, &unbind_card_list);
- } else {
- if (unregister)
- list_del(&card->list);
}
}
@@ -2155,9 +2150,7 @@ static int snd_soc_bind_card(struct snd_soc_card *card)
struct snd_soc_component *component;
int ret;
- mutex_lock(&client_mutex);
snd_soc_card_mutex_lock_root(card);
-
snd_soc_fill_dummy_dai(card);
snd_soc_dapm_init(&card->dapm, card, NULL);
@@ -2304,9 +2297,49 @@ static int snd_soc_bind_card(struct snd_soc_card *card)
probe_end:
if (ret < 0)
soc_cleanup_card_resources(card);
-
snd_soc_card_mutex_unlock(card);
- mutex_unlock(&client_mutex);
+
+ return ret;
+}
+
+static void devm_card_bind_release(struct device *dev, void *res)
+{
+ snd_soc_unregister_card(*(struct snd_soc_card **)res);
+}
+
+static int devm_snd_soc_bind_card(struct device *dev, struct snd_soc_card *card)
+{
+ struct snd_soc_card **ptr;
+ int ret;
+
+ ptr = devres_alloc(devm_card_bind_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ ret = snd_soc_bind_card(card);
+ if (ret == 0 || ret == -EPROBE_DEFER) {
+ *ptr = card;
+ devres_add(dev, ptr);
+ } else {
+ devres_free(ptr);
+ }
+
+ return ret;
+}
+
+static int snd_soc_rebind_card(struct snd_soc_card *card)
+{
+ int ret;
+
+ if (card->devres_dev) {
+ devres_destroy(card->devres_dev, devm_card_bind_release, NULL, NULL);
+ ret = devm_snd_soc_bind_card(card->devres_dev, card);
+ } else {
+ ret = snd_soc_bind_card(card);
+ }
+
+ if (ret != -EPROBE_DEFER)
+ list_del_init(&card->list);
return ret;
}
@@ -2506,6 +2539,8 @@ EXPORT_SYMBOL_GPL(snd_soc_add_dai_controls);
*/
int snd_soc_register_card(struct snd_soc_card *card)
{
+ int ret;
+
if (!card->name || !card->dev)
return -EINVAL;
@@ -2526,7 +2561,21 @@ int snd_soc_register_card(struct snd_soc_card *card)
mutex_init(&card->dapm_mutex);
mutex_init(&card->pcm_mutex);
- return snd_soc_bind_card(card);
+ mutex_lock(&client_mutex);
+
+ if (card->devres_dev) {
+ ret = devm_snd_soc_bind_card(card->devres_dev, card);
+ if (ret == -EPROBE_DEFER) {
+ list_add(&card->list, &unbind_card_list);
+ ret = 0;
+ }
+ } else {
+ ret = snd_soc_bind_card(card);
+ }
+
+ mutex_unlock(&client_mutex);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_register_card);
@@ -2539,7 +2588,8 @@ EXPORT_SYMBOL_GPL(snd_soc_register_card);
void snd_soc_unregister_card(struct snd_soc_card *card)
{
mutex_lock(&client_mutex);
- snd_soc_unbind_card(card, true);
+ snd_soc_unbind_card(card);
+ list_del(&card->list);
mutex_unlock(&client_mutex);
dev_dbg(card->dev, "ASoC: Unregistered card '%s'\n", card->name);
}
@@ -2753,23 +2803,19 @@ static void convert_endianness_formats(struct snd_soc_pcm_stream *stream)
stream->formats |= endianness_format_map[i];
}
-static void snd_soc_try_rebind_card(void)
-{
- struct snd_soc_card *card, *c;
-
- list_for_each_entry_safe(card, c, &unbind_card_list, list)
- if (!snd_soc_bind_card(card))
- list_del(&card->list);
-}
-
static void snd_soc_del_component_unlocked(struct snd_soc_component *component)
{
struct snd_soc_card *card = component->card;
+ bool instantiated;
snd_soc_unregister_dais(component);
- if (card)
- snd_soc_unbind_card(card, false);
+ if (card) {
+ instantiated = card->instantiated;
+ snd_soc_unbind_card(card);
+ if (instantiated)
+ list_add(&card->list, &unbind_card_list);
+ }
list_del(&component->list);
}
@@ -2808,6 +2854,7 @@ int snd_soc_add_component(struct snd_soc_component *component,
struct snd_soc_dai_driver *dai_drv,
int num_dai)
{
+ struct snd_soc_card *card, *c;
int ret;
int i;
@@ -2838,15 +2885,14 @@ int snd_soc_add_component(struct snd_soc_component *component,
/* see for_each_component */
list_add(&component->list, &component_list);
+ list_for_each_entry_safe(card, c, &unbind_card_list, list)
+ snd_soc_rebind_card(card);
+
err_cleanup:
if (ret < 0)
snd_soc_del_component_unlocked(component);
mutex_unlock(&client_mutex);
-
- if (ret == 0)
- snd_soc_try_rebind_card();
-
return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_add_component);
@@ -2881,34 +2927,14 @@ EXPORT_SYMBOL_GPL(snd_soc_register_component);
void snd_soc_unregister_component_by_driver(struct device *dev,
const struct snd_soc_component_driver *component_driver)
{
- struct snd_soc_component *component;
+ const char *driver_name = NULL;
- if (!component_driver)
- return;
-
- mutex_lock(&client_mutex);
- component = snd_soc_lookup_component_nolocked(dev, component_driver->name);
- if (!component)
- goto out;
-
- snd_soc_del_component_unlocked(component);
-
-out:
- mutex_unlock(&client_mutex);
-}
-EXPORT_SYMBOL_GPL(snd_soc_unregister_component_by_driver);
+ if (component_driver)
+ driver_name = component_driver->name;
-/**
- * snd_soc_unregister_component - Unregister all related component
- * from the ASoC core
- *
- * @dev: The device to unregister
- */
-void snd_soc_unregister_component(struct device *dev)
-{
mutex_lock(&client_mutex);
while (1) {
- struct snd_soc_component *component = snd_soc_lookup_component_nolocked(dev, NULL);
+ struct snd_soc_component *component = snd_soc_lookup_component_nolocked(dev, driver_name);
if (!component)
break;
@@ -2917,7 +2943,7 @@ void snd_soc_unregister_component(struct device *dev)
}
mutex_unlock(&client_mutex);
}
-EXPORT_SYMBOL_GPL(snd_soc_unregister_component);
+EXPORT_SYMBOL_GPL(snd_soc_unregister_component_by_driver);
/* Retrieve a card's name from device tree */
int snd_soc_of_parse_card_name(struct snd_soc_card *card,
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index b7818388984e..f26f9e9d7ce7 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3626,11 +3626,25 @@ int snd_soc_dapm_info_pin_switch(struct snd_kcontrol *kcontrol,
}
EXPORT_SYMBOL_GPL(snd_soc_dapm_info_pin_switch);
+static int __snd_soc_dapm_get_pin_switch(struct snd_soc_dapm_context *dapm,
+ const char *pin,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ snd_soc_dapm_mutex_lock(dapm);
+ ucontrol->value.integer.value[0] = snd_soc_dapm_get_pin_status(dapm, pin);
+ snd_soc_dapm_mutex_unlock(dapm);
+
+ return 0;
+}
+
/**
* snd_soc_dapm_get_pin_switch - Get information for a pin switch
*
* @kcontrol: mixer control
* @ucontrol: Value
+ *
+ * Callback to provide information for a pin switch added at the card
+ * level.
*/
int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
@@ -3638,40 +3652,82 @@ int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol,
struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
const char *pin = (const char *)kcontrol->private_value;
- snd_soc_dapm_mutex_lock(card);
+ return __snd_soc_dapm_get_pin_switch(&card->dapm, pin, ucontrol);
+}
+EXPORT_SYMBOL_GPL(snd_soc_dapm_get_pin_switch);
- ucontrol->value.integer.value[0] =
- snd_soc_dapm_get_pin_status(&card->dapm, pin);
+/**
+ * snd_soc_dapm_get_component_pin_switch - Get information for a pin switch
+ *
+ * @kcontrol: mixer control
+ * @ucontrol: Value
+ *
+ * Callback to provide information for a pin switch added at the component
+ * level.
+ */
+int snd_soc_dapm_get_component_pin_switch(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ const char *pin = (const char *)kcontrol->private_value;
- snd_soc_dapm_mutex_unlock(card);
+ return __snd_soc_dapm_get_pin_switch(&component->dapm, pin, ucontrol);
+}
+EXPORT_SYMBOL_GPL(snd_soc_dapm_get_component_pin_switch);
- return 0;
+static int __snd_soc_dapm_put_pin_switch(struct snd_soc_dapm_context *dapm,
+ const char *pin,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ int ret;
+
+ snd_soc_dapm_mutex_lock(dapm);
+ ret = __snd_soc_dapm_set_pin(dapm, pin, !!ucontrol->value.integer.value[0]);
+ snd_soc_dapm_mutex_unlock(dapm);
+
+ snd_soc_dapm_sync(dapm);
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(snd_soc_dapm_get_pin_switch);
/**
* snd_soc_dapm_put_pin_switch - Set information for a pin switch
*
* @kcontrol: mixer control
* @ucontrol: Value
+ *
+ * Callback to provide information for a pin switch added at the card
+ * level.
*/
int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
const char *pin = (const char *)kcontrol->private_value;
- int ret;
-
- snd_soc_dapm_mutex_lock(card);
- ret = __snd_soc_dapm_set_pin(&card->dapm, pin,
- !!ucontrol->value.integer.value[0]);
- snd_soc_dapm_mutex_unlock(card);
- snd_soc_dapm_sync(&card->dapm);
- return ret;
+ return __snd_soc_dapm_put_pin_switch(&card->dapm, pin, ucontrol);
}
EXPORT_SYMBOL_GPL(snd_soc_dapm_put_pin_switch);
+/**
+ * snd_soc_dapm_put_component_pin_switch - Set information for a pin switch
+ *
+ * @kcontrol: mixer control
+ * @ucontrol: Value
+ *
+ * Callback to provide information for a pin switch added at the component
+ * level.
+ */
+int snd_soc_dapm_put_component_pin_switch(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ const char *pin = (const char *)kcontrol->private_value;
+
+ return __snd_soc_dapm_put_pin_switch(&component->dapm, pin, ucontrol);
+}
+EXPORT_SYMBOL_GPL(snd_soc_dapm_put_component_pin_switch);
+
struct snd_soc_dapm_widget *
snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
const struct snd_soc_dapm_widget *widget)
diff --git a/sound/soc/soc-devres.c b/sound/soc/soc-devres.c
index c6364caabc0e..d33f83ec24f2 100644
--- a/sound/soc/soc-devres.c
+++ b/sound/soc/soc-devres.c
@@ -83,6 +83,13 @@ int devm_snd_soc_register_card(struct device *dev, struct snd_soc_card *card)
}
EXPORT_SYMBOL_GPL(devm_snd_soc_register_card);
+int devm_snd_soc_register_deferrable_card(struct device *dev, struct snd_soc_card *card)
+{
+ card->devres_dev = dev;
+ return snd_soc_register_card(card);
+}
+EXPORT_SYMBOL_GPL(devm_snd_soc_register_deferrable_card);
+
#ifdef CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM
static void devm_dmaengine_pcm_release(struct device *dev, void *res)
diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c
index 5e3e4f14c392..c8adfff826bd 100644
--- a/sound/soc/soc-utils.c
+++ b/sound/soc/soc-utils.c
@@ -262,6 +262,19 @@ struct snd_soc_dai_link_component snd_soc_dummy_dlc = {
};
EXPORT_SYMBOL_GPL(snd_soc_dummy_dlc);
+int snd_soc_dlc_is_dummy(struct snd_soc_dai_link_component *dlc)
+{
+ if (dlc == &snd_soc_dummy_dlc)
+ return true;
+
+ if ((dlc->name && strcmp(dlc->name, snd_soc_dummy_dlc.name) == 0) ||
+ (dlc->dai_name && strcmp(dlc->dai_name, snd_soc_dummy_dlc.dai_name) == 0))
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(snd_soc_dlc_is_dummy);
+
static int snd_soc_dummy_probe(struct faux_device *fdev)
{
int ret;
diff --git a/sound/soc/sof/amd/Kconfig b/sound/soc/sof/amd/Kconfig
index 3ea82fa72e35..05faf1c6d6fc 100644
--- a/sound/soc/sof/amd/Kconfig
+++ b/sound/soc/sof/amd/Kconfig
@@ -94,13 +94,14 @@ config SND_SOC_SOF_AMD_ACP63
If unsure select "N".
config SND_SOC_SOF_AMD_ACP70
- tristate "SOF support for ACP7.0 platform"
+ tristate "SOF support for ACP7.0/ACP7.1 platforms"
depends on SND_SOC_SOF_PCI
depends on AMD_NODE
select SND_SOC_SOF_AMD_COMMON
+ select SND_SOC_SOF_AMD_SOUNDWIRE_LINK_BASELINE
help
Select this option for SOF support on
- AMD ACP7.0 version based platforms.
- Say Y if you want to enable SOF on ACP7.0 based platform.
+ AMD ACP7.0/ACP7.1 version based platforms.
+ Say Y if you want to enable SOF on ACP7.0/ACP7.1 based platforms.
endif
diff --git a/sound/soc/sof/amd/acp-dsp-offset.h b/sound/soc/sof/amd/acp-dsp-offset.h
index ecdcae07ace7..08583a91afbc 100644
--- a/sound/soc/sof/amd/acp-dsp-offset.h
+++ b/sound/soc/sof/amd/acp-dsp-offset.h
@@ -130,4 +130,14 @@
#define ACP_SW0_EN 0x3000
#define ACP_SW1_EN 0x3C00
+#define ACP70_PME_EN 0x1400
+#define ACP70_EXTERNAL_INTR_CNTL1 0x1A08
+#define ACP70_SW0_WAKE_EN 0x1458
+#define ACP70_SW1_WAKE_EN 0x1460
+#define ACP70_SDW_HOST_WAKE_MASK 0x0C00000
+#define ACP70_SDW0_HOST_WAKE_STAT BIT(24)
+#define ACP70_SDW1_HOST_WAKE_STAT BIT(25)
+#define ACP70_SDW0_PME_STAT BIT(26)
+#define ACP70_SDW1_PME_STAT BIT(27)
+
#endif
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index 7c6d647fa253..7132916aa253 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -16,7 +16,7 @@
#include <linux/module.h>
#include <linux/pci.h>
-#include <asm/amd_node.h>
+#include <asm/amd/node.h>
#include "../ops.h"
#include "acp.h"
@@ -58,6 +58,7 @@ static void init_dma_descriptor(struct acp_dev_data *adata)
switch (acp_data->pci_rev) {
case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
acp_dma_desc_base_addr = ACP70_DMA_DESC_BASE_ADDR;
acp_dma_desc_max_num_dscr = ACP70_DMA_DESC_MAX_NUM_DSCR;
break;
@@ -97,6 +98,7 @@ static int config_dma_channel(struct acp_dev_data *adata, unsigned int ch,
switch (acp_data->pci_rev) {
case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
acp_dma_cntl_0 = ACP70_DMA_CNTL_0;
acp_dma_ch_rst_sts = ACP70_DMA_CH_RST_STS;
acp_dma_dscr_err_sts_0 = ACP70_DMA_ERR_STS_0;
@@ -336,6 +338,7 @@ int acp_dma_status(struct acp_dev_data *adata, unsigned char ch)
switch (adata->pci_rev) {
case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
acp_dma_ch_sts = ACP70_DMA_CH_STS;
break;
default:
@@ -383,6 +386,69 @@ static int acp_memory_init(struct snd_sof_dev *sdev)
return 0;
}
+static void amd_sof_handle_acp70_sdw_wake_event(struct acp_dev_data *adata)
+{
+ struct amd_sdw_manager *amd_manager;
+
+ if (adata->acp70_sdw0_wake_event) {
+ amd_manager = dev_get_drvdata(&adata->sdw->pdev[0]->dev);
+ if (amd_manager)
+ pm_request_resume(amd_manager->dev);
+ adata->acp70_sdw0_wake_event = 0;
+ }
+
+ if (adata->acp70_sdw1_wake_event) {
+ amd_manager = dev_get_drvdata(&adata->sdw->pdev[1]->dev);
+ if (amd_manager)
+ pm_request_resume(amd_manager->dev);
+ adata->acp70_sdw1_wake_event = 0;
+ }
+}
+
+static int amd_sof_check_and_handle_acp70_sdw_wake_irq(struct snd_sof_dev *sdev)
+{
+ const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata);
+ struct acp_dev_data *adata = sdev->pdata->hw_pdata;
+ u32 ext_intr_stat1;
+ int irq_flag = 0;
+ bool sdw_wake_irq = false;
+
+ ext_intr_stat1 = snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->ext_intr_stat1);
+ if (ext_intr_stat1 & ACP70_SDW0_HOST_WAKE_STAT) {
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_stat1,
+ ACP70_SDW0_HOST_WAKE_STAT);
+ adata->acp70_sdw0_wake_event = true;
+ sdw_wake_irq = true;
+ }
+
+ if (ext_intr_stat1 & ACP70_SDW1_HOST_WAKE_STAT) {
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_stat1,
+ ACP70_SDW1_HOST_WAKE_STAT);
+ adata->acp70_sdw1_wake_event = true;
+ sdw_wake_irq = true;
+ }
+
+ if (ext_intr_stat1 & ACP70_SDW0_PME_STAT) {
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP70_SW0_WAKE_EN, 0);
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_stat1, ACP70_SDW0_PME_STAT);
+ adata->acp70_sdw0_wake_event = true;
+ sdw_wake_irq = true;
+ }
+
+ if (ext_intr_stat1 & ACP70_SDW1_PME_STAT) {
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP70_SW1_WAKE_EN, 0);
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_stat1, ACP70_SDW1_PME_STAT);
+ adata->acp70_sdw1_wake_event = true;
+ sdw_wake_irq = true;
+ }
+
+ if (sdw_wake_irq) {
+ amd_sof_handle_acp70_sdw_wake_event(adata);
+ irq_flag = 1;
+ }
+ return irq_flag;
+}
+
static irqreturn_t acp_irq_thread(int irq, void *context)
{
struct snd_sof_dev *sdev = context;
@@ -415,7 +481,7 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id)
struct acp_dev_data *adata = sdev->pdata->hw_pdata;
unsigned int base = desc->dsp_intr_base;
unsigned int val;
- int irq_flag = 0;
+ int irq_flag = 0, wake_irq_flag = 0;
val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET);
if (val & ACP_DSP_TO_HOST_IRQ) {
@@ -453,8 +519,14 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id)
schedule_work(&amd_manager->amd_sdw_irq_thread);
irq_flag = 1;
}
+ switch (adata->pci_rev) {
+ case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
+ wake_irq_flag = amd_sof_check_and_handle_acp70_sdw_wake_irq(sdev);
+ break;
+ }
}
- if (irq_flag)
+ if (irq_flag || wake_irq_flag)
return IRQ_HANDLED;
else
return IRQ_NONE;
@@ -486,6 +558,7 @@ static int acp_power_on(struct snd_sof_dev *sdev)
acp_pgfsm_cntl_mask = ACP6X_PGFSM_CNTL_POWER_ON_MASK;
break;
case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
acp_pgfsm_status_mask = ACP70_PGFSM_STATUS_MASK;
acp_pgfsm_cntl_mask = ACP70_PGFSM_CNTL_POWER_ON_MASK;
break;
@@ -507,7 +580,6 @@ static int acp_power_on(struct snd_sof_dev *sdev)
static int acp_reset(struct snd_sof_dev *sdev)
{
- const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata);
unsigned int val;
int ret;
@@ -528,14 +600,6 @@ static int acp_reset(struct snd_sof_dev *sdev)
if (ret < 0)
dev_err(sdev->dev, "timeout in releasing reset\n");
- if (desc->acp_clkmux_sel)
- snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->acp_clkmux_sel, ACP_CLOCK_ACLK);
-
- if (desc->ext_intr_enb)
- snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_enb, 0x01);
-
- if (desc->ext_intr_cntl)
- snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_cntl, ACP_ERROR_IRQ_MASK);
return ret;
}
@@ -566,9 +630,13 @@ static int acp_dsp_reset(struct snd_sof_dev *sdev)
static int acp_init(struct snd_sof_dev *sdev)
{
+ const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata);
+ struct acp_dev_data *acp_data;
+ unsigned int sdw0_wake_en, sdw1_wake_en;
int ret;
/* power on */
+ acp_data = sdev->pdata->hw_pdata;
ret = acp_power_on(sdev);
if (ret) {
dev_err(sdev->dev, "ACP power on failed\n");
@@ -577,7 +645,32 @@ static int acp_init(struct snd_sof_dev *sdev)
snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_CONTROL, 0x01);
/* Reset */
- return acp_reset(sdev);
+ ret = acp_reset(sdev);
+ if (ret)
+ return ret;
+
+ if (desc->acp_clkmux_sel)
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->acp_clkmux_sel, ACP_CLOCK_ACLK);
+
+ if (desc->ext_intr_enb)
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_enb, 0x01);
+
+ if (desc->ext_intr_cntl)
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_cntl, ACP_ERROR_IRQ_MASK);
+
+ switch (acp_data->pci_rev) {
+ case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
+ sdw0_wake_en = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP70_SW0_WAKE_EN);
+ sdw1_wake_en = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP70_SW1_WAKE_EN);
+ if (sdw0_wake_en || sdw1_wake_en)
+ snd_sof_dsp_update_bits(sdev, ACP_DSP_BAR, ACP70_EXTERNAL_INTR_CNTL1,
+ ACP70_SDW_HOST_WAKE_MASK, ACP70_SDW_HOST_WAKE_MASK);
+
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP70_PME_EN, 1);
+ break;
+ }
+ return 0;
}
static bool check_acp_sdw_enable_status(struct snd_sof_dev *sdev)
@@ -616,8 +709,12 @@ int amd_sof_acp_suspend(struct snd_sof_dev *sdev, u32 target_state)
dev_err(sdev->dev, "ACP Reset failed\n");
return ret;
}
- if (acp_data->pci_rev == ACP70_PCI_ID)
+ switch (acp_data->pci_rev) {
+ case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
enable = true;
+ break;
+ }
snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_CONTROL, enable);
return 0;
@@ -637,9 +734,15 @@ int amd_sof_acp_resume(struct snd_sof_dev *sdev)
return ret;
}
return acp_memory_init(sdev);
- } else {
- return acp_dsp_reset(sdev);
}
+ switch (acp_data->pci_rev) {
+ case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP70_PME_EN, 1);
+ break;
+ }
+
+ return acp_dsp_reset(sdev);
}
EXPORT_SYMBOL_NS(amd_sof_acp_resume, "SND_SOC_SOF_AMD_COMMON");
diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h
index d084db34eed8..d3c5b2386cdf 100644
--- a/sound/soc/sof/amd/acp.h
+++ b/sound/soc/sof/amd/acp.h
@@ -74,6 +74,7 @@
#define ACP_RMB_PCI_ID 0x6F
#define ACP63_PCI_ID 0x63
#define ACP70_PCI_ID 0x70
+#define ACP71_PCI_ID 0x71
#define HOST_BRIDGE_CZN 0x1630
#define HOST_BRIDGE_VGH 0x1645
@@ -109,9 +110,11 @@
#define ACP_SDW0_IRQ_MASK BIT(21)
#define ACP_SDW1_IRQ_MASK BIT(2)
#define SDW_ACPI_ADDR_ACP63 5
+#define SDW_ACPI_ADDR_ACP70 SDW_ACPI_ADDR_ACP63
#define ACP_DEFAULT_SRAM_LENGTH 0x00080000
#define ACP_SRAM_PAGE_COUNT 128
#define ACP6X_SDW_MAX_MANAGER_COUNT 2
+#define ACP70_SDW_MAX_MANAGER_COUNT ACP6X_SDW_MAX_MANAGER_COUNT
enum clock_source {
ACP_CLOCK_96M = 0,
@@ -260,6 +263,10 @@ struct acp_dev_data {
bool is_dram_in_use;
bool is_sram_in_use;
bool sdw_en_stat;
+ /* acp70_sdw0_wake_event flag set to true when wake irq asserted for SW0 instance */
+ bool acp70_sdw0_wake_event;
+ /* acp70_sdw1_wake_event flag set to true when wake irq asserted for SW1 instance */
+ bool acp70_sdw1_wake_event;
unsigned int pci_rev;
};
diff --git a/sound/soc/sof/amd/pci-acp70.c b/sound/soc/sof/amd/pci-acp70.c
index 8fa1170a2161..c4db21668252 100644
--- a/sound/soc/sof/amd/pci-acp70.c
+++ b/sound/soc/sof/amd/pci-acp70.c
@@ -33,12 +33,15 @@ static const struct sof_amd_acp_desc acp70_chip_info = {
.ext_intr_cntl = ACP70_EXTERNAL_INTR_CNTL,
.ext_intr_stat = ACP70_EXT_INTR_STAT,
.ext_intr_stat1 = ACP70_EXT_INTR_STAT1,
+ .acp_error_stat = ACP70_ERROR_STATUS,
.dsp_intr_base = ACP70_DSP_SW_INTR_BASE,
.acp_sw0_i2s_err_reason = ACP7X_SW0_I2S_ERROR_REASON,
.sram_pte_offset = ACP70_SRAM_PTE_OFFSET,
.hw_semaphore_offset = ACP70_AXI2DAGB_SEM_0,
.fusion_dsp_offset = ACP70_DSP_FUSION_RUNSTALL,
.probe_reg_offset = ACP70_FUTURE_REG_ACLK_0,
+ .sdw_max_link_count = ACP70_SDW_MAX_MANAGER_COUNT,
+ .sdw_acpi_dev_addr = SDW_ACPI_ADDR_ACP70,
.reg_start_addr = ACP70_REG_START,
.reg_end_addr = ACP70_REG_END,
};
@@ -70,8 +73,13 @@ static int acp70_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_
{
unsigned int flag;
- if (pci->revision != ACP70_PCI_ID)
+ switch (pci->revision) {
+ case ACP70_PCI_ID:
+ case ACP71_PCI_ID:
+ break;
+ default:
return -ENODEV;
+ }
flag = snd_amd_acp_find_config(pci);
if (flag != FLAG_AMD_SOF && flag != FLAG_AMD_SOF_ONLY_DMIC)
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index aed834d03e10..b11f408f1366 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -607,7 +607,8 @@ static void sof_probe_work(struct work_struct *work)
}
static void
-sof_apply_profile_override(struct sof_loadable_file_profile *path_override)
+sof_apply_profile_override(struct sof_loadable_file_profile *path_override,
+ struct snd_sof_pdata *plat_data)
{
if (override_ipc_type >= 0 && override_ipc_type < SOF_IPC_TYPE_COUNT)
path_override->ipc_type = override_ipc_type;
@@ -619,8 +620,11 @@ sof_apply_profile_override(struct sof_loadable_file_profile *path_override)
path_override->fw_lib_path = override_lib_path;
if (override_tplg_path)
path_override->tplg_path = override_tplg_path;
- if (override_tplg_filename)
+ if (override_tplg_filename) {
path_override->tplg_name = override_tplg_filename;
+ /* User requested a specific topology file and expect it to be loaded */
+ plat_data->disable_function_topology = true;
+ }
}
int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data)
@@ -654,7 +658,7 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data)
}
}
- sof_apply_profile_override(&plat_data->ipc_file_profile_base);
+ sof_apply_profile_override(&plat_data->ipc_file_profile_base, plat_data);
/* Initialize sof_ops based on the initial selected IPC version */
ret = sof_init_sof_ops(sdev);
diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c
index ab07512e511d..a40a8047873e 100644
--- a/sound/soc/sof/imx/imx8.c
+++ b/sound/soc/sof/imx/imx8.c
@@ -11,6 +11,7 @@
#include <linux/arm-smccc.h>
#include <linux/firmware/imx/svc/misc.h>
#include <linux/mfd/syscon.h>
+#include <linux/reset.h>
#include "imx-common.h"
@@ -23,13 +24,6 @@
#define IMX8M_DAP_PWRCTL (0x4000 + 0x3020)
#define IMX8M_PWRCTL_CORERESET BIT(16)
-#define AudioDSP_REG0 0x100
-#define AudioDSP_REG1 0x104
-#define AudioDSP_REG2 0x108
-#define AudioDSP_REG3 0x10c
-
-#define AudioDSP_REG2_RUNSTALL BIT(5)
-
/* imx8ulp macros */
#define FSL_SIP_HIFI_XRDC 0xc200000e
#define SYSCTRL0 0x8
@@ -43,6 +37,7 @@
struct imx8m_chip_data {
void __iomem *dap;
struct regmap *regmap;
+ struct reset_control *run_stall;
};
/*
@@ -137,8 +132,7 @@ static int imx8m_reset(struct snd_sof_dev *sdev)
/* keep reset asserted for 10 cycles */
usleep_range(1, 2);
- regmap_update_bits(chip->regmap, AudioDSP_REG2,
- AudioDSP_REG2_RUNSTALL, AudioDSP_REG2_RUNSTALL);
+ reset_control_assert(chip->run_stall);
/* take the DSP out of reset and keep stalled for FW loading */
pwrctl = readl(chip->dap + IMX8M_DAP_PWRCTL);
@@ -152,9 +146,7 @@ static int imx8m_run(struct snd_sof_dev *sdev)
{
struct imx8m_chip_data *chip = get_chip_pdata(sdev);
- regmap_update_bits(chip->regmap, AudioDSP_REG2, AudioDSP_REG2_RUNSTALL, 0);
-
- return 0;
+ return reset_control_deassert(chip->run_stall);
}
static int imx8m_probe(struct snd_sof_dev *sdev)
@@ -174,10 +166,10 @@ static int imx8m_probe(struct snd_sof_dev *sdev)
return dev_err_probe(sdev->dev, -ENODEV,
"failed to ioremap DAP\n");
- chip->regmap = syscon_regmap_lookup_by_phandle(sdev->dev->of_node, "fsl,dsp-ctrl");
- if (IS_ERR(chip->regmap))
- return dev_err_probe(sdev->dev, PTR_ERR(chip->regmap),
- "failed to fetch dsp ctrl regmap\n");
+ chip->run_stall = devm_reset_control_get_exclusive(sdev->dev, "runstall");
+ if (IS_ERR(chip->run_stall))
+ return dev_err_probe(sdev->dev, PTR_ERR(chip->run_stall),
+ "failed to get dsp runstall reset control\n");
common->chip_pdata = chip;
diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c
index b1be03011d7e..6492e1cefbfb 100644
--- a/sound/soc/sof/intel/hda-bus.c
+++ b/sound/soc/sof/intel/hda-bus.c
@@ -76,7 +76,7 @@ void sof_hda_bus_init(struct snd_sof_dev *sdev, struct device *dev)
snd_hdac_ext_bus_init(bus, dev, &bus_core_ops, sof_hda_ext_ops);
- if (chip && chip->hw_ip_version == SOF_INTEL_ACE_2_0)
+ if (chip && chip->hw_ip_version >= SOF_INTEL_ACE_2_0)
bus->use_pio_for_commands = true;
#else
snd_hdac_ext_bus_init(bus, dev, NULL, NULL);
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index b34e5fdf10f1..6a3932d90b43 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -1049,7 +1049,21 @@ static void hda_generic_machine_select(struct snd_sof_dev *sdev,
if (!*mach && codec_num <= 2) {
bool tplg_fixup = false;
- hda_mach = snd_soc_acpi_intel_hda_machines;
+ /*
+ * make a local copy of the match array since we might
+ * be modifying it
+ */
+ hda_mach = devm_kmemdup_array(sdev->dev,
+ snd_soc_acpi_intel_hda_machines,
+ 2, /* we have one entry + sentinel in the array */
+ sizeof(snd_soc_acpi_intel_hda_machines[0]),
+ GFP_KERNEL);
+ if (!hda_mach) {
+ dev_err(bus->dev,
+ "%s: failed to duplicate the HDA match table\n",
+ __func__);
+ return;
+ }
dev_info(bus->dev, "using HDA machine driver %s now\n",
hda_mach->drv_name);
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index 108cad04879e..e14f82c0831f 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -935,6 +935,7 @@ extern const struct sof_intel_dsp_desc mtl_chip_info;
extern const struct sof_intel_dsp_desc arl_s_chip_info;
extern const struct sof_intel_dsp_desc lnl_chip_info;
extern const struct sof_intel_dsp_desc ptl_chip_info;
+extern const struct sof_intel_dsp_desc wcl_chip_info;
/* Probes support */
#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_PROBES)
diff --git a/sound/soc/sof/intel/pci-ptl.c b/sound/soc/sof/intel/pci-ptl.c
index 7d4c46f56931..68f6a9841633 100644
--- a/sound/soc/sof/intel/pci-ptl.c
+++ b/sound/soc/sof/intel/pci-ptl.c
@@ -55,10 +55,40 @@ static const struct sof_dev_desc ptl_desc = {
.ops_init = sof_ptl_ops_init,
};
+static const struct sof_dev_desc wcl_desc = {
+ .use_acpi_target_states = true,
+ .machines = snd_soc_acpi_intel_ptl_machines,
+ .alt_machines = snd_soc_acpi_intel_ptl_sdw_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .chip_info = &wcl_chip_info,
+ .ipc_supported_mask = BIT(SOF_IPC_TYPE_4),
+ .ipc_default = SOF_IPC_TYPE_4,
+ .dspless_mode_supported = true,
+ .default_fw_path = {
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/wcl",
+ },
+ .default_lib_path = {
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/wcl",
+ },
+ .default_tplg_path = {
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
+ },
+ .default_fw_filename = {
+ [SOF_IPC_TYPE_4] = "sof-wcl.ri",
+ },
+ .nocodec_tplg_filename = "sof-ptl-nocodec.tplg",
+ .ops = &sof_ptl_ops,
+ .ops_init = sof_ptl_ops_init,
+};
+
/* PCI IDs */
static const struct pci_device_id sof_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, HDA_PTL, &ptl_desc) }, /* PTL */
{ PCI_DEVICE_DATA(INTEL, HDA_PTL_H, &ptl_desc) }, /* PTL-H */
+ { PCI_DEVICE_DATA(INTEL, HDA_WCL, &wcl_desc) }, /* WCL */
{ 0, }
};
MODULE_DEVICE_TABLE(pci, sof_pci_ids);
diff --git a/sound/soc/sof/intel/ptl.c b/sound/soc/sof/intel/ptl.c
index aa0b772178bc..875d18193b05 100644
--- a/sound/soc/sof/intel/ptl.c
+++ b/sound/soc/sof/intel/ptl.c
@@ -126,6 +126,29 @@ const struct sof_intel_dsp_desc ptl_chip_info = {
.hw_ip_version = SOF_INTEL_ACE_3_0,
};
+const struct sof_intel_dsp_desc wcl_chip_info = {
+ .cores_num = 3,
+ .init_core_mask = BIT(0),
+ .host_managed_cores_mask = BIT(0),
+ .ipc_req = MTL_DSP_REG_HFIPCXIDR,
+ .ipc_req_mask = MTL_DSP_REG_HFIPCXIDR_BUSY,
+ .ipc_ack = MTL_DSP_REG_HFIPCXIDA,
+ .ipc_ack_mask = MTL_DSP_REG_HFIPCXIDA_DONE,
+ .ipc_ctl = MTL_DSP_REG_HFIPCXCTL,
+ .rom_status_reg = LNL_DSP_REG_HFDSC,
+ .rom_init_timeout = 300,
+ .ssp_count = MTL_SSP_COUNT,
+ .d0i3_offset = MTL_HDA_VS_D0I3C,
+ .read_sdw_lcount = hda_sdw_check_lcount_ext,
+ .check_sdw_irq = lnl_dsp_check_sdw_irq,
+ .check_sdw_wakeen_irq = lnl_sdw_check_wakeen_irq,
+ .check_ipc_irq = mtl_dsp_check_ipc_irq,
+ .cl_init = mtl_dsp_cl_init,
+ .power_down_dsp = mtl_power_down_dsp,
+ .disable_interrupts = lnl_dsp_disable_interrupts,
+ .hw_ip_version = SOF_INTEL_ACE_3_0,
+};
+
MODULE_IMPORT_NS("SND_SOC_SOF_INTEL_MTL");
MODULE_IMPORT_NS("SND_SOC_SOF_INTEL_LNL");
MODULE_IMPORT_NS("SND_SOC_SOF_HDA_MLINK");
diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c
index 576f407cd456..976a4794d610 100644
--- a/sound/soc/sof/ipc4-control.c
+++ b/sound/soc/sof/ipc4-control.c
@@ -531,6 +531,14 @@ static int sof_ipc4_bytes_ext_put(struct snd_sof_control *scontrol,
return -EINVAL;
}
+ /* Check header id */
+ if (header.numid != SOF_CTRL_CMD_BINARY) {
+ dev_err_ratelimited(scomp->dev,
+ "Incorrect numid for bytes put %d\n",
+ header.numid);
+ return -EINVAL;
+ }
+
/* Verify the ABI header first */
if (copy_from_user(&abi_hdr, tlvd->tlv, sizeof(abi_hdr)))
return -EFAULT;
@@ -613,7 +621,8 @@ static int _sof_ipc4_bytes_ext_get(struct snd_sof_control *scontrol,
if (data_size > size)
return -ENOSPC;
- header.numid = scontrol->comp_id;
+ /* Set header id and length */
+ header.numid = SOF_CTRL_CMD_BINARY;
header.length = data_size;
if (copy_to_user(tlvd, &header, sizeof(struct snd_ctl_tlv)))
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 1a2841899ff5..8eee3e1aadf9 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -784,7 +784,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
/* allocate memory for max number of pipeline IDs */
pipeline_list->pipelines = kcalloc(ipc4_data->max_num_pipelines,
- sizeof(struct snd_sof_widget *), GFP_KERNEL);
+ sizeof(*pipeline_list->pipelines),
+ GFP_KERNEL);
if (!pipeline_list->pipelines) {
sof_ipc4_pcm_free(sdev, spcm);
return -ENOMEM;
@@ -798,7 +799,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
spcm->stream[stream].private = stream_priv;
- if (!support_info)
+ /* Delay reporting is only supported on playback */
+ if (!support_info || stream == SNDRV_PCM_STREAM_CAPTURE)
continue;
time_info = kzalloc(sizeof(*time_info), GFP_KERNEL);
diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c
index 2fc14b9a33d4..c50249aadea9 100644
--- a/sound/soc/sof/sof-pci-dev.c
+++ b/sound/soc/sof/sof-pci-dev.c
@@ -216,7 +216,7 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
if (ret < 0)
return ret;
- ret = pci_request_regions(pci, "Audio DSP");
+ ret = pcim_request_all_regions(pci, "Audio DSP");
if (ret < 0)
return ret;
@@ -240,8 +240,7 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
path_override->ipc_type = sof_pci_ipc_type;
} else {
dev_err(dev, "Invalid IPC type requested: %d\n", sof_pci_ipc_type);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
path_override->fw_path = fw_path;
@@ -271,13 +270,7 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
sof_pdata->sof_probe_complete = sof_pci_probe_complete;
/* call sof helper for DSP hardware probe */
- ret = snd_sof_device_probe(dev, sof_pdata);
-
-out:
- if (ret)
- pci_release_regions(pci);
-
- return ret;
+ return snd_sof_device_probe(dev, sof_pdata);
}
EXPORT_SYMBOL_NS(sof_pci_probe, "SND_SOC_SOF_PCI_DEV");
@@ -290,9 +283,6 @@ void sof_pci_remove(struct pci_dev *pci)
if (snd_sof_device_probe_completed(&pci->dev) &&
!(sof_pci_debug & SOF_PCI_DISABLE_PM_RUNTIME))
pm_runtime_get_noresume(&pci->dev);
-
- /* release pci regions and disable device */
- pci_release_regions(pci);
}
EXPORT_SYMBOL_NS(sof_pci_remove, "SND_SOC_SOF_PCI_DEV");
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index dc9cb8324067..d612d693efc3 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -19,6 +19,10 @@
#include "sof-audio.h"
#include "ops.h"
+static bool disable_function_topology;
+module_param(disable_function_topology, bool, 0444);
+MODULE_PARM_DESC(disable_function_topology, "Disable function topology loading");
+
#define COMP_ID_UNASSIGNED 0xffffffff
/*
* Constants used in the computation of linear volume gain
@@ -571,7 +575,11 @@ static int sof_copy_tuples(struct snd_sof_dev *sdev, struct snd_soc_tplg_vendor_
continue;
tuples[*num_copied_tuples].token = tokens[j].token;
- tuples[*num_copied_tuples].value.s = elem->string;
+ tuples[*num_copied_tuples].value.s =
+ devm_kasprintf(sdev->dev, GFP_KERNEL,
+ "%s", elem->string);
+ if (!tuples[*num_copied_tuples].value.s)
+ return -ENOMEM;
} else {
struct snd_soc_tplg_vendor_value_elem *elem;
@@ -1063,7 +1071,7 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp,
struct snd_sof_dai *dai)
{
struct snd_soc_card *card = scomp->card;
- struct snd_soc_pcm_runtime *rtd;
+ struct snd_soc_pcm_runtime *rtd, *full, *partial;
struct snd_soc_dai *cpu_dai;
int stream;
int i;
@@ -1080,12 +1088,22 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp,
else
goto end;
+ full = NULL;
+ partial = NULL;
list_for_each_entry(rtd, &card->rtd_list, list) {
/* does stream match DAI link ? */
- if (!rtd->dai_link->stream_name ||
- !strstr(rtd->dai_link->stream_name, w->sname))
- continue;
+ if (rtd->dai_link->stream_name) {
+ if (!strcmp(rtd->dai_link->stream_name, w->sname)) {
+ full = rtd;
+ break;
+ } else if (strstr(rtd->dai_link->stream_name, w->sname)) {
+ partial = rtd;
+ }
+ }
+ }
+ rtd = full ? full : partial;
+ if (rtd) {
for_each_rtd_cpu_dais(rtd, i, cpu_dai) {
/*
* Please create DAI widget in the right order
@@ -2306,8 +2324,10 @@ static const struct snd_soc_tplg_ops sof_tplg_ops = {
.link_load = sof_link_load,
.link_unload = sof_link_unload,
- /* completion - called at completion of firmware loading */
- .complete = sof_complete,
+ /*
+ * No need to set the complete callback. sof_complete will be called explicitly after
+ * topology loading is complete.
+ */
/* manifest - optional to inform component of manifest */
.manifest = sof_manifest,
@@ -2463,36 +2483,83 @@ static const struct snd_soc_tplg_ops sof_dspless_tplg_ops = {
int snd_sof_load_topology(struct snd_soc_component *scomp, const char *file)
{
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
+ struct snd_sof_pdata *sof_pdata = sdev->pdata;
+ const char *tplg_filename_prefix = sof_pdata->tplg_filename_prefix;
const struct firmware *fw;
+ const char **tplg_files;
+ int tplg_cnt = 0;
int ret;
+ int i;
- dev_dbg(scomp->dev, "loading topology:%s\n", file);
+ tplg_files = kcalloc(scomp->card->num_links, sizeof(char *), GFP_KERNEL);
+ if (!tplg_files)
+ return -ENOMEM;
- ret = request_firmware(&fw, file, scomp->dev);
- if (ret < 0) {
- dev_err(scomp->dev, "error: tplg request firmware %s failed err: %d\n",
- file, ret);
- dev_err(scomp->dev,
- "you may need to download the firmware from https://github.com/thesofproject/sof-bin/\n");
- return ret;
+ if (!sof_pdata->disable_function_topology && !disable_function_topology &&
+ sof_pdata->machine && sof_pdata->machine->get_function_tplg_files) {
+ tplg_cnt = sof_pdata->machine->get_function_tplg_files(scomp->card,
+ sof_pdata->machine,
+ tplg_filename_prefix,
+ &tplg_files);
+ if (tplg_cnt < 0) {
+ kfree(tplg_files);
+ return tplg_cnt;
+ }
}
- if (sdev->dspless_mode_selected)
- ret = snd_soc_tplg_component_load(scomp, &sof_dspless_tplg_ops, fw);
- else
- ret = snd_soc_tplg_component_load(scomp, &sof_tplg_ops, fw);
+ /*
+ * The monolithic topology will be used if there is no get_function_tplg_files
+ * callback or the callback returns 0.
+ */
+ if (!tplg_cnt) {
+ tplg_files[0] = file;
+ tplg_cnt = 1;
+ dev_dbg(scomp->dev, "loading topology: %s\n", file);
+ } else {
+ dev_info(scomp->dev, "Using function topologies instead %s\n", file);
+ }
- if (ret < 0) {
- dev_err(scomp->dev, "error: tplg component load failed %d\n",
- ret);
- ret = -EINVAL;
+ for (i = 0; i < tplg_cnt; i++) {
+ /* Only print the file names if the function topologies are used */
+ if (tplg_files[0] != file)
+ dev_info(scomp->dev, "loading topology %d: %s\n", i, tplg_files[i]);
+
+ ret = request_firmware(&fw, tplg_files[i], scomp->dev);
+ if (ret < 0) {
+ /*
+ * snd_soc_tplg_component_remove(scomp) will be called
+ * if snd_soc_tplg_component_load(scomp) failed and all
+ * objects in the scomp will be removed. No need to call
+ * snd_soc_tplg_component_remove(scomp) here.
+ */
+ dev_err(scomp->dev, "tplg request firmware %s failed err: %d\n",
+ tplg_files[i], ret);
+ goto out;
+ }
+
+ if (sdev->dspless_mode_selected)
+ ret = snd_soc_tplg_component_load(scomp, &sof_dspless_tplg_ops, fw);
+ else
+ ret = snd_soc_tplg_component_load(scomp, &sof_tplg_ops, fw);
+
+ release_firmware(fw);
+
+ if (ret < 0) {
+ dev_err(scomp->dev, "tplg %s component load failed %d\n",
+ tplg_files[i], ret);
+ goto out;
+ }
}
- release_firmware(fw);
+ /* call sof_complete when topologies are loaded successfully */
+ ret = sof_complete(scomp);
+out:
if (ret >= 0 && sdev->led_present)
ret = snd_ctl_led_request();
+ kfree(tplg_files);
+
return ret;
}
EXPORT_SYMBOL(snd_sof_load_topology);
diff --git a/sound/soc/starfive/jh7110_tdm.c b/sound/soc/starfive/jh7110_tdm.c
index d38090e68df5..afdcde7df91a 100644
--- a/sound/soc/starfive/jh7110_tdm.c
+++ b/sound/soc/starfive/jh7110_tdm.c
@@ -10,6 +10,7 @@
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/dmaengine.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
@@ -187,16 +188,8 @@ static int jh7110_tdm_syncdiv(struct jh7110_tdm_dev *tdm)
{
u32 sl, sscale, syncdiv;
- if (tdm->rx.sl >= tdm->tx.sl)
- sl = tdm->rx.sl;
- else
- sl = tdm->tx.sl;
-
- if (tdm->rx.sscale >= tdm->tx.sscale)
- sscale = tdm->rx.sscale;
- else
- sscale = tdm->tx.sscale;
-
+ sl = max(tdm->rx.sl, tdm->tx.sl);
+ sscale = max(tdm->rx.sscale, tdm->tx.sscale);
syncdiv = tdm->pcmclk / tdm->samplerate - 1;
if ((syncdiv + 1) < (sl * sscale)) {
diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c
index 504a14584765..fa821e3fb427 100644
--- a/sound/soc/stm/stm32_sai.c
+++ b/sound/soc/stm/stm32_sai.c
@@ -169,20 +169,14 @@ static int stm32_sai_get_parent_clk(struct stm32_sai_data *sai)
struct device *dev = &sai->pdev->dev;
sai->clk_x8k = devm_clk_get(dev, "x8k");
- if (IS_ERR(sai->clk_x8k)) {
- if (PTR_ERR(sai->clk_x8k) != -EPROBE_DEFER)
- dev_err(dev, "missing x8k parent clock: %ld\n",
- PTR_ERR(sai->clk_x8k));
- return PTR_ERR(sai->clk_x8k);
- }
+ if (IS_ERR(sai->clk_x8k))
+ return dev_err_probe(dev, PTR_ERR(sai->clk_x8k),
+ "missing x8k parent clock\n");
sai->clk_x11k = devm_clk_get(dev, "x11k");
- if (IS_ERR(sai->clk_x11k)) {
- if (PTR_ERR(sai->clk_x11k) != -EPROBE_DEFER)
- dev_err(dev, "missing x11k parent clock: %ld\n",
- PTR_ERR(sai->clk_x11k));
- return PTR_ERR(sai->clk_x11k);
- }
+ if (IS_ERR(sai->clk_x11k))
+ return dev_err_probe(dev, PTR_ERR(sai->clk_x11k),
+ "missing x11k parent clock\n");
return 0;
}
diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c
index 8b9eb1a202f7..7b3496caa31e 100644
--- a/sound/soc/sunxi/sun8i-codec.c
+++ b/sound/soc/sunxi/sun8i-codec.c
@@ -248,12 +248,10 @@ static int sun8i_codec_runtime_resume(struct device *dev)
struct sun8i_codec *scodec = dev_get_drvdata(dev);
int ret;
- if (scodec->clk_bus) {
- ret = clk_prepare_enable(scodec->clk_bus);
- if (ret) {
- dev_err(dev, "Failed to enable the bus clock\n");
- return ret;
- }
+ ret = clk_prepare_enable(scodec->clk_bus);
+ if (ret) {
+ dev_err(dev, "Failed to enable the bus clock\n");
+ return ret;
}
regcache_cache_only(scodec->regmap, false);
@@ -274,8 +272,7 @@ static int sun8i_codec_runtime_suspend(struct device *dev)
regcache_cache_only(scodec->regmap, true);
regcache_mark_dirty(scodec->regmap);
- if (scodec->clk_bus)
- clk_disable_unprepare(scodec->clk_bus);
+ clk_disable_unprepare(scodec->clk_bus);
return 0;
}
diff --git a/sound/soc/tegra/tegra186_asrc.c b/sound/soc/tegra/tegra186_asrc.c
index 5c67e1f01d9b..851509ae07f5 100644
--- a/sound/soc/tegra/tegra186_asrc.c
+++ b/sound/soc/tegra/tegra186_asrc.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION. All rights reserved.
//
// tegra186_asrc.c - Tegra186 ASRC driver
-//
-// Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#include <linux/clk.h>
#include <linux/delay.h>
@@ -99,7 +98,7 @@ static int tegra186_asrc_runtime_resume(struct device *dev)
* sync is done after this to restore other settings.
*/
regmap_write(asrc->regmap, TEGRA186_ASRC_GLOBAL_SCRATCH_ADDR,
- TEGRA186_ASRC_ARAM_START_ADDR);
+ asrc->soc_data->aram_start_addr);
regmap_write(asrc->regmap, TEGRA186_ASRC_GLOBAL_ENB,
TEGRA186_ASRC_GLOBAL_EN);
@@ -954,8 +953,17 @@ static const struct regmap_config tegra186_asrc_regmap_config = {
.cache_type = REGCACHE_FLAT,
};
+static const struct tegra_asrc_soc_data soc_data_tegra186 = {
+ .aram_start_addr = TEGRA186_ASRC_ARAM_START_ADDR,
+};
+
+static const struct tegra_asrc_soc_data soc_data_tegra264 = {
+ .aram_start_addr = TEGRA264_ASRC_ARAM_START_ADDR,
+};
+
static const struct of_device_id tegra186_asrc_of_match[] = {
- { .compatible = "nvidia,tegra186-asrc" },
+ { .compatible = "nvidia,tegra186-asrc", .data = &soc_data_tegra186 },
+ { .compatible = "nvidia,tegra264-asrc", .data = &soc_data_tegra264 },
{},
};
MODULE_DEVICE_TABLE(of, tegra186_asrc_of_match);
@@ -985,6 +993,8 @@ static int tegra186_asrc_platform_probe(struct platform_device *pdev)
return PTR_ERR(asrc->regmap);
}
+ asrc->soc_data = of_device_get_match_data(&pdev->dev);
+
regcache_cache_only(asrc->regmap, true);
regmap_write(asrc->regmap, TEGRA186_ASRC_GLOBAL_CFG,
diff --git a/sound/soc/tegra/tegra186_asrc.h b/sound/soc/tegra/tegra186_asrc.h
index 094fcc723c02..0c98e26d5e72 100644
--- a/sound/soc/tegra/tegra186_asrc.h
+++ b/sound/soc/tegra/tegra186_asrc.h
@@ -1,9 +1,7 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
+/* SPDX-License-Identifier: GPL-2.0-only
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION. All rights reserved.
* tegra186_asrc.h - Definitions for Tegra186 ASRC driver
*
- * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
*/
#ifndef __TEGRA186_ASRC_H__
@@ -94,6 +92,7 @@
#define TEGRA186_ASRC_RATIO_SOURCE_SW 0x1
#define TEGRA186_ASRC_ARAM_START_ADDR 0x3f800000
+#define TEGRA264_ASRC_ARAM_START_ADDR 0x8a080000
struct tegra186_asrc_lane {
unsigned int int_part;
@@ -104,7 +103,12 @@ struct tegra186_asrc_lane {
unsigned int output_thresh;
};
+struct tegra_asrc_soc_data {
+ unsigned int aram_start_addr;
+};
+
struct tegra186_asrc {
+ const struct tegra_asrc_soc_data *soc_data;
struct tegra186_asrc_lane lane[TEGRA186_ASRC_STREAM_MAX];
struct regmap *regmap;
};
diff --git a/sound/soc/tegra/tegra210_admaif.c b/sound/soc/tegra/tegra210_admaif.c
index 76ff4fe40f65..f88d6a2356e0 100644
--- a/sound/soc/tegra/tegra210_admaif.c
+++ b/sound/soc/tegra/tegra210_admaif.c
@@ -25,12 +25,12 @@
#define CH_RX_REG(reg, id) CH_REG(admaif->soc_data->rx_base, reg, id)
-#define REG_DEFAULTS(id, rx_ctrl, tx_ctrl, tx_base, rx_base) \
+#define REG_DEFAULTS(id, rx_ctrl, tx_ctrl, tx_base, rx_base, cif_ctrl) \
{ CH_REG(rx_base, TEGRA_ADMAIF_RX_INT_MASK, id), 0x00000001 }, \
- { CH_REG(rx_base, TEGRA_ADMAIF_CH_ACIF_RX_CTRL, id), 0x00007700 }, \
+ { CH_REG(rx_base, TEGRA_ADMAIF_CH_ACIF_RX_CTRL, id), cif_ctrl }, \
{ CH_REG(rx_base, TEGRA_ADMAIF_RX_FIFO_CTRL, id), rx_ctrl }, \
{ CH_REG(tx_base, TEGRA_ADMAIF_TX_INT_MASK, id), 0x00000001 }, \
- { CH_REG(tx_base, TEGRA_ADMAIF_CH_ACIF_TX_CTRL, id), 0x00007700 }, \
+ { CH_REG(tx_base, TEGRA_ADMAIF_CH_ACIF_TX_CTRL, id), cif_ctrl }, \
{ CH_REG(tx_base, TEGRA_ADMAIF_TX_FIFO_CTRL, id), tx_ctrl }
#define ADMAIF_REG_DEFAULTS(id, chip) \
@@ -38,7 +38,8 @@
chip ## _ADMAIF_RX ## id ## _FIFO_CTRL_REG_DEFAULT, \
chip ## _ADMAIF_TX ## id ## _FIFO_CTRL_REG_DEFAULT, \
chip ## _ADMAIF_TX_BASE, \
- chip ## _ADMAIF_RX_BASE)
+ chip ## _ADMAIF_RX_BASE, \
+ chip ## _ADMAIF_CIF_REG_DEFAULT)
static const struct reg_default tegra186_admaif_reg_defaults[] = {
{(TEGRA_ADMAIF_GLOBAL_CG_0 + TEGRA186_ADMAIF_GLOBAL_BASE), 0x00000003},
@@ -78,6 +79,42 @@ static const struct reg_default tegra210_admaif_reg_defaults[] = {
ADMAIF_REG_DEFAULTS(10, TEGRA210)
};
+static const struct reg_default tegra264_admaif_reg_defaults[] = {
+ {(TEGRA_ADMAIF_GLOBAL_CG_0 + TEGRA264_ADMAIF_GLOBAL_BASE), 0x00000003},
+ ADMAIF_REG_DEFAULTS(1, TEGRA264),
+ ADMAIF_REG_DEFAULTS(2, TEGRA264),
+ ADMAIF_REG_DEFAULTS(3, TEGRA264),
+ ADMAIF_REG_DEFAULTS(4, TEGRA264),
+ ADMAIF_REG_DEFAULTS(5, TEGRA264),
+ ADMAIF_REG_DEFAULTS(6, TEGRA264),
+ ADMAIF_REG_DEFAULTS(7, TEGRA264),
+ ADMAIF_REG_DEFAULTS(8, TEGRA264),
+ ADMAIF_REG_DEFAULTS(9, TEGRA264),
+ ADMAIF_REG_DEFAULTS(10, TEGRA264),
+ ADMAIF_REG_DEFAULTS(11, TEGRA264),
+ ADMAIF_REG_DEFAULTS(12, TEGRA264),
+ ADMAIF_REG_DEFAULTS(13, TEGRA264),
+ ADMAIF_REG_DEFAULTS(14, TEGRA264),
+ ADMAIF_REG_DEFAULTS(15, TEGRA264),
+ ADMAIF_REG_DEFAULTS(16, TEGRA264),
+ ADMAIF_REG_DEFAULTS(17, TEGRA264),
+ ADMAIF_REG_DEFAULTS(18, TEGRA264),
+ ADMAIF_REG_DEFAULTS(19, TEGRA264),
+ ADMAIF_REG_DEFAULTS(20, TEGRA264),
+ ADMAIF_REG_DEFAULTS(21, TEGRA264),
+ ADMAIF_REG_DEFAULTS(22, TEGRA264),
+ ADMAIF_REG_DEFAULTS(23, TEGRA264),
+ ADMAIF_REG_DEFAULTS(24, TEGRA264),
+ ADMAIF_REG_DEFAULTS(25, TEGRA264),
+ ADMAIF_REG_DEFAULTS(26, TEGRA264),
+ ADMAIF_REG_DEFAULTS(27, TEGRA264),
+ ADMAIF_REG_DEFAULTS(28, TEGRA264),
+ ADMAIF_REG_DEFAULTS(29, TEGRA264),
+ ADMAIF_REG_DEFAULTS(30, TEGRA264),
+ ADMAIF_REG_DEFAULTS(31, TEGRA264),
+ ADMAIF_REG_DEFAULTS(32, TEGRA264)
+};
+
static bool tegra_admaif_wr_reg(struct device *dev, unsigned int reg)
{
struct tegra_admaif *admaif = dev_get_drvdata(dev);
@@ -220,6 +257,19 @@ static const struct regmap_config tegra186_admaif_regmap_config = {
.cache_type = REGCACHE_FLAT,
};
+static const struct regmap_config tegra264_admaif_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = TEGRA264_ADMAIF_LAST_REG,
+ .writeable_reg = tegra_admaif_wr_reg,
+ .readable_reg = tegra_admaif_rd_reg,
+ .volatile_reg = tegra_admaif_volatile_reg,
+ .reg_defaults = tegra264_admaif_reg_defaults,
+ .num_reg_defaults = TEGRA264_ADMAIF_CHANNEL_COUNT * 6 + 1,
+ .cache_type = REGCACHE_FLAT,
+};
+
static int tegra_admaif_runtime_suspend(struct device *dev)
{
struct tegra_admaif *admaif = dev_get_drvdata(dev);
@@ -330,7 +380,10 @@ static int tegra_admaif_hw_params(struct snd_pcm_substream *substream,
tegra_admaif_set_pack_mode(admaif->regmap, reg, valid_bit);
- tegra_set_cif(admaif->regmap, reg, &cif_conf);
+ if (admaif->soc_data->max_stream_ch == TEGRA264_ADMAIF_MAX_CHANNEL)
+ tegra264_set_cif(admaif->regmap, reg, &cif_conf);
+ else
+ tegra_set_cif(admaif->regmap, reg, &cif_conf);
return 0;
}
@@ -571,13 +624,13 @@ static const struct snd_soc_dai_ops tegra_admaif_dai_ops = {
.prepare = tegra_admaif_prepare,
};
-#define DAI(dai_name) \
+#define DAI(dai_name, channel) \
{ \
.name = dai_name, \
.playback = { \
.stream_name = dai_name " Playback", \
.channels_min = 1, \
- .channels_max = 16, \
+ .channels_max = channel, \
.rates = SNDRV_PCM_RATE_8000_192000, \
.formats = SNDRV_PCM_FMTBIT_S8 | \
SNDRV_PCM_FMTBIT_S16_LE | \
@@ -587,7 +640,7 @@ static const struct snd_soc_dai_ops tegra_admaif_dai_ops = {
.capture = { \
.stream_name = dai_name " Capture", \
.channels_min = 1, \
- .channels_max = 16, \
+ .channels_max = channel, \
.rates = SNDRV_PCM_RATE_8000_192000, \
.formats = SNDRV_PCM_FMTBIT_S8 | \
SNDRV_PCM_FMTBIT_S16_LE | \
@@ -598,39 +651,74 @@ static const struct snd_soc_dai_ops tegra_admaif_dai_ops = {
}
static struct snd_soc_dai_driver tegra210_admaif_cmpnt_dais[] = {
- DAI("ADMAIF1"),
- DAI("ADMAIF2"),
- DAI("ADMAIF3"),
- DAI("ADMAIF4"),
- DAI("ADMAIF5"),
- DAI("ADMAIF6"),
- DAI("ADMAIF7"),
- DAI("ADMAIF8"),
- DAI("ADMAIF9"),
- DAI("ADMAIF10"),
+ DAI("ADMAIF1", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF2", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF3", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF4", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF5", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF6", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF7", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF8", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF9", TEGRA210_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF10", TEGRA210_ADMAIF_MAX_CHANNEL),
};
static struct snd_soc_dai_driver tegra186_admaif_cmpnt_dais[] = {
- DAI("ADMAIF1"),
- DAI("ADMAIF2"),
- DAI("ADMAIF3"),
- DAI("ADMAIF4"),
- DAI("ADMAIF5"),
- DAI("ADMAIF6"),
- DAI("ADMAIF7"),
- DAI("ADMAIF8"),
- DAI("ADMAIF9"),
- DAI("ADMAIF10"),
- DAI("ADMAIF11"),
- DAI("ADMAIF12"),
- DAI("ADMAIF13"),
- DAI("ADMAIF14"),
- DAI("ADMAIF15"),
- DAI("ADMAIF16"),
- DAI("ADMAIF17"),
- DAI("ADMAIF18"),
- DAI("ADMAIF19"),
- DAI("ADMAIF20"),
+ DAI("ADMAIF1", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF2", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF3", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF4", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF5", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF6", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF7", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF8", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF9", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF10", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF11", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF12", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF13", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF14", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF15", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF16", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF17", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF18", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF19", TEGRA186_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF20", TEGRA186_ADMAIF_MAX_CHANNEL),
+};
+
+static struct snd_soc_dai_driver tegra264_admaif_cmpnt_dais[] = {
+ DAI("ADMAIF1", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF2", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF3", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF4", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF5", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF6", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF7", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF8", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF9", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF10", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF11", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF12", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF13", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF14", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF15", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF16", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF17", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF18", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF19", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF20", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF21", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF22", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF23", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF24", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF25", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF26", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF27", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF28", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF29", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF30", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF31", TEGRA264_ADMAIF_MAX_CHANNEL),
+ DAI("ADMAIF32", TEGRA264_ADMAIF_MAX_CHANNEL),
};
static const char * const tegra_admaif_stereo_conv_text[] = {
@@ -710,6 +798,41 @@ static struct snd_kcontrol_new tegra186_admaif_controls[] = {
TEGRA_ADMAIF_CIF_CTRL(20),
};
+static struct snd_kcontrol_new tegra264_admaif_controls[] = {
+ TEGRA_ADMAIF_CIF_CTRL(1),
+ TEGRA_ADMAIF_CIF_CTRL(2),
+ TEGRA_ADMAIF_CIF_CTRL(3),
+ TEGRA_ADMAIF_CIF_CTRL(4),
+ TEGRA_ADMAIF_CIF_CTRL(5),
+ TEGRA_ADMAIF_CIF_CTRL(6),
+ TEGRA_ADMAIF_CIF_CTRL(7),
+ TEGRA_ADMAIF_CIF_CTRL(8),
+ TEGRA_ADMAIF_CIF_CTRL(9),
+ TEGRA_ADMAIF_CIF_CTRL(10),
+ TEGRA_ADMAIF_CIF_CTRL(11),
+ TEGRA_ADMAIF_CIF_CTRL(12),
+ TEGRA_ADMAIF_CIF_CTRL(13),
+ TEGRA_ADMAIF_CIF_CTRL(14),
+ TEGRA_ADMAIF_CIF_CTRL(15),
+ TEGRA_ADMAIF_CIF_CTRL(16),
+ TEGRA_ADMAIF_CIF_CTRL(17),
+ TEGRA_ADMAIF_CIF_CTRL(18),
+ TEGRA_ADMAIF_CIF_CTRL(19),
+ TEGRA_ADMAIF_CIF_CTRL(20),
+ TEGRA_ADMAIF_CIF_CTRL(21),
+ TEGRA_ADMAIF_CIF_CTRL(22),
+ TEGRA_ADMAIF_CIF_CTRL(23),
+ TEGRA_ADMAIF_CIF_CTRL(24),
+ TEGRA_ADMAIF_CIF_CTRL(25),
+ TEGRA_ADMAIF_CIF_CTRL(26),
+ TEGRA_ADMAIF_CIF_CTRL(27),
+ TEGRA_ADMAIF_CIF_CTRL(28),
+ TEGRA_ADMAIF_CIF_CTRL(29),
+ TEGRA_ADMAIF_CIF_CTRL(30),
+ TEGRA_ADMAIF_CIF_CTRL(31),
+ TEGRA_ADMAIF_CIF_CTRL(32),
+};
+
static const struct snd_soc_component_driver tegra210_admaif_cmpnt = {
.controls = tegra210_admaif_controls,
.num_controls = ARRAY_SIZE(tegra210_admaif_controls),
@@ -730,8 +853,19 @@ static const struct snd_soc_component_driver tegra186_admaif_cmpnt = {
.pointer = tegra_pcm_pointer,
};
+static const struct snd_soc_component_driver tegra264_admaif_cmpnt = {
+ .controls = tegra264_admaif_controls,
+ .num_controls = ARRAY_SIZE(tegra264_admaif_controls),
+ .pcm_construct = tegra_pcm_construct,
+ .open = tegra_pcm_open,
+ .close = tegra_pcm_close,
+ .hw_params = tegra_pcm_hw_params,
+ .pointer = tegra_pcm_pointer,
+};
+
static const struct tegra_admaif_soc_data soc_data_tegra210 = {
.num_ch = TEGRA210_ADMAIF_CHANNEL_COUNT,
+ .max_stream_ch = TEGRA210_ADMAIF_MAX_CHANNEL,
.cmpnt = &tegra210_admaif_cmpnt,
.dais = tegra210_admaif_cmpnt_dais,
.regmap_conf = &tegra210_admaif_regmap_config,
@@ -742,6 +876,7 @@ static const struct tegra_admaif_soc_data soc_data_tegra210 = {
static const struct tegra_admaif_soc_data soc_data_tegra186 = {
.num_ch = TEGRA186_ADMAIF_CHANNEL_COUNT,
+ .max_stream_ch = TEGRA186_ADMAIF_MAX_CHANNEL,
.cmpnt = &tegra186_admaif_cmpnt,
.dais = tegra186_admaif_cmpnt_dais,
.regmap_conf = &tegra186_admaif_regmap_config,
@@ -750,9 +885,21 @@ static const struct tegra_admaif_soc_data soc_data_tegra186 = {
.rx_base = TEGRA186_ADMAIF_RX_BASE,
};
+static const struct tegra_admaif_soc_data soc_data_tegra264 = {
+ .num_ch = TEGRA264_ADMAIF_CHANNEL_COUNT,
+ .max_stream_ch = TEGRA264_ADMAIF_MAX_CHANNEL,
+ .cmpnt = &tegra264_admaif_cmpnt,
+ .dais = tegra264_admaif_cmpnt_dais,
+ .regmap_conf = &tegra264_admaif_regmap_config,
+ .global_base = TEGRA264_ADMAIF_GLOBAL_BASE,
+ .tx_base = TEGRA264_ADMAIF_TX_BASE,
+ .rx_base = TEGRA264_ADMAIF_RX_BASE,
+};
+
static const struct of_device_id tegra_admaif_of_match[] = {
{ .compatible = "nvidia,tegra210-admaif", .data = &soc_data_tegra210 },
{ .compatible = "nvidia,tegra186-admaif", .data = &soc_data_tegra186 },
+ { .compatible = "nvidia,tegra264-admaif", .data = &soc_data_tegra264 },
{},
};
MODULE_DEVICE_TABLE(of, tegra_admaif_of_match);
diff --git a/sound/soc/tegra/tegra210_admaif.h b/sound/soc/tegra/tegra210_admaif.h
index 748f886ee74e..304d45c76a9a 100644
--- a/sound/soc/tegra/tegra210_admaif.h
+++ b/sound/soc/tegra/tegra210_admaif.h
@@ -16,12 +16,21 @@
#define TEGRA210_ADMAIF_RX_BASE 0x0
#define TEGRA210_ADMAIF_TX_BASE 0x300
#define TEGRA210_ADMAIF_GLOBAL_BASE 0x700
+#define TEGRA210_ADMAIF_MAX_CHANNEL 16
/* Tegra186 specific */
#define TEGRA186_ADMAIF_LAST_REG 0xd5f
#define TEGRA186_ADMAIF_CHANNEL_COUNT 20
#define TEGRA186_ADMAIF_RX_BASE 0x0
#define TEGRA186_ADMAIF_TX_BASE 0x500
#define TEGRA186_ADMAIF_GLOBAL_BASE 0xd00
+#define TEGRA186_ADMAIF_MAX_CHANNEL 16
+/* Tegra264 specific */
+#define TEGRA264_ADMAIF_LAST_REG 0x205f
+#define TEGRA264_ADMAIF_CHANNEL_COUNT 32
+#define TEGRA264_ADMAIF_RX_BASE 0x0
+#define TEGRA264_ADMAIF_TX_BASE 0x1000
+#define TEGRA264_ADMAIF_GLOBAL_BASE 0x2000
+#define TEGRA264_ADMAIF_MAX_CHANNEL 32
/* Global registers */
#define TEGRA_ADMAIF_GLOBAL_ENABLE 0x0
#define TEGRA_ADMAIF_GLOBAL_CG_0 0x8
@@ -66,6 +75,7 @@
#define SW_RESET_MASK 1
#define SW_RESET 1
/* Default values - Tegra210 */
+#define TEGRA210_ADMAIF_CIF_REG_DEFAULT 0x00007700
#define TEGRA210_ADMAIF_RX1_FIFO_CTRL_REG_DEFAULT 0x00000300
#define TEGRA210_ADMAIF_RX2_FIFO_CTRL_REG_DEFAULT 0x00000304
#define TEGRA210_ADMAIF_RX3_FIFO_CTRL_REG_DEFAULT 0x00000208
@@ -87,6 +97,7 @@
#define TEGRA210_ADMAIF_TX9_FIFO_CTRL_REG_DEFAULT 0x0180021a
#define TEGRA210_ADMAIF_TX10_FIFO_CTRL_REG_DEFAULT 0x0180021d
/* Default values - Tegra186 */
+#define TEGRA186_ADMAIF_CIF_REG_DEFAULT 0x00007700
#define TEGRA186_ADMAIF_RX1_FIFO_CTRL_REG_DEFAULT 0x00000300
#define TEGRA186_ADMAIF_RX2_FIFO_CTRL_REG_DEFAULT 0x00000304
#define TEGRA186_ADMAIF_RX3_FIFO_CTRL_REG_DEFAULT 0x00000308
@@ -127,6 +138,72 @@
#define TEGRA186_ADMAIF_TX18_FIFO_CTRL_REG_DEFAULT 0x01800237
#define TEGRA186_ADMAIF_TX19_FIFO_CTRL_REG_DEFAULT 0x0180023a
#define TEGRA186_ADMAIF_TX20_FIFO_CTRL_REG_DEFAULT 0x0180023d
+/* Default values - Tegra264 */
+#define TEGRA264_ADMAIF_CIF_REG_DEFAULT 0x00003f00
+#define TEGRA264_ADMAIF_RX1_FIFO_CTRL_REG_DEFAULT 0x00000200
+#define TEGRA264_ADMAIF_RX2_FIFO_CTRL_REG_DEFAULT 0x00000203
+#define TEGRA264_ADMAIF_RX3_FIFO_CTRL_REG_DEFAULT 0x00000206
+#define TEGRA264_ADMAIF_RX4_FIFO_CTRL_REG_DEFAULT 0x00000209
+#define TEGRA264_ADMAIF_RX5_FIFO_CTRL_REG_DEFAULT 0x0000020c
+#define TEGRA264_ADMAIF_RX6_FIFO_CTRL_REG_DEFAULT 0x0000020f
+#define TEGRA264_ADMAIF_RX7_FIFO_CTRL_REG_DEFAULT 0x00000212
+#define TEGRA264_ADMAIF_RX8_FIFO_CTRL_REG_DEFAULT 0x00000215
+#define TEGRA264_ADMAIF_RX9_FIFO_CTRL_REG_DEFAULT 0x00000218
+#define TEGRA264_ADMAIF_RX10_FIFO_CTRL_REG_DEFAULT 0x0000021b
+#define TEGRA264_ADMAIF_RX11_FIFO_CTRL_REG_DEFAULT 0x0000021e
+#define TEGRA264_ADMAIF_RX12_FIFO_CTRL_REG_DEFAULT 0x00000221
+#define TEGRA264_ADMAIF_RX13_FIFO_CTRL_REG_DEFAULT 0x00000224
+#define TEGRA264_ADMAIF_RX14_FIFO_CTRL_REG_DEFAULT 0x00000227
+#define TEGRA264_ADMAIF_RX15_FIFO_CTRL_REG_DEFAULT 0x0000022a
+#define TEGRA264_ADMAIF_RX16_FIFO_CTRL_REG_DEFAULT 0x0000022d
+#define TEGRA264_ADMAIF_RX17_FIFO_CTRL_REG_DEFAULT 0x00000230
+#define TEGRA264_ADMAIF_RX18_FIFO_CTRL_REG_DEFAULT 0x00000233
+#define TEGRA264_ADMAIF_RX19_FIFO_CTRL_REG_DEFAULT 0x00000236
+#define TEGRA264_ADMAIF_RX20_FIFO_CTRL_REG_DEFAULT 0x00000239
+#define TEGRA264_ADMAIF_RX21_FIFO_CTRL_REG_DEFAULT 0x0000023c
+#define TEGRA264_ADMAIF_RX22_FIFO_CTRL_REG_DEFAULT 0x0000023f
+#define TEGRA264_ADMAIF_RX23_FIFO_CTRL_REG_DEFAULT 0x00000242
+#define TEGRA264_ADMAIF_RX24_FIFO_CTRL_REG_DEFAULT 0x00000245
+#define TEGRA264_ADMAIF_RX25_FIFO_CTRL_REG_DEFAULT 0x00000248
+#define TEGRA264_ADMAIF_RX26_FIFO_CTRL_REG_DEFAULT 0x0000024b
+#define TEGRA264_ADMAIF_RX27_FIFO_CTRL_REG_DEFAULT 0x0000024e
+#define TEGRA264_ADMAIF_RX28_FIFO_CTRL_REG_DEFAULT 0x00000251
+#define TEGRA264_ADMAIF_RX29_FIFO_CTRL_REG_DEFAULT 0x00000254
+#define TEGRA264_ADMAIF_RX30_FIFO_CTRL_REG_DEFAULT 0x00000257
+#define TEGRA264_ADMAIF_RX31_FIFO_CTRL_REG_DEFAULT 0x0000025a
+#define TEGRA264_ADMAIF_RX32_FIFO_CTRL_REG_DEFAULT 0x0000025d
+#define TEGRA264_ADMAIF_TX1_FIFO_CTRL_REG_DEFAULT 0x01800200
+#define TEGRA264_ADMAIF_TX2_FIFO_CTRL_REG_DEFAULT 0x01800203
+#define TEGRA264_ADMAIF_TX3_FIFO_CTRL_REG_DEFAULT 0x01800206
+#define TEGRA264_ADMAIF_TX4_FIFO_CTRL_REG_DEFAULT 0x01800209
+#define TEGRA264_ADMAIF_TX5_FIFO_CTRL_REG_DEFAULT 0x0180020c
+#define TEGRA264_ADMAIF_TX6_FIFO_CTRL_REG_DEFAULT 0x0180020f
+#define TEGRA264_ADMAIF_TX7_FIFO_CTRL_REG_DEFAULT 0x01800212
+#define TEGRA264_ADMAIF_TX8_FIFO_CTRL_REG_DEFAULT 0x01800215
+#define TEGRA264_ADMAIF_TX9_FIFO_CTRL_REG_DEFAULT 0x01800218
+#define TEGRA264_ADMAIF_TX10_FIFO_CTRL_REG_DEFAULT 0x0180021b
+#define TEGRA264_ADMAIF_TX11_FIFO_CTRL_REG_DEFAULT 0x0180021e
+#define TEGRA264_ADMAIF_TX12_FIFO_CTRL_REG_DEFAULT 0x01800221
+#define TEGRA264_ADMAIF_TX13_FIFO_CTRL_REG_DEFAULT 0x01800224
+#define TEGRA264_ADMAIF_TX14_FIFO_CTRL_REG_DEFAULT 0x01800227
+#define TEGRA264_ADMAIF_TX15_FIFO_CTRL_REG_DEFAULT 0x0180022a
+#define TEGRA264_ADMAIF_TX16_FIFO_CTRL_REG_DEFAULT 0x0180022d
+#define TEGRA264_ADMAIF_TX17_FIFO_CTRL_REG_DEFAULT 0x01800230
+#define TEGRA264_ADMAIF_TX18_FIFO_CTRL_REG_DEFAULT 0x01800233
+#define TEGRA264_ADMAIF_TX19_FIFO_CTRL_REG_DEFAULT 0x01800236
+#define TEGRA264_ADMAIF_TX20_FIFO_CTRL_REG_DEFAULT 0x01800239
+#define TEGRA264_ADMAIF_TX21_FIFO_CTRL_REG_DEFAULT 0x0180023c
+#define TEGRA264_ADMAIF_TX22_FIFO_CTRL_REG_DEFAULT 0x0180023f
+#define TEGRA264_ADMAIF_TX23_FIFO_CTRL_REG_DEFAULT 0x01800242
+#define TEGRA264_ADMAIF_TX24_FIFO_CTRL_REG_DEFAULT 0x01800245
+#define TEGRA264_ADMAIF_TX25_FIFO_CTRL_REG_DEFAULT 0x01800248
+#define TEGRA264_ADMAIF_TX26_FIFO_CTRL_REG_DEFAULT 0x0180024b
+#define TEGRA264_ADMAIF_TX27_FIFO_CTRL_REG_DEFAULT 0x0180024e
+#define TEGRA264_ADMAIF_TX28_FIFO_CTRL_REG_DEFAULT 0x01800251
+#define TEGRA264_ADMAIF_TX29_FIFO_CTRL_REG_DEFAULT 0x01800254
+#define TEGRA264_ADMAIF_TX30_FIFO_CTRL_REG_DEFAULT 0x01800257
+#define TEGRA264_ADMAIF_TX31_FIFO_CTRL_REG_DEFAULT 0x0180025a
+#define TEGRA264_ADMAIF_TX32_FIFO_CTRL_REG_DEFAULT 0x0180025d
enum {
DATA_8BIT,
@@ -148,6 +225,7 @@ struct tegra_admaif_soc_data {
unsigned int tx_base;
unsigned int rx_base;
unsigned int num_ch;
+ unsigned int max_stream_ch;
};
struct tegra_admaif {
diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c
index b6c798baedea..ad7cd8655047 100644
--- a/sound/soc/tegra/tegra210_adx.c
+++ b/sound/soc/tegra/tegra210_adx.c
@@ -9,6 +9,7 @@
#include <linux/io.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
@@ -32,21 +33,37 @@ static const struct reg_default tegra210_adx_reg_defaults[] = {
{ TEGRA210_ADX_CFG_RAM_CTRL, 0x00004000},
};
+static const struct reg_default tegra264_adx_reg_defaults[] = {
+ { TEGRA210_ADX_RX_INT_MASK, 0x00000001},
+ { TEGRA210_ADX_RX_CIF_CTRL, 0x00003800},
+ { TEGRA210_ADX_TX_INT_MASK, 0x0000000f },
+ { TEGRA210_ADX_TX1_CIF_CTRL, 0x00003800},
+ { TEGRA210_ADX_TX2_CIF_CTRL, 0x00003800},
+ { TEGRA210_ADX_TX3_CIF_CTRL, 0x00003800},
+ { TEGRA210_ADX_TX4_CIF_CTRL, 0x00003800},
+ { TEGRA210_ADX_CG, 0x1},
+ { TEGRA264_ADX_CFG_RAM_CTRL, 0x00004000},
+};
+
static void tegra210_adx_write_map_ram(struct tegra210_adx *adx)
{
int i;
- regmap_write(adx->regmap, TEGRA210_ADX_CFG_RAM_CTRL,
+ regmap_write(adx->regmap, TEGRA210_ADX_CFG_RAM_CTRL +
+ adx->soc_data->cya_offset,
TEGRA210_ADX_CFG_RAM_CTRL_SEQ_ACCESS_EN |
TEGRA210_ADX_CFG_RAM_CTRL_ADDR_INIT_EN |
TEGRA210_ADX_CFG_RAM_CTRL_RW_WRITE);
- for (i = 0; i < TEGRA210_ADX_RAM_DEPTH; i++)
- regmap_write(adx->regmap, TEGRA210_ADX_CFG_RAM_DATA,
+ for (i = 0; i < adx->soc_data->ram_depth; i++)
+ regmap_write(adx->regmap, TEGRA210_ADX_CFG_RAM_DATA +
+ adx->soc_data->cya_offset,
adx->map[i]);
- regmap_write(adx->regmap, TEGRA210_ADX_IN_BYTE_EN0, adx->byte_mask[0]);
- regmap_write(adx->regmap, TEGRA210_ADX_IN_BYTE_EN1, adx->byte_mask[1]);
+ for (i = 0; i < adx->soc_data->byte_mask_size; i++)
+ regmap_write(adx->regmap,
+ TEGRA210_ADX_IN_BYTE_EN0 + (i * TEGRA210_ADX_AUDIOCIF_CH_STRIDE),
+ adx->byte_mask[i]);
}
static int tegra210_adx_startup(struct snd_pcm_substream *substream,
@@ -117,7 +134,7 @@ static int tegra210_adx_set_audio_cif(struct snd_soc_dai *dai,
memset(&cif_conf, 0, sizeof(struct tegra_cif_conf));
- if (channels < 1 || channels > 16)
+ if (channels < 1 || channels > adx->soc_data->max_ch)
return -EINVAL;
switch (format) {
@@ -140,7 +157,10 @@ static int tegra210_adx_set_audio_cif(struct snd_soc_dai *dai,
cif_conf.audio_bits = audio_bits;
cif_conf.client_bits = audio_bits;
- tegra_set_cif(adx->regmap, reg, &cif_conf);
+ if (adx->soc_data->max_ch == 32)
+ tegra264_set_cif(adx->regmap, reg, &cif_conf);
+ else
+ tegra_set_cif(adx->regmap, reg, &cif_conf);
return 0;
}
@@ -169,7 +189,7 @@ static int tegra210_adx_get_byte_map(struct snd_kcontrol *kcontrol,
struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
struct tegra210_adx *adx = snd_soc_component_get_drvdata(cmpnt);
struct soc_mixer_control *mc;
- unsigned char *bytes_map = (unsigned char *)&adx->map;
+ unsigned char *bytes_map = (unsigned char *)adx->map;
int enabled;
mc = (struct soc_mixer_control *)kcontrol->private_value;
@@ -198,7 +218,7 @@ static int tegra210_adx_put_byte_map(struct snd_kcontrol *kcontrol,
{
struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
struct tegra210_adx *adx = snd_soc_component_get_drvdata(cmpnt);
- unsigned char *bytes_map = (unsigned char *)&adx->map;
+ unsigned char *bytes_map = (unsigned char *)adx->map;
int value = ucontrol->value.integer.value[0];
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
@@ -402,7 +422,90 @@ static struct snd_kcontrol_new tegra210_adx_controls[] = {
TEGRA210_ADX_BYTE_MAP_CTRL(63),
};
+static struct snd_kcontrol_new tegra264_adx_controls[] = {
+ TEGRA210_ADX_BYTE_MAP_CTRL(64),
+ TEGRA210_ADX_BYTE_MAP_CTRL(65),
+ TEGRA210_ADX_BYTE_MAP_CTRL(66),
+ TEGRA210_ADX_BYTE_MAP_CTRL(67),
+ TEGRA210_ADX_BYTE_MAP_CTRL(68),
+ TEGRA210_ADX_BYTE_MAP_CTRL(69),
+ TEGRA210_ADX_BYTE_MAP_CTRL(70),
+ TEGRA210_ADX_BYTE_MAP_CTRL(71),
+ TEGRA210_ADX_BYTE_MAP_CTRL(72),
+ TEGRA210_ADX_BYTE_MAP_CTRL(73),
+ TEGRA210_ADX_BYTE_MAP_CTRL(74),
+ TEGRA210_ADX_BYTE_MAP_CTRL(75),
+ TEGRA210_ADX_BYTE_MAP_CTRL(76),
+ TEGRA210_ADX_BYTE_MAP_CTRL(77),
+ TEGRA210_ADX_BYTE_MAP_CTRL(78),
+ TEGRA210_ADX_BYTE_MAP_CTRL(79),
+ TEGRA210_ADX_BYTE_MAP_CTRL(80),
+ TEGRA210_ADX_BYTE_MAP_CTRL(81),
+ TEGRA210_ADX_BYTE_MAP_CTRL(82),
+ TEGRA210_ADX_BYTE_MAP_CTRL(83),
+ TEGRA210_ADX_BYTE_MAP_CTRL(84),
+ TEGRA210_ADX_BYTE_MAP_CTRL(85),
+ TEGRA210_ADX_BYTE_MAP_CTRL(86),
+ TEGRA210_ADX_BYTE_MAP_CTRL(87),
+ TEGRA210_ADX_BYTE_MAP_CTRL(88),
+ TEGRA210_ADX_BYTE_MAP_CTRL(89),
+ TEGRA210_ADX_BYTE_MAP_CTRL(90),
+ TEGRA210_ADX_BYTE_MAP_CTRL(91),
+ TEGRA210_ADX_BYTE_MAP_CTRL(92),
+ TEGRA210_ADX_BYTE_MAP_CTRL(93),
+ TEGRA210_ADX_BYTE_MAP_CTRL(94),
+ TEGRA210_ADX_BYTE_MAP_CTRL(95),
+ TEGRA210_ADX_BYTE_MAP_CTRL(96),
+ TEGRA210_ADX_BYTE_MAP_CTRL(97),
+ TEGRA210_ADX_BYTE_MAP_CTRL(98),
+ TEGRA210_ADX_BYTE_MAP_CTRL(99),
+ TEGRA210_ADX_BYTE_MAP_CTRL(100),
+ TEGRA210_ADX_BYTE_MAP_CTRL(101),
+ TEGRA210_ADX_BYTE_MAP_CTRL(102),
+ TEGRA210_ADX_BYTE_MAP_CTRL(103),
+ TEGRA210_ADX_BYTE_MAP_CTRL(104),
+ TEGRA210_ADX_BYTE_MAP_CTRL(105),
+ TEGRA210_ADX_BYTE_MAP_CTRL(106),
+ TEGRA210_ADX_BYTE_MAP_CTRL(107),
+ TEGRA210_ADX_BYTE_MAP_CTRL(108),
+ TEGRA210_ADX_BYTE_MAP_CTRL(109),
+ TEGRA210_ADX_BYTE_MAP_CTRL(110),
+ TEGRA210_ADX_BYTE_MAP_CTRL(111),
+ TEGRA210_ADX_BYTE_MAP_CTRL(112),
+ TEGRA210_ADX_BYTE_MAP_CTRL(113),
+ TEGRA210_ADX_BYTE_MAP_CTRL(114),
+ TEGRA210_ADX_BYTE_MAP_CTRL(115),
+ TEGRA210_ADX_BYTE_MAP_CTRL(116),
+ TEGRA210_ADX_BYTE_MAP_CTRL(117),
+ TEGRA210_ADX_BYTE_MAP_CTRL(118),
+ TEGRA210_ADX_BYTE_MAP_CTRL(119),
+ TEGRA210_ADX_BYTE_MAP_CTRL(120),
+ TEGRA210_ADX_BYTE_MAP_CTRL(121),
+ TEGRA210_ADX_BYTE_MAP_CTRL(122),
+ TEGRA210_ADX_BYTE_MAP_CTRL(123),
+ TEGRA210_ADX_BYTE_MAP_CTRL(124),
+ TEGRA210_ADX_BYTE_MAP_CTRL(125),
+ TEGRA210_ADX_BYTE_MAP_CTRL(126),
+ TEGRA210_ADX_BYTE_MAP_CTRL(127),
+};
+
+static int tegra210_adx_component_probe(struct snd_soc_component *component)
+{
+ struct tegra210_adx *adx = snd_soc_component_get_drvdata(component);
+ int err = 0;
+
+ if (adx->soc_data->num_controls) {
+ err = snd_soc_add_component_controls(component, adx->soc_data->controls,
+ adx->soc_data->num_controls);
+ if (err)
+ dev_err(component->dev, "can't add ADX controls, err: %d\n", err);
+ }
+
+ return err;
+}
+
static const struct snd_soc_component_driver tegra210_adx_cmpnt = {
+ .probe = tegra210_adx_component_probe,
.dapm_widgets = tegra210_adx_widgets,
.num_dapm_widgets = ARRAY_SIZE(tegra210_adx_widgets),
.dapm_routes = tegra210_adx_routes,
@@ -460,6 +563,58 @@ static bool tegra210_adx_volatile_reg(struct device *dev,
return false;
}
+static bool tegra264_adx_wr_reg(struct device *dev,
+ unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_ADX_TX_INT_MASK ... TEGRA210_ADX_TX4_CIF_CTRL:
+ case TEGRA210_ADX_RX_INT_MASK ... TEGRA210_ADX_RX_CIF_CTRL:
+ case TEGRA210_ADX_ENABLE ... TEGRA210_ADX_CG:
+ case TEGRA210_ADX_CTRL ... TEGRA264_ADX_CYA:
+ case TEGRA264_ADX_CFG_RAM_CTRL ... TEGRA264_ADX_CFG_RAM_DATA:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool tegra264_adx_rd_reg(struct device *dev,
+ unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_ADX_RX_STATUS ... TEGRA210_ADX_RX_CIF_CTRL:
+ case TEGRA210_ADX_TX_STATUS ... TEGRA210_ADX_TX4_CIF_CTRL:
+ case TEGRA210_ADX_ENABLE ... TEGRA210_ADX_INT_STATUS:
+ case TEGRA210_ADX_CTRL ... TEGRA264_ADX_CFG_RAM_DATA:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool tegra264_adx_volatile_reg(struct device *dev,
+ unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_ADX_RX_STATUS:
+ case TEGRA210_ADX_RX_INT_STATUS:
+ case TEGRA210_ADX_RX_INT_SET:
+ case TEGRA210_ADX_TX_STATUS:
+ case TEGRA210_ADX_TX_INT_STATUS:
+ case TEGRA210_ADX_TX_INT_SET:
+ case TEGRA210_ADX_SOFT_RESET:
+ case TEGRA210_ADX_STATUS:
+ case TEGRA210_ADX_INT_STATUS:
+ case TEGRA264_ADX_CFG_RAM_CTRL:
+ case TEGRA264_ADX_CFG_RAM_DATA:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
static const struct regmap_config tegra210_adx_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
@@ -473,8 +628,40 @@ static const struct regmap_config tegra210_adx_regmap_config = {
.cache_type = REGCACHE_FLAT,
};
+static const struct regmap_config tegra264_adx_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = TEGRA264_ADX_CFG_RAM_DATA,
+ .writeable_reg = tegra264_adx_wr_reg,
+ .readable_reg = tegra264_adx_rd_reg,
+ .volatile_reg = tegra264_adx_volatile_reg,
+ .reg_defaults = tegra264_adx_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(tegra264_adx_reg_defaults),
+ .cache_type = REGCACHE_FLAT,
+};
+
+static const struct tegra210_adx_soc_data soc_data_tegra210 = {
+ .regmap_conf = &tegra210_adx_regmap_config,
+ .max_ch = TEGRA210_ADX_MAX_CHANNEL,
+ .ram_depth = TEGRA210_ADX_RAM_DEPTH,
+ .byte_mask_size = TEGRA210_ADX_BYTE_MASK_COUNT,
+ .cya_offset = TEGRA210_ADX_CYA_OFFSET,
+};
+
+static const struct tegra210_adx_soc_data soc_data_tegra264 = {
+ .regmap_conf = &tegra264_adx_regmap_config,
+ .max_ch = TEGRA264_ADX_MAX_CHANNEL,
+ .ram_depth = TEGRA264_ADX_RAM_DEPTH,
+ .byte_mask_size = TEGRA264_ADX_BYTE_MASK_COUNT,
+ .cya_offset = TEGRA264_ADX_CYA_OFFSET,
+ .controls = tegra264_adx_controls,
+ .num_controls = ARRAY_SIZE(tegra264_adx_controls),
+};
+
static const struct of_device_id tegra210_adx_of_match[] = {
- { .compatible = "nvidia,tegra210-adx" },
+ { .compatible = "nvidia,tegra210-adx", .data = &soc_data_tegra210 },
+ { .compatible = "nvidia,tegra264-adx", .data = &soc_data_tegra264 },
{},
};
MODULE_DEVICE_TABLE(of, tegra210_adx_of_match);
@@ -483,6 +670,8 @@ static int tegra210_adx_platform_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct tegra210_adx *adx;
+ const struct of_device_id *match;
+ struct tegra210_adx_soc_data *soc_data;
void __iomem *regs;
int err;
@@ -490,6 +679,10 @@ static int tegra210_adx_platform_probe(struct platform_device *pdev)
if (!adx)
return -ENOMEM;
+ match = of_match_device(tegra210_adx_of_match, dev);
+ soc_data = (struct tegra210_adx_soc_data *)match->data;
+ adx->soc_data = soc_data;
+
dev_set_drvdata(dev, adx);
regs = devm_platform_ioremap_resource(pdev, 0);
@@ -497,7 +690,7 @@ static int tegra210_adx_platform_probe(struct platform_device *pdev)
return PTR_ERR(regs);
adx->regmap = devm_regmap_init_mmio(dev, regs,
- &tegra210_adx_regmap_config);
+ soc_data->regmap_conf);
if (IS_ERR(adx->regmap)) {
dev_err(dev, "regmap init failed\n");
return PTR_ERR(adx->regmap);
@@ -505,6 +698,20 @@ static int tegra210_adx_platform_probe(struct platform_device *pdev)
regcache_cache_only(adx->regmap, true);
+ adx->map = devm_kzalloc(dev, soc_data->ram_depth * sizeof(*adx->map),
+ GFP_KERNEL);
+ if (!adx->map)
+ return -ENOMEM;
+
+ adx->byte_mask = devm_kzalloc(dev,
+ soc_data->byte_mask_size * sizeof(*adx->byte_mask),
+ GFP_KERNEL);
+ if (!adx->byte_mask)
+ return -ENOMEM;
+
+ tegra210_adx_dais[TEGRA_ADX_IN_DAI_ID].playback.channels_max =
+ adx->soc_data->max_ch;
+
err = devm_snd_soc_register_component(dev, &tegra210_adx_cmpnt,
tegra210_adx_dais,
ARRAY_SIZE(tegra210_adx_dais));
diff --git a/sound/soc/tegra/tegra210_adx.h b/sound/soc/tegra/tegra210_adx.h
index d7dcb6497978..176a4e40de0a 100644
--- a/sound/soc/tegra/tegra210_adx.h
+++ b/sound/soc/tegra/tegra210_adx.h
@@ -1,8 +1,7 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tegra210_adx.h - Definitions for Tegra210 ADX driver
+/* SPDX-License-Identifier: GPL-2.0-only
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION. All rights reserved.
*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * tegra210_adx.h - Definitions for Tegra210 ADX driver
*
*/
@@ -36,6 +35,10 @@
#define TEGRA210_ADX_CFG_RAM_CTRL 0xb8
#define TEGRA210_ADX_CFG_RAM_DATA 0xbc
+#define TEGRA264_ADX_CYA 0xb8
+#define TEGRA264_ADX_CFG_RAM_CTRL 0xc0
+#define TEGRA264_ADX_CFG_RAM_DATA 0xc4
+
/* Fields in TEGRA210_ADX_ENABLE */
#define TEGRA210_ADX_ENABLE_SHIFT 0
@@ -62,11 +65,32 @@
#define TEGRA210_ADX_MAP_STREAM_NUMBER_SHIFT 6
#define TEGRA210_ADX_MAP_WORD_NUMBER_SHIFT 2
#define TEGRA210_ADX_MAP_BYTE_NUMBER_SHIFT 0
+#define TEGRA210_ADX_BYTE_MASK_COUNT 2
+#define TEGRA210_ADX_MAX_CHANNEL 16
+#define TEGRA210_ADX_CYA_OFFSET 0
+
+#define TEGRA264_ADX_RAM_DEPTH 32
+#define TEGRA264_ADX_BYTE_MASK_COUNT 4
+#define TEGRA264_ADX_MAX_CHANNEL 32
+#define TEGRA264_ADX_CYA_OFFSET 8
+
+#define TEGRA_ADX_IN_DAI_ID 4
+
+struct tegra210_adx_soc_data {
+ const struct regmap_config *regmap_conf;
+ const struct snd_kcontrol_new *controls;
+ unsigned int num_controls;
+ unsigned int max_ch;
+ unsigned int ram_depth;
+ unsigned int byte_mask_size;
+ unsigned int cya_offset;
+};
struct tegra210_adx {
struct regmap *regmap;
- unsigned int map[TEGRA210_ADX_RAM_DEPTH];
- unsigned int byte_mask[2];
+ unsigned int *map;
+ unsigned int *byte_mask;
+ const struct tegra210_adx_soc_data *soc_data;
};
#endif
diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c
index 99683f292b5d..2376cc76e684 100644
--- a/sound/soc/tegra/tegra210_ahub.c
+++ b/sound/soc/tegra/tegra210_ahub.c
@@ -2,7 +2,7 @@
//
// tegra210_ahub.c - Tegra210 AHUB driver
//
-// Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#include <linux/clk.h>
#include <linux/device.h>
@@ -29,7 +29,7 @@ static int tegra_ahub_get_value_enum(struct snd_kcontrol *kctl,
for (i = 0; i < ahub->soc_data->reg_count; i++) {
unsigned int reg_val;
- reg = e->reg + (TEGRA210_XBAR_PART1_RX * i);
+ reg = e->reg + (ahub->soc_data->xbar_part_size * i);
reg_val = snd_soc_component_read(cmpnt, reg);
reg_val &= ahub->soc_data->mask[i];
@@ -80,7 +80,7 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl,
* different part of the MUX register.
*/
for (i = 0; i < ahub->soc_data->reg_count; i++) {
- update[i].reg = e->reg + (TEGRA210_XBAR_PART1_RX * i);
+ update[i].reg = e->reg + (ahub->soc_data->xbar_part_size * i);
update[i].val = (i == reg_idx) ? reg_val : 0;
update[i].mask = ahub->soc_data->mask[i];
update[i].kcontrol = kctl;
@@ -304,6 +304,164 @@ static struct snd_soc_dai_driver tegra186_ahub_dais[] = {
DAI(OPE1 TX),
};
+static struct snd_soc_dai_driver tegra264_ahub_dais[] = {
+ DAI(ADMAIF1),
+ DAI(ADMAIF2),
+ DAI(ADMAIF3),
+ DAI(ADMAIF4),
+ DAI(ADMAIF5),
+ DAI(ADMAIF6),
+ DAI(ADMAIF7),
+ DAI(ADMAIF8),
+ DAI(ADMAIF9),
+ DAI(ADMAIF10),
+ DAI(ADMAIF11),
+ DAI(ADMAIF12),
+ DAI(ADMAIF13),
+ DAI(ADMAIF14),
+ DAI(ADMAIF15),
+ DAI(ADMAIF16),
+ DAI(ADMAIF17),
+ DAI(ADMAIF18),
+ DAI(ADMAIF19),
+ DAI(ADMAIF20),
+ DAI(ADMAIF21),
+ DAI(ADMAIF22),
+ DAI(ADMAIF23),
+ DAI(ADMAIF24),
+ DAI(ADMAIF25),
+ DAI(ADMAIF26),
+ DAI(ADMAIF27),
+ DAI(ADMAIF28),
+ DAI(ADMAIF29),
+ DAI(ADMAIF30),
+ DAI(ADMAIF31),
+ DAI(ADMAIF32),
+ /* XBAR <-> I2S <-> Codec */
+ DAI(I2S1),
+ DAI(I2S2),
+ DAI(I2S3),
+ DAI(I2S4),
+ DAI(I2S5),
+ DAI(I2S6),
+ DAI(I2S7),
+ DAI(I2S8),
+ /* XBAR <-> DMIC <-> Codec */
+ DAI(DMIC1),
+ DAI(DMIC2),
+ /* XBAR <-> DSPK <-> Codec */
+ DAI(DSPK1),
+ /* XBAR -> SFC -> XBAR */
+ DAI(SFC1 RX),
+ DAI(SFC1 TX),
+ DAI(SFC2 RX),
+ DAI(SFC2 TX),
+ DAI(SFC3 RX),
+ DAI(SFC3 TX),
+ DAI(SFC4 RX),
+ DAI(SFC4 TX),
+ /* XBAR -> MVC -> XBAR */
+ DAI(MVC1 RX),
+ DAI(MVC1 TX),
+ DAI(MVC2 RX),
+ DAI(MVC2 TX),
+ /* XBAR -> AMX(4:1) -> XBAR */
+ DAI(AMX1 RX1),
+ DAI(AMX1 RX2),
+ DAI(AMX1 RX3),
+ DAI(AMX1 RX4),
+ DAI(AMX1),
+ DAI(AMX2 RX1),
+ DAI(AMX2 RX2),
+ DAI(AMX2 RX3),
+ DAI(AMX2 RX4),
+ DAI(AMX2),
+ DAI(AMX3 RX1),
+ DAI(AMX3 RX2),
+ DAI(AMX3 RX3),
+ DAI(AMX3 RX4),
+ DAI(AMX3),
+ DAI(AMX4 RX1),
+ DAI(AMX4 RX2),
+ DAI(AMX4 RX3),
+ DAI(AMX4 RX4),
+ DAI(AMX4),
+ DAI(AMX5 RX1),
+ DAI(AMX5 RX2),
+ DAI(AMX5 RX3),
+ DAI(AMX5 RX4),
+ DAI(AMX5),
+ DAI(AMX6 RX1),
+ DAI(AMX6 RX2),
+ DAI(AMX6 RX3),
+ DAI(AMX6 RX4),
+ DAI(AMX6),
+ /* XBAR -> ADX(1:4) -> XBAR */
+ DAI(ADX1),
+ DAI(ADX1 TX1),
+ DAI(ADX1 TX2),
+ DAI(ADX1 TX3),
+ DAI(ADX1 TX4),
+ DAI(ADX2),
+ DAI(ADX2 TX1),
+ DAI(ADX2 TX2),
+ DAI(ADX2 TX3),
+ DAI(ADX2 TX4),
+ DAI(ADX3),
+ DAI(ADX3 TX1),
+ DAI(ADX3 TX2),
+ DAI(ADX3 TX3),
+ DAI(ADX3 TX4),
+ DAI(ADX4),
+ DAI(ADX4 TX1),
+ DAI(ADX4 TX2),
+ DAI(ADX4 TX3),
+ DAI(ADX4 TX4),
+ DAI(ADX5),
+ DAI(ADX5 TX1),
+ DAI(ADX5 TX2),
+ DAI(ADX5 TX3),
+ DAI(ADX5 TX4),
+ DAI(ADX6),
+ DAI(ADX6 TX1),
+ DAI(ADX6 TX2),
+ DAI(ADX6 TX3),
+ DAI(ADX6 TX4),
+ /* XBAR -> MIXER1(10:5) -> XBAR */
+ DAI(MIXER1 RX1),
+ DAI(MIXER1 RX2),
+ DAI(MIXER1 RX3),
+ DAI(MIXER1 RX4),
+ DAI(MIXER1 RX5),
+ DAI(MIXER1 RX6),
+ DAI(MIXER1 RX7),
+ DAI(MIXER1 RX8),
+ DAI(MIXER1 RX9),
+ DAI(MIXER1 RX10),
+ DAI(MIXER1 TX1),
+ DAI(MIXER1 TX2),
+ DAI(MIXER1 TX3),
+ DAI(MIXER1 TX4),
+ DAI(MIXER1 TX5),
+ /* XBAR -> ASRC -> XBAR */
+ DAI(ASRC1 RX1),
+ DAI(ASRC1 TX1),
+ DAI(ASRC1 RX2),
+ DAI(ASRC1 TX2),
+ DAI(ASRC1 RX3),
+ DAI(ASRC1 TX3),
+ DAI(ASRC1 RX4),
+ DAI(ASRC1 TX4),
+ DAI(ASRC1 RX5),
+ DAI(ASRC1 TX5),
+ DAI(ASRC1 RX6),
+ DAI(ASRC1 TX6),
+ DAI(ASRC1 RX7),
+ /* XBAR -> OPE -> XBAR */
+ DAI(OPE1 RX),
+ DAI(OPE1 TX),
+};
+
static const char * const tegra210_ahub_mux_texts[] = {
"None",
"ADMAIF1",
@@ -421,6 +579,100 @@ static const char * const tegra186_ahub_mux_texts[] = {
"OPE1",
};
+static const char * const tegra264_ahub_mux_texts[] = {
+ "None",
+ "ADMAIF1",
+ "ADMAIF2",
+ "ADMAIF3",
+ "ADMAIF4",
+ "ADMAIF5",
+ "ADMAIF6",
+ "ADMAIF7",
+ "ADMAIF8",
+ "ADMAIF9",
+ "ADMAIF10",
+ "ADMAIF11",
+ "ADMAIF12",
+ "ADMAIF13",
+ "ADMAIF14",
+ "ADMAIF15",
+ "ADMAIF16",
+ "I2S1",
+ "I2S2",
+ "I2S3",
+ "I2S4",
+ "I2S5",
+ "I2S6",
+ "I2S7",
+ "I2S8",
+ "SFC1",
+ "SFC2",
+ "SFC3",
+ "SFC4",
+ "MIXER1 TX1",
+ "MIXER1 TX2",
+ "MIXER1 TX3",
+ "MIXER1 TX4",
+ "MIXER1 TX5",
+ "AMX1",
+ "AMX2",
+ "AMX3",
+ "AMX4",
+ "AMX5",
+ "AMX6",
+ "OPE1",
+ "MVC1",
+ "MVC2",
+ "DMIC1",
+ "DMIC2",
+ "ADX1 TX1",
+ "ADX1 TX2",
+ "ADX1 TX3",
+ "ADX1 TX4",
+ "ADX2 TX1",
+ "ADX2 TX2",
+ "ADX2 TX3",
+ "ADX2 TX4",
+ "ADX3 TX1",
+ "ADX3 TX2",
+ "ADX3 TX3",
+ "ADX3 TX4",
+ "ADX4 TX1",
+ "ADX4 TX2",
+ "ADX4 TX3",
+ "ADX4 TX4",
+ "ADX5 TX1",
+ "ADX5 TX2",
+ "ADX5 TX3",
+ "ADX5 TX4",
+ "ADX6 TX1",
+ "ADX6 TX2",
+ "ADX6 TX3",
+ "ADX6 TX4",
+ "ASRC1 TX1",
+ "ASRC1 TX2",
+ "ASRC1 TX3",
+ "ASRC1 TX4",
+ "ASRC1 TX5",
+ "ASRC1 TX6",
+ "ADMAIF17",
+ "ADMAIF18",
+ "ADMAIF19",
+ "ADMAIF20",
+ "ADMAIF21",
+ "ADMAIF22",
+ "ADMAIF23",
+ "ADMAIF24",
+ "ADMAIF25",
+ "ADMAIF26",
+ "ADMAIF27",
+ "ADMAIF28",
+ "ADMAIF29",
+ "ADMAIF30",
+ "ADMAIF31",
+ "ADMAIF32",
+};
+
static const unsigned int tegra210_ahub_mux_values[] = {
0,
/* ADMAIF */
@@ -558,6 +810,111 @@ static const unsigned int tegra186_ahub_mux_values[] = {
MUX_VALUE(2, 0),
};
+static const unsigned int tegra264_ahub_mux_values[] = {
+ 0,
+ /* ADMAIF */
+ MUX_VALUE(0, 0),
+ MUX_VALUE(0, 1),
+ MUX_VALUE(0, 2),
+ MUX_VALUE(0, 3),
+ MUX_VALUE(0, 4),
+ MUX_VALUE(0, 5),
+ MUX_VALUE(0, 6),
+ MUX_VALUE(0, 7),
+ MUX_VALUE(0, 8),
+ MUX_VALUE(0, 9),
+ MUX_VALUE(0, 10),
+ MUX_VALUE(0, 11),
+ MUX_VALUE(0, 12),
+ MUX_VALUE(0, 13),
+ MUX_VALUE(0, 14),
+ MUX_VALUE(0, 15),
+ /* I2S */
+ MUX_VALUE(0, 16),
+ MUX_VALUE(0, 17),
+ MUX_VALUE(0, 18),
+ MUX_VALUE(0, 19),
+ MUX_VALUE(0, 20),
+ MUX_VALUE(0, 21),
+ MUX_VALUE(0, 22),
+ MUX_VALUE(0, 23),
+ /* SFC */
+ MUX_VALUE(0, 24),
+ MUX_VALUE(0, 25),
+ MUX_VALUE(0, 26),
+ MUX_VALUE(0, 27),
+ /* MIXER */
+ MUX_VALUE(1, 0),
+ MUX_VALUE(1, 1),
+ MUX_VALUE(1, 2),
+ MUX_VALUE(1, 3),
+ MUX_VALUE(1, 4),
+ /* AMX */
+ MUX_VALUE(1, 8),
+ MUX_VALUE(1, 9),
+ MUX_VALUE(1, 10),
+ MUX_VALUE(1, 11),
+ MUX_VALUE(1, 12),
+ MUX_VALUE(1, 13),
+ /* OPE */
+ MUX_VALUE(2, 0),
+ /* MVC */
+ MUX_VALUE(2, 8),
+ MUX_VALUE(2, 9),
+ /* DMIC */
+ MUX_VALUE(2, 18),
+ MUX_VALUE(2, 19),
+ /* ADX */
+ MUX_VALUE(2, 24),
+ MUX_VALUE(2, 25),
+ MUX_VALUE(2, 26),
+ MUX_VALUE(2, 27),
+ MUX_VALUE(2, 28),
+ MUX_VALUE(2, 29),
+ MUX_VALUE(2, 30),
+ MUX_VALUE(2, 31),
+ MUX_VALUE(3, 0),
+ MUX_VALUE(3, 1),
+ MUX_VALUE(3, 2),
+ MUX_VALUE(3, 3),
+ MUX_VALUE(3, 4),
+ MUX_VALUE(3, 5),
+ MUX_VALUE(3, 6),
+ MUX_VALUE(3, 7),
+ MUX_VALUE(3, 8),
+ MUX_VALUE(3, 9),
+ MUX_VALUE(3, 10),
+ MUX_VALUE(3, 11),
+ MUX_VALUE(3, 12),
+ MUX_VALUE(3, 13),
+ MUX_VALUE(3, 14),
+ MUX_VALUE(3, 15),
+ /* ASRC */
+ MUX_VALUE(3, 24),
+ MUX_VALUE(3, 25),
+ MUX_VALUE(3, 26),
+ MUX_VALUE(3, 27),
+ MUX_VALUE(3, 28),
+ MUX_VALUE(3, 29),
+ /* ADMAIF */
+ MUX_VALUE(4, 7),
+ MUX_VALUE(4, 8),
+ MUX_VALUE(4, 9),
+ MUX_VALUE(4, 10),
+ MUX_VALUE(4, 11),
+ MUX_VALUE(4, 12),
+ MUX_VALUE(4, 13),
+ MUX_VALUE(4, 14),
+ MUX_VALUE(4, 15),
+ MUX_VALUE(4, 16),
+ MUX_VALUE(4, 17),
+ MUX_VALUE(4, 18),
+ MUX_VALUE(4, 19),
+ MUX_VALUE(4, 20),
+ MUX_VALUE(4, 21),
+ MUX_VALUE(4, 22),
+};
+
/* Controls for t210 */
MUX_ENUM_CTRL_DECL(t210_admaif1_tx, 0x00);
MUX_ENUM_CTRL_DECL(t210_admaif2_tx, 0x01);
@@ -712,6 +1069,103 @@ MUX_ENUM_CTRL_DECL_234(t234_asrc15_tx, 0x68);
MUX_ENUM_CTRL_DECL_234(t234_asrc16_tx, 0x69);
MUX_ENUM_CTRL_DECL_234(t234_asrc17_tx, 0x6a);
+/* Controls for t264 */
+MUX_ENUM_CTRL_DECL_264(t264_admaif1_tx, 0x00);
+MUX_ENUM_CTRL_DECL_264(t264_admaif2_tx, 0x01);
+MUX_ENUM_CTRL_DECL_264(t264_admaif3_tx, 0x02);
+MUX_ENUM_CTRL_DECL_264(t264_admaif4_tx, 0x03);
+MUX_ENUM_CTRL_DECL_264(t264_admaif5_tx, 0x04);
+MUX_ENUM_CTRL_DECL_264(t264_admaif6_tx, 0x05);
+MUX_ENUM_CTRL_DECL_264(t264_admaif7_tx, 0x06);
+MUX_ENUM_CTRL_DECL_264(t264_admaif8_tx, 0x07);
+MUX_ENUM_CTRL_DECL_264(t264_admaif9_tx, 0x08);
+MUX_ENUM_CTRL_DECL_264(t264_admaif10_tx, 0x09);
+MUX_ENUM_CTRL_DECL_264(t264_admaif11_tx, 0x0a);
+MUX_ENUM_CTRL_DECL_264(t264_admaif12_tx, 0x0b);
+MUX_ENUM_CTRL_DECL_264(t264_admaif13_tx, 0x0c);
+MUX_ENUM_CTRL_DECL_264(t264_admaif14_tx, 0x0d);
+MUX_ENUM_CTRL_DECL_264(t264_admaif15_tx, 0x0e);
+MUX_ENUM_CTRL_DECL_264(t264_admaif16_tx, 0x0f);
+MUX_ENUM_CTRL_DECL_264(t264_i2s1_tx, 0x10);
+MUX_ENUM_CTRL_DECL_264(t264_i2s2_tx, 0x11);
+MUX_ENUM_CTRL_DECL_264(t264_i2s3_tx, 0x12);
+MUX_ENUM_CTRL_DECL_264(t264_i2s4_tx, 0x13);
+MUX_ENUM_CTRL_DECL_264(t264_i2s5_tx, 0x14);
+MUX_ENUM_CTRL_DECL_264(t264_i2s6_tx, 0x15);
+MUX_ENUM_CTRL_DECL_264(t264_i2s7_tx, 0x16);
+MUX_ENUM_CTRL_DECL_264(t264_i2s8_tx, 0x17);
+MUX_ENUM_CTRL_DECL_264(t264_sfc1_tx, 0x18);
+MUX_ENUM_CTRL_DECL_264(t264_sfc2_tx, 0x19);
+MUX_ENUM_CTRL_DECL_264(t264_sfc3_tx, 0x1a);
+MUX_ENUM_CTRL_DECL_264(t264_sfc4_tx, 0x1b);
+MUX_ENUM_CTRL_DECL_264(t264_mixer11_tx, 0x20);
+MUX_ENUM_CTRL_DECL_264(t264_mixer12_tx, 0x21);
+MUX_ENUM_CTRL_DECL_264(t264_mixer13_tx, 0x22);
+MUX_ENUM_CTRL_DECL_264(t264_mixer14_tx, 0x23);
+MUX_ENUM_CTRL_DECL_264(t264_mixer15_tx, 0x24);
+MUX_ENUM_CTRL_DECL_264(t264_mixer16_tx, 0x25);
+MUX_ENUM_CTRL_DECL_264(t264_mixer17_tx, 0x26);
+MUX_ENUM_CTRL_DECL_264(t264_mixer18_tx, 0x27);
+MUX_ENUM_CTRL_DECL_264(t264_mixer19_tx, 0x28);
+MUX_ENUM_CTRL_DECL_264(t264_mixer110_tx, 0x29);
+MUX_ENUM_CTRL_DECL_264(t264_dspk1_tx, 0x30);
+MUX_ENUM_CTRL_DECL_264(t264_ope1_tx, 0x40);
+MUX_ENUM_CTRL_DECL_264(t264_mvc1_tx, 0x44);
+MUX_ENUM_CTRL_DECL_264(t264_mvc2_tx, 0x45);
+MUX_ENUM_CTRL_DECL_264(t264_amx11_tx, 0x48);
+MUX_ENUM_CTRL_DECL_264(t264_amx12_tx, 0x49);
+MUX_ENUM_CTRL_DECL_264(t264_amx13_tx, 0x4a);
+MUX_ENUM_CTRL_DECL_264(t264_amx14_tx, 0x4b);
+MUX_ENUM_CTRL_DECL_264(t264_amx21_tx, 0x4c);
+MUX_ENUM_CTRL_DECL_264(t264_amx22_tx, 0x4d);
+MUX_ENUM_CTRL_DECL_264(t264_amx23_tx, 0x4e);
+MUX_ENUM_CTRL_DECL_264(t264_amx24_tx, 0x4f);
+MUX_ENUM_CTRL_DECL_264(t264_amx31_tx, 0x50);
+MUX_ENUM_CTRL_DECL_264(t264_amx32_tx, 0x51);
+MUX_ENUM_CTRL_DECL_264(t264_amx33_tx, 0x52);
+MUX_ENUM_CTRL_DECL_264(t264_amx34_tx, 0x53);
+MUX_ENUM_CTRL_DECL_264(t264_adx1_tx, 0x58);
+MUX_ENUM_CTRL_DECL_264(t264_adx2_tx, 0x59);
+MUX_ENUM_CTRL_DECL_264(t264_adx3_tx, 0x5a);
+MUX_ENUM_CTRL_DECL_264(t264_adx4_tx, 0x5b);
+MUX_ENUM_CTRL_DECL_264(t264_amx41_tx, 0x5c);
+MUX_ENUM_CTRL_DECL_264(t264_amx42_tx, 0x5d);
+MUX_ENUM_CTRL_DECL_264(t264_amx43_tx, 0x5e);
+MUX_ENUM_CTRL_DECL_264(t264_amx44_tx, 0x5f);
+MUX_ENUM_CTRL_DECL_264(t264_admaif17_tx, 0x60);
+MUX_ENUM_CTRL_DECL_264(t264_admaif18_tx, 0x61);
+MUX_ENUM_CTRL_DECL_264(t264_admaif19_tx, 0x62);
+MUX_ENUM_CTRL_DECL_264(t264_admaif20_tx, 0x63);
+MUX_ENUM_CTRL_DECL_264(t264_asrc11_tx, 0x64);
+MUX_ENUM_CTRL_DECL_264(t264_asrc12_tx, 0x65);
+MUX_ENUM_CTRL_DECL_264(t264_asrc13_tx, 0x66);
+MUX_ENUM_CTRL_DECL_264(t264_asrc14_tx, 0x67);
+MUX_ENUM_CTRL_DECL_264(t264_asrc15_tx, 0x68);
+MUX_ENUM_CTRL_DECL_264(t264_asrc16_tx, 0x69);
+MUX_ENUM_CTRL_DECL_264(t264_asrc17_tx, 0x6a);
+MUX_ENUM_CTRL_DECL_264(t264_admaif21_tx, 0x74);
+MUX_ENUM_CTRL_DECL_264(t264_admaif22_tx, 0x75);
+MUX_ENUM_CTRL_DECL_264(t264_admaif23_tx, 0x76);
+MUX_ENUM_CTRL_DECL_264(t264_admaif24_tx, 0x77);
+MUX_ENUM_CTRL_DECL_264(t264_admaif25_tx, 0x78);
+MUX_ENUM_CTRL_DECL_264(t264_admaif26_tx, 0x79);
+MUX_ENUM_CTRL_DECL_264(t264_admaif27_tx, 0x7a);
+MUX_ENUM_CTRL_DECL_264(t264_admaif28_tx, 0x7b);
+MUX_ENUM_CTRL_DECL_264(t264_admaif29_tx, 0x7c);
+MUX_ENUM_CTRL_DECL_264(t264_admaif30_tx, 0x7d);
+MUX_ENUM_CTRL_DECL_264(t264_admaif31_tx, 0x7e);
+MUX_ENUM_CTRL_DECL_264(t264_admaif32_tx, 0x7f);
+MUX_ENUM_CTRL_DECL_264(t264_amx51_tx, 0x80);
+MUX_ENUM_CTRL_DECL_264(t264_amx52_tx, 0x81);
+MUX_ENUM_CTRL_DECL_264(t264_amx53_tx, 0x82);
+MUX_ENUM_CTRL_DECL_264(t264_amx54_tx, 0x83);
+MUX_ENUM_CTRL_DECL_264(t264_amx61_tx, 0x84);
+MUX_ENUM_CTRL_DECL_264(t264_amx62_tx, 0x85);
+MUX_ENUM_CTRL_DECL_264(t264_amx63_tx, 0x86);
+MUX_ENUM_CTRL_DECL_264(t264_amx64_tx, 0x87);
+MUX_ENUM_CTRL_DECL_264(t264_adx5_tx, 0x88);
+MUX_ENUM_CTRL_DECL_264(t264_adx6_tx, 0x89);
+
static const struct snd_soc_dapm_widget tegra210_ahub_widgets[] = {
WIDGETS("ADMAIF1", t210_admaif1_tx),
WIDGETS("ADMAIF2", t210_admaif2_tx),
@@ -996,6 +1450,147 @@ static const struct snd_soc_dapm_widget tegra234_ahub_widgets[] = {
WIDGETS("OPE1", t186_ope1_tx),
};
+static const struct snd_soc_dapm_widget tegra264_ahub_widgets[] = {
+ WIDGETS("ADMAIF1", t264_admaif1_tx),
+ WIDGETS("ADMAIF2", t264_admaif2_tx),
+ WIDGETS("ADMAIF3", t264_admaif3_tx),
+ WIDGETS("ADMAIF4", t264_admaif4_tx),
+ WIDGETS("ADMAIF5", t264_admaif5_tx),
+ WIDGETS("ADMAIF6", t264_admaif6_tx),
+ WIDGETS("ADMAIF7", t264_admaif7_tx),
+ WIDGETS("ADMAIF8", t264_admaif8_tx),
+ WIDGETS("ADMAIF9", t264_admaif9_tx),
+ WIDGETS("ADMAIF10", t264_admaif10_tx),
+ WIDGETS("ADMAIF11", t264_admaif11_tx),
+ WIDGETS("ADMAIF12", t264_admaif12_tx),
+ WIDGETS("ADMAIF13", t264_admaif13_tx),
+ WIDGETS("ADMAIF14", t264_admaif14_tx),
+ WIDGETS("ADMAIF15", t264_admaif15_tx),
+ WIDGETS("ADMAIF16", t264_admaif16_tx),
+ WIDGETS("ADMAIF17", t264_admaif17_tx),
+ WIDGETS("ADMAIF18", t264_admaif18_tx),
+ WIDGETS("ADMAIF19", t264_admaif19_tx),
+ WIDGETS("ADMAIF20", t264_admaif20_tx),
+ WIDGETS("ADMAIF21", t264_admaif21_tx),
+ WIDGETS("ADMAIF22", t264_admaif22_tx),
+ WIDGETS("ADMAIF23", t264_admaif23_tx),
+ WIDGETS("ADMAIF24", t264_admaif24_tx),
+ WIDGETS("ADMAIF25", t264_admaif25_tx),
+ WIDGETS("ADMAIF26", t264_admaif26_tx),
+ WIDGETS("ADMAIF27", t264_admaif27_tx),
+ WIDGETS("ADMAIF28", t264_admaif28_tx),
+ WIDGETS("ADMAIF29", t264_admaif29_tx),
+ WIDGETS("ADMAIF30", t264_admaif30_tx),
+ WIDGETS("ADMAIF31", t264_admaif31_tx),
+ WIDGETS("ADMAIF32", t264_admaif32_tx),
+ WIDGETS("I2S1", t264_i2s1_tx),
+ WIDGETS("I2S2", t264_i2s2_tx),
+ WIDGETS("I2S3", t264_i2s3_tx),
+ WIDGETS("I2S4", t264_i2s4_tx),
+ WIDGETS("I2S5", t264_i2s5_tx),
+ WIDGETS("I2S6", t264_i2s6_tx),
+ WIDGETS("I2S7", t264_i2s7_tx),
+ WIDGETS("I2S8", t264_i2s8_tx),
+ TX_WIDGETS("DMIC1"),
+ TX_WIDGETS("DMIC2"),
+ WIDGETS("DSPK1", t264_dspk1_tx),
+ WIDGETS("SFC1", t264_sfc1_tx),
+ WIDGETS("SFC2", t264_sfc2_tx),
+ WIDGETS("SFC3", t264_sfc3_tx),
+ WIDGETS("SFC4", t264_sfc4_tx),
+ WIDGETS("MVC1", t264_mvc1_tx),
+ WIDGETS("MVC2", t264_mvc2_tx),
+ WIDGETS("AMX1 RX1", t264_amx11_tx),
+ WIDGETS("AMX1 RX2", t264_amx12_tx),
+ WIDGETS("AMX1 RX3", t264_amx13_tx),
+ WIDGETS("AMX1 RX4", t264_amx14_tx),
+ WIDGETS("AMX2 RX1", t264_amx21_tx),
+ WIDGETS("AMX2 RX2", t264_amx22_tx),
+ WIDGETS("AMX2 RX3", t264_amx23_tx),
+ WIDGETS("AMX2 RX4", t264_amx24_tx),
+ WIDGETS("AMX3 RX1", t264_amx31_tx),
+ WIDGETS("AMX3 RX2", t264_amx32_tx),
+ WIDGETS("AMX3 RX3", t264_amx33_tx),
+ WIDGETS("AMX3 RX4", t264_amx34_tx),
+ WIDGETS("AMX4 RX1", t264_amx41_tx),
+ WIDGETS("AMX4 RX2", t264_amx42_tx),
+ WIDGETS("AMX4 RX3", t264_amx43_tx),
+ WIDGETS("AMX4 RX4", t264_amx44_tx),
+ WIDGETS("AMX5 RX1", t264_amx51_tx),
+ WIDGETS("AMX5 RX2", t264_amx52_tx),
+ WIDGETS("AMX5 RX3", t264_amx53_tx),
+ WIDGETS("AMX5 RX4", t264_amx54_tx),
+ WIDGETS("AMX6 RX1", t264_amx61_tx),
+ WIDGETS("AMX6 RX2", t264_amx62_tx),
+ WIDGETS("AMX6 RX3", t264_amx63_tx),
+ WIDGETS("AMX6 RX4", t264_amx64_tx),
+ TX_WIDGETS("AMX1"),
+ TX_WIDGETS("AMX2"),
+ TX_WIDGETS("AMX3"),
+ TX_WIDGETS("AMX4"),
+ TX_WIDGETS("AMX5"),
+ TX_WIDGETS("AMX6"),
+ WIDGETS("ADX1", t264_adx1_tx),
+ WIDGETS("ADX2", t264_adx2_tx),
+ WIDGETS("ADX3", t264_adx3_tx),
+ WIDGETS("ADX4", t264_adx4_tx),
+ WIDGETS("ADX5", t264_adx5_tx),
+ WIDGETS("ADX6", t264_adx6_tx),
+ TX_WIDGETS("ADX1 TX1"),
+ TX_WIDGETS("ADX1 TX2"),
+ TX_WIDGETS("ADX1 TX3"),
+ TX_WIDGETS("ADX1 TX4"),
+ TX_WIDGETS("ADX2 TX1"),
+ TX_WIDGETS("ADX2 TX2"),
+ TX_WIDGETS("ADX2 TX3"),
+ TX_WIDGETS("ADX2 TX4"),
+ TX_WIDGETS("ADX3 TX1"),
+ TX_WIDGETS("ADX3 TX2"),
+ TX_WIDGETS("ADX3 TX3"),
+ TX_WIDGETS("ADX3 TX4"),
+ TX_WIDGETS("ADX4 TX1"),
+ TX_WIDGETS("ADX4 TX2"),
+ TX_WIDGETS("ADX4 TX3"),
+ TX_WIDGETS("ADX4 TX4"),
+ TX_WIDGETS("ADX5 TX1"),
+ TX_WIDGETS("ADX5 TX2"),
+ TX_WIDGETS("ADX5 TX3"),
+ TX_WIDGETS("ADX5 TX4"),
+ TX_WIDGETS("ADX6 TX1"),
+ TX_WIDGETS("ADX6 TX2"),
+ TX_WIDGETS("ADX6 TX3"),
+ TX_WIDGETS("ADX6 TX4"),
+ WIDGETS("MIXER1 RX1", t264_mixer11_tx),
+ WIDGETS("MIXER1 RX2", t264_mixer12_tx),
+ WIDGETS("MIXER1 RX3", t264_mixer13_tx),
+ WIDGETS("MIXER1 RX4", t264_mixer14_tx),
+ WIDGETS("MIXER1 RX5", t264_mixer15_tx),
+ WIDGETS("MIXER1 RX6", t264_mixer16_tx),
+ WIDGETS("MIXER1 RX7", t264_mixer17_tx),
+ WIDGETS("MIXER1 RX8", t264_mixer18_tx),
+ WIDGETS("MIXER1 RX9", t264_mixer19_tx),
+ WIDGETS("MIXER1 RX10", t264_mixer110_tx),
+ TX_WIDGETS("MIXER1 TX1"),
+ TX_WIDGETS("MIXER1 TX2"),
+ TX_WIDGETS("MIXER1 TX3"),
+ TX_WIDGETS("MIXER1 TX4"),
+ TX_WIDGETS("MIXER1 TX5"),
+ WIDGETS("ASRC1 RX1", t264_asrc11_tx),
+ WIDGETS("ASRC1 RX2", t264_asrc12_tx),
+ WIDGETS("ASRC1 RX3", t264_asrc13_tx),
+ WIDGETS("ASRC1 RX4", t264_asrc14_tx),
+ WIDGETS("ASRC1 RX5", t264_asrc15_tx),
+ WIDGETS("ASRC1 RX6", t264_asrc16_tx),
+ WIDGETS("ASRC1 RX7", t264_asrc17_tx),
+ TX_WIDGETS("ASRC1 TX1"),
+ TX_WIDGETS("ASRC1 TX2"),
+ TX_WIDGETS("ASRC1 TX3"),
+ TX_WIDGETS("ASRC1 TX4"),
+ TX_WIDGETS("ASRC1 TX5"),
+ TX_WIDGETS("ASRC1 TX6"),
+ WIDGETS("OPE1", t264_ope1_tx),
+};
+
#define TEGRA_COMMON_MUX_ROUTES(name) \
{ name " XBAR-TX", NULL, name " Mux" }, \
{ name " Mux", "ADMAIF1", "ADMAIF1 XBAR-RX" }, \
@@ -1015,7 +1610,6 @@ static const struct snd_soc_dapm_widget tegra234_ahub_widgets[] = {
{ name " Mux", "I2S5", "I2S5 XBAR-RX" }, \
{ name " Mux", "DMIC1", "DMIC1 XBAR-RX" }, \
{ name " Mux", "DMIC2", "DMIC2 XBAR-RX" }, \
- { name " Mux", "DMIC3", "DMIC3 XBAR-RX" }, \
{ name " Mux", "SFC1", "SFC1 XBAR-RX" }, \
{ name " Mux", "SFC2", "SFC2 XBAR-RX" }, \
{ name " Mux", "SFC3", "SFC3 XBAR-RX" }, \
@@ -1040,6 +1634,7 @@ static const struct snd_soc_dapm_widget tegra234_ahub_widgets[] = {
{ name " Mux", "OPE1", "OPE1 XBAR-RX" },
#define TEGRA210_ONLY_MUX_ROUTES(name) \
+ { name " Mux", "DMIC3", "DMIC3 XBAR-RX" }, \
{ name " Mux", "OPE2", "OPE2 XBAR-RX" },
#define TEGRA186_ONLY_MUX_ROUTES(name) \
@@ -1054,6 +1649,7 @@ static const struct snd_soc_dapm_widget tegra234_ahub_widgets[] = {
{ name " Mux", "ADMAIF19", "ADMAIF19 XBAR-RX" }, \
{ name " Mux", "ADMAIF20", "ADMAIF20 XBAR-RX" }, \
{ name " Mux", "I2S6", "I2S6 XBAR-RX" }, \
+ { name " Mux", "DMIC3", "DMIC3 XBAR-RX" }, \
{ name " Mux", "DMIC4", "DMIC4 XBAR-RX" }, \
{ name " Mux", "AMX3", "AMX3 XBAR-RX" }, \
{ name " Mux", "AMX4", "AMX4 XBAR-RX" }, \
@@ -1072,6 +1668,59 @@ static const struct snd_soc_dapm_widget tegra234_ahub_widgets[] = {
{ name " Mux", "ASRC1 TX5", "ASRC1 TX5 XBAR-RX" }, \
{ name " Mux", "ASRC1 TX6", "ASRC1 TX6 XBAR-RX" },
+#define TEGRA264_ONLY_MUX_ROUTES(name) \
+ { name " Mux", "ADMAIF11", "ADMAIF11 XBAR-RX" }, \
+ { name " Mux", "ADMAIF12", "ADMAIF12 XBAR-RX" }, \
+ { name " Mux", "ADMAIF13", "ADMAIF13 XBAR-RX" }, \
+ { name " Mux", "ADMAIF14", "ADMAIF14 XBAR-RX" }, \
+ { name " Mux", "ADMAIF15", "ADMAIF15 XBAR-RX" }, \
+ { name " Mux", "ADMAIF16", "ADMAIF16 XBAR-RX" }, \
+ { name " Mux", "ADMAIF17", "ADMAIF17 XBAR-RX" }, \
+ { name " Mux", "ADMAIF18", "ADMAIF18 XBAR-RX" }, \
+ { name " Mux", "ADMAIF19", "ADMAIF19 XBAR-RX" }, \
+ { name " Mux", "ADMAIF20", "ADMAIF20 XBAR-RX" }, \
+ { name " Mux", "ADMAIF21", "ADMAIF21 XBAR-RX" }, \
+ { name " Mux", "ADMAIF22", "ADMAIF22 XBAR-RX" }, \
+ { name " Mux", "ADMAIF23", "ADMAIF23 XBAR-RX" }, \
+ { name " Mux", "ADMAIF24", "ADMAIF24 XBAR-RX" }, \
+ { name " Mux", "ADMAIF25", "ADMAIF25 XBAR-RX" }, \
+ { name " Mux", "ADMAIF26", "ADMAIF26 XBAR-RX" }, \
+ { name " Mux", "ADMAIF27", "ADMAIF27 XBAR-RX" }, \
+ { name " Mux", "ADMAIF28", "ADMAIF28 XBAR-RX" }, \
+ { name " Mux", "ADMAIF29", "ADMAIF29 XBAR-RX" }, \
+ { name " Mux", "ADMAIF30", "ADMAIF30 XBAR-RX" }, \
+ { name " Mux", "ADMAIF31", "ADMAIF31 XBAR-RX" }, \
+ { name " Mux", "ADMAIF32", "ADMAIF32 XBAR-RX" }, \
+ { name " Mux", "I2S6", "I2S6 XBAR-RX" }, \
+ { name " Mux", "I2S7", "I2S7 XBAR-RX" }, \
+ { name " Mux", "I2S8", "I2S8 XBAR-RX" }, \
+ { name " Mux", "AMX3", "AMX3 XBAR-RX" }, \
+ { name " Mux", "AMX4", "AMX4 XBAR-RX" }, \
+ { name " Mux", "AMX5", "AMX5 XBAR-RX" }, \
+ { name " Mux", "AMX6", "AMX6 XBAR-RX" }, \
+ { name " Mux", "ADX3 TX1", "ADX3 TX1 XBAR-RX" }, \
+ { name " Mux", "ADX3 TX2", "ADX3 TX2 XBAR-RX" }, \
+ { name " Mux", "ADX3 TX3", "ADX3 TX3 XBAR-RX" }, \
+ { name " Mux", "ADX3 TX4", "ADX3 TX4 XBAR-RX" }, \
+ { name " Mux", "ADX4 TX1", "ADX4 TX1 XBAR-RX" }, \
+ { name " Mux", "ADX4 TX2", "ADX4 TX2 XBAR-RX" }, \
+ { name " Mux", "ADX4 TX3", "ADX4 TX3 XBAR-RX" }, \
+ { name " Mux", "ADX4 TX4", "ADX4 TX4 XBAR-RX" }, \
+ { name " Mux", "ADX5 TX1", "ADX5 TX1 XBAR-RX" }, \
+ { name " Mux", "ADX5 TX2", "ADX5 TX2 XBAR-RX" }, \
+ { name " Mux", "ADX5 TX3", "ADX5 TX3 XBAR-RX" }, \
+ { name " Mux", "ADX5 TX4", "ADX5 TX4 XBAR-RX" }, \
+ { name " Mux", "ADX6 TX1", "ADX6 TX1 XBAR-RX" }, \
+ { name " Mux", "ADX6 TX2", "ADX6 TX2 XBAR-RX" }, \
+ { name " Mux", "ADX6 TX3", "ADX6 TX3 XBAR-RX" }, \
+ { name " Mux", "ADX6 TX4", "ADX6 TX4 XBAR-RX" }, \
+ { name " Mux", "ASRC1 TX1", "ASRC1 TX1 XBAR-RX" }, \
+ { name " Mux", "ASRC1 TX2", "ASRC1 TX2 XBAR-RX" }, \
+ { name " Mux", "ASRC1 TX3", "ASRC1 TX3 XBAR-RX" }, \
+ { name " Mux", "ASRC1 TX4", "ASRC1 TX4 XBAR-RX" }, \
+ { name " Mux", "ASRC1 TX5", "ASRC1 TX5 XBAR-RX" }, \
+ { name " Mux", "ASRC1 TX6", "ASRC1 TX6 XBAR-RX" },
+
#define TEGRA210_MUX_ROUTES(name) \
TEGRA_COMMON_MUX_ROUTES(name) \
TEGRA210_ONLY_MUX_ROUTES(name)
@@ -1080,6 +1729,10 @@ static const struct snd_soc_dapm_widget tegra234_ahub_widgets[] = {
TEGRA_COMMON_MUX_ROUTES(name) \
TEGRA186_ONLY_MUX_ROUTES(name)
+#define TEGRA264_MUX_ROUTES(name) \
+ TEGRA_COMMON_MUX_ROUTES(name) \
+ TEGRA264_ONLY_MUX_ROUTES(name)
+
/* Connect FEs with XBAR */
#define TEGRA_FE_ROUTES(name) \
{ name " XBAR-Playback", NULL, name " Playback" }, \
@@ -1238,6 +1891,136 @@ static const struct snd_soc_dapm_route tegra186_ahub_routes[] = {
TEGRA186_MUX_ROUTES("OPE1")
};
+static const struct snd_soc_dapm_route tegra264_ahub_routes[] = {
+ TEGRA_FE_ROUTES("ADMAIF1")
+ TEGRA_FE_ROUTES("ADMAIF2")
+ TEGRA_FE_ROUTES("ADMAIF3")
+ TEGRA_FE_ROUTES("ADMAIF4")
+ TEGRA_FE_ROUTES("ADMAIF5")
+ TEGRA_FE_ROUTES("ADMAIF6")
+ TEGRA_FE_ROUTES("ADMAIF7")
+ TEGRA_FE_ROUTES("ADMAIF8")
+ TEGRA_FE_ROUTES("ADMAIF9")
+ TEGRA_FE_ROUTES("ADMAIF10")
+ TEGRA_FE_ROUTES("ADMAIF11")
+ TEGRA_FE_ROUTES("ADMAIF12")
+ TEGRA_FE_ROUTES("ADMAIF13")
+ TEGRA_FE_ROUTES("ADMAIF14")
+ TEGRA_FE_ROUTES("ADMAIF15")
+ TEGRA_FE_ROUTES("ADMAIF16")
+ TEGRA_FE_ROUTES("ADMAIF17")
+ TEGRA_FE_ROUTES("ADMAIF18")
+ TEGRA_FE_ROUTES("ADMAIF19")
+ TEGRA_FE_ROUTES("ADMAIF20")
+ TEGRA_FE_ROUTES("ADMAIF21")
+ TEGRA_FE_ROUTES("ADMAIF22")
+ TEGRA_FE_ROUTES("ADMAIF23")
+ TEGRA_FE_ROUTES("ADMAIF24")
+ TEGRA_FE_ROUTES("ADMAIF25")
+ TEGRA_FE_ROUTES("ADMAIF26")
+ TEGRA_FE_ROUTES("ADMAIF27")
+ TEGRA_FE_ROUTES("ADMAIF28")
+ TEGRA_FE_ROUTES("ADMAIF29")
+ TEGRA_FE_ROUTES("ADMAIF30")
+ TEGRA_FE_ROUTES("ADMAIF31")
+ TEGRA_FE_ROUTES("ADMAIF32")
+ TEGRA264_MUX_ROUTES("ADMAIF1")
+ TEGRA264_MUX_ROUTES("ADMAIF2")
+ TEGRA264_MUX_ROUTES("ADMAIF3")
+ TEGRA264_MUX_ROUTES("ADMAIF4")
+ TEGRA264_MUX_ROUTES("ADMAIF5")
+ TEGRA264_MUX_ROUTES("ADMAIF6")
+ TEGRA264_MUX_ROUTES("ADMAIF7")
+ TEGRA264_MUX_ROUTES("ADMAIF8")
+ TEGRA264_MUX_ROUTES("ADMAIF9")
+ TEGRA264_MUX_ROUTES("ADMAIF10")
+ TEGRA264_MUX_ROUTES("ADMAIF11")
+ TEGRA264_MUX_ROUTES("ADMAIF12")
+ TEGRA264_MUX_ROUTES("ADMAIF13")
+ TEGRA264_MUX_ROUTES("ADMAIF14")
+ TEGRA264_MUX_ROUTES("ADMAIF15")
+ TEGRA264_MUX_ROUTES("ADMAIF16")
+ TEGRA264_MUX_ROUTES("ADMAIF17")
+ TEGRA264_MUX_ROUTES("ADMAIF18")
+ TEGRA264_MUX_ROUTES("ADMAIF19")
+ TEGRA264_MUX_ROUTES("ADMAIF20")
+ TEGRA264_MUX_ROUTES("ADMAIF21")
+ TEGRA264_MUX_ROUTES("ADMAIF22")
+ TEGRA264_MUX_ROUTES("ADMAIF23")
+ TEGRA264_MUX_ROUTES("ADMAIF24")
+ TEGRA264_MUX_ROUTES("ADMAIF25")
+ TEGRA264_MUX_ROUTES("ADMAIF26")
+ TEGRA264_MUX_ROUTES("ADMAIF27")
+ TEGRA264_MUX_ROUTES("ADMAIF28")
+ TEGRA264_MUX_ROUTES("ADMAIF29")
+ TEGRA264_MUX_ROUTES("ADMAIF30")
+ TEGRA264_MUX_ROUTES("ADMAIF31")
+ TEGRA264_MUX_ROUTES("ADMAIF32")
+ TEGRA264_MUX_ROUTES("I2S1")
+ TEGRA264_MUX_ROUTES("I2S2")
+ TEGRA264_MUX_ROUTES("I2S3")
+ TEGRA264_MUX_ROUTES("I2S4")
+ TEGRA264_MUX_ROUTES("I2S5")
+ TEGRA264_MUX_ROUTES("I2S6")
+ TEGRA264_MUX_ROUTES("I2S7")
+ TEGRA264_MUX_ROUTES("I2S8")
+ TEGRA264_MUX_ROUTES("DSPK1")
+ TEGRA264_MUX_ROUTES("SFC1")
+ TEGRA264_MUX_ROUTES("SFC2")
+ TEGRA264_MUX_ROUTES("SFC3")
+ TEGRA264_MUX_ROUTES("SFC4")
+ TEGRA264_MUX_ROUTES("MVC1")
+ TEGRA264_MUX_ROUTES("MVC2")
+ TEGRA264_MUX_ROUTES("AMX1 RX1")
+ TEGRA264_MUX_ROUTES("AMX1 RX2")
+ TEGRA264_MUX_ROUTES("AMX1 RX3")
+ TEGRA264_MUX_ROUTES("AMX1 RX4")
+ TEGRA264_MUX_ROUTES("AMX2 RX1")
+ TEGRA264_MUX_ROUTES("AMX2 RX2")
+ TEGRA264_MUX_ROUTES("AMX2 RX3")
+ TEGRA264_MUX_ROUTES("AMX2 RX4")
+ TEGRA264_MUX_ROUTES("AMX3 RX1")
+ TEGRA264_MUX_ROUTES("AMX3 RX2")
+ TEGRA264_MUX_ROUTES("AMX3 RX3")
+ TEGRA264_MUX_ROUTES("AMX3 RX4")
+ TEGRA264_MUX_ROUTES("AMX4 RX1")
+ TEGRA264_MUX_ROUTES("AMX4 RX2")
+ TEGRA264_MUX_ROUTES("AMX4 RX3")
+ TEGRA264_MUX_ROUTES("AMX4 RX4")
+ TEGRA264_MUX_ROUTES("AMX5 RX1")
+ TEGRA264_MUX_ROUTES("AMX5 RX2")
+ TEGRA264_MUX_ROUTES("AMX5 RX3")
+ TEGRA264_MUX_ROUTES("AMX5 RX4")
+ TEGRA264_MUX_ROUTES("AMX6 RX1")
+ TEGRA264_MUX_ROUTES("AMX6 RX2")
+ TEGRA264_MUX_ROUTES("AMX6 RX3")
+ TEGRA264_MUX_ROUTES("AMX6 RX4")
+ TEGRA264_MUX_ROUTES("ADX1")
+ TEGRA264_MUX_ROUTES("ADX2")
+ TEGRA264_MUX_ROUTES("ADX3")
+ TEGRA264_MUX_ROUTES("ADX4")
+ TEGRA264_MUX_ROUTES("ADX5")
+ TEGRA264_MUX_ROUTES("ADX6")
+ TEGRA264_MUX_ROUTES("MIXER1 RX1")
+ TEGRA264_MUX_ROUTES("MIXER1 RX2")
+ TEGRA264_MUX_ROUTES("MIXER1 RX3")
+ TEGRA264_MUX_ROUTES("MIXER1 RX4")
+ TEGRA264_MUX_ROUTES("MIXER1 RX5")
+ TEGRA264_MUX_ROUTES("MIXER1 RX6")
+ TEGRA264_MUX_ROUTES("MIXER1 RX7")
+ TEGRA264_MUX_ROUTES("MIXER1 RX8")
+ TEGRA264_MUX_ROUTES("MIXER1 RX9")
+ TEGRA264_MUX_ROUTES("MIXER1 RX10")
+ TEGRA264_MUX_ROUTES("ASRC1 RX1")
+ TEGRA264_MUX_ROUTES("ASRC1 RX2")
+ TEGRA264_MUX_ROUTES("ASRC1 RX3")
+ TEGRA264_MUX_ROUTES("ASRC1 RX4")
+ TEGRA264_MUX_ROUTES("ASRC1 RX5")
+ TEGRA264_MUX_ROUTES("ASRC1 RX6")
+ TEGRA264_MUX_ROUTES("ASRC1 RX7")
+ TEGRA264_MUX_ROUTES("OPE1")
+};
+
static const struct snd_soc_component_driver tegra210_ahub_component = {
.dapm_widgets = tegra210_ahub_widgets,
.num_dapm_widgets = ARRAY_SIZE(tegra210_ahub_widgets),
@@ -1259,6 +2042,36 @@ static const struct snd_soc_component_driver tegra234_ahub_component = {
.num_dapm_routes = ARRAY_SIZE(tegra186_ahub_routes),
};
+static const struct snd_soc_component_driver tegra264_ahub_component = {
+ .dapm_widgets = tegra264_ahub_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(tegra264_ahub_widgets),
+ .dapm_routes = tegra264_ahub_routes,
+ .num_dapm_routes = ARRAY_SIZE(tegra264_ahub_routes),
+};
+
+static bool tegra264_ahub_wr_reg(struct device *dev, unsigned int reg)
+{
+ int part;
+
+ for (part = 0; part < TEGRA264_XBAR_UPDATE_MAX_REG; part++) {
+ switch (reg & ~(part << 12)) {
+ case TEGRA264_AXBAR_ADMAIF_RX1 ... TEGRA264_AXBAR_SFC4_RX1:
+ case TEGRA264_AXBAR_MIXER1_RX1 ... TEGRA264_AXBAR_MIXER1_RX10:
+ case TEGRA264_AXBAR_DSPK1_RX1:
+ case TEGRA264_AXBAR_OPE1_RX1:
+ case TEGRA264_AXBAR_MVC1_RX1 ... TEGRA264_AXBAR_MVC2_RX1:
+ case TEGRA264_AXBAR_AMX1_RX1 ... TEGRA264_AXBAR_AMX3_RX4:
+ case TEGRA264_AXBAR_ADX1_RX1 ... TEGRA264_AXBAR_ASRC1_RX7:
+ case TEGRA264_AXBAR_ADMAIF_RX21 ... TEGRA264_AXBAR_ADX6_RX1:
+ return true;
+ default:
+ break;
+ };
+ }
+
+ return false;
+}
+
static const struct regmap_config tegra210_ahub_regmap_config = {
.reg_bits = 32,
.val_bits = 32,
@@ -1275,6 +2088,15 @@ static const struct regmap_config tegra186_ahub_regmap_config = {
.cache_type = REGCACHE_FLAT,
};
+static const struct regmap_config tegra264_ahub_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .writeable_reg = tegra264_ahub_wr_reg,
+ .max_register = TEGRA264_MAX_REGISTER_ADDR,
+ .cache_type = REGCACHE_FLAT,
+};
+
static const struct tegra_ahub_soc_data soc_data_tegra210 = {
.cmpnt_drv = &tegra210_ahub_component,
.dai_drv = tegra210_ahub_dais,
@@ -1285,6 +2107,7 @@ static const struct tegra_ahub_soc_data soc_data_tegra210 = {
.mask[2] = TEGRA210_XBAR_REG_MASK_2,
.mask[3] = TEGRA210_XBAR_REG_MASK_3,
.reg_count = TEGRA210_XBAR_UPDATE_MAX_REG,
+ .xbar_part_size = TEGRA210_XBAR_PART1_RX,
};
static const struct tegra_ahub_soc_data soc_data_tegra186 = {
@@ -1297,6 +2120,7 @@ static const struct tegra_ahub_soc_data soc_data_tegra186 = {
.mask[2] = TEGRA186_XBAR_REG_MASK_2,
.mask[3] = TEGRA186_XBAR_REG_MASK_3,
.reg_count = TEGRA186_XBAR_UPDATE_MAX_REG,
+ .xbar_part_size = TEGRA210_XBAR_PART1_RX,
};
static const struct tegra_ahub_soc_data soc_data_tegra234 = {
@@ -1309,12 +2133,28 @@ static const struct tegra_ahub_soc_data soc_data_tegra234 = {
.mask[2] = TEGRA186_XBAR_REG_MASK_2,
.mask[3] = TEGRA186_XBAR_REG_MASK_3,
.reg_count = TEGRA186_XBAR_UPDATE_MAX_REG,
+ .xbar_part_size = TEGRA210_XBAR_PART1_RX,
+};
+
+static const struct tegra_ahub_soc_data soc_data_tegra264 = {
+ .cmpnt_drv = &tegra264_ahub_component,
+ .dai_drv = tegra264_ahub_dais,
+ .num_dais = ARRAY_SIZE(tegra264_ahub_dais),
+ .regmap_config = &tegra264_ahub_regmap_config,
+ .mask[0] = TEGRA264_XBAR_REG_MASK_0,
+ .mask[1] = TEGRA264_XBAR_REG_MASK_1,
+ .mask[2] = TEGRA264_XBAR_REG_MASK_2,
+ .mask[3] = TEGRA264_XBAR_REG_MASK_3,
+ .mask[4] = TEGRA264_XBAR_REG_MASK_4,
+ .reg_count = TEGRA264_XBAR_UPDATE_MAX_REG,
+ .xbar_part_size = TEGRA264_XBAR_PART1_RX,
};
static const struct of_device_id tegra_ahub_of_match[] = {
{ .compatible = "nvidia,tegra210-ahub", .data = &soc_data_tegra210 },
{ .compatible = "nvidia,tegra186-ahub", .data = &soc_data_tegra186 },
{ .compatible = "nvidia,tegra234-ahub", .data = &soc_data_tegra234 },
+ { .compatible = "nvidia,tegra264-ahub", .data = &soc_data_tegra264 },
{},
};
MODULE_DEVICE_TABLE(of, tegra_ahub_of_match);
@@ -1359,6 +2199,8 @@ static int tegra_ahub_probe(struct platform_device *pdev)
return -ENOMEM;
ahub->soc_data = of_device_get_match_data(&pdev->dev);
+ if (!ahub->soc_data)
+ return -ENODEV;
platform_set_drvdata(pdev, ahub);
diff --git a/sound/soc/tegra/tegra210_ahub.h b/sound/soc/tegra/tegra210_ahub.h
index 2728db4d24f2..f355b2cfd19b 100644
--- a/sound/soc/tegra/tegra210_ahub.h
+++ b/sound/soc/tegra/tegra210_ahub.h
@@ -2,7 +2,7 @@
/*
* tegra210_ahub.h - TEGRA210 AHUB
*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
*
*/
@@ -28,7 +28,39 @@
#define TEGRA186_XBAR_REG_MASK_3 0x3f0f00ff
#define TEGRA186_XBAR_UPDATE_MAX_REG 4
-#define TEGRA_XBAR_UPDATE_MAX_REG (TEGRA186_XBAR_UPDATE_MAX_REG)
+/* Tegra264 specific */
+#define TEGRA264_XBAR_PART1_RX 0x1000
+#define TEGRA264_XBAR_PART2_RX 0x2000
+#define TEGRA264_XBAR_PART3_RX 0x3000
+#define TEGRA264_XBAR_PART4_RX 0x4000
+#define TEGRA264_XBAR_PART0_ADX6_RX1 0x224
+#define TEGRA264_XBAR_AUDIO_RX_COUNT ((TEGRA264_XBAR_PART0_ADX6_RX1 / 4) + 1)
+#define TEGRA264_XBAR_REG_MASK_0 0xfffffff
+#define TEGRA264_XBAR_REG_MASK_1 0x3f013f1f
+#define TEGRA264_XBAR_REG_MASK_2 0xff3c0301
+#define TEGRA264_XBAR_REG_MASK_3 0x3f00ffff
+#define TEGRA264_XBAR_REG_MASK_4 0x7fff9f
+#define TEGRA264_XBAR_UPDATE_MAX_REG 5
+
+#define TEGRA264_AXBAR_ADMAIF_RX1 0x0
+#define TEGRA264_AXBAR_SFC4_RX1 0x6c
+#define TEGRA264_AXBAR_MIXER1_RX1 0x80
+#define TEGRA264_AXBAR_MIXER1_RX10 0xa4
+#define TEGRA264_AXBAR_DSPK1_RX1 0xc0
+#define TEGRA264_AXBAR_OPE1_RX1 0x100
+#define TEGRA264_AXBAR_MVC1_RX1 0x110
+#define TEGRA264_AXBAR_MVC2_RX1 0x114
+#define TEGRA264_AXBAR_AMX1_RX1 0x120
+#define TEGRA264_AXBAR_AMX3_RX4 0x14c
+#define TEGRA264_AXBAR_ADX1_RX1 0x160
+#define TEGRA264_AXBAR_ASRC1_RX7 0x1a8
+#define TEGRA264_AXBAR_ADMAIF_RX21 0x1d0
+#define TEGRA264_AXBAR_ADX6_RX1 0x224
+
+#define TEGRA_XBAR_UPDATE_MAX_REG (TEGRA264_XBAR_UPDATE_MAX_REG)
+
+#define TEGRA264_MAX_REGISTER_ADDR (TEGRA264_XBAR_PART4_RX + \
+ (TEGRA210_XBAR_RX_STRIDE * (TEGRA264_XBAR_AUDIO_RX_COUNT - 1)))
#define TEGRA186_MAX_REGISTER_ADDR (TEGRA186_XBAR_PART3_RX + \
(TEGRA210_XBAR_RX_STRIDE * (TEGRA186_XBAR_AUDIO_RX_COUNT - 1)))
@@ -76,6 +108,15 @@
#define MUX_ENUM_CTRL_DECL_234(ename, id) MUX_ENUM_CTRL_DECL_186(ename, id)
+#define MUX_ENUM_CTRL_DECL_264(ename, id) \
+ SOC_VALUE_ENUM_WIDE_DECL(ename##_enum, MUX_REG(id), 0, \
+ tegra264_ahub_mux_texts, \
+ tegra264_ahub_mux_values); \
+ static const struct snd_kcontrol_new ename##_control = \
+ SOC_DAPM_ENUM_EXT("Route", ename##_enum, \
+ tegra_ahub_get_value_enum, \
+ tegra_ahub_put_value_enum)
+
#define WIDGETS(sname, ename) \
SND_SOC_DAPM_AIF_IN(sname " XBAR-RX", NULL, 0, SND_SOC_NOPM, 0, 0), \
SND_SOC_DAPM_AIF_OUT(sname " XBAR-TX", NULL, 0, SND_SOC_NOPM, 0, 0), \
@@ -92,7 +133,7 @@
.playback = { \
.stream_name = #sname " XBAR-Playback", \
.channels_min = 1, \
- .channels_max = 16, \
+ .channels_max = 32, \
.rates = SNDRV_PCM_RATE_8000_192000, \
.formats = SNDRV_PCM_FMTBIT_S8 | \
SNDRV_PCM_FMTBIT_S16_LE | \
@@ -102,7 +143,7 @@
.capture = { \
.stream_name = #sname " XBAR-Capture", \
.channels_min = 1, \
- .channels_max = 16, \
+ .channels_max = 32, \
.rates = SNDRV_PCM_RATE_8000_192000, \
.formats = SNDRV_PCM_FMTBIT_S8 | \
SNDRV_PCM_FMTBIT_S16_LE | \
@@ -115,9 +156,10 @@ struct tegra_ahub_soc_data {
const struct regmap_config *regmap_config;
const struct snd_soc_component_driver *cmpnt_drv;
struct snd_soc_dai_driver *dai_drv;
- unsigned int mask[4];
+ unsigned int mask[TEGRA_XBAR_UPDATE_MAX_REG];
unsigned int reg_count;
unsigned int num_dais;
+ unsigned int xbar_part_size;
};
struct tegra_ahub {
diff --git a/sound/soc/tegra/tegra210_amx.c b/sound/soc/tegra/tegra210_amx.c
index 1981b94009cf..7f558c40e097 100644
--- a/sound/soc/tegra/tegra210_amx.c
+++ b/sound/soc/tegra/tegra210_amx.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-// SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
//
// tegra210_amx.c - Tegra210 AMX driver
@@ -46,21 +46,35 @@ static const struct reg_default tegra210_amx_reg_defaults[] = {
{ TEGRA210_AMX_CFG_RAM_CTRL, 0x00004000},
};
+static const struct reg_default tegra264_amx_reg_defaults[] = {
+ { TEGRA210_AMX_RX_INT_MASK, 0x0000000f},
+ { TEGRA210_AMX_RX1_CIF_CTRL, 0x00003800},
+ { TEGRA210_AMX_RX2_CIF_CTRL, 0x00003800},
+ { TEGRA210_AMX_RX3_CIF_CTRL, 0x00003800},
+ { TEGRA210_AMX_RX4_CIF_CTRL, 0x00003800},
+ { TEGRA210_AMX_TX_INT_MASK, 0x00000001},
+ { TEGRA210_AMX_TX_CIF_CTRL, 0x00003800},
+ { TEGRA210_AMX_CG, 0x1},
+ { TEGRA264_AMX_CFG_RAM_CTRL, 0x00004000},
+};
+
static void tegra210_amx_write_map_ram(struct tegra210_amx *amx)
{
int i;
- regmap_write(amx->regmap, TEGRA210_AMX_CFG_RAM_CTRL,
+ regmap_write(amx->regmap, TEGRA210_AMX_CFG_RAM_CTRL + amx->soc_data->reg_offset,
TEGRA210_AMX_CFG_RAM_CTRL_SEQ_ACCESS_EN |
TEGRA210_AMX_CFG_RAM_CTRL_ADDR_INIT_EN |
TEGRA210_AMX_CFG_RAM_CTRL_RW_WRITE);
- for (i = 0; i < TEGRA210_AMX_RAM_DEPTH; i++)
- regmap_write(amx->regmap, TEGRA210_AMX_CFG_RAM_DATA,
+ for (i = 0; i < amx->soc_data->ram_depth; i++)
+ regmap_write(amx->regmap, TEGRA210_AMX_CFG_RAM_DATA + amx->soc_data->reg_offset,
amx->map[i]);
- regmap_write(amx->regmap, TEGRA210_AMX_OUT_BYTE_EN0, amx->byte_mask[0]);
- regmap_write(amx->regmap, TEGRA210_AMX_OUT_BYTE_EN1, amx->byte_mask[1]);
+ for (i = 0; i < amx->soc_data->byte_mask_size; i++)
+ regmap_write(amx->regmap,
+ TEGRA210_AMX_OUT_BYTE_EN0 + (i * TEGRA210_AMX_AUDIOCIF_CH_STRIDE),
+ amx->byte_mask[i]);
}
static int tegra210_amx_startup(struct snd_pcm_substream *substream,
@@ -157,7 +171,10 @@ static int tegra210_amx_set_audio_cif(struct snd_soc_dai *dai,
cif_conf.audio_bits = audio_bits;
cif_conf.client_bits = audio_bits;
- tegra_set_cif(amx->regmap, reg, &cif_conf);
+ if (amx->soc_data->max_ch == TEGRA264_AMX_MAX_CHANNEL)
+ tegra264_set_cif(amx->regmap, reg, &cif_conf);
+ else
+ tegra_set_cif(amx->regmap, reg, &cif_conf);
return 0;
}
@@ -170,9 +187,10 @@ static int tegra210_amx_in_hw_params(struct snd_pcm_substream *substream,
if (amx->soc_data->auto_disable) {
regmap_write(amx->regmap,
- AMX_CH_REG(dai->id, TEGRA194_AMX_RX1_FRAME_PERIOD),
+ AMX_CH_REG(dai->id, TEGRA194_AMX_RX1_FRAME_PERIOD +
+ amx->soc_data->reg_offset),
TEGRA194_MAX_FRAME_IDLE_COUNT);
- regmap_write(amx->regmap, TEGRA210_AMX_CYA, 1);
+ regmap_write(amx->regmap, TEGRA210_AMX_CYA + amx->soc_data->reg_offset, 1);
}
return tegra210_amx_set_audio_cif(dai, params,
@@ -194,14 +212,11 @@ static int tegra210_amx_get_byte_map(struct snd_kcontrol *kcontrol,
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
struct tegra210_amx *amx = snd_soc_component_get_drvdata(cmpnt);
- unsigned char *bytes_map = (unsigned char *)&amx->map;
+ unsigned char *bytes_map = (unsigned char *)amx->map;
int reg = mc->reg;
int enabled;
- if (reg > 31)
- enabled = amx->byte_mask[1] & (1 << (reg - 32));
- else
- enabled = amx->byte_mask[0] & (1 << reg);
+ enabled = amx->byte_mask[reg / 32] & (1 << (reg % 32));
/*
* TODO: Simplify this logic to just return from bytes_map[]
@@ -228,7 +243,7 @@ static int tegra210_amx_put_byte_map(struct snd_kcontrol *kcontrol,
(struct soc_mixer_control *)kcontrol->private_value;
struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
struct tegra210_amx *amx = snd_soc_component_get_drvdata(cmpnt);
- unsigned char *bytes_map = (unsigned char *)&amx->map;
+ unsigned char *bytes_map = (unsigned char *)amx->map;
int reg = mc->reg;
int value = ucontrol->value.integer.value[0];
unsigned int mask_val = amx->byte_mask[reg / 32];
@@ -418,7 +433,90 @@ static struct snd_kcontrol_new tegra210_amx_controls[] = {
TEGRA210_AMX_BYTE_MAP_CTRL(63),
};
+static struct snd_kcontrol_new tegra264_amx_controls[] = {
+ TEGRA210_AMX_BYTE_MAP_CTRL(64),
+ TEGRA210_AMX_BYTE_MAP_CTRL(65),
+ TEGRA210_AMX_BYTE_MAP_CTRL(66),
+ TEGRA210_AMX_BYTE_MAP_CTRL(67),
+ TEGRA210_AMX_BYTE_MAP_CTRL(68),
+ TEGRA210_AMX_BYTE_MAP_CTRL(69),
+ TEGRA210_AMX_BYTE_MAP_CTRL(70),
+ TEGRA210_AMX_BYTE_MAP_CTRL(71),
+ TEGRA210_AMX_BYTE_MAP_CTRL(72),
+ TEGRA210_AMX_BYTE_MAP_CTRL(73),
+ TEGRA210_AMX_BYTE_MAP_CTRL(74),
+ TEGRA210_AMX_BYTE_MAP_CTRL(75),
+ TEGRA210_AMX_BYTE_MAP_CTRL(76),
+ TEGRA210_AMX_BYTE_MAP_CTRL(77),
+ TEGRA210_AMX_BYTE_MAP_CTRL(78),
+ TEGRA210_AMX_BYTE_MAP_CTRL(79),
+ TEGRA210_AMX_BYTE_MAP_CTRL(80),
+ TEGRA210_AMX_BYTE_MAP_CTRL(81),
+ TEGRA210_AMX_BYTE_MAP_CTRL(82),
+ TEGRA210_AMX_BYTE_MAP_CTRL(83),
+ TEGRA210_AMX_BYTE_MAP_CTRL(84),
+ TEGRA210_AMX_BYTE_MAP_CTRL(85),
+ TEGRA210_AMX_BYTE_MAP_CTRL(86),
+ TEGRA210_AMX_BYTE_MAP_CTRL(87),
+ TEGRA210_AMX_BYTE_MAP_CTRL(88),
+ TEGRA210_AMX_BYTE_MAP_CTRL(89),
+ TEGRA210_AMX_BYTE_MAP_CTRL(90),
+ TEGRA210_AMX_BYTE_MAP_CTRL(91),
+ TEGRA210_AMX_BYTE_MAP_CTRL(92),
+ TEGRA210_AMX_BYTE_MAP_CTRL(93),
+ TEGRA210_AMX_BYTE_MAP_CTRL(94),
+ TEGRA210_AMX_BYTE_MAP_CTRL(95),
+ TEGRA210_AMX_BYTE_MAP_CTRL(96),
+ TEGRA210_AMX_BYTE_MAP_CTRL(97),
+ TEGRA210_AMX_BYTE_MAP_CTRL(98),
+ TEGRA210_AMX_BYTE_MAP_CTRL(99),
+ TEGRA210_AMX_BYTE_MAP_CTRL(100),
+ TEGRA210_AMX_BYTE_MAP_CTRL(101),
+ TEGRA210_AMX_BYTE_MAP_CTRL(102),
+ TEGRA210_AMX_BYTE_MAP_CTRL(103),
+ TEGRA210_AMX_BYTE_MAP_CTRL(104),
+ TEGRA210_AMX_BYTE_MAP_CTRL(105),
+ TEGRA210_AMX_BYTE_MAP_CTRL(106),
+ TEGRA210_AMX_BYTE_MAP_CTRL(107),
+ TEGRA210_AMX_BYTE_MAP_CTRL(108),
+ TEGRA210_AMX_BYTE_MAP_CTRL(109),
+ TEGRA210_AMX_BYTE_MAP_CTRL(110),
+ TEGRA210_AMX_BYTE_MAP_CTRL(111),
+ TEGRA210_AMX_BYTE_MAP_CTRL(112),
+ TEGRA210_AMX_BYTE_MAP_CTRL(113),
+ TEGRA210_AMX_BYTE_MAP_CTRL(114),
+ TEGRA210_AMX_BYTE_MAP_CTRL(115),
+ TEGRA210_AMX_BYTE_MAP_CTRL(116),
+ TEGRA210_AMX_BYTE_MAP_CTRL(117),
+ TEGRA210_AMX_BYTE_MAP_CTRL(118),
+ TEGRA210_AMX_BYTE_MAP_CTRL(119),
+ TEGRA210_AMX_BYTE_MAP_CTRL(120),
+ TEGRA210_AMX_BYTE_MAP_CTRL(121),
+ TEGRA210_AMX_BYTE_MAP_CTRL(122),
+ TEGRA210_AMX_BYTE_MAP_CTRL(123),
+ TEGRA210_AMX_BYTE_MAP_CTRL(124),
+ TEGRA210_AMX_BYTE_MAP_CTRL(125),
+ TEGRA210_AMX_BYTE_MAP_CTRL(126),
+ TEGRA210_AMX_BYTE_MAP_CTRL(127),
+};
+
+static int tegra210_amx_component_probe(struct snd_soc_component *component)
+{
+ struct tegra210_amx *amx = snd_soc_component_get_drvdata(component);
+ int err = 0;
+
+ if (amx->soc_data->num_controls) {
+ err = snd_soc_add_component_controls(component, amx->soc_data->controls,
+ amx->soc_data->num_controls);
+ if (err)
+ dev_err(component->dev, "can't add AMX controls, err: %d\n", err);
+ }
+
+ return err;
+}
+
static const struct snd_soc_component_driver tegra210_amx_cmpnt = {
+ .probe = tegra210_amx_component_probe,
.dapm_widgets = tegra210_amx_widgets,
.num_dapm_widgets = ARRAY_SIZE(tegra210_amx_widgets),
.dapm_routes = tegra210_amx_routes,
@@ -450,6 +548,22 @@ static bool tegra194_amx_wr_reg(struct device *dev, unsigned int reg)
}
}
+static bool tegra264_amx_wr_reg(struct device *dev,
+ unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_AMX_RX_INT_MASK ... TEGRA210_AMX_RX4_CIF_CTRL:
+ case TEGRA210_AMX_TX_INT_MASK ... TEGRA210_AMX_TX_CIF_CTRL:
+ case TEGRA210_AMX_ENABLE ... TEGRA210_AMX_CG:
+ case TEGRA210_AMX_CTRL ... TEGRA264_AMX_STREAMS_AUTO_DISABLE:
+ case TEGRA264_AMX_CFG_RAM_CTRL ... TEGRA264_AMX_CFG_RAM_DATA:
+ case TEGRA264_AMX_RX1_FRAME_PERIOD ... TEGRA264_AMX_RX4_FRAME_PERIOD:
+ return true;
+ default:
+ return false;
+ }
+}
+
static bool tegra210_amx_rd_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
@@ -470,6 +584,21 @@ static bool tegra194_amx_rd_reg(struct device *dev, unsigned int reg)
}
}
+static bool tegra264_amx_rd_reg(struct device *dev,
+ unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_AMX_RX_STATUS ... TEGRA210_AMX_RX4_CIF_CTRL:
+ case TEGRA210_AMX_TX_STATUS ... TEGRA210_AMX_TX_CIF_CTRL:
+ case TEGRA210_AMX_ENABLE ... TEGRA210_AMX_INT_STATUS:
+ case TEGRA210_AMX_CTRL ... TEGRA264_AMX_CFG_RAM_DATA:
+ case TEGRA264_AMX_RX1_FRAME_PERIOD ... TEGRA264_AMX_RX4_FRAME_PERIOD:
+ return true;
+ default:
+ return false;
+ }
+}
+
static bool tegra210_amx_volatile_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
@@ -492,6 +621,29 @@ static bool tegra210_amx_volatile_reg(struct device *dev, unsigned int reg)
return false;
}
+static bool tegra264_amx_volatile_reg(struct device *dev,
+ unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_AMX_RX_STATUS:
+ case TEGRA210_AMX_RX_INT_STATUS:
+ case TEGRA210_AMX_RX_INT_SET:
+ case TEGRA210_AMX_TX_STATUS:
+ case TEGRA210_AMX_TX_INT_STATUS:
+ case TEGRA210_AMX_TX_INT_SET:
+ case TEGRA210_AMX_SOFT_RESET:
+ case TEGRA210_AMX_STATUS:
+ case TEGRA210_AMX_INT_STATUS:
+ case TEGRA264_AMX_CFG_RAM_CTRL:
+ case TEGRA264_AMX_CFG_RAM_DATA:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
static const struct regmap_config tegra210_amx_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
@@ -518,18 +670,51 @@ static const struct regmap_config tegra194_amx_regmap_config = {
.cache_type = REGCACHE_FLAT,
};
+static const struct regmap_config tegra264_amx_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = TEGRA264_AMX_RX4_LAST_FRAME_PERIOD,
+ .writeable_reg = tegra264_amx_wr_reg,
+ .readable_reg = tegra264_amx_rd_reg,
+ .volatile_reg = tegra264_amx_volatile_reg,
+ .reg_defaults = tegra264_amx_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(tegra264_amx_reg_defaults),
+ .cache_type = REGCACHE_FLAT,
+};
+
static const struct tegra210_amx_soc_data soc_data_tegra210 = {
.regmap_conf = &tegra210_amx_regmap_config,
+ .max_ch = TEGRA210_AMX_MAX_CHANNEL,
+ .ram_depth = TEGRA210_AMX_RAM_DEPTH,
+ .byte_mask_size = TEGRA210_AMX_BYTE_MASK_COUNT,
+ .reg_offset = TEGRA210_AMX_AUTO_DISABLE_OFFSET,
};
static const struct tegra210_amx_soc_data soc_data_tegra194 = {
.regmap_conf = &tegra194_amx_regmap_config,
.auto_disable = true,
+ .max_ch = TEGRA210_AMX_MAX_CHANNEL,
+ .ram_depth = TEGRA210_AMX_RAM_DEPTH,
+ .byte_mask_size = TEGRA210_AMX_BYTE_MASK_COUNT,
+ .reg_offset = TEGRA210_AMX_AUTO_DISABLE_OFFSET,
+};
+
+static const struct tegra210_amx_soc_data soc_data_tegra264 = {
+ .regmap_conf = &tegra264_amx_regmap_config,
+ .auto_disable = true,
+ .max_ch = TEGRA264_AMX_MAX_CHANNEL,
+ .ram_depth = TEGRA264_AMX_RAM_DEPTH,
+ .byte_mask_size = TEGRA264_AMX_BYTE_MASK_COUNT,
+ .reg_offset = TEGRA264_AMX_AUTO_DISABLE_OFFSET,
+ .controls = tegra264_amx_controls,
+ .num_controls = ARRAY_SIZE(tegra264_amx_controls),
};
static const struct of_device_id tegra210_amx_of_match[] = {
{ .compatible = "nvidia,tegra210-amx", .data = &soc_data_tegra210 },
{ .compatible = "nvidia,tegra194-amx", .data = &soc_data_tegra194 },
+ { .compatible = "nvidia,tegra264-amx", .data = &soc_data_tegra264 },
{},
};
MODULE_DEVICE_TABLE(of, tegra210_amx_of_match);
@@ -562,6 +747,20 @@ static int tegra210_amx_platform_probe(struct platform_device *pdev)
regcache_cache_only(amx->regmap, true);
+ amx->map = devm_kzalloc(dev, amx->soc_data->ram_depth * sizeof(*amx->map),
+ GFP_KERNEL);
+ if (!amx->map)
+ return -ENOMEM;
+
+ amx->byte_mask = devm_kzalloc(dev,
+ amx->soc_data->byte_mask_size * sizeof(*amx->byte_mask),
+ GFP_KERNEL);
+ if (!amx->byte_mask)
+ return -ENOMEM;
+
+ tegra210_amx_dais[TEGRA_AMX_OUT_DAI_ID].capture.channels_max =
+ amx->soc_data->max_ch;
+
err = devm_snd_soc_register_component(dev, &tegra210_amx_cmpnt,
tegra210_amx_dais,
ARRAY_SIZE(tegra210_amx_dais));
diff --git a/sound/soc/tegra/tegra210_amx.h b/sound/soc/tegra/tegra210_amx.h
index e277741e4258..50a237b197ba 100644
--- a/sound/soc/tegra/tegra210_amx.h
+++ b/sound/soc/tegra/tegra210_amx.h
@@ -1,8 +1,7 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tegra210_amx.h - Definitions for Tegra210 AMX driver
+/* SPDX-License-Identifier: GPL-2.0-only
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION. All rights reserved.
*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * tegra210_amx.h - Definitions for Tegra210 AMX driver
*
*/
@@ -32,7 +31,6 @@
#define TEGRA210_AMX_INT_STATUS 0x90
#define TEGRA210_AMX_CTRL 0xa4
#define TEGRA210_AMX_OUT_BYTE_EN0 0xa8
-#define TEGRA210_AMX_OUT_BYTE_EN1 0xac
#define TEGRA210_AMX_CYA 0xb0
#define TEGRA210_AMX_CFG_RAM_CTRL 0xb8
#define TEGRA210_AMX_CFG_RAM_DATA 0xbc
@@ -41,6 +39,13 @@
#define TEGRA194_AMX_RX4_FRAME_PERIOD 0xcc
#define TEGRA194_AMX_RX4_LAST_FRAME_PERIOD 0xdc
+#define TEGRA264_AMX_STREAMS_AUTO_DISABLE 0xb8
+#define TEGRA264_AMX_CFG_RAM_CTRL 0xc0
+#define TEGRA264_AMX_CFG_RAM_DATA 0xc4
+#define TEGRA264_AMX_RX1_FRAME_PERIOD 0xc8
+#define TEGRA264_AMX_RX4_FRAME_PERIOD 0xd4
+#define TEGRA264_AMX_RX4_LAST_FRAME_PERIOD 0xe4
+
/* Fields in TEGRA210_AMX_ENABLE */
#define TEGRA210_AMX_ENABLE_SHIFT 0
@@ -72,6 +77,15 @@
#define TEGRA210_AMX_MAP_STREAM_NUM_SHIFT 6
#define TEGRA210_AMX_MAP_WORD_NUM_SHIFT 2
#define TEGRA210_AMX_MAP_BYTE_NUM_SHIFT 0
+#define TEGRA210_AMX_BYTE_MASK_COUNT 2
+#define TEGRA210_AMX_MAX_CHANNEL 16
+#define TEGRA210_AMX_AUTO_DISABLE_OFFSET 0
+
+#define TEGRA264_AMX_RAM_DEPTH 32
+#define TEGRA264_AMX_BYTE_MASK_COUNT 4
+#define TEGRA264_AMX_MAX_CHANNEL 32
+#define TEGRA264_AMX_AUTO_DISABLE_OFFSET 8
+#define TEGRA_AMX_OUT_DAI_ID 4
enum {
TEGRA210_AMX_WAIT_ON_ALL,
@@ -81,13 +95,19 @@ enum {
struct tegra210_amx_soc_data {
const struct regmap_config *regmap_conf;
bool auto_disable;
+ const struct snd_kcontrol_new *controls;
+ unsigned int num_controls;
+ unsigned int max_ch;
+ unsigned int ram_depth;
+ unsigned int byte_mask_size;
+ unsigned int reg_offset;
};
struct tegra210_amx {
const struct tegra210_amx_soc_data *soc_data;
- unsigned int map[TEGRA210_AMX_RAM_DEPTH];
+ unsigned int *map;
+ unsigned int *byte_mask;
struct regmap *regmap;
- unsigned int byte_mask[2];
};
#endif
diff --git a/sound/soc/tegra/tegra210_i2s.c b/sound/soc/tegra/tegra210_i2s.c
index 766cddebd5f6..100277c39001 100644
--- a/sound/soc/tegra/tegra210_i2s.c
+++ b/sound/soc/tegra/tegra210_i2s.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-// SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
//
// tegra210_i2s.c - Tegra210 I2S driver
@@ -36,14 +36,28 @@ static const struct reg_default tegra210_i2s_reg_defaults[] = {
{ TEGRA210_I2S_CYA, 0x1 },
};
-static void tegra210_i2s_set_slot_ctrl(struct regmap *regmap,
+static const struct reg_default tegra264_i2s_reg_defaults[] = {
+ { TEGRA210_I2S_RX_INT_MASK, 0x00000003 },
+ { TEGRA210_I2S_RX_CIF_CTRL, 0x00003f00 },
+ { TEGRA264_I2S_TX_INT_MASK, 0x00000003 },
+ { TEGRA264_I2S_TX_CIF_CTRL, 0x00003f00 },
+ { TEGRA264_I2S_CG, 0x1 },
+ { TEGRA264_I2S_TIMING, 0x0000001f },
+ { TEGRA264_I2S_ENABLE, 0x1 },
+ { TEGRA264_I2S_RX_FIFO_WR_ACCESS_MODE, 0x1 },
+ { TEGRA264_I2S_TX_FIFO_RD_ACCESS_MODE, 0x1 },
+};
+
+static void tegra210_i2s_set_slot_ctrl(struct tegra210_i2s *i2s,
unsigned int total_slots,
unsigned int tx_slot_mask,
unsigned int rx_slot_mask)
{
- regmap_write(regmap, TEGRA210_I2S_SLOT_CTRL, total_slots - 1);
- regmap_write(regmap, TEGRA210_I2S_TX_SLOT_CTRL, tx_slot_mask);
- regmap_write(regmap, TEGRA210_I2S_RX_SLOT_CTRL, rx_slot_mask);
+ regmap_write(i2s->regmap, TEGRA210_I2S_SLOT_CTRL + i2s->soc_data->i2s_ctrl_offset,
+ total_slots - 1);
+ regmap_write(i2s->regmap, TEGRA210_I2S_TX_SLOT_CTRL + i2s->soc_data->tx_offset,
+ tx_slot_mask);
+ regmap_write(i2s->regmap, TEGRA210_I2S_RX_SLOT_CTRL, rx_slot_mask);
}
static int tegra210_i2s_set_clock_rate(struct device *dev,
@@ -53,7 +67,7 @@ static int tegra210_i2s_set_clock_rate(struct device *dev,
unsigned int val;
int err;
- regmap_read(i2s->regmap, TEGRA210_I2S_CTRL, &val);
+ regmap_read(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset, &val);
/* No need to set rates if I2S is being operated in slave */
if (!(val & I2S_CTRL_MASTER_EN))
@@ -100,15 +114,15 @@ static int tegra210_i2s_sw_reset(struct snd_soc_component *compnt,
cif_reg = TEGRA210_I2S_RX_CIF_CTRL;
stream_reg = TEGRA210_I2S_RX_CTRL;
} else {
- reset_reg = TEGRA210_I2S_TX_SOFT_RESET;
- cif_reg = TEGRA210_I2S_TX_CIF_CTRL;
- stream_reg = TEGRA210_I2S_TX_CTRL;
+ reset_reg = TEGRA210_I2S_TX_SOFT_RESET + i2s->soc_data->tx_offset;
+ cif_reg = TEGRA210_I2S_TX_CIF_CTRL + i2s->soc_data->tx_offset;
+ stream_reg = TEGRA210_I2S_TX_CTRL + i2s->soc_data->tx_offset;
}
/* Store CIF and I2S control values */
regmap_read(i2s->regmap, cif_reg, &cif_ctrl);
regmap_read(i2s->regmap, stream_reg, &stream_ctrl);
- regmap_read(i2s->regmap, TEGRA210_I2S_CTRL, &i2s_ctrl);
+ regmap_read(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset, &i2s_ctrl);
/* Reset to make sure the previous transactions are clean */
regmap_update_bits(i2s->regmap, reset_reg, reset_mask, reset_en);
@@ -125,7 +139,7 @@ static int tegra210_i2s_sw_reset(struct snd_soc_component *compnt,
/* Restore CIF and I2S control values */
regmap_write(i2s->regmap, cif_reg, cif_ctrl);
regmap_write(i2s->regmap, stream_reg, stream_ctrl);
- regmap_write(i2s->regmap, TEGRA210_I2S_CTRL, i2s_ctrl);
+ regmap_write(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset, i2s_ctrl);
return 0;
}
@@ -140,16 +154,13 @@ static int tegra210_i2s_init(struct snd_soc_dapm_widget *w,
int stream;
int err;
- switch (w->reg) {
- case TEGRA210_I2S_RX_ENABLE:
+ if (w->reg == TEGRA210_I2S_RX_ENABLE) {
stream = SNDRV_PCM_STREAM_PLAYBACK;
status_reg = TEGRA210_I2S_RX_STATUS;
- break;
- case TEGRA210_I2S_TX_ENABLE:
+ } else if (w->reg == (TEGRA210_I2S_TX_ENABLE + i2s->soc_data->tx_offset)) {
stream = SNDRV_PCM_STREAM_CAPTURE;
- status_reg = TEGRA210_I2S_TX_STATUS;
- break;
- default:
+ status_reg = TEGRA210_I2S_TX_STATUS + i2s->soc_data->tx_offset;
+ } else {
return -EINVAL;
}
@@ -199,7 +210,7 @@ static void tegra210_i2s_set_data_offset(struct tegra210_i2s *i2s,
unsigned int data_offset)
{
/* Capture path */
- regmap_update_bits(i2s->regmap, TEGRA210_I2S_TX_CTRL,
+ regmap_update_bits(i2s->regmap, TEGRA210_I2S_TX_CTRL + i2s->soc_data->tx_offset,
I2S_CTRL_DATA_OFFSET_MASK,
data_offset << I2S_DATA_SHIFT);
@@ -282,7 +293,8 @@ static int tegra210_i2s_set_fmt(struct snd_soc_dai *dai,
return -EINVAL;
}
- regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, mask, val);
+ regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset,
+ mask, val);
i2s->dai_fmt = fmt & SND_SOC_DAIFMT_FORMAT_MASK;
@@ -296,10 +308,10 @@ static int tegra210_i2s_set_tdm_slot(struct snd_soc_dai *dai,
struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai);
/* Copy the required tx and rx mask */
- i2s->tx_mask = (tx_mask > DEFAULT_I2S_SLOT_MASK) ?
- DEFAULT_I2S_SLOT_MASK : tx_mask;
- i2s->rx_mask = (rx_mask > DEFAULT_I2S_SLOT_MASK) ?
- DEFAULT_I2S_SLOT_MASK : rx_mask;
+ i2s->tx_mask = (tx_mask > i2s->soc_data->slot_mask) ?
+ i2s->soc_data->slot_mask : tx_mask;
+ i2s->rx_mask = (rx_mask > i2s->soc_data->slot_mask) ?
+ i2s->soc_data->slot_mask : rx_mask;
return 0;
}
@@ -327,8 +339,8 @@ static int tegra210_i2s_put_loopback(struct snd_kcontrol *kcontrol,
i2s->loopback = value;
- regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, I2S_CTRL_LPBK_MASK,
- i2s->loopback << I2S_CTRL_LPBK_SHIFT);
+ regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset,
+ I2S_CTRL_LPBK_MASK, i2s->loopback << I2S_CTRL_LPBK_SHIFT);
return 1;
}
@@ -364,9 +376,9 @@ static int tegra210_i2s_put_fsync_width(struct snd_kcontrol *kcontrol,
* cases mixer control is used to update custom values. A value
* of "N" here means, width is "N + 1" bit clock wide.
*/
- regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
- I2S_CTRL_FSYNC_WIDTH_MASK,
- i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
+ regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset,
+ i2s->soc_data->fsync_width_mask,
+ i2s->fsync_width << i2s->soc_data->fsync_width_shift);
return 1;
}
@@ -562,7 +574,7 @@ static int tegra210_i2s_set_timing_params(struct device *dev,
return err;
}
- regmap_read(i2s->regmap, TEGRA210_I2S_CTRL, &val);
+ regmap_read(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset, &val);
/*
* For LRCK mode, channel bit count depends on number of bit clocks
@@ -578,7 +590,7 @@ static int tegra210_i2s_set_timing_params(struct device *dev,
case I2S_CTRL_FRAME_FMT_FSYNC_MODE:
bit_count = (bclk_rate / srate) - 1;
- tegra210_i2s_set_slot_ctrl(i2s->regmap, channels,
+ tegra210_i2s_set_slot_ctrl(i2s, channels,
i2s->tx_mask, i2s->rx_mask);
break;
default:
@@ -591,7 +603,7 @@ static int tegra210_i2s_set_timing_params(struct device *dev,
return -EINVAL;
}
- regmap_write(i2s->regmap, TEGRA210_I2S_TIMING,
+ regmap_write(i2s->regmap, TEGRA210_I2S_TIMING + i2s->soc_data->i2s_ctrl_offset,
bit_count << I2S_TIMING_CH_BIT_CNT_SHIFT);
return 0;
@@ -673,7 +685,7 @@ static int tegra210_i2s_hw_params(struct snd_pcm_substream *substream,
}
/* Program sample size */
- regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
+ regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL + i2s->soc_data->i2s_ctrl_offset,
I2S_CTRL_BIT_SIZE_MASK, val);
srate = params_rate(params);
@@ -697,13 +709,16 @@ static int tegra210_i2s_hw_params(struct snd_pcm_substream *substream,
reg = TEGRA210_I2S_RX_CIF_CTRL;
} else {
- reg = TEGRA210_I2S_TX_CIF_CTRL;
+ reg = TEGRA210_I2S_TX_CIF_CTRL + i2s->soc_data->tx_offset;
}
cif_conf.mono_conv = i2s->mono_to_stereo[path];
cif_conf.stereo_conv = i2s->stereo_to_mono[path];
- tegra_set_cif(i2s->regmap, reg, &cif_conf);
+ if (i2s->soc_data->max_ch == TEGRA264_I2S_MAX_CHANNEL)
+ tegra264_set_cif(i2s->regmap, reg, &cif_conf);
+ else
+ tegra_set_cif(i2s->regmap, reg, &cif_conf);
return tegra210_i2s_set_timing_params(dev, sample_size, srate,
cif_conf.client_ch);
@@ -808,13 +823,20 @@ static const struct snd_kcontrol_new tegra210_i2s_controls[] = {
tegra210_i2s_put_bclk_ratio),
};
-static const struct snd_soc_dapm_widget tegra210_i2s_widgets[] = {
- SND_SOC_DAPM_AIF_IN_E("RX", NULL, 0, TEGRA210_I2S_RX_ENABLE,
- 0, 0, tegra210_i2s_init, SND_SOC_DAPM_PRE_PMU),
- SND_SOC_DAPM_AIF_OUT_E("TX", NULL, 0, TEGRA210_I2S_TX_ENABLE,
- 0, 0, tegra210_i2s_init, SND_SOC_DAPM_PRE_PMU),
- SND_SOC_DAPM_MIC("MIC", NULL),
+#define TEGRA_I2S_WIDGETS(tx_enable_reg) \
+ SND_SOC_DAPM_AIF_IN_E("RX", NULL, 0, TEGRA210_I2S_RX_ENABLE, \
+ 0, 0, tegra210_i2s_init, SND_SOC_DAPM_PRE_PMU), \
+ SND_SOC_DAPM_AIF_OUT_E("TX", NULL, 0, tx_enable_reg, \
+ 0, 0, tegra210_i2s_init, SND_SOC_DAPM_PRE_PMU), \
+ SND_SOC_DAPM_MIC("MIC", NULL), \
SND_SOC_DAPM_SPK("SPK", NULL),
+
+static const struct snd_soc_dapm_widget tegra210_i2s_widgets[] = {
+ TEGRA_I2S_WIDGETS(TEGRA210_I2S_TX_ENABLE)
+};
+
+static const struct snd_soc_dapm_widget tegra264_i2s_widgets[] = {
+ TEGRA_I2S_WIDGETS(TEGRA264_I2S_TX_ENABLE)
};
static const struct snd_soc_dapm_route tegra210_i2s_routes[] = {
@@ -841,6 +863,15 @@ static const struct snd_soc_component_driver tegra210_i2s_cmpnt = {
.num_controls = ARRAY_SIZE(tegra210_i2s_controls),
};
+static const struct snd_soc_component_driver tegra264_i2s_cmpnt = {
+ .dapm_widgets = tegra264_i2s_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(tegra264_i2s_widgets),
+ .dapm_routes = tegra210_i2s_routes,
+ .num_dapm_routes = ARRAY_SIZE(tegra210_i2s_routes),
+ .controls = tegra210_i2s_controls,
+ .num_controls = ARRAY_SIZE(tegra210_i2s_controls),
+};
+
static bool tegra210_i2s_wr_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
@@ -895,7 +926,68 @@ static bool tegra210_i2s_volatile_reg(struct device *dev, unsigned int reg)
}
}
-static const struct regmap_config tegra210_i2s_regmap_config = {
+static bool tegra264_i2s_wr_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_I2S_RX_ENABLE ... TEGRA210_I2S_RX_SOFT_RESET:
+ case TEGRA210_I2S_RX_INT_MASK ... TEGRA264_I2S_RX_CYA:
+ case TEGRA264_I2S_TX_ENABLE ... TEGRA264_I2S_TX_SOFT_RESET:
+ case TEGRA264_I2S_TX_INT_MASK ... TEGRA264_I2S_TX_FIFO_RD_ACCESS_MODE:
+ case TEGRA264_I2S_TX_FIFO_THRESHOLD ... TEGRA264_I2S_TX_CYA:
+ case TEGRA264_I2S_ENABLE ... TEGRA264_I2S_CG:
+ case TEGRA264_I2S_INT_SET ... TEGRA264_I2S_INT_MASK:
+ case TEGRA264_I2S_CTRL ... TEGRA264_I2S_CYA:
+ return true;
+ default:
+ return false;
+ };
+}
+
+static bool tegra264_i2s_rd_reg(struct device *dev, unsigned int reg)
+{
+ if (tegra264_i2s_wr_reg(dev, reg))
+ return true;
+
+ switch (reg) {
+ case TEGRA210_I2S_RX_STATUS:
+ case TEGRA210_I2S_RX_INT_STATUS:
+ case TEGRA264_I2S_RX_CIF_FIFO_STATUS:
+ case TEGRA264_I2S_TX_STATUS:
+ case TEGRA264_I2S_TX_INT_STATUS:
+ case TEGRA264_I2S_TX_FIFO_RD_DATA:
+ case TEGRA264_I2S_TX_CIF_FIFO_STATUS:
+ case TEGRA264_I2S_STATUS:
+ case TEGRA264_I2S_INT_STATUS:
+ case TEGRA264_I2S_PIO_MODE_ENABLE:
+ case TEGRA264_I2S_PAD_MACRO_STATUS:
+ return true;
+ default:
+ return false;
+ };
+}
+
+static bool tegra264_i2s_volatile_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case TEGRA210_I2S_RX_SOFT_RESET:
+ case TEGRA210_I2S_RX_STATUS:
+ case TEGRA210_I2S_RX_INT_STATUS:
+ case TEGRA264_I2S_RX_CIF_FIFO_STATUS:
+ case TEGRA264_I2S_TX_STATUS:
+ case TEGRA264_I2S_TX_INT_STATUS:
+ case TEGRA264_I2S_TX_FIFO_RD_DATA:
+ case TEGRA264_I2S_TX_CIF_FIFO_STATUS:
+ case TEGRA264_I2S_STATUS:
+ case TEGRA264_I2S_INT_STATUS:
+ case TEGRA264_I2S_TX_SOFT_RESET:
+ case TEGRA264_I2S_PAD_MACRO_STATUS:
+ return true;
+ default:
+ return false;
+ };
+}
+
+static const struct regmap_config tegra210_regmap_conf = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
@@ -942,20 +1034,34 @@ static void tegra210_parse_client_convert(struct device *dev)
i2s->client_sample_format = simple_util_get_sample_fmt(&data);
}
+static const struct regmap_config tegra264_regmap_conf = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = TEGRA264_I2S_PAD_MACRO_STATUS,
+ .writeable_reg = tegra264_i2s_wr_reg,
+ .readable_reg = tegra264_i2s_rd_reg,
+ .volatile_reg = tegra264_i2s_volatile_reg,
+ .reg_defaults = tegra264_i2s_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(tegra264_i2s_reg_defaults),
+ .cache_type = REGCACHE_FLAT,
+};
+
static int tegra210_i2s_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct tegra210_i2s *i2s;
void __iomem *regs;
- int err;
+ int err, id;
i2s = devm_kzalloc(dev, sizeof(*i2s), GFP_KERNEL);
if (!i2s)
return -ENOMEM;
+ i2s->soc_data = of_device_get_match_data(&pdev->dev);
i2s->rx_fifo_th = DEFAULT_I2S_RX_FIFO_THRESHOLD;
- i2s->tx_mask = DEFAULT_I2S_SLOT_MASK;
- i2s->rx_mask = DEFAULT_I2S_SLOT_MASK;
+ i2s->tx_mask = i2s->soc_data->slot_mask;
+ i2s->rx_mask = i2s->soc_data->slot_mask;
i2s->loopback = false;
i2s->client_sample_format = -EINVAL;
@@ -981,7 +1087,7 @@ static int tegra210_i2s_probe(struct platform_device *pdev)
return PTR_ERR(regs);
i2s->regmap = devm_regmap_init_mmio(dev, regs,
- &tegra210_i2s_regmap_config);
+ i2s->soc_data->regmap_conf);
if (IS_ERR(i2s->regmap)) {
dev_err(dev, "regmap init failed\n");
return PTR_ERR(i2s->regmap);
@@ -991,7 +1097,13 @@ static int tegra210_i2s_probe(struct platform_device *pdev)
regcache_cache_only(i2s->regmap, true);
- err = devm_snd_soc_register_component(dev, &tegra210_i2s_cmpnt,
+ /* Update the dais max channel as per soc */
+ for (id = 0; id < ARRAY_SIZE(tegra210_i2s_dais); id++) {
+ tegra210_i2s_dais[id].playback.channels_max = i2s->soc_data->max_ch;
+ tegra210_i2s_dais[id].capture.channels_max = i2s->soc_data->max_ch;
+ }
+
+ err = devm_snd_soc_register_component(dev, i2s->soc_data->i2s_cmpnt,
tegra210_i2s_dais,
ARRAY_SIZE(tegra210_i2s_dais));
if (err) {
@@ -1015,8 +1127,31 @@ static const struct dev_pm_ops tegra210_i2s_pm_ops = {
SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
};
+static const struct tegra_i2s_soc_data soc_data_tegra210 = {
+ .regmap_conf = &tegra210_regmap_conf,
+ .i2s_cmpnt = &tegra210_i2s_cmpnt,
+ .max_ch = TEGRA210_I2S_MAX_CHANNEL,
+ .tx_offset = TEGRA210_I2S_TX_OFFSET,
+ .i2s_ctrl_offset = TEGRA210_I2S_CTRL_OFFSET,
+ .fsync_width_mask = I2S_CTRL_FSYNC_WIDTH_MASK,
+ .fsync_width_shift = I2S_FSYNC_WIDTH_SHIFT,
+ .slot_mask = DEFAULT_I2S_SLOT_MASK,
+};
+
+static const struct tegra_i2s_soc_data soc_data_tegra264 = {
+ .regmap_conf = &tegra264_regmap_conf,
+ .i2s_cmpnt = &tegra264_i2s_cmpnt,
+ .max_ch = TEGRA264_I2S_MAX_CHANNEL,
+ .tx_offset = TEGRA264_I2S_TX_OFFSET,
+ .i2s_ctrl_offset = TEGRA264_I2S_CTRL_OFFSET,
+ .fsync_width_mask = TEGRA264_I2S_CTRL_FSYNC_WIDTH_MASK,
+ .fsync_width_shift = TEGRA264_I2S_FSYNC_WIDTH_SHIFT,
+ .slot_mask = TEGRA264_DEFAULT_I2S_SLOT_MASK,
+};
+
static const struct of_device_id tegra210_i2s_of_match[] = {
- { .compatible = "nvidia,tegra210-i2s" },
+ { .compatible = "nvidia,tegra210-i2s", .data = &soc_data_tegra210 },
+ { .compatible = "nvidia,tegra264-i2s", .data = &soc_data_tegra264 },
{},
};
MODULE_DEVICE_TABLE(of, tegra210_i2s_of_match);
diff --git a/sound/soc/tegra/tegra210_i2s.h b/sound/soc/tegra/tegra210_i2s.h
index 543332de7405..42be2137342c 100644
--- a/sound/soc/tegra/tegra210_i2s.h
+++ b/sound/soc/tegra/tegra210_i2s.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only
- * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES.
* All rights reserved.
*
* tegra210_i2s.h - Definitions for Tegra210 I2S driver
@@ -47,9 +47,38 @@
#define TEGRA210_I2S_CLK_TRIM 0xac
#define TEGRA210_I2S_CYA 0xb0
+/* T264 specific registers */
+#define TEGRA264_I2S_RX_FIFO_WR_ACCESS_MODE 0x30
+#define TEGRA264_I2S_RX_CYA 0x3c
+#define TEGRA264_I2S_RX_CIF_FIFO_STATUS 0x40
+#define TEGRA264_I2S_TX_ENABLE 0x80
+#define TEGRA264_I2S_TX_SOFT_RESET 0x84
+#define TEGRA264_I2S_TX_STATUS 0x8c
+#define TEGRA264_I2S_TX_INT_STATUS 0x90
+#define TEGRA264_I2S_TX_INT_MASK 0x94
+#define TEGRA264_I2S_TX_CIF_CTRL 0xa0
+#define TEGRA264_I2S_TX_FIFO_RD_ACCESS_MODE 0xb0
+#define TEGRA264_I2S_TX_FIFO_RD_DATA 0xb4
+#define TEGRA264_I2S_TX_FIFO_THRESHOLD 0xb8
+#define TEGRA264_I2S_TX_CYA 0xbc
+#define TEGRA264_I2S_TX_CIF_FIFO_STATUS 0xc0
+#define TEGRA264_I2S_ENABLE 0x100
+#define TEGRA264_I2S_CG 0x108
+#define TEGRA264_I2S_STATUS 0x10c
+#define TEGRA264_I2S_INT_STATUS 0x110
+#define TEGRA264_I2S_INT_SET 0x114
+#define TEGRA264_I2S_INT_MASK 0x11c
+#define TEGRA264_I2S_CTRL 0x12c
+#define TEGRA264_I2S_TIMING 0x130
+#define TEGRA264_I2S_CYA 0x13c
+#define TEGRA264_I2S_PIO_MODE_ENABLE 0x140
+#define TEGRA264_I2S_PAD_MACRO_STATUS 0x144
+
/* Bit fields, shifts and masks */
#define I2S_DATA_SHIFT 8
#define I2S_CTRL_DATA_OFFSET_MASK (0x7ff << I2S_DATA_SHIFT)
+#define TEGRA264_I2S_FSYNC_WIDTH_SHIFT 23
+#define TEGRA264_I2S_CTRL_FSYNC_WIDTH_MASK (0x1ff << TEGRA264_I2S_FSYNC_WIDTH_SHIFT)
#define I2S_EN_SHIFT 0
#define I2S_EN_MASK BIT(I2S_EN_SHIFT)
@@ -102,6 +131,14 @@
#define DEFAULT_I2S_RX_FIFO_THRESHOLD 3
#define DEFAULT_I2S_SLOT_MASK 0xffff
+#define TEGRA210_I2S_TX_OFFSET 0
+#define TEGRA210_I2S_CTRL_OFFSET 0
+#define TEGRA210_I2S_MAX_CHANNEL 16
+
+#define TEGRA264_DEFAULT_I2S_SLOT_MASK 0xffffffff
+#define TEGRA264_I2S_TX_OFFSET 0x40
+#define TEGRA264_I2S_CTRL_OFFSET 0x8c
+#define TEGRA264_I2S_MAX_CHANNEL 32
enum tegra210_i2s_path {
I2S_RX_PATH,
@@ -109,7 +146,19 @@ enum tegra210_i2s_path {
I2S_PATHS,
};
+struct tegra_i2s_soc_data {
+ const struct regmap_config *regmap_conf;
+ const struct snd_soc_component_driver *i2s_cmpnt;
+ unsigned int max_ch;
+ unsigned int tx_offset;
+ unsigned int i2s_ctrl_offset;
+ unsigned int fsync_width_mask;
+ unsigned int fsync_width_shift;
+ unsigned int slot_mask;
+};
+
struct tegra210_i2s {
+ const struct tegra_i2s_soc_data *soc_data;
struct clk *clk_i2s;
struct clk *clk_sync_input;
struct regmap *regmap;
diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c
index 8b48813c2c59..94b5ab77649b 100644
--- a/sound/soc/tegra/tegra_audio_graph_card.c
+++ b/sound/soc/tegra/tegra_audio_graph_card.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION. All rights reserved.
//
// tegra_audio_graph_card.c - Audio Graph based Tegra Machine Driver
-//
-// Copyright (c) 2020-2021 NVIDIA CORPORATION. All rights reserved.
#include <linux/math64.h>
#include <linux/module.h>
@@ -232,11 +231,22 @@ static const struct tegra_audio_cdata tegra186_data = {
.plla_out0_rates[x11_RATE] = 45158400,
};
+static const struct tegra_audio_cdata tegra264_data = {
+ /* PLLA1 */
+ .plla_rates[x8_RATE] = 983040000,
+ .plla_rates[x11_RATE] = 993484800,
+ /* PLLA1_OUT1 */
+ .plla_out0_rates[x8_RATE] = 49152000,
+ .plla_out0_rates[x11_RATE] = 45158400,
+};
+
static const struct of_device_id graph_of_tegra_match[] = {
{ .compatible = "nvidia,tegra210-audio-graph-card",
.data = &tegra210_data },
{ .compatible = "nvidia,tegra186-audio-graph-card",
.data = &tegra186_data },
+ { .compatible = "nvidia,tegra264-audio-graph-card",
+ .data = &tegra264_data },
{},
};
MODULE_DEVICE_TABLE(of, graph_of_tegra_match);
diff --git a/sound/soc/tegra/tegra_cif.h b/sound/soc/tegra/tegra_cif.h
index 7cca8068f4b5..916aa10d8af8 100644
--- a/sound/soc/tegra/tegra_cif.h
+++ b/sound/soc/tegra/tegra_cif.h
@@ -1,8 +1,7 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tegra_cif.h - TEGRA Audio CIF Programming
+/* SPDX-License-Identifier: GPL-2.0-only
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION. All rights reserved.
*
- * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
+ * tegra_cif.h - TEGRA Audio CIF Programming
*
*/
@@ -22,6 +21,10 @@
#define TEGRA_ACIF_CTRL_TRUNCATE_SHIFT 1
#define TEGRA_ACIF_CTRL_MONO_CONV_SHIFT 0
+#define TEGRA264_ACIF_CTRL_AUDIO_BITS_SHIFT 11
+#define TEGRA264_ACIF_CTRL_CLIENT_CH_SHIFT 14
+#define TEGRA264_ACIF_CTRL_AUDIO_CH_SHIFT 19
+
/* AUDIO/CLIENT_BITS values */
#define TEGRA_ACIF_BITS_8 1
#define TEGRA_ACIF_BITS_16 3
@@ -62,4 +65,23 @@ static inline void tegra_set_cif(struct regmap *regmap, unsigned int reg,
regmap_update_bits(regmap, reg, TEGRA_ACIF_UPDATE_MASK, value);
}
+static inline void tegra264_set_cif(struct regmap *regmap, unsigned int reg,
+ struct tegra_cif_conf *conf)
+{
+ unsigned int value;
+
+ value = (conf->threshold << TEGRA_ACIF_CTRL_FIFO_TH_SHIFT) |
+ ((conf->audio_ch - 1) << TEGRA264_ACIF_CTRL_AUDIO_CH_SHIFT) |
+ ((conf->client_ch - 1) << TEGRA264_ACIF_CTRL_CLIENT_CH_SHIFT) |
+ (conf->audio_bits << TEGRA264_ACIF_CTRL_AUDIO_BITS_SHIFT) |
+ (conf->client_bits << TEGRA_ACIF_CTRL_CLIENT_BITS_SHIFT) |
+ (conf->expand << TEGRA_ACIF_CTRL_EXPAND_SHIFT) |
+ (conf->stereo_conv << TEGRA_ACIF_CTRL_STEREO_CONV_SHIFT) |
+ (conf->replicate << TEGRA_ACIF_CTRL_REPLICATE_SHIFT) |
+ (conf->truncate << TEGRA_ACIF_CTRL_TRUNCATE_SHIFT) |
+ (conf->mono_conv << TEGRA_ACIF_CTRL_MONO_CONV_SHIFT);
+
+ regmap_update_bits(regmap, reg, TEGRA_ACIF_UPDATE_MASK, value);
+}
+
#endif
diff --git a/sound/soc/tegra/tegra_isomgr_bw.c b/sound/soc/tegra/tegra_isomgr_bw.c
index 18e802bca6a6..fa979960bc09 100644
--- a/sound/soc/tegra/tegra_isomgr_bw.c
+++ b/sound/soc/tegra/tegra_isomgr_bw.c
@@ -11,8 +11,8 @@
#include "tegra_isomgr_bw.h"
#include "tegra210_admaif.h"
-/* Max possible rate is 192KHz x 16channel x 4bytes */
-#define MAX_BW_PER_DEV 12288
+#define MAX_SAMPLE_RATE 192 /* KHz*/
+#define MAX_BYTES_PER_SAMPLE 4
int tegra_isomgr_adma_setbw(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai, bool is_running)
@@ -98,7 +98,8 @@ int tegra_isomgr_adma_register(struct device *dev)
}
adma_isomgr->max_pcm_device = admaif->soc_data->num_ch;
- adma_isomgr->max_bw = STREAM_TYPE * MAX_BW_PER_DEV * adma_isomgr->max_pcm_device;
+ adma_isomgr->max_bw = STREAM_TYPE * MAX_SAMPLE_RATE * MAX_BYTES_PER_SAMPLE *
+ admaif->soc_data->max_stream_ch * adma_isomgr->max_pcm_device;
for (i = 0; i < STREAM_TYPE; i++) {
adma_isomgr->bw_per_dev[i] = devm_kzalloc(dev, adma_isomgr->max_pcm_device *
diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c
index a0b8cca31cba..caf1887cc9d1 100644
--- a/sound/soc/ti/davinci-mcasp.c
+++ b/sound/soc/ti/davinci-mcasp.c
@@ -2157,8 +2157,8 @@ static int davinci_mcasp_gpio_direction_out(struct gpio_chip *chip,
return 0;
}
-static void davinci_mcasp_gpio_set(struct gpio_chip *chip, unsigned offset,
- int value)
+static int davinci_mcasp_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct davinci_mcasp *mcasp = gpiochip_get_data(chip);
@@ -2166,6 +2166,8 @@ static void davinci_mcasp_gpio_set(struct gpio_chip *chip, unsigned offset,
mcasp_set_bits(mcasp, DAVINCI_MCASP_PDOUT_REG, BIT(offset));
else
mcasp_clr_bits(mcasp, DAVINCI_MCASP_PDOUT_REG, BIT(offset));
+
+ return 0;
}
static int davinci_mcasp_gpio_direction_in(struct gpio_chip *chip,
@@ -2216,7 +2218,7 @@ static const struct gpio_chip davinci_mcasp_template_chip = {
.request = davinci_mcasp_gpio_request,
.free = davinci_mcasp_gpio_free,
.direction_output = davinci_mcasp_gpio_direction_out,
- .set = davinci_mcasp_gpio_set,
+ .set_rv = davinci_mcasp_gpio_set,
.direction_input = davinci_mcasp_gpio_direction_in,
.get = davinci_mcasp_gpio_get,
.get_direction = davinci_mcasp_gpio_get_direction,
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index 9bdf3db51d62..e73d3b262f57 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -39,6 +39,7 @@
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/io.h>
+#include <linux/string.h>
#include <sound/core.h>
#include <sound/pcm.h>
@@ -754,7 +755,7 @@ static int snd_amd7930_pcm(struct snd_amd7930 *amd)
pcm->private_data = amd;
pcm->info_flags = 0;
- strcpy(pcm->name, amd->card->shortname);
+ strscpy(pcm->name, amd->card->shortname);
amd->pcm = pcm;
snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_CONTINUOUS,
@@ -874,7 +875,7 @@ static int snd_amd7930_mixer(struct snd_amd7930 *amd)
return -EINVAL;
card = amd->card;
- strcpy(card->mixername, card->shortname);
+ strscpy(card->mixername, card->shortname);
for (idx = 0; idx < ARRAY_SIZE(amd7930_controls); idx++) {
if ((err = snd_ctl_add(card,
@@ -1007,8 +1008,8 @@ static int amd7930_sbus_probe(struct platform_device *op)
if (err < 0)
return err;
- strcpy(card->driver, "AMD7930");
- strcpy(card->shortname, "Sun AMD7930");
+ strscpy(card->driver, "AMD7930");
+ strscpy(card->shortname, "Sun AMD7930");
sprintf(card->longname, "%s at 0x%02lx:0x%08Lx, irq %d",
card->shortname,
rp->flags & 0xffL,
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 69f1c9e37f4b..93cbe158009f 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -60,6 +60,7 @@
#include <linux/io.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
+#include <linux/string.h>
#include <sound/core.h>
#include <sound/pcm.h>
@@ -2239,7 +2240,7 @@ static int snd_dbri_pcm(struct snd_card *card)
pcm->private_data = card->private_data;
pcm->info_flags = 0;
- strcpy(pcm->name, card->shortname);
+ strscpy(pcm->name, card->shortname);
snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_CONTINUOUS,
NULL, 64 * 1024, 64 * 1024);
@@ -2446,7 +2447,7 @@ static int snd_dbri_mixer(struct snd_card *card)
return -EINVAL;
dbri = card->private_data;
- strcpy(card->mixername, card->shortname);
+ strscpy(card->mixername, card->shortname);
for (idx = 0; idx < ARRAY_SIZE(dbri_controls); idx++) {
err = snd_ctl_add(card,
@@ -2613,8 +2614,8 @@ static int dbri_probe(struct platform_device *op)
if (err < 0)
return err;
- strcpy(card->driver, "DBRI");
- strcpy(card->shortname, "Sun DBRI");
+ strscpy(card->driver, "DBRI");
+ strscpy(card->shortname, "Sun DBRI");
rp = &op->resource[0];
sprintf(card->longname, "%s at 0x%02lx:0x%016llx, irq %d",
card->shortname,
diff --git a/sound/usb/fcp.c b/sound/usb/fcp.c
index 7df65041ace5..98f9964311a7 100644
--- a/sound/usb/fcp.c
+++ b/sound/usb/fcp.c
@@ -1092,8 +1092,7 @@ static int fcp_find_fc_interface(struct usb_mixer_interface *mixer)
epd = get_endpoint(intf->altsetting, 0);
private->bInterfaceNumber = desc->bInterfaceNumber;
- private->bEndpointAddress = epd->bEndpointAddress &
- USB_ENDPOINT_NUMBER_MASK;
+ private->bEndpointAddress = usb_endpoint_num(epd);
private->wMaxPacketSize = le16_to_cpu(epd->wMaxPacketSize);
private->bInterval = epd->bInterval;
return 0;
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index cfed000f243a..3a8a977ed359 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1530,6 +1530,7 @@ static void snd_usbmidi_free(struct snd_usb_midi *umidi)
snd_usbmidi_in_endpoint_delete(ep->in);
}
mutex_destroy(&umidi->mutex);
+ timer_shutdown_sync(&umidi->error_timer);
kfree(umidi);
}
@@ -1553,7 +1554,7 @@ void snd_usbmidi_disconnect(struct list_head *p)
spin_unlock_irq(&umidi->disc_lock);
up_write(&umidi->disc_rwsem);
- timer_delete_sync(&umidi->error_timer);
+ timer_shutdown_sync(&umidi->error_timer);
for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) {
struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i];
@@ -2088,7 +2089,7 @@ static int roland_load_get(struct snd_kcontrol *kcontrol,
static int roland_load_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *value)
{
- struct snd_usb_midi *umidi = kcontrol->private_data;
+ struct snd_usb_midi *umidi = snd_kcontrol_chip(kcontrol);
int changed;
if (value->value.enumerated.item[0] > 1)
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 66976be06bc0..11be79af26db 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -547,7 +547,7 @@ int snd_usb_set_cur_mix_value(struct usb_mixer_elem_info *cval, int channel,
int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag,
unsigned int size, unsigned int __user *_tlv)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
DECLARE_TLV_DB_MINMAX(scale, 0, 0);
if (size < sizeof(scale))
@@ -1367,7 +1367,7 @@ static int get_max_exposed(struct usb_mixer_elem_info *cval)
static int mixer_ctl_feature_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
if (cval->val_type == USB_MIXER_BOOLEAN ||
cval->val_type == USB_MIXER_INV_BOOLEAN)
@@ -1399,7 +1399,7 @@ static int mixer_ctl_feature_info(struct snd_kcontrol *kcontrol,
static int mixer_ctl_feature_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int c, cnt, val, err;
ucontrol->value.integer.value[0] = cval->min;
@@ -1431,7 +1431,7 @@ static int mixer_ctl_feature_get(struct snd_kcontrol *kcontrol,
static int mixer_ctl_feature_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int max_val = get_max_exposed(cval);
int c, cnt, val, oval, err;
int changed = 0;
@@ -1475,7 +1475,7 @@ static int mixer_ctl_feature_put(struct snd_kcontrol *kcontrol,
static int mixer_ctl_master_bool_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int val, err;
err = snd_usb_get_cur_mix_value(cval, 0, 0, &val);
@@ -1543,7 +1543,7 @@ error:
static int mixer_ctl_connector_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int ret, val;
ret = get_connector_value(cval, kcontrol->id.name, &val);
@@ -2302,7 +2302,7 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid,
static int mixer_ctl_procunit_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int err, val;
err = get_cur_ctl_value(cval, cval->control << 8, &val);
@@ -2319,7 +2319,7 @@ static int mixer_ctl_procunit_get(struct snd_kcontrol *kcontrol,
static int mixer_ctl_procunit_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int val, oval, err;
err = get_cur_ctl_value(cval, cval->control << 8, &oval);
@@ -2654,7 +2654,7 @@ static int parse_audio_extension_unit(struct mixer_build *state, int unitid,
static int mixer_ctl_selector_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
const char **itemlist = (const char **)kcontrol->private_value;
if (snd_BUG_ON(!itemlist))
@@ -2666,7 +2666,7 @@ static int mixer_ctl_selector_info(struct snd_kcontrol *kcontrol,
static int mixer_ctl_selector_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int val, err;
err = get_cur_ctl_value(cval, cval->control << 8, &val);
@@ -2683,7 +2683,7 @@ static int mixer_ctl_selector_get(struct snd_kcontrol *kcontrol,
static int mixer_ctl_selector_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
int val, oval, err;
err = get_cur_ctl_value(cval, cval->control << 8, &oval);
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index a90673d43822..aad205df93b2 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -2000,7 +2000,7 @@ static int realtek_hda_get(struct snd_usb_audio *chip, u32 cmd, u32 *value)
static int realtek_ctl_connector_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *cval = kcontrol->private_data;
+ struct usb_mixer_elem_info *cval = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = cval->head.mixer->chip;
u32 pv = kcontrol->private_value;
u32 node_id = pv & 0xff;
@@ -3525,10 +3525,13 @@ static int snd_rme_digiface_controls_create(struct usb_mixer_interface *mixer)
#define SND_DJM_CAP_RECOUT 0x0a
#define SND_DJM_CAP_RECOUT_NOMIC 0x0e
#define SND_DJM_CAP_NONE 0x0f
+#define SND_DJM_CAP_FXSEND 0x10
#define SND_DJM_CAP_CH1PFADER 0x11
#define SND_DJM_CAP_CH2PFADER 0x12
#define SND_DJM_CAP_CH3PFADER 0x13
#define SND_DJM_CAP_CH4PFADER 0x14
+#define SND_DJM_CAP_EXT1SEND 0x21
+#define SND_DJM_CAP_EXT2SEND 0x22
#define SND_DJM_CAP_CH1PREFADER 0x31
#define SND_DJM_CAP_CH2PREFADER 0x32
#define SND_DJM_CAP_CH3PREFADER 0x33
@@ -3559,6 +3562,7 @@ static int snd_rme_digiface_controls_create(struct usb_mixer_interface *mixer)
#define SND_DJM_750MK2_IDX 0x4
#define SND_DJM_450_IDX 0x5
#define SND_DJM_A9_IDX 0x6
+#define SND_DJM_V10_IDX 0x7
#define SND_DJM_CTL(_name, suffix, _default_value, _windex) { \
@@ -3598,8 +3602,8 @@ static const char *snd_djm_get_label_caplevel_common(u16 wvalue)
}
};
-// The DJM-A9 has different capture levels than other, older models
-static const char *snd_djm_get_label_caplevel_a9(u16 wvalue)
+// Models like DJM-A9 or DJM-V10 have different capture levels than others
+static const char *snd_djm_get_label_caplevel_high(u16 wvalue)
{
switch (wvalue) {
case 0x0000: return "+15dB";
@@ -3627,6 +3631,7 @@ static const char *snd_djm_get_label_cap_common(u16 wvalue)
case SND_DJM_CAP_RECOUT_NOMIC: return "Rec Out without Mic";
case SND_DJM_CAP_AUX: return "Aux";
case SND_DJM_CAP_NONE: return "None";
+ case SND_DJM_CAP_FXSEND: return "FX SEND";
case SND_DJM_CAP_CH1PREFADER: return "Pre Fader Ch1";
case SND_DJM_CAP_CH2PREFADER: return "Pre Fader Ch2";
case SND_DJM_CAP_CH3PREFADER: return "Pre Fader Ch3";
@@ -3635,6 +3640,8 @@ static const char *snd_djm_get_label_cap_common(u16 wvalue)
case SND_DJM_CAP_CH2PFADER: return "Post Fader Ch2";
case SND_DJM_CAP_CH3PFADER: return "Post Fader Ch3";
case SND_DJM_CAP_CH4PFADER: return "Post Fader Ch4";
+ case SND_DJM_CAP_EXT1SEND: return "EXT1 SEND";
+ case SND_DJM_CAP_EXT2SEND: return "EXT2 SEND";
default: return NULL;
}
};
@@ -3653,7 +3660,8 @@ static const char *snd_djm_get_label_cap_850(u16 wvalue)
static const char *snd_djm_get_label_caplevel(u8 device_idx, u16 wvalue)
{
switch (device_idx) {
- case SND_DJM_A9_IDX: return snd_djm_get_label_caplevel_a9(wvalue);
+ case SND_DJM_A9_IDX: return snd_djm_get_label_caplevel_high(wvalue);
+ case SND_DJM_V10_IDX: return snd_djm_get_label_caplevel_high(wvalue);
default: return snd_djm_get_label_caplevel_common(wvalue);
}
};
@@ -3705,13 +3713,13 @@ static const u16 snd_djm_opts_250mk2_pb2[] = { 0x0200, 0x0201, 0x0204 };
static const u16 snd_djm_opts_250mk2_pb3[] = { 0x0300, 0x0301, 0x0304 };
static const struct snd_djm_ctl snd_djm_ctls_250mk2[] = {
- SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Ch1 Input", 250mk2_cap1, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", 250mk2_cap2, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", 250mk2_cap3, 0, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch1 Output", 250mk2_pb1, 0, SND_DJM_WINDEX_PB),
- SND_DJM_CTL("Ch2 Output", 250mk2_pb2, 1, SND_DJM_WINDEX_PB),
- SND_DJM_CTL("Ch3 Output", 250mk2_pb3, 2, SND_DJM_WINDEX_PB)
+ SND_DJM_CTL("Master Input Level Capture Switch", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", 250mk2_cap1, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", 250mk2_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", 250mk2_cap3, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Output 1 Playback Switch", 250mk2_pb1, 0, SND_DJM_WINDEX_PB),
+ SND_DJM_CTL("Output 2 Playback Switch", 250mk2_pb2, 1, SND_DJM_WINDEX_PB),
+ SND_DJM_CTL("Output 3 Playback Switch", 250mk2_pb3, 2, SND_DJM_WINDEX_PB)
};
@@ -3730,13 +3738,13 @@ static const u16 snd_djm_opts_450_pb2[] = { 0x0200, 0x0201, 0x0204 };
static const u16 snd_djm_opts_450_pb3[] = { 0x0300, 0x0301, 0x0304 };
static const struct snd_djm_ctl snd_djm_ctls_450[] = {
- SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Ch1 Input", 450_cap1, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", 450_cap2, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", 450_cap3, 0, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch1 Output", 450_pb1, 0, SND_DJM_WINDEX_PB),
- SND_DJM_CTL("Ch2 Output", 450_pb2, 1, SND_DJM_WINDEX_PB),
- SND_DJM_CTL("Ch3 Output", 450_pb3, 2, SND_DJM_WINDEX_PB)
+ SND_DJM_CTL("Master Input Level Capture Switch", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", 450_cap1, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", 450_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", 450_cap3, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Output 1 Playback Switch", 450_pb1, 0, SND_DJM_WINDEX_PB),
+ SND_DJM_CTL("Output 2 Playback Switch", 450_pb2, 1, SND_DJM_WINDEX_PB),
+ SND_DJM_CTL("Output 3 Playback Switch", 450_pb3, 2, SND_DJM_WINDEX_PB)
};
@@ -3751,11 +3759,11 @@ static const u16 snd_djm_opts_750_cap4[] = {
0x0401, 0x0403, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a, 0x040f };
static const struct snd_djm_ctl snd_djm_ctls_750[] = {
- SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Ch1 Input", 750_cap1, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", 750_cap2, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", 750_cap3, 0, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch4 Input", 750_cap4, 0, SND_DJM_WINDEX_CAP)
+ SND_DJM_CTL("Master Input Level Capture Switch", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", 750_cap1, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", 750_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", 750_cap3, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 4 Capture Switch", 750_cap4, 0, SND_DJM_WINDEX_CAP)
};
@@ -3770,11 +3778,11 @@ static const u16 snd_djm_opts_850_cap4[] = {
0x0400, 0x0403, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a, 0x040f };
static const struct snd_djm_ctl snd_djm_ctls_850[] = {
- SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Ch1 Input", 850_cap1, 1, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", 850_cap2, 0, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", 850_cap3, 0, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch4 Input", 850_cap4, 1, SND_DJM_WINDEX_CAP)
+ SND_DJM_CTL("Master Input Level Capture Switch", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", 850_cap1, 1, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", 850_cap2, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", 850_cap3, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 4 Capture Switch", 850_cap4, 1, SND_DJM_WINDEX_CAP)
};
@@ -3791,12 +3799,12 @@ static const u16 snd_djm_opts_900nxs2_cap5[] = {
0x0507, 0x0508, 0x0509, 0x050a, 0x0511, 0x0512, 0x0513, 0x0514 };
static const struct snd_djm_ctl snd_djm_ctls_900nxs2[] = {
- SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Ch1 Input", 900nxs2_cap1, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", 900nxs2_cap2, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", 900nxs2_cap3, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch4 Input", 900nxs2_cap4, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch5 Input", 900nxs2_cap5, 3, SND_DJM_WINDEX_CAP)
+ SND_DJM_CTL("Master Input Level Capture Switch", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", 900nxs2_cap1, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", 900nxs2_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", 900nxs2_cap3, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 4 Capture Switch", 900nxs2_cap4, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 5 Capture Switch", 900nxs2_cap5, 3, SND_DJM_WINDEX_CAP)
};
// DJM-750MK2
@@ -3817,15 +3825,15 @@ static const u16 snd_djm_opts_750mk2_pb3[] = { 0x0300, 0x0301, 0x0304 };
static const struct snd_djm_ctl snd_djm_ctls_750mk2[] = {
- SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Ch1 Input", 750mk2_cap1, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", 750mk2_cap2, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", 750mk2_cap3, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch4 Input", 750mk2_cap4, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch5 Input", 750mk2_cap5, 3, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch1 Output", 750mk2_pb1, 0, SND_DJM_WINDEX_PB),
- SND_DJM_CTL("Ch2 Output", 750mk2_pb2, 1, SND_DJM_WINDEX_PB),
- SND_DJM_CTL("Ch3 Output", 750mk2_pb3, 2, SND_DJM_WINDEX_PB)
+ SND_DJM_CTL("Master Input Level Capture Switch", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", 750mk2_cap1, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", 750mk2_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", 750mk2_cap3, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 4 Capture Switch", 750mk2_cap4, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 5 Capture Switch", 750mk2_cap5, 3, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Output 1 Playback Switch", 750mk2_pb1, 0, SND_DJM_WINDEX_PB),
+ SND_DJM_CTL("Output 2 Playback Switch", 750mk2_pb2, 1, SND_DJM_WINDEX_PB),
+ SND_DJM_CTL("Output 3 Playback Switch", 750mk2_pb3, 2, SND_DJM_WINDEX_PB)
};
@@ -3845,12 +3853,56 @@ static const u16 snd_djm_opts_a9_cap5[] = {
0x0501, 0x0502, 0x0503, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050a, 0x050e };
static const struct snd_djm_ctl snd_djm_ctls_a9[] = {
- SND_DJM_CTL("Capture Level", a9_cap_level, 0, SND_DJM_WINDEX_CAPLVL),
- SND_DJM_CTL("Master Input", a9_cap1, 3, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch1 Input", a9_cap2, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch2 Input", a9_cap3, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch3 Input", a9_cap4, 2, SND_DJM_WINDEX_CAP),
- SND_DJM_CTL("Ch4 Input", a9_cap5, 2, SND_DJM_WINDEX_CAP)
+ SND_DJM_CTL("Master Input Level Capture Switch", a9_cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Master Input Capture Switch", a9_cap1, 3, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 1 Capture Switch", a9_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", a9_cap3, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", a9_cap4, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 4 Capture Switch", a9_cap5, 2, SND_DJM_WINDEX_CAP)
+};
+
+// DJM-V10
+static const u16 snd_djm_opts_v10_cap_level[] = {
+ 0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500
+};
+static const u16 snd_djm_opts_v10_cap1[] = {
+ 0x0103,
+ 0x0100, 0x0102, 0x0106, 0x0110, 0x0107,
+ 0x0108, 0x0109, 0x010a, 0x0121, 0x0122
+};
+static const u16 snd_djm_opts_v10_cap2[] = {
+ 0x0200, 0x0202, 0x0206, 0x0210, 0x0207,
+ 0x0208, 0x0209, 0x020a, 0x0221, 0x0222
+};
+static const u16 snd_djm_opts_v10_cap3[] = {
+ 0x0303,
+ 0x0300, 0x0302, 0x0306, 0x0310, 0x0307,
+ 0x0308, 0x0309, 0x030a, 0x0321, 0x0322
+};
+static const u16 snd_djm_opts_v10_cap4[] = {
+ 0x0403,
+ 0x0400, 0x0402, 0x0406, 0x0410, 0x0407,
+ 0x0408, 0x0409, 0x040a, 0x0421, 0x0422
+};
+static const u16 snd_djm_opts_v10_cap5[] = {
+ 0x0500, 0x0502, 0x0506, 0x0510, 0x0507,
+ 0x0508, 0x0509, 0x050a, 0x0521, 0x0522
+};
+static const u16 snd_djm_opts_v10_cap6[] = {
+ 0x0603,
+ 0x0600, 0x0602, 0x0606, 0x0610, 0x0607,
+ 0x0608, 0x0609, 0x060a, 0x0621, 0x0622
+};
+
+static const struct snd_djm_ctl snd_djm_ctls_v10[] = {
+ SND_DJM_CTL("Master Input Level Capture Switch", v10_cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+ SND_DJM_CTL("Input 1 Capture Switch", v10_cap1, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 2 Capture Switch", v10_cap2, 2, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 3 Capture Switch", v10_cap3, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 4 Capture Switch", v10_cap4, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 5 Capture Switch", v10_cap5, 0, SND_DJM_WINDEX_CAP),
+ SND_DJM_CTL("Input 6 Capture Switch", v10_cap6, 0, SND_DJM_WINDEX_CAP)
+ // playback channels are fixed and controlled by hardware knobs on the mixer
};
static const struct snd_djm_device snd_djm_devices[] = {
@@ -3861,6 +3913,7 @@ static const struct snd_djm_device snd_djm_devices[] = {
[SND_DJM_750MK2_IDX] = SND_DJM_DEVICE(750mk2),
[SND_DJM_450_IDX] = SND_DJM_DEVICE(450),
[SND_DJM_A9_IDX] = SND_DJM_DEVICE(a9),
+ [SND_DJM_V10_IDX] = SND_DJM_DEVICE(v10),
};
@@ -4151,6 +4204,9 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
case USB_ID(0x2b73, 0x003c): /* Pioneer DJ / AlphaTheta DJM-A9 */
err = snd_djm_controls_create(mixer, SND_DJM_A9_IDX);
break;
+ case USB_ID(0x2b73, 0x0034): /* Pioneer DJ DJM-V10 */
+ err = snd_djm_controls_create(mixer, SND_DJM_V10_IDX);
+ break;
}
return err;
diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c
index 288d22e6a0b2..93589e86828a 100644
--- a/sound/usb/mixer_scarlett2.c
+++ b/sound/usb/mixer_scarlett2.c
@@ -8574,8 +8574,7 @@ static int scarlett2_find_fc_interface(struct usb_device *dev,
epd = get_endpoint(intf->altsetting, 0);
private->bInterfaceNumber = desc->bInterfaceNumber;
- private->bEndpointAddress = epd->bEndpointAddress &
- USB_ENDPOINT_NUMBER_MASK;
+ private->bEndpointAddress = usb_endpoint_num(epd);
private->wMaxPacketSize = le16_to_cpu(epd->wMaxPacketSize);
private->bInterval = epd->bInterval;
return 0;
diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c
index 20ac32635f1f..236b69054867 100644
--- a/sound/usb/mixer_us16x08.c
+++ b/sound/usb/mixer_us16x08.c
@@ -180,7 +180,7 @@ static int snd_us16x08_route_info(struct snd_kcontrol *kcontrol,
static int snd_us16x08_route_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
int index = ucontrol->id.index;
/* route has no bias */
@@ -192,7 +192,7 @@ static int snd_us16x08_route_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_route_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
int index = ucontrol->id.index;
char buf[sizeof(route_msg)];
@@ -249,7 +249,7 @@ static int snd_us16x08_master_info(struct snd_kcontrol *kcontrol,
static int snd_us16x08_master_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
int index = ucontrol->id.index;
ucontrol->value.integer.value[0] = elem->cache_val[index];
@@ -260,7 +260,7 @@ static int snd_us16x08_master_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_master_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
char buf[sizeof(mix_msg_out)];
int val, err;
@@ -297,7 +297,7 @@ static int snd_us16x08_master_put(struct snd_kcontrol *kcontrol,
static int snd_us16x08_bus_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
char buf[sizeof(mix_msg_out)];
int val, err = 0;
@@ -338,7 +338,7 @@ static int snd_us16x08_bus_put(struct snd_kcontrol *kcontrol,
static int snd_us16x08_bus_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
switch (elem->head.id) {
case SND_US16X08_ID_BUSS_OUT:
@@ -359,7 +359,7 @@ static int snd_us16x08_bus_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_channel_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
int index = ucontrol->id.index;
ucontrol->value.integer.value[0] = elem->cache_val[index];
@@ -370,7 +370,7 @@ static int snd_us16x08_channel_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_channel_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
char buf[sizeof(mix_msg_in)];
int val, err;
@@ -417,7 +417,7 @@ static int snd_us16x08_mix_info(struct snd_kcontrol *kcontrol,
static int snd_us16x08_comp_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_us16x08_comp_store *store = elem->private_data;
int index = ucontrol->id.index;
int val_idx = COMP_STORE_IDX(elem->head.id);
@@ -430,7 +430,7 @@ static int snd_us16x08_comp_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_comp_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
struct snd_us16x08_comp_store *store = elem->private_data;
int index = ucontrol->id.index;
@@ -485,7 +485,7 @@ static int snd_us16x08_eqswitch_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
int val;
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_us16x08_eq_store *store = elem->private_data;
int index = ucontrol->id.index;
@@ -500,7 +500,7 @@ static int snd_us16x08_eqswitch_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_eqswitch_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
struct snd_us16x08_eq_store *store = elem->private_data;
int index = ucontrol->id.index;
@@ -544,7 +544,7 @@ static int snd_us16x08_eq_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
int val;
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_us16x08_eq_store *store = elem->private_data;
int index = ucontrol->id.index;
int b_idx = EQ_STORE_BAND_IDX(elem->head.id) - 1;
@@ -560,7 +560,7 @@ static int snd_us16x08_eq_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_eq_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
struct snd_us16x08_eq_store *store = elem->private_data;
int index = ucontrol->id.index;
@@ -684,7 +684,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
int i, set;
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_usb_audio *chip = elem->head.mixer->chip;
struct snd_us16x08_meter_store *store = elem->private_data;
u8 meter_urb[64] = {0};
@@ -744,7 +744,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol,
static int snd_us16x08_meter_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct usb_mixer_elem_info *elem = kcontrol->private_data;
+ struct usb_mixer_elem_info *elem = snd_kcontrol_chip(kcontrol);
struct snd_us16x08_meter_store *store = elem->private_data;
int val;
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 8954be23325c..eafc0d73cca1 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3187,6 +3187,57 @@ YAMAHA_DEVICE(0x7010, "UB99"),
}
}
},
+{
+ /*
+ * Pioneer DJ DJM-V10
+ */
+ USB_DEVICE_VENDOR_SPEC(0x2b73, 0x0034),
+ QUIRK_DRIVER_INFO {
+ QUIRK_DATA_COMPOSITE {
+ {
+ QUIRK_DATA_AUDIOFORMAT(0) {
+ .formats = SNDRV_PCM_FMTBIT_S24_3LE,
+ .channels = 12, // outputs
+ .iface = 0,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x01,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC|
+ USB_ENDPOINT_SYNC_ASYNC,
+ .rates = SNDRV_PCM_RATE_44100|
+ SNDRV_PCM_RATE_48000|
+ SNDRV_PCM_RATE_96000,
+ .rate_min = 44100,
+ .rate_max = 96000,
+ .nr_rates = 3,
+ .rate_table = (unsigned int[]) { 44100, 48000, 96000 }
+ }
+ },
+ {
+ QUIRK_DATA_AUDIOFORMAT(0) {
+ .formats = SNDRV_PCM_FMTBIT_S24_3LE,
+ .channels = 12, // inputs
+ .iface = 0,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x82,
+ .ep_idx = 1,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC|
+ USB_ENDPOINT_SYNC_ASYNC|
+ USB_ENDPOINT_USAGE_IMPLICIT_FB,
+ .rates = SNDRV_PCM_RATE_44100|
+ SNDRV_PCM_RATE_48000|
+ SNDRV_PCM_RATE_96000,
+ .rate_min = 44100,
+ .rate_max = 96000,
+ .nr_rates = 3,
+ .rate_table = (unsigned int[]) { 44100, 48000, 96000 }
+ }
+ },
+ QUIRK_COMPOSITE_END
+ }
+ }
+},
/*
* MacroSilicon MS2100/MS2106 based AV capture cards
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 9112313a9dbc..5ed523b13fad 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1872,6 +1872,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs,
break;
case USB_ID(0x2b73, 0x000a): /* Pioneer DJM-900NXS2 */
case USB_ID(0x2b73, 0x0013): /* Pioneer DJM-450 */
+ case USB_ID(0x2b73, 0x0034): /* Pioneer DJM-V10 */
pioneer_djm_set_format_quirk(subs, 0x0082);
break;
case USB_ID(0x08e4, 0x017f): /* Pioneer DJM-750 */
@@ -2242,6 +2243,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_CTL_MSG_DELAY_1M),
DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x0c45, 0x636b, /* Microdia JP001 USB Camera */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x0d8c, 0x0014, /* USB Audio Device */
QUIRK_FLAG_CTL_MSG_DELAY_1M),
DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */
@@ -2250,6 +2253,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_FIXED_RATE),
DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_MIC_RES_16),
DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
diff --git a/sound/virtio/virtio_kctl.c b/sound/virtio/virtio_kctl.c
index 7aa79c05b464..ffb903d56297 100644
--- a/sound/virtio/virtio_kctl.c
+++ b/sound/virtio/virtio_kctl.c
@@ -47,7 +47,7 @@ static const unsigned int g_v2a_mask_map[] = {
static int virtsnd_kctl_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
- struct virtio_snd *snd = kcontrol->private_data;
+ struct virtio_snd *snd = snd_kcontrol_chip(kcontrol);
struct virtio_kctl *kctl = &snd->kctls[kcontrol->private_value];
struct virtio_snd_ctl_info *kinfo =
&snd->kctl_infos[kcontrol->private_value];
@@ -102,7 +102,7 @@ static int virtsnd_kctl_info(struct snd_kcontrol *kcontrol,
static int virtsnd_kctl_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *uvalue)
{
- struct virtio_snd *snd = kcontrol->private_data;
+ struct virtio_snd *snd = snd_kcontrol_chip(kcontrol);
struct virtio_snd_ctl_info *kinfo =
&snd->kctl_infos[kcontrol->private_value];
unsigned int type = le32_to_cpu(kinfo->type);
@@ -175,7 +175,7 @@ on_failure:
static int virtsnd_kctl_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *uvalue)
{
- struct virtio_snd *snd = kcontrol->private_data;
+ struct virtio_snd *snd = snd_kcontrol_chip(kcontrol);
struct virtio_snd_ctl_info *kinfo =
&snd->kctl_infos[kcontrol->private_value];
unsigned int type = le32_to_cpu(kinfo->type);
@@ -239,7 +239,7 @@ static int virtsnd_kctl_put(struct snd_kcontrol *kcontrol,
static int virtsnd_kctl_tlv_op(struct snd_kcontrol *kcontrol, int op_flag,
unsigned int size, unsigned int __user *utlv)
{
- struct virtio_snd *snd = kcontrol->private_data;
+ struct virtio_snd *snd = snd_kcontrol_chip(kcontrol);
struct virtio_snd_msg *msg;
struct virtio_snd_ctl_hdr *hdr;
unsigned int *tlv;
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index 7fcc528a0204..fe5cb4139088 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -22,6 +22,7 @@
#include <linux/pm_runtime.h>
#include <linux/dma-mapping.h>
#include <linux/delay.h>
+#include <linux/string.h>
#include <sound/core.h>
#include <sound/asoundef.h>
#include <sound/pcm.h>
@@ -1698,9 +1699,9 @@ static int __hdmi_lpe_audio_probe(struct platform_device *pdev)
card_ctx = card->private_data;
card_ctx->dev = &pdev->dev;
card_ctx->card = card;
- strcpy(card->driver, INTEL_HAD);
- strcpy(card->shortname, "Intel HDMI/DP LPE Audio");
- strcpy(card->longname, "Intel HDMI/DP LPE Audio");
+ strscpy(card->driver, INTEL_HAD);
+ strscpy(card->shortname, "Intel HDMI/DP LPE Audio");
+ strscpy(card->longname, "Intel HDMI/DP LPE Audio");
card_ctx->irq = -1;
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd/ibs.h
index cb1740bc3da2..300b6e0765b2 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd/ibs.h
@@ -4,7 +4,7 @@
* 55898 Rev 0.35 - Feb 5, 2021
*/
-#include "msr-index.h"
+#include "../msr-index.h"
/* IBS_OP_DATA2 DataSrc */
#define IBS_DATA_SRC_LOC_CACHE 2
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 6c2c152d8a67..bc81b9d1aeca 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -476,11 +476,11 @@
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
-#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
-#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
-#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
+#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)
@@ -519,7 +519,7 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
@@ -527,10 +527,10 @@
#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
-#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
-#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
+#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 253690eb3c26..183aa662b165 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -82,6 +82,7 @@
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
+#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -242,4 +243,9 @@ static inline int inat_evex_scalable(insn_attr_t attr)
{
return attr & INAT_EVEX_SCALABLE;
}
+
+static inline int inat_is_invalid64(insn_attr_t attr)
+{
+ return attr & INAT_INV64;
+}
#endif
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
index d751eb8585d0..8d925ce9750f 100644
--- a/tools/arch/x86/kcpuid/cpuid.csv
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: CC0-1.0
-# Generator: x86-cpuid-db v1.0
+# Generator: x86-cpuid-db v2.4
#
# Auto-generated file.
@@ -12,297 +12,298 @@
# Leaf 0H
# Maximum standard leaf number + CPU vendor string
- 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported
- 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
- 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
- 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
+ 0x0, 0, eax, 31:0, max_std_leaf , Highest standard CPUID leaf supported
+ 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
+ 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
+ 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
# Leaf 1H
# CPU FMS (Family/Model/Stepping) + standard feature flags
- 1, 0, eax, 3:0, stepping , Stepping ID
- 1, 0, eax, 7:4, base_model , Base CPU model ID
- 1, 0, eax, 11:8, base_family_id , Base CPU family ID
- 1, 0, eax, 13:12, cpu_type , CPU type
- 1, 0, eax, 19:16, ext_model , Extended CPU model ID
- 1, 0, eax, 27:20, ext_family , Extended CPU family ID
- 1, 0, ebx, 7:0, brand_id , Brand index
- 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
- 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count
- 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
- 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
- 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
- 1, 0, ecx, 2, dtes64 , 64-bit DS save area
- 1, 0, ecx, 3, monitor , MONITOR/MWAIT support
- 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
- 1, 0, ecx, 5, vmx , Virtual Machine Extensions
- 1, 0, ecx, 6, smx , Safer Mode Extensions
- 1, 0, ecx, 7, est , Enhanced Intel SpeedStep
- 1, 0, ecx, 8, tm2 , Thermal Monitor 2
- 1, 0, ecx, 9, ssse3 , Supplemental SSE3
- 1, 0, ecx, 10, cid , L1 Context ID
- 1, 0, ecx, 11, sdbg , Sillicon Debug
- 1, 0, ecx, 12, fma , FMA extensions using YMM state
- 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
- 1, 0, ecx, 14, xtpr , xTPR Update Control
- 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
- 1, 0, ecx, 17, pcid , Process-context identifiers
- 1, 0, ecx, 18, dca , Direct Cache Access
- 1, 0, ecx, 19, sse4_1 , SSE4.1
- 1, 0, ecx, 20, sse4_2 , SSE4.2
- 1, 0, ecx, 21, x2apic , X2APIC support
- 1, 0, ecx, 22, movbe , MOVBE instruction support
- 1, 0, ecx, 23, popcnt , POPCNT instruction support
- 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
- 1, 0, ecx, 25, aes , AES instructions
- 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
- 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
- 1, 0, ecx, 28, avx , AVX instructions support
- 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
- 1, 0, ecx, 30, rdrand , RDRAND instruction support
- 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
- 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
- 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
- 1, 0, edx, 2, de , Debugging Extensions
- 1, 0, edx, 3, pse , Page Size Extension
- 1, 0, edx, 4, tsc , Time Stamp Counter
- 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
- 1, 0, edx, 6, pae , Physical Address Extensions
- 1, 0, edx, 7, mce , Machine Check Exception
- 1, 0, edx, 8, cx8 , CMPXCHG8B instruction
- 1, 0, edx, 9, apic , APIC on-chip
- 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
- 1, 0, edx, 12, mtrr , Memory Type Range Registers
- 1, 0, edx, 13, pge , Page Global Extensions
- 1, 0, edx, 14, mca , Machine Check Architecture
- 1, 0, edx, 15, cmov , Conditional Move Instruction
- 1, 0, edx, 16, pat , Page Attribute Table
- 1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
- 1, 0, edx, 18, pn , Processor Serial Number
- 1, 0, edx, 19, clflush , CLFLUSH instruction
- 1, 0, edx, 21, dts , Debug Store
- 1, 0, edx, 22, acpi , Thermal monitor and clock control
- 1, 0, edx, 23, mmx , MMX instructions
- 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
- 1, 0, edx, 25, sse , SSE instructions
- 1, 0, edx, 26, sse2 , SSE2 instructions
- 1, 0, edx, 27, ss , Self Snoop
- 1, 0, edx, 28, ht , Hyper-threading
- 1, 0, edx, 29, tm , Thermal Monitor
- 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved
- 1, 0, edx, 31, pbe , Pending Break Enable
+ 0x1, 0, eax, 3:0, stepping , Stepping ID
+ 0x1, 0, eax, 7:4, base_model , Base CPU model ID
+ 0x1, 0, eax, 11:8, base_family_id , Base CPU family ID
+ 0x1, 0, eax, 13:12, cpu_type , CPU type
+ 0x1, 0, eax, 19:16, ext_model , Extended CPU model ID
+ 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID
+ 0x1, 0, ebx, 7:0, brand_id , Brand index
+ 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
+ 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU count
+ 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
+ 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
+ 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
+ 0x1, 0, ecx, 2, dtes64 , 64-bit DS save area
+ 0x1, 0, ecx, 3, monitor , MONITOR/MWAIT support
+ 0x1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
+ 0x1, 0, ecx, 5, vmx , Virtual Machine Extensions
+ 0x1, 0, ecx, 6, smx , Safer Mode Extensions
+ 0x1, 0, ecx, 7, est , Enhanced Intel SpeedStep
+ 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2
+ 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3
+ 0x1, 0, ecx, 10, cid , L1 Context ID
+ 0x1, 0, ecx, 11, sdbg , Silicon Debug
+ 0x1, 0, ecx, 12, fma , FMA extensions using YMM state
+ 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
+ 0x1, 0, ecx, 14, xtpr , xTPR Update Control
+ 0x1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
+ 0x1, 0, ecx, 17, pcid , Process-context identifiers
+ 0x1, 0, ecx, 18, dca , Direct Cache Access
+ 0x1, 0, ecx, 19, sse4_1 , SSE4.1
+ 0x1, 0, ecx, 20, sse4_2 , SSE4.2
+ 0x1, 0, ecx, 21, x2apic , X2APIC support
+ 0x1, 0, ecx, 22, movbe , MOVBE instruction support
+ 0x1, 0, ecx, 23, popcnt , POPCNT instruction support
+ 0x1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
+ 0x1, 0, ecx, 25, aes , AES instructions
+ 0x1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
+ 0x1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
+ 0x1, 0, ecx, 28, avx , AVX instructions support
+ 0x1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
+ 0x1, 0, ecx, 30, rdrand , RDRAND instruction support
+ 0x1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
+ 0x1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
+ 0x1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
+ 0x1, 0, edx, 2, de , Debugging Extensions
+ 0x1, 0, edx, 3, pse , Page Size Extension
+ 0x1, 0, edx, 4, tsc , Time Stamp Counter
+ 0x1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
+ 0x1, 0, edx, 6, pae , Physical Address Extensions
+ 0x1, 0, edx, 7, mce , Machine Check Exception
+ 0x1, 0, edx, 8, cx8 , CMPXCHG8B instruction
+ 0x1, 0, edx, 9, apic , APIC on-chip
+ 0x1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
+ 0x1, 0, edx, 12, mtrr , Memory Type Range Registers
+ 0x1, 0, edx, 13, pge , Page Global Extensions
+ 0x1, 0, edx, 14, mca , Machine Check Architecture
+ 0x1, 0, edx, 15, cmov , Conditional Move Instruction
+ 0x1, 0, edx, 16, pat , Page Attribute Table
+ 0x1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
+ 0x1, 0, edx, 18, pn , Processor Serial Number
+ 0x1, 0, edx, 19, clflush , CLFLUSH instruction
+ 0x1, 0, edx, 21, dts , Debug Store
+ 0x1, 0, edx, 22, acpi , Thermal monitor and clock control
+ 0x1, 0, edx, 23, mmx , MMX instructions
+ 0x1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
+ 0x1, 0, edx, 25, sse , SSE instructions
+ 0x1, 0, edx, 26, sse2 , SSE2 instructions
+ 0x1, 0, edx, 27, ss , Self Snoop
+ 0x1, 0, edx, 28, ht , Hyper-threading
+ 0x1, 0, edx, 29, tm , Thermal Monitor
+ 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved
+ 0x1, 0, edx, 31, pbe , Pending Break Enable
# Leaf 2H
# Intel cache and TLB information one-byte descriptors
- 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried
- 2, 0, eax, 15:8, desc1 , Descriptor #1
- 2, 0, eax, 23:16, desc2 , Descriptor #2
- 2, 0, eax, 30:24, desc3 , Descriptor #3
- 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
- 2, 0, ebx, 7:0, desc4 , Descriptor #4
- 2, 0, ebx, 15:8, desc5 , Descriptor #5
- 2, 0, ebx, 23:16, desc6 , Descriptor #6
- 2, 0, ebx, 30:24, desc7 , Descriptor #7
- 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
- 2, 0, ecx, 7:0, desc8 , Descriptor #8
- 2, 0, ecx, 15:8, desc9 , Descriptor #9
- 2, 0, ecx, 23:16, desc10 , Descriptor #10
- 2, 0, ecx, 30:24, desc11 , Descriptor #11
- 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
- 2, 0, edx, 7:0, desc12 , Descriptor #12
- 2, 0, edx, 15:8, desc13 , Descriptor #13
- 2, 0, edx, 23:16, desc14 , Descriptor #14
- 2, 0, edx, 30:24, desc15 , Descriptor #15
- 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
+ 0x2, 0, eax, 7:0, iteration_count , Number of times this leaf must be queried
+ 0x2, 0, eax, 15:8, desc1 , Descriptor #1
+ 0x2, 0, eax, 23:16, desc2 , Descriptor #2
+ 0x2, 0, eax, 30:24, desc3 , Descriptor #3
+ 0x2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
+ 0x2, 0, ebx, 7:0, desc4 , Descriptor #4
+ 0x2, 0, ebx, 15:8, desc5 , Descriptor #5
+ 0x2, 0, ebx, 23:16, desc6 , Descriptor #6
+ 0x2, 0, ebx, 30:24, desc7 , Descriptor #7
+ 0x2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
+ 0x2, 0, ecx, 7:0, desc8 , Descriptor #8
+ 0x2, 0, ecx, 15:8, desc9 , Descriptor #9
+ 0x2, 0, ecx, 23:16, desc10 , Descriptor #10
+ 0x2, 0, ecx, 30:24, desc11 , Descriptor #11
+ 0x2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
+ 0x2, 0, edx, 7:0, desc12 , Descriptor #12
+ 0x2, 0, edx, 15:8, desc13 , Descriptor #13
+ 0x2, 0, edx, 23:16, desc14 , Descriptor #14
+ 0x2, 0, edx, 30:24, desc15 , Descriptor #15
+ 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
# Leaf 4H
# Intel deterministic cache parameters
- 4, 31:0, eax, 4:0, cache_type , Cache type field
- 4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
- 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level
- 4, 31:0, eax, 9, fully_associative , Fully-associative cache
- 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache
- 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
- 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
- 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
- 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
- 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
- 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
- 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
- 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
+ 0x4, 31:0, eax, 4:0, cache_type , Cache type field
+ 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
+ 0x4, 31:0, eax, 8, cache_self_init , Self-initializing cache level
+ 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache
+ 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache
+ 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
+ 0x4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
+ 0x4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
+ 0x4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
+ 0x4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
+ 0x4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+ 0x4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
+ 0x4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
# Leaf 5H
# MONITOR/MWAIT instructions enumeration
- 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
- 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
- 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
- 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
- 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
- 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
- 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
- 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
- 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
- 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
- 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
- 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
+ 0x5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
+ 0x5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
+ 0x5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
+ 0x5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
+ 0x5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
+ 0x5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
+ 0x5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
+ 0x5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
+ 0x5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
+ 0x5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
+ 0x5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
+ 0x5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
# Leaf 6H
# Thermal and Power Management enumeration
- 6, 0, eax, 0, dtherm , Digital temprature sensor
- 6, 0, eax, 1, turbo_boost , Intel Turbo Boost
- 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
- 6, 0, eax, 4, pln , Power Limit Notification (PLN) event
- 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
- 6, 0, eax, 6, pts , Package thermal management
- 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
- 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
- 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
- 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
- 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
- 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
- 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
- 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
- 6, 0, eax, 16, hwp_peci_override , HWP PECI override
- 6, 0, eax, 17, hwp_flexible , Flexible HWP
- 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
- 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
- 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
- 6, 0, eax, 23, thread_director , Intel thread director support
- 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
- 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
- 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
- 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
- 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
- 6, 0, edx, 0, perfcap_reporting , Performance capability reporting
- 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
- 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages
- 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based
+ 0x6, 0, eax, 0, dtherm , Digital temperature sensor
+ 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost
+ 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
+ 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event
+ 0x6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
+ 0x6, 0, eax, 6, pts , Package thermal management
+ 0x6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
+ 0x6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
+ 0x6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
+ 0x6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
+ 0x6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
+ 0x6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
+ 0x6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
+ 0x6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
+ 0x6, 0, eax, 16, hwp_peci_override , HWP PECI override
+ 0x6, 0, eax, 17, hwp_flexible , Flexible HWP
+ 0x6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
+ 0x6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
+ 0x6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
+ 0x6, 0, eax, 23, thread_director , Intel thread director support
+ 0x6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
+ 0x6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
+ 0x6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
+ 0x6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
+ 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
+ 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting
+ 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
+ 0x6, 0, edx, 11:8, feedback_sz , Feedback interface structure size, in 4K pages
+ 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU hardware feedback interface index
# Leaf 7H
# Extended CPU features enumeration
- 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves
- 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
- 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
- 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
- 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
- 7, 0, ebx, 4, hle , Hardware Lock Elision
- 7, 0, ebx, 5, avx2 , AVX2 instruction set
- 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
- 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
- 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
- 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
- 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
- 7, 0, ebx, 11, rtm , Intel restricted transactional memory
- 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
- 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
- 7, 0, ebx, 14, mpx , Intel memory protection extensions
- 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent
- 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
- 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
- 7, 0, ebx, 18, rdseed , RDSEED instruction
- 7, 0, ebx, 19, adx , ADCX/ADOX instructions
- 7, 0, ebx, 20, smap , Supervisor mode access prevention
- 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
- 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
- 7, 0, ebx, 24, clwb , CLWB instruction
- 7, 0, ebx, 25, intel_pt , Intel processor trace
- 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
- 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs
- 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs
- 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
- 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions
- 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
- 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
- 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs
- 7, 0, ecx, 2, umip , User mode instruction protection
- 7, 0, ecx, 3, pku , Protection keys for user-space
- 7, 0, ecx, 4, ospke , OS protection keys enable
- 7, 0, ecx, 5, waitpkg , WAITPKG instructions
- 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2
- 7, 0, ecx, 7, cet_ss , CET shadow stack features
- 7, 0, ecx, 8, gfni , Galois field new instructions
- 7, 0, ecx, 9, vaes , Vector AES instrs
- 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
- 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
- 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle
- 7, 0, ecx, 13, tme , Intel total memory encryption
- 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW
- 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging)
- 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
- 7, 0, ecx, 22, rdpid , RDPID instruction
- 7, 0, ecx, 23, key_locker , Intel key locker support
- 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
- 7, 0, ecx, 25, cldemote , CLDEMOTE instruction
- 7, 0, ecx, 27, movdiri , MOVDIRI instruction
- 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
- 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
- 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
- 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
- 7, 0, edx, 1, sgx_keys , Intel SGX attestation services
- 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
- 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
- 7, 0, edx, 4, fsrm , Fast short REP MOV
- 7, 0, edx, 5, uintr , CPU supports user interrupts
- 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
- 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
- 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
- 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
- 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
- 7, 0, edx, 14, serialize , SERIALIZE instruction
- 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
- 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
- 7, 0, edx, 18, pconfig , PCONFIG instruction
- 7, 0, edx, 19, arch_lbr , Intel architectural LBRs
- 7, 0, edx, 20, ibt , CET indirect branch tracking
- 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
- 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
- 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
- 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
- 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
- 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
- 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
- 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
- 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
- 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
- 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
- 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions
- 7, 1, eax, 6, lass , Linear address space separation
- 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
- 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported
- 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
- 7, 1, eax, 11, fsrs , Fast short REP STOSB
- 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
- 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
- 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
- 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing)
- 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
- 7, 1, eax, 22, hreset , History reset support
- 7, 1, eax, 23, avx_ifma , Integer fused multiply add
- 7, 1, eax, 26, lam , Linear address masking
- 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
- 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
- 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
- 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
- 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
- 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
- 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
- 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
- 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
- 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
- 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
- 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
- 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
- 7, 2, edx, 6, uclock_disable , UC-lock disable is supported
+ 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of leaf 0x7 subleaves
+ 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
+ 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
+ 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
+ 0x7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
+ 0x7, 0, ebx, 4, hle , Hardware Lock Elision
+ 0x7, 0, ebx, 5, avx2 , AVX2 instruction set
+ 0x7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
+ 0x7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
+ 0x7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
+ 0x7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
+ 0x7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
+ 0x7, 0, ebx, 11, rtm , Intel restricted transactional memory
+ 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
+ 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
+ 0x7, 0, ebx, 14, mpx , Intel memory protection extensions
+ 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement
+ 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
+ 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
+ 0x7, 0, ebx, 18, rdseed , RDSEED instruction
+ 0x7, 0, ebx, 19, adx , ADCX/ADOX instructions
+ 0x7, 0, ebx, 20, smap , Supervisor mode access prevention
+ 0x7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
+ 0x7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
+ 0x7, 0, ebx, 24, clwb , CLWB instruction
+ 0x7, 0, ebx, 25, intel_pt , Intel processor trace
+ 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
+ 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instructions
+ 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instructions
+ 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
+ 0x7, 0, ebx, 30, avx512bw , AVX-512 byte/word instructions
+ 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
+ 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
+ 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instructions
+ 0x7, 0, ecx, 2, umip , User mode instruction protection
+ 0x7, 0, ecx, 3, pku , Protection keys for user-space
+ 0x7, 0, ecx, 4, ospke , OS protection keys enable
+ 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions
+ 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instructions group 2
+ 0x7, 0, ecx, 7, cet_ss , CET shadow stack features
+ 0x7, 0, ecx, 8, gfni , Galois field new instructions
+ 0x7, 0, ecx, 9, vaes , Vector AES instructions
+ 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
+ 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
+ 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bitwise algorithms
+ 0x7, 0, ecx, 13, tme , Intel total memory encryption
+ 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DWORD/QWORD
+ 0x7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging)
+ 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
+ 0x7, 0, ecx, 22, rdpid , RDPID instruction
+ 0x7, 0, ecx, 23, key_locker , Intel key locker support
+ 0x7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
+ 0x7, 0, ecx, 25, cldemote , CLDEMOTE instruction
+ 0x7, 0, ecx, 27, movdiri , MOVDIRI instruction
+ 0x7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
+ 0x7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
+ 0x7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
+ 0x7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
+ 0x7, 0, edx, 1, sgx_keys , Intel SGX attestation services
+ 0x7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
+ 0x7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
+ 0x7, 0, edx, 4, fsrm , Fast short REP MOV
+ 0x7, 0, edx, 5, uintr , CPU supports user interrupts
+ 0x7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
+ 0x7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
+ 0x7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
+ 0x7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
+ 0x7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
+ 0x7, 0, edx, 14, serialize , SERIALIZE instruction
+ 0x7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
+ 0x7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
+ 0x7, 0, edx, 18, pconfig , PCONFIG instruction
+ 0x7, 0, edx, 19, arch_lbr , Intel architectural LBRs
+ 0x7, 0, edx, 20, ibt , CET indirect branch tracking
+ 0x7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
+ 0x7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
+ 0x7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
+ 0x7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
+ 0x7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
+ 0x7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
+ 0x7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
+ 0x7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
+ 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
+ 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
+ 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
+ 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions
+ 0x7, 1, eax, 6, lass , Linear address space separation
+ 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
+ 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: leaf 0x23 is supported
+ 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
+ 0x7, 1, eax, 11, fsrs , Fast short REP STOSB
+ 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
+ 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
+ 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
+ 0x7, 1, eax, 19, wrmsrns , WRMSRNS instruction (WRMSR-non-serializing)
+ 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data
+ 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
+ 0x7, 1, eax, 22, hreset , History reset support
+ 0x7, 1, eax, 23, avx_ifma , Integer fused multiply add
+ 0x7, 1, eax, 26, lam , Linear address masking
+ 0x7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
+ 0x7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
+ 0x7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
+ 0x7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
+ 0x7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
+ 0x7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
+ 0x7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
+ 0x7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
+ 0x7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
+ 0x7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
+ 0x7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
+ 0x7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
+ 0x7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
+ 0x7, 2, edx, 6, uclock_disable , UC-lock disable is supported
# Leaf 9H
# Intel DCA (Direct Cache Access) enumeration
- 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
+ 0x9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
# Leaf AH
# Intel PMU (Performance Monitoring Unit) enumeration
@@ -310,7 +311,7 @@
0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID
0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU
0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters
- 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector
+ 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of leaf 0xa EBX bit vector
0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available
0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available
0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available
@@ -339,18 +340,18 @@
0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported
0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported
0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported
- 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
- 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
- 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
- 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
- 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
- 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg)
- 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state)
- 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state)
+ 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)
+ 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)
+ 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)
+ 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)
+ 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)
+ 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU registers)
+ 0xd, 0, eax, 11, xcr0_cet_u , XCR0.CET_U (bit 11) supported (CET user state)
+ 0xd, 0, eax, 12, xcr0_cet_s , XCR0.CET_S (bit 12) supported (CET supervisor state)
0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
- 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features
- 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features
+ 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features
+ 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features
0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling)
0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction
0xd, 1, eax, 1, xsavec , XSAVEC instruction
@@ -369,7 +370,7 @@
0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes
0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes
0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit
- 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned
+ 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature XSAVE area is 64-byte aligned
# Leaf FH
# Intel RDT / AMD PQoS resource monitoring
@@ -426,17 +427,17 @@
0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported
0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported
0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported
- 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
- 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
- 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
- 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
- 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
- 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg)
+ 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)
+ 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)
+ 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)
+ 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)
+ 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)
+ 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU registers)
0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout)
- 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31]
- 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51]
+ 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base address, bits[12:31]
+ 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base address, bits[32:51]
0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding
0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31]
0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51]
@@ -444,7 +445,7 @@
# Leaf 14H
# Intel Processor Trace enumeration
- 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf
+ 0x14, 0, eax, 31:0, pt_max_subleaf , Maximum leaf 0x14 subleaf
0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible
0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode
0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation
@@ -472,7 +473,7 @@
0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz
# Leaf 16H
-# Intel processor fequency enumeration
+# Intel processor frequency enumeration
0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz
0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz
@@ -481,9 +482,9 @@
# Leaf 17H
# Intel SoC vendor attributes enumeration
- 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf
+ 0x17, 0, eax, 31:0, soc_max_subleaf , Maximum leaf 0x17 subleaf
0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID
- 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel)
+ 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel)
0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor
0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor
0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3)
@@ -494,18 +495,18 @@
# Leaf 18H
# Intel determenestic address translation (TLB) parameters
- 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf
+ 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Maximum leaf 0x18 subleaf
0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported
0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported
0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported
0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported
- 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct
+ 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this structure
0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity
0x18, 31:0, ecx, 31:0, n_sets , Number of sets
0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type)
0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based)
0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure
- 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1
+ 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max number of addressable IDs for logical CPUs sharing this TLB - 1
# Leaf 19H
# Intel Key Locker enumeration
@@ -568,7 +569,7 @@
# Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration
0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns)
- 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes)
+ 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes)
# Leaf 1FH
# Intel extended topology enumeration v2
@@ -623,9 +624,9 @@
0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11
# Leaf 80000000H
-# Maximum extended leaf number + CPU vendor string (AMD)
+# Maximum extended leaf number + AMD/Transmeta CPU vendor string
-0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported
+0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended CPUID leaf supported
0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3
0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11
0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7
@@ -636,6 +637,7 @@
0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID
0x80000001, 0, eax, 7:4, e_base_model , Base processor model
0x80000001, 0, eax, 11:8, e_base_family , Base processor family
+0x80000001, 0, eax, 13:12, e_base_type , Base processor type (Transmeta)
0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model
0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family
0x80000001, 0, ebx, 15:0, brand_id , Brand ID
@@ -659,7 +661,7 @@
0x80000001, 0, ecx, 17, tce , Translation cache extension
0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c)
0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations
-0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d)
+0x80000001, 0, ecx, 22, topoext , Topology Extensions (leaf 0x8000001d)
0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions
0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions
0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension
@@ -687,6 +689,7 @@
0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit
0x80000001, 0, edx, 20, nx , No-execute page protection
0x80000001, 0, edx, 22, mmxext , AMD MMX extensions
+0x80000001, 0, edx, 23, e_mmx , MMX instructions
0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions
0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations
0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support
@@ -720,11 +723,11 @@
0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47
# Leaf 80000005H
-# AMD L1 cache and L1 TLB enumeration
+# AMD/Transmeta L1 cache and L1 TLB enumeration
-0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages
+0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages
0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages
-0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages
+0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages
0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages
0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages
0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages
@@ -763,11 +766,11 @@
# CPU power management (mostly AMD) and AMD RAS enumeration
0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal
-0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors
+0x80000007, 0, ebx, 1, succor , Software containment of uncorrectable errors
0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs
0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs)
0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio
-0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor
+0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor
0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling
0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling
0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip)
@@ -810,7 +813,7 @@
0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number
0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable
0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable)
-0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW)
+0x80000008, 0, ebx, 26, amd_ssb_no , SSBD is not needed (fixed in hardware)
0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control
0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable
0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion
@@ -838,7 +841,7 @@
0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter
0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold
0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller
-0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt)
+0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virtualization)
0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag
0x8000000a, 0, edx, 17, gmet , Guest mode execution trap
0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC
@@ -850,7 +853,7 @@
0x8000000a, 0, edx, 25, vnmi , NMI virtualization
0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization
0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change
-0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check
+0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME address check
# Leaf 80000019H
# AMD TLB 1G-pages enumeration
@@ -891,20 +894,20 @@
# AMD LWP (Lightweight Profiling)
0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS)
-0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS
-0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS
-0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS
-0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS
-0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS
-0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS
+0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction is supported by OS
+0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event is supported by OS
+0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event is supported by OS
+0x8000001c, 0, eax, 4, os_lwp_dme , Dcache Miss Event is supported by OS
+0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event is supported by OS
+0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event is supported by OS
0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS
0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS
0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS
0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords
0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes
-0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included)
+0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventID value (EventID 255 not included)
0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block
-0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31)
+0x8000001c, 0, ecx, 4:0, lwp_latency_max , Number of bits in cache latency counters (10 to 31)
0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference
0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded
0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version
@@ -913,16 +916,16 @@
0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported
0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level
0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency
-0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware
-0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW
-0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW
-0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW
-0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW
-0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW
-0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW
-0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW
-0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW
-0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW
+0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in hardware
+0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction is available in hardware
+0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event is available in hardware
+0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event is available in hardware
+0x8000001c, 0, edx, 4, hw_lwp_dme , Dcache Miss Event is available in hardware
+0x8000001c, 0, edx, 5, hw_lwp_cnh , Clocks Not Halted event is available in hardware
+0x8000001c, 0, edx, 6, hw_lwp_rnh , Reference clocks Not Halted event is available in hardware
+0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in hardware
+0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in hardware
+0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in hardware
# Leaf 8000001DH
# AMD deterministic cache parameters
@@ -958,10 +961,10 @@
0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported
0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported
0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported
-0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported
+0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported
0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported
0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX
-0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains
+0x8000001f, 0, eax, 10, sme_coherent , Cache coherency is enforced across encryption domains
0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor
0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported
0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported
@@ -973,13 +976,13 @@
0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests
0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported
0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported
-0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported
+0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported
0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported
0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption
0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits
0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported
0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests
-0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest
+0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest
# Leaf 80000020H
# AMD Platform QoS extended feature IDs
@@ -988,6 +991,8 @@
0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support
0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support
0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support
+0x80000020, 0, ebx, 5, abmc , Assignable Bandwidth Monitoring Counters
+0x80000020, 0, ebx, 6, sdciae , Smart Data Cache Injection (SDCI) Allocation Enforcement
0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size
0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based)
0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size
@@ -1007,17 +1012,26 @@
0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints
0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing
0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC
-0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported
+0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock
0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base
-0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported
-0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported
-0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present
-0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported
-0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported
-0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported
+0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore
+0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS
+0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not available
+0x80000021, 0, eax, 10, fsrs , Fast Short Rep STOSB
+0x80000021, 0, eax, 11, fsrc , Fast Short Rep CMPSB
+0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is available
+0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space
0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported
-0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported
-0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+0x80000021, 0, eax, 18, epsf , Enhanced Predictive Store Forwarding
+0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS
+0x80000021, 0, eax, 24, eraps , Enhanced Return Address Predictor Security
+0x80000021, 0, eax, 27, sbpb , Selective Branch Predictor Barrier
+0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor
+0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability
+0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary
+0x80000021, 0, eax, 31, srso_msr_fix , Software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO
+0x80000021, 0, ebx, 15:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+0x80000021, 0, ebx, 23:16, rap_size , Return Address Predictor size
# Leaf 80000022H
# AMD Performance Monitoring v2 enumeration
@@ -1025,7 +1039,7 @@
0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported
0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack)
0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported
-0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters
+0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters
0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries
0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters
0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters
@@ -1035,7 +1049,7 @@
# AMD Secure Multi-key Encryption enumeration
0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported
-0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys
+0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total number of available encryption keys
# Leaf 80000026H
# AMD extended topology enumeration v2
@@ -1051,3 +1065,108 @@
0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID)
0x80000026, 3:0, ecx, 15:8, domain_type , This domain type
0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU
+
+# Leaf 80860000H
+# Maximum Transmeta leaf number + CPU vendor ID string
+
+0x80860000, 0, eax, 31:0, max_tra_leaf , Maximum supported Transmeta leaf number
+0x80860000, 0, ebx, 31:0, cpu_vendorid_0 , Transmeta Vendor ID string bytes 0 - 3
+0x80860000, 0, ecx, 31:0, cpu_vendorid_2 , Transmeta Vendor ID string bytes 8 - 11
+0x80860000, 0, edx, 31:0, cpu_vendorid_1 , Transmeta Vendor ID string bytes 4 - 7
+
+# Leaf 80860001H
+# Transmeta extended CPU information
+
+0x80860001, 0, eax, 3:0, stepping , Stepping ID
+0x80860001, 0, eax, 7:4, base_model , Base CPU model ID
+0x80860001, 0, eax, 11:8, base_family_id , Base CPU family ID
+0x80860001, 0, eax, 13:12, cpu_type , CPU type
+0x80860001, 0, ebx, 7:0, cpu_rev_mask_minor , CPU revision ID, mask minor
+0x80860001, 0, ebx, 15:8, cpu_rev_mask_major , CPU revision ID, mask major
+0x80860001, 0, ebx, 23:16, cpu_rev_minor , CPU revision ID, minor
+0x80860001, 0, ebx, 31:24, cpu_rev_major , CPU revision ID, major
+0x80860001, 0, ecx, 31:0, cpu_base_mhz , CPU nominal frequency, in MHz
+0x80860001, 0, edx, 0, recovery , Recovery CMS is active (after bad flush)
+0x80860001, 0, edx, 1, longrun , LongRun power management capabilities
+0x80860001, 0, edx, 3, lrti , LongRun Table Interface
+
+# Leaf 80860002H
+# Transmeta Code Morphing Software (CMS) enumeration
+
+0x80860002, 0, eax, 31:0, cpu_rev_id , CPU revision ID
+0x80860002, 0, ebx, 7:0, cms_rev_mask_2 , CMS revision ID, mask component 2
+0x80860002, 0, ebx, 15:8, cms_rev_mask_1 , CMS revision ID, mask component 1
+0x80860002, 0, ebx, 23:16, cms_rev_minor , CMS revision ID, minor
+0x80860002, 0, ebx, 31:24, cms_rev_major , CMS revision ID, major
+0x80860002, 0, ecx, 31:0, cms_rev_mask_3 , CMS revision ID, mask component 3
+
+# Leaf 80860003H
+# Transmeta CPU information string, bytes 0 - 15
+
+0x80860003, 0, eax, 31:0, cpu_info_0 , CPU info string bytes 0 - 3
+0x80860003, 0, ebx, 31:0, cpu_info_1 , CPU info string bytes 4 - 7
+0x80860003, 0, ecx, 31:0, cpu_info_2 , CPU info string bytes 8 - 11
+0x80860003, 0, edx, 31:0, cpu_info_3 , CPU info string bytes 12 - 15
+
+# Leaf 80860004H
+# Transmeta CPU information string, bytes 16 - 31
+
+0x80860004, 0, eax, 31:0, cpu_info_4 , CPU info string bytes 16 - 19
+0x80860004, 0, ebx, 31:0, cpu_info_5 , CPU info string bytes 20 - 23
+0x80860004, 0, ecx, 31:0, cpu_info_6 , CPU info string bytes 24 - 27
+0x80860004, 0, edx, 31:0, cpu_info_7 , CPU info string bytes 28 - 31
+
+# Leaf 80860005H
+# Transmeta CPU information string, bytes 32 - 47
+
+0x80860005, 0, eax, 31:0, cpu_info_8 , CPU info string bytes 32 - 35
+0x80860005, 0, ebx, 31:0, cpu_info_9 , CPU info string bytes 36 - 39
+0x80860005, 0, ecx, 31:0, cpu_info_10 , CPU info string bytes 40 - 43
+0x80860005, 0, edx, 31:0, cpu_info_11 , CPU info string bytes 44 - 47
+
+# Leaf 80860006H
+# Transmeta CPU information string, bytes 48 - 63
+
+0x80860006, 0, eax, 31:0, cpu_info_12 , CPU info string bytes 48 - 51
+0x80860006, 0, ebx, 31:0, cpu_info_13 , CPU info string bytes 52 - 55
+0x80860006, 0, ecx, 31:0, cpu_info_14 , CPU info string bytes 56 - 59
+0x80860006, 0, edx, 31:0, cpu_info_15 , CPU info string bytes 60 - 63
+
+# Leaf 80860007H
+# Transmeta live CPU information
+
+0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz
+0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts
+0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100
+0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds
+
+# Leaf C0000000H
+# Maximum Centaur/Zhaoxin leaf number
+
+0xc0000000, 0, eax, 31:0, max_cntr_leaf , Maximum Centaur/Zhaoxin leaf number
+
+# Leaf C0000001H
+# Centaur/Zhaoxin extended CPU features
+
+0xc0000001, 0, edx, 0, ccs_sm2 , CCS SM2 instructions
+0xc0000001, 0, edx, 1, ccs_sm2_en , CCS SM2 enabled
+0xc0000001, 0, edx, 2, xstore , Random Number Generator
+0xc0000001, 0, edx, 3, xstore_en , RNG enabled
+0xc0000001, 0, edx, 4, ccs_sm3_sm4 , CCS SM3 and SM4 instructions
+0xc0000001, 0, edx, 5, ccs_sm3_sm4_en , CCS SM3/SM4 enabled
+0xc0000001, 0, edx, 6, ace , Advanced Cryptography Engine
+0xc0000001, 0, edx, 7, ace_en , ACE enabled
+0xc0000001, 0, edx, 8, ace2 , Advanced Cryptography Engine v2
+0xc0000001, 0, edx, 9, ace2_en , ACE v2 enabled
+0xc0000001, 0, edx, 10, phe , PadLock Hash Engine
+0xc0000001, 0, edx, 11, phe_en , PHE enabled
+0xc0000001, 0, edx, 12, pmm , PadLock Montgomery Multiplier
+0xc0000001, 0, edx, 13, pmm_en , PMM enabled
+0xc0000001, 0, edx, 16, parallax , Parallax auto adjust processor voltage
+0xc0000001, 0, edx, 17, parallax_en , Parallax enabled
+0xc0000001, 0, edx, 20, tm3 , Thermal Monitor v3
+0xc0000001, 0, edx, 21, tm3_en , TM v3 enabled
+0xc0000001, 0, edx, 25, phe2 , PadLock Hash Engine v2 (SHA384/SHA512)
+0xc0000001, 0, edx, 26, phe2_en , PHE v2 enabled
+0xc0000001, 0, edx, 27, rsa , RSA instructions (XMODEXP/MONTMUL2)
+0xc0000001, 0, edx, 28, rsa_en , RSA instructions enabled
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
index 1b25c0a95d3f..7dc6b9235d02 100644
--- a/tools/arch/x86/kcpuid/kcpuid.c
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -1,14 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
-#include <stdio.h>
+#include <cpuid.h>
+#include <err.h>
+#include <getopt.h>
#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <getopt.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define __noreturn __attribute__((__noreturn__))
typedef unsigned int u32;
typedef unsigned long long u64;
@@ -49,7 +52,7 @@ static const char * const reg_names[] = {
struct subleaf {
u32 index;
u32 sub;
- u32 eax, ebx, ecx, edx;
+ u32 output[NR_REGS];
struct reg_desc info[NR_REGS];
};
@@ -63,21 +66,64 @@ struct cpuid_func {
int nr;
};
+enum range_index {
+ RANGE_STD = 0, /* Standard */
+ RANGE_EXT = 0x80000000, /* Extended */
+ RANGE_TSM = 0x80860000, /* Transmeta */
+ RANGE_CTR = 0xc0000000, /* Centaur/Zhaoxin */
+};
+
+#define CPUID_INDEX_MASK 0xffff0000
+#define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK)
+
struct cpuid_range {
/* array of main leafs */
struct cpuid_func *funcs;
/* number of valid leafs */
int nr;
- bool is_ext;
+ enum range_index index;
};
-/*
- * basic: basic functions range: [0... ]
- * ext: extended functions range: [0x80000000... ]
- */
-struct cpuid_range *leafs_basic, *leafs_ext;
+static struct cpuid_range ranges[] = {
+ { .index = RANGE_STD, },
+ { .index = RANGE_EXT, },
+ { .index = RANGE_TSM, },
+ { .index = RANGE_CTR, },
+};
+
+static char *range_to_str(struct cpuid_range *range)
+{
+ switch (range->index) {
+ case RANGE_STD: return "Standard";
+ case RANGE_EXT: return "Extended";
+ case RANGE_TSM: return "Transmeta";
+ case RANGE_CTR: return "Centaur";
+ default: return NULL;
+ }
+}
+
+#define __for_each_cpuid_range(range, __condition) \
+ for (unsigned int i = 0; \
+ i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]) && (__condition); \
+ i++)
+
+#define for_each_valid_cpuid_range(range) __for_each_cpuid_range(range, (range)->nr != 0)
+#define for_each_cpuid_range(range) __for_each_cpuid_range(range, true)
+
+struct cpuid_range *index_to_cpuid_range(u32 index)
+{
+ u32 func_idx = index & CPUID_FUNCTION_MASK;
+ u32 range_idx = index & CPUID_INDEX_MASK;
+ struct cpuid_range *range;
+
+ for_each_valid_cpuid_range(range) {
+ if (range->index == range_idx && (u32)range->nr > func_idx)
+ return range;
+ }
+
+ return NULL;
+}
-static bool is_amd;
static bool show_details;
static bool show_raw;
static bool show_flags_only = true;
@@ -85,16 +131,16 @@ static u32 user_index = 0xFFFFFFFF;
static u32 user_sub = 0xFFFFFFFF;
static int flines;
-static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
+/*
+ * Force using <cpuid.h> __cpuid_count() instead of __cpuid(). The
+ * latter leaves ECX uninitialized, which can break CPUID queries.
+ */
+
+#define cpuid(leaf, a, b, c, d) \
+ __cpuid_count(leaf, 0, a, b, c, d)
+
+#define cpuid_count(leaf, subleaf, a, b, c, d) \
+ __cpuid_count(leaf, subleaf, a, b, c, d)
static inline bool has_subleafs(u32 f)
{
@@ -117,11 +163,11 @@ static void leaf_print_raw(struct subleaf *leaf)
if (leaf->sub == 0)
printf("0x%08x: subleafs:\n", leaf->index);
- printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
- leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->sub,
+ leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]);
} else {
- printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
- leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->index,
+ leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]);
}
}
@@ -140,19 +186,19 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
* Cut off vendor-prefix from CPUID function as we're using it as an
* index into ->funcs.
*/
- func = &range->funcs[f & 0xffff];
+ func = &range->funcs[f & CPUID_FUNCTION_MASK];
if (!func->leafs) {
func->leafs = malloc(sizeof(struct subleaf));
if (!func->leafs)
- perror("malloc func leaf");
+ err(EXIT_FAILURE, NULL);
func->nr = 1;
} else {
s = func->nr;
func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf));
if (!func->leafs)
- perror("realloc f->leafs");
+ err(EXIT_FAILURE, NULL);
func->nr++;
}
@@ -161,84 +207,73 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
leaf->index = f;
leaf->sub = subleaf;
- leaf->eax = a;
- leaf->ebx = b;
- leaf->ecx = c;
- leaf->edx = d;
+ leaf->output[R_EAX] = a;
+ leaf->output[R_EBX] = b;
+ leaf->output[R_ECX] = c;
+ leaf->output[R_EDX] = d;
return false;
}
static void raw_dump_range(struct cpuid_range *range)
{
- u32 f;
- int i;
-
- printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic");
+ printf("%s Leafs :\n", range_to_str(range));
printf("================\n");
- for (f = 0; (int)f < range->nr; f++) {
+ for (u32 f = 0; (int)f < range->nr; f++) {
struct cpuid_func *func = &range->funcs[f];
- u32 index = f;
-
- if (range->is_ext)
- index += 0x80000000;
/* Skip leaf without valid items */
if (!func->nr)
continue;
/* First item is the main leaf, followed by all subleafs */
- for (i = 0; i < func->nr; i++)
+ for (int i = 0; i < func->nr; i++)
leaf_print_raw(&func->leafs[i]);
}
}
#define MAX_SUBLEAF_NUM 64
-struct cpuid_range *setup_cpuid_range(u32 input_eax)
+#define MAX_RANGE_INDEX_OFFSET 0xff
+void setup_cpuid_range(struct cpuid_range *range)
{
- u32 max_func, idx_func, subleaf, max_subleaf;
- u32 eax, ebx, ecx, edx, f = input_eax;
- struct cpuid_range *range;
- bool allzero;
-
- eax = input_eax;
- ebx = ecx = edx = 0;
+ u32 max_func, range_funcs_sz;
+ u32 eax, ebx, ecx, edx;
- cpuid(&eax, &ebx, &ecx, &edx);
- max_func = eax;
- idx_func = (max_func & 0xffff) + 1;
+ cpuid(range->index, max_func, ebx, ecx, edx);
- range = malloc(sizeof(struct cpuid_range));
- if (!range)
- perror("malloc range");
+ /*
+ * If the CPUID range's maximum function value is garbage, then it
+ * is not recognized by this CPU. Set the range's number of valid
+ * leaves to zero so that for_each_valid_cpu_range() can ignore it.
+ */
+ if (max_func < range->index || max_func > (range->index + MAX_RANGE_INDEX_OFFSET)) {
+ range->nr = 0;
+ return;
+ }
- if (input_eax & 0x80000000)
- range->is_ext = true;
- else
- range->is_ext = false;
+ range->nr = (max_func & CPUID_FUNCTION_MASK) + 1;
+ range_funcs_sz = range->nr * sizeof(struct cpuid_func);
- range->funcs = malloc(sizeof(struct cpuid_func) * idx_func);
+ range->funcs = malloc(range_funcs_sz);
if (!range->funcs)
- perror("malloc range->funcs");
+ err(EXIT_FAILURE, NULL);
- range->nr = idx_func;
- memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func);
+ memset(range->funcs, 0, range_funcs_sz);
- for (; f <= max_func; f++) {
- eax = f;
- subleaf = ecx = 0;
+ for (u32 f = range->index; f <= max_func; f++) {
+ u32 max_subleaf = MAX_SUBLEAF_NUM;
+ bool allzero;
- cpuid(&eax, &ebx, &ecx, &edx);
- allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
+ cpuid(f, eax, ebx, ecx, edx);
+
+ allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx);
if (allzero)
continue;
if (!has_subleafs(f))
continue;
- max_subleaf = MAX_SUBLEAF_NUM;
-
/*
* Some can provide the exact number of subleafs,
* others have to be tried (0xf)
@@ -256,20 +291,15 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
if (f == 0x80000026)
max_subleaf = 5;
- for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
- eax = f;
- ecx = subleaf;
+ for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) {
+ cpuid_count(f, subleaf, eax, ebx, ecx, edx);
- cpuid(&eax, &ebx, &ecx, &edx);
- allzero = cpuid_store(range, f, subleaf,
- eax, ebx, ecx, edx);
+ allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
if (allzero)
continue;
}
}
-
- return range;
}
/*
@@ -280,15 +310,13 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
* 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs
* 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3)
*/
-static int parse_line(char *line)
+static void parse_line(char *line)
{
char *str;
- int i;
struct cpuid_range *range;
struct cpuid_func *func;
struct subleaf *leaf;
u32 index;
- u32 sub;
char buffer[512];
char *buf;
/*
@@ -310,12 +338,12 @@ static int parse_line(char *line)
/* Skip comments and NULL line */
if (line[0] == '#' || line[0] == '\n')
- return 0;
+ return;
strncpy(buffer, line, 511);
buffer[511] = 0;
str = buffer;
- for (i = 0; i < 5; i++) {
+ for (int i = 0; i < 5; i++) {
tokens[i] = strtok(str, ",");
if (!tokens[i])
goto err_exit;
@@ -328,21 +356,19 @@ static int parse_line(char *line)
/* index/main-leaf */
index = strtoull(tokens[0], NULL, 0);
- if (index & 0x80000000)
- range = leafs_ext;
- else
- range = leafs_basic;
-
- index &= 0x7FFFFFFF;
- /* Skip line parsing for non-existing indexes */
- if ((int)index >= range->nr)
- return -1;
+ /*
+ * Skip line parsing if the index is not covered by known-valid
+ * CPUID ranges on this CPU.
+ */
+ range = index_to_cpuid_range(index);
+ if (!range)
+ return;
+ /* Skip line parsing if the index CPUID output is all zero */
+ index &= CPUID_FUNCTION_MASK;
func = &range->funcs[index];
-
- /* Return if the index has no valid item on this platform */
if (!func->nr)
- return 0;
+ return;
/* subleaf */
buf = tokens[1];
@@ -355,11 +381,11 @@ static int parse_line(char *line)
subleaf_start = strtoul(start, NULL, 0);
subleaf_end = min(subleaf_end, (u32)(func->nr - 1));
if (subleaf_start > subleaf_end)
- return 0;
+ return;
} else {
subleaf_start = subleaf_end;
if (subleaf_start > (u32)(func->nr - 1))
- return 0;
+ return;
}
/* register */
@@ -382,7 +408,7 @@ static int parse_line(char *line)
bit_end = strtoul(end, NULL, 0);
bit_start = (start) ? strtoul(start, NULL, 0) : bit_end;
- for (sub = subleaf_start; sub <= subleaf_end; sub++) {
+ for (u32 sub = subleaf_start; sub <= subleaf_end; sub++) {
leaf = &func->leafs[sub];
reg = &leaf->info[reg_index];
bdesc = &reg->descs[reg->nr++];
@@ -392,12 +418,11 @@ static int parse_line(char *line)
strcpy(bdesc->simp, strtok(tokens[4], " \t"));
strcpy(bdesc->detail, tokens[5]);
}
- return 0;
+ return;
err_exit:
- printf("Warning: wrong line format:\n");
- printf("\tline[%d]: %s\n", flines, line);
- return -1;
+ warnx("Wrong line format:\n"
+ "\tline[%d]: %s", flines, line);
}
/* Parse csv file, and construct the array of all leafs and subleafs */
@@ -418,10 +443,8 @@ static void parse_text(void)
file = fopen("./cpuid.csv", "r");
}
- if (!file) {
- printf("Fail to open '%s'\n", filename);
- return;
- }
+ if (!file)
+ err(EXIT_FAILURE, "%s", filename);
while (1) {
ret = getline(&line, &len, file);
@@ -436,21 +459,13 @@ static void parse_text(void)
fclose(file);
}
-
-/* Decode every eax/ebx/ecx/edx */
-static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
+static void show_reg(const struct reg_desc *rdesc, u32 value)
{
- struct bits_desc *bdesc;
- int start, end, i;
+ const struct bits_desc *bdesc;
+ int start, end;
u32 mask;
- if (!rdesc->nr) {
- if (show_details)
- printf("\t %s: 0x%08x\n", reg_names[reg], value);
- return;
- }
-
- for (i = 0; i < rdesc->nr; i++) {
+ for (int i = 0; i < rdesc->nr; i++) {
bdesc = &rdesc->descs[i];
start = bdesc->start;
@@ -480,23 +495,21 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
}
}
-static void show_leaf(struct subleaf *leaf)
+static void show_reg_header(bool has_entries, u32 leaf, u32 subleaf, const char *reg_name)
{
- if (!leaf)
- return;
+ if (show_details && has_entries)
+ printf("CPUID_0x%x_%s[0x%x]:\n", leaf, reg_name, subleaf);
+}
- if (show_raw) {
+static void show_leaf(struct subleaf *leaf)
+{
+ if (show_raw)
leaf_print_raw(leaf);
- } else {
- if (show_details)
- printf("CPUID_0x%x_ECX[0x%x]:\n",
- leaf->index, leaf->sub);
- }
- decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX);
- decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX);
- decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX);
- decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX);
+ for (int i = R_EAX; i < NR_REGS; i++) {
+ show_reg_header((leaf->info[i].nr > 0), leaf->index, leaf->sub, reg_names[i]);
+ show_reg(&leaf->info[i], leaf->output[i]);
+ }
if (!show_raw && show_details)
printf("\n");
@@ -504,46 +517,37 @@ static void show_leaf(struct subleaf *leaf)
static void show_func(struct cpuid_func *func)
{
- int i;
-
- if (!func)
- return;
-
- for (i = 0; i < func->nr; i++)
+ for (int i = 0; i < func->nr; i++)
show_leaf(&func->leafs[i]);
}
static void show_range(struct cpuid_range *range)
{
- int i;
-
- for (i = 0; i < range->nr; i++)
+ for (int i = 0; i < range->nr; i++)
show_func(&range->funcs[i]);
}
static inline struct cpuid_func *index_to_func(u32 index)
{
+ u32 func_idx = index & CPUID_FUNCTION_MASK;
struct cpuid_range *range;
- u32 func_idx;
-
- range = (index & 0x80000000) ? leafs_ext : leafs_basic;
- func_idx = index & 0xffff;
- if ((func_idx + 1) > (u32)range->nr) {
- printf("ERR: invalid input index (0x%x)\n", index);
+ range = index_to_cpuid_range(index);
+ if (!range)
return NULL;
- }
+
return &range->funcs[func_idx];
}
static void show_info(void)
{
+ struct cpuid_range *range;
struct cpuid_func *func;
if (show_raw) {
/* Show all of the raw output of 'cpuid' instr */
- raw_dump_range(leafs_basic);
- raw_dump_range(leafs_ext);
+ for_each_valid_cpuid_range(range)
+ raw_dump_range(range);
return;
}
@@ -551,18 +555,19 @@ static void show_info(void)
/* Only show specific leaf/subleaf info */
func = index_to_func(user_index);
if (!func)
- return;
+ errx(EXIT_FAILURE, "Invalid input leaf (0x%x)", user_index);
/* Dump the raw data also */
show_raw = true;
if (user_sub != 0xFFFFFFFF) {
- if (user_sub + 1 <= (u32)func->nr) {
- show_leaf(&func->leafs[user_sub]);
- return;
+ if (user_sub + 1 > (u32)func->nr) {
+ errx(EXIT_FAILURE, "Leaf 0x%x has no valid subleaf = 0x%x",
+ user_index, user_sub);
}
- printf("ERR: invalid input subleaf (0x%x)\n", user_sub);
+ show_leaf(&func->leafs[user_sub]);
+ return;
}
show_func(func);
@@ -570,38 +575,21 @@ static void show_info(void)
}
printf("CPU features:\n=============\n\n");
- show_range(leafs_basic);
- show_range(leafs_ext);
+ for_each_valid_cpuid_range(range)
+ show_range(range);
}
-static void setup_platform_cpuid(void)
+static void __noreturn usage(int exit_code)
{
- u32 eax, ebx, ecx, edx;
-
- /* Check vendor */
- eax = ebx = ecx = edx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- /* "htuA" */
- if (ebx == 0x68747541)
- is_amd = true;
-
- /* Setup leafs for the basic and extended range */
- leafs_basic = setup_cpuid_range(0x0);
- leafs_ext = setup_cpuid_range(0x80000000);
-}
-
-static void usage(void)
-{
- printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
- "\t-a|--all Show both bit flags and complex bit fields info\n"
- "\t-b|--bitflags Show boolean flags only\n"
- "\t-d|--detail Show details of the flag/fields (default)\n"
- "\t-f|--flags Specify the cpuid csv file\n"
- "\t-h|--help Show usage info\n"
- "\t-l|--leaf=index Specify the leaf you want to check\n"
- "\t-r|--raw Show raw cpuid data\n"
- "\t-s|--subleaf=sub Specify the subleaf you want to check\n"
+ errx(exit_code, "kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
+ "\t-a|--all Show both bit flags and complex bit fields info\n"
+ "\t-b|--bitflags Show boolean flags only\n"
+ "\t-d|--detail Show details of the flag/fields (default)\n"
+ "\t-f|--flags Specify the CPUID CSV file\n"
+ "\t-h|--help Show usage info\n"
+ "\t-l|--leaf=index Specify the leaf you want to check\n"
+ "\t-r|--raw Show raw CPUID data\n"
+ "\t-s|--subleaf=sub Specify the subleaf you want to check"
);
}
@@ -617,7 +605,7 @@ static struct option opts[] = {
{ NULL, 0, NULL, 0 }
};
-static int parse_options(int argc, char *argv[])
+static void parse_options(int argc, char *argv[])
{
int c;
@@ -637,9 +625,7 @@ static int parse_options(int argc, char *argv[])
user_csv = optarg;
break;
case 'h':
- usage();
- exit(1);
- break;
+ usage(EXIT_SUCCESS);
case 'l':
/* main leaf */
user_index = strtoul(optarg, NULL, 0);
@@ -652,11 +638,8 @@ static int parse_options(int argc, char *argv[])
user_sub = strtoul(optarg, NULL, 0);
break;
default:
- printf("%s: Invalid option '%c'\n", argv[0], optopt);
- return -1;
- }
-
- return 0;
+ usage(EXIT_FAILURE);
+ }
}
/*
@@ -669,11 +652,13 @@ static int parse_options(int argc, char *argv[])
*/
int main(int argc, char *argv[])
{
- if (parse_options(argc, argv))
- return -1;
+ struct cpuid_range *range;
+
+ parse_options(argc, argv);
/* Setup the cpuid leafs of current platform */
- setup_platform_cpuid();
+ for_each_cpuid_range(range)
+ setup_cpuid_range(range);
/* Read and parse the 'cpuid.csv' */
parse_text();
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index e91d4c4e1c16..bce69c6bfa69 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -324,6 +324,11 @@ int insn_get_opcode(struct insn *insn)
}
insn->attr = inat_get_opcode_attribute(op);
+ if (insn->x86_64 && inat_is_invalid64(insn->attr)) {
+ /* This instruction is invalid, like UD2. Stop decoding. */
+ insn->attr &= INAT_INV64;
+ }
+
while (inat_is_escape(insn->attr)) {
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
@@ -337,6 +342,7 @@ int insn_get_opcode(struct insn *insn)
insn->attr = 0;
return -EINVAL;
}
+
end:
opcode->got = 1;
return 0;
@@ -658,7 +664,6 @@ int insn_get_immediate(struct insn *insn)
}
if (!inat_has_immediate(insn->attr))
- /* no immediates */
goto done;
switch (inat_immediate_size(insn->attr)) {
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index f5dd84eb55dc..262f7ca1fb95 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -35,7 +35,7 @@
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
#
-# REX2 Prefix
+# REX2 Prefix Superscripts
# - (!REX2): REX2 is not allowed
# - (REX2): REX2 variant e.g. JMPABS
@@ -147,7 +147,7 @@ AVXcode:
# 0x60 - 0x6f
60: PUSHA/PUSHAD (i64)
61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64)
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
64: SEG=FS (Prefix)
65: SEG=GS (Prefix)
@@ -253,8 +253,8 @@ c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
-c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
-c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64)
c6: Grp11A Eb,Ib (1A)
c7: Grp11B Ev,Iz (1A)
c8: ENTER Iw,Ib
@@ -286,10 +286,10 @@ df: ESC
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
-e0: LOOPNE/LOOPNZ Jb (f64) (!REX2)
-e1: LOOPE/LOOPZ Jb (f64) (!REX2)
-e2: LOOP Jb (f64) (!REX2)
-e3: JrCXZ Jb (f64) (!REX2)
+e0: LOOPNE/LOOPNZ Jb (f64),(!REX2)
+e1: LOOPE/LOOPZ Jb (f64),(!REX2)
+e2: LOOP Jb (f64),(!REX2)
+e3: JrCXZ Jb (f64),(!REX2)
e4: IN AL,Ib (!REX2)
e5: IN eAX,Ib (!REX2)
e6: OUT Ib,AL (!REX2)
@@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2)
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
-e8: CALL Jz (f64) (!REX2)
-e9: JMP-near Jz (f64) (!REX2)
-ea: JMP-far Ap (i64) (!REX2)
-eb: JMP-short Jb (f64) (!REX2)
+e8: CALL Jz (f64),(!REX2)
+e9: JMP-near Jz (f64),(!REX2)
+ea: JMP-far Ap (i64),(!REX2)
+eb: JMP-short Jb (f64),(!REX2)
ec: IN AL,DX (!REX2)
ed: IN eAX,DX (!REX2)
ee: OUT DX,AL (!REX2)
@@ -478,22 +478,22 @@ AVXcode: 1
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
-80: JO Jz (f64) (!REX2)
-81: JNO Jz (f64) (!REX2)
-82: JB/JC/JNAE Jz (f64) (!REX2)
-83: JAE/JNB/JNC Jz (f64) (!REX2)
-84: JE/JZ Jz (f64) (!REX2)
-85: JNE/JNZ Jz (f64) (!REX2)
-86: JBE/JNA Jz (f64) (!REX2)
-87: JA/JNBE Jz (f64) (!REX2)
-88: JS Jz (f64) (!REX2)
-89: JNS Jz (f64) (!REX2)
-8a: JP/JPE Jz (f64) (!REX2)
-8b: JNP/JPO Jz (f64) (!REX2)
-8c: JL/JNGE Jz (f64) (!REX2)
-8d: JNL/JGE Jz (f64) (!REX2)
-8e: JLE/JNG Jz (f64) (!REX2)
-8f: JNLE/JG Jz (f64) (!REX2)
+80: JO Jz (f64),(!REX2)
+81: JNO Jz (f64),(!REX2)
+82: JB/JC/JNAE Jz (f64),(!REX2)
+83: JAE/JNB/JNC Jz (f64),(!REX2)
+84: JE/JZ Jz (f64),(!REX2)
+85: JNE/JNZ Jz (f64),(!REX2)
+86: JBE/JNA Jz (f64),(!REX2)
+87: JA/JNBE Jz (f64),(!REX2)
+88: JS Jz (f64),(!REX2)
+89: JNS Jz (f64),(!REX2)
+8a: JP/JPE Jz (f64),(!REX2)
+8b: JNP/JPO Jz (f64),(!REX2)
+8c: JL/JNGE Jz (f64),(!REX2)
+8d: JNL/JGE Jz (f64),(!REX2)
+8e: JLE/JNG Jz (f64),(!REX2)
+8f: JNLE/JG Jz (f64),(!REX2)
# 0x0f 0x90-0x9f
90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index 5770c8097f32..2c19d7fc8a85 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -64,6 +64,8 @@ BEGIN {
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
+ invalid64_expr = "\\(i64\\)"
+ only64_expr = "\\(o64\\)"
rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
rex2_expr = "\\(REX2\\)"
no_rex2_expr = "\\(!REX2\\)"
@@ -319,6 +321,11 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(ext, force64_expr))
flags = add_flags(flags, "INAT_FORCE64")
+ # check invalid in 64-bit (and no only64)
+ if (match(ext, invalid64_expr) &&
+ !match($0, only64_expr))
+ flags = add_flags(flags, "INAT_INV64")
+
# check REX2 not allowed
if (match(ext, no_rex2_expr))
flags = add_flags(flags, "INAT_NO_REX2")
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index f9702877ac21..c335ce0bd195 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -30,18 +30,41 @@ all_files := \
crt.h \
ctype.h \
dirent.h \
+ elf.h \
errno.h \
+ fcntl.h \
+ getopt.h \
limits.h \
+ math.h \
nolibc.h \
+ poll.h \
+ sched.h \
signal.h \
stackprotector.h \
std.h \
stdarg.h \
stdbool.h \
+ stddef.h \
stdint.h \
stdlib.h \
string.h \
sys.h \
+ sys/auxv.h \
+ sys/ioctl.h \
+ sys/mman.h \
+ sys/mount.h \
+ sys/prctl.h \
+ sys/random.h \
+ sys/reboot.h \
+ sys/resource.h \
+ sys/stat.h \
+ sys/syscall.h \
+ sys/sysmacros.h \
+ sys/time.h \
+ sys/timerfd.h \
+ sys/types.h \
+ sys/utsname.h \
+ sys/wait.h \
time.h \
types.h \
unistd.h \
@@ -72,7 +95,7 @@ help:
headers:
$(Q)mkdir -p $(OUTPUT)sysroot
$(Q)mkdir -p $(OUTPUT)sysroot/include
- $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
+ $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/
$(Q)if [ "$(ARCH)" = "x86" ]; then \
sed -e \
's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
@@ -91,5 +114,14 @@ headers_standalone: headers
$(Q)$(MAKE) -C $(srctree) headers
$(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot
+# GCC uses "s390", clang "systemz"
+CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS))
+
+headers_check: headers_standalone
+ for header in $(filter-out crt.h std.h,$(all_files)); do \
+ $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \
+ -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \
+ done
+
clean:
$(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 06fdef7b291a..937a348da42e 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -146,7 +146,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
{
__asm__ volatile (
"mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */
- "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */
"bl _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 6180ff99ab43..1f66e7e5a444 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -189,8 +189,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
{
__asm__ volatile (
"mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */
- "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */
- "mov sp, ip\n"
"bl _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index ff5afc35bbd8..7c9b38e96418 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -167,8 +167,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
__asm__ volatile (
"xor %ebp, %ebp\n" /* zero the stack frame */
"mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */
- "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */
- "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */
"sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */
"push %eax\n" /* push arg1 on stack to support plain stack modes too */
"call _start_c\n" /* transfer to c runtime */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index fb519545959e..5511705303ea 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -142,18 +142,11 @@
_arg1; \
})
-#if __loongarch_grlen == 32
-#define LONG_BSTRINS "bstrins.w"
-#else /* __loongarch_grlen == 64 */
-#define LONG_BSTRINS "bstrins.d"
-#endif
-
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */
- LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */
"bl _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h
new file mode 100644
index 000000000000..6dac1845f298
--- /dev/null
+++ b/tools/include/nolibc/arch-m68k.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * m68k specific definitions for NOLIBC
+ * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp>
+ *
+ * Roughly based on one or more of the other arch files.
+ *
+ */
+
+#ifndef _NOLIBC_ARCH_M68K_H
+#define _NOLIBC_ARCH_M68K_H
+
+#include "compiler.h"
+#include "crt.h"
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST "memory"
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1), "r"(_arg2) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ register long _arg5 __asm__ ("d5") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ register long _arg5 __asm__ ("d5") = (long)(arg5); \
+ register long _arg6 __asm__ ("a0") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+void _start(void);
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "movel %sp, %sp@-\n"
+ "jsr _start_c\n"
+ );
+ __nolibc_entrypoint_epilogue();
+}
+
+#endif /* _NOLIBC_ARCH_M68K_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index ee2fdb8d601d..204564bbcd32 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -201,7 +201,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
__asm__ volatile (
"mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */
- "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */
"li 0, 0\n" /* zero the frame pointer */
"stdu 1, -32(1)\n" /* the initial stack frame */
"bl _start_c\n" /* transfer to c runtime */
@@ -209,7 +208,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
#else
__asm__ volatile (
"mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */
- "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */
"li 0, 0\n" /* zero the frame pointer */
"stwu 1, -16(1)\n" /* the initial stack frame */
"bl _start_c\n" /* transfer to c runtime */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 8827bf936212..885383a86c38 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -148,7 +148,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
"lla gp, __global_pointer$\n"
".option pop\n"
"mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */
- "andi sp, a0, -16\n" /* sp must be 16-byte aligned */
"call _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h
new file mode 100644
index 000000000000..1435172f3dfe
--- /dev/null
+++ b/tools/include/nolibc/arch-sparc.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * SPARC (32bit and 64bit) specific definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_ARCH_SPARC_H
+#define _NOLIBC_ARCH_SPARC_H
+
+#include <linux/unistd.h>
+
+#include "compiler.h"
+#include "crt.h"
+
+/*
+ * Syscalls for SPARC:
+ * - registers are native word size
+ * - syscall number is passed in g1
+ * - arguments are in o0-o5
+ * - the system call is performed by calling a trap instruction
+ * - syscall return value is in o0
+ * - syscall error flag is in the carry bit of the processor status register
+ */
+
+#ifdef __arch64__
+
+#define _NOLIBC_SYSCALL "t 0x6d\n" \
+ "bcs,a %%xcc, 1f\n" \
+ "sub %%g0, %%o0, %%o0\n" \
+ "1:\n"
+
+#else
+
+#define _NOLIBC_SYSCALL "t 0x10\n" \
+ "bcs,a 1f\n" \
+ "sub %%g0, %%o0, %%o0\n" \
+ "1:\n"
+
+#endif /* __arch64__ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0"); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ register long _arg5 __asm__ ("o4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ register long _arg5 __asm__ ("o4") = (long)(arg5); \
+ register long _arg6 __asm__ ("o5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ /*
+ * Save argc pointer to o0, as arg1 of _start_c.
+ * Account for the window save area, which is 16 registers wide.
+ */
+#ifdef __arch64__
+ "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */
+#else
+ "add %sp, 64, %o0\n"
+#endif
+ "b,a _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+
+static pid_t getpid(void);
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+ pid_t parent, ret;
+
+ parent = getpid();
+ ret = my_syscall0(__NR_fork);
+
+ /* The syscall returns the parent pid in the child instead of 0 */
+ if (ret == parent)
+ return 0;
+ else
+ return ret;
+}
+#define sys_fork sys_fork
+
+#endif /* _NOLIBC_ARCH_SPARC_H */
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 1e40620a2b33..67305e24dbef 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -166,7 +166,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
__asm__ volatile (
"xor %ebp, %ebp\n" /* zero the stack frame */
"mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */
- "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */
"call _start_c\n" /* transfer to c runtime */
"hlt\n" /* ensure it does not return */
);
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 8a2c143c0fba..d20b2304aac2 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -33,6 +33,10 @@
#include "arch-s390.h"
#elif defined(__loongarch__)
#include "arch-loongarch.h"
+#elif defined(__sparc__)
+#include "arch-sparc.h"
+#elif defined(__m68k__)
+#include "arch-m68k.h"
#else
#error Unsupported Architecture
#endif
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index fa1f547e7f13..369cfb5a0e78 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -12,6 +12,15 @@
# define __nolibc_has_attribute(attr) 0
#endif
+#if defined(__has_feature)
+# define __nolibc_has_feature(feature) __has_feature(feature)
+#else
+# define __nolibc_has_feature(feature) 0
+#endif
+
+#define __nolibc_aligned(alignment) __attribute__((aligned(alignment)))
+#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type))
+
#if __nolibc_has_attribute(naked)
# define __nolibc_entrypoint __attribute__((naked))
# define __nolibc_entrypoint_epilogue()
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index c4b10103bbec..961cfe777c35 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_CRT_H
#define _NOLIBC_CRT_H
+#include "compiler.h"
+
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
@@ -25,6 +27,9 @@ extern void (*const __fini_array_end[])(void) __attribute__((weak));
void _start_c(long *sp);
__attribute__((weak,used))
+#if __nolibc_has_feature(undefined_behavior_sanitizer)
+ __attribute__((no_sanitize("function")))
+#endif
void _start_c(long *sp)
{
long argc;
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
index 6f90706d0644..470fdf34394a 100644
--- a/tools/include/nolibc/ctype.h
+++ b/tools/include/nolibc/ctype.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_CTYPE_H
#define _NOLIBC_CTYPE_H
@@ -96,7 +99,4 @@ int ispunct(int c)
return isgraph(c) && !isalnum(c);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
index c5c30d0dd680..758b95c48e7a 100644
--- a/tools/include/nolibc/dirent.h
+++ b/tools/include/nolibc/dirent.h
@@ -4,11 +4,16 @@
* Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_DIRENT_H
#define _NOLIBC_DIRENT_H
+#include "compiler.h"
#include "stdint.h"
#include "types.h"
+#include "fcntl.h"
#include <linux/limits.h>
@@ -58,7 +63,7 @@ int closedir(DIR *dirp)
static __attribute__((unused))
int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
{
- char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1];
+ char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64);
struct linux_dirent64 *ldir = (void *)buf;
intptr_t i = (intptr_t)dirp;
int fd, ret;
@@ -92,7 +97,4 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
return 0;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_DIRENT_H */
diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h
new file mode 100644
index 000000000000..3e2c5228bf3d
--- /dev/null
+++ b/tools/include/nolibc/elf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Shim elf.h header for NOLIBC.
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SYS_ELF_H
+#define _NOLIBC_SYS_ELF_H
+
+#include <linux/elf.h>
+
+#endif /* _NOLIBC_SYS_ELF_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index 1d8d8033e8ff..08a33c40ec0c 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_ERRNO_H
#define _NOLIBC_ERRNO_H
@@ -22,7 +25,4 @@ int errno __attribute__((weak));
*/
#define MAX_ERRNO 4095
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_ERRNO_H */
diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h
new file mode 100644
index 000000000000..bff2e542f20f
--- /dev/null
+++ b/tools/include/nolibc/fcntl.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * fcntl definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_FCNTL_H
+#define _NOLIBC_FCNTL_H
+
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/*
+ * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, dirfd, path, flags, mode);
+}
+
+static __attribute__((unused))
+int openat(int dirfd, const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_openat(dirfd, path, flags, mode));
+}
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_open(path, flags, mode));
+}
+
+#endif /* _NOLIBC_FCNTL_H */
diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h
new file mode 100644
index 000000000000..217abb95264b
--- /dev/null
+++ b/tools/include/nolibc/getopt.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * getopt function definitions for NOLIBC, adapted from musl libc
+ * Copyright (C) 2005-2020 Rich Felker, et al.
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_GETOPT_H
+#define _NOLIBC_GETOPT_H
+
+struct FILE;
+static struct FILE *const stderr;
+static int fprintf(struct FILE *stream, const char *fmt, ...);
+
+__attribute__((weak,unused,section(".data.nolibc_getopt")))
+char *optarg;
+
+__attribute__((weak,unused,section(".data.nolibc_getopt")))
+int optind = 1, opterr = 1, optopt;
+
+static __attribute__((unused))
+int getopt(int argc, char * const argv[], const char *optstring)
+{
+ static int __optpos;
+ int i;
+ char c, d;
+ char *optchar;
+
+ if (!optind) {
+ __optpos = 0;
+ optind = 1;
+ }
+
+ if (optind >= argc || !argv[optind])
+ return -1;
+
+ if (argv[optind][0] != '-') {
+ if (optstring[0] == '-') {
+ optarg = argv[optind++];
+ return 1;
+ }
+ return -1;
+ }
+
+ if (!argv[optind][1])
+ return -1;
+
+ if (argv[optind][1] == '-' && !argv[optind][2])
+ return optind++, -1;
+
+ if (!__optpos)
+ __optpos++;
+ c = argv[optind][__optpos];
+ optchar = argv[optind] + __optpos;
+ __optpos++;
+
+ if (!argv[optind][__optpos]) {
+ optind++;
+ __optpos = 0;
+ }
+
+ if (optstring[0] == '-' || optstring[0] == '+')
+ optstring++;
+
+ i = 0;
+ d = 0;
+ do {
+ d = optstring[i++];
+ } while (d && d != c);
+
+ if (d != c || c == ':') {
+ optopt = c;
+ if (optstring[0] != ':' && opterr)
+ fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar);
+ return '?';
+ }
+ if (optstring[i] == ':') {
+ optarg = 0;
+ if (optstring[i + 1] != ':' || __optpos) {
+ optarg = argv[optind++];
+ if (__optpos)
+ optarg += __optpos;
+ __optpos = 0;
+ }
+ if (optind > argc) {
+ optopt = c;
+ if (optstring[0] == ':')
+ return ':';
+ if (opterr)
+ fprintf(stderr, "%s: option requires argument: %c\n",
+ argv[0], *optchar);
+ return '?';
+ }
+ }
+ return c;
+}
+
+#endif /* _NOLIBC_GETOPT_H */
diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h
new file mode 100644
index 000000000000..9df823ddd412
--- /dev/null
+++ b/tools/include/nolibc/math.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * math definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SYS_MATH_H
+#define _NOLIBC_SYS_MATH_H
+
+static __inline__
+double fabs(double x)
+{
+ return x >= 0 ? x : -x;
+}
+
+static __inline__
+float fabsf(float x)
+{
+ return x >= 0 ? x : -x;
+}
+
+static __inline__
+long double fabsl(long double x)
+{
+ return x >= 0 ? x : -x;
+}
+
+#endif /* _NOLIBC_SYS_MATH_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 70872401aca8..c199ade200c2 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -96,7 +96,24 @@
#include "arch.h"
#include "types.h"
#include "sys.h"
+#include "sys/auxv.h"
+#include "sys/ioctl.h"
+#include "sys/mman.h"
+#include "sys/mount.h"
+#include "sys/prctl.h"
+#include "sys/random.h"
+#include "sys/reboot.h"
+#include "sys/resource.h"
+#include "sys/stat.h"
+#include "sys/syscall.h"
+#include "sys/sysmacros.h"
+#include "sys/time.h"
+#include "sys/timerfd.h"
+#include "sys/utsname.h"
+#include "sys/wait.h"
#include "ctype.h"
+#include "elf.h"
+#include "sched.h"
#include "signal.h"
#include "unistd.h"
#include "stdio.h"
@@ -105,6 +122,10 @@
#include "time.h"
#include "stackprotector.h"
#include "dirent.h"
+#include "fcntl.h"
+#include "getopt.h"
+#include "poll.h"
+#include "math.h"
/* Used by programs to avoid std includes */
#define NOLIBC
diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h
new file mode 100644
index 000000000000..1765acb17ea0
--- /dev/null
+++ b/tools/include/nolibc/poll.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * poll definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_POLL_H
+#define _NOLIBC_POLL_H
+
+#include "arch.h"
+#include "sys.h"
+
+#include <linux/poll.h>
+#include <linux/time.h>
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+ struct timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
+#elif defined(__NR_ppoll_time64)
+ struct __kernel_timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
+#elif defined(__NR_poll)
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+ return __nolibc_enosys(__func__, fds, nfds, timeout);
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+ return __sysret(sys_poll(fds, nfds, timeout));
+}
+
+#endif /* _NOLIBC_POLL_H */
diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h
new file mode 100644
index 000000000000..32221562c166
--- /dev/null
+++ b/tools/include/nolibc/sched.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * sched function definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SCHED_H
+#define _NOLIBC_SCHED_H
+
+#include "sys.h"
+
+#include <linux/sched.h>
+
+/*
+ * int setns(int fd, int nstype);
+ */
+
+static __attribute__((unused))
+int sys_setns(int fd, int nstype)
+{
+ return my_syscall2(__NR_setns, fd, nstype);
+}
+
+static __attribute__((unused))
+int setns(int fd, int nstype)
+{
+ return __sysret(sys_setns(fd, nstype));
+}
+
+
+/*
+ * int unshare(int flags);
+ */
+
+static __attribute__((unused))
+int sys_unshare(int flags)
+{
+ return my_syscall1(__NR_unshare, flags);
+}
+
+static __attribute__((unused))
+int unshare(int flags)
+{
+ return __sysret(sys_unshare(flags));
+}
+
+#endif /* _NOLIBC_SCHED_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index cdcc5904c51e..ac13e53ac31d 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_SIGNAL_H
#define _NOLIBC_SIGNAL_H
@@ -20,7 +23,4 @@ int raise(int signal)
return sys_kill(sys_getpid(), signal);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index 933bc0be7e1c..adda7333d12e 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -13,12 +13,8 @@
* syscall-specific stuff, as this file is expected to be included very early.
*/
-/* note: may already be defined */
-#ifndef NULL
-#define NULL ((void *)0)
-#endif
-
#include "stdint.h"
+#include "stddef.h"
/* those are commonly provided by sys/types.h */
typedef unsigned int dev_t;
diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h
new file mode 100644
index 000000000000..ecbd13eab1f5
--- /dev/null
+++ b/tools/include/nolibc/stddef.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Stddef definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_STDDEF_H
+#define _NOLIBC_STDDEF_H
+
+#include "stdint.h"
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#endif /* _NOLIBC_STDDEF_H */
diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
index cd79ddd6170e..b052ad6303c3 100644
--- a/tools/include/nolibc/stdint.h
+++ b/tools/include/nolibc/stdint.h
@@ -39,8 +39,8 @@ typedef size_t uint_fast32_t;
typedef int64_t int_fast64_t;
typedef uint64_t uint_fast64_t;
-typedef int64_t intmax_t;
-typedef uint64_t uintmax_t;
+typedef __INTMAX_TYPE__ intmax_t;
+typedef __UINTMAX_TYPE__ uintmax_t;
/* limits of integral types */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index a403351dbf60..c470d334ef3f 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -4,12 +4,16 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STDIO_H
#define _NOLIBC_STDIO_H
#include "std.h"
#include "arch.h"
#include "errno.h"
+#include "fcntl.h"
#include "types.h"
#include "sys.h"
#include "stdarg.h"
@@ -17,6 +21,8 @@
#include "string.h"
#include "compiler.h"
+static const char *strerror(int errnum);
+
#ifndef EOF
#define EOF (-1)
#endif
@@ -50,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused)))
return (FILE*)(intptr_t)~fd;
}
+static __attribute__((unused))
+FILE *fopen(const char *pathname, const char *mode)
+{
+ int flags, fd;
+
+ switch (*mode) {
+ case 'r':
+ flags = O_RDONLY;
+ break;
+ case 'w':
+ flags = O_WRONLY | O_CREAT | O_TRUNC;
+ break;
+ case 'a':
+ flags = O_WRONLY | O_CREAT | O_APPEND;
+ break;
+ default:
+ SET_ERRNO(EINVAL); return NULL;
+ }
+
+ if (mode[1] == '+')
+ flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR;
+
+ fd = open(pathname, flags, 0666);
+ return fdopen(fd, mode);
+}
+
/* provides the fd of stream. */
static __attribute__((unused))
int fileno(FILE *stream)
@@ -208,28 +240,40 @@ char *fgets(char *s, int size, FILE *stream)
}
-/* minimal vfprintf(). It supports the following formats:
+/* minimal printf(). It supports the following formats:
* - %[l*]{d,u,c,x,p}
* - %s
* - unknown modifiers are ignored.
*/
-static __attribute__((unused, format(printf, 2, 0)))
-int vfprintf(FILE *stream, const char *fmt, va_list args)
+typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size);
+
+static __attribute__((unused, format(printf, 4, 0)))
+int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args)
{
char escape, lpref, c;
unsigned long long v;
- unsigned int written;
- size_t len, ofs;
+ unsigned int written, width;
+ size_t len, ofs, w;
char tmpbuf[21];
const char *outstr;
written = ofs = escape = lpref = 0;
while (1) {
c = fmt[ofs++];
+ width = 0;
if (escape) {
/* we're in an escape sequence, ofs == 1 */
escape = 0;
+
+ /* width */
+ while (c >= '0' && c <= '9') {
+ width *= 10;
+ width += c - '0';
+
+ c = fmt[ofs++];
+ }
+
if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
char *out = tmpbuf;
@@ -277,6 +321,11 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
if (!outstr)
outstr="(null)";
}
+#ifndef NOLIBC_IGNORE_ERRNO
+ else if (c == 'm') {
+ outstr = strerror(errno);
+ }
+#endif /* NOLIBC_IGNORE_ERRNO */
else if (c == '%') {
/* queue it verbatim */
continue;
@@ -286,6 +335,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
if (c == 'l') {
/* long format prefix, maintain the escape */
lpref++;
+ } else if (c == 'j') {
+ lpref = 2;
}
escape = 1;
goto do_escape;
@@ -302,8 +353,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
outstr = fmt;
len = ofs - 1;
flush_str:
- if (_fwrite(outstr, len, stream) != 0)
- break;
+ if (n) {
+ w = len < n ? len : n;
+ n -= w;
+ while (width-- > w) {
+ if (cb(state, " ", 1) != 0)
+ break;
+ written += 1;
+ }
+ if (cb(state, outstr, w) != 0)
+ break;
+ }
written += len;
do_escape:
@@ -319,6 +379,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
return written;
}
+static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size)
+{
+ return _fwrite(buf, size, (FILE *)state);
+}
+
+static __attribute__((unused, format(printf, 2, 0)))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+ return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args);
+}
+
static __attribute__((unused, format(printf, 1, 0)))
int vprintf(const char *fmt, va_list args)
{
@@ -349,6 +420,85 @@ int printf(const char *fmt, ...)
return ret;
}
+static __attribute__((unused, format(printf, 2, 0)))
+int vdprintf(int fd, const char *fmt, va_list args)
+{
+ FILE *stream;
+
+ stream = fdopen(fd, NULL);
+ if (!stream)
+ return -1;
+ /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */
+ return vfprintf(stream, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 2, 3)))
+int dprintf(int fd, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vdprintf(fd, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size)
+{
+ char **state = (char **)_state;
+
+ memcpy(*state, buf, size);
+ *state += size;
+ return 0;
+}
+
+static __attribute__((unused, format(printf, 3, 0)))
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ char *state = buf;
+ int ret;
+
+ ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args);
+ if (ret < 0)
+ return ret;
+ buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0';
+ return ret;
+}
+
+static __attribute__((unused, format(printf, 3, 4)))
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static __attribute__((unused, format(printf, 2, 0)))
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, SIZE_MAX, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 2, 3)))
+int sprintf(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsprintf(buf, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
static __attribute__((unused))
int vsscanf(const char *str, const char *format, va_list args)
{
@@ -485,7 +635,4 @@ const char *strerror(int errno)
return buf;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 86ad378ab1ea..5fd99a480f82 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STDLIB_H
#define _NOLIBC_STDLIB_H
@@ -29,6 +32,24 @@ static __attribute__((unused)) char itoa_buffer[21];
* As much as possible, please keep functions alphabetically sorted.
*/
+static __inline__
+int abs(int j)
+{
+ return j >= 0 ? j : -j;
+}
+
+static __inline__
+long labs(long j)
+{
+ return j >= 0 ? j : -j;
+}
+
+static __inline__
+long long llabs(long long j)
+{
+ return j >= 0 ? j : -j;
+}
+
/* must be exported, as it's used by libgcc for various divide functions */
void abort(void);
__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
@@ -103,32 +124,6 @@ char *getenv(const char *name)
}
static __attribute__((unused))
-unsigned long getauxval(unsigned long type)
-{
- const unsigned long *auxv = _auxv;
- unsigned long ret;
-
- if (!auxv)
- return 0;
-
- while (1) {
- if (!auxv[0] && !auxv[1]) {
- ret = 0;
- break;
- }
-
- if (auxv[0] == type) {
- ret = auxv[1];
- break;
- }
-
- auxv += 2;
- }
-
- return ret;
-}
-
-static __attribute__((unused))
void *malloc(size_t len)
{
struct nolibc_heap *heap;
@@ -275,7 +270,7 @@ int itoa_r(long in, char *buffer)
int len = 0;
if (in < 0) {
- in = -in;
+ in = -(unsigned long)in;
*(ptr++) = '-';
len++;
}
@@ -411,7 +406,7 @@ int i64toa_r(int64_t in, char *buffer)
int len = 0;
if (in < 0) {
- in = -in;
+ in = -(uint64_t)in;
*(ptr++) = '-';
len++;
}
@@ -548,7 +543,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base)
return __strtox(nptr, endptr, base, 0, UINTMAX_MAX);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index ba84ab700e30..163a17e7dd38 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STRING_H
#define _NOLIBC_STRING_H
@@ -289,7 +292,40 @@ char *strrchr(const char *s, int c)
return (char *)ret;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+static __attribute__((unused))
+char *strstr(const char *haystack, const char *needle)
+{
+ size_t len_haystack, len_needle;
+
+ len_needle = strlen(needle);
+ if (!len_needle)
+ return NULL;
+
+ len_haystack = strlen(haystack);
+ while (len_haystack >= len_needle) {
+ if (!memcmp(haystack, needle, len_needle))
+ return (char *)haystack;
+ haystack++;
+ len_haystack--;
+ }
+
+ return NULL;
+}
+
+static __attribute__((unused))
+int tolower(int c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A' + 'a';
+ return c;
+}
+
+static __attribute__((unused))
+int toupper(int c)
+{
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + 'A';
+ return c;
+}
#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 08c1c074bec8..9556c69a6ae1 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_SYS_H
#define _NOLIBC_SYS_H
@@ -20,11 +23,7 @@
#include <linux/auxvec.h>
#include <linux/fcntl.h> /* for O_* and AT_* */
#include <linux/stat.h> /* for statx() */
-#include <linux/prctl.h>
-#include <linux/resource.h>
-#include <linux/utsname.h>
-#include "arch.h"
#include "errno.h"
#include "stdarg.h"
#include "types.h"
@@ -301,11 +300,17 @@ void sys_exit(int status)
}
static __attribute__((noreturn,unused))
-void exit(int status)
+void _exit(int status)
{
sys_exit(status);
}
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+ _exit(status);
+}
+
/*
* pid_t fork(void);
@@ -489,27 +494,6 @@ int getpagesize(void)
/*
- * int gettimeofday(struct timeval *tv, struct timezone *tz);
- */
-
-static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-#ifdef __NR_gettimeofday
- return my_syscall2(__NR_gettimeofday, tv, tz);
-#else
- return __nolibc_enosys(__func__, tv, tz);
-#endif
-}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- return __sysret(sys_gettimeofday(tv, tz));
-}
-
-
-/*
* uid_t getuid(void);
*/
@@ -531,18 +515,6 @@ uid_t getuid(void)
/*
- * int ioctl(int fd, unsigned long cmd, ... arg);
- */
-
-static __attribute__((unused))
-long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- return my_syscall3(__NR_ioctl, fd, cmd, arg);
-}
-
-#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
-
-/*
* int kill(pid_t pid, int signal);
*/
@@ -697,125 +669,6 @@ int mknod(const char *path, mode_t mode, dev_t dev)
return __sysret(sys_mknod(path, mode, dev));
}
-#ifndef sys_mmap
-static __attribute__((unused))
-void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
- off_t offset)
-{
- int n;
-
-#if defined(__NR_mmap2)
- n = __NR_mmap2;
- offset >>= 12;
-#else
- n = __NR_mmap;
-#endif
-
- return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
-}
-#endif
-
-/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
- * which returns -1 upon error and still satisfy user land that checks for
- * MAP_FAILED.
- */
-
-static __attribute__((unused))
-void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
-{
- void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
-
- if ((unsigned long)ret >= -4095UL) {
- SET_ERRNO(-(long)ret);
- ret = MAP_FAILED;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int sys_munmap(void *addr, size_t length)
-{
- return my_syscall2(__NR_munmap, addr, length);
-}
-
-static __attribute__((unused))
-int munmap(void *addr, size_t length)
-{
- return __sysret(sys_munmap(addr, length));
-}
-
-/*
- * int mount(const char *source, const char *target,
- * const char *fstype, unsigned long flags,
- * const void *data);
- */
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
- unsigned long flags, const void *data)
-{
- return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
-
-static __attribute__((unused))
-int mount(const char *src, const char *tgt,
- const char *fst, unsigned long flags,
- const void *data)
-{
- return __sysret(sys_mount(src, tgt, fst, flags, data));
-}
-
-/*
- * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
- */
-
-static __attribute__((unused))
-int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
-{
- return my_syscall4(__NR_openat, dirfd, path, flags, mode);
-}
-
-static __attribute__((unused))
-int openat(int dirfd, const char *path, int flags, ...)
-{
- mode_t mode = 0;
-
- if (flags & O_CREAT) {
- va_list args;
-
- va_start(args, flags);
- mode = va_arg(args, mode_t);
- va_end(args);
- }
-
- return __sysret(sys_openat(dirfd, path, flags, mode));
-}
-
-/*
- * int open(const char *path, int flags[, mode_t mode]);
- */
-
-static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
-{
- return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-}
-
-static __attribute__((unused))
-int open(const char *path, int flags, ...)
-{
- mode_t mode = 0;
-
- if (flags & O_CREAT) {
- va_list args;
-
- va_start(args, flags);
- mode = va_arg(args, mode_t);
- va_end(args);
- }
-
- return __sysret(sys_open(path, flags, mode));
-}
-
/*
* int pipe2(int pipefd[2], int flags);
@@ -842,26 +695,6 @@ int pipe(int pipefd[2])
/*
- * int prctl(int option, unsigned long arg2, unsigned long arg3,
- * unsigned long arg4, unsigned long arg5);
- */
-
-static __attribute__((unused))
-int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
-{
- return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5);
-}
-
-static __attribute__((unused))
-int prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
-{
- return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5));
-}
-
-
-/*
* int pivot_root(const char *new, const char *old);
*/
@@ -879,35 +712,6 @@ int pivot_root(const char *new, const char *old)
/*
- * int poll(struct pollfd *fds, int nfds, int timeout);
- */
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
- struct timespec t;
-
- if (timeout >= 0) {
- t.tv_sec = timeout / 1000;
- t.tv_nsec = (timeout % 1000) * 1000000;
- }
- return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
- return __nolibc_enosys(__func__, fds, nfds, timeout);
-#endif
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
- return __sysret(sys_poll(fds, nfds, timeout));
-}
-
-
-/*
* ssize_t read(int fd, void *buf, size_t count);
*/
@@ -925,61 +729,6 @@ ssize_t read(int fd, void *buf, size_t count)
/*
- * int reboot(int cmd);
- * <cmd> is among LINUX_REBOOT_CMD_*
- */
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
- return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
- return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
-}
-
-
-/*
- * int getrlimit(int resource, struct rlimit *rlim);
- * int setrlimit(int resource, const struct rlimit *rlim);
- */
-
-static __attribute__((unused))
-int sys_prlimit64(pid_t pid, int resource,
- const struct rlimit64 *new_limit, struct rlimit64 *old_limit)
-{
- return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit);
-}
-
-static __attribute__((unused))
-int getrlimit(int resource, struct rlimit *rlim)
-{
- struct rlimit64 rlim64;
- int ret;
-
- ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64));
- rlim->rlim_cur = rlim64.rlim_cur;
- rlim->rlim_max = rlim64.rlim_max;
-
- return ret;
-}
-
-static __attribute__((unused))
-int setrlimit(int resource, const struct rlimit *rlim)
-{
- struct rlimit64 rlim64 = {
- .rlim_cur = rlim->rlim_cur,
- .rlim_max = rlim->rlim_max,
- };
-
- return __sysret(sys_prlimit64(0, resource, &rlim64, NULL));
-}
-
-
-/*
* int sched_yield(void);
*/
@@ -1023,6 +772,14 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva
t.tv_nsec = timeout->tv_usec * 1000;
}
return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#elif defined(__NR_pselect6_time64)
+ struct __kernel_timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
#else
return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout);
#endif
@@ -1051,77 +808,31 @@ int setpgid(pid_t pid, pid_t pgid)
return __sysret(sys_setpgid(pid, pgid));
}
-
/*
- * pid_t setsid(void);
+ * pid_t setpgrp(void)
*/
static __attribute__((unused))
-pid_t sys_setsid(void)
+pid_t setpgrp(void)
{
- return my_syscall0(__NR_setsid);
+ return setpgid(0, 0);
}
-static __attribute__((unused))
-pid_t setsid(void)
-{
- return __sysret(sys_setsid());
-}
/*
- * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
- * int stat(const char *path, struct stat *buf);
+ * pid_t setsid(void);
*/
static __attribute__((unused))
-int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
-{
-#ifdef __NR_statx
- return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
-#else
- return __nolibc_enosys(__func__, fd, path, flags, mask, buf);
-#endif
-}
-
-static __attribute__((unused))
-int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+pid_t sys_setsid(void)
{
- return __sysret(sys_statx(fd, path, flags, mask, buf));
+ return my_syscall0(__NR_setsid);
}
-
static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
+pid_t setsid(void)
{
- struct statx statx;
- long ret;
-
- ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx));
- if (ret == -1)
- return ret;
-
- buf->st_dev = ((statx.stx_dev_minor & 0xff)
- | (statx.stx_dev_major << 8)
- | ((statx.stx_dev_minor & ~0xff) << 12));
- buf->st_ino = statx.stx_ino;
- buf->st_mode = statx.stx_mode;
- buf->st_nlink = statx.stx_nlink;
- buf->st_uid = statx.stx_uid;
- buf->st_gid = statx.stx_gid;
- buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
- | (statx.stx_rdev_major << 8)
- | ((statx.stx_rdev_minor & ~0xff) << 12));
- buf->st_size = statx.stx_size;
- buf->st_blksize = statx.stx_blksize;
- buf->st_blocks = statx.stx_blocks;
- buf->st_atim.tv_sec = statx.stx_atime.tv_sec;
- buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec;
- buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec;
- buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
- buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec;
- buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
-
- return 0;
+ return __sysret(sys_setsid());
}
@@ -1183,32 +894,6 @@ int umount2(const char *path, int flags)
/*
- * int uname(struct utsname *buf);
- */
-
-struct utsname {
- char sysname[65];
- char nodename[65];
- char release[65];
- char version[65];
- char machine[65];
- char domainname[65];
-};
-
-static __attribute__((unused))
-int sys_uname(struct utsname *buf)
-{
- return my_syscall1(__NR_uname, buf);
-}
-
-static __attribute__((unused))
-int uname(struct utsname *buf)
-{
- return __sysret(sys_uname(buf));
-}
-
-
-/*
* int unlink(const char *path);
*/
@@ -1232,59 +917,6 @@ int unlink(const char *path)
/*
- * pid_t wait(int *status);
- * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
- * pid_t waitpid(pid_t pid, int *status, int options);
- */
-
-static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-#ifdef __NR_wait4
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-#else
- return __nolibc_enosys(__func__, pid, status, options, rusage);
-#endif
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
- return __sysret(sys_wait4(-1, status, 0, NULL));
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return __sysret(sys_wait4(pid, status, options, rusage));
-}
-
-
-static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
-{
- return __sysret(sys_wait4(pid, status, options, NULL));
-}
-
-
-/*
- * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
- */
-
-static __attribute__((unused))
-int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
-{
- return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
-}
-
-static __attribute__((unused))
-int waitid(int which, pid_t pid, siginfo_t *infop, int options)
-{
- return __sysret(sys_waitid(which, pid, infop, options, NULL));
-}
-
-
-/*
* ssize_t write(int fd, const void *buf, size_t count);
*/
@@ -1317,7 +949,4 @@ int memfd_create(const char *name, unsigned int flags)
return __sysret(sys_memfd_create(name, flags));
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h
new file mode 100644
index 000000000000..c52463d6c18d
--- /dev/null
+++ b/tools/include/nolibc/sys/auxv.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * auxv definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_AUXV_H
+#define _NOLIBC_SYS_AUXV_H
+
+#include "../crt.h"
+
+static __attribute__((unused))
+unsigned long getauxval(unsigned long type)
+{
+ const unsigned long *auxv = _auxv;
+ unsigned long ret;
+
+ if (!auxv)
+ return 0;
+
+ while (1) {
+ if (!auxv[0] && !auxv[1]) {
+ ret = 0;
+ break;
+ }
+
+ if (auxv[0] == type) {
+ ret = auxv[1];
+ break;
+ }
+
+ auxv += 2;
+ }
+
+ return ret;
+}
+
+#endif /* _NOLIBC_SYS_AUXV_H */
diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h
new file mode 100644
index 000000000000..fc880687e02a
--- /dev/null
+++ b/tools/include/nolibc/sys/ioctl.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Ioctl definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_IOCTL_H
+#define _NOLIBC_SYS_IOCTL_H
+
+#include "../sys.h"
+
+#include <linux/ioctl.h>
+
+/*
+ * int ioctl(int fd, unsigned long cmd, ... arg);
+ */
+
+static __attribute__((unused))
+long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ return my_syscall3(__NR_ioctl, fd, cmd, arg);
+}
+
+#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
+
+#endif /* _NOLIBC_SYS_IOCTL_H */
diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h
new file mode 100644
index 000000000000..5228751b458c
--- /dev/null
+++ b/tools/include/nolibc/sys/mman.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * mm definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_MMAN_H
+#define _NOLIBC_SYS_MMAN_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+#ifndef sys_mmap
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset)
+{
+ int n;
+
+#if defined(__NR_mmap2)
+ n = __NR_mmap2;
+ offset >>= 12;
+#else
+ n = __NR_mmap;
+#endif
+
+ return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+}
+#endif
+
+/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
+ * which returns -1 upon error and still satisfy user land that checks for
+ * MAP_FAILED.
+ */
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+ void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address)
+{
+ return (void *)my_syscall5(__NR_mremap, old_address, old_size,
+ new_size, flags, new_address);
+}
+
+static __attribute__((unused))
+void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address)
+{
+ void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+ return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+ return __sysret(sys_munmap(addr, length));
+}
+
+#endif /* _NOLIBC_SYS_MMAN_H */
diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h
new file mode 100644
index 000000000000..e39ec02ea24c
--- /dev/null
+++ b/tools/include/nolibc/sys/mount.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Mount definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_MOUNT_H
+#define _NOLIBC_SYS_MOUNT_H
+
+#include "../sys.h"
+
+#include <linux/mount.h>
+
+/*
+ * int mount(const char *source, const char *target,
+ * const char *fstype, unsigned long flags,
+ * const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+ const char *fst, unsigned long flags,
+ const void *data)
+{
+ return __sysret(sys_mount(src, tgt, fst, flags, data));
+}
+
+#endif /* _NOLIBC_SYS_MOUNT_H */
diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h
new file mode 100644
index 000000000000..0205907b6ac8
--- /dev/null
+++ b/tools/include/nolibc/sys/prctl.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Prctl definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_PRCTL_H
+#define _NOLIBC_SYS_PRCTL_H
+
+#include "../sys.h"
+
+#include <linux/prctl.h>
+
+/*
+ * int prctl(int option, unsigned long arg2, unsigned long arg3,
+ * unsigned long arg4, unsigned long arg5);
+ */
+
+static __attribute__((unused))
+int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5);
+}
+
+static __attribute__((unused))
+int prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5));
+}
+
+#endif /* _NOLIBC_SYS_PRCTL_H */
diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h
new file mode 100644
index 000000000000..8d9749f1c845
--- /dev/null
+++ b/tools/include/nolibc/sys/random.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * random definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_RANDOM_H
+#define _NOLIBC_SYS_RANDOM_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+#include <linux/random.h>
+
+/*
+ * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags);
+ */
+
+static __attribute__((unused))
+ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return my_syscall3(__NR_getrandom, buf, buflen, flags);
+}
+
+static __attribute__((unused))
+ssize_t getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return __sysret(sys_getrandom(buf, buflen, flags));
+}
+
+#endif /* _NOLIBC_SYS_RANDOM_H */
diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h
new file mode 100644
index 000000000000..4a1e435be669
--- /dev/null
+++ b/tools/include/nolibc/sys/reboot.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Reboot definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_REBOOT_H
+#define _NOLIBC_SYS_REBOOT_H
+
+#include "../sys.h"
+
+#include <linux/reboot.h>
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+ return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+ return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
+}
+
+#endif /* _NOLIBC_SYS_REBOOT_H */
diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h
new file mode 100644
index 000000000000..b990f914dc56
--- /dev/null
+++ b/tools/include/nolibc/sys/resource.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Resource definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_RESOURCE_H
+#define _NOLIBC_SYS_RESOURCE_H
+
+#include "../sys.h"
+
+#include <linux/resource.h>
+
+/*
+ * int getrlimit(int resource, struct rlimit *rlim);
+ * int setrlimit(int resource, const struct rlimit *rlim);
+ */
+
+static __attribute__((unused))
+int sys_prlimit64(pid_t pid, int resource,
+ const struct rlimit64 *new_limit, struct rlimit64 *old_limit)
+{
+ return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit);
+}
+
+static __attribute__((unused))
+int getrlimit(int resource, struct rlimit *rlim)
+{
+ struct rlimit64 rlim64;
+ int ret;
+
+ ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64));
+ rlim->rlim_cur = rlim64.rlim_cur;
+ rlim->rlim_max = rlim64.rlim_max;
+
+ return ret;
+}
+
+static __attribute__((unused))
+int setrlimit(int resource, const struct rlimit *rlim)
+{
+ struct rlimit64 rlim64 = {
+ .rlim_cur = rlim->rlim_cur,
+ .rlim_max = rlim->rlim_max,
+ };
+
+ return __sysret(sys_prlimit64(0, resource, &rlim64, NULL));
+}
+
+#endif /* _NOLIBC_SYS_RESOURCE_H */
diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h
new file mode 100644
index 000000000000..8b4d80e3ea03
--- /dev/null
+++ b/tools/include/nolibc/sys/stat.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stat definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_STAT_H
+#define _NOLIBC_SYS_STAT_H
+
+#include "../arch.h"
+#include "../types.h"
+#include "../sys.h"
+
+/*
+ * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
+ * int stat(const char *path, struct stat *buf);
+ * int fstatat(int fd, const char *path, struct stat *buf, int flag);
+ * int fstat(int fildes, struct stat *buf);
+ * int lstat(const char *path, struct stat *buf);
+ */
+
+static __attribute__((unused))
+int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+#ifdef __NR_statx
+ return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
+#else
+ return __nolibc_enosys(__func__, fd, path, flags, mask, buf);
+#endif
+}
+
+static __attribute__((unused))
+int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+ return __sysret(sys_statx(fd, path, flags, mask, buf));
+}
+
+
+static __attribute__((unused))
+int fstatat(int fd, const char *path, struct stat *buf, int flag)
+{
+ struct statx statx;
+ long ret;
+
+ ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx));
+ if (ret == -1)
+ return ret;
+
+ buf->st_dev = ((statx.stx_dev_minor & 0xff)
+ | (statx.stx_dev_major << 8)
+ | ((statx.stx_dev_minor & ~0xff) << 12));
+ buf->st_ino = statx.stx_ino;
+ buf->st_mode = statx.stx_mode;
+ buf->st_nlink = statx.stx_nlink;
+ buf->st_uid = statx.stx_uid;
+ buf->st_gid = statx.stx_gid;
+ buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
+ | (statx.stx_rdev_major << 8)
+ | ((statx.stx_rdev_minor & ~0xff) << 12));
+ buf->st_size = statx.stx_size;
+ buf->st_blksize = statx.stx_blksize;
+ buf->st_blocks = statx.stx_blocks;
+ buf->st_atim.tv_sec = statx.stx_atime.tv_sec;
+ buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec;
+ buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec;
+ buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
+ buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec;
+ buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
+
+ return 0;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+ return fstatat(AT_FDCWD, path, buf, 0);
+}
+
+static __attribute__((unused))
+int fstat(int fildes, struct stat *buf)
+{
+ return fstatat(fildes, "", buf, AT_EMPTY_PATH);
+}
+
+static __attribute__((unused))
+int lstat(const char *path, struct stat *buf)
+{
+ return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW);
+}
+
+#endif /* _NOLIBC_SYS_STAT_H */
diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h
new file mode 100644
index 000000000000..4bf97f1386a0
--- /dev/null
+++ b/tools/include/nolibc/sys/syscall.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * syscall() definition for NOLIBC
+ * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SYSCALL_H
+#define _NOLIBC_SYS_SYSCALL_H
+
+#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
+#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
+#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
+#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
+#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
+
+#endif /* _NOLIBC_SYS_SYSCALL_H */
diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h
new file mode 100644
index 000000000000..37c33f030f02
--- /dev/null
+++ b/tools/include/nolibc/sys/sysmacros.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Sysmacro definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SYSMACROS_H
+#define _NOLIBC_SYS_SYSMACROS_H
+
+#include "../std.h"
+
+/* WARNING, it only deals with the 4096 first majors and 256 first minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)((dev) & 0xff))
+
+#endif /* _NOLIBC_SYS_SYSMACROS_H */
diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h
new file mode 100644
index 000000000000..33782a19aae9
--- /dev/null
+++ b/tools/include/nolibc/sys/time.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_TIME_H
+#define _NOLIBC_SYS_TIME_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+static int sys_clock_gettime(clockid_t clockid, struct timespec *tp);
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+#ifdef __NR_gettimeofday
+ return my_syscall2(__NR_gettimeofday, tv, tz);
+#else
+ (void) tz; /* Non-NULL tz is undefined behaviour */
+
+ struct timespec tp;
+ int ret;
+
+ ret = sys_clock_gettime(CLOCK_REALTIME, &tp);
+ if (!ret && tv) {
+ tv->tv_sec = tp.tv_sec;
+ tv->tv_usec = tp.tv_nsec / 1000;
+ }
+
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return __sysret(sys_gettimeofday(tv, tz));
+}
+
+#endif /* _NOLIBC_SYS_TIME_H */
diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h
new file mode 100644
index 000000000000..4375d546ba58
--- /dev/null
+++ b/tools/include/nolibc/sys/timerfd.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * timerfd definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_TIMERFD_H
+#define _NOLIBC_SYS_TIMERFD_H
+
+#include "../sys.h"
+#include "../time.h"
+
+#include <linux/timerfd.h>
+
+
+static __attribute__((unused))
+int sys_timerfd_create(int clockid, int flags)
+{
+ return my_syscall2(__NR_timerfd_create, clockid, flags);
+}
+
+static __attribute__((unused))
+int timerfd_create(int clockid, int flags)
+{
+ return __sysret(sys_timerfd_create(clockid, flags));
+}
+
+
+static __attribute__((unused))
+int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
+{
+#if defined(__NR_timerfd_gettime)
+ return my_syscall2(__NR_timerfd_gettime, fd, curr_value);
+#elif defined(__NR_timerfd_gettime64)
+ struct __kernel_itimerspec kcurr_value;
+ int ret;
+
+ ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, fd, curr_value);
+#endif
+}
+
+static __attribute__((unused))
+int timerfd_gettime(int fd, struct itimerspec *curr_value)
+{
+ return __sysret(sys_timerfd_gettime(fd, curr_value));
+}
+
+
+static __attribute__((unused))
+int sys_timerfd_settime(int fd, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+#if defined(__NR_timerfd_settime)
+ return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value);
+#elif defined(__NR_timerfd_settime64)
+ struct __kernel_itimerspec knew_value, kold_value;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value);
+ __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval);
+ ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value);
+ if (old_value) {
+ __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
+ }
+ return ret;
+#else
+ return __nolibc_enosys(__func__, fd, flags, new_value, old_value);
+#endif
+}
+
+static __attribute__((unused))
+int timerfd_settime(int fd, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+ return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value));
+}
+
+#endif /* _NOLIBC_SYS_TIMERFD_H */
diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h
new file mode 100644
index 000000000000..8a264a13275c
--- /dev/null
+++ b/tools/include/nolibc/sys/types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * sys/types.h shim for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+#include "../types.h"
diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h
new file mode 100644
index 000000000000..01023e1bb439
--- /dev/null
+++ b/tools/include/nolibc/sys/utsname.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Utsname definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UTSNAME_H
+#define _NOLIBC_SYS_UTSNAME_H
+
+#include "../sys.h"
+
+#include <linux/utsname.h>
+
+/*
+ * int uname(struct utsname *buf);
+ */
+
+struct utsname {
+ char sysname[65];
+ char nodename[65];
+ char release[65];
+ char version[65];
+ char machine[65];
+ char domainname[65];
+};
+
+static __attribute__((unused))
+int sys_uname(struct utsname *buf)
+{
+ return my_syscall1(__NR_uname, buf);
+}
+
+static __attribute__((unused))
+int uname(struct utsname *buf)
+{
+ return __sysret(sys_uname(buf));
+}
+
+#endif /* _NOLIBC_SYS_UTSNAME_H */
diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h
new file mode 100644
index 000000000000..4d44e3da0ba8
--- /dev/null
+++ b/tools/include/nolibc/sys/wait.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * wait definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_WAIT_H
+#define _NOLIBC_SYS_WAIT_H
+
+#include "../arch.h"
+#include "../std.h"
+#include "../types.h"
+
+/*
+ * pid_t wait(int *status);
+ * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
+ */
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+#ifdef __NR_wait4
+ return my_syscall4(__NR_wait4, pid, status, options, rusage);
+#else
+ return __nolibc_enosys(__func__, pid, status, options, rusage);
+#endif
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ return __sysret(sys_wait4(pid, status, options, rusage));
+}
+
+static __attribute__((unused))
+int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
+{
+ return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
+}
+
+static __attribute__((unused))
+int waitid(int which, pid_t pid, siginfo_t *infop, int options)
+{
+ return __sysret(sys_waitid(which, pid, infop, options, NULL));
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+ int idtype, ret;
+ siginfo_t info;
+ pid_t id;
+
+ if (pid == INT_MIN) {
+ SET_ERRNO(ESRCH);
+ return -1;
+ } else if (pid < -1) {
+ idtype = P_PGID;
+ id = -pid;
+ } else if (pid == -1) {
+ idtype = P_ALL;
+ id = 0;
+ } else if (pid == 0) {
+ idtype = P_PGID;
+ id = 0;
+ } else {
+ idtype = P_PID;
+ id = pid;
+ }
+
+ options |= WEXITED;
+
+ ret = waitid(idtype, id, &info, options);
+ if (ret)
+ return ret;
+
+ switch (info.si_code) {
+ case 0:
+ *status = 0;
+ break;
+ case CLD_EXITED:
+ *status = (info.si_status & 0xff) << 8;
+ break;
+ case CLD_KILLED:
+ *status = info.si_status & 0x7f;
+ break;
+ case CLD_DUMPED:
+ *status = (info.si_status & 0x7f) | 0x80;
+ break;
+ case CLD_STOPPED:
+ case CLD_TRAPPED:
+ *status = (info.si_status << 8) + 0x7f;
+ break;
+ case CLD_CONTINUED:
+ *status = 0xffff;
+ break;
+ default:
+ return -1;
+ }
+
+ return info.si_pid;
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+ return waitpid(-1, status, 0);
+}
+
+#endif /* _NOLIBC_SYS_WAIT_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index 84655361b9ad..fc387940d51f 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_TIME_H
#define _NOLIBC_TIME_H
@@ -12,6 +15,106 @@
#include "types.h"
#include "sys.h"
+#include <linux/signal.h>
+#include <linux/time.h>
+
+static __inline__
+void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts)
+{
+ kts->tv_sec = ts->tv_sec;
+ kts->tv_nsec = ts->tv_nsec;
+}
+
+static __inline__
+void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts)
+{
+ ts->tv_sec = kts->tv_sec;
+ ts->tv_nsec = kts->tv_nsec;
+}
+
+/*
+ * int clock_getres(clockid_t clockid, struct timespec *res);
+ * int clock_gettime(clockid_t clockid, struct timespec *tp);
+ * int clock_settime(clockid_t clockid, const struct timespec *tp);
+ */
+
+static __attribute__((unused))
+int sys_clock_getres(clockid_t clockid, struct timespec *res)
+{
+#if defined(__NR_clock_getres)
+ return my_syscall2(__NR_clock_getres, clockid, res);
+#elif defined(__NR_clock_getres_time64)
+ struct __kernel_timespec kres;
+ int ret;
+
+ ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres);
+ if (res)
+ __nolibc_timespec_kernel_to_user(&kres, res);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, clockid, res);
+#endif
+}
+
+static __attribute__((unused))
+int clock_getres(clockid_t clockid, struct timespec *res)
+{
+ return __sysret(sys_clock_getres(clockid, res));
+}
+
+static __attribute__((unused))
+int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
+{
+#if defined(__NR_clock_gettime)
+ return my_syscall2(__NR_clock_gettime, clockid, tp);
+#elif defined(__NR_clock_gettime64)
+ struct __kernel_timespec ktp;
+ int ret;
+
+ ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp);
+ if (tp)
+ __nolibc_timespec_kernel_to_user(&ktp, tp);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, clockid, tp);
+#endif
+}
+
+static __attribute__((unused))
+int clock_gettime(clockid_t clockid, struct timespec *tp)
+{
+ return __sysret(sys_clock_gettime(clockid, tp));
+}
+
+static __attribute__((unused))
+int sys_clock_settime(clockid_t clockid, struct timespec *tp)
+{
+#if defined(__NR_clock_settime)
+ return my_syscall2(__NR_clock_settime, clockid, tp);
+#elif defined(__NR_clock_settime64)
+ struct __kernel_timespec ktp;
+
+ __nolibc_timespec_user_to_kernel(tp, &ktp);
+ return my_syscall2(__NR_clock_settime64, clockid, &ktp);
+#else
+ return __nolibc_enosys(__func__, clockid, tp);
+#endif
+}
+
+static __attribute__((unused))
+int clock_settime(clockid_t clockid, struct timespec *tp)
+{
+ return __sysret(sys_clock_settime(clockid, tp));
+}
+
+
+static __inline__
+double difftime(time_t time1, time_t time2)
+{
+ return time1 - time2;
+}
+
+
static __attribute__((unused))
time_t time(time_t *tptr)
{
@@ -25,7 +128,89 @@ time_t time(time_t *tptr)
return tv.tv_sec;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+
+/*
+ * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid);
+ * int timer_gettime(timer_t timerid, struct itimerspec *curr_value);
+ * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value);
+ */
+
+static __attribute__((unused))
+int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)
+{
+ return my_syscall3(__NR_timer_create, clockid, evp, timerid);
+}
+
+static __attribute__((unused))
+int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)
+{
+ return __sysret(sys_timer_create(clockid, evp, timerid));
+}
+
+static __attribute__((unused))
+int sys_timer_delete(timer_t timerid)
+{
+ return my_syscall1(__NR_timer_delete, timerid);
+}
+
+static __attribute__((unused))
+int timer_delete(timer_t timerid)
+{
+ return __sysret(sys_timer_delete(timerid));
+}
+
+static __attribute__((unused))
+int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
+{
+#if defined(__NR_timer_gettime)
+ return my_syscall2(__NR_timer_gettime, timerid, curr_value);
+#elif defined(__NR_timer_gettime64)
+ struct __kernel_itimerspec kcurr_value;
+ int ret;
+
+ ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, timerid, curr_value);
+#endif
+}
+
+static __attribute__((unused))
+int timer_gettime(timer_t timerid, struct itimerspec *curr_value)
+{
+ return __sysret(sys_timer_gettime(timerid, curr_value));
+}
+
+static __attribute__((unused))
+int sys_timer_settime(timer_t timerid, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+#if defined(__NR_timer_settime)
+ return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value);
+#elif defined(__NR_timer_settime64)
+ struct __kernel_itimerspec knew_value, kold_value;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value);
+ __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval);
+ ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value);
+ if (old_value) {
+ __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
+ }
+ return ret;
+#else
+ return __nolibc_enosys(__func__, timerid, flags, new_value, old_value);
+#endif
+}
+
+static __attribute__((unused))
+int timer_settime(timer_t timerid, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+ return __sysret(sys_timer_settime(timerid, flags, new_value, old_value));
+}
#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index b26a5d0c417c..30904be544ed 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -4,16 +4,17 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_TYPES_H
#define _NOLIBC_TYPES_H
#include "std.h"
#include <linux/mman.h>
-#include <linux/reboot.h> /* for LINUX_REBOOT_* */
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/wait.h>
-#include <linux/resource.h>
/* Only the generic macros and types may be defined here. The arch-specific
@@ -156,20 +157,6 @@ typedef struct {
__set->fds[__idx] = 0; \
} while (0)
-/* for poll() */
-#define POLLIN 0x0001
-#define POLLPRI 0x0002
-#define POLLOUT 0x0004
-#define POLLERR 0x0008
-#define POLLHUP 0x0010
-#define POLLNVAL 0x0020
-
-struct pollfd {
- int fd;
- short int events;
- short int revents;
-};
-
/* for getdents64() */
struct linux_dirent64 {
uint64_t d_ino;
@@ -198,14 +185,8 @@ struct stat {
union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */
};
-/* WARNING, it only deals with the 4096 first majors and 256 first minors */
-#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
-#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
-#define minor(dev) ((unsigned int)(((dev) & 0xff))
-
-#ifndef offsetof
-#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
-#endif
+typedef __kernel_clockid_t clockid_t;
+typedef int timer_t;
#ifndef container_of
#define container_of(PTR, TYPE, FIELD) ({ \
@@ -214,7 +195,4 @@ struct stat {
})
#endif
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index e38f3660c051..25bfc7732ec7 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_UNISTD_H
#define _NOLIBC_UNISTD_H
@@ -17,6 +20,34 @@
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
+#define F_OK 0
+#define X_OK 1
+#define W_OK 2
+#define R_OK 4
+
+/*
+ * int access(const char *path, int amode);
+ * int faccessat(int fd, const char *path, int amode, int flag);
+ */
+
+static __attribute__((unused))
+int sys_faccessat(int fd, const char *path, int amode, int flag)
+{
+ return my_syscall4(__NR_faccessat, fd, path, amode, flag);
+}
+
+static __attribute__((unused))
+int faccessat(int fd, const char *path, int amode, int flag)
+{
+ return __sysret(sys_faccessat(fd, path, amode, flag));
+}
+
+static __attribute__((unused))
+int access(const char *path, int amode)
+{
+ return faccessat(AT_FDCWD, path, amode, 0);
+}
+
static __attribute__((unused))
int msleep(unsigned int msecs)
@@ -56,13 +87,4 @@ int tcsetpgrp(int fd, pid_t pid)
return ioctl(fd, TIOCSPGRP, &pid);
}
-#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
-#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
-#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
-#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
-#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
-
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_UNISTD_H */
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
new file mode 100644
index 000000000000..e710967c7c26
--- /dev/null
+++ b/tools/include/uapi/linux/fanotify.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FANOTIFY_H
+#define _UAPI_LINUX_FANOTIFY_H
+
+#include <linux/types.h>
+
+/* the following events that user-space can register for */
+#define FAN_ACCESS 0x00000001 /* File was accessed */
+#define FAN_MODIFY 0x00000002 /* File was modified */
+#define FAN_ATTRIB 0x00000004 /* Metadata changed */
+#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */
+#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
+#define FAN_OPEN 0x00000020 /* File was opened */
+#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */
+#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */
+#define FAN_CREATE 0x00000100 /* Subfile was created */
+#define FAN_DELETE 0x00000200 /* Subfile was deleted */
+#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */
+#define FAN_MOVE_SELF 0x00000800 /* Self was moved */
+#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
+
+#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
+#define FAN_FS_ERROR 0x00008000 /* Filesystem error */
+
+#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
+#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
+#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
+/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
+
+#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+
+#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
+
+#define FAN_RENAME 0x10000000 /* File was renamed */
+
+#define FAN_ONDIR 0x40000000 /* Event occurred against dir */
+
+/* helper events */
+#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
+#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */
+
+/* flags used for fanotify_init() */
+#define FAN_CLOEXEC 0x00000001
+#define FAN_NONBLOCK 0x00000002
+
+/* These are NOT bitwise flags. Both bits are used together. */
+#define FAN_CLASS_NOTIF 0x00000000
+#define FAN_CLASS_CONTENT 0x00000004
+#define FAN_CLASS_PRE_CONTENT 0x00000008
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
+ FAN_CLASS_PRE_CONTENT)
+
+#define FAN_UNLIMITED_QUEUE 0x00000010
+#define FAN_UNLIMITED_MARKS 0x00000020
+#define FAN_ENABLE_AUDIT 0x00000040
+
+/* Flags to determine fanotify event format */
+#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
+#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
+#define FAN_REPORT_FID 0x00000200 /* Report unique file id */
+#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
+#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
+#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
+#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+
+/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
+#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
+/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
+#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
+ FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \
+ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
+ FAN_UNLIMITED_MARKS)
+
+/* flags used for fanotify_modify_mark() */
+#define FAN_MARK_ADD 0x00000001
+#define FAN_MARK_REMOVE 0x00000002
+#define FAN_MARK_DONT_FOLLOW 0x00000004
+#define FAN_MARK_ONLYDIR 0x00000008
+/* FAN_MARK_MOUNT is 0x00000010 */
+#define FAN_MARK_IGNORED_MASK 0x00000020
+#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
+#define FAN_MARK_FLUSH 0x00000080
+/* FAN_MARK_FILESYSTEM is 0x00000100 */
+#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
+
+/* These are NOT bitwise flags. Both bits can be used togther. */
+#define FAN_MARK_INODE 0x00000000
+#define FAN_MARK_MOUNT 0x00000010
+#define FAN_MARK_FILESYSTEM 0x00000100
+#define FAN_MARK_MNTNS 0x00000110
+
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
+ FAN_MARK_REMOVE |\
+ FAN_MARK_DONT_FOLLOW |\
+ FAN_MARK_ONLYDIR |\
+ FAN_MARK_MOUNT |\
+ FAN_MARK_IGNORED_MASK |\
+ FAN_MARK_IGNORED_SURV_MODIFY |\
+ FAN_MARK_FLUSH)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_EVENTS (FAN_ACCESS |\
+ FAN_MODIFY |\
+ FAN_CLOSE |\
+ FAN_OPEN)
+
+/*
+ * All events which require a permission response from userspace
+ */
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
+ FAN_ACCESS_PERM)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
+ FAN_ALL_PERM_EVENTS |\
+ FAN_Q_OVERFLOW)
+
+#define FANOTIFY_METADATA_VERSION 3
+
+struct fanotify_event_metadata {
+ __u32 event_len;
+ __u8 vers;
+ __u8 reserved;
+ __u16 metadata_len;
+ __aligned_u64 mask;
+ __s32 fd;
+ __s32 pid;
+};
+
+#define FAN_EVENT_INFO_TYPE_FID 1
+#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+#define FAN_EVENT_INFO_TYPE_DFID 3
+#define FAN_EVENT_INFO_TYPE_PIDFD 4
+#define FAN_EVENT_INFO_TYPE_ERROR 5
+#define FAN_EVENT_INFO_TYPE_RANGE 6
+#define FAN_EVENT_INFO_TYPE_MNT 7
+
+/* Special info types for FAN_RENAME */
+#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
+/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */
+#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12
+/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */
+
+/* Variable length info record following event metadata */
+struct fanotify_event_info_header {
+ __u8 info_type;
+ __u8 pad;
+ __u16 len;
+};
+
+/*
+ * Unique file identifier info record.
+ * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID,
+ * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME.
+ * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated
+ * name immediately after the file handle.
+ */
+struct fanotify_event_info_fid {
+ struct fanotify_event_info_header hdr;
+ __kernel_fsid_t fsid;
+ /*
+ * Following is an opaque struct file_handle that can be passed as
+ * an argument to open_by_handle_at(2).
+ */
+ unsigned char handle[];
+};
+
+/*
+ * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
+ * It holds a pidfd for the pid that was responsible for generating an event.
+ */
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+
+struct fanotify_event_info_range {
+ struct fanotify_event_info_header hdr;
+ __u32 pad;
+ __u64 offset;
+ __u64 count;
+};
+
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+
+/*
+ * User space may need to record additional information about its decision.
+ * The extra information type records what kind of information is included.
+ * The default is none. We also define an extra information buffer whose
+ * size is determined by the extra information type.
+ *
+ * If the information type is Audit Rule, then the information following
+ * is the rule number that triggered the user space decision that
+ * requires auditing.
+ */
+
+#define FAN_RESPONSE_INFO_NONE 0
+#define FAN_RESPONSE_INFO_AUDIT_RULE 1
+
+struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+};
+
+struct fanotify_response_info_header {
+ __u8 type;
+ __u8 pad;
+ __u16 len;
+};
+
+struct fanotify_response_info_audit_rule {
+ struct fanotify_response_info_header hdr;
+ __u32 rule_number;
+ __u32 subj_trust;
+ __u32 obj_trust;
+};
+
+/* Legit userspace responses to a _PERM event */
+#define FAN_ALLOW 0x01
+#define FAN_DENY 0x02
+/* errno other than EPERM can specified in upper byte of deny response */
+#define FAN_ERRNO_BITS 8
+#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS)
+#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1)
+#define FAN_DENY_ERRNO(err) \
+ (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
+
+#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
+#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
+
+/* No fd set in event */
+#define FAN_NOFD -1
+#define FAN_NOPIDFD FAN_NOFD
+#define FAN_EPIDFD -2
+
+/* Helper functions to deal with fanotify_event_metadata buffers */
+#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
+
+#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
+ (struct fanotify_event_metadata*)(((char *)(meta)) + \
+ (meta)->event_len))
+
+#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len <= (long)(len))
+
+#endif /* _UAPI_LINUX_FANOTIFY_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
new file mode 100644
index 000000000000..7fa67c2031a5
--- /dev/null
+++ b/tools/include/uapi/linux/mount.h
@@ -0,0 +1,235 @@
+#ifndef _UAPI_LINUX_MOUNT_H
+#define _UAPI_LINUX_MOUNT_H
+
+#include <linux/types.h>
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ *
+ * Usage of these is restricted within the kernel to core mount(2) code and
+ * callers of sys_mount() only. Filesystems should be using the SB_*
+ * equivalent instead.
+ */
+#define MS_RDONLY 1 /* Mount read-only */
+#define MS_NOSUID 2 /* Ignore suid and sgid bits */
+#define MS_NODEV 4 /* Disallow access to device special files */
+#define MS_NOEXEC 8 /* Disallow program execution */
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
+#define MS_NOATIME 1024 /* Do not update access times. */
+#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_BIND 4096
+#define MS_MOVE 8192
+#define MS_REC 16384
+#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
+ MS_VERBOSE is deprecated. */
+#define MS_SILENT 32768
+#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#define MS_PRIVATE (1<<18) /* change to private */
+#define MS_SLAVE (1<<19) /* change to slave */
+#define MS_SHARED (1<<20) /* change to shared */
+#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION (1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
+#define MS_NOREMOTELOCK (1<<27)
+#define MS_NOSEC (1<<28)
+#define MS_BORN (1<<29)
+#define MS_ACTIVE (1<<30)
+#define MS_NOUSER (1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+ MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * open_tree() flags.
+ */
+#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
+#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
+
+/*
+ * move_mount() flags.
+ */
+#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
+#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
+#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
+
+/*
+ * fsopen() flags.
+ */
+#define FSOPEN_CLOEXEC 0x00000001
+
+/*
+ * fspick() flags.
+ */
+#define FSPICK_CLOEXEC 0x00000001
+#define FSPICK_SYMLINK_NOFOLLOW 0x00000002
+#define FSPICK_NO_AUTOMOUNT 0x00000004
+#define FSPICK_EMPTY_PATH 0x00000008
+
+/*
+ * The type of fsconfig() call made.
+ */
+enum fsconfig_command {
+ FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
+ FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
+ FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
+ FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
+ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
+ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
+ FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
+ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
+ FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
+};
+
+/*
+ * fsmount() flags.
+ */
+#define FSMOUNT_CLOEXEC 0x00000001
+
+/*
+ * Mount attributes.
+ */
+#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
+#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
+#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
+#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
+#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
+#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
+#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
+#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
+#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+
+
+/*
+ * Structure for getting mount/superblock/filesystem info with statmount(2).
+ *
+ * The interface is similar to statx(2): individual fields or groups can be
+ * selected with the @mask argument of statmount(). Kernel will set the @mask
+ * field according to the supported fields.
+ *
+ * If string fields are selected, then the caller needs to pass a buffer that
+ * has space after the fixed part of the structure. Nul terminated strings are
+ * copied there and offsets relative to @str are stored in the relevant fields.
+ * If the buffer is too small, then EOVERFLOW is returned. The actually used
+ * size is returned in @size.
+ */
+struct statmount {
+ __u32 size; /* Total size, including strings */
+ __u32 mnt_opts; /* [str] Options (comma separated, escaped) */
+ __u64 mask; /* What results were written */
+ __u32 sb_dev_major; /* Device ID */
+ __u32 sb_dev_minor;
+ __u64 sb_magic; /* ..._SUPER_MAGIC */
+ __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+ __u32 fs_type; /* [str] Filesystem type */
+ __u64 mnt_id; /* Unique ID of mount */
+ __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
+ __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
+ __u32 mnt_parent_id_old;
+ __u64 mnt_attr; /* MOUNT_ATTR_... */
+ __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+ __u64 mnt_peer_group; /* ID of shared peer group */
+ __u64 mnt_master; /* Mount receives propagation from this ID */
+ __u64 propagate_from; /* Propagation from in current namespace */
+ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
+ __u32 mnt_point; /* [str] Mountpoint relative to current root */
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
+ __u32 sb_source; /* [str] Source string of the mount */
+ __u32 opt_num; /* Number of fs options */
+ __u32 opt_array; /* [str] Array of nul terminated fs options */
+ __u32 opt_sec_num; /* Number of security options */
+ __u32 opt_sec_array; /* [str] Array of nul terminated security options */
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
+ char str[]; /* Variable size part containing strings */
+};
+
+/*
+ * Structure for passing mount ID and miscellaneous parameters to statmount(2)
+ * and listmount(2).
+ *
+ * For statmount(2) @param represents the request mask.
+ * For listmount(2) @param represents the last listed mount id (or zero).
+ */
+struct mnt_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 mnt_id;
+ __u64 param;
+ __u64 mnt_ns_id;
+};
+
+/* List of all mnt_id_req versions. */
+#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
+
+/*
+ * @mask bits for statmount(2)
+ */
+#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
+#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
+#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
+#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
+#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
+#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
+
+/*
+ * Special @mnt_id values that can be passed to listmount
+ */
+#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+
+#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..34127653fd00
--- /dev/null
+++ b/tools/include/uapi/linux/nsfs.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+/* Returns the type of namespace (CLONE_NEW* value) referred to by
+ file descriptor */
+#define NS_GET_NSTYPE _IO(NSIO, 0x3)
+/* Get owner UID (in the caller's user namespace) for a user namespace */
+#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the callers pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+
+#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 5fc753c23734..78a362b80027 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -39,18 +39,21 @@ enum perf_type_id {
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ *
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
+ *
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ *
+ * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
*/
-#define PERF_PMU_TYPE_SHIFT 32
-#define PERF_HW_EVENT_MASK 0xffffffff
+#define PERF_PMU_TYPE_SHIFT 32
+#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
@@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id {
/*
* Special "software" events provided by the kernel, even if the hardware
* does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
+ * physical and SW events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
@@ -167,8 +170,9 @@ enum perf_event_sample_format {
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
@@ -178,20 +182,20 @@ enum perf_event_sample_format {
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type_shift {
- PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
- PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
@@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift {
};
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
- PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
- PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
- PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
- PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
- PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
- PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
- PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
- PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
- PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
- PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
- PERF_SAMPLE_BRANCH_TYPE_SAVE =
- 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
- PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
- PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
- * Common flow change classification
+ * Common control flow change classifications:
*/
enum {
- PERF_BR_UNKNOWN = 0, /* unknown */
- PERF_BR_COND = 1, /* conditional */
- PERF_BR_UNCOND = 2, /* unconditional */
- PERF_BR_IND = 3, /* indirect */
- PERF_BR_CALL = 4, /* function call */
- PERF_BR_IND_CALL = 5, /* indirect function call */
- PERF_BR_RET = 6, /* function return */
- PERF_BR_SYSCALL = 7, /* syscall */
- PERF_BR_SYSRET = 8, /* syscall return */
- PERF_BR_COND_CALL = 9, /* conditional function call */
- PERF_BR_COND_RET = 10, /* conditional function return */
- PERF_BR_ERET = 11, /* exception return */
- PERF_BR_IRQ = 12, /* irq */
- PERF_BR_SERROR = 13, /* system error */
- PERF_BR_NO_TX = 14, /* not in transaction */
- PERF_BR_EXTEND_ABI = 15, /* extend ABI */
+ PERF_BR_UNKNOWN = 0, /* Unknown */
+ PERF_BR_COND = 1, /* Conditional */
+ PERF_BR_UNCOND = 2, /* Unconditional */
+ PERF_BR_IND = 3, /* Indirect */
+ PERF_BR_CALL = 4, /* Function call */
+ PERF_BR_IND_CALL = 5, /* Indirect function call */
+ PERF_BR_RET = 6, /* Function return */
+ PERF_BR_SYSCALL = 7, /* Syscall */
+ PERF_BR_SYSRET = 8, /* Syscall return */
+ PERF_BR_COND_CALL = 9, /* Conditional function call */
+ PERF_BR_COND_RET = 10, /* Conditional function return */
+ PERF_BR_ERET = 11, /* Exception return */
+ PERF_BR_IRQ = 12, /* IRQ */
+ PERF_BR_SERROR = 13, /* System error */
+ PERF_BR_NO_TX = 14, /* Not in transaction */
+ PERF_BR_EXTEND_ABI = 15, /* Extend ABI */
PERF_BR_MAX,
};
/*
- * Common branch speculation outcome classification
+ * Common branch speculation outcome classifications:
*/
enum {
- PERF_BR_SPEC_NA = 0, /* Not available */
- PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
- PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
- PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
- PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
- PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
- PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
- PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
- PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
- PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
- PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
- PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
+ PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
+ PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
+ PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
+ PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
+ PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
+ PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
+ PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
+ PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
- PERF_BR_PRIV_UNKNOWN = 0,
- PERF_BR_PRIV_USER = 1,
- PERF_BR_PRIV_KERNEL = 2,
- PERF_BR_PRIV_HV = 3,
+ PERF_BR_PRIV_UNKNOWN = 0,
+ PERF_BR_PRIV_USER = 1,
+ PERF_BR_PRIV_KERNEL = 2,
+ PERF_BR_PRIV_HV = 3,
};
-#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
-#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
-#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
-#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
-#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
+#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
@@ -310,9 +313,9 @@ enum {
* Values to determine ABI of the registers dump.
*/
enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
};
/*
@@ -320,21 +323,21 @@ enum perf_sample_regs_abi {
* abort events. Multiple bits can be set.
*/
enum {
- PERF_TXN_ELISION = (1 << 0), /* From elision */
- PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
- PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
- PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
- PERF_TXN_RETRY = (1 << 4), /* Retry possible */
- PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
- PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
- PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
- PERF_TXN_MAX = (1 << 8), /* non-ABI */
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
- /* bits 32..63 are reserved for the abort code */
+ /* Bits 32..63 are reserved for the abort code */
- PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
- PERF_TXN_ABORT_SHIFT = 32,
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
};
/*
@@ -369,24 +372,22 @@ enum perf_event_read_format {
PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
-#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
-#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
-#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */
+#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */
+ /* Add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
+#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
/*
- * Hardware event_id to monitor via a performance monitoring event:
- *
- * @sample_max_stack: Max number of frame pointers in a callchain,
- * should be < /proc/sys/kernel/perf_event_max_stack
- * Max number of entries of branch stack
- * should be < hardware limit
+ * 'struct perf_event_attr' contains various attributes that define
+ * a performance event - most of them hardware related configuration
+ * details, but also a lot of behavioral switches and values implemented
+ * by the kernel.
*/
struct perf_event_attr {
@@ -396,7 +397,7 @@ struct perf_event_attr {
__u32 type;
/*
- * Size of the attr structure, for fwd/bwd compat.
+ * Size of the attr structure, for forward/backwards compatibility.
*/
__u32 size;
@@ -451,21 +452,21 @@ struct perf_event_attr {
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end to beginning */
+ write_backward : 1, /* write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
+ bpf_event : 1, /* include BPF events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
+ build_id : 1, /* use build ID in mmap2 events */
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
__reserved_1 : 26;
union {
- __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_events; /* wake up every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
@@ -474,13 +475,13 @@ struct perf_event_attr {
__u64 bp_addr;
__u64 kprobe_func; /* for perf_kprobe */
__u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
+ __u64 config1; /* extension of config */
};
union {
__u64 bp_len;
- __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
__u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
+ __u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -510,7 +511,16 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
+
+ /*
+ * Max number of frame pointers in a callchain, should be
+ * lower than /proc/sys/kernel/perf_event_max_stack.
+ *
+ * Max number of entries of branch stack should be lower
+ * than the hardware limit.
+ */
__u16 sample_max_stack;
+
__u16 __reserved_2;
__u32 aux_sample_size;
@@ -537,7 +547,7 @@ struct perf_event_attr {
/*
* Structure used by below PERF_EVENT_IOC_QUERY_BPF command
- * to query bpf programs attached to the same perf tracepoint
+ * to query BPF programs attached to the same perf tracepoint
* as the given perf event.
*/
struct perf_event_query_bpf {
@@ -559,21 +569,21 @@ struct perf_event_query_bpf {
/*
* Ioctls that can be done on a perf event fd:
*/
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
-#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
-#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *)
enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
+ PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
@@ -584,7 +594,7 @@ struct perf_event_mmap_page {
__u32 compat_version; /* lowest version this is compat with */
/*
- * Bits needed to read the hw events in user-space.
+ * Bits needed to read the HW events in user-space.
*
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
@@ -622,7 +632,7 @@ struct perf_event_mmap_page {
__u32 index; /* hardware event identifier */
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
+ __u64 time_running; /* time event on CPU */
union {
__u64 capabilities;
struct {
@@ -650,7 +660,7 @@ struct perf_event_mmap_page {
/*
* If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
+ * delta since time_enabled (in ns) using RDTSC or similar.
*
* u64 quot, rem;
* u64 delta;
@@ -723,7 +733,7 @@ struct perf_event_mmap_page {
* after reading this value.
*
* When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
+ * written by user-space to reflect the last read data, after issuing
* an smp_mb() to separate the data read from the ->data_tail store.
* In this case the kernel will not over-write unread data.
*
@@ -739,7 +749,7 @@ struct perf_event_mmap_page {
/*
* AUX area is defined by aux_{offset,size} fields that should be set
- * by the userspace, so that
+ * by the user-space, so that
*
* aux_offset >= data_offset + data_size
*
@@ -813,7 +823,7 @@ struct perf_event_mmap_page {
* Indicates that thread was preempted in TASK_RUNNING state.
*
* PERF_RECORD_MISC_MMAP_BUILD_ID:
- * Indicates that mmap2 event carries build id data.
+ * Indicates that mmap2 event carries build ID data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
@@ -824,26 +834,26 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
+ __u32 type;
+ __u16 misc;
+ __u16 size;
};
struct perf_ns_link_info {
- __u64 dev;
- __u64 ino;
+ __u64 dev;
+ __u64 ino;
};
enum {
- NET_NS_INDEX = 0,
- UTS_NS_INDEX = 1,
- IPC_NS_INDEX = 2,
- PID_NS_INDEX = 3,
- USER_NS_INDEX = 4,
- MNT_NS_INDEX = 5,
- CGROUP_NS_INDEX = 6,
-
- NR_NAMESPACES, /* number of available namespaces */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
@@ -859,11 +869,11 @@ enum perf_event_type {
* optional fields being ignored.
*
* struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 id; } && PERF_SAMPLE_IDENTIFIER
* } && perf_event_attr::sample_id_all
*
@@ -874,7 +884,7 @@ enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
+ * correlate user-space IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
@@ -884,7 +894,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
@@ -894,7 +904,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -905,7 +915,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -916,7 +926,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -927,7 +937,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -939,7 +949,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -950,7 +960,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -1005,12 +1015,12 @@ enum perf_event_type {
* { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
*
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { union perf_sample_weight
* {
@@ -1035,10 +1045,11 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
- * { u64 size;
- * char data[size]; } && PERF_SAMPLE_AUX
+ * { u64 cgroup;} && PERF_SAMPLE_CGROUP
* { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
* { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
+ * { u64 size;
+ * char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -1070,7 +1081,7 @@ enum perf_event_type {
* };
* u32 prot, flags;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP2 = 10,
@@ -1079,12 +1090,12 @@ enum perf_event_type {
* Records that new data landed in the AUX buffer part.
*
* struct {
- * struct perf_event_header header;
+ * struct perf_event_header header;
*
- * u64 aux_offset;
- * u64 aux_size;
+ * u64 aux_offset;
+ * u64 aux_size;
* u64 flags;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_AUX = 11,
@@ -1167,7 +1178,7 @@ enum perf_event_type {
PERF_RECORD_KSYMBOL = 17,
/*
- * Record bpf events:
+ * Record BPF events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
@@ -1245,181 +1256,181 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
- PERF_BPF_EVENT_UNKNOWN = 0,
- PERF_BPF_EVENT_PROG_LOAD = 1,
- PERF_BPF_EVENT_PROG_UNLOAD = 2,
- PERF_BPF_EVENT_MAX, /* non-ABI */
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
};
-#define PERF_MAX_STACK_DEPTH 127
-#define PERF_MAX_CONTEXTS_PER_STACK 8
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
- PERF_CONTEXT_MAX = (__u64)-4095,
+ PERF_CONTEXT_MAX = (__u64)-4095,
};
/**
* PERF_RECORD_AUX::flags bits
*/
-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */
/* CoreSight PMU AUX buffer formats */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
-#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#if defined(__LITTLE_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_remote:1, /* remote */
- mem_snoopx:2, /* snoop mode, ext */
- mem_blk:3, /* access blocked */
- mem_hops:3, /* hop level */
- mem_rsvd:18;
+ __u64 mem_op : 5, /* Type of opcode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lock : 2, /* Lock instr */
+ mem_dtlb : 7, /* TLB access */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_remote : 1, /* Remote */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_blk : 3, /* Access blocked */
+ mem_hops : 3, /* Hop level */
+ mem_rsvd : 18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:18,
- mem_hops:3, /* hop level */
- mem_blk:3, /* access blocked */
- mem_snoopx:2, /* snoop mode, ext */
- mem_remote:1, /* remote */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_dtlb:7, /* tlb access */
- mem_lock:2, /* lock instr */
- mem_snoop:5, /* snoop mode */
- mem_lvl:14, /* memory hierarchy level */
- mem_op:5; /* type of opcode */
+ __u64 mem_rsvd : 18,
+ mem_hops : 3, /* Hop level */
+ mem_blk : 3, /* Access blocked */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_remote : 1, /* Remote */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_dtlb : 7, /* TLB access */
+ mem_lock : 2, /* Lock instr */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_op : 5; /* Type of opcode */
};
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
+/* Type of memory opcode: */
+#define PERF_MEM_OP_NA 0x0001 /* Not available */
+#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */
+#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */
+#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */
+#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */
+#define PERF_MEM_OP_SHIFT 0
/*
- * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
- * Supporting this namespace inorder to not break defined ABIs.
+ * We support this namespace in order to not break defined ABIs.
*
- * memory hierarchy (memory level, hit or miss)
+ * Memory hierarchy (memory level, hit or miss)
*/
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
-#define PERF_MEM_REMOTE_SHIFT 37
-
-#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
-#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
-#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
-#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
-#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
-/* 0x7 available */
-#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
-#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
-#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
-#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
-#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
-#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
-
-#define PERF_MEM_LVLNUM_SHIFT 33
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
-#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
-#define PERF_MEM_SNOOPX_SHIFT 38
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
+#define PERF_MEM_LVL_NA 0x0001 /* Not available */
+#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */
+#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */
+#define PERF_MEM_LVL_L1 0x0008 /* L1 */
+#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x0020 /* L2 */
+#define PERF_MEM_LVL_L3 0x0040 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
+#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
+/* 0x007 available */
+#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
+#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
+#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* Snoop mode */
+#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */
+#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */
+#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */
+#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* Locked instruction */
+#define PERF_MEM_LOCK_NA 0x0001 /* Not available */
+#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
+#define PERF_MEM_TLB_NA 0x0001 /* Not available */
+#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */
+#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */
+#define PERF_MEM_TLB_L1 0x0008 /* L1 */
+#define PERF_MEM_TLB_L2 0x0010 /* L2 */
+#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
-#define PERF_MEM_BLK_NA 0x01 /* not available */
-#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
-#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
-#define PERF_MEM_BLK_SHIFT 40
-
-/* hop level */
-#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_BLK_NA 0x0001 /* Not available */
+#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
+/* Hop level */
+#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */
+#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */
+#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */
+#define PERF_MEM_HOPS_3 0x0004 /* Remote board */
/* 5-7 available */
-#define PERF_MEM_HOPS_SHIFT 43
+#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
/*
- * single taken branch record layout:
+ * Layout of single taken branch records:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
@@ -1438,37 +1449,37 @@ union perf_mem_data_src {
struct perf_branch_entry {
__u64 from;
__u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- cycles:16, /* cycle count to last branch */
- type:4, /* branch type */
- spec:2, /* branch speculation info */
- new_type:4, /* additional branch type */
- priv:3, /* privilege level */
- reserved:31;
+ __u64 mispred : 1, /* target mispredicted */
+ predicted : 1, /* target predicted */
+ in_tx : 1, /* in transaction */
+ abort : 1, /* transaction abort */
+ cycles : 16, /* cycle count to last branch */
+ type : 4, /* branch type */
+ spec : 2, /* branch speculation info */
+ new_type : 4, /* additional branch type */
+ priv : 3, /* privilege level */
+ reserved : 31;
};
/* Size of used info bits in struct perf_branch_entry */
#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
union perf_sample_weight {
- __u64 full;
+ __u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
- __u32 var1_dw;
- __u16 var2_w;
- __u16 var3_w;
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
- __u16 var3_w;
- __u16 var2_w;
- __u32 var1_dw;
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 35791791a879..43dec6eed559 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -230,7 +230,7 @@ struct prctl_mm_map {
# define PR_PAC_APDBKEY (1UL << 3)
# define PR_PAC_APGAKEY (1UL << 4)
-/* Tagged user address controls for arm64 */
+/* Tagged user address controls for arm64 and RISC-V */
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT 24
+# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
@@ -328,4 +331,44 @@ struct prctl_mm_map {
# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS 74
+
+/*
+ * Set the current shadow stack configuration. Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration. All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
index 9999c1effdb6..88870b0bceea 100644
--- a/tools/memory-model/Documentation/README
+++ b/tools/memory-model/Documentation/README
@@ -23,8 +23,11 @@ o You are familiar with the Linux-kernel concurrency primitives
that you need, and just want to get started with LKMM litmus
tests: litmus-tests.txt
-o You would like to access lock-protected shared variables without
- having their corresponding locks held: locking.txt
+o You need to locklessly access shared variables that are otherwise
+ protected by a lock: locking.txt
+
+ This locking.txt file expands on the "Locking" section in
+ recipes.txt, but is self-contained.
o You are familiar with Linux-kernel concurrency, and would
like a detailed intuitive understanding of LKMM, including
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 6dc8b3642458..34aa3172071b 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1896,7 +1896,7 @@ following respects:
3. The srcu_down_read() and srcu_up_read() primitives work
exactly like srcu_read_lock() and srcu_read_unlock(), except
- that matching calls don't have to execute on the same CPU.
+ that matching calls don't have to execute within the same context.
(The names are meant to be suggestive of operations on
semaphores.) Since the matching is determined by the domain
pointer and index value, these primitives make it possible for
diff --git a/tools/memory-model/Documentation/locking.txt b/tools/memory-model/Documentation/locking.txt
index 65c898c64a93..d6dc3cc34ab6 100644
--- a/tools/memory-model/Documentation/locking.txt
+++ b/tools/memory-model/Documentation/locking.txt
@@ -1,3 +1,8 @@
+[!] Note:
+ This file expands on the "Locking" section of recipes.txt,
+ focusing on locklessly accessing shared variables that are
+ otherwise protected by a lock.
+
Locking
=======
diff --git a/tools/memory-model/Documentation/ordering.txt b/tools/memory-model/Documentation/ordering.txt
index 9b0949d3f5ec..7ab3744929d8 100644
--- a/tools/memory-model/Documentation/ordering.txt
+++ b/tools/memory-model/Documentation/ordering.txt
@@ -223,7 +223,7 @@ The Linux kernel's compiler barrier is barrier(). This primitive
prohibits compiler code-motion optimizations that might move memory
references across the point in the code containing the barrier(), but
does not constrain hardware memory ordering. For example, this can be
-used to prevent to compiler from moving code across an infinite loop:
+used to prevent the compiler from moving code across an infinite loop:
WRITE_ONCE(x, 1);
while (dontstop)
@@ -274,7 +274,7 @@ different pieces of the concurrent algorithm. The variable stored to
by the smp_store_release(), in this case "y", will normally be used in
an acquire operation in other parts of the concurrent algorithm.
-To see the performance advantages, suppose that the above example read
+To see the performance advantages, suppose that the above example reads
from "x" instead of writing to it. Then an smp_wmb() could not guarantee
ordering, and an smp_mb() would be needed instead:
@@ -394,17 +394,17 @@ from the value returned by the rcu_dereference() or srcu_dereference()
to that subsequent memory access.
A call to rcu_dereference() for a given RCU-protected pointer is
-usually paired with a call to a call to rcu_assign_pointer() for that
-same pointer in much the same way that a call to smp_load_acquire() is
-paired with a call to smp_store_release(). Calls to rcu_dereference()
-and rcu_assign_pointer are often buried in other APIs, for example,
+usually paired with a call to rcu_assign_pointer() for that same pointer
+in much the same way that a call to smp_load_acquire() is paired with
+a call to smp_store_release(). Calls to rcu_dereference() and
+rcu_assign_pointer() are often buried in other APIs, for example,
the RCU list API members defined in include/linux/rculist.h. For more
information, please see the docbook headers in that file, the most
-recent LWN article on the RCU API (https://lwn.net/Articles/777036/),
+recent LWN article on the RCU API (https://lwn.net/Articles/988638/),
and of course the material in Documentation/RCU.
If the pointer value is manipulated between the rcu_dereference()
-that returned it and a later dereference(), please read
+that returned it and a later rcu_dereference(), please read
Documentation/RCU/rcu_dereference.rst. It can also be quite helpful to
review uses in the Linux kernel.
@@ -457,7 +457,7 @@ described earlier in this document.
These operations come in three categories:
o Marked writes, such as WRITE_ONCE() and atomic_set(). These
- primitives required the compiler to emit the corresponding store
+ primitives require the compiler to emit the corresponding store
instructions in the expected execution order, thus suppressing
a number of destructive optimizations. However, they provide no
hardware ordering guarantees, and in fact many CPUs will happily
@@ -465,7 +465,7 @@ o Marked writes, such as WRITE_ONCE() and atomic_set(). These
operations, unless these operations are to the same variable.
o Marked reads, such as READ_ONCE() and atomic_read(). These
- primitives required the compiler to emit the corresponding load
+ primitives require the compiler to emit the corresponding load
instructions in the expected execution order, thus suppressing
a number of destructive optimizations. However, they provide no
hardware ordering guarantees, and in fact many CPUs will happily
@@ -506,7 +506,7 @@ of the old value and the new value.
Unmarked C-language accesses are unordered, and are also subject to
any number of compiler optimizations, many of which can break your
-concurrent code. It is possible to used unmarked C-language accesses for
+concurrent code. It is possible to use unmarked C-language accesses for
shared variables that are subject to concurrent access, but great care
is required on an ongoing basis. The compiler-constraining barrier()
primitive can be helpful, as can the various ordering primitives discussed
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 03f58b11c252..52115ee5f393 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -61,6 +61,10 @@ usual) some things to be careful of:
Locking
-------
+[!] Note:
+ locking.txt expands on this section, providing more detail on
+ locklessly accessing lock-protected shared variables.
+
Locking is well-known and straightforward, at least if you don't think
about it too hard. And the basic rule is indeed quite simple: Any CPU that
has acquired a given lock sees any changes previously seen or made by any
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index c5fdfd19df24..d691390620b3 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -46,8 +46,7 @@ o ARM Ltd. (Ed.). 2014. "ARM Architecture Reference Manual (ARMv8,
o Imagination Technologies, LTD. 2015. "MIPS(R) Architecture
For Programmers, Volume II-A: The MIPS64(R) Instruction,
- Set Reference Manual". Imagination Technologies,
- LTD. https://imgtec.com/?do-download=4302.
+ Set Reference Manual". Imagination Technologies, LTD.
o Shaked Flur, Kathryn E. Gray, Christopher Pulte, Susmit
Sarkar, Ali Sezgin, Luc Maranget, Will Deacon, and Peter
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
index 21f06c1d1b70..2df148630cdc 100644
--- a/tools/memory-model/Documentation/simple.txt
+++ b/tools/memory-model/Documentation/simple.txt
@@ -134,7 +134,7 @@ Packaged primitives: Sequence locking
Lockless programming is considered by many to be more difficult than
lock-based programming, but there are a few lockless design patterns that
have been built out into an API. One of these APIs is sequence locking.
-Although this APIs can be used in extremely complex ways, there are simple
+Although this API can be used in extremely complex ways, there are simple
and effective ways of using it that avoid the need to pay attention to
memory ordering.
@@ -205,7 +205,7 @@ If you want to keep things simple, use the initialization and read-out
operations from the previous section only when there are no racing
accesses. Otherwise, use only fully ordered operations when accessing
or modifying the variable. This approach guarantees that code prior
-to a given access to that variable will be seen by all CPUs has having
+to a given access to that variable will be seen by all CPUs as having
happened before any code following any later access to that same variable.
Please note that per-CPU functions are not atomic operations and
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index af7fddd7b085..cab6b576c876 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -338,16 +338,24 @@ def main():
print('Capabilities:')
[print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])]
- print(f'PTP Hardware Clock: {tsinfo["phc-index"]}')
+ print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}')
- print('Hardware Transmit Timestamp Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ if 'tx-types' in tsinfo:
+ print('Hardware Transmit Timestamp Modes:')
+ [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ else:
+ print('Hardware Transmit Timestamp Modes: none')
+
+ if 'rx-filters' in tsinfo:
+ print('Hardware Receive Filter Modes:')
+ [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ else:
+ print('Hardware Receive Filter Modes: none')
- print('Hardware Receive Filter Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ if 'stats' in tsinfo and tsinfo['stats']:
+ print('Statistics:')
+ [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
- print('Statistics:')
- [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
return
print(f'Settings for {args.device}:')
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 30c0a34b2784..a7f08edbc235 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -1143,10 +1143,9 @@ class Family(SpecFamily):
self.pure_nested_structs[nested].request = True
if attr in rs_members['reply']:
self.pure_nested_structs[nested].reply = True
-
- if spec.is_multi_val():
- child = self.pure_nested_structs.get(nested)
- child.in_multi_val = True
+ if spec.is_multi_val():
+ child = self.pure_nested_structs.get(nested)
+ child.in_multi_val = True
self._sort_pure_types()
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 3ce7b54003c2..98c4713c1b09 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];
+ /*
+ * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
+ */
+ if (op1 == 0xea) {
+ insn->len = 1;
+ insn->type = INSN_BUG;
+ return 0;
+ }
+
if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;
@@ -842,12 +851,14 @@ int arch_decode_hint_reg(u8 sp_reg, int *base)
bool arch_is_retpoline(struct symbol *sym)
{
- return !strncmp(sym->name, "__x86_indirect_", 15);
+ return !strncmp(sym->name, "__x86_indirect_", 15) ||
+ !strncmp(sym->name, "__pi___x86_indirect_", 20);
}
bool arch_is_rethunk(struct symbol *sym)
{
- return !strcmp(sym->name, "__x86_return_thunk");
+ return !strcmp(sym->name, "__x86_return_thunk") ||
+ !strcmp(sym->name, "__pi___x86_return_thunk");
}
bool arch_is_embedded_insn(struct symbol *sym)
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 727a3a4fd9d7..ca5d77db692a 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -572,6 +572,34 @@ err:
return -1;
}
+static int mark_group_syms(struct elf *elf)
+{
+ struct section *symtab, *sec;
+ struct symbol *sym;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
+ ERROR("no .symtab");
+ return -1;
+ }
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->sh.sh_type == SHT_GROUP &&
+ sec->sh.sh_link == symtab->idx) {
+ sym = find_symbol_by_index(elf, sec->sh.sh_info);
+ if (!sym) {
+ ERROR("%s: can't find SHT_GROUP signature symbol",
+ sec->name);
+ return -1;
+ }
+
+ sym->group_sec = sec;
+ }
+ }
+
+ return 0;
+}
+
/*
* @sym's idx has changed. Update the relocs which reference it.
*/
@@ -745,7 +773,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
/*
* Move the first global symbol, as per sh_info, into a new, higher
- * symbol index. This fees up a spot for a new local symbol.
+ * symbol index. This frees up a spot for a new local symbol.
*/
first_non_local = symtab->sh.sh_info;
old = find_symbol_by_index(elf, first_non_local);
@@ -763,6 +791,11 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
if (elf_update_sym_relocs(elf, old))
return NULL;
+ if (old->group_sec) {
+ old->group_sec->sh.sh_info = new_idx;
+ mark_sec_changed(elf, old->group_sec, true);
+ }
+
new_idx = first_non_local;
}
@@ -1035,6 +1068,9 @@ struct elf *elf_open_read(const char *name, int flags)
if (read_symbols(elf))
goto err;
+ if (mark_group_syms(elf))
+ goto err;
+
if (read_relocs(elf))
goto err;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index c7c4e87ebe88..0a2fa3ac0079 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -72,6 +72,7 @@ struct symbol {
u8 ignore : 1;
struct list_head pv_target;
struct reloc *relocs;
+ struct section *group_sec;
};
struct reloc {
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 279ab2ab4abe..b558ab98719f 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,6 +3,7 @@ perf-bench-y += sched-pipe.o
perf-bench-y += sched-seccomp-notify.o
perf-bench-y += syscall.o
perf-bench-y += mem-functions.o
+perf-bench-y += futex.o
perf-bench-y += futex-hash.o
perf-bench-y += futex-wake.o
perf-bench-y += futex-wake-parallel.o
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index b472eded521b..fdf133c9520f 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -18,9 +18,11 @@
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/prctl.h>
#include <linux/zalloc.h>
#include <sys/time.h>
#include <sys/mman.h>
+#include <sys/prctl.h>
#include <perf/cpumap.h>
#include "../util/mutex.h"
@@ -50,9 +52,12 @@ struct worker {
static struct bench_futex_parameters params = {
.nfutexes = 1024,
.runtime = 10,
+ .nbuckets = -1,
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
@@ -118,6 +123,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int)bench__runtime.tv_sec);
+ futex_print_nbuckets(&params);
}
int bench_futex_hash(int argc, const char **argv)
@@ -161,6 +167,7 @@ int bench_futex_hash(int argc, const char **argv)
if (!params.fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ futex_set_nbuckets_param(&params);
printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 0416120c091b..5144a158512c 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -41,10 +41,13 @@ static struct stats throughput_stats;
static struct cond thread_parent, thread_worker;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
.runtime = 10,
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_BOOLEAN( 'M', "multi", &params.multi, "Use multiple futexes"),
@@ -67,6 +70,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int)bench__runtime.tv_sec);
+ futex_print_nbuckets(&params);
}
static void toggle_done(int sig __maybe_unused,
@@ -203,6 +207,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
+ futex_set_nbuckets_param(&params);
threads_starting = params.nthreads;
gettimeofday(&bench__start, NULL);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index aad5bfc4fe18..a2f91ee1950b 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
static int futex_flag = 0;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
/*
* How many tasks to requeue at a time.
* Default to 1 in order to make the kernel work more.
@@ -50,6 +51,8 @@ static struct bench_futex_parameters params = {
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -77,6 +80,7 @@ static void print_summary(void)
params.nthreads,
requeuetime_avg / USEC_PER_MSEC,
rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+ futex_print_nbuckets(&params);
}
static void *workerfn(void *arg __maybe_unused)
@@ -204,6 +208,8 @@ int bench_futex_requeue(int argc, const char **argv)
if (params.broadcast)
params.nrequeue = params.nthreads;
+ futex_set_nbuckets_param(&params);
+
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
"%d at a time.\n\n", getpid(), params.nthreads,
params.fshared ? "shared":"private", &futex1,
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 4352e318631e..ee66482c29fd 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -57,9 +57,13 @@ static struct stats waketime_stats, wakeup_stats;
static unsigned int threads_starting;
static int futex_flag = 0;
-static struct bench_futex_parameters params;
+static struct bench_futex_parameters params = {
+ .nbuckets = -1,
+};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -218,6 +222,7 @@ static void print_summary(void)
params.nthreads,
waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
+ futex_print_nbuckets(&params);
}
@@ -291,6 +296,8 @@ int bench_futex_wake_parallel(int argc, const char **argv)
if (!params.fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ futex_set_nbuckets_param(&params);
+
printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
"futex %p), %d threads waking up %d at a time.\n\n",
getpid(), params.nthreads, params.fshared ? "shared":"private",
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 49b3c89b0b35..8d6107f7cd94 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
static int futex_flag = 0;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
/*
* How many wakeups to do at a time.
* Default to 1 in order to make the kernel work more.
@@ -50,6 +51,8 @@ static struct bench_futex_parameters params = {
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &params.nwakes, "Specify amount of threads to wake at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -93,6 +96,7 @@ static void print_summary(void)
params.nthreads,
waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
+ futex_print_nbuckets(&params);
}
static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
new file mode 100644
index 000000000000..26382e4d8d4c
--- /dev/null
+++ b/tools/perf/bench/futex.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+
+#include "futex.h"
+
+void futex_set_nbuckets_param(struct bench_futex_parameters *params)
+{
+ unsigned long flags;
+ int ret;
+
+ if (params->nbuckets < 0)
+ return;
+
+ flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0;
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags);
+ if (ret) {
+ printf("Requesting %d hash buckets failed: %d/%m\n",
+ params->nbuckets, ret);
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+}
+
+void futex_print_nbuckets(struct bench_futex_parameters *params)
+{
+ char *futex_hash_mode;
+ int ret;
+
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+ if (params->nbuckets >= 0) {
+ if (ret != params->nbuckets) {
+ if (ret < 0) {
+ printf("Can't query number of buckets: %m\n");
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+ printf("Requested number of hash buckets does not currently used.\n");
+ printf("Requested: %d in usage: %d\n", params->nbuckets, ret);
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+ if (params->nbuckets == 0) {
+ ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
+ } else {
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
+ if (ret < 0) {
+ printf("Can't check if the hash is immutable: %m\n");
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+ ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s",
+ params->nbuckets,
+ ret == 1 ? "(immutable)" : "");
+ }
+ } else {
+ if (ret <= 0) {
+ ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
+ } else {
+ ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets",
+ ret);
+ }
+ }
+ if (ret < 0)
+ err(EXIT_FAILURE, "ENOMEM, futex_hash_mode");
+ printf("%s\n", futex_hash_mode);
+ free(futex_hash_mode);
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index ebdc2b032afc..9c9a73f9d865 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -25,6 +25,8 @@ struct bench_futex_parameters {
unsigned int nfutexes;
unsigned int nwakes;
unsigned int nrequeue;
+ int nbuckets;
+ bool buckets_immutable;
};
/**
@@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
val, opflags);
}
+void futex_set_nbuckets_param(struct bench_futex_parameters *params);
+void futex_print_nbuckets(struct bench_futex_parameters *params);
+
#endif /* _FUTEX_H */
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 857f6646cc23..e9fab20e9330 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -186,7 +186,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
-check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
+check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 9d0ce88e90e4..456ce64ad822 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -9,7 +9,7 @@
#include <inttypes.h>
#include <linux/string.h>
-#include "../../arch/x86/include/asm/amd-ibs.h"
+#include "../../arch/x86/include/asm/amd/ibs.h"
#include "debug.h"
#include "session.h"
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 68b9ea86b86c..af0e558f231c 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -3,7 +3,7 @@
*
* Module Name: cmfsize - Common get file size function
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 6a0cdba6fdfd..3d63626d80e7 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -3,7 +3,7 @@
*
* Module Name: getopt
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 9d70d8c945af..9741e7503591 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -3,7 +3,7 @@
*
* Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl")
typedef struct osl_table_info {
struct osl_table_info *next;
u32 instance;
- char signature[ACPI_NAMESEG_SIZE];
+ char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
} osl_table_info;
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
index 39f3bffd9355..b9bb83116549 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixdir.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixdir - Unix directory access interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 2b7d56252684..b93ebc9371a5 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixmap - Unix OSL for file mappings
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 46429417c71a..36f27491713c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixxf - UNIX OSL interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index 643e3e722340..fe0d23c4f2ed 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -3,7 +3,7 @@
*
* Module Name: acpidump.h - Include file for acpi_dump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 0742b00b61a1..bf30143efbdc 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -3,7 +3,7 @@
*
* Module Name: apdump - Dump routines for ACPI tables (acpidump)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 13817f9112c0..75db0091e275 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -3,7 +3,7 @@
*
* Module Name: apfiles - File-related functions for acpidump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname)
int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
{
- char filename[ACPI_NAMESEG_SIZE + 16];
+ char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING;
char instance_str[16];
ACPI_FILE file;
acpi_size actual;
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index 666a9675e743..9f3850e3af5b 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -3,7 +3,7 @@
*
* Module Name: apmain - Main module for the acpidump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 835123add0ed..be8dfac14076 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -2,6 +2,7 @@
# Makefile for cpupower
#
# Copyright (C) 2005,2006 Dominik Brodowski <linux@dominikbrodowski.net>
+# Copyright (C) 2025 Francesco Poli <invernomuto@paranoici.org>
#
# Based largely on the Makefile for udev by:
#
@@ -71,6 +72,7 @@ bindir ?= /usr/bin
sbindir ?= /usr/sbin
mandir ?= /usr/man
libdir ?= /usr/lib
+libexecdir ?= /usr/libexec
includedir ?= /usr/include
localedir ?= /usr/share/locale
docdir ?= /usr/share/doc/packages/cpupower
@@ -83,6 +85,7 @@ CP = cp -fpR
INSTALL = /usr/bin/install -c
INSTALL_PROGRAM = ${INSTALL}
INSTALL_DATA = ${INSTALL} -m 644
+SETPERM_DATA = chmod 644
#bash completion scripts get sourced and so they should be rw only.
INSTALL_SCRIPT = ${INSTALL} -m 644
@@ -302,6 +305,13 @@ install-tools: $(OUTPUT)cpupower
$(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir}
$(INSTALL) -d $(DESTDIR)${bash_completion_dir}
$(INSTALL_SCRIPT) cpupower-completion.sh '$(DESTDIR)${bash_completion_dir}/cpupower'
+ $(INSTALL) -d $(DESTDIR)${confdir}
+ $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}'
+ $(INSTALL) -d $(DESTDIR)${libexecdir}
+ $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower'
+ $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system
+ sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service'
+ $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service'
install-man:
$(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1
@@ -336,6 +346,9 @@ uninstall:
- rm -f $(DESTDIR)${includedir}/cpufreq.h
- rm -f $(DESTDIR)${includedir}/cpuidle.h
- rm -f $(DESTDIR)${bindir}/utils/cpupower
+ - rm -f $(DESTDIR)${confdir}cpupower-service.conf
+ - rm -f $(DESTDIR)${libexecdir}/cpupower
+ - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service
- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1
diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README
index 2678ed81d311..9de449469568 100644
--- a/tools/power/cpupower/README
+++ b/tools/power/cpupower/README
@@ -59,6 +59,10 @@ $ sudo make install
-----------------------------------------------------------------------
| man pages | /usr/man |
-----------------------------------------------------------------------
+| systemd service | /usr/lib/systemd/system |
+-----------------------------------------------------------------------
+| systemd support script | /usr/libexec |
+-----------------------------------------------------------------------
To put it in other words it makes build results available system-wide,
enabling any user to simply start using it without any additional steps
@@ -109,6 +113,10 @@ The files will be installed to the following dirs:
-----------------------------------------------------------------------
| man pages | ${DESTDIR}/usr/man |
-----------------------------------------------------------------------
+| systemd service | ${DESTDIR}/usr/lib/systemd/system |
+-----------------------------------------------------------------------
+| systemd support script | ${DESTDIR}/usr/libexec |
+-----------------------------------------------------------------------
If you look at the table for the default 'make' output dirs you will
notice that the only difference with the non-default case is the
@@ -173,6 +181,26 @@ The issue is that binary cannot find the 'libcpupower' library. So, we
shall point to the lib dir:
sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower
+systemd service
+---------------
+
+A systemd service is also provided to run the cpupower utility at boot with
+settings read from a configuration file.
+
+If you want systemd to find the new service after the installation, the service
+unit must have been installed in one of the system unit search path directories
+(such as '/usr/lib/systemd/system/', which is the default location) and (unless
+you are willing to wait for the next reboot) you need to issue the following
+command:
+
+$ sudo systemctl daemon-reload
+
+If you want to enable this systemd service, edit '/etc/cpupower-service.conf'
+(uncommenting at least one of the options, depending on your preferences)
+and then issue the following command:
+
+$ sudo systemctl enable --now cpupower.service
+
THANKS
------
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
index 741f21477432..81db39a03efb 100644
--- a/tools/power/cpupower/bindings/python/Makefile
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -1,22 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
# Makefile for libcpupower's Python bindings
#
-# This Makefile expects you have already run the makefile for cpupower to build
-# the .o files in the lib directory for the bindings to be created.
+# This Makefile expects you have already run `make install-lib` in the lib
+# directory for the bindings to be created.
CC := gcc
HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi)
HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi)
-LIB_DIR := ../../lib
PY_INCLUDE = $(firstword $(shell python-config --includes))
-OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])")
all: _raw_pylibcpupower.so
_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o
- $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
+ $(CC) -shared -lcpupower raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c
$(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE)
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
index 952e2e02fd32..2a4896b648b7 100644
--- a/tools/power/cpupower/bindings/python/README
+++ b/tools/power/cpupower/bindings/python/README
@@ -5,18 +5,21 @@ libcpupower (aside from the libcpupower object files).
requirements
------------
-* You need the object files in the libcpupower directory compiled by
-cpupower's makefile.
+* If you are building completely from upstream; please install libcpupower by
+running `make install-lib` within the cpupower directory. This installs the
+libcpupower.so file and symlinks needed. Otherwise, please make sure a symlink
+to libcpupower.so exists in your library path from your distribution's
+packages.
* The SWIG program must be installed.
-* The Python's development libraries installed.
+* The Python's development libraries must be installed.
Please check that your version of SWIG is compatible with the version of Python
installed on your machine by checking the SWIG changelog on their website.
https://swig.org/
Note that while SWIG itself is GPL v3+ licensed; the resulting output,
-the bindings code: is permissively licensed + the license of libcpupower's .o
-files. For these bindings that means GPL v2.
+the bindings code: is permissively licensed + the license of libcpupower's
+library files. For these bindings that means GPL v2.
Please see https://swig.org/legal.html and the discussion [1] for more details.
diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf
new file mode 100644
index 000000000000..02eabe8e3614
--- /dev/null
+++ b/tools/power/cpupower/cpupower-service.conf
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012, Sébastien Luttringer
+# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org>
+
+# Configuration file for cpupower.service systemd service unit
+#
+# Edit this file (uncommenting at least one of the options, depending on
+# your preferences) and then enable cpupower.service, if you want cpupower
+# to run at boot with these settings.
+
+# --- CPU clock frequency ---
+
+# Define CPU governor
+# Valid governors: ondemand, performance, powersave, conservative, userspace
+#GOVERNOR='ondemand'
+
+# Limit frequency range
+# Valid suffixes: Hz, kHz (default), MHz, GHz, THz
+#MIN_FREQ="2.25GHz"
+#MAX_FREQ="3GHz"
+
+# Set a specific frequency
+# Requires userspace governor to be available.
+# If this option is set, all the previous frequency options are ignored
+#FREQ=
+
+# --- CPU policy ---
+
+# Set a register on supported Intel processore which allows software to convey
+# its policy for the relative importance of performance versus energy savings to
+# the processor. See man CPUPOWER-SET(1) for additional details
+#PERF_BIAS=
diff --git a/tools/power/cpupower/cpupower.service.in b/tools/power/cpupower/cpupower.service.in
new file mode 100644
index 000000000000..fbd5b8c14270
--- /dev/null
+++ b/tools/power/cpupower/cpupower.service.in
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012-2020, Sébastien Luttringer
+# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org>
+
+[Unit]
+Description=Apply cpupower configuration
+ConditionVirtualization=!container
+
+[Service]
+Type=oneshot
+EnvironmentFile=-___CDIR___cpupower-service.conf
+ExecStart=___LDIR___/cpupower
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh
new file mode 100644
index 000000000000..a37dd4cfdb2b
--- /dev/null
+++ b/tools/power/cpupower/cpupower.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012, Sébastien Luttringer
+# Copyright (C) 2024, Francesco Poli <invernomuto@paranoici.org>
+
+ESTATUS=0
+
+# apply CPU clock frequency options
+if test -n "$FREQ"
+then
+ cpupower frequency-set -f "$FREQ" > /dev/null || ESTATUS=1
+elif test -n "${GOVERNOR}${MIN_FREQ}${MAX_FREQ}"
+then
+ cpupower frequency-set \
+ ${GOVERNOR:+ -g "$GOVERNOR"} \
+ ${MIN_FREQ:+ -d "$MIN_FREQ"} ${MAX_FREQ:+ -u "$MAX_FREQ"} \
+ > /dev/null || ESTATUS=1
+fi
+
+# apply CPU policy options
+if test -n "$PERF_BIAS"
+then
+ cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1
+fi
+
+exit $ESTATUS
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index e2261f33a082..1555b51a7d55 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -4017,7 +4017,8 @@ def parseKernelLog(data):
'PM: early restore of devices complete after.*'],
'resume_complete': ['PM: resume of devices complete after.*',
'PM: restore of devices complete after.*'],
- 'post_resume': [r'.*Restarting tasks \.\.\..*'],
+ 'post_resume': [r'.*Restarting tasks \.\.\..*',
+ 'Done restarting tasks.*'],
}
# action table (expected events that occur and show up in dmesg)
diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile
index ca3815e572d8..d68780e2e03d 100644
--- a/tools/sched_ext/Makefile
+++ b/tools/sched_ext/Makefile
@@ -61,8 +61,8 @@ SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
BINDIR := $(OUTPUT_DIR)/bin
BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
ifneq ($(CROSS_COMPILE),)
-HOST_BUILD_DIR := $(OBJ_DIR)/host
-HOST_OUTPUT_DIR := host-tools
+HOST_BUILD_DIR := $(OBJ_DIR)/host/obj
+HOST_OUTPUT_DIR := $(OBJ_DIR)/host
HOST_INCLUDE_DIR := $(HOST_OUTPUT_DIR)/include
else
HOST_BUILD_DIR := $(OBJ_DIR)
@@ -98,7 +98,7 @@ ifneq ($(LLVM),)
CFLAGS += -Wno-unused-command-line-argument
endif
-LDFLAGS = -lelf -lz -lpthread
+LDFLAGS += -lelf -lz -lpthread
IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
@@ -136,14 +136,25 @@ $(MAKE_DIRS):
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
| $(OBJ_DIR)/libbpf
- $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \
+ $(Q)$(MAKE) $(submake_extras) CROSS_COMPILE=$(CROSS_COMPILE) \
+ -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \
EXTRA_CFLAGS='-g -O0 -fPIC' \
+ LDFLAGS="$(LDFLAGS)" \
DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(HOST_BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -O0 -fPIC' \
+ DESTDIR=$(HOST_OUTPUT_DIR) prefix= all install_headers
+
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
- ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \
+ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD=$(HOSTLD) \
EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
@@ -185,7 +196,7 @@ $(addprefix $(BINDIR)/,$(c-sched-targets)): \
$(SCX_COMMON_DEPS)
$(eval sched=$(notdir $@))
$(CC) $(CFLAGS) -c $(sched).c -o $(SCXOBJ_DIR)/$(sched).o
- $(CC) -o $@ $(SCXOBJ_DIR)/$(sched).o $(HOST_BPFOBJ) $(LDFLAGS)
+ $(CC) -o $@ $(SCXOBJ_DIR)/$(sched).o $(BPFOBJ) $(LDFLAGS)
$(c-sched-targets): %: $(BINDIR)/%
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 8787048c6762..d4e21558e982 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -48,6 +48,8 @@ static inline void ___vmlinux_h_sanity_check___(void)
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
+s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 26c40ca4f36c..c3cd9a17d48e 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -784,8 +784,8 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
- bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL",
- scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL));
+ bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL",
+ scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL));
bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
scx_read_event(&events, SCX_EV_BYPASS_DURATION));
bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
index b800d4f5f2e9..7cdcc6729ea4 100644
--- a/tools/sched_ext/scx_show_state.py
+++ b/tools/sched_ext/scx_show_state.py
@@ -24,19 +24,19 @@ def read_atomic(name):
def read_static_key(name):
return prog[name].key.enabled.counter.value_()
-def ops_state_str(state):
- return prog['scx_ops_enable_state_str'][state].string_().decode()
+def state_str(state):
+ return prog['scx_enable_state_str'][state].string_().decode()
ops = prog['scx_ops']
-enable_state = read_atomic("scx_ops_enable_state_var")
+enable_state = read_atomic("scx_enable_state_var")
print(f'ops : {ops.name.string_().decode()}')
-print(f'enabled : {read_static_key("__scx_ops_enabled")}')
+print(f'enabled : {read_static_key("__scx_enabled")}')
print(f'switching_all : {read_int("scx_switching_all")}')
print(f'switched_all : {read_static_key("__scx_switched_all")}')
-print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
+print(f'enable_state : {state_str(enable_state)} ({enable_state})')
print(f'in_softlockup : {prog["scx_in_softlockup"].value_()}')
-print(f'breather_depth: {read_atomic("scx_ops_breather_depth")}')
-print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}')
+print(f'breather_depth: {read_atomic("scx_breather_depth")}')
+print(f'bypass_depth : {prog["scx_bypass_depth"].value_()}')
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
print(f'enable_seq : {read_atomic("scx_enable_seq")}')
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
index 35ea65c54ffa..827507844e8f 100644
--- a/tools/testing/crypto/chacha20-s390/test-cipher.c
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -50,7 +50,7 @@ struct skcipher_def {
/* Perform cipher operations with the chacha lib */
static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u8 iv[16], key[32];
u64 start, end;
@@ -66,10 +66,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
}
/* Encrypt */
- chacha_init(chacha_state, (u32 *)key, iv);
+ chacha_init(&chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
- chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
+ chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20);
end = ktime_get_ns();
@@ -81,10 +81,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
pr_info("lib encryption took: %lld nsec", end - start);
/* Decrypt */
- chacha_init(chacha_state, (u32 *)key, iv);
+ chacha_init(&chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
- chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
+ chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20);
end = ktime_get_ns();
if (debug)
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 422e186cf3cf..e70c502a16df 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -10,6 +10,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=y
CONFIG_KUNIT_ALL_TESTS=y
CONFIG_FORTIFY_SOURCE=y
+CONFIG_INIT_STACK_ALL_PATTERN=y
CONFIG_IIO=y
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 10ff65689dd8..80fa4e354a17 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -39,10 +39,20 @@ def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj:
status = _status_map.get(subtest.status, "FAIL")
test_cases.append({"name": subtest.name, "status": status})
+ test_counts = test.counts
+ counts_json = {
+ "tests": test_counts.total(),
+ "passed": test_counts.passed,
+ "failed": test_counts.failed,
+ "crashed": test_counts.crashed,
+ "skipped": test_counts.skipped,
+ "errors": test_counts.errors,
+ }
test_group = {
"name": test.name,
"sub_groups": sub_groups,
"test_cases": test_cases,
+ "misc": counts_json
}
test_group.update(common_fields)
return test_group
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index d3f39bc1ceec..260d8d9aa1db 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -14,6 +14,7 @@ import os
import shlex
import shutil
import signal
+import sys
import threading
from typing import Iterator, List, Optional, Tuple
from types import FrameType
@@ -201,6 +202,13 @@ def _default_qemu_config_path(arch: str) -> str:
return config_path
options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')]
+
+ if arch == 'help':
+ print('um')
+ for option in options:
+ print(option)
+ sys.exit()
+
raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options)))
def _get_qemu_ops(config_path: str,
diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py
index 7ec38d4131f7..5b4c895d5d5a 100644
--- a/tools/testing/kunit/qemu_configs/powerpc.py
+++ b/tools/testing/kunit/qemu_configs/powerpc.py
@@ -3,6 +3,7 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
kconfig='''
CONFIG_PPC64=y
+CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HVC_CONSOLE=y''',
diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py
new file mode 100644
index 000000000000..88bd60dbb948
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpc32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC32=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_ADB_CUDA=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+''',
+ qemu_arch='ppc',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'g3beige', '-cpu', 'max'])
diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py
new file mode 100644
index 000000000000..7ddee8af4bd7
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpcle.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC64=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_HVC_CONSOLE=y
+''',
+ qemu_arch='ppc64',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'pseries', '-cpu', 'power8'])
diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py
new file mode 100644
index 000000000000..b79ba0ae30f8
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/riscv32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='riscv',
+ kconfig='''
+CONFIG_NONPORTABLE=y
+CONFIG_ARCH_RV32I=y
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+''',
+ qemu_arch='riscv32',
+ kernel_path='arch/riscv/boot/Image',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py
index 256d9573b446..2019550a1b69 100644
--- a/tools/testing/kunit/qemu_configs/sparc.py
+++ b/tools/testing/kunit/qemu_configs/sparc.py
@@ -2,6 +2,8 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='sparc',
kconfig='''
+CONFIG_KUNIT_FAULT_TEST=n
+CONFIG_SPARC32=y
CONFIG_SERIAL_SUNZILOG=y
CONFIG_SERIAL_SUNZILOG_CONSOLE=y
''',
diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py
new file mode 100644
index 000000000000..53d4e5a8c972
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/sparc64.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='sparc',
+ kconfig='''
+CONFIG_64BIT=y
+CONFIG_SPARC64=y
+CONFIG_PCI=y
+CONFIG_SERIAL_SUNSU=y
+CONFIG_SERIAL_SUNSU_CONSOLE=y
+''',
+ qemu_arch='sparc64',
+ kernel_path='arch/sparc/boot/image',
+ kernel_command_line='console=ttyS0 kunit_shutdown=poweroff',
+ extra_qemu_params=[])
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c77c8c8e3d9b..84e7f4ed4c97 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -48,6 +48,7 @@ TARGETS += ipc
TARGETS += ir
TARGETS += kcmp
TARGETS += kexec
+TARGETS += kselftest_harness
TARGETS += kvm
TARGETS += landlock
TARGETS += lib
@@ -121,6 +122,7 @@ TARGETS += user_events
TARGETS += vDSO
TARGETS += mm
TARGETS += x86
+TARGETS += x86/bugs
TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
# Run "make quicktest=1 run_tests" or
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 1bd403a5ef7b..0fd8c9b0d38f 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -526,6 +526,12 @@ extern const struct bench bench_trig_uprobe_multi_push;
extern const struct bench bench_trig_uretprobe_multi_push;
extern const struct bench bench_trig_uprobe_multi_ret;
extern const struct bench bench_trig_uretprobe_multi_ret;
+#ifdef __x86_64__
+extern const struct bench bench_trig_uprobe_nop5;
+extern const struct bench bench_trig_uretprobe_nop5;
+extern const struct bench bench_trig_uprobe_multi_nop5;
+extern const struct bench bench_trig_uretprobe_multi_nop5;
+#endif
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -586,6 +592,12 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_multi_push,
&bench_trig_uprobe_multi_ret,
&bench_trig_uretprobe_multi_ret,
+#ifdef __x86_64__
+ &bench_trig_uprobe_nop5,
+ &bench_trig_uretprobe_nop5,
+ &bench_trig_uprobe_multi_nop5,
+ &bench_trig_uretprobe_multi_nop5,
+#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 32e9f194d449..82327657846e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -333,6 +333,20 @@ static void *uprobe_producer_ret(void *input)
return NULL;
}
+#ifdef __x86_64__
+__nocf_check __weak void uprobe_target_nop5(void)
+{
+ asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+}
+
+static void *uprobe_producer_nop5(void *input)
+{
+ while (true)
+ uprobe_target_nop5();
+ return NULL;
+}
+#endif
+
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
@@ -448,6 +462,28 @@ static void uretprobe_multi_ret_setup(void)
usetup(true, true /* use_multi */, &uprobe_target_ret);
}
+#ifdef __x86_64__
+static void uprobe_nop5_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_nop5_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uprobe_multi_nop5_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_multi_nop5_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop5);
+}
+#endif
+
const struct bench bench_trig_syscall_count = {
.name = "trig-syscall-count",
.validate = trigger_validate,
@@ -506,3 +542,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
+#ifdef __x86_64__
+BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
+BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
+BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index af169f831f2f..03f55405484b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 3720b7611523..e1495a4bbc99 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -158,7 +158,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 706931a8c2c6..26c3bc2ce11d 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -128,7 +128,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 38f78d9345de..69f81cb555ca 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -30,22 +30,27 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
*
- * struct cgroup_rstat_cpu {
+ * struct css_rstat_cpu {
* ...
- * struct cgroup *updated_children;
+ * struct cgroup_subsys_state *updated_children;
* ...
* };
*
- * struct cgroup {
+ * struct cgroup_subsys_state {
+ * ...
+ * struct css_rstat_cpu __percpu *rstat_cpu;
* ...
- * struct cgroup_rstat_cpu __percpu *rstat_cpu;
+ * };
+ *
+ * struct cgroup {
+ * struct cgroup_subsys_state self;
* ...
* };
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->rstat_cpu->updated_children;
+ g = (__u64)cgrp->self.rstat_cpu->updated_children;
return 0;
}
@@ -56,7 +61,8 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
__u32 cpu;
cpu = bpf_get_smp_processor_id();
- rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+ rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
+ cgrp->self.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
*(volatile int *)rstat;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index c74362854948..ff189a736ad8 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -37,8 +37,9 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+extern void css_rstat_updated(
+ struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+ css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 137b2364a082..9984413be9f0 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -1,14 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <fcntl.h>
+#include <inttypes.h>
#include <libgen.h>
#include <linux/limits.h>
#include <pthread.h>
#include <string.h>
+#include <sys/mount.h>
#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
#include <unistd.h>
#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
@@ -35,6 +41,7 @@ static void crashing_child(void)
FIXTURE(coredump)
{
char original_core_pattern[256];
+ pid_t pid_coredump_server;
};
FIXTURE_SETUP(coredump)
@@ -44,6 +51,7 @@ FIXTURE_SETUP(coredump)
char *dir;
int ret;
+ self->pid_coredump_server = -ESRCH;
file = fopen("/proc/sys/kernel/core_pattern", "r");
ASSERT_NE(NULL, file);
@@ -61,10 +69,17 @@ FIXTURE_TEARDOWN(coredump)
{
const char *reason;
FILE *file;
- int ret;
+ int ret, status;
unlink(STACKDUMP_FILE);
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
file = fopen("/proc/sys/kernel/core_pattern", "w");
if (!file) {
reason = "Unable to open core_pattern";
@@ -89,14 +104,14 @@ fail:
fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason);
}
-TEST_F(coredump, stackdump)
+TEST_F_TIMEOUT(coredump, stackdump, 120)
{
struct sigaction action = {};
unsigned long long stack;
char *test_dir, *line;
size_t line_length;
char buf[PATH_MAX];
- int ret, i;
+ int ret, i, status;
FILE *file;
pid_t pid;
@@ -129,6 +144,10 @@ TEST_F(coredump, stackdump)
/*
* Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file
*/
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
for (i = 0; i < 10; ++i) {
file = fopen(STACKDUMP_FILE, "r");
if (file)
@@ -138,14 +157,466 @@ TEST_F(coredump, stackdump)
ASSERT_NE(file, NULL);
/* Step 4: Make sure all stack pointer values are non-zero */
+ line = NULL;
for (i = 0; -1 != getline(&line, &line_length, file); ++i) {
stack = strtoull(line, NULL, 10);
ASSERT_NE(stack, 0);
}
+ free(line);
ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN);
fclose(file);
}
+TEST_F(coredump, socket)
+{
+ int fd, pidfd, ret, status;
+ FILE *file;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ char core_file[PATH_MAX];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
+ socklen_t fd_peer_pidfd_len;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0)
+ _exit(EXIT_FAILURE);
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = listen(fd_server, 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to listen on coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "Failed to accept coredump socket connection\n");
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
+ &fd_peer_pidfd, &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "Missing coredump information from coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "Received connection from non-coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "Failed to create coredump file\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_SUCCESS);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ /*
+ * We should somehow validate the produced core file.
+ * For now just allow for visual inspection
+ */
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_detect_userspace_client)
+{
+ int fd, pidfd, ret, status;
+ FILE *file;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ char core_file[PATH_MAX];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
+ socklen_t fd_peer_pidfd_len;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0)
+ _exit(EXIT_FAILURE);
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = listen(fd_server, 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to listen on coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "Failed to accept coredump socket connection\n");
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
+ &fd_peer_pidfd, &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "Missing coredump information from coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (info.coredump_mask & PIDFD_COREDUMPED) {
+ fprintf(stderr, "Received unexpected connection from coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_SUCCESS);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ int fd_socket;
+ ssize_t ret;
+
+ fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd_socket < 0)
+ _exit(EXIT_FAILURE);
+
+
+ ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0)
+ _exit(EXIT_FAILURE);
+
+ (void *)write(fd_socket, &(char){ 0 }, 1);
+ close(fd_socket);
+ _exit(EXIT_SUCCESS);
+ }
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_EQ(errno, ENOENT);
+}
+
+TEST_F(coredump, socket_enoent)
+{
+ int pidfd, ret, status;
+ FILE *file;
+ pid_t pid;
+ char core_file[PATH_MAX];
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+}
+
+TEST_F(coredump, socket_no_listener)
+{
+ int pidfd, ret, status;
+ FILE *file;
+ pid_t pid, pid_coredump_server;
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server;
+ socklen_t fd_peer_pidfd_len;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0)
+ _exit(EXIT_FAILURE);
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_SUCCESS);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index e350c521b467..9927b654fb8f 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -52,7 +52,14 @@ read_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
printf "$file:"
- cat $1/$file
+ #file is readable ?
+ local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }')
+
+ if [ ! -z $rfile ]; then
+ cat $1/$file
+ else
+ printf "$file is not readable\n"
+ fi
else
printf "\n"
read_cpufreq_files_in_dir "$1/$file"
@@ -83,10 +90,10 @@ update_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
- # file is writable ?
- local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+ # file is readable and writable ?
+ local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }')
- if [ ! -z $wfile ]; then
+ if [ ! -z $rwfile ]; then
# scaling_setspeed is a special file and we
# should skip updating it
if [ $file != "scaling_setspeed" ]; then
@@ -244,9 +251,10 @@ do_suspend()
printf "Failed to suspend using RTC wake alarm\n"
return 1
fi
+ else
+ echo $filename > $SYSFS/power/state
fi
- echo $filename > $SYSFS/power/state
printf "Came out of $1\n"
printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 2bf14ac2b8c6..9d48004ff1a1 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -431,6 +431,22 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
return 0;
}
+static struct netdev_queue_id *create_queues(void)
+{
+ struct netdev_queue_id *queues;
+ size_t i = 0;
+
+ queues = calloc(num_queues, sizeof(*queues));
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ return queues;
+}
+
int do_server(struct memory_buffer *mem)
{
char ctrl_data[sizeof(int) * 20000];
@@ -448,7 +464,6 @@ int do_server(struct memory_buffer *mem)
char buffer[256];
int socket_fd;
int client_fd;
- size_t i = 0;
int ret;
ret = parse_address(server_ip, atoi(port), &server_sin);
@@ -471,16 +486,7 @@ int do_server(struct memory_buffer *mem)
sleep(1);
- queues = malloc(sizeof(*queues) * num_queues);
-
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Failed to bind\n");
tmp_mem = malloc(mem->size);
@@ -545,7 +551,6 @@ int do_server(struct memory_buffer *mem)
goto cleanup;
}
- i++;
for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
if (cm->cmsg_level != SOL_SOCKET ||
(cm->cmsg_type != SCM_DEVMEM_DMABUF &&
@@ -630,10 +635,8 @@ cleanup:
void run_devmem_tests(void)
{
- struct netdev_queue_id *queues;
struct memory_buffer *mem;
struct ynl_sock *ys;
- size_t i = 0;
mem = provider->alloc(getpagesize() * NUM_PAGES);
@@ -641,38 +644,24 @@ void run_devmem_tests(void)
if (configure_rss())
error(1, 0, "rss error\n");
- queues = calloc(num_queues, sizeof(*queues));
-
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (!bind_rx_queue(ifindex, mem->fd,
+ calloc(num_queues, sizeof(struct netdev_queue_id)),
+ num_queues, &ys))
error(1, 0, "Binding empty queues array should have failed\n");
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
if (configure_headersplit(0))
error(1, 0, "Failed to configure header split\n");
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Configure dmabuf with header split off should have failed\n");
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Failed to bind\n");
/* Deactivating a bound queue should not be legal */
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 828b66a10c63..7afa58e2bb20 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -2,3 +2,4 @@
dnotify_test
devpts_pts
file_stressor
+anon_inode_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 66305fc34c60..b02326193fee 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := devpts_pts file_stressor
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
new file mode 100644
index 000000000000..e8e0ef1460d2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "../kselftest_harness.h"
+#include "overlayfs/wrappers.h"
+
+TEST(anon_inode_no_chown)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchown(fd_context, 1234, 5678), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_chmod)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchmod(fd_context, 0777), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_exec)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EACCES);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_open)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_GE(dup2(fd_context, 500), 0);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = 500;
+
+ ASSERT_LT(open("/proc/self/fd/500", 0), 0);
+ ASSERT_EQ(errno, ENXIO);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
index 82a4846cbc4b..124339ea7845 100644
--- a/tools/testing/selftests/filesystems/mount-notify/.gitignore
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
/*_test
+/*_test_ns
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
index 10be0227b5ae..836a4eb7be06 100644
--- a/tools/testing/selftests/filesystems/mount-notify/Makefile
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -1,6 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := mount-notify_test
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns
include ../../lib.mk
+
+$(OUTPUT)/mount-notify_test: ../utils.c
+$(OUTPUT)/mount-notify_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 59a71f22fb11..63ce708d93ed 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -8,43 +8,21 @@
#include <string.h>
#include <sys/stat.h>
#include <sys/mount.h>
-#include <linux/fanotify.h>
#include <unistd.h>
-#include <sys/fanotify.h>
#include <sys/syscall.h>
#include "../../kselftest_harness.h"
#include "../statmount/statmount.h"
+#include "../utils.h"
-#ifndef FAN_MNT_ATTACH
-struct fanotify_event_info_mnt {
- struct fanotify_event_info_header hdr;
- __u64 mnt_id;
-};
-#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
-#endif
-
-#ifndef FAN_MNT_DETACH
-#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
-#endif
-
-#ifndef FAN_REPORT_MNT
-#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+// Needed for linux/fanotify.h
+#ifndef __kernel_fsid_t
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
#endif
-#ifndef FAN_MARK_MNTNS
-#define FAN_MARK_MNTNS 0x00000110
-#endif
-
-static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
- const char *path)
-{
- struct statx sx;
-
- ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0);
- ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE));
- return sx.stx_mnt_id;
-}
+#include <sys/fanotify.h>
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
@@ -94,7 +72,7 @@ FIXTURE_SETUP(fanotify)
ASSERT_EQ(mkdir("b", 0700), 0);
- self->root_id = get_mnt_id(_metadata, "/");
+ self->root_id = get_unique_mnt_id("/");
ASSERT_NE(self->root_id, 0);
for (i = 0; i < NUM_FAN_FDS; i++) {
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
new file mode 100644
index 000000000000..090a5ca65004
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+
+// Needed for linux/fanotify.h
+#ifndef __kernel_fsid_t
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#endif
+
+#include <sys/fanotify.h>
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+static const int mark_types[] = {
+ FAN_MARK_FILESYSTEM,
+ FAN_MARK_MOUNT,
+ FAN_MARK_INODE
+};
+
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
+FIXTURE(fanotify) {
+ int fan_fd[NUM_FAN_FDS];
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int orig_ns_fd;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int i, ret;
+
+ self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->orig_ns_fd, 0);
+
+ ret = setup_userns();
+ ASSERT_EQ(ret, 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_unique_mnt_id("/");
+ ASSERT_NE(self->root_id, 0);
+
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
+ // Verify that watching tmpfs mounted inside userns is allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
+ FAN_OPEN, AT_FDCWD, "/");
+ ASSERT_EQ(ret, 0);
+ // ...but watching entire orig root filesystem is not allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
+ FAN_OPEN, self->orig_root, ".");
+ ASSERT_NE(ret, 0);
+ close(fan_fd);
+
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ // Verify that watching mntns where group was created is allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // ...but watching orig mntns is not allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->orig_ns_fd, NULL);
+ ASSERT_NE(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ int i;
+
+ ASSERT_EQ(self->rem, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
index 6c661232b3b5..d3ad4a77db9b 100644
--- a/tools/testing/selftests/filesystems/overlayfs/Makefile
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -Wall
CFLAGS += $(KHDR_INCLUDES)
LDLIBS += -lcap
-LOCAL_HDRS += wrappers.h log.h
+LOCAL_HDRS += ../wrappers.h log.h
TEST_GEN_PROGS := dev_in_maps
TEST_GEN_PROGS += set_layers_via_fds
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 3b796264223f..31db54b00e64 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -17,7 +17,7 @@
#include "../../kselftest.h"
#include "log.h"
-#include "wrappers.h"
+#include "../wrappers.h"
static long get_file_dev_and_inode(void *addr, struct statx *stx)
{
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 5074e64e74a8..dc0449fa628f 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -16,7 +16,7 @@
#include "../../pidfd/pidfd.h"
#include "log.h"
#include "../utils.h"
-#include "wrappers.h"
+#include "../wrappers.h"
FIXTURE(set_layers_via_fds) {
int pidfd;
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 14ee91a41650..8e354fe99b44 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
include ../../lib.mk
+
+$(OUTPUT)/statmount_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index a7a5289ddae9..99e5ad082fb1 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -7,6 +7,42 @@
#include <linux/mount.h>
#include <asm/unistd.h>
+#ifndef __NR_statmount
+ #if defined __alpha__
+ #define __NR_statmount 567
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_statmount 4457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_statmount 6457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_statmount 5457
+ #endif
+ #else
+ #define __NR_statmount 457
+ #endif
+#endif
+
+#ifndef __NR_listmount
+ #if defined __alpha__
+ #define __NR_listmount 568
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_listmount 4458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_listmount 6458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_listmount 5458
+ #endif
+ #else
+ #define __NR_listmount 458
+ #endif
+#endif
+
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
unsigned int flags)
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index 70cb0c8b21cf..605a3fa16bf7 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -14,6 +14,7 @@
#include <linux/stat.h>
#include "statmount.h"
+#include "../utils.h"
#include "../../kselftest.h"
#define NSID_PASS 0
@@ -78,87 +79,10 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
return NSID_PASS;
}
-static int get_mnt_id(const char *path, uint64_t *mnt_id)
-{
- struct statx sx;
- int ret;
-
- ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
- if (ret == -1) {
- ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
- strerror(errno));
- return NSID_ERROR;
- }
-
- if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
- ksft_print_msg("no unique mount ID available for %s\n", path);
- return NSID_ERROR;
- }
-
- *mnt_id = sx.stx_mnt_id;
- return NSID_PASS;
-}
-
-static int write_file(const char *path, const char *val)
-{
- int fd = open(path, O_WRONLY);
- size_t len = strlen(val);
- int ret;
-
- if (fd == -1) {
- ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
- return NSID_ERROR;
- }
-
- ret = write(fd, val, len);
- if (ret == -1) {
- ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
- return NSID_ERROR;
- }
- if (ret != len) {
- ksft_print_msg("short write to %s\n", path);
- return NSID_ERROR;
- }
-
- ret = close(fd);
- if (ret == -1) {
- ksft_print_msg("closing %s\n", path);
- return NSID_ERROR;
- }
-
- return NSID_PASS;
-}
-
static int setup_namespace(void)
{
- int ret;
- char buf[32];
- uid_t uid = getuid();
- gid_t gid = getgid();
-
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
- if (ret == -1)
- ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
- strerror(errno));
-
- sprintf(buf, "0 %d 1", uid);
- ret = write_file("/proc/self/uid_map", buf);
- if (ret != NSID_PASS)
- return ret;
- ret = write_file("/proc/self/setgroups", "deny");
- if (ret != NSID_PASS)
- return ret;
- sprintf(buf, "0 %d 1", gid);
- ret = write_file("/proc/self/gid_map", buf);
- if (ret != NSID_PASS)
- return ret;
-
- ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
- if (ret == -1) {
- ksft_print_msg("making mount tree private: %s\n",
- strerror(errno));
+ if (setup_userns() != 0)
return NSID_ERROR;
- }
return NSID_PASS;
}
@@ -174,9 +98,9 @@ static int _test_statmount_mnt_ns_id(void)
if (ret != NSID_PASS)
return ret;
- ret = get_mnt_id("/", &root_id);
- if (ret != NSID_PASS)
- return ret;
+ root_id = get_unique_mnt_id("/");
+ if (!root_id)
+ return NSID_ERROR;
ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
index e553c89c5b19..c43a69dffd83 100644
--- a/tools/testing/selftests/filesystems/utils.c
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -18,7 +18,10 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/xattr.h>
+#include <sys/mount.h>
+#include "../kselftest.h"
+#include "wrappers.h"
#include "utils.h"
#define MAX_USERNS_LEVEL 32
@@ -447,6 +450,71 @@ out_close:
return fret;
}
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return -1;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return -1;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return -1;
+ }
+
+ return 0;
+}
+
+int setup_userns(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ if (ret) {
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+ return ret;
+ }
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret) {
+ ksft_print_msg("making mount tree private: %s\n", strerror(errno));
+ return ret;
+ }
+
+ return 0;
+}
+
/* caps_down - lower all effective caps */
int caps_down(void)
{
@@ -499,3 +567,23 @@ out:
cap_free(caps);
return fret;
}
+
+uint64_t get_unique_mnt_id(const char *path)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return 0;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return 0;
+ }
+
+ return sx.stx_mnt_id;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
index 7f1df2a3e94c..70f7ccc607f4 100644
--- a/tools/testing/selftests/filesystems/utils.h
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -27,6 +27,7 @@ extern int caps_down(void);
extern int cap_down(cap_value_t down);
extern bool switch_ids(uid_t uid, gid_t gid);
+extern int setup_userns(void);
static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
{
@@ -42,4 +43,6 @@ static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
return true;
}
+extern uint64_t get_unique_mnt_id(const char *path);
+
#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h
index c38bc48e0cfa..420ae4f908cf 100644
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ b/tools/testing/selftests/filesystems/wrappers.h
@@ -9,6 +9,10 @@
#include <linux/mount.h>
#include <sys/syscall.h>
+#ifndef STATX_MNT_ID_UNIQUE
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#endif
+
static inline int sys_fsopen(const char *fsname, unsigned int flags)
{
return syscall(__NR_fsopen, fsname, flags);
@@ -36,6 +40,28 @@ static inline int sys_mount(const char *src, const char *tgt, const char *fst,
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
#endif
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#endif
+
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
static inline int sys_move_mount(int from_dfd, const char *from_pathname,
int to_dfd, const char *to_pathname,
unsigned int flags)
@@ -53,7 +79,25 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname,
#endif
#ifndef AT_RECURSIVE
-#define AT_RECURSIVE 0x8000
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #else
+ #define __NR_open_tree 428
+ #endif
#endif
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 49d96bb16355..7c12263f8260 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -6,6 +6,6 @@ TEST_PROGS := ftracetest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
-TEST_GEN_PROGS = poll
+TEST_GEN_FILES := poll
include ../lib.mk
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index fbcbdb6963b3..7b24ae89594a 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -1,11 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
+futex_numa_mpol
+futex_priv_hash
+futex_requeue
futex_requeue_pi
futex_requeue_pi_mismatched_ops
futex_requeue_pi_signal_restart
+futex_wait
futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
-futex_wait
-futex_requeue
futex_waitv
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index f79f9bac7918..8cfb87f7f7c5 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
-LDLIBS := -lpthread -lrt
+LDLIBS := -lpthread -lrt -lnuma
LOCAL_HDRS := \
../include/futextest.h \
@@ -17,7 +17,10 @@ TEST_GEN_PROGS := \
futex_wait_private_mapped_file \
futex_wait \
futex_requeue \
- futex_waitv
+ futex_priv_hash \
+ futex_numa_mpol \
+ futex_waitv \
+ futex_numa
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c
new file mode 100644
index 000000000000..f29e4d627e79
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <time.h>
+#include <assert.h>
+#include "logging.h"
+#include "futextest.h"
+#include "futex2test.h"
+
+typedef u_int32_t u32;
+typedef int32_t s32;
+typedef u_int64_t u64;
+
+static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
+static int fnode = FUTEX_NO_NODE;
+
+/* fairly stupid test-and-set lock with a waiter flag */
+
+#define N_LOCK 0x0000001
+#define N_WAITERS 0x0001000
+
+struct futex_numa_32 {
+ union {
+ u64 full;
+ struct {
+ u32 val;
+ u32 node;
+ };
+ };
+};
+
+void futex_numa_32_lock(struct futex_numa_32 *lock)
+{
+ for (;;) {
+ struct futex_numa_32 new, old = {
+ .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
+ };
+
+ for (;;) {
+ new = old;
+ if (old.val == 0) {
+ /* no waiter, no lock -> first lock, set no-node */
+ new.node = fnode;
+ }
+ if (old.val & N_LOCK) {
+ /* contention, set waiter */
+ new.val |= N_WAITERS;
+ }
+ new.val |= N_LOCK;
+
+ /* nothing changed, ready to block */
+ if (old.full == new.full)
+ break;
+
+ /*
+ * Use u64 cmpxchg to set the futex value and node in a
+ * consistent manner.
+ */
+ if (__atomic_compare_exchange_n(&lock->full,
+ &old.full, new.full,
+ /* .weak */ false,
+ __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED)) {
+
+ /* if we just set N_LOCK, we own it */
+ if (!(old.val & N_LOCK))
+ return;
+
+ /* go block */
+ break;
+ }
+ }
+
+ futex2_wait(lock, new.val, fflags, NULL, 0);
+ }
+}
+
+void futex_numa_32_unlock(struct futex_numa_32 *lock)
+{
+ u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
+ assert((s32)val >= 0);
+ if (val & N_WAITERS) {
+ int woken = futex2_wake(lock, 1, fflags);
+ assert(val == N_WAITERS);
+ if (!woken) {
+ __atomic_compare_exchange_n(&lock->val, &val, 0U,
+ false, __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED);
+ }
+ }
+}
+
+static long nanos = 50000;
+
+struct thread_args {
+ pthread_t tid;
+ volatile int * done;
+ struct futex_numa_32 *lock;
+ int val;
+ int *val1, *val2;
+ int node;
+};
+
+static void *threadfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+ struct timespec ts = {
+ .tv_nsec = nanos,
+ };
+ int node;
+
+ while (!*args->done) {
+
+ futex_numa_32_lock(args->lock);
+ args->val++;
+
+ assert(*args->val1 == *args->val2);
+ (*args->val1)++;
+ nanosleep(&ts, NULL);
+ (*args->val2)++;
+
+ node = args->lock->node;
+ futex_numa_32_unlock(args->lock);
+
+ if (node != args->node) {
+ args->node = node;
+ printf("node: %d\n", node);
+ }
+
+ nanosleep(&ts, NULL);
+ }
+
+ return NULL;
+}
+
+static void *contendfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+
+ while (!*args->done) {
+ /*
+ * futex2_wait() will take hb-lock, verify *var == val and
+ * queue/abort. By knowingly setting val 'wrong' this will
+ * abort and thereby generate hb-lock contention.
+ */
+ futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
+ args->val++;
+ }
+
+ return NULL;
+}
+
+static volatile int done = 0;
+static struct futex_numa_32 lock = { .val = 0, };
+static int val1, val2;
+
+int main(int argc, char *argv[])
+{
+ struct thread_args *tas[512], *cas[512];
+ int c, t, threads = 2, contenders = 0;
+ int sleeps = 10;
+ int total = 0;
+
+ while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
+ switch (c) {
+ case 'c':
+ contenders = atoi(optarg);
+ break;
+ case 't':
+ threads = atoi(optarg);
+ break;
+ case 's':
+ sleeps = atoi(optarg);
+ break;
+ case 'n':
+ nanos = atoi(optarg);
+ break;
+ case 'N':
+ fflags |= FUTEX2_NUMA;
+ if (optarg)
+ fnode = atoi(optarg);
+ break;
+ default:
+ exit(1);
+ break;
+ }
+ }
+
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, contendfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ cas[t] = args;
+ }
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, threadfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ tas[t] = args;
+ }
+
+ sleep(sleeps);
+
+ done = true;
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = tas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+// printf("tval: %d\n", args->val);
+ }
+ printf("total: %d\n", total);
+
+ if (contenders) {
+ total = 0;
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = cas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+ // printf("tval: %d\n", args->val);
+ }
+ printf("contenders: %d\n", total);
+ }
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
new file mode 100644
index 000000000000..20a9d3ecf743
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <numa.h>
+#include <numaif.h>
+
+#include <linux/futex.h>
+#include <sys/mman.h>
+
+#include "logging.h"
+#include "futextest.h"
+#include "futex2test.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_t threads[MAX_THREADS];
+
+struct thread_args {
+ void *futex_ptr;
+ unsigned int flags;
+ int result;
+};
+
+static struct thread_args thread_args[MAX_THREADS];
+
+#ifndef FUTEX_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+static void *thread_lock_fn(void *arg)
+{
+ struct thread_args *args = arg;
+ int ret;
+
+ pthread_barrier_wait(&barrier_main);
+ ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0);
+ args->result = ret;
+ return NULL;
+}
+
+static void create_max_threads(void *futex_ptr)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ thread_args[i].futex_ptr = futex_ptr;
+ thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA;
+ thread_args[i].result = 0;
+ ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags)
+{
+ int to_wake, ret, i, need_exit = 0;
+
+ pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ create_max_threads(futex_ptr);
+ pthread_barrier_wait(&barrier_main);
+ to_wake = MAX_THREADS;
+
+ do {
+ ret = futex2_wake(futex_ptr, to_wake, futex_flags);
+ if (must_fail) {
+ if (ret < 0)
+ break;
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
+ to_wake, futex_flags);
+ }
+ if (ret < 0) {
+ ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
+ to_wake, futex_flags);
+ }
+ if (!ret)
+ usleep(50);
+ to_wake -= ret;
+
+ } while (to_wake);
+ join_max_threads();
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (must_fail && thread_args[i].result != -1) {
+ ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
+ i, thread_args[i].result);
+ need_exit = 1;
+ }
+ if (!must_fail && thread_args[i].result != 0) {
+ ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
+ need_exit = 1;
+ }
+ }
+ if (need_exit)
+ ksft_exit_fail_msg("Aborting due to earlier errors.\n");
+}
+
+static void test_futex(void *futex_ptr, int must_fail)
+{
+ __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
+}
+
+static void test_futex_mpol(void *futex_ptr, int must_fail)
+{
+ __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ struct futex32_numa *futex_numa;
+ int mem_size, i;
+ void *futex_ptr;
+ char c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ mem_size = sysconf(_SC_PAGE_SIZE);
+ futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ if (futex_ptr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);
+
+ futex_numa = futex_ptr;
+
+ ksft_print_msg("Regular test\n");
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+ test_futex(futex_ptr, 0);
+
+ if (futex_numa->numa == FUTEX_NO_NODE)
+ ksft_exit_fail_msg("NUMA node is left uninitialized\n");
+
+ ksft_print_msg("Memory too small\n");
+ test_futex(futex_ptr + mem_size - 4, 1);
+
+ ksft_print_msg("Memory out of range\n");
+ test_futex(futex_ptr + mem_size, 1);
+
+ futex_numa->numa = FUTEX_NO_NODE;
+ mprotect(futex_ptr, mem_size, PROT_READ);
+ ksft_print_msg("Memory, RO\n");
+ test_futex(futex_ptr, 1);
+
+ mprotect(futex_ptr, mem_size, PROT_NONE);
+ ksft_print_msg("Memory, no access\n");
+ test_futex(futex_ptr, 1);
+
+ mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
+ ksft_print_msg("Memory back to RW\n");
+ test_futex(futex_ptr, 0);
+
+ /* MPOL test. Does not work as expected */
+ for (i = 0; i < 4; i++) {
+ unsigned long nodemask;
+ int ret;
+
+ nodemask = 1 << i;
+ ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
+ sizeof(nodemask) * 8, 0);
+ if (ret == 0) {
+ ksft_print_msg("Node %d test\n", i);
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+
+ ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+ if (ret < 0)
+ ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
+ if (0)
+ test_futex_mpol(futex_numa, 0);
+ if (futex_numa->numa != i) {
+ ksft_test_result_fail("Returned NUMA node is %d expected %d\n",
+ futex_numa->numa, i);
+ }
+ }
+ }
+ ksft_test_result_pass("NUMA MPOL tests passed\n");
+ ksft_finished();
+ return 0;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
new file mode 100644
index 000000000000..2dca18fefedc
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+
+#include "logging.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_mutex_t global_lock;
+static pthread_t threads[MAX_THREADS];
+static int counter;
+
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+#endif
+
+static int futex_hash_slots_set(unsigned int slots, int flags)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags);
+}
+
+static int futex_hash_slots_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+}
+
+static int futex_hash_immutable_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
+}
+
+static void futex_hash_slots_set_verify(int slots)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots, 0);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
+ ksft_finished();
+ }
+ ret = futex_hash_slots_get();
+ if (ret != slots) {
+ ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n",
+ slots, ret);
+ ksft_finished();
+ }
+ ksft_test_result_pass("SET and GET slots %d passed\n", slots);
+}
+
+static void futex_hash_slots_set_must_fail(int slots, int flags)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots, flags);
+ ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n",
+ slots, flags);
+}
+
+static void *thread_return_fn(void *arg)
+{
+ return NULL;
+}
+
+static void *thread_lock_fn(void *arg)
+{
+ pthread_barrier_wait(&barrier_main);
+
+ pthread_mutex_lock(&global_lock);
+ counter++;
+ usleep(20);
+ pthread_mutex_unlock(&global_lock);
+ return NULL;
+}
+
+static void create_max_threads(void *(*thread_fn)(void *))
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_create(&threads[i], NULL, thread_fn, NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed: %m\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -g Test global hash instead intead local immutable \n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
+static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";
+
+int main(int argc, char *argv[])
+{
+ int futex_slots1, futex_slotsn, online_cpus;
+ pthread_mutexattr_t mutex_attr_pi;
+ int use_global_hash = 0;
+ int ret;
+ char c;
+
+ while ((c = getopt(argc, argv, "cghv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'g':
+ use_global_hash = 1;
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(22);
+
+ ret = pthread_mutexattr_init(&mutex_attr_pi);
+ ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
+ ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to initialize pthread mutex.\n");
+ }
+ /* First thread, expect to be 0, not yet initialized */
+ ret = futex_hash_slots_get();
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);
+
+ ret = futex_hash_immutable_get();
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret);
+
+ ksft_test_result_pass("Basic get slots and immutable status.\n");
+ ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret);
+
+ ret = pthread_join(threads[0], NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);
+
+ /* First thread, has to initialiaze private hash */
+ futex_slots1 = futex_hash_slots_get();
+ if (futex_slots1 <= 0) {
+ ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
+ ksft_exit_fail_msg(test_msg_auto_create);
+ }
+
+ ksft_test_result_pass(test_msg_auto_create);
+
+ online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n");
+
+ ret = pthread_mutex_lock(&global_lock);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n");
+
+ counter = 0;
+ create_max_threads(thread_lock_fn);
+ pthread_barrier_wait(&barrier_main);
+
+ /*
+ * The current default size of hash buckets is 16. The auto increase
+ * works only if more than 16 CPUs are available.
+ */
+ ksft_print_msg("Online CPUs: %d\n", online_cpus);
+ if (online_cpus > 16) {
+ futex_slotsn = futex_hash_slots_get();
+ if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
+ ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
+ futex_slots1, futex_slotsn);
+ ksft_exit_fail_msg(test_msg_auto_inc);
+ }
+ ksft_test_result_pass(test_msg_auto_inc);
+ } else {
+ ksft_test_result_skip(test_msg_auto_inc);
+ }
+ ret = pthread_mutex_unlock(&global_lock);
+
+ /* Once the user changes it, it has to be what is set */
+ futex_hash_slots_set_verify(2);
+ futex_hash_slots_set_verify(4);
+ futex_hash_slots_set_verify(8);
+ futex_hash_slots_set_verify(32);
+ futex_hash_slots_set_verify(16);
+
+ ret = futex_hash_slots_set(15, 0);
+ ksft_test_result(ret < 0, "Use 15 slots\n");
+
+ futex_hash_slots_set_verify(2);
+ join_max_threads();
+ ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n",
+ counter, MAX_THREADS);
+ counter = 0;
+ /* Once the user set something, auto reisze must be disabled */
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n",
+ ret);
+
+ futex_hash_slots_set_must_fail(1 << 29, 0);
+
+ /*
+ * Once the private hash has been made immutable or global hash has been requested,
+ * then this requested can not be undone.
+ */
+ if (use_global_hash) {
+ ret = futex_hash_slots_set(0, 0);
+ ksft_test_result(ret == 0, "Global hash request\n");
+ } else {
+ ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE);
+ ksft_test_result(ret == 0, "Immutable resize to 4\n");
+ }
+ if (ret != 0)
+ goto out;
+
+ futex_hash_slots_set_must_fail(4, 0);
+ futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(8, 0);
+ futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE);
+
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+ if (ret != 0) {
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
+ return 1;
+ }
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ if (use_global_hash) {
+ ksft_test_result(ret == 0, "Continue to use global hash\n");
+ } else {
+ ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n");
+ }
+
+ ret = futex_hash_immutable_get();
+ ksft_test_result(ret == 1, "Hash reports to be immutable\n");
+
+out:
+ ksft_finished();
+ return 0;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 5ccd599da6c3..81739849f299 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -82,3 +82,10 @@ echo
echo
./futex_waitv $COLOR
+
+echo
+./futex_priv_hash $COLOR
+./futex_priv_hash -g $COLOR
+
+echo
+./futex_numa_mpol $COLOR
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index 9d305520e849..ea79662405bc 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -8,6 +8,53 @@
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+#ifndef __NR_futex_waitv
+#define __NR_futex_waitv 449
+struct futex_waitv {
+ __u64 val;
+ __u64 uaddr;
+ __u32 flags;
+ __u32 __reserved;
+};
+#endif
+
+#ifndef __NR_futex_wake
+#define __NR_futex_wake 454
+#endif
+
+#ifndef __NR_futex_wait
+#define __NR_futex_wait 455
+#endif
+
+#ifndef FUTEX2_SIZE_U32
+#define FUTEX2_SIZE_U32 0x02
+#endif
+
+#ifndef FUTEX2_NUMA
+#define FUTEX2_NUMA 0x04
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+#ifndef FUTEX2_PRIVATE
+#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG
+#endif
+
+#ifndef FUTEX2_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX_32
+#define FUTEX_32 FUTEX2_SIZE_U32
+#endif
+
+struct futex32_numa {
+ futex_t futex;
+ futex_t numa;
+};
+
/**
* futex_waitv - Wait at multiple futexes, wake on any
* @waiters: Array of waiters
@@ -20,3 +67,26 @@ static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned lon
{
return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
}
+
+/*
+ * futex_wait() - block on uaddr with optional timeout
+ * @val: Expected value
+ * @flags: FUTEX2 flags
+ * @timeout: Relative timeout
+ * @clockid: Clock id for the timeout
+ */
+static inline int futex2_wait(void *uaddr, long val, unsigned int flags,
+ struct timespec *timeout, clockid_t clockid)
+{
+ return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid);
+}
+
+/*
+ * futex2_wake() - Wake a number of futexes
+ * @nr: Number of threads to wake at most
+ * @flags: FUTEX2 flags
+ */
+static inline int futex2_wake(void *uaddr, int nr, unsigned int flags)
+{
+ return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags);
+}
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index e0884390447d..7bfe315f7001 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := gpio-mockup.sh gpio-sim.sh
+TEST_PROGS := gpio-mockup.sh gpio-sim.sh gpio-aggregator.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index 409a8532facc..1287abeaac7e 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -2,3 +2,4 @@ CONFIG_GPIOLIB=y
CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
CONFIG_GPIO_SIM=m
+CONFIG_GPIO_AGGREGATOR=m
diff --git a/tools/testing/selftests/gpio/gpio-aggregator.sh b/tools/testing/selftests/gpio/gpio-aggregator.sh
new file mode 100755
index 000000000000..9b6f80ad9f8a
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-aggregator.sh
@@ -0,0 +1,727 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Bartosz Golaszewski <brgl@bgdev.pl>
+# Copyright (C) 2025 Koichiro Den <koichiro.den@canonical.com>
+
+BASE_DIR=$(dirname "$0")
+CONFIGFS_SIM_DIR="/sys/kernel/config/gpio-sim"
+CONFIGFS_AGG_DIR="/sys/kernel/config/gpio-aggregator"
+SYSFS_AGG_DIR="/sys/bus/platform/drivers/gpio-aggregator"
+MODULE="gpio-aggregator"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
+# gpio-sim
+sim_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+sim_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+sim_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_SIM_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live"
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_SIM_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_SIM_DIR: $remaining"
+ fi
+}
+
+sim_get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+ local CHIP_NAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name" 2> /dev/null) || \
+ fail "Unable to read the chip name from configfs"
+
+ $BASE_DIR/gpio-chip-info "/dev/$CHIP_NAME" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+# gpio-aggregator
+agg_create_chip() {
+ local CHIP=$1
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_remove_chip() {
+ local CHIP=$1
+
+ find "$CONFIGFS_AGG_DIR/$CHIP/" -depth -type d -exec rmdir {} \; || \
+ fail "Unable to remove $CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_create_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_remove_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ rmdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_set_key() {
+ local CHIP=$1
+ local LINE=$2
+ local KEY=$3
+
+ echo "$KEY" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/key" || fail "Unable to set the lookup key"
+}
+
+agg_set_offset() {
+ local CHIP=$1
+ local LINE=$2
+ local OFFSET=$3
+
+ echo "$OFFSET" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/offset" || \
+ fail "Unable to set the lookup offset"
+}
+
+agg_set_line_name() {
+ local CHIP=$1
+ local LINE=$2
+ local NAME=$3
+
+ echo "$NAME" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/name" || fail "Unable to set the line name"
+}
+
+agg_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+agg_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+agg_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_AGG_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live" 2> /dev/null
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_AGG_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_AGG_DIR: $remaining"
+ fi
+}
+
+agg_configfs_dev_name() {
+ local CHIP=$1
+
+ cat "$CONFIGFS_AGG_DIR/$CHIP/dev_name" 2> /dev/null || \
+ fail "Unable to read the device name from configfs"
+}
+
+agg_configfs_chip_name() {
+ local CHIP=$1
+ local DEV_NAME=$(agg_configfs_dev_name "$CHIP")
+ local CHIP_LIST=$(find "/sys/devices/platform/$DEV_NAME" \
+ -maxdepth 1 -type d -name "gpiochip[0-9]*" 2> /dev/null)
+ local CHIP_COUNT=$(echo "$CHIP_LIST" | wc -l)
+
+ if [ -z "$CHIP_LIST" ]; then
+ fail "No gpiochip in /sys/devices/platform/$DEV_NAME/"
+ elif [ "$CHIP_COUNT" -ne 1 ]; then
+ fail "Multiple gpiochips unexpectedly found: $CHIP_LIST"
+ fi
+ basename "$CHIP_LIST"
+}
+
+agg_get_chip_num_lines() {
+ local CHIP=$1
+ local N_DIR=$(ls -d $CONFIGFS_AGG_DIR/$CHIP/line[0-9]* 2> /dev/null | wc -l)
+ local N_LINES
+
+ if [ "$(cat $CONFIGFS_AGG_DIR/$CHIP/live)" = 0 ]; then
+ echo "$N_DIR"
+ else
+ N_LINES=$(
+ $BASE_DIR/gpio-chip-info \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" num-lines
+ ) || fail "Unable to read the number of lines from the character device"
+ if [ $N_DIR != $N_LINES ]; then
+ fail "Discrepancy between two sources for the number of lines"
+ fi
+ echo "$N_LINES"
+ fi
+}
+
+agg_get_chip_label() {
+ local CHIP=$1
+
+ $BASE_DIR/gpio-chip-info "/dev/$(agg_configfs_chip_name "$CHIP")" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+agg_get_line_name() {
+ local CHIP=$1
+ local OFFSET=$2
+ local NAME_CONFIGFS=$(cat "$CONFIGFS_AGG_DIR/$CHIP/line${OFFSET}/name")
+ local NAME_CDEV
+
+ if [ "$(cat "$CONFIGFS_AGG_DIR/$CHIP/live")" = 0 ]; then
+ echo "$NAME_CONFIGFS"
+ else
+ NAME_CDEV=$(
+ $BASE_DIR/gpio-line-name \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" "$OFFSET"
+ ) || fail "Unable to read the line name from the character device"
+ if [ "$NAME_CONFIGFS" != "$NAME_CDEV" ]; then
+ fail "Discrepancy between two sources for the name of line"
+ fi
+ echo "$NAME_CDEV"
+ fi
+}
+
+
+# Load the modules. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+modprobe gpio-aggregator || skip "unable to load the gpio-aggregator module"
+
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in $(seq 5); do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+
+# If the module was already loaded: remove all previous chips
+agg_configfs_cleanup
+sim_configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap "agg_configfs_cleanup 1; sim_configfs_cleanup 1" EXIT
+
+# Use gpio-sim chips as the test backend
+for CHIP in $(seq -f "chip%g" 0 1); do
+ mkdir $CONFIGFS_SIM_DIR/$CHIP
+ for BANK in $(seq -f "bank%g" 0 1); do
+ mkdir -p "$CONFIGFS_SIM_DIR/$CHIP/$BANK"
+ echo "${CHIP}_${BANK}" > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/label" || \
+ fail "unable to set the chip label"
+ echo 16 > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/num_lines" || \
+ fail "unable to set the number of lines"
+ for IDX in $(seq 0 15); do
+ LINE_NAME="${CHIP}${BANK}_${IDX}"
+ LINE_DIR="$CONFIGFS_SIM_DIR/$CHIP/$BANK/line$IDX"
+ mkdir -p $LINE_DIR
+ echo "$LINE_NAME" > "$LINE_DIR/name" || fail "unable to set the line name"
+ done
+ done
+ sim_enable_chip "$CHIP"
+done
+
+echo "1. GPIO aggregator creation/deletion"
+
+echo "1.1. Creation/deletion via configfs"
+
+echo "1.1.1. Minimum creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.2. Complex creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_create_line agg0 line1
+agg_create_line agg0 line2
+agg_create_line agg0 line3
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line2 "$(sim_get_chip_label chip1 bank0)"
+agg_set_key agg0 line3 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 1
+agg_set_offset agg0 line1 3
+agg_set_offset agg0 line2 5
+agg_set_offset agg0 line3 7
+agg_set_line_name agg0 line0 test0
+agg_set_line_name agg0 line1 test1
+agg_set_line_name agg0 line2 test2
+agg_set_line_name agg0 line3 test3
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "4" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "test1" || fail "line name is unset"
+test "$(agg_get_line_name agg0 2)" = "test2" || fail "line name is unset"
+test "$(agg_get_line_name agg0 3)" = "test3" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_line agg0 line1
+agg_remove_line agg0 line2
+agg_remove_line agg0 line3
+agg_remove_chip agg0
+
+echo "1.1.3. Can't instantiate a chip without any line"
+agg_create_chip agg0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_chip agg0
+
+echo "1.1.4. Can't instantiate a chip with invalid configuration"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chipX_bankX"
+agg_set_offset agg0 line0 99
+agg_set_line_name agg0 line0 test0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.5. Can't instantiate a chip asynchronously via deferred probe"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chip0_bank0"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+sim_disable_chip chip0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || \
+ fail "chip unexpectedly transitioned to 'live' state"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.6. Can't instantiate a chip with _sysfs prefix"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs" 2> /dev/null && fail "chip _sysfs unexpectedly created"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.foo" 2> /dev/null && fail "chip _sysfs.foo unexpectedly created"
+
+echo "1.2. Creation/deletion via sysfs"
+
+echo "1.2.1. Minimum creation/deletion"
+echo "chip0_bank0 0" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.2. Complex creation/deletion"
+echo "chip0bank0_0 chip1_bank1 10-11" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "3" || fail "number of lines is not 3"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 2)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.3. Asynchronous creation with deferred probe"
+sim_disable_chip chip0
+echo 'chip0_bank0 0' > $SYSFS_AGG_DIR/new_device
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly remains dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name unexpectedly set"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.4. Can't instantiate a chip with invalid configuration"
+echo "xyz 0" > "$SYSFS_AGG_DIR/new_device"
+test "$(cat $CONFIGFS_AGG_DIR/_sysfs.0/live)" = 0 || fail "chip unexpectedly alive"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+
+echo "2. GPIO aggregator configuration"
+
+echo "2.1. Configuring aggregators instantiated via configfs"
+setup_2_1() {
+ agg_create_chip agg0
+ agg_create_line agg0 line0
+ agg_create_line agg0 line1
+ agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+ agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank0)"
+ agg_set_offset agg0 line0 1
+ agg_set_offset agg0 line1 3
+ agg_set_line_name agg0 line0 test0
+ agg_set_line_name agg0 line1 test1
+ agg_enable_chip agg0
+}
+teardown_2_1() {
+ agg_configfs_cleanup
+}
+
+echo "2.1.1. While offline"
+
+echo "2.1.1.1. Line can be added/removed"
+setup_2_1
+agg_disable_chip agg0
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 5
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.2. Line key can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.3. Line name can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_line_name agg0 line0 new0
+agg_set_line_name agg0 line1 new1
+agg_enable_chip agg0
+test "$(agg_get_line_name agg0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "new1" || fail "line name is unset"
+teardown_2_1
+
+echo "2.1.1.4. Line offset can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 5
+agg_set_offset agg0 line1 7
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.5. Can re-enable a chip after valid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 15
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line0 14
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 13
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.7. Can't re-enable a chip with invalid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+
+echo "2.1.2. While online"
+
+echo "2.1.2.1. Can't add/remove line"
+setup_2_1
+mkdir "$CONFIGFS_AGG_DIR/agg0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/agg0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_1
+
+echo "2.1.2.2. Can't modify line key"
+setup_2_1
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/agg0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.3. Can't modify line name"
+setup_2_1
+echo "new0" > "$CONFIGFS_AGG_DIR/agg0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.4. Can't modify line offset"
+setup_2_1
+echo "5" > "$CONFIGFS_AGG_DIR/agg0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_1
+
+echo "2.2. Configuring aggregators instantiated via sysfs"
+setup_2_2() {
+ echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+}
+teardown_2_2() {
+ echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+}
+
+echo "2.2.1. While online"
+
+echo "2.2.1.1. Can toggle live"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.1.2. Can't add/remove line"
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.1.3. Can't modify line key"
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.4. Can't modify line name"
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.5. Can't modify line offset"
+setup_2_2
+echo "5" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+
+echo "2.2.2. While waiting for deferred probe"
+
+echo "2.2.2.1. Can't add/remove line despite live = 0"
+sim_disable_chip chip0
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.2. Can't modify line key"
+sim_disable_chip chip0
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.3. Can't modify line name"
+sim_disable_chip chip0
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.4. Can't modify line offset"
+sim_disable_chip chip0
+setup_2_2
+echo 5 > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.5. Can't toggle live"
+sim_disable_chip chip0
+setup_2_2
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.3. While offline"
+
+echo "2.2.3.1. Can't add/remove line despite live = 0"
+setup_2_2
+agg_disable_chip _sysfs.0
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.3.2. Line key can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.3. Line name can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_line_name _sysfs.0 line0 new0
+agg_set_line_name _sysfs.0 line1 new1
+agg_enable_chip _sysfs.0
+test "$(agg_get_line_name _sysfs.0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "new1" || fail "line name is unset"
+teardown_2_2
+
+echo "2.2.3.4. Line offset can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 5
+agg_set_offset _sysfs.0 line1 7
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.5. Can re-enable a chip with valid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset _sysfs.0 line0 15
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset _sysfs.0 line0 14
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.6. Can't re-enable a chip with invalid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+
+echo "3. Module unload"
+
+echo "3.1. Can't unload module if there is at least one device created via configfs"
+agg_create_chip agg0
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator || fail "module unexpectedly unloaded"
+agg_remove_chip agg0
+
+echo "3.2. Can unload module if there is no device created via configfs"
+echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator && fail "module unexpectedly remains to be loaded"
+modprobe gpio-aggregator 2> /dev/null
+
+echo "4. GPIO forwarder functional"
+SETTINGS="chip0:bank0:2 chip0:bank1:4 chip1:bank0:6 chip1:bank1:8"
+setup_4() {
+ local OFFSET=0
+ agg_create_chip agg0
+ for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ agg_create_line agg0 "line${OFFSET}"
+ agg_set_key agg0 "line${OFFSET}" "$(sim_get_chip_label "$CHIP" "$BANK")"
+ agg_set_offset agg0 "line${OFFSET}" "$LINE"
+ OFFSET=$(expr $OFFSET + 1)
+ done
+ agg_enable_chip agg0
+}
+teardown_4() {
+ agg_configfs_cleanup
+}
+
+echo "4.1. Forwarding set values"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ test $(cat $VAL_PATH) = "0" || fail "incorrect value read from sysfs"
+ $BASE_DIR/gpio-mockup-cdev -s 1 "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET" &
+ mock_pid=$!
+ sleep 0.1 # FIXME Any better way?
+ test "$(cat $VAL_PATH)" = "1" || fail "incorrect value read from sysfs"
+ kill "$mock_pid"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "4.2. Forwarding set config"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ $BASE_DIR/gpio-mockup-cdev -b pull-up "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET"
+ test $(cat "$VAL_PATH") = "1" || fail "incorrect value read from sysfs"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "5. Race condition verification"
+
+echo "5.1. Stress test of new_device/delete_device and module load/unload"
+for _ in $(seq 1000); do
+ {
+ echo "dummy 0" > "$SYSFS_AGG_DIR/new_device"
+ cat "$CONFIGFS_AGG_DIR/_sysfs.0/dev_name" > "$SYSFS_AGG_DIR/delete_device"
+ } 2> /dev/null
+done &
+writer_pid=$!
+while kill -0 "$writer_pid" 2> /dev/null; do
+ {
+ modprobe gpio-aggregator
+ modprobe -r gpio-aggregator
+ } 2> /dev/null
+done
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index 67fe7a46cb62..e3000ccb9a5d 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
+include ../../../scripts/Makefile.arch
+
+ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
+TEST_PROGS += test_kexec_jump.sh
+test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump
+endif
+
include ../lib.mk
endif
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c
new file mode 100644
index 000000000000..fbce287866f5
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.c
@@ -0,0 +1,72 @@
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+asm(
+ " .code64\n"
+ " .data\n"
+ "purgatory_start:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start_b(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ // Same again
+ "purgatory_start_b:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ "purgatory_end:\n"
+ ".previous"
+);
+extern char purgatory_start[], purgatory_end[];
+
+int main (void)
+{
+ struct kexec_segment segment = {};
+ int ret;
+
+ segment.buf = purgatory_start;
+ segment.bufsz = purgatory_end - purgatory_start;
+ segment.mem = (void *)0x400000;
+ segment.memsz = 0x1000;
+ ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+ if (ret) {
+ perror("kexec_load");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+ printf("Success\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh
new file mode 100755
index 000000000000..6ae977054ba2
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Prevent loading a kernel image via the kexec_load syscall when
+# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.)
+
+TEST="$0"
+. ./kexec_common_lib.sh
+
+# kexec requires root privileges
+require_root_privileges
+
+# get the kernel config
+get_kconfig
+
+kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled"
+if [ $? -eq 0 ]; then
+ log_skip "kexec_jump is not enabled"
+fi
+
+kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled"
+ima_appraise=$?
+
+kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \
+ "IMA architecture specific policy enabled"
+arch_policy=$?
+
+get_secureboot_mode
+secureboot=$?
+
+if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then
+ log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled"
+fi
+
+./test_kexec_jump
+if [ $? -eq 0 ]; then
+ log_pass "kexec_jump succeeded"
+else
+ # The more likely failure mode if anything went wrong is that the
+ # kernel just crashes. But if we get back here, sure, whine anyway.
+ log_fail "kexec_jump failed"
+fi
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 666c9fde76da..2925e47db995 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -56,6 +56,8 @@
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
+#include <linux/unistd.h>
+#include <poll.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -65,7 +67,6 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
-#include <setjmp.h>
#include "kselftest.h"
@@ -172,14 +173,11 @@
#define __TEST_IMPL(test_name, _signal) \
static void test_name(struct __test_metadata *_metadata); \
- static inline void wrapper_##test_name( \
+ static void wrapper_##test_name( \
struct __test_metadata *_metadata, \
- struct __fixture_variant_metadata *variant) \
+ struct __fixture_variant_metadata __attribute__((unused)) *variant) \
{ \
- _metadata->setup_completed = true; \
- if (setjmp(_metadata->env) == 0) \
- test_name(_metadata); \
- __test_check_assert(_metadata); \
+ test_name(_metadata); \
} \
static struct __test_metadata _##test_name##_object = \
{ .name = #test_name, \
@@ -258,7 +256,7 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_SETUP(fixture_name) \
- void fixture_name##_setup( \
+ static void fixture_name##_setup( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
@@ -307,7 +305,7 @@
__FIXTURE_TEARDOWN(fixture_name)
#define __FIXTURE_TEARDOWN(fixture_name) \
- void fixture_name##_teardown( \
+ static void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
@@ -401,7 +399,7 @@
struct __test_metadata *_metadata, \
FIXTURE_DATA(fixture_name) *self, \
const FIXTURE_VARIANT(fixture_name) *variant); \
- static inline void wrapper_##fixture_name##_##test_name( \
+ static void wrapper_##fixture_name##_##test_name( \
struct __test_metadata *_metadata, \
struct __fixture_variant_metadata *variant) \
{ \
@@ -410,9 +408,9 @@
pid_t child = 1; \
int status = 0; \
/* Makes sure there is only one teardown, even when child forks again. */ \
- bool *teardown = mmap(NULL, sizeof(*teardown), \
+ _metadata->no_teardown = mmap(NULL, sizeof(*_metadata->no_teardown), \
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
- *teardown = false; \
+ *_metadata->no_teardown = true; \
if (sizeof(*self) > 0) { \
if (fixture_name##_teardown_parent) { \
self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
@@ -422,31 +420,26 @@
self = &self_private; \
} \
} \
- if (setjmp(_metadata->env) == 0) { \
- /* _metadata and potentially self are shared with all forks. */ \
- child = fork(); \
- if (child == 0) { \
- fixture_name##_setup(_metadata, self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (_metadata->exit_code) \
- _exit(0); \
- _metadata->setup_completed = true; \
- fixture_name##_##test_name(_metadata, self, variant->data); \
- } else if (child < 0 || child != waitpid(child, &status, 0)) { \
- ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
- _metadata->exit_code = KSFT_FAIL; \
- } \
- } \
+ _metadata->variant = variant->data; \
+ _metadata->self = self; \
+ /* _metadata and potentially self are shared with all forks. */ \
+ child = fork(); \
if (child == 0) { \
- if (_metadata->setup_completed && !fixture_name##_teardown_parent && \
- __sync_bool_compare_and_swap(teardown, false, true)) \
- fixture_name##_teardown(_metadata, self, variant->data); \
+ fixture_name##_setup(_metadata, self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ *_metadata->no_teardown = false; \
+ fixture_name##_##test_name(_metadata, self, variant->data); \
+ _metadata->teardown_fn(false, _metadata, self, variant->data); \
_exit(0); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
} \
- if (_metadata->setup_completed && fixture_name##_teardown_parent && \
- __sync_bool_compare_and_swap(teardown, false, true)) \
- fixture_name##_teardown(_metadata, self, variant->data); \
- munmap(teardown, sizeof(*teardown)); \
+ _metadata->teardown_fn(true, _metadata, self, variant->data); \
+ munmap(_metadata->no_teardown, sizeof(*_metadata->no_teardown)); \
+ _metadata->no_teardown = NULL; \
if (self && fixture_name##_teardown_parent) \
munmap(self, sizeof(*self)); \
if (WIFEXITED(status)) { \
@@ -456,7 +449,14 @@
/* Forward signal to __wait_for_test(). */ \
kill(getpid(), WTERMSIG(status)); \
} \
- __test_check_assert(_metadata); \
+ } \
+ static void wrapper_##fixture_name##_##test_name##_teardown( \
+ bool in_parent, struct __test_metadata *_metadata, \
+ void *self, const void *variant) \
+ { \
+ if (fixture_name##_teardown_parent == in_parent && \
+ !__atomic_test_and_set(_metadata->no_teardown, __ATOMIC_RELAXED)) \
+ fixture_name##_teardown(_metadata, self, variant); \
} \
static struct __test_metadata *_##fixture_name##_##test_name##_object; \
static void __attribute__((constructor)) \
@@ -467,6 +467,7 @@
object->name = #test_name; \
object->fn = &wrapper_##fixture_name##_##test_name; \
object->fixture = &_##fixture_name##_fixture_object; \
+ object->teardown_fn = &wrapper_##fixture_name##_##test_name##_teardown; \
object->termsig = signal; \
object->timeout = tmout; \
_##fixture_name##_##test_name##_object = object; \
@@ -910,14 +911,16 @@ struct __test_metadata {
struct __fixture_variant_metadata *);
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
+ void (*teardown_fn)(bool in_parent, struct __test_metadata *_metadata,
+ void *self, const void *variant);
int termsig;
int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
- bool timed_out; /* did this test timeout instead of exiting? */
bool aborted; /* stopped test due to failed ASSERT */
- bool setup_completed; /* did setup finish? */
- jmp_buf env; /* for exiting out of test early */
+ bool *no_teardown; /* fixture needs teardown */
+ void *self;
+ const void *variant;
struct __test_results *results;
struct __test_metadata *prev, *next;
};
@@ -951,88 +954,60 @@ static inline int __bail(int for_realz, struct __test_metadata *t)
{
/* if this is ASSERT, return immediately. */
if (for_realz) {
- t->aborted = true;
- longjmp(t->env, 1);
+ if (t->teardown_fn)
+ t->teardown_fn(false, t, t->self, t->variant);
+ abort();
}
/* otherwise, end the for loop and continue. */
return 0;
}
-static inline void __test_check_assert(struct __test_metadata *t)
-{
- if (t->aborted)
- abort();
-}
-
-struct __test_metadata *__active_test;
-static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
-{
- struct __test_metadata *t = __active_test;
-
- /* Sanity check handler execution environment. */
- if (!t) {
- fprintf(TH_LOG_STREAM,
- "# no active test in SIGALRM handler!?\n");
- abort();
- }
- if (sig != SIGALRM || sig != info->si_signo) {
- fprintf(TH_LOG_STREAM,
- "# %s: SIGALRM handler caught signal %d!?\n",
- t->name, sig != SIGALRM ? sig : info->si_signo);
- abort();
- }
-
- t->timed_out = true;
- // signal process group
- kill(-(t->pid), SIGKILL);
-}
-
-void __wait_for_test(struct __test_metadata *t)
+static void __wait_for_test(struct __test_metadata *t)
{
- struct sigaction action = {
- .sa_sigaction = __timeout_handler,
- .sa_flags = SA_SIGINFO,
- };
- struct sigaction saved_action;
/*
* Sets status so that WIFEXITED(status) returns true and
* WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
* should never be evaluated because of the waitpid(2) check and
- * SIGALRM handling.
+ * timeout handling.
*/
int status = KSFT_FAIL << 8;
- int child;
+ struct pollfd poll_child;
+ int ret, child, childfd;
+ bool timed_out = false;
- if (sigaction(SIGALRM, &action, &saved_action)) {
+ childfd = syscall(__NR_pidfd_open, t->pid, 0);
+ if (childfd == -1) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to install SIGALRM handler\n",
+ "# %s: unable to open pidfd\n",
t->name);
return;
}
- __active_test = t;
- t->timed_out = false;
- alarm(t->timeout);
- child = waitpid(t->pid, &status, 0);
- if (child == -1 && errno != EINTR) {
+
+ poll_child.fd = childfd;
+ poll_child.events = POLLIN;
+ ret = poll(&poll_child, 1, t->timeout * 1000);
+ if (ret == -1) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: Failed to wait for PID %d (errno: %d)\n",
- t->name, t->pid, errno);
+ "# %s: unable to wait on child pidfd\n",
+ t->name);
return;
+ } else if (ret == 0) {
+ timed_out = true;
+ /* signal process group */
+ kill(-(t->pid), SIGKILL);
}
-
- alarm(0);
- if (sigaction(SIGALRM, &saved_action, NULL)) {
+ child = waitpid(t->pid, &status, WNOHANG);
+ if (child == -1 && errno != EINTR) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to uninstall SIGALRM handler\n",
- t->name);
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
return;
}
- __active_test = NULL;
- if (t->timed_out) {
+ if (timed_out) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
@@ -1205,9 +1180,9 @@ static bool test_enabled(int argc, char **argv,
return !has_positive;
}
-void __run_test(struct __fixture_metadata *f,
- struct __fixture_variant_metadata *variant,
- struct __test_metadata *t)
+static void __run_test(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant,
+ struct __test_metadata *t)
{
struct __test_xfail *xfail;
char test_name[1024];
@@ -1218,8 +1193,7 @@ void __run_test(struct __fixture_metadata *f,
t->exit_code = KSFT_PASS;
t->trigger = 0;
t->aborted = false;
- t->setup_completed = false;
- memset(t->env, 0, sizeof(t->env));
+ t->no_teardown = NULL;
memset(t->results->reason, 0, sizeof(t->results->reason));
snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
diff --git a/tools/testing/selftests/kselftest_harness/.gitignore b/tools/testing/selftests/kselftest_harness/.gitignore
new file mode 100644
index 000000000000..e4e476a333c9
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/.gitignore
@@ -0,0 +1,2 @@
+/harness-selftest
+/harness-selftest.seen
diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile
new file mode 100644
index 000000000000..0617535a6ce4
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS_EXTENDED := harness-selftest
+TEST_PROGS := harness-selftest.sh
+EXTRA_CLEAN := harness-selftest.seen
+
+include ../lib.mk
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.c b/tools/testing/selftests/kselftest_harness/harness-selftest.c
new file mode 100644
index 000000000000..b555493bdb4d
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+
+#include <sys/resource.h>
+#include <sys/prctl.h>
+
+/* Avoid any inconsistencies */
+#define TH_LOG_STREAM stdout
+
+#include "../kselftest_harness.h"
+
+static void test_helper(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(0, 0);
+}
+
+TEST(standalone_pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ EXPECT_EQ(0, 0);
+ test_helper(_metadata);
+ TH_LOG("after");
+}
+
+TEST(standalone_fail) {
+ TH_LOG("before");
+ EXPECT_EQ(0, 0);
+ EXPECT_EQ(0, 1);
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+}
+
+TEST_SIGNAL(signal_pass, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+TEST_SIGNAL(signal_fail, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+FIXTURE(fixture) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN(fixture) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ test_helper(_metadata);
+ standalone_pass(_metadata);
+ TH_LOG("after");
+}
+
+TEST_F(fixture, fail) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ fixture_pass(_metadata, self, variant);
+ TH_LOG("after");
+}
+
+TEST_F_TIMEOUT(fixture, timeout, 1) {
+ TH_LOG("before");
+ sleep(2);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_parent) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_parent) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN_PARENT(fixture_parent) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_parent, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_setup_failure) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_setup_failure) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+ ASSERT_EQ(0, 1);
+}
+
+FIXTURE_TEARDOWN(fixture_setup_failure) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_setup_failure, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+int main(int argc, char **argv)
+{
+ /*
+ * The harness uses abort() to signal assertion failures, which triggers coredumps.
+ * This may be useful to debug real failures but not for this selftest, disable them.
+ */
+ struct rlimit rlimit = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
+ setrlimit(RLIMIT_CORE, &rlimit);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.expected b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
new file mode 100644
index 000000000000..97e1418c1c7e
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
@@ -0,0 +1,64 @@
+TAP version 13
+1..9
+# Starting 9 tests from 4 test cases.
+# RUN global.standalone_pass ...
+# harness-selftest.c:19:standalone_pass:before
+# harness-selftest.c:23:standalone_pass:after
+# OK global.standalone_pass
+ok 1 global.standalone_pass
+# RUN global.standalone_fail ...
+# harness-selftest.c:27:standalone_fail:before
+# harness-selftest.c:29:standalone_fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:30:standalone_fail:Expected 0 (0) == 1 (1)
+# standalone_fail: Test terminated by assertion
+# FAIL global.standalone_fail
+not ok 2 global.standalone_fail
+# RUN global.signal_pass ...
+# harness-selftest.c:35:signal_pass:before
+# harness-selftest.c:37:signal_pass:after
+# OK global.signal_pass
+ok 3 global.signal_pass
+# RUN global.signal_fail ...
+# harness-selftest.c:42:signal_fail:before
+# harness-selftest.c:43:signal_fail:Expected 0 (0) == 1 (1)
+# signal_fail: Test terminated by assertion
+# FAIL global.signal_fail
+not ok 4 global.signal_fail
+# RUN fixture.pass ...
+# harness-selftest.c:53:pass:setup
+# harness-selftest.c:62:pass:before
+# harness-selftest.c:19:pass:before
+# harness-selftest.c:23:pass:after
+# harness-selftest.c:66:pass:after
+# harness-selftest.c:58:pass:teardown same-process=1
+# OK fixture.pass
+ok 5 fixture.pass
+# RUN fixture.fail ...
+# harness-selftest.c:53:fail:setup
+# harness-selftest.c:70:fail:before
+# harness-selftest.c:71:fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:58:fail:teardown same-process=1
+# fail: Test terminated by assertion
+# FAIL fixture.fail
+not ok 6 fixture.fail
+# RUN fixture.timeout ...
+# harness-selftest.c:53:timeout:setup
+# harness-selftest.c:77:timeout:before
+# timeout: Test terminated by timeout
+# FAIL fixture.timeout
+not ok 7 fixture.timeout
+# RUN fixture_parent.pass ...
+# harness-selftest.c:87:pass:setup
+# harness-selftest.c:96:pass:before
+# harness-selftest.c:98:pass:after
+# harness-selftest.c:92:pass:teardown same-process=0
+# OK fixture_parent.pass
+ok 8 fixture_parent.pass
+# RUN fixture_setup_failure.pass ...
+# harness-selftest.c:106:pass:setup
+# harness-selftest.c:108:pass:Expected 0 (0) == 1 (1)
+# pass: Test terminated by assertion
+# FAIL fixture_setup_failure.pass
+not ok 9 fixture_setup_failure.pass
+# FAILED: 4 / 9 tests passed.
+# Totals: pass:4 fail:5 xfail:0 xpass:0 skip:0 error:0
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.sh b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
new file mode 100755
index 000000000000..fe72d16370fe
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Selftest for kselftest_harness.h
+#
+
+set -e
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+"$DIR"/harness-selftest > harness-selftest.seen || true
+
+diff -u "$DIR"/harness-selftest.expected harness-selftest.seen
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
index 0c0d7b1234c1..4d4f810cdf2c 100644
--- a/tools/testing/selftests/mount_setattr/Makefile
+++ b/tools/testing/selftests/mount_setattr/Makefile
@@ -2,6 +2,8 @@
# Makefile for mount selftests.
CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread
+LOCAL_HDRS += ../filesystems/wrappers.h
+
TEST_GEN_PROGS := mount_setattr_test
include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 48a000cabc97..8b378c91debf 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -20,7 +20,7 @@
#include <stdarg.h>
#include <linux/mount.h>
-#include "../filesystems/overlayfs/wrappers.h"
+#include "../filesystems/wrappers.h"
#include "../kselftest_harness.h"
#ifndef CLONE_NEWNS
@@ -107,46 +107,6 @@
#endif
#endif
-#ifndef __NR_open_tree
- #if defined __alpha__
- #define __NR_open_tree 538
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_open_tree 4428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_open_tree 6428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_open_tree 5428
- #endif
- #elif defined __ia64__
- #define __NR_open_tree (428 + 1024)
- #else
- #define __NR_open_tree 428
- #endif
-#endif
-
-#ifndef __NR_move_mount
- #if defined __alpha__
- #define __NR_move_mount 539
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_move_mount 4429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_move_mount 6429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_move_mount 5429
- #endif
- #elif defined __ia64__
- #define __NR_move_mount (428 + 1024)
- #else
- #define __NR_move_mount 429
- #endif
-#endif
-
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -161,23 +121,6 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag
return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
}
-#ifndef OPEN_TREE_CLONE
-#define OPEN_TREE_CLONE 1
-#endif
-
-#ifndef OPEN_TREE_CLOEXEC
-#define OPEN_TREE_CLOEXEC O_CLOEXEC
-#endif
-
-#ifndef AT_RECURSIVE
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-#endif
-
-static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
-{
- return syscall(__NR_open_tree, dfd, filename, flags);
-}
-
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
ssize_t ret;
@@ -1076,7 +1019,7 @@ FIXTURE_SETUP(mount_setattr_idmapped)
ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
ASSERT_GE(img_fd, 0);
- ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
+ ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0);
ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
ASSERT_EQ(close(img_fd), 0);
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 58bcbbd029bc..94176ffe4646 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -56,6 +56,8 @@ ARCH_mips32be = mips
ARCH_riscv32 = riscv
ARCH_riscv64 = riscv
ARCH_s390x = s390
+ARCH_sparc32 = sparc
+ARCH_sparc64 = sparc
ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
# kernel image names by architecture
@@ -76,6 +78,9 @@ IMAGE_riscv64 = arch/riscv/boot/Image
IMAGE_s390x = arch/s390/boot/bzImage
IMAGE_s390 = arch/s390/boot/bzImage
IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
+IMAGE_sparc32 = arch/sparc/boot/image
+IMAGE_sparc64 = arch/sparc/boot/image
+IMAGE_m68k = vmlinux
IMAGE = $(objtree)/$(IMAGE_$(XARCH))
IMAGE_NAME = $(notdir $(IMAGE))
@@ -97,9 +102,15 @@ DEFCONFIG_riscv64 = defconfig
DEFCONFIG_s390x = defconfig
DEFCONFIG_s390 = defconfig compat.config
DEFCONFIG_loongarch = defconfig
+DEFCONFIG_sparc32 = sparc32_defconfig
+DEFCONFIG_sparc64 = sparc64_defconfig
+DEFCONFIG_m68k = virt_defconfig
DEFCONFIG = $(DEFCONFIG_$(XARCH))
+EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD
EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
+EXTRACONFIG_arm = -e CONFIG_NAMESPACES
+EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES
# optional tests to run (default = all)
TEST =
@@ -122,6 +133,9 @@ QEMU_ARCH_riscv64 = riscv64
QEMU_ARCH_s390x = s390x
QEMU_ARCH_s390 = s390x
QEMU_ARCH_loongarch = loongarch64
+QEMU_ARCH_sparc32 = sparc
+QEMU_ARCH_sparc64 = sparc64
+QEMU_ARCH_m68k = m68k
QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
QEMU_ARCH_USER_ppc64le = ppc64le
@@ -152,6 +166,9 @@ QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T
QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
# OUTPUT is only set when run from the main makefile, otherwise
@@ -174,10 +191,15 @@ CFLAGS_s390x = -m64
CFLAGS_s390 = -m31
CFLAGS_mips32le = -EL -mabi=32 -fPIC
CFLAGS_mips32be = -EB -mabi=32
+CFLAGS_sparc32 = $(call cc-option,-m32)
+ifeq ($(origin XARCH),command line)
+CFLAGS_XARCH = $(CFLAGS_$(XARCH))
+endif
CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
+CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all)
CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
$(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
- $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
+ $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA)
LDFLAGS :=
LIBGCC := -lgcc
@@ -232,11 +254,11 @@ all: run
sysroot: sysroot/$(ARCH)/include
-sysroot/$(ARCH)/include: | defconfig
+sysroot/$(ARCH)/include:
$(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
$(QUIET_MKDIR)mkdir -p sysroot
$(Q)$(MAKE) -C $(srctree) outputmakefile
- $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone
+ $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
$(Q)mv sysroot/sysroot sysroot/$(ARCH)
ifneq ($(NOLIBC_SYSROOT),0)
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
index a7ca8325863f..0636d1b6e808 100644
--- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -2,9 +2,7 @@
#include "nolibc-test-linkage.h"
-#ifndef NOLIBC
#include <errno.h>
-#endif
void *linkage_test_errno_addr(void)
{
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 5884a891c491..dbe13000fb1a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -9,24 +9,22 @@
* $(CC) -nostdlib -I/path/to/nolibc/sysroot => _NOLIBC_* guards are present
* $(CC) with default libc => NOLIBC* never defined
*/
-#ifndef NOLIBC
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifndef _NOLIBC_STDIO_H
-/* standard libcs need more includes */
#include <sys/auxv.h>
-#include <sys/io.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
+#include <sys/random.h>
#include <sys/reboot.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
+#include <sys/timerfd.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <dirent.h>
@@ -38,10 +36,10 @@
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
+#include <time.h>
#include <unistd.h>
#include <limits.h>
-#endif
-#endif
+#include <ctype.h>
#pragma GCC diagnostic ignored "-Wmissing-prototypes"
@@ -807,6 +805,26 @@ static int test_dirent(void)
return 0;
}
+int test_getrandom(void)
+{
+ uint64_t rng = 0;
+ ssize_t ret;
+
+ ret = getrandom(&rng, sizeof(rng), GRND_NONBLOCK);
+ if (ret == -1 && errno == EAGAIN)
+ return 0; /* No entropy available yet */
+
+ if (ret != sizeof(rng))
+ return ret;
+
+ if (!rng) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return 0;
+}
+
int test_getpagesize(void)
{
int x = getpagesize();
@@ -836,6 +854,29 @@ int test_getpagesize(void)
return !c;
}
+int test_file_stream(void)
+{
+ FILE *f;
+ int r;
+
+ f = fopen("/dev/null", "r");
+ if (!f)
+ return -1;
+
+ errno = 0;
+ r = fwrite("foo", 1, 3, f);
+ if (r != 0 || errno != EBADF) {
+ fclose(f);
+ return -1;
+ }
+
+ r = fclose(f);
+ if (r == EOF)
+ return -1;
+
+ return 0;
+}
+
int test_fork(void)
{
int status;
@@ -883,6 +924,102 @@ int test_stat_timestamps(void)
return 0;
}
+int test_timer(void)
+{
+ struct itimerspec timerspec;
+ struct sigevent evp;
+ timer_t timer;
+ int ret;
+
+ evp.sigev_notify = SIGEV_NONE;
+
+ ret = timer_create(CLOCK_MONOTONIC, &evp, &timer);
+ if (ret)
+ return ret;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = 1000000,
+ };
+ ret = timer_settime(timer, 0, &timerspec, NULL);
+ if (ret)
+ goto err;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = -1,
+ .it_value.tv_nsec = -1,
+ .it_interval.tv_sec = -1,
+ .it_interval.tv_nsec = -1,
+ };
+ ret = timer_gettime(timer, &timerspec);
+ if (ret)
+ goto err;
+
+ errno = EINVAL;
+ ret = -1;
+
+ if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec)
+ goto err;
+
+ if (timerspec.it_value.tv_sec > 1000000)
+ goto err;
+
+ ret = timer_delete(timer);
+ if (ret)
+ return ret;
+
+ return 0;
+
+err:
+ timer_delete(timer);
+ return ret;
+}
+
+int test_timerfd(void)
+{
+ struct itimerspec timerspec;
+ int timer, ret;
+
+ timer = timerfd_create(CLOCK_MONOTONIC, 0);
+ if (timer == -1)
+ return -1;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = 1000000,
+ };
+ ret = timerfd_settime(timer, 0, &timerspec, NULL);
+ if (ret)
+ goto err;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = -1,
+ .it_value.tv_nsec = -1,
+ .it_interval.tv_sec = -1,
+ .it_interval.tv_nsec = -1,
+ };
+ ret = timerfd_gettime(timer, &timerspec);
+ if (ret)
+ goto err;
+
+ errno = EINVAL;
+ ret = -1;
+
+ if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec)
+ goto err;
+
+ if (timerspec.it_value.tv_sec > 1000000)
+ goto err;
+
+ ret = close(timer);
+ if (ret)
+ return ret;
+
+ return 0;
+
+err:
+ close(timer);
+ return ret;
+}
+
int test_uname(void)
{
struct utsname buf;
@@ -926,7 +1063,7 @@ int test_mmap_munmap(void)
{
int ret, fd, i, page_size;
void *mem;
- size_t file_size, length;
+ size_t file_size, length, mem_length;
off_t offset, pa_offset;
struct stat stat_buf;
const char * const files[] = {
@@ -966,14 +1103,22 @@ int test_mmap_munmap(void)
offset = 0;
length = file_size - offset;
pa_offset = offset & ~(page_size - 1);
+ mem_length = length + offset - pa_offset;
- mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset);
+ mem = mmap(NULL, mem_length, PROT_READ, MAP_SHARED, fd, pa_offset);
if (mem == MAP_FAILED) {
ret = 1;
goto end;
}
- ret = munmap(mem, length + offset - pa_offset);
+ mem = mremap(mem, mem_length, mem_length * 2, MREMAP_MAYMOVE, 0);
+ if (mem == MAP_FAILED) {
+ munmap(mem, mem_length);
+ ret = 1;
+ goto end;
+ }
+
+ ret = munmap(mem, mem_length * 2);
end:
close(fd);
@@ -1045,6 +1190,72 @@ int test_openat(void)
return 0;
}
+int test_namespace(void)
+{
+ int original_ns, new_ns, ret;
+ ino_t original_ns_ino;
+ struct stat stat_buf;
+
+ original_ns = open("/proc/self/ns/uts", O_RDONLY);
+ if (original_ns == -1)
+ return -1;
+
+ ret = fstat(original_ns, &stat_buf);
+ if (ret)
+ goto out;
+
+ original_ns_ino = stat_buf.st_ino;
+
+ ret = unshare(CLONE_NEWUTS);
+ if (ret)
+ goto out;
+
+ new_ns = open("/proc/self/ns/uts", O_RDONLY);
+ if (new_ns == -1) {
+ ret = new_ns;
+ goto out;
+ }
+
+ ret = fstat(new_ns, &stat_buf);
+ close(new_ns);
+ if (ret)
+ goto out;
+
+ if (stat_buf.st_ino == original_ns_ino) {
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = setns(original_ns, CLONE_NEWUTS);
+ if (ret)
+ goto out;
+
+ new_ns = open("/proc/self/ns/uts", O_RDONLY);
+ if (new_ns == -1) {
+ ret = new_ns;
+ goto out;
+ }
+
+ ret = fstat(new_ns, &stat_buf);
+ if (ret)
+ goto out;
+
+ close(new_ns);
+
+ if (stat_buf.st_ino != original_ns_ino) {
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ close(original_ns);
+ return ret;
+}
+
/* Run syscall tests between IDs <min> and <max>.
* Return 0 on success, non-zero on failure.
*/
@@ -1052,6 +1263,7 @@ int run_syscall(int min, int max)
{
struct timeval tv;
struct timezone tz;
+ struct timespec ts;
struct stat stat_buf;
int euid0;
int proc;
@@ -1083,6 +1295,11 @@ int run_syscall(int min, int max)
* test numbers.
*/
switch (test + __LINE__ + 1) {
+ CASE_TEST(access); EXPECT_SYSZR(proc, access("/proc/self", R_OK)); break;
+ CASE_TEST(access_bad); EXPECT_SYSER(proc, access("/proc/self", W_OK), -1, EPERM); break;
+ CASE_TEST(clock_getres); EXPECT_SYSZR(1, clock_getres(CLOCK_MONOTONIC, &ts)); break;
+ CASE_TEST(clock_gettime); EXPECT_SYSZR(1, clock_gettime(CLOCK_MONOTONIC, &ts)); break;
+ CASE_TEST(clock_settime); EXPECT_SYSER(1, clock_settime(CLOCK_MONOTONIC, &ts), -1, EINVAL); break;
CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break;
CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break;
CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break;
@@ -1112,10 +1329,12 @@ int run_syscall(int min, int max)
CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
+ CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break;
CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break;
+ CASE_TEST(getrandom); EXPECT_SYSZR(1, test_getrandom()); break;
CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break;
@@ -1149,6 +1368,8 @@ int run_syscall(int min, int max)
CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
+ CASE_TEST(timer); EXPECT_SYSZR(1, test_timer()); break;
+ CASE_TEST(timerfd); EXPECT_SYSZR(1, test_timerfd()); break;
CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break;
CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
@@ -1160,6 +1381,7 @@ int run_syscall(int min, int max)
CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
+ CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -1168,6 +1390,17 @@ int run_syscall(int min, int max)
return ret;
}
+int test_difftime(void)
+{
+ if (difftime(200., 100.) != 100.)
+ return 1;
+
+ if (difftime(100., 200.) != -100.)
+ return 1;
+
+ return 0;
+}
+
int run_stdlib(int min, int max)
{
int test;
@@ -1211,6 +1444,9 @@ int run_stdlib(int min, int max)
CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break;
CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break;
CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break;
+ CASE_TEST(strstr_foobar_foo); EXPECT_STREQ(1, strstr("foobar", "foo"), "foobar"); break;
+ CASE_TEST(strstr_foobar_bar); EXPECT_STREQ(1, strstr("foobar", "bar"), "bar"); break;
+ CASE_TEST(strstr_foobar_baz); EXPECT_PTREQ(1, strstr("foobar", "baz"), NULL); break;
CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break;
CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break;
CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break;
@@ -1281,6 +1517,13 @@ int run_stdlib(int min, int max)
CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break;
CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break;
CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break;
+ CASE_TEST(tolower); EXPECT_EQ(1, tolower('A'), 'a'); break;
+ CASE_TEST(tolower_noop); EXPECT_EQ(1, tolower('a'), 'a'); break;
+ CASE_TEST(toupper); EXPECT_EQ(1, toupper('a'), 'A'); break;
+ CASE_TEST(toupper_noop); EXPECT_EQ(1, toupper('A'), 'A'); break;
+ CASE_TEST(abs); EXPECT_EQ(1, abs(-10), 10); break;
+ CASE_TEST(abs_noop); EXPECT_EQ(1, abs(10), 10); break;
+ CASE_TEST(difftime); EXPECT_ZR(1, test_difftime()); break;
case __LINE__:
return ret; /* must be last */
@@ -1295,27 +1538,15 @@ int run_stdlib(int min, int max)
static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...)
{
- int ret, pipefd[2];
- ssize_t w, r;
char buf[100];
- FILE *memfile;
va_list args;
+ ssize_t w;
+ int ret;
- ret = pipe(pipefd);
- if (ret == -1) {
- llen += printf(" pipe() != %s", strerror(errno));
- result(llen, FAIL);
- return 1;
- }
-
- memfile = fdopen(pipefd[1], "w");
- if (!memfile) {
- result(llen, FAIL);
- return 1;
- }
va_start(args, fmt);
- w = vfprintf(memfile, fmt, args);
+ /* Only allow writing 21 bytes, to test truncation */
+ w = vsnprintf(buf, 21, fmt, args);
va_end(args);
if (w != c) {
@@ -1324,17 +1555,6 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
return 1;
}
- fclose(memfile);
-
- r = read(pipefd[0], buf, sizeof(buf) - 1);
-
- if (r != w) {
- llen += printf(" written(%d) != read(%d)", (int)w, (int)r);
- result(llen, FAIL);
- return 1;
- }
-
- buf[r] = '\0';
llen += printf(" \"%s\" = \"%s\"", expected, buf);
ret = strncmp(expected, buf, c);
@@ -1409,7 +1629,24 @@ static int test_scanf(void)
return 0;
}
-static int run_vfprintf(int min, int max)
+int test_strerror(void)
+{
+ char buf[100];
+ ssize_t ret;
+
+ memset(buf, 'A', sizeof(buf));
+
+ errno = EINVAL;
+ ret = snprintf(buf, sizeof(buf), "%m");
+ if (is_nolibc) {
+ if (ret < 6 || memcmp(buf, "errno=", 6))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int run_printf(int min, int max)
{
int test;
int ret = 0;
@@ -1430,7 +1667,14 @@ static int run_vfprintf(int min, int max)
CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ CASE_TEST(uintmax_t); EXPECT_VFPRINTF(20, "18446744073709551615", "%ju", 0xffffffffffffffffULL); break;
+ CASE_TEST(intmax_t); EXPECT_VFPRINTF(20, "-9223372036854775807", "%jd", 0x8000000000000001LL); break;
+ CASE_TEST(truncation); EXPECT_VFPRINTF(25, "01234567890123456789", "%s", "0123456789012345678901234"); break;
+ CASE_TEST(string_width); EXPECT_VFPRINTF(10, " 1", "%10s", "1"); break;
+ CASE_TEST(number_width); EXPECT_VFPRINTF(10, " 1", "%10d", 1); break;
+ CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break;
CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break;
+ CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -1439,6 +1683,7 @@ static int run_vfprintf(int min, int max)
return ret;
}
+__attribute__((no_sanitize("undefined")))
static int smash_stack(void)
{
char buf[100];
@@ -1455,8 +1700,7 @@ static int run_protection(int min __attribute__((unused)),
int max __attribute__((unused)))
{
pid_t pid;
- int llen = 0, ret;
- siginfo_t siginfo = {};
+ int llen = 0, status;
struct rlimit rlimit = { 0, 0 };
llen += printf("0 -fstackprotector ");
@@ -1494,11 +1738,10 @@ static int run_protection(int min __attribute__((unused)),
return 1;
default:
- ret = waitid(P_PID, pid, &siginfo, WEXITED);
+ pid = waitpid(pid, &status, 0);
- if (ret != 0 || siginfo.si_signo != SIGCHLD ||
- siginfo.si_code != CLD_KILLED || siginfo.si_status != SIGABRT) {
- llen += printf("waitid()");
+ if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) {
+ llen += printf("waitpid()");
result(llen, FAIL);
return 1;
}
@@ -1570,7 +1813,7 @@ static const struct test test_names[] = {
{ .name = "startup", .func = run_startup },
{ .name = "syscall", .func = run_syscall },
{ .name = "stdlib", .func = run_stdlib },
- { .name = "vfprintf", .func = run_vfprintf },
+ { .name = "printf", .func = run_printf },
{ .name = "protection", .func = run_protection },
{ 0 }
};
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 0299a0912d40..8277599e6441 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -25,6 +25,8 @@ all_archs=(
riscv32 riscv64
s390x s390
loongarch
+ sparc32 sparc64
+ m68k
)
archs="${all_archs[@]}"
@@ -111,6 +113,7 @@ crosstool_arch() {
loongarch) echo loongarch64;;
mips*) echo mips;;
s390*) echo s390;;
+ sparc*) echo sparc64;;
*) echo "$1";;
esac
}
@@ -184,6 +187,10 @@ test_arch() {
echo "Unsupported configuration"
return
fi
+ if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then
+ echo "Unsupported configuration"
+ return
+ fi
mkdir -p "$build_dir"
swallow_output "${MAKE[@]}" defconfig
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
index 49dc1e831174..e03fe1b9bba2 100644
--- a/tools/testing/selftests/perf_events/watermark_signal.c
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -75,7 +75,7 @@ TEST(watermark_signal)
if (waitpid(child, &child_status, WSTOPPED) != child ||
!(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) {
fprintf(stderr,
- "failed to sycnhronize with child errno=%d status=%x\n",
+ "failed to synchronize with child errno=%d status=%x\n",
errno,
child_status);
goto cleanup;
diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index 51c414faabb0..96f274f0582b 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
+#include <sys/mount.h>
#include <sys/wait.h>
#include "../kselftest_harness.h"
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 55bcf81a2b9a..efd74063126e 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -131,6 +131,26 @@
#define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */
#endif
+#ifndef PIDFD_INFO_COREDUMP
+#define PIDFD_INFO_COREDUMP (1UL << 4)
+#endif
+
+#ifndef PIDFD_COREDUMPED
+#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
+#endif
+
+#ifndef PIDFD_COREDUMP_SKIP
+#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */
+#endif
+
+#ifndef PIDFD_COREDUMP_USER
+#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */
+#endif
+
+#ifndef PIDFD_COREDUMP_ROOT
+#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */
+#endif
+
#ifndef PIDFD_THREAD
#define PIDFD_THREAD O_EXCL
#endif
@@ -150,6 +170,8 @@ struct pidfd_info {
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
+ __u32 coredump_mask;
+ __u32 __spare1;
};
/*
diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
index 7822dd080258..c094aeb1c620 100644
--- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c
+++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
@@ -15,79 +15,7 @@
#include "pidfd.h"
#include "../kselftest_harness.h"
-
-#ifndef __NR_open_tree
- #if defined __alpha__
- #define __NR_open_tree 538
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_open_tree 4428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_open_tree 6428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_open_tree 5428
- #endif
- #elif defined __ia64__
- #define __NR_open_tree (428 + 1024)
- #else
- #define __NR_open_tree 428
- #endif
-#endif
-
-#ifndef __NR_move_mount
- #if defined __alpha__
- #define __NR_move_mount 539
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_move_mount 4429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_move_mount 6429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_move_mount 5429
- #endif
- #elif defined __ia64__
- #define __NR_move_mount (428 + 1024)
- #else
- #define __NR_move_mount 429
- #endif
-#endif
-
-#ifndef MOVE_MOUNT_F_EMPTY_PATH
-#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
-#endif
-
-#ifndef MOVE_MOUNT_F_EMPTY_PATH
-#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
-#endif
-
-static inline int sys_move_mount(int from_dfd, const char *from_pathname,
- int to_dfd, const char *to_pathname,
- unsigned int flags)
-{
- return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
- to_pathname, flags);
-}
-
-#ifndef OPEN_TREE_CLONE
-#define OPEN_TREE_CLONE 1
-#endif
-
-#ifndef OPEN_TREE_CLOEXEC
-#define OPEN_TREE_CLOEXEC O_CLOEXEC
-#endif
-
-#ifndef AT_RECURSIVE
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-#endif
-
-static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
-{
- return syscall(__NR_open_tree, dfd, filename, flags);
-}
+#include "../filesystems/wrappers.h"
FIXTURE(pidfd_bind_mount) {
char template[PATH_MAX];
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index 1758a1b0457b..a0eb6e81eaa2 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -299,6 +299,7 @@ TEST_F(pidfd_info, thread_group)
/* Opening a thread as a thread-group leader must fail. */
pidfd_thread = sys_pidfd_open(pid_thread, 0);
ASSERT_LT(pidfd_thread, 0);
+ ASSERT_EQ(errno, ENOENT);
/* Opening a thread as a PIDFD_THREAD must succeed. */
pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
@@ -362,9 +363,9 @@ TEST_F(pidfd_info, thread_group)
ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
- /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
- ASSERT_TRUE(WIFEXITED(info.exit_code));
- ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+ /* Even though the thread-group exited successfully it will still report the group exit code. */
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
/*
* Retrieve exit information for the thread-group leader via the
@@ -375,9 +376,9 @@ TEST_F(pidfd_info, thread_group)
ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS));
ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT));
- /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
- ASSERT_TRUE(WIFEXITED(info2.exit_code));
- ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0);
+ /* Even though the thread-group exited successfully it will still report the group exit code. */
+ ASSERT_TRUE(WIFSIGNALED(info2.exit_code));
+ ASSERT_EQ(WTERMSIG(info2.exit_code), SIGKILL);
/* Retrieve exit information for the thread. */
info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index aad51e7c0183..991fb11306eb 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -10,7 +10,7 @@
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
-grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Call trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index ad79784e552d..957800c9ffba 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -73,7 +73,7 @@ config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \
cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
-if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux
+if test "$base_resdir" != "$resdir" && (test -f $base_resdir/bzImage || test -f $base_resdir/Image) && test -f $base_resdir/vmlinux
then
# Rerunning previous test, so use that test's kernel.
QEMU="`identify_qemu $base_resdir/vmlinux`"
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index b07c11cf6929..21e6ba3615f6 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -148,7 +148,7 @@ then
summary="$summary KCSAN: $n_kcsan"
fi
fi
- n_calltrace=`grep -c 'Call Trace:' $file`
+ n_calltrace=`grep -Ec 'Call Trace:|Call trace:' $file`
if test "$n_calltrace" -ne 0
then
summary="$summary Call Traces: $n_calltrace"
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
index 2db12c5cad9c..208be7d09a61 100755
--- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -39,8 +39,9 @@ do
shift
done
-err=
nerrs=0
+
+# Test lockdep's handling of deadlocks.
for d in 0 1
do
for t in 0 1 2
@@ -52,6 +53,12 @@ do
tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
ret=$?
mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
+ if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config
+ then
+ echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario"
+ nerrs=$((nerrs+1))
+ err=1
+ fi
if test "$d" -ne 0 && test "$ret" -eq 0
then
err=1
@@ -71,6 +78,39 @@ do
done
done
done
+
+# Test lockdep-enabled testing of mixed SRCU readers.
+for val in 0x1 0xf
+do
+ err=
+ tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.reader_flavor=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+ ret=$?
+ mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
+ if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config
+ then
+ echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario"
+ nerrs=$((nerrs+1))
+ err=1
+ fi
+ if test "$val" -eq 0xf && test "$ret" -eq 0
+ then
+ err=1
+ echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test "$val" -eq 0x1 && test "$ret" -ne 0
+ then
+ err=1
+ echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test -n "$err"
+ then
+ grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ nerrs=$((nerrs+1))
+ fi
+done
+
+# Set up exit code.
if test "$nerrs" -ne 0
then
exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 0447c4a00cc4..e03fdaca89b3 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -51,12 +51,15 @@ do_scftorture=yes
do_rcuscale=yes
do_refscale=yes
do_kvfree=yes
+do_normal=yes
+explicit_normal=no
do_kasan=yes
do_kcsan=no
do_clocksourcewd=yes
do_rt=yes
do_rcutasksflavors=yes
do_srcu_lockdep=yes
+do_rcu_rust=no
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -87,6 +90,7 @@ usage () {
echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
echo " --do-refscale / --do-no-refscale / --no-refscale"
echo " --do-rt / --do-no-rt / --no-rt"
+ echo " --do-rcu-rust / --do-no-rcu-rust / --no-rcu-rust"
echo " --do-scftorture / --do-no-scftorture / --no-scftorture"
echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep"
echo " --duration [ <minutes> | <hours>h | <days>d ]"
@@ -128,6 +132,8 @@ do
do_refscale=yes
do_rt=yes
do_kvfree=yes
+ do_normal=yes
+ explicit_normal=no
do_kasan=yes
do_kcsan=yes
do_clocksourcewd=yes
@@ -161,11 +167,17 @@ do
do_refscale=no
do_rt=no
do_kvfree=no
+ do_normal=no
+ explicit_normal=no
do_kasan=no
do_kcsan=no
do_clocksourcewd=no
do_srcu_lockdep=no
;;
+ --do-normal|--do-no-normal|--no-normal)
+ do_normal=`doyesno "$1" --do-normal`
+ explicit_normal=yes
+ ;;
--do-rcuscale|--do-no-rcuscale|--no-rcuscale)
do_rcuscale=`doyesno "$1" --do-rcuscale`
;;
@@ -181,6 +193,9 @@ do
--do-rt|--do-no-rt|--no-rt)
do_rt=`doyesno "$1" --do-rt`
;;
+ --do-rcu-rust|--do-no-rcu-rust|--no-rcu-rust)
+ do_rcu_rust=`doyesno "$1" --do-rcu-rust`
+ ;;
--do-scftorture|--do-no-scftorture|--no-scftorture)
do_scftorture=`doyesno "$1" --do-scftorture`
;;
@@ -242,6 +257,17 @@ trap 'rm -rf $T' 0 2
echo " --- " $scriptname $args | tee -a $T/log
echo " --- Results directory: " $ds | tee -a $T/log
+if test "$do_normal" = "no" && test "$do_kasan" = "no" && test "$do_kcsan" = "no"
+then
+ # Match old scripts so that "--do-none --do-rcutorture" does
+ # normal rcutorture testing, but no KASAN or KCSAN testing.
+ if test $explicit_normal = yes
+ then
+ echo " --- Everything disabled, so explicit --do-normal overridden" | tee -a $T/log
+ fi
+ do_normal=yes
+fi
+
# Calculate rcutorture defaults and apportion time
if test -z "$configs_rcutorture"
then
@@ -332,9 +358,12 @@ function torture_set {
local kcsan_kmake_tag=
local flavor=$1
shift
- curflavor=$flavor
- torture_one "$@"
- mv $T/last-resdir $T/last-resdir-nodebug || :
+ if test "$do_normal" = "yes"
+ then
+ curflavor=$flavor
+ torture_one "$@"
+ mv $T/last-resdir $T/last-resdir-nodebug || :
+ fi
if test "$do_kasan" = "yes"
then
curflavor=${flavor}-kasan
@@ -448,13 +477,57 @@ fi
if test "$do_rt" = "yes"
then
- # With all post-boot grace periods forced to normal.
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
- torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+ # In both runs, disable testing of RCU priority boosting because
+ # -rt doesn't like its interaction with testing of callback
+ # flooding.
+
+ # With all post-boot grace periods forced to normal (default for PREEMPT_RT).
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcutorture.preempt_duration=0"
+ torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_RCU_NOCB_CPU=y" --trust-make
# With all post-boot grace periods forced to expedited.
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
- torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcupdate.rcu_normal_after_boot=0 rcupdate.rcu_expedited=1 rcutorture.preempt_duration=0"
+ torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make
+fi
+
+if test "$do_rcu_rust" = "yes"
+then
+ echo " --- do-rcu-rust:" Start `date` | tee -a $T/log
+ rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust"
+ mkdir -p "$rrdir"
+ echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out
+ make LLVM=1 rustavailable > $T/rustavailable.out 2>&1
+ retcode=$?
+ echo $retcode > $rrdir/rustavailable.exitcode
+ cat $T/rustavailable.out | tee -a $rrdir/log >> $rrdir/rustavailable.out 2>&1
+ buildphase=rustavailable
+ if test "$retcode" -eq 0
+ then
+ echo " --- Running 'make mrproper' in order to run kunit." | tee -a $rrdir/log > $rrdir/mrproper.out
+ make mrproper > $rrdir/mrproper.out 2>&1
+ retcode=$?
+ echo $retcode > $rrdir/mrproper.exitcode
+ buildphase=mrproper
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo " --- Running rust_doctests_kernel." | tee -a $rrdir/log > $rrdir/rust_doctests_kernel.out
+ ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --make_options CLIPPY=1 --arch arm64 --kconfig_add CONFIG_SMP=y --kconfig_add CONFIG_WERROR=y --kconfig_add CONFIG_RUST=y rust_doctests_kernel >> $rrdir/rust_doctests_kernel.out 2>&1
+ # @@@ Remove "--arch arm64" in order to test on native architecture?
+ # @@@ Analyze $rrdir/rust_doctests_kernel.out contents?
+ retcode=$?
+ echo $retcode > $rrdir/rust_doctests_kernel.exitcode
+ buildphase=rust_doctests_kernel
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo "rcu-rust($retcode)" $rrdir >> $T/successes
+ echo Success >> $rrdir/log
+ else
+ echo "rcu-rust($retcode)" $rrdir >> $T/failures
+ echo " --- rcu-rust Test summary:" >> $rrdir/log
+ echo " --- Summary: Exit code $retcode from $buildphase, see $rrdir/$buildphase.out" >> $rrdir/log
+ fi
fi
if test "$do_srcu_lockdep" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 8ae41d5f81a3..54b1600c7eb5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -8,8 +8,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
-CONFIG_MAXSMP=y
-CONFIG_CPUMASK_OFFSTACK=y
CONFIG_RCU_NOCB_CPU=y
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index 40af3df0f397..1cc5b47dde28 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1,4 +1,4 @@
-maxcpus=8 nr_cpus=43
+maxcpus=8 nr_cpus=17
rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index 50e03eefe7ac..0443beacf362 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -3,7 +3,14 @@
#
# Run installed kselftest tests.
#
-BASE_DIR=$(realpath $(dirname $0))
+
+# Fallback to readlink if realpath is not available
+if which realpath > /dev/null; then
+ BASE_DIR=$(realpath $(dirname $0))
+else
+ BASE_DIR=$(readlink -f $(dirname $0))
+fi
+
cd $BASE_DIR
TESTS="$BASE_DIR"/kselftest-list.txt
if [ ! -r "$TESTS" ] ; then
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index f4531327b8e7..9d9d6b4c38b0 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -162,10 +162,10 @@ all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubs
auto-test-targets := \
create_dsq \
enq_last_no_enq_fails \
- enq_select_cpu_fails \
ddsp_bogus_dsq_fail \
ddsp_vtimelocal_fail \
dsp_local_on \
+ enq_select_cpu \
exit \
hotplug \
init_enable_count \
@@ -173,6 +173,7 @@ auto-test-targets := \
maybe_null \
minimal \
numa \
+ allowed_cpus \
prog_run \
reload_loop \
select_cpu_dfl \
diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c
new file mode 100644
index 000000000000..35923e74a2ec
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A scheduler that validates the behavior of scx_bpf_select_cpu_and() by
+ * selecting idle CPUs strictly within a subset of allowed CPUs.
+ *
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+private(PREF_CPUS) struct bpf_cpumask __kptr * allowed_cpumask;
+
+static void
+validate_idle_cpu(const struct task_struct *p, const struct cpumask *allowed, s32 cpu)
+{
+ if (scx_bpf_test_and_clear_cpu_idle(cpu))
+ scx_bpf_error("CPU %d should be marked as busy", cpu);
+
+ if (bpf_cpumask_subset(allowed, p->cpus_ptr) &&
+ !bpf_cpumask_test_cpu(cpu, allowed))
+ scx_bpf_error("CPU %d not in the allowed domain for %d (%s)",
+ cpu, p->pid, p->comm);
+}
+
+s32 BPF_STRUCT_OPS(allowed_cpus_select_cpu,
+ struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+ const struct cpumask *allowed;
+ s32 cpu;
+
+ allowed = cast_mask(allowed_cpumask);
+ if (!allowed) {
+ scx_bpf_error("allowed domain not initialized");
+ return -EINVAL;
+ }
+
+ /*
+ * Select an idle CPU strictly within the allowed domain.
+ */
+ cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags, allowed, 0);
+ if (cpu >= 0) {
+ validate_idle_cpu(p, allowed, cpu);
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
+
+ return cpu;
+ }
+
+ return prev_cpu;
+}
+
+void BPF_STRUCT_OPS(allowed_cpus_enqueue, struct task_struct *p, u64 enq_flags)
+{
+ const struct cpumask *allowed;
+ s32 prev_cpu = scx_bpf_task_cpu(p), cpu;
+
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
+
+ allowed = cast_mask(allowed_cpumask);
+ if (!allowed) {
+ scx_bpf_error("allowed domain not initialized");
+ return;
+ }
+
+ /*
+ * Use scx_bpf_select_cpu_and() to proactively kick an idle CPU
+ * within @allowed_cpumask, usable by @p.
+ */
+ cpu = scx_bpf_select_cpu_and(p, prev_cpu, 0, allowed, 0);
+ if (cpu >= 0) {
+ validate_idle_cpu(p, allowed, cpu);
+ scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
+ }
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(allowed_cpus_init)
+{
+ struct bpf_cpumask *mask;
+
+ mask = bpf_cpumask_create();
+ if (!mask)
+ return -ENOMEM;
+
+ mask = bpf_kptr_xchg(&allowed_cpumask, mask);
+ if (mask)
+ bpf_cpumask_release(mask);
+
+ bpf_rcu_read_lock();
+
+ /*
+ * Assign the first online CPU to the allowed domain.
+ */
+ mask = allowed_cpumask;
+ if (mask) {
+ const struct cpumask *online = scx_bpf_get_online_cpumask();
+
+ bpf_cpumask_set_cpu(bpf_cpumask_first(online), mask);
+ scx_bpf_put_cpumask(online);
+ }
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+void BPF_STRUCT_OPS(allowed_cpus_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+struct task_cpu_arg {
+ pid_t pid;
+};
+
+SEC("syscall")
+int select_cpu_from_user(struct task_cpu_arg *input)
+{
+ struct task_struct *p;
+ int cpu;
+
+ p = bpf_task_from_pid(input->pid);
+ if (!p)
+ return -EINVAL;
+
+ bpf_rcu_read_lock();
+ cpu = scx_bpf_select_cpu_and(p, bpf_get_smp_processor_id(), 0, p->cpus_ptr, 0);
+ bpf_rcu_read_unlock();
+
+ bpf_task_release(p);
+
+ return cpu;
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops allowed_cpus_ops = {
+ .select_cpu = (void *)allowed_cpus_select_cpu,
+ .enqueue = (void *)allowed_cpus_enqueue,
+ .init = (void *)allowed_cpus_init,
+ .exit = (void *)allowed_cpus_exit,
+ .name = "allowed_cpus",
+};
diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.c b/tools/testing/selftests/sched_ext/allowed_cpus.c
new file mode 100644
index 000000000000..093f285ab4ba
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/allowed_cpus.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "allowed_cpus.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct allowed_cpus *skel;
+
+ skel = allowed_cpus__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(allowed_cpus__load(skel), "Failed to load skel");
+
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+static int test_select_cpu_from_user(const struct allowed_cpus *skel)
+{
+ int fd, ret;
+ __u64 args[1];
+
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+
+ args[0] = getpid();
+ fd = bpf_program__fd(skel->progs.select_cpu_from_user);
+ if (fd < 0)
+ return fd;
+
+ ret = bpf_prog_test_run_opts(fd, &attr);
+ if (ret < 0)
+ return ret;
+
+ fprintf(stderr, "%s: CPU %d\n", __func__, attr.retval);
+
+ return 0;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct allowed_cpus *skel = ctx;
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(skel->maps.allowed_cpus_ops);
+ SCX_FAIL_IF(!link, "Failed to attach scheduler");
+
+ /* Pick an idle CPU from user-space */
+ SCX_FAIL_IF(test_select_cpu_from_user(skel), "Failed to pick idle CPU");
+
+ /* Just sleeping is fine, plenty of scheduling events happening */
+ sleep(1);
+
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+ bpf_link__destroy(link);
+
+ return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+ struct allowed_cpus *skel = ctx;
+
+ allowed_cpus__destroy(skel);
+}
+
+struct scx_test allowed_cpus = {
+ .name = "allowed_cpus",
+ .description = "Verify scx_bpf_select_cpu_and()",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&allowed_cpus)
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c
new file mode 100644
index 000000000000..ee2c9b89716e
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
+ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+s32 BPF_STRUCT_OPS(enq_select_cpu_select_cpu, struct task_struct *p,
+ s32 prev_cpu, u64 wake_flags)
+{
+ /* Bounce all tasks to ops.enqueue() */
+ return prev_cpu;
+}
+
+void BPF_STRUCT_OPS(enq_select_cpu_enqueue, struct task_struct *p,
+ u64 enq_flags)
+{
+ s32 cpu, prev_cpu = scx_bpf_task_cpu(p);
+ bool found = false;
+
+ cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, 0, &found);
+ if (found) {
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_DFL, enq_flags);
+ return;
+ }
+
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(enq_select_cpu_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+struct task_cpu_arg {
+ pid_t pid;
+};
+
+SEC("syscall")
+int select_cpu_from_user(struct task_cpu_arg *input)
+{
+ struct task_struct *p;
+ bool found = false;
+ s32 cpu;
+
+ p = bpf_task_from_pid(input->pid);
+ if (!p)
+ return -EINVAL;
+
+ bpf_rcu_read_lock();
+ cpu = scx_bpf_select_cpu_dfl(p, bpf_get_smp_processor_id(), 0, &found);
+ if (!found)
+ cpu = -EBUSY;
+ bpf_rcu_read_unlock();
+
+ bpf_task_release(p);
+
+ return cpu;
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops enq_select_cpu_ops = {
+ .select_cpu = (void *)enq_select_cpu_select_cpu,
+ .enqueue = (void *)enq_select_cpu_enqueue,
+ .exit = (void *)enq_select_cpu_exit,
+ .name = "enq_select_cpu",
+ .timeout_ms = 1000U,
+};
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu.c b/tools/testing/selftests/sched_ext/enq_select_cpu.c
new file mode 100644
index 000000000000..340c6f8b86da
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
+ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "enq_select_cpu.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct enq_select_cpu *skel;
+
+ skel = enq_select_cpu__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(enq_select_cpu__load(skel), "Failed to load skel");
+
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+static int test_select_cpu_from_user(const struct enq_select_cpu *skel)
+{
+ int fd, ret;
+ __u64 args[1];
+
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+
+ args[0] = getpid();
+ fd = bpf_program__fd(skel->progs.select_cpu_from_user);
+ if (fd < 0)
+ return fd;
+
+ ret = bpf_prog_test_run_opts(fd, &attr);
+ if (ret < 0)
+ return ret;
+
+ fprintf(stderr, "%s: CPU %d\n", __func__, attr.retval);
+
+ return 0;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct enq_select_cpu *skel = ctx;
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_ops);
+ if (!link) {
+ SCX_ERR("Failed to attach scheduler");
+ return SCX_TEST_FAIL;
+ }
+
+ /* Pick an idle CPU from user-space */
+ SCX_FAIL_IF(test_select_cpu_from_user(skel), "Failed to pick idle CPU");
+
+ sleep(1);
+
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+ bpf_link__destroy(link);
+
+ return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+ struct enq_select_cpu *skel = ctx;
+
+ enq_select_cpu__destroy(skel);
+}
+
+struct scx_test enq_select_cpu = {
+ .name = "enq_select_cpu",
+ .description = "Verify scx_bpf_select_cpu_dfl() from multiple contexts",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&enq_select_cpu)
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
deleted file mode 100644
index a7cf868d5e31..000000000000
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
- * Copyright (c) 2023 David Vernet <dvernet@meta.com>
- * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
- */
-
-#include <scx/common.bpf.h>
-
-char _license[] SEC("license") = "GPL";
-
-/* Manually specify the signature until the kfunc is added to the scx repo. */
-s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
- bool *found) __ksym;
-
-s32 BPF_STRUCT_OPS(enq_select_cpu_fails_select_cpu, struct task_struct *p,
- s32 prev_cpu, u64 wake_flags)
-{
- return prev_cpu;
-}
-
-void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,
- u64 enq_flags)
-{
- /*
- * Need to initialize the variable or the verifier will fail to load.
- * Improving these semantics is actively being worked on.
- */
- bool found = false;
-
- /* Can only call from ops.select_cpu() */
- scx_bpf_select_cpu_dfl(p, 0, 0, &found);
-
- scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
-}
-
-SEC(".struct_ops.link")
-struct sched_ext_ops enq_select_cpu_fails_ops = {
- .select_cpu = (void *) enq_select_cpu_fails_select_cpu,
- .enqueue = (void *) enq_select_cpu_fails_enqueue,
- .name = "enq_select_cpu_fails",
- .timeout_ms = 1000U,
-};
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
deleted file mode 100644
index a80e3a3b3698..000000000000
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
- * Copyright (c) 2023 David Vernet <dvernet@meta.com>
- * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
- */
-#include <bpf/bpf.h>
-#include <scx/common.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include "enq_select_cpu_fails.bpf.skel.h"
-#include "scx_test.h"
-
-static enum scx_test_status setup(void **ctx)
-{
- struct enq_select_cpu_fails *skel;
-
- skel = enq_select_cpu_fails__open();
- SCX_FAIL_IF(!skel, "Failed to open");
- SCX_ENUM_INIT(skel);
- SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel");
-
- *ctx = skel;
-
- return SCX_TEST_PASS;
-}
-
-static enum scx_test_status run(void *ctx)
-{
- struct enq_select_cpu_fails *skel = ctx;
- struct bpf_link *link;
-
- link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_fails_ops);
- if (!link) {
- SCX_ERR("Failed to attach scheduler");
- return SCX_TEST_FAIL;
- }
-
- sleep(1);
-
- bpf_link__destroy(link);
-
- return SCX_TEST_PASS;
-}
-
-static void cleanup(void *ctx)
-{
- struct enq_select_cpu_fails *skel = ctx;
-
- enq_select_cpu_fails__destroy(skel);
-}
-
-struct scx_test enq_select_cpu_fails = {
- .name = "enq_select_cpu_fails",
- .description = "Verify we fail to call scx_bpf_select_cpu_dfl() "
- "from ops.enqueue()",
- .setup = setup,
- .run = run,
- .cleanup = cleanup,
-};
-REGISTER_SCX_TEST(&enq_select_cpu_fails)
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index db1616857d89..a10350c8a46e 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -36,6 +36,7 @@ ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1"
ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0"
ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0"
ALL_TESTS="$ALL_TESTS 0011:1:1:empty_add:0"
+ALL_TESTS="$ALL_TESTS 0012:1:1:u8_valid:0"
function allow_user_defaults()
{
@@ -851,6 +852,34 @@ sysctl_test_0011()
return 0
}
+sysctl_test_0012()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0012)"
+ echo -n "Testing u8 range check in sysctl table check in ${TARGET} ... "
+ if [ ! -f ${TARGET} ]; then
+ echo -e "FAIL\nCould not create ${TARGET}" >&2
+ rc=1
+ test_rc
+ fi
+
+ local u8over_msg=$(dmesg | grep "u8_over range value" | wc -l)
+ if [ ! ${u8over_msg} -eq 1 ]; then
+ echo -e "FAIL\nu8 overflow not detected" >&2
+ rc=1
+ test_rc
+ fi
+
+ local u8under_msg=$(dmesg | grep "u8_under range value" | wc -l)
+ if [ ! ${u8under_msg} -eq 1 ]; then
+ echo -e "FAIL\nu8 underflow not detected" >&2
+ rc=1
+ test_rc
+ fi
+
+ echo "OK"
+ return 0
+}
+
list_tests()
{
echo "Test ID list:"
@@ -870,6 +899,7 @@ list_tests()
echo "0009 x $(get_test_count 0009) - tests sysct unregister"
echo "0010 x $(get_test_count 0010) - tests sysct mount point"
echo "0011 x $(get_test_count 0011) - tests empty directories"
+ echo "0012 x $(get_test_count 0012) - tests range check for u8 proc_handler"
}
usage()
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index a951c0d33cd2..ddc97ecd8b39 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -573,5 +573,32 @@
"teardown": [
"$TC qdisc del dev $DEV1 handle 1: root"
]
+ },
+ {
+ "id": "831d",
+ "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain",
+ "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"],
+ "plugins": { "requires": ["nsPlugin", "scapyPlugin"] },
+ "setup": [
+ "$IP link set dev $DEV1 up || true",
+ "$TC qdisc add dev $DEV1 root handle 1: drr",
+ "$TC filter add dev $DEV1 parent 1: basic classid 1:1",
+ "$TC class add dev $DEV1 parent 1: classid 1:1 drr",
+ "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1",
+ "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0",
+ "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem",
+ "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole"
+ ],
+ "scapy": {
+ "iface": "$DEV0",
+ "count": 5,
+ "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()"
+ },
+ "cmdUnderTest": "$TC -s qdisc show dev $DEV1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DEV1",
+ "matchPattern": "qdisc hfsc",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
index e9469ee71e6f..6d515d0e5ed6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
@@ -189,5 +189,29 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "deb1",
+ "name": "CODEL test qdisc limit trimming",
+ "category": ["qdisc", "codel"],
+ "plugins": {
+ "requires": ["nsPlugin", "scapyPlugin"]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root codel limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
index 3a537b2ec4c9..24faf4e12dfa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -377,5 +377,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "9479",
+ "name": "FQ test qdisc limit trimming",
+ "category": ["qdisc", "fq"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
index 9774b1e8801b..4ce62b857fd7 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
@@ -294,5 +294,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "0436",
+ "name": "FQ_CODEL test qdisc limit trimming",
+ "category": ["qdisc", "fq_codel"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index d012d88d67fe..229fe1bf4a90 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -18,5 +18,27 @@
"matchCount": "1",
"teardown": [
]
+ },
+ {
+ "id": "83bf",
+ "name": "FQ_PIE test qdisc limit trimming",
+ "category": ["qdisc", "fq_pie"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
index dbef5474b26b..0ca19fac54a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
@@ -188,5 +188,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "385f",
+ "name": "HHF test qdisc limit trimming",
+ "category": ["qdisc", "hhf"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json
new file mode 100644
index 000000000000..1a98b66e8030
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json
@@ -0,0 +1,24 @@
+[
+ {
+ "id": "6158",
+ "name": "PIE test qdisc limit trimming",
+ "category": ["qdisc", "pie"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root pie limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
+ }
+]
diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c
index 72d41b955fb2..5cc0010e85ff 100644
--- a/tools/testing/selftests/timens/clock_nanosleep.c
+++ b/tools/testing/selftests/timens/clock_nanosleep.c
@@ -38,7 +38,7 @@ void *call_nanosleep(void *_args)
return NULL;
}
-int run_test(int clockid, int abs)
+static int run_test(int clockid, int abs)
{
struct timespec now = {}, rem;
struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid};
@@ -115,6 +115,8 @@ int main(int argc, char *argv[])
{
int ret, nsfd;
+ ksft_print_header();
+
nscheck();
ksft_set_plan(4);
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index d12ff955de0d..a644162d56fd 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -36,6 +36,8 @@ int main(int argc, char *argv[])
return 0;
}
+ ksft_print_header();
+
nscheck();
ksft_set_plan(1);
diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c
index 6b2b9264e851..339633ae037a 100644
--- a/tools/testing/selftests/timens/futex.c
+++ b/tools/testing/selftests/timens/futex.c
@@ -66,6 +66,8 @@ int main(int argc, char *argv[])
pid_t pid;
struct timespec mtime_now;
+ ksft_print_header();
+
nscheck();
ksft_set_plan(2);
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
index 6b13dc277724..d6658b7b7548 100644
--- a/tools/testing/selftests/timens/gettime_perf.c
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -67,6 +67,8 @@ int main(int argc, char *argv[])
time_t offset = 10;
int nsfd;
+ ksft_print_header();
+
ksft_set_plan(8);
fill_function_pointers();
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 1833ca97eb24..0a9ff90ee69a 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -180,6 +180,8 @@ int main(int argc, char *argv[])
{
int ret = 0;
+ ksft_print_header();
+
nscheck();
ksft_set_plan(2);
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 387220791a05..a9c0534ef8f6 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -151,6 +151,8 @@ int main(int argc, char *argv[])
time_t offset;
int ret = 0;
+ ksft_print_header();
+
nscheck();
check_supported_timers();
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 5b939f59dfa4..79543ceb2c0f 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -15,7 +15,7 @@
#include "log.h"
#include "timens.h"
-int run_test(int clockid, struct timespec now)
+static int run_test(int clockid, struct timespec now)
{
struct itimerspec new_value;
long long elapsed;
@@ -75,6 +75,8 @@ int main(int argc, char *argv[])
pid_t pid;
struct timespec btime_now, mtime_now;
+ ksft_print_header();
+
nscheck();
check_supported_timers();
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index a4196bbd6e33..402e2e415545 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -15,14 +15,14 @@
#include "log.h"
#include "timens.h"
-static int tclock_gettime(clock_t clockid, struct timespec *now)
+static int tclock_gettime(clockid_t clockid, struct timespec *now)
{
if (clockid == CLOCK_BOOTTIME_ALARM)
clockid = CLOCK_BOOTTIME;
return clock_gettime(clockid, now);
}
-int run_test(int clockid, struct timespec now)
+static int run_test(int clockid, struct timespec now)
{
struct itimerspec new_value;
long long elapsed;
@@ -82,6 +82,8 @@ int main(int argc, char *argv[])
pid_t pid;
struct timespec btime_now, mtime_now;
+ ksft_print_header();
+
nscheck();
check_supported_timers();
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
index 5b8907bf451d..b957e1a65124 100644
--- a/tools/testing/selftests/timens/vfork_exec.c
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -91,6 +91,8 @@ int main(int argc, char *argv[])
return check("child after exec", &now);
}
+ ksft_print_header();
+
nscheck();
ksft_set_plan(4);
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index f34ac0bac696..4dde8838261d 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)
+CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)/usr/include
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
@@ -11,6 +15,11 @@ TEST_PROGS += test_generic_05.sh
TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_generic_07.sh
+TEST_PROGS += test_generic_08.sh
+TEST_PROGS += test_generic_09.sh
+TEST_PROGS += test_generic_10.sh
+TEST_PROGS += test_generic_11.sh
+
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
TEST_PROGS += test_loop_01.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 94a8e729ba4c..5421774d7867 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -16,6 +16,11 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
unsigned long dev_size = 250UL << 30;
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
dev->tgt.dev_size = dev_size;
dev->tgt.params = (struct ublk_params) {
.types = UBLK_PARAM_TYPE_BASIC,
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index 6f34eabfae97..509842df9bee 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -29,19 +29,23 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des
static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
- int zc = ublk_queue_use_zc(q);
- enum io_uring_op op = ublk_to_uring_op(iod, zc);
+ unsigned zc = ublk_queue_use_zc(q);
+ unsigned auto_zc = ublk_queue_use_auto_zc(q);
+ enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
struct io_uring_sqe *sqe[3];
+ void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
- if (!zc) {
+ if (!zc || auto_zc) {
ublk_queue_alloc_sqes(q, sqe, 1);
if (!sqe[0])
return -ENOMEM;
io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
- (void *)iod->addr,
+ addr,
iod->nr_sectors << 9,
iod->start_sector << 9);
+ if (auto_zc)
+ sqe[0]->buf_index = tag;
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
@@ -145,6 +149,11 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
},
};
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
ret = backing_file_tgt_init(dev);
if (ret)
return ret;
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 842b40736a9b..b5131a000795 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -216,6 +216,30 @@ static int ublk_ctrl_get_features(struct ublk_dev *dev,
return __ublk_ctrl_cmd(dev, &data);
}
+static int ublk_ctrl_update_size(struct ublk_dev *dev,
+ __u64 nr_sects)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_UPDATE_SIZE,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ data.data[0] = nr_sects;
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev,
+ unsigned int timeout_ms)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_QUIESCE_DEV,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ data.data[0] = timeout_ms;
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
static const char *ublk_dev_state_desc(struct ublk_dev *dev)
{
switch (dev->dev_info.state) {
@@ -405,7 +429,7 @@ static void ublk_queue_deinit(struct ublk_queue *q)
free(q->ios[i].buf_addr);
}
-static int ublk_queue_init(struct ublk_queue *q)
+static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
struct ublk_dev *dev = q->dev;
int depth = dev->dev_info.queue_depth;
@@ -420,10 +444,14 @@ static int ublk_queue_init(struct ublk_queue *q)
q->cmd_inflight = 0;
q->tid = gettid();
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
q->state |= UBLKSRV_NO_BUF;
- q->state |= UBLKSRV_ZC;
+ if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
+ q->state |= UBLKSRV_ZC;
+ if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
+ q->state |= UBLKSRV_AUTO_BUF_REG;
}
+ q->state |= extra_flags;
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
@@ -461,7 +489,7 @@ static int ublk_queue_init(struct ublk_queue *q)
goto fail;
}
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
if (ret) {
ublk_err("ublk dev %d queue %d register spare buffers failed %d",
@@ -525,6 +553,23 @@ static void ublk_dev_unprep(struct ublk_dev *dev)
close(dev->fds[0]);
}
+static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
+ struct io_uring_sqe *sqe,
+ unsigned short tag)
+{
+ struct ublk_auto_buf_reg buf = {};
+
+ if (q->tgt_ops->buf_index)
+ buf.index = q->tgt_ops->buf_index(q, tag);
+ else
+ buf.index = tag;
+
+ if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
+
+ sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
+}
+
int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
{
struct ublksrv_io_cmd *cmd;
@@ -579,6 +624,9 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
else
cmd->addr = 0;
+ if (q->state & UBLKSRV_AUTO_BUF_REG)
+ ublk_set_auto_buf_reg(q, sqe[0], tag);
+
user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
io_uring_sqe_set_data64(sqe[0], user_data);
@@ -729,6 +777,7 @@ struct ublk_queue_info {
struct ublk_queue *q;
sem_t *queue_sem;
cpu_set_t *affinity;
+ unsigned char auto_zc_fallback;
};
static void *ublk_io_handler_fn(void *data)
@@ -736,9 +785,13 @@ static void *ublk_io_handler_fn(void *data)
struct ublk_queue_info *info = data;
struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
+ unsigned extra_flags = 0;
int ret;
- ret = ublk_queue_init(q);
+ if (info->auto_zc_fallback)
+ extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+
+ ret = ublk_queue_init(q, extra_flags);
if (ret) {
ublk_err("ublk dev %d queue %d init queue failed\n",
dev_id, q->q_id);
@@ -831,6 +884,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
qinfo[i].q = &dev->q[i];
qinfo[i].queue_sem = &queue_sem;
qinfo[i].affinity = &affinity_buf[i];
+ qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback;
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
&qinfo[i]);
@@ -1011,6 +1065,9 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
info->nr_hw_queues = nr_queues;
info->queue_depth = depth;
info->flags = ctx->flags;
+ if ((features & UBLK_F_QUIESCE) &&
+ (info->flags & UBLK_F_USER_RECOVERY))
+ info->flags |= UBLK_F_QUIESCE;
dev->tgt.ops = ops;
dev->tgt.sq_depth = depth;
dev->tgt.cq_depth = depth;
@@ -1206,6 +1263,9 @@ static int cmd_dev_get_features(void)
[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
[const_ilog2(UBLK_F_ZONED)] = "ZONED",
[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
+ [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
+ [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
+ [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
};
struct ublk_dev *dev;
__u64 features = 0;
@@ -1239,13 +1299,66 @@ static int cmd_dev_get_features(void)
return ret;
}
+static int cmd_dev_update_size(struct dev_ctx *ctx)
+{
+ struct ublk_dev *dev = ublk_ctrl_init();
+ struct ublk_params p;
+ int ret = -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ctx->dev_id < 0) {
+ fprintf(stderr, "device id isn't provided\n");
+ goto out;
+ }
+
+ dev->dev_info.dev_id = ctx->dev_id;
+ ret = ublk_ctrl_get_params(dev, &p);
+ if (ret < 0) {
+ ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
+ goto out;
+ }
+
+ if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
+ ublk_err("size isn't aligned with logical block size\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
+out:
+ ublk_ctrl_deinit(dev);
+ return ret;
+}
+
+static int cmd_dev_quiesce(struct dev_ctx *ctx)
+{
+ struct ublk_dev *dev = ublk_ctrl_init();
+ int ret = -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ctx->dev_id < 0) {
+ fprintf(stderr, "device id isn't provided for quiesce\n");
+ goto out;
+ }
+ dev->dev_info.dev_id = ctx->dev_id;
+ ret = ublk_ctrl_quiesce_dev(dev, 10000);
+
+out:
+ ublk_ctrl_deinit(dev);
+ return ret;
+}
+
static void __cmd_create_help(char *exe, bool recovery)
{
int i;
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
exe, recovery ? "recover" : "add");
- printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
printf("\t[-e 0|1 ] [-i 0|1]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
@@ -1281,6 +1394,8 @@ static int cmd_dev_help(char *exe)
printf("%s list [-n dev_id] -a \n", exe);
printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
+ printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe);
+ printf("%s quiesce -n dev_id\n", exe);
return 0;
}
@@ -1300,6 +1415,9 @@ int main(int argc, char *argv[])
{ "recovery_fail_io", 1, NULL, 'e'},
{ "recovery_reissue", 1, NULL, 'i'},
{ "get_data", 1, NULL, 'g'},
+ { "auto_zc", 0, NULL, 0 },
+ { "auto_zc_fallback", 0, NULL, 0 },
+ { "size", 1, NULL, 's'},
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
@@ -1321,7 +1439,7 @@ int main(int argc, char *argv[])
opterr = 0;
optind = 2;
- while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz",
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
@@ -1361,6 +1479,9 @@ int main(int argc, char *argv[])
case 'g':
ctx.flags |= UBLK_F_NEED_GET_DATA;
break;
+ case 's':
+ ctx.size = strtoull(optarg, NULL, 10);
+ break;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);
@@ -1368,6 +1489,10 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
+ if (!strcmp(longopts[option_idx].name, "auto_zc"))
+ ctx.flags |= UBLK_F_AUTO_BUF_REG;
+ if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
+ ctx.auto_zc_fallback = 1;
break;
case '?':
/*
@@ -1391,6 +1516,16 @@ int main(int argc, char *argv[])
}
}
+ /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
+ if (ctx.auto_zc_fallback &&
+ !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
+ (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
+ ublk_err("%s: auto_zc_fallback is set but neither "
+ "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
i = optind;
while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
ctx.files[ctx.nr_files++] = argv[i++];
@@ -1423,6 +1558,10 @@ int main(int argc, char *argv[])
ret = cmd_dev_help(argv[0]);
else if (!strcmp(cmd, "features"))
ret = cmd_dev_get_features();
+ else if (!strcmp(cmd, "update_size"))
+ ret = cmd_dev_update_size(&ctx);
+ else if (!strcmp(cmd, "quiesce"))
+ ret = cmd_dev_quiesce(&ctx);
else
cmd_dev_help(argv[0]);
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 44ee1e4ac55b..e34508bf5798 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -19,7 +19,6 @@
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
-#include <sys/uio.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
@@ -85,6 +84,7 @@ struct dev_ctx {
unsigned int all:1;
unsigned int fg:1;
unsigned int recovery:1;
+ unsigned int auto_zc_fallback:1;
int _evtfd;
int _shmid;
@@ -92,6 +92,9 @@ struct dev_ctx {
/* built from shmem, only for ublk_dump_dev() */
struct ublk_dev *shadow_dev;
+ /* for 'update_size' command */
+ unsigned long long size;
+
union {
struct stripe_ctx stripe;
struct fault_inject_ctx fault_inject;
@@ -116,6 +119,7 @@ struct ublk_io {
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKSRV_IO_FREE (1UL << 2)
#define UBLKSRV_NEED_GET_DATA (1UL << 3)
+#define UBLKSRV_NEED_REG_BUF (1UL << 4)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -141,6 +145,9 @@ struct ublk_tgt_ops {
*/
void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
void (*usage)(const struct ublk_tgt_ops *ops);
+
+ /* return buffer index for UBLK_F_AUTO_BUF_REG */
+ unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
struct ublk_tgt {
@@ -169,6 +176,8 @@ struct ublk_queue {
#define UBLKSRV_QUEUE_IDLE (1U << 1)
#define UBLKSRV_NO_BUF (1U << 2)
#define UBLKSRV_ZC (1U << 3)
+#define UBLKSRV_AUTO_BUF_REG (1U << 4)
+#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5)
unsigned state;
pid_t tid;
pthread_t thread;
@@ -204,6 +213,12 @@ struct ublk_dev {
extern unsigned int ublk_dbg_mask;
extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag);
+
+static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
+{
+ return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
+}
+
static inline int is_target_io(__u64 user_data)
{
return (user_data & (1ULL << 63)) != 0;
@@ -388,6 +403,11 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
return q->state & UBLKSRV_ZC;
}
+static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
+{
+ return q->state & UBLKSRV_AUTO_BUF_REG;
+}
+
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 91fec3690d4b..44aca31cf2b0 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -42,10 +42,22 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
return 0;
}
+static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
+ struct io_uring_sqe *sqe)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+
+ io_uring_prep_nop(sqe);
+ sqe->buf_index = tag;
+ sqe->flags |= IOSQE_FIXED_FILE;
+ sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
+ sqe->len = iod->nr_sectors << 9; /* injected result */
+ sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
+}
+
static int null_queue_zc_io(struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- unsigned ublk_op = ublksrv_get_op(iod);
struct io_uring_sqe *sqe[3];
ublk_queue_alloc_sqes(q, sqe, 3);
@@ -55,12 +67,8 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
- io_uring_prep_nop(sqe[1]);
- sqe[1]->buf_index = tag;
- sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
- sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
- sqe[1]->len = iod->nr_sectors << 9; /* injected result */
- sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ __setup_nop_io(tag, iod, sqe[1]);
+ sqe[1]->flags |= IOSQE_IO_HARDLINK;
io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
@@ -69,6 +77,16 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
return 2;
}
+static int null_queue_auto_zc_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ struct io_uring_sqe *sqe[1];
+
+ ublk_queue_alloc_sqes(q, sqe, 1);
+ __setup_nop_io(tag, iod, sqe[0]);
+ return 1;
+}
+
static void ublk_null_io_done(struct ublk_queue *q, int tag,
const struct io_uring_cqe *cqe)
{
@@ -94,22 +112,37 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag,
static int ublk_null_queue_io(struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- int zc = ublk_queue_use_zc(q);
+ unsigned auto_zc = ublk_queue_use_auto_zc(q);
+ unsigned zc = ublk_queue_use_zc(q);
int queued;
- if (!zc) {
+ if (auto_zc && !ublk_io_auto_zc_fallback(iod))
+ queued = null_queue_auto_zc_io(q, tag);
+ else if (zc)
+ queued = null_queue_zc_io(q, tag);
+ else {
ublk_complete_io(q, tag, iod->nr_sectors << 9);
return 0;
}
-
- queued = null_queue_zc_io(q, tag);
ublk_queued_tgt_io(q, tag, queued);
return 0;
}
+/*
+ * return invalid buffer index for triggering auto buffer register failure,
+ * then UBLK_IO_RES_NEED_REG_BUF handling is covered
+ */
+static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
+{
+ if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ return (unsigned short)-1;
+ return tag;
+}
+
const struct ublk_tgt_ops null_tgt_ops = {
.name = "null",
.init_tgt = ublk_null_tgt_init,
.queue_io = ublk_null_queue_io,
.tgt_io_done = ublk_null_io_done,
+ .buf_index = ublk_null_buf_index,
};
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 5dbd6392d83d..404a143bf3d6 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -70,7 +70,7 @@ static void free_stripe_array(struct stripe_array *s)
}
static void calculate_stripe_array(const struct stripe_conf *conf,
- const struct ublksrv_io_desc *iod, struct stripe_array *s)
+ const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base)
{
const unsigned shift = conf->shift - 9;
const unsigned chunk_sects = 1 << shift;
@@ -102,7 +102,7 @@ static void calculate_stripe_array(const struct stripe_conf *conf,
}
assert(this->nr_vec < this->cap);
- this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
+ this->vec[this->nr_vec].iov_base = (void *)(base + done);
this->vec[this->nr_vec++].iov_len = nr_sects << 9;
start += nr_sects;
@@ -126,15 +126,17 @@ static inline enum io_uring_op stripe_to_uring_op(
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
- int zc = !!(ublk_queue_use_zc(q) != 0);
- enum io_uring_op op = stripe_to_uring_op(iod, zc);
+ unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
+ unsigned zc = (ublk_queue_use_zc(q) != 0);
+ enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc);
struct io_uring_sqe *sqe[NR_STRIPE];
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
int i, extra = zc ? 2 : 0;
+ void *base = (zc | auto_zc) ? NULL : (void *)iod->addr;
io->private_data = s;
- calculate_stripe_array(conf, iod, s);
+ calculate_stripe_array(conf, iod, s, base);
ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
@@ -153,12 +155,11 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
(void *)t->vec,
t->nr_vec,
t->start << 9);
- if (zc) {
+ io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (auto_zc || zc) {
sqe[i]->buf_index = tag;
- io_uring_sqe_set_flags(sqe[i],
- IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK);
- } else {
- io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (zc)
+ sqe[i]->flags |= IOSQE_IO_HARDLINK;
}
/* bit63 marks us as tgt io */
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
@@ -287,6 +288,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
loff_t bytes = 0;
int ret, i, mul = 1;
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
ublk_err("invalid chunk size %u\n", chunk_size);
return -EINVAL;
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index a81210ca3e99..0145569ee7e9 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -23,6 +23,11 @@ _get_disk_dev_t() {
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
+_get_disk_size()
+{
+ lsblk -b -o SIZE -n "$1"
+}
+
_run_fio_verify_io() {
fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \
--bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \
@@ -215,6 +220,26 @@ _recover_ublk_dev() {
echo "$state"
}
+# quiesce device and return ublk device state
+__ublk_quiesce_dev()
+{
+ local dev_id=$1
+ local exp_state=$2
+ local state
+
+ if ! ${UBLK_PROG} quiesce -n "${dev_id}"; then
+ state=$(_get_ublk_dev_state "${dev_id}")
+ return "$state"
+ fi
+
+ for ((j=0;j<50;j++)); do
+ state=$(_get_ublk_dev_state "${dev_id}")
+ [ "$state" == "$exp_state" ] && break
+ sleep 1
+ done
+ echo "$state"
+}
+
# kill the ublk daemon and return ublk device state
__ublk_kill_daemon()
{
@@ -251,7 +276,7 @@ __run_io_and_remove()
local kill_server=$3
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@@ -303,20 +328,26 @@ run_io_and_kill_daemon()
run_io_and_recover()
{
+ local action=$1
local state
local dev_id
+ shift 1
dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --rw=randread --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 4
- state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ if [ "$action" == "kill_daemon" ]; then
+ state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ elif [ "$action" == "quiesce_dev" ]; then
+ state=$(__ublk_quiesce_dev "${dev_id}" "QUIESCED")
+ fi
if [ "$state" != "QUIESCED" ]; then
- echo "device isn't quiesced($state) after killing daemon"
+ echo "device isn't quiesced($state) after $action"
return 255
fi
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh
index 8a3bc080c577..8b533217d4a1 100755
--- a/tools/testing/selftests/ublk/test_generic_04.sh
+++ b/tools/testing/selftests/ublk/test_generic_04.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
ublk_run_recover_test()
{
- run_io_and_recover "$@"
+ run_io_and_recover "kill_daemon" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh
index 3bb00a347402..398e9e2b58e1 100755
--- a/tools/testing/selftests/ublk/test_generic_05.sh
+++ b/tools/testing/selftests/ublk/test_generic_05.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
ublk_run_recover_test()
{
- run_io_and_recover "$@"
+ run_io_and_recover "kill_daemon" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh
index b67230c42c84..fd42062b7b76 100755
--- a/tools/testing/selftests/ublk/test_generic_06.sh
+++ b/tools/testing/selftests/ublk/test_generic_06.sh
@@ -17,7 +17,7 @@ STARTTIME=${SECONDS}
dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
dd_pid=$!
-__ublk_kill_daemon ${dev_id} "DEAD"
+__ublk_kill_daemon ${dev_id} "DEAD" >/dev/null
wait $dd_pid
dd_exitcode=$?
diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh
new file mode 100755
index 000000000000..b222f3a77e12
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_08.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_08"
+ERR_CODE=0
+
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_AUTO_BUF_REG"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then
+ _cleanup_test "generic"
+ _show_result $TID 255
+fi
+
+dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh
new file mode 100755
index 000000000000..bb6f77ca5522
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_09.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_09"
+ERR_CODE=0
+
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "basic IO test"
+
+dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback)
+_check_add_dev $TID $?
+
+# run fio over the two disks
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh
new file mode 100755
index 000000000000..abc11c3d416b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_10.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_10"
+ERR_CODE=0
+
+if ! _have_feature "UPDATE_SIZE"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "check update size"
+
+dev_id=$(_add_ublk_dev -t null)
+_check_add_dev $TID $?
+
+size=$(_get_disk_size /dev/ublkb"${dev_id}")
+size=$(( size / 2 ))
+if ! "$UBLK_PROG" update_size -n "$dev_id" -s "$size"; then
+ ERR_CODE=255
+fi
+
+new_size=$(_get_disk_size /dev/ublkb"${dev_id}")
+if [ "$new_size" != "$size" ]; then
+ ERR_CODE=255
+fi
+
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh
new file mode 100755
index 000000000000..a00357a5ec6b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_11.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_11"
+ERR_CODE=0
+
+ublk_run_quiesce_recover()
+{
+ run_io_and_recover "quiesce_dev" "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_feature "QUIESCE"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "quiesce" "basic quiesce & recover function verification"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_quiesce_recover -t null -q 2 -r 1 &
+ublk_run_quiesce_recover -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_quiesce_recover -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_quiesce_recover -t null -q 2 -r 1 -i 1 &
+ublk_run_quiesce_recover -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_quiesce_recover -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "quiesce"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
index 1a9065125ae1..4bdd921081e5 100755
--- a/tools/testing/selftests/ublk/test_stress_02.sh
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -25,10 +25,12 @@ _create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
-ublk_io_and_kill_daemon 8G -t null -q 4 &
-ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
-ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
-wait
+for nr_queue in 1 4; do
+ ublk_io_and_kill_daemon 8G -t null -q "$nr_queue" &
+ ublk_io_and_kill_daemon 256M -t loop -q "$nr_queue" "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q "$nr_queue" "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ wait
+done
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index e0854f71d35b..7d728ce50774 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -32,6 +32,13 @@ _create_backfile 2 128M
ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+
+if _have_feature "AUTO_BUF_REG"; then
+ ublk_io_and_remove 8G -t null -q 4 --auto_zc &
+ ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 1798a98387e8..9bcfa64ea1f0 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -31,6 +31,13 @@ _create_backfile 2 128M
ublk_io_and_kill_daemon 8G -t null -q 4 -z &
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+
+if _have_feature "AUTO_BUF_REG"; then
+ ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
+ ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index 88601b48f1cd..bcfc904cefc6 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -60,5 +60,14 @@ if _have_feature "ZERO_COPY"; then
done
fi
+if _have_feature "AUTO_BUF_REG"; then
+ for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
+ wait
+ done
+fi
+
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 28422c32cc8f..f703fcfe9f7c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam test_shadow_stack avx
+ corrupt_xstate_header amx lam test_shadow_stack avx apx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c
new file mode 100644
index 000000000000..d9c8d41b8c5a
--- /dev/null
+++ b/tools/testing/selftests/x86/apx.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include "xstate.h"
+
+int main(void)
+{
+ test_xstate(XFEATURE_APX);
+}
diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile
new file mode 100644
index 000000000000..8ff2d7226c7f
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/Makefile
@@ -0,0 +1,3 @@
+TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py
+TEST_FILES := common.py
+include ../../lib.mk
diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py
new file mode 100755
index 000000000000..2f9664a80617
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/common.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# This contains kselftest framework adapted common functions for testing
+# mitigation for x86 bugs.
+
+import os, sys, re, shutil
+
+sys.path.insert(0, '../../kselftest')
+import ksft
+
+def read_file(path):
+ if not os.path.exists(path):
+ return None
+ with open(path, 'r') as file:
+ return file.read().strip()
+
+def cpuinfo_has(arg):
+ cpuinfo = read_file('/proc/cpuinfo')
+ if arg in cpuinfo:
+ return True
+ return False
+
+def cmdline_has(arg):
+ cmdline = read_file('/proc/cmdline')
+ if arg in cmdline:
+ return True
+ return False
+
+def cmdline_has_either(args):
+ cmdline = read_file('/proc/cmdline')
+ for arg in args:
+ if arg in cmdline:
+ return True
+ return False
+
+def cmdline_has_none(args):
+ return not cmdline_has_either(args)
+
+def cmdline_has_all(args):
+ cmdline = read_file('/proc/cmdline')
+ for arg in args:
+ if arg not in cmdline:
+ return False
+ return True
+
+def get_sysfs(bug):
+ return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug)
+
+def sysfs_has(bug, mitigation):
+ status = get_sysfs(bug)
+ if mitigation in status:
+ return True
+ return False
+
+def sysfs_has_either(bugs, mitigations):
+ for bug in bugs:
+ for mitigation in mitigations:
+ if sysfs_has(bug, mitigation):
+ return True
+ return False
+
+def sysfs_has_none(bugs, mitigations):
+ return not sysfs_has_either(bugs, mitigations)
+
+def sysfs_has_all(bugs, mitigations):
+ for bug in bugs:
+ for mitigation in mitigations:
+ if not sysfs_has(bug, mitigation):
+ return False
+ return True
+
+def bug_check_pass(bug, found):
+ ksft.print_msg(f"\nFound: {found}")
+ # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_pass(f'{bug}: {found}')
+
+def bug_check_fail(bug, found, expected):
+ ksft.print_msg(f'\nFound:\t {found}')
+ ksft.print_msg(f'Expected:\t {expected}')
+ ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_fail(f'{bug}: {found}')
+
+def bug_status_unknown(bug, found):
+ ksft.print_msg(f'\nUnknown status: {found}')
+ ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_fail(f'{bug}: {found}')
+
+def basic_checks_sufficient(bug, mitigation):
+ if not mitigation:
+ bug_status_unknown(bug, "None")
+ return True
+ elif mitigation == "Not affected":
+ ksft.test_result_pass(bug)
+ return True
+ elif mitigation == "Vulnerable":
+ if cmdline_has_either([f'{bug}=off', 'mitigations=off']):
+ bug_check_pass(bug, mitigation)
+ return True
+ return False
+
+def get_section_info(vmlinux, section_name):
+ from elftools.elf.elffile import ELFFile
+ with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ section = elffile.get_section_by_name(section_name)
+ if section is None:
+ ksft.print_msg("Available sections in vmlinux:")
+ for sec in elffile.iter_sections():
+ ksft.print_msg(sec.name)
+ raise ValueError(f"Section {section_name} not found in {vmlinux}")
+ return section['sh_addr'], section['sh_offset'], section['sh_size']
+
+def get_patch_sites(vmlinux, offset, size):
+ import struct
+ output = []
+ with open(vmlinux, 'rb') as f:
+ f.seek(offset)
+ i = 0
+ while i < size:
+ data = f.read(4) # s32
+ if not data:
+ break
+ sym_offset = struct.unpack('<i', data)[0] + i
+ i += 4
+ output.append(sym_offset)
+ return output
+
+def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address):
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_64
+ section_start = section['sh_addr']
+ section_end = section_start + section['sh_size']
+
+ if not (section_start <= target_address < section_end):
+ return None
+
+ offset = target_address - section_start
+ code = section.data()[offset:offset + 16]
+
+ cap = init_capstone()
+ for instruction in cap.disasm(code, target_address):
+ if instruction.address == target_address:
+ return instruction
+ return None
+
+def init_capstone():
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT
+ cap = Cs(CS_ARCH_X86, CS_MODE_64)
+ cap.syntax = CS_OPT_SYNTAX_ATT
+ return cap
+
+def get_runtime_kernel():
+ import drgn
+ return drgn.program_from_kernel()
+
+def check_dependencies_or_skip(modules, script_name="unknown test"):
+ for mod in modules:
+ try:
+ __import__(mod)
+ except ImportError:
+ ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'")
+ ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py
new file mode 100755
index 000000000000..cdc33ae6a91c
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) mitigation.
+#
+# Test if indirect CALL/JMP are correctly patched by evaluating
+# the vmlinux .retpoline_sites in /proc/kcore.
+
+# Install dependencies
+# add-apt-repository ppa:michel-slm/kernel-utils
+# apt update
+# apt install -y python3-drgn python3-pyelftools python3-capstone
+#
+# Best to copy the vmlinux at a standard location:
+# mkdir -p /usr/lib/debug/lib/modules/$(uname -r)
+# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
+#
+# Usage: ./its_indirect_alignment.py [vmlinux]
+
+import os, sys, argparse
+from pathlib import Path
+
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+
+mitigation = c.get_sysfs(bug)
+if not mitigation or "Aligned branch/return thunks" not in mitigation:
+ ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled")
+ ksft.finished()
+
+if c.sysfs_has("spectre_v2", "Retpolines"):
+ ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed")
+ ksft.finished()
+
+c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py")
+
+from elftools.elf.elffile import ELFFile
+from drgn.helpers.common.memory import identify_address
+
+cap = c.init_capstone()
+
+if len(os.sys.argv) > 1:
+ arg_vmlinux = os.sys.argv[1]
+ if not os.path.exists(arg_vmlinux):
+ ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}")
+ ksft.exit_fail()
+ os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True)
+ os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux')
+
+vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux"
+if not os.path.exists(vmlinux):
+ ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}")
+ ksft.exit_fail()
+
+ksft.print_msg(f"Using vmlinux: {vmlinux}")
+
+retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites')
+ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}")
+
+sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size)
+total_retpoline_tests = len(sites_offset)
+ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites")
+
+prog = c.get_runtime_kernel()
+retpolines_start_kcore = prog.symbol('__retpoline_sites').address
+ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}')
+
+x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address
+ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}')
+
+tests_passed = 0
+tests_failed = 0
+tests_unknown = 0
+
+with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ text_section = elffile.get_section_by_name('.text')
+
+ for i in range(0, len(sites_offset)):
+ site = retpolines_start_kcore + sites_offset[i]
+ vmlinux_site = retpolines_start_vmlinux + sites_offset[i]
+ passed = unknown = failed = False
+ try:
+ vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site)
+ kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0]
+ operand = kcore_insn.op_str
+ insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg
+ safe_site = insn_end & 0x20
+ site_status = "" if safe_site else "(unsafe)"
+
+ ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}")
+ ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}")
+ ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}")
+
+ if (site & 0x20) ^ (insn_end & 0x20):
+ ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}")
+ if safe_site:
+ tests_passed += 1
+ passed = True
+ ksft.print_msg(f"\tPASSED: At safe address")
+ continue
+
+ if operand.startswith('0xffffffff'):
+ thunk = int(operand, 16)
+ if thunk > x86_indirect_its_thunk_r15:
+ insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0]
+ operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>'
+ if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20:
+ ksft.print_msg(f"\tPASSED: Found {operand} at safe address")
+ passed = True
+ if not passed:
+ if kcore_insn.operands[0].type == capstone.CS_OP_IMM:
+ operand += ' <' + prog.symbol(int(operand, 16)) + '>'
+ if '__x86_indirect_its_thunk_' in operand:
+ ksft.print_msg(f"\tPASSED: Found {operand}")
+ else:
+ ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.")
+ passed = True
+ else:
+ unknown = True
+ if passed:
+ tests_passed += 1
+ elif unknown:
+ ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}")
+ tests_unknown += 1
+ else:
+ ksft.print_msg(f'\t************* FAILED *************')
+ ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}")
+ ksft.print_msg(f'\t**********************************')
+ tests_failed += 1
+ except Exception as e:
+ ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}")
+ tests_unknown += 1
+
+ksft.print_msg(f"\n\nSummary:")
+ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}")
+ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}")
+ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}")
+
+if tests_failed == 0:
+ ksft.test_result_pass("All ITS return thunk sites passed")
+else:
+ ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed")
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py
new file mode 100755
index 000000000000..3204f4728c62
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_permutations.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) cmdline permutations with other bugs
+# like spectre_v2 and retbleed.
+
+import os, sys, subprocess, itertools, re, shutil
+
+test_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, test_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+mitigation = c.get_sysfs(bug)
+
+if not mitigation or "Not affected" in mitigation:
+ ksft.test_result_skip("Skipping its_permutations.py: not applicable")
+ ksft.finished()
+
+if shutil.which('vng') is None:
+ ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.")
+ ksft.finished()
+
+TEST = f"{test_dir}/its_sysfs.py"
+default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1']
+
+DEBUG = " -v "
+
+# Install dependencies
+# https://github.com/arighi/virtme-ng
+# apt install virtme-ng
+BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage "
+#BOOT_CMD += DEBUG
+
+bug = "indirect_target_selection"
+
+input_options = {
+ 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'],
+ 'retbleed' : ['off', 'stuff', 'auto'],
+ 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'],
+}
+
+def pretty_print(output):
+ OKBLUE = '\033[94m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+
+ # Define patterns and their corresponding colors
+ patterns = {
+ r"^ok \d+": OKGREEN,
+ r"^not ok \d+": FAIL,
+ r"^# Testing .*": OKBLUE,
+ r"^# Found: .*": WARNING,
+ r"^# Totals: .*": BOLD,
+ r"pass:([1-9]\d*)": OKGREEN,
+ r"fail:([1-9]\d*)": FAIL,
+ r"skip:([1-9]\d*)": WARNING,
+ }
+
+ # Apply colors based on patterns
+ for pattern, color in patterns.items():
+ output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE)
+
+ print(output)
+
+combinations = list(itertools.product(*input_options.values()))
+ksft.print_header()
+ksft.set_plan(len(combinations))
+
+logs = ""
+
+for combination in combinations:
+ append = ""
+ log = ""
+ for p in default_kparam:
+ append += f' --append={p}'
+ command = BOOT_CMD + append
+ test_params = ""
+ for i, key in enumerate(input_options.keys()):
+ param = f'{key}={combination[i]}'
+ test_params += f' {param}'
+ command += f" --append={param}"
+ command += f" -- {TEST}"
+ test_name = f"{bug} {test_params}"
+ pretty_print(f'# Testing {test_name}')
+ t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ t.wait()
+ output, _ = t.communicate()
+ if t.returncode == 0:
+ ksft.test_result_pass(test_name)
+ else:
+ ksft.test_result_fail(test_name)
+ output = output.decode()
+ log += f" {output}"
+ pretty_print(log)
+ logs += output + "\n"
+
+# Optionally use tappy to parse the output
+# apt install python3-tappy
+with open("logs.txt", "w") as f:
+ f.write(logs)
+
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py
new file mode 100755
index 000000000000..f40078d9f6ff
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) mitigation.
+#
+# Tests if the RETs are correctly patched by evaluating the
+# vmlinux .return_sites in /proc/kcore.
+#
+# Install dependencies
+# add-apt-repository ppa:michel-slm/kernel-utils
+# apt update
+# apt install -y python3-drgn python3-pyelftools python3-capstone
+#
+# Run on target machine
+# mkdir -p /usr/lib/debug/lib/modules/$(uname -r)
+# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
+#
+# Usage: ./its_ret_alignment.py
+
+import os, sys, argparse
+from pathlib import Path
+
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+mitigation = c.get_sysfs(bug)
+if not mitigation or "Aligned branch/return thunks" not in mitigation:
+ ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled")
+ ksft.finished()
+
+c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py")
+
+from elftools.elf.elffile import ELFFile
+from drgn.helpers.common.memory import identify_address
+
+cap = c.init_capstone()
+
+if len(os.sys.argv) > 1:
+ arg_vmlinux = os.sys.argv[1]
+ if not os.path.exists(arg_vmlinux):
+ ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}")
+ ksft.exit_fail()
+ os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True)
+ os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux')
+
+vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux"
+if not os.path.exists(vmlinux):
+ ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}")
+ ksft.exit_fail()
+
+ksft.print_msg(f"Using vmlinux: {vmlinux}")
+
+rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites')
+ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}")
+
+sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size)
+total_rethunk_tests = len(sites_offset)
+ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites")
+
+prog = c.get_runtime_kernel()
+rethunks_start_kcore = prog.symbol('__return_sites').address
+ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}')
+
+its_return_thunk = prog.symbol('its_return_thunk').address
+ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}')
+
+tests_passed = 0
+tests_failed = 0
+tests_unknown = 0
+tests_skipped = 0
+
+with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ text_section = elffile.get_section_by_name('.text')
+
+ for i in range(len(sites_offset)):
+ site = rethunks_start_kcore + sites_offset[i]
+ vmlinux_site = rethunks_start_vmlinux + sites_offset[i]
+ try:
+ passed = unknown = failed = skipped = False
+
+ symbol = identify_address(prog, site)
+ vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site)
+ kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0]
+
+ insn_end = site + kcore_insn.size - 1
+
+ safe_site = insn_end & 0x20
+ site_status = "" if safe_site else "(unsafe)"
+
+ ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}")
+ ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}")
+ ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}")
+
+ if safe_site:
+ tests_passed += 1
+ passed = True
+ ksft.print_msg(f"\tPASSED: At safe address")
+ continue
+
+ if "jmp" in kcore_insn.mnemonic:
+ passed = True
+ elif "ret" not in kcore_insn.mnemonic:
+ skipped = True
+
+ if passed:
+ ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}")
+ tests_passed += 1
+ elif skipped:
+ ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'")
+ tests_skipped += 1
+ elif unknown:
+ ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}")
+ tests_unknown += 1
+ else:
+ ksft.print_msg(f'\t************* FAILED *************')
+ ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}")
+ ksft.print_msg(f'\t**********************************')
+ tests_failed += 1
+ except Exception as e:
+ ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}")
+ tests_unknown += 1
+
+ksft.print_msg(f"\n\nSummary:")
+ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}")
+
+if tests_failed == 0:
+ ksft.test_result_pass("All ITS return thunk sites passed.")
+else:
+ ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.")
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py
new file mode 100755
index 000000000000..7bca81f2f606
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_sysfs.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for Indirect Target Selection(ITS) mitigation sysfs status.
+
+import sys, os, re
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+
+from common import *
+
+bug = "indirect_target_selection"
+mitigation = get_sysfs(bug)
+
+ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks"
+ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB"
+ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected"
+ITS_MITIGATION_VULNERABLE = "Vulnerable"
+
+def check_mitigation():
+ if mitigation == ITS_MITIGATION_ALIGNED_THUNKS:
+ if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"):
+ bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'):
+ bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY)
+ return
+ bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS)
+ return
+
+ if mitigation == ITS_MITIGATION_RETPOLINE_STUFF:
+ if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"):
+ bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ if sysfs_has('retbleed', 'Stuffing'):
+ bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS)
+
+ if mitigation == ITS_MITIGATION_VMEXIT_ONLY:
+ if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'):
+ bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY)
+ return
+ bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS)
+
+ if mitigation == ITS_MITIGATION_VULNERABLE:
+ if sysfs_has("spectre_v2", "Vulnerable"):
+ bug_check_pass(bug, ITS_MITIGATION_VULNERABLE)
+ else:
+ bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE)
+
+ bug_status_unknown(bug, mitigation)
+ return
+
+ksft.print_header()
+ksft.set_plan(1)
+ksft.print_msg(f'{bug}: {mitigation} ...')
+
+if not basic_checks_sufficient(bug, mitigation):
+ check_mitigation()
+
+ksft.finished()
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 18d736640ece..0873b0e5f48b 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -682,7 +682,7 @@ int do_uring(unsigned long lam)
return 1;
if (fstat(file_fd, &st) < 0)
- return 1;
+ goto cleanup;
off_t file_sz = st.st_size;
@@ -690,7 +690,7 @@ int do_uring(unsigned long lam)
fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
if (!fi)
- return 1;
+ goto cleanup;
fi->file_sz = file_sz;
fi->file_fd = file_fd;
@@ -698,7 +698,7 @@ int do_uring(unsigned long lam)
ring = malloc(sizeof(*ring));
if (!ring) {
free(fi);
- return 1;
+ goto cleanup;
}
memset(ring, 0, sizeof(struct io_ring));
@@ -729,6 +729,8 @@ out:
}
free(fi);
+cleanup:
+ close(file_fd);
return ret;
}
@@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void)
wq = mmap(NULL, 0x1000, PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
+ close(fd);
if (wq == MAP_FAILED)
perror("mmap");
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
index 23c1d6c964ea..97fe4bd8bc77 100644
--- a/tools/testing/selftests/x86/xstate.c
+++ b/tools/testing/selftests/x86/xstate.c
@@ -31,7 +31,8 @@
(1 << XFEATURE_OPMASK) | \
(1 << XFEATURE_ZMM_Hi256) | \
(1 << XFEATURE_Hi16_ZMM) | \
- (1 << XFEATURE_XTILEDATA))
+ (1 << XFEATURE_XTILEDATA) | \
+ (1 << XFEATURE_APX))
static inline uint64_t xgetbv(uint32_t index)
{
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
index 42af36ec852f..e91e3092b5d2 100644
--- a/tools/testing/selftests/x86/xstate.h
+++ b/tools/testing/selftests/x86/xstate.h
@@ -33,6 +33,7 @@ enum xfeature {
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILECFG,
XFEATURE_XTILEDATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -59,6 +60,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index d0f6d253ac72..613551132a96 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1264,21 +1264,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
control_expectln("RECEIVED");
- ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
- if (ret < 0) {
- if (errno == EOPNOTSUPP) {
- fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
- } else {
+ /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though
+ * the "RECEIVED" message means that the other side has received the
+ * data, there can be a delay in our kernel before updating the "unsent
+ * bytes" counter. Repeat SIOCOUTQ until it returns 0.
+ */
+ timeout_begin(TIMEOUT);
+ do {
+ ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+ break;
+ }
perror("ioctl");
exit(EXIT_FAILURE);
}
- } else if (ret == 0 && sock_bytes_unsent != 0) {
- fprintf(stderr,
- "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
- sock_bytes_unsent);
- exit(EXIT_FAILURE);
- }
-
+ timeout_check("SIOCOUTQ");
+ } while (sock_bytes_unsent != 0);
+ timeout_end();
close(fd);
}
diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt
index dd5621038c55..43e98311d10f 100644
--- a/tools/tracing/rtla/README.txt
+++ b/tools/tracing/rtla/README.txt
@@ -13,6 +13,13 @@ RTLA depends on the following libraries and tools:
- libtraceevent
- libcpupower (optional, for --deepest-idle-state)
+For BPF sample collection support, the following extra dependencies are
+required:
+
+ - libbpf 1.0.0 or later
+ - bpftool with skeleton support
+ - clang with BPF CO-RE support
+
It also depends on python3-docutils to compile man pages.
For development, we suggest the following steps for compiling rtla:
diff --git a/usr/include/Makefile b/usr/include/Makefile
index e3d6b03527fe..f02f41941b60 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -59,6 +59,10 @@ ifeq ($(SRCARCH),arc)
no-header-test += linux/bpf_perf_event.h
endif
+ifeq ($(SRCARCH),openrisc)
+no-header-test += linux/bpf_perf_event.h
+endif
+
ifeq ($(SRCARCH),powerpc)
no-header-test += linux/bpf_perf_event.h
endif